aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/ceph/messenger.c12
-rw-r--r--net/core/dev.c7
-rw-r--r--net/ipv4/tcp_input.c15
-rw-r--r--net/ipv4/tcp_output.c6
-rw-r--r--net/ipv4/tcp_timer.c2
-rw-r--r--net/ipv6/addrconf.c19
-rw-r--r--net/packet/af_packet.c4
-rw-r--r--net/sched/act_police.c36
-rw-r--r--net/sctp/output.c24
-rw-r--r--net/sctp/socket.c26
-rw-r--r--net/sctp/stream.c1
-rw-r--r--net/smc/af_smc.c11
-rw-r--r--net/smc/smc_cdc.c26
-rw-r--r--net/smc/smc_cdc.h60
-rw-r--r--net/smc/smc_core.c20
-rw-r--r--net/smc/smc_core.h5
-rw-r--r--net/smc/smc_ism.c43
-rw-r--r--net/smc/smc_ism.h1
-rw-r--r--net/smc/smc_wr.c4
19 files changed, 197 insertions, 125 deletions
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 57fcc6b4bf6e..2f126eff275d 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -580,9 +580,15 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
struct bio_vec bvec;
int ret;
- /* sendpage cannot properly handle pages with page_count == 0,
- * we need to fallback to sendmsg if that's the case */
- if (page_count(page) >= 1)
+ /*
+ * sendpage cannot properly handle pages with page_count == 0,
+ * we need to fall back to sendmsg if that's the case.
+ *
+ * Same goes for slab pages: skb_can_coalesce() allows
+ * coalescing neighboring slab objects into a single frag which
+ * triggers one of hardened usercopy checks.
+ */
+ if (page_count(page) >= 1 && !PageSlab(page))
return __ceph_tcp_sendpage(sock, page, offset, size, more);
bvec.bv_page = page;
diff --git a/net/core/dev.c b/net/core/dev.c
index d83582623cd7..f69b2fcdee40 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5981,11 +5981,14 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
if (work_done)
timeout = n->dev->gro_flush_timeout;
+ /* When the NAPI instance uses a timeout and keeps postponing
+ * it, we need to bound somehow the time packets are kept in
+ * the GRO layer
+ */
+ napi_gro_flush(n, !!timeout);
if (timeout)
hrtimer_start(&n->timer, ns_to_ktime(timeout),
HRTIMER_MODE_REL_PINNED);
- else
- napi_gro_flush(n, false);
}
if (unlikely(!list_empty(&n->poll_list))) {
/* If n->poll_list is not empty, we need to mask irqs */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index edaaebfbcd46..568dbf3b711a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4269,7 +4269,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
* If the sack array is full, forget about the last one.
*/
if (this_sack >= TCP_NUM_SACKS) {
- if (tp->compressed_ack)
+ if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
tcp_send_ack(sk);
this_sack--;
tp->rx_opt.num_sacks--;
@@ -4364,6 +4364,7 @@ static bool tcp_try_coalesce(struct sock *sk,
if (TCP_SKB_CB(from)->has_rxtstamp) {
TCP_SKB_CB(to)->has_rxtstamp = true;
to->tstamp = from->tstamp;
+ skb_hwtstamps(to)->hwtstamp = skb_hwtstamps(from)->hwtstamp;
}
return true;
@@ -5189,7 +5190,17 @@ send_now:
if (!tcp_is_sack(tp) ||
tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
goto send_now;
- tp->compressed_ack++;
+
+ if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
+ tp->compressed_ack_rcv_nxt = tp->rcv_nxt;
+ if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
+ NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
+ tp->compressed_ack - TCP_FASTRETRANS_THRESH);
+ tp->compressed_ack = 0;
+ }
+
+ if (++tp->compressed_ack <= TCP_FASTRETRANS_THRESH)
+ goto send_now;
if (hrtimer_is_queued(&tp->compressed_ack_timer))
return;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index d40d4cc53319..c5dc4c4fdadd 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -180,10 +180,10 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
{
struct tcp_sock *tp = tcp_sk(sk);
- if (unlikely(tp->compressed_ack)) {
+ if (unlikely(tp->compressed_ack > TCP_FASTRETRANS_THRESH)) {
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
- tp->compressed_ack);
- tp->compressed_ack = 0;
+ tp->compressed_ack - TCP_FASTRETRANS_THRESH);
+ tp->compressed_ack = TCP_FASTRETRANS_THRESH;
if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
__sock_put(sk);
}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 676020663ce8..5f8b6d3cd855 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -740,7 +740,7 @@ static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer)
bh_lock_sock(sk);
if (!sock_owned_by_user(sk)) {
- if (tp->compressed_ack)
+ if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
tcp_send_ack(sk);
} else {
if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 63a808d5af15..045597b9a7c0 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -179,7 +179,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp);
static void addrconf_dad_work(struct work_struct *w);
static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
bool send_na);
-static void addrconf_dad_run(struct inet6_dev *idev);
+static void addrconf_dad_run(struct inet6_dev *idev, bool restart);
static void addrconf_rs_timer(struct timer_list *t);
static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
@@ -3439,6 +3439,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct netdev_notifier_change_info *change_info;
struct netdev_notifier_changeupper_info *info;
struct inet6_dev *idev = __in6_dev_get(dev);
struct net *net = dev_net(dev);
@@ -3513,7 +3514,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
break;
}
- if (idev) {
+ if (!IS_ERR_OR_NULL(idev)) {
if (idev->if_flags & IF_READY) {
/* device is already configured -
* but resend MLD reports, we might
@@ -3521,6 +3522,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
* multicast snooping switches
*/
ipv6_mc_up(idev);
+ change_info = ptr;
+ if (change_info->flags_changed & IFF_NOARP)
+ addrconf_dad_run(idev, true);
rt6_sync_up(dev, RTNH_F_LINKDOWN);
break;
}
@@ -3555,7 +3559,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
if (!IS_ERR_OR_NULL(idev)) {
if (run_pending)
- addrconf_dad_run(idev);
+ addrconf_dad_run(idev, false);
/* Device has an address by now */
rt6_sync_up(dev, RTNH_F_DEAD);
@@ -4173,16 +4177,19 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
addrconf_verify_rtnl();
}
-static void addrconf_dad_run(struct inet6_dev *idev)
+static void addrconf_dad_run(struct inet6_dev *idev, bool restart)
{
struct inet6_ifaddr *ifp;
read_lock_bh(&idev->lock);
list_for_each_entry(ifp, &idev->addr_list, if_list) {
spin_lock(&ifp->lock);
- if (ifp->flags & IFA_F_TENTATIVE &&
- ifp->state == INET6_IFADDR_STATE_DAD)
+ if ((ifp->flags & IFA_F_TENTATIVE &&
+ ifp->state == INET6_IFADDR_STATE_DAD) || restart) {
+ if (restart)
+ ifp->state = INET6_IFADDR_STATE_PREDAD;
addrconf_dad_kick(ifp);
+ }
spin_unlock(&ifp->lock);
}
read_unlock_bh(&idev->lock);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ec3095f13aae..a74650e98f42 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2394,7 +2394,7 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
void *ph;
__u32 ts;
- ph = skb_shinfo(skb)->destructor_arg;
+ ph = skb_zcopy_get_nouarg(skb);
packet_dec_pending(&po->tx_ring);
ts = __packet_set_timestamp(po, ph, skb);
@@ -2461,7 +2461,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
skb->mark = po->sk.sk_mark;
skb->tstamp = sockc->transmit_time;
sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags);
- skb_shinfo(skb)->destructor_arg = ph.raw;
+ skb_zcopy_set_nouarg(skb, ph.raw);
skb_reserve(skb, hlen);
skb_reset_network_header(skb);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 052855d47354..37c9b8f0e10f 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -27,10 +27,7 @@ struct tcf_police_params {
u32 tcfp_ewma_rate;
s64 tcfp_burst;
u32 tcfp_mtu;
- s64 tcfp_toks;
- s64 tcfp_ptoks;
s64 tcfp_mtu_ptoks;
- s64 tcfp_t_c;
struct psched_ratecfg rate;
bool rate_present;
struct psched_ratecfg peak;
@@ -41,6 +38,11 @@ struct tcf_police_params {
struct tcf_police {
struct tc_action common;
struct tcf_police_params __rcu *params;
+
+ spinlock_t tcfp_lock ____cacheline_aligned_in_smp;
+ s64 tcfp_toks;
+ s64 tcfp_ptoks;
+ s64 tcfp_t_c;
};
#define to_police(pc) ((struct tcf_police *)pc)
@@ -122,6 +124,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
return ret;
}
ret = ACT_P_CREATED;
+ spin_lock_init(&(to_police(*a)->tcfp_lock));
} else if (!ovr) {
tcf_idr_release(*a, bind);
return -EEXIST;
@@ -186,12 +189,9 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
}
new->tcfp_burst = PSCHED_TICKS2NS(parm->burst);
- new->tcfp_toks = new->tcfp_burst;
- if (new->peak_present) {
+ if (new->peak_present)
new->tcfp_mtu_ptoks = (s64)psched_l2t_ns(&new->peak,
new->tcfp_mtu);
- new->tcfp_ptoks = new->tcfp_mtu_ptoks;
- }
if (tb[TCA_POLICE_AVRATE])
new->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]);
@@ -207,7 +207,12 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
}
spin_lock_bh(&police->tcf_lock);
- new->tcfp_t_c = ktime_get_ns();
+ spin_lock_bh(&police->tcfp_lock);
+ police->tcfp_t_c = ktime_get_ns();
+ police->tcfp_toks = new->tcfp_burst;
+ if (new->peak_present)
+ police->tcfp_ptoks = new->tcfp_mtu_ptoks;
+ spin_unlock_bh(&police->tcfp_lock);
police->tcf_action = parm->action;
rcu_swap_protected(police->params,
new,
@@ -257,25 +262,28 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
}
now = ktime_get_ns();
- toks = min_t(s64, now - p->tcfp_t_c, p->tcfp_burst);
+ spin_lock_bh(&police->tcfp_lock);
+ toks = min_t(s64, now - police->tcfp_t_c, p->tcfp_burst);
if (p->peak_present) {
- ptoks = toks + p->tcfp_ptoks;
+ ptoks = toks + police->tcfp_ptoks;
if (ptoks > p->tcfp_mtu_ptoks)
ptoks = p->tcfp_mtu_ptoks;
ptoks -= (s64)psched_l2t_ns(&p->peak,
qdisc_pkt_len(skb));
}
- toks += p->tcfp_toks;
+ toks += police->tcfp_toks;
if (toks > p->tcfp_burst)
toks = p->tcfp_burst;
toks -= (s64)psched_l2t_ns(&p->rate, qdisc_pkt_len(skb));
if ((toks|ptoks) >= 0) {
- p->tcfp_t_c = now;
- p->tcfp_toks = toks;
- p->tcfp_ptoks = ptoks;
+ police->tcfp_t_c = now;
+ police->tcfp_toks = toks;
+ police->tcfp_ptoks = ptoks;
+ spin_unlock_bh(&police->tcfp_lock);
ret = p->tcfp_result;
goto inc_drops;
}
+ spin_unlock_bh(&police->tcfp_lock);
}
inc_overlimits:
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 67939ad99c01..b0e74a3e77ec 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -118,6 +118,9 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
sctp_transport_route(tp, NULL, sp);
if (asoc->param_flags & SPP_PMTUD_ENABLE)
sctp_assoc_sync_pmtu(asoc);
+ } else if (!sctp_transport_pmtu_check(tp)) {
+ if (asoc->param_flags & SPP_PMTUD_ENABLE)
+ sctp_assoc_sync_pmtu(asoc);
}
if (asoc->pmtu_pending) {
@@ -396,25 +399,6 @@ finish:
return retval;
}
-static void sctp_packet_release_owner(struct sk_buff *skb)
-{
- sk_free(skb->sk);
-}
-
-static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk)
-{
- skb_orphan(skb);
- skb->sk = sk;
- skb->destructor = sctp_packet_release_owner;
-
- /*
- * The data chunks have already been accounted for in sctp_sendmsg(),
- * therefore only reserve a single byte to keep socket around until
- * the packet has been transmitted.
- */
- refcount_inc(&sk->sk_wmem_alloc);
-}
-
static void sctp_packet_gso_append(struct sk_buff *head, struct sk_buff *skb)
{
if (SCTP_OUTPUT_CB(head)->last == head)
@@ -601,7 +585,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
if (!head)
goto out;
skb_reserve(head, packet->overhead + MAX_HEADER);
- sctp_packet_set_owner_w(head, sk);
+ skb_set_owner_w(head, sk);
/* set sctp header */
sh = skb_push(head, sizeof(struct sctphdr));
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index e16c090e89f0..1fb2cad94597 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3951,32 +3951,16 @@ static int sctp_setsockopt_pr_supported(struct sock *sk,
unsigned int optlen)
{
struct sctp_assoc_value params;
- struct sctp_association *asoc;
- int retval = -EINVAL;
if (optlen != sizeof(params))
- goto out;
-
- if (copy_from_user(&params, optval, optlen)) {
- retval = -EFAULT;
- goto out;
- }
-
- asoc = sctp_id2assoc(sk, params.assoc_id);
- if (asoc) {
- asoc->prsctp_enable = !!params.assoc_value;
- } else if (!params.assoc_id) {
- struct sctp_sock *sp = sctp_sk(sk);
+ return -EINVAL;
- sp->ep->prsctp_enable = !!params.assoc_value;
- } else {
- goto out;
- }
+ if (copy_from_user(&params, optval, optlen))
+ return -EFAULT;
- retval = 0;
+ sctp_sk(sk)->ep->prsctp_enable = !!params.assoc_value;
-out:
- return retval;
+ return 0;
}
static int sctp_setsockopt_default_prinfo(struct sock *sk,
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index ffb940d3b57c..3892e7630f3a 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -535,7 +535,6 @@ int sctp_send_add_streams(struct sctp_association *asoc,
goto out;
}
- stream->incnt = incnt;
stream->outcnt = outcnt;
asoc->strreset_outstanding = !!out + !!in;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 4b865250e238..63f08b4e51d6 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -127,6 +127,8 @@ static int smc_release(struct socket *sock)
smc = smc_sk(sk);
/* cleanup for a dangling non-blocking connect */
+ if (smc->connect_info && sk->sk_state == SMC_INIT)
+ tcp_abort(smc->clcsock->sk, ECONNABORTED);
flush_work(&smc->connect_work);
kfree(smc->connect_info);
smc->connect_info = NULL;
@@ -551,7 +553,8 @@ static int smc_connect_rdma(struct smc_sock *smc,
mutex_lock(&smc_create_lgr_pending);
local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev,
- ibport, &aclc->lcl, NULL, 0);
+ ibport, ntoh24(aclc->qpn), &aclc->lcl,
+ NULL, 0);
if (local_contact < 0) {
if (local_contact == -ENOMEM)
reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
@@ -621,7 +624,7 @@ static int smc_connect_ism(struct smc_sock *smc,
int rc = 0;
mutex_lock(&smc_create_lgr_pending);
- local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0,
+ local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0, 0,
NULL, ismdev, aclc->gid);
if (local_contact < 0)
return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 0);
@@ -1086,7 +1089,7 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,
int *local_contact)
{
/* allocate connection / link group */
- *local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport,
+ *local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport, 0,
&pclc->lcl, NULL, 0);
if (*local_contact < 0) {
if (*local_contact == -ENOMEM)
@@ -1110,7 +1113,7 @@ static int smc_listen_ism_init(struct smc_sock *new_smc,
struct smc_clc_msg_smcd *pclc_smcd;
pclc_smcd = smc_get_clc_msg_smcd(pclc);
- *local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, NULL,
+ *local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, 0, NULL,
ismdev, pclc_smcd->gid);
if (*local_contact < 0) {
if (*local_contact == -ENOMEM)
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index ed5dcf03fe0b..db83332ac1c8 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -81,7 +81,7 @@ static inline void smc_cdc_add_pending_send(struct smc_connection *conn,
sizeof(struct smc_cdc_msg) > SMC_WR_BUF_SIZE,
"must increase SMC_WR_BUF_SIZE to at least sizeof(struct smc_cdc_msg)");
BUILD_BUG_ON_MSG(
- sizeof(struct smc_cdc_msg) != SMC_WR_TX_SIZE,
+ offsetofend(struct smc_cdc_msg, reserved) > SMC_WR_TX_SIZE,
"must adapt SMC_WR_TX_SIZE to sizeof(struct smc_cdc_msg); if not all smc_wr upper layer protocols use the same message size any more, must start to set link->wr_tx_sges[i].length on each individual smc_wr_tx_send()");
BUILD_BUG_ON_MSG(
sizeof(struct smc_cdc_tx_pend) > SMC_WR_TX_PEND_PRIV_SIZE,
@@ -177,23 +177,24 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
int smcd_cdc_msg_send(struct smc_connection *conn)
{
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+ union smc_host_cursor curs;
struct smcd_cdc_msg cdc;
int rc, diff;
memset(&cdc, 0, sizeof(cdc));
cdc.common.type = SMC_CDC_MSG_TYPE;
- cdc.prod_wrap = conn->local_tx_ctrl.prod.wrap;
- cdc.prod_count = conn->local_tx_ctrl.prod.count;
-
- cdc.cons_wrap = conn->local_tx_ctrl.cons.wrap;
- cdc.cons_count = conn->local_tx_ctrl.cons.count;
- cdc.prod_flags = conn->local_tx_ctrl.prod_flags;
- cdc.conn_state_flags = conn->local_tx_ctrl.conn_state_flags;
+ curs.acurs.counter = atomic64_read(&conn->local_tx_ctrl.prod.acurs);
+ cdc.prod.wrap = curs.wrap;
+ cdc.prod.count = curs.count;
+ curs.acurs.counter = atomic64_read(&conn->local_tx_ctrl.cons.acurs);
+ cdc.cons.wrap = curs.wrap;
+ cdc.cons.count = curs.count;
+ cdc.cons.prod_flags = conn->local_tx_ctrl.prod_flags;
+ cdc.cons.conn_state_flags = conn->local_tx_ctrl.conn_state_flags;
rc = smcd_tx_ism_write(conn, &cdc, sizeof(cdc), 0, 1);
if (rc)
return rc;
- smc_curs_copy(&conn->rx_curs_confirmed, &conn->local_tx_ctrl.cons,
- conn);
+ smc_curs_copy(&conn->rx_curs_confirmed, &curs, conn);
/* Calculate transmitted data and increment free send buffer space */
diff = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin,
&conn->tx_curs_sent);
@@ -331,13 +332,16 @@ static void smc_cdc_msg_recv(struct smc_sock *smc, struct smc_cdc_msg *cdc)
static void smcd_cdc_rx_tsklet(unsigned long data)
{
struct smc_connection *conn = (struct smc_connection *)data;
+ struct smcd_cdc_msg *data_cdc;
struct smcd_cdc_msg cdc;
struct smc_sock *smc;
if (!conn)
return;
- memcpy(&cdc, conn->rmb_desc->cpu_addr, sizeof(cdc));
+ data_cdc = (struct smcd_cdc_msg *)conn->rmb_desc->cpu_addr;
+ smcd_curs_copy(&cdc.prod, &data_cdc->prod, conn);
+ smcd_curs_copy(&cdc.cons, &data_cdc->cons, conn);
smc = container_of(conn, struct smc_sock, conn);
smc_cdc_msg_recv(smc, (struct smc_cdc_msg *)&cdc);
}
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index 934df4473a7c..b5bfe38c7f9b 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -48,21 +48,31 @@ struct smc_cdc_msg {
struct smc_cdc_producer_flags prod_flags;
struct smc_cdc_conn_state_flags conn_state_flags;
u8 reserved[18];
-} __packed; /* format defined in RFC7609 */
+};
+
+/* SMC-D cursor format */
+union smcd_cdc_cursor {
+ struct {
+ u16 wrap;
+ u32 count;
+ struct smc_cdc_producer_flags prod_flags;
+ struct smc_cdc_conn_state_flags conn_state_flags;
+ } __packed;
+#ifdef KERNEL_HAS_ATOMIC64
+ atomic64_t acurs; /* for atomic processing */
+#else
+ u64 acurs; /* for atomic processing */
+#endif
+} __aligned(8);
/* CDC message for SMC-D */
struct smcd_cdc_msg {
struct smc_wr_rx_hdr common; /* Type = 0xFE */
u8 res1[7];
- u16 prod_wrap;
- u32 prod_count;
- u8 res2[2];
- u16 cons_wrap;
- u32 cons_count;
- struct smc_cdc_producer_flags prod_flags;
- struct smc_cdc_conn_state_flags conn_state_flags;
+ union smcd_cdc_cursor prod;
+ union smcd_cdc_cursor cons;
u8 res3[8];
-} __packed;
+} __aligned(8);
static inline bool smc_cdc_rxed_any_close(struct smc_connection *conn)
{
@@ -135,6 +145,21 @@ static inline void smc_curs_copy_net(union smc_cdc_cursor *tgt,
#endif
}
+static inline void smcd_curs_copy(union smcd_cdc_cursor *tgt,
+ union smcd_cdc_cursor *src,
+ struct smc_connection *conn)
+{
+#ifndef KERNEL_HAS_ATOMIC64
+ unsigned long flags;
+
+ spin_lock_irqsave(&conn->acurs_lock, flags);
+ tgt->acurs = src->acurs;
+ spin_unlock_irqrestore(&conn->acurs_lock, flags);
+#else
+ atomic64_set(&tgt->acurs, atomic64_read(&src->acurs));
+#endif
+}
+
/* calculate cursor difference between old and new, where old <= new */
static inline int smc_curs_diff(unsigned int size,
union smc_host_cursor *old,
@@ -222,12 +247,17 @@ static inline void smcr_cdc_msg_to_host(struct smc_host_cdc_msg *local,
static inline void smcd_cdc_msg_to_host(struct smc_host_cdc_msg *local,
struct smcd_cdc_msg *peer)
{
- local->prod.wrap = peer->prod_wrap;
- local->prod.count = peer->prod_count;
- local->cons.wrap = peer->cons_wrap;
- local->cons.count = peer->cons_count;
- local->prod_flags = peer->prod_flags;
- local->conn_state_flags = peer->conn_state_flags;
+ union smc_host_cursor temp;
+
+ temp.wrap = peer->prod.wrap;
+ temp.count = peer->prod.count;
+ atomic64_set(&local->prod.acurs, atomic64_read(&temp.acurs));
+
+ temp.wrap = peer->cons.wrap;
+ temp.count = peer->cons.count;
+ atomic64_set(&local->cons.acurs, atomic64_read(&temp.acurs));
+ local->prod_flags = peer->cons.prod_flags;
+ local->conn_state_flags = peer->cons.conn_state_flags;
}
static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 1382ddae591e..35c1cdc93e1c 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -189,6 +189,8 @@ free:
if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
smc_llc_link_inactive(lnk);
+ if (lgr->is_smcd)
+ smc_ism_signal_shutdown(lgr);
smc_lgr_free(lgr);
}
}
@@ -495,7 +497,7 @@ void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
}
/* Called when SMC-D device is terminated or peer is lost */
-void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid)
+void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
{
struct smc_link_group *lgr, *l;
LIST_HEAD(lgr_free_list);
@@ -505,7 +507,7 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid)
list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
if (lgr->is_smcd && lgr->smcd == dev &&
(!peer_gid || lgr->peer_gid == peer_gid) &&
- !list_empty(&lgr->list)) {
+ (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
__smc_lgr_terminate(lgr);
list_move(&lgr->list, &lgr_free_list);
}
@@ -516,6 +518,8 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid)
list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
list_del_init(&lgr->list);
cancel_delayed_work_sync(&lgr->free_work);
+ if (!peer_gid && vlan == VLAN_VID_MASK) /* dev terminated? */
+ smc_ism_signal_shutdown(lgr);
smc_lgr_free(lgr);
}
}
@@ -569,7 +573,7 @@ out:
static bool smcr_lgr_match(struct smc_link_group *lgr,
struct smc_clc_msg_local *lcl,
- enum smc_lgr_role role)
+ enum smc_lgr_role role, u32 clcqpn)
{
return !memcmp(lgr->peer_systemid, lcl->id_for_peer,
SMC_SYSTEMID_LEN) &&
@@ -577,7 +581,9 @@ static bool smcr_lgr_match(struct smc_link_group *lgr,
SMC_GID_SIZE) &&
!memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
sizeof(lcl->mac)) &&
- lgr->role == role;
+ lgr->role == role &&
+ (lgr->role == SMC_SERV ||
+ lgr->lnk[SMC_SINGLE_LINK].peer_qpn == clcqpn);
}
static bool smcd_lgr_match(struct smc_link_group *lgr,
@@ -588,7 +594,7 @@ static bool smcd_lgr_match(struct smc_link_group *lgr,
/* create a new SMC connection (and a new link group if necessary) */
int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
- struct smc_ib_device *smcibdev, u8 ibport,
+ struct smc_ib_device *smcibdev, u8 ibport, u32 clcqpn,
struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
u64 peer_gid)
{
@@ -613,7 +619,7 @@ int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
list_for_each_entry(lgr, &smc_lgr_list.list, list) {
write_lock_bh(&lgr->conns_lock);
if ((is_smcd ? smcd_lgr_match(lgr, smcd, peer_gid) :
- smcr_lgr_match(lgr, lcl, role)) &&
+ smcr_lgr_match(lgr, lcl, role, clcqpn)) &&
!lgr->sync_err &&
lgr->vlan_id == vlan_id &&
(role == SMC_CLNT ||
@@ -1034,6 +1040,8 @@ void smc_core_exit(void)
smc_llc_link_inactive(lnk);
}
cancel_delayed_work_sync(&lgr->free_work);
+ if (lgr->is_smcd)
+ smc_ism_signal_shutdown(lgr);
smc_lgr_free(lgr); /* free link group */
}
}
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index e177c6675038..b00287989a3d 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -249,7 +249,8 @@ struct smc_clc_msg_local;
void smc_lgr_forget(struct smc_link_group *lgr);
void smc_lgr_terminate(struct smc_link_group *lgr);
void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport);
-void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid);
+void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,
+ unsigned short vlan);
int smc_buf_create(struct smc_sock *smc, bool is_smcd);
int smc_uncompress_bufsize(u8 compressed);
int smc_rmb_rtoken_handling(struct smc_connection *conn,
@@ -264,7 +265,7 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id);
void smc_conn_free(struct smc_connection *conn);
int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
- struct smc_ib_device *smcibdev, u8 ibport,
+ struct smc_ib_device *smcibdev, u8 ibport, u32 clcqpn,
struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
u64 peer_gid);
void smcd_conn_free(struct smc_connection *conn);
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index e36f21ce7252..2fff79db1a59 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -187,22 +187,28 @@ struct smc_ism_event_work {
#define ISM_EVENT_REQUEST 0x0001
#define ISM_EVENT_RESPONSE 0x0002
#define ISM_EVENT_REQUEST_IR 0x00000001
+#define ISM_EVENT_CODE_SHUTDOWN 0x80
#define ISM_EVENT_CODE_TESTLINK 0x83
+union smcd_sw_event_info {
+ u64 info;
+ struct {
+ u8 uid[SMC_LGR_ID_SIZE];
+ unsigned short vlan_id;
+ u16 code;
+ };
+};
+
static void smcd_handle_sw_event(struct smc_ism_event_work *wrk)
{
- union {
- u64 info;
- struct {
- u32 uid;
- unsigned short vlanid;
- u16 code;
- };
- } ev_info;
+ union smcd_sw_event_info ev_info;
+ ev_info.info = wrk->event.info;
switch (wrk->event.code) {
+ case ISM_EVENT_CODE_SHUTDOWN: /* Peer shut down DMBs */
+ smc_smcd_terminate(wrk->smcd, wrk->event.tok, ev_info.vlan_id);
+ break;
case ISM_EVENT_CODE_TESTLINK: /* Activity timer */
- ev_info.info = wrk->event.info;
if (ev_info.code == ISM_EVENT_REQUEST) {
ev_info.code = ISM_EVENT_RESPONSE;
wrk->smcd->ops->signal_event(wrk->smcd,
@@ -215,6 +221,21 @@ static void smcd_handle_sw_event(struct smc_ism_event_work *wrk)
}
}
+int smc_ism_signal_shutdown(struct smc_link_group *lgr)
+{
+ int rc;
+ union smcd_sw_event_info ev_info;
+
+ memcpy(ev_info.uid, lgr->id, SMC_LGR_ID_SIZE);
+ ev_info.vlan_id = lgr->vlan_id;
+ ev_info.code = ISM_EVENT_REQUEST;
+ rc = lgr->smcd->ops->signal_event(lgr->smcd, lgr->peer_gid,
+ ISM_EVENT_REQUEST_IR,
+ ISM_EVENT_CODE_SHUTDOWN,
+ ev_info.info);
+ return rc;
+}
+
/* worker for SMC-D events */
static void smc_ism_event_work(struct work_struct *work)
{
@@ -223,7 +244,7 @@ static void smc_ism_event_work(struct work_struct *work)
switch (wrk->event.type) {
case ISM_EVENT_GID: /* GID event, token is peer GID */
- smc_smcd_terminate(wrk->smcd, wrk->event.tok);
+ smc_smcd_terminate(wrk->smcd, wrk->event.tok, VLAN_VID_MASK);
break;
case ISM_EVENT_DMB:
break;
@@ -289,7 +310,7 @@ void smcd_unregister_dev(struct smcd_dev *smcd)
spin_unlock(&smcd_dev_list.lock);
flush_workqueue(smcd->event_wq);
destroy_workqueue(smcd->event_wq);
- smc_smcd_terminate(smcd, 0);
+ smc_smcd_terminate(smcd, 0, VLAN_VID_MASK);
device_del(&smcd->dev);
}
diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h
index aee45b860b79..4da946cbfa29 100644
--- a/net/smc/smc_ism.h
+++ b/net/smc/smc_ism.h
@@ -45,4 +45,5 @@ int smc_ism_register_dmb(struct smc_link_group *lgr, int buf_size,
int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc);
int smc_ism_write(struct smcd_dev *dev, const struct smc_ism_position *pos,
void *data, size_t len);
+int smc_ism_signal_shutdown(struct smc_link_group *lgr);
#endif
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 3c458d279855..c2694750a6a8 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -215,12 +215,14 @@ int smc_wr_tx_put_slot(struct smc_link *link,
pend = container_of(wr_pend_priv, struct smc_wr_tx_pend, priv);
if (pend->idx < link->wr_tx_cnt) {
+ u32 idx = pend->idx;
+
/* clear the full struct smc_wr_tx_pend including .priv */
memset(&link->wr_tx_pends[pend->idx], 0,
sizeof(link->wr_tx_pends[pend->idx]));
memset(&link->wr_tx_bufs[pend->idx], 0,
sizeof(link->wr_tx_bufs[pend->idx]));
- test_and_clear_bit(pend->idx, link->wr_tx_mask);
+ test_and_clear_bit(idx, link->wr_tx_mask);
return 1;
}