diff options
Diffstat (limited to 'drivers/infiniband/hw/cxgb4/cm.c')
-rw-r--r-- | drivers/infiniband/hw/cxgb4/cm.c | 895 |
1 files changed, 751 insertions, 144 deletions
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 6cfd4d8fd0bd..565bfb161c1a 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -38,10 +38,12 @@ #include <linux/inetdevice.h> #include <linux/ip.h> #include <linux/tcp.h> +#include <linux/if_vlan.h> #include <net/neighbour.h> #include <net/netevent.h> #include <net/route.h> +#include <net/tcp.h> #include "iw_cxgb4.h" @@ -61,6 +63,14 @@ static char *states[] = { NULL, }; +static int nocong; +module_param(nocong, int, 0644); +MODULE_PARM_DESC(nocong, "Turn of congestion control (default=0)"); + +static int enable_ecn; +module_param(enable_ecn, int, 0644); +MODULE_PARM_DESC(enable_ecn, "Enable ECN (default=0/disabled)"); + static int dack_mode = 1; module_param(dack_mode, int, 0644); MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)"); @@ -133,14 +143,28 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status); static LIST_HEAD(timeout_list); static spinlock_t timeout_lock; +static void deref_qp(struct c4iw_ep *ep) +{ + c4iw_qp_rem_ref(&ep->com.qp->ibqp); + clear_bit(QP_REFERENCED, &ep->com.flags); +} + +static void ref_qp(struct c4iw_ep *ep) +{ + set_bit(QP_REFERENCED, &ep->com.flags); + c4iw_qp_add_ref(&ep->com.qp->ibqp); +} + static void start_ep_timer(struct c4iw_ep *ep) { PDBG("%s ep %p\n", __func__, ep); if (timer_pending(&ep->timer)) { - PDBG("%s stopped / restarted timer ep %p\n", __func__, ep); - del_timer_sync(&ep->timer); - } else - c4iw_get_ep(&ep->com); + pr_err("%s timer already started! ep %p\n", + __func__, ep); + return; + } + clear_bit(TIMEOUT, &ep->com.flags); + c4iw_get_ep(&ep->com); ep->timer.expires = jiffies + ep_timeout_secs * HZ; ep->timer.data = (unsigned long)ep; ep->timer.function = ep_timeout; @@ -149,15 +173,10 @@ static void start_ep_timer(struct c4iw_ep *ep) static void stop_ep_timer(struct c4iw_ep *ep) { - PDBG("%s ep %p\n", __func__, ep); - if (!timer_pending(&ep->timer)) { - printk(KERN_ERR "%s timer stopped when its not running! " - "ep %p state %u\n", __func__, ep, ep->com.state); - WARN_ON(1); - return; - } + PDBG("%s ep %p stopping\n", __func__, ep); del_timer_sync(&ep->timer); - c4iw_put_ep(&ep->com); + if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) + c4iw_put_ep(&ep->com); } static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb, @@ -262,7 +281,10 @@ void _c4iw_free_ep(struct kref *kref) ep = container_of(kref, struct c4iw_ep, com.kref); PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]); + if (test_bit(QP_REFERENCED, &ep->com.flags)) + deref_qp(ep); if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) { + remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid); cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid); dst_release(ep->dst); cxgb4_l2t_release(ep->l2t); @@ -442,6 +464,50 @@ static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } +#define VLAN_NONE 0xfff +#define FILTER_SEL_VLAN_NONE 0xffff +#define FILTER_SEL_WIDTH_P_FC (3+1) /* port uses 3 bits, FCoE one bit */ +#define FILTER_SEL_WIDTH_VIN_P_FC \ + (6 + 7 + FILTER_SEL_WIDTH_P_FC) /* 6 bits are unused, VF uses 7 bits*/ +#define FILTER_SEL_WIDTH_TAG_P_FC \ + (3 + FILTER_SEL_WIDTH_VIN_P_FC) /* PF uses 3 bits */ +#define FILTER_SEL_WIDTH_VLD_TAG_P_FC (1 + FILTER_SEL_WIDTH_TAG_P_FC) + +static unsigned int select_ntuple(struct c4iw_dev *dev, struct dst_entry *dst, + struct l2t_entry *l2t) +{ + unsigned int ntuple = 0; + u32 viid; + + switch (dev->rdev.lldi.filt_mode) { + + /* default filter mode */ + case HW_TPL_FR_MT_PR_IV_P_FC: + if (l2t->vlan == VLAN_NONE) + ntuple |= FILTER_SEL_VLAN_NONE << FILTER_SEL_WIDTH_P_FC; + else { + ntuple |= l2t->vlan << FILTER_SEL_WIDTH_P_FC; + ntuple |= 1 << FILTER_SEL_WIDTH_VLD_TAG_P_FC; + } + ntuple |= l2t->lport << S_PORT | IPPROTO_TCP << + FILTER_SEL_WIDTH_VLD_TAG_P_FC; + break; + case HW_TPL_FR_MT_PR_OV_P_FC: { + viid = cxgb4_port_viid(l2t->neigh->dev); + + ntuple |= FW_VIID_VIN_GET(viid) << FILTER_SEL_WIDTH_P_FC; + ntuple |= FW_VIID_PFN_GET(viid) << FILTER_SEL_WIDTH_VIN_P_FC; + ntuple |= FW_VIID_VIVLD_GET(viid) << FILTER_SEL_WIDTH_TAG_P_FC; + ntuple |= l2t->lport << S_PORT | IPPROTO_TCP << + FILTER_SEL_WIDTH_VLD_TAG_P_FC; + break; + } + default: + break; + } + return ntuple; +} + static int send_connect(struct c4iw_ep *ep) { struct cpl_act_open_req *req; @@ -464,7 +530,8 @@ static int send_connect(struct c4iw_ep *ep) cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); wscale = compute_wscale(rcv_win); - opt0 = KEEP_ALIVE(1) | + opt0 = (nocong ? NO_CONG(1) : 0) | + KEEP_ALIVE(1) | DELACK(1) | WND_SCALE(wscale) | MSS_IDX(mtu_idx) | @@ -475,6 +542,7 @@ static int send_connect(struct c4iw_ep *ep) ULP_MODE(ULP_MODE_TCPDDP) | RCV_BUFSIZ(rcv_win>>10); opt2 = RX_CHANNEL(0) | + CCTRL_ECN(enable_ecn) | RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid); if (enable_tcp_timestamps) opt2 |= TSTAMPS_EN(1); @@ -493,8 +561,9 @@ static int send_connect(struct c4iw_ep *ep) req->local_ip = ep->com.local_addr.sin_addr.s_addr; req->peer_ip = ep->com.remote_addr.sin_addr.s_addr; req->opt0 = cpu_to_be64(opt0); - req->params = 0; + req->params = cpu_to_be32(select_ntuple(ep->com.dev, ep->dst, ep->l2t)); req->opt2 = cpu_to_be32(opt2); + set_bit(ACT_OPEN_REQ, &ep->com.history); return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } @@ -630,7 +699,7 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) memset(mpa, 0, sizeof(*mpa)); memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); mpa->flags = MPA_REJECT; - mpa->revision = mpa_rev; + mpa->revision = ep->mpa_attr.version; mpa->private_data_size = htons(plen); if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { @@ -771,6 +840,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) /* setup the hwtid for this connection */ ep->hwtid = tid; cxgb4_insert_tid(t, ep, tid); + insert_handle(dev, &dev->hwtid_idr, ep, ep->hwtid); ep->snd_seq = be32_to_cpu(req->snd_isn); ep->rcv_seq = be32_to_cpu(req->rcv_isn); @@ -778,7 +848,9 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) set_emss(ep, ntohs(req->tcp_opt)); /* dealloc the atid */ + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid); cxgb4_free_atid(t, atid); + set_bit(ACT_ESTAB, &ep->com.history); /* start MPA negotiation */ send_flowc(ep, NULL); @@ -803,7 +875,7 @@ static void close_complete_upcall(struct c4iw_ep *ep) ep->com.cm_id->event_handler(ep->com.cm_id, &event); ep->com.cm_id->rem_ref(ep->com.cm_id); ep->com.cm_id = NULL; - ep->com.qp = NULL; + set_bit(CLOSE_UPCALL, &ep->com.history); } } @@ -812,6 +884,7 @@ static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); close_complete_upcall(ep); state_set(&ep->com, ABORTING); + set_bit(ABORT_CONN, &ep->com.history); return send_abort(ep, skb, gfp); } @@ -826,6 +899,7 @@ static void peer_close_upcall(struct c4iw_ep *ep) PDBG("peer close delivered ep %p cm_id %p tid %u\n", ep, ep->com.cm_id, ep->hwtid); ep->com.cm_id->event_handler(ep->com.cm_id, &event); + set_bit(DISCONN_UPCALL, &ep->com.history); } } @@ -843,7 +917,7 @@ static void peer_abort_upcall(struct c4iw_ep *ep) ep->com.cm_id->event_handler(ep->com.cm_id, &event); ep->com.cm_id->rem_ref(ep->com.cm_id); ep->com.cm_id = NULL; - ep->com.qp = NULL; + set_bit(ABORT_UPCALL, &ep->com.history); } } @@ -876,12 +950,12 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status) PDBG("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid, status); + set_bit(CONN_RPL_UPCALL, &ep->com.history); ep->com.cm_id->event_handler(ep->com.cm_id, &event); if (status < 0) { ep->com.cm_id->rem_ref(ep->com.cm_id); ep->com.cm_id = NULL; - ep->com.qp = NULL; } } @@ -916,6 +990,7 @@ static void connect_request_upcall(struct c4iw_ep *ep) ep->parent_ep->com.cm_id, &event); } + set_bit(CONNREQ_UPCALL, &ep->com.history); c4iw_put_ep(&ep->parent_ep->com); ep->parent_ep = NULL; } @@ -932,6 +1007,7 @@ static void established_upcall(struct c4iw_ep *ep) if (ep->com.cm_id) { PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); ep->com.cm_id->event_handler(ep->com.cm_id, &event); + set_bit(ESTAB_UPCALL, &ep->com.history); } } @@ -1224,11 +1300,13 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) if (mpa->revision > mpa_rev) { printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d," " Received = %d\n", __func__, mpa_rev, mpa->revision); + stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) { + stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } @@ -1239,6 +1317,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) * Fail if there's too much private data. */ if (plen > MPA_MAX_PRIVATE_DATA) { + stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } @@ -1247,6 +1326,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) * If plen does not account for pkt size */ if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { + stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } @@ -1317,36 +1397,40 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) unsigned int dlen = ntohs(hdr->len); unsigned int tid = GET_TID(hdr); struct tid_info *t = dev->rdev.lldi.tids; + __u8 status = hdr->status; ep = lookup_tid(t, tid); PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen); skb_pull(skb, sizeof(*hdr)); skb_trim(skb, dlen); - ep->rcv_seq += dlen; - BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen)); - /* update RX credits */ update_rx_credits(ep, dlen); switch (state_read(&ep->com)) { case MPA_REQ_SENT: + ep->rcv_seq += dlen; process_mpa_reply(ep, skb); break; case MPA_REQ_WAIT: + ep->rcv_seq += dlen; process_mpa_request(ep, skb); break; - case MPA_REP_SENT: + case FPDU_MODE: { + struct c4iw_qp_attributes attrs; + BUG_ON(!ep->com.qp); + if (status) + pr_err("%s Unexpected streaming data." \ + " qpid %u ep %p state %d tid %u status %d\n", + __func__, ep->com.qp->wq.sq.qid, ep, + state_read(&ep->com), ep->hwtid, status); + attrs.next_state = C4IW_QP_STATE_ERROR; + c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + c4iw_ep_disconnect(ep, 1, GFP_KERNEL); break; + } default: - printk(KERN_ERR MOD "%s Unexpected streaming data." - " ep %p state %d tid %u\n", - __func__, ep, state_read(&ep->com), ep->hwtid); - - /* - * The ep will timeout and inform the ULP of the failure. - * See ep_timeout(). - */ break; } return 0; @@ -1369,6 +1453,7 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) mutex_lock(&ep->com.mutex); switch (ep->com.state) { case ABORTING: + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); __state_set(&ep->com, DEAD); release = 1; break; @@ -1384,6 +1469,63 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } +static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) +{ + struct sk_buff *skb; + struct fw_ofld_connection_wr *req; + unsigned int mtu_idx; + int wscale; + + skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); + req = (struct fw_ofld_connection_wr *)__skb_put(skb, sizeof(*req)); + memset(req, 0, sizeof(*req)); + req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR)); + req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16))); + req->le.filter = cpu_to_be32(select_ntuple(ep->com.dev, ep->dst, + ep->l2t)); + req->le.lport = ep->com.local_addr.sin_port; + req->le.pport = ep->com.remote_addr.sin_port; + req->le.u.ipv4.lip = ep->com.local_addr.sin_addr.s_addr; + req->le.u.ipv4.pip = ep->com.remote_addr.sin_addr.s_addr; + req->tcb.t_state_to_astid = + htonl(V_FW_OFLD_CONNECTION_WR_T_STATE(TCP_SYN_SENT) | + V_FW_OFLD_CONNECTION_WR_ASTID(atid)); + req->tcb.cplrxdataack_cplpassacceptrpl = + htons(F_FW_OFLD_CONNECTION_WR_CPLRXDATAACK); + req->tcb.tx_max = (__force __be32) jiffies; + req->tcb.rcv_adv = htons(1); + cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); + wscale = compute_wscale(rcv_win); + req->tcb.opt0 = (__force __be64) (TCAM_BYPASS(1) | + (nocong ? NO_CONG(1) : 0) | + KEEP_ALIVE(1) | + DELACK(1) | + WND_SCALE(wscale) | + MSS_IDX(mtu_idx) | + L2T_IDX(ep->l2t->idx) | + TX_CHAN(ep->tx_chan) | + SMAC_SEL(ep->smac_idx) | + DSCP(ep->tos) | + ULP_MODE(ULP_MODE_TCPDDP) | + RCV_BUFSIZ(rcv_win >> 10)); + req->tcb.opt2 = (__force __be32) (PACE(1) | + TX_QUEUE(ep->com.dev->rdev.lldi.tx_modq[ep->tx_chan]) | + RX_CHANNEL(0) | + CCTRL_ECN(enable_ecn) | + RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid)); + if (enable_tcp_timestamps) + req->tcb.opt2 |= (__force __be32) TSTAMPS_EN(1); + if (enable_tcp_sack) + req->tcb.opt2 |= (__force __be32) SACK_EN(1); + if (wscale && enable_tcp_window_scaling) + req->tcb.opt2 |= (__force __be32) WND_SCALE_EN(1); + req->tcb.opt0 = cpu_to_be64((__force u64) req->tcb.opt0); + req->tcb.opt2 = cpu_to_be32((__force u32) req->tcb.opt2); + set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx); + set_bit(ACT_OFLD_CONN, &ep->com.history); + c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); +} + /* * Return whether a failed active open has allocated a TID */ @@ -1393,6 +1535,111 @@ static inline int act_open_has_tid(int status) status != CPL_ERR_ARP_MISS; } +#define ACT_OPEN_RETRY_COUNT 2 + +static int c4iw_reconnect(struct c4iw_ep *ep) +{ + int err = 0; + struct rtable *rt; + struct port_info *pi; + struct net_device *pdev; + int step; + struct neighbour *neigh; + + PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id); + init_timer(&ep->timer); + + /* + * Allocate an active TID to initiate a TCP connection. + */ + ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep); + if (ep->atid == -1) { + pr_err("%s - cannot alloc atid.\n", __func__); + err = -ENOMEM; + goto fail2; + } + insert_handle(ep->com.dev, &ep->com.dev->atid_idr, ep, ep->atid); + + /* find a route */ + rt = find_route(ep->com.dev, + ep->com.cm_id->local_addr.sin_addr.s_addr, + ep->com.cm_id->remote_addr.sin_addr.s_addr, + ep->com.cm_id->local_addr.sin_port, + ep->com.cm_id->remote_addr.sin_port, 0); + if (!rt) { + pr_err("%s - cannot find route.\n", __func__); + err = -EHOSTUNREACH; + goto fail3; + } + ep->dst = &rt->dst; + + neigh = dst_neigh_lookup(ep->dst, + &ep->com.cm_id->remote_addr.sin_addr.s_addr); + /* get a l2t entry */ + if (neigh->dev->flags & IFF_LOOPBACK) { + PDBG("%s LOOPBACK\n", __func__); + pdev = ip_dev_find(&init_net, + ep->com.cm_id->remote_addr.sin_addr.s_addr); + ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t, + neigh, pdev, 0); + pi = (struct port_info *)netdev_priv(pdev); + ep->mtu = pdev->mtu; + ep->tx_chan = cxgb4_port_chan(pdev); + ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; + dev_put(pdev); + } else { + ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t, + neigh, neigh->dev, 0); + pi = (struct port_info *)netdev_priv(neigh->dev); + ep->mtu = dst_mtu(ep->dst); + ep->tx_chan = cxgb4_port_chan(neigh->dev); + ep->smac_idx = (cxgb4_port_viid(neigh->dev) & + 0x7F) << 1; + } + + step = ep->com.dev->rdev.lldi.ntxq / ep->com.dev->rdev.lldi.nchan; + ep->txq_idx = pi->port_id * step; + ep->ctrlq_idx = pi->port_id; + step = ep->com.dev->rdev.lldi.nrxq / ep->com.dev->rdev.lldi.nchan; + ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[pi->port_id * step]; + + if (!ep->l2t) { + pr_err("%s - cannot alloc l2e.\n", __func__); + err = -ENOMEM; + goto fail4; + } + + PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", + __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid, + ep->l2t->idx); + + state_set(&ep->com, CONNECTING); + ep->tos = 0; + + /* send connect request to rnic */ + err = send_connect(ep); + if (!err) + goto out; + + cxgb4_l2t_release(ep->l2t); +fail4: + dst_release(ep->dst); +fail3: + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); + cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); +fail2: + /* + * remember to send notification to upper layer. + * We are in here so the upper layer is not aware that this is + * re-connect attempt and so, upper layer is still waiting for + * response of 1st connect request. + */ + connect_reply_upcall(ep, -ECONNRESET); + c4iw_put_ep(&ep->com); +out: + return err; +} + static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) { struct c4iw_ep *ep; @@ -1413,6 +1660,8 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } + set_bit(ACT_OPEN_RPL, &ep->com.history); + /* * Log interesting failures. */ @@ -1420,6 +1669,29 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) case CPL_ERR_CONN_RESET: case CPL_ERR_CONN_TIMEDOUT: break; + case CPL_ERR_TCAM_FULL: + if (dev->rdev.lldi.enable_fw_ofld_conn) { + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.tcam_full++; + mutex_unlock(&dev->rdev.stats.lock); + send_fw_act_open_req(ep, + GET_TID_TID(GET_AOPEN_ATID( + ntohl(rpl->atid_status)))); + return 0; + } + break; + case CPL_ERR_CONN_EXIST: + if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) { + set_bit(ACT_RETRY_INUSE, &ep->com.history); + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, + atid); + cxgb4_free_atid(t, atid); + dst_release(ep->dst); + cxgb4_l2t_release(ep->l2t); + c4iw_reconnect(ep); + return 0; + } + break; default: printk(KERN_INFO MOD "Active open failure - " "atid %u status %u errno %d %pI4:%u->%pI4:%u\n", @@ -1437,6 +1709,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) if (status && act_open_has_tid(status)) cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl)); + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid); cxgb4_free_atid(t, atid); dst_release(ep->dst); cxgb4_l2t_release(ep->l2t); @@ -1453,13 +1726,14 @@ static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) struct c4iw_listen_ep *ep = lookup_stid(t, stid); if (!ep) { - printk(KERN_ERR MOD "stid %d lookup failure!\n", stid); - return 0; + PDBG("%s stid %d lookup failure!\n", __func__, stid); + goto out; } PDBG("%s ep %p status %d error %d\n", __func__, ep, rpl->status, status2errno(rpl->status)); c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status)); +out: return 0; } @@ -1511,14 +1785,15 @@ static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb, skb_get(skb); cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); wscale = compute_wscale(rcv_win); - opt0 = KEEP_ALIVE(1) | + opt0 = (nocong ? NO_CONG(1) : 0) | + KEEP_ALIVE(1) | DELACK(1) | WND_SCALE(wscale) | MSS_IDX(mtu_idx) | L2T_IDX(ep->l2t->idx) | TX_CHAN(ep->tx_chan) | SMAC_SEL(ep->smac_idx) | - DSCP(ep->tos) | + DSCP(ep->tos >> 2) | ULP_MODE(ULP_MODE_TCPDDP) | RCV_BUFSIZ(rcv_win>>10); opt2 = RX_CHANNEL(0) | @@ -1530,6 +1805,15 @@ static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb, opt2 |= SACK_EN(1); if (wscale && enable_tcp_window_scaling) opt2 |= WND_SCALE_EN(1); + if (enable_ecn) { + const struct tcphdr *tcph; + u32 hlen = ntohl(req->hdr_len); + + tcph = (const void *)(req + 1) + G_ETH_HDR_LEN(hlen) + + G_IP_HDR_LEN(hlen); + if (tcph->ece && tcph->cwr) + opt2 |= CCTRL_ECN(1); + } rpl = cplhdr(skb); INIT_TP_WR(rpl, ep->hwtid); @@ -1646,22 +1930,30 @@ out: static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) { - struct c4iw_ep *child_ep, *parent_ep; + struct c4iw_ep *child_ep = NULL, *parent_ep; struct cpl_pass_accept_req *req = cplhdr(skb); unsigned int stid = GET_POPEN_TID(ntohl(req->tos_stid)); struct tid_info *t = dev->rdev.lldi.tids; unsigned int hwtid = GET_TID(req); struct dst_entry *dst; struct rtable *rt; - __be32 local_ip, peer_ip; + __be32 local_ip, peer_ip = 0; __be16 local_port, peer_port; int err; + u16 peer_mss = ntohs(req->tcpopt.mss); parent_ep = lookup_stid(t, stid); - PDBG("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid); - + if (!parent_ep) { + PDBG("%s connect request on invalid stid %d\n", __func__, stid); + goto reject; + } get_4tuple(req, &local_ip, &peer_ip, &local_port, &peer_port); + PDBG("%s parent ep %p hwtid %u laddr 0x%x raddr 0x%x lport %d " \ + "rport %d peer_mss %d\n", __func__, parent_ep, hwtid, + ntohl(local_ip), ntohl(peer_ip), ntohs(local_port), + ntohs(peer_port), peer_mss); + if (state_read(&parent_ep->com) != LISTEN) { printk(KERN_ERR "%s - listening ep not in LISTEN\n", __func__); @@ -1695,6 +1987,9 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } + if (peer_mss && child_ep->mtu > (peer_mss + 40)) + child_ep->mtu = peer_mss + 40; + state_set(&child_ep->com, CONNECTING); child_ep->com.dev = dev; child_ep->com.cm_id = NULL; @@ -1715,7 +2010,9 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) init_timer(&child_ep->timer); cxgb4_insert_tid(t, child_ep, hwtid); + insert_handle(dev, &dev->hwtid_idr, child_ep, child_ep->hwtid); accept_cr(child_ep, peer_ip, skb, req); + set_bit(PASS_ACCEPT_REQ, &child_ep->com.history); goto out; reject: reject_cr(dev, hwtid, peer_ip, skb); @@ -1735,12 +2032,16 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb) ep->snd_seq = be32_to_cpu(req->snd_isn); ep->rcv_seq = be32_to_cpu(req->rcv_isn); + PDBG("%s ep %p hwtid %u tcp_opt 0x%02x\n", __func__, ep, tid, + ntohs(req->tcp_opt)); + set_emss(ep, ntohs(req->tcp_opt)); dst_confirm(ep->dst); state_set(&ep->com, MPA_REQ_WAIT); start_ep_timer(ep); send_flowc(ep, skb); + set_bit(PASS_ESTAB, &ep->com.history); return 0; } @@ -1760,6 +2061,7 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); dst_confirm(ep->dst); + set_bit(PEER_CLOSE, &ep->com.history); mutex_lock(&ep->com.mutex); switch (ep->com.state) { case MPA_REQ_WAIT: @@ -1839,74 +2141,6 @@ static int is_neg_adv_abort(unsigned int status) status == CPL_ERR_PERSIST_NEG_ADVICE; } -static int c4iw_reconnect(struct c4iw_ep *ep) -{ - struct rtable *rt; - int err = 0; - - PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id); - init_timer(&ep->timer); - - /* - * Allocate an active TID to initiate a TCP connection. - */ - ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep); - if (ep->atid == -1) { - printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__); - err = -ENOMEM; - goto fail2; - } - - /* find a route */ - rt = find_route(ep->com.dev, - ep->com.cm_id->local_addr.sin_addr.s_addr, - ep->com.cm_id->remote_addr.sin_addr.s_addr, - ep->com.cm_id->local_addr.sin_port, - ep->com.cm_id->remote_addr.sin_port, 0); - if (!rt) { - printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); - err = -EHOSTUNREACH; - goto fail3; - } - ep->dst = &rt->dst; - - err = import_ep(ep, ep->com.cm_id->remote_addr.sin_addr.s_addr, - ep->dst, ep->com.dev, false); - if (err) { - printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); - goto fail4; - } - - PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", - __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid, - ep->l2t->idx); - - state_set(&ep->com, CONNECTING); - ep->tos = 0; - - /* send connect request to rnic */ - err = send_connect(ep); - if (!err) - goto out; - - cxgb4_l2t_release(ep->l2t); -fail4: - dst_release(ep->dst); -fail3: - cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); -fail2: - /* - * remember to send notification to upper layer. - * We are in here so the upper layer is not aware that this is - * re-connect attempt and so, upper layer is still waiting for - * response of 1st connect request. - */ - connect_reply_upcall(ep, -ECONNRESET); - c4iw_put_ep(&ep->com); -out: - return err; -} - static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) { struct cpl_abort_req_rss *req = cplhdr(skb); @@ -1927,6 +2161,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) } PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid, ep->com.state); + set_bit(PEER_ABORT, &ep->com.history); /* * Wake up any threads in rdma_init() or rdma_fini(). @@ -1945,7 +2180,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) break; case MPA_REQ_SENT: stop_ep_timer(ep); - if (mpa_rev == 2 && ep->tried_with_mpa_v1) + if (mpa_rev == 1 || (mpa_rev == 2 && ep->tried_with_mpa_v1)) connect_reply_upcall(ep, -ECONNRESET); else { /* @@ -2017,9 +2252,8 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) out: if (release) release_ep_resources(ep); - - /* retry with mpa-v1 */ - if (ep && ep->retry_with_mpa_v1) { + else if (ep->retry_with_mpa_v1) { + remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid); cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid); dst_release(ep->dst); cxgb4_l2t_release(ep->l2t); @@ -2141,6 +2375,7 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) c4iw_put_ep(&ep->com); return -ECONNRESET; } + set_bit(ULP_REJECT, &ep->com.history); BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); if (mpa_rev == 0) abort_connection(ep, NULL, GFP_KERNEL); @@ -2170,6 +2405,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); BUG_ON(!qp); + set_bit(ULP_ACCEPT, &ep->com.history); if ((conn_param->ord > c4iw_max_read_depth) || (conn_param->ird > c4iw_max_read_depth)) { abort_connection(ep, NULL, GFP_KERNEL); @@ -2210,6 +2446,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_id->add_ref(cm_id); ep->com.cm_id = cm_id; ep->com.qp = qp; + ref_qp(ep); /* bind QP to EP and move to RTS */ attrs.mpa_attr = ep->mpa_attr; @@ -2240,7 +2477,6 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) return 0; err1: ep->com.cm_id = NULL; - ep->com.qp = NULL; cm_id->rem_ref(cm_id); err: c4iw_put_ep(&ep->com); @@ -2281,6 +2517,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) ep->com.cm_id = cm_id; ep->com.qp = get_qhp(dev, conn_param->qpn); BUG_ON(!ep->com.qp); + ref_qp(ep); PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn, ep->com.qp, cm_id); @@ -2293,6 +2530,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) err = -ENOMEM; goto fail2; } + insert_handle(dev, &dev->atid_idr, ep, ep->atid); PDBG("%s saddr 0x%x sport 0x%x raddr 0x%x rport 0x%x\n", __func__, ntohl(cm_id->local_addr.sin_addr.s_addr), @@ -2338,6 +2576,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) fail4: dst_release(ep->dst); fail3: + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); fail2: cm_id->rem_ref(cm_id); @@ -2352,7 +2591,6 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); struct c4iw_listen_ep *ep; - might_sleep(); ep = alloc_ep(sizeof(*ep), GFP_KERNEL); @@ -2371,30 +2609,54 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) /* * Allocate a server TID. */ - ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids, PF_INET, ep); + if (dev->rdev.lldi.enable_fw_ofld_conn) + ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids, PF_INET, ep); + else + ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids, PF_INET, ep); + if (ep->stid == -1) { printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __func__); err = -ENOMEM; goto fail2; } - + insert_handle(dev, &dev->stid_idr, ep, ep->stid); state_set(&ep->com, LISTEN); - c4iw_init_wr_wait(&ep->com.wr_wait); - err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0], ep->stid, - ep->com.local_addr.sin_addr.s_addr, - ep->com.local_addr.sin_port, - ep->com.dev->rdev.lldi.rxq_ids[0]); - if (err) - goto fail3; - - /* wait for pass_open_rpl */ - err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0, - __func__); + if (dev->rdev.lldi.enable_fw_ofld_conn) { + do { + err = cxgb4_create_server_filter( + ep->com.dev->rdev.lldi.ports[0], ep->stid, + ep->com.local_addr.sin_addr.s_addr, + ep->com.local_addr.sin_port, + 0, + ep->com.dev->rdev.lldi.rxq_ids[0], + 0, + 0); + if (err == -EBUSY) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(usecs_to_jiffies(100)); + } + } while (err == -EBUSY); + } else { + c4iw_init_wr_wait(&ep->com.wr_wait); + err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0], + ep->stid, ep->com.local_addr.sin_addr.s_addr, + ep->com.local_addr.sin_port, + 0, + ep->com.dev->rdev.lldi.rxq_ids[0]); + if (!err) + err = c4iw_wait_for_reply(&ep->com.dev->rdev, + &ep->com.wr_wait, + 0, 0, __func__); + } if (!err) { cm_id->provider_data = ep; goto out; } -fail3: + pr_err("%s cxgb4_create_server/filter failed err %d " \ + "stid %d laddr %08x lport %d\n", \ + __func__, err, ep->stid, + ntohl(ep->com.local_addr.sin_addr.s_addr), + ntohs(ep->com.local_addr.sin_port)); cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET); fail2: cm_id->rem_ref(cm_id); @@ -2413,12 +2675,19 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id) might_sleep(); state_set(&ep->com, DEAD); - c4iw_init_wr_wait(&ep->com.wr_wait); - err = listen_stop(ep); - if (err) - goto done; - err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0, - __func__); + if (ep->com.dev->rdev.lldi.enable_fw_ofld_conn) { + err = cxgb4_remove_server_filter( + ep->com.dev->rdev.lldi.ports[0], ep->stid, + ep->com.dev->rdev.lldi.rxq_ids[0], 0); + } else { + c4iw_init_wr_wait(&ep->com.wr_wait); + err = listen_stop(ep); + if (err) + goto done; + err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, + 0, 0, __func__); + } + remove_handle(ep->com.dev, &ep->com.dev->stid_idr, ep->stid); cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET); done: cm_id->rem_ref(cm_id); @@ -2482,10 +2751,13 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) if (close) { if (abrupt) { + set_bit(EP_DISC_ABORT, &ep->com.history); close_complete_upcall(ep); ret = send_abort(ep, NULL, gfp); - } else + } else { + set_bit(EP_DISC_CLOSE, &ep->com.history); ret = send_halfclose(ep, gfp); + } if (ret) fatal = 1; } @@ -2495,10 +2767,331 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) return ret; } -static int async_event(struct c4iw_dev *dev, struct sk_buff *skb) +static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, + struct cpl_fw6_msg_ofld_connection_wr_rpl *req) +{ + struct c4iw_ep *ep; + int atid = be32_to_cpu(req->tid); + + ep = (struct c4iw_ep *)lookup_atid(dev->rdev.lldi.tids, + (__force u32) req->tid); + if (!ep) + return; + + switch (req->retval) { + case FW_ENOMEM: + set_bit(ACT_RETRY_NOMEM, &ep->com.history); + if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) { + send_fw_act_open_req(ep, atid); + return; + } + case FW_EADDRINUSE: + set_bit(ACT_RETRY_INUSE, &ep->com.history); + if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) { + send_fw_act_open_req(ep, atid); + return; + } + break; + default: + pr_info("%s unexpected ofld conn wr retval %d\n", + __func__, req->retval); + break; + } + pr_err("active ofld_connect_wr failure %d atid %d\n", + req->retval, atid); + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.act_ofld_conn_fails++; + mutex_unlock(&dev->rdev.stats.lock); + connect_reply_upcall(ep, status2errno(req->retval)); + state_set(&ep->com, DEAD); + remove_handle(dev, &dev->atid_idr, atid); + cxgb4_free_atid(dev->rdev.lldi.tids, atid); + dst_release(ep->dst); + cxgb4_l2t_release(ep->l2t); + c4iw_put_ep(&ep->com); +} + +static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, + struct cpl_fw6_msg_ofld_connection_wr_rpl *req) +{ + struct sk_buff *rpl_skb; + struct cpl_pass_accept_req *cpl; + int ret; + + rpl_skb = (struct sk_buff *)(unsigned long)req->cookie; + BUG_ON(!rpl_skb); + if (req->retval) { + PDBG("%s passive open failure %d\n", __func__, req->retval); + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.pas_ofld_conn_fails++; + mutex_unlock(&dev->rdev.stats.lock); + kfree_skb(rpl_skb); + } else { + cpl = (struct cpl_pass_accept_req *)cplhdr(rpl_skb); + OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, + (__force u32) htonl( + (__force u32) req->tid))); + ret = pass_accept_req(dev, rpl_skb); + if (!ret) + kfree_skb(rpl_skb); + } + return; +} + +static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) { struct cpl_fw6_msg *rpl = cplhdr(skb); - c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]); + struct cpl_fw6_msg_ofld_connection_wr_rpl *req; + + switch (rpl->type) { + case FW6_TYPE_CQE: + c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]); + break; + case FW6_TYPE_OFLD_CONNECTION_WR_RPL: + req = (struct cpl_fw6_msg_ofld_connection_wr_rpl *)rpl->data; + switch (req->t_state) { + case TCP_SYN_SENT: + active_ofld_conn_reply(dev, skb, req); + break; + case TCP_SYN_RECV: + passive_ofld_conn_reply(dev, skb, req); + break; + default: + pr_err("%s unexpected ofld conn wr state %d\n", + __func__, req->t_state); + break; + } + break; + } + return 0; +} + +static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) +{ + u32 l2info; + u16 vlantag, len, hdr_len; + u8 intf; + struct cpl_rx_pkt *cpl = cplhdr(skb); + struct cpl_pass_accept_req *req; + struct tcp_options_received tmp_opt; + + /* Store values from cpl_rx_pkt in temporary location. */ + vlantag = (__force u16) cpl->vlan; + len = (__force u16) cpl->len; + l2info = (__force u32) cpl->l2info; + hdr_len = (__force u16) cpl->hdr_len; + intf = cpl->iff; + + __skb_pull(skb, sizeof(*req) + sizeof(struct rss_header)); + + /* + * We need to parse the TCP options from SYN packet. + * to generate cpl_pass_accept_req. + */ + memset(&tmp_opt, 0, sizeof(tmp_opt)); + tcp_clear_options(&tmp_opt); + tcp_parse_options(skb, &tmp_opt, NULL, 0, NULL); + + req = (struct cpl_pass_accept_req *)__skb_push(skb, sizeof(*req)); + memset(req, 0, sizeof(*req)); + req->l2info = cpu_to_be16(V_SYN_INTF(intf) | + V_SYN_MAC_IDX(G_RX_MACIDX( + (__force int) htonl(l2info))) | + F_SYN_XACT_MATCH); + req->hdr_len = cpu_to_be32(V_SYN_RX_CHAN(G_RX_CHAN( + (__force int) htonl(l2info))) | + V_TCP_HDR_LEN(G_RX_TCPHDR_LEN( + (__force int) htons(hdr_len))) | + V_IP_HDR_LEN(G_RX_IPHDR_LEN( + (__force int) htons(hdr_len))) | + V_ETH_HDR_LEN(G_RX_ETHHDR_LEN( + (__force int) htonl(l2info)))); + req->vlan = (__force __be16) vlantag; + req->len = (__force __be16) len; + req->tos_stid = cpu_to_be32(PASS_OPEN_TID(stid) | + PASS_OPEN_TOS(tos)); + req->tcpopt.mss = htons(tmp_opt.mss_clamp); + if (tmp_opt.wscale_ok) + req->tcpopt.wsf = tmp_opt.snd_wscale; + req->tcpopt.tstamp = tmp_opt.saw_tstamp; + if (tmp_opt.sack_ok) + req->tcpopt.sack = 1; + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, 0)); + return; +} + +static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, + __be32 laddr, __be16 lport, + __be32 raddr, __be16 rport, + u32 rcv_isn, u32 filter, u16 window, + u32 rss_qid, u8 port_id) +{ + struct sk_buff *req_skb; + struct fw_ofld_connection_wr *req; + struct cpl_pass_accept_req *cpl = cplhdr(skb); + + req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL); + req = (struct fw_ofld_connection_wr *)__skb_put(req_skb, sizeof(*req)); + memset(req, 0, sizeof(*req)); + req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL(1)); + req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16))); + req->le.version_cpl = htonl(F_FW_OFLD_CONNECTION_WR_CPL); + req->le.filter = (__force __be32) filter; + req->le.lport = lport; + req->le.pport = rport; + req->le.u.ipv4.lip = laddr; + req->le.u.ipv4.pip = raddr; + req->tcb.rcv_nxt = htonl(rcv_isn + 1); + req->tcb.rcv_adv = htons(window); + req->tcb.t_state_to_astid = + htonl(V_FW_OFLD_CONNECTION_WR_T_STATE(TCP_SYN_RECV) | + V_FW_OFLD_CONNECTION_WR_RCV_SCALE(cpl->tcpopt.wsf) | + V_FW_OFLD_CONNECTION_WR_ASTID( + GET_PASS_OPEN_TID(ntohl(cpl->tos_stid)))); + + /* + * We store the qid in opt2 which will be used by the firmware + * to send us the wr response. + */ + req->tcb.opt2 = htonl(V_RSS_QUEUE(rss_qid)); + + /* + * We initialize the MSS index in TCB to 0xF. + * So that when driver sends cpl_pass_accept_rpl + * TCB picks up the correct value. If this was 0 + * TP will ignore any value > 0 for MSS index. + */ + req->tcb.opt0 = cpu_to_be64(V_MSS_IDX(0xF)); + req->cookie = (unsigned long)skb; + + set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id); + cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb); +} + +/* + * Handler for CPL_RX_PKT message. Need to handle cpl_rx_pkt + * messages when a filter is being used instead of server to + * redirect a syn packet. When packets hit filter they are redirected + * to the offload queue and driver tries to establish the connection + * using firmware work request. + */ +static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) +{ + int stid; + unsigned int filter; + struct ethhdr *eh = NULL; + struct vlan_ethhdr *vlan_eh = NULL; + struct iphdr *iph; + struct tcphdr *tcph; + struct rss_header *rss = (void *)skb->data; + struct cpl_rx_pkt *cpl = (void *)skb->data; + struct cpl_pass_accept_req *req = (void *)(rss + 1); + struct l2t_entry *e; + struct dst_entry *dst; + struct rtable *rt; + struct c4iw_ep *lep; + u16 window; + struct port_info *pi; + struct net_device *pdev; + u16 rss_qid; + int step; + u32 tx_chan; + struct neighbour *neigh; + + /* Drop all non-SYN packets */ + if (!(cpl->l2info & cpu_to_be32(F_RXF_SYN))) + goto reject; + + /* + * Drop all packets which did not hit the filter. + * Unlikely to happen. + */ + if (!(rss->filter_hit && rss->filter_tid)) + goto reject; + + /* + * Calculate the server tid from filter hit index from cpl_rx_pkt. + */ + stid = (__force int) cpu_to_be32((__force u32) rss->hash_val) + - dev->rdev.lldi.tids->sftid_base + + dev->rdev.lldi.tids->nstids; + + lep = (struct c4iw_ep *)lookup_stid(dev->rdev.lldi.tids, stid); + if (!lep) { + PDBG("%s connect request on invalid stid %d\n", __func__, stid); + goto reject; + } + + if (G_RX_ETHHDR_LEN(ntohl(cpl->l2info)) == ETH_HLEN) { + eh = (struct ethhdr *)(req + 1); + iph = (struct iphdr *)(eh + 1); + } else { + vlan_eh = (struct vlan_ethhdr *)(req + 1); + iph = (struct iphdr *)(vlan_eh + 1); + skb->vlan_tci = ntohs(cpl->vlan); + } + + if (iph->version != 0x4) + goto reject; + + tcph = (struct tcphdr *)(iph + 1); + skb_set_network_header(skb, (void *)iph - (void *)rss); + skb_set_transport_header(skb, (void *)tcph - (void *)rss); + skb_get(skb); + + PDBG("%s lip 0x%x lport %u pip 0x%x pport %u tos %d\n", __func__, + ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr), + ntohs(tcph->source), iph->tos); + + rt = find_route(dev, iph->daddr, iph->saddr, tcph->dest, tcph->source, + iph->tos); + if (!rt) { + pr_err("%s - failed to find dst entry!\n", + __func__); + goto reject; + } + dst = &rt->dst; + neigh = dst_neigh_lookup_skb(dst, skb); + + if (neigh->dev->flags & IFF_LOOPBACK) { + pdev = ip_dev_find(&init_net, iph->daddr); + e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, + pdev, 0); + pi = (struct port_info *)netdev_priv(pdev); + tx_chan = cxgb4_port_chan(pdev); + dev_put(pdev); + } else { + e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, + neigh->dev, 0); + pi = (struct port_info *)netdev_priv(neigh->dev); + tx_chan = cxgb4_port_chan(neigh->dev); + } + if (!e) { + pr_err("%s - failed to allocate l2t entry!\n", + __func__); + goto free_dst; + } + + step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan; + rss_qid = dev->rdev.lldi.rxq_ids[pi->port_id * step]; + window = (__force u16) htons((__force u16)tcph->window); + + /* Calcuate filter portion for LE region. */ + filter = (__force unsigned int) cpu_to_be32(select_ntuple(dev, dst, e)); + + /* + * Synthesize the cpl_pass_accept_req. We have everything except the + * TID. Once firmware sends a reply with TID we update the TID field + * in cpl and pass it through the regular cpl_pass_accept_req path. + */ + build_cpl_pass_accept_req(skb, stid, iph->tos); + send_fw_pass_open_req(dev, skb, iph->daddr, tcph->dest, iph->saddr, + tcph->source, ntohl(tcph->seq), filter, window, + rss_qid, pi->port_id); + cxgb4_l2t_release(e); +free_dst: + dst_release(dst); +reject: return 0; } @@ -2521,7 +3114,8 @@ static c4iw_handler_func work_handlers[NUM_CPL_CMDS] = { [CPL_CLOSE_CON_RPL] = close_con_rpl, [CPL_RDMA_TERMINATE] = terminate, [CPL_FW4_ACK] = fw4_ack, - [CPL_FW6_MSG] = async_event + [CPL_FW6_MSG] = deferred_fw6_msg, + [CPL_RX_PKT] = rx_pkt }; static void process_timeout(struct c4iw_ep *ep) @@ -2532,6 +3126,7 @@ static void process_timeout(struct c4iw_ep *ep) mutex_lock(&ep->com.mutex); PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid, ep->com.state); + set_bit(TIMEDOUT, &ep->com.history); switch (ep->com.state) { case MPA_REQ_SENT: __state_set(&ep->com, ABORTING); @@ -2551,9 +3146,8 @@ static void process_timeout(struct c4iw_ep *ep) __state_set(&ep->com, ABORTING); break; default: - printk(KERN_ERR "%s unexpected state ep %p tid %u state %u\n", + WARN(1, "%s unexpected state ep %p tid %u state %u\n", __func__, ep, ep->hwtid, ep->com.state); - WARN_ON(1); abort = 0; } mutex_unlock(&ep->com.mutex); @@ -2606,11 +3200,16 @@ static DECLARE_WORK(skb_work, process_work); static void ep_timeout(unsigned long arg) { struct c4iw_ep *ep = (struct c4iw_ep *)arg; + int kickit = 0; spin_lock(&timeout_lock); - list_add_tail(&ep->entry, &timeout_list); + if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { + list_add_tail(&ep->entry, &timeout_list); + kickit = 1; + } spin_unlock(&timeout_lock); - queue_work(workq, &skb_work); + if (kickit) + queue_work(workq, &skb_work); } /* @@ -2653,7 +3252,7 @@ static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s type %u\n", __func__, rpl->type); switch (rpl->type) { - case 1: + case FW6_TYPE_WR_RPL: ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff); wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1]; PDBG("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret); @@ -2661,7 +3260,8 @@ static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) c4iw_wake_up(wr_waitp, ret ? -ret : 0); kfree_skb(skb); break; - case 2: + case FW6_TYPE_CQE: + case FW6_TYPE_OFLD_CONNECTION_WR_RPL: sched(dev, skb); break; default: @@ -2698,8 +3298,14 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) /* * Wake up any threads in rdma_init() or rdma_fini(). + * However, if we are on MPAv2 and want to retry with MPAv1 + * then, don't wake up yet. */ - c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); + if (mpa_rev == 2 && !ep->tried_with_mpa_v1) { + if (ep->com.state != MPA_REQ_SENT) + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); + } else + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); sched(dev, skb); return 0; } @@ -2724,7 +3330,8 @@ c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = { [CPL_RDMA_TERMINATE] = sched, [CPL_FW4_ACK] = sched, [CPL_SET_TCB_RPL] = set_tcb_rpl, - [CPL_FW6_MSG] = fw6_msg + [CPL_FW6_MSG] = fw6_msg, + [CPL_RX_PKT] = sched }; int __init c4iw_cm_init(void) |