diff options
Diffstat (limited to 'fs/cifs/smbdirect.c')
-rw-r--r-- | fs/cifs/smbdirect.c | 711 |
1 files changed, 290 insertions, 421 deletions
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index 5b1b97e9e0c9..90789aaa6567 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -90,7 +90,7 @@ int smbd_max_send_size = 1364; int smbd_max_fragmented_recv_size = 1024 * 1024; /* The maximum single-message size which can be received */ -int smbd_max_receive_size = 8192; +int smbd_max_receive_size = 1364; /* The timeout to initiate send of a keepalive message on idle */ int smbd_keep_alive_interval = 120; @@ -99,7 +99,7 @@ int smbd_keep_alive_interval = 120; * User configurable initial values for RDMA transport * The actual values used may be lower and are limited to hardware capabilities */ -/* Default maximum number of SGEs in a RDMA write/read */ +/* Default maximum number of pages in a single RDMA write/read */ int smbd_max_frmr_depth = 2048; /* If payload is less than this byte, use RDMA send/recv not read/write */ @@ -246,6 +246,7 @@ smbd_qp_async_error_upcall(struct ib_event *event, void *context) case IB_EVENT_CQ_ERR: case IB_EVENT_QP_FATAL: smbd_disconnect_rdma_connection(info); + break; default: break; @@ -269,7 +270,7 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) struct smbd_request *request = container_of(wc->wr_cqe, struct smbd_request, cqe); - log_rdma_send(INFO, "smbd_request %p completed wc->status=%d\n", + log_rdma_send(INFO, "smbd_request 0x%p completed wc->status=%d\n", request, wc->status); if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { @@ -284,28 +285,22 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) request->sge[i].length, DMA_TO_DEVICE); - if (request->has_payload) { - if (atomic_dec_and_test(&request->info->send_payload_pending)) - wake_up(&request->info->wait_send_payload_pending); - } else { - if (atomic_dec_and_test(&request->info->send_pending)) - wake_up(&request->info->wait_send_pending); - } + if (atomic_dec_and_test(&request->info->send_pending)) + wake_up(&request->info->wait_send_pending); + + wake_up(&request->info->wait_post_send); mempool_free(request, request->info->request_mempool); } static void dump_smbd_negotiate_resp(struct smbd_negotiate_resp *resp) { - log_rdma_event(INFO, "resp message min_version %u max_version %u " - "negotiated_version %u credits_requested %u " - "credits_granted %u status %u max_readwrite_size %u " - "preferred_send_size %u max_receive_size %u " - "max_fragmented_size %u\n", - resp->min_version, resp->max_version, resp->negotiated_version, - resp->credits_requested, resp->credits_granted, resp->status, - resp->max_readwrite_size, resp->preferred_send_size, - resp->max_receive_size, resp->max_fragmented_size); + log_rdma_event(INFO, "resp message min_version %u max_version %u negotiated_version %u credits_requested %u credits_granted %u status %u max_readwrite_size %u preferred_send_size %u max_receive_size %u max_fragmented_size %u\n", + resp->min_version, resp->max_version, + resp->negotiated_version, resp->credits_requested, + resp->credits_granted, resp->status, + resp->max_readwrite_size, resp->preferred_send_size, + resp->max_receive_size, resp->max_fragmented_size); } /* @@ -383,27 +378,6 @@ static bool process_negotiation_response( return true; } -/* - * Check and schedule to send an immediate packet - * This is used to extend credtis to remote peer to keep the transport busy - */ -static void check_and_send_immediate(struct smbd_connection *info) -{ - if (info->transport_status != SMBD_CONNECTED) - return; - - info->send_immediate = true; - - /* - * Promptly send a packet if our peer is running low on receive - * credits - */ - if (atomic_read(&info->receive_credits) < - info->receive_credit_target - 1) - queue_delayed_work( - info->workqueue, &info->send_immediate_work, 0); -} - static void smbd_post_send_credits(struct work_struct *work) { int ret = 0; @@ -453,29 +427,16 @@ static void smbd_post_send_credits(struct work_struct *work) info->new_credits_offered += ret; spin_unlock(&info->lock_new_credits_offered); - atomic_add(ret, &info->receive_credits); - - /* Check if we can post new receive and grant credits to peer */ - check_and_send_immediate(info); -} - -static void smbd_recv_done_work(struct work_struct *work) -{ - struct smbd_connection *info = - container_of(work, struct smbd_connection, recv_done_work); - - /* - * We may have new send credits granted from remote peer - * If any sender is blcoked on lack of credets, unblock it - */ - if (atomic_read(&info->send_credits)) - wake_up_interruptible(&info->wait_send_queue); - - /* - * Check if we need to send something to remote peer to - * grant more credits or respond to KEEP_ALIVE packet - */ - check_and_send_immediate(info); + /* Promptly send an immediate packet as defined in [MS-SMBD] 3.1.1.1 */ + info->send_immediate = true; + if (atomic_read(&info->receive_credits) < + info->receive_credit_target - 1) { + if (info->keep_alive_requested == KEEP_ALIVE_PENDING || + info->send_immediate) { + log_keep_alive(INFO, "send an empty message\n"); + smbd_post_send_empty(info); + } + } } /* Called from softirq, when recv is done */ @@ -487,10 +448,9 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) struct smbd_connection *info = response->info; int data_length = 0; - log_rdma_recv(INFO, "response=%p type=%d wc status=%d wc opcode %d " - "byte_len=%d pkey_index=%x\n", - response, response->type, wc->status, wc->opcode, - wc->byte_len, wc->pkey_index); + log_rdma_recv(INFO, "response=0x%p type=%d wc status=%d wc opcode %d byte_len=%d pkey_index=%u\n", + response, response->type, wc->status, wc->opcode, + wc->byte_len, wc->pkey_index); if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) { log_rdma_recv(INFO, "wc->status=%d opcode=%d\n", @@ -546,15 +506,21 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) atomic_dec(&info->receive_credits); info->receive_credit_target = le16_to_cpu(data_transfer->credits_requested); - atomic_add(le16_to_cpu(data_transfer->credits_granted), - &info->send_credits); + if (le16_to_cpu(data_transfer->credits_granted)) { + atomic_add(le16_to_cpu(data_transfer->credits_granted), + &info->send_credits); + /* + * We have new send credits granted from remote peer + * If any sender is waiting for credits, unblock it + */ + wake_up_interruptible(&info->wait_send_queue); + } - log_incoming(INFO, "data flags %d data_offset %d " - "data_length %d remaining_data_length %d\n", - le16_to_cpu(data_transfer->flags), - le32_to_cpu(data_transfer->data_offset), - le32_to_cpu(data_transfer->data_length), - le32_to_cpu(data_transfer->remaining_data_length)); + log_incoming(INFO, "data flags %d data_offset %d data_length %d remaining_data_length %d\n", + le16_to_cpu(data_transfer->flags), + le32_to_cpu(data_transfer->data_offset), + le32_to_cpu(data_transfer->data_length), + le32_to_cpu(data_transfer->remaining_data_length)); /* Send a KEEP_ALIVE response right away if requested */ info->keep_alive_requested = KEEP_ALIVE_NONE; @@ -563,7 +529,6 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) info->keep_alive_requested = KEEP_ALIVE_PENDING; } - queue_work(info->workqueue, &info->recv_done_work); return; default: @@ -607,8 +572,13 @@ static struct rdma_cm_id *smbd_create_id( log_rdma_event(ERR, "rdma_resolve_addr() failed %i\n", rc); goto out; } - wait_for_completion_interruptible_timeout( + rc = wait_for_completion_interruptible_timeout( &info->ri_done, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT)); + /* e.g. if interrupted returns -ERESTARTSYS */ + if (rc < 0) { + log_rdma_event(ERR, "rdma_resolve_addr timeout rc: %i\n", rc); + goto out; + } rc = info->ri_rc; if (rc) { log_rdma_event(ERR, "rdma_resolve_addr() completed %i\n", rc); @@ -621,8 +591,13 @@ static struct rdma_cm_id *smbd_create_id( log_rdma_event(ERR, "rdma_resolve_route() failed %i\n", rc); goto out; } - wait_for_completion_interruptible_timeout( + rc = wait_for_completion_interruptible_timeout( &info->ri_done, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT)); + /* e.g. if interrupted returns -ERESTARTSYS */ + if (rc < 0) { + log_rdma_event(ERR, "rdma_resolve_addr timeout rc: %i\n", rc); + goto out; + } rc = info->ri_rc; if (rc) { log_rdma_event(ERR, "rdma_resolve_route() completed %i\n", rc); @@ -663,14 +638,10 @@ static int smbd_ia_open( } if (!frwr_is_supported(&info->id->device->attrs)) { - log_rdma_event(ERR, - "Fast Registration Work Requests " - "(FRWR) is not supported\n"); - log_rdma_event(ERR, - "Device capability flags = %llx " - "max_fast_reg_page_list_len = %u\n", - info->id->device->attrs.device_cap_flags, - info->id->device->attrs.max_fast_reg_page_list_len); + log_rdma_event(ERR, "Fast Registration Work Requests (FRWR) is not supported\n"); + log_rdma_event(ERR, "Device capability flags = %llx max_fast_reg_page_list_len = %u\n", + info->id->device->attrs.device_cap_flags, + info->id->device->attrs.max_fast_reg_page_list_len); rc = -EPROTONOSUPPORT; goto out2; } @@ -678,7 +649,7 @@ static int smbd_ia_open( smbd_max_frmr_depth, info->id->device->attrs.max_fast_reg_page_list_len); info->mr_type = IB_MR_TYPE_MEM_REG; - if (info->id->device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG) + if (info->id->device->attrs.kernel_cap_flags & IBK_SG_GAPS_REG) info->mr_type = IB_MR_TYPE_SG_GAPS; info->pd = ib_alloc_pd(info->id->device, 0); @@ -752,11 +723,10 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info) send_wr.opcode = IB_WR_SEND; send_wr.send_flags = IB_SEND_SIGNALED; - log_rdma_send(INFO, "sge addr=%llx length=%x lkey=%x\n", + log_rdma_send(INFO, "sge addr=0x%llx length=%u lkey=0x%x\n", request->sge[0].addr, request->sge[0].length, request->sge[0].lkey); - request->has_payload = false; atomic_inc(&info->send_pending); rc = ib_post_send(info->id->qp, &send_wr, NULL); if (!rc) @@ -813,45 +783,96 @@ static int manage_keep_alive_before_sending(struct smbd_connection *info) return 0; } -/* - * Build and prepare the SMBD packet header - * This function waits for avaialbe send credits and build a SMBD packet - * header. The caller then optional append payload to the packet after - * the header - * intput values - * size: the size of the payload - * remaining_data_length: remaining data to send if this is part of a - * fragmented packet - * output values - * request_out: the request allocated from this function - * return values: 0 on success, otherwise actual error code returned - */ -static int smbd_create_header(struct smbd_connection *info, - int size, int remaining_data_length, - struct smbd_request **request_out) +/* Post the send request */ +static int smbd_post_send(struct smbd_connection *info, + struct smbd_request *request) { + struct ib_send_wr send_wr; + int rc, i; + + for (i = 0; i < request->num_sge; i++) { + log_rdma_send(INFO, + "rdma_request sge[%d] addr=0x%llx length=%u\n", + i, request->sge[i].addr, request->sge[i].length); + ib_dma_sync_single_for_device( + info->id->device, + request->sge[i].addr, + request->sge[i].length, + DMA_TO_DEVICE); + } + + request->cqe.done = send_done; + + send_wr.next = NULL; + send_wr.wr_cqe = &request->cqe; + send_wr.sg_list = request->sge; + send_wr.num_sge = request->num_sge; + send_wr.opcode = IB_WR_SEND; + send_wr.send_flags = IB_SEND_SIGNALED; + + rc = ib_post_send(info->id->qp, &send_wr, NULL); + if (rc) { + log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc); + smbd_disconnect_rdma_connection(info); + rc = -EAGAIN; + } else + /* Reset timer for idle connection after packet is sent */ + mod_delayed_work(info->workqueue, &info->idle_timer_work, + info->keep_alive_interval*HZ); + + return rc; +} + +static int smbd_post_send_sgl(struct smbd_connection *info, + struct scatterlist *sgl, int data_length, int remaining_data_length) +{ + int num_sgs; + int i, rc; + int header_length; struct smbd_request *request; struct smbd_data_transfer *packet; - int header_length; - int rc; + int new_credits; + struct scatterlist *sg; +wait_credit: /* Wait for send credits. A SMBD packet needs one credit */ rc = wait_event_interruptible(info->wait_send_queue, atomic_read(&info->send_credits) > 0 || info->transport_status != SMBD_CONNECTED); if (rc) - return rc; + goto err_wait_credit; if (info->transport_status != SMBD_CONNECTED) { - log_outgoing(ERR, "disconnected not sending\n"); - return -EAGAIN; + log_outgoing(ERR, "disconnected not sending on wait_credit\n"); + rc = -EAGAIN; + goto err_wait_credit; + } + if (unlikely(atomic_dec_return(&info->send_credits) < 0)) { + atomic_inc(&info->send_credits); + goto wait_credit; + } + +wait_send_queue: + wait_event(info->wait_post_send, + atomic_read(&info->send_pending) < info->send_credit_target || + info->transport_status != SMBD_CONNECTED); + + if (info->transport_status != SMBD_CONNECTED) { + log_outgoing(ERR, "disconnected not sending on wait_send_queue\n"); + rc = -EAGAIN; + goto err_wait_send_queue; + } + + if (unlikely(atomic_inc_return(&info->send_pending) > + info->send_credit_target)) { + atomic_dec(&info->send_pending); + goto wait_send_queue; } - atomic_dec(&info->send_credits); request = mempool_alloc(info->request_mempool, GFP_KERNEL); if (!request) { rc = -ENOMEM; - goto err; + goto err_alloc; } request->info = info; @@ -859,8 +880,11 @@ static int smbd_create_header(struct smbd_connection *info, /* Fill in the packet header */ packet = smbd_request_payload(request); packet->credits_requested = cpu_to_le16(info->send_credit_target); - packet->credits_granted = - cpu_to_le16(manage_credits_prior_sending(info)); + + new_credits = manage_credits_prior_sending(info); + atomic_add(new_credits, &info->receive_credits); + packet->credits_granted = cpu_to_le16(new_credits); + info->send_immediate = false; packet->flags = 0; @@ -868,26 +892,25 @@ static int smbd_create_header(struct smbd_connection *info, packet->flags |= cpu_to_le16(SMB_DIRECT_RESPONSE_REQUESTED); packet->reserved = 0; - if (!size) + if (!data_length) packet->data_offset = 0; else packet->data_offset = cpu_to_le32(24); - packet->data_length = cpu_to_le32(size); + packet->data_length = cpu_to_le32(data_length); packet->remaining_data_length = cpu_to_le32(remaining_data_length); packet->padding = 0; - log_outgoing(INFO, "credits_requested=%d credits_granted=%d " - "data_offset=%d data_length=%d remaining_data_length=%d\n", - le16_to_cpu(packet->credits_requested), - le16_to_cpu(packet->credits_granted), - le32_to_cpu(packet->data_offset), - le32_to_cpu(packet->data_length), - le32_to_cpu(packet->remaining_data_length)); + log_outgoing(INFO, "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n", + le16_to_cpu(packet->credits_requested), + le16_to_cpu(packet->credits_granted), + le32_to_cpu(packet->data_offset), + le32_to_cpu(packet->data_length), + le32_to_cpu(packet->remaining_data_length)); /* Map the packet to DMA */ header_length = sizeof(struct smbd_data_transfer); /* If this is a packet without payload, don't send padding */ - if (!size) + if (!data_length) header_length = offsetof(struct smbd_data_transfer, padding); request->num_sge = 1; @@ -896,102 +919,15 @@ static int smbd_create_header(struct smbd_connection *info, header_length, DMA_TO_DEVICE); if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) { - mempool_free(request, info->request_mempool); rc = -EIO; - goto err; + request->sge[0].addr = 0; + goto err_dma; } request->sge[0].length = header_length; request->sge[0].lkey = info->pd->local_dma_lkey; - *request_out = request; - return 0; - -err: - atomic_inc(&info->send_credits); - return rc; -} - -static void smbd_destroy_header(struct smbd_connection *info, - struct smbd_request *request) -{ - - ib_dma_unmap_single(info->id->device, - request->sge[0].addr, - request->sge[0].length, - DMA_TO_DEVICE); - mempool_free(request, info->request_mempool); - atomic_inc(&info->send_credits); -} - -/* Post the send request */ -static int smbd_post_send(struct smbd_connection *info, - struct smbd_request *request, bool has_payload) -{ - struct ib_send_wr send_wr; - int rc, i; - - for (i = 0; i < request->num_sge; i++) { - log_rdma_send(INFO, - "rdma_request sge[%d] addr=%llu length=%u\n", - i, request->sge[i].addr, request->sge[i].length); - ib_dma_sync_single_for_device( - info->id->device, - request->sge[i].addr, - request->sge[i].length, - DMA_TO_DEVICE); - } - - request->cqe.done = send_done; - - send_wr.next = NULL; - send_wr.wr_cqe = &request->cqe; - send_wr.sg_list = request->sge; - send_wr.num_sge = request->num_sge; - send_wr.opcode = IB_WR_SEND; - send_wr.send_flags = IB_SEND_SIGNALED; - - if (has_payload) { - request->has_payload = true; - atomic_inc(&info->send_payload_pending); - } else { - request->has_payload = false; - atomic_inc(&info->send_pending); - } - - rc = ib_post_send(info->id->qp, &send_wr, NULL); - if (rc) { - log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc); - if (has_payload) { - if (atomic_dec_and_test(&info->send_payload_pending)) - wake_up(&info->wait_send_payload_pending); - } else { - if (atomic_dec_and_test(&info->send_pending)) - wake_up(&info->wait_send_pending); - } - smbd_disconnect_rdma_connection(info); - rc = -EAGAIN; - } else - /* Reset timer for idle connection after packet is sent */ - mod_delayed_work(info->workqueue, &info->idle_timer_work, - info->keep_alive_interval*HZ); - - return rc; -} - -static int smbd_post_send_sgl(struct smbd_connection *info, - struct scatterlist *sgl, int data_length, int remaining_data_length) -{ - int num_sgs; - int i, rc; - struct smbd_request *request; - struct scatterlist *sg; - - rc = smbd_create_header( - info, data_length, remaining_data_length, &request); - if (rc) - return rc; - + /* Fill in the packet data payload */ num_sgs = sgl ? sg_nents(sgl) : 0; for_each_sg(sgl, sg, num_sgs, i) { request->sge[i+1].addr = @@ -1001,25 +937,41 @@ static int smbd_post_send_sgl(struct smbd_connection *info, info->id->device, request->sge[i+1].addr)) { rc = -EIO; request->sge[i+1].addr = 0; - goto dma_mapping_failure; + goto err_dma; } request->sge[i+1].length = sg->length; request->sge[i+1].lkey = info->pd->local_dma_lkey; request->num_sge++; } - rc = smbd_post_send(info, request, data_length); + rc = smbd_post_send(info, request); if (!rc) return 0; -dma_mapping_failure: - for (i = 1; i < request->num_sge; i++) +err_dma: + for (i = 0; i < request->num_sge; i++) if (request->sge[i].addr) ib_dma_unmap_single(info->id->device, request->sge[i].addr, request->sge[i].length, DMA_TO_DEVICE); - smbd_destroy_header(info, request); + mempool_free(request, info->request_mempool); + + /* roll back receive credits and credits to be offered */ + spin_lock(&info->lock_new_credits_offered); + info->new_credits_offered += new_credits; + spin_unlock(&info->lock_new_credits_offered); + atomic_sub(new_credits, &info->receive_credits); + +err_alloc: + if (atomic_dec_and_test(&info->send_pending)) + wake_up(&info->wait_send_pending); + +err_wait_send_queue: + /* roll back send credits and pending */ + atomic_inc(&info->send_credits); + +err_wait_credit: return rc; } @@ -1065,9 +1017,9 @@ static int smbd_post_send_data( { int i; u32 data_length = 0; - struct scatterlist sgl[SMBDIRECT_MAX_SGE]; + struct scatterlist sgl[SMBDIRECT_MAX_SEND_SGE - 1]; - if (n_vec > SMBDIRECT_MAX_SGE) { + if (n_vec > SMBDIRECT_MAX_SEND_SGE - 1) { cifs_dbg(VFS, "Can't fit data to SGL, n_vec=%d\n", n_vec); return -EINVAL; } @@ -1127,11 +1079,9 @@ static int smbd_negotiate(struct smbd_connection *info) response->type = SMBD_NEGOTIATE_RESP; rc = smbd_post_recv(info, response); - log_rdma_event(INFO, - "smbd_post_recv rc=%d iov.addr=%llx iov.length=%x " - "iov.lkey=%x\n", - rc, response->sge.addr, - response->sge.length, response->sge.lkey); + log_rdma_event(INFO, "smbd_post_recv rc=%d iov.addr=0x%llx iov.length=%u iov.lkey=0x%x\n", + rc, response->sge.addr, + response->sge.length, response->sge.lkey); if (rc) return rc; @@ -1341,25 +1291,6 @@ static void destroy_receive_buffers(struct smbd_connection *info) mempool_free(response, info->response_mempool); } -/* - * Check and send an immediate or keep alive packet - * The condition to send those packets are defined in [MS-SMBD] 3.1.1.1 - * Connection.KeepaliveRequested and Connection.SendImmediate - * The idea is to extend credits to server as soon as it becomes available - */ -static void send_immediate_work(struct work_struct *work) -{ - struct smbd_connection *info = container_of( - work, struct smbd_connection, - send_immediate_work.work); - - if (info->keep_alive_requested == KEEP_ALIVE_PENDING || - info->send_immediate) { - log_keep_alive(INFO, "send an empty message\n"); - smbd_post_send_empty(info); - } -} - /* Implement idle connection timer [MS-SMBD] 3.1.6.2 */ static void idle_connection_timer(struct work_struct *work) { @@ -1414,16 +1345,12 @@ void smbd_destroy(struct TCP_Server_Info *server) log_rdma_event(INFO, "cancelling idle timer\n"); cancel_delayed_work_sync(&info->idle_timer_work); - log_rdma_event(INFO, "cancelling send immediate work\n"); - cancel_delayed_work_sync(&info->send_immediate_work); log_rdma_event(INFO, "wait for all send posted to IB to finish\n"); wait_event(info->wait_send_pending, atomic_read(&info->send_pending) == 0); - wait_event(info->wait_send_payload_pending, - atomic_read(&info->send_payload_pending) == 0); - /* It's not posssible for upper layer to get to reassembly */ + /* It's not possible for upper layer to get to reassembly */ log_rdma_event(INFO, "drain the reassembly queue\n"); do { spin_lock_irqsave(&info->reassembly_queue_lock, flags); @@ -1455,9 +1382,9 @@ void smbd_destroy(struct TCP_Server_Info *server) log_rdma_event(INFO, "freeing mr list\n"); wake_up_interruptible_all(&info->wait_mr); while (atomic_read(&info->mr_used_count)) { - mutex_unlock(&server->srv_mutex); + cifs_server_unlock(server); msleep(1000); - mutex_lock(&server->srv_mutex); + cifs_server_lock(server); } destroy_mr_list(info); @@ -1612,25 +1539,19 @@ static struct smbd_connection *_smbd_get_connection( if (smbd_send_credit_target > info->id->device->attrs.max_cqe || smbd_send_credit_target > info->id->device->attrs.max_qp_wr) { - log_rdma_event(ERR, - "consider lowering send_credit_target = %d. " - "Possible CQE overrun, device " - "reporting max_cpe %d max_qp_wr %d\n", - smbd_send_credit_target, - info->id->device->attrs.max_cqe, - info->id->device->attrs.max_qp_wr); + log_rdma_event(ERR, "consider lowering send_credit_target = %d. Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", + smbd_send_credit_target, + info->id->device->attrs.max_cqe, + info->id->device->attrs.max_qp_wr); goto config_failed; } if (smbd_receive_credit_max > info->id->device->attrs.max_cqe || smbd_receive_credit_max > info->id->device->attrs.max_qp_wr) { - log_rdma_event(ERR, - "consider lowering receive_credit_max = %d. " - "Possible CQE overrun, device " - "reporting max_cpe %d max_qp_wr %d\n", - smbd_receive_credit_max, - info->id->device->attrs.max_cqe, - info->id->device->attrs.max_qp_wr); + log_rdma_event(ERR, "consider lowering receive_credit_max = %d. Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", + smbd_receive_credit_max, + info->id->device->attrs.max_cqe, + info->id->device->attrs.max_qp_wr); goto config_failed; } @@ -1641,17 +1562,15 @@ static struct smbd_connection *_smbd_get_connection( info->max_receive_size = smbd_max_receive_size; info->keep_alive_interval = smbd_keep_alive_interval; - if (info->id->device->attrs.max_send_sge < SMBDIRECT_MAX_SGE) { - log_rdma_event(ERR, - "warning: device max_send_sge = %d too small\n", - info->id->device->attrs.max_send_sge); - log_rdma_event(ERR, "Queue Pair creation may fail\n"); - } - if (info->id->device->attrs.max_recv_sge < SMBDIRECT_MAX_SGE) { + if (info->id->device->attrs.max_send_sge < SMBDIRECT_MAX_SEND_SGE || + info->id->device->attrs.max_recv_sge < SMBDIRECT_MAX_RECV_SGE) { log_rdma_event(ERR, - "warning: device max_recv_sge = %d too small\n", + "device %.*s max_send_sge/max_recv_sge = %d/%d too small\n", + IB_DEVICE_NAME_MAX, + info->id->device->name, + info->id->device->attrs.max_send_sge, info->id->device->attrs.max_recv_sge); - log_rdma_event(ERR, "Queue Pair creation may fail\n"); + goto config_failed; } info->send_cq = NULL; @@ -1677,8 +1596,8 @@ static struct smbd_connection *_smbd_get_connection( qp_attr.qp_context = info; qp_attr.cap.max_send_wr = info->send_credit_target; qp_attr.cap.max_recv_wr = info->receive_credit_max; - qp_attr.cap.max_send_sge = SMBDIRECT_MAX_SGE; - qp_attr.cap.max_recv_sge = SMBDIRECT_MAX_SGE; + qp_attr.cap.max_send_sge = SMBDIRECT_MAX_SEND_SGE; + qp_attr.cap.max_recv_sge = SMBDIRECT_MAX_RECV_SGE; qp_attr.cap.max_inline_data = 0; qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; qp_attr.qp_type = IB_QPT_RC; @@ -1751,18 +1670,15 @@ static struct smbd_connection *_smbd_get_connection( init_waitqueue_head(&info->wait_send_queue); INIT_DELAYED_WORK(&info->idle_timer_work, idle_connection_timer); - INIT_DELAYED_WORK(&info->send_immediate_work, send_immediate_work); queue_delayed_work(info->workqueue, &info->idle_timer_work, info->keep_alive_interval*HZ); init_waitqueue_head(&info->wait_send_pending); atomic_set(&info->send_pending, 0); - init_waitqueue_head(&info->wait_send_payload_pending); - atomic_set(&info->send_payload_pending, 0); + init_waitqueue_head(&info->wait_post_send); INIT_WORK(&info->disconnect_work, smbd_disconnect_rdma_work); - INIT_WORK(&info->recv_done_work, smbd_recv_done_work); INIT_WORK(&info->post_send_credits_work, smbd_post_send_credits); info->new_credits_offered = 0; spin_lock_init(&info->lock_new_credits_offered); @@ -1940,11 +1856,9 @@ again: to_read -= to_copy; data_read += to_copy; - log_read(INFO, "_get_first_reassembly memcpy %d bytes " - "data_transfer_length-offset=%d after that " - "to_read=%d data_read=%d offset=%d\n", - to_copy, data_length - offset, - to_read, data_read, offset); + log_read(INFO, "_get_first_reassembly memcpy %d bytes data_transfer_length-offset=%d after that to_read=%d data_read=%d offset=%d\n", + to_copy, data_length - offset, + to_read, data_read, offset); } spin_lock_irq(&info->reassembly_queue_lock); @@ -1953,10 +1867,9 @@ again: spin_unlock_irq(&info->reassembly_queue_lock); info->first_entry_offset = offset; - log_read(INFO, "returning to thread data_read=%d " - "reassembly_data_length=%d first_entry_offset=%d\n", - data_read, info->reassembly_data_length, - info->first_entry_offset); + log_read(INFO, "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n", + data_read, info->reassembly_data_length, + info->first_entry_offset); read_rfc1002_done: return data_read; } @@ -2027,7 +1940,7 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg) if (iov_iter_rw(&msg->msg_iter) == WRITE) { /* It's a bug in upper layer to get there */ - cifs_dbg(VFS, "CIFS: invalid msg iter dir %u\n", + cifs_dbg(VFS, "Invalid msg iter dir %u\n", iov_iter_rw(&msg->msg_iter)); rc = -EINVAL; goto out; @@ -2049,7 +1962,7 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg) default: /* It's a bug in upper layer to get there */ - cifs_dbg(VFS, "CIFS: invalid msg type %d\n", + cifs_dbg(VFS, "Invalid msg type %d\n", iov_iter_type(&msg->msg_iter)); rc = -EINVAL; } @@ -2071,10 +1984,11 @@ int smbd_send(struct TCP_Server_Info *server, int num_rqst, struct smb_rqst *rqst_array) { struct smbd_connection *info = server->smbd_conn; - struct kvec vec; + struct kvec vecs[SMBDIRECT_MAX_SEND_SGE - 1]; int nvecs; int size; unsigned int buflen, remaining_data_length; + unsigned int offset, remaining_vec_data_length; int start, i, j; int max_iov_size = info->max_send_size - sizeof(struct smbd_data_transfer); @@ -2083,10 +1997,8 @@ int smbd_send(struct TCP_Server_Info *server, struct smb_rqst *rqst; int rqst_idx; - if (info->transport_status != SMBD_CONNECTED) { - rc = -EAGAIN; - goto done; - } + if (info->transport_status != SMBD_CONNECTED) + return -EAGAIN; /* * Add in the page array if there is one. The caller needs to set @@ -2097,135 +2009,95 @@ int smbd_send(struct TCP_Server_Info *server, for (i = 0; i < num_rqst; i++) remaining_data_length += smb_rqst_len(server, &rqst_array[i]); - if (remaining_data_length + sizeof(struct smbd_data_transfer) > - info->max_fragmented_send_size) { + if (unlikely(remaining_data_length > info->max_fragmented_send_size)) { + /* assertion: payload never exceeds negotiated maximum */ log_write(ERR, "payload size %d > max size %d\n", remaining_data_length, info->max_fragmented_send_size); - rc = -EINVAL; - goto done; + return -EINVAL; } log_write(INFO, "num_rqst=%d total length=%u\n", num_rqst, remaining_data_length); rqst_idx = 0; -next_rqst: - rqst = &rqst_array[rqst_idx]; - iov = rqst->rq_iov; - - cifs_dbg(FYI, "Sending smb (RDMA): idx=%d smb_len=%lu\n", - rqst_idx, smb_rqst_len(server, rqst)); - for (i = 0; i < rqst->rq_nvec; i++) - dump_smb(iov[i].iov_base, iov[i].iov_len); - - - log_write(INFO, "rqst_idx=%d nvec=%d rqst->rq_npages=%d rq_pagesz=%d " - "rq_tailsz=%d buflen=%lu\n", - rqst_idx, rqst->rq_nvec, rqst->rq_npages, rqst->rq_pagesz, - rqst->rq_tailsz, smb_rqst_len(server, rqst)); - - start = i = 0; - buflen = 0; - while (true) { - buflen += iov[i].iov_len; - if (buflen > max_iov_size) { - if (i > start) { - remaining_data_length -= - (buflen-iov[i].iov_len); - log_write(INFO, "sending iov[] from start=%d " - "i=%d nvecs=%d " - "remaining_data_length=%d\n", - start, i, i-start, - remaining_data_length); - rc = smbd_post_send_data( - info, &iov[start], i-start, - remaining_data_length); - if (rc) - goto done; - } else { - /* iov[start] is too big, break it */ - nvecs = (buflen+max_iov_size-1)/max_iov_size; - log_write(INFO, "iov[%d] iov_base=%p buflen=%d" - " break to %d vectors\n", - start, iov[start].iov_base, - buflen, nvecs); - for (j = 0; j < nvecs; j++) { - vec.iov_base = - (char *)iov[start].iov_base + - j*max_iov_size; - vec.iov_len = max_iov_size; - if (j == nvecs-1) - vec.iov_len = - buflen - - max_iov_size*(nvecs-1); - remaining_data_length -= vec.iov_len; - log_write(INFO, - "sending vec j=%d iov_base=%p" - " iov_len=%zu " - "remaining_data_length=%d\n", - j, vec.iov_base, vec.iov_len, - remaining_data_length); - rc = smbd_post_send_data( - info, &vec, 1, - remaining_data_length); - if (rc) - goto done; + do { + rqst = &rqst_array[rqst_idx]; + iov = rqst->rq_iov; + + cifs_dbg(FYI, "Sending smb (RDMA): idx=%d smb_len=%lu\n", + rqst_idx, smb_rqst_len(server, rqst)); + remaining_vec_data_length = 0; + for (i = 0; i < rqst->rq_nvec; i++) { + remaining_vec_data_length += iov[i].iov_len; + dump_smb(iov[i].iov_base, iov[i].iov_len); + } + + log_write(INFO, "rqst_idx=%d nvec=%d rqst->rq_npages=%d rq_pagesz=%d rq_tailsz=%d buflen=%lu\n", + rqst_idx, rqst->rq_nvec, + rqst->rq_npages, rqst->rq_pagesz, + rqst->rq_tailsz, smb_rqst_len(server, rqst)); + + start = 0; + offset = 0; + do { + buflen = 0; + i = start; + j = 0; + while (i < rqst->rq_nvec && + j < SMBDIRECT_MAX_SEND_SGE - 1 && + buflen < max_iov_size) { + + vecs[j].iov_base = iov[i].iov_base + offset; + if (buflen + iov[i].iov_len > max_iov_size) { + vecs[j].iov_len = + max_iov_size - iov[i].iov_len; + buflen = max_iov_size; + offset = vecs[j].iov_len; + } else { + vecs[j].iov_len = + iov[i].iov_len - offset; + buflen += vecs[j].iov_len; + offset = 0; + ++i; } - i++; - if (i == rqst->rq_nvec) - break; + ++j; } - start = i; - buflen = 0; - } else { - i++; - if (i == rqst->rq_nvec) { - /* send out all remaining vecs */ - remaining_data_length -= buflen; - log_write(INFO, - "sending iov[] from start=%d i=%d " - "nvecs=%d remaining_data_length=%d\n", - start, i, i-start, + + remaining_vec_data_length -= buflen; + remaining_data_length -= buflen; + log_write(INFO, "sending %s iov[%d] from start=%d nvecs=%d remaining_data_length=%d\n", + remaining_vec_data_length > 0 ? + "partial" : "complete", + rqst->rq_nvec, start, j, remaining_data_length); - rc = smbd_post_send_data(info, &iov[start], - i-start, remaining_data_length); + + start = i; + rc = smbd_post_send_data(info, vecs, j, remaining_data_length); + if (rc) + goto done; + } while (remaining_vec_data_length > 0); + + /* now sending pages if there are any */ + for (i = 0; i < rqst->rq_npages; i++) { + rqst_page_get_length(rqst, i, &buflen, &offset); + nvecs = (buflen + max_iov_size - 1) / max_iov_size; + log_write(INFO, "sending pages buflen=%d nvecs=%d\n", + buflen, nvecs); + for (j = 0; j < nvecs; j++) { + size = min_t(unsigned int, max_iov_size, remaining_data_length); + remaining_data_length -= size; + log_write(INFO, "sending pages i=%d offset=%d size=%d remaining_data_length=%d\n", + i, j * max_iov_size + offset, size, + remaining_data_length); + rc = smbd_post_send_page( + info, rqst->rq_pages[i], + j*max_iov_size + offset, + size, remaining_data_length); if (rc) goto done; - break; } } - log_write(INFO, "looping i=%d buflen=%d\n", i, buflen); - } - - /* now sending pages if there are any */ - for (i = 0; i < rqst->rq_npages; i++) { - unsigned int offset; - - rqst_page_get_length(rqst, i, &buflen, &offset); - nvecs = (buflen + max_iov_size - 1) / max_iov_size; - log_write(INFO, "sending pages buflen=%d nvecs=%d\n", - buflen, nvecs); - for (j = 0; j < nvecs; j++) { - size = max_iov_size; - if (j == nvecs-1) - size = buflen - j*max_iov_size; - remaining_data_length -= size; - log_write(INFO, "sending pages i=%d offset=%d size=%d" - " remaining_data_length=%d\n", - i, j*max_iov_size+offset, size, - remaining_data_length); - rc = smbd_post_send_page( - info, rqst->rq_pages[i], - j*max_iov_size + offset, - size, remaining_data_length); - if (rc) - goto done; - } - } - - rqst_idx++; - if (rqst_idx < num_rqst) - goto next_rqst; + } while (++rqst_idx < num_rqst); done: /* @@ -2235,8 +2107,8 @@ done: * that means all the I/Os have been out and we are good to return */ - wait_event(info->wait_send_payload_pending, - atomic_read(&info->send_payload_pending) == 0); + wait_event(info->wait_send_pending, + atomic_read(&info->send_pending) == 0); return rc; } @@ -2287,11 +2159,9 @@ static void smbd_mr_recovery_work(struct work_struct *work) info->pd, info->mr_type, info->max_frmr_depth); if (IS_ERR(smbdirect_mr->mr)) { - log_rdma_mr(ERR, - "ib_alloc_mr failed mr_type=%x " - "max_frmr_depth=%x\n", - info->mr_type, - info->max_frmr_depth); + log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n", + info->mr_type, + info->max_frmr_depth); smbd_disconnect_rdma_connection(info); continue; } @@ -2354,9 +2224,8 @@ static int allocate_mr_list(struct smbd_connection *info) smbdirect_mr->mr = ib_alloc_mr(info->pd, info->mr_type, info->max_frmr_depth); if (IS_ERR(smbdirect_mr->mr)) { - log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x " - "max_frmr_depth=%x\n", - info->mr_type, info->max_frmr_depth); + log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n", + info->mr_type, info->max_frmr_depth); goto out; } smbdirect_mr->sgl = kcalloc( |