diff options
Diffstat (limited to 'drivers/infiniband')
281 files changed, 17013 insertions, 16126 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 33d3ce9c888e..aa36ac618e72 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -78,20 +78,21 @@ config INFINIBAND_VIRT_DMA def_bool !HIGHMEM if INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS -source "drivers/infiniband/hw/mthca/Kconfig" -source "drivers/infiniband/hw/qib/Kconfig" +source "drivers/infiniband/hw/bnxt_re/Kconfig" source "drivers/infiniband/hw/cxgb4/Kconfig" source "drivers/infiniband/hw/efa/Kconfig" +source "drivers/infiniband/hw/erdma/Kconfig" +source "drivers/infiniband/hw/hfi1/Kconfig" +source "drivers/infiniband/hw/hns/Kconfig" source "drivers/infiniband/hw/irdma/Kconfig" source "drivers/infiniband/hw/mlx4/Kconfig" source "drivers/infiniband/hw/mlx5/Kconfig" +source "drivers/infiniband/hw/mthca/Kconfig" source "drivers/infiniband/hw/ocrdma/Kconfig" -source "drivers/infiniband/hw/vmw_pvrdma/Kconfig" -source "drivers/infiniband/hw/usnic/Kconfig" -source "drivers/infiniband/hw/hns/Kconfig" -source "drivers/infiniband/hw/bnxt_re/Kconfig" -source "drivers/infiniband/hw/hfi1/Kconfig" source "drivers/infiniband/hw/qedr/Kconfig" +source "drivers/infiniband/hw/qib/Kconfig" +source "drivers/infiniband/hw/usnic/Kconfig" +source "drivers/infiniband/hw/vmw_pvrdma/Kconfig" source "drivers/infiniband/sw/rdmavt/Kconfig" source "drivers/infiniband/sw/rxe/Kconfig" source "drivers/infiniband/sw/siw/Kconfig" diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 65e3e7df8a4b..f253295795f0 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -37,7 +37,6 @@ #include <linux/inetdevice.h> #include <linux/slab.h> #include <linux/workqueue.h> -#include <linux/module.h> #include <net/arp.h> #include <net/neighbour.h> #include <net/route.h> diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 0c98dd3dee67..4084d05a4510 100644 --- a/drivers/infiniband/core/cache.c +++ 
b/drivers/infiniband/core/cache.c @@ -33,7 +33,7 @@ * SOFTWARE. */ -#include <linux/module.h> +#include <linux/if_vlan.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/workqueue.h> @@ -955,7 +955,7 @@ int rdma_query_gid(struct ib_device *device, u32 port_num, { struct ib_gid_table *table; unsigned long flags; - int res = -EINVAL; + int res; if (!rdma_is_port_valid(device, port_num)) return -EINVAL; @@ -963,9 +963,15 @@ int rdma_query_gid(struct ib_device *device, u32 port_num, table = rdma_gid_table(device, port_num); read_lock_irqsave(&table->rwlock, flags); - if (index < 0 || index >= table->sz || - !is_gid_entry_valid(table->data_vec[index])) + if (index < 0 || index >= table->sz) { + res = -EINVAL; goto done; + } + + if (!is_gid_entry_valid(table->data_vec[index])) { + res = -ENOENT; + goto done; + } memcpy(gid, &table->data_vec[index]->attr.gid, sizeof(*gid)); res = 0; diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index c903b74f46a4..1f9938a2c475 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -175,6 +175,7 @@ struct cm_device { struct cm_av { struct cm_port *port; struct rdma_ah_attr ah_attr; + u16 dlid_datapath; u16 pkey_index; u8 timeout; }; @@ -617,7 +618,6 @@ static struct cm_id_private *cm_insert_listen(struct cm_id_private *cm_id_priv, struct rb_node *parent = NULL; struct cm_id_private *cur_cm_id_priv; __be64 service_id = cm_id_priv->id.service_id; - __be64 service_mask = cm_id_priv->id.service_mask; unsigned long flags; spin_lock_irqsave(&cm.lock, flags); @@ -625,9 +625,16 @@ static struct cm_id_private *cm_insert_listen(struct cm_id_private *cm_id_priv, parent = *link; cur_cm_id_priv = rb_entry(parent, struct cm_id_private, service_node); - if ((cur_cm_id_priv->id.service_mask & service_id) == - (service_mask & cur_cm_id_priv->id.service_id) && - (cm_id_priv->id.device == cur_cm_id_priv->id.device)) { + + if (cm_id_priv->id.device < cur_cm_id_priv->id.device) + link = 
&(*link)->rb_left; + else if (cm_id_priv->id.device > cur_cm_id_priv->id.device) + link = &(*link)->rb_right; + else if (be64_lt(service_id, cur_cm_id_priv->id.service_id)) + link = &(*link)->rb_left; + else if (be64_gt(service_id, cur_cm_id_priv->id.service_id)) + link = &(*link)->rb_right; + else { /* * Sharing an ib_cm_id with different handlers is not * supported @@ -643,17 +650,6 @@ static struct cm_id_private *cm_insert_listen(struct cm_id_private *cm_id_priv, spin_unlock_irqrestore(&cm.lock, flags); return cur_cm_id_priv; } - - if (cm_id_priv->id.device < cur_cm_id_priv->id.device) - link = &(*link)->rb_left; - else if (cm_id_priv->id.device > cur_cm_id_priv->id.device) - link = &(*link)->rb_right; - else if (be64_lt(service_id, cur_cm_id_priv->id.service_id)) - link = &(*link)->rb_left; - else if (be64_gt(service_id, cur_cm_id_priv->id.service_id)) - link = &(*link)->rb_right; - else - link = &(*link)->rb_right; } cm_id_priv->listen_sharecount++; rb_link_node(&cm_id_priv->service_node, parent, link); @@ -670,12 +666,7 @@ static struct cm_id_private *cm_find_listen(struct ib_device *device, while (node) { cm_id_priv = rb_entry(node, struct cm_id_private, service_node); - if ((cm_id_priv->id.service_mask & service_id) == - cm_id_priv->id.service_id && - (cm_id_priv->id.device == device)) { - refcount_inc(&cm_id_priv->refcount); - return cm_id_priv; - } + if (device < cm_id_priv->id.device) node = node->rb_left; else if (device > cm_id_priv->id.device) @@ -684,8 +675,10 @@ static struct cm_id_private *cm_find_listen(struct ib_device *device, node = node->rb_left; else if (be64_gt(service_id, cm_id_priv->id.service_id)) node = node->rb_right; - else - node = node->rb_right; + else { + refcount_inc(&cm_id_priv->refcount); + return cm_id_priv; + } } return NULL; } @@ -1158,22 +1151,17 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id) } EXPORT_SYMBOL(ib_destroy_cm_id); -static int cm_init_listen(struct cm_id_private *cm_id_priv, __be64 service_id, - __be64 
service_mask) +static int cm_init_listen(struct cm_id_private *cm_id_priv, __be64 service_id) { - service_mask = service_mask ? service_mask : ~cpu_to_be64(0); - service_id &= service_mask; if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID && (service_id != IB_CM_ASSIGN_SERVICE_ID)) return -EINVAL; - if (service_id == IB_CM_ASSIGN_SERVICE_ID) { + if (service_id == IB_CM_ASSIGN_SERVICE_ID) cm_id_priv->id.service_id = cpu_to_be64(cm.listen_service_id++); - cm_id_priv->id.service_mask = ~cpu_to_be64(0); - } else { + else cm_id_priv->id.service_id = service_id; - cm_id_priv->id.service_mask = service_mask; - } + return 0; } @@ -1185,12 +1173,8 @@ static int cm_init_listen(struct cm_id_private *cm_id_priv, __be64 service_id, * and service ID resolution requests. The service ID should be specified * network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will * assign a service ID to the caller. - * @service_mask: Mask applied to service ID used to listen across a - * range of service IDs. If set to 0, the service ID is matched - * exactly. This parameter is ignored if %service_id is set to - * IB_CM_ASSIGN_SERVICE_ID. 
*/ -int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask) +int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id) { struct cm_id_private *cm_id_priv = container_of(cm_id, struct cm_id_private, id); @@ -1203,7 +1187,7 @@ int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask) goto out; } - ret = cm_init_listen(cm_id_priv, service_id, service_mask); + ret = cm_init_listen(cm_id_priv, service_id); if (ret) goto out; @@ -1251,9 +1235,11 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, if (IS_ERR(cm_id_priv)) return ERR_CAST(cm_id_priv); - err = cm_init_listen(cm_id_priv, service_id, 0); - if (err) + err = cm_init_listen(cm_id_priv, service_id); + if (err) { + ib_destroy_cm_id(&cm_id_priv->id); return ERR_PTR(err); + } spin_lock_irq(&cm_id_priv->lock); listen_id_priv = cm_insert_listen(cm_id_priv, cm_handler); @@ -1319,6 +1305,7 @@ static void cm_format_req(struct cm_req_msg *req_msg, struct sa_path_rec *pri_path = param->primary_path; struct sa_path_rec *alt_path = param->alternate_path; bool pri_ext = false; + __be16 lid; if (pri_path->rec_type == SA_PATH_REC_TYPE_OPA) pri_ext = opa_is_extended_lid(pri_path->opa.dlid, @@ -1378,9 +1365,16 @@ static void cm_format_req(struct cm_req_msg *req_msg, htons(ntohl(sa_path_get_dlid( pri_path))))); } else { + + if (param->primary_path_inbound) { + lid = param->primary_path_inbound->ib.dlid; + IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg, + be16_to_cpu(lid)); + } else + IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg, + be16_to_cpu(IB_LID_PERMISSIVE)); + /* Work-around until there's a way to obtain remote LID info */ - IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg, - be16_to_cpu(IB_LID_PERMISSIVE)); IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg, be16_to_cpu(IB_LID_PERMISSIVE)); } @@ -1520,7 +1514,6 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, } } cm_id->service_id = param->service_id; - cm_id->service_mask = ~cpu_to_be64(0); cm_id_priv->timeout_ms = 
cm_convert_to_ms( param->primary_path->packet_life_time) * 2 + cm_convert_to_ms( @@ -1536,6 +1529,10 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, spin_lock_irqsave(&cm_id_priv->lock, flags); cm_move_av_from_path(&cm_id_priv->av, &av); + if (param->primary_path_outbound) + cm_id_priv->av.dlid_datapath = + be16_to_cpu(param->primary_path_outbound->ib.dlid); + if (param->alternate_path) cm_move_av_from_path(&cm_id_priv->alt_av, &alt_av); @@ -1630,14 +1627,13 @@ static void cm_path_set_rec_type(struct ib_device *ib_device, u32 port_num, static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg, struct sa_path_rec *primary_path, - struct sa_path_rec *alt_path) + struct sa_path_rec *alt_path, + struct ib_wc *wc) { u32 lid; if (primary_path->rec_type != SA_PATH_REC_TYPE_OPA) { - sa_path_set_dlid(primary_path, - IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID, - req_msg)); + sa_path_set_dlid(primary_path, wc->slid); sa_path_set_slid(primary_path, IBA_GET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg)); @@ -1674,7 +1670,8 @@ static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg, static void cm_format_paths_from_req(struct cm_req_msg *req_msg, struct sa_path_rec *primary_path, - struct sa_path_rec *alt_path) + struct sa_path_rec *alt_path, + struct ib_wc *wc) { primary_path->dgid = *IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg); @@ -1732,7 +1729,7 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg, if (sa_path_is_roce(alt_path)) alt_path->roce.route_resolved = false; } - cm_format_path_lid_from_req(req_msg, primary_path, alt_path); + cm_format_path_lid_from_req(req_msg, primary_path, alt_path, wc); } static u16 cm_get_bth_pkey(struct cm_work *work) @@ -2077,7 +2074,6 @@ static int cm_req_handler(struct cm_work *work) cpu_to_be32(IBA_GET(CM_REQ_LOCAL_COMM_ID, req_msg)); cm_id_priv->id.service_id = cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg)); - cm_id_priv->id.service_mask = ~cpu_to_be64(0); cm_id_priv->tid = req_msg->hdr.tid; 
cm_id_priv->timeout_ms = cm_convert_to_ms( IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg)); @@ -2146,7 +2142,7 @@ static int cm_req_handler(struct cm_work *work) if (cm_req_has_alt_path(req_msg)) work->path[1].rec_type = work->path[0].rec_type; cm_format_paths_from_req(req_msg, &work->path[0], - &work->path[1]); + &work->path[1], work->mad_recv_wc->wc); if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE) sa_path_set_dmac(&work->path[0], cm_id_priv->av.ah_attr.roce.dmac); @@ -2171,6 +2167,10 @@ static int cm_req_handler(struct cm_work *work) NULL, 0); goto rejected; } + if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_IB) + cm_id_priv->av.dlid_datapath = + IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg); + if (cm_req_has_alt_path(req_msg)) { ret = cm_init_av_by_path(&work->path[1], NULL, &cm_id_priv->alt_av); @@ -2824,6 +2824,7 @@ static int cm_dreq_handler(struct cm_work *work) switch (cm_id_priv->id.state) { case IB_CM_REP_SENT: case IB_CM_DREQ_SENT: + case IB_CM_MRA_REP_RCVD: ib_cancel_mad(cm_id_priv->msg); break; case IB_CM_ESTABLISHED: @@ -2831,8 +2832,6 @@ static int cm_dreq_handler(struct cm_work *work) cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) ib_cancel_mad(cm_id_priv->msg); break; - case IB_CM_MRA_REP_RCVD: - break; case IB_CM_TIMEWAIT: atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES] [CM_DREQ_COUNTER]); @@ -3322,7 +3321,7 @@ static int cm_lap_handler(struct cm_work *work) ret = cm_init_av_by_path(param->alternate_path, NULL, &alt_av); if (ret) { rdma_destroy_ah_attr(&ah_attr); - return -EINVAL; + goto deref; } spin_lock_irq(&cm_id_priv->lock); @@ -3485,7 +3484,6 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, spin_lock_irqsave(&cm_id_priv->lock, flags); cm_move_av_from_path(&cm_id_priv->av, &av); cm_id->service_id = param->service_id; - cm_id->service_mask = ~cpu_to_be64(0); cm_id_priv->timeout_ms = param->timeout_ms; cm_id_priv->max_cm_retries = param->max_cm_retries; if (cm_id->state != IB_CM_IDLE) { @@ -3560,7 +3558,6 @@ 
static int cm_sidr_req_handler(struct cm_work *work) cpu_to_be32(IBA_GET(CM_SIDR_REQ_REQUESTID, sidr_req_msg)); cm_id_priv->id.service_id = cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg)); - cm_id_priv->id.service_mask = ~cpu_to_be64(0); cm_id_priv->tid = sidr_req_msg->hdr.tid; wc = work->mad_recv_wc->wc; @@ -4133,6 +4130,10 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_RQ_PSN; qp_attr->ah_attr = cm_id_priv->av.ah_attr; + if ((qp_attr->ah_attr.type == RDMA_AH_ATTR_TYPE_IB) && + cm_id_priv->av.dlid_datapath && + (cm_id_priv->av.dlid_datapath != 0xffff)) + qp_attr->ah_attr.ib.dlid = cm_id_priv->av.dlid_datapath; qp_attr->path_mtu = cm_id_priv->path_mtu; qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn); qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 835ac54d4a24..26d1772179b8 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -11,6 +11,7 @@ #include <linux/in6.h> #include <linux/mutex.h> #include <linux/random.h> +#include <linux/rbtree.h> #include <linux/igmp.h> #include <linux/xarray.h> #include <linux/inetdevice.h> @@ -20,6 +21,7 @@ #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <net/netevent.h> #include <net/tcp.h> #include <net/ipv6.h> #include <net/ip_fib.h> @@ -67,8 +69,8 @@ static const char * const cma_events[] = { [RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit", }; -static void cma_set_mgid(struct rdma_id_private *id_priv, struct sockaddr *addr, - union ib_gid *mgid); +static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid, + enum ib_gid_type gid_type); const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event) { @@ -168,6 +170,9 @@ static struct ib_sa_client sa_client; static LIST_HEAD(dev_list); static LIST_HEAD(listen_any_list); static DEFINE_MUTEX(lock); 
+static struct rb_root id_table = RB_ROOT; +/* Serialize operations of id_table tree */ +static DEFINE_SPINLOCK(id_table_lock); static struct workqueue_struct *cma_wq; static unsigned int cma_pernet_id; @@ -202,6 +207,11 @@ struct xarray *cma_pernet_xa(struct net *net, enum rdma_ucm_port_space ps) } } +struct id_table_entry { + struct list_head id_list; + struct rb_node rb_node; +}; + struct cma_device { struct list_head list; struct ib_device *device; @@ -420,11 +430,21 @@ static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr) return hdr->ip_version >> 4; } -static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver) +static void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver) { hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF); } +static struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv) +{ + return (struct sockaddr *)&id_priv->id.route.addr.src_addr; +} + +static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv) +{ + return (struct sockaddr *)&id_priv->id.route.addr.dst_addr; +} + static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join) { struct in_device *in_dev = NULL; @@ -445,6 +465,117 @@ static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join) return (in_dev) ? 0 : -ENODEV; } +static int compare_netdev_and_ip(int ifindex_a, struct sockaddr *sa, + struct id_table_entry *entry_b) +{ + struct rdma_id_private *id_priv = list_first_entry( + &entry_b->id_list, struct rdma_id_private, id_list_entry); + int ifindex_b = id_priv->id.route.addr.dev_addr.bound_dev_if; + struct sockaddr *sb = cma_dst_addr(id_priv); + + if (ifindex_a != ifindex_b) + return (ifindex_a > ifindex_b) ? 
1 : -1; + + if (sa->sa_family != sb->sa_family) + return sa->sa_family - sb->sa_family; + + if (sa->sa_family == AF_INET) + return memcmp((char *)&((struct sockaddr_in *)sa)->sin_addr, + (char *)&((struct sockaddr_in *)sb)->sin_addr, + sizeof(((struct sockaddr_in *)sa)->sin_addr)); + + return ipv6_addr_cmp(&((struct sockaddr_in6 *)sa)->sin6_addr, + &((struct sockaddr_in6 *)sb)->sin6_addr); +} + +static int cma_add_id_to_tree(struct rdma_id_private *node_id_priv) +{ + struct rb_node **new, *parent = NULL; + struct id_table_entry *this, *node; + unsigned long flags; + int result; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + spin_lock_irqsave(&id_table_lock, flags); + new = &id_table.rb_node; + while (*new) { + this = container_of(*new, struct id_table_entry, rb_node); + result = compare_netdev_and_ip( + node_id_priv->id.route.addr.dev_addr.bound_dev_if, + cma_dst_addr(node_id_priv), this); + + parent = *new; + if (result < 0) + new = &((*new)->rb_left); + else if (result > 0) + new = &((*new)->rb_right); + else { + list_add_tail(&node_id_priv->id_list_entry, + &this->id_list); + kfree(node); + goto unlock; + } + } + + INIT_LIST_HEAD(&node->id_list); + list_add_tail(&node_id_priv->id_list_entry, &node->id_list); + + rb_link_node(&node->rb_node, parent, new); + rb_insert_color(&node->rb_node, &id_table); + +unlock: + spin_unlock_irqrestore(&id_table_lock, flags); + return 0; +} + +static struct id_table_entry * +node_from_ndev_ip(struct rb_root *root, int ifindex, struct sockaddr *sa) +{ + struct rb_node *node = root->rb_node; + struct id_table_entry *data; + int result; + + while (node) { + data = container_of(node, struct id_table_entry, rb_node); + result = compare_netdev_and_ip(ifindex, sa, data); + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return data; + } + + return NULL; +} + +static void cma_remove_id_from_tree(struct rdma_id_private *id_priv) +{ + struct id_table_entry 
*data; + unsigned long flags; + + spin_lock_irqsave(&id_table_lock, flags); + if (list_empty(&id_priv->id_list_entry)) + goto out; + + data = node_from_ndev_ip(&id_table, + id_priv->id.route.addr.dev_addr.bound_dev_if, + cma_dst_addr(id_priv)); + if (!data) + goto out; + + list_del_init(&id_priv->id_list_entry); + if (list_empty(&data->id_list)) { + rb_erase(&data->rb_node, &id_table); + kfree(data); + } +out: + spin_unlock_irqrestore(&id_table_lock, flags); +} + static void _cma_attach_to_dev(struct rdma_id_private *id_priv, struct cma_device *cma_dev) { @@ -481,16 +612,6 @@ static void cma_release_dev(struct rdma_id_private *id_priv) mutex_unlock(&lock); } -static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv) -{ - return (struct sockaddr *) &id_priv->id.route.addr.src_addr; -} - -static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv) -{ - return (struct sockaddr *) &id_priv->id.route.addr.dst_addr; -} - static inline unsigned short cma_family(struct rdma_id_private *id_priv) { return id_priv->id.route.addr.src_addr.ss_family; @@ -766,6 +887,7 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) unsigned int p; u16 pkey, index; enum ib_port_state port_state; + int ret; int i; cma_dev = NULL; @@ -784,9 +906,14 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) if (ib_get_cached_port_state(cur_dev->device, p, &port_state)) continue; - for (i = 0; !rdma_query_gid(cur_dev->device, - p, i, &gid); - i++) { + + for (i = 0; i < cur_dev->device->port_data[p].immutable.gid_tbl_len; + ++i) { + ret = rdma_query_gid(cur_dev->device, p, i, + &gid); + if (ret) + continue; + if (!memcmp(&gid, dgid, sizeof(gid))) { cma_dev = cur_dev; sgid = gid; @@ -855,6 +982,7 @@ __rdma_create_id(struct net *net, rdma_cm_event_handler event_handler, refcount_set(&id_priv->refcount, 1); mutex_init(&id_priv->handler_mutex); INIT_LIST_HEAD(&id_priv->device_item); + INIT_LIST_HEAD(&id_priv->id_list_entry); 
INIT_LIST_HEAD(&id_priv->listen_list); INIT_LIST_HEAD(&id_priv->mc_list); get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); @@ -1428,7 +1556,7 @@ static bool validate_ipv4_net_dev(struct net_device *net_dev, return false; memset(&fl4, 0, sizeof(fl4)); - fl4.flowi4_iif = net_dev->ifindex; + fl4.flowi4_oif = net_dev->ifindex; fl4.daddr = daddr; fl4.saddr = saddr; @@ -1713,8 +1841,8 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id, } if (!validate_net_dev(*net_dev, - (struct sockaddr *)&req->listen_addr_storage, - (struct sockaddr *)&req->src_addr_storage)) { + (struct sockaddr *)&req->src_addr_storage, + (struct sockaddr *)&req->listen_addr_storage)) { id_priv = ERR_PTR(-EHOSTUNREACH); goto err; } @@ -1840,17 +1968,19 @@ static void destroy_mc(struct rdma_id_private *id_priv, if (dev_addr->bound_dev_if) ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); - if (ndev) { + if (ndev && !send_only) { + enum ib_gid_type gid_type; union ib_gid mgid; - cma_set_mgid(id_priv, (struct sockaddr *)&mc->addr, - &mgid); - - if (!send_only) - cma_igmp_send(ndev, &mgid, false); - - dev_put(ndev); + gid_type = id_priv->cma_dev->default_gid_type + [id_priv->id.port_num - + rdma_start_port( + id_priv->cma_dev->device)]; + cma_iboe_set_mgid((struct sockaddr *)&mc->addr, &mgid, + gid_type); + cma_igmp_send(ndev, &mgid, false); } + dev_put(ndev); cancel_work_sync(&mc->iboe_join.work); } @@ -1875,6 +2005,7 @@ static void _destroy_id(struct rdma_id_private *id_priv, cma_cancel_operation(id_priv, state); rdma_restrack_del(&id_priv->res); + cma_remove_id_from_tree(id_priv); if (id_priv->cma_dev) { if (rdma_cap_ib_cm(id_priv->id.device, 1)) { if (id_priv->cm_id.ib) @@ -1895,6 +2026,8 @@ static void _destroy_id(struct rdma_id_private *id_priv, cma_id_put(id_priv->id.context); kfree(id_priv->id.route.path_rec); + kfree(id_priv->id.route.path_rec_inbound); + kfree(id_priv->id.route.path_rec_outbound); put_net(id_priv->id.route.addr.dev_addr.net); kfree(id_priv); @@ -2110,14 
+2243,14 @@ cma_ib_new_conn_id(const struct rdma_cm_id *listen_id, goto err; rt = &id->route; - rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1; - rt->path_rec = kmalloc_array(rt->num_paths, sizeof(*rt->path_rec), - GFP_KERNEL); + rt->num_pri_alt_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1; + rt->path_rec = kmalloc_array(rt->num_pri_alt_paths, + sizeof(*rt->path_rec), GFP_KERNEL); if (!rt->path_rec) goto err; rt->path_rec[0] = *path; - if (rt->num_paths == 2) + if (rt->num_pri_alt_paths == 2) rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; if (net_dev) { @@ -2634,7 +2767,7 @@ int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout) { struct rdma_id_private *id_priv; - if (id->qp_type != IB_QPT_RC) + if (id->qp_type != IB_QPT_RC && id->qp_type != IB_QPT_XRC_INI) return -EINVAL; id_priv = container_of(id, struct rdma_id_private, id); @@ -2686,26 +2819,72 @@ int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer) } EXPORT_SYMBOL(rdma_set_min_rnr_timer); +static void route_set_path_rec_inbound(struct cma_work *work, + struct sa_path_rec *path_rec) +{ + struct rdma_route *route = &work->id->id.route; + + if (!route->path_rec_inbound) { + route->path_rec_inbound = + kzalloc(sizeof(*route->path_rec_inbound), GFP_KERNEL); + if (!route->path_rec_inbound) + return; + } + + *route->path_rec_inbound = *path_rec; +} + +static void route_set_path_rec_outbound(struct cma_work *work, + struct sa_path_rec *path_rec) +{ + struct rdma_route *route = &work->id->id.route; + + if (!route->path_rec_outbound) { + route->path_rec_outbound = + kzalloc(sizeof(*route->path_rec_outbound), GFP_KERNEL); + if (!route->path_rec_outbound) + return; + } + + *route->path_rec_outbound = *path_rec; +} + static void cma_query_handler(int status, struct sa_path_rec *path_rec, - void *context) + int num_prs, void *context) { struct cma_work *work = context; struct rdma_route *route; + int i; route = &work->id->id.route; - if (!status) { - 
route->num_paths = 1; - *route->path_rec = *path_rec; - } else { - work->old_state = RDMA_CM_ROUTE_QUERY; - work->new_state = RDMA_CM_ADDR_RESOLVED; - work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; - work->event.status = status; - pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n", - status); + if (status) + goto fail; + + for (i = 0; i < num_prs; i++) { + if (!path_rec[i].flags || (path_rec[i].flags & IB_PATH_GMP)) + *route->path_rec = path_rec[i]; + else if (path_rec[i].flags & IB_PATH_INBOUND) + route_set_path_rec_inbound(work, &path_rec[i]); + else if (path_rec[i].flags & IB_PATH_OUTBOUND) + route_set_path_rec_outbound(work, &path_rec[i]); + } + if (!route->path_rec) { + status = -EINVAL; + goto fail; } + route->num_pri_alt_paths = 1; + queue_work(cma_wq, &work->work); + return; + +fail: + work->old_state = RDMA_CM_ROUTE_QUERY; + work->new_state = RDMA_CM_ADDR_RESOLVED; + work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; + work->event.status = status; + pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. 
status %d\n", + status); queue_work(cma_wq, &work->work); } @@ -2950,7 +3129,7 @@ int rdma_set_ib_path(struct rdma_cm_id *id, dev_put(ndev); } - id->route.num_paths = 1; + id->route.num_pri_alt_paths = 1; return 0; err_free: @@ -3083,7 +3262,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) goto err1; } - route->num_paths = 1; + route->num_pri_alt_paths = 1; ndev = cma_iboe_set_path_rec_l2_fields(id_priv); if (!ndev) { @@ -3143,7 +3322,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) err2: kfree(route->path_rec); route->path_rec = NULL; - route->num_paths = 0; + route->num_pri_alt_paths = 0; err1: kfree(work); return ret; @@ -3164,8 +3343,11 @@ int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms) cma_id_get(id_priv); if (rdma_cap_ib_sa(id->device, id->port_num)) ret = cma_resolve_ib_route(id_priv, timeout_ms); - else if (rdma_protocol_roce(id->device, id->port_num)) + else if (rdma_protocol_roce(id->device, id->port_num)) { ret = cma_resolve_iboe_route(id_priv); + if (!ret) + cma_add_id_to_tree(id_priv); + } else if (rdma_protocol_iwarp(id->device, id->port_num)) ret = cma_resolve_iw_route(id_priv); else @@ -3362,22 +3544,30 @@ err: static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, const struct sockaddr *dst_addr) { - if (!src_addr || !src_addr->sa_family) { - src_addr = (struct sockaddr *) &id->route.addr.src_addr; - src_addr->sa_family = dst_addr->sa_family; - if (IS_ENABLED(CONFIG_IPV6) && - dst_addr->sa_family == AF_INET6) { - struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr; - struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr; - src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; - if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) - id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id; - } else if (dst_addr->sa_family == AF_IB) { - ((struct sockaddr_ib *) src_addr)->sib_pkey = - ((struct sockaddr_ib *) 
dst_addr)->sib_pkey; - } + struct sockaddr_storage zero_sock = {}; + + if (src_addr && src_addr->sa_family) + return rdma_bind_addr(id, src_addr); + + /* + * When the src_addr is not specified, automatically supply an any addr + */ + zero_sock.ss_family = dst_addr->sa_family; + if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) { + struct sockaddr_in6 *src_addr6 = + (struct sockaddr_in6 *)&zero_sock; + struct sockaddr_in6 *dst_addr6 = + (struct sockaddr_in6 *)dst_addr; + + src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; + if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) + id->route.addr.dev_addr.bound_dev_if = + dst_addr6->sin6_scope_id; + } else if (dst_addr->sa_family == AF_IB) { + ((struct sockaddr_ib *)&zero_sock)->sib_pkey = + ((struct sockaddr_ib *)dst_addr)->sib_pkey; } - return rdma_bind_addr(id, src_addr); + return rdma_bind_addr(id, (struct sockaddr *)&zero_sock); } /* @@ -3617,7 +3807,7 @@ static int cma_alloc_any_port(enum rdma_ucm_port_space ps, inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; - rover = prandom_u32() % remaining + low; + rover = prandom_u32_max(remaining) + low; retry: if (last_used_port != rover) { struct rdma_bind_list *bind_list; @@ -4033,8 +4223,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, memset(&req, 0, sizeof req); offset = cma_user_data_offset(id_priv); - req.private_data_len = offset + conn_param->private_data_len; - if (req.private_data_len < conn_param->private_data_len) + if (check_add_overflow(offset, conn_param->private_data_len, &req.private_data_len)) return -EINVAL; if (req.private_data_len) { @@ -4093,8 +4282,7 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, memset(&req, 0, sizeof req); offset = cma_user_data_offset(id_priv); - req.private_data_len = offset + conn_param->private_data_len; - if (req.private_data_len < conn_param->private_data_len) + if (check_add_overflow(offset, conn_param->private_data_len, 
&req.private_data_len)) return -EINVAL; if (req.private_data_len) { @@ -4125,7 +4313,9 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, } req.primary_path = &route->path_rec[0]; - if (route->num_paths == 2) + req.primary_path_inbound = route->path_rec_inbound; + req.primary_path_outbound = route->path_rec_outbound; + if (route->num_pri_alt_paths == 2) req.alternate_path = &route->path_rec[1]; req.ppath_sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr; @@ -4908,10 +5098,87 @@ out: return ret; } +static void cma_netevent_work_handler(struct work_struct *_work) +{ + struct rdma_id_private *id_priv = + container_of(_work, struct rdma_id_private, id.net_work); + struct rdma_cm_event event = {}; + + mutex_lock(&id_priv->handler_mutex); + + if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING || + READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL) + goto out_unlock; + + event.event = RDMA_CM_EVENT_UNREACHABLE; + event.status = -ETIMEDOUT; + + if (cma_cm_event_handler(id_priv, &event)) { + __acquire(&id_priv->handler_mutex); + id_priv->cm_id.ib = NULL; + cma_id_put(id_priv); + destroy_id_handler_unlock(id_priv); + return; + } + +out_unlock: + mutex_unlock(&id_priv->handler_mutex); + cma_id_put(id_priv); +} + +static int cma_netevent_callback(struct notifier_block *self, + unsigned long event, void *ctx) +{ + struct id_table_entry *ips_node = NULL; + struct rdma_id_private *current_id; + struct neighbour *neigh = ctx; + unsigned long flags; + + if (event != NETEVENT_NEIGH_UPDATE) + return NOTIFY_DONE; + + spin_lock_irqsave(&id_table_lock, flags); + if (neigh->tbl->family == AF_INET6) { + struct sockaddr_in6 neigh_sock_6; + + neigh_sock_6.sin6_family = AF_INET6; + neigh_sock_6.sin6_addr = *(struct in6_addr *)neigh->primary_key; + ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex, + (struct sockaddr *)&neigh_sock_6); + } else if (neigh->tbl->family == AF_INET) { + struct sockaddr_in neigh_sock_4; + + neigh_sock_4.sin_family = AF_INET; + 
neigh_sock_4.sin_addr.s_addr = *(__be32 *)(neigh->primary_key); + ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex, + (struct sockaddr *)&neigh_sock_4); + } else + goto out; + + if (!ips_node) + goto out; + + list_for_each_entry(current_id, &ips_node->id_list, id_list_entry) { + if (!memcmp(current_id->id.route.addr.dev_addr.dst_dev_addr, + neigh->ha, ETH_ALEN)) + continue; + INIT_WORK(¤t_id->id.net_work, cma_netevent_work_handler); + cma_id_get(current_id); + queue_work(cma_wq, ¤t_id->id.net_work); + } +out: + spin_unlock_irqrestore(&id_table_lock, flags); + return NOTIFY_DONE; +} + static struct notifier_block cma_nb = { .notifier_call = cma_netdev_callback }; +static struct notifier_block cma_netevent_cb = { + .notifier_call = cma_netevent_callback +}; + static void cma_send_device_removal_put(struct rdma_id_private *id_priv) { struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL }; @@ -5134,6 +5401,7 @@ static int __init cma_init(void) ib_sa_register_client(&sa_client); register_netdevice_notifier(&cma_nb); + register_netevent_notifier(&cma_netevent_cb); ret = ib_register_client(&cma_client); if (ret) @@ -5148,6 +5416,7 @@ static int __init cma_init(void) err_ib: ib_unregister_client(&cma_client); err: + unregister_netevent_notifier(&cma_netevent_cb); unregister_netdevice_notifier(&cma_nb); ib_sa_unregister_client(&sa_client); unregister_pernet_subsys(&cma_pernet_operations); @@ -5160,6 +5429,7 @@ static void __exit cma_cleanup(void) { cma_configfs_exit(); ib_unregister_client(&cma_client); + unregister_netevent_notifier(&cma_netevent_cb); unregister_netdevice_notifier(&cma_nb); ib_sa_unregister_client(&sa_client); unregister_pernet_subsys(&cma_pernet_operations); diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index 9ac16e0db761..7b68b3ea979f 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -30,7 +30,6 @@ * SOFTWARE. 
*/ -#include <linux/module.h> #include <linux/configfs.h> #include <rdma/ib_verbs.h> #include <rdma/rdma_cm.h> @@ -293,7 +292,7 @@ static struct config_group *make_cma_dev(struct config_group *group, goto fail; } - strlcpy(cma_dev_group->name, name, sizeof(cma_dev_group->name)); + strscpy(cma_dev_group->name, name, sizeof(cma_dev_group->name)); config_group_init_type_name(&cma_dev_group->ports_group, "ports", &cma_ports_group_type); diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h index 757a0ef79872..b7354c94cf1b 100644 --- a/drivers/infiniband/core/cma_priv.h +++ b/drivers/infiniband/core/cma_priv.h @@ -64,6 +64,7 @@ struct rdma_id_private { struct list_head listen_item; struct list_head listen_list; }; + struct list_head id_list_entry; struct cma_device *cma_dev; struct list_head mc_list; diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index 433b426729d4..a70876a0a231 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -2,7 +2,6 @@ /* * Copyright (c) 2015 HGST, a Western Digital Company. 
*/ -#include <linux/module.h> #include <linux/err.h> #include <linux/slab.h> #include <rdma/ib_verbs.h> diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 22a4adda7981..b69e2c4e4d2a 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -58,6 +58,7 @@ struct workqueue_struct *ib_comp_wq; struct workqueue_struct *ib_comp_unbound_wq; struct workqueue_struct *ib_wq; EXPORT_SYMBOL_GPL(ib_wq); +static struct workqueue_struct *ib_unreg_wq; /* * Each of the three rwsem locks (devices, clients, client_data) protects the @@ -421,7 +422,7 @@ int ib_device_rename(struct ib_device *ibdev, const char *name) return ret; } - strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX); + strscpy(ibdev->name, name, IB_DEVICE_NAME_MAX); ret = rename_compat_devs(ibdev); downgrade_write(&devices_rwsem); @@ -1216,7 +1217,7 @@ static int assign_name(struct ib_device *device, const char *name) ret = -ENFILE; goto out; } - strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX); + strscpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX); ret = xa_alloc_cyclic(&devices, &device->index, device, xa_limit_31b, &last_id, GFP_KERNEL); @@ -1602,7 +1603,7 @@ void ib_unregister_device_queued(struct ib_device *ib_dev) WARN_ON(!refcount_read(&ib_dev->refcount)); WARN_ON(!ib_dev->ops.dealloc_driver); get_device(&ib_dev->dev); - if (!queue_work(system_unbound_wq, &ib_dev->unregistration_work)) + if (!queue_work(ib_unreg_wq, &ib_dev->unregistration_work)) put_device(&ib_dev->dev); } EXPORT_SYMBOL(ib_unregister_device_queued); @@ -2461,7 +2462,8 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid, ++i) { ret = rdma_query_gid(device, port, i, &tmp_gid); if (ret) - return ret; + continue; + if (!memcmp(&tmp_gid, gid, sizeof *gid)) { *port_num = port; if (index) @@ -2612,7 +2614,6 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, create_counters); 
SET_DEVICE_OP(dev_ops, create_cq); SET_DEVICE_OP(dev_ops, create_flow); - SET_DEVICE_OP(dev_ops, create_flow_action_esp); SET_DEVICE_OP(dev_ops, create_qp); SET_DEVICE_OP(dev_ops, create_rwq_ind_table); SET_DEVICE_OP(dev_ops, create_srq); @@ -2675,7 +2676,6 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, modify_ah); SET_DEVICE_OP(dev_ops, modify_cq); SET_DEVICE_OP(dev_ops, modify_device); - SET_DEVICE_OP(dev_ops, modify_flow_action_esp); SET_DEVICE_OP(dev_ops, modify_hw_stat); SET_DEVICE_OP(dev_ops, modify_port); SET_DEVICE_OP(dev_ops, modify_qp); @@ -2752,27 +2752,28 @@ static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = { static int __init ib_core_init(void) { - int ret; + int ret = -ENOMEM; ib_wq = alloc_workqueue("infiniband", 0, 0); if (!ib_wq) return -ENOMEM; + ib_unreg_wq = alloc_workqueue("ib-unreg-wq", WQ_UNBOUND, + WQ_UNBOUND_MAX_ACTIVE); + if (!ib_unreg_wq) + goto err; + ib_comp_wq = alloc_workqueue("ib-comp-wq", WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0); - if (!ib_comp_wq) { - ret = -ENOMEM; - goto err; - } + if (!ib_comp_wq) + goto err_unbound; ib_comp_unbound_wq = alloc_workqueue("ib-comp-unb-wq", WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE); - if (!ib_comp_unbound_wq) { - ret = -ENOMEM; + if (!ib_comp_unbound_wq) goto err_comp; - } ret = class_register(&ib_class); if (ret) { @@ -2814,10 +2815,18 @@ static int __init ib_core_init(void) nldev_init(); rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table); - roce_gid_mgmt_init(); + ret = roce_gid_mgmt_init(); + if (ret) { + pr_warn("Couldn't init RoCE GID management\n"); + goto err_parent; + } return 0; +err_parent: + rdma_nl_unregister(RDMA_NL_LS); + nldev_exit(); + unregister_pernet_device(&rdma_dev_net_ops); err_compat: unregister_blocking_lsm_notifier(&ibdev_lsm_nb); err_sa: @@ -2832,6 +2841,8 @@ err_comp_unbound: destroy_workqueue(ib_comp_unbound_wq); err_comp: destroy_workqueue(ib_comp_wq); 
+err_unbound: + destroy_workqueue(ib_unreg_wq); err: destroy_workqueue(ib_wq); return ret; @@ -2853,7 +2864,7 @@ static void __exit ib_core_cleanup(void) destroy_workqueue(ib_comp_wq); /* Make sure that any pending umem accounting work is done. */ destroy_workqueue(ib_wq); - flush_workqueue(system_unbound_wq); + destroy_workqueue(ib_unreg_wq); WARN_ON(!xa_empty(&clients)); WARN_ON(!xa_empty(&devices)); } diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h index 3a42ad43056e..d6fc8402158a 100644 --- a/drivers/infiniband/core/iwpm_util.h +++ b/drivers/infiniband/core/iwpm_util.h @@ -33,7 +33,6 @@ #ifndef _IWPM_UTIL_H #define _IWPM_UTIL_H -#include <linux/module.h> #include <linux/io.h> #include <linux/in.h> #include <linux/in6.h> diff --git a/drivers/infiniband/core/lag.c b/drivers/infiniband/core/lag.c index 7063e41eaf26..c77d7d2559a1 100644 --- a/drivers/infiniband/core/lag.c +++ b/drivers/infiniband/core/lag.c @@ -7,8 +7,7 @@ #include <rdma/ib_cache.h> #include <rdma/lag.h> -static struct sk_buff *rdma_build_skb(struct ib_device *device, - struct net_device *netdev, +static struct sk_buff *rdma_build_skb(struct net_device *netdev, struct rdma_ah_attr *ah_attr, gfp_t flags) { @@ -86,7 +85,7 @@ static struct net_device *rdma_get_xmit_slave_udp(struct ib_device *device, struct net_device *slave; struct sk_buff *skb; - skb = rdma_build_skb(device, master, ah_attr, flags); + skb = rdma_build_skb(master, ah_attr, flags); if (!skb) return ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index f5aacaf7fb8e..12dc97067ed2 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -1739,7 +1739,7 @@ static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, if (!device) return -EINVAL; - if (!(device->attrs.device_cap_flags & IB_DEVICE_ALLOW_USER_UNREG)) { + if (!(device->attrs.kernel_cap_flags & IBK_ALLOW_USER_UNREG)) { ib_device_put(device); return 
-EINVAL; } @@ -1951,9 +1951,10 @@ static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[], u32 port) { struct rdma_hw_stats *stats; - int rem, i, index, ret = 0; struct nlattr *entry_attr; unsigned long *target; + int rem, i, ret = 0; + u32 index; stats = ib_get_hw_stats_port(device, port); if (!stats) @@ -2536,7 +2537,7 @@ void __init nldev_init(void) rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table); } -void __exit nldev_exit(void) +void nldev_exit(void) { rdma_nl_unregister(RDMA_NL_NLDEV); } diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 94d83b665a2f..29b1ab1d5f93 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -68,7 +68,7 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj, * In exclusive access mode, we check that the counter is zero (nobody * claimed this object) and we set it to -1. Releasing a shared access * lock is done simply by decreasing the counter. As for exclusive - * access locks, since only a single one of them is is allowed + * access locks, since only a single one of them is allowed * concurrently, setting the counter to zero is enough for releasing * this lock. */ diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index 68197e576433..e958c43dd28f 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -250,7 +250,7 @@ static bool upper_device_filter(struct ib_device *ib_dev, u32 port, /** * is_upper_ndev_bond_master_filter - Check if a given netdevice - * is bond master device of netdevice of the the RDMA device of port. + * is bond master device of netdevice of the RDMA device of port. 
* @ib_dev: IB device to check * @port: Port to consider for adding default GID * @rdma_ndev: Pointer to rdma netdevice diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 5a3bd41b331c..8367974b7998 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -2,6 +2,7 @@ /* * Copyright (c) 2016 HGST, a Western Digital Company. */ +#include <linux/memremap.h> #include <linux/moduleparam.h> #include <linux/slab.h> #include <linux/pci-p2pdma.h> @@ -273,33 +274,6 @@ static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp, return 1; } -static void rdma_rw_unmap_sg(struct ib_device *dev, struct scatterlist *sg, - u32 sg_cnt, enum dma_data_direction dir) -{ - if (is_pci_p2pdma_page(sg_page(sg))) - pci_p2pdma_unmap_sg(dev->dma_device, sg, sg_cnt, dir); - else - ib_dma_unmap_sg(dev, sg, sg_cnt, dir); -} - -static int rdma_rw_map_sgtable(struct ib_device *dev, struct sg_table *sgt, - enum dma_data_direction dir) -{ - int nents; - - if (is_pci_p2pdma_page(sg_page(sgt->sgl))) { - if (WARN_ON_ONCE(ib_uses_virt_dma(dev))) - return 0; - nents = pci_p2pdma_map_sg(dev->dma_device, sgt->sgl, - sgt->orig_nents, dir); - if (!nents) - return -EIO; - sgt->nents = nents; - return 0; - } - return ib_dma_map_sgtable_attrs(dev, sgt, dir, 0); -} - /** * rdma_rw_ctx_init - initialize a RDMA READ/WRITE context * @ctx: context to initialize @@ -326,7 +300,7 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num, }; int ret; - ret = rdma_rw_map_sgtable(dev, &sgt, dir); + ret = ib_dma_map_sgtable_attrs(dev, &sgt, dir, 0); if (ret) return ret; sg_cnt = sgt.nents; @@ -365,7 +339,7 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num, return ret; out_unmap_sg: - rdma_rw_unmap_sg(dev, sgt.sgl, sgt.orig_nents, dir); + ib_dma_unmap_sgtable_attrs(dev, &sgt, dir, 0); return ret; } EXPORT_SYMBOL(rdma_rw_ctx_init); @@ -413,12 +387,12 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx 
*ctx, struct ib_qp *qp, return -EINVAL; } - ret = rdma_rw_map_sgtable(dev, &sgt, dir); + ret = ib_dma_map_sgtable_attrs(dev, &sgt, dir, 0); if (ret) return ret; if (prot_sg_cnt) { - ret = rdma_rw_map_sgtable(dev, &prot_sgt, dir); + ret = ib_dma_map_sgtable_attrs(dev, &prot_sgt, dir, 0); if (ret) goto out_unmap_sg; } @@ -485,9 +459,9 @@ out_free_ctx: kfree(ctx->reg); out_unmap_prot_sg: if (prot_sgt.nents) - rdma_rw_unmap_sg(dev, prot_sgt.sgl, prot_sgt.orig_nents, dir); + ib_dma_unmap_sgtable_attrs(dev, &prot_sgt, dir, 0); out_unmap_sg: - rdma_rw_unmap_sg(dev, sgt.sgl, sgt.orig_nents, dir); + ib_dma_unmap_sgtable_attrs(dev, &sgt, dir, 0); return ret; } EXPORT_SYMBOL(rdma_rw_ctx_signature_init); @@ -620,7 +594,7 @@ void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, break; } - rdma_rw_unmap_sg(qp->pd->device, sg, sg_cnt, dir); + ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir); } EXPORT_SYMBOL(rdma_rw_ctx_destroy); @@ -648,8 +622,8 @@ void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp, kfree(ctx->reg); if (prot_sg_cnt) - rdma_rw_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir); - rdma_rw_unmap_sg(qp->pd->device, sg, sg_cnt, dir); + ib_dma_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir); + ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir); } EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 74ecd7456a11..0de83d9a4985 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -32,7 +32,6 @@ * SOFTWARE. 
*/ -#include <linux/module.h> #include <linux/init.h> #include <linux/err.h> #include <linux/random.h> @@ -51,6 +50,7 @@ #include <rdma/ib_marshall.h> #include <rdma/ib_addr.h> #include <rdma/opa_addr.h> +#include <rdma/rdma_cm.h> #include "sa.h" #include "core_priv.h" @@ -105,7 +105,8 @@ struct ib_sa_device { }; struct ib_sa_query { - void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *); + void (*callback)(struct ib_sa_query *sa_query, int status, + int num_prs, struct ib_sa_mad *mad); void (*release)(struct ib_sa_query *); struct ib_sa_client *client; struct ib_sa_port *port; @@ -117,6 +118,12 @@ struct ib_sa_query { u32 seq; /* Local svc request sequence number */ unsigned long timeout; /* Local svc timeout */ u8 path_use; /* How will the pathrecord be used */ + + /* A separate buffer to save pathrecords of a response, as in cases + * like IB/netlink, mulptiple pathrecords are supported, so that + * mad->data is not large enough to hold them + */ + void *resp_pr_data; }; #define IB_SA_ENABLE_LOCAL_SERVICE 0x00000001 @@ -124,7 +131,8 @@ struct ib_sa_query { #define IB_SA_QUERY_OPA 0x00000004 struct ib_sa_path_query { - void (*callback)(int, struct sa_path_rec *, void *); + void (*callback)(int status, struct sa_path_rec *rec, + int num_paths, void *context); void *context; struct ib_sa_query sa_query; struct sa_path_rec *conv_pr; @@ -713,7 +721,7 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb, if ((comp_mask & IB_SA_PATH_REC_REVERSIBLE) && sa_rec->reversible != 0) - query->path_use = LS_RESOLVE_PATH_USE_GMP; + query->path_use = LS_RESOLVE_PATH_USE_ALL; else query->path_use = LS_RESOLVE_PATH_USE_UNIDIRECTIONAL; header->path_use = query->path_use; @@ -866,50 +874,81 @@ static void send_handler(struct ib_mad_agent *agent, static void ib_nl_process_good_resolve_rsp(struct ib_sa_query *query, const struct nlmsghdr *nlh) { + struct ib_path_rec_data *srec, *drec; + struct ib_sa_path_query *path_query; struct ib_mad_send_wc mad_send_wc; - struct 
ib_sa_mad *mad = NULL; const struct nlattr *head, *curr; - struct ib_path_rec_data *rec; - int len, rem; + struct ib_sa_mad *mad = NULL; + int len, rem, num_prs = 0; u32 mask = 0; int status = -EIO; - if (query->callback) { - head = (const struct nlattr *) nlmsg_data(nlh); - len = nlmsg_len(nlh); - switch (query->path_use) { - case LS_RESOLVE_PATH_USE_UNIDIRECTIONAL: - mask = IB_PATH_PRIMARY | IB_PATH_OUTBOUND; - break; + if (!query->callback) + goto out; - case LS_RESOLVE_PATH_USE_ALL: - case LS_RESOLVE_PATH_USE_GMP: - default: - mask = IB_PATH_PRIMARY | IB_PATH_GMP | - IB_PATH_BIDIRECTIONAL; - break; + path_query = container_of(query, struct ib_sa_path_query, sa_query); + mad = query->mad_buf->mad; + if (!path_query->conv_pr && + (be16_to_cpu(mad->mad_hdr.attr_id) == IB_SA_ATTR_PATH_REC)) { + /* Need a larger buffer for possible multiple PRs */ + query->resp_pr_data = kvcalloc(RDMA_PRIMARY_PATH_MAX_REC_NUM, + sizeof(*drec), GFP_KERNEL); + if (!query->resp_pr_data) { + query->callback(query, -ENOMEM, 0, NULL); + return; } - nla_for_each_attr(curr, head, len, rem) { - if (curr->nla_type == LS_NLA_TYPE_PATH_RECORD) { - rec = nla_data(curr); - /* - * Get the first one. In the future, we may - * need to get up to 6 pathrecords. 
- */ - if ((rec->flags & mask) == mask) { - mad = query->mad_buf->mad; - mad->mad_hdr.method |= - IB_MGMT_METHOD_RESP; - memcpy(mad->data, rec->path_rec, - sizeof(rec->path_rec)); - status = 0; - break; - } - } + } + + head = (const struct nlattr *) nlmsg_data(nlh); + len = nlmsg_len(nlh); + switch (query->path_use) { + case LS_RESOLVE_PATH_USE_UNIDIRECTIONAL: + mask = IB_PATH_PRIMARY | IB_PATH_OUTBOUND; + break; + + case LS_RESOLVE_PATH_USE_ALL: + mask = IB_PATH_PRIMARY; + break; + + case LS_RESOLVE_PATH_USE_GMP: + default: + mask = IB_PATH_PRIMARY | IB_PATH_GMP | + IB_PATH_BIDIRECTIONAL; + break; + } + + drec = (struct ib_path_rec_data *)query->resp_pr_data; + nla_for_each_attr(curr, head, len, rem) { + if (curr->nla_type != LS_NLA_TYPE_PATH_RECORD) + continue; + + srec = nla_data(curr); + if ((srec->flags & mask) != mask) + continue; + + status = 0; + if (!drec) { + memcpy(mad->data, srec->path_rec, + sizeof(srec->path_rec)); + num_prs = 1; + break; } - query->callback(query, status, mad); + + memcpy(drec, srec, sizeof(*drec)); + drec++; + num_prs++; + if (num_prs >= RDMA_PRIMARY_PATH_MAX_REC_NUM) + break; } + if (!status) + mad->mad_hdr.method |= IB_MGMT_METHOD_RESP; + + query->callback(query, status, num_prs, mad); + kvfree(query->resp_pr_data); + query->resp_pr_data = NULL; + +out: mad_send_wc.send_buf = query->mad_buf; mad_send_wc.status = IB_WC_SUCCESS; send_handler(query->mad_buf->mad_agent, &mad_send_wc); @@ -1035,10 +1074,9 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb, struct netlink_ext_ack *extack) { unsigned long flags; - struct ib_sa_query *query; + struct ib_sa_query *query = NULL, *iter; struct ib_mad_send_buf *send_buf; struct ib_mad_send_wc mad_send_wc; - int found = 0; int ret; if ((nlh->nlmsg_flags & NLM_F_REQUEST) || @@ -1046,20 +1084,21 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb, return -EPERM; spin_lock_irqsave(&ib_nl_request_lock, flags); - list_for_each_entry(query, &ib_nl_request_list, list) { + list_for_each_entry(iter, 
&ib_nl_request_list, list) { /* * If the query is cancelled, let the timeout routine * take care of it. */ - if (nlh->nlmsg_seq == query->seq) { - found = !ib_sa_query_cancelled(query); - if (found) - list_del(&query->list); + if (nlh->nlmsg_seq == iter->seq) { + if (!ib_sa_query_cancelled(iter)) { + list_del(&iter->list); + query = iter; + } break; } } - if (!found) { + if (!query) { spin_unlock_irqrestore(&ib_nl_request_lock, flags); goto resp_out; } @@ -1412,41 +1451,90 @@ static int opa_pr_query_possible(struct ib_sa_client *client, return PR_IB_SUPPORTED; } +static void ib_sa_pr_callback_single(struct ib_sa_path_query *query, + int status, struct ib_sa_mad *mad) +{ + struct sa_path_rec rec = {}; + + ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), + mad->data, &rec); + rec.rec_type = SA_PATH_REC_TYPE_IB; + sa_path_set_dmac_zero(&rec); + + if (query->conv_pr) { + struct sa_path_rec opa; + + memset(&opa, 0, sizeof(struct sa_path_rec)); + sa_convert_path_ib_to_opa(&opa, &rec); + query->callback(status, &opa, 1, query->context); + } else { + query->callback(status, &rec, 1, query->context); + } +} + +/** + * ib_sa_pr_callback_multiple() - Parse path records then do callback. + * + * In a multiple-PR case the PRs are saved in "query->resp_pr_data" + * (instead of"mad->data") and with "ib_path_rec_data" structure format, + * so that rec->flags can be set to indicate the type of PR. + * This is valid only in IB fabric. 
+ */ +static void ib_sa_pr_callback_multiple(struct ib_sa_path_query *query, + int status, int num_prs, + struct ib_path_rec_data *rec_data) +{ + struct sa_path_rec *rec; + int i; + + rec = kvcalloc(num_prs, sizeof(*rec), GFP_KERNEL); + if (!rec) { + query->callback(-ENOMEM, NULL, 0, query->context); + return; + } + + for (i = 0; i < num_prs; i++) { + ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), + rec_data[i].path_rec, rec + i); + rec[i].rec_type = SA_PATH_REC_TYPE_IB; + sa_path_set_dmac_zero(rec + i); + rec[i].flags = rec_data[i].flags; + } + + query->callback(status, rec, num_prs, query->context); + kvfree(rec); +} + static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, - int status, + int status, int num_prs, struct ib_sa_mad *mad) { struct ib_sa_path_query *query = container_of(sa_query, struct ib_sa_path_query, sa_query); + struct sa_path_rec rec; - if (mad) { - struct sa_path_rec rec; - - if (sa_query->flags & IB_SA_QUERY_OPA) { - ib_unpack(opa_path_rec_table, - ARRAY_SIZE(opa_path_rec_table), - mad->data, &rec); - rec.rec_type = SA_PATH_REC_TYPE_OPA; - query->callback(status, &rec, query->context); - } else { - ib_unpack(path_rec_table, - ARRAY_SIZE(path_rec_table), - mad->data, &rec); - rec.rec_type = SA_PATH_REC_TYPE_IB; - sa_path_set_dmac_zero(&rec); - - if (query->conv_pr) { - struct sa_path_rec opa; + if (!mad || !num_prs) { + query->callback(status, NULL, 0, query->context); + return; + } - memset(&opa, 0, sizeof(struct sa_path_rec)); - sa_convert_path_ib_to_opa(&opa, &rec); - query->callback(status, &opa, query->context); - } else { - query->callback(status, &rec, query->context); - } + if (sa_query->flags & IB_SA_QUERY_OPA) { + if (num_prs != 1) { + query->callback(-EINVAL, NULL, 0, query->context); + return; } - } else - query->callback(status, NULL, query->context); + + ib_unpack(opa_path_rec_table, ARRAY_SIZE(opa_path_rec_table), + mad->data, &rec); + rec.rec_type = SA_PATH_REC_TYPE_OPA; + query->callback(status, &rec, 
num_prs, query->context); + } else { + if (!sa_query->resp_pr_data) + ib_sa_pr_callback_single(query, status, mad); + else + ib_sa_pr_callback_multiple(query, status, num_prs, + sa_query->resp_pr_data); + } } static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) @@ -1490,7 +1578,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client, unsigned long timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct sa_path_rec *resp, - void *context), + int num_paths, void *context), void *context, struct ib_sa_query **sa_query) { @@ -1589,7 +1677,7 @@ err1: EXPORT_SYMBOL(ib_sa_path_rec_get); static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query, - int status, + int status, int num_prs, struct ib_sa_mad *mad) { struct ib_sa_mcmember_query *query = @@ -1681,7 +1769,7 @@ err1: /* Support GuidInfoRecord */ static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query, - int status, + int status, int num_paths, struct ib_sa_mad *mad) { struct ib_sa_guidinfo_query *query = @@ -1791,7 +1879,7 @@ static void ib_classportinfo_cb(void *context) } static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query, - int status, + int status, int num_prs, struct ib_sa_mad *mad) { unsigned long flags; @@ -1967,13 +2055,13 @@ static void send_handler(struct ib_mad_agent *agent, /* No callback -- already got recv */ break; case IB_WC_RESP_TIMEOUT_ERR: - query->callback(query, -ETIMEDOUT, NULL); + query->callback(query, -ETIMEDOUT, 0, NULL); break; case IB_WC_WR_FLUSH_ERR: - query->callback(query, -EINTR, NULL); + query->callback(query, -EINTR, 0, NULL); break; default: - query->callback(query, -EIO, NULL); + query->callback(query, -EIO, 0, NULL); break; } @@ -2001,10 +2089,10 @@ static void recv_handler(struct ib_mad_agent *mad_agent, if (mad_recv_wc->wc->status == IB_WC_SUCCESS) query->callback(query, mad_recv_wc->recv_buf.mad->mad_hdr.status ? 
- -EINVAL : 0, + -EINVAL : 0, 1, (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad); else - query->callback(query, -EIO, NULL); + query->callback(query, -EIO, 0, NULL); } ib_free_recv_mad(mad_recv_wc); diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index a3f84b50c46a..84c53bd2a52d 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -433,6 +433,7 @@ static struct attribute *port_default_attrs[] = { &ib_port_attr_link_layer.attr, NULL }; +ATTRIBUTE_GROUPS(port_default); static ssize_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf) { @@ -774,7 +775,7 @@ static void ib_port_gid_attr_release(struct kobject *kobj) static struct kobj_type port_type = { .release = ib_port_release, .sysfs_ops = &port_sysfs_ops, - .default_attrs = port_default_attrs + .default_groups = port_default_groups, }; static struct kobj_type gid_attr_type = { diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 2b72c4fa9550..bf42650f125b 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -95,6 +95,7 @@ struct ucma_context { u64 uid; struct list_head list; + struct list_head mc_list; struct work_struct close_work; }; @@ -105,6 +106,7 @@ struct ucma_multicast { u64 uid; u8 join_state; + struct list_head list; struct sockaddr_storage addr; }; @@ -198,6 +200,7 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) INIT_WORK(&ctx->close_work, ucma_close_id); init_completion(&ctx->comp); + INIT_LIST_HEAD(&ctx->mc_list); /* So list_del() will work if we don't do ucma_finish_ctx() */ INIT_LIST_HEAD(&ctx->list); ctx->file = file; @@ -484,19 +487,19 @@ err1: static void ucma_cleanup_multicast(struct ucma_context *ctx) { - struct ucma_multicast *mc; - unsigned long index; + struct ucma_multicast *mc, *tmp; - xa_for_each(&multicast_table, index, mc) { - if (mc->ctx != ctx) - continue; + xa_lock(&multicast_table); + list_for_each_entry_safe(mc, tmp, &ctx->mc_list, 
list) { + list_del(&mc->list); /* * At this point mc->ctx->ref is 0 so the mc cannot leave the * lock on the reader and this is enough serialization */ - xa_erase(&multicast_table, index); + __xa_erase(&multicast_table, mc->id); kfree(mc); } + xa_unlock(&multicast_table); } static void ucma_cleanup_mc_events(struct ucma_multicast *mc) @@ -751,8 +754,8 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp, { struct rdma_dev_addr *dev_addr; - resp->num_paths = route->num_paths; - switch (route->num_paths) { + resp->num_paths = route->num_pri_alt_paths; + switch (route->num_pri_alt_paths) { case 0: dev_addr = &route->addr.dev_addr; rdma_addr_get_dgid(dev_addr, @@ -778,8 +781,8 @@ static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp, struct rdma_route *route) { - resp->num_paths = route->num_paths; - switch (route->num_paths) { + resp->num_paths = route->num_pri_alt_paths; + switch (route->num_pri_alt_paths) { case 0: rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr, (union ib_gid *)&resp->ib_route[0].dgid); @@ -918,7 +921,7 @@ static ssize_t ucma_query_path(struct ucma_context *ctx, if (!resp) return -ENOMEM; - resp->num_paths = ctx->cm_id->route.num_paths; + resp->num_paths = ctx->cm_id->route.num_pri_alt_paths; for (i = 0, out_len -= sizeof(*resp); i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data); i++, out_len -= sizeof(struct ib_path_rec_data)) { @@ -1469,12 +1472,16 @@ static ssize_t ucma_process_join(struct ucma_file *file, mc->uid = cmd->uid; memcpy(&mc->addr, addr, cmd->addr_size); - if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b, + xa_lock(&multicast_table); + if (__xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b, GFP_KERNEL)) { ret = -ENOMEM; goto err_free_mc; } + list_add_tail(&mc->list, &ctx->mc_list); + xa_unlock(&multicast_table); + mutex_lock(&ctx->mutex); ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr, join_state, mc); @@ -1500,8 +1507,11 @@ 
err_leave_multicast: mutex_unlock(&ctx->mutex); ucma_cleanup_mc_events(mc); err_xa_erase: - xa_erase(&multicast_table, mc->id); + xa_lock(&multicast_table); + list_del(&mc->list); + __xa_erase(&multicast_table, mc->id); err_free_mc: + xa_unlock(&multicast_table); kfree(mc); err_put_ctx: ucma_put_ctx(ctx); @@ -1569,15 +1579,17 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file, mc = ERR_PTR(-EINVAL); else if (!refcount_inc_not_zero(&mc->ctx->ref)) mc = ERR_PTR(-ENXIO); - else - __xa_erase(&multicast_table, mc->id); - xa_unlock(&multicast_table); if (IS_ERR(mc)) { + xa_unlock(&multicast_table); ret = PTR_ERR(mc); goto out; } + list_del(&mc->list); + __xa_erase(&multicast_table, mc->id); + xa_unlock(&multicast_table); + mutex_lock(&mc->ctx->mutex); rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr); mutex_unlock(&mc->ctx->mutex); diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c index f0760741f281..04c04e6d24c3 100644 --- a/drivers/infiniband/core/umem_dmabuf.c +++ b/drivers/infiniband/core/umem_dmabuf.c @@ -16,9 +16,9 @@ int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf) { struct sg_table *sgt; struct scatterlist *sg; - struct dma_fence *fence; unsigned long start, end, cur = 0; unsigned int nmap = 0; + long ret; int i; dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv); @@ -68,10 +68,13 @@ wait_fence: * may be not up-to-date. Wait for the exporter to finish * the migration. 
*/ - fence = dma_resv_excl_fence(umem_dmabuf->attach->dmabuf->resv); - if (fence) - return dma_fence_wait(fence, false); - + ret = dma_resv_wait_timeout(umem_dmabuf->attach->dmabuf->resv, + DMA_RESV_USAGE_KERNEL, + false, MAX_SCHEDULE_TIMEOUT); + if (ret < 0) + return ret; + if (ret == 0) + return -ETIMEDOUT; return 0; } EXPORT_SYMBOL(ib_umem_dmabuf_map_pages); diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 7a47343d11f9..e9fa22d31c23 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -43,8 +43,6 @@ #include <linux/hmm.h> #include <linux/pagemap.h> -#include <rdma/ib_verbs.h> -#include <rdma/ib_umem.h> #include <rdma/ib_umem_odp.h> #include "uverbs.h" @@ -227,7 +225,6 @@ struct ib_umem_odp *ib_umem_odp_get(struct ib_device *device, const struct mmu_interval_notifier_ops *ops) { struct ib_umem_odp *umem_odp; - struct mm_struct *mm; int ret; if (WARN_ON_ONCE(!(access & IB_ACCESS_ON_DEMAND))) @@ -241,7 +238,7 @@ struct ib_umem_odp *ib_umem_odp_get(struct ib_device *device, umem_odp->umem.length = size; umem_odp->umem.address = addr; umem_odp->umem.writable = ib_access_writable(access); - umem_odp->umem.owning_mm = mm = current->mm; + umem_odp->umem.owning_mm = current->mm; umem_odp->notifier.ops = ops; umem_odp->page_shift = PAGE_SHIFT; @@ -456,14 +453,14 @@ retry: break; } } - /* upon sucesss lock should stay on hold for the callee */ + /* upon success lock should stay on hold for the callee */ if (!ret) ret = dma_index - start_idx; else mutex_unlock(&umem_odp->umem_mutex); out_put_mm: - mmput(owning_mm); + mmput_async(owning_mm); out_put_task: if (owning_process) put_task_struct(owning_process); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index d1345d76d9b1..4796f6a8828c 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -337,7 +337,7 @@ static void copy_query_dev_fields(struct ib_ucontext 
*ucontext, resp->hw_ver = attr->hw_ver; resp->max_qp = attr->max_qp; resp->max_qp_wr = attr->max_qp_wr; - resp->device_cap_flags = lower_32_bits(attr->device_cap_flags); + resp->device_cap_flags = lower_32_bits(attr->device_cap_flags); resp->max_sge = min(attr->max_send_sge, attr->max_recv_sge); resp->max_sge_rd = attr->max_sge_rd; resp->max_cq = attr->max_cq; @@ -739,6 +739,7 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) mr->uobject = uobj; atomic_inc(&pd->usecnt); mr->iova = cmd.hca_va; + mr->length = cmd.length; rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); rdma_restrack_set_name(&mr->res, NULL); @@ -861,8 +862,10 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) mr->pd = new_pd; atomic_inc(&new_pd->usecnt); } - if (cmd.flags & IB_MR_REREG_TRANS) + if (cmd.flags & IB_MR_REREG_TRANS) { mr->iova = cmd.hca_va; + mr->length = cmd.length; + } } memset(&resp, 0, sizeof(resp)); @@ -1399,7 +1402,6 @@ static int create_qp(struct uverbs_attr_bundle *attrs, attr.sq_sig_type = cmd->sq_sig_all ? 
IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; attr.qp_type = cmd->qp_type; - attr.create_flags = 0; attr.cap.max_send_wr = cmd->max_send_wr; attr.cap.max_recv_wr = cmd->max_recv_wr; diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 990f0724acc6..d9799706c58e 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -337,6 +337,14 @@ static int uverbs_process_attr(struct bundle_priv *pbundle, break; + case UVERBS_ATTR_TYPE_RAW_FD: + if (uattr->attr_data.reserved || uattr->len != 0 || + uattr->data_s64 < INT_MIN || uattr->data_s64 > INT_MAX) + return -EINVAL; + /* _uverbs_get_const_signed() is the accessor */ + e->ptr_attr.data = uattr->data_s64; + break; + case UVERBS_ATTR_TYPE_IDRS_ARRAY: return uverbs_process_idrs_array(pbundle, attr_uapi, &e->objs_arr_attr, uattr, diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c index b8d715c68ca4..11a080646916 100644 --- a/drivers/infiniband/core/uverbs_marshall.c +++ b/drivers/infiniband/core/uverbs_marshall.c @@ -66,7 +66,7 @@ void ib_copy_ah_attr_to_user(struct ib_device *device, struct rdma_ah_attr *src = ah_attr; struct rdma_ah_attr conv_ah; - memset(&dst->grh.reserved, 0, sizeof(dst->grh.reserved)); + memset(&dst->grh, 0, sizeof(dst->grh)); if ((ah_attr->type == RDMA_AH_ATTR_TYPE_OPA) && (rdma_ah_get_dlid(ah_attr) > be16_to_cpu(IB_LID_PERMISSIVE)) && diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index d42ed7ff223e..0ddcf6da66c4 100644 --- a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -46,385 +46,6 @@ static int uverbs_free_flow_action(struct ib_uobject *uobject, return action->device->ops.destroy_flow_action(action); } -static u64 esp_flags_uverbs_to_verbs(struct uverbs_attr_bundle *attrs, - u32 flags, bool is_modify) -{ - u64 verbs_flags = 
flags; - - if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ESN)) - verbs_flags |= IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED; - - if (is_modify && uverbs_attr_is_valid(attrs, - UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS)) - verbs_flags |= IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS; - - return verbs_flags; -}; - -static int validate_flow_action_esp_keymat_aes_gcm(struct ib_flow_action_attrs_esp_keymats *keymat) -{ - struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm = - &keymat->keymat.aes_gcm; - - if (aes_gcm->iv_algo > IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ) - return -EOPNOTSUPP; - - if (aes_gcm->key_len != 32 && - aes_gcm->key_len != 24 && - aes_gcm->key_len != 16) - return -EINVAL; - - if (aes_gcm->icv_len != 16 && - aes_gcm->icv_len != 8 && - aes_gcm->icv_len != 12) - return -EINVAL; - - return 0; -} - -static int (* const flow_action_esp_keymat_validate[])(struct ib_flow_action_attrs_esp_keymats *keymat) = { - [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = validate_flow_action_esp_keymat_aes_gcm, -}; - -static int flow_action_esp_replay_none(struct ib_flow_action_attrs_esp_replays *replay, - bool is_modify) -{ - /* This is used in order to modify an esp flow action with an enabled - * replay protection to a disabled one. This is only supported via - * modify, as in create verb we can simply drop the REPLAY attribute and - * achieve the same thing. - */ - return is_modify ? 0 : -EINVAL; -} - -static int flow_action_esp_replay_def_ok(struct ib_flow_action_attrs_esp_replays *replay, - bool is_modify) -{ - /* Some replay protections could always be enabled without validating - * anything. 
- */ - return 0; -} - -static int (* const flow_action_esp_replay_validate[])(struct ib_flow_action_attrs_esp_replays *replay, - bool is_modify) = { - [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = flow_action_esp_replay_none, - [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = flow_action_esp_replay_def_ok, -}; - -static int parse_esp_ip(enum ib_flow_spec_type proto, - const void __user *val_ptr, - size_t len, union ib_flow_spec *out) -{ - int ret; - const struct ib_uverbs_flow_ipv4_filter ipv4 = { - .src_ip = cpu_to_be32(0xffffffffUL), - .dst_ip = cpu_to_be32(0xffffffffUL), - .proto = 0xff, - .tos = 0xff, - .ttl = 0xff, - .flags = 0xff, - }; - const struct ib_uverbs_flow_ipv6_filter ipv6 = { - .src_ip = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, - .dst_ip = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, - .flow_label = cpu_to_be32(0xffffffffUL), - .next_hdr = 0xff, - .traffic_class = 0xff, - .hop_limit = 0xff, - }; - union { - struct ib_uverbs_flow_ipv4_filter ipv4; - struct ib_uverbs_flow_ipv6_filter ipv6; - } user_val = {}; - const void *user_pmask; - size_t val_len; - - /* If the flow IPv4/IPv6 flow specifications are extended, the mask - * should be changed as well. 
- */ - BUILD_BUG_ON(offsetof(struct ib_uverbs_flow_ipv4_filter, flags) + - sizeof(ipv4.flags) != sizeof(ipv4)); - BUILD_BUG_ON(offsetof(struct ib_uverbs_flow_ipv6_filter, reserved) + - sizeof(ipv6.reserved) != sizeof(ipv6)); - - switch (proto) { - case IB_FLOW_SPEC_IPV4: - if (len > sizeof(user_val.ipv4) && - !ib_is_buffer_cleared(val_ptr + sizeof(user_val.ipv4), - len - sizeof(user_val.ipv4))) - return -EOPNOTSUPP; - - val_len = min_t(size_t, len, sizeof(user_val.ipv4)); - ret = copy_from_user(&user_val.ipv4, val_ptr, - val_len); - if (ret) - return -EFAULT; - - user_pmask = &ipv4; - break; - case IB_FLOW_SPEC_IPV6: - if (len > sizeof(user_val.ipv6) && - !ib_is_buffer_cleared(val_ptr + sizeof(user_val.ipv6), - len - sizeof(user_val.ipv6))) - return -EOPNOTSUPP; - - val_len = min_t(size_t, len, sizeof(user_val.ipv6)); - ret = copy_from_user(&user_val.ipv6, val_ptr, - val_len); - if (ret) - return -EFAULT; - - user_pmask = &ipv6; - break; - default: - return -EOPNOTSUPP; - } - - return ib_uverbs_kern_spec_to_ib_spec_filter(proto, user_pmask, - &user_val, - val_len, out); -} - -static int flow_action_esp_get_encap(struct ib_flow_spec_list *out, - struct uverbs_attr_bundle *attrs) -{ - struct ib_uverbs_flow_action_esp_encap uverbs_encap; - int ret; - - ret = uverbs_copy_from(&uverbs_encap, attrs, - UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP); - if (ret) - return ret; - - /* We currently support only one encap */ - if (uverbs_encap.next_ptr) - return -EOPNOTSUPP; - - if (uverbs_encap.type != IB_FLOW_SPEC_IPV4 && - uverbs_encap.type != IB_FLOW_SPEC_IPV6) - return -EOPNOTSUPP; - - return parse_esp_ip(uverbs_encap.type, - u64_to_user_ptr(uverbs_encap.val_ptr), - uverbs_encap.len, - &out->spec); -} - -struct ib_flow_action_esp_attr { - struct ib_flow_action_attrs_esp hdr; - struct ib_flow_action_attrs_esp_keymats keymat; - struct ib_flow_action_attrs_esp_replays replay; - /* We currently support only one spec */ - struct ib_flow_spec_list encap; -}; - -#define 
ESP_LAST_SUPPORTED_FLAG IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW -static int parse_flow_action_esp(struct ib_device *ib_dev, - struct uverbs_attr_bundle *attrs, - struct ib_flow_action_esp_attr *esp_attr, - bool is_modify) -{ - struct ib_uverbs_flow_action_esp uverbs_esp = {}; - int ret; - - /* Optional param, if it doesn't exist, we get -ENOENT and skip it */ - ret = uverbs_copy_from(&esp_attr->hdr.esn, attrs, - UVERBS_ATTR_FLOW_ACTION_ESP_ESN); - if (IS_UVERBS_COPY_ERR(ret)) - return ret; - - /* This can be called from FLOW_ACTION_ESP_MODIFY where - * UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS is optional - */ - if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS)) { - ret = uverbs_copy_from_or_zero(&uverbs_esp, attrs, - UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS); - if (ret) - return ret; - - if (uverbs_esp.flags & ~((ESP_LAST_SUPPORTED_FLAG << 1) - 1)) - return -EOPNOTSUPP; - - esp_attr->hdr.spi = uverbs_esp.spi; - esp_attr->hdr.seq = uverbs_esp.seq; - esp_attr->hdr.tfc_pad = uverbs_esp.tfc_pad; - esp_attr->hdr.hard_limit_pkts = uverbs_esp.hard_limit_pkts; - } - esp_attr->hdr.flags = esp_flags_uverbs_to_verbs(attrs, uverbs_esp.flags, - is_modify); - - if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT)) { - esp_attr->keymat.protocol = - uverbs_attr_get_enum_id(attrs, - UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT); - ret = uverbs_copy_from_or_zero(&esp_attr->keymat.keymat, - attrs, - UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT); - if (ret) - return ret; - - ret = flow_action_esp_keymat_validate[esp_attr->keymat.protocol](&esp_attr->keymat); - if (ret) - return ret; - - esp_attr->hdr.keymat = &esp_attr->keymat; - } - - if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY)) { - esp_attr->replay.protocol = - uverbs_attr_get_enum_id(attrs, - UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY); - - ret = uverbs_copy_from_or_zero(&esp_attr->replay.replay, - attrs, - UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY); - if (ret) - return ret; - - ret = 
flow_action_esp_replay_validate[esp_attr->replay.protocol](&esp_attr->replay, - is_modify); - if (ret) - return ret; - - esp_attr->hdr.replay = &esp_attr->replay; - } - - if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP)) { - ret = flow_action_esp_get_encap(&esp_attr->encap, attrs); - if (ret) - return ret; - - esp_attr->hdr.encap = &esp_attr->encap; - } - - return 0; -} - -static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)( - struct uverbs_attr_bundle *attrs) -{ - struct ib_uobject *uobj = uverbs_attr_get_uobject( - attrs, UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE); - struct ib_device *ib_dev = attrs->context->device; - int ret; - struct ib_flow_action *action; - struct ib_flow_action_esp_attr esp_attr = {}; - - if (!ib_dev->ops.create_flow_action_esp) - return -EOPNOTSUPP; - - ret = parse_flow_action_esp(ib_dev, attrs, &esp_attr, false); - if (ret) - return ret; - - /* No need to check as this attribute is marked as MANDATORY */ - action = ib_dev->ops.create_flow_action_esp(ib_dev, &esp_attr.hdr, - attrs); - if (IS_ERR(action)) - return PTR_ERR(action); - - uverbs_flow_action_fill_action(action, uobj, ib_dev, - IB_FLOW_ACTION_ESP); - - return 0; -} - -static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)( - struct uverbs_attr_bundle *attrs) -{ - struct ib_uobject *uobj = uverbs_attr_get_uobject( - attrs, UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE); - struct ib_flow_action *action = uobj->object; - int ret; - struct ib_flow_action_esp_attr esp_attr = {}; - - if (!action->device->ops.modify_flow_action_esp) - return -EOPNOTSUPP; - - ret = parse_flow_action_esp(action->device, attrs, &esp_attr, true); - if (ret) - return ret; - - if (action->type != IB_FLOW_ACTION_ESP) - return -EINVAL; - - return action->device->ops.modify_flow_action_esp(action, - &esp_attr.hdr, - attrs); -} - -static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = { - [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = { - .type = 
UVERBS_ATTR_TYPE_PTR_IN, - UVERBS_ATTR_STRUCT( - struct ib_uverbs_flow_action_esp_keymat_aes_gcm, - aes_key), - }, -}; - -static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = { - [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - UVERBS_ATTR_NO_DATA(), - }, - [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = { - .type = UVERBS_ATTR_TYPE_PTR_IN, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, - size), - }, -}; - -DECLARE_UVERBS_NAMED_METHOD( - UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, - UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE, - UVERBS_OBJECT_FLOW_ACTION, - UVERBS_ACCESS_NEW, - UA_MANDATORY), - UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, - hard_limit_pkts), - UA_MANDATORY), - UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, - UVERBS_ATTR_TYPE(__u32), - UA_OPTIONAL), - UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, - uverbs_flow_action_esp_keymat, - UA_MANDATORY), - UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, - uverbs_flow_action_esp_replay, - UA_OPTIONAL), - UVERBS_ATTR_PTR_IN( - UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, - UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap), - UA_OPTIONAL)); - -DECLARE_UVERBS_NAMED_METHOD( - UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY, - UVERBS_ATTR_IDR(UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE, - UVERBS_OBJECT_FLOW_ACTION, - UVERBS_ACCESS_WRITE, - UA_MANDATORY), - UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, - UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, - hard_limit_pkts), - UA_OPTIONAL), - UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, - UVERBS_ATTR_TYPE(__u32), - UA_OPTIONAL), - UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, - uverbs_flow_action_esp_keymat, - UA_OPTIONAL), - UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, - uverbs_flow_action_esp_replay, - UA_OPTIONAL), - UVERBS_ATTR_PTR_IN( - UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, - 
UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap), - UA_OPTIONAL)); - DECLARE_UVERBS_NAMED_METHOD_DESTROY( UVERBS_METHOD_FLOW_ACTION_DESTROY, UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE, @@ -435,9 +56,7 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY( DECLARE_UVERBS_NAMED_OBJECT( UVERBS_OBJECT_FLOW_ACTION, UVERBS_TYPE_ALLOC_IDR(uverbs_free_flow_action), - &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE), - &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY), - &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)); + &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY)); const struct uapi_definition uverbs_def_obj_flow_action[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED( diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c index 2f2c7646fce1..a02916a3a79c 100644 --- a/drivers/infiniband/core/uverbs_uapi.c +++ b/drivers/infiniband/core/uverbs_uapi.c @@ -447,6 +447,9 @@ static int uapi_finalize(struct uverbs_api *uapi) uapi->num_write_ex = max_write_ex + 1; data = kmalloc_array(uapi->num_write + uapi->num_write_ex, sizeof(*uapi->write_methods), GFP_KERNEL); + if (!data) + return -ENOMEM; + for (i = 0; i != uapi->num_write + uapi->num_write_ex; i++) data[i] = &uapi->notsupp_method; uapi->write_methods = data; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index c18634bec212..26b021f43ba4 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -268,9 +268,6 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, return ERR_PTR(-ENOMEM); pd->device = device; - pd->uobject = NULL; - pd->__internal_mr = NULL; - atomic_set(&pd->usecnt, 0); pd->flags = flags; rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD); @@ -284,7 +281,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, } rdma_restrack_add(&pd->res); - if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) + if (device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY) 
pd->local_dma_lkey = device->local_dma_lkey; else mr_access_flags |= IB_ACCESS_LOCAL_WRITE; @@ -311,7 +308,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, pd->__internal_mr = mr; - if (!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) + if (!(device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY)) pd->local_dma_lkey = pd->__internal_mr->lkey; if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) @@ -341,11 +338,6 @@ int ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata) pd->__internal_mr = NULL; } - /* uverbs manipulates usecnt with proper locking, while the kabi - * requires the caller to guarantee we can't race here. - */ - WARN_ON(atomic_read(&pd->usecnt)); - ret = pd->device->ops.dealloc_pd(pd, udata); if (ret) return ret; @@ -1046,7 +1038,7 @@ struct ib_srq *ib_create_srq_user(struct ib_pd *pd, ret = pd->device->ops.create_srq(srq, srq_init_attr, udata); if (ret) { rdma_restrack_put(&srq->res); - atomic_dec(&srq->pd->usecnt); + atomic_dec(&pd->usecnt); if (srq->srq_type == IB_SRQT_XRC && srq->ext.xrc.xrcd) atomic_dec(&srq->ext.xrc.xrcd->usecnt); if (ib_srq_has_cq(srq->srq_type)) @@ -2139,8 +2131,8 @@ struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, struct ib_mr *mr; if (access_flags & IB_ACCESS_ON_DEMAND) { - if (!(pd->device->attrs.device_cap_flags & - IB_DEVICE_ON_DEMAND_PAGING)) { + if (!(pd->device->attrs.kernel_cap_flags & + IBK_ON_DEMAND_PAGING)) { pr_debug("ODP support not available\n"); return ERR_PTR(-EINVAL); } @@ -2153,9 +2145,12 @@ struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return mr; mr->device = pd->device; + mr->type = IB_MR_TYPE_USER; mr->pd = pd; mr->dm = NULL; atomic_inc(&pd->usecnt); + mr->iova = virt_addr; + mr->length = length; rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); rdma_restrack_parent_name(&mr->res, &pd->res); diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile index fba0b3be903e..6b3a88046125 100644 --- 
a/drivers/infiniband/hw/Makefile +++ b/drivers/infiniband/hw/Makefile @@ -13,3 +13,4 @@ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/ obj-$(CONFIG_INFINIBAND_HNS) += hns/ obj-$(CONFIG_INFINIBAND_QEDR) += qedr/ obj-$(CONFIG_INFINIBAND_BNXT_RE) += bnxt_re/ +obj-$(CONFIG_INFINIBAND_ERDMA) += erdma/ diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index 79401e6c6aa9..785c37cae3c0 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -173,7 +173,7 @@ struct bnxt_re_dev { /* Max of 2 lossless traffic class supported per port */ u16 cosq[2]; - /* QP for for handling QP1 packets */ + /* QP for handling QP1 packets */ struct bnxt_re_gsi_context gsi_ctx; struct bnxt_re_stats stats; atomic_t nq_alloc_cnt; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 29cc0d14399a..989edc789633 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -146,13 +146,13 @@ int bnxt_re_query_device(struct ib_device *ibdev, | IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_SHUTDOWN_PORT | IB_DEVICE_SYS_IMAGE_GUID - | IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_RESIZE_MAX_WR | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_N_NOTIFY_CQ | IB_DEVICE_MEM_WINDOW | IB_DEVICE_MEM_WINDOW_TYPE_2B | IB_DEVICE_MEM_MGT_EXTENSIONS; + ib_attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY; ib_attr->max_send_sge = dev_attr->max_qp_sges; ib_attr->max_recv_sge = dev_attr->max_qp_sges; ib_attr->max_sge_rd = dev_attr->max_qp_sges; @@ -262,13 +262,12 @@ void bnxt_re_query_fw_str(struct ib_device *ibdev, char *str) int bnxt_re_query_pkey(struct ib_device *ibdev, u32 port_num, u16 index, u16 *pkey) { - struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); + if (index > 0) + return -EINVAL; - /* Ignore port_num */ + *pkey = IB_DEFAULT_PKEY_FULL; - memset(pkey, 0, sizeof(*pkey)); - return bnxt_qplib_get_pkey(&rdev->qplib_res, - &rdev->qplib_res.pkey_tbl, index, 
pkey); + return 0; } int bnxt_re_query_gid(struct ib_device *ibdev, u32 port_num, diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index b44944fb9b24..8c0c80a8d338 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -725,7 +725,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) /* ib device init */ ibdev->node_type = RDMA_NODE_IB_CA; - strlcpy(ibdev->node_desc, BNXT_RE_DESC " HCA", + strscpy(ibdev->node_desc, BNXT_RE_DESC " HCA", strlen(BNXT_RE_DESC) + 5); ibdev->phys_port_cnt = 1; @@ -893,7 +893,6 @@ static int bnxt_re_srqn_handler(struct bnxt_qplib_nq *nq, struct bnxt_re_srq *srq = container_of(handle, struct bnxt_re_srq, qplib_srq); struct ib_event ib_event; - int rc = 0; ib_event.device = &srq->rdev->ibdev; ib_event.element.srq = &srq->ib_srq; @@ -907,7 +906,7 @@ static int bnxt_re_srqn_handler(struct bnxt_qplib_nq *nq, (*srq->ib_srq.event_handler)(&ib_event, srq->ib_srq.srq_context); } - return rc; + return 0; } static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index ca88849559bf..96e581ced50e 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -46,6 +46,7 @@ #include <linux/delay.h> #include <linux/prefetch.h> #include <linux/if_ether.h> +#include <rdma/ib_mad.h> #include "roce_hsi.h" @@ -1232,7 +1233,7 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) struct bnxt_qplib_rcfw *rcfw = res->rcfw; struct cmdq_modify_qp req; struct creq_modify_qp_resp resp; - u16 cmd_flags = 0, pkey; + u16 cmd_flags = 0; u32 temp32[4]; u32 bmask; int rc; @@ -1255,11 +1256,9 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS) req.access = qp->access; - if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_PKEY) { - if 
(!bnxt_qplib_get_pkey(res, &res->pkey_tbl, - qp->pkey_index, &pkey)) - req.pkey = cpu_to_le16(pkey); - } + if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_PKEY) + req.pkey = cpu_to_le16(IB_DEFAULT_PKEY_FULL); + if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_QKEY) req.qkey = cpu_to_le32(qp->qkey); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 3de854727460..061b2895dd9b 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -555,7 +555,7 @@ skip_ctx_setup: void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw) { - kfree(rcfw->cmdq.cmdq_bitmap); + bitmap_free(rcfw->cmdq.cmdq_bitmap); kfree(rcfw->qp_tbl); kfree(rcfw->crsqe_tbl); bnxt_qplib_free_hwq(rcfw->res, &rcfw->cmdq.hwq); @@ -572,7 +572,6 @@ int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res, struct bnxt_qplib_sg_info sginfo = {}; struct bnxt_qplib_cmdq_ctx *cmdq; struct bnxt_qplib_creq_ctx *creq; - u32 bmap_size = 0; rcfw->pdev = res->pdev; cmdq = &rcfw->cmdq; @@ -613,13 +612,10 @@ int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res, if (!rcfw->crsqe_tbl) goto fail; - bmap_size = BITS_TO_LONGS(rcfw->cmdq_depth) * sizeof(unsigned long); - cmdq->cmdq_bitmap = kzalloc(bmap_size, GFP_KERNEL); + cmdq->cmdq_bitmap = bitmap_zalloc(rcfw->cmdq_depth, GFP_KERNEL); if (!cmdq->cmdq_bitmap) goto fail; - cmdq->bmap_size = bmap_size; - /* Allocate one extra to hold the QP1 entries */ rcfw->qp_tbl_size = qp_tbl_sz + 1; rcfw->qp_tbl = kcalloc(rcfw->qp_tbl_size, sizeof(struct bnxt_qplib_qp_node), @@ -667,8 +663,8 @@ void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw) iounmap(cmdq->cmdq_mbox.reg.bar_reg); iounmap(creq->creq_db.reg.bar_reg); - indx = find_first_bit(cmdq->cmdq_bitmap, cmdq->bmap_size); - if (indx != cmdq->bmap_size) + indx = find_first_bit(cmdq->cmdq_bitmap, rcfw->cmdq_depth); + if (indx != rcfw->cmdq_depth) dev_err(&rcfw->pdev->dev, "disabling RCFW with pending cmd-bit %lx\n", 
indx); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index 82faa4e4cda8..0a3d8e7da3d4 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -152,7 +152,6 @@ struct bnxt_qplib_cmdq_ctx { wait_queue_head_t waitq; unsigned long flags; unsigned long *cmdq_bitmap; - u32 bmap_size; u32 seq_num; }; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index bc1ba4b51ba4..126d4f26f75a 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -649,31 +649,6 @@ static void bnxt_qplib_init_sgid_tbl(struct bnxt_qplib_sgid_tbl *sgid_tbl, memset(sgid_tbl->hw_id, -1, sizeof(u16) * sgid_tbl->max); } -static void bnxt_qplib_free_pkey_tbl(struct bnxt_qplib_res *res, - struct bnxt_qplib_pkey_tbl *pkey_tbl) -{ - if (!pkey_tbl->tbl) - dev_dbg(&res->pdev->dev, "PKEY tbl not present\n"); - else - kfree(pkey_tbl->tbl); - - pkey_tbl->tbl = NULL; - pkey_tbl->max = 0; - pkey_tbl->active = 0; -} - -static int bnxt_qplib_alloc_pkey_tbl(struct bnxt_qplib_res *res, - struct bnxt_qplib_pkey_tbl *pkey_tbl, - u16 max) -{ - pkey_tbl->tbl = kcalloc(max, sizeof(u16), GFP_KERNEL); - if (!pkey_tbl->tbl) - return -ENOMEM; - - pkey_tbl->max = max; - return 0; -}; - /* PDs */ int bnxt_qplib_alloc_pd(struct bnxt_qplib_pd_tbl *pdt, struct bnxt_qplib_pd *pd) { @@ -843,24 +818,6 @@ unmap_io: return -ENOMEM; } -/* PKEYs */ -static void bnxt_qplib_cleanup_pkey_tbl(struct bnxt_qplib_pkey_tbl *pkey_tbl) -{ - memset(pkey_tbl->tbl, 0, sizeof(u16) * pkey_tbl->max); - pkey_tbl->active = 0; -} - -static void bnxt_qplib_init_pkey_tbl(struct bnxt_qplib_res *res, - struct bnxt_qplib_pkey_tbl *pkey_tbl) -{ - u16 pkey = 0xFFFF; - - memset(pkey_tbl->tbl, 0, sizeof(u16) * pkey_tbl->max); - - /* pkey default = 0xFFFF */ - bnxt_qplib_add_pkey(res, pkey_tbl, &pkey, false); -} - /* Stats */ static void 
bnxt_qplib_free_stats_ctx(struct pci_dev *pdev, struct bnxt_qplib_stats *stats) @@ -891,21 +848,18 @@ static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev, void bnxt_qplib_cleanup_res(struct bnxt_qplib_res *res) { - bnxt_qplib_cleanup_pkey_tbl(&res->pkey_tbl); bnxt_qplib_cleanup_sgid_tbl(res, &res->sgid_tbl); } int bnxt_qplib_init_res(struct bnxt_qplib_res *res) { bnxt_qplib_init_sgid_tbl(&res->sgid_tbl, res->netdev); - bnxt_qplib_init_pkey_tbl(res, &res->pkey_tbl); return 0; } void bnxt_qplib_free_res(struct bnxt_qplib_res *res) { - bnxt_qplib_free_pkey_tbl(res, &res->pkey_tbl); bnxt_qplib_free_sgid_tbl(res, &res->sgid_tbl); bnxt_qplib_free_pd_tbl(&res->pd_tbl); bnxt_qplib_free_dpi_tbl(res, &res->dpi_tbl); @@ -924,10 +878,6 @@ int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct pci_dev *pdev, if (rc) goto fail; - rc = bnxt_qplib_alloc_pkey_tbl(res, &res->pkey_tbl, dev_attr->max_pkey); - if (rc) - goto fail; - rc = bnxt_qplib_alloc_pd_tbl(res, &res->pd_tbl, dev_attr->max_pd); if (rc) goto fail; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index e1411a2352a7..982e2c96dac2 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -185,12 +185,6 @@ struct bnxt_qplib_sgid_tbl { u8 *vlan; }; -struct bnxt_qplib_pkey_tbl { - u16 *tbl; - u16 max; - u16 active; -}; - struct bnxt_qplib_dpi { u32 dpi; void __iomem *dbr; @@ -258,7 +252,6 @@ struct bnxt_qplib_res { struct bnxt_qplib_rcfw *rcfw; struct bnxt_qplib_pd_tbl pd_tbl; struct bnxt_qplib_sgid_tbl sgid_tbl; - struct bnxt_qplib_pkey_tbl pkey_tbl; struct bnxt_qplib_dpi_tbl dpi_tbl; bool prio; bool is_vf; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 379e715ebd30..b802981b7171 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -146,17 +146,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, 
attr->max_srq = le16_to_cpu(sb->max_srq); attr->max_srq_wqes = le32_to_cpu(sb->max_srq_wr) - 1; attr->max_srq_sges = sb->max_srq_sge; - attr->max_pkey = le32_to_cpu(sb->max_pkeys); - /* - * Some versions of FW reports more than 0xFFFF. - * Restrict it for now to 0xFFFF to avoid - * reporting trucated value - */ - if (attr->max_pkey > 0xFFFF) { - /* ib_port_attr::pkey_tbl_len is u16 */ - attr->max_pkey = 0xFFFF; - } - + attr->max_pkey = 1; attr->max_inline_data = le32_to_cpu(sb->max_inline_data); attr->l2_db_size = (sb->l2_db_space_size + 1) * (0x01 << RCFW_DBR_BASE_PAGE_SHIFT); @@ -414,93 +404,6 @@ int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, return rc; } -/* pkeys */ -int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res, - struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 index, - u16 *pkey) -{ - if (index == 0xFFFF) { - *pkey = 0xFFFF; - return 0; - } - if (index >= pkey_tbl->max) { - dev_err(&res->pdev->dev, - "Index %d exceeded PKEY table max (%d)\n", - index, pkey_tbl->max); - return -EINVAL; - } - memcpy(pkey, &pkey_tbl->tbl[index], sizeof(*pkey)); - return 0; -} - -int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res, - struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 *pkey, - bool update) -{ - int i, rc = 0; - - if (!pkey_tbl) { - dev_err(&res->pdev->dev, "PKEY table not allocated\n"); - return -EINVAL; - } - - /* Do we need a pkey_lock here? 
*/ - if (!pkey_tbl->active) { - dev_err(&res->pdev->dev, "PKEY table has no active entries\n"); - return -ENOMEM; - } - for (i = 0; i < pkey_tbl->max; i++) { - if (!memcmp(&pkey_tbl->tbl[i], pkey, sizeof(*pkey))) - break; - } - if (i == pkey_tbl->max) { - dev_err(&res->pdev->dev, - "PKEY 0x%04x not found in the pkey table\n", *pkey); - return -ENOMEM; - } - memset(&pkey_tbl->tbl[i], 0, sizeof(*pkey)); - pkey_tbl->active--; - - /* unlock */ - return rc; -} - -int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res, - struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 *pkey, - bool update) -{ - int i, free_idx, rc = 0; - - if (!pkey_tbl) { - dev_err(&res->pdev->dev, "PKEY table not allocated\n"); - return -EINVAL; - } - - /* Do we need a pkey_lock here? */ - if (pkey_tbl->active == pkey_tbl->max) { - dev_err(&res->pdev->dev, "PKEY table is full\n"); - return -ENOMEM; - } - free_idx = pkey_tbl->max; - for (i = 0; i < pkey_tbl->max; i++) { - if (!memcmp(&pkey_tbl->tbl[i], pkey, sizeof(*pkey))) - return -EALREADY; - else if (!pkey_tbl->tbl[i] && free_idx == pkey_tbl->max) - free_idx = i; - } - if (free_idx == pkey_tbl->max) { - dev_err(&res->pdev->dev, - "PKEY table is FULL but count is not MAX??\n"); - return -ENOMEM; - } - /* Add PKEY to the pkey_tbl */ - memcpy(&pkey_tbl->tbl[free_idx], pkey, sizeof(*pkey)); - pkey_tbl->active++; - - /* unlock */ - return rc; -} - /* AH */ int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, bool block) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index a18f568cb23e..5939e8fc8353 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -255,15 +255,6 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, struct bnxt_qplib_gid *gid, u16 gid_idx, const u8 *smac); -int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res, - struct bnxt_qplib_pkey_tbl 
*pkey_tbl, u16 index, - u16 *pkey); -int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res, - struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 *pkey, - bool update); -int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res, - struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 *pkey, - bool update); int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, struct bnxt_qplib_dev_attr *attr, bool vf); int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res, diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 913f39ee4416..499a425a3379 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -734,7 +734,7 @@ static int send_connect(struct c4iw_ep *ep) &ep->com.remote_addr; int ret; enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type; - u32 isn = (prandom_u32() & ~7UL) - 1; + u32 isn = (get_random_u32() & ~7UL) - 1; struct net_device *netdev; u64 params; @@ -2468,30 +2468,24 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, opt2 |= CCTRL_ECN_V(1); } - skb_get(skb); - rpl = cplhdr(skb); if (!is_t4(adapter_type)) { - skb_trim(skb, roundup(sizeof(*rpl5), 16)); - rpl5 = (void *)rpl; - INIT_TP_WR(rpl5, ep->hwtid); - } else { - skb_trim(skb, sizeof(*rpl)); - INIT_TP_WR(rpl, ep->hwtid); - } - OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, - ep->hwtid)); + u32 isn = (get_random_u32() & ~7UL) - 1; - if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) { - u32 isn = (prandom_u32() & ~7UL) - 1; + skb = get_skb(skb, roundup(sizeof(*rpl5), 16), GFP_KERNEL); + rpl5 = __skb_put_zero(skb, roundup(sizeof(*rpl5), 16)); + rpl = (void *)rpl5; + INIT_TP_WR_CPL(rpl5, CPL_PASS_ACCEPT_RPL, ep->hwtid); opt2 |= T5_OPT_2_VALID_F; opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE); opt2 |= T5_ISS_F; - rpl5 = (void *)rpl; - memset(&rpl5->iss, 0, roundup(sizeof(*rpl5)-sizeof(*rpl), 16)); if (peer2peer) isn += 4; rpl5->iss = cpu_to_be32(isn); pr_debug("iss %u\n", be32_to_cpu(rpl5->iss)); + } else { + skb = get_skb(skb, 
sizeof(*rpl), GFP_KERNEL); + rpl = __skb_put_zero(skb, sizeof(*rpl)); + INIT_TP_WR_CPL(rpl, CPL_PASS_ACCEPT_RPL, ep->hwtid); } rpl->opt0 = cpu_to_be64(opt0); diff --git a/drivers/infiniband/hw/cxgb4/id_table.c b/drivers/infiniband/hw/cxgb4/id_table.c index 724d23297b35..280d61466855 100644 --- a/drivers/infiniband/hw/cxgb4/id_table.c +++ b/drivers/infiniband/hw/cxgb4/id_table.c @@ -54,12 +54,12 @@ u32 c4iw_id_alloc(struct c4iw_id_table *alloc) if (obj < alloc->max) { if (alloc->flags & C4IW_ID_TABLE_F_RANDOM) - alloc->last += prandom_u32() % RANDOM_SKIP; + alloc->last += prandom_u32_max(RANDOM_SKIP); else alloc->last = obj + 1; if (alloc->last >= alloc->max) alloc->last = 0; - set_bit(obj, alloc->table); + __set_bit(obj, alloc->table); obj += alloc->start; } else obj = -1; @@ -75,37 +75,32 @@ void c4iw_id_free(struct c4iw_id_table *alloc, u32 obj) obj -= alloc->start; spin_lock_irqsave(&alloc->lock, flags); - clear_bit(obj, alloc->table); + __clear_bit(obj, alloc->table); spin_unlock_irqrestore(&alloc->lock, flags); } int c4iw_id_table_alloc(struct c4iw_id_table *alloc, u32 start, u32 num, u32 reserved, u32 flags) { - int i; - alloc->start = start; alloc->flags = flags; if (flags & C4IW_ID_TABLE_F_RANDOM) - alloc->last = prandom_u32() % RANDOM_SKIP; + alloc->last = prandom_u32_max(RANDOM_SKIP); else alloc->last = 0; - alloc->max = num; + alloc->max = num; spin_lock_init(&alloc->lock); - alloc->table = kmalloc_array(BITS_TO_LONGS(num), sizeof(long), - GFP_KERNEL); + alloc->table = bitmap_zalloc(num, GFP_KERNEL); if (!alloc->table) return -ENOMEM; - bitmap_zero(alloc->table, num); if (!(alloc->flags & C4IW_ID_TABLE_F_EMPTY)) - for (i = 0; i < reserved; ++i) - set_bit(i, alloc->table); + bitmap_set(alloc->table, 0, reserved); return 0; } void c4iw_id_table_free(struct c4iw_id_table *alloc) { - kfree(alloc->table); + bitmap_free(alloc->table); } diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 12f33467c672..50cb2259bf87 
100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -314,7 +314,6 @@ enum db_state { struct c4iw_dev { struct ib_device ibdev; struct c4iw_rdev rdev; - u32 device_cap_flags; struct xarray cqs; struct xarray qps; struct xarray mrs; diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 0c8fd5a85fcb..246b739ddb2b 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -41,6 +41,7 @@ #include <linux/ethtool.h> #include <linux/rtnetlink.h> #include <linux/inetdevice.h> +#include <net/addrconf.h> #include <linux/io.h> #include <asm/irq.h> @@ -264,10 +265,14 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro return -EINVAL; dev = to_c4iw_dev(ibdev); - memcpy(&props->sys_image_guid, dev->rdev.lldi.ports[0]->dev_addr, 6); + addrconf_addr_eui48((u8 *)&props->sys_image_guid, + dev->rdev.lldi.ports[0]->dev_addr); props->hw_ver = CHELSIO_CHIP_RELEASE(dev->rdev.lldi.adapter_type); props->fw_ver = dev->rdev.lldi.fw_vers; - props->device_cap_flags = dev->device_cap_flags; + props->device_cap_flags = IB_DEVICE_MEM_WINDOW; + props->kernel_cap_flags = IBK_LOCAL_DMA_LKEY; + if (fastreg_support) + props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; props->page_size_cap = T4_PAGESIZE_MASK; props->vendor_id = (u32)dev->rdev.lldi.pdev->vendor; props->vendor_part_id = (u32)dev->rdev.lldi.pdev->device; @@ -525,11 +530,8 @@ void c4iw_register_device(struct work_struct *work) struct c4iw_dev *dev = ctx->dev; pr_debug("c4iw_dev %p\n", dev); - memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); - memcpy(&dev->ibdev.node_guid, dev->rdev.lldi.ports[0]->dev_addr, 6); - dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW; - if (fastreg_support) - dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; + addrconf_addr_eui48((u8 *)&dev->ibdev.node_guid, + dev->rdev.lldi.ports[0]->dev_addr); 
dev->ibdev.local_dma_lkey = 0; dev->ibdev.node_type = RDMA_NODE_RNIC; BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index d20b4ef2c853..ffbd9a89981e 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2460,6 +2460,7 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, memset(attr, 0, sizeof(*attr)); memset(init_attr, 0, sizeof(*init_attr)); attr->qp_state = to_ib_qp_state(qhp->attr.state); + attr->cur_qp_state = to_ib_qp_state(qhp->attr.state); init_attr->cap.max_send_wr = qhp->attr.sq_num_entries; init_attr->cap.max_recv_wr = qhp->attr.rq_num_entries; init_attr->cap.max_send_sge = qhp->attr.sq_max_sges; diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index 0b0b93b529f3..d4b9226088bd 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -444,7 +444,10 @@ struct efa_admin_create_cq_cmd { /* * 4:0 : cq_entry_size_words - size of CQ entry in * 32-bit words, valid values: 4, 8. - * 7:5 : reserved7 - MBZ + * 5 : set_src_addr - If set, source address will be + * filled on RX completions from unknown senders. + * Requires 8 words CQ entry size. 
+ * 7:6 : reserved7 - MBZ */ u8 cq_caps_2; @@ -980,6 +983,7 @@ struct efa_admin_host_info { #define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK BIT(5) #define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK BIT(6) #define EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) +#define EFA_ADMIN_CREATE_CQ_CMD_SET_SRC_ADDR_MASK BIT(5) /* create_cq_resp */ #define EFA_ADMIN_CREATE_CQ_RESP_DB_VALID_MASK BIT(0) diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index fb405da4e1db..8f8885e002ba 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -168,7 +168,10 @@ int efa_com_create_cq(struct efa_com_dev *edev, EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED, 1); create_cmd.eqn = params->eqn; } - + if (params->set_src_addr) { + EFA_SET(&create_cmd.cq_caps_2, + EFA_ADMIN_CREATE_CQ_CMD_SET_SRC_ADDR, 1); + } efa_com_set_dma_addr(params->dma_addr, &create_cmd.cq_ba.mem_addr_high, &create_cmd.cq_ba.mem_addr_low); diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index c33010bbf9e8..0898ad5bc340 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -75,7 +75,8 @@ struct efa_com_create_cq_params { u16 uarn; u16 eqn; u8 entry_size_in_bytes; - bool interrupt_mode_enabled; + u8 interrupt_mode_enabled : 1; + u8 set_src_addr : 1; }; struct efa_com_create_cq_result { diff --git a/drivers/infiniband/hw/efa/efa_io_defs.h b/drivers/infiniband/hw/efa/efa_io_defs.h new file mode 100644 index 000000000000..17ba8984b11e --- /dev/null +++ b/drivers/infiniband/hw/efa/efa_io_defs.h @@ -0,0 +1,289 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved. 
+ */ + +#ifndef _EFA_IO_H_ +#define _EFA_IO_H_ + +#define EFA_IO_TX_DESC_NUM_BUFS 2 +#define EFA_IO_TX_DESC_NUM_RDMA_BUFS 1 +#define EFA_IO_TX_DESC_INLINE_MAX_SIZE 32 +#define EFA_IO_TX_DESC_IMM_DATA_SIZE 4 + +enum efa_io_queue_type { + /* send queue (of a QP) */ + EFA_IO_SEND_QUEUE = 1, + /* recv queue (of a QP) */ + EFA_IO_RECV_QUEUE = 2, +}; + +enum efa_io_send_op_type { + /* send message */ + EFA_IO_SEND = 0, + /* RDMA read */ + EFA_IO_RDMA_READ = 1, +}; + +enum efa_io_comp_status { + /* Successful completion */ + EFA_IO_COMP_STATUS_OK = 0, + /* Flushed during QP destroy */ + EFA_IO_COMP_STATUS_FLUSHED = 1, + /* Internal QP error */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_QP_INTERNAL_ERROR = 2, + /* Bad operation type */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_OP_TYPE = 3, + /* Bad AH */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_AH = 4, + /* LKEY not registered or does not match IOVA */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_LKEY = 5, + /* Message too long */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_BAD_LENGTH = 6, + /* Destination ENI is down or does not run EFA */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_ADDRESS = 7, + /* Connection was reset by remote side */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_ABORT = 8, + /* Bad dest QP number (QP does not exist or is in error state) */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_DEST_QPN = 9, + /* Destination resource not ready (no WQEs posted on RQ) */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_RNR = 10, + /* Receiver SGL too short */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_LENGTH = 11, + /* Unexpected status returned by responder */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_STATUS = 12, + /* Unresponsive remote - detected locally */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_UNRESP_REMOTE = 13, +}; + +struct efa_io_tx_meta_desc { + /* Verbs-generated Request ID */ + u16 req_id; + + /* + * control flags + * 3:0 : op_type - operation type: send/rdma/fast mem + * ops/etc + * 4 : has_imm - immediate_data field carries valid + * data. 
+ * 5 : inline_msg - inline mode - inline message data + * follows this descriptor (no buffer descriptors). + * Note that it is different from immediate data + * 6 : meta_extension - Extended metadata. MBZ + * 7 : meta_desc - Indicates metadata descriptor. + * Must be set. + */ + u8 ctrl1; + + /* + * control flags + * 0 : phase + * 1 : reserved25 - MBZ + * 2 : first - Indicates first descriptor in + * transaction. Must be set. + * 3 : last - Indicates last descriptor in + * transaction. Must be set. + * 4 : comp_req - Indicates whether completion should + * be posted, after packet is transmitted. Valid only + * for the first descriptor + * 7:5 : reserved29 - MBZ + */ + u8 ctrl2; + + u16 dest_qp_num; + + /* + * If inline_msg bit is set, length of inline message in bytes, + * otherwise length of SGL (number of buffers). + */ + u16 length; + + /* + * immediate data: if has_imm is set, then this field is included + * within Tx message and reported in remote Rx completion. + */ + u32 immediate_data; + + u16 ah; + + u16 reserved; + + /* Queue key */ + u32 qkey; + + u8 reserved2[12]; +}; + +/* + * Tx queue buffer descriptor, for any transport type. Preceded by metadata + * descriptor. 
+ */ +struct efa_io_tx_buf_desc { + /* length in bytes */ + u32 length; + + /* + * 23:0 : lkey - local memory translation key + * 31:24 : reserved - MBZ + */ + u32 lkey; + + /* Buffer address bits[31:0] */ + u32 buf_addr_lo; + + /* Buffer address bits[63:32] */ + u32 buf_addr_hi; +}; + +struct efa_io_remote_mem_addr { + /* length in bytes */ + u32 length; + + /* remote memory translation key */ + u32 rkey; + + /* Buffer address bits[31:0] */ + u32 buf_addr_lo; + + /* Buffer address bits[63:32] */ + u32 buf_addr_hi; +}; + +struct efa_io_rdma_req { + /* Remote memory address */ + struct efa_io_remote_mem_addr remote_mem; + + /* Local memory address */ + struct efa_io_tx_buf_desc local_mem[1]; +}; + +/* + * Tx WQE, composed of tx meta descriptors followed by either tx buffer + * descriptors or inline data + */ +struct efa_io_tx_wqe { + /* TX meta */ + struct efa_io_tx_meta_desc meta; + + union { + /* Send buffer descriptors */ + struct efa_io_tx_buf_desc sgl[2]; + + u8 inline_data[32]; + + /* RDMA local and remote memory addresses */ + struct efa_io_rdma_req rdma_req; + } data; +}; + +/* + * Rx buffer descriptor; RX WQE is composed of one or more RX buffer + * descriptors. + */ +struct efa_io_rx_desc { + /* Buffer address bits[31:0] */ + u32 buf_addr_lo; + + /* Buffer Pointer[63:32] */ + u32 buf_addr_hi; + + /* Verbs-generated request id. */ + u16 req_id; + + /* Length in bytes. */ + u16 length; + + /* + * LKey and control flags + * 23:0 : lkey + * 29:24 : reserved - MBZ + * 30 : first - Indicates first descriptor in WQE + * 31 : last - Indicates last descriptor in WQE + */ + u32 lkey_ctrl; +}; + +/* Common IO completion descriptor */ +struct efa_io_cdesc_common { + /* + * verbs-generated request ID, as provided in the completed tx or rx + * descriptor. 
+ */ + u16 req_id; + + u8 status; + + /* + * flags + * 0 : phase - Phase bit + * 2:1 : q_type - enum efa_io_queue_type: send/recv + * 3 : has_imm - indicates that immediate data is + * present - for RX completions only + * 7:4 : reserved28 - MBZ + */ + u8 flags; + + /* local QP number */ + u16 qp_num; + + /* Transferred length */ + u16 length; +}; + +/* Tx completion descriptor */ +struct efa_io_tx_cdesc { + /* Common completion info */ + struct efa_io_cdesc_common common; +}; + +/* Rx Completion Descriptor */ +struct efa_io_rx_cdesc { + /* Common completion info */ + struct efa_io_cdesc_common common; + + /* Remote Address Handle FW index, 0xFFFF indicates invalid ah */ + u16 ah; + + u16 src_qp_num; + + /* Immediate data */ + u32 imm; +}; + +/* Extended Rx Completion Descriptor */ +struct efa_io_rx_cdesc_ex { + /* Base RX completion info */ + struct efa_io_rx_cdesc rx_cdesc_base; + + /* + * Valid only in case of unknown AH (0xFFFF) and CQ set_src_addr is + * enabled. + */ + u8 src_addr[16]; +}; + +/* tx_meta_desc */ +#define EFA_IO_TX_META_DESC_OP_TYPE_MASK GENMASK(3, 0) +#define EFA_IO_TX_META_DESC_HAS_IMM_MASK BIT(4) +#define EFA_IO_TX_META_DESC_INLINE_MSG_MASK BIT(5) +#define EFA_IO_TX_META_DESC_META_EXTENSION_MASK BIT(6) +#define EFA_IO_TX_META_DESC_META_DESC_MASK BIT(7) +#define EFA_IO_TX_META_DESC_PHASE_MASK BIT(0) +#define EFA_IO_TX_META_DESC_FIRST_MASK BIT(2) +#define EFA_IO_TX_META_DESC_LAST_MASK BIT(3) +#define EFA_IO_TX_META_DESC_COMP_REQ_MASK BIT(4) + +/* tx_buf_desc */ +#define EFA_IO_TX_BUF_DESC_LKEY_MASK GENMASK(23, 0) + +/* rx_desc */ +#define EFA_IO_RX_DESC_LKEY_MASK GENMASK(23, 0) +#define EFA_IO_RX_DESC_FIRST_MASK BIT(30) +#define EFA_IO_RX_DESC_LAST_MASK BIT(31) + +/* cdesc_common */ +#define EFA_IO_CDESC_COMMON_PHASE_MASK BIT(0) +#define EFA_IO_CDESC_COMMON_Q_TYPE_MASK GENMASK(2, 1) +#define EFA_IO_CDESC_COMMON_HAS_IMM_MASK BIT(3) + +#endif /* _EFA_IO_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_main.c 
b/drivers/infiniband/hw/efa/efa_main.c index 94b94cca4870..15ee92081118 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* - * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved. */ #include <linux/module.h> @@ -14,10 +14,12 @@ #define PCI_DEV_ID_EFA0_VF 0xefa0 #define PCI_DEV_ID_EFA1_VF 0xefa1 +#define PCI_DEV_ID_EFA2_VF 0xefa2 static const struct pci_device_id efa_pci_tbl[] = { { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA0_VF) }, { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA1_VF) }, + { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA2_VF) }, { } }; diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index ecfe70eb5efb..31454643f8c5 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* - * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ #include <linux/dma-buf.h> @@ -15,6 +15,7 @@ #include <rdma/uverbs_ioctl.h> #include "efa.h" +#include "efa_io_defs.h" enum { EFA_MMAP_DMA_PAGE = 0, @@ -242,6 +243,7 @@ int efa_query_device(struct ib_device *ibdev, resp.max_rq_wr = dev_attr->max_rq_depth; resp.max_rdma_size = dev_attr->max_rdma_size; + resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_WITH_SGID; if (EFA_DEV_CAP(dev, RDMA_READ)) resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ; @@ -1064,6 +1066,7 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct efa_ibv_create_cq cmd = {}; struct efa_cq *cq = to_ecq(ibcq); int entries = attr->cqe; + bool set_src_addr; int err; ibdev_dbg(ibdev, "create_cq entries %d\n", entries); @@ -1109,7 +1112,10 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, goto err_out; } - if (!cmd.cq_entry_size) { + set_src_addr = !!(cmd.flags & EFA_CREATE_CQ_WITH_SGID); + if ((cmd.cq_entry_size != sizeof(struct efa_io_rx_cdesc_ex)) && + (set_src_addr || + cmd.cq_entry_size != sizeof(struct efa_io_rx_cdesc))) { ibdev_dbg(ibdev, "Invalid entry size [%u]\n", cmd.cq_entry_size); err = -EINVAL; @@ -1138,6 +1144,7 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, params.dma_addr = cq->dma_addr; params.entry_size_in_bytes = cmd.cq_entry_size; params.num_sub_cqs = cmd.num_sub_cqs; + params.set_src_addr = set_src_addr; if (cmd.flags & EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL) { cq->eq = efa_vec2eq(dev, attr->comp_vector); params.eqn = cq->eq->eeq.eqn; diff --git a/drivers/infiniband/hw/erdma/Kconfig b/drivers/infiniband/hw/erdma/Kconfig new file mode 100644 index 000000000000..169038e3ceb1 --- /dev/null +++ b/drivers/infiniband/hw/erdma/Kconfig @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0-only +config INFINIBAND_ERDMA + tristate "Alibaba Elastic RDMA Adapter (ERDMA) support" + depends on PCI_MSI && 64BIT + depends on INFINIBAND_ADDR_TRANS + depends on INFINIBAND_USER_ACCESS + help + This is a RDMA/iWarp 
driver for Alibaba Elastic RDMA Adapter(ERDMA), + which supports RDMA features in Alibaba cloud environment. + + To compile this driver as module, choose M here. The module will be + called erdma. diff --git a/drivers/infiniband/hw/erdma/Makefile b/drivers/infiniband/hw/erdma/Makefile new file mode 100644 index 000000000000..51d2ef91905a --- /dev/null +++ b/drivers/infiniband/hw/erdma/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_INFINIBAND_ERDMA) := erdma.o + +erdma-y := erdma_cm.o erdma_main.o erdma_cmdq.o erdma_cq.o erdma_verbs.o erdma_qp.o erdma_eq.o diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h new file mode 100644 index 000000000000..730783fbc894 --- /dev/null +++ b/drivers/infiniband/hw/erdma/erdma.h @@ -0,0 +1,289 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ + +/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */ +/* Kai Shen <kaishen@linux.alibaba.com> */ +/* Copyright (c) 2020-2022, Alibaba Group. 
*/ + +#ifndef __ERDMA_H__ +#define __ERDMA_H__ + +#include <linux/bitfield.h> +#include <linux/netdevice.h> +#include <linux/pci.h> +#include <linux/xarray.h> +#include <rdma/ib_verbs.h> + +#include "erdma_hw.h" + +#define DRV_MODULE_NAME "erdma" +#define ERDMA_NODE_DESC "Elastic RDMA(iWARP) stack" + +struct erdma_eq { + void *qbuf; + dma_addr_t qbuf_dma_addr; + + spinlock_t lock; + + u32 depth; + + u16 ci; + u16 rsvd; + + atomic64_t event_num; + atomic64_t notify_num; + + u64 __iomem *db_addr; + u64 *db_record; +}; + +struct erdma_cmdq_sq { + void *qbuf; + dma_addr_t qbuf_dma_addr; + + spinlock_t lock; + + u32 depth; + u16 ci; + u16 pi; + + u16 wqebb_cnt; + + u64 *db_record; +}; + +struct erdma_cmdq_cq { + void *qbuf; + dma_addr_t qbuf_dma_addr; + + spinlock_t lock; + + u32 depth; + u32 ci; + u32 cmdsn; + + u64 *db_record; + + atomic64_t armed_num; +}; + +enum { + ERDMA_CMD_STATUS_INIT, + ERDMA_CMD_STATUS_ISSUED, + ERDMA_CMD_STATUS_FINISHED, + ERDMA_CMD_STATUS_TIMEOUT +}; + +struct erdma_comp_wait { + struct completion wait_event; + u32 cmd_status; + u32 ctx_id; + u16 sq_pi; + u8 comp_status; + u8 rsvd; + u32 comp_data[4]; +}; + +enum { + ERDMA_CMDQ_STATE_OK_BIT = 0, + ERDMA_CMDQ_STATE_TIMEOUT_BIT = 1, + ERDMA_CMDQ_STATE_CTX_ERR_BIT = 2, +}; + +#define ERDMA_CMDQ_TIMEOUT_MS 15000 +#define ERDMA_REG_ACCESS_WAIT_MS 20 +#define ERDMA_WAIT_DEV_DONE_CNT 500 + +struct erdma_cmdq { + unsigned long *comp_wait_bitmap; + struct erdma_comp_wait *wait_pool; + spinlock_t lock; + + bool use_event; + + struct erdma_cmdq_sq sq; + struct erdma_cmdq_cq cq; + struct erdma_eq eq; + + unsigned long state; + + struct semaphore credits; + u16 max_outstandings; +}; + +#define COMPROMISE_CC ERDMA_CC_CUBIC +enum erdma_cc_alg { + ERDMA_CC_NEWRENO = 0, + ERDMA_CC_CUBIC, + ERDMA_CC_HPCC_RTT, + ERDMA_CC_HPCC_ECN, + ERDMA_CC_HPCC_INT, + ERDMA_CC_METHODS_NUM +}; + +struct erdma_devattr { + u32 fw_version; + + unsigned char peer_addr[ETH_ALEN]; + + int numa_node; + enum erdma_cc_alg cc; + u32 
grp_num; + u32 irq_num; + + bool disable_dwqe; + u16 dwqe_pages; + u16 dwqe_entries; + + u32 max_qp; + u32 max_send_wr; + u32 max_recv_wr; + u32 max_ord; + u32 max_ird; + + u32 max_send_sge; + u32 max_recv_sge; + u32 max_sge_rd; + u32 max_cq; + u32 max_cqe; + u64 max_mr_size; + u32 max_mr; + u32 max_pd; + u32 max_mw; + u32 local_dma_key; +}; + +#define ERDMA_IRQNAME_SIZE 50 + +struct erdma_irq { + char name[ERDMA_IRQNAME_SIZE]; + u32 msix_vector; + cpumask_t affinity_hint_mask; +}; + +struct erdma_eq_cb { + bool ready; + void *dev; /* All EQs use this fields to get erdma_dev struct */ + struct erdma_irq irq; + struct erdma_eq eq; + struct tasklet_struct tasklet; +}; + +struct erdma_resource_cb { + unsigned long *bitmap; + spinlock_t lock; + u32 next_alloc_idx; + u32 max_cap; +}; + +enum { + ERDMA_RES_TYPE_PD = 0, + ERDMA_RES_TYPE_STAG_IDX = 1, + ERDMA_RES_CNT = 2, +}; + +#define ERDMA_EXTRA_BUFFER_SIZE ERDMA_DB_SIZE +#define WARPPED_BUFSIZE(size) ((size) + ERDMA_EXTRA_BUFFER_SIZE) + +struct erdma_dev { + struct ib_device ibdev; + struct net_device *netdev; + struct pci_dev *pdev; + struct notifier_block netdev_nb; + + resource_size_t func_bar_addr; + resource_size_t func_bar_len; + u8 __iomem *func_bar; + + struct erdma_devattr attrs; + /* physical port state (only one port per device) */ + enum ib_port_state state; + u32 mtu; + + /* cmdq and aeq use the same msix vector */ + struct erdma_irq comm_irq; + struct erdma_cmdq cmdq; + struct erdma_eq aeq; + struct erdma_eq_cb ceqs[ERDMA_NUM_MSIX_VEC - 1]; + + spinlock_t lock; + struct erdma_resource_cb res_cb[ERDMA_RES_CNT]; + struct xarray qp_xa; + struct xarray cq_xa; + + u32 next_alloc_qpn; + u32 next_alloc_cqn; + + spinlock_t db_bitmap_lock; + /* We provide max 64 uContexts that each has one SQ doorbell Page. 
*/ + DECLARE_BITMAP(sdb_page, ERDMA_DWQE_TYPE0_CNT); + /* + * We provide max 496 uContexts that each has one SQ normal Db, + * and one directWQE db。 + */ + DECLARE_BITMAP(sdb_entry, ERDMA_DWQE_TYPE1_CNT); + + atomic_t num_ctx; + struct list_head cep_list; +}; + +static inline void *get_queue_entry(void *qbuf, u32 idx, u32 depth, u32 shift) +{ + idx &= (depth - 1); + + return qbuf + (idx << shift); +} + +static inline struct erdma_dev *to_edev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct erdma_dev, ibdev); +} + +static inline u32 erdma_reg_read32(struct erdma_dev *dev, u32 reg) +{ + return readl(dev->func_bar + reg); +} + +static inline u64 erdma_reg_read64(struct erdma_dev *dev, u32 reg) +{ + return readq(dev->func_bar + reg); +} + +static inline void erdma_reg_write32(struct erdma_dev *dev, u32 reg, u32 value) +{ + writel(value, dev->func_bar + reg); +} + +static inline void erdma_reg_write64(struct erdma_dev *dev, u32 reg, u64 value) +{ + writeq(value, dev->func_bar + reg); +} + +static inline u32 erdma_reg_read32_filed(struct erdma_dev *dev, u32 reg, + u32 filed_mask) +{ + u32 val = erdma_reg_read32(dev, reg); + + return FIELD_GET(filed_mask, val); +} + +int erdma_cmdq_init(struct erdma_dev *dev); +void erdma_finish_cmdq_init(struct erdma_dev *dev); +void erdma_cmdq_destroy(struct erdma_dev *dev); + +void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op); +int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size, + u64 *resp0, u64 *resp1); +void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq); + +int erdma_ceqs_init(struct erdma_dev *dev); +void erdma_ceqs_uninit(struct erdma_dev *dev); +void notify_eq(struct erdma_eq *eq); +void *get_next_valid_eqe(struct erdma_eq *eq); + +int erdma_aeq_init(struct erdma_dev *dev); +void erdma_aeq_destroy(struct erdma_dev *dev); + +void erdma_aeq_event_handler(struct erdma_dev *dev); +void erdma_ceq_completion_handler(struct erdma_eq_cb *ceq_cb); + +#endif diff --git 
a/drivers/infiniband/hw/erdma/erdma_cm.c b/drivers/infiniband/hw/erdma/erdma_cm.c new file mode 100644 index 000000000000..74f6348f240a --- /dev/null +++ b/drivers/infiniband/hw/erdma/erdma_cm.c @@ -0,0 +1,1422 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */ +/* Kai Shen <kaishen@linux.alibaba.com> */ +/* Copyright (c) 2020-2022, Alibaba Group. */ + +/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ +/* Fredy Neeser */ +/* Greg Joyce <greg@opengridcomputing.com> */ +/* Copyright (c) 2008-2019, IBM Corporation */ +/* Copyright (c) 2017, Open Grid Computing, Inc. */ + +#include <linux/workqueue.h> + +#include "erdma.h" +#include "erdma_cm.h" +#include "erdma_verbs.h" + +static struct workqueue_struct *erdma_cm_wq; + +static void erdma_cm_llp_state_change(struct sock *sk); +static void erdma_cm_llp_data_ready(struct sock *sk); +static void erdma_cm_llp_error_report(struct sock *sk); + +static void erdma_sk_assign_cm_upcalls(struct sock *sk) +{ + write_lock_bh(&sk->sk_callback_lock); + sk->sk_state_change = erdma_cm_llp_state_change; + sk->sk_data_ready = erdma_cm_llp_data_ready; + sk->sk_error_report = erdma_cm_llp_error_report; + write_unlock_bh(&sk->sk_callback_lock); +} + +static void erdma_sk_save_upcalls(struct sock *sk) +{ + struct erdma_cep *cep = sk_to_cep(sk); + + write_lock_bh(&sk->sk_callback_lock); + cep->sk_state_change = sk->sk_state_change; + cep->sk_data_ready = sk->sk_data_ready; + cep->sk_error_report = sk->sk_error_report; + write_unlock_bh(&sk->sk_callback_lock); +} + +static void erdma_sk_restore_upcalls(struct sock *sk, struct erdma_cep *cep) +{ + sk->sk_state_change = cep->sk_state_change; + sk->sk_data_ready = cep->sk_data_ready; + sk->sk_error_report = cep->sk_error_report; + sk->sk_user_data = NULL; +} + +static void erdma_socket_disassoc(struct socket *s) +{ + struct sock *sk = s->sk; + struct erdma_cep *cep; + + if (sk) { + write_lock_bh(&sk->sk_callback_lock); + cep = 
sk_to_cep(sk); + if (cep) { + erdma_sk_restore_upcalls(sk, cep); + erdma_cep_put(cep); + } else { + WARN_ON_ONCE(1); + } + write_unlock_bh(&sk->sk_callback_lock); + } else { + WARN_ON_ONCE(1); + } +} + +static void erdma_cep_socket_assoc(struct erdma_cep *cep, struct socket *s) +{ + cep->sock = s; + erdma_cep_get(cep); + s->sk->sk_user_data = cep; + + erdma_sk_save_upcalls(s->sk); + erdma_sk_assign_cm_upcalls(s->sk); +} + +static void erdma_disassoc_listen_cep(struct erdma_cep *cep) +{ + if (cep->listen_cep) { + erdma_cep_put(cep->listen_cep); + cep->listen_cep = NULL; + } +} + +static struct erdma_cep *erdma_cep_alloc(struct erdma_dev *dev) +{ + struct erdma_cep *cep = kzalloc(sizeof(*cep), GFP_KERNEL); + unsigned long flags; + + if (!cep) + return NULL; + + INIT_LIST_HEAD(&cep->listenq); + INIT_LIST_HEAD(&cep->devq); + INIT_LIST_HEAD(&cep->work_freelist); + + kref_init(&cep->ref); + cep->state = ERDMA_EPSTATE_IDLE; + init_waitqueue_head(&cep->waitq); + spin_lock_init(&cep->lock); + cep->dev = dev; + + spin_lock_irqsave(&dev->lock, flags); + list_add_tail(&cep->devq, &dev->cep_list); + spin_unlock_irqrestore(&dev->lock, flags); + + return cep; +} + +static void erdma_cm_free_work(struct erdma_cep *cep) +{ + struct list_head *w, *tmp; + struct erdma_cm_work *work; + + list_for_each_safe(w, tmp, &cep->work_freelist) { + work = list_entry(w, struct erdma_cm_work, list); + list_del(&work->list); + kfree(work); + } +} + +static void erdma_cancel_mpatimer(struct erdma_cep *cep) +{ + spin_lock_bh(&cep->lock); + if (cep->mpa_timer) { + if (cancel_delayed_work(&cep->mpa_timer->work)) { + erdma_cep_put(cep); + kfree(cep->mpa_timer); + } + cep->mpa_timer = NULL; + } + spin_unlock_bh(&cep->lock); +} + +static void erdma_put_work(struct erdma_cm_work *work) +{ + INIT_LIST_HEAD(&work->list); + spin_lock_bh(&work->cep->lock); + list_add(&work->list, &work->cep->work_freelist); + spin_unlock_bh(&work->cep->lock); +} + +static void erdma_cep_set_inuse(struct erdma_cep *cep) +{ + 
unsigned long flags; + + spin_lock_irqsave(&cep->lock, flags); + while (cep->in_use) { + spin_unlock_irqrestore(&cep->lock, flags); + wait_event_interruptible(cep->waitq, !cep->in_use); + if (signal_pending(current)) + flush_signals(current); + + spin_lock_irqsave(&cep->lock, flags); + } + + cep->in_use = 1; + spin_unlock_irqrestore(&cep->lock, flags); +} + +static void erdma_cep_set_free(struct erdma_cep *cep) +{ + unsigned long flags; + + spin_lock_irqsave(&cep->lock, flags); + cep->in_use = 0; + spin_unlock_irqrestore(&cep->lock, flags); + + wake_up(&cep->waitq); +} + +static void __erdma_cep_dealloc(struct kref *ref) +{ + struct erdma_cep *cep = container_of(ref, struct erdma_cep, ref); + struct erdma_dev *dev = cep->dev; + unsigned long flags; + + WARN_ON(cep->listen_cep); + + kfree(cep->private_data); + kfree(cep->mpa.pdata); + spin_lock_bh(&cep->lock); + if (!list_empty(&cep->work_freelist)) + erdma_cm_free_work(cep); + spin_unlock_bh(&cep->lock); + + spin_lock_irqsave(&dev->lock, flags); + list_del(&cep->devq); + spin_unlock_irqrestore(&dev->lock, flags); + kfree(cep); +} + +static struct erdma_cm_work *erdma_get_work(struct erdma_cep *cep) +{ + struct erdma_cm_work *work = NULL; + + spin_lock_bh(&cep->lock); + if (!list_empty(&cep->work_freelist)) { + work = list_entry(cep->work_freelist.next, struct erdma_cm_work, + list); + list_del_init(&work->list); + } + + spin_unlock_bh(&cep->lock); + return work; +} + +static int erdma_cm_alloc_work(struct erdma_cep *cep, int num) +{ + struct erdma_cm_work *work; + + while (num--) { + work = kmalloc(sizeof(*work), GFP_KERNEL); + if (!work) { + if (!(list_empty(&cep->work_freelist))) + erdma_cm_free_work(cep); + return -ENOMEM; + } + work->cep = cep; + INIT_LIST_HEAD(&work->list); + list_add(&work->list, &cep->work_freelist); + } + + return 0; +} + +static int erdma_cm_upcall(struct erdma_cep *cep, enum iw_cm_event_type reason, + int status) +{ + struct iw_cm_event event; + struct iw_cm_id *cm_id; + + memset(&event, 
0, sizeof(event)); + event.status = status; + event.event = reason; + + if (reason == IW_CM_EVENT_CONNECT_REQUEST) { + event.provider_data = cep; + cm_id = cep->listen_cep->cm_id; + + event.ird = cep->dev->attrs.max_ird; + event.ord = cep->dev->attrs.max_ord; + } else { + cm_id = cep->cm_id; + } + + if (reason == IW_CM_EVENT_CONNECT_REQUEST || + reason == IW_CM_EVENT_CONNECT_REPLY) { + u16 pd_len = be16_to_cpu(cep->mpa.hdr.params.pd_len); + + if (pd_len && cep->mpa.pdata) { + event.private_data_len = pd_len; + event.private_data = cep->mpa.pdata; + } + + getname_local(cep->sock, &event.local_addr); + getname_peer(cep->sock, &event.remote_addr); + } + + return cm_id->event_handler(cm_id, &event); +} + +void erdma_qp_cm_drop(struct erdma_qp *qp) +{ + struct erdma_cep *cep = qp->cep; + + if (!qp->cep) + return; + + erdma_cep_set_inuse(cep); + + /* already closed. */ + if (cep->state == ERDMA_EPSTATE_CLOSED) + goto out; + + if (cep->cm_id) { + switch (cep->state) { + case ERDMA_EPSTATE_AWAIT_MPAREP: + erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, + -EINVAL); + break; + case ERDMA_EPSTATE_RDMA_MODE: + erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0); + break; + case ERDMA_EPSTATE_IDLE: + case ERDMA_EPSTATE_LISTENING: + case ERDMA_EPSTATE_CONNECTING: + case ERDMA_EPSTATE_AWAIT_MPAREQ: + case ERDMA_EPSTATE_RECVD_MPAREQ: + case ERDMA_EPSTATE_CLOSED: + default: + break; + } + cep->cm_id->rem_ref(cep->cm_id); + cep->cm_id = NULL; + erdma_cep_put(cep); + } + cep->state = ERDMA_EPSTATE_CLOSED; + + if (cep->sock) { + erdma_socket_disassoc(cep->sock); + sock_release(cep->sock); + cep->sock = NULL; + } + + if (cep->qp) { + cep->qp = NULL; + erdma_qp_put(qp); + } +out: + erdma_cep_set_free(cep); +} + +void erdma_cep_put(struct erdma_cep *cep) +{ + WARN_ON(kref_read(&cep->ref) < 1); + kref_put(&cep->ref, __erdma_cep_dealloc); +} + +void erdma_cep_get(struct erdma_cep *cep) +{ + kref_get(&cep->ref); +} + +static int erdma_send_mpareqrep(struct erdma_cep *cep, const void *pdata, + u8 
pd_len) +{ + struct socket *s = cep->sock; + struct mpa_rr *rr = &cep->mpa.hdr; + struct kvec iov[3]; + struct msghdr msg; + int iovec_num = 0; + int ret; + int mpa_len; + + memset(&msg, 0, sizeof(msg)); + + rr->params.pd_len = cpu_to_be16(pd_len); + + iov[iovec_num].iov_base = rr; + iov[iovec_num].iov_len = sizeof(*rr); + iovec_num++; + mpa_len = sizeof(*rr); + + iov[iovec_num].iov_base = &cep->mpa.ext_data; + iov[iovec_num].iov_len = sizeof(cep->mpa.ext_data); + iovec_num++; + mpa_len += sizeof(cep->mpa.ext_data); + + if (pd_len) { + iov[iovec_num].iov_base = (char *)pdata; + iov[iovec_num].iov_len = pd_len; + mpa_len += pd_len; + iovec_num++; + } + + ret = kernel_sendmsg(s, &msg, iov, iovec_num, mpa_len); + + return ret < 0 ? ret : 0; +} + +static inline int ksock_recv(struct socket *sock, char *buf, size_t size, + int flags) +{ + struct kvec iov = { buf, size }; + struct msghdr msg = { .msg_name = NULL, .msg_flags = flags }; + + return kernel_recvmsg(sock, &msg, &iov, 1, size, flags); +} + +static int __recv_mpa_hdr(struct erdma_cep *cep, int hdr_rcvd, char *hdr, + int hdr_size, int *rcvd_out) +{ + struct socket *s = cep->sock; + int rcvd; + + *rcvd_out = 0; + if (hdr_rcvd < hdr_size) { + rcvd = ksock_recv(s, hdr + hdr_rcvd, hdr_size - hdr_rcvd, + MSG_DONTWAIT); + if (rcvd == -EAGAIN) + return -EAGAIN; + + if (rcvd <= 0) + return -ECONNABORTED; + + hdr_rcvd += rcvd; + *rcvd_out = rcvd; + + if (hdr_rcvd < hdr_size) + return -EAGAIN; + } + + return 0; +} + +static void __mpa_rr_set_revision(__be16 *bits, u8 rev) +{ + *bits = (*bits & ~MPA_RR_MASK_REVISION) | + (cpu_to_be16(rev) & MPA_RR_MASK_REVISION); +} + +static u8 __mpa_rr_revision(__be16 mpa_rr_bits) +{ + __be16 rev = mpa_rr_bits & MPA_RR_MASK_REVISION; + + return (u8)be16_to_cpu(rev); +} + +static void __mpa_ext_set_cc(__be32 *bits, u32 cc) +{ + *bits = (*bits & ~MPA_EXT_FLAG_CC) | + (cpu_to_be32(cc) & MPA_EXT_FLAG_CC); +} + +static u8 __mpa_ext_cc(__be32 mpa_ext_bits) +{ + __be32 cc = mpa_ext_bits & 
MPA_EXT_FLAG_CC; + + return (u8)be32_to_cpu(cc); +} + +/* + * Receive MPA Request/Reply header. + * + * Returns 0 if complete MPA Request/Reply haeder including + * eventual private data was received. Returns -EAGAIN if + * header was partially received or negative error code otherwise. + * + * Context: May be called in process context only + */ +static int erdma_recv_mpa_rr(struct erdma_cep *cep) +{ + struct mpa_rr *hdr = &cep->mpa.hdr; + struct socket *s = cep->sock; + u16 pd_len; + int rcvd, to_rcv, ret, pd_rcvd; + + if (cep->mpa.bytes_rcvd < sizeof(struct mpa_rr)) { + ret = __recv_mpa_hdr(cep, cep->mpa.bytes_rcvd, + (char *)&cep->mpa.hdr, + sizeof(struct mpa_rr), &rcvd); + cep->mpa.bytes_rcvd += rcvd; + if (ret) + return ret; + } + + if (be16_to_cpu(hdr->params.pd_len) > MPA_MAX_PRIVDATA || + __mpa_rr_revision(hdr->params.bits) != MPA_REVISION_EXT_1) + return -EPROTO; + + if (cep->mpa.bytes_rcvd - sizeof(struct mpa_rr) < + sizeof(struct erdma_mpa_ext)) { + ret = __recv_mpa_hdr( + cep, cep->mpa.bytes_rcvd - sizeof(struct mpa_rr), + (char *)&cep->mpa.ext_data, + sizeof(struct erdma_mpa_ext), &rcvd); + cep->mpa.bytes_rcvd += rcvd; + if (ret) + return ret; + } + + pd_len = be16_to_cpu(hdr->params.pd_len); + pd_rcvd = cep->mpa.bytes_rcvd - sizeof(struct mpa_rr) - + sizeof(struct erdma_mpa_ext); + to_rcv = pd_len - pd_rcvd; + + if (!to_rcv) { + /* + * We have received the whole MPA Request/Reply message. + * Check against peer protocol violation. + */ + u32 word; + + ret = __recv_mpa_hdr(cep, 0, (char *)&word, sizeof(word), + &rcvd); + if (ret == -EAGAIN && rcvd == 0) + return 0; + + if (ret) + return ret; + + return -EPROTO; + } + + /* + * At this point, MPA header has been fully received, and pd_len != 0. + * So, begin to receive private data. 
+ */ + if (!cep->mpa.pdata) { + cep->mpa.pdata = kmalloc(pd_len + 4, GFP_KERNEL); + if (!cep->mpa.pdata) + return -ENOMEM; + } + + rcvd = ksock_recv(s, cep->mpa.pdata + pd_rcvd, to_rcv + 4, + MSG_DONTWAIT); + if (rcvd < 0) + return rcvd; + + if (rcvd > to_rcv) + return -EPROTO; + + cep->mpa.bytes_rcvd += rcvd; + + if (to_rcv == rcvd) + return 0; + + return -EAGAIN; +} + +/* + * erdma_proc_mpareq() + * + * Read MPA Request from socket and signal new connection to IWCM + * if success. Caller must hold lock on corresponding listening CEP. + */ +static int erdma_proc_mpareq(struct erdma_cep *cep) +{ + struct mpa_rr *req; + int ret; + + ret = erdma_recv_mpa_rr(cep); + if (ret) + return ret; + + req = &cep->mpa.hdr; + + if (memcmp(req->key, MPA_KEY_REQ, MPA_KEY_SIZE)) + return -EPROTO; + + memcpy(req->key, MPA_KEY_REP, MPA_KEY_SIZE); + + /* Currently does not support marker and crc. */ + if (req->params.bits & MPA_RR_FLAG_MARKERS || + req->params.bits & MPA_RR_FLAG_CRC) + goto reject_conn; + + cep->state = ERDMA_EPSTATE_RECVD_MPAREQ; + + /* Keep reference until IWCM accepts/rejects */ + erdma_cep_get(cep); + ret = erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REQUEST, 0); + if (ret) + erdma_cep_put(cep); + + return ret; + +reject_conn: + req->params.bits &= ~MPA_RR_FLAG_MARKERS; + req->params.bits |= MPA_RR_FLAG_REJECT; + req->params.bits &= ~MPA_RR_FLAG_CRC; + + kfree(cep->mpa.pdata); + cep->mpa.pdata = NULL; + erdma_send_mpareqrep(cep, NULL, 0); + + return -EOPNOTSUPP; +} + +static int erdma_proc_mpareply(struct erdma_cep *cep) +{ + struct erdma_qp_attrs qp_attrs; + struct erdma_qp *qp = cep->qp; + struct mpa_rr *rep; + int ret; + + ret = erdma_recv_mpa_rr(cep); + if (ret) + goto out_err; + + erdma_cancel_mpatimer(cep); + + rep = &cep->mpa.hdr; + + if (memcmp(rep->key, MPA_KEY_REP, MPA_KEY_SIZE)) { + ret = -EPROTO; + goto out_err; + } + + if (rep->params.bits & MPA_RR_FLAG_REJECT) { + erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNRESET); + return -ECONNRESET; + } + 
+ /* Currently does not support marker and crc. */ + if ((rep->params.bits & MPA_RR_FLAG_MARKERS) || + (rep->params.bits & MPA_RR_FLAG_CRC)) { + erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNREFUSED); + return -EINVAL; + } + + memset(&qp_attrs, 0, sizeof(qp_attrs)); + qp_attrs.irq_size = cep->ird; + qp_attrs.orq_size = cep->ord; + qp_attrs.state = ERDMA_QP_STATE_RTS; + + down_write(&qp->state_lock); + if (qp->attrs.state > ERDMA_QP_STATE_RTR) { + ret = -EINVAL; + up_write(&qp->state_lock); + goto out_err; + } + + qp->attrs.qp_type = ERDMA_QP_ACTIVE; + if (__mpa_ext_cc(cep->mpa.ext_data.bits) != qp->attrs.cc) + qp->attrs.cc = COMPROMISE_CC; + + ret = erdma_modify_qp_internal(qp, &qp_attrs, + ERDMA_QP_ATTR_STATE | + ERDMA_QP_ATTR_LLP_HANDLE | + ERDMA_QP_ATTR_MPA); + + up_write(&qp->state_lock); + + if (!ret) { + ret = erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, 0); + if (!ret) + cep->state = ERDMA_EPSTATE_RDMA_MODE; + + return 0; + } + +out_err: + if (ret != -EAGAIN) + erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL); + + return ret; +} + +static void erdma_accept_newconn(struct erdma_cep *cep) +{ + struct socket *s = cep->sock; + struct socket *new_s = NULL; + struct erdma_cep *new_cep = NULL; + int ret = 0; + + if (cep->state != ERDMA_EPSTATE_LISTENING) + goto error; + + new_cep = erdma_cep_alloc(cep->dev); + if (!new_cep) + goto error; + + /* + * 4: Allocate a sufficient number of work elements + * to allow concurrent handling of local + peer close + * events, MPA header processing + MPA timeout. 
+ */ + if (erdma_cm_alloc_work(new_cep, 4) != 0) + goto error; + + /* + * Copy saved socket callbacks from listening CEP + * and assign new socket with new CEP + */ + new_cep->sk_state_change = cep->sk_state_change; + new_cep->sk_data_ready = cep->sk_data_ready; + new_cep->sk_error_report = cep->sk_error_report; + + ret = kernel_accept(s, &new_s, O_NONBLOCK); + if (ret != 0) + goto error; + + new_cep->sock = new_s; + erdma_cep_get(new_cep); + new_s->sk->sk_user_data = new_cep; + + tcp_sock_set_nodelay(new_s->sk); + new_cep->state = ERDMA_EPSTATE_AWAIT_MPAREQ; + + ret = erdma_cm_queue_work(new_cep, ERDMA_CM_WORK_MPATIMEOUT); + if (ret) + goto error; + + new_cep->listen_cep = cep; + erdma_cep_get(cep); + + if (atomic_read(&new_s->sk->sk_rmem_alloc)) { + /* MPA REQ already queued */ + erdma_cep_set_inuse(new_cep); + ret = erdma_proc_mpareq(new_cep); + if (ret != -EAGAIN) { + erdma_cep_put(cep); + new_cep->listen_cep = NULL; + if (ret) { + erdma_cep_set_free(new_cep); + goto error; + } + } + erdma_cep_set_free(new_cep); + } + return; + +error: + if (new_cep) { + new_cep->state = ERDMA_EPSTATE_CLOSED; + erdma_cancel_mpatimer(new_cep); + + erdma_cep_put(new_cep); + new_cep->sock = NULL; + } + + if (new_s) { + erdma_socket_disassoc(new_s); + sock_release(new_s); + } +} + +static int erdma_newconn_connected(struct erdma_cep *cep) +{ + int ret = 0; + + cep->mpa.hdr.params.bits = 0; + __mpa_rr_set_revision(&cep->mpa.hdr.params.bits, MPA_REVISION_EXT_1); + + memcpy(cep->mpa.hdr.key, MPA_KEY_REQ, MPA_KEY_SIZE); + cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.cookie); + __mpa_ext_set_cc(&cep->mpa.ext_data.bits, cep->qp->attrs.cc); + + ret = erdma_send_mpareqrep(cep, cep->private_data, cep->pd_len); + cep->state = ERDMA_EPSTATE_AWAIT_MPAREP; + cep->mpa.hdr.params.pd_len = 0; + + if (ret >= 0) + ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_MPATIMEOUT); + + return ret; +} + +static void erdma_cm_work_handler(struct work_struct *w) +{ + struct erdma_cm_work *work; + struct 
erdma_cep *cep; + int release_cep = 0, ret = 0; + + work = container_of(w, struct erdma_cm_work, work.work); + cep = work->cep; + + erdma_cep_set_inuse(cep); + + switch (work->type) { + case ERDMA_CM_WORK_CONNECTED: + erdma_cancel_mpatimer(cep); + if (cep->state == ERDMA_EPSTATE_CONNECTING) { + ret = erdma_newconn_connected(cep); + if (ret) { + erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, + -EIO); + release_cep = 1; + } + } + break; + case ERDMA_CM_WORK_CONNECTTIMEOUT: + if (cep->state == ERDMA_EPSTATE_CONNECTING) { + cep->mpa_timer = NULL; + erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, + -ETIMEDOUT); + release_cep = 1; + } + break; + case ERDMA_CM_WORK_ACCEPT: + erdma_accept_newconn(cep); + break; + case ERDMA_CM_WORK_READ_MPAHDR: + if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) { + if (cep->listen_cep) { + erdma_cep_set_inuse(cep->listen_cep); + + if (cep->listen_cep->state == + ERDMA_EPSTATE_LISTENING) + ret = erdma_proc_mpareq(cep); + else + ret = -EFAULT; + + erdma_cep_set_free(cep->listen_cep); + + if (ret != -EAGAIN) { + erdma_cep_put(cep->listen_cep); + cep->listen_cep = NULL; + if (ret) + erdma_cep_put(cep); + } + } + } else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) { + ret = erdma_proc_mpareply(cep); + } + + if (ret && ret != -EAGAIN) + release_cep = 1; + break; + case ERDMA_CM_WORK_CLOSE_LLP: + if (cep->cm_id) + erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0); + release_cep = 1; + break; + case ERDMA_CM_WORK_PEER_CLOSE: + if (cep->cm_id) { + if (cep->state == ERDMA_EPSTATE_CONNECTING || + cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) { + /* + * MPA reply not received, but connection drop + */ + erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, + -ECONNRESET); + } else if (cep->state == ERDMA_EPSTATE_RDMA_MODE) { + /* + * NOTE: IW_CM_EVENT_DISCONNECT is given just + * to transition IWCM into CLOSING. 
+ */ + erdma_cm_upcall(cep, IW_CM_EVENT_DISCONNECT, 0); + erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0); + } + } else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) { + /* Socket close before MPA request received. */ + erdma_disassoc_listen_cep(cep); + erdma_cep_put(cep); + } + release_cep = 1; + break; + case ERDMA_CM_WORK_MPATIMEOUT: + cep->mpa_timer = NULL; + if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) { + /* + * MPA request timed out: + * Hide any partially received private data and signal + * timeout + */ + cep->mpa.hdr.params.pd_len = 0; + + if (cep->cm_id) + erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, + -ETIMEDOUT); + release_cep = 1; + } else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) { + /* No MPA req received after peer TCP stream setup. */ + erdma_disassoc_listen_cep(cep); + + erdma_cep_put(cep); + release_cep = 1; + } + break; + default: + WARN(1, "Undefined CM work type: %d\n", work->type); + } + + if (release_cep) { + erdma_cancel_mpatimer(cep); + cep->state = ERDMA_EPSTATE_CLOSED; + if (cep->qp) { + struct erdma_qp *qp = cep->qp; + /* + * Serialize a potential race with application + * closing the QP and calling erdma_qp_cm_drop() + */ + erdma_qp_get(qp); + erdma_cep_set_free(cep); + + erdma_qp_llp_close(qp); + erdma_qp_put(qp); + + erdma_cep_set_inuse(cep); + cep->qp = NULL; + erdma_qp_put(qp); + } + + if (cep->sock) { + erdma_socket_disassoc(cep->sock); + sock_release(cep->sock); + cep->sock = NULL; + } + + if (cep->cm_id) { + cep->cm_id->rem_ref(cep->cm_id); + cep->cm_id = NULL; + if (cep->state != ERDMA_EPSTATE_LISTENING) + erdma_cep_put(cep); + } + } + erdma_cep_set_free(cep); + erdma_put_work(work); + erdma_cep_put(cep); +} + +int erdma_cm_queue_work(struct erdma_cep *cep, enum erdma_work_type type) +{ + struct erdma_cm_work *work = erdma_get_work(cep); + unsigned long delay = 0; + + if (!work) + return -ENOMEM; + + work->type = type; + work->cep = cep; + + erdma_cep_get(cep); + + INIT_DELAYED_WORK(&work->work, erdma_cm_work_handler); + + 
if (type == ERDMA_CM_WORK_MPATIMEOUT) { + cep->mpa_timer = work; + + if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) + delay = MPAREP_TIMEOUT; + else + delay = MPAREQ_TIMEOUT; + } else if (type == ERDMA_CM_WORK_CONNECTTIMEOUT) { + cep->mpa_timer = work; + + delay = CONNECT_TIMEOUT; + } + + queue_delayed_work(erdma_cm_wq, &work->work, delay); + + return 0; +} + +static void erdma_cm_llp_data_ready(struct sock *sk) +{ + struct erdma_cep *cep; + + read_lock(&sk->sk_callback_lock); + + cep = sk_to_cep(sk); + if (!cep) + goto out; + + if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ || + cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) + erdma_cm_queue_work(cep, ERDMA_CM_WORK_READ_MPAHDR); + +out: + read_unlock(&sk->sk_callback_lock); +} + +static void erdma_cm_llp_error_report(struct sock *sk) +{ + struct erdma_cep *cep = sk_to_cep(sk); + + if (cep) + cep->sk_error_report(sk); +} + +static void erdma_cm_llp_state_change(struct sock *sk) +{ + struct erdma_cep *cep; + void (*orig_state_change)(struct sock *sk); + + read_lock(&sk->sk_callback_lock); + + cep = sk_to_cep(sk); + if (!cep) { + read_unlock(&sk->sk_callback_lock); + return; + } + orig_state_change = cep->sk_state_change; + + switch (sk->sk_state) { + case TCP_ESTABLISHED: + if (cep->state == ERDMA_EPSTATE_CONNECTING) + erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTED); + else + erdma_cm_queue_work(cep, ERDMA_CM_WORK_ACCEPT); + break; + case TCP_CLOSE: + case TCP_CLOSE_WAIT: + if (cep->state != ERDMA_EPSTATE_LISTENING) + erdma_cm_queue_work(cep, ERDMA_CM_WORK_PEER_CLOSE); + break; + default: + break; + } + read_unlock(&sk->sk_callback_lock); + orig_state_change(sk); +} + +static int kernel_bindconnect(struct socket *s, struct sockaddr *laddr, + int laddrlen, struct sockaddr *raddr, + int raddrlen, int flags) +{ + int ret; + + sock_set_reuseaddr(s->sk); + ret = s->ops->bind(s, laddr, laddrlen); + if (ret) + return ret; + ret = s->ops->connect(s, raddr, raddrlen, flags); + return ret < 0 ? 
ret : 0; +} + +int erdma_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params) +{ + struct erdma_dev *dev = to_edev(id->device); + struct erdma_qp *qp; + struct erdma_cep *cep = NULL; + struct socket *s = NULL; + struct sockaddr *laddr = (struct sockaddr *)&id->m_local_addr; + struct sockaddr *raddr = (struct sockaddr *)&id->m_remote_addr; + u16 pd_len = params->private_data_len; + int ret; + + if (pd_len > MPA_MAX_PRIVDATA) + return -EINVAL; + + if (params->ird > dev->attrs.max_ird || + params->ord > dev->attrs.max_ord) + return -EINVAL; + + if (laddr->sa_family != AF_INET || raddr->sa_family != AF_INET) + return -EAFNOSUPPORT; + + qp = find_qp_by_qpn(dev, params->qpn); + if (!qp) + return -ENOENT; + erdma_qp_get(qp); + + ret = sock_create(AF_INET, SOCK_STREAM, IPPROTO_TCP, &s); + if (ret < 0) + goto error_put_qp; + + cep = erdma_cep_alloc(dev); + if (!cep) { + ret = -ENOMEM; + goto error_release_sock; + } + + erdma_cep_set_inuse(cep); + + /* Associate QP with CEP */ + erdma_cep_get(cep); + qp->cep = cep; + cep->qp = qp; + + /* Associate cm_id with CEP */ + id->add_ref(id); + cep->cm_id = id; + + /* + * 6: Allocate a sufficient number of work elements + * to allow concurrent handling of local + peer close + * events, MPA header processing + MPA timeout, connected event + * and connect timeout. 
+ */ + ret = erdma_cm_alloc_work(cep, 6); + if (ret != 0) { + ret = -ENOMEM; + goto error_release_cep; + } + + cep->ird = params->ird; + cep->ord = params->ord; + cep->state = ERDMA_EPSTATE_CONNECTING; + + erdma_cep_socket_assoc(cep, s); + + if (pd_len) { + cep->pd_len = pd_len; + cep->private_data = kmalloc(pd_len, GFP_KERNEL); + if (!cep->private_data) { + ret = -ENOMEM; + goto error_disassoc; + } + + memcpy(cep->private_data, params->private_data, + params->private_data_len); + } + + ret = kernel_bindconnect(s, laddr, sizeof(*laddr), raddr, + sizeof(*raddr), O_NONBLOCK); + if (ret != -EINPROGRESS && ret != 0) { + goto error_disassoc; + } else if (ret == 0) { + ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTED); + if (ret) + goto error_disassoc; + } else { + ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTTIMEOUT); + if (ret) + goto error_disassoc; + } + + erdma_cep_set_free(cep); + return 0; + +error_disassoc: + kfree(cep->private_data); + cep->private_data = NULL; + cep->pd_len = 0; + + erdma_socket_disassoc(s); + +error_release_cep: + /* disassoc with cm_id */ + cep->cm_id = NULL; + id->rem_ref(id); + + /* disassoc with qp */ + qp->cep = NULL; + erdma_cep_put(cep); + cep->qp = NULL; + + cep->state = ERDMA_EPSTATE_CLOSED; + + erdma_cep_set_free(cep); + + /* release the cep. 
*/ + erdma_cep_put(cep); + +error_release_sock: + if (s) + sock_release(s); +error_put_qp: + erdma_qp_put(qp); + + return ret; +} + +int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) +{ + struct erdma_dev *dev = to_edev(id->device); + struct erdma_cep *cep = (struct erdma_cep *)id->provider_data; + struct erdma_qp *qp; + struct erdma_qp_attrs qp_attrs; + int ret; + + erdma_cep_set_inuse(cep); + erdma_cep_put(cep); + + /* Free lingering inbound private data */ + if (cep->mpa.hdr.params.pd_len) { + cep->mpa.hdr.params.pd_len = 0; + kfree(cep->mpa.pdata); + cep->mpa.pdata = NULL; + } + erdma_cancel_mpatimer(cep); + + if (cep->state != ERDMA_EPSTATE_RECVD_MPAREQ) { + erdma_cep_set_free(cep); + erdma_cep_put(cep); + + return -ECONNRESET; + } + + qp = find_qp_by_qpn(dev, params->qpn); + if (!qp) + return -ENOENT; + erdma_qp_get(qp); + + down_write(&qp->state_lock); + if (qp->attrs.state > ERDMA_QP_STATE_RTR) { + ret = -EINVAL; + up_write(&qp->state_lock); + goto error; + } + + if (params->ord > dev->attrs.max_ord || + params->ird > dev->attrs.max_ord) { + ret = -EINVAL; + up_write(&qp->state_lock); + goto error; + } + + if (params->private_data_len > MPA_MAX_PRIVDATA) { + ret = -EINVAL; + up_write(&qp->state_lock); + goto error; + } + + cep->ird = params->ird; + cep->ord = params->ord; + + cep->cm_id = id; + id->add_ref(id); + + memset(&qp_attrs, 0, sizeof(qp_attrs)); + qp_attrs.orq_size = params->ord; + qp_attrs.irq_size = params->ird; + + qp_attrs.state = ERDMA_QP_STATE_RTS; + + /* Associate QP with CEP */ + erdma_cep_get(cep); + qp->cep = cep; + cep->qp = qp; + + cep->state = ERDMA_EPSTATE_RDMA_MODE; + + qp->attrs.qp_type = ERDMA_QP_PASSIVE; + qp->attrs.pd_len = params->private_data_len; + + if (qp->attrs.cc != __mpa_ext_cc(cep->mpa.ext_data.bits)) + qp->attrs.cc = COMPROMISE_CC; + + /* move to rts */ + ret = erdma_modify_qp_internal(qp, &qp_attrs, + ERDMA_QP_ATTR_STATE | + ERDMA_QP_ATTR_ORD | + ERDMA_QP_ATTR_LLP_HANDLE | + ERDMA_QP_ATTR_IRD | + 
ERDMA_QP_ATTR_MPA); + up_write(&qp->state_lock); + + if (ret) + goto error; + + cep->mpa.ext_data.bits = 0; + __mpa_ext_set_cc(&cep->mpa.ext_data.bits, qp->attrs.cc); + cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.cookie); + + ret = erdma_send_mpareqrep(cep, params->private_data, + params->private_data_len); + if (!ret) { + ret = erdma_cm_upcall(cep, IW_CM_EVENT_ESTABLISHED, 0); + if (ret) + goto error; + + erdma_cep_set_free(cep); + + return 0; + } + +error: + erdma_socket_disassoc(cep->sock); + sock_release(cep->sock); + cep->sock = NULL; + + cep->state = ERDMA_EPSTATE_CLOSED; + + if (cep->cm_id) { + cep->cm_id->rem_ref(id); + cep->cm_id = NULL; + } + + if (qp->cep) { + erdma_cep_put(cep); + qp->cep = NULL; + } + + cep->qp = NULL; + erdma_qp_put(qp); + + erdma_cep_set_free(cep); + erdma_cep_put(cep); + + return ret; +} + +int erdma_reject(struct iw_cm_id *id, const void *pdata, u8 plen) +{ + struct erdma_cep *cep = (struct erdma_cep *)id->provider_data; + + erdma_cep_set_inuse(cep); + erdma_cep_put(cep); + + erdma_cancel_mpatimer(cep); + + if (cep->state != ERDMA_EPSTATE_RECVD_MPAREQ) { + erdma_cep_set_free(cep); + erdma_cep_put(cep); + + return -ECONNRESET; + } + + if (__mpa_rr_revision(cep->mpa.hdr.params.bits) == MPA_REVISION_EXT_1) { + cep->mpa.hdr.params.bits |= MPA_RR_FLAG_REJECT; /* reject */ + erdma_send_mpareqrep(cep, pdata, plen); + } + + erdma_socket_disassoc(cep->sock); + sock_release(cep->sock); + cep->sock = NULL; + + cep->state = ERDMA_EPSTATE_CLOSED; + + erdma_cep_set_free(cep); + erdma_cep_put(cep); + + return 0; +} + +int erdma_create_listen(struct iw_cm_id *id, int backlog) +{ + struct socket *s; + struct erdma_cep *cep = NULL; + int ret = 0; + struct erdma_dev *dev = to_edev(id->device); + int addr_family = id->local_addr.ss_family; + struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr); + + if (addr_family != AF_INET) + return -EAFNOSUPPORT; + + ret = sock_create(addr_family, SOCK_STREAM, IPPROTO_TCP, &s); + if (ret < 0) + 
return ret; + + sock_set_reuseaddr(s->sk); + + /* For wildcard addr, limit binding to current device only */ + if (ipv4_is_zeronet(laddr->sin_addr.s_addr)) + s->sk->sk_bound_dev_if = dev->netdev->ifindex; + + ret = s->ops->bind(s, (struct sockaddr *)laddr, + sizeof(struct sockaddr_in)); + if (ret) + goto error; + + cep = erdma_cep_alloc(dev); + if (!cep) { + ret = -ENOMEM; + goto error; + } + erdma_cep_socket_assoc(cep, s); + + ret = erdma_cm_alloc_work(cep, backlog); + if (ret) + goto error; + + ret = s->ops->listen(s, backlog); + if (ret) + goto error; + + cep->cm_id = id; + id->add_ref(id); + + if (!id->provider_data) { + id->provider_data = + kmalloc(sizeof(struct list_head), GFP_KERNEL); + if (!id->provider_data) { + ret = -ENOMEM; + goto error; + } + INIT_LIST_HEAD((struct list_head *)id->provider_data); + } + + list_add_tail(&cep->listenq, (struct list_head *)id->provider_data); + cep->state = ERDMA_EPSTATE_LISTENING; + + return 0; + +error: + if (cep) { + erdma_cep_set_inuse(cep); + + if (cep->cm_id) { + cep->cm_id->rem_ref(cep->cm_id); + cep->cm_id = NULL; + } + cep->sock = NULL; + erdma_socket_disassoc(s); + cep->state = ERDMA_EPSTATE_CLOSED; + + erdma_cep_set_free(cep); + erdma_cep_put(cep); + } + sock_release(s); + + return ret; +} + +static void erdma_drop_listeners(struct iw_cm_id *id) +{ + struct list_head *p, *tmp; + /* + * In case of a wildcard rdma_listen on a multi-homed device, + * a listener's IWCM id is associated with more than one listening CEP. 
+ */ + list_for_each_safe(p, tmp, (struct list_head *)id->provider_data) { + struct erdma_cep *cep = + list_entry(p, struct erdma_cep, listenq); + + list_del(p); + + erdma_cep_set_inuse(cep); + + if (cep->cm_id) { + cep->cm_id->rem_ref(cep->cm_id); + cep->cm_id = NULL; + } + if (cep->sock) { + erdma_socket_disassoc(cep->sock); + sock_release(cep->sock); + cep->sock = NULL; + } + cep->state = ERDMA_EPSTATE_CLOSED; + erdma_cep_set_free(cep); + erdma_cep_put(cep); + } +} + +int erdma_destroy_listen(struct iw_cm_id *id) +{ + if (!id->provider_data) + return 0; + + erdma_drop_listeners(id); + kfree(id->provider_data); + id->provider_data = NULL; + + return 0; +} + +int erdma_cm_init(void) +{ + erdma_cm_wq = create_singlethread_workqueue("erdma_cm_wq"); + if (!erdma_cm_wq) + return -ENOMEM; + + return 0; +} + +void erdma_cm_exit(void) +{ + if (erdma_cm_wq) + destroy_workqueue(erdma_cm_wq); +} diff --git a/drivers/infiniband/hw/erdma/erdma_cm.h b/drivers/infiniband/hw/erdma/erdma_cm.h new file mode 100644 index 000000000000..8a3f998fec9b --- /dev/null +++ b/drivers/infiniband/hw/erdma/erdma_cm.h @@ -0,0 +1,167 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ + +/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */ +/* Kai Shen <kaishen@linux.alibaba.com> */ +/* Copyright (c) 2020-2022, Alibaba Group. */ + +/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ +/* Greg Joyce <greg@opengridcomputing.com> */ +/* Copyright (c) 2008-2019, IBM Corporation */ +/* Copyright (c) 2017, Open Grid Computing, Inc. 
*/ + +#ifndef __ERDMA_CM_H__ +#define __ERDMA_CM_H__ + +#include <linux/tcp.h> +#include <net/sock.h> +#include <rdma/iw_cm.h> + +/* iWarp MPA protocol defs */ +#define MPA_REVISION_EXT_1 129 +#define MPA_MAX_PRIVDATA RDMA_MAX_PRIVATE_DATA +#define MPA_KEY_REQ "MPA ID Req Frame" +#define MPA_KEY_REP "MPA ID Rep Frame" +#define MPA_KEY_SIZE 16 +#define MPA_DEFAULT_HDR_LEN 28 + +struct mpa_rr_params { + __be16 bits; + __be16 pd_len; +}; + +/* + * MPA request/response Hdr bits & fields + */ +enum { + MPA_RR_FLAG_MARKERS = __cpu_to_be16(0x8000), + MPA_RR_FLAG_CRC = __cpu_to_be16(0x4000), + MPA_RR_FLAG_REJECT = __cpu_to_be16(0x2000), + MPA_RR_RESERVED = __cpu_to_be16(0x1f00), + MPA_RR_MASK_REVISION = __cpu_to_be16(0x00ff) +}; + +/* + * MPA request/reply header + */ +struct mpa_rr { + u8 key[16]; + struct mpa_rr_params params; +}; + +struct erdma_mpa_ext { + __be32 cookie; + __be32 bits; +}; + +enum { + MPA_EXT_FLAG_CC = cpu_to_be32(0x0000000f), +}; + +struct erdma_mpa_info { + struct mpa_rr hdr; /* peer mpa hdr in host byte order */ + struct erdma_mpa_ext ext_data; + char *pdata; + int bytes_rcvd; +}; + +struct erdma_sk_upcalls { + void (*sk_state_change)(struct sock *sk); + void (*sk_data_ready)(struct sock *sk, int bytes); + void (*sk_error_report)(struct sock *sk); +}; + +struct erdma_dev; + +enum erdma_cep_state { + ERDMA_EPSTATE_IDLE = 1, + ERDMA_EPSTATE_LISTENING, + ERDMA_EPSTATE_CONNECTING, + ERDMA_EPSTATE_AWAIT_MPAREQ, + ERDMA_EPSTATE_RECVD_MPAREQ, + ERDMA_EPSTATE_AWAIT_MPAREP, + ERDMA_EPSTATE_RDMA_MODE, + ERDMA_EPSTATE_CLOSED +}; + +struct erdma_cep { + struct iw_cm_id *cm_id; + struct erdma_dev *dev; + struct list_head devq; + spinlock_t lock; + struct kref ref; + int in_use; + wait_queue_head_t waitq; + enum erdma_cep_state state; + + struct list_head listenq; + struct erdma_cep *listen_cep; + + struct erdma_qp *qp; + struct socket *sock; + + struct erdma_cm_work *mpa_timer; + struct list_head work_freelist; + + struct erdma_mpa_info mpa; + int ord; + int 
ird; + + int pd_len; + /* hold user's private data. */ + void *private_data; + + /* Saved upcalls of socket llp.sock */ + void (*sk_state_change)(struct sock *sk); + void (*sk_data_ready)(struct sock *sk); + void (*sk_error_report)(struct sock *sk); +}; + +#define MPAREQ_TIMEOUT (HZ * 20) +#define MPAREP_TIMEOUT (HZ * 10) +#define CONNECT_TIMEOUT (HZ * 10) + +enum erdma_work_type { + ERDMA_CM_WORK_ACCEPT = 1, + ERDMA_CM_WORK_READ_MPAHDR, + ERDMA_CM_WORK_CLOSE_LLP, /* close socket */ + ERDMA_CM_WORK_PEER_CLOSE, /* socket indicated peer close */ + ERDMA_CM_WORK_MPATIMEOUT, + ERDMA_CM_WORK_CONNECTED, + ERDMA_CM_WORK_CONNECTTIMEOUT +}; + +struct erdma_cm_work { + struct delayed_work work; + struct list_head list; + enum erdma_work_type type; + struct erdma_cep *cep; +}; + +#define to_sockaddr_in(a) (*(struct sockaddr_in *)(&(a))) + +static inline int getname_peer(struct socket *s, struct sockaddr_storage *a) +{ + return s->ops->getname(s, (struct sockaddr *)a, 1); +} + +static inline int getname_local(struct socket *s, struct sockaddr_storage *a) +{ + return s->ops->getname(s, (struct sockaddr *)a, 0); +} + +int erdma_connect(struct iw_cm_id *id, struct iw_cm_conn_param *param); +int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *param); +int erdma_reject(struct iw_cm_id *id, const void *pdata, u8 plen); +int erdma_create_listen(struct iw_cm_id *id, int backlog); +int erdma_destroy_listen(struct iw_cm_id *id); + +void erdma_cep_get(struct erdma_cep *ceq); +void erdma_cep_put(struct erdma_cep *ceq); +int erdma_cm_queue_work(struct erdma_cep *ceq, enum erdma_work_type type); + +int erdma_cm_init(void); +void erdma_cm_exit(void); + +#define sk_to_cep(sk) ((struct erdma_cep *)((sk)->sk_user_data)) + +#endif diff --git a/drivers/infiniband/hw/erdma/erdma_cmdq.c b/drivers/infiniband/hw/erdma/erdma_cmdq.c new file mode 100644 index 000000000000..6ebfa6989b11 --- /dev/null +++ b/drivers/infiniband/hw/erdma/erdma_cmdq.c @@ -0,0 +1,487 @@ +// SPDX-License-Identifier: 
GPL-2.0 OR BSD-3-Clause + +/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */ +/* Kai Shen <kaishen@linux.alibaba.com> */ +/* Copyright (c) 2020-2022, Alibaba Group. */ + +#include "erdma.h" + +static void arm_cmdq_cq(struct erdma_cmdq *cmdq) +{ + struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq); + u64 db_data = FIELD_PREP(ERDMA_CQDB_CI_MASK, cmdq->cq.ci) | + FIELD_PREP(ERDMA_CQDB_ARM_MASK, 1) | + FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cmdq->cq.cmdsn) | + FIELD_PREP(ERDMA_CQDB_IDX_MASK, cmdq->cq.cmdsn); + + *cmdq->cq.db_record = db_data; + writeq(db_data, dev->func_bar + ERDMA_CMDQ_CQDB_REG); + + atomic64_inc(&cmdq->cq.armed_num); +} + +static void kick_cmdq_db(struct erdma_cmdq *cmdq) +{ + struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq); + u64 db_data = FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi); + + *cmdq->sq.db_record = db_data; + writeq(db_data, dev->func_bar + ERDMA_CMDQ_SQDB_REG); +} + +static struct erdma_comp_wait *get_comp_wait(struct erdma_cmdq *cmdq) +{ + int comp_idx; + + spin_lock(&cmdq->lock); + comp_idx = find_first_zero_bit(cmdq->comp_wait_bitmap, + cmdq->max_outstandings); + if (comp_idx == cmdq->max_outstandings) { + spin_unlock(&cmdq->lock); + return ERR_PTR(-ENOMEM); + } + + __set_bit(comp_idx, cmdq->comp_wait_bitmap); + spin_unlock(&cmdq->lock); + + return &cmdq->wait_pool[comp_idx]; +} + +static void put_comp_wait(struct erdma_cmdq *cmdq, + struct erdma_comp_wait *comp_wait) +{ + int used; + + cmdq->wait_pool[comp_wait->ctx_id].cmd_status = ERDMA_CMD_STATUS_INIT; + spin_lock(&cmdq->lock); + used = __test_and_clear_bit(comp_wait->ctx_id, cmdq->comp_wait_bitmap); + spin_unlock(&cmdq->lock); + + WARN_ON(!used); +} + +static int erdma_cmdq_wait_res_init(struct erdma_dev *dev, + struct erdma_cmdq *cmdq) +{ + int i; + + cmdq->wait_pool = + devm_kcalloc(&dev->pdev->dev, cmdq->max_outstandings, + sizeof(struct erdma_comp_wait), GFP_KERNEL); + if (!cmdq->wait_pool) + return -ENOMEM; + + 
spin_lock_init(&cmdq->lock); + cmdq->comp_wait_bitmap = devm_bitmap_zalloc( + &dev->pdev->dev, cmdq->max_outstandings, GFP_KERNEL); + if (!cmdq->comp_wait_bitmap) + return -ENOMEM; + + for (i = 0; i < cmdq->max_outstandings; i++) { + init_completion(&cmdq->wait_pool[i].wait_event); + cmdq->wait_pool[i].ctx_id = i; + } + + return 0; +} + +static int erdma_cmdq_sq_init(struct erdma_dev *dev) +{ + struct erdma_cmdq *cmdq = &dev->cmdq; + struct erdma_cmdq_sq *sq = &cmdq->sq; + u32 buf_size; + + sq->wqebb_cnt = SQEBB_COUNT(ERDMA_CMDQ_SQE_SIZE); + sq->depth = cmdq->max_outstandings * sq->wqebb_cnt; + + buf_size = sq->depth << SQEBB_SHIFT; + + sq->qbuf = + dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size), + &sq->qbuf_dma_addr, GFP_KERNEL); + if (!sq->qbuf) + return -ENOMEM; + + sq->db_record = (u64 *)(sq->qbuf + buf_size); + + spin_lock_init(&sq->lock); + + erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_H_REG, + upper_32_bits(sq->qbuf_dma_addr)); + erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_L_REG, + lower_32_bits(sq->qbuf_dma_addr)); + erdma_reg_write32(dev, ERDMA_REGS_CMDQ_DEPTH_REG, sq->depth); + erdma_reg_write64(dev, ERDMA_CMDQ_SQ_DB_HOST_ADDR_REG, + sq->qbuf_dma_addr + buf_size); + + return 0; +} + +static int erdma_cmdq_cq_init(struct erdma_dev *dev) +{ + struct erdma_cmdq *cmdq = &dev->cmdq; + struct erdma_cmdq_cq *cq = &cmdq->cq; + u32 buf_size; + + cq->depth = cmdq->sq.depth; + buf_size = cq->depth << CQE_SHIFT; + + cq->qbuf = + dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size), + &cq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO); + if (!cq->qbuf) + return -ENOMEM; + + spin_lock_init(&cq->lock); + + cq->db_record = (u64 *)(cq->qbuf + buf_size); + + atomic64_set(&cq->armed_num, 0); + + erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_H_REG, + upper_32_bits(cq->qbuf_dma_addr)); + erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_L_REG, + lower_32_bits(cq->qbuf_dma_addr)); + erdma_reg_write64(dev, ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG, + 
cq->qbuf_dma_addr + buf_size); + + return 0; +} + +static int erdma_cmdq_eq_init(struct erdma_dev *dev) +{ + struct erdma_cmdq *cmdq = &dev->cmdq; + struct erdma_eq *eq = &cmdq->eq; + u32 buf_size; + + eq->depth = cmdq->max_outstandings; + buf_size = eq->depth << EQE_SHIFT; + + eq->qbuf = + dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size), + &eq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO); + if (!eq->qbuf) + return -ENOMEM; + + spin_lock_init(&eq->lock); + atomic64_set(&eq->event_num, 0); + + eq->db_addr = + (u64 __iomem *)(dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG); + eq->db_record = (u64 *)(eq->qbuf + buf_size); + + erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_H_REG, + upper_32_bits(eq->qbuf_dma_addr)); + erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_L_REG, + lower_32_bits(eq->qbuf_dma_addr)); + erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_DEPTH_REG, eq->depth); + erdma_reg_write64(dev, ERDMA_CMDQ_EQ_DB_HOST_ADDR_REG, + eq->qbuf_dma_addr + buf_size); + + return 0; +} + +int erdma_cmdq_init(struct erdma_dev *dev) +{ + int err, i; + struct erdma_cmdq *cmdq = &dev->cmdq; + u32 sts, ctrl; + + cmdq->max_outstandings = ERDMA_CMDQ_MAX_OUTSTANDING; + cmdq->use_event = false; + + sema_init(&cmdq->credits, cmdq->max_outstandings); + + err = erdma_cmdq_wait_res_init(dev, cmdq); + if (err) + return err; + + err = erdma_cmdq_sq_init(dev); + if (err) + return err; + + err = erdma_cmdq_cq_init(dev); + if (err) + goto err_destroy_sq; + + err = erdma_cmdq_eq_init(dev); + if (err) + goto err_destroy_cq; + + ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_INIT_MASK, 1); + erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl); + + for (i = 0; i < ERDMA_WAIT_DEV_DONE_CNT; i++) { + sts = erdma_reg_read32_filed(dev, ERDMA_REGS_DEV_ST_REG, + ERDMA_REG_DEV_ST_INIT_DONE_MASK); + if (sts) + break; + + msleep(ERDMA_REG_ACCESS_WAIT_MS); + } + + if (i == ERDMA_WAIT_DEV_DONE_CNT) { + dev_err(&dev->pdev->dev, "wait init done failed.\n"); + err = -ETIMEDOUT; + goto err_destroy_eq; + } + + 
set_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state); + + return 0; + +err_destroy_eq: + dma_free_coherent(&dev->pdev->dev, + (cmdq->eq.depth << EQE_SHIFT) + + ERDMA_EXTRA_BUFFER_SIZE, + cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr); + +err_destroy_cq: + dma_free_coherent(&dev->pdev->dev, + (cmdq->cq.depth << CQE_SHIFT) + + ERDMA_EXTRA_BUFFER_SIZE, + cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr); + +err_destroy_sq: + dma_free_coherent(&dev->pdev->dev, + (cmdq->sq.depth << SQEBB_SHIFT) + + ERDMA_EXTRA_BUFFER_SIZE, + cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr); + + return err; +} + +void erdma_finish_cmdq_init(struct erdma_dev *dev) +{ + /* after device init successfully, change cmdq to event mode. */ + dev->cmdq.use_event = true; + arm_cmdq_cq(&dev->cmdq); +} + +void erdma_cmdq_destroy(struct erdma_dev *dev) +{ + struct erdma_cmdq *cmdq = &dev->cmdq; + + clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state); + + dma_free_coherent(&dev->pdev->dev, + (cmdq->eq.depth << EQE_SHIFT) + + ERDMA_EXTRA_BUFFER_SIZE, + cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr); + dma_free_coherent(&dev->pdev->dev, + (cmdq->sq.depth << SQEBB_SHIFT) + + ERDMA_EXTRA_BUFFER_SIZE, + cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr); + dma_free_coherent(&dev->pdev->dev, + (cmdq->cq.depth << CQE_SHIFT) + + ERDMA_EXTRA_BUFFER_SIZE, + cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr); +} + +static void *get_next_valid_cmdq_cqe(struct erdma_cmdq *cmdq) +{ + __be32 *cqe = get_queue_entry(cmdq->cq.qbuf, cmdq->cq.ci, + cmdq->cq.depth, CQE_SHIFT); + u32 owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK, + __be32_to_cpu(READ_ONCE(*cqe))); + + return owner ^ !!(cmdq->cq.ci & cmdq->cq.depth) ? 
cqe : NULL; +} + +static void push_cmdq_sqe(struct erdma_cmdq *cmdq, u64 *req, size_t req_len, + struct erdma_comp_wait *comp_wait) +{ + __le64 *wqe; + u64 hdr = *req; + + comp_wait->cmd_status = ERDMA_CMD_STATUS_ISSUED; + reinit_completion(&comp_wait->wait_event); + comp_wait->sq_pi = cmdq->sq.pi; + + wqe = get_queue_entry(cmdq->sq.qbuf, cmdq->sq.pi, cmdq->sq.depth, + SQEBB_SHIFT); + memcpy(wqe, req, req_len); + + cmdq->sq.pi += cmdq->sq.wqebb_cnt; + hdr |= FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi) | + FIELD_PREP(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK, + comp_wait->ctx_id) | + FIELD_PREP(ERDMA_CMD_HDR_WQEBB_CNT_MASK, cmdq->sq.wqebb_cnt - 1); + *wqe = cpu_to_le64(hdr); + + kick_cmdq_db(cmdq); +} + +static int erdma_poll_single_cmd_completion(struct erdma_cmdq *cmdq) +{ + struct erdma_comp_wait *comp_wait; + u32 hdr0, sqe_idx; + __be32 *cqe; + u16 ctx_id; + u64 *sqe; + int i; + + cqe = get_next_valid_cmdq_cqe(cmdq); + if (!cqe) + return -EAGAIN; + + cmdq->cq.ci++; + + dma_rmb(); + hdr0 = __be32_to_cpu(*cqe); + sqe_idx = __be32_to_cpu(*(cqe + 1)); + + sqe = get_queue_entry(cmdq->sq.qbuf, sqe_idx, cmdq->sq.depth, + SQEBB_SHIFT); + ctx_id = FIELD_GET(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK, *sqe); + comp_wait = &cmdq->wait_pool[ctx_id]; + if (comp_wait->cmd_status != ERDMA_CMD_STATUS_ISSUED) + return -EIO; + + comp_wait->cmd_status = ERDMA_CMD_STATUS_FINISHED; + comp_wait->comp_status = FIELD_GET(ERDMA_CQE_HDR_SYNDROME_MASK, hdr0); + cmdq->sq.ci += cmdq->sq.wqebb_cnt; + + for (i = 0; i < 4; i++) + comp_wait->comp_data[i] = __be32_to_cpu(*(cqe + 2 + i)); + + if (cmdq->use_event) + complete(&comp_wait->wait_event); + + return 0; +} + +static void erdma_polling_cmd_completions(struct erdma_cmdq *cmdq) +{ + unsigned long flags; + u16 comp_num; + + spin_lock_irqsave(&cmdq->cq.lock, flags); + + /* We must have less than # of max_outstandings + * completions at one time. 
+ */ + for (comp_num = 0; comp_num < cmdq->max_outstandings; comp_num++) + if (erdma_poll_single_cmd_completion(cmdq)) + break; + + if (comp_num && cmdq->use_event) + arm_cmdq_cq(cmdq); + + spin_unlock_irqrestore(&cmdq->cq.lock, flags); +} + +void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq) +{ + int got_event = 0; + + if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state) || + !cmdq->use_event) + return; + + while (get_next_valid_eqe(&cmdq->eq)) { + cmdq->eq.ci++; + got_event++; + } + + if (got_event) { + cmdq->cq.cmdsn++; + erdma_polling_cmd_completions(cmdq); + } + + notify_eq(&cmdq->eq); +} + +static int erdma_poll_cmd_completion(struct erdma_comp_wait *comp_ctx, + struct erdma_cmdq *cmdq, u32 timeout) +{ + unsigned long comp_timeout = jiffies + msecs_to_jiffies(timeout); + + while (1) { + erdma_polling_cmd_completions(cmdq); + if (comp_ctx->cmd_status != ERDMA_CMD_STATUS_ISSUED) + break; + + if (time_is_before_jiffies(comp_timeout)) + return -ETIME; + + msleep(20); + } + + return 0; +} + +static int erdma_wait_cmd_completion(struct erdma_comp_wait *comp_ctx, + struct erdma_cmdq *cmdq, u32 timeout) +{ + unsigned long flags = 0; + + wait_for_completion_timeout(&comp_ctx->wait_event, + msecs_to_jiffies(timeout)); + + if (unlikely(comp_ctx->cmd_status != ERDMA_CMD_STATUS_FINISHED)) { + spin_lock_irqsave(&cmdq->cq.lock, flags); + comp_ctx->cmd_status = ERDMA_CMD_STATUS_TIMEOUT; + spin_unlock_irqrestore(&cmdq->cq.lock, flags); + return -ETIME; + } + + return 0; +} + +void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op) +{ + *hdr = FIELD_PREP(ERDMA_CMD_HDR_SUB_MOD_MASK, mod) | + FIELD_PREP(ERDMA_CMD_HDR_OPCODE_MASK, op); +} + +int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size, + u64 *resp0, u64 *resp1) +{ + struct erdma_comp_wait *comp_wait; + int ret; + + if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state)) + return -ENODEV; + + down(&cmdq->credits); + + comp_wait = get_comp_wait(cmdq); + if (IS_ERR(comp_wait)) { + 
clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state); + set_bit(ERDMA_CMDQ_STATE_CTX_ERR_BIT, &cmdq->state); + up(&cmdq->credits); + return PTR_ERR(comp_wait); + } + + spin_lock(&cmdq->sq.lock); + push_cmdq_sqe(cmdq, req, req_size, comp_wait); + spin_unlock(&cmdq->sq.lock); + + if (cmdq->use_event) + ret = erdma_wait_cmd_completion(comp_wait, cmdq, + ERDMA_CMDQ_TIMEOUT_MS); + else + ret = erdma_poll_cmd_completion(comp_wait, cmdq, + ERDMA_CMDQ_TIMEOUT_MS); + + if (ret) { + set_bit(ERDMA_CMDQ_STATE_TIMEOUT_BIT, &cmdq->state); + clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state); + goto out; + } + + if (comp_wait->comp_status) + ret = -EIO; + + if (resp0 && resp1) { + *resp0 = *((u64 *)&comp_wait->comp_data[0]); + *resp1 = *((u64 *)&comp_wait->comp_data[2]); + } + put_comp_wait(cmdq, comp_wait); + +out: + up(&cmdq->credits); + + return ret; +} diff --git a/drivers/infiniband/hw/erdma/erdma_cq.c b/drivers/infiniband/hw/erdma/erdma_cq.c new file mode 100644 index 000000000000..58e0dc5c75d1 --- /dev/null +++ b/drivers/infiniband/hw/erdma/erdma_cq.c @@ -0,0 +1,201 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */ +/* Kai Shen <kaishen@linux.alibaba.com> */ +/* Copyright (c) 2020-2022, Alibaba Group. */ + +#include "erdma_verbs.h" + +static void *get_next_valid_cqe(struct erdma_cq *cq) +{ + __be32 *cqe = get_queue_entry(cq->kern_cq.qbuf, cq->kern_cq.ci, + cq->depth, CQE_SHIFT); + u32 owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK, + __be32_to_cpu(READ_ONCE(*cqe))); + + return owner ^ !!(cq->kern_cq.ci & cq->depth) ? 
cqe : NULL; +} + +static void notify_cq(struct erdma_cq *cq, u8 solcitied) +{ + u64 db_data = + FIELD_PREP(ERDMA_CQDB_IDX_MASK, (cq->kern_cq.notify_cnt)) | + FIELD_PREP(ERDMA_CQDB_CQN_MASK, cq->cqn) | + FIELD_PREP(ERDMA_CQDB_ARM_MASK, 1) | + FIELD_PREP(ERDMA_CQDB_SOL_MASK, solcitied) | + FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cq->kern_cq.cmdsn) | + FIELD_PREP(ERDMA_CQDB_CI_MASK, cq->kern_cq.ci); + + *cq->kern_cq.db_record = db_data; + writeq(db_data, cq->kern_cq.db); +} + +int erdma_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) +{ + struct erdma_cq *cq = to_ecq(ibcq); + unsigned long irq_flags; + int ret = 0; + + spin_lock_irqsave(&cq->kern_cq.lock, irq_flags); + + notify_cq(cq, (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED); + + if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && get_next_valid_cqe(cq)) + ret = 1; + + cq->kern_cq.notify_cnt++; + + spin_unlock_irqrestore(&cq->kern_cq.lock, irq_flags); + + return ret; +} + +static const enum ib_wc_opcode wc_mapping_table[ERDMA_NUM_OPCODES] = { + [ERDMA_OP_WRITE] = IB_WC_RDMA_WRITE, + [ERDMA_OP_READ] = IB_WC_RDMA_READ, + [ERDMA_OP_SEND] = IB_WC_SEND, + [ERDMA_OP_SEND_WITH_IMM] = IB_WC_SEND, + [ERDMA_OP_RECEIVE] = IB_WC_RECV, + [ERDMA_OP_RECV_IMM] = IB_WC_RECV_RDMA_WITH_IMM, + [ERDMA_OP_RECV_INV] = IB_WC_RECV, + [ERDMA_OP_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, + [ERDMA_OP_RSP_SEND_IMM] = IB_WC_RECV, + [ERDMA_OP_SEND_WITH_INV] = IB_WC_SEND, + [ERDMA_OP_REG_MR] = IB_WC_REG_MR, + [ERDMA_OP_LOCAL_INV] = IB_WC_LOCAL_INV, + [ERDMA_OP_READ_WITH_INV] = IB_WC_RDMA_READ, +}; + +static const struct { + enum erdma_wc_status erdma; + enum ib_wc_status base; + enum erdma_vendor_err vendor; +} map_cqe_status[ERDMA_NUM_WC_STATUS] = { + { ERDMA_WC_SUCCESS, IB_WC_SUCCESS, ERDMA_WC_VENDOR_NO_ERR }, + { ERDMA_WC_GENERAL_ERR, IB_WC_GENERAL_ERR, ERDMA_WC_VENDOR_NO_ERR }, + { ERDMA_WC_RECV_WQE_FORMAT_ERR, IB_WC_GENERAL_ERR, + ERDMA_WC_VENDOR_INVALID_RQE }, + { ERDMA_WC_RECV_STAG_INVALID_ERR, IB_WC_REM_ACCESS_ERR, + 
ERDMA_WC_VENDOR_RQE_INVALID_STAG }, + { ERDMA_WC_RECV_ADDR_VIOLATION_ERR, IB_WC_REM_ACCESS_ERR, + ERDMA_WC_VENDOR_RQE_ADDR_VIOLATION }, + { ERDMA_WC_RECV_RIGHT_VIOLATION_ERR, IB_WC_REM_ACCESS_ERR, + ERDMA_WC_VENDOR_RQE_ACCESS_RIGHT_ERR }, + { ERDMA_WC_RECV_PDID_ERR, IB_WC_REM_ACCESS_ERR, + ERDMA_WC_VENDOR_RQE_INVALID_PD }, + { ERDMA_WC_RECV_WARRPING_ERR, IB_WC_REM_ACCESS_ERR, + ERDMA_WC_VENDOR_RQE_WRAP_ERR }, + { ERDMA_WC_SEND_WQE_FORMAT_ERR, IB_WC_LOC_QP_OP_ERR, + ERDMA_WC_VENDOR_INVALID_SQE }, + { ERDMA_WC_SEND_WQE_ORD_EXCEED, IB_WC_GENERAL_ERR, + ERDMA_WC_VENDOR_ZERO_ORD }, + { ERDMA_WC_SEND_STAG_INVALID_ERR, IB_WC_LOC_ACCESS_ERR, + ERDMA_WC_VENDOR_SQE_INVALID_STAG }, + { ERDMA_WC_SEND_ADDR_VIOLATION_ERR, IB_WC_LOC_ACCESS_ERR, + ERDMA_WC_VENDOR_SQE_ADDR_VIOLATION }, + { ERDMA_WC_SEND_RIGHT_VIOLATION_ERR, IB_WC_LOC_ACCESS_ERR, + ERDMA_WC_VENDOR_SQE_ACCESS_ERR }, + { ERDMA_WC_SEND_PDID_ERR, IB_WC_LOC_ACCESS_ERR, + ERDMA_WC_VENDOR_SQE_INVALID_PD }, + { ERDMA_WC_SEND_WARRPING_ERR, IB_WC_LOC_ACCESS_ERR, + ERDMA_WC_VENDOR_SQE_WARP_ERR }, + { ERDMA_WC_FLUSH_ERR, IB_WC_WR_FLUSH_ERR, ERDMA_WC_VENDOR_NO_ERR }, + { ERDMA_WC_RETRY_EXC_ERR, IB_WC_RETRY_EXC_ERR, ERDMA_WC_VENDOR_NO_ERR }, +}; + +#define ERDMA_POLLCQ_NO_QP 1 + +static int erdma_poll_one_cqe(struct erdma_cq *cq, struct ib_wc *wc) +{ + struct erdma_dev *dev = to_edev(cq->ibcq.device); + u8 opcode, syndrome, qtype; + struct erdma_kqp *kern_qp; + struct erdma_cqe *cqe; + struct erdma_qp *qp; + u16 wqe_idx, depth; + u32 qpn, cqe_hdr; + u64 *id_table; + u64 *wqe_hdr; + + cqe = get_next_valid_cqe(cq); + if (!cqe) + return -EAGAIN; + + cq->kern_cq.ci++; + + /* cqbuf should be ready when we poll */ + dma_rmb(); + + qpn = be32_to_cpu(cqe->qpn); + wqe_idx = be32_to_cpu(cqe->qe_idx); + cqe_hdr = be32_to_cpu(cqe->hdr); + + qp = find_qp_by_qpn(dev, qpn); + if (!qp) + return ERDMA_POLLCQ_NO_QP; + + kern_qp = &qp->kern_qp; + + qtype = FIELD_GET(ERDMA_CQE_HDR_QTYPE_MASK, cqe_hdr); + syndrome = 
FIELD_GET(ERDMA_CQE_HDR_SYNDROME_MASK, cqe_hdr); + opcode = FIELD_GET(ERDMA_CQE_HDR_OPCODE_MASK, cqe_hdr); + + if (qtype == ERDMA_CQE_QTYPE_SQ) { + id_table = kern_qp->swr_tbl; + depth = qp->attrs.sq_size; + wqe_hdr = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx, + qp->attrs.sq_size, SQEBB_SHIFT); + kern_qp->sq_ci = + FIELD_GET(ERDMA_SQE_HDR_WQEBB_CNT_MASK, *wqe_hdr) + + wqe_idx + 1; + } else { + id_table = kern_qp->rwr_tbl; + depth = qp->attrs.rq_size; + } + wc->wr_id = id_table[wqe_idx & (depth - 1)]; + wc->byte_len = be32_to_cpu(cqe->size); + + wc->wc_flags = 0; + + wc->opcode = wc_mapping_table[opcode]; + if (opcode == ERDMA_OP_RECV_IMM || opcode == ERDMA_OP_RSP_SEND_IMM) { + wc->ex.imm_data = cpu_to_be32(le32_to_cpu(cqe->imm_data)); + wc->wc_flags |= IB_WC_WITH_IMM; + } else if (opcode == ERDMA_OP_RECV_INV) { + wc->ex.invalidate_rkey = be32_to_cpu(cqe->inv_rkey); + wc->wc_flags |= IB_WC_WITH_INVALIDATE; + } + + if (syndrome >= ERDMA_NUM_WC_STATUS) + syndrome = ERDMA_WC_GENERAL_ERR; + + wc->status = map_cqe_status[syndrome].base; + wc->vendor_err = map_cqe_status[syndrome].vendor; + wc->qp = &qp->ibqp; + + return 0; +} + +int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) +{ + struct erdma_cq *cq = to_ecq(ibcq); + unsigned long flags; + int npolled, ret; + + spin_lock_irqsave(&cq->kern_cq.lock, flags); + + for (npolled = 0; npolled < num_entries;) { + ret = erdma_poll_one_cqe(cq, wc + npolled); + + if (ret == -EAGAIN) /* no received new CQEs. */ + break; + else if (ret) /* ignore invalid CQEs. 
*/ + continue; + + npolled++; + } + + spin_unlock_irqrestore(&cq->kern_cq.lock, flags); + + return npolled; +} diff --git a/drivers/infiniband/hw/erdma/erdma_eq.c b/drivers/infiniband/hw/erdma/erdma_eq.c new file mode 100644 index 000000000000..ed54130d924b --- /dev/null +++ b/drivers/infiniband/hw/erdma/erdma_eq.c @@ -0,0 +1,320 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */ +/* Kai Shen <kaishen@linux.alibaba.com> */ +/* Copyright (c) 2020-2022, Alibaba Group. */ + +#include "erdma_verbs.h" + +#define MAX_POLL_CHUNK_SIZE 16 + +void notify_eq(struct erdma_eq *eq) +{ + u64 db_data = FIELD_PREP(ERDMA_EQDB_CI_MASK, eq->ci) | + FIELD_PREP(ERDMA_EQDB_ARM_MASK, 1); + + *eq->db_record = db_data; + writeq(db_data, eq->db_addr); + + atomic64_inc(&eq->notify_num); +} + +void *get_next_valid_eqe(struct erdma_eq *eq) +{ + u64 *eqe = get_queue_entry(eq->qbuf, eq->ci, eq->depth, EQE_SHIFT); + u32 owner = FIELD_GET(ERDMA_CEQE_HDR_O_MASK, READ_ONCE(*eqe)); + + return owner ^ !!(eq->ci & eq->depth) ? 
eqe : NULL; +} + +void erdma_aeq_event_handler(struct erdma_dev *dev) +{ + struct erdma_aeqe *aeqe; + u32 cqn, qpn; + struct erdma_qp *qp; + struct erdma_cq *cq; + struct ib_event event; + u32 poll_cnt = 0; + + memset(&event, 0, sizeof(event)); + + while (poll_cnt < MAX_POLL_CHUNK_SIZE) { + aeqe = get_next_valid_eqe(&dev->aeq); + if (!aeqe) + break; + + dma_rmb(); + + dev->aeq.ci++; + atomic64_inc(&dev->aeq.event_num); + poll_cnt++; + + if (FIELD_GET(ERDMA_AEQE_HDR_TYPE_MASK, + le32_to_cpu(aeqe->hdr)) == ERDMA_AE_TYPE_CQ_ERR) { + cqn = le32_to_cpu(aeqe->event_data0); + cq = find_cq_by_cqn(dev, cqn); + if (!cq) + continue; + + event.device = cq->ibcq.device; + event.element.cq = &cq->ibcq; + event.event = IB_EVENT_CQ_ERR; + if (cq->ibcq.event_handler) + cq->ibcq.event_handler(&event, + cq->ibcq.cq_context); + } else { + qpn = le32_to_cpu(aeqe->event_data0); + qp = find_qp_by_qpn(dev, qpn); + if (!qp) + continue; + + event.device = qp->ibqp.device; + event.element.qp = &qp->ibqp; + event.event = IB_EVENT_QP_FATAL; + if (qp->ibqp.event_handler) + qp->ibqp.event_handler(&event, + qp->ibqp.qp_context); + } + } + + notify_eq(&dev->aeq); +} + +int erdma_aeq_init(struct erdma_dev *dev) +{ + struct erdma_eq *eq = &dev->aeq; + u32 buf_size; + + eq->depth = ERDMA_DEFAULT_EQ_DEPTH; + buf_size = eq->depth << EQE_SHIFT; + + eq->qbuf = + dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size), + &eq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO); + if (!eq->qbuf) + return -ENOMEM; + + spin_lock_init(&eq->lock); + atomic64_set(&eq->event_num, 0); + atomic64_set(&eq->notify_num, 0); + + eq->db_addr = (u64 __iomem *)(dev->func_bar + ERDMA_REGS_AEQ_DB_REG); + eq->db_record = (u64 *)(eq->qbuf + buf_size); + + erdma_reg_write32(dev, ERDMA_REGS_AEQ_ADDR_H_REG, + upper_32_bits(eq->qbuf_dma_addr)); + erdma_reg_write32(dev, ERDMA_REGS_AEQ_ADDR_L_REG, + lower_32_bits(eq->qbuf_dma_addr)); + erdma_reg_write32(dev, ERDMA_REGS_AEQ_DEPTH_REG, eq->depth); + erdma_reg_write64(dev, 
ERDMA_AEQ_DB_HOST_ADDR_REG, + eq->qbuf_dma_addr + buf_size); + + return 0; +} + +void erdma_aeq_destroy(struct erdma_dev *dev) +{ + struct erdma_eq *eq = &dev->aeq; + + dma_free_coherent(&dev->pdev->dev, + WARPPED_BUFSIZE(eq->depth << EQE_SHIFT), eq->qbuf, + eq->qbuf_dma_addr); +} + +void erdma_ceq_completion_handler(struct erdma_eq_cb *ceq_cb) +{ + struct erdma_dev *dev = ceq_cb->dev; + struct erdma_cq *cq; + u32 poll_cnt = 0; + u64 *ceqe; + int cqn; + + if (!ceq_cb->ready) + return; + + while (poll_cnt < MAX_POLL_CHUNK_SIZE) { + ceqe = get_next_valid_eqe(&ceq_cb->eq); + if (!ceqe) + break; + + dma_rmb(); + ceq_cb->eq.ci++; + poll_cnt++; + cqn = FIELD_GET(ERDMA_CEQE_HDR_CQN_MASK, READ_ONCE(*ceqe)); + + cq = find_cq_by_cqn(dev, cqn); + if (!cq) + continue; + + if (rdma_is_kernel_res(&cq->ibcq.res)) + cq->kern_cq.cmdsn++; + + if (cq->ibcq.comp_handler) + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); + } + + notify_eq(&ceq_cb->eq); +} + +static irqreturn_t erdma_intr_ceq_handler(int irq, void *data) +{ + struct erdma_eq_cb *ceq_cb = data; + + tasklet_schedule(&ceq_cb->tasklet); + + return IRQ_HANDLED; +} + +static void erdma_intr_ceq_task(unsigned long data) +{ + erdma_ceq_completion_handler((struct erdma_eq_cb *)data); +} + +static int erdma_set_ceq_irq(struct erdma_dev *dev, u16 ceqn) +{ + struct erdma_eq_cb *eqc = &dev->ceqs[ceqn]; + int err; + + snprintf(eqc->irq.name, ERDMA_IRQNAME_SIZE, "erdma-ceq%u@pci:%s", ceqn, + pci_name(dev->pdev)); + eqc->irq.msix_vector = pci_irq_vector(dev->pdev, ceqn + 1); + + tasklet_init(&dev->ceqs[ceqn].tasklet, erdma_intr_ceq_task, + (unsigned long)&dev->ceqs[ceqn]); + + cpumask_set_cpu(cpumask_local_spread(ceqn + 1, dev->attrs.numa_node), + &eqc->irq.affinity_hint_mask); + + err = request_irq(eqc->irq.msix_vector, erdma_intr_ceq_handler, 0, + eqc->irq.name, eqc); + if (err) { + dev_err(&dev->pdev->dev, "failed to request_irq(%d)\n", err); + return err; + } + + irq_set_affinity_hint(eqc->irq.msix_vector, + 
&eqc->irq.affinity_hint_mask); + + return 0; +} + +static void erdma_free_ceq_irq(struct erdma_dev *dev, u16 ceqn) +{ + struct erdma_eq_cb *eqc = &dev->ceqs[ceqn]; + + irq_set_affinity_hint(eqc->irq.msix_vector, NULL); + free_irq(eqc->irq.msix_vector, eqc); +} + +static int create_eq_cmd(struct erdma_dev *dev, u32 eqn, struct erdma_eq *eq) +{ + struct erdma_cmdq_create_eq_req req; + dma_addr_t db_info_dma_addr; + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON, + CMDQ_OPCODE_CREATE_EQ); + req.eqn = eqn; + req.depth = ilog2(eq->depth); + req.qbuf_addr = eq->qbuf_dma_addr; + req.qtype = ERDMA_EQ_TYPE_CEQ; + /* Vector index is the same as EQN. */ + req.vector_idx = eqn; + db_info_dma_addr = eq->qbuf_dma_addr + (eq->depth << EQE_SHIFT); + req.db_dma_addr_l = lower_32_bits(db_info_dma_addr); + req.db_dma_addr_h = upper_32_bits(db_info_dma_addr); + + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); +} + +static int erdma_ceq_init_one(struct erdma_dev *dev, u16 ceqn) +{ + struct erdma_eq *eq = &dev->ceqs[ceqn].eq; + u32 buf_size = ERDMA_DEFAULT_EQ_DEPTH << EQE_SHIFT; + int ret; + + eq->qbuf = + dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size), + &eq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO); + if (!eq->qbuf) + return -ENOMEM; + + spin_lock_init(&eq->lock); + atomic64_set(&eq->event_num, 0); + atomic64_set(&eq->notify_num, 0); + + eq->depth = ERDMA_DEFAULT_EQ_DEPTH; + eq->db_addr = + (u64 __iomem *)(dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG + + (ceqn + 1) * ERDMA_DB_SIZE); + eq->db_record = (u64 *)(eq->qbuf + buf_size); + eq->ci = 0; + dev->ceqs[ceqn].dev = dev; + + /* CEQ indexed from 1, 0 rsvd for CMDQ-EQ. */ + ret = create_eq_cmd(dev, ceqn + 1, eq); + dev->ceqs[ceqn].ready = ret ? 
false : true; + + return ret; +} + +static void erdma_ceq_uninit_one(struct erdma_dev *dev, u16 ceqn) +{ + struct erdma_eq *eq = &dev->ceqs[ceqn].eq; + u32 buf_size = ERDMA_DEFAULT_EQ_DEPTH << EQE_SHIFT; + struct erdma_cmdq_destroy_eq_req req; + int err; + + dev->ceqs[ceqn].ready = 0; + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON, + CMDQ_OPCODE_DESTROY_EQ); + /* CEQ indexed from 1, 0 rsvd for CMDQ-EQ. */ + req.eqn = ceqn + 1; + req.qtype = ERDMA_EQ_TYPE_CEQ; + req.vector_idx = ceqn + 1; + + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + if (err) + return; + + dma_free_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size), eq->qbuf, + eq->qbuf_dma_addr); +} + +int erdma_ceqs_init(struct erdma_dev *dev) +{ + u32 i, j; + int err; + + for (i = 0; i < dev->attrs.irq_num - 1; i++) { + err = erdma_ceq_init_one(dev, i); + if (err) + goto out_err; + + err = erdma_set_ceq_irq(dev, i); + if (err) { + erdma_ceq_uninit_one(dev, i); + goto out_err; + } + } + + return 0; + +out_err: + for (j = 0; j < i; j++) { + erdma_free_ceq_irq(dev, j); + erdma_ceq_uninit_one(dev, j); + } + + return err; +} + +void erdma_ceqs_uninit(struct erdma_dev *dev) +{ + u32 i; + + for (i = 0; i < dev->attrs.irq_num - 1; i++) { + erdma_free_ceq_irq(dev, i); + erdma_ceq_uninit_one(dev, i); + } +} diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h new file mode 100644 index 000000000000..e788887732e1 --- /dev/null +++ b/drivers/infiniband/hw/erdma/erdma_hw.h @@ -0,0 +1,514 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ + +/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */ +/* Kai Shen <kaishen@linux.alibaba.com> */ +/* Copyright (c) 2020-2022, Alibaba Group. */ + +#ifndef __ERDMA_HW_H__ +#define __ERDMA_HW_H__ + +#include <linux/kernel.h> +#include <linux/types.h> + +/* PCIe device related definition. 
*/ +#define PCI_VENDOR_ID_ALIBABA 0x1ded + +#define ERDMA_PCI_WIDTH 64 +#define ERDMA_FUNC_BAR 0 +#define ERDMA_MISX_BAR 2 + +#define ERDMA_BAR_MASK (BIT(ERDMA_FUNC_BAR) | BIT(ERDMA_MISX_BAR)) + +/* MSI-X related. */ +#define ERDMA_NUM_MSIX_VEC 32U +#define ERDMA_MSIX_VECTOR_CMDQ 0 + +/* PCIe Bar0 Registers. */ +#define ERDMA_REGS_VERSION_REG 0x0 +#define ERDMA_REGS_DEV_CTRL_REG 0x10 +#define ERDMA_REGS_DEV_ST_REG 0x14 +#define ERDMA_REGS_NETDEV_MAC_L_REG 0x18 +#define ERDMA_REGS_NETDEV_MAC_H_REG 0x1C +#define ERDMA_REGS_CMDQ_SQ_ADDR_L_REG 0x20 +#define ERDMA_REGS_CMDQ_SQ_ADDR_H_REG 0x24 +#define ERDMA_REGS_CMDQ_CQ_ADDR_L_REG 0x28 +#define ERDMA_REGS_CMDQ_CQ_ADDR_H_REG 0x2C +#define ERDMA_REGS_CMDQ_DEPTH_REG 0x30 +#define ERDMA_REGS_CMDQ_EQ_DEPTH_REG 0x34 +#define ERDMA_REGS_CMDQ_EQ_ADDR_L_REG 0x38 +#define ERDMA_REGS_CMDQ_EQ_ADDR_H_REG 0x3C +#define ERDMA_REGS_AEQ_ADDR_L_REG 0x40 +#define ERDMA_REGS_AEQ_ADDR_H_REG 0x44 +#define ERDMA_REGS_AEQ_DEPTH_REG 0x48 +#define ERDMA_REGS_GRP_NUM_REG 0x4c +#define ERDMA_REGS_AEQ_DB_REG 0x50 +#define ERDMA_CMDQ_SQ_DB_HOST_ADDR_REG 0x60 +#define ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG 0x68 +#define ERDMA_CMDQ_EQ_DB_HOST_ADDR_REG 0x70 +#define ERDMA_AEQ_DB_HOST_ADDR_REG 0x78 +#define ERDMA_REGS_STATS_TSO_IN_PKTS_REG 0x80 +#define ERDMA_REGS_STATS_TSO_OUT_PKTS_REG 0x88 +#define ERDMA_REGS_STATS_TSO_OUT_BYTES_REG 0x90 +#define ERDMA_REGS_STATS_TX_DROP_PKTS_REG 0x98 +#define ERDMA_REGS_STATS_TX_BPS_METER_DROP_PKTS_REG 0xa0 +#define ERDMA_REGS_STATS_TX_PPS_METER_DROP_PKTS_REG 0xa8 +#define ERDMA_REGS_STATS_RX_PKTS_REG 0xc0 +#define ERDMA_REGS_STATS_RX_BYTES_REG 0xc8 +#define ERDMA_REGS_STATS_RX_DROP_PKTS_REG 0xd0 +#define ERDMA_REGS_STATS_RX_BPS_METER_DROP_PKTS_REG 0xd8 +#define ERDMA_REGS_STATS_RX_PPS_METER_DROP_PKTS_REG 0xe0 +#define ERDMA_REGS_CEQ_DB_BASE_REG 0x100 +#define ERDMA_CMDQ_SQDB_REG 0x200 +#define ERDMA_CMDQ_CQDB_REG 0x300 + +/* DEV_CTRL_REG details. 
*/ +#define ERDMA_REG_DEV_CTRL_RESET_MASK 0x00000001 +#define ERDMA_REG_DEV_CTRL_INIT_MASK 0x00000002 + +/* DEV_ST_REG details. */ +#define ERDMA_REG_DEV_ST_RESET_DONE_MASK 0x00000001U +#define ERDMA_REG_DEV_ST_INIT_DONE_MASK 0x00000002U + +/* eRDMA PCIe DBs definition. */ +#define ERDMA_BAR_DB_SPACE_BASE 4096 + +#define ERDMA_BAR_SQDB_SPACE_OFFSET ERDMA_BAR_DB_SPACE_BASE +#define ERDMA_BAR_SQDB_SPACE_SIZE (384 * 1024) + +#define ERDMA_BAR_RQDB_SPACE_OFFSET \ + (ERDMA_BAR_SQDB_SPACE_OFFSET + ERDMA_BAR_SQDB_SPACE_SIZE) +#define ERDMA_BAR_RQDB_SPACE_SIZE (96 * 1024) + +#define ERDMA_BAR_CQDB_SPACE_OFFSET \ + (ERDMA_BAR_RQDB_SPACE_OFFSET + ERDMA_BAR_RQDB_SPACE_SIZE) + +/* Doorbell page resources related. */ +/* + * Max # of parallelly issued directSQE is 3072 per device, + * hardware organizes this into 24 group, per group has 128 credits. + */ +#define ERDMA_DWQE_MAX_GRP_CNT 24 +#define ERDMA_DWQE_NUM_PER_GRP 128 + +#define ERDMA_DWQE_TYPE0_CNT 64 +#define ERDMA_DWQE_TYPE1_CNT 496 +/* type1 DB contains 2 DBs, takes 256Byte. */ +#define ERDMA_DWQE_TYPE1_CNT_PER_PAGE 16 + +#define ERDMA_SDB_SHARED_PAGE_INDEX 95 + +/* Doorbell related. */ +#define ERDMA_DB_SIZE 8 + +#define ERDMA_CQDB_IDX_MASK GENMASK_ULL(63, 56) +#define ERDMA_CQDB_CQN_MASK GENMASK_ULL(55, 32) +#define ERDMA_CQDB_ARM_MASK BIT_ULL(31) +#define ERDMA_CQDB_SOL_MASK BIT_ULL(30) +#define ERDMA_CQDB_CMDSN_MASK GENMASK_ULL(29, 28) +#define ERDMA_CQDB_CI_MASK GENMASK_ULL(23, 0) + +#define ERDMA_EQDB_ARM_MASK BIT(31) +#define ERDMA_EQDB_CI_MASK GENMASK_ULL(23, 0) + +#define ERDMA_PAGE_SIZE_SUPPORT 0x7FFFF000 + +/* WQE related. 
*/ +#define EQE_SIZE 16 +#define EQE_SHIFT 4 +#define RQE_SIZE 32 +#define RQE_SHIFT 5 +#define CQE_SIZE 32 +#define CQE_SHIFT 5 +#define SQEBB_SIZE 32 +#define SQEBB_SHIFT 5 +#define SQEBB_MASK (~(SQEBB_SIZE - 1)) +#define SQEBB_ALIGN(size) ((size + SQEBB_SIZE - 1) & SQEBB_MASK) +#define SQEBB_COUNT(size) (SQEBB_ALIGN(size) >> SQEBB_SHIFT) + +#define ERDMA_MAX_SQE_SIZE 128 +#define ERDMA_MAX_WQEBB_PER_SQE 4 + +/* CMDQ related. */ +#define ERDMA_CMDQ_MAX_OUTSTANDING 128 +#define ERDMA_CMDQ_SQE_SIZE 64 + +/* cmdq sub module definition. */ +enum CMDQ_WQE_SUB_MOD { + CMDQ_SUBMOD_RDMA = 0, + CMDQ_SUBMOD_COMMON = 1 +}; + +enum CMDQ_RDMA_OPCODE { + CMDQ_OPCODE_QUERY_DEVICE = 0, + CMDQ_OPCODE_CREATE_QP = 1, + CMDQ_OPCODE_DESTROY_QP = 2, + CMDQ_OPCODE_MODIFY_QP = 3, + CMDQ_OPCODE_CREATE_CQ = 4, + CMDQ_OPCODE_DESTROY_CQ = 5, + CMDQ_OPCODE_REG_MR = 8, + CMDQ_OPCODE_DEREG_MR = 9 +}; + +enum CMDQ_COMMON_OPCODE { + CMDQ_OPCODE_CREATE_EQ = 0, + CMDQ_OPCODE_DESTROY_EQ = 1, + CMDQ_OPCODE_QUERY_FW_INFO = 2, + CMDQ_OPCODE_CONF_MTU = 3, +}; + +/* cmdq-SQE HDR */ +#define ERDMA_CMD_HDR_WQEBB_CNT_MASK GENMASK_ULL(54, 52) +#define ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK GENMASK_ULL(47, 32) +#define ERDMA_CMD_HDR_SUB_MOD_MASK GENMASK_ULL(25, 24) +#define ERDMA_CMD_HDR_OPCODE_MASK GENMASK_ULL(23, 16) +#define ERDMA_CMD_HDR_WQEBB_INDEX_MASK GENMASK_ULL(15, 0) + +struct erdma_cmdq_destroy_cq_req { + u64 hdr; + u32 cqn; +}; + +#define ERDMA_EQ_TYPE_AEQ 0 +#define ERDMA_EQ_TYPE_CEQ 1 + +struct erdma_cmdq_create_eq_req { + u64 hdr; + u64 qbuf_addr; + u8 vector_idx; + u8 eqn; + u8 depth; + u8 qtype; + u32 db_dma_addr_l; + u32 db_dma_addr_h; +}; + +struct erdma_cmdq_destroy_eq_req { + u64 hdr; + u64 rsvd0; + u8 vector_idx; + u8 eqn; + u8 rsvd1; + u8 qtype; +}; + +struct erdma_cmdq_config_mtu_req { + u64 hdr; + u32 mtu; +}; + +/* create_cq cfg0 */ +#define ERDMA_CMD_CREATE_CQ_DEPTH_MASK GENMASK(31, 24) +#define ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK GENMASK(23, 20) +#define ERDMA_CMD_CREATE_CQ_CQN_MASK 
GENMASK(19, 0) + +/* create_cq cfg1 */ +#define ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK GENMASK(31, 16) +#define ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK BIT(15) +#define ERDMA_CMD_CREATE_CQ_EQN_MASK GENMASK(9, 0) + +struct erdma_cmdq_create_cq_req { + u64 hdr; + u32 cfg0; + u32 qbuf_addr_l; + u32 qbuf_addr_h; + u32 cfg1; + u64 cq_db_info_addr; + u32 first_page_offset; +}; + +/* regmr/deregmr cfg0 */ +#define ERDMA_CMD_MR_VALID_MASK BIT(31) +#define ERDMA_CMD_MR_KEY_MASK GENMASK(27, 20) +#define ERDMA_CMD_MR_MPT_IDX_MASK GENMASK(19, 0) + +/* regmr cfg1 */ +#define ERDMA_CMD_REGMR_PD_MASK GENMASK(31, 12) +#define ERDMA_CMD_REGMR_TYPE_MASK GENMASK(7, 6) +#define ERDMA_CMD_REGMR_RIGHT_MASK GENMASK(5, 2) +#define ERDMA_CMD_REGMR_ACC_MODE_MASK GENMASK(1, 0) + +/* regmr cfg2 */ +#define ERDMA_CMD_REGMR_PAGESIZE_MASK GENMASK(31, 27) +#define ERDMA_CMD_REGMR_MTT_TYPE_MASK GENMASK(21, 20) +#define ERDMA_CMD_REGMR_MTT_CNT_MASK GENMASK(19, 0) + +struct erdma_cmdq_reg_mr_req { + u64 hdr; + u32 cfg0; + u32 cfg1; + u64 start_va; + u32 size; + u32 cfg2; + u64 phy_addr[4]; +}; + +struct erdma_cmdq_dereg_mr_req { + u64 hdr; + u32 cfg; +}; + +/* modify qp cfg */ +#define ERDMA_CMD_MODIFY_QP_STATE_MASK GENMASK(31, 24) +#define ERDMA_CMD_MODIFY_QP_CC_MASK GENMASK(23, 20) +#define ERDMA_CMD_MODIFY_QP_QPN_MASK GENMASK(19, 0) + +struct erdma_cmdq_modify_qp_req { + u64 hdr; + u32 cfg; + u32 cookie; + __be32 dip; + __be32 sip; + __be16 sport; + __be16 dport; + u32 send_nxt; + u32 recv_nxt; +}; + +/* create qp cfg0 */ +#define ERDMA_CMD_CREATE_QP_SQ_DEPTH_MASK GENMASK(31, 20) +#define ERDMA_CMD_CREATE_QP_QPN_MASK GENMASK(19, 0) + +/* create qp cfg1 */ +#define ERDMA_CMD_CREATE_QP_RQ_DEPTH_MASK GENMASK(31, 20) +#define ERDMA_CMD_CREATE_QP_PD_MASK GENMASK(19, 0) + +/* create qp cqn_mtt_cfg */ +#define ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK GENMASK(31, 28) +#define ERDMA_CMD_CREATE_QP_CQN_MASK GENMASK(23, 0) + +/* create qp mtt_cfg */ +#define ERDMA_CMD_CREATE_QP_PAGE_OFFSET_MASK GENMASK(31, 12) +#define 
ERDMA_CMD_CREATE_QP_MTT_CNT_MASK GENMASK(11, 1) +#define ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK BIT(0) + +#define ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK GENMASK_ULL(31, 0) + +struct erdma_cmdq_create_qp_req { + u64 hdr; + u32 cfg0; + u32 cfg1; + u32 sq_cqn_mtt_cfg; + u32 rq_cqn_mtt_cfg; + u64 sq_buf_addr; + u64 rq_buf_addr; + u32 sq_mtt_cfg; + u32 rq_mtt_cfg; + u64 sq_db_info_dma_addr; + u64 rq_db_info_dma_addr; +}; + +struct erdma_cmdq_destroy_qp_req { + u64 hdr; + u32 qpn; +}; + +/* cap qword 0 definition */ +#define ERDMA_CMD_DEV_CAP_MAX_CQE_MASK GENMASK_ULL(47, 40) +#define ERDMA_CMD_DEV_CAP_MAX_RECV_WR_MASK GENMASK_ULL(23, 16) +#define ERDMA_CMD_DEV_CAP_MAX_MR_SIZE_MASK GENMASK_ULL(7, 0) + +/* cap qword 1 definition */ +#define ERDMA_CMD_DEV_CAP_DMA_LOCAL_KEY_MASK GENMASK_ULL(63, 32) +#define ERDMA_CMD_DEV_CAP_DEFAULT_CC_MASK GENMASK_ULL(31, 28) +#define ERDMA_CMD_DEV_CAP_QBLOCK_MASK GENMASK_ULL(27, 16) +#define ERDMA_CMD_DEV_CAP_MAX_MW_MASK GENMASK_ULL(7, 0) + +#define ERDMA_NQP_PER_QBLOCK 1024 + +#define ERDMA_CMD_INFO0_FW_VER_MASK GENMASK_ULL(31, 0) + +/* CQE hdr */ +#define ERDMA_CQE_HDR_OWNER_MASK BIT(31) +#define ERDMA_CQE_HDR_OPCODE_MASK GENMASK(23, 16) +#define ERDMA_CQE_HDR_QTYPE_MASK GENMASK(15, 8) +#define ERDMA_CQE_HDR_SYNDROME_MASK GENMASK(7, 0) + +#define ERDMA_CQE_QTYPE_SQ 0 +#define ERDMA_CQE_QTYPE_RQ 1 +#define ERDMA_CQE_QTYPE_CMDQ 2 + +struct erdma_cqe { + __be32 hdr; + __be32 qe_idx; + __be32 qpn; + union { + __le32 imm_data; + __be32 inv_rkey; + }; + __be32 size; + __be32 rsvd[3]; +}; + +struct erdma_sge { + __aligned_le64 laddr; + __le32 length; + __le32 lkey; +}; + +/* Receive Queue Element */ +struct erdma_rqe { + __le16 qe_idx; + __le16 rsvd0; + __le32 qpn; + __le32 rsvd1; + __le32 rsvd2; + __le64 to; + __le32 length; + __le32 stag; +}; + +/* SQE */ +#define ERDMA_SQE_HDR_SGL_LEN_MASK GENMASK_ULL(63, 56) +#define ERDMA_SQE_HDR_WQEBB_CNT_MASK GENMASK_ULL(54, 52) +#define ERDMA_SQE_HDR_QPN_MASK GENMASK_ULL(51, 32) +#define 
ERDMA_SQE_HDR_OPCODE_MASK GENMASK_ULL(31, 27) +#define ERDMA_SQE_HDR_DWQE_MASK BIT_ULL(26) +#define ERDMA_SQE_HDR_INLINE_MASK BIT_ULL(25) +#define ERDMA_SQE_HDR_FENCE_MASK BIT_ULL(24) +#define ERDMA_SQE_HDR_SE_MASK BIT_ULL(23) +#define ERDMA_SQE_HDR_CE_MASK BIT_ULL(22) +#define ERDMA_SQE_HDR_WQEBB_INDEX_MASK GENMASK_ULL(15, 0) + +/* REG MR attrs */ +#define ERDMA_SQE_MR_MODE_MASK GENMASK(1, 0) +#define ERDMA_SQE_MR_ACCESS_MASK GENMASK(5, 2) +#define ERDMA_SQE_MR_MTT_TYPE_MASK GENMASK(7, 6) +#define ERDMA_SQE_MR_MTT_CNT_MASK GENMASK(31, 12) + +struct erdma_write_sqe { + __le64 hdr; + __be32 imm_data; + __le32 length; + + __le32 sink_stag; + __le32 sink_to_l; + __le32 sink_to_h; + + __le32 rsvd; + + struct erdma_sge sgl[0]; +}; + +struct erdma_send_sqe { + __le64 hdr; + union { + __be32 imm_data; + __le32 invalid_stag; + }; + + __le32 length; + struct erdma_sge sgl[0]; +}; + +struct erdma_readreq_sqe { + __le64 hdr; + __le32 invalid_stag; + __le32 length; + __le32 sink_stag; + __le32 sink_to_l; + __le32 sink_to_h; + __le32 rsvd; +}; + +struct erdma_reg_mr_sqe { + __le64 hdr; + __le64 addr; + __le32 length; + __le32 stag; + __le32 attrs; + __le32 rsvd; +}; + +/* EQ related. 
*/ +#define ERDMA_DEFAULT_EQ_DEPTH 256 + +/* ceqe */ +#define ERDMA_CEQE_HDR_DB_MASK BIT_ULL(63) +#define ERDMA_CEQE_HDR_PI_MASK GENMASK_ULL(55, 32) +#define ERDMA_CEQE_HDR_O_MASK BIT_ULL(31) +#define ERDMA_CEQE_HDR_CQN_MASK GENMASK_ULL(19, 0) + +/* aeqe */ +#define ERDMA_AEQE_HDR_O_MASK BIT(31) +#define ERDMA_AEQE_HDR_TYPE_MASK GENMASK(23, 16) +#define ERDMA_AEQE_HDR_SUBTYPE_MASK GENMASK(7, 0) + +#define ERDMA_AE_TYPE_QP_FATAL_EVENT 0 +#define ERDMA_AE_TYPE_QP_ERQ_ERR_EVENT 1 +#define ERDMA_AE_TYPE_ACC_ERR_EVENT 2 +#define ERDMA_AE_TYPE_CQ_ERR 3 +#define ERDMA_AE_TYPE_OTHER_ERROR 4 + +struct erdma_aeqe { + __le32 hdr; + __le32 event_data0; + __le32 event_data1; + __le32 rsvd; +}; + +enum erdma_opcode { + ERDMA_OP_WRITE = 0, + ERDMA_OP_READ = 1, + ERDMA_OP_SEND = 2, + ERDMA_OP_SEND_WITH_IMM = 3, + + ERDMA_OP_RECEIVE = 4, + ERDMA_OP_RECV_IMM = 5, + ERDMA_OP_RECV_INV = 6, + + ERDMA_OP_RSVD0 = 7, + ERDMA_OP_RSVD1 = 8, + ERDMA_OP_WRITE_WITH_IMM = 9, + + ERDMA_OP_RSVD2 = 10, + ERDMA_OP_RSVD3 = 11, + + ERDMA_OP_RSP_SEND_IMM = 12, + ERDMA_OP_SEND_WITH_INV = 13, + + ERDMA_OP_REG_MR = 14, + ERDMA_OP_LOCAL_INV = 15, + ERDMA_OP_READ_WITH_INV = 16, + ERDMA_NUM_OPCODES = 17, + ERDMA_OP_INVALID = ERDMA_NUM_OPCODES + 1 +}; + +enum erdma_wc_status { + ERDMA_WC_SUCCESS = 0, + ERDMA_WC_GENERAL_ERR = 1, + ERDMA_WC_RECV_WQE_FORMAT_ERR = 2, + ERDMA_WC_RECV_STAG_INVALID_ERR = 3, + ERDMA_WC_RECV_ADDR_VIOLATION_ERR = 4, + ERDMA_WC_RECV_RIGHT_VIOLATION_ERR = 5, + ERDMA_WC_RECV_PDID_ERR = 6, + ERDMA_WC_RECV_WARRPING_ERR = 7, + ERDMA_WC_SEND_WQE_FORMAT_ERR = 8, + ERDMA_WC_SEND_WQE_ORD_EXCEED = 9, + ERDMA_WC_SEND_STAG_INVALID_ERR = 10, + ERDMA_WC_SEND_ADDR_VIOLATION_ERR = 11, + ERDMA_WC_SEND_RIGHT_VIOLATION_ERR = 12, + ERDMA_WC_SEND_PDID_ERR = 13, + ERDMA_WC_SEND_WARRPING_ERR = 14, + ERDMA_WC_FLUSH_ERR = 15, + ERDMA_WC_RETRY_EXC_ERR = 16, + ERDMA_NUM_WC_STATUS +}; + +enum erdma_vendor_err { + ERDMA_WC_VENDOR_NO_ERR = 0, + ERDMA_WC_VENDOR_INVALID_RQE = 1, + ERDMA_WC_VENDOR_RQE_INVALID_STAG = 
2, + ERDMA_WC_VENDOR_RQE_ADDR_VIOLATION = 3, + ERDMA_WC_VENDOR_RQE_ACCESS_RIGHT_ERR = 4, + ERDMA_WC_VENDOR_RQE_INVALID_PD = 5, + ERDMA_WC_VENDOR_RQE_WRAP_ERR = 6, + ERDMA_WC_VENDOR_INVALID_SQE = 0x20, + ERDMA_WC_VENDOR_ZERO_ORD = 0x21, + ERDMA_WC_VENDOR_SQE_INVALID_STAG = 0x30, + ERDMA_WC_VENDOR_SQE_ADDR_VIOLATION = 0x31, + ERDMA_WC_VENDOR_SQE_ACCESS_ERR = 0x32, + ERDMA_WC_VENDOR_SQE_INVALID_PD = 0x33, + ERDMA_WC_VENDOR_SQE_WARP_ERR = 0x34 +}; + +#endif diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c new file mode 100644 index 000000000000..49778bb294ae --- /dev/null +++ b/drivers/infiniband/hw/erdma/erdma_main.c @@ -0,0 +1,605 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */ +/* Kai Shen <kaishen@linux.alibaba.com> */ +/* Copyright (c) 2020-2022, Alibaba Group. */ + +#include <linux/module.h> +#include <net/addrconf.h> +#include <rdma/erdma-abi.h> + +#include "erdma.h" +#include "erdma_cm.h" +#include "erdma_verbs.h" + +MODULE_AUTHOR("Cheng Xu <chengyou@linux.alibaba.com>"); +MODULE_DESCRIPTION("Alibaba elasticRDMA adapter driver"); +MODULE_LICENSE("Dual BSD/GPL"); + +static int erdma_netdev_event(struct notifier_block *nb, unsigned long event, + void *arg) +{ + struct net_device *netdev = netdev_notifier_info_to_dev(arg); + struct erdma_dev *dev = container_of(nb, struct erdma_dev, netdev_nb); + + if (dev->netdev == NULL || dev->netdev != netdev) + goto done; + + switch (event) { + case NETDEV_UP: + dev->state = IB_PORT_ACTIVE; + erdma_port_event(dev, IB_EVENT_PORT_ACTIVE); + break; + case NETDEV_DOWN: + dev->state = IB_PORT_DOWN; + erdma_port_event(dev, IB_EVENT_PORT_ERR); + break; + case NETDEV_CHANGEMTU: + if (dev->mtu != netdev->mtu) { + erdma_set_mtu(dev, netdev->mtu); + dev->mtu = netdev->mtu; + } + break; + case NETDEV_REGISTER: + case NETDEV_UNREGISTER: + case NETDEV_CHANGEADDR: + case NETDEV_GOING_DOWN: + case NETDEV_CHANGE: + default: + break; 
+ } + +done: + return NOTIFY_OK; +} + +static int erdma_enum_and_get_netdev(struct erdma_dev *dev) +{ + struct net_device *netdev; + int ret = -ENODEV; + + /* Already bound to a net_device, so we skip. */ + if (dev->netdev) + return 0; + + rtnl_lock(); + for_each_netdev(&init_net, netdev) { + /* + * In erdma, the paired netdev and ibdev should have the same + * MAC address. erdma can get the value from its PCIe bar + * registers. Since erdma cannot get the paired netdev + * reference directly, we do a traverse here to get the paired + * netdev. + */ + if (ether_addr_equal_unaligned(netdev->perm_addr, + dev->attrs.peer_addr)) { + ret = ib_device_set_netdev(&dev->ibdev, netdev, 1); + if (ret) { + rtnl_unlock(); + ibdev_warn(&dev->ibdev, + "failed (%d) to link netdev", ret); + return ret; + } + + dev->netdev = netdev; + break; + } + } + + rtnl_unlock(); + + return ret; +} + +static int erdma_device_register(struct erdma_dev *dev) +{ + struct ib_device *ibdev = &dev->ibdev; + int ret; + + ret = erdma_enum_and_get_netdev(dev); + if (ret) + return ret; + + dev->mtu = dev->netdev->mtu; + addrconf_addr_eui48((u8 *)&ibdev->node_guid, dev->netdev->dev_addr); + + ret = ib_register_device(ibdev, "erdma_%d", &dev->pdev->dev); + if (ret) { + dev_err(&dev->pdev->dev, + "ib_register_device failed: ret = %d\n", ret); + return ret; + } + + dev->netdev_nb.notifier_call = erdma_netdev_event; + ret = register_netdevice_notifier(&dev->netdev_nb); + if (ret) { + ibdev_err(&dev->ibdev, "failed to register notifier.\n"); + ib_unregister_device(ibdev); + } + + return ret; +} + +static irqreturn_t erdma_comm_irq_handler(int irq, void *data) +{ + struct erdma_dev *dev = data; + + erdma_cmdq_completion_handler(&dev->cmdq); + erdma_aeq_event_handler(dev); + + return IRQ_HANDLED; +} + +static void erdma_dwqe_resource_init(struct erdma_dev *dev) +{ + int total_pages, type0, type1; + + dev->attrs.grp_num = erdma_reg_read32(dev, ERDMA_REGS_GRP_NUM_REG); + + if (dev->attrs.grp_num < 4) + 
dev->attrs.disable_dwqe = true; + else + dev->attrs.disable_dwqe = false; + + /* One page contains 4 groups. */ + total_pages = dev->attrs.grp_num * 4; + + if (dev->attrs.grp_num >= ERDMA_DWQE_MAX_GRP_CNT) { + dev->attrs.grp_num = ERDMA_DWQE_MAX_GRP_CNT; + type0 = ERDMA_DWQE_TYPE0_CNT; + type1 = ERDMA_DWQE_TYPE1_CNT / ERDMA_DWQE_TYPE1_CNT_PER_PAGE; + } else { + type1 = total_pages / 3; + type0 = total_pages - type1 - 1; + } + + dev->attrs.dwqe_pages = type0; + dev->attrs.dwqe_entries = type1 * ERDMA_DWQE_TYPE1_CNT_PER_PAGE; +} + +static int erdma_request_vectors(struct erdma_dev *dev) +{ + int expect_irq_num = min(num_possible_cpus() + 1, ERDMA_NUM_MSIX_VEC); + int ret; + + ret = pci_alloc_irq_vectors(dev->pdev, 1, expect_irq_num, PCI_IRQ_MSIX); + if (ret < 0) { + dev_err(&dev->pdev->dev, "request irq vectors failed(%d)\n", + ret); + return ret; + } + dev->attrs.irq_num = ret; + + return 0; +} + +static int erdma_comm_irq_init(struct erdma_dev *dev) +{ + snprintf(dev->comm_irq.name, ERDMA_IRQNAME_SIZE, "erdma-common@pci:%s", + pci_name(dev->pdev)); + dev->comm_irq.msix_vector = + pci_irq_vector(dev->pdev, ERDMA_MSIX_VECTOR_CMDQ); + + cpumask_set_cpu(cpumask_first(cpumask_of_pcibus(dev->pdev->bus)), + &dev->comm_irq.affinity_hint_mask); + irq_set_affinity_hint(dev->comm_irq.msix_vector, + &dev->comm_irq.affinity_hint_mask); + + return request_irq(dev->comm_irq.msix_vector, erdma_comm_irq_handler, 0, + dev->comm_irq.name, dev); +} + +static void erdma_comm_irq_uninit(struct erdma_dev *dev) +{ + irq_set_affinity_hint(dev->comm_irq.msix_vector, NULL); + free_irq(dev->comm_irq.msix_vector, dev); +} + +static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev) +{ + int ret; + + erdma_dwqe_resource_init(dev); + + ret = dma_set_mask_and_coherent(&pdev->dev, + DMA_BIT_MASK(ERDMA_PCI_WIDTH)); + if (ret) + return ret; + + dma_set_max_seg_size(&pdev->dev, UINT_MAX); + + return 0; +} + +static void erdma_device_uninit(struct erdma_dev *dev) +{ + u32 ctrl = 
FIELD_PREP(ERDMA_REG_DEV_CTRL_RESET_MASK, 1); + + erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl); +} + +static const struct pci_device_id erdma_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_ALIBABA, 0x107f) }, + {} +}; + +static int erdma_probe_dev(struct pci_dev *pdev) +{ + struct erdma_dev *dev; + int bars, err; + u32 version; + + err = pci_enable_device(pdev); + if (err) { + dev_err(&pdev->dev, "pci_enable_device failed(%d)\n", err); + return err; + } + + pci_set_master(pdev); + + dev = ib_alloc_device(erdma_dev, ibdev); + if (!dev) { + dev_err(&pdev->dev, "ib_alloc_device failed\n"); + err = -ENOMEM; + goto err_disable_device; + } + + pci_set_drvdata(pdev, dev); + dev->pdev = pdev; + dev->attrs.numa_node = dev_to_node(&pdev->dev); + + bars = pci_select_bars(pdev, IORESOURCE_MEM); + err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME); + if (bars != ERDMA_BAR_MASK || err) { + err = err ? err : -EINVAL; + goto err_ib_device_release; + } + + dev->func_bar_addr = pci_resource_start(pdev, ERDMA_FUNC_BAR); + dev->func_bar_len = pci_resource_len(pdev, ERDMA_FUNC_BAR); + + dev->func_bar = + devm_ioremap(&pdev->dev, dev->func_bar_addr, dev->func_bar_len); + if (!dev->func_bar) { + dev_err(&pdev->dev, "devm_ioremap failed.\n"); + err = -EFAULT; + goto err_release_bars; + } + + version = erdma_reg_read32(dev, ERDMA_REGS_VERSION_REG); + if (version == 0) { + /* we know that it is a non-functional function. 
*/ + err = -ENODEV; + goto err_iounmap_func_bar; + } + + err = erdma_device_init(dev, pdev); + if (err) + goto err_iounmap_func_bar; + + err = erdma_request_vectors(dev); + if (err) + goto err_iounmap_func_bar; + + err = erdma_comm_irq_init(dev); + if (err) + goto err_free_vectors; + + err = erdma_aeq_init(dev); + if (err) + goto err_uninit_comm_irq; + + err = erdma_cmdq_init(dev); + if (err) + goto err_uninit_aeq; + + err = erdma_ceqs_init(dev); + if (err) + goto err_uninit_cmdq; + + erdma_finish_cmdq_init(dev); + + return 0; + +err_uninit_cmdq: + erdma_device_uninit(dev); + erdma_cmdq_destroy(dev); + +err_uninit_aeq: + erdma_aeq_destroy(dev); + +err_uninit_comm_irq: + erdma_comm_irq_uninit(dev); + +err_free_vectors: + pci_free_irq_vectors(dev->pdev); + +err_iounmap_func_bar: + devm_iounmap(&pdev->dev, dev->func_bar); + +err_release_bars: + pci_release_selected_regions(pdev, bars); + +err_ib_device_release: + ib_dealloc_device(&dev->ibdev); + +err_disable_device: + pci_disable_device(pdev); + + return err; +} + +static void erdma_remove_dev(struct pci_dev *pdev) +{ + struct erdma_dev *dev = pci_get_drvdata(pdev); + + erdma_ceqs_uninit(dev); + + erdma_device_uninit(dev); + + erdma_cmdq_destroy(dev); + erdma_aeq_destroy(dev); + erdma_comm_irq_uninit(dev); + pci_free_irq_vectors(dev->pdev); + + devm_iounmap(&pdev->dev, dev->func_bar); + pci_release_selected_regions(pdev, ERDMA_BAR_MASK); + + ib_dealloc_device(&dev->ibdev); + + pci_disable_device(pdev); +} + +#define ERDMA_GET_CAP(name, cap) FIELD_GET(ERDMA_CMD_DEV_CAP_##name##_MASK, cap) + +static int erdma_dev_attrs_init(struct erdma_dev *dev) +{ + int err; + u64 req_hdr, cap0, cap1; + + erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_QUERY_DEVICE); + + err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0, + &cap1); + if (err) + return err; + + dev->attrs.max_cqe = 1 << ERDMA_GET_CAP(MAX_CQE, cap0); + dev->attrs.max_mr_size = 1ULL << ERDMA_GET_CAP(MAX_MR_SIZE, cap0); + 
dev->attrs.max_mw = 1 << ERDMA_GET_CAP(MAX_MW, cap1); + dev->attrs.max_recv_wr = 1 << ERDMA_GET_CAP(MAX_RECV_WR, cap0); + dev->attrs.local_dma_key = ERDMA_GET_CAP(DMA_LOCAL_KEY, cap1); + dev->attrs.cc = ERDMA_GET_CAP(DEFAULT_CC, cap1); + dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1); + dev->attrs.max_mr = dev->attrs.max_qp << 1; + dev->attrs.max_cq = dev->attrs.max_qp << 1; + + dev->attrs.max_send_wr = ERDMA_MAX_SEND_WR; + dev->attrs.max_ord = ERDMA_MAX_ORD; + dev->attrs.max_ird = ERDMA_MAX_IRD; + dev->attrs.max_send_sge = ERDMA_MAX_SEND_SGE; + dev->attrs.max_recv_sge = ERDMA_MAX_RECV_SGE; + dev->attrs.max_sge_rd = ERDMA_MAX_SGE_RD; + dev->attrs.max_pd = ERDMA_MAX_PD; + + dev->res_cb[ERDMA_RES_TYPE_PD].max_cap = ERDMA_MAX_PD; + dev->res_cb[ERDMA_RES_TYPE_STAG_IDX].max_cap = dev->attrs.max_mr; + + erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_COMMON, + CMDQ_OPCODE_QUERY_FW_INFO); + + err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0, + &cap1); + if (!err) + dev->attrs.fw_version = + FIELD_GET(ERDMA_CMD_INFO0_FW_VER_MASK, cap0); + + return err; +} + +static int erdma_res_cb_init(struct erdma_dev *dev) +{ + int i, j; + + for (i = 0; i < ERDMA_RES_CNT; i++) { + dev->res_cb[i].next_alloc_idx = 1; + spin_lock_init(&dev->res_cb[i].lock); + dev->res_cb[i].bitmap = + bitmap_zalloc(dev->res_cb[i].max_cap, GFP_KERNEL); + if (!dev->res_cb[i].bitmap) + goto err; + } + + return 0; + +err: + for (j = 0; j < i; j++) + bitmap_free(dev->res_cb[j].bitmap); + + return -ENOMEM; +} + +static void erdma_res_cb_free(struct erdma_dev *dev) +{ + int i; + + for (i = 0; i < ERDMA_RES_CNT; i++) + bitmap_free(dev->res_cb[i].bitmap); +} + +static const struct ib_device_ops erdma_device_ops = { + .owner = THIS_MODULE, + .driver_id = RDMA_DRIVER_ERDMA, + .uverbs_abi_ver = ERDMA_ABI_VERSION, + + .alloc_mr = erdma_ib_alloc_mr, + .alloc_pd = erdma_alloc_pd, + .alloc_ucontext = erdma_alloc_ucontext, + .create_cq = erdma_create_cq, + .create_qp = 
erdma_create_qp, + .dealloc_pd = erdma_dealloc_pd, + .dealloc_ucontext = erdma_dealloc_ucontext, + .dereg_mr = erdma_dereg_mr, + .destroy_cq = erdma_destroy_cq, + .destroy_qp = erdma_destroy_qp, + .get_dma_mr = erdma_get_dma_mr, + .get_port_immutable = erdma_get_port_immutable, + .iw_accept = erdma_accept, + .iw_add_ref = erdma_qp_get_ref, + .iw_connect = erdma_connect, + .iw_create_listen = erdma_create_listen, + .iw_destroy_listen = erdma_destroy_listen, + .iw_get_qp = erdma_get_ibqp, + .iw_reject = erdma_reject, + .iw_rem_ref = erdma_qp_put_ref, + .map_mr_sg = erdma_map_mr_sg, + .mmap = erdma_mmap, + .mmap_free = erdma_mmap_free, + .modify_qp = erdma_modify_qp, + .post_recv = erdma_post_recv, + .post_send = erdma_post_send, + .poll_cq = erdma_poll_cq, + .query_device = erdma_query_device, + .query_gid = erdma_query_gid, + .query_port = erdma_query_port, + .query_qp = erdma_query_qp, + .req_notify_cq = erdma_req_notify_cq, + .reg_user_mr = erdma_reg_user_mr, + + INIT_RDMA_OBJ_SIZE(ib_cq, erdma_cq, ibcq), + INIT_RDMA_OBJ_SIZE(ib_pd, erdma_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_ucontext, erdma_ucontext, ibucontext), + INIT_RDMA_OBJ_SIZE(ib_qp, erdma_qp, ibqp), +}; + +static int erdma_ib_device_add(struct pci_dev *pdev) +{ + struct erdma_dev *dev = pci_get_drvdata(pdev); + struct ib_device *ibdev = &dev->ibdev; + u64 mac; + int ret; + + ret = erdma_dev_attrs_init(dev); + if (ret) + return ret; + + ibdev->node_type = RDMA_NODE_RNIC; + memcpy(ibdev->node_desc, ERDMA_NODE_DESC, sizeof(ERDMA_NODE_DESC)); + + /* + * Current model (one-to-one device association): + * One ERDMA device per net_device or, equivalently, + * per physical port. 
+ */ + ibdev->phys_port_cnt = 1; + ibdev->num_comp_vectors = dev->attrs.irq_num - 1; + + ib_set_device_ops(ibdev, &erdma_device_ops); + + INIT_LIST_HEAD(&dev->cep_list); + + spin_lock_init(&dev->lock); + xa_init_flags(&dev->qp_xa, XA_FLAGS_ALLOC1); + xa_init_flags(&dev->cq_xa, XA_FLAGS_ALLOC1); + dev->next_alloc_cqn = 1; + dev->next_alloc_qpn = 1; + + ret = erdma_res_cb_init(dev); + if (ret) + return ret; + + spin_lock_init(&dev->db_bitmap_lock); + bitmap_zero(dev->sdb_page, ERDMA_DWQE_TYPE0_CNT); + bitmap_zero(dev->sdb_entry, ERDMA_DWQE_TYPE1_CNT); + + atomic_set(&dev->num_ctx, 0); + + mac = erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_L_REG); + mac |= (u64)erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_H_REG) << 32; + + u64_to_ether_addr(mac, dev->attrs.peer_addr); + + ret = erdma_device_register(dev); + if (ret) + goto err_out; + + return 0; + +err_out: + xa_destroy(&dev->qp_xa); + xa_destroy(&dev->cq_xa); + + erdma_res_cb_free(dev); + + return ret; +} + +static void erdma_ib_device_remove(struct pci_dev *pdev) +{ + struct erdma_dev *dev = pci_get_drvdata(pdev); + + unregister_netdevice_notifier(&dev->netdev_nb); + ib_unregister_device(&dev->ibdev); + + erdma_res_cb_free(dev); + xa_destroy(&dev->qp_xa); + xa_destroy(&dev->cq_xa); +} + +static int erdma_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + int ret; + + ret = erdma_probe_dev(pdev); + if (ret) + return ret; + + ret = erdma_ib_device_add(pdev); + if (ret) { + erdma_remove_dev(pdev); + return ret; + } + + return 0; +} + +static void erdma_remove(struct pci_dev *pdev) +{ + erdma_ib_device_remove(pdev); + erdma_remove_dev(pdev); +} + +static struct pci_driver erdma_pci_driver = { + .name = DRV_MODULE_NAME, + .id_table = erdma_pci_tbl, + .probe = erdma_probe, + .remove = erdma_remove +}; + +MODULE_DEVICE_TABLE(pci, erdma_pci_tbl); + +static __init int erdma_init_module(void) +{ + int ret; + + ret = erdma_cm_init(); + if (ret) + return ret; + + ret = pci_register_driver(&erdma_pci_driver); + if 
(ret) + erdma_cm_exit(); + + return ret; +} + +static void __exit erdma_exit_module(void) +{ + pci_unregister_driver(&erdma_pci_driver); + + erdma_cm_exit(); +} + +module_init(erdma_init_module); +module_exit(erdma_exit_module); diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c new file mode 100644 index 000000000000..5fe1a339a435 --- /dev/null +++ b/drivers/infiniband/hw/erdma/erdma_qp.c @@ -0,0 +1,555 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */ +/* Kai Shen <kaishen@linux.alibaba.com> */ +/* Copyright (c) 2020-2021, Alibaba Group */ +/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ +/* Copyright (c) 2008-2019, IBM Corporation */ + +#include "erdma_cm.h" +#include "erdma_verbs.h" + +void erdma_qp_llp_close(struct erdma_qp *qp) +{ + struct erdma_qp_attrs qp_attrs; + + down_write(&qp->state_lock); + + switch (qp->attrs.state) { + case ERDMA_QP_STATE_RTS: + case ERDMA_QP_STATE_RTR: + case ERDMA_QP_STATE_IDLE: + case ERDMA_QP_STATE_TERMINATE: + qp_attrs.state = ERDMA_QP_STATE_CLOSING; + erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE); + break; + case ERDMA_QP_STATE_CLOSING: + qp->attrs.state = ERDMA_QP_STATE_IDLE; + break; + default: + break; + } + + if (qp->cep) { + erdma_cep_put(qp->cep); + qp->cep = NULL; + } + + up_write(&qp->state_lock); +} + +struct ib_qp *erdma_get_ibqp(struct ib_device *ibdev, int id) +{ + struct erdma_qp *qp = find_qp_by_qpn(to_edev(ibdev), id); + + if (qp) + return &qp->ibqp; + + return NULL; +} + +static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp, + struct erdma_qp_attrs *attrs, + enum erdma_qp_attr_mask mask) +{ + int ret; + struct erdma_dev *dev = qp->dev; + struct erdma_cmdq_modify_qp_req req; + struct tcp_sock *tp; + struct erdma_cep *cep = qp->cep; + struct sockaddr_storage local_addr, remote_addr; + + if (!(mask & ERDMA_QP_ATTR_LLP_HANDLE)) + return -EINVAL; + + if (!(mask & ERDMA_QP_ATTR_MPA)) + 
return -EINVAL; + + ret = getname_local(cep->sock, &local_addr); + if (ret < 0) + return ret; + + ret = getname_peer(cep->sock, &remote_addr); + if (ret < 0) + return ret; + + qp->attrs.state = ERDMA_QP_STATE_RTS; + + tp = tcp_sk(qp->cep->sock->sk); + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_MODIFY_QP); + + req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, qp->attrs.state) | + FIELD_PREP(ERDMA_CMD_MODIFY_QP_CC_MASK, qp->attrs.cc) | + FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp)); + + req.cookie = be32_to_cpu(qp->cep->mpa.ext_data.cookie); + req.dip = to_sockaddr_in(remote_addr).sin_addr.s_addr; + req.sip = to_sockaddr_in(local_addr).sin_addr.s_addr; + req.dport = to_sockaddr_in(remote_addr).sin_port; + req.sport = to_sockaddr_in(local_addr).sin_port; + + req.send_nxt = tp->snd_nxt; + /* rsvd tcp seq for mpa-rsp in server. */ + if (qp->attrs.qp_type == ERDMA_QP_PASSIVE) + req.send_nxt += MPA_DEFAULT_HDR_LEN + qp->attrs.pd_len; + req.recv_nxt = tp->rcv_nxt; + + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); +} + +static int erdma_modify_qp_state_to_stop(struct erdma_qp *qp, + struct erdma_qp_attrs *attrs, + enum erdma_qp_attr_mask mask) +{ + struct erdma_dev *dev = qp->dev; + struct erdma_cmdq_modify_qp_req req; + + qp->attrs.state = attrs->state; + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_MODIFY_QP); + + req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, attrs->state) | + FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp)); + + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); +} + +int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, + enum erdma_qp_attr_mask mask) +{ + int drop_conn, ret = 0; + + if (!mask) + return 0; + + if (!(mask & ERDMA_QP_ATTR_STATE)) + return 0; + + switch (qp->attrs.state) { + case ERDMA_QP_STATE_IDLE: + case ERDMA_QP_STATE_RTR: + if (attrs->state == ERDMA_QP_STATE_RTS) { + ret = 
erdma_modify_qp_state_to_rts(qp, attrs, mask); + } else if (attrs->state == ERDMA_QP_STATE_ERROR) { + qp->attrs.state = ERDMA_QP_STATE_ERROR; + if (qp->cep) { + erdma_cep_put(qp->cep); + qp->cep = NULL; + } + ret = erdma_modify_qp_state_to_stop(qp, attrs, mask); + } + break; + case ERDMA_QP_STATE_RTS: + drop_conn = 0; + + if (attrs->state == ERDMA_QP_STATE_CLOSING) { + ret = erdma_modify_qp_state_to_stop(qp, attrs, mask); + drop_conn = 1; + } else if (attrs->state == ERDMA_QP_STATE_TERMINATE) { + qp->attrs.state = ERDMA_QP_STATE_TERMINATE; + ret = erdma_modify_qp_state_to_stop(qp, attrs, mask); + drop_conn = 1; + } else if (attrs->state == ERDMA_QP_STATE_ERROR) { + ret = erdma_modify_qp_state_to_stop(qp, attrs, mask); + qp->attrs.state = ERDMA_QP_STATE_ERROR; + drop_conn = 1; + } + + if (drop_conn) + erdma_qp_cm_drop(qp); + + break; + case ERDMA_QP_STATE_TERMINATE: + if (attrs->state == ERDMA_QP_STATE_ERROR) + qp->attrs.state = ERDMA_QP_STATE_ERROR; + break; + case ERDMA_QP_STATE_CLOSING: + if (attrs->state == ERDMA_QP_STATE_IDLE) { + qp->attrs.state = ERDMA_QP_STATE_IDLE; + } else if (attrs->state == ERDMA_QP_STATE_ERROR) { + ret = erdma_modify_qp_state_to_stop(qp, attrs, mask); + qp->attrs.state = ERDMA_QP_STATE_ERROR; + } else if (attrs->state != ERDMA_QP_STATE_CLOSING) { + return -ECONNABORTED; + } + break; + default: + break; + } + + return ret; +} + +static void erdma_qp_safe_free(struct kref *ref) +{ + struct erdma_qp *qp = container_of(ref, struct erdma_qp, ref); + + complete(&qp->safe_free); +} + +void erdma_qp_put(struct erdma_qp *qp) +{ + WARN_ON(kref_read(&qp->ref) < 1); + kref_put(&qp->ref, erdma_qp_safe_free); +} + +void erdma_qp_get(struct erdma_qp *qp) +{ + kref_get(&qp->ref); +} + +static int fill_inline_data(struct erdma_qp *qp, + const struct ib_send_wr *send_wr, u16 wqe_idx, + u32 sgl_offset, __le32 *length_field) +{ + u32 remain_size, copy_size, data_off, bytes = 0; + char *data; + int i = 0; + + wqe_idx += (sgl_offset >> SQEBB_SHIFT); + 
sgl_offset &= (SQEBB_SIZE - 1); + data = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx, qp->attrs.sq_size, + SQEBB_SHIFT); + + while (i < send_wr->num_sge) { + bytes += send_wr->sg_list[i].length; + if (bytes > (int)ERDMA_MAX_INLINE) + return -EINVAL; + + remain_size = send_wr->sg_list[i].length; + data_off = 0; + + while (1) { + copy_size = min(remain_size, SQEBB_SIZE - sgl_offset); + + memcpy(data + sgl_offset, + (void *)(uintptr_t)send_wr->sg_list[i].addr + + data_off, + copy_size); + remain_size -= copy_size; + data_off += copy_size; + sgl_offset += copy_size; + wqe_idx += (sgl_offset >> SQEBB_SHIFT); + sgl_offset &= (SQEBB_SIZE - 1); + + data = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx, + qp->attrs.sq_size, SQEBB_SHIFT); + if (!remain_size) + break; + } + + i++; + } + *length_field = cpu_to_le32(bytes); + + return bytes; +} + +static int fill_sgl(struct erdma_qp *qp, const struct ib_send_wr *send_wr, + u16 wqe_idx, u32 sgl_offset, __le32 *length_field) +{ + int i = 0; + u32 bytes = 0; + char *sgl; + + if (send_wr->num_sge > qp->dev->attrs.max_send_sge) + return -EINVAL; + + if (sgl_offset & 0xF) + return -EINVAL; + + while (i < send_wr->num_sge) { + wqe_idx += (sgl_offset >> SQEBB_SHIFT); + sgl_offset &= (SQEBB_SIZE - 1); + sgl = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx, + qp->attrs.sq_size, SQEBB_SHIFT); + + bytes += send_wr->sg_list[i].length; + memcpy(sgl + sgl_offset, &send_wr->sg_list[i], + sizeof(struct ib_sge)); + + sgl_offset += sizeof(struct ib_sge); + i++; + } + + *length_field = cpu_to_le32(bytes); + return 0; +} + +static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, + const struct ib_send_wr *send_wr) +{ + u32 wqe_size, wqebb_cnt, hw_op, flags, sgl_offset; + u32 idx = *pi & (qp->attrs.sq_size - 1); + enum ib_wr_opcode op = send_wr->opcode; + struct erdma_readreq_sqe *read_sqe; + struct erdma_reg_mr_sqe *regmr_sge; + struct erdma_write_sqe *write_sqe; + struct erdma_send_sqe *send_sqe; + struct ib_rdma_wr *rdma_wr; + struct erdma_mr *mr; 
+ __le32 *length_field; + u64 wqe_hdr, *entry; + struct ib_sge *sge; + u32 attrs; + int ret; + + entry = get_queue_entry(qp->kern_qp.sq_buf, idx, qp->attrs.sq_size, + SQEBB_SHIFT); + + /* Clear the SQE header section. */ + *entry = 0; + + qp->kern_qp.swr_tbl[idx] = send_wr->wr_id; + flags = send_wr->send_flags; + wqe_hdr = FIELD_PREP( + ERDMA_SQE_HDR_CE_MASK, + ((flags & IB_SEND_SIGNALED) || qp->kern_qp.sig_all) ? 1 : 0); + wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SE_MASK, + flags & IB_SEND_SOLICITED ? 1 : 0); + wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_FENCE_MASK, + flags & IB_SEND_FENCE ? 1 : 0); + wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_INLINE_MASK, + flags & IB_SEND_INLINE ? 1 : 0); + wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, QP_ID(qp)); + + switch (op) { + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + hw_op = ERDMA_OP_WRITE; + if (op == IB_WR_RDMA_WRITE_WITH_IMM) + hw_op = ERDMA_OP_WRITE_WITH_IMM; + wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op); + rdma_wr = container_of(send_wr, struct ib_rdma_wr, wr); + write_sqe = (struct erdma_write_sqe *)entry; + + write_sqe->imm_data = send_wr->ex.imm_data; + write_sqe->sink_stag = cpu_to_le32(rdma_wr->rkey); + write_sqe->sink_to_h = + cpu_to_le32(upper_32_bits(rdma_wr->remote_addr)); + write_sqe->sink_to_l = + cpu_to_le32(lower_32_bits(rdma_wr->remote_addr)); + + length_field = &write_sqe->length; + wqe_size = sizeof(struct erdma_write_sqe); + sgl_offset = wqe_size; + break; + case IB_WR_RDMA_READ: + case IB_WR_RDMA_READ_WITH_INV: + read_sqe = (struct erdma_readreq_sqe *)entry; + if (unlikely(send_wr->num_sge != 1)) + return -EINVAL; + hw_op = ERDMA_OP_READ; + if (op == IB_WR_RDMA_READ_WITH_INV) { + hw_op = ERDMA_OP_READ_WITH_INV; + read_sqe->invalid_stag = + cpu_to_le32(send_wr->ex.invalidate_rkey); + } + + wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op); + rdma_wr = container_of(send_wr, struct ib_rdma_wr, wr); + read_sqe->length = cpu_to_le32(send_wr->sg_list[0].length); + read_sqe->sink_stag = 
cpu_to_le32(send_wr->sg_list[0].lkey); + read_sqe->sink_to_l = + cpu_to_le32(lower_32_bits(send_wr->sg_list[0].addr)); + read_sqe->sink_to_h = + cpu_to_le32(upper_32_bits(send_wr->sg_list[0].addr)); + + sge = get_queue_entry(qp->kern_qp.sq_buf, idx + 1, + qp->attrs.sq_size, SQEBB_SHIFT); + sge->addr = rdma_wr->remote_addr; + sge->lkey = rdma_wr->rkey; + sge->length = send_wr->sg_list[0].length; + wqe_size = sizeof(struct erdma_readreq_sqe) + + send_wr->num_sge * sizeof(struct ib_sge); + + goto out; + case IB_WR_SEND: + case IB_WR_SEND_WITH_IMM: + case IB_WR_SEND_WITH_INV: + send_sqe = (struct erdma_send_sqe *)entry; + hw_op = ERDMA_OP_SEND; + if (op == IB_WR_SEND_WITH_IMM) { + hw_op = ERDMA_OP_SEND_WITH_IMM; + send_sqe->imm_data = send_wr->ex.imm_data; + } else if (op == IB_WR_SEND_WITH_INV) { + hw_op = ERDMA_OP_SEND_WITH_INV; + send_sqe->invalid_stag = + cpu_to_le32(send_wr->ex.invalidate_rkey); + } + wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op); + length_field = &send_sqe->length; + wqe_size = sizeof(struct erdma_send_sqe); + sgl_offset = wqe_size; + + break; + case IB_WR_REG_MR: + wqe_hdr |= + FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, ERDMA_OP_REG_MR); + regmr_sge = (struct erdma_reg_mr_sqe *)entry; + mr = to_emr(reg_wr(send_wr)->mr); + + mr->access = ERDMA_MR_ACC_LR | + to_erdma_access_flags(reg_wr(send_wr)->access); + regmr_sge->addr = cpu_to_le64(mr->ibmr.iova); + regmr_sge->length = cpu_to_le32(mr->ibmr.length); + regmr_sge->stag = cpu_to_le32(reg_wr(send_wr)->key); + attrs = FIELD_PREP(ERDMA_SQE_MR_MODE_MASK, 0) | + FIELD_PREP(ERDMA_SQE_MR_ACCESS_MASK, mr->access) | + FIELD_PREP(ERDMA_SQE_MR_MTT_CNT_MASK, + mr->mem.mtt_nents); + + if (mr->mem.mtt_nents < ERDMA_MAX_INLINE_MTT_ENTRIES) { + attrs |= FIELD_PREP(ERDMA_SQE_MR_MTT_TYPE_MASK, 0); + /* Copy SGLs to SQE content to accelerate */ + memcpy(get_queue_entry(qp->kern_qp.sq_buf, idx + 1, + qp->attrs.sq_size, SQEBB_SHIFT), + mr->mem.mtt_buf, MTT_SIZE(mr->mem.mtt_nents)); + wqe_size = sizeof(struct 
erdma_reg_mr_sqe) + + MTT_SIZE(mr->mem.mtt_nents); + } else { + attrs |= FIELD_PREP(ERDMA_SQE_MR_MTT_TYPE_MASK, 1); + wqe_size = sizeof(struct erdma_reg_mr_sqe); + } + + regmr_sge->attrs = cpu_to_le32(attrs); + goto out; + case IB_WR_LOCAL_INV: + wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, + ERDMA_OP_LOCAL_INV); + regmr_sge = (struct erdma_reg_mr_sqe *)entry; + regmr_sge->stag = cpu_to_le32(send_wr->ex.invalidate_rkey); + wqe_size = sizeof(struct erdma_reg_mr_sqe); + goto out; + default: + return -EOPNOTSUPP; + } + + if (flags & IB_SEND_INLINE) { + ret = fill_inline_data(qp, send_wr, idx, sgl_offset, + length_field); + if (ret < 0) + return -EINVAL; + wqe_size += ret; + wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK, ret); + } else { + ret = fill_sgl(qp, send_wr, idx, sgl_offset, length_field); + if (ret) + return -EINVAL; + wqe_size += send_wr->num_sge * sizeof(struct ib_sge); + wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK, + send_wr->num_sge); + } + +out: + wqebb_cnt = SQEBB_COUNT(wqe_size); + wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_CNT_MASK, wqebb_cnt - 1); + *pi += wqebb_cnt; + wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, *pi); + + *entry = wqe_hdr; + + return 0; +} + +static void kick_sq_db(struct erdma_qp *qp, u16 pi) +{ + u64 db_data = FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, QP_ID(qp)) | + FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, pi); + + *(u64 *)qp->kern_qp.sq_db_info = db_data; + writeq(db_data, qp->kern_qp.hw_sq_db); +} + +int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr, + const struct ib_send_wr **bad_send_wr) +{ + struct erdma_qp *qp = to_eqp(ibqp); + int ret = 0; + const struct ib_send_wr *wr = send_wr; + unsigned long flags; + u16 sq_pi; + + if (!send_wr) + return -EINVAL; + + spin_lock_irqsave(&qp->lock, flags); + sq_pi = qp->kern_qp.sq_pi; + + while (wr) { + if ((u16)(sq_pi - qp->kern_qp.sq_ci) >= qp->attrs.sq_size) { + ret = -ENOMEM; + *bad_send_wr = send_wr; + break; + } + + ret = erdma_push_one_sqe(qp, 
&sq_pi, wr); + if (ret) { + *bad_send_wr = wr; + break; + } + qp->kern_qp.sq_pi = sq_pi; + kick_sq_db(qp, sq_pi); + + wr = wr->next; + } + spin_unlock_irqrestore(&qp->lock, flags); + + return ret; +} + +static int erdma_post_recv_one(struct erdma_qp *qp, + const struct ib_recv_wr *recv_wr) +{ + struct erdma_rqe *rqe = + get_queue_entry(qp->kern_qp.rq_buf, qp->kern_qp.rq_pi, + qp->attrs.rq_size, RQE_SHIFT); + + rqe->qe_idx = cpu_to_le16(qp->kern_qp.rq_pi + 1); + rqe->qpn = cpu_to_le32(QP_ID(qp)); + + if (recv_wr->num_sge == 0) { + rqe->length = 0; + } else if (recv_wr->num_sge == 1) { + rqe->stag = cpu_to_le32(recv_wr->sg_list[0].lkey); + rqe->to = cpu_to_le64(recv_wr->sg_list[0].addr); + rqe->length = cpu_to_le32(recv_wr->sg_list[0].length); + } else { + return -EINVAL; + } + + *(u64 *)qp->kern_qp.rq_db_info = *(u64 *)rqe; + writeq(*(u64 *)rqe, qp->kern_qp.hw_rq_db); + + qp->kern_qp.rwr_tbl[qp->kern_qp.rq_pi & (qp->attrs.rq_size - 1)] = + recv_wr->wr_id; + qp->kern_qp.rq_pi++; + + return 0; +} + +int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr, + const struct ib_recv_wr **bad_recv_wr) +{ + const struct ib_recv_wr *wr = recv_wr; + struct erdma_qp *qp = to_eqp(ibqp); + unsigned long flags; + int ret; + + spin_lock_irqsave(&qp->lock, flags); + + while (wr) { + ret = erdma_post_recv_one(qp, wr); + if (ret) { + *bad_recv_wr = wr; + break; + } + wr = wr->next; + } + + spin_unlock_irqrestore(&qp->lock, flags); + return ret; +} diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c new file mode 100644 index 000000000000..62be98e2b941 --- /dev/null +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -0,0 +1,1459 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */ +/* Kai Shen <kaishen@linux.alibaba.com> */ +/* Copyright (c) 2020-2022, Alibaba Group. 
*/ + +/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ +/* Copyright (c) 2008-2019, IBM Corporation */ + +/* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. */ + +#include <linux/vmalloc.h> +#include <net/addrconf.h> +#include <rdma/erdma-abi.h> +#include <rdma/ib_umem.h> +#include <rdma/uverbs_ioctl.h> + +#include "erdma.h" +#include "erdma_cm.h" +#include "erdma_verbs.h" + +static int create_qp_cmd(struct erdma_dev *dev, struct erdma_qp *qp) +{ + struct erdma_cmdq_create_qp_req req; + struct erdma_pd *pd = to_epd(qp->ibqp.pd); + struct erdma_uqp *user_qp; + u64 resp0, resp1; + int err; + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_CREATE_QP); + + req.cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_QP_SQ_DEPTH_MASK, + ilog2(qp->attrs.sq_size)) | + FIELD_PREP(ERDMA_CMD_CREATE_QP_QPN_MASK, QP_ID(qp)); + req.cfg1 = FIELD_PREP(ERDMA_CMD_CREATE_QP_RQ_DEPTH_MASK, + ilog2(qp->attrs.rq_size)) | + FIELD_PREP(ERDMA_CMD_CREATE_QP_PD_MASK, pd->pdn); + + if (rdma_is_kernel_res(&qp->ibqp.res)) { + u32 pgsz_range = ilog2(SZ_1M) - PAGE_SHIFT; + + req.sq_cqn_mtt_cfg = + FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK, + pgsz_range) | + FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn); + req.rq_cqn_mtt_cfg = + FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK, + pgsz_range) | + FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn); + + req.sq_mtt_cfg = + FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_OFFSET_MASK, 0) | + FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, 1) | + FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK, + ERDMA_MR_INLINE_MTT); + req.rq_mtt_cfg = req.sq_mtt_cfg; + + req.rq_buf_addr = qp->kern_qp.rq_buf_dma_addr; + req.sq_buf_addr = qp->kern_qp.sq_buf_dma_addr; + req.sq_db_info_dma_addr = qp->kern_qp.sq_buf_dma_addr + + (qp->attrs.sq_size << SQEBB_SHIFT); + req.rq_db_info_dma_addr = qp->kern_qp.rq_buf_dma_addr + + (qp->attrs.rq_size << RQE_SHIFT); + } else { + user_qp = &qp->user_qp; + req.sq_cqn_mtt_cfg = FIELD_PREP( + 
ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK, + ilog2(user_qp->sq_mtt.page_size) - PAGE_SHIFT); + req.sq_cqn_mtt_cfg |= + FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn); + + req.rq_cqn_mtt_cfg = FIELD_PREP( + ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK, + ilog2(user_qp->rq_mtt.page_size) - PAGE_SHIFT); + req.rq_cqn_mtt_cfg |= + FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn); + + req.sq_mtt_cfg = user_qp->sq_mtt.page_offset; + req.sq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, + user_qp->sq_mtt.mtt_nents) | + FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK, + user_qp->sq_mtt.mtt_type); + + req.rq_mtt_cfg = user_qp->rq_mtt.page_offset; + req.rq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, + user_qp->rq_mtt.mtt_nents) | + FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK, + user_qp->rq_mtt.mtt_type); + + req.sq_buf_addr = user_qp->sq_mtt.mtt_entry[0]; + req.rq_buf_addr = user_qp->rq_mtt.mtt_entry[0]; + + req.sq_db_info_dma_addr = user_qp->sq_db_info_dma_addr; + req.rq_db_info_dma_addr = user_qp->rq_db_info_dma_addr; + } + + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0, + &resp1); + if (!err) + qp->attrs.cookie = + FIELD_GET(ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK, resp0); + + return err; +} + +static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) +{ + struct erdma_cmdq_reg_mr_req req; + struct erdma_pd *pd = to_epd(mr->ibmr.pd); + u64 *phy_addr; + int i; + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_REG_MR); + + req.cfg0 = FIELD_PREP(ERDMA_CMD_MR_VALID_MASK, mr->valid) | + FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, mr->ibmr.lkey & 0xFF) | + FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, mr->ibmr.lkey >> 8); + req.cfg1 = FIELD_PREP(ERDMA_CMD_REGMR_PD_MASK, pd->pdn) | + FIELD_PREP(ERDMA_CMD_REGMR_TYPE_MASK, mr->type) | + FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access) | + FIELD_PREP(ERDMA_CMD_REGMR_ACC_MODE_MASK, 0); + req.cfg2 = FIELD_PREP(ERDMA_CMD_REGMR_PAGESIZE_MASK, + ilog2(mr->mem.page_size)) | + 
FIELD_PREP(ERDMA_CMD_REGMR_MTT_TYPE_MASK, mr->mem.mtt_type) | + FIELD_PREP(ERDMA_CMD_REGMR_MTT_CNT_MASK, mr->mem.page_cnt); + + if (mr->type == ERDMA_MR_TYPE_DMA) + goto post_cmd; + + if (mr->type == ERDMA_MR_TYPE_NORMAL) { + req.start_va = mr->mem.va; + req.size = mr->mem.len; + } + + if (mr->type == ERDMA_MR_TYPE_FRMR || + mr->mem.mtt_type == ERDMA_MR_INDIRECT_MTT) { + phy_addr = req.phy_addr; + *phy_addr = mr->mem.mtt_entry[0]; + } else { + phy_addr = req.phy_addr; + for (i = 0; i < mr->mem.mtt_nents; i++) + *phy_addr++ = mr->mem.mtt_entry[i]; + } + +post_cmd: + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); +} + +static int create_cq_cmd(struct erdma_dev *dev, struct erdma_cq *cq) +{ + struct erdma_cmdq_create_cq_req req; + u32 page_size; + struct erdma_mem *mtt; + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_CREATE_CQ); + + req.cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_CQN_MASK, cq->cqn) | + FIELD_PREP(ERDMA_CMD_CREATE_CQ_DEPTH_MASK, ilog2(cq->depth)); + req.cfg1 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_EQN_MASK, cq->assoc_eqn); + + if (rdma_is_kernel_res(&cq->ibcq.res)) { + page_size = SZ_32M; + req.cfg0 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK, + ilog2(page_size) - PAGE_SHIFT); + req.qbuf_addr_l = lower_32_bits(cq->kern_cq.qbuf_dma_addr); + req.qbuf_addr_h = upper_32_bits(cq->kern_cq.qbuf_dma_addr); + + req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK, 1) | + FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK, + ERDMA_MR_INLINE_MTT); + + req.first_page_offset = 0; + req.cq_db_info_addr = + cq->kern_cq.qbuf_dma_addr + (cq->depth << CQE_SHIFT); + } else { + mtt = &cq->user_cq.qbuf_mtt; + req.cfg0 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK, + ilog2(mtt->page_size) - PAGE_SHIFT); + if (mtt->mtt_nents == 1) { + req.qbuf_addr_l = lower_32_bits(*(u64 *)mtt->mtt_buf); + req.qbuf_addr_h = upper_32_bits(*(u64 *)mtt->mtt_buf); + } else { + req.qbuf_addr_l = lower_32_bits(mtt->mtt_entry[0]); + req.qbuf_addr_h = 
upper_32_bits(mtt->mtt_entry[0]); + } + req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK, + mtt->mtt_nents); + req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK, + mtt->mtt_type); + + req.first_page_offset = mtt->page_offset; + req.cq_db_info_addr = cq->user_cq.db_info_dma_addr; + } + + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); +} + +static int erdma_alloc_idx(struct erdma_resource_cb *res_cb) +{ + int idx; + unsigned long flags; + + spin_lock_irqsave(&res_cb->lock, flags); + idx = find_next_zero_bit(res_cb->bitmap, res_cb->max_cap, + res_cb->next_alloc_idx); + if (idx == res_cb->max_cap) { + idx = find_first_zero_bit(res_cb->bitmap, res_cb->max_cap); + if (idx == res_cb->max_cap) { + res_cb->next_alloc_idx = 1; + spin_unlock_irqrestore(&res_cb->lock, flags); + return -ENOSPC; + } + } + + set_bit(idx, res_cb->bitmap); + res_cb->next_alloc_idx = idx + 1; + spin_unlock_irqrestore(&res_cb->lock, flags); + + return idx; +} + +static inline void erdma_free_idx(struct erdma_resource_cb *res_cb, u32 idx) +{ + unsigned long flags; + u32 used; + + spin_lock_irqsave(&res_cb->lock, flags); + used = __test_and_clear_bit(idx, res_cb->bitmap); + spin_unlock_irqrestore(&res_cb->lock, flags); + WARN_ON(!used); +} + +static struct rdma_user_mmap_entry * +erdma_user_mmap_entry_insert(struct erdma_ucontext *uctx, void *address, + u32 size, u8 mmap_flag, u64 *mmap_offset) +{ + struct erdma_user_mmap_entry *entry = + kzalloc(sizeof(*entry), GFP_KERNEL); + int ret; + + if (!entry) + return NULL; + + entry->address = (u64)address; + entry->mmap_flag = mmap_flag; + + size = PAGE_ALIGN(size); + + ret = rdma_user_mmap_entry_insert(&uctx->ibucontext, &entry->rdma_entry, + size); + if (ret) { + kfree(entry); + return NULL; + } + + *mmap_offset = rdma_user_mmap_get_offset(&entry->rdma_entry); + + return &entry->rdma_entry; +} + +int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, + struct ib_udata *unused) +{ + struct 
erdma_dev *dev = to_edev(ibdev); + + memset(attr, 0, sizeof(*attr)); + + attr->max_mr_size = dev->attrs.max_mr_size; + attr->vendor_id = PCI_VENDOR_ID_ALIBABA; + attr->vendor_part_id = dev->pdev->device; + attr->hw_ver = dev->pdev->revision; + attr->max_qp = dev->attrs.max_qp - 1; + attr->max_qp_wr = min(dev->attrs.max_send_wr, dev->attrs.max_recv_wr); + attr->max_qp_rd_atom = dev->attrs.max_ord; + attr->max_qp_init_rd_atom = dev->attrs.max_ird; + attr->max_res_rd_atom = dev->attrs.max_qp * dev->attrs.max_ird; + attr->device_cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS; + attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY; + ibdev->local_dma_lkey = dev->attrs.local_dma_key; + attr->max_send_sge = dev->attrs.max_send_sge; + attr->max_recv_sge = dev->attrs.max_recv_sge; + attr->max_sge_rd = dev->attrs.max_sge_rd; + attr->max_cq = dev->attrs.max_cq - 1; + attr->max_cqe = dev->attrs.max_cqe; + attr->max_mr = dev->attrs.max_mr; + attr->max_pd = dev->attrs.max_pd; + attr->max_mw = dev->attrs.max_mw; + attr->max_fast_reg_page_list_len = ERDMA_MAX_FRMR_PA; + attr->page_size_cap = ERDMA_PAGE_SIZE_SUPPORT; + attr->fw_ver = dev->attrs.fw_version; + + if (dev->netdev) + addrconf_addr_eui48((u8 *)&attr->sys_image_guid, + dev->netdev->dev_addr); + + return 0; +} + +int erdma_query_gid(struct ib_device *ibdev, u32 port, int idx, + union ib_gid *gid) +{ + struct erdma_dev *dev = to_edev(ibdev); + + memset(gid, 0, sizeof(*gid)); + ether_addr_copy(gid->raw, dev->attrs.peer_addr); + + return 0; +} + +int erdma_query_port(struct ib_device *ibdev, u32 port, + struct ib_port_attr *attr) +{ + struct erdma_dev *dev = to_edev(ibdev); + struct net_device *ndev = dev->netdev; + + memset(attr, 0, sizeof(*attr)); + + attr->gid_tbl_len = 1; + attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP; + attr->max_msg_sz = -1; + + if (!ndev) + goto out; + + ib_get_eth_speed(ibdev, port, &attr->active_speed, &attr->active_width); + attr->max_mtu = ib_mtu_int_to_enum(ndev->mtu); + attr->active_mtu = 
ib_mtu_int_to_enum(ndev->mtu); + if (netif_running(ndev) && netif_carrier_ok(ndev)) + dev->state = IB_PORT_ACTIVE; + else + dev->state = IB_PORT_DOWN; + attr->state = dev->state; + +out: + if (dev->state == IB_PORT_ACTIVE) + attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP; + else + attr->phys_state = IB_PORT_PHYS_STATE_DISABLED; + + return 0; +} + +int erdma_get_port_immutable(struct ib_device *ibdev, u32 port, + struct ib_port_immutable *port_immutable) +{ + port_immutable->gid_tbl_len = 1; + port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; + + return 0; +} + +int erdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +{ + struct erdma_pd *pd = to_epd(ibpd); + struct erdma_dev *dev = to_edev(ibpd->device); + int pdn; + + pdn = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_PD]); + if (pdn < 0) + return pdn; + + pd->pdn = pdn; + + return 0; +} + +int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +{ + struct erdma_pd *pd = to_epd(ibpd); + struct erdma_dev *dev = to_edev(ibpd->device); + + erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_PD], pd->pdn); + + return 0; +} + +static int erdma_qp_validate_cap(struct erdma_dev *dev, + struct ib_qp_init_attr *attrs) +{ + if ((attrs->cap.max_send_wr > dev->attrs.max_send_wr) || + (attrs->cap.max_recv_wr > dev->attrs.max_recv_wr) || + (attrs->cap.max_send_sge > dev->attrs.max_send_sge) || + (attrs->cap.max_recv_sge > dev->attrs.max_recv_sge) || + (attrs->cap.max_inline_data > ERDMA_MAX_INLINE) || + !attrs->cap.max_send_wr || !attrs->cap.max_recv_wr) { + return -EINVAL; + } + + return 0; +} + +static int erdma_qp_validate_attr(struct erdma_dev *dev, + struct ib_qp_init_attr *attrs) +{ + if (attrs->qp_type != IB_QPT_RC) + return -EOPNOTSUPP; + + if (attrs->srq) + return -EOPNOTSUPP; + + if (!attrs->send_cq || !attrs->recv_cq) + return -EOPNOTSUPP; + + return 0; +} + +static void free_kernel_qp(struct erdma_qp *qp) +{ + struct erdma_dev *dev = qp->dev; + + vfree(qp->kern_qp.swr_tbl); + vfree(qp->kern_qp.rwr_tbl); + 
+ if (qp->kern_qp.sq_buf) + dma_free_coherent( + &dev->pdev->dev, + WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT), + qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr); + + if (qp->kern_qp.rq_buf) + dma_free_coherent( + &dev->pdev->dev, + WARPPED_BUFSIZE(qp->attrs.rq_size << RQE_SHIFT), + qp->kern_qp.rq_buf, qp->kern_qp.rq_buf_dma_addr); +} + +static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp, + struct ib_qp_init_attr *attrs) +{ + struct erdma_kqp *kqp = &qp->kern_qp; + int size; + + if (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) + kqp->sig_all = 1; + + kqp->sq_pi = 0; + kqp->sq_ci = 0; + kqp->rq_pi = 0; + kqp->rq_ci = 0; + kqp->hw_sq_db = + dev->func_bar + (ERDMA_SDB_SHARED_PAGE_INDEX << PAGE_SHIFT); + kqp->hw_rq_db = dev->func_bar + ERDMA_BAR_RQDB_SPACE_OFFSET; + + kqp->swr_tbl = vmalloc(qp->attrs.sq_size * sizeof(u64)); + kqp->rwr_tbl = vmalloc(qp->attrs.rq_size * sizeof(u64)); + if (!kqp->swr_tbl || !kqp->rwr_tbl) + goto err_out; + + size = (qp->attrs.sq_size << SQEBB_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE; + kqp->sq_buf = dma_alloc_coherent(&dev->pdev->dev, size, + &kqp->sq_buf_dma_addr, GFP_KERNEL); + if (!kqp->sq_buf) + goto err_out; + + size = (qp->attrs.rq_size << RQE_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE; + kqp->rq_buf = dma_alloc_coherent(&dev->pdev->dev, size, + &kqp->rq_buf_dma_addr, GFP_KERNEL); + if (!kqp->rq_buf) + goto err_out; + + kqp->sq_db_info = kqp->sq_buf + (qp->attrs.sq_size << SQEBB_SHIFT); + kqp->rq_db_info = kqp->rq_buf + (qp->attrs.rq_size << RQE_SHIFT); + + return 0; + +err_out: + free_kernel_qp(qp); + return -ENOMEM; +} + +static int get_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem, + u64 start, u64 len, int access, u64 virt, + unsigned long req_page_size, u8 force_indirect_mtt) +{ + struct ib_block_iter biter; + uint64_t *phy_addr = NULL; + int ret = 0; + + mem->umem = ib_umem_get(&dev->ibdev, start, len, access); + if (IS_ERR(mem->umem)) { + ret = PTR_ERR(mem->umem); + mem->umem = NULL; + return ret; + } + + 
mem->va = virt; + mem->len = len; + mem->page_size = ib_umem_find_best_pgsz(mem->umem, req_page_size, virt); + mem->page_offset = start & (mem->page_size - 1); + mem->mtt_nents = ib_umem_num_dma_blocks(mem->umem, mem->page_size); + mem->page_cnt = mem->mtt_nents; + + if (mem->page_cnt > ERDMA_MAX_INLINE_MTT_ENTRIES || + force_indirect_mtt) { + mem->mtt_type = ERDMA_MR_INDIRECT_MTT; + mem->mtt_buf = + alloc_pages_exact(MTT_SIZE(mem->page_cnt), GFP_KERNEL); + if (!mem->mtt_buf) { + ret = -ENOMEM; + goto error_ret; + } + phy_addr = mem->mtt_buf; + } else { + mem->mtt_type = ERDMA_MR_INLINE_MTT; + phy_addr = mem->mtt_entry; + } + + rdma_umem_for_each_dma_block(mem->umem, &biter, mem->page_size) { + *phy_addr = rdma_block_iter_dma_address(&biter); + phy_addr++; + } + + if (mem->mtt_type == ERDMA_MR_INDIRECT_MTT) { + mem->mtt_entry[0] = + dma_map_single(&dev->pdev->dev, mem->mtt_buf, + MTT_SIZE(mem->page_cnt), DMA_TO_DEVICE); + if (dma_mapping_error(&dev->pdev->dev, mem->mtt_entry[0])) { + free_pages_exact(mem->mtt_buf, MTT_SIZE(mem->page_cnt)); + mem->mtt_buf = NULL; + ret = -ENOMEM; + goto error_ret; + } + } + + return 0; + +error_ret: + if (mem->umem) { + ib_umem_release(mem->umem); + mem->umem = NULL; + } + + return ret; +} + +static void put_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem) +{ + if (mem->mtt_buf) { + dma_unmap_single(&dev->pdev->dev, mem->mtt_entry[0], + MTT_SIZE(mem->page_cnt), DMA_TO_DEVICE); + free_pages_exact(mem->mtt_buf, MTT_SIZE(mem->page_cnt)); + } + + if (mem->umem) { + ib_umem_release(mem->umem); + mem->umem = NULL; + } +} + +static int erdma_map_user_dbrecords(struct erdma_ucontext *ctx, + u64 dbrecords_va, + struct erdma_user_dbrecords_page **dbr_page, + dma_addr_t *dma_addr) +{ + struct erdma_user_dbrecords_page *page = NULL; + int rv = 0; + + mutex_lock(&ctx->dbrecords_page_mutex); + + list_for_each_entry(page, &ctx->dbrecords_page_list, list) + if (page->va == (dbrecords_va & PAGE_MASK)) + goto found; + + page = 
kmalloc(sizeof(*page), GFP_KERNEL); + if (!page) { + rv = -ENOMEM; + goto out; + } + + page->va = (dbrecords_va & PAGE_MASK); + page->refcnt = 0; + + page->umem = ib_umem_get(ctx->ibucontext.device, + dbrecords_va & PAGE_MASK, PAGE_SIZE, 0); + if (IS_ERR(page->umem)) { + rv = PTR_ERR(page->umem); + kfree(page); + goto out; + } + + list_add(&page->list, &ctx->dbrecords_page_list); + +found: + *dma_addr = sg_dma_address(page->umem->sgt_append.sgt.sgl) + + (dbrecords_va & ~PAGE_MASK); + *dbr_page = page; + page->refcnt++; + +out: + mutex_unlock(&ctx->dbrecords_page_mutex); + return rv; +} + +static void +erdma_unmap_user_dbrecords(struct erdma_ucontext *ctx, + struct erdma_user_dbrecords_page **dbr_page) +{ + if (!ctx || !(*dbr_page)) + return; + + mutex_lock(&ctx->dbrecords_page_mutex); + if (--(*dbr_page)->refcnt == 0) { + list_del(&(*dbr_page)->list); + ib_umem_release((*dbr_page)->umem); + kfree(*dbr_page); + } + + *dbr_page = NULL; + mutex_unlock(&ctx->dbrecords_page_mutex); +} + +static int init_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx, + u64 va, u32 len, u64 db_info_va) +{ + dma_addr_t db_info_dma_addr; + u32 rq_offset; + int ret; + + if (len < (PAGE_ALIGN(qp->attrs.sq_size * SQEBB_SIZE) + + qp->attrs.rq_size * RQE_SIZE)) + return -EINVAL; + + ret = get_mtt_entries(qp->dev, &qp->user_qp.sq_mtt, va, + qp->attrs.sq_size << SQEBB_SHIFT, 0, va, + (SZ_1M - SZ_4K), 1); + if (ret) + return ret; + + rq_offset = PAGE_ALIGN(qp->attrs.sq_size << SQEBB_SHIFT); + qp->user_qp.rq_offset = rq_offset; + + ret = get_mtt_entries(qp->dev, &qp->user_qp.rq_mtt, va + rq_offset, + qp->attrs.rq_size << RQE_SHIFT, 0, va + rq_offset, + (SZ_1M - SZ_4K), 1); + if (ret) + goto put_sq_mtt; + + ret = erdma_map_user_dbrecords(uctx, db_info_va, + &qp->user_qp.user_dbr_page, + &db_info_dma_addr); + if (ret) + goto put_rq_mtt; + + qp->user_qp.sq_db_info_dma_addr = db_info_dma_addr; + qp->user_qp.rq_db_info_dma_addr = db_info_dma_addr + ERDMA_DB_SIZE; + + return 0; + +put_rq_mtt: + 
put_mtt_entries(qp->dev, &qp->user_qp.rq_mtt); + +put_sq_mtt: + put_mtt_entries(qp->dev, &qp->user_qp.sq_mtt); + + return ret; +} + +static void free_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx) +{ + put_mtt_entries(qp->dev, &qp->user_qp.sq_mtt); + put_mtt_entries(qp->dev, &qp->user_qp.rq_mtt); + erdma_unmap_user_dbrecords(uctx, &qp->user_qp.user_dbr_page); +} + +int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, + struct ib_udata *udata) +{ + struct erdma_qp *qp = to_eqp(ibqp); + struct erdma_dev *dev = to_edev(ibqp->device); + struct erdma_ucontext *uctx = rdma_udata_to_drv_context( + udata, struct erdma_ucontext, ibucontext); + struct erdma_ureq_create_qp ureq; + struct erdma_uresp_create_qp uresp; + int ret; + + ret = erdma_qp_validate_cap(dev, attrs); + if (ret) + goto err_out; + + ret = erdma_qp_validate_attr(dev, attrs); + if (ret) + goto err_out; + + qp->scq = to_ecq(attrs->send_cq); + qp->rcq = to_ecq(attrs->recv_cq); + qp->dev = dev; + qp->attrs.cc = dev->attrs.cc; + + init_rwsem(&qp->state_lock); + kref_init(&qp->ref); + init_completion(&qp->safe_free); + + ret = xa_alloc_cyclic(&dev->qp_xa, &qp->ibqp.qp_num, qp, + XA_LIMIT(1, dev->attrs.max_qp - 1), + &dev->next_alloc_qpn, GFP_KERNEL); + if (ret < 0) { + ret = -ENOMEM; + goto err_out; + } + + qp->attrs.sq_size = roundup_pow_of_two(attrs->cap.max_send_wr * + ERDMA_MAX_WQEBB_PER_SQE); + qp->attrs.rq_size = roundup_pow_of_two(attrs->cap.max_recv_wr); + + if (uctx) { + ret = ib_copy_from_udata(&ureq, udata, + min(sizeof(ureq), udata->inlen)); + if (ret) + goto err_out_xa; + + ret = init_user_qp(qp, uctx, ureq.qbuf_va, ureq.qbuf_len, + ureq.db_record_va); + if (ret) + goto err_out_xa; + + memset(&uresp, 0, sizeof(uresp)); + + uresp.num_sqe = qp->attrs.sq_size; + uresp.num_rqe = qp->attrs.rq_size; + uresp.qp_id = QP_ID(qp); + uresp.rq_offset = qp->user_qp.rq_offset; + + ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (ret) + goto err_out_cmd; + } else { + 
init_kernel_qp(dev, qp, attrs); + } + + qp->attrs.max_send_sge = attrs->cap.max_send_sge; + qp->attrs.max_recv_sge = attrs->cap.max_recv_sge; + qp->attrs.state = ERDMA_QP_STATE_IDLE; + + ret = create_qp_cmd(dev, qp); + if (ret) + goto err_out_cmd; + + spin_lock_init(&qp->lock); + + return 0; + +err_out_cmd: + if (uctx) + free_user_qp(qp, uctx); + else + free_kernel_qp(qp); +err_out_xa: + xa_erase(&dev->qp_xa, QP_ID(qp)); +err_out: + return ret; +} + +static int erdma_create_stag(struct erdma_dev *dev, u32 *stag) +{ + int stag_idx; + + stag_idx = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX]); + if (stag_idx < 0) + return stag_idx; + + /* For now, we always let key field be zero. */ + *stag = (stag_idx << 8); + + return 0; +} + +struct ib_mr *erdma_get_dma_mr(struct ib_pd *ibpd, int acc) +{ + struct erdma_dev *dev = to_edev(ibpd->device); + struct erdma_mr *mr; + u32 stag; + int ret; + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + ret = erdma_create_stag(dev, &stag); + if (ret) + goto out_free; + + mr->type = ERDMA_MR_TYPE_DMA; + + mr->ibmr.lkey = stag; + mr->ibmr.rkey = stag; + mr->ibmr.pd = ibpd; + mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(acc); + ret = regmr_cmd(dev, mr); + if (ret) + goto out_remove_stag; + + return &mr->ibmr; + +out_remove_stag: + erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX], + mr->ibmr.lkey >> 8); + +out_free: + kfree(mr); + + return ERR_PTR(ret); +} + +struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, + u32 max_num_sg) +{ + struct erdma_mr *mr; + struct erdma_dev *dev = to_edev(ibpd->device); + int ret; + u32 stag; + + if (mr_type != IB_MR_TYPE_MEM_REG) + return ERR_PTR(-EOPNOTSUPP); + + if (max_num_sg > ERDMA_MR_MAX_MTT_CNT) + return ERR_PTR(-EINVAL); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + ret = erdma_create_stag(dev, &stag); + if (ret) + goto out_free; + + mr->type = ERDMA_MR_TYPE_FRMR; + + mr->ibmr.lkey = 
stag; + mr->ibmr.rkey = stag; + mr->ibmr.pd = ibpd; + /* update it in FRMR. */ + mr->access = ERDMA_MR_ACC_LR | ERDMA_MR_ACC_LW | ERDMA_MR_ACC_RR | + ERDMA_MR_ACC_RW; + + mr->mem.page_size = PAGE_SIZE; /* update it later. */ + mr->mem.page_cnt = max_num_sg; + mr->mem.mtt_type = ERDMA_MR_INDIRECT_MTT; + mr->mem.mtt_buf = + alloc_pages_exact(MTT_SIZE(mr->mem.page_cnt), GFP_KERNEL); + if (!mr->mem.mtt_buf) { + ret = -ENOMEM; + goto out_remove_stag; + } + + mr->mem.mtt_entry[0] = + dma_map_single(&dev->pdev->dev, mr->mem.mtt_buf, + MTT_SIZE(mr->mem.page_cnt), DMA_TO_DEVICE); + if (dma_mapping_error(&dev->pdev->dev, mr->mem.mtt_entry[0])) { + ret = -ENOMEM; + goto out_free_mtt; + } + + ret = regmr_cmd(dev, mr); + if (ret) + goto out_dma_unmap; + + return &mr->ibmr; + +out_dma_unmap: + dma_unmap_single(&dev->pdev->dev, mr->mem.mtt_entry[0], + MTT_SIZE(mr->mem.page_cnt), DMA_TO_DEVICE); +out_free_mtt: + free_pages_exact(mr->mem.mtt_buf, MTT_SIZE(mr->mem.page_cnt)); + +out_remove_stag: + erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX], + mr->ibmr.lkey >> 8); + +out_free: + kfree(mr); + + return ERR_PTR(ret); +} + +static int erdma_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct erdma_mr *mr = to_emr(ibmr); + + if (mr->mem.mtt_nents >= mr->mem.page_cnt) + return -1; + + *((u64 *)mr->mem.mtt_buf + mr->mem.mtt_nents) = addr; + mr->mem.mtt_nents++; + + return 0; +} + +int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, + unsigned int *sg_offset) +{ + struct erdma_mr *mr = to_emr(ibmr); + int num; + + mr->mem.mtt_nents = 0; + + num = ib_sg_to_pages(&mr->ibmr, sg, sg_nents, sg_offset, + erdma_set_page); + + return num; +} + +struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, + u64 virt, int access, struct ib_udata *udata) +{ + struct erdma_mr *mr = NULL; + struct erdma_dev *dev = to_edev(ibpd->device); + u32 stag; + int ret; + + if (!len || len > dev->attrs.max_mr_size) + return ERR_PTR(-EINVAL); + + mr = 
kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + ret = get_mtt_entries(dev, &mr->mem, start, len, access, virt, + SZ_2G - SZ_4K, 0); + if (ret) + goto err_out_free; + + ret = erdma_create_stag(dev, &stag); + if (ret) + goto err_out_put_mtt; + + mr->ibmr.lkey = mr->ibmr.rkey = stag; + mr->ibmr.pd = ibpd; + mr->mem.va = virt; + mr->mem.len = len; + mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(access); + mr->valid = 1; + mr->type = ERDMA_MR_TYPE_NORMAL; + + ret = regmr_cmd(dev, mr); + if (ret) + goto err_out_mr; + + return &mr->ibmr; + +err_out_mr: + erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX], + mr->ibmr.lkey >> 8); + +err_out_put_mtt: + put_mtt_entries(dev, &mr->mem); + +err_out_free: + kfree(mr); + + return ERR_PTR(ret); +} + +int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) +{ + struct erdma_mr *mr; + struct erdma_dev *dev = to_edev(ibmr->device); + struct erdma_cmdq_dereg_mr_req req; + int ret; + + mr = to_emr(ibmr); + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_DEREG_MR); + + req.cfg = FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, ibmr->lkey >> 8) | + FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, ibmr->lkey & 0xFF); + + ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + if (ret) + return ret; + + erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX], ibmr->lkey >> 8); + + put_mtt_entries(dev, &mr->mem); + + kfree(mr); + return 0; +} + +int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +{ + struct erdma_cq *cq = to_ecq(ibcq); + struct erdma_dev *dev = to_edev(ibcq->device); + struct erdma_ucontext *ctx = rdma_udata_to_drv_context( + udata, struct erdma_ucontext, ibucontext); + int err; + struct erdma_cmdq_destroy_cq_req req; + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_DESTROY_CQ); + req.cqn = cq->cqn; + + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + if (err) + return err; + + if (rdma_is_kernel_res(&cq->ibcq.res)) { 
+ dma_free_coherent(&dev->pdev->dev, + WARPPED_BUFSIZE(cq->depth << CQE_SHIFT), + cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr); + } else { + erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page); + put_mtt_entries(dev, &cq->user_cq.qbuf_mtt); + } + + xa_erase(&dev->cq_xa, cq->cqn); + + return 0; +} + +int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) +{ + struct erdma_qp *qp = to_eqp(ibqp); + struct erdma_dev *dev = to_edev(ibqp->device); + struct erdma_ucontext *ctx = rdma_udata_to_drv_context( + udata, struct erdma_ucontext, ibucontext); + struct erdma_qp_attrs qp_attrs; + int err; + struct erdma_cmdq_destroy_qp_req req; + + down_write(&qp->state_lock); + qp_attrs.state = ERDMA_QP_STATE_ERROR; + erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE); + up_write(&qp->state_lock); + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_DESTROY_QP); + req.qpn = QP_ID(qp); + + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + if (err) + return err; + + erdma_qp_put(qp); + wait_for_completion(&qp->safe_free); + + if (rdma_is_kernel_res(&qp->ibqp.res)) { + vfree(qp->kern_qp.swr_tbl); + vfree(qp->kern_qp.rwr_tbl); + dma_free_coherent( + &dev->pdev->dev, + WARPPED_BUFSIZE(qp->attrs.rq_size << RQE_SHIFT), + qp->kern_qp.rq_buf, qp->kern_qp.rq_buf_dma_addr); + dma_free_coherent( + &dev->pdev->dev, + WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT), + qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr); + } else { + put_mtt_entries(dev, &qp->user_qp.sq_mtt); + put_mtt_entries(dev, &qp->user_qp.rq_mtt); + erdma_unmap_user_dbrecords(ctx, &qp->user_qp.user_dbr_page); + } + + if (qp->cep) + erdma_cep_put(qp->cep); + xa_erase(&dev->qp_xa, QP_ID(qp)); + + return 0; +} + +void erdma_qp_get_ref(struct ib_qp *ibqp) +{ + erdma_qp_get(to_eqp(ibqp)); +} + +void erdma_qp_put_ref(struct ib_qp *ibqp) +{ + erdma_qp_put(to_eqp(ibqp)); +} + +int erdma_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma) +{ + struct 
rdma_user_mmap_entry *rdma_entry; + struct erdma_user_mmap_entry *entry; + pgprot_t prot; + int err; + + rdma_entry = rdma_user_mmap_entry_get(ctx, vma); + if (!rdma_entry) + return -EINVAL; + + entry = to_emmap(rdma_entry); + + switch (entry->mmap_flag) { + case ERDMA_MMAP_IO_NC: + /* map doorbell. */ + prot = pgprot_device(vma->vm_page_prot); + break; + default: + return -EINVAL; + } + + err = rdma_user_mmap_io(ctx, vma, PFN_DOWN(entry->address), PAGE_SIZE, + prot, rdma_entry); + + rdma_user_mmap_entry_put(rdma_entry); + return err; +} + +void erdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry) +{ + struct erdma_user_mmap_entry *entry = to_emmap(rdma_entry); + + kfree(entry); +} + +#define ERDMA_SDB_PAGE 0 +#define ERDMA_SDB_ENTRY 1 +#define ERDMA_SDB_SHARED 2 + +static void alloc_db_resources(struct erdma_dev *dev, + struct erdma_ucontext *ctx) +{ + u32 bitmap_idx; + struct erdma_devattr *attrs = &dev->attrs; + + if (attrs->disable_dwqe) + goto alloc_normal_db; + + /* Try to alloc independent SDB page. 
*/ + spin_lock(&dev->db_bitmap_lock); + bitmap_idx = find_first_zero_bit(dev->sdb_page, attrs->dwqe_pages); + if (bitmap_idx != attrs->dwqe_pages) { + set_bit(bitmap_idx, dev->sdb_page); + spin_unlock(&dev->db_bitmap_lock); + + ctx->sdb_type = ERDMA_SDB_PAGE; + ctx->sdb_idx = bitmap_idx; + ctx->sdb_page_idx = bitmap_idx; + ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET + + (bitmap_idx << PAGE_SHIFT); + ctx->sdb_page_off = 0; + + return; + } + + bitmap_idx = find_first_zero_bit(dev->sdb_entry, attrs->dwqe_entries); + if (bitmap_idx != attrs->dwqe_entries) { + set_bit(bitmap_idx, dev->sdb_entry); + spin_unlock(&dev->db_bitmap_lock); + + ctx->sdb_type = ERDMA_SDB_ENTRY; + ctx->sdb_idx = bitmap_idx; + ctx->sdb_page_idx = attrs->dwqe_pages + + bitmap_idx / ERDMA_DWQE_TYPE1_CNT_PER_PAGE; + ctx->sdb_page_off = bitmap_idx % ERDMA_DWQE_TYPE1_CNT_PER_PAGE; + + ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET + + (ctx->sdb_page_idx << PAGE_SHIFT); + + return; + } + + spin_unlock(&dev->db_bitmap_lock); + +alloc_normal_db: + ctx->sdb_type = ERDMA_SDB_SHARED; + ctx->sdb_idx = 0; + ctx->sdb_page_idx = ERDMA_SDB_SHARED_PAGE_INDEX; + ctx->sdb_page_off = 0; + + ctx->sdb = dev->func_bar_addr + (ctx->sdb_page_idx << PAGE_SHIFT); +} + +static void erdma_uctx_user_mmap_entries_remove(struct erdma_ucontext *uctx) +{ + rdma_user_mmap_entry_remove(uctx->sq_db_mmap_entry); + rdma_user_mmap_entry_remove(uctx->rq_db_mmap_entry); + rdma_user_mmap_entry_remove(uctx->cq_db_mmap_entry); +} + +int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata) +{ + struct erdma_ucontext *ctx = to_ectx(ibctx); + struct erdma_dev *dev = to_edev(ibctx->device); + int ret; + struct erdma_uresp_alloc_ctx uresp = {}; + + if (atomic_inc_return(&dev->num_ctx) > ERDMA_MAX_CONTEXT) { + ret = -ENOMEM; + goto err_out; + } + + INIT_LIST_HEAD(&ctx->dbrecords_page_list); + mutex_init(&ctx->dbrecords_page_mutex); + + alloc_db_resources(dev, ctx); + + ctx->rdb = dev->func_bar_addr + 
ERDMA_BAR_RQDB_SPACE_OFFSET; + ctx->cdb = dev->func_bar_addr + ERDMA_BAR_CQDB_SPACE_OFFSET; + + if (udata->outlen < sizeof(uresp)) { + ret = -EINVAL; + goto err_out; + } + + ctx->sq_db_mmap_entry = erdma_user_mmap_entry_insert( + ctx, (void *)ctx->sdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.sdb); + if (!ctx->sq_db_mmap_entry) { + ret = -ENOMEM; + goto err_out; + } + + ctx->rq_db_mmap_entry = erdma_user_mmap_entry_insert( + ctx, (void *)ctx->rdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.rdb); + if (!ctx->rq_db_mmap_entry) { + ret = -EINVAL; + goto err_out; + } + + ctx->cq_db_mmap_entry = erdma_user_mmap_entry_insert( + ctx, (void *)ctx->cdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.cdb); + if (!ctx->cq_db_mmap_entry) { + ret = -EINVAL; + goto err_out; + } + + uresp.dev_id = dev->pdev->device; + uresp.sdb_type = ctx->sdb_type; + uresp.sdb_offset = ctx->sdb_page_off; + + ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (ret) + goto err_out; + + return 0; + +err_out: + erdma_uctx_user_mmap_entries_remove(ctx); + atomic_dec(&dev->num_ctx); + return ret; +} + +void erdma_dealloc_ucontext(struct ib_ucontext *ibctx) +{ + struct erdma_ucontext *ctx = to_ectx(ibctx); + struct erdma_dev *dev = to_edev(ibctx->device); + + spin_lock(&dev->db_bitmap_lock); + if (ctx->sdb_type == ERDMA_SDB_PAGE) + clear_bit(ctx->sdb_idx, dev->sdb_page); + else if (ctx->sdb_type == ERDMA_SDB_ENTRY) + clear_bit(ctx->sdb_idx, dev->sdb_entry); + + erdma_uctx_user_mmap_entries_remove(ctx); + + spin_unlock(&dev->db_bitmap_lock); + + atomic_dec(&dev->num_ctx); +} + +static int ib_qp_state_to_erdma_qp_state[IB_QPS_ERR + 1] = { + [IB_QPS_RESET] = ERDMA_QP_STATE_IDLE, + [IB_QPS_INIT] = ERDMA_QP_STATE_IDLE, + [IB_QPS_RTR] = ERDMA_QP_STATE_RTR, + [IB_QPS_RTS] = ERDMA_QP_STATE_RTS, + [IB_QPS_SQD] = ERDMA_QP_STATE_CLOSING, + [IB_QPS_SQE] = ERDMA_QP_STATE_TERMINATE, + [IB_QPS_ERR] = ERDMA_QP_STATE_ERROR +}; + +int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, + struct ib_udata *udata) 
+{ + struct erdma_qp_attrs new_attrs; + enum erdma_qp_attr_mask erdma_attr_mask = 0; + struct erdma_qp *qp = to_eqp(ibqp); + int ret = 0; + + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) + return -EOPNOTSUPP; + + memset(&new_attrs, 0, sizeof(new_attrs)); + + if (attr_mask & IB_QP_STATE) { + new_attrs.state = ib_qp_state_to_erdma_qp_state[attr->qp_state]; + + erdma_attr_mask |= ERDMA_QP_ATTR_STATE; + } + + down_write(&qp->state_lock); + + ret = erdma_modify_qp_internal(qp, &new_attrs, erdma_attr_mask); + + up_write(&qp->state_lock); + + return ret; +} + +int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) +{ + struct erdma_qp *qp; + struct erdma_dev *dev; + + if (ibqp && qp_attr && qp_init_attr) { + qp = to_eqp(ibqp); + dev = to_edev(ibqp->device); + } else { + return -EINVAL; + } + + qp_attr->cap.max_inline_data = ERDMA_MAX_INLINE; + qp_init_attr->cap.max_inline_data = ERDMA_MAX_INLINE; + + qp_attr->cap.max_send_wr = qp->attrs.sq_size; + qp_attr->cap.max_recv_wr = qp->attrs.rq_size; + qp_attr->cap.max_send_sge = qp->attrs.max_send_sge; + qp_attr->cap.max_recv_sge = qp->attrs.max_recv_sge; + + qp_attr->path_mtu = ib_mtu_int_to_enum(dev->netdev->mtu); + qp_attr->max_rd_atomic = qp->attrs.irq_size; + qp_attr->max_dest_rd_atomic = qp->attrs.orq_size; + + qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ; + + qp_init_attr->cap = qp_attr->cap; + + return 0; +} + +static int erdma_init_user_cq(struct erdma_ucontext *ctx, struct erdma_cq *cq, + struct erdma_ureq_create_cq *ureq) +{ + int ret; + struct erdma_dev *dev = to_edev(cq->ibcq.device); + + ret = get_mtt_entries(dev, &cq->user_cq.qbuf_mtt, ureq->qbuf_va, + ureq->qbuf_len, 0, ureq->qbuf_va, SZ_64M - SZ_4K, + 1); + if (ret) + return ret; + + ret = erdma_map_user_dbrecords(ctx, ureq->db_record_va, + &cq->user_cq.user_dbr_page, + &cq->user_cq.db_info_dma_addr); + if (ret) + put_mtt_entries(dev, 
&cq->user_cq.qbuf_mtt); + + return ret; +} + +static int erdma_init_kernel_cq(struct erdma_cq *cq) +{ + struct erdma_dev *dev = to_edev(cq->ibcq.device); + + cq->kern_cq.qbuf = + dma_alloc_coherent(&dev->pdev->dev, + WARPPED_BUFSIZE(cq->depth << CQE_SHIFT), + &cq->kern_cq.qbuf_dma_addr, GFP_KERNEL); + if (!cq->kern_cq.qbuf) + return -ENOMEM; + + cq->kern_cq.db_record = + (u64 *)(cq->kern_cq.qbuf + (cq->depth << CQE_SHIFT)); + spin_lock_init(&cq->kern_cq.lock); + /* use default cqdb addr */ + cq->kern_cq.db = dev->func_bar + ERDMA_BAR_CQDB_SPACE_OFFSET; + + return 0; +} + +int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata) +{ + struct erdma_cq *cq = to_ecq(ibcq); + struct erdma_dev *dev = to_edev(ibcq->device); + unsigned int depth = attr->cqe; + int ret; + struct erdma_ucontext *ctx = rdma_udata_to_drv_context( + udata, struct erdma_ucontext, ibucontext); + + if (depth > dev->attrs.max_cqe) + return -EINVAL; + + depth = roundup_pow_of_two(depth); + cq->ibcq.cqe = depth; + cq->depth = depth; + cq->assoc_eqn = attr->comp_vector + 1; + + ret = xa_alloc_cyclic(&dev->cq_xa, &cq->cqn, cq, + XA_LIMIT(1, dev->attrs.max_cq - 1), + &dev->next_alloc_cqn, GFP_KERNEL); + if (ret < 0) + return ret; + + if (!rdma_is_kernel_res(&ibcq->res)) { + struct erdma_ureq_create_cq ureq; + struct erdma_uresp_create_cq uresp; + + ret = ib_copy_from_udata(&ureq, udata, + min(udata->inlen, sizeof(ureq))); + if (ret) + goto err_out_xa; + + ret = erdma_init_user_cq(ctx, cq, &ureq); + if (ret) + goto err_out_xa; + + uresp.cq_id = cq->cqn; + uresp.num_cqe = depth; + + ret = ib_copy_to_udata(udata, &uresp, + min(sizeof(uresp), udata->outlen)); + if (ret) + goto err_free_res; + } else { + ret = erdma_init_kernel_cq(cq); + if (ret) + goto err_out_xa; + } + + ret = create_cq_cmd(dev, cq); + if (ret) + goto err_free_res; + + return 0; + +err_free_res: + if (!rdma_is_kernel_res(&ibcq->res)) { + erdma_unmap_user_dbrecords(ctx, 
&cq->user_cq.user_dbr_page); + put_mtt_entries(dev, &cq->user_cq.qbuf_mtt); + } else { + dma_free_coherent(&dev->pdev->dev, + WARPPED_BUFSIZE(depth << CQE_SHIFT), + cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr); + } + +err_out_xa: + xa_erase(&dev->cq_xa, cq->cqn); + + return ret; +} + +void erdma_set_mtu(struct erdma_dev *dev, u32 mtu) +{ + struct erdma_cmdq_config_mtu_req req; + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON, + CMDQ_OPCODE_CONF_MTU); + req.mtu = mtu; + + erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); +} + +void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason) +{ + struct ib_event event; + + event.device = &dev->ibdev; + event.element.port_num = 1; + event.event = reason; + + ib_dispatch_event(&event); +} diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h new file mode 100644 index 000000000000..ab6380635e9e --- /dev/null +++ b/drivers/infiniband/hw/erdma/erdma_verbs.h @@ -0,0 +1,335 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ + +/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */ +/* Kai Shen <kaishen@linux.alibaba.com> */ +/* Copyright (c) 2020-2022, Alibaba Group. */ + +#ifndef __ERDMA_VERBS_H__ +#define __ERDMA_VERBS_H__ + +#include "erdma.h" + +/* RDMA Capability. 
*/ +#define ERDMA_MAX_PD (128 * 1024) +#define ERDMA_MAX_SEND_WR 4096 +#define ERDMA_MAX_ORD 128 +#define ERDMA_MAX_IRD 128 +#define ERDMA_MAX_SGE_RD 1 +#define ERDMA_MAX_CONTEXT (128 * 1024) +#define ERDMA_MAX_SEND_SGE 6 +#define ERDMA_MAX_RECV_SGE 1 +#define ERDMA_MAX_INLINE (sizeof(struct erdma_sge) * (ERDMA_MAX_SEND_SGE)) +#define ERDMA_MAX_FRMR_PA 512 + +enum { + ERDMA_MMAP_IO_NC = 0, /* no cache */ +}; + +struct erdma_user_mmap_entry { + struct rdma_user_mmap_entry rdma_entry; + u64 address; + u8 mmap_flag; +}; + +struct erdma_ucontext { + struct ib_ucontext ibucontext; + + u32 sdb_type; + u32 sdb_idx; + u32 sdb_page_idx; + u32 sdb_page_off; + u64 sdb; + u64 rdb; + u64 cdb; + + struct rdma_user_mmap_entry *sq_db_mmap_entry; + struct rdma_user_mmap_entry *rq_db_mmap_entry; + struct rdma_user_mmap_entry *cq_db_mmap_entry; + + /* doorbell records */ + struct list_head dbrecords_page_list; + struct mutex dbrecords_page_mutex; +}; + +struct erdma_pd { + struct ib_pd ibpd; + u32 pdn; +}; + +/* + * MemoryRegion definition. + */ +#define ERDMA_MAX_INLINE_MTT_ENTRIES 4 +#define MTT_SIZE(mtt_cnt) (mtt_cnt << 3) /* per mtt takes 8 Bytes. */ +#define ERDMA_MR_MAX_MTT_CNT 524288 +#define ERDMA_MTT_ENTRY_SIZE 8 + +#define ERDMA_MR_TYPE_NORMAL 0 +#define ERDMA_MR_TYPE_FRMR 1 +#define ERDMA_MR_TYPE_DMA 2 + +#define ERDMA_MR_INLINE_MTT 0 +#define ERDMA_MR_INDIRECT_MTT 1 + +#define ERDMA_MR_ACC_LR BIT(0) +#define ERDMA_MR_ACC_LW BIT(1) +#define ERDMA_MR_ACC_RR BIT(2) +#define ERDMA_MR_ACC_RW BIT(3) + +static inline u8 to_erdma_access_flags(int access) +{ + return (access & IB_ACCESS_REMOTE_READ ? ERDMA_MR_ACC_RR : 0) | + (access & IB_ACCESS_LOCAL_WRITE ? ERDMA_MR_ACC_LW : 0) | + (access & IB_ACCESS_REMOTE_WRITE ? 
ERDMA_MR_ACC_RW : 0); +} + +struct erdma_mem { + struct ib_umem *umem; + void *mtt_buf; + u32 mtt_type; + u32 page_size; + u32 page_offset; + u32 page_cnt; + u32 mtt_nents; + + u64 va; + u64 len; + + u64 mtt_entry[ERDMA_MAX_INLINE_MTT_ENTRIES]; +}; + +struct erdma_mr { + struct ib_mr ibmr; + struct erdma_mem mem; + u8 type; + u8 access; + u8 valid; +}; + +struct erdma_user_dbrecords_page { + struct list_head list; + struct ib_umem *umem; + u64 va; + int refcnt; +}; + +struct erdma_uqp { + struct erdma_mem sq_mtt; + struct erdma_mem rq_mtt; + + dma_addr_t sq_db_info_dma_addr; + dma_addr_t rq_db_info_dma_addr; + + struct erdma_user_dbrecords_page *user_dbr_page; + + u32 rq_offset; +}; + +struct erdma_kqp { + u16 sq_pi; + u16 sq_ci; + + u16 rq_pi; + u16 rq_ci; + + u64 *swr_tbl; + u64 *rwr_tbl; + + void __iomem *hw_sq_db; + void __iomem *hw_rq_db; + + void *sq_buf; + dma_addr_t sq_buf_dma_addr; + + void *rq_buf; + dma_addr_t rq_buf_dma_addr; + + void *sq_db_info; + void *rq_db_info; + + u8 sig_all; +}; + +enum erdma_qp_state { + ERDMA_QP_STATE_IDLE = 0, + ERDMA_QP_STATE_RTR = 1, + ERDMA_QP_STATE_RTS = 2, + ERDMA_QP_STATE_CLOSING = 3, + ERDMA_QP_STATE_TERMINATE = 4, + ERDMA_QP_STATE_ERROR = 5, + ERDMA_QP_STATE_UNDEF = 7, + ERDMA_QP_STATE_COUNT = 8 +}; + +enum erdma_qp_attr_mask { + ERDMA_QP_ATTR_STATE = (1 << 0), + ERDMA_QP_ATTR_LLP_HANDLE = (1 << 2), + ERDMA_QP_ATTR_ORD = (1 << 3), + ERDMA_QP_ATTR_IRD = (1 << 4), + ERDMA_QP_ATTR_SQ_SIZE = (1 << 5), + ERDMA_QP_ATTR_RQ_SIZE = (1 << 6), + ERDMA_QP_ATTR_MPA = (1 << 7) +}; + +struct erdma_qp_attrs { + enum erdma_qp_state state; + enum erdma_cc_alg cc; /* Congestion control algorithm */ + u32 sq_size; + u32 rq_size; + u32 orq_size; + u32 irq_size; + u32 max_send_sge; + u32 max_recv_sge; + u32 cookie; +#define ERDMA_QP_ACTIVE 0 +#define ERDMA_QP_PASSIVE 1 + u8 qp_type; + u8 pd_len; +}; + +struct erdma_qp { + struct ib_qp ibqp; + struct kref ref; + struct completion safe_free; + struct erdma_dev *dev; + struct erdma_cep *cep; 
+ struct rw_semaphore state_lock; + + union { + struct erdma_kqp kern_qp; + struct erdma_uqp user_qp; + }; + + struct erdma_cq *scq; + struct erdma_cq *rcq; + + struct erdma_qp_attrs attrs; + spinlock_t lock; +}; + +struct erdma_kcq_info { + void *qbuf; + dma_addr_t qbuf_dma_addr; + u32 ci; + u32 cmdsn; + u32 notify_cnt; + + spinlock_t lock; + u8 __iomem *db; + u64 *db_record; +}; + +struct erdma_ucq_info { + struct erdma_mem qbuf_mtt; + struct erdma_user_dbrecords_page *user_dbr_page; + dma_addr_t db_info_dma_addr; +}; + +struct erdma_cq { + struct ib_cq ibcq; + u32 cqn; + + u32 depth; + u32 assoc_eqn; + + union { + struct erdma_kcq_info kern_cq; + struct erdma_ucq_info user_cq; + }; +}; + +#define QP_ID(qp) ((qp)->ibqp.qp_num) + +static inline struct erdma_qp *find_qp_by_qpn(struct erdma_dev *dev, int id) +{ + return (struct erdma_qp *)xa_load(&dev->qp_xa, id); +} + +static inline struct erdma_cq *find_cq_by_cqn(struct erdma_dev *dev, int id) +{ + return (struct erdma_cq *)xa_load(&dev->cq_xa, id); +} + +void erdma_qp_get(struct erdma_qp *qp); +void erdma_qp_put(struct erdma_qp *qp); +int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, + enum erdma_qp_attr_mask mask); +void erdma_qp_llp_close(struct erdma_qp *qp); +void erdma_qp_cm_drop(struct erdma_qp *qp); + +static inline struct erdma_ucontext *to_ectx(struct ib_ucontext *ibctx) +{ + return container_of(ibctx, struct erdma_ucontext, ibucontext); +} + +static inline struct erdma_pd *to_epd(struct ib_pd *pd) +{ + return container_of(pd, struct erdma_pd, ibpd); +} + +static inline struct erdma_mr *to_emr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct erdma_mr, ibmr); +} + +static inline struct erdma_qp *to_eqp(struct ib_qp *qp) +{ + return container_of(qp, struct erdma_qp, ibqp); +} + +static inline struct erdma_cq *to_ecq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct erdma_cq, ibcq); +} + +static inline struct erdma_user_mmap_entry * +to_emmap(struct 
rdma_user_mmap_entry *ibmmap) +{ + return container_of(ibmmap, struct erdma_user_mmap_entry, rdma_entry); +} + +int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *data); +void erdma_dealloc_ucontext(struct ib_ucontext *ibctx); +int erdma_query_device(struct ib_device *dev, struct ib_device_attr *attr, + struct ib_udata *data); +int erdma_get_port_immutable(struct ib_device *dev, u32 port, + struct ib_port_immutable *ib_port_immutable); +int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *data); +int erdma_query_port(struct ib_device *dev, u32 port, + struct ib_port_attr *attr); +int erdma_query_gid(struct ib_device *dev, u32 port, int idx, + union ib_gid *gid); +int erdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *data); +int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); +int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr, + struct ib_udata *data); +int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask, + struct ib_qp_init_attr *init_attr); +int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask, + struct ib_udata *data); +int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); +int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); +int erdma_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); +struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, + u64 virt, int access, struct ib_udata *udata); +struct ib_mr *erdma_get_dma_mr(struct ib_pd *ibpd, int rights); +int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *data); +int erdma_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma); +void erdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry); +void erdma_qp_get_ref(struct ib_qp *ibqp); +void erdma_qp_put_ref(struct ib_qp *ibqp); +struct ib_qp *erdma_get_ibqp(struct ib_device *dev, int id); +int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr, + 
const struct ib_send_wr **bad_send_wr); +int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr, + const struct ib_recv_wr **bad_recv_wr); +int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); +struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, + u32 max_num_sg); +int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, + unsigned int *sg_offset); +void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason); +void erdma_set_mtu(struct erdma_dev *dev, u32 mtu); + +#endif diff --git a/drivers/infiniband/hw/hfi1/Kconfig b/drivers/infiniband/hw/hfi1/Kconfig index 6eb739052121..14b92e12bf29 100644 --- a/drivers/infiniband/hw/hfi1/Kconfig +++ b/drivers/infiniband/hw/hfi1/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config INFINIBAND_HFI1 tristate "Cornelis OPX Gen1 support" - depends on X86_64 && INFINIBAND_RDMAVT && I2C + depends on X86_64 && INFINIBAND_RDMAVT && I2C && !UML select MMU_NOTIFIER select CRC32 select I2C_ALGOBIT diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 98c813ba4304..877f8e84a672 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -5,7 +5,6 @@ #include <linux/topology.h> #include <linux/cpumask.h> -#include <linux/module.h> #include <linux/interrupt.h> #include <linux/numa.h> @@ -667,7 +666,7 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) * engines, use the same CPU cores as general/control * context. */ - if (cpumask_weight(&entry->def_intr.mask) == 0) + if (cpumask_empty(&entry->def_intr.mask)) cpumask_copy(&entry->def_intr.mask, &entry->general_intr_mask); } @@ -687,7 +686,7 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) * vectors, use the same CPU core as the general/control * context. 
*/ - if (cpumask_weight(&entry->comp_vect_mask) == 0) + if (cpumask_empty(&entry->comp_vect_mask)) cpumask_copy(&entry->comp_vect_mask, &entry->general_intr_mask); } diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index f1245c94ae26..ebe970f76232 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -8753,7 +8753,7 @@ static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data, /* * When writing a LCB CSR, out_data contains the full value to - * to be written, while in_data contains the relative LCB + * be written, while in_data contains the relative LCB * address in 7:0. Do the work here, rather than the caller, * of distrubting the write data to where it needs to go: * diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index 995991d9709d..166ad6b828dc 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -137,61 +137,6 @@ #define HFI1_USER_SWVERSION ((HFI1_USER_SWMAJOR << HFI1_SWMAJOR_SHIFT) | \ HFI1_USER_SWMINOR) -#ifndef HFI1_KERN_TYPE -#define HFI1_KERN_TYPE 0 -#endif - -/* - * Similarly, this is the kernel version going back to the user. It's - * slightly different, in that we want to tell if the driver was built as - * part of a Intel release, or from the driver from openfabrics.org, - * kernel.org, or a standard distribution, for support reasons. - * The high bit is 0 for non-Intel and 1 for Intel-built/supplied. - * - * It's returned by the driver to the user code during initialization in the - * spi_sw_version field of hfi1_base_info, so the user code can in turn - * check for compatibility with the kernel. -*/ -#define HFI1_KERN_SWVERSION ((HFI1_KERN_TYPE << 31) | HFI1_USER_SWVERSION) - -/* - * Define the driver version number. This is something that refers only - * to the driver itself, not the software interfaces it supports. 
- */ -#ifndef HFI1_DRIVER_VERSION_BASE -#define HFI1_DRIVER_VERSION_BASE "0.9-294" -#endif - -/* create the final driver version string */ -#ifdef HFI1_IDSTR -#define HFI1_DRIVER_VERSION HFI1_DRIVER_VERSION_BASE " " HFI1_IDSTR -#else -#define HFI1_DRIVER_VERSION HFI1_DRIVER_VERSION_BASE -#endif - -/* - * Diagnostics can send a packet by writing the following - * struct to the diag packet special file. - * - * This allows a custom PBC qword, so that special modes and deliberate - * changes to CRCs can be used. - */ -#define _DIAG_PKT_VERS 1 -struct diag_pkt { - __u16 version; /* structure version */ - __u16 unit; /* which device */ - __u16 sw_index; /* send sw index to use */ - __u16 len; /* data length, in bytes */ - __u16 port; /* port number */ - __u16 unused; - __u32 flags; /* call flags */ - __u64 data; /* user data pointer */ - __u64 pbc; /* PBC for the packet */ -}; - -/* diag_pkt flags */ -#define F_DIAGPKT_WAIT 0x1 /* wait until packet is sent */ - /* * The next set of defines are for packet headers, and chip register * and memory bits that are visible to and/or used by user-mode software. 
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 22a3cdb940be..80ba1e53c068 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -7,7 +7,6 @@ #include <linux/seq_file.h> #include <linux/kernel.h> #include <linux/export.h> -#include <linux/module.h> #include <linux/string.h> #include <linux/types.h> #include <linux/ratelimit.h> diff --git a/drivers/infiniband/hw/hfi1/device.c b/drivers/infiniband/hw/hfi1/device.c index 68a184c39941..8ceff7141baf 100644 --- a/drivers/infiniband/hw/hfi1/device.c +++ b/drivers/infiniband/hw/hfi1/device.c @@ -4,7 +4,6 @@ */ #include <linux/cdev.h> -#include <linux/module.h> #include <linux/device.h> #include <linux/fs.h> diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index e2c634af40e9..8e71bef9d982 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -29,12 +29,6 @@ #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt -/* - * The size has to be longer than this string, so we can append - * board/chip information to it in the initialization code. - */ -const char ib_hfi1_version[] = HFI1_DRIVER_VERSION "\n"; - DEFINE_MUTEX(hfi1_mutex); /* general driver use */ unsigned int hfi1_max_mtu = HFI1_DEFAULT_MAX_MTU; diff --git a/drivers/infiniband/hw/hfi1/efivar.c b/drivers/infiniband/hw/hfi1/efivar.c index e8ed05516bf2..7741a1d69097 100644 --- a/drivers/infiniband/hw/hfi1/efivar.c +++ b/drivers/infiniband/hw/hfi1/efivar.c @@ -72,7 +72,7 @@ static int read_efi_var(const char *name, unsigned long *size, * is in the EFIVAR_FS code and may not be compiled in. * However, even that is insufficient since it does not cover * EFI_BUFFER_TOO_SMALL which could be an important return. - * For now, just split out succces or not found. + * For now, just split out success or not found. */ ret = status == EFI_SUCCESS ? 0 : status == EFI_NOT_FOUND ? 
-ENOENT : diff --git a/drivers/infiniband/hw/hfi1/fault.c b/drivers/infiniband/hw/hfi1/fault.c index e2e4f9f6fae2..3af77a0840ab 100644 --- a/drivers/infiniband/hw/hfi1/fault.c +++ b/drivers/infiniband/hw/hfi1/fault.c @@ -6,7 +6,6 @@ #include <linux/debugfs.h> #include <linux/seq_file.h> #include <linux/kernel.h> -#include <linux/module.h> #include <linux/types.h> #include <linux/bitmap.h> diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 1783a6ea5427..f5f9269fdc16 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -265,6 +265,8 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) unsigned long dim = from->nr_segs; int idx; + if (!HFI1_CAP_IS_KSET(SDMA)) + return -EINVAL; idx = srcu_read_lock(&fd->pq_srcu); pq = srcu_dereference(fd->pq, &fd->pq_srcu); if (!cq || !pq) { @@ -963,7 +965,7 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, uctxt->userversion = uinfo->userversion; uctxt->flags = hfi1_cap_mask; /* save current flag state */ init_waitqueue_head(&uctxt->wait); - strlcpy(uctxt->comm, current->comm, sizeof(uctxt->comm)); + strscpy(uctxt->comm, current->comm, sizeof(uctxt->comm)); memcpy(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid)); uctxt->jkey = generate_jkey(current_uid()); hfi1_stats.sps_ctxts++; @@ -1177,8 +1179,10 @@ static int setup_base_ctxt(struct hfi1_filedata *fd, goto done; ret = init_user_ctxt(fd, uctxt); - if (ret) + if (ret) { + hfi1_free_ctxt_rcv_groups(uctxt); goto done; + } user_init(uctxt); @@ -1220,7 +1224,7 @@ static int get_base_info(struct hfi1_filedata *fd, unsigned long arg, u32 len) memset(&binfo, 0, sizeof(binfo)); binfo.hw_version = dd->revision; - binfo.sw_version = HFI1_KERN_SWVERSION; + binfo.sw_version = HFI1_USER_SWVERSION; binfo.bthqp = RVT_KDETH_QP_PREFIX; binfo.jkey = uctxt->jkey; /* diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 
31e63e245ea9..1d77514ebbee 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -5,7 +5,6 @@ #include <linux/firmware.h> #include <linux/mutex.h> -#include <linux/module.h> #include <linux/delay.h> #include <linux/crc32.h> @@ -1115,7 +1114,7 @@ static void turn_off_spicos(struct hfi1_devdata *dd, int flags) * Reset all of the fabric serdes for this HFI in preparation to take the * link to Polling. * - * To do a reset, we need to write to to the serdes registers. Unfortunately, + * To do a reset, we need to write to the serdes registers. Unfortunately, * the fabric serdes download to the other HFI on the ASIC will have turned * off the firmware validation on this HFI. This means we can't write to the * registers to reset the serdes. Work around this by performing a complete diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 4436ed41547c..436372b31431 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -489,7 +489,7 @@ void set_link_ipg(struct hfi1_pportdata *ppd) u16 shift, mult; u64 src; u32 current_egress_rate; /* Mbits /sec */ - u32 max_pkt_time; + u64 max_pkt_time; /* * max_pkt_time is the maximum packet egress time in units * of the fabric clock period 1/(805 MHz). 
diff --git a/drivers/infiniband/hw/hfi1/ipoib.h b/drivers/infiniband/hw/hfi1/ipoib.h index 909122934246..aec60d4888eb 100644 --- a/drivers/infiniband/hw/hfi1/ipoib.h +++ b/drivers/infiniband/hw/hfi1/ipoib.h @@ -55,7 +55,7 @@ union hfi1_ipoib_flow { */ struct ipoib_txreq { struct sdma_txreq txreq; - struct hfi1_sdma_header sdma_hdr; + struct hfi1_sdma_header *sdma_hdr; int sdma_status; int complete; struct hfi1_ipoib_dev_priv *priv; diff --git a/drivers/infiniband/hw/hfi1/ipoib_main.c b/drivers/infiniband/hw/hfi1/ipoib_main.c index e1a2b02bbd91..5d814afdf7f3 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_main.c +++ b/drivers/infiniband/hw/hfi1/ipoib_main.c @@ -22,26 +22,35 @@ static int hfi1_ipoib_dev_init(struct net_device *dev) int ret; dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!dev->tstats) + return -ENOMEM; ret = priv->netdev_ops->ndo_init(dev); if (ret) - return ret; + goto out_ret; ret = hfi1_netdev_add_data(priv->dd, qpn_from_mac(priv->netdev->dev_addr), dev); if (ret < 0) { priv->netdev_ops->ndo_uninit(dev); - return ret; + goto out_ret; } return 0; +out_ret: + free_percpu(dev->tstats); + dev->tstats = NULL; + return ret; } static void hfi1_ipoib_dev_uninit(struct net_device *dev) { struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + free_percpu(dev->tstats); + dev->tstats = NULL; + hfi1_netdev_remove_data(priv->dd, qpn_from_mac(priv->netdev->dev_addr)); priv->netdev_ops->ndo_uninit(dev); @@ -166,12 +175,7 @@ static void hfi1_ipoib_netdev_dtor(struct net_device *dev) hfi1_ipoib_rxq_deinit(priv->netdev); free_percpu(dev->tstats); -} - -static void hfi1_ipoib_free_rdma_netdev(struct net_device *dev) -{ - hfi1_ipoib_netdev_dtor(dev); - free_netdev(dev); + dev->tstats = NULL; } static void hfi1_ipoib_set_id(struct net_device *dev, int id) @@ -211,24 +215,23 @@ static int hfi1_ipoib_setup_rn(struct ib_device *device, priv->port_num = port_num; priv->netdev_ops = netdev->netdev_ops; - netdev->netdev_ops = &hfi1_ipoib_netdev_ops; - 
ib_query_pkey(device, port_num, priv->pkey_index, &priv->pkey); rc = hfi1_ipoib_txreq_init(priv); if (rc) { dd_dev_err(dd, "IPoIB netdev TX init - failed(%d)\n", rc); - hfi1_ipoib_free_rdma_netdev(netdev); return rc; } rc = hfi1_ipoib_rxq_init(netdev); if (rc) { dd_dev_err(dd, "IPoIB netdev RX init - failed(%d)\n", rc); - hfi1_ipoib_free_rdma_netdev(netdev); + hfi1_ipoib_txreq_deinit(priv); return rc; } + netdev->netdev_ops = &hfi1_ipoib_netdev_ops; + netdev->priv_destructor = hfi1_ipoib_netdev_dtor; netdev->needs_free_netdev = true; diff --git a/drivers/infiniband/hw/hfi1/ipoib_rx.c b/drivers/infiniband/hw/hfi1/ipoib_rx.c index 3afa7545242c..629691a572ef 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_rx.c +++ b/drivers/infiniband/hw/hfi1/ipoib_rx.c @@ -11,13 +11,10 @@ static void copy_ipoib_buf(struct sk_buff *skb, void *data, int size) { - void *dst_data; - skb_checksum_none_assert(skb); skb->protocol = *((__be16 *)data); - dst_data = skb_put(skb, size); - memcpy(dst_data, data, size); + skb_put_data(skb, data, size); skb->mac_header = HFI1_IPOIB_PSEUDO_LEN; skb_pull(skb, HFI1_IPOIB_ENCAP_LEN); } diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c index f4010890309f..5d9a7b09ca37 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_tx.c +++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c @@ -122,7 +122,7 @@ static void hfi1_ipoib_free_tx(struct ipoib_txreq *tx, int budget) dd_dev_warn(priv->dd, "%s: Status = 0x%x pbc 0x%llx txq = %d sde = %d\n", __func__, tx->sdma_status, - le64_to_cpu(tx->sdma_hdr.pbc), tx->txq->q_idx, + le64_to_cpu(tx->sdma_hdr->pbc), tx->txq->q_idx, tx->txq->sde->this_idx); } @@ -231,7 +231,7 @@ static int hfi1_ipoib_build_tx_desc(struct ipoib_txreq *tx, { struct hfi1_devdata *dd = txp->dd; struct sdma_txreq *txreq = &tx->txreq; - struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr; + struct hfi1_sdma_header *sdma_hdr = tx->sdma_hdr; u16 pkt_bytes = sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2) + tx->skb->len; int ret; @@ 
-256,7 +256,7 @@ static void hfi1_ipoib_build_ib_tx_headers(struct ipoib_txreq *tx, struct ipoib_txparms *txp) { struct hfi1_ipoib_dev_priv *priv = tx->txq->priv; - struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr; + struct hfi1_sdma_header *sdma_hdr = tx->sdma_hdr; struct sk_buff *skb = tx->skb; struct hfi1_pportdata *ppd = ppd_from_ibp(txp->ibp); struct rdma_ah_attr *ah_attr = txp->ah_attr; @@ -483,7 +483,7 @@ static int hfi1_ipoib_send_dma_single(struct net_device *dev, if (likely(!ret)) { tx_ok: trace_sdma_output_ibhdr(txq->priv->dd, - &tx->sdma_hdr.hdr, + &tx->sdma_hdr->hdr, ib_is_sc5(txp->flow.sc5)); hfi1_ipoib_check_queue_depth(txq); return NETDEV_TX_OK; @@ -547,7 +547,7 @@ static int hfi1_ipoib_send_dma_list(struct net_device *dev, hfi1_ipoib_check_queue_depth(txq); trace_sdma_output_ibhdr(txq->priv->dd, - &tx->sdma_hdr.hdr, + &tx->sdma_hdr->hdr, ib_is_sc5(txp->flow.sc5)); if (!netdev_xmit_more()) @@ -683,7 +683,8 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv) { struct net_device *dev = priv->netdev; u32 tx_ring_size, tx_item_size; - int i; + struct hfi1_ipoib_circ_buf *tx_ring; + int i, j; /* * Ring holds 1 less than tx_ring_size @@ -701,7 +702,9 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv) for (i = 0; i < dev->num_tx_queues; i++) { struct hfi1_ipoib_txq *txq = &priv->txqs[i]; + struct ipoib_txreq *tx; + tx_ring = &txq->tx_ring; iowait_init(&txq->wait, 0, hfi1_ipoib_flush_txq, @@ -725,18 +728,21 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv) priv->dd->node); txq->tx_ring.items = - kcalloc_node(tx_ring_size, tx_item_size, - GFP_KERNEL, priv->dd->node); + kvzalloc_node(array_size(tx_ring_size, tx_item_size), + GFP_KERNEL, priv->dd->node); if (!txq->tx_ring.items) goto free_txqs; txq->tx_ring.max_items = tx_ring_size; - txq->tx_ring.shift = ilog2(tx_ring_size); + txq->tx_ring.shift = ilog2(tx_item_size); txq->tx_ring.avail = hfi1_ipoib_ring_hwat(txq); + tx_ring = &txq->tx_ring; + for (j = 0; j < tx_ring_size; 
j++) + hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr = + kzalloc_node(sizeof(*tx->sdma_hdr), + GFP_KERNEL, priv->dd->node); - netif_tx_napi_add(dev, &txq->napi, - hfi1_ipoib_poll_tx_ring, - NAPI_POLL_WEIGHT); + netif_napi_add_tx(dev, &txq->napi, hfi1_ipoib_poll_tx_ring); } return 0; @@ -746,7 +752,10 @@ free_txqs: struct hfi1_ipoib_txq *txq = &priv->txqs[i]; netif_napi_del(&txq->napi); - kfree(txq->tx_ring.items); + tx_ring = &txq->tx_ring; + for (j = 0; j < tx_ring_size; j++) + kfree(hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr); + kvfree(tx_ring->items); } kfree(priv->txqs); @@ -780,17 +789,20 @@ static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq) void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv) { - int i; + int i, j; for (i = 0; i < priv->netdev->num_tx_queues; i++) { struct hfi1_ipoib_txq *txq = &priv->txqs[i]; + struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring; iowait_cancel_work(&txq->wait); iowait_sdma_drain(&txq->wait); hfi1_ipoib_drain_tx_list(txq); netif_napi_del(&txq->napi); hfi1_ipoib_drain_tx_ring(txq); - kfree(txq->tx_ring.items); + for (j = 0; j < tx_ring->max_items; j++) + kfree(hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr); + kvfree(tx_ring->items); } kfree(priv->txqs); diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index 876cc78a22cc..7333646021bb 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -80,6 +80,9 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler) unsigned long flags; struct list_head del_list; + /* Prevent freeing of mm until we are completely finished. */ + mmgrab(handler->mn.mm); + /* Unregister first so we don't get any more notifications. */ mmu_notifier_unregister(&handler->mn, handler->mn.mm); @@ -102,6 +105,9 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler) do_remove(handler, &del_list); + /* Now the mm may be freed. 
*/ + mmdrop(handler->mn.mm); + kfree(handler); } diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c index 03b098a494b5..3dfa5aff2512 100644 --- a/drivers/infiniband/hw/hfi1/netdev_rx.c +++ b/drivers/infiniband/hw/hfi1/netdev_rx.c @@ -216,7 +216,7 @@ static int hfi1_netdev_rxq_init(struct hfi1_netdev_rx *rx) * right now. */ set_bit(NAPI_STATE_NO_BUSY_POLL, &rxq->napi.state); - netif_napi_add(dev, &rxq->napi, hfi1_netdev_rx_napi, 64); + netif_napi_add_weight(dev, &rxq->napi, hfi1_netdev_rx_napi, 64); rc = msix_netdev_request_rcd_irq(rxq->rcd); if (rc) goto bail_context_irq_failure; diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 3d42bd2b36bd..51ae58c02b15 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -913,8 +913,7 @@ void sc_disable(struct send_context *sc) spin_unlock(&sc->release_lock); write_seqlock(&sc->waitlock); - if (!list_empty(&sc->piowait)) - list_move(&sc->piowait, &wake_list); + list_splice_init(&sc->piowait, &wake_list); write_sequnlock(&sc->waitlock); while (!list_empty(&wake_list)) { struct iowait *wait; diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c index 136f9a99e1e0..7690f996d5e3 100644 --- a/drivers/infiniband/hw/hfi1/pio_copy.c +++ b/drivers/infiniband/hw/hfi1/pio_copy.c @@ -172,7 +172,7 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n) } /* - * Read nbytes from "from" and and place them in the low bytes + * Read nbytes from "from" and place them in the low bytes * of pbuf->carry. Other bytes are left as-is. Any previous * value in pbuf->carry is lost. 
* diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index f07d328689d3..a95b654f5254 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1288,11 +1288,13 @@ void sdma_clean(struct hfi1_devdata *dd, size_t num_engines) kvfree(sde->tx_ring); sde->tx_ring = NULL; } - spin_lock_irq(&dd->sde_map_lock); - sdma_map_free(rcu_access_pointer(dd->sdma_map)); - RCU_INIT_POINTER(dd->sdma_map, NULL); - spin_unlock_irq(&dd->sde_map_lock); - synchronize_rcu(); + if (rcu_access_pointer(dd->sdma_map)) { + spin_lock_irq(&dd->sde_map_lock); + sdma_map_free(rcu_access_pointer(dd->sdma_map)); + RCU_INIT_POINTER(dd->sdma_map, NULL); + spin_unlock_irq(&dd->sde_map_lock); + synchronize_rcu(); + } kfree(dd->per_sdma); dd->per_sdma = NULL; diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 2a7abf7a1f7f..18b05ffb415a 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -850,7 +850,7 @@ void hfi1_kern_init_ctxt_generations(struct hfi1_ctxtdata *rcd) int i; for (i = 0; i < RXE_NUM_TID_FLOWS; i++) { - rcd->flows[i].generation = mask_generation(prandom_u32()); + rcd->flows[i].generation = mask_generation(get_random_u32()); kern_set_hw_flow(rcd, KERN_GENERATION_RESERVED, i); } } diff --git a/drivers/infiniband/hw/hfi1/trace_dbg.h b/drivers/infiniband/hw/hfi1/trace_dbg.h index 707f1053f0b7..582b6f68df3d 100644 --- a/drivers/infiniband/hw/hfi1/trace_dbg.h +++ b/drivers/infiniband/hw/hfi1/trace_dbg.h @@ -26,14 +26,10 @@ DECLARE_EVENT_CLASS(hfi1_trace_template, TP_PROTO(const char *function, struct va_format *vaf), TP_ARGS(function, vaf), TP_STRUCT__entry(__string(function, function) - __dynamic_array(char, msg, MAX_MSG_LEN) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign(__assign_str(function, function); - WARN_ON_ONCE(vsnprintf - (__get_dynamic_array(msg), - MAX_MSG_LEN, vaf->fmt, - *vaf->va) >= - MAX_MSG_LEN); + __assign_vstr(msg, 
vaf->fmt, vaf->va); ), TP_printk("(%s) %s", __get_str(function), diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 5b11c8282744..a71c5a36ceba 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -161,9 +161,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, if (!pq->reqs) goto pq_reqs_nomem; - pq->req_in_use = kcalloc(BITS_TO_LONGS(hfi1_sdma_comp_ring_size), - sizeof(*pq->req_in_use), - GFP_KERNEL); + pq->req_in_use = bitmap_zalloc(hfi1_sdma_comp_ring_size, GFP_KERNEL); if (!pq->req_in_use) goto pq_reqs_no_in_use; @@ -210,7 +208,7 @@ cq_comps_nomem: cq_nomem: kmem_cache_destroy(pq->txreq_cache); pq_txreq_nomem: - kfree(pq->req_in_use); + bitmap_free(pq->req_in_use); pq_reqs_no_in_use: kfree(pq->reqs); pq_reqs_nomem: @@ -257,7 +255,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, pq->wait, !atomic_read(&pq->n_reqs)); kfree(pq->reqs); - kfree(pq->req_in_use); + bitmap_free(pq->req_in_use); kmem_cache_destroy(pq->txreq_cache); flush_pq_iowait(pq); kfree(pq); diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index dc9211f3a009..e6e17984553c 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1300,8 +1300,8 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd) IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE | - IB_DEVICE_MEM_MGT_EXTENSIONS | - IB_DEVICE_RDMA_NETDEV_OPA; + IB_DEVICE_MEM_MGT_EXTENSIONS; + rdi->dparms.props.kernel_cap_flags = IBK_RDMA_NETDEV_OPA; rdi->dparms.props.page_size_cap = PAGE_SIZE; rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3; rdi->dparms.props.vendor_part_id = dd->pcidev->device; @@ -1397,8 +1397,7 @@ static int query_port(struct rvt_dev_info *rdi, u32 port_num, 4096 : hfi1_max_mtu), IB_MTU_4096); 
props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu : mtu_to_enum(ppd->ibmtu, IB_MTU_4096); - props->phys_mtu = HFI1_CAP_IS_KSET(AIP) ? hfi1_max_mtu : - ib_mtu_enum_to_int(props->max_mtu); + props->phys_mtu = hfi1_max_mtu; return 0; } @@ -1448,12 +1447,10 @@ static int shut_down_port(struct rvt_dev_info *rdi, u32 port_num) struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi); struct hfi1_devdata *dd = dd_from_dev(verbs_dev); struct hfi1_pportdata *ppd = &dd->pport[port_num - 1]; - int ret; set_link_down_reason(ppd, OPA_LINKDOWN_REASON_UNKNOWN, 0, OPA_LINKDOWN_REASON_UNKNOWN); - ret = set_link_state(ppd, HLS_DN_DOWNDEF); - return ret; + return set_link_state(ppd, HLS_DN_DOWNDEF); } static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp, @@ -1802,7 +1799,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ib_set_device_ops(ibdev, &hfi1_dev_ops); - strlcpy(ibdev->node_desc, init_utsname()->nodename, + strscpy(ibdev->node_desc, init_utsname()->nodename, sizeof(ibdev->node_desc)); /* diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 38565532d654..7f30f32b34dc 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -391,9 +391,6 @@ void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait); int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send); -extern const u32 rc_only_opcode; -extern const u32 uc_only_opcode; - int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet); u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig index 18d10ebf900b..ab3fbba70789 100644 --- a/drivers/infiniband/hw/hns/Kconfig +++ b/drivers/infiniband/hw/hns/Kconfig @@ -5,22 +5,9 @@ config INFINIBAND_HNS depends on ARM64 || (COMPILE_TEST && 64BIT) depends on (HNS_DSAF && HNS_ENET) || HNS3 help - This is a RoCE/RDMA driver for the Hisilicon RoCE engine. 
The engine - is used in Hisilicon Hip06 and more further ICT SoC based on - platform device. + This is a RoCE/RDMA driver for the Hisilicon RoCE engine. - To compile HIP06 or HIP08 driver as module, choose M here. - -config INFINIBAND_HNS_HIP06 - bool "Hisilicon Hip06 Family RoCE support" - depends on INFINIBAND_HNS && HNS && HNS_DSAF && HNS_ENET - depends on INFINIBAND_HNS=m || (HNS_DSAF=y && HNS_ENET=y) - help - RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip06 and - Hip07 SoC. These RoCE engines are platform devices. - - To compile this driver, choose Y here: if INFINIBAND_HNS is m, this - module will be called hns-roce-hw-v1 + To compile HIP08 driver as module, choose M here. config INFINIBAND_HNS_HIP08 bool "Hisilicon Hip08 Family RoCE support" diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile index e105945b94a1..a7d259238305 100644 --- a/drivers/infiniband/hw/hns/Makefile +++ b/drivers/infiniband/hw/hns/Makefile @@ -9,12 +9,7 @@ hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \ hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o -ifdef CONFIG_INFINIBAND_HNS_HIP06 -hns-roce-hw-v1-objs := hns_roce_hw_v1.o $(hns-roce-objs) -obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v1.o -endif - ifdef CONFIG_INFINIBAND_HNS_HIP08 -hns-roce-hw-v2-objs := hns_roce_hw_v2.o hns_roce_hw_v2_dfx.o $(hns-roce-objs) +hns-roce-hw-v2-objs := hns_roce_hw_v2.o $(hns-roce-objs) obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v2.o endif diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index cc258edec331..480c062dd04f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -30,7 +30,6 @@ * SOFTWARE. 
*/ -#include <linux/platform_device.h> #include <linux/pci.h> #include <rdma/ib_addr.h> #include <rdma/ib_cache.h> @@ -42,9 +41,8 @@ static inline u16 get_ah_udp_sport(const struct rdma_ah_attr *ah_attr) u16 sport; if (!fl) - sport = get_random_u32() % - (IB_ROCE_UDP_ENCAP_VALID_PORT_MAX + 1 - - IB_ROCE_UDP_ENCAP_VALID_PORT_MIN) + + sport = prandom_u32_max(IB_ROCE_UDP_ENCAP_VALID_PORT_MAX + 1 - + IB_ROCE_UDP_ENCAP_VALID_PORT_MIN) + IB_ROCE_UDP_ENCAP_VALID_PORT_MIN; else sport = rdma_flow_label_to_udp_sport(fl); @@ -61,7 +59,7 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct hns_roce_ah *ah = to_hr_ah(ibah); int ret = 0; - if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 && udata) + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 && udata) return -EOPNOTSUPP; ah->av.port = rdma_ah_get_port_num(ah_attr); @@ -80,7 +78,7 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); /* HIP08 needs to record vlan info in Address Vector */ - if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08) { + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { ret = rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr, &ah->av.vlan_id, NULL); if (ret) diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index d4fa0fd52294..11a78ceae568 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -31,10 +31,9 @@ * SOFTWARE. 
*/ -#include <linux/platform_device.h> #include <linux/vmalloc.h> -#include "hns_roce_device.h" #include <rdma/ib_umem.h> +#include "hns_roce_device.h" void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf) { diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c index 84f3f2b5f097..864413607571 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.c +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c @@ -31,7 +31,6 @@ */ #include <linux/dmapool.h> -#include <linux/platform_device.h> #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_cmd.h" @@ -39,45 +38,36 @@ #define CMD_POLL_TOKEN 0xffff #define CMD_MAX_NUM 32 -static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev, u64 in_param, - u64 out_param, u32 in_modifier, - u8 op_modifier, u16 op, u16 token, - int event) +static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev, + struct hns_roce_mbox_msg *mbox_msg) { - return hr_dev->hw->post_mbox(hr_dev, in_param, out_param, in_modifier, - op_modifier, op, token, event); + return hr_dev->hw->post_mbox(hr_dev, mbox_msg); } /* this should be called with "poll_sem" */ -static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param, - u64 out_param, unsigned long in_modifier, - u8 op_modifier, u16 op, - unsigned int timeout) +static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, + struct hns_roce_mbox_msg *mbox_msg) { int ret; - ret = hns_roce_cmd_mbox_post_hw(hr_dev, in_param, out_param, - in_modifier, op_modifier, op, - CMD_POLL_TOKEN, 0); + ret = hns_roce_cmd_mbox_post_hw(hr_dev, mbox_msg); if (ret) { dev_err_ratelimited(hr_dev->dev, - "failed to post mailbox %x in poll mode, ret = %d.\n", - op, ret); + "failed to post mailbox 0x%x in poll mode, ret = %d.\n", + mbox_msg->cmd, ret); return ret; } - return hr_dev->hw->poll_mbox_done(hr_dev, timeout); + return hr_dev->hw->poll_mbox_done(hr_dev); } -static int hns_roce_cmd_mbox_poll(struct 
hns_roce_dev *hr_dev, u64 in_param, - u64 out_param, unsigned long in_modifier, - u8 op_modifier, u16 op, unsigned int timeout) +static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, + struct hns_roce_mbox_msg *mbox_msg) { int ret; down(&hr_dev->cmd.poll_sem); - ret = __hns_roce_cmd_mbox_poll(hr_dev, in_param, out_param, in_modifier, - op_modifier, op, timeout); + ret = __hns_roce_cmd_mbox_poll(hr_dev, mbox_msg); up(&hr_dev->cmd.poll_sem); return ret; @@ -91,7 +81,7 @@ void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status, if (unlikely(token != context->token)) { dev_err_ratelimited(hr_dev->dev, - "[cmd] invalid ae token %x,context token is %x!\n", + "[cmd] invalid ae token 0x%x, context token is 0x%x.\n", token, context->token); return; } @@ -101,10 +91,8 @@ void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status, complete(&context->done); } -static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, - u64 out_param, unsigned long in_modifier, - u8 op_modifier, u16 op, - unsigned int timeout) +static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, + struct hns_roce_mbox_msg *mbox_msg) { struct hns_roce_cmdq *cmd = &hr_dev->cmd; struct hns_roce_cmd_context *context; @@ -125,66 +113,70 @@ static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, reinit_completion(&context->done); - ret = hns_roce_cmd_mbox_post_hw(hr_dev, in_param, out_param, - in_modifier, op_modifier, op, - context->token, 1); + mbox_msg->token = context->token; + ret = hns_roce_cmd_mbox_post_hw(hr_dev, mbox_msg); if (ret) { dev_err_ratelimited(dev, - "failed to post mailbox %x in event mode, ret = %d.\n", - op, ret); + "failed to post mailbox 0x%x in event mode, ret = %d.\n", + mbox_msg->cmd, ret); goto out; } if (!wait_for_completion_timeout(&context->done, - msecs_to_jiffies(timeout))) { - dev_err_ratelimited(dev, "[cmd] token %x mailbox %x timeout.\n", - context->token, op); + 
msecs_to_jiffies(HNS_ROCE_CMD_TIMEOUT_MSECS))) { + dev_err_ratelimited(dev, "[cmd] token 0x%x mailbox 0x%x timeout.\n", + context->token, mbox_msg->cmd); ret = -EBUSY; goto out; } ret = context->result; if (ret) - dev_err_ratelimited(dev, "[cmd] token %x mailbox %x error %d\n", - context->token, op, ret); + dev_err_ratelimited(dev, "[cmd] token 0x%x mailbox 0x%x error %d.\n", + context->token, mbox_msg->cmd, ret); out: context->busy = 0; return ret; } -static int hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, - u64 out_param, unsigned long in_modifier, - u8 op_modifier, u16 op, unsigned int timeout) +static int hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, + struct hns_roce_mbox_msg *mbox_msg) { int ret; down(&hr_dev->cmd.event_sem); - ret = __hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param, in_modifier, - op_modifier, op, timeout); + ret = __hns_roce_cmd_mbox_wait(hr_dev, mbox_msg); up(&hr_dev->cmd.event_sem); return ret; } int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, - unsigned long in_modifier, u8 op_modifier, u16 op, - unsigned int timeout) + u8 cmd, unsigned long tag) { + struct hns_roce_mbox_msg mbox_msg = {}; bool is_busy; if (hr_dev->hw->chk_mbox_avail) if (!hr_dev->hw->chk_mbox_avail(hr_dev, &is_busy)) return is_busy ? 
-EBUSY : 0; - if (hr_dev->cmd.use_events) - return hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param, - in_modifier, op_modifier, op, - timeout); - else - return hns_roce_cmd_mbox_poll(hr_dev, in_param, out_param, - in_modifier, op_modifier, op, - timeout); + mbox_msg.in_param = in_param; + mbox_msg.out_param = out_param; + mbox_msg.cmd = cmd; + mbox_msg.tag = tag; + + if (hr_dev->cmd.use_events) { + mbox_msg.event_en = 1; + + return hns_roce_cmd_mbox_wait(hr_dev, &mbox_msg); + } else { + mbox_msg.event_en = 0; + mbox_msg.token = CMD_POLL_TOKEN; + + return hns_roce_cmd_mbox_poll(hr_dev, &mbox_msg); + } } int hns_roce_cmd_init(struct hns_roce_dev *hr_dev) @@ -270,3 +262,15 @@ void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev, dma_pool_free(hr_dev->cmd.pool, mailbox->buf, mailbox->dma); kfree(mailbox); } + +int hns_roce_create_hw_ctx(struct hns_roce_dev *dev, + struct hns_roce_cmd_mailbox *mailbox, + u8 cmd, unsigned long idx) +{ + return hns_roce_cmd_mbox(dev, mailbox->dma, 0, cmd, idx); +} + +int hns_roce_destroy_hw_ctx(struct hns_roce_dev *dev, u8 cmd, unsigned long idx) +{ + return hns_roce_cmd_mbox(dev, 0, 0, cmd, idx); +} diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h index 8025e7f657fa..052a3d60905a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.h +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h @@ -140,12 +140,16 @@ enum { }; int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, - unsigned long in_modifier, u8 op_modifier, u16 op, - unsigned int timeout); + u8 cmd, unsigned long tag); struct hns_roce_cmd_mailbox * hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev); void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev, struct hns_roce_cmd_mailbox *mailbox); +int hns_roce_create_hw_ctx(struct hns_roce_dev *dev, + struct hns_roce_cmd_mailbox *mailbox, + u8 cmd, unsigned long idx); +int hns_roce_destroy_hw_ctx(struct hns_roce_dev *dev, u8 cmd, + unsigned long idx); 
#endif /* _HNS_ROCE_CMD_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index b73e55de83ac..465d1f914b6c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -104,208 +104,6 @@ #define hr_reg_read(ptr, field) _hr_reg_read(ptr, field) -#define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3 -#define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4 - -#define ROCEE_GLB_CFG_SQ_EXT_DB_MODE_S 5 - -#define ROCEE_GLB_CFG_OTH_EXT_DB_MODE_S 6 - -#define ROCEE_GLB_CFG_ROCEE_PORT_ST_S 10 -#define ROCEE_GLB_CFG_ROCEE_PORT_ST_M \ - (((1UL << 6) - 1) << ROCEE_GLB_CFG_ROCEE_PORT_ST_S) - -#define ROCEE_GLB_CFG_TRP_RAQ_DROP_EN_S 16 - -#define ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_S 0 -#define ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_M \ - (((1UL << 24) - 1) << ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_S) - -#define ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_S 24 -#define ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_M \ - (((1UL << 4) - 1) << ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_S) - -#define ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_S 0 -#define ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_M \ - (((1UL << 24) - 1) << ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_S) - -#define ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_S 24 -#define ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_M \ - (((1UL << 4) - 1) << ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_S) - -#define ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_S 0 -#define ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_M \ - (((1UL << 16) - 1) << ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_S) - -#define ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_S 16 -#define ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_M \ - (((1UL << 16) - 1) << ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_S) - -#define ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_S 0 -#define ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_M \ - (((1UL << 16) - 1) << ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_S) - -#define ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_S 16 -#define 
ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_M \ - (((1UL << 16) - 1) << ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_S) - -#define ROCEE_RAQ_WL_ROCEE_RAQ_WL_S 0 -#define ROCEE_RAQ_WL_ROCEE_RAQ_WL_M \ - (((1UL << 8) - 1) << ROCEE_RAQ_WL_ROCEE_RAQ_WL_S) - -#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_S 0 -#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_M \ - (((1UL << 15) - 1) << \ - ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_S) - -#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_S 16 -#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_M \ - (((1UL << 4) - 1) << \ - ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_S) - -#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_EN_S 20 - -#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_EXT_RAQ_MODE 21 - -#define ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_S 0 -#define ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_M \ - (((1UL << 5) - 1) << ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_S) - -#define ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_S 5 -#define ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_M \ - (((1UL << 5) - 1) << ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_S) - -#define ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_S 0 -#define ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_M \ - (((1UL << 5) - 1) << ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_S) - -#define ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_S 5 -#define ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_M \ - (((1UL << 5) - 1) << ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_S) - -#define ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_S 0 -#define ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_M \ - (((1UL << 5) - 1) << ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_S) - -#define ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_S 8 -#define ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_M \ - (((1UL << 5) - 1) << ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_S) - -#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S 0 -#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M \ - (((1UL << 19) - 1) << ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S) - -#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_S 19 - -#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S 20 -#define 
ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M \ - (((1UL << 2) - 1) << ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S) - -#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S 22 -#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M \ - (((1UL << 5) - 1) << ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S) - -#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S 31 - -#define ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S 0 -#define ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_M \ - (((1UL << 3) - 1) << ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S) - -#define ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S 0 -#define ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_M \ - (((1UL << 15) - 1) << ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S) - -#define ROCEE_MB6_ROCEE_MB_CMD_S 0 -#define ROCEE_MB6_ROCEE_MB_CMD_M \ - (((1UL << 8) - 1) << ROCEE_MB6_ROCEE_MB_CMD_S) - -#define ROCEE_MB6_ROCEE_MB_CMD_MDF_S 8 -#define ROCEE_MB6_ROCEE_MB_CMD_MDF_M \ - (((1UL << 4) - 1) << ROCEE_MB6_ROCEE_MB_CMD_MDF_S) - -#define ROCEE_MB6_ROCEE_MB_EVENT_S 14 - -#define ROCEE_MB6_ROCEE_MB_HW_RUN_S 15 - -#define ROCEE_MB6_ROCEE_MB_TOKEN_S 16 -#define ROCEE_MB6_ROCEE_MB_TOKEN_M \ - (((1UL << 16) - 1) << ROCEE_MB6_ROCEE_MB_TOKEN_S) - -#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_S 0 -#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_M \ - (((1UL << 24) - 1) << ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_S) - -#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_S 24 -#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_M \ - (((1UL << 4) - 1) << ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_S) - -#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S 28 -#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M \ - (((1UL << 3) - 1) << ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S) - -#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S 31 - -#define ROCEE_SMAC_H_ROCEE_SMAC_H_S 0 -#define ROCEE_SMAC_H_ROCEE_SMAC_H_M \ - (((1UL << 16) - 1) << ROCEE_SMAC_H_ROCEE_SMAC_H_S) - -#define ROCEE_SMAC_H_ROCEE_PORT_MTU_S 16 -#define ROCEE_SMAC_H_ROCEE_PORT_MTU_M \ - (((1UL << 4) - 1) << ROCEE_SMAC_H_ROCEE_PORT_MTU_S) - -#define ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S 0 -#define 
ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M \ - (((1UL << 2) - 1) << ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S) - -#define ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S 8 -#define ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M \ - (((1UL << 4) - 1) << ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S) - -#define ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQ_ALM_OVF_INT_ST_S 17 - -#define ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S 0 -#define ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M \ - (((1UL << 5) - 1) << ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S) - -#define ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S 16 -#define ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M \ - (((1UL << 16) - 1) << ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S) - -#define ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S 0 -#define ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M \ - (((1UL << 16) - 1) << ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S) - -#define ROCEE_CAEP_CEQC_SHIFT_CAEP_CEQ_ALM_OVF_INT_ST_S 16 -#define ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S 1 -#define ROCEE_CAEP_CEQ_ALM_OVF_CAEP_CEQ_ALM_OVF_S 0 - -#define ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S 0 -#define ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S 1 - -#define ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S 0 - -#define ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S 0 -#define ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M \ - (((1UL << 28) - 1) << ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S) - -#define ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S 0 -#define ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M \ - (((1UL << 28) - 1) << ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) - -#define ROCEE_SDB_INV_CNT_SDB_INV_CNT_S 0 -#define ROCEE_SDB_INV_CNT_SDB_INV_CNT_M \ - (((1UL << 16) - 1) << ROCEE_SDB_INV_CNT_SDB_INV_CNT_S) - -#define ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S 0 -#define ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M \ - (((1UL << 16) - 1) << ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S) - -#define ROCEE_SDB_CNT_CMP_BITS 16 - -#define ROCEE_TSP_BP_ST_QH_FIFO_ENTRY_S 20 - -#define ROCEE_CNT_CLR_CE_CNT_CLR_CE_S 0 - 
/*************ROCEE_REG DEFINITION****************/ #define ROCEE_VENDOR_ID_REG 0x0 #define ROCEE_VENDOR_PART_ID_REG 0x4 diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index d763f097599f..736dc2f993b4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -30,7 +30,6 @@ * SOFTWARE. */ -#include <linux/platform_device.h> #include <rdma/ib_umem.h> #include <rdma/uverbs_ioctl.h> #include "hns_roce_device.h" @@ -101,12 +100,39 @@ static void free_cqn(struct hns_roce_dev *hr_dev, unsigned long cqn) mutex_unlock(&cq_table->bank_mutex); } +static int hns_roce_create_cqc(struct hns_roce_dev *hr_dev, + struct hns_roce_cq *hr_cq, + u64 *mtts, dma_addr_t dma_handle) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_cmd_mailbox *mailbox; + int ret; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) { + ibdev_err(ibdev, "failed to alloc mailbox for CQC.\n"); + return PTR_ERR(mailbox); + } + + hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle); + + ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_CQC, + hr_cq->cqn); + if (ret) + ibdev_err(ibdev, + "failed to send create cmd for CQ(0x%lx), ret = %d.\n", + hr_cq->cqn, ret); + + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + + return ret; +} + static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_cmd_mailbox *mailbox; - u64 mtts[MTT_MIN_COUNT] = { 0 }; + u64 mtts[MTT_MIN_COUNT] = {}; dma_addr_t dma_handle; int ret; @@ -122,7 +148,7 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) if (ret) { ibdev_err(ibdev, "failed to get CQ(0x%lx) context, ret = %d.\n", hr_cq->cqn, ret); - goto err_out; + return ret; } ret = xa_err(xa_store(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL)); @@ -131,41 +157,17 @@ 
static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) goto err_put; } - /* Allocate mailbox memory */ - mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR(mailbox)) { - ret = PTR_ERR(mailbox); - goto err_xa; - } - - hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle); - - /* Send mailbox to hw */ - ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_cq->cqn, 0, - HNS_ROCE_CMD_CREATE_CQC, HNS_ROCE_CMD_TIMEOUT_MSECS); - hns_roce_free_cmd_mailbox(hr_dev, mailbox); - if (ret) { - ibdev_err(ibdev, - "failed to send create cmd for CQ(0x%lx), ret = %d.\n", - hr_cq->cqn, ret); + ret = hns_roce_create_cqc(hr_dev, hr_cq, mtts, dma_handle); + if (ret) goto err_xa; - } - - hr_cq->cons_index = 0; - hr_cq->arm_sn = 1; - - refcount_set(&hr_cq->refcount, 1); - init_completion(&hr_cq->free); return 0; err_xa: xa_erase(&cq_table->array, hr_cq->cqn); - err_put: hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn); -err_out: return ret; } @@ -175,9 +177,8 @@ static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) struct device *dev = hr_dev->dev; int ret; - ret = hns_roce_cmd_mbox(hr_dev, 0, 0, hr_cq->cqn, 1, - HNS_ROCE_CMD_DESTROY_CQC, - HNS_ROCE_CMD_TIMEOUT_MSECS); + ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_CQC, + hr_cq->cqn); if (ret) dev_err(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", ret, hr_cq->cqn); @@ -406,15 +407,6 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, goto err_cqn; } - /* - * For the QP created by kernel space, tptr value should be initialized - * to zero; For the QP created by user space, it will cause synchronous - * problems if tptr is set to zero here, so we initialize it in user - * space. 
- */ - if (!udata && hr_cq->tptr_addr) - *hr_cq->tptr_addr = 0; - if (udata) { resp.cqn = hr_cq->cqn; ret = ib_copy_to_udata(udata, &resp, @@ -423,6 +415,11 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, goto err_cqc; } + hr_cq->cons_index = 0; + hr_cq->arm_sn = 1; + refcount_set(&hr_cq->refcount, 1); + init_completion(&hr_cq->free); + return 0; err_cqc: @@ -441,9 +438,6 @@ int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); - if (hr_dev->hw->destroy_cq) - hr_dev->hw->destroy_cq(ib_cq, udata); - free_cqc(hr_dev, hr_cq); free_cqn(hr_dev, hr_cq->cqn); free_cq_db(hr_dev, hr_cq, udata); @@ -460,7 +454,7 @@ void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn) hr_cq = xa_load(&hr_dev->cq_table.array, cqn & (hr_dev->caps.num_cqs - 1)); if (!hr_cq) { - dev_warn(hr_dev->dev, "Completion event for bogus CQ 0x%06x\n", + dev_warn(hr_dev->dev, "completion event for bogus CQ 0x%06x\n", cqn); return; } @@ -481,14 +475,14 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type) hr_cq = xa_load(&hr_dev->cq_table.array, cqn & (hr_dev->caps.num_cqs - 1)); if (!hr_cq) { - dev_warn(dev, "Async event for bogus CQ 0x%06x\n", cqn); + dev_warn(dev, "async event for bogus CQ 0x%06x\n", cqn); return; } if (event_type != HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID && event_type != HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR && event_type != HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW) { - dev_err(dev, "Unexpected event type 0x%x on CQ 0x%06x\n", + dev_err(dev, "unexpected event type 0x%x on CQ 0x%06x\n", event_type, cqn); return; } diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c index 751470c7a2ce..5c4c0480832b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_db.c +++ b/drivers/infiniband/hw/hns/hns_roce_db.c @@ -4,7 +4,6 @@ * Copyright (c) 2007, 2008 Mellanox Technologies. 
All rights reserved. */ -#include <linux/platform_device.h> #include <rdma/ib_umem.h> #include "hns_roce_device.h" diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 43e17d61cb63..723e55a7de8d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -36,36 +36,18 @@ #include <rdma/ib_verbs.h> #include <rdma/hns-abi.h> -#define DRV_NAME "hns_roce" - #define PCI_REVISION_ID_HIP08 0x21 #define PCI_REVISION_ID_HIP09 0x30 -#define HNS_ROCE_HW_VER1 ('h' << 24 | 'i' << 16 | '0' << 8 | '6') - #define HNS_ROCE_MAX_MSG_LEN 0x80000000 #define HNS_ROCE_IB_MIN_SQ_STRIDE 6 #define BA_BYTE_LEN 8 -/* Hardware specification only for v1 engine */ #define HNS_ROCE_MIN_CQE_NUM 0x40 -#define HNS_ROCE_MIN_WQE_NUM 0x20 #define HNS_ROCE_MIN_SRQ_WQE_NUM 1 -/* Hardware specification only for v1 engine */ -#define HNS_ROCE_MAX_INNER_MTPT_NUM 0x7 -#define HNS_ROCE_MAX_MTPT_PBL_NUM 0x100000 - -#define HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS 20 -#define HNS_ROCE_MAX_FREE_CQ_WAIT_CNT \ - (5000 / HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS) -#define HNS_ROCE_CQE_WCMD_EMPTY_BIT 0x2 -#define HNS_ROCE_MIN_CQE_CNT 16 - -#define HNS_ROCE_RESERVED_SGE 1 - #define HNS_ROCE_MAX_IRQ_NUM 128 #define HNS_ROCE_SGE_IN_WQE 2 @@ -102,18 +84,12 @@ #define HNS_ROCE_FRMR_MAX_PA 512 #define PKEY_ID 0xffff -#define GUID_LEN 8 #define NODE_DESC_SIZE 64 #define DB_REG_OFFSET 0x1000 /* Configure to HW for PAGE_SIZE larger than 4KB */ #define PG_SHIFT_OFFSET (PAGE_SHIFT - 12) -#define PAGES_SHIFT_8 8 -#define PAGES_SHIFT_16 16 -#define PAGES_SHIFT_24 24 -#define PAGES_SHIFT_32 32 - #define HNS_ROCE_IDX_QUE_ENTRY_SZ 4 #define SRQ_DB_REG 0x230 @@ -122,11 +98,6 @@ #define CQ_BANKID_SHIFT 2 -/* The chip implementation of the consumer index is calculated - * according to twice the actual EQ depth - */ -#define EQ_DEPTH_COEFF 2 - enum { SERV_TYPE_RC, SERV_TYPE_UC, @@ -135,16 +106,6 @@ enum { SERV_TYPE_XRC = 5, }; -enum 
hns_roce_qp_state { - HNS_ROCE_QP_STATE_RST, - HNS_ROCE_QP_STATE_INIT, - HNS_ROCE_QP_STATE_RTR, - HNS_ROCE_QP_STATE_RTS, - HNS_ROCE_QP_STATE_SQD, - HNS_ROCE_QP_STATE_ERR, - HNS_ROCE_QP_NUM_STATE, -}; - enum hns_roce_event { HNS_ROCE_EVENT_TYPE_PATH_MIG = 0x01, HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED = 0x02, @@ -168,8 +129,6 @@ enum hns_roce_event { HNS_ROCE_EVENT_TYPE_INVALID_XRCETH = 0x17, }; -#define HNS_ROCE_CAP_FLAGS_EX_SHIFT 12 - enum { HNS_ROCE_CAP_FLAG_REREG_MR = BIT(0), HNS_ROCE_CAP_FLAG_ROCE_V1_V2 = BIT(1), @@ -182,6 +141,7 @@ enum { HNS_ROCE_CAP_FLAG_FRMR = BIT(8), HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL = BIT(9), HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10), + HNS_ROCE_CAP_FLAG_DIRECT_WQE = BIT(12), HNS_ROCE_CAP_FLAG_SDI_MODE = BIT(14), HNS_ROCE_CAP_FLAG_STASH = BIT(17), }; @@ -227,7 +187,7 @@ struct hns_roce_uar { enum hns_roce_mmap_type { HNS_ROCE_MMAP_TYPE_DB = 1, - HNS_ROCE_MMAP_TYPE_TPTR, + HNS_ROCE_MMAP_TYPE_DWQE, }; struct hns_user_mmap_entry { @@ -242,7 +202,6 @@ struct hns_roce_ucontext { struct list_head page_list; struct mutex page_mutex; struct hns_user_mmap_entry *db_mmap_entry; - struct hns_user_mmap_entry *tptr_mmap_entry; }; struct hns_roce_pd { @@ -281,7 +240,6 @@ struct hns_roce_hem_table { /* Single obj size */ unsigned long obj_size; unsigned long table_chunk_size; - int lowmem; struct mutex mutex; struct hns_roce_hem **hem; u64 **bt_l1; @@ -345,19 +303,16 @@ struct hns_roce_mw { u32 pbl_buf_pg_sz; }; -/* Only support 4K page size for mr register */ -#define MR_SIZE_4K 0 - struct hns_roce_mr { struct ib_mr ibmr; u64 iova; /* MR's virtual original addr */ u64 size; /* Address range of MR */ u32 key; /* Key of MR */ u32 pd; /* PD num of MR */ - u32 access; /* Access permission of MR */ + u32 access; /* Access permission of MR */ int enabled; /* MR's active status */ - int type; /* MR's register type */ - u32 pbl_hop_num; /* multi-hop number */ + int type; /* MR's register type */ + u32 pbl_hop_num; /* multi-hop number */ struct hns_roce_mtr pbl_mtr; u32 
npages; dma_addr_t *page_list; @@ -374,17 +329,17 @@ struct hns_roce_wq { u32 wqe_cnt; /* WQE num */ u32 max_gs; u32 rsv_sge; - int offset; - int wqe_shift; /* WQE size */ + u32 offset; + u32 wqe_shift; /* WQE size */ u32 head; u32 tail; void __iomem *db_reg; }; struct hns_roce_sge { - unsigned int sge_cnt; /* SGE num */ - int offset; - int sge_shift; /* SGE size */ + unsigned int sge_cnt; /* SGE num */ + u32 offset; + u32 sge_shift; /* SGE size */ }; struct hns_roce_buf_list { @@ -453,7 +408,6 @@ struct hns_roce_cq { u32 cons_index; u32 *set_ci_db; void __iomem *db_reg; - u16 *tptr_addr; int arm_sn; int cqe_size; unsigned long cqn; @@ -468,7 +422,7 @@ struct hns_roce_cq { struct hns_roce_idx_que { struct hns_roce_mtr mtr; - int entry_shift; + u32 entry_shift; unsigned long *bitmap; u32 head; u32 tail; @@ -480,7 +434,7 @@ struct hns_roce_srq { u32 wqe_cnt; int max_gs; u32 rsv_sge; - int wqe_shift; + u32 wqe_shift; u32 cqn; u32 xrcdn; void __iomem *db_reg; @@ -539,10 +493,6 @@ struct hns_roce_srq_table { struct hns_roce_hem_table table; }; -struct hns_roce_raq_table { - struct hns_roce_buf_list *e_raq_buf; -}; - struct hns_roce_av { u8 port; u8 gid_index; @@ -572,6 +522,11 @@ struct hns_roce_cmd_context { u16 busy; }; +enum hns_roce_cmdq_state { + HNS_ROCE_CMDQ_STATE_NORMAL, + HNS_ROCE_CMDQ_STATE_FATAL_ERR, +}; + struct hns_roce_cmdq { struct dma_pool *pool; struct semaphore poll_sem; @@ -591,6 +546,7 @@ struct hns_roce_cmdq { * close device, switch into poll mode(non event mode) */ u8 use_events; + enum hns_roce_cmdq_state state; }; struct hns_roce_cmd_mailbox { @@ -598,6 +554,15 @@ struct hns_roce_cmd_mailbox { dma_addr_t dma; }; +struct hns_roce_mbox_msg { + u64 in_param; + u64 out_param; + u8 cmd; + u32 tag; + u16 token; + u8 event_en; +}; + struct hns_roce_dev; struct hns_roce_rinl_sge { @@ -627,17 +592,12 @@ struct hns_roce_work { u32 queue_num; }; -enum { - HNS_ROCE_QP_CAP_DIRECT_WQE = BIT(5), -}; - struct hns_roce_qp { struct ib_qp ibqp; struct hns_roce_wq 
rq; struct hns_roce_db rdb; struct hns_roce_db sdb; unsigned long en_flags; - u32 doorbell_qpn; enum ib_sig_type sq_signal_bits; struct hns_roce_wq sq; @@ -650,9 +610,7 @@ struct hns_roce_qp { u8 sl; u8 resp_depth; u8 state; - u32 access_flags; u32 atomic_rd_en; - u32 pkey_index; u32 qkey; void (*event)(struct hns_roce_qp *qp, enum hns_roce_event event_type); @@ -667,14 +625,16 @@ struct hns_roce_qp { u32 next_sge; enum ib_mtu path_mtu; u32 max_inline_data; + u8 free_mr_en; /* 0: flush needed, 1: unneeded */ unsigned long flush_flag; struct hns_roce_work flush_work; struct hns_roce_rinl_buf rq_inl_buf; - struct list_head node; /* all qps are on a list */ - struct list_head rq_node; /* all recv qps are on a list */ - struct list_head sq_node; /* all send qps are on a list */ + struct list_head node; /* all qps are on a list */ + struct list_head rq_node; /* all recv qps are on a list */ + struct list_head sq_node; /* all send qps are on a list */ + struct hns_user_mmap_entry *dwqe_mmap_entry; }; struct hns_roce_ib_iboe { @@ -684,16 +644,16 @@ struct hns_roce_ib_iboe { u8 phy_port[HNS_ROCE_MAX_PORTS]; }; -enum { - HNS_ROCE_EQ_STAT_INVALID = 0, - HNS_ROCE_EQ_STAT_VALID = 2, -}; - struct hns_roce_ceqe { __le32 comp; __le32 rsv[15]; }; +#define CEQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_ceqe, h, l) + +#define CEQE_CQN CEQE_FIELD_LOC(23, 0) +#define CEQE_OWNER CEQE_FIELD_LOC(31, 31) + struct hns_roce_aeqe { __le32 asyn; union { @@ -713,6 +673,13 @@ struct hns_roce_aeqe { __le32 rsv[12]; }; +#define AEQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_aeqe, h, l) + +#define AEQE_EVENT_TYPE AEQE_FIELD_LOC(7, 0) +#define AEQE_SUB_TYPE AEQE_FIELD_LOC(15, 8) +#define AEQE_OWNER AEQE_FIELD_LOC(31, 31) +#define AEQE_EVENT_QUEUE_NUM AEQE_FIELD_LOC(55, 32) + struct hns_roce_eq { struct hns_roce_dev *hr_dev; void __iomem *db_reg; @@ -720,12 +687,9 @@ struct hns_roce_eq { int type_flag; /* Aeq:1 ceq:0 */ int eqn; u32 entries; - u32 log_entries; int eqe_size; int irq; - int 
log_page_size; u32 cons_index; - struct hns_roce_buf_list *buf_list; int over_ignore; int coalesce; int arm_st; @@ -740,7 +704,6 @@ struct hns_roce_eq { struct hns_roce_eq_table { struct hns_roce_eq *eq; - void __iomem **eqc_base; /* only for hw v1 */ }; enum cong_type { @@ -761,19 +724,17 @@ struct hns_roce_caps { u32 max_sq_sg; u32 max_sq_inline; u32 max_rq_sg; - u32 max_extend_sg; + u32 rsv0; u32 num_qps; u32 num_pi_qps; u32 reserved_qps; - int num_qpc_timer; - int num_cqc_timer; - int num_srqs; + u32 num_srqs; u32 max_wqes; u32 max_srq_wrs; u32 max_srq_sges; u32 max_sq_desc_sz; u32 max_rq_desc_sz; - u32 max_srq_desc_sz; + u32 rsv2; int max_qp_init_rdma; int max_qp_dest_rdma; u32 num_cqs; @@ -781,12 +742,12 @@ struct hns_roce_caps { u32 min_cqes; u32 min_wqes; u32 reserved_cqs; - int reserved_srqs; + u32 reserved_srqs; int num_aeq_vectors; int num_comp_vectors; int num_other_vectors; u32 num_mtpts; - u32 num_mtt_segs; + u32 rsv1; u32 num_srqwqe_segs; u32 num_idx_segs; int reserved_mrws; @@ -855,7 +816,7 @@ struct hns_roce_caps { u32 cqc_timer_ba_pg_sz; u32 cqc_timer_buf_pg_sz; u32 cqc_timer_hop_num; - u32 cqe_ba_pg_sz; /* page_size = 4K*(2^cqe_ba_pg_sz) */ + u32 cqe_ba_pg_sz; /* page_size = 4K*(2^cqe_ba_pg_sz) */ u32 cqe_buf_pg_sz; u32 cqe_hop_num; u32 srqwqe_ba_pg_sz; @@ -874,7 +835,7 @@ struct hns_roce_caps { u32 gmv_hop_num; u32 sl_num; u32 llm_buf_pg_sz; - u32 chunk_sz; /* chunk size in non multihop mode */ + u32 chunk_sz; /* chunk size in non multihop mode */ u64 flags; u16 default_ceq_max_cnt; u16 default_ceq_period; @@ -885,11 +846,6 @@ struct hns_roce_caps { enum cong_type cong_type; }; -struct hns_roce_dfx_hw { - int (*query_cqc_info)(struct hns_roce_dev *hr_dev, u32 cqn, - int *buffer); -}; - enum hns_roce_device_state { HNS_ROCE_DEVICE_STATE_INITED, HNS_ROCE_DEVICE_STATE_RST_DOWN, @@ -897,26 +853,21 @@ enum hns_roce_device_state { }; struct hns_roce_hw { - int (*reset)(struct hns_roce_dev *hr_dev, bool enable); int (*cmq_init)(struct hns_roce_dev 
*hr_dev); void (*cmq_exit)(struct hns_roce_dev *hr_dev); int (*hw_profile)(struct hns_roce_dev *hr_dev); int (*hw_init)(struct hns_roce_dev *hr_dev); void (*hw_exit)(struct hns_roce_dev *hr_dev); - int (*post_mbox)(struct hns_roce_dev *hr_dev, u64 in_param, - u64 out_param, u32 in_modifier, u8 op_modifier, u16 op, - u16 token, int event); - int (*poll_mbox_done)(struct hns_roce_dev *hr_dev, - unsigned int timeout); + int (*post_mbox)(struct hns_roce_dev *hr_dev, + struct hns_roce_mbox_msg *mbox_msg); + int (*poll_mbox_done)(struct hns_roce_dev *hr_dev); bool (*chk_mbox_avail)(struct hns_roce_dev *hr_dev, bool *is_busy); - int (*set_gid)(struct hns_roce_dev *hr_dev, u32 port, int gid_index, + int (*set_gid)(struct hns_roce_dev *hr_dev, int gid_index, const union ib_gid *gid, const struct ib_gid_attr *attr); int (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, const u8 *addr); - void (*set_mtu)(struct hns_roce_dev *hr_dev, u8 phy_port, - enum ib_mtu mtu); int (*write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf, - struct hns_roce_mr *mr, unsigned long mtpt_idx); + struct hns_roce_mr *mr); int (*rereg_write_mtpt)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, int flags, void *mb_buf); @@ -927,34 +878,33 @@ struct hns_roce_hw { struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts, dma_addr_t dma_handle); int (*set_hem)(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_table *table, int obj, int step_idx); + struct hns_roce_hem_table *table, int obj, u32 step_idx); int (*clear_hem)(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, int obj, - int step_idx); + u32 step_idx); int (*modify_qp)(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state); int (*qp_flow_control_init)(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); - int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, - struct ib_udata *udata); - int (*destroy_cq)(struct ib_cq *ibcq, struct ib_udata 
*udata); + void (*dereg_mr)(struct hns_roce_dev *hr_dev); int (*init_eq)(struct hns_roce_dev *hr_dev); void (*cleanup_eq)(struct hns_roce_dev *hr_dev); int (*write_srqc)(struct hns_roce_srq *srq, void *mb_buf); + int (*query_cqc)(struct hns_roce_dev *hr_dev, u32 cqn, void *buffer); + int (*query_qpc)(struct hns_roce_dev *hr_dev, u32 qpn, void *buffer); + int (*query_mpt)(struct hns_roce_dev *hr_dev, u32 key, void *buffer); const struct ib_device_ops *hns_roce_dev_ops; const struct ib_device_ops *hns_roce_dev_srq_ops; }; struct hns_roce_dev { struct ib_device ib_dev; - struct platform_device *pdev; struct pci_dev *pci_dev; struct device *dev; struct hns_roce_uar priv_uar; const char *irq_names[HNS_ROCE_MAX_IRQ_NUM]; spinlock_t sm_lock; - spinlock_t bt_cmd_lock; bool active; bool is_reset; bool dis_db; @@ -1001,15 +951,14 @@ struct hns_roce_dev { int loop_idc; u32 sdb_offset; u32 odb_offset; - dma_addr_t tptr_dma_addr; /* only for hw v1 */ - u32 tptr_size; /* only for hw v1 */ const struct hns_roce_hw *hw; void *priv; struct workqueue_struct *irq_workq; - const struct hns_roce_dfx_hw *dfx; + struct work_struct ecc_work; u32 func_num; u32 is_vf; u32 cong_algo_tmpl_id; + u64 dwqe_page; }; static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev) @@ -1158,7 +1107,7 @@ void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev); /* hns roce hw need current block and next block addr from mtt */ #define MTT_MIN_COUNT 2 int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr); + u32 offset, u64 *mtt_buf, int mtt_max, u64 *base_addr); int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, struct hns_roce_buf_attr *buf_attr, unsigned int page_shift, struct ib_udata *udata, @@ -1205,9 +1154,6 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int 
*sg_offset); int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); -int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long mpt_index); unsigned long key_to_hw_index(u32 key); int hns_roce_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); @@ -1245,7 +1191,6 @@ void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, unsigned int n); void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, unsigned int n); bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, u32 nreq, struct ib_cq *ib_cq); -enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state); void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq); void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, @@ -1277,8 +1222,12 @@ u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u32 port, int gid_index); void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev); int hns_roce_init(struct hns_roce_dev *hr_dev); void hns_roce_exit(struct hns_roce_dev *hr_dev); -int hns_roce_fill_res_cq_entry(struct sk_buff *msg, - struct ib_cq *ib_cq); +int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq); +int hns_roce_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq); +int hns_roce_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp); +int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp); +int hns_roce_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr); +int hns_roce_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr); struct hns_user_mmap_entry * hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address, size_t length, diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index fa15d79eabb3..aa8a08d1c014 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -31,7 +31,6 @@ * SOFTWARE. 
*/ -#include <linux/platform_device.h> #include "hns_roce_device.h" #include "hns_roce_hem.h" #include "hns_roce_common.h" @@ -456,7 +455,7 @@ static int alloc_mhop_hem(struct hns_roce_dev *hr_dev, * alloc bt space chunk for MTT/CQE. */ size = table->type < HEM_TYPE_MTT ? mhop->buf_chunk_size : bt_size; - flag = (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) | __GFP_NOWARN; + flag = GFP_KERNEL | __GFP_NOWARN; table->hem[index->buf] = hns_roce_alloc_hem(hr_dev, size >> PAGE_SHIFT, size, flag); if (!table->hem[index->buf]) { @@ -489,7 +488,7 @@ static int set_mhop_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem_index *index) { struct ib_device *ibdev = &hr_dev->ib_dev; - int step_idx; + u32 step_idx; int ret = 0; if (index->inited & HEM_INDEX_L0) { @@ -589,8 +588,7 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev, table->hem[i] = hns_roce_alloc_hem(hr_dev, table->table_chunk_size >> PAGE_SHIFT, table->table_chunk_size, - (table->lowmem ? GFP_KERNEL : - GFP_HIGHUSER) | __GFP_NOWARN); + GFP_KERNEL | __GFP_NOWARN); if (!table->hem[i]) { ret = -ENOMEM; goto out; @@ -619,7 +617,7 @@ static void clear_mhop_hem(struct hns_roce_dev *hr_dev, struct ib_device *ibdev = &hr_dev->ib_dev; u32 hop_num = mhop->hop_num; u32 chunk_ba_num; - int step_idx; + u32 step_idx; index->inited = HEM_INDEX_BUF; chunk_ba_num = mhop->bt_chunk_size / BA_BYTE_LEN; @@ -726,9 +724,6 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev, int length; int i, j; - if (!table->lowmem) - return NULL; - mutex_lock(&table->mutex); if (!hns_roce_check_whether_mhop(hr_dev, table->type)) { @@ -784,8 +779,7 @@ out: int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, u32 type, - unsigned long obj_size, unsigned long nobj, - int use_lowmem) + unsigned long obj_size, unsigned long nobj) { unsigned long obj_per_chunk; unsigned long num_hem; @@ -862,7 +856,6 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, table->type = type; table->num_hem = num_hem; 
table->obj_size = obj_size; - table->lowmem = use_lowmem; mutex_init(&table->mutex); return 0; @@ -933,7 +926,7 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, if (table->hem[i]) { if (hr_dev->hw->clear_hem(hr_dev, table, i * table->table_chunk_size / table->obj_size, 0)) - dev_err(dev, "Clear HEM base address failed.\n"); + dev_err(dev, "clear HEM base address failed.\n"); hns_roce_free_hem(hr_dev, table->hem[i]); } @@ -987,7 +980,7 @@ struct hns_roce_hem_head { static struct hns_roce_hem_item * hem_list_alloc_item(struct hns_roce_dev *hr_dev, int start, int end, int count, - bool exist_bt, int bt_level) + bool exist_bt) { struct hns_roce_hem_item *hem; @@ -1196,7 +1189,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, start_aligned = (distance / step) * step + r->offset; end = min_t(int, start_aligned + step - 1, max_ofs); cur = hem_list_alloc_item(hr_dev, start_aligned, end, unit, - true, level); + true); if (!cur) { ret = -ENOMEM; goto err_exit; @@ -1248,7 +1241,7 @@ alloc_root_hem(struct hns_roce_dev *hr_dev, int unit, int *max_ba_num, /* indicate to last region */ r = ®ions[region_cnt - 1]; hem = hem_list_alloc_item(hr_dev, offset, r->offset + r->count - 1, - ba_num, true, 0); + ba_num, true); if (!hem) return ERR_PTR(-ENOMEM); @@ -1265,7 +1258,7 @@ static int alloc_fake_root_bt(struct hns_roce_dev *hr_dev, void *cpu_base, struct hns_roce_hem_item *hem; hem = hem_list_alloc_item(hr_dev, r->offset, r->offset + r->count - 1, - r->count, false, 0); + r->count, false); if (!hem) return -ENOMEM; @@ -1422,7 +1415,7 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, &hem_list->btm_bt); if (ret) { dev_err(hr_dev->dev, - "alloc hem trunk fail ret=%d!\n", ret); + "alloc hem trunk fail ret = %d!\n", ret); goto err_alloc; } } @@ -1431,7 +1424,7 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, ret = hem_list_alloc_root_bt(hr_dev, hem_list, unit, regions, region_cnt); if (ret) - dev_err(hr_dev->dev, "alloc hem root 
fail ret=%d!\n", ret); + dev_err(hr_dev->dev, "alloc hem root fail ret = %d!\n", ret); else return 0; @@ -1469,19 +1462,17 @@ void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list) void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list, - int offset, int *mtt_cnt, u64 *phy_addr) + int offset, int *mtt_cnt) { struct list_head *head = &hem_list->btm_bt; struct hns_roce_hem_item *hem, *temp_hem; void *cpu_base = NULL; - u64 phy_base = 0; int nr = 0; list_for_each_entry_safe(hem, temp_hem, head, sibling) { if (hem_list_page_is_in_range(hem, offset)) { nr = offset - hem->start; cpu_base = hem->addr + nr * BA_BYTE_LEN; - phy_base = hem->dma_addr + nr * BA_BYTE_LEN; nr = hem->end + 1 - offset; break; } @@ -1490,8 +1481,5 @@ void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev, if (mtt_cnt) *mtt_cnt = nr; - if (phy_addr) - *phy_addr = phy_base; - return cpu_base; } diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index 2d84a6b3f05d..7d23d3c51da4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -111,8 +111,7 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev, dma_addr_t *dma_handle); int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, u32 type, - unsigned long obj_size, unsigned long nobj, - int use_lowmem); + unsigned long obj_size, unsigned long nobj); void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table); void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev); @@ -132,7 +131,7 @@ void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list); void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list, - int offset, int *mtt_cnt, u64 *phy_addr); + int offset, int *mtt_cnt); static inline void hns_roce_hem_first(struct hns_roce_hem *hem, struct 
hns_roce_hem_iter *iter) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c deleted file mode 100644 index f4af3992ba95..000000000000 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ /dev/null @@ -1,4675 +0,0 @@ -/* - * Copyright (c) 2016 Hisilicon Limited. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <linux/platform_device.h> -#include <linux/acpi.h> -#include <linux/etherdevice.h> -#include <linux/interrupt.h> -#include <linux/of.h> -#include <linux/of_platform.h> -#include <rdma/ib_umem.h> -#include "hns_roce_common.h" -#include "hns_roce_device.h" -#include "hns_roce_cmd.h" -#include "hns_roce_hem.h" -#include "hns_roce_hw_v1.h" - -/** - * hns_get_gid_index - Get gid index. - * @hr_dev: pointer to structure hns_roce_dev. - * @port: port, value range: 0 ~ MAX - * @gid_index: gid_index, value range: 0 ~ MAX - * Description: - * N ports shared gids, allocation method as follow: - * GID[0][0], GID[1][0],.....GID[N - 1][0], - * GID[0][0], GID[1][0],.....GID[N - 1][0], - * And so on - */ -u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u32 port, int gid_index) -{ - return gid_index * hr_dev->caps.num_ports + port; -} - -static void set_data_seg(struct hns_roce_wqe_data_seg *dseg, struct ib_sge *sg) -{ - dseg->lkey = cpu_to_le32(sg->lkey); - dseg->addr = cpu_to_le64(sg->addr); - dseg->len = cpu_to_le32(sg->length); -} - -static void set_raddr_seg(struct hns_roce_wqe_raddr_seg *rseg, u64 remote_addr, - u32 rkey) -{ - rseg->raddr = cpu_to_le64(remote_addr); - rseg->rkey = cpu_to_le32(rkey); - rseg->len = 0; -} - -static int hns_roce_v1_post_send(struct ib_qp *ibqp, - const struct ib_send_wr *wr, - const struct ib_send_wr **bad_wr) -{ - struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); - struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah); - struct hns_roce_ud_send_wqe *ud_sq_wqe = NULL; - struct hns_roce_wqe_ctrl_seg *ctrl = NULL; - struct hns_roce_wqe_data_seg *dseg = NULL; - struct hns_roce_qp *qp = to_hr_qp(ibqp); - struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_sq_db sq_db = {}; - int ps_opcode, i; - unsigned long flags = 0; - void *wqe = NULL; - __le32 doorbell[2]; - const u8 *smac; - int ret = 0; - int loopback; - u32 wqe_idx; - int nreq; - - if (unlikely(ibqp->qp_type != IB_QPT_GSI && - ibqp->qp_type != IB_QPT_RC)) { - 
dev_err(dev, "un-supported QP type\n"); - *bad_wr = NULL; - return -EOPNOTSUPP; - } - - spin_lock_irqsave(&qp->sq.lock, flags); - - for (nreq = 0; wr; ++nreq, wr = wr->next) { - if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { - ret = -ENOMEM; - *bad_wr = wr; - goto out; - } - - wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1); - - if (unlikely(wr->num_sge > qp->sq.max_gs)) { - dev_err(dev, "num_sge=%d > qp->sq.max_gs=%d\n", - wr->num_sge, qp->sq.max_gs); - ret = -EINVAL; - *bad_wr = wr; - goto out; - } - - wqe = hns_roce_get_send_wqe(qp, wqe_idx); - qp->sq.wrid[wqe_idx] = wr->wr_id; - - /* Corresponding to the RC and RD type wqe process separately */ - if (ibqp->qp_type == IB_QPT_GSI) { - ud_sq_wqe = wqe; - roce_set_field(ud_sq_wqe->dmac_h, - UD_SEND_WQE_U32_4_DMAC_0_M, - UD_SEND_WQE_U32_4_DMAC_0_S, - ah->av.mac[0]); - roce_set_field(ud_sq_wqe->dmac_h, - UD_SEND_WQE_U32_4_DMAC_1_M, - UD_SEND_WQE_U32_4_DMAC_1_S, - ah->av.mac[1]); - roce_set_field(ud_sq_wqe->dmac_h, - UD_SEND_WQE_U32_4_DMAC_2_M, - UD_SEND_WQE_U32_4_DMAC_2_S, - ah->av.mac[2]); - roce_set_field(ud_sq_wqe->dmac_h, - UD_SEND_WQE_U32_4_DMAC_3_M, - UD_SEND_WQE_U32_4_DMAC_3_S, - ah->av.mac[3]); - - roce_set_field(ud_sq_wqe->u32_8, - UD_SEND_WQE_U32_8_DMAC_4_M, - UD_SEND_WQE_U32_8_DMAC_4_S, - ah->av.mac[4]); - roce_set_field(ud_sq_wqe->u32_8, - UD_SEND_WQE_U32_8_DMAC_5_M, - UD_SEND_WQE_U32_8_DMAC_5_S, - ah->av.mac[5]); - - smac = (const u8 *)hr_dev->dev_addr[qp->port]; - loopback = ether_addr_equal_unaligned(ah->av.mac, - smac) ? 
1 : 0; - roce_set_bit(ud_sq_wqe->u32_8, - UD_SEND_WQE_U32_8_LOOPBACK_INDICATOR_S, - loopback); - - roce_set_field(ud_sq_wqe->u32_8, - UD_SEND_WQE_U32_8_OPERATION_TYPE_M, - UD_SEND_WQE_U32_8_OPERATION_TYPE_S, - HNS_ROCE_WQE_OPCODE_SEND); - roce_set_field(ud_sq_wqe->u32_8, - UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_M, - UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_S, - 2); - roce_set_bit(ud_sq_wqe->u32_8, - UD_SEND_WQE_U32_8_SEND_GL_ROUTING_HDR_FLAG_S, - 1); - - ud_sq_wqe->u32_8 |= (wr->send_flags & IB_SEND_SIGNALED ? - cpu_to_le32(HNS_ROCE_WQE_CQ_NOTIFY) : 0) | - (wr->send_flags & IB_SEND_SOLICITED ? - cpu_to_le32(HNS_ROCE_WQE_SE) : 0) | - ((wr->opcode == IB_WR_SEND_WITH_IMM) ? - cpu_to_le32(HNS_ROCE_WQE_IMM) : 0); - - roce_set_field(ud_sq_wqe->u32_16, - UD_SEND_WQE_U32_16_DEST_QP_M, - UD_SEND_WQE_U32_16_DEST_QP_S, - ud_wr(wr)->remote_qpn); - roce_set_field(ud_sq_wqe->u32_16, - UD_SEND_WQE_U32_16_MAX_STATIC_RATE_M, - UD_SEND_WQE_U32_16_MAX_STATIC_RATE_S, - ah->av.stat_rate); - - roce_set_field(ud_sq_wqe->u32_36, - UD_SEND_WQE_U32_36_FLOW_LABEL_M, - UD_SEND_WQE_U32_36_FLOW_LABEL_S, - ah->av.flowlabel); - roce_set_field(ud_sq_wqe->u32_36, - UD_SEND_WQE_U32_36_PRIORITY_M, - UD_SEND_WQE_U32_36_PRIORITY_S, - ah->av.sl); - roce_set_field(ud_sq_wqe->u32_36, - UD_SEND_WQE_U32_36_SGID_INDEX_M, - UD_SEND_WQE_U32_36_SGID_INDEX_S, - hns_get_gid_index(hr_dev, qp->phy_port, - ah->av.gid_index)); - - roce_set_field(ud_sq_wqe->u32_40, - UD_SEND_WQE_U32_40_HOP_LIMIT_M, - UD_SEND_WQE_U32_40_HOP_LIMIT_S, - ah->av.hop_limit); - roce_set_field(ud_sq_wqe->u32_40, - UD_SEND_WQE_U32_40_TRAFFIC_CLASS_M, - UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S, - ah->av.tclass); - - memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN); - - ud_sq_wqe->va0_l = - cpu_to_le32((u32)wr->sg_list[0].addr); - ud_sq_wqe->va0_h = - cpu_to_le32((wr->sg_list[0].addr) >> 32); - ud_sq_wqe->l_key0 = - cpu_to_le32(wr->sg_list[0].lkey); - - ud_sq_wqe->va1_l = - cpu_to_le32((u32)wr->sg_list[1].addr); - ud_sq_wqe->va1_h = - 
cpu_to_le32((wr->sg_list[1].addr) >> 32); - ud_sq_wqe->l_key1 = - cpu_to_le32(wr->sg_list[1].lkey); - } else if (ibqp->qp_type == IB_QPT_RC) { - u32 tmp_len = 0; - - ctrl = wqe; - memset(ctrl, 0, sizeof(struct hns_roce_wqe_ctrl_seg)); - for (i = 0; i < wr->num_sge; i++) - tmp_len += wr->sg_list[i].length; - - ctrl->msg_length = - cpu_to_le32(le32_to_cpu(ctrl->msg_length) + tmp_len); - - ctrl->sgl_pa_h = 0; - ctrl->flag = 0; - - switch (wr->opcode) { - case IB_WR_SEND_WITH_IMM: - case IB_WR_RDMA_WRITE_WITH_IMM: - ctrl->imm_data = wr->ex.imm_data; - break; - case IB_WR_SEND_WITH_INV: - ctrl->inv_key = - cpu_to_le32(wr->ex.invalidate_rkey); - break; - default: - ctrl->imm_data = 0; - break; - } - - /* Ctrl field, ctrl set type: sig, solic, imm, fence */ - /* SO wait for conforming application scenarios */ - ctrl->flag |= (wr->send_flags & IB_SEND_SIGNALED ? - cpu_to_le32(HNS_ROCE_WQE_CQ_NOTIFY) : 0) | - (wr->send_flags & IB_SEND_SOLICITED ? - cpu_to_le32(HNS_ROCE_WQE_SE) : 0) | - ((wr->opcode == IB_WR_SEND_WITH_IMM || - wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) ? - cpu_to_le32(HNS_ROCE_WQE_IMM) : 0) | - (wr->send_flags & IB_SEND_FENCE ? 
- (cpu_to_le32(HNS_ROCE_WQE_FENCE)) : 0); - - wqe += sizeof(struct hns_roce_wqe_ctrl_seg); - - switch (wr->opcode) { - case IB_WR_RDMA_READ: - ps_opcode = HNS_ROCE_WQE_OPCODE_RDMA_READ; - set_raddr_seg(wqe, rdma_wr(wr)->remote_addr, - rdma_wr(wr)->rkey); - break; - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_WRITE_WITH_IMM: - ps_opcode = HNS_ROCE_WQE_OPCODE_RDMA_WRITE; - set_raddr_seg(wqe, rdma_wr(wr)->remote_addr, - rdma_wr(wr)->rkey); - break; - case IB_WR_SEND: - case IB_WR_SEND_WITH_INV: - case IB_WR_SEND_WITH_IMM: - ps_opcode = HNS_ROCE_WQE_OPCODE_SEND; - break; - case IB_WR_LOCAL_INV: - case IB_WR_ATOMIC_CMP_AND_SWP: - case IB_WR_ATOMIC_FETCH_AND_ADD: - case IB_WR_LSO: - default: - ps_opcode = HNS_ROCE_WQE_OPCODE_MASK; - break; - } - ctrl->flag |= cpu_to_le32(ps_opcode); - wqe += sizeof(struct hns_roce_wqe_raddr_seg); - - dseg = wqe; - if (wr->send_flags & IB_SEND_INLINE && wr->num_sge) { - if (le32_to_cpu(ctrl->msg_length) > - hr_dev->caps.max_sq_inline) { - ret = -EINVAL; - *bad_wr = wr; - dev_err(dev, "inline len(1-%d)=%d, illegal", - le32_to_cpu(ctrl->msg_length), - hr_dev->caps.max_sq_inline); - goto out; - } - for (i = 0; i < wr->num_sge; i++) { - memcpy(wqe, ((void *) (uintptr_t) - wr->sg_list[i].addr), - wr->sg_list[i].length); - wqe += wr->sg_list[i].length; - } - ctrl->flag |= cpu_to_le32(HNS_ROCE_WQE_INLINE); - } else { - /* sqe num is two */ - for (i = 0; i < wr->num_sge; i++) - set_data_seg(dseg + i, wr->sg_list + i); - - ctrl->flag |= cpu_to_le32(wr->num_sge << - HNS_ROCE_WQE_SGE_NUM_BIT); - } - } - } - -out: - /* Set DB return */ - if (likely(nreq)) { - qp->sq.head += nreq; - - roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SQ_HEAD_M, - SQ_DOORBELL_U32_4_SQ_HEAD_S, - (qp->sq.head & ((qp->sq.wqe_cnt << 1) - 1))); - roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SL_M, - SQ_DOORBELL_U32_4_SL_S, qp->sl); - roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_PORT_M, - SQ_DOORBELL_U32_4_PORT_S, qp->phy_port); - roce_set_field(sq_db.u32_8, 
SQ_DOORBELL_U32_8_QPN_M, - SQ_DOORBELL_U32_8_QPN_S, qp->doorbell_qpn); - roce_set_bit(sq_db.u32_8, SQ_DOORBELL_HW_SYNC_S, 1); - - doorbell[0] = sq_db.u32_4; - doorbell[1] = sq_db.u32_8; - - hns_roce_write64_k(doorbell, qp->sq.db_reg); - } - - spin_unlock_irqrestore(&qp->sq.lock, flags); - - return ret; -} - -static int hns_roce_v1_post_recv(struct ib_qp *ibqp, - const struct ib_recv_wr *wr, - const struct ib_recv_wr **bad_wr) -{ - struct hns_roce_rq_wqe_ctrl *ctrl = NULL; - struct hns_roce_wqe_data_seg *scat = NULL; - struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); - struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); - struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_rq_db rq_db = {}; - __le32 doorbell[2] = {0}; - unsigned long flags = 0; - unsigned int wqe_idx; - int ret = 0; - int nreq; - int i; - u32 reg_val; - - spin_lock_irqsave(&hr_qp->rq.lock, flags); - - for (nreq = 0; wr; ++nreq, wr = wr->next) { - if (hns_roce_wq_overflow(&hr_qp->rq, nreq, - hr_qp->ibqp.recv_cq)) { - ret = -ENOMEM; - *bad_wr = wr; - goto out; - } - - wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); - - if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) { - dev_err(dev, "rq:num_sge=%d > qp->sq.max_gs=%d\n", - wr->num_sge, hr_qp->rq.max_gs); - ret = -EINVAL; - *bad_wr = wr; - goto out; - } - - ctrl = hns_roce_get_recv_wqe(hr_qp, wqe_idx); - - roce_set_field(ctrl->rwqe_byte_12, - RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_M, - RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_S, - wr->num_sge); - - scat = (struct hns_roce_wqe_data_seg *)(ctrl + 1); - - for (i = 0; i < wr->num_sge; i++) - set_data_seg(scat + i, wr->sg_list + i); - - hr_qp->rq.wrid[wqe_idx] = wr->wr_id; - } - -out: - if (likely(nreq)) { - hr_qp->rq.head += nreq; - - if (ibqp->qp_type == IB_QPT_GSI) { - __le32 tmp; - - /* SW update GSI rq header */ - reg_val = roce_read(to_hr_dev(ibqp->device), - ROCEE_QP1C_CFG3_0_REG + - QP1C_CFGN_OFFSET * hr_qp->phy_port); - tmp = cpu_to_le32(reg_val); - roce_set_field(tmp, - 
ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_M, - ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S, - hr_qp->rq.head); - reg_val = le32_to_cpu(tmp); - roce_write(to_hr_dev(ibqp->device), - ROCEE_QP1C_CFG3_0_REG + - QP1C_CFGN_OFFSET * hr_qp->phy_port, reg_val); - } else { - roce_set_field(rq_db.u32_4, RQ_DOORBELL_U32_4_RQ_HEAD_M, - RQ_DOORBELL_U32_4_RQ_HEAD_S, - hr_qp->rq.head); - roce_set_field(rq_db.u32_8, RQ_DOORBELL_U32_8_QPN_M, - RQ_DOORBELL_U32_8_QPN_S, hr_qp->qpn); - roce_set_field(rq_db.u32_8, RQ_DOORBELL_U32_8_CMD_M, - RQ_DOORBELL_U32_8_CMD_S, 1); - roce_set_bit(rq_db.u32_8, RQ_DOORBELL_U32_8_HW_SYNC_S, - 1); - - doorbell[0] = rq_db.u32_4; - doorbell[1] = rq_db.u32_8; - - hns_roce_write64_k(doorbell, hr_qp->rq.db_reg); - } - } - spin_unlock_irqrestore(&hr_qp->rq.lock, flags); - - return ret; -} - -static void hns_roce_set_db_event_mode(struct hns_roce_dev *hr_dev, - int sdb_mode, int odb_mode) -{ - __le32 tmp; - u32 val; - - val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); - tmp = cpu_to_le32(val); - roce_set_bit(tmp, ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S, sdb_mode); - roce_set_bit(tmp, ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S, odb_mode); - val = le32_to_cpu(tmp); - roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); -} - -static int hns_roce_v1_set_hem(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_table *table, int obj, - int step_idx) -{ - spinlock_t *lock = &hr_dev->bt_cmd_lock; - struct device *dev = hr_dev->dev; - struct hns_roce_hem_iter iter; - void __iomem *bt_cmd; - __le32 bt_cmd_val[2]; - __le32 bt_cmd_h = 0; - unsigned long flags; - __le32 bt_cmd_l; - int ret = 0; - u64 bt_ba; - long end; - - /* Find the HEM(Hardware Entry Memory) entry */ - unsigned long i = obj / (table->table_chunk_size / table->obj_size); - - switch (table->type) { - case HEM_TYPE_QPC: - case HEM_TYPE_MTPT: - case HEM_TYPE_CQC: - case HEM_TYPE_SRQC: - roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, - ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, table->type); - break; - default: - return ret; - } - - 
roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M, - ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S, obj); - roce_set_bit(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0); - roce_set_bit(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S, 1); - - /* Currently iter only a chunk */ - for (hns_roce_hem_first(table->hem[i], &iter); - !hns_roce_hem_last(&iter); hns_roce_hem_next(&iter)) { - bt_ba = hns_roce_hem_addr(&iter) >> HNS_HW_PAGE_SHIFT; - - spin_lock_irqsave(lock, flags); - - bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG; - - end = HW_SYNC_TIMEOUT_MSECS; - while (end > 0) { - if (!(readl(bt_cmd) >> BT_CMD_SYNC_SHIFT)) - break; - - mdelay(HW_SYNC_SLEEP_TIME_INTERVAL); - end -= HW_SYNC_SLEEP_TIME_INTERVAL; - } - - if (end <= 0) { - dev_err(dev, "Write bt_cmd err,hw_sync is not zero.\n"); - spin_unlock_irqrestore(lock, flags); - return -EBUSY; - } - - bt_cmd_l = cpu_to_le32(bt_ba); - roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M, - ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S, - upper_32_bits(bt_ba)); - - bt_cmd_val[0] = bt_cmd_l; - bt_cmd_val[1] = bt_cmd_h; - hns_roce_write64_k(bt_cmd_val, - hr_dev->reg_base + ROCEE_BT_CMD_L_REG); - spin_unlock_irqrestore(lock, flags); - } - - return ret; -} - -static void hns_roce_set_db_ext_mode(struct hns_roce_dev *hr_dev, u32 sdb_mode, - u32 odb_mode) -{ - __le32 tmp; - u32 val; - - /* Configure SDB/ODB extend mode */ - val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); - tmp = cpu_to_le32(val); - roce_set_bit(tmp, ROCEE_GLB_CFG_SQ_EXT_DB_MODE_S, sdb_mode); - roce_set_bit(tmp, ROCEE_GLB_CFG_OTH_EXT_DB_MODE_S, odb_mode); - val = le32_to_cpu(tmp); - roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); -} - -static void hns_roce_set_sdb(struct hns_roce_dev *hr_dev, u32 sdb_alept, - u32 sdb_alful) -{ - __le32 tmp; - u32 val; - - /* Configure SDB */ - val = roce_read(hr_dev, ROCEE_DB_SQ_WL_REG); - tmp = cpu_to_le32(val); - roce_set_field(tmp, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_M, - ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_S, sdb_alful); - roce_set_field(tmp, 
ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_M, - ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_S, sdb_alept); - val = le32_to_cpu(tmp); - roce_write(hr_dev, ROCEE_DB_SQ_WL_REG, val); -} - -static void hns_roce_set_odb(struct hns_roce_dev *hr_dev, u32 odb_alept, - u32 odb_alful) -{ - __le32 tmp; - u32 val; - - /* Configure ODB */ - val = roce_read(hr_dev, ROCEE_DB_OTHERS_WL_REG); - tmp = cpu_to_le32(val); - roce_set_field(tmp, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_M, - ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_S, odb_alful); - roce_set_field(tmp, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_M, - ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_S, odb_alept); - val = le32_to_cpu(tmp); - roce_write(hr_dev, ROCEE_DB_OTHERS_WL_REG, val); -} - -static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept, - u32 ext_sdb_alful) -{ - struct hns_roce_v1_priv *priv = hr_dev->priv; - struct hns_roce_db_table *db = &priv->db_table; - struct device *dev = &hr_dev->pdev->dev; - dma_addr_t sdb_dma_addr; - __le32 tmp; - u32 val; - - /* Configure extend SDB threshold */ - roce_write(hr_dev, ROCEE_EXT_DB_SQ_WL_EMPTY_REG, ext_sdb_alept); - roce_write(hr_dev, ROCEE_EXT_DB_SQ_WL_REG, ext_sdb_alful); - - /* Configure extend SDB base addr */ - sdb_dma_addr = db->ext_db->sdb_buf_list->map; - roce_write(hr_dev, ROCEE_EXT_DB_SQ_REG, (u32)(sdb_dma_addr >> 12)); - - /* Configure extend SDB depth */ - val = roce_read(hr_dev, ROCEE_EXT_DB_SQ_H_REG); - tmp = cpu_to_le32(val); - roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_M, - ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_S, - db->ext_db->esdb_dep); - /* - * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of - * using 4K page, and shift more 32 because of - * calculating the high 32 bit value evaluated to hardware. 
- */ - roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_M, - ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_S, sdb_dma_addr >> 44); - val = le32_to_cpu(tmp); - roce_write(hr_dev, ROCEE_EXT_DB_SQ_H_REG, val); - - dev_dbg(dev, "ext SDB depth: 0x%x\n", db->ext_db->esdb_dep); - dev_dbg(dev, "ext SDB threshold: empty: 0x%x, ful: 0x%x\n", - ext_sdb_alept, ext_sdb_alful); -} - -static void hns_roce_set_odb_ext(struct hns_roce_dev *hr_dev, u32 ext_odb_alept, - u32 ext_odb_alful) -{ - struct hns_roce_v1_priv *priv = hr_dev->priv; - struct hns_roce_db_table *db = &priv->db_table; - struct device *dev = &hr_dev->pdev->dev; - dma_addr_t odb_dma_addr; - __le32 tmp; - u32 val; - - /* Configure extend ODB threshold */ - roce_write(hr_dev, ROCEE_EXT_DB_OTHERS_WL_EMPTY_REG, ext_odb_alept); - roce_write(hr_dev, ROCEE_EXT_DB_OTHERS_WL_REG, ext_odb_alful); - - /* Configure extend ODB base addr */ - odb_dma_addr = db->ext_db->odb_buf_list->map; - roce_write(hr_dev, ROCEE_EXT_DB_OTH_REG, (u32)(odb_dma_addr >> 12)); - - /* Configure extend ODB depth */ - val = roce_read(hr_dev, ROCEE_EXT_DB_OTH_H_REG); - tmp = cpu_to_le32(val); - roce_set_field(tmp, ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_M, - ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_S, - db->ext_db->eodb_dep); - roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_M, - ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_S, - db->ext_db->eodb_dep); - val = le32_to_cpu(tmp); - roce_write(hr_dev, ROCEE_EXT_DB_OTH_H_REG, val); - - dev_dbg(dev, "ext ODB depth: 0x%x\n", db->ext_db->eodb_dep); - dev_dbg(dev, "ext ODB threshold: empty: 0x%x, ful: 0x%x\n", - ext_odb_alept, ext_odb_alful); -} - -static int hns_roce_db_ext_init(struct hns_roce_dev *hr_dev, u32 sdb_ext_mod, - u32 odb_ext_mod) -{ - struct hns_roce_v1_priv *priv = hr_dev->priv; - struct hns_roce_db_table *db = &priv->db_table; - struct device *dev = &hr_dev->pdev->dev; - dma_addr_t sdb_dma_addr; - dma_addr_t odb_dma_addr; - int ret = 0; - - db->ext_db = kmalloc(sizeof(*db->ext_db), GFP_KERNEL); - if (!db->ext_db) - 
return -ENOMEM; - - if (sdb_ext_mod) { - db->ext_db->sdb_buf_list = kmalloc( - sizeof(*db->ext_db->sdb_buf_list), GFP_KERNEL); - if (!db->ext_db->sdb_buf_list) { - ret = -ENOMEM; - goto ext_sdb_buf_fail_out; - } - - db->ext_db->sdb_buf_list->buf = dma_alloc_coherent(dev, - HNS_ROCE_V1_EXT_SDB_SIZE, - &sdb_dma_addr, GFP_KERNEL); - if (!db->ext_db->sdb_buf_list->buf) { - ret = -ENOMEM; - goto alloc_sq_db_buf_fail; - } - db->ext_db->sdb_buf_list->map = sdb_dma_addr; - - db->ext_db->esdb_dep = ilog2(HNS_ROCE_V1_EXT_SDB_DEPTH); - hns_roce_set_sdb_ext(hr_dev, HNS_ROCE_V1_EXT_SDB_ALEPT, - HNS_ROCE_V1_EXT_SDB_ALFUL); - } else - hns_roce_set_sdb(hr_dev, HNS_ROCE_V1_SDB_ALEPT, - HNS_ROCE_V1_SDB_ALFUL); - - if (odb_ext_mod) { - db->ext_db->odb_buf_list = kmalloc( - sizeof(*db->ext_db->odb_buf_list), GFP_KERNEL); - if (!db->ext_db->odb_buf_list) { - ret = -ENOMEM; - goto ext_odb_buf_fail_out; - } - - db->ext_db->odb_buf_list->buf = dma_alloc_coherent(dev, - HNS_ROCE_V1_EXT_ODB_SIZE, - &odb_dma_addr, GFP_KERNEL); - if (!db->ext_db->odb_buf_list->buf) { - ret = -ENOMEM; - goto alloc_otr_db_buf_fail; - } - db->ext_db->odb_buf_list->map = odb_dma_addr; - - db->ext_db->eodb_dep = ilog2(HNS_ROCE_V1_EXT_ODB_DEPTH); - hns_roce_set_odb_ext(hr_dev, HNS_ROCE_V1_EXT_ODB_ALEPT, - HNS_ROCE_V1_EXT_ODB_ALFUL); - } else - hns_roce_set_odb(hr_dev, HNS_ROCE_V1_ODB_ALEPT, - HNS_ROCE_V1_ODB_ALFUL); - - hns_roce_set_db_ext_mode(hr_dev, sdb_ext_mod, odb_ext_mod); - - return 0; - -alloc_otr_db_buf_fail: - kfree(db->ext_db->odb_buf_list); - -ext_odb_buf_fail_out: - if (sdb_ext_mod) { - dma_free_coherent(dev, HNS_ROCE_V1_EXT_SDB_SIZE, - db->ext_db->sdb_buf_list->buf, - db->ext_db->sdb_buf_list->map); - } - -alloc_sq_db_buf_fail: - if (sdb_ext_mod) - kfree(db->ext_db->sdb_buf_list); - -ext_sdb_buf_fail_out: - kfree(db->ext_db); - return ret; -} - -static struct hns_roce_qp *hns_roce_v1_create_lp_qp(struct hns_roce_dev *hr_dev, - struct ib_pd *pd) -{ - struct device *dev = &hr_dev->pdev->dev; - struct 
ib_qp_init_attr init_attr; - struct ib_qp *qp; - - memset(&init_attr, 0, sizeof(struct ib_qp_init_attr)); - init_attr.qp_type = IB_QPT_RC; - init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; - init_attr.cap.max_recv_wr = HNS_ROCE_MIN_WQE_NUM; - init_attr.cap.max_send_wr = HNS_ROCE_MIN_WQE_NUM; - - qp = ib_create_qp(pd, &init_attr); - if (IS_ERR(qp)) { - dev_err(dev, "Create loop qp for mr free failed!"); - return NULL; - } - - return to_hr_qp(qp); -} - -static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) -{ - struct hns_roce_v1_priv *priv = hr_dev->priv; - struct hns_roce_free_mr *free_mr = &priv->free_mr; - struct hns_roce_caps *caps = &hr_dev->caps; - struct ib_device *ibdev = &hr_dev->ib_dev; - struct device *dev = &hr_dev->pdev->dev; - struct ib_cq_init_attr cq_init_attr; - struct ib_qp_attr attr = { 0 }; - struct hns_roce_qp *hr_qp; - struct ib_cq *cq; - struct ib_pd *pd; - union ib_gid dgid; - __be64 subnet_prefix; - int attr_mask = 0; - int ret; - int i, j; - u8 queue_en[HNS_ROCE_V1_RESV_QP] = { 0 }; - u8 phy_port; - u32 port = 0; - u8 sl; - - /* Reserved cq for loop qp */ - cq_init_attr.cqe = HNS_ROCE_MIN_WQE_NUM * 2; - cq_init_attr.comp_vector = 0; - - cq = rdma_zalloc_drv_obj(ibdev, ib_cq); - if (!cq) - return -ENOMEM; - - ret = hns_roce_create_cq(cq, &cq_init_attr, NULL); - if (ret) { - dev_err(dev, "Create cq for reserved loop qp failed!"); - goto alloc_cq_failed; - } - free_mr->mr_free_cq = to_hr_cq(cq); - free_mr->mr_free_cq->ib_cq.device = &hr_dev->ib_dev; - free_mr->mr_free_cq->ib_cq.uobject = NULL; - free_mr->mr_free_cq->ib_cq.comp_handler = NULL; - free_mr->mr_free_cq->ib_cq.event_handler = NULL; - free_mr->mr_free_cq->ib_cq.cq_context = NULL; - atomic_set(&free_mr->mr_free_cq->ib_cq.usecnt, 0); - - pd = rdma_zalloc_drv_obj(ibdev, ib_pd); - if (!pd) { - ret = -ENOMEM; - goto alloc_mem_failed; - } - - pd->device = ibdev; - ret = hns_roce_alloc_pd(pd, NULL); - if (ret) - goto alloc_pd_failed; - - free_mr->mr_free_pd = to_hr_pd(pd); - 
free_mr->mr_free_pd->ibpd.device = &hr_dev->ib_dev; - free_mr->mr_free_pd->ibpd.uobject = NULL; - free_mr->mr_free_pd->ibpd.__internal_mr = NULL; - atomic_set(&free_mr->mr_free_pd->ibpd.usecnt, 0); - - attr.qp_access_flags = IB_ACCESS_REMOTE_WRITE; - attr.pkey_index = 0; - attr.min_rnr_timer = 0; - /* Disable read ability */ - attr.max_dest_rd_atomic = 0; - attr.max_rd_atomic = 0; - /* Use arbitrary values as rq_psn and sq_psn */ - attr.rq_psn = 0x0808; - attr.sq_psn = 0x0808; - attr.retry_cnt = 7; - attr.rnr_retry = 7; - attr.timeout = 0x12; - attr.path_mtu = IB_MTU_256; - attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; - rdma_ah_set_grh(&attr.ah_attr, NULL, 0, 0, 1, 0); - rdma_ah_set_static_rate(&attr.ah_attr, 3); - - subnet_prefix = cpu_to_be64(0xfe80000000000000LL); - for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) { - phy_port = (i >= HNS_ROCE_MAX_PORTS) ? (i - 2) : - (i % HNS_ROCE_MAX_PORTS); - sl = i / HNS_ROCE_MAX_PORTS; - - for (j = 0; j < caps->num_ports; j++) { - if (hr_dev->iboe.phy_port[j] == phy_port) { - queue_en[i] = 1; - port = j; - break; - } - } - - if (!queue_en[i]) - continue; - - free_mr->mr_free_qp[i] = hns_roce_v1_create_lp_qp(hr_dev, pd); - if (!free_mr->mr_free_qp[i]) { - dev_err(dev, "Create loop qp failed!\n"); - ret = -ENOMEM; - goto create_lp_qp_failed; - } - hr_qp = free_mr->mr_free_qp[i]; - - hr_qp->port = port; - hr_qp->phy_port = phy_port; - hr_qp->ibqp.qp_type = IB_QPT_RC; - hr_qp->ibqp.device = &hr_dev->ib_dev; - hr_qp->ibqp.uobject = NULL; - atomic_set(&hr_qp->ibqp.usecnt, 0); - hr_qp->ibqp.pd = pd; - hr_qp->ibqp.recv_cq = cq; - hr_qp->ibqp.send_cq = cq; - - rdma_ah_set_port_num(&attr.ah_attr, port + 1); - rdma_ah_set_sl(&attr.ah_attr, sl); - attr.port_num = port + 1; - - attr.dest_qp_num = hr_qp->qpn; - memcpy(rdma_ah_retrieve_dmac(&attr.ah_attr), - hr_dev->dev_addr[port], - ETH_ALEN); - - memcpy(&dgid.raw, &subnet_prefix, sizeof(u64)); - memcpy(&dgid.raw[8], hr_dev->dev_addr[port], 3); - memcpy(&dgid.raw[13], hr_dev->dev_addr[port] + 3, 
3); - dgid.raw[11] = 0xff; - dgid.raw[12] = 0xfe; - dgid.raw[8] ^= 2; - rdma_ah_set_dgid_raw(&attr.ah_attr, dgid.raw); - - ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask, - IB_QPS_RESET, IB_QPS_INIT); - if (ret) { - dev_err(dev, "modify qp failed(%d)!\n", ret); - goto create_lp_qp_failed; - } - - ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, IB_QP_DEST_QPN, - IB_QPS_INIT, IB_QPS_RTR); - if (ret) { - dev_err(dev, "modify qp failed(%d)!\n", ret); - goto create_lp_qp_failed; - } - - ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask, - IB_QPS_RTR, IB_QPS_RTS); - if (ret) { - dev_err(dev, "modify qp failed(%d)!\n", ret); - goto create_lp_qp_failed; - } - } - - return 0; - -create_lp_qp_failed: - for (i -= 1; i >= 0; i--) { - hr_qp = free_mr->mr_free_qp[i]; - if (ib_destroy_qp(&hr_qp->ibqp)) - dev_err(dev, "Destroy qp %d for mr free failed!\n", i); - } - - hns_roce_dealloc_pd(pd, NULL); - -alloc_pd_failed: - kfree(pd); - -alloc_mem_failed: - hns_roce_destroy_cq(cq, NULL); -alloc_cq_failed: - kfree(cq); - return ret; -} - -static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev) -{ - struct hns_roce_v1_priv *priv = hr_dev->priv; - struct hns_roce_free_mr *free_mr = &priv->free_mr; - struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_qp *hr_qp; - int ret; - int i; - - for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) { - hr_qp = free_mr->mr_free_qp[i]; - if (!hr_qp) - continue; - - ret = ib_destroy_qp(&hr_qp->ibqp); - if (ret) - dev_err(dev, "Destroy qp %d for mr free failed(%d)!\n", - i, ret); - } - - hns_roce_destroy_cq(&free_mr->mr_free_cq->ib_cq, NULL); - kfree(&free_mr->mr_free_cq->ib_cq); - hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd, NULL); - kfree(&free_mr->mr_free_pd->ibpd); -} - -static int hns_roce_db_init(struct hns_roce_dev *hr_dev) -{ - struct hns_roce_v1_priv *priv = hr_dev->priv; - struct hns_roce_db_table *db = &priv->db_table; - struct device *dev = &hr_dev->pdev->dev; - u32 sdb_ext_mod; - u32 odb_ext_mod; - u32 
sdb_evt_mod; - u32 odb_evt_mod; - int ret; - - memset(db, 0, sizeof(*db)); - - /* Default DB mode */ - sdb_ext_mod = HNS_ROCE_SDB_EXTEND_MODE; - odb_ext_mod = HNS_ROCE_ODB_EXTEND_MODE; - sdb_evt_mod = HNS_ROCE_SDB_NORMAL_MODE; - odb_evt_mod = HNS_ROCE_ODB_POLL_MODE; - - db->sdb_ext_mod = sdb_ext_mod; - db->odb_ext_mod = odb_ext_mod; - - /* Init extend DB */ - ret = hns_roce_db_ext_init(hr_dev, sdb_ext_mod, odb_ext_mod); - if (ret) { - dev_err(dev, "Failed in extend DB configuration.\n"); - return ret; - } - - hns_roce_set_db_event_mode(hr_dev, sdb_evt_mod, odb_evt_mod); - - return 0; -} - -static void hns_roce_v1_recreate_lp_qp_work_fn(struct work_struct *work) -{ - struct hns_roce_recreate_lp_qp_work *lp_qp_work; - struct hns_roce_dev *hr_dev; - - lp_qp_work = container_of(work, struct hns_roce_recreate_lp_qp_work, - work); - hr_dev = to_hr_dev(lp_qp_work->ib_dev); - - hns_roce_v1_release_lp_qp(hr_dev); - - if (hns_roce_v1_rsv_lp_qp(hr_dev)) - dev_err(&hr_dev->pdev->dev, "create reserver qp failed\n"); - - if (lp_qp_work->comp_flag) - complete(lp_qp_work->comp); - - kfree(lp_qp_work); -} - -static int hns_roce_v1_recreate_lp_qp(struct hns_roce_dev *hr_dev) -{ - long end = HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS; - struct hns_roce_v1_priv *priv = hr_dev->priv; - struct hns_roce_free_mr *free_mr = &priv->free_mr; - struct hns_roce_recreate_lp_qp_work *lp_qp_work; - struct device *dev = &hr_dev->pdev->dev; - struct completion comp; - - lp_qp_work = kzalloc(sizeof(struct hns_roce_recreate_lp_qp_work), - GFP_KERNEL); - if (!lp_qp_work) - return -ENOMEM; - - INIT_WORK(&(lp_qp_work->work), hns_roce_v1_recreate_lp_qp_work_fn); - - lp_qp_work->ib_dev = &(hr_dev->ib_dev); - lp_qp_work->comp = ∁ - lp_qp_work->comp_flag = 1; - - init_completion(lp_qp_work->comp); - - queue_work(free_mr->free_mr_wq, &(lp_qp_work->work)); - - while (end > 0) { - if (try_wait_for_completion(&comp)) - return 0; - msleep(HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE); - end -= 
HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE; - } - - lp_qp_work->comp_flag = 0; - if (try_wait_for_completion(&comp)) - return 0; - - dev_warn(dev, "recreate lp qp failed 20s timeout and return failed!\n"); - return -ETIMEDOUT; -} - -static int hns_roce_v1_send_lp_wqe(struct hns_roce_qp *hr_qp) -{ - struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device); - struct device *dev = &hr_dev->pdev->dev; - struct ib_send_wr send_wr; - const struct ib_send_wr *bad_wr; - int ret; - - memset(&send_wr, 0, sizeof(send_wr)); - send_wr.next = NULL; - send_wr.num_sge = 0; - send_wr.send_flags = 0; - send_wr.sg_list = NULL; - send_wr.wr_id = (unsigned long long)&send_wr; - send_wr.opcode = IB_WR_RDMA_WRITE; - - ret = hns_roce_v1_post_send(&hr_qp->ibqp, &send_wr, &bad_wr); - if (ret) { - dev_err(dev, "Post write wqe for mr free failed(%d)!", ret); - return ret; - } - - return 0; -} - -static void hns_roce_v1_mr_free_work_fn(struct work_struct *work) -{ - unsigned long end = - msecs_to_jiffies(HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS) + jiffies; - struct hns_roce_mr_free_work *mr_work = - container_of(work, struct hns_roce_mr_free_work, work); - struct hns_roce_dev *hr_dev = to_hr_dev(mr_work->ib_dev); - struct hns_roce_v1_priv *priv = hr_dev->priv; - struct hns_roce_free_mr *free_mr = &priv->free_mr; - struct hns_roce_cq *mr_free_cq = free_mr->mr_free_cq; - struct hns_roce_mr *hr_mr = mr_work->mr; - struct device *dev = &hr_dev->pdev->dev; - struct ib_wc wc[HNS_ROCE_V1_RESV_QP]; - struct hns_roce_qp *hr_qp; - int ne = 0; - int ret; - int i; - - for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) { - hr_qp = free_mr->mr_free_qp[i]; - if (!hr_qp) - continue; - ne++; - - ret = hns_roce_v1_send_lp_wqe(hr_qp); - if (ret) { - dev_err(dev, - "Send wqe (qp:0x%lx) for mr free failed(%d)!\n", - hr_qp->qpn, ret); - goto free_work; - } - } - - if (!ne) { - dev_err(dev, "Reserved loop qp is absent!\n"); - goto free_work; - } - - do { - ret = hns_roce_v1_poll_cq(&mr_free_cq->ib_cq, ne, wc); - if (ret < 0 && hr_qp) 
{ - dev_err(dev, - "(qp:0x%lx) starts, Poll cqe failed(%d) for mr 0x%x free! Remain %d cqe\n", - hr_qp->qpn, ret, hr_mr->key, ne); - goto free_work; - } - ne -= ret; - usleep_range(HNS_ROCE_V1_FREE_MR_WAIT_VALUE * 1000, - (1 + HNS_ROCE_V1_FREE_MR_WAIT_VALUE) * 1000); - } while (ne && time_before_eq(jiffies, end)); - - if (ne != 0) - dev_err(dev, - "Poll cqe for mr 0x%x free timeout! Remain %d cqe\n", - hr_mr->key, ne); - -free_work: - if (mr_work->comp_flag) - complete(mr_work->comp); - kfree(mr_work); -} - -static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev, - struct hns_roce_mr *mr, struct ib_udata *udata) -{ - struct hns_roce_v1_priv *priv = hr_dev->priv; - struct hns_roce_free_mr *free_mr = &priv->free_mr; - long end = HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS; - struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_mr_free_work *mr_work; - unsigned long start = jiffies; - struct completion comp; - int ret = 0; - - if (mr->enabled) { - if (hns_roce_hw_destroy_mpt(hr_dev, NULL, - key_to_hw_index(mr->key) & - (hr_dev->caps.num_mtpts - 1))) - dev_warn(dev, "DESTROY_MPT failed!\n"); - } - - mr_work = kzalloc(sizeof(*mr_work), GFP_KERNEL); - if (!mr_work) { - ret = -ENOMEM; - goto free_mr; - } - - INIT_WORK(&(mr_work->work), hns_roce_v1_mr_free_work_fn); - - mr_work->ib_dev = &(hr_dev->ib_dev); - mr_work->comp = ∁ - mr_work->comp_flag = 1; - mr_work->mr = (void *)mr; - init_completion(mr_work->comp); - - queue_work(free_mr->free_mr_wq, &(mr_work->work)); - - while (end > 0) { - if (try_wait_for_completion(&comp)) - goto free_mr; - msleep(HNS_ROCE_V1_FREE_MR_WAIT_VALUE); - end -= HNS_ROCE_V1_FREE_MR_WAIT_VALUE; - } - - mr_work->comp_flag = 0; - if (try_wait_for_completion(&comp)) - goto free_mr; - - dev_warn(dev, "Free mr work 0x%x over 50s and failed!\n", mr->key); - ret = -ETIMEDOUT; - -free_mr: - dev_dbg(dev, "Free mr 0x%x use 0x%x us.\n", - mr->key, jiffies_to_usecs(jiffies) - jiffies_to_usecs(start)); - - ida_free(&hr_dev->mr_table.mtpt_ida.ida, 
(int)key_to_hw_index(mr->key)); - hns_roce_mtr_destroy(hr_dev, &mr->pbl_mtr); - kfree(mr); - - return ret; -} - -static void hns_roce_db_free(struct hns_roce_dev *hr_dev) -{ - struct hns_roce_v1_priv *priv = hr_dev->priv; - struct hns_roce_db_table *db = &priv->db_table; - struct device *dev = &hr_dev->pdev->dev; - - if (db->sdb_ext_mod) { - dma_free_coherent(dev, HNS_ROCE_V1_EXT_SDB_SIZE, - db->ext_db->sdb_buf_list->buf, - db->ext_db->sdb_buf_list->map); - kfree(db->ext_db->sdb_buf_list); - } - - if (db->odb_ext_mod) { - dma_free_coherent(dev, HNS_ROCE_V1_EXT_ODB_SIZE, - db->ext_db->odb_buf_list->buf, - db->ext_db->odb_buf_list->map); - kfree(db->ext_db->odb_buf_list); - } - - kfree(db->ext_db); -} - -static int hns_roce_raq_init(struct hns_roce_dev *hr_dev) -{ - struct hns_roce_v1_priv *priv = hr_dev->priv; - struct hns_roce_raq_table *raq = &priv->raq_table; - struct device *dev = &hr_dev->pdev->dev; - dma_addr_t addr; - int raq_shift; - __le32 tmp; - u32 val; - int ret; - - raq->e_raq_buf = kzalloc(sizeof(*(raq->e_raq_buf)), GFP_KERNEL); - if (!raq->e_raq_buf) - return -ENOMEM; - - raq->e_raq_buf->buf = dma_alloc_coherent(dev, HNS_ROCE_V1_RAQ_SIZE, - &addr, GFP_KERNEL); - if (!raq->e_raq_buf->buf) { - ret = -ENOMEM; - goto err_dma_alloc_raq; - } - raq->e_raq_buf->map = addr; - - /* Configure raq extended address. 48bit 4K align */ - roce_write(hr_dev, ROCEE_EXT_RAQ_REG, raq->e_raq_buf->map >> 12); - - /* Configure raq_shift */ - raq_shift = ilog2(HNS_ROCE_V1_RAQ_SIZE / HNS_ROCE_V1_RAQ_ENTRY); - val = roce_read(hr_dev, ROCEE_EXT_RAQ_H_REG); - tmp = cpu_to_le32(val); - roce_set_field(tmp, ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_M, - ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_S, raq_shift); - /* - * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of - * using 4K page, and shift more 32 because of - * calculating the high 32 bit value evaluated to hardware. 
- */ - roce_set_field(tmp, ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_M, - ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_S, - raq->e_raq_buf->map >> 44); - val = le32_to_cpu(tmp); - roce_write(hr_dev, ROCEE_EXT_RAQ_H_REG, val); - dev_dbg(dev, "Configure raq_shift 0x%x.\n", val); - - /* Configure raq threshold */ - val = roce_read(hr_dev, ROCEE_RAQ_WL_REG); - tmp = cpu_to_le32(val); - roce_set_field(tmp, ROCEE_RAQ_WL_ROCEE_RAQ_WL_M, - ROCEE_RAQ_WL_ROCEE_RAQ_WL_S, - HNS_ROCE_V1_EXT_RAQ_WF); - val = le32_to_cpu(tmp); - roce_write(hr_dev, ROCEE_RAQ_WL_REG, val); - dev_dbg(dev, "Configure raq_wl 0x%x.\n", val); - - /* Enable extend raq */ - val = roce_read(hr_dev, ROCEE_WRMS_POL_TIME_INTERVAL_REG); - tmp = cpu_to_le32(val); - roce_set_field(tmp, - ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_M, - ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_S, - POL_TIME_INTERVAL_VAL); - roce_set_bit(tmp, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_EXT_RAQ_MODE, 1); - roce_set_field(tmp, - ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_M, - ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_S, - 2); - roce_set_bit(tmp, - ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_EN_S, 1); - val = le32_to_cpu(tmp); - roce_write(hr_dev, ROCEE_WRMS_POL_TIME_INTERVAL_REG, val); - dev_dbg(dev, "Configure WrmsPolTimeInterval 0x%x.\n", val); - - /* Enable raq drop */ - val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); - tmp = cpu_to_le32(val); - roce_set_bit(tmp, ROCEE_GLB_CFG_TRP_RAQ_DROP_EN_S, 1); - val = le32_to_cpu(tmp); - roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); - dev_dbg(dev, "Configure GlbCfg = 0x%x.\n", val); - - return 0; - -err_dma_alloc_raq: - kfree(raq->e_raq_buf); - return ret; -} - -static void hns_roce_raq_free(struct hns_roce_dev *hr_dev) -{ - struct hns_roce_v1_priv *priv = hr_dev->priv; - struct hns_roce_raq_table *raq = &priv->raq_table; - struct device *dev = &hr_dev->pdev->dev; - - dma_free_coherent(dev, HNS_ROCE_V1_RAQ_SIZE, raq->e_raq_buf->buf, - raq->e_raq_buf->map); - kfree(raq->e_raq_buf); -} - 
-static void hns_roce_port_enable(struct hns_roce_dev *hr_dev, int enable_flag) -{ - __le32 tmp; - u32 val; - - if (enable_flag) { - val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); - /* Open all ports */ - tmp = cpu_to_le32(val); - roce_set_field(tmp, ROCEE_GLB_CFG_ROCEE_PORT_ST_M, - ROCEE_GLB_CFG_ROCEE_PORT_ST_S, - ALL_PORT_VAL_OPEN); - val = le32_to_cpu(tmp); - roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); - } else { - val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); - /* Close all ports */ - tmp = cpu_to_le32(val); - roce_set_field(tmp, ROCEE_GLB_CFG_ROCEE_PORT_ST_M, - ROCEE_GLB_CFG_ROCEE_PORT_ST_S, 0x0); - val = le32_to_cpu(tmp); - roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); - } -} - -static int hns_roce_bt_init(struct hns_roce_dev *hr_dev) -{ - struct hns_roce_v1_priv *priv = hr_dev->priv; - struct device *dev = &hr_dev->pdev->dev; - int ret; - - priv->bt_table.qpc_buf.buf = dma_alloc_coherent(dev, - HNS_ROCE_BT_RSV_BUF_SIZE, &priv->bt_table.qpc_buf.map, - GFP_KERNEL); - if (!priv->bt_table.qpc_buf.buf) - return -ENOMEM; - - priv->bt_table.mtpt_buf.buf = dma_alloc_coherent(dev, - HNS_ROCE_BT_RSV_BUF_SIZE, &priv->bt_table.mtpt_buf.map, - GFP_KERNEL); - if (!priv->bt_table.mtpt_buf.buf) { - ret = -ENOMEM; - goto err_failed_alloc_mtpt_buf; - } - - priv->bt_table.cqc_buf.buf = dma_alloc_coherent(dev, - HNS_ROCE_BT_RSV_BUF_SIZE, &priv->bt_table.cqc_buf.map, - GFP_KERNEL); - if (!priv->bt_table.cqc_buf.buf) { - ret = -ENOMEM; - goto err_failed_alloc_cqc_buf; - } - - return 0; - -err_failed_alloc_cqc_buf: - dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE, - priv->bt_table.mtpt_buf.buf, priv->bt_table.mtpt_buf.map); - -err_failed_alloc_mtpt_buf: - dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE, - priv->bt_table.qpc_buf.buf, priv->bt_table.qpc_buf.map); - - return ret; -} - -static void hns_roce_bt_free(struct hns_roce_dev *hr_dev) -{ - struct hns_roce_v1_priv *priv = hr_dev->priv; - struct device *dev = &hr_dev->pdev->dev; - - dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE, - 
priv->bt_table.cqc_buf.buf, priv->bt_table.cqc_buf.map); - - dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE, - priv->bt_table.mtpt_buf.buf, priv->bt_table.mtpt_buf.map); - - dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE, - priv->bt_table.qpc_buf.buf, priv->bt_table.qpc_buf.map); -} - -static int hns_roce_tptr_init(struct hns_roce_dev *hr_dev) -{ - struct hns_roce_v1_priv *priv = hr_dev->priv; - struct hns_roce_buf_list *tptr_buf = &priv->tptr_table.tptr_buf; - struct device *dev = &hr_dev->pdev->dev; - - /* - * This buffer will be used for CQ's tptr(tail pointer), also - * named ci(customer index). Every CQ will use 2 bytes to save - * cqe ci in hip06. Hardware will read this area to get new ci - * when the queue is almost full. - */ - tptr_buf->buf = dma_alloc_coherent(dev, HNS_ROCE_V1_TPTR_BUF_SIZE, - &tptr_buf->map, GFP_KERNEL); - if (!tptr_buf->buf) - return -ENOMEM; - - hr_dev->tptr_dma_addr = tptr_buf->map; - hr_dev->tptr_size = HNS_ROCE_V1_TPTR_BUF_SIZE; - - return 0; -} - -static void hns_roce_tptr_free(struct hns_roce_dev *hr_dev) -{ - struct hns_roce_v1_priv *priv = hr_dev->priv; - struct hns_roce_buf_list *tptr_buf = &priv->tptr_table.tptr_buf; - struct device *dev = &hr_dev->pdev->dev; - - dma_free_coherent(dev, HNS_ROCE_V1_TPTR_BUF_SIZE, - tptr_buf->buf, tptr_buf->map); -} - -static int hns_roce_free_mr_init(struct hns_roce_dev *hr_dev) -{ - struct hns_roce_v1_priv *priv = hr_dev->priv; - struct hns_roce_free_mr *free_mr = &priv->free_mr; - struct device *dev = &hr_dev->pdev->dev; - int ret; - - free_mr->free_mr_wq = create_singlethread_workqueue("hns_roce_free_mr"); - if (!free_mr->free_mr_wq) { - dev_err(dev, "Create free mr workqueue failed!\n"); - return -ENOMEM; - } - - ret = hns_roce_v1_rsv_lp_qp(hr_dev); - if (ret) { - dev_err(dev, "Reserved loop qp failed(%d)!\n", ret); - destroy_workqueue(free_mr->free_mr_wq); - } - - return ret; -} - -static void hns_roce_free_mr_free(struct hns_roce_dev *hr_dev) -{ - struct hns_roce_v1_priv *priv = 
hr_dev->priv; - struct hns_roce_free_mr *free_mr = &priv->free_mr; - - destroy_workqueue(free_mr->free_mr_wq); - - hns_roce_v1_release_lp_qp(hr_dev); -} - -/** - * hns_roce_v1_reset - reset RoCE - * @hr_dev: RoCE device struct pointer - * @dereset: true -- drop reset, false -- reset - * return 0 - success , negative --fail - */ -static int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool dereset) -{ - struct device_node *dsaf_node; - struct device *dev = &hr_dev->pdev->dev; - struct device_node *np = dev->of_node; - struct fwnode_handle *fwnode; - int ret; - - /* check if this is DT/ACPI case */ - if (dev_of_node(dev)) { - dsaf_node = of_parse_phandle(np, "dsaf-handle", 0); - if (!dsaf_node) { - dev_err(dev, "could not find dsaf-handle\n"); - return -EINVAL; - } - fwnode = &dsaf_node->fwnode; - } else if (is_acpi_device_node(dev->fwnode)) { - struct fwnode_reference_args args; - - ret = acpi_node_get_property_reference(dev->fwnode, - "dsaf-handle", 0, &args); - if (ret) { - dev_err(dev, "could not find dsaf-handle\n"); - return ret; - } - fwnode = args.fwnode; - } else { - dev_err(dev, "cannot read data from DT or ACPI\n"); - return -ENXIO; - } - - ret = hns_dsaf_roce_reset(fwnode, false); - if (ret) - return ret; - - if (dereset) { - msleep(SLEEP_TIME_INTERVAL); - ret = hns_dsaf_roce_reset(fwnode, true); - } - - return ret; -} - -static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev) -{ - struct hns_roce_caps *caps = &hr_dev->caps; - int i; - - hr_dev->vendor_id = roce_read(hr_dev, ROCEE_VENDOR_ID_REG); - hr_dev->vendor_part_id = roce_read(hr_dev, ROCEE_VENDOR_PART_ID_REG); - hr_dev->sys_image_guid = roce_read(hr_dev, ROCEE_SYS_IMAGE_GUID_L_REG) | - ((u64)roce_read(hr_dev, - ROCEE_SYS_IMAGE_GUID_H_REG) << 32); - hr_dev->hw_rev = HNS_ROCE_HW_VER1; - - caps->num_qps = HNS_ROCE_V1_MAX_QP_NUM; - caps->max_wqes = HNS_ROCE_V1_MAX_WQE_NUM; - caps->min_wqes = HNS_ROCE_MIN_WQE_NUM; - caps->num_cqs = HNS_ROCE_V1_MAX_CQ_NUM; - caps->min_cqes = HNS_ROCE_MIN_CQE_NUM; - 
caps->max_cqes = HNS_ROCE_V1_MAX_CQE_NUM; - caps->max_sq_sg = HNS_ROCE_V1_SG_NUM; - caps->max_rq_sg = HNS_ROCE_V1_SG_NUM; - caps->max_sq_inline = HNS_ROCE_V1_INLINE_SIZE; - caps->num_uars = HNS_ROCE_V1_UAR_NUM; - caps->phy_num_uars = HNS_ROCE_V1_PHY_UAR_NUM; - caps->num_aeq_vectors = HNS_ROCE_V1_AEQE_VEC_NUM; - caps->num_comp_vectors = HNS_ROCE_V1_COMP_VEC_NUM; - caps->num_other_vectors = HNS_ROCE_V1_ABNORMAL_VEC_NUM; - caps->num_mtpts = HNS_ROCE_V1_MAX_MTPT_NUM; - caps->num_mtt_segs = HNS_ROCE_V1_MAX_MTT_SEGS; - caps->num_pds = HNS_ROCE_V1_MAX_PD_NUM; - caps->max_qp_init_rdma = HNS_ROCE_V1_MAX_QP_INIT_RDMA; - caps->max_qp_dest_rdma = HNS_ROCE_V1_MAX_QP_DEST_RDMA; - caps->max_sq_desc_sz = HNS_ROCE_V1_MAX_SQ_DESC_SZ; - caps->max_rq_desc_sz = HNS_ROCE_V1_MAX_RQ_DESC_SZ; - caps->qpc_sz = HNS_ROCE_V1_QPC_SIZE; - caps->irrl_entry_sz = HNS_ROCE_V1_IRRL_ENTRY_SIZE; - caps->cqc_entry_sz = HNS_ROCE_V1_CQC_ENTRY_SIZE; - caps->mtpt_entry_sz = HNS_ROCE_V1_MTPT_ENTRY_SIZE; - caps->mtt_entry_sz = HNS_ROCE_V1_MTT_ENTRY_SIZE; - caps->cqe_sz = HNS_ROCE_V1_CQE_SIZE; - caps->page_size_cap = HNS_ROCE_V1_PAGE_SIZE_SUPPORT; - caps->reserved_lkey = 0; - caps->reserved_pds = 0; - caps->reserved_mrws = 1; - caps->reserved_uars = 0; - caps->reserved_cqs = 0; - caps->reserved_qps = 12; /* 2 SQP per port, six ports total 12 */ - caps->chunk_sz = HNS_ROCE_V1_TABLE_CHUNK_SIZE; - - for (i = 0; i < caps->num_ports; i++) - caps->pkey_table_len[i] = 1; - - for (i = 0; i < caps->num_ports; i++) { - /* Six ports shared 16 GID in v1 engine */ - if (i >= (HNS_ROCE_V1_GID_NUM % caps->num_ports)) - caps->gid_table_len[i] = HNS_ROCE_V1_GID_NUM / - caps->num_ports; - else - caps->gid_table_len[i] = HNS_ROCE_V1_GID_NUM / - caps->num_ports + 1; - } - - caps->ceqe_depth = HNS_ROCE_V1_COMP_EQE_NUM; - caps->aeqe_depth = HNS_ROCE_V1_ASYNC_EQE_NUM; - caps->local_ca_ack_delay = roce_read(hr_dev, ROCEE_ACK_DELAY_REG); - caps->max_mtu = IB_MTU_2048; - - return 0; -} - -static int hns_roce_v1_init(struct hns_roce_dev 
*hr_dev) -{ - int ret; - u32 val; - __le32 tmp; - struct device *dev = &hr_dev->pdev->dev; - - /* DMAE user config */ - val = roce_read(hr_dev, ROCEE_DMAE_USER_CFG1_REG); - tmp = cpu_to_le32(val); - roce_set_field(tmp, ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_M, - ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_S, 0xf); - roce_set_field(tmp, ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_M, - ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_S, - 1 << PAGES_SHIFT_16); - val = le32_to_cpu(tmp); - roce_write(hr_dev, ROCEE_DMAE_USER_CFG1_REG, val); - - val = roce_read(hr_dev, ROCEE_DMAE_USER_CFG2_REG); - tmp = cpu_to_le32(val); - roce_set_field(tmp, ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_M, - ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_S, 0xf); - roce_set_field(tmp, ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_M, - ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_S, - 1 << PAGES_SHIFT_16); - - ret = hns_roce_db_init(hr_dev); - if (ret) { - dev_err(dev, "doorbell init failed!\n"); - return ret; - } - - ret = hns_roce_raq_init(hr_dev); - if (ret) { - dev_err(dev, "raq init failed!\n"); - goto error_failed_raq_init; - } - - ret = hns_roce_bt_init(hr_dev); - if (ret) { - dev_err(dev, "bt init failed!\n"); - goto error_failed_bt_init; - } - - ret = hns_roce_tptr_init(hr_dev); - if (ret) { - dev_err(dev, "tptr init failed!\n"); - goto error_failed_tptr_init; - } - - ret = hns_roce_free_mr_init(hr_dev); - if (ret) { - dev_err(dev, "free mr init failed!\n"); - goto error_failed_free_mr_init; - } - - hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_UP); - - return 0; - -error_failed_free_mr_init: - hns_roce_tptr_free(hr_dev); - -error_failed_tptr_init: - hns_roce_bt_free(hr_dev); - -error_failed_bt_init: - hns_roce_raq_free(hr_dev); - -error_failed_raq_init: - hns_roce_db_free(hr_dev); - return ret; -} - -static void hns_roce_v1_exit(struct hns_roce_dev *hr_dev) -{ - hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_DOWN); - hns_roce_free_mr_free(hr_dev); - hns_roce_tptr_free(hr_dev); - hns_roce_bt_free(hr_dev); 
- hns_roce_raq_free(hr_dev); - hns_roce_db_free(hr_dev); -} - -static int hns_roce_v1_cmd_pending(struct hns_roce_dev *hr_dev) -{ - u32 status = readl(hr_dev->reg_base + ROCEE_MB6_REG); - - return (!!(status & (1 << HCR_GO_BIT))); -} - -static int hns_roce_v1_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, - u64 out_param, u32 in_modifier, u8 op_modifier, - u16 op, u16 token, int event) -{ - u32 __iomem *hcr = (u32 __iomem *)(hr_dev->reg_base + ROCEE_MB1_REG); - unsigned long end; - u32 val = 0; - __le32 tmp; - - end = msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS) + jiffies; - while (hns_roce_v1_cmd_pending(hr_dev)) { - if (time_after(jiffies, end)) { - dev_err(hr_dev->dev, "jiffies=%d end=%d\n", - (int)jiffies, (int)end); - return -EAGAIN; - } - cond_resched(); - } - - tmp = cpu_to_le32(val); - roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_CMD_M, ROCEE_MB6_ROCEE_MB_CMD_S, - op); - roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_CMD_MDF_M, - ROCEE_MB6_ROCEE_MB_CMD_MDF_S, op_modifier); - roce_set_bit(tmp, ROCEE_MB6_ROCEE_MB_EVENT_S, event); - roce_set_bit(tmp, ROCEE_MB6_ROCEE_MB_HW_RUN_S, 1); - roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_TOKEN_M, - ROCEE_MB6_ROCEE_MB_TOKEN_S, token); - - val = le32_to_cpu(tmp); - writeq(in_param, hcr + 0); - writeq(out_param, hcr + 2); - writel(in_modifier, hcr + 4); - /* Memory barrier */ - wmb(); - - writel(val, hcr + 5); - - return 0; -} - -static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev, - unsigned int timeout) -{ - u8 __iomem *hcr = hr_dev->reg_base + ROCEE_MB1_REG; - unsigned long end; - u32 status = 0; - - end = msecs_to_jiffies(timeout) + jiffies; - while (hns_roce_v1_cmd_pending(hr_dev) && time_before(jiffies, end)) - cond_resched(); - - if (hns_roce_v1_cmd_pending(hr_dev)) { - dev_err(hr_dev->dev, "[cmd_poll]hw run cmd TIMEDOUT!\n"); - return -ETIMEDOUT; - } - - status = le32_to_cpu((__force __le32) - __raw_readl(hcr + HCR_STATUS_OFFSET)); - if ((status & STATUS_MASK) != 0x1) { - dev_err(hr_dev->dev, "mailbox status 0x%x!\n", 
status); - return -EBUSY; - } - - return 0; -} - -static int hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u32 port, - int gid_index, const union ib_gid *gid, - const struct ib_gid_attr *attr) -{ - unsigned long flags; - u32 *p = NULL; - u8 gid_idx; - - gid_idx = hns_get_gid_index(hr_dev, port, gid_index); - - spin_lock_irqsave(&hr_dev->iboe.lock, flags); - - p = (u32 *)&gid->raw[0]; - roce_raw_write(*p, hr_dev->reg_base + ROCEE_PORT_GID_L_0_REG + - (HNS_ROCE_V1_GID_NUM * gid_idx)); - - p = (u32 *)&gid->raw[4]; - roce_raw_write(*p, hr_dev->reg_base + ROCEE_PORT_GID_ML_0_REG + - (HNS_ROCE_V1_GID_NUM * gid_idx)); - - p = (u32 *)&gid->raw[8]; - roce_raw_write(*p, hr_dev->reg_base + ROCEE_PORT_GID_MH_0_REG + - (HNS_ROCE_V1_GID_NUM * gid_idx)); - - p = (u32 *)&gid->raw[0xc]; - roce_raw_write(*p, hr_dev->reg_base + ROCEE_PORT_GID_H_0_REG + - (HNS_ROCE_V1_GID_NUM * gid_idx)); - - spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); - - return 0; -} - -static int hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, - const u8 *addr) -{ - u32 reg_smac_l; - u16 reg_smac_h; - __le32 tmp; - u16 *p_h; - u32 *p; - u32 val; - - /* - * When mac changed, loopback may fail - * because of smac not equal to dmac. - * We Need to release and create reserved qp again. 
- */ - if (hr_dev->hw->dereg_mr) { - int ret; - - ret = hns_roce_v1_recreate_lp_qp(hr_dev); - if (ret && ret != -ETIMEDOUT) - return ret; - } - - p = (u32 *)(&addr[0]); - reg_smac_l = *p; - roce_raw_write(reg_smac_l, hr_dev->reg_base + ROCEE_SMAC_L_0_REG + - PHY_PORT_OFFSET * phy_port); - - val = roce_read(hr_dev, - ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET); - tmp = cpu_to_le32(val); - p_h = (u16 *)(&addr[4]); - reg_smac_h = *p_h; - roce_set_field(tmp, ROCEE_SMAC_H_ROCEE_SMAC_H_M, - ROCEE_SMAC_H_ROCEE_SMAC_H_S, reg_smac_h); - val = le32_to_cpu(tmp); - roce_write(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET, - val); - - return 0; -} - -static void hns_roce_v1_set_mtu(struct hns_roce_dev *hr_dev, u8 phy_port, - enum ib_mtu mtu) -{ - __le32 tmp; - u32 val; - - val = roce_read(hr_dev, - ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET); - tmp = cpu_to_le32(val); - roce_set_field(tmp, ROCEE_SMAC_H_ROCEE_PORT_MTU_M, - ROCEE_SMAC_H_ROCEE_PORT_MTU_S, mtu); - val = le32_to_cpu(tmp); - roce_write(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET, - val); -} - -static int hns_roce_v1_write_mtpt(struct hns_roce_dev *hr_dev, void *mb_buf, - struct hns_roce_mr *mr, - unsigned long mtpt_idx) -{ - u64 pages[HNS_ROCE_MAX_INNER_MTPT_NUM] = { 0 }; - struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_v1_mpt_entry *mpt_entry; - dma_addr_t pbl_ba; - int count; - int i; - - /* MPT filled into mailbox buf */ - mpt_entry = (struct hns_roce_v1_mpt_entry *)mb_buf; - memset(mpt_entry, 0, sizeof(*mpt_entry)); - - roce_set_field(mpt_entry->mpt_byte_4, MPT_BYTE_4_KEY_STATE_M, - MPT_BYTE_4_KEY_STATE_S, KEY_VALID); - roce_set_field(mpt_entry->mpt_byte_4, MPT_BYTE_4_KEY_M, - MPT_BYTE_4_KEY_S, mr->key); - roce_set_field(mpt_entry->mpt_byte_4, MPT_BYTE_4_PAGE_SIZE_M, - MPT_BYTE_4_PAGE_SIZE_S, MR_SIZE_4K); - roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_MW_TYPE_S, 0); - roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_MW_BIND_ENABLE_S, - (mr->access & IB_ACCESS_MW_BIND 
? 1 : 0)); - roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_OWN_S, 0); - roce_set_field(mpt_entry->mpt_byte_4, MPT_BYTE_4_MEMORY_LOCATION_TYPE_M, - MPT_BYTE_4_MEMORY_LOCATION_TYPE_S, mr->type); - roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_REMOTE_ATOMIC_S, 0); - roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_LOCAL_WRITE_S, - (mr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0)); - roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_REMOTE_WRITE_S, - (mr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0)); - roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_REMOTE_READ_S, - (mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0)); - roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_REMOTE_INVAL_ENABLE_S, - 0); - roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_ADDRESS_TYPE_S, 0); - - roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_PBL_ADDR_H_M, - MPT_BYTE_12_PBL_ADDR_H_S, 0); - roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_MW_BIND_COUNTER_M, - MPT_BYTE_12_MW_BIND_COUNTER_S, 0); - - mpt_entry->virt_addr_l = cpu_to_le32((u32)mr->iova); - mpt_entry->virt_addr_h = cpu_to_le32((u32)(mr->iova >> 32)); - mpt_entry->length = cpu_to_le32((u32)mr->size); - - roce_set_field(mpt_entry->mpt_byte_28, MPT_BYTE_28_PD_M, - MPT_BYTE_28_PD_S, mr->pd); - roce_set_field(mpt_entry->mpt_byte_28, MPT_BYTE_28_L_KEY_IDX_L_M, - MPT_BYTE_28_L_KEY_IDX_L_S, mtpt_idx); - roce_set_field(mpt_entry->mpt_byte_64, MPT_BYTE_64_L_KEY_IDX_H_M, - MPT_BYTE_64_L_KEY_IDX_H_S, mtpt_idx >> MTPT_IDX_SHIFT); - - /* DMA memory register */ - if (mr->type == MR_TYPE_DMA) - return 0; - - count = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, pages, - ARRAY_SIZE(pages), &pbl_ba); - if (count < 1) { - ibdev_err(ibdev, "failed to find PBL mtr, count = %d.", count); - return -ENOBUFS; - } - - /* Register user mr */ - for (i = 0; i < count; i++) { - switch (i) { - case 0: - mpt_entry->pa0_l = cpu_to_le32((u32)(pages[i])); - roce_set_field(mpt_entry->mpt_byte_36, - MPT_BYTE_36_PA0_H_M, - MPT_BYTE_36_PA0_H_S, - (u32)(pages[i] >> PAGES_SHIFT_32)); - break; 
- case 1: - roce_set_field(mpt_entry->mpt_byte_36, - MPT_BYTE_36_PA1_L_M, - MPT_BYTE_36_PA1_L_S, (u32)(pages[i])); - roce_set_field(mpt_entry->mpt_byte_40, - MPT_BYTE_40_PA1_H_M, - MPT_BYTE_40_PA1_H_S, - (u32)(pages[i] >> PAGES_SHIFT_24)); - break; - case 2: - roce_set_field(mpt_entry->mpt_byte_40, - MPT_BYTE_40_PA2_L_M, - MPT_BYTE_40_PA2_L_S, (u32)(pages[i])); - roce_set_field(mpt_entry->mpt_byte_44, - MPT_BYTE_44_PA2_H_M, - MPT_BYTE_44_PA2_H_S, - (u32)(pages[i] >> PAGES_SHIFT_16)); - break; - case 3: - roce_set_field(mpt_entry->mpt_byte_44, - MPT_BYTE_44_PA3_L_M, - MPT_BYTE_44_PA3_L_S, (u32)(pages[i])); - roce_set_field(mpt_entry->mpt_byte_48, - MPT_BYTE_48_PA3_H_M, - MPT_BYTE_48_PA3_H_S, - (u32)(pages[i] >> PAGES_SHIFT_8)); - break; - case 4: - mpt_entry->pa4_l = cpu_to_le32((u32)(pages[i])); - roce_set_field(mpt_entry->mpt_byte_56, - MPT_BYTE_56_PA4_H_M, - MPT_BYTE_56_PA4_H_S, - (u32)(pages[i] >> PAGES_SHIFT_32)); - break; - case 5: - roce_set_field(mpt_entry->mpt_byte_56, - MPT_BYTE_56_PA5_L_M, - MPT_BYTE_56_PA5_L_S, (u32)(pages[i])); - roce_set_field(mpt_entry->mpt_byte_60, - MPT_BYTE_60_PA5_H_M, - MPT_BYTE_60_PA5_H_S, - (u32)(pages[i] >> PAGES_SHIFT_24)); - break; - case 6: - roce_set_field(mpt_entry->mpt_byte_60, - MPT_BYTE_60_PA6_L_M, - MPT_BYTE_60_PA6_L_S, (u32)(pages[i])); - roce_set_field(mpt_entry->mpt_byte_64, - MPT_BYTE_64_PA6_H_M, - MPT_BYTE_64_PA6_H_S, - (u32)(pages[i] >> PAGES_SHIFT_16)); - break; - default: - break; - } - } - - mpt_entry->pbl_addr_l = cpu_to_le32(pbl_ba); - roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_PBL_ADDR_H_M, - MPT_BYTE_12_PBL_ADDR_H_S, upper_32_bits(pbl_ba)); - - return 0; -} - -static void *get_cqe(struct hns_roce_cq *hr_cq, int n) -{ - return hns_roce_buf_offset(hr_cq->mtr.kmem, n * HNS_ROCE_V1_CQE_SIZE); -} - -static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n) -{ - struct hns_roce_cqe *hr_cqe = get_cqe(hr_cq, n & hr_cq->ib_cq.cqe); - - /* Get cqe when Owner bit is Conversely with the MSB of cons_idx */ - 
return (roce_get_bit(hr_cqe->cqe_byte_4, CQE_BYTE_4_OWNER_S) ^ - !!(n & hr_cq->cq_depth)) ? hr_cqe : NULL; -} - -static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *hr_cq) -{ - return get_sw_cqe(hr_cq, hr_cq->cons_index); -} - -static void hns_roce_v1_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index) -{ - __le32 doorbell[2]; - - doorbell[0] = cpu_to_le32(cons_index & ((hr_cq->cq_depth << 1) - 1)); - doorbell[1] = 0; - roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1); - roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M, - ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S, 3); - roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_M, - ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_S, 0); - roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_M, - ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_S, hr_cq->cqn); - - hns_roce_write64_k(doorbell, hr_cq->db_reg); -} - -static void __hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, - struct hns_roce_srq *srq) -{ - struct hns_roce_cqe *cqe, *dest; - u32 prod_index; - int nfreed = 0; - u8 owner_bit; - - for (prod_index = hr_cq->cons_index; get_sw_cqe(hr_cq, prod_index); - ++prod_index) { - if (prod_index == hr_cq->cons_index + hr_cq->ib_cq.cqe) - break; - } - - /* - * Now backwards through the CQ, removing CQ entries - * that match our QP by overwriting them with next entries. 
- */ - while ((int) --prod_index - (int) hr_cq->cons_index >= 0) { - cqe = get_cqe(hr_cq, prod_index & hr_cq->ib_cq.cqe); - if ((roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M, - CQE_BYTE_16_LOCAL_QPN_S) & - HNS_ROCE_CQE_QPN_MASK) == qpn) { - /* In v1 engine, not support SRQ */ - ++nfreed; - } else if (nfreed) { - dest = get_cqe(hr_cq, (prod_index + nfreed) & - hr_cq->ib_cq.cqe); - owner_bit = roce_get_bit(dest->cqe_byte_4, - CQE_BYTE_4_OWNER_S); - memcpy(dest, cqe, sizeof(*cqe)); - roce_set_bit(dest->cqe_byte_4, CQE_BYTE_4_OWNER_S, - owner_bit); - } - } - - if (nfreed) { - hr_cq->cons_index += nfreed; - hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index); - } -} - -static void hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, - struct hns_roce_srq *srq) -{ - spin_lock_irq(&hr_cq->lock); - __hns_roce_v1_cq_clean(hr_cq, qpn, srq); - spin_unlock_irq(&hr_cq->lock); -} - -static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq, void *mb_buf, - u64 *mtts, dma_addr_t dma_handle) -{ - struct hns_roce_v1_priv *priv = hr_dev->priv; - struct hns_roce_buf_list *tptr_buf = &priv->tptr_table.tptr_buf; - struct hns_roce_cq_context *cq_context = mb_buf; - dma_addr_t tptr_dma_addr; - int offset; - - memset(cq_context, 0, sizeof(*cq_context)); - - /* Get the tptr for this CQ. 
*/ - offset = hr_cq->cqn * HNS_ROCE_V1_TPTR_ENTRY_SIZE; - tptr_dma_addr = tptr_buf->map + offset; - hr_cq->tptr_addr = (u16 *)(tptr_buf->buf + offset); - - /* Register cq_context members */ - roce_set_field(cq_context->cqc_byte_4, - CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_M, - CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_S, CQ_STATE_VALID); - roce_set_field(cq_context->cqc_byte_4, CQ_CONTEXT_CQC_BYTE_4_CQN_M, - CQ_CONTEXT_CQC_BYTE_4_CQN_S, hr_cq->cqn); - - cq_context->cq_bt_l = cpu_to_le32((u32)dma_handle); - - roce_set_field(cq_context->cqc_byte_12, - CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_M, - CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_S, - ((u64)dma_handle >> 32)); - roce_set_field(cq_context->cqc_byte_12, - CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_M, - CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_S, - ilog2(hr_cq->cq_depth)); - roce_set_field(cq_context->cqc_byte_12, CQ_CONTEXT_CQC_BYTE_12_CEQN_M, - CQ_CONTEXT_CQC_BYTE_12_CEQN_S, hr_cq->vector); - - cq_context->cur_cqe_ba0_l = cpu_to_le32((u32)(mtts[0])); - - roce_set_field(cq_context->cqc_byte_20, - CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_M, - CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S, (mtts[0]) >> 32); - /* Dedicated hardware, directly set 0 */ - roce_set_field(cq_context->cqc_byte_20, - CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_M, - CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_S, 0); - /** - * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of - * using 4K page, and shift more 32 because of - * calculating the high 32 bit value evaluated to hardware. 
- */ - roce_set_field(cq_context->cqc_byte_20, - CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_M, - CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S, - tptr_dma_addr >> 44); - - cq_context->cqe_tptr_addr_l = cpu_to_le32((u32)(tptr_dma_addr >> 12)); - - roce_set_field(cq_context->cqc_byte_32, - CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_M, - CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_S, 0); - roce_set_bit(cq_context->cqc_byte_32, - CQ_CONTEXT_CQC_BYTE_32_SE_FLAG_S, 0); - roce_set_bit(cq_context->cqc_byte_32, - CQ_CONTEXT_CQC_BYTE_32_CE_FLAG_S, 0); - roce_set_bit(cq_context->cqc_byte_32, - CQ_CONTEXT_CQC_BYTE_32_NOTIFICATION_FLAG_S, 0); - roce_set_bit(cq_context->cqc_byte_32, - CQ_CQNTEXT_CQC_BYTE_32_TYPE_OF_COMPLETION_NOTIFICATION_S, - 0); - /* The initial value of cq's ci is 0 */ - roce_set_field(cq_context->cqc_byte_32, - CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_M, - CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S, 0); -} - -static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, - enum ib_cq_notify_flags flags) -{ - struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); - u32 notification_flag; - __le32 doorbell[2] = {}; - - notification_flag = (flags & IB_CQ_SOLICITED_MASK) == - IB_CQ_SOLICITED ? 
CQ_DB_REQ_NOT : CQ_DB_REQ_NOT_SOL; - /* - * flags = 0; Notification Flag = 1, next - * flags = 1; Notification Flag = 0, solocited - */ - doorbell[0] = - cpu_to_le32(hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1)); - roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1); - roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M, - ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S, 3); - roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_M, - ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_S, 1); - roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_M, - ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_S, - hr_cq->cqn | notification_flag); - - hns_roce_write64_k(doorbell, hr_cq->db_reg); - - return 0; -} - -static int hns_roce_v1_poll_one(struct hns_roce_cq *hr_cq, - struct hns_roce_qp **cur_qp, struct ib_wc *wc) -{ - int qpn; - int is_send; - u16 wqe_ctr; - u32 status; - u32 opcode; - struct hns_roce_cqe *cqe; - struct hns_roce_qp *hr_qp; - struct hns_roce_wq *wq; - struct hns_roce_wqe_ctrl_seg *sq_wqe; - struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device); - struct device *dev = &hr_dev->pdev->dev; - - /* Find cqe according consumer index */ - cqe = next_cqe_sw(hr_cq); - if (!cqe) - return -EAGAIN; - - ++hr_cq->cons_index; - /* Memory barrier */ - rmb(); - /* 0->SQ, 1->RQ */ - is_send = !(roce_get_bit(cqe->cqe_byte_4, CQE_BYTE_4_SQ_RQ_FLAG_S)); - - /* Local_qpn in UD cqe is always 1, so it needs to compute new qpn */ - if (roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M, - CQE_BYTE_16_LOCAL_QPN_S) <= 1) { - qpn = roce_get_field(cqe->cqe_byte_20, CQE_BYTE_20_PORT_NUM_M, - CQE_BYTE_20_PORT_NUM_S) + - roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M, - CQE_BYTE_16_LOCAL_QPN_S) * - HNS_ROCE_MAX_PORTS; - } else { - qpn = roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M, - CQE_BYTE_16_LOCAL_QPN_S); - } - - if (!*cur_qp || (qpn & HNS_ROCE_CQE_QPN_MASK) != (*cur_qp)->qpn) { - hr_qp = __hns_roce_qp_lookup(hr_dev, 
qpn); - if (unlikely(!hr_qp)) { - dev_err(dev, "CQ %06lx with entry for unknown QPN %06x\n", - hr_cq->cqn, (qpn & HNS_ROCE_CQE_QPN_MASK)); - return -EINVAL; - } - - *cur_qp = hr_qp; - } - - wc->qp = &(*cur_qp)->ibqp; - wc->vendor_err = 0; - - status = roce_get_field(cqe->cqe_byte_4, - CQE_BYTE_4_STATUS_OF_THE_OPERATION_M, - CQE_BYTE_4_STATUS_OF_THE_OPERATION_S) & - HNS_ROCE_CQE_STATUS_MASK; - switch (status) { - case HNS_ROCE_CQE_SUCCESS: - wc->status = IB_WC_SUCCESS; - break; - case HNS_ROCE_CQE_SYNDROME_LOCAL_LENGTH_ERR: - wc->status = IB_WC_LOC_LEN_ERR; - break; - case HNS_ROCE_CQE_SYNDROME_LOCAL_QP_OP_ERR: - wc->status = IB_WC_LOC_QP_OP_ERR; - break; - case HNS_ROCE_CQE_SYNDROME_LOCAL_PROT_ERR: - wc->status = IB_WC_LOC_PROT_ERR; - break; - case HNS_ROCE_CQE_SYNDROME_WR_FLUSH_ERR: - wc->status = IB_WC_WR_FLUSH_ERR; - break; - case HNS_ROCE_CQE_SYNDROME_MEM_MANAGE_OPERATE_ERR: - wc->status = IB_WC_MW_BIND_ERR; - break; - case HNS_ROCE_CQE_SYNDROME_BAD_RESP_ERR: - wc->status = IB_WC_BAD_RESP_ERR; - break; - case HNS_ROCE_CQE_SYNDROME_LOCAL_ACCESS_ERR: - wc->status = IB_WC_LOC_ACCESS_ERR; - break; - case HNS_ROCE_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR: - wc->status = IB_WC_REM_INV_REQ_ERR; - break; - case HNS_ROCE_CQE_SYNDROME_REMOTE_ACCESS_ERR: - wc->status = IB_WC_REM_ACCESS_ERR; - break; - case HNS_ROCE_CQE_SYNDROME_REMOTE_OP_ERR: - wc->status = IB_WC_REM_OP_ERR; - break; - case HNS_ROCE_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR: - wc->status = IB_WC_RETRY_EXC_ERR; - break; - case HNS_ROCE_CQE_SYNDROME_RNR_RETRY_EXC_ERR: - wc->status = IB_WC_RNR_RETRY_EXC_ERR; - break; - default: - wc->status = IB_WC_GENERAL_ERR; - break; - } - - /* CQE status error, directly return */ - if (wc->status != IB_WC_SUCCESS) - return 0; - - if (is_send) { - /* SQ conrespond to CQE */ - sq_wqe = hns_roce_get_send_wqe(*cur_qp, - roce_get_field(cqe->cqe_byte_4, - CQE_BYTE_4_WQE_INDEX_M, - CQE_BYTE_4_WQE_INDEX_S) & - ((*cur_qp)->sq.wqe_cnt-1)); - switch (le32_to_cpu(sq_wqe->flag) & 
HNS_ROCE_WQE_OPCODE_MASK) { - case HNS_ROCE_WQE_OPCODE_SEND: - wc->opcode = IB_WC_SEND; - break; - case HNS_ROCE_WQE_OPCODE_RDMA_READ: - wc->opcode = IB_WC_RDMA_READ; - wc->byte_len = le32_to_cpu(cqe->byte_cnt); - break; - case HNS_ROCE_WQE_OPCODE_RDMA_WRITE: - wc->opcode = IB_WC_RDMA_WRITE; - break; - case HNS_ROCE_WQE_OPCODE_LOCAL_INV: - wc->opcode = IB_WC_LOCAL_INV; - break; - case HNS_ROCE_WQE_OPCODE_UD_SEND: - wc->opcode = IB_WC_SEND; - break; - default: - wc->status = IB_WC_GENERAL_ERR; - break; - } - wc->wc_flags = (le32_to_cpu(sq_wqe->flag) & HNS_ROCE_WQE_IMM ? - IB_WC_WITH_IMM : 0); - - wq = &(*cur_qp)->sq; - if ((*cur_qp)->sq_signal_bits) { - /* - * If sg_signal_bit is 1, - * firstly tail pointer updated to wqe - * which current cqe correspond to - */ - wqe_ctr = (u16)roce_get_field(cqe->cqe_byte_4, - CQE_BYTE_4_WQE_INDEX_M, - CQE_BYTE_4_WQE_INDEX_S); - wq->tail += (wqe_ctr - (u16)wq->tail) & - (wq->wqe_cnt - 1); - } - wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; - ++wq->tail; - } else { - /* RQ conrespond to CQE */ - wc->byte_len = le32_to_cpu(cqe->byte_cnt); - opcode = roce_get_field(cqe->cqe_byte_4, - CQE_BYTE_4_OPERATION_TYPE_M, - CQE_BYTE_4_OPERATION_TYPE_S) & - HNS_ROCE_CQE_OPCODE_MASK; - switch (opcode) { - case HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE: - wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; - wc->wc_flags = IB_WC_WITH_IMM; - wc->ex.imm_data = - cpu_to_be32(le32_to_cpu(cqe->immediate_data)); - break; - case HNS_ROCE_OPCODE_SEND_DATA_RECEIVE: - if (roce_get_bit(cqe->cqe_byte_4, - CQE_BYTE_4_IMM_INDICATOR_S)) { - wc->opcode = IB_WC_RECV; - wc->wc_flags = IB_WC_WITH_IMM; - wc->ex.imm_data = cpu_to_be32( - le32_to_cpu(cqe->immediate_data)); - } else { - wc->opcode = IB_WC_RECV; - wc->wc_flags = 0; - } - break; - default: - wc->status = IB_WC_GENERAL_ERR; - break; - } - - /* Update tail pointer, record wr_id */ - wq = &(*cur_qp)->rq; - wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; - ++wq->tail; - wc->sl = (u8)roce_get_field(cqe->cqe_byte_20, 
CQE_BYTE_20_SL_M, - CQE_BYTE_20_SL_S); - wc->src_qp = (u8)roce_get_field(cqe->cqe_byte_20, - CQE_BYTE_20_REMOTE_QPN_M, - CQE_BYTE_20_REMOTE_QPN_S); - wc->wc_flags |= (roce_get_bit(cqe->cqe_byte_20, - CQE_BYTE_20_GRH_PRESENT_S) ? - IB_WC_GRH : 0); - wc->pkey_index = (u16)roce_get_field(cqe->cqe_byte_28, - CQE_BYTE_28_P_KEY_IDX_M, - CQE_BYTE_28_P_KEY_IDX_S); - } - - return 0; -} - -int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) -{ - struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); - struct hns_roce_qp *cur_qp = NULL; - unsigned long flags; - int npolled; - int ret; - - spin_lock_irqsave(&hr_cq->lock, flags); - - for (npolled = 0; npolled < num_entries; ++npolled) { - ret = hns_roce_v1_poll_one(hr_cq, &cur_qp, wc + npolled); - if (ret) - break; - } - - if (npolled) { - *hr_cq->tptr_addr = hr_cq->cons_index & - ((hr_cq->cq_depth << 1) - 1); - - hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index); - } - - spin_unlock_irqrestore(&hr_cq->lock, flags); - - if (ret == 0 || ret == -EAGAIN) - return npolled; - else - return ret; -} - -static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_table *table, int obj, - int step_idx) -{ - struct hns_roce_v1_priv *priv = hr_dev->priv; - struct device *dev = &hr_dev->pdev->dev; - long end = HW_SYNC_TIMEOUT_MSECS; - __le32 bt_cmd_val[2] = {0}; - unsigned long flags = 0; - void __iomem *bt_cmd; - u64 bt_ba = 0; - - switch (table->type) { - case HEM_TYPE_QPC: - bt_ba = priv->bt_table.qpc_buf.map >> 12; - break; - case HEM_TYPE_MTPT: - bt_ba = priv->bt_table.mtpt_buf.map >> 12; - break; - case HEM_TYPE_CQC: - bt_ba = priv->bt_table.cqc_buf.map >> 12; - break; - case HEM_TYPE_SRQC: - dev_dbg(dev, "HEM_TYPE_SRQC not support.\n"); - return -EINVAL; - default: - return 0; - } - roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, - ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, table->type); - roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M, - 
ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S, obj); - roce_set_bit(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0); - roce_set_bit(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S, 1); - - spin_lock_irqsave(&hr_dev->bt_cmd_lock, flags); - - bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG; - - while (1) { - if (readl(bt_cmd) >> BT_CMD_SYNC_SHIFT) { - if (!end) { - dev_err(dev, "Write bt_cmd err,hw_sync is not zero.\n"); - spin_unlock_irqrestore(&hr_dev->bt_cmd_lock, - flags); - return -EBUSY; - } - } else { - break; - } - mdelay(HW_SYNC_SLEEP_TIME_INTERVAL); - end -= HW_SYNC_SLEEP_TIME_INTERVAL; - } - - bt_cmd_val[0] = cpu_to_le32(bt_ba); - roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M, - ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S, bt_ba >> 32); - hns_roce_write64_k(bt_cmd_val, hr_dev->reg_base + ROCEE_BT_CMD_L_REG); - - spin_unlock_irqrestore(&hr_dev->bt_cmd_lock, flags); - - return 0; -} - -static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev, - enum hns_roce_qp_state cur_state, - enum hns_roce_qp_state new_state, - struct hns_roce_qp_context *context, - struct hns_roce_qp *hr_qp) -{ - static const u16 - op[HNS_ROCE_QP_NUM_STATE][HNS_ROCE_QP_NUM_STATE] = { - [HNS_ROCE_QP_STATE_RST] = { - [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP, - [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP, - [HNS_ROCE_QP_STATE_INIT] = HNS_ROCE_CMD_RST2INIT_QP, - }, - [HNS_ROCE_QP_STATE_INIT] = { - [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP, - [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP, - /* Note: In v1 engine, HW doesn't support RST2INIT. - * We use RST2INIT cmd instead of INIT2INIT. 
- */ - [HNS_ROCE_QP_STATE_INIT] = HNS_ROCE_CMD_RST2INIT_QP, - [HNS_ROCE_QP_STATE_RTR] = HNS_ROCE_CMD_INIT2RTR_QP, - }, - [HNS_ROCE_QP_STATE_RTR] = { - [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP, - [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP, - [HNS_ROCE_QP_STATE_RTS] = HNS_ROCE_CMD_RTR2RTS_QP, - }, - [HNS_ROCE_QP_STATE_RTS] = { - [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP, - [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP, - [HNS_ROCE_QP_STATE_RTS] = HNS_ROCE_CMD_RTS2RTS_QP, - [HNS_ROCE_QP_STATE_SQD] = HNS_ROCE_CMD_RTS2SQD_QP, - }, - [HNS_ROCE_QP_STATE_SQD] = { - [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP, - [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP, - [HNS_ROCE_QP_STATE_RTS] = HNS_ROCE_CMD_SQD2RTS_QP, - [HNS_ROCE_QP_STATE_SQD] = HNS_ROCE_CMD_SQD2SQD_QP, - }, - [HNS_ROCE_QP_STATE_ERR] = { - [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP, - [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP, - } - }; - - struct hns_roce_cmd_mailbox *mailbox; - struct device *dev = &hr_dev->pdev->dev; - int ret; - - if (cur_state >= HNS_ROCE_QP_NUM_STATE || - new_state >= HNS_ROCE_QP_NUM_STATE || - !op[cur_state][new_state]) { - dev_err(dev, "[modify_qp]not support state %d to %d\n", - cur_state, new_state); - return -EINVAL; - } - - if (op[cur_state][new_state] == HNS_ROCE_CMD_2RST_QP) - return hns_roce_cmd_mbox(hr_dev, 0, 0, hr_qp->qpn, 2, - HNS_ROCE_CMD_2RST_QP, - HNS_ROCE_CMD_TIMEOUT_MSECS); - - if (op[cur_state][new_state] == HNS_ROCE_CMD_2ERR_QP) - return hns_roce_cmd_mbox(hr_dev, 0, 0, hr_qp->qpn, 2, - HNS_ROCE_CMD_2ERR_QP, - HNS_ROCE_CMD_TIMEOUT_MSECS); - - mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); - - memcpy(mailbox->buf, context, sizeof(*context)); - - ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_qp->qpn, 0, - op[cur_state][new_state], - HNS_ROCE_CMD_TIMEOUT_MSECS); - - hns_roce_free_cmd_mailbox(hr_dev, mailbox); - return ret; -} - -static int find_wqe_mtt(struct hns_roce_dev *hr_dev, struct 
hns_roce_qp *hr_qp, - u64 *sq_ba, u64 *rq_ba, dma_addr_t *bt_ba) -{ - struct ib_device *ibdev = &hr_dev->ib_dev; - int count; - - count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, sq_ba, 1, bt_ba); - if (count < 1) { - ibdev_err(ibdev, "Failed to find SQ ba\n"); - return -ENOBUFS; - } - - count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, rq_ba, - 1, NULL); - if (!count) { - ibdev_err(ibdev, "Failed to find RQ ba\n"); - return -ENOBUFS; - } - - return 0; -} - -static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, - int attr_mask, enum ib_qp_state cur_state, - enum ib_qp_state new_state) -{ - struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); - struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); - struct hns_roce_sqp_context *context; - dma_addr_t dma_handle = 0; - u32 __iomem *addr; - u64 sq_ba = 0; - u64 rq_ba = 0; - __le32 tmp; - u32 reg_val; - - context = kzalloc(sizeof(*context), GFP_KERNEL); - if (!context) - return -ENOMEM; - - /* Search QP buf's MTTs */ - if (find_wqe_mtt(hr_dev, hr_qp, &sq_ba, &rq_ba, &dma_handle)) - goto out; - - if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { - roce_set_field(context->qp1c_bytes_4, - QP1C_BYTES_4_SQ_WQE_SHIFT_M, - QP1C_BYTES_4_SQ_WQE_SHIFT_S, - ilog2((unsigned int)hr_qp->sq.wqe_cnt)); - roce_set_field(context->qp1c_bytes_4, - QP1C_BYTES_4_RQ_WQE_SHIFT_M, - QP1C_BYTES_4_RQ_WQE_SHIFT_S, - ilog2((unsigned int)hr_qp->rq.wqe_cnt)); - roce_set_field(context->qp1c_bytes_4, QP1C_BYTES_4_PD_M, - QP1C_BYTES_4_PD_S, to_hr_pd(ibqp->pd)->pdn); - - context->sq_rq_bt_l = cpu_to_le32(dma_handle); - roce_set_field(context->qp1c_bytes_12, - QP1C_BYTES_12_SQ_RQ_BT_H_M, - QP1C_BYTES_12_SQ_RQ_BT_H_S, - upper_32_bits(dma_handle)); - - roce_set_field(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_HEAD_M, - QP1C_BYTES_16_RQ_HEAD_S, hr_qp->rq.head); - roce_set_field(context->qp1c_bytes_16, QP1C_BYTES_16_PORT_NUM_M, - QP1C_BYTES_16_PORT_NUM_S, hr_qp->phy_port); - roce_set_bit(context->qp1c_bytes_16, - 
QP1C_BYTES_16_SIGNALING_TYPE_S, - hr_qp->sq_signal_bits); - roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_BA_FLG_S, - 1); - roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_SQ_BA_FLG_S, - 1); - roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_QP1_ERR_S, - 0); - - roce_set_field(context->qp1c_bytes_20, QP1C_BYTES_20_SQ_HEAD_M, - QP1C_BYTES_20_SQ_HEAD_S, hr_qp->sq.head); - roce_set_field(context->qp1c_bytes_20, QP1C_BYTES_20_PKEY_IDX_M, - QP1C_BYTES_20_PKEY_IDX_S, attr->pkey_index); - - context->cur_rq_wqe_ba_l = cpu_to_le32(rq_ba); - - roce_set_field(context->qp1c_bytes_28, - QP1C_BYTES_28_CUR_RQ_WQE_BA_H_M, - QP1C_BYTES_28_CUR_RQ_WQE_BA_H_S, - upper_32_bits(rq_ba)); - roce_set_field(context->qp1c_bytes_28, - QP1C_BYTES_28_RQ_CUR_IDX_M, - QP1C_BYTES_28_RQ_CUR_IDX_S, 0); - - roce_set_field(context->qp1c_bytes_32, - QP1C_BYTES_32_RX_CQ_NUM_M, - QP1C_BYTES_32_RX_CQ_NUM_S, - to_hr_cq(ibqp->recv_cq)->cqn); - roce_set_field(context->qp1c_bytes_32, - QP1C_BYTES_32_TX_CQ_NUM_M, - QP1C_BYTES_32_TX_CQ_NUM_S, - to_hr_cq(ibqp->send_cq)->cqn); - - context->cur_sq_wqe_ba_l = cpu_to_le32(sq_ba); - - roce_set_field(context->qp1c_bytes_40, - QP1C_BYTES_40_CUR_SQ_WQE_BA_H_M, - QP1C_BYTES_40_CUR_SQ_WQE_BA_H_S, - upper_32_bits(sq_ba)); - roce_set_field(context->qp1c_bytes_40, - QP1C_BYTES_40_SQ_CUR_IDX_M, - QP1C_BYTES_40_SQ_CUR_IDX_S, 0); - - /* Copy context to QP1C register */ - addr = (u32 __iomem *)(hr_dev->reg_base + - ROCEE_QP1C_CFG0_0_REG + - hr_qp->phy_port * sizeof(*context)); - - writel(le32_to_cpu(context->qp1c_bytes_4), addr); - writel(le32_to_cpu(context->sq_rq_bt_l), addr + 1); - writel(le32_to_cpu(context->qp1c_bytes_12), addr + 2); - writel(le32_to_cpu(context->qp1c_bytes_16), addr + 3); - writel(le32_to_cpu(context->qp1c_bytes_20), addr + 4); - writel(le32_to_cpu(context->cur_rq_wqe_ba_l), addr + 5); - writel(le32_to_cpu(context->qp1c_bytes_28), addr + 6); - writel(le32_to_cpu(context->qp1c_bytes_32), addr + 7); - 
writel(le32_to_cpu(context->cur_sq_wqe_ba_l), addr + 8); - writel(le32_to_cpu(context->qp1c_bytes_40), addr + 9); - } - - /* Modify QP1C status */ - reg_val = roce_read(hr_dev, ROCEE_QP1C_CFG0_0_REG + - hr_qp->phy_port * sizeof(*context)); - tmp = cpu_to_le32(reg_val); - roce_set_field(tmp, ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_M, - ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S, new_state); - reg_val = le32_to_cpu(tmp); - roce_write(hr_dev, ROCEE_QP1C_CFG0_0_REG + - hr_qp->phy_port * sizeof(*context), reg_val); - - hr_qp->state = new_state; - if (new_state == IB_QPS_RESET) { - hns_roce_v1_cq_clean(to_hr_cq(ibqp->recv_cq), hr_qp->qpn, - ibqp->srq ? to_hr_srq(ibqp->srq) : NULL); - if (ibqp->send_cq != ibqp->recv_cq) - hns_roce_v1_cq_clean(to_hr_cq(ibqp->send_cq), - hr_qp->qpn, NULL); - - hr_qp->rq.head = 0; - hr_qp->rq.tail = 0; - hr_qp->sq.head = 0; - hr_qp->sq.tail = 0; - } - - kfree(context); - return 0; - -out: - kfree(context); - return -EINVAL; -} - -static bool check_qp_state(enum ib_qp_state cur_state, - enum ib_qp_state new_state) -{ - static const bool sm[][IB_QPS_ERR + 1] = { - [IB_QPS_RESET] = { [IB_QPS_RESET] = true, - [IB_QPS_INIT] = true }, - [IB_QPS_INIT] = { [IB_QPS_RESET] = true, - [IB_QPS_INIT] = true, - [IB_QPS_RTR] = true, - [IB_QPS_ERR] = true }, - [IB_QPS_RTR] = { [IB_QPS_RESET] = true, - [IB_QPS_RTS] = true, - [IB_QPS_ERR] = true }, - [IB_QPS_RTS] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true }, - [IB_QPS_SQD] = {}, - [IB_QPS_SQE] = {}, - [IB_QPS_ERR] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true } - }; - - return sm[cur_state][new_state]; -} - -static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, - int attr_mask, enum ib_qp_state cur_state, - enum ib_qp_state new_state) -{ - struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); - struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); - struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_qp_context *context; - const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); - 
dma_addr_t dma_handle_2 = 0; - dma_addr_t dma_handle = 0; - __le32 doorbell[2] = {0}; - u64 *mtts_2 = NULL; - int ret = -EINVAL; - const u8 *smac; - u64 sq_ba = 0; - u64 rq_ba = 0; - u32 port; - u32 port_num; - u8 *dmac; - - if (!check_qp_state(cur_state, new_state)) { - ibdev_err(ibqp->device, - "not support QP(%u) status from %d to %d\n", - ibqp->qp_num, cur_state, new_state); - return -EINVAL; - } - - context = kzalloc(sizeof(*context), GFP_KERNEL); - if (!context) - return -ENOMEM; - - /* Search qp buf's mtts */ - if (find_wqe_mtt(hr_dev, hr_qp, &sq_ba, &rq_ba, &dma_handle)) - goto out; - - /* Search IRRL's mtts */ - mtts_2 = hns_roce_table_find(hr_dev, &hr_dev->qp_table.irrl_table, - hr_qp->qpn, &dma_handle_2); - if (mtts_2 == NULL) { - dev_err(dev, "qp irrl_table find failed\n"); - goto out; - } - - /* - * Reset to init - * Mandatory param: - * IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS - * Optional param: NA - */ - if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { - roce_set_field(context->qpc_bytes_4, - QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M, - QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_S, - to_hr_qp_type(hr_qp->ibqp.qp_type)); - - roce_set_bit(context->qpc_bytes_4, - QP_CONTEXT_QPC_BYTE_4_ENABLE_FPMR_S, 0); - roce_set_bit(context->qpc_bytes_4, - QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S, - !!(attr->qp_access_flags & IB_ACCESS_REMOTE_READ)); - roce_set_bit(context->qpc_bytes_4, - QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S, - !!(attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) - ); - roce_set_bit(context->qpc_bytes_4, - QP_CONTEXT_QPC_BYTE_4_ATOMIC_OPERATION_ENABLE_S, - !!(attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) - ); - roce_set_bit(context->qpc_bytes_4, - QP_CONTEXT_QPC_BYTE_4_RDMAR_USE_S, 1); - roce_set_field(context->qpc_bytes_4, - QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_M, - QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_S, - ilog2((unsigned int)hr_qp->sq.wqe_cnt)); - roce_set_field(context->qpc_bytes_4, - 
QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_M, - QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_S, - ilog2((unsigned int)hr_qp->rq.wqe_cnt)); - roce_set_field(context->qpc_bytes_4, - QP_CONTEXT_QPC_BYTES_4_PD_M, - QP_CONTEXT_QPC_BYTES_4_PD_S, - to_hr_pd(ibqp->pd)->pdn); - hr_qp->access_flags = attr->qp_access_flags; - roce_set_field(context->qpc_bytes_8, - QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_M, - QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_S, - to_hr_cq(ibqp->send_cq)->cqn); - roce_set_field(context->qpc_bytes_8, - QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_M, - QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_S, - to_hr_cq(ibqp->recv_cq)->cqn); - - if (ibqp->srq) - roce_set_field(context->qpc_bytes_12, - QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_M, - QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_S, - to_hr_srq(ibqp->srq)->srqn); - - roce_set_field(context->qpc_bytes_12, - QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M, - QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S, - attr->pkey_index); - hr_qp->pkey_index = attr->pkey_index; - roce_set_field(context->qpc_bytes_16, - QP_CONTEXT_QPC_BYTES_16_QP_NUM_M, - QP_CONTEXT_QPC_BYTES_16_QP_NUM_S, hr_qp->qpn); - } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { - roce_set_field(context->qpc_bytes_4, - QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M, - QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_S, - to_hr_qp_type(hr_qp->ibqp.qp_type)); - roce_set_bit(context->qpc_bytes_4, - QP_CONTEXT_QPC_BYTE_4_ENABLE_FPMR_S, 0); - if (attr_mask & IB_QP_ACCESS_FLAGS) { - roce_set_bit(context->qpc_bytes_4, - QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S, - !!(attr->qp_access_flags & - IB_ACCESS_REMOTE_READ)); - roce_set_bit(context->qpc_bytes_4, - QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S, - !!(attr->qp_access_flags & - IB_ACCESS_REMOTE_WRITE)); - } else { - roce_set_bit(context->qpc_bytes_4, - QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S, - !!(hr_qp->access_flags & - IB_ACCESS_REMOTE_READ)); - roce_set_bit(context->qpc_bytes_4, - QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S, - !!(hr_qp->access_flags & - 
IB_ACCESS_REMOTE_WRITE)); - } - - roce_set_bit(context->qpc_bytes_4, - QP_CONTEXT_QPC_BYTE_4_RDMAR_USE_S, 1); - roce_set_field(context->qpc_bytes_4, - QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_M, - QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_S, - ilog2((unsigned int)hr_qp->sq.wqe_cnt)); - roce_set_field(context->qpc_bytes_4, - QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_M, - QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_S, - ilog2((unsigned int)hr_qp->rq.wqe_cnt)); - roce_set_field(context->qpc_bytes_4, - QP_CONTEXT_QPC_BYTES_4_PD_M, - QP_CONTEXT_QPC_BYTES_4_PD_S, - to_hr_pd(ibqp->pd)->pdn); - - roce_set_field(context->qpc_bytes_8, - QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_M, - QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_S, - to_hr_cq(ibqp->send_cq)->cqn); - roce_set_field(context->qpc_bytes_8, - QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_M, - QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_S, - to_hr_cq(ibqp->recv_cq)->cqn); - - if (ibqp->srq) - roce_set_field(context->qpc_bytes_12, - QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_M, - QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_S, - to_hr_srq(ibqp->srq)->srqn); - if (attr_mask & IB_QP_PKEY_INDEX) - roce_set_field(context->qpc_bytes_12, - QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M, - QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S, - attr->pkey_index); - else - roce_set_field(context->qpc_bytes_12, - QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M, - QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S, - hr_qp->pkey_index); - - roce_set_field(context->qpc_bytes_16, - QP_CONTEXT_QPC_BYTES_16_QP_NUM_M, - QP_CONTEXT_QPC_BYTES_16_QP_NUM_S, hr_qp->qpn); - } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { - if ((attr_mask & IB_QP_ALT_PATH) || - (attr_mask & IB_QP_ACCESS_FLAGS) || - (attr_mask & IB_QP_PKEY_INDEX) || - (attr_mask & IB_QP_QKEY)) { - dev_err(dev, "INIT2RTR attr_mask error\n"); - goto out; - } - - dmac = (u8 *)attr->ah_attr.roce.dmac; - - context->sq_rq_bt_l = cpu_to_le32(dma_handle); - roce_set_field(context->qpc_bytes_24, - QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_M, - QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S, - 
upper_32_bits(dma_handle)); - roce_set_bit(context->qpc_bytes_24, - QP_CONTEXT_QPC_BYTE_24_REMOTE_ENABLE_E2E_CREDITS_S, - 1); - roce_set_field(context->qpc_bytes_24, - QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_M, - QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S, - attr->min_rnr_timer); - context->irrl_ba_l = cpu_to_le32((u32)(dma_handle_2)); - roce_set_field(context->qpc_bytes_32, - QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_M, - QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_S, - ((u32)(dma_handle_2 >> 32)) & - QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_M); - roce_set_field(context->qpc_bytes_32, - QP_CONTEXT_QPC_BYTES_32_MIG_STATE_M, - QP_CONTEXT_QPC_BYTES_32_MIG_STATE_S, 0); - roce_set_bit(context->qpc_bytes_32, - QP_CONTEXT_QPC_BYTE_32_LOCAL_ENABLE_E2E_CREDITS_S, - 1); - roce_set_bit(context->qpc_bytes_32, - QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S, - hr_qp->sq_signal_bits); - - port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) : - hr_qp->port; - smac = (const u8 *)hr_dev->dev_addr[port]; - /* when dmac equals smac or loop_idc is 1, it should loopback */ - if (ether_addr_equal_unaligned(dmac, smac) || - hr_dev->loop_idc == 0x1) - roce_set_bit(context->qpc_bytes_32, - QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S, 1); - - roce_set_bit(context->qpc_bytes_32, - QP_CONTEXT_QPC_BYTE_32_GLOBAL_HEADER_S, - rdma_ah_get_ah_flags(&attr->ah_attr)); - roce_set_field(context->qpc_bytes_32, - QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_M, - QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S, - ilog2((unsigned int)attr->max_dest_rd_atomic)); - - if (attr_mask & IB_QP_DEST_QPN) - roce_set_field(context->qpc_bytes_36, - QP_CONTEXT_QPC_BYTES_36_DEST_QP_M, - QP_CONTEXT_QPC_BYTES_36_DEST_QP_S, - attr->dest_qp_num); - - /* Configure GID index */ - port_num = rdma_ah_get_port_num(&attr->ah_attr); - roce_set_field(context->qpc_bytes_36, - QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_M, - QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S, - hns_get_gid_index(hr_dev, - port_num - 1, - grh->sgid_index)); - - memcpy(&(context->dmac_l), 
dmac, 4); - - roce_set_field(context->qpc_bytes_44, - QP_CONTEXT_QPC_BYTES_44_DMAC_H_M, - QP_CONTEXT_QPC_BYTES_44_DMAC_H_S, - *((u16 *)(&dmac[4]))); - roce_set_field(context->qpc_bytes_44, - QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_M, - QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_S, - rdma_ah_get_static_rate(&attr->ah_attr)); - roce_set_field(context->qpc_bytes_44, - QP_CONTEXT_QPC_BYTES_44_HOPLMT_M, - QP_CONTEXT_QPC_BYTES_44_HOPLMT_S, - grh->hop_limit); - - roce_set_field(context->qpc_bytes_48, - QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_M, - QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S, - grh->flow_label); - roce_set_field(context->qpc_bytes_48, - QP_CONTEXT_QPC_BYTES_48_TCLASS_M, - QP_CONTEXT_QPC_BYTES_48_TCLASS_S, - grh->traffic_class); - roce_set_field(context->qpc_bytes_48, - QP_CONTEXT_QPC_BYTES_48_MTU_M, - QP_CONTEXT_QPC_BYTES_48_MTU_S, attr->path_mtu); - - memcpy(context->dgid, grh->dgid.raw, - sizeof(grh->dgid.raw)); - - dev_dbg(dev, "dmac:%x :%lx\n", context->dmac_l, - roce_get_field(context->qpc_bytes_44, - QP_CONTEXT_QPC_BYTES_44_DMAC_H_M, - QP_CONTEXT_QPC_BYTES_44_DMAC_H_S)); - - roce_set_field(context->qpc_bytes_68, - QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_M, - QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_S, - hr_qp->rq.head); - roce_set_field(context->qpc_bytes_68, - QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_M, - QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S, 0); - - context->cur_rq_wqe_ba_l = cpu_to_le32(rq_ba); - - roce_set_field(context->qpc_bytes_76, - QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_M, - QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_S, - upper_32_bits(rq_ba)); - roce_set_field(context->qpc_bytes_76, - QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_M, - QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_S, 0); - - context->rx_rnr_time = 0; - - roce_set_field(context->qpc_bytes_84, - QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_M, - QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_S, - attr->rq_psn - 1); - roce_set_field(context->qpc_bytes_84, - QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_M, - QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_S, 0); - - 
roce_set_field(context->qpc_bytes_88, - QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_M, - QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_S, - attr->rq_psn); - roce_set_bit(context->qpc_bytes_88, - QP_CONTEXT_QPC_BYTES_88_RX_REQ_PSN_ERR_FLAG_S, 0); - roce_set_bit(context->qpc_bytes_88, - QP_CONTEXT_QPC_BYTES_88_RX_LAST_OPCODE_FLG_S, 0); - roce_set_field(context->qpc_bytes_88, - QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_M, - QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_S, - 0); - roce_set_field(context->qpc_bytes_88, - QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_M, - QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_S, - 0); - - context->dma_length = 0; - context->r_key = 0; - context->va_l = 0; - context->va_h = 0; - - roce_set_field(context->qpc_bytes_108, - QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_M, - QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_S, 0); - roce_set_bit(context->qpc_bytes_108, - QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_FLG_S, 0); - roce_set_bit(context->qpc_bytes_108, - QP_CONTEXT_QPC_BYTES_108_TRRL_TDB_PSN_FLG_S, 0); - - roce_set_field(context->qpc_bytes_112, - QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_M, - QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_S, 0); - roce_set_field(context->qpc_bytes_112, - QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_M, - QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_S, 0); - - /* For chip resp ack */ - roce_set_field(context->qpc_bytes_156, - QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M, - QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S, - hr_qp->phy_port); - roce_set_field(context->qpc_bytes_156, - QP_CONTEXT_QPC_BYTES_156_SL_M, - QP_CONTEXT_QPC_BYTES_156_SL_S, - rdma_ah_get_sl(&attr->ah_attr)); - hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); - } else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) { - /* If exist optional param, return error */ - if ((attr_mask & IB_QP_ALT_PATH) || - (attr_mask & IB_QP_ACCESS_FLAGS) || - (attr_mask & IB_QP_QKEY) || - (attr_mask & IB_QP_PATH_MIG_STATE) || - (attr_mask & IB_QP_CUR_STATE) || - (attr_mask & IB_QP_MIN_RNR_TIMER)) { - dev_err(dev, "RTR2RTS 
attr_mask error\n"); - goto out; - } - - context->rx_cur_sq_wqe_ba_l = cpu_to_le32(sq_ba); - - roce_set_field(context->qpc_bytes_120, - QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_M, - QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_S, - upper_32_bits(sq_ba)); - - roce_set_field(context->qpc_bytes_124, - QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_M, - QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_S, 0); - roce_set_field(context->qpc_bytes_124, - QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_M, - QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_S, 0); - - roce_set_field(context->qpc_bytes_128, - QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_M, - QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_S, - attr->sq_psn); - roce_set_bit(context->qpc_bytes_128, - QP_CONTEXT_QPC_BYTES_128_RX_ACK_PSN_ERR_FLG_S, 0); - roce_set_field(context->qpc_bytes_128, - QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_M, - QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_S, - 0); - roce_set_bit(context->qpc_bytes_128, - QP_CONTEXT_QPC_BYTES_128_IRRL_PSN_VLD_FLG_S, 0); - - roce_set_field(context->qpc_bytes_132, - QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_M, - QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_S, 0); - roce_set_field(context->qpc_bytes_132, - QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_M, - QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_S, 0); - - roce_set_field(context->qpc_bytes_136, - QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_M, - QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_S, - attr->sq_psn); - roce_set_field(context->qpc_bytes_136, - QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_M, - QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_S, - attr->sq_psn); - - roce_set_field(context->qpc_bytes_140, - QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_M, - QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_S, - (attr->sq_psn >> SQ_PSN_SHIFT)); - roce_set_field(context->qpc_bytes_140, - QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_M, - QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_S, 0); - roce_set_bit(context->qpc_bytes_140, - QP_CONTEXT_QPC_BYTES_140_RNR_RETRY_FLG_S, 0); - - roce_set_field(context->qpc_bytes_148, 
- QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_M, - QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_S, 0); - roce_set_field(context->qpc_bytes_148, - QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M, - QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S, - attr->retry_cnt); - roce_set_field(context->qpc_bytes_148, - QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_M, - QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_S, - attr->rnr_retry); - roce_set_field(context->qpc_bytes_148, - QP_CONTEXT_QPC_BYTES_148_LSN_M, - QP_CONTEXT_QPC_BYTES_148_LSN_S, 0x100); - - context->rnr_retry = 0; - - roce_set_field(context->qpc_bytes_156, - QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_M, - QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_S, - attr->retry_cnt); - if (attr->timeout < 0x12) { - dev_info(dev, "ack timeout value(0x%x) must bigger than 0x12.\n", - attr->timeout); - roce_set_field(context->qpc_bytes_156, - QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_M, - QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S, - 0x12); - } else { - roce_set_field(context->qpc_bytes_156, - QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_M, - QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S, - attr->timeout); - } - roce_set_field(context->qpc_bytes_156, - QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_M, - QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_S, - attr->rnr_retry); - roce_set_field(context->qpc_bytes_156, - QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M, - QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S, - hr_qp->phy_port); - roce_set_field(context->qpc_bytes_156, - QP_CONTEXT_QPC_BYTES_156_SL_M, - QP_CONTEXT_QPC_BYTES_156_SL_S, - rdma_ah_get_sl(&attr->ah_attr)); - hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); - roce_set_field(context->qpc_bytes_156, - QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M, - QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S, - ilog2((unsigned int)attr->max_rd_atomic)); - roce_set_field(context->qpc_bytes_156, - QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_M, - QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_S, 0); - context->pkt_use_len = 0; - - roce_set_field(context->qpc_bytes_164, - QP_CONTEXT_QPC_BYTES_164_SQ_PSN_M, 
- QP_CONTEXT_QPC_BYTES_164_SQ_PSN_S, attr->sq_psn); - roce_set_field(context->qpc_bytes_164, - QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_M, - QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_S, 0); - - roce_set_field(context->qpc_bytes_168, - QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_M, - QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_S, - attr->sq_psn); - roce_set_field(context->qpc_bytes_168, - QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_M, - QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_S, 0); - roce_set_field(context->qpc_bytes_168, - QP_CONTEXT_QPC_BYTES_168_DB_TYPE_M, - QP_CONTEXT_QPC_BYTES_168_DB_TYPE_S, 0); - roce_set_bit(context->qpc_bytes_168, - QP_CONTEXT_QPC_BYTES_168_MSG_LP_IND_S, 0); - roce_set_bit(context->qpc_bytes_168, - QP_CONTEXT_QPC_BYTES_168_CSDB_LP_IND_S, 0); - roce_set_bit(context->qpc_bytes_168, - QP_CONTEXT_QPC_BYTES_168_QP_ERR_FLG_S, 0); - context->sge_use_len = 0; - - roce_set_field(context->qpc_bytes_176, - QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_M, - QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_S, 0); - roce_set_field(context->qpc_bytes_176, - QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_M, - QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_S, - 0); - roce_set_field(context->qpc_bytes_180, - QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_M, - QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_S, 0); - roce_set_field(context->qpc_bytes_180, - QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_M, - QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S, 0); - - context->tx_cur_sq_wqe_ba_l = cpu_to_le32(sq_ba); - - roce_set_field(context->qpc_bytes_188, - QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_M, - QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_S, - upper_32_bits(sq_ba)); - roce_set_bit(context->qpc_bytes_188, - QP_CONTEXT_QPC_BYTES_188_PKT_RETRY_FLG_S, 0); - roce_set_field(context->qpc_bytes_188, - QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_M, - QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_S, - 0); - } - - /* Every status migrate must change state */ - roce_set_field(context->qpc_bytes_144, - QP_CONTEXT_QPC_BYTES_144_QP_STATE_M, - 
QP_CONTEXT_QPC_BYTES_144_QP_STATE_S, new_state); - - /* SW pass context to HW */ - ret = hns_roce_v1_qp_modify(hr_dev, to_hns_roce_state(cur_state), - to_hns_roce_state(new_state), context, - hr_qp); - if (ret) { - dev_err(dev, "hns_roce_qp_modify failed\n"); - goto out; - } - - /* - * Use rst2init to instead of init2init with drv, - * need to hw to flash RQ HEAD by DB again - */ - if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { - roce_set_field(doorbell[0], RQ_DOORBELL_U32_4_RQ_HEAD_M, - RQ_DOORBELL_U32_4_RQ_HEAD_S, hr_qp->rq.head); - roce_set_field(doorbell[1], RQ_DOORBELL_U32_8_QPN_M, - RQ_DOORBELL_U32_8_QPN_S, hr_qp->qpn); - roce_set_field(doorbell[1], RQ_DOORBELL_U32_8_CMD_M, - RQ_DOORBELL_U32_8_CMD_S, 1); - roce_set_bit(doorbell[1], RQ_DOORBELL_U32_8_HW_SYNC_S, 1); - - if (ibqp->uobject) { - hr_qp->rq.db_reg = hr_dev->reg_base + - hr_dev->odb_offset + - DB_REG_OFFSET * hr_dev->priv_uar.index; - } - - hns_roce_write64_k(doorbell, hr_qp->rq.db_reg); - } - - hr_qp->state = new_state; - - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) - hr_qp->resp_depth = attr->max_dest_rd_atomic; - if (attr_mask & IB_QP_PORT) { - hr_qp->port = attr->port_num - 1; - hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port]; - } - - if (new_state == IB_QPS_RESET && !ibqp->uobject) { - hns_roce_v1_cq_clean(to_hr_cq(ibqp->recv_cq), hr_qp->qpn, - ibqp->srq ? 
to_hr_srq(ibqp->srq) : NULL); - if (ibqp->send_cq != ibqp->recv_cq) - hns_roce_v1_cq_clean(to_hr_cq(ibqp->send_cq), - hr_qp->qpn, NULL); - - hr_qp->rq.head = 0; - hr_qp->rq.tail = 0; - hr_qp->sq.head = 0; - hr_qp->sq.tail = 0; - } -out: - kfree(context); - return ret; -} - -static int hns_roce_v1_modify_qp(struct ib_qp *ibqp, - const struct ib_qp_attr *attr, int attr_mask, - enum ib_qp_state cur_state, - enum ib_qp_state new_state) -{ - if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) - return -EOPNOTSUPP; - - if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) - return hns_roce_v1_m_sqp(ibqp, attr, attr_mask, cur_state, - new_state); - else - return hns_roce_v1_m_qp(ibqp, attr, attr_mask, cur_state, - new_state); -} - -static enum ib_qp_state to_ib_qp_state(enum hns_roce_qp_state state) -{ - switch (state) { - case HNS_ROCE_QP_STATE_RST: - return IB_QPS_RESET; - case HNS_ROCE_QP_STATE_INIT: - return IB_QPS_INIT; - case HNS_ROCE_QP_STATE_RTR: - return IB_QPS_RTR; - case HNS_ROCE_QP_STATE_RTS: - return IB_QPS_RTS; - case HNS_ROCE_QP_STATE_SQD: - return IB_QPS_SQD; - case HNS_ROCE_QP_STATE_ERR: - return IB_QPS_ERR; - default: - return IB_QPS_ERR; - } -} - -static int hns_roce_v1_query_qpc(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, - struct hns_roce_qp_context *hr_context) -{ - struct hns_roce_cmd_mailbox *mailbox; - int ret; - - mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); - - ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, hr_qp->qpn, 0, - HNS_ROCE_CMD_QUERY_QP, - HNS_ROCE_CMD_TIMEOUT_MSECS); - if (!ret) - memcpy(hr_context, mailbox->buf, sizeof(*hr_context)); - else - dev_err(&hr_dev->pdev->dev, "QUERY QP cmd process error\n"); - - hns_roce_free_cmd_mailbox(hr_dev, mailbox); - - return ret; -} - -static int hns_roce_v1_q_sqp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, - int qp_attr_mask, - struct ib_qp_init_attr *qp_init_attr) -{ - struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); - 
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); - struct hns_roce_sqp_context context; - u32 addr; - - mutex_lock(&hr_qp->mutex); - - if (hr_qp->state == IB_QPS_RESET) { - qp_attr->qp_state = IB_QPS_RESET; - goto done; - } - - addr = ROCEE_QP1C_CFG0_0_REG + - hr_qp->port * sizeof(struct hns_roce_sqp_context); - context.qp1c_bytes_4 = cpu_to_le32(roce_read(hr_dev, addr)); - context.sq_rq_bt_l = cpu_to_le32(roce_read(hr_dev, addr + 1)); - context.qp1c_bytes_12 = cpu_to_le32(roce_read(hr_dev, addr + 2)); - context.qp1c_bytes_16 = cpu_to_le32(roce_read(hr_dev, addr + 3)); - context.qp1c_bytes_20 = cpu_to_le32(roce_read(hr_dev, addr + 4)); - context.cur_rq_wqe_ba_l = cpu_to_le32(roce_read(hr_dev, addr + 5)); - context.qp1c_bytes_28 = cpu_to_le32(roce_read(hr_dev, addr + 6)); - context.qp1c_bytes_32 = cpu_to_le32(roce_read(hr_dev, addr + 7)); - context.cur_sq_wqe_ba_l = cpu_to_le32(roce_read(hr_dev, addr + 8)); - context.qp1c_bytes_40 = cpu_to_le32(roce_read(hr_dev, addr + 9)); - - hr_qp->state = roce_get_field(context.qp1c_bytes_4, - QP1C_BYTES_4_QP_STATE_M, - QP1C_BYTES_4_QP_STATE_S); - qp_attr->qp_state = hr_qp->state; - qp_attr->path_mtu = IB_MTU_256; - qp_attr->path_mig_state = IB_MIG_ARMED; - qp_attr->qkey = QKEY_VAL; - qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; - qp_attr->rq_psn = 0; - qp_attr->sq_psn = 0; - qp_attr->dest_qp_num = 1; - qp_attr->qp_access_flags = 6; - - qp_attr->pkey_index = roce_get_field(context.qp1c_bytes_20, - QP1C_BYTES_20_PKEY_IDX_M, - QP1C_BYTES_20_PKEY_IDX_S); - qp_attr->port_num = hr_qp->port + 1; - qp_attr->sq_draining = 0; - qp_attr->max_rd_atomic = 0; - qp_attr->max_dest_rd_atomic = 0; - qp_attr->min_rnr_timer = 0; - qp_attr->timeout = 0; - qp_attr->retry_cnt = 0; - qp_attr->rnr_retry = 0; - qp_attr->alt_timeout = 0; - -done: - qp_attr->cur_qp_state = qp_attr->qp_state; - qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt; - qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs; - qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt; - 
qp_attr->cap.max_send_sge = hr_qp->sq.max_gs; - qp_attr->cap.max_inline_data = 0; - qp_init_attr->cap = qp_attr->cap; - qp_init_attr->create_flags = 0; - - mutex_unlock(&hr_qp->mutex); - - return 0; -} - -static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, - int qp_attr_mask, - struct ib_qp_init_attr *qp_init_attr) -{ - struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); - struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); - struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_qp_context *context; - int tmp_qp_state; - int ret = 0; - int state; - - context = kzalloc(sizeof(*context), GFP_KERNEL); - if (!context) - return -ENOMEM; - - memset(qp_attr, 0, sizeof(*qp_attr)); - memset(qp_init_attr, 0, sizeof(*qp_init_attr)); - - mutex_lock(&hr_qp->mutex); - - if (hr_qp->state == IB_QPS_RESET) { - qp_attr->qp_state = IB_QPS_RESET; - goto done; - } - - ret = hns_roce_v1_query_qpc(hr_dev, hr_qp, context); - if (ret) { - dev_err(dev, "query qpc error\n"); - ret = -EINVAL; - goto out; - } - - state = roce_get_field(context->qpc_bytes_144, - QP_CONTEXT_QPC_BYTES_144_QP_STATE_M, - QP_CONTEXT_QPC_BYTES_144_QP_STATE_S); - tmp_qp_state = (int)to_ib_qp_state((enum hns_roce_qp_state)state); - if (tmp_qp_state == -1) { - dev_err(dev, "to_ib_qp_state error\n"); - ret = -EINVAL; - goto out; - } - hr_qp->state = (u8)tmp_qp_state; - qp_attr->qp_state = (enum ib_qp_state)hr_qp->state; - qp_attr->path_mtu = (enum ib_mtu)roce_get_field(context->qpc_bytes_48, - QP_CONTEXT_QPC_BYTES_48_MTU_M, - QP_CONTEXT_QPC_BYTES_48_MTU_S); - qp_attr->path_mig_state = IB_MIG_ARMED; - qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; - if (hr_qp->ibqp.qp_type == IB_QPT_UD) - qp_attr->qkey = QKEY_VAL; - - qp_attr->rq_psn = roce_get_field(context->qpc_bytes_88, - QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_M, - QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_S); - qp_attr->sq_psn = (u32)roce_get_field(context->qpc_bytes_164, - QP_CONTEXT_QPC_BYTES_164_SQ_PSN_M, - QP_CONTEXT_QPC_BYTES_164_SQ_PSN_S); - 
qp_attr->dest_qp_num = (u8)roce_get_field(context->qpc_bytes_36, - QP_CONTEXT_QPC_BYTES_36_DEST_QP_M, - QP_CONTEXT_QPC_BYTES_36_DEST_QP_S); - qp_attr->qp_access_flags = ((roce_get_bit(context->qpc_bytes_4, - QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S)) << 2) | - ((roce_get_bit(context->qpc_bytes_4, - QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S)) << 1) | - ((roce_get_bit(context->qpc_bytes_4, - QP_CONTEXT_QPC_BYTE_4_ATOMIC_OPERATION_ENABLE_S)) << 3); - - if (hr_qp->ibqp.qp_type == IB_QPT_RC) { - struct ib_global_route *grh = - rdma_ah_retrieve_grh(&qp_attr->ah_attr); - - rdma_ah_set_sl(&qp_attr->ah_attr, - roce_get_field(context->qpc_bytes_156, - QP_CONTEXT_QPC_BYTES_156_SL_M, - QP_CONTEXT_QPC_BYTES_156_SL_S)); - rdma_ah_set_ah_flags(&qp_attr->ah_attr, IB_AH_GRH); - grh->flow_label = - roce_get_field(context->qpc_bytes_48, - QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_M, - QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S); - grh->sgid_index = - roce_get_field(context->qpc_bytes_36, - QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_M, - QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S); - grh->hop_limit = - roce_get_field(context->qpc_bytes_44, - QP_CONTEXT_QPC_BYTES_44_HOPLMT_M, - QP_CONTEXT_QPC_BYTES_44_HOPLMT_S); - grh->traffic_class = - roce_get_field(context->qpc_bytes_48, - QP_CONTEXT_QPC_BYTES_48_TCLASS_M, - QP_CONTEXT_QPC_BYTES_48_TCLASS_S); - - memcpy(grh->dgid.raw, context->dgid, - sizeof(grh->dgid.raw)); - } - - qp_attr->pkey_index = roce_get_field(context->qpc_bytes_12, - QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M, - QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S); - qp_attr->port_num = hr_qp->port + 1; - qp_attr->sq_draining = 0; - qp_attr->max_rd_atomic = 1 << roce_get_field(context->qpc_bytes_156, - QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M, - QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S); - qp_attr->max_dest_rd_atomic = 1 << roce_get_field(context->qpc_bytes_32, - QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_M, - QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S); - qp_attr->min_rnr_timer = 
(u8)(roce_get_field(context->qpc_bytes_24, - QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_M, - QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S)); - qp_attr->timeout = (u8)(roce_get_field(context->qpc_bytes_156, - QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_M, - QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S)); - qp_attr->retry_cnt = roce_get_field(context->qpc_bytes_148, - QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M, - QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S); - qp_attr->rnr_retry = (u8)le32_to_cpu(context->rnr_retry); - -done: - qp_attr->cur_qp_state = qp_attr->qp_state; - qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt; - qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs; - - if (!ibqp->uobject) { - qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt; - qp_attr->cap.max_send_sge = hr_qp->sq.max_gs; - } else { - qp_attr->cap.max_send_wr = 0; - qp_attr->cap.max_send_sge = 0; - } - - qp_init_attr->cap = qp_attr->cap; - -out: - mutex_unlock(&hr_qp->mutex); - kfree(context); - return ret; -} - -static int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, - int qp_attr_mask, - struct ib_qp_init_attr *qp_init_attr) -{ - struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); - - return hr_qp->doorbell_qpn <= 1 ? - hns_roce_v1_q_sqp(ibqp, qp_attr, qp_attr_mask, qp_init_attr) : - hns_roce_v1_q_qp(ibqp, qp_attr, qp_attr_mask, qp_init_attr); -} - -int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) -{ - struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); - struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); - struct hns_roce_cq *send_cq, *recv_cq; - int ret; - - ret = hns_roce_v1_modify_qp(ibqp, NULL, 0, hr_qp->state, IB_QPS_RESET); - if (ret) - return ret; - - send_cq = hr_qp->ibqp.send_cq ? to_hr_cq(hr_qp->ibqp.send_cq) : NULL; - recv_cq = hr_qp->ibqp.recv_cq ? to_hr_cq(hr_qp->ibqp.recv_cq) : NULL; - - hns_roce_lock_cqs(send_cq, recv_cq); - if (!udata) { - if (recv_cq) - __hns_roce_v1_cq_clean(recv_cq, hr_qp->qpn, - (hr_qp->ibqp.srq ? 
- to_hr_srq(hr_qp->ibqp.srq) : - NULL)); - - if (send_cq && send_cq != recv_cq) - __hns_roce_v1_cq_clean(send_cq, hr_qp->qpn, NULL); - } - hns_roce_qp_remove(hr_dev, hr_qp); - hns_roce_unlock_cqs(send_cq, recv_cq); - - hns_roce_qp_destroy(hr_dev, hr_qp, udata); - - return 0; -} - -static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) -{ - struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device); - struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); - struct device *dev = &hr_dev->pdev->dev; - u32 cqe_cnt_ori; - u32 cqe_cnt_cur; - int wait_time = 0; - - /* - * Before freeing cq buffer, we need to ensure that the outstanding CQE - * have been written by checking the CQE counter. - */ - cqe_cnt_ori = roce_read(hr_dev, ROCEE_SCAEP_WR_CQE_CNT); - while (1) { - if (roce_read(hr_dev, ROCEE_CAEP_CQE_WCMD_EMPTY) & - HNS_ROCE_CQE_WCMD_EMPTY_BIT) - break; - - cqe_cnt_cur = roce_read(hr_dev, ROCEE_SCAEP_WR_CQE_CNT); - if ((cqe_cnt_cur - cqe_cnt_ori) >= HNS_ROCE_MIN_CQE_CNT) - break; - - msleep(HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS); - if (wait_time > HNS_ROCE_MAX_FREE_CQ_WAIT_CNT) { - dev_warn(dev, "Destroy cq 0x%lx timeout!\n", - hr_cq->cqn); - break; - } - wait_time++; - } - return 0; -} - -static void set_eq_cons_index_v1(struct hns_roce_eq *eq, u32 req_not) -{ - roce_raw_write((eq->cons_index & HNS_ROCE_V1_CONS_IDX_M) | - (req_not << eq->log_entries), eq->db_reg); -} - -static void hns_roce_v1_wq_catas_err_handle(struct hns_roce_dev *hr_dev, - struct hns_roce_aeqe *aeqe, int qpn) -{ - struct device *dev = &hr_dev->pdev->dev; - - dev_warn(dev, "Local Work Queue Catastrophic Error.\n"); - switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M, - HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) { - case HNS_ROCE_LWQCE_QPC_ERROR: - dev_warn(dev, "QP %d, QPC error.\n", qpn); - break; - case HNS_ROCE_LWQCE_MTU_ERROR: - dev_warn(dev, "QP %d, MTU error.\n", qpn); - break; - case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR: - dev_warn(dev, "QP %d, WQE BA addr error.\n", qpn); - 
break; - case HNS_ROCE_LWQCE_WQE_ADDR_ERROR: - dev_warn(dev, "QP %d, WQE addr error.\n", qpn); - break; - case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR: - dev_warn(dev, "QP %d, WQE shift error\n", qpn); - break; - case HNS_ROCE_LWQCE_SL_ERROR: - dev_warn(dev, "QP %d, SL error.\n", qpn); - break; - case HNS_ROCE_LWQCE_PORT_ERROR: - dev_warn(dev, "QP %d, port error.\n", qpn); - break; - default: - break; - } -} - -static void hns_roce_v1_local_wq_access_err_handle(struct hns_roce_dev *hr_dev, - struct hns_roce_aeqe *aeqe, - int qpn) -{ - struct device *dev = &hr_dev->pdev->dev; - - dev_warn(dev, "Local Access Violation Work Queue Error.\n"); - switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M, - HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) { - case HNS_ROCE_LAVWQE_R_KEY_VIOLATION: - dev_warn(dev, "QP %d, R_key violation.\n", qpn); - break; - case HNS_ROCE_LAVWQE_LENGTH_ERROR: - dev_warn(dev, "QP %d, length error.\n", qpn); - break; - case HNS_ROCE_LAVWQE_VA_ERROR: - dev_warn(dev, "QP %d, VA error.\n", qpn); - break; - case HNS_ROCE_LAVWQE_PD_ERROR: - dev_err(dev, "QP %d, PD error.\n", qpn); - break; - case HNS_ROCE_LAVWQE_RW_ACC_ERROR: - dev_warn(dev, "QP %d, rw acc error.\n", qpn); - break; - case HNS_ROCE_LAVWQE_KEY_STATE_ERROR: - dev_warn(dev, "QP %d, key state error.\n", qpn); - break; - case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR: - dev_warn(dev, "QP %d, MR operation error.\n", qpn); - break; - default: - break; - } -} - -static void hns_roce_v1_qp_err_handle(struct hns_roce_dev *hr_dev, - struct hns_roce_aeqe *aeqe, - int event_type) -{ - struct device *dev = &hr_dev->pdev->dev; - int phy_port; - int qpn; - - qpn = roce_get_field(aeqe->event.queue_event.num, - HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M, - HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S); - phy_port = roce_get_field(aeqe->event.queue_event.num, - HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_M, - HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S); - if (qpn <= 1) - qpn = HNS_ROCE_MAX_PORTS * qpn + phy_port; - - switch 
(event_type) { - case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: - dev_warn(dev, "Invalid Req Local Work Queue Error.\n" - "QP %d, phy_port %d.\n", qpn, phy_port); - break; - case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: - hns_roce_v1_wq_catas_err_handle(hr_dev, aeqe, qpn); - break; - case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: - hns_roce_v1_local_wq_access_err_handle(hr_dev, aeqe, qpn); - break; - default: - break; - } - - hns_roce_qp_event(hr_dev, qpn, event_type); -} - -static void hns_roce_v1_cq_err_handle(struct hns_roce_dev *hr_dev, - struct hns_roce_aeqe *aeqe, - int event_type) -{ - struct device *dev = &hr_dev->pdev->dev; - u32 cqn; - - cqn = roce_get_field(aeqe->event.queue_event.num, - HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M, - HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S); - - switch (event_type) { - case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: - dev_warn(dev, "CQ 0x%x access err.\n", cqn); - break; - case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: - dev_warn(dev, "CQ 0x%x overflow\n", cqn); - break; - case HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID: - dev_warn(dev, "CQ 0x%x ID invalid.\n", cqn); - break; - default: - break; - } - - hns_roce_cq_event(hr_dev, cqn, event_type); -} - -static void hns_roce_v1_db_overflow_handle(struct hns_roce_dev *hr_dev, - struct hns_roce_aeqe *aeqe) -{ - struct device *dev = &hr_dev->pdev->dev; - - switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M, - HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) { - case HNS_ROCE_DB_SUBTYPE_SDB_OVF: - dev_warn(dev, "SDB overflow.\n"); - break; - case HNS_ROCE_DB_SUBTYPE_SDB_ALM_OVF: - dev_warn(dev, "SDB almost overflow.\n"); - break; - case HNS_ROCE_DB_SUBTYPE_SDB_ALM_EMP: - dev_warn(dev, "SDB almost empty.\n"); - break; - case HNS_ROCE_DB_SUBTYPE_ODB_OVF: - dev_warn(dev, "ODB overflow.\n"); - break; - case HNS_ROCE_DB_SUBTYPE_ODB_ALM_OVF: - dev_warn(dev, "ODB almost overflow.\n"); - break; - case HNS_ROCE_DB_SUBTYPE_ODB_ALM_EMP: - dev_warn(dev, "SDB almost empty.\n"); - break; - default: - break; - } 
-} - -static struct hns_roce_aeqe *get_aeqe_v1(struct hns_roce_eq *eq, u32 entry) -{ - unsigned long off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQE_SIZE; - - return (struct hns_roce_aeqe *)((u8 *) - (eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) + - off % HNS_ROCE_BA_SIZE); -} - -static struct hns_roce_aeqe *next_aeqe_sw_v1(struct hns_roce_eq *eq) -{ - struct hns_roce_aeqe *aeqe = get_aeqe_v1(eq, eq->cons_index); - - return (roce_get_bit(aeqe->asyn, HNS_ROCE_AEQE_U32_4_OWNER_S) ^ - !!(eq->cons_index & eq->entries)) ? aeqe : NULL; -} - -static int hns_roce_v1_aeq_int(struct hns_roce_dev *hr_dev, - struct hns_roce_eq *eq) -{ - struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_aeqe *aeqe; - int aeqes_found = 0; - int event_type; - - while ((aeqe = next_aeqe_sw_v1(eq))) { - /* Make sure we read the AEQ entry after we have checked the - * ownership bit - */ - dma_rmb(); - - dev_dbg(dev, "aeqe = %pK, aeqe->asyn.event_type = 0x%lx\n", - aeqe, - roce_get_field(aeqe->asyn, - HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M, - HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S)); - event_type = roce_get_field(aeqe->asyn, - HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M, - HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S); - switch (event_type) { - case HNS_ROCE_EVENT_TYPE_PATH_MIG: - dev_warn(dev, "PATH MIG not supported\n"); - break; - case HNS_ROCE_EVENT_TYPE_COMM_EST: - dev_warn(dev, "COMMUNICATION established\n"); - break; - case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: - dev_warn(dev, "SQ DRAINED not supported\n"); - break; - case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED: - dev_warn(dev, "PATH MIG failed\n"); - break; - case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: - case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: - case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: - hns_roce_v1_qp_err_handle(hr_dev, aeqe, event_type); - break; - case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: - case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: - case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: - dev_warn(dev, "SRQ not support!\n"); - break; - case 
HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: - case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: - case HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID: - hns_roce_v1_cq_err_handle(hr_dev, aeqe, event_type); - break; - case HNS_ROCE_EVENT_TYPE_PORT_CHANGE: - dev_warn(dev, "port change.\n"); - break; - case HNS_ROCE_EVENT_TYPE_MB: - hns_roce_cmd_event(hr_dev, - le16_to_cpu(aeqe->event.cmd.token), - aeqe->event.cmd.status, - le64_to_cpu(aeqe->event.cmd.out_param - )); - break; - case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: - hns_roce_v1_db_overflow_handle(hr_dev, aeqe); - break; - default: - dev_warn(dev, "Unhandled event %d on EQ %d at idx %u.\n", - event_type, eq->eqn, eq->cons_index); - break; - } - - eq->cons_index++; - aeqes_found = 1; - - if (eq->cons_index > 2 * hr_dev->caps.aeqe_depth - 1) - eq->cons_index = 0; - } - - set_eq_cons_index_v1(eq, 0); - - return aeqes_found; -} - -static struct hns_roce_ceqe *get_ceqe_v1(struct hns_roce_eq *eq, u32 entry) -{ - unsigned long off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQE_SIZE; - - return (struct hns_roce_ceqe *)((u8 *) - (eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) + - off % HNS_ROCE_BA_SIZE); -} - -static struct hns_roce_ceqe *next_ceqe_sw_v1(struct hns_roce_eq *eq) -{ - struct hns_roce_ceqe *ceqe = get_ceqe_v1(eq, eq->cons_index); - - return (!!(roce_get_bit(ceqe->comp, - HNS_ROCE_CEQE_CEQE_COMP_OWNER_S))) ^ - (!!(eq->cons_index & eq->entries)) ? 
ceqe : NULL; -} - -static int hns_roce_v1_ceq_int(struct hns_roce_dev *hr_dev, - struct hns_roce_eq *eq) -{ - struct hns_roce_ceqe *ceqe; - int ceqes_found = 0; - u32 cqn; - - while ((ceqe = next_ceqe_sw_v1(eq))) { - /* Make sure we read CEQ entry after we have checked the - * ownership bit - */ - dma_rmb(); - - cqn = roce_get_field(ceqe->comp, - HNS_ROCE_CEQE_CEQE_COMP_CQN_M, - HNS_ROCE_CEQE_CEQE_COMP_CQN_S); - hns_roce_cq_completion(hr_dev, cqn); - - ++eq->cons_index; - ceqes_found = 1; - - if (eq->cons_index > - EQ_DEPTH_COEFF * hr_dev->caps.ceqe_depth - 1) - eq->cons_index = 0; - } - - set_eq_cons_index_v1(eq, 0); - - return ceqes_found; -} - -static irqreturn_t hns_roce_v1_msix_interrupt_eq(int irq, void *eq_ptr) -{ - struct hns_roce_eq *eq = eq_ptr; - struct hns_roce_dev *hr_dev = eq->hr_dev; - int int_work; - - if (eq->type_flag == HNS_ROCE_CEQ) - /* CEQ irq routine, CEQ is pulse irq, not clear */ - int_work = hns_roce_v1_ceq_int(hr_dev, eq); - else - /* AEQ irq routine, AEQ is pulse irq, not clear */ - int_work = hns_roce_v1_aeq_int(hr_dev, eq); - - return IRQ_RETVAL(int_work); -} - -static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id) -{ - struct hns_roce_dev *hr_dev = dev_id; - struct device *dev = &hr_dev->pdev->dev; - int int_work = 0; - u32 caepaemask_val; - u32 cealmovf_val; - u32 caepaest_val; - u32 aeshift_val; - u32 ceshift_val; - u32 cemask_val; - __le32 tmp; - int i; - - /* - * Abnormal interrupt: - * AEQ overflow, ECC multi-bit err, CEQ overflow must clear - * interrupt, mask irq, clear irq, cancel mask operation - */ - aeshift_val = roce_read(hr_dev, ROCEE_CAEP_AEQC_AEQE_SHIFT_REG); - tmp = cpu_to_le32(aeshift_val); - - /* AEQE overflow */ - if (roce_get_bit(tmp, - ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQ_ALM_OVF_INT_ST_S) == 1) { - dev_warn(dev, "AEQ overflow!\n"); - - /* Set mask */ - caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG); - tmp = cpu_to_le32(caepaemask_val); - roce_set_bit(tmp, 
ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, - HNS_ROCE_INT_MASK_ENABLE); - caepaemask_val = le32_to_cpu(tmp); - roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val); - - /* Clear int state(INT_WC : write 1 clear) */ - caepaest_val = roce_read(hr_dev, ROCEE_CAEP_AE_ST_REG); - tmp = cpu_to_le32(caepaest_val); - roce_set_bit(tmp, ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S, 1); - caepaest_val = le32_to_cpu(tmp); - roce_write(hr_dev, ROCEE_CAEP_AE_ST_REG, caepaest_val); - - /* Clear mask */ - caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG); - tmp = cpu_to_le32(caepaemask_val); - roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, - HNS_ROCE_INT_MASK_DISABLE); - caepaemask_val = le32_to_cpu(tmp); - roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val); - } - - /* CEQ almost overflow */ - for (i = 0; i < hr_dev->caps.num_comp_vectors; i++) { - ceshift_val = roce_read(hr_dev, ROCEE_CAEP_CEQC_SHIFT_0_REG + - i * CEQ_REG_OFFSET); - tmp = cpu_to_le32(ceshift_val); - - if (roce_get_bit(tmp, - ROCEE_CAEP_CEQC_SHIFT_CAEP_CEQ_ALM_OVF_INT_ST_S) == 1) { - dev_warn(dev, "CEQ[%d] almost overflow!\n", i); - int_work++; - - /* Set mask */ - cemask_val = roce_read(hr_dev, - ROCEE_CAEP_CE_IRQ_MASK_0_REG + - i * CEQ_REG_OFFSET); - tmp = cpu_to_le32(cemask_val); - roce_set_bit(tmp, - ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S, - HNS_ROCE_INT_MASK_ENABLE); - cemask_val = le32_to_cpu(tmp); - roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG + - i * CEQ_REG_OFFSET, cemask_val); - - /* Clear int state(INT_WC : write 1 clear) */ - cealmovf_val = roce_read(hr_dev, - ROCEE_CAEP_CEQ_ALM_OVF_0_REG + - i * CEQ_REG_OFFSET); - tmp = cpu_to_le32(cealmovf_val); - roce_set_bit(tmp, - ROCEE_CAEP_CEQ_ALM_OVF_CAEP_CEQ_ALM_OVF_S, - 1); - cealmovf_val = le32_to_cpu(tmp); - roce_write(hr_dev, ROCEE_CAEP_CEQ_ALM_OVF_0_REG + - i * CEQ_REG_OFFSET, cealmovf_val); - - /* Clear mask */ - cemask_val = roce_read(hr_dev, - ROCEE_CAEP_CE_IRQ_MASK_0_REG + - i * CEQ_REG_OFFSET); - tmp = 
cpu_to_le32(cemask_val); - roce_set_bit(tmp, - ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S, - HNS_ROCE_INT_MASK_DISABLE); - cemask_val = le32_to_cpu(tmp); - roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG + - i * CEQ_REG_OFFSET, cemask_val); - } - } - - /* ECC multi-bit error alarm */ - dev_warn(dev, "ECC UCERR ALARM: 0x%x, 0x%x, 0x%x\n", - roce_read(hr_dev, ROCEE_ECC_UCERR_ALM0_REG), - roce_read(hr_dev, ROCEE_ECC_UCERR_ALM1_REG), - roce_read(hr_dev, ROCEE_ECC_UCERR_ALM2_REG)); - - dev_warn(dev, "ECC CERR ALARM: 0x%x, 0x%x, 0x%x\n", - roce_read(hr_dev, ROCEE_ECC_CERR_ALM0_REG), - roce_read(hr_dev, ROCEE_ECC_CERR_ALM1_REG), - roce_read(hr_dev, ROCEE_ECC_CERR_ALM2_REG)); - - return IRQ_RETVAL(int_work); -} - -static void hns_roce_v1_int_mask_enable(struct hns_roce_dev *hr_dev) -{ - u32 aemask_val; - int masken = 0; - __le32 tmp; - int i; - - /* AEQ INT */ - aemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG); - tmp = cpu_to_le32(aemask_val); - roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, - masken); - roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S, masken); - aemask_val = le32_to_cpu(tmp); - roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, aemask_val); - - /* CEQ INT */ - for (i = 0; i < hr_dev->caps.num_comp_vectors; i++) { - /* IRQ mask */ - roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG + - i * CEQ_REG_OFFSET, masken); - } -} - -static void hns_roce_v1_free_eq(struct hns_roce_dev *hr_dev, - struct hns_roce_eq *eq) -{ - int npages = (PAGE_ALIGN(eq->eqe_size * eq->entries) + - HNS_ROCE_BA_SIZE - 1) / HNS_ROCE_BA_SIZE; - int i; - - if (!eq->buf_list) - return; - - for (i = 0; i < npages; ++i) - dma_free_coherent(&hr_dev->pdev->dev, HNS_ROCE_BA_SIZE, - eq->buf_list[i].buf, eq->buf_list[i].map); - - kfree(eq->buf_list); -} - -static void hns_roce_v1_enable_eq(struct hns_roce_dev *hr_dev, int eq_num, - int enable_flag) -{ - void __iomem *eqc = hr_dev->eq_table.eqc_base[eq_num]; - __le32 tmp; - u32 val; - - val = readl(eqc); - tmp = 
cpu_to_le32(val); - - if (enable_flag) - roce_set_field(tmp, - ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M, - ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S, - HNS_ROCE_EQ_STAT_VALID); - else - roce_set_field(tmp, - ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M, - ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S, - HNS_ROCE_EQ_STAT_INVALID); - - val = le32_to_cpu(tmp); - writel(val, eqc); -} - -static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev, - struct hns_roce_eq *eq) -{ - void __iomem *eqc = hr_dev->eq_table.eqc_base[eq->eqn]; - struct device *dev = &hr_dev->pdev->dev; - dma_addr_t tmp_dma_addr; - u32 eqcuridx_val; - u32 eqconsindx_val; - u32 eqshift_val; - __le32 tmp2 = 0; - __le32 tmp1 = 0; - __le32 tmp = 0; - int num_bas; - int ret; - int i; - - num_bas = (PAGE_ALIGN(eq->entries * eq->eqe_size) + - HNS_ROCE_BA_SIZE - 1) / HNS_ROCE_BA_SIZE; - - if ((eq->entries * eq->eqe_size) > HNS_ROCE_BA_SIZE) { - dev_err(dev, "[error]eq buf %d gt ba size(%d) need bas=%d\n", - (eq->entries * eq->eqe_size), HNS_ROCE_BA_SIZE, - num_bas); - return -EINVAL; - } - - eq->buf_list = kcalloc(num_bas, sizeof(*eq->buf_list), GFP_KERNEL); - if (!eq->buf_list) - return -ENOMEM; - - for (i = 0; i < num_bas; ++i) { - eq->buf_list[i].buf = dma_alloc_coherent(dev, HNS_ROCE_BA_SIZE, - &tmp_dma_addr, - GFP_KERNEL); - if (!eq->buf_list[i].buf) { - ret = -ENOMEM; - goto err_out_free_pages; - } - - eq->buf_list[i].map = tmp_dma_addr; - } - eq->cons_index = 0; - roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M, - ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S, - HNS_ROCE_EQ_STAT_INVALID); - roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M, - ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S, - eq->log_entries); - eqshift_val = le32_to_cpu(tmp); - writel(eqshift_val, eqc); - - /* Configure eq extended address 12~44bit */ - writel((u32)(eq->buf_list[0].map >> 12), eqc + 4); - - /* - * Configure eq extended address 45~49 bit. 
- * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of - * using 4K page, and shift more 32 because of - * calculating the high 32 bit value evaluated to hardware. - */ - roce_set_field(tmp1, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M, - ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S, - eq->buf_list[0].map >> 44); - roce_set_field(tmp1, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M, - ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S, 0); - eqcuridx_val = le32_to_cpu(tmp1); - writel(eqcuridx_val, eqc + 8); - - /* Configure eq consumer index */ - roce_set_field(tmp2, ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M, - ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S, 0); - eqconsindx_val = le32_to_cpu(tmp2); - writel(eqconsindx_val, eqc + 0xc); - - return 0; - -err_out_free_pages: - for (i -= 1; i >= 0; i--) - dma_free_coherent(dev, HNS_ROCE_BA_SIZE, eq->buf_list[i].buf, - eq->buf_list[i].map); - - kfree(eq->buf_list); - return ret; -} - -static int hns_roce_v1_init_eq_table(struct hns_roce_dev *hr_dev) -{ - struct hns_roce_eq_table *eq_table = &hr_dev->eq_table; - struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_eq *eq; - int irq_num; - int eq_num; - int ret; - int i, j; - - eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors; - irq_num = eq_num + hr_dev->caps.num_other_vectors; - - eq_table->eq = kcalloc(eq_num, sizeof(*eq_table->eq), GFP_KERNEL); - if (!eq_table->eq) - return -ENOMEM; - - eq_table->eqc_base = kcalloc(eq_num, sizeof(*eq_table->eqc_base), - GFP_KERNEL); - if (!eq_table->eqc_base) { - ret = -ENOMEM; - goto err_eqc_base_alloc_fail; - } - - for (i = 0; i < eq_num; i++) { - eq = &eq_table->eq[i]; - eq->hr_dev = hr_dev; - eq->eqn = i; - eq->irq = hr_dev->irq[i]; - eq->log_page_size = PAGE_SHIFT; - - if (i < hr_dev->caps.num_comp_vectors) { - /* CEQ */ - eq_table->eqc_base[i] = hr_dev->reg_base + - ROCEE_CAEP_CEQC_SHIFT_0_REG + - CEQ_REG_OFFSET * i; - eq->type_flag = HNS_ROCE_CEQ; - eq->db_reg = hr_dev->reg_base + - 
ROCEE_CAEP_CEQC_CONS_IDX_0_REG + - CEQ_REG_OFFSET * i; - eq->entries = hr_dev->caps.ceqe_depth; - eq->log_entries = ilog2(eq->entries); - eq->eqe_size = HNS_ROCE_CEQE_SIZE; - } else { - /* AEQ */ - eq_table->eqc_base[i] = hr_dev->reg_base + - ROCEE_CAEP_AEQC_AEQE_SHIFT_REG; - eq->type_flag = HNS_ROCE_AEQ; - eq->db_reg = hr_dev->reg_base + - ROCEE_CAEP_AEQE_CONS_IDX_REG; - eq->entries = hr_dev->caps.aeqe_depth; - eq->log_entries = ilog2(eq->entries); - eq->eqe_size = HNS_ROCE_AEQE_SIZE; - } - } - - /* Disable irq */ - hns_roce_v1_int_mask_enable(hr_dev); - - /* Configure ce int interval */ - roce_write(hr_dev, ROCEE_CAEP_CE_INTERVAL_CFG_REG, - HNS_ROCE_CEQ_DEFAULT_INTERVAL); - - /* Configure ce int burst num */ - roce_write(hr_dev, ROCEE_CAEP_CE_BURST_NUM_CFG_REG, - HNS_ROCE_CEQ_DEFAULT_BURST_NUM); - - for (i = 0; i < eq_num; i++) { - ret = hns_roce_v1_create_eq(hr_dev, &eq_table->eq[i]); - if (ret) { - dev_err(dev, "eq create failed\n"); - goto err_create_eq_fail; - } - } - - for (j = 0; j < irq_num; j++) { - if (j < eq_num) - ret = request_irq(hr_dev->irq[j], - hns_roce_v1_msix_interrupt_eq, 0, - hr_dev->irq_names[j], - &eq_table->eq[j]); - else - ret = request_irq(hr_dev->irq[j], - hns_roce_v1_msix_interrupt_abn, 0, - hr_dev->irq_names[j], hr_dev); - - if (ret) { - dev_err(dev, "request irq error!\n"); - goto err_request_irq_fail; - } - } - - for (i = 0; i < eq_num; i++) - hns_roce_v1_enable_eq(hr_dev, i, EQ_ENABLE); - - return 0; - -err_request_irq_fail: - for (j -= 1; j >= 0; j--) - free_irq(hr_dev->irq[j], &eq_table->eq[j]); - -err_create_eq_fail: - for (i -= 1; i >= 0; i--) - hns_roce_v1_free_eq(hr_dev, &eq_table->eq[i]); - - kfree(eq_table->eqc_base); - -err_eqc_base_alloc_fail: - kfree(eq_table->eq); - - return ret; -} - -static void hns_roce_v1_cleanup_eq_table(struct hns_roce_dev *hr_dev) -{ - struct hns_roce_eq_table *eq_table = &hr_dev->eq_table; - int irq_num; - int eq_num; - int i; - - eq_num = hr_dev->caps.num_comp_vectors + 
hr_dev->caps.num_aeq_vectors; - irq_num = eq_num + hr_dev->caps.num_other_vectors; - for (i = 0; i < eq_num; i++) { - /* Disable EQ */ - hns_roce_v1_enable_eq(hr_dev, i, EQ_DISABLE); - - free_irq(hr_dev->irq[i], &eq_table->eq[i]); - - hns_roce_v1_free_eq(hr_dev, &eq_table->eq[i]); - } - for (i = eq_num; i < irq_num; i++) - free_irq(hr_dev->irq[i], hr_dev); - - kfree(eq_table->eqc_base); - kfree(eq_table->eq); -} - -static const struct ib_device_ops hns_roce_v1_dev_ops = { - .destroy_qp = hns_roce_v1_destroy_qp, - .poll_cq = hns_roce_v1_poll_cq, - .post_recv = hns_roce_v1_post_recv, - .post_send = hns_roce_v1_post_send, - .query_qp = hns_roce_v1_query_qp, - .req_notify_cq = hns_roce_v1_req_notify_cq, -}; - -static const struct hns_roce_hw hns_roce_hw_v1 = { - .reset = hns_roce_v1_reset, - .hw_profile = hns_roce_v1_profile, - .hw_init = hns_roce_v1_init, - .hw_exit = hns_roce_v1_exit, - .post_mbox = hns_roce_v1_post_mbox, - .poll_mbox_done = hns_roce_v1_chk_mbox, - .set_gid = hns_roce_v1_set_gid, - .set_mac = hns_roce_v1_set_mac, - .set_mtu = hns_roce_v1_set_mtu, - .write_mtpt = hns_roce_v1_write_mtpt, - .write_cqc = hns_roce_v1_write_cqc, - .set_hem = hns_roce_v1_set_hem, - .clear_hem = hns_roce_v1_clear_hem, - .modify_qp = hns_roce_v1_modify_qp, - .dereg_mr = hns_roce_v1_dereg_mr, - .destroy_cq = hns_roce_v1_destroy_cq, - .init_eq = hns_roce_v1_init_eq_table, - .cleanup_eq = hns_roce_v1_cleanup_eq_table, - .hns_roce_dev_ops = &hns_roce_v1_dev_ops, -}; - -static const struct of_device_id hns_roce_of_match[] = { - { .compatible = "hisilicon,hns-roce-v1", .data = &hns_roce_hw_v1, }, - {}, -}; -MODULE_DEVICE_TABLE(of, hns_roce_of_match); - -static const struct acpi_device_id hns_roce_acpi_match[] = { - { "HISI00D1", (kernel_ulong_t)&hns_roce_hw_v1 }, - {}, -}; -MODULE_DEVICE_TABLE(acpi, hns_roce_acpi_match); - -static struct -platform_device *hns_roce_find_pdev(struct fwnode_handle *fwnode) -{ - struct device *dev; - - /* get the 'device' corresponding to the matching 
'fwnode' */ - dev = bus_find_device_by_fwnode(&platform_bus_type, fwnode); - /* get the platform device */ - return dev ? to_platform_device(dev) : NULL; -} - -static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev) -{ - struct device *dev = &hr_dev->pdev->dev; - struct platform_device *pdev = NULL; - struct net_device *netdev = NULL; - struct device_node *net_node; - int port_cnt = 0; - u8 phy_port; - int ret; - int i; - - /* check if we are compatible with the underlying SoC */ - if (dev_of_node(dev)) { - const struct of_device_id *of_id; - - of_id = of_match_node(hns_roce_of_match, dev->of_node); - if (!of_id) { - dev_err(dev, "device is not compatible!\n"); - return -ENXIO; - } - hr_dev->hw = (const struct hns_roce_hw *)of_id->data; - if (!hr_dev->hw) { - dev_err(dev, "couldn't get H/W specific DT data!\n"); - return -ENXIO; - } - } else if (is_acpi_device_node(dev->fwnode)) { - const struct acpi_device_id *acpi_id; - - acpi_id = acpi_match_device(hns_roce_acpi_match, dev); - if (!acpi_id) { - dev_err(dev, "device is not compatible!\n"); - return -ENXIO; - } - hr_dev->hw = (const struct hns_roce_hw *) acpi_id->driver_data; - if (!hr_dev->hw) { - dev_err(dev, "couldn't get H/W specific ACPI data!\n"); - return -ENXIO; - } - } else { - dev_err(dev, "can't read compatibility data from DT or ACPI\n"); - return -ENXIO; - } - - /* get the mapped register base address */ - hr_dev->reg_base = devm_platform_ioremap_resource(hr_dev->pdev, 0); - if (IS_ERR(hr_dev->reg_base)) - return PTR_ERR(hr_dev->reg_base); - - /* read the node_guid of IB device from the DT or ACPI */ - ret = device_property_read_u8_array(dev, "node-guid", - (u8 *)&hr_dev->ib_dev.node_guid, - GUID_LEN); - if (ret) { - dev_err(dev, "couldn't get node_guid from DT or ACPI!\n"); - return ret; - } - - /* get the RoCE associated ethernet ports or netdevices */ - for (i = 0; i < HNS_ROCE_MAX_PORTS; i++) { - if (dev_of_node(dev)) { - net_node = of_parse_phandle(dev->of_node, "eth-handle", - i); - if 
(!net_node) - continue; - pdev = of_find_device_by_node(net_node); - } else if (is_acpi_device_node(dev->fwnode)) { - struct fwnode_reference_args args; - - ret = acpi_node_get_property_reference(dev->fwnode, - "eth-handle", - i, &args); - if (ret) - continue; - pdev = hns_roce_find_pdev(args.fwnode); - } else { - dev_err(dev, "cannot read data from DT or ACPI\n"); - return -ENXIO; - } - - if (pdev) { - netdev = platform_get_drvdata(pdev); - phy_port = (u8)i; - if (netdev) { - hr_dev->iboe.netdevs[port_cnt] = netdev; - hr_dev->iboe.phy_port[port_cnt] = phy_port; - } else { - dev_err(dev, "no netdev found with pdev %s\n", - pdev->name); - return -ENODEV; - } - port_cnt++; - } - } - - if (port_cnt == 0) { - dev_err(dev, "unable to get eth-handle for available ports!\n"); - return -EINVAL; - } - - hr_dev->caps.num_ports = port_cnt; - - /* cmd issue mode: 0 is poll, 1 is event */ - hr_dev->cmd_mod = 1; - hr_dev->loop_idc = 0; - hr_dev->sdb_offset = ROCEE_DB_SQ_L_0_REG; - hr_dev->odb_offset = ROCEE_DB_OTHERS_L_0_REG; - - /* read the interrupt names from the DT or ACPI */ - ret = device_property_read_string_array(dev, "interrupt-names", - hr_dev->irq_names, - HNS_ROCE_V1_MAX_IRQ_NUM); - if (ret < 0) { - dev_err(dev, "couldn't get interrupt names from DT or ACPI!\n"); - return ret; - } - - /* fetch the interrupt numbers */ - for (i = 0; i < HNS_ROCE_V1_MAX_IRQ_NUM; i++) { - hr_dev->irq[i] = platform_get_irq(hr_dev->pdev, i); - if (hr_dev->irq[i] <= 0) - return -EINVAL; - } - - return 0; -} - -/** - * hns_roce_probe - RoCE driver entrance - * @pdev: pointer to platform device - * Return : int - * - */ -static int hns_roce_probe(struct platform_device *pdev) -{ - int ret; - struct hns_roce_dev *hr_dev; - struct device *dev = &pdev->dev; - - hr_dev = ib_alloc_device(hns_roce_dev, ib_dev); - if (!hr_dev) - return -ENOMEM; - - hr_dev->priv = kzalloc(sizeof(struct hns_roce_v1_priv), GFP_KERNEL); - if (!hr_dev->priv) { - ret = -ENOMEM; - goto error_failed_kzalloc; - } - - 
hr_dev->pdev = pdev; - hr_dev->dev = dev; - platform_set_drvdata(pdev, hr_dev); - - if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64ULL)) && - dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32ULL))) { - dev_err(dev, "Not usable DMA addressing mode\n"); - ret = -EIO; - goto error_failed_get_cfg; - } - - ret = hns_roce_get_cfg(hr_dev); - if (ret) { - dev_err(dev, "Get Configuration failed!\n"); - goto error_failed_get_cfg; - } - - ret = hns_roce_init(hr_dev); - if (ret) { - dev_err(dev, "RoCE engine init failed!\n"); - goto error_failed_get_cfg; - } - - return 0; - -error_failed_get_cfg: - kfree(hr_dev->priv); - -error_failed_kzalloc: - ib_dealloc_device(&hr_dev->ib_dev); - - return ret; -} - -/** - * hns_roce_remove - remove RoCE device - * @pdev: pointer to platform device - */ -static int hns_roce_remove(struct platform_device *pdev) -{ - struct hns_roce_dev *hr_dev = platform_get_drvdata(pdev); - - hns_roce_exit(hr_dev); - kfree(hr_dev->priv); - ib_dealloc_device(&hr_dev->ib_dev); - - return 0; -} - -static struct platform_driver hns_roce_driver = { - .probe = hns_roce_probe, - .remove = hns_roce_remove, - .driver = { - .name = DRV_NAME, - .of_match_table = hns_roce_of_match, - .acpi_match_table = ACPI_PTR(hns_roce_acpi_match), - }, -}; - -module_platform_driver(hns_roce_driver); - -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_AUTHOR("Wei Hu <xavier.huwei@huawei.com>"); -MODULE_AUTHOR("Nenglong Zhao <zhaonenglong@hisilicon.com>"); -MODULE_AUTHOR("Lijun Ou <oulijun@huawei.com>"); -MODULE_DESCRIPTION("Hisilicon Hip06 Family RoCE Driver"); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h deleted file mode 100644 index 60fdcbae6729..000000000000 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h +++ /dev/null @@ -1,1147 +0,0 @@ -/* - * Copyright (c) 2016 Hisilicon Limited. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef _HNS_ROCE_HW_V1_H -#define _HNS_ROCE_HW_V1_H - -#define CQ_STATE_VALID 2 - -#define HNS_ROCE_V1_MAX_PD_NUM 0x8000 -#define HNS_ROCE_V1_MAX_CQ_NUM 0x10000 -#define HNS_ROCE_V1_MAX_CQE_NUM 0x8000 - -#define HNS_ROCE_V1_MAX_QP_NUM 0x40000 -#define HNS_ROCE_V1_MAX_WQE_NUM 0x4000 - -#define HNS_ROCE_V1_MAX_MTPT_NUM 0x80000 - -#define HNS_ROCE_V1_MAX_MTT_SEGS 0x100000 - -#define HNS_ROCE_V1_MAX_QP_INIT_RDMA 128 -#define HNS_ROCE_V1_MAX_QP_DEST_RDMA 128 - -#define HNS_ROCE_V1_MAX_SQ_DESC_SZ 64 -#define HNS_ROCE_V1_MAX_RQ_DESC_SZ 64 -#define HNS_ROCE_V1_SG_NUM 2 -#define HNS_ROCE_V1_INLINE_SIZE 32 - -#define HNS_ROCE_V1_UAR_NUM 256 -#define HNS_ROCE_V1_PHY_UAR_NUM 8 - -#define HNS_ROCE_V1_GID_NUM 16 -#define HNS_ROCE_V1_RESV_QP 8 - -#define HNS_ROCE_V1_MAX_IRQ_NUM 34 -#define HNS_ROCE_V1_COMP_VEC_NUM 32 -#define HNS_ROCE_V1_AEQE_VEC_NUM 1 -#define HNS_ROCE_V1_ABNORMAL_VEC_NUM 1 - -#define HNS_ROCE_V1_COMP_EQE_NUM 0x8000 -#define HNS_ROCE_V1_ASYNC_EQE_NUM 0x400 - -#define HNS_ROCE_V1_QPC_SIZE 256 -#define HNS_ROCE_V1_IRRL_ENTRY_SIZE 8 -#define HNS_ROCE_V1_CQC_ENTRY_SIZE 64 -#define HNS_ROCE_V1_MTPT_ENTRY_SIZE 64 -#define HNS_ROCE_V1_MTT_ENTRY_SIZE 64 - -#define HNS_ROCE_V1_CQE_SIZE 32 -#define HNS_ROCE_V1_PAGE_SIZE_SUPPORT 0xFFFFF000 - -#define HNS_ROCE_V1_TABLE_CHUNK_SIZE (1 << 17) - -#define HNS_ROCE_V1_EXT_RAQ_WF 8 -#define HNS_ROCE_V1_RAQ_ENTRY 64 -#define HNS_ROCE_V1_RAQ_DEPTH 32768 -#define HNS_ROCE_V1_RAQ_SIZE (HNS_ROCE_V1_RAQ_ENTRY * HNS_ROCE_V1_RAQ_DEPTH) - -#define HNS_ROCE_V1_SDB_DEPTH 0x400 -#define HNS_ROCE_V1_ODB_DEPTH 0x400 - -#define HNS_ROCE_V1_DB_RSVD 0x80 - -#define HNS_ROCE_V1_SDB_ALEPT HNS_ROCE_V1_DB_RSVD -#define HNS_ROCE_V1_SDB_ALFUL (HNS_ROCE_V1_SDB_DEPTH - HNS_ROCE_V1_DB_RSVD) -#define HNS_ROCE_V1_ODB_ALEPT HNS_ROCE_V1_DB_RSVD -#define HNS_ROCE_V1_ODB_ALFUL (HNS_ROCE_V1_ODB_DEPTH - HNS_ROCE_V1_DB_RSVD) - -#define HNS_ROCE_V1_EXT_SDB_DEPTH 0x4000 -#define HNS_ROCE_V1_EXT_ODB_DEPTH 0x4000 -#define HNS_ROCE_V1_EXT_SDB_ENTRY 16 -#define 
HNS_ROCE_V1_EXT_ODB_ENTRY 16 -#define HNS_ROCE_V1_EXT_SDB_SIZE \ - (HNS_ROCE_V1_EXT_SDB_DEPTH * HNS_ROCE_V1_EXT_SDB_ENTRY) -#define HNS_ROCE_V1_EXT_ODB_SIZE \ - (HNS_ROCE_V1_EXT_ODB_DEPTH * HNS_ROCE_V1_EXT_ODB_ENTRY) - -#define HNS_ROCE_V1_EXT_SDB_ALEPT HNS_ROCE_V1_DB_RSVD -#define HNS_ROCE_V1_EXT_SDB_ALFUL \ - (HNS_ROCE_V1_EXT_SDB_DEPTH - HNS_ROCE_V1_DB_RSVD) -#define HNS_ROCE_V1_EXT_ODB_ALEPT HNS_ROCE_V1_DB_RSVD -#define HNS_ROCE_V1_EXT_ODB_ALFUL \ - (HNS_ROCE_V1_EXT_ODB_DEPTH - HNS_ROCE_V1_DB_RSVD) - -#define HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS 50000 -#define HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS 10000 -#define HNS_ROCE_V1_FREE_MR_WAIT_VALUE 5 -#define HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE 20 - -#define HNS_ROCE_BT_RSV_BUF_SIZE (1 << 17) - -#define HNS_ROCE_V1_TPTR_ENTRY_SIZE 2 -#define HNS_ROCE_V1_TPTR_BUF_SIZE \ - (HNS_ROCE_V1_TPTR_ENTRY_SIZE * HNS_ROCE_V1_MAX_CQ_NUM) - -#define HNS_ROCE_ODB_POLL_MODE 0 - -#define HNS_ROCE_SDB_NORMAL_MODE 0 -#define HNS_ROCE_SDB_EXTEND_MODE 1 - -#define HNS_ROCE_ODB_EXTEND_MODE 1 - -#define KEY_VALID 0x02 - -#define HNS_ROCE_CQE_QPN_MASK 0x3ffff -#define HNS_ROCE_CQE_STATUS_MASK 0x1f -#define HNS_ROCE_CQE_OPCODE_MASK 0xf - -#define HNS_ROCE_CQE_SUCCESS 0x00 -#define HNS_ROCE_CQE_SYNDROME_LOCAL_LENGTH_ERR 0x01 -#define HNS_ROCE_CQE_SYNDROME_LOCAL_QP_OP_ERR 0x02 -#define HNS_ROCE_CQE_SYNDROME_LOCAL_PROT_ERR 0x03 -#define HNS_ROCE_CQE_SYNDROME_WR_FLUSH_ERR 0x04 -#define HNS_ROCE_CQE_SYNDROME_MEM_MANAGE_OPERATE_ERR 0x05 -#define HNS_ROCE_CQE_SYNDROME_BAD_RESP_ERR 0x06 -#define HNS_ROCE_CQE_SYNDROME_LOCAL_ACCESS_ERR 0x07 -#define HNS_ROCE_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR 0x08 -#define HNS_ROCE_CQE_SYNDROME_REMOTE_ACCESS_ERR 0x09 -#define HNS_ROCE_CQE_SYNDROME_REMOTE_OP_ERR 0x0a -#define HNS_ROCE_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR 0x0b -#define HNS_ROCE_CQE_SYNDROME_RNR_RETRY_EXC_ERR 0x0c - -#define QP1C_CFGN_OFFSET 0x28 -#define PHY_PORT_OFFSET 0x8 -#define MTPT_IDX_SHIFT 16 -#define ALL_PORT_VAL_OPEN 0x3f -#define 
POL_TIME_INTERVAL_VAL 0x80 -#define SLEEP_TIME_INTERVAL 20 -#define SQ_PSN_SHIFT 8 -#define QKEY_VAL 0x80010000 -#define SDB_INV_CNT_OFFSET 8 - -#define HNS_ROCE_CEQ_DEFAULT_INTERVAL 0x10 -#define HNS_ROCE_CEQ_DEFAULT_BURST_NUM 0x10 - -#define HNS_ROCE_INT_MASK_DISABLE 0 -#define HNS_ROCE_INT_MASK_ENABLE 1 - -#define CEQ_REG_OFFSET 0x18 - -#define HNS_ROCE_CEQE_CEQE_COMP_OWNER_S 0 - -#define HNS_ROCE_V1_CONS_IDX_M GENMASK(15, 0) - -#define HNS_ROCE_CEQE_CEQE_COMP_CQN_S 16 -#define HNS_ROCE_CEQE_CEQE_COMP_CQN_M GENMASK(31, 16) - -#define HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S 16 -#define HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M GENMASK(23, 16) - -#define HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S 24 -#define HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M GENMASK(30, 24) - -#define HNS_ROCE_AEQE_U32_4_OWNER_S 31 - -#define HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S 0 -#define HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M GENMASK(23, 0) - -#define HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S 25 -#define HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_M GENMASK(27, 25) - -#define HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S 0 -#define HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M GENMASK(15, 0) - -#define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S 0 -#define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M GENMASK(4, 0) - -/* Local Work Queue Catastrophic Error,SUBTYPE 0x5 */ -enum { - HNS_ROCE_LWQCE_QPC_ERROR = 1, - HNS_ROCE_LWQCE_MTU_ERROR, - HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR, - HNS_ROCE_LWQCE_WQE_ADDR_ERROR, - HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR, - HNS_ROCE_LWQCE_SL_ERROR, - HNS_ROCE_LWQCE_PORT_ERROR, -}; - -/* Local Access Violation Work Queue Error,SUBTYPE 0x7 */ -enum { - HNS_ROCE_LAVWQE_R_KEY_VIOLATION = 1, - HNS_ROCE_LAVWQE_LENGTH_ERROR, - HNS_ROCE_LAVWQE_VA_ERROR, - HNS_ROCE_LAVWQE_PD_ERROR, - HNS_ROCE_LAVWQE_RW_ACC_ERROR, - HNS_ROCE_LAVWQE_KEY_STATE_ERROR, - HNS_ROCE_LAVWQE_MR_OPERATION_ERROR, -}; - -/* DOORBELL overflow subtype */ -enum { - HNS_ROCE_DB_SUBTYPE_SDB_OVF = 1, - HNS_ROCE_DB_SUBTYPE_SDB_ALM_OVF, - HNS_ROCE_DB_SUBTYPE_ODB_OVF, - 
HNS_ROCE_DB_SUBTYPE_ODB_ALM_OVF, - HNS_ROCE_DB_SUBTYPE_SDB_ALM_EMP, - HNS_ROCE_DB_SUBTYPE_ODB_ALM_EMP, -}; - -enum { - /* RQ&SRQ related operations */ - HNS_ROCE_OPCODE_SEND_DATA_RECEIVE = 0x06, - HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE, -}; - -enum { - HNS_ROCE_PORT_DOWN = 0, - HNS_ROCE_PORT_UP, -}; - -struct hns_roce_cq_context { - __le32 cqc_byte_4; - __le32 cq_bt_l; - __le32 cqc_byte_12; - __le32 cur_cqe_ba0_l; - __le32 cqc_byte_20; - __le32 cqe_tptr_addr_l; - __le32 cur_cqe_ba1_l; - __le32 cqc_byte_32; -}; - -#define CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_S 0 -#define CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_M \ - (((1UL << 2) - 1) << CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_S) - -#define CQ_CONTEXT_CQC_BYTE_4_CQN_S 16 -#define CQ_CONTEXT_CQC_BYTE_4_CQN_M \ - (((1UL << 16) - 1) << CQ_CONTEXT_CQC_BYTE_4_CQN_S) - -#define CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_S 0 -#define CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_M \ - (((1UL << 17) - 1) << CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_S) - -#define CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_S 20 -#define CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_M \ - (((1UL << 4) - 1) << CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_S) - -#define CQ_CONTEXT_CQC_BYTE_12_CEQN_S 24 -#define CQ_CONTEXT_CQC_BYTE_12_CEQN_M \ - (((1UL << 5) - 1) << CQ_CONTEXT_CQC_BYTE_12_CEQN_S) - -#define CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S 0 -#define CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_M \ - (((1UL << 5) - 1) << CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S) - -#define CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_S 16 -#define CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_M \ - (((1UL << 16) - 1) << CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_S) - -#define CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S 8 -#define CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_M \ - (((1UL << 5) - 1) << CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S) - -#define CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_S 0 -#define CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_M \ - (((1UL << 5) - 1) << CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_S) - -#define CQ_CONTEXT_CQC_BYTE_32_SE_FLAG_S 9 - -#define 
CQ_CONTEXT_CQC_BYTE_32_CE_FLAG_S 8 -#define CQ_CONTEXT_CQC_BYTE_32_NOTIFICATION_FLAG_S 14 -#define CQ_CQNTEXT_CQC_BYTE_32_TYPE_OF_COMPLETION_NOTIFICATION_S 15 - -#define CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S 16 -#define CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_M \ - (((1UL << 16) - 1) << CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S) - -struct hns_roce_cqe { - __le32 cqe_byte_4; - union { - __le32 r_key; - __le32 immediate_data; - }; - __le32 byte_cnt; - __le32 cqe_byte_16; - __le32 cqe_byte_20; - __le32 s_mac_l; - __le32 cqe_byte_28; - __le32 reserved; -}; - -#define CQE_BYTE_4_OWNER_S 7 -#define CQE_BYTE_4_SQ_RQ_FLAG_S 14 - -#define CQE_BYTE_4_STATUS_OF_THE_OPERATION_S 8 -#define CQE_BYTE_4_STATUS_OF_THE_OPERATION_M \ - (((1UL << 5) - 1) << CQE_BYTE_4_STATUS_OF_THE_OPERATION_S) - -#define CQE_BYTE_4_WQE_INDEX_S 16 -#define CQE_BYTE_4_WQE_INDEX_M (((1UL << 14) - 1) << CQE_BYTE_4_WQE_INDEX_S) - -#define CQE_BYTE_4_OPERATION_TYPE_S 0 -#define CQE_BYTE_4_OPERATION_TYPE_M \ - (((1UL << 4) - 1) << CQE_BYTE_4_OPERATION_TYPE_S) - -#define CQE_BYTE_4_IMM_INDICATOR_S 15 - -#define CQE_BYTE_16_LOCAL_QPN_S 0 -#define CQE_BYTE_16_LOCAL_QPN_M (((1UL << 24) - 1) << CQE_BYTE_16_LOCAL_QPN_S) - -#define CQE_BYTE_20_PORT_NUM_S 26 -#define CQE_BYTE_20_PORT_NUM_M (((1UL << 3) - 1) << CQE_BYTE_20_PORT_NUM_S) - -#define CQE_BYTE_20_SL_S 24 -#define CQE_BYTE_20_SL_M (((1UL << 2) - 1) << CQE_BYTE_20_SL_S) - -#define CQE_BYTE_20_REMOTE_QPN_S 0 -#define CQE_BYTE_20_REMOTE_QPN_M \ - (((1UL << 24) - 1) << CQE_BYTE_20_REMOTE_QPN_S) - -#define CQE_BYTE_20_GRH_PRESENT_S 29 - -#define CQE_BYTE_28_P_KEY_IDX_S 16 -#define CQE_BYTE_28_P_KEY_IDX_M (((1UL << 16) - 1) << CQE_BYTE_28_P_KEY_IDX_S) - -#define CQ_DB_REQ_NOT_SOL 0 -#define CQ_DB_REQ_NOT (1 << 16) - -struct hns_roce_v1_mpt_entry { - __le32 mpt_byte_4; - __le32 pbl_addr_l; - __le32 mpt_byte_12; - __le32 virt_addr_l; - __le32 virt_addr_h; - __le32 length; - __le32 mpt_byte_28; - __le32 pa0_l; - __le32 mpt_byte_36; - __le32 mpt_byte_40; - __le32 mpt_byte_44; 
- __le32 mpt_byte_48; - __le32 pa4_l; - __le32 mpt_byte_56; - __le32 mpt_byte_60; - __le32 mpt_byte_64; -}; - -#define MPT_BYTE_4_KEY_STATE_S 0 -#define MPT_BYTE_4_KEY_STATE_M (((1UL << 2) - 1) << MPT_BYTE_4_KEY_STATE_S) - -#define MPT_BYTE_4_KEY_S 8 -#define MPT_BYTE_4_KEY_M (((1UL << 8) - 1) << MPT_BYTE_4_KEY_S) - -#define MPT_BYTE_4_PAGE_SIZE_S 16 -#define MPT_BYTE_4_PAGE_SIZE_M (((1UL << 2) - 1) << MPT_BYTE_4_PAGE_SIZE_S) - -#define MPT_BYTE_4_MW_TYPE_S 20 - -#define MPT_BYTE_4_MW_BIND_ENABLE_S 21 - -#define MPT_BYTE_4_OWN_S 22 - -#define MPT_BYTE_4_MEMORY_LOCATION_TYPE_S 24 -#define MPT_BYTE_4_MEMORY_LOCATION_TYPE_M \ - (((1UL << 2) - 1) << MPT_BYTE_4_MEMORY_LOCATION_TYPE_S) - -#define MPT_BYTE_4_REMOTE_ATOMIC_S 26 -#define MPT_BYTE_4_LOCAL_WRITE_S 27 -#define MPT_BYTE_4_REMOTE_WRITE_S 28 -#define MPT_BYTE_4_REMOTE_READ_S 29 -#define MPT_BYTE_4_REMOTE_INVAL_ENABLE_S 30 -#define MPT_BYTE_4_ADDRESS_TYPE_S 31 - -#define MPT_BYTE_12_PBL_ADDR_H_S 0 -#define MPT_BYTE_12_PBL_ADDR_H_M \ - (((1UL << 17) - 1) << MPT_BYTE_12_PBL_ADDR_H_S) - -#define MPT_BYTE_12_MW_BIND_COUNTER_S 17 -#define MPT_BYTE_12_MW_BIND_COUNTER_M \ - (((1UL << 15) - 1) << MPT_BYTE_12_MW_BIND_COUNTER_S) - -#define MPT_BYTE_28_PD_S 0 -#define MPT_BYTE_28_PD_M (((1UL << 16) - 1) << MPT_BYTE_28_PD_S) - -#define MPT_BYTE_28_L_KEY_IDX_L_S 16 -#define MPT_BYTE_28_L_KEY_IDX_L_M \ - (((1UL << 16) - 1) << MPT_BYTE_28_L_KEY_IDX_L_S) - -#define MPT_BYTE_36_PA0_H_S 0 -#define MPT_BYTE_36_PA0_H_M (((1UL << 5) - 1) << MPT_BYTE_36_PA0_H_S) - -#define MPT_BYTE_36_PA1_L_S 8 -#define MPT_BYTE_36_PA1_L_M (((1UL << 24) - 1) << MPT_BYTE_36_PA1_L_S) - -#define MPT_BYTE_40_PA1_H_S 0 -#define MPT_BYTE_40_PA1_H_M (((1UL << 13) - 1) << MPT_BYTE_40_PA1_H_S) - -#define MPT_BYTE_40_PA2_L_S 16 -#define MPT_BYTE_40_PA2_L_M (((1UL << 16) - 1) << MPT_BYTE_40_PA2_L_S) - -#define MPT_BYTE_44_PA2_H_S 0 -#define MPT_BYTE_44_PA2_H_M (((1UL << 21) - 1) << MPT_BYTE_44_PA2_H_S) - -#define MPT_BYTE_44_PA3_L_S 24 -#define 
MPT_BYTE_44_PA3_L_M (((1UL << 8) - 1) << MPT_BYTE_44_PA3_L_S) - -#define MPT_BYTE_48_PA3_H_S 0 -#define MPT_BYTE_48_PA3_H_M (((1UL << 29) - 1) << MPT_BYTE_48_PA3_H_S) - -#define MPT_BYTE_56_PA4_H_S 0 -#define MPT_BYTE_56_PA4_H_M (((1UL << 5) - 1) << MPT_BYTE_56_PA4_H_S) - -#define MPT_BYTE_56_PA5_L_S 8 -#define MPT_BYTE_56_PA5_L_M (((1UL << 24) - 1) << MPT_BYTE_56_PA5_L_S) - -#define MPT_BYTE_60_PA5_H_S 0 -#define MPT_BYTE_60_PA5_H_M (((1UL << 13) - 1) << MPT_BYTE_60_PA5_H_S) - -#define MPT_BYTE_60_PA6_L_S 16 -#define MPT_BYTE_60_PA6_L_M (((1UL << 16) - 1) << MPT_BYTE_60_PA6_L_S) - -#define MPT_BYTE_64_PA6_H_S 0 -#define MPT_BYTE_64_PA6_H_M (((1UL << 21) - 1) << MPT_BYTE_64_PA6_H_S) - -#define MPT_BYTE_64_L_KEY_IDX_H_S 24 -#define MPT_BYTE_64_L_KEY_IDX_H_M \ - (((1UL << 8) - 1) << MPT_BYTE_64_L_KEY_IDX_H_S) - -struct hns_roce_wqe_ctrl_seg { - __le32 sgl_pa_h; - __le32 flag; - union { - __be32 imm_data; - __le32 inv_key; - }; - __le32 msg_length; -}; - -struct hns_roce_wqe_data_seg { - __le64 addr; - __le32 lkey; - __le32 len; -}; - -struct hns_roce_wqe_raddr_seg { - __le32 rkey; - __le32 len; /* reserved */ - __le64 raddr; -}; - -struct hns_roce_rq_wqe_ctrl { - __le32 rwqe_byte_4; - __le32 rocee_sgl_ba_l; - __le32 rwqe_byte_12; - __le32 reserved[5]; -}; - -#define RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_S 16 -#define RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_M \ - (((1UL << 6) - 1) << RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_S) - -#define HNS_ROCE_QP_DESTROY_TIMEOUT_MSECS 10000 - -#define GID_LEN 16 - -struct hns_roce_ud_send_wqe { - __le32 dmac_h; - __le32 u32_8; - __le32 immediate_data; - - __le32 u32_16; - union { - unsigned char dgid[GID_LEN]; - struct { - __le32 u32_20; - __le32 u32_24; - __le32 u32_28; - __le32 u32_32; - }; - }; - - __le32 u32_36; - __le32 u32_40; - - __le32 va0_l; - __le32 va0_h; - __le32 l_key0; - - __le32 va1_l; - __le32 va1_h; - __le32 l_key1; -}; - -#define UD_SEND_WQE_U32_4_DMAC_0_S 0 -#define UD_SEND_WQE_U32_4_DMAC_0_M \ - (((1UL << 8) - 1) << 
UD_SEND_WQE_U32_4_DMAC_0_S) - -#define UD_SEND_WQE_U32_4_DMAC_1_S 8 -#define UD_SEND_WQE_U32_4_DMAC_1_M \ - (((1UL << 8) - 1) << UD_SEND_WQE_U32_4_DMAC_1_S) - -#define UD_SEND_WQE_U32_4_DMAC_2_S 16 -#define UD_SEND_WQE_U32_4_DMAC_2_M \ - (((1UL << 8) - 1) << UD_SEND_WQE_U32_4_DMAC_2_S) - -#define UD_SEND_WQE_U32_4_DMAC_3_S 24 -#define UD_SEND_WQE_U32_4_DMAC_3_M \ - (((1UL << 8) - 1) << UD_SEND_WQE_U32_4_DMAC_3_S) - -#define UD_SEND_WQE_U32_8_DMAC_4_S 0 -#define UD_SEND_WQE_U32_8_DMAC_4_M \ - (((1UL << 8) - 1) << UD_SEND_WQE_U32_8_DMAC_4_S) - -#define UD_SEND_WQE_U32_8_DMAC_5_S 8 -#define UD_SEND_WQE_U32_8_DMAC_5_M \ - (((1UL << 8) - 1) << UD_SEND_WQE_U32_8_DMAC_5_S) - -#define UD_SEND_WQE_U32_8_LOOPBACK_INDICATOR_S 22 - -#define UD_SEND_WQE_U32_8_OPERATION_TYPE_S 16 -#define UD_SEND_WQE_U32_8_OPERATION_TYPE_M \ - (((1UL << 4) - 1) << UD_SEND_WQE_U32_8_OPERATION_TYPE_S) - -#define UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_S 24 -#define UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_M \ - (((1UL << 6) - 1) << UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_S) - -#define UD_SEND_WQE_U32_8_SEND_GL_ROUTING_HDR_FLAG_S 31 - -#define UD_SEND_WQE_U32_16_DEST_QP_S 0 -#define UD_SEND_WQE_U32_16_DEST_QP_M \ - (((1UL << 24) - 1) << UD_SEND_WQE_U32_16_DEST_QP_S) - -#define UD_SEND_WQE_U32_16_MAX_STATIC_RATE_S 24 -#define UD_SEND_WQE_U32_16_MAX_STATIC_RATE_M \ - (((1UL << 8) - 1) << UD_SEND_WQE_U32_16_MAX_STATIC_RATE_S) - -#define UD_SEND_WQE_U32_36_FLOW_LABEL_S 0 -#define UD_SEND_WQE_U32_36_FLOW_LABEL_M \ - (((1UL << 20) - 1) << UD_SEND_WQE_U32_36_FLOW_LABEL_S) - -#define UD_SEND_WQE_U32_36_PRIORITY_S 20 -#define UD_SEND_WQE_U32_36_PRIORITY_M \ - (((1UL << 4) - 1) << UD_SEND_WQE_U32_36_PRIORITY_S) - -#define UD_SEND_WQE_U32_36_SGID_INDEX_S 24 -#define UD_SEND_WQE_U32_36_SGID_INDEX_M \ - (((1UL << 8) - 1) << UD_SEND_WQE_U32_36_SGID_INDEX_S) - -#define UD_SEND_WQE_U32_40_HOP_LIMIT_S 0 -#define UD_SEND_WQE_U32_40_HOP_LIMIT_M \ - (((1UL << 8) - 1) << UD_SEND_WQE_U32_40_HOP_LIMIT_S) - -#define 
UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S 8 -#define UD_SEND_WQE_U32_40_TRAFFIC_CLASS_M \ - (((1UL << 8) - 1) << UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S) - -struct hns_roce_sqp_context { - __le32 qp1c_bytes_4; - __le32 sq_rq_bt_l; - __le32 qp1c_bytes_12; - __le32 qp1c_bytes_16; - __le32 qp1c_bytes_20; - __le32 cur_rq_wqe_ba_l; - __le32 qp1c_bytes_28; - __le32 qp1c_bytes_32; - __le32 cur_sq_wqe_ba_l; - __le32 qp1c_bytes_40; -}; - -#define QP1C_BYTES_4_QP_STATE_S 0 -#define QP1C_BYTES_4_QP_STATE_M \ - (((1UL << 3) - 1) << QP1C_BYTES_4_QP_STATE_S) - -#define QP1C_BYTES_4_SQ_WQE_SHIFT_S 8 -#define QP1C_BYTES_4_SQ_WQE_SHIFT_M \ - (((1UL << 4) - 1) << QP1C_BYTES_4_SQ_WQE_SHIFT_S) - -#define QP1C_BYTES_4_RQ_WQE_SHIFT_S 12 -#define QP1C_BYTES_4_RQ_WQE_SHIFT_M \ - (((1UL << 4) - 1) << QP1C_BYTES_4_RQ_WQE_SHIFT_S) - -#define QP1C_BYTES_4_PD_S 16 -#define QP1C_BYTES_4_PD_M (((1UL << 16) - 1) << QP1C_BYTES_4_PD_S) - -#define QP1C_BYTES_12_SQ_RQ_BT_H_S 0 -#define QP1C_BYTES_12_SQ_RQ_BT_H_M \ - (((1UL << 17) - 1) << QP1C_BYTES_12_SQ_RQ_BT_H_S) - -#define QP1C_BYTES_16_RQ_HEAD_S 0 -#define QP1C_BYTES_16_RQ_HEAD_M (((1UL << 15) - 1) << QP1C_BYTES_16_RQ_HEAD_S) - -#define QP1C_BYTES_16_PORT_NUM_S 16 -#define QP1C_BYTES_16_PORT_NUM_M \ - (((1UL << 3) - 1) << QP1C_BYTES_16_PORT_NUM_S) - -#define QP1C_BYTES_16_SIGNALING_TYPE_S 27 -#define QP1C_BYTES_16_LOCAL_ENABLE_E2E_CREDIT_S 28 -#define QP1C_BYTES_16_RQ_BA_FLG_S 29 -#define QP1C_BYTES_16_SQ_BA_FLG_S 30 -#define QP1C_BYTES_16_QP1_ERR_S 31 - -#define QP1C_BYTES_20_SQ_HEAD_S 0 -#define QP1C_BYTES_20_SQ_HEAD_M (((1UL << 15) - 1) << QP1C_BYTES_20_SQ_HEAD_S) - -#define QP1C_BYTES_20_PKEY_IDX_S 16 -#define QP1C_BYTES_20_PKEY_IDX_M \ - (((1UL << 16) - 1) << QP1C_BYTES_20_PKEY_IDX_S) - -#define QP1C_BYTES_28_CUR_RQ_WQE_BA_H_S 0 -#define QP1C_BYTES_28_CUR_RQ_WQE_BA_H_M \ - (((1UL << 5) - 1) << QP1C_BYTES_28_CUR_RQ_WQE_BA_H_S) - -#define QP1C_BYTES_28_RQ_CUR_IDX_S 16 -#define QP1C_BYTES_28_RQ_CUR_IDX_M \ - (((1UL << 15) - 1) << 
QP1C_BYTES_28_RQ_CUR_IDX_S) - -#define QP1C_BYTES_32_TX_CQ_NUM_S 0 -#define QP1C_BYTES_32_TX_CQ_NUM_M \ - (((1UL << 16) - 1) << QP1C_BYTES_32_TX_CQ_NUM_S) - -#define QP1C_BYTES_32_RX_CQ_NUM_S 16 -#define QP1C_BYTES_32_RX_CQ_NUM_M \ - (((1UL << 16) - 1) << QP1C_BYTES_32_RX_CQ_NUM_S) - -#define QP1C_BYTES_40_CUR_SQ_WQE_BA_H_S 0 -#define QP1C_BYTES_40_CUR_SQ_WQE_BA_H_M \ - (((1UL << 5) - 1) << QP1C_BYTES_40_CUR_SQ_WQE_BA_H_S) - -#define QP1C_BYTES_40_SQ_CUR_IDX_S 16 -#define QP1C_BYTES_40_SQ_CUR_IDX_M \ - (((1UL << 15) - 1) << QP1C_BYTES_40_SQ_CUR_IDX_S) - -#define HNS_ROCE_WQE_INLINE (1UL<<31) -#define HNS_ROCE_WQE_SE (1UL<<30) - -#define HNS_ROCE_WQE_SGE_NUM_BIT 24 -#define HNS_ROCE_WQE_IMM (1UL<<23) -#define HNS_ROCE_WQE_FENCE (1UL<<21) -#define HNS_ROCE_WQE_CQ_NOTIFY (1UL<<20) - -#define HNS_ROCE_WQE_OPCODE_SEND (0<<16) -#define HNS_ROCE_WQE_OPCODE_RDMA_READ (1<<16) -#define HNS_ROCE_WQE_OPCODE_RDMA_WRITE (2<<16) -#define HNS_ROCE_WQE_OPCODE_LOCAL_INV (4<<16) -#define HNS_ROCE_WQE_OPCODE_UD_SEND (7<<16) -#define HNS_ROCE_WQE_OPCODE_MASK (15<<16) - -struct hns_roce_qp_context { - __le32 qpc_bytes_4; - __le32 qpc_bytes_8; - __le32 qpc_bytes_12; - __le32 qpc_bytes_16; - __le32 sq_rq_bt_l; - __le32 qpc_bytes_24; - __le32 irrl_ba_l; - __le32 qpc_bytes_32; - __le32 qpc_bytes_36; - __le32 dmac_l; - __le32 qpc_bytes_44; - __le32 qpc_bytes_48; - u8 dgid[16]; - __le32 qpc_bytes_68; - __le32 cur_rq_wqe_ba_l; - __le32 qpc_bytes_76; - __le32 rx_rnr_time; - __le32 qpc_bytes_84; - __le32 qpc_bytes_88; - union { - __le32 rx_sge_len; - __le32 dma_length; - }; - union { - __le32 rx_sge_num; - __le32 rx_send_pktn; - __le32 r_key; - }; - __le32 va_l; - __le32 va_h; - __le32 qpc_bytes_108; - __le32 qpc_bytes_112; - __le32 rx_cur_sq_wqe_ba_l; - __le32 qpc_bytes_120; - __le32 qpc_bytes_124; - __le32 qpc_bytes_128; - __le32 qpc_bytes_132; - __le32 qpc_bytes_136; - __le32 qpc_bytes_140; - __le32 qpc_bytes_144; - __le32 qpc_bytes_148; - union { - __le32 rnr_retry; - __le32 ack_time; - }; - 
__le32 qpc_bytes_156; - __le32 pkt_use_len; - __le32 qpc_bytes_164; - __le32 qpc_bytes_168; - union { - __le32 sge_use_len; - __le32 pa_use_len; - }; - __le32 qpc_bytes_176; - __le32 qpc_bytes_180; - __le32 tx_cur_sq_wqe_ba_l; - __le32 qpc_bytes_188; - __le32 rvd21; -}; - -#define QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_S 0 -#define QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M \ - (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_S) - -#define QP_CONTEXT_QPC_BYTE_4_ENABLE_FPMR_S 3 -#define QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S 4 -#define QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S 5 -#define QP_CONTEXT_QPC_BYTE_4_ATOMIC_OPERATION_ENABLE_S 6 -#define QP_CONTEXT_QPC_BYTE_4_RDMAR_USE_S 7 - -#define QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_S 8 -#define QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_M \ - (((1UL << 4) - 1) << QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_S) - -#define QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_S 12 -#define QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_M \ - (((1UL << 4) - 1) << QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_S) - -#define QP_CONTEXT_QPC_BYTES_4_PD_S 16 -#define QP_CONTEXT_QPC_BYTES_4_PD_M \ - (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_4_PD_S) - -#define QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_S 0 -#define QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_M \ - (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_S) - -#define QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_S 16 -#define QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_M \ - (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_S) - -#define QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_S 0 -#define QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_M \ - (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_S) - -#define QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S 16 -#define QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M \ - (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S) - -#define QP_CONTEXT_QPC_BYTES_16_QP_NUM_S 0 -#define QP_CONTEXT_QPC_BYTES_16_QP_NUM_M \ - (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_16_QP_NUM_S) - -#define 
QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S 0 -#define QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_M \ - (((1UL << 17) - 1) << QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S) - -#define QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S 18 -#define QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_M \ - (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S) - -#define QP_CONTEXT_QPC_BYTE_24_REMOTE_ENABLE_E2E_CREDITS_S 23 - -#define QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_S 0 -#define QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_M \ - (((1UL << 17) - 1) << QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_S) - -#define QP_CONTEXT_QPC_BYTES_32_MIG_STATE_S 18 -#define QP_CONTEXT_QPC_BYTES_32_MIG_STATE_M \ - (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_32_MIG_STATE_S) - -#define QP_CONTEXT_QPC_BYTE_32_LOCAL_ENABLE_E2E_CREDITS_S 20 -#define QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S 21 -#define QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S 22 -#define QP_CONTEXT_QPC_BYTE_32_GLOBAL_HEADER_S 23 - -#define QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S 24 -#define QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_M \ - (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S) - -#define QP_CONTEXT_QPC_BYTES_36_DEST_QP_S 0 -#define QP_CONTEXT_QPC_BYTES_36_DEST_QP_M \ - (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_36_DEST_QP_S) - -#define QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S 24 -#define QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_M \ - (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S) - -#define QP_CONTEXT_QPC_BYTES_44_DMAC_H_S 0 -#define QP_CONTEXT_QPC_BYTES_44_DMAC_H_M \ - (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_44_DMAC_H_S) - -#define QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_S 16 -#define QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_M \ - (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_S) - -#define QP_CONTEXT_QPC_BYTES_44_HOPLMT_S 24 -#define QP_CONTEXT_QPC_BYTES_44_HOPLMT_M \ - (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_44_HOPLMT_S) - -#define QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S 0 -#define 
QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_M \ - (((1UL << 20) - 1) << QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S) - -#define QP_CONTEXT_QPC_BYTES_48_TCLASS_S 20 -#define QP_CONTEXT_QPC_BYTES_48_TCLASS_M \ - (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_48_TCLASS_S) - -#define QP_CONTEXT_QPC_BYTES_48_MTU_S 28 -#define QP_CONTEXT_QPC_BYTES_48_MTU_M \ - (((1UL << 4) - 1) << QP_CONTEXT_QPC_BYTES_48_MTU_S) - -#define QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_S 0 -#define QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_M \ - (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_S) - -#define QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S 16 -#define QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_M \ - (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S) - -#define QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_S 0 -#define QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_M \ - (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_S) - -#define QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_S 8 -#define QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_M \ - (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_S) - -#define QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_S 0 -#define QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_M \ - (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_S) - -#define QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_S 24 -#define QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_M \ - (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_S) - -#define QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_S 0 -#define QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_M \ - (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_S) - -#define QP_CONTEXT_QPC_BYTES_88_RX_REQ_PSN_ERR_FLAG_S 24 -#define QP_CONTEXT_QPC_BYTES_88_RX_LAST_OPCODE_FLG_S 25 - -#define QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_S 26 -#define QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_M \ - (((1UL << 2) - 1) << \ - QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_S) - -#define QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_S 29 -#define QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_M \ - (((1UL << 2) - 1) << 
QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_S) - -#define QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_S 0 -#define QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_M \ - (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_S) - -#define QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_FLG_S 24 -#define QP_CONTEXT_QPC_BYTES_108_TRRL_TDB_PSN_FLG_S 25 - -#define QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_S 0 -#define QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_M \ - (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_S) - -#define QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_S 24 -#define QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_M \ - (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_S) - -#define QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_S 0 -#define QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_M \ - (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_S) - -#define QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_S 0 -#define QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_M \ - (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_S) - -#define QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_S 16 -#define QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_M \ - (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_S) - -#define QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_S 0 -#define QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_M \ - (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_S) - -#define QP_CONTEXT_QPC_BYTES_128_RX_ACK_PSN_ERR_FLG_S 24 - -#define QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_S 25 -#define QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_M \ - (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_S) - -#define QP_CONTEXT_QPC_BYTES_128_IRRL_PSN_VLD_FLG_S 27 - -#define QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_S 0 -#define QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_M \ - (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_S) - -#define QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_S 24 -#define QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_M \ - (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_S) - -#define 
QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_S 0 -#define QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_M \ - (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_S) - -#define QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_S 24 -#define QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_M \ - (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_S) - -#define QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_S 0 -#define QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_M \ - (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_S) - -#define QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_S 16 -#define QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_M \ - (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_S) - -#define QP_CONTEXT_QPC_BYTES_140_RNR_RETRY_FLG_S 31 - -#define QP_CONTEXT_QPC_BYTES_144_QP_STATE_S 0 -#define QP_CONTEXT_QPC_BYTES_144_QP_STATE_M \ - (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_144_QP_STATE_S) - -#define QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_S 0 -#define QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_M \ - (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_S) - -#define QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S 2 -#define QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M \ - (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S) - -#define QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_S 5 -#define QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_M \ - (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_S) - -#define QP_CONTEXT_QPC_BYTES_148_LSN_S 8 -#define QP_CONTEXT_QPC_BYTES_148_LSN_M \ - (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_148_LSN_S) - -#define QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_S 0 -#define QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_M \ - (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_S) - -#define QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S 3 -#define QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_M \ - (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S) - -#define QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_S 8 -#define 
QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_M \ - (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_S) - -#define QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S 11 -#define QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M \ - (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S) - -#define QP_CONTEXT_QPC_BYTES_156_SL_S 14 -#define QP_CONTEXT_QPC_BYTES_156_SL_M \ - (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_156_SL_S) - -#define QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S 16 -#define QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M \ - (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S) - -#define QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_S 24 -#define QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_M \ - (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_S) - -#define QP_CONTEXT_QPC_BYTES_164_SQ_PSN_S 0 -#define QP_CONTEXT_QPC_BYTES_164_SQ_PSN_M \ - (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_164_SQ_PSN_S) - -#define QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_S 24 -#define QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_M \ - (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_S) - -#define QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_S 0 -#define QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_M \ - (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_S) - -#define QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_S 24 -#define QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_M \ - (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_S) - -#define QP_CONTEXT_QPC_BYTES_168_DB_TYPE_S 26 -#define QP_CONTEXT_QPC_BYTES_168_DB_TYPE_M \ - (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_168_DB_TYPE_S) - -#define QP_CONTEXT_QPC_BYTES_168_MSG_LP_IND_S 28 -#define QP_CONTEXT_QPC_BYTES_168_CSDB_LP_IND_S 29 -#define QP_CONTEXT_QPC_BYTES_168_QP_ERR_FLG_S 30 - -#define QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_S 0 -#define QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_M \ - (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_S) - -#define QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_S 16 -#define QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_M 
\ - (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_S) - -#define QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S 0 -#define QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_M \ - (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S) - -#define QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_S 16 -#define QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_M \ - (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_S) - -#define QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_S 0 -#define QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_M \ - (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_S) - -#define QP_CONTEXT_QPC_BYTES_188_PKT_RETRY_FLG_S 8 - -#define QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_S 16 -#define QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_M \ - (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_S) - -#define STATUS_MASK 0xff -#define GO_BIT_TIMEOUT_MSECS 10000 -#define HCR_STATUS_OFFSET 0x18 -#define HCR_GO_BIT 15 - -struct hns_roce_rq_db { - __le32 u32_4; - __le32 u32_8; -}; - -#define RQ_DOORBELL_U32_4_RQ_HEAD_S 0 -#define RQ_DOORBELL_U32_4_RQ_HEAD_M \ - (((1UL << 15) - 1) << RQ_DOORBELL_U32_4_RQ_HEAD_S) - -#define RQ_DOORBELL_U32_8_QPN_S 0 -#define RQ_DOORBELL_U32_8_QPN_M (((1UL << 24) - 1) << RQ_DOORBELL_U32_8_QPN_S) - -#define RQ_DOORBELL_U32_8_CMD_S 28 -#define RQ_DOORBELL_U32_8_CMD_M (((1UL << 3) - 1) << RQ_DOORBELL_U32_8_CMD_S) - -#define RQ_DOORBELL_U32_8_HW_SYNC_S 31 - -struct hns_roce_sq_db { - __le32 u32_4; - __le32 u32_8; -}; - -#define SQ_DOORBELL_U32_4_SQ_HEAD_S 0 -#define SQ_DOORBELL_U32_4_SQ_HEAD_M \ - (((1UL << 15) - 1) << SQ_DOORBELL_U32_4_SQ_HEAD_S) - -#define SQ_DOORBELL_U32_4_SL_S 16 -#define SQ_DOORBELL_U32_4_SL_M \ - (((1UL << 2) - 1) << SQ_DOORBELL_U32_4_SL_S) - -#define SQ_DOORBELL_U32_4_PORT_S 18 -#define SQ_DOORBELL_U32_4_PORT_M (((1UL << 3) - 1) << SQ_DOORBELL_U32_4_PORT_S) - -#define SQ_DOORBELL_U32_8_QPN_S 0 -#define SQ_DOORBELL_U32_8_QPN_M (((1UL << 24) - 1) << SQ_DOORBELL_U32_8_QPN_S) - -#define SQ_DOORBELL_HW_SYNC_S 
31 - -struct hns_roce_ext_db { - int esdb_dep; - int eodb_dep; - struct hns_roce_buf_list *sdb_buf_list; - struct hns_roce_buf_list *odb_buf_list; -}; - -struct hns_roce_db_table { - int sdb_ext_mod; - int odb_ext_mod; - struct hns_roce_ext_db *ext_db; -}; - -#define HW_SYNC_SLEEP_TIME_INTERVAL 20 -#define HW_SYNC_TIMEOUT_MSECS (25 * HW_SYNC_SLEEP_TIME_INTERVAL) -#define BT_CMD_SYNC_SHIFT 31 -#define HNS_ROCE_BA_SIZE (32 * 4096) - -struct hns_roce_bt_table { - struct hns_roce_buf_list qpc_buf; - struct hns_roce_buf_list mtpt_buf; - struct hns_roce_buf_list cqc_buf; -}; - -struct hns_roce_tptr_table { - struct hns_roce_buf_list tptr_buf; -}; - -struct hns_roce_qp_work { - struct work_struct work; - struct ib_device *ib_dev; - struct hns_roce_qp *qp; - u32 db_wait_stage; - u32 sdb_issue_ptr; - u32 sdb_inv_cnt; - u32 sche_cnt; -}; - -struct hns_roce_mr_free_work { - struct work_struct work; - struct ib_device *ib_dev; - struct completion *comp; - int comp_flag; - void *mr; -}; - -struct hns_roce_recreate_lp_qp_work { - struct work_struct work; - struct ib_device *ib_dev; - struct completion *comp; - int comp_flag; -}; - -struct hns_roce_free_mr { - struct workqueue_struct *free_mr_wq; - struct hns_roce_qp *mr_free_qp[HNS_ROCE_V1_RESV_QP]; - struct hns_roce_cq *mr_free_cq; - struct hns_roce_pd *mr_free_pd; -}; - -struct hns_roce_v1_priv { - struct hns_roce_db_table db_table; - struct hns_roce_raq_table raq_table; - struct hns_roce_bt_table bt_table; - struct hns_roce_tptr_table tptr_table; - struct hns_roce_free_mr free_mr; -}; - -int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset); -int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); -int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); - -#endif diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index bbfa1332dedc..1435fe2ea176 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ 
b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -55,6 +55,42 @@ enum { CMD_RST_PRC_EBUSY, }; +enum ecc_resource_type { + ECC_RESOURCE_QPC, + ECC_RESOURCE_CQC, + ECC_RESOURCE_MPT, + ECC_RESOURCE_SRQC, + ECC_RESOURCE_GMV, + ECC_RESOURCE_QPC_TIMER, + ECC_RESOURCE_CQC_TIMER, + ECC_RESOURCE_SCCC, + ECC_RESOURCE_COUNT, +}; + +static const struct { + const char *name; + u8 read_bt0_op; + u8 write_bt0_op; +} fmea_ram_res[] = { + { "ECC_RESOURCE_QPC", + HNS_ROCE_CMD_READ_QPC_BT0, HNS_ROCE_CMD_WRITE_QPC_BT0 }, + { "ECC_RESOURCE_CQC", + HNS_ROCE_CMD_READ_CQC_BT0, HNS_ROCE_CMD_WRITE_CQC_BT0 }, + { "ECC_RESOURCE_MPT", + HNS_ROCE_CMD_READ_MPT_BT0, HNS_ROCE_CMD_WRITE_MPT_BT0 }, + { "ECC_RESOURCE_SRQC", + HNS_ROCE_CMD_READ_SRQC_BT0, HNS_ROCE_CMD_WRITE_SRQC_BT0 }, + /* ECC_RESOURCE_GMV is handled by cmdq, not mailbox */ + { "ECC_RESOURCE_GMV", + 0, 0 }, + { "ECC_RESOURCE_QPC_TIMER", + HNS_ROCE_CMD_READ_QPC_TIMER_BT0, HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0 }, + { "ECC_RESOURCE_CQC_TIMER", + HNS_ROCE_CMD_READ_CQC_TIMER_BT0, HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0 }, + { "ECC_RESOURCE_SCCC", + HNS_ROCE_CMD_READ_SCCC_BT0, HNS_ROCE_CMD_WRITE_SCCC_BT0 }, +}; + static inline void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg, struct ib_sge *sg) { @@ -82,7 +118,6 @@ static const u32 hns_roce_op_code[] = { HR_OPC_MAP(ATOMIC_CMP_AND_SWP, ATOM_CMP_AND_SWAP), HR_OPC_MAP(ATOMIC_FETCH_AND_ADD, ATOM_FETCH_AND_ADD), HR_OPC_MAP(SEND_WITH_INV, SEND_WITH_INV), - HR_OPC_MAP(LOCAL_INV, LOCAL_INV), HR_OPC_MAP(MASKED_ATOMIC_CMP_AND_SWP, ATOM_MSK_CMP_AND_SWAP), HR_OPC_MAP(MASKED_ATOMIC_FETCH_AND_ADD, ATOM_MSK_FETCH_AND_ADD), HR_OPC_MAP(REG_MR, FAST_REG_PMR), @@ -149,8 +184,7 @@ static void set_atomic_seg(const struct ib_send_wr *wr, aseg->cmp_data = 0; } - roce_set_field(rc_sq_wqe->byte_16, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, - V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge); + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, valid_num_sge); } static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, @@ -158,8 
+192,7 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, unsigned int *sge_idx, u32 msg_len) { struct ib_device *ibdev = &(to_hr_dev(qp->ibqp.device))->ib_dev; - unsigned int dseg_len = sizeof(struct hns_roce_v2_wqe_data_seg); - unsigned int ext_sge_sz = qp->sq.max_gs * dseg_len; + unsigned int ext_sge_sz = qp->sq.max_gs * HNS_ROCE_SGE_SIZE; unsigned int left_len_in_pg; unsigned int idx = *sge_idx; unsigned int i = 0; @@ -187,7 +220,7 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, if (len <= left_len_in_pg) { memcpy(dseg, addr, len); - idx += len / dseg_len; + idx += len / HNS_ROCE_SGE_SIZE; i++; if (i >= wr->num_sge) @@ -202,7 +235,7 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, len -= left_len_in_pg; addr += left_len_in_pg; - idx += left_len_in_pg / dseg_len; + idx += left_len_in_pg / HNS_ROCE_SGE_SIZE; dseg = hns_roce_get_extend_sge(qp, idx & (qp->sge.sge_cnt - 1)); left_len_in_pg = 1 << HNS_HW_PAGE_SHIFT; @@ -271,8 +304,7 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ib_send_wr *wr, dseg += sizeof(struct hns_roce_v2_rc_send_wqe); if (msg_len <= HNS_ROCE_V2_MAX_RC_INL_INN_SZ) { - roce_set_bit(rc_sq_wqe->byte_20, - V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S, 0); + hr_reg_clear(rc_sq_wqe, RC_SEND_WQE_INL_TYPE); for (i = 0; i < wr->num_sge; i++) { memcpy(dseg, ((void *)wr->sg_list[i].addr), @@ -280,17 +312,13 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ib_send_wr *wr, dseg += wr->sg_list[i].length; } } else { - roce_set_bit(rc_sq_wqe->byte_20, - V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S, 1); + hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_INL_TYPE); ret = fill_ext_sge_inl_data(qp, wr, &curr_idx, msg_len); if (ret) return ret; - roce_set_field(rc_sq_wqe->byte_16, - V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, - V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, - curr_idx - *sge_idx); + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, curr_idx - *sge_idx); } *sge_idx = curr_idx; @@ -309,12 +337,10 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, 
const struct ib_send_wr *wr, int j = 0; int i; - roce_set_field(rc_sq_wqe->byte_20, - V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M, - V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, - (*sge_ind) & (qp->sge.sge_cnt - 1)); + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_MSG_START_SGE_IDX, + (*sge_ind) & (qp->sge.sge_cnt - 1)); - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S, + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_INLINE, !!(wr->send_flags & IB_SEND_INLINE)); if (wr->send_flags & IB_SEND_INLINE) return set_rc_inl(qp, wr, rc_sq_wqe, sge_ind); @@ -339,9 +365,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, valid_num_sge - HNS_ROCE_SGE_IN_WQE); } - roce_set_field(rc_sq_wqe->byte_16, - V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, - V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge); + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, valid_num_sge); return 0; } @@ -355,7 +379,7 @@ static int check_send_valid(struct hns_roce_dev *hr_dev, if (unlikely(ibqp->qp_type != IB_QPT_RC && ibqp->qp_type != IB_QPT_GSI && ibqp->qp_type != IB_QPT_UD)) { - ibdev_err(ibdev, "Not supported QP(0x%x)type!\n", + ibdev_err(ibdev, "not supported QP(0x%x)type!\n", ibqp->qp_type); return -EOPNOTSUPP; } else if (unlikely(hr_qp->state == IB_QPS_RESET || @@ -412,8 +436,7 @@ static int set_ud_opcode(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe, ud_sq_wqe->immtdata = get_immtdata(wr); - roce_set_field(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OPCODE_M, - V2_UD_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op)); + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_OPCODE, to_hr_opcode(ib_op)); return 0; } @@ -424,21 +447,15 @@ static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe, struct ib_device *ib_dev = ah->ibah.device; struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); - roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M, - V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, ah->av.udp_sport); - - roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M, - 
V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, ah->av.hop_limit); - roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M, - V2_UD_SEND_WQE_BYTE_36_TCLASS_S, ah->av.tclass); - roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M, - V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, ah->av.flowlabel); + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_UDPSPN, ah->av.udp_sport); + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_HOPLIMIT, ah->av.hop_limit); + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_TCLASS, ah->av.tclass); + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_FLOW_LABEL, ah->av.flowlabel); if (WARN_ON(ah->av.sl > MAX_SERVICE_LEVEL)) return -EINVAL; - roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M, - V2_UD_SEND_WQE_BYTE_40_SL_S, ah->av.sl); + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SL, ah->av.sl); ud_sq_wqe->sgid_index = ah->av.gid_index; @@ -448,10 +465,8 @@ static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe, if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) return 0; - roce_set_bit(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S, - ah->av.vlan_en); - roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M, - V2_UD_SEND_WQE_BYTE_36_VLAN_S, ah->av.vlan_id); + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_VLAN_EN, ah->av.vlan_en); + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_VLAN, ah->av.vlan_id); return 0; } @@ -476,27 +491,19 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, ud_sq_wqe->msg_len = cpu_to_le32(msg_len); - roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_CQE_S, + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_CQE, !!(wr->send_flags & IB_SEND_SIGNALED)); - - roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_SE_S, + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SE, !!(wr->send_flags & IB_SEND_SOLICITED)); - roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_PD_M, - V2_UD_SEND_WQE_BYTE_16_PD_S, to_hr_pd(qp->ibqp.pd)->pdn); - - roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M, - V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge); - - 
roce_set_field(ud_sq_wqe->byte_20, - V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M, - V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, - curr_idx & (qp->sge.sge_cnt - 1)); + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_PD, to_hr_pd(qp->ibqp.pd)->pdn); + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SGE_NUM, valid_num_sge); + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_MSG_START_SGE_IDX, + curr_idx & (qp->sge.sge_cnt - 1)); ud_sq_wqe->qkey = cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ? qp->qkey : ud_wr(wr)->remote_qkey); - roce_set_field(ud_sq_wqe->byte_32, V2_UD_SEND_WQE_BYTE_32_DQPN_M, - V2_UD_SEND_WQE_BYTE_32_DQPN_S, ud_wr(wr)->remote_qpn); + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_DQPN, ud_wr(wr)->remote_qpn); ret = fill_ud_av(ud_sq_wqe, ah); if (ret) @@ -516,8 +523,7 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, dma_wmb(); *sge_idx = curr_idx; - roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OWNER_S, - owner_bit); + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_OWNER, owner_bit); return 0; } @@ -552,9 +558,6 @@ static int set_rc_opcode(struct hns_roce_dev *hr_dev, else ret = -EOPNOTSUPP; break; - case IB_WR_LOCAL_INV: - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SO_S, 1); - fallthrough; case IB_WR_SEND_WITH_INV: rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey); break; @@ -565,11 +568,11 @@ static int set_rc_opcode(struct hns_roce_dev *hr_dev, if (unlikely(ret)) return ret; - roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M, - V2_RC_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op)); + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_OPCODE, to_hr_opcode(ib_op)); return ret; } + static inline int set_rc_wqe(struct hns_roce_qp *qp, const struct ib_send_wr *wr, void *wqe, unsigned int *sge_idx, @@ -590,13 +593,13 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, if (WARN_ON(ret)) return ret; - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FENCE_S, + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_FENCE, (wr->send_flags & IB_SEND_FENCE) ? 
1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SE_S, + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SE, (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_CQE_S, + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_CQE, (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0); if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || @@ -616,8 +619,7 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, dma_wmb(); *sge_idx = curr_idx; - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S, - owner_bit); + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_OWNER, owner_bit); return ret; } @@ -630,7 +632,7 @@ static inline void update_sq_db(struct hns_roce_dev *hr_dev, } else { struct hns_roce_v2_db sq_db = {}; - hr_reg_write(&sq_db, DB_TAG, qp->doorbell_qpn); + hr_reg_write(&sq_db, DB_TAG, qp->qpn); hr_reg_write(&sq_db, DB_CMD, HNS_ROCE_V2_SQ_DB); hr_reg_write(&sq_db, DB_PI, qp->sq.head); hr_reg_write(&sq_db, DB_SL, qp->sl); @@ -678,16 +680,15 @@ static void hns_roce_write512(struct hns_roce_dev *hr_dev, u64 *val, static void write_dwqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, void *wqe) { +#define HNS_ROCE_SL_SHIFT 2 struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe; /* All kinds of DirectWQE have the same header field layout */ - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FLAG_S, 1); - roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M, - V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S, qp->sl); - roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M, - V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S, qp->sl >> 2); - roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M, - V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S, qp->sq.head); + hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_FLAG); + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_DB_SL_L, qp->sl); + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_DB_SL_H, + qp->sl >> HNS_ROCE_SL_SHIFT); + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_WQE_INDEX, qp->sq.head); hns_roce_write512(hr_dev, wqe, qp->sq.db_reg); } @@ 
-1263,6 +1264,16 @@ static int hns_roce_cmq_csq_done(struct hns_roce_dev *hr_dev) return tail == priv->cmq.csq.head; } +static void update_cmdq_status(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hnae3_handle *handle = priv->handle; + + if (handle->rinfo.reset_state == HNS_ROCE_STATE_RST_INIT || + handle->rinfo.instance_state == HNS_ROCE_STATE_INIT) + hr_dev->cmd.state = HNS_ROCE_CMDQ_STATE_FATAL_ERR; +} + static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, struct hns_roce_cmq_desc *desc, int num) { @@ -1294,7 +1305,8 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, } while (++timeout < priv->cmq.tx_timeout); if (hns_roce_cmq_csq_done(hr_dev)) { - for (ret = 0, i = 0; i < num; i++) { + ret = 0; + for (i = 0; i < num; i++) { /* check the result of hardware write back */ desc[i] = csq->desc[tail++]; if (tail == csq->desc_num) @@ -1305,17 +1317,19 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, continue; dev_err_ratelimited(hr_dev->dev, - "Cmdq IO error, opcode = %x, return = %x\n", + "Cmdq IO error, opcode = 0x%x, return = 0x%x.\n", desc->opcode, desc_ret); ret = -EIO; } } else { /* FW/HW reset or incorrect number of desc */ tail = roce_read(hr_dev, ROCEE_TX_CMQ_CI_REG); - dev_warn(hr_dev->dev, "CMDQ move tail from %d to %d\n", + dev_warn(hr_dev->dev, "CMDQ move tail from %u to %u.\n", csq->head, tail); csq->head = tail; + update_cmdq_status(hr_dev); + ret = -EAGAIN; } @@ -1330,6 +1344,9 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, bool busy; int ret; + if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR) + return -EIO; + if (!v2_chk_mbox_is_avail(hr_dev, &busy)) return busy ? 
-EBUSY : 0; @@ -1342,17 +1359,17 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, return ret; } -static int config_hem_ba_to_hw(struct hns_roce_dev *hr_dev, unsigned long obj, - dma_addr_t base_addr, u16 op) +static int config_hem_ba_to_hw(struct hns_roce_dev *hr_dev, + dma_addr_t base_addr, u8 cmd, unsigned long tag) { - struct hns_roce_cmd_mailbox *mbox = hns_roce_alloc_cmd_mailbox(hr_dev); + struct hns_roce_cmd_mailbox *mbox; int ret; + mbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR(mbox)) return PTR_ERR(mbox); - ret = hns_roce_cmd_mbox(hr_dev, base_addr, mbox->dma, obj, 0, op, - HNS_ROCE_CMD_TIMEOUT_MSECS); + ret = hns_roce_cmd_mbox(hr_dev, base_addr, mbox->dma, cmd, tag); hns_roce_free_cmd_mailbox(hr_dev, mbox); return ret; } @@ -1384,20 +1401,20 @@ static void func_clr_hw_resetting_state(struct hns_roce_dev *hr_dev, hr_dev->dis_db = true; dev_warn(hr_dev->dev, - "Func clear is pending, device in resetting state.\n"); + "func clear is pending, device in resetting state.\n"); end = HNS_ROCE_V2_HW_RST_TIMEOUT; while (end) { if (!ops->get_hw_reset_stat(handle)) { hr_dev->is_reset = true; dev_info(hr_dev->dev, - "Func clear success after reset.\n"); + "func clear success after reset.\n"); return; } msleep(HNS_ROCE_V2_HW_RST_COMPLETION_WAIT); end -= HNS_ROCE_V2_HW_RST_COMPLETION_WAIT; } - dev_warn(hr_dev->dev, "Func clear failed.\n"); + dev_warn(hr_dev->dev, "func clear failed.\n"); } static void func_clr_sw_resetting_state(struct hns_roce_dev *hr_dev, @@ -1409,21 +1426,21 @@ static void func_clr_sw_resetting_state(struct hns_roce_dev *hr_dev, hr_dev->dis_db = true; dev_warn(hr_dev->dev, - "Func clear is pending, device in resetting state.\n"); + "func clear is pending, device in resetting state.\n"); end = HNS_ROCE_V2_HW_RST_TIMEOUT; while (end) { if (ops->ae_dev_reset_cnt(handle) != hr_dev->reset_cnt) { hr_dev->is_reset = true; dev_info(hr_dev->dev, - "Func clear success after sw reset\n"); + "func clear success after sw reset\n"); return; } 
msleep(HNS_ROCE_V2_HW_RST_COMPLETION_WAIT); end -= HNS_ROCE_V2_HW_RST_COMPLETION_WAIT; } - dev_warn(hr_dev->dev, "Func clear failed because of unfinished sw reset\n"); + dev_warn(hr_dev->dev, "func clear failed because of unfinished sw reset\n"); } static void hns_roce_func_clr_rst_proc(struct hns_roce_dev *hr_dev, int retval, @@ -1436,7 +1453,7 @@ static void hns_roce_func_clr_rst_proc(struct hns_roce_dev *hr_dev, int retval, if (ops->ae_dev_reset_cnt(handle) != hr_dev->reset_cnt) { hr_dev->dis_db = true; hr_dev->is_reset = true; - dev_info(hr_dev->dev, "Func clear success after reset.\n"); + dev_info(hr_dev->dev, "func clear success after reset.\n"); return; } @@ -1453,9 +1470,9 @@ static void hns_roce_func_clr_rst_proc(struct hns_roce_dev *hr_dev, int retval, if (retval && !flag) dev_warn(hr_dev->dev, - "Func clear read failed, ret = %d.\n", retval); + "func clear read failed, ret = %d.\n", retval); - dev_warn(hr_dev->dev, "Func clear failed.\n"); + dev_warn(hr_dev->dev, "func clear failed.\n"); } static void __hns_roce_function_clear(struct hns_roce_dev *hr_dev, int vf_id) @@ -1476,7 +1493,7 @@ static void __hns_roce_function_clear(struct hns_roce_dev *hr_dev, int vf_id) ret = hns_roce_cmq_send(hr_dev, &desc, 1); if (ret) { fclr_write_fail_flag = true; - dev_err(hr_dev->dev, "Func clear write failed, ret = %d.\n", + dev_err(hr_dev->dev, "func clear write failed, ret = %d.\n", ret); goto out; } @@ -1497,7 +1514,7 @@ static void __hns_roce_function_clear(struct hns_roce_dev *hr_dev, int vf_id) if (ret) continue; - if (roce_get_bit(resp->func_done, FUNC_CLEAR_RST_FUN_DONE_S)) { + if (hr_reg_read(resp, FUNC_CLEAR_RST_FUN_DONE)) { if (vf_id == 0) hr_dev->is_reset = true; return; @@ -1508,7 +1525,7 @@ out: hns_roce_func_clr_rst_proc(hr_dev, ret, fclr_write_fail_flag); } -static void hns_roce_free_vf_resource(struct hns_roce_dev *hr_dev, int vf_id) +static int hns_roce_free_vf_resource(struct hns_roce_dev *hr_dev, int vf_id) { enum hns_roce_opcode_type opcode = 
HNS_ROCE_OPC_ALLOC_VF_RES; struct hns_roce_cmq_desc desc[2]; @@ -1519,17 +1536,29 @@ static void hns_roce_free_vf_resource(struct hns_roce_dev *hr_dev, int vf_id) desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); hns_roce_cmq_setup_basic_desc(&desc[1], opcode, false); hr_reg_write(req_a, FUNC_RES_A_VF_ID, vf_id); - hns_roce_cmq_send(hr_dev, desc, 2); + + return hns_roce_cmq_send(hr_dev, desc, 2); } static void hns_roce_function_clear(struct hns_roce_dev *hr_dev) { + int ret; int i; + if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR) + return; + for (i = hr_dev->func_num - 1; i >= 0; i--) { __hns_roce_function_clear(hr_dev, i); - if (i != 0) - hns_roce_free_vf_resource(hr_dev, i); + + if (i == 0) + continue; + + ret = hns_roce_free_vf_resource(hr_dev, i); + if (ret) + ibdev_err(&hr_dev->ib_dev, + "failed to free vf resource, vf_id = %d, ret = %d.\n", + i, ret); } } @@ -1571,7 +1600,7 @@ static int hns_roce_query_func_info(struct hns_roce_dev *hr_dev) struct hns_roce_cmq_desc desc; int ret; - if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP09) { + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { hr_dev->func_num = 1; return 0; } @@ -1594,11 +1623,17 @@ static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev) { struct hns_roce_cmq_desc desc; struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data; + u32 clock_cycles_of_1us; hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GLOBAL_PARAM, false); - hr_reg_write(req, CFG_GLOBAL_PARAM_1US_CYCLES, 0x3e8); + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) + clock_cycles_of_1us = HNS_ROCE_1NS_CFG; + else + clock_cycles_of_1us = HNS_ROCE_1US_CFG; + + hr_reg_write(req, CFG_GLOBAL_PARAM_1US_CYCLES, clock_cycles_of_1us); hr_reg_write(req, CFG_GLOBAL_PARAM_UDP_PORT, ROCE_V2_UDP_DPORT); return hns_roce_cmq_send(hr_dev, &desc, 1); @@ -1749,17 +1784,16 @@ static int __hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev, swt = (struct hns_roce_vf_switch *)desc.data; 
hns_roce_cmq_setup_basic_desc(&desc, HNS_SWITCH_PARAMETER_CFG, true); swt->rocee_sel |= cpu_to_le32(HNS_ICL_SWITCH_CMD_ROCEE_SEL); - roce_set_field(swt->fun_id, VF_SWITCH_DATA_FUN_ID_VF_ID_M, - VF_SWITCH_DATA_FUN_ID_VF_ID_S, vf_id); + hr_reg_write(swt, VF_SWITCH_VF_ID, vf_id); ret = hns_roce_cmq_send(hr_dev, &desc, 1); if (ret) return ret; desc.flag = cpu_to_le16(HNS_ROCE_CMD_FLAG_IN); desc.flag &= cpu_to_le16(~HNS_ROCE_CMD_FLAG_WR); - roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LPBK_S, 1); - roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S, 0); - roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S, 1); + hr_reg_enable(swt, VF_SWITCH_ALW_LPBK); + hr_reg_clear(swt, VF_SWITCH_ALW_LCL_LPBK); + hr_reg_enable(swt, VF_SWITCH_ALW_DST_OVRD); return hns_roce_cmq_send(hr_dev, &desc, 1); } @@ -1927,7 +1961,6 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->min_cqes = HNS_ROCE_MIN_CQE_NUM; caps->max_cqes = HNS_ROCE_V2_MAX_CQE_NUM; caps->max_sq_sg = HNS_ROCE_V2_MAX_SQ_SGE_NUM; - caps->max_extend_sg = HNS_ROCE_V2_MAX_EXTEND_SGE_NUM; caps->max_rq_sg = HNS_ROCE_V2_MAX_RQ_SGE_NUM; caps->num_uars = HNS_ROCE_V2_UAR_NUM; @@ -1938,14 +1971,13 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->num_mtpts = HNS_ROCE_V2_MAX_MTPT_NUM; caps->num_pds = HNS_ROCE_V2_MAX_PD_NUM; - caps->num_qpc_timer = HNS_ROCE_V2_MAX_QPC_TIMER_NUM; - caps->num_cqc_timer = HNS_ROCE_V2_MAX_CQC_TIMER_NUM; + caps->qpc_timer_bt_num = HNS_ROCE_V2_MAX_QPC_TIMER_BT_NUM; + caps->cqc_timer_bt_num = HNS_ROCE_V2_MAX_CQC_TIMER_BT_NUM; caps->max_qp_init_rdma = HNS_ROCE_V2_MAX_QP_INIT_RDMA; caps->max_qp_dest_rdma = HNS_ROCE_V2_MAX_QP_DEST_RDMA; caps->max_sq_desc_sz = HNS_ROCE_V2_MAX_SQ_DESC_SZ; caps->max_rq_desc_sz = HNS_ROCE_V2_MAX_RQ_DESC_SZ; - caps->max_srq_desc_sz = HNS_ROCE_V2_MAX_SRQ_DESC_SZ; caps->irrl_entry_sz = HNS_ROCE_V2_IRRL_ENTRY_SZ; caps->trrl_entry_sz = HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ; caps->cqc_entry_sz = HNS_ROCE_V2_CQC_ENTRY_SZ; @@ -1997,7 +2029,8 @@ 
static void set_default_caps(struct hns_roce_dev *hr_dev) caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM; if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { - caps->flags |= HNS_ROCE_CAP_FLAG_STASH; + caps->flags |= HNS_ROCE_CAP_FLAG_STASH | + HNS_ROCE_CAP_FLAG_DIRECT_WQE; caps->max_sq_inline = HNS_ROCE_V3_MAX_SQ_INLINE; } else { caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE; @@ -2138,7 +2171,6 @@ static void apply_func_caps(struct hns_roce_dev *hr_dev) caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ; caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ; - caps->eqe_hop_num = HNS_ROCE_EQE_HOP_NUM; caps->pbl_hop_num = HNS_ROCE_PBL_HOP_NUM; caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0; caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; @@ -2146,15 +2178,17 @@ static void apply_func_caps(struct hns_roce_dev *hr_dev) caps->num_xrcds = HNS_ROCE_V2_MAX_XRCD_NUM; caps->reserved_xrcds = HNS_ROCE_V2_RSV_XRCD_NUM; - caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS; caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS; caps->num_idx_segs = HNS_ROCE_V2_MAX_IDX_SEGS; if (!caps->num_comp_vectors) - caps->num_comp_vectors = min_t(u32, caps->eqc_bt_num - 1, - (u32)priv->handle->rinfo.num_vectors - 2); + caps->num_comp_vectors = + min_t(u32, caps->eqc_bt_num - HNS_ROCE_V2_AEQE_VEC_NUM, + (u32)priv->handle->rinfo.num_vectors - + (HNS_ROCE_V2_AEQE_VEC_NUM + HNS_ROCE_V2_ABNORMAL_VEC_NUM)); if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { + caps->eqe_hop_num = HNS_ROCE_V3_EQE_HOP_NUM; caps->ceqe_size = HNS_ROCE_V3_EQE_SIZE; caps->aeqe_size = HNS_ROCE_V3_EQE_SIZE; @@ -2175,6 +2209,7 @@ static void apply_func_caps(struct hns_roce_dev *hr_dev) } else { u32 func_num = max_t(u32, 1, hr_dev->func_num); + caps->eqe_hop_num = HNS_ROCE_V2_EQE_HOP_NUM; caps->ceqe_size = HNS_ROCE_CEQE_SIZE; caps->aeqe_size = HNS_ROCE_AEQE_SIZE; caps->gid_table_len[0] /= func_num; @@ -2231,16 +2266,12 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->max_sq_inline = 
le16_to_cpu(resp_a->max_sq_inline); caps->max_rq_sg = le16_to_cpu(resp_a->max_rq_sg); caps->max_rq_sg = roundup_pow_of_two(caps->max_rq_sg); - caps->max_extend_sg = le32_to_cpu(resp_a->max_extend_sg); - caps->num_qpc_timer = le16_to_cpu(resp_a->num_qpc_timer); - caps->num_cqc_timer = le16_to_cpu(resp_a->num_cqc_timer); caps->max_srq_sges = le16_to_cpu(resp_a->max_srq_sges); caps->max_srq_sges = roundup_pow_of_two(caps->max_srq_sges); caps->num_aeq_vectors = resp_a->num_aeq_vectors; caps->num_other_vectors = resp_a->num_other_vectors; caps->max_sq_desc_sz = resp_a->max_sq_desc_sz; caps->max_rq_desc_sz = resp_a->max_rq_desc_sz; - caps->max_srq_desc_sz = resp_a->max_srq_desc_sz; caps->cqe_sz = resp_a->cqe_sz; caps->mtpt_entry_sz = resp_b->mtpt_entry_sz; @@ -2260,87 +2291,39 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) ctx_hop_num = resp_b->ctx_hop_num; pbl_hop_num = resp_b->pbl_hop_num; - caps->num_pds = 1 << roce_get_field(resp_c->cap_flags_num_pds, - V2_QUERY_PF_CAPS_C_NUM_PDS_M, - V2_QUERY_PF_CAPS_C_NUM_PDS_S); - caps->flags = roce_get_field(resp_c->cap_flags_num_pds, - V2_QUERY_PF_CAPS_C_CAP_FLAGS_M, - V2_QUERY_PF_CAPS_C_CAP_FLAGS_S); + caps->num_pds = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_PDS); + + caps->flags = hr_reg_read(resp_c, PF_CAPS_C_CAP_FLAGS); caps->flags |= le16_to_cpu(resp_d->cap_flags_ex) << HNS_ROCE_CAP_FLAGS_EX_SHIFT; - caps->num_cqs = 1 << roce_get_field(resp_c->max_gid_num_cqs, - V2_QUERY_PF_CAPS_C_NUM_CQS_M, - V2_QUERY_PF_CAPS_C_NUM_CQS_S); - caps->gid_table_len[0] = roce_get_field(resp_c->max_gid_num_cqs, - V2_QUERY_PF_CAPS_C_MAX_GID_M, - V2_QUERY_PF_CAPS_C_MAX_GID_S); - - caps->max_cqes = 1 << roce_get_field(resp_c->cq_depth, - V2_QUERY_PF_CAPS_C_CQ_DEPTH_M, - V2_QUERY_PF_CAPS_C_CQ_DEPTH_S); - caps->num_mtpts = 1 << roce_get_field(resp_c->num_mrws, - V2_QUERY_PF_CAPS_C_NUM_MRWS_M, - V2_QUERY_PF_CAPS_C_NUM_MRWS_S); - caps->num_qps = 1 << roce_get_field(resp_c->ord_num_qps, - V2_QUERY_PF_CAPS_C_NUM_QPS_M, - 
V2_QUERY_PF_CAPS_C_NUM_QPS_S); - caps->max_qp_init_rdma = roce_get_field(resp_c->ord_num_qps, - V2_QUERY_PF_CAPS_C_MAX_ORD_M, - V2_QUERY_PF_CAPS_C_MAX_ORD_S); + caps->num_cqs = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_CQS); + caps->gid_table_len[0] = hr_reg_read(resp_c, PF_CAPS_C_MAX_GID); + caps->max_cqes = 1 << hr_reg_read(resp_c, PF_CAPS_C_CQ_DEPTH); + caps->num_mtpts = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_MRWS); + caps->num_qps = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_QPS); + caps->max_qp_init_rdma = hr_reg_read(resp_c, PF_CAPS_C_MAX_ORD); caps->max_qp_dest_rdma = caps->max_qp_init_rdma; caps->max_wqes = 1 << le16_to_cpu(resp_c->sq_depth); - caps->num_srqs = 1 << roce_get_field(resp_d->wq_hop_num_max_srqs, - V2_QUERY_PF_CAPS_D_NUM_SRQS_M, - V2_QUERY_PF_CAPS_D_NUM_SRQS_S); - caps->cong_type = roce_get_field(resp_d->wq_hop_num_max_srqs, - V2_QUERY_PF_CAPS_D_CONG_TYPE_M, - V2_QUERY_PF_CAPS_D_CONG_TYPE_S); - caps->max_srq_wrs = 1 << le16_to_cpu(resp_d->srq_depth); - caps->ceqe_depth = 1 << roce_get_field(resp_d->num_ceqs_ceq_depth, - V2_QUERY_PF_CAPS_D_CEQ_DEPTH_M, - V2_QUERY_PF_CAPS_D_CEQ_DEPTH_S); - caps->num_comp_vectors = roce_get_field(resp_d->num_ceqs_ceq_depth, - V2_QUERY_PF_CAPS_D_NUM_CEQS_M, - V2_QUERY_PF_CAPS_D_NUM_CEQS_S); - - caps->aeqe_depth = 1 << roce_get_field(resp_d->arm_st_aeq_depth, - V2_QUERY_PF_CAPS_D_AEQ_DEPTH_M, - V2_QUERY_PF_CAPS_D_AEQ_DEPTH_S); - caps->default_aeq_arm_st = roce_get_field(resp_d->arm_st_aeq_depth, - V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_M, - V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_S); - caps->default_ceq_arm_st = roce_get_field(resp_d->arm_st_aeq_depth, - V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_M, - V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_S); - caps->reserved_pds = roce_get_field(resp_d->num_uars_rsv_pds, - V2_QUERY_PF_CAPS_D_RSV_PDS_M, - V2_QUERY_PF_CAPS_D_RSV_PDS_S); - caps->num_uars = 1 << roce_get_field(resp_d->num_uars_rsv_pds, - V2_QUERY_PF_CAPS_D_NUM_UARS_M, - V2_QUERY_PF_CAPS_D_NUM_UARS_S); - caps->reserved_qps = roce_get_field(resp_d->rsv_uars_rsv_qps, 
- V2_QUERY_PF_CAPS_D_RSV_QPS_M, - V2_QUERY_PF_CAPS_D_RSV_QPS_S); - caps->reserved_uars = roce_get_field(resp_d->rsv_uars_rsv_qps, - V2_QUERY_PF_CAPS_D_RSV_UARS_M, - V2_QUERY_PF_CAPS_D_RSV_UARS_S); - caps->reserved_mrws = roce_get_field(resp_e->chunk_size_shift_rsv_mrws, - V2_QUERY_PF_CAPS_E_RSV_MRWS_M, - V2_QUERY_PF_CAPS_E_RSV_MRWS_S); - caps->chunk_sz = 1 << roce_get_field(resp_e->chunk_size_shift_rsv_mrws, - V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_M, - V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_S); - caps->reserved_cqs = roce_get_field(resp_e->rsv_cqs, - V2_QUERY_PF_CAPS_E_RSV_CQS_M, - V2_QUERY_PF_CAPS_E_RSV_CQS_S); - caps->reserved_srqs = roce_get_field(resp_e->rsv_srqs, - V2_QUERY_PF_CAPS_E_RSV_SRQS_M, - V2_QUERY_PF_CAPS_E_RSV_SRQS_S); - caps->reserved_lkey = roce_get_field(resp_e->rsv_lkey, - V2_QUERY_PF_CAPS_E_RSV_LKEYS_M, - V2_QUERY_PF_CAPS_E_RSV_LKEYS_S); + caps->num_srqs = 1 << hr_reg_read(resp_d, PF_CAPS_D_NUM_SRQS); + caps->cong_type = hr_reg_read(resp_d, PF_CAPS_D_CONG_TYPE); + caps->max_srq_wrs = 1 << le16_to_cpu(resp_d->srq_depth); + caps->ceqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_CEQ_DEPTH); + caps->num_comp_vectors = hr_reg_read(resp_d, PF_CAPS_D_NUM_CEQS); + caps->aeqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_AEQ_DEPTH); + caps->default_aeq_arm_st = hr_reg_read(resp_d, PF_CAPS_D_AEQ_ARM_ST); + caps->default_ceq_arm_st = hr_reg_read(resp_d, PF_CAPS_D_CEQ_ARM_ST); + caps->reserved_pds = hr_reg_read(resp_d, PF_CAPS_D_RSV_PDS); + caps->num_uars = 1 << hr_reg_read(resp_d, PF_CAPS_D_NUM_UARS); + caps->reserved_qps = hr_reg_read(resp_d, PF_CAPS_D_RSV_QPS); + caps->reserved_uars = hr_reg_read(resp_d, PF_CAPS_D_RSV_UARS); + + caps->reserved_mrws = hr_reg_read(resp_e, PF_CAPS_E_RSV_MRWS); + caps->chunk_sz = 1 << hr_reg_read(resp_e, PF_CAPS_E_CHUNK_SIZE_SHIFT); + caps->reserved_cqs = hr_reg_read(resp_e, PF_CAPS_E_RSV_CQS); + caps->reserved_srqs = hr_reg_read(resp_e, PF_CAPS_E_RSV_SRQS); + caps->reserved_lkey = hr_reg_read(resp_e, PF_CAPS_E_RSV_LKEYS); 
caps->default_ceq_max_cnt = le16_to_cpu(resp_e->ceq_max_cnt); caps->default_ceq_period = le16_to_cpu(resp_e->ceq_period); caps->default_aeq_max_cnt = le16_to_cpu(resp_e->aeq_max_cnt); @@ -2355,15 +2338,9 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->cqe_hop_num = pbl_hop_num; caps->srqwqe_hop_num = pbl_hop_num; caps->idx_hop_num = pbl_hop_num; - caps->wqe_sq_hop_num = roce_get_field(resp_d->wq_hop_num_max_srqs, - V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_M, - V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_S); - caps->wqe_sge_hop_num = roce_get_field(resp_d->wq_hop_num_max_srqs, - V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_M, - V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_S); - caps->wqe_rq_hop_num = roce_get_field(resp_d->wq_hop_num_max_srqs, - V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_M, - V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_S); + caps->wqe_sq_hop_num = hr_reg_read(resp_d, PF_CAPS_D_SQWQE_HOP_NUM); + caps->wqe_sge_hop_num = hr_reg_read(resp_d, PF_CAPS_D_EX_SGE_HOP_NUM); + caps->wqe_rq_hop_num = hr_reg_read(resp_d, PF_CAPS_D_RQWQE_HOP_NUM); return 0; } @@ -2387,7 +2364,7 @@ static int hns_roce_config_entry_size(struct hns_roce_dev *hr_dev) struct hns_roce_caps *caps = &hr_dev->caps; int ret; - if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP09) + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) return 0; ret = config_hem_entry_size(hr_dev, HNS_ROCE_CFG_QPC_SIZE, @@ -2654,6 +2631,198 @@ static void free_dip_list(struct hns_roce_dev *hr_dev) spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags); } +static void free_mr_exit(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_v2_free_mr *free_mr = &priv->free_mr; + int ret; + int i; + + for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) { + if (free_mr->rsv_qp[i]) { + ret = ib_destroy_qp(free_mr->rsv_qp[i]); + if (ret) + ibdev_err(&hr_dev->ib_dev, + "failed to destroy qp in free mr.\n"); + + free_mr->rsv_qp[i] = NULL; + } + } + + if (free_mr->rsv_cq) { + ib_destroy_cq(free_mr->rsv_cq); + 
free_mr->rsv_cq = NULL; + } + + if (free_mr->rsv_pd) { + ib_dealloc_pd(free_mr->rsv_pd); + free_mr->rsv_pd = NULL; + } +} + +static int free_mr_alloc_res(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_v2_free_mr *free_mr = &priv->free_mr; + struct ib_device *ibdev = &hr_dev->ib_dev; + struct ib_cq_init_attr cq_init_attr = {}; + struct ib_qp_init_attr qp_init_attr = {}; + struct ib_pd *pd; + struct ib_cq *cq; + struct ib_qp *qp; + int ret; + int i; + + pd = ib_alloc_pd(ibdev, 0); + if (IS_ERR(pd)) { + ibdev_err(ibdev, "failed to create pd for free mr.\n"); + return PTR_ERR(pd); + } + free_mr->rsv_pd = pd; + + cq_init_attr.cqe = HNS_ROCE_FREE_MR_USED_CQE_NUM; + cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_init_attr); + if (IS_ERR(cq)) { + ibdev_err(ibdev, "failed to create cq for free mr.\n"); + ret = PTR_ERR(cq); + goto create_failed; + } + free_mr->rsv_cq = cq; + + qp_init_attr.qp_type = IB_QPT_RC; + qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; + qp_init_attr.send_cq = free_mr->rsv_cq; + qp_init_attr.recv_cq = free_mr->rsv_cq; + for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) { + qp_init_attr.cap.max_send_wr = HNS_ROCE_FREE_MR_USED_SQWQE_NUM; + qp_init_attr.cap.max_send_sge = HNS_ROCE_FREE_MR_USED_SQSGE_NUM; + qp_init_attr.cap.max_recv_wr = HNS_ROCE_FREE_MR_USED_RQWQE_NUM; + qp_init_attr.cap.max_recv_sge = HNS_ROCE_FREE_MR_USED_RQSGE_NUM; + + qp = ib_create_qp(free_mr->rsv_pd, &qp_init_attr); + if (IS_ERR(qp)) { + ibdev_err(ibdev, "failed to create qp for free mr.\n"); + ret = PTR_ERR(qp); + goto create_failed; + } + + free_mr->rsv_qp[i] = qp; + } + + return 0; + +create_failed: + free_mr_exit(hr_dev); + + return ret; +} + +static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev, + struct ib_qp_attr *attr, int sl_num) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_v2_free_mr *free_mr = &priv->free_mr; + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_qp *hr_qp; + int 
loopback; + int mask; + int ret; + + hr_qp = to_hr_qp(free_mr->rsv_qp[sl_num]); + hr_qp->free_mr_en = 1; + + mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS; + attr->qp_state = IB_QPS_INIT; + attr->port_num = 1; + attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE; + ret = ib_modify_qp(&hr_qp->ibqp, attr, mask); + if (ret) { + ibdev_err(ibdev, "failed to modify qp to init, ret = %d.\n", + ret); + return ret; + } + + loopback = hr_dev->loop_idc; + /* Set qpc lbi = 1 incidate loopback IO */ + hr_dev->loop_idc = 1; + + mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | + IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER; + attr->qp_state = IB_QPS_RTR; + attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; + attr->path_mtu = IB_MTU_256; + attr->dest_qp_num = hr_qp->qpn; + attr->rq_psn = HNS_ROCE_FREE_MR_USED_PSN; + + rdma_ah_set_sl(&attr->ah_attr, (u8)sl_num); + + ret = ib_modify_qp(&hr_qp->ibqp, attr, mask); + hr_dev->loop_idc = loopback; + if (ret) { + ibdev_err(ibdev, "failed to modify qp to rtr, ret = %d.\n", + ret); + return ret; + } + + mask = IB_QP_STATE | IB_QP_SQ_PSN | IB_QP_RETRY_CNT | IB_QP_TIMEOUT | + IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC; + attr->qp_state = IB_QPS_RTS; + attr->sq_psn = HNS_ROCE_FREE_MR_USED_PSN; + attr->retry_cnt = HNS_ROCE_FREE_MR_USED_QP_RETRY_CNT; + attr->timeout = HNS_ROCE_FREE_MR_USED_QP_TIMEOUT; + ret = ib_modify_qp(&hr_qp->ibqp, attr, mask); + if (ret) + ibdev_err(ibdev, "failed to modify qp to rts, ret = %d.\n", + ret); + + return ret; +} + +static int free_mr_modify_qp(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_v2_free_mr *free_mr = &priv->free_mr; + struct ib_qp_attr attr = {}; + int ret; + int i; + + rdma_ah_set_grh(&attr.ah_attr, NULL, 0, 0, 1, 0); + rdma_ah_set_static_rate(&attr.ah_attr, 3); + rdma_ah_set_port_num(&attr.ah_attr, 1); + + for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) { + ret = free_mr_modify_rsv_qp(hr_dev, &attr, 
i); + if (ret) + return ret; + } + + return 0; +} + +static int free_mr_init(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_v2_free_mr *free_mr = &priv->free_mr; + int ret; + + mutex_init(&free_mr->mutex); + + ret = free_mr_alloc_res(hr_dev); + if (ret) + return ret; + + ret = free_mr_modify_qp(hr_dev); + if (ret) + goto err_modify_qp; + + return 0; + +err_modify_qp: + free_mr_exit(hr_dev); + + return ret; +} + static int get_hem_table(struct hns_roce_dev *hr_dev) { unsigned int qpc_count; @@ -2770,21 +2939,21 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev) free_dip_list(hr_dev); } -static int hns_roce_mbox_post(struct hns_roce_dev *hr_dev, u64 in_param, - u64 out_param, u32 in_modifier, u8 op_modifier, - u16 op, u16 token, int event) +static int hns_roce_mbox_post(struct hns_roce_dev *hr_dev, + struct hns_roce_mbox_msg *mbox_msg) { struct hns_roce_cmq_desc desc; struct hns_roce_post_mbox *mb = (struct hns_roce_post_mbox *)desc.data; hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_POST_MB, false); - mb->in_param_l = cpu_to_le32(in_param); - mb->in_param_h = cpu_to_le32(in_param >> 32); - mb->out_param_l = cpu_to_le32(out_param); - mb->out_param_h = cpu_to_le32(out_param >> 32); - mb->cmd_tag = cpu_to_le32(in_modifier << 8 | op); - mb->token_event_en = cpu_to_le32(event << 16 | token); + mb->in_param_l = cpu_to_le32(mbox_msg->in_param); + mb->in_param_h = cpu_to_le32(mbox_msg->in_param >> 32); + mb->out_param_l = cpu_to_le32(mbox_msg->out_param); + mb->out_param_h = cpu_to_le32(mbox_msg->out_param >> 32); + mb->cmd_tag = cpu_to_le32(mbox_msg->tag << 8 | mbox_msg->cmd); + mb->token_event_en = cpu_to_le32(mbox_msg->event_en << 16 | + mbox_msg->token); return hns_roce_cmq_send(hr_dev, &desc, 1); } @@ -2802,6 +2971,9 @@ static int v2_wait_mbox_complete(struct hns_roce_dev *hr_dev, u32 timeout, mb_st = (struct hns_roce_mbox_status *)desc.data; end = msecs_to_jiffies(timeout) + jiffies; while 
(v2_chk_mbox_is_avail(hr_dev, &busy)) { + if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR) + return -EIO; + status = 0; hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_MB_ST, true); @@ -2837,9 +3009,8 @@ static int v2_wait_mbox_complete(struct hns_roce_dev *hr_dev, u32 timeout, return ret; } -static int v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, - u64 out_param, u32 in_modifier, u8 op_modifier, - u16 op, u16 token, int event) +static int v2_post_mbox(struct hns_roce_dev *hr_dev, + struct hns_roce_mbox_msg *mbox_msg) { u8 status = 0; int ret; @@ -2855,8 +3026,7 @@ static int v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, } /* Post new message to mbox */ - ret = hns_roce_mbox_post(hr_dev, in_param, out_param, in_modifier, - op_modifier, op, token, event); + ret = hns_roce_mbox_post(hr_dev, mbox_msg); if (ret) dev_err_ratelimited(hr_dev->dev, "failed to post mailbox, ret = %d.\n", ret); @@ -2864,12 +3034,13 @@ static int v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, return ret; } -static int v2_poll_mbox_done(struct hns_roce_dev *hr_dev, unsigned int timeout) +static int v2_poll_mbox_done(struct hns_roce_dev *hr_dev) { u8 status = 0; int ret; - ret = v2_wait_mbox_complete(hr_dev, timeout, &status); + ret = v2_wait_mbox_complete(hr_dev, HNS_ROCE_CMD_TIMEOUT_MSECS, + &status); if (!ret) { if (status != MB_ST_COMPLETE_SUCC) return -EBUSY; @@ -2906,10 +3077,8 @@ static int config_sgid_table(struct hns_roce_dev *hr_dev, hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SGID_TB, false); - roce_set_field(sgid_tb->table_idx_rsv, CFG_SGID_TB_TABLE_IDX_M, - CFG_SGID_TB_TABLE_IDX_S, gid_index); - roce_set_field(sgid_tb->vf_sgid_type_rsv, CFG_SGID_TB_VF_SGID_TYPE_M, - CFG_SGID_TB_VF_SGID_TYPE_S, sgid_type); + hr_reg_write(sgid_tb, CFG_SGID_TB_TABLE_IDX, gid_index); + hr_reg_write(sgid_tb, CFG_SGID_TB_VF_SGID_TYPE, sgid_type); copy_gid(&sgid_tb->vf_sgid_l, gid); @@ -2944,25 +3113,20 @@ static int config_gmv_table(struct 
hns_roce_dev *hr_dev, copy_gid(&tb_a->vf_sgid_l, gid); - roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_SGID_TYPE_M, - CFG_GMV_TB_VF_SGID_TYPE_S, sgid_type); - roce_set_bit(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_EN_S, - vlan_id < VLAN_CFI_MASK); - roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_ID_M, - CFG_GMV_TB_VF_VLAN_ID_S, vlan_id); + hr_reg_write(tb_a, GMV_TB_A_VF_SGID_TYPE, sgid_type); + hr_reg_write(tb_a, GMV_TB_A_VF_VLAN_EN, vlan_id < VLAN_CFI_MASK); + hr_reg_write(tb_a, GMV_TB_A_VF_VLAN_ID, vlan_id); tb_b->vf_smac_l = cpu_to_le32(*(u32 *)mac); - roce_set_field(tb_b->vf_smac_h, CFG_GMV_TB_SMAC_H_M, - CFG_GMV_TB_SMAC_H_S, *(u16 *)&mac[4]); - roce_set_field(tb_b->table_idx_rsv, CFG_GMV_TB_SGID_IDX_M, - CFG_GMV_TB_SGID_IDX_S, gid_index); + hr_reg_write(tb_b, GMV_TB_B_SMAC_H, *(u16 *)&mac[4]); + hr_reg_write(tb_b, GMV_TB_B_SGID_IDX, gid_index); return hns_roce_cmq_send(hr_dev, desc, 2); } -static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u32 port, - int gid_index, const union ib_gid *gid, +static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, int gid_index, + const union ib_gid *gid, const struct ib_gid_attr *attr) { enum hns_roce_sgid_type sgid_type = GID_TYPE_FLAG_ROCE_V1; @@ -3005,10 +3169,8 @@ static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, reg_smac_l = *(u32 *)(&addr[0]); reg_smac_h = *(u16 *)(&addr[4]); - roce_set_field(smac_tb->tb_idx_rsv, CFG_SMAC_TB_IDX_M, - CFG_SMAC_TB_IDX_S, phy_port); - roce_set_field(smac_tb->vf_smac_h_rsv, CFG_SMAC_TB_VF_SMAC_H_M, - CFG_SMAC_TB_VF_SMAC_H_S, reg_smac_h); + hr_reg_write(smac_tb, CFG_SMAC_TB_IDX, phy_port); + hr_reg_write(smac_tb, CFG_SMAC_TB_VF_SMAC_H, reg_smac_h); smac_tb->vf_smac_l = cpu_to_le32(reg_smac_l); return hns_roce_cmq_send(hr_dev, &desc, 1); @@ -3037,38 +3199,29 @@ static int set_mtpt_pbl(struct hns_roce_dev *hr_dev, mpt_entry->pbl_size = cpu_to_le32(mr->npages); mpt_entry->pbl_ba_l = cpu_to_le32(pbl_ba >> 3); - 
roce_set_field(mpt_entry->byte_48_mode_ba, - V2_MPT_BYTE_48_PBL_BA_H_M, V2_MPT_BYTE_48_PBL_BA_H_S, - upper_32_bits(pbl_ba >> 3)); + hr_reg_write(mpt_entry, MPT_PBL_BA_H, upper_32_bits(pbl_ba >> 3)); mpt_entry->pa0_l = cpu_to_le32(lower_32_bits(pages[0])); - roce_set_field(mpt_entry->byte_56_pa0_h, V2_MPT_BYTE_56_PA0_H_M, - V2_MPT_BYTE_56_PA0_H_S, upper_32_bits(pages[0])); + hr_reg_write(mpt_entry, MPT_PA0_H, upper_32_bits(pages[0])); mpt_entry->pa1_l = cpu_to_le32(lower_32_bits(pages[1])); - roce_set_field(mpt_entry->byte_64_buf_pa1, V2_MPT_BYTE_64_PA1_H_M, - V2_MPT_BYTE_64_PA1_H_S, upper_32_bits(pages[1])); - roce_set_field(mpt_entry->byte_64_buf_pa1, - V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M, - V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S, - to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); + hr_reg_write(mpt_entry, MPT_PA1_H, upper_32_bits(pages[1])); + hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ, + to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); return 0; } static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev, - void *mb_buf, struct hns_roce_mr *mr, - unsigned long mtpt_idx) + void *mb_buf, struct hns_roce_mr *mr) { struct hns_roce_v2_mpt_entry *mpt_entry; - int ret; mpt_entry = mb_buf; memset(mpt_entry, 0, sizeof(*mpt_entry)); hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_VALID); hr_reg_write(mpt_entry, MPT_PD, mr->pd); - hr_reg_enable(mpt_entry, MPT_L_INV_EN); hr_reg_write_bool(mpt_entry, MPT_BIND_EN, mr->access & IB_ACCESS_MW_BIND); @@ -3100,9 +3253,7 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev, to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift)); hr_reg_enable(mpt_entry, MPT_INNER_PA_VLD); - ret = set_mtpt_pbl(hr_dev, mpt_entry, mr); - - return ret; + return set_mtpt_pbl(hr_dev, mpt_entry, mr); } static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev, @@ -3113,24 +3264,19 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev, u32 mr_access_flags = mr->access; int ret = 0; - 
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M, - V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_VALID); - - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, - V2_MPT_BYTE_4_PD_S, mr->pd); + hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_VALID); + hr_reg_write(mpt_entry, MPT_PD, mr->pd); if (flags & IB_MR_REREG_ACCESS) { - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, - V2_MPT_BYTE_8_BIND_EN_S, + hr_reg_write(mpt_entry, MPT_BIND_EN, (mr_access_flags & IB_ACCESS_MW_BIND ? 1 : 0)); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, - V2_MPT_BYTE_8_ATOMIC_EN_S, + hr_reg_write(mpt_entry, MPT_ATOMIC_EN, mr_access_flags & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S, + hr_reg_write(mpt_entry, MPT_RR_EN, mr_access_flags & IB_ACCESS_REMOTE_READ ? 1 : 0); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S, + hr_reg_write(mpt_entry, MPT_RW_EN, mr_access_flags & IB_ACCESS_REMOTE_WRITE ? 1 : 0); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S, + hr_reg_write(mpt_entry, MPT_LW_EN, mr_access_flags & IB_ACCESS_LOCAL_WRITE ? 
1 : 0); } @@ -3161,37 +3307,27 @@ static int hns_roce_v2_frmr_write_mtpt(struct hns_roce_dev *hr_dev, return -ENOBUFS; } - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M, - V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE); - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M, - V2_MPT_BYTE_4_PBL_HOP_NUM_S, 1); - roce_set_field(mpt_entry->byte_4_pd_hop_st, - V2_MPT_BYTE_4_PBL_BA_PG_SZ_M, - V2_MPT_BYTE_4_PBL_BA_PG_SZ_S, - to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift)); - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, - V2_MPT_BYTE_4_PD_S, mr->pd); + hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_FREE); + hr_reg_write(mpt_entry, MPT_PD, mr->pd); + + hr_reg_enable(mpt_entry, MPT_RA_EN); + hr_reg_enable(mpt_entry, MPT_R_INV_EN); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 1); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1); + hr_reg_enable(mpt_entry, MPT_FRE); + hr_reg_clear(mpt_entry, MPT_MR_MW); + hr_reg_enable(mpt_entry, MPT_BPD); + hr_reg_clear(mpt_entry, MPT_PA); - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_FRE_S, 1); - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0); - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 0); - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1); + hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM, 1); + hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ, + to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift)); + hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ, + to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); mpt_entry->pbl_size = cpu_to_le32(mr->npages); mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(pbl_ba >> 3)); - roce_set_field(mpt_entry->byte_48_mode_ba, V2_MPT_BYTE_48_PBL_BA_H_M, - V2_MPT_BYTE_48_PBL_BA_H_S, - upper_32_bits(pbl_ba >> 3)); - - roce_set_field(mpt_entry->byte_64_buf_pa1, - V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M, - 
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S, - to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); + hr_reg_write(mpt_entry, MPT_PBL_BA_H, upper_32_bits(pbl_ba >> 3)); return 0; } @@ -3203,39 +3339,123 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw) mpt_entry = mb_buf; memset(mpt_entry, 0, sizeof(*mpt_entry)); - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M, - V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE); - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, - V2_MPT_BYTE_4_PD_S, mw->pdn); - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M, - V2_MPT_BYTE_4_PBL_HOP_NUM_S, - mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : - mw->pbl_hop_num); - roce_set_field(mpt_entry->byte_4_pd_hop_st, - V2_MPT_BYTE_4_PBL_BA_PG_SZ_M, - V2_MPT_BYTE_4_PBL_BA_PG_SZ_S, - mw->pbl_ba_pg_sz + PG_SHIFT_OFFSET); - - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S, 1); - - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0); - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 1); - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1); - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BQP_S, - mw->ibmw.type == IB_MW_TYPE_1 ? 0 : 1); + hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_FREE); + hr_reg_write(mpt_entry, MPT_PD, mw->pdn); + + hr_reg_enable(mpt_entry, MPT_R_INV_EN); + hr_reg_enable(mpt_entry, MPT_LW_EN); - roce_set_field(mpt_entry->byte_64_buf_pa1, - V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M, - V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S, - mw->pbl_buf_pg_sz + PG_SHIFT_OFFSET); + hr_reg_enable(mpt_entry, MPT_MR_MW); + hr_reg_enable(mpt_entry, MPT_BPD); + hr_reg_clear(mpt_entry, MPT_PA); + hr_reg_write(mpt_entry, MPT_BQP, + mw->ibmw.type == IB_MW_TYPE_1 ? 
0 : 1); mpt_entry->lkey = cpu_to_le32(mw->rkey); + hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM, + mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : + mw->pbl_hop_num); + hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ, + mw->pbl_ba_pg_sz + PG_SHIFT_OFFSET); + hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ, + mw->pbl_buf_pg_sz + PG_SHIFT_OFFSET); + return 0; } +static int free_mr_post_send_lp_wqe(struct hns_roce_qp *hr_qp) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device); + struct ib_device *ibdev = &hr_dev->ib_dev; + const struct ib_send_wr *bad_wr; + struct ib_rdma_wr rdma_wr = {}; + struct ib_send_wr *send_wr; + int ret; + + send_wr = &rdma_wr.wr; + send_wr->opcode = IB_WR_RDMA_WRITE; + + ret = hns_roce_v2_post_send(&hr_qp->ibqp, send_wr, &bad_wr); + if (ret) { + ibdev_err(ibdev, "failed to post wqe for free mr, ret = %d.\n", + ret); + return ret; + } + + return 0; +} + +static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries, + struct ib_wc *wc); + +static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_v2_free_mr *free_mr = &priv->free_mr; + struct ib_wc wc[ARRAY_SIZE(free_mr->rsv_qp)]; + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_qp *hr_qp; + unsigned long end; + int cqe_cnt = 0; + int npolled; + int ret; + int i; + + /* + * If the device initialization is not complete or in the uninstall + * process, then there is no need to execute free mr. 
+ */ + if (priv->handle->rinfo.reset_state == HNS_ROCE_STATE_RST_INIT || + priv->handle->rinfo.instance_state == HNS_ROCE_STATE_INIT || + hr_dev->state == HNS_ROCE_DEVICE_STATE_UNINIT) + return; + + mutex_lock(&free_mr->mutex); + + for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) { + hr_qp = to_hr_qp(free_mr->rsv_qp[i]); + + ret = free_mr_post_send_lp_wqe(hr_qp); + if (ret) { + ibdev_err(ibdev, + "failed to send wqe (qp:0x%lx) for free mr, ret = %d.\n", + hr_qp->qpn, ret); + break; + } + + cqe_cnt++; + } + + end = msecs_to_jiffies(HNS_ROCE_V2_FREE_MR_TIMEOUT) + jiffies; + while (cqe_cnt) { + npolled = hns_roce_v2_poll_cq(free_mr->rsv_cq, cqe_cnt, wc); + if (npolled < 0) { + ibdev_err(ibdev, + "failed to poll cqe for free mr, remain %d cqe.\n", + cqe_cnt); + goto out; + } + + if (time_after(jiffies, end)) { + ibdev_err(ibdev, + "failed to poll cqe for free mr and timeout, remain %d cqe.\n", + cqe_cnt); + goto out; + } + cqe_cnt -= npolled; + } + +out: + mutex_unlock(&free_mr->mutex); +} + +static void hns_roce_v2_dereg_mr(struct hns_roce_dev *hr_dev) +{ + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) + free_mr_send_cmd_to_hw(hr_dev); +} + static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n) { return hns_roce_buf_offset(hr_cq->mtr.kmem, n * hr_cq->cqe_size); @@ -3571,7 +3791,6 @@ static const u32 wc_send_op_map[] = { HR_WC_OP_MAP(RDMA_READ, RDMA_READ), HR_WC_OP_MAP(RDMA_WRITE, RDMA_WRITE), HR_WC_OP_MAP(RDMA_WRITE_WITH_IMM, RDMA_WRITE), - HR_WC_OP_MAP(LOCAL_INV, LOCAL_INV), HR_WC_OP_MAP(ATOM_CMP_AND_SWAP, COMP_SWAP), HR_WC_OP_MAP(ATOM_FETCH_AND_ADD, FETCH_ADD), HR_WC_OP_MAP(ATOM_MSK_CMP_AND_SWAP, MASKED_COMP_SWAP), @@ -3621,9 +3840,6 @@ static void fill_send_wc(struct ib_wc *wc, struct hns_roce_v2_cqe *cqe) case HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM: wc->wc_flags |= IB_WC_WITH_IMM; break; - case HNS_ROCE_V2_WQE_OP_LOCAL_INV: - wc->wc_flags |= IB_WC_WITH_INVALIDATE; - break; case HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP: case 
HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD: case HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP: @@ -3809,38 +4025,38 @@ out: } static int get_op_for_set_hem(struct hns_roce_dev *hr_dev, u32 type, - int step_idx, u16 *mbox_op) + u32 step_idx, u8 *mbox_cmd) { - u16 op; + u8 cmd; switch (type) { case HEM_TYPE_QPC: - op = HNS_ROCE_CMD_WRITE_QPC_BT0; + cmd = HNS_ROCE_CMD_WRITE_QPC_BT0; break; case HEM_TYPE_MTPT: - op = HNS_ROCE_CMD_WRITE_MPT_BT0; + cmd = HNS_ROCE_CMD_WRITE_MPT_BT0; break; case HEM_TYPE_CQC: - op = HNS_ROCE_CMD_WRITE_CQC_BT0; + cmd = HNS_ROCE_CMD_WRITE_CQC_BT0; break; case HEM_TYPE_SRQC: - op = HNS_ROCE_CMD_WRITE_SRQC_BT0; + cmd = HNS_ROCE_CMD_WRITE_SRQC_BT0; break; case HEM_TYPE_SCCC: - op = HNS_ROCE_CMD_WRITE_SCCC_BT0; + cmd = HNS_ROCE_CMD_WRITE_SCCC_BT0; break; case HEM_TYPE_QPC_TIMER: - op = HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0; + cmd = HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0; break; case HEM_TYPE_CQC_TIMER: - op = HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0; + cmd = HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0; break; default: dev_warn(hr_dev->dev, "failed to check hem type %u.\n", type); return -EINVAL; } - *mbox_op = op + step_idx; + *mbox_cmd = cmd + step_idx; return 0; } @@ -3863,10 +4079,10 @@ static int config_gmv_ba_to_hw(struct hns_roce_dev *hr_dev, unsigned long obj, } static int set_hem_to_hw(struct hns_roce_dev *hr_dev, int obj, - dma_addr_t base_addr, u32 hem_type, int step_idx) + dma_addr_t base_addr, u32 hem_type, u32 step_idx) { int ret; - u16 op; + u8 cmd; if (unlikely(hem_type == HEM_TYPE_GMV)) return config_gmv_ba_to_hw(hr_dev, obj, base_addr); @@ -3874,16 +4090,16 @@ static int set_hem_to_hw(struct hns_roce_dev *hr_dev, int obj, if (unlikely(hem_type == HEM_TYPE_SCCC && step_idx)) return 0; - ret = get_op_for_set_hem(hr_dev, hem_type, step_idx, &op); + ret = get_op_for_set_hem(hr_dev, hem_type, step_idx, &cmd); if (ret < 0) return ret; - return config_hem_ba_to_hw(hr_dev, obj, base_addr, op); + return config_hem_ba_to_hw(hr_dev, base_addr, cmd, obj); } static int 
hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, int obj, - int step_idx) + u32 step_idx) { struct hns_roce_hem_iter iter; struct hns_roce_hem_mhop mhop; @@ -3941,29 +4157,29 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev, } static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_table *table, int obj, - int step_idx) + struct hns_roce_hem_table *table, + int tag, u32 step_idx) { - struct device *dev = hr_dev->dev; struct hns_roce_cmd_mailbox *mailbox; + struct device *dev = hr_dev->dev; + u8 cmd = 0xff; int ret; - u16 op = 0xff; if (!hns_roce_check_whether_mhop(hr_dev, table->type)) return 0; switch (table->type) { case HEM_TYPE_QPC: - op = HNS_ROCE_CMD_DESTROY_QPC_BT0; + cmd = HNS_ROCE_CMD_DESTROY_QPC_BT0; break; case HEM_TYPE_MTPT: - op = HNS_ROCE_CMD_DESTROY_MPT_BT0; + cmd = HNS_ROCE_CMD_DESTROY_MPT_BT0; break; case HEM_TYPE_CQC: - op = HNS_ROCE_CMD_DESTROY_CQC_BT0; + cmd = HNS_ROCE_CMD_DESTROY_CQC_BT0; break; case HEM_TYPE_SRQC: - op = HNS_ROCE_CMD_DESTROY_SRQC_BT0; + cmd = HNS_ROCE_CMD_DESTROY_SRQC_BT0; break; case HEM_TYPE_SCCC: case HEM_TYPE_QPC_TIMER: @@ -3976,15 +4192,13 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev, return 0; } - op += step_idx; + cmd += step_idx; mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); - /* configure the tag and op */ - ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, obj, 0, op, - HNS_ROCE_CMD_TIMEOUT_MSECS); + ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, cmd, tag); hns_roce_free_cmd_mailbox(hr_dev, mailbox); return ret; @@ -4008,9 +4222,8 @@ static int hns_roce_v2_qp_modify(struct hns_roce_dev *hr_dev, memcpy(mailbox->buf, context, qpc_size); memcpy(mailbox->buf + qpc_size, qpc_mask, qpc_size); - ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_qp->qpn, 0, - HNS_ROCE_CMD_MODIFY_QPC, - HNS_ROCE_CMD_TIMEOUT_MSECS); + ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, + 
HNS_ROCE_CMD_MODIFY_QPC, hr_qp->qpn); hns_roce_free_cmd_mailbox(hr_dev, mailbox); @@ -4075,7 +4288,6 @@ static inline int get_pdn(struct ib_pd *ib_pd) static void modify_qp_reset_to_init(struct ib_qp *ibqp, const struct ib_qp_attr *attr, - int attr_mask, struct hns_roce_v2_qp_context *context, struct hns_roce_v2_qp_context *qpc_mask) { @@ -4139,7 +4351,7 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, } static void modify_qp_init_to_init(struct ib_qp *ibqp, - const struct ib_qp_attr *attr, int attr_mask, + const struct ib_qp_attr *attr, struct hns_roce_v2_qp_context *context, struct hns_roce_v2_qp_context *qpc_mask) { @@ -4388,7 +4600,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, hr_reg_clear(qpc_mask, QPC_DQPN); } - memcpy(&(context->dmac), dmac, sizeof(u32)); + memcpy(&context->dmac, dmac, sizeof(u32)); hr_reg_write(context, QPC_DMAC_H, *((u16 *)(&dmac[4]))); qpc_mask->dmac = 0; hr_reg_clear(qpc_mask, QPC_DMAC_H); @@ -4482,14 +4694,6 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, return 0; } -static inline u16 get_udp_sport(u32 fl, u32 lqpn, u32 rqpn) -{ - if (!fl) - fl = rdma_calc_flow_label(lqpn, rqpn); - - return rdma_flow_label_to_udp_sport(fl); -} - static int get_dip_ctx_idx(struct ib_qp *ibqp, const struct ib_qp_attr *attr, u32 *dip_idx) { @@ -4666,6 +4870,18 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, u8 hr_port; int ret; + /* + * If free_mr_en of qp is set, it means that this qp comes from + * free mr. This qp will perform the loopback operation. + * In the loopback scenario, only sl needs to be set. + */ + if (hr_qp->free_mr_en) { + hr_reg_write(context, QPC_SL, rdma_ah_get_sl(&attr->ah_attr)); + hr_reg_clear(qpc_mask, QPC_SL); + hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); + return 0; + } + ib_port = (attr_mask & IB_QP_PORT) ? 
attr->port_num : hr_qp->port + 1; hr_port = ib_port - 1; is_roce_protocol = rdma_cap_eth_ah(&hr_dev->ib_dev, ib_port) && @@ -4677,9 +4893,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, if (ret) return ret; - if (gid_attr) - is_udp = (gid_attr->gid_type == - IB_GID_TYPE_ROCE_UDP_ENCAP); + is_udp = (gid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP); } /* Only HIP08 needs to set the vlan_en bits in QPC */ @@ -4706,8 +4920,9 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, } hr_reg_write(context, QPC_UDPSPN, - is_udp ? get_udp_sport(grh->flow_label, ibqp->qp_num, - attr->dest_qp_num) : 0); + is_udp ? rdma_get_udp_sport(grh->flow_label, ibqp->qp_num, + attr->dest_qp_num) : + 0); hr_reg_clear(qpc_mask, QPC_UDPSPN); @@ -4733,7 +4948,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); if (unlikely(hr_qp->sl > MAX_SERVICE_LEVEL)) { ibdev_err(ibdev, - "failed to fill QPC, sl (%d) shouldn't be larger than %d.\n", + "failed to fill QPC, sl (%u) shouldn't be larger than %d.\n", hr_qp->sl, MAX_SERVICE_LEVEL); return -EINVAL; } @@ -4762,7 +4977,8 @@ static bool check_qp_state(enum ib_qp_state cur_state, [IB_QPS_ERR] = true }, [IB_QPS_SQD] = {}, [IB_QPS_SQE] = {}, - [IB_QPS_ERR] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true } + [IB_QPS_ERR] = { [IB_QPS_RESET] = true, + [IB_QPS_ERR] = true } }; return sm[cur_state][new_state]; @@ -4786,11 +5002,9 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp, if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { memset(qpc_mask, 0, hr_dev->caps.qpc_sz); - modify_qp_reset_to_init(ibqp, attr, attr_mask, context, - qpc_mask); + modify_qp_reset_to_init(ibqp, attr, context, qpc_mask); } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { - modify_qp_init_to_init(ibqp, attr, attr_mask, context, - qpc_mask); + modify_qp_init_to_init(ibqp, attr, context, qpc_mask); } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { ret = 
modify_qp_init_to_rtr(ibqp, attr, attr_mask, context, qpc_mask); @@ -4802,6 +5016,30 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp, return ret; } +static bool check_qp_timeout_cfg_range(struct hns_roce_dev *hr_dev, u8 *timeout) +{ +#define QP_ACK_TIMEOUT_MAX_HIP08 20 +#define QP_ACK_TIMEOUT_OFFSET 10 +#define QP_ACK_TIMEOUT_MAX 31 + + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { + if (*timeout > QP_ACK_TIMEOUT_MAX_HIP08) { + ibdev_warn(&hr_dev->ib_dev, + "local ACK timeout shall be 0 to 20.\n"); + return false; + } + *timeout += QP_ACK_TIMEOUT_OFFSET; + } else if (hr_dev->pci_dev->revision > PCI_REVISION_ID_HIP08) { + if (*timeout > QP_ACK_TIMEOUT_MAX) { + ibdev_warn(&hr_dev->ib_dev, + "local ACK timeout shall be 0 to 31.\n"); + return false; + } + } + + return true; +} + static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, @@ -4811,6 +5049,7 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); int ret = 0; + u8 timeout; if (attr_mask & IB_QP_AV) { ret = hns_roce_v2_set_path(ibqp, attr, attr_mask, context, @@ -4820,12 +5059,10 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp, } if (attr_mask & IB_QP_TIMEOUT) { - if (attr->timeout < 31) { - hr_reg_write(context, QPC_AT, attr->timeout); + timeout = attr->timeout; + if (check_qp_timeout_cfg_range(hr_dev, &timeout)) { + hr_reg_write(context, QPC_AT, timeout); hr_reg_clear(qpc_mask, QPC_AT); - } else { - ibdev_warn(&hr_dev->ib_dev, - "Local ACK timeout shall be 0 to 30.\n"); } } @@ -4882,7 +5119,9 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp, set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask); if (attr_mask & IB_QP_MIN_RNR_TIMER) { - hr_reg_write(context, QPC_MIN_RNR_TIME, attr->min_rnr_timer); + hr_reg_write(context, QPC_MIN_RNR_TIME, + hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 ? 
+ HNS_ROCE_RNR_TIMER_10NS : attr->min_rnr_timer); hr_reg_clear(qpc_mask, QPC_MIN_RNR_TIME); } @@ -5053,9 +5292,8 @@ static int to_ib_qp_st(enum hns_roce_v2_qp_state state) return (state < ARRAY_SIZE(map)) ? map[state] : -1; } -static int hns_roce_v2_query_qpc(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, - struct hns_roce_v2_qp_context *hr_context) +static int hns_roce_v2_query_qpc(struct hns_roce_dev *hr_dev, u32 qpn, + void *buffer) { struct hns_roce_cmd_mailbox *mailbox; int ret; @@ -5064,13 +5302,12 @@ static int hns_roce_v2_query_qpc(struct hns_roce_dev *hr_dev, if (IS_ERR(mailbox)) return PTR_ERR(mailbox); - ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, hr_qp->qpn, 0, - HNS_ROCE_CMD_QUERY_QPC, - HNS_ROCE_CMD_TIMEOUT_MSECS); + ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_QPC, + qpn); if (ret) goto out; - memcpy(hr_context, mailbox->buf, hr_dev->caps.qpc_sz); + memcpy(buffer, mailbox->buf, hr_dev->caps.qpc_sz); out: hns_roce_free_cmd_mailbox(hr_dev, mailbox); @@ -5100,7 +5337,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, goto done; } - ret = hns_roce_v2_query_qpc(hr_dev, hr_qp, &context); + ret = hns_roce_v2_query_qpc(hr_dev, hr_qp->qpn, &context); if (ret) { ibdev_err(ibdev, "failed to query QPC, ret = %d.\n", ret); ret = -EINVAL; @@ -5298,7 +5535,7 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev, msleep(20); } - ibdev_err(ibdev, "Query SCC clr done flag overtime.\n"); + ibdev_err(ibdev, "query SCC clr done flag overtime.\n"); ret = -ETIMEDOUT; out: @@ -5432,9 +5669,8 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, hr_reg_write(srq_context, SRQC_LIMIT_WL, srq_attr->srq_limit); hr_reg_clear(srqc_mask, SRQC_LIMIT_WL); - ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, srq->srqn, 0, - HNS_ROCE_CMD_MODIFY_SRQC, - HNS_ROCE_CMD_TIMEOUT_MSECS); + ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, + HNS_ROCE_CMD_MODIFY_SRQC, srq->srqn); 
hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { ibdev_err(&hr_dev->ib_dev, @@ -5460,9 +5696,8 @@ static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) return PTR_ERR(mailbox); srq_context = mailbox->buf; - ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, srq->srqn, 0, - HNS_ROCE_CMD_QUERY_SRQC, - HNS_ROCE_CMD_TIMEOUT_MSECS); + ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, + HNS_ROCE_CMD_QUERY_SRQC, srq->srqn); if (ret) { ibdev_err(&hr_dev->ib_dev, "failed to process cmd of querying SRQ, ret = %d.\n", @@ -5499,12 +5734,21 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) hr_reg_write(cq_context, CQC_CQ_MAX_CNT, cq_count); hr_reg_clear(cqc_mask, CQC_CQ_MAX_CNT); + + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { + if (cq_period * HNS_ROCE_CLOCK_ADJUST > USHRT_MAX) { + dev_info(hr_dev->dev, + "cq_period(%u) reached the upper limit, adjusted to 65.\n", + cq_period); + cq_period = HNS_ROCE_MAX_CQ_PERIOD; + } + cq_period *= HNS_ROCE_CLOCK_ADJUST; + } hr_reg_write(cq_context, CQC_CQ_PERIOD, cq_period); hr_reg_clear(cqc_mask, CQC_CQ_PERIOD); - ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_cq->cqn, 1, - HNS_ROCE_CMD_MODIFY_CQC, - HNS_ROCE_CMD_TIMEOUT_MSECS); + ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, + HNS_ROCE_CMD_MODIFY_CQC, hr_cq->cqn); hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) ibdev_err(&hr_dev->ib_dev, @@ -5514,6 +5758,64 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) return ret; } +static int hns_roce_v2_query_cqc(struct hns_roce_dev *hr_dev, u32 cqn, + void *buffer) +{ + struct hns_roce_v2_cq_context *context; + struct hns_roce_cmd_mailbox *mailbox; + int ret; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + context = mailbox->buf; + ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, + HNS_ROCE_CMD_QUERY_CQC, cqn); + if (ret) { + ibdev_err(&hr_dev->ib_dev, + "failed to 
process cmd when querying CQ, ret = %d.\n", + ret); + goto err_mailbox; + } + + memcpy(buffer, context, sizeof(*context)); + +err_mailbox: + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + + return ret; +} + +static int hns_roce_v2_query_mpt(struct hns_roce_dev *hr_dev, u32 key, + void *buffer) +{ + struct hns_roce_v2_mpt_entry *context; + struct hns_roce_cmd_mailbox *mailbox; + int ret; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + context = mailbox->buf; + ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_MPT, + key_to_hw_index(key)); + if (ret) { + ibdev_err(&hr_dev->ib_dev, + "failed to process cmd when querying MPT, ret = %d.\n", + ret); + goto err_mailbox; + } + + memcpy(buffer, context, sizeof(*context)); + +err_mailbox: + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + + return ret; +} + static void hns_roce_irq_work_handle(struct work_struct *work) { struct hns_roce_work *irq_work = @@ -5522,26 +5824,26 @@ static void hns_roce_irq_work_handle(struct work_struct *work) switch (irq_work->event_type) { case HNS_ROCE_EVENT_TYPE_PATH_MIG: - ibdev_info(ibdev, "Path migrated succeeded.\n"); + ibdev_info(ibdev, "path migrated succeeded.\n"); break; case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED: - ibdev_warn(ibdev, "Path migration failed.\n"); + ibdev_warn(ibdev, "path migration failed.\n"); break; case HNS_ROCE_EVENT_TYPE_COMM_EST: break; case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: - ibdev_warn(ibdev, "Send queue drained.\n"); + ibdev_warn(ibdev, "send queue drained.\n"); break; case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: - ibdev_err(ibdev, "Local work queue 0x%x catast error, sub_event type is: %d\n", + ibdev_err(ibdev, "local work queue 0x%x catast error, sub_event type is: %d\n", irq_work->queue_num, irq_work->sub_type); break; case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: - ibdev_err(ibdev, "Invalid request local work queue 0x%x error.\n", + ibdev_err(ibdev, "invalid request local work queue 0x%x 
error.\n", irq_work->queue_num); break; case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: - ibdev_err(ibdev, "Local access violation work queue 0x%x error, sub_event type is: %d\n", + ibdev_err(ibdev, "local access violation work queue 0x%x error, sub_event type is: %d\n", irq_work->queue_num, irq_work->sub_type); break; case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: @@ -5563,7 +5865,7 @@ static void hns_roce_irq_work_handle(struct work_struct *work) ibdev_warn(ibdev, "DB overflow.\n"); break; case HNS_ROCE_EVENT_TYPE_FLR: - ibdev_warn(ibdev, "Function level reset.\n"); + ibdev_warn(ibdev, "function level reset.\n"); break; case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION: ibdev_err(ibdev, "xrc domain violation error.\n"); @@ -5587,12 +5889,12 @@ static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev, if (!irq_work) return; - INIT_WORK(&(irq_work->work), hns_roce_irq_work_handle); + INIT_WORK(&irq_work->work, hns_roce_irq_work_handle); irq_work->hr_dev = hr_dev; irq_work->event_type = eq->event_type; irq_work->sub_type = eq->sub_type; irq_work->queue_num = queue_num; - queue_work(hr_dev->irq_workq, &(irq_work->work)); + queue_work(hr_dev->irq_workq, &irq_work->work); } static void update_eq_db(struct hns_roce_eq *eq) @@ -5627,16 +5929,16 @@ static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq) (eq->cons_index & (eq->entries - 1)) * eq->eqe_size); - return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^ + return (hr_reg_read(aeqe, AEQE_OWNER) ^ !!(eq->cons_index & eq->entries)) ? 
aeqe : NULL; } -static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, - struct hns_roce_eq *eq) +static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, + struct hns_roce_eq *eq) { struct device *dev = hr_dev->dev; struct hns_roce_aeqe *aeqe = next_aeqe_sw_v2(eq); - int aeqe_found = 0; + irqreturn_t aeqe_found = IRQ_NONE; int event_type; u32 queue_num; int sub_type; @@ -5647,15 +5949,9 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, */ dma_rmb(); - event_type = roce_get_field(aeqe->asyn, - HNS_ROCE_V2_AEQE_EVENT_TYPE_M, - HNS_ROCE_V2_AEQE_EVENT_TYPE_S); - sub_type = roce_get_field(aeqe->asyn, - HNS_ROCE_V2_AEQE_SUB_TYPE_M, - HNS_ROCE_V2_AEQE_SUB_TYPE_S); - queue_num = roce_get_field(aeqe->event.queue_event.num, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); + event_type = hr_reg_read(aeqe, AEQE_EVENT_TYPE); + sub_type = hr_reg_read(aeqe, AEQE_SUB_TYPE); + queue_num = hr_reg_read(aeqe, AEQE_EVENT_QUEUE_NUM); switch (event_type) { case HNS_ROCE_EVENT_TYPE_PATH_MIG: @@ -5688,7 +5984,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, case HNS_ROCE_EVENT_TYPE_FLR: break; default: - dev_err(dev, "Unhandled event %d on EQ %d at idx %u.\n", + dev_err(dev, "unhandled event %d on EQ %d at idx %u.\n", event_type, eq->eqn, eq->cons_index); break; } @@ -5696,7 +5992,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, eq->event_type = event_type; eq->sub_type = sub_type; ++eq->cons_index; - aeqe_found = 1; + aeqe_found = IRQ_HANDLED; hns_roce_v2_init_irq_work(hr_dev, eq, queue_num); @@ -5704,7 +6000,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, } update_eq_db(eq); - return aeqe_found; + + return IRQ_RETVAL(aeqe_found); } static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq) @@ -5715,15 +6012,15 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq) (eq->cons_index & (eq->entries - 1)) * eq->eqe_size); - return (!!(roce_get_bit(ceqe->comp, 
HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^ - (!!(eq->cons_index & eq->entries)) ? ceqe : NULL; + return (hr_reg_read(ceqe, CEQE_OWNER) ^ + !!(eq->cons_index & eq->entries)) ? ceqe : NULL; } -static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev, - struct hns_roce_eq *eq) +static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev, + struct hns_roce_eq *eq) { struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq); - int ceqe_found = 0; + irqreturn_t ceqe_found = IRQ_NONE; u32 cqn; while (ceqe) { @@ -5732,59 +6029,53 @@ static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev, */ dma_rmb(); - cqn = roce_get_field(ceqe->comp, HNS_ROCE_V2_CEQE_COMP_CQN_M, - HNS_ROCE_V2_CEQE_COMP_CQN_S); + cqn = hr_reg_read(ceqe, CEQE_CQN); hns_roce_cq_completion(hr_dev, cqn); ++eq->cons_index; - ceqe_found = 1; + ceqe_found = IRQ_HANDLED; ceqe = next_ceqe_sw_v2(eq); } update_eq_db(eq); - return ceqe_found; + return IRQ_RETVAL(ceqe_found); } static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr) { struct hns_roce_eq *eq = eq_ptr; struct hns_roce_dev *hr_dev = eq->hr_dev; - int int_work; + irqreturn_t int_work; if (eq->type_flag == HNS_ROCE_CEQ) /* Completion event interrupt */ int_work = hns_roce_v2_ceq_int(hr_dev, eq); else - /* Asychronous event interrupt */ + /* Asynchronous event interrupt */ int_work = hns_roce_v2_aeq_int(hr_dev, eq); return IRQ_RETVAL(int_work); } -static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id) +static irqreturn_t abnormal_interrupt_basic(struct hns_roce_dev *hr_dev, + u32 int_st) { - struct hns_roce_dev *hr_dev = dev_id; - struct device *dev = hr_dev->dev; - int int_work = 0; - u32 int_st; + struct pci_dev *pdev = hr_dev->pci_dev; + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev); + const struct hnae3_ae_ops *ops = ae_dev->ops; + irqreturn_t int_work = IRQ_NONE; u32 int_en; - /* Abnormal interrupt */ - int_st = roce_read(hr_dev, ROCEE_VF_ABN_INT_ST_REG); int_en = roce_read(hr_dev, ROCEE_VF_ABN_INT_EN_REG); if (int_st 
& BIT(HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S)) { - struct pci_dev *pdev = hr_dev->pci_dev; - struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev); - const struct hnae3_ae_ops *ops = ae_dev->ops; + dev_err(hr_dev->dev, "AEQ overflow!\n"); - dev_err(dev, "AEQ overflow!\n"); - - int_st |= 1 << HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S; - roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st); + roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, + 1 << HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S); /* Set reset level for reset_event() */ if (ops->set_default_reset_request) @@ -5796,19 +6087,165 @@ static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id) int_en |= 1 << HNS_ROCE_V2_VF_ABN_INT_EN_S; roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en); - int_work = 1; - } else if (int_st & BIT(HNS_ROCE_V2_VF_INT_ST_RAS_INT_S)) { - dev_err(dev, "RAS interrupt!\n"); + int_work = IRQ_HANDLED; + } else { + dev_err(hr_dev->dev, "there is no basic abn irq found.\n"); + } - int_st |= 1 << HNS_ROCE_V2_VF_INT_ST_RAS_INT_S; - roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st); + return IRQ_RETVAL(int_work); +} - int_en |= 1 << HNS_ROCE_V2_VF_ABN_INT_EN_S; - roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en); +static int fmea_ram_ecc_query(struct hns_roce_dev *hr_dev, + struct fmea_ram_ecc *ecc_info) +{ + struct hns_roce_cmq_desc desc; + struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data; + int ret; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_QUERY_RAM_ECC, true); + ret = hns_roce_cmq_send(hr_dev, &desc, 1); + if (ret) + return ret; + + ecc_info->is_ecc_err = hr_reg_read(req, QUERY_RAM_ECC_1BIT_ERR); + ecc_info->res_type = hr_reg_read(req, QUERY_RAM_ECC_RES_TYPE); + ecc_info->index = hr_reg_read(req, QUERY_RAM_ECC_TAG); + + return 0; +} + +static int fmea_recover_gmv(struct hns_roce_dev *hr_dev, u32 idx) +{ + struct hns_roce_cmq_desc desc; + struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data; + u32 addr_upper; + u32 addr_low; + int ret; + + 
hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GMV_BT, true); + hr_reg_write(req, CFG_GMV_BT_IDX, idx); + + ret = hns_roce_cmq_send(hr_dev, &desc, 1); + if (ret) { + dev_err(hr_dev->dev, + "failed to execute cmd to read gmv, ret = %d.\n", ret); + return ret; + } + + addr_low = hr_reg_read(req, CFG_GMV_BT_BA_L); + addr_upper = hr_reg_read(req, CFG_GMV_BT_BA_H); + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GMV_BT, false); + hr_reg_write(req, CFG_GMV_BT_BA_L, addr_low); + hr_reg_write(req, CFG_GMV_BT_BA_H, addr_upper); + hr_reg_write(req, CFG_GMV_BT_IDX, idx); + + return hns_roce_cmq_send(hr_dev, &desc, 1); +} + +static u64 fmea_get_ram_res_addr(u32 res_type, __le64 *data) +{ + if (res_type == ECC_RESOURCE_QPC_TIMER || + res_type == ECC_RESOURCE_CQC_TIMER || + res_type == ECC_RESOURCE_SCCC) + return le64_to_cpu(*data); + + return le64_to_cpu(*data) << PAGE_SHIFT; +} + +static int fmea_recover_others(struct hns_roce_dev *hr_dev, u32 res_type, + u32 index) +{ + u8 write_bt0_op = fmea_ram_res[res_type].write_bt0_op; + u8 read_bt0_op = fmea_ram_res[res_type].read_bt0_op; + struct hns_roce_cmd_mailbox *mailbox; + u64 addr; + int ret; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, read_bt0_op, index); + if (ret) { + dev_err(hr_dev->dev, + "failed to execute cmd to read fmea ram, ret = %d.\n", + ret); + goto out; + } + + addr = fmea_get_ram_res_addr(res_type, mailbox->buf); + + ret = hns_roce_cmd_mbox(hr_dev, addr, 0, write_bt0_op, index); + if (ret) + dev_err(hr_dev->dev, + "failed to execute cmd to write fmea ram, ret = %d.\n", + ret); + +out: + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + return ret; +} + +static void fmea_ram_ecc_recover(struct hns_roce_dev *hr_dev, + struct fmea_ram_ecc *ecc_info) +{ + u32 res_type = ecc_info->res_type; + u32 index = ecc_info->index; + int ret; + + BUILD_BUG_ON(ARRAY_SIZE(fmea_ram_res) != ECC_RESOURCE_COUNT); - 
int_work = 1; + if (res_type >= ECC_RESOURCE_COUNT) { + dev_err(hr_dev->dev, "unsupported fmea ram ecc type %u.\n", + res_type); + return; + } + + if (res_type == ECC_RESOURCE_GMV) + ret = fmea_recover_gmv(hr_dev, index); + else + ret = fmea_recover_others(hr_dev, res_type, index); + if (ret) + dev_err(hr_dev->dev, + "failed to recover %s, index = %u, ret = %d.\n", + fmea_ram_res[res_type].name, index, ret); +} + +static void fmea_ram_ecc_work(struct work_struct *ecc_work) +{ + struct hns_roce_dev *hr_dev = + container_of(ecc_work, struct hns_roce_dev, ecc_work); + struct fmea_ram_ecc ecc_info = {}; + + if (fmea_ram_ecc_query(hr_dev, &ecc_info)) { + dev_err(hr_dev->dev, "failed to query fmea ram ecc.\n"); + return; + } + + if (!ecc_info.is_ecc_err) { + dev_err(hr_dev->dev, "there is no fmea ram ecc err found.\n"); + return; + } + + fmea_ram_ecc_recover(hr_dev, &ecc_info); +} + +static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id) +{ + struct hns_roce_dev *hr_dev = dev_id; + irqreturn_t int_work = IRQ_NONE; + u32 int_st; + + int_st = roce_read(hr_dev, ROCEE_VF_ABN_INT_ST_REG); + + if (int_st) { + int_work = abnormal_interrupt_basic(hr_dev, int_st); + } else if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { + queue_work(hr_dev->irq_workq, &hr_dev->ecc_work); + int_work = IRQ_HANDLED; } else { - dev_err(dev, "There is no abnormal irq found!\n"); + dev_err(hr_dev->dev, "there is no abnormal irq found.\n"); } return IRQ_RETVAL(int_work); @@ -5827,21 +6264,20 @@ static void hns_roce_v2_int_mask_enable(struct hns_roce_dev *hr_dev, roce_write(hr_dev, ROCEE_VF_ABN_INT_CFG_REG, enable_flag); } -static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, int eqn) +static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, u32 eqn) { struct device *dev = hr_dev->dev; int ret; + u8 cmd; if (eqn < hr_dev->caps.num_comp_vectors) - ret = hns_roce_cmd_mbox(hr_dev, 0, 0, eqn & HNS_ROCE_V2_EQN_M, - 0, HNS_ROCE_CMD_DESTROY_CEQC, - 
HNS_ROCE_CMD_TIMEOUT_MSECS); + cmd = HNS_ROCE_CMD_DESTROY_CEQC; else - ret = hns_roce_cmd_mbox(hr_dev, 0, 0, eqn & HNS_ROCE_V2_EQN_M, - 0, HNS_ROCE_CMD_DESTROY_AEQC, - HNS_ROCE_CMD_TIMEOUT_MSECS); + cmd = HNS_ROCE_CMD_DESTROY_AEQC; + + ret = hns_roce_destroy_hw_ctx(hr_dev, cmd, eqn & HNS_ROCE_V2_EQN_M); if (ret) - dev_err(dev, "[mailbox cmd] destroy eqc(%d) failed.\n", eqn); + dev_err(dev, "[mailbox cmd] destroy eqc(%u) failed.\n", eqn); } static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) @@ -5894,6 +6330,15 @@ static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq, hr_reg_write(eqc, EQC_EQ_PROD_INDX, HNS_ROCE_EQ_INIT_PROD_IDX); hr_reg_write(eqc, EQC_EQ_MAX_CNT, eq->eq_max_cnt); + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { + if (eq->eq_period * HNS_ROCE_CLOCK_ADJUST > USHRT_MAX) { + dev_info(hr_dev->dev, "eq_period(%u) reached the upper limit, adjusted to 65.\n", + eq->eq_period); + eq->eq_period = HNS_ROCE_MAX_EQ_PERIOD; + } + eq->eq_period *= HNS_ROCE_CLOCK_ADJUST; + } + hr_reg_write(eqc, EQC_EQ_PERIOD, eq->eq_period); hr_reg_write(eqc, EQC_EQE_REPORT_TIMER, HNS_ROCE_EQ_INIT_REPORT_TIMER); hr_reg_write(eqc, EQC_EQE_BA_L, bt_ba >> 3); @@ -5930,22 +6375,21 @@ static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT, NULL, 0); if (err) - dev_err(hr_dev->dev, "Failed to alloc EQE mtr, err %d\n", err); + dev_err(hr_dev->dev, "failed to alloc EQE mtr, err %d\n", err); return err; } static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev, - struct hns_roce_eq *eq, - unsigned int eq_cmd) + struct hns_roce_eq *eq, u8 eq_cmd) { struct hns_roce_cmd_mailbox *mailbox; int ret; /* Allocate mailbox memory */ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR_OR_NULL(mailbox)) - return -ENOMEM; + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); ret = alloc_eq_buf(hr_dev, eq); if (ret) @@ -5955,8 +6399,7 @@ static int hns_roce_v2_create_eq(struct 
hns_roce_dev *hr_dev, if (ret) goto err_cmd_mbox; - ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, eq->eqn, 0, - eq_cmd, HNS_ROCE_CMD_TIMEOUT_MSECS); + ret = hns_roce_create_hw_ctx(hr_dev, mailbox, eq_cmd, eq->eqn); if (ret) { dev_err(hr_dev->dev, "[mailbox cmd] create eqc failed.\n"); goto err_cmd_mbox; @@ -6021,7 +6464,7 @@ static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num, 0, hr_dev->irq_names[j - comp_num], &eq_table->eq[j - other_num]); if (ret) { - dev_err(hr_dev->dev, "Request irq error!\n"); + dev_err(hr_dev->dev, "request irq error!\n"); goto err_request_failed; } } @@ -6067,14 +6510,14 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev) struct hns_roce_eq_table *eq_table = &hr_dev->eq_table; struct device *dev = hr_dev->dev; struct hns_roce_eq *eq; - unsigned int eq_cmd; - int irq_num; - int eq_num; int other_num; int comp_num; int aeq_num; - int i; + int irq_num; + int eq_num; + u8 eq_cmd; int ret; + int i; other_num = hr_dev->caps.num_other_vectors; comp_num = hr_dev->caps.num_comp_vectors; @@ -6119,6 +6562,8 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev) } } + INIT_WORK(&hr_dev->ecc_work, fmea_ram_ecc_work); + hr_dev->irq_workq = alloc_ordered_workqueue("hns_roce_irq_workq", 0); if (!hr_dev->irq_workq) { dev_err(dev, "failed to create irq workqueue.\n"); @@ -6172,10 +6617,6 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev) kfree(eq_table->eq); } -static const struct hns_roce_dfx_hw hns_roce_dfx_hw_v2 = { - .query_cqc_info = hns_roce_v2_query_cqc_info, -}; - static const struct ib_device_ops hns_roce_v2_dev_ops = { .destroy_qp = hns_roce_v2_destroy_qp, .modify_cq = hns_roce_v2_modify_cq, @@ -6211,10 +6652,14 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .set_hem = hns_roce_v2_set_hem, .clear_hem = hns_roce_v2_clear_hem, .modify_qp = hns_roce_v2_modify_qp, + .dereg_mr = hns_roce_v2_dereg_mr, .qp_flow_control_init = hns_roce_v2_qp_flow_control_init, .init_eq = 
hns_roce_v2_init_eq_table, .cleanup_eq = hns_roce_v2_cleanup_eq_table, .write_srqc = hns_roce_v2_write_srqc, + .query_cqc = hns_roce_v2_query_cqc, + .query_qpc = hns_roce_v2_query_qpc, + .query_mpt = hns_roce_v2_query_mpt, .hns_roce_dev_ops = &hns_roce_v2_dev_ops, .hns_roce_dev_srq_ops = &hns_roce_v2_dev_srq_ops, }; @@ -6246,7 +6691,6 @@ static void hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, hr_dev->is_vf = id->driver_data; hr_dev->dev = &handle->pdev->dev; hr_dev->hw = &hns_roce_hw_v2; - hr_dev->dfx = &hns_roce_dfx_hw_v2; hr_dev->sdb_offset = ROCEE_DB_SQ_L_0_REG; hr_dev->odb_offset = hr_dev->sdb_offset; @@ -6292,14 +6736,25 @@ static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) ret = hns_roce_init(hr_dev); if (ret) { dev_err(hr_dev->dev, "RoCE Engine init failed!\n"); - goto error_failed_get_cfg; + goto error_failed_cfg; + } + + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { + ret = free_mr_init(hr_dev); + if (ret) { + dev_err(hr_dev->dev, "failed to init free mr!\n"); + goto error_failed_roce_init; + } } handle->priv = hr_dev; return 0; -error_failed_get_cfg: +error_failed_roce_init: + hns_roce_exit(hr_dev); + +error_failed_cfg: kfree(hr_dev->priv); error_failed_kzalloc: @@ -6321,6 +6776,9 @@ static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, hr_dev->state = HNS_ROCE_DEVICE_STATE_UNINIT; hns_roce_handle_device_err(hr_dev); + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) + free_mr_exit(hr_dev); + hns_roce_exit(hr_dev); kfree(hr_dev->priv); ib_dealloc_device(&hr_dev->ib_dev); @@ -6344,7 +6802,7 @@ static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) if (!id) return 0; - if (id->driver_data && handle->pdev->revision < PCI_REVISION_ID_HIP09) + if (id->driver_data && handle->pdev->revision == PCI_REVISION_ID_HIP08) return 0; ret = __hns_roce_hw_v2_init_instance(handle); @@ -6428,7 +6886,7 @@ static int hns_roce_hw_v2_reset_notify_init(struct hnae3_handle *handle) dev_err(dev, "In 
reset process RoCE reinit failed %d.\n", ret); } else { handle->rinfo.reset_state = HNS_ROCE_STATE_RST_INITED; - dev_info(dev, "Reset done, RoCE client reinit finished.\n"); + dev_info(dev, "reset done, RoCE client reinit finished.\n"); } return ret; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 4d904d5e82be..c7bf2d52c1cd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -35,41 +35,25 @@ #include <linux/bitops.h> -#define HNS_ROCE_VF_QPC_BT_NUM 256 -#define HNS_ROCE_VF_SCCC_BT_NUM 64 -#define HNS_ROCE_VF_SRQC_BT_NUM 64 -#define HNS_ROCE_VF_CQC_BT_NUM 64 -#define HNS_ROCE_VF_MPT_BT_NUM 64 -#define HNS_ROCE_VF_SMAC_NUM 32 -#define HNS_ROCE_VF_SL_NUM 8 -#define HNS_ROCE_VF_GMV_BT_NUM 256 - #define HNS_ROCE_V2_MAX_QP_NUM 0x1000 -#define HNS_ROCE_V2_MAX_QPC_TIMER_NUM 0x200 #define HNS_ROCE_V2_MAX_WQE_NUM 0x8000 -#define HNS_ROCE_V2_MAX_SRQ 0x100000 #define HNS_ROCE_V2_MAX_SRQ_WR 0x8000 #define HNS_ROCE_V2_MAX_SRQ_SGE 64 #define HNS_ROCE_V2_MAX_CQ_NUM 0x100000 -#define HNS_ROCE_V2_MAX_CQC_TIMER_NUM 0x100 +#define HNS_ROCE_V2_MAX_QPC_TIMER_BT_NUM 0x100 +#define HNS_ROCE_V2_MAX_CQC_TIMER_BT_NUM 0x100 #define HNS_ROCE_V2_MAX_SRQ_NUM 0x100000 #define HNS_ROCE_V2_MAX_CQE_NUM 0x400000 -#define HNS_ROCE_V2_MAX_SRQWQE_NUM 0x8000 #define HNS_ROCE_V2_MAX_RQ_SGE_NUM 64 #define HNS_ROCE_V2_MAX_SQ_SGE_NUM 64 -#define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM 0x200000 #define HNS_ROCE_V2_MAX_SQ_INLINE 0x20 #define HNS_ROCE_V3_MAX_SQ_INLINE 0x400 #define HNS_ROCE_V2_MAX_RC_INL_INN_SZ 32 #define HNS_ROCE_V2_UAR_NUM 256 #define HNS_ROCE_V2_PHY_UAR_NUM 1 -#define HNS_ROCE_V2_MAX_IRQ_NUM 65 -#define HNS_ROCE_V2_COMP_VEC_NUM 63 #define HNS_ROCE_V2_AEQE_VEC_NUM 1 #define HNS_ROCE_V2_ABNORMAL_VEC_NUM 1 #define HNS_ROCE_V2_MAX_MTPT_NUM 0x100000 -#define HNS_ROCE_V2_MAX_MTT_SEGS 0x1000000 -#define HNS_ROCE_V2_MAX_CQE_SEGS 0x1000000 #define HNS_ROCE_V2_MAX_SRQWQE_SEGS 0x1000000 #define 
HNS_ROCE_V2_MAX_IDX_SEGS 0x1000000 #define HNS_ROCE_V2_MAX_PD_NUM 0x1000000 @@ -79,9 +63,7 @@ #define HNS_ROCE_V2_MAX_QP_DEST_RDMA 128 #define HNS_ROCE_V2_MAX_SQ_DESC_SZ 64 #define HNS_ROCE_V2_MAX_RQ_DESC_SZ 16 -#define HNS_ROCE_V2_MAX_SRQ_DESC_SZ 64 #define HNS_ROCE_V2_IRRL_ENTRY_SZ 64 -#define HNS_ROCE_V2_TRRL_ENTRY_SZ 48 #define HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ 100 #define HNS_ROCE_V2_CQC_ENTRY_SZ 64 #define HNS_ROCE_V2_SRQC_ENTRY_SZ 64 @@ -98,12 +80,11 @@ #define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ PAGE_SIZE #define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE -#define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000 +#define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFF000 #define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2 #define HNS_ROCE_INVALID_LKEY 0x0 #define HNS_ROCE_INVALID_SGE_LENGTH 0x80000000 #define HNS_ROCE_CMQ_TX_TIMEOUT 30000 -#define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2 #define HNS_ROCE_V2_RSV_QPS 8 #define HNS_ROCE_V2_HW_RST_TIMEOUT 1000 @@ -117,12 +98,14 @@ #define HNS_ROCE_CQE_HOP_NUM 1 #define HNS_ROCE_SRQWQE_HOP_NUM 1 #define HNS_ROCE_PBL_HOP_NUM 2 -#define HNS_ROCE_EQE_HOP_NUM 2 #define HNS_ROCE_IDX_HOP_NUM 1 #define HNS_ROCE_SQWQE_HOP_NUM 2 #define HNS_ROCE_EXT_SGE_HOP_NUM 1 #define HNS_ROCE_RQWQE_HOP_NUM 2 +#define HNS_ROCE_V2_EQE_HOP_NUM 2 +#define HNS_ROCE_V3_EQE_HOP_NUM 1 + #define HNS_ROCE_BA_PG_SZ_SUPPORTED_256K 6 #define HNS_ROCE_BA_PG_SZ_SUPPORTED_16K 2 #define HNS_ROCE_V2_GID_INDEX_NUM 16 @@ -153,6 +136,18 @@ enum { #define CMD_CSQ_DESC_NUM 1024 #define CMD_CRQ_DESC_NUM 1024 +/* Free mr used parameters */ +#define HNS_ROCE_FREE_MR_USED_CQE_NUM 128 +#define HNS_ROCE_FREE_MR_USED_QP_NUM 0x8 +#define HNS_ROCE_FREE_MR_USED_PSN 0x0808 +#define HNS_ROCE_FREE_MR_USED_QP_RETRY_CNT 0x7 +#define HNS_ROCE_FREE_MR_USED_QP_TIMEOUT 0x12 +#define HNS_ROCE_FREE_MR_USED_SQWQE_NUM 128 +#define HNS_ROCE_FREE_MR_USED_SQSGE_NUM 0x2 +#define HNS_ROCE_FREE_MR_USED_RQWQE_NUM 128 +#define HNS_ROCE_FREE_MR_USED_RQSGE_NUM 0x2 +#define HNS_ROCE_V2_FREE_MR_TIMEOUT 4500 + enum { NO_ARMED 
= 0x0, REG_NXT_CEQE = 0x2, @@ -184,7 +179,6 @@ enum { HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP = 0x8, HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD = 0x9, HNS_ROCE_V2_WQE_OP_FAST_REG_PMR = 0xa, - HNS_ROCE_V2_WQE_OP_LOCAL_INV = 0xb, HNS_ROCE_V2_WQE_OP_BIND_MW = 0xc, HNS_ROCE_V2_WQE_OP_MASK = 0x1f, }; @@ -252,6 +246,7 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_CFG_GMV_TBL = 0x850f, HNS_ROCE_OPC_CFG_GMV_BT = 0x8510, HNS_ROCE_OPC_EXT_CFG = 0x8512, + HNS_ROCE_QUERY_RAM_ECC = 0x8513, HNS_SWITCH_PARAMETER_CFG = 0x1033, }; @@ -305,33 +300,6 @@ struct hns_roce_v2_cq_context { #define HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM 0x0 #define HNS_ROCE_V2_CQ_DEFAULT_INTERVAL 0x0 -#define V2_CQC_BYTE_4_ARM_ST_S 6 -#define V2_CQC_BYTE_4_ARM_ST_M GENMASK(7, 6) - -#define V2_CQC_BYTE_4_CEQN_S 15 -#define V2_CQC_BYTE_4_CEQN_M GENMASK(23, 15) - -#define V2_CQC_BYTE_8_CQN_S 0 -#define V2_CQC_BYTE_8_CQN_M GENMASK(23, 0) - -#define V2_CQC_BYTE_16_CQE_HOP_NUM_S 30 -#define V2_CQC_BYTE_16_CQE_HOP_NUM_M GENMASK(31, 30) - -#define V2_CQC_BYTE_28_CQ_PRODUCER_IDX_S 0 -#define V2_CQC_BYTE_28_CQ_PRODUCER_IDX_M GENMASK(23, 0) - -#define V2_CQC_BYTE_32_CQ_CONSUMER_IDX_S 0 -#define V2_CQC_BYTE_32_CQ_CONSUMER_IDX_M GENMASK(23, 0) - -#define V2_CQC_BYTE_52_CQE_CNT_S 0 -#define V2_CQC_BYTE_52_CQE_CNT_M GENMASK(23, 0) - -#define V2_CQC_BYTE_56_CQ_MAX_CNT_S 0 -#define V2_CQC_BYTE_56_CQ_MAX_CNT_M GENMASK(15, 0) - -#define V2_CQC_BYTE_56_CQ_PERIOD_S 16 -#define V2_CQC_BYTE_56_CQ_PERIOD_M GENMASK(31, 16) - #define CQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_cq_context, h, l) #define CQC_CQ_ST CQC_FIELD_LOC(1, 0) @@ -434,6 +402,7 @@ enum hns_roce_v2_qp_state { struct hns_roce_v2_qp_context_ex { __le32 data[64]; }; + struct hns_roce_v2_qp_context { __le32 byte_4_sqpn_tst; __le32 wqe_sge_ba; @@ -786,16 +755,20 @@ struct hns_roce_v2_mpt_entry { #define MPT_INNER_PA_VLD MPT_FIELD_LOC(71, 71) #define MPT_MW_BIND_QPN MPT_FIELD_LOC(95, 72) #define MPT_BOUND_LKEY MPT_FIELD_LOC(127, 96) -#define MPT_LEN MPT_FIELD_LOC(191, 
128) +#define MPT_LEN_L MPT_FIELD_LOC(159, 128) +#define MPT_LEN_H MPT_FIELD_LOC(191, 160) #define MPT_LKEY MPT_FIELD_LOC(223, 192) #define MPT_VA MPT_FIELD_LOC(287, 224) #define MPT_PBL_SIZE MPT_FIELD_LOC(319, 288) -#define MPT_PBL_BA MPT_FIELD_LOC(380, 320) +#define MPT_PBL_BA_L MPT_FIELD_LOC(351, 320) +#define MPT_PBL_BA_H MPT_FIELD_LOC(380, 352) #define MPT_BLK_MODE MPT_FIELD_LOC(381, 381) #define MPT_RSV0 MPT_FIELD_LOC(383, 382) -#define MPT_PA0 MPT_FIELD_LOC(441, 384) +#define MPT_PA0_L MPT_FIELD_LOC(415, 384) +#define MPT_PA0_H MPT_FIELD_LOC(441, 416) #define MPT_BOUND_VA MPT_FIELD_LOC(447, 442) -#define MPT_PA1 MPT_FIELD_LOC(505, 448) +#define MPT_PA1_L MPT_FIELD_LOC(479, 448) +#define MPT_PA1_H MPT_FIELD_LOC(505, 480) #define MPT_PERSIST_EN MPT_FIELD_LOC(506, 506) #define MPT_RSV2 MPT_FIELD_LOC(507, 507) #define MPT_PBL_BUF_PG_SZ MPT_FIELD_LOC(511, 508) @@ -901,48 +874,24 @@ struct hns_roce_v2_ud_send_wqe { u8 dgid[GID_LEN_V2]; }; -#define V2_UD_SEND_WQE_BYTE_4_OPCODE_S 0 -#define V2_UD_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0) - -#define V2_UD_SEND_WQE_BYTE_4_OWNER_S 7 - -#define V2_UD_SEND_WQE_BYTE_4_CQE_S 8 - -#define V2_UD_SEND_WQE_BYTE_4_SE_S 11 - -#define V2_UD_SEND_WQE_BYTE_16_PD_S 0 -#define V2_UD_SEND_WQE_BYTE_16_PD_M GENMASK(23, 0) - -#define V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S 24 -#define V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M GENMASK(31, 24) - -#define V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0 -#define V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0) - -#define V2_UD_SEND_WQE_BYTE_24_UDPSPN_S 16 -#define V2_UD_SEND_WQE_BYTE_24_UDPSPN_M GENMASK(31, 16) - -#define V2_UD_SEND_WQE_BYTE_32_DQPN_S 0 -#define V2_UD_SEND_WQE_BYTE_32_DQPN_M GENMASK(23, 0) - -#define V2_UD_SEND_WQE_BYTE_36_VLAN_S 0 -#define V2_UD_SEND_WQE_BYTE_36_VLAN_M GENMASK(15, 0) - -#define V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S 16 -#define V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M GENMASK(23, 16) - -#define V2_UD_SEND_WQE_BYTE_36_TCLASS_S 24 -#define V2_UD_SEND_WQE_BYTE_36_TCLASS_M GENMASK(31, 
24) - -#define V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S 0 -#define V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M GENMASK(19, 0) - -#define V2_UD_SEND_WQE_BYTE_40_SL_S 20 -#define V2_UD_SEND_WQE_BYTE_40_SL_M GENMASK(23, 20) - -#define V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S 30 - -#define V2_UD_SEND_WQE_BYTE_40_LBI_S 31 +#define UD_SEND_WQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_ud_send_wqe, h, l) + +#define UD_SEND_WQE_OPCODE UD_SEND_WQE_FIELD_LOC(4, 0) +#define UD_SEND_WQE_OWNER UD_SEND_WQE_FIELD_LOC(7, 7) +#define UD_SEND_WQE_CQE UD_SEND_WQE_FIELD_LOC(8, 8) +#define UD_SEND_WQE_SE UD_SEND_WQE_FIELD_LOC(11, 11) +#define UD_SEND_WQE_PD UD_SEND_WQE_FIELD_LOC(119, 96) +#define UD_SEND_WQE_SGE_NUM UD_SEND_WQE_FIELD_LOC(127, 120) +#define UD_SEND_WQE_MSG_START_SGE_IDX UD_SEND_WQE_FIELD_LOC(151, 128) +#define UD_SEND_WQE_UDPSPN UD_SEND_WQE_FIELD_LOC(191, 176) +#define UD_SEND_WQE_DQPN UD_SEND_WQE_FIELD_LOC(247, 224) +#define UD_SEND_WQE_VLAN UD_SEND_WQE_FIELD_LOC(271, 256) +#define UD_SEND_WQE_HOPLIMIT UD_SEND_WQE_FIELD_LOC(279, 272) +#define UD_SEND_WQE_TCLASS UD_SEND_WQE_FIELD_LOC(287, 280) +#define UD_SEND_WQE_FLOW_LABEL UD_SEND_WQE_FIELD_LOC(307, 288) +#define UD_SEND_WQE_SL UD_SEND_WQE_FIELD_LOC(311, 308) +#define UD_SEND_WQE_VLAN_EN UD_SEND_WQE_FIELD_LOC(318, 318) +#define UD_SEND_WQE_LBI UD_SEND_WQE_FIELD_LOC(319, 319) struct hns_roce_v2_rc_send_wqe { __le32 byte_4; @@ -957,42 +906,22 @@ struct hns_roce_v2_rc_send_wqe { __le64 va; }; -#define V2_RC_SEND_WQE_BYTE_4_OPCODE_S 0 -#define V2_RC_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0) - -#define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S 5 -#define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M GENMASK(6, 5) - -#define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S 13 -#define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M GENMASK(14, 13) - -#define V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S 15 -#define V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M GENMASK(30, 15) - -#define V2_RC_SEND_WQE_BYTE_4_OWNER_S 7 - -#define V2_RC_SEND_WQE_BYTE_4_CQE_S 8 - -#define V2_RC_SEND_WQE_BYTE_4_FENCE_S 9 - -#define 
V2_RC_SEND_WQE_BYTE_4_SO_S 10 - -#define V2_RC_SEND_WQE_BYTE_4_SE_S 11 - -#define V2_RC_SEND_WQE_BYTE_4_INLINE_S 12 - -#define V2_RC_SEND_WQE_BYTE_4_FLAG_S 31 - -#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_S 0 -#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_M GENMASK(23, 0) - -#define V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S 24 -#define V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M GENMASK(31, 24) - -#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0 -#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0) - -#define V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S 31 +#define RC_SEND_WQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_rc_send_wqe, h, l) + +#define RC_SEND_WQE_OPCODE RC_SEND_WQE_FIELD_LOC(4, 0) +#define RC_SEND_WQE_DB_SL_L RC_SEND_WQE_FIELD_LOC(6, 5) +#define RC_SEND_WQE_DB_SL_H RC_SEND_WQE_FIELD_LOC(14, 13) +#define RC_SEND_WQE_OWNER RC_SEND_WQE_FIELD_LOC(7, 7) +#define RC_SEND_WQE_CQE RC_SEND_WQE_FIELD_LOC(8, 8) +#define RC_SEND_WQE_FENCE RC_SEND_WQE_FIELD_LOC(9, 9) +#define RC_SEND_WQE_SE RC_SEND_WQE_FIELD_LOC(11, 11) +#define RC_SEND_WQE_INLINE RC_SEND_WQE_FIELD_LOC(12, 12) +#define RC_SEND_WQE_WQE_INDEX RC_SEND_WQE_FIELD_LOC(30, 15) +#define RC_SEND_WQE_FLAG RC_SEND_WQE_FIELD_LOC(31, 31) +#define RC_SEND_WQE_XRC_SRQN RC_SEND_WQE_FIELD_LOC(119, 96) +#define RC_SEND_WQE_SGE_NUM RC_SEND_WQE_FIELD_LOC(127, 120) +#define RC_SEND_WQE_MSG_START_SGE_IDX RC_SEND_WQE_FIELD_LOC(151, 128) +#define RC_SEND_WQE_INL_TYPE RC_SEND_WQE_FIELD_LOC(159, 159) struct hns_roce_wqe_frmr_seg { __le32 pbl_size; @@ -1035,7 +964,10 @@ struct hns_roce_func_clear { __le32 rsv[4]; }; -#define FUNC_CLEAR_RST_FUN_DONE_S 0 +#define FUNC_CLEAR_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_func_clear, h, l) + +#define FUNC_CLEAR_RST_FUN_DONE FUNC_CLEAR_FIELD_LOC(32, 32) + /* Each physical function manages up to 248 virtual functions, it takes up to * 100ms for each function to execute clear. If an abnormal reset occurs, it is * executed twice at most, so it takes up to 249 * 2 * 100ms. 
@@ -1114,12 +1046,12 @@ struct hns_roce_vf_switch { __le32 resv3; }; -#define VF_SWITCH_DATA_FUN_ID_VF_ID_S 3 -#define VF_SWITCH_DATA_FUN_ID_VF_ID_M GENMASK(10, 3) +#define VF_SWITCH_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_vf_switch, h, l) -#define VF_SWITCH_DATA_CFG_ALW_LPBK_S 1 -#define VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S 2 -#define VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S 3 +#define VF_SWITCH_VF_ID VF_SWITCH_FIELD_LOC(42, 35) +#define VF_SWITCH_ALW_LPBK VF_SWITCH_FIELD_LOC(65, 65) +#define VF_SWITCH_ALW_LCL_LPBK VF_SWITCH_FIELD_LOC(66, 66) +#define VF_SWITCH_ALW_DST_OVRD VF_SWITCH_FIELD_LOC(67, 67) struct hns_roce_post_mbox { __le32 in_param_l; @@ -1173,6 +1105,11 @@ enum { #define CFG_GMV_BT_BA_H CMQ_REQ_FIELD_LOC(51, 32) #define CFG_GMV_BT_IDX CMQ_REQ_FIELD_LOC(95, 64) +/* Fields of HNS_ROCE_QUERY_RAM_ECC */ +#define QUERY_RAM_ECC_1BIT_ERR CMQ_REQ_FIELD_LOC(31, 0) +#define QUERY_RAM_ECC_RES_TYPE CMQ_REQ_FIELD_LOC(63, 32) +#define QUERY_RAM_ECC_TAG CMQ_REQ_FIELD_LOC(95, 64) + struct hns_roce_cfg_sgid_tb { __le32 table_idx_rsv; __le32 vf_sgid_l; @@ -1182,11 +1119,10 @@ struct hns_roce_cfg_sgid_tb { __le32 vf_sgid_type_rsv; }; -#define CFG_SGID_TB_TABLE_IDX_S 0 -#define CFG_SGID_TB_TABLE_IDX_M GENMASK(7, 0) +#define SGID_TB_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_sgid_tb, h, l) -#define CFG_SGID_TB_VF_SGID_TYPE_S 0 -#define CFG_SGID_TB_VF_SGID_TYPE_M GENMASK(1, 0) +#define CFG_SGID_TB_TABLE_IDX SGID_TB_FIELD_LOC(7, 0) +#define CFG_SGID_TB_VF_SGID_TYPE SGID_TB_FIELD_LOC(161, 160) struct hns_roce_cfg_smac_tb { __le32 tb_idx_rsv; @@ -1194,11 +1130,11 @@ struct hns_roce_cfg_smac_tb { __le32 vf_smac_h_rsv; __le32 rsv[3]; }; -#define CFG_SMAC_TB_IDX_S 0 -#define CFG_SMAC_TB_IDX_M GENMASK(7, 0) -#define CFG_SMAC_TB_VF_SMAC_H_S 0 -#define CFG_SMAC_TB_VF_SMAC_H_M GENMASK(15, 0) +#define SMAC_TB_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_smac_tb, h, l) + +#define CFG_SMAC_TB_IDX SMAC_TB_FIELD_LOC(7, 0) +#define CFG_SMAC_TB_VF_SMAC_H SMAC_TB_FIELD_LOC(79, 64) struct 
hns_roce_cfg_gmv_tb_a { __le32 vf_sgid_l; @@ -1209,16 +1145,11 @@ struct hns_roce_cfg_gmv_tb_a { __le32 resv; }; -#define CFG_GMV_TB_SGID_IDX_S 0 -#define CFG_GMV_TB_SGID_IDX_M GENMASK(7, 0) +#define GMV_TB_A_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_gmv_tb_a, h, l) -#define CFG_GMV_TB_VF_SGID_TYPE_S 0 -#define CFG_GMV_TB_VF_SGID_TYPE_M GENMASK(1, 0) - -#define CFG_GMV_TB_VF_VLAN_EN_S 2 - -#define CFG_GMV_TB_VF_VLAN_ID_S 16 -#define CFG_GMV_TB_VF_VLAN_ID_M GENMASK(27, 16) +#define GMV_TB_A_VF_SGID_TYPE GMV_TB_A_FIELD_LOC(129, 128) +#define GMV_TB_A_VF_VLAN_EN GMV_TB_A_FIELD_LOC(130, 130) +#define GMV_TB_A_VF_VLAN_ID GMV_TB_A_FIELD_LOC(155, 144) struct hns_roce_cfg_gmv_tb_b { __le32 vf_smac_l; @@ -1227,8 +1158,10 @@ struct hns_roce_cfg_gmv_tb_b { __le32 resv[3]; }; -#define CFG_GMV_TB_SMAC_H_S 0 -#define CFG_GMV_TB_SMAC_H_M GENMASK(15, 0) +#define GMV_TB_B_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_gmv_tb_b, h, l) + +#define GMV_TB_B_SMAC_H GMV_TB_B_FIELD_LOC(47, 32) +#define GMV_TB_B_SGID_IDX GMV_TB_B_FIELD_LOC(71, 64) #define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 5 struct hns_roce_query_pf_caps_a { @@ -1237,7 +1170,7 @@ struct hns_roce_query_pf_caps_a { __le16 max_sq_sg; __le16 max_sq_inline; __le16 max_rq_sg; - __le32 max_extend_sg; + __le32 rsv0; __le16 num_qpc_timer; __le16 num_cqc_timer; __le16 max_srq_sges; @@ -1245,7 +1178,7 @@ struct hns_roce_query_pf_caps_a { u8 num_other_vectors; u8 max_sq_desc_sz; u8 max_rq_desc_sz; - u8 max_srq_desc_sz; + u8 rsv1; u8 cqe_sz; }; @@ -1280,29 +1213,17 @@ struct hns_roce_query_pf_caps_c { __le16 rq_depth; }; -#define V2_QUERY_PF_CAPS_C_NUM_PDS_S 0 -#define V2_QUERY_PF_CAPS_C_NUM_PDS_M GENMASK(19, 0) - -#define V2_QUERY_PF_CAPS_C_CAP_FLAGS_S 20 -#define V2_QUERY_PF_CAPS_C_CAP_FLAGS_M GENMASK(31, 20) - -#define V2_QUERY_PF_CAPS_C_NUM_CQS_S 0 -#define V2_QUERY_PF_CAPS_C_NUM_CQS_M GENMASK(19, 0) +#define PF_CAPS_C_FIELD_LOC(h, l) \ + FIELD_LOC(struct hns_roce_query_pf_caps_c, h, l) -#define V2_QUERY_PF_CAPS_C_MAX_GID_S 20 
-#define V2_QUERY_PF_CAPS_C_MAX_GID_M GENMASK(28, 20) - -#define V2_QUERY_PF_CAPS_C_CQ_DEPTH_S 0 -#define V2_QUERY_PF_CAPS_C_CQ_DEPTH_M GENMASK(22, 0) - -#define V2_QUERY_PF_CAPS_C_NUM_MRWS_S 0 -#define V2_QUERY_PF_CAPS_C_NUM_MRWS_M GENMASK(19, 0) - -#define V2_QUERY_PF_CAPS_C_NUM_QPS_S 0 -#define V2_QUERY_PF_CAPS_C_NUM_QPS_M GENMASK(19, 0) - -#define V2_QUERY_PF_CAPS_C_MAX_ORD_S 20 -#define V2_QUERY_PF_CAPS_C_MAX_ORD_M GENMASK(27, 20) +#define PF_CAPS_C_NUM_PDS PF_CAPS_C_FIELD_LOC(19, 0) +#define PF_CAPS_C_CAP_FLAGS PF_CAPS_C_FIELD_LOC(31, 20) +#define PF_CAPS_C_NUM_CQS PF_CAPS_C_FIELD_LOC(51, 32) +#define PF_CAPS_C_MAX_GID PF_CAPS_C_FIELD_LOC(60, 52) +#define PF_CAPS_C_CQ_DEPTH PF_CAPS_C_FIELD_LOC(86, 64) +#define PF_CAPS_C_NUM_MRWS PF_CAPS_C_FIELD_LOC(115, 96) +#define PF_CAPS_C_NUM_QPS PF_CAPS_C_FIELD_LOC(147, 128) +#define PF_CAPS_C_MAX_ORD PF_CAPS_C_FIELD_LOC(155, 148) struct hns_roce_query_pf_caps_d { __le32 wq_hop_num_max_srqs; @@ -1313,20 +1234,26 @@ struct hns_roce_query_pf_caps_d { __le32 num_uars_rsv_pds; __le32 rsv_uars_rsv_qps; }; -#define V2_QUERY_PF_CAPS_D_NUM_SRQS_S 0 -#define V2_QUERY_PF_CAPS_D_NUM_SRQS_M GENMASK(19, 0) - -#define V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_S 20 -#define V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_M GENMASK(21, 20) -#define V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_S 22 -#define V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_M GENMASK(23, 22) - -#define V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_S 24 -#define V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_M GENMASK(25, 24) - -#define V2_QUERY_PF_CAPS_D_CONG_TYPE_S 26 -#define V2_QUERY_PF_CAPS_D_CONG_TYPE_M GENMASK(29, 26) +#define PF_CAPS_D_FIELD_LOC(h, l) \ + FIELD_LOC(struct hns_roce_query_pf_caps_d, h, l) + +#define PF_CAPS_D_NUM_SRQS PF_CAPS_D_FIELD_LOC(19, 0) +#define PF_CAPS_D_RQWQE_HOP_NUM PF_CAPS_D_FIELD_LOC(21, 20) +#define PF_CAPS_D_EX_SGE_HOP_NUM PF_CAPS_D_FIELD_LOC(23, 22) +#define PF_CAPS_D_SQWQE_HOP_NUM PF_CAPS_D_FIELD_LOC(25, 24) +#define PF_CAPS_D_CONG_TYPE PF_CAPS_D_FIELD_LOC(29, 26) +#define PF_CAPS_D_CEQ_DEPTH 
PF_CAPS_D_FIELD_LOC(85, 64) +#define PF_CAPS_D_NUM_CEQS PF_CAPS_D_FIELD_LOC(95, 86) +#define PF_CAPS_D_AEQ_DEPTH PF_CAPS_D_FIELD_LOC(117, 96) +#define PF_CAPS_D_AEQ_ARM_ST PF_CAPS_D_FIELD_LOC(119, 118) +#define PF_CAPS_D_CEQ_ARM_ST PF_CAPS_D_FIELD_LOC(121, 120) +#define PF_CAPS_D_RSV_PDS PF_CAPS_D_FIELD_LOC(147, 128) +#define PF_CAPS_D_NUM_UARS PF_CAPS_D_FIELD_LOC(155, 148) +#define PF_CAPS_D_RSV_QPS PF_CAPS_D_FIELD_LOC(179, 160) +#define PF_CAPS_D_RSV_UARS PF_CAPS_D_FIELD_LOC(187, 180) + +#define HNS_ROCE_CAP_FLAGS_EX_SHIFT 12 struct hns_roce_congestion_algorithm { u8 alg_sel; @@ -1335,33 +1262,6 @@ struct hns_roce_congestion_algorithm { u8 wnd_mode_sel; }; -#define V2_QUERY_PF_CAPS_D_CEQ_DEPTH_S 0 -#define V2_QUERY_PF_CAPS_D_CEQ_DEPTH_M GENMASK(21, 0) - -#define V2_QUERY_PF_CAPS_D_NUM_CEQS_S 22 -#define V2_QUERY_PF_CAPS_D_NUM_CEQS_M GENMASK(31, 22) - -#define V2_QUERY_PF_CAPS_D_AEQ_DEPTH_S 0 -#define V2_QUERY_PF_CAPS_D_AEQ_DEPTH_M GENMASK(21, 0) - -#define V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_S 22 -#define V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_M GENMASK(23, 22) - -#define V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_S 24 -#define V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_M GENMASK(25, 24) - -#define V2_QUERY_PF_CAPS_D_RSV_PDS_S 0 -#define V2_QUERY_PF_CAPS_D_RSV_PDS_M GENMASK(19, 0) - -#define V2_QUERY_PF_CAPS_D_NUM_UARS_S 20 -#define V2_QUERY_PF_CAPS_D_NUM_UARS_M GENMASK(27, 20) - -#define V2_QUERY_PF_CAPS_D_RSV_QPS_S 0 -#define V2_QUERY_PF_CAPS_D_RSV_QPS_M GENMASK(19, 0) - -#define V2_QUERY_PF_CAPS_D_RSV_UARS_S 20 -#define V2_QUERY_PF_CAPS_D_RSV_UARS_M GENMASK(27, 20) - struct hns_roce_query_pf_caps_e { __le32 chunk_size_shift_rsv_mrws; __le32 rsv_cqs; @@ -1373,20 +1273,14 @@ struct hns_roce_query_pf_caps_e { __le16 aeq_period; }; -#define V2_QUERY_PF_CAPS_E_RSV_MRWS_S 0 -#define V2_QUERY_PF_CAPS_E_RSV_MRWS_M GENMASK(19, 0) - -#define V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_S 20 -#define V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_M GENMASK(31, 20) - -#define V2_QUERY_PF_CAPS_E_RSV_CQS_S 0 -#define 
V2_QUERY_PF_CAPS_E_RSV_CQS_M GENMASK(19, 0) +#define PF_CAPS_E_FIELD_LOC(h, l) \ + FIELD_LOC(struct hns_roce_query_pf_caps_e, h, l) -#define V2_QUERY_PF_CAPS_E_RSV_SRQS_S 0 -#define V2_QUERY_PF_CAPS_E_RSV_SRQS_M GENMASK(19, 0) - -#define V2_QUERY_PF_CAPS_E_RSV_LKEYS_S 0 -#define V2_QUERY_PF_CAPS_E_RSV_LKEYS_M GENMASK(19, 0) +#define PF_CAPS_E_RSV_MRWS PF_CAPS_E_FIELD_LOC(19, 0) +#define PF_CAPS_E_CHUNK_SIZE_SHIFT PF_CAPS_E_FIELD_LOC(31, 20) +#define PF_CAPS_E_RSV_CQS PF_CAPS_E_FIELD_LOC(51, 32) +#define PF_CAPS_E_RSV_SRQS PF_CAPS_E_FIELD_LOC(83, 64) +#define PF_CAPS_E_RSV_LKEYS PF_CAPS_E_FIELD_LOC(115, 96) struct hns_roce_cmq_req { __le32 data[6]; @@ -1432,18 +1326,40 @@ struct hns_roce_link_table { #define HNS_ROCE_EXT_LLM_ENTRY(addr, id) (((id) << (64 - 12)) | ((addr) >> 12)) #define HNS_ROCE_EXT_LLM_MIN_PAGES(que_num) ((que_num) * 4 + 2) +struct hns_roce_v2_free_mr { + struct ib_qp *rsv_qp[HNS_ROCE_FREE_MR_USED_QP_NUM]; + struct ib_cq *rsv_cq; + struct ib_pd *rsv_pd; + struct mutex mutex; +}; + struct hns_roce_v2_priv { struct hnae3_handle *handle; struct hns_roce_v2_cmq cmq; struct hns_roce_link_table ext_llm; + struct hns_roce_v2_free_mr free_mr; }; struct hns_roce_dip { u8 dgid[GID_LEN_V2]; u32 dip_idx; - struct list_head node; /* all dips are on a list */ + struct list_head node; /* all dips are on a list */ +}; + +struct fmea_ram_ecc { + u32 is_ecc_err; + u32 res_type; + u32 index; }; +/* only for RNR timeout issue of HIP08 */ +#define HNS_ROCE_CLOCK_ADJUST 1000 +#define HNS_ROCE_MAX_CQ_PERIOD 65 +#define HNS_ROCE_MAX_EQ_PERIOD 65 +#define HNS_ROCE_RNR_TIMER_10NS 1 +#define HNS_ROCE_1US_CFG 999 +#define HNS_ROCE_1NS_CFG 0 + #define HNS_ROCE_AEQ_DEFAULT_BURST_NUM 0x0 #define HNS_ROCE_AEQ_DEFAULT_INTERVAL 0x0 #define HNS_ROCE_CEQ_DEFAULT_BURST_NUM 0x0 @@ -1471,14 +1387,10 @@ struct hns_roce_dip { #define HNS_ROCE_EQ_INIT_CONS_IDX 0 #define HNS_ROCE_EQ_INIT_NXT_EQE_BA 0 -#define HNS_ROCE_V2_CEQ_CEQE_OWNER_S 31 -#define HNS_ROCE_V2_AEQ_AEQE_OWNER_S 31 - #define 
HNS_ROCE_V2_COMP_EQE_NUM 0x1000 #define HNS_ROCE_V2_ASYNC_EQE_NUM 0x1000 #define HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S 0 -#define HNS_ROCE_V2_VF_INT_ST_RAS_INT_S 1 #define HNS_ROCE_EQ_DB_CMD_AEQ 0x0 #define HNS_ROCE_EQ_DB_CMD_AEQ_ARMED 0x1 @@ -1530,18 +1442,6 @@ struct hns_roce_eq_context { #define EQC_NEX_EQE_BA_H EQC_FIELD_LOC(339, 320) #define EQC_EQE_SIZE EQC_FIELD_LOC(341, 340) -#define HNS_ROCE_V2_CEQE_COMP_CQN_S 0 -#define HNS_ROCE_V2_CEQE_COMP_CQN_M GENMASK(23, 0) - -#define HNS_ROCE_V2_AEQE_EVENT_TYPE_S 0 -#define HNS_ROCE_V2_AEQE_EVENT_TYPE_M GENMASK(7, 0) - -#define HNS_ROCE_V2_AEQE_SUB_TYPE_S 8 -#define HNS_ROCE_V2_AEQE_SUB_TYPE_M GENMASK(15, 8) - -#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S 0 -#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M GENMASK(23, 0) - #define MAX_SERVICE_LEVEL 0x7 struct hns_roce_wqe_atomic_seg { @@ -1559,9 +1459,6 @@ struct hns_roce_sccc_clr_done { __le32 rsv[5]; }; -int hns_roce_v2_query_cqc_info(struct hns_roce_dev *hr_dev, u32 cqn, - int *buffer); - static inline void hns_roce_write64(struct hns_roce_dev *hr_dev, __le32 val[2], void __iomem *dest) { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2_dfx.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2_dfx.c deleted file mode 100644 index 5a97b5a0b7be..000000000000 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2_dfx.c +++ /dev/null @@ -1,35 +0,0 @@ -// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) -// Copyright (c) 2019 Hisilicon Limited. 
- -#include "hnae3.h" -#include "hns_roce_device.h" -#include "hns_roce_cmd.h" -#include "hns_roce_hw_v2.h" - -int hns_roce_v2_query_cqc_info(struct hns_roce_dev *hr_dev, u32 cqn, - int *buffer) -{ - struct hns_roce_v2_cq_context *cq_context; - struct hns_roce_cmd_mailbox *mailbox; - int ret; - - mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); - - cq_context = mailbox->buf; - ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, cqn, 0, - HNS_ROCE_CMD_QUERY_CQC, - HNS_ROCE_CMD_TIMEOUT_MSECS); - if (ret) { - dev_err(hr_dev->dev, "QUERY cqc cmd process error\n"); - goto err_mailbox; - } - - memcpy(buffer, cq_context, sizeof(*cq_context)); - -err_mailbox: - hns_roce_free_cmd_mailbox(hr_dev, mailbox); - - return ret; -} diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 4194b626f3c6..dcf89689a4c6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -31,7 +31,6 @@ * SOFTWARE. 
*/ #include <linux/acpi.h> -#include <linux/of_platform.h> #include <linux/module.h> #include <linux/pci.h> #include <rdma/ib_addr.h> @@ -70,7 +69,7 @@ static int hns_roce_add_gid(const struct ib_gid_attr *attr, void **context) if (port >= hr_dev->caps.num_ports) return -EINVAL; - ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, &attr->gid, attr); + ret = hr_dev->hw->set_gid(hr_dev, attr->index, &attr->gid, attr); return ret; } @@ -84,7 +83,7 @@ static int hns_roce_del_gid(const struct ib_gid_attr *attr, void **context) if (port >= hr_dev->caps.num_ports) return -EINVAL; - ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, NULL, NULL); + ret = hr_dev->hw->set_gid(hr_dev, attr->index, NULL, NULL); return ret; } @@ -98,7 +97,7 @@ static int handle_en_event(struct hns_roce_dev *hr_dev, u32 port, netdev = hr_dev->iboe.netdevs[port]; if (!netdev) { - dev_err(dev, "Can't find netdev on port(%u)!\n", port); + dev_err(dev, "can't find netdev on port(%u)!\n", port); return -ENODEV; } @@ -152,9 +151,6 @@ static int hns_roce_setup_mtu_mac(struct hns_roce_dev *hr_dev) u8 i; for (i = 0; i < hr_dev->caps.num_ports; i++) { - if (hr_dev->hw->set_mtu) - hr_dev->hw->set_mtu(hr_dev, hr_dev->iboe.phy_port[i], - hr_dev->caps.max_mtu); ret = hns_roce_set_mac(hr_dev, i, hr_dev->iboe.netdevs[i]->dev_addr); if (ret) @@ -243,7 +239,7 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u32 port_num, net_dev = hr_dev->iboe.netdevs[port]; if (!net_dev) { spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); - dev_err(dev, "Find netdev %u failed!\n", port); + dev_err(dev, "find netdev %u failed!\n", port); return -EINVAL; } @@ -270,6 +266,9 @@ static enum rdma_link_layer hns_roce_get_link_layer(struct ib_device *device, static int hns_roce_query_pkey(struct ib_device *ib_dev, u32 port, u16 index, u16 *pkey) { + if (index > 0) + return -EINVAL; + *pkey = PKEY_ID; return 0; @@ -307,9 +306,22 @@ hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address, entry->address = 
address; entry->mmap_type = mmap_type; - ret = rdma_user_mmap_entry_insert_exact( - ucontext, &entry->rdma_entry, length, - mmap_type == HNS_ROCE_MMAP_TYPE_DB ? 0 : 1); + switch (mmap_type) { + /* pgoff 0 must be used by DB for compatibility */ + case HNS_ROCE_MMAP_TYPE_DB: + ret = rdma_user_mmap_entry_insert_exact( + ucontext, &entry->rdma_entry, length, 0); + break; + case HNS_ROCE_MMAP_TYPE_DWQE: + ret = rdma_user_mmap_entry_insert_range( + ucontext, &entry->rdma_entry, length, 1, + U32_MAX); + break; + default: + ret = -EINVAL; + break; + } + if (ret) { kfree(entry); return NULL; @@ -323,18 +335,12 @@ static void hns_roce_dealloc_uar_entry(struct hns_roce_ucontext *context) if (context->db_mmap_entry) rdma_user_mmap_entry_remove( &context->db_mmap_entry->rdma_entry); - - if (context->tptr_mmap_entry) - rdma_user_mmap_entry_remove( - &context->tptr_mmap_entry->rdma_entry); } static int hns_roce_alloc_uar_entry(struct ib_ucontext *uctx) { struct hns_roce_ucontext *context = to_hr_ucontext(uctx); - struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device); u64 address; - int ret; address = context->uar.pfn << PAGE_SHIFT; context->db_mmap_entry = hns_roce_user_mmap_entry_insert( @@ -342,27 +348,7 @@ static int hns_roce_alloc_uar_entry(struct ib_ucontext *uctx) if (!context->db_mmap_entry) return -ENOMEM; - if (!hr_dev->tptr_dma_addr || !hr_dev->tptr_size) - return 0; - - /* - * FIXME: using io_remap_pfn_range on the dma address returned - * by dma_alloc_coherent is totally wrong. 
- */ - context->tptr_mmap_entry = - hns_roce_user_mmap_entry_insert(uctx, hr_dev->tptr_dma_addr, - hr_dev->tptr_size, - HNS_ROCE_MMAP_TYPE_TPTR); - if (!context->tptr_mmap_entry) { - ret = -ENOMEM; - goto err; - } - return 0; - -err: - hns_roce_dealloc_uar_entry(context); - return ret; } static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, @@ -436,10 +422,15 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma) entry = to_hns_mmap(rdma_entry); pfn = entry->address >> PAGE_SHIFT; - prot = vma->vm_page_prot; - if (entry->mmap_type != HNS_ROCE_MMAP_TYPE_TPTR) - prot = pgprot_noncached(prot); + switch (entry->mmap_type) { + case HNS_ROCE_MMAP_TYPE_DB: + case HNS_ROCE_MMAP_TYPE_DWQE: + prot = pgprot_device(vma->vm_page_prot); + break; + default: + return -EINVAL; + } ret = rdma_user_mmap_io(uctx, vma, pfn, rdma_entry->npages * PAGE_SIZE, prot, rdma_entry); @@ -524,7 +515,6 @@ static const struct ib_device_ops hns_roce_dev_ops = { .destroy_ah = hns_roce_destroy_ah, .destroy_cq = hns_roce_destroy_cq, .disassociate_ucontext = hns_roce_disassociate_ucontext, - .fill_res_cq_entry = hns_roce_fill_res_cq_entry, .get_dma_mr = hns_roce_get_dma_mr, .get_link_layer = hns_roce_get_link_layer, .get_port_immutable = hns_roce_port_immutable, @@ -575,6 +565,15 @@ static const struct ib_device_ops hns_roce_dev_xrcd_ops = { INIT_RDMA_OBJ_SIZE(ib_xrcd, hns_roce_xrcd, ibxrcd), }; +static const struct ib_device_ops hns_roce_dev_restrack_ops = { + .fill_res_cq_entry = hns_roce_fill_res_cq_entry, + .fill_res_cq_entry_raw = hns_roce_fill_res_cq_entry_raw, + .fill_res_qp_entry = hns_roce_fill_res_qp_entry, + .fill_res_qp_entry_raw = hns_roce_fill_res_qp_entry_raw, + .fill_res_mr_entry = hns_roce_fill_res_mr_entry, + .fill_res_mr_entry_raw = hns_roce_fill_res_mr_entry_raw, +}; + static int hns_roce_register_device(struct hns_roce_dev *hr_dev) { int ret; @@ -614,6 +613,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) 
ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_ops); ib_set_device_ops(ib_dev, &hns_roce_dev_ops); + ib_set_device_ops(ib_dev, &hns_roce_dev_restrack_ops); for (i = 0; i < hr_dev->caps.num_ports; i++) { if (!hr_dev->iboe.netdevs[i]) continue; @@ -659,17 +659,17 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) ret = hns_roce_init_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table, HEM_TYPE_MTPT, hr_dev->caps.mtpt_entry_sz, - hr_dev->caps.num_mtpts, 1); + hr_dev->caps.num_mtpts); if (ret) { - dev_err(dev, "Failed to init MTPT context memory, aborting.\n"); + dev_err(dev, "failed to init MTPT context memory, aborting.\n"); return ret; } ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.qp_table, HEM_TYPE_QPC, hr_dev->caps.qpc_sz, - hr_dev->caps.num_qps, 1); + hr_dev->caps.num_qps); if (ret) { - dev_err(dev, "Failed to init QP context memory, aborting.\n"); + dev_err(dev, "failed to init QP context memory, aborting.\n"); goto err_unmap_dmpt; } @@ -677,9 +677,9 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) HEM_TYPE_IRRL, hr_dev->caps.irrl_entry_sz * hr_dev->caps.max_qp_init_rdma, - hr_dev->caps.num_qps, 1); + hr_dev->caps.num_qps); if (ret) { - dev_err(dev, "Failed to init irrl_table memory, aborting.\n"); + dev_err(dev, "failed to init irrl_table memory, aborting.\n"); goto err_unmap_qp; } @@ -689,19 +689,19 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) HEM_TYPE_TRRL, hr_dev->caps.trrl_entry_sz * hr_dev->caps.max_qp_dest_rdma, - hr_dev->caps.num_qps, 1); + hr_dev->caps.num_qps); if (ret) { dev_err(dev, - "Failed to init trrl_table memory, aborting.\n"); + "failed to init trrl_table memory, aborting.\n"); goto err_unmap_irrl; } } ret = hns_roce_init_hem_table(hr_dev, &hr_dev->cq_table.table, HEM_TYPE_CQC, hr_dev->caps.cqc_entry_sz, - hr_dev->caps.num_cqs, 1); + hr_dev->caps.num_cqs); if (ret) { - dev_err(dev, "Failed to init CQ context memory, aborting.\n"); + dev_err(dev, "failed to init CQ context memory, aborting.\n"); 
goto err_unmap_trrl; } @@ -709,10 +709,10 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) ret = hns_roce_init_hem_table(hr_dev, &hr_dev->srq_table.table, HEM_TYPE_SRQC, hr_dev->caps.srqc_entry_sz, - hr_dev->caps.num_srqs, 1); + hr_dev->caps.num_srqs); if (ret) { dev_err(dev, - "Failed to init SRQ context memory, aborting.\n"); + "failed to init SRQ context memory, aborting.\n"); goto err_unmap_cq; } } @@ -722,10 +722,10 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) &hr_dev->qp_table.sccc_table, HEM_TYPE_SCCC, hr_dev->caps.sccc_sz, - hr_dev->caps.num_qps, 1); + hr_dev->caps.num_qps); if (ret) { dev_err(dev, - "Failed to init SCC context memory, aborting.\n"); + "failed to init SCC context memory, aborting.\n"); goto err_unmap_srq; } } @@ -734,10 +734,10 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qpc_timer_table, HEM_TYPE_QPC_TIMER, hr_dev->caps.qpc_timer_entry_sz, - hr_dev->caps.num_qpc_timer, 1); + hr_dev->caps.qpc_timer_bt_num); if (ret) { dev_err(dev, - "Failed to init QPC timer memory, aborting.\n"); + "failed to init QPC timer memory, aborting.\n"); goto err_unmap_ctx; } } @@ -746,10 +746,10 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) ret = hns_roce_init_hem_table(hr_dev, &hr_dev->cqc_timer_table, HEM_TYPE_CQC_TIMER, hr_dev->caps.cqc_timer_entry_sz, - hr_dev->caps.num_cqc_timer, 1); + hr_dev->caps.cqc_timer_bt_num); if (ret) { dev_err(dev, - "Failed to init CQC timer memory, aborting.\n"); + "failed to init CQC timer memory, aborting.\n"); goto err_unmap_qpc_timer; } } @@ -758,7 +758,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) ret = hns_roce_init_hem_table(hr_dev, &hr_dev->gmv_table, HEM_TYPE_GMV, hr_dev->caps.gmv_entry_sz, - hr_dev->caps.gmv_entry_num, 1); + hr_dev->caps.gmv_entry_num); if (ret) { dev_err(dev, "failed to init gmv table memory, ret = %d\n", @@ -816,7 +816,6 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) 
int ret; spin_lock_init(&hr_dev->sm_lock); - spin_lock_init(&hr_dev->bt_cmd_lock); if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) { @@ -828,13 +827,13 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) ret = hns_roce_uar_alloc(hr_dev, &hr_dev->priv_uar); if (ret) { - dev_err(dev, "Failed to allocate priv_uar.\n"); + dev_err(dev, "failed to allocate priv_uar.\n"); goto err_uar_table_free; } ret = hns_roce_init_qp_table(hr_dev); if (ret) { - dev_err(dev, "Failed to init qp_table.\n"); + dev_err(dev, "failed to init qp_table.\n"); goto err_uar_table_free; } @@ -847,9 +846,8 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) hns_roce_init_cq_table(hr_dev); - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) hns_roce_init_srq_table(hr_dev); - } return 0; @@ -907,26 +905,19 @@ int hns_roce_init(struct hns_roce_dev *hr_dev) struct device *dev = hr_dev->dev; int ret; - if (hr_dev->hw->reset) { - ret = hr_dev->hw->reset(hr_dev, true); - if (ret) { - dev_err(dev, "Reset RoCE engine failed!\n"); - return ret; - } - } hr_dev->is_reset = false; if (hr_dev->hw->cmq_init) { ret = hr_dev->hw->cmq_init(hr_dev); if (ret) { - dev_err(dev, "Init RoCE Command Queue failed!\n"); - goto error_failed_cmq_init; + dev_err(dev, "init RoCE Command Queue failed!\n"); + return ret; } } ret = hr_dev->hw->hw_profile(hr_dev); if (ret) { - dev_err(dev, "Get RoCE engine profile failed!\n"); + dev_err(dev, "get RoCE engine profile failed!\n"); goto error_failed_cmd_init; } @@ -1003,12 +994,6 @@ error_failed_cmd_init: if (hr_dev->hw->cmq_exit) hr_dev->hw->cmq_exit(hr_dev); -error_failed_cmq_init: - if (hr_dev->hw->reset) { - if (hr_dev->hw->reset(hr_dev, false)) - dev_err(dev, "Dereset RoCE engine failed!\n"); - } - return ret; } @@ -1028,8 +1013,6 @@ void hns_roce_exit(struct hns_roce_dev *hr_dev) hns_roce_cmd_cleanup(hr_dev); if (hr_dev->hw->cmq_exit) 
hr_dev->hw->cmq_exit(hr_dev); - if (hr_dev->hw->reset) - hr_dev->hw->reset(hr_dev, false); } MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 7089ac780291..845ac7d3831f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -31,7 +31,6 @@ * SOFTWARE. */ -#include <linux/platform_device.h> #include <linux/vmalloc.h> #include <rdma/ib_umem.h> #include "hns_roce_device.h" @@ -48,24 +47,6 @@ unsigned long key_to_hw_index(u32 key) return (key << 24) | (key >> 8); } -static int hns_roce_hw_create_mpt(struct hns_roce_dev *hr_dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long mpt_index) -{ - return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0, - HNS_ROCE_CMD_CREATE_MPT, - HNS_ROCE_CMD_TIMEOUT_MSECS); -} - -int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long mpt_index) -{ - return hns_roce_cmd_mbox(hr_dev, 0, mailbox ? 
mailbox->dma : 0, - mpt_index, !mailbox, HNS_ROCE_CMD_DESTROY_MPT, - HNS_ROCE_CMD_TIMEOUT_MSECS); -} - static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) { struct hns_roce_ida *mtpt_ida = &hr_dev->mr_table.mtpt_ida; @@ -81,7 +62,7 @@ static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) return -ENOMEM; } - mr->key = hw_index_to_key(id); /* MR key */ + mr->key = hw_index_to_key(id); /* MR key */ err = hns_roce_table_get(hr_dev, &hr_dev->mr_table.mtpt_table, (unsigned long)id); @@ -138,14 +119,13 @@ static void free_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) hns_roce_mtr_destroy(hr_dev, &mr->pbl_mtr); } -static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, - struct hns_roce_mr *mr) +static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) { struct ib_device *ibdev = &hr_dev->ib_dev; int ret; if (mr->enabled) { - ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, + ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT, key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1)); if (ret) @@ -167,14 +147,11 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, /* Allocate mailbox memory */ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR(mailbox)) { - ret = PTR_ERR(mailbox); - return ret; - } + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); if (mr->type != MR_TYPE_FRMR) - ret = hr_dev->hw->write_mtpt(hr_dev, mailbox->buf, mr, - mtpt_idx); + ret = hr_dev->hw->write_mtpt(hr_dev, mailbox->buf, mr); else ret = hr_dev->hw->frmr_write_mtpt(hr_dev, mailbox->buf, mr); if (ret) { @@ -182,7 +159,7 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, goto err_page; } - ret = hns_roce_hw_create_mpt(hr_dev, mailbox, + ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_MPT, mtpt_idx & (hr_dev->caps.num_mtpts - 1)); if (ret) { dev_err(dev, "failed to create mpt, ret = %d.\n", ret); @@ -213,7 +190,7 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd 
*pd, int acc) int ret; mr = kzalloc(sizeof(*mr), GFP_KERNEL); - if (mr == NULL) + if (!mr) return ERR_PTR(-ENOMEM); mr->type = MR_TYPE_DMA; @@ -272,7 +249,6 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto err_alloc_pbl; mr->ibmr.rkey = mr->ibmr.lkey = mr->key; - mr->ibmr.length = length; return &mr->ibmr; @@ -305,13 +281,14 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, return ERR_CAST(mailbox); mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1); - ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, mtpt_idx, 0, - HNS_ROCE_CMD_QUERY_MPT, - HNS_ROCE_CMD_TIMEOUT_MSECS); + + ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_MPT, + mtpt_idx); if (ret) goto free_cmd_mbox; - ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, mtpt_idx); + ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT, + mtpt_idx); if (ret) ibdev_warn(ib_dev, "failed to destroy MPT, ret = %d.\n", ret); @@ -341,7 +318,8 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, goto free_cmd_mbox; } - ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx); + ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_MPT, + mtpt_idx); if (ret) { ibdev_err(ib_dev, "failed to create MPT, ret = %d.\n", ret); goto free_cmd_mbox; @@ -361,16 +339,14 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); struct hns_roce_mr *mr = to_hr_mr(ibmr); - int ret = 0; - if (hr_dev->hw->dereg_mr) { - ret = hr_dev->hw->dereg_mr(hr_dev, mr, udata); - } else { - hns_roce_mr_free(hr_dev, mr); - kfree(mr); - } + if (hr_dev->hw->dereg_mr) + hr_dev->hw->dereg_mr(hr_dev); - return ret; + hns_roce_mr_free(hr_dev, mr); + kfree(mr); + + return 0; } struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, @@ -486,7 +462,7 @@ static void hns_roce_mw_free(struct hns_roce_dev *hr_dev, int ret; if (mw->enabled) { 
- ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, + ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT, key_to_hw_index(mw->rkey) & (hr_dev->caps.num_mtpts - 1)); if (ret) @@ -526,7 +502,7 @@ static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev, goto err_page; } - ret = hns_roce_hw_create_mpt(hr_dev, mailbox, + ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_MPT, mtpt_idx & (hr_dev->caps.num_mtpts - 1)); if (ret) { dev_err(dev, "MW CREATE_MPT failed (%d)\n", ret); @@ -609,15 +585,12 @@ static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, while (offset < end && npage < max_count) { count = 0; mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, - offset, &count, NULL); + offset, &count); if (!mtts) return -ENOBUFS; for (i = 0; i < count && npage < max_count; i++) { - if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) - addr = to_hr_hw_page_addr(pages[npage]); - else - addr = pages[npage]; + addr = pages[npage]; mtts[i] = cpu_to_le64(addr); npage++; @@ -824,11 +797,11 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, } int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr) + u32 offset, u64 *mtt_buf, int mtt_max, u64 *base_addr) { struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg; int mtt_count, left; - int start_index; + u32 start_index; int total = 0; __le64 *mtts; u32 npage; @@ -847,10 +820,7 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, continue; addr = cfg->root_ba + (npage << HNS_HW_PAGE_SHIFT); - if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) - mtt_buf[total] = to_hr_hw_page_addr(addr); - else - mtt_buf[total] = addr; + mtt_buf[total] = addr; total++; } @@ -864,7 +834,7 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, mtt_count = 0; mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, start_index + total, - &mtt_count, NULL); + &mtt_count); if 
(!mtts || !mtt_count) goto done; @@ -884,10 +854,10 @@ done: static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev, struct hns_roce_buf_attr *attr, struct hns_roce_hem_cfg *cfg, - unsigned int *buf_page_shift, int unalinged_size) + unsigned int *buf_page_shift, u64 unalinged_size) { struct hns_roce_buf_region *r; - int first_region_padding; + u64 first_region_padding; int page_cnt, region_cnt; unsigned int page_shift; size_t buf_size; diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index 81ffad77ae42..783e71852c50 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -30,7 +30,6 @@ * SOFTWARE. */ -#include <linux/platform_device.h> #include <linux/pci.h> #include "hns_roce_device.h" @@ -86,7 +85,6 @@ int hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar) { struct hns_roce_ida *uar_ida = &hr_dev->uar_ida; - struct resource *res; int id; /* Using bitmap to manager UAR index */ @@ -104,18 +102,9 @@ int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar) else uar->index = 0; - if (!dev_is_pci(hr_dev->dev)) { - res = platform_get_resource(hr_dev->pdev, IORESOURCE_MEM, 0); - if (!res) { - ida_free(&uar_ida->ida, id); - dev_err(&hr_dev->pdev->dev, "memory resource not found!\n"); - return -EINVAL; - } - uar->pfn = ((res->start) >> PAGE_SHIFT) + uar->index; - } else { - uar->pfn = ((pci_resource_start(hr_dev->pci_dev, 2)) - >> PAGE_SHIFT); - } + uar->pfn = ((pci_resource_start(hr_dev->pci_dev, 2)) >> PAGE_SHIFT); + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DIRECT_WQE) + hr_dev->dwqe_page = pci_resource_start(hr_dev->pci_dev, 4); return 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 9af4509894e6..f0bd82a18069 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ 
-32,7 +32,6 @@ */ #include <linux/pci.h> -#include <linux/platform_device.h> #include <rdma/ib_addr.h> #include <rdma/ib_umem.h> #include <rdma/uverbs_ioctl.h> @@ -57,7 +56,7 @@ static void flush_work_handle(struct work_struct *work) if (test_and_clear_bit(HNS_ROCE_FLUSH_FLAG, &hr_qp->flush_flag)) { ret = hns_roce_modify_qp(&hr_qp->ibqp, &attr, attr_mask, NULL); if (ret) - dev_err(dev, "Modify QP to error state failed(%d) during CQE flush\n", + dev_err(dev, "modify QP to error state failed(%d) during CQE flush\n", ret); } @@ -106,16 +105,15 @@ void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type) xa_unlock(&hr_dev->qp_table_xa); if (!qp) { - dev_warn(dev, "Async event for bogus QP %08x\n", qpn); + dev_warn(dev, "async event for bogus QP %08x\n", qpn); return; } - if (hr_dev->hw_rev != HNS_ROCE_HW_VER1 && - (event_type == HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR || - event_type == HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR || - event_type == HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR || - event_type == HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION || - event_type == HNS_ROCE_EVENT_TYPE_INVALID_XRCETH)) { + if (event_type == HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR || + event_type == HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR || + event_type == HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR || + event_type == HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION || + event_type == HNS_ROCE_EVENT_TYPE_INVALID_XRCETH) { qp->state = IB_QPS_ERR; flush_cqe(hr_dev, qp); @@ -219,14 +217,7 @@ static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) int ret; if (hr_qp->ibqp.qp_type == IB_QPT_GSI) { - /* when hw version is v1, the sqpn is allocated */ - if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) - num = HNS_ROCE_MAX_PORTS + - hr_dev->iboe.phy_port[hr_qp->port]; - else - num = 1; - - hr_qp->doorbell_qpn = 1; + num = 1; } else { mutex_lock(&qp_table->bank_mutex); bankid = get_least_load_bankid_for_qp(qp_table->bank); @@ -242,8 +233,6 @@ static int alloc_qpn(struct hns_roce_dev *hr_dev, 
struct hns_roce_qp *hr_qp) qp_table->bank[bankid].inuse++; mutex_unlock(&qp_table->bank_mutex); - - hr_qp->doorbell_qpn = (u32)num; } hr_qp->qpn = num; @@ -251,26 +240,6 @@ static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) return 0; } -enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state) -{ - switch (state) { - case IB_QPS_RESET: - return HNS_ROCE_QP_STATE_RST; - case IB_QPS_INIT: - return HNS_ROCE_QP_STATE_INIT; - case IB_QPS_RTR: - return HNS_ROCE_QP_STATE_RTR; - case IB_QPS_RTS: - return HNS_ROCE_QP_STATE_RTS; - case IB_QPS_SQD: - return HNS_ROCE_QP_STATE_SQD; - case IB_QPS_ERR: - return HNS_ROCE_QP_STATE_ERR; - default: - return HNS_ROCE_QP_NUM_STATE; - } -} - static void add_qp_to_list(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_cq *send_cq, struct ib_cq *recv_cq) @@ -306,7 +275,7 @@ static int hns_roce_qp_store(struct hns_roce_dev *hr_dev, ret = xa_err(xa_store_irq(xa, hr_qp->qpn, hr_qp, GFP_KERNEL)); if (ret) - dev_err(hr_dev->dev, "Failed to xa store for QPC\n"); + dev_err(hr_dev->dev, "failed to xa store for QPC\n"); else /* add QP to device's QP list for softwc */ add_qp_to_list(hr_dev, hr_qp, init_attr->send_cq, @@ -324,22 +293,17 @@ static int alloc_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) if (!hr_qp->qpn) return -EINVAL; - /* In v1 engine, GSI QP context is saved in the RoCE hw's register */ - if (hr_qp->ibqp.qp_type == IB_QPT_GSI && - hr_dev->hw_rev == HNS_ROCE_HW_VER1) - return 0; - /* Alloc memory for QPC */ ret = hns_roce_table_get(hr_dev, &qp_table->qp_table, hr_qp->qpn); if (ret) { - dev_err(dev, "Failed to get QPC table\n"); + dev_err(dev, "failed to get QPC table\n"); goto err_out; } /* Alloc memory for IRRL */ ret = hns_roce_table_get(hr_dev, &qp_table->irrl_table, hr_qp->qpn); if (ret) { - dev_err(dev, "Failed to get IRRL table\n"); + dev_err(dev, "failed to get IRRL table\n"); goto err_put_qp; } @@ -348,7 +312,7 @@ static int alloc_qpc(struct hns_roce_dev 
*hr_dev, struct hns_roce_qp *hr_qp) ret = hns_roce_table_get(hr_dev, &qp_table->trrl_table, hr_qp->qpn); if (ret) { - dev_err(dev, "Failed to get TRRL table\n"); + dev_err(dev, "failed to get TRRL table\n"); goto err_put_irrl; } } @@ -358,7 +322,7 @@ static int alloc_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) ret = hns_roce_table_get(hr_dev, &qp_table->sccc_table, hr_qp->qpn); if (ret) { - dev_err(dev, "Failed to get SCC CTX table\n"); + dev_err(dev, "failed to get SCC CTX table\n"); goto err_put_trrl; } } @@ -379,6 +343,11 @@ err_out: return ret; } +static void qp_user_mmap_entry_remove(struct hns_roce_qp *hr_qp) +{ + rdma_user_mmap_entry_remove(&hr_qp->dwqe_mmap_entry->rdma_entry); +} + void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { struct xarray *xa = &hr_dev->qp_table_xa; @@ -402,11 +371,6 @@ static void free_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; - /* In v1 engine, GSI QP context is saved in the RoCE hw's register */ - if (hr_qp->ibqp.qp_type == IB_QPT_GSI && - hr_dev->hw_rev == HNS_ROCE_HW_VER1) - return; - if (hr_dev->caps.trrl_entry_sz) hns_roce_table_put(hr_dev, &qp_table->trrl_table, hr_qp->qpn); hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn); @@ -495,11 +459,8 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge) + hr_qp->rq.rsv_sge); - if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE) - hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz); - else - hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz * - hr_qp->rq.max_gs); + hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz * + hr_qp->rq.max_gs); hr_qp->rq.wqe_cnt = cnt; if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE && @@ -535,11 +496,6 @@ static void set_ext_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt, hr_qp->sge.sge_shift = 
HNS_ROCE_SGE_SHIFT; - if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) { - hr_qp->sq.max_gs = HNS_ROCE_SGE_IN_WQE; - return; - } - hr_qp->sq.max_gs = max(1U, cap->max_send_sge); wqe_sge_cnt = get_wqe_ext_sge_cnt(hr_qp); @@ -780,7 +736,11 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, goto err_inline; } + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DIRECT_WQE) + hr_qp->en_flags |= HNS_ROCE_QP_CAP_DIRECT_WQE; + return 0; + err_inline: free_rq_inline_buf(hr_qp); @@ -822,6 +782,35 @@ static inline bool kernel_qp_has_rdb(struct hns_roce_dev *hr_dev, hns_roce_qp_has_rq(init_attr)); } +static int qp_mmap_entry(struct hns_roce_qp *hr_qp, + struct hns_roce_dev *hr_dev, + struct ib_udata *udata, + struct hns_roce_ib_create_qp_resp *resp) +{ + struct hns_roce_ucontext *uctx = + rdma_udata_to_drv_context(udata, + struct hns_roce_ucontext, ibucontext); + struct rdma_user_mmap_entry *rdma_entry; + u64 address; + + address = hr_dev->dwqe_page + hr_qp->qpn * HNS_ROCE_DWQE_SIZE; + + hr_qp->dwqe_mmap_entry = + hns_roce_user_mmap_entry_insert(&uctx->ibucontext, address, + HNS_ROCE_DWQE_SIZE, + HNS_ROCE_MMAP_TYPE_DWQE); + + if (!hr_qp->dwqe_mmap_entry) { + ibdev_err(&hr_dev->ib_dev, "failed to get dwqe mmap entry.\n"); + return -ENOMEM; + } + + rdma_entry = &hr_qp->dwqe_mmap_entry->rdma_entry; + resp->dwqe_mmap_key = rdma_user_mmap_get_offset(rdma_entry); + + return 0; +} + static int alloc_user_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_qp_init_attr *init_attr, @@ -909,10 +898,16 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, hr_qp->en_flags |= HNS_ROCE_QP_CAP_OWNER_DB; if (udata) { + if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE) { + ret = qp_mmap_entry(hr_qp, hr_dev, udata, resp); + if (ret) + return ret; + } + ret = alloc_user_qp_db(hr_dev, hr_qp, init_attr, udata, ucmd, resp); if (ret) - return ret; + goto err_remove_qp; } else { ret = alloc_kernel_qp_db(hr_dev, hr_qp, init_attr); if 
(ret) @@ -920,6 +915,12 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, } return 0; + +err_remove_qp: + if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE) + qp_user_mmap_entry_remove(hr_qp); + + return ret; } static void free_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, @@ -933,6 +934,8 @@ static void free_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, hns_roce_db_unmap_user(uctx, &hr_qp->rdb); if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB) hns_roce_db_unmap_user(uctx, &hr_qp->sdb); + if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE) + qp_user_mmap_entry_remove(hr_qp); } else { if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB) hns_roce_free_db(hr_dev, &hr_qp->rdb); @@ -1158,7 +1161,7 @@ static int check_qp_type(struct hns_roce_dev *hr_dev, enum ib_qp_type type, goto out; break; case IB_QPT_UD: - if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 && + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 && is_user) goto out; break; @@ -1200,7 +1203,7 @@ int hns_roce_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr, ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, hr_qp); if (ret) - ibdev_err(ibdev, "Create QP type 0x%x failed(%d)\n", + ibdev_err(ibdev, "create QP type 0x%x failed(%d)\n", init_attr->qp_type, ret); return ret; @@ -1391,7 +1394,7 @@ void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, } } -static inline void *get_wqe(struct hns_roce_qp *hr_qp, int offset) +static inline void *get_wqe(struct hns_roce_qp *hr_qp, u32 offset) { return hns_roce_buf_offset(hr_qp->mtr.kmem, offset); } diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index 259444c0a630..989a2af2e938 100644 --- a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -9,112 +9,223 @@ #include "hns_roce_device.h" #include "hns_roce_hw_v2.h" -static int hns_roce_fill_cq(struct sk_buff *msg, 
- struct hns_roce_v2_cq_context *context) +#define MAX_ENTRY_NUM 256 + +int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq) { - if (rdma_nl_put_driver_u32(msg, "state", - roce_get_field(context->byte_4_pg_ceqn, - V2_CQC_BYTE_4_ARM_ST_M, - V2_CQC_BYTE_4_ARM_ST_S))) + struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); + struct nlattr *table_attr; + + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); + if (!table_attr) + return -EMSGSIZE; + + if (rdma_nl_put_driver_u32(msg, "cq_depth", hr_cq->cq_depth)) goto err; - if (rdma_nl_put_driver_u32(msg, "ceqn", - roce_get_field(context->byte_4_pg_ceqn, - V2_CQC_BYTE_4_CEQN_M, - V2_CQC_BYTE_4_CEQN_S))) + if (rdma_nl_put_driver_u32(msg, "cons_index", hr_cq->cons_index)) goto err; - if (rdma_nl_put_driver_u32(msg, "cqn", - roce_get_field(context->byte_8_cqn, - V2_CQC_BYTE_8_CQN_M, - V2_CQC_BYTE_8_CQN_S))) + if (rdma_nl_put_driver_u32(msg, "cqe_size", hr_cq->cqe_size)) goto err; - if (rdma_nl_put_driver_u32(msg, "hopnum", - roce_get_field(context->byte_16_hop_addr, - V2_CQC_BYTE_16_CQE_HOP_NUM_M, - V2_CQC_BYTE_16_CQE_HOP_NUM_S))) + if (rdma_nl_put_driver_u32(msg, "arm_sn", hr_cq->arm_sn)) goto err; - if (rdma_nl_put_driver_u32( - msg, "pi", - roce_get_field(context->byte_28_cq_pi, - V2_CQC_BYTE_28_CQ_PRODUCER_IDX_M, - V2_CQC_BYTE_28_CQ_PRODUCER_IDX_S))) + nla_nest_end(msg, table_attr); + + return 0; + +err: + nla_nest_cancel(msg, table_attr); + + return -EMSGSIZE; +} + +int hns_roce_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); + struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); + struct hns_roce_v2_cq_context context; + u32 data[MAX_ENTRY_NUM] = {}; + int offset = 0; + int ret; + + if (!hr_dev->hw->query_cqc) + return -EINVAL; + + ret = hr_dev->hw->query_cqc(hr_dev, hr_cq->cqn, &context); + if (ret) + return -EINVAL; + + data[offset++] = hr_reg_read(&context, CQC_CQ_ST); + data[offset++] = hr_reg_read(&context, CQC_SHIFT); + 
data[offset++] = hr_reg_read(&context, CQC_CQE_SIZE); + data[offset++] = hr_reg_read(&context, CQC_CQE_CNT); + data[offset++] = hr_reg_read(&context, CQC_CQ_PRODUCER_IDX); + data[offset++] = hr_reg_read(&context, CQC_CQ_CONSUMER_IDX); + data[offset++] = hr_reg_read(&context, CQC_DB_RECORD_EN); + data[offset++] = hr_reg_read(&context, CQC_ARM_ST); + data[offset++] = hr_reg_read(&context, CQC_CMD_SN); + data[offset++] = hr_reg_read(&context, CQC_CEQN); + data[offset++] = hr_reg_read(&context, CQC_CQ_MAX_CNT); + data[offset++] = hr_reg_read(&context, CQC_CQ_PERIOD); + data[offset++] = hr_reg_read(&context, CQC_CQE_HOP_NUM); + data[offset++] = hr_reg_read(&context, CQC_CQE_BAR_PG_SZ); + data[offset++] = hr_reg_read(&context, CQC_CQE_BUF_PG_SZ); + + ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, offset * sizeof(u32), data); + + return ret; +} + +int hns_roce_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp) +{ + struct hns_roce_qp *hr_qp = to_hr_qp(ib_qp); + struct nlattr *table_attr; + + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); + if (!table_attr) + return -EMSGSIZE; + + if (rdma_nl_put_driver_u32_hex(msg, "sq_wqe_cnt", hr_qp->sq.wqe_cnt)) goto err; - if (rdma_nl_put_driver_u32( - msg, "ci", - roce_get_field(context->byte_32_cq_ci, - V2_CQC_BYTE_32_CQ_CONSUMER_IDX_M, - V2_CQC_BYTE_32_CQ_CONSUMER_IDX_S))) + if (rdma_nl_put_driver_u32_hex(msg, "sq_max_gs", hr_qp->sq.max_gs)) goto err; - if (rdma_nl_put_driver_u32( - msg, "coalesce", - roce_get_field(context->byte_56_cqe_period_maxcnt, - V2_CQC_BYTE_56_CQ_MAX_CNT_M, - V2_CQC_BYTE_56_CQ_MAX_CNT_S))) + if (rdma_nl_put_driver_u32_hex(msg, "rq_wqe_cnt", hr_qp->rq.wqe_cnt)) goto err; - if (rdma_nl_put_driver_u32( - msg, "period", - roce_get_field(context->byte_56_cqe_period_maxcnt, - V2_CQC_BYTE_56_CQ_PERIOD_M, - V2_CQC_BYTE_56_CQ_PERIOD_S))) + if (rdma_nl_put_driver_u32_hex(msg, "rq_max_gs", hr_qp->rq.max_gs)) goto err; - if (rdma_nl_put_driver_u32(msg, "cnt", - roce_get_field(context->byte_52_cqe_cnt, 
- V2_CQC_BYTE_52_CQE_CNT_M, - V2_CQC_BYTE_52_CQE_CNT_S))) + if (rdma_nl_put_driver_u32_hex(msg, "ext_sge_sge_cnt", hr_qp->sge.sge_cnt)) goto err; + nla_nest_end(msg, table_attr); + return 0; err: + nla_nest_cancel(msg, table_attr); + return -EMSGSIZE; } -int hns_roce_fill_res_cq_entry(struct sk_buff *msg, - struct ib_cq *ib_cq) +int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp) { - struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); - struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); - struct hns_roce_v2_cq_context *context; - struct nlattr *table_attr; + struct hns_roce_dev *hr_dev = to_hr_dev(ib_qp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ib_qp); + struct hns_roce_v2_qp_context context; + u32 data[MAX_ENTRY_NUM] = {}; + int offset = 0; int ret; - if (!hr_dev->dfx->query_cqc_info) + if (!hr_dev->hw->query_qpc) return -EINVAL; - context = kzalloc(sizeof(struct hns_roce_v2_cq_context), GFP_KERNEL); - if (!context) - return -ENOMEM; - - ret = hr_dev->dfx->query_cqc_info(hr_dev, hr_cq->cqn, (int *)context); + ret = hr_dev->hw->query_qpc(hr_dev, hr_qp->qpn, &context); if (ret) - goto err; + return -EINVAL; + + data[offset++] = hr_reg_read(&context, QPC_QP_ST); + data[offset++] = hr_reg_read(&context, QPC_ERR_TYPE); + data[offset++] = hr_reg_read(&context, QPC_CHECK_FLG); + data[offset++] = hr_reg_read(&context, QPC_SRQ_EN); + data[offset++] = hr_reg_read(&context, QPC_SRQN); + data[offset++] = hr_reg_read(&context, QPC_QKEY_XRCD); + data[offset++] = hr_reg_read(&context, QPC_TX_CQN); + data[offset++] = hr_reg_read(&context, QPC_RX_CQN); + data[offset++] = hr_reg_read(&context, QPC_SQ_PRODUCER_IDX); + data[offset++] = hr_reg_read(&context, QPC_SQ_CONSUMER_IDX); + data[offset++] = hr_reg_read(&context, QPC_RQ_RECORD_EN); + data[offset++] = hr_reg_read(&context, QPC_RQ_PRODUCER_IDX); + data[offset++] = hr_reg_read(&context, QPC_RQ_CONSUMER_IDX); + data[offset++] = hr_reg_read(&context, QPC_SQ_SHIFT); + data[offset++] = 
hr_reg_read(&context, QPC_RQWS); + data[offset++] = hr_reg_read(&context, QPC_RQ_SHIFT); + data[offset++] = hr_reg_read(&context, QPC_SGE_SHIFT); + data[offset++] = hr_reg_read(&context, QPC_SQ_HOP_NUM); + data[offset++] = hr_reg_read(&context, QPC_RQ_HOP_NUM); + data[offset++] = hr_reg_read(&context, QPC_SGE_HOP_NUM); + data[offset++] = hr_reg_read(&context, QPC_WQE_SGE_BA_PG_SZ); + data[offset++] = hr_reg_read(&context, QPC_WQE_SGE_BUF_PG_SZ); + data[offset++] = hr_reg_read(&context, QPC_RETRY_NUM_INIT); + data[offset++] = hr_reg_read(&context, QPC_RETRY_CNT); + data[offset++] = hr_reg_read(&context, QPC_SQ_CUR_PSN); + data[offset++] = hr_reg_read(&context, QPC_SQ_MAX_PSN); + data[offset++] = hr_reg_read(&context, QPC_SQ_FLUSH_IDX); + data[offset++] = hr_reg_read(&context, QPC_SQ_MAX_IDX); + data[offset++] = hr_reg_read(&context, QPC_SQ_TX_ERR); + data[offset++] = hr_reg_read(&context, QPC_SQ_RX_ERR); + data[offset++] = hr_reg_read(&context, QPC_RQ_RX_ERR); + data[offset++] = hr_reg_read(&context, QPC_RQ_TX_ERR); + data[offset++] = hr_reg_read(&context, QPC_RQ_CQE_IDX); + data[offset++] = hr_reg_read(&context, QPC_RQ_RTY_TX_ERR); + + ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, offset * sizeof(u32), data); + + return ret; +} + +int hns_roce_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr) +{ + struct hns_roce_mr *hr_mr = to_hr_mr(ib_mr); + struct nlattr *table_attr; table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); - if (!table_attr) { - ret = -EMSGSIZE; + if (!table_attr) + return -EMSGSIZE; + + if (rdma_nl_put_driver_u32_hex(msg, "pbl_hop_num", hr_mr->pbl_hop_num)) + goto err; + + if (rdma_nl_put_driver_u32_hex(msg, "ba_pg_shift", + hr_mr->pbl_mtr.hem_cfg.ba_pg_shift)) goto err; - } - if (hns_roce_fill_cq(msg, context)) { - ret = -EMSGSIZE; - goto err_cancel_table; - } + if (rdma_nl_put_driver_u32_hex(msg, "buf_pg_shift", + hr_mr->pbl_mtr.hem_cfg.buf_pg_shift)) + goto err; nla_nest_end(msg, table_attr); - kfree(context); return 0; 
-err_cancel_table: - nla_nest_cancel(msg, table_attr); err: - kfree(context); + nla_nest_cancel(msg, table_attr); + + return -EMSGSIZE; +} + +int hns_roce_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ib_mr->device); + struct hns_roce_mr *hr_mr = to_hr_mr(ib_mr); + struct hns_roce_v2_mpt_entry context; + u32 data[MAX_ENTRY_NUM] = {}; + int offset = 0; + int ret; + + if (!hr_dev->hw->query_mpt) + return -EINVAL; + + ret = hr_dev->hw->query_mpt(hr_dev, hr_mr->key, &context); + if (ret) + return -EINVAL; + + data[offset++] = hr_reg_read(&context, MPT_ST); + data[offset++] = hr_reg_read(&context, MPT_PD); + data[offset++] = hr_reg_read(&context, MPT_LKEY); + data[offset++] = hr_reg_read(&context, MPT_LEN_L); + data[offset++] = hr_reg_read(&context, MPT_LEN_H); + data[offset++] = hr_reg_read(&context, MPT_PBL_SIZE); + data[offset++] = hr_reg_read(&context, MPT_PBL_HOP_NUM); + data[offset++] = hr_reg_read(&context, MPT_PBL_BA_PG_SZ); + data[offset++] = hr_reg_read(&context, MPT_PBL_BUF_PG_SZ); + + ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, offset * sizeof(u32), data); + return ret; } diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 6eee9deadd12..8dae98f827eb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -59,58 +59,39 @@ static void hns_roce_ib_srq_event(struct hns_roce_srq *srq, } } -static int hns_roce_hw_create_srq(struct hns_roce_dev *dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long srq_num) +static int alloc_srqn(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) { - return hns_roce_cmd_mbox(dev, mailbox->dma, 0, srq_num, 0, - HNS_ROCE_CMD_CREATE_SRQ, - HNS_ROCE_CMD_TIMEOUT_MSECS); -} - -static int hns_roce_hw_destroy_srq(struct hns_roce_dev *dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long srq_num) -{ - return hns_roce_cmd_mbox(dev, 0, mailbox ? 
mailbox->dma : 0, srq_num, - mailbox ? 0 : 1, HNS_ROCE_CMD_DESTROY_SRQ, - HNS_ROCE_CMD_TIMEOUT_MSECS); -} - -static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) -{ - struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; struct hns_roce_ida *srq_ida = &hr_dev->srq_table.srq_ida; - struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_cmd_mailbox *mailbox; - int ret; int id; id = ida_alloc_range(&srq_ida->ida, srq_ida->min, srq_ida->max, GFP_KERNEL); if (id < 0) { - ibdev_err(ibdev, "failed to alloc srq(%d).\n", id); + ibdev_err(&hr_dev->ib_dev, "failed to alloc srq(%d).\n", id); return -ENOMEM; } - srq->srqn = (unsigned long)id; - ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn); - if (ret) { - ibdev_err(ibdev, "failed to get SRQC table, ret = %d.\n", ret); - goto err_out; - } + srq->srqn = id; - ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL)); - if (ret) { - ibdev_err(ibdev, "failed to store SRQC, ret = %d.\n", ret); - goto err_put; - } + return 0; +} + +static void free_srqn(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) +{ + ida_free(&hr_dev->srq_table.srq_ida.ida, (int)srq->srqn); +} + +static int hns_roce_create_srqc(struct hns_roce_dev *hr_dev, + struct hns_roce_srq *srq) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_cmd_mailbox *mailbox; + int ret; mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR_OR_NULL(mailbox)) { + if (IS_ERR(mailbox)) { ibdev_err(ibdev, "failed to alloc mailbox for SRQC.\n"); - ret = -ENOMEM; - goto err_xa; + return PTR_ERR(mailbox); } ret = hr_dev->hw->write_srqc(srq, mailbox->buf); @@ -119,24 +100,44 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) goto err_mbox; } - ret = hns_roce_hw_create_srq(hr_dev, mailbox, srq->srqn); - if (ret) { + ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_SRQ, + srq->srqn); + if (ret) ibdev_err(ibdev, "failed to config SRQC, ret = %d.\n", ret); - goto 
err_mbox; - } +err_mbox: hns_roce_free_cmd_mailbox(hr_dev, mailbox); + return ret; +} + +static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) +{ + struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; + struct ib_device *ibdev = &hr_dev->ib_dev; + int ret; + + ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn); + if (ret) { + ibdev_err(ibdev, "failed to get SRQC table, ret = %d.\n", ret); + return ret; + } + + ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL)); + if (ret) { + ibdev_err(ibdev, "failed to store SRQC, ret = %d.\n", ret); + goto err_put; + } + + ret = hns_roce_create_srqc(hr_dev, srq); + if (ret) + goto err_xa; return 0; -err_mbox: - hns_roce_free_cmd_mailbox(hr_dev, mailbox); err_xa: xa_erase(&srq_table->xa, srq->srqn); err_put: hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn); -err_out: - ida_free(&srq_ida->ida, id); return ret; } @@ -146,7 +147,8 @@ static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; int ret; - ret = hns_roce_hw_destroy_srq(hr_dev, NULL, srq->srqn); + ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_SRQ, + srq->srqn); if (ret) dev_err(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n", ret, srq->srqn); @@ -158,7 +160,6 @@ static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) wait_for_completion(&srq->free); hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn); - ida_free(&srq_table->srq_ida.ida, (int)srq->srqn); } static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, @@ -259,7 +260,7 @@ static int alloc_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) static void free_srq_wrid(struct hns_roce_srq *srq) { - kfree(srq->wrid); + kvfree(srq->wrid); srq->wrid = NULL; } @@ -406,10 +407,14 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, if (ret) return ret; - ret = alloc_srqc(hr_dev, srq); + ret = 
alloc_srqn(hr_dev, srq); if (ret) goto err_srq_buf; + ret = alloc_srqc(hr_dev, srq); + if (ret) + goto err_srqn; + if (udata) { resp.srqn = srq->srqn; if (ib_copy_to_udata(udata, &resp, @@ -428,6 +433,8 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, err_srqc: free_srqc(hr_dev, srq); +err_srqn: + free_srqn(hr_dev, srq); err_srq_buf: free_srq_buf(hr_dev, srq); @@ -440,6 +447,7 @@ int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) struct hns_roce_srq *srq = to_hr_srq(ibsrq); free_srqc(hr_dev, srq); + free_srqn(hr_dev, srq); free_srq_buf(hr_dev, srq); return 0; } diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index 6dea0a49d171..7b086fe63a24 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -1477,12 +1477,13 @@ irdma_find_listener(struct irdma_cm_core *cm_core, u32 *dst_addr, u16 dst_port, list_for_each_entry (listen_node, &cm_core->listen_list, list) { memcpy(listen_addr, listen_node->loc_addr, sizeof(listen_addr)); listen_port = listen_node->loc_port; + if (listen_port != dst_port || + !(listener_state & listen_node->listener_state)) + continue; /* compare node pair, return node handle if a match */ - if ((!memcmp(listen_addr, dst_addr, sizeof(listen_addr)) || - !memcmp(listen_addr, ip_zero, sizeof(listen_addr))) && - listen_port == dst_port && - vlan_id == listen_node->vlan_id && - (listener_state & listen_node->listener_state)) { + if (!memcmp(listen_addr, ip_zero, sizeof(listen_addr)) || + (!memcmp(listen_addr, dst_addr, sizeof(listen_addr)) && + vlan_id == listen_node->vlan_id)) { refcount_inc(&listen_node->refcnt); spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); @@ -1501,15 +1502,14 @@ irdma_find_listener(struct irdma_cm_core *cm_core, u32 *dst_addr, u16 dst_port, * @cm_info: CM info for parent listen node * @cm_parent_listen_node: The parent listen node */ -static enum irdma_status_code -irdma_del_multiple_qhash(struct irdma_device *iwdev, - struct 
irdma_cm_info *cm_info, - struct irdma_cm_listener *cm_parent_listen_node) +static int irdma_del_multiple_qhash(struct irdma_device *iwdev, + struct irdma_cm_info *cm_info, + struct irdma_cm_listener *cm_parent_listen_node) { struct irdma_cm_listener *child_listen_node; - enum irdma_status_code ret = IRDMA_ERR_CFG; struct list_head *pos, *tpos; unsigned long flags; + int ret = -EINVAL; spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags); list_for_each_safe (pos, tpos, @@ -1618,16 +1618,16 @@ u16 irdma_get_vlan_ipv4(u32 *addr) * Adds a qhash and a child listen node for every IPv6 address * on the adapter and adds the associated qhash filter */ -static enum irdma_status_code -irdma_add_mqh_6(struct irdma_device *iwdev, struct irdma_cm_info *cm_info, - struct irdma_cm_listener *cm_parent_listen_node) +static int irdma_add_mqh_6(struct irdma_device *iwdev, + struct irdma_cm_info *cm_info, + struct irdma_cm_listener *cm_parent_listen_node) { struct net_device *ip_dev; struct inet6_dev *idev; struct inet6_ifaddr *ifp, *tmp; - enum irdma_status_code ret = 0; struct irdma_cm_listener *child_listen_node; unsigned long flags; + int ret = 0; rtnl_lock(); for_each_netdev(&init_net, ip_dev) { @@ -1653,7 +1653,7 @@ irdma_add_mqh_6(struct irdma_device *iwdev, struct irdma_cm_info *cm_info, child_listen_node); if (!child_listen_node) { ibdev_dbg(&iwdev->ibdev, "CM: listener memory allocation\n"); - ret = IRDMA_ERR_NO_MEMORY; + ret = -ENOMEM; goto exit; } @@ -1700,16 +1700,16 @@ exit: * Adds a qhash and a child listen node for every IPv4 address * on the adapter and adds the associated qhash filter */ -static enum irdma_status_code -irdma_add_mqh_4(struct irdma_device *iwdev, struct irdma_cm_info *cm_info, - struct irdma_cm_listener *cm_parent_listen_node) +static int irdma_add_mqh_4(struct irdma_device *iwdev, + struct irdma_cm_info *cm_info, + struct irdma_cm_listener *cm_parent_listen_node) { struct net_device *ip_dev; struct in_device *idev; struct irdma_cm_listener 
*child_listen_node; - enum irdma_status_code ret = 0; unsigned long flags; const struct in_ifaddr *ifa; + int ret = 0; rtnl_lock(); for_each_netdev(&init_net, ip_dev) { @@ -1734,7 +1734,7 @@ irdma_add_mqh_4(struct irdma_device *iwdev, struct irdma_cm_info *cm_info, if (!child_listen_node) { ibdev_dbg(&iwdev->ibdev, "CM: listener memory allocation\n"); in_dev_put(idev); - ret = IRDMA_ERR_NO_MEMORY; + ret = -ENOMEM; goto exit; } @@ -1781,9 +1781,9 @@ exit: * @cm_info: CM info for parent listen node * @cm_listen_node: The parent listen node */ -static enum irdma_status_code -irdma_add_mqh(struct irdma_device *iwdev, struct irdma_cm_info *cm_info, - struct irdma_cm_listener *cm_listen_node) +static int irdma_add_mqh(struct irdma_device *iwdev, + struct irdma_cm_info *cm_info, + struct irdma_cm_listener *cm_listen_node) { if (cm_info->ipv4) return irdma_add_mqh_4(iwdev, cm_info, cm_listen_node); @@ -2200,7 +2200,7 @@ irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev, /* set our node specific transport info */ cm_node->ipv4 = cm_info->ipv4; cm_node->vlan_id = cm_info->vlan_id; - if (cm_node->vlan_id >= VLAN_N_VID && iwdev->dcb) + if (cm_node->vlan_id >= VLAN_N_VID && iwdev->dcb_vlan_mode) cm_node->vlan_id = 0; cm_node->tos = cm_info->tos; cm_node->user_pri = cm_info->user_pri; @@ -2209,8 +2209,12 @@ irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev, ibdev_warn(&iwdev->ibdev, "application TOS[%d] and remote client TOS[%d] mismatch\n", listener->tos, cm_info->tos); - cm_node->tos = max(listener->tos, cm_info->tos); - cm_node->user_pri = rt_tos2priority(cm_node->tos); + if (iwdev->vsi.dscp_mode) { + cm_node->user_pri = listener->user_pri; + } else { + cm_node->tos = max(listener->tos, cm_info->tos); + cm_node->user_pri = rt_tos2priority(cm_node->tos); + } ibdev_dbg(&iwdev->ibdev, "DCB: listener: TOS:[%d] UP:[%d]\n", cm_node->tos, cm_node->user_pri); @@ -2305,10 +2309,8 @@ err: return NULL; } -static void 
irdma_cm_node_free_cb(struct rcu_head *rcu_head) +static void irdma_destroy_connection(struct irdma_cm_node *cm_node) { - struct irdma_cm_node *cm_node = - container_of(rcu_head, struct irdma_cm_node, rcu_head); struct irdma_cm_core *cm_core = cm_node->cm_core; struct irdma_qp *iwqp; struct irdma_cm_info nfo; @@ -2356,7 +2358,6 @@ static void irdma_cm_node_free_cb(struct rcu_head *rcu_head) } cm_core->cm_free_ah(cm_node); - kfree(cm_node); } /** @@ -2384,8 +2385,9 @@ void irdma_rem_ref_cm_node(struct irdma_cm_node *cm_node) spin_unlock_irqrestore(&cm_core->ht_lock, flags); - /* wait for all list walkers to exit their grace period */ - call_rcu(&cm_node->rcu_head, irdma_cm_node_free_cb); + irdma_destroy_connection(cm_node); + + kfree_rcu(cm_node, rcu_head); } /** @@ -3201,8 +3203,7 @@ static void irdma_cm_free_ah_nop(struct irdma_cm_node *cm_node) * @iwdev: iwarp device structure * @rdma_ver: HW version */ -enum irdma_status_code irdma_setup_cm_core(struct irdma_device *iwdev, - u8 rdma_ver) +int irdma_setup_cm_core(struct irdma_device *iwdev, u8 rdma_ver) { struct irdma_cm_core *cm_core = &iwdev->cm_core; @@ -3212,7 +3213,7 @@ enum irdma_status_code irdma_setup_cm_core(struct irdma_device *iwdev, /* Handles CM event work items send to Iwarp core */ cm_core->event_wq = alloc_ordered_workqueue("iwarp-event-wq", 0); if (!cm_core->event_wq) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; INIT_LIST_HEAD(&cm_core->listen_list); @@ -3244,15 +3245,10 @@ enum irdma_status_code irdma_setup_cm_core(struct irdma_device *iwdev, */ void irdma_cleanup_cm_core(struct irdma_cm_core *cm_core) { - unsigned long flags; - if (!cm_core) return; - spin_lock_irqsave(&cm_core->ht_lock, flags); - if (timer_pending(&cm_core->tcp_timer)) - del_timer_sync(&cm_core->tcp_timer); - spin_unlock_irqrestore(&cm_core->ht_lock, flags); + del_timer_sync(&cm_core->tcp_timer); destroy_workqueue(cm_core->event_wq); cm_core->dev->ws_reset(&cm_core->iwdev->vsi); @@ -3465,12 +3461,6 @@ static void 
irdma_cm_disconn_true(struct irdma_qp *iwqp) } cm_id = iwqp->cm_id; - /* make sure we havent already closed this connection */ - if (!cm_id) { - spin_unlock_irqrestore(&iwqp->lock, flags); - return; - } - original_hw_tcp_state = iwqp->hw_tcp_state; original_ibqp_state = iwqp->ibqp_state; last_ae = iwqp->last_aeq; @@ -3492,11 +3482,11 @@ static void irdma_cm_disconn_true(struct irdma_qp *iwqp) disconn_status = -ECONNRESET; } - if ((original_hw_tcp_state == IRDMA_TCP_STATE_CLOSED || - original_hw_tcp_state == IRDMA_TCP_STATE_TIME_WAIT || - last_ae == IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE || - last_ae == IRDMA_AE_BAD_CLOSE || - last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->rf->reset)) { + if (original_hw_tcp_state == IRDMA_TCP_STATE_CLOSED || + original_hw_tcp_state == IRDMA_TCP_STATE_TIME_WAIT || + last_ae == IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE || + last_ae == IRDMA_AE_BAD_CLOSE || + last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->rf->reset || !cm_id) { issue_close = 1; iwqp->cm_id = NULL; qp->term_flags = 0; @@ -3835,7 +3825,11 @@ int irdma_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_info.cm_id = cm_id; cm_info.qh_qpid = iwdev->vsi.ilq->qp_id; cm_info.tos = cm_id->tos; - cm_info.user_pri = rt_tos2priority(cm_id->tos); + if (iwdev->vsi.dscp_mode) + cm_info.user_pri = + iwqp->sc_qp.vsi->dscp_map[irdma_tos2dscp(cm_info.tos)]; + else + cm_info.user_pri = rt_tos2priority(cm_id->tos); if (iwqp->sc_qp.dev->ws_add(iwqp->sc_qp.vsi, cm_info.user_pri)) return -ENOMEM; @@ -3915,10 +3909,10 @@ int irdma_create_listen(struct iw_cm_id *cm_id, int backlog) struct irdma_device *iwdev; struct irdma_cm_listener *cm_listen_node; struct irdma_cm_info cm_info = {}; - enum irdma_status_code err; struct sockaddr_in *laddr; struct sockaddr_in6 *laddr6; bool wildcard = false; + int err; iwdev = to_iwdev(cm_id->device); if (!iwdev) @@ -3959,7 +3953,7 @@ int irdma_create_listen(struct iw_cm_id *cm_id, int backlog) } } - if (cm_info.vlan_id >= VLAN_N_VID && 
iwdev->dcb) + if (cm_info.vlan_id >= VLAN_N_VID && iwdev->dcb_vlan_mode) cm_info.vlan_id = 0; cm_info.backlog = backlog; cm_info.cm_id = cm_id; @@ -3977,7 +3971,11 @@ int irdma_create_listen(struct iw_cm_id *cm_id, int backlog) cm_id->provider_data = cm_listen_node; cm_listen_node->tos = cm_id->tos; - cm_listen_node->user_pri = rt_tos2priority(cm_id->tos); + if (iwdev->vsi.dscp_mode) + cm_listen_node->user_pri = + iwdev->vsi.dscp_map[irdma_tos2dscp(cm_id->tos)]; + else + cm_listen_node->user_pri = rt_tos2priority(cm_id->tos); cm_info.user_pri = cm_listen_node->user_pri; if (!cm_listen_node->reused_node) { if (wildcard) { @@ -4234,10 +4232,6 @@ void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr, struct irdma_cm_node *cm_node; struct list_head teardown_list; struct ib_qp_attr attr; - struct irdma_sc_vsi *vsi = &iwdev->vsi; - struct irdma_sc_qp *sc_qp; - struct irdma_qp *qp; - int i; INIT_LIST_HEAD(&teardown_list); @@ -4254,52 +4248,6 @@ void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr, irdma_cm_disconn(cm_node->iwqp); irdma_rem_ref_cm_node(cm_node); } - if (!iwdev->roce_mode) - return; - - INIT_LIST_HEAD(&teardown_list); - for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) { - mutex_lock(&vsi->qos[i].qos_mutex); - list_for_each_safe (list_node, list_core_temp, - &vsi->qos[i].qplist) { - u32 qp_ip[4]; - - sc_qp = container_of(list_node, struct irdma_sc_qp, - list); - if (sc_qp->qp_uk.qp_type != IRDMA_QP_TYPE_ROCE_RC) - continue; - - qp = sc_qp->qp_uk.back_qp; - if (!disconnect_all) { - if (nfo->ipv4) - qp_ip[0] = qp->udp_info.local_ipaddr[3]; - else - memcpy(qp_ip, - &qp->udp_info.local_ipaddr[0], - sizeof(qp_ip)); - } - - if (disconnect_all || - (nfo->vlan_id == (qp->udp_info.vlan_tag & VLAN_VID_MASK) && - !memcmp(qp_ip, ipaddr, nfo->ipv4 ? 
4 : 16))) { - spin_lock(&iwdev->rf->qptable_lock); - if (iwdev->rf->qp_table[sc_qp->qp_uk.qp_id]) { - irdma_qp_add_ref(&qp->ibqp); - list_add(&qp->teardown_entry, - &teardown_list); - } - spin_unlock(&iwdev->rf->qptable_lock); - } - } - mutex_unlock(&vsi->qos[i].qos_mutex); - } - - list_for_each_safe (list_node, list_core_temp, &teardown_list) { - qp = container_of(list_node, struct irdma_qp, teardown_entry); - attr.qp_state = IB_QPS_ERR; - irdma_modify_qp_roce(&qp->ibqp, &attr, IB_QP_STATE, NULL); - irdma_qp_rem_ref(&qp->ibqp); - } } /** @@ -4325,11 +4273,11 @@ static void irdma_qhash_ctrl(struct irdma_device *iwdev, struct list_head *child_listen_list = &parent_listen_node->child_listen_list; struct irdma_cm_listener *child_listen_node; struct list_head *pos, *tpos; - enum irdma_status_code err; bool node_allocated = false; enum irdma_quad_hash_manage_type op = ifup ? IRDMA_QHASH_MANAGE_TYPE_ADD : IRDMA_QHASH_MANAGE_TYPE_DELETE; + int err; list_for_each_safe (pos, tpos, child_listen_list) { child_listen_node = list_entry(pos, struct irdma_cm_listener, diff --git a/drivers/infiniband/hw/irdma/cm.h b/drivers/infiniband/hw/irdma/cm.h index 3bf42728e9b7..19c284975fc7 100644 --- a/drivers/infiniband/hw/irdma/cm.h +++ b/drivers/infiniband/hw/irdma/cm.h @@ -384,6 +384,13 @@ int irdma_schedule_cm_timer(struct irdma_cm_node *cm_node, struct irdma_puda_buf *sqbuf, enum irdma_timer_type type, int send_retrans, int close_when_complete); + +static inline u8 irdma_tos2dscp(u8 tos) +{ +#define IRDMA_DSCP_VAL GENMASK(7, 2) + return (u8)FIELD_GET(IRDMA_DSCP_VAL, tos); +} + int irdma_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); int irdma_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); int irdma_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index 7264f8c2f7d5..a41e0d21143a 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ 
b/drivers/infiniband/hw/irdma/ctrl.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB /* Copyright (c) 2015 - 2021 Intel Corporation */ +#include <linux/etherdevice.h> + #include "osdep.h" -#include "status.h" #include "hmc.h" #include "defs.h" #include "type.h" @@ -68,6 +69,31 @@ void irdma_sc_suspend_resume_qps(struct irdma_sc_vsi *vsi, u8 op) } } +static void irdma_set_qos_info(struct irdma_sc_vsi *vsi, + struct irdma_l2params *l2p) +{ + u8 i; + + vsi->qos_rel_bw = l2p->vsi_rel_bw; + vsi->qos_prio_type = l2p->vsi_prio_type; + vsi->dscp_mode = l2p->dscp_mode; + if (l2p->dscp_mode) { + memcpy(vsi->dscp_map, l2p->dscp_map, sizeof(vsi->dscp_map)); + for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) + l2p->up2tc[i] = i; + } + for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) { + if (vsi->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) + vsi->qos[i].qs_handle = l2p->qs_handle_list[i]; + vsi->qos[i].traffic_class = l2p->up2tc[i]; + vsi->qos[i].rel_bw = + l2p->tc_info[vsi->qos[i].traffic_class].rel_bw; + vsi->qos[i].prio_type = + l2p->tc_info[vsi->qos[i].traffic_class].prio_type; + vsi->qos[i].valid = false; + } +} + /** * irdma_change_l2params - given the new l2 parameters, change all qp * @vsi: RDMA VSI pointer @@ -86,6 +112,7 @@ void irdma_change_l2params(struct irdma_sc_vsi *vsi, return; vsi->tc_change_pending = false; + irdma_set_qos_info(vsi, l2params); irdma_sc_suspend_resume_qps(vsi, IRDMA_OP_RESUME); } @@ -152,17 +179,16 @@ void irdma_sc_pd_init(struct irdma_sc_dev *dev, struct irdma_sc_pd *pd, u32 pd_i * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code -irdma_sc_add_arp_cache_entry(struct irdma_sc_cqp *cqp, - struct irdma_add_arp_cache_entry_info *info, - u64 scratch, bool post_sq) +static int irdma_sc_add_arp_cache_entry(struct irdma_sc_cqp *cqp, + struct irdma_add_arp_cache_entry_info *info, + u64 scratch, bool post_sq) { __le64 *wqe; u64 hdr; wqe = 
irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 8, info->reach_max); set_64bit_val(wqe, 16, ether_addr_to_u64(info->mac_addr)); @@ -190,16 +216,15 @@ irdma_sc_add_arp_cache_entry(struct irdma_sc_cqp *cqp, * @arp_index: arp index to delete arp entry * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code -irdma_sc_del_arp_cache_entry(struct irdma_sc_cqp *cqp, u64 scratch, - u16 arp_index, bool post_sq) +static int irdma_sc_del_arp_cache_entry(struct irdma_sc_cqp *cqp, u64 scratch, + u16 arp_index, bool post_sq) { __le64 *wqe; u64 hdr; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; hdr = arp_index | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MANAGE_ARP) | @@ -224,17 +249,16 @@ irdma_sc_del_arp_cache_entry(struct irdma_sc_cqp *cqp, u64 scratch, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code -irdma_sc_manage_apbvt_entry(struct irdma_sc_cqp *cqp, - struct irdma_apbvt_info *info, u64 scratch, - bool post_sq) +static int irdma_sc_manage_apbvt_entry(struct irdma_sc_cqp *cqp, + struct irdma_apbvt_info *info, + u64 scratch, bool post_sq) { __le64 *wqe; u64 hdr; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 16, info->port); @@ -272,7 +296,7 @@ irdma_sc_manage_apbvt_entry(struct irdma_sc_cqp *cqp, * quad hash entry in the hardware will point to iwarp's qp * number and requires no calls from the driver. 
*/ -static enum irdma_status_code +static int irdma_sc_manage_qhash_table_entry(struct irdma_sc_cqp *cqp, struct irdma_qhash_table_info *info, u64 scratch, bool post_sq) @@ -285,7 +309,7 @@ irdma_sc_manage_qhash_table_entry(struct irdma_sc_cqp *cqp, wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 0, ether_addr_to_u64(info->mac_addr)); @@ -348,10 +372,9 @@ irdma_sc_manage_qhash_table_entry(struct irdma_sc_cqp *cqp, * @qp: sc qp * @info: initialization qp info */ -enum irdma_status_code irdma_sc_qp_init(struct irdma_sc_qp *qp, - struct irdma_qp_init_info *info) +int irdma_sc_qp_init(struct irdma_sc_qp *qp, struct irdma_qp_init_info *info) { - enum irdma_status_code ret_code; + int ret_code; u32 pble_obj_cnt; u16 wqe_size; @@ -359,7 +382,7 @@ enum irdma_status_code irdma_sc_qp_init(struct irdma_sc_qp *qp, info->pd->dev->hw_attrs.uk_attrs.max_hw_wq_frags || info->qp_uk_init_info.max_rq_frag_cnt > info->pd->dev->hw_attrs.uk_attrs.max_hw_wq_frags) - return IRDMA_ERR_INVALID_FRAG_COUNT; + return -EINVAL; qp->dev = info->pd->dev; qp->vsi = info->vsi; @@ -382,7 +405,7 @@ enum irdma_status_code irdma_sc_qp_init(struct irdma_sc_qp *qp, if ((info->virtual_map && info->sq_pa >= pble_obj_cnt) || (info->virtual_map && info->rq_pa >= pble_obj_cnt)) - return IRDMA_ERR_INVALID_PBLE_INDEX; + return -EINVAL; qp->llp_stream_handle = (void *)(-1); qp->hw_sq_size = irdma_get_encoded_wqe_size(qp->qp_uk.sq_ring.size, @@ -422,8 +445,8 @@ enum irdma_status_code irdma_sc_qp_init(struct irdma_sc_qp *qp, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -enum irdma_status_code irdma_sc_qp_create(struct irdma_sc_qp *qp, struct irdma_create_qp_info *info, - u64 scratch, bool post_sq) +int irdma_sc_qp_create(struct irdma_sc_qp *qp, struct irdma_create_qp_info *info, + u64 scratch, bool post_sq) { struct irdma_sc_cqp *cqp; __le64 *wqe; @@ -431,12 +454,12 @@ enum 
irdma_status_code irdma_sc_qp_create(struct irdma_sc_qp *qp, struct irdma_c cqp = qp->dev->cqp; if (qp->qp_uk.qp_id < cqp->dev->hw_attrs.min_hw_qp_id || - qp->qp_uk.qp_id > (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt - 1)) - return IRDMA_ERR_INVALID_QP_ID; + qp->qp_uk.qp_id >= cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt) + return -EINVAL; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 16, qp->hw_host_ctx_pa); set_64bit_val(wqe, 40, qp->shadow_area_pa); @@ -473,9 +496,8 @@ enum irdma_status_code irdma_sc_qp_create(struct irdma_sc_qp *qp, struct irdma_c * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -enum irdma_status_code irdma_sc_qp_modify(struct irdma_sc_qp *qp, - struct irdma_modify_qp_info *info, - u64 scratch, bool post_sq) +int irdma_sc_qp_modify(struct irdma_sc_qp *qp, struct irdma_modify_qp_info *info, + u64 scratch, bool post_sq) { __le64 *wqe; struct irdma_sc_cqp *cqp; @@ -486,7 +508,7 @@ enum irdma_status_code irdma_sc_qp_modify(struct irdma_sc_qp *qp, cqp = qp->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; if (info->next_iwarp_state == IRDMA_QP_STATE_TERMINATE) { if (info->dont_send_fin) @@ -544,9 +566,8 @@ enum irdma_status_code irdma_sc_qp_modify(struct irdma_sc_qp *qp, * @ignore_mw_bnd: memory window bind flag * @post_sq: flag for cqp db to ring */ -enum irdma_status_code irdma_sc_qp_destroy(struct irdma_sc_qp *qp, u64 scratch, - bool remove_hash_idx, bool ignore_mw_bnd, - bool post_sq) +int irdma_sc_qp_destroy(struct irdma_sc_qp *qp, u64 scratch, + bool remove_hash_idx, bool ignore_mw_bnd, bool post_sq) { __le64 *wqe; struct irdma_sc_cqp *cqp; @@ -555,7 +576,7 @@ enum irdma_status_code irdma_sc_qp_destroy(struct irdma_sc_qp *qp, u64 scratch, cqp = qp->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return 
IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 16, qp->hw_host_ctx_pa); set_64bit_val(wqe, 40, qp->shadow_area_pa); @@ -737,16 +758,15 @@ void irdma_sc_qp_setctx_roce(struct irdma_sc_qp *qp, __le64 *qp_ctx, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code -irdma_sc_alloc_local_mac_entry(struct irdma_sc_cqp *cqp, u64 scratch, - bool post_sq) +static int irdma_sc_alloc_local_mac_entry(struct irdma_sc_cqp *cqp, u64 scratch, + bool post_sq) { __le64 *wqe; u64 hdr; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_ALLOCATE_LOC_MAC_TABLE_ENTRY) | @@ -772,17 +792,16 @@ irdma_sc_alloc_local_mac_entry(struct irdma_sc_cqp *cqp, u64 scratch, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code -irdma_sc_add_local_mac_entry(struct irdma_sc_cqp *cqp, - struct irdma_local_mac_entry_info *info, - u64 scratch, bool post_sq) +static int irdma_sc_add_local_mac_entry(struct irdma_sc_cqp *cqp, + struct irdma_local_mac_entry_info *info, + u64 scratch, bool post_sq) { __le64 *wqe; u64 header; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 32, ether_addr_to_u64(info->mac_addr)); @@ -811,16 +830,16 @@ irdma_sc_add_local_mac_entry(struct irdma_sc_cqp *cqp, * @ignore_ref_count: to force mac adde delete * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code -irdma_sc_del_local_mac_entry(struct irdma_sc_cqp *cqp, u64 scratch, - u16 entry_idx, u8 ignore_ref_count, bool post_sq) +static int irdma_sc_del_local_mac_entry(struct irdma_sc_cqp *cqp, u64 scratch, + u16 entry_idx, u8 ignore_ref_count, + bool post_sq) { __le64 *wqe; u64 header; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return 
-ENOMEM; header = FIELD_PREP(IRDMA_CQPSQ_MLM_TABLEIDX, entry_idx) | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MANAGE_LOC_MAC_TABLE) | @@ -1033,10 +1052,9 @@ void irdma_sc_qp_setctx(struct irdma_sc_qp *qp, __le64 *qp_ctx, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code -irdma_sc_alloc_stag(struct irdma_sc_dev *dev, - struct irdma_allocate_stag_info *info, u64 scratch, - bool post_sq) +static int irdma_sc_alloc_stag(struct irdma_sc_dev *dev, + struct irdma_allocate_stag_info *info, + u64 scratch, bool post_sq) { __le64 *wqe; struct irdma_sc_cqp *cqp; @@ -1053,7 +1071,7 @@ irdma_sc_alloc_stag(struct irdma_sc_dev *dev, cqp = dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 8, FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID) | @@ -1095,10 +1113,9 @@ irdma_sc_alloc_stag(struct irdma_sc_dev *dev, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code -irdma_sc_mr_reg_non_shared(struct irdma_sc_dev *dev, - struct irdma_reg_ns_stag_info *info, u64 scratch, - bool post_sq) +static int irdma_sc_mr_reg_non_shared(struct irdma_sc_dev *dev, + struct irdma_reg_ns_stag_info *info, + u64 scratch, bool post_sq) { __le64 *wqe; u64 fbo; @@ -1116,7 +1133,7 @@ irdma_sc_mr_reg_non_shared(struct irdma_sc_dev *dev, else if (info->page_size == 0x1000) page_size = IRDMA_PAGE_SIZE_4K; else - return IRDMA_ERR_PARAM; + return -EINVAL; if (info->access_rights & (IRDMA_ACCESS_FLAGS_REMOTEREAD_ONLY | IRDMA_ACCESS_FLAGS_REMOTEWRITE_ONLY)) @@ -1126,12 +1143,12 @@ irdma_sc_mr_reg_non_shared(struct irdma_sc_dev *dev, pble_obj_cnt = dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt; if (info->chunk_size && info->first_pm_pbl_index >= pble_obj_cnt) - return IRDMA_ERR_INVALID_PBLE_INDEX; + return -EINVAL; cqp = dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); 
if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; fbo = info->va & (info->page_size - 1); set_64bit_val(wqe, 0, @@ -1184,10 +1201,9 @@ irdma_sc_mr_reg_non_shared(struct irdma_sc_dev *dev, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code -irdma_sc_dealloc_stag(struct irdma_sc_dev *dev, - struct irdma_dealloc_stag_info *info, u64 scratch, - bool post_sq) +static int irdma_sc_dealloc_stag(struct irdma_sc_dev *dev, + struct irdma_dealloc_stag_info *info, + u64 scratch, bool post_sq) { u64 hdr; __le64 *wqe; @@ -1196,7 +1212,7 @@ irdma_sc_dealloc_stag(struct irdma_sc_dev *dev, cqp = dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 8, FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID)); @@ -1225,9 +1241,9 @@ irdma_sc_dealloc_stag(struct irdma_sc_dev *dev, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code -irdma_sc_mw_alloc(struct irdma_sc_dev *dev, struct irdma_mw_alloc_info *info, - u64 scratch, bool post_sq) +static int irdma_sc_mw_alloc(struct irdma_sc_dev *dev, + struct irdma_mw_alloc_info *info, u64 scratch, + bool post_sq) { u64 hdr; struct irdma_sc_cqp *cqp; @@ -1236,7 +1252,7 @@ irdma_sc_mw_alloc(struct irdma_sc_dev *dev, struct irdma_mw_alloc_info *info, cqp = dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 8, FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID)); @@ -1266,9 +1282,9 @@ irdma_sc_mw_alloc(struct irdma_sc_dev *dev, struct irdma_mw_alloc_info *info, * @info: fast mr info * @post_sq: flag for cqp db to ring */ -enum irdma_status_code -irdma_sc_mr_fast_register(struct irdma_sc_qp *qp, - struct irdma_fast_reg_stag_info *info, bool post_sq) +int irdma_sc_mr_fast_register(struct irdma_sc_qp *qp, + struct 
irdma_fast_reg_stag_info *info, + bool post_sq) { u64 temp, hdr; __le64 *wqe; @@ -1290,7 +1306,7 @@ irdma_sc_mr_fast_register(struct irdma_sc_qp *qp, wqe = irdma_qp_get_next_send_wqe(&qp->qp_uk, &wqe_idx, IRDMA_QP_WQE_MIN_QUANTA, 0, &sq_info); if (!wqe) - return IRDMA_ERR_QP_TOOMANY_WRS_POSTED; + return -ENOMEM; irdma_clr_wqes(&qp->qp_uk, wqe_idx); @@ -1819,8 +1835,7 @@ void irdma_terminate_received(struct irdma_sc_qp *qp, } } -static enum irdma_status_code irdma_null_ws_add(struct irdma_sc_vsi *vsi, - u8 user_pri) +static int irdma_null_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri) { return 0; } @@ -1843,7 +1858,6 @@ static void irdma_null_ws_reset(struct irdma_sc_vsi *vsi) void irdma_sc_vsi_init(struct irdma_sc_vsi *vsi, struct irdma_vsi_init_info *info) { - struct irdma_l2params *l2p; int i; vsi->dev = info->dev; @@ -1856,18 +1870,8 @@ void irdma_sc_vsi_init(struct irdma_sc_vsi *vsi, if (vsi->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) vsi->fcn_id = info->dev->hmc_fn_id; - l2p = info->params; - vsi->qos_rel_bw = l2p->vsi_rel_bw; - vsi->qos_prio_type = l2p->vsi_prio_type; + irdma_set_qos_info(vsi, info->params); for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) { - if (vsi->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) - vsi->qos[i].qs_handle = l2p->qs_handle_list[i]; - vsi->qos[i].traffic_class = info->params->up2tc[i]; - vsi->qos[i].rel_bw = - l2p->tc_info[vsi->qos[i].traffic_class].rel_bw; - vsi->qos[i].prio_type = - l2p->tc_info[vsi->qos[i].traffic_class].prio_type; - vsi->qos[i].valid = false; mutex_init(&vsi->qos[i].qos_mutex); INIT_LIST_HEAD(&vsi->qos[i].qplist); } @@ -1916,8 +1920,8 @@ static u8 irdma_get_fcn_id(struct irdma_sc_vsi *vsi) * @vsi: pointer to the vsi structure * @info: The info structure used for initialization */ -enum irdma_status_code irdma_vsi_stats_init(struct irdma_sc_vsi *vsi, - struct irdma_vsi_stats_info *info) +int irdma_vsi_stats_init(struct irdma_sc_vsi *vsi, + struct irdma_vsi_stats_info *info) { u8 fcn_id = info->fcn_id; struct 
irdma_dma_mem *stats_buff_mem; @@ -1932,7 +1936,7 @@ enum irdma_status_code irdma_vsi_stats_init(struct irdma_sc_vsi *vsi, &stats_buff_mem->pa, GFP_KERNEL); if (!stats_buff_mem->va) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; vsi->pestat->gather_info.gather_stats_va = stats_buff_mem->va; vsi->pestat->gather_info.last_gather_stats_va = @@ -1959,7 +1963,7 @@ stats_error: stats_buff_mem->va, stats_buff_mem->pa); stats_buff_mem->va = NULL; - return IRDMA_ERR_CQP_COMPL_ERROR; + return -EIO; } /** @@ -2021,19 +2025,19 @@ u8 irdma_get_encoded_wqe_size(u32 wqsize, enum irdma_queue_type queue_type) * @info: gather stats info structure * @scratch: u64 saved to be used during cqp completion */ -static enum irdma_status_code -irdma_sc_gather_stats(struct irdma_sc_cqp *cqp, - struct irdma_stats_gather_info *info, u64 scratch) +static int irdma_sc_gather_stats(struct irdma_sc_cqp *cqp, + struct irdma_stats_gather_info *info, + u64 scratch) { __le64 *wqe; u64 temp; if (info->stats_buff_mem.size < IRDMA_GATHER_STATS_BUF_SIZE) - return IRDMA_ERR_BUF_TOO_SHORT; + return -ENOMEM; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 40, FIELD_PREP(IRDMA_CQPSQ_STATS_HMC_FCN_INDEX, info->hmc_fcn_index)); @@ -2068,17 +2072,16 @@ irdma_sc_gather_stats(struct irdma_sc_cqp *cqp, * @alloc: alloc vs. 
delete flag * @scratch: u64 saved to be used during cqp completion */ -static enum irdma_status_code -irdma_sc_manage_stats_inst(struct irdma_sc_cqp *cqp, - struct irdma_stats_inst_info *info, bool alloc, - u64 scratch) +static int irdma_sc_manage_stats_inst(struct irdma_sc_cqp *cqp, + struct irdma_stats_inst_info *info, + bool alloc, u64 scratch) { __le64 *wqe; u64 temp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 40, FIELD_PREP(IRDMA_CQPSQ_STATS_HMC_FCN_INDEX, info->hmc_fn_id)); @@ -2106,9 +2109,8 @@ irdma_sc_manage_stats_inst(struct irdma_sc_cqp *cqp, * @info: User priority map info * @scratch: u64 saved to be used during cqp completion */ -static enum irdma_status_code irdma_sc_set_up_map(struct irdma_sc_cqp *cqp, - struct irdma_up_info *info, - u64 scratch) +static int irdma_sc_set_up_map(struct irdma_sc_cqp *cqp, + struct irdma_up_info *info, u64 scratch) { __le64 *wqe; u64 temp = 0; @@ -2116,7 +2118,7 @@ static enum irdma_status_code irdma_sc_set_up_map(struct irdma_sc_cqp *cqp, wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) temp |= (u64)info->map[i] << (i * 8); @@ -2149,17 +2151,16 @@ static enum irdma_status_code irdma_sc_set_up_map(struct irdma_sc_cqp *cqp, * @node_op: 0 for add 1 for modify, 2 for delete * @scratch: u64 saved to be used during cqp completion */ -static enum irdma_status_code -irdma_sc_manage_ws_node(struct irdma_sc_cqp *cqp, - struct irdma_ws_node_info *info, - enum irdma_ws_node_op node_op, u64 scratch) +static int irdma_sc_manage_ws_node(struct irdma_sc_cqp *cqp, + struct irdma_ws_node_info *info, + enum irdma_ws_node_op node_op, u64 scratch) { __le64 *wqe; u64 temp = 0; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 32, FIELD_PREP(IRDMA_CQPSQ_WS_VSI, info->vsi) | 
@@ -2192,9 +2193,9 @@ irdma_sc_manage_ws_node(struct irdma_sc_cqp *cqp, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -enum irdma_status_code irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp, - struct irdma_qp_flush_info *info, - u64 scratch, bool post_sq) +int irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp, + struct irdma_qp_flush_info *info, u64 scratch, + bool post_sq) { u64 temp = 0; __le64 *wqe; @@ -2213,13 +2214,13 @@ enum irdma_status_code irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp, ibdev_dbg(to_ibdev(qp->dev), "CQP: Additional flush request ignored for qp %x\n", qp->qp_uk.qp_id); - return IRDMA_ERR_FLUSHED_Q; + return -EALREADY; } cqp = qp->pd->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; if (info->userflushcode) { if (flush_rq) @@ -2266,9 +2267,9 @@ enum irdma_status_code irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code irdma_sc_gen_ae(struct irdma_sc_qp *qp, - struct irdma_gen_ae_info *info, - u64 scratch, bool post_sq) +static int irdma_sc_gen_ae(struct irdma_sc_qp *qp, + struct irdma_gen_ae_info *info, u64 scratch, + bool post_sq) { u64 temp; __le64 *wqe; @@ -2278,7 +2279,7 @@ static enum irdma_status_code irdma_sc_gen_ae(struct irdma_sc_qp *qp, cqp = qp->pd->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; temp = info->ae_code | FIELD_PREP(IRDMA_CQPSQ_FWQE_AESOURCE, info->ae_src); @@ -2306,10 +2307,9 @@ static enum irdma_status_code irdma_sc_gen_ae(struct irdma_sc_qp *qp, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code -irdma_sc_qp_upload_context(struct irdma_sc_dev *dev, - struct irdma_upload_context_info *info, u64 scratch, - bool post_sq) +static int 
irdma_sc_qp_upload_context(struct irdma_sc_dev *dev, + struct irdma_upload_context_info *info, + u64 scratch, bool post_sq) { __le64 *wqe; struct irdma_sc_cqp *cqp; @@ -2318,7 +2318,7 @@ irdma_sc_qp_upload_context(struct irdma_sc_dev *dev, cqp = dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 16, info->buf_pa); @@ -2347,21 +2347,20 @@ irdma_sc_qp_upload_context(struct irdma_sc_dev *dev, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code -irdma_sc_manage_push_page(struct irdma_sc_cqp *cqp, - struct irdma_cqp_manage_push_page_info *info, - u64 scratch, bool post_sq) +static int irdma_sc_manage_push_page(struct irdma_sc_cqp *cqp, + struct irdma_cqp_manage_push_page_info *info, + u64 scratch, bool post_sq) { __le64 *wqe; u64 hdr; if (info->free_page && info->push_idx >= cqp->dev->hw_attrs.max_hw_device_pages) - return IRDMA_ERR_INVALID_PUSH_PAGE_INDEX; + return -EINVAL; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 16, info->qs_handle); hdr = FIELD_PREP(IRDMA_CQPSQ_MPP_PPIDX, info->push_idx) | @@ -2387,16 +2386,15 @@ irdma_sc_manage_push_page(struct irdma_sc_cqp *cqp, * @qp: sc qp struct * @scratch: u64 saved to be used during cqp completion */ -static enum irdma_status_code irdma_sc_suspend_qp(struct irdma_sc_cqp *cqp, - struct irdma_sc_qp *qp, - u64 scratch) +static int irdma_sc_suspend_qp(struct irdma_sc_cqp *cqp, struct irdma_sc_qp *qp, + u64 scratch) { u64 hdr; __le64 *wqe; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; hdr = FIELD_PREP(IRDMA_CQPSQ_SUSPENDQP_QPID, qp->qp_uk.qp_id) | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_SUSPEND_QP) | @@ -2418,16 +2416,15 @@ static enum irdma_status_code irdma_sc_suspend_qp(struct irdma_sc_cqp *cqp, * @qp: sc qp struct * 
@scratch: u64 saved to be used during cqp completion */ -static enum irdma_status_code irdma_sc_resume_qp(struct irdma_sc_cqp *cqp, - struct irdma_sc_qp *qp, - u64 scratch) +static int irdma_sc_resume_qp(struct irdma_sc_cqp *cqp, struct irdma_sc_qp *qp, + u64 scratch) { u64 hdr; __le64 *wqe; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 16, FIELD_PREP(IRDMA_CQPSQ_RESUMEQP_QSHANDLE, qp->qs_handle)); @@ -2460,14 +2457,13 @@ static inline void irdma_sc_cq_ack(struct irdma_sc_cq *cq) * @cq: cq struct * @info: cq initialization info */ -enum irdma_status_code irdma_sc_cq_init(struct irdma_sc_cq *cq, - struct irdma_cq_init_info *info) +int irdma_sc_cq_init(struct irdma_sc_cq *cq, struct irdma_cq_init_info *info) { u32 pble_obj_cnt; pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt; if (info->virtual_map && info->first_pm_pbl_idx >= pble_obj_cnt) - return IRDMA_ERR_INVALID_PBLE_INDEX; + return -EINVAL; cq->cq_pa = info->cq_base_pa; cq->dev = info->dev; @@ -2498,23 +2494,21 @@ enum irdma_status_code irdma_sc_cq_init(struct irdma_sc_cq *cq, * @check_overflow: flag for overflow check * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code irdma_sc_cq_create(struct irdma_sc_cq *cq, - u64 scratch, - bool check_overflow, - bool post_sq) +static int irdma_sc_cq_create(struct irdma_sc_cq *cq, u64 scratch, + bool check_overflow, bool post_sq) { __le64 *wqe; struct irdma_sc_cqp *cqp; u64 hdr; struct irdma_sc_ceq *ceq; - enum irdma_status_code ret_code = 0; + int ret_code = 0; cqp = cq->dev->cqp; - if (cq->cq_uk.cq_id > (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].max_cnt - 1)) - return IRDMA_ERR_INVALID_CQ_ID; + if (cq->cq_uk.cq_id >= cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].max_cnt) + return -EINVAL; - if (cq->ceq_id > (cq->dev->hmc_fpm_misc.max_ceqs - 1)) - return IRDMA_ERR_INVALID_CEQ_ID; + if (cq->ceq_id >= cq->dev->hmc_fpm_misc.max_ceqs) + return -EINVAL; ceq 
= cq->dev->ceq[cq->ceq_id]; if (ceq && ceq->reg_cq) @@ -2527,7 +2521,7 @@ static enum irdma_status_code irdma_sc_cq_create(struct irdma_sc_cq *cq, if (!wqe) { if (ceq && ceq->reg_cq) irdma_sc_remove_cq_ctx(ceq, cq); - return IRDMA_ERR_RING_FULL; + return -ENOMEM; } set_64bit_val(wqe, 0, cq->cq_uk.cq_size); @@ -2573,8 +2567,7 @@ static enum irdma_status_code irdma_sc_cq_create(struct irdma_sc_cq *cq, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -enum irdma_status_code irdma_sc_cq_destroy(struct irdma_sc_cq *cq, u64 scratch, - bool post_sq) +int irdma_sc_cq_destroy(struct irdma_sc_cq *cq, u64 scratch, bool post_sq) { struct irdma_sc_cqp *cqp; __le64 *wqe; @@ -2584,7 +2577,7 @@ enum irdma_status_code irdma_sc_cq_destroy(struct irdma_sc_cq *cq, u64 scratch, cqp = cq->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; ceq = cq->dev->ceq[cq->ceq_id]; if (ceq && ceq->reg_cq) @@ -2640,9 +2633,9 @@ void irdma_sc_cq_resize(struct irdma_sc_cq *cq, struct irdma_modify_cq_info *inf * @scratch: u64 saved to be used during cqp completion * @post_sq: flag to post to sq */ -static enum irdma_status_code -irdma_sc_cq_modify(struct irdma_sc_cq *cq, struct irdma_modify_cq_info *info, - u64 scratch, bool post_sq) +static int irdma_sc_cq_modify(struct irdma_sc_cq *cq, + struct irdma_modify_cq_info *info, u64 scratch, + bool post_sq) { struct irdma_sc_cqp *cqp; __le64 *wqe; @@ -2652,12 +2645,12 @@ irdma_sc_cq_modify(struct irdma_sc_cq *cq, struct irdma_modify_cq_info *info, pble_obj_cnt = cq->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt; if (info->cq_resize && info->virtual_map && info->first_pm_pbl_idx >= pble_obj_cnt) - return IRDMA_ERR_INVALID_PBLE_INDEX; + return -EINVAL; cqp = cq->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 0, info->cq_size); set_64bit_val(wqe, 8, 
(uintptr_t)cq >> 1); @@ -2731,8 +2724,8 @@ static inline void irdma_get_cqp_reg_info(struct irdma_sc_cqp *cqp, u32 *val, * @tail: wqtail register value * @count: how many times to try for completion */ -static enum irdma_status_code irdma_cqp_poll_registers(struct irdma_sc_cqp *cqp, - u32 tail, u32 count) +static int irdma_cqp_poll_registers(struct irdma_sc_cqp *cqp, u32 tail, + u32 count) { u32 i = 0; u32 newtail, error, val; @@ -2744,7 +2737,7 @@ static enum irdma_status_code irdma_cqp_poll_registers(struct irdma_sc_cqp *cqp, ibdev_dbg(to_ibdev(cqp->dev), "CQP: CQPERRCODES error_code[x%08X]\n", error); - return IRDMA_ERR_CQP_COMPL_ERROR; + return -EIO; } if (newtail != tail) { /* SUCCESS */ @@ -2755,7 +2748,7 @@ static enum irdma_status_code irdma_cqp_poll_registers(struct irdma_sc_cqp *cqp, udelay(cqp->dev->hw_attrs.max_sleep_count); } - return IRDMA_ERR_TIMEOUT; + return -ETIMEDOUT; } /** @@ -2910,10 +2903,9 @@ static u64 irdma_sc_decode_fpm_query(__le64 *buf, u32 buf_idx, * parses fpm query buffer and copy max_cnt and * size value of hmc objects in hmc_info */ -static enum irdma_status_code -irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf, - struct irdma_hmc_info *hmc_info, - struct irdma_hmc_fpm_misc *hmc_fpm_misc) +static int irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf, + struct irdma_hmc_info *hmc_info, + struct irdma_hmc_fpm_misc *hmc_fpm_misc) { struct irdma_hmc_obj_info *obj_info; u64 temp; @@ -2952,7 +2944,7 @@ irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf, obj_info[IRDMA_HMC_IW_XFFL].size = 4; hmc_fpm_misc->xf_block_size = FIELD_GET(IRDMA_QUERY_FPM_XFBLOCKSIZE, temp); if (!hmc_fpm_misc->xf_block_size) - return IRDMA_ERR_INVALID_SIZE; + return -EINVAL; irdma_sc_decode_fpm_query(buf, 72, obj_info, IRDMA_HMC_IW_Q1); get_64bit_val(buf, 80, &temp); @@ -2961,7 +2953,7 @@ irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf, hmc_fpm_misc->q1_block_size = 
FIELD_GET(IRDMA_QUERY_FPM_Q1BLOCKSIZE, temp); if (!hmc_fpm_misc->q1_block_size) - return IRDMA_ERR_INVALID_SIZE; + return -EINVAL; irdma_sc_decode_fpm_query(buf, 88, obj_info, IRDMA_HMC_IW_TIMER); @@ -2985,7 +2977,7 @@ irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf, hmc_fpm_misc->rrf_block_size = FIELD_GET(IRDMA_QUERY_FPM_RRFBLOCKSIZE, temp); if (!hmc_fpm_misc->rrf_block_size && obj_info[IRDMA_HMC_IW_RRFFL].max_cnt) - return IRDMA_ERR_INVALID_SIZE; + return -EINVAL; irdma_sc_decode_fpm_query(buf, 144, obj_info, IRDMA_HMC_IW_HDR); irdma_sc_decode_fpm_query(buf, 152, obj_info, IRDMA_HMC_IW_MD); @@ -2997,7 +2989,7 @@ irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf, hmc_fpm_misc->ooiscf_block_size = FIELD_GET(IRDMA_QUERY_FPM_OOISCFBLOCKSIZE, temp); if (!hmc_fpm_misc->ooiscf_block_size && obj_info[IRDMA_HMC_IW_OOISCFFL].max_cnt) - return IRDMA_ERR_INVALID_SIZE; + return -EINVAL; return 0; } @@ -3025,8 +3017,7 @@ static u32 irdma_sc_find_reg_cq(struct irdma_sc_ceq *ceq, * @ceq: ceq sc structure * @cq: cq sc structure */ -enum irdma_status_code irdma_sc_add_cq_ctx(struct irdma_sc_ceq *ceq, - struct irdma_sc_cq *cq) +int irdma_sc_add_cq_ctx(struct irdma_sc_ceq *ceq, struct irdma_sc_cq *cq) { unsigned long flags; @@ -3034,7 +3025,7 @@ enum irdma_status_code irdma_sc_add_cq_ctx(struct irdma_sc_ceq *ceq, if (ceq->reg_cq_size == ceq->elem_cnt) { spin_unlock_irqrestore(&ceq->req_cq_lock, flags); - return IRDMA_ERR_REG_CQ_FULL; + return -ENOMEM; } ceq->reg_cq[ceq->reg_cq_size++] = cq; @@ -3075,15 +3066,15 @@ exit: * * Initializes the object and context buffers for a control Queue Pair. 
*/ -enum irdma_status_code irdma_sc_cqp_init(struct irdma_sc_cqp *cqp, - struct irdma_cqp_init_info *info) +int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp, + struct irdma_cqp_init_info *info) { u8 hw_sq_size; if (info->sq_size > IRDMA_CQP_SW_SQSIZE_2048 || info->sq_size < IRDMA_CQP_SW_SQSIZE_4 || ((info->sq_size & (info->sq_size - 1)))) - return IRDMA_ERR_INVALID_SIZE; + return -EINVAL; hw_sq_size = irdma_get_encoded_wqe_size(info->sq_size, IRDMA_QUEUE_TYPE_CQP); @@ -3133,13 +3124,12 @@ enum irdma_status_code irdma_sc_cqp_init(struct irdma_sc_cqp *cqp, * @maj_err: If error, major err number * @min_err: If error, minor err number */ -enum irdma_status_code irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_err, - u16 *min_err) +int irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_err, u16 *min_err) { u64 temp; u8 hw_rev; u32 cnt = 0, p1, p2, val = 0, err_code; - enum irdma_status_code ret_code; + int ret_code; hw_rev = cqp->dev->hw_attrs.uk_attrs.hw_rev; cqp->sdbuf.size = ALIGN(IRDMA_UPDATE_SD_BUFF_SIZE * cqp->sq_size, @@ -3148,7 +3138,7 @@ enum irdma_status_code irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_er cqp->sdbuf.size, &cqp->sdbuf.pa, GFP_KERNEL); if (!cqp->sdbuf.va) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; spin_lock_init(&cqp->dev->cqp_lock); @@ -3203,7 +3193,7 @@ enum irdma_status_code irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_er do { if (cnt++ > cqp->dev->hw_attrs.max_done_count) { - ret_code = IRDMA_ERR_TIMEOUT; + ret_code = -ETIMEDOUT; goto err; } udelay(cqp->dev->hw_attrs.max_sleep_count); @@ -3211,7 +3201,7 @@ enum irdma_status_code irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_er } while (!val); if (FLD_RS_32(cqp->dev, val, IRDMA_CCQPSTATUS_CCQP_ERR)) { - ret_code = IRDMA_ERR_DEVICE_NOT_SUPPORTED; + ret_code = -EOPNOTSUPP; goto err; } @@ -3252,7 +3242,7 @@ __le64 *irdma_sc_cqp_get_next_send_wqe_idx(struct irdma_sc_cqp *cqp, u64 scratch u32 *wqe_idx) { __le64 *wqe = NULL; - enum irdma_status_code 
ret_code; + int ret_code; if (IRDMA_RING_FULL_ERR(cqp->sq_ring)) { ibdev_dbg(to_ibdev(cqp->dev), @@ -3279,16 +3269,16 @@ __le64 *irdma_sc_cqp_get_next_send_wqe_idx(struct irdma_sc_cqp *cqp, u64 scratch * irdma_sc_cqp_destroy - destroy cqp during close * @cqp: struct for cqp hw */ -enum irdma_status_code irdma_sc_cqp_destroy(struct irdma_sc_cqp *cqp) +int irdma_sc_cqp_destroy(struct irdma_sc_cqp *cqp) { u32 cnt = 0, val; - enum irdma_status_code ret_code = 0; + int ret_code = 0; writel(0, cqp->dev->hw_regs[IRDMA_CCQPHIGH]); writel(0, cqp->dev->hw_regs[IRDMA_CCQPLOW]); do { if (cnt++ > cqp->dev->hw_attrs.max_done_count) { - ret_code = IRDMA_ERR_TIMEOUT; + ret_code = -ETIMEDOUT; break; } udelay(cqp->dev->hw_attrs.max_sleep_count); @@ -3333,8 +3323,8 @@ void irdma_sc_ccq_arm(struct irdma_sc_cq *ccq) * @ccq: ccq sc struct * @info: completion q entry to return */ -enum irdma_status_code irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq, - struct irdma_ccq_cqe_info *info) +int irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq, + struct irdma_ccq_cqe_info *info) { u64 qp_ctx, temp, temp1; __le64 *cqe; @@ -3342,7 +3332,7 @@ enum irdma_status_code irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq, u32 wqe_idx; u32 error; u8 polarity; - enum irdma_status_code ret_code = 0; + int ret_code = 0; if (ccq->cq_uk.avoid_mem_cflct) cqe = IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(&ccq->cq_uk); @@ -3352,7 +3342,7 @@ enum irdma_status_code irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq, get_64bit_val(cqe, 24, &temp); polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, temp); if (polarity != ccq->cq_uk.polarity) - return IRDMA_ERR_Q_EMPTY; + return -ENOENT; get_64bit_val(cqe, 8, &qp_ctx); cqp = (struct irdma_sc_cqp *)(unsigned long)qp_ctx; @@ -3399,25 +3389,25 @@ enum irdma_status_code irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq, * @op_code: cqp opcode for completion * @compl_info: completion q entry to return */ -enum irdma_status_code irdma_sc_poll_for_cqp_op_done(struct irdma_sc_cqp *cqp, u8 
op_code, - struct irdma_ccq_cqe_info *compl_info) +int irdma_sc_poll_for_cqp_op_done(struct irdma_sc_cqp *cqp, u8 op_code, + struct irdma_ccq_cqe_info *compl_info) { struct irdma_ccq_cqe_info info = {}; struct irdma_sc_cq *ccq; - enum irdma_status_code ret_code = 0; + int ret_code = 0; u32 cnt = 0; ccq = cqp->dev->ccq; while (1) { if (cnt++ > 100 * cqp->dev->hw_attrs.max_done_count) - return IRDMA_ERR_TIMEOUT; + return -ETIMEDOUT; if (irdma_sc_ccq_get_cqe_info(ccq, &info)) { udelay(cqp->dev->hw_attrs.max_sleep_count); continue; } if (info.error && info.op_code != IRDMA_CQP_OP_QUERY_STAG) { - ret_code = IRDMA_ERR_CQP_COMPL_ERROR; + ret_code = -EIO; break; } /* make sure op code matches*/ @@ -3441,17 +3431,16 @@ enum irdma_status_code irdma_sc_poll_for_cqp_op_done(struct irdma_sc_cqp *cqp, u * @info: info for the manage function table operation * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code -irdma_sc_manage_hmc_pm_func_table(struct irdma_sc_cqp *cqp, - struct irdma_hmc_fcn_info *info, - u64 scratch, bool post_sq) +static int irdma_sc_manage_hmc_pm_func_table(struct irdma_sc_cqp *cqp, + struct irdma_hmc_fcn_info *info, + u64 scratch, bool post_sq) { __le64 *wqe; u64 hdr; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 0, 0); set_64bit_val(wqe, 8, 0); @@ -3484,8 +3473,7 @@ irdma_sc_manage_hmc_pm_func_table(struct irdma_sc_cqp *cqp, * for fpm commit * @cqp: struct for cqp hw */ -static enum irdma_status_code -irdma_sc_commit_fpm_val_done(struct irdma_sc_cqp *cqp) +static int irdma_sc_commit_fpm_val_done(struct irdma_sc_cqp *cqp) { return irdma_sc_poll_for_cqp_op_done(cqp, IRDMA_CQP_OP_COMMIT_FPM_VAL, NULL); @@ -3500,19 +3488,19 @@ irdma_sc_commit_fpm_val_done(struct irdma_sc_cqp *cqp) * @post_sq: flag for cqp db to ring * @wait_type: poll ccq or cqp registers for cqp completion */ -static enum irdma_status_code -irdma_sc_commit_fpm_val(struct irdma_sc_cqp *cqp, u64 
scratch, u8 hmc_fn_id, - struct irdma_dma_mem *commit_fpm_mem, bool post_sq, - u8 wait_type) +static int irdma_sc_commit_fpm_val(struct irdma_sc_cqp *cqp, u64 scratch, + u8 hmc_fn_id, + struct irdma_dma_mem *commit_fpm_mem, + bool post_sq, u8 wait_type) { __le64 *wqe; u64 hdr; u32 tail, val, error; - enum irdma_status_code ret_code = 0; + int ret_code = 0; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 16, hmc_fn_id); set_64bit_val(wqe, 32, commit_fpm_mem->pa); @@ -3546,8 +3534,7 @@ irdma_sc_commit_fpm_val(struct irdma_sc_cqp *cqp, u64 scratch, u8 hmc_fn_id, * query fpm * @cqp: struct for cqp hw */ -static enum irdma_status_code -irdma_sc_query_fpm_val_done(struct irdma_sc_cqp *cqp) +static int irdma_sc_query_fpm_val_done(struct irdma_sc_cqp *cqp) { return irdma_sc_poll_for_cqp_op_done(cqp, IRDMA_CQP_OP_QUERY_FPM_VAL, NULL); @@ -3562,19 +3549,19 @@ irdma_sc_query_fpm_val_done(struct irdma_sc_cqp *cqp) * @post_sq: flag for cqp db to ring * @wait_type: poll ccq or cqp registers for cqp completion */ -static enum irdma_status_code -irdma_sc_query_fpm_val(struct irdma_sc_cqp *cqp, u64 scratch, u8 hmc_fn_id, - struct irdma_dma_mem *query_fpm_mem, bool post_sq, - u8 wait_type) +static int irdma_sc_query_fpm_val(struct irdma_sc_cqp *cqp, u64 scratch, + u8 hmc_fn_id, + struct irdma_dma_mem *query_fpm_mem, + bool post_sq, u8 wait_type) { __le64 *wqe; u64 hdr; u32 tail, val, error; - enum irdma_status_code ret_code = 0; + int ret_code = 0; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 16, hmc_fn_id); set_64bit_val(wqe, 32, query_fpm_mem->pa); @@ -3606,21 +3593,21 @@ irdma_sc_query_fpm_val(struct irdma_sc_cqp *cqp, u64 scratch, u8 hmc_fn_id, * @ceq: ceq sc structure * @info: ceq initialization info */ -enum irdma_status_code irdma_sc_ceq_init(struct irdma_sc_ceq *ceq, - struct irdma_ceq_init_info *info) +int 
irdma_sc_ceq_init(struct irdma_sc_ceq *ceq, + struct irdma_ceq_init_info *info) { u32 pble_obj_cnt; if (info->elem_cnt < info->dev->hw_attrs.min_hw_ceq_size || info->elem_cnt > info->dev->hw_attrs.max_hw_ceq_size) - return IRDMA_ERR_INVALID_SIZE; + return -EINVAL; - if (info->ceq_id > (info->dev->hmc_fpm_misc.max_ceqs - 1)) - return IRDMA_ERR_INVALID_CEQ_ID; + if (info->ceq_id >= info->dev->hmc_fpm_misc.max_ceqs) + return -EINVAL; pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt; if (info->virtual_map && info->first_pm_pbl_idx >= pble_obj_cnt) - return IRDMA_ERR_INVALID_PBLE_INDEX; + return -EINVAL; ceq->size = sizeof(*ceq); ceq->ceqe_base = (struct irdma_ceqe *)info->ceqe_base; @@ -3653,8 +3640,8 @@ enum irdma_status_code irdma_sc_ceq_init(struct irdma_sc_ceq *ceq, * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code irdma_sc_ceq_create(struct irdma_sc_ceq *ceq, u64 scratch, - bool post_sq) +static int irdma_sc_ceq_create(struct irdma_sc_ceq *ceq, u64 scratch, + bool post_sq) { struct irdma_sc_cqp *cqp; __le64 *wqe; @@ -3663,7 +3650,7 @@ static enum irdma_status_code irdma_sc_ceq_create(struct irdma_sc_ceq *ceq, u64 cqp = ceq->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 16, ceq->elem_cnt); set_64bit_val(wqe, 32, (ceq->virtual_map ? 
0 : ceq->ceq_elem_pa)); @@ -3695,8 +3682,7 @@ static enum irdma_status_code irdma_sc_ceq_create(struct irdma_sc_ceq *ceq, u64 * irdma_sc_cceq_create_done - poll for control ceq wqe to complete * @ceq: ceq sc structure */ -static enum irdma_status_code -irdma_sc_cceq_create_done(struct irdma_sc_ceq *ceq) +static int irdma_sc_cceq_create_done(struct irdma_sc_ceq *ceq) { struct irdma_sc_cqp *cqp; @@ -3709,7 +3695,7 @@ irdma_sc_cceq_create_done(struct irdma_sc_ceq *ceq) * irdma_sc_cceq_destroy_done - poll for destroy cceq to complete * @ceq: ceq sc structure */ -enum irdma_status_code irdma_sc_cceq_destroy_done(struct irdma_sc_ceq *ceq) +int irdma_sc_cceq_destroy_done(struct irdma_sc_ceq *ceq) { struct irdma_sc_cqp *cqp; @@ -3728,9 +3714,9 @@ enum irdma_status_code irdma_sc_cceq_destroy_done(struct irdma_sc_ceq *ceq) * @ceq: ceq sc structure * @scratch: u64 saved to be used during cqp completion */ -enum irdma_status_code irdma_sc_cceq_create(struct irdma_sc_ceq *ceq, u64 scratch) +int irdma_sc_cceq_create(struct irdma_sc_ceq *ceq, u64 scratch) { - enum irdma_status_code ret_code; + int ret_code; struct irdma_sc_dev *dev = ceq->dev; dev->ccq->vsi = ceq->vsi; @@ -3753,8 +3739,7 @@ enum irdma_status_code irdma_sc_cceq_create(struct irdma_sc_ceq *ceq, u64 scratc * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -enum irdma_status_code irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 scratch, - bool post_sq) +int irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 scratch, bool post_sq) { struct irdma_sc_cqp *cqp; __le64 *wqe; @@ -3763,7 +3748,7 @@ enum irdma_status_code irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 scratc cqp = ceq->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 16, ceq->elem_cnt); set_64bit_val(wqe, 48, ceq->first_pm_pbl_idx); @@ -3882,19 +3867,19 @@ void irdma_sc_cleanup_ceqes(struct irdma_sc_cq *cq, struct 
irdma_sc_ceq *ceq) * @aeq: aeq structure ptr * @info: aeq initialization info */ -enum irdma_status_code irdma_sc_aeq_init(struct irdma_sc_aeq *aeq, - struct irdma_aeq_init_info *info) +int irdma_sc_aeq_init(struct irdma_sc_aeq *aeq, + struct irdma_aeq_init_info *info) { u32 pble_obj_cnt; if (info->elem_cnt < info->dev->hw_attrs.min_hw_aeq_size || info->elem_cnt > info->dev->hw_attrs.max_hw_aeq_size) - return IRDMA_ERR_INVALID_SIZE; + return -EINVAL; pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt; if (info->virtual_map && info->first_pm_pbl_idx >= pble_obj_cnt) - return IRDMA_ERR_INVALID_PBLE_INDEX; + return -EINVAL; aeq->size = sizeof(*aeq); aeq->polarity = 1; @@ -3919,8 +3904,8 @@ enum irdma_status_code irdma_sc_aeq_init(struct irdma_sc_aeq *aeq, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code irdma_sc_aeq_create(struct irdma_sc_aeq *aeq, - u64 scratch, bool post_sq) +static int irdma_sc_aeq_create(struct irdma_sc_aeq *aeq, u64 scratch, + bool post_sq) { __le64 *wqe; struct irdma_sc_cqp *cqp; @@ -3929,7 +3914,7 @@ static enum irdma_status_code irdma_sc_aeq_create(struct irdma_sc_aeq *aeq, cqp = aeq->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 16, aeq->elem_cnt); set_64bit_val(wqe, 32, (aeq->virtual_map ? 
0 : aeq->aeq_elem_pa)); @@ -3958,8 +3943,8 @@ static enum irdma_status_code irdma_sc_aeq_create(struct irdma_sc_aeq *aeq, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code irdma_sc_aeq_destroy(struct irdma_sc_aeq *aeq, - u64 scratch, bool post_sq) +static int irdma_sc_aeq_destroy(struct irdma_sc_aeq *aeq, u64 scratch, + bool post_sq) { __le64 *wqe; struct irdma_sc_cqp *cqp; @@ -3972,7 +3957,7 @@ static enum irdma_status_code irdma_sc_aeq_destroy(struct irdma_sc_aeq *aeq, cqp = dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 16, aeq->elem_cnt); set_64bit_val(wqe, 48, aeq->first_pm_pbl_idx); hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DESTROY_AEQ) | @@ -3995,8 +3980,8 @@ static enum irdma_status_code irdma_sc_aeq_destroy(struct irdma_sc_aeq *aeq, * @aeq: aeq structure ptr * @info: aeqe info to be returned */ -enum irdma_status_code irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, - struct irdma_aeqe_info *info) +int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, + struct irdma_aeqe_info *info) { u64 temp, compl_ctx; __le64 *aeqe; @@ -4010,7 +3995,7 @@ enum irdma_status_code irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, polarity = (u8)FIELD_GET(IRDMA_AEQE_VALID, temp); if (aeq->polarity != polarity) - return IRDMA_ERR_Q_EMPTY; + return -ENOENT; print_hex_dump_debug("WQE: AEQ_ENTRY WQE", DUMP_PREFIX_OFFSET, 16, 8, aeqe, 16, false); @@ -4155,22 +4140,21 @@ void irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count) * @cq: sc's cq ctruct * @info: info for control cq initialization */ -enum irdma_status_code irdma_sc_ccq_init(struct irdma_sc_cq *cq, - struct irdma_ccq_init_info *info) +int irdma_sc_ccq_init(struct irdma_sc_cq *cq, struct irdma_ccq_init_info *info) { u32 pble_obj_cnt; if (info->num_elem < info->dev->hw_attrs.uk_attrs.min_hw_cq_size || info->num_elem > 
info->dev->hw_attrs.uk_attrs.max_hw_cq_size) - return IRDMA_ERR_INVALID_SIZE; + return -EINVAL; - if (info->ceq_id > (info->dev->hmc_fpm_misc.max_ceqs - 1)) - return IRDMA_ERR_INVALID_CEQ_ID; + if (info->ceq_id >= info->dev->hmc_fpm_misc.max_ceqs) + return -EINVAL; pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt; if (info->virtual_map && info->first_pm_pbl_idx >= pble_obj_cnt) - return IRDMA_ERR_INVALID_PBLE_INDEX; + return -EINVAL; cq->cq_pa = info->cq_pa; cq->cq_uk.cq_base = info->cq_base; @@ -4207,7 +4191,7 @@ enum irdma_status_code irdma_sc_ccq_init(struct irdma_sc_cq *cq, * irdma_sc_ccq_create_done - poll cqp for ccq create * @ccq: ccq sc struct */ -static inline enum irdma_status_code irdma_sc_ccq_create_done(struct irdma_sc_cq *ccq) +static inline int irdma_sc_ccq_create_done(struct irdma_sc_cq *ccq) { struct irdma_sc_cqp *cqp; @@ -4223,10 +4207,10 @@ static inline enum irdma_status_code irdma_sc_ccq_create_done(struct irdma_sc_cq * @check_overflow: overlow flag for ccq * @post_sq: flag for cqp db to ring */ -enum irdma_status_code irdma_sc_ccq_create(struct irdma_sc_cq *ccq, u64 scratch, - bool check_overflow, bool post_sq) +int irdma_sc_ccq_create(struct irdma_sc_cq *ccq, u64 scratch, + bool check_overflow, bool post_sq) { - enum irdma_status_code ret_code; + int ret_code; ret_code = irdma_sc_cq_create(ccq, scratch, check_overflow, post_sq); if (ret_code) @@ -4248,19 +4232,18 @@ enum irdma_status_code irdma_sc_ccq_create(struct irdma_sc_cq *ccq, u64 scratch, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -enum irdma_status_code irdma_sc_ccq_destroy(struct irdma_sc_cq *ccq, u64 scratch, - bool post_sq) +int irdma_sc_ccq_destroy(struct irdma_sc_cq *ccq, u64 scratch, bool post_sq) { struct irdma_sc_cqp *cqp; __le64 *wqe; u64 hdr; - enum irdma_status_code ret_code = 0; + int ret_code = 0; u32 tail, val, error; cqp = ccq->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - 
return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 0, ccq->cq_uk.cq_size); set_64bit_val(wqe, 8, (uintptr_t)ccq >> 1); @@ -4299,13 +4282,12 @@ enum irdma_status_code irdma_sc_ccq_destroy(struct irdma_sc_cq *ccq, u64 scratch * @dev : ptr to irdma_dev struct * @hmc_fn_id: hmc function id */ -enum irdma_status_code irdma_sc_init_iw_hmc(struct irdma_sc_dev *dev, - u8 hmc_fn_id) +int irdma_sc_init_iw_hmc(struct irdma_sc_dev *dev, u8 hmc_fn_id) { struct irdma_hmc_info *hmc_info; struct irdma_hmc_fpm_misc *hmc_fpm_misc; struct irdma_dma_mem query_fpm_mem; - enum irdma_status_code ret_code = 0; + int ret_code = 0; u8 wait_type; hmc_info = dev->hmc_info; @@ -4336,14 +4318,13 @@ enum irdma_status_code irdma_sc_init_iw_hmc(struct irdma_sc_dev *dev, * @dev : ptr to irdma_dev struct * @hmc_fn_id: hmc function id */ -static enum irdma_status_code irdma_sc_cfg_iw_fpm(struct irdma_sc_dev *dev, - u8 hmc_fn_id) +static int irdma_sc_cfg_iw_fpm(struct irdma_sc_dev *dev, u8 hmc_fn_id) { struct irdma_hmc_info *hmc_info; struct irdma_hmc_obj_info *obj_info; __le64 *buf; struct irdma_dma_mem commit_fpm_mem; - enum irdma_status_code ret_code = 0; + int ret_code = 0; u8 wait_type; hmc_info = dev->hmc_info; @@ -4406,9 +4387,8 @@ static enum irdma_status_code irdma_sc_cfg_iw_fpm(struct irdma_sc_dev *dev, * @info: sd info for wqe * @scratch: u64 saved to be used during cqp completion */ -static enum irdma_status_code -cqp_sds_wqe_fill(struct irdma_sc_cqp *cqp, struct irdma_update_sds_info *info, - u64 scratch) +static int cqp_sds_wqe_fill(struct irdma_sc_cqp *cqp, + struct irdma_update_sds_info *info, u64 scratch) { u64 data; u64 hdr; @@ -4420,7 +4400,7 @@ cqp_sds_wqe_fill(struct irdma_sc_cqp *cqp, struct irdma_update_sds_info *info, wqe = irdma_sc_cqp_get_next_send_wqe_idx(cqp, scratch, &wqe_idx); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; wqe_entries = (info->cnt > 3) ? 
3 : info->cnt; mem_entries = info->cnt - wqe_entries; @@ -4486,12 +4466,11 @@ cqp_sds_wqe_fill(struct irdma_sc_cqp *cqp, struct irdma_update_sds_info *info, * @info: sd info for sd's * @scratch: u64 saved to be used during cqp completion */ -static enum irdma_status_code -irdma_update_pe_sds(struct irdma_sc_dev *dev, - struct irdma_update_sds_info *info, u64 scratch) +static int irdma_update_pe_sds(struct irdma_sc_dev *dev, + struct irdma_update_sds_info *info, u64 scratch) { struct irdma_sc_cqp *cqp = dev->cqp; - enum irdma_status_code ret_code; + int ret_code; ret_code = cqp_sds_wqe_fill(cqp, info, scratch); if (!ret_code) @@ -4505,13 +4484,12 @@ irdma_update_pe_sds(struct irdma_sc_dev *dev, * @dev: sc device struct * @info: sd info for sd's */ -enum irdma_status_code -irdma_update_sds_noccq(struct irdma_sc_dev *dev, - struct irdma_update_sds_info *info) +int irdma_update_sds_noccq(struct irdma_sc_dev *dev, + struct irdma_update_sds_info *info) { u32 error, val, tail; struct irdma_sc_cqp *cqp = dev->cqp; - enum irdma_status_code ret_code; + int ret_code; ret_code = cqp_sds_wqe_fill(cqp, info, 0); if (ret_code) @@ -4532,10 +4510,9 @@ irdma_update_sds_noccq(struct irdma_sc_dev *dev, * @post_sq: flag for cqp db to ring * @poll_registers: flag to poll register for cqp completion */ -enum irdma_status_code -irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, u64 scratch, - u8 hmc_fn_id, bool post_sq, - bool poll_registers) +int irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, u64 scratch, + u8 hmc_fn_id, bool post_sq, + bool poll_registers) { u64 hdr; __le64 *wqe; @@ -4543,7 +4520,7 @@ irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, u64 scratch, wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 16, FIELD_PREP(IRDMA_SHMC_PAGE_ALLOCATED_HMC_FN_ID, hmc_fn_id)); @@ -4618,8 +4595,7 @@ static u32 irdma_est_sd(struct irdma_sc_dev *dev, * 
irdma_sc_query_rdma_features_done - poll cqp for query features done * @cqp: struct for cqp hw */ -static enum irdma_status_code -irdma_sc_query_rdma_features_done(struct irdma_sc_cqp *cqp) +static int irdma_sc_query_rdma_features_done(struct irdma_sc_cqp *cqp) { return irdma_sc_poll_for_cqp_op_done(cqp, IRDMA_CQP_OP_QUERY_RDMA_FEATURES, @@ -4632,16 +4608,15 @@ irdma_sc_query_rdma_features_done(struct irdma_sc_cqp *cqp) * @buf: buffer to hold query info * @scratch: u64 saved to be used during cqp completion */ -static enum irdma_status_code -irdma_sc_query_rdma_features(struct irdma_sc_cqp *cqp, - struct irdma_dma_mem *buf, u64 scratch) +static int irdma_sc_query_rdma_features(struct irdma_sc_cqp *cqp, + struct irdma_dma_mem *buf, u64 scratch) { __le64 *wqe; u64 temp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; temp = buf->pa; set_64bit_val(wqe, 32, temp); @@ -4665,9 +4640,9 @@ irdma_sc_query_rdma_features(struct irdma_sc_cqp *cqp, * irdma_get_rdma_features - get RDMA features * @dev: sc device struct */ -enum irdma_status_code irdma_get_rdma_features(struct irdma_sc_dev *dev) +int irdma_get_rdma_features(struct irdma_sc_dev *dev) { - enum irdma_status_code ret_code; + int ret_code; struct irdma_dma_mem feat_buf; u64 temp; u16 byte_idx, feat_type, feat_cnt, feat_idx; @@ -4677,7 +4652,7 @@ enum irdma_status_code irdma_get_rdma_features(struct irdma_sc_dev *dev) feat_buf.va = dma_alloc_coherent(dev->hw->device, feat_buf.size, &feat_buf.pa, GFP_KERNEL); if (!feat_buf.va) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; ret_code = irdma_sc_query_rdma_features(dev->cqp, &feat_buf, 0); if (!ret_code) @@ -4688,7 +4663,7 @@ enum irdma_status_code irdma_get_rdma_features(struct irdma_sc_dev *dev) get_64bit_val(feat_buf.va, 0, &temp); feat_cnt = (u16)FIELD_GET(IRDMA_FEATURE_CNT, temp); if (feat_cnt < 2) { - ret_code = IRDMA_ERR_INVALID_FEAT_CNT; + ret_code = -EINVAL; goto exit; } else if (feat_cnt > 
IRDMA_MAX_FEATURES) { ibdev_dbg(to_ibdev(dev), @@ -4702,7 +4677,7 @@ enum irdma_status_code irdma_get_rdma_features(struct irdma_sc_dev *dev) feat_buf.size, &feat_buf.pa, GFP_KERNEL); if (!feat_buf.va) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; ret_code = irdma_sc_query_rdma_features(dev->cqp, &feat_buf, 0); if (!ret_code) @@ -4713,7 +4688,7 @@ enum irdma_status_code irdma_get_rdma_features(struct irdma_sc_dev *dev) get_64bit_val(feat_buf.va, 0, &temp); feat_cnt = (u16)FIELD_GET(IRDMA_FEATURE_CNT, temp); if (feat_cnt < 2) { - ret_code = IRDMA_ERR_INVALID_FEAT_CNT; + ret_code = -EINVAL; goto exit; } } @@ -4792,7 +4767,7 @@ static void cfg_fpm_value_gen_2(struct irdma_sc_dev *dev, * @dev: sc device struct * @qp_count: desired qp count */ -enum irdma_status_code irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) +int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) { struct irdma_virt_mem virt_mem; u32 i, mem_size; @@ -4803,7 +4778,7 @@ enum irdma_status_code irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) u32 loop_count = 0; struct irdma_hmc_info *hmc_info; struct irdma_hmc_fpm_misc *hmc_fpm_misc; - enum irdma_status_code ret_code = 0; + int ret_code = 0; hmc_info = dev->hmc_info; hmc_fpm_misc = &dev->hmc_fpm_misc; @@ -4897,10 +4872,12 @@ enum irdma_status_code irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) sd_diff = sd_needed - hmc_fpm_misc->max_sds; if (sd_diff > 128) { - if (qpwanted > 128 && sd_diff > 144) + if (!(loop_count % 2) && qpwanted > 128) { qpwanted /= 2; - mrwanted /= 2; - pblewanted /= 2; + } else { + mrwanted /= 2; + pblewanted /= 2; + } continue; } if (dev->cqp->hmc_profile != IRDMA_HMC_PROFILE_FAVOR_VF && @@ -4930,7 +4907,7 @@ enum irdma_status_code irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) ibdev_dbg(to_ibdev(dev), "HMC: cfg_fpm failed loop_cnt=%d, sd_needed=%d, max sd count %d\n", loop_count, sd_needed, hmc_info->sd_table.sd_cnt); - return IRDMA_ERR_CFG; + return -EINVAL; } if (loop_count > 
1 && sd_needed < hmc_fpm_misc->max_sds) { @@ -4966,7 +4943,7 @@ enum irdma_status_code irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) if (!virt_mem.va) { ibdev_dbg(to_ibdev(dev), "HMC: failed to allocate memory for sd_entry buffer\n"); - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; } hmc_info->sd_table.sd_entry = virt_mem.va; @@ -4978,10 +4955,10 @@ enum irdma_status_code irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) * @dev: rdma device * @pcmdinfo: cqp command info */ -static enum irdma_status_code irdma_exec_cqp_cmd(struct irdma_sc_dev *dev, - struct cqp_cmds_info *pcmdinfo) +static int irdma_exec_cqp_cmd(struct irdma_sc_dev *dev, + struct cqp_cmds_info *pcmdinfo) { - enum irdma_status_code status; + int status; struct irdma_dma_mem val_mem; bool alloc = false; @@ -5243,7 +5220,7 @@ static enum irdma_status_code irdma_exec_cqp_cmd(struct irdma_sc_dev *dev, pcmdinfo->in.u.mc_modify.scratch); break; default: - status = IRDMA_NOT_SUPPORTED; + status = -EOPNOTSUPP; break; } @@ -5255,10 +5232,10 @@ static enum irdma_status_code irdma_exec_cqp_cmd(struct irdma_sc_dev *dev, * @dev: sc device struct * @pcmdinfo: cqp command info */ -enum irdma_status_code irdma_process_cqp_cmd(struct irdma_sc_dev *dev, - struct cqp_cmds_info *pcmdinfo) +int irdma_process_cqp_cmd(struct irdma_sc_dev *dev, + struct cqp_cmds_info *pcmdinfo) { - enum irdma_status_code status = 0; + int status = 0; unsigned long flags; spin_lock_irqsave(&dev->cqp_lock, flags); @@ -5274,9 +5251,9 @@ enum irdma_status_code irdma_process_cqp_cmd(struct irdma_sc_dev *dev, * irdma_process_bh - called from tasklet for cqp list * @dev: sc device struct */ -enum irdma_status_code irdma_process_bh(struct irdma_sc_dev *dev) +int irdma_process_bh(struct irdma_sc_dev *dev) { - enum irdma_status_code status = 0; + int status = 0; struct cqp_cmds_info *pcmdinfo; unsigned long flags; @@ -5364,12 +5341,11 @@ static inline void irdma_sc_init_hw(struct irdma_sc_dev *dev) * @dev: Device pointer * @info: 
Device init info */ -enum irdma_status_code irdma_sc_dev_init(enum irdma_vers ver, - struct irdma_sc_dev *dev, - struct irdma_device_init_info *info) +int irdma_sc_dev_init(enum irdma_vers ver, struct irdma_sc_dev *dev, + struct irdma_device_init_info *info) { u32 val; - enum irdma_status_code ret_code = 0; + int ret_code = 0; u8 db_size; INIT_LIST_HEAD(&dev->cqp_cmd_head); /* for CQP command backlog */ @@ -5413,7 +5389,7 @@ enum irdma_status_code irdma_sc_dev_init(enum irdma_vers ver, irdma_sc_init_hw(dev); if (irdma_wait_pe_ready(dev)) - return IRDMA_ERR_TIMEOUT; + return -ETIMEDOUT; val = readl(dev->hw_regs[IRDMA_GLPCI_LBARCTRL]); db_size = (u8)FIELD_GET(IRDMA_GLPCI_LBARCTRL_PE_DB_SIZE, val); @@ -5421,7 +5397,7 @@ enum irdma_status_code irdma_sc_dev_init(enum irdma_vers ver, ibdev_dbg(to_ibdev(dev), "DEV: RDMA PE doorbell is not enabled in CSR val 0x%x db_size=%d\n", val, db_size); - return IRDMA_ERR_PE_DOORBELL_NOT_ENA; + return -ENODEV; } dev->db_addr = dev->hw->hw_addr + (uintptr_t)dev->hw_regs[IRDMA_DB_ADDR_OFFSET]; diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h index cc3d9a365b35..c1906cab5c8a 100644 --- a/drivers/infiniband/hw/irdma/defs.h +++ b/drivers/infiniband/hw/irdma/defs.h @@ -314,6 +314,7 @@ enum irdma_cqp_op_type { #define IRDMA_AE_IB_REMOTE_ACCESS_ERROR 0x020d #define IRDMA_AE_IB_REMOTE_OP_ERROR 0x020e #define IRDMA_AE_WQE_LSMM_TOO_LONG 0x0220 +#define IRDMA_AE_INVALID_REQUEST 0x0223 #define IRDMA_AE_DDP_INVALID_MSN_GAP_IN_MSN 0x0301 #define IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER 0x0303 #define IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION 0x0304 @@ -964,7 +965,7 @@ enum irdma_cqp_op_type { (_ring).head = ((_ring).head + 1) % size; \ (_retcode) = 0; \ } else { \ - (_retcode) = IRDMA_ERR_RING_FULL; \ + (_retcode) = -ENOMEM; \ } \ } #define IRDMA_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \ @@ -975,7 +976,7 @@ enum irdma_cqp_op_type { (_ring).head = ((_ring).head + (_count)) % size; \ 
(_retcode) = 0; \ } else { \ - (_retcode) = IRDMA_ERR_RING_FULL; \ + (_retcode) = -ENOMEM; \ } \ } #define IRDMA_SQ_RING_MOVE_HEAD(_ring, _retcode) \ @@ -986,7 +987,7 @@ enum irdma_cqp_op_type { (_ring).head = ((_ring).head + 1) % size; \ (_retcode) = 0; \ } else { \ - (_retcode) = IRDMA_ERR_RING_FULL; \ + (_retcode) = -ENOMEM; \ } \ } #define IRDMA_SQ_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \ @@ -997,7 +998,7 @@ enum irdma_cqp_op_type { (_ring).head = ((_ring).head + (_count)) % size; \ (_retcode) = 0; \ } else { \ - (_retcode) = IRDMA_ERR_RING_FULL; \ + (_retcode) = -ENOMEM; \ } \ } #define IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(_ring, _count) \ diff --git a/drivers/infiniband/hw/irdma/hmc.c b/drivers/infiniband/hw/irdma/hmc.c index ecffcb93c05a..49307ce8c4da 100644 --- a/drivers/infiniband/hw/irdma/hmc.c +++ b/drivers/infiniband/hw/irdma/hmc.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB /* Copyright (c) 2015 - 2021 Intel Corporation */ #include "osdep.h" -#include "status.h" #include "hmc.h" #include "defs.h" #include "type.h" @@ -121,10 +120,8 @@ static inline void irdma_invalidate_pf_hmc_pd(struct irdma_sc_dev *dev, u32 sd_i * @type: paged or direct sd * @setsd: flag to set or clear sd */ -enum irdma_status_code irdma_hmc_sd_one(struct irdma_sc_dev *dev, u8 hmc_fn_id, - u64 pa, u32 sd_idx, - enum irdma_sd_entry_type type, - bool setsd) +int irdma_hmc_sd_one(struct irdma_sc_dev *dev, u8 hmc_fn_id, u64 pa, u32 sd_idx, + enum irdma_sd_entry_type type, bool setsd) { struct irdma_update_sds_info sdinfo; @@ -145,16 +142,15 @@ enum irdma_status_code irdma_hmc_sd_one(struct irdma_sc_dev *dev, u8 hmc_fn_id, * @sd_cnt: number of sd entries * @setsd: flag to set or clear sd */ -static enum irdma_status_code irdma_hmc_sd_grp(struct irdma_sc_dev *dev, - struct irdma_hmc_info *hmc_info, - u32 sd_index, u32 sd_cnt, - bool setsd) +static int irdma_hmc_sd_grp(struct irdma_sc_dev *dev, + struct irdma_hmc_info *hmc_info, u32 sd_index, + u32 
sd_cnt, bool setsd) { struct irdma_hmc_sd_entry *sd_entry; struct irdma_update_sds_info sdinfo = {}; u64 pa; u32 i; - enum irdma_status_code ret_code = 0; + int ret_code = 0; sdinfo.hmc_fn_id = hmc_info->hmc_fn_id; for (i = sd_index; i < sd_index + sd_cnt; i++) { @@ -196,16 +192,15 @@ static enum irdma_status_code irdma_hmc_sd_grp(struct irdma_sc_dev *dev, * @dev: pointer to the device structure * @info: create obj info */ -static enum irdma_status_code -irdma_hmc_finish_add_sd_reg(struct irdma_sc_dev *dev, - struct irdma_hmc_create_obj_info *info) +static int irdma_hmc_finish_add_sd_reg(struct irdma_sc_dev *dev, + struct irdma_hmc_create_obj_info *info) { if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt) - return IRDMA_ERR_INVALID_HMC_OBJ_INDEX; + return -EINVAL; if ((info->start_idx + info->count) > info->hmc_info->hmc_obj[info->rsrc_type].cnt) - return IRDMA_ERR_INVALID_HMC_OBJ_COUNT; + return -EINVAL; if (!info->add_sd_cnt) return 0; @@ -222,9 +217,8 @@ irdma_hmc_finish_add_sd_reg(struct irdma_sc_dev *dev, * This will allocate memory for PDs and backing pages and populate * the sd and pd entries. 
*/ -enum irdma_status_code -irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev, - struct irdma_hmc_create_obj_info *info) +int irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev, + struct irdma_hmc_create_obj_info *info) { struct irdma_hmc_sd_entry *sd_entry; u32 sd_idx, sd_lmt; @@ -232,10 +226,10 @@ irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev, u32 pd_idx1 = 0, pd_lmt1 = 0; u32 i, j; bool pd_error = false; - enum irdma_status_code ret_code = 0; + int ret_code = 0; if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt) - return IRDMA_ERR_INVALID_HMC_OBJ_INDEX; + return -EINVAL; if ((info->start_idx + info->count) > info->hmc_info->hmc_obj[info->rsrc_type].cnt) { @@ -243,7 +237,7 @@ irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev, "HMC: error type %u, start = %u, req cnt %u, cnt = %u\n", info->rsrc_type, info->start_idx, info->count, info->hmc_info->hmc_obj[info->rsrc_type].cnt); - return IRDMA_ERR_INVALID_HMC_OBJ_COUNT; + return -EINVAL; } irdma_find_sd_index_limit(info->hmc_info, info->rsrc_type, @@ -251,7 +245,7 @@ irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev, &sd_lmt); if (sd_idx >= info->hmc_info->sd_table.sd_cnt || sd_lmt > info->hmc_info->sd_table.sd_cnt) { - return IRDMA_ERR_INVALID_SD_INDEX; + return -EINVAL; } irdma_find_pd_index_limit(info->hmc_info, info->rsrc_type, @@ -312,7 +306,7 @@ exit_sd_error: irdma_prep_remove_pd_page(info->hmc_info, (j - 1)); break; default: - ret_code = IRDMA_ERR_INVALID_SD_TYPE; + ret_code = -EINVAL; break; } j--; @@ -327,12 +321,12 @@ exit_sd_error: * @info: dele obj info * @reset: true if called before reset */ -static enum irdma_status_code -irdma_finish_del_sd_reg(struct irdma_sc_dev *dev, - struct irdma_hmc_del_obj_info *info, bool reset) +static int irdma_finish_del_sd_reg(struct irdma_sc_dev *dev, + struct irdma_hmc_del_obj_info *info, + bool reset) { struct irdma_hmc_sd_entry *sd_entry; - enum irdma_status_code ret_code = 0; + int ret_code = 0; u32 i, sd_idx; struct irdma_dma_mem *mem; @@ -373,22 
+367,21 @@ irdma_finish_del_sd_reg(struct irdma_sc_dev *dev, * caller should deallocate memory allocated previously for * book-keeping information about PDs and backing storage. */ -enum irdma_status_code irdma_sc_del_hmc_obj(struct irdma_sc_dev *dev, - struct irdma_hmc_del_obj_info *info, - bool reset) +int irdma_sc_del_hmc_obj(struct irdma_sc_dev *dev, + struct irdma_hmc_del_obj_info *info, bool reset) { struct irdma_hmc_pd_table *pd_table; u32 sd_idx, sd_lmt; u32 pd_idx, pd_lmt, rel_pd_idx; u32 i, j; - enum irdma_status_code ret_code = 0; + int ret_code = 0; if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt) { ibdev_dbg(to_ibdev(dev), "HMC: error start_idx[%04d] >= [type %04d].cnt[%04d]\n", info->start_idx, info->rsrc_type, info->hmc_info->hmc_obj[info->rsrc_type].cnt); - return IRDMA_ERR_INVALID_HMC_OBJ_INDEX; + return -EINVAL; } if ((info->start_idx + info->count) > @@ -397,7 +390,7 @@ enum irdma_status_code irdma_sc_del_hmc_obj(struct irdma_sc_dev *dev, "HMC: error start_idx[%04d] + count %04d >= [type %04d].cnt[%04d]\n", info->start_idx, info->count, info->rsrc_type, info->hmc_info->hmc_obj[info->rsrc_type].cnt); - return IRDMA_ERR_INVALID_HMC_OBJ_COUNT; + return -EINVAL; } irdma_find_pd_index_limit(info->hmc_info, info->rsrc_type, @@ -433,7 +426,7 @@ enum irdma_status_code irdma_sc_del_hmc_obj(struct irdma_sc_dev *dev, if (sd_idx >= info->hmc_info->sd_table.sd_cnt || sd_lmt > info->hmc_info->sd_table.sd_cnt) { ibdev_dbg(to_ibdev(dev), "HMC: invalid sd_idx\n"); - return IRDMA_ERR_INVALID_SD_INDEX; + return -EINVAL; } for (i = sd_idx; i < sd_lmt; i++) { @@ -477,11 +470,9 @@ enum irdma_status_code irdma_sc_del_hmc_obj(struct irdma_sc_dev *dev, * @type: what type of segment descriptor we're manipulating * @direct_mode_sz: size to alloc in direct mode */ -enum irdma_status_code irdma_add_sd_table_entry(struct irdma_hw *hw, - struct irdma_hmc_info *hmc_info, - u32 sd_index, - enum irdma_sd_entry_type type, - u64 direct_mode_sz) +int 
irdma_add_sd_table_entry(struct irdma_hw *hw, + struct irdma_hmc_info *hmc_info, u32 sd_index, + enum irdma_sd_entry_type type, u64 direct_mode_sz) { struct irdma_hmc_sd_entry *sd_entry; struct irdma_dma_mem dma_mem; @@ -499,7 +490,7 @@ enum irdma_status_code irdma_add_sd_table_entry(struct irdma_hw *hw, dma_mem.va = dma_alloc_coherent(hw->device, dma_mem.size, &dma_mem.pa, GFP_KERNEL); if (!dma_mem.va) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; if (type == IRDMA_SD_TYPE_PAGED) { struct irdma_virt_mem *vmem = &sd_entry->u.pd_table.pd_entry_virt_mem; @@ -510,7 +501,7 @@ enum irdma_status_code irdma_add_sd_table_entry(struct irdma_hw *hw, dma_free_coherent(hw->device, dma_mem.size, dma_mem.va, dma_mem.pa); dma_mem.va = NULL; - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; } sd_entry->u.pd_table.pd_entry = vmem->va; @@ -549,10 +540,9 @@ enum irdma_status_code irdma_add_sd_table_entry(struct irdma_hw *hw, * aligned on 4K boundary and zeroed memory. * 2. It should be 4K in size. */ -enum irdma_status_code irdma_add_pd_table_entry(struct irdma_sc_dev *dev, - struct irdma_hmc_info *hmc_info, - u32 pd_index, - struct irdma_dma_mem *rsrc_pg) +int irdma_add_pd_table_entry(struct irdma_sc_dev *dev, + struct irdma_hmc_info *hmc_info, u32 pd_index, + struct irdma_dma_mem *rsrc_pg) { struct irdma_hmc_pd_table *pd_table; struct irdma_hmc_pd_entry *pd_entry; @@ -563,7 +553,7 @@ enum irdma_status_code irdma_add_pd_table_entry(struct irdma_sc_dev *dev, u64 page_desc; if (pd_index / IRDMA_HMC_PD_CNT_IN_SD >= hmc_info->sd_table.sd_cnt) - return IRDMA_ERR_INVALID_PAGE_DESC_INDEX; + return -EINVAL; sd_idx = (pd_index / IRDMA_HMC_PD_CNT_IN_SD); if (hmc_info->sd_table.sd_entry[sd_idx].entry_type != @@ -584,7 +574,7 @@ enum irdma_status_code irdma_add_pd_table_entry(struct irdma_sc_dev *dev, page->size, &page->pa, GFP_KERNEL); if (!page->va) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; pd_entry->rsrc_pg = false; } @@ -621,9 +611,8 @@ enum irdma_status_code 
irdma_add_pd_table_entry(struct irdma_sc_dev *dev, * 1. Caller can deallocate the memory used by backing storage after this * function returns. */ -enum irdma_status_code irdma_remove_pd_bp(struct irdma_sc_dev *dev, - struct irdma_hmc_info *hmc_info, - u32 idx) +int irdma_remove_pd_bp(struct irdma_sc_dev *dev, + struct irdma_hmc_info *hmc_info, u32 idx) { struct irdma_hmc_pd_entry *pd_entry; struct irdma_hmc_pd_table *pd_table; @@ -635,11 +624,11 @@ enum irdma_status_code irdma_remove_pd_bp(struct irdma_sc_dev *dev, sd_idx = idx / IRDMA_HMC_PD_CNT_IN_SD; rel_pd_idx = idx % IRDMA_HMC_PD_CNT_IN_SD; if (sd_idx >= hmc_info->sd_table.sd_cnt) - return IRDMA_ERR_INVALID_PAGE_DESC_INDEX; + return -EINVAL; sd_entry = &hmc_info->sd_table.sd_entry[sd_idx]; if (sd_entry->entry_type != IRDMA_SD_TYPE_PAGED) - return IRDMA_ERR_INVALID_SD_TYPE; + return -EINVAL; pd_table = &hmc_info->sd_table.sd_entry[sd_idx].u.pd_table; pd_entry = &pd_table->pd_entry[rel_pd_idx]; @@ -656,7 +645,7 @@ enum irdma_status_code irdma_remove_pd_bp(struct irdma_sc_dev *dev, if (!pd_entry->rsrc_pg) { mem = &pd_entry->bp.addr; if (!mem || !mem->va) - return IRDMA_ERR_PARAM; + return -EINVAL; dma_free_coherent(dev->hw->device, mem->size, mem->va, mem->pa); @@ -673,14 +662,13 @@ enum irdma_status_code irdma_remove_pd_bp(struct irdma_sc_dev *dev, * @hmc_info: pointer to the HMC configuration information structure * @idx: the page index */ -enum irdma_status_code irdma_prep_remove_sd_bp(struct irdma_hmc_info *hmc_info, - u32 idx) +int irdma_prep_remove_sd_bp(struct irdma_hmc_info *hmc_info, u32 idx) { struct irdma_hmc_sd_entry *sd_entry; sd_entry = &hmc_info->sd_table.sd_entry[idx]; if (--sd_entry->u.bp.use_cnt) - return IRDMA_ERR_NOT_READY; + return -EBUSY; hmc_info->sd_table.use_cnt--; sd_entry->valid = false; @@ -693,15 +681,14 @@ enum irdma_status_code irdma_prep_remove_sd_bp(struct irdma_hmc_info *hmc_info, * @hmc_info: pointer to the HMC configuration information structure * @idx: segment descriptor 
index to find the relevant page descriptor */ -enum irdma_status_code -irdma_prep_remove_pd_page(struct irdma_hmc_info *hmc_info, u32 idx) +int irdma_prep_remove_pd_page(struct irdma_hmc_info *hmc_info, u32 idx) { struct irdma_hmc_sd_entry *sd_entry; sd_entry = &hmc_info->sd_table.sd_entry[idx]; if (sd_entry->u.pd_table.use_cnt) - return IRDMA_ERR_NOT_READY; + return -EBUSY; sd_entry->valid = false; hmc_info->sd_table.use_cnt--; diff --git a/drivers/infiniband/hw/irdma/hmc.h b/drivers/infiniband/hw/irdma/hmc.h index e2139c788b1b..f5c5dacc7021 100644 --- a/drivers/infiniband/hw/irdma/hmc.h +++ b/drivers/infiniband/hw/irdma/hmc.h @@ -141,40 +141,29 @@ struct irdma_hmc_del_obj_info { bool privileged; }; -enum irdma_status_code irdma_copy_dma_mem(struct irdma_hw *hw, void *dest_buf, - struct irdma_dma_mem *src_mem, - u64 src_offset, u64 size); -enum irdma_status_code -irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev, - struct irdma_hmc_create_obj_info *info); -enum irdma_status_code irdma_sc_del_hmc_obj(struct irdma_sc_dev *dev, - struct irdma_hmc_del_obj_info *info, - bool reset); -enum irdma_status_code irdma_hmc_sd_one(struct irdma_sc_dev *dev, u8 hmc_fn_id, - u64 pa, u32 sd_idx, - enum irdma_sd_entry_type type, - bool setsd); -enum irdma_status_code -irdma_update_sds_noccq(struct irdma_sc_dev *dev, - struct irdma_update_sds_info *info); +int irdma_copy_dma_mem(struct irdma_hw *hw, void *dest_buf, + struct irdma_dma_mem *src_mem, u64 src_offset, u64 size); +int irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev, + struct irdma_hmc_create_obj_info *info); +int irdma_sc_del_hmc_obj(struct irdma_sc_dev *dev, + struct irdma_hmc_del_obj_info *info, bool reset); +int irdma_hmc_sd_one(struct irdma_sc_dev *dev, u8 hmc_fn_id, u64 pa, u32 sd_idx, + enum irdma_sd_entry_type type, + bool setsd); +int irdma_update_sds_noccq(struct irdma_sc_dev *dev, + struct irdma_update_sds_info *info); struct irdma_vfdev *irdma_vfdev_from_fpm(struct irdma_sc_dev *dev, u8 hmc_fn_id); struct 
irdma_hmc_info *irdma_vf_hmcinfo_from_fpm(struct irdma_sc_dev *dev, u8 hmc_fn_id); -enum irdma_status_code irdma_add_sd_table_entry(struct irdma_hw *hw, - struct irdma_hmc_info *hmc_info, - u32 sd_index, - enum irdma_sd_entry_type type, - u64 direct_mode_sz); -enum irdma_status_code irdma_add_pd_table_entry(struct irdma_sc_dev *dev, - struct irdma_hmc_info *hmc_info, - u32 pd_index, - struct irdma_dma_mem *rsrc_pg); -enum irdma_status_code irdma_remove_pd_bp(struct irdma_sc_dev *dev, - struct irdma_hmc_info *hmc_info, - u32 idx); -enum irdma_status_code irdma_prep_remove_sd_bp(struct irdma_hmc_info *hmc_info, - u32 idx); -enum irdma_status_code -irdma_prep_remove_pd_page(struct irdma_hmc_info *hmc_info, u32 idx); +int irdma_add_sd_table_entry(struct irdma_hw *hw, + struct irdma_hmc_info *hmc_info, u32 sd_index, + enum irdma_sd_entry_type type, u64 direct_mode_sz); +int irdma_add_pd_table_entry(struct irdma_sc_dev *dev, + struct irdma_hmc_info *hmc_info, u32 pd_index, + struct irdma_dma_mem *rsrc_pg); +int irdma_remove_pd_bp(struct irdma_sc_dev *dev, + struct irdma_hmc_info *hmc_info, u32 idx); +int irdma_prep_remove_sd_bp(struct irdma_hmc_info *hmc_info, u32 idx); +int irdma_prep_remove_pd_page(struct irdma_hmc_info *hmc_info, u32 idx); #endif /* IRDMA_HMC_H */ diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index b4c657f5f2f9..ab246447520b 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -61,7 +61,7 @@ static void irdma_iwarp_ce_handler(struct irdma_sc_cq *iwcq) struct irdma_cq *cq = iwcq->back_cq; if (!cq->user_mode) - cq->armed = false; + atomic_set(&cq->armed, 0); if (cq->ibcq.comp_handler) cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); } @@ -75,12 +75,12 @@ static void irdma_puda_ce_handler(struct irdma_pci_f *rf, struct irdma_sc_cq *cq) { struct irdma_sc_dev *dev = &rf->sc_dev; - enum irdma_status_code status; u32 compl_error; + int status; do { status = irdma_puda_poll_cmpl(dev, 
cq, &compl_error); - if (status == IRDMA_ERR_Q_EMPTY) + if (status == -ENOENT) break; if (status) { ibdev_dbg(to_ibdev(dev), "ERR: puda status = %d\n", status); @@ -138,59 +138,68 @@ static void irdma_set_flush_fields(struct irdma_sc_qp *qp, qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC; switch (info->ae_id) { - case IRDMA_AE_AMP_UNALLOCATED_STAG: case IRDMA_AE_AMP_BOUNDS_VIOLATION: case IRDMA_AE_AMP_INVALID_STAG: - qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR; - fallthrough; + case IRDMA_AE_AMP_RIGHTS_VIOLATION: + case IRDMA_AE_AMP_UNALLOCATED_STAG: case IRDMA_AE_AMP_BAD_PD: - case IRDMA_AE_UDA_XMIT_BAD_PD: + case IRDMA_AE_AMP_BAD_QP: + case IRDMA_AE_AMP_BAD_STAG_KEY: + case IRDMA_AE_AMP_BAD_STAG_INDEX: + case IRDMA_AE_AMP_TO_WRAP: + case IRDMA_AE_PRIV_OPERATION_DENIED: qp->flush_code = FLUSH_PROT_ERR; + qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR; break; - case IRDMA_AE_AMP_BAD_QP: + case IRDMA_AE_UDA_XMIT_BAD_PD: case IRDMA_AE_WQE_UNEXPECTED_OPCODE: qp->flush_code = FLUSH_LOC_QP_OP_ERR; + qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC; + break; + case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG: + case IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT: + case IRDMA_AE_UDA_L4LEN_INVALID: + case IRDMA_AE_DDP_UBE_INVALID_MO: + case IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER: + qp->flush_code = FLUSH_LOC_LEN_ERR; + qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC; break; - case IRDMA_AE_AMP_BAD_STAG_KEY: - case IRDMA_AE_AMP_BAD_STAG_INDEX: - case IRDMA_AE_AMP_TO_WRAP: - case IRDMA_AE_AMP_RIGHTS_VIOLATION: case IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS: - case IRDMA_AE_PRIV_OPERATION_DENIED: - case IRDMA_AE_IB_INVALID_REQUEST: case IRDMA_AE_IB_REMOTE_ACCESS_ERROR: qp->flush_code = FLUSH_REM_ACCESS_ERR; qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR; break; case IRDMA_AE_LLP_SEGMENT_TOO_SMALL: - case IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER: - case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG: - case IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT: - case IRDMA_AE_UDA_L4LEN_INVALID: + case 
IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR: case IRDMA_AE_ROCE_RSP_LENGTH_ERROR: - qp->flush_code = FLUSH_LOC_LEN_ERR; + case IRDMA_AE_IB_REMOTE_OP_ERROR: + qp->flush_code = FLUSH_REM_OP_ERR; + qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC; break; case IRDMA_AE_LCE_QP_CATASTROPHIC: qp->flush_code = FLUSH_FATAL_ERR; + qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC; break; - case IRDMA_AE_DDP_UBE_INVALID_MO: case IRDMA_AE_IB_RREQ_AND_Q1_FULL: - case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR: qp->flush_code = FLUSH_GENERAL_ERR; break; case IRDMA_AE_LLP_TOO_MANY_RETRIES: qp->flush_code = FLUSH_RETRY_EXC_ERR; + qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC; break; case IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS: case IRDMA_AE_AMP_MWBIND_BIND_DISABLED: case IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS: qp->flush_code = FLUSH_MW_BIND_ERR; + qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR; break; - case IRDMA_AE_IB_REMOTE_OP_ERROR: - qp->flush_code = FLUSH_REM_OP_ERR; + case IRDMA_AE_IB_INVALID_REQUEST: + qp->flush_code = FLUSH_REM_INV_REQ_ERR; + qp->event_type = IRDMA_QP_EVENT_REQ_ERR; break; default: - qp->flush_code = FLUSH_FATAL_ERR; + qp->flush_code = FLUSH_GENERAL_ERR; + qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC; break; } } @@ -257,10 +266,6 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) iwqp->last_aeq = info->ae_id; spin_unlock_irqrestore(&iwqp->lock, flags); ctx_info = &iwqp->ctx_info; - if (rdma_protocol_roce(&iwqp->iwdev->ibdev, 1)) - ctx_info->roce_info->err_rq_idx_valid = true; - else - ctx_info->iwarp_info->err_rq_idx_valid = true; } else { if (info->ae_id != IRDMA_AE_CQ_OPERATION_ERROR) continue; @@ -370,16 +375,12 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) case IRDMA_AE_LCE_FUNCTION_CATASTROPHIC: case IRDMA_AE_LCE_CQ_CATASTROPHIC: case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG: - if (rdma_protocol_roce(&iwdev->ibdev, 1)) - ctx_info->roce_info->err_rq_idx_valid = false; - else - ctx_info->iwarp_info->err_rq_idx_valid = false; - fallthrough; default: - ibdev_err(&iwdev->ibdev, 
"abnormal ae_id = 0x%x bool qp=%d qp_id = %d\n", - info->ae_id, info->qp, info->qp_cq_id); + ibdev_err(&iwdev->ibdev, "abnormal ae_id = 0x%x bool qp=%d qp_id = %d, ae_src=%d\n", + info->ae_id, info->qp, info->qp_cq_id, info->ae_src); if (rdma_protocol_roce(&iwdev->ibdev, 1)) { - if (!info->sq && ctx_info->roce_info->err_rq_idx_valid) { + ctx_info->roce_info->err_rq_idx_valid = info->rq; + if (info->rq) { ctx_info->roce_info->err_rq_idx = info->wqe_idx; irdma_sc_qp_setctx_roce(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info); @@ -388,7 +389,8 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) irdma_cm_disconn(iwqp); break; } - if (!info->sq && ctx_info->iwarp_info->err_rq_idx_valid) { + ctx_info->iwarp_info->err_rq_idx_valid = info->rq; + if (info->rq) { ctx_info->iwarp_info->err_rq_idx = info->wqe_idx; ctx_info->tcp_info_valid = false; ctx_info->iwarp_info_valid = true; @@ -456,7 +458,7 @@ static void irdma_ceq_dpc(struct tasklet_struct *t) * Allocate iwdev msix table and copy the msix info to the table * Return 0 if successful, otherwise return error */ -static enum irdma_status_code irdma_save_msix_info(struct irdma_pci_f *rf) +static int irdma_save_msix_info(struct irdma_pci_f *rf) { struct irdma_qvlist_info *iw_qvlist; struct irdma_qv_info *iw_qvinfo; @@ -466,13 +468,13 @@ static enum irdma_status_code irdma_save_msix_info(struct irdma_pci_f *rf) size_t size; if (!rf->msix_count) - return IRDMA_ERR_NO_INTR; + return -EINVAL; size = sizeof(struct irdma_msix_vector) * rf->msix_count; size += struct_size(iw_qvlist, qv_info, rf->msix_count); rf->iw_msixtbl = kzalloc(size, GFP_KERNEL); if (!rf->iw_msixtbl) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; rf->iw_qvlist = (struct irdma_qvlist_info *) (&rf->iw_msixtbl[rf->msix_count]); @@ -550,7 +552,7 @@ static void irdma_destroy_irq(struct irdma_pci_f *rf, struct irdma_sc_dev *dev = &rf->sc_dev; dev->irq_ops->irdma_dis_irq(dev, msix_vec->idx); - irq_set_affinity_hint(msix_vec->irq, NULL); + 
irq_update_affinity_hint(msix_vec->irq, NULL); free_irq(msix_vec->irq, dev_id); } @@ -564,9 +566,9 @@ static void irdma_destroy_irq(struct irdma_pci_f *rf, */ static void irdma_destroy_cqp(struct irdma_pci_f *rf, bool free_hwcqp) { - enum irdma_status_code status = 0; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_cqp *cqp = &rf->cqp; + int status = 0; if (rf->cqp_cmpl_wq) destroy_workqueue(rf->cqp_cmpl_wq); @@ -606,9 +608,9 @@ static void irdma_destroy_virt_aeq(struct irdma_pci_f *rf) */ static void irdma_destroy_aeq(struct irdma_pci_f *rf) { - enum irdma_status_code status = IRDMA_ERR_NOT_READY; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_aeq *aeq = &rf->aeq; + int status = -EBUSY; if (!rf->msix_shared) { rf->sc_dev.irq_ops->irdma_cfg_aeq(&rf->sc_dev, rf->iw_msixtbl->idx, false); @@ -642,8 +644,8 @@ exit: */ static void irdma_destroy_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq) { - enum irdma_status_code status; struct irdma_sc_dev *dev = &rf->sc_dev; + int status; if (rf->reset) goto exit; @@ -733,7 +735,7 @@ static void irdma_destroy_ccq(struct irdma_pci_f *rf) { struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_ccq *ccq = &rf->ccq; - enum irdma_status_code status = 0; + int status = 0; if (!rf->reset) status = irdma_sc_ccq_destroy(dev->ccq, 0, true); @@ -796,9 +798,8 @@ static void irdma_del_hmc_objects(struct irdma_sc_dev *dev, * @dev: hardware control device structure * @info: information for the hmc object to create */ -static enum irdma_status_code -irdma_create_hmc_obj_type(struct irdma_sc_dev *dev, - struct irdma_hmc_create_obj_info *info) +static int irdma_create_hmc_obj_type(struct irdma_sc_dev *dev, + struct irdma_hmc_create_obj_info *info) { return irdma_sc_create_hmc_obj(dev, info); } @@ -812,13 +813,12 @@ irdma_create_hmc_obj_type(struct irdma_sc_dev *dev, * Create the device hmc objects and allocate hmc pages * Return 0 if successful, otherwise clean up and return error */ -static enum irdma_status_code 
-irdma_create_hmc_objs(struct irdma_pci_f *rf, bool privileged, enum irdma_vers vers) +static int irdma_create_hmc_objs(struct irdma_pci_f *rf, bool privileged, + enum irdma_vers vers) { struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_hmc_create_obj_info info = {}; - enum irdma_status_code status = 0; - int i; + int i, status = 0; info.hmc_info = dev->hmc_info; info.privileged = privileged; @@ -868,9 +868,9 @@ irdma_create_hmc_objs(struct irdma_pci_f *rf, bool privileged, enum irdma_vers v * update the memptr to point to the new aligned memory * Return 0 if successful, otherwise return no memory error */ -static enum irdma_status_code -irdma_obj_aligned_mem(struct irdma_pci_f *rf, struct irdma_dma_mem *memptr, - u32 size, u32 mask) +static int irdma_obj_aligned_mem(struct irdma_pci_f *rf, + struct irdma_dma_mem *memptr, u32 size, + u32 mask) { unsigned long va, newva; unsigned long extra; @@ -884,7 +884,7 @@ irdma_obj_aligned_mem(struct irdma_pci_f *rf, struct irdma_dma_mem *memptr, memptr->pa = rf->obj_next.pa + extra; memptr->size = size; if (((u8 *)memptr->va + size) > ((u8 *)rf->obj_mem.va + rf->obj_mem.size)) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; rf->obj_next.va = (u8 *)memptr->va + size; rf->obj_next.pa = memptr->pa + size; @@ -899,25 +899,24 @@ irdma_obj_aligned_mem(struct irdma_pci_f *rf, struct irdma_dma_mem *memptr, * Return 0, if the cqp and all the resources associated with it * are successfully created, otherwise return error */ -static enum irdma_status_code irdma_create_cqp(struct irdma_pci_f *rf) +static int irdma_create_cqp(struct irdma_pci_f *rf) { - enum irdma_status_code status; u32 sqsize = IRDMA_CQP_SW_SQSIZE_2048; struct irdma_dma_mem mem; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_cqp_init_info cqp_init_info = {}; struct irdma_cqp *cqp = &rf->cqp; u16 maj_err, min_err; - int i; + int i, status; cqp->cqp_requests = kcalloc(sqsize, sizeof(*cqp->cqp_requests), GFP_KERNEL); if (!cqp->cqp_requests) - return 
IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp->scratch_array = kcalloc(sqsize, sizeof(*cqp->scratch_array), GFP_KERNEL); if (!cqp->scratch_array) { kfree(cqp->cqp_requests); - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; } dev->cqp = &cqp->sc_cqp; @@ -929,7 +928,7 @@ static enum irdma_status_code irdma_create_cqp(struct irdma_pci_f *rf) if (!cqp->sq.va) { kfree(cqp->scratch_array); kfree(cqp->cqp_requests); - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; } status = irdma_obj_aligned_mem(rf, &mem, sizeof(struct irdma_cqp_ctx), @@ -999,12 +998,12 @@ exit: * Return 0, if the ccq and the resources associated with it * are successfully created, otherwise return error */ -static enum irdma_status_code irdma_create_ccq(struct irdma_pci_f *rf) +static int irdma_create_ccq(struct irdma_pci_f *rf) { struct irdma_sc_dev *dev = &rf->sc_dev; - enum irdma_status_code status; struct irdma_ccq_init_info info = {}; struct irdma_ccq *ccq = &rf->ccq; + int status; dev->ccq = &ccq->sc_cq; dev->ccq->dev = dev; @@ -1015,7 +1014,7 @@ static enum irdma_status_code irdma_create_ccq(struct irdma_pci_f *rf) ccq->mem_cq.va = dma_alloc_coherent(dev->hw->device, ccq->mem_cq.size, &ccq->mem_cq.pa, GFP_KERNEL); if (!ccq->mem_cq.va) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; status = irdma_obj_aligned_mem(rf, &ccq->shadow_area, ccq->shadow_area.size, @@ -1054,9 +1053,9 @@ exit: * Allocate a mac ip entry and add it to the hw table Return 0 * if successful, otherwise return error */ -static enum irdma_status_code irdma_alloc_set_mac(struct irdma_device *iwdev) +static int irdma_alloc_set_mac(struct irdma_device *iwdev) { - enum irdma_status_code status; + int status; status = irdma_alloc_local_mac_entry(iwdev->rf, &iwdev->mac_ip_table_idx); @@ -1082,9 +1081,8 @@ static enum irdma_status_code irdma_alloc_set_mac(struct irdma_device *iwdev) * Allocate interrupt resources and enable irq handling * Return 0 if successful, otherwise return error */ -static enum irdma_status_code 
-irdma_cfg_ceq_vector(struct irdma_pci_f *rf, struct irdma_ceq *iwceq, - u32 ceq_id, struct irdma_msix_vector *msix_vec) +static int irdma_cfg_ceq_vector(struct irdma_pci_f *rf, struct irdma_ceq *iwceq, + u32 ceq_id, struct irdma_msix_vector *msix_vec) { int status; @@ -1100,10 +1098,10 @@ irdma_cfg_ceq_vector(struct irdma_pci_f *rf, struct irdma_ceq *iwceq, } cpumask_clear(&msix_vec->mask); cpumask_set_cpu(msix_vec->cpu_affinity, &msix_vec->mask); - irq_set_affinity_hint(msix_vec->irq, &msix_vec->mask); + irq_update_affinity_hint(msix_vec->irq, &msix_vec->mask); if (status) { ibdev_dbg(&rf->iwdev->ibdev, "ERR: ceq irq config fail\n"); - return IRDMA_ERR_CFG; + return status; } msix_vec->ceq_id = ceq_id; @@ -1119,7 +1117,7 @@ irdma_cfg_ceq_vector(struct irdma_pci_f *rf, struct irdma_ceq *iwceq, * Allocate interrupt resources and enable irq handling * Return 0 if successful, otherwise return error */ -static enum irdma_status_code irdma_cfg_aeq_vector(struct irdma_pci_f *rf) +static int irdma_cfg_aeq_vector(struct irdma_pci_f *rf) { struct irdma_msix_vector *msix_vec = rf->iw_msixtbl; u32 ret = 0; @@ -1131,7 +1129,7 @@ static enum irdma_status_code irdma_cfg_aeq_vector(struct irdma_pci_f *rf) } if (ret) { ibdev_dbg(&rf->iwdev->ibdev, "ERR: aeq irq config fail\n"); - return IRDMA_ERR_CFG; + return -EINVAL; } rf->sc_dev.irq_ops->irdma_cfg_aeq(&rf->sc_dev, msix_vec->idx, true); @@ -1149,12 +1147,10 @@ static enum irdma_status_code irdma_cfg_aeq_vector(struct irdma_pci_f *rf) * Return 0, if the ceq and the resources associated with it * are successfully created, otherwise return error */ -static enum irdma_status_code irdma_create_ceq(struct irdma_pci_f *rf, - struct irdma_ceq *iwceq, - u32 ceq_id, - struct irdma_sc_vsi *vsi) +static int irdma_create_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq, + u32 ceq_id, struct irdma_sc_vsi *vsi) { - enum irdma_status_code status; + int status; struct irdma_ceq_init_info info = {}; struct irdma_sc_dev *dev = &rf->sc_dev; u64 
scratch; @@ -1169,7 +1165,7 @@ static enum irdma_status_code irdma_create_ceq(struct irdma_pci_f *rf, iwceq->mem.va = dma_alloc_coherent(dev->hw->device, iwceq->mem.size, &iwceq->mem.pa, GFP_KERNEL); if (!iwceq->mem.va) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; info.ceq_id = ceq_id; info.ceqe_base = iwceq->mem.va; @@ -1205,18 +1201,18 @@ static enum irdma_status_code irdma_create_ceq(struct irdma_pci_f *rf, * Create the ceq 0 and configure it's msix interrupt vector * Return 0, if successfully set up, otherwise return error */ -static enum irdma_status_code irdma_setup_ceq_0(struct irdma_pci_f *rf) +static int irdma_setup_ceq_0(struct irdma_pci_f *rf) { struct irdma_ceq *iwceq; struct irdma_msix_vector *msix_vec; u32 i; - enum irdma_status_code status = 0; + int status = 0; u32 num_ceqs; num_ceqs = min(rf->msix_count, rf->sc_dev.hmc_fpm_misc.max_ceqs); rf->ceqlist = kcalloc(num_ceqs, sizeof(*rf->ceqlist), GFP_KERNEL); if (!rf->ceqlist) { - status = IRDMA_ERR_NO_MEMORY; + status = -ENOMEM; goto exit; } @@ -1262,14 +1258,13 @@ exit: * Create the ceq's and configure their msix interrupt vectors * Return 0, if ceqs are successfully set up, otherwise return error */ -static enum irdma_status_code irdma_setup_ceqs(struct irdma_pci_f *rf, - struct irdma_sc_vsi *vsi) +static int irdma_setup_ceqs(struct irdma_pci_f *rf, struct irdma_sc_vsi *vsi) { u32 i; u32 ceq_id; struct irdma_ceq *iwceq; struct irdma_msix_vector *msix_vec; - enum irdma_status_code status; + int status; u32 num_ceqs; num_ceqs = min(rf->msix_count, rf->sc_dev.hmc_fpm_misc.max_ceqs); @@ -1303,22 +1298,21 @@ del_ceqs: return status; } -static enum irdma_status_code irdma_create_virt_aeq(struct irdma_pci_f *rf, - u32 size) +static int irdma_create_virt_aeq(struct irdma_pci_f *rf, u32 size) { - enum irdma_status_code status = IRDMA_ERR_NO_MEMORY; struct irdma_aeq *aeq = &rf->aeq; dma_addr_t *pg_arr; u32 pg_cnt; + int status; if (rf->rdma_ver < IRDMA_GEN_2) - return IRDMA_NOT_SUPPORTED; + return 
-EOPNOTSUPP; aeq->mem.size = sizeof(struct irdma_sc_aeqe) * size; aeq->mem.va = vzalloc(aeq->mem.size); if (!aeq->mem.va) - return status; + return -ENOMEM; pg_cnt = DIV_ROUND_UP(aeq->mem.size, PAGE_SIZE); status = irdma_get_pble(rf->pble_rsrc, &aeq->palloc, pg_cnt, true); @@ -1345,15 +1339,15 @@ static enum irdma_status_code irdma_create_virt_aeq(struct irdma_pci_f *rf, * Return 0, if the aeq and the resources associated with it * are successfully created, otherwise return error */ -static enum irdma_status_code irdma_create_aeq(struct irdma_pci_f *rf) +static int irdma_create_aeq(struct irdma_pci_f *rf) { - enum irdma_status_code status; struct irdma_aeq_init_info info = {}; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_aeq *aeq = &rf->aeq; struct irdma_hmc_info *hmc_info = rf->sc_dev.hmc_info; u32 aeq_size; u8 multiplier = (rf->protocol_used == IRDMA_IWARP_PROTOCOL_ONLY) ? 2 : 1; + int status; aeq_size = multiplier * hmc_info->hmc_obj[IRDMA_HMC_IW_QP].cnt + hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt; @@ -1412,10 +1406,10 @@ err: * Create the aeq and configure its msix interrupt vector * Return 0 if successful, otherwise return error */ -static enum irdma_status_code irdma_setup_aeq(struct irdma_pci_f *rf) +static int irdma_setup_aeq(struct irdma_pci_f *rf) { struct irdma_sc_dev *dev = &rf->sc_dev; - enum irdma_status_code status; + int status; status = irdma_create_aeq(rf); if (status) @@ -1439,10 +1433,10 @@ static enum irdma_status_code irdma_setup_aeq(struct irdma_pci_f *rf) * * Return 0 if successful, otherwise return error */ -static enum irdma_status_code irdma_initialize_ilq(struct irdma_device *iwdev) +static int irdma_initialize_ilq(struct irdma_device *iwdev) { struct irdma_puda_rsrc_info info = {}; - enum irdma_status_code status; + int status; info.type = IRDMA_PUDA_RSRC_TYPE_ILQ; info.cq_id = 1; @@ -1469,10 +1463,10 @@ static enum irdma_status_code irdma_initialize_ilq(struct irdma_device *iwdev) * * Return 0 if successful, otherwise return error 
*/ -static enum irdma_status_code irdma_initialize_ieq(struct irdma_device *iwdev) +static int irdma_initialize_ieq(struct irdma_device *iwdev) { struct irdma_puda_rsrc_info info = {}; - enum irdma_status_code status; + int status; info.type = IRDMA_PUDA_RSRC_TYPE_IEQ; info.cq_id = 2; @@ -1515,15 +1509,12 @@ void irdma_reinitialize_ieq(struct irdma_sc_vsi *vsi) * the hmc objects and create the objects * Return 0 if successful, otherwise return error */ -static enum irdma_status_code irdma_hmc_setup(struct irdma_pci_f *rf) +static int irdma_hmc_setup(struct irdma_pci_f *rf) { - enum irdma_status_code status; + int status; u32 qpcnt; - if (rf->rdma_ver == IRDMA_GEN_1) - qpcnt = rsrc_limits_table[rf->limits_sel].qplimit * 2; - else - qpcnt = rsrc_limits_table[rf->limits_sel].qplimit; + qpcnt = rsrc_limits_table[rf->limits_sel].qplimit; rf->sd_type = IRDMA_SD_TYPE_DIRECT; status = irdma_cfg_fpm_val(&rf->sc_dev, qpcnt); @@ -1551,7 +1542,7 @@ static void irdma_del_init_mem(struct irdma_pci_f *rf) rf->obj_mem.pa); rf->obj_mem.va = NULL; if (rf->rdma_ver != IRDMA_GEN_1) { - kfree(rf->allocated_ws_nodes); + bitmap_free(rf->allocated_ws_nodes); rf->allocated_ws_nodes = NULL; } kfree(rf->ceqlist); @@ -1570,9 +1561,9 @@ static void irdma_del_init_mem(struct irdma_pci_f *rf) * Return 0 if successful, otherwise clean up the resources * and return error */ -static enum irdma_status_code irdma_initialize_dev(struct irdma_pci_f *rf) +static int irdma_initialize_dev(struct irdma_pci_f *rf) { - enum irdma_status_code status; + int status; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_device_init_info info = {}; struct irdma_dma_mem mem; @@ -1584,7 +1575,7 @@ static enum irdma_status_code irdma_initialize_dev(struct irdma_pci_f *rf) rf->hmc_info_mem = kzalloc(size, GFP_KERNEL); if (!rf->hmc_info_mem) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; rf->pble_rsrc = (struct irdma_hmc_pble_rsrc *)rf->hmc_info_mem; dev->hmc_info = &rf->hw.hmc; @@ -1608,7 +1599,7 @@ static enum 
irdma_status_code irdma_initialize_dev(struct irdma_pci_f *rf) info.fpm_commit_buf = mem.va; info.bar0 = rf->hw.hw_addr; - info.hmc_fn_id = PCI_FUNC(rf->pcidev->devfn); + info.hmc_fn_id = rf->pf_id; info.hw = &rf->hw; status = irdma_sc_dev_init(rf->rdma_ver, &rf->sc_dev, &info); if (status) @@ -1667,9 +1658,9 @@ void irdma_rt_deinit_hw(struct irdma_device *iwdev) destroy_workqueue(iwdev->cleanup_wq); } -static enum irdma_status_code irdma_setup_init_state(struct irdma_pci_f *rf) +static int irdma_setup_init_state(struct irdma_pci_f *rf) { - enum irdma_status_code status; + int status; status = irdma_save_msix_info(rf); if (status) @@ -1680,7 +1671,7 @@ static enum irdma_status_code irdma_setup_init_state(struct irdma_pci_f *rf) rf->obj_mem.va = dma_alloc_coherent(rf->hw.device, rf->obj_mem.size, &rf->obj_mem.pa, GFP_KERNEL); if (!rf->obj_mem.va) { - status = IRDMA_ERR_NO_MEMORY; + status = -ENOMEM; goto clean_msixtbl; } @@ -1709,14 +1700,14 @@ clean_msixtbl: */ static void irdma_get_used_rsrc(struct irdma_device *iwdev) { - iwdev->rf->used_pds = find_next_zero_bit(iwdev->rf->allocated_pds, - iwdev->rf->max_pd, 0); - iwdev->rf->used_qps = find_next_zero_bit(iwdev->rf->allocated_qps, - iwdev->rf->max_qp, 0); - iwdev->rf->used_cqs = find_next_zero_bit(iwdev->rf->allocated_cqs, - iwdev->rf->max_cq, 0); - iwdev->rf->used_mrs = find_next_zero_bit(iwdev->rf->allocated_mrs, - iwdev->rf->max_mr, 0); + iwdev->rf->used_pds = find_first_zero_bit(iwdev->rf->allocated_pds, + iwdev->rf->max_pd); + iwdev->rf->used_qps = find_first_zero_bit(iwdev->rf->allocated_qps, + iwdev->rf->max_qp); + iwdev->rf->used_cqs = find_first_zero_bit(iwdev->rf->allocated_cqs, + iwdev->rf->max_cq); + iwdev->rf->used_mrs = find_first_zero_bit(iwdev->rf->allocated_mrs, + iwdev->rf->max_mr); } void irdma_ctrl_deinit_hw(struct irdma_pci_f *rf) @@ -1763,14 +1754,14 @@ void irdma_ctrl_deinit_hw(struct irdma_pci_f *rf) * Create device queues ILQ, IEQ, CEQs and PBLEs. Setup irdma * device resource objects. 
*/ -enum irdma_status_code irdma_rt_init_hw(struct irdma_device *iwdev, - struct irdma_l2params *l2params) +int irdma_rt_init_hw(struct irdma_device *iwdev, + struct irdma_l2params *l2params) { struct irdma_pci_f *rf = iwdev->rf; struct irdma_sc_dev *dev = &rf->sc_dev; - enum irdma_status_code status; struct irdma_vsi_init_info vsi_info = {}; struct irdma_vsi_stats_info stats_info = {}; + int status; vsi_info.dev = dev; vsi_info.back_vsi = iwdev; @@ -1788,7 +1779,7 @@ enum irdma_status_code irdma_rt_init_hw(struct irdma_device *iwdev, stats_info.pestat = kzalloc(sizeof(*stats_info.pestat), GFP_KERNEL); if (!stats_info.pestat) { irdma_cleanup_cm_core(&iwdev->cm_core); - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; } stats_info.fcn_id = dev->hmc_fn_id; status = irdma_vsi_stats_init(&iwdev->vsi, &stats_info); @@ -1835,10 +1826,6 @@ enum irdma_status_code irdma_rt_init_hw(struct irdma_device *iwdev, rf->rsrc_created = true; } - iwdev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | - IB_DEVICE_MEM_WINDOW | - IB_DEVICE_MEM_MGT_EXTENSIONS; - if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) irdma_alloc_set_mac(iwdev); irdma_add_ip(iwdev); @@ -1850,7 +1837,7 @@ enum irdma_status_code irdma_rt_init_hw(struct irdma_device *iwdev, iwdev->cleanup_wq = alloc_workqueue("irdma-cleanup-wq", WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE); if (!iwdev->cleanup_wq) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; irdma_get_used_rsrc(iwdev); init_waitqueue_head(&iwdev->suspend_wq); @@ -1870,10 +1857,10 @@ enum irdma_status_code irdma_rt_init_hw(struct irdma_device *iwdev, * * Create admin queues, HMC obejcts and RF resource objects */ -enum irdma_status_code irdma_ctrl_init_hw(struct irdma_pci_f *rf) +int irdma_ctrl_init_hw(struct irdma_pci_f *rf) { struct irdma_sc_dev *dev = &rf->sc_dev; - enum irdma_status_code status; + int status; do { status = irdma_setup_init_state(rf); if (status) @@ -1915,7 +1902,7 @@ enum irdma_status_code irdma_ctrl_init_hw(struct irdma_pci_f *rf) 
rf->cqp_cmpl_wq = alloc_ordered_workqueue("cqp_cmpl_wq", WQ_HIGHPRI | WQ_UNBOUND); if (!rf->cqp_cmpl_wq) { - status = IRDMA_ERR_NO_MEMORY; + status = -ENOMEM; break; } INIT_WORK(&rf->cqp_cmpl_work, cqp_compl_worker); @@ -1984,9 +1971,8 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf) u32 ret; if (rf->rdma_ver != IRDMA_GEN_1) { - rf->allocated_ws_nodes = - kcalloc(BITS_TO_LONGS(IRDMA_MAX_WS_NODES), - sizeof(unsigned long), GFP_KERNEL); + rf->allocated_ws_nodes = bitmap_zalloc(IRDMA_MAX_WS_NODES, + GFP_KERNEL); if (!rf->allocated_ws_nodes) return -ENOMEM; @@ -2035,7 +2021,7 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf) return 0; mem_rsrc_kzalloc_fail: - kfree(rf->allocated_ws_nodes); + bitmap_free(rf->allocated_ws_nodes); rf->allocated_ws_nodes = NULL; return ret; @@ -2202,11 +2188,11 @@ int irdma_add_local_mac_entry(struct irdma_pci_f *rf, const u8 *mac_addr, u16 id struct irdma_cqp *iwcqp = &rf->cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; cqp_info->post_sq = 1; @@ -2238,11 +2224,11 @@ int irdma_alloc_local_mac_entry(struct irdma_pci_f *rf, u16 *mac_tbl_idx) struct irdma_cqp *iwcqp = &rf->cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; - enum irdma_status_code status = 0; + int status = 0; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; cqp_info->cqp_cmd = IRDMA_OP_ALLOC_LOCAL_MAC_ENTRY; @@ -2264,18 +2250,17 @@ int irdma_alloc_local_mac_entry(struct irdma_pci_f *rf, u16 *mac_tbl_idx) * @accel_local_port: port for apbvt * @add_port: add ordelete port */ -static enum irdma_status_code -irdma_cqp_manage_apbvt_cmd(struct irdma_device *iwdev, u16 accel_local_port, - bool add_port) +static 
int irdma_cqp_manage_apbvt_cmd(struct irdma_device *iwdev, + u16 accel_local_port, bool add_port) { struct irdma_apbvt_info *info; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, add_port); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; info = &cqp_info->in.u.manage_apbvt_entry.info; @@ -2429,22 +2414,21 @@ static void irdma_send_syn_cqp_callback(struct irdma_cqp_request *cqp_request) * @cmnode: cmnode associated with connection * @wait: wait for completion */ -enum irdma_status_code -irdma_manage_qhash(struct irdma_device *iwdev, struct irdma_cm_info *cminfo, - enum irdma_quad_entry_type etype, - enum irdma_quad_hash_manage_type mtype, void *cmnode, - bool wait) +int irdma_manage_qhash(struct irdma_device *iwdev, struct irdma_cm_info *cminfo, + enum irdma_quad_entry_type etype, + enum irdma_quad_hash_manage_type mtype, void *cmnode, + bool wait) { struct irdma_qhash_table_info *info; - enum irdma_status_code status; struct irdma_cqp *iwcqp = &iwdev->rf->cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_cm_node *cm_node = cmnode; + int status; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, wait); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; info = &cqp_info->in.u.manage_qhash_table_entry.info; @@ -2558,12 +2542,10 @@ static void irdma_hw_flush_wqes_callback(struct irdma_cqp_request *cqp_request) * @info: info for flush * @wait: flag wait for completion */ -enum irdma_status_code irdma_hw_flush_wqes(struct irdma_pci_f *rf, - struct irdma_sc_qp *qp, - struct irdma_qp_flush_info *info, - bool wait) +int irdma_hw_flush_wqes(struct irdma_pci_f *rf, struct irdma_sc_qp *qp, + struct irdma_qp_flush_info *info, bool wait) { - enum irdma_status_code status; + int status; struct irdma_qp_flush_info 
*hw_info; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; @@ -2571,7 +2553,7 @@ enum irdma_status_code irdma_hw_flush_wqes(struct irdma_pci_f *rf, cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, wait); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; if (!wait) @@ -2619,7 +2601,7 @@ enum irdma_status_code irdma_hw_flush_wqes(struct irdma_pci_f *rf, info->sq = true; new_req = irdma_alloc_and_get_cqp_request(&rf->cqp, true); if (!new_req) { - status = IRDMA_ERR_NO_MEMORY; + status = -ENOMEM; goto put_cqp; } cqp_info = &new_req->info; @@ -2705,24 +2687,29 @@ void irdma_flush_wqes(struct irdma_qp *iwqp, u32 flush_mask) info.sq = flush_mask & IRDMA_FLUSH_SQ; info.rq = flush_mask & IRDMA_FLUSH_RQ; - if (flush_mask & IRDMA_REFLUSH) { - if (info.sq) - iwqp->sc_qp.flush_sq = false; - if (info.rq) - iwqp->sc_qp.flush_rq = false; - } - /* Generate userflush errors in CQE */ info.sq_major_code = IRDMA_FLUSH_MAJOR_ERR; info.sq_minor_code = FLUSH_GENERAL_ERR; info.rq_major_code = IRDMA_FLUSH_MAJOR_ERR; info.rq_minor_code = FLUSH_GENERAL_ERR; info.userflushcode = true; - if (flush_code) { - if (info.sq && iwqp->sc_qp.sq_flush_code) - info.sq_minor_code = flush_code; - if (info.rq && iwqp->sc_qp.rq_flush_code) - info.rq_minor_code = flush_code; + + if (flush_mask & IRDMA_REFLUSH) { + if (info.sq) + iwqp->sc_qp.flush_sq = false; + if (info.rq) + iwqp->sc_qp.flush_rq = false; + } else { + if (flush_code) { + if (info.sq && iwqp->sc_qp.sq_flush_code) + info.sq_minor_code = flush_code; + if (info.rq && iwqp->sc_qp.rq_flush_code) + info.rq_minor_code = flush_code; + } + if (!iwqp->user_mode) + queue_delayed_work(iwqp->iwdev->cleanup_wq, + &iwqp->dwork_flush, + msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS)); } /* Issue flush */ diff --git a/drivers/infiniband/hw/irdma/i40iw_hw.c b/drivers/infiniband/hw/irdma/i40iw_hw.c index 64148ad8a604..50299f58b6b3 100644 --- a/drivers/infiniband/hw/irdma/i40iw_hw.c +++ 
b/drivers/infiniband/hw/irdma/i40iw_hw.c @@ -3,7 +3,6 @@ #include "osdep.h" #include "type.h" #include "i40iw_hw.h" -#include "status.h" #include "protos.h" static u32 i40iw_regs[IRDMA_MAX_REGS] = { @@ -202,6 +201,7 @@ void i40iw_init_hw(struct irdma_sc_dev *dev) dev->hw_attrs.uk_attrs.max_hw_read_sges = I40IW_MAX_SGE_RD; dev->hw_attrs.max_hw_device_pages = I40IW_MAX_PUSH_PAGE_COUNT; dev->hw_attrs.uk_attrs.max_hw_inline = I40IW_MAX_INLINE_DATA_SIZE; + dev->hw_attrs.page_size_cap = SZ_4K | SZ_2M; dev->hw_attrs.max_hw_ird = I40IW_MAX_IRD_SIZE; dev->hw_attrs.max_hw_ord = I40IW_MAX_ORD_SIZE; dev->hw_attrs.max_hw_wqes = I40IW_MAX_WQ_ENTRIES; diff --git a/drivers/infiniband/hw/irdma/i40iw_if.c b/drivers/infiniband/hw/irdma/i40iw_if.c index d219f64b2c3d..4053ead32416 100644 --- a/drivers/infiniband/hw/irdma/i40iw_if.c +++ b/drivers/infiniband/hw/irdma/i40iw_if.c @@ -77,6 +77,7 @@ static void i40iw_fill_device_info(struct irdma_device *iwdev, struct i40e_info rf->rdma_ver = IRDMA_GEN_1; rf->gen_ops.request_reset = i40iw_request_reset; rf->pcidev = cdev_info->pcidev; + rf->pf_id = cdev_info->fid; rf->hw.hw_addr = cdev_info->hw_addr; rf->cdev = cdev_info; rf->msix_count = cdev_info->msix_count; @@ -138,7 +139,7 @@ static int i40iw_open(struct i40e_info *cdev_info, struct i40e_client *client) if (last_qset == IRDMA_NO_QSET) last_qset = qset; else if ((qset != last_qset) && (qset != IRDMA_NO_QSET)) - iwdev->dcb = true; + iwdev->dcb_vlan_mode = true; } if (irdma_rt_init_hw(iwdev, &l2params)) { @@ -198,7 +199,7 @@ static void i40iw_remove(struct auxiliary_device *aux_dev) aux_dev); struct i40e_info *cdev_info = i40e_adev->ldev; - return i40e_client_device_unregister(cdev_info); + i40e_client_device_unregister(cdev_info); } static const struct auxiliary_device_id i40iw_auxiliary_id_table[] = { diff --git a/drivers/infiniband/hw/irdma/icrdma_hw.c b/drivers/infiniband/hw/irdma/icrdma_hw.c index cf53b17510cd..5986fd906308 100644 --- a/drivers/infiniband/hw/irdma/icrdma_hw.c +++ 
b/drivers/infiniband/hw/irdma/icrdma_hw.c @@ -139,6 +139,7 @@ void icrdma_init_hw(struct irdma_sc_dev *dev) dev->cqp_db = dev->hw_regs[IRDMA_CQPDB]; dev->cq_ack_db = dev->hw_regs[IRDMA_CQACK]; dev->irq_ops = &icrdma_irq_ops; + dev->hw_attrs.page_size_cap = SZ_4K | SZ_2M | SZ_1G; dev->hw_attrs.max_hw_ird = ICRDMA_MAX_IRD_SIZE; dev->hw_attrs.max_hw_ord = ICRDMA_MAX_ORD_SIZE; dev->hw_attrs.max_stat_inst = ICRDMA_MAX_STATS_COUNT; diff --git a/drivers/infiniband/hw/irdma/irdma.h b/drivers/infiniband/hw/irdma/irdma.h index 46c12334c735..4789e85d717b 100644 --- a/drivers/infiniband/hw/irdma/irdma.h +++ b/drivers/infiniband/hw/irdma/irdma.h @@ -127,6 +127,7 @@ struct irdma_hw_attrs { u64 max_hw_outbound_msg_size; u64 max_hw_inbound_msg_size; u64 max_mr_size; + u64 page_size_cap; u32 min_hw_qp_id; u32 min_hw_aeq_size; u32 max_hw_aeq_size; diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c index 51a41359e0b4..514453777e07 100644 --- a/drivers/infiniband/hw/irdma/main.c +++ b/drivers/infiniband/hw/irdma/main.c @@ -79,6 +79,10 @@ static void irdma_fill_qos_info(struct irdma_l2params *l2params, } for (i = 0; i < IIDC_MAX_USER_PRIORITY; i++) l2params->up2tc[i] = qos_info->up2tc[i]; + if (qos_info->pfc_mode == IIDC_DSCP_PFC_MODE) { + l2params->dscp_mode = true; + memcpy(l2params->dscp_map, qos_info->dscp_map, sizeof(l2params->dscp_map)); + } } static void irdma_iidc_event_handler(struct ice_pf *pf, struct iidc_event *event) @@ -108,8 +112,9 @@ static void irdma_iidc_event_handler(struct ice_pf *pf, struct iidc_event *event l2params.tc_changed = true; ibdev_dbg(&iwdev->ibdev, "CLNT: TC Change\n"); ice_get_qos_params(pf, &qos_info); - iwdev->dcb = qos_info.num_tc > 1; irdma_fill_qos_info(&l2params, &qos_info); + if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY) + iwdev->dcb_vlan_mode = qos_info.num_tc > 1 && !l2params.dscp_mode; irdma_change_l2params(&iwdev->vsi, &l2params); } else if (*event->type & BIT(IIDC_EVENT_CRIT_ERR)) { 
ibdev_warn(&iwdev->ibdev, "ICE OICR event notification: oicr = 0x%08x\n", @@ -157,8 +162,8 @@ static void irdma_request_reset(struct irdma_pci_f *rf) * @vsi: vsi structure * @tc_node: Traffic class node */ -static enum irdma_status_code irdma_lan_register_qset(struct irdma_sc_vsi *vsi, - struct irdma_ws_node *tc_node) +static int irdma_lan_register_qset(struct irdma_sc_vsi *vsi, + struct irdma_ws_node *tc_node) { struct irdma_device *iwdev = vsi->back_vsi; struct ice_pf *pf = iwdev->rf->cdev; @@ -171,7 +176,7 @@ static enum irdma_status_code irdma_lan_register_qset(struct irdma_sc_vsi *vsi, ret = ice_add_rdma_qset(pf, &qset); if (ret) { ibdev_dbg(&iwdev->ibdev, "WS: LAN alloc_res for rdma qset failed.\n"); - return IRDMA_ERR_REG_QSET; + return ret; } tc_node->l2_sched_node_id = qset.teid; @@ -207,7 +212,7 @@ static void irdma_remove(struct auxiliary_device *aux_dev) struct iidc_auxiliary_dev, adev); struct ice_pf *pf = iidc_adev->pf; - struct irdma_device *iwdev = dev_get_drvdata(&aux_dev->dev); + struct irdma_device *iwdev = auxiliary_get_drvdata(aux_dev); irdma_ib_unregister_device(iwdev); ice_rdma_update_vsi_filter(pf, iwdev->vsi_num, false); @@ -226,16 +231,18 @@ static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf rf->hw.hw_addr = pf->hw.hw_addr; rf->pcidev = pf->pdev; rf->msix_count = pf->num_rdma_msix; + rf->pf_id = pf->hw.pf_id; rf->msix_entries = &pf->msix_entries[pf->rdma_base_vector]; rf->default_vsi.vsi_idx = vsi->vsi_num; - rf->protocol_used = IRDMA_ROCE_PROTOCOL_ONLY; + rf->protocol_used = pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2 ? 
+ IRDMA_ROCE_PROTOCOL_ONLY : IRDMA_IWARP_PROTOCOL_ONLY; rf->rdma_ver = IRDMA_GEN_2; rf->rsrc_profile = IRDMA_HMC_PROFILE_DEFAULT; rf->rst_to = IRDMA_RST_TIMEOUT_HZ; rf->gen_ops.request_reset = irdma_request_reset; rf->limits_sel = 7; rf->iwdev = iwdev; - + mutex_init(&iwdev->ah_tbl_lock); iwdev->netdev = vsi->netdev; iwdev->vsi_num = vsi->vsi_num; iwdev->init_state = INITIAL_STATE; @@ -274,18 +281,19 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_ irdma_fill_device_info(iwdev, pf, vsi); rf = iwdev->rf; - if (irdma_ctrl_init_hw(rf)) { - err = -EIO; + err = irdma_ctrl_init_hw(rf); + if (err) goto err_ctrl_init; - } l2params.mtu = iwdev->netdev->mtu; ice_get_qos_params(pf, &qos_info); irdma_fill_qos_info(&l2params, &qos_info); - if (irdma_rt_init_hw(iwdev, &l2params)) { - err = -EIO; + if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY) + iwdev->dcb_vlan_mode = l2params.num_tc > 1 && !l2params.dscp_mode; + + err = irdma_rt_init_hw(iwdev, &l2params); + if (err) goto err_rt_init; - } err = irdma_ib_register_device(iwdev); if (err) @@ -294,7 +302,7 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_ ice_rdma_update_vsi_filter(pf, iwdev->vsi_num, true); ibdev_dbg(&iwdev->ibdev, "INIT: Gen2 PF[%d] device probe success\n", PCI_FUNC(rf->pcidev->devfn)); - dev_set_drvdata(&aux_dev->dev, iwdev); + auxiliary_set_drvdata(aux_dev, iwdev); return 0; diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index cb218cab79ac..65e966ad3453 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -40,7 +40,6 @@ #include <rdma/ib_umem.h> #include <rdma/ib_cache.h> #include <rdma/uverbs_ioctl.h> -#include "status.h" #include "osdep.h" #include "defs.h" #include "hmc.h" @@ -86,7 +85,7 @@ extern struct auxiliary_driver i40iw_auxiliary_drv; #define IRDMA_NO_QSET 0xffff #define IW_CFG_FPM_QP_COUNT 32768 -#define IRDMA_MAX_PAGES_PER_FMR 512 +#define 
IRDMA_MAX_PAGES_PER_FMR 262144 #define IRDMA_MIN_PAGES_PER_FMR 1 #define IRDMA_CQP_COMPL_RQ_WQE_FLUSHED 2 #define IRDMA_CQP_COMPL_SQ_WQE_FLUSHED 3 @@ -242,8 +241,8 @@ struct irdma_qvlist_info { struct irdma_gen_ops { void (*request_reset)(struct irdma_pci_f *rf); - enum irdma_status_code (*register_qset)(struct irdma_sc_vsi *vsi, - struct irdma_ws_node *tc_node); + int (*register_qset)(struct irdma_sc_vsi *vsi, + struct irdma_ws_node *tc_node); void (*unregister_qset)(struct irdma_sc_vsi *vsi, struct irdma_ws_node *tc_node); }; @@ -257,6 +256,7 @@ struct irdma_pci_f { u8 *mem_rsrc; u8 rdma_ver; u8 rst_to; + u8 pf_id; enum irdma_protocol_used protocol_used; u32 sd_type; u32 msix_count; @@ -332,11 +332,12 @@ struct irdma_device { struct workqueue_struct *cleanup_wq; struct irdma_sc_vsi vsi; struct irdma_cm_core cm_core; + DECLARE_HASHTABLE(ah_hash_tbl, 8); + struct mutex ah_tbl_lock; /* protect AH hash table access */ u32 roce_cwnd; u32 roce_ackcreds; u32 vendor_id; u32 vendor_part_id; - u32 device_cap_flags; u32 push_mode; u32 rcv_wnd; u16 mac_ip_table_idx; @@ -345,7 +346,7 @@ struct irdma_device { u8 iw_status; bool roce_mode:1; bool roce_dcqcn_en:1; - bool dcb:1; + bool dcb_vlan_mode:1; bool iw_ooo:1; enum init_completion_state init_state; @@ -457,10 +458,10 @@ static inline void irdma_free_rsrc(struct irdma_pci_f *rf, spin_unlock_irqrestore(&rf->rsrc_lock, flags); } -enum irdma_status_code irdma_ctrl_init_hw(struct irdma_pci_f *rf); +int irdma_ctrl_init_hw(struct irdma_pci_f *rf); void irdma_ctrl_deinit_hw(struct irdma_pci_f *rf); -enum irdma_status_code irdma_rt_init_hw(struct irdma_device *iwdev, - struct irdma_l2params *l2params); +int irdma_rt_init_hw(struct irdma_device *iwdev, + struct irdma_l2params *l2params); void irdma_rt_deinit_hw(struct irdma_device *iwdev); void irdma_qp_add_ref(struct ib_qp *ibqp); void irdma_qp_rem_ref(struct ib_qp *ibqp); @@ -489,9 +490,8 @@ void irdma_cm_disconn(struct irdma_qp *qp); bool irdma_cqp_crit_err(struct irdma_sc_dev 
*dev, u8 cqp_cmd, u16 maj_err_code, u16 min_err_code); -enum irdma_status_code -irdma_handle_cqp_op(struct irdma_pci_f *rf, - struct irdma_cqp_request *cqp_request); +int irdma_handle_cqp_op(struct irdma_pci_f *rf, + struct irdma_cqp_request *cqp_request); int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); @@ -500,21 +500,17 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, void irdma_cq_wq_destroy(struct irdma_pci_f *rf, struct irdma_sc_cq *cq); void irdma_cleanup_pending_cqp_op(struct irdma_pci_f *rf); -enum irdma_status_code irdma_hw_modify_qp(struct irdma_device *iwdev, - struct irdma_qp *iwqp, - struct irdma_modify_qp_info *info, - bool wait); -enum irdma_status_code irdma_qp_suspend_resume(struct irdma_sc_qp *qp, - bool suspend); -enum irdma_status_code -irdma_manage_qhash(struct irdma_device *iwdev, struct irdma_cm_info *cminfo, - enum irdma_quad_entry_type etype, - enum irdma_quad_hash_manage_type mtype, void *cmnode, - bool wait); +int irdma_hw_modify_qp(struct irdma_device *iwdev, struct irdma_qp *iwqp, + struct irdma_modify_qp_info *info, bool wait); +int irdma_qp_suspend_resume(struct irdma_sc_qp *qp, bool suspend); +int irdma_manage_qhash(struct irdma_device *iwdev, struct irdma_cm_info *cminfo, + enum irdma_quad_entry_type etype, + enum irdma_quad_hash_manage_type mtype, void *cmnode, + bool wait); void irdma_receive_ilq(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *rbuf); void irdma_free_sqbuf(struct irdma_sc_vsi *vsi, void *bufp); void irdma_free_qp_rsrc(struct irdma_qp *iwqp); -enum irdma_status_code irdma_setup_cm_core(struct irdma_device *iwdev, u8 ver); +int irdma_setup_cm_core(struct irdma_device *iwdev, u8 ver); void irdma_cleanup_cm_core(struct irdma_cm_core *cm_core); void irdma_next_iw_state(struct irdma_qp *iwqp, u8 state, u8 del_hash, u8 term, u8 term_len); @@ -523,10 +519,8 @@ int irdma_send_reset(struct irdma_cm_node *cm_node); struct irdma_cm_node 
*irdma_find_node(struct irdma_cm_core *cm_core, u16 rem_port, u32 *rem_addr, u16 loc_port, u32 *loc_addr, u16 vlan_id); -enum irdma_status_code irdma_hw_flush_wqes(struct irdma_pci_f *rf, - struct irdma_sc_qp *qp, - struct irdma_qp_flush_info *info, - bool wait); +int irdma_hw_flush_wqes(struct irdma_pci_f *rf, struct irdma_sc_qp *qp, + struct irdma_qp_flush_info *info, bool wait); void irdma_gen_ae(struct irdma_pci_f *rf, struct irdma_sc_qp *qp, struct irdma_gen_ae_info *info, bool wait); void irdma_copy_ip_ntohl(u32 *dst, __be32 *src); diff --git a/drivers/infiniband/hw/irdma/osdep.h b/drivers/infiniband/hw/irdma/osdep.h index 63d8bb3a6903..fc1ba2a3e6fb 100644 --- a/drivers/infiniband/hw/irdma/osdep.h +++ b/drivers/infiniband/hw/irdma/osdep.h @@ -5,6 +5,7 @@ #include <linux/pci.h> #include <linux/bitfield.h> +#include <linux/net/intel/iidc.h> #include <crypto/hash.h> #include <rdma/ib_verbs.h> @@ -42,32 +43,28 @@ enum irdma_status_code irdma_vf_wait_vchnl_resp(struct irdma_sc_dev *dev); bool irdma_vf_clear_to_send(struct irdma_sc_dev *dev); void irdma_add_dev_ref(struct irdma_sc_dev *dev); void irdma_put_dev_ref(struct irdma_sc_dev *dev); -enum irdma_status_code irdma_ieq_check_mpacrc(struct shash_desc *desc, - void *addr, u32 len, u32 val); +int irdma_ieq_check_mpacrc(struct shash_desc *desc, void *addr, u32 len, + u32 val); struct irdma_sc_qp *irdma_ieq_get_qp(struct irdma_sc_dev *dev, struct irdma_puda_buf *buf); void irdma_send_ieq_ack(struct irdma_sc_qp *qp); void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len, u32 seqnum); void irdma_free_hash_desc(struct shash_desc *hash_desc); -enum irdma_status_code irdma_init_hash_desc(struct shash_desc **hash_desc); -enum irdma_status_code -irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info, - struct irdma_puda_buf *buf); -enum irdma_status_code irdma_cqp_sds_cmd(struct irdma_sc_dev *dev, - struct irdma_update_sds_info *info); -enum irdma_status_code -irdma_cqp_manage_hmc_fcn_cmd(struct 
irdma_sc_dev *dev, - struct irdma_hmc_fcn_info *hmcfcninfo, - u16 *pmf_idx); -enum irdma_status_code -irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev, - struct irdma_dma_mem *val_mem, u8 hmc_fn_id); -enum irdma_status_code -irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev, - struct irdma_dma_mem *val_mem, u8 hmc_fn_id); -enum irdma_status_code irdma_alloc_query_fpm_buf(struct irdma_sc_dev *dev, - struct irdma_dma_mem *mem); +int irdma_init_hash_desc(struct shash_desc **hash_desc); +int irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info, + struct irdma_puda_buf *buf); +int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev, + struct irdma_update_sds_info *info); +int irdma_cqp_manage_hmc_fcn_cmd(struct irdma_sc_dev *dev, + struct irdma_hmc_fcn_info *hmcfcninfo, + u16 *pmf_idx); +int irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev, + struct irdma_dma_mem *val_mem, u8 hmc_fn_id); +int irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev, + struct irdma_dma_mem *val_mem, u8 hmc_fn_id); +int irdma_alloc_query_fpm_buf(struct irdma_sc_dev *dev, + struct irdma_dma_mem *mem); void *irdma_remove_cqp_head(struct irdma_sc_dev *dev); void irdma_term_modify_qp(struct irdma_sc_qp *qp, u8 next_state, u8 term, u8 term_len); @@ -79,7 +76,7 @@ void irdma_hw_stats_stop_timer(struct irdma_sc_vsi *vsi); void wr32(struct irdma_hw *hw, u32 reg, u32 val); u32 rd32(struct irdma_hw *hw, u32 reg); u64 rd64(struct irdma_hw *hw, u32 reg); -enum irdma_status_code irdma_map_vm_page_list(struct irdma_hw *hw, void *va, - dma_addr_t *pg_dma, u32 pg_cnt); +int irdma_map_vm_page_list(struct irdma_hw *hw, void *va, dma_addr_t *pg_dma, + u32 pg_cnt); void irdma_unmap_vm_page_list(struct irdma_hw *hw, dma_addr_t *pg_dma, u32 pg_cnt); #endif /* IRDMA_OSDEP_H */ diff --git a/drivers/infiniband/hw/irdma/pble.c b/drivers/infiniband/hw/irdma/pble.c index fed49da770f3..cdc0b8a6ed48 100644 --- a/drivers/infiniband/hw/irdma/pble.c +++ b/drivers/infiniband/hw/irdma/pble.c @@ -1,15 +1,13 @@ // 
SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB /* Copyright (c) 2015 - 2021 Intel Corporation */ #include "osdep.h" -#include "status.h" #include "hmc.h" #include "defs.h" #include "type.h" #include "protos.h" #include "pble.h" -static enum irdma_status_code -add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc); +static int add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc); /** * irdma_destroy_pble_prm - destroy prm during module unload @@ -35,13 +33,12 @@ void irdma_destroy_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc) * @dev: irdma_sc_dev struct * @pble_rsrc: pble resources */ -enum irdma_status_code -irdma_hmc_init_pble(struct irdma_sc_dev *dev, - struct irdma_hmc_pble_rsrc *pble_rsrc) +int irdma_hmc_init_pble(struct irdma_sc_dev *dev, + struct irdma_hmc_pble_rsrc *pble_rsrc) { struct irdma_hmc_info *hmc_info; u32 fpm_idx = 0; - enum irdma_status_code status = 0; + int status = 0; hmc_info = dev->hmc_info; pble_rsrc->dev = dev; @@ -60,7 +57,7 @@ irdma_hmc_init_pble(struct irdma_sc_dev *dev, INIT_LIST_HEAD(&pble_rsrc->pinfo.clist); if (add_pble_prm(pble_rsrc)) { irdma_destroy_pble_prm(pble_rsrc); - status = IRDMA_ERR_NO_MEMORY; + status = -ENOMEM; } return status; @@ -84,12 +81,11 @@ static void get_sd_pd_idx(struct irdma_hmc_pble_rsrc *pble_rsrc, * @pble_rsrc: pble resource ptr * @info: page info for sd */ -static enum irdma_status_code -add_sd_direct(struct irdma_hmc_pble_rsrc *pble_rsrc, - struct irdma_add_page_info *info) +static int add_sd_direct(struct irdma_hmc_pble_rsrc *pble_rsrc, + struct irdma_add_page_info *info) { struct irdma_sc_dev *dev = pble_rsrc->dev; - enum irdma_status_code ret_code = 0; + int ret_code = 0; struct sd_pd_idx *idx = &info->idx; struct irdma_chunk *chunk = info->chunk; struct irdma_hmc_info *hmc_info = info->hmc_info; @@ -137,9 +133,8 @@ static u32 fpm_to_idx(struct irdma_hmc_pble_rsrc *pble_rsrc, u64 addr) * @pble_rsrc: pble resource management * @info: page info for sd */ -static enum irdma_status_code -add_bp_pages(struct 
irdma_hmc_pble_rsrc *pble_rsrc, - struct irdma_add_page_info *info) +static int add_bp_pages(struct irdma_hmc_pble_rsrc *pble_rsrc, + struct irdma_add_page_info *info) { struct irdma_sc_dev *dev = pble_rsrc->dev; u8 *addr; @@ -148,13 +143,13 @@ add_bp_pages(struct irdma_hmc_pble_rsrc *pble_rsrc, struct irdma_hmc_sd_entry *sd_entry = info->sd_entry; struct irdma_hmc_info *hmc_info = info->hmc_info; struct irdma_chunk *chunk = info->chunk; - enum irdma_status_code status = 0; + int status = 0; u32 rel_pd_idx = info->idx.rel_pd_idx; u32 pd_idx = info->idx.pd_idx; u32 i; if (irdma_pble_get_paged_mem(chunk, info->pages)) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; status = irdma_add_sd_table_entry(dev->hw, hmc_info, info->idx.sd_idx, IRDMA_SD_TYPE_PAGED, @@ -207,8 +202,7 @@ static enum irdma_sd_entry_type irdma_get_type(struct irdma_sc_dev *dev, * add_pble_prm - add a sd entry for pble resoure * @pble_rsrc: pble resource management */ -static enum irdma_status_code -add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc) +static int add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc) { struct irdma_sc_dev *dev = pble_rsrc->dev; struct irdma_hmc_sd_entry *sd_entry; @@ -216,22 +210,22 @@ add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc) struct irdma_chunk *chunk; struct irdma_add_page_info info; struct sd_pd_idx *idx = &info.idx; - enum irdma_status_code ret_code = 0; + int ret_code = 0; enum irdma_sd_entry_type sd_entry_type; u64 sd_reg_val = 0; struct irdma_virt_mem chunkmem; u32 pages; if (pble_rsrc->unallocated_pble < PBLE_PER_PAGE) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; if (pble_rsrc->next_fpm_addr & 0xfff) - return IRDMA_ERR_INVALID_PAGE_DESC_INDEX; + return -EINVAL; chunkmem.size = sizeof(*chunk); chunkmem.va = kzalloc(chunkmem.size, GFP_KERNEL); if (!chunkmem.va) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; chunk = chunkmem.va; chunk->chunkmem = chunkmem; @@ -337,9 +331,8 @@ static void free_lvl2(struct irdma_hmc_pble_rsrc *pble_rsrc, * @pble_rsrc: 
pble resource management * @palloc: level 2 pble allocation */ -static enum irdma_status_code -get_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, - struct irdma_pble_alloc *palloc) +static int get_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, + struct irdma_pble_alloc *palloc) { u32 lf4k, lflast, total, i; u32 pblcnt = PBLE_PER_PAGE; @@ -347,7 +340,7 @@ get_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, struct irdma_pble_level2 *lvl2 = &palloc->level2; struct irdma_pble_info *root = &lvl2->root; struct irdma_pble_info *leaf; - enum irdma_status_code ret_code; + int ret_code; u64 fpm_addr; /* number of full 512 (4K) leafs) */ @@ -359,7 +352,7 @@ get_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, lvl2->leafmem.size = (sizeof(*leaf) * total); lvl2->leafmem.va = kzalloc(lvl2->leafmem.size, GFP_KERNEL); if (!lvl2->leafmem.va) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; lvl2->leaf = lvl2->leafmem.va; leaf = lvl2->leaf; @@ -368,7 +361,7 @@ get_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, if (ret_code) { kfree(lvl2->leafmem.va); lvl2->leaf = NULL; - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; } root->idx = fpm_to_idx(pble_rsrc, fpm_addr); @@ -397,7 +390,7 @@ get_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, error: free_lvl2(pble_rsrc, palloc); - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; } /** @@ -405,11 +398,10 @@ error: * @pble_rsrc: pble resource management * @palloc: level 1 pble allocation */ -static enum irdma_status_code -get_lvl1_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, - struct irdma_pble_alloc *palloc) +static int get_lvl1_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, + struct irdma_pble_alloc *palloc) { - enum irdma_status_code ret_code; + int ret_code; u64 fpm_addr; struct irdma_pble_info *lvl1 = &palloc->level1; @@ -417,7 +409,7 @@ get_lvl1_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, palloc->total_cnt << 3, &lvl1->addr, &fpm_addr); if (ret_code) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; palloc->level = PBLE_LEVEL_1; lvl1->idx = 
fpm_to_idx(pble_rsrc, fpm_addr); @@ -433,11 +425,10 @@ get_lvl1_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, * @palloc: contains all inforamtion regarding pble (idx + pble addr) * @level1_only: flag for a level 1 PBLE */ -static enum irdma_status_code -get_lvl1_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, - struct irdma_pble_alloc *palloc, bool level1_only) +static int get_lvl1_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, + struct irdma_pble_alloc *palloc, bool level1_only) { - enum irdma_status_code status = 0; + int status = 0; status = get_lvl1_pble(pble_rsrc, palloc); if (!status || level1_only || palloc->total_cnt <= PBLE_PER_PAGE) @@ -455,11 +446,11 @@ get_lvl1_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, * @pble_cnt: #of pbles requested * @level1_only: true if only pble level 1 to acquire */ -enum irdma_status_code irdma_get_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, - struct irdma_pble_alloc *palloc, - u32 pble_cnt, bool level1_only) +int irdma_get_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, + struct irdma_pble_alloc *palloc, u32 pble_cnt, + bool level1_only) { - enum irdma_status_code status = 0; + int status = 0; int max_sds = 0; int i; diff --git a/drivers/infiniband/hw/irdma/pble.h b/drivers/infiniband/hw/irdma/pble.h index aa20827dcc9d..29d295463559 100644 --- a/drivers/infiniband/hw/irdma/pble.h +++ b/drivers/infiniband/hw/irdma/pble.h @@ -69,7 +69,7 @@ struct irdma_add_page_info { struct irdma_chunk { struct list_head list; struct irdma_dma_info dmainfo; - void *bitmapbuf; + unsigned long *bitmapbuf; u32 sizeofbitmap; u64 size; @@ -108,20 +108,18 @@ struct irdma_hmc_pble_rsrc { }; void irdma_destroy_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc); -enum irdma_status_code -irdma_hmc_init_pble(struct irdma_sc_dev *dev, - struct irdma_hmc_pble_rsrc *pble_rsrc); +int irdma_hmc_init_pble(struct irdma_sc_dev *dev, + struct irdma_hmc_pble_rsrc *pble_rsrc); void irdma_free_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, struct irdma_pble_alloc 
*palloc); -enum irdma_status_code irdma_get_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, - struct irdma_pble_alloc *palloc, - u32 pble_cnt, bool level1_only); -enum irdma_status_code irdma_prm_add_pble_mem(struct irdma_pble_prm *pprm, - struct irdma_chunk *pchunk); -enum irdma_status_code -irdma_prm_get_pbles(struct irdma_pble_prm *pprm, - struct irdma_pble_chunkinfo *chunkinfo, u64 mem_size, - u64 **vaddr, u64 *fpm_addr); +int irdma_get_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, + struct irdma_pble_alloc *palloc, u32 pble_cnt, + bool level1_only); +int irdma_prm_add_pble_mem(struct irdma_pble_prm *pprm, + struct irdma_chunk *pchunk); +int irdma_prm_get_pbles(struct irdma_pble_prm *pprm, + struct irdma_pble_chunkinfo *chunkinfo, u64 mem_size, + u64 **vaddr, u64 *fpm_addr); void irdma_prm_return_pbles(struct irdma_pble_prm *pprm, struct irdma_pble_chunkinfo *chunkinfo); void irdma_pble_acquire_lock(struct irdma_hmc_pble_rsrc *pble_rsrc, @@ -129,7 +127,6 @@ void irdma_pble_acquire_lock(struct irdma_hmc_pble_rsrc *pble_rsrc, void irdma_pble_release_lock(struct irdma_hmc_pble_rsrc *pble_rsrc, unsigned long *flags); void irdma_pble_free_paged_mem(struct irdma_chunk *chunk); -enum irdma_status_code irdma_pble_get_paged_mem(struct irdma_chunk *chunk, - u32 pg_cnt); +int irdma_pble_get_paged_mem(struct irdma_chunk *chunk, u32 pg_cnt); void irdma_prm_rem_bitmapmem(struct irdma_hw *hw, struct irdma_chunk *chunk); #endif /* IRDMA_PBLE_H */ diff --git a/drivers/infiniband/hw/irdma/protos.h b/drivers/infiniband/hw/irdma/protos.h index a17c0ffb0cc8..9b6e919ae2a9 100644 --- a/drivers/infiniband/hw/irdma/protos.h +++ b/drivers/infiniband/hw/irdma/protos.h @@ -12,58 +12,51 @@ #define CQP_TIMEOUT_THRESHOLD 500 /* init operations */ -enum irdma_status_code irdma_sc_dev_init(enum irdma_vers ver, - struct irdma_sc_dev *dev, - struct irdma_device_init_info *info); +int irdma_sc_dev_init(enum irdma_vers ver, struct irdma_sc_dev *dev, + struct irdma_device_init_info *info); void 
irdma_sc_rt_init(struct irdma_sc_dev *dev); void irdma_sc_cqp_post_sq(struct irdma_sc_cqp *cqp); __le64 *irdma_sc_cqp_get_next_send_wqe(struct irdma_sc_cqp *cqp, u64 scratch); -enum irdma_status_code -irdma_sc_mr_fast_register(struct irdma_sc_qp *qp, - struct irdma_fast_reg_stag_info *info, bool post_sq); +int irdma_sc_mr_fast_register(struct irdma_sc_qp *qp, + struct irdma_fast_reg_stag_info *info, + bool post_sq); /* HMC/FPM functions */ -enum irdma_status_code irdma_sc_init_iw_hmc(struct irdma_sc_dev *dev, - u8 hmc_fn_id); +int irdma_sc_init_iw_hmc(struct irdma_sc_dev *dev, u8 hmc_fn_id); /* stats misc */ -enum irdma_status_code -irdma_cqp_gather_stats_cmd(struct irdma_sc_dev *dev, - struct irdma_vsi_pestat *pestat, bool wait); +int irdma_cqp_gather_stats_cmd(struct irdma_sc_dev *dev, + struct irdma_vsi_pestat *pestat, bool wait); void irdma_cqp_gather_stats_gen1(struct irdma_sc_dev *dev, struct irdma_vsi_pestat *pestat); void irdma_hw_stats_read_all(struct irdma_vsi_pestat *stats, struct irdma_dev_hw_stats *stats_values, u64 *hw_stats_regs_32, u64 *hw_stats_regs_64, u8 hw_rev); -enum irdma_status_code -irdma_cqp_ws_node_cmd(struct irdma_sc_dev *dev, u8 cmd, - struct irdma_ws_node_info *node_info); -enum irdma_status_code irdma_cqp_ceq_cmd(struct irdma_sc_dev *dev, - struct irdma_sc_ceq *sc_ceq, u8 op); -enum irdma_status_code irdma_cqp_aeq_cmd(struct irdma_sc_dev *dev, - struct irdma_sc_aeq *sc_aeq, u8 op); -enum irdma_status_code -irdma_cqp_stats_inst_cmd(struct irdma_sc_vsi *vsi, u8 cmd, - struct irdma_stats_inst_info *stats_info); +int irdma_cqp_ws_node_cmd(struct irdma_sc_dev *dev, u8 cmd, + struct irdma_ws_node_info *node_info); +int irdma_cqp_ceq_cmd(struct irdma_sc_dev *dev, struct irdma_sc_ceq *sc_ceq, + u8 op); +int irdma_cqp_aeq_cmd(struct irdma_sc_dev *dev, struct irdma_sc_aeq *sc_aeq, + u8 op); +int irdma_cqp_stats_inst_cmd(struct irdma_sc_vsi *vsi, u8 cmd, + struct irdma_stats_inst_info *stats_info); u16 irdma_alloc_ws_node_id(struct irdma_sc_dev 
*dev); void irdma_free_ws_node_id(struct irdma_sc_dev *dev, u16 node_id); void irdma_update_stats(struct irdma_dev_hw_stats *hw_stats, struct irdma_gather_stats *gather_stats, struct irdma_gather_stats *last_gather_stats); /* vsi functions */ -enum irdma_status_code irdma_vsi_stats_init(struct irdma_sc_vsi *vsi, - struct irdma_vsi_stats_info *info); +int irdma_vsi_stats_init(struct irdma_sc_vsi *vsi, + struct irdma_vsi_stats_info *info); void irdma_vsi_stats_free(struct irdma_sc_vsi *vsi); void irdma_sc_vsi_init(struct irdma_sc_vsi *vsi, struct irdma_vsi_init_info *info); -enum irdma_status_code irdma_sc_add_cq_ctx(struct irdma_sc_ceq *ceq, - struct irdma_sc_cq *cq); +int irdma_sc_add_cq_ctx(struct irdma_sc_ceq *ceq, struct irdma_sc_cq *cq); void irdma_sc_remove_cq_ctx(struct irdma_sc_ceq *ceq, struct irdma_sc_cq *cq); /* misc L2 param change functions */ void irdma_change_l2params(struct irdma_sc_vsi *vsi, struct irdma_l2params *l2params); void irdma_sc_suspend_resume_qps(struct irdma_sc_vsi *vsi, u8 suspend); -enum irdma_status_code irdma_cqp_qp_suspend_resume(struct irdma_sc_qp *qp, - u8 cmd); +int irdma_cqp_qp_suspend_resume(struct irdma_sc_qp *qp, u8 cmd); void irdma_qp_add_qos(struct irdma_sc_qp *qp); void irdma_qp_rem_qos(struct irdma_sc_qp *qp); struct irdma_sc_qp *irdma_get_qp_from_list(struct list_head *head, @@ -81,31 +74,26 @@ void irdma_terminate_received(struct irdma_sc_qp *qp, /* misc */ u8 irdma_get_encoded_wqe_size(u32 wqsize, enum irdma_queue_type queue_type); void irdma_modify_qp_to_err(struct irdma_sc_qp *sc_qp); -enum irdma_status_code -irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, u64 scratch, - u8 hmc_fn_id, bool post_sq, - bool poll_registers); -enum irdma_status_code irdma_cfg_fpm_val(struct irdma_sc_dev *dev, - u32 qp_count); -enum irdma_status_code irdma_get_rdma_features(struct irdma_sc_dev *dev); +int irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, u64 scratch, + u8 hmc_fn_id, bool post_sq, + bool 
poll_registers); +int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count); +int irdma_get_rdma_features(struct irdma_sc_dev *dev); void free_sd_mem(struct irdma_sc_dev *dev); -enum irdma_status_code irdma_process_cqp_cmd(struct irdma_sc_dev *dev, - struct cqp_cmds_info *pcmdinfo); -enum irdma_status_code irdma_process_bh(struct irdma_sc_dev *dev); -enum irdma_status_code irdma_cqp_sds_cmd(struct irdma_sc_dev *dev, - struct irdma_update_sds_info *info); -enum irdma_status_code -irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev, - struct irdma_dma_mem *val_mem, u8 hmc_fn_id); -enum irdma_status_code -irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev, - struct irdma_dma_mem *val_mem, u8 hmc_fn_id); -enum irdma_status_code irdma_alloc_query_fpm_buf(struct irdma_sc_dev *dev, - struct irdma_dma_mem *mem); -enum irdma_status_code -irdma_cqp_manage_hmc_fcn_cmd(struct irdma_sc_dev *dev, - struct irdma_hmc_fcn_info *hmcfcninfo, - u16 *pmf_idx); +int irdma_process_cqp_cmd(struct irdma_sc_dev *dev, + struct cqp_cmds_info *pcmdinfo); +int irdma_process_bh(struct irdma_sc_dev *dev); +int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev, + struct irdma_update_sds_info *info); +int irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev, + struct irdma_dma_mem *val_mem, u8 hmc_fn_id); +int irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev, + struct irdma_dma_mem *val_mem, u8 hmc_fn_id); +int irdma_alloc_query_fpm_buf(struct irdma_sc_dev *dev, + struct irdma_dma_mem *mem); +int irdma_cqp_manage_hmc_fcn_cmd(struct irdma_sc_dev *dev, + struct irdma_hmc_fcn_info *hmcfcninfo, + u16 *pmf_idx); void irdma_add_dev_ref(struct irdma_sc_dev *dev); void irdma_put_dev_ref(struct irdma_sc_dev *dev); void *irdma_remove_cqp_head(struct irdma_sc_dev *dev); diff --git a/drivers/infiniband/hw/irdma/puda.c b/drivers/infiniband/hw/irdma/puda.c index 58e7d875643b..4ec9639f1bdb 100644 --- a/drivers/infiniband/hw/irdma/puda.c +++ b/drivers/infiniband/hw/irdma/puda.c @@ -1,7 +1,6 @@ // 
SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB /* Copyright (c) 2015 - 2021 Intel Corporation */ #include "osdep.h" -#include "status.h" #include "hmc.h" #include "defs.h" #include "type.h" @@ -114,8 +113,7 @@ static void irdma_puda_post_recvbuf(struct irdma_puda_rsrc *rsrc, u32 wqe_idx, * @rsrc: resource to use for buffer * @initial: flag if during init time */ -static enum irdma_status_code -irdma_puda_replenish_rq(struct irdma_puda_rsrc *rsrc, bool initial) +static int irdma_puda_replenish_rq(struct irdma_puda_rsrc *rsrc, bool initial) { u32 i; u32 invalid_cnt = rsrc->rxq_invalid_cnt; @@ -124,7 +122,7 @@ irdma_puda_replenish_rq(struct irdma_puda_rsrc *rsrc, bool initial) for (i = 0; i < invalid_cnt; i++) { buf = irdma_puda_get_bufpool(rsrc); if (!buf) - return IRDMA_ERR_list_empty; + return -ENOBUFS; irdma_puda_post_recvbuf(rsrc, rsrc->rx_wqe_idx, buf, initial); rsrc->rx_wqe_idx = ((rsrc->rx_wqe_idx + 1) % rsrc->rq_size); rsrc->rxq_invalid_cnt--; @@ -193,19 +191,16 @@ static void irdma_puda_dele_buf(struct irdma_sc_dev *dev, static __le64 *irdma_puda_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx) { - __le64 *wqe = NULL; - enum irdma_status_code ret_code = 0; + int ret_code = 0; *wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring); if (!*wqe_idx) qp->swqe_polarity = !qp->swqe_polarity; IRDMA_RING_MOVE_HEAD(qp->sq_ring, ret_code); if (ret_code) - return wqe; - - wqe = qp->sq_base[*wqe_idx].elem; + return NULL; - return wqe; + return qp->sq_base[*wqe_idx].elem; } /** @@ -213,8 +208,8 @@ static __le64 *irdma_puda_get_next_send_wqe(struct irdma_qp_uk *qp, * @cq: cq for poll * @info: info return for successful completion */ -static enum irdma_status_code -irdma_puda_poll_info(struct irdma_sc_cq *cq, struct irdma_puda_cmpl_info *info) +static int irdma_puda_poll_info(struct irdma_sc_cq *cq, + struct irdma_puda_cmpl_info *info) { struct irdma_cq_uk *cq_uk = &cq->cq_uk; u64 qword0, qword2, qword3, qword6; @@ -233,7 +228,7 @@ irdma_puda_poll_info(struct irdma_sc_cq 
*cq, struct irdma_puda_cmpl_info *info) get_64bit_val(cqe, 24, &qword3); valid_bit = (bool)FIELD_GET(IRDMA_CQ_VALID, qword3); if (valid_bit != cq_uk->polarity) - return IRDMA_ERR_Q_EMPTY; + return -ENOENT; if (cq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) ext_valid = (bool)FIELD_GET(IRDMA_CQ_EXTCQE, qword3); @@ -246,7 +241,7 @@ irdma_puda_poll_info(struct irdma_sc_cq *cq, struct irdma_puda_cmpl_info *info) if (!peek_head) polarity ^= 1; if (polarity != cq_uk->polarity) - return IRDMA_ERR_Q_EMPTY; + return -ENOENT; IRDMA_RING_MOVE_HEAD_NOCHECK(cq_uk->cq_ring); if (!IRDMA_RING_CURRENT_HEAD(cq_uk->cq_ring)) @@ -267,7 +262,7 @@ irdma_puda_poll_info(struct irdma_sc_cq *cq, struct irdma_puda_cmpl_info *info) major_err = (u32)(FIELD_GET(IRDMA_CQ_MAJERR, qword3)); minor_err = (u32)(FIELD_GET(IRDMA_CQ_MINERR, qword3)); info->compl_error = major_err << 16 | minor_err; - return IRDMA_ERR_CQ_COMPL_ERROR; + return -EIO; } get_64bit_val(cqe, 0, &qword0); @@ -319,14 +314,13 @@ irdma_puda_poll_info(struct irdma_sc_cq *cq, struct irdma_puda_cmpl_info *info) * @cq: cq getting interrupt * @compl_err: return any completion err */ -enum irdma_status_code irdma_puda_poll_cmpl(struct irdma_sc_dev *dev, - struct irdma_sc_cq *cq, - u32 *compl_err) +int irdma_puda_poll_cmpl(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq, + u32 *compl_err) { struct irdma_qp_uk *qp; struct irdma_cq_uk *cq_uk = &cq->cq_uk; struct irdma_puda_cmpl_info info = {}; - enum irdma_status_code ret = 0; + int ret = 0; struct irdma_puda_buf *buf; struct irdma_puda_rsrc *rsrc; u8 cq_type = cq->cq_type; @@ -337,24 +331,24 @@ enum irdma_status_code irdma_puda_poll_cmpl(struct irdma_sc_dev *dev, cq->vsi->ieq; } else { ibdev_dbg(to_ibdev(dev), "PUDA: qp_type error\n"); - return IRDMA_ERR_BAD_PTR; + return -EINVAL; } ret = irdma_puda_poll_info(cq, &info); *compl_err = info.compl_error; - if (ret == IRDMA_ERR_Q_EMPTY) + if (ret == -ENOENT) return ret; if (ret) goto done; qp = info.qp; if (!qp || !rsrc) { - ret = 
IRDMA_ERR_BAD_PTR; + ret = -EFAULT; goto done; } if (qp->qp_id != rsrc->qp_id) { - ret = IRDMA_ERR_BAD_PTR; + ret = -EFAULT; goto done; } @@ -422,8 +416,7 @@ done: * @qp: puda qp for send * @info: buffer information for transmit */ -enum irdma_status_code irdma_puda_send(struct irdma_sc_qp *qp, - struct irdma_puda_send_info *info) +int irdma_puda_send(struct irdma_sc_qp *qp, struct irdma_puda_send_info *info) { __le64 *wqe; u32 iplen, l4len; @@ -443,7 +436,7 @@ enum irdma_status_code irdma_puda_send(struct irdma_sc_qp *qp, wqe = irdma_puda_get_next_send_wqe(&qp->qp_uk, &wqe_idx); if (!wqe) - return IRDMA_ERR_QP_TOOMANY_WRS_POSTED; + return -ENOMEM; qp->qp_uk.sq_wrtrk_array[wqe_idx].wrid = (uintptr_t)info->scratch; /* Third line of WQE descriptor */ @@ -503,7 +496,7 @@ void irdma_puda_send_buf(struct irdma_puda_rsrc *rsrc, struct irdma_puda_buf *buf) { struct irdma_puda_send_info info; - enum irdma_status_code ret = 0; + int ret = 0; unsigned long flags; spin_lock_irqsave(&rsrc->bufpool_lock, flags); @@ -603,19 +596,18 @@ static void irdma_puda_qp_setctx(struct irdma_puda_rsrc *rsrc) * @dev: Device * @qp: Resource qp */ -static enum irdma_status_code irdma_puda_qp_wqe(struct irdma_sc_dev *dev, - struct irdma_sc_qp *qp) +static int irdma_puda_qp_wqe(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp) { struct irdma_sc_cqp *cqp; __le64 *wqe; u64 hdr; struct irdma_ccq_cqe_info compl_info; - enum irdma_status_code status = 0; + int status = 0; cqp = dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, 0); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 16, qp->hw_host_ctx_pa); set_64bit_val(wqe, 40, qp->shadow_area_pa); @@ -643,11 +635,11 @@ static enum irdma_status_code irdma_puda_qp_wqe(struct irdma_sc_dev *dev, * irdma_puda_qp_create - create qp for resource * @rsrc: resource to use for buffer */ -static enum irdma_status_code irdma_puda_qp_create(struct irdma_puda_rsrc *rsrc) +static int irdma_puda_qp_create(struct irdma_puda_rsrc *rsrc) { 
struct irdma_sc_qp *qp = &rsrc->qp; struct irdma_qp_uk *ukqp = &qp->qp_uk; - enum irdma_status_code ret = 0; + int ret = 0; u32 sq_size, rq_size; struct irdma_dma_mem *mem; @@ -659,7 +651,7 @@ static enum irdma_status_code irdma_puda_qp_create(struct irdma_puda_rsrc *rsrc) rsrc->qpmem.size, &rsrc->qpmem.pa, GFP_KERNEL); if (!rsrc->qpmem.va) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; mem = &rsrc->qpmem; memset(mem->va, 0, rsrc->qpmem.size); @@ -722,19 +714,18 @@ static enum irdma_status_code irdma_puda_qp_create(struct irdma_puda_rsrc *rsrc) * @dev: Device * @cq: resource for cq */ -static enum irdma_status_code irdma_puda_cq_wqe(struct irdma_sc_dev *dev, - struct irdma_sc_cq *cq) +static int irdma_puda_cq_wqe(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq) { __le64 *wqe; struct irdma_sc_cqp *cqp; u64 hdr; struct irdma_ccq_cqe_info compl_info; - enum irdma_status_code status = 0; + int status = 0; cqp = dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, 0); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 0, cq->cq_uk.cq_size); set_64bit_val(wqe, 8, (uintptr_t)cq >> 1); @@ -775,11 +766,11 @@ static enum irdma_status_code irdma_puda_cq_wqe(struct irdma_sc_dev *dev, * irdma_puda_cq_create - create cq for resource * @rsrc: resource for which cq to create */ -static enum irdma_status_code irdma_puda_cq_create(struct irdma_puda_rsrc *rsrc) +static int irdma_puda_cq_create(struct irdma_puda_rsrc *rsrc) { struct irdma_sc_dev *dev = rsrc->dev; struct irdma_sc_cq *cq = &rsrc->cq; - enum irdma_status_code ret = 0; + int ret = 0; u32 cqsize; struct irdma_dma_mem *mem; struct irdma_cq_init_info info = {}; @@ -792,7 +783,7 @@ static enum irdma_status_code irdma_puda_cq_create(struct irdma_puda_rsrc *rsrc) rsrc->cqmem.va = dma_alloc_coherent(dev->hw->device, rsrc->cqmem.size, &rsrc->cqmem.pa, GFP_KERNEL); if (!rsrc->cqmem.va) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; mem = &rsrc->cqmem; info.dev = dev; @@ -833,7 +824,7 @@ error: */ static 
void irdma_puda_free_qp(struct irdma_puda_rsrc *rsrc) { - enum irdma_status_code ret; + int ret; struct irdma_ccq_cqe_info compl_info; struct irdma_sc_dev *dev = rsrc->dev; @@ -865,7 +856,7 @@ static void irdma_puda_free_qp(struct irdma_puda_rsrc *rsrc) */ static void irdma_puda_free_cq(struct irdma_puda_rsrc *rsrc) { - enum irdma_status_code ret; + int ret; struct irdma_ccq_cqe_info compl_info; struct irdma_sc_dev *dev = rsrc->dev; @@ -967,8 +958,7 @@ void irdma_puda_dele_rsrc(struct irdma_sc_vsi *vsi, enum puda_rsrc_type type, * @rsrc: resource for buffer allocation * @count: number of buffers to create */ -static enum irdma_status_code irdma_puda_allocbufs(struct irdma_puda_rsrc *rsrc, - u32 count) +static int irdma_puda_allocbufs(struct irdma_puda_rsrc *rsrc, u32 count) { u32 i; struct irdma_puda_buf *buf; @@ -978,7 +968,7 @@ static enum irdma_status_code irdma_puda_allocbufs(struct irdma_puda_rsrc *rsrc, buf = irdma_puda_alloc_buf(rsrc->dev, rsrc->buf_size); if (!buf) { rsrc->stats_buf_alloc_fail++; - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; } irdma_puda_ret_bufpool(rsrc, buf); rsrc->alloc_buf_count++; @@ -1001,11 +991,11 @@ static enum irdma_status_code irdma_puda_allocbufs(struct irdma_puda_rsrc *rsrc, * @vsi: sc VSI struct * @info: resource information */ -enum irdma_status_code irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi, - struct irdma_puda_rsrc_info *info) +int irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi, + struct irdma_puda_rsrc_info *info) { struct irdma_sc_dev *dev = vsi->dev; - enum irdma_status_code ret = 0; + int ret = 0; struct irdma_puda_rsrc *rsrc; u32 pudasize; u32 sqwridsize, rqwridsize; @@ -1023,12 +1013,12 @@ enum irdma_status_code irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi, vmem = &vsi->ieq_mem; break; default: - return IRDMA_NOT_SUPPORTED; + return -EOPNOTSUPP; } vmem->size = pudasize + sqwridsize + rqwridsize; vmem->va = kzalloc(vmem->size, GFP_KERNEL); if (!vmem->va) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; rsrc 
= vmem->va; spin_lock_init(&rsrc->bufpool_lock); @@ -1046,7 +1036,7 @@ enum irdma_status_code irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi, rsrc->xmit_complete = irdma_ieq_tx_compl; break; default: - return IRDMA_NOT_SUPPORTED; + return -EOPNOTSUPP; } rsrc->type = info->type; @@ -1323,12 +1313,12 @@ static void irdma_ieq_compl_pfpdu(struct irdma_puda_rsrc *ieq, * @buf: first receive buffer * @fpdu_len: total length of fpdu */ -static enum irdma_status_code -irdma_ieq_create_pbufl(struct irdma_pfpdu *pfpdu, struct list_head *rxlist, - struct list_head *pbufl, struct irdma_puda_buf *buf, - u16 fpdu_len) +static int irdma_ieq_create_pbufl(struct irdma_pfpdu *pfpdu, + struct list_head *rxlist, + struct list_head *pbufl, + struct irdma_puda_buf *buf, u16 fpdu_len) { - enum irdma_status_code status = 0; + int status = 0; struct irdma_puda_buf *nextbuf; u32 nextseqnum; u16 plen = fpdu_len - buf->datalen; @@ -1338,13 +1328,13 @@ irdma_ieq_create_pbufl(struct irdma_pfpdu *pfpdu, struct list_head *rxlist, do { nextbuf = irdma_puda_get_listbuf(rxlist); if (!nextbuf) { - status = IRDMA_ERR_list_empty; + status = -ENOBUFS; break; } list_add_tail(&nextbuf->list, pbufl); if (nextbuf->seqnum != nextseqnum) { pfpdu->bad_seq_num++; - status = IRDMA_ERR_SEQ_NUM; + status = -ERANGE; break; } if (nextbuf->datalen >= plen) { @@ -1366,11 +1356,11 @@ irdma_ieq_create_pbufl(struct irdma_pfpdu *pfpdu, struct list_head *rxlist, * @buf: receive buffer * @fpdu_len: fpdu len in the buffer */ -static enum irdma_status_code -irdma_ieq_handle_partial(struct irdma_puda_rsrc *ieq, struct irdma_pfpdu *pfpdu, - struct irdma_puda_buf *buf, u16 fpdu_len) +static int irdma_ieq_handle_partial(struct irdma_puda_rsrc *ieq, + struct irdma_pfpdu *pfpdu, + struct irdma_puda_buf *buf, u16 fpdu_len) { - enum irdma_status_code status = 0; + int status = 0; u8 *crcptr; u32 mpacrc; u32 seqnum = buf->seqnum; @@ -1390,7 +1380,7 @@ irdma_ieq_handle_partial(struct irdma_puda_rsrc *ieq, struct irdma_pfpdu *pfpdu, 
txbuf = irdma_puda_get_bufpool(ieq); if (!txbuf) { pfpdu->no_tx_bufs++; - status = IRDMA_ERR_NO_TXBUFS; + status = -ENOBUFS; goto error; } @@ -1434,9 +1424,9 @@ error: * @pfpdu: partial management per user qp * @buf: receive buffer */ -static enum irdma_status_code irdma_ieq_process_buf(struct irdma_puda_rsrc *ieq, - struct irdma_pfpdu *pfpdu, - struct irdma_puda_buf *buf) +static int irdma_ieq_process_buf(struct irdma_puda_rsrc *ieq, + struct irdma_pfpdu *pfpdu, + struct irdma_puda_buf *buf) { u16 fpdu_len = 0; u16 datalen = buf->datalen; @@ -1450,7 +1440,7 @@ static enum irdma_status_code irdma_ieq_process_buf(struct irdma_puda_rsrc *ieq, bool partial = false; struct irdma_puda_buf *txbuf; struct list_head *rxlist = &pfpdu->rxlist; - enum irdma_status_code ret = 0; + int ret = 0; ioffset = (u16)(buf->data - (u8 *)buf->mem.va); while (datalen) { @@ -1459,7 +1449,7 @@ static enum irdma_status_code irdma_ieq_process_buf(struct irdma_puda_rsrc *ieq, ibdev_dbg(to_ibdev(ieq->dev), "IEQ: error bad fpdu len\n"); list_add(&buf->list, rxlist); - return IRDMA_ERR_MPA_CRC; + return -EINVAL; } if (datalen < fpdu_len) { @@ -1475,7 +1465,7 @@ static enum irdma_status_code irdma_ieq_process_buf(struct irdma_puda_rsrc *ieq, list_add(&buf->list, rxlist); ibdev_dbg(to_ibdev(ieq->dev), "ERR: IRDMA_ERR_MPA_CRC\n"); - return IRDMA_ERR_MPA_CRC; + return -EINVAL; } full++; pfpdu->fpdu_processed++; @@ -1490,7 +1480,7 @@ static enum irdma_status_code irdma_ieq_process_buf(struct irdma_puda_rsrc *ieq, if (!txbuf) { pfpdu->no_tx_bufs++; list_add(&buf->list, rxlist); - return IRDMA_ERR_NO_TXBUFS; + return -ENOBUFS; } /* modify txbuf's buffer header */ irdma_ieq_setup_tx_buf(buf, txbuf); @@ -1539,7 +1529,7 @@ void irdma_ieq_process_fpdus(struct irdma_sc_qp *qp, struct irdma_pfpdu *pfpdu = &qp->pfpdu; struct list_head *rxlist = &pfpdu->rxlist; struct irdma_puda_buf *buf; - enum irdma_status_code status; + int status; do { if (list_empty(rxlist)) @@ -1557,7 +1547,7 @@ void 
irdma_ieq_process_fpdus(struct irdma_sc_qp *qp, } /* keep processing buffers from the head of the list */ status = irdma_ieq_process_buf(ieq, pfpdu, buf); - if (status == IRDMA_ERR_MPA_CRC) { + if (status == -EINVAL) { pfpdu->mpa_crc_err = true; while (!list_empty(rxlist)) { buf = irdma_puda_get_listbuf(rxlist); @@ -1576,8 +1566,7 @@ void irdma_ieq_process_fpdus(struct irdma_sc_qp *qp, * @qp: qp pointer * @buf: buf received on IEQ used to create AH */ -static enum irdma_status_code irdma_ieq_create_ah(struct irdma_sc_qp *qp, - struct irdma_puda_buf *buf) +static int irdma_ieq_create_ah(struct irdma_sc_qp *qp, struct irdma_puda_buf *buf) { struct irdma_ah_info ah_info = {}; diff --git a/drivers/infiniband/hw/irdma/puda.h b/drivers/infiniband/hw/irdma/puda.h index db3a51170020..5f5124db6ddf 100644 --- a/drivers/infiniband/hw/irdma/puda.h +++ b/drivers/infiniband/hw/irdma/puda.h @@ -151,42 +151,33 @@ void irdma_puda_ret_bufpool(struct irdma_puda_rsrc *rsrc, struct irdma_puda_buf *buf); void irdma_puda_send_buf(struct irdma_puda_rsrc *rsrc, struct irdma_puda_buf *buf); -enum irdma_status_code irdma_puda_send(struct irdma_sc_qp *qp, - struct irdma_puda_send_info *info); -enum irdma_status_code -irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi, - struct irdma_puda_rsrc_info *info); +int irdma_puda_send(struct irdma_sc_qp *qp, struct irdma_puda_send_info *info); +int irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi, + struct irdma_puda_rsrc_info *info); void irdma_puda_dele_rsrc(struct irdma_sc_vsi *vsi, enum puda_rsrc_type type, bool reset); -enum irdma_status_code irdma_puda_poll_cmpl(struct irdma_sc_dev *dev, - struct irdma_sc_cq *cq, - u32 *compl_err); +int irdma_puda_poll_cmpl(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq, + u32 *compl_err); struct irdma_sc_qp *irdma_ieq_get_qp(struct irdma_sc_dev *dev, struct irdma_puda_buf *buf); -enum irdma_status_code -irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info, - struct irdma_puda_buf *buf); -enum 
irdma_status_code irdma_ieq_check_mpacrc(struct shash_desc *desc, - void *addr, u32 len, u32 val); -enum irdma_status_code irdma_init_hash_desc(struct shash_desc **desc); +int irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info, + struct irdma_puda_buf *buf); +int irdma_ieq_check_mpacrc(struct shash_desc *desc, void *addr, u32 len, u32 val); +int irdma_init_hash_desc(struct shash_desc **desc); void irdma_ieq_mpa_crc_ae(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp); void irdma_free_hash_desc(struct shash_desc *desc); -void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len, - u32 seqnum); -enum irdma_status_code irdma_cqp_qp_create_cmd(struct irdma_sc_dev *dev, - struct irdma_sc_qp *qp); -enum irdma_status_code irdma_cqp_cq_create_cmd(struct irdma_sc_dev *dev, - struct irdma_sc_cq *cq); -enum irdma_status_code irdma_cqp_qp_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp); +void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len, u32 seqnum); +int irdma_cqp_qp_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp); +int irdma_cqp_cq_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq); +int irdma_cqp_qp_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp); void irdma_cqp_cq_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq); void irdma_puda_ieq_get_ah_info(struct irdma_sc_qp *qp, struct irdma_ah_info *ah_info); -enum irdma_status_code irdma_puda_create_ah(struct irdma_sc_dev *dev, - struct irdma_ah_info *ah_info, - bool wait, enum puda_rsrc_type type, - void *cb_param, - struct irdma_sc_ah **ah); +int irdma_puda_create_ah(struct irdma_sc_dev *dev, + struct irdma_ah_info *ah_info, bool wait, + enum puda_rsrc_type type, void *cb_param, + struct irdma_sc_ah **ah); void irdma_puda_free_ah(struct irdma_sc_dev *dev, struct irdma_sc_ah *ah); void irdma_ieq_process_fpdus(struct irdma_sc_qp *qp, struct irdma_puda_rsrc *ieq); diff --git a/drivers/infiniband/hw/irdma/status.h 
b/drivers/infiniband/hw/irdma/status.h deleted file mode 100644 index 22ea3888253a..000000000000 --- a/drivers/infiniband/hw/irdma/status.h +++ /dev/null @@ -1,71 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */ -/* Copyright (c) 2015 - 2020 Intel Corporation */ -#ifndef IRDMA_STATUS_H -#define IRDMA_STATUS_H - -/* Error Codes */ -enum irdma_status_code { - IRDMA_SUCCESS = 0, - IRDMA_ERR_NVM = -1, - IRDMA_ERR_NVM_CHECKSUM = -2, - IRDMA_ERR_CFG = -4, - IRDMA_ERR_PARAM = -5, - IRDMA_ERR_DEVICE_NOT_SUPPORTED = -6, - IRDMA_ERR_RESET_FAILED = -7, - IRDMA_ERR_SWFW_SYNC = -8, - IRDMA_ERR_NO_MEMORY = -9, - IRDMA_ERR_BAD_PTR = -10, - IRDMA_ERR_INVALID_PD_ID = -11, - IRDMA_ERR_INVALID_QP_ID = -12, - IRDMA_ERR_INVALID_CQ_ID = -13, - IRDMA_ERR_INVALID_CEQ_ID = -14, - IRDMA_ERR_INVALID_AEQ_ID = -15, - IRDMA_ERR_INVALID_SIZE = -16, - IRDMA_ERR_INVALID_ARP_INDEX = -17, - IRDMA_ERR_INVALID_FPM_FUNC_ID = -18, - IRDMA_ERR_QP_INVALID_MSG_SIZE = -19, - IRDMA_ERR_QP_TOOMANY_WRS_POSTED = -20, - IRDMA_ERR_INVALID_FRAG_COUNT = -21, - IRDMA_ERR_Q_EMPTY = -22, - IRDMA_ERR_INVALID_ALIGNMENT = -23, - IRDMA_ERR_FLUSHED_Q = -24, - IRDMA_ERR_INVALID_PUSH_PAGE_INDEX = -25, - IRDMA_ERR_INVALID_INLINE_DATA_SIZE = -26, - IRDMA_ERR_TIMEOUT = -27, - IRDMA_ERR_OPCODE_MISMATCH = -28, - IRDMA_ERR_CQP_COMPL_ERROR = -29, - IRDMA_ERR_INVALID_VF_ID = -30, - IRDMA_ERR_INVALID_HMCFN_ID = -31, - IRDMA_ERR_BACKING_PAGE_ERROR = -32, - IRDMA_ERR_NO_PBLCHUNKS_AVAILABLE = -33, - IRDMA_ERR_INVALID_PBLE_INDEX = -34, - IRDMA_ERR_INVALID_SD_INDEX = -35, - IRDMA_ERR_INVALID_PAGE_DESC_INDEX = -36, - IRDMA_ERR_INVALID_SD_TYPE = -37, - IRDMA_ERR_MEMCPY_FAILED = -38, - IRDMA_ERR_INVALID_HMC_OBJ_INDEX = -39, - IRDMA_ERR_INVALID_HMC_OBJ_COUNT = -40, - IRDMA_ERR_BUF_TOO_SHORT = -43, - IRDMA_ERR_BAD_IWARP_CQE = -44, - IRDMA_ERR_NVM_BLANK_MODE = -45, - IRDMA_ERR_NOT_IMPL = -46, - IRDMA_ERR_PE_DOORBELL_NOT_ENA = -47, - IRDMA_ERR_NOT_READY = -48, - IRDMA_NOT_SUPPORTED = -49, - IRDMA_ERR_FIRMWARE_API_VER = -50, - 
IRDMA_ERR_RING_FULL = -51, - IRDMA_ERR_MPA_CRC = -61, - IRDMA_ERR_NO_TXBUFS = -62, - IRDMA_ERR_SEQ_NUM = -63, - IRDMA_ERR_list_empty = -64, - IRDMA_ERR_INVALID_MAC_ADDR = -65, - IRDMA_ERR_BAD_STAG = -66, - IRDMA_ERR_CQ_COMPL_ERROR = -67, - IRDMA_ERR_Q_DESTROYED = -68, - IRDMA_ERR_INVALID_FEAT_CNT = -69, - IRDMA_ERR_REG_CQ_FULL = -70, - IRDMA_ERR_VF_MSG_ERROR = -71, - IRDMA_ERR_NO_INTR = -72, - IRDMA_ERR_REG_QSET = -73, -}; -#endif /* IRDMA_STATUS_H */ diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h index 9483bb3e10ea..517d41a1c289 100644 --- a/drivers/infiniband/hw/irdma/type.h +++ b/drivers/infiniband/hw/irdma/type.h @@ -2,7 +2,6 @@ /* Copyright (c) 2015 - 2021 Intel Corporation */ #ifndef IRDMA_TYPE_H #define IRDMA_TYPE_H -#include "status.h" #include "osdep.h" #include "irdma.h" #include "user.h" @@ -99,6 +98,7 @@ enum irdma_term_mpa_errors { enum irdma_qp_event_type { IRDMA_QP_EVENT_CATASTROPHIC, IRDMA_QP_EVENT_ACCESS_ERR, + IRDMA_QP_EVENT_REQ_ERR, }; enum irdma_hw_stats_index_32b { @@ -402,8 +402,8 @@ struct irdma_sc_cqp { u64 host_ctx_pa; void *back_cqp; struct irdma_sc_dev *dev; - enum irdma_status_code (*process_cqp_sds)(struct irdma_sc_dev *dev, - struct irdma_update_sds_info *info); + int (*process_cqp_sds)(struct irdma_sc_dev *dev, + struct irdma_update_sds_info *info); struct irdma_dma_mem sdbuf; struct irdma_ring sq_ring; struct irdma_cqp_quanta *sq_base; @@ -605,12 +605,14 @@ struct irdma_sc_vsi { struct irdma_qos qos[IRDMA_MAX_USER_PRIORITY]; struct irdma_vsi_pestat *pestat; atomic_t qp_suspend_reqs; - enum irdma_status_code (*register_qset)(struct irdma_sc_vsi *vsi, - struct irdma_ws_node *tc_node); + int (*register_qset)(struct irdma_sc_vsi *vsi, + struct irdma_ws_node *tc_node); void (*unregister_qset)(struct irdma_sc_vsi *vsi, struct irdma_ws_node *tc_node); u8 qos_rel_bw; u8 qos_prio_type; + u8 dscp_map[IIDC_MAX_DSCP_MAPPING]; + bool dscp_mode:1; }; struct irdma_sc_dev { @@ -655,7 +657,7 @@ struct 
irdma_sc_dev { bool vchnl_up:1; bool ceq_valid:1; u8 pci_rev; - enum irdma_status_code (*ws_add)(struct irdma_sc_vsi *vsi, u8 user_pri); + int (*ws_add)(struct irdma_sc_vsi *vsi, u8 user_pri); void (*ws_remove)(struct irdma_sc_vsi *vsi, u8 user_pri); void (*ws_reset)(struct irdma_sc_vsi *vsi); }; @@ -735,11 +737,13 @@ struct irdma_l2params { u16 qs_handle_list[IRDMA_MAX_USER_PRIORITY]; u16 mtu; u8 up2tc[IRDMA_MAX_USER_PRIORITY]; + u8 dscp_map[IIDC_MAX_DSCP_MAPPING]; u8 num_tc; u8 vsi_rel_bw; u8 vsi_prio_type; bool mtu_changed:1; bool tc_changed:1; + bool dscp_mode:1; }; struct irdma_vsi_init_info { @@ -750,8 +754,8 @@ struct irdma_vsi_init_info { u16 pf_data_vsi_num; enum irdma_vm_vf_type vm_vf_type; u16 vm_id; - enum irdma_status_code (*register_qset)(struct irdma_sc_vsi *vsi, - struct irdma_ws_node *tc_node); + int (*register_qset)(struct irdma_sc_vsi *vsi, + struct irdma_ws_node *tc_node); void (*unregister_qset)(struct irdma_sc_vsi *vsi, struct irdma_ws_node *tc_node); }; @@ -1198,29 +1202,27 @@ struct irdma_irq_ops { }; void irdma_sc_ccq_arm(struct irdma_sc_cq *ccq); -enum irdma_status_code irdma_sc_ccq_create(struct irdma_sc_cq *ccq, u64 scratch, - bool check_overflow, bool post_sq); -enum irdma_status_code irdma_sc_ccq_destroy(struct irdma_sc_cq *ccq, u64 scratch, - bool post_sq); -enum irdma_status_code irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq, - struct irdma_ccq_cqe_info *info); -enum irdma_status_code irdma_sc_ccq_init(struct irdma_sc_cq *ccq, - struct irdma_ccq_init_info *info); - -enum irdma_status_code irdma_sc_cceq_create(struct irdma_sc_ceq *ceq, u64 scratch); -enum irdma_status_code irdma_sc_cceq_destroy_done(struct irdma_sc_ceq *ceq); - -enum irdma_status_code irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 scratch, - bool post_sq); -enum irdma_status_code irdma_sc_ceq_init(struct irdma_sc_ceq *ceq, - struct irdma_ceq_init_info *info); +int irdma_sc_ccq_create(struct irdma_sc_cq *ccq, u64 scratch, + bool check_overflow, bool post_sq); 
+int irdma_sc_ccq_destroy(struct irdma_sc_cq *ccq, u64 scratch, bool post_sq); +int irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq, + struct irdma_ccq_cqe_info *info); +int irdma_sc_ccq_init(struct irdma_sc_cq *ccq, + struct irdma_ccq_init_info *info); + +int irdma_sc_cceq_create(struct irdma_sc_ceq *ceq, u64 scratch); +int irdma_sc_cceq_destroy_done(struct irdma_sc_ceq *ceq); + +int irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 scratch, bool post_sq); +int irdma_sc_ceq_init(struct irdma_sc_ceq *ceq, + struct irdma_ceq_init_info *info); void irdma_sc_cleanup_ceqes(struct irdma_sc_cq *cq, struct irdma_sc_ceq *ceq); void *irdma_sc_process_ceq(struct irdma_sc_dev *dev, struct irdma_sc_ceq *ceq); -enum irdma_status_code irdma_sc_aeq_init(struct irdma_sc_aeq *aeq, - struct irdma_aeq_init_info *info); -enum irdma_status_code irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, - struct irdma_aeqe_info *info); +int irdma_sc_aeq_init(struct irdma_sc_aeq *aeq, + struct irdma_aeq_init_info *info); +int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, + struct irdma_aeqe_info *info); void irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count); void irdma_sc_pd_init(struct irdma_sc_dev *dev, struct irdma_sc_pd *pd, u32 pd_id, @@ -1228,31 +1230,27 @@ void irdma_sc_pd_init(struct irdma_sc_dev *dev, struct irdma_sc_pd *pd, u32 pd_i void irdma_cfg_aeq(struct irdma_sc_dev *dev, u32 idx, bool enable); void irdma_check_cqp_progress(struct irdma_cqp_timeout *cqp_timeout, struct irdma_sc_dev *dev); -enum irdma_status_code irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_err, - u16 *min_err); -enum irdma_status_code irdma_sc_cqp_destroy(struct irdma_sc_cqp *cqp); -enum irdma_status_code irdma_sc_cqp_init(struct irdma_sc_cqp *cqp, - struct irdma_cqp_init_info *info); +int irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_err, u16 *min_err); +int irdma_sc_cqp_destroy(struct irdma_sc_cqp *cqp); +int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp, + struct 
irdma_cqp_init_info *info); void irdma_sc_cqp_post_sq(struct irdma_sc_cqp *cqp); -enum irdma_status_code irdma_sc_poll_for_cqp_op_done(struct irdma_sc_cqp *cqp, u8 opcode, - struct irdma_ccq_cqe_info *cmpl_info); -enum irdma_status_code irdma_sc_fast_register(struct irdma_sc_qp *qp, - struct irdma_fast_reg_stag_info *info, - bool post_sq); -enum irdma_status_code irdma_sc_qp_create(struct irdma_sc_qp *qp, - struct irdma_create_qp_info *info, - u64 scratch, bool post_sq); -enum irdma_status_code irdma_sc_qp_destroy(struct irdma_sc_qp *qp, - u64 scratch, bool remove_hash_idx, - bool ignore_mw_bnd, bool post_sq); -enum irdma_status_code irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp, - struct irdma_qp_flush_info *info, - u64 scratch, bool post_sq); -enum irdma_status_code irdma_sc_qp_init(struct irdma_sc_qp *qp, - struct irdma_qp_init_info *info); -enum irdma_status_code irdma_sc_qp_modify(struct irdma_sc_qp *qp, - struct irdma_modify_qp_info *info, - u64 scratch, bool post_sq); +int irdma_sc_poll_for_cqp_op_done(struct irdma_sc_cqp *cqp, u8 opcode, + struct irdma_ccq_cqe_info *cmpl_info); +int irdma_sc_fast_register(struct irdma_sc_qp *qp, + struct irdma_fast_reg_stag_info *info, bool post_sq); +int irdma_sc_qp_create(struct irdma_sc_qp *qp, + struct irdma_create_qp_info *info, u64 scratch, + bool post_sq); +int irdma_sc_qp_destroy(struct irdma_sc_qp *qp, u64 scratch, + bool remove_hash_idx, bool ignore_mw_bnd, bool post_sq); +int irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp, + struct irdma_qp_flush_info *info, u64 scratch, + bool post_sq); +int irdma_sc_qp_init(struct irdma_sc_qp *qp, struct irdma_qp_init_info *info); +int irdma_sc_qp_modify(struct irdma_sc_qp *qp, + struct irdma_modify_qp_info *info, u64 scratch, + bool post_sq); void irdma_sc_send_lsmm(struct irdma_sc_qp *qp, void *lsmm_buf, u32 size, irdma_stag stag); @@ -1261,14 +1259,12 @@ void irdma_sc_qp_setctx(struct irdma_sc_qp *qp, __le64 *qp_ctx, struct irdma_qp_host_ctx_info *info); void 
irdma_sc_qp_setctx_roce(struct irdma_sc_qp *qp, __le64 *qp_ctx, struct irdma_qp_host_ctx_info *info); -enum irdma_status_code irdma_sc_cq_destroy(struct irdma_sc_cq *cq, u64 scratch, - bool post_sq); -enum irdma_status_code irdma_sc_cq_init(struct irdma_sc_cq *cq, - struct irdma_cq_init_info *info); +int irdma_sc_cq_destroy(struct irdma_sc_cq *cq, u64 scratch, bool post_sq); +int irdma_sc_cq_init(struct irdma_sc_cq *cq, struct irdma_cq_init_info *info); void irdma_sc_cq_resize(struct irdma_sc_cq *cq, struct irdma_modify_cq_info *info); -enum irdma_status_code irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, - u64 scratch, u8 hmc_fn_id, - bool post_sq, bool poll_registers); +int irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, u64 scratch, + u8 hmc_fn_id, bool post_sq, + bool poll_registers); void sc_vsi_update_stats(struct irdma_sc_vsi *vsi); struct cqp_info { diff --git a/drivers/infiniband/hw/irdma/uda.c b/drivers/infiniband/hw/irdma/uda.c index f5b1b6150cdc..284cec2a74de 100644 --- a/drivers/infiniband/hw/irdma/uda.c +++ b/drivers/infiniband/hw/irdma/uda.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB /* Copyright (c) 2016 - 2021 Intel Corporation */ +#include <linux/etherdevice.h> + #include "osdep.h" -#include "status.h" #include "hmc.h" #include "defs.h" #include "type.h" @@ -16,16 +17,15 @@ * @op: Operation * @scratch: u64 saved to be used during cqp completion */ -enum irdma_status_code irdma_sc_access_ah(struct irdma_sc_cqp *cqp, - struct irdma_ah_info *info, - u32 op, u64 scratch) +int irdma_sc_access_ah(struct irdma_sc_cqp *cqp, struct irdma_ah_info *info, + u32 op, u64 scratch) { __le64 *wqe; u64 qw1, qw2; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 0, ether_addr_to_u64(info->mac_addr) << 16); qw1 = FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_PDINDEXLO, info->pd_idx) | @@ -84,8 +84,7 @@ enum irdma_status_code irdma_sc_access_ah(struct 
irdma_sc_cqp *cqp, * irdma_create_mg_ctx() - create a mcg context * @info: multicast group context info */ -static enum irdma_status_code -irdma_create_mg_ctx(struct irdma_mcast_grp_info *info) +static void irdma_create_mg_ctx(struct irdma_mcast_grp_info *info) { struct irdma_mcast_grp_ctx_entry_info *entry_info = NULL; u8 idx = 0; /* index in the array */ @@ -104,8 +103,6 @@ irdma_create_mg_ctx(struct irdma_mcast_grp_info *info) ctx_idx++; } } - - return 0; } /** @@ -115,27 +112,24 @@ irdma_create_mg_ctx(struct irdma_mcast_grp_info *info) * @op: operation to perform * @scratch: u64 saved to be used during cqp completion */ -enum irdma_status_code irdma_access_mcast_grp(struct irdma_sc_cqp *cqp, - struct irdma_mcast_grp_info *info, - u32 op, u64 scratch) +int irdma_access_mcast_grp(struct irdma_sc_cqp *cqp, + struct irdma_mcast_grp_info *info, u32 op, + u64 scratch) { __le64 *wqe; - enum irdma_status_code ret_code = 0; if (info->mg_id >= IRDMA_UDA_MAX_FSI_MGS) { ibdev_dbg(to_ibdev(cqp->dev), "WQE: mg_id out of range\n"); - return IRDMA_ERR_PARAM; + return -EINVAL; } wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) { ibdev_dbg(to_ibdev(cqp->dev), "WQE: ring full\n"); - return IRDMA_ERR_RING_FULL; + return -ENOMEM; } - ret_code = irdma_create_mg_ctx(info); - if (ret_code) - return ret_code; + irdma_create_mg_ctx(info); set_64bit_val(wqe, 32, info->dma_mem_mc.pa); set_64bit_val(wqe, 16, @@ -196,8 +190,8 @@ static bool irdma_compare_mgs(struct irdma_mcast_grp_ctx_entry_info *entry1, * @ctx: Multcast group context * @mg: Multcast group info */ -enum irdma_status_code irdma_sc_add_mcast_grp(struct irdma_mcast_grp_info *ctx, - struct irdma_mcast_grp_ctx_entry_info *mg) +int irdma_sc_add_mcast_grp(struct irdma_mcast_grp_info *ctx, + struct irdma_mcast_grp_ctx_entry_info *mg) { u32 idx; bool free_entry_found = false; @@ -226,7 +220,7 @@ enum irdma_status_code irdma_sc_add_mcast_grp(struct irdma_mcast_grp_info *ctx, return 0; } - return IRDMA_ERR_NO_MEMORY; + 
return -ENOMEM; } /** @@ -237,8 +231,8 @@ enum irdma_status_code irdma_sc_add_mcast_grp(struct irdma_mcast_grp_info *ctx, * Finds and removes a specific mulicast group from context, all * parameters must match to remove a multicast group. */ -enum irdma_status_code irdma_sc_del_mcast_grp(struct irdma_mcast_grp_info *ctx, - struct irdma_mcast_grp_ctx_entry_info *mg) +int irdma_sc_del_mcast_grp(struct irdma_mcast_grp_info *ctx, + struct irdma_mcast_grp_ctx_entry_info *mg) { u32 idx; @@ -267,5 +261,5 @@ enum irdma_status_code irdma_sc_del_mcast_grp(struct irdma_mcast_grp_info *ctx, } } - return IRDMA_ERR_PARAM; + return -EINVAL; } diff --git a/drivers/infiniband/hw/irdma/uda.h b/drivers/infiniband/hw/irdma/uda.h index a4ad0367dc96..fe4820ff0cca 100644 --- a/drivers/infiniband/hw/irdma/uda.h +++ b/drivers/infiniband/hw/irdma/uda.h @@ -32,56 +32,54 @@ struct irdma_sc_ah { struct irdma_ah_info ah_info; }; -enum irdma_status_code irdma_sc_add_mcast_grp(struct irdma_mcast_grp_info *ctx, - struct irdma_mcast_grp_ctx_entry_info *mg); -enum irdma_status_code irdma_sc_del_mcast_grp(struct irdma_mcast_grp_info *ctx, - struct irdma_mcast_grp_ctx_entry_info *mg); -enum irdma_status_code irdma_sc_access_ah(struct irdma_sc_cqp *cqp, struct irdma_ah_info *info, - u32 op, u64 scratch); -enum irdma_status_code irdma_access_mcast_grp(struct irdma_sc_cqp *cqp, - struct irdma_mcast_grp_info *info, - u32 op, u64 scratch); +int irdma_sc_add_mcast_grp(struct irdma_mcast_grp_info *ctx, + struct irdma_mcast_grp_ctx_entry_info *mg); +int irdma_sc_del_mcast_grp(struct irdma_mcast_grp_info *ctx, + struct irdma_mcast_grp_ctx_entry_info *mg); +int irdma_sc_access_ah(struct irdma_sc_cqp *cqp, struct irdma_ah_info *info, + u32 op, u64 scratch); +int irdma_access_mcast_grp(struct irdma_sc_cqp *cqp, + struct irdma_mcast_grp_info *info, u32 op, + u64 scratch); static inline void irdma_sc_init_ah(struct irdma_sc_dev *dev, struct irdma_sc_ah *ah) { ah->dev = dev; } -static inline enum irdma_status_code 
irdma_sc_create_ah(struct irdma_sc_cqp *cqp, - struct irdma_ah_info *info, - u64 scratch) +static inline int irdma_sc_create_ah(struct irdma_sc_cqp *cqp, + struct irdma_ah_info *info, u64 scratch) { return irdma_sc_access_ah(cqp, info, IRDMA_CQP_OP_CREATE_ADDR_HANDLE, scratch); } -static inline enum irdma_status_code irdma_sc_destroy_ah(struct irdma_sc_cqp *cqp, - struct irdma_ah_info *info, - u64 scratch) +static inline int irdma_sc_destroy_ah(struct irdma_sc_cqp *cqp, + struct irdma_ah_info *info, u64 scratch) { return irdma_sc_access_ah(cqp, info, IRDMA_CQP_OP_DESTROY_ADDR_HANDLE, scratch); } -static inline enum irdma_status_code irdma_sc_create_mcast_grp(struct irdma_sc_cqp *cqp, - struct irdma_mcast_grp_info *info, - u64 scratch) +static inline int irdma_sc_create_mcast_grp(struct irdma_sc_cqp *cqp, + struct irdma_mcast_grp_info *info, + u64 scratch) { return irdma_access_mcast_grp(cqp, info, IRDMA_CQP_OP_CREATE_MCAST_GRP, scratch); } -static inline enum irdma_status_code irdma_sc_modify_mcast_grp(struct irdma_sc_cqp *cqp, - struct irdma_mcast_grp_info *info, - u64 scratch) +static inline int irdma_sc_modify_mcast_grp(struct irdma_sc_cqp *cqp, + struct irdma_mcast_grp_info *info, + u64 scratch) { return irdma_access_mcast_grp(cqp, info, IRDMA_CQP_OP_MODIFY_MCAST_GRP, scratch); } -static inline enum irdma_status_code irdma_sc_destroy_mcast_grp(struct irdma_sc_cqp *cqp, - struct irdma_mcast_grp_info *info, - u64 scratch) +static inline int irdma_sc_destroy_mcast_grp(struct irdma_sc_cqp *cqp, + struct irdma_mcast_grp_info *info, + u64 scratch) { return irdma_access_mcast_grp(cqp, info, IRDMA_CQP_OP_DESTROY_MCAST_GRP, scratch); diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index 57a9444e9ea7..a6e5d350a94c 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB /* Copyright (c) 2015 - 2021 Intel Corporation */ #include "osdep.h" 
-#include "status.h" #include "defs.h" #include "user.h" #include "irdma.h" @@ -56,7 +55,7 @@ static void irdma_set_fragment_gen_1(__le64 *wqe, u32 offset, * irdma_nop_1 - insert a NOP wqe * @qp: hw qp ptr */ -static enum irdma_status_code irdma_nop_1(struct irdma_qp_uk *qp) +static int irdma_nop_1(struct irdma_qp_uk *qp) { u64 hdr; __le64 *wqe; @@ -64,7 +63,7 @@ static enum irdma_status_code irdma_nop_1(struct irdma_qp_uk *qp) bool signaled = false; if (!qp->sq_ring.head) - return IRDMA_ERR_PARAM; + return -EINVAL; wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring); wqe = qp->sq_base[wqe_idx].elem; @@ -245,7 +244,7 @@ __le64 *irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx, __le64 *irdma_qp_get_next_recv_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx) { __le64 *wqe; - enum irdma_status_code ret_code; + int ret_code; if (IRDMA_RING_FULL_ERR(qp->rq_ring)) return NULL; @@ -268,16 +267,15 @@ __le64 *irdma_qp_get_next_recv_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx) * @info: post sq information * @post_sq: flag to post sq */ -enum irdma_status_code irdma_uk_rdma_write(struct irdma_qp_uk *qp, - struct irdma_post_sq_info *info, - bool post_sq) +int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, + bool post_sq) { u64 hdr; __le64 *wqe; struct irdma_rdma_write *op_info; u32 i, wqe_idx; u32 total_size = 0, byte_off; - enum irdma_status_code ret_code; + int ret_code; u32 frag_cnt, addl_frag_cnt; bool read_fence = false; u16 quanta; @@ -286,7 +284,7 @@ enum irdma_status_code irdma_uk_rdma_write(struct irdma_qp_uk *qp, op_info = &info->op.rdma_write; if (op_info->num_lo_sges > qp->max_sq_frag_cnt) - return IRDMA_ERR_INVALID_FRAG_COUNT; + return -EINVAL; for (i = 0; i < op_info->num_lo_sges; i++) total_size += op_info->lo_sg_list[i].length; @@ -305,7 +303,7 @@ enum irdma_status_code irdma_uk_rdma_write(struct irdma_qp_uk *qp, wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, info); if (!wqe) - return 
IRDMA_ERR_QP_TOOMANY_WRS_POSTED; + return -ENOMEM; irdma_clr_wqes(qp, wqe_idx); @@ -370,12 +368,11 @@ enum irdma_status_code irdma_uk_rdma_write(struct irdma_qp_uk *qp, * @inv_stag: flag for inv_stag * @post_sq: flag to post sq */ -enum irdma_status_code irdma_uk_rdma_read(struct irdma_qp_uk *qp, - struct irdma_post_sq_info *info, - bool inv_stag, bool post_sq) +int irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, + bool inv_stag, bool post_sq) { struct irdma_rdma_read *op_info; - enum irdma_status_code ret_code; + int ret_code; u32 i, byte_off, total_size = 0; bool local_fence = false; u32 addl_frag_cnt; @@ -388,7 +385,7 @@ enum irdma_status_code irdma_uk_rdma_read(struct irdma_qp_uk *qp, op_info = &info->op.rdma_read; if (qp->max_sq_frag_cnt < op_info->num_lo_sges) - return IRDMA_ERR_INVALID_FRAG_COUNT; + return -EINVAL; for (i = 0; i < op_info->num_lo_sges; i++) total_size += op_info->lo_sg_list[i].length; @@ -400,7 +397,7 @@ enum irdma_status_code irdma_uk_rdma_read(struct irdma_qp_uk *qp, wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, info); if (!wqe) - return IRDMA_ERR_QP_TOOMANY_WRS_POSTED; + return -ENOMEM; irdma_clr_wqes(qp, wqe_idx); @@ -457,15 +454,14 @@ enum irdma_status_code irdma_uk_rdma_read(struct irdma_qp_uk *qp, * @info: post sq information * @post_sq: flag to post sq */ -enum irdma_status_code irdma_uk_send(struct irdma_qp_uk *qp, - struct irdma_post_sq_info *info, - bool post_sq) +int irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, + bool post_sq) { __le64 *wqe; struct irdma_post_send *op_info; u64 hdr; u32 i, wqe_idx, total_size = 0, byte_off; - enum irdma_status_code ret_code; + int ret_code; u32 frag_cnt, addl_frag_cnt; bool read_fence = false; u16 quanta; @@ -474,7 +470,7 @@ enum irdma_status_code irdma_uk_send(struct irdma_qp_uk *qp, op_info = &info->op.send; if (qp->max_sq_frag_cnt < op_info->num_sges) - return IRDMA_ERR_INVALID_FRAG_COUNT; + return -EINVAL; for (i = 0; 
i < op_info->num_sges; i++) total_size += op_info->sg_list[i].length; @@ -490,7 +486,7 @@ enum irdma_status_code irdma_uk_send(struct irdma_qp_uk *qp, wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, info); if (!wqe) - return IRDMA_ERR_QP_TOOMANY_WRS_POSTED; + return -ENOMEM; irdma_clr_wqes(qp, wqe_idx); @@ -501,7 +497,8 @@ enum irdma_status_code irdma_uk_send(struct irdma_qp_uk *qp, FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data)); i = 0; } else { - qp->wqe_ops.iw_set_fragment(wqe, 0, op_info->sg_list, + qp->wqe_ops.iw_set_fragment(wqe, 0, + frag_cnt ? op_info->sg_list : NULL, qp->swqe_polarity); i = 1; } @@ -678,9 +675,8 @@ static u16 irdma_inline_data_size_to_quanta(u32 data_size) * @info: post sq information * @post_sq: flag to post sq */ -enum irdma_status_code -irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, - bool post_sq) +int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, + struct irdma_post_sq_info *info, bool post_sq) { __le64 *wqe; struct irdma_inline_rdma_write *op_info; @@ -693,13 +689,13 @@ irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *in op_info = &info->op.inline_rdma_write; if (op_info->len > qp->max_inline_data) - return IRDMA_ERR_INVALID_INLINE_DATA_SIZE; + return -EINVAL; quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len); wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len, info); if (!wqe) - return IRDMA_ERR_QP_TOOMANY_WRS_POSTED; + return -ENOMEM; irdma_clr_wqes(qp, wqe_idx); @@ -745,9 +741,8 @@ irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *in * @info: post sq information * @post_sq: flag to post sq */ -enum irdma_status_code irdma_uk_inline_send(struct irdma_qp_uk *qp, - struct irdma_post_sq_info *info, - bool post_sq) +int irdma_uk_inline_send(struct irdma_qp_uk *qp, + struct irdma_post_sq_info *info, bool post_sq) { __le64 *wqe; struct irdma_post_inline_send *op_info; @@ -760,13 +755,13 @@ 
enum irdma_status_code irdma_uk_inline_send(struct irdma_qp_uk *qp, op_info = &info->op.inline_send; if (op_info->len > qp->max_inline_data) - return IRDMA_ERR_INVALID_INLINE_DATA_SIZE; + return -EINVAL; quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len); wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len, info); if (!wqe) - return IRDMA_ERR_QP_TOOMANY_WRS_POSTED; + return -ENOMEM; irdma_clr_wqes(qp, wqe_idx); @@ -817,9 +812,9 @@ enum irdma_status_code irdma_uk_inline_send(struct irdma_qp_uk *qp, * @info: post sq information * @post_sq: flag to post sq */ -enum irdma_status_code -irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, - struct irdma_post_sq_info *info, bool post_sq) +int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, + struct irdma_post_sq_info *info, + bool post_sq) { __le64 *wqe; struct irdma_inv_local_stag *op_info; @@ -835,7 +830,7 @@ irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, IRDMA_QP_WQE_MIN_QUANTA, 0, info); if (!wqe) - return IRDMA_ERR_QP_TOOMANY_WRS_POSTED; + return -ENOMEM; irdma_clr_wqes(qp, wqe_idx); @@ -871,8 +866,8 @@ irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, * @qp: hw qp ptr * @info: post rq information */ -enum irdma_status_code irdma_uk_post_receive(struct irdma_qp_uk *qp, - struct irdma_post_rq_info *info) +int irdma_uk_post_receive(struct irdma_qp_uk *qp, + struct irdma_post_rq_info *info) { u32 wqe_idx, i, byte_off; u32 addl_frag_cnt; @@ -880,11 +875,11 @@ enum irdma_status_code irdma_uk_post_receive(struct irdma_qp_uk *qp, u64 hdr; if (qp->max_rq_frag_cnt < info->num_sges) - return IRDMA_ERR_INVALID_FRAG_COUNT; + return -EINVAL; wqe = irdma_qp_get_next_recv_wqe(qp, &wqe_idx); if (!wqe) - return IRDMA_ERR_QP_TOOMANY_WRS_POSTED; + return -ENOMEM; qp->rq_wrid_array[wqe_idx] = info->wr_id; addl_frag_cnt = info->num_sges > 1 ? 
(info->num_sges - 1) : 0; @@ -1000,17 +995,18 @@ void irdma_uk_cq_request_notification(struct irdma_cq_uk *cq, * @cq: hw cq * @info: cq poll information returned */ -enum irdma_status_code -irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info) +int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, + struct irdma_cq_poll_info *info) { u64 comp_ctx, qword0, qword2, qword3; __le64 *cqe; struct irdma_qp_uk *qp; struct irdma_ring *pring = NULL; u32 wqe_idx, q_type; - enum irdma_status_code ret_code; + int ret_code; bool move_cq_head = true; u8 polarity; + u8 op_type; bool ext_valid; __le64 *ext_cqe; @@ -1022,7 +1018,7 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info) get_64bit_val(cqe, 24, &qword3); polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3); if (polarity != cq->polarity) - return IRDMA_ERR_Q_EMPTY; + return -ENOENT; /* Ensure CQE contents are read after valid bit is checked */ dma_rmb(); @@ -1045,7 +1041,7 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info) polarity ^= 1; } if (polarity != cq->polarity) - return IRDMA_ERR_Q_EMPTY; + return -ENOENT; /* Ensure ext CQE contents are read after ext valid bit is checked */ dma_rmb(); @@ -1112,7 +1108,7 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info) info->solicited_event = (bool)FIELD_GET(IRDMACQ_SOEVENT, qword3); qp = (struct irdma_qp_uk *)(unsigned long)comp_ctx; if (!qp || qp->destroy_pending) { - ret_code = IRDMA_ERR_Q_DESTROYED; + ret_code = -EFAULT; goto exit; } wqe_idx = (u32)FIELD_GET(IRDMA_CQ_WQEIDX, qword3); @@ -1126,7 +1122,7 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info) if (info->comp_status == IRDMA_COMPL_STATUS_FLUSHED || info->comp_status == IRDMA_COMPL_STATUS_UNKNOWN) { if (!IRDMA_RING_MORE_WORK(qp->rq_ring)) { - ret_code = IRDMA_ERR_Q_EMPTY; + ret_code = -ENOENT; goto exit; } @@ -1186,14 +1182,13 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct 
irdma_cq_poll_info *info) wqe_idx + qp->sq_wrtrk_array[wqe_idx].quanta); } else { if (!IRDMA_RING_MORE_WORK(qp->sq_ring)) { - ret_code = IRDMA_ERR_Q_EMPTY; + ret_code = -ENOENT; goto exit; } do { __le64 *sw_wqe; u64 wqe_qword; - u8 op_type; u32 tail; tail = qp->sq_ring.tail; @@ -1210,6 +1205,8 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info) break; } } while (1); + if (op_type == IRDMA_OP_TYPE_BIND_MW && info->minor_err == FLUSH_PROT_ERR) + info->minor_err = FLUSH_MW_BIND_ERR; qp->sq_flush_seen = true; if (!IRDMA_RING_MORE_WORK(qp->sq_ring)) qp->sq_flush_complete = true; @@ -1303,15 +1300,15 @@ void irdma_get_wqe_shift(struct irdma_uk_attrs *uk_attrs, u32 sge, * @sqdepth: depth of SQ * */ -enum irdma_status_code irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, - u32 sq_size, u8 shift, u32 *sqdepth) +int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, + u32 *sqdepth) { *sqdepth = irdma_qp_round_up((sq_size << shift) + IRDMA_SQ_RSVD); if (*sqdepth < (IRDMA_QP_SW_MIN_WQSIZE << shift)) *sqdepth = IRDMA_QP_SW_MIN_WQSIZE << shift; else if (*sqdepth > uk_attrs->max_hw_wq_quanta) - return IRDMA_ERR_INVALID_SIZE; + return -EINVAL; return 0; } @@ -1323,15 +1320,15 @@ enum irdma_status_code irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, * @shift: shift which determines size of WQE * @rqdepth: depth of RQ */ -enum irdma_status_code irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, - u32 rq_size, u8 shift, u32 *rqdepth) +int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift, + u32 *rqdepth) { *rqdepth = irdma_qp_round_up((rq_size << shift) + IRDMA_RQ_RSVD); if (*rqdepth < (IRDMA_QP_SW_MIN_WQSIZE << shift)) *rqdepth = IRDMA_QP_SW_MIN_WQSIZE << shift; else if (*rqdepth > uk_attrs->max_hw_rq_quanta) - return IRDMA_ERR_INVALID_SIZE; + return -EINVAL; return 0; } @@ -1381,17 +1378,16 @@ static void irdma_setup_connection_wqes(struct irdma_qp_uk *qp, * allowed. 
Then size of wqe * the number of wqes should be the * amount of memory allocated for sq and rq. */ -enum irdma_status_code irdma_uk_qp_init(struct irdma_qp_uk *qp, - struct irdma_qp_uk_init_info *info) +int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info) { - enum irdma_status_code ret_code = 0; + int ret_code = 0; u32 sq_ring_size; u8 sqshift, rqshift; qp->uk_attrs = info->uk_attrs; if (info->max_sq_frag_cnt > qp->uk_attrs->max_hw_wq_frags || info->max_rq_frag_cnt > qp->uk_attrs->max_hw_wq_frags) - return IRDMA_ERR_INVALID_FRAG_COUNT; + return -EINVAL; irdma_get_wqe_shift(qp->uk_attrs, info->max_rq_frag_cnt, 0, &rqshift); if (qp->uk_attrs->hw_rev == IRDMA_GEN_1) { @@ -1502,8 +1498,7 @@ void irdma_uk_clean_cq(void *q, struct irdma_cq_uk *cq) * @signaled: signaled for completion * @post_sq: ring doorbell */ -enum irdma_status_code irdma_nop(struct irdma_qp_uk *qp, u64 wr_id, - bool signaled, bool post_sq) +int irdma_nop(struct irdma_qp_uk *qp, u64 wr_id, bool signaled, bool post_sq) { __le64 *wqe; u64 hdr; @@ -1515,7 +1510,7 @@ enum irdma_status_code irdma_nop(struct irdma_qp_uk *qp, u64 wr_id, wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, IRDMA_QP_WQE_MIN_QUANTA, 0, &info); if (!wqe) - return IRDMA_ERR_QP_TOOMANY_WRS_POSTED; + return -ENOMEM; irdma_clr_wqes(qp, wqe_idx); @@ -1541,7 +1536,7 @@ enum irdma_status_code irdma_nop(struct irdma_qp_uk *qp, u64 wr_id, * @frag_cnt: number of fragments * @quanta: quanta for frag_cnt */ -enum irdma_status_code irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta) +int irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta) { switch (frag_cnt) { case 0: @@ -1577,7 +1572,7 @@ enum irdma_status_code irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta) *quanta = 8; break; default: - return IRDMA_ERR_INVALID_FRAG_COUNT; + return -EINVAL; } return 0; @@ -1588,7 +1583,7 @@ enum irdma_status_code irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta) * @frag_cnt: number of fragments * @wqe_size: size in 
bytes given frag_cnt */ -enum irdma_status_code irdma_fragcnt_to_wqesize_rq(u32 frag_cnt, u16 *wqe_size) +int irdma_fragcnt_to_wqesize_rq(u32 frag_cnt, u16 *wqe_size) { switch (frag_cnt) { case 0: @@ -1615,7 +1610,7 @@ enum irdma_status_code irdma_fragcnt_to_wqesize_rq(u32 frag_cnt, u16 *wqe_size) *wqe_size = 256; break; default: - return IRDMA_ERR_INVALID_FRAG_COUNT; + return -EINVAL; } return 0; diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h index 3c811fb88404..2ef61923c926 100644 --- a/drivers/infiniband/hw/irdma/user.h +++ b/drivers/infiniband/hw/irdma/user.h @@ -103,6 +103,7 @@ enum irdma_flush_opcode { FLUSH_FATAL_ERR, FLUSH_RETRY_EXC_ERR, FLUSH_MW_BIND_ERR, + FLUSH_REM_INV_REQ_ERR, }; enum irdma_cmpl_status { @@ -270,29 +271,24 @@ struct irdma_cq_poll_info { bool imm_valid:1; }; -enum irdma_status_code irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, - struct irdma_post_sq_info *info, - bool post_sq); -enum irdma_status_code irdma_uk_inline_send(struct irdma_qp_uk *qp, - struct irdma_post_sq_info *info, - bool post_sq); - -enum irdma_status_code irdma_uk_post_nop(struct irdma_qp_uk *qp, u64 wr_id, - bool signaled, bool post_sq); -enum irdma_status_code irdma_uk_post_receive(struct irdma_qp_uk *qp, - struct irdma_post_rq_info *info); +int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, + struct irdma_post_sq_info *info, bool post_sq); +int irdma_uk_inline_send(struct irdma_qp_uk *qp, + struct irdma_post_sq_info *info, bool post_sq); +int irdma_uk_post_nop(struct irdma_qp_uk *qp, u64 wr_id, bool signaled, + bool post_sq); +int irdma_uk_post_receive(struct irdma_qp_uk *qp, + struct irdma_post_rq_info *info); void irdma_uk_qp_post_wr(struct irdma_qp_uk *qp); -enum irdma_status_code irdma_uk_rdma_read(struct irdma_qp_uk *qp, - struct irdma_post_sq_info *info, - bool inv_stag, bool post_sq); -enum irdma_status_code irdma_uk_rdma_write(struct irdma_qp_uk *qp, - struct irdma_post_sq_info *info, - bool post_sq); -enum 
irdma_status_code irdma_uk_send(struct irdma_qp_uk *qp, - struct irdma_post_sq_info *info, bool post_sq); -enum irdma_status_code irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, - struct irdma_post_sq_info *info, - bool post_sq); +int irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, + bool inv_stag, bool post_sq); +int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, + bool post_sq); +int irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, + bool post_sq); +int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, + struct irdma_post_sq_info *info, + bool post_sq); struct irdma_wqe_uk_ops { void (*iw_copy_inline_data)(u8 *dest, u8 *src, u32 len, u8 polarity); @@ -303,16 +299,16 @@ struct irdma_wqe_uk_ops { struct irdma_bind_window *op_info); }; -enum irdma_status_code irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, - struct irdma_cq_poll_info *info); +int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, + struct irdma_cq_poll_info *info); void irdma_uk_cq_request_notification(struct irdma_cq_uk *cq, enum irdma_cmpl_notify cq_notify); void irdma_uk_cq_resize(struct irdma_cq_uk *cq, void *cq_base, int size); void irdma_uk_cq_set_resized_cnt(struct irdma_cq_uk *qp, u16 cnt); void irdma_uk_cq_init(struct irdma_cq_uk *cq, struct irdma_cq_uk_init_info *info); -enum irdma_status_code irdma_uk_qp_init(struct irdma_qp_uk *qp, - struct irdma_qp_uk_init_info *info); +int irdma_uk_qp_init(struct irdma_qp_uk *qp, + struct irdma_qp_uk_init_info *info); struct irdma_sq_uk_wr_trk_info { u64 wrid; u32 wr_len; @@ -413,16 +409,15 @@ __le64 *irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx, struct irdma_post_sq_info *info); __le64 *irdma_qp_get_next_recv_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx); void irdma_uk_clean_cq(void *q, struct irdma_cq_uk *cq); -enum irdma_status_code irdma_nop(struct irdma_qp_uk *qp, u64 wr_id, - bool signaled, bool post_sq); -enum irdma_status_code 
irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta); -enum irdma_status_code irdma_fragcnt_to_wqesize_rq(u32 frag_cnt, u16 *wqe_size); +int irdma_nop(struct irdma_qp_uk *qp, u64 wr_id, bool signaled, bool post_sq); +int irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta); +int irdma_fragcnt_to_wqesize_rq(u32 frag_cnt, u16 *wqe_size); void irdma_get_wqe_shift(struct irdma_uk_attrs *uk_attrs, u32 sge, u32 inline_data, u8 *shift); -enum irdma_status_code irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, - u32 sq_size, u8 shift, u32 *wqdepth); -enum irdma_status_code irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, - u32 rq_size, u8 shift, u32 *wqdepth); +int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, + u32 *wqdepth); +int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift, + u32 *wqdepth); void irdma_qp_push_wqe(struct irdma_qp_uk *qp, __le64 *wqe, u16 quanta, u32 wqe_idx, bool post_sq); void irdma_clr_wqes(struct irdma_qp_uk *qp, u32 qp_wqe_idx); diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index 398736d8c78a..8dfc9e154d73 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -150,31 +150,35 @@ int irdma_inetaddr_event(struct notifier_block *notifier, unsigned long event, void *ptr) { struct in_ifaddr *ifa = ptr; - struct net_device *netdev = ifa->ifa_dev->dev; + struct net_device *real_dev, *netdev = ifa->ifa_dev->dev; struct irdma_device *iwdev; struct ib_device *ibdev; u32 local_ipaddr; - ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_IRDMA); + real_dev = rdma_vlan_dev_real_dev(netdev); + if (!real_dev) + real_dev = netdev; + + ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA); if (!ibdev) return NOTIFY_DONE; iwdev = to_iwdev(ibdev); local_ipaddr = ntohl(ifa->ifa_address); ibdev_dbg(&iwdev->ibdev, - "DEV: netdev %p event %lu local_ip=%pI4 MAC=%pM\n", netdev, - event, &local_ipaddr, netdev->dev_addr); + "DEV: 
netdev %p event %lu local_ip=%pI4 MAC=%pM\n", real_dev, + event, &local_ipaddr, real_dev->dev_addr); switch (event) { case NETDEV_DOWN: - irdma_manage_arp_cache(iwdev->rf, netdev->dev_addr, + irdma_manage_arp_cache(iwdev->rf, real_dev->dev_addr, &local_ipaddr, true, IRDMA_ARP_DELETE); - irdma_if_notify(iwdev, netdev, &local_ipaddr, true, false); + irdma_if_notify(iwdev, real_dev, &local_ipaddr, true, false); irdma_gid_change_event(&iwdev->ibdev); break; case NETDEV_UP: case NETDEV_CHANGEADDR: - irdma_add_arp(iwdev->rf, &local_ipaddr, true, netdev->dev_addr); - irdma_if_notify(iwdev, netdev, &local_ipaddr, true, true); + irdma_add_arp(iwdev->rf, &local_ipaddr, true, real_dev->dev_addr); + irdma_if_notify(iwdev, real_dev, &local_ipaddr, true, true); irdma_gid_change_event(&iwdev->ibdev); break; default: @@ -196,32 +200,36 @@ int irdma_inet6addr_event(struct notifier_block *notifier, unsigned long event, void *ptr) { struct inet6_ifaddr *ifa = ptr; - struct net_device *netdev = ifa->idev->dev; + struct net_device *real_dev, *netdev = ifa->idev->dev; struct irdma_device *iwdev; struct ib_device *ibdev; u32 local_ipaddr6[4]; - ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_IRDMA); + real_dev = rdma_vlan_dev_real_dev(netdev); + if (!real_dev) + real_dev = netdev; + + ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA); if (!ibdev) return NOTIFY_DONE; iwdev = to_iwdev(ibdev); irdma_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32); ibdev_dbg(&iwdev->ibdev, - "DEV: netdev %p event %lu local_ip=%pI6 MAC=%pM\n", netdev, - event, local_ipaddr6, netdev->dev_addr); + "DEV: netdev %p event %lu local_ip=%pI6 MAC=%pM\n", real_dev, + event, local_ipaddr6, real_dev->dev_addr); switch (event) { case NETDEV_DOWN: - irdma_manage_arp_cache(iwdev->rf, netdev->dev_addr, + irdma_manage_arp_cache(iwdev->rf, real_dev->dev_addr, local_ipaddr6, false, IRDMA_ARP_DELETE); - irdma_if_notify(iwdev, netdev, local_ipaddr6, false, false); + irdma_if_notify(iwdev, real_dev, 
local_ipaddr6, false, false); irdma_gid_change_event(&iwdev->ibdev); break; case NETDEV_UP: case NETDEV_CHANGEADDR: irdma_add_arp(iwdev->rf, local_ipaddr6, false, - netdev->dev_addr); - irdma_if_notify(iwdev, netdev, local_ipaddr6, false, true); + real_dev->dev_addr); + irdma_if_notify(iwdev, real_dev, local_ipaddr6, false, true); irdma_gid_change_event(&iwdev->ibdev); break; default: @@ -243,21 +251,23 @@ int irdma_net_event(struct notifier_block *notifier, unsigned long event, void *ptr) { struct neighbour *neigh = ptr; + struct net_device *real_dev, *netdev = (struct net_device *)neigh->dev; struct irdma_device *iwdev; struct ib_device *ibdev; __be32 *p; u32 local_ipaddr[4] = {}; bool ipv4 = true; - ibdev = ib_device_get_by_netdev((struct net_device *)neigh->dev, - RDMA_DRIVER_IRDMA); - if (!ibdev) - return NOTIFY_DONE; - - iwdev = to_iwdev(ibdev); - switch (event) { case NETEVENT_NEIGH_UPDATE: + real_dev = rdma_vlan_dev_real_dev(netdev); + if (!real_dev) + real_dev = netdev; + ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA); + if (!ibdev) + return NOTIFY_DONE; + + iwdev = to_iwdev(ibdev); p = (__be32 *)neigh->primary_key; if (neigh->tbl->family == AF_INET6) { ipv4 = false; @@ -278,13 +288,12 @@ int irdma_net_event(struct notifier_block *notifier, unsigned long event, irdma_manage_arp_cache(iwdev->rf, neigh->ha, local_ipaddr, ipv4, IRDMA_ARP_DELETE); + ib_device_put(ibdev); break; default: break; } - ib_device_put(ibdev); - return NOTIFY_DONE; } @@ -551,12 +560,12 @@ void irdma_cleanup_pending_cqp_op(struct irdma_pci_f *rf) * @rf: RDMA PCI function * @cqp_request: cqp request to wait */ -static enum irdma_status_code irdma_wait_event(struct irdma_pci_f *rf, - struct irdma_cqp_request *cqp_request) +static int irdma_wait_event(struct irdma_pci_f *rf, + struct irdma_cqp_request *cqp_request) { struct irdma_cqp_timeout cqp_timeout = {}; bool cqp_error = false; - enum irdma_status_code err_code = 0; + int err_code = 0; cqp_timeout.compl_cqp_cmds = 
rf->sc_dev.cqp_cmd_stats[IRDMA_OP_CMPL_CMDS]; do { @@ -575,17 +584,20 @@ static enum irdma_status_code irdma_wait_event(struct irdma_pci_f *rf, rf->reset = true; rf->gen_ops.request_reset(rf); } - return IRDMA_ERR_TIMEOUT; + return -ETIMEDOUT; } while (1); cqp_error = cqp_request->compl_info.error; if (cqp_error) { - err_code = IRDMA_ERR_CQP_COMPL_ERROR; - if (cqp_request->compl_info.maj_err_code == 0xFFFF && - cqp_request->compl_info.min_err_code == 0x8029) { - if (!rf->reset) { - rf->reset = true; - rf->gen_ops.request_reset(rf); + err_code = -EIO; + if (cqp_request->compl_info.maj_err_code == 0xFFFF) { + if (cqp_request->compl_info.min_err_code == 0x8002) + err_code = -EBUSY; + else if (cqp_request->compl_info.min_err_code == 0x8029) { + if (!rf->reset) { + rf->reset = true; + rf->gen_ops.request_reset(rf); + } } } } @@ -643,6 +655,7 @@ static const char *const irdma_cqp_cmd_names[IRDMA_MAX_CQP_OPS] = { }; static const struct irdma_cqp_err_info irdma_noncrit_err_list[] = { + {0xffff, 0x8002, "Invalid State"}, {0xffff, 0x8006, "Flush No Wqe Pending"}, {0xffff, 0x8007, "Modify QP Bad Close"}, {0xffff, 0x8009, "LLP Closed"}, @@ -680,16 +693,16 @@ bool irdma_cqp_crit_err(struct irdma_sc_dev *dev, u8 cqp_cmd, * @rf: RDMA PCI function * @cqp_request: cqp request to process */ -enum irdma_status_code irdma_handle_cqp_op(struct irdma_pci_f *rf, - struct irdma_cqp_request *cqp_request) +int irdma_handle_cqp_op(struct irdma_pci_f *rf, + struct irdma_cqp_request *cqp_request) { struct irdma_sc_dev *dev = &rf->sc_dev; struct cqp_cmds_info *info = &cqp_request->info; - enum irdma_status_code status; + int status; bool put_cqp_request = true; if (rf->reset) - return IRDMA_ERR_NOT_READY; + return -EBUSY; irdma_get_cqp_request(cqp_request); status = irdma_process_cqp_cmd(dev, info); @@ -791,17 +804,17 @@ void *irdma_remove_cqp_head(struct irdma_sc_dev *dev) * @sdinfo: information for sd cqp * */ -enum irdma_status_code irdma_cqp_sds_cmd(struct irdma_sc_dev *dev, - struct 
irdma_update_sds_info *sdinfo) +int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev, + struct irdma_update_sds_info *sdinfo) { struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_pci_f *rf = dev_to_rf(dev); - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; memcpy(&cqp_info->in.u.update_pe_sds.info, sdinfo, @@ -822,19 +835,18 @@ enum irdma_status_code irdma_cqp_sds_cmd(struct irdma_sc_dev *dev, * @qp: hardware control qp * @op: suspend or resume */ -enum irdma_status_code irdma_cqp_qp_suspend_resume(struct irdma_sc_qp *qp, - u8 op) +int irdma_cqp_qp_suspend_resume(struct irdma_sc_qp *qp, u8 op) { struct irdma_sc_dev *dev = qp->dev; struct irdma_cqp_request *cqp_request; struct irdma_sc_cqp *cqp = dev->cqp; struct cqp_cmds_info *cqp_info; struct irdma_pci_f *rf = dev_to_rf(dev); - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, false); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; cqp_info->cqp_cmd = op; @@ -940,18 +952,17 @@ void irdma_terminate_del_timer(struct irdma_sc_qp *qp) * @val_mem: buffer for fpm * @hmc_fn_id: function id for fpm */ -enum irdma_status_code -irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev, - struct irdma_dma_mem *val_mem, u8 hmc_fn_id) +int irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev, + struct irdma_dma_mem *val_mem, u8 hmc_fn_id) { struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_pci_f *rf = dev_to_rf(dev); - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; cqp_request->param = NULL; @@ -975,18 +986,17 @@ irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev 
*dev, * @val_mem: buffer with fpm values * @hmc_fn_id: function id for fpm */ -enum irdma_status_code -irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev, - struct irdma_dma_mem *val_mem, u8 hmc_fn_id) +int irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev, + struct irdma_dma_mem *val_mem, u8 hmc_fn_id) { struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_pci_f *rf = dev_to_rf(dev); - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; cqp_request->param = NULL; @@ -1009,18 +1019,17 @@ irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev, * @dev: device pointer * @cq: pointer to created cq */ -enum irdma_status_code irdma_cqp_cq_create_cmd(struct irdma_sc_dev *dev, - struct irdma_sc_cq *cq) +int irdma_cqp_cq_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq) { struct irdma_pci_f *rf = dev_to_rf(dev); struct irdma_cqp *iwcqp = &rf->cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; cqp_info->cqp_cmd = IRDMA_OP_CQ_CREATE; @@ -1039,19 +1048,18 @@ enum irdma_status_code irdma_cqp_cq_create_cmd(struct irdma_sc_dev *dev, * @dev: device pointer * @qp: pointer to created qp */ -enum irdma_status_code irdma_cqp_qp_create_cmd(struct irdma_sc_dev *dev, - struct irdma_sc_qp *qp) +int irdma_cqp_qp_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp) { struct irdma_pci_f *rf = dev_to_rf(dev); struct irdma_cqp *iwcqp = &rf->cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_create_qp_info *qp_info; - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true); if 
(!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; qp_info = &cqp_request->info.in.u.qp_create.info; @@ -1079,7 +1087,7 @@ static void irdma_dealloc_push_page(struct irdma_pci_f *rf, { struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; - enum irdma_status_code status; + int status; if (qp->push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX) return; @@ -1179,12 +1187,10 @@ static void irdma_hw_modify_qp_callback(struct irdma_cqp_request *cqp_request) * @info: info for modify qp * @wait: flag to wait or not for modify qp completion */ -enum irdma_status_code irdma_hw_modify_qp(struct irdma_device *iwdev, - struct irdma_qp *iwqp, - struct irdma_modify_qp_info *info, - bool wait) +int irdma_hw_modify_qp(struct irdma_device *iwdev, struct irdma_qp *iwqp, + struct irdma_modify_qp_info *info, bool wait) { - enum irdma_status_code status; + int status; struct irdma_pci_f *rf = iwdev->rf; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; @@ -1192,7 +1198,7 @@ enum irdma_status_code irdma_hw_modify_qp(struct irdma_device *iwdev, cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, wait); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; if (!wait) { cqp_request->callback_fcn = irdma_hw_modify_qp_callback; @@ -1230,7 +1236,7 @@ enum irdma_status_code irdma_hw_modify_qp(struct irdma_device *iwdev, cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, wait); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; m_info = &cqp_info->in.u.qp_modify.info; @@ -1271,17 +1277,17 @@ void irdma_cqp_cq_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq) * @dev: device pointer * @qp: pointer to qp */ -enum irdma_status_code irdma_cqp_qp_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp) +int irdma_cqp_qp_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp) { struct irdma_pci_f *rf = dev_to_rf(dev); struct irdma_cqp *iwcqp 
= &rf->cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; memset(cqp_info, 0, sizeof(*cqp_info)); @@ -1317,20 +1323,20 @@ void irdma_ieq_mpa_crc_ae(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp) * irdma_init_hash_desc - initialize hash for crc calculation * @desc: cryption type */ -enum irdma_status_code irdma_init_hash_desc(struct shash_desc **desc) +int irdma_init_hash_desc(struct shash_desc **desc) { struct crypto_shash *tfm; struct shash_desc *tdesc; tfm = crypto_alloc_shash("crc32c", 0, 0); if (IS_ERR(tfm)) - return IRDMA_ERR_MPA_CRC; + return -EINVAL; tdesc = kzalloc(sizeof(*tdesc) + crypto_shash_descsize(tfm), GFP_KERNEL); if (!tdesc) { crypto_free_shash(tfm); - return IRDMA_ERR_MPA_CRC; + return -EINVAL; } tdesc->tfm = tfm; @@ -1358,19 +1364,19 @@ void irdma_free_hash_desc(struct shash_desc *desc) * @len: length of buffer * @val: value to be compared */ -enum irdma_status_code irdma_ieq_check_mpacrc(struct shash_desc *desc, - void *addr, u32 len, u32 val) +int irdma_ieq_check_mpacrc(struct shash_desc *desc, void *addr, u32 len, + u32 val) { u32 crc = 0; int ret; - enum irdma_status_code ret_code = 0; + int ret_code = 0; crypto_shash_init(desc); ret = crypto_shash_update(desc, addr, len); if (!ret) crypto_shash_final(desc, (u8 *)&crc); if (crc != val) - ret_code = IRDMA_ERR_MPA_CRC; + ret_code = -EINVAL; return ret_code; } @@ -1524,9 +1530,8 @@ void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len, * @info: to get information * @buf: puda buffer */ -static enum irdma_status_code -irdma_gen1_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info, - struct irdma_puda_buf *buf) +static int irdma_gen1_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info, + struct irdma_puda_buf *buf) { struct iphdr *iph; struct ipv6hdr 
*ip6h; @@ -1563,7 +1568,7 @@ irdma_gen1_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info, ibdev_dbg(to_ibdev(buf->vsi->dev), "ERR: payload_len = 0x%x totallen expected0x%x\n", info->payload_len, buf->totallen); - return IRDMA_ERR_INVALID_SIZE; + return -EINVAL; } buf->tcphlen = tcph->doff << 2; @@ -1580,9 +1585,8 @@ irdma_gen1_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info, * @info: to get information * @buf: puda buffer */ -enum irdma_status_code -irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info, - struct irdma_puda_buf *buf) +int irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info, + struct irdma_puda_buf *buf) { struct tcphdr *tcph; u32 pkt_len; @@ -1861,20 +1865,19 @@ static void irdma_process_cqp_stats(struct irdma_cqp_request *cqp_request) * @pestat: pointer to stats info * @wait: flag to wait or not wait for stats */ -enum irdma_status_code -irdma_cqp_gather_stats_cmd(struct irdma_sc_dev *dev, - struct irdma_vsi_pestat *pestat, bool wait) +int irdma_cqp_gather_stats_cmd(struct irdma_sc_dev *dev, + struct irdma_vsi_pestat *pestat, bool wait) { struct irdma_pci_f *rf = dev_to_rf(dev); struct irdma_cqp *iwcqp = &rf->cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, wait); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; memset(cqp_info, 0, sizeof(*cqp_info)); @@ -1900,22 +1903,21 @@ irdma_cqp_gather_stats_cmd(struct irdma_sc_dev *dev, * @cmd: command to allocate or free * @stats_info: pointer to allocate stats info */ -enum irdma_status_code -irdma_cqp_stats_inst_cmd(struct irdma_sc_vsi *vsi, u8 cmd, - struct irdma_stats_inst_info *stats_info) +int irdma_cqp_stats_inst_cmd(struct irdma_sc_vsi *vsi, u8 cmd, + struct irdma_stats_inst_info *stats_info) { struct irdma_pci_f *rf = dev_to_rf(vsi->dev); struct irdma_cqp *iwcqp = &rf->cqp; struct irdma_cqp_request 
*cqp_request; struct cqp_cmds_info *cqp_info; - enum irdma_status_code status; + int status; bool wait = false; if (cmd == IRDMA_OP_STATS_ALLOCATE) wait = true; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, wait); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; memset(cqp_info, 0, sizeof(*cqp_info)); @@ -1938,17 +1940,17 @@ irdma_cqp_stats_inst_cmd(struct irdma_sc_vsi *vsi, u8 cmd, * @sc_ceq: pointer to ceq structure * @op: Create or Destroy */ -enum irdma_status_code irdma_cqp_ceq_cmd(struct irdma_sc_dev *dev, - struct irdma_sc_ceq *sc_ceq, u8 op) +int irdma_cqp_ceq_cmd(struct irdma_sc_dev *dev, struct irdma_sc_ceq *sc_ceq, + u8 op) { struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_pci_f *rf = dev_to_rf(dev); - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; cqp_info->post_sq = 1; @@ -1968,17 +1970,17 @@ enum irdma_status_code irdma_cqp_ceq_cmd(struct irdma_sc_dev *dev, * @sc_aeq: pointer to aeq structure * @op: Create or Destroy */ -enum irdma_status_code irdma_cqp_aeq_cmd(struct irdma_sc_dev *dev, - struct irdma_sc_aeq *sc_aeq, u8 op) +int irdma_cqp_aeq_cmd(struct irdma_sc_dev *dev, struct irdma_sc_aeq *sc_aeq, + u8 op) { struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_pci_f *rf = dev_to_rf(dev); - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; cqp_info->post_sq = 1; @@ -1998,16 +2000,15 @@ enum irdma_status_code irdma_cqp_aeq_cmd(struct irdma_sc_dev *dev, * @cmd: Add, modify or delete * @node_info: pointer to ws node info */ -enum irdma_status_code -irdma_cqp_ws_node_cmd(struct irdma_sc_dev *dev, u8 cmd, - struct 
irdma_ws_node_info *node_info) +int irdma_cqp_ws_node_cmd(struct irdma_sc_dev *dev, u8 cmd, + struct irdma_ws_node_info *node_info) { struct irdma_pci_f *rf = dev_to_rf(dev); struct irdma_cqp *iwcqp = &rf->cqp; struct irdma_sc_cqp *cqp = &iwcqp->sc_cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; - enum irdma_status_code status; + int status; bool poll; if (!rf->sc_dev.ceq_valid) @@ -2017,7 +2018,7 @@ irdma_cqp_ws_node_cmd(struct irdma_sc_dev *dev, u8 cmd, cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, !poll); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; memset(cqp_info, 0, sizeof(*cqp_info)); @@ -2066,7 +2067,7 @@ int irdma_ah_cqp_op(struct irdma_pci_f *rf, struct irdma_sc_ah *sc_ah, u8 cmd, { struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; - enum irdma_status_code status; + int status; if (cmd != IRDMA_OP_AH_CREATE && cmd != IRDMA_OP_AH_DESTROY) return -EINVAL; @@ -2148,11 +2149,10 @@ static void irdma_ilq_ah_cb(struct irdma_cqp_request *cqp_request) * @ah_ret: Returned pointer to address handle if created * */ -enum irdma_status_code irdma_puda_create_ah(struct irdma_sc_dev *dev, - struct irdma_ah_info *ah_info, - bool wait, enum puda_rsrc_type type, - void *cb_param, - struct irdma_sc_ah **ah_ret) +int irdma_puda_create_ah(struct irdma_sc_dev *dev, + struct irdma_ah_info *ah_info, bool wait, + enum puda_rsrc_type type, void *cb_param, + struct irdma_sc_ah **ah_ret) { struct irdma_sc_ah *ah; struct irdma_pci_f *rf = dev_to_rf(dev); @@ -2161,7 +2161,7 @@ enum irdma_status_code irdma_puda_create_ah(struct irdma_sc_dev *dev, ah = kzalloc(sizeof(*ah), GFP_ATOMIC); *ah_ret = ah; if (!ah) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; err = irdma_alloc_rsrc(rf, rf->allocated_ahs, rf->max_ah, &ah_info->ah_idx, &rf->next_ah); @@ -2187,7 +2187,7 @@ error: err_free: kfree(ah); *ah_ret = NULL; - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; } /** @@ -2229,19 
+2229,19 @@ void irdma_gsi_ud_qp_ah_cb(struct irdma_cqp_request *cqp_request) * @pprm: pble resource manager * @pchunk: chunk of memory to add */ -enum irdma_status_code irdma_prm_add_pble_mem(struct irdma_pble_prm *pprm, - struct irdma_chunk *pchunk) +int irdma_prm_add_pble_mem(struct irdma_pble_prm *pprm, + struct irdma_chunk *pchunk) { u64 sizeofbitmap; if (pchunk->size & 0xfff) - return IRDMA_ERR_PARAM; + return -EINVAL; sizeofbitmap = (u64)pchunk->size >> pprm->pble_shift; pchunk->bitmapbuf = bitmap_zalloc(sizeofbitmap, GFP_KERNEL); if (!pchunk->bitmapbuf) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; pchunk->sizeofbitmap = sizeofbitmap; /* each pble is 8 bytes hence shift by 3 */ @@ -2259,10 +2259,9 @@ enum irdma_status_code irdma_prm_add_pble_mem(struct irdma_pble_prm *pprm, * @vaddr: returns virtual address of pble memory * @fpm_addr: returns fpm address of pble memory */ -enum irdma_status_code -irdma_prm_get_pbles(struct irdma_pble_prm *pprm, - struct irdma_pble_chunkinfo *chunkinfo, u64 mem_size, - u64 **vaddr, u64 *fpm_addr) +int irdma_prm_get_pbles(struct irdma_pble_prm *pprm, + struct irdma_pble_chunkinfo *chunkinfo, u64 mem_size, + u64 **vaddr, u64 *fpm_addr) { u64 bits_needed; u64 bit_idx = PBLE_INVALID_IDX; @@ -2290,7 +2289,7 @@ irdma_prm_get_pbles(struct irdma_pble_prm *pprm, if (!pchunk || bit_idx >= pchunk->sizeofbitmap) { spin_unlock_irqrestore(&pprm->prm_lock, flags); - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; } bitmap_set(pchunk->bitmapbuf, bit_idx, bits_needed); @@ -2325,8 +2324,8 @@ void irdma_prm_return_pbles(struct irdma_pble_prm *pprm, spin_unlock_irqrestore(&pprm->prm_lock, flags); } -enum irdma_status_code irdma_map_vm_page_list(struct irdma_hw *hw, void *va, - dma_addr_t *pg_dma, u32 pg_cnt) +int irdma_map_vm_page_list(struct irdma_hw *hw, void *va, dma_addr_t *pg_dma, + u32 pg_cnt) { struct page *vm_page; int i; @@ -2350,7 +2349,7 @@ enum irdma_status_code irdma_map_vm_page_list(struct irdma_hw *hw, void *va, err: 
irdma_unmap_vm_page_list(hw, pg_dma, i); - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; } void irdma_unmap_vm_page_list(struct irdma_hw *hw, dma_addr_t *pg_dma, u32 pg_cnt) @@ -2386,15 +2385,14 @@ done: * @chunk: chunk to add for paged memory * @pg_cnt: number of pages needed */ -enum irdma_status_code irdma_pble_get_paged_mem(struct irdma_chunk *chunk, - u32 pg_cnt) +int irdma_pble_get_paged_mem(struct irdma_chunk *chunk, u32 pg_cnt) { u32 size; void *va; chunk->dmainfo.dmaaddrs = kzalloc(pg_cnt << 3, GFP_KERNEL); if (!chunk->dmainfo.dmaaddrs) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; size = PAGE_SIZE * pg_cnt; va = vmalloc(size); @@ -2416,7 +2414,7 @@ err: kfree(chunk->dmainfo.dmaaddrs); chunk->dmainfo.dmaaddrs = NULL; - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; } /** @@ -2481,6 +2479,9 @@ void irdma_ib_qp_event(struct irdma_qp *iwqp, enum irdma_qp_event_type event) case IRDMA_QP_EVENT_ACCESS_ERR: ibevent.event = IB_EVENT_QP_ACCESS_ERR; break; + case IRDMA_QP_EVENT_REQ_ERR: + ibevent.event = IB_EVENT_QP_REQ_ERR; + break; } ibevent.device = iwqp->ibqp.device; ibevent.element.qp = &iwqp->ibqp; @@ -2501,3 +2502,150 @@ bool irdma_cq_empty(struct irdma_cq *iwcq) return polarity != ukcq->polarity; } + +void irdma_remove_cmpls_list(struct irdma_cq *iwcq) +{ + struct irdma_cmpl_gen *cmpl_node; + struct list_head *tmp_node, *list_node; + + list_for_each_safe (list_node, tmp_node, &iwcq->cmpl_generated) { + cmpl_node = list_entry(list_node, struct irdma_cmpl_gen, list); + list_del(&cmpl_node->list); + kfree(cmpl_node); + } +} + +int irdma_generated_cmpls(struct irdma_cq *iwcq, struct irdma_cq_poll_info *cq_poll_info) +{ + struct irdma_cmpl_gen *cmpl; + + if (list_empty(&iwcq->cmpl_generated)) + return -ENOENT; + cmpl = list_first_entry_or_null(&iwcq->cmpl_generated, struct irdma_cmpl_gen, list); + list_del(&cmpl->list); + memcpy(cq_poll_info, &cmpl->cpi, sizeof(*cq_poll_info)); + kfree(cmpl); + + ibdev_dbg(iwcq->ibcq.device, + "VERBS: %s: Poll artificially 
generated completion for QP 0x%X, op %u, wr_id=0x%llx\n", + __func__, cq_poll_info->qp_id, cq_poll_info->op_type, + cq_poll_info->wr_id); + + return 0; +} + +/** + * irdma_set_cpi_common_values - fill in values for polling info struct + * @cpi: resulting structure of cq_poll_info type + * @qp: QPair + * @qp_num: id of the QP + */ +static void irdma_set_cpi_common_values(struct irdma_cq_poll_info *cpi, + struct irdma_qp_uk *qp, u32 qp_num) +{ + cpi->comp_status = IRDMA_COMPL_STATUS_FLUSHED; + cpi->error = true; + cpi->major_err = IRDMA_FLUSH_MAJOR_ERR; + cpi->minor_err = FLUSH_GENERAL_ERR; + cpi->qp_handle = (irdma_qp_handle)(uintptr_t)qp; + cpi->qp_id = qp_num; +} + +static inline void irdma_comp_handler(struct irdma_cq *cq) +{ + if (!cq->ibcq.comp_handler) + return; + if (atomic_cmpxchg(&cq->armed, 1, 0)) + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); +} + +void irdma_generate_flush_completions(struct irdma_qp *iwqp) +{ + struct irdma_qp_uk *qp = &iwqp->sc_qp.qp_uk; + struct irdma_ring *sq_ring = &qp->sq_ring; + struct irdma_ring *rq_ring = &qp->rq_ring; + struct irdma_cmpl_gen *cmpl; + __le64 *sw_wqe; + u64 wqe_qword; + u32 wqe_idx; + bool compl_generated = false; + unsigned long flags1; + + spin_lock_irqsave(&iwqp->iwscq->lock, flags1); + if (irdma_cq_empty(iwqp->iwscq)) { + unsigned long flags2; + + spin_lock_irqsave(&iwqp->lock, flags2); + while (IRDMA_RING_MORE_WORK(*sq_ring)) { + cmpl = kzalloc(sizeof(*cmpl), GFP_ATOMIC); + if (!cmpl) { + spin_unlock_irqrestore(&iwqp->lock, flags2); + spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1); + return; + } + + wqe_idx = sq_ring->tail; + irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id); + + cmpl->cpi.wr_id = qp->sq_wrtrk_array[wqe_idx].wrid; + sw_wqe = qp->sq_base[wqe_idx].elem; + get_64bit_val(sw_wqe, 24, &wqe_qword); + cmpl->cpi.op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE, IRDMAQPSQ_OPCODE); + /* remove the SQ WR by moving SQ tail*/ + IRDMA_RING_SET_TAIL(*sq_ring, + sq_ring->tail + 
qp->sq_wrtrk_array[sq_ring->tail].quanta); + + ibdev_dbg(iwqp->iwscq->ibcq.device, + "DEV: %s: adding wr_id = 0x%llx SQ Completion to list qp_id=%d\n", + __func__, cmpl->cpi.wr_id, qp->qp_id); + list_add_tail(&cmpl->list, &iwqp->iwscq->cmpl_generated); + compl_generated = true; + } + spin_unlock_irqrestore(&iwqp->lock, flags2); + spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1); + if (compl_generated) + irdma_comp_handler(iwqp->iwscq); + } else { + spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1); + mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush, + msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS)); + } + + spin_lock_irqsave(&iwqp->iwrcq->lock, flags1); + if (irdma_cq_empty(iwqp->iwrcq)) { + unsigned long flags2; + + spin_lock_irqsave(&iwqp->lock, flags2); + while (IRDMA_RING_MORE_WORK(*rq_ring)) { + cmpl = kzalloc(sizeof(*cmpl), GFP_ATOMIC); + if (!cmpl) { + spin_unlock_irqrestore(&iwqp->lock, flags2); + spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1); + return; + } + + wqe_idx = rq_ring->tail; + irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id); + + cmpl->cpi.wr_id = qp->rq_wrid_array[wqe_idx]; + cmpl->cpi.op_type = IRDMA_OP_TYPE_REC; + /* remove the RQ WR by moving RQ tail */ + IRDMA_RING_SET_TAIL(*rq_ring, rq_ring->tail + 1); + ibdev_dbg(iwqp->iwrcq->ibcq.device, + "DEV: %s: adding wr_id = 0x%llx RQ Completion to list qp_id=%d, wqe_idx=%d\n", + __func__, cmpl->cpi.wr_id, qp->qp_id, + wqe_idx); + list_add_tail(&cmpl->list, &iwqp->iwrcq->cmpl_generated); + + compl_generated = true; + } + spin_unlock_irqrestore(&iwqp->lock, flags2); + spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1); + if (compl_generated) + irdma_comp_handler(iwqp->iwrcq); + } else { + spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1); + mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush, + msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS)); + } +} diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 8cd5f9261692..a22afbb25bc5 100644 --- 
a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -21,30 +21,36 @@ static int irdma_query_device(struct ib_device *ibdev, return -EINVAL; memset(props, 0, sizeof(*props)); - ether_addr_copy((u8 *)&props->sys_image_guid, iwdev->netdev->dev_addr); + addrconf_addr_eui48((u8 *)&props->sys_image_guid, + iwdev->netdev->dev_addr); props->fw_ver = (u64)irdma_fw_major_ver(&rf->sc_dev) << 32 | irdma_fw_minor_ver(&rf->sc_dev); - props->device_cap_flags = iwdev->device_cap_flags; + props->device_cap_flags = IB_DEVICE_MEM_WINDOW | + IB_DEVICE_MEM_MGT_EXTENSIONS; + props->kernel_cap_flags = IBK_LOCAL_DMA_LKEY; props->vendor_id = pcidev->vendor; props->vendor_part_id = pcidev->device; props->hw_ver = rf->pcidev->revision; - props->page_size_cap = SZ_4K | SZ_2M | SZ_1G; + props->page_size_cap = hw_attrs->page_size_cap; props->max_mr_size = hw_attrs->max_mr_size; props->max_qp = rf->max_qp - rf->used_qps; props->max_qp_wr = hw_attrs->max_qp_wr; props->max_send_sge = hw_attrs->uk_attrs.max_hw_wq_frags; props->max_recv_sge = hw_attrs->uk_attrs.max_hw_wq_frags; props->max_cq = rf->max_cq - rf->used_cqs; - props->max_cqe = rf->max_cqe; + props->max_cqe = rf->max_cqe - 1; props->max_mr = rf->max_mr - rf->used_mrs; props->max_mw = props->max_mr; props->max_pd = rf->max_pd - rf->used_pds; props->max_sge_rd = hw_attrs->uk_attrs.max_hw_read_sges; props->max_qp_rd_atom = hw_attrs->max_hw_ird; props->max_qp_init_rd_atom = hw_attrs->max_hw_ord; - if (rdma_protocol_roce(ibdev, 1)) + if (rdma_protocol_roce(ibdev, 1)) { + props->device_cap_flags |= IB_DEVICE_RC_RNR_NAK_GEN; props->max_pkeys = IRDMA_PKEY_TBL_SZ; + } + props->max_ah = rf->max_ah; props->max_mcast_grp = rf->max_mcg; props->max_mcast_qp_attach = IRDMA_MAX_MGS_PER_CTX; @@ -255,7 +261,7 @@ static void irdma_alloc_push_page(struct irdma_qp *iwqp) struct cqp_cmds_info *cqp_info; struct irdma_device *iwdev = iwqp->iwdev; struct irdma_sc_qp *qp = &iwqp->sc_qp; - enum irdma_status_code status; + int status; 
cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true); if (!cqp_request) @@ -293,13 +299,19 @@ static void irdma_alloc_push_page(struct irdma_qp *iwqp) static int irdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { +#define IRDMA_ALLOC_UCTX_MIN_REQ_LEN offsetofend(struct irdma_alloc_ucontext_req, rsvd8) +#define IRDMA_ALLOC_UCTX_MIN_RESP_LEN offsetofend(struct irdma_alloc_ucontext_resp, rsvd) struct ib_device *ibdev = uctx->device; struct irdma_device *iwdev = to_iwdev(ibdev); - struct irdma_alloc_ucontext_req req; + struct irdma_alloc_ucontext_req req = {}; struct irdma_alloc_ucontext_resp uresp = {}; struct irdma_ucontext *ucontext = to_ucontext(uctx); struct irdma_uk_attrs *uk_attrs; + if (udata->inlen < IRDMA_ALLOC_UCTX_MIN_REQ_LEN || + udata->outlen < IRDMA_ALLOC_UCTX_MIN_RESP_LEN) + return -EINVAL; + if (ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen))) return -EINVAL; @@ -311,7 +323,7 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx, uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs; /* GEN_1 legacy support with libi40iw */ - if (udata->outlen < sizeof(uresp)) { + if (udata->outlen == IRDMA_ALLOC_UCTX_MIN_RESP_LEN) { if (uk_attrs->hw_rev != IRDMA_GEN_1) return -EOPNOTSUPP; @@ -383,6 +395,7 @@ static void irdma_dealloc_ucontext(struct ib_ucontext *context) */ static int irdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata) { +#define IRDMA_ALLOC_PD_MIN_RESP_LEN offsetofend(struct irdma_alloc_pd_resp, rsvd) struct irdma_pd *iwpd = to_iwpd(pd); struct irdma_device *iwdev = to_iwdev(pd->device); struct irdma_sc_dev *dev = &iwdev->rf->sc_dev; @@ -392,6 +405,9 @@ static int irdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata) u32 pd_id = 0; int err; + if (udata && udata->outlen < IRDMA_ALLOC_PD_MIN_RESP_LEN) + return -EINVAL; + err = irdma_alloc_rsrc(rf, rf->allocated_pds, rf->max_pd, &pd_id, &rf->next_pd); if (err) @@ -532,6 +548,9 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata 
*udata) if (iwqp->iwarp_state == IRDMA_QP_STATE_RTS) irdma_modify_qp_to_err(&iwqp->sc_qp); + if (!iwqp->user_mode) + cancel_delayed_work_sync(&iwqp->dwork_flush); + irdma_qp_rem_ref(&iwqp->ibqp); wait_for_completion(&iwqp->free_qp); irdma_free_lsmm_rsrc(iwqp); @@ -591,7 +610,7 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev, u32 sqdepth, rqdepth; u8 sqshift, rqshift; u32 size; - enum irdma_status_code status; + int status; struct irdma_qp_uk_init_info *ukinfo = &info->qp_uk_init_info; struct irdma_uk_attrs *uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs; @@ -602,7 +621,7 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev, status = irdma_get_sqdepth(uk_attrs, ukinfo->sq_size, sqshift, &sqdepth); if (status) - return -ENOMEM; + return status; if (uk_attrs->hw_rev == IRDMA_GEN_1) rqshift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1; @@ -613,7 +632,7 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev, status = irdma_get_rqdepth(uk_attrs, ukinfo->rq_size, rqshift, &rqdepth); if (status) - return -ENOMEM; + return status; iwqp->kqp.sq_wrid_mem = kcalloc(sqdepth, sizeof(*iwqp->kqp.sq_wrid_mem), GFP_KERNEL); @@ -667,7 +686,7 @@ static int irdma_cqp_create_qp_cmd(struct irdma_qp *iwqp) struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_create_qp_info *qp_info; - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); if (!cqp_request) @@ -687,7 +706,7 @@ static int irdma_cqp_create_qp_cmd(struct irdma_qp *iwqp) status = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, cqp_request); - return status ? 
-ENOMEM : 0; + return status; } static void irdma_roce_fill_and_set_qpctx_info(struct irdma_qp *iwqp, @@ -787,6 +806,14 @@ static int irdma_validate_qp_attrs(struct ib_qp_init_attr *init_attr, return 0; } +static void irdma_flush_worker(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct irdma_qp *iwqp = container_of(dwork, struct irdma_qp, dwork_flush); + + irdma_generate_flush_completions(iwqp); +} + /** * irdma_create_qp - create qp * @ibqp: ptr of qp @@ -797,15 +824,16 @@ static int irdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { +#define IRDMA_CREATE_QP_MIN_REQ_LEN offsetofend(struct irdma_create_qp_req, user_compl_ctx) +#define IRDMA_CREATE_QP_MIN_RESP_LEN offsetofend(struct irdma_create_qp_resp, rsvd) struct ib_pd *ibpd = ibqp->pd; struct irdma_pd *iwpd = to_iwpd(ibpd); struct irdma_device *iwdev = to_iwdev(ibpd->device); struct irdma_pci_f *rf = iwdev->rf; struct irdma_qp *iwqp = to_iwqp(ibqp); - struct irdma_create_qp_req req; + struct irdma_create_qp_req req = {}; struct irdma_create_qp_resp uresp = {}; u32 qp_num = 0; - enum irdma_status_code ret; int err_code; int sq_size; int rq_size; @@ -820,6 +848,10 @@ static int irdma_create_qp(struct ib_qp *ibqp, if (err_code) return err_code; + if (udata && (udata->inlen < IRDMA_CREATE_QP_MIN_REQ_LEN || + udata->outlen < IRDMA_CREATE_QP_MIN_RESP_LEN)) + return -EINVAL; + sq_size = init_attr->cap.max_send_wr; rq_size = init_attr->cap.max_recv_wr; @@ -907,6 +939,7 @@ static int irdma_create_qp(struct ib_qp *ibqp, init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver; irdma_setup_virt_qp(iwdev, iwqp, &init_info); } else { + INIT_DELAYED_WORK(&iwqp->dwork_flush, irdma_flush_worker); init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER; err_code = irdma_setup_kmode_qp(iwdev, iwqp, &init_info, init_attr); } @@ -935,9 +968,8 @@ static int irdma_create_qp(struct ib_qp *ibqp, if (dev->hw_attrs.uk_attrs.hw_rev > IRDMA_GEN_1) 
init_info.qp_uk_init_info.qp_caps |= IRDMA_PUSH_MODE; - ret = irdma_sc_qp_init(qp, &init_info); - if (ret) { - err_code = -EPROTO; + err_code = irdma_sc_qp_init(qp, &init_info); + if (err_code) { ibdev_dbg(&iwdev->ibdev, "VERBS: qp_init fail\n"); goto error; } @@ -1104,6 +1136,8 @@ static int irdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index, int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { +#define IRDMA_MODIFY_QP_MIN_REQ_LEN offsetofend(struct irdma_modify_qp_req, rq_flush) +#define IRDMA_MODIFY_QP_MIN_RESP_LEN offsetofend(struct irdma_modify_qp_resp, push_valid) struct irdma_pd *iwpd = to_iwpd(ibqp->pd); struct irdma_qp *iwqp = to_iwqp(ibqp); struct irdma_device *iwdev = iwqp->iwdev; @@ -1122,6 +1156,13 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, roce_info = &iwqp->roce_info; udp_info = &iwqp->udp_info; + if (udata) { + /* udata inlen/outlen can be 0 when supporting legacy libi40iw */ + if ((udata->inlen && udata->inlen < IRDMA_MODIFY_QP_MIN_REQ_LEN) || + (udata->outlen && udata->outlen < IRDMA_MODIFY_QP_MIN_RESP_LEN)) + return -EINVAL; + } + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) return -EOPNOTSUPP; @@ -1170,6 +1211,10 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, udp_info->ttl = attr->ah_attr.grh.hop_limit; udp_info->flow_label = attr->ah_attr.grh.flow_label; udp_info->tos = attr->ah_attr.grh.traffic_class; + udp_info->src_port = + rdma_get_udp_sport(udp_info->flow_label, + ibqp->qp_num, + roce_info->dest_qp); irdma_qp_rem_qos(&iwqp->sc_qp); dev->ws_remove(iwqp->sc_qp.vsi, ctx_info->user_pri); ctx_info->user_pri = rt_tos2priority(udp_info->tos); @@ -1184,7 +1229,7 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (ret) return ret; - if (vlan_id >= VLAN_N_VID && iwdev->dcb) + if (vlan_id >= VLAN_N_VID && iwdev->dcb_vlan_mode) vlan_id = 0; if (vlan_id < VLAN_N_VID) { udp_info->insert_vlan_tag = true; @@ 
-1197,7 +1242,7 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, av->attrs = attr->ah_attr; rdma_gid2ip((struct sockaddr *)&av->sgid_addr, &sgid_attr->gid); rdma_gid2ip((struct sockaddr *)&av->dgid_addr, &attr->ah_attr.grh.dgid); - if (av->sgid_addr.saddr.sa_family == AF_INET6) { + if (av->net_type == RDMA_NETWORK_IPV6) { __be32 *daddr = av->dgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32; __be32 *saddr = @@ -1213,7 +1258,7 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, &local_ip[0], false, NULL, IRDMA_ARP_RESOLVE); - } else { + } else if (av->net_type == RDMA_NETWORK_IPV4) { __be32 saddr = av->sgid_addr.saddr_in.sin_addr.s_addr; __be32 daddr = av->dgid_addr.saddr_in.sin_addr.s_addr; @@ -1354,7 +1399,7 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) { spin_unlock_irqrestore(&iwqp->lock, flags); - if (udata) { + if (udata && udata->inlen) { if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), udata->inlen))) return -EINVAL; @@ -1395,18 +1440,18 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, } if (iwqp->ibqp_state > IB_QPS_RTS && !iwqp->flush_issued) { - iwqp->flush_issued = 1; spin_unlock_irqrestore(&iwqp->lock, flags); irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ | IRDMA_FLUSH_RQ | IRDMA_FLUSH_WAIT); + iwqp->flush_issued = 1; } else { spin_unlock_irqrestore(&iwqp->lock, flags); } } else { iwqp->ibqp_state = attr->qp_state; } - if (udata && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { + if (udata && udata->outlen && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { struct irdma_ucontext *ucontext; ucontext = rdma_udata_to_drv_context(udata, @@ -1446,6 +1491,8 @@ exit: int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { +#define IRDMA_MODIFY_QP_MIN_REQ_LEN offsetofend(struct irdma_modify_qp_req, rq_flush) +#define IRDMA_MODIFY_QP_MIN_RESP_LEN offsetofend(struct 
irdma_modify_qp_resp, push_valid) struct irdma_qp *iwqp = to_iwqp(ibqp); struct irdma_device *iwdev = iwqp->iwdev; struct irdma_sc_dev *dev = &iwdev->rf->sc_dev; @@ -1460,6 +1507,13 @@ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, int err; unsigned long flags; + if (udata) { + /* udata inlen/outlen can be 0 when supporting legacy libi40iw */ + if ((udata->inlen && udata->inlen < IRDMA_MODIFY_QP_MIN_REQ_LEN) || + (udata->outlen && udata->outlen < IRDMA_MODIFY_QP_MIN_RESP_LEN)) + return -EINVAL; + } + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) return -EOPNOTSUPP; @@ -1545,7 +1599,7 @@ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, case IB_QPS_RESET: if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) { spin_unlock_irqrestore(&iwqp->lock, flags); - if (udata) { + if (udata && udata->inlen) { if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), udata->inlen))) return -EINVAL; @@ -1615,13 +1669,13 @@ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, if (issue_modify_qp && iwqp->ibqp_state > IB_QPS_RTS) { if (dont_wait) { - if (iwqp->cm_id && iwqp->hw_tcp_state) { + if (iwqp->hw_tcp_state) { spin_lock_irqsave(&iwqp->lock, flags); iwqp->hw_tcp_state = IRDMA_TCP_STATE_CLOSED; iwqp->last_aeq = IRDMA_AE_RESET_SENT; spin_unlock_irqrestore(&iwqp->lock, flags); - irdma_cm_disconn(iwqp); } + irdma_cm_disconn(iwqp); } else { int close_timer_started; @@ -1642,7 +1696,7 @@ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, } } } - if (attr_mask & IB_QP_STATE && udata && + if (attr_mask & IB_QP_STATE && udata && udata->outlen && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { struct irdma_ucontext *ucontext; @@ -1752,16 +1806,18 @@ static int irdma_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) unsigned long flags; spin_lock_irqsave(&iwcq->lock, flags); + if (!list_empty(&iwcq->cmpl_generated)) + irdma_remove_cmpls_list(iwcq); if 
(!list_empty(&iwcq->resize_list)) irdma_process_resize_list(iwcq, iwdev, NULL); spin_unlock_irqrestore(&iwcq->lock, flags); irdma_cq_wq_destroy(iwdev->rf, cq); - irdma_cq_free_rsrc(iwdev->rf, iwcq); spin_lock_irqsave(&iwceq->ce_lock, flags); irdma_sc_cleanup_ceqes(cq, ceq); spin_unlock_irqrestore(&iwceq->ce_lock, flags); + irdma_cq_free_rsrc(iwdev->rf, iwcq); return 0; } @@ -1775,6 +1831,7 @@ static int irdma_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) static int irdma_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) { +#define IRDMA_RESIZE_CQ_MIN_REQ_LEN offsetofend(struct irdma_resize_cq_req, user_cq_buffer) struct irdma_cq *iwcq = to_iwcq(ibcq); struct irdma_sc_dev *dev = iwcq->sc_cq.dev; struct irdma_cqp_request *cqp_request; @@ -1787,7 +1844,6 @@ static int irdma_resize_cq(struct ib_cq *ibcq, int entries, struct irdma_device *iwdev; struct irdma_pci_f *rf; struct irdma_cq_buf *cq_buf = NULL; - enum irdma_status_code status = 0; unsigned long flags; int ret; @@ -1798,6 +1854,9 @@ static int irdma_resize_cq(struct ib_cq *ibcq, int entries, IRDMA_FEATURE_CQ_RESIZE)) return -EOPNOTSUPP; + if (udata && udata->inlen < IRDMA_RESIZE_CQ_MIN_REQ_LEN) + return -EINVAL; + if (entries > rf->max_cqe) return -EINVAL; @@ -1880,12 +1939,10 @@ static int irdma_resize_cq(struct ib_cq *ibcq, int entries, cqp_info->in.u.cq_modify.cq = &iwcq->sc_cq; cqp_info->in.u.cq_modify.scratch = (uintptr_t)cqp_request; cqp_info->post_sq = 1; - status = irdma_handle_cqp_op(rf, cqp_request); + ret = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, cqp_request); - if (status) { - ret = -EPROTO; + if (ret) goto error; - } spin_lock_irqsave(&iwcq->lock, flags); if (cq_buf) { @@ -1932,6 +1989,8 @@ static int irdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata) { +#define IRDMA_CREATE_CQ_MIN_REQ_LEN offsetofend(struct irdma_create_cq_req, user_cq_buf) +#define IRDMA_CREATE_CQ_MIN_RESP_LEN offsetofend(struct 
irdma_create_cq_resp, cq_size) struct ib_device *ibdev = ibcq->device; struct irdma_device *iwdev = to_iwdev(ibdev); struct irdma_pci_f *rf = iwdev->rf; @@ -1940,7 +1999,6 @@ static int irdma_create_cq(struct ib_cq *ibcq, struct irdma_sc_cq *cq; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_cq_init_info info = {}; - enum irdma_status_code status; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_cq_uk_init_info *ukinfo = &info.cq_uk_init_info; @@ -1951,6 +2009,11 @@ static int irdma_create_cq(struct ib_cq *ibcq, err_code = cq_validate_flags(attr->flags, dev->hw_attrs.uk_attrs.hw_rev); if (err_code) return err_code; + + if (udata && (udata->inlen < IRDMA_CREATE_CQ_MIN_REQ_LEN || + udata->outlen < IRDMA_CREATE_CQ_MIN_RESP_LEN)) + return -EINVAL; + err_code = irdma_alloc_rsrc(rf, rf->allocated_cqs, rf->max_cq, &cq_num, &rf->next_cq); if (err_code) @@ -1960,6 +2023,7 @@ static int irdma_create_cq(struct ib_cq *ibcq, cq->back_cq = iwcq; spin_lock_init(&iwcq->lock); INIT_LIST_HEAD(&iwcq->resize_list); + INIT_LIST_HEAD(&iwcq->cmpl_generated); info.dev = dev; ukinfo->cq_size = max(entries, 4); ukinfo->cq_id = cq_num; @@ -2090,12 +2154,10 @@ static int irdma_create_cq(struct ib_cq *ibcq, cqp_info->in.u.cq_create.cq = cq; cqp_info->in.u.cq_create.check_overflow = true; cqp_info->in.u.cq_create.scratch = (uintptr_t)cqp_request; - status = irdma_handle_cqp_op(rf, cqp_request); + err_code = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, cqp_request); - if (status) { - err_code = -ENOMEM; + if (err_code) goto cq_free_rsrc; - } if (udata) { struct irdma_create_cq_resp resp = {}; @@ -2304,14 +2366,14 @@ static int irdma_setup_pbles(struct irdma_pci_f *rf, struct irdma_mr *iwmr, struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc; struct irdma_pble_info *pinfo; u64 *pbl; - enum irdma_status_code status; + int status; enum irdma_pble_level level = PBLE_LEVEL_1; if (use_pbles) { status = irdma_get_pble(rf->pble_rsrc, 
palloc, iwmr->page_cnt, false); if (status) - return -ENOMEM; + return status; iwpbl->pbl_allocated = true; level = palloc->level; @@ -2429,7 +2491,7 @@ static int irdma_hw_alloc_mw(struct irdma_device *iwdev, struct irdma_mr *iwmr) struct irdma_pd *iwpd = to_iwpd(iwmr->ibmr.pd); struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true); if (!cqp_request) @@ -2452,7 +2514,7 @@ static int irdma_hw_alloc_mw(struct irdma_device *iwdev, struct irdma_mr *iwmr) status = irdma_handle_cqp_op(iwdev->rf, cqp_request); irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); - return status ? -ENOMEM : 0; + return status; } /** @@ -2504,7 +2566,7 @@ static int irdma_dealloc_mw(struct ib_mw *ibmw) cqp_info = &cqp_request->info; info = &cqp_info->in.u.dealloc_stag.info; memset(info, 0, sizeof(*info)); - info->pd_id = iwpd->sc_pd.pd_id & 0x00007fff; + info->pd_id = iwpd->sc_pd.pd_id; info->stag_idx = ibmw->rkey >> IRDMA_CQPSQ_STAG_IDX_S; info->mr = false; cqp_info->cqp_cmd = IRDMA_OP_DEALLOC_STAG; @@ -2528,8 +2590,7 @@ static int irdma_hw_alloc_stag(struct irdma_device *iwdev, { struct irdma_allocate_stag_info *info; struct irdma_pd *iwpd = to_iwpd(iwmr->ibmr.pd); - enum irdma_status_code status; - int err = 0; + int status; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; @@ -2551,10 +2612,8 @@ static int irdma_hw_alloc_stag(struct irdma_device *iwdev, cqp_info->in.u.alloc_stag.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(iwdev->rf, cqp_request); irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); - if (status) - err = -ENOMEM; - return err; + return status; } /** @@ -2570,9 +2629,8 @@ static struct ib_mr *irdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, struct irdma_pble_alloc *palloc; struct irdma_pbl *iwpbl; struct irdma_mr *iwmr; - enum irdma_status_code status; u32 stag; - int err_code = -ENOMEM; + 
int err_code; iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL); if (!iwmr) @@ -2594,9 +2652,9 @@ static struct ib_mr *irdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, iwmr->type = IRDMA_MEMREG_TYPE_MEM; palloc = &iwpbl->pble_alloc; iwmr->page_cnt = max_num_sg; - status = irdma_get_pble(iwdev->rf->pble_rsrc, palloc, iwmr->page_cnt, - true); - if (status) + err_code = irdma_get_pble(iwdev->rf->pble_rsrc, palloc, iwmr->page_cnt, + false); + if (err_code) goto err_get_pble; err_code = irdma_hw_alloc_stag(iwdev, iwmr); @@ -2631,8 +2689,16 @@ static int irdma_set_page(struct ib_mr *ibmr, u64 addr) if (unlikely(iwmr->npages == iwmr->page_cnt)) return -ENOMEM; - pbl = palloc->level1.addr; - pbl[iwmr->npages++] = addr; + if (palloc->level == PBLE_LEVEL_2) { + struct irdma_pble_info *palloc_info = + palloc->level2.leaf + (iwmr->npages >> PBLE_512_SHIFT); + + palloc_info->addr[iwmr->npages & (PBLE_PER_PAGE - 1)] = addr; + } else { + pbl = palloc->level1.addr; + pbl[iwmr->npages] = addr; + } + iwmr->npages++; return 0; } @@ -2667,10 +2733,9 @@ static int irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr, struct irdma_reg_ns_stag_info *stag_info; struct irdma_pd *iwpd = to_iwpd(iwmr->ibmr.pd); struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc; - enum irdma_status_code status; - int err = 0; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; + int ret; cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true); if (!cqp_request) @@ -2707,12 +2772,10 @@ static int irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr, cqp_info->post_sq = 1; cqp_info->in.u.mr_reg_non_shared.dev = &iwdev->rf->sc_dev; cqp_info->in.u.mr_reg_non_shared.scratch = (uintptr_t)cqp_request; - status = irdma_handle_cqp_op(iwdev->rf, cqp_request); + ret = irdma_handle_cqp_op(iwdev->rf, cqp_request); irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); - if (status) - err = -ENOMEM; - return err; + return ret; } /** @@ -2728,6 +2791,7 @@ static 
struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, u64 virt, int access, struct ib_udata *udata) { +#define IRDMA_MEM_REG_MIN_REQ_LEN offsetofend(struct irdma_mem_reg_req, sq_pages) struct irdma_device *iwdev = to_iwdev(pd->device); struct irdma_ucontext *ucontext; struct irdma_pble_alloc *palloc; @@ -2745,6 +2809,9 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size) return ERR_PTR(-EINVAL); + if (udata->inlen < IRDMA_MEM_REG_MIN_REQ_LEN) + return ERR_PTR(-EINVAL); + region = ib_umem_get(pd->device, start, len, access); if (IS_ERR(region)) { @@ -2774,7 +2841,7 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, if (req.reg_type == IRDMA_MEMREG_TYPE_MEM) { iwmr->page_size = ib_umem_find_best_pgsz(region, - SZ_4K | SZ_2M | SZ_1G, + iwdev->rf->sc_dev.hw_attrs.page_size_cap, virt); if (unlikely(!iwmr->page_size)) { kfree(iwmr); @@ -2892,7 +2959,6 @@ struct ib_mr *irdma_reg_phys_mr(struct ib_pd *pd, u64 addr, u64 size, int access struct irdma_device *iwdev = to_iwdev(pd->device); struct irdma_pbl *iwpbl; struct irdma_mr *iwmr; - enum irdma_status_code status; u32 stag; int ret; @@ -2920,10 +2986,9 @@ struct ib_mr *irdma_reg_phys_mr(struct ib_pd *pd, u64 addr, u64 size, int access iwmr->pgaddrmem[0] = addr; iwmr->len = size; iwmr->page_size = SZ_4K; - status = irdma_hwreg_mr(iwdev, iwmr, access); - if (status) { + ret = irdma_hwreg_mr(iwdev, iwmr, access); + if (ret) { irdma_free_stag(iwdev, stag); - ret = -ENOMEM; goto err; } @@ -2996,6 +3061,7 @@ static int irdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; + int status; if (iwmr->type != IRDMA_MEMREG_TYPE_MEM) { if (iwmr->region) { @@ -3016,7 +3082,7 @@ static int irdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) cqp_info = &cqp_request->info; info = 
&cqp_info->in.u.dealloc_stag.info; memset(info, 0, sizeof(*info)); - info->pd_id = iwpd->sc_pd.pd_id & 0x00007fff; + info->pd_id = iwpd->sc_pd.pd_id; info->stag_idx = ib_mr->rkey >> IRDMA_CQPSQ_STAG_IDX_S; info->mr = true; if (iwpbl->pbl_allocated) @@ -3026,8 +3092,11 @@ static int irdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) cqp_info->post_sq = 1; cqp_info->in.u.dealloc_stag.dev = &iwdev->rf->sc_dev; cqp_info->in.u.dealloc_stag.scratch = (uintptr_t)cqp_request; - irdma_handle_cqp_op(iwdev->rf, cqp_request); + status = irdma_handle_cqp_op(iwdev->rf, cqp_request); irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); + if (status) + return status; + irdma_free_stag(iwdev, iwmr->stag); done: if (iwpbl->pbl_allocated) @@ -3052,20 +3121,16 @@ static int irdma_post_send(struct ib_qp *ibqp, struct irdma_qp_uk *ukqp; struct irdma_sc_dev *dev; struct irdma_post_sq_info info; - enum irdma_status_code ret; int err = 0; unsigned long flags; bool inv_stag; struct irdma_ah *ah; - bool reflush = false; iwqp = to_iwqp(ibqp); ukqp = &iwqp->sc_qp.qp_uk; dev = &iwqp->iwdev->rf->sc_dev; spin_lock_irqsave(&iwqp->lock, flags); - if (iwqp->flush_issued && ukqp->sq_flush_complete) - reflush = true; while (ib_wr) { memset(&info, 0, sizeof(info)); inv_stag = false; @@ -3111,7 +3176,7 @@ static int irdma_post_send(struct ib_qp *ibqp, info.op.inline_send.qkey = ud_wr(ib_wr)->remote_qkey; info.op.inline_send.dest_qp = ud_wr(ib_wr)->remote_qpn; } - ret = irdma_uk_inline_send(ukqp, &info, false); + err = irdma_uk_inline_send(ukqp, &info, false); } else { info.op.send.num_sges = ib_wr->num_sge; info.op.send.sg_list = ib_wr->sg_list; @@ -3122,14 +3187,7 @@ static int irdma_post_send(struct ib_qp *ibqp, info.op.send.qkey = ud_wr(ib_wr)->remote_qkey; info.op.send.dest_qp = ud_wr(ib_wr)->remote_qpn; } - ret = irdma_uk_send(ukqp, &info, false); - } - - if (ret) { - if (ret == IRDMA_ERR_QP_TOOMANY_WRS_POSTED) - err = -ENOMEM; - else - err = -EINVAL; + err = irdma_uk_send(ukqp, &info, 
false); } break; case IB_WR_RDMA_WRITE_WITH_IMM: @@ -3155,20 +3213,13 @@ static int irdma_post_send(struct ib_qp *ibqp, rdma_wr(ib_wr)->remote_addr; info.op.inline_rdma_write.rem_addr.lkey = rdma_wr(ib_wr)->rkey; - ret = irdma_uk_inline_rdma_write(ukqp, &info, false); + err = irdma_uk_inline_rdma_write(ukqp, &info, false); } else { info.op.rdma_write.lo_sg_list = (void *)ib_wr->sg_list; info.op.rdma_write.num_lo_sges = ib_wr->num_sge; info.op.rdma_write.rem_addr.addr = rdma_wr(ib_wr)->remote_addr; info.op.rdma_write.rem_addr.lkey = rdma_wr(ib_wr)->rkey; - ret = irdma_uk_rdma_write(ukqp, &info, false); - } - - if (ret) { - if (ret == IRDMA_ERR_QP_TOOMANY_WRS_POSTED) - err = -ENOMEM; - else - err = -EINVAL; + err = irdma_uk_rdma_write(ukqp, &info, false); } break; case IB_WR_RDMA_READ_WITH_INV: @@ -3185,21 +3236,12 @@ static int irdma_post_send(struct ib_qp *ibqp, info.op.rdma_read.rem_addr.lkey = rdma_wr(ib_wr)->rkey; info.op.rdma_read.lo_sg_list = (void *)ib_wr->sg_list; info.op.rdma_read.num_lo_sges = ib_wr->num_sge; - - ret = irdma_uk_rdma_read(ukqp, &info, inv_stag, false); - if (ret) { - if (ret == IRDMA_ERR_QP_TOOMANY_WRS_POSTED) - err = -ENOMEM; - else - err = -EINVAL; - } + err = irdma_uk_rdma_read(ukqp, &info, inv_stag, false); break; case IB_WR_LOCAL_INV: info.op_type = IRDMA_OP_TYPE_INV_STAG; info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey; - ret = irdma_uk_stag_local_invalidate(ukqp, &info, true); - if (ret) - err = -ENOMEM; + err = irdma_uk_stag_local_invalidate(ukqp, &info, true); break; case IB_WR_REG_MR: { struct irdma_mr *iwmr = to_iwmr(reg_wr(ib_wr)->mr); @@ -3221,10 +3263,8 @@ static int irdma_post_send(struct ib_qp *ibqp, stag_info.local_fence = ib_wr->send_flags & IB_SEND_FENCE; if (iwmr->npages > IRDMA_MIN_PAGES_PER_FMR) stag_info.chunk_size = 1; - ret = irdma_sc_mr_fast_register(&iwqp->sc_qp, &stag_info, + err = irdma_sc_mr_fast_register(&iwqp->sc_qp, &stag_info, true); - if (ret) - err = -ENOMEM; break; } default: @@ -3240,15 
+3280,14 @@ static int irdma_post_send(struct ib_qp *ibqp, ib_wr = ib_wr->next; } - if (!iwqp->flush_issued && iwqp->hw_iwarp_state <= IRDMA_QP_STATE_RTS) { - irdma_uk_qp_post_wr(ukqp); - spin_unlock_irqrestore(&iwqp->lock, flags); - } else if (reflush) { - ukqp->sq_flush_complete = false; + if (!iwqp->flush_issued) { + if (iwqp->hw_iwarp_state <= IRDMA_QP_STATE_RTS) + irdma_uk_qp_post_wr(ukqp); spin_unlock_irqrestore(&iwqp->lock, flags); - irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ | IRDMA_REFLUSH); } else { spin_unlock_irqrestore(&iwqp->lock, flags); + mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush, + msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS)); } if (err) *bad_wr = ib_wr; @@ -3269,29 +3308,21 @@ static int irdma_post_recv(struct ib_qp *ibqp, struct irdma_qp *iwqp; struct irdma_qp_uk *ukqp; struct irdma_post_rq_info post_recv = {}; - enum irdma_status_code ret = 0; unsigned long flags; int err = 0; - bool reflush = false; iwqp = to_iwqp(ibqp); ukqp = &iwqp->sc_qp.qp_uk; spin_lock_irqsave(&iwqp->lock, flags); - if (iwqp->flush_issued && ukqp->rq_flush_complete) - reflush = true; while (ib_wr) { post_recv.num_sges = ib_wr->num_sge; post_recv.wr_id = ib_wr->wr_id; post_recv.sg_list = ib_wr->sg_list; - ret = irdma_uk_post_receive(ukqp, &post_recv); - if (ret) { + err = irdma_uk_post_receive(ukqp, &post_recv); + if (err) { ibdev_dbg(&iwqp->iwdev->ibdev, - "VERBS: post_recv err %d\n", ret); - if (ret == IRDMA_ERR_QP_TOOMANY_WRS_POSTED) - err = -ENOMEM; - else - err = -EINVAL; + "VERBS: post_recv err %d\n", err); goto out; } @@ -3299,13 +3330,10 @@ static int irdma_post_recv(struct ib_qp *ibqp, } out: - if (reflush) { - ukqp->rq_flush_complete = false; - spin_unlock_irqrestore(&iwqp->lock, flags); - irdma_flush_wqes(iwqp, IRDMA_FLUSH_RQ | IRDMA_REFLUSH); - } else { - spin_unlock_irqrestore(&iwqp->lock, flags); - } + spin_unlock_irqrestore(&iwqp->lock, flags); + if (iwqp->flush_issued) + mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush, + 
msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS)); if (err) *bad_wr = ib_wr; @@ -3336,6 +3364,8 @@ static enum ib_wc_status irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode return IB_WC_RETRY_EXC_ERR; case FLUSH_MW_BIND_ERR: return IB_WC_MW_BIND_ERR; + case FLUSH_REM_INV_REQ_ERR: + return IB_WC_REM_INV_REQ_ERR; case FLUSH_FATAL_ERR: default: return IB_WC_FATAL_ERR; @@ -3478,7 +3508,7 @@ static int __irdma_poll_cq(struct irdma_cq *iwcq, int num_entries, struct ib_wc struct irdma_cq_buf *last_buf = NULL; struct irdma_cq_poll_info *cur_cqe = &iwcq->cur_cqe; struct irdma_cq_buf *cq_buf; - enum irdma_status_code ret; + int ret; struct irdma_device *iwdev; struct irdma_cq_uk *ukcq; bool cq_new_cqe = false; @@ -3498,10 +3528,10 @@ static int __irdma_poll_cq(struct irdma_cq *iwcq, int num_entries, struct ib_wc cq_new_cqe = true; continue; } - if (ret == IRDMA_ERR_Q_EMPTY) + if (ret == -ENOENT) break; /* QP using the CQ is destroyed. Skip reporting this CQE */ - if (ret == IRDMA_ERR_Q_DESTROYED) { + if (ret == -EFAULT) { cq_new_cqe = true; continue; } @@ -3517,16 +3547,21 @@ static int __irdma_poll_cq(struct irdma_cq *iwcq, int num_entries, struct ib_wc /* check the current CQ for new cqes */ while (npolled < num_entries) { ret = irdma_poll_one(ukcq, cur_cqe, entry + npolled); + if (ret == -ENOENT) { + ret = irdma_generated_cmpls(iwcq, cur_cqe); + if (!ret) + irdma_process_cqe(entry + npolled, cur_cqe); + } if (!ret) { ++npolled; cq_new_cqe = true; continue; } - if (ret == IRDMA_ERR_Q_EMPTY) + if (ret == -ENOENT) break; /* QP using the CQ is destroyed. 
Skip reporting this CQE */ - if (ret == IRDMA_ERR_Q_DESTROYED) { + if (ret == -EFAULT) { cq_new_cqe = true; continue; } @@ -3548,7 +3583,7 @@ error: ibdev_dbg(&iwdev->ibdev, "%s: Error polling CQ, irdma_err: %d\n", __func__, ret); - return -EINVAL; + return ret; } /** @@ -3598,13 +3633,13 @@ static int irdma_req_notify_cq(struct ib_cq *ibcq, if (iwcq->last_notify == IRDMA_CQ_COMPL_SOLICITED && notify_flags != IB_CQ_SOLICITED) promo_event = true; - if (!iwcq->armed || promo_event) { - iwcq->armed = true; + if (!atomic_cmpxchg(&iwcq->armed, 0, 1) || promo_event) { iwcq->last_notify = cq_notify; irdma_uk_cq_request_notification(ukcq, cq_notify); } - if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && !irdma_cq_empty(iwcq)) + if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && + (!irdma_cq_empty(iwcq) || !list_empty(&iwcq->cmpl_generated))) ret = 1; spin_unlock_irqrestore(&iwcq->lock, flags); @@ -3854,7 +3889,7 @@ static int irdma_mcast_cqp_op(struct irdma_device *iwdev, { struct cqp_cmds_info *cqp_info; struct irdma_cqp_request *cqp_request; - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true); if (!cqp_request) @@ -3868,10 +3903,8 @@ static int irdma_mcast_cqp_op(struct irdma_device *iwdev, cqp_info->in.u.mc_create.cqp = &iwdev->rf->cqp.sc_cqp; status = irdma_handle_cqp_op(iwdev->rf, cqp_request); irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); - if (status) - return -ENOMEM; - return 0; + return status; } /** @@ -3927,11 +3960,7 @@ static int irdma_attach_mcast(struct ib_qp *ibqp, union ib_gid *ibgid, u16 lid) int ret = 0; bool ipv4; u16 vlan_id; - union { - struct sockaddr saddr; - struct sockaddr_in saddr_in; - struct sockaddr_in6 saddr_in6; - } sgid_addr; + union irdma_sockaddr sgid_addr; unsigned char dmac[ETH_ALEN]; rdma_gid2ip((struct sockaddr *)&sgid_addr, ibgid); @@ -4067,11 +4096,7 @@ static int irdma_detach_mcast(struct ib_qp *ibqp, union ib_gid *ibgid, u16 lid) struct 
irdma_mcast_grp_ctx_entry_info mcg_info = {}; int ret; unsigned long flags; - union { - struct sockaddr saddr; - struct sockaddr_in saddr_in; - struct sockaddr_in6 saddr_in6; - } sgid_addr; + union irdma_sockaddr sgid_addr; rdma_gid2ip((struct sockaddr *)&sgid_addr, ibgid); if (!ipv6_addr_v4mapped((struct in6_addr *)ibgid)) @@ -4127,17 +4152,47 @@ static int irdma_detach_mcast(struct ib_qp *ibqp, union ib_gid *ibgid, u16 lid) return 0; } -/** - * irdma_create_ah - create address handle - * @ibah: address handle - * @attr: address handle attributes - * @udata: User data - * - * returns 0 on success, error otherwise - */ -static int irdma_create_ah(struct ib_ah *ibah, - struct rdma_ah_init_attr *attr, - struct ib_udata *udata) +static int irdma_create_hw_ah(struct irdma_device *iwdev, struct irdma_ah *ah, bool sleep) +{ + struct irdma_pci_f *rf = iwdev->rf; + int err; + + err = irdma_alloc_rsrc(rf, rf->allocated_ahs, rf->max_ah, &ah->sc_ah.ah_info.ah_idx, + &rf->next_ah); + if (err) + return err; + + err = irdma_ah_cqp_op(rf, &ah->sc_ah, IRDMA_OP_AH_CREATE, sleep, + irdma_gsi_ud_qp_ah_cb, &ah->sc_ah); + + if (err) { + ibdev_dbg(&iwdev->ibdev, "VERBS: CQP-OP Create AH fail"); + goto err_ah_create; + } + + if (!sleep) { + int cnt = CQP_COMPL_WAIT_TIME_MS * CQP_TIMEOUT_THRESHOLD; + + do { + irdma_cqp_ce_handler(rf, &rf->ccq.sc_cq); + mdelay(1); + } while (!ah->sc_ah.ah_info.ah_valid && --cnt); + + if (!cnt) { + ibdev_dbg(&iwdev->ibdev, "VERBS: CQP create AH timed out"); + err = -ETIMEDOUT; + goto err_ah_create; + } + } + return 0; + +err_ah_create: + irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_ahs, ah->sc_ah.ah_info.ah_idx); + + return err; +} + +static int irdma_setup_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *attr) { struct irdma_pd *pd = to_iwpd(ibah->pd); struct irdma_ah *ah = container_of(ibah, struct irdma_ah, ibah); @@ -4146,25 +4201,13 @@ static int irdma_create_ah(struct ib_ah *ibah, struct irdma_device *iwdev = to_iwdev(ibah->pd->device); struct 
irdma_pci_f *rf = iwdev->rf; struct irdma_sc_ah *sc_ah; - u32 ah_id = 0; struct irdma_ah_info *ah_info; - struct irdma_create_ah_resp uresp; - union { - struct sockaddr saddr; - struct sockaddr_in saddr_in; - struct sockaddr_in6 saddr_in6; - } sgid_addr, dgid_addr; + union irdma_sockaddr sgid_addr, dgid_addr; int err; u8 dmac[ETH_ALEN]; - err = irdma_alloc_rsrc(rf, rf->allocated_ahs, rf->max_ah, &ah_id, - &rf->next_ah); - if (err) - return err; - ah->pd = pd; sc_ah = &ah->sc_ah; - sc_ah->ah_info.ah_idx = ah_id; sc_ah->ah_info.vsi = &iwdev->vsi; irdma_sc_init_ah(&rf->sc_dev, sc_ah); ah->sgid_index = ah_attr->grh.sgid_index; @@ -4174,10 +4217,7 @@ static int irdma_create_ah(struct ib_ah *ibah, rdma_gid2ip((struct sockaddr *)&dgid_addr, &ah_attr->grh.dgid); ah->av.attrs = *ah_attr; ah->av.net_type = rdma_gid_attr_network_type(sgid_attr); - ah->av.sgid_addr.saddr = sgid_addr.saddr; - ah->av.dgid_addr.saddr = dgid_addr.saddr; ah_info = &sc_ah->ah_info; - ah_info->ah_idx = ah_id; ah_info->pd_idx = pd->sc_pd.pd_id; if (ah_attr->ah_flags & IB_AH_GRH) { ah_info->flow_label = ah_attr->grh.flow_label; @@ -4186,7 +4226,7 @@ static int irdma_create_ah(struct ib_ah *ibah, } ether_addr_copy(dmac, ah_attr->roce.dmac); - if (rdma_gid_attr_network_type(sgid_attr) == RDMA_NETWORK_IPV4) { + if (ah->av.net_type == RDMA_NETWORK_IPV4) { ah_info->ipv4_valid = true; ah_info->dest_ip_addr[0] = ntohl(dgid_addr.saddr_in.sin_addr.s_addr); @@ -4214,17 +4254,15 @@ static int irdma_create_ah(struct ib_ah *ibah, err = rdma_read_gid_l2_fields(sgid_attr, &ah_info->vlan_tag, ah_info->mac_addr); if (err) - goto error; + return err; ah_info->dst_arpindex = irdma_add_arp(iwdev->rf, ah_info->dest_ip_addr, ah_info->ipv4_valid, dmac); - if (ah_info->dst_arpindex == -1) { - err = -EINVAL; - goto error; - } + if (ah_info->dst_arpindex == -1) + return -EINVAL; - if (ah_info->vlan_tag >= VLAN_N_VID && iwdev->dcb) + if (ah_info->vlan_tag >= VLAN_N_VID && iwdev->dcb_vlan_mode) ah_info->vlan_tag = 0; if 
(ah_info->vlan_tag < VLAN_N_VID) { @@ -4233,43 +4271,38 @@ static int irdma_create_ah(struct ib_ah *ibah, rt_tos2priority(ah_info->tc_tos) << VLAN_PRIO_SHIFT; } - err = irdma_ah_cqp_op(iwdev->rf, sc_ah, IRDMA_OP_AH_CREATE, - attr->flags & RDMA_CREATE_AH_SLEEPABLE, - irdma_gsi_ud_qp_ah_cb, sc_ah); - - if (err) { - ibdev_dbg(&iwdev->ibdev, - "VERBS: CQP-OP Create AH fail"); - goto error; - } - - if (!(attr->flags & RDMA_CREATE_AH_SLEEPABLE)) { - int cnt = CQP_COMPL_WAIT_TIME_MS * CQP_TIMEOUT_THRESHOLD; - - do { - irdma_cqp_ce_handler(rf, &rf->ccq.sc_cq); - mdelay(1); - } while (!sc_ah->ah_info.ah_valid && --cnt); + return 0; +} - if (!cnt) { - ibdev_dbg(&iwdev->ibdev, - "VERBS: CQP create AH timed out"); - err = -ETIMEDOUT; - goto error; +/** + * irdma_ah_exists - Check for existing identical AH + * @iwdev: irdma device + * @new_ah: AH to check for + * + * returns true if AH is found, false if not found. + */ +static bool irdma_ah_exists(struct irdma_device *iwdev, + struct irdma_ah *new_ah) +{ + struct irdma_ah *ah; + u32 key = new_ah->sc_ah.ah_info.dest_ip_addr[0] ^ + new_ah->sc_ah.ah_info.dest_ip_addr[1] ^ + new_ah->sc_ah.ah_info.dest_ip_addr[2] ^ + new_ah->sc_ah.ah_info.dest_ip_addr[3]; + + hash_for_each_possible(iwdev->ah_hash_tbl, ah, list, key) { + /* Set ah_valid and ah_id the same so memcmp can work */ + new_ah->sc_ah.ah_info.ah_idx = ah->sc_ah.ah_info.ah_idx; + new_ah->sc_ah.ah_info.ah_valid = ah->sc_ah.ah_info.ah_valid; + if (!memcmp(&ah->sc_ah.ah_info, &new_ah->sc_ah.ah_info, + sizeof(ah->sc_ah.ah_info))) { + refcount_inc(&ah->refcnt); + new_ah->parent_ah = ah; + return true; } } - if (udata) { - uresp.ah_id = ah->sc_ah.ah_info.ah_idx; - err = ib_copy_to_udata(udata, &uresp, - min(sizeof(uresp), udata->outlen)); - } - return 0; - -error: - irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_ahs, ah_id); - - return err; + return false; } /** @@ -4282,6 +4315,17 @@ static int irdma_destroy_ah(struct ib_ah *ibah, u32 ah_flags) struct irdma_device *iwdev = 
to_iwdev(ibah->device); struct irdma_ah *ah = to_iwah(ibah); + if ((ah_flags & RDMA_DESTROY_AH_SLEEPABLE) && ah->parent_ah) { + mutex_lock(&iwdev->ah_tbl_lock); + if (!refcount_dec_and_test(&ah->parent_ah->refcnt)) { + mutex_unlock(&iwdev->ah_tbl_lock); + return 0; + } + hash_del(&ah->parent_ah->list); + kfree(ah->parent_ah); + mutex_unlock(&iwdev->ah_tbl_lock); + } + irdma_ah_cqp_op(iwdev->rf, &ah->sc_ah, IRDMA_OP_AH_DESTROY, false, NULL, ah); @@ -4292,6 +4336,84 @@ static int irdma_destroy_ah(struct ib_ah *ibah, u32 ah_flags) } /** + * irdma_create_user_ah - create user address handle + * @ibah: address handle + * @attr: address handle attributes + * @udata: User data + * + * returns 0 on success, error otherwise + */ +static int irdma_create_user_ah(struct ib_ah *ibah, + struct rdma_ah_init_attr *attr, + struct ib_udata *udata) +{ +#define IRDMA_CREATE_AH_MIN_RESP_LEN offsetofend(struct irdma_create_ah_resp, rsvd) + struct irdma_ah *ah = container_of(ibah, struct irdma_ah, ibah); + struct irdma_device *iwdev = to_iwdev(ibah->pd->device); + struct irdma_create_ah_resp uresp; + struct irdma_ah *parent_ah; + int err; + + if (udata && udata->outlen < IRDMA_CREATE_AH_MIN_RESP_LEN) + return -EINVAL; + + err = irdma_setup_ah(ibah, attr); + if (err) + return err; + mutex_lock(&iwdev->ah_tbl_lock); + if (!irdma_ah_exists(iwdev, ah)) { + err = irdma_create_hw_ah(iwdev, ah, true); + if (err) { + mutex_unlock(&iwdev->ah_tbl_lock); + return err; + } + /* Add new AH to list */ + parent_ah = kmemdup(ah, sizeof(*ah), GFP_KERNEL); + if (parent_ah) { + u32 key = parent_ah->sc_ah.ah_info.dest_ip_addr[0] ^ + parent_ah->sc_ah.ah_info.dest_ip_addr[1] ^ + parent_ah->sc_ah.ah_info.dest_ip_addr[2] ^ + parent_ah->sc_ah.ah_info.dest_ip_addr[3]; + + ah->parent_ah = parent_ah; + hash_add(iwdev->ah_hash_tbl, &parent_ah->list, key); + refcount_set(&parent_ah->refcnt, 1); + } + } + mutex_unlock(&iwdev->ah_tbl_lock); + + uresp.ah_id = ah->sc_ah.ah_info.ah_idx; + err = ib_copy_to_udata(udata, 
&uresp, min(sizeof(uresp), udata->outlen)); + if (err) + irdma_destroy_ah(ibah, attr->flags); + + return err; +} + +/** + * irdma_create_ah - create address handle + * @ibah: address handle + * @attr: address handle attributes + * @udata: NULL + * + * returns 0 on success, error otherwise + */ +static int irdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *attr, + struct ib_udata *udata) +{ + struct irdma_ah *ah = container_of(ibah, struct irdma_ah, ibah); + struct irdma_device *iwdev = to_iwdev(ibah->pd->device); + int err; + + err = irdma_setup_ah(ibah, attr); + if (err) + return err; + err = irdma_create_hw_ah(iwdev, ah, attr->flags & RDMA_CREATE_AH_SLEEPABLE); + + return err; +} + +/** * irdma_query_ah - Query address handle * @ibah: pointer to address handle * @ah_attr: address handle attributes @@ -4321,28 +4443,10 @@ static enum rdma_link_layer irdma_get_link_layer(struct ib_device *ibdev, return IB_LINK_LAYER_ETHERNET; } -static __be64 irdma_mac_to_guid(struct net_device *ndev) -{ - const unsigned char *mac = ndev->dev_addr; - __be64 guid; - unsigned char *dst = (unsigned char *)&guid; - - dst[0] = mac[0] ^ 2; - dst[1] = mac[1]; - dst[2] = mac[2]; - dst[3] = 0xff; - dst[4] = 0xfe; - dst[5] = mac[3]; - dst[6] = mac[4]; - dst[7] = mac[5]; - - return guid; -} - static const struct ib_device_ops irdma_roce_dev_ops = { .attach_mcast = irdma_attach_mcast, .create_ah = irdma_create_ah, - .create_user_ah = irdma_create_ah, + .create_user_ah = irdma_create_user_ah, .destroy_ah = irdma_destroy_ah, .detach_mcast = irdma_detach_mcast, .get_link_layer = irdma_get_link_layer, @@ -4408,7 +4512,8 @@ static const struct ib_device_ops irdma_dev_ops = { static void irdma_init_roce_device(struct irdma_device *iwdev) { iwdev->ibdev.node_type = RDMA_NODE_IB_CA; - iwdev->ibdev.node_guid = irdma_mac_to_guid(iwdev->netdev); + addrconf_addr_eui48((u8 *)&iwdev->ibdev.node_guid, + iwdev->netdev->dev_addr); ib_set_device_ops(&iwdev->ibdev, &irdma_roce_dev_ops); } @@ -4421,7 
+4526,8 @@ static int irdma_init_iw_device(struct irdma_device *iwdev) struct net_device *netdev = iwdev->netdev; iwdev->ibdev.node_type = RDMA_NODE_RNIC; - ether_addr_copy((u8 *)&iwdev->ibdev.node_guid, netdev->dev_addr); + addrconf_addr_eui48((u8 *)&iwdev->ibdev.node_guid, + netdev->dev_addr); iwdev->ibdev.ops.iw_add_ref = irdma_qp_add_ref; iwdev->ibdev.ops.iw_rem_ref = irdma_qp_rem_ref; iwdev->ibdev.ops.iw_get_qp = irdma_get_qp; diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h index d0fdef8d09ea..4309b7159f42 100644 --- a/drivers/infiniband/hw/irdma/verbs.h +++ b/drivers/infiniband/hw/irdma/verbs.h @@ -4,6 +4,7 @@ #define IRDMA_VERBS_H #define IRDMA_MAX_SAVED_PHY_PGADDR 4 +#define IRDMA_FLUSH_DELAY_MS 20 #define IRDMA_PKEY_TBL_SZ 1 #define IRDMA_DEFAULT_PKEY 0xFFFF @@ -25,14 +26,16 @@ struct irdma_pd { struct irdma_sc_pd sc_pd; }; +union irdma_sockaddr { + struct sockaddr_in saddr_in; + struct sockaddr_in6 saddr_in6; +}; + struct irdma_av { u8 macaddr[16]; struct rdma_ah_attr attrs; - union { - struct sockaddr saddr; - struct sockaddr_in saddr_in; - struct sockaddr_in6 saddr_in6; - } sgid_addr, dgid_addr; + union irdma_sockaddr sgid_addr; + union irdma_sockaddr dgid_addr; u8 net_type; }; @@ -43,6 +46,9 @@ struct irdma_ah { struct irdma_av av; u8 sgid_index; union ib_gid dgid; + struct hlist_node list; + refcount_t refcnt; + struct irdma_ah *parent_ah; /* AH from cached list */ }; struct irdma_hmc_pble { @@ -110,7 +116,7 @@ struct irdma_cq { u16 cq_size; u16 cq_num; bool user_mode; - bool armed; + atomic_t armed; enum irdma_cmpl_notify last_notify; u32 polled_cmpls; u32 cq_mem_size; @@ -121,6 +127,12 @@ struct irdma_cq { struct irdma_pbl *iwpbl_shadow; struct list_head resize_list; struct irdma_cq_poll_info cur_cqe; + struct list_head cmpl_generated; +}; + +struct irdma_cmpl_gen { + struct list_head list; + struct irdma_cq_poll_info cpi; }; struct disconn_work { @@ -161,6 +173,7 @@ struct irdma_qp { refcount_t refcnt; struct 
iw_cm_id *cm_id; struct irdma_cm_node *cm_node; + struct delayed_work dwork_flush; struct ib_mr *lsmm_mr; atomic_t hw_mod_qp_pend; enum ib_qp_state ibqp_state; @@ -224,4 +237,7 @@ int irdma_ib_register_device(struct irdma_device *iwdev); void irdma_ib_unregister_device(struct irdma_device *iwdev); void irdma_ib_dealloc_device(struct ib_device *ibdev); void irdma_ib_qp_event(struct irdma_qp *iwqp, enum irdma_qp_event_type event); +void irdma_generate_flush_completions(struct irdma_qp *iwqp); +void irdma_remove_cmpls_list(struct irdma_cq *iwcq); +int irdma_generated_cmpls(struct irdma_cq *iwcq, struct irdma_cq_poll_info *cq_poll_info); #endif /* IRDMA_VERBS_H */ diff --git a/drivers/infiniband/hw/irdma/ws.c b/drivers/infiniband/hw/irdma/ws.c index b0d6ee0739f5..20bc8d0d7f1f 100644 --- a/drivers/infiniband/hw/irdma/ws.c +++ b/drivers/infiniband/hw/irdma/ws.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB /* Copyright (c) 2017 - 2021 Intel Corporation */ #include "osdep.h" -#include "status.h" #include "hmc.h" #include "defs.h" #include "type.h" @@ -87,8 +86,8 @@ static void irdma_free_node(struct irdma_sc_vsi *vsi, * @node: pointer to node * @cmd: add, remove or modify */ -static enum irdma_status_code -irdma_ws_cqp_cmd(struct irdma_sc_vsi *vsi, struct irdma_ws_node *node, u8 cmd) +static int irdma_ws_cqp_cmd(struct irdma_sc_vsi *vsi, + struct irdma_ws_node *node, u8 cmd) { struct irdma_ws_node_info node_info = {}; @@ -106,7 +105,7 @@ irdma_ws_cqp_cmd(struct irdma_sc_vsi *vsi, struct irdma_ws_node *node, u8 cmd) node_info.enable = node->enable; if (irdma_cqp_ws_node_cmd(vsi->dev, cmd, &node_info)) { ibdev_dbg(to_ibdev(vsi->dev), "WS: CQP WS CMD failed\n"); - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; } if (node->type_leaf && cmd == IRDMA_OP_WS_ADD_NODE) { @@ -234,18 +233,18 @@ static void irdma_remove_leaf(struct irdma_sc_vsi *vsi, u8 user_pri) * @vsi: vsi pointer * @user_pri: user priority */ -enum irdma_status_code irdma_ws_add(struct 
irdma_sc_vsi *vsi, u8 user_pri) +int irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri) { struct irdma_ws_node *ws_tree_root; struct irdma_ws_node *vsi_node; struct irdma_ws_node *tc_node; u16 traffic_class; - enum irdma_status_code ret = 0; + int ret = 0; int i; mutex_lock(&vsi->dev->ws_mutex); if (vsi->tc_change_pending) { - ret = IRDMA_ERR_NOT_READY; + ret = -EBUSY; goto exit; } @@ -258,7 +257,7 @@ enum irdma_status_code irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri) ws_tree_root = irdma_alloc_node(vsi, user_pri, WS_NODE_TYPE_PARENT, NULL); if (!ws_tree_root) { - ret = IRDMA_ERR_NO_MEMORY; + ret = -ENOMEM; goto exit; } @@ -283,7 +282,7 @@ enum irdma_status_code irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri) vsi_node = irdma_alloc_node(vsi, user_pri, WS_NODE_TYPE_PARENT, ws_tree_root); if (!vsi_node) { - ret = IRDMA_ERR_NO_MEMORY; + ret = -ENOMEM; goto vsi_add_err; } @@ -310,7 +309,7 @@ enum irdma_status_code irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri) tc_node = irdma_alloc_node(vsi, user_pri, WS_NODE_TYPE_LEAF, vsi_node); if (!tc_node) { - ret = IRDMA_ERR_NO_MEMORY; + ret = -ENOMEM; goto leaf_add_err; } diff --git a/drivers/infiniband/hw/irdma/ws.h b/drivers/infiniband/hw/irdma/ws.h index f0e16f630701..d431e3327d26 100644 --- a/drivers/infiniband/hw/irdma/ws.h +++ b/drivers/infiniband/hw/irdma/ws.h @@ -34,7 +34,7 @@ struct irdma_ws_node { }; struct irdma_sc_vsi; -enum irdma_status_code irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri); +int irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri); void irdma_ws_remove(struct irdma_sc_vsi *vsi, u8 user_pri); void irdma_ws_reset(struct irdma_sc_vsi *vsi); diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index e2e1f5daddc4..111fa88a3be4 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -38,7 +38,6 @@ #include <rdma/ib_sa.h> #include <rdma/ib_pack.h> #include <linux/mlx4/cmd.h> -#include <linux/module.h> 
#include <linux/init.h> #include <linux/errno.h> #include <rdma/ib_user_verbs.h> diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c index 4aff1c8298b1..12b481d138cf 100644 --- a/drivers/infiniband/hw/mlx4/cm.c +++ b/drivers/infiniband/hw/mlx4/cm.c @@ -80,6 +80,7 @@ struct cm_req_msg { union ib_gid primary_path_sgid; }; +static struct workqueue_struct *cm_wq; static void set_local_comm_id(struct ib_mad *mad, u32 cm_id) { @@ -288,10 +289,10 @@ static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id) /*make sure that there is no schedule inside the scheduled work.*/ if (!sriov->is_going_down && !id->scheduled_delete) { id->scheduled_delete = 1; - schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT); + queue_delayed_work(cm_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT); } else if (id->scheduled_delete) { /* Adjust timeout if already scheduled */ - mod_delayed_work(system_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT); + mod_delayed_work(cm_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT); } spin_unlock_irqrestore(&sriov->going_down_lock, flags); spin_unlock(&sriov->id_map_lock); @@ -370,7 +371,7 @@ static int alloc_rej_tmout(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id, int sl ret = xa_err(item); else /* If a retry, adjust delayed work */ - mod_delayed_work(system_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT); + mod_delayed_work(cm_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT); goto err_or_exists; } xa_unlock(&sriov->xa_rej_tmout); @@ -393,7 +394,7 @@ static int alloc_rej_tmout(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id, int sl return xa_err(old); } - schedule_delayed_work(&item->timeout, CM_CLEANUP_CACHE_TIMEOUT); + queue_delayed_work(cm_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT); return 0; @@ -500,7 +501,7 @@ static void rej_tmout_xa_cleanup(struct mlx4_ib_sriov *sriov, int slave) xa_lock(&sriov->xa_rej_tmout); xa_for_each(&sriov->xa_rej_tmout, id, item) { if (slave < 0 || slave == item->slave) { - 
mod_delayed_work(system_wq, &item->timeout, 0); + mod_delayed_work(cm_wq, &item->timeout, 0); flush_needed = true; ++cnt; } @@ -508,7 +509,7 @@ static void rej_tmout_xa_cleanup(struct mlx4_ib_sriov *sriov, int slave) xa_unlock(&sriov->xa_rej_tmout); if (flush_needed) { - flush_scheduled_work(); + flush_workqueue(cm_wq); pr_debug("Deleted %d entries in xarray for slave %d during cleanup\n", cnt, slave); } @@ -540,7 +541,7 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave) spin_unlock(&sriov->id_map_lock); if (need_flush) - flush_scheduled_work(); /* make sure all timers were flushed */ + flush_workqueue(cm_wq); /* make sure all timers were flushed */ /* now, remove all leftover entries from databases*/ spin_lock(&sriov->id_map_lock); @@ -587,3 +588,17 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave) rej_tmout_xa_cleanup(sriov, slave); } + +int mlx4_ib_cm_init(void) +{ + cm_wq = alloc_workqueue("mlx4_ib_cm", 0, 0); + if (!cm_wq) + return -ENOMEM; + + return 0; +} + +void mlx4_ib_cm_destroy(void) +{ + destroy_workqueue(cm_wq); +} diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index d13ecbdd4391..a37cfac5e23f 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -96,7 +96,7 @@ static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num, __be64 mlx4_ib_gen_node_guid(void) { #define NODE_GUID_HI ((u64) (((u64)IB_OPENIB_OUI) << 40)) - return cpu_to_be64(NODE_GUID_HI | prandom_u32()); + return cpu_to_be64(NODE_GUID_HI | get_random_u32()); } __be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 0d2fa3338784..ba47874f90d3 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -85,14 +85,6 @@ static enum rdma_link_layer mlx4_ib_port_link_layer(struct ib_device *device, static struct workqueue_struct *wq; -static void 
init_query_mad(struct ib_smp *mad) -{ - mad->base_version = 1; - mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; - mad->class_version = 1; - mad->method = IB_MGMT_METHOD_GET; -} - static int check_flow_steering_support(struct mlx4_dev *dev) { int eth_num_ports = 0; @@ -471,7 +463,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, if (!in_mad || !out_mad) goto out; - init_query_mad(in_mad); + ib_init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS, @@ -487,8 +479,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SYS_IMAGE_GUID | - IB_DEVICE_RC_RNR_NAK_GEN | - IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; + IB_DEVICE_RC_RNR_NAK_GEN; + props->kernel_cap_flags = IBK_BLOCK_MULTICAST_LOOPBACK; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR) props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR) @@ -502,9 +494,9 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, if (dev->dev->caps.max_gso_sz && (dev->dev->rev_id != MLX4_IB_CARD_REV_A0) && (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH)) - props->device_cap_flags |= IB_DEVICE_UD_TSO; + props->kernel_cap_flags |= IBK_UD_TSO; if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY) - props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; + props->kernel_cap_flags |= IBK_LOCAL_DMA_LKEY; if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) && (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) && (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR)) @@ -669,7 +661,7 @@ static int ib_link_query_port(struct ib_device *ibdev, u32 port, if (!in_mad || !out_mad) goto out; - init_query_mad(in_mad); + ib_init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; in_mad->attr_mod = cpu_to_be32(port); @@ -721,7 +713,7 @@ static int ib_link_query_port(struct 
ib_device *ibdev, u32 port, /* If reported active speed is QDR, check if is FDR-10 */ if (props->active_speed == IB_SPEED_QDR) { - init_query_mad(in_mad); + ib_init_query_mad(in_mad); in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO; in_mad->attr_mod = cpu_to_be32(port); @@ -848,7 +840,7 @@ int __mlx4_ib_query_gid(struct ib_device *ibdev, u32 port, int index, if (!in_mad || !out_mad) goto out; - init_query_mad(in_mad); + ib_init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; in_mad->attr_mod = cpu_to_be32(port); @@ -870,7 +862,7 @@ int __mlx4_ib_query_gid(struct ib_device *ibdev, u32 port, int index, } } - init_query_mad(in_mad); + ib_init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; in_mad->attr_mod = cpu_to_be32(index / 8); @@ -917,7 +909,7 @@ static int mlx4_ib_query_sl2vl(struct ib_device *ibdev, u32 port, if (!in_mad || !out_mad) goto out; - init_query_mad(in_mad); + ib_init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_SL_TO_VL_TABLE; in_mad->attr_mod = 0; @@ -971,7 +963,7 @@ int __mlx4_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index, if (!in_mad || !out_mad) goto out; - init_query_mad(in_mad); + ib_init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE; in_mad->attr_mod = cpu_to_be32(index / 32); @@ -1990,7 +1982,7 @@ static int init_node_data(struct mlx4_ib_dev *dev) if (!in_mad || !out_mad) goto out; - init_query_mad(in_mad); + ib_init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_NODE_DESC; if (mlx4_is_master(dev->dev)) mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; @@ -2784,10 +2776,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) if (err) goto err_counter; - ibdev->ib_uc_qpns_bitmap = - kmalloc_array(BITS_TO_LONGS(ibdev->steer_qpn_count), - sizeof(long), - GFP_KERNEL); + ibdev->ib_uc_qpns_bitmap = bitmap_alloc(ibdev->steer_qpn_count, + GFP_KERNEL); if (!ibdev->ib_uc_qpns_bitmap) goto err_steer_qp_release; @@ -2875,7 +2865,7 @@ err_diag_counters: mlx4_ib_diag_cleanup(ibdev); err_steer_free_bitmap: - 
kfree(ibdev->ib_uc_qpns_bitmap); + bitmap_free(ibdev->ib_uc_qpns_bitmap); err_steer_qp_release: mlx4_qp_release_range(dev, ibdev->steer_qpn_base, @@ -2988,7 +2978,7 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) mlx4_qp_release_range(dev, ibdev->steer_qpn_base, ibdev->steer_qpn_count); - kfree(ibdev->ib_uc_qpns_bitmap); + bitmap_free(ibdev->ib_uc_qpns_bitmap); iounmap(ibdev->uar_map); for (p = 0; p < ibdev->num_ports; ++p) @@ -3247,7 +3237,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, case MLX4_DEV_EVENT_PORT_MGMT_CHANGE: ew = kmalloc(sizeof *ew, GFP_ATOMIC); if (!ew) - break; + return; INIT_WORK(&ew->work, handle_port_mgmt_change_event); memcpy(&ew->ib_eqe, eqe, sizeof *eqe); @@ -3317,10 +3307,14 @@ static int __init mlx4_ib_init(void) if (!wq) return -ENOMEM; - err = mlx4_ib_mcg_init(); + err = mlx4_ib_cm_init(); if (err) goto clean_wq; + err = mlx4_ib_mcg_init(); + if (err) + goto clean_cm; + err = mlx4_register_interface(&mlx4_ib_interface); if (err) goto clean_mcg; @@ -3330,6 +3324,9 @@ static int __init mlx4_ib_init(void) clean_mcg: mlx4_ib_mcg_destroy(); +clean_cm: + mlx4_ib_cm_destroy(); + clean_wq: destroy_workqueue(wq); return err; @@ -3339,6 +3336,7 @@ static void __exit mlx4_ib_cleanup(void) { mlx4_unregister_interface(&mlx4_ib_interface); mlx4_ib_mcg_destroy(); + mlx4_ib_cm_destroy(); destroy_workqueue(wq); } diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index d84023b4b1b8..6a3b0f121045 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -937,4 +937,7 @@ mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table) int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va, int *num_of_mtts); +int mlx4_ib_cm_init(void); +void mlx4_ib_cm_destroy(void); + #endif /* MLX4_IB_H */ diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 04a67b481608..a40bf58bcdd3 100644 --- 
a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -439,7 +439,6 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto err_mr; mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; - mr->ibmr.length = length; mr->ibmr.page_size = 1U << shift; return &mr->ibmr; diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 6a381751c0d8..c4cf91235eee 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -320,7 +320,6 @@ int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { err = -EIO; *bad_wr = wr; - nreq = 0; goto out; } diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index f43380106bd0..612ee8190a2d 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -19,6 +19,7 @@ mlx5_ib-y := ah.o \ restrack.o \ srq.o \ srq_cmd.o \ + umr.o \ wr.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c index 0b61df52332a..290ea8ac3838 100644 --- a/drivers/infiniband/hw/mlx5/cong.c +++ b/drivers/infiniband/hw/mlx5/cong.c @@ -433,8 +433,7 @@ void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u32 port_num) dev->port[port_num].dbg_cc_params = dbg_cc_params; - dbg_cc_params->root = debugfs_create_dir("cc_params", - mdev->priv.dbg_root); + dbg_cc_params->root = debugfs_create_dir("cc_params", mlx5_debugfs_get_dev_root(mdev)); for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) { dbg_cc_params->params[i].offset = i; diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index a190fb581591..be189e0525de 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -328,8 +328,11 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, } wc->vendor_err = cqe->vendor_err_synd; - if (dump) + if 
(dump) { + mlx5_ib_warn(dev, "WC error: %d, Message: %s\n", wc->status, + ib_wc_status_msg(wc->status)); dump_cqe(dev, cqe); + } } static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, @@ -520,6 +523,10 @@ repoll: "Requestor" : "Responder", cq->mcq.cqn); mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n", err_cqe->syndrome, err_cqe->vendor_err_synd); + if (wc->status != IB_WC_WR_FLUSH_ERR && + (*cur_qp)->type == MLX5_IB_QPT_REG_UMR) + dev->umrc.state = MLX5_UMR_STATE_RECOVER; + if (opcode == MLX5_CQE_REQ_ERR) { wq = &(*cur_qp)->sq; wqe_ctr = be16_to_cpu(cqe64->wqe_counter); diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 08b7f6bc56c3..2211a0be16f3 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -907,6 +907,7 @@ static bool devx_is_whitelist_cmd(void *in) case MLX5_CMD_OP_QUERY_HCA_CAP: case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS: return true; default: return false; @@ -962,6 +963,7 @@ static bool devx_is_general_cmd(void *in, struct mlx5_ib_dev *dev) case MLX5_CMD_OP_QUERY_CONG_PARAMS: case MLX5_CMD_OP_QUERY_CONG_STATISTICS: case MLX5_CMD_OP_QUERY_LAG: + case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS: return true; default: return false; @@ -1055,7 +1057,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)( int cmd_out_len = uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT); void *cmd_out; - int err; + int err, err2; int uid; c = devx_ufile2uctx(attrs); @@ -1076,14 +1078,16 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)( return PTR_ERR(cmd_out); MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); - err = mlx5_cmd_exec(dev->mdev, cmd_in, - uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN), - cmd_out, cmd_out_len); - if (err) + err = mlx5_cmd_do(dev->mdev, cmd_in, + uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN), + cmd_out, cmd_out_len); + if (err 
&& err != -EREMOTEIO) return err; - return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, cmd_out, + err2 = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, cmd_out, cmd_out_len); + + return err2 ?: err; } static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, @@ -1457,7 +1461,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; struct devx_obj *obj; u16 obj_type = 0; - int err; + int err, err2 = 0; int uid; u32 obj_id; u16 opcode; @@ -1497,15 +1501,18 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( !is_apu_cq(dev, cmd_in)) { obj->flags |= DEVX_OBJ_FLAGS_CQ; obj->core_cq.comp = devx_cq_comp; - err = mlx5_core_create_cq(dev->mdev, &obj->core_cq, - cmd_in, cmd_in_len, cmd_out, - cmd_out_len); + err = mlx5_create_cq(dev->mdev, &obj->core_cq, + cmd_in, cmd_in_len, cmd_out, + cmd_out_len); } else { - err = mlx5_cmd_exec(dev->mdev, cmd_in, - cmd_in_len, - cmd_out, cmd_out_len); + err = mlx5_cmd_do(dev->mdev, cmd_in, cmd_in_len, + cmd_out, cmd_out_len); } + if (err == -EREMOTEIO) + err2 = uverbs_copy_to(attrs, + MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, + cmd_out, cmd_out_len); if (err) goto obj_free; @@ -1548,7 +1555,7 @@ obj_destroy: sizeof(out)); obj_free: kfree(obj); - return err; + return err2 ?: err; } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( @@ -1563,7 +1570,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext); struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device); void *cmd_out; - int err; + int err, err2; int uid; if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id)) @@ -1586,14 +1593,16 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); devx_set_umem_valid(cmd_in); - err = mlx5_cmd_exec(mdev->mdev, cmd_in, - uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN), - cmd_out, cmd_out_len); - if 
(err) + err = mlx5_cmd_do(mdev->mdev, cmd_in, + uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN), + cmd_out, cmd_out_len); + if (err && err != -EREMOTEIO) return err; - return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, + err2 = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT, cmd_out, cmd_out_len); + + return err2 ?: err; } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( @@ -1607,7 +1616,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context( &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext); void *cmd_out; - int err; + int err, err2; int uid; struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device); @@ -1629,14 +1638,16 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( return PTR_ERR(cmd_out); MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); - err = mlx5_cmd_exec(mdev->mdev, cmd_in, - uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN), - cmd_out, cmd_out_len); - if (err) + err = mlx5_cmd_do(mdev->mdev, cmd_in, + uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN), + cmd_out, cmd_out_len); + if (err && err != -EREMOTEIO) return err; - return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, + err2 = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, cmd_out, cmd_out_len); + + return err2 ?: err; } struct devx_async_event_queue { @@ -1886,8 +1897,10 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table, key_level2, obj_event, GFP_KERNEL); - if (err) + if (err) { + kfree(obj_event); return err; + } INIT_LIST_HEAD(&obj_event->obj_sub_list); } @@ -2147,32 +2160,39 @@ err: static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, struct uverbs_attr_bundle *attrs, - struct devx_umem *obj) + struct devx_umem *obj, u32 access_flags) { u64 addr; size_t size; - u32 access; int err; if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) || 
uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN)) return -EFAULT; - err = uverbs_get_flags32(&access, attrs, - MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, - IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_REMOTE_READ); + err = ib_check_mr_access(&dev->ib_dev, access_flags); if (err) return err; - err = ib_check_mr_access(&dev->ib_dev, access); - if (err) - return err; + if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_DMABUF_FD)) { + struct ib_umem_dmabuf *umem_dmabuf; + int dmabuf_fd; - obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access); - if (IS_ERR(obj->umem)) - return PTR_ERR(obj->umem); + err = uverbs_get_raw_fd(&dmabuf_fd, attrs, + MLX5_IB_ATTR_DEVX_UMEM_REG_DMABUF_FD); + if (err) + return -EFAULT; + + umem_dmabuf = ib_umem_dmabuf_get_pinned( + &dev->ib_dev, addr, size, dmabuf_fd, access_flags); + if (IS_ERR(umem_dmabuf)) + return PTR_ERR(umem_dmabuf); + obj->umem = &umem_dmabuf->umem; + } else { + obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access_flags); + if (IS_ERR(obj->umem)) + return PTR_ERR(obj->umem); + } return 0; } @@ -2211,7 +2231,8 @@ static unsigned int devx_umem_find_best_pgsize(struct ib_umem *umem, static int devx_umem_reg_cmd_alloc(struct mlx5_ib_dev *dev, struct uverbs_attr_bundle *attrs, struct devx_umem *obj, - struct devx_umem_reg_cmd *cmd) + struct devx_umem_reg_cmd *cmd, + int access) { unsigned long pgsz_bitmap; unsigned int page_size; @@ -2260,6 +2281,9 @@ static int devx_umem_reg_cmd_alloc(struct mlx5_ib_dev *dev, MLX5_SET(umem, umem, page_offset, ib_umem_dma_offset(obj->umem, page_size)); + if (mlx5_umem_needs_ats(dev, obj->umem, access)) + MLX5_SET(umem, umem, ats, 1); + mlx5_ib_populate_pas(obj->umem, page_size, mtt, (obj->umem->writable ? 
MLX5_IB_MTT_WRITE : 0) | MLX5_IB_MTT_READ); @@ -2277,20 +2301,30 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context( &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext); struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device); + int access_flags; int err; if (!c->devx_uid) return -EINVAL; + err = uverbs_get_flags32(&access_flags, attrs, + MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, + IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_RELAXED_ORDERING); + if (err) + return err; + obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL); if (!obj) return -ENOMEM; - err = devx_umem_get(dev, &c->ibucontext, attrs, obj); + err = devx_umem_get(dev, &c->ibucontext, attrs, obj, access_flags); if (err) goto err_obj_free; - err = devx_umem_reg_cmd_alloc(dev, attrs, obj, &cmd); + err = devx_umem_reg_cmd_alloc(dev, attrs, obj, &cmd, access_flags); if (err) goto err_umem_release; @@ -2822,6 +2856,8 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN, UVERBS_ATTR_TYPE(u64), UA_MANDATORY), + UVERBS_ATTR_RAW_FD(MLX5_IB_ATTR_DEVX_UMEM_REG_DMABUF_FD, + UA_OPTIONAL), UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, enum ib_access_flags), UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_PGSZ_BITMAP, diff --git a/drivers/infiniband/hw/mlx5/dm.c b/drivers/infiniband/hw/mlx5/dm.c index 001d766cf291..3669c90b2dad 100644 --- a/drivers/infiniband/hw/mlx5/dm.c +++ b/drivers/infiniband/hw/mlx5/dm.c @@ -336,9 +336,15 @@ err_copy: static enum mlx5_sw_icm_type get_icm_type(int uapi_type) { - return uapi_type == MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM ? 
- MLX5_SW_ICM_TYPE_STEERING : - MLX5_SW_ICM_TYPE_HEADER_MODIFY; + switch (uapi_type) { + case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: + return MLX5_SW_ICM_TYPE_HEADER_MODIFY; + case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM: + return MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN; + case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: + default: + return MLX5_SW_ICM_TYPE_STEERING; + } } static struct ib_dm *handle_alloc_dm_sw_icm(struct ib_ucontext *ctx, @@ -347,11 +353,32 @@ static struct ib_dm *handle_alloc_dm_sw_icm(struct ib_ucontext *ctx, int type) { struct mlx5_core_dev *dev = to_mdev(ctx->device)->mdev; - enum mlx5_sw_icm_type icm_type = get_icm_type(type); + enum mlx5_sw_icm_type icm_type; struct mlx5_ib_dm_icm *dm; u64 act_size; int err; + if (!capable(CAP_SYS_RAWIO) || !capable(CAP_NET_RAW)) + return ERR_PTR(-EPERM); + + switch (type) { + case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: + case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: + if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner) || + MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner) || + MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) || + MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner_v2))) + return ERR_PTR(-EOPNOTSUPP); + break; + case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM: + if (!MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) || + !MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner_v2)) + return ERR_PTR(-EOPNOTSUPP); + break; + default: + return ERR_PTR(-EOPNOTSUPP); + } + dm = kzalloc(sizeof(*dm), GFP_KERNEL); if (!dm) return ERR_PTR(-ENOMEM); @@ -359,19 +386,6 @@ static struct ib_dm *handle_alloc_dm_sw_icm(struct ib_ucontext *ctx, dm->base.type = type; dm->base.ibdm.device = ctx->device; - if (!capable(CAP_SYS_RAWIO) || !capable(CAP_NET_RAW)) { - err = -EPERM; - goto free; - } - - if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner) || - MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner) || - MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) || - MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner_v2))) { - err = -EOPNOTSUPP; - goto free; - } - /* Allocation size 
must a multiple of the basic block size * and a power of 2. */ @@ -379,6 +393,8 @@ static struct ib_dm *handle_alloc_dm_sw_icm(struct ib_ucontext *ctx, act_size = roundup_pow_of_two(act_size); dm->base.size = act_size; + icm_type = get_icm_type(type); + err = mlx5_dm_sw_icm_alloc(dev, icm_type, act_size, attr->alignment, to_mucontext(ctx)->devx_uid, &dm->base.dev_addr, &dm->obj_id); @@ -420,8 +436,8 @@ struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, case MLX5_IB_UAPI_DM_TYPE_MEMIC: return handle_alloc_dm_memic(context, attr, attrs); case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: - return handle_alloc_dm_sw_icm(context, attr, attrs, type); case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: + case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM: return handle_alloc_dm_sw_icm(context, attr, attrs, type); default: return ERR_PTR(-EOPNOTSUPP); @@ -474,6 +490,7 @@ static int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, return 0; case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: + case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM: return mlx5_dm_icm_dealloc(ctx, to_icm(ibdm)); default: return -EOPNOTSUPP; diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c index 6398e2f48579..e32111117a5e 100644 --- a/drivers/infiniband/hw/mlx5/doorbell.c +++ b/drivers/infiniband/hw/mlx5/doorbell.c @@ -32,6 +32,7 @@ #include <linux/kref.h> #include <linux/slab.h> +#include <linux/sched/mm.h> #include <rdma/ib_umem.h> #include "mlx5_ib.h" diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index b780185d9dc6..490ec308e309 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -15,7 +15,6 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/fs.h> #include <linux/mlx5/fs_helpers.h> -#include <linux/mlx5/accel.h> #include <linux/mlx5/eswitch.h> #include <net/inet_ecn.h> #include "mlx5_ib.h" @@ -148,16 +147,6 @@ int parse_flow_flow_action(struct 
mlx5_ib_flow_action *maction, { switch (maction->ib_action.type) { - case IB_FLOW_ACTION_ESP: - if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | - MLX5_FLOW_CONTEXT_ACTION_DECRYPT)) - return -EINVAL; - /* Currently only AES_GCM keymat is supported by the driver */ - action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx; - action->action |= is_egress ? - MLX5_FLOW_CONTEXT_ACTION_ENCRYPT : - MLX5_FLOW_CONTEXT_ACTION_DECRYPT; - return 0; case IB_FLOW_ACTION_UNSPECIFIED: if (maction->flow_action_raw.sub_type == MLX5_IB_FLOW_ACTION_MODIFY_HEADER) { @@ -368,14 +357,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, ib_spec->type & IB_FLOW_SPEC_INNER); break; case IB_FLOW_SPEC_ESP: - if (ib_spec->esp.mask.seq) - return -EOPNOTSUPP; - - MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi, - ntohl(ib_spec->esp.mask.spi)); - MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi, - ntohl(ib_spec->esp.val.spi)); - break; + return -EOPNOTSUPP; case IB_FLOW_SPEC_TCP: if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, LAST_TCP_UDP_FIELD)) @@ -587,47 +569,6 @@ static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr) return false; } -enum valid_spec { - VALID_SPEC_INVALID, - VALID_SPEC_VALID, - VALID_SPEC_NA, -}; - -static enum valid_spec -is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev, - const struct mlx5_flow_spec *spec, - const struct mlx5_flow_act *flow_act, - bool egress) -{ - const u32 *match_c = spec->match_criteria; - bool is_crypto = - (flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | - MLX5_FLOW_CONTEXT_ACTION_DECRYPT)); - bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c); - bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP; - - /* - * Currently only crypto is supported in egress, when regular egress - * rules would be supported, always return VALID_SPEC_NA. 
- */ - if (!is_crypto) - return VALID_SPEC_NA; - - return is_crypto && is_ipsec && - (!egress || (!is_drop && - !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ? - VALID_SPEC_VALID : VALID_SPEC_INVALID; -} - -static bool is_valid_spec(struct mlx5_core_dev *mdev, - const struct mlx5_flow_spec *spec, - const struct mlx5_flow_act *flow_act, - bool egress) -{ - /* We curretly only support ipsec egress flow */ - return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID; -} - static bool is_valid_ethertype(struct mlx5_core_dev *mdev, const struct ib_flow_attr *flow_attr, bool check_inner) @@ -738,7 +679,15 @@ enum flow_table_type { #define MLX5_FS_MAX_TYPES 6 #define MLX5_FS_MAX_ENTRIES BIT(16) -static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns, +static bool mlx5_ib_shared_ft_allowed(struct ib_device *device) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + + return MLX5_CAP_GEN(dev->mdev, shared_object_to_user_object_allowed); +} + +static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev, + struct mlx5_flow_namespace *ns, struct mlx5_ib_flow_prio *prio, int priority, int num_entries, int num_groups, @@ -747,6 +696,8 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns, struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_table *ft; + if (mlx5_ib_shared_ft_allowed(&dev->ib_dev)) + ft_attr.uid = MLX5_SHARED_RESOURCE_UID; ft_attr.prio = priority; ft_attr.max_fte = num_entries; ft_attr.flags = flags; @@ -843,8 +794,8 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, ft = prio->flow_table; if (!ft) - return _get_prio(ns, prio, priority, max_table_size, num_groups, - flags); + return _get_prio(dev, ns, prio, priority, max_table_size, + num_groups, flags); return prio; } @@ -986,7 +937,7 @@ int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num, prio = &dev->flow_db->opfcs[type]; if (!prio->flow_table) { - prio = _get_prio(ns, prio, priority, + prio = 
_get_prio(dev, ns, prio, priority, dev->num_ports * MAX_OPFC_RULES, 1, 0); if (IS_ERR(prio)) { err = PTR_ERR(prio); @@ -1154,12 +1105,6 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); - if (is_egress && - !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) { - err = -EINVAL; - goto free; - } - if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { struct mlx5_ib_mcounters *mcounters; @@ -1472,8 +1417,8 @@ free_ucmd: } static struct mlx5_ib_flow_prio * -_get_flow_table(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_matcher *fs_matcher, +_get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority, + enum mlx5_flow_namespace_type ns_type, bool mcast) { struct mlx5_flow_namespace *ns = NULL; @@ -1486,11 +1431,11 @@ _get_flow_table(struct mlx5_ib_dev *dev, if (mcast) priority = MLX5_IB_FLOW_MCAST_PRIO; else - priority = ib_prio_to_core_prio(fs_matcher->priority, false); + priority = ib_prio_to_core_prio(user_priority, false); esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) != DEVLINK_ESWITCH_ENCAP_MODE_NONE; - switch (fs_matcher->ns_type) { + switch (ns_type) { case MLX5_FLOW_NAMESPACE_BYPASS: max_table_size = BIT( MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size)); @@ -1508,7 +1453,7 @@ _get_flow_table(struct mlx5_ib_dev *dev, !esw_encap) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; break; - case MLX5_FLOW_NAMESPACE_FDB: + case MLX5_FLOW_NAMESPACE_FDB_BYPASS: max_table_size = BIT( MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size)); if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap) @@ -1517,17 +1462,17 @@ _get_flow_table(struct mlx5_ib_dev *dev, reformat_l3_tunnel_to_l2) && esw_encap) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - priority = FDB_BYPASS_PATH; + priority = user_priority; break; case MLX5_FLOW_NAMESPACE_RDMA_RX: max_table_size = BIT( MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, log_max_ft_size)); - priority = 
fs_matcher->priority; + priority = user_priority; break; case MLX5_FLOW_NAMESPACE_RDMA_TX: max_table_size = BIT( MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size)); - priority = fs_matcher->priority; + priority = user_priority; break; default: break; @@ -1535,19 +1480,19 @@ _get_flow_table(struct mlx5_ib_dev *dev, max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES); - ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type); + ns = mlx5_get_flow_namespace(dev->mdev, ns_type); if (!ns) return ERR_PTR(-EOPNOTSUPP); - switch (fs_matcher->ns_type) { + switch (ns_type) { case MLX5_FLOW_NAMESPACE_BYPASS: prio = &dev->flow_db->prios[priority]; break; case MLX5_FLOW_NAMESPACE_EGRESS: prio = &dev->flow_db->egress_prios[priority]; break; - case MLX5_FLOW_NAMESPACE_FDB: - prio = &dev->flow_db->fdb; + case MLX5_FLOW_NAMESPACE_FDB_BYPASS: + prio = &dev->flow_db->fdb[priority]; break; case MLX5_FLOW_NAMESPACE_RDMA_RX: prio = &dev->flow_db->rdma_rx[priority]; @@ -1564,7 +1509,7 @@ _get_flow_table(struct mlx5_ib_dev *dev, if (prio->flow_table) return prio; - return _get_prio(ns, prio, priority, max_table_size, + return _get_prio(dev, ns, prio, priority, max_table_size, MLX5_FS_MAX_TYPES, flags); } @@ -1683,7 +1628,8 @@ static struct mlx5_ib_flow_handler *raw_fs_rule_add( mcast = raw_fs_is_multicast(fs_matcher, cmd_in); mutex_lock(&dev->flow_db->lock); - ft_prio = _get_flow_table(dev, fs_matcher, mcast); + ft_prio = _get_flow_table(dev, fs_matcher->priority, + fs_matcher->ns_type, mcast); if (IS_ERR(ft_prio)) { err = PTR_ERR(ft_prio); goto unlock; @@ -1740,149 +1686,6 @@ unlock: return ERR_PTR(err); } -static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags) -{ - u32 flags = 0; - - if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA) - flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA; - - return flags; -} - -#define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED \ - MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA -static struct 
ib_flow_action * -mlx5_ib_create_flow_action_esp(struct ib_device *device, - const struct ib_flow_action_attrs_esp *attr, - struct uverbs_attr_bundle *attrs) -{ - struct mlx5_ib_dev *mdev = to_mdev(device); - struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm; - struct mlx5_accel_esp_xfrm_attrs accel_attrs = {}; - struct mlx5_ib_flow_action *action; - u64 action_flags; - u64 flags; - int err = 0; - - err = uverbs_get_flags64( - &action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, - ((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1)); - if (err) - return ERR_PTR(err); - - flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags); - - /* We current only support a subset of the standard features. Only a - * keymat of type AES_GCM, with icv_len == 16, iv_algo == SEQ and esn - * (with overlap). Full offload mode isn't supported. - */ - if (!attr->keymat || attr->replay || attr->encap || - attr->spi || attr->seq || attr->tfc_pad || - attr->hard_limit_pkts || - (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | - IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT))) - return ERR_PTR(-EOPNOTSUPP); - - if (attr->keymat->protocol != - IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM) - return ERR_PTR(-EOPNOTSUPP); - - aes_gcm = &attr->keymat->keymat.aes_gcm; - - if (aes_gcm->icv_len != 16 || - aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ) - return ERR_PTR(-EOPNOTSUPP); - - action = kmalloc(sizeof(*action), GFP_KERNEL); - if (!action) - return ERR_PTR(-ENOMEM); - - action->esp_aes_gcm.ib_flags = attr->flags; - memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key, - sizeof(accel_attrs.keymat.aes_gcm.aes_key)); - accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8; - memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt, - sizeof(accel_attrs.keymat.aes_gcm.salt)); - memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv, - sizeof(accel_attrs.keymat.aes_gcm.seq_iv)); - accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8; - 
accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ; - accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM; - - accel_attrs.esn = attr->esn; - if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) - accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED; - if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW) - accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; - - if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT) - accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT; - - action->esp_aes_gcm.ctx = - mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags); - if (IS_ERR(action->esp_aes_gcm.ctx)) { - err = PTR_ERR(action->esp_aes_gcm.ctx); - goto err_parse; - } - - action->esp_aes_gcm.ib_flags = attr->flags; - - return &action->ib_action; - -err_parse: - kfree(action); - return ERR_PTR(err); -} - -static int -mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action, - const struct ib_flow_action_attrs_esp *attr, - struct uverbs_attr_bundle *attrs) -{ - struct mlx5_ib_flow_action *maction = to_mflow_act(action); - struct mlx5_accel_esp_xfrm_attrs accel_attrs; - int err = 0; - - if (attr->keymat || attr->replay || attr->encap || - attr->spi || attr->seq || attr->tfc_pad || - attr->hard_limit_pkts || - (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | - IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS | - IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))) - return -EOPNOTSUPP; - - /* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can - * be modified. 
- */ - if (!(maction->esp_aes_gcm.ib_flags & - IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) && - attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED | - IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)) - return -EINVAL; - - memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs, - sizeof(accel_attrs)); - - accel_attrs.esn = attr->esn; - if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW) - accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; - else - accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP; - - err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx, - &accel_attrs); - if (err) - return err; - - maction->esp_aes_gcm.ib_flags &= - ~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW; - maction->esp_aes_gcm.ib_flags |= - attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW; - - return 0; -} - static void destroy_flow_action_raw(struct mlx5_ib_flow_action *maction) { switch (maction->flow_action_raw.sub_type) { @@ -1906,13 +1709,6 @@ static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action) struct mlx5_ib_flow_action *maction = to_mflow_act(action); switch (action->type) { - case IB_FLOW_ACTION_ESP: - /* - * We only support aes_gcm by now, so we implicitly know this is - * the underline crypto. 
- */ - mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx); - break; case IB_FLOW_ACTION_UNSPECIFIED: destroy_flow_action_raw(maction); break; @@ -1937,7 +1733,7 @@ mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type, *namespace = MLX5_FLOW_NAMESPACE_EGRESS; break; case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB: - *namespace = MLX5_FLOW_NAMESPACE_FDB; + *namespace = MLX5_FLOW_NAMESPACE_FDB_BYPASS; break; case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX: *namespace = MLX5_FLOW_NAMESPACE_RDMA_RX; @@ -2029,8 +1825,8 @@ static int get_dests(struct uverbs_attr_bundle *attrs, } /* Allow only DEVX object, drop as dest for FDB */ - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && !(dest_devx || - (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP))) + if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS && + !(dest_devx || (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP))) return -EINVAL; /* Allow only DEVX object or QP as dest when inserting to RDMA_RX */ @@ -2050,7 +1846,7 @@ static int get_dests(struct uverbs_attr_bundle *attrs, if (!is_flow_dest(devx_obj, dest_id, dest_type)) return -EINVAL; /* Allow only flow table as dest when inserting to FDB or RDMA_RX */ - if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB || + if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS || fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) && *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) return -EINVAL; @@ -2230,6 +2026,23 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject, return 0; } +static int steering_anchor_cleanup(struct ib_uobject *uobject, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_steering_anchor *obj = uobject->object; + + if (atomic_read(&obj->usecnt)) + return -EBUSY; + + mutex_lock(&obj->dev->flow_db->lock); + put_flow_table(obj->dev, obj->ft_prio, true); + mutex_unlock(&obj->dev->flow_db->lock); + + kfree(obj); + return 0; +} + static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs, 
struct mlx5_ib_flow_matcher *obj) { @@ -2265,12 +2078,10 @@ static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs, if (err) return err; - if (flags) { - mlx5_ib_ft_type_to_namespace( + if (flags) + return mlx5_ib_ft_type_to_namespace( MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX, &obj->ns_type); - return 0; - } } obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS; @@ -2320,7 +2131,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( if (err) goto end; - if (obj->ns_type == MLX5_FLOW_NAMESPACE_FDB && + if (obj->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS && mlx5_eswitch_mode(dev->mdev) != MLX5_ESWITCH_OFFLOADS) { err = -EINVAL; goto end; @@ -2336,6 +2147,75 @@ end: return err; } +static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, MLX5_IB_ATTR_STEERING_ANCHOR_CREATE_HANDLE); + struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata); + enum mlx5_ib_uapi_flow_table_type ib_uapi_ft_type; + enum mlx5_flow_namespace_type ns_type; + struct mlx5_ib_steering_anchor *obj; + struct mlx5_ib_flow_prio *ft_prio; + u16 priority; + u32 ft_id; + int err; + + if (!capable(CAP_NET_RAW)) + return -EPERM; + + err = uverbs_get_const(&ib_uapi_ft_type, attrs, + MLX5_IB_ATTR_STEERING_ANCHOR_FT_TYPE); + if (err) + return err; + + err = mlx5_ib_ft_type_to_namespace(ib_uapi_ft_type, &ns_type); + if (err) + return err; + + err = uverbs_copy_from(&priority, attrs, + MLX5_IB_ATTR_STEERING_ANCHOR_PRIORITY); + if (err) + return err; + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) + return -ENOMEM; + + mutex_lock(&dev->flow_db->lock); + ft_prio = _get_flow_table(dev, priority, ns_type, 0); + if (IS_ERR(ft_prio)) { + mutex_unlock(&dev->flow_db->lock); + err = PTR_ERR(ft_prio); + goto free_obj; + } + + ft_prio->refcount++; + ft_id = mlx5_flow_table_id(ft_prio->flow_table); + mutex_unlock(&dev->flow_db->lock); + + err = uverbs_copy_to(attrs, 
MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID, + &ft_id, sizeof(ft_id)); + if (err) + goto put_flow_table; + + uobj->object = obj; + obj->dev = dev; + obj->ft_prio = ft_prio; + atomic_set(&obj->usecnt, 0); + + return 0; + +put_flow_table: + mutex_lock(&dev->flow_db->lock); + put_flow_table(dev, ft_prio, true); + mutex_unlock(&dev->flow_db->lock); +free_obj: + kfree(obj); + + return err; +} + static struct ib_flow_action * mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev, enum mlx5_ib_uapi_flow_table_type ft_type, @@ -2692,6 +2572,35 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER, &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE), &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY)); +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_STEERING_ANCHOR_CREATE, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_STEERING_ANCHOR_CREATE_HANDLE, + MLX5_IB_OBJECT_STEERING_ANCHOR, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_STEERING_ANCHOR_FT_TYPE, + enum mlx5_ib_uapi_flow_table_type, + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_STEERING_ANCHOR_PRIORITY, + UVERBS_ATTR_TYPE(u16), + UA_MANDATORY), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_STEERING_ANCHOR_DESTROY_HANDLE, + MLX5_IB_OBJECT_STEERING_ANCHOR, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT( + MLX5_IB_OBJECT_STEERING_ANCHOR, + UVERBS_TYPE_ALLOC_IDR(steering_anchor_cleanup), + &UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE), + &UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY)); + const struct uapi_definition mlx5_ib_flow_defs[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED( MLX5_IB_OBJECT_FLOW_MATCHER), @@ -2700,6 +2609,9 @@ const struct uapi_definition mlx5_ib_flow_defs[] = { &mlx5_ib_fs), UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, &mlx5_ib_flow_actions), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + 
MLX5_IB_OBJECT_STEERING_ANCHOR, + UAPI_DEF_IS_OBJ_SUPPORTED(mlx5_ib_shared_ft_allowed)), {}, }; @@ -2709,11 +2621,6 @@ static const struct ib_device_ops flow_ops = { .destroy_flow_action = mlx5_ib_destroy_flow_action, }; -static const struct ib_device_ops flow_ipsec_ops = { - .create_flow_action_esp = mlx5_ib_create_flow_action_esp, - .modify_flow_action_esp = mlx5_ib_modify_flow_action_esp, -}; - int mlx5_ib_fs_init(struct mlx5_ib_dev *dev) { dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL); @@ -2724,9 +2631,5 @@ int mlx5_ib_fs_init(struct mlx5_ib_dev *dev) mutex_init(&dev->flow_db->lock); ib_set_device_ops(&dev->ib_dev, &flow_ops); - if (mlx5_accel_ipsec_device_caps(dev->mdev) & - MLX5_ACCEL_IPSEC_CAP_DEVICE) - ib_set_device_ops(&dev->ib_dev, &flow_ipsec_ops); - return 0; } diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index 3ad8f637c589..b804f2dd5628 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -100,7 +100,7 @@ int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp, port_type) == MLX5_CAP_PORT_TYPE_IB) num_qps = pd->device->attrs.max_pkeys; else if (dev->lag_active) - num_qps = MLX5_MAX_PORTS; + num_qps = dev->lag_ports; } gsi = &mqp->gsi; diff --git a/drivers/infiniband/hw/mlx5/ib_virt.c b/drivers/infiniband/hw/mlx5/ib_virt.c index f2f62875d072..afeb5e53254f 100644 --- a/drivers/infiniband/hw/mlx5/ib_virt.c +++ b/drivers/infiniband/hw/mlx5/ib_virt.c @@ -30,7 +30,6 @@ * SOFTWARE. 
*/ -#include <linux/module.h> #include <linux/mlx5/vport.h> #include "mlx5_ib.h" diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index ec242a5a17a3..9c8a7b206dcf 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -147,6 +147,28 @@ static void pma_cnt_assign(struct ib_pma_portcounters *pma_cnt, vl_15_dropped); } +static int query_ib_ppcnt(struct mlx5_core_dev *dev, u8 port_num, void *out, + size_t sz) +{ + u32 *in; + int err; + + in = kvzalloc(sz, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + return err; + } + + MLX5_SET(ppcnt_reg, in, local_port, port_num); + + MLX5_SET(ppcnt_reg, in, grp, MLX5_INFINIBAND_PORT_COUNTERS_GROUP); + err = mlx5_core_access_reg(dev, in, sz, out, + sz, MLX5_REG_PPCNT, 0, 0); + + kvfree(in); + return err; +} + static int process_pma_cmd(struct mlx5_ib_dev *dev, u32 port_num, const struct ib_mad *in_mad, struct ib_mad *out_mad) { @@ -166,6 +188,12 @@ static int process_pma_cmd(struct mlx5_ib_dev *dev, u32 port_num, mdev = dev->mdev; mdev_port_num = 1; } + if (MLX5_CAP_GEN(dev->mdev, num_ports) == 1) { + /* set local port to one for Function-Per-Port HCA. 
*/ + mdev = dev->mdev; + mdev_port_num = 1; + } + /* Declaring support of extended counters */ if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO) { struct ib_class_port_info cpi = {}; @@ -202,8 +230,7 @@ static int process_pma_cmd(struct mlx5_ib_dev *dev, u32 port_num, goto done; } - err = mlx5_core_query_ib_ppcnt(mdev, mdev_port_num, - out_cnt, sz); + err = query_ib_ppcnt(mdev, mdev_port_num, out_cnt, sz); if (!err) pma_cnt_assign(pma_cnt, out_cnt); } @@ -291,7 +318,7 @@ int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, unsigned int port) if (!in_mad || !out_mad) goto out; - init_query_mad(in_mad); + ib_init_query_mad(in_mad); in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO; in_mad->attr_mod = cpu_to_be32(port); @@ -318,7 +345,7 @@ static int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, if (!in_mad) return -ENOMEM; - init_query_mad(in_mad); + ib_init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, @@ -405,7 +432,7 @@ int mlx5_query_mad_ifc_node_desc(struct mlx5_ib_dev *dev, char *node_desc) if (!in_mad || !out_mad) goto out; - init_query_mad(in_mad); + ib_init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_NODE_DESC; err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); @@ -430,7 +457,7 @@ int mlx5_query_mad_ifc_node_guid(struct mlx5_ib_dev *dev, __be64 *node_guid) if (!in_mad || !out_mad) goto out; - init_query_mad(in_mad); + ib_init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); @@ -456,7 +483,7 @@ int mlx5_query_mad_ifc_pkey(struct ib_device *ibdev, u32 port, u16 index, if (!in_mad || !out_mad) goto out; - init_query_mad(in_mad); + ib_init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE; in_mad->attr_mod = cpu_to_be32(index / 32); @@ -485,7 +512,7 @@ int mlx5_query_mad_ifc_gids(struct ib_device *ibdev, u32 port, int index, if (!in_mad || !out_mad) goto out; - 
init_query_mad(in_mad); + ib_init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; in_mad->attr_mod = cpu_to_be32(port); @@ -496,7 +523,7 @@ int mlx5_query_mad_ifc_gids(struct ib_device *ibdev, u32 port, int index, memcpy(gid->raw, out_mad->data + 8, 8); - init_query_mad(in_mad); + ib_init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; in_mad->attr_mod = cpu_to_be32(index / 8); @@ -530,7 +557,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u32 port, /* props being zeroed by the caller, avoid zeroing it here */ - init_query_mad(in_mad); + ib_init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; in_mad->attr_mod = cpu_to_be32(port); @@ -584,6 +611,11 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u32 port, props->port_cap_flags2 & IB_PORT_LINK_SPEED_HDR_SUP) props->active_speed = IB_SPEED_HDR; break; + case 8: + if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP && + props->port_cap_flags2 & IB_PORT_LINK_SPEED_NDR_SUP) + props->active_speed = IB_SPEED_NDR; + break; } } @@ -591,7 +623,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u32 port, if (props->active_speed == 4) { if (dev->port_caps[port - 1].ext_port_cap & MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) { - init_query_mad(in_mad); + ib_init_query_mad(in_mad); in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO; in_mad->attr_mod = cpu_to_be32(port); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 5ec8bd2f0b2f..c669ef6e47e7 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -26,7 +26,7 @@ #include <linux/mlx5/eswitch.h> #include <linux/list.h> #include <rdma/ib_smi.h> -#include <rdma/ib_umem.h> +#include <rdma/ib_umem_odp.h> #include <rdma/lag.h> #include <linux/in.h> #include <linux/etherdevice.h> @@ -41,12 +41,11 @@ #include "wr.h" #include "restrack.h" #include "counters.h" -#include <linux/mlx5/accel.h> +#include "umr.h" #include <rdma/uverbs_std_types.h> #include 
<rdma/uverbs_ioctl.h> #include <rdma/mlx5_user_ioctl_verbs.h> #include <rdma/mlx5_user_ioctl_cmds.h> -#include <rdma/ib_umem_odp.h> #define UVERBS_MODULE_NAME mlx5_ib #include <rdma/uverbs_named_ioctl.h> @@ -855,13 +854,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, IB_DEVICE_MEM_WINDOW_TYPE_2B; props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); /* We support 'Gappy' memory registration too */ - props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG; + props->kernel_cap_flags |= IBK_SG_GAPS_REG; } /* IB_WR_REG_MR always requires changing the entity size with UMR */ if (!MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; if (MLX5_CAP_GEN(mdev, sho)) { - props->device_cap_flags |= IB_DEVICE_INTEGRITY_HANDOVER; + props->kernel_cap_flags |= IBK_INTEGRITY_HANDOVER; /* At this stage no support for signature handover */ props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 | IB_PROT_T10DIF_TYPE_2 | @@ -870,7 +869,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, IB_GUARD_T10DIF_CSUM; } if (MLX5_CAP_GEN(mdev, block_lb_mc)) - props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; + props->kernel_cap_flags |= IBK_BLOCK_MULTICAST_LOOPBACK; if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && raw_support) { if (MLX5_CAP_ETH(mdev, csum_cap)) { @@ -906,10 +905,6 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_RX_HASH_SRC_PORT_UDP | MLX5_RX_HASH_DST_PORT_UDP | MLX5_RX_HASH_INNER; - if (mlx5_accel_ipsec_device_caps(dev->mdev) & - MLX5_ACCEL_IPSEC_CAP_DEVICE) - resp.rss_caps.rx_hash_fields_mask |= - MLX5_RX_HASH_IPSEC_SPI; resp.response_length += sizeof(resp.rss_caps); } } else { @@ -921,7 +916,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) { props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; - props->device_cap_flags |= IB_DEVICE_UD_TSO; + props->kernel_cap_flags |= IBK_UD_TSO; } if (MLX5_CAP_GEN(dev->mdev, rq_delay_drop) 
&& @@ -997,7 +992,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { if (dev->odp_caps.general_caps & IB_ODP_SUPPORT) - props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING; + props->kernel_cap_flags |= IBK_ON_DEMAND_PAGING; props->odp_caps = dev->odp_caps; if (!uhw) { /* ODP for kernel QPs is not implemented for receive @@ -1018,11 +1013,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } - if (MLX5_CAP_GEN(mdev, cd)) - props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL; - if (mlx5_core_is_vf(mdev)) - props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION; + props->kernel_cap_flags |= IBK_VIRTUAL_FUNCTION; if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET && raw_support) { @@ -1791,23 +1783,6 @@ static int set_ucontext_resp(struct ib_ucontext *uctx, resp->num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? MLX5_CAP_GEN(dev->mdev, num_of_uars_per_page) : 1; - - if (mlx5_accel_ipsec_device_caps(dev->mdev) & - MLX5_ACCEL_IPSEC_CAP_DEVICE) { - if (mlx5_get_flow_namespace(dev->mdev, - MLX5_FLOW_NAMESPACE_EGRESS)) - resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM; - if (mlx5_accel_ipsec_device_caps(dev->mdev) & - MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA) - resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA; - if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi)) - resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING; - if (mlx5_accel_ipsec_device_caps(dev->mdev) & - MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN) - resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN; - /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */ - } - resp->tot_bfregs = bfregi->lib_uar_dyn ? 
0 : bfregi->total_num_bfregs - bfregi->num_dyn_bfregs; resp->num_ports = dev->num_ports; @@ -1850,6 +1825,9 @@ static int set_ucontext_resp(struct ib_ucontext *uctx, if (MLX5_CAP_GEN(dev->mdev, drain_sigerr)) resp->comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_SQD2RTS; + resp->comp_mask |= + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_MKEY_UPDATE_TAG; + return 0; } @@ -2762,26 +2740,24 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev) int err; int port; - for (port = 1; port <= ARRAY_SIZE(dev->port_caps); port++) { - dev->port_caps[port - 1].has_smi = false; - if (MLX5_CAP_GEN(dev->mdev, port_type) == - MLX5_CAP_PORT_TYPE_IB) { - if (MLX5_CAP_GEN(dev->mdev, ib_virt)) { - err = mlx5_query_hca_vport_context(dev->mdev, 0, - port, 0, - &vport_ctx); - if (err) { - mlx5_ib_err(dev, "query_hca_vport_context for port=%d failed %d\n", - port, err); - return err; - } - dev->port_caps[port - 1].has_smi = - vport_ctx.has_smi; - } else { - dev->port_caps[port - 1].has_smi = true; - } + if (MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_IB) + return 0; + + for (port = 1; port <= dev->num_ports; port++) { + if (!MLX5_CAP_GEN(dev->mdev, ib_virt)) { + dev->port_caps[port - 1].has_smi = true; + continue; } + err = mlx5_query_hca_vport_context(dev->mdev, 0, port, 0, + &vport_ctx); + if (err) { + mlx5_ib_err(dev, "query_hca_vport_context for port=%d failed %d\n", + port, err); + return err; + } + dev->port_caps[port - 1].has_smi = vport_ctx.has_smi; } + return 0; } @@ -3013,6 +2989,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev) } dev->flow_db->lag_demux_ft = ft; + dev->lag_ports = mlx5_lag_get_num_ports(mdev); dev->lag_active = true; return 0; @@ -3605,13 +3582,6 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_UAR, &UVERBS_METHOD(MLX5_IB_METHOD_UAR_OBJ_DESTROY)); ADD_UVERBS_ATTRIBUTES_SIMPLE( - mlx5_ib_flow_action, - UVERBS_OBJECT_FLOW_ACTION, - UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, - UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, - enum 
mlx5_ib_uapi_flow_action_flags)); - -ADD_UVERBS_ATTRIBUTES_SIMPLE( mlx5_ib_query_context, UVERBS_OBJECT_DEVICE, UVERBS_METHOD_QUERY_CONTEXT, @@ -3628,8 +3598,6 @@ static const struct uapi_definition mlx5_ib_defs[] = { UAPI_DEF_CHAIN(mlx5_ib_std_types_defs), UAPI_DEF_CHAIN(mlx5_ib_dm_defs), - UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, - &mlx5_ib_flow_action), UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DEVICE, &mlx5_ib_query_context), UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR, UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)), @@ -4034,16 +4002,11 @@ static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) { int err; - err = mlx5_mr_cache_cleanup(dev); + err = mlx5_mkey_cache_cleanup(dev); if (err) mlx5_ib_warn(dev, "mr cache cleanup failed\n"); - if (dev->umrc.qp) - ib_destroy_qp(dev->umrc.qp); - if (dev->umrc.cq) - ib_free_cq(dev->umrc.cq); - if (dev->umrc.pd) - ib_dealloc_pd(dev->umrc.pd); + mlx5r_umr_resource_cleanup(dev); } static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev) @@ -4051,112 +4014,19 @@ static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev) ib_unregister_device(&dev->ib_dev); } -enum { - MAX_UMR_WR = 128, -}; - static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev) { - struct ib_qp_init_attr *init_attr = NULL; - struct ib_qp_attr *attr = NULL; - struct ib_pd *pd; - struct ib_cq *cq; - struct ib_qp *qp; int ret; - attr = kzalloc(sizeof(*attr), GFP_KERNEL); - init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL); - if (!attr || !init_attr) { - ret = -ENOMEM; - goto error_0; - } - - pd = ib_alloc_pd(&dev->ib_dev, 0); - if (IS_ERR(pd)) { - mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); - ret = PTR_ERR(pd); - goto error_0; - } - - cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ); - if (IS_ERR(cq)) { - mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); - ret = PTR_ERR(cq); - goto error_2; - } - - init_attr->send_cq = cq; - init_attr->recv_cq = cq; - 
init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; - init_attr->cap.max_send_wr = MAX_UMR_WR; - init_attr->cap.max_send_sge = 1; - init_attr->qp_type = MLX5_IB_QPT_REG_UMR; - init_attr->port_num = 1; - qp = ib_create_qp(pd, init_attr); - if (IS_ERR(qp)) { - mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n"); - ret = PTR_ERR(qp); - goto error_3; - } - - attr->qp_state = IB_QPS_INIT; - attr->port_num = 1; - ret = ib_modify_qp(qp, attr, - IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT); - if (ret) { - mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); - goto error_4; - } - - memset(attr, 0, sizeof(*attr)); - attr->qp_state = IB_QPS_RTR; - attr->path_mtu = IB_MTU_256; - - ret = ib_modify_qp(qp, attr, IB_QP_STATE); - if (ret) { - mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n"); - goto error_4; - } - - memset(attr, 0, sizeof(*attr)); - attr->qp_state = IB_QPS_RTS; - ret = ib_modify_qp(qp, attr, IB_QP_STATE); - if (ret) { - mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n"); - goto error_4; - } - - dev->umrc.qp = qp; - dev->umrc.cq = cq; - dev->umrc.pd = pd; + ret = mlx5r_umr_resource_init(dev); + if (ret) + return ret; - sema_init(&dev->umrc.sem, MAX_UMR_WR); - ret = mlx5_mr_cache_init(dev); + ret = mlx5_mkey_cache_init(dev); if (ret) { mlx5_ib_warn(dev, "mr cache init failed %d\n", ret); - goto error_4; + mlx5r_umr_resource_cleanup(dev); } - - kfree(attr); - kfree(init_attr); - - return 0; - -error_4: - ib_destroy_qp(qp); - dev->umrc.qp = NULL; - -error_3: - ib_free_cq(cq); - dev->umrc.cq = NULL; - -error_2: - ib_dealloc_pd(pd); - dev->umrc.pd = NULL; - -error_0: - kfree(attr); - kfree(init_attr); return ret; } @@ -4178,7 +4048,7 @@ static int mlx5_ib_stage_delay_drop_init(struct mlx5_ib_dev *dev) if (!mlx5_debugfs_root) return 0; - root = debugfs_create_dir("delay_drop", dev->mdev->priv.dbg_root); + root = debugfs_create_dir("delay_drop", mlx5_debugfs_get_dev_root(dev->mdev)); dev->delay_drop.dir_debugfs = root; debugfs_create_atomic_t("num_timeout_events", 0400, root, @@ -4422,7 
+4292,7 @@ static int mlx5r_mp_probe(struct auxiliary_device *adev, } mutex_unlock(&mlx5_ib_multiport_mutex); - dev_set_drvdata(&adev->dev, mpi); + auxiliary_set_drvdata(adev, mpi); return 0; } @@ -4430,7 +4300,7 @@ static void mlx5r_mp_remove(struct auxiliary_device *adev) { struct mlx5_ib_multiport_info *mpi; - mpi = dev_get_drvdata(&adev->dev); + mpi = auxiliary_get_drvdata(adev); mutex_lock(&mlx5_ib_multiport_mutex); if (mpi->ibdev) mlx5_ib_unbind_slave_port(mpi->ibdev, mpi); @@ -4468,7 +4338,7 @@ static int mlx5r_probe(struct auxiliary_device *adev, dev->mdev = mdev; dev->num_ports = num_ports; - if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_is_roce_init_enabled(mdev)) + if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_get_roce_state(mdev)) profile = &raw_eth_profile; else profile = &pf_profile; @@ -4480,7 +4350,7 @@ static int mlx5r_probe(struct auxiliary_device *adev, return ret; } - dev_set_drvdata(&adev->dev, dev); + auxiliary_set_drvdata(adev, dev); return 0; } @@ -4488,7 +4358,7 @@ static void mlx5r_remove(struct auxiliary_device *adev) { struct mlx5_ib_dev *dev; - dev = dev_get_drvdata(&adev->dev); + dev = auxiliary_get_drvdata(adev); __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); } diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 844545064c9e..96ffbbaf0a73 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -30,8 +30,6 @@ * SOFTWARE. 
*/ -#include <linux/module.h> -#include <rdma/ib_umem.h> #include <rdma/ib_umem_odp.h> #include "mlx5_ib.h" #include <linux/jiffies.h> @@ -153,6 +151,7 @@ static int post_send_nop(struct mlx5_ib_dev *dev, struct ib_qp *ibqp, u64 wr_id, for (i = 0; i < 8; i++) mlx5_write64(&mmio_wqe[i * 2], bf->bfreg->map + bf->offset + i * 8); + io_stop_wc(); bf->offset ^= bf->buf_size; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 4a7a56ed740b..4a7f7064bd0e 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -232,6 +232,7 @@ enum { #define MLX5_IB_NUM_FLOW_FT (MLX5_IB_FLOW_LEFTOVERS_PRIO + 1) #define MLX5_IB_NUM_SNIFFER_FTS 2 #define MLX5_IB_NUM_EGRESS_FTS 1 +#define MLX5_IB_NUM_FDB_FTS MLX5_BY_PASS_NUM_REGULAR_PRIOS struct mlx5_ib_flow_prio { struct mlx5_flow_table *flow_table; unsigned int refcount; @@ -258,6 +259,12 @@ struct mlx5_ib_flow_matcher { u8 match_criteria_enable; }; +struct mlx5_ib_steering_anchor { + struct mlx5_ib_flow_prio *ft_prio; + struct mlx5_ib_dev *dev; + atomic_t usecnt; +}; + struct mlx5_ib_pp { u16 index; struct mlx5_core_dev *mdev; @@ -276,7 +283,7 @@ struct mlx5_ib_flow_db { struct mlx5_ib_flow_prio egress_prios[MLX5_IB_NUM_FLOW_FT]; struct mlx5_ib_flow_prio sniffer[MLX5_IB_NUM_SNIFFER_FTS]; struct mlx5_ib_flow_prio egress[MLX5_IB_NUM_EGRESS_FTS]; - struct mlx5_ib_flow_prio fdb; + struct mlx5_ib_flow_prio fdb[MLX5_IB_NUM_FDB_FTS]; struct mlx5_ib_flow_prio rdma_rx[MLX5_IB_NUM_FLOW_FT]; struct mlx5_ib_flow_prio rdma_tx[MLX5_IB_NUM_FLOW_FT]; struct mlx5_ib_flow_prio opfcs[MLX5_IB_OPCOUNTER_MAX]; @@ -290,16 +297,9 @@ struct mlx5_ib_flow_db { }; /* Use macros here so that don't have to duplicate - * enum ib_send_flags and enum ib_qp_type for low-level driver + * enum ib_qp_type for low-level driver */ -#define MLX5_IB_SEND_UMR_ENABLE_MR (IB_SEND_RESERVED_START << 0) -#define MLX5_IB_SEND_UMR_DISABLE_MR (IB_SEND_RESERVED_START << 1) -#define 
MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 2) -#define MLX5_IB_SEND_UMR_UPDATE_XLT (IB_SEND_RESERVED_START << 3) -#define MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (IB_SEND_RESERVED_START << 4) -#define MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS IB_SEND_RESERVED_END - #define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1 /* * IB_QPT_GSI creates the software wrapper around GSI, and MLX5_IB_QPT_HW_GSI @@ -310,9 +310,6 @@ struct mlx5_ib_flow_db { #define MLX5_IB_QPT_DCT IB_QPT_RESERVED4 #define MLX5_IB_WR_UMR IB_WR_RESERVED1 -#define MLX5_IB_UMR_OCTOWORD 16 -#define MLX5_IB_UMR_XLT_ALIGNMENT 64 - #define MLX5_IB_UPD_XLT_ZAP BIT(0) #define MLX5_IB_UPD_XLT_ENABLE BIT(1) #define MLX5_IB_UPD_XLT_ATOMIC BIT(2) @@ -538,24 +535,6 @@ struct mlx5_ib_cq_buf { int nent; }; -struct mlx5_umr_wr { - struct ib_send_wr wr; - u64 virt_addr; - u64 offset; - struct ib_pd *pd; - unsigned int page_shift; - unsigned int xlt_size; - u64 length; - int access_flags; - u32 mkey; - u8 ignore_free_state:1; -}; - -static inline const struct mlx5_umr_wr *umr_wr(const struct ib_send_wr *wr) -{ - return container_of(wr, struct mlx5_umr_wr, wr); -} - enum mlx5_ib_cq_pr_flags { MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD = 1 << 0, MLX5_IB_CQ_PR_FLAGS_REAL_TIME_TS = 1 << 1, @@ -640,6 +619,7 @@ struct mlx5_ib_mkey { unsigned int ndescs; struct wait_queue_head wait; refcount_t usecount; + struct mlx5_cache_ent *cache_ent; }; #define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE) @@ -662,20 +642,10 @@ struct mlx5_ib_mr { struct ib_mr ibmr; struct mlx5_ib_mkey mmkey; - /* User MR data */ - struct mlx5_cache_ent *cache_ent; + struct ib_umem *umem; - /* This is zero'd when the MR is allocated */ union { - /* Used only while the MR is in the cache */ - struct { - u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; - struct mlx5_async_work cb_work; - /* Cache list element */ - struct list_head list; - }; - - /* Used only by kernel MRs */ + /* Used only by kernel MRs (umem == NULL) */ struct { void *descs; void *descs_alloc; @@ 
-696,9 +666,8 @@ struct mlx5_ib_mr { int data_length; }; - /* Used only by User MRs */ + /* Used only by User MRs (umem != NULL) */ struct { - struct ib_umem *umem; unsigned int page_shift; /* Current access_flags */ int access_flags; @@ -715,12 +684,6 @@ struct mlx5_ib_mr { }; }; -/* Zero the fields in the mr that are variant depending on usage */ -static inline void mlx5_clear_mr(struct mlx5_ib_mr *mr) -{ - memset(mr->out, 0, sizeof(*mr) - offsetof(struct mlx5_ib_mr, out)); -} - static inline bool is_odp_mr(struct mlx5_ib_mr *mr) { return IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && mr->umem && @@ -744,56 +707,67 @@ struct mlx5_ib_umr_context { struct completion done; }; +enum { + MLX5_UMR_STATE_UNINIT, + MLX5_UMR_STATE_ACTIVE, + MLX5_UMR_STATE_RECOVER, + MLX5_UMR_STATE_ERR, +}; + struct umr_common { struct ib_pd *pd; struct ib_cq *cq; struct ib_qp *qp; - /* control access to UMR QP + /* Protects from UMR QP overflow */ struct semaphore sem; + /* Protects from using UMR while the UMR is not active + */ + struct mutex lock; + unsigned int state; }; struct mlx5_cache_ent { - struct list_head head; - /* sync access to the cahce entry - */ - spinlock_t lock; - + struct xarray mkeys; + unsigned long stored; + unsigned long reserved; char name[4]; u32 order; - u32 xlt; u32 access_mode; u32 page; + unsigned int ndescs; u8 disabled:1; u8 fill_to_high_water:1; /* - * - available_mrs is the length of list head, ie the number of MRs - * available for immediate allocation. - * - total_mrs is available_mrs plus all in use MRs that could be - * returned to the cache. - * - limit is the low water mark for available_mrs, 2* limit is the + * - limit is the low water mark for stored mkeys, 2* limit is the * upper water mark. 
- * - pending is the number of MRs currently being created */ - u32 total_mrs; - u32 available_mrs; + u32 in_use; u32 limit; - u32 pending; /* Statistics */ u32 miss; struct mlx5_ib_dev *dev; - struct work_struct work; struct delayed_work dwork; }; -struct mlx5_mr_cache { +struct mlx5r_async_create_mkey { + union { + u32 in[MLX5_ST_SZ_BYTES(create_mkey_in)]; + u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; + }; + struct mlx5_async_work cb_work; + struct mlx5_cache_ent *ent; + u32 mkey; +}; + +struct mlx5_mkey_cache { struct workqueue_struct *wq; - struct mlx5_cache_ent ent[MAX_MR_CACHE_ENTRIES]; + struct mlx5_cache_ent ent[MAX_MKEY_CACHE_ENTRIES]; struct dentry *root; unsigned long last_add; }; @@ -1092,7 +1066,7 @@ struct mlx5_ib_dev { struct mlx5_ib_resources devr; atomic_t mkey_var; - struct mlx5_mr_cache cache; + struct mlx5_mkey_cache cache; struct timer_list delay_timer; /* Prevents soft lock on massive reg MRs */ struct mutex slow_path_mutex; @@ -1131,6 +1105,7 @@ struct mlx5_ib_dev { struct xarray sig_mrs; struct mlx5_port_caps port_caps[MLX5_MAX_PORTS]; u16 pkey_table_len; + u8 lag_ports; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -1290,9 +1265,6 @@ int mlx5_ib_advise_mr(struct ib_pd *pd, struct uverbs_attr_bundle *attrs); int mlx5_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); int mlx5_ib_dealloc_mw(struct ib_mw *mw); -int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, - int page_shift, int flags); -int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags); struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, int access_flags); void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr); @@ -1339,11 +1311,12 @@ void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas, u64 access_flags); void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num); int mlx5_ib_get_cqe_size(struct ib_cq *ibcq); -int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); -int 
mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); +int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev); +int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev); struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, - unsigned int entry, int access_flags); + struct mlx5_cache_ent *ent, + int access_flags); int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_mr_status *mr_status); @@ -1367,7 +1340,7 @@ int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq); void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev); int __init mlx5_ib_odp_init(void); void mlx5_ib_odp_cleanup(void); -void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent); +void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent); void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries, struct mlx5_ib_mr *mr, int flags); @@ -1386,7 +1359,7 @@ static inline int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, static inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {} static inline int mlx5_ib_odp_init(void) { return 0; } static inline void mlx5_ib_odp_cleanup(void) {} -static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {} +static inline void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent) {} static inline void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries, struct mlx5_ib_mr *mr, int flags) {} @@ -1465,22 +1438,11 @@ extern const struct uapi_definition mlx5_ib_flow_defs[]; extern const struct uapi_definition mlx5_ib_qos_defs[]; extern const struct uapi_definition mlx5_ib_std_types_defs[]; -static inline void init_query_mad(struct ib_smp *mad) -{ - mad->base_version = 1; - mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; - mad->class_version = 1; - mad->method = IB_MGMT_METHOD_GET; -} - static inline int is_qp1(enum ib_qp_type qp_type) { return qp_type == MLX5_IB_QPT_HW_GSI || qp_type == IB_QPT_GSI; } -#define MLX5_MAX_UMR_SHIFT 16 -#define MLX5_MAX_UMR_PAGES (1 << 
MLX5_MAX_UMR_SHIFT) - static inline u32 check_cq_create_flags(u32 flags) { /* @@ -1546,71 +1508,12 @@ static inline int get_uars_per_sys_page(struct mlx5_ib_dev *dev, bool lib_suppor MLX5_UARS_IN_PAGE : 1; } -static inline int get_num_static_uars(struct mlx5_ib_dev *dev, - struct mlx5_bfreg_info *bfregi) -{ - return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * bfregi->num_static_sys_pages; -} - extern void *xlt_emergency_page; int bfregn_to_uar_index(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, u32 bfregn, bool dyn_bfreg); -static inline bool mlx5_ib_can_load_pas_with_umr(struct mlx5_ib_dev *dev, - size_t length) -{ - /* - * umr_check_mkey_mask() rejects MLX5_MKEY_MASK_PAGE_SIZE which is - * always set if MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (aka - * MLX5_IB_UPD_XLT_ADDR and MLX5_IB_UPD_XLT_ENABLE) is set. Thus, a mkey - * can never be enabled without this capability. Simplify this weird - * quirky hardware by just saying it can't use PAS lists with UMR at - * all. - */ - if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) - return false; - - /* - * length is the size of the MR in bytes when mlx5_ib_update_xlt() is - * used. - */ - if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) && - length >= MLX5_MAX_UMR_PAGES * PAGE_SIZE) - return false; - return true; -} - -/* - * true if an existing MR can be reconfigured to new access_flags using UMR. - * Older HW cannot use UMR to update certain elements of the MKC. 
See - * umr_check_mkey_mask(), get_umr_update_access_mask() and umr_check_mkey_mask() - */ -static inline bool mlx5_ib_can_reconfig_with_umr(struct mlx5_ib_dev *dev, - unsigned int current_access_flags, - unsigned int target_access_flags) -{ - unsigned int diffs = current_access_flags ^ target_access_flags; - - if ((diffs & IB_ACCESS_REMOTE_ATOMIC) && - MLX5_CAP_GEN(dev->mdev, atomic) && - MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) - return false; - - if ((diffs & IB_ACCESS_RELAXED_ORDERING) && - MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) && - !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) - return false; - - if ((diffs & IB_ACCESS_RELAXED_ORDERING) && - MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) && - !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) - return false; - - return true; -} - static inline int mlx5r_store_odp_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mkey *mmkey) { @@ -1638,6 +1541,18 @@ int mlx5_ib_test_wc(struct mlx5_ib_dev *dev); static inline bool mlx5_ib_lag_should_assign_affinity(struct mlx5_ib_dev *dev) { + /* + * If the driver is in hash mode and the port_select_flow_table_bypass cap + * is supported, it means that the driver no longer needs to assign the port + * affinity by default. If a user wants to set the port affinity explicitly, + * the user has a dedicated API to do that, so there is no need to assign + * the port affinity by default. + */ + if (dev->lag_active && + mlx5_lag_mode_is_hash(dev->mdev) && + MLX5_CAP_PORT_SELECTION(dev->mdev, port_select_flow_table_bypass)) + return 0; + return dev->lag_active || (MLX5_CAP_GEN(dev->mdev, num_lag_ports) > 1 && MLX5_CAP_GEN(dev->mdev, lag_tx_port_affinity)); @@ -1648,4 +1563,40 @@ static inline bool rt_supported(int ts_cap) return ts_cap == MLX5_TIMESTAMP_FORMAT_CAP_REAL_TIME || ts_cap == MLX5_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME; } + +/* + * PCI Peer to Peer is a trainwreck. 
If no switch is present then things + * sometimes work, depending on the pci_distance_p2p logic for excluding broken + * root complexes. However if a switch is present in the path, then things get + * really ugly depending on how the switch is setup. This table assumes that the + * root complex is strict and is validating that all req/reps are matches + * perfectly - so any scenario where it sees only half the transaction is a + * failure. + * + * CR/RR/DT ATS RO P2P + * 00X X X OK + * 010 X X fails (request is routed to root but root never sees comp) + * 011 0 X fails (request is routed to root but root never sees comp) + * 011 1 X OK + * 10X X 1 OK + * 101 X 0 fails (completion is routed to root but root didn't see req) + * 110 X 0 SLOW + * 111 0 0 SLOW + * 111 1 0 fails (completion is routed to root but root didn't see req) + * 111 1 1 OK + * + * Unfortunately we cannot reliably know if a switch is present or what the + * CR/RR/DT ACS settings are, as in a VM that is all hidden. Assume that + * CR/RR/DT is 111 if the ATS cap is enabled and follow the last three rows. + * + * For now assume if the umem is a dma_buf then it is P2P. 
+ */ +static inline bool mlx5_umem_needs_ats(struct mlx5_ib_dev *dev, + struct ib_umem *umem, int access_flags) +{ + if (!MLX5_CAP_GEN(dev->mdev, ats) || !umem->is_dmabuf) + return false; + return access_flags & IB_ACCESS_RELAXED_ORDERING; +} + #endif /* MLX5_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 63e2129f1142..410cc5fd2523 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -39,18 +39,10 @@ #include <linux/delay.h> #include <linux/dma-buf.h> #include <linux/dma-resv.h> -#include <rdma/ib_umem.h> #include <rdma/ib_umem_odp.h> -#include <rdma/ib_verbs.h> #include "dm.h" #include "mlx5_ib.h" - -/* - * We can't use an array for xlt_emergency_page because dma_map_single doesn't - * work on kernel modules memory - */ -void *xlt_emergency_page; -static DEFINE_MUTEX(xlt_emergency_page_mutex); +#include "umr.h" enum { MAX_PENDING_REG_MR = 8, @@ -68,7 +60,6 @@ static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, struct ib_pd *pd) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - bool ro_pci_enabled = pcie_relaxed_ordering_enabled(dev->mdev->pdev); MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); @@ -76,27 +67,27 @@ static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); MLX5_SET(mkc, mkc, lr, 1); - if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) - MLX5_SET(mkc, mkc, relaxed_ordering_write, - (acc & IB_ACCESS_RELAXED_ORDERING) && ro_pci_enabled); - if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) - MLX5_SET(mkc, mkc, relaxed_ordering_read, - (acc & IB_ACCESS_RELAXED_ORDERING) && ro_pci_enabled); + if ((acc & IB_ACCESS_RELAXED_ORDERING) && + pcie_relaxed_ordering_enabled(dev->mdev->pdev)) { + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) + MLX5_SET(mkc, mkc, relaxed_ordering_write, 1); + if 
(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) + MLX5_SET(mkc, mkc, relaxed_ordering_read, 1); + } MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); MLX5_SET(mkc, mkc, qpn, 0xffffff); MLX5_SET64(mkc, mkc, start_addr, start_addr); } -static void assign_mkey_variant(struct mlx5_ib_dev *dev, - struct mlx5_ib_mkey *mkey, u32 *in) +static void assign_mkey_variant(struct mlx5_ib_dev *dev, u32 *mkey, u32 *in) { u8 key = atomic_inc_return(&dev->mkey_var); void *mkc; mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(mkc, mkc, mkey_7_0, key); - mkey->key = key; + *mkey = key; } static int mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, @@ -104,7 +95,7 @@ static int mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, { int ret; - assign_mkey_variant(dev, mkey, in); + assign_mkey_variant(dev, &mkey->key, in); ret = mlx5_core_create_mkey(dev->mdev, &mkey->key, in, inlen); if (!ret) init_waitqueue_head(&mkey->wait); @@ -112,27 +103,23 @@ static int mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, return ret; } -static int -mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev, - struct mlx5_ib_mkey *mkey, - struct mlx5_async_ctx *async_ctx, - u32 *in, int inlen, u32 *out, int outlen, - struct mlx5_async_work *context) +static int mlx5_ib_create_mkey_cb(struct mlx5r_async_create_mkey *async_create) { - MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); - assign_mkey_variant(dev, mkey, in); - return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen, - create_mkey_callback, context); + struct mlx5_ib_dev *dev = async_create->ent->dev; + size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + size_t outlen = MLX5_ST_SZ_BYTES(create_mkey_out); + + MLX5_SET(create_mkey_in, async_create->in, opcode, + MLX5_CMD_OP_CREATE_MKEY); + assign_mkey_variant(dev, &async_create->mkey, async_create->in); + return mlx5_cmd_exec_cb(&dev->async_ctx, async_create->in, inlen, + async_create->out, outlen, create_mkey_callback, + &async_create->cb_work); } -static int mr_cache_max_order(struct 
mlx5_ib_dev *dev); +static int mkey_cache_max_order(struct mlx5_ib_dev *dev); static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent); -static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev) -{ - return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled); -} - static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key))); @@ -140,186 +127,277 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) return mlx5_core_destroy_mkey(dev->mdev, mr->mmkey.key); } +static void create_mkey_warn(struct mlx5_ib_dev *dev, int status, void *out) +{ + if (status == -ENXIO) /* core driver is not available */ + return; + + mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status); + if (status != -EREMOTEIO) /* driver specific failure */ + return; + + /* Failed in FW, print cmd out failure details */ + mlx5_cmd_out_err(dev->mdev, MLX5_CMD_OP_CREATE_MKEY, 0, out); +} + + +static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings, + void *to_store) +{ + XA_STATE(xas, &ent->mkeys, 0); + void *curr; + + xa_lock_irq(&ent->mkeys); + if (limit_pendings && + (ent->reserved - ent->stored) > MAX_PENDING_REG_MR) { + xa_unlock_irq(&ent->mkeys); + return -EAGAIN; + } + while (1) { + /* + * This is cmpxchg (NULL, XA_ZERO_ENTRY) however this version + * doesn't transparently unlock. Instead we set the xas index to + * the current value of reserved every iteration. 
+ */ + xas_set(&xas, ent->reserved); + curr = xas_load(&xas); + if (!curr) { + if (to_store && ent->stored == ent->reserved) + xas_store(&xas, to_store); + else + xas_store(&xas, XA_ZERO_ENTRY); + if (xas_valid(&xas)) { + ent->reserved++; + if (to_store) { + if (ent->stored != ent->reserved) + __xa_store(&ent->mkeys, + ent->stored, + to_store, + GFP_KERNEL); + ent->stored++; + queue_adjust_cache_locked(ent); + WRITE_ONCE(ent->dev->cache.last_add, + jiffies); + } + } + } + xa_unlock_irq(&ent->mkeys); + + /* + * Notice xas_nomem() must always be called as it cleans + * up any cached allocation. + */ + if (!xas_nomem(&xas, GFP_KERNEL)) + break; + xa_lock_irq(&ent->mkeys); + } + if (xas_error(&xas)) + return xas_error(&xas); + if (WARN_ON(curr)) + return -EINVAL; + return 0; +} + +static void undo_push_reserve_mkey(struct mlx5_cache_ent *ent) +{ + void *old; + + ent->reserved--; + old = __xa_erase(&ent->mkeys, ent->reserved); + WARN_ON(old); +} + +static void push_to_reserved(struct mlx5_cache_ent *ent, u32 mkey) +{ + void *old; + + old = __xa_store(&ent->mkeys, ent->stored, xa_mk_value(mkey), 0); + WARN_ON(old); + ent->stored++; +} + +static u32 pop_stored_mkey(struct mlx5_cache_ent *ent) +{ + void *old, *xa_mkey; + + ent->stored--; + ent->reserved--; + + if (ent->stored == ent->reserved) { + xa_mkey = __xa_erase(&ent->mkeys, ent->stored); + WARN_ON(!xa_mkey); + return (u32)xa_to_value(xa_mkey); + } + + xa_mkey = __xa_store(&ent->mkeys, ent->stored, XA_ZERO_ENTRY, + GFP_KERNEL); + WARN_ON(!xa_mkey || xa_is_err(xa_mkey)); + old = __xa_erase(&ent->mkeys, ent->reserved); + WARN_ON(old); + return (u32)xa_to_value(xa_mkey); +} + static void create_mkey_callback(int status, struct mlx5_async_work *context) { - struct mlx5_ib_mr *mr = - container_of(context, struct mlx5_ib_mr, cb_work); - struct mlx5_cache_ent *ent = mr->cache_ent; + struct mlx5r_async_create_mkey *mkey_out = + container_of(context, struct mlx5r_async_create_mkey, cb_work); + struct mlx5_cache_ent *ent = 
mkey_out->ent; struct mlx5_ib_dev *dev = ent->dev; unsigned long flags; if (status) { - mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status); - kfree(mr); - spin_lock_irqsave(&ent->lock, flags); - ent->pending--; + create_mkey_warn(dev, status, mkey_out->out); + kfree(mkey_out); + xa_lock_irqsave(&ent->mkeys, flags); + undo_push_reserve_mkey(ent); WRITE_ONCE(dev->fill_delay, 1); - spin_unlock_irqrestore(&ent->lock, flags); + xa_unlock_irqrestore(&ent->mkeys, flags); mod_timer(&dev->delay_timer, jiffies + HZ); return; } - mr->mmkey.type = MLX5_MKEY_MR; - mr->mmkey.key |= mlx5_idx_to_mkey( - MLX5_GET(create_mkey_out, mr->out, mkey_index)); - init_waitqueue_head(&mr->mmkey.wait); - + mkey_out->mkey |= mlx5_idx_to_mkey( + MLX5_GET(create_mkey_out, mkey_out->out, mkey_index)); WRITE_ONCE(dev->cache.last_add, jiffies); - spin_lock_irqsave(&ent->lock, flags); - list_add_tail(&mr->list, &ent->head); - ent->available_mrs++; - ent->total_mrs++; + xa_lock_irqsave(&ent->mkeys, flags); + push_to_reserved(ent, mkey_out->mkey); /* If we are doing fill_to_high_water then keep going. 
*/ queue_adjust_cache_locked(ent); - ent->pending--; - spin_unlock_irqrestore(&ent->lock, flags); + xa_unlock_irqrestore(&ent->mkeys, flags); + kfree(mkey_out); } -static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc) +static int get_mkc_octo_size(unsigned int access_mode, unsigned int ndescs) { - struct mlx5_ib_mr *mr; + int ret = 0; - mr = kzalloc(sizeof(*mr), GFP_KERNEL); - if (!mr) - return NULL; - mr->cache_ent = ent; + switch (access_mode) { + case MLX5_MKC_ACCESS_MODE_MTT: + ret = DIV_ROUND_UP(ndescs, MLX5_IB_UMR_OCTOWORD / + sizeof(struct mlx5_mtt)); + break; + case MLX5_MKC_ACCESS_MODE_KSM: + ret = DIV_ROUND_UP(ndescs, MLX5_IB_UMR_OCTOWORD / + sizeof(struct mlx5_klm)); + break; + default: + WARN_ON(1); + } + return ret; +} +static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc) +{ set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd); MLX5_SET(mkc, mkc, free, 1); MLX5_SET(mkc, mkc, umr_en, 1); MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3); MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7); - MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt); + MLX5_SET(mkc, mkc, translations_octword_size, + get_mkc_octo_size(ent->access_mode, ent->ndescs)); MLX5_SET(mkc, mkc, log_page_size, ent->page); - return mr; } /* Asynchronously schedule new MRs to be populated in the cache. 
*/ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) { - size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in); - struct mlx5_ib_mr *mr; + struct mlx5r_async_create_mkey *async_create; void *mkc; - u32 *in; int err = 0; int i; - in = kzalloc(inlen, GFP_KERNEL); - if (!in) - return -ENOMEM; - - mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); for (i = 0; i < num; i++) { - mr = alloc_cache_mr(ent, mkc); - if (!mr) { - err = -ENOMEM; - break; - } - spin_lock_irq(&ent->lock); - if (ent->pending >= MAX_PENDING_REG_MR) { - err = -EAGAIN; - spin_unlock_irq(&ent->lock); - kfree(mr); - break; - } - ent->pending++; - spin_unlock_irq(&ent->lock); - err = mlx5_ib_create_mkey_cb(ent->dev, &mr->mmkey, - &ent->dev->async_ctx, in, inlen, - mr->out, sizeof(mr->out), - &mr->cb_work); + async_create = kzalloc(sizeof(struct mlx5r_async_create_mkey), + GFP_KERNEL); + if (!async_create) + return -ENOMEM; + mkc = MLX5_ADDR_OF(create_mkey_in, async_create->in, + memory_key_mkey_entry); + set_cache_mkc(ent, mkc); + async_create->ent = ent; + + err = push_mkey(ent, true, NULL); + if (err) + goto free_async_create; + + err = mlx5_ib_create_mkey_cb(async_create); if (err) { - spin_lock_irq(&ent->lock); - ent->pending--; - spin_unlock_irq(&ent->lock); mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err); - kfree(mr); - break; + goto err_undo_reserve; } } - kfree(in); + return 0; + +err_undo_reserve: + xa_lock_irq(&ent->mkeys); + undo_push_reserve_mkey(ent); + xa_unlock_irq(&ent->mkeys); +free_async_create: + kfree(async_create); return err; } /* Synchronously create a MR in the cache */ -static struct mlx5_ib_mr *create_cache_mr(struct mlx5_cache_ent *ent) +static int create_cache_mkey(struct mlx5_cache_ent *ent, u32 *mkey) { size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in); - struct mlx5_ib_mr *mr; void *mkc; u32 *in; int err; in = kzalloc(inlen, GFP_KERNEL); if (!in) - return ERR_PTR(-ENOMEM); + return -ENOMEM; mkc = MLX5_ADDR_OF(create_mkey_in, in, 
memory_key_mkey_entry); + set_cache_mkc(ent, mkc); - mr = alloc_cache_mr(ent, mkc); - if (!mr) { - err = -ENOMEM; - goto free_in; - } - - err = mlx5_core_create_mkey(ent->dev->mdev, &mr->mmkey.key, in, inlen); + err = mlx5_core_create_mkey(ent->dev->mdev, mkey, in, inlen); if (err) - goto free_mr; + goto free_in; - init_waitqueue_head(&mr->mmkey.wait); - mr->mmkey.type = MLX5_MKEY_MR; WRITE_ONCE(ent->dev->cache.last_add, jiffies); - spin_lock_irq(&ent->lock); - ent->total_mrs++; - spin_unlock_irq(&ent->lock); - kfree(in); - return mr; -free_mr: - kfree(mr); free_in: kfree(in); - return ERR_PTR(err); + return err; } static void remove_cache_mr_locked(struct mlx5_cache_ent *ent) { - struct mlx5_ib_mr *mr; + u32 mkey; - lockdep_assert_held(&ent->lock); - if (list_empty(&ent->head)) + lockdep_assert_held(&ent->mkeys.xa_lock); + if (!ent->stored) return; - mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); - list_del(&mr->list); - ent->available_mrs--; - ent->total_mrs--; - spin_unlock_irq(&ent->lock); - mlx5_core_destroy_mkey(ent->dev->mdev, mr->mmkey.key); - kfree(mr); - spin_lock_irq(&ent->lock); + mkey = pop_stored_mkey(ent); + xa_unlock_irq(&ent->mkeys); + mlx5_core_destroy_mkey(ent->dev->mdev, mkey); + xa_lock_irq(&ent->mkeys); } static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target, bool limit_fill) + __acquires(&ent->mkeys) __releases(&ent->mkeys) { int err; - lockdep_assert_held(&ent->lock); + lockdep_assert_held(&ent->mkeys.xa_lock); while (true) { if (limit_fill) target = ent->limit * 2; - if (target == ent->available_mrs + ent->pending) + if (target == ent->reserved) return 0; - if (target > ent->available_mrs + ent->pending) { - u32 todo = target - (ent->available_mrs + ent->pending); + if (target > ent->reserved) { + u32 todo = target - ent->reserved; - spin_unlock_irq(&ent->lock); + xa_unlock_irq(&ent->mkeys); err = add_keys(ent, todo); if (err == -EAGAIN) usleep_range(3000, 5000); - spin_lock_irq(&ent->lock); + 
xa_lock_irq(&ent->mkeys); if (err) { if (err != -EAGAIN) return err; @@ -344,15 +422,15 @@ static ssize_t size_write(struct file *filp, const char __user *buf, /* * Target is the new value of total_mrs the user requests, however we - * cannot free MRs that are in use. Compute the target value for - * available_mrs. + * cannot free MRs that are in use. Compute the target value for stored + * mkeys. */ - spin_lock_irq(&ent->lock); - if (target < ent->total_mrs - ent->available_mrs) { + xa_lock_irq(&ent->mkeys); + if (target < ent->in_use) { err = -EINVAL; goto err_unlock; } - target = target - (ent->total_mrs - ent->available_mrs); + target = target - ent->in_use; if (target < ent->limit || target > ent->limit*2) { err = -EINVAL; goto err_unlock; @@ -360,12 +438,12 @@ static ssize_t size_write(struct file *filp, const char __user *buf, err = resize_available_mrs(ent, target, false); if (err) goto err_unlock; - spin_unlock_irq(&ent->lock); + xa_unlock_irq(&ent->mkeys); return count; err_unlock: - spin_unlock_irq(&ent->lock); + xa_unlock_irq(&ent->mkeys); return err; } @@ -376,7 +454,7 @@ static ssize_t size_read(struct file *filp, char __user *buf, size_t count, char lbuf[20]; int err; - err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->total_mrs); + err = snprintf(lbuf, sizeof(lbuf), "%ld\n", ent->stored + ent->in_use); if (err < 0) return err; @@ -405,10 +483,10 @@ static ssize_t limit_write(struct file *filp, const char __user *buf, * Upon set we immediately fill the cache to high water mark implied by * the limit. 
*/ - spin_lock_irq(&ent->lock); + xa_lock_irq(&ent->mkeys); ent->limit = var; err = resize_available_mrs(ent, 0, true); - spin_unlock_irq(&ent->lock); + xa_unlock_irq(&ent->mkeys); if (err) return err; return count; @@ -435,17 +513,17 @@ static const struct file_operations limit_fops = { .read = limit_read, }; -static bool someone_adding(struct mlx5_mr_cache *cache) +static bool someone_adding(struct mlx5_mkey_cache *cache) { unsigned int i; - for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { + for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) { struct mlx5_cache_ent *ent = &cache->ent[i]; bool ret; - spin_lock_irq(&ent->lock); - ret = ent->available_mrs < ent->limit; - spin_unlock_irq(&ent->lock); + xa_lock_irq(&ent->mkeys); + ret = ent->stored < ent->limit; + xa_unlock_irq(&ent->mkeys); if (ret) return true; } @@ -459,55 +537,54 @@ static bool someone_adding(struct mlx5_mr_cache *cache) */ static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent) { - lockdep_assert_held(&ent->lock); + lockdep_assert_held(&ent->mkeys.xa_lock); if (ent->disabled || READ_ONCE(ent->dev->fill_delay)) return; - if (ent->available_mrs < ent->limit) { + if (ent->stored < ent->limit) { ent->fill_to_high_water = true; - queue_work(ent->dev->cache.wq, &ent->work); + mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0); } else if (ent->fill_to_high_water && - ent->available_mrs + ent->pending < 2 * ent->limit) { + ent->reserved < 2 * ent->limit) { /* * Once we start populating due to hitting a low water mark * continue until we pass the high water mark. 
*/ - queue_work(ent->dev->cache.wq, &ent->work); - } else if (ent->available_mrs == 2 * ent->limit) { + mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0); + } else if (ent->stored == 2 * ent->limit) { ent->fill_to_high_water = false; - } else if (ent->available_mrs > 2 * ent->limit) { + } else if (ent->stored > 2 * ent->limit) { /* Queue deletion of excess entries */ ent->fill_to_high_water = false; - if (ent->pending) + if (ent->stored != ent->reserved) queue_delayed_work(ent->dev->cache.wq, &ent->dwork, msecs_to_jiffies(1000)); else - queue_work(ent->dev->cache.wq, &ent->work); + mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0); } } static void __cache_work_func(struct mlx5_cache_ent *ent) { struct mlx5_ib_dev *dev = ent->dev; - struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_mkey_cache *cache = &dev->cache; int err; - spin_lock_irq(&ent->lock); + xa_lock_irq(&ent->mkeys); if (ent->disabled) goto out; - if (ent->fill_to_high_water && - ent->available_mrs + ent->pending < 2 * ent->limit && + if (ent->fill_to_high_water && ent->reserved < 2 * ent->limit && !READ_ONCE(dev->fill_delay)) { - spin_unlock_irq(&ent->lock); + xa_unlock_irq(&ent->mkeys); err = add_keys(ent, 1); - spin_lock_irq(&ent->lock); + xa_lock_irq(&ent->mkeys); if (ent->disabled) goto out; if (err) { /* - * EAGAIN only happens if pending is positive, so we - * will be rescheduled from reg_mr_callback(). The only + * EAGAIN only happens if there are pending MRs, so we + * will be rescheduled when storing them. The only * failure path here is ENOMEM. */ if (err != -EAGAIN) { @@ -519,7 +596,7 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) msecs_to_jiffies(1000)); } } - } else if (ent->available_mrs > 2 * ent->limit) { + } else if (ent->stored > 2 * ent->limit) { bool need_delay; /* @@ -534,20 +611,22 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) * the garbage collection work to try to run in next cycle, in * order to free CPU resources to other tasks. 
*/ - spin_unlock_irq(&ent->lock); + xa_unlock_irq(&ent->mkeys); need_delay = need_resched() || someone_adding(cache) || !time_after(jiffies, READ_ONCE(cache->last_add) + 300 * HZ); - spin_lock_irq(&ent->lock); + xa_lock_irq(&ent->mkeys); if (ent->disabled) goto out; - if (need_delay) + if (need_delay) { queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ); + goto out; + } remove_cache_mr_locked(ent); queue_adjust_cache_locked(ent); } out: - spin_unlock_irq(&ent->lock); + xa_unlock_irq(&ent->mkeys); } static void delayed_cache_work_func(struct work_struct *work) @@ -558,113 +637,64 @@ static void delayed_cache_work_func(struct work_struct *work) __cache_work_func(ent); } -static void cache_work_func(struct work_struct *work) -{ - struct mlx5_cache_ent *ent; - - ent = container_of(work, struct mlx5_cache_ent, work); - __cache_work_func(ent); -} - -/* Allocate a special entry from the cache */ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, - unsigned int entry, int access_flags) + struct mlx5_cache_ent *ent, + int access_flags) { - struct mlx5_mr_cache *cache = &dev->cache; - struct mlx5_cache_ent *ent; struct mlx5_ib_mr *mr; + int err; - if (WARN_ON(entry <= MR_CACHE_LAST_STD_ENTRY || - entry >= ARRAY_SIZE(cache->ent))) - return ERR_PTR(-EINVAL); - - /* Matches access in alloc_cache_mr() */ - if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) + if (!mlx5r_umr_can_reconfig(dev, 0, access_flags)) return ERR_PTR(-EOPNOTSUPP); - ent = &cache->ent[entry]; - spin_lock_irq(&ent->lock); - if (list_empty(&ent->head)) { - spin_unlock_irq(&ent->lock); - mr = create_cache_mr(ent); - if (IS_ERR(mr)) - return mr; - } else { - mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); - list_del(&mr->list); - ent->available_mrs--; - queue_adjust_cache_locked(ent); - spin_unlock_irq(&ent->lock); - - mlx5_clear_mr(mr); - } - mr->access_flags = access_flags; - return mr; -} + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); -/* 
Return a MR already available in the cache */ -static struct mlx5_ib_mr *get_cache_mr(struct mlx5_cache_ent *req_ent) -{ - struct mlx5_ib_mr *mr = NULL; - struct mlx5_cache_ent *ent = req_ent; + xa_lock_irq(&ent->mkeys); + ent->in_use++; - spin_lock_irq(&ent->lock); - if (!list_empty(&ent->head)) { - mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); - list_del(&mr->list); - ent->available_mrs--; + if (!ent->stored) { queue_adjust_cache_locked(ent); - spin_unlock_irq(&ent->lock); - mlx5_clear_mr(mr); - return mr; + ent->miss++; + xa_unlock_irq(&ent->mkeys); + err = create_cache_mkey(ent, &mr->mmkey.key); + if (err) { + xa_lock_irq(&ent->mkeys); + ent->in_use--; + xa_unlock_irq(&ent->mkeys); + kfree(mr); + return ERR_PTR(err); + } + } else { + mr->mmkey.key = pop_stored_mkey(ent); + queue_adjust_cache_locked(ent); + xa_unlock_irq(&ent->mkeys); } - queue_adjust_cache_locked(ent); - spin_unlock_irq(&ent->lock); - req_ent->miss++; - return NULL; -} - -static void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) -{ - struct mlx5_cache_ent *ent = mr->cache_ent; - - spin_lock_irq(&ent->lock); - list_add_tail(&mr->list, &ent->head); - ent->available_mrs++; - queue_adjust_cache_locked(ent); - spin_unlock_irq(&ent->lock); + mr->mmkey.cache_ent = ent; + mr->mmkey.type = MLX5_MKEY_MR; + init_waitqueue_head(&mr->mmkey.wait); + return mr; } static void clean_keys(struct mlx5_ib_dev *dev, int c) { - struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_mkey_cache *cache = &dev->cache; struct mlx5_cache_ent *ent = &cache->ent[c]; - struct mlx5_ib_mr *tmp_mr; - struct mlx5_ib_mr *mr; - LIST_HEAD(del_list); + u32 mkey; cancel_delayed_work(&ent->dwork); - while (1) { - spin_lock_irq(&ent->lock); - if (list_empty(&ent->head)) { - spin_unlock_irq(&ent->lock); - break; - } - mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); - list_move(&mr->list, &del_list); - ent->available_mrs--; - ent->total_mrs--; - spin_unlock_irq(&ent->lock); - 
mlx5_core_destroy_mkey(dev->mdev, mr->mmkey.key); - } - - list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { - list_del(&mr->list); - kfree(mr); + xa_lock_irq(&ent->mkeys); + while (ent->stored) { + mkey = pop_stored_mkey(ent); + xa_unlock_irq(&ent->mkeys); + mlx5_core_destroy_mkey(dev->mdev, mkey); + xa_lock_irq(&ent->mkeys); } + xa_unlock_irq(&ent->mkeys); } -static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) { if (!mlx5_debugfs_root || dev->is_rep) return; @@ -673,9 +703,9 @@ static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) dev->cache.root = NULL; } -static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev) +static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev) { - struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_mkey_cache *cache = &dev->cache; struct mlx5_cache_ent *ent; struct dentry *dir; int i; @@ -683,15 +713,15 @@ static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev) if (!mlx5_debugfs_root || dev->is_rep) return; - cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root); + cache->root = debugfs_create_dir("mr_cache", mlx5_debugfs_get_dev_root(dev->mdev)); - for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { + for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) { ent = &cache->ent[i]; sprintf(ent->name, "%d", ent->order); dir = debugfs_create_dir(ent->name, cache->root); debugfs_create_file("size", 0600, dir, ent, &size_fops); debugfs_create_file("limit", 0600, dir, ent, &limit_fops); - debugfs_create_u32("cur", 0400, dir, &ent->available_mrs); + debugfs_create_ulong("cur", 0400, dir, &ent->stored); debugfs_create_u32("miss", 0600, dir, &ent->miss); } } @@ -703,9 +733,9 @@ static void delay_time_func(struct timer_list *t) WRITE_ONCE(dev->fill_delay, 0); } -int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) +int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) { - struct mlx5_mr_cache *cache = &dev->cache; + struct 
mlx5_mkey_cache *cache = &dev->cache; struct mlx5_cache_ent *ent; int i; @@ -718,66 +748,62 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx); timer_setup(&dev->delay_timer, delay_time_func, 0); - for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { + for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) { ent = &cache->ent[i]; - INIT_LIST_HEAD(&ent->head); - spin_lock_init(&ent->lock); + xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ); ent->order = i + 2; ent->dev = dev; ent->limit = 0; - INIT_WORK(&ent->work, cache_work_func); INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); - if (i > MR_CACHE_LAST_STD_ENTRY) { - mlx5_odp_init_mr_cache_entry(ent); + if (i > MKEY_CACHE_LAST_STD_ENTRY) { + mlx5_odp_init_mkey_cache_entry(ent); continue; } - if (ent->order > mr_cache_max_order(dev)) + if (ent->order > mkey_cache_max_order(dev)) continue; ent->page = PAGE_SHIFT; - ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) / - MLX5_IB_UMR_OCTOWORD; + ent->ndescs = 1 << ent->order; ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) && !dev->is_rep && mlx5_core_is_pf(dev->mdev) && - mlx5_ib_can_load_pas_with_umr(dev, 0)) + mlx5r_umr_can_load_pas(dev, 0)) ent->limit = dev->mdev->profile.mr_cache[i].limit; else ent->limit = 0; - spin_lock_irq(&ent->lock); + xa_lock_irq(&ent->mkeys); queue_adjust_cache_locked(ent); - spin_unlock_irq(&ent->lock); + xa_unlock_irq(&ent->mkeys); } - mlx5_mr_cache_debugfs_init(dev); + mlx5_mkey_cache_debugfs_init(dev); return 0; } -int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) +int mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev) { unsigned int i; if (!dev->cache.wq) return 0; - for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { + for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) { struct mlx5_cache_ent *ent = &dev->cache.ent[i]; - spin_lock_irq(&ent->lock); + xa_lock_irq(&ent->mkeys); ent->disabled = true; - spin_unlock_irq(&ent->lock); - 
cancel_work_sync(&ent->work); + xa_unlock_irq(&ent->mkeys); cancel_delayed_work_sync(&ent->dwork); } - mlx5_mr_cache_debugfs_cleanup(dev); + mlx5_mkey_cache_debugfs_cleanup(dev); mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); - for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) + for (i = 0; i < MAX_MKEY_CACHE_ENTRIES; i++) clean_keys(dev, i); destroy_workqueue(dev->cache.wq); @@ -844,65 +870,22 @@ static int get_octo_len(u64 addr, u64 len, int page_shift) return (npages + 1) / 2; } -static int mr_cache_max_order(struct mlx5_ib_dev *dev) +static int mkey_cache_max_order(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) - return MR_CACHE_LAST_STD_ENTRY + 2; + return MKEY_CACHE_LAST_STD_ENTRY + 2; return MLX5_MAX_UMR_SHIFT; } -static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc) +static struct mlx5_cache_ent *mkey_cache_ent_from_order(struct mlx5_ib_dev *dev, + unsigned int order) { - struct mlx5_ib_umr_context *context = - container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe); - - context->status = wc->status; - complete(&context->done); -} - -static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context) -{ - context->cqe.done = mlx5_ib_umr_done; - context->status = -1; - init_completion(&context->done); -} - -static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev, - struct mlx5_umr_wr *umrwr) -{ - struct umr_common *umrc = &dev->umrc; - const struct ib_send_wr *bad; - int err; - struct mlx5_ib_umr_context umr_context; - - mlx5_ib_init_umr_context(&umr_context); - umrwr->wr.wr_cqe = &umr_context.cqe; - - down(&umrc->sem); - err = ib_post_send(umrc->qp, &umrwr->wr, &bad); - if (err) { - mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err); - } else { - wait_for_completion(&umr_context.done); - if (umr_context.status != IB_WC_SUCCESS) { - mlx5_ib_warn(dev, "reg umr failed (%u)\n", - umr_context.status); - err = -EFAULT; - } - } - up(&umrc->sem); - return err; -} - -static struct mlx5_cache_ent 
*mr_cache_ent_from_order(struct mlx5_ib_dev *dev, - unsigned int order) -{ - struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_mkey_cache *cache = &dev->cache; if (order < cache->ent[0].order) return &cache->ent[0]; order = order - cache->ent[0].order; - if (order > MR_CACHE_LAST_STD_ENTRY) + if (order > MKEY_CACHE_LAST_STD_ENTRY) return NULL; return &cache->ent[order]; } @@ -945,30 +928,24 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, 0, iova); if (WARN_ON(!page_size)) return ERR_PTR(-EINVAL); - ent = mr_cache_ent_from_order( + ent = mkey_cache_ent_from_order( dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size))); /* * Matches access in alloc_cache_mr(). If the MR can't come from the * cache then synchronously create an uncached one. */ if (!ent || ent->limit == 0 || - !mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) { + !mlx5r_umr_can_reconfig(dev, 0, access_flags) || + mlx5_umem_needs_ats(dev, umem, access_flags)) { mutex_lock(&dev->slow_path_mutex); mr = reg_create(pd, umem, iova, access_flags, page_size, false); mutex_unlock(&dev->slow_path_mutex); return mr; } - mr = get_cache_mr(ent); - if (!mr) { - mr = create_cache_mr(ent); - /* - * The above already tried to do the same stuff as reg_create(), - * no reason to try it again. - */ - if (IS_ERR(mr)) - return mr; - } + mr = mlx5_mr_cache_alloc(dev, ent, access_flags); + if (IS_ERR(mr)) + return mr; mr->ibmr.pd = pd; mr->umem = umem; @@ -978,289 +955,6 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, return mr; } -#define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \ - MLX5_UMR_MTT_ALIGNMENT) -#define MLX5_SPARE_UMR_CHUNK 0x10000 - -/* - * Allocate a temporary buffer to hold the per-page information to transfer to - * HW. For efficiency this should be as large as it can be, but buffer - * allocation failure is not allowed, so try smaller sizes. 
- */ -static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask) -{ - const size_t xlt_chunk_align = - MLX5_UMR_MTT_ALIGNMENT / ent_size; - size_t size; - void *res = NULL; - - static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0); - - /* - * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the - * allocation can't trigger any kind of reclaim. - */ - might_sleep(); - - gfp_mask |= __GFP_ZERO | __GFP_NORETRY; - - /* - * If the system already has a suitable high order page then just use - * that, but don't try hard to create one. This max is about 1M, so a - * free x86 huge page will satisfy it. - */ - size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align), - MLX5_MAX_UMR_CHUNK); - *nents = size / ent_size; - res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, - get_order(size)); - if (res) - return res; - - if (size > MLX5_SPARE_UMR_CHUNK) { - size = MLX5_SPARE_UMR_CHUNK; - *nents = size / ent_size; - res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, - get_order(size)); - if (res) - return res; - } - - *nents = PAGE_SIZE / ent_size; - res = (void *)__get_free_page(gfp_mask); - if (res) - return res; - - mutex_lock(&xlt_emergency_page_mutex); - memset(xlt_emergency_page, 0, PAGE_SIZE); - return xlt_emergency_page; -} - -static void mlx5_ib_free_xlt(void *xlt, size_t length) -{ - if (xlt == xlt_emergency_page) { - mutex_unlock(&xlt_emergency_page_mutex); - return; - } - - free_pages((unsigned long)xlt, get_order(length)); -} - -/* - * Create a MLX5_IB_SEND_UMR_UPDATE_XLT work request and XLT buffer ready for - * submission. - */ -static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr, - struct mlx5_umr_wr *wr, struct ib_sge *sg, - size_t nents, size_t ent_size, - unsigned int flags) -{ - struct mlx5_ib_dev *dev = mr_to_mdev(mr); - struct device *ddev = &dev->mdev->pdev->dev; - dma_addr_t dma; - void *xlt; - - xlt = mlx5_ib_alloc_xlt(&nents, ent_size, - flags & MLX5_IB_UPD_XLT_ATOMIC ? 
GFP_ATOMIC : - GFP_KERNEL); - sg->length = nents * ent_size; - dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE); - if (dma_mapping_error(ddev, dma)) { - mlx5_ib_err(dev, "unable to map DMA during XLT update.\n"); - mlx5_ib_free_xlt(xlt, sg->length); - return NULL; - } - sg->addr = dma; - sg->lkey = dev->umrc.pd->local_dma_lkey; - - memset(wr, 0, sizeof(*wr)); - wr->wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT; - if (!(flags & MLX5_IB_UPD_XLT_ENABLE)) - wr->wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE; - wr->wr.sg_list = sg; - wr->wr.num_sge = 1; - wr->wr.opcode = MLX5_IB_WR_UMR; - wr->pd = mr->ibmr.pd; - wr->mkey = mr->mmkey.key; - wr->length = mr->ibmr.length; - wr->virt_addr = mr->ibmr.iova; - wr->access_flags = mr->access_flags; - wr->page_shift = mr->page_shift; - wr->xlt_size = sg->length; - return xlt; -} - -static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt, - struct ib_sge *sg) -{ - struct device *ddev = &dev->mdev->pdev->dev; - - dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE); - mlx5_ib_free_xlt(xlt, sg->length); -} - -static unsigned int xlt_wr_final_send_flags(unsigned int flags) -{ - unsigned int res = 0; - - if (flags & MLX5_IB_UPD_XLT_ENABLE) - res |= MLX5_IB_SEND_UMR_ENABLE_MR | - MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS | - MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; - if (flags & MLX5_IB_UPD_XLT_PD || flags & MLX5_IB_UPD_XLT_ACCESS) - res |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; - if (flags & MLX5_IB_UPD_XLT_ADDR) - res |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; - return res; -} - -int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, - int page_shift, int flags) -{ - struct mlx5_ib_dev *dev = mr_to_mdev(mr); - struct device *ddev = &dev->mdev->pdev->dev; - void *xlt; - struct mlx5_umr_wr wr; - struct ib_sge sg; - int err = 0; - int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT) - ? 
sizeof(struct mlx5_klm) - : sizeof(struct mlx5_mtt); - const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size; - const int page_mask = page_align - 1; - size_t pages_mapped = 0; - size_t pages_to_map = 0; - size_t pages_iter; - size_t size_to_map = 0; - size_t orig_sg_length; - - if ((flags & MLX5_IB_UPD_XLT_INDIRECT) && - !umr_can_use_indirect_mkey(dev)) - return -EPERM; - - if (WARN_ON(!mr->umem->is_odp)) - return -EINVAL; - - /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes, - * so we need to align the offset and length accordingly - */ - if (idx & page_mask) { - npages += idx & page_mask; - idx &= ~page_mask; - } - pages_to_map = ALIGN(npages, page_align); - - xlt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, npages, desc_size, flags); - if (!xlt) - return -ENOMEM; - pages_iter = sg.length / desc_size; - orig_sg_length = sg.length; - - if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) { - struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); - size_t max_pages = ib_umem_odp_num_pages(odp) - idx; - - pages_to_map = min_t(size_t, pages_to_map, max_pages); - } - - wr.page_shift = page_shift; - - for (pages_mapped = 0; - pages_mapped < pages_to_map && !err; - pages_mapped += pages_iter, idx += pages_iter) { - npages = min_t(int, pages_iter, pages_to_map - pages_mapped); - size_to_map = npages * desc_size; - dma_sync_single_for_cpu(ddev, sg.addr, sg.length, - DMA_TO_DEVICE); - mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); - dma_sync_single_for_device(ddev, sg.addr, sg.length, - DMA_TO_DEVICE); - - sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT); - - if (pages_mapped + pages_iter >= pages_to_map) - wr.wr.send_flags |= xlt_wr_final_send_flags(flags); - - wr.offset = idx * desc_size; - wr.xlt_size = sg.length; - - err = mlx5_ib_post_send_wait(dev, &wr); - } - sg.length = orig_sg_length; - mlx5_ib_unmap_free_xlt(dev, xlt, &sg); - return err; -} - -/* - * Send the DMA list to the HW for a normal MR using UMR. 
- * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP - * flag may be used. - */ -int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) -{ - struct mlx5_ib_dev *dev = mr_to_mdev(mr); - struct device *ddev = &dev->mdev->pdev->dev; - struct ib_block_iter biter; - struct mlx5_mtt *cur_mtt; - struct mlx5_umr_wr wr; - size_t orig_sg_length; - struct mlx5_mtt *mtt; - size_t final_size; - struct ib_sge sg; - int err = 0; - - if (WARN_ON(mr->umem->is_odp)) - return -EINVAL; - - mtt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, - ib_umem_num_dma_blocks(mr->umem, - 1 << mr->page_shift), - sizeof(*mtt), flags); - if (!mtt) - return -ENOMEM; - orig_sg_length = sg.length; - - cur_mtt = mtt; - rdma_for_each_block (mr->umem->sgt_append.sgt.sgl, &biter, - mr->umem->sgt_append.sgt.nents, - BIT(mr->page_shift)) { - if (cur_mtt == (void *)mtt + sg.length) { - dma_sync_single_for_device(ddev, sg.addr, sg.length, - DMA_TO_DEVICE); - err = mlx5_ib_post_send_wait(dev, &wr); - if (err) - goto err; - dma_sync_single_for_cpu(ddev, sg.addr, sg.length, - DMA_TO_DEVICE); - wr.offset += sg.length; - cur_mtt = mtt; - } - - cur_mtt->ptag = - cpu_to_be64(rdma_block_iter_dma_address(&biter) | - MLX5_IB_MTT_PRESENT); - - if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP)) - cur_mtt->ptag = 0; - - cur_mtt++; - } - - final_size = (void *)cur_mtt - (void *)mtt; - sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT); - memset(cur_mtt, 0, sg.length - final_size); - wr.wr.send_flags |= xlt_wr_final_send_flags(flags); - wr.xlt_size = sg.length; - - dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE); - err = mlx5_ib_post_send_wait(dev, &wr); - -err: - sg.length = orig_sg_length; - mlx5_ib_unmap_free_xlt(dev, mtt, &sg); - return err; -} - /* * If ibmr is NULL it will be allocated by reg_create. * Else, the given ibmr will be used. 
@@ -1323,6 +1017,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, MLX5_SET(mkc, mkc, translations_octword_size, get_octo_len(iova, umem->length, mr->page_shift)); MLX5_SET(mkc, mkc, log_page_size, mr->page_shift); + if (mlx5_umem_needs_ats(dev, umem, access_flags)) + MLX5_SET(mkc, mkc, ma_translation_mode, 1); if (populate) { MLX5_SET(create_mkey_in, in, translations_octword_actual_size, get_octo_len(iova, umem->length, mr->page_shift)); @@ -1430,6 +1126,7 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, break; case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: + case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM: if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS) return ERR_PTR(-EINVAL); @@ -1451,7 +1148,7 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem, bool xlt_with_umr; int err; - xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, umem->length); + xlt_with_umr = mlx5r_umr_can_load_pas(dev, umem->length); if (xlt_with_umr) { mr = alloc_cacheable_mr(pd, umem, iova, access_flags); } else { @@ -1477,7 +1174,7 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem, * configured properly but left disabled. It is safe to go ahead * and configure it again via UMR while enabling it. */ - err = mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE); + err = mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE); if (err) { mlx5_ib_dereg_mr(&mr->ibmr, NULL); return ERR_PTR(err); @@ -1514,7 +1211,7 @@ static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length, } /* ODP requires xlt update via umr to work. 
*/ - if (!mlx5_ib_can_load_pas_with_umr(dev, length)) + if (!mlx5r_umr_can_load_pas(dev, length)) return ERR_PTR(-EINVAL); odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags, @@ -1576,7 +1273,7 @@ static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach) if (!umem_dmabuf->sgt) return; - mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP); + mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP); ib_umem_dmabuf_unmap_pages(umem_dmabuf); } @@ -1604,7 +1301,7 @@ struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset, offset, virt_addr, length, fd, access_flags); /* dmabuf requires xlt update via umr to work. */ - if (!mlx5_ib_can_load_pas_with_umr(dev, length)) + if (!mlx5r_umr_can_load_pas(dev, length)) return ERR_PTR(-EINVAL); umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev, offset, length, fd, @@ -1641,31 +1338,6 @@ err_dereg_mr: return ERR_PTR(err); } -/** - * revoke_mr - Fence all DMA on the MR - * @mr: The MR to fence - * - * Upon return the NIC will not be doing any DMA to the pages under the MR, - * and any DMA in progress will be completed. Failure of this function - * indicates the HW has failed catastrophically. - */ -static int revoke_mr(struct mlx5_ib_mr *mr) -{ - struct mlx5_umr_wr umrwr = {}; - - if (mr_to_mdev(mr)->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) - return 0; - - umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR | - MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; - umrwr.wr.opcode = MLX5_IB_WR_UMR; - umrwr.pd = mr_to_mdev(mr)->umrc.pd; - umrwr.mkey = mr->mmkey.key; - umrwr.ignore_free_state = 1; - - return mlx5_ib_post_send_wait(mr_to_mdev(mr), &umrwr); -} - /* * True if the change in access flags can be done via UMR, only some access * flags can be updated. 
@@ -1679,32 +1351,8 @@ static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev, if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING)) return false; - return mlx5_ib_can_reconfig_with_umr(dev, current_access_flags, - target_access_flags); -} - -static int umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd, - int access_flags) -{ - struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); - struct mlx5_umr_wr umrwr = { - .wr = { - .send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE | - MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS, - .opcode = MLX5_IB_WR_UMR, - }, - .mkey = mr->mmkey.key, - .pd = pd, - .access_flags = access_flags, - }; - int err; - - err = mlx5_ib_post_send_wait(dev, &umrwr); - if (err) - return err; - - mr->access_flags = access_flags; - return 0; + return mlx5r_umr_can_reconfig(dev, current_access_flags, + target_access_flags); } static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr, @@ -1715,16 +1363,16 @@ static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr, struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); /* We only track the allocated sizes of MRs from the cache */ - if (!mr->cache_ent) + if (!mr->mmkey.cache_ent) return false; - if (!mlx5_ib_can_load_pas_with_umr(dev, new_umem->length)) + if (!mlx5r_umr_can_load_pas(dev, new_umem->length)) return false; *page_size = mlx5_umem_find_best_pgsz(new_umem, mkc, log_page_size, 0, iova); if (WARN_ON(!*page_size)) return false; - return (1ULL << mr->cache_ent->order) >= + return (1ULL << mr->mmkey.cache_ent->order) >= ib_umem_num_dma_blocks(new_umem, *page_size); } @@ -1742,7 +1390,7 @@ static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd, * with it. This ensure the change is atomic relative to any use of the * MR. 
*/ - err = revoke_mr(mr); + err = mlx5r_umr_revoke_mr(mr); if (err) return err; @@ -1755,12 +1403,11 @@ static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd, upd_flags |= MLX5_IB_UPD_XLT_ACCESS; } - mr->ibmr.length = new_umem->length; mr->ibmr.iova = iova; mr->ibmr.length = new_umem->length; mr->page_shift = order_base_2(page_size); mr->umem = new_umem; - err = mlx5_ib_update_mr_pas(mr, upd_flags); + err = mlx5r_umr_update_mr_pas(mr, upd_flags); if (err) { /* * The MR is revoked at this point so there is no issue to free @@ -1807,7 +1454,8 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, /* Fast path for PD/access change */ if (can_use_umr_rereg_access(dev, mr->access_flags, new_access_flags)) { - err = umr_rereg_pd_access(mr, new_pd, new_access_flags); + err = mlx5r_umr_rereg_pd_access(mr, new_pd, + new_access_flags); if (err) return ERR_PTR(err); return NULL; @@ -1820,7 +1468,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, * Only one active MR can refer to a umem at one time, revoke * the old MR before assigning the umem to the new one. 
*/ - err = revoke_mr(mr); + err = mlx5r_umr_revoke_mr(mr); if (err) return ERR_PTR(err); umem = mr->umem; @@ -1904,18 +1552,19 @@ err: return ret; } -static void mlx5_free_priv_descs(struct mlx5_ib_mr *mr) +static void +mlx5_free_priv_descs(struct mlx5_ib_mr *mr) { - struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); - int size = mr->max_descs * mr->desc_size; - - if (!mr->descs) - return; + if (!mr->umem && mr->descs) { + struct ib_device *device = mr->ibmr.device; + int size = mr->max_descs * mr->desc_size; + struct mlx5_ib_dev *dev = to_mdev(device); - dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size, - DMA_TO_DEVICE); - kfree(mr->descs_alloc); - mr->descs = NULL; + dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size, + DMA_TO_DEVICE); + kfree(mr->descs_alloc); + mr->descs = NULL; + } } int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) @@ -1963,15 +1612,17 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) } /* Stop DMA */ - if (mr->cache_ent) { - if (revoke_mr(mr)) { - spin_lock_irq(&mr->cache_ent->lock); - mr->cache_ent->total_mrs--; - spin_unlock_irq(&mr->cache_ent->lock); - mr->cache_ent = NULL; - } + if (mr->mmkey.cache_ent) { + xa_lock_irq(&mr->mmkey.cache_ent->mkeys); + mr->mmkey.cache_ent->in_use--; + xa_unlock_irq(&mr->mmkey.cache_ent->mkeys); + + if (mlx5r_umr_revoke_mr(mr) || + push_mkey(mr->mmkey.cache_ent, false, + xa_mk_value(mr->mmkey.key))) + mr->mmkey.cache_ent = NULL; } - if (!mr->cache_ent) { + if (!mr->mmkey.cache_ent) { rc = destroy_mkey(to_mdev(mr->ibmr.device), mr); if (rc) return rc; @@ -1988,13 +1639,10 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) mlx5_ib_free_odp_mr(mr); } - if (mr->cache_ent) { - mlx5_mr_cache_free(dev, mr); - } else { - if (!udata) - mlx5_free_priv_descs(mr); - kfree(mr); - } + if (!mr->mmkey.cache_ent) + mlx5_free_priv_descs(mr); + + kfree(mr); return 0; } @@ -2079,6 +1727,7 @@ static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd, if 
(err) goto err_free_in; + mr->umem = NULL; kfree(in); return mr; @@ -2205,6 +1854,7 @@ static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd, } mr->ibmr.device = pd->device; + mr->umem = NULL; switch (mr_type) { case IB_MR_TYPE_MEM_REG: diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 91eb615b89ee..bc97958818bb 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -30,7 +30,6 @@ * SOFTWARE. */ -#include <rdma/ib_umem.h> #include <rdma/ib_umem_odp.h> #include <linux/kernel.h> #include <linux/dma-buf.h> @@ -38,6 +37,7 @@ #include "mlx5_ib.h" #include "cmd.h" +#include "umr.h" #include "qp.h" #include <linux/mlx5/eq.h> @@ -117,7 +117,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, * * xa_store() * mutex_lock(umem_mutex) - * mlx5_ib_update_xlt() + * mlx5r_umr_update_xlt() * mutex_unlock(umem_mutex) * destroy lkey * @@ -198,9 +198,9 @@ static void free_implicit_child_mr_work(struct work_struct *work) mlx5r_deref_wait_odp_mkey(&mr->mmkey); mutex_lock(&odp_imr->umem_mutex); - mlx5_ib_update_xlt(mr->parent, ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT, - 1, 0, - MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); + mlx5r_umr_update_xlt(mr->parent, + ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT, 1, 0, + MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); mutex_unlock(&odp_imr->umem_mutex); mlx5_ib_dereg_mr(&mr->ibmr, NULL); @@ -282,19 +282,19 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni, u64 umr_offset = idx & umr_block_mask; if (in_block && umr_offset == 0) { - mlx5_ib_update_xlt(mr, blk_start_idx, - idx - blk_start_idx, 0, - MLX5_IB_UPD_XLT_ZAP | - MLX5_IB_UPD_XLT_ATOMIC); + mlx5r_umr_update_xlt(mr, blk_start_idx, + idx - blk_start_idx, 0, + MLX5_IB_UPD_XLT_ZAP | + MLX5_IB_UPD_XLT_ATOMIC); in_block = 0; } } } if (in_block) - mlx5_ib_update_xlt(mr, blk_start_idx, - idx - blk_start_idx + 1, 0, - MLX5_IB_UPD_XLT_ZAP | - MLX5_IB_UPD_XLT_ATOMIC); + 
mlx5r_umr_update_xlt(mr, blk_start_idx, + idx - blk_start_idx + 1, 0, + MLX5_IB_UPD_XLT_ZAP | + MLX5_IB_UPD_XLT_ATOMIC); mlx5_update_odp_stats(mr, invalidations, invalidations); @@ -323,8 +323,7 @@ static void internal_fill_odp_caps(struct mlx5_ib_dev *dev) memset(caps, 0, sizeof(*caps)); - if (!MLX5_CAP_GEN(dev->mdev, pg) || - !mlx5_ib_can_load_pas_with_umr(dev, 0)) + if (!MLX5_CAP_GEN(dev->mdev, pg) || !mlx5r_umr_can_load_pas(dev, 0)) return; caps->general_caps = IB_ODP_SUPPORT; @@ -407,6 +406,7 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, unsigned long idx) { + struct mlx5_ib_dev *dev = mr_to_mdev(imr); struct ib_umem_odp *odp; struct mlx5_ib_mr *mr; struct mlx5_ib_mr *ret; @@ -418,13 +418,14 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, if (IS_ERR(odp)) return ERR_CAST(odp); - mr = mlx5_mr_cache_alloc( - mr_to_mdev(imr), MLX5_IMR_MTT_CACHE_ENTRY, imr->access_flags); + mr = mlx5_mr_cache_alloc(dev, &dev->cache.ent[MLX5_IMR_MTT_CACHE_ENTRY], + imr->access_flags); if (IS_ERR(mr)) { ib_umem_odp_release(odp); return mr; } + mr->access_flags = imr->access_flags; mr->ibmr.pd = imr->ibmr.pd; mr->ibmr.device = &mr_to_mdev(imr)->ib_dev; mr->umem = &odp->umem; @@ -440,11 +441,11 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, */ refcount_set(&mr->mmkey.usecount, 2); - err = mlx5_ib_update_xlt(mr, 0, - MLX5_IMR_MTT_ENTRIES, - PAGE_SHIFT, - MLX5_IB_UPD_XLT_ZAP | - MLX5_IB_UPD_XLT_ENABLE); + err = mlx5r_umr_update_xlt(mr, 0, + MLX5_IMR_MTT_ENTRIES, + PAGE_SHIFT, + MLX5_IB_UPD_XLT_ZAP | + MLX5_IB_UPD_XLT_ENABLE); if (err) { ret = ERR_PTR(err); goto out_mr; @@ -485,36 +486,37 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, struct mlx5_ib_mr *imr; int err; - if (!mlx5_ib_can_load_pas_with_umr(dev, - MLX5_IMR_MTT_ENTRIES * PAGE_SIZE)) + if (!mlx5r_umr_can_load_pas(dev, MLX5_IMR_MTT_ENTRIES * PAGE_SIZE)) return 
ERR_PTR(-EOPNOTSUPP); umem_odp = ib_umem_odp_alloc_implicit(&dev->ib_dev, access_flags); if (IS_ERR(umem_odp)) return ERR_CAST(umem_odp); - imr = mlx5_mr_cache_alloc(dev, MLX5_IMR_KSM_CACHE_ENTRY, access_flags); + imr = mlx5_mr_cache_alloc(dev, + &dev->cache.ent[MLX5_IMR_KSM_CACHE_ENTRY], + access_flags); if (IS_ERR(imr)) { ib_umem_odp_release(umem_odp); return imr; } + imr->access_flags = access_flags; imr->ibmr.pd = &pd->ibpd; imr->ibmr.iova = 0; imr->umem = &umem_odp->umem; imr->ibmr.lkey = imr->mmkey.key; imr->ibmr.rkey = imr->mmkey.key; imr->ibmr.device = &dev->ib_dev; - imr->umem = &umem_odp->umem; imr->is_odp_implicit = true; xa_init(&imr->implicit_children); - err = mlx5_ib_update_xlt(imr, 0, - mlx5_imr_ksm_entries, - MLX5_KSM_PAGE_SHIFT, - MLX5_IB_UPD_XLT_INDIRECT | - MLX5_IB_UPD_XLT_ZAP | - MLX5_IB_UPD_XLT_ENABLE); + err = mlx5r_umr_update_xlt(imr, 0, + mlx5_imr_ksm_entries, + MLX5_KSM_PAGE_SHIFT, + MLX5_IB_UPD_XLT_INDIRECT | + MLX5_IB_UPD_XLT_ZAP | + MLX5_IB_UPD_XLT_ENABLE); if (err) goto out_mr; @@ -577,7 +579,7 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp, * No need to check whether the MTTs really belong to this MR, since * ib_umem_odp_map_dma_and_lock already checks this. */ - ret = mlx5_ib_update_xlt(mr, start_idx, np, page_shift, xlt_flags); + ret = mlx5r_umr_update_xlt(mr, start_idx, np, page_shift, xlt_flags); mutex_unlock(&odp->umem_mutex); if (ret < 0) { @@ -675,9 +677,9 @@ out: * next pagefault handler will see the new information. 
*/ mutex_lock(&odp_imr->umem_mutex); - err = mlx5_ib_update_xlt(imr, upd_start_idx, upd_len, 0, - MLX5_IB_UPD_XLT_INDIRECT | - MLX5_IB_UPD_XLT_ATOMIC); + err = mlx5r_umr_update_xlt(imr, upd_start_idx, upd_len, 0, + MLX5_IB_UPD_XLT_INDIRECT | + MLX5_IB_UPD_XLT_ATOMIC); mutex_unlock(&odp_imr->umem_mutex); if (err) { mlx5_ib_err(mr_to_mdev(imr), "Failed to update PAS\n"); @@ -711,7 +713,7 @@ static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt, ib_umem_dmabuf_unmap_pages(umem_dmabuf); err = -EINVAL; } else { - err = mlx5_ib_update_mr_pas(mr, xlt_flags); + err = mlx5r_umr_update_mr_pas(mr, xlt_flags); } dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); @@ -792,7 +794,8 @@ static bool mkey_is_eq(struct mlx5_ib_mkey *mmkey, u32 key) { if (!mmkey) return false; - if (mmkey->type == MLX5_MKEY_MW) + if (mmkey->type == MLX5_MKEY_MW || + mmkey->type == MLX5_MKEY_INDIRECT_DEVX) return mlx5_base_mkey(mmkey->key) == mlx5_base_mkey(key); return mmkey->key == key; } @@ -1541,16 +1544,10 @@ int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) eq->irq_nb.notifier_call = mlx5_ib_eq_pf_int; param = (struct mlx5_eq_param) { - .irq_index = MLX5_IRQ_EQ_CTRL, .nent = MLX5_IB_NUM_PF_EQE, }; param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_FAULT; - if (!zalloc_cpumask_var(¶m.affinity, GFP_KERNEL)) { - err = -ENOMEM; - goto err_wq; - } eq->core = mlx5_eq_create_generic(dev->mdev, ¶m); - free_cpumask_var(param.affinity); if (IS_ERR(eq->core)) { err = PTR_ERR(eq->core); goto err_wq; @@ -1591,7 +1588,7 @@ mlx5_ib_odp_destroy_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) return err; } -void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) +void mlx5_odp_init_mkey_cache_entry(struct mlx5_cache_ent *ent) { if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) return; @@ -1599,18 +1596,14 @@ void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) switch (ent->order - 2) { case MLX5_IMR_MTT_CACHE_ENTRY: ent->page = PAGE_SHIFT; - 
ent->xlt = MLX5_IMR_MTT_ENTRIES * - sizeof(struct mlx5_mtt) / - MLX5_IB_UMR_OCTOWORD; + ent->ndescs = MLX5_IMR_MTT_ENTRIES; ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; ent->limit = 0; break; case MLX5_IMR_KSM_CACHE_ENTRY: ent->page = MLX5_KSM_PAGE_SHIFT; - ent->xlt = mlx5_imr_ksm_entries * - sizeof(struct mlx5_klm) / - MLX5_IB_UMR_OCTOWORD; + ent->ndescs = mlx5_imr_ksm_entries; ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM; ent->limit = 0; break; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index e5abbcfc1d57..40d9410ec303 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -30,7 +30,7 @@ * SOFTWARE. */ -#include <linux/module.h> +#include <linux/etherdevice.h> #include <rdma/ib_umem.h> #include <rdma/ib_cache.h> #include <rdma/ib_user_verbs.h> @@ -40,6 +40,7 @@ #include "ib_rep.h" #include "counters.h" #include "cmd.h" +#include "umr.h" #include "qp.h" #include "wr.h" @@ -614,7 +615,8 @@ enum { static int max_bfregs(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi) { - return get_num_static_uars(dev, bfregi) * MLX5_NON_FP_BFREGS_PER_UAR; + return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * + bfregi->num_static_sys_pages * MLX5_NON_FP_BFREGS_PER_UAR; } static int num_med_bfreg(struct mlx5_ib_dev *dev, @@ -3906,7 +3908,7 @@ static unsigned int get_tx_affinity_rr(struct mlx5_ib_dev *dev, tx_port_affinity = &dev->port[port_num].roce.tx_port_affinity; return (unsigned int)atomic_add_return(1, tx_port_affinity) % - MLX5_MAX_PORTS + 1; + (dev->lag_active ? 
dev->lag_ports : MLX5_CAP_GEN(dev->mdev, num_lag_ports)) + 1; } static bool qp_supports_affinity(struct mlx5_ib_qp *qp) @@ -4464,6 +4466,7 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, err = mlx5_core_create_dct(dev, &qp->dct.mdct, qp->dct.in, MLX5_ST_SZ_BYTES(create_dct_in), out, sizeof(out)); + err = mlx5_cmd_check(dev->mdev, err, qp->dct.in, out); if (err) return err; resp.dctn = qp->dct.mdct.mqp.qpn; diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c index 8844eacf2380..542e4c63a8de 100644 --- a/drivers/infiniband/hw/mlx5/qpc.c +++ b/drivers/infiniband/hw/mlx5/qpc.c @@ -220,7 +220,7 @@ int mlx5_core_create_dct(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct, init_completion(&dct->drained); MLX5_SET(create_dct_in, in, opcode, MLX5_CMD_OP_CREATE_DCT); - err = mlx5_cmd_exec(dev->mdev, in, inlen, out, outlen); + err = mlx5_cmd_do(dev->mdev, in, inlen, out, outlen); if (err) return err; diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 191c4ee7db62..09b365a98bbf 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -3,7 +3,6 @@ * Copyright (c) 2013-2018, Mellanox Technologies inc. All rights reserved. */ -#include <linux/module.h> #include <linux/mlx5/qp.h> #include <linux/slab.h> #include <rdma/ib_umem.h> diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c new file mode 100644 index 000000000000..d5105b5c9979 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/umr.c @@ -0,0 +1,761 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. 
*/ + +#include <rdma/ib_umem_odp.h> +#include "mlx5_ib.h" +#include "umr.h" +#include "wr.h" + +/* + * We can't use an array for xlt_emergency_page because dma_map_single doesn't + * work on kernel modules memory + */ +void *xlt_emergency_page; +static DEFINE_MUTEX(xlt_emergency_page_mutex); + +static __be64 get_umr_enable_mr_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_KEY | + MLX5_MKEY_MASK_FREE; + + return cpu_to_be64(result); +} + +static __be64 get_umr_disable_mr_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_FREE; + + return cpu_to_be64(result); +} + +static __be64 get_umr_update_translation_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_LEN | + MLX5_MKEY_MASK_PAGE_SIZE | + MLX5_MKEY_MASK_START_ADDR; + + return cpu_to_be64(result); +} + +static __be64 get_umr_update_access_mask(struct mlx5_ib_dev *dev) +{ + u64 result; + + result = MLX5_MKEY_MASK_LR | + MLX5_MKEY_MASK_LW | + MLX5_MKEY_MASK_RR | + MLX5_MKEY_MASK_RW; + + if (MLX5_CAP_GEN(dev->mdev, atomic)) + result |= MLX5_MKEY_MASK_A; + + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) + result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE; + + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) + result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ; + + return cpu_to_be64(result); +} + +static __be64 get_umr_update_pd_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_PD; + + return cpu_to_be64(result); +} + +static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask) +{ + if (mask & MLX5_MKEY_MASK_PAGE_SIZE && + MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) + return -EPERM; + + if (mask & MLX5_MKEY_MASK_A && + MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) + return -EPERM; + + if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE && + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) + return -EPERM; + + if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ && + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) + return -EPERM; + + return 0; +} + +enum 
{ + MAX_UMR_WR = 128, +}; + +static int mlx5r_umr_qp_rst2rts(struct mlx5_ib_dev *dev, struct ib_qp *qp) +{ + struct ib_qp_attr attr = {}; + int ret; + + attr.qp_state = IB_QPS_INIT; + attr.port_num = 1; + ret = ib_modify_qp(qp, &attr, + IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT); + if (ret) { + mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); + return ret; + } + + memset(&attr, 0, sizeof(attr)); + attr.qp_state = IB_QPS_RTR; + + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); + if (ret) { + mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n"); + return ret; + } + + memset(&attr, 0, sizeof(attr)); + attr.qp_state = IB_QPS_RTS; + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); + if (ret) { + mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n"); + return ret; + } + + return 0; +} + +int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev) +{ + struct ib_qp_init_attr init_attr = {}; + struct ib_pd *pd; + struct ib_cq *cq; + struct ib_qp *qp; + int ret; + + pd = ib_alloc_pd(&dev->ib_dev, 0); + if (IS_ERR(pd)) { + mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); + return PTR_ERR(pd); + } + + cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ); + if (IS_ERR(cq)) { + mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); + ret = PTR_ERR(cq); + goto destroy_pd; + } + + init_attr.send_cq = cq; + init_attr.recv_cq = cq; + init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; + init_attr.cap.max_send_wr = MAX_UMR_WR; + init_attr.cap.max_send_sge = 1; + init_attr.qp_type = MLX5_IB_QPT_REG_UMR; + init_attr.port_num = 1; + qp = ib_create_qp(pd, &init_attr); + if (IS_ERR(qp)) { + mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n"); + ret = PTR_ERR(qp); + goto destroy_cq; + } + + ret = mlx5r_umr_qp_rst2rts(dev, qp); + if (ret) + goto destroy_qp; + + dev->umrc.qp = qp; + dev->umrc.cq = cq; + dev->umrc.pd = pd; + + sema_init(&dev->umrc.sem, MAX_UMR_WR); + mutex_init(&dev->umrc.lock); + dev->umrc.state = MLX5_UMR_STATE_ACTIVE; + + return 0; + +destroy_qp: + ib_destroy_qp(qp); +destroy_cq: + 
ib_free_cq(cq); +destroy_pd: + ib_dealloc_pd(pd); + return ret; +} + +void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev) +{ + if (dev->umrc.state == MLX5_UMR_STATE_UNINIT) + return; + ib_destroy_qp(dev->umrc.qp); + ib_free_cq(dev->umrc.cq); + ib_dealloc_pd(dev->umrc.pd); +} + +static int mlx5r_umr_recover(struct mlx5_ib_dev *dev) +{ + struct umr_common *umrc = &dev->umrc; + struct ib_qp_attr attr; + int err; + + attr.qp_state = IB_QPS_RESET; + err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE); + if (err) { + mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); + goto err; + } + + err = mlx5r_umr_qp_rst2rts(dev, umrc->qp); + if (err) + goto err; + + umrc->state = MLX5_UMR_STATE_ACTIVE; + return 0; + +err: + umrc->state = MLX5_UMR_STATE_ERR; + return err; +} + +static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe, + struct mlx5r_umr_wqe *wqe, bool with_data) +{ + unsigned int wqe_size = + with_data ? sizeof(struct mlx5r_umr_wqe) : + sizeof(struct mlx5r_umr_wqe) - + sizeof(struct mlx5_wqe_data_seg); + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_wqe_ctrl_seg *ctrl; + union { + struct ib_cqe *ib_cqe; + u64 wr_id; + } id; + void *cur_edge, *seg; + unsigned long flags; + unsigned int idx; + int size, err; + + if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)) + return -EIO; + + spin_lock_irqsave(&qp->sq.lock, flags); + + err = mlx5r_begin_wqe(qp, &seg, &ctrl, &idx, &size, &cur_edge, 0, + cpu_to_be32(mkey), false, false); + if (WARN_ON(err)) + goto out; + + qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; + + mlx5r_memcpy_send_wqe(&qp->sq, &cur_edge, &seg, &size, wqe, wqe_size); + + id.ib_cqe = cqe; + mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, id.wr_id, 0, + MLX5_FENCE_MODE_INITIATOR_SMALL, MLX5_OPCODE_UMR); + + mlx5r_ring_db(qp, 1, ctrl); + +out: + spin_unlock_irqrestore(&qp->sq.lock, flags); + + return err; +} + +static void 
mlx5r_umr_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct mlx5_ib_umr_context *context = + container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe); + + context->status = wc->status; + complete(&context->done); +} + +static inline void mlx5r_umr_init_context(struct mlx5r_umr_context *context) +{ + context->cqe.done = mlx5r_umr_done; + init_completion(&context->done); +} + +static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey, + struct mlx5r_umr_wqe *wqe, bool with_data) +{ + struct umr_common *umrc = &dev->umrc; + struct mlx5r_umr_context umr_context; + int err; + + err = umr_check_mkey_mask(dev, be64_to_cpu(wqe->ctrl_seg.mkey_mask)); + if (WARN_ON(err)) + return err; + + mlx5r_umr_init_context(&umr_context); + + down(&umrc->sem); + while (true) { + mutex_lock(&umrc->lock); + if (umrc->state == MLX5_UMR_STATE_ERR) { + mutex_unlock(&umrc->lock); + err = -EFAULT; + break; + } + + if (umrc->state == MLX5_UMR_STATE_RECOVER) { + mutex_unlock(&umrc->lock); + usleep_range(3000, 5000); + continue; + } + + err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe, + with_data); + mutex_unlock(&umrc->lock); + if (err) { + mlx5_ib_warn(dev, "UMR post send failed, err %d\n", + err); + break; + } + + wait_for_completion(&umr_context.done); + + if (umr_context.status == IB_WC_SUCCESS) + break; + + if (umr_context.status == IB_WC_WR_FLUSH_ERR) + continue; + + WARN_ON_ONCE(1); + mlx5_ib_warn(dev, + "reg umr failed (%u). Trying to recover and resubmit the flushed WQEs\n", + umr_context.status); + mutex_lock(&umrc->lock); + err = mlx5r_umr_recover(dev); + mutex_unlock(&umrc->lock); + if (err) + mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n", + err); + err = -EFAULT; + break; + } + up(&umrc->sem); + return err; +} + +/** + * mlx5r_umr_revoke_mr - Fence all DMA on the MR + * @mr: The MR to fence + * + * Upon return the NIC will not be doing any DMA to the pages under the MR, + * and any DMA in progress will be completed. 
Failure of this function + * indicates the HW has failed catastrophically. + */ +int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr) +{ + struct mlx5_ib_dev *dev = mr_to_mdev(mr); + struct mlx5r_umr_wqe wqe = {}; + + if (dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + return 0; + + wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask(); + wqe.ctrl_seg.mkey_mask |= get_umr_disable_mr_mask(); + wqe.ctrl_seg.flags |= MLX5_UMR_INLINE; + + MLX5_SET(mkc, &wqe.mkey_seg, free, 1); + MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(dev->umrc.pd)->pdn); + MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff); + MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0, + mlx5_mkey_variant(mr->mmkey.key)); + + return mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false); +} + +static void mlx5r_umr_set_access_flags(struct mlx5_ib_dev *dev, + struct mlx5_mkey_seg *seg, + unsigned int access_flags) +{ + MLX5_SET(mkc, seg, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); + MLX5_SET(mkc, seg, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE)); + MLX5_SET(mkc, seg, rr, !!(access_flags & IB_ACCESS_REMOTE_READ)); + MLX5_SET(mkc, seg, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE)); + MLX5_SET(mkc, seg, lr, 1); + MLX5_SET(mkc, seg, relaxed_ordering_write, + !!(access_flags & IB_ACCESS_RELAXED_ORDERING)); + MLX5_SET(mkc, seg, relaxed_ordering_read, + !!(access_flags & IB_ACCESS_RELAXED_ORDERING)); +} + +int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd, + int access_flags) +{ + struct mlx5_ib_dev *dev = mr_to_mdev(mr); + struct mlx5r_umr_wqe wqe = {}; + int err; + + wqe.ctrl_seg.mkey_mask = get_umr_update_access_mask(dev); + wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask(); + wqe.ctrl_seg.flags = MLX5_UMR_CHECK_FREE; + wqe.ctrl_seg.flags |= MLX5_UMR_INLINE; + + mlx5r_umr_set_access_flags(dev, &wqe.mkey_seg, access_flags); + MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(pd)->pdn); + MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff); + MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0, + mlx5_mkey_variant(mr->mmkey.key)); + + err 
= mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false); + if (err) + return err; + + mr->access_flags = access_flags; + return 0; +} + +#define MLX5_MAX_UMR_CHUNK \ + ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - MLX5_UMR_MTT_ALIGNMENT) +#define MLX5_SPARE_UMR_CHUNK 0x10000 + +/* + * Allocate a temporary buffer to hold the per-page information to transfer to + * HW. For efficiency this should be as large as it can be, but buffer + * allocation failure is not allowed, so try smaller sizes. + */ +static void *mlx5r_umr_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask) +{ + const size_t xlt_chunk_align = MLX5_UMR_MTT_ALIGNMENT / ent_size; + size_t size; + void *res = NULL; + + static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0); + + /* + * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the + * allocation can't trigger any kind of reclaim. + */ + might_sleep(); + + gfp_mask |= __GFP_ZERO | __GFP_NORETRY; + + /* + * If the system already has a suitable high order page then just use + * that, but don't try hard to create one. This max is about 1M, so a + * free x86 huge page will satisfy it. 
+ */ + size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align), + MLX5_MAX_UMR_CHUNK); + *nents = size / ent_size; + res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, + get_order(size)); + if (res) + return res; + + if (size > MLX5_SPARE_UMR_CHUNK) { + size = MLX5_SPARE_UMR_CHUNK; + *nents = size / ent_size; + res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, + get_order(size)); + if (res) + return res; + } + + *nents = PAGE_SIZE / ent_size; + res = (void *)__get_free_page(gfp_mask); + if (res) + return res; + + mutex_lock(&xlt_emergency_page_mutex); + memset(xlt_emergency_page, 0, PAGE_SIZE); + return xlt_emergency_page; +} + +static void mlx5r_umr_free_xlt(void *xlt, size_t length) +{ + if (xlt == xlt_emergency_page) { + mutex_unlock(&xlt_emergency_page_mutex); + return; + } + + free_pages((unsigned long)xlt, get_order(length)); +} + +static void mlx5r_umr_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt, + struct ib_sge *sg) +{ + struct device *ddev = &dev->mdev->pdev->dev; + + dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE); + mlx5r_umr_free_xlt(xlt, sg->length); +} + +/* + * Create an XLT buffer ready for submission. + */ +static void *mlx5r_umr_create_xlt(struct mlx5_ib_dev *dev, struct ib_sge *sg, + size_t nents, size_t ent_size, + unsigned int flags) +{ + struct device *ddev = &dev->mdev->pdev->dev; + dma_addr_t dma; + void *xlt; + + xlt = mlx5r_umr_alloc_xlt(&nents, ent_size, + flags & MLX5_IB_UPD_XLT_ATOMIC ? 
GFP_ATOMIC : + GFP_KERNEL); + sg->length = nents * ent_size; + dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE); + if (dma_mapping_error(ddev, dma)) { + mlx5_ib_err(dev, "unable to map DMA during XLT update.\n"); + mlx5r_umr_free_xlt(xlt, sg->length); + return NULL; + } + sg->addr = dma; + sg->lkey = dev->umrc.pd->local_dma_lkey; + + return xlt; +} + +static void +mlx5r_umr_set_update_xlt_ctrl_seg(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg, + unsigned int flags, struct ib_sge *sg) +{ + if (!(flags & MLX5_IB_UPD_XLT_ENABLE)) + /* fail if free */ + ctrl_seg->flags = MLX5_UMR_CHECK_FREE; + else + /* fail if not free */ + ctrl_seg->flags = MLX5_UMR_CHECK_NOT_FREE; + ctrl_seg->xlt_octowords = + cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length)); +} + +static void mlx5r_umr_set_update_xlt_mkey_seg(struct mlx5_ib_dev *dev, + struct mlx5_mkey_seg *mkey_seg, + struct mlx5_ib_mr *mr, + unsigned int page_shift) +{ + mlx5r_umr_set_access_flags(dev, mkey_seg, mr->access_flags); + MLX5_SET(mkc, mkey_seg, pd, to_mpd(mr->ibmr.pd)->pdn); + MLX5_SET64(mkc, mkey_seg, start_addr, mr->ibmr.iova); + MLX5_SET64(mkc, mkey_seg, len, mr->ibmr.length); + MLX5_SET(mkc, mkey_seg, log_page_size, page_shift); + MLX5_SET(mkc, mkey_seg, qpn, 0xffffff); + MLX5_SET(mkc, mkey_seg, mkey_7_0, mlx5_mkey_variant(mr->mmkey.key)); +} + +static void +mlx5r_umr_set_update_xlt_data_seg(struct mlx5_wqe_data_seg *data_seg, + struct ib_sge *sg) +{ + data_seg->byte_count = cpu_to_be32(sg->length); + data_seg->lkey = cpu_to_be32(sg->lkey); + data_seg->addr = cpu_to_be64(sg->addr); +} + +static void mlx5r_umr_update_offset(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg, + u64 offset) +{ + u64 octo_offset = mlx5r_umr_get_xlt_octo(offset); + + ctrl_seg->xlt_offset = cpu_to_be16(octo_offset & 0xffff); + ctrl_seg->xlt_offset_47_16 = cpu_to_be32(octo_offset >> 16); + ctrl_seg->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN; +} + +static void mlx5r_umr_final_update_xlt(struct mlx5_ib_dev *dev, + struct mlx5r_umr_wqe *wqe, + struct 
mlx5_ib_mr *mr, struct ib_sge *sg, + unsigned int flags) +{ + bool update_pd_access, update_translation; + + if (flags & MLX5_IB_UPD_XLT_ENABLE) + wqe->ctrl_seg.mkey_mask |= get_umr_enable_mr_mask(); + + update_pd_access = flags & MLX5_IB_UPD_XLT_ENABLE || + flags & MLX5_IB_UPD_XLT_PD || + flags & MLX5_IB_UPD_XLT_ACCESS; + + if (update_pd_access) { + wqe->ctrl_seg.mkey_mask |= get_umr_update_access_mask(dev); + wqe->ctrl_seg.mkey_mask |= get_umr_update_pd_mask(); + } + + update_translation = + flags & MLX5_IB_UPD_XLT_ENABLE || flags & MLX5_IB_UPD_XLT_ADDR; + + if (update_translation) { + wqe->ctrl_seg.mkey_mask |= get_umr_update_translation_mask(); + if (!mr->ibmr.length) + MLX5_SET(mkc, &wqe->mkey_seg, length64, 1); + } + + wqe->ctrl_seg.xlt_octowords = + cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length)); + wqe->data_seg.byte_count = cpu_to_be32(sg->length); +} + +/* + * Send the DMA list to the HW for a normal MR using UMR. + * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP + * flag may be used. 
+ */ +int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) +{ + struct mlx5_ib_dev *dev = mr_to_mdev(mr); + struct device *ddev = &dev->mdev->pdev->dev; + struct mlx5r_umr_wqe wqe = {}; + struct ib_block_iter biter; + struct mlx5_mtt *cur_mtt; + size_t orig_sg_length; + struct mlx5_mtt *mtt; + size_t final_size; + struct ib_sge sg; + u64 offset = 0; + int err = 0; + + if (WARN_ON(mr->umem->is_odp)) + return -EINVAL; + + mtt = mlx5r_umr_create_xlt( + dev, &sg, ib_umem_num_dma_blocks(mr->umem, 1 << mr->page_shift), + sizeof(*mtt), flags); + if (!mtt) + return -ENOMEM; + + orig_sg_length = sg.length; + + mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg); + mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr, + mr->page_shift); + mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg); + + cur_mtt = mtt; + rdma_for_each_block(mr->umem->sgt_append.sgt.sgl, &biter, + mr->umem->sgt_append.sgt.nents, + BIT(mr->page_shift)) { + if (cur_mtt == (void *)mtt + sg.length) { + dma_sync_single_for_device(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); + + err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, + true); + if (err) + goto err; + dma_sync_single_for_cpu(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); + offset += sg.length; + mlx5r_umr_update_offset(&wqe.ctrl_seg, offset); + + cur_mtt = mtt; + } + + cur_mtt->ptag = + cpu_to_be64(rdma_block_iter_dma_address(&biter) | + MLX5_IB_MTT_PRESENT); + + if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP)) + cur_mtt->ptag = 0; + + cur_mtt++; + } + + final_size = (void *)cur_mtt - (void *)mtt; + sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT); + memset(cur_mtt, 0, sg.length - final_size); + mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags); + + dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE); + err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true); + +err: + sg.length = orig_sg_length; + mlx5r_umr_unmap_free_xlt(dev, mtt, &sg); + return err; +} + +static bool 
umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev) +{ + return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled); +} + +int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, + int page_shift, int flags) +{ + int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT) + ? sizeof(struct mlx5_klm) + : sizeof(struct mlx5_mtt); + const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size; + struct mlx5_ib_dev *dev = mr_to_mdev(mr); + struct device *ddev = &dev->mdev->pdev->dev; + const int page_mask = page_align - 1; + struct mlx5r_umr_wqe wqe = {}; + size_t pages_mapped = 0; + size_t pages_to_map = 0; + size_t size_to_map = 0; + size_t orig_sg_length; + size_t pages_iter; + struct ib_sge sg; + int err = 0; + void *xlt; + + if ((flags & MLX5_IB_UPD_XLT_INDIRECT) && + !umr_can_use_indirect_mkey(dev)) + return -EPERM; + + if (WARN_ON(!mr->umem->is_odp)) + return -EINVAL; + + /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes, + * so we need to align the offset and length accordingly + */ + if (idx & page_mask) { + npages += idx & page_mask; + idx &= ~page_mask; + } + pages_to_map = ALIGN(npages, page_align); + + xlt = mlx5r_umr_create_xlt(dev, &sg, npages, desc_size, flags); + if (!xlt) + return -ENOMEM; + + pages_iter = sg.length / desc_size; + orig_sg_length = sg.length; + + if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) { + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + size_t max_pages = ib_umem_odp_num_pages(odp) - idx; + + pages_to_map = min_t(size_t, pages_to_map, max_pages); + } + + mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg); + mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr, page_shift); + mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg); + + for (pages_mapped = 0; + pages_mapped < pages_to_map && !err; + pages_mapped += pages_iter, idx += pages_iter) { + npages = min_t(int, pages_iter, pages_to_map - pages_mapped); + size_to_map = npages * desc_size; + dma_sync_single_for_cpu(ddev, sg.addr, sg.length, + 
DMA_TO_DEVICE); + mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); + dma_sync_single_for_device(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); + sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT); + + if (pages_mapped + pages_iter >= pages_to_map) + mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags); + mlx5r_umr_update_offset(&wqe.ctrl_seg, idx * desc_size); + err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true); + } + sg.length = orig_sg_length; + mlx5r_umr_unmap_free_xlt(dev, xlt, &sg); + return err; +} diff --git a/drivers/infiniband/hw/mlx5/umr.h b/drivers/infiniband/hw/mlx5/umr.h new file mode 100644 index 000000000000..c9d0021381a2 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/umr.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */ + +#ifndef _MLX5_IB_UMR_H +#define _MLX5_IB_UMR_H + +#include "mlx5_ib.h" + + +#define MLX5_MAX_UMR_SHIFT 16 +#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT) + +#define MLX5_IB_UMR_OCTOWORD 16 +#define MLX5_IB_UMR_XLT_ALIGNMENT 64 + +int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev); +void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev); + +static inline bool mlx5r_umr_can_load_pas(struct mlx5_ib_dev *dev, + size_t length) +{ + /* + * umr_check_mkey_mask() rejects MLX5_MKEY_MASK_PAGE_SIZE which is + * always set if MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (aka + * MLX5_IB_UPD_XLT_ADDR and MLX5_IB_UPD_XLT_ENABLE) is set. Thus, a mkey + * can never be enabled without this capability. Simplify this weird + * quirky hardware by just saying it can't use PAS lists with UMR at + * all. + */ + if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) + return false; + + /* + * length is the size of the MR in bytes when mlx5_ib_update_xlt() is + * used. 
+ */ + if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) && + length >= MLX5_MAX_UMR_PAGES * PAGE_SIZE) + return false; + return true; +} + +/* + * true if an existing MR can be reconfigured to new access_flags using UMR. + * Older HW cannot use UMR to update certain elements of the MKC. See + * get_umr_update_access_mask() and umr_check_mkey_mask() + */ +static inline bool mlx5r_umr_can_reconfig(struct mlx5_ib_dev *dev, + unsigned int current_access_flags, + unsigned int target_access_flags) +{ + unsigned int diffs = current_access_flags ^ target_access_flags; + + if ((diffs & IB_ACCESS_REMOTE_ATOMIC) && + MLX5_CAP_GEN(dev->mdev, atomic) && + MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) + return false; + + if ((diffs & IB_ACCESS_RELAXED_ORDERING) && + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) && + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) + return false; + + if ((diffs & IB_ACCESS_RELAXED_ORDERING) && + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) && + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) + return false; + + return true; +} + +static inline u64 mlx5r_umr_get_xlt_octo(u64 bytes) +{ + return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) / + MLX5_IB_UMR_OCTOWORD; +} + +struct mlx5r_umr_context { + struct ib_cqe cqe; + enum ib_wc_status status; + struct completion done; +}; + +struct mlx5r_umr_wqe { + struct mlx5_wqe_umr_ctrl_seg ctrl_seg; + struct mlx5_mkey_seg mkey_seg; + struct mlx5_wqe_data_seg data_seg; +}; + +int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr); +int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd, + int access_flags); +int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags); +int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, + int page_shift, int flags); + +#endif /* _MLX5_IB_UMR_H */ diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c index 51e48ca9016e..855f3f4fefad 100644 --- a/drivers/infiniband/hw/mlx5/wr.c +++ 
b/drivers/infiniband/hw/mlx5/wr.c @@ -7,6 +7,7 @@ #include <linux/mlx5/qp.h> #include <linux/mlx5/driver.h> #include "wr.h" +#include "umr.h" static const u32 mlx5_ib_opcode[] = { [IB_WR_SEND] = MLX5_OPCODE_SEND, @@ -25,58 +26,7 @@ static const u32 mlx5_ib_opcode[] = { [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR, }; -/* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the - * next nearby edge and get new address translation for current WQE position. - * @sq - SQ buffer. - * @seg: Current WQE position (16B aligned). - * @wqe_sz: Total current WQE size [16B]. - * @cur_edge: Updated current edge. - */ -static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg, - u32 wqe_sz, void **cur_edge) -{ - u32 idx; - - if (likely(*seg != *cur_edge)) - return; - - idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1); - *cur_edge = get_sq_edge(sq, idx); - - *seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx); -} - -/* memcpy_send_wqe - copy data from src to WQE and update the relevant WQ's - * pointers. At the end @seg is aligned to 16B regardless the copied size. - * @sq - SQ buffer. - * @cur_edge: Updated current edge. - * @seg: Current WQE position (16B aligned). - * @wqe_sz: Total current WQE size [16B]. - * @src: Pointer to copy from. - * @n: Number of bytes to copy. - */ -static inline void memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge, - void **seg, u32 *wqe_sz, const void *src, - size_t n) -{ - while (likely(n)) { - size_t leftlen = *cur_edge - *seg; - size_t copysz = min_t(size_t, leftlen, n); - size_t stride; - - memcpy(*seg, src, copysz); - - n -= copysz; - src += copysz; - stride = !n ? 
ALIGN(copysz, 16) : copysz; - *seg += stride; - *wqe_sz += stride >> 4; - handle_post_send_edge(sq, seg, *wqe_sz, cur_edge); - } -} - -static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, - struct ib_cq *ib_cq) +int mlx5r_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq) { struct mlx5_ib_cq *cq; unsigned int cur; @@ -122,9 +72,9 @@ static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp, eseg->mss = cpu_to_be16(ud_wr->mss); eseg->inline_hdr.sz = cpu_to_be16(left); - /* memcpy_send_wqe should get a 16B align address. Hence, we - * first copy up to the current edge and then, if needed, - * continue to memcpy_send_wqe. + /* mlx5r_memcpy_send_wqe should get a 16B align address. Hence, + * we first copy up to the current edge and then, if needed, + * continue to mlx5r_memcpy_send_wqe. */ copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start, left); @@ -138,8 +88,8 @@ static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp, handle_post_send_edge(&qp->sq, seg, *size, cur_edge); left -= copysz; pdata += copysz; - memcpy_send_wqe(&qp->sq, cur_edge, seg, size, pdata, - left); + mlx5r_memcpy_send_wqe(&qp->sq, cur_edge, seg, size, + pdata, left); } return; @@ -165,12 +115,6 @@ static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg) dseg->addr = cpu_to_be64(sg->addr); } -static u64 get_xlt_octo(u64 bytes) -{ - return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) / - MLX5_IB_UMR_OCTOWORD; -} - static __be64 frwr_mkey_mask(bool atomic) { u64 result; @@ -222,7 +166,7 @@ static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, memset(umr, 0, sizeof(*umr)); umr->flags = flags; - umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size)); + umr->xlt_octowords = cpu_to_be16(mlx5r_umr_get_xlt_octo(size)); umr->mkey_mask = frwr_mkey_mask(atomic); } @@ -233,134 +177,6 @@ static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr) umr->flags = MLX5_UMR_INLINE; } -static __be64 
get_umr_enable_mr_mask(void) -{ - u64 result; - - result = MLX5_MKEY_MASK_KEY | - MLX5_MKEY_MASK_FREE; - - return cpu_to_be64(result); -} - -static __be64 get_umr_disable_mr_mask(void) -{ - u64 result; - - result = MLX5_MKEY_MASK_FREE; - - return cpu_to_be64(result); -} - -static __be64 get_umr_update_translation_mask(void) -{ - u64 result; - - result = MLX5_MKEY_MASK_LEN | - MLX5_MKEY_MASK_PAGE_SIZE | - MLX5_MKEY_MASK_START_ADDR; - - return cpu_to_be64(result); -} - -static __be64 get_umr_update_access_mask(int atomic, - int relaxed_ordering_write, - int relaxed_ordering_read) -{ - u64 result; - - result = MLX5_MKEY_MASK_LR | - MLX5_MKEY_MASK_LW | - MLX5_MKEY_MASK_RR | - MLX5_MKEY_MASK_RW; - - if (atomic) - result |= MLX5_MKEY_MASK_A; - - if (relaxed_ordering_write) - result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE; - - if (relaxed_ordering_read) - result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ; - - return cpu_to_be64(result); -} - -static __be64 get_umr_update_pd_mask(void) -{ - u64 result; - - result = MLX5_MKEY_MASK_PD; - - return cpu_to_be64(result); -} - -static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask) -{ - if (mask & MLX5_MKEY_MASK_PAGE_SIZE && - MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) - return -EPERM; - - if (mask & MLX5_MKEY_MASK_A && - MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) - return -EPERM; - - if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE && - !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) - return -EPERM; - - if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ && - !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) - return -EPERM; - - return 0; -} - -static int set_reg_umr_segment(struct mlx5_ib_dev *dev, - struct mlx5_wqe_umr_ctrl_seg *umr, - const struct ib_send_wr *wr) -{ - const struct mlx5_umr_wr *umrwr = umr_wr(wr); - - memset(umr, 0, sizeof(*umr)); - - if (!umrwr->ignore_free_state) { - if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE) - /* fail if free */ - umr->flags = 
MLX5_UMR_CHECK_FREE; - else - /* fail if not free */ - umr->flags = MLX5_UMR_CHECK_NOT_FREE; - } - - umr->xlt_octowords = cpu_to_be16(get_xlt_octo(umrwr->xlt_size)); - if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_XLT) { - u64 offset = get_xlt_octo(umrwr->offset); - - umr->xlt_offset = cpu_to_be16(offset & 0xffff); - umr->xlt_offset_47_16 = cpu_to_be32(offset >> 16); - umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN; - } - if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION) - umr->mkey_mask |= get_umr_update_translation_mask(); - if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) { - umr->mkey_mask |= get_umr_update_access_mask( - !!(MLX5_CAP_GEN(dev->mdev, atomic)), - !!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)), - !!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))); - umr->mkey_mask |= get_umr_update_pd_mask(); - } - if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR) - umr->mkey_mask |= get_umr_enable_mr_mask(); - if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR) - umr->mkey_mask |= get_umr_disable_mr_mask(); - - if (!wr->num_sge) - umr->flags |= MLX5_UMR_INLINE; - - return umr_check_mkey_mask(dev, be64_to_cpu(umr->mkey_mask)); -} - static u8 get_umr_flags(int acc) { return (acc & IB_ACCESS_REMOTE_ATOMIC ? 
MLX5_PERM_ATOMIC : 0) | @@ -398,43 +214,6 @@ static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg) seg->status = MLX5_MKEY_STATUS_FREE; } -static void set_reg_mkey_segment(struct mlx5_ib_dev *dev, - struct mlx5_mkey_seg *seg, - const struct ib_send_wr *wr) -{ - const struct mlx5_umr_wr *umrwr = umr_wr(wr); - - memset(seg, 0, sizeof(*seg)); - if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR) - MLX5_SET(mkc, seg, free, 1); - - MLX5_SET(mkc, seg, a, - !!(umrwr->access_flags & IB_ACCESS_REMOTE_ATOMIC)); - MLX5_SET(mkc, seg, rw, - !!(umrwr->access_flags & IB_ACCESS_REMOTE_WRITE)); - MLX5_SET(mkc, seg, rr, !!(umrwr->access_flags & IB_ACCESS_REMOTE_READ)); - MLX5_SET(mkc, seg, lw, !!(umrwr->access_flags & IB_ACCESS_LOCAL_WRITE)); - MLX5_SET(mkc, seg, lr, 1); - if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) - MLX5_SET(mkc, seg, relaxed_ordering_write, - !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING)); - if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) - MLX5_SET(mkc, seg, relaxed_ordering_read, - !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING)); - - if (umrwr->pd) - MLX5_SET(mkc, seg, pd, to_mpd(umrwr->pd)->pdn); - if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION && - !umrwr->length) - MLX5_SET(mkc, seg, length64, 1); - - MLX5_SET64(mkc, seg, start_addr, umrwr->virt_addr); - MLX5_SET64(mkc, seg, len, umrwr->length); - MLX5_SET(mkc, seg, log_page_size, umrwr->page_shift); - MLX5_SET(mkc, seg, qpn, 0xffffff); - MLX5_SET(mkc, seg, mkey_7_0, mlx5_mkey_variant(umrwr->mkey)); -} - static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg, struct mlx5_ib_mr *mr, struct mlx5_ib_pd *pd) @@ -760,7 +539,7 @@ static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 | MLX5_MKEY_BSF_EN | pdn); seg->len = cpu_to_be64(length); - seg->xlt_oct_size = cpu_to_be32(get_xlt_octo(size)); + seg->xlt_oct_size = cpu_to_be32(mlx5r_umr_get_xlt_octo(size)); seg->bsfs_octo_size = 
cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE); } @@ -770,7 +549,7 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, memset(umr, 0, sizeof(*umr)); umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE; - umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size)); + umr->xlt_octowords = cpu_to_be16(mlx5r_umr_get_xlt_octo(size)); umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE); umr->mkey_mask = sig_mkey_mask(); } @@ -870,7 +649,7 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, * Relaxed Ordering is set implicitly in mlx5_set_umr_free_mkey() and * kernel ULPs are not aware of it, so we don't set it here. */ - if (!mlx5_ib_can_reconfig_with_umr(dev, 0, wr->access)) { + if (!mlx5r_umr_can_reconfig(dev, 0, wr->access)) { mlx5_ib_warn( to_mdev(qp->ibqp.device), "Fast update for MR access flags is not possible\n"); @@ -899,8 +678,8 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, handle_post_send_edge(&qp->sq, seg, *size, cur_edge); if (umr_inline) { - memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs, - mr_list_size); + mlx5r_memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs, + mr_list_size); *size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4); } else { set_reg_data_seg(*seg, mr, pd); @@ -942,23 +721,22 @@ static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16) } } -static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg, - struct mlx5_wqe_ctrl_seg **ctrl, - const struct ib_send_wr *wr, unsigned int *idx, - int *size, void **cur_edge, int nreq, - bool send_signaled, bool solicited) +int mlx5r_begin_wqe(struct mlx5_ib_qp *qp, void **seg, + struct mlx5_wqe_ctrl_seg **ctrl, unsigned int *idx, + int *size, void **cur_edge, int nreq, __be32 general_id, + bool send_signaled, bool solicited) { - if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) + if (unlikely(mlx5r_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) return -ENOMEM; *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); *seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx); *ctrl = *seg; 
*(uint32_t *)(*seg + 8) = 0; - (*ctrl)->imm = send_ieth(wr); + (*ctrl)->general_id = general_id; (*ctrl)->fm_ce_se = qp->sq_signal_bits | - (send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) | - (solicited ? MLX5_WQE_CTRL_SOLICITED : 0); + (send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) | + (solicited ? MLX5_WQE_CTRL_SOLICITED : 0); *seg += sizeof(**ctrl); *size = sizeof(**ctrl) / 16; @@ -972,16 +750,14 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, const struct ib_send_wr *wr, unsigned int *idx, int *size, void **cur_edge, int nreq) { - return __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq, - wr->send_flags & IB_SEND_SIGNALED, - wr->send_flags & IB_SEND_SOLICITED); + return mlx5r_begin_wqe(qp, seg, ctrl, idx, size, cur_edge, nreq, + send_ieth(wr), wr->send_flags & IB_SEND_SIGNALED, + wr->send_flags & IB_SEND_SOLICITED); } -static void finish_wqe(struct mlx5_ib_qp *qp, - struct mlx5_wqe_ctrl_seg *ctrl, - void *seg, u8 size, void *cur_edge, - unsigned int idx, u64 wr_id, int nreq, u8 fence, - u32 mlx5_opcode) +void mlx5r_finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl, + void *seg, u8 size, void *cur_edge, unsigned int idx, + u64 wr_id, int nreq, u8 fence, u32 mlx5_opcode) { u8 opmod = 0; @@ -1045,8 +821,8 @@ static int handle_psv(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, /* * SET_PSV WQEs are not signaled and solicited on error. 
*/ - err = __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq, - false, true); + err = mlx5r_begin_wqe(qp, seg, ctrl, idx, size, cur_edge, nreq, + send_ieth(wr), false, true); if (unlikely(err)) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; @@ -1057,8 +833,8 @@ static int handle_psv(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, mlx5_ib_warn(dev, "\n"); goto out; } - finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq, - next_fence, MLX5_OPCODE_SET_PSV); + mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, + nreq, next_fence, MLX5_OPCODE_SET_PSV); out: return err; @@ -1098,8 +874,8 @@ static int handle_reg_mr_integrity(struct mlx5_ib_dev *dev, if (unlikely(err)) goto out; - finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, - nreq, fence, MLX5_OPCODE_UMR); + mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, + wr->wr_id, nreq, fence, MLX5_OPCODE_UMR); err = begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq); if (unlikely(err)) { @@ -1130,8 +906,8 @@ static int handle_reg_mr_integrity(struct mlx5_ib_dev *dev, mlx5_ib_warn(dev, "\n"); goto out; } - finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq, - fence, MLX5_OPCODE_UMR); + mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, + nreq, fence, MLX5_OPCODE_UMR); sig_attrs = mr->ibmr.sig_attrs; err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq, @@ -1246,33 +1022,30 @@ static void handle_qpt_ud(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, } } -static int handle_qpt_reg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - const struct ib_send_wr *wr, - struct mlx5_wqe_ctrl_seg **ctrl, void **seg, - int *size, void **cur_edge, unsigned int idx) +void mlx5r_ring_db(struct mlx5_ib_qp *qp, unsigned int nreq, + struct mlx5_wqe_ctrl_seg *ctrl) { - int err = 0; + struct mlx5_bf *bf = &qp->bf; - if (unlikely(wr->opcode != MLX5_IB_WR_UMR)) { - err = -EINVAL; - mlx5_ib_warn(dev, "bad opcode %d\n", wr->opcode); - 
goto out; - } + qp->sq.head += nreq; - qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; - (*ctrl)->imm = cpu_to_be32(umr_wr(wr)->mkey); - err = set_reg_umr_segment(dev, *seg, wr); - if (unlikely(err)) - goto out; - *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); - *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - set_reg_mkey_segment(dev, *seg, wr); - *seg += sizeof(struct mlx5_mkey_seg); - *size += sizeof(struct mlx5_mkey_seg) / 16; - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); -out: - return err; + /* Make sure that descriptors are written before + * updating doorbell record and ringing the doorbell + */ + wmb(); + + qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post); + + /* Make sure doorbell record is visible to the HCA before + * we hit doorbell. + */ + wmb(); + + mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset); + /* Make sure doorbells don't leak out of SQ spinlock + * and reach the HCA out of order. + */ + bf->offset ^= bf->buf_size; } int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, @@ -1283,7 +1056,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_wqe_xrc_seg *xrc; - struct mlx5_bf *bf; void *cur_edge; int size; unsigned long flags; @@ -1305,8 +1077,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, if (qp->type == IB_QPT_GSI) return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr); - bf = &qp->bf; - spin_lock_irqsave(&qp->sq.lock, flags); for (nreq = 0; wr; nreq++, wr = wr->next) { @@ -1384,12 +1154,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, case IB_QPT_UD: handle_qpt_ud(qp, wr, &seg, &size, &cur_edge); break; - case MLX5_IB_QPT_REG_UMR: - err = handle_qpt_reg_umr(dev, qp, wr, &ctrl, &seg, - &size, &cur_edge, idx); - if (unlikely(err)) - goto out; - break; default: break; @@ -1418,35 +1182,16 @@ int 
mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, } qp->next_fence = next_fence; - finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, nreq, - fence, mlx5_ib_opcode[wr->opcode]); + mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, + nreq, fence, mlx5_ib_opcode[wr->opcode]); skip_psv: if (0) dump_wqe(qp, idx, size); } out: - if (likely(nreq)) { - qp->sq.head += nreq; - - /* Make sure that descriptors are written before - * updating doorbell record and ringing the doorbell - */ - wmb(); - - qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post); - - /* Make sure doorbell record is visible to the HCA before - * we hit doorbell. - */ - wmb(); - - mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset); - /* Make sure doorbells don't leak out of SQ spinlock - * and reach the HCA out of order. - */ - bf->offset ^= bf->buf_size; - } + if (likely(nreq)) + mlx5r_ring_db(qp, nreq, ctrl); spin_unlock_irqrestore(&qp->sq.lock, flags); @@ -1486,7 +1231,7 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, ind = qp->rq.head & (qp->rq.wqe_cnt - 1); for (nreq = 0; wr; nreq++, wr = wr->next) { - if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { + if (mlx5r_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { err = -ENOMEM; *bad_wr = wr; goto out; diff --git a/drivers/infiniband/hw/mlx5/wr.h b/drivers/infiniband/hw/mlx5/wr.h index 4f0057516402..2dc89438000d 100644 --- a/drivers/infiniband/hw/mlx5/wr.h +++ b/drivers/infiniband/hw/mlx5/wr.h @@ -41,6 +41,66 @@ static inline void *get_sq_edge(struct mlx5_ib_wq *sq, u32 idx) return fragment_end + MLX5_SEND_WQE_BB; } +/* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the + * next nearby edge and get new address translation for current WQE position. + * @sq: SQ buffer. + * @seg: Current WQE position (16B aligned). + * @wqe_sz: Total current WQE size [16B]. + * @cur_edge: Updated current edge. 
+ */ +static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg, + u32 wqe_sz, void **cur_edge) +{ + u32 idx; + + if (likely(*seg != *cur_edge)) + return; + + idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1); + *cur_edge = get_sq_edge(sq, idx); + + *seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx); +} + +/* mlx5r_memcpy_send_wqe - copy data from src to WQE and update the relevant + * WQ's pointers. At the end @seg is aligned to 16B regardless the copied size. + * @sq: SQ buffer. + * @cur_edge: Updated current edge. + * @seg: Current WQE position (16B aligned). + * @wqe_sz: Total current WQE size [16B]. + * @src: Pointer to copy from. + * @n: Number of bytes to copy. + */ +static inline void mlx5r_memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge, + void **seg, u32 *wqe_sz, + const void *src, size_t n) +{ + while (likely(n)) { + size_t leftlen = *cur_edge - *seg; + size_t copysz = min_t(size_t, leftlen, n); + size_t stride; + + memcpy(*seg, src, copysz); + + n -= copysz; + src += copysz; + stride = !n ? 
ALIGN(copysz, 16) : copysz; + *seg += stride; + *wqe_sz += stride >> 4; + handle_post_send_edge(sq, seg, *wqe_sz, cur_edge); + } +} + +int mlx5r_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq); +int mlx5r_begin_wqe(struct mlx5_ib_qp *qp, void **seg, + struct mlx5_wqe_ctrl_seg **ctrl, unsigned int *idx, + int *size, void **cur_edge, int nreq, __be32 general_id, + bool send_signaled, bool solicited); +void mlx5r_finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl, + void *seg, u8 size, void *cur_edge, unsigned int idx, + u64 wr_id, int nreq, u8 fence, u32 mlx5_opcode); +void mlx5r_ring_db(struct mlx5_ib_qp *qp, unsigned int nreq, + struct mlx5_wqe_ctrl_seg *ctrl); int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr, bool drain); int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, diff --git a/drivers/infiniband/hw/mthca/mthca_allocator.c b/drivers/infiniband/hw/mthca/mthca_allocator.c index aef1d274a14e..9f0f79d02d3c 100644 --- a/drivers/infiniband/hw/mthca/mthca_allocator.c +++ b/drivers/infiniband/hw/mthca/mthca_allocator.c @@ -51,7 +51,7 @@ u32 mthca_alloc(struct mthca_alloc *alloc) } if (obj < alloc->max) { - set_bit(obj, alloc->table); + __set_bit(obj, alloc->table); obj |= alloc->top; } else obj = -1; @@ -69,7 +69,7 @@ void mthca_free(struct mthca_alloc *alloc, u32 obj) spin_lock_irqsave(&alloc->lock, flags); - clear_bit(obj, alloc->table); + __clear_bit(obj, alloc->table); alloc->last = min(alloc->last, obj); alloc->top = (alloc->top + alloc->max) & alloc->mask; @@ -79,8 +79,6 @@ void mthca_free(struct mthca_alloc *alloc, u32 obj) int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask, u32 reserved) { - int i; - /* num must be a power of 2 */ if (num != 1 << (ffs(num) - 1)) return -EINVAL; @@ -90,21 +88,18 @@ int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask, alloc->max = num; alloc->mask = mask; spin_lock_init(&alloc->lock); - 
alloc->table = kmalloc_array(BITS_TO_LONGS(num), sizeof(long), - GFP_KERNEL); + alloc->table = bitmap_zalloc(num, GFP_KERNEL); if (!alloc->table) return -ENOMEM; - bitmap_zero(alloc->table, num); - for (i = 0; i < reserved; ++i) - set_bit(i, alloc->table); + bitmap_set(alloc->table, 0, reserved); return 0; } void mthca_alloc_cleanup(struct mthca_alloc *alloc) { - kfree(alloc->table); + bitmap_free(alloc->table); } /* diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index bdf5ed38de22..f330ce895d88 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1252,7 +1252,7 @@ static void get_board_id(void *vsd, char *board_id) if (be16_to_cpup(vsd + VSD_OFFSET_SIG1) == VSD_SIGNATURE_TOPSPIN && be16_to_cpup(vsd + VSD_OFFSET_SIG2) == VSD_SIGNATURE_TOPSPIN) { - strlcpy(board_id, vsd + VSD_OFFSET_TS_BOARD_ID, MTHCA_BOARD_ID_LEN); + strscpy(board_id, vsd + VSD_OFFSET_TS_BOARD_ID, MTHCA_BOARD_ID_LEN); } else { /* * The board ID is a string but the firmware byte diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index f507c4cd46d3..b54bc8865dae 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -939,12 +939,8 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type) err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (err) { - dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n"); - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n"); - goto err_free_res; - } + dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n"); + goto err_free_res; } /* We can handle large RDMA requests, so allow larger segments. 
*/ diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index ce0e0867e488..192f83fd7c8a 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -101,13 +101,13 @@ static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order) return -1; found: - clear_bit(seg, buddy->bits[o]); + __clear_bit(seg, buddy->bits[o]); --buddy->num_free[o]; while (o > order) { --o; seg <<= 1; - set_bit(seg ^ 1, buddy->bits[o]); + __set_bit(seg ^ 1, buddy->bits[o]); ++buddy->num_free[o]; } @@ -125,13 +125,13 @@ static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order) spin_lock(&buddy->lock); while (test_bit(seg ^ 1, buddy->bits[order])) { - clear_bit(seg ^ 1, buddy->bits[order]); + __clear_bit(seg ^ 1, buddy->bits[order]); --buddy->num_free[order]; seg >>= 1; ++order; } - set_bit(seg, buddy->bits[order]); + __set_bit(seg, buddy->bits[order]); ++buddy->num_free[order]; spin_unlock(&buddy->lock); @@ -139,7 +139,7 @@ static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order) static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order) { - int i, s; + int i; buddy->max_order = max_order; spin_lock_init(&buddy->lock); @@ -152,22 +152,20 @@ static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order) goto err_out; for (i = 0; i <= buddy->max_order; ++i) { - s = BITS_TO_LONGS(1 << (buddy->max_order - i)); - buddy->bits[i] = kmalloc_array(s, sizeof(long), GFP_KERNEL); + buddy->bits[i] = bitmap_zalloc(1 << (buddy->max_order - i), + GFP_KERNEL); if (!buddy->bits[i]) goto err_out_free; - bitmap_zero(buddy->bits[i], - 1 << (buddy->max_order - i)); } - set_bit(0, buddy->bits[buddy->max_order]); + __set_bit(0, buddy->bits[buddy->max_order]); buddy->num_free[buddy->max_order] = 1; return 0; err_out_free: for (i = 0; i <= buddy->max_order; ++i) - kfree(buddy->bits[i]); + bitmap_free(buddy->bits[i]); err_out: kfree(buddy->bits); @@ -181,7 +179,7 @@ static void 
mthca_buddy_cleanup(struct mthca_buddy *buddy) int i; for (i = 0; i <= buddy->max_order; ++i) - kfree(buddy->bits[i]); + bitmap_free(buddy->bits[i]); kfree(buddy->bits); kfree(buddy->num_free); @@ -469,8 +467,7 @@ int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, mpt_entry->start = cpu_to_be64(iova); mpt_entry->length = cpu_to_be64(total_size); - memset(&mpt_entry->lkey, 0, - sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey)); + memset_startat(mpt_entry, 0, lkey); if (mr->mtt) mpt_entry->mtt_seg = diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c index 7ea970774839..69af65f1b332 100644 --- a/drivers/infiniband/hw/mthca/mthca_profile.c +++ b/drivers/infiniband/hw/mthca/mthca_profile.c @@ -31,8 +31,6 @@ * SOFTWARE. */ -#include <linux/module.h> -#include <linux/moduleparam.h> #include <linux/string.h> #include <linux/slab.h> diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index ceee23ebc0f2..c46df53f26cf 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -50,14 +50,6 @@ #include <rdma/mthca-abi.h> #include "mthca_memfree.h" -static void init_query_mad(struct ib_smp *mad) -{ - mad->base_version = 1; - mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; - mad->class_version = 1; - mad->method = IB_MGMT_METHOD_GET; -} - static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *uhw) { @@ -78,7 +70,7 @@ static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *pr props->fw_ver = mdev->fw_ver; - init_query_mad(in_mad); + ib_init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; err = mthca_MAD_IFC(mdev, 1, 1, @@ -140,7 +132,7 @@ static int mthca_query_port(struct ib_device *ibdev, /* props being zeroed by the caller, avoid zeroing it here */ - init_query_mad(in_mad); + ib_init_query_mad(in_mad); 
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; in_mad->attr_mod = cpu_to_be32(port); @@ -234,7 +226,7 @@ static int mthca_query_pkey(struct ib_device *ibdev, if (!in_mad || !out_mad) goto out; - init_query_mad(in_mad); + ib_init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE; in_mad->attr_mod = cpu_to_be32(index / 32); @@ -263,7 +255,7 @@ static int mthca_query_gid(struct ib_device *ibdev, u32 port, if (!in_mad || !out_mad) goto out; - init_query_mad(in_mad); + ib_init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; in_mad->attr_mod = cpu_to_be32(port); @@ -274,7 +266,7 @@ static int mthca_query_gid(struct ib_device *ibdev, u32 port, memcpy(gid->raw, out_mad->data + 8, 8); - init_query_mad(in_mad); + ib_init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; in_mad->attr_mod = cpu_to_be32(index / 8); @@ -1006,7 +998,7 @@ static int mthca_init_node_data(struct mthca_dev *dev) if (!in_mad || !out_mad) goto out; - init_query_mad(in_mad); + ib_init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_NODE_DESC; err = mthca_MAD_IFC(dev, 1, 1, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index c51c3f40700e..56f06c68f31a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1363,7 +1363,7 @@ static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev) dev->hba_port_num = (hba_attribs->ptpnum_maxdoms_hbast_cv & OCRDMA_HBA_ATTRB_PTNUM_MASK) >> OCRDMA_HBA_ATTRB_PTNUM_SHIFT; - strlcpy(dev->model_number, + strscpy(dev->model_number, hba_attribs->controller_model_number, sizeof(dev->model_number)); } @@ -1506,7 +1506,6 @@ int ocrdma_mbx_dealloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd) static int ocrdma_mbx_alloc_pd_range(struct ocrdma_dev *dev) { int status = -ENOMEM; - size_t pd_bitmap_size; struct ocrdma_alloc_pd_range *cmd; struct ocrdma_alloc_pd_range_rsp *rsp; @@ -1528,10 +1527,8 @@ static int ocrdma_mbx_alloc_pd_range(struct ocrdma_dev 
*dev) dev->pd_mgr->pd_dpp_start = rsp->dpp_page_pdid & OCRDMA_ALLOC_PD_RNG_RSP_START_PDID_MASK; dev->pd_mgr->max_dpp_pd = rsp->pd_count; - pd_bitmap_size = - BITS_TO_LONGS(rsp->pd_count) * sizeof(long); - dev->pd_mgr->pd_dpp_bitmap = kzalloc(pd_bitmap_size, - GFP_KERNEL); + dev->pd_mgr->pd_dpp_bitmap = bitmap_zalloc(rsp->pd_count, + GFP_KERNEL); } kfree(cmd); } @@ -1547,9 +1544,8 @@ static int ocrdma_mbx_alloc_pd_range(struct ocrdma_dev *dev) dev->pd_mgr->pd_norm_start = rsp->dpp_page_pdid & OCRDMA_ALLOC_PD_RNG_RSP_START_PDID_MASK; dev->pd_mgr->max_normal_pd = rsp->pd_count; - pd_bitmap_size = BITS_TO_LONGS(rsp->pd_count) * sizeof(long); - dev->pd_mgr->pd_norm_bitmap = kzalloc(pd_bitmap_size, - GFP_KERNEL); + dev->pd_mgr->pd_norm_bitmap = bitmap_zalloc(rsp->pd_count, + GFP_KERNEL); } kfree(cmd); @@ -1611,8 +1607,8 @@ void ocrdma_alloc_pd_pool(struct ocrdma_dev *dev) static void ocrdma_free_pd_pool(struct ocrdma_dev *dev) { ocrdma_mbx_dealloc_pd_range(dev); - kfree(dev->pd_mgr->pd_norm_bitmap); - kfree(dev->pd_mgr->pd_dpp_bitmap); + bitmap_free(dev->pd_mgr->pd_norm_bitmap); + bitmap_free(dev->pd_mgr->pd_dpp_bitmap); kfree(dev->pd_mgr); } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 7abf6cf1e937..5d4b3bc16493 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -62,20 +62,6 @@ MODULE_DESCRIPTION(OCRDMA_ROCE_DRV_DESC " " OCRDMA_ROCE_DRV_VERSION); MODULE_AUTHOR("Emulex Corporation"); MODULE_LICENSE("Dual BSD/GPL"); -void ocrdma_get_guid(struct ocrdma_dev *dev, u8 *guid) -{ - u8 mac_addr[6]; - - memcpy(&mac_addr[0], &dev->nic_info.mac_addr[0], ETH_ALEN); - guid[0] = mac_addr[0] ^ 2; - guid[1] = mac_addr[1]; - guid[2] = mac_addr[2]; - guid[3] = 0xff; - guid[4] = 0xfe; - guid[5] = mac_addr[3]; - guid[6] = mac_addr[4]; - guid[7] = mac_addr[5]; -} static enum rdma_link_layer ocrdma_link_layer(struct ib_device *device, u32 port_num) { @@ -203,7 +189,8 @@ 
static int ocrdma_register_device(struct ocrdma_dev *dev) { int ret; - ocrdma_get_guid(dev, (u8 *)&dev->ibdev.node_guid); + addrconf_addr_eui48((u8 *)&dev->ibdev.node_guid, + dev->nic_info.mac_addr); BUILD_BUG_ON(sizeof(OCRDMA_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC, sizeof(OCRDMA_NODE_DESC)); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 735123d0e9ec..dd4021b11963 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -41,6 +41,7 @@ */ #include <linux/dma-mapping.h> +#include <net/addrconf.h> #include <rdma/ib_verbs.h> #include <rdma/ib_user_verbs.h> #include <rdma/iw_cm.h> @@ -74,7 +75,8 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, memset(attr, 0, sizeof *attr); memcpy(&attr->fw_ver, &dev->attr.fw_ver[0], min(sizeof(dev->attr.fw_ver), sizeof(attr->fw_ver))); - ocrdma_get_guid(dev, (u8 *)&attr->sys_image_guid); + addrconf_addr_eui48((u8 *)&attr->sys_image_guid, + dev->nic_info.mac_addr); attr->max_mr_size = dev->attr.max_mr_size; attr->page_size_cap = 0xffff000; attr->vendor_id = dev->nic_info.pdev->vendor; @@ -88,8 +90,8 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_SHUTDOWN_PORT | IB_DEVICE_SYS_IMAGE_GUID | - IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; + attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY; attr->max_send_sge = dev->attr.max_send_sge; attr->max_recv_sge = dev->attr.max_recv_sge; attr->max_sge_rd = dev->attr.max_rdma_sge; @@ -245,13 +247,13 @@ static bool ocrdma_search_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr, static u16 _ocrdma_pd_mgr_get_bitmap(struct ocrdma_dev *dev, bool dpp_pool) { u16 pd_bitmap_idx = 0; - const unsigned long *pd_bitmap; + unsigned long *pd_bitmap; if (dpp_pool) { pd_bitmap = dev->pd_mgr->pd_dpp_bitmap; pd_bitmap_idx = 
find_first_zero_bit(pd_bitmap, dev->pd_mgr->max_dpp_pd); - __set_bit(pd_bitmap_idx, dev->pd_mgr->pd_dpp_bitmap); + __set_bit(pd_bitmap_idx, pd_bitmap); dev->pd_mgr->pd_dpp_count++; if (dev->pd_mgr->pd_dpp_count > dev->pd_mgr->pd_dpp_thrsh) dev->pd_mgr->pd_dpp_thrsh = dev->pd_mgr->pd_dpp_count; @@ -259,7 +261,7 @@ static u16 _ocrdma_pd_mgr_get_bitmap(struct ocrdma_dev *dev, bool dpp_pool) pd_bitmap = dev->pd_mgr->pd_norm_bitmap; pd_bitmap_idx = find_first_zero_bit(pd_bitmap, dev->pd_mgr->max_normal_pd); - __set_bit(pd_bitmap_idx, dev->pd_mgr->pd_norm_bitmap); + __set_bit(pd_bitmap_idx, pd_bitmap); dev->pd_mgr->pd_norm_count++; if (dev->pd_mgr->pd_norm_count > dev->pd_mgr->pd_norm_thrsh) dev->pd_mgr->pd_norm_thrsh = dev->pd_mgr->pd_norm_count; @@ -1844,12 +1846,10 @@ int ocrdma_modify_srq(struct ib_srq *ibsrq, int ocrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) { - int status; struct ocrdma_srq *srq; srq = get_ocrdma_srq(ibsrq); - status = ocrdma_mbx_query_srq(srq, srq_attr); - return status; + return ocrdma_mbx_query_srq(srq, srq_attr); } int ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) @@ -1960,7 +1960,6 @@ static int ocrdma_build_inline_sges(struct ocrdma_qp *qp, static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, const struct ib_send_wr *wr) { - int status; struct ocrdma_sge *sge; u32 wqe_size = sizeof(*hdr); @@ -1972,8 +1971,7 @@ static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, sge = (struct ocrdma_sge *)(hdr + 1); } - status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size); - return status; + return ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size); } static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index b73d742a520c..f860b7fcef33 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ 
b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -59,7 +59,6 @@ int ocrdma_query_port(struct ib_device *ibdev, u32 port, enum rdma_protocol_type ocrdma_query_protocol(struct ib_device *device, u32 port_num); -void ocrdma_get_guid(struct ocrdma_dev *, u8 *guid); int ocrdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey); int ocrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 65ce6d0f1885..ba0c3e4c07d8 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -344,6 +344,10 @@ static int qedr_alloc_resources(struct qedr_dev *dev) if (IS_IWARP(dev)) { xa_init(&dev->qps); dev->iwarp_wq = create_singlethread_workqueue("qedr_iwarpq"); + if (!dev->iwarp_wq) { + rc = -ENOMEM; + goto err1; + } } /* Allocate Status blocks for CNQ */ @@ -351,7 +355,7 @@ static int qedr_alloc_resources(struct qedr_dev *dev) GFP_KERNEL); if (!dev->sb_array) { rc = -ENOMEM; - goto err1; + goto err_destroy_wq; } dev->cnq_array = kcalloc(dev->num_cnq, @@ -402,6 +406,9 @@ err3: kfree(dev->cnq_array); err2: kfree(dev->sb_array); +err_destroy_wq: + if (IS_IWARP(dev)) + destroy_workqueue(dev->iwarp_wq); err1: kfree(dev->sgid_tbl); return rc; @@ -500,7 +507,6 @@ static void qedr_sync_free_irqs(struct qedr_dev *dev) if (dev->int_info.msix_cnt) { idx = i * dev->num_hwfns + dev->affin_hwfn_idx; vector = dev->int_info.msix[idx].vector; - synchronize_irq(vector); free_irq(vector, &dev->cnq_array[i]); } } diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 8def88cfa300..db9ef3e1eb97 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -418,6 +418,7 @@ struct qedr_qp { u32 sq_psn; u32 qkey; u32 dest_qp_num; + u8 timeout; /* Relevant to qps created from kernel space only (ULPs) */ u8 prev_wqe_size; diff --git a/drivers/infiniband/hw/qedr/verbs.c 
b/drivers/infiniband/hw/qedr/verbs.c index 9100009f0a23..d745ce9dc88a 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -134,7 +134,8 @@ int qedr_query_device(struct ib_device *ibdev, attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe); attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD | IB_DEVICE_RC_RNR_NAK_GEN | - IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; + IB_DEVICE_MEM_MGT_EXTENSIONS; + attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY; if (!rdma_protocol_iwarp(&dev->ibdev, 1)) attr->device_cap_flags |= IB_DEVICE_XRC; @@ -1931,6 +1932,7 @@ static int qedr_create_user_qp(struct qedr_dev *dev, /* db offset was calculated in copy_qp_uresp, now set in the user q */ if (qedr_qp_has_sq(qp)) { qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset; + qp->sq.max_wr = attrs->cap.max_send_wr; rc = qedr_db_recovery_add(dev, qp->usq.db_addr, &qp->usq.db_rec_data->db_data, DB_REC_WIDTH_32B, @@ -1941,6 +1943,7 @@ static int qedr_create_user_qp(struct qedr_dev *dev, if (qedr_qp_has_rq(qp)) { qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset; + qp->rq.max_wr = attrs->cap.max_recv_wr; rc = qedr_db_recovery_add(dev, qp->urq.db_addr, &qp->urq.db_rec_data->db_data, DB_REC_WIDTH_32B, @@ -2610,6 +2613,8 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 1 << max_t(int, attr->timeout - 8, 0); else qp_params.ack_timeout = 0; + + qp->timeout = attr->timeout; } if (attr_mask & IB_QP_RETRY_CNT) { @@ -2769,7 +2774,7 @@ int qedr_query_qp(struct ib_qp *ibqp, rdma_ah_set_dgid_raw(&qp_attr->ah_attr, ¶ms.dgid.bytes[0]); rdma_ah_set_port_num(&qp_attr->ah_attr, 1); rdma_ah_set_sl(&qp_attr->ah_attr, 0); - qp_attr->timeout = params.timeout; + qp_attr->timeout = qp->timeout; qp_attr->rnr_retry = params.rnr_retry; qp_attr->retry_cnt = params.retry_cnt; qp_attr->min_rnr_timer = params.min_rnr_nak_timer; @@ -3079,7 +3084,7 @@ static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd, else DP_ERR(dev, "roce alloc tid 
returned error %d\n", rc); - goto err0; + goto err1; } /* Index only, 18 bit long, lkey = itid << 8 | key */ @@ -3103,7 +3108,7 @@ static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd, rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr); if (rc) { DP_ERR(dev, "roce register tid returned an error %d\n", rc); - goto err1; + goto err2; } mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; @@ -3112,8 +3117,10 @@ static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd, DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey); return mr; -err1: +err2: dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); +err1: + qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table); err0: kfree(mr); return ERR_PTR(rc); diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 9363bccfc6e7..26c615772be3 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -196,7 +196,7 @@ struct qib_ctxtdata { pid_t pid; pid_t subpid[QLOGIC_IB_MAX_SUBCTXT]; /* same size as task_struct .comm[], command that opened context */ - char comm[16]; + char comm[TASK_COMM_LEN]; /* pkeys set by this use of this ctxt */ u16 pkeys[4]; /* so file ops can get at unit */ @@ -321,7 +321,7 @@ struct qib_verbs_txreq { * These 7 values (SDR, DDR, and QDR may be ORed for auto-speed * negotiation) are used for the 3rd argument to path_f_set_ib_cfg * with cmd QIB_IB_CFG_SPD_ENB, by direct calls or via sysfs. They - * are also the the possible values for qib_link_speed_enabled and active + * are also the possible values for qib_link_speed_enabled and active * The values were chosen to match values used within the IB spec. */ #define QIB_IB_SDR 1 @@ -678,7 +678,7 @@ struct qib_pportdata { /* Observers. Not to be taken lightly, possibly not to ship. */ /* * If a diag read or write is to (bottom <= offset <= top), - * the "hoook" is called, allowing, e.g. shadows to be + * the "hook" is called, allowing, e.g. 
shadows to be * updated in sync with the driver. struct diag_observer * is the "visible" part. */ diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 63854f4b6524..3937144b2ae5 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -153,7 +153,7 @@ static int qib_get_base_info(struct file *fp, void __user *ubase, kinfo->spi_tidcnt += dd->rcvtidcnt % subctxt_cnt; /* * for this use, may be cfgctxts summed over all chips that - * are are configured and present + * are configured and present */ kinfo->spi_nctxts = dd->cfgctxts; /* unit (chip/board) our context is on */ @@ -851,7 +851,7 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma, ret = -EPERM; goto bail; } - /* don't allow them to later change to writeable with mprotect */ + /* don't allow them to later change to writable with mprotect */ vma->vm_flags &= ~VM_MAYWRITE; start = vma->vm_start; @@ -941,7 +941,7 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, goto bail; } /* - * Don't allow permission to later change to writeable + * Don't allow permission to later change to writable * with mprotect. */ vma->vm_flags &= ~VM_MAYWRITE; @@ -1321,7 +1321,7 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt, rcd->tid_pg_list = ptmp; rcd->pid = current->pid; init_waitqueue_head(&dd->rcd[ctxt]->wait); - strlcpy(rcd->comm, current->comm, sizeof(rcd->comm)); + get_task_comm(rcd->comm, current); ctxt_fp(fp) = rcd; qib_stats.sps_ctxts++; dd->freectxts--; diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index a0c5f3bdc324..a973905afd13 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -32,7 +32,6 @@ * SOFTWARE. 
*/ -#include <linux/module.h> #include <linux/fs.h> #include <linux/fs_context.h> #include <linux/mount.h> diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index a9b83bc13f4a..aea571943768 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -3030,7 +3030,7 @@ static int qib_6120_ib_updown(struct qib_pportdata *ppd, int ibup, u64 ibcs) /* Does read/modify/write to appropriate registers to * set output and direction bits selected by mask. - * these are in their canonical postions (e.g. lsb of + * these are in their canonical positions (e.g. lsb of * dir will end up in D48 of extctrl on existing chips). * returns contents of GP Inputs. */ diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index d1c0bc31869f..6af57067c32e 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -58,7 +58,7 @@ static void qib_set_ib_7220_lstate(struct qib_pportdata *, u16, u16); /* * This file contains almost all the chip-specific register information and * access functions for the QLogic QLogic_IB 7220 PCI-Express chip, with the - * exception of SerDes support, which in in qib_sd7220.c. + * exception of SerDes support, which in qib_sd7220.c. */ /* Below uses machine-generated qib_chipnum_regs.h file */ @@ -634,7 +634,7 @@ static const struct qib_hwerror_msgs qib_7220_hwerror_msgs[] = { QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIECPLTIMEOUT, "PCIe completion timeout"), /* - * In practice, it's unlikely wthat we'll see PCIe PLL, or bus + * In practice, it's unlikely that we'll see PCIe PLL, or bus * parity or memory parity error failures, because most likely we * won't be able to talk to the core of the chip. Nonetheless, we * might see them, if they are in parts of the PCIe core that aren't @@ -2988,7 +2988,7 @@ done: * the utility. Names need to be 12 chars or less (w/o newline), for proper * display by utility. 
* Non-error counters are first. - * Start of "error" conters is indicated by a leading "E " on the first + * Start of "error" counters is indicated by a leading "E " on the first * "error" counter, and doesn't count in label length. * The EgrOvfl list needs to be last so we truncate them at the configured * context count for the device. @@ -3742,7 +3742,7 @@ static int qib_7220_ib_updown(struct qib_pportdata *ppd, int ibup, u64 ibcs) /* * Does read/modify/write to appropriate registers to * set output and direction bits selected by mask. - * these are in their canonical postions (e.g. lsb of + * these are in their canonical positions (e.g. lsb of * dir will end up in D48 of extctrl on existing chips). * returns contents of GP Inputs. */ diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index ab98b6a3ae1e..9d2dd135b784 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -2124,7 +2124,7 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg, if (hwerrs & HWE_MASK(PowerOnBISTFailed)) { isfatal = 1; - strlcpy(msg, + strscpy(msg, "[Memory BIST test failed, InfiniPath hardware unusable]", msgl); /* ignore from now on, so disable until driver reloaded */ @@ -2850,9 +2850,9 @@ static void qib_setup_7322_cleanup(struct qib_devdata *dd) qib_7322_free_irq(dd); kfree(dd->cspec->cntrs); - kfree(dd->cspec->sendchkenable); - kfree(dd->cspec->sendgrhchk); - kfree(dd->cspec->sendibchk); + bitmap_free(dd->cspec->sendchkenable); + bitmap_free(dd->cspec->sendgrhchk); + bitmap_free(dd->cspec->sendibchk); kfree(dd->cspec->msix_entries); for (i = 0; i < dd->num_pports; i++) { unsigned long flags; @@ -5665,7 +5665,7 @@ static int qib_7322_ib_updown(struct qib_pportdata *ppd, int ibup, u64 ibcs) /* * Does read/modify/write to appropriate registers to * set output and direction bits selected by mask. - * these are in their canonical postions (e.g. 
lsb of + * these are in their canonical positions (e.g. lsb of * dir will end up in D48 of extctrl on existing chips). * returns contents of GP Inputs. */ @@ -6383,18 +6383,11 @@ static int qib_init_7322_variables(struct qib_devdata *dd) features = qib_7322_boardname(dd); /* now that piobcnt2k and 4k set, we can allocate these */ - sbufcnt = dd->piobcnt2k + dd->piobcnt4k + - NUM_VL15_BUFS + BITS_PER_LONG - 1; - sbufcnt /= BITS_PER_LONG; - dd->cspec->sendchkenable = - kmalloc_array(sbufcnt, sizeof(*dd->cspec->sendchkenable), - GFP_KERNEL); - dd->cspec->sendgrhchk = - kmalloc_array(sbufcnt, sizeof(*dd->cspec->sendgrhchk), - GFP_KERNEL); - dd->cspec->sendibchk = - kmalloc_array(sbufcnt, sizeof(*dd->cspec->sendibchk), - GFP_KERNEL); + sbufcnt = dd->piobcnt2k + dd->piobcnt4k + NUM_VL15_BUFS; + + dd->cspec->sendchkenable = bitmap_zalloc(sbufcnt, GFP_KERNEL); + dd->cspec->sendgrhchk = bitmap_zalloc(sbufcnt, GFP_KERNEL); + dd->cspec->sendibchk = bitmap_zalloc(sbufcnt, GFP_KERNEL); if (!dd->cspec->sendchkenable || !dd->cspec->sendgrhchk || !dd->cspec->sendibchk) { ret = -ENOMEM; diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index d1a72e89e297..45211008449f 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1106,8 +1106,7 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) if (!qib_cpulist_count) { u32 count = num_online_cpus(); - qib_cpulist = kcalloc(BITS_TO_LONGS(count), sizeof(long), - GFP_KERNEL); + qib_cpulist = bitmap_zalloc(count, GFP_KERNEL); if (qib_cpulist) qib_cpulist_count = count; } @@ -1279,7 +1278,7 @@ static void __exit qib_ib_cleanup(void) #endif qib_cpulist_count = 0; - kfree(qib_cpulist); + bitmap_free(qib_cpulist); WARN_ON(!xa_empty(&qib_dev_table)); qib_dev_cleanup(); diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index cb2a02d671e2..692b64efad97 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ 
b/drivers/infiniband/hw/qib/qib_pcie.c @@ -295,7 +295,7 @@ void qib_free_irq(struct qib_devdata *dd) * Setup pcie interrupt stuff again after a reset. I'd like to just call * pci_enable_msi() again for msi, but when I do that, * the MSI enable bit doesn't get set in the command word, and - * we switch to to a different interrupt vector, which is confusing, + * we switch to a different interrupt vector, which is confusing, * so I instead just do it all inline. Perhaps somehow can tie this * into the PCIe hotplug support at some point */ diff --git a/drivers/infiniband/hw/qib/qib_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c index 81b810d006c0..1dc3ccf0cf1f 100644 --- a/drivers/infiniband/hw/qib/qib_sd7220.c +++ b/drivers/infiniband/hw/qib/qib_sd7220.c @@ -587,7 +587,7 @@ static int epb_access(struct qib_devdata *dd, int sdnum, int claim) /* Need to release */ u64 pollval; /* - * The only writeable bits are the request and CS. + * The only writable bits are the request and CS. * Both should be clear */ u64 newval = 0; diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index 0a3b28142c05..41c272980f91 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -541,7 +541,7 @@ static struct attribute *port_diagc_attributes[] = { }; static const struct attribute_group port_diagc_group = { - .name = "linkcontrol", + .name = "diag_counters", .attrs = port_diagc_attributes, }; diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c index ac11943a5ddb..bf2f30d67949 100644 --- a/drivers/infiniband/hw/qib/qib_user_sdma.c +++ b/drivers/infiniband/hw/qib/qib_user_sdma.c @@ -941,7 +941,7 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, &addrlimit) || addrlimit > type_max(typeof(pkt->addrlimit))) { ret = -EINVAL; - goto free_pbc; + goto free_pkt; } pkt->addrlimit = addrlimit; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c 
b/drivers/infiniband/hw/qib/qib_verbs.c index ef91bff5c23c..0080f0be72fe 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -425,7 +425,7 @@ static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) } #endif -static void copy_io(u32 __iomem *piobuf, struct rvt_sge_state *ss, +static void qib_copy_io(u32 __iomem *piobuf, struct rvt_sge_state *ss, u32 length, unsigned flush_wc) { u32 extra = 0; @@ -975,7 +975,7 @@ static int qib_verbs_send_pio(struct rvt_qp *qp, struct ib_header *ibhdr, qib_pio_copy(piobuf, addr, dwords); goto done; } - copy_io(piobuf, ss, len, flush_wc); + qib_copy_io(piobuf, ss, len, flush_wc); done: if (dd->flags & QIB_USE_SPCL_TRIG) { u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023; diff --git a/drivers/infiniband/hw/usnic/usnic_debugfs.c b/drivers/infiniband/hw/usnic/usnic_debugfs.c index e5a3f02fb078..10a8cd5ba076 100644 --- a/drivers/infiniband/hw/usnic/usnic_debugfs.c +++ b/drivers/infiniband/hw/usnic/usnic_debugfs.c @@ -32,7 +32,6 @@ */ #include <linux/debugfs.h> -#include <linux/module.h> #include "usnic.h" #include "usnic_log.h" diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index d346dd48e731..46653ad56f5a 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -534,6 +534,11 @@ static int usnic_ib_pci_probe(struct pci_dev *pdev, struct usnic_ib_vf *vf; enum usnic_vnic_res_type res_type; + if (!device_iommu_mapped(&pdev->dev)) { + usnic_err("IOMMU required but not present or enabled. 
USNIC QPs will not function w/o enabling IOMMU\n"); + return -EPERM; + } + vf = kzalloc(sizeof(*vf), GFP_KERNEL); if (!vf) return -ENOMEM; @@ -642,12 +647,6 @@ static int __init usnic_ib_init(void) printk_once(KERN_INFO "%s", usnic_version); - err = usnic_uiom_init(DRV_NAME); - if (err) { - usnic_err("Unable to initialize umem with err %d\n", err); - return err; - } - err = pci_register_driver(&usnic_ib_pci_driver); if (err) { usnic_err("Unable to register with PCI\n"); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c index 3b60fa9cb58d..59bfbfaee325 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c @@ -32,7 +32,6 @@ */ #include <linux/bug.h> #include <linux/errno.h> -#include <linux/module.h> #include <linux/spinlock.h> #include "usnic_log.h" diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c index 586b0e52ba7f..fdb63a8fb997 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c @@ -31,7 +31,6 @@ * */ -#include <linux/module.h> #include <linux/init.h> #include <linux/errno.h> @@ -243,10 +242,11 @@ static struct attribute *usnic_ib_qpn_default_attrs[] = { &qpn_attr_summary.attr, NULL }; +ATTRIBUTE_GROUPS(usnic_ib_qpn_default); static struct kobj_type usnic_ib_qpn_type = { .sysfs_ops = &usnic_ib_qpn_sysfs_ops, - .default_attrs = usnic_ib_qpn_default_attrs + .default_groups = usnic_ib_qpn_default_groups, }; int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 756a83bcff58..6e8c4fbb8083 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -30,7 +30,6 @@ * SOFTWARE. 
* */ -#include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/errno.h> @@ -306,7 +305,8 @@ int usnic_ib_query_device(struct ib_device *ibdev, props->max_qp = qp_per_vf * kref_read(&us_ibdev->vf_cnt); props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT | - IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; + IB_DEVICE_SYS_IMAGE_GUID; + props->kernel_cap_flags = IBK_BLOCK_MULTICAST_LOOPBACK; props->max_cq = us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ] * kref_read(&us_ibdev->vf_cnt); props->max_pd = USNIC_UIOM_MAX_PD_CNT; @@ -442,12 +442,10 @@ int usnic_ib_query_gid(struct ib_device *ibdev, u32 port, int index, int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct usnic_ib_pd *pd = to_upd(ibpd); - void *umem_pd; - umem_pd = pd->umem_pd = usnic_uiom_alloc_pd(); - if (IS_ERR_OR_NULL(umem_pd)) { - return umem_pd ? PTR_ERR(umem_pd) : -ENOMEM; - } + pd->umem_pd = usnic_uiom_alloc_pd(ibpd->device->dev.parent); + if (IS_ERR(pd->umem_pd)) + return PTR_ERR(pd->umem_pd); return 0; } @@ -709,4 +707,3 @@ int usnic_ib_mmap(struct ib_ucontext *context, usnic_err("No VF %u found\n", vfid); return -EINVAL; } - diff --git a/drivers/infiniband/hw/usnic/usnic_transport.c b/drivers/infiniband/hw/usnic/usnic_transport.c index 82dd810bc000..dc37066900a5 100644 --- a/drivers/infiniband/hw/usnic/usnic_transport.c +++ b/drivers/infiniband/hw/usnic/usnic_transport.c @@ -32,7 +32,6 @@ */ #include <linux/bitmap.h> #include <linux/file.h> -#include <linux/module.h> #include <linux/slab.h> #include <net/inet_sock.h> diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 760b254ba42d..67923ced6e2d 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -40,7 +40,6 @@ #include <linux/iommu.h> #include <linux/workqueue.h> #include <linux/list.h> -#include <linux/pci.h> #include <rdma/ib_verbs.h> #include "usnic_log.h" @@ -96,7 +95,6 @@ 
static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, int ret; int off; int i; - int flags; dma_addr_t pa; unsigned int gup_flags; struct mm_struct *mm; @@ -133,8 +131,6 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, goto out; } - flags = IOMMU_READ | IOMMU_CACHE; - flags |= (writable) ? IOMMU_WRITE : 0; gup_flags = FOLL_WRITE; gup_flags |= (writable) ? 0 : FOLL_FORCE; cur_base = addr & PAGE_MASK; @@ -439,7 +435,7 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr) __usnic_uiom_release_tail(uiomr); } -struct usnic_uiom_pd *usnic_uiom_alloc_pd(void) +struct usnic_uiom_pd *usnic_uiom_alloc_pd(struct device *dev) { struct usnic_uiom_pd *pd; void *domain; @@ -448,7 +444,7 @@ struct usnic_uiom_pd *usnic_uiom_alloc_pd(void) if (!pd) return ERR_PTR(-ENOMEM); - pd->domain = domain = iommu_domain_alloc(&pci_bus_type); + pd->domain = domain = iommu_domain_alloc(dev->bus); if (!domain) { usnic_err("Failed to allocate IOMMU domain"); kfree(pd); @@ -483,7 +479,7 @@ int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev) if (err) goto out_free_dev; - if (!iommu_capable(dev->bus, IOMMU_CAP_CACHE_COHERENCY)) { + if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) { usnic_err("IOMMU of %s does not support cache coherency\n", dev_name(dev)); err = -EINVAL; @@ -556,13 +552,3 @@ void usnic_uiom_free_dev_list(struct device **devs) { kfree(devs); } - -int usnic_uiom_init(char *drv_name) -{ - if (!iommu_present(&pci_bus_type)) { - usnic_err("IOMMU required but not present or enabled. 
USNIC QPs will not function w/o enabling IOMMU\n"); - return -EPERM; - } - - return 0; -} diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h index 7ec8991ace67..5a9acf941510 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.h +++ b/drivers/infiniband/hw/usnic/usnic_uiom.h @@ -80,7 +80,7 @@ struct usnic_uiom_chunk { struct scatterlist page_list[]; }; -struct usnic_uiom_pd *usnic_uiom_alloc_pd(void); +struct usnic_uiom_pd *usnic_uiom_alloc_pd(struct device *dev); void usnic_uiom_dealloc_pd(struct usnic_uiom_pd *pd); int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev); void usnic_uiom_detach_dev_from_pd(struct usnic_uiom_pd *pd, @@ -91,5 +91,4 @@ struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd, unsigned long addr, size_t size, int access, int dmasync); void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr); -int usnic_uiom_init(char *drv_name); #endif /* USNIC_UIOM_H_ */ diff --git a/drivers/infiniband/hw/usnic/usnic_vnic.c b/drivers/infiniband/hw/usnic/usnic_vnic.c index ebe08f348453..0c47f73aaed5 100644 --- a/drivers/infiniband/hw/usnic/usnic_vnic.c +++ b/drivers/infiniband/hw/usnic/usnic_vnic.c @@ -31,7 +31,6 @@ * */ #include <linux/errno.h> -#include <linux/module.h> #include <linux/pci.h> #include "usnic_ib.h" diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_doorbell.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_doorbell.c index bf51357ea3aa..9a4de962e947 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_doorbell.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_doorbell.c @@ -63,12 +63,12 @@ int pvrdma_uar_table_init(struct pvrdma_dev *dev) tbl->max = num; tbl->mask = mask; spin_lock_init(&tbl->lock); - tbl->table = kcalloc(BITS_TO_LONGS(num), sizeof(long), GFP_KERNEL); + tbl->table = bitmap_zalloc(num, GFP_KERNEL); if (!tbl->table) return -ENOMEM; /* 0th UAR is taken by the device. 
*/ - set_bit(0, tbl->table); + __set_bit(0, tbl->table); return 0; } @@ -77,7 +77,7 @@ void pvrdma_uar_table_cleanup(struct pvrdma_dev *dev) { struct pvrdma_id_table *tbl = &dev->uar_table.tbl; - kfree(tbl->table); + bitmap_free(tbl->table); } int pvrdma_uar_alloc(struct pvrdma_dev *dev, struct pvrdma_uar_map *uar) @@ -100,7 +100,7 @@ int pvrdma_uar_alloc(struct pvrdma_dev *dev, struct pvrdma_uar_map *uar) return -ENOMEM; } - set_bit(obj, tbl->table); + __set_bit(obj, tbl->table); obj |= tbl->top; spin_unlock_irqrestore(&tbl->lock, flags); @@ -120,7 +120,7 @@ void pvrdma_uar_free(struct pvrdma_dev *dev, struct pvrdma_uar_map *uar) obj = uar->index & (tbl->max - 1); spin_lock_irqsave(&tbl->lock, flags); - clear_bit(obj, tbl->table); + __clear_bit(obj, tbl->table); tbl->last = min(tbl->last, obj); tbl->top = (tbl->top + tbl->max) & tbl->mask; spin_unlock_irqrestore(&tbl->lock, flags); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 105f3a155939..343288b02792 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -811,12 +811,10 @@ static int pvrdma_pci_probe(struct pci_dev *pdev, } /* Enable 64-Bit DMA */ - if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) != 0) { - ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); - if (ret != 0) { - dev_err(&pdev->dev, "dma_set_mask failed\n"); - goto err_free_resource; - } + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) { + dev_err(&pdev->dev, "dma_set_mask failed\n"); + goto err_free_resource; } dma_set_max_seg_size(&pdev->dev, UINT_MAX); pci_set_master(pdev); diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 3305f2744bfa..3acab569fbb9 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -2775,7 +2775,7 @@ void rvt_qp_iter(struct rvt_dev_info *rdi, EXPORT_SYMBOL(rvt_qp_iter); /* - * This 
should be called with s_lock held. + * This should be called with s_lock and r_lock held. */ void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status) @@ -3073,6 +3073,8 @@ do_write: case IB_WR_ATOMIC_FETCH_AND_ADD: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) goto inv_err; + if (unlikely(wqe->atomic_wr.remote_addr & (sizeof(u64) - 1))) + goto inv_err; if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), wqe->atomic_wr.remote_addr, wqe->atomic_wr.rkey, @@ -3132,7 +3134,9 @@ send_comp: rvp->n_loop_pkts++; flush_send: sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; + spin_lock(&sqp->r_lock); rvt_send_complete(sqp, wqe, send_status); + spin_unlock(&sqp->r_lock); if (local_ops) { atomic_dec(&sqp->local_ops_pending); local_ops = 0; @@ -3186,9 +3190,15 @@ serr: spin_unlock_irqrestore(&qp->r_lock, flags); serr_no_r_lock: spin_lock_irqsave(&sqp->s_lock, flags); + spin_lock(&sqp->r_lock); rvt_send_complete(sqp, wqe, send_status); + spin_unlock(&sqp->r_lock); if (sqp->ibqp.qp_type == IB_QPT_RC) { - int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR); + int lastwqe; + + spin_lock(&sqp->r_lock); + lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR); + spin_unlock(&sqp->r_lock); sqp->s_flags &= ~RVT_S_BUSY; spin_unlock_irqrestore(&sqp->s_lock, flags); diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 59481ae39505..d61f8de7f21c 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -15,7 +15,7 @@ MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("RDMA Verbs Transport Library"); -static int rvt_init(void) +static int __init rvt_init(void) { int ret = rvt_driver_cq_init(); @@ -26,7 +26,7 @@ static int rvt_init(void) } module_init(rvt_init); -static void rvt_cleanup(void) +static void __exit rvt_cleanup(void) { rvt_cq_exit(); } diff --git a/drivers/infiniband/sw/rxe/Makefile b/drivers/infiniband/sw/rxe/Makefile index 1e24673e9318..5395a581f4bb 100644 --- 
a/drivers/infiniband/sw/rxe/Makefile +++ b/drivers/infiniband/sw/rxe/Makefile @@ -22,5 +22,4 @@ rdma_rxe-y := \ rxe_mcast.o \ rxe_task.o \ rxe_net.o \ - rxe_sysfs.o \ rxe_hw_counters.o diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 8e0f9c489cab..51daac5c4feb 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -13,8 +13,6 @@ MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib"); MODULE_DESCRIPTION("Soft RDMA transport"); MODULE_LICENSE("Dual BSD/GPL"); -bool rxe_initialized; - /* free resources for a rxe device all objects created for this device must * have been destroyed */ @@ -30,8 +28,8 @@ void rxe_dealloc(struct ib_device *ib_dev) rxe_pool_cleanup(&rxe->cq_pool); rxe_pool_cleanup(&rxe->mr_pool); rxe_pool_cleanup(&rxe->mw_pool); - rxe_pool_cleanup(&rxe->mc_grp_pool); - rxe_pool_cleanup(&rxe->mc_elem_pool); + + WARN_ON(!RB_EMPTY_ROOT(&rxe->mcg_tree)); if (rxe->tfm) crypto_free_shash(rxe->tfm); @@ -48,6 +46,7 @@ static void rxe_init_device_param(struct rxe_dev *rxe) rxe->attr.max_qp = RXE_MAX_QP; rxe->attr.max_qp_wr = RXE_MAX_QP_WR; rxe->attr.device_cap_flags = RXE_DEVICE_CAP_FLAGS; + rxe->attr.kernel_cap_flags = IBK_ALLOW_USER_UNREG; rxe->attr.max_send_sge = RXE_MAX_SGE; rxe->attr.max_recv_sge = RXE_MAX_SGE; rxe->attr.max_sge_rd = RXE_MAX_SGE_RD; @@ -116,106 +115,37 @@ static void rxe_init_ports(struct rxe_dev *rxe) } /* init pools of managed objects */ -static int rxe_init_pools(struct rxe_dev *rxe) +static void rxe_init_pools(struct rxe_dev *rxe) { - int err; - - err = rxe_pool_init(rxe, &rxe->uc_pool, RXE_TYPE_UC, - rxe->max_ucontext); - if (err) - goto err1; - - err = rxe_pool_init(rxe, &rxe->pd_pool, RXE_TYPE_PD, - rxe->attr.max_pd); - if (err) - goto err2; - - err = rxe_pool_init(rxe, &rxe->ah_pool, RXE_TYPE_AH, - rxe->attr.max_ah); - if (err) - goto err3; - - err = rxe_pool_init(rxe, &rxe->srq_pool, RXE_TYPE_SRQ, - rxe->attr.max_srq); - if (err) - goto err4; - - err = 
rxe_pool_init(rxe, &rxe->qp_pool, RXE_TYPE_QP, - rxe->attr.max_qp); - if (err) - goto err5; - - err = rxe_pool_init(rxe, &rxe->cq_pool, RXE_TYPE_CQ, - rxe->attr.max_cq); - if (err) - goto err6; - - err = rxe_pool_init(rxe, &rxe->mr_pool, RXE_TYPE_MR, - rxe->attr.max_mr); - if (err) - goto err7; - - err = rxe_pool_init(rxe, &rxe->mw_pool, RXE_TYPE_MW, - rxe->attr.max_mw); - if (err) - goto err8; - - err = rxe_pool_init(rxe, &rxe->mc_grp_pool, RXE_TYPE_MC_GRP, - rxe->attr.max_mcast_grp); - if (err) - goto err9; - - err = rxe_pool_init(rxe, &rxe->mc_elem_pool, RXE_TYPE_MC_ELEM, - rxe->attr.max_total_mcast_qp_attach); - if (err) - goto err10; - - return 0; - -err10: - rxe_pool_cleanup(&rxe->mc_grp_pool); -err9: - rxe_pool_cleanup(&rxe->mw_pool); -err8: - rxe_pool_cleanup(&rxe->mr_pool); -err7: - rxe_pool_cleanup(&rxe->cq_pool); -err6: - rxe_pool_cleanup(&rxe->qp_pool); -err5: - rxe_pool_cleanup(&rxe->srq_pool); -err4: - rxe_pool_cleanup(&rxe->ah_pool); -err3: - rxe_pool_cleanup(&rxe->pd_pool); -err2: - rxe_pool_cleanup(&rxe->uc_pool); -err1: - return err; + rxe_pool_init(rxe, &rxe->uc_pool, RXE_TYPE_UC); + rxe_pool_init(rxe, &rxe->pd_pool, RXE_TYPE_PD); + rxe_pool_init(rxe, &rxe->ah_pool, RXE_TYPE_AH); + rxe_pool_init(rxe, &rxe->srq_pool, RXE_TYPE_SRQ); + rxe_pool_init(rxe, &rxe->qp_pool, RXE_TYPE_QP); + rxe_pool_init(rxe, &rxe->cq_pool, RXE_TYPE_CQ); + rxe_pool_init(rxe, &rxe->mr_pool, RXE_TYPE_MR); + rxe_pool_init(rxe, &rxe->mw_pool, RXE_TYPE_MW); } /* initialize rxe device state */ -static int rxe_init(struct rxe_dev *rxe) +static void rxe_init(struct rxe_dev *rxe) { - int err; - /* init default device parameters */ rxe_init_device_param(rxe); rxe_init_ports(rxe); - - err = rxe_init_pools(rxe); - if (err) - return err; + rxe_init_pools(rxe); /* init pending mmap list */ spin_lock_init(&rxe->mmap_offset_lock); spin_lock_init(&rxe->pending_lock); INIT_LIST_HEAD(&rxe->pending_mmaps); - mutex_init(&rxe->usdev_lock); + /* init multicast support */ + 
spin_lock_init(&rxe->mcg_lock); + rxe->mcg_tree = RB_ROOT; - return 0; + mutex_init(&rxe->usdev_lock); } void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu) @@ -237,12 +167,7 @@ void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu) */ int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name) { - int err; - - err = rxe_init(rxe); - if (err) - return err; - + rxe_init(rxe); rxe_set_mtu(rxe, mtu); return rxe_register_device(rxe, ibdev_name); @@ -290,7 +215,6 @@ static int __init rxe_module_init(void) return err; rdma_link_register(&rxe_link_ops); - rxe_initialized = true; pr_info("loaded\n"); return 0; } @@ -301,7 +225,6 @@ static void __exit rxe_module_exit(void) ib_unregister_driver(RDMA_DRIVER_RXE); rxe_net_exit(); - rxe_initialized = false; pr_info("unloaded\n"); } diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h index 1bb3fb618bf5..30fbdf3bc76a 100644 --- a/drivers/infiniband/sw/rxe/rxe.h +++ b/drivers/infiniband/sw/rxe/rxe.h @@ -12,7 +12,6 @@ #endif #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/module.h> #include <linux/skbuff.h> #include <rdma/ib_verbs.h> @@ -39,8 +38,6 @@ #define RXE_ROCE_V2_SPORT (0xc000) -extern bool rxe_initialized; - void rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu); int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name); diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c index 38c7b6fb39d7..3b05314ca739 100644 --- a/drivers/infiniband/sw/rxe/rxe_av.c +++ b/drivers/infiniband/sw/rxe/rxe_av.c @@ -99,11 +99,14 @@ void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr) av->network_type = type; } -struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt) +struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp) { struct rxe_ah *ah; u32 ah_num; + if (ahp) + *ahp = NULL; + if (!pkt || !pkt->qp) return NULL; @@ -117,10 +120,22 @@ struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt) 
if (ah_num) { /* only new user provider or kernel client */ ah = rxe_pool_get_index(&pkt->rxe->ah_pool, ah_num); - if (!ah || ah->ah_num != ah_num || rxe_ah_pd(ah) != pkt->qp->pd) { + if (!ah) { pr_warn("Unable to find AH matching ah_num\n"); return NULL; } + + if (rxe_ah_pd(ah) != pkt->qp->pd) { + pr_warn("PDs don't match for AH and QP\n"); + rxe_put(ah); + return NULL; + } + + if (ahp) + *ahp = ah; + else + rxe_put(ah); + return &ah->av; } diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index d771ba8449a1..fb0c008af78c 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -114,6 +114,8 @@ void retransmit_timer(struct timer_list *t) { struct rxe_qp *qp = from_timer(qp, t, retrans_timer); + pr_debug("%s: fired for qp#%d\n", __func__, qp->elem.index); + if (qp->valid) { qp->comp.timeout = 1; rxe_run_task(&qp->comp.task, 1); @@ -458,8 +460,6 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, struct rxe_send_wqe *wqe) { - unsigned long flags; - if (wqe->has_rd_atomic) { wqe->has_rd_atomic = 0; atomic_inc(&qp->req.rd_atomic); @@ -472,11 +472,11 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp, if (unlikely(qp->req.state == QP_STATE_DRAIN)) { /* state_lock used by requester & completer */ - spin_lock_irqsave(&qp->state_lock, flags); + spin_lock_bh(&qp->state_lock); if ((qp->req.state == QP_STATE_DRAIN) && (qp->comp.psn == qp->req.psn)) { qp->req.state = QP_STATE_DRAINED; - spin_unlock_irqrestore(&qp->state_lock, flags); + spin_unlock_bh(&qp->state_lock); if (qp->ibqp.event_handler) { struct ib_event ev; @@ -488,7 +488,7 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp, qp->ibqp.qp_context); } } else { - spin_unlock_irqrestore(&qp->state_lock, flags); + spin_unlock_bh(&qp->state_lock); } } @@ -528,7 +528,7 @@ static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify) struct rxe_queue *q = qp->sq.queue; while ((skb = 
skb_dequeue(&qp->resp_pkts))) { - rxe_drop_ref(qp); + rxe_put(qp); kfree_skb(skb); ib_device_put(qp->ibqp.device); } @@ -550,7 +550,7 @@ static void free_pkt(struct rxe_pkt_info *pkt) struct ib_device *dev = qp->ibqp.device; kfree_skb(skb); - rxe_drop_ref(qp); + rxe_put(qp); ib_device_put(dev); } @@ -562,16 +562,16 @@ int rxe_completer(void *arg) struct sk_buff *skb = NULL; struct rxe_pkt_info *pkt = NULL; enum comp_state state; - int ret = 0; + int ret; - rxe_add_ref(qp); + if (!rxe_get(qp)) + return -EAGAIN; - if (!qp->valid || qp->req.state == QP_STATE_ERROR || - qp->req.state == QP_STATE_RESET) { + if (!qp->valid || qp->comp.state == QP_STATE_ERROR || + qp->comp.state == QP_STATE_RESET) { rxe_drain_resp_pkts(qp, qp->valid && - qp->req.state == QP_STATE_ERROR); - ret = -EAGAIN; - goto done; + qp->comp.state == QP_STATE_ERROR); + goto exit; } if (qp->comp.timeout) { @@ -581,10 +581,8 @@ int rxe_completer(void *arg) qp->comp.timeout_retry = 0; } - if (qp->req.need_retry) { - ret = -EAGAIN; - goto done; - } + if (qp->req.need_retry) + goto exit; state = COMPST_GET_ACK; @@ -677,8 +675,7 @@ int rxe_completer(void *arg) qp->qp_timeout_jiffies) mod_timer(&qp->retrans_timer, jiffies + qp->qp_timeout_jiffies); - ret = -EAGAIN; - goto done; + goto exit; case COMPST_ERROR_RETRY: /* we come here if the retry timer fired and we did @@ -690,10 +687,8 @@ int rxe_completer(void *arg) */ /* there is nothing to retry in this case */ - if (!wqe || (wqe->state == wqe_state_posted)) { - ret = -EAGAIN; - goto done; - } + if (!wqe || (wqe->state == wqe_state_posted)) + goto exit; /* if we've started a retry, don't start another * retry sequence, unless this is a timeout. 
@@ -731,18 +726,21 @@ int rxe_completer(void *arg) break; case COMPST_RNR_RETRY: + /* we come here if we received an RNR NAK */ if (qp->comp.rnr_retry > 0) { if (qp->comp.rnr_retry != 7) qp->comp.rnr_retry--; - qp->req.need_retry = 1; + /* don't start a retry flow until the + * rnr timer has fired + */ + qp->req.wait_for_rnr_timer = 1; pr_debug("qp#%d set rnr nak timer\n", qp_num(qp)); mod_timer(&qp->rnr_nak_timer, jiffies + rnrnak_jiffies(aeth_syn(pkt) & ~AETH_TYPE_MASK)); - ret = -EAGAIN; - goto done; + goto exit; } else { rxe_counter_inc(rxe, RXE_CNT_RNR_RETRY_EXCEEDED); @@ -755,15 +753,23 @@ int rxe_completer(void *arg) WARN_ON_ONCE(wqe->status == IB_WC_SUCCESS); do_complete(qp, wqe); rxe_qp_error(qp); - ret = -EAGAIN; - goto done; + goto exit; } } + /* A non-zero return value will cause rxe_do_task to + * exit its loop and end the tasklet. A zero return + * will continue looping and return to rxe_completer + */ done: + ret = 0; + goto out; +exit: + ret = -EAGAIN; +out: if (pkt) free_pkt(pkt); - rxe_drop_ref(qp); + rxe_put(qp); return ret; } diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index 6848426c074f..b1a0ab3cd4bd 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -19,16 +19,16 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq, } if (cqe > rxe->attr.max_cqe) { - pr_warn("cqe(%d) > max_cqe(%d)\n", - cqe, rxe->attr.max_cqe); + pr_debug("cqe(%d) > max_cqe(%d)\n", + cqe, rxe->attr.max_cqe); goto err1; } if (cq) { count = queue_count(cq->queue, QUEUE_TYPE_TO_CLIENT); if (cqe < count) { - pr_warn("cqe(%d) < current # elements in queue (%d)", - cqe, count); + pr_debug("cqe(%d) < current # elements in queue (%d)", + cqe, count); goto err1; } } @@ -106,9 +106,9 @@ int rxe_cq_resize_queue(struct rxe_cq *cq, int cqe, int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) { struct ib_event ev; - unsigned long flags; int full; void *addr; + unsigned long flags; 
spin_lock_irqsave(&cq->cq_lock, flags); @@ -150,9 +150,9 @@ void rxe_cq_disable(struct rxe_cq *cq) spin_unlock_irqrestore(&cq->cq_lock, flags); } -void rxe_cq_cleanup(struct rxe_pool_entry *arg) +void rxe_cq_cleanup(struct rxe_pool_elem *elem) { - struct rxe_cq *cq = container_of(arg, typeof(*cq), pelem); + struct rxe_cq *cq = container_of(elem, typeof(*cq), elem); if (cq->queue) rxe_queue_cleanup(cq->queue); diff --git a/drivers/infiniband/sw/rxe/rxe_icrc.c b/drivers/infiniband/sw/rxe/rxe_icrc.c index e03af3012590..46bb07c5c4df 100644 --- a/drivers/infiniband/sw/rxe/rxe_icrc.c +++ b/drivers/infiniband/sw/rxe/rxe_icrc.c @@ -151,18 +151,8 @@ int rxe_icrc_check(struct sk_buff *skb, struct rxe_pkt_info *pkt) payload_size(pkt) + bth_pad(pkt)); icrc = ~icrc; - if (unlikely(icrc != pkt_icrc)) { - if (skb->protocol == htons(ETH_P_IPV6)) - pr_warn_ratelimited("bad ICRC from %pI6c\n", - &ipv6_hdr(skb)->saddr); - else if (skb->protocol == htons(ETH_P_IP)) - pr_warn_ratelimited("bad ICRC from %pI4\n", - &ip_hdr(skb)->saddr); - else - pr_warn_ratelimited("bad ICRC from unknown\n"); - + if (unlikely(icrc != pkt_icrc)) return -EINVAL; - } return 0; } diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 1ca43b859d80..c2a5c8814a48 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -19,7 +19,7 @@ void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr); void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr); -struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt); +struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp); /* rxe_cq.c */ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq, @@ -37,21 +37,13 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited); void rxe_cq_disable(struct rxe_cq *cq); -void rxe_cq_cleanup(struct rxe_pool_entry *arg); +void rxe_cq_cleanup(struct rxe_pool_elem *elem); /* rxe_mcast.c */ -int 
rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid, - struct rxe_mc_grp **grp_p); - -int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, - struct rxe_mc_grp *grp); - -int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, - union ib_gid *mgid); - -void rxe_drop_all_mcast_groups(struct rxe_qp *qp); - -void rxe_mc_cleanup(struct rxe_pool_entry *arg); +struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid); +int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid); +int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid); +void rxe_cleanup_mcg(struct kref *kref); /* rxe_mmap.c */ struct rxe_mmap_info { @@ -72,10 +64,10 @@ int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); /* rxe_mr.c */ u8 rxe_get_next_key(u32 last_key); -void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr); -int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, +void rxe_mr_init_dma(int access, struct rxe_mr *mr); +int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, int access, struct rxe_mr *mr); -int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr); +int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr); int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length, enum rxe_mr_copy_dir dir); int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma, @@ -85,11 +77,10 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key, enum rxe_mr_lookup_type type); int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length); int advance_dma_data(struct rxe_dma_info *dma, unsigned int length); -int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey); +int rxe_invalidate_mr(struct rxe_qp *qp, u32 key); int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe); -int rxe_mr_set_page(struct ib_mr *ibmr, u64 addr); int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); -void rxe_mr_cleanup(struct 
rxe_pool_entry *arg); +void rxe_mr_cleanup(struct rxe_pool_elem *elem); /* rxe_mw.c */ int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata); @@ -97,41 +88,32 @@ int rxe_dealloc_mw(struct ib_mw *ibmw); int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe); int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey); struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey); -void rxe_mw_cleanup(struct rxe_pool_entry *arg); +void rxe_mw_cleanup(struct rxe_pool_elem *elem); /* rxe_net.c */ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, int paylen, struct rxe_pkt_info *pkt); -int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb); +int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt, + struct sk_buff *skb); int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt, struct sk_buff *skb); const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num); -int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid); -int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid); /* rxe_qp.c */ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init); - int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, struct ib_qp_init_attr *init, struct rxe_create_qp_resp __user *uresp, struct ib_pd *ibpd, struct ib_udata *udata); - int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init); - int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, struct ib_qp_attr *attr, int mask); - int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, struct ib_udata *udata); - int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask); - void rxe_qp_error(struct rxe_qp *qp); - -void rxe_qp_destroy(struct rxe_qp *qp); - -void rxe_qp_cleanup(struct rxe_pool_entry *arg); +int rxe_qp_chk_destroy(struct rxe_qp *qp); +void rxe_qp_cleanup(struct rxe_pool_elem *elem); static inline int qp_num(struct rxe_qp *qp) { @@ -162,7 +144,7 @@ static inline 
int rcv_wqe_size(int max_sge) max_sge * sizeof(struct ib_sge); } -void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res); +void free_rd_atomic_resource(struct resp_res *res); static inline void rxe_advance_resp_resource(struct rxe_qp *qp) { @@ -175,18 +157,16 @@ void retransmit_timer(struct timer_list *t); void rnr_nak_timer(struct timer_list *t); /* rxe_srq.c */ -#define IB_SRQ_INIT_MASK (~IB_SRQ_LIMIT) - -int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, - struct ib_srq_attr *attr, enum ib_srq_attr_mask mask); - +int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init); int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, struct ib_srq_init_attr *init, struct ib_udata *udata, struct rxe_create_srq_resp __user *uresp); - +int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, + struct ib_srq_attr *attr, enum ib_srq_attr_mask mask); int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, struct ib_srq_attr *attr, enum ib_srq_attr_mask mask, struct rxe_modify_srq_cmd *ucmd, struct ib_udata *udata); +void rxe_srq_cleanup(struct rxe_pool_elem *elem); void rxe_dealloc(struct ib_device *ib_dev); diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c index 1c1d1b53312d..86cc2e18a7fd 100644 --- a/drivers/infiniband/sw/rxe/rxe_mcast.c +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c @@ -1,179 +1,479 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* + * Copyright (c) 2022 Hewlett Packard Enterprise, Inc. All rights reserved. * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. */ +/* + * rxe_mcast.c implements driver support for multicast transport. + * It is based on two data structures struct rxe_mcg ('mcg') and + * struct rxe_mca ('mca'). An mcg is allocated each time a qp is + * attached to a new mgid for the first time. These are indexed by + * a red-black tree using the mgid. 
This data structure is searched + * for the mcg when a multicast packet is received and when another + * qp is attached to the same mgid. It is cleaned up when the last qp + * is detached from the mcg. Each time a qp is attached to an mcg an + * mca is created. It holds a pointer to the qp and is added to a list + * of qp's that are attached to the mcg. The qp_list is used to replicate + * mcast packets in the rxe receive path. + */ + #include "rxe.h" -#include "rxe_loc.h" -/* caller should hold mc_grp_pool->pool_lock */ -static struct rxe_mc_grp *create_grp(struct rxe_dev *rxe, - struct rxe_pool *pool, - union ib_gid *mgid) +/** + * rxe_mcast_add - add multicast address to rxe device + * @rxe: rxe device object + * @mgid: multicast address as a gid + * + * Returns 0 on success else an error + */ +static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) { - int err; - struct rxe_mc_grp *grp; + unsigned char ll_addr[ETH_ALEN]; - grp = rxe_alloc_locked(&rxe->mc_grp_pool); - if (!grp) - return ERR_PTR(-ENOMEM); + ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); - INIT_LIST_HEAD(&grp->qp_list); - spin_lock_init(&grp->mcg_lock); - grp->rxe = rxe; - rxe_add_key_locked(grp, mgid); + return dev_mc_add(rxe->ndev, ll_addr); +} - err = rxe_mcast_add(rxe, mgid); - if (unlikely(err)) { - rxe_drop_key_locked(grp); - rxe_drop_ref(grp); - return ERR_PTR(err); +/** + * rxe_mcast_del - delete multicast address from rxe device + * @rxe: rxe device object + * @mgid: multicast address as a gid + * + * Returns 0 on success else an error + */ +static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid) +{ + unsigned char ll_addr[ETH_ALEN]; + + ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); + + return dev_mc_del(rxe->ndev, ll_addr); +} + +/** + * __rxe_insert_mcg - insert an mcg into red-black tree (rxe->mcg_tree) + * @mcg: mcg object with an embedded red-black tree node + * + * Context: caller must hold a reference to mcg and rxe->mcg_lock and + * is 
responsible to avoid adding the same mcg twice to the tree. + */ +static void __rxe_insert_mcg(struct rxe_mcg *mcg) +{ + struct rb_root *tree = &mcg->rxe->mcg_tree; + struct rb_node **link = &tree->rb_node; + struct rb_node *node = NULL; + struct rxe_mcg *tmp; + int cmp; + + while (*link) { + node = *link; + tmp = rb_entry(node, struct rxe_mcg, node); + + cmp = memcmp(&tmp->mgid, &mcg->mgid, sizeof(mcg->mgid)); + if (cmp > 0) + link = &(*link)->rb_left; + else + link = &(*link)->rb_right; } - return grp; + rb_link_node(&mcg->node, node, link); + rb_insert_color(&mcg->node, tree); } -int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid, - struct rxe_mc_grp **grp_p) +/** + * __rxe_remove_mcg - remove an mcg from red-black tree holding lock + * @mcg: mcast group object with an embedded red-black tree node + * + * Context: caller must hold a reference to mcg and rxe->mcg_lock + */ +static void __rxe_remove_mcg(struct rxe_mcg *mcg) { - int err; - struct rxe_mc_grp *grp; - struct rxe_pool *pool = &rxe->mc_grp_pool; - unsigned long flags; + rb_erase(&mcg->node, &mcg->rxe->mcg_tree); +} - if (rxe->attr.max_mcast_qp_attach == 0) - return -EINVAL; +/** + * __rxe_lookup_mcg - lookup mcg in rxe->mcg_tree while holding lock + * @rxe: rxe device object + * @mgid: multicast IP address + * + * Context: caller must hold rxe->mcg_lock + * Returns: mcg on success and takes a ref to mcg else NULL + */ +static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe, + union ib_gid *mgid) +{ + struct rb_root *tree = &rxe->mcg_tree; + struct rxe_mcg *mcg; + struct rb_node *node; + int cmp; + + node = tree->rb_node; - write_lock_irqsave(&pool->pool_lock, flags); + while (node) { + mcg = rb_entry(node, struct rxe_mcg, node); - grp = rxe_pool_get_key_locked(pool, mgid); - if (grp) - goto done; + cmp = memcmp(&mcg->mgid, mgid, sizeof(*mgid)); - grp = create_grp(rxe, pool, mgid); - if (IS_ERR(grp)) { - write_unlock_irqrestore(&pool->pool_lock, flags); - err = PTR_ERR(grp); - return err; 
+ if (cmp > 0) + node = node->rb_left; + else if (cmp < 0) + node = node->rb_right; + else + break; } -done: - write_unlock_irqrestore(&pool->pool_lock, flags); - *grp_p = grp; - return 0; + if (node) { + kref_get(&mcg->ref_cnt); + return mcg; + } + + return NULL; } -int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, - struct rxe_mc_grp *grp) +/** + * rxe_lookup_mcg - lookup up mcg in red-back tree + * @rxe: rxe device object + * @mgid: multicast IP address + * + * Returns: mcg if found else NULL + */ +struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid) { + struct rxe_mcg *mcg; + + spin_lock_bh(&rxe->mcg_lock); + mcg = __rxe_lookup_mcg(rxe, mgid); + spin_unlock_bh(&rxe->mcg_lock); + + return mcg; +} + +/** + * __rxe_init_mcg - initialize a new mcg + * @rxe: rxe device + * @mgid: multicast address as a gid + * @mcg: new mcg object + * + * Context: caller should hold rxe->mcg lock + */ +static void __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid, + struct rxe_mcg *mcg) +{ + kref_init(&mcg->ref_cnt); + memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid)); + INIT_LIST_HEAD(&mcg->qp_list); + mcg->rxe = rxe; + + /* caller holds a ref on mcg but that will be + * dropped when mcg goes out of scope. We need to take a ref + * on the pointer that will be saved in the red-black tree + * by __rxe_insert_mcg and used to lookup mcg from mgid later. + * Inserting mcg makes it visible to outside so this should + * be done last after the object is ready. 
+ */ + kref_get(&mcg->ref_cnt); + __rxe_insert_mcg(mcg); +} + +/** + * rxe_get_mcg - lookup or allocate a mcg + * @rxe: rxe device object + * @mgid: multicast IP address as a gid + * + * Returns: mcg on success else ERR_PTR(error) + */ +static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid) +{ + struct rxe_mcg *mcg, *tmp; int err; - struct rxe_mc_elem *elem; - /* check to see of the qp is already a member of the group */ - spin_lock_bh(&qp->grp_lock); - spin_lock_bh(&grp->mcg_lock); - list_for_each_entry(elem, &grp->qp_list, qp_list) { - if (elem->qp == qp) { - err = 0; - goto out; - } - } + if (rxe->attr.max_mcast_grp == 0) + return ERR_PTR(-EINVAL); + + /* check to see if mcg already exists */ + mcg = rxe_lookup_mcg(rxe, mgid); + if (mcg) + return mcg; - if (grp->num_qp >= rxe->attr.max_mcast_qp_attach) { + /* check to see if we have reached limit */ + if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) { err = -ENOMEM; - goto out; + goto err_dec; } - elem = rxe_alloc_locked(&rxe->mc_elem_pool); - if (!elem) { + /* speculative alloc of new mcg */ + mcg = kzalloc(sizeof(*mcg), GFP_KERNEL); + if (!mcg) { err = -ENOMEM; - goto out; + goto err_dec; + } + + spin_lock_bh(&rxe->mcg_lock); + /* re-check to see if someone else just added it */ + tmp = __rxe_lookup_mcg(rxe, mgid); + if (tmp) { + spin_unlock_bh(&rxe->mcg_lock); + atomic_dec(&rxe->mcg_num); + kfree(mcg); + return tmp; + } + + __rxe_init_mcg(rxe, mgid, mcg); + spin_unlock_bh(&rxe->mcg_lock); + + /* add mcast address outside of lock */ + err = rxe_mcast_add(rxe, mgid); + if (!err) + return mcg; + + kfree(mcg); +err_dec: + atomic_dec(&rxe->mcg_num); + return ERR_PTR(err); +} + +/** + * rxe_cleanup_mcg - cleanup mcg for kref_put + * @kref: struct kref embnedded in mcg + */ +void rxe_cleanup_mcg(struct kref *kref) +{ + struct rxe_mcg *mcg = container_of(kref, typeof(*mcg), ref_cnt); + + kfree(mcg); +} + +/** + * __rxe_destroy_mcg - destroy mcg object holding rxe->mcg_lock + * 
@mcg: the mcg object + * + * Context: caller is holding rxe->mcg_lock + * no qp's are attached to mcg + */ +static void __rxe_destroy_mcg(struct rxe_mcg *mcg) +{ + struct rxe_dev *rxe = mcg->rxe; + + /* remove mcg from red-black tree then drop ref */ + __rxe_remove_mcg(mcg); + kref_put(&mcg->ref_cnt, rxe_cleanup_mcg); + + atomic_dec(&rxe->mcg_num); +} + +/** + * rxe_destroy_mcg - destroy mcg object + * @mcg: the mcg object + * + * Context: no qp's are attached to mcg + */ +static void rxe_destroy_mcg(struct rxe_mcg *mcg) +{ + /* delete mcast address outside of lock */ + rxe_mcast_del(mcg->rxe, &mcg->mgid); + + spin_lock_bh(&mcg->rxe->mcg_lock); + __rxe_destroy_mcg(mcg); + spin_unlock_bh(&mcg->rxe->mcg_lock); +} + +/** + * __rxe_init_mca - initialize a new mca holding lock + * @qp: qp object + * @mcg: mcg object + * @mca: empty space for new mca + * + * Context: caller must hold references on qp and mcg, rxe->mcg_lock + * and pass memory for new mca + * + * Returns: 0 on success else an error + */ +static int __rxe_init_mca(struct rxe_qp *qp, struct rxe_mcg *mcg, + struct rxe_mca *mca) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + int n; + + n = atomic_inc_return(&rxe->mcg_attach); + if (n > rxe->attr.max_total_mcast_qp_attach) { + atomic_dec(&rxe->mcg_attach); + return -ENOMEM; + } + + n = atomic_inc_return(&mcg->qp_num); + if (n > rxe->attr.max_mcast_qp_attach) { + atomic_dec(&mcg->qp_num); + atomic_dec(&rxe->mcg_attach); + return -ENOMEM; } - /* each qp holds a ref on the grp */ - rxe_add_ref(grp); + atomic_inc(&qp->mcg_num); + + rxe_get(qp); + mca->qp = qp; + + list_add_tail(&mca->qp_list, &mcg->qp_list); + + return 0; +} - grp->num_qp++; - elem->qp = qp; - elem->grp = grp; +/** + * rxe_attach_mcg - attach qp to mcg if not already attached + * @qp: qp object + * @mcg: mcg object + * + * Context: caller must hold reference on qp and mcg. 
+ * Returns: 0 on success else an error + */ +static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) +{ + struct rxe_dev *rxe = mcg->rxe; + struct rxe_mca *mca, *tmp; + int err; - list_add(&elem->qp_list, &grp->qp_list); - list_add(&elem->grp_list, &qp->grp_list); + /* check to see if the qp is already a member of the group */ + spin_lock_bh(&rxe->mcg_lock); + list_for_each_entry(mca, &mcg->qp_list, qp_list) { + if (mca->qp == qp) { + spin_unlock_bh(&rxe->mcg_lock); + return 0; + } + } + spin_unlock_bh(&rxe->mcg_lock); - err = 0; + /* speculative alloc new mca without using GFP_ATOMIC */ + mca = kzalloc(sizeof(*mca), GFP_KERNEL); + if (!mca) + return -ENOMEM; + + spin_lock_bh(&rxe->mcg_lock); + /* re-check to see if someone else just attached qp */ + list_for_each_entry(tmp, &mcg->qp_list, qp_list) { + if (tmp->qp == qp) { + kfree(mca); + err = 0; + goto out; + } + } + + err = __rxe_init_mca(qp, mcg, mca); + if (err) + kfree(mca); out: - spin_unlock_bh(&grp->mcg_lock); - spin_unlock_bh(&qp->grp_lock); + spin_unlock_bh(&rxe->mcg_lock); return err; } -int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, - union ib_gid *mgid) +/** + * __rxe_cleanup_mca - cleanup mca object holding lock + * @mca: mca object + * @mcg: mcg object + * + * Context: caller must hold a reference to mcg and rxe->mcg_lock + */ +static void __rxe_cleanup_mca(struct rxe_mca *mca, struct rxe_mcg *mcg) { - struct rxe_mc_grp *grp; - struct rxe_mc_elem *elem, *tmp; + list_del(&mca->qp_list); - grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid); - if (!grp) - goto err1; + atomic_dec(&mcg->qp_num); + atomic_dec(&mcg->rxe->mcg_attach); + atomic_dec(&mca->qp->mcg_num); + rxe_put(mca->qp); - spin_lock_bh(&qp->grp_lock); - spin_lock_bh(&grp->mcg_lock); + kfree(mca); +} + +/** + * rxe_detach_mcg - detach qp from mcg + * @mcg: mcg object + * @qp: qp object + * + * Returns: 0 on success else an error if qp is not attached. 
+ */ +static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) +{ + struct rxe_dev *rxe = mcg->rxe; + struct rxe_mca *mca, *tmp; - list_for_each_entry_safe(elem, tmp, &grp->qp_list, qp_list) { - if (elem->qp == qp) { - list_del(&elem->qp_list); - list_del(&elem->grp_list); - grp->num_qp--; + spin_lock_bh(&rxe->mcg_lock); + list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) { + if (mca->qp == qp) { + __rxe_cleanup_mca(mca, mcg); - spin_unlock_bh(&grp->mcg_lock); - spin_unlock_bh(&qp->grp_lock); - rxe_drop_ref(elem); - rxe_drop_ref(grp); /* ref held by QP */ - rxe_drop_ref(grp); /* ref from get_key */ + /* if the number of qp's attached to the + * mcast group falls to zero go ahead and + * tear it down. This will not free the + * object since we are still holding a ref + * from the caller + */ + if (atomic_read(&mcg->qp_num) <= 0) + __rxe_destroy_mcg(mcg); + + spin_unlock_bh(&rxe->mcg_lock); return 0; } } - spin_unlock_bh(&grp->mcg_lock); - spin_unlock_bh(&qp->grp_lock); - rxe_drop_ref(grp); /* ref from get_key */ -err1: + /* we didn't find the qp on the list */ + spin_unlock_bh(&rxe->mcg_lock); return -EINVAL; } -void rxe_drop_all_mcast_groups(struct rxe_qp *qp) +/** + * rxe_attach_mcast - attach qp to multicast group (see IBA-11.3.1) + * @ibqp: (IB) qp object + * @mgid: multicast IP address + * @mlid: multicast LID, ignored for RoCEv2 (see IBA-A17.5.6) + * + * Returns: 0 on success else an errno + */ +int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) { - struct rxe_mc_grp *grp; - struct rxe_mc_elem *elem; + int err; + struct rxe_dev *rxe = to_rdev(ibqp->device); + struct rxe_qp *qp = to_rqp(ibqp); + struct rxe_mcg *mcg; - while (1) { - spin_lock_bh(&qp->grp_lock); - if (list_empty(&qp->grp_list)) { - spin_unlock_bh(&qp->grp_lock); - break; - } - elem = list_first_entry(&qp->grp_list, struct rxe_mc_elem, - grp_list); - list_del(&elem->grp_list); - spin_unlock_bh(&qp->grp_lock); - - grp = elem->grp; - 
spin_lock_bh(&grp->mcg_lock); - list_del(&elem->qp_list); - grp->num_qp--; - spin_unlock_bh(&grp->mcg_lock); - rxe_drop_ref(grp); - rxe_drop_ref(elem); - } + /* takes a ref on mcg if successful */ + mcg = rxe_get_mcg(rxe, mgid); + if (IS_ERR(mcg)) + return PTR_ERR(mcg); + + err = rxe_attach_mcg(mcg, qp); + + /* if we failed to attach the first qp to mcg tear it down */ + if (atomic_read(&mcg->qp_num) == 0) + rxe_destroy_mcg(mcg); + + kref_put(&mcg->ref_cnt, rxe_cleanup_mcg); + + return err; } -void rxe_mc_cleanup(struct rxe_pool_entry *arg) +/** + * rxe_detach_mcast - detach qp from multicast group (see IBA-11.3.2) + * @ibqp: address of (IB) qp object + * @mgid: multicast IP address + * @mlid: multicast LID, ignored for RoCEv2 (see IBA-A17.5.6) + * + * Returns: 0 on success else an errno + */ +int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) { - struct rxe_mc_grp *grp = container_of(arg, typeof(*grp), pelem); - struct rxe_dev *rxe = grp->rxe; + struct rxe_dev *rxe = to_rdev(ibqp->device); + struct rxe_qp *qp = to_rqp(ibqp); + struct rxe_mcg *mcg; + int err; - rxe_drop_key(grp); - rxe_mcast_delete(rxe, &grp->mgid); + mcg = rxe_lookup_mcg(rxe, mgid); + if (!mcg) + return -EINVAL; + + err = rxe_detach_mcg(mcg, qp); + kref_put(&mcg->ref_cnt, rxe_cleanup_mcg); + + return err; } diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c index 035f226af133..9149b6095429 100644 --- a/drivers/infiniband/sw/rxe/rxe_mmap.c +++ b/drivers/infiniband/sw/rxe/rxe_mmap.c @@ -4,7 +4,6 @@ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. 
*/ -#include <linux/module.h> #include <linux/vmalloc.h> #include <linux/mm.h> #include <linux/errno.h> diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 53271df10e47..502e9ada99b3 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -24,7 +24,7 @@ u8 rxe_get_next_key(u32 last_key) int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length) { - struct rxe_map_set *set = mr->cur_map_set; + switch (mr->type) { case IB_MR_TYPE_DMA: @@ -32,8 +32,8 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length) case IB_MR_TYPE_USER: case IB_MR_TYPE_MEM_REG: - if (iova < set->iova || length > set->length || - iova > set->iova + set->length - length) + if (iova < mr->ibmr.iova || length > mr->ibmr.length || + iova > mr->ibmr.iova + mr->ibmr.length - length) return -EFAULT; return 0; @@ -50,7 +50,7 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length) static void rxe_mr_init(int access, struct rxe_mr *mr) { - u32 lkey = mr->pelem.index << 8 | rxe_get_next_key(-1); + u32 lkey = mr->elem.index << 8 | rxe_get_next_key(-1); u32 rkey = (access & IB_ACCESS_REMOTE) ? 
lkey : 0; /* set ibmr->l/rkey and also copy into private l/rkey @@ -65,106 +65,56 @@ static void rxe_mr_init(int access, struct rxe_mr *mr) mr->map_shift = ilog2(RXE_BUF_PER_MAP); } -static void rxe_mr_free_map_set(int num_map, struct rxe_map_set *set) -{ - int i; - - for (i = 0; i < num_map; i++) - kfree(set->map[i]); - - kfree(set->map); - kfree(set); -} - -static int rxe_mr_alloc_map_set(int num_map, struct rxe_map_set **setp) +static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf) { int i; - struct rxe_map_set *set; + int num_map; + struct rxe_map **map = mr->map; - set = kmalloc(sizeof(*set), GFP_KERNEL); - if (!set) - goto err_out; + num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP; - set->map = kmalloc_array(num_map, sizeof(struct rxe_map *), GFP_KERNEL); - if (!set->map) - goto err_free_set; + mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL); + if (!mr->map) + goto err1; for (i = 0; i < num_map; i++) { - set->map[i] = kmalloc(sizeof(struct rxe_map), GFP_KERNEL); - if (!set->map[i]) - goto err_free_map; + mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL); + if (!mr->map[i]) + goto err2; } - *setp = set; - - return 0; - -err_free_map: - for (i--; i >= 0; i--) - kfree(set->map[i]); - - kfree(set->map); -err_free_set: - kfree(set); -err_out: - return -ENOMEM; -} - -/** - * rxe_mr_alloc() - Allocate memory map array(s) for MR - * @mr: Memory region - * @num_buf: Number of buffer descriptors to support - * @both: If non zero allocate both mr->map and mr->next_map - * else just allocate mr->map. 
Used for fast MRs - * - * Return: 0 on success else an error - */ -static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf, int both) -{ - int ret; - int num_map; - BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP)); - num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP; mr->map_shift = ilog2(RXE_BUF_PER_MAP); mr->map_mask = RXE_BUF_PER_MAP - 1; + mr->num_buf = num_buf; - mr->max_buf = num_map * RXE_BUF_PER_MAP; mr->num_map = num_map; - - ret = rxe_mr_alloc_map_set(num_map, &mr->cur_map_set); - if (ret) - goto err_out; - - if (both) { - ret = rxe_mr_alloc_map_set(num_map, &mr->next_map_set); - if (ret) { - rxe_mr_free_map_set(mr->num_map, mr->cur_map_set); - goto err_out; - } - } + mr->max_buf = num_map * RXE_BUF_PER_MAP; return 0; -err_out: +err2: + for (i--; i >= 0; i--) + kfree(mr->map[i]); + + kfree(mr->map); +err1: return -ENOMEM; } -void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr) +void rxe_mr_init_dma(int access, struct rxe_mr *mr) { rxe_mr_init(access, mr); - mr->ibmr.pd = &pd->ibpd; mr->access = access; mr->state = RXE_MR_STATE_VALID; mr->type = IB_MR_TYPE_DMA; } -int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, +int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, int access, struct rxe_mr *mr) { - struct rxe_map_set *set; struct rxe_map **map; struct rxe_phys_buf *buf = NULL; struct ib_umem *umem; @@ -172,8 +122,9 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, int num_buf; void *vaddr; int err; + int i; - umem = ib_umem_get(pd->ibpd.device, start, length, access); + umem = ib_umem_get(&rxe->ib_dev, start, length, access); if (IS_ERR(umem)) { pr_warn("%s: Unable to pin memory region err = %d\n", __func__, (int)PTR_ERR(umem)); @@ -185,20 +136,18 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, rxe_mr_init(access, mr); - err = rxe_mr_alloc(mr, num_buf, 0); + err = rxe_mr_alloc(mr, num_buf); if (err) { pr_warn("%s: Unable to allocate 
memory for map\n", __func__); goto err_release_umem; } - set = mr->cur_map_set; - set->page_shift = PAGE_SHIFT; - set->page_mask = PAGE_SIZE - 1; - - num_buf = 0; - map = set->map; + mr->page_shift = PAGE_SHIFT; + mr->page_mask = PAGE_SIZE - 1; + num_buf = 0; + map = mr->map; if (length > 0) { buf = map[0]->buf; @@ -221,42 +170,39 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, buf->size = PAGE_SIZE; num_buf++; buf++; + } } - mr->ibmr.pd = &pd->ibpd; mr->umem = umem; mr->access = access; + mr->offset = ib_umem_offset(umem); mr->state = RXE_MR_STATE_VALID; mr->type = IB_MR_TYPE_USER; - set->length = length; - set->iova = iova; - set->va = start; - set->offset = ib_umem_offset(umem); - return 0; err_cleanup_map: - rxe_mr_free_map_set(mr->num_map, mr->cur_map_set); + for (i = 0; i < mr->num_map; i++) + kfree(mr->map[i]); + kfree(mr->map); err_release_umem: ib_umem_release(umem); err_out: return err; } -int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr) +int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr) { int err; /* always allow remote access for FMRs */ rxe_mr_init(IB_ACCESS_REMOTE, mr); - err = rxe_mr_alloc(mr, max_pages, 1); + err = rxe_mr_alloc(mr, max_pages); if (err) goto err1; - mr->ibmr.pd = &pd->ibpd; mr->max_buf = max_pages; mr->state = RXE_MR_STATE_FREE; mr->type = IB_MR_TYPE_MEM_REG; @@ -270,24 +216,21 @@ err1: static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out, size_t *offset_out) { - struct rxe_map_set *set = mr->cur_map_set; - size_t offset = iova - set->iova + set->offset; + size_t offset = iova - mr->ibmr.iova + mr->offset; int map_index; int buf_index; u64 length; - struct rxe_map *map; - if (likely(set->page_shift)) { - *offset_out = offset & set->page_mask; - offset >>= set->page_shift; + if (likely(mr->page_shift)) { + *offset_out = offset & mr->page_mask; + offset >>= mr->page_shift; *n_out = offset & mr->map_mask; *m_out = offset >> mr->map_shift; } else { map_index 
= 0; buf_index = 0; - map = set->map[map_index]; - length = map->buf[buf_index].size; + length = mr->map[map_index]->buf[buf_index].size; while (offset >= length) { offset -= length; @@ -297,8 +240,7 @@ static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out, map_index++; buf_index = 0; } - map = set->map[map_index]; - length = map->buf[buf_index].size; + length = mr->map[map_index]->buf[buf_index].size; } *m_out = map_index; @@ -319,7 +261,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length) goto out; } - if (!mr->cur_map_set) { + if (!mr->map) { addr = (void *)(uintptr_t)iova; goto out; } @@ -332,13 +274,13 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length) lookup_iova(mr, iova, &m, &n, &offset); - if (offset + length > mr->cur_map_set->map[m]->buf[n].size) { + if (offset + length > mr->map[m]->buf[n].size) { pr_warn("crosses page boundary\n"); addr = NULL; goto out; } - addr = (void *)(uintptr_t)mr->cur_map_set->map[m]->buf[n].addr + offset; + addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset; out: return addr; @@ -374,7 +316,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length, return 0; } - WARN_ON_ONCE(!mr->cur_map_set); + WARN_ON_ONCE(!mr->map); err = mr_check_range(mr, iova, length); if (err) { @@ -384,7 +326,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length, lookup_iova(mr, iova, &m, &i, &offset); - map = mr->cur_map_set->map + m; + map = mr->map + m; buf = map[0]->buf + i; while (length > 0) { @@ -461,7 +403,7 @@ int copy_data( if (offset >= sge->length) { if (mr) { - rxe_drop_ref(mr); + rxe_put(mr); mr = NULL; } sge++; @@ -506,13 +448,13 @@ int copy_data( dma->resid = resid; if (mr) - rxe_drop_ref(mr); + rxe_put(mr); return 0; err2: if (mr) - rxe_drop_ref(mr); + rxe_put(mr); err1: return err; } @@ -571,29 +513,29 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key, (type == RXE_LOOKUP_REMOTE && mr->rkey != key) || mr_pd(mr) != pd || (access && 
!(access & mr->access)) || mr->state != RXE_MR_STATE_VALID)) { - rxe_drop_ref(mr); + rxe_put(mr); mr = NULL; } return mr; } -int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey) +int rxe_invalidate_mr(struct rxe_qp *qp, u32 key) { struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct rxe_mr *mr; int ret; - mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8); + mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8); if (!mr) { - pr_err("%s: No MR for rkey %#x\n", __func__, rkey); + pr_err("%s: No MR for key %#x\n", __func__, key); ret = -EINVAL; goto err; } - if (rkey != mr->rkey) { - pr_err("%s: rkey (%#x) doesn't match mr->rkey (%#x)\n", - __func__, rkey, mr->rkey); + if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) { + pr_err("%s: wr key (%#x) doesn't match mr key (%#x)\n", + __func__, key, (mr->rkey ? mr->rkey : mr->lkey)); ret = -EINVAL; goto err_drop_ref; } @@ -615,7 +557,7 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey) ret = 0; err_drop_ref: - rxe_drop_ref(mr); + rxe_put(mr); err: return ret; } @@ -630,9 +572,8 @@ err: int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe) { struct rxe_mr *mr = to_rmr(wqe->wr.wr.reg.mr); - u32 key = wqe->wr.wr.reg.key & 0xff; + u32 key = wqe->wr.wr.reg.key; u32 access = wqe->wr.wr.reg.access; - struct rxe_map_set *set; /* user can only register MR in free state */ if (unlikely(mr->state != RXE_MR_STATE_FREE)) { @@ -648,36 +589,19 @@ int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe) return -EINVAL; } + /* user is only allowed to change key portion of l/rkey */ + if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) { + pr_warn("%s: key = 0x%x has wrong index mr->lkey = 0x%x\n", + __func__, key, mr->lkey); + return -EINVAL; + } + mr->access = access; - mr->lkey = (mr->lkey & ~0xff) | key; - mr->rkey = (access & IB_ACCESS_REMOTE) ? mr->lkey : 0; + mr->lkey = key; + mr->rkey = (access & IB_ACCESS_REMOTE) ? 
key : 0; + mr->ibmr.iova = wqe->wr.wr.reg.mr->iova; mr->state = RXE_MR_STATE_VALID; - set = mr->cur_map_set; - mr->cur_map_set = mr->next_map_set; - mr->cur_map_set->iova = wqe->wr.wr.reg.mr->iova; - mr->next_map_set = set; - - return 0; -} - -int rxe_mr_set_page(struct ib_mr *ibmr, u64 addr) -{ - struct rxe_mr *mr = to_rmr(ibmr); - struct rxe_map_set *set = mr->next_map_set; - struct rxe_map *map; - struct rxe_phys_buf *buf; - - if (unlikely(set->nbuf == mr->num_buf)) - return -ENOMEM; - - map = set->map[set->nbuf / RXE_BUF_PER_MAP]; - buf = &map->buf[set->nbuf % RXE_BUF_PER_MAP]; - - buf->addr = addr; - buf->size = ibmr->page_size; - set->nbuf++; - return 0; } @@ -685,29 +609,27 @@ int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct rxe_mr *mr = to_rmr(ibmr); - if (atomic_read(&mr->num_mw) > 0) { - pr_warn("%s: Attempt to deregister an MR while bound to MWs\n", - __func__); + /* See IBA 10.6.7.2.6 */ + if (atomic_read(&mr->num_mw) > 0) return -EINVAL; - } - mr->state = RXE_MR_STATE_INVALID; - rxe_drop_ref(mr_pd(mr)); - rxe_drop_index(mr); - rxe_drop_ref(mr); + rxe_cleanup(mr); return 0; } -void rxe_mr_cleanup(struct rxe_pool_entry *arg) +void rxe_mr_cleanup(struct rxe_pool_elem *elem) { - struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem); + struct rxe_mr *mr = container_of(elem, typeof(*mr), elem); + int i; + rxe_put(mr_pd(mr)); ib_umem_release(mr->umem); - if (mr->cur_map_set) - rxe_mr_free_map_set(mr->num_map, mr->cur_map_set); + if (mr->map) { + for (i = 0; i < mr->num_map; i++) + kfree(mr->map[i]); - if (mr->next_map_set) - rxe_mr_free_map_set(mr->num_map, mr->next_map_set); + kfree(mr->map); + } } diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c index 9534a7fe1a98..902b7df7aaed 100644 --- a/drivers/infiniband/sw/rxe/rxe_mw.c +++ b/drivers/infiniband/sw/rxe/rxe_mw.c @@ -3,6 +3,14 @@ * Copyright (c) 2020 Hewlett Packard Enterprise, Inc. All rights reserved. 
*/ +/* + * The rdma_rxe driver supports type 1 or type 2B memory windows. + * Type 1 MWs are created by ibv_alloc_mw() verbs calls and bound by + * ibv_bind_mw() calls. Type 2 MWs are also created by ibv_alloc_mw() + * but bound by bind_mw work requests. The ibv_bind_mw() call is converted + * by libibverbs to a bind_mw work request. + */ + #include "rxe.h" int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) @@ -12,58 +20,29 @@ int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) struct rxe_dev *rxe = to_rdev(ibmw->device); int ret; - rxe_add_ref(pd); + rxe_get(pd); ret = rxe_add_to_pool(&rxe->mw_pool, mw); if (ret) { - rxe_drop_ref(pd); + rxe_put(pd); return ret; } - rxe_add_index(mw); - mw->rkey = ibmw->rkey = (mw->pelem.index << 8) | rxe_get_next_key(-1); + mw->rkey = ibmw->rkey = (mw->elem.index << 8) | rxe_get_next_key(-1); mw->state = (mw->ibmw.type == IB_MW_TYPE_2) ? RXE_MW_STATE_FREE : RXE_MW_STATE_VALID; spin_lock_init(&mw->lock); - return 0; -} - -static void rxe_do_dealloc_mw(struct rxe_mw *mw) -{ - if (mw->mr) { - struct rxe_mr *mr = mw->mr; - - mw->mr = NULL; - atomic_dec(&mr->num_mw); - rxe_drop_ref(mr); - } + rxe_finalize(mw); - if (mw->qp) { - struct rxe_qp *qp = mw->qp; - - mw->qp = NULL; - rxe_drop_ref(qp); - } - - mw->access = 0; - mw->addr = 0; - mw->length = 0; - mw->state = RXE_MW_STATE_INVALID; + return 0; } int rxe_dealloc_mw(struct ib_mw *ibmw) { struct rxe_mw *mw = to_rmw(ibmw); - struct rxe_pd *pd = to_rpd(ibmw->pd); - unsigned long flags; - spin_lock_irqsave(&mw->lock, flags); - rxe_do_dealloc_mw(mw); - spin_unlock_irqrestore(&mw->lock, flags); - - rxe_drop_ref(mw); - rxe_drop_ref(pd); + rxe_cleanup(mw); return 0; } @@ -71,8 +50,6 @@ int rxe_dealloc_mw(struct ib_mw *ibmw) static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, struct rxe_mw *mw, struct rxe_mr *mr) { - u32 key = wqe->wr.wr.mw.rkey & 0xff; - if (mw->ibmw.type == IB_MW_TYPE_1) { if (unlikely(mw->state != RXE_MW_STATE_VALID)) { pr_err_once( 
@@ -110,11 +87,6 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, } } - if (unlikely(key == (mw->rkey & 0xff))) { - pr_err_once("attempt to bind MW with same key\n"); - return -EINVAL; - } - /* remaining checks only apply to a nonzero MR */ if (!mr) return 0; @@ -136,21 +108,21 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, (IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC)) && !(mr->access & IB_ACCESS_LOCAL_WRITE))) { pr_err_once( - "attempt to bind an writeable MW to an MR without local write access\n"); + "attempt to bind an Writable MW to an MR without local write access\n"); return -EINVAL; } /* C10-75 */ if (mw->access & IB_ZERO_BASED) { - if (unlikely(wqe->wr.wr.mw.length > mr->cur_map_set->length)) { + if (unlikely(wqe->wr.wr.mw.length > mr->ibmr.length)) { pr_err_once( "attempt to bind a ZB MW outside of the MR\n"); return -EINVAL; } } else { - if (unlikely((wqe->wr.wr.mw.addr < mr->cur_map_set->iova) || + if (unlikely((wqe->wr.wr.mw.addr < mr->ibmr.iova) || ((wqe->wr.wr.mw.addr + wqe->wr.wr.mw.length) > - (mr->cur_map_set->iova + mr->cur_map_set->length)))) { + (mr->ibmr.iova + mr->ibmr.length)))) { pr_err_once( "attempt to bind a VA MW outside of the MR\n"); return -EINVAL; @@ -172,7 +144,7 @@ static void rxe_do_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, mw->length = wqe->wr.wr.mw.length; if (mw->mr) { - rxe_drop_ref(mw->mr); + rxe_put(mw->mr); atomic_dec(&mw->mr->num_mw); mw->mr = NULL; } @@ -180,11 +152,11 @@ static void rxe_do_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, if (mw->length) { mw->mr = mr; atomic_inc(&mr->num_mw); - rxe_add_ref(mr); + rxe_get(mr); } if (mw->ibmw.type == IB_MW_TYPE_2) { - rxe_add_ref(qp); + rxe_get(qp); mw->qp = qp; } } @@ -197,7 +169,6 @@ int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe) struct rxe_dev *rxe = to_rdev(qp->ibqp.device); u32 mw_rkey = wqe->wr.wr.mw.mw_rkey; u32 mr_lkey = wqe->wr.wr.mw.mr_lkey; - unsigned long flags; mw = 
rxe_pool_get_index(&rxe->mw_pool, mw_rkey >> 8); if (unlikely(!mw)) { @@ -225,7 +196,7 @@ int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe) mr = NULL; } - spin_lock_irqsave(&mw->lock, flags); + spin_lock_bh(&mw->lock); ret = rxe_check_bind_mw(qp, wqe, mw, mr); if (ret) @@ -233,12 +204,12 @@ int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe) rxe_do_bind_mw(qp, wqe, mw, mr); err_unlock: - spin_unlock_irqrestore(&mw->lock, flags); + spin_unlock_bh(&mw->lock); err_drop_mr: if (mr) - rxe_drop_ref(mr); + rxe_put(mr); err_drop_mw: - rxe_drop_ref(mw); + rxe_put(mw); err: return ret; } @@ -263,13 +234,13 @@ static void rxe_do_invalidate_mw(struct rxe_mw *mw) /* valid type 2 MW will always have a QP pointer */ qp = mw->qp; mw->qp = NULL; - rxe_drop_ref(qp); + rxe_put(qp); /* valid type 2 MW will always have an MR pointer */ mr = mw->mr; mw->mr = NULL; atomic_dec(&mr->num_mw); - rxe_drop_ref(mr); + rxe_put(mr); mw->access = 0; mw->addr = 0; @@ -280,7 +251,6 @@ static void rxe_do_invalidate_mw(struct rxe_mw *mw) int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey) { struct rxe_dev *rxe = to_rdev(qp->ibqp.device); - unsigned long flags; struct rxe_mw *mw; int ret; @@ -295,7 +265,7 @@ int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey) goto err_drop_ref; } - spin_lock_irqsave(&mw->lock, flags); + spin_lock_bh(&mw->lock); ret = rxe_check_invalidate_mw(qp, mw); if (ret) @@ -303,9 +273,9 @@ int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey) rxe_do_invalidate_mw(mw); err_unlock: - spin_unlock_irqrestore(&mw->lock, flags); + spin_unlock_bh(&mw->lock); err_drop_ref: - rxe_drop_ref(mw); + rxe_put(mw); err: return ret; } @@ -326,16 +296,37 @@ struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey) (mw->length == 0) || (access && !(access & mw->access)) || mw->state != RXE_MW_STATE_VALID)) { - rxe_drop_ref(mw); + rxe_put(mw); return NULL; } return mw; } -void rxe_mw_cleanup(struct rxe_pool_entry *elem) +void rxe_mw_cleanup(struct rxe_pool_elem *elem) { - 
struct rxe_mw *mw = container_of(elem, typeof(*mw), pelem); + struct rxe_mw *mw = container_of(elem, typeof(*mw), elem); + struct rxe_pd *pd = to_rpd(mw->ibmw.pd); + + rxe_put(pd); + + if (mw->mr) { + struct rxe_mr *mr = mw->mr; + + mw->mr = NULL; + atomic_dec(&mr->num_mw); + rxe_put(mr); + } + + if (mw->qp) { + struct rxe_qp *qp = mw->qp; + + mw->qp = NULL; + rxe_put(qp); + } - rxe_drop_index(mw); + mw->access = 0; + mw->addr = 0; + mw->length = 0; + mw->state = RXE_MW_STATE_INVALID; } diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 2cb810cb890a..35f327b9d4b8 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -20,28 +20,6 @@ static struct rxe_recv_sockets recv_sockets; -int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) -{ - int err; - unsigned char ll_addr[ETH_ALEN]; - - ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); - err = dev_mc_add(rxe->ndev, ll_addr); - - return err; -} - -int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid) -{ - int err; - unsigned char ll_addr[ETH_ALEN]; - - ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); - err = dev_mc_del(rxe->ndev, ll_addr); - - return err; -} - static struct dst_entry *rxe_find_route4(struct net_device *ndev, struct in_addr *saddr, struct in_addr *daddr) @@ -167,7 +145,6 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb) goto drop; if (skb_linearize(skb)) { - pr_err("skb_linearize failed\n"); ib_device_put(&rxe->ib_dev); goto drop; } @@ -293,13 +270,13 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb, ip6h->payload_len = htons(skb->len - sizeof(*ip6h)); } -static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb) +static int prepare4(struct rxe_av *av, struct rxe_pkt_info *pkt, + struct sk_buff *skb) { struct rxe_qp *qp = pkt->qp; struct dst_entry *dst; bool xnet = false; __be16 df = htons(IP_DF); - struct rxe_av *av = rxe_get_av(pkt); struct 
in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr; struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr; @@ -319,11 +296,11 @@ static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb) return 0; } -static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb) +static int prepare6(struct rxe_av *av, struct rxe_pkt_info *pkt, + struct sk_buff *skb) { struct rxe_qp *qp = pkt->qp; struct dst_entry *dst; - struct rxe_av *av = rxe_get_av(pkt); struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr; struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr; @@ -344,16 +321,17 @@ static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb) return 0; } -int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb) +int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt, + struct sk_buff *skb) { int err = 0; if (skb->protocol == htons(ETH_P_IP)) - err = prepare4(pkt, skb); + err = prepare4(av, pkt, skb); else if (skb->protocol == htons(ETH_P_IPV6)) - err = prepare6(pkt, skb); + err = prepare6(av, pkt, skb); - if (ether_addr_equal(skb->dev->dev_addr, rxe_get_av(pkt)->dmac)) + if (ether_addr_equal(skb->dev->dev_addr, av->dmac)) pkt->mask |= RXE_LOOPBACK_MASK; return err; @@ -369,7 +347,7 @@ static void rxe_skb_tx_dtor(struct sk_buff *skb) skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW)) rxe_run_task(&qp->req.task, 1); - rxe_drop_ref(qp); + rxe_put(qp); } static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt) @@ -379,7 +357,7 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt) skb->destructor = rxe_skb_tx_dtor; skb->sk = pkt->qp->sk->sk; - rxe_add_ref(pkt->qp); + rxe_get(pkt->qp); atomic_inc(&pkt->qp->skb_out); if (skb->protocol == htons(ETH_P_IP)) { @@ -389,7 +367,7 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt) } else { pr_err("Unknown layer 3 protocol: %d\n", skb->protocol); atomic_dec(&pkt->qp->skb_out); - rxe_drop_ref(pkt->qp); + rxe_put(pkt->qp); kfree_skb(skb); return 
-EINVAL; } @@ -444,7 +422,6 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt, else err = rxe_send(skb, pkt); if (err) { - rxe->xmit_errors++; rxe_counter_inc(rxe, RXE_CNT_SEND_ERR); return err; } diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c index 3ef5a10a6efd..d4ba4d506f17 100644 --- a/drivers/infiniband/sw/rxe/rxe_opcode.c +++ b/drivers/infiniband/sw/rxe/rxe_opcode.c @@ -29,7 +29,6 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = { [IB_WR_SEND] = { .name = "IB_WR_SEND", .mask = { - [IB_QPT_SMI] = WR_INLINE_MASK | WR_SEND_MASK, [IB_QPT_GSI] = WR_INLINE_MASK | WR_SEND_MASK, [IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK, [IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK, @@ -39,7 +38,6 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = { [IB_WR_SEND_WITH_IMM] = { .name = "IB_WR_SEND_WITH_IMM", .mask = { - [IB_QPT_SMI] = WR_INLINE_MASK | WR_SEND_MASK, [IB_QPT_GSI] = WR_INLINE_MASK | WR_SEND_MASK, [IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK, [IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK, @@ -108,8 +106,8 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = { struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { [IB_OPCODE_RC_SEND_FIRST] = { .name = "IB_OPCODE_RC_SEND_FIRST", - .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_RWR_MASK - | RXE_SEND_MASK | RXE_START_MASK, + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_RWR_MASK | + RXE_SEND_MASK | RXE_START_MASK, .length = RXE_BTH_BYTES, .offset = { [RXE_BTH] = 0, @@ -117,9 +115,9 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { } }, [IB_OPCODE_RC_SEND_MIDDLE] = { - .name = "IB_OPCODE_RC_SEND_MIDDLE]", - .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK - | RXE_MIDDLE_MASK, + .name = "IB_OPCODE_RC_SEND_MIDDLE", + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK | + RXE_MIDDLE_MASK, .length = RXE_BTH_BYTES, .offset = { [RXE_BTH] = 0, @@ -128,8 +126,8 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { }, [IB_OPCODE_RC_SEND_LAST] = { .name = 
"IB_OPCODE_RC_SEND_LAST", - .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK - | RXE_SEND_MASK | RXE_END_MASK, + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK | + RXE_SEND_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES, .offset = { [RXE_BTH] = 0, @@ -138,21 +136,21 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { }, [IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE] = { .name = "IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE", - .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK, + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES, .offset = { [RXE_BTH] = 0, [RXE_IMMDT] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_IMMDT_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IMMDT_BYTES, } }, [IB_OPCODE_RC_SEND_ONLY] = { .name = "IB_OPCODE_RC_SEND_ONLY", - .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK - | RXE_RWR_MASK | RXE_SEND_MASK - | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK | + RXE_RWR_MASK | RXE_SEND_MASK | + RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES, .offset = { [RXE_BTH] = 0, @@ -161,33 +159,33 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { }, [IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE] = { .name = "IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE", - .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK - | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK | + RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES, .offset = { [RXE_BTH] = 0, [RXE_IMMDT] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_IMMDT_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IMMDT_BYTES, } }, [IB_OPCODE_RC_RDMA_WRITE_FIRST] = { .name = "IB_OPCODE_RC_RDMA_WRITE_FIRST", - .mask = RXE_RETH_MASK | 
RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_WRITE_MASK | RXE_START_MASK, + .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_WRITE_MASK | RXE_START_MASK, .length = RXE_BTH_BYTES + RXE_RETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES, } }, [IB_OPCODE_RC_RDMA_WRITE_MIDDLE] = { .name = "IB_OPCODE_RC_RDMA_WRITE_MIDDLE", - .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK - | RXE_MIDDLE_MASK, + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK | + RXE_MIDDLE_MASK, .length = RXE_BTH_BYTES, .offset = { [RXE_BTH] = 0, @@ -196,8 +194,8 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { }, [IB_OPCODE_RC_RDMA_WRITE_LAST] = { .name = "IB_OPCODE_RC_RDMA_WRITE_LAST", - .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK - | RXE_END_MASK, + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK | + RXE_END_MASK, .length = RXE_BTH_BYTES, .offset = { [RXE_BTH] = 0, @@ -206,69 +204,69 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { }, [IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = { .name = "IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE", - .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK - | RXE_END_MASK, + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK | + RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES, .offset = { [RXE_BTH] = 0, [RXE_IMMDT] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_IMMDT_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IMMDT_BYTES, } }, [IB_OPCODE_RC_RDMA_WRITE_ONLY] = { .name = "IB_OPCODE_RC_RDMA_WRITE_ONLY", - .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_WRITE_MASK | RXE_START_MASK - | RXE_END_MASK, + .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_WRITE_MASK | RXE_START_MASK | + RXE_END_MASK, .length = RXE_BTH_BYTES + 
RXE_RETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES, } }, [IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = { .name = "IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE", - .mask = RXE_RETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK - | RXE_REQ_MASK | RXE_WRITE_MASK - | RXE_COMP_MASK | RXE_RWR_MASK - | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_RETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | + RXE_REQ_MASK | RXE_WRITE_MASK | + RXE_COMP_MASK | RXE_RWR_MASK | + RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RETH] = RXE_BTH_BYTES, - [RXE_IMMDT] = RXE_BTH_BYTES - + RXE_RETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RETH_BYTES - + RXE_IMMDT_BYTES, + [RXE_IMMDT] = RXE_BTH_BYTES + + RXE_RETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES + + RXE_IMMDT_BYTES, } }, [IB_OPCODE_RC_RDMA_READ_REQUEST] = { .name = "IB_OPCODE_RC_RDMA_READ_REQUEST", - .mask = RXE_RETH_MASK | RXE_REQ_MASK | RXE_READ_MASK - | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_RETH_MASK | RXE_REQ_MASK | RXE_READ_MASK | + RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_RETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES, } }, [IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST] = { .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST", - .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK - | RXE_START_MASK, + .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK | + RXE_START_MASK, .length = RXE_BTH_BYTES + RXE_AETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_AETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_AETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_AETH_BYTES, } }, [IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE] = { @@ -282,109 +280,110 @@ struct rxe_opcode_info 
rxe_opcode[RXE_NUM_OPCODE] = { }, [IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = { .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST", - .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK - | RXE_END_MASK, + .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK | + RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_AETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_AETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_AETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_AETH_BYTES, } }, [IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = { .name = "IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY", - .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK - | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_AETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK | + RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_AETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_AETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_AETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_AETH_BYTES, } }, [IB_OPCODE_RC_ACKNOWLEDGE] = { .name = "IB_OPCODE_RC_ACKNOWLEDGE", - .mask = RXE_AETH_MASK | RXE_ACK_MASK | RXE_START_MASK - | RXE_END_MASK, + .mask = RXE_AETH_MASK | RXE_ACK_MASK | RXE_START_MASK | + RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_AETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_AETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_AETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_AETH_BYTES, } }, [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = { .name = "IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE", - .mask = RXE_AETH_MASK | RXE_ATMACK_MASK | RXE_ACK_MASK - | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_AETH_MASK | RXE_ATMACK_MASK | RXE_ACK_MASK | + RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_ATMACK_BYTES + RXE_AETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_AETH] = RXE_BTH_BYTES, - [RXE_ATMACK] = RXE_BTH_BYTES - + RXE_AETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_ATMACK_BYTES + RXE_AETH_BYTES, + [RXE_ATMACK] = RXE_BTH_BYTES + + RXE_AETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_ATMACK_BYTES + + 
RXE_AETH_BYTES, } }, [IB_OPCODE_RC_COMPARE_SWAP] = { .name = "IB_OPCODE_RC_COMPARE_SWAP", - .mask = RXE_ATMETH_MASK | RXE_REQ_MASK | RXE_ATOMIC_MASK - | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_ATMETH_MASK | RXE_REQ_MASK | RXE_ATOMIC_MASK | + RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_ATMETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_ATMETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_ATMETH_BYTES, } }, [IB_OPCODE_RC_FETCH_ADD] = { .name = "IB_OPCODE_RC_FETCH_ADD", - .mask = RXE_ATMETH_MASK | RXE_REQ_MASK | RXE_ATOMIC_MASK - | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_ATMETH_MASK | RXE_REQ_MASK | RXE_ATOMIC_MASK | + RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_ATMETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_ATMETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_ATMETH_BYTES, } }, [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = { .name = "IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE", - .mask = RXE_IETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK, + .mask = RXE_IETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_IETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_IETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_IETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IETH_BYTES, } }, [IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = { .name = "IB_OPCODE_RC_SEND_ONLY_INV", - .mask = RXE_IETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK - | RXE_END_MASK | RXE_START_MASK, + .mask = RXE_IETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK | + RXE_END_MASK | RXE_START_MASK, .length = RXE_BTH_BYTES + RXE_IETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_IETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_IETH_BYTES, + 
[RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IETH_BYTES, } }, /* UC */ [IB_OPCODE_UC_SEND_FIRST] = { .name = "IB_OPCODE_UC_SEND_FIRST", - .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_RWR_MASK - | RXE_SEND_MASK | RXE_START_MASK, + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_RWR_MASK | + RXE_SEND_MASK | RXE_START_MASK, .length = RXE_BTH_BYTES, .offset = { [RXE_BTH] = 0, @@ -393,8 +392,8 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { }, [IB_OPCODE_UC_SEND_MIDDLE] = { .name = "IB_OPCODE_UC_SEND_MIDDLE", - .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK - | RXE_MIDDLE_MASK, + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK | + RXE_MIDDLE_MASK, .length = RXE_BTH_BYTES, .offset = { [RXE_BTH] = 0, @@ -403,8 +402,8 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { }, [IB_OPCODE_UC_SEND_LAST] = { .name = "IB_OPCODE_UC_SEND_LAST", - .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK - | RXE_SEND_MASK | RXE_END_MASK, + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK | + RXE_SEND_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES, .offset = { [RXE_BTH] = 0, @@ -413,21 +412,21 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { }, [IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE] = { .name = "IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE", - .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK, + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_COMP_MASK | RXE_SEND_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES, .offset = { [RXE_BTH] = 0, [RXE_IMMDT] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_IMMDT_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IMMDT_BYTES, } }, [IB_OPCODE_UC_SEND_ONLY] = { .name = "IB_OPCODE_UC_SEND_ONLY", - .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK - | RXE_RWR_MASK | RXE_SEND_MASK - | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK | + RXE_RWR_MASK | RXE_SEND_MASK | + RXE_START_MASK | 
RXE_END_MASK, .length = RXE_BTH_BYTES, .offset = { [RXE_BTH] = 0, @@ -436,33 +435,33 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { }, [IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE] = { .name = "IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE", - .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK - | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK | + RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES, .offset = { [RXE_BTH] = 0, [RXE_IMMDT] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_IMMDT_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IMMDT_BYTES, } }, [IB_OPCODE_UC_RDMA_WRITE_FIRST] = { .name = "IB_OPCODE_UC_RDMA_WRITE_FIRST", - .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_WRITE_MASK | RXE_START_MASK, + .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_WRITE_MASK | RXE_START_MASK, .length = RXE_BTH_BYTES + RXE_RETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES, } }, [IB_OPCODE_UC_RDMA_WRITE_MIDDLE] = { .name = "IB_OPCODE_UC_RDMA_WRITE_MIDDLE", - .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK - | RXE_MIDDLE_MASK, + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK | + RXE_MIDDLE_MASK, .length = RXE_BTH_BYTES, .offset = { [RXE_BTH] = 0, @@ -471,8 +470,8 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { }, [IB_OPCODE_UC_RDMA_WRITE_LAST] = { .name = "IB_OPCODE_UC_RDMA_WRITE_LAST", - .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK - | RXE_END_MASK, + .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_WRITE_MASK | + RXE_END_MASK, .length = RXE_BTH_BYTES, .offset = { [RXE_BTH] = 0, @@ -481,460 +480,460 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { }, [IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = { .name = 
"IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE", - .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK - | RXE_END_MASK, + .mask = RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK | + RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES, .offset = { [RXE_BTH] = 0, [RXE_IMMDT] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_IMMDT_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_IMMDT_BYTES, } }, [IB_OPCODE_UC_RDMA_WRITE_ONLY] = { .name = "IB_OPCODE_UC_RDMA_WRITE_ONLY", - .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_WRITE_MASK | RXE_START_MASK - | RXE_END_MASK, + .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_WRITE_MASK | RXE_START_MASK | + RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_RETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES, } }, [IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = { .name = "IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE", - .mask = RXE_RETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK - | RXE_REQ_MASK | RXE_WRITE_MASK - | RXE_COMP_MASK | RXE_RWR_MASK - | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_RETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | + RXE_REQ_MASK | RXE_WRITE_MASK | + RXE_COMP_MASK | RXE_RWR_MASK | + RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RETH] = RXE_BTH_BYTES, - [RXE_IMMDT] = RXE_BTH_BYTES - + RXE_RETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RETH_BYTES - + RXE_IMMDT_BYTES, + [RXE_IMMDT] = RXE_BTH_BYTES + + RXE_RETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES + + RXE_IMMDT_BYTES, } }, /* RD */ [IB_OPCODE_RD_SEND_FIRST] = { .name = "IB_OPCODE_RD_SEND_FIRST", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK - | RXE_REQ_MASK | RXE_RWR_MASK | RXE_SEND_MASK - | 
RXE_START_MASK, + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK | + RXE_REQ_MASK | RXE_RWR_MASK | RXE_SEND_MASK | + RXE_START_MASK, .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, } }, [IB_OPCODE_RD_SEND_MIDDLE] = { .name = "IB_OPCODE_RD_SEND_MIDDLE", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK - | RXE_REQ_MASK | RXE_SEND_MASK - | RXE_MIDDLE_MASK, + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK | + RXE_REQ_MASK | RXE_SEND_MASK | + RXE_MIDDLE_MASK, .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, } }, [IB_OPCODE_RD_SEND_LAST] = { .name = "IB_OPCODE_RD_SEND_LAST", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK - | RXE_REQ_MASK | RXE_COMP_MASK | RXE_SEND_MASK - | RXE_END_MASK, + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK | + RXE_REQ_MASK | RXE_COMP_MASK | RXE_SEND_MASK | + RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, } }, [IB_OPCODE_RD_SEND_LAST_WITH_IMMEDIATE] = { .name = "IB_OPCODE_RD_SEND_LAST_WITH_IMMEDIATE", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | 
RXE_IMMDT_MASK - | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_COMP_MASK | RXE_SEND_MASK - | RXE_END_MASK, - .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES - + RXE_RDETH_BYTES, + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK | + RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_COMP_MASK | RXE_SEND_MASK | + RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_IMMDT] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES - + RXE_IMMDT_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_IMMDT] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES + + RXE_IMMDT_BYTES, } }, [IB_OPCODE_RD_SEND_ONLY] = { .name = "IB_OPCODE_RD_SEND_ONLY", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK - | RXE_REQ_MASK | RXE_COMP_MASK | RXE_RWR_MASK - | RXE_SEND_MASK | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK | + RXE_REQ_MASK | RXE_COMP_MASK | RXE_RWR_MASK | + RXE_SEND_MASK | RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, } }, [IB_OPCODE_RD_SEND_ONLY_WITH_IMMEDIATE] = { .name = "IB_OPCODE_RD_SEND_ONLY_WITH_IMMEDIATE", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK - | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK - | RXE_START_MASK | RXE_END_MASK, - .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES - + RXE_RDETH_BYTES, + 
.mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK | + RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK | + RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_IMMDT] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES - + RXE_IMMDT_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_IMMDT] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES + + RXE_IMMDT_BYTES, } }, [IB_OPCODE_RD_RDMA_WRITE_FIRST] = { .name = "IB_OPCODE_RD_RDMA_WRITE_FIRST", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK - | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_WRITE_MASK | RXE_START_MASK, - .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES - + RXE_RDETH_BYTES, + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK | + RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_WRITE_MASK | RXE_START_MASK, + .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_RETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES - + RXE_RETH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_RETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES + + RXE_RETH_BYTES, } }, [IB_OPCODE_RD_RDMA_WRITE_MIDDLE] = { .name = "IB_OPCODE_RD_RDMA_WRITE_MIDDLE", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK - | RXE_REQ_MASK | RXE_WRITE_MASK - | RXE_MIDDLE_MASK, + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK | + RXE_REQ_MASK | 
RXE_WRITE_MASK | + RXE_MIDDLE_MASK, .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, } }, [IB_OPCODE_RD_RDMA_WRITE_LAST] = { .name = "IB_OPCODE_RD_RDMA_WRITE_LAST", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK - | RXE_REQ_MASK | RXE_WRITE_MASK - | RXE_END_MASK, + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_PAYLOAD_MASK | + RXE_REQ_MASK | RXE_WRITE_MASK | + RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_DETH_BYTES + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, } }, [IB_OPCODE_RD_RDMA_WRITE_LAST_WITH_IMMEDIATE] = { .name = "IB_OPCODE_RD_RDMA_WRITE_LAST_WITH_IMMEDIATE", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK - | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK - | RXE_END_MASK, - .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES - + RXE_RDETH_BYTES, + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_IMMDT_MASK | + RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_WRITE_MASK | RXE_COMP_MASK | RXE_RWR_MASK | + RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_IMMDT] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES - + RXE_IMMDT_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_IMMDT] = 
RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES + + RXE_IMMDT_BYTES, } }, [IB_OPCODE_RD_RDMA_WRITE_ONLY] = { .name = "IB_OPCODE_RD_RDMA_WRITE_ONLY", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK - | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_WRITE_MASK | RXE_START_MASK - | RXE_END_MASK, - .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES - + RXE_RDETH_BYTES, + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK | + RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_WRITE_MASK | RXE_START_MASK | + RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_RETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES - + RXE_RETH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_RETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES + + RXE_RETH_BYTES, } }, [IB_OPCODE_RD_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = { .name = "IB_OPCODE_RD_RDMA_WRITE_ONLY_WITH_IMMEDIATE", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK - | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK - | RXE_REQ_MASK | RXE_WRITE_MASK - | RXE_COMP_MASK | RXE_RWR_MASK - | RXE_START_MASK | RXE_END_MASK, - .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES - + RXE_DETH_BYTES + RXE_RDETH_BYTES, + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK | + RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | + RXE_REQ_MASK | RXE_WRITE_MASK | + RXE_COMP_MASK | RXE_RWR_MASK | + RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_RETH_BYTES + + RXE_DETH_BYTES + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_RETH] = RXE_BTH_BYTES - + 
RXE_RDETH_BYTES - + RXE_DETH_BYTES, - [RXE_IMMDT] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES - + RXE_RETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES - + RXE_RETH_BYTES - + RXE_IMMDT_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_RETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_IMMDT] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES + + RXE_RETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES + + RXE_RETH_BYTES + + RXE_IMMDT_BYTES, } }, [IB_OPCODE_RD_RDMA_READ_REQUEST] = { .name = "IB_OPCODE_RD_RDMA_READ_REQUEST", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK - | RXE_REQ_MASK | RXE_READ_MASK - | RXE_START_MASK | RXE_END_MASK, - .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES - + RXE_RDETH_BYTES, + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_RETH_MASK | + RXE_REQ_MASK | RXE_READ_MASK | + RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_RETH_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_RETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RETH_BYTES - + RXE_DETH_BYTES - + RXE_RDETH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_RETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RETH_BYTES + + RXE_DETH_BYTES + + RXE_RDETH_BYTES, } }, [IB_OPCODE_RD_RDMA_READ_RESPONSE_FIRST] = { .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_FIRST", - .mask = RXE_RDETH_MASK | RXE_AETH_MASK - | RXE_PAYLOAD_MASK | RXE_ACK_MASK - | RXE_START_MASK, + .mask = RXE_RDETH_MASK | RXE_AETH_MASK | + RXE_PAYLOAD_MASK | RXE_ACK_MASK | + RXE_START_MASK, .length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_AETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - 
[RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_AETH_BYTES, + [RXE_AETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_AETH_BYTES, } }, [IB_OPCODE_RD_RDMA_READ_RESPONSE_MIDDLE] = { .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_MIDDLE", - .mask = RXE_RDETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK - | RXE_MIDDLE_MASK, + .mask = RXE_RDETH_MASK | RXE_PAYLOAD_MASK | RXE_ACK_MASK | + RXE_MIDDLE_MASK, .length = RXE_BTH_BYTES + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, } }, [IB_OPCODE_RD_RDMA_READ_RESPONSE_LAST] = { .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_LAST", - .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_PAYLOAD_MASK - | RXE_ACK_MASK | RXE_END_MASK, + .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_PAYLOAD_MASK | + RXE_ACK_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_AETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_AETH_BYTES, + [RXE_AETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_AETH_BYTES, } }, [IB_OPCODE_RD_RDMA_READ_RESPONSE_ONLY] = { .name = "IB_OPCODE_RD_RDMA_READ_RESPONSE_ONLY", - .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_PAYLOAD_MASK - | RXE_ACK_MASK | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_PAYLOAD_MASK | + RXE_ACK_MASK | RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_AETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_AETH_BYTES, + [RXE_AETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_AETH_BYTES, } }, 
[IB_OPCODE_RD_ACKNOWLEDGE] = { .name = "IB_OPCODE_RD_ACKNOWLEDGE", - .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_ACK_MASK - | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_ACK_MASK | + RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_AETH_BYTES + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_AETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, + [RXE_AETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, } }, [IB_OPCODE_RD_ATOMIC_ACKNOWLEDGE] = { .name = "IB_OPCODE_RD_ATOMIC_ACKNOWLEDGE", - .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_ATMACK_MASK - | RXE_ACK_MASK | RXE_START_MASK | RXE_END_MASK, - .length = RXE_BTH_BYTES + RXE_ATMACK_BYTES + RXE_AETH_BYTES - + RXE_RDETH_BYTES, + .mask = RXE_RDETH_MASK | RXE_AETH_MASK | RXE_ATMACK_MASK | + RXE_ACK_MASK | RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_ATMACK_BYTES + RXE_AETH_BYTES + + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_AETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_ATMACK] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_AETH_BYTES, + [RXE_AETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_ATMACK] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_AETH_BYTES, } }, [IB_OPCODE_RD_COMPARE_SWAP] = { .name = "RD_COMPARE_SWAP", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_ATMETH_MASK - | RXE_REQ_MASK | RXE_ATOMIC_MASK - | RXE_START_MASK | RXE_END_MASK, - .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES + RXE_DETH_BYTES - + RXE_RDETH_BYTES, + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_ATMETH_MASK | + RXE_REQ_MASK | RXE_ATOMIC_MASK | + RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_ATMETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_ATMETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES 
+ + RXE_DETH_BYTES, [RXE_PAYLOAD] = RXE_BTH_BYTES + - + RXE_ATMETH_BYTES - + RXE_DETH_BYTES + - + RXE_RDETH_BYTES, + RXE_ATMETH_BYTES + + RXE_DETH_BYTES + + RXE_RDETH_BYTES, } }, [IB_OPCODE_RD_FETCH_ADD] = { .name = "IB_OPCODE_RD_FETCH_ADD", - .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_ATMETH_MASK - | RXE_REQ_MASK | RXE_ATOMIC_MASK - | RXE_START_MASK | RXE_END_MASK, - .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES + RXE_DETH_BYTES - + RXE_RDETH_BYTES, + .mask = RXE_RDETH_MASK | RXE_DETH_MASK | RXE_ATMETH_MASK | + RXE_REQ_MASK | RXE_ATOMIC_MASK | + RXE_START_MASK | RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_ATMETH_BYTES + RXE_DETH_BYTES + + RXE_RDETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_RDETH] = RXE_BTH_BYTES, - [RXE_DETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES, - [RXE_ATMETH] = RXE_BTH_BYTES - + RXE_RDETH_BYTES - + RXE_DETH_BYTES, + [RXE_DETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES, + [RXE_ATMETH] = RXE_BTH_BYTES + + RXE_RDETH_BYTES + + RXE_DETH_BYTES, [RXE_PAYLOAD] = RXE_BTH_BYTES + - + RXE_ATMETH_BYTES - + RXE_DETH_BYTES + - + RXE_RDETH_BYTES, + RXE_ATMETH_BYTES + + RXE_DETH_BYTES + + RXE_RDETH_BYTES, } }, /* UD */ [IB_OPCODE_UD_SEND_ONLY] = { .name = "IB_OPCODE_UD_SEND_ONLY", - .mask = RXE_DETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK - | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK - | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_DETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK | + RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_DETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_DETH] = RXE_BTH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_DETH_BYTES, } }, [IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE] = { .name = "IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE", - .mask = RXE_DETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK - | RXE_REQ_MASK | RXE_COMP_MASK | RXE_RWR_MASK - | RXE_SEND_MASK | RXE_START_MASK | RXE_END_MASK, + .mask = RXE_DETH_MASK | RXE_IMMDT_MASK | RXE_PAYLOAD_MASK | 
+ RXE_REQ_MASK | RXE_COMP_MASK | RXE_RWR_MASK | + RXE_SEND_MASK | RXE_START_MASK | RXE_END_MASK, .length = RXE_BTH_BYTES + RXE_IMMDT_BYTES + RXE_DETH_BYTES, .offset = { [RXE_BTH] = 0, [RXE_DETH] = RXE_BTH_BYTES, - [RXE_IMMDT] = RXE_BTH_BYTES - + RXE_DETH_BYTES, - [RXE_PAYLOAD] = RXE_BTH_BYTES - + RXE_DETH_BYTES - + RXE_IMMDT_BYTES, + [RXE_IMMDT] = RXE_BTH_BYTES + + RXE_DETH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + + RXE_DETH_BYTES + + RXE_IMMDT_BYTES, } }, diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index 918270e34a35..86c7a8bf3cbb 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -50,9 +50,7 @@ enum rxe_device_param { | IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_SRQ_RESIZE | IB_DEVICE_MEM_MGT_EXTENSIONS - | IB_DEVICE_ALLOW_USER_UNREG | IB_DEVICE_MEM_WINDOW - | IB_DEVICE_MEM_WINDOW_TYPE_2A | IB_DEVICE_MEM_WINDOW_TYPE_2B, RXE_MAX_SGE = 32, RXE_MAX_WQE_SIZE = sizeof(struct rxe_send_wqe) + @@ -107,6 +105,12 @@ enum rxe_device_param { RXE_INFLIGHT_SKBS_PER_QP_HIGH = 64, RXE_INFLIGHT_SKBS_PER_QP_LOW = 16, + /* Max number of interations of each tasklet + * before yielding the cpu to let other + * work make progress + */ + RXE_MAX_ITERATIONS = 1024, + /* Delay before calling arbiter timer */ RXE_NSEC_ARB_TIMER_DELAY = 200, diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 2e80bb6aa957..f50620f5a0a1 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -5,499 +5,298 @@ */ #include "rxe.h" -#include "rxe_loc.h" + +#define RXE_POOL_TIMEOUT (200) +#define RXE_POOL_ALIGN (16) static const struct rxe_type_info { const char *name; size_t size; size_t elem_offset; - void (*cleanup)(struct rxe_pool_entry *obj); - enum rxe_pool_flags flags; + void (*cleanup)(struct rxe_pool_elem *elem); u32 min_index; u32 max_index; - size_t key_offset; - size_t key_size; + u32 max_elem; } rxe_type_info[RXE_NUM_TYPES] = { 
[RXE_TYPE_UC] = { - .name = "rxe-uc", + .name = "uc", .size = sizeof(struct rxe_ucontext), - .elem_offset = offsetof(struct rxe_ucontext, pelem), - .flags = RXE_POOL_NO_ALLOC, + .elem_offset = offsetof(struct rxe_ucontext, elem), + .min_index = 1, + .max_index = UINT_MAX, + .max_elem = UINT_MAX, }, [RXE_TYPE_PD] = { - .name = "rxe-pd", + .name = "pd", .size = sizeof(struct rxe_pd), - .elem_offset = offsetof(struct rxe_pd, pelem), - .flags = RXE_POOL_NO_ALLOC, + .elem_offset = offsetof(struct rxe_pd, elem), + .min_index = 1, + .max_index = UINT_MAX, + .max_elem = UINT_MAX, }, [RXE_TYPE_AH] = { - .name = "rxe-ah", + .name = "ah", .size = sizeof(struct rxe_ah), - .elem_offset = offsetof(struct rxe_ah, pelem), - .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC, + .elem_offset = offsetof(struct rxe_ah, elem), .min_index = RXE_MIN_AH_INDEX, .max_index = RXE_MAX_AH_INDEX, + .max_elem = RXE_MAX_AH_INDEX - RXE_MIN_AH_INDEX + 1, }, [RXE_TYPE_SRQ] = { - .name = "rxe-srq", + .name = "srq", .size = sizeof(struct rxe_srq), - .elem_offset = offsetof(struct rxe_srq, pelem), - .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC, + .elem_offset = offsetof(struct rxe_srq, elem), + .cleanup = rxe_srq_cleanup, .min_index = RXE_MIN_SRQ_INDEX, .max_index = RXE_MAX_SRQ_INDEX, + .max_elem = RXE_MAX_SRQ_INDEX - RXE_MIN_SRQ_INDEX + 1, }, [RXE_TYPE_QP] = { - .name = "rxe-qp", + .name = "qp", .size = sizeof(struct rxe_qp), - .elem_offset = offsetof(struct rxe_qp, pelem), + .elem_offset = offsetof(struct rxe_qp, elem), .cleanup = rxe_qp_cleanup, - .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC, .min_index = RXE_MIN_QP_INDEX, .max_index = RXE_MAX_QP_INDEX, + .max_elem = RXE_MAX_QP_INDEX - RXE_MIN_QP_INDEX + 1, }, [RXE_TYPE_CQ] = { - .name = "rxe-cq", + .name = "cq", .size = sizeof(struct rxe_cq), - .elem_offset = offsetof(struct rxe_cq, pelem), - .flags = RXE_POOL_NO_ALLOC, + .elem_offset = offsetof(struct rxe_cq, elem), .cleanup = rxe_cq_cleanup, + .min_index = 1, + .max_index = UINT_MAX, + .max_elem = 
UINT_MAX, }, [RXE_TYPE_MR] = { - .name = "rxe-mr", + .name = "mr", .size = sizeof(struct rxe_mr), - .elem_offset = offsetof(struct rxe_mr, pelem), + .elem_offset = offsetof(struct rxe_mr, elem), .cleanup = rxe_mr_cleanup, - .flags = RXE_POOL_INDEX, .min_index = RXE_MIN_MR_INDEX, .max_index = RXE_MAX_MR_INDEX, + .max_elem = RXE_MAX_MR_INDEX - RXE_MIN_MR_INDEX + 1, }, [RXE_TYPE_MW] = { - .name = "rxe-mw", + .name = "mw", .size = sizeof(struct rxe_mw), - .elem_offset = offsetof(struct rxe_mw, pelem), + .elem_offset = offsetof(struct rxe_mw, elem), .cleanup = rxe_mw_cleanup, - .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC, .min_index = RXE_MIN_MW_INDEX, .max_index = RXE_MAX_MW_INDEX, - }, - [RXE_TYPE_MC_GRP] = { - .name = "rxe-mc_grp", - .size = sizeof(struct rxe_mc_grp), - .elem_offset = offsetof(struct rxe_mc_grp, pelem), - .cleanup = rxe_mc_cleanup, - .flags = RXE_POOL_KEY, - .key_offset = offsetof(struct rxe_mc_grp, mgid), - .key_size = sizeof(union ib_gid), - }, - [RXE_TYPE_MC_ELEM] = { - .name = "rxe-mc_elem", - .size = sizeof(struct rxe_mc_elem), - .elem_offset = offsetof(struct rxe_mc_elem, pelem), + .max_elem = RXE_MAX_MW_INDEX - RXE_MIN_MW_INDEX + 1, }, }; -static inline const char *pool_name(struct rxe_pool *pool) -{ - return rxe_type_info[pool->type].name; -} - -static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min) -{ - int err = 0; - - if ((max - min + 1) < pool->max_elem) { - pr_warn("not enough indices for max_elem\n"); - err = -EINVAL; - goto out; - } - - pool->index.max_index = max; - pool->index.min_index = min; - - pool->index.table = bitmap_zalloc(max - min + 1, GFP_KERNEL); - if (!pool->index.table) { - err = -ENOMEM; - goto out; - } - -out: - return err; -} - -int rxe_pool_init( - struct rxe_dev *rxe, - struct rxe_pool *pool, - enum rxe_elem_type type, - unsigned int max_elem) +void rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool, + enum rxe_elem_type type) { - int err = 0; - size_t size = rxe_type_info[type].size; + const 
struct rxe_type_info *info = &rxe_type_info[type]; memset(pool, 0, sizeof(*pool)); pool->rxe = rxe; + pool->name = info->name; pool->type = type; - pool->max_elem = max_elem; - pool->elem_size = ALIGN(size, RXE_POOL_ALIGN); - pool->flags = rxe_type_info[type].flags; - pool->index.tree = RB_ROOT; - pool->key.tree = RB_ROOT; - pool->cleanup = rxe_type_info[type].cleanup; + pool->max_elem = info->max_elem; + pool->elem_size = ALIGN(info->size, RXE_POOL_ALIGN); + pool->elem_offset = info->elem_offset; + pool->cleanup = info->cleanup; atomic_set(&pool->num_elem, 0); - rwlock_init(&pool->pool_lock); - - if (rxe_type_info[type].flags & RXE_POOL_INDEX) { - err = rxe_pool_init_index(pool, - rxe_type_info[type].max_index, - rxe_type_info[type].min_index); - if (err) - goto out; - } - - if (rxe_type_info[type].flags & RXE_POOL_KEY) { - pool->key.key_offset = rxe_type_info[type].key_offset; - pool->key.key_size = rxe_type_info[type].key_size; - } - -out: - return err; + xa_init_flags(&pool->xa, XA_FLAGS_ALLOC); + pool->limit.min = info->min_index; + pool->limit.max = info->max_index; } void rxe_pool_cleanup(struct rxe_pool *pool) { - if (atomic_read(&pool->num_elem) > 0) - pr_warn("%s pool destroyed with unfree'd elem\n", - pool_name(pool)); - - bitmap_free(pool->index.table); -} - -static u32 alloc_index(struct rxe_pool *pool) -{ - u32 index; - u32 range = pool->index.max_index - pool->index.min_index + 1; - - index = find_next_zero_bit(pool->index.table, range, pool->index.last); - if (index >= range) - index = find_first_zero_bit(pool->index.table, range); - - WARN_ON_ONCE(index >= range); - set_bit(index, pool->index.table); - pool->index.last = index; - return index + pool->index.min_index; -} - -static int rxe_insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new) -{ - struct rb_node **link = &pool->index.tree.rb_node; - struct rb_node *parent = NULL; - struct rxe_pool_entry *elem; - - while (*link) { - parent = *link; - elem = rb_entry(parent, struct 
rxe_pool_entry, index_node); - - if (elem->index == new->index) { - pr_warn("element already exists!\n"); - return -EINVAL; - } - - if (elem->index > new->index) - link = &(*link)->rb_left; - else - link = &(*link)->rb_right; - } - - rb_link_node(&new->index_node, parent, link); - rb_insert_color(&new->index_node, &pool->index.tree); - - return 0; -} - -static int rxe_insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new) -{ - struct rb_node **link = &pool->key.tree.rb_node; - struct rb_node *parent = NULL; - struct rxe_pool_entry *elem; - int cmp; - - while (*link) { - parent = *link; - elem = rb_entry(parent, struct rxe_pool_entry, key_node); - - cmp = memcmp((u8 *)elem + pool->key.key_offset, - (u8 *)new + pool->key.key_offset, pool->key.key_size); - - if (cmp == 0) { - pr_warn("key already exists!\n"); - return -EINVAL; - } - - if (cmp > 0) - link = &(*link)->rb_left; - else - link = &(*link)->rb_right; - } - - rb_link_node(&new->key_node, parent, link); - rb_insert_color(&new->key_node, &pool->key.tree); - - return 0; -} - -int __rxe_add_key_locked(struct rxe_pool_entry *elem, void *key) -{ - struct rxe_pool *pool = elem->pool; - int err; - - memcpy((u8 *)elem + pool->key.key_offset, key, pool->key.key_size); - err = rxe_insert_key(pool, elem); - - return err; -} - -int __rxe_add_key(struct rxe_pool_entry *elem, void *key) -{ - struct rxe_pool *pool = elem->pool; - unsigned long flags; - int err; - - write_lock_irqsave(&pool->pool_lock, flags); - err = __rxe_add_key_locked(elem, key); - write_unlock_irqrestore(&pool->pool_lock, flags); - - return err; -} - -void __rxe_drop_key_locked(struct rxe_pool_entry *elem) -{ - struct rxe_pool *pool = elem->pool; - - rb_erase(&elem->key_node, &pool->key.tree); -} - -void __rxe_drop_key(struct rxe_pool_entry *elem) -{ - struct rxe_pool *pool = elem->pool; - unsigned long flags; - - write_lock_irqsave(&pool->pool_lock, flags); - __rxe_drop_key_locked(elem); - write_unlock_irqrestore(&pool->pool_lock, flags); + 
WARN_ON(!xa_empty(&pool->xa)); } -int __rxe_add_index_locked(struct rxe_pool_entry *elem) -{ - struct rxe_pool *pool = elem->pool; - int err; - - elem->index = alloc_index(pool); - err = rxe_insert_index(pool, elem); - - return err; -} - -int __rxe_add_index(struct rxe_pool_entry *elem) +void *rxe_alloc(struct rxe_pool *pool) { - struct rxe_pool *pool = elem->pool; - unsigned long flags; + struct rxe_pool_elem *elem; + void *obj; int err; - write_lock_irqsave(&pool->pool_lock, flags); - err = __rxe_add_index_locked(elem); - write_unlock_irqrestore(&pool->pool_lock, flags); - - return err; -} - -void __rxe_drop_index_locked(struct rxe_pool_entry *elem) -{ - struct rxe_pool *pool = elem->pool; - - clear_bit(elem->index - pool->index.min_index, pool->index.table); - rb_erase(&elem->index_node, &pool->index.tree); -} - -void __rxe_drop_index(struct rxe_pool_entry *elem) -{ - struct rxe_pool *pool = elem->pool; - unsigned long flags; - - write_lock_irqsave(&pool->pool_lock, flags); - __rxe_drop_index_locked(elem); - write_unlock_irqrestore(&pool->pool_lock, flags); -} - -void *rxe_alloc_locked(struct rxe_pool *pool) -{ - const struct rxe_type_info *info = &rxe_type_info[pool->type]; - struct rxe_pool_entry *elem; - u8 *obj; + if (WARN_ON(!(pool->type == RXE_TYPE_MR))) + return NULL; if (atomic_inc_return(&pool->num_elem) > pool->max_elem) - goto out_cnt; + goto err_cnt; - obj = kzalloc(info->size, GFP_ATOMIC); + obj = kzalloc(pool->elem_size, GFP_KERNEL); if (!obj) - goto out_cnt; + goto err_cnt; - elem = (struct rxe_pool_entry *)(obj + info->elem_offset); + elem = (struct rxe_pool_elem *)((u8 *)obj + pool->elem_offset); elem->pool = pool; + elem->obj = obj; kref_init(&elem->ref_cnt); + init_completion(&elem->complete); + + /* allocate index in array but leave pointer as NULL so it + * can't be looked up until rxe_finalize() is called + */ + err = xa_alloc_cyclic(&pool->xa, &elem->index, NULL, pool->limit, + &pool->next, GFP_KERNEL); + if (err < 0) + goto err_free; 
return obj; -out_cnt: +err_free: + kfree(obj); +err_cnt: atomic_dec(&pool->num_elem); return NULL; } -void *rxe_alloc(struct rxe_pool *pool) +int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem, + bool sleepable) { - const struct rxe_type_info *info = &rxe_type_info[pool->type]; - struct rxe_pool_entry *elem; - u8 *obj; - - if (atomic_inc_return(&pool->num_elem) > pool->max_elem) - goto out_cnt; + int err; + gfp_t gfp_flags; - obj = kzalloc(info->size, GFP_KERNEL); - if (!obj) - goto out_cnt; + if (WARN_ON(pool->type == RXE_TYPE_MR)) + return -EINVAL; - elem = (struct rxe_pool_entry *)(obj + info->elem_offset); + if (atomic_inc_return(&pool->num_elem) > pool->max_elem) + goto err_cnt; elem->pool = pool; + elem->obj = (u8 *)elem - pool->elem_offset; kref_init(&elem->ref_cnt); + init_completion(&elem->complete); - return obj; - -out_cnt: - atomic_dec(&pool->num_elem); - return NULL; -} - -int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem) -{ - if (atomic_inc_return(&pool->num_elem) > pool->max_elem) - goto out_cnt; + /* AH objects are unique in that the create_ah verb + * can be called in atomic context. If the create_ah + * call is not sleepable use GFP_ATOMIC. + */ + gfp_flags = sleepable ? 
GFP_KERNEL : GFP_ATOMIC; - elem->pool = pool; - kref_init(&elem->ref_cnt); + if (sleepable) + might_sleep(); + err = xa_alloc_cyclic(&pool->xa, &elem->index, NULL, pool->limit, + &pool->next, gfp_flags); + if (err < 0) + goto err_cnt; return 0; -out_cnt: +err_cnt: atomic_dec(&pool->num_elem); return -EINVAL; } -void rxe_elem_release(struct kref *kref) +void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) { - struct rxe_pool_entry *elem = - container_of(kref, struct rxe_pool_entry, ref_cnt); - struct rxe_pool *pool = elem->pool; - const struct rxe_type_info *info = &rxe_type_info[pool->type]; - u8 *obj; + struct rxe_pool_elem *elem; + struct xarray *xa = &pool->xa; + void *obj; + + rcu_read_lock(); + elem = xa_load(xa, index); + if (elem && kref_get_unless_zero(&elem->ref_cnt)) + obj = elem->obj; + else + obj = NULL; + rcu_read_unlock(); - if (pool->cleanup) - pool->cleanup(elem); + return obj; +} - if (!(pool->flags & RXE_POOL_NO_ALLOC)) { - obj = (u8 *)elem - info->elem_offset; - kfree(obj); - } +static void rxe_elem_release(struct kref *kref) +{ + struct rxe_pool_elem *elem = container_of(kref, typeof(*elem), ref_cnt); - atomic_dec(&pool->num_elem); + complete(&elem->complete); } -void *rxe_pool_get_index_locked(struct rxe_pool *pool, u32 index) +int __rxe_cleanup(struct rxe_pool_elem *elem, bool sleepable) { - const struct rxe_type_info *info = &rxe_type_info[pool->type]; - struct rb_node *node; - struct rxe_pool_entry *elem; - u8 *obj; - - node = pool->index.tree.rb_node; - - while (node) { - elem = rb_entry(node, struct rxe_pool_entry, index_node); - - if (elem->index > index) - node = node->rb_left; - else if (elem->index < index) - node = node->rb_right; - else - break; - } - - if (node) { - kref_get(&elem->ref_cnt); - obj = (u8 *)elem - info->elem_offset; + struct rxe_pool *pool = elem->pool; + struct xarray *xa = &pool->xa; + static int timeout = RXE_POOL_TIMEOUT; + int ret, err = 0; + void *xa_ret; + + if (sleepable) + might_sleep(); + + /* erase 
xarray entry to prevent looking up + * the pool elem from its index + */ + xa_ret = xa_erase(xa, elem->index); + WARN_ON(xa_err(xa_ret)); + + /* if this is the last call to rxe_put complete the + * object. It is safe to touch obj->elem after this since + * it is freed below + */ + __rxe_put(elem); + + /* wait until all references to the object have been + * dropped before final object specific cleanup and + * return to rdma-core + */ + if (sleepable) { + if (!completion_done(&elem->complete) && timeout) { + ret = wait_for_completion_timeout(&elem->complete, + timeout); + + /* Shouldn't happen. There are still references to + * the object but, rather than deadlock, free the + * object or pass back to rdma-core. + */ + if (WARN_ON(!ret)) + err = -EINVAL; + } } else { - obj = NULL; + unsigned long until = jiffies + timeout; + + /* AH objects are unique in that the destroy_ah verb + * can be called in atomic context. This delay + * replaces the wait_for_completion call above + * when the destroy_ah call is not sleepable + */ + while (!completion_done(&elem->complete) && + time_before(jiffies, until)) + mdelay(1); + + if (WARN_ON(!completion_done(&elem->complete))) + err = -EINVAL; } - return obj; -} + if (pool->cleanup) + pool->cleanup(elem); -void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) -{ - u8 *obj; - unsigned long flags; + if (pool->type == RXE_TYPE_MR) + kfree_rcu(elem->obj); - read_lock_irqsave(&pool->pool_lock, flags); - obj = rxe_pool_get_index_locked(pool, index); - read_unlock_irqrestore(&pool->pool_lock, flags); + atomic_dec(&pool->num_elem); - return obj; + return err; } -void *rxe_pool_get_key_locked(struct rxe_pool *pool, void *key) +int __rxe_get(struct rxe_pool_elem *elem) { - const struct rxe_type_info *info = &rxe_type_info[pool->type]; - struct rb_node *node; - struct rxe_pool_entry *elem; - u8 *obj; - int cmp; - - node = pool->key.tree.rb_node; - - while (node) { - elem = rb_entry(node, struct rxe_pool_entry, key_node); - - cmp = 
memcmp((u8 *)elem + pool->key.key_offset, - key, pool->key.key_size); - - if (cmp > 0) - node = node->rb_left; - else if (cmp < 0) - node = node->rb_right; - else - break; - } - - if (node) { - kref_get(&elem->ref_cnt); - obj = (u8 *)elem - info->elem_offset; - } else { - obj = NULL; - } - - return obj; + return kref_get_unless_zero(&elem->ref_cnt); } -void *rxe_pool_get_key(struct rxe_pool *pool, void *key) +int __rxe_put(struct rxe_pool_elem *elem) { - u8 *obj; - unsigned long flags; + return kref_put(&elem->ref_cnt, rxe_elem_release); +} - read_lock_irqsave(&pool->pool_lock, flags); - obj = rxe_pool_get_key_locked(pool, key); - read_unlock_irqrestore(&pool->pool_lock, flags); +void __rxe_finalize(struct rxe_pool_elem *elem) +{ + void *xa_ret; - return obj; + xa_ret = xa_store(&elem->pool->xa, elem->index, elem, GFP_KERNEL); + WARN_ON(xa_err(xa_ret)); } diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h index 8ecd9f870aea..9d83cb32092f 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.h +++ b/drivers/infiniband/sw/rxe/rxe_pool.h @@ -7,15 +7,6 @@ #ifndef RXE_POOL_H #define RXE_POOL_H -#define RXE_POOL_ALIGN (16) -#define RXE_POOL_CACHE_FLAGS (0) - -enum rxe_pool_flags { - RXE_POOL_INDEX = BIT(1), - RXE_POOL_KEY = BIT(2), - RXE_POOL_NO_ALLOC = BIT(4), -}; - enum rxe_elem_type { RXE_TYPE_UC, RXE_TYPE_PD, @@ -25,137 +16,70 @@ enum rxe_elem_type { RXE_TYPE_CQ, RXE_TYPE_MR, RXE_TYPE_MW, - RXE_TYPE_MC_GRP, - RXE_TYPE_MC_ELEM, RXE_NUM_TYPES, /* keep me last */ }; -struct rxe_pool_entry; - -struct rxe_pool_entry { +struct rxe_pool_elem { struct rxe_pool *pool; + void *obj; struct kref ref_cnt; struct list_head list; - - /* only used if keyed */ - struct rb_node key_node; - - /* only used if indexed */ - struct rb_node index_node; + struct completion complete; u32 index; }; struct rxe_pool { struct rxe_dev *rxe; - rwlock_t pool_lock; /* protects pool add/del/search */ - size_t elem_size; - void (*cleanup)(struct rxe_pool_entry *obj); - 
enum rxe_pool_flags flags; + const char *name; + void (*cleanup)(struct rxe_pool_elem *elem); enum rxe_elem_type type; unsigned int max_elem; atomic_t num_elem; + size_t elem_size; + size_t elem_offset; - /* only used if indexed */ - struct { - struct rb_root tree; - unsigned long *table; - u32 last; - u32 max_index; - u32 min_index; - } index; - - /* only used if keyed */ - struct { - struct rb_root tree; - size_t key_offset; - size_t key_size; - } key; + struct xarray xa; + struct xa_limit limit; + u32 next; }; /* initialize a pool of objects with given limit on * number of elements. gets parameters from rxe_type_info * pool elements will be allocated out of a slab cache */ -int rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool, - enum rxe_elem_type type, u32 max_elem); +void rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool, + enum rxe_elem_type type); /* free resources from object pool */ void rxe_pool_cleanup(struct rxe_pool *pool); -/* allocate an object from pool holding and not holding the pool lock */ -void *rxe_alloc_locked(struct rxe_pool *pool); - +/* allocate an object from pool */ void *rxe_alloc(struct rxe_pool *pool); /* connect already allocated object to pool */ -int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem); - -#define rxe_add_to_pool(pool, obj) __rxe_add_to_pool(pool, &(obj)->pelem) - -/* assign an index to an indexed object and insert object into - * pool's rb tree holding and not holding the pool_lock - */ -int __rxe_add_index_locked(struct rxe_pool_entry *elem); - -#define rxe_add_index_locked(obj) __rxe_add_index_locked(&(obj)->pelem) - -int __rxe_add_index(struct rxe_pool_entry *elem); - -#define rxe_add_index(obj) __rxe_add_index(&(obj)->pelem) - -/* drop an index and remove object from rb tree - * holding and not holding the pool_lock - */ -void __rxe_drop_index_locked(struct rxe_pool_entry *elem); - -#define rxe_drop_index_locked(obj) __rxe_drop_index_locked(&(obj)->pelem) - -void 
__rxe_drop_index(struct rxe_pool_entry *elem); - -#define rxe_drop_index(obj) __rxe_drop_index(&(obj)->pelem) - -/* assign a key to a keyed object and insert object into - * pool's rb tree holding and not holding pool_lock - */ -int __rxe_add_key_locked(struct rxe_pool_entry *elem, void *key); - -#define rxe_add_key_locked(obj, key) __rxe_add_key_locked(&(obj)->pelem, key) - -int __rxe_add_key(struct rxe_pool_entry *elem, void *key); - -#define rxe_add_key(obj, key) __rxe_add_key(&(obj)->pelem, key) - -/* remove elem from rb tree holding and not holding the pool_lock */ -void __rxe_drop_key_locked(struct rxe_pool_entry *elem); - -#define rxe_drop_key_locked(obj) __rxe_drop_key_locked(&(obj)->pelem) - -void __rxe_drop_key(struct rxe_pool_entry *elem); - -#define rxe_drop_key(obj) __rxe_drop_key(&(obj)->pelem) - -/* lookup an indexed object from index holding and not holding the pool_lock. - * takes a reference on object - */ -void *rxe_pool_get_index_locked(struct rxe_pool *pool, u32 index); +int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem, + bool sleepable); +#define rxe_add_to_pool(pool, obj) __rxe_add_to_pool(pool, &(obj)->elem, true) +#define rxe_add_to_pool_ah(pool, obj, sleepable) __rxe_add_to_pool(pool, \ + &(obj)->elem, sleepable) +/* lookup an indexed object from index. takes a reference on object */ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index); -/* lookup keyed object from key holding and not holding the pool_lock. 
- * takes a reference on the objecti - */ -void *rxe_pool_get_key_locked(struct rxe_pool *pool, void *key); +int __rxe_get(struct rxe_pool_elem *elem); +#define rxe_get(obj) __rxe_get(&(obj)->elem) -void *rxe_pool_get_key(struct rxe_pool *pool, void *key); +int __rxe_put(struct rxe_pool_elem *elem); +#define rxe_put(obj) __rxe_put(&(obj)->elem) -/* cleanup an object when all references are dropped */ -void rxe_elem_release(struct kref *kref); +int __rxe_cleanup(struct rxe_pool_elem *elem, bool sleepable); +#define rxe_cleanup(obj) __rxe_cleanup(&(obj)->elem, true) +#define rxe_cleanup_ah(obj, sleepable) __rxe_cleanup(&(obj)->elem, sleepable) -/* take a reference on an object */ -#define rxe_add_ref(elem) kref_get(&(elem)->pelem.ref_cnt) +#define rxe_read(obj) kref_read(&(obj)->elem.ref_cnt) -/* drop a reference on an object */ -#define rxe_drop_ref(elem) kref_put(&(elem)->pelem.ref_cnt, rxe_elem_release) +void __rxe_finalize(struct rxe_pool_elem *elem); +#define rxe_finalize(obj) __rxe_finalize(&(obj)->elem) #endif /* RXE_POOL_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 54b8711321c1..a62bab88415c 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -19,34 +19,34 @@ static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap, int has_srq) { if (cap->max_send_wr > rxe->attr.max_qp_wr) { - pr_warn("invalid send wr = %d > %d\n", - cap->max_send_wr, rxe->attr.max_qp_wr); + pr_debug("invalid send wr = %u > %d\n", + cap->max_send_wr, rxe->attr.max_qp_wr); goto err1; } if (cap->max_send_sge > rxe->attr.max_send_sge) { - pr_warn("invalid send sge = %d > %d\n", - cap->max_send_sge, rxe->attr.max_send_sge); + pr_debug("invalid send sge = %u > %d\n", + cap->max_send_sge, rxe->attr.max_send_sge); goto err1; } if (!has_srq) { if (cap->max_recv_wr > rxe->attr.max_qp_wr) { - pr_warn("invalid recv wr = %d > %d\n", - cap->max_recv_wr, rxe->attr.max_qp_wr); + pr_debug("invalid recv wr 
= %u > %d\n", + cap->max_recv_wr, rxe->attr.max_qp_wr); goto err1; } if (cap->max_recv_sge > rxe->attr.max_recv_sge) { - pr_warn("invalid recv sge = %d > %d\n", - cap->max_recv_sge, rxe->attr.max_recv_sge); + pr_debug("invalid recv sge = %u > %d\n", + cap->max_recv_sge, rxe->attr.max_recv_sge); goto err1; } } if (cap->max_inline_data > rxe->max_inline_data) { - pr_warn("invalid max inline data = %d > %d\n", - cap->max_inline_data, rxe->max_inline_data); + pr_debug("invalid max inline data = %u > %d\n", + cap->max_inline_data, rxe->max_inline_data); goto err1; } @@ -63,7 +63,6 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init) int port_num = init->port_num; switch (init->qp_type) { - case IB_QPT_SMI: case IB_QPT_GSI: case IB_QPT_RC: case IB_QPT_UC: @@ -74,28 +73,23 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init) } if (!init->recv_cq || !init->send_cq) { - pr_warn("missing cq\n"); + pr_debug("missing cq\n"); goto err1; } if (rxe_qp_chk_cap(rxe, cap, !!init->srq)) goto err1; - if (init->qp_type == IB_QPT_SMI || init->qp_type == IB_QPT_GSI) { + if (init->qp_type == IB_QPT_GSI) { if (!rdma_is_port_valid(&rxe->ib_dev, port_num)) { - pr_warn("invalid port = %d\n", port_num); + pr_debug("invalid port = %d\n", port_num); goto err1; } port = &rxe->port; - if (init->qp_type == IB_QPT_SMI && port->qp_smi_index) { - pr_warn("SMI QP exists for port %d\n", port_num); - goto err1; - } - if (init->qp_type == IB_QPT_GSI && port->qp_gsi_index) { - pr_warn("GSI QP exists for port %d\n", port_num); + pr_debug("GSI QP exists for port %d\n", port_num); goto err1; } } @@ -126,21 +120,15 @@ static void free_rd_atomic_resources(struct rxe_qp *qp) for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) { struct resp_res *res = &qp->resp.resources[i]; - free_rd_atomic_resource(qp, res); + free_rd_atomic_resource(res); } kfree(qp->resp.resources); qp->resp.resources = NULL; } } -void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res) 
+void free_rd_atomic_resource(struct resp_res *res) { - if (res->type == RXE_ATOMIC_MASK) { - kfree_skb(res->atomic.skb); - } else if (res->type == RXE_READ_MASK) { - if (res->read.mr) - rxe_drop_ref(res->read.mr); - } res->type = 0; } @@ -152,7 +140,7 @@ static void cleanup_rd_atomic_resources(struct rxe_qp *qp) if (qp->resp.resources) { for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) { res = &qp->resp.resources[i]; - free_rd_atomic_resource(qp, res); + free_rd_atomic_resource(res); } } } @@ -167,16 +155,10 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp, qp->attr.path_mtu = 1; qp->mtu = ib_mtu_enum_to_int(qp->attr.path_mtu); - qpn = qp->pelem.index; + qpn = qp->elem.index; port = &rxe->port; switch (init->qp_type) { - case IB_QPT_SMI: - qp->ibqp.qp_num = 0; - port->qp_smi_index = qpn; - qp->attr.port_num = init->port_num; - break; - case IB_QPT_GSI: qp->ibqp.qp_num = 1; port->qp_gsi_index = qpn; @@ -188,11 +170,16 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp, break; } - INIT_LIST_HEAD(&qp->grp_list); - - spin_lock_init(&qp->grp_lock); spin_lock_init(&qp->state_lock); + spin_lock_init(&qp->req.task.state_lock); + spin_lock_init(&qp->resp.task.state_lock); + spin_lock_init(&qp->comp.task.state_lock); + + spin_lock_init(&qp->sq.sq_lock); + spin_lock_init(&qp->rq.producer_lock); + spin_lock_init(&qp->rq.consumer_lock); + atomic_set(&qp->ssn, 0); atomic_set(&qp->skb_out, 0); } @@ -217,8 +204,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, * the port number must be in the Dynamic Ports range * (0xc000 - 0xffff). 
*/ - qp->src_port = RXE_ROCE_V2_SPORT + - (hash_32_generic(qp_num(qp), 14) & 0x3fff); + qp->src_port = RXE_ROCE_V2_SPORT + (hash_32(qp_num(qp), 14) & 0x3fff); qp->sq.max_wr = init->cap.max_send_wr; /* These caps are limited by rxe_qp_chk_cap() done by the caller */ @@ -250,15 +236,15 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, QUEUE_TYPE_FROM_CLIENT); qp->req.state = QP_STATE_RESET; + qp->comp.state = QP_STATE_RESET; qp->req.opcode = -1; qp->comp.opcode = -1; - spin_lock_init(&qp->sq.sq_lock); skb_queue_head_init(&qp->req_pkts); - rxe_init_task(rxe, &qp->req.task, qp, + rxe_init_task(&qp->req.task, qp, rxe_requester, "req"); - rxe_init_task(rxe, &qp->comp.task, qp, + rxe_init_task(&qp->comp.task, qp, rxe_completer, "comp"); qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */ @@ -304,12 +290,9 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, } } - spin_lock_init(&qp->rq.producer_lock); - spin_lock_init(&qp->rq.consumer_lock); - skb_queue_head_init(&qp->resp_pkts); - rxe_init_task(rxe, &qp->resp.task, qp, + rxe_init_task(&qp->resp.task, qp, rxe_responder, "resp"); qp->resp.opcode = OPCODE_NONE; @@ -331,17 +314,20 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, struct rxe_cq *scq = to_rcq(init->send_cq); struct rxe_srq *srq = init->srq ? 
to_rsrq(init->srq) : NULL; - rxe_add_ref(pd); - rxe_add_ref(rcq); - rxe_add_ref(scq); + rxe_get(pd); + rxe_get(rcq); + rxe_get(scq); if (srq) - rxe_add_ref(srq); + rxe_get(srq); qp->pd = pd; qp->rcq = rcq; qp->scq = scq; qp->srq = srq; + atomic_inc(&rcq->num_wq); + atomic_inc(&scq->num_wq); + rxe_qp_init_misc(rxe, qp, init); err = rxe_qp_init_req(rxe, qp, init, udata, uresp); @@ -361,16 +347,19 @@ err2: rxe_queue_cleanup(qp->sq.queue); qp->sq.queue = NULL; err1: + atomic_dec(&rcq->num_wq); + atomic_dec(&scq->num_wq); + qp->pd = NULL; qp->rcq = NULL; qp->scq = NULL; qp->srq = NULL; if (srq) - rxe_drop_ref(srq); - rxe_drop_ref(scq); - rxe_drop_ref(rcq); - rxe_drop_ref(pd); + rxe_put(srq); + rxe_put(scq); + rxe_put(rcq); + rxe_put(pd); return err; } @@ -413,7 +402,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, attr->qp_state : cur_state; if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask)) { - pr_warn("invalid mask or state for qp\n"); + pr_debug("invalid mask or state for qp\n"); goto err1; } @@ -427,7 +416,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, if (mask & IB_QP_PORT) { if (!rdma_is_port_valid(&rxe->ib_dev, attr->port_num)) { - pr_warn("invalid port %d\n", attr->port_num); + pr_debug("invalid port %d\n", attr->port_num); goto err1; } } @@ -442,12 +431,12 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr)) goto err1; if (!rdma_is_port_valid(&rxe->ib_dev, attr->alt_port_num)) { - pr_warn("invalid alt port %d\n", attr->alt_port_num); + pr_debug("invalid alt port %d\n", attr->alt_port_num); goto err1; } if (attr->alt_timeout > 31) { - pr_warn("invalid QP alt timeout %d > 31\n", - attr->alt_timeout); + pr_debug("invalid QP alt timeout %d > 31\n", + attr->alt_timeout); goto err1; } } @@ -468,17 +457,16 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, if (mask & IB_QP_MAX_QP_RD_ATOMIC) { if (attr->max_rd_atomic > rxe->attr.max_qp_rd_atom) { - 
pr_warn("invalid max_rd_atomic %d > %d\n", - attr->max_rd_atomic, - rxe->attr.max_qp_rd_atom); + pr_debug("invalid max_rd_atomic %d > %d\n", + attr->max_rd_atomic, + rxe->attr.max_qp_rd_atom); goto err1; } } if (mask & IB_QP_TIMEOUT) { if (attr->timeout > 31) { - pr_warn("invalid QP timeout %d > 31\n", - attr->timeout); + pr_debug("invalid QP timeout %d > 31\n", attr->timeout); goto err1; } } @@ -504,6 +492,7 @@ static void rxe_qp_reset(struct rxe_qp *qp) /* move qp to the reset state */ qp->req.state = QP_STATE_RESET; + qp->comp.state = QP_STATE_RESET; qp->resp.state = QP_STATE_RESET; /* let state machines reset themselves drain work and packet queues @@ -521,6 +510,7 @@ static void rxe_qp_reset(struct rxe_qp *qp) atomic_set(&qp->ssn, 0); qp->req.opcode = -1; qp->req.need_retry = 0; + qp->req.wait_for_rnr_timer = 0; qp->req.noack_pkts = 0; qp->resp.msn = 0; qp->resp.opcode = -1; @@ -529,7 +519,7 @@ static void rxe_qp_reset(struct rxe_qp *qp) qp->resp.sent_psn_nak = 0; if (qp->resp.mr) { - rxe_drop_ref(qp->resp.mr); + rxe_put(qp->resp.mr); qp->resp.mr = NULL; } @@ -566,6 +556,7 @@ void rxe_qp_error(struct rxe_qp *qp) { qp->req.state = QP_STATE_ERROR; qp->resp.state = QP_STATE_ERROR; + qp->comp.state = QP_STATE_ERROR; qp->attr.qp_state = IB_QPS_ERR; /* drain work and packet queues */ @@ -703,6 +694,7 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, pr_debug("qp#%d state -> INIT\n", qp_num(qp)); qp->req.state = QP_STATE_INIT; qp->resp.state = QP_STATE_INIT; + qp->comp.state = QP_STATE_INIT; break; case IB_QPS_RTR: @@ -713,6 +705,7 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, case IB_QPS_RTS: pr_debug("qp#%d state -> RTS\n", qp_num(qp)); qp->req.state = QP_STATE_READY; + qp->comp.state = QP_STATE_READY; break; case IB_QPS_SQD: @@ -771,9 +764,25 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask) return 0; } -/* called by the destroy qp verb */ -void rxe_qp_destroy(struct rxe_qp *qp) 
+int rxe_qp_chk_destroy(struct rxe_qp *qp) { + /* See IBA o10-2.2.3 + * An attempt to destroy a QP while attached to a mcast group + * will fail immediately. + */ + if (atomic_read(&qp->mcg_num)) { + pr_debug("Attempt to destroy QP while attached to multicast group\n"); + return -EBUSY; + } + + return 0; +} + +/* called when the last reference to the qp is dropped */ +static void rxe_qp_do_cleanup(struct work_struct *work) +{ + struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work); + qp->valid = 0; qp->qp_timeout_jiffies = 0; rxe_cleanup_task(&qp->resp.task); @@ -787,54 +796,54 @@ void rxe_qp_destroy(struct rxe_qp *qp) rxe_cleanup_task(&qp->comp.task); /* flush out any receive wr's or pending requests */ - __rxe_do_task(&qp->req.task); + if (qp->req.task.func) + __rxe_do_task(&qp->req.task); + if (qp->sq.queue) { __rxe_do_task(&qp->comp.task); __rxe_do_task(&qp->req.task); } -} - -/* called when the last reference to the qp is dropped */ -static void rxe_qp_do_cleanup(struct work_struct *work) -{ - struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work); - - rxe_drop_all_mcast_groups(qp); if (qp->sq.queue) rxe_queue_cleanup(qp->sq.queue); if (qp->srq) - rxe_drop_ref(qp->srq); + rxe_put(qp->srq); if (qp->rq.queue) rxe_queue_cleanup(qp->rq.queue); - if (qp->scq) - rxe_drop_ref(qp->scq); - if (qp->rcq) - rxe_drop_ref(qp->rcq); - if (qp->pd) - rxe_drop_ref(qp->pd); + if (qp->scq) { + atomic_dec(&qp->scq->num_wq); + rxe_put(qp->scq); + } - if (qp->resp.mr) { - rxe_drop_ref(qp->resp.mr); - qp->resp.mr = NULL; + if (qp->rcq) { + atomic_dec(&qp->rcq->num_wq); + rxe_put(qp->rcq); } + if (qp->pd) + rxe_put(qp->pd); + + if (qp->resp.mr) + rxe_put(qp->resp.mr); + if (qp_type(qp) == IB_QPT_RC) sk_dst_reset(qp->sk->sk); free_rd_atomic_resources(qp); - kernel_sock_shutdown(qp->sk, SHUT_RDWR); - sock_release(qp->sk); + if (qp->sk) { + kernel_sock_shutdown(qp->sk, SHUT_RDWR); + sock_release(qp->sk); + } } /* called when the last reference to the qp 
is dropped */ -void rxe_qp_cleanup(struct rxe_pool_entry *arg) +void rxe_qp_cleanup(struct rxe_pool_elem *elem) { - struct rxe_qp *qp = container_of(arg, typeof(*qp), pelem); + struct rxe_qp *qp = container_of(elem, typeof(*qp), elem); execute_in_process_context(rxe_qp_do_cleanup, &qp->cleanup_work); } diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c index 6e6e023c1b45..d6dbf5a0058d 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.c +++ b/drivers/infiniband/sw/rxe/rxe_queue.c @@ -112,23 +112,25 @@ static int resize_finish(struct rxe_queue *q, struct rxe_queue *new_q, unsigned int num_elem) { enum queue_type type = q->type; + u32 new_prod; u32 prod; u32 cons; if (!queue_empty(q, q->type) && (num_elem < queue_count(q, type))) return -EINVAL; - prod = queue_get_producer(new_q, type); + new_prod = queue_get_producer(new_q, type); + prod = queue_get_producer(q, type); cons = queue_get_consumer(q, type); - while (!queue_empty(q, type)) { - memcpy(queue_addr_from_index(new_q, prod), + while ((prod - cons) & q->index_mask) { + memcpy(queue_addr_from_index(new_q, new_prod), queue_addr_from_index(q, cons), new_q->elem_size); - prod = queue_next_index(new_q, prod); + new_prod = queue_next_index(new_q, new_prod); cons = queue_next_index(q, cons); } - new_q->buf->producer_index = prod; + new_q->buf->producer_index = new_prod; q->buf->consumer_index = cons; /* update private index copies */ @@ -151,7 +153,8 @@ int rxe_queue_resize(struct rxe_queue *q, unsigned int *num_elem_p, struct rxe_queue *new_q; unsigned int num_elem = *num_elem_p; int err; - unsigned long flags = 0, flags1; + unsigned long producer_flags; + unsigned long consumer_flags; new_q = rxe_queue_init(q->rxe, &num_elem, elem_size, q->type); if (!new_q) @@ -165,17 +168,17 @@ int rxe_queue_resize(struct rxe_queue *q, unsigned int *num_elem_p, goto err1; } - spin_lock_irqsave(consumer_lock, flags1); + spin_lock_irqsave(consumer_lock, consumer_flags); if (producer_lock) { - 
spin_lock_irqsave(producer_lock, flags); + spin_lock_irqsave(producer_lock, producer_flags); err = resize_finish(q, new_q, num_elem); - spin_unlock_irqrestore(producer_lock, flags); + spin_unlock_irqrestore(producer_lock, producer_flags); } else { err = resize_finish(q, new_q, num_elem); } - spin_unlock_irqrestore(consumer_lock, flags1); + spin_unlock_irqrestore(consumer_lock, consumer_flags); rxe_queue_cleanup(new_q); /* new/old dep on err */ if (err) diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h index 6227112ef7a2..ed44042782fa 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.h +++ b/drivers/infiniband/sw/rxe/rxe_queue.h @@ -7,9 +7,6 @@ #ifndef RXE_QUEUE_H #define RXE_QUEUE_H -/* for definition of shared struct rxe_queue_buf */ -#include <uapi/rdma/rdma_user_rxe.h> - /* Implements a simple circular buffer that is shared between user * and the driver and can be resized. The requested element size is * rounded up to a power of 2 and the number of elements in the buffer @@ -53,6 +50,8 @@ enum queue_type { QUEUE_TYPE_FROM_DRIVER, }; +struct rxe_queue_buf; + struct rxe_queue { struct rxe_dev *rxe; struct rxe_queue_buf *buf; diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 6a6cc1fa90e4..434a693cd4a5 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -16,48 +16,36 @@ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, unsigned int pkt_type; if (unlikely(!qp->valid)) - goto err1; + return -EINVAL; pkt_type = pkt->opcode & 0xe0; switch (qp_type(qp)) { case IB_QPT_RC: - if (unlikely(pkt_type != IB_OPCODE_RC)) { - pr_warn_ratelimited("bad qp type\n"); - goto err1; - } + if (unlikely(pkt_type != IB_OPCODE_RC)) + return -EINVAL; break; case IB_QPT_UC: - if (unlikely(pkt_type != IB_OPCODE_UC)) { - pr_warn_ratelimited("bad qp type\n"); - goto err1; - } + if (unlikely(pkt_type != IB_OPCODE_UC)) + return -EINVAL; break; 
case IB_QPT_UD: - case IB_QPT_SMI: case IB_QPT_GSI: - if (unlikely(pkt_type != IB_OPCODE_UD)) { - pr_warn_ratelimited("bad qp type\n"); - goto err1; - } + if (unlikely(pkt_type != IB_OPCODE_UD)) + return -EINVAL; break; default: - pr_warn_ratelimited("unsupported qp type\n"); - goto err1; + return -EINVAL; } if (pkt->mask & RXE_REQ_MASK) { if (unlikely(qp->resp.state != QP_STATE_READY)) - goto err1; + return -EINVAL; } else if (unlikely(qp->req.state < QP_STATE_READY || - qp->req.state > QP_STATE_DRAINED)) { - goto err1; - } + qp->req.state > QP_STATE_DRAINED)) + return -EINVAL; return 0; - -err1: - return -EINVAL; } static void set_bad_pkey_cntr(struct rxe_port *port) @@ -85,26 +73,20 @@ static int check_keys(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, pkt->pkey_index = 0; if (!pkey_match(pkey, IB_DEFAULT_PKEY_FULL)) { - pr_warn_ratelimited("bad pkey = 0x%x\n", pkey); set_bad_pkey_cntr(port); - goto err1; + return -EINVAL; } if (qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) { u32 qkey = (qpn == 1) ? 
GSI_QKEY : qp->attr.qkey; if (unlikely(deth_qkey(pkt) != qkey)) { - pr_warn_ratelimited("bad qkey, got 0x%x expected 0x%x for qpn 0x%x\n", - deth_qkey(pkt), qkey, qpn); set_qkey_viol_cntr(port); - goto err1; + return -EINVAL; } } return 0; - -err1: - return -EINVAL; } static int check_addr(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, @@ -113,13 +95,10 @@ static int check_addr(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct sk_buff *skb = PKT_TO_SKB(pkt); if (qp_type(qp) != IB_QPT_RC && qp_type(qp) != IB_QPT_UC) - goto done; + return 0; - if (unlikely(pkt->port_num != qp->attr.port_num)) { - pr_warn_ratelimited("port %d != qp port %d\n", - pkt->port_num, qp->attr.port_num); - goto err1; - } + if (unlikely(pkt->port_num != qp->attr.port_num)) + return -EINVAL; if (skb->protocol == htons(ETH_P_IP)) { struct in_addr *saddr = @@ -127,19 +106,9 @@ static int check_addr(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct in_addr *daddr = &qp->pri_av.dgid_addr._sockaddr_in.sin_addr; - if (ip_hdr(skb)->daddr != saddr->s_addr) { - pr_warn_ratelimited("dst addr %pI4 != qp source addr %pI4\n", - &ip_hdr(skb)->daddr, - &saddr->s_addr); - goto err1; - } - - if (ip_hdr(skb)->saddr != daddr->s_addr) { - pr_warn_ratelimited("source addr %pI4 != qp dst addr %pI4\n", - &ip_hdr(skb)->saddr, - &daddr->s_addr); - goto err1; - } + if ((ip_hdr(skb)->daddr != saddr->s_addr) || + (ip_hdr(skb)->saddr != daddr->s_addr)) + return -EINVAL; } else if (skb->protocol == htons(ETH_P_IPV6)) { struct in6_addr *saddr = @@ -147,24 +116,12 @@ static int check_addr(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct in6_addr *daddr = &qp->pri_av.dgid_addr._sockaddr_in6.sin6_addr; - if (memcmp(&ipv6_hdr(skb)->daddr, saddr, sizeof(*saddr))) { - pr_warn_ratelimited("dst addr %pI6 != qp source addr %pI6\n", - &ipv6_hdr(skb)->daddr, saddr); - goto err1; - } - - if (memcmp(&ipv6_hdr(skb)->saddr, daddr, sizeof(*daddr))) { - pr_warn_ratelimited("source addr %pI6 != qp dst addr %pI6\n", - 
&ipv6_hdr(skb)->saddr, daddr); - goto err1; - } + if (memcmp(&ipv6_hdr(skb)->daddr, saddr, sizeof(*saddr)) || + memcmp(&ipv6_hdr(skb)->saddr, daddr, sizeof(*daddr))) + return -EINVAL; } -done: return 0; - -err1: - return -EINVAL; } static int hdr_check(struct rxe_pkt_info *pkt) @@ -176,24 +133,18 @@ static int hdr_check(struct rxe_pkt_info *pkt) int index; int err; - if (unlikely(bth_tver(pkt) != BTH_TVER)) { - pr_warn_ratelimited("bad tver\n"); + if (unlikely(bth_tver(pkt) != BTH_TVER)) goto err1; - } - if (unlikely(qpn == 0)) { - pr_warn_once("QP 0 not supported"); + if (unlikely(qpn == 0)) goto err1; - } if (qpn != IB_MULTICAST_QPN) { index = (qpn == 1) ? port->qp_gsi_index : qpn; qp = rxe_pool_get_index(&rxe->qp_pool, index); - if (unlikely(!qp)) { - pr_warn_ratelimited("no qp matches qpn 0x%x\n", qpn); + if (unlikely(!qp)) goto err1; - } err = check_type_state(rxe, pkt, qp); if (unlikely(err)) @@ -207,17 +158,15 @@ static int hdr_check(struct rxe_pkt_info *pkt) if (unlikely(err)) goto err2; } else { - if (unlikely((pkt->mask & RXE_GRH_MASK) == 0)) { - pr_warn_ratelimited("no grh for mcast qpn\n"); + if (unlikely((pkt->mask & RXE_GRH_MASK) == 0)) goto err1; - } } pkt->qp = qp; return 0; err2: - rxe_drop_ref(qp); + rxe_put(qp); err1: return -EINVAL; } @@ -233,8 +182,8 @@ static inline void rxe_rcv_pkt(struct rxe_pkt_info *pkt, struct sk_buff *skb) static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) { struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); - struct rxe_mc_grp *mcg; - struct rxe_mc_elem *mce; + struct rxe_mcg *mcg; + struct rxe_mca *mca; struct rxe_qp *qp; union ib_gid dgid; int err; @@ -246,19 +195,19 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) memcpy(&dgid, &ipv6_hdr(skb)->daddr, sizeof(dgid)); /* lookup mcast group corresponding to mgid, takes a ref */ - mcg = rxe_pool_get_key(&rxe->mc_grp_pool, &dgid); + mcg = rxe_lookup_mcg(rxe, &dgid); if (!mcg) goto drop; /* mcast group not registered */ - 
spin_lock_bh(&mcg->mcg_lock); + spin_lock_bh(&rxe->mcg_lock); /* this is unreliable datagram service so we let * failures to deliver a multicast packet to a * single QP happen and just move on and try * the rest of them on the list */ - list_for_each_entry(mce, &mcg->qp_list, qp_list) { - qp = mce->qp; + list_for_each_entry(mca, &mcg->qp_list, qp_list) { + qp = mca->qp; /* validate qp for incoming packet */ err = check_type_state(rxe, pkt, qp); @@ -273,7 +222,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) * skb and pass to the QP. Pass the original skb to * the last QP in the list. */ - if (mce->qp_list.next != &mcg->qp_list) { + if (mca->qp_list.next != &mcg->qp_list) { struct sk_buff *cskb; struct rxe_pkt_info *cpkt; @@ -288,19 +237,19 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) cpkt = SKB_TO_PKT(cskb); cpkt->qp = qp; - rxe_add_ref(qp); + rxe_get(qp); rxe_rcv_pkt(cpkt, cskb); } else { pkt->qp = qp; - rxe_add_ref(qp); + rxe_get(qp); rxe_rcv_pkt(pkt, skb); skb = NULL; /* mark consumed */ } } - spin_unlock_bh(&mcg->mcg_lock); + spin_unlock_bh(&rxe->mcg_lock); - rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. 
*/ + kref_put(&mcg->ref_cnt, rxe_cleanup_mcg); if (likely(!skb)) return; @@ -365,10 +314,8 @@ void rxe_rcv(struct sk_buff *skb) if (unlikely(skb->len < RXE_BTH_BYTES)) goto drop; - if (rxe_chk_dgid(rxe, skb) < 0) { - pr_warn_ratelimited("failed checking dgid\n"); + if (rxe_chk_dgid(rxe, skb) < 0) goto drop; - } pkt->opcode = bth_opcode(pkt); pkt->psn = bth_psn(pkt); @@ -397,7 +344,7 @@ void rxe_rcv(struct sk_buff *skb) drop: if (pkt->qp) - rxe_drop_ref(pkt->qp); + rxe_put(pkt->qp); kfree_skb(skb); ib_device_put(&rxe->ib_dev); diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 0c9d2af15f3d..f63771207970 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -15,8 +15,7 @@ static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe, u32 opcode); static inline void retry_first_write_send(struct rxe_qp *qp, - struct rxe_send_wqe *wqe, - unsigned int mask, int npsn) + struct rxe_send_wqe *wqe, int npsn) { int i; @@ -33,8 +32,6 @@ static inline void retry_first_write_send(struct rxe_qp *qp, } else { advance_dma_data(&wqe->dma, to_send); } - if (mask & WR_WRITE_MASK) - wqe->iova += qp->mtu; } } @@ -85,7 +82,7 @@ static void req_retry(struct rxe_qp *qp) if (mask & WR_WRITE_OR_SEND_MASK) { npsn = (qp->comp.psn - wqe->first_psn) & BTH_PSN_MASK; - retry_first_write_send(qp, wqe, mask, npsn); + retry_first_write_send(qp, wqe, npsn); } if (mask & WR_READ_MASK) { @@ -103,14 +100,17 @@ void rnr_nak_timer(struct timer_list *t) { struct rxe_qp *qp = from_timer(qp, t, rnr_nak_timer); - pr_debug("qp#%d rnr nak timer fired\n", qp_num(qp)); + pr_debug("%s: fired for qp#%d\n", __func__, qp_num(qp)); + + /* request a send queue retry */ + qp->req.need_retry = 1; + qp->req.wait_for_rnr_timer = 0; rxe_run_task(&qp->req.task, 1); } static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp) { struct rxe_send_wqe *wqe; - unsigned long flags; struct rxe_queue *q = qp->sq.queue; unsigned int index = 
qp->req.wqe_index; unsigned int cons; @@ -124,25 +124,23 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp) /* check to see if we are drained; * state_lock used by requester and completer */ - spin_lock_irqsave(&qp->state_lock, flags); + spin_lock_bh(&qp->state_lock); do { if (qp->req.state != QP_STATE_DRAIN) { /* comp just finished */ - spin_unlock_irqrestore(&qp->state_lock, - flags); + spin_unlock_bh(&qp->state_lock); break; } if (wqe && ((index != cons) || (wqe->state != wqe_state_posted))) { /* comp not done yet */ - spin_unlock_irqrestore(&qp->state_lock, - flags); + spin_unlock_bh(&qp->state_lock); break; } qp->req.state = QP_STATE_DRAINED; - spin_unlock_irqrestore(&qp->state_lock, flags); + spin_unlock_bh(&qp->state_lock); if (qp->ibqp.event_handler) { struct ib_event ev; @@ -166,16 +164,36 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp) (wqe->state != wqe_state_processing))) return NULL; - if (unlikely((wqe->wr.send_flags & IB_SEND_FENCE) && - (index != cons))) { - qp->req.wait_fence = 1; - return NULL; - } - wqe->mask = wr_opcode_mask(wqe->wr.opcode, qp); return wqe; } +/** + * rxe_wqe_is_fenced - check if next wqe is fenced + * @qp: the queue pair + * @wqe: the next wqe + * + * Returns: 1 if wqe needs to wait + * 0 if wqe is ready to go + */ +static int rxe_wqe_is_fenced(struct rxe_qp *qp, struct rxe_send_wqe *wqe) +{ + /* Local invalidate fence (LIF) see IBA 10.6.5.1 + * Requires ALL previous operations on the send queue + * are complete. Make mandatory for the rxe driver. + */ + if (wqe->wr.opcode == IB_WR_LOCAL_INV) + return qp->req.wqe_index != queue_get_consumer(qp->sq.queue, + QUEUE_TYPE_FROM_CLIENT); + + /* Fence see IBA 10.8.3.3 + * Requires that all previous read and atomic operations + * are complete. 
+ */ + return (wqe->wr.send_flags & IB_SEND_FENCE) && + atomic_read(&qp->req.rd_atomic) != qp->attr.max_rd_atomic; +} + static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits) { switch (opcode) { @@ -311,7 +329,6 @@ static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe, case IB_QPT_UC: return next_opcode_uc(qp, opcode, fits); - case IB_QPT_SMI: case IB_QPT_UD: case IB_QPT_GSI: switch (opcode) { @@ -361,38 +378,25 @@ static inline int get_mtu(struct rxe_qp *qp) } static struct sk_buff *init_req_packet(struct rxe_qp *qp, + struct rxe_av *av, struct rxe_send_wqe *wqe, - int opcode, int payload, + int opcode, u32 payload, struct rxe_pkt_info *pkt) { struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct sk_buff *skb; struct rxe_send_wr *ibwr = &wqe->wr; - struct rxe_av *av; int pad = (-payload) & 0x3; int paylen; int solicited; - u16 pkey; u32 qp_num; int ack_req; /* length from start of bth to end of icrc */ paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE; - - /* pkt->hdr, port_num and mask are initialized in ifc layer */ - pkt->rxe = rxe; - pkt->opcode = opcode; - pkt->qp = qp; - pkt->psn = qp->req.psn; - pkt->mask = rxe_opcode[opcode].mask; - pkt->paylen = paylen; - pkt->wqe = wqe; + pkt->paylen = paylen; /* init skb */ - av = rxe_get_av(pkt); - if (!av) - return NULL; - skb = rxe_init_packet(rxe, av, paylen, pkt); if (unlikely(!skb)) return NULL; @@ -404,8 +408,6 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, (pkt->mask & (RXE_WRITE_MASK | RXE_IMMDT_MASK)) == (RXE_WRITE_MASK | RXE_IMMDT_MASK)); - pkey = IB_DEFAULT_PKEY_FULL; - qp_num = (pkt->mask & RXE_DETH_MASK) ? 
ibwr->wr.ud.remote_qpn : qp->attr.dest_qp_num; @@ -414,7 +416,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, if (ack_req) qp->req.noack_pkts = 0; - bth_init(pkt, pkt->opcode, solicited, 0, pad, pkey, qp_num, + bth_init(pkt, pkt->opcode, solicited, 0, pad, IB_DEFAULT_PKEY_FULL, qp_num, ack_req, pkt->psn); /* init optional headers */ @@ -432,8 +434,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, if (pkt->mask & RXE_ATMETH_MASK) { atmeth_set_va(pkt, wqe->iova); - if (opcode == IB_OPCODE_RC_COMPARE_SWAP || - opcode == IB_OPCODE_RD_COMPARE_SWAP) { + if (opcode == IB_OPCODE_RC_COMPARE_SWAP) { atmeth_set_swap_add(pkt, ibwr->wr.atomic.swap); atmeth_set_comp(pkt, ibwr->wr.atomic.compare_add); } else { @@ -453,13 +454,13 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, return skb; } -static int finish_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe, - struct rxe_pkt_info *pkt, struct sk_buff *skb, - int paylen) +static int finish_packet(struct rxe_qp *qp, struct rxe_av *av, + struct rxe_send_wqe *wqe, struct rxe_pkt_info *pkt, + struct sk_buff *skb, u32 payload) { int err; - err = rxe_prepare(pkt, skb); + err = rxe_prepare(av, pkt, skb); if (err) return err; @@ -467,19 +468,19 @@ static int finish_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe, if (wqe->wr.send_flags & IB_SEND_INLINE) { u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset]; - memcpy(payload_addr(pkt), tmp, paylen); + memcpy(payload_addr(pkt), tmp, payload); - wqe->dma.resid -= paylen; - wqe->dma.sge_offset += paylen; + wqe->dma.resid -= payload; + wqe->dma.sge_offset += payload; } else { err = copy_data(qp->pd, 0, &wqe->dma, - payload_addr(pkt), paylen, + payload_addr(pkt), payload, RXE_FROM_MR_OBJ); if (err) return err; } if (bth_pad(pkt)) { - u8 *pad = payload_addr(pkt) + paylen; + u8 *pad = payload_addr(pkt) + payload; memset(pad, 0, bth_pad(pkt)); } @@ -503,7 +504,7 @@ static void update_wqe_state(struct rxe_qp *qp, static void update_wqe_psn(struct rxe_qp 
*qp, struct rxe_send_wqe *wqe, struct rxe_pkt_info *pkt, - int payload) + u32 payload) { /* number of packets left to send including current one */ int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu; @@ -545,8 +546,7 @@ static void rollback_state(struct rxe_send_wqe *wqe, qp->req.psn = rollback_psn; } -static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, - struct rxe_pkt_info *pkt, int payload) +static void update_state(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { qp->req.opcode = pkt->opcode; @@ -604,9 +604,11 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe) wqe->status = IB_WC_SUCCESS; qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index); - if ((wqe->wr.send_flags & IB_SEND_SIGNALED) || - qp->sq_sig_type == IB_SIGNAL_ALL_WR) - rxe_run_task(&qp->comp.task, 1); + /* There is no ack coming for local work requests + * which can lead to a deadlock. So go ahead and complete + * it now. + */ + rxe_run_task(&qp->comp.task, 1); return 0; } @@ -614,24 +616,39 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe) int rxe_requester(void *arg) { struct rxe_qp *qp = (struct rxe_qp *)arg; + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct rxe_pkt_info pkt; struct sk_buff *skb; struct rxe_send_wqe *wqe; enum rxe_hdr_mask mask; - int payload; + u32 payload; int mtu; int opcode; + int err; int ret; struct rxe_send_wqe rollback_wqe; u32 rollback_psn; struct rxe_queue *q = qp->sq.queue; + struct rxe_ah *ah; + struct rxe_av *av; - rxe_add_ref(qp); + if (!rxe_get(qp)) + return -EAGAIN; -next_wqe: - if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR)) + if (unlikely(!qp->valid)) goto exit; + if (unlikely(qp->req.state == QP_STATE_ERROR)) { + wqe = req_next_wqe(qp); + if (wqe) + /* + * Generate an error completion for error qp state + */ + goto err; + else + goto exit; + } + if (unlikely(qp->req.state == QP_STATE_RESET)) { qp->req.wqe_index = queue_get_consumer(q, 
QUEUE_TYPE_FROM_CLIENT); @@ -639,10 +656,17 @@ next_wqe: qp->req.need_rd_atomic = 0; qp->req.wait_psn = 0; qp->req.need_retry = 0; + qp->req.wait_for_rnr_timer = 0; goto exit; } - if (unlikely(qp->req.need_retry)) { + /* we come here if the retransmit timer has fired + * or if the rnr timer has fired. If the retransmit + * timer fires while we are processing an RNR NAK wait + * until the rnr timer has fired before starting the + * retry flow + */ + if (unlikely(qp->req.need_retry && !qp->req.wait_for_rnr_timer)) { req_retry(qp); qp->req.need_retry = 0; } @@ -651,12 +675,17 @@ next_wqe: if (unlikely(!wqe)) goto exit; + if (rxe_wqe_is_fenced(qp, wqe)) { + qp->req.wait_fence = 1; + goto exit; + } + if (wqe->mask & WR_LOCAL_OP_MASK) { - ret = rxe_do_local_ops(qp, wqe); - if (unlikely(ret)) + err = rxe_do_local_ops(qp, wqe); + if (unlikely(err)) goto err; else - goto next_wqe; + goto done; } if (unlikely(qp_type(qp) == IB_QPT_RC && @@ -676,7 +705,7 @@ next_wqe: opcode = next_opcode(qp, wqe, wqe->wr.opcode); if (unlikely(opcode < 0)) { wqe->status = IB_WC_LOC_QP_OP_ERR; - goto exit; + goto err; } mask = rxe_opcode[opcode].mask; @@ -704,31 +733,51 @@ next_wqe: qp->req.wqe_index); wqe->state = wqe_state_done; wqe->status = IB_WC_SUCCESS; - __rxe_do_task(&qp->comp.task); - rxe_drop_ref(qp); - return 0; + rxe_run_task(&qp->comp.task, 0); + goto done; } payload = mtu; } - skb = init_req_packet(qp, wqe, opcode, payload, &pkt); + pkt.rxe = rxe; + pkt.opcode = opcode; + pkt.qp = qp; + pkt.psn = qp->req.psn; + pkt.mask = rxe_opcode[opcode].mask; + pkt.wqe = wqe; + + av = rxe_get_av(&pkt, &ah); + if (unlikely(!av)) { + pr_err("qp#%d Failed no address vector\n", qp_num(qp)); + wqe->status = IB_WC_LOC_QP_OP_ERR; + goto err; + } + + skb = init_req_packet(qp, av, wqe, opcode, payload, &pkt); if (unlikely(!skb)) { pr_err("qp#%d Failed allocating skb\n", qp_num(qp)); wqe->status = IB_WC_LOC_QP_OP_ERR; + if (ah) + rxe_put(ah); goto err; } - ret = finish_packet(qp, wqe, &pkt, skb, 
payload); - if (unlikely(ret)) { + err = finish_packet(qp, av, wqe, &pkt, skb, payload); + if (unlikely(err)) { pr_debug("qp#%d Error during finish packet\n", qp_num(qp)); - if (ret == -EFAULT) + if (err == -EFAULT) wqe->status = IB_WC_LOC_PROT_ERR; else wqe->status = IB_WC_LOC_QP_OP_ERR; kfree_skb(skb); + if (ah) + rxe_put(ah); goto err; } + if (ah) + rxe_put(ah); + /* * To prevent a race on wqe access between requester and completer, * wqe members state and psn need to be set before calling @@ -738,13 +787,14 @@ next_wqe: save_state(wqe, qp, &rollback_wqe, &rollback_psn); update_wqe_state(qp, wqe, &pkt); update_wqe_psn(qp, wqe, &pkt, payload); - ret = rxe_xmit_packet(qp, &pkt, skb); - if (ret) { + + err = rxe_xmit_packet(qp, &pkt, skb); + if (err) { qp->need_req_skb = 1; rollback_state(wqe, qp, &rollback_wqe, rollback_psn); - if (ret == -EAGAIN) { + if (err == -EAGAIN) { rxe_run_task(&qp->req.task, 1); goto exit; } @@ -753,15 +803,25 @@ next_wqe: goto err; } - update_state(qp, wqe, &pkt, payload); - - goto next_wqe; + update_state(qp, &pkt); + /* A non-zero return value will cause rxe_do_task to + * exit its loop and end the tasklet. 
A zero return + * will continue looping and return to rxe_requester + */ +done: + ret = 0; + goto out; err: + /* update wqe_index for each wqe completion */ + qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index); wqe->state = wqe_state_error; - __rxe_do_task(&qp->comp.task); - + qp->req.state = QP_STATE_ERROR; + rxe_run_task(&qp->comp.task, 0); exit: - rxe_drop_ref(qp); - return -EAGAIN; + ret = -EAGAIN; +out: + rxe_put(qp); + + return ret; } diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index e8f435fa6e4d..693081e813ec 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -21,6 +21,7 @@ enum resp_states { RESPST_CHK_RKEY, RESPST_EXECUTE, RESPST_READ_REPLY, + RESPST_ATOMIC_REPLY, RESPST_COMPLETE, RESPST_ACKNOWLEDGE, RESPST_CLEANUP, @@ -55,6 +56,7 @@ static char *resp_state_name[] = { [RESPST_CHK_RKEY] = "CHK_RKEY", [RESPST_EXECUTE] = "EXECUTE", [RESPST_READ_REPLY] = "READ_REPLY", + [RESPST_ATOMIC_REPLY] = "ATOMIC_REPLY", [RESPST_COMPLETE] = "COMPLETE", [RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE", [RESPST_CLEANUP] = "CLEANUP", @@ -99,7 +101,7 @@ static inline enum resp_states get_req(struct rxe_qp *qp, if (qp->resp.state == QP_STATE_ERROR) { while ((skb = skb_dequeue(&qp->req_pkts))) { - rxe_drop_ref(qp); + rxe_put(qp); kfree_skb(skb); ib_device_put(qp->ibqp.device); } @@ -277,7 +279,6 @@ static enum resp_states check_op_valid(struct rxe_qp *qp, break; case IB_QPT_UD: - case IB_QPT_SMI: case IB_QPT_GSI: break; @@ -297,21 +298,22 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp) struct ib_event ev; unsigned int count; size_t size; + unsigned long flags; if (srq->error) return RESPST_ERR_RNR; - spin_lock_bh(&srq->rq.consumer_lock); + spin_lock_irqsave(&srq->rq.consumer_lock, flags); wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT); if (!wqe) { - spin_unlock_bh(&srq->rq.consumer_lock); + spin_unlock_irqrestore(&srq->rq.consumer_lock, flags); return RESPST_ERR_RNR; } /* 
don't trust user space data */ if (unlikely(wqe->dma.num_sge > srq->rq.max_sge)) { - spin_unlock_bh(&srq->rq.consumer_lock); + spin_unlock_irqrestore(&srq->rq.consumer_lock, flags); pr_warn("%s: invalid num_sge in SRQ entry\n", __func__); return RESPST_ERR_MALFORMED_WQE; } @@ -327,11 +329,11 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp) goto event; } - spin_unlock_bh(&srq->rq.consumer_lock); + spin_unlock_irqrestore(&srq->rq.consumer_lock, flags); return RESPST_CHK_LENGTH; event: - spin_unlock_bh(&srq->rq.consumer_lock); + spin_unlock_irqrestore(&srq->rq.consumer_lock, flags); ev.device = qp->ibqp.device; ev.element.srq = qp->ibqp.srq; ev.event = IB_EVENT_SRQ_LIMIT_REACHED; @@ -448,7 +450,8 @@ static enum resp_states check_rkey(struct rxe_qp *qp, if (rkey_is_mw(rkey)) { mw = rxe_lookup_mw(qp, access, rkey); if (!mw) { - pr_err("%s: no MW matches rkey %#x\n", __func__, rkey); + pr_debug("%s: no MW matches rkey %#x\n", + __func__, rkey); state = RESPST_ERR_RKEY_VIOLATION; goto err; } @@ -463,12 +466,13 @@ static enum resp_states check_rkey(struct rxe_qp *qp, if (mw->access & IB_ZERO_BASED) qp->resp.offset = mw->addr; - rxe_drop_ref(mw); - rxe_add_ref(mr); + rxe_put(mw); + rxe_get(mr); } else { mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE); if (!mr) { - pr_err("%s: no MR matches rkey %#x\n", __func__, rkey); + pr_debug("%s: no MR matches rkey %#x\n", + __func__, rkey); state = RESPST_ERR_RKEY_VIOLATION; goto err; } @@ -507,9 +511,9 @@ static enum resp_states check_rkey(struct rxe_qp *qp, err: if (mr) - rxe_drop_ref(mr); + rxe_put(mr); if (mw) - rxe_drop_ref(mw); + rxe_put(mw); return state; } @@ -549,50 +553,106 @@ out: return rc; } +static struct resp_res *rxe_prepare_res(struct rxe_qp *qp, + struct rxe_pkt_info *pkt, + int type) +{ + struct resp_res *res; + u32 pkts; + + res = &qp->resp.resources[qp->resp.res_head]; + rxe_advance_resp_resource(qp); + free_rd_atomic_resource(res); + + res->type = type; + res->replay = 0; + + switch (type) { + 
case RXE_READ_MASK: + res->read.va = qp->resp.va + qp->resp.offset; + res->read.va_org = qp->resp.va + qp->resp.offset; + res->read.resid = qp->resp.resid; + res->read.length = qp->resp.resid; + res->read.rkey = qp->resp.rkey; + + pkts = max_t(u32, (reth_len(pkt) + qp->mtu - 1)/qp->mtu, 1); + res->first_psn = pkt->psn; + res->cur_psn = pkt->psn; + res->last_psn = (pkt->psn + pkts - 1) & BTH_PSN_MASK; + + res->state = rdatm_res_state_new; + break; + case RXE_ATOMIC_MASK: + res->first_psn = pkt->psn; + res->last_psn = pkt->psn; + res->cur_psn = pkt->psn; + break; + } + + return res; +} + /* Guarantee atomicity of atomic operations at the machine level. */ static DEFINE_SPINLOCK(atomic_ops_lock); -static enum resp_states process_atomic(struct rxe_qp *qp, - struct rxe_pkt_info *pkt) +static enum resp_states atomic_reply(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) { u64 *vaddr; enum resp_states ret; struct rxe_mr *mr = qp->resp.mr; + struct resp_res *res = qp->resp.res; + u64 value; - if (mr->state != RXE_MR_STATE_VALID) { - ret = RESPST_ERR_RKEY_VIOLATION; - goto out; + if (!res) { + res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_MASK); + qp->resp.res = res; } - vaddr = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, sizeof(u64)); + if (!res->replay) { + if (mr->state != RXE_MR_STATE_VALID) { + ret = RESPST_ERR_RKEY_VIOLATION; + goto out; + } - /* check vaddr is 8 bytes aligned. */ - if (!vaddr || (uintptr_t)vaddr & 7) { - ret = RESPST_ERR_MISALIGNED_ATOMIC; - goto out; - } + vaddr = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, + sizeof(u64)); - spin_lock_bh(&atomic_ops_lock); + /* check vaddr is 8 bytes aligned. 
*/ + if (!vaddr || (uintptr_t)vaddr & 7) { + ret = RESPST_ERR_MISALIGNED_ATOMIC; + goto out; + } - qp->resp.atomic_orig = *vaddr; + spin_lock_bh(&atomic_ops_lock); + res->atomic.orig_val = value = *vaddr; - if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP || - pkt->opcode == IB_OPCODE_RD_COMPARE_SWAP) { - if (*vaddr == atmeth_comp(pkt)) - *vaddr = atmeth_swap_add(pkt); - } else { - *vaddr += atmeth_swap_add(pkt); - } + if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP) { + if (value == atmeth_comp(pkt)) + value = atmeth_swap_add(pkt); + } else { + value += atmeth_swap_add(pkt); + } - spin_unlock_bh(&atomic_ops_lock); + *vaddr = value; + spin_unlock_bh(&atomic_ops_lock); - ret = RESPST_NONE; + qp->resp.msn++; + + /* next expected psn, read handles this separately */ + qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; + qp->resp.ack_psn = qp->resp.psn; + + qp->resp.opcode = pkt->opcode; + qp->resp.status = IB_WC_SUCCESS; + } + + ret = RESPST_ACKNOWLEDGE; out: return ret; } static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp, - struct rxe_pkt_info *pkt, struct rxe_pkt_info *ack, int opcode, int payload, @@ -630,9 +690,9 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp, } if (ack->mask & RXE_ATMACK_MASK) - atmack_set_orig(ack, qp->resp.atomic_orig); + atmack_set_orig(ack, qp->resp.res->atomic.orig_val); - err = rxe_prepare(ack, skb); + err = rxe_prepare(&qp->pri_av, ack, skb); if (err) { kfree_skb(skb); return NULL; @@ -641,6 +701,59 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp, return skb; } +/** + * rxe_recheck_mr - revalidate MR from rkey and get a reference + * @qp: the qp + * @rkey: the rkey + * + * This code allows the MR to be invalidated or deregistered or + * the MW if one was used to be invalidated or deallocated. + * It is assumed that the access permissions if originally good + * are OK and the mappings to be unchanged. 
+ * + * TODO: If someone reregisters an MR to change its size or + * access permissions during the processing of an RDMA read + * we should kill the responder resource and complete the + * operation with an error. + * + * Return: mr on success else NULL + */ +static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + struct rxe_mr *mr; + struct rxe_mw *mw; + + if (rkey_is_mw(rkey)) { + mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8); + if (!mw) + return NULL; + + mr = mw->mr; + if (mw->rkey != rkey || mw->state != RXE_MW_STATE_VALID || + !mr || mr->state != RXE_MR_STATE_VALID) { + rxe_put(mw); + return NULL; + } + + rxe_get(mr); + rxe_put(mw); + + return mr; + } + + mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8); + if (!mr) + return NULL; + + if (mr->rkey != rkey || mr->state != RXE_MR_STATE_VALID) { + rxe_put(mr); + return NULL; + } + + return mr; +} + /* RDMA read response. If res is not NULL, then we have a current RDMA request * being processed or replayed. */ @@ -655,53 +768,32 @@ static enum resp_states read_reply(struct rxe_qp *qp, int opcode; int err; struct resp_res *res = qp->resp.res; + struct rxe_mr *mr; if (!res) { - /* This is the first time we process that request. 
Get a - * resource - */ - res = &qp->resp.resources[qp->resp.res_head]; - - free_rd_atomic_resource(qp, res); - rxe_advance_resp_resource(qp); - - res->type = RXE_READ_MASK; - res->replay = 0; - - res->read.va = qp->resp.va + - qp->resp.offset; - res->read.va_org = qp->resp.va + - qp->resp.offset; - - res->first_psn = req_pkt->psn; + res = rxe_prepare_res(qp, req_pkt, RXE_READ_MASK); + qp->resp.res = res; + } - if (reth_len(req_pkt)) { - res->last_psn = (req_pkt->psn + - (reth_len(req_pkt) + mtu - 1) / - mtu - 1) & BTH_PSN_MASK; + if (res->state == rdatm_res_state_new) { + if (!res->replay) { + mr = qp->resp.mr; + qp->resp.mr = NULL; } else { - res->last_psn = res->first_psn; + mr = rxe_recheck_mr(qp, res->read.rkey); + if (!mr) + return RESPST_ERR_RKEY_VIOLATION; } - res->cur_psn = req_pkt->psn; - - res->read.resid = qp->resp.resid; - res->read.length = qp->resp.resid; - res->read.rkey = qp->resp.rkey; - /* note res inherits the reference to mr from qp */ - res->read.mr = qp->resp.mr; - qp->resp.mr = NULL; - - qp->resp.res = res; - res->state = rdatm_res_state_new; - } - - if (res->state == rdatm_res_state_new) { if (res->read.resid <= mtu) opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY; else opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST; } else { + mr = rxe_recheck_mr(qp, res->read.rkey); + if (!mr) + return RESPST_ERR_RKEY_VIOLATION; + if (res->read.resid > mtu) opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE; else @@ -712,15 +804,17 @@ static enum resp_states read_reply(struct rxe_qp *qp, payload = min_t(int, res->read.resid, mtu); - skb = prepare_ack_packet(qp, req_pkt, &ack_pkt, opcode, payload, + skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload, res->cur_psn, AETH_ACK_UNLIMITED); - if (!skb) + if (!skb) { + rxe_put(mr); return RESPST_ERR_RNR; + } - err = rxe_mr_copy(res->read.mr, res->read.va, payload_addr(&ack_pkt), - payload, RXE_FROM_MR_OBJ); - if (err) - pr_err("Failed copying memory\n"); + rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt), + 
payload, RXE_FROM_MR_OBJ); + if (mr) + rxe_put(mr); if (bth_pad(&ack_pkt)) { u8 *pad = payload_addr(&ack_pkt) + payload; @@ -729,10 +823,8 @@ static enum resp_states read_reply(struct rxe_qp *qp, } err = rxe_xmit_packet(qp, &ack_pkt, skb); - if (err) { - pr_err("Failed sending RDMA reply.\n"); + if (err) return RESPST_ERR_RNR; - } res->read.va += payload; res->read.resid -= payload; @@ -771,7 +863,6 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) if (pkt->mask & RXE_SEND_MASK) { if (qp_type(qp) == IB_QPT_UD || - qp_type(qp) == IB_QPT_SMI || qp_type(qp) == IB_QPT_GSI) { if (skb->protocol == htons(ETH_P_IP)) { memset(&hdr.reserved, 0, @@ -798,9 +889,7 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) qp->resp.msn++; return RESPST_READ_REPLY; } else if (pkt->mask & RXE_ATOMIC_MASK) { - err = process_atomic(qp, pkt); - if (err) - return err; + return RESPST_ATOMIC_REPLY; } else { /* Unreachable */ WARN_ON_ONCE(1); @@ -814,6 +903,10 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) return RESPST_ERR_INVALIDATE_RKEY; } + if (pkt->mask & RXE_END_MASK) + /* We successfully processed this new request. */ + qp->resp.msn++; + /* next expected psn, read handles this separately */ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; qp->resp.ack_psn = qp->resp.psn; @@ -821,11 +914,9 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) qp->resp.opcode = pkt->opcode; qp->resp.status = IB_WC_SUCCESS; - if (pkt->mask & RXE_COMP_MASK) { - /* We successfully processed this new request. 
*/ - qp->resp.msn++; + if (pkt->mask & RXE_COMP_MASK) return RESPST_COMPLETE; - } else if (qp_type(qp) == IB_QPT_RC) + else if (qp_type(qp) == IB_QPT_RC) return RESPST_ACKNOWLEDGE; else return RESPST_CLEANUP; @@ -935,62 +1026,41 @@ finish: return RESPST_CLEANUP; } -static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, - u8 syndrome, u32 psn) + +static int send_common_ack(struct rxe_qp *qp, u8 syndrome, u32 psn, + int opcode, const char *msg) { - int err = 0; + int err; struct rxe_pkt_info ack_pkt; struct sk_buff *skb; - skb = prepare_ack_packet(qp, pkt, &ack_pkt, IB_OPCODE_RC_ACKNOWLEDGE, - 0, psn, syndrome); - if (!skb) { - err = -ENOMEM; - goto err1; - } + skb = prepare_ack_packet(qp, &ack_pkt, opcode, 0, psn, syndrome); + if (!skb) + return -ENOMEM; err = rxe_xmit_packet(qp, &ack_pkt, skb); if (err) - pr_err_ratelimited("Failed sending ack\n"); + pr_err_ratelimited("Failed sending %s\n", msg); -err1: return err; } -static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, - u8 syndrome) +static int send_ack(struct rxe_qp *qp, u8 syndrome, u32 psn) { - int rc = 0; - struct rxe_pkt_info ack_pkt; - struct sk_buff *skb; - struct resp_res *res; - - skb = prepare_ack_packet(qp, pkt, &ack_pkt, - IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, 0, pkt->psn, - syndrome); - if (!skb) { - rc = -ENOMEM; - goto out; - } + return send_common_ack(qp, syndrome, psn, + IB_OPCODE_RC_ACKNOWLEDGE, "ACK"); +} - res = &qp->resp.resources[qp->resp.res_head]; - free_rd_atomic_resource(qp, res); - rxe_advance_resp_resource(qp); +static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn) +{ + int ret = send_common_ack(qp, syndrome, psn, + IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, "ATOMIC ACK"); - skb_get(skb); - res->type = RXE_ATOMIC_MASK; - res->atomic.skb = skb; - res->first_psn = ack_pkt.psn; - res->last_psn = ack_pkt.psn; - res->cur_psn = ack_pkt.psn; - - rc = rxe_xmit_packet(qp, &ack_pkt, skb); - if (rc) { - pr_err_ratelimited("Failed sending ack\n"); - rxe_drop_ref(qp); - 
} -out: - return rc; + /* have to clear this since it is used to trigger + * long read replies + */ + qp->resp.res = NULL; + return ret; } static enum resp_states acknowledge(struct rxe_qp *qp, @@ -1000,11 +1070,11 @@ static enum resp_states acknowledge(struct rxe_qp *qp, return RESPST_CLEANUP; if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED) - send_ack(qp, pkt, qp->resp.aeth_syndrome, pkt->psn); + send_ack(qp, qp->resp.aeth_syndrome, pkt->psn); else if (pkt->mask & RXE_ATOMIC_MASK) - send_atomic_ack(qp, pkt, AETH_ACK_UNLIMITED); + send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn); else if (bth_ack(pkt)) - send_ack(qp, pkt, AETH_ACK_UNLIMITED, pkt->psn); + send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn); return RESPST_CLEANUP; } @@ -1016,13 +1086,13 @@ static enum resp_states cleanup(struct rxe_qp *qp, if (pkt) { skb = skb_dequeue(&qp->req_pkts); - rxe_drop_ref(qp); + rxe_put(qp); kfree_skb(skb); ib_device_put(qp->ibqp.device); } if (qp->resp.mr) { - rxe_drop_ref(qp->resp.mr); + rxe_put(qp->resp.mr); qp->resp.mr = NULL; } @@ -1057,7 +1127,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, if (pkt->mask & RXE_SEND_MASK || pkt->mask & RXE_WRITE_MASK) { /* SEND. Ack again and cleanup. C9-105. */ - send_ack(qp, pkt, AETH_ACK_UNLIMITED, prev_psn); + send_ack(qp, AETH_ACK_UNLIMITED, prev_psn); return RESPST_CLEANUP; } else if (pkt->mask & RXE_READ_MASK) { struct resp_res *res; @@ -1111,14 +1181,11 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, /* Find the operation in our list of responder resources. */ res = find_resource(qp, pkt->psn); if (res) { - skb_get(res->atomic.skb); - /* Resend the result. */ - rc = rxe_xmit_packet(qp, pkt, res->atomic.skb); - if (rc) { - pr_err("Failed resending result. This flow is not handled - skb ignored\n"); - rc = RESPST_CLEANUP; - goto out; - } + res->replay = 1; + res->cur_psn = pkt->psn; + qp->resp.res = res; + rc = RESPST_ATOMIC_REPLY; + goto out; } /* Resource not found. Class D error. Drop the request. 
*/ @@ -1166,7 +1233,7 @@ static enum resp_states do_class_d1e_error(struct rxe_qp *qp) } if (qp->resp.mr) { - rxe_drop_ref(qp->resp.mr); + rxe_put(qp->resp.mr); qp->resp.mr = NULL; } @@ -1180,7 +1247,7 @@ static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify) struct rxe_queue *q = qp->rq.queue; while ((skb = skb_dequeue(&qp->req_pkts))) { - rxe_drop_ref(qp); + rxe_put(qp); kfree_skb(skb); ib_device_put(qp->ibqp.device); } @@ -1198,16 +1265,15 @@ int rxe_responder(void *arg) struct rxe_dev *rxe = to_rdev(qp->ibqp.device); enum resp_states state; struct rxe_pkt_info *pkt = NULL; - int ret = 0; + int ret; - rxe_add_ref(qp); + if (!rxe_get(qp)) + return -EAGAIN; qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED; - if (!qp->valid) { - ret = -EINVAL; - goto done; - } + if (!qp->valid) + goto exit; switch (qp->resp.state) { case QP_STATE_RESET: @@ -1253,6 +1319,9 @@ int rxe_responder(void *arg) case RESPST_READ_REPLY: state = read_reply(qp, pkt); break; + case RESPST_ATOMIC_REPLY: + state = atomic_reply(qp, pkt); + break; case RESPST_ACKNOWLEDGE: state = acknowledge(qp, pkt); break; @@ -1264,7 +1333,7 @@ int rxe_responder(void *arg) break; case RESPST_ERR_PSN_OUT_OF_SEQ: /* RC only - Class B. Drop packet. 
*/ - send_ack(qp, pkt, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn); + send_ack(qp, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn); state = RESPST_CLEANUP; break; @@ -1286,7 +1355,7 @@ int rxe_responder(void *arg) if (qp_type(qp) == IB_QPT_RC) { rxe_counter_inc(rxe, RXE_CNT_SND_RNR); /* RC - class B */ - send_ack(qp, pkt, AETH_RNR_NAK | + send_ack(qp, AETH_RNR_NAK | (~AETH_TYPE_MASK & qp->attr.min_rnr_timer), pkt->psn); @@ -1375,7 +1444,7 @@ int rxe_responder(void *arg) case RESPST_ERROR: qp->resp.goto_error = 0; - pr_warn("qp#%d moved to error state\n", qp_num(qp)); + pr_debug("qp#%d moved to error state\n", qp_num(qp)); rxe_qp_error(qp); goto exit; @@ -1384,9 +1453,16 @@ int rxe_responder(void *arg) } } + /* A non-zero return value will cause rxe_do_task to + * exit its loop and end the tasklet. A zero return + * will continue looping and return to rxe_responder + */ +done: + ret = 0; + goto out; exit: ret = -EAGAIN; -done: - rxe_drop_ref(qp); +out: + rxe_put(qp); return ret; } diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c index eb1c4c3b3a78..02b39498c370 100644 --- a/drivers/infiniband/sw/rxe/rxe_srq.c +++ b/drivers/infiniband/sw/rxe/rxe_srq.c @@ -6,64 +6,34 @@ #include <linux/vmalloc.h> #include "rxe.h" -#include "rxe_loc.h" #include "rxe_queue.h" -int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, - struct ib_srq_attr *attr, enum ib_srq_attr_mask mask) +int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init) { - if (srq && srq->error) { - pr_warn("srq in error state\n"); + struct ib_srq_attr *attr = &init->attr; + + if (attr->max_wr > rxe->attr.max_srq_wr) { + pr_warn("max_wr(%d) > max_srq_wr(%d)\n", + attr->max_wr, rxe->attr.max_srq_wr); goto err1; } - if (mask & IB_SRQ_MAX_WR) { - if (attr->max_wr > rxe->attr.max_srq_wr) { - pr_warn("max_wr(%d) > max_srq_wr(%d)\n", - attr->max_wr, rxe->attr.max_srq_wr); - goto err1; - } - - if (attr->max_wr <= 0) { - pr_warn("max_wr(%d) <= 0\n", attr->max_wr); - goto 
err1; - } - - if (srq && srq->limit && (attr->max_wr < srq->limit)) { - pr_warn("max_wr (%d) < srq->limit (%d)\n", - attr->max_wr, srq->limit); - goto err1; - } - - if (attr->max_wr < RXE_MIN_SRQ_WR) - attr->max_wr = RXE_MIN_SRQ_WR; + if (attr->max_wr <= 0) { + pr_warn("max_wr(%d) <= 0\n", attr->max_wr); + goto err1; } - if (mask & IB_SRQ_LIMIT) { - if (attr->srq_limit > rxe->attr.max_srq_wr) { - pr_warn("srq_limit(%d) > max_srq_wr(%d)\n", - attr->srq_limit, rxe->attr.max_srq_wr); - goto err1; - } + if (attr->max_wr < RXE_MIN_SRQ_WR) + attr->max_wr = RXE_MIN_SRQ_WR; - if (srq && (attr->srq_limit > srq->rq.queue->buf->index_mask)) { - pr_warn("srq_limit (%d) > cur limit(%d)\n", - attr->srq_limit, - srq->rq.queue->buf->index_mask); - goto err1; - } + if (attr->max_sge > rxe->attr.max_srq_sge) { + pr_warn("max_sge(%d) > max_srq_sge(%d)\n", + attr->max_sge, rxe->attr.max_srq_sge); + goto err1; } - if (mask == IB_SRQ_INIT_MASK) { - if (attr->max_sge > rxe->attr.max_srq_sge) { - pr_warn("max_sge(%d) > max_srq_sge(%d)\n", - attr->max_sge, rxe->attr.max_srq_sge); - goto err1; - } - - if (attr->max_sge < RXE_MIN_SRQ_SGE) - attr->max_sge = RXE_MIN_SRQ_SGE; - } + if (attr->max_sge < RXE_MIN_SRQ_SGE) + attr->max_sge = RXE_MIN_SRQ_SGE; return 0; @@ -83,7 +53,7 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, srq->ibsrq.event_handler = init->event_handler; srq->ibsrq.srq_context = init->srq_context; srq->limit = init->attr.srq_limit; - srq->srq_num = srq->pelem.index; + srq->srq_num = srq->elem.index; srq->rq.max_wr = init->attr.max_wr; srq->rq.max_sge = init->attr.max_sge; @@ -93,8 +63,7 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, spin_lock_init(&srq->rq.consumer_lock); type = QUEUE_TYPE_FROM_CLIENT; - q = rxe_queue_init(rxe, &srq->rq.max_wr, - srq_wqe_size, type); + q = rxe_queue_init(rxe, &srq->rq.max_wr, srq_wqe_size, type); if (!q) { pr_warn("unable to allocate queue for srq\n"); return -ENOMEM; @@ -121,6 +90,57 @@ int 
rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, return 0; } +int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, + struct ib_srq_attr *attr, enum ib_srq_attr_mask mask) +{ + if (srq->error) { + pr_warn("srq in error state\n"); + goto err1; + } + + if (mask & IB_SRQ_MAX_WR) { + if (attr->max_wr > rxe->attr.max_srq_wr) { + pr_warn("max_wr(%d) > max_srq_wr(%d)\n", + attr->max_wr, rxe->attr.max_srq_wr); + goto err1; + } + + if (attr->max_wr <= 0) { + pr_warn("max_wr(%d) <= 0\n", attr->max_wr); + goto err1; + } + + if (srq->limit && (attr->max_wr < srq->limit)) { + pr_warn("max_wr (%d) < srq->limit (%d)\n", + attr->max_wr, srq->limit); + goto err1; + } + + if (attr->max_wr < RXE_MIN_SRQ_WR) + attr->max_wr = RXE_MIN_SRQ_WR; + } + + if (mask & IB_SRQ_LIMIT) { + if (attr->srq_limit > rxe->attr.max_srq_wr) { + pr_warn("srq_limit(%d) > max_srq_wr(%d)\n", + attr->srq_limit, rxe->attr.max_srq_wr); + goto err1; + } + + if (attr->srq_limit > srq->rq.queue->buf->index_mask) { + pr_warn("srq_limit (%d) > cur limit(%d)\n", + attr->srq_limit, + srq->rq.queue->buf->index_mask); + goto err1; + } + } + + return 0; + +err1: + return -EINVAL; +} + int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, struct ib_srq_attr *attr, enum ib_srq_attr_mask mask, struct rxe_modify_srq_cmd *ucmd, struct ib_udata *udata) @@ -154,3 +174,14 @@ err2: srq->rq.queue = NULL; return err; } + +void rxe_srq_cleanup(struct rxe_pool_elem *elem) +{ + struct rxe_srq *srq = container_of(elem, typeof(*srq), elem); + + if (srq->pd) + rxe_put(srq->pd); + + if (srq->rq.queue) + rxe_queue_cleanup(srq->rq.queue); +} diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c deleted file mode 100644 index 666202ddff48..000000000000 --- a/drivers/infiniband/sw/rxe/rxe_sysfs.c +++ /dev/null @@ -1,119 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB -/* - * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 
- * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - */ - -#include "rxe.h" -#include "rxe_net.h" - -/* Copy argument and remove trailing CR. Return the new length. */ -static int sanitize_arg(const char *val, char *intf, int intf_len) -{ - int len; - - if (!val) - return 0; - - /* Remove newline. */ - for (len = 0; len < intf_len - 1 && val[len] && val[len] != '\n'; len++) - intf[len] = val[len]; - intf[len] = 0; - - if (len == 0 || (val[len] != 0 && val[len] != '\n')) - return 0; - - return len; -} - -static int rxe_param_set_add(const char *val, const struct kernel_param *kp) -{ - int len; - int err = 0; - char intf[32]; - struct net_device *ndev; - struct rxe_dev *exists; - - if (!rxe_initialized) { - pr_err("Module parameters are not supported, use rdma link add or rxe_cfg\n"); - return -EAGAIN; - } - - len = sanitize_arg(val, intf, sizeof(intf)); - if (!len) { - pr_err("add: invalid interface name\n"); - return -EINVAL; - } - - ndev = dev_get_by_name(&init_net, intf); - if (!ndev) { - pr_err("interface %s not found\n", intf); - return -EINVAL; - } - - if (is_vlan_dev(ndev)) { - pr_err("rxe creation allowed on top of a real device only\n"); - err = -EPERM; - goto err; - } - - exists = rxe_get_dev_from_net(ndev); - if (exists) { - ib_device_put(&exists->ib_dev); - pr_err("already configured on %s\n", intf); - err = -EINVAL; - goto err; - } - - err = rxe_net_add("rxe%d", ndev); - if (err) { - pr_err("failed to add %s\n", intf); - goto err; - } - -err: - dev_put(ndev); - return err; -} - -static int rxe_param_set_remove(const char *val, const struct kernel_param *kp) -{ - int len; - char intf[32]; - struct ib_device *ib_dev; - - len = sanitize_arg(val, intf, sizeof(intf)); - if (!len) { - pr_err("add: invalid interface name\n"); - return -EINVAL; - } - - if (strncmp("all", intf, len) == 0) { - pr_info("rxe_sys: remove all"); - ib_unregister_driver(RDMA_DRIVER_RXE); - return 0; - } - - ib_dev = ib_device_get_by_name(intf, RDMA_DRIVER_RXE); - if 
(!ib_dev) { - pr_err("not configured on %s\n", intf); - return -EINVAL; - } - - ib_unregister_device_and_put(ib_dev); - - return 0; -} - -static const struct kernel_param_ops rxe_add_ops = { - .set = rxe_param_set_add, -}; - -static const struct kernel_param_ops rxe_remove_ops = { - .set = rxe_param_set_remove, -}; - -module_param_cb(add, &rxe_add_ops, NULL, 0200); -MODULE_PARM_DESC(add, "DEPRECATED. Create RXE device over network interface"); -module_param_cb(remove, &rxe_remove_ops, NULL, 0200); -MODULE_PARM_DESC(remove, "DEPRECATED. Remove RXE device over network interface"); diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c index 6951fdcb31bf..ec2b7de1c497 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.c +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -8,7 +8,7 @@ #include <linux/interrupt.h> #include <linux/hardirq.h> -#include "rxe_task.h" +#include "rxe.h" int __rxe_do_task(struct rxe_task *task) @@ -32,25 +32,25 @@ void rxe_do_task(struct tasklet_struct *t) { int cont; int ret; - unsigned long flags; struct rxe_task *task = from_tasklet(task, t, tasklet); + unsigned int iterations = RXE_MAX_ITERATIONS; - spin_lock_irqsave(&task->state_lock, flags); + spin_lock_bh(&task->state_lock); switch (task->state) { case TASK_STATE_START: task->state = TASK_STATE_BUSY; - spin_unlock_irqrestore(&task->state_lock, flags); + spin_unlock_bh(&task->state_lock); break; case TASK_STATE_BUSY: task->state = TASK_STATE_ARMED; fallthrough; case TASK_STATE_ARMED: - spin_unlock_irqrestore(&task->state_lock, flags); + spin_unlock_bh(&task->state_lock); return; default: - spin_unlock_irqrestore(&task->state_lock, flags); + spin_unlock_bh(&task->state_lock); pr_warn("%s failed with bad state %d\n", __func__, task->state); return; } @@ -59,16 +59,23 @@ void rxe_do_task(struct tasklet_struct *t) cont = 0; ret = task->func(task->arg); - spin_lock_irqsave(&task->state_lock, flags); + spin_lock_bh(&task->state_lock); switch (task->state) { case 
TASK_STATE_BUSY: - if (ret) + if (ret) { task->state = TASK_STATE_START; - else + } else if (iterations--) { cont = 1; + } else { + /* reschedule the tasklet and exit + * the loop to give up the cpu + */ + tasklet_schedule(&task->tasklet); + task->state = TASK_STATE_START; + } break; - /* soneone tried to run the task since the last time we called + /* someone tried to run the task since the last time we called * func, so we will call one more time regardless of the * return value */ @@ -81,16 +88,15 @@ void rxe_do_task(struct tasklet_struct *t) pr_warn("%s failed with bad state %d\n", __func__, task->state); } - spin_unlock_irqrestore(&task->state_lock, flags); + spin_unlock_bh(&task->state_lock); } while (cont); task->ret = ret; } -int rxe_init_task(void *obj, struct rxe_task *task, +int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *), char *name) { - task->obj = obj; task->arg = arg; task->func = func; snprintf(task->name, sizeof(task->name), "%s", name); @@ -106,7 +112,6 @@ int rxe_init_task(void *obj, struct rxe_task *task, void rxe_cleanup_task(struct rxe_task *task) { - unsigned long flags; bool idle; /* @@ -116,9 +121,9 @@ void rxe_cleanup_task(struct rxe_task *task) task->destroyed = true; do { - spin_lock_irqsave(&task->state_lock, flags); + spin_lock_bh(&task->state_lock); idle = (task->state == TASK_STATE_START); - spin_unlock_irqrestore(&task->state_lock, flags); + spin_unlock_bh(&task->state_lock); } while (!idle); tasklet_kill(&task->tasklet); diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h index 11d183fd3338..7f612a1c68a7 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.h +++ b/drivers/infiniband/sw/rxe/rxe_task.h @@ -19,7 +19,6 @@ enum { * called again. 
*/ struct rxe_task { - void *obj; struct tasklet_struct tasklet; int state; spinlock_t state_lock; /* spinlock for task state */ @@ -35,7 +34,7 @@ struct rxe_task { * arg => parameter to pass to fcn * func => function to call until it returns != 0 */ -int rxe_init_task(void *obj, struct rxe_task *task, +int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *), char *name); /* cleanup task */ diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 0aa0d7e52773..88825edc7dce 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -7,8 +7,8 @@ #include <linux/dma-mapping.h> #include <net/addrconf.h> #include <rdma/uverbs_ioctl.h> + #include "rxe.h" -#include "rxe_loc.h" #include "rxe_queue.h" #include "rxe_hw_counters.h" @@ -115,7 +115,7 @@ static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc) { struct rxe_ucontext *uc = to_ruc(ibuc); - rxe_drop_ref(uc); + rxe_cleanup(uc); } static int rxe_port_immutable(struct ib_device *dev, u32 port_num, @@ -149,7 +149,7 @@ static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct rxe_pd *pd = to_rpd(ibpd); - rxe_drop_ref(pd); + rxe_cleanup(pd); return 0; } @@ -176,21 +176,20 @@ static int rxe_create_ah(struct ib_ah *ibah, if (err) return err; - err = rxe_add_to_pool(&rxe->ah_pool, ah); + err = rxe_add_to_pool_ah(&rxe->ah_pool, ah, + init_attr->flags & RDMA_CREATE_AH_SLEEPABLE); if (err) return err; /* create index > 0 */ - rxe_add_index(ah); - ah->ah_num = ah->pelem.index; + ah->ah_num = ah->elem.index; if (uresp) { /* only if new user provider */ err = copy_to_user(&uresp->ah_num, &ah->ah_num, sizeof(uresp->ah_num)); if (err) { - rxe_drop_index(ah); - rxe_drop_ref(ah); + rxe_cleanup(ah); return -EFAULT; } } else if (ah->is_user) { @@ -199,6 +198,8 @@ static int rxe_create_ah(struct ib_ah *ibah, } rxe_init_av(init_attr->ah_attr, &ah->av); + rxe_finalize(ah); + return 0; } @@ -230,8 +231,8 @@ static int 
rxe_destroy_ah(struct ib_ah *ibah, u32 flags) { struct rxe_ah *ah = to_rah(ibah); - rxe_drop_index(ah); - rxe_drop_ref(ah); + rxe_cleanup_ah(ah, flags & RDMA_DESTROY_AH_SLEEPABLE); + return 0; } @@ -261,7 +262,6 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) recv_wqe = queue_producer_addr(rq->queue, QUEUE_TYPE_TO_DRIVER); recv_wqe->wr_id = ibwr->wr_id; - recv_wqe->num_sge = num_sge; memcpy(recv_wqe->dma.sge, ibwr->sg_list, num_sge * sizeof(struct ib_sge)); @@ -289,36 +289,35 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init, struct rxe_srq *srq = to_rsrq(ibsrq); struct rxe_create_srq_resp __user *uresp = NULL; - if (init->srq_type != IB_SRQT_BASIC) - return -EOPNOTSUPP; - if (udata) { if (udata->outlen < sizeof(*uresp)) return -EINVAL; uresp = udata->outbuf; } - err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK); + if (init->srq_type != IB_SRQT_BASIC) + return -EOPNOTSUPP; + + err = rxe_srq_chk_init(rxe, init); if (err) - goto err1; + return err; err = rxe_add_to_pool(&rxe->srq_pool, srq); if (err) - goto err1; + return err; - rxe_add_ref(pd); + rxe_get(pd); srq->pd = pd; err = rxe_srq_from_init(rxe, srq, init, udata, uresp); if (err) - goto err2; + goto err_cleanup; return 0; -err2: - rxe_drop_ref(pd); - rxe_drop_ref(srq); -err1: +err_cleanup: + rxe_cleanup(srq); + return err; } @@ -342,16 +341,12 @@ static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, err = rxe_srq_chk_attr(rxe, srq, attr, mask); if (err) - goto err1; + return err; err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata); if (err) - goto err1; - + return err; return 0; - -err1: - return err; } static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) @@ -371,11 +366,7 @@ static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct rxe_srq *srq = to_rsrq(ibsrq); - if (srq->rq.queue) - rxe_queue_cleanup(srq->rq.queue); - - rxe_drop_ref(srq->pd); - 
rxe_drop_ref(srq); + rxe_cleanup(srq); return 0; } @@ -383,8 +374,8 @@ static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) { int err = 0; - unsigned long flags; struct rxe_srq *srq = to_rsrq(ibsrq); + unsigned long flags; spin_lock_irqsave(&srq->rq.producer_lock, flags); @@ -438,16 +429,15 @@ static int rxe_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init, if (err) return err; - rxe_add_index(qp); err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibqp->pd, udata); if (err) goto qp_init; + rxe_finalize(qp); return 0; qp_init: - rxe_drop_index(qp); - rxe_drop_ref(qp); + rxe_cleanup(qp); return err; } @@ -469,6 +459,11 @@ static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (err) goto err1; + if ((mask & IB_QP_AV) && (attr->ah_attr.ah_flags & IB_AH_GRH)) + qp->src_port = rdma_get_udp_sport(attr->ah_attr.grh.flow_label, + qp->ibqp.qp_num, + qp->attr.dest_qp_num); + return 0; err1: @@ -489,10 +484,13 @@ static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct rxe_qp *qp = to_rqp(ibqp); + int ret; + + ret = rxe_qp_chk_destroy(qp); + if (ret) + return ret; - rxe_qp_destroy(qp); - rxe_drop_index(qp); - rxe_drop_ref(qp); + rxe_cleanup(qp); return 0; } @@ -527,12 +525,10 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, const struct ib_send_wr *ibwr) { wr->wr_id = ibwr->wr_id; - wr->num_sge = ibwr->num_sge; wr->opcode = ibwr->opcode; wr->send_flags = ibwr->send_flags; if (qp_type(qp) == IB_QPT_UD || - qp_type(qp) == IB_QPT_SMI || qp_type(qp) == IB_QPT_GSI) { struct ib_ah *ibah = ud_wr(ibwr)->ah; @@ -803,9 +799,15 @@ static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct rxe_cq *cq = to_rcq(ibcq); + /* See IBA C11-17: The CI shall return an error if this Verb is + * invoked while a Work Queue is still associated with the CQ. 
+ */ + if (atomic_read(&cq->num_wq)) + return -EINVAL; + rxe_cq_disable(cq); - rxe_drop_ref(cq); + rxe_cleanup(cq); return 0; } @@ -870,9 +872,9 @@ static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt) static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct rxe_cq *cq = to_rcq(ibcq); - unsigned long irq_flags; int ret = 0; int empty; + unsigned long irq_flags; spin_lock_irqsave(&cq->cq_lock, irq_flags); if (cq->notify != IB_CQ_NEXT_COMP) @@ -898,9 +900,11 @@ static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access) if (!mr) return ERR_PTR(-ENOMEM); - rxe_add_index(mr); - rxe_add_ref(pd); - rxe_mr_init_dma(pd, access, mr); + rxe_get(pd); + mr->ibmr.pd = ibpd; + + rxe_mr_init_dma(access, mr); + rxe_finalize(mr); return &mr->ibmr; } @@ -922,20 +926,20 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, goto err2; } - rxe_add_index(mr); - rxe_add_ref(pd); + rxe_get(pd); + mr->ibmr.pd = ibpd; - err = rxe_mr_init_user(pd, start, length, iova, access, mr); + err = rxe_mr_init_user(rxe, start, length, iova, access, mr); if (err) goto err3; + rxe_finalize(mr); + return &mr->ibmr; err3: - rxe_drop_ref(pd); - rxe_drop_index(mr); - rxe_drop_ref(mr); + rxe_cleanup(mr); err2: return ERR_PTR(err); } @@ -957,72 +961,57 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, goto err1; } - rxe_add_index(mr); + rxe_get(pd); + mr->ibmr.pd = ibpd; - rxe_add_ref(pd); - - err = rxe_mr_init_fast(pd, max_num_sg, mr); + err = rxe_mr_init_fast(max_num_sg, mr); if (err) goto err2; + rxe_finalize(mr); + return &mr->ibmr; err2: - rxe_drop_ref(pd); - rxe_drop_index(mr); - rxe_drop_ref(mr); + rxe_cleanup(mr); err1: return ERR_PTR(err); } -/* build next_map_set from scatterlist - * The IB_WR_REG_MR WR will swap map_sets - */ -static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, - int sg_nents, unsigned int *sg_offset) +static int rxe_set_page(struct ib_mr *ibmr, u64 addr) { struct rxe_mr *mr = 
to_rmr(ibmr); - struct rxe_map_set *set = mr->next_map_set; - int n; + struct rxe_map *map; + struct rxe_phys_buf *buf; - set->nbuf = 0; + if (unlikely(mr->nbuf == mr->num_buf)) + return -ENOMEM; - n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_mr_set_page); + map = mr->map[mr->nbuf / RXE_BUF_PER_MAP]; + buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP]; - set->va = ibmr->iova; - set->iova = ibmr->iova; - set->length = ibmr->length; - set->page_shift = ilog2(ibmr->page_size); - set->page_mask = ibmr->page_size - 1; - set->offset = set->iova & set->page_mask; + buf->addr = addr; + buf->size = ibmr->page_size; + mr->nbuf++; - return n; + return 0; } -static int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) +static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, + int sg_nents, unsigned int *sg_offset) { - int err; - struct rxe_dev *rxe = to_rdev(ibqp->device); - struct rxe_qp *qp = to_rqp(ibqp); - struct rxe_mc_grp *grp; - - /* takes a ref on grp if successful */ - err = rxe_mcast_get_grp(rxe, mgid, &grp); - if (err) - return err; + struct rxe_mr *mr = to_rmr(ibmr); + int n; - err = rxe_mcast_add_grp_elem(rxe, qp, grp); + mr->nbuf = 0; - rxe_drop_ref(grp); - return err; -} + n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page); -static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) -{ - struct rxe_dev *rxe = to_rdev(ibqp->device); - struct rxe_qp *qp = to_rqp(ibqp); + mr->page_shift = ilog2(ibmr->page_size); + mr->page_mask = ibmr->page_size - 1; + mr->offset = ibmr->iova & mr->page_mask; - return rxe_mcast_drop_grp_elem(rxe, qp, mgid); + return n; } static ssize_t parent_show(struct device *device, diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 35e041450090..5f5cbfcb3569 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -9,7 +9,6 @@ #include <linux/interrupt.h> #include <linux/workqueue.h> -#include 
<rdma/rdma_user_rxe.h> #include "rxe_pool.h" #include "rxe_task.h" #include "rxe_hw_counters.h" @@ -35,17 +34,17 @@ static inline int psn_compare(u32 psn_a, u32 psn_b) struct rxe_ucontext { struct ib_ucontext ibuc; - struct rxe_pool_entry pelem; + struct rxe_pool_elem elem; }; struct rxe_pd { struct ib_pd ibpd; - struct rxe_pool_entry pelem; + struct rxe_pool_elem elem; }; struct rxe_ah { struct ib_ah ibah; - struct rxe_pool_entry pelem; + struct rxe_pool_elem elem; struct rxe_av av; bool is_user; int ah_num; @@ -60,13 +59,14 @@ struct rxe_cqe { struct rxe_cq { struct ib_cq ibcq; - struct rxe_pool_entry pelem; + struct rxe_pool_elem elem; struct rxe_queue *queue; spinlock_t cq_lock; u8 notify; bool is_dying; bool is_user; struct tasklet_struct comp_task; + atomic_t num_wq; }; enum wqe_state { @@ -95,7 +95,7 @@ struct rxe_rq { struct rxe_srq { struct ib_srq ibsrq; - struct rxe_pool_entry pelem; + struct rxe_pool_elem elem; struct rxe_pd *pd; struct rxe_rq rq; u32 srq_num; @@ -123,11 +123,13 @@ struct rxe_req_info { int need_rd_atomic; int wait_psn; int need_retry; + int wait_for_rnr_timer; int noack_pkts; struct rxe_task task; }; struct rxe_comp_info { + enum rxe_qp_state state; u32 psn; int opcode; int timeout; @@ -154,10 +156,9 @@ struct resp_res { union { struct { - struct sk_buff *skb; + u64 orig_val; } atomic; struct { - struct rxe_mr *mr; u64 va_org; u32 rkey; u32 length; @@ -189,7 +190,6 @@ struct rxe_resp_info { u32 resid; u32 rkey; u32 length; - u64 atomic_orig; /* SRQ only */ struct { @@ -209,7 +209,7 @@ struct rxe_resp_info { struct rxe_qp { struct ib_qp ibqp; - struct rxe_pool_entry pelem; + struct rxe_pool_elem elem; struct ib_qp_attr attr; unsigned int valid; unsigned int mtu; @@ -232,9 +232,7 @@ struct rxe_qp { struct rxe_av pri_av; struct rxe_av alt_av; - /* list of mcast groups qp has joined (for cleanup) */ - struct list_head grp_list; - spinlock_t grp_lock; /* guard grp_list */ + atomic_t mcg_num; struct sk_buff_head req_pkts; struct sk_buff_head 
resp_pkts; @@ -290,17 +288,6 @@ struct rxe_map { struct rxe_phys_buf buf[RXE_BUF_PER_MAP]; }; -struct rxe_map_set { - struct rxe_map **map; - u64 va; - u64 iova; - size_t length; - u32 offset; - u32 nbuf; - int page_shift; - int page_mask; -}; - static inline int rkey_is_mw(u32 rkey) { u32 index = rkey >> 8; @@ -309,7 +296,7 @@ static inline int rkey_is_mw(u32 rkey) } struct rxe_mr { - struct rxe_pool_entry pelem; + struct rxe_pool_elem elem; struct ib_mr ibmr; struct ib_umem *umem; @@ -318,20 +305,23 @@ struct rxe_mr { u32 rkey; enum rxe_mr_state state; enum ib_mr_type type; + u32 offset; int access; + int page_shift; + int page_mask; int map_shift; int map_mask; u32 num_buf; + u32 nbuf; u32 max_buf; u32 num_map; atomic_t num_mw; - struct rxe_map_set *cur_map_set; - struct rxe_map_set *next_map_set; + struct rxe_map **map; }; enum rxe_mw_state { @@ -342,7 +332,7 @@ enum rxe_mw_state { struct rxe_mw { struct ib_mw ibmw; - struct rxe_pool_entry pelem; + struct rxe_pool_elem elem; spinlock_t lock; enum rxe_mw_state state; struct rxe_qp *qp; /* Type 2 only */ @@ -353,23 +343,20 @@ struct rxe_mw { u64 length; }; -struct rxe_mc_grp { - struct rxe_pool_entry pelem; - spinlock_t mcg_lock; /* guard group */ +struct rxe_mcg { + struct rb_node node; + struct kref ref_cnt; struct rxe_dev *rxe; struct list_head qp_list; union ib_gid mgid; - int num_qp; + atomic_t qp_num; u32 qkey; u16 pkey; }; -struct rxe_mc_elem { - struct rxe_pool_entry pelem; +struct rxe_mca { struct list_head qp_list; - struct list_head grp_list; struct rxe_qp *qp; - struct rxe_mc_grp *grp; }; struct rxe_port { @@ -379,7 +366,6 @@ struct rxe_port { spinlock_t port_lock; /* guard port */ unsigned int mtu_cap; /* special QPs */ - u32 qp_smi_index; u32 qp_gsi_index; }; @@ -392,8 +378,6 @@ struct rxe_dev { struct net_device *ndev; - int xmit_errors; - struct rxe_pool uc_pool; struct rxe_pool pd_pool; struct rxe_pool ah_pool; @@ -402,8 +386,12 @@ struct rxe_dev { struct rxe_pool cq_pool; struct rxe_pool 
mr_pool; struct rxe_pool mw_pool; - struct rxe_pool mc_grp_pool; - struct rxe_pool mc_elem_pool; + + /* multicast support */ + spinlock_t mcg_lock; + struct rb_root mcg_tree; + atomic_t mcg_num; + atomic_t mcg_attach; spinlock_t pending_lock; /* guard pending_mmaps */ struct list_head pending_mmaps; @@ -484,6 +472,4 @@ static inline struct rxe_pd *rxe_mw_pd(struct rxe_mw *mw) int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name); -void rxe_mc_cleanup(struct rxe_pool_entry *arg); - #endif /* RXE_VERBS_H */ diff --git a/drivers/infiniband/sw/siw/Kconfig b/drivers/infiniband/sw/siw/Kconfig index 1b5105cbabae..81b70a3eeb87 100644 --- a/drivers/infiniband/sw/siw/Kconfig +++ b/drivers/infiniband/sw/siw/Kconfig @@ -1,7 +1,10 @@ config RDMA_SIW tristate "Software RDMA over TCP/IP (iWARP) driver" - depends on INET && INFINIBAND && LIBCRC32C + depends on INET && INFINIBAND depends on INFINIBAND_VIRT_DMA + select LIBCRC32C + select CRYPTO + select CRYPTO_CRC32C help This driver implements the iWARP RDMA transport over the Linux TCP/IP network stack. 
It enables a system with a diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index 368959ae9a8c..2f3a9cda3850 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -418,6 +418,7 @@ struct siw_qp { struct ib_qp base_qp; struct siw_device *sdev; struct kref ref; + struct completion qp_free; struct list_head devq; int tx_cpu; struct siw_qp_attrs attrs; @@ -644,14 +645,9 @@ static inline struct siw_sqe *orq_get_current(struct siw_qp *qp) return &qp->orq[qp->orq_get % qp->attrs.orq_size]; } -static inline struct siw_sqe *orq_get_tail(struct siw_qp *qp) -{ - return &qp->orq[qp->orq_put % qp->attrs.orq_size]; -} - static inline struct siw_sqe *orq_get_free(struct siw_qp *qp) { - struct siw_sqe *orq_e = orq_get_tail(qp); + struct siw_sqe *orq_e = &qp->orq[qp->orq_put % qp->attrs.orq_size]; if (READ_ONCE(orq_e->flags) == 0) return orq_e; diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 7acdd3c3a599..f88d2971c2c6 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -725,11 +725,11 @@ static int siw_proc_mpareply(struct siw_cep *cep) enum mpa_v2_ctrl mpa_p2p_mode = MPA_V2_RDMA_NO_RTR; rv = siw_recv_mpa_rr(cep); - if (rv != -EAGAIN) - siw_cancel_mpatimer(cep); if (rv) goto out_err; + siw_cancel_mpatimer(cep); + rep = &cep->mpa.hdr; if (__mpa_rr_revision(rep->params.bits) > MPA_REVISION_2) { @@ -895,7 +895,8 @@ static int siw_proc_mpareply(struct siw_cep *cep) } out_err: - siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL); + if (rv != -EAGAIN) + siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL); return rv; } @@ -968,14 +969,15 @@ static void siw_accept_newconn(struct siw_cep *cep) siw_cep_set_inuse(new_cep); rv = siw_proc_mpareq(new_cep); - siw_cep_set_free(new_cep); - if (rv != -EAGAIN) { siw_cep_put(cep); new_cep->listen_cep = NULL; - if (rv) + if (rv) { + siw_cep_set_free(new_cep); goto error; + } } + 
siw_cep_set_free(new_cep); } return; diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index 9093e6a80b26..dacc174604bf 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -98,15 +98,14 @@ static int siw_create_tx_threads(void) continue; siw_tx_thread[cpu] = - kthread_create(siw_run_sq, (unsigned long *)(long)cpu, - "siw_tx/%d", cpu); + kthread_run_on_cpu(siw_run_sq, + (unsigned long *)(long)cpu, + cpu, "siw_tx/%u"); if (IS_ERR(siw_tx_thread[cpu])) { siw_tx_thread[cpu] = NULL; continue; } - kthread_bind(siw_tx_thread[cpu], cpu); - wake_up_process(siw_tx_thread[cpu]); assigned++; } return assigned; @@ -120,6 +119,7 @@ static int siw_dev_qualified(struct net_device *netdev) * <linux/if_arp.h> for type identifiers. */ if (netdev->type == ARPHRD_ETHER || netdev->type == ARPHRD_IEEE802 || + netdev->type == ARPHRD_NONE || (netdev->type == ARPHRD_LOOPBACK && loopback_enabled)) return 1; @@ -316,12 +316,12 @@ static struct siw_device *siw_device_create(struct net_device *netdev) sdev->netdev = netdev; - if (netdev->type != ARPHRD_LOOPBACK) { + if (netdev->type != ARPHRD_LOOPBACK && netdev->type != ARPHRD_NONE) { addrconf_addr_eui48((unsigned char *)&base_dev->node_guid, netdev->dev_addr); } else { /* - * The loopback device does not have a HW address, + * This device does not have a HW address, * but connection mangagement lib expects gid != 0 */ size_t len = min_t(size_t, strlen(base_dev->name), 6); diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c index 7e01f2438afc..e6f634971228 100644 --- a/drivers/infiniband/sw/siw/siw_qp.c +++ b/drivers/infiniband/sw/siw/siw_qp.c @@ -1342,6 +1342,6 @@ void siw_free_qp(struct kref *ref) vfree(qp->orq); siw_put_tx_cpu(qp->tx_cpu); - + complete(&qp->qp_free); atomic_dec(&sdev->num_qp); } diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c index 60116f20653c..fd721cc19682 100644 --- 
a/drivers/infiniband/sw/siw/siw_qp_rx.c +++ b/drivers/infiniband/sw/siw/siw_qp_rx.c @@ -961,27 +961,28 @@ out: static int siw_get_trailer(struct siw_qp *qp, struct siw_rx_stream *srx) { struct sk_buff *skb = srx->skb; + int avail = min(srx->skb_new, srx->fpdu_part_rem); u8 *tbuf = (u8 *)&srx->trailer.crc - srx->pad; __wsum crc_in, crc_own = 0; siw_dbg_qp(qp, "expected %d, available %d, pad %u\n", srx->fpdu_part_rem, srx->skb_new, srx->pad); - if (srx->skb_new < srx->fpdu_part_rem) - return -EAGAIN; - - skb_copy_bits(skb, srx->skb_offset, tbuf, srx->fpdu_part_rem); + skb_copy_bits(skb, srx->skb_offset, tbuf, avail); - if (srx->mpa_crc_hd && srx->pad) - crypto_shash_update(srx->mpa_crc_hd, tbuf, srx->pad); + srx->skb_new -= avail; + srx->skb_offset += avail; + srx->skb_copied += avail; + srx->fpdu_part_rem -= avail; - srx->skb_new -= srx->fpdu_part_rem; - srx->skb_offset += srx->fpdu_part_rem; - srx->skb_copied += srx->fpdu_part_rem; + if (srx->fpdu_part_rem) + return -EAGAIN; if (!srx->mpa_crc_hd) return 0; + if (srx->pad) + crypto_shash_update(srx->mpa_crc_hd, tbuf, srx->pad); /* * CRC32 is computed, transmitted and received directly in NBO, * so there's never a reason to convert byte order. @@ -1083,10 +1084,9 @@ static int siw_get_hdr(struct siw_rx_stream *srx) * completely received. 
*/ if (iwarp_pktinfo[opcode].hdr_len > sizeof(struct iwarp_ctrl_tagged)) { - bytes = iwarp_pktinfo[opcode].hdr_len - MIN_DDP_HDR; + int hdrlen = iwarp_pktinfo[opcode].hdr_len; - if (srx->skb_new < bytes) - return -EAGAIN; + bytes = min_t(int, hdrlen - MIN_DDP_HDR, srx->skb_new); skb_copy_bits(skb, srx->skb_offset, (char *)c_hdr + srx->fpdu_part_rcvd, bytes); @@ -1096,6 +1096,9 @@ static int siw_get_hdr(struct siw_rx_stream *srx) srx->skb_new -= bytes; srx->skb_offset += bytes; srx->skb_copied += bytes; + + if (srx->fpdu_part_rcvd < hdrlen) + return -EAGAIN; } /* @@ -1153,11 +1156,12 @@ static int siw_check_tx_fence(struct siw_qp *qp) spin_lock_irqsave(&qp->orq_lock, flags); - rreq = orq_get_current(qp); - /* free current orq entry */ + rreq = orq_get_current(qp); WRITE_ONCE(rreq->flags, 0); + qp->orq_get++; + if (qp->tx_ctx.orq_fence) { if (unlikely(tx_waiting->wr_status != SIW_WR_QUEUED)) { pr_warn("siw: [QP %u]: fence resume: bad status %d\n", @@ -1165,10 +1169,12 @@ static int siw_check_tx_fence(struct siw_qp *qp) rv = -EPROTO; goto out; } - /* resume SQ processing */ + /* resume SQ processing, if possible */ if (tx_waiting->sqe.opcode == SIW_OP_READ || tx_waiting->sqe.opcode == SIW_OP_READ_LOCAL_INV) { - rreq = orq_get_tail(qp); + + /* SQ processing was stopped because of a full ORQ */ + rreq = orq_get_free(qp); if (unlikely(!rreq)) { pr_warn("siw: [QP %u]: no ORQE\n", qp_id(qp)); rv = -EPROTO; @@ -1181,15 +1187,14 @@ static int siw_check_tx_fence(struct siw_qp *qp) resume_tx = 1; } else if (siw_orq_empty(qp)) { + /* + * SQ processing was stopped by fenced work request. + * Resume since all previous Read's are now completed. 
+ */ qp->tx_ctx.orq_fence = 0; resume_tx = 1; - } else { - pr_warn("siw: [QP %u]: fence resume: orq idx: %d:%d\n", - qp_id(qp), qp->orq_get, qp->orq_put); - rv = -EPROTO; } } - qp->orq_get++; out: spin_unlock_irqrestore(&qp->orq_lock, flags); diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c index 1f4e60257700..7d47b521070b 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -29,7 +29,7 @@ static struct page *siw_get_pblpage(struct siw_mem *mem, u64 addr, int *idx) dma_addr_t paddr = siw_pbl_get_buffer(pbl, offset, NULL, idx); if (paddr) - return virt_to_page(paddr); + return virt_to_page((void *)paddr); return NULL; } @@ -533,13 +533,23 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) kunmap_local(kaddr); } } else { - u64 va = sge->laddr + sge_off; + /* + * Cast to an uintptr_t to preserve all 64 bits + * in sge->laddr. + */ + uintptr_t va = (uintptr_t)(sge->laddr + sge_off); - page_array[seg] = virt_to_page(va & PAGE_MASK); + /* + * virt_to_page() takes a (void *) pointer + * so cast to a (void *) meaning it will be 64 + * bits on a 64 bit platform and 32 bits on a + * 32 bit platform. 
+ */ + page_array[seg] = virt_to_page((void *)(va & PAGE_MASK)); if (do_crc) crypto_shash_update( c_tx->mpa_crc_hd, - (void *)(uintptr_t)va, + (void *)va, plen); } diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 1b36350601fa..3e814cfb298c 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -8,6 +8,7 @@ #include <linux/uaccess.h> #include <linux/vmalloc.h> #include <linux/xarray.h> +#include <net/addrconf.h> #include <rdma/iw_cm.h> #include <rdma/ib_verbs.h> @@ -131,8 +132,8 @@ int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr, /* Revisit atomic caps if RFC 7306 gets supported */ attr->atomic_cap = 0; - attr->device_cap_flags = - IB_DEVICE_MEM_MGT_EXTENSIONS | IB_DEVICE_ALLOW_USER_UNREG; + attr->device_cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS; + attr->kernel_cap_flags = IBK_ALLOW_USER_UNREG; attr->max_cq = sdev->attrs.max_cq; attr->max_cqe = sdev->attrs.max_cqe; attr->max_fast_reg_page_list_len = SIW_MAX_SGE_PBL; @@ -155,7 +156,8 @@ int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr, attr->vendor_id = SIW_VENDOR_ID; attr->vendor_part_id = sdev->vendor_part_id; - memcpy(&attr->sys_image_guid, sdev->netdev->dev_addr, 6); + addrconf_addr_eui48((u8 *)&attr->sys_image_guid, + sdev->netdev->dev_addr); return 0; } @@ -311,7 +313,8 @@ int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, if (atomic_inc_return(&sdev->num_qp) > SIW_MAX_QP) { siw_dbg(base_dev, "too many QP's\n"); - return -ENOMEM; + rv = -ENOMEM; + goto err_atomic; } if (attrs->qp_type != IB_QPT_RC) { siw_dbg(base_dev, "only RC QP's supported\n"); @@ -477,6 +480,8 @@ int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, list_add_tail(&qp->devq, &sdev->qp_list); spin_unlock_irqrestore(&sdev->lock, flags); + init_completion(&qp->qp_free); + return 0; err_out_xa: @@ -621,6 +626,7 @@ int siw_destroy_qp(struct ib_qp *base_qp, struct 
ib_udata *udata) qp->scq = qp->rcq = NULL; siw_qp_put(qp); + wait_for_completion(&qp->qp_free); return 0; } @@ -660,7 +666,7 @@ static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr, kbuf += core_sge->length; core_sge++; } - sqe->sge[0].length = bytes > 0 ? bytes : 0; + sqe->sge[0].length = max(bytes, 0); sqe->num_sge = bytes > 0 ? 1 : 0; return bytes; @@ -1164,7 +1170,7 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr, err_out: siw_dbg(base_cq->device, "CQ creation failed: %d", rv); - if (cq && cq->queue) { + if (cq->queue) { struct siw_ucontext *ctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 44d8d151ff90..35e9c8a330e2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -411,6 +411,7 @@ struct ipoib_dev_priv { struct dentry *path_dentry; #endif u64 hca_caps; + u64 kernel_caps; struct ipoib_ethtool_st ethtool; unsigned int max_send_sge; const struct net_device_ops *rn_ops; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index fd9d7f2c4d64..b610d36295bb 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -465,7 +465,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, goto err_qp; } - psn = prandom_u32() & 0xffffff; + psn = get_random_u32() & 0xffffff; ret = ipoib_cm_modify_rx_qp(dev, cm_id, p->qp, psn); if (ret) goto err_modify; @@ -884,8 +884,8 @@ int ipoib_cm_dev_open(struct net_device *dev) goto err_cm; } - ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num), - 0); + ret = ib_cm_listen(priv->cm.id, + cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num)); if (ret) { pr_warn("%s: failed to listen on ID 0x%llx\n", priv->ca->name, IPOIB_CM_IETF_ID | priv->qp->qp_num); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c 
b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index a09ca21f7dff..8af99b18d361 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -65,10 +65,10 @@ static void ipoib_get_drvinfo(struct net_device *netdev, ib_get_device_fw_str(priv->ca, drvinfo->fw_version); - strlcpy(drvinfo->bus_info, dev_name(priv->ca->dev.parent), + strscpy(drvinfo->bus_info, dev_name(priv->ca->dev.parent), sizeof(drvinfo->bus_info)); - strlcpy(drvinfo->driver, "ib_ipoib", sizeof(drvinfo->driver)); + strscpy(drvinfo->driver, "ib_ipoib", sizeof(drvinfo->driver)); } static int ipoib_get_coalesce(struct net_device *dev, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 2c3dca41d3bd..ed25061fac62 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -573,7 +573,7 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb, unsigned int usable_sge = priv->max_send_sge - !!skb_headlen(skb); if (skb_is_gso(skb)) { - hlen = skb_transport_offset(skb) + tcp_hdrlen(skb); + hlen = skb_tcp_all_headers(skb); phead = skb->data; if (unlikely(!skb_pull(skb, hlen))) { ipoib_warn(priv, "linear data too small\n"); @@ -1109,7 +1109,7 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv) * if he sets the device address back to be based on GID index 0, * he no longer wishs to control it. 
* - * If the user doesn't control the the device address, + * If the user doesn't control the device address, * IPOIB_FLAG_DEV_ADDR_SET is set and ib_find_gid failed it means * the port GUID has changed and GID at index 0 has changed * so we need to change priv->local_gid and priv->dev->dev_addr diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 9934b8bd7f56..ac25fc80fb33 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -742,7 +742,7 @@ void ipoib_flush_paths(struct net_device *dev) static void path_rec_completion(int status, struct sa_path_rec *pathrec, - void *path_ptr) + int num_prs, void *path_ptr) { struct ipoib_path *path = path_ptr; struct net_device *dev = path->dev; @@ -1664,8 +1664,10 @@ static void ipoib_napi_add(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - netif_napi_add(dev, &priv->recv_napi, ipoib_rx_poll, IPOIB_NUM_WC); - netif_napi_add(dev, &priv->send_napi, ipoib_tx_poll, MAX_SEND_CQE); + netif_napi_add_weight(dev, &priv->recv_napi, ipoib_rx_poll, + IPOIB_NUM_WC); + netif_napi_add_weight(dev, &priv->send_napi, ipoib_tx_poll, + MAX_SEND_CQE); } static void ipoib_napi_del(struct net_device *dev) @@ -1850,11 +1852,12 @@ static void ipoib_parent_unregister_pre(struct net_device *ndev) static void ipoib_set_dev_features(struct ipoib_dev_priv *priv) { priv->hca_caps = priv->ca->attrs.device_cap_flags; + priv->kernel_caps = priv->ca->attrs.kernel_cap_flags; if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) { priv->dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM; - if (priv->hca_caps & IB_DEVICE_UD_TSO) + if (priv->kernel_caps & IBK_UD_TSO) priv->dev->hw_features |= NETIF_F_TSO; priv->dev->features |= priv->dev->hw_features; @@ -2201,7 +2204,7 @@ int ipoib_intf_init(struct ib_device *hca, u32 port, const char *name, priv->rn_ops = dev->netdev_ops; - if (hca->attrs.device_cap_flags & IB_DEVICE_VIRTUAL_FUNCTION) + if 
(hca->attrs.kernel_cap_flags & IBK_VIRTUAL_FUNCTION) dev->netdev_ops = &ipoib_netdev_ops_vf; else dev->netdev_ops = &ipoib_netdev_ops_pf; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c index 5b05cf3837da..ea16ba5d8da6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c @@ -32,7 +32,6 @@ #include <linux/netdevice.h> #include <linux/if_arp.h> /* For ARPHRD_xxx */ -#include <linux/module.h> #include <net/rtnetlink.h> #include "ipoib.h" diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 5a150a080ac2..368e5d77416d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -197,16 +197,16 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) init_attr.send_cq = priv->send_cq; init_attr.recv_cq = priv->recv_cq; - if (priv->hca_caps & IB_DEVICE_UD_TSO) + if (priv->kernel_caps & IBK_UD_TSO) init_attr.create_flags |= IB_QP_CREATE_IPOIB_UD_LSO; - if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK) + if (priv->kernel_caps & IBK_BLOCK_MULTICAST_LOOPBACK) init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; if (priv->hca_caps & IB_DEVICE_MANAGED_FLOW_STEERING) init_attr.create_flags |= IB_QP_CREATE_NETIF_QP; - if (priv->hca_caps & IB_DEVICE_RDMA_NETDEV_OPA) + if (priv->kernel_caps & IBK_RDMA_NETDEV_OPA) init_attr.create_flags |= IB_QP_CREATE_NETDEV_USE; priv->qp = ib_create_qp(priv->pd, &init_attr); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 0322dc75396f..4bd161e86f8d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -30,7 +30,6 @@ * SOFTWARE. 
*/ -#include <linux/module.h> #include <linux/sched/signal.h> #include <linux/init.h> diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 776e46ee95da..620ae5b2d80d 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -113,10 +113,6 @@ bool iser_pi_enable = false; module_param_named(pi_enable, iser_pi_enable, bool, S_IRUGO); MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)"); -int iser_pi_guard; -module_param_named(pi_guard, iser_pi_guard, int, S_IRUGO); -MODULE_PARM_DESC(pi_guard, "T10-PI guard_type [deprecated]"); - static int iscsi_iser_set(const char *val, const struct kernel_param *kp) { int ret; @@ -139,9 +135,8 @@ static int iscsi_iser_set(const char *val, const struct kernel_param *kp) * Notes: In case of data length errors or iscsi PDU completion failures * this routine will signal iscsi layer of connection failure. */ -void -iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr, - char *rx_data, int rx_data_len) +void iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr, + char *rx_data, int rx_data_len) { int rc = 0; int datalen; @@ -176,8 +171,7 @@ error: * Netes: This routine can't fail, just assign iscsi task * hdr and max hdr size. */ -static int -iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode) +static int iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode) { struct iscsi_iser_task *iser_task = task->dd_data; @@ -198,9 +192,8 @@ iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode) * state mutex to avoid dereferencing the IB device which * may have already been terminated. 
*/ -int -iser_initialize_task_headers(struct iscsi_task *task, - struct iser_tx_desc *tx_desc) +int iser_initialize_task_headers(struct iscsi_task *task, + struct iser_tx_desc *tx_desc) { struct iser_conn *iser_conn = task->conn->dd_data; struct iser_device *device = iser_conn->ib_conn.device; @@ -237,8 +230,7 @@ iser_initialize_task_headers(struct iscsi_task *task, * Return: Returns zero on success or -ENOMEM when failing * to init task headers (dma mapping error). */ -static int -iscsi_iser_task_init(struct iscsi_task *task) +static int iscsi_iser_task_init(struct iscsi_task *task) { struct iscsi_iser_task *iser_task = task->dd_data; int ret; @@ -272,8 +264,8 @@ iscsi_iser_task_init(struct iscsi_task *task) * xmit. * **/ -static int -iscsi_iser_mtask_xmit(struct iscsi_conn *conn, struct iscsi_task *task) +static int iscsi_iser_mtask_xmit(struct iscsi_conn *conn, + struct iscsi_task *task) { int error = 0; @@ -290,9 +282,8 @@ iscsi_iser_mtask_xmit(struct iscsi_conn *conn, struct iscsi_task *task) return error; } -static int -iscsi_iser_task_xmit_unsol_data(struct iscsi_conn *conn, - struct iscsi_task *task) +static int iscsi_iser_task_xmit_unsol_data(struct iscsi_conn *conn, + struct iscsi_task *task) { struct iscsi_r2t_info *r2t = &task->unsol_r2t; struct iscsi_data hdr; @@ -326,8 +317,7 @@ iscsi_iser_task_xmit_unsol_data_exit: * * Return: zero on success or escalates $error on failure. */ -static int -iscsi_iser_task_xmit(struct iscsi_task *task) +static int iscsi_iser_task_xmit(struct iscsi_task *task) { struct iscsi_conn *conn = task->conn; struct iscsi_iser_task *iser_task = task->dd_data; @@ -410,8 +400,7 @@ static void iscsi_iser_cleanup_task(struct iscsi_task *task) * * In addition the error sector is marked. 
*/ -static u8 -iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector) +static u8 iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector) { struct iscsi_iser_task *iser_task = task->dd_data; enum iser_data_dir dir = iser_task->dir[ISER_DIR_IN] ? @@ -460,11 +449,9 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, * -EINVAL in case end-point doesn't exsits anymore or iser connection * state is not UP (teardown already started). */ -static int -iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, - struct iscsi_cls_conn *cls_conn, - uint64_t transport_eph, - int is_leading) +static int iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, + struct iscsi_cls_conn *cls_conn, + uint64_t transport_eph, int is_leading) { struct iscsi_conn *conn = cls_conn->dd_data; struct iser_conn *iser_conn; @@ -519,8 +506,7 @@ out: * from this point iscsi must call conn_stop in session/connection * teardown so iser transport must wait for it. */ -static int -iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn) +static int iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn) { struct iscsi_conn *iscsi_conn; struct iser_conn *iser_conn; @@ -542,8 +528,7 @@ iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn) * handle, so we call it under iser the state lock to protect against * this kind of race. */ -static void -iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) +static void iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) { struct iscsi_conn *conn = cls_conn->dd_data; struct iser_conn *iser_conn = conn->dd_data; @@ -578,18 +563,16 @@ iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) * * Removes and free iscsi host. 
*/ -static void -iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session) +static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session) { struct Scsi_Host *shost = iscsi_session_to_shost(cls_session); iscsi_session_teardown(cls_session); - iscsi_host_remove(shost); + iscsi_host_remove(shost, false); iscsi_host_free(shost); } -static inline unsigned int -iser_dif_prot_caps(int prot_caps) +static inline unsigned int iser_dif_prot_caps(int prot_caps) { int ret = 0; @@ -667,7 +650,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, SHOST_DIX_GUARD_CRC); } - if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) + if (!(ib_dev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG)) shost->virt_boundary_mask = SZ_4K - 1; if (iscsi_host_add(shost, ib_dev->dev.parent)) { @@ -702,15 +685,14 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, return cls_session; remove_host: - iscsi_host_remove(shost); + iscsi_host_remove(shost, false); free_host: iscsi_host_free(shost); return NULL; } -static int -iscsi_iser_set_param(struct iscsi_cls_conn *cls_conn, - enum iscsi_param param, char *buf, int buflen) +static int iscsi_iser_set_param(struct iscsi_cls_conn *cls_conn, + enum iscsi_param param, char *buf, int buflen) { int value; @@ -760,8 +742,8 @@ iscsi_iser_set_param(struct iscsi_cls_conn *cls_conn, * * Output connection statistics. */ -static void -iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *stats) +static void iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, + struct iscsi_stats *stats) { struct iscsi_conn *conn = cls_conn->dd_data; @@ -812,9 +794,9 @@ static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep, * Return: iscsi_endpoint created by iscsi layer or ERR_PTR(error) * if fails. 
*/ -static struct iscsi_endpoint * -iscsi_iser_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr, - int non_blocking) +static struct iscsi_endpoint *iscsi_iser_ep_connect(struct Scsi_Host *shost, + struct sockaddr *dst_addr, + int non_blocking) { int err; struct iser_conn *iser_conn; @@ -857,8 +839,7 @@ failure: * or more likely iser connection state transitioned to TEMINATING or * DOWN during the wait period. */ -static int -iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms) +static int iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms) { struct iser_conn *iser_conn = ep->dd_data; int rc; @@ -893,8 +874,7 @@ iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms) * and cleanup or actually call it immediately in case we didn't pass * iscsi conn bind/start stage, thus it is safe. */ -static void -iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep) +static void iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep) { struct iser_conn *iser_conn = ep->dd_data; @@ -991,6 +971,7 @@ static struct scsi_host_template iscsi_iser_sht = { .proc_name = "iscsi_iser", .this_id = -1, .track_queue_depth = 1, + .cmd_size = sizeof(struct iscsi_cmd), }; static struct iscsi_transport iscsi_iser_transport = { diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 9f6ac0a09a78..dee8c97ff056 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -119,8 +119,6 @@ #define ISER_QP_MAX_RECV_DTOS (ISER_DEF_XMIT_CMDS_MAX) -#define ISER_MIN_POSTED_RX (ISER_DEF_XMIT_CMDS_MAX >> 2) - /* the max TX (send) WR supported by the iSER QP is defined by * * max_send_wr = T * (1 + D) + C ; D is how many inflight dataouts we expect * * to have at max for SCSI command. 
The tx posting & completion handling code * @@ -148,8 +146,6 @@ - ISER_MAX_RX_MISC_PDUS) / \ (1 + ISER_INFLIGHT_DATAOUTS)) -#define ISER_SIGNAL_CMD_COUNT 32 - /* Constant PDU lengths calculations */ #define ISER_HEADERS_LEN (sizeof(struct iser_ctrl) + sizeof(struct iscsi_hdr)) @@ -207,12 +203,12 @@ struct iser_reg_resources; * * @sge: memory region sg element * @rkey: memory region remote key - * @mem_h: pointer to registration context (FMR/Fastreg) + * @desc: pointer to fast registration context */ struct iser_mem_reg { - struct ib_sge sge; - u32 rkey; - void *mem_h; + struct ib_sge sge; + u32 rkey; + struct iser_fr_desc *desc; }; enum iser_desc_type { @@ -366,11 +362,8 @@ struct iser_fr_pool { * @qp: Connection Queue-pair * @cq: Connection completion queue * @cq_size: The number of max outstanding completions - * @post_recv_buf_count: post receive counter - * @sig_count: send work request signal count - * @rx_wr: receive work request for batch posts * @device: reference to iser device - * @fr_pool: connection fast registration poool + * @fr_pool: connection fast registration pool * @pi_support: Indicate device T10-PI support * @reg_cqe: completion handler */ @@ -379,9 +372,6 @@ struct ib_conn { struct ib_qp *qp; struct ib_cq *cq; u32 cq_size; - int post_recv_buf_count; - u8 sig_count; - struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX]; struct iser_device *device; struct iser_fr_pool fr_pool; bool pi_support; @@ -397,8 +387,6 @@ struct ib_conn { * @state: connection logical state * @qp_max_recv_dtos: maximum number of data outs, corresponds * to max number of post recvs - * @qp_max_recv_dtos_mask: (qp_max_recv_dtos - 1) - * @min_posted_rx: (qp_max_recv_dtos >> 2) * @max_cmds: maximum cmds allowed for this connection * @name: connection peer portal * @release_work: deffered work for release job @@ -409,7 +397,6 @@ struct ib_conn { * (state is ISER_CONN_UP) * @conn_list: entry in ig conn list * @login_desc: login descriptor - * @rx_desc_head: head of rx_descs cyclic 
buffer * @rx_descs: rx buffers array (cyclic buffer) * @num_rx_descs: number of rx descriptors * @scsi_sg_tablesize: scsi host sg_tablesize @@ -422,8 +409,6 @@ struct iser_conn { struct iscsi_endpoint *ep; enum iser_conn_state state; unsigned qp_max_recv_dtos; - unsigned qp_max_recv_dtos_mask; - unsigned min_posted_rx; u16 max_cmds; char name[ISER_OBJECT_NAME_SIZE]; struct work_struct release_work; @@ -433,7 +418,6 @@ struct iser_conn { struct completion up_completion; struct list_head conn_list; struct iser_login_desc login_desc; - unsigned int rx_desc_head; struct iser_rx_desc *rx_descs; u32 num_rx_descs; unsigned short scsi_sg_tablesize; @@ -486,7 +470,6 @@ struct iser_global { extern struct iser_global ig; extern int iser_debug_level; extern bool iser_pi_enable; -extern int iser_pi_guard; extern unsigned int iser_max_sectors; extern bool iser_always_reg; @@ -543,18 +526,17 @@ int iser_connect(struct iser_conn *iser_conn, int non_blocking); int iser_post_recvl(struct iser_conn *iser_conn); -int iser_post_recvm(struct iser_conn *iser_conn, int count); -int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, - bool signal); +int iser_post_recvm(struct iser_conn *iser_conn, + struct iser_rx_desc *rx_desc); +int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc); int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, - struct iser_data_buf *data, enum iser_data_dir iser_dir, enum dma_data_direction dma_dir); void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task, - struct iser_data_buf *data, - enum dma_data_direction dir); + enum iser_data_dir iser_dir, + enum dma_data_direction dma_dir); int iser_initialize_task_headers(struct iscsi_task *task, struct iser_tx_desc *tx_desc); diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 27a6f75a9912..7b83f48f60c5 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ 
b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -52,30 +52,17 @@ static int iser_prepare_read_cmd(struct iscsi_task *task) struct iser_mem_reg *mem_reg; int err; struct iser_ctrl *hdr = &iser_task->desc.iser_header; - struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN]; err = iser_dma_map_task_data(iser_task, - buf_in, ISER_DIR_IN, DMA_FROM_DEVICE); if (err) return err; - if (scsi_prot_sg_count(iser_task->sc)) { - struct iser_data_buf *pbuf_in = &iser_task->prot[ISER_DIR_IN]; - - err = iser_dma_map_task_data(iser_task, - pbuf_in, - ISER_DIR_IN, - DMA_FROM_DEVICE); - if (err) - return err; - } - err = iser_reg_mem_fastreg(iser_task, ISER_DIR_IN, false); if (err) { iser_err("Failed to set up Data-IN RDMA\n"); - return err; + goto out_err; } mem_reg = &iser_task->rdma_reg[ISER_DIR_IN]; @@ -88,6 +75,10 @@ static int iser_prepare_read_cmd(struct iscsi_task *task) (unsigned long long)mem_reg->sge.addr); return 0; + +out_err: + iser_dma_unmap_task_data(iser_task, ISER_DIR_IN, DMA_FROM_DEVICE); + return err; } /* Register user buffer memory and initialize passive rdma @@ -95,11 +86,8 @@ static int iser_prepare_read_cmd(struct iscsi_task *task) * task->data[ISER_DIR_OUT].data_len, Protection size * is stored at task->prot[ISER_DIR_OUT].data_len */ -static int -iser_prepare_write_cmd(struct iscsi_task *task, - unsigned int imm_sz, - unsigned int unsol_sz, - unsigned int edtl) +static int iser_prepare_write_cmd(struct iscsi_task *task, unsigned int imm_sz, + unsigned int unsol_sz, unsigned int edtl) { struct iscsi_iser_task *iser_task = task->dd_data; struct iser_mem_reg *mem_reg; @@ -109,28 +97,16 @@ iser_prepare_write_cmd(struct iscsi_task *task, struct ib_sge *tx_dsg = &iser_task->desc.tx_sg[1]; err = iser_dma_map_task_data(iser_task, - buf_out, ISER_DIR_OUT, DMA_TO_DEVICE); if (err) return err; - if (scsi_prot_sg_count(iser_task->sc)) { - struct iser_data_buf *pbuf_out = &iser_task->prot[ISER_DIR_OUT]; - - err = iser_dma_map_task_data(iser_task, - pbuf_out, - 
ISER_DIR_OUT, - DMA_TO_DEVICE); - if (err) - return err; - } - err = iser_reg_mem_fastreg(iser_task, ISER_DIR_OUT, buf_out->data_len == imm_sz); - if (err != 0) { + if (err) { iser_err("Failed to register write cmd RDMA mem\n"); - return err; + goto out_err; } mem_reg = &iser_task->rdma_reg[ISER_DIR_OUT]; @@ -157,11 +133,15 @@ iser_prepare_write_cmd(struct iscsi_task *task, } return 0; + +out_err: + iser_dma_unmap_task_data(iser_task, ISER_DIR_OUT, DMA_TO_DEVICE); + return err; } /* creates a new tx descriptor and adds header regd buffer */ -static void iser_create_send_desc(struct iser_conn *iser_conn, - struct iser_tx_desc *tx_desc) +static void iser_create_send_desc(struct iser_conn *iser_conn, + struct iser_tx_desc *tx_desc) { struct iser_device *device = iser_conn->ib_conn.device; @@ -247,8 +227,6 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn, struct iser_device *device = ib_conn->device; iser_conn->qp_max_recv_dtos = session->cmds_max; - iser_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */ - iser_conn->min_posted_rx = iser_conn->qp_max_recv_dtos >> 2; if (iser_alloc_fastreg_pool(ib_conn, session->scsi_cmds_max, iser_conn->pages_per_mr)) @@ -280,7 +258,6 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn, rx_sg->lkey = device->pd->local_dma_lkey; } - iser_conn->rx_desc_head = 0; return 0; rx_desc_dma_map_failed: @@ -322,37 +299,35 @@ void iser_free_rx_descriptors(struct iser_conn *iser_conn) static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) { struct iser_conn *iser_conn = conn->dd_data; - struct ib_conn *ib_conn = &iser_conn->ib_conn; struct iscsi_session *session = conn->session; + int err = 0; + int i; iser_dbg("req op %x flags %x\n", req->opcode, req->flags); /* check if this is the last login - going to full feature phase */ if ((req->flags & ISCSI_FULL_FEATURE_PHASE) != ISCSI_FULL_FEATURE_PHASE) - return 0; - - /* - * Check that there is one posted recv buffer - * (for the last 
login response). - */ - WARN_ON(ib_conn->post_recv_buf_count != 1); + goto out; if (session->discovery_sess) { iser_info("Discovery session, re-using login RX buffer\n"); - return 0; - } else - iser_info("Normal session, posting batch of RX %d buffers\n", - iser_conn->min_posted_rx); - - /* Initial post receive buffers */ - if (iser_post_recvm(iser_conn, iser_conn->min_posted_rx)) - return -ENOMEM; + goto out; + } - return 0; -} + iser_info("Normal session, posting batch of RX %d buffers\n", + iser_conn->qp_max_recv_dtos - 1); -static inline bool iser_signal_comp(u8 sig_count) -{ - return ((sig_count % ISER_SIGNAL_CMD_COUNT) == 0); + /* + * Initial post receive buffers. + * There is one already posted recv buffer (for the last login + * response). Therefore, the first recv buffer is skipped here. + */ + for (i = 1; i < iser_conn->qp_max_recv_dtos; i++) { + err = iser_post_recvm(iser_conn, &iser_conn->rx_descs[i]); + if (err) + goto out; + } +out: + return err; } /** @@ -360,8 +335,7 @@ static inline bool iser_signal_comp(u8 sig_count) * @conn: link to matching iscsi connection * @task: SCSI command task */ -int iser_send_command(struct iscsi_conn *conn, - struct iscsi_task *task) +int iser_send_command(struct iscsi_conn *conn, struct iscsi_task *task) { struct iser_conn *iser_conn = conn->dd_data; struct iscsi_iser_task *iser_task = task->dd_data; @@ -371,7 +345,6 @@ int iser_send_command(struct iscsi_conn *conn, struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr; struct scsi_cmnd *sc = task->sc; struct iser_tx_desc *tx_desc = &iser_task->desc; - u8 sig_count = ++iser_conn->ib_conn.sig_count; edtl = ntohl(hdr->data_length); @@ -418,8 +391,7 @@ int iser_send_command(struct iscsi_conn *conn, iser_task->status = ISER_TASK_STATUS_STARTED; - err = iser_post_send(&iser_conn->ib_conn, tx_desc, - iser_signal_comp(sig_count)); + err = iser_post_send(&iser_conn->ib_conn, tx_desc); if (!err) return 0; @@ -434,8 +406,7 @@ send_command_error: * @task: SCSI command 
task * @hdr: pointer to the LLD's iSCSI message header */ -int iser_send_data_out(struct iscsi_conn *conn, - struct iscsi_task *task, +int iser_send_data_out(struct iscsi_conn *conn, struct iscsi_task *task, struct iscsi_data *hdr) { struct iser_conn *iser_conn = conn->dd_data; @@ -487,7 +458,7 @@ int iser_send_data_out(struct iscsi_conn *conn, itt, buf_offset, data_seg_len); - err = iser_post_send(&iser_conn->ib_conn, tx_desc, true); + err = iser_post_send(&iser_conn->ib_conn, tx_desc); if (!err) return 0; @@ -497,8 +468,7 @@ send_data_out_error: return err; } -int iser_send_control(struct iscsi_conn *conn, - struct iscsi_task *task) +int iser_send_control(struct iscsi_conn *conn, struct iscsi_task *task) { struct iser_conn *iser_conn = conn->dd_data; struct iscsi_iser_task *iser_task = task->dd_data; @@ -550,7 +520,7 @@ int iser_send_control(struct iscsi_conn *conn, goto send_control_error; } - err = iser_post_send(&iser_conn->ib_conn, mdesc, true); + err = iser_post_send(&iser_conn->ib_conn, mdesc); if (!err) return 0; @@ -567,6 +537,7 @@ void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc) struct iscsi_hdr *hdr; char *data; int length; + bool full_feature_phase; if (unlikely(wc->status != IB_WC_SUCCESS)) { iser_err_comp(wc, "login_rsp"); @@ -580,6 +551,9 @@ void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc) hdr = desc->rsp + sizeof(struct iser_ctrl); data = desc->rsp + ISER_HEADERS_LEN; length = wc->byte_len - ISER_HEADERS_LEN; + full_feature_phase = ((hdr->flags & ISCSI_FULL_FEATURE_PHASE) == + ISCSI_FULL_FEATURE_PHASE) && + (hdr->flags & ISCSI_FLAG_CMD_FINAL); iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode, hdr->itt, length); @@ -590,11 +564,15 @@ void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc) desc->rsp_dma, ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); - ib_conn->post_recv_buf_count--; + if (!full_feature_phase || + iser_conn->iscsi_conn->session->discovery_sess) + return; + + /* Post the first RX buffer that is skipped in iser_post_rx_bufs() */ 
+ iser_post_recvm(iser_conn, iser_conn->rx_descs); } -static inline int -iser_inv_desc(struct iser_fr_desc *desc, u32 rkey) +static inline int iser_inv_desc(struct iser_fr_desc *desc, u32 rkey) { if (unlikely((!desc->sig_protected && rkey != desc->rsc.mr->rkey) || (desc->sig_protected && rkey != desc->rsc.sig_mr->rkey))) { @@ -607,10 +585,8 @@ iser_inv_desc(struct iser_fr_desc *desc, u32 rkey) return 0; } -static int -iser_check_remote_inv(struct iser_conn *iser_conn, - struct ib_wc *wc, - struct iscsi_hdr *hdr) +static int iser_check_remote_inv(struct iser_conn *iser_conn, struct ib_wc *wc, + struct iscsi_hdr *hdr) { if (wc->wc_flags & IB_WC_WITH_INVALIDATE) { struct iscsi_task *task; @@ -631,13 +607,13 @@ iser_check_remote_inv(struct iser_conn *iser_conn, struct iser_fr_desc *desc; if (iser_task->dir[ISER_DIR_IN]) { - desc = iser_task->rdma_reg[ISER_DIR_IN].mem_h; + desc = iser_task->rdma_reg[ISER_DIR_IN].desc; if (unlikely(iser_inv_desc(desc, rkey))) return -EINVAL; } if (iser_task->dir[ISER_DIR_OUT]) { - desc = iser_task->rdma_reg[ISER_DIR_OUT].mem_h; + desc = iser_task->rdma_reg[ISER_DIR_OUT].desc; if (unlikely(iser_inv_desc(desc, rkey))) return -EINVAL; } @@ -657,8 +633,7 @@ void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc) struct iser_conn *iser_conn = to_iser_conn(ib_conn); struct iser_rx_desc *desc = iser_rx(wc->wr_cqe); struct iscsi_hdr *hdr; - int length; - int outstanding, count, err; + int length, err; if (unlikely(wc->status != IB_WC_SUCCESS)) { iser_err_comp(wc, "task_rsp"); @@ -687,20 +662,9 @@ void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc) desc->dma_addr, ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); - /* decrementing conn->post_recv_buf_count only --after-- freeing the * - * task eliminates the need to worry on tasks which are completed in * - * parallel to the execution of iser_conn_term. 
So the code that waits * - * for the posted rx bufs refcount to become zero handles everything */ - ib_conn->post_recv_buf_count--; - - outstanding = ib_conn->post_recv_buf_count; - if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) { - count = min(iser_conn->qp_max_recv_dtos - outstanding, - iser_conn->min_posted_rx); - err = iser_post_recvm(iser_conn, count); - if (err) - iser_err("posting %d rx bufs err %d\n", count, err); - } + err = iser_post_recvm(iser_conn, desc); + if (err) + iser_err("posting rx buffer err %d\n", err); } void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc) @@ -764,27 +728,16 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task) void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) { - int prot_count = scsi_prot_sg_count(iser_task->sc); if (iser_task->dir[ISER_DIR_IN]) { iser_unreg_mem_fastreg(iser_task, ISER_DIR_IN); - iser_dma_unmap_task_data(iser_task, - &iser_task->data[ISER_DIR_IN], + iser_dma_unmap_task_data(iser_task, ISER_DIR_IN, DMA_FROM_DEVICE); - if (prot_count) - iser_dma_unmap_task_data(iser_task, - &iser_task->prot[ISER_DIR_IN], - DMA_FROM_DEVICE); } if (iser_task->dir[ISER_DIR_OUT]) { iser_unreg_mem_fastreg(iser_task, ISER_DIR_OUT); - iser_dma_unmap_task_data(iser_task, - &iser_task->data[ISER_DIR_OUT], + iser_dma_unmap_task_data(iser_task, ISER_DIR_OUT, DMA_TO_DEVICE); - if (prot_count) - iser_dma_unmap_task_data(iser_task, - &iser_task->prot[ISER_DIR_OUT], - DMA_TO_DEVICE); } } diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 9776b755d848..29ae2c6a250a 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -30,7 +30,6 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include <linux/module.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/mm.h> @@ -44,8 +43,7 @@ void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc) iser_err_comp(wc, "memreg"); } -static struct iser_fr_desc * -iser_reg_desc_get_fr(struct ib_conn *ib_conn) +static struct iser_fr_desc *iser_reg_desc_get_fr(struct ib_conn *ib_conn) { struct iser_fr_pool *fr_pool = &ib_conn->fr_pool; struct iser_fr_desc *desc; @@ -60,9 +58,8 @@ iser_reg_desc_get_fr(struct ib_conn *ib_conn) return desc; } -static void -iser_reg_desc_put_fr(struct ib_conn *ib_conn, - struct iser_fr_desc *desc) +static void iser_reg_desc_put_fr(struct ib_conn *ib_conn, + struct iser_fr_desc *desc) { struct iser_fr_pool *fr_pool = &ib_conn->fr_pool; unsigned long flags; @@ -73,10 +70,10 @@ iser_reg_desc_put_fr(struct ib_conn *ib_conn, } int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, - struct iser_data_buf *data, - enum iser_data_dir iser_dir, - enum dma_data_direction dma_dir) + enum iser_data_dir iser_dir, + enum dma_data_direction dma_dir) { + struct iser_data_buf *data = &iser_task->data[iser_dir]; struct ib_device *dev; iser_task->dir[iser_dir] = 1; @@ -87,22 +84,44 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, iser_err("dma_map_sg failed!!!\n"); return -EINVAL; } + + if (scsi_prot_sg_count(iser_task->sc)) { + struct iser_data_buf *pdata = &iser_task->prot[iser_dir]; + + pdata->dma_nents = ib_dma_map_sg(dev, pdata->sg, pdata->size, dma_dir); + if (unlikely(pdata->dma_nents == 0)) { + iser_err("protection dma_map_sg failed!!!\n"); + goto out_unmap; + } + } + return 0; + +out_unmap: + ib_dma_unmap_sg(dev, data->sg, data->size, dma_dir); + return -EINVAL; } + void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task, - struct iser_data_buf *data, - enum dma_data_direction dir) + enum iser_data_dir iser_dir, + enum dma_data_direction dma_dir) { + struct iser_data_buf *data = &iser_task->data[iser_dir]; struct ib_device *dev; dev = 
iser_task->iser_conn->ib_conn.device->ib_device; - ib_dma_unmap_sg(dev, data->sg, data->size, dir); + ib_dma_unmap_sg(dev, data->sg, data->size, dma_dir); + + if (scsi_prot_sg_count(iser_task->sc)) { + struct iser_data_buf *pdata = &iser_task->prot[iser_dir]; + + ib_dma_unmap_sg(dev, pdata->sg, pdata->size, dma_dir); + } } -static int -iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem, - struct iser_mem_reg *reg) +static int iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem, + struct iser_mem_reg *reg) { struct scatterlist *sg = mem->sg; @@ -133,7 +152,7 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, struct iser_fr_desc *desc; struct ib_mr_status mr_status; - desc = reg->mem_h; + desc = reg->desc; if (!desc) return; @@ -150,12 +169,12 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, ib_check_mr_status(desc->rsc.sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status); } - iser_reg_desc_put_fr(&iser_task->iser_conn->ib_conn, reg->mem_h); - reg->mem_h = NULL; + iser_reg_desc_put_fr(&iser_task->iser_conn->ib_conn, reg->desc); + reg->desc = NULL; } -static void -iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_domain *domain) +static void iser_set_dif_domain(struct scsi_cmnd *sc, + struct ib_sig_domain *domain) { domain->sig_type = IB_SIG_TYPE_T10_DIF; domain->sig.dif.pi_interval = scsi_prot_interval(sc); @@ -171,8 +190,8 @@ iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_domain *domain) domain->sig.dif.ref_remap = true; } -static int -iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs) +static int iser_set_sig_attrs(struct scsi_cmnd *sc, + struct ib_sig_attrs *sig_attrs) { switch (scsi_get_prot_op(sc)) { case SCSI_PROT_WRITE_INSERT: @@ -205,8 +224,7 @@ iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs) return 0; } -static inline void -iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask) +static inline void iser_set_prot_checks(struct scsi_cmnd *sc, u8 
*mask) { *mask = 0; if (sc->prot_flags & SCSI_PROT_REF_CHECK) @@ -215,11 +233,8 @@ iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask) *mask |= IB_SIG_CHECK_GUARD; } -static inline void -iser_inv_rkey(struct ib_send_wr *inv_wr, - struct ib_mr *mr, - struct ib_cqe *cqe, - struct ib_send_wr *next_wr) +static inline void iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr, + struct ib_cqe *cqe, struct ib_send_wr *next_wr) { inv_wr->opcode = IB_WR_LOCAL_INV; inv_wr->wr_cqe = cqe; @@ -229,12 +244,11 @@ iser_inv_rkey(struct ib_send_wr *inv_wr, inv_wr->next = next_wr; } -static int -iser_reg_sig_mr(struct iscsi_iser_task *iser_task, - struct iser_data_buf *mem, - struct iser_data_buf *sig_mem, - struct iser_reg_resources *rsc, - struct iser_mem_reg *sig_reg) +static int iser_reg_sig_mr(struct iscsi_iser_task *iser_task, + struct iser_data_buf *mem, + struct iser_data_buf *sig_mem, + struct iser_reg_resources *rsc, + struct iser_mem_reg *sig_reg) { struct iser_tx_desc *tx_desc = &iser_task->desc; struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe; @@ -335,42 +349,26 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, return 0; } -static int -iser_reg_data_sg(struct iscsi_iser_task *task, - struct iser_data_buf *mem, - struct iser_fr_desc *desc, - bool use_dma_key, - struct iser_mem_reg *reg) -{ - struct iser_device *device = task->iser_conn->ib_conn.device; - - if (use_dma_key) - return iser_reg_dma(device, mem, reg); - - return iser_fast_reg_mr(task, mem, &desc->rsc, reg); -} - int iser_reg_mem_fastreg(struct iscsi_iser_task *task, enum iser_data_dir dir, bool all_imm) { struct ib_conn *ib_conn = &task->iser_conn->ib_conn; + struct iser_device *device = ib_conn->device; struct iser_data_buf *mem = &task->data[dir]; struct iser_mem_reg *reg = &task->rdma_reg[dir]; - struct iser_fr_desc *desc = NULL; + struct iser_fr_desc *desc; bool use_dma_key; int err; use_dma_key = mem->dma_nents == 1 && (all_imm || !iser_always_reg) && 
scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL; + if (use_dma_key) + return iser_reg_dma(device, mem, reg); - if (!use_dma_key) { - desc = iser_reg_desc_get_fr(ib_conn); - reg->mem_h = desc; - } - + desc = iser_reg_desc_get_fr(ib_conn); if (scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL) { - err = iser_reg_data_sg(task, mem, desc, use_dma_key, reg); + err = iser_fast_reg_mr(task, mem, &desc->rsc, reg); if (unlikely(err)) goto err_reg; } else { @@ -382,11 +380,12 @@ int iser_reg_mem_fastreg(struct iscsi_iser_task *task, desc->sig_protected = true; } + reg->desc = desc; + return 0; err_reg: - if (desc) - iser_reg_desc_put_fr(ib_conn, desc); + iser_reg_desc_put_fr(ib_conn, desc); return err; } diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index b566f7cb7797..a00ca117303a 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -32,7 +32,6 @@ * SOFTWARE. */ #include <linux/kernel.h> -#include <linux/module.h> #include <linux/slab.h> #include <linux/delay.h> @@ -116,7 +115,7 @@ iser_create_fastreg_desc(struct iser_device *device, if (!desc) return ERR_PTR(-ENOMEM); - if (ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG) + if (ib_dev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG) mr_type = IB_MR_TYPE_SG_GAPS; else mr_type = IB_MR_TYPE_MEM_REG; @@ -247,6 +246,7 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn) device = ib_conn->device; ib_dev = device->ib_device; + /* +1 for drain */ if (ib_conn->pi_support) max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1; else @@ -265,14 +265,15 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn) memset(&init_attr, 0, sizeof(init_attr)); init_attr.event_handler = iser_qp_event_callback; - init_attr.qp_context = (void *)ib_conn; - init_attr.send_cq = ib_conn->cq; - init_attr.recv_cq = ib_conn->cq; - init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; + init_attr.qp_context = (void *)ib_conn; + init_attr.send_cq = 
ib_conn->cq; + init_attr.recv_cq = ib_conn->cq; + /* +1 for drain */ + init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS + 1; init_attr.cap.max_send_sge = 2; init_attr.cap.max_recv_sge = 1; - init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; - init_attr.qp_type = IB_QPT_RC; + init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; + init_attr.qp_type = IB_QPT_RC; init_attr.cap.max_send_wr = max_send_wr; if (ib_conn->pi_support) init_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN; @@ -283,9 +284,8 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn) goto out_err; ib_conn->qp = ib_conn->cma_id->qp; - iser_info("setting conn %p cma_id %p qp %p max_send_wr %d\n", - ib_conn, ib_conn->cma_id, - ib_conn->cma_id->qp, max_send_wr); + iser_info("setting conn %p cma_id %p qp %p max_send_wr %d\n", ib_conn, + ib_conn->cma_id, ib_conn->cma_id->qp, max_send_wr); return ret; out_err: @@ -313,7 +313,7 @@ struct iser_device *iser_device_find_by_ib_device(struct rdma_cm_id *cma_id) goto inc_refcnt; device = kzalloc(sizeof *device, GFP_KERNEL); - if (device == NULL) + if (!device) goto out; /* assign this device to the device */ @@ -392,8 +392,7 @@ void iser_release_work(struct work_struct *work) * so the cm_id removal is out of here. It is Safe to * be invoked multiple times. 
*/ -static void iser_free_ib_conn_res(struct iser_conn *iser_conn, - bool destroy) +static void iser_free_ib_conn_res(struct iser_conn *iser_conn, bool destroy) { struct ib_conn *ib_conn = &iser_conn->ib_conn; struct iser_device *device = ib_conn->device; @@ -401,7 +400,7 @@ static void iser_free_ib_conn_res(struct iser_conn *iser_conn, iser_info("freeing conn %p cma_id %p qp %p\n", iser_conn, ib_conn->cma_id, ib_conn->qp); - if (ib_conn->qp != NULL) { + if (ib_conn->qp) { rdma_destroy_qp(ib_conn->cma_id); ib_cq_pool_put(ib_conn->cq, ib_conn->cq_size); ib_conn->qp = NULL; @@ -411,7 +410,7 @@ static void iser_free_ib_conn_res(struct iser_conn *iser_conn, if (iser_conn->rx_descs) iser_free_rx_descriptors(iser_conn); - if (device != NULL) { + if (device) { iser_device_try_release(device); ib_conn->device = NULL; } @@ -445,7 +444,7 @@ void iser_conn_release(struct iser_conn *iser_conn) iser_free_ib_conn_res(iser_conn, true); mutex_unlock(&iser_conn->state_mutex); - if (ib_conn->cma_id != NULL) { + if (ib_conn->cma_id) { rdma_destroy_id(ib_conn->cma_id); ib_conn->cma_id = NULL; } @@ -488,7 +487,7 @@ int iser_conn_terminate(struct iser_conn *iser_conn) iser_conn, err); /* block until all flush errors are consumed */ - ib_drain_sq(ib_conn->qp); + ib_drain_qp(ib_conn->qp); } return 1; @@ -501,13 +500,12 @@ static void iser_connect_error(struct rdma_cm_id *cma_id) { struct iser_conn *iser_conn; - iser_conn = (struct iser_conn *)cma_id->context; + iser_conn = cma_id->context; iser_conn->state = ISER_CONN_TERMINATING; } -static void -iser_calc_scsi_params(struct iser_conn *iser_conn, - unsigned int max_sectors) +static void iser_calc_scsi_params(struct iser_conn *iser_conn, + unsigned int max_sectors) { struct iser_device *device = iser_conn->ib_conn.device; struct ib_device_attr *attr = &device->ib_device->attrs; @@ -521,7 +519,7 @@ iser_calc_scsi_params(struct iser_conn *iser_conn, * (head and tail) for a single page worth data, so one additional * entry is required. 
*/ - if (attr->device_cap_flags & IB_DEVICE_SG_GAPS_REG) + if (attr->kernel_cap_flags & IBK_SG_GAPS_REG) reserved_mr_pages = 0; else reserved_mr_pages = 1; @@ -545,11 +543,11 @@ iser_calc_scsi_params(struct iser_conn *iser_conn, static void iser_addr_handler(struct rdma_cm_id *cma_id) { struct iser_device *device; - struct iser_conn *iser_conn; - struct ib_conn *ib_conn; + struct iser_conn *iser_conn; + struct ib_conn *ib_conn; int ret; - iser_conn = (struct iser_conn *)cma_id->context; + iser_conn = cma_id->context; if (iser_conn->state != ISER_CONN_PENDING) /* bailout */ return; @@ -566,8 +564,8 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id) /* connection T10-PI support */ if (iser_pi_enable) { - if (!(device->ib_device->attrs.device_cap_flags & - IB_DEVICE_INTEGRITY_HANDOVER)) { + if (!(device->ib_device->attrs.kernel_cap_flags & + IBK_INTEGRITY_HANDOVER)) { iser_warn("T10-PI requested but not supported on %s, " "continue without T10-PI\n", dev_name(&ib_conn->device->ib_device->dev)); @@ -593,9 +591,9 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id) static void iser_route_handler(struct rdma_cm_id *cma_id) { struct rdma_conn_param conn_param; - int ret; + int ret; struct iser_cm_hdr req_hdr; - struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context; + struct iser_conn *iser_conn = cma_id->context; struct ib_conn *ib_conn = &iser_conn->ib_conn; struct ib_device *ib_dev = ib_conn->device->ib_device; @@ -609,9 +607,9 @@ static void iser_route_handler(struct rdma_cm_id *cma_id) memset(&conn_param, 0, sizeof conn_param); conn_param.responder_resources = ib_dev->attrs.max_qp_rd_atom; - conn_param.initiator_depth = 1; - conn_param.retry_count = 7; - conn_param.rnr_retry_count = 6; + conn_param.initiator_depth = 1; + conn_param.retry_count = 7; + conn_param.rnr_retry_count = 6; memset(&req_hdr, 0, sizeof(req_hdr)); req_hdr.flags = ISER_ZBVA_NOT_SUP; @@ -638,7 +636,7 @@ static void iser_connected_handler(struct rdma_cm_id *cma_id, struct 
ib_qp_attr attr; struct ib_qp_init_attr init_attr; - iser_conn = (struct iser_conn *)cma_id->context; + iser_conn = cma_id->context; if (iser_conn->state != ISER_CONN_PENDING) /* bailout */ return; @@ -661,7 +659,7 @@ static void iser_connected_handler(struct rdma_cm_id *cma_id, static void iser_disconnected_handler(struct rdma_cm_id *cma_id) { - struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context; + struct iser_conn *iser_conn = cma_id->context; if (iser_conn_terminate(iser_conn)) { if (iser_conn->iscsi_conn) @@ -675,7 +673,7 @@ static void iser_disconnected_handler(struct rdma_cm_id *cma_id) static void iser_cleanup_handler(struct rdma_cm_id *cma_id, bool destroy) { - struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context; + struct iser_conn *iser_conn = cma_id->context; /* * We are not guaranteed that we visited disconnected_handler @@ -687,12 +685,13 @@ static void iser_cleanup_handler(struct rdma_cm_id *cma_id, complete(&iser_conn->ib_completion); } -static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) +static int iser_cma_handler(struct rdma_cm_id *cma_id, + struct rdma_cm_event *event) { struct iser_conn *iser_conn; int ret = 0; - iser_conn = (struct iser_conn *)cma_id->context; + iser_conn = cma_id->context; iser_info("%s (%d): status %d conn %p id %p\n", rdma_event_msg(event->event), event->event, event->status, cma_id->context, cma_id); @@ -757,7 +756,6 @@ void iser_conn_init(struct iser_conn *iser_conn) INIT_LIST_HEAD(&iser_conn->conn_list); mutex_init(&iser_conn->state_mutex); - ib_conn->post_recv_buf_count = 0; ib_conn->reg_cqe.done = iser_reg_comp; } @@ -765,10 +763,8 @@ void iser_conn_init(struct iser_conn *iser_conn) * starts the process of connecting to the target * sleeps until the connection is established or rejected */ -int iser_connect(struct iser_conn *iser_conn, - struct sockaddr *src_addr, - struct sockaddr *dst_addr, - int non_blocking) +int iser_connect(struct iser_conn *iser_conn, 
struct sockaddr *src_addr, + struct sockaddr *dst_addr, int non_blocking) { struct ib_conn *ib_conn = &iser_conn->ib_conn; int err = 0; @@ -785,8 +781,7 @@ int iser_connect(struct iser_conn *iser_conn, iser_conn->state = ISER_CONN_PENDING; ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler, - (void *)iser_conn, - RDMA_PS_TCP, IB_QPT_RC); + iser_conn, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(ib_conn->cma_id)) { err = PTR_ERR(ib_conn->cma_id); iser_err("rdma_create_id failed: %d\n", err); @@ -829,7 +824,7 @@ int iser_post_recvl(struct iser_conn *iser_conn) struct ib_conn *ib_conn = &iser_conn->ib_conn; struct iser_login_desc *desc = &iser_conn->login_desc; struct ib_recv_wr wr; - int ib_ret; + int ret; desc->sge.addr = desc->rsp_dma; desc->sge.length = ISER_RX_LOGIN_SIZE; @@ -841,46 +836,30 @@ int iser_post_recvl(struct iser_conn *iser_conn) wr.num_sge = 1; wr.next = NULL; - ib_conn->post_recv_buf_count++; - ib_ret = ib_post_recv(ib_conn->qp, &wr, NULL); - if (ib_ret) { - iser_err("ib_post_recv failed ret=%d\n", ib_ret); - ib_conn->post_recv_buf_count--; - } + ret = ib_post_recv(ib_conn->qp, &wr, NULL); + if (unlikely(ret)) + iser_err("ib_post_recv login failed ret=%d\n", ret); - return ib_ret; + return ret; } -int iser_post_recvm(struct iser_conn *iser_conn, int count) +int iser_post_recvm(struct iser_conn *iser_conn, struct iser_rx_desc *rx_desc) { struct ib_conn *ib_conn = &iser_conn->ib_conn; - unsigned int my_rx_head = iser_conn->rx_desc_head; - struct iser_rx_desc *rx_desc; - struct ib_recv_wr *wr; - int i, ib_ret; - - for (wr = ib_conn->rx_wr, i = 0; i < count; i++, wr++) { - rx_desc = &iser_conn->rx_descs[my_rx_head]; - rx_desc->cqe.done = iser_task_rsp; - wr->wr_cqe = &rx_desc->cqe; - wr->sg_list = &rx_desc->rx_sg; - wr->num_sge = 1; - wr->next = wr + 1; - my_rx_head = (my_rx_head + 1) & iser_conn->qp_max_recv_dtos_mask; - } + struct ib_recv_wr wr; + int ret; - wr--; - wr->next = NULL; /* mark end of work requests list */ + rx_desc->cqe.done = 
iser_task_rsp; + wr.wr_cqe = &rx_desc->cqe; + wr.sg_list = &rx_desc->rx_sg; + wr.num_sge = 1; + wr.next = NULL; - ib_conn->post_recv_buf_count += count; - ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, NULL); - if (unlikely(ib_ret)) { - iser_err("ib_post_recv failed ret=%d\n", ib_ret); - ib_conn->post_recv_buf_count -= count; - } else - iser_conn->rx_desc_head = my_rx_head; + ret = ib_post_recv(ib_conn->qp, &wr, NULL); + if (unlikely(ret)) + iser_err("ib_post_recv failed ret=%d\n", ret); - return ib_ret; + return ret; } @@ -888,16 +867,14 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count) * iser_post_send - Initiate a Send DTO operation * @ib_conn: connection RDMA resources * @tx_desc: iSER TX descriptor - * @signal: true to send work request as SIGNALED * * Return: 0 on success, -1 on failure */ -int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, - bool signal) +int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc) { struct ib_send_wr *wr = &tx_desc->send_wr; struct ib_send_wr *first_wr; - int ib_ret; + int ret; ib_dma_sync_single_for_device(ib_conn->device->ib_device, tx_desc->dma_addr, ISER_HEADERS_LEN, @@ -908,7 +885,7 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, wr->sg_list = tx_desc->tx_sg; wr->num_sge = tx_desc->num_sge; wr->opcode = IB_WR_SEND; - wr->send_flags = signal ? 
IB_SEND_SIGNALED : 0; + wr->send_flags = IB_SEND_SIGNALED; if (tx_desc->inv_wr.next) first_wr = &tx_desc->inv_wr; @@ -917,19 +894,19 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, else first_wr = wr; - ib_ret = ib_post_send(ib_conn->qp, first_wr, NULL); - if (unlikely(ib_ret)) + ret = ib_post_send(ib_conn->qp, first_wr, NULL); + if (unlikely(ret)) iser_err("ib_post_send failed, ret:%d opcode:%d\n", - ib_ret, wr->opcode); + ret, wr->opcode); - return ib_ret; + return ret; } u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir, sector_t *sector) { struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir]; - struct iser_fr_desc *desc = reg->mem_h; + struct iser_fr_desc *desc = reg->desc; unsigned long sector_size = iser_task->sc->device->sector_size; struct ib_mr_status mr_status; int ret; diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 636d590765f9..b360a1527cd1 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -42,11 +42,12 @@ MODULE_PARM_DESC(sg_tablesize, static DEFINE_MUTEX(device_list_mutex); static LIST_HEAD(device_list); +static struct workqueue_struct *isert_login_wq; static struct workqueue_struct *isert_comp_wq; static struct workqueue_struct *isert_release_wq; static int -isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd); +isert_put_response(struct iscsit_conn *conn, struct iscsit_cmd *cmd); static int isert_login_post_recv(struct isert_conn *isert_conn); static int @@ -230,7 +231,7 @@ isert_create_device_ib_res(struct isert_device *device) } /* Check signature cap */ - if (ib_dev->attrs.device_cap_flags & IB_DEVICE_INTEGRITY_HANDOVER) + if (ib_dev->attrs.kernel_cap_flags & IBK_INTEGRITY_HANDOVER) device->pi_capable = true; else device->pi_capable = false; @@ -909,7 +910,7 @@ isert_login_post_recv(struct isert_conn *isert_conn) } static int -isert_put_login_tx(struct 
iscsi_conn *conn, struct iscsi_login *login, +isert_put_login_tx(struct iscsit_conn *conn, struct iscsi_login *login, u32 length) { struct isert_conn *isert_conn = conn->context; @@ -976,7 +977,7 @@ isert_rx_login_req(struct isert_conn *isert_conn) { struct iser_rx_desc *rx_desc = isert_conn->login_desc; int rx_buflen = isert_conn->login_req_len; - struct iscsi_conn *conn = isert_conn->conn; + struct iscsit_conn *conn = isert_conn->conn; struct iscsi_login *login = conn->conn_login; int size; @@ -1017,24 +1018,24 @@ isert_rx_login_req(struct isert_conn *isert_conn) complete(&isert_conn->login_comp); return; } - schedule_delayed_work(&conn->login_work, 0); + queue_delayed_work(isert_login_wq, &conn->login_work, 0); } -static struct iscsi_cmd -*isert_allocate_cmd(struct iscsi_conn *conn, struct iser_rx_desc *rx_desc) +static struct iscsit_cmd +*isert_allocate_cmd(struct iscsit_conn *conn, struct iser_rx_desc *rx_desc) { struct isert_conn *isert_conn = conn->context; struct isert_cmd *isert_cmd; - struct iscsi_cmd *cmd; + struct iscsit_cmd *cmd; cmd = iscsit_allocate_cmd(conn, TASK_INTERRUPTIBLE); if (!cmd) { - isert_err("Unable to allocate iscsi_cmd + isert_cmd\n"); + isert_err("Unable to allocate iscsit_cmd + isert_cmd\n"); return NULL; } isert_cmd = iscsit_priv_cmd(cmd); isert_cmd->conn = isert_conn; - isert_cmd->iscsi_cmd = cmd; + isert_cmd->iscsit_cmd = cmd; isert_cmd->rx_desc = rx_desc; return cmd; @@ -1042,10 +1043,10 @@ static struct iscsi_cmd static int isert_handle_scsi_cmd(struct isert_conn *isert_conn, - struct isert_cmd *isert_cmd, struct iscsi_cmd *cmd, + struct isert_cmd *isert_cmd, struct iscsit_cmd *cmd, struct iser_rx_desc *rx_desc, unsigned char *buf) { - struct iscsi_conn *conn = isert_conn->conn; + struct iscsit_conn *conn = isert_conn->conn; struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)buf; int imm_data, imm_data_len, unsol_data, sg_nents, rc; bool dump_payload = false; @@ -1114,8 +1115,8 @@ isert_handle_iscsi_dataout(struct isert_conn 
*isert_conn, struct iser_rx_desc *rx_desc, unsigned char *buf) { struct scatterlist *sg_start; - struct iscsi_conn *conn = isert_conn->conn; - struct iscsi_cmd *cmd = NULL; + struct iscsit_conn *conn = isert_conn->conn; + struct iscsit_cmd *cmd = NULL; struct iscsi_data *hdr = (struct iscsi_data *)buf; u32 unsol_data_len = ntoh24(hdr->dlength); int rc, sg_nents, sg_off, page_off; @@ -1171,10 +1172,10 @@ isert_handle_iscsi_dataout(struct isert_conn *isert_conn, static int isert_handle_nop_out(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, - struct iscsi_cmd *cmd, struct iser_rx_desc *rx_desc, + struct iscsit_cmd *cmd, struct iser_rx_desc *rx_desc, unsigned char *buf) { - struct iscsi_conn *conn = isert_conn->conn; + struct iscsit_conn *conn = isert_conn->conn; struct iscsi_nopout *hdr = (struct iscsi_nopout *)buf; int rc; @@ -1190,10 +1191,10 @@ isert_handle_nop_out(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, static int isert_handle_text_cmd(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, - struct iscsi_cmd *cmd, struct iser_rx_desc *rx_desc, + struct iscsit_cmd *cmd, struct iser_rx_desc *rx_desc, struct iscsi_text *hdr) { - struct iscsi_conn *conn = isert_conn->conn; + struct iscsit_conn *conn = isert_conn->conn; u32 payload_length = ntoh24(hdr->dlength); int rc; unsigned char *text_in = NULL; @@ -1220,8 +1221,8 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc, uint32_t write_stag, uint64_t write_va) { struct iscsi_hdr *hdr = isert_get_iscsi_hdr(rx_desc); - struct iscsi_conn *conn = isert_conn->conn; - struct iscsi_cmd *cmd; + struct iscsit_conn *conn = isert_conn->conn; + struct iscsit_cmd *cmd; struct isert_cmd *isert_cmd; int ret = -EINVAL; u8 opcode = (hdr->opcode & ISCSI_OPCODE_MASK); @@ -1404,7 +1405,7 @@ isert_login_recv_done(struct ib_cq *cq, struct ib_wc *wc) static void isert_rdma_rw_ctx_destroy(struct isert_cmd *cmd, struct isert_conn *conn) { - struct se_cmd *se_cmd = 
&cmd->iscsi_cmd->se_cmd; + struct se_cmd *se_cmd = &cmd->iscsit_cmd->se_cmd; enum dma_data_direction dir = target_reverse_dma_direction(se_cmd); if (!cmd->rw.nr_ops) @@ -1426,9 +1427,9 @@ isert_rdma_rw_ctx_destroy(struct isert_cmd *cmd, struct isert_conn *conn) static void isert_put_cmd(struct isert_cmd *isert_cmd, bool comp_err) { - struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; + struct iscsit_cmd *cmd = isert_cmd->iscsit_cmd; struct isert_conn *isert_conn = isert_cmd->conn; - struct iscsi_conn *conn = isert_conn->conn; + struct iscsit_conn *conn = isert_conn->conn; struct iscsi_text_rsp *hdr; isert_dbg("Cmd %p\n", isert_cmd); @@ -1575,7 +1576,7 @@ isert_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) struct isert_device *device = isert_conn->device; struct iser_tx_desc *desc = cqe_to_tx_desc(wc->wr_cqe); struct isert_cmd *isert_cmd = tx_desc_to_cmd(desc); - struct se_cmd *cmd = &isert_cmd->iscsi_cmd->se_cmd; + struct se_cmd *cmd = &isert_cmd->iscsit_cmd->se_cmd; int ret = 0; if (unlikely(wc->status != IB_WC_SUCCESS)) { @@ -1604,7 +1605,7 @@ isert_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) /* * XXX: isert_put_response() failure is not retried. 
*/ - ret = isert_put_response(isert_conn->conn, isert_cmd->iscsi_cmd); + ret = isert_put_response(isert_conn->conn, isert_cmd->iscsit_cmd); if (ret) pr_warn_ratelimited("isert_put_response() ret: %d\n", ret); } @@ -1617,7 +1618,7 @@ isert_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc) struct isert_device *device = isert_conn->device; struct iser_tx_desc *desc = cqe_to_tx_desc(wc->wr_cqe); struct isert_cmd *isert_cmd = tx_desc_to_cmd(desc); - struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; + struct iscsit_cmd *cmd = isert_cmd->iscsit_cmd; struct se_cmd *se_cmd = &cmd->se_cmd; int ret = 0; @@ -1662,7 +1663,7 @@ isert_do_control_comp(struct work_struct *work) struct isert_cmd, comp_work); struct isert_conn *isert_conn = isert_cmd->conn; struct ib_device *ib_dev = isert_conn->cm_id->device; - struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; + struct iscsit_cmd *cmd = isert_cmd->iscsit_cmd; isert_dbg("Cmd %p i_state %d\n", isert_cmd, cmd->i_state); @@ -1720,7 +1721,7 @@ isert_send_done(struct ib_cq *cq, struct ib_wc *wc) isert_dbg("Cmd %p\n", isert_cmd); - switch (isert_cmd->iscsi_cmd->i_state) { + switch (isert_cmd->iscsit_cmd->i_state) { case ISTATE_SEND_TASKMGTRSP: case ISTATE_SEND_LOGOUTRSP: case ISTATE_SEND_REJECT: @@ -1731,7 +1732,7 @@ isert_send_done(struct ib_cq *cq, struct ib_wc *wc) queue_work(isert_comp_wq, &isert_cmd->comp_work); return; default: - isert_cmd->iscsi_cmd->i_state = ISTATE_SENT_STATUS; + isert_cmd->iscsit_cmd->i_state = ISTATE_SENT_STATUS; isert_completion_put(tx_desc, isert_cmd, ib_dev, false); break; } @@ -1755,7 +1756,7 @@ isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd) } static int -isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd) +isert_put_response(struct iscsit_conn *conn, struct iscsit_cmd *cmd) { struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = conn->context; @@ -1806,7 +1807,7 @@ isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd) } 
static void -isert_aborted_task(struct iscsi_conn *conn, struct iscsi_cmd *cmd) +isert_aborted_task(struct iscsit_conn *conn, struct iscsit_cmd *cmd) { struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = conn->context; @@ -1822,7 +1823,7 @@ isert_aborted_task(struct iscsi_conn *conn, struct iscsi_cmd *cmd) } static enum target_prot_op -isert_get_sup_prot_ops(struct iscsi_conn *conn) +isert_get_sup_prot_ops(struct iscsit_conn *conn) { struct isert_conn *isert_conn = conn->context; struct isert_device *device = isert_conn->device; @@ -1842,7 +1843,7 @@ isert_get_sup_prot_ops(struct iscsi_conn *conn) } static int -isert_put_nopin(struct iscsi_cmd *cmd, struct iscsi_conn *conn, +isert_put_nopin(struct iscsit_cmd *cmd, struct iscsit_conn *conn, bool nopout_response) { struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); @@ -1862,7 +1863,7 @@ isert_put_nopin(struct iscsi_cmd *cmd, struct iscsi_conn *conn, } static int -isert_put_logout_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) +isert_put_logout_rsp(struct iscsit_cmd *cmd, struct iscsit_conn *conn) { struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = conn->context; @@ -1880,7 +1881,7 @@ isert_put_logout_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) } static int -isert_put_tm_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) +isert_put_tm_rsp(struct iscsit_cmd *cmd, struct iscsit_conn *conn) { struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = conn->context; @@ -1898,7 +1899,7 @@ isert_put_tm_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) } static int -isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn) +isert_put_reject(struct iscsit_cmd *cmd, struct iscsit_conn *conn) { struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = conn->context; @@ -1933,7 +1934,7 @@ isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn) } static int 
-isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) +isert_put_text_rsp(struct iscsit_cmd *cmd, struct iscsit_conn *conn) { struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = conn->context; @@ -2035,7 +2036,7 @@ static int isert_rdma_rw_ctx_post(struct isert_cmd *cmd, struct isert_conn *conn, struct ib_cqe *cqe, struct ib_send_wr *chain_wr) { - struct se_cmd *se_cmd = &cmd->iscsi_cmd->se_cmd; + struct se_cmd *se_cmd = &cmd->iscsit_cmd->se_cmd; enum dma_data_direction dir = target_reverse_dma_direction(se_cmd); u8 port_num = conn->cm_id->port_num; u64 addr; @@ -2048,7 +2049,7 @@ isert_rdma_rw_ctx_post(struct isert_cmd *cmd, struct isert_conn *conn, if (dir == DMA_FROM_DEVICE) { addr = cmd->write_va; rkey = cmd->write_stag; - offset = cmd->iscsi_cmd->write_data_done; + offset = cmd->iscsit_cmd->write_data_done; } else { addr = cmd->read_va; rkey = cmd->read_stag; @@ -2088,7 +2089,7 @@ rdma_ctx_post: } static int -isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd) +isert_put_datain(struct iscsit_conn *conn, struct iscsit_cmd *cmd) { struct se_cmd *se_cmd = &cmd->se_cmd; struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); @@ -2129,7 +2130,7 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd) } static int -isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery) +isert_get_dataout(struct iscsit_conn *conn, struct iscsit_cmd *cmd, bool recovery) { struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); int ret; @@ -2147,7 +2148,7 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery) } static int -isert_immediate_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state) +isert_immediate_queue(struct iscsit_conn *conn, struct iscsit_cmd *cmd, int state) { struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); int ret = 0; @@ -2172,7 +2173,7 @@ isert_immediate_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state) } static int 
-isert_response_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state) +isert_response_queue(struct iscsit_conn *conn, struct iscsit_cmd *cmd, int state) { struct isert_conn *isert_conn = conn->context; int ret; @@ -2332,7 +2333,7 @@ isert_rdma_accept(struct isert_conn *isert_conn) } static int -isert_get_login_rx(struct iscsi_conn *conn, struct iscsi_login *login) +isert_get_login_rx(struct iscsit_conn *conn, struct iscsi_login *login) { struct isert_conn *isert_conn = conn->context; int ret; @@ -2348,9 +2349,9 @@ isert_get_login_rx(struct iscsi_conn *conn, struct iscsi_login *login) /* * For login requests after the first PDU, isert_rx_login_req() will - * kick schedule_delayed_work(&conn->login_work) as the packet is - * received, which turns this callback from iscsi_target_do_login_rx() - * into a NOP. + * kick queue_delayed_work(isert_login_wq, &conn->login_work) as + * the packet is received, which turns this callback from + * iscsi_target_do_login_rx() into a NOP. */ if (!login->first_request) return 0; @@ -2368,7 +2369,7 @@ isert_get_login_rx(struct iscsi_conn *conn, struct iscsi_login *login) } static void -isert_set_conn_info(struct iscsi_np *np, struct iscsi_conn *conn, +isert_set_conn_info(struct iscsi_np *np, struct iscsit_conn *conn, struct isert_conn *isert_conn) { struct rdma_cm_id *cm_id = isert_conn->cm_id; @@ -2381,7 +2382,7 @@ isert_set_conn_info(struct iscsi_np *np, struct iscsi_conn *conn, } static int -isert_accept_np(struct iscsi_np *np, struct iscsi_conn *conn) +isert_accept_np(struct iscsi_np *np, struct iscsit_conn *conn) { struct isert_np *isert_np = np->np_context; struct isert_conn *isert_conn; @@ -2489,7 +2490,7 @@ static void isert_release_work(struct work_struct *work) static void isert_wait4logout(struct isert_conn *isert_conn) { - struct iscsi_conn *conn = isert_conn->conn; + struct iscsit_conn *conn = isert_conn->conn; isert_info("conn %p\n", isert_conn); @@ -2501,9 +2502,9 @@ isert_wait4logout(struct isert_conn 
*isert_conn) } static void -isert_wait4cmds(struct iscsi_conn *conn) +isert_wait4cmds(struct iscsit_conn *conn) { - isert_info("iscsi_conn %p\n", conn); + isert_info("iscsit_conn %p\n", conn); if (conn->sess) { target_stop_session(conn->sess->se_sess); @@ -2521,9 +2522,9 @@ isert_wait4cmds(struct iscsi_conn *conn) * before blocking on the target_wait_for_session_cmds */ static void -isert_put_unsol_pending_cmds(struct iscsi_conn *conn) +isert_put_unsol_pending_cmds(struct iscsit_conn *conn) { - struct iscsi_cmd *cmd, *tmp; + struct iscsit_cmd *cmd, *tmp; static LIST_HEAD(drop_cmd_list); spin_lock_bh(&conn->cmd_lock); @@ -2546,7 +2547,7 @@ isert_put_unsol_pending_cmds(struct iscsi_conn *conn) } } -static void isert_wait_conn(struct iscsi_conn *conn) +static void isert_wait_conn(struct iscsit_conn *conn) { struct isert_conn *isert_conn = conn->context; @@ -2564,7 +2565,7 @@ static void isert_wait_conn(struct iscsi_conn *conn) queue_work(isert_release_wq, &isert_conn->release_work); } -static void isert_free_conn(struct iscsi_conn *conn) +static void isert_free_conn(struct iscsit_conn *conn) { struct isert_conn *isert_conn = conn->context; @@ -2572,7 +2573,7 @@ static void isert_free_conn(struct iscsi_conn *conn) isert_put_conn(isert_conn); } -static void isert_get_rx_pdu(struct iscsi_conn *conn) +static void isert_get_rx_pdu(struct iscsit_conn *conn) { struct completion comp; @@ -2606,20 +2607,23 @@ static struct iscsit_transport iser_target_transport = { static int __init isert_init(void) { - int ret; + isert_login_wq = alloc_workqueue("isert_login_wq", 0, 0); + if (!isert_login_wq) { + isert_err("Unable to allocate isert_login_wq\n"); + return -ENOMEM; + } isert_comp_wq = alloc_workqueue("isert_comp_wq", WQ_UNBOUND | WQ_HIGHPRI, 0); if (!isert_comp_wq) { isert_err("Unable to allocate isert_comp_wq\n"); - return -ENOMEM; + goto destroy_login_wq; } isert_release_wq = alloc_workqueue("isert_release_wq", WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE); if (!isert_release_wq) { 
isert_err("Unable to allocate isert_release_wq\n"); - ret = -ENOMEM; goto destroy_comp_wq; } @@ -2630,17 +2634,20 @@ static int __init isert_init(void) destroy_comp_wq: destroy_workqueue(isert_comp_wq); +destroy_login_wq: + destroy_workqueue(isert_login_wq); - return ret; + return -ENOMEM; } static void __exit isert_exit(void) { - flush_scheduled_work(); + flush_workqueue(isert_login_wq); destroy_workqueue(isert_release_wq); destroy_workqueue(isert_comp_wq); iscsit_unregister_transport(&iser_target_transport); isert_info("iSER_TARGET[0] - Released iser_target_transport\n"); + destroy_workqueue(isert_login_wq); } MODULE_DESCRIPTION("iSER-Target for mainline target infrastructure"); diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index ca8cfebe26ca..0b2dfd6e7e27 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -146,7 +146,7 @@ struct isert_cmd { u64 pdu_buf_dma; u32 pdu_buf_len; struct isert_conn *conn; - struct iscsi_cmd *iscsi_cmd; + struct iscsit_cmd *iscsit_cmd; struct iser_tx_desc tx_desc; struct iser_rx_desc *rx_desc; struct rdma_rw_ctx rw; @@ -173,7 +173,7 @@ struct isert_conn { u64 login_rsp_dma; struct iser_rx_desc *rx_descs; struct ib_recv_wr rx_wr[ISERT_QP_MAX_RECV_DTOS]; - struct iscsi_conn *conn; + struct iscsit_conn *conn; struct list_head node; struct completion login_comp; struct completion login_req_comp; diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c index 42d557dff19d..29b3d8fce3f5 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c @@ -124,8 +124,8 @@ static struct vnic_stats vnic_gstrings_stats[] = { static void vnic_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) { - strlcpy(drvinfo->driver, opa_vnic_driver_name, sizeof(drvinfo->driver)); - strlcpy(drvinfo->bus_info, 
dev_name(netdev->dev.parent), + strscpy(drvinfo->driver, opa_vnic_driver_name, sizeof(drvinfo->driver)); + strscpy(drvinfo->bus_info, dev_name(netdev->dev.parent), sizeof(drvinfo->bus_info)); } diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c index aeff68f582d3..071f35711468 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c @@ -50,7 +50,6 @@ * netdev functionality. */ -#include <linux/module.h> #include <linux/if_vlan.h> #include <linux/crc32.h> diff --git a/drivers/infiniband/ulp/rtrs/Makefile b/drivers/infiniband/ulp/rtrs/Makefile index 3898509be270..5227e7788e1f 100644 --- a/drivers/infiniband/ulp/rtrs/Makefile +++ b/drivers/infiniband/ulp/rtrs/Makefile @@ -1,12 +1,18 @@ # SPDX-License-Identifier: GPL-2.0-or-later +CFLAGS_rtrs-clt-trace.o = -I$(src) + rtrs-client-y := rtrs-clt.o \ rtrs-clt-stats.o \ - rtrs-clt-sysfs.o + rtrs-clt-sysfs.o \ + rtrs-clt-trace.o + +CFLAGS_rtrs-srv-trace.o = -I$(src) rtrs-server-y := rtrs-srv.o \ rtrs-srv-stats.o \ - rtrs-srv-sysfs.o + rtrs-srv-sysfs.o \ + rtrs-srv-trace.o rtrs-core-y := rtrs.o diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c index 76e4352fe3f6..1e6ffafa2db3 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c @@ -13,8 +13,8 @@ void rtrs_clt_update_wc_stats(struct rtrs_clt_con *con) { - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); - struct rtrs_clt_stats *stats = sess->stats; + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); + struct rtrs_clt_stats *stats = clt_path->stats; struct rtrs_clt_stats_pcpu *s; int cpu; @@ -32,11 +32,7 @@ void rtrs_clt_update_wc_stats(struct rtrs_clt_con *con) void rtrs_clt_inc_failover_cnt(struct rtrs_clt_stats *stats) { - struct rtrs_clt_stats_pcpu *s; - - s = get_cpu_ptr(stats->pcpu_stats); - s->rdma.failover_cnt++; - 
put_cpu_ptr(stats->pcpu_stats); + this_cpu_inc(stats->pcpu_stats->rdma.failover_cnt); } int rtrs_clt_stats_migration_from_cnt_to_str(struct rtrs_clt_stats *stats, char *buf) @@ -169,19 +165,15 @@ int rtrs_clt_reset_all_stats(struct rtrs_clt_stats *s, bool enable) static inline void rtrs_clt_update_rdma_stats(struct rtrs_clt_stats *stats, size_t size, int d) { - struct rtrs_clt_stats_pcpu *s; - - s = get_cpu_ptr(stats->pcpu_stats); - s->rdma.dir[d].cnt++; - s->rdma.dir[d].size_total += size; - put_cpu_ptr(stats->pcpu_stats); + this_cpu_inc(stats->pcpu_stats->rdma.dir[d].cnt); + this_cpu_add(stats->pcpu_stats->rdma.dir[d].size_total, size); } void rtrs_clt_update_all_stats(struct rtrs_clt_io_req *req, int dir) { struct rtrs_clt_con *con = req->con; - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); - struct rtrs_clt_stats *stats = sess->stats; + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); + struct rtrs_clt_stats *stats = clt_path->stats; unsigned int len; len = req->usr_len + req->data_len; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c index 0e69180c3771..d3c436ead694 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c @@ -16,21 +16,21 @@ #define MIN_MAX_RECONN_ATT -1 #define MAX_MAX_RECONN_ATT 9999 -static void rtrs_clt_sess_release(struct kobject *kobj) +static void rtrs_clt_path_release(struct kobject *kobj) { - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *clt_path; - sess = container_of(kobj, struct rtrs_clt_sess, kobj); + clt_path = container_of(kobj, struct rtrs_clt_path, kobj); - free_sess(sess); + free_path(clt_path); } static struct kobj_type ktype_sess = { .sysfs_ops = &kobj_sysfs_ops, - .release = rtrs_clt_sess_release + .release = rtrs_clt_path_release }; -static void rtrs_clt_sess_stats_release(struct kobject *kobj) +static void rtrs_clt_path_stats_release(struct kobject *kobj) { struct rtrs_clt_stats *stats; @@ 
-43,14 +43,15 @@ static void rtrs_clt_sess_stats_release(struct kobject *kobj) static struct kobj_type ktype_stats = { .sysfs_ops = &kobj_sysfs_ops, - .release = rtrs_clt_sess_stats_release, + .release = rtrs_clt_path_stats_release, }; static ssize_t max_reconnect_attempts_show(struct device *dev, struct device_attribute *attr, char *page) { - struct rtrs_clt *clt = container_of(dev, struct rtrs_clt, dev); + struct rtrs_clt_sess *clt = container_of(dev, struct rtrs_clt_sess, + dev); return sysfs_emit(page, "%d\n", rtrs_clt_get_max_reconnect_attempts(clt)); @@ -63,7 +64,8 @@ static ssize_t max_reconnect_attempts_store(struct device *dev, { int value; int ret; - struct rtrs_clt *clt = container_of(dev, struct rtrs_clt, dev); + struct rtrs_clt_sess *clt = container_of(dev, struct rtrs_clt_sess, + dev); ret = kstrtoint(buf, 10, &value); if (ret) { @@ -90,9 +92,9 @@ static ssize_t mpath_policy_show(struct device *dev, struct device_attribute *attr, char *page) { - struct rtrs_clt *clt; + struct rtrs_clt_sess *clt; - clt = container_of(dev, struct rtrs_clt, dev); + clt = container_of(dev, struct rtrs_clt_sess, dev); switch (clt->mp_policy) { case MP_POLICY_RR: @@ -114,12 +116,12 @@ static ssize_t mpath_policy_store(struct device *dev, const char *buf, size_t count) { - struct rtrs_clt *clt; + struct rtrs_clt_sess *clt; int value; int ret; size_t len = 0; - clt = container_of(dev, struct rtrs_clt, dev); + clt = container_of(dev, struct rtrs_clt_sess, dev); ret = kstrtoint(buf, 10, &value); if (!ret && (value == MP_POLICY_RR || @@ -154,8 +156,7 @@ static DEVICE_ATTR_RW(mpath_policy); static ssize_t add_path_show(struct device *dev, struct device_attribute *attr, char *page) { - return sysfs_emit( - page, + return sysfs_emit(page, "Usage: echo [<source addr>@]<destination addr> > %s\n\n*addr ::= [ ip:<ipv4|ipv6> | gid:<gid> ]\n", attr->attr.name); } @@ -169,12 +170,12 @@ static ssize_t add_path_store(struct device *dev, .src = &srcaddr, .dst = &dstaddr }; - struct rtrs_clt 
*clt; + struct rtrs_clt_sess *clt; const char *nl; size_t len; int err; - clt = container_of(dev, struct rtrs_clt, dev); + clt = container_of(dev, struct rtrs_clt_sess, dev); nl = strchr(buf, '\n'); if (nl) @@ -197,10 +198,10 @@ static DEVICE_ATTR_RW(add_path); static ssize_t rtrs_clt_state_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *clt_path; - sess = container_of(kobj, struct rtrs_clt_sess, kobj); - if (sess->state == RTRS_CLT_CONNECTED) + clt_path = container_of(kobj, struct rtrs_clt_path, kobj); + if (clt_path->state == RTRS_CLT_CONNECTED) return sysfs_emit(page, "connected\n"); return sysfs_emit(page, "disconnected\n"); @@ -219,16 +220,16 @@ static ssize_t rtrs_clt_reconnect_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *clt_path; int ret; - sess = container_of(kobj, struct rtrs_clt_sess, kobj); + clt_path = container_of(kobj, struct rtrs_clt_path, kobj); if (!sysfs_streq(buf, "1")) { - rtrs_err(sess->clt, "%s: unknown value: '%s'\n", + rtrs_err(clt_path->clt, "%s: unknown value: '%s'\n", attr->attr.name, buf); return -EINVAL; } - ret = rtrs_clt_reconnect_from_sysfs(sess); + ret = rtrs_clt_reconnect_from_sysfs(clt_path); if (ret) return ret; @@ -249,15 +250,15 @@ static ssize_t rtrs_clt_disconnect_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *clt_path; - sess = container_of(kobj, struct rtrs_clt_sess, kobj); + clt_path = container_of(kobj, struct rtrs_clt_path, kobj); if (!sysfs_streq(buf, "1")) { - rtrs_err(sess->clt, "%s: unknown value: '%s'\n", + rtrs_err(clt_path->clt, "%s: unknown value: '%s'\n", attr->attr.name, buf); return -EINVAL; } - rtrs_clt_close_conns(sess, true); + rtrs_clt_close_conns(clt_path, true); return count; } @@ -276,16 +277,16 @@ static ssize_t 
rtrs_clt_remove_path_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *clt_path; int ret; - sess = container_of(kobj, struct rtrs_clt_sess, kobj); + clt_path = container_of(kobj, struct rtrs_clt_path, kobj); if (!sysfs_streq(buf, "1")) { - rtrs_err(sess->clt, "%s: unknown value: '%s'\n", + rtrs_err(clt_path->clt, "%s: unknown value: '%s'\n", attr->attr.name, buf); return -EINVAL; } - ret = rtrs_clt_remove_path_from_sysfs(sess, &attr->attr); + ret = rtrs_clt_remove_path_from_sysfs(clt_path, &attr->attr); if (ret) return ret; @@ -333,11 +334,11 @@ static ssize_t rtrs_clt_hca_port_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *clt_path; - sess = container_of(kobj, typeof(*sess), kobj); + clt_path = container_of(kobj, typeof(*clt_path), kobj); - return sysfs_emit(page, "%u\n", sess->hca_port); + return sysfs_emit(page, "%u\n", clt_path->hca_port); } static struct kobj_attribute rtrs_clt_hca_port_attr = @@ -347,11 +348,11 @@ static ssize_t rtrs_clt_hca_name_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *clt_path; - sess = container_of(kobj, struct rtrs_clt_sess, kobj); + clt_path = container_of(kobj, struct rtrs_clt_path, kobj); - return sysfs_emit(page, "%s\n", sess->hca_name); + return sysfs_emit(page, "%s\n", clt_path->hca_name); } static struct kobj_attribute rtrs_clt_hca_name_attr = @@ -361,12 +362,12 @@ static ssize_t rtrs_clt_cur_latency_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *clt_path; - sess = container_of(kobj, struct rtrs_clt_sess, kobj); + clt_path = container_of(kobj, struct rtrs_clt_path, kobj); return sysfs_emit(page, "%lld ns\n", - ktime_to_ns(sess->s.hb_cur_latency)); + ktime_to_ns(clt_path->s.hb_cur_latency)); } static struct 
kobj_attribute rtrs_clt_cur_latency_attr = @@ -376,11 +377,11 @@ static ssize_t rtrs_clt_src_addr_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *clt_path; int len; - sess = container_of(kobj, struct rtrs_clt_sess, kobj); - len = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, page, + clt_path = container_of(kobj, struct rtrs_clt_path, kobj); + len = sockaddr_to_str((struct sockaddr *)&clt_path->s.src_addr, page, PAGE_SIZE); len += sysfs_emit_at(page, len, "\n"); return len; @@ -393,11 +394,11 @@ static ssize_t rtrs_clt_dst_addr_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *clt_path; int len; - sess = container_of(kobj, struct rtrs_clt_sess, kobj); - len = sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, page, + clt_path = container_of(kobj, struct rtrs_clt_path, kobj); + len = sockaddr_to_str((struct sockaddr *)&clt_path->s.dst_addr, page, PAGE_SIZE); len += sysfs_emit_at(page, len, "\n"); return len; @@ -406,7 +407,7 @@ static ssize_t rtrs_clt_dst_addr_show(struct kobject *kobj, static struct kobj_attribute rtrs_clt_dst_addr_attr = __ATTR(dst_addr, 0444, rtrs_clt_dst_addr_show, NULL); -static struct attribute *rtrs_clt_sess_attrs[] = { +static struct attribute *rtrs_clt_path_attrs[] = { &rtrs_clt_hca_name_attr.attr, &rtrs_clt_hca_port_attr.attr, &rtrs_clt_src_addr_attr.attr, @@ -419,42 +420,43 @@ static struct attribute *rtrs_clt_sess_attrs[] = { NULL, }; -static const struct attribute_group rtrs_clt_sess_attr_group = { - .attrs = rtrs_clt_sess_attrs, +static const struct attribute_group rtrs_clt_path_attr_group = { + .attrs = rtrs_clt_path_attrs, }; -int rtrs_clt_create_sess_files(struct rtrs_clt_sess *sess) +int rtrs_clt_create_path_files(struct rtrs_clt_path *clt_path) { - struct rtrs_clt *clt = sess->clt; + struct rtrs_clt_sess *clt = clt_path->clt; char str[NAME_MAX]; int err; struct rtrs_addr 
path = { - .src = &sess->s.src_addr, - .dst = &sess->s.dst_addr, + .src = &clt_path->s.src_addr, + .dst = &clt_path->s.dst_addr, }; rtrs_addr_to_str(&path, str, sizeof(str)); - err = kobject_init_and_add(&sess->kobj, &ktype_sess, clt->kobj_paths, + err = kobject_init_and_add(&clt_path->kobj, &ktype_sess, + clt->kobj_paths, "%s", str); if (err) { pr_err("kobject_init_and_add: %d\n", err); - kobject_put(&sess->kobj); + kobject_put(&clt_path->kobj); return err; } - err = sysfs_create_group(&sess->kobj, &rtrs_clt_sess_attr_group); + err = sysfs_create_group(&clt_path->kobj, &rtrs_clt_path_attr_group); if (err) { pr_err("sysfs_create_group(): %d\n", err); goto put_kobj; } - err = kobject_init_and_add(&sess->stats->kobj_stats, &ktype_stats, - &sess->kobj, "stats"); + err = kobject_init_and_add(&clt_path->stats->kobj_stats, &ktype_stats, + &clt_path->kobj, "stats"); if (err) { pr_err("kobject_init_and_add: %d\n", err); - kobject_put(&sess->stats->kobj_stats); + kobject_put(&clt_path->stats->kobj_stats); goto remove_group; } - err = sysfs_create_group(&sess->stats->kobj_stats, + err = sysfs_create_group(&clt_path->stats->kobj_stats, &rtrs_clt_stats_attr_group); if (err) { pr_err("failed to create stats sysfs group, err: %d\n", err); @@ -464,25 +466,25 @@ int rtrs_clt_create_sess_files(struct rtrs_clt_sess *sess) return 0; put_kobj_stats: - kobject_del(&sess->stats->kobj_stats); - kobject_put(&sess->stats->kobj_stats); + kobject_del(&clt_path->stats->kobj_stats); + kobject_put(&clt_path->stats->kobj_stats); remove_group: - sysfs_remove_group(&sess->kobj, &rtrs_clt_sess_attr_group); + sysfs_remove_group(&clt_path->kobj, &rtrs_clt_path_attr_group); put_kobj: - kobject_del(&sess->kobj); - kobject_put(&sess->kobj); + kobject_del(&clt_path->kobj); + kobject_put(&clt_path->kobj); return err; } -void rtrs_clt_destroy_sess_files(struct rtrs_clt_sess *sess, +void rtrs_clt_destroy_path_files(struct rtrs_clt_path *clt_path, const struct attribute *sysfs_self) { - 
kobject_del(&sess->stats->kobj_stats); - kobject_put(&sess->stats->kobj_stats); + kobject_del(&clt_path->stats->kobj_stats); + kobject_put(&clt_path->stats->kobj_stats); if (sysfs_self) - sysfs_remove_file_self(&sess->kobj, sysfs_self); - kobject_del(&sess->kobj); + sysfs_remove_file_self(&clt_path->kobj, sysfs_self); + kobject_del(&clt_path->kobj); } static struct attribute *rtrs_clt_attrs[] = { @@ -496,12 +498,12 @@ static const struct attribute_group rtrs_clt_attr_group = { .attrs = rtrs_clt_attrs, }; -int rtrs_clt_create_sysfs_root_files(struct rtrs_clt *clt) +int rtrs_clt_create_sysfs_root_files(struct rtrs_clt_sess *clt) { return sysfs_create_group(&clt->dev.kobj, &rtrs_clt_attr_group); } -void rtrs_clt_destroy_sysfs_root(struct rtrs_clt *clt) +void rtrs_clt_destroy_sysfs_root(struct rtrs_clt_sess *clt) { sysfs_remove_group(&clt->dev.kobj, &rtrs_clt_attr_group); diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-trace.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-trace.c new file mode 100644 index 000000000000..f14fa1f36ce8 --- /dev/null +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-trace.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * RDMA Network Block Driver + * + * Copyright (c) 2022 1&1 IONOS SE. All rights reserved. + */ +#include "rtrs.h" +#include "rtrs-clt.h" + +/* + * We include this last to have the helpers above available for the trace + * event implementations. + */ +#define CREATE_TRACE_POINTS +#include "rtrs-clt-trace.h" diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-trace.h b/drivers/infiniband/ulp/rtrs/rtrs-clt-trace.h new file mode 100644 index 000000000000..7738e2676855 --- /dev/null +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-trace.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * RDMA Network Block Driver + * + * Copyright (c) 2022 1&1 IONOS SE. All rights reserved. 
+ */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rtrs_clt + +#if !defined(_TRACE_RTRS_CLT_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_RTRS_CLT_H + +#include <linux/tracepoint.h> + +struct rtrs_clt_path; +struct rtrs_clt_sess; + +TRACE_DEFINE_ENUM(RTRS_CLT_CONNECTING); +TRACE_DEFINE_ENUM(RTRS_CLT_CONNECTING_ERR); +TRACE_DEFINE_ENUM(RTRS_CLT_RECONNECTING); +TRACE_DEFINE_ENUM(RTRS_CLT_CONNECTED); +TRACE_DEFINE_ENUM(RTRS_CLT_CLOSING); +TRACE_DEFINE_ENUM(RTRS_CLT_CLOSED); +TRACE_DEFINE_ENUM(RTRS_CLT_DEAD); + +#define show_rtrs_clt_state(x) \ + __print_symbolic(x, \ + { RTRS_CLT_CONNECTING, "CONNECTING" }, \ + { RTRS_CLT_CONNECTING_ERR, "CONNECTING_ERR" }, \ + { RTRS_CLT_RECONNECTING, "RECONNECTING" }, \ + { RTRS_CLT_CONNECTED, "CONNECTED" }, \ + { RTRS_CLT_CLOSING, "CLOSING" }, \ + { RTRS_CLT_CLOSED, "CLOSED" }, \ + { RTRS_CLT_DEAD, "DEAD" }) + +DECLARE_EVENT_CLASS(rtrs_clt_conn_class, + TP_PROTO(struct rtrs_clt_path *clt_path), + + TP_ARGS(clt_path), + + TP_STRUCT__entry( + __field(int, state) + __field(int, reconnect_attempts) + __field(int, max_reconnect_attempts) + __field(int, fail_cnt) + __field(int, success_cnt) + __array(char, sessname, NAME_MAX) + ), + + TP_fast_assign( + struct rtrs_clt_sess *clt = clt_path->clt; + + __entry->state = clt_path->state; + __entry->reconnect_attempts = clt_path->reconnect_attempts; + __entry->max_reconnect_attempts = clt->max_reconnect_attempts; + __entry->fail_cnt = clt_path->stats->reconnects.fail_cnt; + __entry->success_cnt = clt_path->stats->reconnects.successful_cnt; + memcpy(__entry->sessname, kobject_name(&clt_path->kobj), NAME_MAX); + ), + + TP_printk("RTRS-CLT: sess='%s' state=%s attempts='%d' max-attempts='%d' fail='%d' success='%d'", + __entry->sessname, + show_rtrs_clt_state(__entry->state), + __entry->reconnect_attempts, + __entry->max_reconnect_attempts, + __entry->fail_cnt, + __entry->success_cnt + ) +); + +#define DEFINE_CLT_CONN_EVENT(name) \ +DEFINE_EVENT(rtrs_clt_conn_class, rtrs_##name, \ + 
TP_PROTO(struct rtrs_clt_path *clt_path), \ + TP_ARGS(clt_path)) + +DEFINE_CLT_CONN_EVENT(clt_reconnect_work); +DEFINE_CLT_CONN_EVENT(clt_close_conns); +DEFINE_CLT_CONN_EVENT(rdma_error_recovery); + +#endif /* _TRACE_RTRS_CLT_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE rtrs-clt-trace +#include <trace/define_trace.h> + diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 15c0077dd27e..8546b8816524 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -16,6 +16,7 @@ #include "rtrs-clt.h" #include "rtrs-log.h" +#include "rtrs-clt-trace.h" #define RTRS_CONNECT_TIMEOUT_MS 30000 /* @@ -46,21 +47,24 @@ static struct rtrs_rdma_dev_pd dev_pd = { static struct workqueue_struct *rtrs_wq; static struct class *rtrs_clt_dev_class; -static inline bool rtrs_clt_is_connected(const struct rtrs_clt *clt) +static inline bool rtrs_clt_is_connected(const struct rtrs_clt_sess *clt) { - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *clt_path; bool connected = false; rcu_read_lock(); - list_for_each_entry_rcu(sess, &clt->paths_list, s.entry) - connected |= READ_ONCE(sess->state) == RTRS_CLT_CONNECTED; + list_for_each_entry_rcu(clt_path, &clt->paths_list, s.entry) + if (READ_ONCE(clt_path->state) == RTRS_CLT_CONNECTED) { + connected = true; + break; + } rcu_read_unlock(); return connected; } static struct rtrs_permit * -__rtrs_get_permit(struct rtrs_clt *clt, enum rtrs_clt_con_type con_type) +__rtrs_get_permit(struct rtrs_clt_sess *clt, enum rtrs_clt_con_type con_type) { size_t max_depth = clt->queue_depth; struct rtrs_permit *permit; @@ -87,7 +91,7 @@ __rtrs_get_permit(struct rtrs_clt *clt, enum rtrs_clt_con_type con_type) return permit; } -static inline void __rtrs_put_permit(struct rtrs_clt *clt, +static inline void __rtrs_put_permit(struct rtrs_clt_sess *clt, struct rtrs_permit *permit) { clear_bit_unlock(permit->mem_id, clt->permits_map); @@ 
-107,7 +111,7 @@ static inline void __rtrs_put_permit(struct rtrs_clt *clt, * Context: * Can sleep if @wait == RTRS_PERMIT_WAIT */ -struct rtrs_permit *rtrs_clt_get_permit(struct rtrs_clt *clt, +struct rtrs_permit *rtrs_clt_get_permit(struct rtrs_clt_sess *clt, enum rtrs_clt_con_type con_type, enum wait_type can_wait) { @@ -142,7 +146,8 @@ EXPORT_SYMBOL(rtrs_clt_get_permit); * Context: * Does not matter */ -void rtrs_clt_put_permit(struct rtrs_clt *clt, struct rtrs_permit *permit) +void rtrs_clt_put_permit(struct rtrs_clt_sess *clt, + struct rtrs_permit *permit) { if (WARN_ON(!test_bit(permit->mem_id, clt->permits_map))) return; @@ -163,29 +168,29 @@ EXPORT_SYMBOL(rtrs_clt_put_permit); /** * rtrs_permit_to_clt_con() - returns RDMA connection pointer by the permit - * @sess: client session pointer + * @clt_path: client path pointer * @permit: permit for the allocation of the RDMA buffer * Note: * IO connection starts from 1. * 0 connection is for user messages. */ static -struct rtrs_clt_con *rtrs_permit_to_clt_con(struct rtrs_clt_sess *sess, +struct rtrs_clt_con *rtrs_permit_to_clt_con(struct rtrs_clt_path *clt_path, struct rtrs_permit *permit) { int id = 0; if (permit->con_type == RTRS_IO_CON) - id = (permit->cpu_id % (sess->s.irq_con_num - 1)) + 1; + id = (permit->cpu_id % (clt_path->s.irq_con_num - 1)) + 1; - return to_clt_con(sess->s.con[id]); + return to_clt_con(clt_path->s.con[id]); } /** * rtrs_clt_change_state() - change the session state through session state * machine. * - * @sess: client session to change the state of. + * @clt_path: client path to change the state of. * @new_state: state to change to. * * returns true if sess's state is changed to new state, otherwise return false. @@ -193,15 +198,15 @@ struct rtrs_clt_con *rtrs_permit_to_clt_con(struct rtrs_clt_sess *sess, * Locks: * state_wq lock must be hold. 
*/ -static bool rtrs_clt_change_state(struct rtrs_clt_sess *sess, +static bool rtrs_clt_change_state(struct rtrs_clt_path *clt_path, enum rtrs_clt_state new_state) { enum rtrs_clt_state old_state; bool changed = false; - lockdep_assert_held(&sess->state_wq.lock); + lockdep_assert_held(&clt_path->state_wq.lock); - old_state = sess->state; + old_state = clt_path->state; switch (new_state) { case RTRS_CLT_CONNECTING: switch (old_state) { @@ -275,51 +280,45 @@ static bool rtrs_clt_change_state(struct rtrs_clt_sess *sess, break; } if (changed) { - sess->state = new_state; - wake_up_locked(&sess->state_wq); + clt_path->state = new_state; + wake_up_locked(&clt_path->state_wq); } return changed; } -static bool rtrs_clt_change_state_from_to(struct rtrs_clt_sess *sess, +static bool rtrs_clt_change_state_from_to(struct rtrs_clt_path *clt_path, enum rtrs_clt_state old_state, enum rtrs_clt_state new_state) { bool changed = false; - spin_lock_irq(&sess->state_wq.lock); - if (sess->state == old_state) - changed = rtrs_clt_change_state(sess, new_state); - spin_unlock_irq(&sess->state_wq.lock); + spin_lock_irq(&clt_path->state_wq.lock); + if (clt_path->state == old_state) + changed = rtrs_clt_change_state(clt_path, new_state); + spin_unlock_irq(&clt_path->state_wq.lock); return changed; } +static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_path *clt_path); static void rtrs_rdma_error_recovery(struct rtrs_clt_con *con) { - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); + + trace_rtrs_rdma_error_recovery(clt_path); - if (rtrs_clt_change_state_from_to(sess, + if (rtrs_clt_change_state_from_to(clt_path, RTRS_CLT_CONNECTED, RTRS_CLT_RECONNECTING)) { - struct rtrs_clt *clt = sess->clt; - unsigned int delay_ms; - - /* - * Normal scenario, reconnect if we were successfully connected - */ - delay_ms = clt->reconnect_delay_sec * 1000; - queue_delayed_work(rtrs_wq, &sess->reconnect_dwork, - msecs_to_jiffies(delay_ms 
+ - prandom_u32() % RTRS_RECONNECT_SEED)); + queue_work(rtrs_wq, &clt_path->err_recovery_work); } else { /* * Error can happen just on establishing new connection, * so notify waiter with error state, waiter is responsible * for cleaning the rest and reconnect if needed. */ - rtrs_clt_change_state_from_to(sess, + rtrs_clt_change_state_from_to(clt_path, RTRS_CLT_CONNECTING, RTRS_CLT_CONNECTING_ERR); } @@ -330,7 +329,7 @@ static void rtrs_clt_fast_reg_done(struct ib_cq *cq, struct ib_wc *wc) struct rtrs_clt_con *con = to_clt_con(wc->qp->qp_context); if (wc->status != IB_WC_SUCCESS) { - rtrs_err(con->c.sess, "Failed IB_WR_REG_MR: %s\n", + rtrs_err(con->c.path, "Failed IB_WR_REG_MR: %s\n", ib_wc_status_msg(wc->status)); rtrs_rdma_error_recovery(con); } @@ -350,7 +349,7 @@ static void rtrs_clt_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc) struct rtrs_clt_con *con = to_clt_con(wc->qp->qp_context); if (wc->status != IB_WC_SUCCESS) { - rtrs_err(con->c.sess, "Failed IB_WR_LOCAL_INV: %s\n", + rtrs_err(con->c.path, "Failed IB_WR_LOCAL_INV: %s\n", ib_wc_status_msg(wc->status)); rtrs_rdma_error_recovery(con); } @@ -380,14 +379,14 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno, bool notify, bool can_wait) { struct rtrs_clt_con *con = req->con; - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *clt_path; int err; if (WARN_ON(!req->in_use)) return; if (WARN_ON(!req->con)) return; - sess = to_clt_sess(con->c.sess); + clt_path = to_clt_path(con->c.path); if (req->sg_cnt) { if (req->dir == DMA_FROM_DEVICE && req->need_inv) { @@ -417,7 +416,7 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno, refcount_inc(&req->ref); err = rtrs_inv_rkey(req); if (err) { - rtrs_err(con->c.sess, "Send INV WR key=%#x: %d\n", + rtrs_err(con->c.path, "Send INV WR key=%#x: %d\n", req->mr->rkey, err); } else if (can_wait) { wait_for_completion(&req->inv_comp); @@ -433,21 +432,21 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno, if 
(!refcount_dec_and_test(&req->ref)) return; } - ib_dma_unmap_sg(sess->s.dev->ib_dev, req->sglist, + ib_dma_unmap_sg(clt_path->s.dev->ib_dev, req->sglist, req->sg_cnt, req->dir); } if (!refcount_dec_and_test(&req->ref)) return; if (req->mp_policy == MP_POLICY_MIN_INFLIGHT) - atomic_dec(&sess->stats->inflight); + atomic_dec(&clt_path->stats->inflight); req->in_use = false; req->con = NULL; if (errno) { - rtrs_err_rl(con->c.sess, "IO request failed: error=%d path=%s [%s:%u] notify=%d\n", - errno, kobject_name(&sess->kobj), sess->hca_name, - sess->hca_port, notify); + rtrs_err_rl(con->c.path, "IO request failed: error=%d path=%s [%s:%u] notify=%d\n", + errno, kobject_name(&clt_path->kobj), clt_path->hca_name, + clt_path->hca_port, notify); } if (notify) @@ -459,12 +458,12 @@ static int rtrs_post_send_rdma(struct rtrs_clt_con *con, struct rtrs_rbuf *rbuf, u32 off, u32 imm, struct ib_send_wr *wr) { - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); enum ib_send_flags flags; struct ib_sge sge; if (!req->sg_size) { - rtrs_wrn(con->c.sess, + rtrs_wrn(con->c.path, "Doing RDMA Write failed, no data supplied\n"); return -EINVAL; } @@ -472,16 +471,17 @@ static int rtrs_post_send_rdma(struct rtrs_clt_con *con, /* user data and user message in the first list element */ sge.addr = req->iu->dma_addr; sge.length = req->sg_size; - sge.lkey = sess->s.dev->ib_pd->local_dma_lkey; + sge.lkey = clt_path->s.dev->ib_pd->local_dma_lkey; /* * From time to time we have to post signalled sends, * or send queue will fill up and only QP reset can help. */ - flags = atomic_inc_return(&con->c.wr_cnt) % sess->s.signal_interval ? + flags = atomic_inc_return(&con->c.wr_cnt) % clt_path->s.signal_interval ? 
0 : IB_SEND_SIGNALED; - ib_dma_sync_single_for_device(sess->s.dev->ib_dev, req->iu->dma_addr, + ib_dma_sync_single_for_device(clt_path->s.dev->ib_dev, + req->iu->dma_addr, req->sg_size, DMA_TO_DEVICE); return rtrs_iu_post_rdma_write_imm(&con->c, req->iu, &sge, 1, @@ -489,15 +489,15 @@ static int rtrs_post_send_rdma(struct rtrs_clt_con *con, imm, flags, wr, NULL); } -static void process_io_rsp(struct rtrs_clt_sess *sess, u32 msg_id, +static void process_io_rsp(struct rtrs_clt_path *clt_path, u32 msg_id, s16 errno, bool w_inval) { struct rtrs_clt_io_req *req; - if (WARN_ON(msg_id >= sess->queue_depth)) + if (WARN_ON(msg_id >= clt_path->queue_depth)) return; - req = &sess->reqs[msg_id]; + req = &clt_path->reqs[msg_id]; /* Drop need_inv if server responded with send with invalidation */ req->need_inv &= !w_inval; complete_rdma_req(req, errno, true, false); @@ -507,21 +507,21 @@ static void rtrs_clt_recv_done(struct rtrs_clt_con *con, struct ib_wc *wc) { struct rtrs_iu *iu; int err; - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); - WARN_ON((sess->flags & RTRS_MSG_NEW_RKEY_F) == 0); + WARN_ON((clt_path->flags & RTRS_MSG_NEW_RKEY_F) == 0); iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); err = rtrs_iu_post_recv(&con->c, iu); if (err) { - rtrs_err(con->c.sess, "post iu failed %d\n", err); + rtrs_err(con->c.path, "post iu failed %d\n", err); rtrs_rdma_error_recovery(con); } } static void rtrs_clt_rkey_rsp_done(struct rtrs_clt_con *con, struct ib_wc *wc) { - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); struct rtrs_msg_rkey_rsp *msg; u32 imm_type, imm_payload; bool w_inval = false; @@ -529,25 +529,26 @@ static void rtrs_clt_rkey_rsp_done(struct rtrs_clt_con *con, struct ib_wc *wc) u32 buf_id; int err; - WARN_ON((sess->flags & RTRS_MSG_NEW_RKEY_F) == 0); + WARN_ON((clt_path->flags & RTRS_MSG_NEW_RKEY_F) == 0); iu = 
container_of(wc->wr_cqe, struct rtrs_iu, cqe); if (wc->byte_len < sizeof(*msg)) { - rtrs_err(con->c.sess, "rkey response is malformed: size %d\n", + rtrs_err(con->c.path, "rkey response is malformed: size %d\n", wc->byte_len); goto out; } - ib_dma_sync_single_for_cpu(sess->s.dev->ib_dev, iu->dma_addr, + ib_dma_sync_single_for_cpu(clt_path->s.dev->ib_dev, iu->dma_addr, iu->size, DMA_FROM_DEVICE); msg = iu->buf; if (le16_to_cpu(msg->type) != RTRS_MSG_RKEY_RSP) { - rtrs_err(sess->clt, "rkey response is malformed: type %d\n", + rtrs_err(clt_path->clt, + "rkey response is malformed: type %d\n", le16_to_cpu(msg->type)); goto out; } buf_id = le16_to_cpu(msg->buf_id); - if (WARN_ON(buf_id >= sess->queue_depth)) + if (WARN_ON(buf_id >= clt_path->queue_depth)) goto out; rtrs_from_imm(be32_to_cpu(wc->ex.imm_data), &imm_type, &imm_payload); @@ -560,10 +561,10 @@ static void rtrs_clt_rkey_rsp_done(struct rtrs_clt_con *con, struct ib_wc *wc) if (WARN_ON(buf_id != msg_id)) goto out; - sess->rbufs[buf_id].rkey = le32_to_cpu(msg->rkey); - process_io_rsp(sess, msg_id, err, w_inval); + clt_path->rbufs[buf_id].rkey = le32_to_cpu(msg->rkey); + process_io_rsp(clt_path, msg_id, err, w_inval); } - ib_dma_sync_single_for_device(sess->s.dev->ib_dev, iu->dma_addr, + ib_dma_sync_single_for_device(clt_path->s.dev->ib_dev, iu->dma_addr, iu->size, DMA_FROM_DEVICE); return rtrs_clt_recv_done(con, wc); out: @@ -600,14 +601,14 @@ static int rtrs_post_recv_empty_x2(struct rtrs_con *con, struct ib_cqe *cqe) static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) { struct rtrs_clt_con *con = to_clt_con(wc->qp->qp_context); - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); u32 imm_type, imm_payload; bool w_inval = false; int err; if (wc->status != IB_WC_SUCCESS) { if (wc->status != IB_WC_WR_FLUSH_ERR) { - rtrs_err(sess->clt, "RDMA failed: %s\n", + rtrs_err(clt_path->clt, "RDMA failed: %s\n", ib_wc_status_msg(wc->status)); 
rtrs_rdma_error_recovery(con); } @@ -632,21 +633,21 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) w_inval = (imm_type == RTRS_IO_RSP_W_INV_IMM); rtrs_from_io_rsp_imm(imm_payload, &msg_id, &err); - process_io_rsp(sess, msg_id, err, w_inval); + process_io_rsp(clt_path, msg_id, err, w_inval); } else if (imm_type == RTRS_HB_MSG_IMM) { WARN_ON(con->c.cid); - rtrs_send_hb_ack(&sess->s); - if (sess->flags & RTRS_MSG_NEW_RKEY_F) + rtrs_send_hb_ack(&clt_path->s); + if (clt_path->flags & RTRS_MSG_NEW_RKEY_F) return rtrs_clt_recv_done(con, wc); } else if (imm_type == RTRS_HB_ACK_IMM) { WARN_ON(con->c.cid); - sess->s.hb_missed_cnt = 0; - sess->s.hb_cur_latency = - ktime_sub(ktime_get(), sess->s.hb_last_sent); - if (sess->flags & RTRS_MSG_NEW_RKEY_F) + clt_path->s.hb_missed_cnt = 0; + clt_path->s.hb_cur_latency = + ktime_sub(ktime_get(), clt_path->s.hb_last_sent); + if (clt_path->flags & RTRS_MSG_NEW_RKEY_F) return rtrs_clt_recv_done(con, wc); } else { - rtrs_wrn(con->c.sess, "Unknown IMM type %u\n", + rtrs_wrn(con->c.path, "Unknown IMM type %u\n", imm_type); } if (w_inval) @@ -658,7 +659,7 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) else err = rtrs_post_recv_empty(&con->c, &io_comp_cqe); if (err) { - rtrs_err(con->c.sess, "rtrs_post_recv_empty(): %d\n", + rtrs_err(con->c.path, "rtrs_post_recv_empty(): %d\n", err); rtrs_rdma_error_recovery(con); } @@ -670,7 +671,7 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) WARN_ON(!(wc->wc_flags & IB_WC_WITH_INVALIDATE || wc->wc_flags & IB_WC_WITH_IMM)); WARN_ON(wc->wr_cqe->done != rtrs_clt_rdma_done); - if (sess->flags & RTRS_MSG_NEW_RKEY_F) { + if (clt_path->flags & RTRS_MSG_NEW_RKEY_F) { if (wc->wc_flags & IB_WC_WITH_INVALIDATE) return rtrs_clt_recv_done(con, wc); @@ -685,7 +686,7 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) break; default: - rtrs_wrn(sess->clt, "Unexpected WC type: %d\n", wc->opcode); + rtrs_wrn(clt_path->clt, "Unexpected 
WC type: %d\n", wc->opcode); return; } } @@ -693,10 +694,10 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) static int post_recv_io(struct rtrs_clt_con *con, size_t q_size) { int err, i; - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); for (i = 0; i < q_size; i++) { - if (sess->flags & RTRS_MSG_NEW_RKEY_F) { + if (clt_path->flags & RTRS_MSG_NEW_RKEY_F) { struct rtrs_iu *iu = &con->rsp_ius[i]; err = rtrs_iu_post_recv(&con->c, iu); @@ -710,16 +711,16 @@ static int post_recv_io(struct rtrs_clt_con *con, size_t q_size) return 0; } -static int post_recv_sess(struct rtrs_clt_sess *sess) +static int post_recv_path(struct rtrs_clt_path *clt_path) { size_t q_size = 0; int err, cid; - for (cid = 0; cid < sess->s.con_num; cid++) { + for (cid = 0; cid < clt_path->s.con_num; cid++) { if (cid == 0) q_size = SERVICE_CON_QUEUE_DEPTH; else - q_size = sess->queue_depth; + q_size = clt_path->queue_depth; /* * x2 for RDMA read responses + FR key invalidations, @@ -727,9 +728,10 @@ static int post_recv_sess(struct rtrs_clt_sess *sess) */ q_size *= 2; - err = post_recv_io(to_clt_con(sess->s.con[cid]), q_size); + err = post_recv_io(to_clt_con(clt_path->s.con[cid]), q_size); if (err) { - rtrs_err(sess->clt, "post_recv_io(), err: %d\n", err); + rtrs_err(clt_path->clt, "post_recv_io(), err: %d\n", + err); return err; } } @@ -740,29 +742,29 @@ static int post_recv_sess(struct rtrs_clt_sess *sess) struct path_it { int i; struct list_head skip_list; - struct rtrs_clt *clt; - struct rtrs_clt_sess *(*next_path)(struct path_it *it); + struct rtrs_clt_sess *clt; + struct rtrs_clt_path *(*next_path)(struct path_it *it); }; -/** - * list_next_or_null_rr_rcu - get next list element in round-robin fashion. +/* + * rtrs_clt_get_next_path_or_null - get clt path from the list or return NULL * @head: the head for the list. - * @ptr: the list head to take the next element from. 
- * @type: the type of the struct this is embedded in. - * @memb: the name of the list_head within the struct. + * @clt_path: The element to take the next clt_path from. * - * Next element returned in round-robin fashion, i.e. head will be skipped, + * Next clt path returned in round-robin fashion, i.e. head will be skipped, * but if list is observed as empty, NULL will be returned. * - * This primitive may safely run concurrently with the _rcu list-mutation + * This function may safely run concurrently with the _rcu list-mutation * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). */ -#define list_next_or_null_rr_rcu(head, ptr, type, memb) \ -({ \ - list_next_or_null_rcu(head, ptr, type, memb) ?: \ - list_next_or_null_rcu(head, READ_ONCE((ptr)->next), \ - type, memb); \ -}) +static inline struct rtrs_clt_path * +rtrs_clt_get_next_path_or_null(struct list_head *head, struct rtrs_clt_path *clt_path) +{ + return list_next_or_null_rcu(head, &clt_path->s.entry, typeof(*clt_path), s.entry) ?: + list_next_or_null_rcu(head, + READ_ONCE((&clt_path->s.entry)->next), + typeof(*clt_path), s.entry); +} /** * get_next_path_rr() - Returns path in round-robin fashion. @@ -773,11 +775,11 @@ struct path_it { * Locks: * rcu_read_lock() must be hold. 
*/ -static struct rtrs_clt_sess *get_next_path_rr(struct path_it *it) +static struct rtrs_clt_path *get_next_path_rr(struct path_it *it) { - struct rtrs_clt_sess __rcu **ppcpu_path; - struct rtrs_clt_sess *path; - struct rtrs_clt *clt; + struct rtrs_clt_path __rcu **ppcpu_path; + struct rtrs_clt_path *path; + struct rtrs_clt_sess *clt; clt = it->clt; @@ -793,10 +795,8 @@ static struct rtrs_clt_sess *get_next_path_rr(struct path_it *it) path = list_first_or_null_rcu(&clt->paths_list, typeof(*path), s.entry); else - path = list_next_or_null_rr_rcu(&clt->paths_list, - &path->s.entry, - typeof(*path), - s.entry); + path = rtrs_clt_get_next_path_or_null(&clt->paths_list, path); + rcu_assign_pointer(*ppcpu_path, path); return path; @@ -811,26 +811,26 @@ static struct rtrs_clt_sess *get_next_path_rr(struct path_it *it) * Locks: * rcu_read_lock() must be hold. */ -static struct rtrs_clt_sess *get_next_path_min_inflight(struct path_it *it) +static struct rtrs_clt_path *get_next_path_min_inflight(struct path_it *it) { - struct rtrs_clt_sess *min_path = NULL; - struct rtrs_clt *clt = it->clt; - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *min_path = NULL; + struct rtrs_clt_sess *clt = it->clt; + struct rtrs_clt_path *clt_path; int min_inflight = INT_MAX; int inflight; - list_for_each_entry_rcu(sess, &clt->paths_list, s.entry) { - if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTED) + list_for_each_entry_rcu(clt_path, &clt->paths_list, s.entry) { + if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTED) continue; - if (!list_empty(raw_cpu_ptr(sess->mp_skip_entry))) + if (!list_empty(raw_cpu_ptr(clt_path->mp_skip_entry))) continue; - inflight = atomic_read(&sess->stats->inflight); + inflight = atomic_read(&clt_path->stats->inflight); if (inflight < min_inflight) { min_inflight = inflight; - min_path = sess; + min_path = clt_path; } } @@ -862,26 +862,26 @@ static struct rtrs_clt_sess *get_next_path_min_inflight(struct path_it *it) * Therefore the caller MUST check the returned 
* path is NULL and trigger the IO error. */ -static struct rtrs_clt_sess *get_next_path_min_latency(struct path_it *it) +static struct rtrs_clt_path *get_next_path_min_latency(struct path_it *it) { - struct rtrs_clt_sess *min_path = NULL; - struct rtrs_clt *clt = it->clt; - struct rtrs_clt_sess *sess; - ktime_t min_latency = INT_MAX; + struct rtrs_clt_path *min_path = NULL; + struct rtrs_clt_sess *clt = it->clt; + struct rtrs_clt_path *clt_path; + ktime_t min_latency = KTIME_MAX; ktime_t latency; - list_for_each_entry_rcu(sess, &clt->paths_list, s.entry) { - if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTED) + list_for_each_entry_rcu(clt_path, &clt->paths_list, s.entry) { + if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTED) continue; - if (!list_empty(raw_cpu_ptr(sess->mp_skip_entry))) + if (!list_empty(raw_cpu_ptr(clt_path->mp_skip_entry))) continue; - latency = sess->s.hb_cur_latency; + latency = clt_path->s.hb_cur_latency; if (latency < min_latency) { min_latency = latency; - min_path = sess; + min_path = clt_path; } } @@ -895,7 +895,7 @@ static struct rtrs_clt_sess *get_next_path_min_latency(struct path_it *it) return min_path; } -static inline void path_it_init(struct path_it *it, struct rtrs_clt *clt) +static inline void path_it_init(struct path_it *it, struct rtrs_clt_sess *clt) { INIT_LIST_HEAD(&it->skip_list); it->clt = clt; @@ -913,7 +913,7 @@ static inline void path_it_deinit(struct path_it *it) { struct list_head *skip, *tmp; /* - * The skip_list is used only for the MIN_INFLIGHT policy. + * The skip_list is used only for the MIN_INFLIGHT and MIN_LATENCY policies. * We need to remove paths from it, so that next IO can insert * paths (->mp_skip_entry) into a skip_list again. */ @@ -928,7 +928,7 @@ static inline void path_it_deinit(struct path_it *it) * the corresponding buffer of rtrs_iu (req->iu->buf), which later on will * also hold the control message of rtrs. * @req: an io request holding information about IO. 
- * @sess: client session + * @clt_path: client path * @conf: conformation callback function to notify upper layer. * @permit: permit for allocation of RDMA remote buffer * @priv: private pointer @@ -940,7 +940,7 @@ static inline void path_it_deinit(struct path_it *it) * @dir: direction of the IO. */ static void rtrs_clt_init_req(struct rtrs_clt_io_req *req, - struct rtrs_clt_sess *sess, + struct rtrs_clt_path *clt_path, void (*conf)(void *priv, int errno), struct rtrs_permit *permit, void *priv, const struct kvec *vec, size_t usr_len, @@ -958,13 +958,13 @@ static void rtrs_clt_init_req(struct rtrs_clt_io_req *req, req->sg_cnt = sg_cnt; req->priv = priv; req->dir = dir; - req->con = rtrs_permit_to_clt_con(sess, permit); + req->con = rtrs_permit_to_clt_con(clt_path, permit); req->conf = conf; req->need_inv = false; req->need_inv_comp = false; req->inv_errno = 0; refcount_set(&req->ref, 1); - req->mp_policy = sess->clt->mp_policy; + req->mp_policy = clt_path->clt->mp_policy; iov_iter_kvec(&iter, READ, vec, 1, usr_len); len = _copy_from_iter(req->iu->buf, usr_len, &iter); @@ -974,7 +974,7 @@ static void rtrs_clt_init_req(struct rtrs_clt_io_req *req, } static struct rtrs_clt_io_req * -rtrs_clt_get_req(struct rtrs_clt_sess *sess, +rtrs_clt_get_req(struct rtrs_clt_path *clt_path, void (*conf)(void *priv, int errno), struct rtrs_permit *permit, void *priv, const struct kvec *vec, size_t usr_len, @@ -983,14 +983,14 @@ rtrs_clt_get_req(struct rtrs_clt_sess *sess, { struct rtrs_clt_io_req *req; - req = &sess->reqs[permit->mem_id]; - rtrs_clt_init_req(req, sess, conf, permit, priv, vec, usr_len, + req = &clt_path->reqs[permit->mem_id]; + rtrs_clt_init_req(req, clt_path, conf, permit, priv, vec, usr_len, sg, sg_cnt, data_len, dir); return req; } static struct rtrs_clt_io_req * -rtrs_clt_get_copy_req(struct rtrs_clt_sess *alive_sess, +rtrs_clt_get_copy_req(struct rtrs_clt_path *alive_path, struct rtrs_clt_io_req *fail_req) { struct rtrs_clt_io_req *req; @@ -999,8 +999,8 @@ 
rtrs_clt_get_copy_req(struct rtrs_clt_sess *alive_sess, .iov_len = fail_req->usr_len }; - req = &alive_sess->reqs[fail_req->permit->mem_id]; - rtrs_clt_init_req(req, alive_sess, fail_req->conf, fail_req->permit, + req = &alive_path->reqs[fail_req->permit->mem_id]; + rtrs_clt_init_req(req, alive_path, fail_req->conf, fail_req->permit, fail_req->priv, &vec, fail_req->usr_len, fail_req->sglist, fail_req->sg_cnt, fail_req->data_len, fail_req->dir); @@ -1010,10 +1010,11 @@ rtrs_clt_get_copy_req(struct rtrs_clt_sess *alive_sess, static int rtrs_post_rdma_write_sg(struct rtrs_clt_con *con, struct rtrs_clt_io_req *req, struct rtrs_rbuf *rbuf, bool fr_en, - u32 size, u32 imm, struct ib_send_wr *wr, + u32 count, u32 size, u32 imm, + struct ib_send_wr *wr, struct ib_send_wr *tail) { - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); struct ib_sge *sge = req->sge; enum ib_send_flags flags; struct scatterlist *sg; @@ -1030,25 +1031,26 @@ static int rtrs_post_rdma_write_sg(struct rtrs_clt_con *con, num_sge = 2; ptail = tail; } else { - for_each_sg(req->sglist, sg, req->sg_cnt, i) { + for_each_sg(req->sglist, sg, count, i) { sge[i].addr = sg_dma_address(sg); sge[i].length = sg_dma_len(sg); - sge[i].lkey = sess->s.dev->ib_pd->local_dma_lkey; + sge[i].lkey = clt_path->s.dev->ib_pd->local_dma_lkey; } - num_sge = 1 + req->sg_cnt; + num_sge = 1 + count; } sge[i].addr = req->iu->dma_addr; sge[i].length = size; - sge[i].lkey = sess->s.dev->ib_pd->local_dma_lkey; + sge[i].lkey = clt_path->s.dev->ib_pd->local_dma_lkey; /* * From time to time we have to post signalled sends, * or send queue will fill up and only QP reset can help. */ - flags = atomic_inc_return(&con->c.wr_cnt) % sess->s.signal_interval ? + flags = atomic_inc_return(&con->c.wr_cnt) % clt_path->s.signal_interval ? 
0 : IB_SEND_SIGNALED; - ib_dma_sync_single_for_device(sess->s.dev->ib_dev, req->iu->dma_addr, + ib_dma_sync_single_for_device(clt_path->s.dev->ib_dev, + req->iu->dma_addr, size, DMA_TO_DEVICE); return rtrs_iu_post_rdma_write_imm(&con->c, req->iu, sge, num_sge, @@ -1074,8 +1076,8 @@ static int rtrs_map_sg_fr(struct rtrs_clt_io_req *req, size_t count) static int rtrs_clt_write_req(struct rtrs_clt_io_req *req) { struct rtrs_clt_con *con = req->con; - struct rtrs_sess *s = con->c.sess; - struct rtrs_clt_sess *sess = to_clt_sess(s); + struct rtrs_path *s = con->c.path; + struct rtrs_clt_path *clt_path = to_clt_path(s); struct rtrs_msg_rdma_write *msg; struct rtrs_rbuf *rbuf; @@ -1088,13 +1090,13 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req) const size_t tsize = sizeof(*msg) + req->data_len + req->usr_len; - if (tsize > sess->chunk_size) { + if (tsize > clt_path->chunk_size) { rtrs_wrn(s, "Write request failed, size too big %zu > %d\n", - tsize, sess->chunk_size); + tsize, clt_path->chunk_size); return -EMSGSIZE; } if (req->sg_cnt) { - count = ib_dma_map_sg(sess->s.dev->ib_dev, req->sglist, + count = ib_dma_map_sg(clt_path->s.dev->ib_dev, req->sglist, req->sg_cnt, req->dir); if (!count) { rtrs_wrn(s, "Write request failed, map failed\n"); @@ -1111,7 +1113,7 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req) imm = rtrs_to_io_req_imm(imm); buf_id = req->permit->mem_id; req->sg_size = tsize; - rbuf = &sess->rbufs[buf_id]; + rbuf = &clt_path->rbufs[buf_id]; if (count) { ret = rtrs_map_sg_fr(req, count); @@ -1119,7 +1121,7 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req) rtrs_err_rl(s, "Write request failed, failed to map fast reg. 
data, err: %d\n", ret); - ib_dma_unmap_sg(sess->s.dev->ib_dev, req->sglist, + ib_dma_unmap_sg(clt_path->s.dev->ib_dev, req->sglist, req->sg_cnt, req->dir); return ret; } @@ -1147,18 +1149,18 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req) */ rtrs_clt_update_all_stats(req, WRITE); - ret = rtrs_post_rdma_write_sg(req->con, req, rbuf, fr_en, + ret = rtrs_post_rdma_write_sg(req->con, req, rbuf, fr_en, count, req->usr_len + sizeof(*msg), imm, wr, &inv_wr); if (ret) { rtrs_err_rl(s, "Write request failed: error=%d path=%s [%s:%u]\n", - ret, kobject_name(&sess->kobj), sess->hca_name, - sess->hca_port); + ret, kobject_name(&clt_path->kobj), clt_path->hca_name, + clt_path->hca_port); if (req->mp_policy == MP_POLICY_MIN_INFLIGHT) - atomic_dec(&sess->stats->inflight); + atomic_dec(&clt_path->stats->inflight); if (req->sg_cnt) - ib_dma_unmap_sg(sess->s.dev->ib_dev, req->sglist, + ib_dma_unmap_sg(clt_path->s.dev->ib_dev, req->sglist, req->sg_cnt, req->dir); } @@ -1168,10 +1170,10 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req) static int rtrs_clt_read_req(struct rtrs_clt_io_req *req) { struct rtrs_clt_con *con = req->con; - struct rtrs_sess *s = con->c.sess; - struct rtrs_clt_sess *sess = to_clt_sess(s); + struct rtrs_path *s = con->c.path; + struct rtrs_clt_path *clt_path = to_clt_path(s); struct rtrs_msg_rdma_read *msg; - struct rtrs_ib_dev *dev = sess->s.dev; + struct rtrs_ib_dev *dev = clt_path->s.dev; struct ib_reg_wr rwr; struct ib_send_wr *wr = NULL; @@ -1181,10 +1183,10 @@ static int rtrs_clt_read_req(struct rtrs_clt_io_req *req) const size_t tsize = sizeof(*msg) + req->data_len + req->usr_len; - if (tsize > sess->chunk_size) { + if (tsize > clt_path->chunk_size) { rtrs_wrn(s, "Read request failed, message size is %zu, bigger than CHUNK_SIZE %d\n", - tsize, sess->chunk_size); + tsize, clt_path->chunk_size); return -EMSGSIZE; } @@ -1254,15 +1256,15 @@ static int rtrs_clt_read_req(struct rtrs_clt_io_req *req) */ rtrs_clt_update_all_stats(req, 
READ); - ret = rtrs_post_send_rdma(req->con, req, &sess->rbufs[buf_id], + ret = rtrs_post_send_rdma(req->con, req, &clt_path->rbufs[buf_id], req->data_len, imm, wr); if (ret) { rtrs_err_rl(s, "Read request failed: error=%d path=%s [%s:%u]\n", - ret, kobject_name(&sess->kobj), sess->hca_name, - sess->hca_port); + ret, kobject_name(&clt_path->kobj), clt_path->hca_name, + clt_path->hca_port); if (req->mp_policy == MP_POLICY_MIN_INFLIGHT) - atomic_dec(&sess->stats->inflight); + atomic_dec(&clt_path->stats->inflight); req->need_inv = false; if (req->sg_cnt) ib_dma_unmap_sg(dev->ib_dev, req->sglist, @@ -1277,21 +1279,21 @@ static int rtrs_clt_read_req(struct rtrs_clt_io_req *req) * @clt: clt context * @fail_req: a failed io request. */ -static int rtrs_clt_failover_req(struct rtrs_clt *clt, +static int rtrs_clt_failover_req(struct rtrs_clt_sess *clt, struct rtrs_clt_io_req *fail_req) { - struct rtrs_clt_sess *alive_sess; + struct rtrs_clt_path *alive_path; struct rtrs_clt_io_req *req; int err = -ECONNABORTED; struct path_it it; rcu_read_lock(); for (path_it_init(&it, clt); - (alive_sess = it.next_path(&it)) && it.i < it.clt->paths_num; + (alive_path = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) { - if (READ_ONCE(alive_sess->state) != RTRS_CLT_CONNECTED) + if (READ_ONCE(alive_path->state) != RTRS_CLT_CONNECTED) continue; - req = rtrs_clt_get_copy_req(alive_sess, fail_req); + req = rtrs_clt_get_copy_req(alive_path, fail_req); if (req->dir == DMA_TO_DEVICE) err = rtrs_clt_write_req(req); else @@ -1301,7 +1303,7 @@ static int rtrs_clt_failover_req(struct rtrs_clt *clt, continue; } /* Success path */ - rtrs_clt_inc_failover_cnt(alive_sess->stats); + rtrs_clt_inc_failover_cnt(alive_path->stats); break; } path_it_deinit(&it); @@ -1310,16 +1312,16 @@ static int rtrs_clt_failover_req(struct rtrs_clt *clt, return err; } -static void fail_all_outstanding_reqs(struct rtrs_clt_sess *sess) +static void fail_all_outstanding_reqs(struct rtrs_clt_path *clt_path) { - struct 
rtrs_clt *clt = sess->clt; + struct rtrs_clt_sess *clt = clt_path->clt; struct rtrs_clt_io_req *req; int i, err; - if (!sess->reqs) + if (!clt_path->reqs) return; - for (i = 0; i < sess->queue_depth; ++i) { - req = &sess->reqs[i]; + for (i = 0; i < clt_path->queue_depth; ++i) { + req = &clt_path->reqs[i]; if (!req->in_use) continue; @@ -1337,38 +1339,39 @@ static void fail_all_outstanding_reqs(struct rtrs_clt_sess *sess) } } -static void free_sess_reqs(struct rtrs_clt_sess *sess) +static void free_path_reqs(struct rtrs_clt_path *clt_path) { struct rtrs_clt_io_req *req; int i; - if (!sess->reqs) + if (!clt_path->reqs) return; - for (i = 0; i < sess->queue_depth; ++i) { - req = &sess->reqs[i]; + for (i = 0; i < clt_path->queue_depth; ++i) { + req = &clt_path->reqs[i]; if (req->mr) ib_dereg_mr(req->mr); kfree(req->sge); - rtrs_iu_free(req->iu, sess->s.dev->ib_dev, 1); + rtrs_iu_free(req->iu, clt_path->s.dev->ib_dev, 1); } - kfree(sess->reqs); - sess->reqs = NULL; + kfree(clt_path->reqs); + clt_path->reqs = NULL; } -static int alloc_sess_reqs(struct rtrs_clt_sess *sess) +static int alloc_path_reqs(struct rtrs_clt_path *clt_path) { struct rtrs_clt_io_req *req; int i, err = -ENOMEM; - sess->reqs = kcalloc(sess->queue_depth, sizeof(*sess->reqs), - GFP_KERNEL); - if (!sess->reqs) + clt_path->reqs = kcalloc(clt_path->queue_depth, + sizeof(*clt_path->reqs), + GFP_KERNEL); + if (!clt_path->reqs) return -ENOMEM; - for (i = 0; i < sess->queue_depth; ++i) { - req = &sess->reqs[i]; - req->iu = rtrs_iu_alloc(1, sess->max_hdr_size, GFP_KERNEL, - sess->s.dev->ib_dev, + for (i = 0; i < clt_path->queue_depth; ++i) { + req = &clt_path->reqs[i]; + req->iu = rtrs_iu_alloc(1, clt_path->max_hdr_size, GFP_KERNEL, + clt_path->s.dev->ib_dev, DMA_TO_DEVICE, rtrs_clt_rdma_done); if (!req->iu) @@ -1378,13 +1381,14 @@ static int alloc_sess_reqs(struct rtrs_clt_sess *sess) if (!req->sge) goto out; - req->mr = ib_alloc_mr(sess->s.dev->ib_pd, IB_MR_TYPE_MEM_REG, - sess->max_pages_per_mr); + req->mr 
= ib_alloc_mr(clt_path->s.dev->ib_pd, + IB_MR_TYPE_MEM_REG, + clt_path->max_pages_per_mr); if (IS_ERR(req->mr)) { err = PTR_ERR(req->mr); req->mr = NULL; - pr_err("Failed to alloc sess->max_pages_per_mr %d\n", - sess->max_pages_per_mr); + pr_err("Failed to alloc clt_path->max_pages_per_mr %d\n", + clt_path->max_pages_per_mr); goto out; } @@ -1394,18 +1398,17 @@ static int alloc_sess_reqs(struct rtrs_clt_sess *sess) return 0; out: - free_sess_reqs(sess); + free_path_reqs(clt_path); return err; } -static int alloc_permits(struct rtrs_clt *clt) +static int alloc_permits(struct rtrs_clt_sess *clt) { unsigned int chunk_bits; int err, i; - clt->permits_map = kcalloc(BITS_TO_LONGS(clt->queue_depth), - sizeof(long), GFP_KERNEL); + clt->permits_map = bitmap_zalloc(clt->queue_depth, GFP_KERNEL); if (!clt->permits_map) { err = -ENOMEM; goto out_err; @@ -1427,33 +1430,31 @@ static int alloc_permits(struct rtrs_clt *clt) return 0; err_map: - kfree(clt->permits_map); + bitmap_free(clt->permits_map); clt->permits_map = NULL; out_err: return err; } -static void free_permits(struct rtrs_clt *clt) +static void free_permits(struct rtrs_clt_sess *clt) { - if (clt->permits_map) { - size_t sz = clt->queue_depth; - + if (clt->permits_map) wait_event(clt->permits_wait, - find_first_bit(clt->permits_map, sz) >= sz); - } - kfree(clt->permits_map); + bitmap_empty(clt->permits_map, clt->queue_depth)); + + bitmap_free(clt->permits_map); clt->permits_map = NULL; kfree(clt->permits); clt->permits = NULL; } -static void query_fast_reg_mode(struct rtrs_clt_sess *sess) +static void query_fast_reg_mode(struct rtrs_clt_path *clt_path) { struct ib_device *ib_dev; u64 max_pages_per_mr; int mr_page_shift; - ib_dev = sess->s.dev->ib_dev; + ib_dev = clt_path->s.dev->ib_dev; /* * Use the smallest page size supported by the HCA, down to a @@ -1463,24 +1464,24 @@ static void query_fast_reg_mode(struct rtrs_clt_sess *sess) mr_page_shift = max(12, ffs(ib_dev->attrs.page_size_cap) - 1); max_pages_per_mr = 
ib_dev->attrs.max_mr_size; do_div(max_pages_per_mr, (1ull << mr_page_shift)); - sess->max_pages_per_mr = - min3(sess->max_pages_per_mr, (u32)max_pages_per_mr, + clt_path->max_pages_per_mr = + min3(clt_path->max_pages_per_mr, (u32)max_pages_per_mr, ib_dev->attrs.max_fast_reg_page_list_len); - sess->clt->max_segments = - min(sess->max_pages_per_mr, sess->clt->max_segments); + clt_path->clt->max_segments = + min(clt_path->max_pages_per_mr, clt_path->clt->max_segments); } -static bool rtrs_clt_change_state_get_old(struct rtrs_clt_sess *sess, +static bool rtrs_clt_change_state_get_old(struct rtrs_clt_path *clt_path, enum rtrs_clt_state new_state, enum rtrs_clt_state *old_state) { bool changed; - spin_lock_irq(&sess->state_wq.lock); + spin_lock_irq(&clt_path->state_wq.lock); if (old_state) - *old_state = sess->state; - changed = rtrs_clt_change_state(sess, new_state); - spin_unlock_irq(&sess->state_wq.lock); + *old_state = clt_path->state; + changed = rtrs_clt_change_state(clt_path, new_state); + spin_unlock_irq(&clt_path->state_wq.lock); return changed; } @@ -1492,9 +1493,9 @@ static void rtrs_clt_hb_err_handler(struct rtrs_con *c) rtrs_rdma_error_recovery(con); } -static void rtrs_clt_init_hb(struct rtrs_clt_sess *sess) +static void rtrs_clt_init_hb(struct rtrs_clt_path *clt_path) { - rtrs_init_hb(&sess->s, &io_comp_cqe, + rtrs_init_hb(&clt_path->s, &io_comp_cqe, RTRS_HB_INTERVAL_MS, RTRS_HB_MISSED_MAX, rtrs_clt_hb_err_handler, @@ -1504,17 +1505,32 @@ static void rtrs_clt_init_hb(struct rtrs_clt_sess *sess) static void rtrs_clt_reconnect_work(struct work_struct *work); static void rtrs_clt_close_work(struct work_struct *work); -static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt, +static void rtrs_clt_err_recovery_work(struct work_struct *work) +{ + struct rtrs_clt_path *clt_path; + struct rtrs_clt_sess *clt; + int delay_ms; + + clt_path = container_of(work, struct rtrs_clt_path, err_recovery_work); + clt = clt_path->clt; + delay_ms = clt->reconnect_delay_sec 
* 1000; + rtrs_clt_stop_and_destroy_conns(clt_path); + queue_delayed_work(rtrs_wq, &clt_path->reconnect_dwork, + msecs_to_jiffies(delay_ms + + prandom_u32_max(RTRS_RECONNECT_SEED))); +} + +static struct rtrs_clt_path *alloc_path(struct rtrs_clt_sess *clt, const struct rtrs_addr *path, size_t con_num, u32 nr_poll_queues) { - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *clt_path; int err = -ENOMEM; int cpu; size_t total_con; - sess = kzalloc(sizeof(*sess), GFP_KERNEL); - if (!sess) + clt_path = kzalloc(sizeof(*clt_path), GFP_KERNEL); + if (!clt_path) goto err; /* @@ -1522,20 +1538,21 @@ static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt, * +1: Extra connection for user messages */ total_con = con_num + nr_poll_queues + 1; - sess->s.con = kcalloc(total_con, sizeof(*sess->s.con), GFP_KERNEL); - if (!sess->s.con) - goto err_free_sess; + clt_path->s.con = kcalloc(total_con, sizeof(*clt_path->s.con), + GFP_KERNEL); + if (!clt_path->s.con) + goto err_free_path; - sess->s.con_num = total_con; - sess->s.irq_con_num = con_num + 1; + clt_path->s.con_num = total_con; + clt_path->s.irq_con_num = con_num + 1; - sess->stats = kzalloc(sizeof(*sess->stats), GFP_KERNEL); - if (!sess->stats) + clt_path->stats = kzalloc(sizeof(*clt_path->stats), GFP_KERNEL); + if (!clt_path->stats) goto err_free_con; - mutex_init(&sess->init_mutex); - uuid_gen(&sess->s.uuid); - memcpy(&sess->s.dst_addr, path->dst, + mutex_init(&clt_path->init_mutex); + uuid_gen(&clt_path->s.uuid); + memcpy(&clt_path->s.dst_addr, path->dst, rdma_addr_size((struct sockaddr *)path->dst)); /* @@ -1544,53 +1561,55 @@ static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt, * the sess->src_addr will contain only zeros, which is then fine. 
*/ if (path->src) - memcpy(&sess->s.src_addr, path->src, + memcpy(&clt_path->s.src_addr, path->src, rdma_addr_size((struct sockaddr *)path->src)); - strscpy(sess->s.sessname, clt->sessname, sizeof(sess->s.sessname)); - sess->clt = clt; - sess->max_pages_per_mr = RTRS_MAX_SEGMENTS; - init_waitqueue_head(&sess->state_wq); - sess->state = RTRS_CLT_CONNECTING; - atomic_set(&sess->connected_cnt, 0); - INIT_WORK(&sess->close_work, rtrs_clt_close_work); - INIT_DELAYED_WORK(&sess->reconnect_dwork, rtrs_clt_reconnect_work); - rtrs_clt_init_hb(sess); - - sess->mp_skip_entry = alloc_percpu(typeof(*sess->mp_skip_entry)); - if (!sess->mp_skip_entry) + strscpy(clt_path->s.sessname, clt->sessname, + sizeof(clt_path->s.sessname)); + clt_path->clt = clt; + clt_path->max_pages_per_mr = RTRS_MAX_SEGMENTS; + init_waitqueue_head(&clt_path->state_wq); + clt_path->state = RTRS_CLT_CONNECTING; + atomic_set(&clt_path->connected_cnt, 0); + INIT_WORK(&clt_path->close_work, rtrs_clt_close_work); + INIT_WORK(&clt_path->err_recovery_work, rtrs_clt_err_recovery_work); + INIT_DELAYED_WORK(&clt_path->reconnect_dwork, rtrs_clt_reconnect_work); + rtrs_clt_init_hb(clt_path); + + clt_path->mp_skip_entry = alloc_percpu(typeof(*clt_path->mp_skip_entry)); + if (!clt_path->mp_skip_entry) goto err_free_stats; for_each_possible_cpu(cpu) - INIT_LIST_HEAD(per_cpu_ptr(sess->mp_skip_entry, cpu)); + INIT_LIST_HEAD(per_cpu_ptr(clt_path->mp_skip_entry, cpu)); - err = rtrs_clt_init_stats(sess->stats); + err = rtrs_clt_init_stats(clt_path->stats); if (err) goto err_free_percpu; - return sess; + return clt_path; err_free_percpu: - free_percpu(sess->mp_skip_entry); + free_percpu(clt_path->mp_skip_entry); err_free_stats: - kfree(sess->stats); + kfree(clt_path->stats); err_free_con: - kfree(sess->s.con); -err_free_sess: - kfree(sess); + kfree(clt_path->s.con); +err_free_path: + kfree(clt_path); err: return ERR_PTR(err); } -void free_sess(struct rtrs_clt_sess *sess) +void free_path(struct rtrs_clt_path *clt_path) { - 
free_percpu(sess->mp_skip_entry); - mutex_destroy(&sess->init_mutex); - kfree(sess->s.con); - kfree(sess->rbufs); - kfree(sess); + free_percpu(clt_path->mp_skip_entry); + mutex_destroy(&clt_path->init_mutex); + kfree(clt_path->s.con); + kfree(clt_path->rbufs); + kfree(clt_path); } -static int create_con(struct rtrs_clt_sess *sess, unsigned int cid) +static int create_con(struct rtrs_clt_path *clt_path, unsigned int cid) { struct rtrs_clt_con *con; @@ -1601,28 +1620,28 @@ static int create_con(struct rtrs_clt_sess *sess, unsigned int cid) /* Map first two connections to the first CPU */ con->cpu = (cid ? cid - 1 : 0) % nr_cpu_ids; con->c.cid = cid; - con->c.sess = &sess->s; + con->c.path = &clt_path->s; /* Align with srv, init as 1 */ atomic_set(&con->c.wr_cnt, 1); mutex_init(&con->con_mutex); - sess->s.con[cid] = &con->c; + clt_path->s.con[cid] = &con->c; return 0; } static void destroy_con(struct rtrs_clt_con *con) { - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); - sess->s.con[con->c.cid] = NULL; + clt_path->s.con[con->c.cid] = NULL; mutex_destroy(&con->con_mutex); kfree(con); } static int create_con_cq_qp(struct rtrs_clt_con *con) { - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); u32 max_send_wr, max_recv_wr, cq_num, max_send_sge, wr_limit; int err, cq_vector; struct rtrs_msg_rkey_rsp *rsp; @@ -1631,7 +1650,7 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) if (con->c.cid == 0) { max_send_sge = 1; /* We must be the first here */ - if (WARN_ON(sess->s.dev)) + if (WARN_ON(clt_path->s.dev)) return -EINVAL; /* @@ -1639,16 +1658,16 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) * Be careful not to close user connection before ib dev * is gracefully put. 
*/ - sess->s.dev = rtrs_ib_dev_find_or_add(con->c.cm_id->device, + clt_path->s.dev = rtrs_ib_dev_find_or_add(con->c.cm_id->device, &dev_pd); - if (!sess->s.dev) { - rtrs_wrn(sess->clt, + if (!clt_path->s.dev) { + rtrs_wrn(clt_path->clt, "rtrs_ib_dev_find_get_or_add(): no memory\n"); return -ENOMEM; } - sess->s.dev_ref = 1; - query_fast_reg_mode(sess); - wr_limit = sess->s.dev->ib_dev->attrs.max_qp_wr; + clt_path->s.dev_ref = 1; + query_fast_reg_mode(clt_path); + wr_limit = clt_path->s.dev->ib_dev->attrs.max_qp_wr; /* * Two (request + registration) completion for send * Two for recv if always_invalidate is set on server @@ -1665,27 +1684,28 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) * This is always true if user connection (cid == 0) is * established first. */ - if (WARN_ON(!sess->s.dev)) + if (WARN_ON(!clt_path->s.dev)) return -EINVAL; - if (WARN_ON(!sess->queue_depth)) + if (WARN_ON(!clt_path->queue_depth)) return -EINVAL; - wr_limit = sess->s.dev->ib_dev->attrs.max_qp_wr; + wr_limit = clt_path->s.dev->ib_dev->attrs.max_qp_wr; /* Shared between connections */ - sess->s.dev_ref++; + clt_path->s.dev_ref++; max_send_wr = min_t(int, wr_limit, /* QD * (REQ + RSP + FR REGS or INVS) + drain */ - sess->queue_depth * 3 + 1); + clt_path->queue_depth * 3 + 1); max_recv_wr = min_t(int, wr_limit, - sess->queue_depth * 3 + 1); + clt_path->queue_depth * 3 + 1); max_send_sge = 2; } atomic_set(&con->c.sq_wr_avail, max_send_wr); cq_num = max_send_wr + max_recv_wr; /* alloc iu to recv new rkey reply when server reports flags set */ - if (sess->flags & RTRS_MSG_NEW_RKEY_F || con->c.cid == 0) { + if (clt_path->flags & RTRS_MSG_NEW_RKEY_F || con->c.cid == 0) { con->rsp_ius = rtrs_iu_alloc(cq_num, sizeof(*rsp), - GFP_KERNEL, sess->s.dev->ib_dev, + GFP_KERNEL, + clt_path->s.dev->ib_dev, DMA_FROM_DEVICE, rtrs_clt_rdma_done); if (!con->rsp_ius) @@ -1693,13 +1713,13 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) con->queue_num = cq_num; } cq_num = max_send_wr + 
max_recv_wr; - cq_vector = con->cpu % sess->s.dev->ib_dev->num_comp_vectors; - if (con->c.cid >= sess->s.irq_con_num) - err = rtrs_cq_qp_create(&sess->s, &con->c, max_send_sge, + cq_vector = con->cpu % clt_path->s.dev->ib_dev->num_comp_vectors; + if (con->c.cid >= clt_path->s.irq_con_num) + err = rtrs_cq_qp_create(&clt_path->s, &con->c, max_send_sge, cq_vector, cq_num, max_send_wr, max_recv_wr, IB_POLL_DIRECT); else - err = rtrs_cq_qp_create(&sess->s, &con->c, max_send_sge, + err = rtrs_cq_qp_create(&clt_path->s, &con->c, max_send_sge, cq_vector, cq_num, max_send_wr, max_recv_wr, IB_POLL_SOFTIRQ); /* @@ -1711,7 +1731,7 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) static void destroy_con_cq_qp(struct rtrs_clt_con *con) { - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); /* * Be careful here: destroy_con_cq_qp() can be called even @@ -1720,13 +1740,14 @@ static void destroy_con_cq_qp(struct rtrs_clt_con *con) lockdep_assert_held(&con->con_mutex); rtrs_cq_qp_destroy(&con->c); if (con->rsp_ius) { - rtrs_iu_free(con->rsp_ius, sess->s.dev->ib_dev, con->queue_num); + rtrs_iu_free(con->rsp_ius, clt_path->s.dev->ib_dev, + con->queue_num); con->rsp_ius = NULL; con->queue_num = 0; } - if (sess->s.dev_ref && !--sess->s.dev_ref) { - rtrs_ib_dev_put(sess->s.dev); - sess->s.dev = NULL; + if (clt_path->s.dev_ref && !--clt_path->s.dev_ref) { + rtrs_ib_dev_put(clt_path->s.dev); + clt_path->s.dev = NULL; } } @@ -1745,7 +1766,7 @@ static void destroy_cm(struct rtrs_clt_con *con) static int rtrs_rdma_addr_resolved(struct rtrs_clt_con *con) { - struct rtrs_sess *s = con->c.sess; + struct rtrs_path *s = con->c.path; int err; mutex_lock(&con->con_mutex); @@ -1764,8 +1785,8 @@ static int rtrs_rdma_addr_resolved(struct rtrs_clt_con *con) static int rtrs_rdma_route_resolved(struct rtrs_clt_con *con) { - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); - struct rtrs_clt *clt = sess->clt; + struct 
rtrs_clt_path *clt_path = to_clt_path(con->c.path); + struct rtrs_clt_sess *clt = clt_path->clt; struct rtrs_msg_conn_req msg; struct rdma_conn_param param; @@ -1782,11 +1803,11 @@ static int rtrs_rdma_route_resolved(struct rtrs_clt_con *con) .magic = cpu_to_le16(RTRS_MAGIC), .version = cpu_to_le16(RTRS_PROTO_VER), .cid = cpu_to_le16(con->c.cid), - .cid_num = cpu_to_le16(sess->s.con_num), - .recon_cnt = cpu_to_le16(sess->s.recon_cnt), + .cid_num = cpu_to_le16(clt_path->s.con_num), + .recon_cnt = cpu_to_le16(clt_path->s.recon_cnt), }; - msg.first_conn = sess->for_new_clt ? FIRST_CONN : 0; - uuid_copy(&msg.sess_uuid, &sess->s.uuid); + msg.first_conn = clt_path->for_new_clt ? FIRST_CONN : 0; + uuid_copy(&msg.sess_uuid, &clt_path->s.uuid); uuid_copy(&msg.paths_uuid, &clt->paths_uuid); err = rdma_connect_locked(con->c.cm_id, ¶m); @@ -1799,8 +1820,8 @@ static int rtrs_rdma_route_resolved(struct rtrs_clt_con *con) static int rtrs_rdma_conn_established(struct rtrs_clt_con *con, struct rdma_cm_event *ev) { - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); - struct rtrs_clt *clt = sess->clt; + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); + struct rtrs_clt_sess *clt = clt_path->clt; const struct rtrs_msg_conn_rsp *msg; u16 version, queue_depth; int errno; @@ -1831,31 +1852,32 @@ static int rtrs_rdma_conn_established(struct rtrs_clt_con *con, if (con->c.cid == 0) { queue_depth = le16_to_cpu(msg->queue_depth); - if (sess->queue_depth > 0 && queue_depth != sess->queue_depth) { + if (clt_path->queue_depth > 0 && queue_depth != clt_path->queue_depth) { rtrs_err(clt, "Error: queue depth changed\n"); /* * Stop any more reconnection attempts */ - sess->reconnect_attempts = -1; + clt_path->reconnect_attempts = -1; rtrs_err(clt, "Disabling auto-reconnect. 
Trigger a manual reconnect after issue is resolved\n"); return -ECONNRESET; } - if (!sess->rbufs) { - sess->rbufs = kcalloc(queue_depth, sizeof(*sess->rbufs), - GFP_KERNEL); - if (!sess->rbufs) + if (!clt_path->rbufs) { + clt_path->rbufs = kcalloc(queue_depth, + sizeof(*clt_path->rbufs), + GFP_KERNEL); + if (!clt_path->rbufs) return -ENOMEM; } - sess->queue_depth = queue_depth; - sess->s.signal_interval = min_not_zero(queue_depth, + clt_path->queue_depth = queue_depth; + clt_path->s.signal_interval = min_not_zero(queue_depth, (unsigned short) SERVICE_CON_QUEUE_DEPTH); - sess->max_hdr_size = le32_to_cpu(msg->max_hdr_size); - sess->max_io_size = le32_to_cpu(msg->max_io_size); - sess->flags = le32_to_cpu(msg->flags); - sess->chunk_size = sess->max_io_size + sess->max_hdr_size; + clt_path->max_hdr_size = le32_to_cpu(msg->max_hdr_size); + clt_path->max_io_size = le32_to_cpu(msg->max_io_size); + clt_path->flags = le32_to_cpu(msg->flags); + clt_path->chunk_size = clt_path->max_io_size + clt_path->max_hdr_size; /* * Global IO size is always a minimum. @@ -1866,20 +1888,20 @@ static int rtrs_rdma_conn_established(struct rtrs_clt_con *con, * connections in parallel, use lock. 
*/ mutex_lock(&clt->paths_mutex); - clt->queue_depth = sess->queue_depth; - clt->max_io_size = min_not_zero(sess->max_io_size, + clt->queue_depth = clt_path->queue_depth; + clt->max_io_size = min_not_zero(clt_path->max_io_size, clt->max_io_size); mutex_unlock(&clt->paths_mutex); /* * Cache the hca_port and hca_name for sysfs */ - sess->hca_port = con->c.cm_id->port_num; - scnprintf(sess->hca_name, sizeof(sess->hca_name), - sess->s.dev->ib_dev->name); - sess->s.src_addr = con->c.cm_id->route.addr.src_addr; + clt_path->hca_port = con->c.cm_id->port_num; + scnprintf(clt_path->hca_name, sizeof(clt_path->hca_name), + clt_path->s.dev->ib_dev->name); + clt_path->s.src_addr = con->c.cm_id->route.addr.src_addr; /* set for_new_clt, to allow future reconnect on any path */ - sess->for_new_clt = 1; + clt_path->for_new_clt = 1; } return 0; @@ -1887,16 +1909,16 @@ static int rtrs_rdma_conn_established(struct rtrs_clt_con *con, static inline void flag_success_on_conn(struct rtrs_clt_con *con) { - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); - atomic_inc(&sess->connected_cnt); + atomic_inc(&clt_path->connected_cnt); con->cm_err = 1; } static int rtrs_rdma_conn_rejected(struct rtrs_clt_con *con, struct rdma_cm_event *ev) { - struct rtrs_sess *s = con->c.sess; + struct rtrs_path *s = con->c.path; const struct rtrs_msg_conn_rsp *msg; const char *rej_msg; int status, errno; @@ -1924,23 +1946,25 @@ static int rtrs_rdma_conn_rejected(struct rtrs_clt_con *con, return -ECONNRESET; } -void rtrs_clt_close_conns(struct rtrs_clt_sess *sess, bool wait) +void rtrs_clt_close_conns(struct rtrs_clt_path *clt_path, bool wait) { - if (rtrs_clt_change_state_get_old(sess, RTRS_CLT_CLOSING, NULL)) - queue_work(rtrs_wq, &sess->close_work); + trace_rtrs_clt_close_conns(clt_path); + + if (rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_CLOSING, NULL)) + queue_work(rtrs_wq, &clt_path->close_work); if (wait) - 
flush_work(&sess->close_work); + flush_work(&clt_path->close_work); } static inline void flag_error_on_conn(struct rtrs_clt_con *con, int cm_err) { if (con->cm_err == 1) { - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *clt_path; - sess = to_clt_sess(con->c.sess); - if (atomic_dec_and_test(&sess->connected_cnt)) + clt_path = to_clt_path(con->c.path); + if (atomic_dec_and_test(&clt_path->connected_cnt)) - wake_up(&sess->state_wq); + wake_up(&clt_path->state_wq); } con->cm_err = cm_err; } @@ -1949,8 +1973,8 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *ev) { struct rtrs_clt_con *con = cm_id->context; - struct rtrs_sess *s = con->c.sess; - struct rtrs_clt_sess *sess = to_clt_sess(s); + struct rtrs_path *s = con->c.path; + struct rtrs_clt_path *clt_path = to_clt_path(s); int cm_err = 0; switch (ev->event) { @@ -1968,7 +1992,7 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id, * i.e. wake up without state change, but we set cm_err. */ flag_success_on_conn(con); - wake_up(&sess->state_wq); + wake_up(&clt_path->state_wq); return 0; } break; @@ -1997,7 +2021,7 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id, /* * Device removal is a special case. Queue close and return 0. */ - rtrs_clt_close_conns(sess, false); + rtrs_clt_close_conns(clt_path, false); return 0; default: rtrs_err(s, "Unexpected RDMA CM error (CM event: %s, err: %d)\n", @@ -2020,13 +2044,13 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id, static int create_cm(struct rtrs_clt_con *con) { - struct rtrs_sess *s = con->c.sess; - struct rtrs_clt_sess *sess = to_clt_sess(s); + struct rtrs_path *s = con->c.path; + struct rtrs_clt_path *clt_path = to_clt_path(s); struct rdma_cm_id *cm_id; int err; cm_id = rdma_create_id(&init_net, rtrs_clt_rdma_cm_handler, con, - sess->s.dst_addr.ss_family == AF_IB ? + clt_path->s.dst_addr.ss_family == AF_IB ? 
RDMA_PS_IB : RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cm_id)) { err = PTR_ERR(cm_id); @@ -2042,8 +2066,8 @@ static int create_cm(struct rtrs_clt_con *con) rtrs_err(s, "Set address reuse failed, err: %d\n", err); goto destroy_cm; } - err = rdma_resolve_addr(cm_id, (struct sockaddr *)&sess->s.src_addr, - (struct sockaddr *)&sess->s.dst_addr, + err = rdma_resolve_addr(cm_id, (struct sockaddr *)&clt_path->s.src_addr, + (struct sockaddr *)&clt_path->s.dst_addr, RTRS_CONNECT_TIMEOUT_MS); if (err) { rtrs_err(s, "Failed to resolve address, err: %d\n", err); @@ -2055,8 +2079,8 @@ static int create_cm(struct rtrs_clt_con *con) * or session state was really changed to error by device removal. */ err = wait_event_interruptible_timeout( - sess->state_wq, - con->cm_err || sess->state != RTRS_CLT_CONNECTING, + clt_path->state_wq, + con->cm_err || clt_path->state != RTRS_CLT_CONNECTING, msecs_to_jiffies(RTRS_CONNECT_TIMEOUT_MS)); if (err == 0 || err == -ERESTARTSYS) { if (err == 0) @@ -2068,7 +2092,7 @@ static int create_cm(struct rtrs_clt_con *con) err = con->cm_err; goto errr; } - if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTING) { + if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTING) { /* Device removal */ err = -ECONNABORTED; goto errr; @@ -2087,9 +2111,9 @@ destroy_cm: return err; } -static void rtrs_clt_sess_up(struct rtrs_clt_sess *sess) +static void rtrs_clt_path_up(struct rtrs_clt_path *clt_path) { - struct rtrs_clt *clt = sess->clt; + struct rtrs_clt_sess *clt = clt_path->clt; int up; /* @@ -2113,19 +2137,19 @@ static void rtrs_clt_sess_up(struct rtrs_clt_sess *sess) mutex_unlock(&clt->paths_ev_mutex); /* Mark session as established */ - sess->established = true; - sess->reconnect_attempts = 0; - sess->stats->reconnects.successful_cnt++; + clt_path->established = true; + clt_path->reconnect_attempts = 0; + clt_path->stats->reconnects.successful_cnt++; } -static void rtrs_clt_sess_down(struct rtrs_clt_sess *sess) +static void rtrs_clt_path_down(struct rtrs_clt_path 
*clt_path) { - struct rtrs_clt *clt = sess->clt; + struct rtrs_clt_sess *clt = clt_path->clt; - if (!sess->established) + if (!clt_path->established) return; - sess->established = false; + clt_path->established = false; mutex_lock(&clt->paths_ev_mutex); WARN_ON(!clt->paths_up); if (--clt->paths_up == 0) @@ -2133,19 +2157,19 @@ static void rtrs_clt_sess_down(struct rtrs_clt_sess *sess) mutex_unlock(&clt->paths_ev_mutex); } -static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess) +static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_path *clt_path) { struct rtrs_clt_con *con; unsigned int cid; - WARN_ON(READ_ONCE(sess->state) == RTRS_CLT_CONNECTED); + WARN_ON(READ_ONCE(clt_path->state) == RTRS_CLT_CONNECTED); /* * Possible race with rtrs_clt_open(), when DEVICE_REMOVAL comes * exactly in between. Start destroying after it finishes. */ - mutex_lock(&sess->init_mutex); - mutex_unlock(&sess->init_mutex); + mutex_lock(&clt_path->init_mutex); + mutex_unlock(&clt_path->init_mutex); /* * All IO paths must observe !CONNECTED state before we @@ -2153,7 +2177,7 @@ static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess) */ synchronize_rcu(); - rtrs_stop_hb(&sess->s); + rtrs_stop_hb(&clt_path->s); /* * The order it utterly crucial: firstly disconnect and complete all @@ -2162,15 +2186,15 @@ static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess) * eventually notify upper layer about session disconnection. 
*/ - for (cid = 0; cid < sess->s.con_num; cid++) { - if (!sess->s.con[cid]) + for (cid = 0; cid < clt_path->s.con_num; cid++) { + if (!clt_path->s.con[cid]) break; - con = to_clt_con(sess->s.con[cid]); + con = to_clt_con(clt_path->s.con[cid]); stop_cm(con); } - fail_all_outstanding_reqs(sess); - free_sess_reqs(sess); - rtrs_clt_sess_down(sess); + fail_all_outstanding_reqs(clt_path); + free_path_reqs(clt_path); + rtrs_clt_path_down(clt_path); /* * Wait for graceful shutdown, namely when peer side invokes @@ -2180,13 +2204,14 @@ static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess) * since CM does not fire anything. That is fine, we are not in * hurry. */ - wait_event_timeout(sess->state_wq, !atomic_read(&sess->connected_cnt), + wait_event_timeout(clt_path->state_wq, + !atomic_read(&clt_path->connected_cnt), msecs_to_jiffies(RTRS_CONNECT_TIMEOUT_MS)); - for (cid = 0; cid < sess->s.con_num; cid++) { - if (!sess->s.con[cid]) + for (cid = 0; cid < clt_path->s.con_num; cid++) { + if (!clt_path->s.con[cid]) break; - con = to_clt_con(sess->s.con[cid]); + con = to_clt_con(clt_path->s.con[cid]); mutex_lock(&con->con_mutex); destroy_con_cq_qp(con); mutex_unlock(&con->con_mutex); @@ -2195,26 +2220,15 @@ static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess) } } -static inline bool xchg_sessions(struct rtrs_clt_sess __rcu **rcu_ppcpu_path, - struct rtrs_clt_sess *sess, - struct rtrs_clt_sess *next) -{ - struct rtrs_clt_sess **ppcpu_path; - - /* Call cmpxchg() without sparse warnings */ - ppcpu_path = (typeof(ppcpu_path))rcu_ppcpu_path; - return sess == cmpxchg(ppcpu_path, sess, next); -} - -static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_sess *sess) +static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_path *clt_path) { - struct rtrs_clt *clt = sess->clt; - struct rtrs_clt_sess *next; + struct rtrs_clt_sess *clt = clt_path->clt; + struct rtrs_clt_path *next; bool wait_for_grace = false; int cpu; mutex_lock(&clt->paths_mutex); - 
list_del_rcu(&sess->s.entry); + list_del_rcu(&clt_path->s.entry); /* Make sure everybody observes path removal. */ synchronize_rcu(); @@ -2255,8 +2269,7 @@ static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_sess *sess) * removed. If @sess is the last element, then @next is NULL. */ rcu_read_lock(); - next = list_next_or_null_rr_rcu(&clt->paths_list, &sess->s.entry, - typeof(*next), s.entry); + next = rtrs_clt_get_next_path_or_null(&clt->paths_list, clt_path); rcu_read_unlock(); /* @@ -2264,11 +2277,11 @@ static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_sess *sess) * removed, so change the pointer manually. */ for_each_possible_cpu(cpu) { - struct rtrs_clt_sess __rcu **ppcpu_path; + struct rtrs_clt_path __rcu **ppcpu_path; ppcpu_path = per_cpu_ptr(clt->pcpu_path, cpu); if (rcu_dereference_protected(*ppcpu_path, - lockdep_is_held(&clt->paths_mutex)) != sess) + lockdep_is_held(&clt->paths_mutex)) != clt_path) /* * synchronize_rcu() was called just after deleting * entry from the list, thus IO code path cannot @@ -2281,7 +2294,8 @@ static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_sess *sess) * We race with IO code path, which also changes pointer, * thus we have to be careful not to overwrite it. 
*/ - if (xchg_sessions(ppcpu_path, sess, next)) + if (try_cmpxchg((struct rtrs_clt_path **)ppcpu_path, &clt_path, + next)) /* * @ppcpu_path was successfully replaced with @next, * that means that someone could also pick up the @@ -2296,29 +2310,30 @@ static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_sess *sess) mutex_unlock(&clt->paths_mutex); } -static void rtrs_clt_add_path_to_arr(struct rtrs_clt_sess *sess) +static void rtrs_clt_add_path_to_arr(struct rtrs_clt_path *clt_path) { - struct rtrs_clt *clt = sess->clt; + struct rtrs_clt_sess *clt = clt_path->clt; mutex_lock(&clt->paths_mutex); clt->paths_num++; - list_add_tail_rcu(&sess->s.entry, &clt->paths_list); + list_add_tail_rcu(&clt_path->s.entry, &clt->paths_list); mutex_unlock(&clt->paths_mutex); } static void rtrs_clt_close_work(struct work_struct *work) { - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *clt_path; - sess = container_of(work, struct rtrs_clt_sess, close_work); + clt_path = container_of(work, struct rtrs_clt_path, close_work); - cancel_delayed_work_sync(&sess->reconnect_dwork); - rtrs_clt_stop_and_destroy_conns(sess); - rtrs_clt_change_state_get_old(sess, RTRS_CLT_CLOSED, NULL); + cancel_work_sync(&clt_path->err_recovery_work); + cancel_delayed_work_sync(&clt_path->reconnect_dwork); + rtrs_clt_stop_and_destroy_conns(clt_path); + rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_CLOSED, NULL); } -static int init_conns(struct rtrs_clt_sess *sess) +static int init_conns(struct rtrs_clt_path *clt_path) { unsigned int cid; int err; @@ -2328,31 +2343,31 @@ static int init_conns(struct rtrs_clt_sess *sess) * to avoid clashes with previous sessions not yet closed * sessions on a server side. 
*/ - sess->s.recon_cnt++; + clt_path->s.recon_cnt++; /* Establish all RDMA connections */ - for (cid = 0; cid < sess->s.con_num; cid++) { - err = create_con(sess, cid); + for (cid = 0; cid < clt_path->s.con_num; cid++) { + err = create_con(clt_path, cid); if (err) goto destroy; - err = create_cm(to_clt_con(sess->s.con[cid])); + err = create_cm(to_clt_con(clt_path->s.con[cid])); if (err) { - destroy_con(to_clt_con(sess->s.con[cid])); + destroy_con(to_clt_con(clt_path->s.con[cid])); goto destroy; } } - err = alloc_sess_reqs(sess); + err = alloc_path_reqs(clt_path); if (err) goto destroy; - rtrs_start_hb(&sess->s); + rtrs_start_hb(&clt_path->s); return 0; destroy: while (cid--) { - struct rtrs_clt_con *con = to_clt_con(sess->s.con[cid]); + struct rtrs_clt_con *con = to_clt_con(clt_path->s.con[cid]); stop_cm(con); @@ -2367,7 +2382,7 @@ destroy: * doing rdma_resolve_addr(), switch to CONNECTION_ERR state * manually to keep reconnecting. */ - rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING_ERR, NULL); + rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_CONNECTING_ERR, NULL); return err; } @@ -2375,31 +2390,32 @@ destroy: static void rtrs_clt_info_req_done(struct ib_cq *cq, struct ib_wc *wc) { struct rtrs_clt_con *con = to_clt_con(wc->qp->qp_context); - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); struct rtrs_iu *iu; iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); - rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); + rtrs_iu_free(iu, clt_path->s.dev->ib_dev, 1); if (wc->status != IB_WC_SUCCESS) { - rtrs_err(sess->clt, "Sess info request send failed: %s\n", + rtrs_err(clt_path->clt, "Path info request send failed: %s\n", ib_wc_status_msg(wc->status)); - rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING_ERR, NULL); + rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_CONNECTING_ERR, NULL); return; } rtrs_clt_update_wc_stats(con); } -static int process_info_rsp(struct rtrs_clt_sess *sess, +static 
int process_info_rsp(struct rtrs_clt_path *clt_path, const struct rtrs_msg_info_rsp *msg) { unsigned int sg_cnt, total_len; int i, sgi; sg_cnt = le16_to_cpu(msg->sg_cnt); - if (!sg_cnt || (sess->queue_depth % sg_cnt)) { - rtrs_err(sess->clt, "Incorrect sg_cnt %d, is not multiple\n", + if (!sg_cnt || (clt_path->queue_depth % sg_cnt)) { + rtrs_err(clt_path->clt, + "Incorrect sg_cnt %d, is not multiple\n", sg_cnt); return -EINVAL; } @@ -2408,15 +2424,15 @@ static int process_info_rsp(struct rtrs_clt_sess *sess, * Check if IB immediate data size is enough to hold the mem_id and * the offset inside the memory chunk. */ - if ((ilog2(sg_cnt - 1) + 1) + (ilog2(sess->chunk_size - 1) + 1) > + if ((ilog2(sg_cnt - 1) + 1) + (ilog2(clt_path->chunk_size - 1) + 1) > MAX_IMM_PAYL_BITS) { - rtrs_err(sess->clt, + rtrs_err(clt_path->clt, "RDMA immediate size (%db) not enough to encode %d buffers of size %dB\n", - MAX_IMM_PAYL_BITS, sg_cnt, sess->chunk_size); + MAX_IMM_PAYL_BITS, sg_cnt, clt_path->chunk_size); return -EINVAL; } total_len = 0; - for (sgi = 0, i = 0; sgi < sg_cnt && i < sess->queue_depth; sgi++) { + for (sgi = 0, i = 0; sgi < sg_cnt && i < clt_path->queue_depth; sgi++) { const struct rtrs_sg_desc *desc = &msg->desc[sgi]; u32 len, rkey; u64 addr; @@ -2427,26 +2443,28 @@ static int process_info_rsp(struct rtrs_clt_sess *sess, total_len += len; - if (!len || (len % sess->chunk_size)) { - rtrs_err(sess->clt, "Incorrect [%d].len %d\n", sgi, + if (!len || (len % clt_path->chunk_size)) { + rtrs_err(clt_path->clt, "Incorrect [%d].len %d\n", + sgi, len); return -EINVAL; } - for ( ; len && i < sess->queue_depth; i++) { - sess->rbufs[i].addr = addr; - sess->rbufs[i].rkey = rkey; + for ( ; len && i < clt_path->queue_depth; i++) { + clt_path->rbufs[i].addr = addr; + clt_path->rbufs[i].rkey = rkey; - len -= sess->chunk_size; - addr += sess->chunk_size; + len -= clt_path->chunk_size; + addr += clt_path->chunk_size; } } /* Sanity check */ - if (sgi != sg_cnt || i != sess->queue_depth) 
{ - rtrs_err(sess->clt, "Incorrect sg vector, not fully mapped\n"); + if (sgi != sg_cnt || i != clt_path->queue_depth) { + rtrs_err(clt_path->clt, + "Incorrect sg vector, not fully mapped\n"); return -EINVAL; } - if (total_len != sess->chunk_size * sess->queue_depth) { - rtrs_err(sess->clt, "Incorrect total_len %d\n", total_len); + if (total_len != clt_path->chunk_size * clt_path->queue_depth) { + rtrs_err(clt_path->clt, "Incorrect total_len %d\n", total_len); return -EINVAL; } @@ -2456,7 +2474,7 @@ static int process_info_rsp(struct rtrs_clt_sess *sess, static void rtrs_clt_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc) { struct rtrs_clt_con *con = to_clt_con(wc->qp->qp_context); - struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); struct rtrs_msg_info_rsp *msg; enum rtrs_clt_state state; struct rtrs_iu *iu; @@ -2468,37 +2486,37 @@ static void rtrs_clt_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc) WARN_ON(con->c.cid); iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); if (wc->status != IB_WC_SUCCESS) { - rtrs_err(sess->clt, "Sess info response recv failed: %s\n", + rtrs_err(clt_path->clt, "Path info response recv failed: %s\n", ib_wc_status_msg(wc->status)); goto out; } WARN_ON(wc->opcode != IB_WC_RECV); if (wc->byte_len < sizeof(*msg)) { - rtrs_err(sess->clt, "Sess info response is malformed: size %d\n", + rtrs_err(clt_path->clt, "Path info response is malformed: size %d\n", wc->byte_len); goto out; } - ib_dma_sync_single_for_cpu(sess->s.dev->ib_dev, iu->dma_addr, + ib_dma_sync_single_for_cpu(clt_path->s.dev->ib_dev, iu->dma_addr, iu->size, DMA_FROM_DEVICE); msg = iu->buf; if (le16_to_cpu(msg->type) != RTRS_MSG_INFO_RSP) { - rtrs_err(sess->clt, "Sess info response is malformed: type %d\n", + rtrs_err(clt_path->clt, "Path info response is malformed: type %d\n", le16_to_cpu(msg->type)); goto out; } rx_sz = sizeof(*msg); rx_sz += sizeof(msg->desc[0]) * le16_to_cpu(msg->sg_cnt); if (wc->byte_len 
< rx_sz) { - rtrs_err(sess->clt, "Sess info response is malformed: size %d\n", + rtrs_err(clt_path->clt, "Path info response is malformed: size %d\n", wc->byte_len); goto out; } - err = process_info_rsp(sess, msg); + err = process_info_rsp(clt_path, msg); if (err) goto out; - err = post_recv_sess(sess); + err = post_recv_path(clt_path); if (err) goto out; @@ -2506,25 +2524,25 @@ static void rtrs_clt_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc) out: rtrs_clt_update_wc_stats(con); - rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); - rtrs_clt_change_state_get_old(sess, state, NULL); + rtrs_iu_free(iu, clt_path->s.dev->ib_dev, 1); + rtrs_clt_change_state_get_old(clt_path, state, NULL); } -static int rtrs_send_sess_info(struct rtrs_clt_sess *sess) +static int rtrs_send_path_info(struct rtrs_clt_path *clt_path) { - struct rtrs_clt_con *usr_con = to_clt_con(sess->s.con[0]); + struct rtrs_clt_con *usr_con = to_clt_con(clt_path->s.con[0]); struct rtrs_msg_info_req *msg; struct rtrs_iu *tx_iu, *rx_iu; size_t rx_sz; int err; rx_sz = sizeof(struct rtrs_msg_info_rsp); - rx_sz += sizeof(struct rtrs_sg_desc) * sess->queue_depth; + rx_sz += sizeof(struct rtrs_sg_desc) * clt_path->queue_depth; tx_iu = rtrs_iu_alloc(1, sizeof(struct rtrs_msg_info_req), GFP_KERNEL, - sess->s.dev->ib_dev, DMA_TO_DEVICE, + clt_path->s.dev->ib_dev, DMA_TO_DEVICE, rtrs_clt_info_req_done); - rx_iu = rtrs_iu_alloc(1, rx_sz, GFP_KERNEL, sess->s.dev->ib_dev, + rx_iu = rtrs_iu_alloc(1, rx_sz, GFP_KERNEL, clt_path->s.dev->ib_dev, DMA_FROM_DEVICE, rtrs_clt_info_rsp_done); if (!tx_iu || !rx_iu) { err = -ENOMEM; @@ -2533,33 +2551,34 @@ static int rtrs_send_sess_info(struct rtrs_clt_sess *sess) /* Prepare for getting info response */ err = rtrs_iu_post_recv(&usr_con->c, rx_iu); if (err) { - rtrs_err(sess->clt, "rtrs_iu_post_recv(), err: %d\n", err); + rtrs_err(clt_path->clt, "rtrs_iu_post_recv(), err: %d\n", err); goto out; } rx_iu = NULL; msg = tx_iu->buf; msg->type = cpu_to_le16(RTRS_MSG_INFO_REQ); - 
memcpy(msg->sessname, sess->s.sessname, sizeof(msg->sessname)); + memcpy(msg->pathname, clt_path->s.sessname, sizeof(msg->pathname)); - ib_dma_sync_single_for_device(sess->s.dev->ib_dev, tx_iu->dma_addr, + ib_dma_sync_single_for_device(clt_path->s.dev->ib_dev, + tx_iu->dma_addr, tx_iu->size, DMA_TO_DEVICE); /* Send info request */ err = rtrs_iu_post_send(&usr_con->c, tx_iu, sizeof(*msg), NULL); if (err) { - rtrs_err(sess->clt, "rtrs_iu_post_send(), err: %d\n", err); + rtrs_err(clt_path->clt, "rtrs_iu_post_send(), err: %d\n", err); goto out; } tx_iu = NULL; /* Wait for state change */ - wait_event_interruptible_timeout(sess->state_wq, - sess->state != RTRS_CLT_CONNECTING, + wait_event_interruptible_timeout(clt_path->state_wq, + clt_path->state != RTRS_CLT_CONNECTING, msecs_to_jiffies( RTRS_CONNECT_TIMEOUT_MS)); - if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTED) { - if (READ_ONCE(sess->state) == RTRS_CLT_CONNECTING_ERR) + if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTED) { + if (READ_ONCE(clt_path->state) == RTRS_CLT_CONNECTING_ERR) err = -ECONNRESET; else err = -ETIMEDOUT; @@ -2567,82 +2586,81 @@ static int rtrs_send_sess_info(struct rtrs_clt_sess *sess) out: if (tx_iu) - rtrs_iu_free(tx_iu, sess->s.dev->ib_dev, 1); + rtrs_iu_free(tx_iu, clt_path->s.dev->ib_dev, 1); if (rx_iu) - rtrs_iu_free(rx_iu, sess->s.dev->ib_dev, 1); + rtrs_iu_free(rx_iu, clt_path->s.dev->ib_dev, 1); if (err) /* If we've never taken async path because of malloc problems */ - rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING_ERR, NULL); + rtrs_clt_change_state_get_old(clt_path, + RTRS_CLT_CONNECTING_ERR, NULL); return err; } /** - * init_sess() - establishes all session connections and does handshake - * @sess: client session. + * init_path() - establishes all path connections and does handshake + * @clt_path: client path. * In case of error full close or reconnect procedure should be taken, * because reconnect or close async works can be started. 
*/ -static int init_sess(struct rtrs_clt_sess *sess) +static int init_path(struct rtrs_clt_path *clt_path) { int err; char str[NAME_MAX]; struct rtrs_addr path = { - .src = &sess->s.src_addr, - .dst = &sess->s.dst_addr, + .src = &clt_path->s.src_addr, + .dst = &clt_path->s.dst_addr, }; rtrs_addr_to_str(&path, str, sizeof(str)); - mutex_lock(&sess->init_mutex); - err = init_conns(sess); + mutex_lock(&clt_path->init_mutex); + err = init_conns(clt_path); if (err) { - rtrs_err(sess->clt, + rtrs_err(clt_path->clt, "init_conns() failed: err=%d path=%s [%s:%u]\n", err, - str, sess->hca_name, sess->hca_port); + str, clt_path->hca_name, clt_path->hca_port); goto out; } - err = rtrs_send_sess_info(sess); + err = rtrs_send_path_info(clt_path); if (err) { - rtrs_err( - sess->clt, - "rtrs_send_sess_info() failed: err=%d path=%s [%s:%u]\n", - err, str, sess->hca_name, sess->hca_port); + rtrs_err(clt_path->clt, + "rtrs_send_path_info() failed: err=%d path=%s [%s:%u]\n", + err, str, clt_path->hca_name, clt_path->hca_port); goto out; } - rtrs_clt_sess_up(sess); + rtrs_clt_path_up(clt_path); out: - mutex_unlock(&sess->init_mutex); + mutex_unlock(&clt_path->init_mutex); return err; } static void rtrs_clt_reconnect_work(struct work_struct *work) { - struct rtrs_clt_sess *sess; - struct rtrs_clt *clt; - unsigned int delay_ms; + struct rtrs_clt_path *clt_path; + struct rtrs_clt_sess *clt; int err; - sess = container_of(to_delayed_work(work), struct rtrs_clt_sess, - reconnect_dwork); - clt = sess->clt; + clt_path = container_of(to_delayed_work(work), struct rtrs_clt_path, + reconnect_dwork); + clt = clt_path->clt; - if (READ_ONCE(sess->state) != RTRS_CLT_RECONNECTING) + trace_rtrs_clt_reconnect_work(clt_path); + + if (READ_ONCE(clt_path->state) != RTRS_CLT_RECONNECTING) return; - if (sess->reconnect_attempts >= clt->max_reconnect_attempts) { - /* Close a session completely if max attempts is reached */ - rtrs_clt_close_conns(sess, false); + if (clt_path->reconnect_attempts >= 
clt->max_reconnect_attempts) { + /* Close a path completely if max attempts is reached */ + rtrs_clt_close_conns(clt_path, false); return; } - sess->reconnect_attempts++; + clt_path->reconnect_attempts++; - /* Stop everything */ - rtrs_clt_stop_and_destroy_conns(sess); msleep(RTRS_RECONNECT_BACKOFF); - if (rtrs_clt_change_state_get_old(sess, RTRS_CLT_CONNECTING, NULL)) { - err = init_sess(sess); + if (rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_CONNECTING, NULL)) { + err = init_path(clt_path); if (err) goto reconnect_again; } @@ -2650,31 +2668,30 @@ static void rtrs_clt_reconnect_work(struct work_struct *work) return; reconnect_again: - if (rtrs_clt_change_state_get_old(sess, RTRS_CLT_RECONNECTING, NULL)) { - sess->stats->reconnects.fail_cnt++; - delay_ms = clt->reconnect_delay_sec * 1000; - queue_delayed_work(rtrs_wq, &sess->reconnect_dwork, - msecs_to_jiffies(delay_ms + - prandom_u32() % - RTRS_RECONNECT_SEED)); + if (rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_RECONNECTING, NULL)) { + clt_path->stats->reconnects.fail_cnt++; + queue_work(rtrs_wq, &clt_path->err_recovery_work); } } static void rtrs_clt_dev_release(struct device *dev) { - struct rtrs_clt *clt = container_of(dev, struct rtrs_clt, dev); + struct rtrs_clt_sess *clt = container_of(dev, struct rtrs_clt_sess, + dev); + mutex_destroy(&clt->paths_ev_mutex); + mutex_destroy(&clt->paths_mutex); kfree(clt); } -static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num, +static struct rtrs_clt_sess *alloc_clt(const char *sessname, size_t paths_num, u16 port, size_t pdu_sz, void *priv, void (*link_ev)(void *priv, enum rtrs_clt_link_ev ev), unsigned int reconnect_delay_sec, unsigned int max_reconnect_attempts) { - struct rtrs_clt *clt; + struct rtrs_clt_sess *clt; int err; if (!paths_num || paths_num > MAX_PATHS_NUM) @@ -2693,6 +2710,8 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num, return ERR_PTR(-ENOMEM); } + clt->dev.class = rtrs_clt_dev_class; + 
clt->dev.release = rtrs_clt_dev_release; uuid_gen(&clt->paths_uuid); INIT_LIST_HEAD_RCU(&clt->paths_list); clt->paths_num = paths_num; @@ -2709,60 +2728,58 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num, init_waitqueue_head(&clt->permits_wait); mutex_init(&clt->paths_ev_mutex); mutex_init(&clt->paths_mutex); + device_initialize(&clt->dev); - clt->dev.class = rtrs_clt_dev_class; - clt->dev.release = rtrs_clt_dev_release; err = dev_set_name(&clt->dev, "%s", sessname); if (err) - goto err; + goto err_put; + /* * Suppress user space notification until * sysfs files are created */ dev_set_uevent_suppress(&clt->dev, true); - err = device_register(&clt->dev); - if (err) { - put_device(&clt->dev); - goto err; - } + err = device_add(&clt->dev); + if (err) + goto err_put; clt->kobj_paths = kobject_create_and_add("paths", &clt->dev.kobj); if (!clt->kobj_paths) { err = -ENOMEM; - goto err_dev; + goto err_del; } err = rtrs_clt_create_sysfs_root_files(clt); if (err) { kobject_del(clt->kobj_paths); kobject_put(clt->kobj_paths); - goto err_dev; + goto err_del; } dev_set_uevent_suppress(&clt->dev, false); kobject_uevent(&clt->dev.kobj, KOBJ_ADD); return clt; -err_dev: - device_unregister(&clt->dev); -err: +err_del: + device_del(&clt->dev); +err_put: free_percpu(clt->pcpu_path); - kfree(clt); + put_device(&clt->dev); return ERR_PTR(err); } -static void free_clt(struct rtrs_clt *clt) +static void free_clt(struct rtrs_clt_sess *clt) { - free_permits(clt); free_percpu(clt->pcpu_path); - mutex_destroy(&clt->paths_ev_mutex); - mutex_destroy(&clt->paths_mutex); - /* release callback will free clt in last put */ + + /* + * release callback will free clt and destroy mutexes in last put + */ device_unregister(&clt->dev); } /** - * rtrs_clt_open() - Open a session to an RTRS server + * rtrs_clt_open() - Open a path to an RTRS server * @ops: holds the link event callback and the private pointer. 
- * @sessname: name of the session + * @pathname: name of the path to an RTRS server * @paths: Paths to be established defined by their src and dst addresses * @paths_num: Number of elements in the @paths array * @port: port to be used by the RTRS session @@ -2777,24 +2794,24 @@ static void free_clt(struct rtrs_clt *clt) * * Return a valid pointer on success otherwise PTR_ERR. */ -struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, - const char *sessname, +struct rtrs_clt_sess *rtrs_clt_open(struct rtrs_clt_ops *ops, + const char *pathname, const struct rtrs_addr *paths, size_t paths_num, u16 port, size_t pdu_sz, u8 reconnect_delay_sec, s16 max_reconnect_attempts, u32 nr_poll_queues) { - struct rtrs_clt_sess *sess, *tmp; - struct rtrs_clt *clt; + struct rtrs_clt_path *clt_path, *tmp; + struct rtrs_clt_sess *clt; int err, i; - if (strchr(sessname, '/') || strchr(sessname, '.')) { - pr_err("sessname cannot contain / and .\n"); + if (strchr(pathname, '/') || strchr(pathname, '.')) { + pr_err("pathname cannot contain / and .\n"); err = -EINVAL; goto out; } - clt = alloc_clt(sessname, paths_num, port, pdu_sz, ops->priv, + clt = alloc_clt(pathname, paths_num, port, pdu_sz, ops->priv, ops->link_ev, reconnect_delay_sec, max_reconnect_attempts); @@ -2803,49 +2820,49 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, goto out; } for (i = 0; i < paths_num; i++) { - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *clt_path; - sess = alloc_sess(clt, &paths[i], nr_cpu_ids, + clt_path = alloc_path(clt, &paths[i], nr_cpu_ids, nr_poll_queues); - if (IS_ERR(sess)) { - err = PTR_ERR(sess); - goto close_all_sess; + if (IS_ERR(clt_path)) { + err = PTR_ERR(clt_path); + goto close_all_path; } if (!i) - sess->for_new_clt = 1; - list_add_tail_rcu(&sess->s.entry, &clt->paths_list); + clt_path->for_new_clt = 1; + list_add_tail_rcu(&clt_path->s.entry, &clt->paths_list); - err = init_sess(sess); + err = init_path(clt_path); if (err) { - list_del_rcu(&sess->s.entry); - 
rtrs_clt_close_conns(sess, true); - free_percpu(sess->stats->pcpu_stats); - kfree(sess->stats); - free_sess(sess); - goto close_all_sess; + list_del_rcu(&clt_path->s.entry); + rtrs_clt_close_conns(clt_path, true); + free_percpu(clt_path->stats->pcpu_stats); + kfree(clt_path->stats); + free_path(clt_path); + goto close_all_path; } - err = rtrs_clt_create_sess_files(sess); + err = rtrs_clt_create_path_files(clt_path); if (err) { - list_del_rcu(&sess->s.entry); - rtrs_clt_close_conns(sess, true); - free_percpu(sess->stats->pcpu_stats); - kfree(sess->stats); - free_sess(sess); - goto close_all_sess; + list_del_rcu(&clt_path->s.entry); + rtrs_clt_close_conns(clt_path, true); + free_percpu(clt_path->stats->pcpu_stats); + kfree(clt_path->stats); + free_path(clt_path); + goto close_all_path; } } err = alloc_permits(clt); if (err) - goto close_all_sess; + goto close_all_path; return clt; -close_all_sess: - list_for_each_entry_safe(sess, tmp, &clt->paths_list, s.entry) { - rtrs_clt_destroy_sess_files(sess, NULL); - rtrs_clt_close_conns(sess, true); - kobject_put(&sess->kobj); +close_all_path: + list_for_each_entry_safe(clt_path, tmp, &clt->paths_list, s.entry) { + rtrs_clt_destroy_path_files(clt_path, NULL); + rtrs_clt_close_conns(clt_path, true); + kobject_put(&clt_path->kobj); } rtrs_clt_destroy_sysfs_root(clt); free_clt(clt); @@ -2856,37 +2873,40 @@ out: EXPORT_SYMBOL(rtrs_clt_open); /** - * rtrs_clt_close() - Close a session + * rtrs_clt_close() - Close a path * @clt: Session handle. Session is freed upon return. 
*/ -void rtrs_clt_close(struct rtrs_clt *clt) +void rtrs_clt_close(struct rtrs_clt_sess *clt) { - struct rtrs_clt_sess *sess, *tmp; + struct rtrs_clt_path *clt_path, *tmp; /* Firstly forbid sysfs access */ rtrs_clt_destroy_sysfs_root(clt); /* Now it is safe to iterate over all paths without locks */ - list_for_each_entry_safe(sess, tmp, &clt->paths_list, s.entry) { - rtrs_clt_close_conns(sess, true); - rtrs_clt_destroy_sess_files(sess, NULL); - kobject_put(&sess->kobj); + list_for_each_entry_safe(clt_path, tmp, &clt->paths_list, s.entry) { + rtrs_clt_close_conns(clt_path, true); + rtrs_clt_destroy_path_files(clt_path, NULL); + kobject_put(&clt_path->kobj); } + free_permits(clt); free_clt(clt); } EXPORT_SYMBOL(rtrs_clt_close); -int rtrs_clt_reconnect_from_sysfs(struct rtrs_clt_sess *sess) +int rtrs_clt_reconnect_from_sysfs(struct rtrs_clt_path *clt_path) { enum rtrs_clt_state old_state; int err = -EBUSY; bool changed; - changed = rtrs_clt_change_state_get_old(sess, RTRS_CLT_RECONNECTING, + changed = rtrs_clt_change_state_get_old(clt_path, + RTRS_CLT_RECONNECTING, &old_state); if (changed) { - sess->reconnect_attempts = 0; - queue_delayed_work(rtrs_wq, &sess->reconnect_dwork, 0); + clt_path->reconnect_attempts = 0; + rtrs_clt_stop_and_destroy_conns(clt_path); + queue_delayed_work(rtrs_wq, &clt_path->reconnect_dwork, 0); } if (changed || old_state == RTRS_CLT_RECONNECTING) { /* @@ -2894,15 +2914,15 @@ int rtrs_clt_reconnect_from_sysfs(struct rtrs_clt_sess *sess) * execution, so do the flush if we have queued something * right now or work is pending. */ - flush_delayed_work(&sess->reconnect_dwork); - err = (READ_ONCE(sess->state) == + flush_delayed_work(&clt_path->reconnect_dwork); + err = (READ_ONCE(clt_path->state) == RTRS_CLT_CONNECTED ? 
0 : -ENOTCONN); } return err; } -int rtrs_clt_remove_path_from_sysfs(struct rtrs_clt_sess *sess, +int rtrs_clt_remove_path_from_sysfs(struct rtrs_clt_path *clt_path, const struct attribute *sysfs_self) { enum rtrs_clt_state old_state; @@ -2918,27 +2938,27 @@ int rtrs_clt_remove_path_from_sysfs(struct rtrs_clt_sess *sess, * removing the path. */ do { - rtrs_clt_close_conns(sess, true); - changed = rtrs_clt_change_state_get_old(sess, + rtrs_clt_close_conns(clt_path, true); + changed = rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_DEAD, &old_state); } while (!changed && old_state != RTRS_CLT_DEAD); if (changed) { - rtrs_clt_remove_path_from_arr(sess); - rtrs_clt_destroy_sess_files(sess, sysfs_self); - kobject_put(&sess->kobj); + rtrs_clt_remove_path_from_arr(clt_path); + rtrs_clt_destroy_path_files(clt_path, sysfs_self); + kobject_put(&clt_path->kobj); } return 0; } -void rtrs_clt_set_max_reconnect_attempts(struct rtrs_clt *clt, int value) +void rtrs_clt_set_max_reconnect_attempts(struct rtrs_clt_sess *clt, int value) { clt->max_reconnect_attempts = (unsigned int)value; } -int rtrs_clt_get_max_reconnect_attempts(const struct rtrs_clt *clt) +int rtrs_clt_get_max_reconnect_attempts(const struct rtrs_clt_sess *clt) { return (int)clt->max_reconnect_attempts; } @@ -2968,12 +2988,12 @@ int rtrs_clt_get_max_reconnect_attempts(const struct rtrs_clt *clt) * On dir=WRITE rtrs client will rdma write data in sg to server side. 
*/ int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops, - struct rtrs_clt *clt, struct rtrs_permit *permit, - const struct kvec *vec, size_t nr, size_t data_len, - struct scatterlist *sg, unsigned int sg_cnt) + struct rtrs_clt_sess *clt, struct rtrs_permit *permit, + const struct kvec *vec, size_t nr, size_t data_len, + struct scatterlist *sg, unsigned int sg_cnt) { struct rtrs_clt_io_req *req; - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *clt_path; enum dma_data_direction dma_dir; int err = -ECONNABORTED, i; @@ -2995,19 +3015,19 @@ int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops, rcu_read_lock(); for (path_it_init(&it, clt); - (sess = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) { - if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTED) + (clt_path = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) { + if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTED) continue; - if (usr_len + hdr_len > sess->max_hdr_size) { - rtrs_wrn_rl(sess->clt, + if (usr_len + hdr_len > clt_path->max_hdr_size) { + rtrs_wrn_rl(clt_path->clt, "%s request failed, user message size is %zu and header length %zu, but max size is %u\n", dir == READ ? 
"Read" : "Write", - usr_len, hdr_len, sess->max_hdr_size); + usr_len, hdr_len, clt_path->max_hdr_size); err = -EMSGSIZE; break; } - req = rtrs_clt_get_req(sess, ops->conf_fn, permit, ops->priv, + req = rtrs_clt_get_req(clt_path, ops->conf_fn, permit, ops->priv, vec, usr_len, sg, sg_cnt, data_len, dma_dir); if (dir == READ) @@ -3028,21 +3048,21 @@ int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops, } EXPORT_SYMBOL(rtrs_clt_request); -int rtrs_clt_rdma_cq_direct(struct rtrs_clt *clt, unsigned int index) +int rtrs_clt_rdma_cq_direct(struct rtrs_clt_sess *clt, unsigned int index) { /* If no path, return -1 for block layer not to try again */ int cnt = -1; struct rtrs_con *con; - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *clt_path; struct path_it it; rcu_read_lock(); for (path_it_init(&it, clt); - (sess = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) { - if (READ_ONCE(sess->state) != RTRS_CLT_CONNECTED) + (clt_path = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) { + if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTED) continue; - con = sess->s.con[index + 1]; + con = clt_path->s.con[index + 1]; cnt = ib_process_cq_direct(con->cq, -1); if (cnt) break; @@ -3062,7 +3082,7 @@ EXPORT_SYMBOL(rtrs_clt_rdma_cq_direct); * 0 on success * -ECOMM no connection to the server */ -int rtrs_clt_query(struct rtrs_clt *clt, struct rtrs_attrs *attr) +int rtrs_clt_query(struct rtrs_clt_sess *clt, struct rtrs_attrs *attr) { if (!rtrs_clt_is_connected(clt)) return -ECOMM; @@ -3077,15 +3097,15 @@ int rtrs_clt_query(struct rtrs_clt *clt, struct rtrs_attrs *attr) } EXPORT_SYMBOL(rtrs_clt_query); -int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt, +int rtrs_clt_create_path_from_sysfs(struct rtrs_clt_sess *clt, struct rtrs_addr *addr) { - struct rtrs_clt_sess *sess; + struct rtrs_clt_path *clt_path; int err; - sess = alloc_sess(clt, addr, nr_cpu_ids, 0); - if (IS_ERR(sess)) - return PTR_ERR(sess); + clt_path = alloc_path(clt, addr, nr_cpu_ids, 0); + if 
(IS_ERR(clt_path)) + return PTR_ERR(clt_path); mutex_lock(&clt->paths_mutex); if (clt->paths_num == 0) { @@ -3094,7 +3114,7 @@ int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt, * the addition of the first path is like a new session for * the storage server */ - sess->for_new_clt = 1; + clt_path->for_new_clt = 1; } mutex_unlock(&clt->paths_mutex); @@ -3104,24 +3124,24 @@ int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt, * IO will never grab it. Also it is very important to add * path before init, since init fires LINK_CONNECTED event. */ - rtrs_clt_add_path_to_arr(sess); + rtrs_clt_add_path_to_arr(clt_path); - err = init_sess(sess); + err = init_path(clt_path); if (err) - goto close_sess; + goto close_path; - err = rtrs_clt_create_sess_files(sess); + err = rtrs_clt_create_path_files(clt_path); if (err) - goto close_sess; + goto close_path; return 0; -close_sess: - rtrs_clt_remove_path_from_arr(sess); - rtrs_clt_close_conns(sess, true); - free_percpu(sess->stats->pcpu_stats); - kfree(sess->stats); - free_sess(sess); +close_path: + rtrs_clt_remove_path_from_arr(clt_path); + rtrs_clt_close_conns(clt_path, true); + free_percpu(clt_path->stats->pcpu_stats); + kfree(clt_path->stats); + free_path(clt_path); return err; } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h index 9afffccff973..f848c0392d98 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h @@ -124,9 +124,9 @@ struct rtrs_rbuf { u32 rkey; }; -struct rtrs_clt_sess { - struct rtrs_sess s; - struct rtrs_clt *clt; +struct rtrs_clt_path { + struct rtrs_path s; + struct rtrs_clt_sess *clt; wait_queue_head_t state_wq; enum rtrs_clt_state state; atomic_t connected_cnt; @@ -134,6 +134,7 @@ struct rtrs_clt_sess { struct rtrs_clt_io_req *reqs; struct delayed_work reconnect_dwork; struct work_struct close_work; + struct work_struct err_recovery_work; unsigned int reconnect_attempts; bool established; struct rtrs_rbuf 
*rbufs; @@ -153,10 +154,10 @@ struct rtrs_clt_sess { *mp_skip_entry; }; -struct rtrs_clt { +struct rtrs_clt_sess { struct list_head paths_list; /* rcu protected list */ size_t paths_num; - struct rtrs_clt_sess + struct rtrs_clt_path __rcu * __percpu *pcpu_path; uuid_t paths_uuid; int paths_up; @@ -186,31 +187,32 @@ static inline struct rtrs_clt_con *to_clt_con(struct rtrs_con *c) return container_of(c, struct rtrs_clt_con, c); } -static inline struct rtrs_clt_sess *to_clt_sess(struct rtrs_sess *s) +static inline struct rtrs_clt_path *to_clt_path(struct rtrs_path *s) { - return container_of(s, struct rtrs_clt_sess, s); + return container_of(s, struct rtrs_clt_path, s); } -static inline int permit_size(struct rtrs_clt *clt) +static inline int permit_size(struct rtrs_clt_sess *clt) { return sizeof(struct rtrs_permit) + clt->pdu_sz; } -static inline struct rtrs_permit *get_permit(struct rtrs_clt *clt, int idx) +static inline struct rtrs_permit *get_permit(struct rtrs_clt_sess *clt, + int idx) { return (struct rtrs_permit *)(clt->permits + permit_size(clt) * idx); } -int rtrs_clt_reconnect_from_sysfs(struct rtrs_clt_sess *sess); -void rtrs_clt_close_conns(struct rtrs_clt_sess *sess, bool wait); -int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt, +int rtrs_clt_reconnect_from_sysfs(struct rtrs_clt_path *path); +void rtrs_clt_close_conns(struct rtrs_clt_path *clt_path, bool wait); +int rtrs_clt_create_path_from_sysfs(struct rtrs_clt_sess *clt, struct rtrs_addr *addr); -int rtrs_clt_remove_path_from_sysfs(struct rtrs_clt_sess *sess, +int rtrs_clt_remove_path_from_sysfs(struct rtrs_clt_path *path, const struct attribute *sysfs_self); -void rtrs_clt_set_max_reconnect_attempts(struct rtrs_clt *clt, int value); -int rtrs_clt_get_max_reconnect_attempts(const struct rtrs_clt *clt); -void free_sess(struct rtrs_clt_sess *sess); +void rtrs_clt_set_max_reconnect_attempts(struct rtrs_clt_sess *clt, int value); +int rtrs_clt_get_max_reconnect_attempts(const struct rtrs_clt_sess 
*clt); +void free_path(struct rtrs_clt_path *clt_path); /* rtrs-clt-stats.c */ @@ -239,11 +241,11 @@ ssize_t rtrs_clt_reset_all_help(struct rtrs_clt_stats *stats, /* rtrs-clt-sysfs.c */ -int rtrs_clt_create_sysfs_root_files(struct rtrs_clt *clt); -void rtrs_clt_destroy_sysfs_root(struct rtrs_clt *clt); +int rtrs_clt_create_sysfs_root_files(struct rtrs_clt_sess *clt); +void rtrs_clt_destroy_sysfs_root(struct rtrs_clt_sess *clt); -int rtrs_clt_create_sess_files(struct rtrs_clt_sess *sess); -void rtrs_clt_destroy_sess_files(struct rtrs_clt_sess *sess, +int rtrs_clt_create_path_files(struct rtrs_clt_path *clt_path); +void rtrs_clt_destroy_path_files(struct rtrs_clt_path *clt_path, const struct attribute *sysfs_self); #endif /* RTRS_CLT_H */ diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h index 78eac9a4f703..a2420eecaf5a 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h @@ -23,6 +23,16 @@ #define RTRS_PROTO_VER_STRING __stringify(RTRS_PROTO_VER_MAJOR) "." \ __stringify(RTRS_PROTO_VER_MINOR) +/* + * Max IB immediate data size is 2^28 (MAX_IMM_PAYL_BITS) + * and the minimum chunk size is 4096 (2^12). + * So the maximum sess_queue_depth is 65535 (2^16 - 1) in theory + * since queue_depth in rtrs_msg_conn_rsp is defined as le16. + * Therefore the pratical max value of sess_queue_depth is + * somewhere between 1 and 65535 and it depends on the system. + */ +#define MAX_SESS_QUEUE_DEPTH 65535 + enum rtrs_imm_const { MAX_IMM_TYPE_BITS = 4, MAX_IMM_TYPE_MASK = ((1 << MAX_IMM_TYPE_BITS) - 1), @@ -46,16 +56,6 @@ enum { MAX_PATHS_NUM = 128, - /* - * Max IB immediate data size is 2^28 (MAX_IMM_PAYL_BITS) - * and the minimum chunk size is 4096 (2^12). - * So the maximum sess_queue_depth is 65536 (2^16) in theory. - * But mempool_create, create_qp and ib_post_send fail with - * "cannot allocate memory" error if sess_queue_depth is too big. 
- * Therefore the pratical max value of sess_queue_depth is - * somewhere between 1 and 65534 and it depends on the system. - */ - MAX_SESS_QUEUE_DEPTH = 65535, MIN_CHUNK_SIZE = 8192, RTRS_HB_INTERVAL_MS = 5000, @@ -90,7 +90,7 @@ struct rtrs_ib_dev { }; struct rtrs_con { - struct rtrs_sess *sess; + struct rtrs_path *path; struct ib_qp *qp; struct ib_cq *cq; struct rdma_cm_id *cm_id; @@ -100,7 +100,7 @@ struct rtrs_con { atomic_t sq_wr_avail; }; -struct rtrs_sess { +struct rtrs_path { struct list_head entry; struct sockaddr_storage dst_addr; struct sockaddr_storage src_addr; @@ -229,11 +229,11 @@ struct rtrs_msg_conn_rsp { /** * struct rtrs_msg_info_req * @type: @RTRS_MSG_INFO_REQ - * @sessname: Session name chosen by client + * @pathname: Path name chosen by client */ struct rtrs_msg_info_req { __le16 type; - u8 sessname[NAME_MAX]; + u8 pathname[NAME_MAX]; u8 reserved[15]; }; @@ -313,19 +313,19 @@ int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu, int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe); -int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con, +int rtrs_cq_qp_create(struct rtrs_path *path, struct rtrs_con *con, u32 max_send_sge, int cq_vector, int nr_cqe, u32 max_send_wr, u32 max_recv_wr, enum ib_poll_context poll_ctx); void rtrs_cq_qp_destroy(struct rtrs_con *con); -void rtrs_init_hb(struct rtrs_sess *sess, struct ib_cqe *cqe, +void rtrs_init_hb(struct rtrs_path *path, struct ib_cqe *cqe, unsigned int interval_ms, unsigned int missed_max, void (*err_handler)(struct rtrs_con *con), struct workqueue_struct *wq); -void rtrs_start_hb(struct rtrs_sess *sess); -void rtrs_stop_hb(struct rtrs_sess *sess); -void rtrs_send_hb_ack(struct rtrs_sess *sess); +void rtrs_start_hb(struct rtrs_path *path); +void rtrs_stop_hb(struct rtrs_path *path); +void rtrs_send_hb_ack(struct rtrs_path *path); void rtrs_rdma_dev_pd_init(enum ib_pd_flags pd_flags, struct rtrs_rdma_dev_pd *pool); diff --git 
a/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c index 44b1c1652131..2aff1213a19d 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c @@ -14,9 +14,14 @@ int rtrs_srv_reset_rdma_stats(struct rtrs_srv_stats *stats, bool enable) { if (enable) { - struct rtrs_srv_stats_rdma_stats *r = &stats->rdma_stats; + int cpu; + struct rtrs_srv_stats_rdma_stats *r; + + for_each_possible_cpu(cpu) { + r = per_cpu_ptr(stats->rdma_stats, cpu); + memset(r, 0, sizeof(*r)); + } - memset(r, 0, sizeof(*r)); return 0; } @@ -25,11 +30,22 @@ int rtrs_srv_reset_rdma_stats(struct rtrs_srv_stats *stats, bool enable) ssize_t rtrs_srv_stats_rdma_to_str(struct rtrs_srv_stats *stats, char *page) { - struct rtrs_srv_stats_rdma_stats *r = &stats->rdma_stats; + int cpu; + struct rtrs_srv_stats_rdma_stats sum; + struct rtrs_srv_stats_rdma_stats *r; + + memset(&sum, 0, sizeof(sum)); + + for_each_possible_cpu(cpu) { + r = per_cpu_ptr(stats->rdma_stats, cpu); + + sum.dir[READ].cnt += r->dir[READ].cnt; + sum.dir[READ].size_total += r->dir[READ].size_total; + sum.dir[WRITE].cnt += r->dir[WRITE].cnt; + sum.dir[WRITE].size_total += r->dir[WRITE].size_total; + } - return sysfs_emit(page, "%lld %lld %lld %lldn %u\n", - (s64)atomic64_read(&r->dir[READ].cnt), - (s64)atomic64_read(&r->dir[READ].size_total), - (s64)atomic64_read(&r->dir[WRITE].cnt), - (s64)atomic64_read(&r->dir[WRITE].size_total), 0); + return sysfs_emit(page, "%llu %llu %llu %llu\n", + sum.dir[READ].cnt, sum.dir[READ].size_total, + sum.dir[WRITE].cnt, sum.dir[WRITE].size_total); } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c index 9c43ce5ba1c1..2a3c9ac64a42 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -15,10 +15,10 @@ static void rtrs_srv_release(struct kobject *kobj) { - struct rtrs_srv_sess *sess; + struct rtrs_srv_path 
*srv_path; - sess = container_of(kobj, struct rtrs_srv_sess, kobj); - kfree(sess); + srv_path = container_of(kobj, struct rtrs_srv_path, kobj); + kfree(srv_path); } static struct kobj_type ktype = { @@ -36,24 +36,25 @@ static ssize_t rtrs_srv_disconnect_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { - struct rtrs_srv_sess *sess; - struct rtrs_sess *s; + struct rtrs_srv_path *srv_path; + struct rtrs_path *s; char str[MAXHOSTNAMELEN]; - sess = container_of(kobj, struct rtrs_srv_sess, kobj); - s = &sess->s; + srv_path = container_of(kobj, struct rtrs_srv_path, kobj); + s = &srv_path->s; if (!sysfs_streq(buf, "1")) { rtrs_err(s, "%s: invalid value: '%s'\n", attr->attr.name, buf); return -EINVAL; } - sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, str, sizeof(str)); + sockaddr_to_str((struct sockaddr *)&srv_path->s.dst_addr, str, + sizeof(str)); rtrs_info(s, "disconnect for path %s requested\n", str); /* first remove sysfs itself to avoid deadlock */ - sysfs_remove_file_self(&sess->kobj, &attr->attr); - close_sess(sess); + sysfs_remove_file_self(&srv_path->kobj, &attr->attr); + close_path(srv_path); return count; } @@ -66,11 +67,11 @@ static ssize_t rtrs_srv_hca_port_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - struct rtrs_srv_sess *sess; + struct rtrs_srv_path *srv_path; struct rtrs_con *usr_con; - sess = container_of(kobj, typeof(*sess), kobj); - usr_con = sess->s.con[0]; + srv_path = container_of(kobj, typeof(*srv_path), kobj); + usr_con = srv_path->s.con[0]; return sysfs_emit(page, "%u\n", usr_con->cm_id->port_num); } @@ -82,11 +83,11 @@ static ssize_t rtrs_srv_hca_name_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - struct rtrs_srv_sess *sess; + struct rtrs_srv_path *srv_path; - sess = container_of(kobj, struct rtrs_srv_sess, kobj); + srv_path = container_of(kobj, struct rtrs_srv_path, kobj); - return sysfs_emit(page, "%s\n", sess->s.dev->ib_dev->name); + return 
sysfs_emit(page, "%s\n", srv_path->s.dev->ib_dev->name); } static struct kobj_attribute rtrs_srv_hca_name_attr = @@ -96,11 +97,11 @@ static ssize_t rtrs_srv_src_addr_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - struct rtrs_srv_sess *sess; + struct rtrs_srv_path *srv_path; int cnt; - sess = container_of(kobj, struct rtrs_srv_sess, kobj); - cnt = sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, + srv_path = container_of(kobj, struct rtrs_srv_path, kobj); + cnt = sockaddr_to_str((struct sockaddr *)&srv_path->s.dst_addr, page, PAGE_SIZE); return cnt + sysfs_emit_at(page, cnt, "\n"); } @@ -112,11 +113,11 @@ static ssize_t rtrs_srv_dst_addr_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - struct rtrs_srv_sess *sess; + struct rtrs_srv_path *srv_path; int len; - sess = container_of(kobj, struct rtrs_srv_sess, kobj); - len = sockaddr_to_str((struct sockaddr *)&sess->s.src_addr, page, + srv_path = container_of(kobj, struct rtrs_srv_path, kobj); + len = sockaddr_to_str((struct sockaddr *)&srv_path->s.src_addr, page, PAGE_SIZE); len += sysfs_emit_at(page, len, "\n"); return len; @@ -125,7 +126,7 @@ static ssize_t rtrs_srv_dst_addr_show(struct kobject *kobj, static struct kobj_attribute rtrs_srv_dst_addr_attr = __ATTR(dst_addr, 0444, rtrs_srv_dst_addr_show, NULL); -static struct attribute *rtrs_srv_sess_attrs[] = { +static struct attribute *rtrs_srv_path_attrs[] = { &rtrs_srv_hca_name_attr.attr, &rtrs_srv_hca_port_attr.attr, &rtrs_srv_src_addr_attr.attr, @@ -134,8 +135,8 @@ static struct attribute *rtrs_srv_sess_attrs[] = { NULL, }; -static const struct attribute_group rtrs_srv_sess_attr_group = { - .attrs = rtrs_srv_sess_attrs, +static const struct attribute_group rtrs_srv_path_attr_group = { + .attrs = rtrs_srv_path_attrs, }; STAT_ATTR(struct rtrs_srv_stats, rdma, @@ -151,9 +152,9 @@ static const struct attribute_group rtrs_srv_stats_attr_group = { .attrs = rtrs_srv_stats_attrs, }; -static int 
rtrs_srv_create_once_sysfs_root_folders(struct rtrs_srv_sess *sess) +static int rtrs_srv_create_once_sysfs_root_folders(struct rtrs_srv_path *srv_path) { - struct rtrs_srv *srv = sess->srv; + struct rtrs_srv_sess *srv = srv_path->srv; int err = 0; mutex_lock(&srv->paths_mutex); @@ -164,7 +165,7 @@ static int rtrs_srv_create_once_sysfs_root_folders(struct rtrs_srv_sess *sess) goto unlock; } srv->dev.class = rtrs_dev_class; - err = dev_set_name(&srv->dev, "%s", sess->s.sessname); + err = dev_set_name(&srv->dev, "%s", srv_path->s.sessname); if (err) goto unlock; @@ -196,9 +197,9 @@ unlock: } static void -rtrs_srv_destroy_once_sysfs_root_folders(struct rtrs_srv_sess *sess) +rtrs_srv_destroy_once_sysfs_root_folders(struct rtrs_srv_path *srv_path) { - struct rtrs_srv *srv = sess->srv; + struct rtrs_srv_sess *srv = srv_path->srv; mutex_lock(&srv->paths_mutex); if (!--srv->dev_ref) { @@ -213,33 +214,35 @@ rtrs_srv_destroy_once_sysfs_root_folders(struct rtrs_srv_sess *sess) } } -static void rtrs_srv_sess_stats_release(struct kobject *kobj) +static void rtrs_srv_path_stats_release(struct kobject *kobj) { struct rtrs_srv_stats *stats; stats = container_of(kobj, struct rtrs_srv_stats, kobj_stats); + free_percpu(stats->rdma_stats); + kfree(stats); } static struct kobj_type ktype_stats = { .sysfs_ops = &kobj_sysfs_ops, - .release = rtrs_srv_sess_stats_release, + .release = rtrs_srv_path_stats_release, }; -static int rtrs_srv_create_stats_files(struct rtrs_srv_sess *sess) +static int rtrs_srv_create_stats_files(struct rtrs_srv_path *srv_path) { int err; - struct rtrs_sess *s = &sess->s; + struct rtrs_path *s = &srv_path->s; - err = kobject_init_and_add(&sess->stats->kobj_stats, &ktype_stats, - &sess->kobj, "stats"); + err = kobject_init_and_add(&srv_path->stats->kobj_stats, &ktype_stats, + &srv_path->kobj, "stats"); if (err) { rtrs_err(s, "kobject_init_and_add(): %d\n", err); - kobject_put(&sess->stats->kobj_stats); + kobject_put(&srv_path->stats->kobj_stats); return err; } - err 
= sysfs_create_group(&sess->stats->kobj_stats, + err = sysfs_create_group(&srv_path->stats->kobj_stats, &rtrs_srv_stats_attr_group); if (err) { rtrs_err(s, "sysfs_create_group(): %d\n", err); @@ -249,64 +252,64 @@ static int rtrs_srv_create_stats_files(struct rtrs_srv_sess *sess) return 0; err: - kobject_del(&sess->stats->kobj_stats); - kobject_put(&sess->stats->kobj_stats); + kobject_del(&srv_path->stats->kobj_stats); + kobject_put(&srv_path->stats->kobj_stats); return err; } -int rtrs_srv_create_sess_files(struct rtrs_srv_sess *sess) +int rtrs_srv_create_path_files(struct rtrs_srv_path *srv_path) { - struct rtrs_srv *srv = sess->srv; - struct rtrs_sess *s = &sess->s; + struct rtrs_srv_sess *srv = srv_path->srv; + struct rtrs_path *s = &srv_path->s; char str[NAME_MAX]; int err; struct rtrs_addr path = { - .src = &sess->s.dst_addr, - .dst = &sess->s.src_addr, + .src = &srv_path->s.dst_addr, + .dst = &srv_path->s.src_addr, }; rtrs_addr_to_str(&path, str, sizeof(str)); - err = rtrs_srv_create_once_sysfs_root_folders(sess); + err = rtrs_srv_create_once_sysfs_root_folders(srv_path); if (err) return err; - err = kobject_init_and_add(&sess->kobj, &ktype, srv->kobj_paths, + err = kobject_init_and_add(&srv_path->kobj, &ktype, srv->kobj_paths, "%s", str); if (err) { rtrs_err(s, "kobject_init_and_add(): %d\n", err); goto destroy_root; } - err = sysfs_create_group(&sess->kobj, &rtrs_srv_sess_attr_group); + err = sysfs_create_group(&srv_path->kobj, &rtrs_srv_path_attr_group); if (err) { rtrs_err(s, "sysfs_create_group(): %d\n", err); goto put_kobj; } - err = rtrs_srv_create_stats_files(sess); + err = rtrs_srv_create_stats_files(srv_path); if (err) goto remove_group; return 0; remove_group: - sysfs_remove_group(&sess->kobj, &rtrs_srv_sess_attr_group); + sysfs_remove_group(&srv_path->kobj, &rtrs_srv_path_attr_group); put_kobj: - kobject_del(&sess->kobj); + kobject_del(&srv_path->kobj); destroy_root: - kobject_put(&sess->kobj); - rtrs_srv_destroy_once_sysfs_root_folders(sess); + 
kobject_put(&srv_path->kobj); + rtrs_srv_destroy_once_sysfs_root_folders(srv_path); return err; } -void rtrs_srv_destroy_sess_files(struct rtrs_srv_sess *sess) +void rtrs_srv_destroy_path_files(struct rtrs_srv_path *srv_path) { - if (sess->kobj.state_in_sysfs) { - kobject_del(&sess->stats->kobj_stats); - kobject_put(&sess->stats->kobj_stats); - sysfs_remove_group(&sess->kobj, &rtrs_srv_sess_attr_group); - kobject_put(&sess->kobj); + if (srv_path->kobj.state_in_sysfs) { + kobject_del(&srv_path->stats->kobj_stats); + kobject_put(&srv_path->stats->kobj_stats); + sysfs_remove_group(&srv_path->kobj, &rtrs_srv_path_attr_group); + kobject_put(&srv_path->kobj); - rtrs_srv_destroy_once_sysfs_root_folders(sess); + rtrs_srv_destroy_once_sysfs_root_folders(srv_path); } } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-trace.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-trace.c new file mode 100644 index 000000000000..29ca59ceb0dd --- /dev/null +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-trace.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * RDMA Network Block Driver + * + * Copyright (c) 2022 1&1 IONOS SE. All rights reserved. + */ +#include "rtrs.h" +#include "rtrs-pri.h" +#include "rtrs-srv.h" + +/* + * We include this last to have the helpers above available for the trace + * event implementations. + */ +#define CREATE_TRACE_POINTS +#include "rtrs-srv-trace.h" diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-trace.h b/drivers/infiniband/ulp/rtrs/rtrs-srv-trace.h new file mode 100644 index 000000000000..587d3e033081 --- /dev/null +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-trace.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * RDMA Network Block Driver + * + * Copyright (c) 2022 1&1 IONOS SE. All rights reserved. 
+ */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rtrs_srv + +#if !defined(_TRACE_RTRS_SRV_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_RTRS_SRV_H + +#include <linux/tracepoint.h> + +struct rtrs_srv_op; +struct rtrs_srv_con; +struct rtrs_srv_path; + +TRACE_DEFINE_ENUM(RTRS_SRV_CONNECTING); +TRACE_DEFINE_ENUM(RTRS_SRV_CONNECTED); +TRACE_DEFINE_ENUM(RTRS_SRV_CLOSING); +TRACE_DEFINE_ENUM(RTRS_SRV_CLOSED); + +#define show_rtrs_srv_state(x) \ + __print_symbolic(x, \ + { RTRS_SRV_CONNECTING, "CONNECTING" }, \ + { RTRS_SRV_CONNECTED, "CONNECTED" }, \ + { RTRS_SRV_CLOSING, "CLOSING" }, \ + { RTRS_SRV_CLOSED, "CLOSED" }) + +TRACE_EVENT(send_io_resp_imm, + TP_PROTO(struct rtrs_srv_op *id, + bool need_inval, + bool always_invalidate, + int errno), + + TP_ARGS(id, need_inval, always_invalidate, errno), + + TP_STRUCT__entry( + __field(u8, dir) + __field(bool, need_inval) + __field(bool, always_invalidate) + __field(u32, msg_id) + __field(int, wr_cnt) + __field(u32, signal_interval) + __field(int, state) + __field(int, errno) + __array(char, sessname, NAME_MAX) + ), + + TP_fast_assign( + struct rtrs_srv_con *con = id->con; + struct rtrs_path *s = con->c.path; + struct rtrs_srv_path *srv_path = to_srv_path(s); + + __entry->dir = id->dir; + __entry->state = srv_path->state; + __entry->errno = errno; + __entry->need_inval = need_inval; + __entry->always_invalidate = always_invalidate; + __entry->msg_id = id->msg_id; + __entry->wr_cnt = atomic_read(&con->c.wr_cnt); + __entry->signal_interval = s->signal_interval; + memcpy(__entry->sessname, kobject_name(&srv_path->kobj), NAME_MAX); + ), + + TP_printk("sess='%s' state='%s' dir=%s err='%d' inval='%d' glob-inval='%d' msgid='%u' wrcnt='%d' sig-interval='%u'", + __entry->sessname, + show_rtrs_srv_state(__entry->state), + __print_symbolic(__entry->dir, + { READ, "READ" }, + { WRITE, "WRITE" }), + __entry->errno, + __entry->need_inval, + __entry->always_invalidate, + __entry->msg_id, + __entry->wr_cnt, + __entry->signal_interval + 
) +); + +#endif /* _TRACE_RTRS_SRV_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE rtrs-srv-trace +#include <trace/define_trace.h> + diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 7df71f8cf149..22d7ba05e9fe 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -11,12 +11,12 @@ #define pr_fmt(fmt) KBUILD_MODNAME " L" __stringify(__LINE__) ": " fmt #include <linux/module.h> -#include <linux/mempool.h> #include "rtrs-srv.h" #include "rtrs-log.h" #include <rdma/ib_cm.h> #include <rdma/ib_verbs.h> +#include "rtrs-srv-trace.h" MODULE_DESCRIPTION("RDMA Transport Server"); MODULE_LICENSE("GPL"); @@ -26,11 +26,7 @@ MODULE_LICENSE("GPL"); #define DEFAULT_SESS_QUEUE_DEPTH 512 #define MAX_HDR_SIZE PAGE_SIZE -/* We guarantee to serve 10 paths at least */ -#define CHUNK_POOL_SZ 10 - static struct rtrs_rdma_dev_pd dev_pd; -static mempool_t *chunk_pool; struct class *rtrs_dev_class; static struct rtrs_srv_ib_ctx ib_ctx; @@ -62,19 +58,14 @@ static inline struct rtrs_srv_con *to_srv_con(struct rtrs_con *c) return container_of(c, struct rtrs_srv_con, c); } -static inline struct rtrs_srv_sess *to_srv_sess(struct rtrs_sess *s) -{ - return container_of(s, struct rtrs_srv_sess, s); -} - -static bool rtrs_srv_change_state(struct rtrs_srv_sess *sess, +static bool rtrs_srv_change_state(struct rtrs_srv_path *srv_path, enum rtrs_srv_state new_state) { enum rtrs_srv_state old_state; bool changed = false; - spin_lock_irq(&sess->state_lock); - old_state = sess->state; + spin_lock_irq(&srv_path->state_lock); + old_state = srv_path->state; switch (new_state) { case RTRS_SRV_CONNECTED: if (old_state == RTRS_SRV_CONNECTING) @@ -93,8 +84,8 @@ static bool rtrs_srv_change_state(struct rtrs_srv_sess *sess, break; } if (changed) - sess->state = new_state; - spin_unlock_irq(&sess->state_lock); + srv_path->state = new_state; + spin_unlock_irq(&srv_path->state_lock); 
return changed; } @@ -106,16 +97,16 @@ static void free_id(struct rtrs_srv_op *id) kfree(id); } -static void rtrs_srv_free_ops_ids(struct rtrs_srv_sess *sess) +static void rtrs_srv_free_ops_ids(struct rtrs_srv_path *srv_path) { - struct rtrs_srv *srv = sess->srv; + struct rtrs_srv_sess *srv = srv_path->srv; int i; - if (sess->ops_ids) { + if (srv_path->ops_ids) { for (i = 0; i < srv->queue_depth; i++) - free_id(sess->ops_ids[i]); - kfree(sess->ops_ids); - sess->ops_ids = NULL; + free_id(srv_path->ops_ids[i]); + kfree(srv_path->ops_ids); + srv_path->ops_ids = NULL; } } @@ -127,21 +118,24 @@ static struct ib_cqe io_comp_cqe = { static inline void rtrs_srv_inflight_ref_release(struct percpu_ref *ref) { - struct rtrs_srv_sess *sess = container_of(ref, struct rtrs_srv_sess, ids_inflight_ref); + struct rtrs_srv_path *srv_path = container_of(ref, + struct rtrs_srv_path, + ids_inflight_ref); - percpu_ref_exit(&sess->ids_inflight_ref); - complete(&sess->complete_done); + percpu_ref_exit(&srv_path->ids_inflight_ref); + complete(&srv_path->complete_done); } -static int rtrs_srv_alloc_ops_ids(struct rtrs_srv_sess *sess) +static int rtrs_srv_alloc_ops_ids(struct rtrs_srv_path *srv_path) { - struct rtrs_srv *srv = sess->srv; + struct rtrs_srv_sess *srv = srv_path->srv; struct rtrs_srv_op *id; int i, ret; - sess->ops_ids = kcalloc(srv->queue_depth, sizeof(*sess->ops_ids), - GFP_KERNEL); - if (!sess->ops_ids) + srv_path->ops_ids = kcalloc(srv->queue_depth, + sizeof(*srv_path->ops_ids), + GFP_KERNEL); + if (!srv_path->ops_ids) goto err; for (i = 0; i < srv->queue_depth; ++i) { @@ -149,44 +143,44 @@ static int rtrs_srv_alloc_ops_ids(struct rtrs_srv_sess *sess) if (!id) goto err; - sess->ops_ids[i] = id; + srv_path->ops_ids[i] = id; } - ret = percpu_ref_init(&sess->ids_inflight_ref, + ret = percpu_ref_init(&srv_path->ids_inflight_ref, rtrs_srv_inflight_ref_release, 0, GFP_KERNEL); if (ret) { pr_err("Percpu reference init failed\n"); goto err; } - 
init_completion(&sess->complete_done); + init_completion(&srv_path->complete_done); return 0; err: - rtrs_srv_free_ops_ids(sess); + rtrs_srv_free_ops_ids(srv_path); return -ENOMEM; } -static inline void rtrs_srv_get_ops_ids(struct rtrs_srv_sess *sess) +static inline void rtrs_srv_get_ops_ids(struct rtrs_srv_path *srv_path) { - percpu_ref_get(&sess->ids_inflight_ref); + percpu_ref_get(&srv_path->ids_inflight_ref); } -static inline void rtrs_srv_put_ops_ids(struct rtrs_srv_sess *sess) +static inline void rtrs_srv_put_ops_ids(struct rtrs_srv_path *srv_path) { - percpu_ref_put(&sess->ids_inflight_ref); + percpu_ref_put(&srv_path->ids_inflight_ref); } static void rtrs_srv_reg_mr_done(struct ib_cq *cq, struct ib_wc *wc) { struct rtrs_srv_con *con = to_srv_con(wc->qp->qp_context); - struct rtrs_sess *s = con->c.sess; - struct rtrs_srv_sess *sess = to_srv_sess(s); + struct rtrs_path *s = con->c.path; + struct rtrs_srv_path *srv_path = to_srv_path(s); if (wc->status != IB_WC_SUCCESS) { rtrs_err(s, "REG MR failed: %s\n", ib_wc_status_msg(wc->status)); - close_sess(sess); + close_path(srv_path); return; } } @@ -197,9 +191,9 @@ static struct ib_cqe local_reg_cqe = { static int rdma_write_sg(struct rtrs_srv_op *id) { - struct rtrs_sess *s = id->con->c.sess; - struct rtrs_srv_sess *sess = to_srv_sess(s); - dma_addr_t dma_addr = sess->dma_addr[id->msg_id]; + struct rtrs_path *s = id->con->c.path; + struct rtrs_srv_path *srv_path = to_srv_path(s); + dma_addr_t dma_addr = srv_path->dma_addr[id->msg_id]; struct rtrs_srv_mr *srv_mr; struct ib_send_wr inv_wr; struct ib_rdma_wr imm_wr; @@ -233,7 +227,7 @@ static int rdma_write_sg(struct rtrs_srv_op *id) return -EINVAL; } - plist->lkey = sess->s.dev->ib_pd->local_dma_lkey; + plist->lkey = srv_path->s.dev->ib_pd->local_dma_lkey; offset += plist->length; wr->wr.sg_list = plist; @@ -284,7 +278,7 @@ static int rdma_write_sg(struct rtrs_srv_op *id) if (always_invalidate) { struct rtrs_msg_rkey_rsp *msg; - srv_mr = &sess->mrs[id->msg_id]; + 
srv_mr = &srv_path->mrs[id->msg_id]; rwr.wr.opcode = IB_WR_REG_MR; rwr.wr.wr_cqe = &local_reg_cqe; rwr.wr.num_sge = 0; @@ -300,11 +294,11 @@ static int rdma_write_sg(struct rtrs_srv_op *id) list.addr = srv_mr->iu->dma_addr; list.length = sizeof(*msg); - list.lkey = sess->s.dev->ib_pd->local_dma_lkey; + list.lkey = srv_path->s.dev->ib_pd->local_dma_lkey; imm_wr.wr.sg_list = &list; imm_wr.wr.num_sge = 1; imm_wr.wr.opcode = IB_WR_SEND_WITH_IMM; - ib_dma_sync_single_for_device(sess->s.dev->ib_dev, + ib_dma_sync_single_for_device(srv_path->s.dev->ib_dev, srv_mr->iu->dma_addr, srv_mr->iu->size, DMA_TO_DEVICE); } else { @@ -317,7 +311,7 @@ static int rdma_write_sg(struct rtrs_srv_op *id) 0, need_inval)); imm_wr.wr.wr_cqe = &io_comp_cqe; - ib_dma_sync_single_for_device(sess->s.dev->ib_dev, dma_addr, + ib_dma_sync_single_for_device(srv_path->s.dev->ib_dev, dma_addr, offset, DMA_BIDIRECTIONAL); err = ib_post_send(id->con->c.qp, &id->tx_wr.wr, NULL); @@ -341,8 +335,8 @@ static int rdma_write_sg(struct rtrs_srv_op *id) static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, int errno) { - struct rtrs_sess *s = con->c.sess; - struct rtrs_srv_sess *sess = to_srv_sess(s); + struct rtrs_path *s = con->c.path; + struct rtrs_srv_path *srv_path = to_srv_path(s); struct ib_send_wr inv_wr, *wr = NULL; struct ib_rdma_wr imm_wr; struct ib_reg_wr rwr; @@ -377,6 +371,8 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, } } + trace_send_io_resp_imm(id, need_inval, always_invalidate, errno); + if (need_inval && always_invalidate) { wr = &inv_wr; inv_wr.next = &rwr.wr; @@ -402,7 +398,7 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, struct ib_sge list; struct rtrs_msg_rkey_rsp *msg; - srv_mr = &sess->mrs[id->msg_id]; + srv_mr = &srv_path->mrs[id->msg_id]; rwr.wr.next = &imm_wr.wr; rwr.wr.opcode = IB_WR_REG_MR; rwr.wr.wr_cqe = &local_reg_cqe; @@ -419,11 +415,11 @@ static int send_io_resp_imm(struct 
rtrs_srv_con *con, struct rtrs_srv_op *id, list.addr = srv_mr->iu->dma_addr; list.length = sizeof(*msg); - list.lkey = sess->s.dev->ib_pd->local_dma_lkey; + list.lkey = srv_path->s.dev->ib_pd->local_dma_lkey; imm_wr.wr.sg_list = &list; imm_wr.wr.num_sge = 1; imm_wr.wr.opcode = IB_WR_SEND_WITH_IMM; - ib_dma_sync_single_for_device(sess->s.dev->ib_dev, + ib_dma_sync_single_for_device(srv_path->s.dev->ib_dev, srv_mr->iu->dma_addr, srv_mr->iu->size, DMA_TO_DEVICE); } else { @@ -444,11 +440,11 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, return err; } -void close_sess(struct rtrs_srv_sess *sess) +void close_path(struct rtrs_srv_path *srv_path) { - if (rtrs_srv_change_state(sess, RTRS_SRV_CLOSING)) - queue_work(rtrs_wq, &sess->close_work); - WARN_ON(sess->state != RTRS_SRV_CLOSING); + if (rtrs_srv_change_state(srv_path, RTRS_SRV_CLOSING)) + queue_work(rtrs_wq, &srv_path->close_work); + WARN_ON(srv_path->state != RTRS_SRV_CLOSING); } static inline const char *rtrs_srv_state_str(enum rtrs_srv_state state) @@ -480,35 +476,35 @@ static inline const char *rtrs_srv_state_str(enum rtrs_srv_state state) */ bool rtrs_srv_resp_rdma(struct rtrs_srv_op *id, int status) { - struct rtrs_srv_sess *sess; + struct rtrs_srv_path *srv_path; struct rtrs_srv_con *con; - struct rtrs_sess *s; + struct rtrs_path *s; int err; if (WARN_ON(!id)) return true; con = id->con; - s = con->c.sess; - sess = to_srv_sess(s); + s = con->c.path; + srv_path = to_srv_path(s); id->status = status; - if (sess->state != RTRS_SRV_CONNECTED) { + if (srv_path->state != RTRS_SRV_CONNECTED) { rtrs_err_rl(s, - "Sending I/O response failed, session %s is disconnected, sess state %s\n", - kobject_name(&sess->kobj), - rtrs_srv_state_str(sess->state)); + "Sending I/O response failed, server path %s is disconnected, path state %s\n", + kobject_name(&srv_path->kobj), + rtrs_srv_state_str(srv_path->state)); goto out; } if (always_invalidate) { - struct rtrs_srv_mr *mr = 
&sess->mrs[id->msg_id]; + struct rtrs_srv_mr *mr = &srv_path->mrs[id->msg_id]; ib_update_fast_reg_key(mr->mr, ib_inc_rkey(mr->mr->rkey)); } if (atomic_sub_return(1, &con->c.sq_wr_avail) < 0) { - rtrs_err(s, "IB send queue full: sess=%s cid=%d\n", - kobject_name(&sess->kobj), + rtrs_err(s, "IB send queue full: srv_path=%s cid=%d\n", + kobject_name(&srv_path->kobj), con->c.cid); atomic_add(1, &con->c.sq_wr_avail); spin_lock(&con->rsp_wr_wait_lock); @@ -523,12 +519,12 @@ bool rtrs_srv_resp_rdma(struct rtrs_srv_op *id, int status) err = rdma_write_sg(id); if (err) { - rtrs_err_rl(s, "IO response failed: %d: sess=%s\n", err, - kobject_name(&sess->kobj)); - close_sess(sess); + rtrs_err_rl(s, "IO response failed: %d: srv_path=%s\n", err, + kobject_name(&srv_path->kobj)); + close_path(srv_path); } out: - rtrs_srv_put_ops_ids(sess); + rtrs_srv_put_ops_ids(srv_path); return true; } EXPORT_SYMBOL(rtrs_srv_resp_rdma); @@ -538,33 +534,33 @@ EXPORT_SYMBOL(rtrs_srv_resp_rdma); * @srv: Session pointer * @priv: The private pointer that is associated with the session. 
*/ -void rtrs_srv_set_sess_priv(struct rtrs_srv *srv, void *priv) +void rtrs_srv_set_sess_priv(struct rtrs_srv_sess *srv, void *priv) { srv->priv = priv; } EXPORT_SYMBOL(rtrs_srv_set_sess_priv); -static void unmap_cont_bufs(struct rtrs_srv_sess *sess) +static void unmap_cont_bufs(struct rtrs_srv_path *srv_path) { int i; - for (i = 0; i < sess->mrs_num; i++) { + for (i = 0; i < srv_path->mrs_num; i++) { struct rtrs_srv_mr *srv_mr; - srv_mr = &sess->mrs[i]; - rtrs_iu_free(srv_mr->iu, sess->s.dev->ib_dev, 1); + srv_mr = &srv_path->mrs[i]; + rtrs_iu_free(srv_mr->iu, srv_path->s.dev->ib_dev, 1); ib_dereg_mr(srv_mr->mr); - ib_dma_unmap_sg(sess->s.dev->ib_dev, srv_mr->sgt.sgl, + ib_dma_unmap_sg(srv_path->s.dev->ib_dev, srv_mr->sgt.sgl, srv_mr->sgt.nents, DMA_BIDIRECTIONAL); sg_free_table(&srv_mr->sgt); } - kfree(sess->mrs); + kfree(srv_path->mrs); } -static int map_cont_bufs(struct rtrs_srv_sess *sess) +static int map_cont_bufs(struct rtrs_srv_path *srv_path) { - struct rtrs_srv *srv = sess->srv; - struct rtrs_sess *ss = &sess->s; + struct rtrs_srv_sess *srv = srv_path->srv; + struct rtrs_path *ss = &srv_path->s; int i, mri, err, mrs_num; unsigned int chunk_bits; int chunks_per_mr = 1; @@ -581,23 +577,23 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess) mrs_num = srv->queue_depth; } else { chunks_per_mr = - sess->s.dev->ib_dev->attrs.max_fast_reg_page_list_len; + srv_path->s.dev->ib_dev->attrs.max_fast_reg_page_list_len; mrs_num = DIV_ROUND_UP(srv->queue_depth, chunks_per_mr); chunks_per_mr = DIV_ROUND_UP(srv->queue_depth, mrs_num); } - sess->mrs = kcalloc(mrs_num, sizeof(*sess->mrs), GFP_KERNEL); - if (!sess->mrs) + srv_path->mrs = kcalloc(mrs_num, sizeof(*srv_path->mrs), GFP_KERNEL); + if (!srv_path->mrs) return -ENOMEM; - sess->mrs_num = mrs_num; + srv_path->mrs_num = mrs_num; for (mri = 0; mri < mrs_num; mri++) { - struct rtrs_srv_mr *srv_mr = &sess->mrs[mri]; + struct rtrs_srv_mr *srv_mr = &srv_path->mrs[mri]; struct sg_table *sgt = &srv_mr->sgt; struct 
scatterlist *s; struct ib_mr *mr; - int nr, chunks; + int nr, nr_sgt, chunks; chunks = chunks_per_mr * mri; if (!always_invalidate) @@ -612,19 +608,19 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess) sg_set_page(s, srv->chunks[chunks + i], max_chunk_size, 0); - nr = ib_dma_map_sg(sess->s.dev->ib_dev, sgt->sgl, + nr_sgt = ib_dma_map_sg(srv_path->s.dev->ib_dev, sgt->sgl, sgt->nents, DMA_BIDIRECTIONAL); - if (nr < sgt->nents) { - err = nr < 0 ? nr : -EINVAL; + if (!nr_sgt) { + err = -EINVAL; goto free_sg; } - mr = ib_alloc_mr(sess->s.dev->ib_pd, IB_MR_TYPE_MEM_REG, - sgt->nents); + mr = ib_alloc_mr(srv_path->s.dev->ib_pd, IB_MR_TYPE_MEM_REG, + nr_sgt); if (IS_ERR(mr)) { err = PTR_ERR(mr); goto unmap_sg; } - nr = ib_map_mr_sg(mr, sgt->sgl, sgt->nents, + nr = ib_map_mr_sg(mr, sgt->sgl, nr_sgt, NULL, max_chunk_size); if (nr < 0 || nr < sgt->nents) { err = nr < 0 ? nr : -EINVAL; @@ -634,7 +630,7 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess) if (always_invalidate) { srv_mr->iu = rtrs_iu_alloc(1, sizeof(struct rtrs_msg_rkey_rsp), - GFP_KERNEL, sess->s.dev->ib_dev, + GFP_KERNEL, srv_path->s.dev->ib_dev, DMA_TO_DEVICE, rtrs_srv_rdma_done); if (!srv_mr->iu) { err = -ENOMEM; @@ -643,8 +639,8 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess) } } /* Eventually dma addr for each chunk can be cached */ - for_each_sg(sgt->sgl, s, sgt->orig_nents, i) - sess->dma_addr[chunks + i] = sg_dma_address(s); + for_each_sg(sgt->sgl, s, nr_sgt, i) + srv_path->dma_addr[chunks + i] = sg_dma_address(s); ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); srv_mr->mr = mr; @@ -652,75 +648,75 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess) continue; err: while (mri--) { - srv_mr = &sess->mrs[mri]; + srv_mr = &srv_path->mrs[mri]; sgt = &srv_mr->sgt; mr = srv_mr->mr; - rtrs_iu_free(srv_mr->iu, sess->s.dev->ib_dev, 1); + rtrs_iu_free(srv_mr->iu, srv_path->s.dev->ib_dev, 1); dereg_mr: ib_dereg_mr(mr); unmap_sg: - ib_dma_unmap_sg(sess->s.dev->ib_dev, sgt->sgl, + 
ib_dma_unmap_sg(srv_path->s.dev->ib_dev, sgt->sgl, sgt->nents, DMA_BIDIRECTIONAL); free_sg: sg_free_table(sgt); } - kfree(sess->mrs); + kfree(srv_path->mrs); return err; } chunk_bits = ilog2(srv->queue_depth - 1) + 1; - sess->mem_bits = (MAX_IMM_PAYL_BITS - chunk_bits); + srv_path->mem_bits = (MAX_IMM_PAYL_BITS - chunk_bits); return 0; } static void rtrs_srv_hb_err_handler(struct rtrs_con *c) { - close_sess(to_srv_sess(c->sess)); + close_path(to_srv_path(c->path)); } -static void rtrs_srv_init_hb(struct rtrs_srv_sess *sess) +static void rtrs_srv_init_hb(struct rtrs_srv_path *srv_path) { - rtrs_init_hb(&sess->s, &io_comp_cqe, + rtrs_init_hb(&srv_path->s, &io_comp_cqe, RTRS_HB_INTERVAL_MS, RTRS_HB_MISSED_MAX, rtrs_srv_hb_err_handler, rtrs_wq); } -static void rtrs_srv_start_hb(struct rtrs_srv_sess *sess) +static void rtrs_srv_start_hb(struct rtrs_srv_path *srv_path) { - rtrs_start_hb(&sess->s); + rtrs_start_hb(&srv_path->s); } -static void rtrs_srv_stop_hb(struct rtrs_srv_sess *sess) +static void rtrs_srv_stop_hb(struct rtrs_srv_path *srv_path) { - rtrs_stop_hb(&sess->s); + rtrs_stop_hb(&srv_path->s); } static void rtrs_srv_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc) { struct rtrs_srv_con *con = to_srv_con(wc->qp->qp_context); - struct rtrs_sess *s = con->c.sess; - struct rtrs_srv_sess *sess = to_srv_sess(s); + struct rtrs_path *s = con->c.path; + struct rtrs_srv_path *srv_path = to_srv_path(s); struct rtrs_iu *iu; iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); - rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); + rtrs_iu_free(iu, srv_path->s.dev->ib_dev, 1); if (wc->status != IB_WC_SUCCESS) { rtrs_err(s, "Sess info response send failed: %s\n", ib_wc_status_msg(wc->status)); - close_sess(sess); + close_path(srv_path); return; } WARN_ON(wc->opcode != IB_WC_SEND); } -static void rtrs_srv_sess_up(struct rtrs_srv_sess *sess) +static void rtrs_srv_path_up(struct rtrs_srv_path *srv_path) { - struct rtrs_srv *srv = sess->srv; + struct rtrs_srv_sess *srv = srv_path->srv; 
struct rtrs_srv_ctx *ctx = srv->ctx; int up; @@ -731,18 +727,18 @@ static void rtrs_srv_sess_up(struct rtrs_srv_sess *sess) mutex_unlock(&srv->paths_ev_mutex); /* Mark session as established */ - sess->established = true; + srv_path->established = true; } -static void rtrs_srv_sess_down(struct rtrs_srv_sess *sess) +static void rtrs_srv_path_down(struct rtrs_srv_path *srv_path) { - struct rtrs_srv *srv = sess->srv; + struct rtrs_srv_sess *srv = srv_path->srv; struct rtrs_srv_ctx *ctx = srv->ctx; - if (!sess->established) + if (!srv_path->established) return; - sess->established = false; + srv_path->established = false; mutex_lock(&srv->paths_ev_mutex); WARN_ON(!srv->paths_up); if (--srv->paths_up == 0) @@ -750,11 +746,11 @@ static void rtrs_srv_sess_down(struct rtrs_srv_sess *sess) mutex_unlock(&srv->paths_ev_mutex); } -static bool exist_sessname(struct rtrs_srv_ctx *ctx, - const char *sessname, const uuid_t *path_uuid) +static bool exist_pathname(struct rtrs_srv_ctx *ctx, + const char *pathname, const uuid_t *path_uuid) { - struct rtrs_srv *srv; - struct rtrs_srv_sess *sess; + struct rtrs_srv_sess *srv; + struct rtrs_srv_path *srv_path; bool found = false; mutex_lock(&ctx->srv_mutex); @@ -767,9 +763,9 @@ static bool exist_sessname(struct rtrs_srv_ctx *ctx, continue; } - list_for_each_entry(sess, &srv->paths_list, s.entry) { - if (strlen(sess->s.sessname) == strlen(sessname) && - !strcmp(sess->s.sessname, sessname)) { + list_for_each_entry(srv_path, &srv->paths_list, s.entry) { + if (strlen(srv_path->s.sessname) == strlen(pathname) && + !strcmp(srv_path->s.sessname, pathname)) { found = true; break; } @@ -782,14 +778,14 @@ static bool exist_sessname(struct rtrs_srv_ctx *ctx, return found; } -static int post_recv_sess(struct rtrs_srv_sess *sess); +static int post_recv_path(struct rtrs_srv_path *srv_path); static int rtrs_rdma_do_reject(struct rdma_cm_id *cm_id, int errno); static int process_info_req(struct rtrs_srv_con *con, struct rtrs_msg_info_req *msg) { - struct 
rtrs_sess *s = con->c.sess; - struct rtrs_srv_sess *sess = to_srv_sess(s); + struct rtrs_path *s = con->c.path; + struct rtrs_srv_path *srv_path = to_srv_path(s); struct ib_send_wr *reg_wr = NULL; struct rtrs_msg_info_rsp *rsp; struct rtrs_iu *tx_iu; @@ -797,31 +793,32 @@ static int process_info_req(struct rtrs_srv_con *con, int mri, err; size_t tx_sz; - err = post_recv_sess(sess); + err = post_recv_path(srv_path); if (err) { - rtrs_err(s, "post_recv_sess(), err: %d\n", err); + rtrs_err(s, "post_recv_path(), err: %d\n", err); return err; } - if (strchr(msg->sessname, '/') || strchr(msg->sessname, '.')) { - rtrs_err(s, "sessname cannot contain / and .\n"); + if (strchr(msg->pathname, '/') || strchr(msg->pathname, '.')) { + rtrs_err(s, "pathname cannot contain / and .\n"); return -EINVAL; } - if (exist_sessname(sess->srv->ctx, - msg->sessname, &sess->srv->paths_uuid)) { - rtrs_err(s, "sessname is duplicated: %s\n", msg->sessname); + if (exist_pathname(srv_path->srv->ctx, + msg->pathname, &srv_path->srv->paths_uuid)) { + rtrs_err(s, "pathname is duplicated: %s\n", msg->pathname); return -EPERM; } - strscpy(sess->s.sessname, msg->sessname, sizeof(sess->s.sessname)); + strscpy(srv_path->s.sessname, msg->pathname, + sizeof(srv_path->s.sessname)); - rwr = kcalloc(sess->mrs_num, sizeof(*rwr), GFP_KERNEL); + rwr = kcalloc(srv_path->mrs_num, sizeof(*rwr), GFP_KERNEL); if (!rwr) return -ENOMEM; tx_sz = sizeof(*rsp); - tx_sz += sizeof(rsp->desc[0]) * sess->mrs_num; - tx_iu = rtrs_iu_alloc(1, tx_sz, GFP_KERNEL, sess->s.dev->ib_dev, + tx_sz += sizeof(rsp->desc[0]) * srv_path->mrs_num; + tx_iu = rtrs_iu_alloc(1, tx_sz, GFP_KERNEL, srv_path->s.dev->ib_dev, DMA_TO_DEVICE, rtrs_srv_info_rsp_done); if (!tx_iu) { err = -ENOMEM; @@ -830,10 +827,10 @@ static int process_info_req(struct rtrs_srv_con *con, rsp = tx_iu->buf; rsp->type = cpu_to_le16(RTRS_MSG_INFO_RSP); - rsp->sg_cnt = cpu_to_le16(sess->mrs_num); + rsp->sg_cnt = cpu_to_le16(srv_path->mrs_num); - for (mri = 0; mri < 
sess->mrs_num; mri++) { - struct ib_mr *mr = sess->mrs[mri].mr; + for (mri = 0; mri < srv_path->mrs_num; mri++) { + struct ib_mr *mr = srv_path->mrs[mri].mr; rsp->desc[mri].addr = cpu_to_le64(mr->iova); rsp->desc[mri].key = cpu_to_le32(mr->rkey); @@ -854,13 +851,13 @@ static int process_info_req(struct rtrs_srv_con *con, reg_wr = &rwr[mri].wr; } - err = rtrs_srv_create_sess_files(sess); + err = rtrs_srv_create_path_files(srv_path); if (err) goto iu_free; - kobject_get(&sess->kobj); - get_device(&sess->srv->dev); - rtrs_srv_change_state(sess, RTRS_SRV_CONNECTED); - rtrs_srv_start_hb(sess); + kobject_get(&srv_path->kobj); + get_device(&srv_path->srv->dev); + rtrs_srv_change_state(srv_path, RTRS_SRV_CONNECTED); + rtrs_srv_start_hb(srv_path); /* * We do not account number of established connections at the current @@ -868,9 +865,10 @@ static int process_info_req(struct rtrs_srv_con *con, * all connections are successfully established. Thus, simply notify * listener with a proper event if we are the first path. 
*/ - rtrs_srv_sess_up(sess); + rtrs_srv_path_up(srv_path); - ib_dma_sync_single_for_device(sess->s.dev->ib_dev, tx_iu->dma_addr, + ib_dma_sync_single_for_device(srv_path->s.dev->ib_dev, + tx_iu->dma_addr, tx_iu->size, DMA_TO_DEVICE); /* Send info response */ @@ -878,7 +876,7 @@ static int process_info_req(struct rtrs_srv_con *con, if (err) { rtrs_err(s, "rtrs_iu_post_send(), err: %d\n", err); iu_free: - rtrs_iu_free(tx_iu, sess->s.dev->ib_dev, 1); + rtrs_iu_free(tx_iu, srv_path->s.dev->ib_dev, 1); } rwr_free: kfree(rwr); @@ -889,8 +887,8 @@ rwr_free: static void rtrs_srv_info_req_done(struct ib_cq *cq, struct ib_wc *wc) { struct rtrs_srv_con *con = to_srv_con(wc->qp->qp_context); - struct rtrs_sess *s = con->c.sess; - struct rtrs_srv_sess *sess = to_srv_sess(s); + struct rtrs_path *s = con->c.path; + struct rtrs_srv_path *srv_path = to_srv_path(s); struct rtrs_msg_info_req *msg; struct rtrs_iu *iu; int err; @@ -910,7 +908,7 @@ static void rtrs_srv_info_req_done(struct ib_cq *cq, struct ib_wc *wc) wc->byte_len); goto close; } - ib_dma_sync_single_for_cpu(sess->s.dev->ib_dev, iu->dma_addr, + ib_dma_sync_single_for_cpu(srv_path->s.dev->ib_dev, iu->dma_addr, iu->size, DMA_FROM_DEVICE); msg = iu->buf; if (le16_to_cpu(msg->type) != RTRS_MSG_INFO_REQ) { @@ -923,22 +921,22 @@ static void rtrs_srv_info_req_done(struct ib_cq *cq, struct ib_wc *wc) goto close; out: - rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); + rtrs_iu_free(iu, srv_path->s.dev->ib_dev, 1); return; close: - close_sess(sess); + close_path(srv_path); goto out; } static int post_recv_info_req(struct rtrs_srv_con *con) { - struct rtrs_sess *s = con->c.sess; - struct rtrs_srv_sess *sess = to_srv_sess(s); + struct rtrs_path *s = con->c.path; + struct rtrs_srv_path *srv_path = to_srv_path(s); struct rtrs_iu *rx_iu; int err; rx_iu = rtrs_iu_alloc(1, sizeof(struct rtrs_msg_info_req), - GFP_KERNEL, sess->s.dev->ib_dev, + GFP_KERNEL, srv_path->s.dev->ib_dev, DMA_FROM_DEVICE, rtrs_srv_info_req_done); if (!rx_iu) return 
-ENOMEM; @@ -946,7 +944,7 @@ static int post_recv_info_req(struct rtrs_srv_con *con) err = rtrs_iu_post_recv(&con->c, rx_iu); if (err) { rtrs_err(s, "rtrs_iu_post_recv(), err: %d\n", err); - rtrs_iu_free(rx_iu, sess->s.dev->ib_dev, 1); + rtrs_iu_free(rx_iu, srv_path->s.dev->ib_dev, 1); return err; } @@ -966,20 +964,20 @@ static int post_recv_io(struct rtrs_srv_con *con, size_t q_size) return 0; } -static int post_recv_sess(struct rtrs_srv_sess *sess) +static int post_recv_path(struct rtrs_srv_path *srv_path) { - struct rtrs_srv *srv = sess->srv; - struct rtrs_sess *s = &sess->s; + struct rtrs_srv_sess *srv = srv_path->srv; + struct rtrs_path *s = &srv_path->s; size_t q_size; int err, cid; - for (cid = 0; cid < sess->s.con_num; cid++) { + for (cid = 0; cid < srv_path->s.con_num; cid++) { if (cid == 0) q_size = SERVICE_CON_QUEUE_DEPTH; else q_size = srv->queue_depth; - err = post_recv_io(to_srv_con(sess->s.con[cid]), q_size); + err = post_recv_io(to_srv_con(srv_path->s.con[cid]), q_size); if (err) { rtrs_err(s, "post_recv_io(), err: %d\n", err); return err; @@ -993,9 +991,9 @@ static void process_read(struct rtrs_srv_con *con, struct rtrs_msg_rdma_read *msg, u32 buf_id, u32 off) { - struct rtrs_sess *s = con->c.sess; - struct rtrs_srv_sess *sess = to_srv_sess(s); - struct rtrs_srv *srv = sess->srv; + struct rtrs_path *s = con->c.path; + struct rtrs_srv_path *srv_path = to_srv_path(s); + struct rtrs_srv_sess *srv = srv_path->srv; struct rtrs_srv_ctx *ctx = srv->ctx; struct rtrs_srv_op *id; @@ -1003,10 +1001,10 @@ static void process_read(struct rtrs_srv_con *con, void *data; int ret; - if (sess->state != RTRS_SRV_CONNECTED) { + if (srv_path->state != RTRS_SRV_CONNECTED) { rtrs_err_rl(s, "Processing read request failed, session is disconnected, sess state %s\n", - rtrs_srv_state_str(sess->state)); + rtrs_srv_state_str(srv_path->state)); return; } if (msg->sg_cnt != 1 && msg->sg_cnt != 0) { @@ -1014,9 +1012,9 @@ static void process_read(struct rtrs_srv_con *con, 
"Processing read request failed, invalid message\n"); return; } - rtrs_srv_get_ops_ids(sess); - rtrs_srv_update_rdma_stats(sess->stats, off, READ); - id = sess->ops_ids[buf_id]; + rtrs_srv_get_ops_ids(srv_path); + rtrs_srv_update_rdma_stats(srv_path->stats, off, READ); + id = srv_path->ops_ids[buf_id]; id->con = con; id->dir = READ; id->msg_id = buf_id; @@ -1024,7 +1022,7 @@ static void process_read(struct rtrs_srv_con *con, usr_len = le16_to_cpu(msg->usr_len); data_len = off - usr_len; data = page_address(srv->chunks[buf_id]); - ret = ctx->ops.rdma_ev(srv->priv, id, READ, data, data_len, + ret = ctx->ops.rdma_ev(srv->priv, id, data, data_len, data + data_len, usr_len); if (ret) { @@ -1042,18 +1040,18 @@ send_err_msg: rtrs_err_rl(s, "Sending err msg for failed RDMA-Write-Req failed, msg_id %d, err: %d\n", buf_id, ret); - close_sess(sess); + close_path(srv_path); } - rtrs_srv_put_ops_ids(sess); + rtrs_srv_put_ops_ids(srv_path); } static void process_write(struct rtrs_srv_con *con, struct rtrs_msg_rdma_write *req, u32 buf_id, u32 off) { - struct rtrs_sess *s = con->c.sess; - struct rtrs_srv_sess *sess = to_srv_sess(s); - struct rtrs_srv *srv = sess->srv; + struct rtrs_path *s = con->c.path; + struct rtrs_srv_path *srv_path = to_srv_path(s); + struct rtrs_srv_sess *srv = srv_path->srv; struct rtrs_srv_ctx *ctx = srv->ctx; struct rtrs_srv_op *id; @@ -1061,15 +1059,15 @@ static void process_write(struct rtrs_srv_con *con, void *data; int ret; - if (sess->state != RTRS_SRV_CONNECTED) { + if (srv_path->state != RTRS_SRV_CONNECTED) { rtrs_err_rl(s, "Processing write request failed, session is disconnected, sess state %s\n", - rtrs_srv_state_str(sess->state)); + rtrs_srv_state_str(srv_path->state)); return; } - rtrs_srv_get_ops_ids(sess); - rtrs_srv_update_rdma_stats(sess->stats, off, WRITE); - id = sess->ops_ids[buf_id]; + rtrs_srv_get_ops_ids(srv_path); + rtrs_srv_update_rdma_stats(srv_path->stats, off, WRITE); + id = srv_path->ops_ids[buf_id]; id->con = con; id->dir = 
WRITE; id->msg_id = buf_id; @@ -1077,7 +1075,7 @@ static void process_write(struct rtrs_srv_con *con, usr_len = le16_to_cpu(req->usr_len); data_len = off - usr_len; data = page_address(srv->chunks[buf_id]); - ret = ctx->ops.rdma_ev(srv->priv, id, WRITE, data, data_len, + ret = ctx->ops.rdma_ev(srv->priv, id, data, data_len, data + data_len, usr_len); if (ret) { rtrs_err_rl(s, @@ -1094,20 +1092,21 @@ send_err_msg: rtrs_err_rl(s, "Processing write request failed, sending I/O response failed, msg_id %d, err: %d\n", buf_id, ret); - close_sess(sess); + close_path(srv_path); } - rtrs_srv_put_ops_ids(sess); + rtrs_srv_put_ops_ids(srv_path); } static void process_io_req(struct rtrs_srv_con *con, void *msg, u32 id, u32 off) { - struct rtrs_sess *s = con->c.sess; - struct rtrs_srv_sess *sess = to_srv_sess(s); + struct rtrs_path *s = con->c.path; + struct rtrs_srv_path *srv_path = to_srv_path(s); struct rtrs_msg_rdma_hdr *hdr; unsigned int type; - ib_dma_sync_single_for_cpu(sess->s.dev->ib_dev, sess->dma_addr[id], + ib_dma_sync_single_for_cpu(srv_path->s.dev->ib_dev, + srv_path->dma_addr[id], max_chunk_size, DMA_BIDIRECTIONAL); hdr = msg; type = le16_to_cpu(hdr->type); @@ -1129,7 +1128,7 @@ static void process_io_req(struct rtrs_srv_con *con, void *msg, return; err: - close_sess(sess); + close_path(srv_path); } static void rtrs_srv_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc) @@ -1137,16 +1136,16 @@ static void rtrs_srv_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc) struct rtrs_srv_mr *mr = container_of(wc->wr_cqe, typeof(*mr), inv_cqe); struct rtrs_srv_con *con = to_srv_con(wc->qp->qp_context); - struct rtrs_sess *s = con->c.sess; - struct rtrs_srv_sess *sess = to_srv_sess(s); - struct rtrs_srv *srv = sess->srv; + struct rtrs_path *s = con->c.path; + struct rtrs_srv_path *srv_path = to_srv_path(s); + struct rtrs_srv_sess *srv = srv_path->srv; u32 msg_id, off; void *data; if (wc->status != IB_WC_SUCCESS) { rtrs_err(s, "Failed IB_WR_LOCAL_INV: %s\n", 
ib_wc_status_msg(wc->status)); - close_sess(sess); + close_path(srv_path); } msg_id = mr->msg_id; off = mr->msg_off; @@ -1194,9 +1193,9 @@ static void rtrs_rdma_process_wr_wait_list(struct rtrs_srv_con *con) static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc) { struct rtrs_srv_con *con = to_srv_con(wc->qp->qp_context); - struct rtrs_sess *s = con->c.sess; - struct rtrs_srv_sess *sess = to_srv_sess(s); - struct rtrs_srv *srv = sess->srv; + struct rtrs_path *s = con->c.path; + struct rtrs_srv_path *srv_path = to_srv_path(s); + struct rtrs_srv_sess *srv = srv_path->srv; u32 imm_type, imm_payload; int err; @@ -1206,7 +1205,7 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc) "%s (wr_cqe: %p, type: %d, vendor_err: 0x%x, len: %u)\n", ib_wc_status_msg(wc->status), wc->wr_cqe, wc->opcode, wc->vendor_err, wc->byte_len); - close_sess(sess); + close_path(srv_path); } return; } @@ -1222,7 +1221,7 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc) err = rtrs_post_recv_empty(&con->c, &io_comp_cqe); if (err) { rtrs_err(s, "rtrs_post_recv(), err: %d\n", err); - close_sess(sess); + close_path(srv_path); break; } rtrs_from_imm(be32_to_cpu(wc->ex.imm_data), @@ -1231,16 +1230,16 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc) u32 msg_id, off; void *data; - msg_id = imm_payload >> sess->mem_bits; - off = imm_payload & ((1 << sess->mem_bits) - 1); + msg_id = imm_payload >> srv_path->mem_bits; + off = imm_payload & ((1 << srv_path->mem_bits) - 1); if (msg_id >= srv->queue_depth || off >= max_chunk_size) { rtrs_err(s, "Wrong msg_id %u, off %u\n", msg_id, off); - close_sess(sess); + close_path(srv_path); return; } if (always_invalidate) { - struct rtrs_srv_mr *mr = &sess->mrs[msg_id]; + struct rtrs_srv_mr *mr = &srv_path->mrs[msg_id]; mr->msg_off = off; mr->msg_id = msg_id; @@ -1248,7 +1247,7 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc) if (err) { rtrs_err(s, "rtrs_post_recv(), err: %d\n", 
err); - close_sess(sess); + close_path(srv_path); break; } } else { @@ -1257,10 +1256,10 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc) } } else if (imm_type == RTRS_HB_MSG_IMM) { WARN_ON(con->c.cid); - rtrs_send_hb_ack(&sess->s); + rtrs_send_hb_ack(&srv_path->s); } else if (imm_type == RTRS_HB_ACK_IMM) { WARN_ON(con->c.cid); - sess->s.hb_missed_cnt = 0; + srv_path->s.hb_missed_cnt = 0; } else { rtrs_wrn(s, "Unknown IMM type %u\n", imm_type); } @@ -1284,22 +1283,23 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc) } /** - * rtrs_srv_get_sess_name() - Get rtrs_srv peer hostname. + * rtrs_srv_get_path_name() - Get rtrs_srv peer hostname. * @srv: Session - * @sessname: Sessname buffer + * @pathname: Pathname buffer * @len: Length of sessname buffer */ -int rtrs_srv_get_sess_name(struct rtrs_srv *srv, char *sessname, size_t len) +int rtrs_srv_get_path_name(struct rtrs_srv_sess *srv, char *pathname, + size_t len) { - struct rtrs_srv_sess *sess; + struct rtrs_srv_path *srv_path; int err = -ENOTCONN; mutex_lock(&srv->paths_mutex); - list_for_each_entry(sess, &srv->paths_list, s.entry) { - if (sess->state != RTRS_SRV_CONNECTED) + list_for_each_entry(srv_path, &srv->paths_list, s.entry) { + if (srv_path->state != RTRS_SRV_CONNECTED) continue; - strscpy(sessname, sess->s.sessname, - min_t(size_t, sizeof(sess->s.sessname), len)); + strscpy(pathname, srv_path->s.sessname, + min_t(size_t, sizeof(srv_path->s.sessname), len)); err = 0; break; } @@ -1307,50 +1307,51 @@ int rtrs_srv_get_sess_name(struct rtrs_srv *srv, char *sessname, size_t len) return err; } -EXPORT_SYMBOL(rtrs_srv_get_sess_name); +EXPORT_SYMBOL(rtrs_srv_get_path_name); /** * rtrs_srv_get_queue_depth() - Get rtrs_srv qdepth. 
* @srv: Session */ -int rtrs_srv_get_queue_depth(struct rtrs_srv *srv) +int rtrs_srv_get_queue_depth(struct rtrs_srv_sess *srv) { return srv->queue_depth; } EXPORT_SYMBOL(rtrs_srv_get_queue_depth); -static int find_next_bit_ring(struct rtrs_srv_sess *sess) +static int find_next_bit_ring(struct rtrs_srv_path *srv_path) { - struct ib_device *ib_dev = sess->s.dev->ib_dev; + struct ib_device *ib_dev = srv_path->s.dev->ib_dev; int v; - v = cpumask_next(sess->cur_cq_vector, &cq_affinity_mask); + v = cpumask_next(srv_path->cur_cq_vector, &cq_affinity_mask); if (v >= nr_cpu_ids || v >= ib_dev->num_comp_vectors) v = cpumask_first(&cq_affinity_mask); return v; } -static int rtrs_srv_get_next_cq_vector(struct rtrs_srv_sess *sess) +static int rtrs_srv_get_next_cq_vector(struct rtrs_srv_path *srv_path) { - sess->cur_cq_vector = find_next_bit_ring(sess); + srv_path->cur_cq_vector = find_next_bit_ring(srv_path); - return sess->cur_cq_vector; + return srv_path->cur_cq_vector; } static void rtrs_srv_dev_release(struct device *dev) { - struct rtrs_srv *srv = container_of(dev, struct rtrs_srv, dev); + struct rtrs_srv_sess *srv = container_of(dev, struct rtrs_srv_sess, + dev); kfree(srv); } -static void free_srv(struct rtrs_srv *srv) +static void free_srv(struct rtrs_srv_sess *srv) { int i; WARN_ON(refcount_read(&srv->refcount)); for (i = 0; i < srv->queue_depth; i++) - mempool_free(srv->chunks[i], chunk_pool); + __free_pages(srv->chunks[i], get_order(max_chunk_size)); kfree(srv->chunks); mutex_destroy(&srv->paths_mutex); mutex_destroy(&srv->paths_ev_mutex); @@ -1358,11 +1359,11 @@ static void free_srv(struct rtrs_srv *srv) put_device(&srv->dev); } -static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, +static struct rtrs_srv_sess *get_or_create_srv(struct rtrs_srv_ctx *ctx, const uuid_t *paths_uuid, bool first_conn) { - struct rtrs_srv *srv; + struct rtrs_srv_sess *srv; int i; mutex_lock(&ctx->srv_mutex); @@ -1403,7 +1404,8 @@ static struct rtrs_srv 
*get_or_create_srv(struct rtrs_srv_ctx *ctx, goto err_free_srv; for (i = 0; i < srv->queue_depth; i++) { - srv->chunks[i] = mempool_alloc(chunk_pool, GFP_KERNEL); + srv->chunks[i] = alloc_pages(GFP_KERNEL, + get_order(max_chunk_size)); if (!srv->chunks[i]) goto err_free_chunks; } @@ -1416,7 +1418,7 @@ static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, err_free_chunks: while (i--) - mempool_free(srv->chunks[i], chunk_pool); + __free_pages(srv->chunks[i], get_order(max_chunk_size)); kfree(srv->chunks); err_free_srv: @@ -1424,7 +1426,7 @@ err_free_srv: return ERR_PTR(-ENOMEM); } -static void put_srv(struct rtrs_srv *srv) +static void put_srv(struct rtrs_srv_sess *srv) { if (refcount_dec_and_test(&srv->refcount)) { struct rtrs_srv_ctx *ctx = srv->ctx; @@ -1438,23 +1440,23 @@ static void put_srv(struct rtrs_srv *srv) } } -static void __add_path_to_srv(struct rtrs_srv *srv, - struct rtrs_srv_sess *sess) +static void __add_path_to_srv(struct rtrs_srv_sess *srv, + struct rtrs_srv_path *srv_path) { - list_add_tail(&sess->s.entry, &srv->paths_list); + list_add_tail(&srv_path->s.entry, &srv->paths_list); srv->paths_num++; WARN_ON(srv->paths_num >= MAX_PATHS_NUM); } -static void del_path_from_srv(struct rtrs_srv_sess *sess) +static void del_path_from_srv(struct rtrs_srv_path *srv_path) { - struct rtrs_srv *srv = sess->srv; + struct rtrs_srv_sess *srv = srv_path->srv; if (WARN_ON(!srv)) return; mutex_lock(&srv->paths_mutex); - list_del(&sess->s.entry); + list_del(&srv_path->s.entry); WARN_ON(!srv->paths_num); srv->paths_num--; mutex_unlock(&srv->paths_mutex); @@ -1484,47 +1486,48 @@ static int sockaddr_cmp(const struct sockaddr *a, const struct sockaddr *b) } } -static bool __is_path_w_addr_exists(struct rtrs_srv *srv, +static bool __is_path_w_addr_exists(struct rtrs_srv_sess *srv, struct rdma_addr *addr) { - struct rtrs_srv_sess *sess; + struct rtrs_srv_path *srv_path; - list_for_each_entry(sess, &srv->paths_list, s.entry) - if (!sockaddr_cmp((struct sockaddr 
*)&sess->s.dst_addr, + list_for_each_entry(srv_path, &srv->paths_list, s.entry) + if (!sockaddr_cmp((struct sockaddr *)&srv_path->s.dst_addr, (struct sockaddr *)&addr->dst_addr) && - !sockaddr_cmp((struct sockaddr *)&sess->s.src_addr, + !sockaddr_cmp((struct sockaddr *)&srv_path->s.src_addr, (struct sockaddr *)&addr->src_addr)) return true; return false; } -static void free_sess(struct rtrs_srv_sess *sess) +static void free_path(struct rtrs_srv_path *srv_path) { - if (sess->kobj.state_in_sysfs) { - kobject_del(&sess->kobj); - kobject_put(&sess->kobj); + if (srv_path->kobj.state_in_sysfs) { + kobject_del(&srv_path->kobj); + kobject_put(&srv_path->kobj); } else { - kfree(sess->stats); - kfree(sess); + free_percpu(srv_path->stats->rdma_stats); + kfree(srv_path->stats); + kfree(srv_path); } } static void rtrs_srv_close_work(struct work_struct *work) { - struct rtrs_srv_sess *sess; + struct rtrs_srv_path *srv_path; struct rtrs_srv_con *con; int i; - sess = container_of(work, typeof(*sess), close_work); + srv_path = container_of(work, typeof(*srv_path), close_work); - rtrs_srv_destroy_sess_files(sess); - rtrs_srv_stop_hb(sess); + rtrs_srv_destroy_path_files(srv_path); + rtrs_srv_stop_hb(srv_path); - for (i = 0; i < sess->s.con_num; i++) { - if (!sess->s.con[i]) + for (i = 0; i < srv_path->s.con_num; i++) { + if (!srv_path->s.con[i]) continue; - con = to_srv_con(sess->s.con[i]); + con = to_srv_con(srv_path->s.con[i]); rdma_disconnect(con->c.cm_id); ib_drain_qp(con->c.qp); } @@ -1533,41 +1536,41 @@ static void rtrs_srv_close_work(struct work_struct *work) * Degrade ref count to the usual model with a single shared * atomic_t counter */ - percpu_ref_kill(&sess->ids_inflight_ref); + percpu_ref_kill(&srv_path->ids_inflight_ref); /* Wait for all completion */ - wait_for_completion(&sess->complete_done); + wait_for_completion(&srv_path->complete_done); /* Notify upper layer if we are the last path */ - rtrs_srv_sess_down(sess); + rtrs_srv_path_down(srv_path); - 
unmap_cont_bufs(sess); - rtrs_srv_free_ops_ids(sess); + unmap_cont_bufs(srv_path); + rtrs_srv_free_ops_ids(srv_path); - for (i = 0; i < sess->s.con_num; i++) { - if (!sess->s.con[i]) + for (i = 0; i < srv_path->s.con_num; i++) { + if (!srv_path->s.con[i]) continue; - con = to_srv_con(sess->s.con[i]); + con = to_srv_con(srv_path->s.con[i]); rtrs_cq_qp_destroy(&con->c); rdma_destroy_id(con->c.cm_id); kfree(con); } - rtrs_ib_dev_put(sess->s.dev); + rtrs_ib_dev_put(srv_path->s.dev); - del_path_from_srv(sess); - put_srv(sess->srv); - sess->srv = NULL; - rtrs_srv_change_state(sess, RTRS_SRV_CLOSED); + del_path_from_srv(srv_path); + put_srv(srv_path->srv); + srv_path->srv = NULL; + rtrs_srv_change_state(srv_path, RTRS_SRV_CLOSED); - kfree(sess->dma_addr); - kfree(sess->s.con); - free_sess(sess); + kfree(srv_path->dma_addr); + kfree(srv_path->s.con); + free_path(srv_path); } -static int rtrs_rdma_do_accept(struct rtrs_srv_sess *sess, +static int rtrs_rdma_do_accept(struct rtrs_srv_path *srv_path, struct rdma_cm_id *cm_id) { - struct rtrs_srv *srv = sess->srv; + struct rtrs_srv_sess *srv = srv_path->srv; struct rtrs_msg_conn_rsp msg; struct rdma_conn_param param; int err; @@ -1615,25 +1618,25 @@ static int rtrs_rdma_do_reject(struct rdma_cm_id *cm_id, int errno) return errno; } -static struct rtrs_srv_sess * -__find_sess(struct rtrs_srv *srv, const uuid_t *sess_uuid) +static struct rtrs_srv_path * +__find_path(struct rtrs_srv_sess *srv, const uuid_t *sess_uuid) { - struct rtrs_srv_sess *sess; + struct rtrs_srv_path *srv_path; - list_for_each_entry(sess, &srv->paths_list, s.entry) { - if (uuid_equal(&sess->s.uuid, sess_uuid)) - return sess; + list_for_each_entry(srv_path, &srv->paths_list, s.entry) { + if (uuid_equal(&srv_path->s.uuid, sess_uuid)) + return srv_path; } return NULL; } -static int create_con(struct rtrs_srv_sess *sess, +static int create_con(struct rtrs_srv_path *srv_path, struct rdma_cm_id *cm_id, unsigned int cid) { - struct rtrs_srv *srv = sess->srv; - 
struct rtrs_sess *s = &sess->s; + struct rtrs_srv_sess *srv = srv_path->srv; + struct rtrs_path *s = &srv_path->s; struct rtrs_srv_con *con; u32 cq_num, max_send_wr, max_recv_wr, wr_limit; @@ -1648,10 +1651,10 @@ static int create_con(struct rtrs_srv_sess *sess, spin_lock_init(&con->rsp_wr_wait_lock); INIT_LIST_HEAD(&con->rsp_wr_wait_list); con->c.cm_id = cm_id; - con->c.sess = &sess->s; + con->c.path = &srv_path->s; con->c.cid = cid; atomic_set(&con->c.wr_cnt, 1); - wr_limit = sess->s.dev->ib_dev->attrs.max_qp_wr; + wr_limit = srv_path->s.dev->ib_dev->attrs.max_qp_wr; if (con->c.cid == 0) { /* @@ -1684,10 +1687,10 @@ static int create_con(struct rtrs_srv_sess *sess, } cq_num = max_send_wr + max_recv_wr; atomic_set(&con->c.sq_wr_avail, max_send_wr); - cq_vector = rtrs_srv_get_next_cq_vector(sess); + cq_vector = rtrs_srv_get_next_cq_vector(srv_path); /* TODO: SOFTIRQ can be faster, but be careful with softirq context */ - err = rtrs_cq_qp_create(&sess->s, &con->c, 1, cq_vector, cq_num, + err = rtrs_cq_qp_create(&srv_path->s, &con->c, 1, cq_vector, cq_num, max_send_wr, max_recv_wr, IB_POLL_WORKQUEUE); if (err) { @@ -1699,8 +1702,8 @@ static int create_con(struct rtrs_srv_sess *sess, if (err) goto free_cqqp; } - WARN_ON(sess->s.con[cid]); - sess->s.con[cid] = &con->c; + WARN_ON(srv_path->s.con[cid]); + srv_path->s.con[cid] = &con->c; /* * Change context from server to current connection. 
The other @@ -1719,13 +1722,13 @@ err: return err; } -static struct rtrs_srv_sess *__alloc_sess(struct rtrs_srv *srv, +static struct rtrs_srv_path *__alloc_path(struct rtrs_srv_sess *srv, struct rdma_cm_id *cm_id, unsigned int con_num, unsigned int recon_cnt, const uuid_t *uuid) { - struct rtrs_srv_sess *sess; + struct rtrs_srv_path *srv_path; int err = -ENOMEM; char str[NAME_MAX]; struct rtrs_addr path; @@ -1739,74 +1742,82 @@ static struct rtrs_srv_sess *__alloc_sess(struct rtrs_srv *srv, pr_err("Path with same addr exists\n"); goto err; } - sess = kzalloc(sizeof(*sess), GFP_KERNEL); - if (!sess) + srv_path = kzalloc(sizeof(*srv_path), GFP_KERNEL); + if (!srv_path) goto err; - sess->stats = kzalloc(sizeof(*sess->stats), GFP_KERNEL); - if (!sess->stats) + srv_path->stats = kzalloc(sizeof(*srv_path->stats), GFP_KERNEL); + if (!srv_path->stats) goto err_free_sess; - sess->stats->sess = sess; - - sess->dma_addr = kcalloc(srv->queue_depth, sizeof(*sess->dma_addr), - GFP_KERNEL); - if (!sess->dma_addr) + srv_path->stats->rdma_stats = alloc_percpu(struct rtrs_srv_stats_rdma_stats); + if (!srv_path->stats->rdma_stats) goto err_free_stats; - sess->s.con = kcalloc(con_num, sizeof(*sess->s.con), GFP_KERNEL); - if (!sess->s.con) + srv_path->stats->srv_path = srv_path; + + srv_path->dma_addr = kcalloc(srv->queue_depth, + sizeof(*srv_path->dma_addr), + GFP_KERNEL); + if (!srv_path->dma_addr) + goto err_free_percpu; + + srv_path->s.con = kcalloc(con_num, sizeof(*srv_path->s.con), + GFP_KERNEL); + if (!srv_path->s.con) goto err_free_dma_addr; - sess->state = RTRS_SRV_CONNECTING; - sess->srv = srv; - sess->cur_cq_vector = -1; - sess->s.dst_addr = cm_id->route.addr.dst_addr; - sess->s.src_addr = cm_id->route.addr.src_addr; + srv_path->state = RTRS_SRV_CONNECTING; + srv_path->srv = srv; + srv_path->cur_cq_vector = -1; + srv_path->s.dst_addr = cm_id->route.addr.dst_addr; + srv_path->s.src_addr = cm_id->route.addr.src_addr; /* temporary until receiving session-name from client */ - 
path.src = &sess->s.src_addr; - path.dst = &sess->s.dst_addr; + path.src = &srv_path->s.src_addr; + path.dst = &srv_path->s.dst_addr; rtrs_addr_to_str(&path, str, sizeof(str)); - strscpy(sess->s.sessname, str, sizeof(sess->s.sessname)); - - sess->s.con_num = con_num; - sess->s.irq_con_num = con_num; - sess->s.recon_cnt = recon_cnt; - uuid_copy(&sess->s.uuid, uuid); - spin_lock_init(&sess->state_lock); - INIT_WORK(&sess->close_work, rtrs_srv_close_work); - rtrs_srv_init_hb(sess); - - sess->s.dev = rtrs_ib_dev_find_or_add(cm_id->device, &dev_pd); - if (!sess->s.dev) { + strscpy(srv_path->s.sessname, str, sizeof(srv_path->s.sessname)); + + srv_path->s.con_num = con_num; + srv_path->s.irq_con_num = con_num; + srv_path->s.recon_cnt = recon_cnt; + uuid_copy(&srv_path->s.uuid, uuid); + spin_lock_init(&srv_path->state_lock); + INIT_WORK(&srv_path->close_work, rtrs_srv_close_work); + rtrs_srv_init_hb(srv_path); + + srv_path->s.dev = rtrs_ib_dev_find_or_add(cm_id->device, &dev_pd); + if (!srv_path->s.dev) { err = -ENOMEM; goto err_free_con; } - err = map_cont_bufs(sess); + err = map_cont_bufs(srv_path); if (err) goto err_put_dev; - err = rtrs_srv_alloc_ops_ids(sess); + err = rtrs_srv_alloc_ops_ids(srv_path); if (err) goto err_unmap_bufs; - __add_path_to_srv(srv, sess); + __add_path_to_srv(srv, srv_path); - return sess; + return srv_path; err_unmap_bufs: - unmap_cont_bufs(sess); + unmap_cont_bufs(srv_path); err_put_dev: - rtrs_ib_dev_put(sess->s.dev); + rtrs_ib_dev_put(srv_path->s.dev); err_free_con: - kfree(sess->s.con); + kfree(srv_path->s.con); err_free_dma_addr: - kfree(sess->dma_addr); + kfree(srv_path->dma_addr); +err_free_percpu: + free_percpu(srv_path->stats->rdma_stats); err_free_stats: - kfree(sess->stats); + kfree(srv_path->stats); err_free_sess: - kfree(sess); + kfree(srv_path); err: return ERR_PTR(err); } @@ -1816,8 +1827,8 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, size_t len) { struct rtrs_srv_ctx *ctx = cm_id->context; - struct rtrs_srv_sess 
*sess; - struct rtrs_srv *srv; + struct rtrs_srv_path *srv_path; + struct rtrs_srv_sess *srv; u16 version, con_num, cid; u16 recon_cnt; @@ -1857,16 +1868,16 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, goto reject_w_err; } mutex_lock(&srv->paths_mutex); - sess = __find_sess(srv, &msg->sess_uuid); - if (sess) { - struct rtrs_sess *s = &sess->s; + srv_path = __find_path(srv, &msg->sess_uuid); + if (srv_path) { + struct rtrs_path *s = &srv_path->s; /* Session already holds a reference */ put_srv(srv); - if (sess->state != RTRS_SRV_CONNECTING) { + if (srv_path->state != RTRS_SRV_CONNECTING) { rtrs_err(s, "Session in wrong state: %s\n", - rtrs_srv_state_str(sess->state)); + rtrs_srv_state_str(srv_path->state)); mutex_unlock(&srv->paths_mutex); goto reject_w_err; } @@ -1886,19 +1897,19 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, goto reject_w_err; } } else { - sess = __alloc_sess(srv, cm_id, con_num, recon_cnt, + srv_path = __alloc_path(srv, cm_id, con_num, recon_cnt, &msg->sess_uuid); - if (IS_ERR(sess)) { + if (IS_ERR(srv_path)) { mutex_unlock(&srv->paths_mutex); put_srv(srv); - err = PTR_ERR(sess); + err = PTR_ERR(srv_path); pr_err("RTRS server session allocation failed: %d\n", err); goto reject_w_err; } } - err = create_con(sess, cm_id, cid); + err = create_con(srv_path, cm_id, cid); if (err) { - rtrs_err((&sess->s), "create_con(), error %d\n", err); + rtrs_err((&srv_path->s), "create_con(), error %d\n", err); rtrs_rdma_do_reject(cm_id, err); /* * Since session has other connections we follow normal way @@ -1907,9 +1918,9 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, */ goto close_and_return_err; } - err = rtrs_rdma_do_accept(sess, cm_id); + err = rtrs_rdma_do_accept(srv_path, cm_id); if (err) { - rtrs_err((&sess->s), "rtrs_rdma_do_accept(), error %d\n", err); + rtrs_err((&srv_path->s), "rtrs_rdma_do_accept(), error %d\n", err); rtrs_rdma_do_reject(cm_id, err); /* * Since current connection was successfully added to the @@ 
-1929,7 +1940,7 @@ reject_w_err: close_and_return_err: mutex_unlock(&srv->paths_mutex); - close_sess(sess); + close_path(srv_path); return err; } @@ -1937,14 +1948,14 @@ close_and_return_err: static int rtrs_srv_rdma_cm_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *ev) { - struct rtrs_srv_sess *sess = NULL; - struct rtrs_sess *s = NULL; + struct rtrs_srv_path *srv_path = NULL; + struct rtrs_path *s = NULL; if (ev->event != RDMA_CM_EVENT_CONNECT_REQUEST) { struct rtrs_con *c = cm_id->context; - s = c->sess; - sess = to_srv_sess(s); + s = c->path; + srv_path = to_srv_path(s); } switch (ev->event) { @@ -1968,7 +1979,7 @@ static int rtrs_srv_rdma_cm_handler(struct rdma_cm_id *cm_id, case RDMA_CM_EVENT_ADDR_CHANGE: case RDMA_CM_EVENT_TIMEWAIT_EXIT: case RDMA_CM_EVENT_DEVICE_REMOVAL: - close_sess(sess); + close_path(srv_path); break; default: pr_err("Ignoring unexpected CM event %s, err %d\n", @@ -2176,23 +2187,23 @@ struct rtrs_srv_ctx *rtrs_srv_open(struct rtrs_srv_ops *ops, u16 port) } EXPORT_SYMBOL(rtrs_srv_open); -static void close_sessions(struct rtrs_srv *srv) +static void close_paths(struct rtrs_srv_sess *srv) { - struct rtrs_srv_sess *sess; + struct rtrs_srv_path *srv_path; mutex_lock(&srv->paths_mutex); - list_for_each_entry(sess, &srv->paths_list, s.entry) - close_sess(sess); + list_for_each_entry(srv_path, &srv->paths_list, s.entry) + close_path(srv_path); mutex_unlock(&srv->paths_mutex); } static void close_ctx(struct rtrs_srv_ctx *ctx) { - struct rtrs_srv *srv; + struct rtrs_srv_sess *srv; mutex_lock(&ctx->srv_mutex); list_for_each_entry(srv, &ctx->srv_list, ctx_list) - close_sessions(srv); + close_paths(srv); mutex_unlock(&ctx->srv_mutex); flush_workqueue(rtrs_wq); } @@ -2256,14 +2267,10 @@ static int __init rtrs_server_init(void) err); return err; } - chunk_pool = mempool_create_page_pool(sess_queue_depth * CHUNK_POOL_SZ, - get_order(max_chunk_size)); - if (!chunk_pool) - return -ENOMEM; rtrs_dev_class = class_create(THIS_MODULE, "rtrs-server"); 
if (IS_ERR(rtrs_dev_class)) { err = PTR_ERR(rtrs_dev_class); - goto out_chunk_pool; + goto out_err; } rtrs_wq = alloc_workqueue("rtrs_server_wq", 0, 0); if (!rtrs_wq) { @@ -2275,9 +2282,7 @@ static int __init rtrs_server_init(void) out_dev_class: class_destroy(rtrs_dev_class); -out_chunk_pool: - mempool_destroy(chunk_pool); - +out_err: return err; } @@ -2285,7 +2290,6 @@ static void __exit rtrs_server_exit(void) { destroy_workqueue(rtrs_wq); class_destroy(rtrs_dev_class); - mempool_destroy(chunk_pool); rtrs_rdma_dev_pd_deinit(&dev_pd); } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.h b/drivers/infiniband/ulp/rtrs/rtrs-srv.h index 7d403c12faf3..2f8a638e36fa 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.h @@ -12,6 +12,7 @@ #include <linux/device.h> #include <linux/refcount.h> +#include <linux/percpu.h> #include "rtrs-pri.h" /* @@ -29,15 +30,15 @@ enum rtrs_srv_state { */ struct rtrs_srv_stats_rdma_stats { struct { - atomic64_t cnt; - atomic64_t size_total; + u64 cnt; + u64 size_total; } dir[2]; }; struct rtrs_srv_stats { - struct kobject kobj_stats; - struct rtrs_srv_stats_rdma_stats rdma_stats; - struct rtrs_srv_sess *sess; + struct kobject kobj_stats; + struct rtrs_srv_stats_rdma_stats __percpu *rdma_stats; + struct rtrs_srv_path *srv_path; }; struct rtrs_srv_con { @@ -71,9 +72,9 @@ struct rtrs_srv_mr { struct rtrs_iu *iu; /* send buffer for new rkey msg */ }; -struct rtrs_srv_sess { - struct rtrs_sess s; - struct rtrs_srv *srv; +struct rtrs_srv_path { + struct rtrs_path s; + struct rtrs_srv_sess *srv; struct work_struct close_work; enum rtrs_srv_state state; spinlock_t state_lock; @@ -90,7 +91,12 @@ struct rtrs_srv_sess { struct rtrs_srv_stats *stats; }; -struct rtrs_srv { +static inline struct rtrs_srv_path *to_srv_path(struct rtrs_path *s) +{ + return container_of(s, struct rtrs_srv_path, s); +} + +struct rtrs_srv_sess { struct list_head paths_list; int paths_up; struct mutex paths_ev_mutex; @@ -125,13 
+131,13 @@ struct rtrs_srv_ib_ctx { extern struct class *rtrs_dev_class; -void close_sess(struct rtrs_srv_sess *sess); +void close_path(struct rtrs_srv_path *srv_path); static inline void rtrs_srv_update_rdma_stats(struct rtrs_srv_stats *s, size_t size, int d) { - atomic64_inc(&s->rdma_stats.dir[d].cnt); - atomic64_add(size, &s->rdma_stats.dir[d].size_total); + this_cpu_inc(s->rdma_stats->dir[d].cnt); + this_cpu_add(s->rdma_stats->dir[d].size_total, size); } /* functions which are implemented in rtrs-srv-stats.c */ @@ -142,7 +148,7 @@ ssize_t rtrs_srv_reset_all_help(struct rtrs_srv_stats *stats, char *page, size_t len); /* functions which are implemented in rtrs-srv-sysfs.c */ -int rtrs_srv_create_sess_files(struct rtrs_srv_sess *sess); -void rtrs_srv_destroy_sess_files(struct rtrs_srv_sess *sess); +int rtrs_srv_create_path_files(struct rtrs_srv_path *srv_path); +void rtrs_srv_destroy_path_files(struct rtrs_srv_path *srv_path); #endif /* RTRS_SRV_H */ diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c index 37952c8e768c..ed324b47d93a 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs.c @@ -69,16 +69,16 @@ EXPORT_SYMBOL_GPL(rtrs_iu_free); int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu) { - struct rtrs_sess *sess = con->sess; + struct rtrs_path *path = con->path; struct ib_recv_wr wr; struct ib_sge list; list.addr = iu->dma_addr; list.length = iu->size; - list.lkey = sess->dev->ib_pd->local_dma_lkey; + list.lkey = path->dev->ib_pd->local_dma_lkey; if (list.length == 0) { - rtrs_wrn(con->sess, + rtrs_wrn(con->path, "Posting receive work request failed, sg list is empty\n"); return -EINVAL; } @@ -126,7 +126,7 @@ static int rtrs_post_send(struct ib_qp *qp, struct ib_send_wr *head, int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size, struct ib_send_wr *head) { - struct rtrs_sess *sess = con->sess; + struct rtrs_path *path = con->path; struct ib_send_wr wr; struct 
ib_sge list; @@ -135,7 +135,7 @@ int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size, list.addr = iu->dma_addr; list.length = size; - list.lkey = sess->dev->ib_pd->local_dma_lkey; + list.lkey = path->dev->ib_pd->local_dma_lkey; wr = (struct ib_send_wr) { .wr_cqe = &iu->cqe, @@ -175,7 +175,7 @@ int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu, * length error */ for (i = 0; i < num_sge; i++) - if (WARN_ON(sge[i].length == 0)) + if (WARN_ONCE(sge[i].length == 0, "sg %d is zero length\n", i)) return -EINVAL; return rtrs_post_send(con->qp, head, &wr.wr, tail); @@ -188,11 +188,11 @@ static int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con, struct ib_send_wr *head) { struct ib_rdma_wr wr; - struct rtrs_sess *sess = con->sess; + struct rtrs_path *path = con->path; enum ib_send_flags sflags; atomic_dec_if_positive(&con->sq_wr_avail); - sflags = (atomic_inc_return(&con->wr_cnt) % sess->signal_interval) ? + sflags = (atomic_inc_return(&con->wr_cnt) % path->signal_interval) ? 
0 : IB_SEND_SIGNALED; wr = (struct ib_rdma_wr) { @@ -211,12 +211,12 @@ static void qp_event_handler(struct ib_event *ev, void *ctx) switch (ev->event) { case IB_EVENT_COMM_EST: - rtrs_info(con->sess, "QP event %s (%d) received\n", + rtrs_info(con->path, "QP event %s (%d) received\n", ib_event_msg(ev->event), ev->event); rdma_notify(con->cm_id, IB_EVENT_COMM_EST); break; default: - rtrs_info(con->sess, "Unhandled QP event %s (%d) received\n", + rtrs_info(con->path, "Unhandled QP event %s (%d) received\n", ib_event_msg(ev->event), ev->event); break; } @@ -224,7 +224,7 @@ static void qp_event_handler(struct ib_event *ev, void *ctx) static bool is_pollqueue(struct rtrs_con *con) { - return con->cid >= con->sess->irq_con_num; + return con->cid >= con->path->irq_con_num; } static int create_cq(struct rtrs_con *con, int cq_vector, int nr_cqe, @@ -240,7 +240,7 @@ static int create_cq(struct rtrs_con *con, int cq_vector, int nr_cqe, cq = ib_cq_pool_get(cm_id->device, nr_cqe, cq_vector, poll_ctx); if (IS_ERR(cq)) { - rtrs_err(con->sess, "Creating completion queue failed, errno: %ld\n", + rtrs_err(con->path, "Creating completion queue failed, errno: %ld\n", PTR_ERR(cq)); return PTR_ERR(cq); } @@ -271,7 +271,7 @@ static int create_qp(struct rtrs_con *con, struct ib_pd *pd, ret = rdma_create_qp(cm_id, pd, &init_attr); if (ret) { - rtrs_err(con->sess, "Creating QP failed, err: %d\n", ret); + rtrs_err(con->path, "Creating QP failed, err: %d\n", ret); return ret; } con->qp = cm_id->qp; @@ -290,7 +290,7 @@ static void destroy_cq(struct rtrs_con *con) con->cq = NULL; } -int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con, +int rtrs_cq_qp_create(struct rtrs_path *path, struct rtrs_con *con, u32 max_send_sge, int cq_vector, int nr_cqe, u32 max_send_wr, u32 max_recv_wr, enum ib_poll_context poll_ctx) @@ -301,13 +301,13 @@ int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con, if (err) return err; - err = create_qp(con, sess->dev->ib_pd, max_send_wr, 
max_recv_wr, + err = create_qp(con, path->dev->ib_pd, max_send_wr, max_recv_wr, max_send_sge); if (err) { destroy_cq(con); return err; } - con->sess = sess; + con->path = path; return 0; } @@ -323,24 +323,24 @@ void rtrs_cq_qp_destroy(struct rtrs_con *con) } EXPORT_SYMBOL_GPL(rtrs_cq_qp_destroy); -static void schedule_hb(struct rtrs_sess *sess) +static void schedule_hb(struct rtrs_path *path) { - queue_delayed_work(sess->hb_wq, &sess->hb_dwork, - msecs_to_jiffies(sess->hb_interval_ms)); + queue_delayed_work(path->hb_wq, &path->hb_dwork, + msecs_to_jiffies(path->hb_interval_ms)); } -void rtrs_send_hb_ack(struct rtrs_sess *sess) +void rtrs_send_hb_ack(struct rtrs_path *path) { - struct rtrs_con *usr_con = sess->con[0]; + struct rtrs_con *usr_con = path->con[0]; u32 imm; int err; imm = rtrs_to_imm(RTRS_HB_ACK_IMM, 0); - err = rtrs_post_rdma_write_imm_empty(usr_con, sess->hb_cqe, imm, + err = rtrs_post_rdma_write_imm_empty(usr_con, path->hb_cqe, imm, NULL); if (err) { - rtrs_err(sess, "send HB ACK failed, errno: %d\n", err); - sess->hb_err_handler(usr_con); + rtrs_err(path, "send HB ACK failed, errno: %d\n", err); + path->hb_err_handler(usr_con); return; } } @@ -349,63 +349,63 @@ EXPORT_SYMBOL_GPL(rtrs_send_hb_ack); static void hb_work(struct work_struct *work) { struct rtrs_con *usr_con; - struct rtrs_sess *sess; + struct rtrs_path *path; u32 imm; int err; - sess = container_of(to_delayed_work(work), typeof(*sess), hb_dwork); - usr_con = sess->con[0]; + path = container_of(to_delayed_work(work), typeof(*path), hb_dwork); + usr_con = path->con[0]; - if (sess->hb_missed_cnt > sess->hb_missed_max) { - rtrs_err(sess, "HB missed max reached.\n"); - sess->hb_err_handler(usr_con); + if (path->hb_missed_cnt > path->hb_missed_max) { + rtrs_err(path, "HB missed max reached.\n"); + path->hb_err_handler(usr_con); return; } - if (sess->hb_missed_cnt++) { + if (path->hb_missed_cnt++) { /* Reschedule work without sending hb */ - schedule_hb(sess); + schedule_hb(path); return; } - 
sess->hb_last_sent = ktime_get(); + path->hb_last_sent = ktime_get(); imm = rtrs_to_imm(RTRS_HB_MSG_IMM, 0); - err = rtrs_post_rdma_write_imm_empty(usr_con, sess->hb_cqe, imm, + err = rtrs_post_rdma_write_imm_empty(usr_con, path->hb_cqe, imm, NULL); if (err) { - rtrs_err(sess, "HB send failed, errno: %d\n", err); - sess->hb_err_handler(usr_con); + rtrs_err(path, "HB send failed, errno: %d\n", err); + path->hb_err_handler(usr_con); return; } - schedule_hb(sess); + schedule_hb(path); } -void rtrs_init_hb(struct rtrs_sess *sess, struct ib_cqe *cqe, +void rtrs_init_hb(struct rtrs_path *path, struct ib_cqe *cqe, unsigned int interval_ms, unsigned int missed_max, void (*err_handler)(struct rtrs_con *con), struct workqueue_struct *wq) { - sess->hb_cqe = cqe; - sess->hb_interval_ms = interval_ms; - sess->hb_err_handler = err_handler; - sess->hb_wq = wq; - sess->hb_missed_max = missed_max; - sess->hb_missed_cnt = 0; - INIT_DELAYED_WORK(&sess->hb_dwork, hb_work); + path->hb_cqe = cqe; + path->hb_interval_ms = interval_ms; + path->hb_err_handler = err_handler; + path->hb_wq = wq; + path->hb_missed_max = missed_max; + path->hb_missed_cnt = 0; + INIT_DELAYED_WORK(&path->hb_dwork, hb_work); } EXPORT_SYMBOL_GPL(rtrs_init_hb); -void rtrs_start_hb(struct rtrs_sess *sess) +void rtrs_start_hb(struct rtrs_path *path) { - schedule_hb(sess); + schedule_hb(path); } EXPORT_SYMBOL_GPL(rtrs_start_hb); -void rtrs_stop_hb(struct rtrs_sess *sess) +void rtrs_stop_hb(struct rtrs_path *path) { - cancel_delayed_work_sync(&sess->hb_dwork); - sess->hb_missed_cnt = 0; + cancel_delayed_work_sync(&path->hb_dwork); + path->hb_missed_cnt = 0; } EXPORT_SYMBOL_GPL(rtrs_stop_hb); @@ -479,7 +479,6 @@ static int rtrs_str_to_sockaddr(const char *addr, size_t len, */ int sockaddr_to_str(const struct sockaddr *addr, char *buf, size_t len) { - switch (addr->sa_family) { case AF_IB: return scnprintf(buf, len, "gid:%pI6", diff --git a/drivers/infiniband/ulp/rtrs/rtrs.h b/drivers/infiniband/ulp/rtrs/rtrs.h index 
859c79685daf..b48b53a7c143 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.h +++ b/drivers/infiniband/ulp/rtrs/rtrs.h @@ -13,9 +13,9 @@ #include <linux/scatterlist.h> struct rtrs_permit; -struct rtrs_clt; +struct rtrs_clt_sess; struct rtrs_srv_ctx; -struct rtrs_srv; +struct rtrs_srv_sess; struct rtrs_srv_op; /* @@ -52,14 +52,14 @@ struct rtrs_clt_ops { void (*link_ev)(void *priv, enum rtrs_clt_link_ev ev); }; -struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, - const char *sessname, +struct rtrs_clt_sess *rtrs_clt_open(struct rtrs_clt_ops *ops, + const char *pathname, const struct rtrs_addr *paths, size_t path_cnt, u16 port, size_t pdu_sz, u8 reconnect_delay_sec, s16 max_reconnect_attempts, u32 nr_poll_queues); -void rtrs_clt_close(struct rtrs_clt *sess); +void rtrs_clt_close(struct rtrs_clt_sess *clt); enum wait_type { RTRS_PERMIT_NOWAIT = 0, @@ -77,11 +77,12 @@ enum rtrs_clt_con_type { RTRS_IO_CON }; -struct rtrs_permit *rtrs_clt_get_permit(struct rtrs_clt *sess, - enum rtrs_clt_con_type con_type, - enum wait_type wait); +struct rtrs_permit *rtrs_clt_get_permit(struct rtrs_clt_sess *sess, + enum rtrs_clt_con_type con_type, + enum wait_type wait); -void rtrs_clt_put_permit(struct rtrs_clt *sess, struct rtrs_permit *permit); +void rtrs_clt_put_permit(struct rtrs_clt_sess *sess, + struct rtrs_permit *permit); /** * rtrs_clt_req_ops - it holds the request confirmation callback @@ -98,10 +99,10 @@ struct rtrs_clt_req_ops { }; int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops, - struct rtrs_clt *sess, struct rtrs_permit *permit, + struct rtrs_clt_sess *sess, struct rtrs_permit *permit, const struct kvec *vec, size_t nr, size_t len, struct scatterlist *sg, unsigned int sg_cnt); -int rtrs_clt_rdma_cq_direct(struct rtrs_clt *clt, unsigned int index); +int rtrs_clt_rdma_cq_direct(struct rtrs_clt_sess *clt, unsigned int index); /** * rtrs_attrs - RTRS session attributes @@ -112,7 +113,7 @@ struct rtrs_attrs { u32 max_segments; }; -int rtrs_clt_query(struct 
rtrs_clt *sess, struct rtrs_attrs *attr); +int rtrs_clt_query(struct rtrs_clt_sess *sess, struct rtrs_attrs *attr); /* * Here goes RTRS server API @@ -138,7 +139,6 @@ struct rtrs_srv_ops { * @priv: Private data set by rtrs_srv_set_sess_priv() * @id: internal RTRS operation id - * @dir: READ/WRITE * @data: Pointer to (bidirectional) rdma memory area: * - in case of %RTRS_SRV_RDMA_EV_RECV contains * data sent by the client @@ -150,7 +150,7 @@ struct rtrs_srv_ops { * @usrlen: Size of the user message */ int (*rdma_ev)(void *priv, - struct rtrs_srv_op *id, int dir, + struct rtrs_srv_op *id, void *data, size_t datalen, const void *usr, size_t usrlen); /** @@ -163,7 +163,7 @@ struct rtrs_srv_ops { * @priv: Private data from user if previously set with * rtrs_srv_set_sess_priv() */ - int (*link_ev)(struct rtrs_srv *sess, enum rtrs_srv_link_ev ev, + int (*link_ev)(struct rtrs_srv_sess *sess, enum rtrs_srv_link_ev ev, void *priv); }; @@ -173,11 +173,12 @@ void rtrs_srv_close(struct rtrs_srv_ctx *ctx); bool rtrs_srv_resp_rdma(struct rtrs_srv_op *id, int errno); -void rtrs_srv_set_sess_priv(struct rtrs_srv *sess, void *priv); +void rtrs_srv_set_sess_priv(struct rtrs_srv_sess *sess, void *priv); -int rtrs_srv_get_sess_name(struct rtrs_srv *sess, char *sessname, size_t len); +int rtrs_srv_get_path_name(struct rtrs_srv_sess *sess, char *pathname, + size_t len); -int rtrs_srv_get_queue_depth(struct rtrs_srv *sess); +int rtrs_srv_get_queue_depth(struct rtrs_srv_sess *sess); int rtrs_addr_to_sockaddr(const char *str, size_t len, u16 port, struct rtrs_addr *addr); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index e174e853f8a4..1075c2ac8fe2 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -430,7 +430,7 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, spin_lock_init(&pool->lock); INIT_LIST_HEAD(&pool->free_list); - if (device->attrs.device_cap_flags & 
IB_DEVICE_SG_GAPS_REG) + if (device->attrs.kernel_cap_flags & IBK_SG_GAPS_REG) mr_type = IB_MR_TYPE_SG_GAPS; else mr_type = IB_MR_TYPE_MEM_REG; @@ -699,7 +699,7 @@ static void srp_free_ch_ib(struct srp_target_port *target, static void srp_path_rec_completion(int status, struct sa_path_rec *pathrec, - void *ch_ptr) + int num_paths, void *ch_ptr) { struct srp_rdma_ch *ch = ch_ptr; struct srp_target_port *target = ch->target; @@ -1282,8 +1282,7 @@ struct srp_terminate_context { int scsi_result; }; -static bool srp_terminate_cmd(struct scsi_cmnd *scmnd, void *context_ptr, - bool reserved) +static bool srp_terminate_cmd(struct scsi_cmnd *scmnd, void *context_ptr) { struct srp_terminate_context *context = context_ptr; struct srp_target_port *target = context->srp_target; @@ -1962,7 +1961,8 @@ static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp) if (scmnd) { req = scsi_cmd_priv(scmnd); scmnd = srp_claim_req(ch, req, NULL, scmnd); - } else { + } + if (!scmnd) { shost_printk(KERN_ERR, target->scsi_host, "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n", rsp->tag, ch - target->ch, ch->qp->qp_num); @@ -2789,7 +2789,7 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun, static int srp_abort(struct scsi_cmnd *scmnd) { struct srp_target_port *target = host_to_target(scmnd->device->host); - struct srp_request *req = (struct srp_request *) scmnd->host_scribble; + struct srp_request *req = scsi_cmd_priv(scmnd); u32 tag; u16 ch_idx; struct srp_rdma_ch *ch; @@ -2797,8 +2797,6 @@ static int srp_abort(struct scsi_cmnd *scmnd) shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); - if (!req) - return SUCCESS; tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmnd)); ch_idx = blk_mq_unique_tag_to_hwq(tag); if (WARN_ON_ONCE(ch_idx >= target->ch_count)) @@ -2991,7 +2989,7 @@ static ssize_t local_ib_port_show(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sysfs_emit(buf, 
"%d\n", target->srp_host->port); + return sysfs_emit(buf, "%u\n", target->srp_host->port); } static DEVICE_ATTR_RO(local_ib_port); @@ -3179,11 +3177,16 @@ static void srp_release_dev(struct device *dev) struct srp_host *host = container_of(dev, struct srp_host, dev); - complete(&host->released); + kfree(host); } +static struct attribute *srp_class_attrs[]; + +ATTRIBUTE_GROUPS(srp_class); + static struct class srp_class = { .name = "infiniband_srp", + .dev_groups = srp_class_groups, .dev_release = srp_release_dev }; @@ -3650,7 +3653,7 @@ static ssize_t add_target_store(struct device *dev, target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb; target_host->max_segment_size = ib_dma_max_seg_size(ibdev); - if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) + if (!(ibdev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG)) target_host->virt_boundary_mask = ~srp_dev->mr_page_mask; target = host_to_target(target_host); @@ -3706,8 +3709,8 @@ static ssize_t add_target_store(struct device *dev, } if (srp_dev->use_fast_reg) { - bool gaps_reg = (ibdev->attrs.device_cap_flags & - IB_DEVICE_SG_GAPS_REG); + bool gaps_reg = ibdev->attrs.kernel_cap_flags & + IBK_SG_GAPS_REG; max_sectors_per_mr = srp_dev->max_pages_per_mr << (ilog2(srp_dev->mr_page_size) - 9); @@ -3884,12 +3887,19 @@ static ssize_t port_show(struct device *dev, struct device_attribute *attr, { struct srp_host *host = container_of(dev, struct srp_host, dev); - return sysfs_emit(buf, "%d\n", host->port); + return sysfs_emit(buf, "%u\n", host->port); } static DEVICE_ATTR_RO(port); -static struct srp_host *srp_add_port(struct srp_device *device, u8 port) +static struct attribute *srp_class_attrs[] = { + &dev_attr_add_target.attr, + &dev_attr_ibdev.attr, + &dev_attr_port.attr, + NULL +}; + +static struct srp_host *srp_add_port(struct srp_device *device, u32 port) { struct srp_host *host; @@ -3899,33 +3909,24 @@ static struct srp_host *srp_add_port(struct srp_device *device, u8 port) 
INIT_LIST_HEAD(&host->target_list); spin_lock_init(&host->target_lock); - init_completion(&host->released); mutex_init(&host->add_target_mutex); host->srp_dev = device; host->port = port; + device_initialize(&host->dev); host->dev.class = &srp_class; host->dev.parent = device->dev->dev.parent; - dev_set_name(&host->dev, "srp-%s-%d", dev_name(&device->dev->dev), - port); - - if (device_register(&host->dev)) - goto free_host; - if (device_create_file(&host->dev, &dev_attr_add_target)) - goto err_class; - if (device_create_file(&host->dev, &dev_attr_ibdev)) - goto err_class; - if (device_create_file(&host->dev, &dev_attr_port)) - goto err_class; + if (dev_set_name(&host->dev, "srp-%s-%u", dev_name(&device->dev->dev), + port)) + goto put_host; + if (device_add(&host->dev)) + goto put_host; return host; -err_class: - device_unregister(&host->dev); - -free_host: - kfree(host); - +put_host: + device_del(&host->dev); + put_device(&host->dev); return NULL; } @@ -3937,7 +3938,7 @@ static void srp_rename_dev(struct ib_device *device, void *client_data) list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) { char name[IB_DEVICE_NAME_MAX + 8]; - snprintf(name, sizeof(name), "srp-%s-%d", + snprintf(name, sizeof(name), "srp-%s-%u", dev_name(&device->dev), host->port); device_rename(&host->dev, name); } @@ -3949,7 +3950,7 @@ static int srp_add_one(struct ib_device *device) struct ib_device_attr *attr = &device->attrs; struct srp_host *host; int mr_page_shift; - unsigned int p; + u32 p; u64 max_pages_per_mr; unsigned int flags = 0; @@ -4031,12 +4032,11 @@ static void srp_remove_one(struct ib_device *device, void *client_data) srp_dev = client_data; list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) { - device_unregister(&host->dev); /* - * Wait for the sysfs entry to go away, so that no new - * target ports can be created. + * Remove the add_target sysfs entry so that no new target ports + * can be created. 
*/ - wait_for_completion(&host->released); + device_del(&host->dev); /* * Remove all target ports. @@ -4047,12 +4047,14 @@ static void srp_remove_one(struct ib_device *device, void *client_data) spin_unlock(&host->target_lock); /* - * Wait for tl_err and target port removal tasks. + * srp_queue_remove_work() queues a call to + * srp_remove_target(). The latter function cancels + * target->tl_err_work so waiting for the remove works to + * finish is sufficient. */ - flush_workqueue(system_long_wq); flush_workqueue(srp_remove_wq); - kfree(host); + put_device(&host->dev); } ib_dealloc_pd(srp_dev->pd); diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index abccddeea1e3..00b0068fda20 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -92,6 +92,9 @@ enum srp_iu_type { }; /* + * RDMA adapter in the initiator system. + * + * @dev_list: List of RDMA ports associated with this RDMA adapter (srp_host). * @mr_page_mask: HCA memory registration page mask. * @mr_page_size: HCA memory registration page size. * @mr_max_size: Maximum size in bytes of a single FR registration request. @@ -109,13 +112,18 @@ struct srp_device { bool use_fast_reg; }; +/* + * One port of an RDMA adapter in the initiator system. + * + * @target_list: List of connected target ports (struct srp_target_port). + * @target_lock: Protects @target_list. + */ struct srp_host { struct srp_device *srp_dev; - u8 port; + u32 port; struct device dev; struct list_head target_list; spinlock_t target_lock; - struct completion released; struct list_head list; struct mutex add_target_mutex; }; @@ -183,7 +191,7 @@ struct srp_rdma_ch { }; /** - * struct srp_target_port + * struct srp_target_port - RDMA port in the SRP target system * @comp_vector: Completion vector used by the first RDMA channel created for * this target port. 
*/ diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index f86ee1c4b970..3c3fae738c3e 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -565,12 +565,9 @@ static int srpt_refresh_port(struct srpt_port *sport) if (ret) return ret; - sport->port_guid_id.wwn.priv = sport; - srpt_format_guid(sport->port_guid_id.name, - sizeof(sport->port_guid_id.name), + srpt_format_guid(sport->guid_name, ARRAY_SIZE(sport->guid_name), &sport->gid.global.interface_id); - sport->port_gid_id.wwn.priv = sport; - snprintf(sport->port_gid_id.name, sizeof(sport->port_gid_id.name), + snprintf(sport->gid_name, ARRAY_SIZE(sport->gid_name), "0x%016llx%016llx", be64_to_cpu(sport->gid.global.subnet_prefix), be64_to_cpu(sport->gid.global.interface_id)); @@ -1424,7 +1421,7 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch, srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID; srp_rsp->sense_data_len = cpu_to_be32(sense_data_len); - memcpy(srp_rsp + 1, sense_data, sense_data_len); + memcpy(srp_rsp->data, sense_data, sense_data_len); } return sizeof(*srp_rsp) + sense_data_len; @@ -2221,13 +2218,13 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, ch->zw_cqe.done = srpt_zerolength_write_done; INIT_WORK(&ch->release_work, srpt_release_channel_work); ch->sport = sport; - if (ib_cm_id) { - ch->ib_cm.cm_id = ib_cm_id; - ib_cm_id->context = ch; - } else { + if (rdma_cm_id) { ch->using_rdma_cm = true; ch->rdma_cm.cm_id = rdma_cm_id; rdma_cm_id->context = ch; + } else { + ch->ib_cm.cm_id = ib_cm_id; + ib_cm_id->context = ch; } /* * ch->rq_size should be at least as large as the initiator queue @@ -2303,7 +2300,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, goto free_recv_ring; } - strlcpy(ch->sess_name, src_addr, sizeof(ch->sess_name)); + strscpy(ch->sess_name, src_addr, sizeof(ch->sess_name)); snprintf(i_port_id, sizeof(i_port_id), "0x%016llx%016llx", be64_to_cpu(*(__be64 *)nexus->i_port_id), 
be64_to_cpu(*(__be64 *)(nexus->i_port_id + 8))); @@ -2314,31 +2311,35 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, tag_num = ch->rq_size; tag_size = 1; /* ib_srpt does not use se_sess->sess_cmd_map */ - mutex_lock(&sport->port_guid_id.mutex); - list_for_each_entry(stpg, &sport->port_guid_id.tpg_list, entry) { - if (!IS_ERR_OR_NULL(ch->sess)) - break; - ch->sess = target_setup_session(&stpg->tpg, tag_num, + if (sport->guid_id) { + mutex_lock(&sport->guid_id->mutex); + list_for_each_entry(stpg, &sport->guid_id->tpg_list, entry) { + if (!IS_ERR_OR_NULL(ch->sess)) + break; + ch->sess = target_setup_session(&stpg->tpg, tag_num, tag_size, TARGET_PROT_NORMAL, ch->sess_name, ch, NULL); + } + mutex_unlock(&sport->guid_id->mutex); } - mutex_unlock(&sport->port_guid_id.mutex); - mutex_lock(&sport->port_gid_id.mutex); - list_for_each_entry(stpg, &sport->port_gid_id.tpg_list, entry) { - if (!IS_ERR_OR_NULL(ch->sess)) - break; - ch->sess = target_setup_session(&stpg->tpg, tag_num, + if (sport->gid_id) { + mutex_lock(&sport->gid_id->mutex); + list_for_each_entry(stpg, &sport->gid_id->tpg_list, entry) { + if (!IS_ERR_OR_NULL(ch->sess)) + break; + ch->sess = target_setup_session(&stpg->tpg, tag_num, tag_size, TARGET_PROT_NORMAL, i_port_id, ch, NULL); - if (!IS_ERR_OR_NULL(ch->sess)) - break; - /* Retry without leading "0x" */ - ch->sess = target_setup_session(&stpg->tpg, tag_num, + if (!IS_ERR_OR_NULL(ch->sess)) + break; + /* Retry without leading "0x" */ + ch->sess = target_setup_session(&stpg->tpg, tag_num, tag_size, TARGET_PROT_NORMAL, i_port_id + 2, ch, NULL); + } + mutex_unlock(&sport->gid_id->mutex); } - mutex_unlock(&sport->port_gid_id.mutex); if (IS_ERR_OR_NULL(ch->sess)) { WARN_ON_ONCE(ch->sess == NULL); @@ -2983,7 +2984,12 @@ static int srpt_release_sport(struct srpt_port *sport) return 0; } -static struct se_wwn *__srpt_lookup_wwn(const char *name) +struct port_and_port_id { + struct srpt_port *sport; + struct srpt_port_id **port_id; +}; + +static 
struct port_and_port_id __srpt_lookup_port(const char *name) { struct ib_device *dev; struct srpt_device *sdev; @@ -2998,25 +3004,38 @@ static struct se_wwn *__srpt_lookup_wwn(const char *name) for (i = 0; i < dev->phys_port_cnt; i++) { sport = &sdev->port[i]; - if (strcmp(sport->port_guid_id.name, name) == 0) - return &sport->port_guid_id.wwn; - if (strcmp(sport->port_gid_id.name, name) == 0) - return &sport->port_gid_id.wwn; + if (strcmp(sport->guid_name, name) == 0) { + kref_get(&sdev->refcnt); + return (struct port_and_port_id){ + sport, &sport->guid_id}; + } + if (strcmp(sport->gid_name, name) == 0) { + kref_get(&sdev->refcnt); + return (struct port_and_port_id){ + sport, &sport->gid_id}; + } } } - return NULL; + return (struct port_and_port_id){}; } -static struct se_wwn *srpt_lookup_wwn(const char *name) +/** + * srpt_lookup_port() - Look up an RDMA port by name + * @name: ASCII port name + * + * Increments the RDMA port reference count if an RDMA port pointer is returned. + * The caller must drop that reference count by calling srpt_port_put_ref(). + */ +static struct port_and_port_id srpt_lookup_port(const char *name) { - struct se_wwn *wwn; + struct port_and_port_id papi; spin_lock(&srpt_dev_lock); - wwn = __srpt_lookup_wwn(name); + papi = __srpt_lookup_port(name); spin_unlock(&srpt_dev_lock); - return wwn; + return papi; } static void srpt_free_srq(struct srpt_device *sdev) @@ -3101,6 +3120,18 @@ static int srpt_use_srq(struct srpt_device *sdev, bool use_srq) return ret; } +static void srpt_free_sdev(struct kref *refcnt) +{ + struct srpt_device *sdev = container_of(refcnt, typeof(*sdev), refcnt); + + kfree(sdev); +} + +static void srpt_sdev_put(struct srpt_device *sdev) +{ + kref_put(&sdev->refcnt, srpt_free_sdev); +} + /** * srpt_add_one - InfiniBand device addition callback function * @device: Describes a HCA. 
@@ -3119,6 +3150,7 @@ static int srpt_add_one(struct ib_device *device) if (!sdev) return -ENOMEM; + kref_init(&sdev->refcnt); sdev->device = device; mutex_init(&sdev->sdev_mutex); @@ -3159,7 +3191,7 @@ static int srpt_add_one(struct ib_device *device) * if this HCA is gone bad and replaced by different HCA */ ret = sdev->cm_id ? - ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid), 0) : + ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid)) : 0; if (ret < 0) { pr_err("ib_cm_listen() failed: %d (cm_id state = %d)\n", ret, @@ -3182,10 +3214,6 @@ static int srpt_add_one(struct ib_device *device) sport->port_attrib.srp_sq_size = DEF_SRPT_SQ_SIZE; sport->port_attrib.use_srq = false; INIT_WORK(&sport->work, srpt_refresh_port_work); - mutex_init(&sport->port_guid_id.mutex); - INIT_LIST_HEAD(&sport->port_guid_id.tpg_list); - mutex_init(&sport->port_gid_id.mutex); - INIT_LIST_HEAD(&sport->port_gid_id.tpg_list); ret = srpt_refresh_port(sport); if (ret) { @@ -3214,7 +3242,7 @@ err_ring: srpt_free_srq(sdev); ib_dealloc_pd(sdev->pd); free_dev: - kfree(sdev); + srpt_sdev_put(sdev); pr_info("%s(%s) failed.\n", __func__, dev_name(&device->dev)); return ret; } @@ -3258,7 +3286,7 @@ static void srpt_remove_one(struct ib_device *device, void *client_data) ib_dealloc_pd(sdev->pd); - kfree(sdev); + srpt_sdev_put(sdev); } static struct ib_client srpt_client = { @@ -3286,10 +3314,10 @@ static struct srpt_port_id *srpt_wwn_to_sport_id(struct se_wwn *wwn) { struct srpt_port *sport = wwn->priv; - if (wwn == &sport->port_guid_id.wwn) - return &sport->port_guid_id; - if (wwn == &sport->port_gid_id.wwn) - return &sport->port_gid_id; + if (sport->guid_id && &sport->guid_id->wwn == wwn) + return sport->guid_id; + if (sport->gid_id && &sport->gid_id->wwn == wwn) + return sport->gid_id; WARN_ON_ONCE(true); return NULL; } @@ -3774,7 +3802,31 @@ static struct se_wwn *srpt_make_tport(struct target_fabric_configfs *tf, struct config_group *group, const char *name) { - return 
srpt_lookup_wwn(name) ? : ERR_PTR(-EINVAL); + struct port_and_port_id papi = srpt_lookup_port(name); + struct srpt_port *sport = papi.sport; + struct srpt_port_id *port_id; + + if (!papi.port_id) + return ERR_PTR(-EINVAL); + if (*papi.port_id) { + /* Attempt to create a directory that already exists. */ + WARN_ON_ONCE(true); + return &(*papi.port_id)->wwn; + } + port_id = kzalloc(sizeof(*port_id), GFP_KERNEL); + if (!port_id) { + srpt_sdev_put(sport->sdev); + return ERR_PTR(-ENOMEM); + } + mutex_init(&port_id->mutex); + INIT_LIST_HEAD(&port_id->tpg_list); + port_id->wwn.priv = sport; + memcpy(port_id->name, port_id == sport->guid_id ? sport->guid_name : + sport->gid_name, ARRAY_SIZE(port_id->name)); + + *papi.port_id = port_id; + + return &port_id->wwn; } /** @@ -3783,6 +3835,18 @@ static struct se_wwn *srpt_make_tport(struct target_fabric_configfs *tf, */ static void srpt_drop_tport(struct se_wwn *wwn) { + struct srpt_port_id *port_id = container_of(wwn, typeof(*port_id), wwn); + struct srpt_port *sport = wwn->priv; + + if (sport->guid_id == port_id) + sport->guid_id = NULL; + else if (sport->gid_id == port_id) + sport->gid_id = NULL; + else + WARN_ON_ONCE(true); + + srpt_sdev_put(sport->sdev); + kfree(port_id); } static ssize_t srpt_wwn_version_show(struct config_item *item, char *buf) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index 76e66f630c17..4c46b301eea1 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -376,7 +376,7 @@ struct srpt_tpg { }; /** - * struct srpt_port_id - information about an RDMA port name + * struct srpt_port_id - LIO RDMA port information * @mutex: Protects @tpg_list changes. * @tpg_list: TPGs associated with the RDMA port name. * @wwn: WWN associated with the RDMA port name. 
@@ -393,7 +393,7 @@ struct srpt_port_id { }; /** - * struct srpt_port - information associated by SRPT with a single IB port + * struct srpt_port - SRPT RDMA port information * @sdev: backpointer to the HCA information. * @mad_agent: per-port management datagram processing information. * @enabled: Whether or not this target port is enabled. @@ -402,8 +402,10 @@ struct srpt_port_id { * @lid: cached value of the port's lid. * @gid: cached value of the port's gid. * @work: work structure for refreshing the aforementioned cached values. - * @port_guid_id: target port GUID - * @port_gid_id: target port GID + * @guid_name: port name in GUID format. + * @guid_id: LIO target port information for the port name in GUID format. + * @gid_name: port name in GID format. + * @gid_id: LIO target port information for the port name in GID format. * @port_attrib: Port attributes that can be accessed through configfs. * @refcount: Number of objects associated with this port. * @freed_channels: Completion that will be signaled once @refcount becomes 0. @@ -419,8 +421,10 @@ struct srpt_port { u32 lid; union ib_gid gid; struct work_struct work; - struct srpt_port_id port_guid_id; - struct srpt_port_id port_gid_id; + char guid_name[64]; + struct srpt_port_id *guid_id; + char gid_name[64]; + struct srpt_port_id *gid_id; struct srpt_port_attrib port_attrib; atomic_t refcount; struct completion *freed_channels; @@ -430,6 +434,7 @@ struct srpt_port { /** * struct srpt_device - information associated by SRPT with a single HCA + * @refcnt: Reference count for this device. * @device: Backpointer to the struct ib_device managed by the IB core. * @pd: IB protection domain. * @lkey: L_Key (local key) with write access to all local memory. @@ -445,6 +450,7 @@ struct srpt_port { * @port: Information about the ports owned by this HCA. */ struct srpt_device { + struct kref refcnt; struct ib_device *device; struct ib_pd *pd; u32 lkey; |