aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/core/verbs.c
diff options
context:
space:
mode:
authorParav Pandit <parav@mellanox.com>2017-11-14 14:51:49 +0200
committerJason Gunthorpe <jgg@mellanox.com>2017-12-18 13:49:43 -0700
commit1060f86534147c2830db4bbc9dd849d1892a611b (patch)
treef4c8d11a3fa5145531a2bd294b592c614bcc7cb9 /drivers/infiniband/core/verbs.c
parentIB/ocrdma: Remove unneeded conversions to bool (diff)
downloadlinux-dev-1060f86534147c2830db4bbc9dd849d1892a611b.tar.xz
linux-dev-1060f86534147c2830db4bbc9dd849d1892a611b.zip
IB/{core/cm}: Fix generating a return AH for RoCEE
When computing a UD reverse path (return AH) from a WC the code was not doing a route lookup anchored in a specific netdevice. This caused several bugs, including broken IPv6 link-local address support in RoCEv2. [1] This fixes the lookup by determining the GID table entry that the HW matched to the SGID for the WC and then using the netdevice from that entry to perform the route and ND lookup for the 'DGID' to build a return AH. RoCE GID table management ensures that right upper netdevices of the physical netdevices are added. Therefore init_ah_from_wc doesn't need to perform such check. Now that route lookup is done based on the netdevice of the GID entry, simplify code to not have ifindex and vlan pointers. As part of that, refactor to have netdevice as input parameter. This is already discussed at [2]. Finally ib_init_ah_from_wc resolves dmac for unicast GID in similar way as what ib_resolve_eth_dmac() does. So ib_resolve_eth_dmac is refactored to split for unicast and non unicast GIDs, so that it can be reused by ib_init_ah_from_wc. While we are at refactoring ib_resolve_eth_dmac(), it is further simplified (a) to avoid hoplimit as optional parameter, as there is only one user who always queries hoplimit. (b) for empty line. (c) avoided zero initialization of ret. (d) removed as exported symbol as only ib core uses it. For IPv6, this is tested using simple rping test as below. rping -sv -a ::0 rping -c -a fe80::268a:7ff:fe55:4661%ens2f1 -C 1 -v -d [1] https://www.spinics.net/lists/linux-rdma/msg45690.html [2] https://www.spinics.net/lists/linux-rdma/msg45710.html Signed-off-by: Parav Pandit <parav@mellanox.com> Reviewed-by: Matan Barak <matanb@mellanox.com> Reviewed-by: Mark Bloch <markb@mellanox.com> Reported-by: Roland Dreier <roland@purestorage.com> Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Diffstat (limited to 'drivers/infiniband/core/verbs.c')
-rw-r--r--drivers/infiniband/core/verbs.c136
1 files changed, 58 insertions, 78 deletions
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 3fb8fb6cc824..54b56c4fcc38 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -481,6 +481,40 @@ int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
}
EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
+/* Resolve destination mac address and hop limit for unicast destination
+ * GID entry, considering the source GID entry as well.
+ * ah_attribute must have have valid port_num, sgid_index.
+ */
+static int ib_resolve_unicast_gid_dmac(struct ib_device *device,
+ struct rdma_ah_attr *ah_attr)
+{
+ struct ib_gid_attr sgid_attr;
+ struct ib_global_route *grh;
+ int hop_limit = 0xff;
+ union ib_gid sgid;
+ int ret;
+
+ grh = rdma_ah_retrieve_grh(ah_attr);
+
+ ret = ib_query_gid(device,
+ rdma_ah_get_port_num(ah_attr),
+ grh->sgid_index,
+ &sgid, &sgid_attr);
+ if (ret || !sgid_attr.ndev) {
+ if (!ret)
+ ret = -ENXIO;
+ return ret;
+ }
+
+ ret = rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid,
+ ah_attr->roce.dmac,
+ sgid_attr.ndev, &hop_limit);
+ dev_put(sgid_attr.ndev);
+
+ grh->hop_limit = hop_limit;
+ return ret;
+}
+
/*
* This function creates ah from the incoming packet.
* Incoming packet has dgid of the receiver node on which this code is
@@ -490,9 +524,6 @@ EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
* as sgid and, sgid is used as dgid because sgid contains destinations
* GID whom to respond to.
*
- * This is why when calling rdma_addr_find_l2_eth_by_grh() function, the
- * position of arguments dgid and sgid do not match the order of the
- * parameters.
*/
int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
const struct ib_wc *wc, const struct ib_grh *grh,
@@ -523,57 +554,33 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
if (ret)
return ret;
+ rdma_ah_set_sl(ah_attr, wc->sl);
+ rdma_ah_set_port_num(ah_attr, port_num);
+
if (rdma_protocol_roce(device, port_num)) {
- int if_index = 0;
u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
wc->vlan_id : 0xffff;
- struct net_device *idev;
- struct net_device *resolved_dev;
if (!(wc->wc_flags & IB_WC_GRH))
return -EPROTOTYPE;
- if (!device->get_netdev)
- return -EOPNOTSUPP;
-
- idev = device->get_netdev(device, port_num);
- if (!idev)
- return -ENODEV;
-
- ret = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid,
- ah_attr->roce.dmac,
- wc->wc_flags & IB_WC_WITH_VLAN ?
- NULL : &vlan_id,
- &if_index, &hoplimit);
- if (ret) {
- dev_put(idev);
- return ret;
- }
-
- resolved_dev = dev_get_by_index(&init_net, if_index);
- rcu_read_lock();
- if (resolved_dev != idev && !rdma_is_upper_dev_rcu(idev,
- resolved_dev))
- ret = -EHOSTUNREACH;
- rcu_read_unlock();
- dev_put(idev);
- dev_put(resolved_dev);
- if (ret)
- return ret;
-
- ret = get_sgid_index_from_eth(device, port_num, vlan_id,
- &dgid, gid_type, &gid_index);
+ ret = get_sgid_index_from_eth(device, port_num,
+ vlan_id, &dgid,
+ gid_type, &gid_index);
if (ret)
return ret;
- }
- rdma_ah_set_dlid(ah_attr, wc->slid);
- rdma_ah_set_sl(ah_attr, wc->sl);
- rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits);
- rdma_ah_set_port_num(ah_attr, port_num);
+ flow_class = be32_to_cpu(grh->version_tclass_flow);
+ rdma_ah_set_grh(ah_attr, &sgid,
+ flow_class & 0xFFFFF,
+ (u8)gid_index, hoplimit,
+ (flow_class >> 20) & 0xFF);
+ return ib_resolve_unicast_gid_dmac(device, ah_attr);
+ } else {
+ rdma_ah_set_dlid(ah_attr, wc->slid);
+ rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits);
- if (wc->wc_flags & IB_WC_GRH) {
- if (!rdma_cap_eth_ah(device, port_num)) {
+ if (wc->wc_flags & IB_WC_GRH) {
if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
ret = ib_find_cached_gid_by_port(device, &dgid,
IB_GID_TYPE_IB,
@@ -584,16 +591,15 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
} else {
gid_index = 0;
}
- }
-
- flow_class = be32_to_cpu(grh->version_tclass_flow);
- rdma_ah_set_grh(ah_attr, &sgid,
- flow_class & 0xFFFFF,
- (u8)gid_index, hoplimit,
- (flow_class >> 20) & 0xFF);
+ flow_class = be32_to_cpu(grh->version_tclass_flow);
+ rdma_ah_set_grh(ah_attr, &sgid,
+ flow_class & 0xFFFFF,
+ (u8)gid_index, hoplimit,
+ (flow_class >> 20) & 0xFF);
+ }
+ return 0;
}
- return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_wc);
@@ -1290,34 +1296,8 @@ static int ib_resolve_eth_dmac(struct ib_device *device,
(char *)ah_attr->roce.dmac);
}
} else {
- union ib_gid sgid;
- struct ib_gid_attr sgid_attr;
- int ifindex;
- int hop_limit;
-
- ret = ib_query_gid(device,
- rdma_ah_get_port_num(ah_attr),
- grh->sgid_index,
- &sgid, &sgid_attr);
-
- if (ret || !sgid_attr.ndev) {
- if (!ret)
- ret = -ENXIO;
- goto out;
- }
-
- ifindex = sgid_attr.ndev->ifindex;
-
- ret =
- rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid,
- ah_attr->roce.dmac,
- NULL, &ifindex, &hop_limit);
-
- dev_put(sgid_attr.ndev);
-
- grh->hop_limit = hop_limit;
+ ret = ib_resolve_unicast_gid_dmac(device, ah_attr);
}
-out:
return ret;
}