aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/core/device.c
diff options
context:
space:
mode:
authorMatan Barak <matanb@mellanox.com>2015-07-30 18:33:26 +0300
committerDoug Ledford <dledford@redhat.com>2015-08-30 18:08:50 -0400
commit03db3a2d81e6e84f3ed3cb9e087cae17d762642b (patch)
tree772aff939dea090e0e7e2e4e37ca76c044d17403 /drivers/infiniband/core/device.c
parentIB/core: Make ib_alloc_device init the kobject (diff)
downloadlinux-dev-03db3a2d81e6e84f3ed3cb9e087cae17d762642b.tar.xz
linux-dev-03db3a2d81e6e84f3ed3cb9e087cae17d762642b.zip
IB/core: Add RoCE GID table management
RoCE GIDs are based on IP addresses configured on Ethernet net-devices which relate to the RDMA (RoCE) device port. Currently, each of the low-level drivers that support RoCE (ocrdma, mlx4) manages its own RoCE port GID table. As there's nothing which is essentially vendor specific, we generalize that, and enhance the RDMA core GID cache to do this job. In order to populate the GID table, we listen for events: (a) netdev up/down/change_addr events - if a netdev is built onto our RoCE device, we need to add/delete its IPs. This involves adding all GIDs related to this ndev, add default GIDs, etc. (b) inet events - add new GIDs (according to the IP addresses) to the table. For programming the port RoCE GID table, providers must implement the add_gid and del_gid callbacks. RoCE GID management requires us to state the associated net_device alongside the GID. This information is necessary in order to manage the GID table. For example, when a net_device is removed, its associated GIDs need to be removed as well. RoCE mandates generating a default GID for each port, based on the related net-device's IPv6 link local. In contrast to the GID based on the regular IPv6 link-local (as we generate GID per IP address), the default GID is also available when the net device is down (in order to support loopback). Locking is done as follows: The patch modify the GID table code both for new RoCE drivers implementing the add_gid/del_gid callbacks and for current RoCE and IB drivers that do not. The flows for updating the table are different, so the locking requirements are too. While updating RoCE GID table, protection against multiple writers is achieved via mutex_lock(&table->lock). Since writing to a table requires us to find an entry (possible a free entry) in the table and then modify it, this mutex protects both the find_gid and write_gid ensuring the atomicity of the action. Each entry in the GID cache is protected by rwlock. In RoCE, writing (usually results from netdev notifier) involves invoking the vendor's add_gid and del_gid callbacks, which could sleep. Therefore, an invalid flag is added for each entry. Updates for RoCE are done via a workqueue, thus sleeping is permitted. In IB, updates are done in write_lock_irq(&device->cache.lock), thus write_gid isn't allowed to sleep and add_gid/del_gid are not called. When passing net-device into/out-of the GID cache, the device is always passed held (dev_hold). The code uses a single work item for updating all RDMA devices, following a netdev or inet notifier. The patch moves the cache from being a client (which was incorrect, as the cache is part of the IB infrastructure) to being explicitly initialized/freed when a device is registered/removed. Signed-off-by: Matan Barak <matanb@mellanox.com> Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers/infiniband/core/device.c')
-rw-r--r--drivers/infiniband/core/device.c96
1 files changed, 88 insertions, 8 deletions
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index a4a914afae1c..dfa2c5744741 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -40,6 +40,8 @@
#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <rdma/rdma_netlink.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_cache.h>
#include "core_priv.h"
@@ -169,6 +171,7 @@ static void ib_device_release(struct device *device)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
+ ib_cache_release_one(dev);
kfree(dev->port_immutable);
kfree(dev);
}
@@ -342,10 +345,17 @@ int ib_register_device(struct ib_device *device,
goto out;
}
+ ret = ib_cache_setup_one(device);
+ if (ret) {
+ printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n");
+ goto out;
+ }
+
ret = ib_device_register_sysfs(device, port_callback);
if (ret) {
printk(KERN_WARNING "Couldn't register device %s with driver model\n",
device->name);
+ ib_cache_cleanup_one(device);
goto out;
}
@@ -399,6 +409,7 @@ void ib_unregister_device(struct ib_device *device)
mutex_unlock(&device_mutex);
ib_device_unregister_sysfs(device);
+ ib_cache_cleanup_one(device);
down_write(&lists_rwsem);
spin_lock_irqsave(&device->client_data_lock, flags);
@@ -670,11 +681,80 @@ EXPORT_SYMBOL(ib_query_port);
int ib_query_gid(struct ib_device *device,
u8 port_num, int index, union ib_gid *gid)
{
+ if (rdma_cap_roce_gid_table(device, port_num))
+ return ib_get_cached_gid(device, port_num, index, gid);
+
return device->query_gid(device, port_num, index, gid);
}
EXPORT_SYMBOL(ib_query_gid);
/**
+ * ib_enum_roce_netdev - enumerate all RoCE ports
+ * @ib_dev : IB device we want to query
+ * @filter: Should we call the callback?
+ * @filter_cookie: Cookie passed to filter
+ * @cb: Callback to call for each found RoCE ports
+ * @cookie: Cookie passed back to the callback
+ *
+ * Enumerates all of the physical RoCE ports of ib_dev
+ * which are related to netdevice and calls callback() on each
+ * device for which filter() function returns non zero.
+ */
+void ib_enum_roce_netdev(struct ib_device *ib_dev,
+ roce_netdev_filter filter,
+ void *filter_cookie,
+ roce_netdev_callback cb,
+ void *cookie)
+{
+ u8 port;
+
+ for (port = rdma_start_port(ib_dev); port <= rdma_end_port(ib_dev);
+ port++)
+ if (rdma_protocol_roce(ib_dev, port)) {
+ struct net_device *idev = NULL;
+
+ if (ib_dev->get_netdev)
+ idev = ib_dev->get_netdev(ib_dev, port);
+
+ if (idev &&
+ idev->reg_state >= NETREG_UNREGISTERED) {
+ dev_put(idev);
+ idev = NULL;
+ }
+
+ if (filter(ib_dev, port, idev, filter_cookie))
+ cb(ib_dev, port, idev, cookie);
+
+ if (idev)
+ dev_put(idev);
+ }
+}
+
+/**
+ * ib_enum_all_roce_netdevs - enumerate all RoCE devices
+ * @filter: Should we call the callback?
+ * @filter_cookie: Cookie passed to filter
+ * @cb: Callback to call for each found RoCE ports
+ * @cookie: Cookie passed back to the callback
+ *
+ * Enumerates all RoCE devices' physical ports which are related
+ * to netdevices and calls callback() on each device for which
+ * filter() function returns non zero.
+ */
+void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
+ void *filter_cookie,
+ roce_netdev_callback cb,
+ void *cookie)
+{
+ struct ib_device *dev;
+
+ down_read(&lists_rwsem);
+ list_for_each_entry(dev, &device_list, core_list)
+ ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie);
+ up_read(&lists_rwsem);
+}
+
+/**
* ib_query_pkey - Get P_Key table entry
* @device:Device to query
* @port_num:Port number to query
@@ -753,6 +833,13 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid,
int ret, port, i;
for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
+ if (rdma_cap_roce_gid_table(device, port)) {
+ if (!ib_cache_gid_find_by_port(device, gid, port,
+ NULL, index))
+ *port_num = port;
+ return 0;
+ }
+
for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
ret = ib_query_gid(device, port, i, &tmp_gid);
if (ret)
@@ -874,17 +961,10 @@ static int __init ib_core_init(void)
goto err_sysfs;
}
- ret = ib_cache_setup();
- if (ret) {
- printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n");
- goto err_nl;
- }
+ ib_cache_setup();
return 0;
-err_nl:
- ibnl_cleanup();
-
err_sysfs:
class_unregister(&ib_class);