aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/hfi1/affinity.c
diff options
context:
space:
mode:
authorMike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>2021-03-29 09:48:19 -0400
committerJason Gunthorpe <jgg@nvidia.com>2021-04-07 15:31:59 -0300
commit5de61a47eb9064cbbc5f3360d639e8e34a690a54 (patch)
treee45252434f6b86f50366e33fe68d71caa72f745f /drivers/infiniband/hw/hfi1/affinity.c
parentRDMA/cxgb4: check for ipv6 address properly while destroying listener (diff)
downloadlinux-dev-5de61a47eb9064cbbc5f3360d639e8e34a690a54.tar.xz
linux-dev-5de61a47eb9064cbbc5f3360d639e8e34a690a54.zip
IB/hfi1: Fix probe time panic when AIP is enabled with a buggy BIOS
A panic can result when AIP is enabled: BUG: unable to handle kernel NULL pointer dereference at 000000000000000 PGD 0 P4D 0 Oops: 0000 1 SMP PTI CPU: 70 PID: 981 Comm: systemd-udevd Tainted: G OE --------- - - 4.18.0-240.el8.x86_64 #1 Hardware name: Intel Corporation S2600KP/S2600KP, BIOS SE5C610.86B.01.01.0005.101720141054 10/17/2014 RIP: 0010:__bitmap_and+0x1b/0x70 RSP: 0018:ffff99aa0845f9f0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8d5a6fc18000 RCX: 0000000000000048 RDX: 0000000000000000 RSI: ffffffffc06336f0 RDI: ffff8d5a8fa67750 RBP: 0000000000000079 R08: 0000000fffffffff R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: ffffffffc06336f0 R13: 00000000000000a0 R14: ffff8d5a6fc18000 R15: 0000000000000003 FS: 00007fec137a5980(0000) GS:ffff8d5a9fa80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000a04b48002 CR4: 00000000001606e0 Call Trace: hfi1_num_netdev_contexts+0x7c/0x110 [hfi1] hfi1_init_dd+0xd7f/0x1a90 [hfi1] ? pci_bus_read_config_dword+0x49/0x70 ? pci_mmcfg_read+0x3e/0xe0 do_init_one.isra.18+0x336/0x640 [hfi1] local_pci_probe+0x41/0x90 pci_device_probe+0x105/0x1c0 really_probe+0x212/0x440 driver_probe_device+0x49/0xc0 device_driver_attach+0x50/0x60 __driver_attach+0x61/0x130 ? device_driver_attach+0x60/0x60 bus_for_each_dev+0x77/0xc0 ? klist_add_tail+0x3b/0x70 bus_add_driver+0x14d/0x1e0 ? dev_init+0x10b/0x10b [hfi1] driver_register+0x6b/0xb0 ? dev_init+0x10b/0x10b [hfi1] hfi1_mod_init+0x1e6/0x20a [hfi1] do_one_initcall+0x46/0x1c3 ? free_unref_page_commit+0x91/0x100 ? _cond_resched+0x15/0x30 ? kmem_cache_alloc_trace+0x140/0x1c0 do_init_module+0x5a/0x220 load_module+0x14b4/0x17e0 ? __do_sys_finit_module+0xa8/0x110 __do_sys_finit_module+0xa8/0x110 do_syscall_64+0x5b/0x1a0 The issue happens when pcibus_to_node() returns NO_NUMA_NODE. Fix this issue by moving the initialization of dd->node to hfi1_devdata allocation and remove the other pcibus_to_node() calls in the probe path and use dd->node instead. Affinity logic is adjusted to use a new field dd->affinity_entry as a guard instead of dd->node. Fixes: 4730f4a6c6b2 ("IB/hfi1: Activate the dummy netdev") Link: https://lore.kernel.org/r/1617025700-31865-4-git-send-email-dennis.dalessandro@cornelisnetworks.com Cc: stable@vger.kernel.org Signed-off-by: Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Diffstat (limited to 'drivers/infiniband/hw/hfi1/affinity.c')
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.c21
1 files changed, 5 insertions, 16 deletions
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 2a91b8d95e12..04b1e8f021f6 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -632,22 +632,11 @@ static void _dev_comp_vect_cpu_mask_clean_up(struct hfi1_devdata *dd,
*/
int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
{
- int node = pcibus_to_node(dd->pcidev->bus);
struct hfi1_affinity_node *entry;
const struct cpumask *local_mask;
int curr_cpu, possible, i, ret;
bool new_entry = false;
- /*
- * If the BIOS does not have the NUMA node information set, select
- * NUMA 0 so we get consistent performance.
- */
- if (node < 0) {
- dd_dev_err(dd, "Invalid PCI NUMA node. Performance may be affected\n");
- node = 0;
- }
- dd->node = node;
-
local_mask = cpumask_of_node(dd->node);
if (cpumask_first(local_mask) >= nr_cpu_ids)
local_mask = topology_core_cpumask(0);
@@ -660,7 +649,7 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
* create an entry in the global affinity structure and initialize it.
*/
if (!entry) {
- entry = node_affinity_allocate(node);
+ entry = node_affinity_allocate(dd->node);
if (!entry) {
dd_dev_err(dd,
"Unable to allocate global affinity node\n");
@@ -751,6 +740,7 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
if (new_entry)
node_affinity_add_tail(entry);
+ dd->affinity_entry = entry;
mutex_unlock(&node_affinity.lock);
return 0;
@@ -766,10 +756,9 @@ void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd)
{
struct hfi1_affinity_node *entry;
- if (dd->node < 0)
- return;
-
mutex_lock(&node_affinity.lock);
+ if (!dd->affinity_entry)
+ goto unlock;
entry = node_affinity_lookup(dd->node);
if (!entry)
goto unlock;
@@ -780,8 +769,8 @@ void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd)
*/
_dev_comp_vect_cpu_mask_clean_up(dd, entry);
unlock:
+ dd->affinity_entry = NULL;
mutex_unlock(&node_affinity.lock);
- dd->node = NUMA_NO_NODE;
}
/*