diff options
Diffstat (limited to 'drivers/infiniband/hw/hfi1')
42 files changed, 3614 insertions, 1179 deletions
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index 0cf97a09b64b..88085f65432e 100644 --- a/drivers/infiniband/hw/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -12,7 +12,7 @@ hfi1-y := affinity.o chip.o device.o driver.o efivar.o \ init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \ qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \ uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \ - verbs_txreq.o + verbs_txreq.o vnic_main.o vnic_sdma.o hfi1-$(CONFIG_DEBUG_FS) += debugfs.o CFLAGS_trace.o = -I$(src) diff --git a/drivers/infiniband/hw/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h index 0d58fe3b49b5..794e6814a531 100644 --- a/drivers/infiniband/hw/hfi1/aspm.h +++ b/drivers/infiniband/hw/hfi1/aspm.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -229,14 +229,17 @@ static inline void aspm_ctx_timer_function(unsigned long data) spin_unlock_irqrestore(&rcd->aspm_lock, flags); } -/* Disable interrupt processing for verbs contexts when PSM contexts are open */ +/* + * Disable interrupt processing for verbs contexts when PSM or VNIC contexts + * are open. + */ static inline void aspm_disable_all(struct hfi1_devdata *dd) { struct hfi1_ctxtdata *rcd; unsigned long flags; unsigned i; - for (i = 0; i < dd->first_user_ctxt; i++) { + for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { rcd = dd->rcd[i]; del_timer_sync(&rcd->aspm_timer); spin_lock_irqsave(&rcd->aspm_lock, flags); @@ -260,7 +263,7 @@ static inline void aspm_enable_all(struct hfi1_devdata *dd) if (aspm_mode != ASPM_MODE_DYNAMIC) return; - for (i = 0; i < dd->first_user_ctxt; i++) { + for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { rcd = dd->rcd[i]; spin_lock_irqsave(&rcd->aspm_lock, flags); rcd->aspm_intr_enable = true; @@ -276,7 +279,7 @@ static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd) (unsigned long)rcd); rcd->aspm_intr_supported = rcd->dd->aspm_supported && aspm_mode == ASPM_MODE_DYNAMIC && - rcd->ctxt < rcd->dd->first_user_ctxt; + rcd->ctxt < rcd->dd->first_dyn_alloc_ctxt; } static inline void aspm_init(struct hfi1_devdata *dd) @@ -286,7 +289,7 @@ static inline void aspm_init(struct hfi1_devdata *dd) spin_lock_init(&dd->aspm_lock); dd->aspm_supported = aspm_hw_l1_supported(dd); - for (i = 0; i < dd->first_user_ctxt; i++) + for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) aspm_ctx_init(dd->rcd[i]); /* Start with ASPM disabled */ diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 121a4c920f1b..5d6b1eeaa9a0 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -64,6 +64,7 @@ #include "platform.h" #include "aspm.h" #include "affinity.h" +#include "debugfs.h" #define NUM_IB_PORTS 1 @@ -125,9 +126,16 @@ struct flag_table { #define DEFAULT_KRCVQS 2 #define MIN_KERNEL_KCTXTS 2 #define FIRST_KERNEL_KCTXT 1 -/* sizes for both the QP and RSM map tables */ -#define NUM_MAP_ENTRIES 256 -#define NUM_MAP_REGS 32 + +/* + * RSM instance allocation + * 0 - Verbs + * 1 - User Fecn Handling + * 2 - Vnic + */ +#define RSM_INS_VERBS 0 +#define RSM_INS_FECN 1 +#define RSM_INS_VNIC 2 /* Bit offset into the GUID which carries HFI id information */ #define GUID_HFI_INDEX_SHIFT 39 @@ -138,8 +146,7 @@ struct flag_table { #define is_emulator_p(dd) ((((dd)->irev) & 0xf) == 3) #define is_emulator_s(dd) ((((dd)->irev) & 0xf) == 4) -/* RSM fields */ - +/* RSM fields for Verbs */ /* packet type */ #define IB_PACKET_TYPE 2ull #define QW_SHIFT 6ull @@ -169,6 +176,28 @@ struct flag_table { /* QPN[m+n:1] QW 1, OFFSET 1 */ #define QPN_SELECT_OFFSET ((1ull << QW_SHIFT) | (1ull)) +/* RSM fields for Vnic */ +/* L2_TYPE: QW 0, OFFSET 61 - for match */ +#define L2_TYPE_QW 0ull +#define L2_TYPE_BIT_OFFSET 61ull +#define L2_TYPE_OFFSET(off) ((L2_TYPE_QW << QW_SHIFT) | (off)) +#define L2_TYPE_MATCH_OFFSET L2_TYPE_OFFSET(L2_TYPE_BIT_OFFSET) +#define L2_TYPE_MASK 3ull +#define L2_16B_VALUE 2ull + +/* L4_TYPE QW 1, OFFSET 0 - for match */ +#define L4_TYPE_QW 1ull +#define L4_TYPE_BIT_OFFSET 0ull +#define L4_TYPE_OFFSET(off) ((L4_TYPE_QW << QW_SHIFT) | (off)) +#define L4_TYPE_MATCH_OFFSET L4_TYPE_OFFSET(L4_TYPE_BIT_OFFSET) +#define L4_16B_TYPE_MASK 0xFFull +#define L4_16B_ETH_VALUE 0x78ull + +/* 16B VESWID - for select */ +#define L4_16B_HDR_VESWID_OFFSET ((2 << QW_SHIFT) | (16ull)) +/* 16B ENTROPY - for select */ +#define L2_16B_ENTROPY_OFFSET ((1 << QW_SHIFT) | (32ull)) + /* defines to build power on SC2VL table */ #define SC2VL_VAL( \ num, \ @@ -1026,7 +1055,7 @@ static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg); static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg); static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg); static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg); -static void set_partition_keys(struct hfi1_pportdata *); +static void set_partition_keys(struct hfi1_pportdata *ppd); static const char *link_state_name(u32 state); static const char *link_state_reason_name(struct hfi1_pportdata *ppd, u32 state); @@ -1039,12 +1068,14 @@ static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state, int msecs); static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc); static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr); -static void handle_temp_err(struct hfi1_devdata *); -static void dc_shutdown(struct hfi1_devdata *); -static void dc_start(struct hfi1_devdata *); +static void handle_temp_err(struct hfi1_devdata *dd); +static void dc_shutdown(struct hfi1_devdata *dd); +static void dc_start(struct hfi1_devdata *dd); static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp, unsigned int *np); static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd); +static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms); +static void clear_rsm_rule(struct hfi1_devdata *dd, u8 rule_index); /* * Error interrupt table entry. This is used as input to the interrupt @@ -6379,18 +6410,17 @@ static void lcb_shutdown(struct hfi1_devdata *dd, int abort) * * The expectation is that the caller of this routine would have taken * care of properly transitioning the link into the correct state. + * NOTE: the caller needs to acquire the dd->dc8051_lock lock + * before calling this function. */ -static void dc_shutdown(struct hfi1_devdata *dd) +static void _dc_shutdown(struct hfi1_devdata *dd) { - unsigned long flags; + lockdep_assert_held(&dd->dc8051_lock); - spin_lock_irqsave(&dd->dc8051_lock, flags); - if (dd->dc_shutdown) { - spin_unlock_irqrestore(&dd->dc8051_lock, flags); + if (dd->dc_shutdown) return; - } + dd->dc_shutdown = 1; - spin_unlock_irqrestore(&dd->dc8051_lock, flags); /* Shutdown the LCB */ lcb_shutdown(dd, 1); /* @@ -6401,35 +6431,45 @@ static void dc_shutdown(struct hfi1_devdata *dd) write_csr(dd, DC_DC8051_CFG_RST, 0x1); } +static void dc_shutdown(struct hfi1_devdata *dd) +{ + mutex_lock(&dd->dc8051_lock); + _dc_shutdown(dd); + mutex_unlock(&dd->dc8051_lock); +} + /* * Calling this after the DC has been brought out of reset should not * do any damage. + * NOTE: the caller needs to acquire the dd->dc8051_lock lock + * before calling this function. */ -static void dc_start(struct hfi1_devdata *dd) +static void _dc_start(struct hfi1_devdata *dd) { - unsigned long flags; - int ret; + lockdep_assert_held(&dd->dc8051_lock); - spin_lock_irqsave(&dd->dc8051_lock, flags); if (!dd->dc_shutdown) - goto done; - spin_unlock_irqrestore(&dd->dc8051_lock, flags); + return; + /* Take the 8051 out of reset */ write_csr(dd, DC_DC8051_CFG_RST, 0ull); /* Wait until 8051 is ready */ - ret = wait_fm_ready(dd, TIMEOUT_8051_START); - if (ret) { + if (wait_fm_ready(dd, TIMEOUT_8051_START)) dd_dev_err(dd, "%s: timeout starting 8051 firmware\n", __func__); - } + /* Take away reset for LCB and RX FPE (set in lcb_shutdown). */ write_csr(dd, DCC_CFG_RESET, 0x10); /* lcb_shutdown() with abort=1 does not restore these */ write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en); - spin_lock_irqsave(&dd->dc8051_lock, flags); dd->dc_shutdown = 0; -done: - spin_unlock_irqrestore(&dd->dc8051_lock, flags); +} + +static void dc_start(struct hfi1_devdata *dd) +{ + mutex_lock(&dd->dc8051_lock); + _dc_start(dd); + mutex_unlock(&dd->dc8051_lock); } /* @@ -6701,7 +6741,13 @@ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd) int i; /* enable all kernel contexts */ - for (i = 0; i < dd->n_krcv_queues; i++) { + for (i = 0; i < dd->num_rcv_contexts; i++) { + struct hfi1_ctxtdata *rcd = dd->rcd[i]; + + /* Ensure all non-user contexts(including vnic) are enabled */ + if (!rcd || !rcd->sc || (rcd->sc->type == SC_USER)) + continue; + rcvmask = HFI1_RCVCTRL_CTXT_ENB; /* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */ rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ? @@ -7077,7 +7123,7 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd) { struct hfi1_devdata *dd = ppd->dd; - /* Sanity check - ppd->pkeys[2] should be 0, or already initalized */ + /* Sanity check - ppd->pkeys[2] should be 0, or already initialized */ if (!((ppd->pkeys[2] == 0) || (ppd->pkeys[2] == FULL_MGMT_P_KEY))) dd_dev_warn(dd, "%s pkey[2] already set to 0x%x, resetting it to 0x%x\n", __func__, ppd->pkeys[2], FULL_MGMT_P_KEY); @@ -7165,7 +7211,7 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width, * set the max_rate field in handle_verify_cap until v0.19. */ if ((dd->icode == ICODE_RTL_SILICON) && - (dd->dc8051_ver < dc8051_ver(0, 19))) { + (dd->dc8051_ver < dc8051_ver(0, 19, 0))) { /* max_rate: 0 = 12.5G, 1 = 25G */ switch (max_rate) { case 0: @@ -7277,15 +7323,6 @@ void handle_verify_cap(struct work_struct *work) lcb_shutdown(dd, 0); adjust_lcb_for_fpga_serdes(dd); - /* - * These are now valid: - * remote VerifyCap fields in the general LNI config - * CSR DC8051_STS_REMOTE_GUID - * CSR DC8051_STS_REMOTE_NODE_TYPE - * CSR DC8051_STS_REMOTE_FM_SECURITY - * CSR DC8051_STS_REMOTE_PORT_NO - */ - read_vc_remote_phy(dd, &power_management, &continious); read_vc_remote_fabric(dd, &vau, &z, &vcu, &vl15buf, &partner_supported_crc); @@ -7350,7 +7387,7 @@ void handle_verify_cap(struct work_struct *work) } ppd->link_speed_active = 0; /* invalid value */ - if (dd->dc8051_ver < dc8051_ver(0, 20)) { + if (dd->dc8051_ver < dc8051_ver(0, 20, 0)) { /* remote_tx_rate: 0 = 12.5G, 1 = 25G */ switch (remote_tx_rate) { case 0: @@ -7416,20 +7453,6 @@ void handle_verify_cap(struct work_struct *work) write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */ set_8051_lcb_access(dd); - ppd->neighbor_guid = - read_csr(dd, DC_DC8051_STS_REMOTE_GUID); - ppd->neighbor_port_number = read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) & - DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK; - ppd->neighbor_type = - read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) & - DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK; - ppd->neighbor_fm_security = - read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) & - DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK; - dd_dev_info(dd, - "Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n", - ppd->neighbor_guid, ppd->neighbor_type, - ppd->mgmt_allowed, ppd->neighbor_fm_security); if (ppd->mgmt_allowed) add_full_mgmt_pkey(ppd); @@ -7897,6 +7920,9 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg) reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK; } + if (unlikely(hfi1_dbg_fault_suppress_err(&dd->verbs_dev))) + reg &= ~DCC_ERR_FLG_LATE_EBP_ERR_SMASK; + /* report any remaining errors */ if (reg) dd_dev_info_ratelimited(dd, "DCC Error: %s\n", @@ -7995,7 +8021,9 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source) if (likely(source < dd->num_rcv_contexts)) { rcd = dd->rcd[source]; if (rcd) { - if (source < dd->first_user_ctxt) + /* Check for non-user contexts, including vnic */ + if ((source < dd->first_dyn_alloc_ctxt) || + (rcd->sc && (rcd->sc->type == SC_KERNEL))) rcd->do_interrupt(rcd, 0); else handle_user_interrupt(rcd); @@ -8023,7 +8051,8 @@ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source) rcd = dd->rcd[source]; if (rcd) { /* only pay attention to user urgent interrupts */ - if (source >= dd->first_user_ctxt) + if ((source >= dd->first_dyn_alloc_ctxt) && + (!rcd->sc || (rcd->sc->type == SC_USER))) handle_user_interrupt(rcd); return; /* OK */ } @@ -8156,10 +8185,10 @@ static irqreturn_t sdma_interrupt(int irq, void *data) /* handle the interrupt(s) */ sdma_engine_interrupt(sde, status); - } else + } else { dd_dev_err(dd, "SDMA engine %u interrupt, but no status bits set\n", sde->this_idx); - + } return IRQ_HANDLED; } @@ -8344,6 +8373,52 @@ static int read_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 *data) } /* + * Provide a cache for some of the LCB registers in case the LCB is + * unavailable. + * (The LCB is unavailable in certain link states, for example.) + */ +struct lcb_datum { + u32 off; + u64 val; +}; + +static struct lcb_datum lcb_cache[] = { + { DC_LCB_ERR_INFO_RX_REPLAY_CNT, 0}, + { DC_LCB_ERR_INFO_SEQ_CRC_CNT, 0 }, + { DC_LCB_ERR_INFO_REINIT_FROM_PEER_CNT, 0 }, +}; + +static void update_lcb_cache(struct hfi1_devdata *dd) +{ + int i; + int ret; + u64 val; + + for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) { + ret = read_lcb_csr(dd, lcb_cache[i].off, &val); + + /* Update if we get good data */ + if (likely(ret != -EBUSY)) + lcb_cache[i].val = val; + } +} + +static int read_lcb_cache(u32 off, u64 *val) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) { + if (lcb_cache[i].off == off) { + *val = lcb_cache[i].val; + return 0; + } + } + + pr_warn("%s bad offset 0x%x\n", __func__, off); + return -1; +} + +/* * Read an LCB CSR. Access may not be in host control, so check. * Return 0 on success, -EBUSY on failure. */ @@ -8354,9 +8429,13 @@ int read_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 *data) /* if up, go through the 8051 for the value */ if (ppd->host_link_state & HLS_UP) return read_lcb_via_8051(dd, addr, data); - /* if going up or down, no access */ - if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE)) - return -EBUSY; + /* if going up or down, check the cache, otherwise, no access */ + if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE)) { + if (read_lcb_cache(addr, data)) + return -EBUSY; + return 0; + } + /* otherwise, host has access */ *data = read_csr(dd, addr); return 0; @@ -8371,7 +8450,7 @@ static int write_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 data) int ret; if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR || - (dd->dc8051_ver < dc8051_ver(0, 20))) { + (dd->dc8051_ver < dc8051_ver(0, 20, 0))) { if (acquire_lcb_access(dd, 0) == 0) { write_csr(dd, addr, data); release_lcb_access(dd, 0); @@ -8420,16 +8499,11 @@ static int do_8051_command( { u64 reg, completed; int return_code; - unsigned long flags; unsigned long timeout; hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data); - /* - * Alternative to holding the lock for a long time: - * - keep busy wait - have other users bounce off - */ - spin_lock_irqsave(&dd->dc8051_lock, flags); + mutex_lock(&dd->dc8051_lock); /* We can't send any commands to the 8051 if it's in reset */ if (dd->dc_shutdown) { @@ -8455,10 +8529,8 @@ static int do_8051_command( return_code = -ENXIO; goto fail; } - spin_unlock_irqrestore(&dd->dc8051_lock, flags); - dc_shutdown(dd); - dc_start(dd); - spin_lock_irqsave(&dd->dc8051_lock, flags); + _dc_shutdown(dd); + _dc_start(dd); } /* @@ -8539,8 +8611,7 @@ static int do_8051_command( write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0); fail: - spin_unlock_irqrestore(&dd->dc8051_lock, flags); - + mutex_unlock(&dd->dc8051_lock); return return_code; } @@ -8677,13 +8748,20 @@ static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id, & REMOTE_DEVICE_REV_MASK; } -void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b) +void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor, + u8 *ver_patch) { u32 frame; read_8051_config(dd, MISC_STATUS, GENERAL_CONFIG, &frame); - *ver_a = (frame >> STS_FM_VERSION_A_SHIFT) & STS_FM_VERSION_A_MASK; - *ver_b = (frame >> STS_FM_VERSION_B_SHIFT) & STS_FM_VERSION_B_MASK; + *ver_major = (frame >> STS_FM_VERSION_MAJOR_SHIFT) & + STS_FM_VERSION_MAJOR_MASK; + *ver_minor = (frame >> STS_FM_VERSION_MINOR_SHIFT) & + STS_FM_VERSION_MINOR_MASK; + + read_8051_config(dd, VERSION_PATCH, GENERAL_CONFIG, &frame); + *ver_patch = (frame >> STS_FM_VERSION_PATCH_SHIFT) & + STS_FM_VERSION_PATCH_MASK; } static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management, @@ -8891,8 +8969,6 @@ int send_idle_sma(struct hfi1_devdata *dd, u64 message) */ static int do_quick_linkup(struct hfi1_devdata *dd) { - u64 reg; - unsigned long timeout; int ret; lcb_shutdown(dd, 0); @@ -8915,19 +8991,9 @@ static int do_quick_linkup(struct hfi1_devdata *dd) write_csr(dd, DC_LCB_CFG_RUN, 1ull << DC_LCB_CFG_RUN_EN_SHIFT); - /* watch LCB_STS_LINK_TRANSFER_ACTIVE */ - timeout = jiffies + msecs_to_jiffies(10); - while (1) { - reg = read_csr(dd, DC_LCB_STS_LINK_TRANSFER_ACTIVE); - if (reg) - break; - if (time_after(jiffies, timeout)) { - dd_dev_err(dd, - "timeout waiting for LINK_TRANSFER_ACTIVE\n"); - return -ETIMEDOUT; - } - udelay(2); - } + ret = wait_link_transfer_active(dd, 10); + if (ret) + return ret; write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT); @@ -9091,7 +9157,7 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd) if (ret) goto set_local_link_attributes_fail; - if (dd->dc8051_ver < dc8051_ver(0, 20)) { + if (dd->dc8051_ver < dc8051_ver(0, 20, 0)) { /* set the tx rate to the fastest enabled */ if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G) ppd->local_tx_rate = 1; @@ -9274,7 +9340,7 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd, if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) || (qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING)) - dd_dev_info(dd, "%s: QSFP cable on fire\n", + dd_dev_info(dd, "%s: QSFP cable temperature too high\n", __func__); if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) || @@ -9494,8 +9560,11 @@ static int test_qsfp_read(struct hfi1_pportdata *ppd) int ret; u8 status; - /* report success if not a QSFP */ - if (ppd->port_type != PORT_TYPE_QSFP) + /* + * Report success if not a QSFP or, if it is a QSFP, but the cable is + * not present + */ + if (ppd->port_type != PORT_TYPE_QSFP || !qsfp_mod_present(ppd)) return 0; /* read byte 2, the status byte */ @@ -10082,6 +10151,64 @@ static void check_lni_states(struct hfi1_pportdata *ppd) decode_state_complete(ppd, last_remote_state, "received"); } +/* wait for wait_ms for LINK_TRANSFER_ACTIVE to go to 1 */ +static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms) +{ + u64 reg; + unsigned long timeout; + + /* watch LCB_STS_LINK_TRANSFER_ACTIVE */ + timeout = jiffies + msecs_to_jiffies(wait_ms); + while (1) { + reg = read_csr(dd, DC_LCB_STS_LINK_TRANSFER_ACTIVE); + if (reg) + break; + if (time_after(jiffies, timeout)) { + dd_dev_err(dd, + "timeout waiting for LINK_TRANSFER_ACTIVE\n"); + return -ETIMEDOUT; + } + udelay(2); + } + return 0; +} + +/* called when the logical link state is not down as it should be */ +static void force_logical_link_state_down(struct hfi1_pportdata *ppd) +{ + struct hfi1_devdata *dd = ppd->dd; + + /* + * Bring link up in LCB loopback + */ + write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 1); + write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK, + DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK); + + write_csr(dd, DC_LCB_CFG_LANE_WIDTH, 0); + write_csr(dd, DC_LCB_CFG_REINIT_AS_SLAVE, 0); + write_csr(dd, DC_LCB_CFG_CNT_FOR_SKIP_STALL, 0x110); + write_csr(dd, DC_LCB_CFG_LOOPBACK, 0x2); + + write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0); + (void)read_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET); + udelay(3); + write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 1); + write_csr(dd, DC_LCB_CFG_RUN, 1ull << DC_LCB_CFG_RUN_EN_SHIFT); + + wait_link_transfer_active(dd, 100); + + /* + * Bring the link down again. + */ + write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 1); + write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 0); + write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK, 0); + + /* call again to adjust ppd->statusp, if needed */ + get_logical_state(ppd); +} + /* * Helper for set_link_state(). Do not call except from that routine. * Expects ppd->hls_mutex to be held. @@ -10098,13 +10225,15 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason) int do_transition; int do_wait; + update_lcb_cache(dd); + previous_state = ppd->host_link_state; ppd->host_link_state = HLS_GOING_OFFLINE; pstate = read_physical_state(dd); if (pstate == PLS_OFFLINE) { do_transition = 0; /* in right state */ do_wait = 0; /* ...no need to wait */ - } else if ((pstate & 0xff) == PLS_OFFLINE) { + } else if ((pstate & 0xf0) == PLS_OFFLINE) { do_transition = 0; /* in an offline transient state */ do_wait = 1; /* ...wait for it to settle */ } else { @@ -10135,15 +10264,18 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason) return ret; } - /* make sure the logical state is also down */ - wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000); - /* * Now in charge of LCB - must be after the physical state is * offline.quiet and before host_link_state is changed. */ set_host_lcb_access(dd); write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */ + + /* make sure the logical state is also down */ + ret = wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000); + if (ret) + force_logical_link_state_down(ppd); + ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */ if (ppd->port_type == PORT_TYPE_QSFP && @@ -10380,11 +10512,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) goto unexpected; } - ppd->host_link_state = HLS_UP_INIT; ret = wait_logical_linkstate(ppd, IB_PORT_INIT, 1000); if (ret) { - /* logical state didn't change, stay at going_up */ - ppd->host_link_state = HLS_GOING_UP; dd_dev_err(dd, "%s: logical state did not change to INIT\n", __func__); @@ -10398,6 +10527,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); handle_linkup_change(dd, 1); + ppd->host_link_state = HLS_UP_INIT; } break; case HLS_UP_ARMED: @@ -11853,6 +11983,10 @@ static void free_cntrs(struct hfi1_devdata *dd) dd->scntrs = NULL; kfree(dd->cntrnames); dd->cntrnames = NULL; + if (dd->update_cntr_wq) { + destroy_workqueue(dd->update_cntr_wq); + dd->update_cntr_wq = NULL; + } } static u64 read_dev_port_cntr(struct hfi1_devdata *dd, struct cntr_entry *entry, @@ -12008,7 +12142,7 @@ u64 write_port_cntr(struct hfi1_pportdata *ppd, int index, int vl, u64 data) return write_dev_port_cntr(ppd->dd, entry, sval, ppd, vl, data); } -static void update_synth_timer(unsigned long opaque) +static void do_update_synth_timer(struct work_struct *work) { u64 cur_tx; u64 cur_rx; @@ -12017,8 +12151,8 @@ static void update_synth_timer(unsigned long opaque) int i, j, vl; struct hfi1_pportdata *ppd; struct cntr_entry *entry; - - struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque; + struct hfi1_devdata *dd = container_of(work, struct hfi1_devdata, + update_cntr_work); /* * Rather than keep beating on the CSRs pick a minimal set that we can @@ -12101,7 +12235,13 @@ static void update_synth_timer(unsigned long opaque) } else { hfi1_cdbg(CNTR, "[%d] No update necessary", dd->unit); } +} + +static void update_synth_timer(unsigned long opaque) +{ + struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque; + queue_work(dd->update_cntr_wq, &dd->update_cntr_work); mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME); } @@ -12337,6 +12477,13 @@ static int init_cntrs(struct hfi1_devdata *dd) if (init_cpu_counters(dd)) goto bail; + dd->update_cntr_wq = alloc_ordered_workqueue("hfi1_update_cntr_%d", + WQ_MEM_RECLAIM, dd->unit); + if (!dd->update_cntr_wq) + goto bail; + + INIT_WORK(&dd->update_cntr_work, do_update_synth_timer); + mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME); return 0; bail: @@ -12515,7 +12662,7 @@ u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd) #define SET_STATIC_RATE_CONTROL_SMASK(r) \ (r |= SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK) -int hfi1_init_ctxt(struct send_context *sc) +void hfi1_init_ctxt(struct send_context *sc) { if (sc) { struct hfi1_devdata *dd = sc->dd; @@ -12532,7 +12679,6 @@ int hfi1_init_ctxt(struct send_context *sc) write_kctxt_csr(dd, sc->hw_context, SEND_CTXT_CHECK_ENABLE, reg); } - return 0; } int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp) @@ -12726,7 +12872,10 @@ static int request_msix_irqs(struct hfi1_devdata *dd) first_sdma = last_general; last_sdma = first_sdma + dd->num_sdma; first_rx = last_sdma; - last_rx = first_rx + dd->n_krcv_queues; + last_rx = first_rx + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT; + + /* VNIC MSIx interrupts get mapped when VNIC contexts are created */ + dd->first_dyn_msix_idx = first_rx + dd->n_krcv_queues; /* * Sanity check - the code expects all SDMA chip source @@ -12740,7 +12889,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd) const char *err_info; irq_handler_t handler; irq_handler_t thread = NULL; - void *arg; + void *arg = NULL; int idx; struct hfi1_ctxtdata *rcd = NULL; struct sdma_engine *sde = NULL; @@ -12767,24 +12916,25 @@ static int request_msix_irqs(struct hfi1_devdata *dd) } else if (first_rx <= i && i < last_rx) { idx = i - first_rx; rcd = dd->rcd[idx]; - /* no interrupt if no rcd */ - if (!rcd) - continue; - /* - * Set the interrupt register and mask for this - * context's interrupt. - */ - rcd->ireg = (IS_RCVAVAIL_START + idx) / 64; - rcd->imask = ((u64)1) << - ((IS_RCVAVAIL_START + idx) % 64); - handler = receive_context_interrupt; - thread = receive_context_thread; - arg = rcd; - snprintf(me->name, sizeof(me->name), - DRIVER_NAME "_%d kctxt%d", dd->unit, idx); - err_info = "receive context"; - remap_intr(dd, IS_RCVAVAIL_START + idx, i); - me->type = IRQ_RCVCTXT; + if (rcd) { + /* + * Set the interrupt register and mask for this + * context's interrupt. + */ + rcd->ireg = (IS_RCVAVAIL_START + idx) / 64; + rcd->imask = ((u64)1) << + ((IS_RCVAVAIL_START + idx) % 64); + handler = receive_context_interrupt; + thread = receive_context_thread; + arg = rcd; + snprintf(me->name, sizeof(me->name), + DRIVER_NAME "_%d kctxt%d", + dd->unit, idx); + err_info = "receive context"; + remap_intr(dd, IS_RCVAVAIL_START + idx, i); + me->type = IRQ_RCVCTXT; + rcd->msix_intr = i; + } } else { /* not in our expected range - complain, then * ignore it @@ -12822,6 +12972,84 @@ static int request_msix_irqs(struct hfi1_devdata *dd) return ret; } +void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd) +{ + int i; + + if (!dd->num_msix_entries) { + synchronize_irq(dd->pcidev->irq); + return; + } + + for (i = 0; i < dd->vnic.num_ctxt; i++) { + struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i]; + struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr]; + + synchronize_irq(me->msix.vector); + } +} + +void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd) +{ + struct hfi1_devdata *dd = rcd->dd; + struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr]; + + if (!me->arg) /* => no irq, no affinity */ + return; + + hfi1_put_irq_affinity(dd, me); + free_irq(me->msix.vector, me->arg); + + me->arg = NULL; +} + +void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd) +{ + struct hfi1_devdata *dd = rcd->dd; + struct hfi1_msix_entry *me; + int idx = rcd->ctxt; + void *arg = rcd; + int ret; + + rcd->msix_intr = dd->vnic.msix_idx++; + me = &dd->msix_entries[rcd->msix_intr]; + + /* + * Set the interrupt register and mask for this + * context's interrupt. + */ + rcd->ireg = (IS_RCVAVAIL_START + idx) / 64; + rcd->imask = ((u64)1) << + ((IS_RCVAVAIL_START + idx) % 64); + + snprintf(me->name, sizeof(me->name), + DRIVER_NAME "_%d kctxt%d", dd->unit, idx); + me->name[sizeof(me->name) - 1] = 0; + me->type = IRQ_RCVCTXT; + + remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr); + + ret = request_threaded_irq(me->msix.vector, receive_context_interrupt, + receive_context_thread, 0, me->name, arg); + if (ret) { + dd_dev_err(dd, "vnic irq request (vector %d, idx %d) fail %d\n", + me->msix.vector, idx, ret); + return; + } + /* + * assign arg after request_irq call, so it will be + * cleaned up + */ + me->arg = arg; + + ret = hfi1_get_irq_affinity(dd, me); + if (ret) { + dd_dev_err(dd, + "unable to pin IRQ %d\n", ret); + free_irq(me->msix.vector, me->arg); + } +} + /* * Set the general handler to accept all interrupts, remap all * chip interrupts back to MSI-X 0. @@ -12853,7 +13081,7 @@ static int set_up_interrupts(struct hfi1_devdata *dd) * N interrupts - one per used SDMA engine * M interrupt - one per kernel receive context */ - total = 1 + dd->num_sdma + dd->n_krcv_queues; + total = 1 + dd->num_sdma + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT; entries = kcalloc(total, sizeof(*entries), GFP_KERNEL); if (!entries) { @@ -12918,7 +13146,8 @@ fail: * * num_rcv_contexts - number of contexts being used * n_krcv_queues - number of kernel contexts - * first_user_ctxt - first non-kernel context in array of contexts + * first_dyn_alloc_ctxt - first dynamically allocated context + * in array of contexts * freectxts - number of free user contexts * num_send_contexts - number of PIO send contexts being used */ @@ -12995,10 +13224,14 @@ static int set_up_context_variables(struct hfi1_devdata *dd) total_contexts = num_kernel_contexts + num_user_contexts; } - /* the first N are kernel contexts, the rest are user contexts */ + /* Accommodate VNIC contexts */ + if ((total_contexts + HFI1_NUM_VNIC_CTXT) <= dd->chip_rcv_contexts) + total_contexts += HFI1_NUM_VNIC_CTXT; + + /* the first N are kernel contexts, the rest are user/vnic contexts */ dd->num_rcv_contexts = total_contexts; dd->n_krcv_queues = num_kernel_contexts; - dd->first_user_ctxt = num_kernel_contexts; + dd->first_dyn_alloc_ctxt = num_kernel_contexts; dd->num_user_contexts = num_user_contexts; dd->freectxts = num_user_contexts; dd_dev_info(dd, @@ -13454,11 +13687,8 @@ static void reset_rxe_csrs(struct hfi1_devdata *dd) write_csr(dd, RCV_COUNTER_ARRAY32 + (8 * i), 0); for (i = 0; i < RXE_NUM_64_BIT_COUNTERS; i++) write_csr(dd, RCV_COUNTER_ARRAY64 + (8 * i), 0); - for (i = 0; i < RXE_NUM_RSM_INSTANCES; i++) { - write_csr(dd, RCV_RSM_CFG + (8 * i), 0); - write_csr(dd, RCV_RSM_SELECT + (8 * i), 0); - write_csr(dd, RCV_RSM_MATCH + (8 * i), 0); - } + for (i = 0; i < RXE_NUM_RSM_INSTANCES; i++) + clear_rsm_rule(dd, i); for (i = 0; i < 32; i++) write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), 0); @@ -13610,14 +13840,14 @@ static void init_chip(struct hfi1_devdata *dd) dd_dev_info(dd, "Resetting CSRs with FLR\n"); /* do the FLR, the DC reset will remain */ - hfi1_pcie_flr(dd); + pcie_flr(dd->pcidev); /* restore command and BARs */ restore_pci_variables(dd); if (is_ax(dd)) { dd_dev_info(dd, "Resetting CSRs with FLR\n"); - hfi1_pcie_flr(dd); + pcie_flr(dd->pcidev); restore_pci_variables(dd); } } else { @@ -13817,6 +14047,16 @@ static void add_rsm_rule(struct hfi1_devdata *dd, u8 rule_index, (u64)rrd->value2 << RCV_RSM_MATCH_VALUE2_SHIFT); } +/* + * Clear a receive side mapping rule. + */ +static void clear_rsm_rule(struct hfi1_devdata *dd, u8 rule_index) +{ + write_csr(dd, RCV_RSM_CFG + (8 * rule_index), 0); + write_csr(dd, RCV_RSM_SELECT + (8 * rule_index), 0); + write_csr(dd, RCV_RSM_MATCH + (8 * rule_index), 0); +} + /* return the number of RSM map table entries that will be used for QOS */ static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp, unsigned int *np) @@ -13932,7 +14172,7 @@ static void init_qos(struct hfi1_devdata *dd, struct rsm_map_table *rmt) rrd.value2 = LRH_SC_VALUE; /* add rule 0 */ - add_rsm_rule(dd, 0, &rrd); + add_rsm_rule(dd, RSM_INS_VERBS, &rrd); /* mark RSM map entries as used */ rmt->used += rmt_entries; @@ -13962,7 +14202,7 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd, /* * RSM will extract the destination context as an index into the * map table. The destination contexts are a sequential block - * in the range first_user_ctxt...num_rcv_contexts-1 (inclusive). + * in the range first_dyn_alloc_ctxt...num_rcv_contexts-1 (inclusive). * Map entries are accessed as offset + extracted value. Adjust * the added offset so this sequence can be placed anywhere in * the table - as long as the entries themselves do not wrap. @@ -13970,9 +14210,9 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd, * start with that to allow for a "negative" offset. */ offset = (u8)(NUM_MAP_ENTRIES + (int)rmt->used - - (int)dd->first_user_ctxt); + (int)dd->first_dyn_alloc_ctxt); - for (i = dd->first_user_ctxt, idx = rmt->used; + for (i = dd->first_dyn_alloc_ctxt, idx = rmt->used; i < dd->num_rcv_contexts; i++, idx++) { /* replace with identity mapping */ regoff = (idx % 8) * 8; @@ -14006,11 +14246,84 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd, rrd.value2 = 1; /* add rule 1 */ - add_rsm_rule(dd, 1, &rrd); + add_rsm_rule(dd, RSM_INS_FECN, &rrd); rmt->used += dd->num_user_contexts; } +/* Initialize RSM for VNIC */ +void hfi1_init_vnic_rsm(struct hfi1_devdata *dd) +{ + u8 i, j; + u8 ctx_id = 0; + u64 reg; + u32 regoff; + struct rsm_rule_data rrd; + + if (hfi1_vnic_is_rsm_full(dd, NUM_VNIC_MAP_ENTRIES)) { + dd_dev_err(dd, "Vnic RSM disabled, rmt entries used = %d\n", + dd->vnic.rmt_start); + return; + } + + dev_dbg(&(dd)->pcidev->dev, "Vnic rsm start = %d, end %d\n", + dd->vnic.rmt_start, + dd->vnic.rmt_start + NUM_VNIC_MAP_ENTRIES); + + /* Update RSM mapping table, 32 regs, 256 entries - 1 ctx per byte */ + regoff = RCV_RSM_MAP_TABLE + (dd->vnic.rmt_start / 8) * 8; + reg = read_csr(dd, regoff); + for (i = 0; i < NUM_VNIC_MAP_ENTRIES; i++) { + /* Update map register with vnic context */ + j = (dd->vnic.rmt_start + i) % 8; + reg &= ~(0xffllu << (j * 8)); + reg |= (u64)dd->vnic.ctxt[ctx_id++]->ctxt << (j * 8); + /* Wrap up vnic ctx index */ + ctx_id %= dd->vnic.num_ctxt; + /* Write back map register */ + if (j == 7 || ((i + 1) == NUM_VNIC_MAP_ENTRIES)) { + dev_dbg(&(dd)->pcidev->dev, + "Vnic rsm map reg[%d] =0x%llx\n", + regoff - RCV_RSM_MAP_TABLE, reg); + + write_csr(dd, regoff, reg); + regoff += 8; + if (i < (NUM_VNIC_MAP_ENTRIES - 1)) + reg = read_csr(dd, regoff); + } + } + + /* Add rule for vnic */ + rrd.offset = dd->vnic.rmt_start; + rrd.pkt_type = 4; + /* Match 16B packets */ + rrd.field1_off = L2_TYPE_MATCH_OFFSET; + rrd.mask1 = L2_TYPE_MASK; + rrd.value1 = L2_16B_VALUE; + /* Match ETH L4 packets */ + rrd.field2_off = L4_TYPE_MATCH_OFFSET; + rrd.mask2 = L4_16B_TYPE_MASK; + rrd.value2 = L4_16B_ETH_VALUE; + /* Calc context from veswid and entropy */ + rrd.index1_off = L4_16B_HDR_VESWID_OFFSET; + rrd.index1_width = ilog2(NUM_VNIC_MAP_ENTRIES); + rrd.index2_off = L2_16B_ENTROPY_OFFSET; + rrd.index2_width = ilog2(NUM_VNIC_MAP_ENTRIES); + add_rsm_rule(dd, RSM_INS_VNIC, &rrd); + + /* Enable RSM if not already enabled */ + add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK); +} + +void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd) +{ + clear_rsm_rule(dd, RSM_INS_VNIC); + + /* Disable RSM if used only by vnic */ + if (dd->vnic.rmt_start == 0) + clear_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK); +} + static void init_rxe(struct hfi1_devdata *dd) { struct rsm_map_table *rmt; @@ -14023,6 +14336,8 @@ static void init_rxe(struct hfi1_devdata *dd) init_qos(dd, rmt); init_user_fecn_handling(dd, rmt); complete_rsm_map_table(dd, rmt); + /* record number of used rsm map entries for vnic */ + dd->vnic.rmt_start = rmt->used; kfree(rmt); /* @@ -14212,30 +14527,24 @@ done: return ret; } -int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt) +int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt) { - struct hfi1_ctxtdata *rcd; - unsigned sctxt; - int ret = 0; + u8 hw_ctxt; u64 reg; - if (ctxt < dd->num_rcv_contexts) { - rcd = dd->rcd[ctxt]; - } else { - ret = -EINVAL; - goto done; - } - if (!rcd || !rcd->sc) { - ret = -EINVAL; - goto done; - } - sctxt = rcd->sc->hw_context; - reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE); + if (!ctxt || !ctxt->sc) + return -EINVAL; + + if (ctxt->ctxt >= dd->num_rcv_contexts) + return -EINVAL; + + hw_ctxt = ctxt->sc->hw_context; + reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE); reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK; - write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg); - write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, 0); -done: - return ret; + write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE, reg); + write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_PARTITION_KEY, 0); + + return 0; } /* diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 043fd21dc5f3..cbe455d9ab8b 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -1,7 +1,7 @@ #ifndef _CHIP_H #define _CHIP_H /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -394,7 +394,8 @@ #define LAST_REMOTE_STATE_COMPLETE 0x13 #define LINK_QUALITY_INFO 0x14 #define REMOTE_DEVICE_ID 0x15 -#define LINK_DOWN_REASON 0x16 +#define LINK_DOWN_REASON 0x16 /* first byte of offset 0x16 */ +#define VERSION_PATCH 0x16 /* last byte of offset 0x16 */ /* 8051 lane specific register field IDs */ #define TX_EQ_SETTINGS 0x00 @@ -524,10 +525,12 @@ enum { #define SUPPORTED_CRCS (CAP_CRC_14B | CAP_CRC_48B) /* misc status version fields */ -#define STS_FM_VERSION_A_SHIFT 16 -#define STS_FM_VERSION_A_MASK 0xff -#define STS_FM_VERSION_B_SHIFT 24 -#define STS_FM_VERSION_B_MASK 0xff +#define STS_FM_VERSION_MINOR_SHIFT 16 +#define STS_FM_VERSION_MINOR_MASK 0xff +#define STS_FM_VERSION_MAJOR_SHIFT 24 +#define STS_FM_VERSION_MAJOR_MASK 0xff +#define STS_FM_VERSION_PATCH_SHIFT 24 +#define STS_FM_VERSION_PATCH_MASK 0xff /* LCB_CFG_CRC_MODE TX_VAL and RX_VAL CRC mode values */ #define LCB_CRC_16B 0x0 /* 16b CRC */ @@ -633,7 +636,8 @@ static inline void write_uctxt_csr(struct hfi1_devdata *dd, int ctxt, write_csr(dd, offset0 + (0x1000 * ctxt), value); } -u64 create_pbc(struct hfi1_pportdata *ppd, u64, int, u32, u32); +u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl, + u32 dw_len); /* firmware.c */ #define SBUS_MASTER_BROADCAST 0xfd @@ -698,7 +702,8 @@ void fabric_serdes_reset(struct hfi1_devdata *dd); int read_8051_data(struct hfi1_devdata *dd, u32 addr, u32 len, u64 *result); /* chip.c */ -void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b); +void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor, + u8 *ver_patch); void read_guid(struct hfi1_devdata *dd); int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout); void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason, @@ -724,7 +729,8 @@ int bringup_serdes(struct hfi1_pportdata *ppd); void set_intr_state(struct hfi1_devdata *dd, u32 enable); void apply_link_downgrade_policy(struct hfi1_pportdata *ppd, int refresh_widths); -void update_usrhead(struct hfi1_ctxtdata *, u32, u32, u32, u32, u32); +void update_usrhead(struct hfi1_ctxtdata *rcd, u32 hd, u32 updegr, u32 egrhd, + u32 intr_adjust, u32 npkts); int stop_drain_data_vls(struct hfi1_devdata *dd); int open_fill_data_vls(struct hfi1_devdata *dd); u32 ns_to_cclock(struct hfi1_devdata *dd, u32 ns); @@ -1343,7 +1349,7 @@ void hfi1_start_cleanup(struct hfi1_devdata *dd); void hfi1_clear_tids(struct hfi1_ctxtdata *rcd); struct ib_header *hfi1_get_msgheader( struct hfi1_devdata *dd, __le32 *rhf_addr); -int hfi1_init_ctxt(struct send_context *sc); +void hfi1_init_ctxt(struct send_context *sc); void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, u32 type, unsigned long pa, u16 order); void hfi1_quiet_serdes(struct hfi1_pportdata *ppd); @@ -1356,8 +1362,10 @@ int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val); int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey); int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt); int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey); -int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt); +int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt); void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality); +void hfi1_init_vnic_rsm(struct hfi1_devdata *dd); +void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd); /* * Interrupt source table. diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index 1b783bbee4bb..995d62c7f9a7 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -331,12 +331,15 @@ struct diag_pkt { #define FULL_MGMT_P_KEY 0xFFFF #define DEFAULT_P_KEY LIM_MGMT_P_KEY -#define HFI1_FECN_SHIFT 31 -#define HFI1_FECN_MASK 1 -#define HFI1_FECN_SMASK BIT(HFI1_FECN_SHIFT) -#define HFI1_BECN_SHIFT 30 -#define HFI1_BECN_MASK 1 -#define HFI1_BECN_SMASK BIT(HFI1_BECN_SHIFT) + +/** + * 0xF8 - 4 bits of multicast range and 1 bit for collective range + * Example: For 24 bit LID space, + * Multicast range: 0xF00000 to 0xF7FFFF + * Collective range: 0xF80000 to 0xFFFFFE + */ +#define HFI1_MCAST_NR 0x4 /* Number of top bits set */ +#define HFI1_COLLECTIVE_NR 0x1 /* Number of bits after MCAST_NR */ #define HFI1_PSM_IOC_BASE_SEQ 0x0 diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 7fe9dd885746..e9fa3c293e42 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -1,6 +1,6 @@ #ifdef CONFIG_DEBUG_FS /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -51,8 +51,12 @@ #include <linux/export.h> #include <linux/module.h> #include <linux/string.h> +#include <linux/types.h> +#include <linux/ratelimit.h> +#include <linux/fault-inject.h> #include "hfi.h" +#include "trace.h" #include "debugfs.h" #include "device.h" #include "qp.h" @@ -170,7 +174,7 @@ static int _opcode_stats_seq_show(struct seq_file *s, void *v) struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; struct hfi1_devdata *dd = dd_from_dev(ibd); - for (j = 0; j < dd->first_user_ctxt; j++) { + for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) { if (!dd->rcd[j]) continue; n_packets += dd->rcd[j]->opstats->stats[i].n_packets; @@ -196,7 +200,7 @@ static void *_ctx_stats_seq_start(struct seq_file *s, loff_t *pos) if (!*pos) return SEQ_START_TOKEN; - if (*pos >= dd->first_user_ctxt) + if (*pos >= dd->first_dyn_alloc_ctxt) return NULL; return pos; } @@ -210,7 +214,7 @@ static void *_ctx_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) return pos; ++*pos; - if (*pos >= dd->first_user_ctxt) + if (*pos >= dd->first_dyn_alloc_ctxt) return NULL; return pos; } @@ -1063,6 +1067,222 @@ DEBUGFS_SEQ_FILE_OPS(sdma_cpu_list); DEBUGFS_SEQ_FILE_OPEN(sdma_cpu_list) DEBUGFS_FILE_OPS(sdma_cpu_list); +#ifdef CONFIG_FAULT_INJECTION +static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos) +{ + struct hfi1_opcode_stats_perctx *opstats; + + if (*pos >= ARRAY_SIZE(opstats->stats)) + return NULL; + return pos; +} + +static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct hfi1_opcode_stats_perctx *opstats; + + ++*pos; + if (*pos >= ARRAY_SIZE(opstats->stats)) + return NULL; + return pos; +} + +static void _fault_stats_seq_stop(struct seq_file *s, void *v) +{ +} + +static int _fault_stats_seq_show(struct seq_file *s, void *v) +{ + loff_t *spos = v; + loff_t i = *spos, j; + u64 n_packets = 0, n_bytes = 0; + struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; + struct hfi1_devdata *dd = dd_from_dev(ibd); + + for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) { + if (!dd->rcd[j]) + continue; + n_packets += dd->rcd[j]->opstats->stats[i].n_packets; + n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes; + } + if (!n_packets && !n_bytes) + return SEQ_SKIP; + if (!ibd->fault_opcode->n_rxfaults[i] && + !ibd->fault_opcode->n_txfaults[i]) + return SEQ_SKIP; + seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i, + (unsigned long long)n_packets, + (unsigned long long)n_bytes, + (unsigned long long)ibd->fault_opcode->n_rxfaults[i], + (unsigned long long)ibd->fault_opcode->n_txfaults[i]); + return 0; +} + +DEBUGFS_SEQ_FILE_OPS(fault_stats); +DEBUGFS_SEQ_FILE_OPEN(fault_stats); +DEBUGFS_FILE_OPS(fault_stats); + +static void fault_exit_opcode_debugfs(struct hfi1_ibdev *ibd) +{ + debugfs_remove_recursive(ibd->fault_opcode->dir); + kfree(ibd->fault_opcode); + ibd->fault_opcode = NULL; +} + +static int fault_init_opcode_debugfs(struct hfi1_ibdev *ibd) +{ + struct dentry *parent = ibd->hfi1_ibdev_dbg; + + ibd->fault_opcode = kzalloc(sizeof(*ibd->fault_opcode), GFP_KERNEL); + if (!ibd->fault_opcode) + return -ENOMEM; + + ibd->fault_opcode->attr.interval = 1; + ibd->fault_opcode->attr.require_end = ULONG_MAX; + ibd->fault_opcode->attr.stacktrace_depth = 32; + ibd->fault_opcode->attr.dname = NULL; + ibd->fault_opcode->attr.verbose = 0; + ibd->fault_opcode->fault_by_opcode = false; + ibd->fault_opcode->opcode = 0; + ibd->fault_opcode->mask = 0xff; + + ibd->fault_opcode->dir = + fault_create_debugfs_attr("fault_opcode", + parent, + &ibd->fault_opcode->attr); + if (IS_ERR(ibd->fault_opcode->dir)) { + kfree(ibd->fault_opcode); + return -ENOENT; + } + + DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault_opcode->dir, ibd); + if (!debugfs_create_bool("fault_by_opcode", 0600, + ibd->fault_opcode->dir, + &ibd->fault_opcode->fault_by_opcode)) + goto fail; + if (!debugfs_create_x8("opcode", 0600, ibd->fault_opcode->dir, + &ibd->fault_opcode->opcode)) + goto fail; + if (!debugfs_create_x8("mask", 0600, ibd->fault_opcode->dir, + &ibd->fault_opcode->mask)) + goto fail; + + return 0; +fail: + fault_exit_opcode_debugfs(ibd); + return -ENOMEM; +} + +static void fault_exit_packet_debugfs(struct hfi1_ibdev *ibd) +{ + debugfs_remove_recursive(ibd->fault_packet->dir); + kfree(ibd->fault_packet); + ibd->fault_packet = NULL; +} + +static int fault_init_packet_debugfs(struct hfi1_ibdev *ibd) +{ + struct dentry *parent = ibd->hfi1_ibdev_dbg; + + ibd->fault_packet = kzalloc(sizeof(*ibd->fault_packet), GFP_KERNEL); + if (!ibd->fault_packet) + return -ENOMEM; + + ibd->fault_packet->attr.interval = 1; + ibd->fault_packet->attr.require_end = ULONG_MAX; + ibd->fault_packet->attr.stacktrace_depth = 32; + ibd->fault_packet->attr.dname = NULL; + ibd->fault_packet->attr.verbose = 0; + ibd->fault_packet->fault_by_packet = false; + + ibd->fault_packet->dir = + fault_create_debugfs_attr("fault_packet", + parent, + &ibd->fault_opcode->attr); + if (IS_ERR(ibd->fault_packet->dir)) { + kfree(ibd->fault_packet); + return -ENOENT; + } + + if (!debugfs_create_bool("fault_by_packet", 0600, + ibd->fault_packet->dir, + &ibd->fault_packet->fault_by_packet)) + goto fail; + if (!debugfs_create_u64("fault_stats", 0400, + ibd->fault_packet->dir, + &ibd->fault_packet->n_faults)) + goto fail; + + return 0; +fail: + fault_exit_packet_debugfs(ibd); + return -ENOMEM; +} + +static void fault_exit_debugfs(struct hfi1_ibdev *ibd) +{ + fault_exit_opcode_debugfs(ibd); + fault_exit_packet_debugfs(ibd); +} + +static int fault_init_debugfs(struct hfi1_ibdev *ibd) +{ + int ret = 0; + + ret = fault_init_opcode_debugfs(ibd); + if (ret) + return ret; + + ret = fault_init_packet_debugfs(ibd); + if (ret) + fault_exit_opcode_debugfs(ibd); + + return ret; +} + +bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd) +{ + return ibd->fault_suppress_err; +} + +bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx) +{ + bool ret = false; + struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device); + + if (!ibd->fault_opcode || !ibd->fault_opcode->fault_by_opcode) + return false; + if (ibd->fault_opcode->opcode != (opcode & ibd->fault_opcode->mask)) + return false; + ret = should_fail(&ibd->fault_opcode->attr, 1); + if (ret) { + trace_hfi1_fault_opcode(qp, opcode); + if (rx) + ibd->fault_opcode->n_rxfaults[opcode]++; + else + ibd->fault_opcode->n_txfaults[opcode]++; + } + return ret; +} + +bool hfi1_dbg_fault_packet(struct hfi1_packet *packet) +{ + struct rvt_dev_info *rdi = &packet->rcd->ppd->dd->verbs_dev.rdi; + struct hfi1_ibdev *ibd = dev_from_rdi(rdi); + bool ret = false; + + if (!ibd->fault_packet || !ibd->fault_packet->fault_by_packet) + return false; + + ret = should_fail(&ibd->fault_packet->attr, 1); + if (ret) { + ++ibd->fault_packet->n_faults; + trace_hfi1_fault_packet(packet); + } + return ret; +} +#endif + void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) { char name[sizeof("port0counters") + 1]; @@ -1112,12 +1332,22 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) !port_cntr_ops[i].ops.write ? S_IRUGO : S_IRUGO | S_IWUSR); } + +#ifdef CONFIG_FAULT_INJECTION + debugfs_create_bool("fault_suppress_err", 0600, + ibd->hfi1_ibdev_dbg, + &ibd->fault_suppress_err); + fault_init_debugfs(ibd); +#endif } void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd) { if (!hfi1_dbg_root) goto out; +#ifdef CONFIG_FAULT_INJECTION + fault_exit_debugfs(ibd); +#endif debugfs_remove(ibd->hfi1_ibdev_link); debugfs_remove_recursive(ibd->hfi1_ibdev_dbg); out: diff --git a/drivers/infiniband/hw/hfi1/debugfs.h b/drivers/infiniband/hw/hfi1/debugfs.h index b6fb6814f1b8..38c38a98156d 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.h +++ b/drivers/infiniband/hw/hfi1/debugfs.h @@ -53,23 +53,79 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd); void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd); void hfi1_dbg_init(void); void hfi1_dbg_exit(void); + +#ifdef CONFIG_FAULT_INJECTION +#include <linux/fault-inject.h> +struct fault_opcode { + struct fault_attr attr; + struct dentry *dir; + bool fault_by_opcode; + u64 n_rxfaults[256]; + u64 n_txfaults[256]; + u8 opcode; + u8 mask; +}; + +struct fault_packet { + struct fault_attr attr; + struct dentry *dir; + bool fault_by_packet; + u64 n_faults; +}; + +bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx); +bool hfi1_dbg_fault_packet(struct hfi1_packet *packet); +bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd); +#else +static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet) +{ + return false; +} + +static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, + u32 opcode, bool rx) +{ + return false; +} + +static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd) +{ + return false; +} +#endif + #else static inline void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) { } -void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd) +static inline void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd) +{ +} + +static inline void hfi1_dbg_init(void) { } -void hfi1_dbg_init(void) +static inline void hfi1_dbg_exit(void) { } -void hfi1_dbg_exit(void) +static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet) { + return false; } +static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, + u32 opcode, bool rx) +{ + return false; +} + +static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd) +{ + return false; +} #endif #endif /* _HFI1_DEBUGFS_H */ diff --git a/drivers/infiniband/hw/hfi1/device.c b/drivers/infiniband/hw/hfi1/device.c index bf64b5a7bfd7..bbb6069dec2a 100644 --- a/drivers/infiniband/hw/hfi1/device.c +++ b/drivers/infiniband/hw/hfi1/device.c @@ -69,7 +69,7 @@ int hfi1_cdev_init(int minor, const char *name, cdev_init(cdev, fops); cdev->owner = THIS_MODULE; - cdev->kobj.parent = parent; + cdev_set_parent(cdev, parent); kobject_set_name(&cdev->kobj, name); ret = cdev_add(cdev, dev, 1); diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 3881c951f6af..a50870e455a3 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -59,6 +59,8 @@ #include "trace.h" #include "qp.h" #include "sdma.h" +#include "debugfs.h" +#include "vnic.h" #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt @@ -83,8 +85,8 @@ module_param_named(cu, hfi1_cu, uint, S_IRUGO); MODULE_PARM_DESC(cu, "Credit return units"); unsigned long hfi1_cap_mask = HFI1_CAP_MASK_DEFAULT; -static int hfi1_caps_set(const char *, const struct kernel_param *); -static int hfi1_caps_get(char *, const struct kernel_param *); +static int hfi1_caps_set(const char *val, const struct kernel_param *kp); +static int hfi1_caps_get(char *buffer, const struct kernel_param *kp); static const struct kernel_param_ops cap_ops = { .set = hfi1_caps_set, .get = hfi1_caps_get @@ -209,42 +211,6 @@ int hfi1_count_active_units(void) } /* - * Return count of all units, optionally return in arguments - * the number of usable (present) units, and the number of - * ports that are up. - */ -int hfi1_count_units(int *npresentp, int *nupp) -{ - int nunits = 0, npresent = 0, nup = 0; - struct hfi1_devdata *dd; - unsigned long flags; - int pidx; - struct hfi1_pportdata *ppd; - - spin_lock_irqsave(&hfi1_devs_lock, flags); - - list_for_each_entry(dd, &hfi1_dev_list, list) { - nunits++; - if ((dd->flags & HFI1_PRESENT) && dd->kregbase) - npresent++; - for (pidx = 0; pidx < dd->num_pports; ++pidx) { - ppd = dd->pport + pidx; - if (ppd->lid && ppd->linkup) - nup++; - } - } - - spin_unlock_irqrestore(&hfi1_devs_lock, flags); - - if (npresentp) - *npresentp = npresent; - if (nupp) - *nupp = nup; - - return nunits; -} - -/* * Get address of eager buffer from it's index (allocated in chunks, not * contiguous). */ @@ -283,7 +249,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, { struct ib_header *rhdr = packet->hdr; u32 rte = rhf_rcv_type_err(packet->rhf); - int lnh = be16_to_cpu(rhdr->lrh[0]) & 3; + int lnh = ib_get_lnh(rhdr); struct hfi1_ibport *ibp = rcd_to_iport(rcd); struct hfi1_devdata *dd = ppd->dd; struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; @@ -295,7 +261,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, /* For TIDERR and RC QPs preemptively schedule a NAK */ struct ib_other_headers *ohdr = NULL; u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */ - u16 lid = be16_to_cpu(rhdr->lrh[1]); + u16 lid = ib_get_dlid(rhdr); u32 qp_num; u32 rcv_flags = 0; @@ -396,7 +362,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, u16 rlid; u8 svc_type, sl, sc5; - sc5 = hdr2sc(rhdr, packet->rhf); + sc5 = hfi1_9B_get_sc5(rhdr, packet->rhf); sl = ibp->sc_to_sl[sc5]; lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK; @@ -414,7 +380,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, svc_type = IB_CC_SVCTYPE_UD; break; case IB_QPT_UC: - rlid = be16_to_cpu(rhdr->lrh[3]); + rlid = ib_get_slid(rhdr); rqpn = qp->remote_qpn; svc_type = IB_CC_SVCTYPE_UC; break; @@ -460,7 +426,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, struct ib_other_headers *ohdr = pkt->ohdr; struct ib_grh *grh = NULL; u32 rqpn = 0, bth1; - u16 rlid, dlid = be16_to_cpu(hdr->lrh[1]); + u16 rlid, dlid = ib_get_dlid(hdr); u8 sc, svc_type; bool is_mcast = false; @@ -471,19 +437,19 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, case IB_QPT_SMI: case IB_QPT_GSI: case IB_QPT_UD: - rlid = be16_to_cpu(hdr->lrh[3]); + rlid = ib_get_slid(hdr); rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK; svc_type = IB_CC_SVCTYPE_UD; is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); break; case IB_QPT_UC: - rlid = qp->remote_ah_attr.dlid; + rlid = rdma_ah_get_dlid(&qp->remote_ah_attr); rqpn = qp->remote_qpn; svc_type = IB_CC_SVCTYPE_UC; break; case IB_QPT_RC: - rlid = qp->remote_ah_attr.dlid; + rlid = rdma_ah_get_dlid(&qp->remote_ah_attr); rqpn = qp->remote_qpn; svc_type = IB_CC_SVCTYPE_RC; break; @@ -491,16 +457,16 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, return; } - sc = hdr2sc(hdr, pkt->rhf); + sc = hfi1_9B_get_sc5(hdr, pkt->rhf); bth1 = be32_to_cpu(ohdr->bth[1]); - if (do_cnp && (bth1 & HFI1_FECN_SMASK)) { + if (do_cnp && (bth1 & IB_FECN_SMASK)) { u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]); return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc, grh); } - if (!is_mcast && (bth1 & HFI1_BECN_SMASK)) { + if (!is_mcast && (bth1 & IB_BECN_SMASK)) { struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); u32 lqpn = bth1 & RVT_QPN_MASK; u8 sl = ibp->sc_to_sl[sc]; @@ -621,8 +587,7 @@ static void __prescan_rxq(struct hfi1_packet *packet) packet->hdr = hfi1_get_msgheader(dd, rhf_addr); hdr = packet->hdr; - - lnh = be16_to_cpu(hdr->lrh[0]) & 3; + lnh = ib_get_lnh(hdr); if (lnh == HFI1_LRH_BTH) { packet->ohdr = &hdr->u.oth; @@ -634,7 +599,7 @@ static void __prescan_rxq(struct hfi1_packet *packet) } bth1 = be32_to_cpu(packet->ohdr->bth[1]); - is_ecn = !!(bth1 & (HFI1_FECN_SMASK | HFI1_BECN_SMASK)); + is_ecn = !!(bth1 & (IB_FECN_SMASK | IB_BECN_SMASK)); if (!is_ecn) goto next; @@ -652,7 +617,7 @@ static void __prescan_rxq(struct hfi1_packet *packet) rcu_read_unlock(); /* turn off BECN, FECN */ - bth1 &= ~(HFI1_FECN_SMASK | HFI1_BECN_SMASK); + bth1 &= ~(IB_FECN_SMASK | IB_BECN_SMASK); packet->ohdr->bth[1] = cpu_to_be32(bth1); next: update_ps_mdata(&mdata, rcd); @@ -872,20 +837,42 @@ bail: return last; } -static inline void set_all_nodma_rtail(struct hfi1_devdata *dd) +static inline void set_nodma_rtail(struct hfi1_devdata *dd, u8 ctxt) { int i; - for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++) + /* + * For dynamically allocated kernel contexts (like vnic) switch + * interrupt handler only for that context. Otherwise, switch + * interrupt handler for all statically allocated kernel contexts. + */ + if (ctxt >= dd->first_dyn_alloc_ctxt) { + dd->rcd[ctxt]->do_interrupt = + &handle_receive_interrupt_nodma_rtail; + return; + } + + for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) dd->rcd[i]->do_interrupt = &handle_receive_interrupt_nodma_rtail; } -static inline void set_all_dma_rtail(struct hfi1_devdata *dd) +static inline void set_dma_rtail(struct hfi1_devdata *dd, u8 ctxt) { int i; - for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++) + /* + * For dynamically allocated kernel contexts (like vnic) switch + * interrupt handler only for that context. Otherwise, switch + * interrupt handler for all statically allocated kernel contexts. + */ + if (ctxt >= dd->first_dyn_alloc_ctxt) { + dd->rcd[ctxt]->do_interrupt = + &handle_receive_interrupt_dma_rtail; + return; + } + + for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) dd->rcd[i]->do_interrupt = &handle_receive_interrupt_dma_rtail; } @@ -895,8 +882,13 @@ void set_all_slowpath(struct hfi1_devdata *dd) int i; /* HFI1_CTRL_CTXT must always use the slow path interrupt handler */ - for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++) - dd->rcd[i]->do_interrupt = &handle_receive_interrupt; + for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) { + struct hfi1_ctxtdata *rcd = dd->rcd[i]; + + if ((i < dd->first_dyn_alloc_ctxt) || + (rcd && rcd->sc && (rcd->sc->type == SC_KERNEL))) + rcd->do_interrupt = &handle_receive_interrupt; + } } static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, @@ -908,7 +900,8 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, packet->rhf_addr); u8 etype = rhf_rcv_type(packet->rhf); - if (etype == RHF_RCV_TYPE_IB && hdr2sc(hdr, packet->rhf) != 0xf) { + if (etype == RHF_RCV_TYPE_IB && + hfi1_9B_get_sc5(hdr, packet->rhf) != 0xf) { int hwstate = read_logical_state(dd); if (hwstate != LSTATE_ACTIVE) { @@ -1006,7 +999,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) last = RCV_PKT_DONE; if (needset) { dd_dev_info(dd, "Switching to NO_DMA_RTAIL\n"); - set_all_nodma_rtail(dd); + set_nodma_rtail(dd, rcd->ctxt); needset = 0; } } else { @@ -1028,7 +1021,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) if (needset) { dd_dev_info(dd, "Switching to DMA_RTAIL\n"); - set_all_dma_rtail(dd); + set_dma_rtail(dd, rcd->ctxt); needset = 0; } } @@ -1077,10 +1070,10 @@ void receive_interrupt_work(struct work_struct *work) set_link_state(ppd, HLS_UP_ACTIVE); /* - * Interrupt all kernel contexts that could have had an - * interrupt during auto activation. + * Interrupt all statically allocated kernel contexts that could + * have had an interrupt during auto activation. */ - for (i = HFI1_CTRL_CTXT; i < dd->first_user_ctxt; i++) + for (i = HFI1_CTRL_CTXT; i < dd->first_dyn_alloc_ctxt; i++) force_recv_intr(dd->rcd[i]); } @@ -1294,8 +1287,9 @@ int hfi1_reset_device(int unit) spin_lock_irqsave(&dd->uctxt_lock, flags); if (dd->rcd) - for (i = dd->first_user_ctxt; i < dd->num_rcv_contexts; i++) { - if (!dd->rcd[i] || !dd->rcd[i]->cnt) + for (i = dd->first_dyn_alloc_ctxt; + i < dd->num_rcv_contexts; i++) { + if (!dd->rcd[i]) continue; spin_unlock_irqrestore(&dd->uctxt_lock, flags); ret = -EBUSY; @@ -1354,6 +1348,9 @@ void handle_eflags(struct hfi1_packet *packet) */ int process_receive_ib(struct hfi1_packet *packet) { + if (unlikely(hfi1_dbg_fault_packet(packet))) + return RHF_RCV_CONTINUE; + trace_hfi1_rcvhdr(packet->rcd->ppd->dd, packet->rcd->ctxt, rhf_err_flags(packet->rhf), @@ -1363,6 +1360,11 @@ int process_receive_ib(struct hfi1_packet *packet) packet->updegr, rhf_egr_index(packet->rhf)); + if (unlikely( + (hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) && + (packet->rhf & RHF_DC_ERR)))) + return RHF_RCV_CONTINUE; + if (unlikely(rhf_err_flags(packet->rhf))) { handle_eflags(packet); return RHF_RCV_CONTINUE; @@ -1372,15 +1374,31 @@ int process_receive_ib(struct hfi1_packet *packet) return RHF_RCV_CONTINUE; } +static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet) +{ + /* Packet received in VNIC context via RSM */ + if (packet->rcd->is_vnic) + return true; + + if ((HFI1_GET_L2_TYPE(packet->ebuf) == OPA_VNIC_L2_TYPE) && + (HFI1_GET_L4_TYPE(packet->ebuf) == OPA_VNIC_L4_ETHR)) + return true; + + return false; +} + int process_receive_bypass(struct hfi1_packet *packet) { struct hfi1_devdata *dd = packet->rcd->dd; - if (unlikely(rhf_err_flags(packet->rhf))) + if (unlikely(rhf_err_flags(packet->rhf))) { handle_eflags(packet); + } else if (hfi1_is_vnic_packet(packet)) { + hfi1_vnic_bypass_rcv(packet); + return RHF_RCV_CONTINUE; + } - dd_dev_err(dd, - "Bypass packets are not supported in normal operation. Dropping\n"); + dd_dev_err(dd, "Unsupported bypass packet. Dropping\n"); incr_cntr64(&dd->sw_rcv_bypass_packet_errors); if (!(dd->err_info_rcvport.status_and_code & OPA_EI_STATUS_SMASK)) { u64 *flits = packet->ebuf; @@ -1398,6 +1416,12 @@ int process_receive_bypass(struct hfi1_packet *packet) int process_receive_error(struct hfi1_packet *packet) { + /* KHdrHCRCErr -- KDETH packet with a bad HCRC */ + if (unlikely( + hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) && + rhf_rcv_type_err(packet->rhf) == 3)) + return RHF_RCV_CONTINUE; + handle_eflags(packet); if (unlikely(rhf_err_flags(packet->rhf))) @@ -1409,6 +1433,8 @@ int process_receive_error(struct hfi1_packet *packet) int kdeth_process_expected(struct hfi1_packet *packet) { + if (unlikely(hfi1_dbg_fault_packet(packet))) + return RHF_RCV_CONTINUE; if (unlikely(rhf_err_flags(packet->rhf))) handle_eflags(packet); @@ -1421,6 +1447,8 @@ int kdeth_process_eager(struct hfi1_packet *packet) { if (unlikely(rhf_err_flags(packet->rhf))) handle_eflags(packet); + if (unlikely(hfi1_dbg_fault_packet(packet))) + return RHF_RCV_CONTINUE; dd_dev_err(packet->rcd->dd, "Unhandled eager packet received. Dropping.\n"); diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index f78c739b330a..3158128d57e8 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -49,6 +49,7 @@ #include <linux/vmalloc.h> #include <linux/io.h> #include <linux/sched/mm.h> +#include <linux/bitmap.h> #include <rdma/ib.h> @@ -70,30 +71,37 @@ /* * File operation functions */ -static int hfi1_file_open(struct inode *, struct file *); -static int hfi1_file_close(struct inode *, struct file *); -static ssize_t hfi1_write_iter(struct kiocb *, struct iov_iter *); -static unsigned int hfi1_poll(struct file *, struct poll_table_struct *); -static int hfi1_file_mmap(struct file *, struct vm_area_struct *); - -static u64 kvirt_to_phys(void *); -static int assign_ctxt(struct file *, struct hfi1_user_info *); -static int init_subctxts(struct hfi1_ctxtdata *, const struct hfi1_user_info *); -static int user_init(struct file *); -static int get_ctxt_info(struct file *, void __user *, __u32); -static int get_base_info(struct file *, void __user *, __u32); -static int setup_ctxt(struct file *); -static int setup_subctxt(struct hfi1_ctxtdata *); -static int get_user_context(struct file *, struct hfi1_user_info *, int); -static int find_shared_ctxt(struct file *, const struct hfi1_user_info *); -static int allocate_ctxt(struct file *, struct hfi1_devdata *, - struct hfi1_user_info *); -static unsigned int poll_urgent(struct file *, struct poll_table_struct *); -static unsigned int poll_next(struct file *, struct poll_table_struct *); -static int user_event_ack(struct hfi1_ctxtdata *, int, unsigned long); -static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16); -static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int); -static int vma_fault(struct vm_fault *); +static int hfi1_file_open(struct inode *inode, struct file *fp); +static int hfi1_file_close(struct inode *inode, struct file *fp); +static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from); +static unsigned int hfi1_poll(struct file *fp, struct poll_table_struct *pt); +static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma); + +static u64 kvirt_to_phys(void *addr); +static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo); +static int init_subctxts(struct hfi1_ctxtdata *uctxt, + const struct hfi1_user_info *uinfo); +static int init_user_ctxt(struct hfi1_filedata *fd); +static void user_init(struct hfi1_ctxtdata *uctxt); +static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase, + __u32 len); +static int get_base_info(struct hfi1_filedata *fd, void __user *ubase, + __u32 len); +static int setup_base_ctxt(struct hfi1_filedata *fd); +static int setup_subctxt(struct hfi1_ctxtdata *uctxt); + +static int find_sub_ctxt(struct hfi1_filedata *fd, + const struct hfi1_user_info *uinfo); +static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, + struct hfi1_user_info *uinfo); +static unsigned int poll_urgent(struct file *fp, struct poll_table_struct *pt); +static unsigned int poll_next(struct file *fp, struct poll_table_struct *pt); +static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt, + unsigned long events); +static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey); +static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt, + int start_stop); +static int vma_fault(struct vm_fault *vmf); static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, unsigned long arg); @@ -173,6 +181,9 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) struct hfi1_devdata, user_cdev); + if (!((dd->flags & HFI1_PRESENT) && dd->kregbase)) + return -EINVAL; + if (!atomic_inc_not_zero(&dd->user_refcount)) return -ENXIO; @@ -187,6 +198,7 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) fd->rec_cpu_num = -1; /* no cpu affinity by default */ fd->mm = current->mm; mmgrab(fd->mm); + fd->dd = dd; fp->private_data = fd; } else { fp->private_data = NULL; @@ -229,20 +241,14 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, sizeof(uinfo))) return -EFAULT; - ret = assign_ctxt(fp, &uinfo); - if (ret < 0) - return ret; - ret = setup_ctxt(fp); - if (ret) - return ret; - ret = user_init(fp); + ret = assign_ctxt(fd, &uinfo); break; case HFI1_IOCTL_CTXT_INFO: - ret = get_ctxt_info(fp, (void __user *)(unsigned long)arg, + ret = get_ctxt_info(fd, (void __user *)(unsigned long)arg, sizeof(struct hfi1_ctxt_info)); break; case HFI1_IOCTL_USER_INFO: - ret = get_base_info(fp, (void __user *)(unsigned long)arg, + ret = get_base_info(fd, (void __user *)(unsigned long)arg, sizeof(struct hfi1_base_info)); break; case HFI1_IOCTL_CREDIT_UPD: @@ -256,7 +262,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, sizeof(tinfo))) return -EFAULT; - ret = hfi1_user_exp_rcv_setup(fp, &tinfo); + ret = hfi1_user_exp_rcv_setup(fd, &tinfo); if (!ret) { /* * Copy the number of tidlist entries we used @@ -278,7 +284,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, sizeof(tinfo))) return -EFAULT; - ret = hfi1_user_exp_rcv_clear(fp, &tinfo); + ret = hfi1_user_exp_rcv_clear(fd, &tinfo); if (ret) break; addr = arg + offsetof(struct hfi1_tid_info, tidcnt); @@ -293,7 +299,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, sizeof(tinfo))) return -EFAULT; - ret = hfi1_user_exp_rcv_invalid(fp, &tinfo); + ret = hfi1_user_exp_rcv_invalid(fd, &tinfo); if (ret) break; addr = arg + offsetof(struct hfi1_tid_info, tidcnt); @@ -430,7 +436,7 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) unsigned long count = 0; ret = hfi1_user_sdma_process_request( - kiocb->ki_filp, (struct iovec *)(from->iov + done), + fd, (struct iovec *)(from->iov + done), dim, &count); if (ret) { reqs = ret; @@ -586,8 +592,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) * knows where it's own bitmap is within the page. */ memaddr = (unsigned long)(dd->events + - ((uctxt->ctxt - dd->first_user_ctxt) * - HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK; + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) * + HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK; memlen = PAGE_SIZE; /* * v3.7 removes VM_RESERVED but the effect is kept by @@ -597,6 +603,10 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) vmf = 1; break; case STATUS: + if (flags & (unsigned long)(VM_WRITE | VM_EXEC)) { + ret = -EPERM; + goto done; + } memaddr = kvirt_to_phys((void *)dd->status); memlen = PAGE_SIZE; flags |= VM_IO | VM_DONTEXPAND; @@ -752,16 +762,19 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) /* release the cpu */ hfi1_put_proc_affinity(fdata->rec_cpu_num); + /* clean up rcv side */ + hfi1_user_exp_rcv_free(fdata); + /* * Clear any left over, unhandled events so the next process that * gets this context doesn't get confused. */ - ev = dd->events + ((uctxt->ctxt - dd->first_user_ctxt) * + ev = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) * HFI1_MAX_SHARED_CTXTS) + fdata->subctxt; *ev = 0; - if (--uctxt->cnt) { - uctxt->active_slaves &= ~(1 << fdata->subctxt); + __clear_bit(fdata->subctxt, uctxt->in_use_ctxts); + if (!bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) { mutex_unlock(&hfi1_mutex); goto done; } @@ -791,8 +804,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) dd->rcd[uctxt->ctxt] = NULL; - hfi1_user_exp_rcv_free(fdata); - hfi1_clear_ctxt_pkey(dd, uctxt->ctxt); + hfi1_user_exp_rcv_grp_free(uctxt); + hfi1_clear_ctxt_pkey(dd, uctxt); uctxt->rcvwait_to = 0; uctxt->piowait_to = 0; @@ -832,121 +845,135 @@ static u64 kvirt_to_phys(void *addr) return paddr; } -static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo) +static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) { - int i_minor, ret = 0; + int ret; unsigned int swmajor, swminor; swmajor = uinfo->userversion >> 16; - if (swmajor != HFI1_USER_SWMAJOR) { - ret = -ENODEV; - goto done; - } + if (swmajor != HFI1_USER_SWMAJOR) + return -ENODEV; swminor = uinfo->userversion & 0xffff; mutex_lock(&hfi1_mutex); - /* First, lets check if we need to setup a shared context? */ + /* + * Get a sub context if necessary. + * ret < 0 error, 0 no context, 1 sub-context found + */ + ret = 0; if (uinfo->subctxt_cnt) { - struct hfi1_filedata *fd = fp->private_data; - - ret = find_shared_ctxt(fp, uinfo); - if (ret < 0) - goto done_unlock; - if (ret) { + ret = find_sub_ctxt(fd, uinfo); + if (ret > 0) fd->rec_cpu_num = hfi1_get_proc_affinity(fd->uctxt->numa_id); - } } /* - * We execute the following block if we couldn't find a - * shared context or if context sharing is not required. + * Allocate a base context if context sharing is not required or we + * couldn't find a sub context. */ - if (!ret) { - i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE; - ret = get_user_context(fp, uinfo, i_minor); - } -done_unlock: + if (!ret) + ret = allocate_ctxt(fd, fd->dd, uinfo); + mutex_unlock(&hfi1_mutex); -done: + + /* Depending on the context type, do the appropriate init */ + if (ret > 0) { + /* + * sub-context info can only be set up after the base + * context has been completed. + */ + ret = wait_event_interruptible(fd->uctxt->wait, !test_bit( + HFI1_CTXT_BASE_UNINIT, + &fd->uctxt->event_flags)); + if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags)) { + clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts); + return -ENOMEM; + } + /* The only thing a sub context needs is the user_xxx stuff */ + if (!ret) + ret = init_user_ctxt(fd); + + if (ret) + clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts); + } else if (!ret) { + ret = setup_base_ctxt(fd); + if (fd->uctxt->subctxt_cnt) { + /* If there is an error, set the failed bit. */ + if (ret) + set_bit(HFI1_CTXT_BASE_FAILED, + &fd->uctxt->event_flags); + /* + * Base context is done, notify anybody using a + * sub-context that is waiting for this completion + */ + clear_bit(HFI1_CTXT_BASE_UNINIT, + &fd->uctxt->event_flags); + wake_up(&fd->uctxt->wait); + } + } + return ret; } -static int get_user_context(struct file *fp, struct hfi1_user_info *uinfo, - int devno) +/* + * The hfi1_mutex must be held when this function is called. It is + * necessary to ensure serialized access to the bitmask in_use_ctxts. + */ +static int find_sub_ctxt(struct hfi1_filedata *fd, + const struct hfi1_user_info *uinfo) { - struct hfi1_devdata *dd = NULL; - int devmax, npresent, nup; + int i; + struct hfi1_devdata *dd = fd->dd; + u16 subctxt; - devmax = hfi1_count_units(&npresent, &nup); - if (!npresent) - return -ENXIO; + for (i = dd->first_dyn_alloc_ctxt; i < dd->num_rcv_contexts; i++) { + struct hfi1_ctxtdata *uctxt = dd->rcd[i]; - if (!nup) - return -ENETDOWN; + /* Skip ctxts which are not yet open */ + if (!uctxt || + bitmap_empty(uctxt->in_use_ctxts, + HFI1_MAX_SHARED_CTXTS)) + continue; - dd = hfi1_lookup(devno); - if (!dd) - return -ENODEV; - else if (!dd->freectxts) - return -EBUSY; + /* Skip dynamically allocted kernel contexts */ + if (uctxt->sc && (uctxt->sc->type == SC_KERNEL)) + continue; - return allocate_ctxt(fp, dd, uinfo); -} + /* Skip ctxt if it doesn't match the requested one */ + if (memcmp(uctxt->uuid, uinfo->uuid, + sizeof(uctxt->uuid)) || + uctxt->jkey != generate_jkey(current_uid()) || + uctxt->subctxt_id != uinfo->subctxt_id || + uctxt->subctxt_cnt != uinfo->subctxt_cnt) + continue; -static int find_shared_ctxt(struct file *fp, - const struct hfi1_user_info *uinfo) -{ - int devmax, ndev, i; - int ret = 0; - struct hfi1_filedata *fd = fp->private_data; + /* Verify the sharing process matches the master */ + if (uctxt->userversion != uinfo->userversion) + return -EINVAL; - devmax = hfi1_count_units(NULL, NULL); + /* Find an unused context */ + subctxt = find_first_zero_bit(uctxt->in_use_ctxts, + HFI1_MAX_SHARED_CTXTS); + if (subctxt >= uctxt->subctxt_cnt) + return -EBUSY; - for (ndev = 0; ndev < devmax; ndev++) { - struct hfi1_devdata *dd = hfi1_lookup(ndev); + fd->uctxt = uctxt; + fd->subctxt = subctxt; + __set_bit(fd->subctxt, uctxt->in_use_ctxts); - if (!(dd && (dd->flags & HFI1_PRESENT) && dd->kregbase)) - continue; - for (i = dd->first_user_ctxt; i < dd->num_rcv_contexts; i++) { - struct hfi1_ctxtdata *uctxt = dd->rcd[i]; - - /* Skip ctxts which are not yet open */ - if (!uctxt || !uctxt->cnt) - continue; - /* Skip ctxt if it doesn't match the requested one */ - if (memcmp(uctxt->uuid, uinfo->uuid, - sizeof(uctxt->uuid)) || - uctxt->jkey != generate_jkey(current_uid()) || - uctxt->subctxt_id != uinfo->subctxt_id || - uctxt->subctxt_cnt != uinfo->subctxt_cnt) - continue; - - /* Verify the sharing process matches the master */ - if (uctxt->userversion != uinfo->userversion || - uctxt->cnt >= uctxt->subctxt_cnt) { - ret = -EINVAL; - goto done; - } - fd->uctxt = uctxt; - fd->subctxt = uctxt->cnt++; - uctxt->active_slaves |= 1 << fd->subctxt; - ret = 1; - goto done; - } + return 1; } -done: - return ret; + return 0; } -static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, +static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, struct hfi1_user_info *uinfo) { - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt; - unsigned ctxt; + unsigned int ctxt; int ret, numa; if (dd->flags & HFI1_FROZEN) { @@ -960,7 +987,16 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, return -EIO; } - for (ctxt = dd->first_user_ctxt; ctxt < dd->num_rcv_contexts; ctxt++) + /* + * This check is sort of redundant to the next EBUSY error. It would + * also indicate an inconsistancy in the driver if this value was + * zero, but there were still contexts available. + */ + if (!dd->freectxts) + return -EBUSY; + + for (ctxt = dd->first_dyn_alloc_ctxt; + ctxt < dd->num_rcv_contexts; ctxt++) if (!dd->rcd[ctxt]) break; @@ -1002,12 +1038,12 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, goto ctxdata_free; /* - * Setup shared context resources if the user-level has requested - * shared contexts and this is the 'master' process. + * Setup sub context resources if the user-level has requested + * sub contexts. * This has to be done here so the rest of the sub-contexts find the * proper master. */ - if (uinfo->subctxt_cnt && !fd->subctxt) { + if (uinfo->subctxt_cnt) { ret = init_subctxts(uctxt, uinfo); /* * On error, we don't need to disable and de-allocate the @@ -1044,7 +1080,7 @@ ctxdata_free: static int init_subctxts(struct hfi1_ctxtdata *uctxt, const struct hfi1_user_info *uinfo) { - unsigned num_subctxts; + u16 num_subctxts; num_subctxts = uinfo->subctxt_cnt; if (num_subctxts > HFI1_MAX_SHARED_CTXTS) @@ -1052,9 +1088,8 @@ static int init_subctxts(struct hfi1_ctxtdata *uctxt, uctxt->subctxt_cnt = uinfo->subctxt_cnt; uctxt->subctxt_id = uinfo->subctxt_id; - uctxt->active_slaves = 1; uctxt->redirect_seq_cnt = 1; - set_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags); + set_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags); return 0; } @@ -1062,13 +1097,12 @@ static int init_subctxts(struct hfi1_ctxtdata *uctxt, static int setup_subctxt(struct hfi1_ctxtdata *uctxt) { int ret = 0; - unsigned num_subctxts = uctxt->subctxt_cnt; + u16 num_subctxts = uctxt->subctxt_cnt; uctxt->subctxt_uregbase = vmalloc_user(PAGE_SIZE); - if (!uctxt->subctxt_uregbase) { - ret = -ENOMEM; - goto bail; - } + if (!uctxt->subctxt_uregbase) + return -ENOMEM; + /* We can take the size of the RcvHdr Queue from the master */ uctxt->subctxt_rcvhdr_base = vmalloc_user(uctxt->rcvhdrq_size * num_subctxts); @@ -1083,25 +1117,22 @@ static int setup_subctxt(struct hfi1_ctxtdata *uctxt) ret = -ENOMEM; goto bail_rhdr; } - goto bail; + + return 0; + bail_rhdr: vfree(uctxt->subctxt_rcvhdr_base); + uctxt->subctxt_rcvhdr_base = NULL; bail_ureg: vfree(uctxt->subctxt_uregbase); uctxt->subctxt_uregbase = NULL; -bail: + return ret; } -static int user_init(struct file *fp) +static void user_init(struct hfi1_ctxtdata *uctxt) { unsigned int rcvctrl_ops = 0; - struct hfi1_filedata *fd = fp->private_data; - struct hfi1_ctxtdata *uctxt = fd->uctxt; - - /* make sure that the context has already been setup */ - if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags)) - return -EFAULT; /* initialize poll variables... */ uctxt->urgent = 0; @@ -1149,20 +1180,12 @@ static int user_init(struct file *fp) else rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS; hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt); - - /* Notify any waiting slaves */ - if (uctxt->subctxt_cnt) { - clear_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags); - wake_up(&uctxt->wait); - } - - return 0; } -static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len) +static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase, + __u32 len) { struct hfi1_ctxt_info cinfo; - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; int ret = 0; @@ -1200,75 +1223,71 @@ static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len) return ret; } -static int setup_ctxt(struct file *fp) +static int init_user_ctxt(struct hfi1_filedata *fd) +{ + struct hfi1_ctxtdata *uctxt = fd->uctxt; + int ret; + + ret = hfi1_user_sdma_alloc_queues(uctxt, fd); + if (ret) + return ret; + + ret = hfi1_user_exp_rcv_init(fd); + + return ret; +} + +static int setup_base_ctxt(struct hfi1_filedata *fd) { - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; int ret = 0; - /* - * Context should be set up only once, including allocation and - * programming of eager buffers. This is done if context sharing - * is not requested or by the master process. - */ - if (!uctxt->subctxt_cnt || !fd->subctxt) { - ret = hfi1_init_ctxt(uctxt->sc); - if (ret) - goto done; + hfi1_init_ctxt(uctxt->sc); - /* Now allocate the RcvHdr queue and eager buffers. */ - ret = hfi1_create_rcvhdrq(dd, uctxt); - if (ret) - goto done; - ret = hfi1_setup_eagerbufs(uctxt); - if (ret) - goto done; - if (uctxt->subctxt_cnt && !fd->subctxt) { - ret = setup_subctxt(uctxt); - if (ret) - goto done; - } - } else { - ret = wait_event_interruptible(uctxt->wait, !test_bit( - HFI1_CTXT_MASTER_UNINIT, - &uctxt->event_flags)); - if (ret) - goto done; - } + /* Now allocate the RcvHdr queue and eager buffers. */ + ret = hfi1_create_rcvhdrq(dd, uctxt); + if (ret) + return ret; - ret = hfi1_user_sdma_alloc_queues(uctxt, fp); + ret = hfi1_setup_eagerbufs(uctxt); if (ret) - goto done; - /* - * Expected receive has to be setup for all processes (including - * shared contexts). However, it has to be done after the master - * context has been fully configured as it depends on the - * eager/expected split of the RcvArray entries. - * Setting it up here ensures that the subcontexts will be waiting - * (due to the above wait_event_interruptible() until the master - * is setup. - */ - ret = hfi1_user_exp_rcv_init(fp); + goto setup_failed; + + /* If sub-contexts are enabled, do the appropriate setup */ + if (uctxt->subctxt_cnt) + ret = setup_subctxt(uctxt); if (ret) - goto done; + goto setup_failed; - set_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags); -done: + ret = hfi1_user_exp_rcv_grp_init(fd); + if (ret) + goto setup_failed; + + ret = init_user_ctxt(fd); + if (ret) + goto setup_failed; + + user_init(uctxt); + + return 0; + +setup_failed: + hfi1_free_ctxtdata(dd, uctxt); return ret; } -static int get_base_info(struct file *fp, void __user *ubase, __u32 len) +static int get_base_info(struct hfi1_filedata *fd, void __user *ubase, + __u32 len) { struct hfi1_base_info binfo; - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; ssize_t sz; unsigned offset; int ret = 0; - trace_hfi1_uctxtdata(uctxt->dd, uctxt); + trace_hfi1_uctxtdata(uctxt->dd, uctxt, fd->subctxt); memset(&binfo, 0, sizeof(binfo)); binfo.hw_version = dd->revision; @@ -1306,7 +1325,7 @@ static int get_base_info(struct file *fp, void __user *ubase, __u32 len) */ binfo.user_regbase = HFI1_MMAP_TOKEN(UREGS, uctxt->ctxt, fd->subctxt, 0); - offset = offset_in_page((((uctxt->ctxt - dd->first_user_ctxt) * + offset = offset_in_page((((uctxt->ctxt - dd->first_dyn_alloc_ctxt) * HFI1_MAX_SHARED_CTXTS) + fd->subctxt) * sizeof(*dd->events)); binfo.events_bufbase = HFI1_MMAP_TOKEN(EVENTS, uctxt->ctxt, @@ -1400,12 +1419,12 @@ int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit) } spin_lock_irqsave(&dd->uctxt_lock, flags); - for (ctxt = dd->first_user_ctxt; ctxt < dd->num_rcv_contexts; + for (ctxt = dd->first_dyn_alloc_ctxt; ctxt < dd->num_rcv_contexts; ctxt++) { uctxt = dd->rcd[ctxt]; if (uctxt) { unsigned long *evs = dd->events + - (uctxt->ctxt - dd->first_user_ctxt) * + (uctxt->ctxt - dd->first_dyn_alloc_ctxt) * HFI1_MAX_SHARED_CTXTS; int i; /* @@ -1432,7 +1451,7 @@ done: * overflow conditions. start_stop==1 re-enables, to be used to * re-init the software copy of the head register */ -static int manage_rcvq(struct hfi1_ctxtdata *uctxt, unsigned subctxt, +static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt, int start_stop) { struct hfi1_devdata *dd = uctxt->dd; @@ -1467,7 +1486,7 @@ bail: * User process then performs actions appropriate to bit having been * set, if desired, and checks again in future. */ -static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt, +static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt, unsigned long events) { int i; @@ -1477,7 +1496,7 @@ static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt, if (!dd->events) return 0; - evs = dd->events + ((uctxt->ctxt - dd->first_user_ctxt) * + evs = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) * HFI1_MAX_SHARED_CTXTS) + subctxt; for (i = 0; i <= _HFI1_MAX_EVENT_BIT; i++) { @@ -1488,8 +1507,7 @@ static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt, return 0; } -static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned subctxt, - u16 pkey) +static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey) { int ret = -ENOENT, i, intable = 0; struct hfi1_pportdata *ppd = uctxt->ppd; diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 0dd50cdb039a..4042c11b2742 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -1004,7 +1004,9 @@ static int load_8051_firmware(struct hfi1_devdata *dd, { u64 reg; int ret; - u8 ver_a, ver_b; + u8 ver_major; + u8 ver_minor; + u8 ver_patch; /* * DC Reset sequence @@ -1073,10 +1075,10 @@ static int load_8051_firmware(struct hfi1_devdata *dd, return -ETIMEDOUT; } - read_misc_status(dd, &ver_a, &ver_b); - dd_dev_info(dd, "8051 firmware version %d.%d\n", - (int)ver_b, (int)ver_a); - dd->dc8051_ver = dc8051_ver(ver_b, ver_a); + read_misc_status(dd, &ver_major, &ver_minor, &ver_patch); + dd_dev_info(dd, "8051 firmware version %d.%d.%d\n", + (int)ver_major, (int)ver_minor, (int)ver_patch); + dd->dc8051_ver = dc8051_ver(ver_major, ver_minor, ver_patch); return 0; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 0808e3c3ba39..da322e6668cc 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1,7 +1,7 @@ #ifndef _HFI1_KERNEL_H #define _HFI1_KERNEL_H /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -54,6 +54,7 @@ #include <linux/list.h> #include <linux/scatterlist.h> #include <linux/slab.h> +#include <linux/idr.h> #include <linux/io.h> #include <linux/fs.h> #include <linux/completion.h> @@ -66,6 +67,7 @@ #include <linux/i2c-algo-bit.h> #include <rdma/ib_hdrs.h> #include <linux/rhashtable.h> +#include <linux/netdevice.h> #include <rdma/rdma_vt.h> #include "chip_registers.h" @@ -194,12 +196,6 @@ struct hfi1_ctxtdata { void *rcvhdrq; /* kernel virtual address where hdrqtail is updated */ volatile __le64 *rcvhdrtail_kvaddr; - /* - * Shared page for kernel to signal user processes that send buffers - * need disarming. The process should call HFI1_CMD_DISARM_BUFS - * or HFI1_CMD_ACK_EVENT with IPATH_EVENT_DISARM_BUFS set. - */ - unsigned long *user_event_mask; /* when waiting for rcv or pioavail */ wait_queue_head_t wait; /* rcvhdrq size (for freeing) */ @@ -222,13 +218,12 @@ struct hfi1_ctxtdata { * (ignoring forks, dup, etc. for now) */ int cnt; + /* Device context index */ + unsigned ctxt; /* - * how much space to leave at start of eager TID entries for - * protocol use, on each TID + * non-zero if ctxt can be shared, and defines the maximum number of + * sub-contexts for this device context. */ - /* instead of calculating it */ - unsigned ctxt; - /* non-zero if ctxt is being shared. */ u16 subctxt_cnt; /* non-zero if ctxt is being shared. */ u16 subctxt_id; @@ -278,16 +273,18 @@ struct hfi1_ctxtdata { struct hfi1_devdata *dd; /* so functions that need physical port can get it easily */ struct hfi1_pportdata *ppd; + /* associated msix interrupt */ + u32 msix_intr; /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */ void *subctxt_uregbase; /* An array of pages for the eager receive buffers * N */ void *subctxt_rcvegrbuf; /* An array of pages for the eager header queue entries * N */ void *subctxt_rcvhdr_base; + /* Bitmask of in use context(s) */ + DECLARE_BITMAP(in_use_ctxts, HFI1_MAX_SHARED_CTXTS); /* The version of the library which opened this ctxt */ u32 userversion; - /* Bitmask of active slaves */ - u32 active_slaves; /* Type of packets or conditions we want to poll for */ u16 poll_type; /* receive packet sequence counter */ @@ -337,6 +334,12 @@ struct hfi1_ctxtdata { * packets with the wrong interrupt handler. */ int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded); + + /* Indicates that this is vnic context */ + bool is_vnic; + + /* vnic queue index this context is mapped to */ + u8 vnic_q_idx; }; /* @@ -474,7 +477,7 @@ struct rvt_sge_state; #define HFI1_PART_ENFORCE_OUT 0x2 /* how often we check for synthetic counter wrap around */ -#define SYNTH_CNT_TIME 2 +#define SYNTH_CNT_TIME 3 /* Counter flags */ #define CNTR_NORMAL 0x0 /* Normal counters, just read register */ @@ -808,6 +811,32 @@ struct hfi1_asic_data { struct hfi1_i2c_bus *i2c_bus1; }; +/* sizes for both the QP and RSM map tables */ +#define NUM_MAP_ENTRIES 256 +#define NUM_MAP_REGS 32 + +/* + * Number of VNIC contexts used. Ensure it is less than or equal to + * max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE). + */ +#define HFI1_NUM_VNIC_CTXT 8 + +/* Number of VNIC RSM entries */ +#define NUM_VNIC_MAP_ENTRIES 8 + +/* Virtual NIC information */ +struct hfi1_vnic_data { + struct hfi1_ctxtdata *ctxt[HFI1_NUM_VNIC_CTXT]; + struct kmem_cache *txreq_cache; + u8 num_vports; + struct idr vesw_idr; + u8 rmt_start; + u8 num_ctxt; + u32 msix_idx; +}; + +struct hfi1_vnic_vport_info; + /* device data struct now contains only "general per-device" info. * fields related to a physical IB port are in a hfi1_pportdata struct. */ @@ -926,8 +955,9 @@ struct hfi1_devdata { spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */ /* around rcd and (user ctxts) ctxt_cnt use (intr vs free) */ spinlock_t uctxt_lock; /* rcd and user context changes */ - /* exclusive access to 8051 */ - spinlock_t dc8051_lock; + struct mutex dc8051_lock; /* exclusive access to 8051 */ + struct workqueue_struct *update_cntr_wq; + struct work_struct update_cntr_work; /* exclusive access to 8051 memory */ spinlock_t dc8051_memlock; int dc8051_timed_out; /* remember if the 8051 timed out */ @@ -1020,7 +1050,7 @@ struct hfi1_devdata { u8 qos_shift; u16 irev; /* implementation revision */ - u16 dc8051_ver; /* 8051 firmware version */ + u32 dc8051_ver; /* 8051 firmware version */ spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */ struct platform_config platform_config; @@ -1031,6 +1061,7 @@ struct hfi1_devdata { /* MSI-X information */ struct hfi1_msix_entry *msix_entries; u32 num_msix_entries; + u32 first_dyn_msix_idx; /* INTx information */ u32 requested_intx_irq; /* did we request one? */ @@ -1115,6 +1146,9 @@ struct hfi1_devdata { send_routine process_dma_send; void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, const void *from, size_t count); + int (*process_vnic_dma_send)(struct hfi1_devdata *dd, u8 q_idx, + struct hfi1_vnic_vport_info *vinfo, + struct sk_buff *skb, u64 pbc, u8 plen); /* hfi1_pportdata, points to array of (physical) port-specific * data structs, indexed by pidx (0..n-1) */ @@ -1126,8 +1160,8 @@ struct hfi1_devdata { u16 flags; /* Number of physical ports available */ u8 num_pports; - /* Lowest context number which can be used by user processes */ - u8 first_user_ctxt; + /* Lowest context number which can be used by user processes or VNIC */ + u8 first_dyn_alloc_ctxt; /* adding a new field here would make it part of this cacheline */ /* seqlock for sc2vl */ @@ -1167,15 +1201,24 @@ struct hfi1_devdata { bool eprom_available; /* true if EPROM is available for this device */ bool aspm_supported; /* Does HW support ASPM */ bool aspm_enabled; /* ASPM state: enabled/disabled */ - struct rhashtable sdma_rht; + struct rhashtable *sdma_rht; struct kobject kobj; + + /* vnic data */ + struct hfi1_vnic_data vnic; }; +static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare) +{ + return (dd->vnic.rmt_start + spare) > NUM_MAP_ENTRIES; +} + /* 8051 firmware version helper */ -#define dc8051_ver(a, b) ((a) << 8 | (b)) -#define dc8051_ver_maj(a) ((a & 0xff00) >> 8) -#define dc8051_ver_min(a) (a & 0x00ff) +#define dc8051_ver(a, b, c) ((a) << 16 | (b) << 8 | (c)) +#define dc8051_ver_maj(a) (((a) & 0xff0000) >> 16) +#define dc8051_ver_min(a) (((a) & 0x00ff00) >> 8) +#define dc8051_ver_patch(a) ((a) & 0x0000ff) /* f_put_tid types */ #define PT_EXPECTED 0 @@ -1188,10 +1231,11 @@ struct mmu_rb_handler; /* Private data for file operations */ struct hfi1_filedata { + struct hfi1_devdata *dd; struct hfi1_ctxtdata *uctxt; - unsigned subctxt; struct hfi1_user_sdma_comp_q *cq; struct hfi1_user_sdma_pkt_q *pq; + u16 subctxt; /* for cpu affinity; -1 if none */ int rec_cpu_num; u32 tid_n_pinned; @@ -1213,28 +1257,31 @@ struct hfi1_devdata *hfi1_lookup(int unit); extern u32 hfi1_cpulist_count; extern unsigned long *hfi1_cpulist; -int hfi1_init(struct hfi1_devdata *, int); -int hfi1_count_units(int *npresentp, int *nupp); +int hfi1_init(struct hfi1_devdata *dd, int reinit); int hfi1_count_active_units(void); -int hfi1_diag_add(struct hfi1_devdata *); -void hfi1_diag_remove(struct hfi1_devdata *); +int hfi1_diag_add(struct hfi1_devdata *dd); +void hfi1_diag_remove(struct hfi1_devdata *dd); void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup); void handle_user_interrupt(struct hfi1_ctxtdata *rcd); -int hfi1_create_rcvhdrq(struct hfi1_devdata *, struct hfi1_ctxtdata *); -int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *); +int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd); +int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd); int hfi1_create_ctxts(struct hfi1_devdata *dd); -struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *, u32, int); -void hfi1_init_pportdata(struct pci_dev *, struct hfi1_pportdata *, - struct hfi1_devdata *, u8, u8); -void hfi1_free_ctxtdata(struct hfi1_devdata *, struct hfi1_ctxtdata *); - -int handle_receive_interrupt(struct hfi1_ctxtdata *, int); -int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *, int); -int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *, int); +struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, + int numa); +void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, + struct hfi1_devdata *dd, u8 hw_pidx, u8 port); +void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd); + +int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread); +int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread); +int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread); void set_all_slowpath(struct hfi1_devdata *dd); +void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd); +void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd); +void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd); extern const struct pci_device_id hfi1_pci_tbl[]; @@ -1254,16 +1301,24 @@ int hfi1_reset_device(int); /* return the driver's idea of the logical OPA port state */ static inline u32 driver_lstate(struct hfi1_pportdata *ppd) { - return ppd->lstate; /* use the cached value */ + /* + * The driver does some processing from the time the logical + * link state is at INIT to the time the SM can be notified + * as such. Return IB_PORT_DOWN until the software state + * is ready. + */ + if (ppd->lstate == IB_PORT_INIT && !(ppd->host_link_state & HLS_UP)) + return IB_PORT_DOWN; + else + return ppd->lstate; } void receive_interrupt_work(struct work_struct *work); /* extract service channel from header and rhf */ -static inline int hdr2sc(struct ib_header *hdr, u64 rhf) +static inline int hfi1_9B_get_sc5(struct ib_header *hdr, u64 rhf) { - return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) | - ((!!(rhf_dc_info(rhf))) << 4); + return ib_get_sc(hdr) | ((!!(rhf_dc_info(rhf))) << 4); } #define HFI1_JKEY_WIDTH 16 @@ -1519,7 +1574,7 @@ bad: u32 lrh_max_header_bytes(struct hfi1_devdata *dd); int mtu_to_enum(u32 mtu, int default_if_bad); -u16 enum_to_mtu(int); +u16 enum_to_mtu(int mtu); static inline int valid_ib_mtu(unsigned int mtu) { return mtu == 256 || mtu == 512 || @@ -1533,15 +1588,15 @@ static inline int valid_opa_max_mtu(unsigned int mtu) (valid_ib_mtu(mtu) || mtu == 8192 || mtu == 10240); } -int set_mtu(struct hfi1_pportdata *); +int set_mtu(struct hfi1_pportdata *ppd); -int hfi1_set_lid(struct hfi1_pportdata *, u32, u8); -void hfi1_disable_after_error(struct hfi1_devdata *); -int hfi1_set_uevent_bits(struct hfi1_pportdata *, const int); -int hfi1_rcvbuf_validate(u32, u8, u16 *); +int hfi1_set_lid(struct hfi1_pportdata *ppd, u32 lid, u8 lmc); +void hfi1_disable_after_error(struct hfi1_devdata *dd); +int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit); +int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encode); -int fm_get_table(struct hfi1_pportdata *, int, void *); -int fm_set_table(struct hfi1_pportdata *, int, void *); +int fm_get_table(struct hfi1_pportdata *ppd, int which, void *t); +int fm_set_table(struct hfi1_pportdata *ppd, int which, void *t); void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf); void reset_link_credits(struct hfi1_devdata *dd); @@ -1597,9 +1652,9 @@ static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt, u32 bth1; bth1 = be32_to_cpu(ohdr->bth[1]); - if (unlikely(bth1 & (HFI1_BECN_SMASK | HFI1_FECN_SMASK))) { + if (unlikely(bth1 & (IB_BECN_SMASK | IB_FECN_SMASK))) { hfi1_process_ecn_slowpath(qp, pkt, do_cnp); - return bth1 & HFI1_FECN_SMASK; + return !!(bth1 & IB_FECN_SMASK); } return false; } @@ -1663,19 +1718,19 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd) #define HFI1_PBC_LENGTH_MASK ((1 << 11) - 1) /* ctxt_flag bit offsets */ - /* context has been setup */ -#define HFI1_CTXT_SETUP_DONE 1 + /* base context has not finished initializing */ +#define HFI1_CTXT_BASE_UNINIT 1 + /* base context initaliation failed */ +#define HFI1_CTXT_BASE_FAILED 2 /* waiting for a packet to arrive */ -#define HFI1_CTXT_WAITING_RCV 2 - /* master has not finished initializing */ -#define HFI1_CTXT_MASTER_UNINIT 4 +#define HFI1_CTXT_WAITING_RCV 3 /* waiting for an urgent packet to arrive */ -#define HFI1_CTXT_WAITING_URG 5 +#define HFI1_CTXT_WAITING_URG 4 /* free up any allocated data at closes */ -struct hfi1_devdata *hfi1_init_dd(struct pci_dev *, - const struct pci_device_id *); -void hfi1_free_devdata(struct hfi1_devdata *); +struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, + const struct pci_device_id *ent); +void hfi1_free_devdata(struct hfi1_devdata *dd); struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra); /* LED beaconing functions */ @@ -1750,24 +1805,24 @@ static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd) extern const char ib_hfi1_version[]; -int hfi1_device_create(struct hfi1_devdata *); -void hfi1_device_remove(struct hfi1_devdata *); +int hfi1_device_create(struct hfi1_devdata *dd); +void hfi1_device_remove(struct hfi1_devdata *dd); int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, struct kobject *kobj); -int hfi1_verbs_register_sysfs(struct hfi1_devdata *); -void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *); +int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd); +void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd); /* Hook for sysfs read of QSFP */ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len); -int hfi1_pcie_init(struct pci_dev *, const struct pci_device_id *); -void hfi1_pcie_cleanup(struct pci_dev *); -int hfi1_pcie_ddinit(struct hfi1_devdata *, struct pci_dev *); +int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent); +void hfi1_pcie_cleanup(struct pci_dev *pdev); +int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev); void hfi1_pcie_ddcleanup(struct hfi1_devdata *); -void hfi1_pcie_flr(struct hfi1_devdata *); -int pcie_speeds(struct hfi1_devdata *); -void request_msix(struct hfi1_devdata *, u32 *, struct hfi1_msix_entry *); -void hfi1_enable_intx(struct pci_dev *); +int pcie_speeds(struct hfi1_devdata *dd); +void request_msix(struct hfi1_devdata *dd, u32 *nent, + struct hfi1_msix_entry *entry); +void hfi1_enable_intx(struct pci_dev *pdev); void restore_pci_variables(struct hfi1_devdata *dd); int do_pcie_gen3_transition(struct hfi1_devdata *dd); int parse_platform_config(struct hfi1_devdata *dd); diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index f40864e9a3b2..4a11d4da4c92 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -53,6 +53,7 @@ #include <linux/module.h> #include <linux/printk.h> #include <linux/hrtimer.h> +#include <linux/bitmap.h> #include <rdma/rdma_vt.h> #include "hfi.h" @@ -65,10 +66,12 @@ #include "verbs.h" #include "aspm.h" #include "affinity.h" +#include "vnic.h" #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt +#define HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES 5 /* * min buffers we want to have per context, after driver */ @@ -100,9 +103,9 @@ static unsigned hfi1_rcvarr_split = 25; module_param_named(rcvarr_split, hfi1_rcvarr_split, uint, S_IRUGO); MODULE_PARM_DESC(rcvarr_split, "Percent of context's RcvArray entries used for Eager buffers"); -static uint eager_buffer_size = (2 << 20); /* 2MB */ +static uint eager_buffer_size = (8 << 20); /* 8MB */ module_param(eager_buffer_size, uint, S_IRUGO); -MODULE_PARM_DESC(eager_buffer_size, "Size of the eager buffers, default: 2MB"); +MODULE_PARM_DESC(eager_buffer_size, "Size of the eager buffers, default: 8MB"); static uint rcvhdrcnt = 2048; /* 2x the max eager buffer count */ module_param_named(rcvhdrcnt, rcvhdrcnt, uint, S_IRUGO); @@ -116,7 +119,7 @@ unsigned int user_credit_return_threshold = 33; /* default is 33% */ module_param(user_credit_return_threshold, uint, S_IRUGO); MODULE_PARM_DESC(user_credit_return_threshold, "Credit return threshold for user send contexts, return when unreturned credits passes this many blocks (in percent of allocated blocks, 0 is off)"); -static inline u64 encode_rcv_header_entry_size(u16); +static inline u64 encode_rcv_header_entry_size(u16 size); static struct idr hfi1_unit_table; u32 hfi1_cpulist_count; @@ -139,7 +142,7 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) goto nomem; /* create one or more kernel contexts */ - for (i = 0; i < dd->first_user_ctxt; ++i) { + for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) { struct hfi1_pportdata *ppd; struct hfi1_ctxtdata *rcd; @@ -174,13 +177,7 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) goto nomem; } - ret = hfi1_init_ctxt(rcd->sc); - if (ret < 0) { - dd_dev_err(dd, - "Failed to setup kernel receive context, failing\n"); - ret = -EFAULT; - goto bail; - } + hfi1_init_ctxt(rcd->sc); } /* @@ -192,7 +189,7 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) return 0; nomem: ret = -ENOMEM; -bail: + if (dd->rcd) { for (i = 0; i < dd->num_rcv_contexts; ++i) hfi1_free_ctxtdata(dd, dd->rcd[i]); @@ -214,9 +211,9 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, u32 base; if (dd->rcv_entries.nctxt_extra > - dd->num_rcv_contexts - dd->first_user_ctxt) + dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt) kctxt_ngroups = (dd->rcv_entries.nctxt_extra - - (dd->num_rcv_contexts - dd->first_user_ctxt)); + (dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt)); rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, numa); if (rcd) { u32 rcvtids, max_entries; @@ -226,7 +223,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, INIT_LIST_HEAD(&rcd->qp_wait_list); rcd->ppd = ppd; rcd->dd = dd; - rcd->cnt = 1; + __set_bit(0, rcd->in_use_ctxts); rcd->ctxt = ctxt; dd->rcd[ctxt] = rcd; rcd->numa_id = numa; @@ -238,27 +235,29 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, * Calculate the context's RcvArray entry starting point. * We do this here because we have to take into account all * the RcvArray entries that previous context would have - * taken and we have to account for any extra groups - * assigned to the kernel or user contexts. + * taken and we have to account for any extra groups assigned + * to the static (kernel) or dynamic (vnic/user) contexts. */ - if (ctxt < dd->first_user_ctxt) { + if (ctxt < dd->first_dyn_alloc_ctxt) { if (ctxt < kctxt_ngroups) { base = ctxt * (dd->rcv_entries.ngroups + 1); rcd->rcv_array_groups++; - } else + } else { base = kctxt_ngroups + (ctxt * dd->rcv_entries.ngroups); + } } else { - u16 ct = ctxt - dd->first_user_ctxt; + u16 ct = ctxt - dd->first_dyn_alloc_ctxt; base = ((dd->n_krcv_queues * dd->rcv_entries.ngroups) + kctxt_ngroups); if (ct < dd->rcv_entries.nctxt_extra) { base += ct * (dd->rcv_entries.ngroups + 1); rcd->rcv_array_groups++; - } else + } else { base += dd->rcv_entries.nctxt_extra + (ct * dd->rcv_entries.ngroups); + } } rcd->eager_base = base * dd->rcv_entries.group_size; @@ -322,7 +321,8 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, } rcd->egrbufs.rcvtid_size = HFI1_MAX_EAGER_BUFFER_SIZE; - if (ctxt < dd->first_user_ctxt) { /* N/A for PSM contexts */ + /* Applicable only for statically created kernel contexts */ + if (ctxt < dd->first_dyn_alloc_ctxt) { rcd->opstats = kzalloc_node(sizeof(*rcd->opstats), GFP_KERNEL, numa); if (!rcd->opstats) @@ -482,6 +482,9 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, default_pkey_idx = 1; ppd->pkeys[default_pkey_idx] = DEFAULT_P_KEY; + ppd->part_enforce |= HFI1_PART_ENFORCE_IN; + ppd->part_enforce |= HFI1_PART_ENFORCE_OUT; + if (loopback) { hfi1_early_err(&pdev->dev, "Faking data partition 0x8001 in idx %u\n", @@ -585,7 +588,7 @@ static void enable_chip(struct hfi1_devdata *dd) * Enable kernel ctxts' receive and receive interrupt. * Other ctxts done as user opens and initializes them. */ - for (i = 0; i < dd->first_user_ctxt; ++i) { + for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) { rcvmask = HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB; rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ? HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS; @@ -616,7 +619,7 @@ static int create_workqueues(struct hfi1_devdata *dd) alloc_workqueue( "hfi%d_%d", WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE, - dd->num_sdma, + HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES, dd->unit, pidx); if (!ppd->hfi1_wq) goto wq_error; @@ -679,6 +682,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) dd->process_pio_send = hfi1_verbs_send_pio; dd->process_dma_send = hfi1_verbs_send_dma; dd->pio_inline_send = pio_copy; + dd->process_vnic_dma_send = hfi1_vnic_send_dma; if (is_ax(dd)) { atomic_set(&dd->drop_packet, DROP_PACKET_ON); @@ -714,7 +718,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) } /* dd->rcd can be NULL if early initialization failed */ - for (i = 0; dd->rcd && i < dd->first_user_ctxt; ++i) { + for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) { /* * Set up the (kernel) rcvhdr queue and egr TIDs. If doing * re-init, the simplest way to handle this is to free @@ -960,7 +964,6 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) kfree(rcd->egrbufs.buffers); sc_free(rcd->sc); - vfree(rcd->user_event_mask); vfree(rcd->subctxt_uregbase); vfree(rcd->subctxt_rcvegrbuf); vfree(rcd->subctxt_rcvhdr_base); @@ -1078,11 +1081,11 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) spin_lock_init(&dd->uctxt_lock); spin_lock_init(&dd->hfi1_diag_trans_lock); spin_lock_init(&dd->sc_init_lock); - spin_lock_init(&dd->dc8051_lock); spin_lock_init(&dd->dc8051_memlock); seqlock_init(&dd->sc2vl_lock); spin_lock_init(&dd->sde_map_lock); spin_lock_init(&dd->pio_map_lock); + mutex_init(&dd->dc8051_lock); init_waitqueue_head(&dd->event_queue); dd->int_counter = alloc_percpu(u64); @@ -1425,6 +1428,16 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* First, lock the non-writable module parameters */ HFI1_CAP_LOCK(); + /* Validate dev ids */ + if (!(ent->device == PCI_DEVICE_ID_INTEL0 || + ent->device == PCI_DEVICE_ID_INTEL1)) { + hfi1_early_err(&pdev->dev, + "Failing on unknown Intel deviceid 0x%x\n", + ent->device); + ret = -ENODEV; + goto bail; + } + /* Validate some global module parameters */ ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt); if (ret) @@ -1470,15 +1483,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) goto bail; - if (!(ent->device == PCI_DEVICE_ID_INTEL0 || - ent->device == PCI_DEVICE_ID_INTEL1)) { - hfi1_early_err(&pdev->dev, - "Failing on unknown Intel deviceid 0x%x\n", - ent->device); - ret = -ENODEV; - goto clean_bail; - } - /* * Do device-specific initialization, function table setup, dd * allocation, etc. @@ -1497,6 +1501,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* do the generic initialization */ initfail = hfi1_init(dd, 0); + /* setup vnic */ + hfi1_vnic_setup(dd); + ret = hfi1_register_ib_device(dd); /* @@ -1530,6 +1537,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) hfi1_device_remove(dd); if (!ret) hfi1_unregister_ib_device(dd); + hfi1_vnic_cleanup(dd); postinit_cleanup(dd); if (initfail) ret = initfail; @@ -1574,6 +1582,9 @@ static void remove_one(struct pci_dev *pdev) /* unregister from IB core */ hfi1_unregister_ib_device(dd); + /* cleanup vnic */ + hfi1_vnic_cleanup(dd); + /* * Disable the IB link, disable interrupts on the device, * clear dma engines, etc. @@ -1613,8 +1624,11 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) amt = PAGE_ALIGN(rcd->rcvhdrq_cnt * rcd->rcvhdrqentsize * sizeof(u32)); - gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ? - GFP_USER : GFP_KERNEL; + if ((rcd->ctxt < dd->first_dyn_alloc_ctxt) || + (rcd->sc && (rcd->sc->type == SC_KERNEL))) + gfp_flags = GFP_KERNEL; + else + gfp_flags = GFP_USER; rcd->rcvhdrq = dma_zalloc_coherent( &dd->pcidev->dev, amt, &rcd->rcvhdrq_dma, gfp_flags | __GFP_COMP); @@ -1668,8 +1682,6 @@ bail_free: dd_dev_err(dd, "attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n", rcd->ctxt); - vfree(rcd->user_event_mask); - rcd->user_event_mask = NULL; dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq, rcd->rcvhdrq_dma); rcd->rcvhdrq = NULL; @@ -1758,6 +1770,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) !HFI1_CAP_KGET_MASK(rcd->flags, MULTI_PKT_EGR)) { dd_dev_err(dd, "ctxt%u: Failed to allocate eager buffers\n", rcd->ctxt); + ret = -ENOMEM; goto bail_rcvegrbuf_phys; } @@ -1835,7 +1848,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) "ctxt%u: current Eager buffer size is invalid %u\n", rcd->ctxt, rcd->egrbufs.rcvtid_size); ret = -EINVAL; - goto bail; + goto bail_rcvegrbuf_phys; } for (idx = 0; idx < rcd->egrbufs.alloced; idx++) { @@ -1843,7 +1856,8 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) rcd->egrbufs.rcvtids[idx].dma, order); cond_resched(); } - goto bail; + + return 0; bail_rcvegrbuf_phys: for (idx = 0; idx < rcd->egrbufs.alloced && @@ -1857,6 +1871,6 @@ bail_rcvegrbuf_phys: rcd->egrbufs.buffers[idx].dma = 0; rcd->egrbufs.buffers[idx].len = 0; } -bail: + return ret; } diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c index 65348d16ab2f..ba265d0ae93b 100644 --- a/drivers/infiniband/hw/hfi1/intr.c +++ b/drivers/infiniband/hw/hfi1/intr.c @@ -47,6 +47,7 @@ #include <linux/pci.h> #include <linux/delay.h> +#include <linux/bitmap.h> #include "hfi.h" #include "common.h" @@ -131,19 +132,24 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup) if (quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR) { set_up_vl15(dd, dd->vau, dd->vl15_init); assign_remote_cm_au_table(dd, dd->vcu); - ppd->neighbor_guid = - read_csr(dd, DC_DC8051_STS_REMOTE_GUID); - ppd->neighbor_type = - read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) & - DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK; - ppd->neighbor_port_number = - read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) & - DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK; - dd_dev_info(dd, "Neighbor GUID: %llx Neighbor type %d\n", - ppd->neighbor_guid, - ppd->neighbor_type); } + ppd->neighbor_guid = + read_csr(dd, DC_DC8051_STS_REMOTE_GUID); + ppd->neighbor_type = + read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) & + DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK; + ppd->neighbor_port_number = + read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) & + DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK; + ppd->neighbor_fm_security = + read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) & + DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK; + dd_dev_info(dd, + "Neighbor Guid %llx, Type %d, Port Num %d\n", + ppd->neighbor_guid, ppd->neighbor_type, + ppd->neighbor_port_number); + /* physical link went up */ ppd->linkup = 1; ppd->offline_disabled_reason = @@ -184,7 +190,7 @@ void handle_user_interrupt(struct hfi1_ctxtdata *rcd) unsigned long flags; spin_lock_irqsave(&dd->uctxt_lock, flags); - if (!rcd->cnt) + if (bitmap_empty(rcd->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) goto done; if (test_and_clear_bit(HFI1_CTXT_WAITING_RCV, &rcd->event_flags)) { diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 09cda3c35e82..5977673a52d4 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -53,6 +53,7 @@ #include "mad.h" #include "trace.h" #include "qp.h" +#include "vnic.h" /* the reset value from the FM is supposed to be 0xffff, handle both */ #define OPA_LINK_WIDTH_RESET_OLD 0x0fff @@ -650,9 +651,11 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE : 0); pi->port_packet_format.supported = - cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B); + cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B | + OPA_PORT_PACKET_FORMAT_16B); pi->port_packet_format.enabled = - cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B); + cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B | + OPA_PORT_PACKET_FORMAT_16B); /* flit_control.interleave is (OPA V1, version .76): * bits use @@ -701,7 +704,13 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT; pi->buffer_units = cpu_to_be32(buffer_units); - pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported); + pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported | + OPA_CAP_MASK3_IsEthOnFabricSupported); + /* Driver does not support mcast/collective configuration */ + pi->opa_cap_mask &= + cpu_to_be16(~OPA_CAP_MASK3_IsAddrRangeConfigSupported); + pi->collectivemask_multicastmask = ((HFI1_COLLECTIVE_NR & 0x7) + << 3 | (HFI1_MCAST_NR & 0x7)); /* HFI supports a replay buffer 128 LTPs in size */ pi->replay_depth.buffer = 0x80; @@ -1146,16 +1155,6 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, ppd->linkinit_reason = (pi->partenforce_filterraw & OPA_PI_MASK_LINKINIT_REASON); - /* enable/disable SW pkey checking as per FM control */ - if (pi->partenforce_filterraw & OPA_PI_MASK_PARTITION_ENFORCE_IN) - ppd->part_enforce |= HFI1_PART_ENFORCE_IN; - else - ppd->part_enforce &= ~HFI1_PART_ENFORCE_IN; - - if (pi->partenforce_filterraw & OPA_PI_MASK_PARTITION_ENFORCE_OUT) - ppd->part_enforce |= HFI1_PART_ENFORCE_OUT; - else - ppd->part_enforce &= ~HFI1_PART_ENFORCE_OUT; /* Must be a valid unicast LID address. */ if ((smlid == 0 && ls_old > IB_PORT_INIT) || @@ -1167,9 +1166,9 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, spin_lock_irqsave(&ibp->rvp.lock, flags); if (ibp->rvp.sm_ah) { if (smlid != ibp->rvp.sm_lid) - ibp->rvp.sm_ah->attr.dlid = smlid; + rdma_ah_set_dlid(&ibp->rvp.sm_ah->attr, smlid); if (msl != ibp->rvp.sm_sl) - ibp->rvp.sm_ah->attr.sl = msl; + rdma_ah_set_sl(&ibp->rvp.sm_ah->attr, msl); } spin_unlock_irqrestore(&ibp->rvp.lock, flags); if (smlid != ibp->rvp.sm_lid) @@ -1465,25 +1464,15 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len); } -static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data) -{ - u64 *val = data; - - *val++ = read_csr(dd, SEND_SC2VLT0); - *val++ = read_csr(dd, SEND_SC2VLT1); - *val++ = read_csr(dd, SEND_SC2VLT2); - *val++ = read_csr(dd, SEND_SC2VLT3); - return 0; -} - #define ILLEGAL_VL 12 /* * filter_sc2vlt changes mappings to VL15 to ILLEGAL_VL (except * for SC15, which must map to VL15). If we don't remap things this * way it is possible for VL15 counters to increment when we try to * send on a SC which is mapped to an invalid VL. + * When getting the table convert ILLEGAL_VL back to VL15. */ -static void filter_sc2vlt(void *data) +static void filter_sc2vlt(void *data, bool set) { int i; u8 *pd = data; @@ -1491,8 +1480,14 @@ static void filter_sc2vlt(void *data) for (i = 0; i < OPA_MAX_SCS; i++) { if (i == 15) continue; - if ((pd[i] & 0x1f) == 0xf) - pd[i] = ILLEGAL_VL; + + if (set) { + if ((pd[i] & 0x1f) == 0xf) + pd[i] = ILLEGAL_VL; + } else { + if ((pd[i] & 0x1f) == ILLEGAL_VL) + pd[i] = 0xf; + } } } @@ -1500,7 +1495,7 @@ static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data) { u64 *val = data; - filter_sc2vlt(data); + filter_sc2vlt(data, true); write_csr(dd, SEND_SC2VLT0, *val++); write_csr(dd, SEND_SC2VLT1, *val++); @@ -1512,6 +1507,19 @@ static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data) return 0; } +static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data) +{ + u64 *val = (u64 *)data; + + *val++ = read_csr(dd, SEND_SC2VLT0); + *val++ = read_csr(dd, SEND_SC2VLT1); + *val++ = read_csr(dd, SEND_SC2VLT2); + *val++ = read_csr(dd, SEND_SC2VLT3); + + filter_sc2vlt((u64 *)data, false); + return 0; +} + static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, u32 *resp_len) @@ -1986,31 +1994,6 @@ struct opa_pma_mad { u8 data[2024]; } __packed; -struct opa_class_port_info { - u8 base_version; - u8 class_version; - __be16 cap_mask; - __be32 cap_mask2_resp_time; - - u8 redirect_gid[16]; - __be32 redirect_tc_fl; - __be32 redirect_lid; - __be32 redirect_sl_qp; - __be32 redirect_qkey; - - u8 trap_gid[16]; - __be32 trap_tc_fl; - __be32 trap_lid; - __be32 trap_hl_qp; - __be32 trap_qkey; - - __be16 trap_pkey; - __be16 redirect_pkey; - - u8 trap_sl_rsvd; - u8 reserved[3]; -} __packed; - struct opa_port_status_req { __u8 port_num; __u8 reserved[3]; diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 0829fce06172..93faf86d54b6 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -240,36 +240,6 @@ void hfi1_pcie_ddcleanup(struct hfi1_devdata *dd) iounmap(dd->piobase); } -/* - * Do a Function Level Reset (FLR) on the device. - * Based on static function drivers/pci/pci.c:pcie_flr(). - */ -void hfi1_pcie_flr(struct hfi1_devdata *dd) -{ - int i; - u16 status; - - /* no need to check for the capability - we know the device has it */ - - /* wait for Transaction Pending bit to clear, at most a few ms */ - for (i = 0; i < 4; i++) { - if (i) - msleep((1 << (i - 1)) * 100); - - pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVSTA, &status); - if (!(status & PCI_EXP_DEVSTA_TRPND)) - goto clear; - } - - dd_dev_err(dd, "Transaction Pending bit is not clearing, proceeding with reset anyway\n"); - -clear: - pcie_capability_set_word(dd->pcidev, PCI_EXP_DEVCTL, - PCI_EXP_DEVCTL_BCR_FLR); - /* PCIe spec requires the function to be back within 100ms */ - msleep(100); -} - static void msix_setup(struct hfi1_devdata *dd, int pos, u32 *msixcnt, struct hfi1_msix_entry *hfi1_msix_entry) { @@ -583,7 +553,7 @@ pci_mmio_enabled(struct pci_dev *pdev) if (words == ~0ULL) ret = PCI_ERS_RESULT_NEED_RESET; dd_dev_info(dd, - "HFI1 mmio_enabled function called, read wordscntr %Lx, returning %d\n", + "HFI1 mmio_enabled function called, read wordscntr %llx, returning %d\n", words, ret); } return ret; diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 615be68e40b3..ed72b5aca139 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -703,6 +703,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, { struct send_context_info *sci; struct send_context *sc = NULL; + int req_type = type; dma_addr_t dma; unsigned long flags; u64 reg; @@ -729,6 +730,13 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, return NULL; } + /* + * VNIC contexts are dynamically allocated. + * Hence, pick a user context for VNIC. + */ + if (type == SC_VNIC) + type = SC_USER; + spin_lock_irqsave(&dd->sc_lock, flags); ret = sc_hw_alloc(dd, type, &sw_index, &hw_context); if (ret) { @@ -738,6 +746,15 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, return NULL; } + /* + * VNIC contexts are used by kernel driver. + * Hence, mark them as kernel contexts. + */ + if (req_type == SC_VNIC) { + dd->send_contexts[sw_index].type = SC_KERNEL; + type = SC_KERNEL; + } + sci = &dd->send_contexts[sw_index]; sci->sc = sc; diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h index 867e5ffc3595..99ca5edb0b43 100644 --- a/drivers/infiniband/hw/hfi1/pio.h +++ b/drivers/infiniband/hw/hfi1/pio.h @@ -1,7 +1,7 @@ #ifndef _PIO_H #define _PIO_H /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -54,6 +54,12 @@ #define SC_USER 3 /* must be the last one: it may take all left */ #define SC_MAX 4 /* count of send context types */ +/* + * SC_VNIC types are allocated (dynamically) from the user context pool, + * (SC_USER) and used by kernel driver as kernel contexts (SC_KERNEL). + */ +#define SC_VNIC SC_MAX + /* invalid send context index */ #define INVALID_SCI 0xff @@ -195,7 +201,7 @@ struct sc_config_sizes { * | mask | --/ |--------------------| * |--------------------------| -/ | * | * | actual_vls (max 8) | -/ |--------------------| - * |--------------------------| --/ | ksc[n] -> sc n | + * |--------------------------| --/ | ksc[n-1] -> sc n | * | vls (max 8) | -/ +--------------------+ * |--------------------------| --/ * | map[0] |-/ @@ -208,21 +214,21 @@ struct sc_config_sizes { * |--------------------------| |--------------------| * | map[vls - 1] |- | * | * +--------------------------+ \- |--------------------| - * \- | ksc[m] -> sc m+n | + * \- | ksc[m-1] -> sc m+n | * \ +--------------------+ * \- * \ - * \- +--------------------+ - * \- | mask | - * \ |--------------------| - * \- | ksc[0] -> sc 1+m+n | - * \- |--------------------| - * >| ksc[1] -> sc 2+m+n | - * |--------------------| - * | * | - * |--------------------| - * | ksc[o] -> sc o+m+n | - * +--------------------+ + * \- +----------------------+ + * \- | mask | + * \ |----------------------| + * \- | ksc[0] -> sc 1+m+n | + * \- |----------------------| + * >| ksc[1] -> sc 2+m+n | + * |----------------------| + * | * | + * |----------------------| + * | ksc[o-1] -> sc o+m+n | + * +----------------------+ * */ diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index c4ebd097b20f..650305cc0373 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -294,7 +294,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp, ah = ibah_to_rvtah(wqe->ud_wr.ah); if (wqe->length > (1 << ah->log_pmtu)) return -EINVAL; - if (ibp->sl_to_sc[ah->attr.sl] == 0xf) + if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf) return -EINVAL; default: break; @@ -631,8 +631,8 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) qp->s_tail, qp->s_head, qp->s_size, qp->s_avail, qp->remote_qpn, - qp->remote_ah_attr.dlid, - qp->remote_ah_attr.sl, + rdma_ah_get_dlid(&qp->remote_ah_attr), + rdma_ah_get_sl(&qp->remote_ah_attr), qp->pmtu, qp->s_retry, qp->s_retry_cnt, @@ -731,9 +731,7 @@ void quiesce_qp(struct rvt_qp *qp) void notify_qp_reset(struct rvt_qp *qp) { - struct hfi1_qp_priv *priv = qp->priv; - - priv->r_adefered = 0; + qp->r_adefered = 0; clear_ahg(qp); } @@ -748,7 +746,7 @@ void hfi1_migrate_qp(struct rvt_qp *qp) qp->s_mig_state = IB_MIG_MIGRATED; qp->remote_ah_attr = qp->alt_ah_attr; - qp->port_num = qp->alt_ah_attr.port_num; + qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr); qp->s_pkey_index = qp->s_alt_pkey_index; qp->s_flags |= RVT_S_AHG_CLEAR; priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr); @@ -778,7 +776,7 @@ u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu) u8 sc, vl; ibp = &dd->pport[qp->port_num - 1].ibport_data; - sc = ibp->sl_to_sc[qp->remote_ah_attr.sl]; + sc = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)]; vl = sc_to_vlt(dd, sc); mtu = verbs_mtu_enum_to_int(qp->ibqp.device, pmtu); @@ -861,7 +859,7 @@ void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl) if (qp->port_num == ppd->port && (qp->ibqp.qp_type == IB_QPT_UC || qp->ibqp.qp_type == IB_QPT_RC) && - qp->remote_ah_attr.sl == sl && + rdma_ah_get_sl(&qp->remote_ah_attr) == sl && (ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK)) { spin_lock_irq(&qp->r_lock); diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 7382be11afca..069bdaf061ab 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -274,7 +274,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) goto bail_no_tx; ohdr = &ps->s_txreq->phdr.hdr.u.oth; - if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) + if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) ohdr = &ps->s_txreq->phdr.hdr.u.l.oth; /* Sending responses has higher priority over sending requests. */ @@ -727,10 +727,9 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, struct ib_header hdr; struct ib_other_headers *ohdr; unsigned long flags; - struct hfi1_qp_priv *priv = qp->priv; /* clear the defer count */ - priv->r_adefered = 0; + qp->r_adefered = 0; /* Don't send ACK or NAK if a RDMA read or atomic is pending. */ if (qp->s_flags & RVT_S_RESP_PENDING) @@ -744,9 +743,10 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, /* Construct the header */ /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4 */ hwords = 6; - if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { + if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) { hwords += hfi1_make_grh(ibp, &hdr.u.l.grh, - &qp->remote_ah_attr.grh, hwords, 0); + rdma_ah_read_grh(&qp->remote_ah_attr), + hwords, 0); ohdr = &hdr.u.l.oth; lrh0 = HFI1_LRH_GRH; } else { @@ -763,17 +763,19 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, IB_AETH_CREDIT_SHIFT)); else ohdr->u.aeth = rvt_compute_aeth(qp); - sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl]; + sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)]; /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ pbc_flags |= ((!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT); - lrh0 |= (sc5 & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4; + lrh0 |= (sc5 & 0xf) << 12 | (rdma_ah_get_sl(&qp->remote_ah_attr) + & 0xf) << 4; hdr.lrh[0] = cpu_to_be16(lrh0); - hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); + hdr.lrh[1] = cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr)); hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC); - hdr.lrh[3] = cpu_to_be16(ppd->lid | qp->remote_ah_attr.src_path_bits); + hdr.lrh[3] = cpu_to_be16(ppd->lid | + rdma_ah_get_path_bits(&qp->remote_ah_attr)); ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); - ohdr->bth[1] |= cpu_to_be32((!!is_fecn) << HFI1_BECN_SHIFT); + ohdr->bth[1] |= cpu_to_be32((!!is_fecn) << IB_BECN_SHIFT); ohdr->bth[2] = cpu_to_be32(mask_psn(qp->r_ack_psn)); /* Don't try to send ACKs if the link isn't ACTIVE */ @@ -994,12 +996,12 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) return; /* Find out where the BTH is */ - if ((be16_to_cpu(hdr->lrh[0]) & 3) == HFI1_LRH_BTH) + if (ib_get_lnh(hdr) == HFI1_LRH_BTH) ohdr = &hdr->u.oth; else ohdr = &hdr->u.l.oth; - opcode = be32_to_cpu(ohdr->bth[0]) >> 24; + opcode = ib_bth_get_opcode(ohdr); if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) && opcode <= OP(ATOMIC_ACKNOWLEDGE)) { WARN_ON(!qp->s_rdma_ack_cnt); @@ -1028,13 +1030,17 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) break; s_last = qp->s_last; + trace_hfi1_qp_send_completion(qp, wqe, s_last); if (++s_last >= qp->s_size) s_last = 0; qp->s_last = s_last; /* see post_send() */ barrier(); rvt_put_swqe(wqe); - rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS); + rvt_qp_swqe_complete(qp, + wqe, + ib_hfi1_wc_opcode[wqe->wr.opcode], + IB_WC_SUCCESS); } /* * If we were waiting for sends to complete before re-sending, @@ -1076,12 +1082,16 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, rvt_put_swqe(wqe); s_last = qp->s_last; + trace_hfi1_qp_send_completion(qp, wqe, s_last); if (++s_last >= qp->s_size) s_last = 0; qp->s_last = s_last; /* see post_send() */ barrier(); - rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS); + rvt_qp_swqe_complete(qp, + wqe, + ib_hfi1_wc_opcode[wqe->wr.opcode], + IB_WC_SUCCESS); } else { struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); @@ -1092,10 +1102,11 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, */ if (ppd->dd->flags & HFI1_HAS_SEND_DMA) { struct sdma_engine *engine; + u8 sl = rdma_ah_get_sl(&qp->remote_ah_attr); u8 sc5; /* For now use sc to find engine */ - sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl]; + sc5 = ibp->sl_to_sc[sl]; engine = qp_to_sdma_engine(qp, sc5); sdma_engine_progress_schedule(engine); } @@ -1516,7 +1527,7 @@ read_middle: if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd)) goto ack_done; /* Get the number of bytes the message was padded by. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + pad = ib_bth_get_pad(ohdr); /* * Check that the data size is >= 0 && <= pmtu. * Remember to account for ICRC (4). @@ -1540,7 +1551,7 @@ read_middle: if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) goto ack_op_err; /* Get the number of bytes the message was padded by. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + pad = ib_bth_get_pad(ohdr); /* * Check that the data size is >= 1 && <= pmtu. * Remember to account for ICRC (4). @@ -1592,9 +1603,7 @@ static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd, static inline void rc_cancel_ack(struct rvt_qp *qp) { - struct hfi1_qp_priv *priv = qp->priv; - - priv->r_adefered = 0; + qp->r_adefered = 0; if (list_empty(&qp->rspwait)) return; list_del_init(&qp->rspwait); @@ -1922,7 +1931,8 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) int diff; struct ib_reth *reth; unsigned long flags; - int ret, is_fecn = 0; + int ret; + bool is_fecn = false; bool copy_last = false; u32 rkey; @@ -1934,7 +1944,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) is_fecn = process_ecn(qp, packet, false); psn = be32_to_cpu(ohdr->bth[2]); - opcode = (bth0 >> 24) & 0xff; + opcode = ib_bth_get_opcode(ohdr); /* * Process responses (ACKs) before anything else. Note that the @@ -2065,7 +2075,7 @@ no_immediate_data: wc.ex.imm_data = 0; send_last: /* Get the number of bytes the message was padded by. */ - pad = (bth0 >> 20) & 3; + pad = ib_bth_get_pad(ohdr); /* Check for invalid length. */ /* LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) @@ -2089,7 +2099,7 @@ send_last: wc.opcode = IB_WC_RECV; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; - wc.slid = qp->remote_ah_attr.dlid; + wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr); /* * It seems that IB mandates the presence of an SL in a * work completion only for the UD transport (see section @@ -2101,7 +2111,7 @@ send_last: * * See also OPA Vol. 1, section 9.7.6, and table 9-17. */ - wc.sl = qp->remote_ah_attr.sl; + wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr); /* zero fields that are N/A */ wc.vendor_err = 0; wc.pkey_index = 0; @@ -2301,13 +2311,11 @@ send_last: qp->r_nak_state = 0; /* Send an ACK if requested or required. */ if (psn & IB_BTH_REQ_ACK) { - struct hfi1_qp_priv *priv = qp->priv; - if (packet->numpkt == 0) { rc_cancel_ack(qp); goto send_ack; } - if (priv->r_adefered >= HFI1_PSN_CREDIT) { + if (qp->r_adefered >= HFI1_PSN_CREDIT) { rc_cancel_ack(qp); goto send_ack; } @@ -2315,7 +2323,7 @@ send_last: rc_cancel_ack(qp); goto send_ack; } - priv->r_adefered++; + qp->r_adefered++; rc_defered_ack(rcd, qp); } return; @@ -2378,7 +2386,7 @@ void hfi1_rc_hdrerr( return; psn = be32_to_cpu(ohdr->bth[2]); - opcode = (bth0 >> 24) & 0xff; + opcode = ib_bth_get_opcode(ohdr); /* Only deal with RDMA Writes for now */ if (opcode < IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) { diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index aa15bcbfb079..3a17daba28a9 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -219,72 +219,84 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr, { __be64 guid; unsigned long flags; - u8 sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl]; + u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)]; if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) { if (!has_grh) { - if (qp->alt_ah_attr.ah_flags & IB_AH_GRH) + if (rdma_ah_get_ah_flags(&qp->alt_ah_attr) & + IB_AH_GRH) goto err; } else { - if (!(qp->alt_ah_attr.ah_flags & IB_AH_GRH)) + const struct ib_global_route *grh; + + if (!(rdma_ah_get_ah_flags(&qp->alt_ah_attr) & + IB_AH_GRH)) goto err; - guid = get_sguid(ibp, qp->alt_ah_attr.grh.sgid_index); + grh = rdma_ah_read_grh(&qp->alt_ah_attr); + guid = get_sguid(ibp, grh->sgid_index); if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix, guid)) goto err; if (!gid_ok( &hdr->u.l.grh.sgid, - qp->alt_ah_attr.grh.dgid.global.subnet_prefix, - qp->alt_ah_attr.grh.dgid.global.interface_id)) + grh->dgid.global.subnet_prefix, + grh->dgid.global.interface_id)) goto err; } - if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, - sc5, be16_to_cpu(hdr->lrh[3])))) { + if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5, + ib_get_slid(hdr)))) { hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, (u16)bth0, - (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, + ib_get_sl(hdr), 0, qp->ibqp.qp_num, - be16_to_cpu(hdr->lrh[3]), - be16_to_cpu(hdr->lrh[1])); + ib_get_slid(hdr), + ib_get_dlid(hdr)); goto err; } /* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */ - if (be16_to_cpu(hdr->lrh[3]) != qp->alt_ah_attr.dlid || - ppd_from_ibp(ibp)->port != qp->alt_ah_attr.port_num) + if (ib_get_slid(hdr) != + rdma_ah_get_dlid(&qp->alt_ah_attr) || + ppd_from_ibp(ibp)->port != + rdma_ah_get_port_num(&qp->alt_ah_attr)) goto err; spin_lock_irqsave(&qp->s_lock, flags); hfi1_migrate_qp(qp); spin_unlock_irqrestore(&qp->s_lock, flags); } else { if (!has_grh) { - if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) + if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & + IB_AH_GRH) goto err; } else { - if (!(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) + const struct ib_global_route *grh; + + if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & + IB_AH_GRH)) goto err; - guid = get_sguid(ibp, - qp->remote_ah_attr.grh.sgid_index); + grh = rdma_ah_read_grh(&qp->remote_ah_attr); + guid = get_sguid(ibp, grh->sgid_index); if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix, guid)) goto err; if (!gid_ok( &hdr->u.l.grh.sgid, - qp->remote_ah_attr.grh.dgid.global.subnet_prefix, - qp->remote_ah_attr.grh.dgid.global.interface_id)) + grh->dgid.global.subnet_prefix, + grh->dgid.global.interface_id)) goto err; } - if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, - sc5, be16_to_cpu(hdr->lrh[3])))) { + if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5, + ib_get_slid(hdr)))) { hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, (u16)bth0, - (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, + ib_get_sl(hdr), 0, qp->ibqp.qp_num, - be16_to_cpu(hdr->lrh[3]), - be16_to_cpu(hdr->lrh[1])); + ib_get_slid(hdr), + ib_get_dlid(hdr)); goto err; } /* Validate the SLID. See Ch. 9.6.1.5 */ - if (be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid || + if (ib_get_slid(hdr) != + rdma_ah_get_dlid(&qp->remote_ah_attr) || ppd_from_ibp(ibp)->port != qp->port_num) goto err; if (qp->s_mig_state == IB_MIG_REARM && @@ -542,8 +554,8 @@ do_write: wc.byte_len = wqe->length; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; - wc.slid = qp->remote_ah_attr.dlid; - wc.sl = qp->remote_ah_attr.sl; + wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr); + wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr); wc.port_num = 1; /* Signal completion event if the solicited bit is set. */ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, @@ -637,7 +649,7 @@ done: * Return the size of the header in 32 bit words. */ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, - struct ib_global_route *grh, u32 hwords, u32 nwords) + const struct ib_global_route *grh, u32 hwords, u32 nwords) { hdr->version_tclass_flow = cpu_to_be32((IB_GRH_VERSION << IB_GRH_VERSION_SHIFT) | @@ -731,15 +743,17 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, extra_bytes = -ps->s_txreq->s_cur_size & 3; nwords = (ps->s_txreq->s_cur_size + extra_bytes) >> 2; lrh0 = HFI1_LRH_BTH; - if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { - qp->s_hdrwords += hfi1_make_grh(ibp, - &ps->s_txreq->phdr.hdr.u.l.grh, - &qp->remote_ah_attr.grh, - qp->s_hdrwords, nwords); + if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) { + qp->s_hdrwords += + hfi1_make_grh(ibp, + &ps->s_txreq->phdr.hdr.u.l.grh, + rdma_ah_read_grh(&qp->remote_ah_attr), + qp->s_hdrwords, nwords); lrh0 = HFI1_LRH_GRH; middle = 0; } - lrh0 |= (priv->s_sc & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4; + lrh0 |= (priv->s_sc & 0xf) << 12 | + (rdma_ah_get_sl(&qp->remote_ah_attr) & 0xf) << 4; /* * reset s_ahg/AHG fields * @@ -763,11 +777,13 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, else qp->s_flags &= ~RVT_S_AHG_VALID; ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0); - ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); + ps->s_txreq->phdr.hdr.lrh[1] = + cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr)); ps->s_txreq->phdr.hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); - ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid | - qp->remote_ah_attr.src_path_bits); + ps->s_txreq->phdr.hdr.lrh[3] = + cpu_to_be16(ppd_from_ibp(ibp)->lid | + rdma_ah_get_path_bits(&qp->remote_ah_attr)); bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index); bth0 |= extra_bytes << 20; ohdr->bth[0] = cpu_to_be32(bth0); @@ -775,7 +791,7 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, if (qp->s_flags & RVT_S_ECN) { qp->s_flags &= ~RVT_S_ECN; /* we recently received a FECN, so return a BECN */ - bth1 |= (HFI1_BECN_MASK << HFI1_BECN_SHIFT); + bth1 |= (IB_BECN_MASK << IB_BECN_SHIFT); } ohdr->bth[1] = cpu_to_be32(bth1); ohdr->bth[2] = cpu_to_be32(bth2); @@ -784,59 +800,102 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, /* when sending, force a reschedule every one of these periods */ #define SEND_RESCHED_TIMEOUT (5 * HZ) /* 5s in jiffies */ +/** + * schedule_send_yield - test for a yield required for QP send engine + * @timeout: Final time for timeout slice for jiffies + * @qp: a pointer to QP + * @ps: a pointer to a structure with commonly lookup values for + * the the send engine progress + * + * This routine checks if the time slice for the QP has expired + * for RC QPs, if so an additional work entry is queued. At this + * point, other QPs have an opportunity to be scheduled. It + * returns true if a yield is required, otherwise, false + * is returned. + */ +static bool schedule_send_yield(struct rvt_qp *qp, + struct hfi1_pkt_state *ps) +{ + if (unlikely(time_after(jiffies, ps->timeout))) { + if (!ps->in_thread || + workqueue_congested(ps->cpu, ps->ppd->hfi1_wq)) { + spin_lock_irqsave(&qp->s_lock, ps->flags); + qp->s_flags &= ~RVT_S_BUSY; + hfi1_schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, ps->flags); + this_cpu_inc(*ps->ppd->dd->send_schedule); + trace_hfi1_rc_expired_time_slice(qp, true); + return true; + } + + cond_resched(); + this_cpu_inc(*ps->ppd->dd->send_schedule); + ps->timeout = jiffies + ps->timeout_int; + } + + trace_hfi1_rc_expired_time_slice(qp, false); + return false; +} + +void hfi1_do_send_from_rvt(struct rvt_qp *qp) +{ + hfi1_do_send(qp, false); +} + void _hfi1_do_send(struct work_struct *work) { struct iowait *wait = container_of(work, struct iowait, iowork); struct rvt_qp *qp = iowait_to_qp(wait); - hfi1_do_send(qp); + hfi1_do_send(qp, true); } /** * hfi1_do_send - perform a send on a QP * @work: contains a pointer to the QP + * @in_thread: true if in a workqueue thread * * Process entries in the send work queue until credit or queue is * exhausted. Only allow one CPU to send a packet per QP. * Otherwise, two threads could send packets out of order. */ -void hfi1_do_send(struct rvt_qp *qp) +void hfi1_do_send(struct rvt_qp *qp, bool in_thread) { struct hfi1_pkt_state ps; struct hfi1_qp_priv *priv = qp->priv; int (*make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps); - unsigned long timeout; - unsigned long timeout_int; - int cpu; ps.dev = to_idev(qp->ibqp.device); ps.ibp = to_iport(qp->ibqp.device, qp->port_num); ps.ppd = ppd_from_ibp(ps.ibp); + ps.in_thread = in_thread; + + trace_hfi1_rc_do_send(qp, in_thread); switch (qp->ibqp.qp_type) { case IB_QPT_RC: - if (!loopback && ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc - ) - 1)) == - ps.ppd->lid)) { + if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) & + ~((1 << ps.ppd->lmc) - 1)) == + ps.ppd->lid)) { ruc_loopback(qp); return; } make_req = hfi1_make_rc_req; - timeout_int = (qp->timeout_jiffies); + ps.timeout_int = qp->timeout_jiffies; break; case IB_QPT_UC: - if (!loopback && ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc - ) - 1)) == - ps.ppd->lid)) { + if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) & + ~((1 << ps.ppd->lmc) - 1)) == + ps.ppd->lid)) { ruc_loopback(qp); return; } make_req = hfi1_make_uc_req; - timeout_int = SEND_RESCHED_TIMEOUT; + ps.timeout_int = SEND_RESCHED_TIMEOUT; break; default: make_req = hfi1_make_ud_req; - timeout_int = SEND_RESCHED_TIMEOUT; + ps.timeout_int = SEND_RESCHED_TIMEOUT; } spin_lock_irqsave(&qp->s_lock, ps.flags); @@ -849,9 +908,11 @@ void hfi1_do_send(struct rvt_qp *qp) qp->s_flags |= RVT_S_BUSY; - timeout = jiffies + (timeout_int) / 8; - cpu = priv->s_sde ? priv->s_sde->cpu : + ps.timeout_int = ps.timeout_int / 8; + ps.timeout = jiffies + ps.timeout_int; + ps.cpu = priv->s_sde ? priv->s_sde->cpu : cpumask_first(cpumask_of_node(ps.ppd->dd->node)); + /* insure a pre-built packet is handled */ ps.s_txreq = get_waiting_verbs_txreq(qp); do { @@ -867,28 +928,9 @@ void hfi1_do_send(struct rvt_qp *qp) /* Record that s_ahg is empty. */ qp->s_hdrwords = 0; /* allow other tasks to run */ - if (unlikely(time_after(jiffies, timeout))) { - if (workqueue_congested(cpu, - ps.ppd->hfi1_wq)) { - spin_lock_irqsave( - &qp->s_lock, - ps.flags); - qp->s_flags &= ~RVT_S_BUSY; - hfi1_schedule_send(qp); - spin_unlock_irqrestore( - &qp->s_lock, - ps.flags); - this_cpu_inc( - *ps.ppd->dd->send_schedule); - return; - } - if (!irqs_disabled()) { - cond_resched(); - this_cpu_inc( - *ps.ppd->dd->send_schedule); - } - timeout = jiffies + (timeout_int) / 8; - } + if (schedule_send_yield(qp, &ps)) + return; + spin_lock_irqsave(&qp->s_lock, ps.flags); } } while (make_req(qp, &ps)); @@ -909,8 +951,10 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, last = qp->s_last; old_last = last; + trace_hfi1_qp_send_completion(qp, wqe, last); if (++last >= qp->s_size) last = 0; + trace_hfi1_qp_send_completion(qp, wqe, last); qp->s_last = last; /* See post_send() */ barrier(); @@ -920,7 +964,10 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, qp->ibqp.qp_type == IB_QPT_GSI) atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); - rvt_qp_swqe_complete(qp, wqe, status); + rvt_qp_swqe_complete(qp, + wqe, + ib_hfi1_wc_opcode[wqe->wr.opcode], + status); if (qp->s_acked == old_last) qp->s_acked = last; diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 5cde1ecda0fe..bfd0d5187e9b 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -868,7 +868,7 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd, cpu_id = smp_processor_id(); rcu_read_lock(); - rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu_id, + rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu_id, sdma_rht_params); if (rht_node && rht_node->map[vl]) { @@ -962,7 +962,12 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf, continue; } - rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu, + if (vl >= ARRAY_SIZE(rht_node->map)) { + ret = -EINVAL; + goto out; + } + + rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu, sdma_rht_params); if (!rht_node) { rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL); @@ -982,7 +987,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf, rht_node->map[vl]->ctr = 1; rht_node->map[vl]->sde[0] = sde; - ret = rhashtable_insert_fast(&dd->sdma_rht, + ret = rhashtable_insert_fast(dd->sdma_rht, &rht_node->node, sdma_rht_params); if (ret) { @@ -1025,7 +1030,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf, if (cpumask_test_cpu(cpu, mask)) continue; - rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu, + rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu, sdma_rht_params); if (rht_node) { bool empty = true; @@ -1049,7 +1054,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf, } if (empty) { - ret = rhashtable_remove_fast(&dd->sdma_rht, + ret = rhashtable_remove_fast(dd->sdma_rht, &rht_node->node, sdma_rht_params); WARN_ON(ret); @@ -1108,7 +1113,7 @@ void sdma_seqfile_dump_cpu_list(struct seq_file *s, struct sdma_rht_node *rht_node; int i, j; - rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpuid, + rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpuid, sdma_rht_params); if (!rht_node) return; @@ -1322,6 +1327,12 @@ static void sdma_clean(struct hfi1_devdata *dd, size_t num_engines) synchronize_rcu(); kfree(dd->per_sdma); dd->per_sdma = NULL; + + if (dd->sdma_rht) { + rhashtable_free_and_destroy(dd->sdma_rht, sdma_rht_free, NULL); + kfree(dd->sdma_rht); + dd->sdma_rht = NULL; + } } /** @@ -1341,12 +1352,14 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) { unsigned this_idx; struct sdma_engine *sde; + struct rhashtable *tmp_sdma_rht; u16 descq_cnt; void *curr_head; struct hfi1_pportdata *ppd = dd->pport + port; u32 per_sdma_credits; uint idle_cnt = sdma_idle_cnt; size_t num_engines = dd->chip_sdma_engines; + int ret = -ENOMEM; if (!HFI1_CAP_IS_KSET(SDMA)) { HFI1_CAP_CLEAR(SDMA_AHG); @@ -1378,7 +1391,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) /* alloc memory for array of send engines */ dd->per_sdma = kcalloc(num_engines, sizeof(*dd->per_sdma), GFP_KERNEL); if (!dd->per_sdma) - return -ENOMEM; + return ret; idle_cnt = ns_to_cclock(dd, idle_cnt); if (!sdma_desct_intr) @@ -1507,18 +1520,27 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) dd->flags |= HFI1_HAS_SEND_DMA; dd->flags |= idle_cnt ? HFI1_HAS_SDMA_TIMEOUT : 0; dd->num_sdma = num_engines; - if (sdma_map_init(dd, port, ppd->vls_operational, NULL)) + ret = sdma_map_init(dd, port, ppd->vls_operational, NULL); + if (ret < 0) goto bail; - if (rhashtable_init(&dd->sdma_rht, &sdma_rht_params)) + tmp_sdma_rht = kzalloc(sizeof(*tmp_sdma_rht), GFP_KERNEL); + if (!tmp_sdma_rht) { + ret = -ENOMEM; goto bail; + } + + ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params); + if (ret < 0) + goto bail; + dd->sdma_rht = tmp_sdma_rht; dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma); return 0; bail: sdma_clean(dd, num_engines); - return -ENOMEM; + return ret; } /** @@ -1604,7 +1626,6 @@ void sdma_exit(struct hfi1_devdata *dd) sdma_finalput(&sde->state); } sdma_clean(dd, dd->num_sdma); - rhashtable_free_and_destroy(&dd->sdma_rht, sdma_rht_free, NULL); } /* diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h index 21f1e2834f37..64f10b8b5db8 100644 --- a/drivers/infiniband/hw/hfi1/sdma.h +++ b/drivers/infiniband/hw/hfi1/sdma.h @@ -966,34 +966,34 @@ void sdma_engine_interrupt(struct sdma_engine *sde, u64 status); * | mask | --/ |--------------------| * |--------------------------| -/ | * | * | actual_vls (max 8) | -/ |--------------------| - * |--------------------------| --/ | sde[n] -> eng n | + * |--------------------------| --/ | sde[n-1] -> eng n | * | vls (max 8) | -/ +--------------------+ * |--------------------------| --/ * | map[0] |-/ - * |--------------------------| +--------------------+ - * | map[1] |--- | mask | - * |--------------------------| \---- |--------------------| - * | * | \-- | sde[0] -> eng 1+n | - * | * | \---- |--------------------| - * | * | \->| sde[1] -> eng 2+n | - * |--------------------------| |--------------------| - * | map[vls - 1] |- | * | - * +--------------------------+ \- |--------------------| - * \- | sde[m] -> eng m+n | - * \ +--------------------+ + * |--------------------------| +---------------------+ + * | map[1] |--- | mask | + * |--------------------------| \---- |---------------------| + * | * | \-- | sde[0] -> eng 1+n | + * | * | \---- |---------------------| + * | * | \->| sde[1] -> eng 2+n | + * |--------------------------| |---------------------| + * | map[vls - 1] |- | * | + * +--------------------------+ \- |---------------------| + * \- | sde[m-1] -> eng m+n | + * \ +---------------------+ * \- * \ - * \- +--------------------+ - * \- | mask | - * \ |--------------------| - * \- | sde[0] -> eng 1+m+n| - * \- |--------------------| - * >| sde[1] -> eng 2+m+n| - * |--------------------| - * | * | - * |--------------------| - * | sde[o] -> eng o+m+n| - * +--------------------+ + * \- +----------------------+ + * \- | mask | + * \ |----------------------| + * \- | sde[0] -> eng 1+m+n | + * \- |----------------------| + * >| sde[1] -> eng 2+m+n | + * |----------------------| + * | * | + * |----------------------| + * | sde[o-1] -> eng o+m+n| + * +----------------------+ * */ diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 919a5474e651..50d140d25e38 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -542,7 +542,7 @@ static ssize_t show_nctxts(struct device *device, * give a more accurate picture of total contexts available. */ return scnprintf(buf, PAGE_SIZE, "%u\n", - min(dd->num_rcv_contexts - dd->first_user_ctxt, + min(dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt, (u32)dd->sc_sizes[SC_USER].count)); } diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index e86798af6903..eafae487face 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -51,13 +51,12 @@ u8 ibhdr_exhdr_len(struct ib_header *hdr) { struct ib_other_headers *ohdr; u8 opcode; - u8 lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3); - if (lnh == HFI1_LRH_BTH) + if (ib_get_lnh(hdr) == HFI1_LRH_BTH) ohdr = &hdr->u.oth; else ohdr = &hdr->u.l.oth; - opcode = be32_to_cpu(ohdr->bth[0]) >> 24; + opcode = ib_bth_get_opcode(ohdr); return hdr_len_by_opcode[opcode] == 0 ? 0 : hdr_len_by_opcode[opcode] - (12 + 8); } diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h index 26ae789e47cf..4eb4cc798035 100644 --- a/drivers/infiniband/hw/hfi1/trace_ctxts.h +++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h @@ -57,12 +57,14 @@ #define UCTXT_FMT \ "cred:%u, credaddr:0x%llx, piobase:0x%p, rcvhdr_cnt:%u, " \ - "rcvbase:0x%llx, rcvegrc:%u, rcvegrb:0x%llx" + "rcvbase:0x%llx, rcvegrc:%u, rcvegrb:0x%llx, subctxt_cnt:%u" TRACE_EVENT(hfi1_uctxtdata, - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt), - TP_ARGS(dd, uctxt), + TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt, + unsigned int subctxt), + TP_ARGS(dd, uctxt, subctxt), TP_STRUCT__entry(DD_DEV_ENTRY(dd) __field(unsigned int, ctxt) + __field(unsigned int, subctxt) __field(u32, credits) __field(u64, hw_free) __field(void __iomem *, piobase) @@ -70,9 +72,11 @@ TRACE_EVENT(hfi1_uctxtdata, __field(u64, rcvhdrq_dma) __field(u32, eager_cnt) __field(u64, rcvegr_dma) + __field(unsigned int, subctxt_cnt) ), TP_fast_assign(DD_DEV_ASSIGN(dd); __entry->ctxt = uctxt->ctxt; + __entry->subctxt = subctxt; __entry->credits = uctxt->sc->credits; __entry->hw_free = le64_to_cpu(*uctxt->sc->hw_free); __entry->piobase = uctxt->sc->base_addr; @@ -80,17 +84,20 @@ TRACE_EVENT(hfi1_uctxtdata, __entry->rcvhdrq_dma = uctxt->rcvhdrq_dma; __entry->eager_cnt = uctxt->egrbufs.alloced; __entry->rcvegr_dma = uctxt->egrbufs.rcvtids[0].dma; + __entry->subctxt_cnt = uctxt->subctxt_cnt; ), - TP_printk("[%s] ctxt %u " UCTXT_FMT, + TP_printk("[%s] ctxt %u:%u " UCTXT_FMT, __get_str(dev), __entry->ctxt, + __entry->subctxt, __entry->credits, __entry->hw_free, __entry->piobase, __entry->rcvhdrq_cnt, __entry->rcvhdrq_dma, __entry->eager_cnt, - __entry->rcvegr_dma + __entry->rcvegr_dma, + __entry->subctxt_cnt ) ); diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h index 382fcda3a5f6..090f6b506953 100644 --- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h +++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h @@ -139,11 +139,11 @@ DECLARE_EVENT_CLASS(hfi1_ibhdr_template, __entry->pkey = be32_to_cpu(ohdr->bth[0]) & 0xffff; __entry->f = - (be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT) & - HFI1_FECN_MASK; + (be32_to_cpu(ohdr->bth[1]) >> IB_FECN_SHIFT) & + IB_FECN_MASK; __entry->b = - (be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT) & - HFI1_BECN_MASK; + (be32_to_cpu(ohdr->bth[1]) >> IB_BECN_SHIFT) & + IB_BECN_MASK; __entry->qpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; __entry->a = diff --git a/drivers/infiniband/hw/hfi1/trace_misc.h b/drivers/infiniband/hw/hfi1/trace_misc.h index d308454af7fd..deac77ddaeab 100644 --- a/drivers/infiniband/hw/hfi1/trace_misc.h +++ b/drivers/infiniband/hw/hfi1/trace_misc.h @@ -72,6 +72,54 @@ TRACE_EVENT(hfi1_interrupt, __entry->src) ); +#ifdef CONFIG_FAULT_INJECTION +TRACE_EVENT(hfi1_fault_opcode, + TP_PROTO(struct rvt_qp *qp, u8 opcode), + TP_ARGS(qp, opcode), + TP_STRUCT__entry(DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) + __field(u32, qpn) + __field(u8, opcode) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)) + __entry->qpn = qp->ibqp.qp_num; + __entry->opcode = opcode; + ), + TP_printk("[%s] qpn 0x%x opcode 0x%x", + __get_str(dev), __entry->qpn, __entry->opcode) +); + +TRACE_EVENT(hfi1_fault_packet, + TP_PROTO(struct hfi1_packet *packet), + TP_ARGS(packet), + TP_STRUCT__entry(DD_DEV_ENTRY(packet->rcd->ppd->dd) + __field(u64, eflags) + __field(u32, ctxt) + __field(u32, hlen) + __field(u32, tlen) + __field(u32, updegr) + __field(u32, etail) + ), + TP_fast_assign(DD_DEV_ASSIGN(packet->rcd->ppd->dd); + __entry->eflags = rhf_err_flags(packet->rhf); + __entry->ctxt = packet->rcd->ctxt; + __entry->hlen = packet->hlen; + __entry->tlen = packet->tlen; + __entry->updegr = packet->updegr; + __entry->etail = rhf_egr_index(packet->rhf); + ), + TP_printk( + "[%s] ctxt %d eflags 0x%llx hlen %d tlen %d updegr %d etail %d", + __get_str(dev), + __entry->ctxt, + __entry->eflags, + __entry->hlen, + __entry->tlen, + __entry->updegr, + __entry->etail + ) +); +#endif + #endif /* __HFI1_TRACE_MISC_H */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/infiniband/hw/hfi1/trace_rc.h b/drivers/infiniband/hw/hfi1/trace_rc.h index 5ea5005f9f41..8ce476570462 100644 --- a/drivers/infiniband/hw/hfi1/trace_rc.h +++ b/drivers/infiniband/hw/hfi1/trace_rc.h @@ -1,5 +1,5 @@ /* -* Copyright(c) 2015, 2016 Intel Corporation. +* Copyright(c) 2015, 2016, 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -104,11 +104,6 @@ DEFINE_EVENT(hfi1_rc_template, hfi1_ack, TP_ARGS(qp, psn) ); -DEFINE_EVENT(hfi1_rc_template, hfi1_timeout, - TP_PROTO(struct rvt_qp *qp, u32 psn), - TP_ARGS(qp, psn) -); - DEFINE_EVENT(hfi1_rc_template, hfi1_rcv_error, TP_PROTO(struct rvt_qp *qp, u32 psn), TP_ARGS(qp, psn) diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h index 415d6be42c5d..c59809a7f121 100644 --- a/drivers/infiniband/hw/hfi1/trace_tx.h +++ b/drivers/infiniband/hw/hfi1/trace_tx.h @@ -633,6 +633,83 @@ DEFINE_EVENT(hfi1_bct_template, bct_get, TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc), TP_ARGS(dd, bc)); +TRACE_EVENT( + hfi1_qp_send_completion, + TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe, u32 idx), + TP_ARGS(qp, wqe, idx), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) + __field(struct rvt_swqe *, wqe) + __field(u64, wr_id) + __field(u32, qpn) + __field(u32, qpt) + __field(u32, length) + __field(u32, idx) + __field(u32, ssn) + __field(enum ib_wr_opcode, opcode) + __field(int, send_flags) + ), + TP_fast_assign( + DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)) + __entry->wqe = wqe; + __entry->wr_id = wqe->wr.wr_id; + __entry->qpn = qp->ibqp.qp_num; + __entry->qpt = qp->ibqp.qp_type; + __entry->length = wqe->length; + __entry->idx = idx; + __entry->ssn = wqe->ssn; + __entry->opcode = wqe->wr.opcode; + __entry->send_flags = wqe->wr.send_flags; + ), + TP_printk( + "[%s] qpn 0x%x qpt %u wqe %p idx %u wr_id %llx length %u ssn %u opcode %x send_flags %x", + __get_str(dev), + __entry->qpn, + __entry->qpt, + __entry->wqe, + __entry->idx, + __entry->wr_id, + __entry->length, + __entry->ssn, + __entry->opcode, + __entry->send_flags + ) +); + +DECLARE_EVENT_CLASS( + hfi1_do_send_template, + TP_PROTO(struct rvt_qp *qp, bool flag), + TP_ARGS(qp, flag), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) + __field(u32, qpn) + __field(bool, flag) + ), + TP_fast_assign( + DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)) + __entry->qpn = qp->ibqp.qp_num; + __entry->flag = flag; + ), + TP_printk( + "[%s] qpn %x flag %d", + __get_str(dev), + __entry->qpn, + __entry->flag + ) +); + +DEFINE_EVENT( + hfi1_do_send_template, hfi1_rc_do_send, + TP_PROTO(struct rvt_qp *qp, bool flag), + TP_ARGS(qp, flag) +); + +DEFINE_EVENT( + hfi1_do_send_template, hfi1_rc_expired_time_slice, + TP_PROTO(struct rvt_qp *qp, bool flag), + TP_ARGS(qp, flag) +); + #endif /* __HFI1_TRACE_TX_H */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 4b2a8400c823..5da1e4546543 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -94,7 +94,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) } ohdr = &ps->s_txreq->phdr.hdr.u.oth; - if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) + if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) ohdr = &ps->s_txreq->phdr.hdr.u.l.oth; /* Get the next send request. */ @@ -320,7 +320,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet) process_ecn(qp, packet, true); psn = be32_to_cpu(ohdr->bth[2]); - opcode = (bth0 >> 24) & 0xff; + opcode = ib_bth_get_opcode(ohdr); /* Compare the PSN verses the expected PSN. */ if (unlikely(cmp_psn(psn, qp->r_psn) != 0)) { @@ -433,7 +433,7 @@ no_immediate_data: wc.wc_flags = 0; send_last: /* Get the number of bytes the message was padded by. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + pad = ib_bth_get_pad(ohdr); /* Check for invalid length. */ /* LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) @@ -451,7 +451,7 @@ last_imm: wc.status = IB_WC_SUCCESS; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; - wc.slid = qp->remote_ah_attr.dlid; + wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr); /* * It seems that IB mandates the presence of an SL in a * work completion only for the UD transport (see section @@ -463,7 +463,7 @@ last_imm: * * See also OPA Vol. 1, section 9.7.6, and table 9-17. */ - wc.sl = qp->remote_ah_attr.sl; + wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr); /* zero fields that are N/A */ wc.vendor_err = 0; wc.pkey_index = 0; @@ -528,7 +528,7 @@ rdma_last_imm: wc.wc_flags = IB_WC_WITH_IMM; /* Get the number of bytes the message was padded by. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + pad = ib_bth_get_pad(ohdr); /* Check for invalid length. */ /* LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) @@ -555,7 +555,7 @@ rdma_last_imm: case OP(RDMA_WRITE_LAST): rdma_last: /* Get the number of bytes the message was padded by. */ - pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + pad = ib_bth_get_pad(ohdr); /* Check for invalid length. */ /* LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 13ea4eb6ef3d..6a4e95cefae5 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -68,7 +68,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num); struct hfi1_pportdata *ppd; struct rvt_qp *qp; - struct ib_ah_attr *ah_attr; + struct rdma_ah_attr *ah_attr; unsigned long flags; struct rvt_sge_state ssge; struct rvt_sge *sge; @@ -103,17 +103,17 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) if (qp->ibqp.qp_num > 1) { u16 pkey; u16 slid; - u8 sc5 = ibp->sl_to_sc[ah_attr->sl]; + u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)]; pkey = hfi1_get_pkey(ibp, sqp->s_pkey_index); - slid = ppd->lid | (ah_attr->src_path_bits & + slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) & ((1 << ppd->lmc) - 1)); if (unlikely(ingress_pkey_check(ppd, pkey, sc5, qp->s_pkey_index, slid))) { hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, pkey, - ah_attr->sl, + rdma_ah_get_sl(ah_attr), sqp->ibqp.qp_num, qp->ibqp.qp_num, - slid, ah_attr->dlid); + slid, rdma_ah_get_dlid(ah_attr)); goto drop; } } @@ -131,13 +131,13 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) if (unlikely(qkey != qp->qkey)) { u16 lid; - lid = ppd->lid | (ah_attr->src_path_bits & + lid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) & ((1 << ppd->lmc) - 1)); hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey, - ah_attr->sl, + rdma_ah_get_sl(ah_attr), sqp->ibqp.qp_num, qp->ibqp.qp_num, lid, - ah_attr->dlid); + rdma_ah_get_dlid(ah_attr)); goto drop; } } @@ -183,11 +183,11 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) goto bail_unlock; } - if (ah_attr->ah_flags & IB_AH_GRH) { + if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { struct ib_grh grh; - struct ib_global_route grd = ah_attr->grh; + const struct ib_global_route *grd = rdma_ah_read_grh(ah_attr); - hfi1_make_grh(ibp, &grh, &grd, 0, 0); + hfi1_make_grh(ibp, &grh, grd, 0, 0); hfi1_copy_sge(&qp->r_sge, &grh, sizeof(grh), true, false); wc.wc_flags |= IB_WC_GRH; @@ -243,12 +243,13 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) } else { wc.pkey_index = 0; } - wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1)); + wc.slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) & + ((1 << ppd->lmc) - 1)); /* Check for loopback when the port lid is not set */ if (wc.slid == 0 && sqp->ibqp.qp_type == IB_QPT_GSI) wc.slid = be16_to_cpu(IB_LID_PERMISSIVE); - wc.sl = ah_attr->sl; - wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1); + wc.sl = rdma_ah_get_sl(ah_attr); + wc.dlid_path_bits = rdma_ah_get_dlid(ah_attr) & ((1 << ppd->lmc) - 1); wc.port_num = qp->port_num; /* Signal completion event if the solicited bit is set. */ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, @@ -272,7 +273,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_qp_priv *priv = qp->priv; struct ib_other_headers *ohdr; - struct ib_ah_attr *ah_attr; + struct rdma_ah_attr *ah_attr; struct hfi1_pportdata *ppd; struct hfi1_ibport *ibp; struct rvt_swqe *wqe; @@ -319,9 +320,9 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) ibp = to_iport(qp->ibqp.device, qp->port_num); ppd = ppd_from_ibp(ibp); ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr; - if (ah_attr->dlid < be16_to_cpu(IB_MULTICAST_LID_BASE) || - ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) { - lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1); + if (rdma_ah_get_dlid(ah_attr) < be16_to_cpu(IB_MULTICAST_LID_BASE) || + rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE)) { + lid = rdma_ah_get_dlid(ah_attr) & ~((1 << ppd->lmc) - 1); if (unlikely(!loopback && (lid == ppd->lid || (lid == be16_to_cpu(IB_LID_PERMISSIVE) && @@ -356,7 +357,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_hdrwords = 7; ps->s_txreq->s_cur_size = wqe->length; ps->s_txreq->ss = &qp->s_sge; - qp->s_srate = ah_attr->static_rate; + qp->s_srate = rdma_ah_get_static_rate(ah_attr); qp->srate_mbps = ib_rate_to_mbps(qp->s_srate); qp->s_wqe = wqe; qp->s_sge.sge = wqe->sg_list[0]; @@ -364,11 +365,11 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_sge.num_sge = wqe->wr.num_sge; qp->s_sge.total_len = wqe->length; - if (ah_attr->ah_flags & IB_AH_GRH) { + if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { /* Header size in 32-bit words. */ qp->s_hdrwords += hfi1_make_grh(ibp, &ps->s_txreq->phdr.hdr.u.l.grh, - &ah_attr->grh, + rdma_ah_read_grh(ah_attr), qp->s_hdrwords, nwords); lrh0 = HFI1_LRH_GRH; ohdr = &ps->s_txreq->phdr.hdr.u.l.oth; @@ -388,8 +389,8 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) } else { bth0 = IB_OPCODE_UD_SEND_ONLY << 24; } - sc5 = ibp->sl_to_sc[ah_attr->sl]; - lrh0 |= (ah_attr->sl & 0xf) << 4; + sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)]; + lrh0 |= (rdma_ah_get_sl(ah_attr) & 0xf) << 4; if (qp->ibqp.qp_type == IB_QPT_SMI) { lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */ priv->s_sc = 0xf; @@ -402,15 +403,17 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc); ps->s_txreq->psc = priv->s_sendcontext; ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0); - ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); + ps->s_txreq->phdr.hdr.lrh[1] = + cpu_to_be16(rdma_ah_get_dlid(ah_attr)); ps->s_txreq->phdr.hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); - if (ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) { + if (rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE)) { ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE; } else { lid = ppd->lid; if (lid) { - lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1); + lid |= rdma_ah_get_path_bits(ah_attr) & + ((1 << ppd->lmc) - 1); ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(lid); } else { ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE; @@ -537,7 +540,7 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, bth0 = pkey | (IB_OPCODE_CNP << 24); ohdr->bth[0] = cpu_to_be32(bth0); - ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << HFI1_BECN_SHIFT)); + ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << IB_BECN_SHIFT)); ohdr->bth[2] = 0; /* PSN 0 */ hdr.lrh[0] = cpu_to_be16(lrh0); @@ -680,7 +683,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; bool has_grh = rcv_flags & HFI1_HAS_GRH; - u8 sc5 = hdr2sc(hdr, packet->rhf); + u8 sc5 = hfi1_9B_get_sc5(hdr, packet->rhf); u32 bth1; u8 sl_from_sc, sl; u16 slid; @@ -688,18 +691,16 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) qkey = be32_to_cpu(ohdr->u.ud.deth[0]); src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK; - dlid = be16_to_cpu(hdr->lrh[1]); + dlid = ib_get_dlid(hdr); bth1 = be32_to_cpu(ohdr->bth[1]); - slid = be16_to_cpu(hdr->lrh[3]); - pkey = (u16)be32_to_cpu(ohdr->bth[0]); - sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf; - extra_bytes = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + slid = ib_get_slid(hdr); + pkey = ib_bth_get_pkey(ohdr); + opcode = ib_bth_get_opcode(ohdr); + sl = ib_get_sl(hdr); + extra_bytes = ib_bth_get_pad(ohdr); extra_bytes += (SIZE_OF_CRC << 2); sl_from_sc = ibp->sc_to_sl[sc5]; - opcode = be32_to_cpu(ohdr->bth[0]) >> 24; - opcode &= 0xff; - process_ecn(qp, packet, (opcode != IB_OPCODE_CNP)); /* * Get the number of bytes the message was padded by diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 4a8295399e71..a8f0aa4722f6 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -53,7 +53,7 @@ struct tid_group { struct list_head list; - unsigned base; + u32 base; u8 size; u8 used; u8 map; @@ -82,20 +82,25 @@ struct tid_pageset { (unsigned long)(len) - 1) & PAGE_MASK) - \ ((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT)) -static void unlock_exp_tids(struct hfi1_ctxtdata *, struct exp_tid_set *, - struct hfi1_filedata *); -static u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *); -static int set_rcvarray_entry(struct file *, unsigned long, u32, - struct tid_group *, struct page **, unsigned); -static int tid_rb_insert(void *, struct mmu_rb_node *); +static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt, + struct exp_tid_set *set, + struct hfi1_filedata *fd); +static u32 find_phys_blocks(struct page **pages, unsigned npages, + struct tid_pageset *list); +static int set_rcvarray_entry(struct hfi1_filedata *fd, unsigned long vaddr, + u32 rcventry, struct tid_group *grp, + struct page **pages, unsigned npages); +static int tid_rb_insert(void *arg, struct mmu_rb_node *node); static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, struct tid_rb_node *tnode); -static void tid_rb_remove(void *, struct mmu_rb_node *); -static int tid_rb_invalidate(void *, struct mmu_rb_node *); -static int program_rcvarray(struct file *, unsigned long, struct tid_group *, - struct tid_pageset *, unsigned, u16, struct page **, - u32 *, unsigned *, unsigned *); -static int unprogram_rcvarray(struct file *, u32, struct tid_group **); +static void tid_rb_remove(void *arg, struct mmu_rb_node *node); +static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode); +static int program_rcvarray(struct hfi1_filedata *fd, unsigned long vaddr, + struct tid_group *grp, struct tid_pageset *sets, + unsigned start, u16 count, struct page **pages, + u32 *tidlist, unsigned *tididx, unsigned *pmapped); +static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, + struct tid_group **grp); static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node); static struct mmu_rb_ops tid_rb_ops = { @@ -149,62 +154,72 @@ static inline void tid_group_move(struct tid_group *group, tid_group_add_tail(group, s2); } +int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd) +{ + struct hfi1_ctxtdata *uctxt = fd->uctxt; + struct hfi1_devdata *dd = fd->dd; + u32 tidbase; + u32 i; + struct tid_group *grp, *gptr; + + exp_tid_group_init(&uctxt->tid_group_list); + exp_tid_group_init(&uctxt->tid_used_list); + exp_tid_group_init(&uctxt->tid_full_list); + + tidbase = uctxt->expected_base; + for (i = 0; i < uctxt->expected_count / + dd->rcv_entries.group_size; i++) { + grp = kzalloc(sizeof(*grp), GFP_KERNEL); + if (!grp) + goto grp_failed; + + grp->size = dd->rcv_entries.group_size; + grp->base = tidbase; + tid_group_add_tail(grp, &uctxt->tid_group_list); + tidbase += dd->rcv_entries.group_size; + } + + return 0; + +grp_failed: + list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list, + list) { + list_del_init(&grp->list); + kfree(grp); + } + + return -ENOMEM; +} + /* * Initialize context and file private data needed for Expected * receive caching. This needs to be done after the context has * been configured with the eager/expected RcvEntry counts. */ -int hfi1_user_exp_rcv_init(struct file *fp) +int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd) { - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; - unsigned tidbase; - int i, ret = 0; + int ret = 0; spin_lock_init(&fd->tid_lock); spin_lock_init(&fd->invalid_lock); - if (!uctxt->subctxt_cnt || !fd->subctxt) { - exp_tid_group_init(&uctxt->tid_group_list); - exp_tid_group_init(&uctxt->tid_used_list); - exp_tid_group_init(&uctxt->tid_full_list); - - tidbase = uctxt->expected_base; - for (i = 0; i < uctxt->expected_count / - dd->rcv_entries.group_size; i++) { - struct tid_group *grp; - - grp = kzalloc(sizeof(*grp), GFP_KERNEL); - if (!grp) { - /* - * If we fail here, the groups already - * allocated will be freed by the close - * call. - */ - ret = -ENOMEM; - goto done; - } - grp->size = dd->rcv_entries.group_size; - grp->base = tidbase; - tid_group_add_tail(grp, &uctxt->tid_group_list); - tidbase += dd->rcv_entries.group_size; - } - } - fd->entry_to_rb = kcalloc(uctxt->expected_count, - sizeof(struct rb_node *), - GFP_KERNEL); + sizeof(struct rb_node *), + GFP_KERNEL); if (!fd->entry_to_rb) return -ENOMEM; if (!HFI1_CAP_UGET_MASK(uctxt->flags, TID_UNMAP)) { fd->invalid_tid_idx = 0; - fd->invalid_tids = kzalloc(uctxt->expected_count * - sizeof(u32), GFP_KERNEL); + fd->invalid_tids = kcalloc(uctxt->expected_count, + sizeof(*fd->invalid_tids), + GFP_KERNEL); if (!fd->invalid_tids) { - ret = -ENOMEM; - goto done; + kfree(fd->entry_to_rb); + fd->entry_to_rb = NULL; + return -ENOMEM; } /* @@ -247,41 +262,44 @@ int hfi1_user_exp_rcv_init(struct file *fp) fd->tid_limit = uctxt->expected_count; } spin_unlock(&fd->tid_lock); -done: + return ret; } -int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) +void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt) { - struct hfi1_ctxtdata *uctxt = fd->uctxt; struct tid_group *grp, *gptr; - if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags)) - return 0; + list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list, + list) { + list_del_init(&grp->list); + kfree(grp); + } + hfi1_clear_tids(uctxt); +} + +void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) +{ + struct hfi1_ctxtdata *uctxt = fd->uctxt; + /* * The notifier would have been removed when the process'es mm * was freed. */ - if (fd->handler) + if (fd->handler) { hfi1_mmu_rb_unregister(fd->handler); - - kfree(fd->invalid_tids); - - if (!uctxt->cnt) { + } else { if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list)) unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd); if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list)) unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd); - list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list, - list) { - list_del_init(&grp->list); - kfree(grp); - } - hfi1_clear_tids(uctxt); } + kfree(fd->invalid_tids); + fd->invalid_tids = NULL; + kfree(fd->entry_to_rb); - return 0; + fd->entry_to_rb = NULL; } /* @@ -350,10 +368,10 @@ static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index) * can fit into the group. If the group becomes fully * used, move it to tid_full_list. */ -int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo) +int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, + struct hfi1_tid_info *tinfo) { int ret = 0, need_group = 0, pinned; - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; unsigned npages, ngroups, pageidx = 0, pageset_count, npagesets, @@ -450,7 +468,7 @@ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo) struct tid_group *grp = tid_group_pop(&uctxt->tid_group_list); - ret = program_rcvarray(fp, vaddr, grp, pagesets, + ret = program_rcvarray(fd, vaddr, grp, pagesets, pageidx, dd->rcv_entries.group_size, pages, tidlist, &tididx, &mapped); /* @@ -496,7 +514,7 @@ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo) unsigned use = min_t(unsigned, pageset_count - pageidx, grp->size - grp->used); - ret = program_rcvarray(fp, vaddr, grp, pagesets, + ret = program_rcvarray(fd, vaddr, grp, pagesets, pageidx, use, pages, tidlist, &tididx, &mapped); if (ret < 0) { @@ -546,7 +564,7 @@ nomem: * everything done so far so we don't leak resources. */ tinfo->tidlist = (unsigned long)&tidlist; - hfi1_user_exp_rcv_clear(fp, tinfo); + hfi1_user_exp_rcv_clear(fd, tinfo); tinfo->tidlist = 0; ret = -EFAULT; goto bail; @@ -570,14 +588,17 @@ bail: return ret > 0 ? 0 : ret; } -int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo) +int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd, + struct hfi1_tid_info *tinfo) { int ret = 0; - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; u32 *tidinfo; unsigned tididx; + if (unlikely(tinfo->tidcnt > fd->tid_used)) + return -EINVAL; + tidinfo = memdup_user((void __user *)(unsigned long)tinfo->tidlist, sizeof(tidinfo[0]) * tinfo->tidcnt); if (IS_ERR(tidinfo)) @@ -585,7 +606,7 @@ int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo) mutex_lock(&uctxt->exp_lock); for (tididx = 0; tididx < tinfo->tidcnt; tididx++) { - ret = unprogram_rcvarray(fp, tidinfo[tididx], NULL); + ret = unprogram_rcvarray(fd, tidinfo[tididx], NULL); if (ret) { hfi1_cdbg(TID, "Failed to unprogram rcv array %d", ret); @@ -602,12 +623,12 @@ int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo) return ret; } -int hfi1_user_exp_rcv_invalid(struct file *fp, struct hfi1_tid_info *tinfo) +int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd, + struct hfi1_tid_info *tinfo) { - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; unsigned long *ev = uctxt->dd->events + - (((uctxt->ctxt - uctxt->dd->first_user_ctxt) * + (((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) * HFI1_MAX_SHARED_CTXTS) + fd->subctxt); u32 *array; int ret = 0; @@ -719,7 +740,7 @@ static u32 find_phys_blocks(struct page **pages, unsigned npages, /** * program_rcvarray() - program an RcvArray group with receive buffers - * @fp: file pointer + * @fd: filedata pointer * @vaddr: starting user virtual address * @grp: RcvArray group * @sets: array of struct tid_pageset holding information on physically @@ -744,13 +765,12 @@ static u32 find_phys_blocks(struct page **pages, unsigned npages, * -ENOMEM or -EFAULT on error from set_rcvarray_entry(), or * number of RcvArray entries programmed. */ -static int program_rcvarray(struct file *fp, unsigned long vaddr, +static int program_rcvarray(struct hfi1_filedata *fd, unsigned long vaddr, struct tid_group *grp, struct tid_pageset *sets, unsigned start, u16 count, struct page **pages, u32 *tidlist, unsigned *tididx, unsigned *pmapped) { - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; u16 idx; @@ -791,7 +811,7 @@ static int program_rcvarray(struct file *fp, unsigned long vaddr, npages = sets[setidx].count; pageidx = sets[setidx].idx; - ret = set_rcvarray_entry(fp, vaddr + (pageidx * PAGE_SIZE), + ret = set_rcvarray_entry(fd, vaddr + (pageidx * PAGE_SIZE), rcventry, grp, pages + pageidx, npages); if (ret) @@ -813,12 +833,11 @@ static int program_rcvarray(struct file *fp, unsigned long vaddr, return idx; } -static int set_rcvarray_entry(struct file *fp, unsigned long vaddr, +static int set_rcvarray_entry(struct hfi1_filedata *fd, unsigned long vaddr, u32 rcventry, struct tid_group *grp, struct page **pages, unsigned npages) { int ret; - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct tid_rb_node *node; struct hfi1_devdata *dd = uctxt->dd; @@ -872,10 +891,9 @@ static int set_rcvarray_entry(struct file *fp, unsigned long vaddr, return 0; } -static int unprogram_rcvarray(struct file *fp, u32 tidinfo, +static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, struct tid_group **grp) { - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; struct tid_rb_node *node; @@ -1011,8 +1029,8 @@ static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode) * process in question. */ ev = uctxt->dd->events + - (((uctxt->ctxt - uctxt->dd->first_user_ctxt) * - HFI1_MAX_SHARED_CTXTS) + fdata->subctxt); + (((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) * + HFI1_MAX_SHARED_CTXTS) + fdata->subctxt); set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev); } fdata->invalid_tid_idx++; diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index 9bc8d9fba87e..5250c897298d 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h @@ -1,7 +1,7 @@ #ifndef _HFI1_USER_EXP_RCV_H #define _HFI1_USER_EXP_RCV_H /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -70,10 +70,15 @@ (tid) |= EXP_TID_SET(field, (value)); \ } while (0) -int hfi1_user_exp_rcv_init(struct file *); -int hfi1_user_exp_rcv_free(struct hfi1_filedata *); -int hfi1_user_exp_rcv_setup(struct file *, struct hfi1_tid_info *); -int hfi1_user_exp_rcv_clear(struct file *, struct hfi1_tid_info *); -int hfi1_user_exp_rcv_invalid(struct file *, struct hfi1_tid_info *); +void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt); +int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd); +int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd); +void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd); +int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, + struct hfi1_tid_info *tinfo); +int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd, + struct hfi1_tid_info *tinfo); +int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd, + struct hfi1_tid_info *tinfo); #endif /* _HFI1_USER_EXP_RCV_H */ diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c index 68295a12b771..e341e6dcc388 100644 --- a/drivers/infiniband/hw/hfi1/user_pages.c +++ b/drivers/infiniband/hw/hfi1/user_pages.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015-2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -73,7 +73,8 @@ bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm, { unsigned long ulimit = rlimit(RLIMIT_MEMLOCK), pinned, cache_limit, size = (cache_size * (1UL << 20)); /* convert to bytes */ - unsigned usr_ctxts = dd->num_rcv_contexts - dd->first_user_ctxt; + unsigned int usr_ctxts = + dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt; bool can_lock = capable(CAP_IPC_LOCK); /* diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index e6811c4edc73..d55339f5d73b 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -143,7 +143,9 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 /* KDETH OM multipliers and switch over point */ #define KDETH_OM_SMALL 4 +#define KDETH_OM_SMALL_SHIFT 2 #define KDETH_OM_LARGE 64 +#define KDETH_OM_LARGE_SHIFT 6 #define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1)) /* Tx request flag bits */ @@ -153,9 +155,8 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 /* SDMA request flag bits */ #define SDMA_REQ_FOR_THREAD 1 #define SDMA_REQ_SEND_DONE 2 -#define SDMA_REQ_HAVE_AHG 3 -#define SDMA_REQ_HAS_ERROR 4 -#define SDMA_REQ_DONE_ERROR 5 +#define SDMA_REQ_HAS_ERROR 3 +#define SDMA_REQ_DONE_ERROR 4 #define SDMA_PKT_Q_INACTIVE BIT(0) #define SDMA_PKT_Q_ACTIVE BIT(1) @@ -214,7 +215,7 @@ struct user_sdma_request { * each request will need it's own engine pointer. */ struct sdma_engine *sde; - u8 ahg_idx; + s8 ahg_idx; u32 ahg[9]; /* * KDETH.Offset (Eager) field @@ -229,12 +230,6 @@ struct user_sdma_request { */ u32 tidoffset; /* - * KDETH.OM - * Remember this because the header template always sets it - * to 0. - */ - u8 omfactor; - /* * We copy the iovs for this request (based on * info.iovcnt). These are only the data vectors */ @@ -284,39 +279,43 @@ struct user_sdma_txreq { hfi1_cdbg(SDMA, "[%u:%u:%u] " fmt, (pq)->dd->unit, (pq)->ctxt, \ (pq)->subctxt, ##__VA_ARGS__) -static int user_sdma_send_pkts(struct user_sdma_request *, unsigned); -static int num_user_pages(const struct iovec *); -static void user_sdma_txreq_cb(struct sdma_txreq *, int); -static inline void pq_update(struct hfi1_user_sdma_pkt_q *); -static void user_sdma_free_request(struct user_sdma_request *, bool); -static int pin_vector_pages(struct user_sdma_request *, - struct user_sdma_iovec *); -static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned, - unsigned); -static int check_header_template(struct user_sdma_request *, - struct hfi1_pkt_header *, u32, u32); -static int set_txreq_header(struct user_sdma_request *, - struct user_sdma_txreq *, u32); -static int set_txreq_header_ahg(struct user_sdma_request *, - struct user_sdma_txreq *, u32); -static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *, - struct hfi1_user_sdma_comp_q *, - u16, enum hfi1_sdma_comp_state, int); -static inline u32 set_pkt_bth_psn(__be32, u8, u32); +static int user_sdma_send_pkts(struct user_sdma_request *req, + unsigned maxpkts); +static int num_user_pages(const struct iovec *iov); +static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status); +static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq); +static void user_sdma_free_request(struct user_sdma_request *req, bool unpin); +static int pin_vector_pages(struct user_sdma_request *req, + struct user_sdma_iovec *iovec); +static void unpin_vector_pages(struct mm_struct *mm, struct page **pages, + unsigned start, unsigned npages); +static int check_header_template(struct user_sdma_request *req, + struct hfi1_pkt_header *hdr, u32 lrhlen, + u32 datalen); +static int set_txreq_header(struct user_sdma_request *req, + struct user_sdma_txreq *tx, u32 datalen); +static int set_txreq_header_ahg(struct user_sdma_request *req, + struct user_sdma_txreq *tx, u32 len); +static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq, + struct hfi1_user_sdma_comp_q *cq, + u16 idx, enum hfi1_sdma_comp_state state, + int ret); +static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags); static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len); static int defer_packet_queue( - struct sdma_engine *, - struct iowait *, - struct sdma_txreq *, - unsigned seq); -static void activate_packet_queue(struct iowait *, int); -static bool sdma_rb_filter(struct mmu_rb_node *, unsigned long, unsigned long); -static int sdma_rb_insert(void *, struct mmu_rb_node *); + struct sdma_engine *sde, + struct iowait *wait, + struct sdma_txreq *txreq, + unsigned int seq); +static void activate_packet_queue(struct iowait *wait, int reason); +static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr, + unsigned long len); +static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode); static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode, void *arg2, bool *stop); -static void sdma_rb_remove(void *, struct mmu_rb_node *); -static int sdma_rb_invalidate(void *, struct mmu_rb_node *); +static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode); +static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode); static struct mmu_rb_ops sdma_rb_ops = { .filter = sdma_rb_filter, @@ -372,44 +371,27 @@ static void sdma_kmem_cache_ctor(void *obj) memset(tx, 0, sizeof(*tx)); } -int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp) +int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, + struct hfi1_filedata *fd) { - struct hfi1_filedata *fd; - int ret = 0; - unsigned memsize; + int ret = -ENOMEM; char buf[64]; struct hfi1_devdata *dd; struct hfi1_user_sdma_comp_q *cq; struct hfi1_user_sdma_pkt_q *pq; unsigned long flags; - if (!uctxt || !fp) { - ret = -EBADF; - goto done; - } + if (!uctxt || !fd) + return -EBADF; - fd = fp->private_data; - - if (!hfi1_sdma_comp_ring_size) { - ret = -EINVAL; - goto done; - } + if (!hfi1_sdma_comp_ring_size) + return -EINVAL; dd = uctxt->dd; pq = kzalloc(sizeof(*pq), GFP_KERNEL); if (!pq) - goto pq_nomem; - - memsize = sizeof(*pq->reqs) * hfi1_sdma_comp_ring_size; - pq->reqs = kzalloc(memsize, GFP_KERNEL); - if (!pq->reqs) - goto pq_reqs_nomem; - - memsize = BITS_TO_LONGS(hfi1_sdma_comp_ring_size) * sizeof(long); - pq->req_in_use = kzalloc(memsize, GFP_KERNEL); - if (!pq->req_in_use) - goto pq_reqs_no_in_use; + return -ENOMEM; INIT_LIST_HEAD(&pq->list); pq->dd = dd; @@ -425,10 +407,23 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp) iowait_init(&pq->busy, 0, NULL, defer_packet_queue, activate_packet_queue, NULL); pq->reqidx = 0; + + pq->reqs = kcalloc(hfi1_sdma_comp_ring_size, + sizeof(*pq->reqs), + GFP_KERNEL); + if (!pq->reqs) + goto pq_reqs_nomem; + + pq->req_in_use = kcalloc(BITS_TO_LONGS(hfi1_sdma_comp_ring_size), + sizeof(*pq->req_in_use), + GFP_KERNEL); + if (!pq->req_in_use) + goto pq_reqs_no_in_use; + snprintf(buf, 64, "txreq-kmem-cache-%u-%u-%u", dd->unit, uctxt->ctxt, fd->subctxt); pq->txreq_cache = kmem_cache_create(buf, - sizeof(struct user_sdma_txreq), + sizeof(struct user_sdma_txreq), L1_CACHE_BYTES, SLAB_HWCACHE_ALIGN, sdma_kmem_cache_ctor); @@ -437,31 +432,36 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp) uctxt->ctxt); goto pq_txreq_nomem; } - fd->pq = pq; + cq = kzalloc(sizeof(*cq), GFP_KERNEL); if (!cq) goto cq_nomem; - memsize = PAGE_ALIGN(sizeof(*cq->comps) * hfi1_sdma_comp_ring_size); - cq->comps = vmalloc_user(memsize); + cq->comps = vmalloc_user(PAGE_ALIGN(sizeof(*cq->comps) + * hfi1_sdma_comp_ring_size)); if (!cq->comps) goto cq_comps_nomem; cq->nentries = hfi1_sdma_comp_ring_size; - fd->cq = cq; ret = hfi1_mmu_rb_register(pq, pq->mm, &sdma_rb_ops, dd->pport->hfi1_wq, &pq->handler); if (ret) { dd_dev_err(dd, "Failed to register with MMU %d", ret); - goto done; + goto pq_mmu_fail; } + fd->pq = pq; + fd->cq = cq; + spin_lock_irqsave(&uctxt->sdma_qlock, flags); list_add(&pq->list, &uctxt->sdma_queues); spin_unlock_irqrestore(&uctxt->sdma_qlock, flags); - goto done; + return 0; + +pq_mmu_fail: + vfree(cq->comps); cq_comps_nomem: kfree(cq); cq_nomem: @@ -472,10 +472,7 @@ pq_reqs_no_in_use: kfree(pq->reqs); pq_reqs_nomem: kfree(pq); - fd->pq = NULL; -pq_nomem: - ret = -ENOMEM; -done: + return ret; } @@ -535,11 +532,11 @@ static u8 dlid_to_selector(u16 dlid) return mapping[hash]; } -int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, - unsigned long dim, unsigned long *count) +int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, + struct iovec *iovec, unsigned long dim, + unsigned long *count) { int ret = 0, i; - struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_user_sdma_pkt_q *pq = fd->pq; struct hfi1_user_sdma_comp_q *cq = fd->cq; @@ -615,6 +612,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, req->pq = pq; req->cq = cq; req->status = -1; + req->ahg_idx = -1; INIT_LIST_HEAD(&req->txps); memcpy(&req->info, &info, sizeof(info)); @@ -704,7 +702,9 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, /* Save all the IO vector structures */ for (i = 0; i < req->data_iovs; i++) { INIT_LIST_HEAD(&req->iovs[i].list); - memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(struct iovec)); + memcpy(&req->iovs[i].iov, + iovec + idx++, + sizeof(req->iovs[i].iov)); ret = pin_vector_pages(req, &req->iovs[i]); if (ret) { req->status = ret; @@ -763,14 +763,8 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, } /* We don't need an AHG entry if the request contains only one packet */ - if (req->info.npkts > 1 && HFI1_CAP_IS_USET(SDMA_AHG)) { - int ahg = sdma_ahg_alloc(req->sde); - - if (likely(ahg >= 0)) { - req->ahg_idx = (u8)ahg; - set_bit(SDMA_REQ_HAVE_AHG, &req->flags); - } - } + if (req->info.npkts > 1 && HFI1_CAP_IS_USET(SDMA_AHG)) + req->ahg_idx = sdma_ahg_alloc(req->sde); set_comp_state(pq, cq, info.comp_idx, QUEUED, 0); atomic_inc(&pq->n_reqs); @@ -988,7 +982,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) } } - if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags)) { + if (req->ahg_idx >= 0) { if (!req->seqnum) { u16 pbclen = le16_to_cpu(req->hdr.pbc[0]); u32 lrhlen = get_lrh_len(req->hdr, @@ -1118,7 +1112,7 @@ dosend: * happen due to the sequential manner in which * descriptors are processed. */ - if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags)) + if (req->ahg_idx >= 0) sdma_ahg_free(req->sde, req->ahg_idx); } return ret; @@ -1320,6 +1314,7 @@ static int set_txreq_header(struct user_sdma_request *req, { struct hfi1_user_sdma_pkt_q *pq = req->pq; struct hfi1_pkt_header *hdr = &tx->hdr; + u8 omfactor; /* KDETH.OM */ u16 pbclen; int ret; u32 tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen)); @@ -1397,8 +1392,9 @@ static int set_txreq_header(struct user_sdma_request *req, } tidval = req->tids[req->tididx]; } - req->omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >= - KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE : KDETH_OM_SMALL; + omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >= + KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE_SHIFT : + KDETH_OM_SMALL_SHIFT; /* Set KDETH.TIDCtrl based on value for this TID. */ KDETH_SET(hdr->kdeth.ver_tid_offset, TIDCTRL, EXP_TID_GET(tidval, CTRL)); @@ -1413,12 +1409,12 @@ static int set_txreq_header(struct user_sdma_request *req, * transfer. */ SDMA_DBG(req, "TID offset %ubytes %uunits om%u", - req->tidoffset, req->tidoffset / req->omfactor, - req->omfactor != KDETH_OM_SMALL); + req->tidoffset, req->tidoffset >> omfactor, + omfactor != KDETH_OM_SMALL_SHIFT); KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET, - req->tidoffset / req->omfactor); + req->tidoffset >> omfactor); KDETH_SET(hdr->kdeth.ver_tid_offset, OM, - req->omfactor != KDETH_OM_SMALL); + omfactor != KDETH_OM_SMALL_SHIFT); } done: trace_hfi1_sdma_user_header(pq->dd, pq->ctxt, pq->subctxt, @@ -1430,6 +1426,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, struct user_sdma_txreq *tx, u32 len) { int diff = 0; + u8 omfactor; /* KDETH.OM */ struct hfi1_user_sdma_pkt_q *pq = req->pq; struct hfi1_pkt_header *hdr = &req->hdr; u16 pbclen = le16_to_cpu(hdr->pbc[0]); @@ -1481,14 +1478,15 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, } tidval = req->tids[req->tididx]; } - req->omfactor = ((EXP_TID_GET(tidval, LEN) * + omfactor = ((EXP_TID_GET(tidval, LEN) * PAGE_SIZE) >= - KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE : - KDETH_OM_SMALL; + KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT : + KDETH_OM_SMALL_SHIFT; /* KDETH.OM and KDETH.OFFSET (TID) */ AHG_HEADER_SET(req->ahg, diff, 7, 0, 16, - ((!!(req->omfactor - KDETH_OM_SMALL)) << 15 | - ((req->tidoffset / req->omfactor) & 0x7fff))); + ((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 | + ((req->tidoffset >> omfactor) + & 0x7fff))); /* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */ val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) | (EXP_TID_GET(tidval, IDX) & 0x3ff)); @@ -1615,9 +1613,10 @@ static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq, { hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Setting completion status %u %d", pq->dd->unit, pq->ctxt, pq->subctxt, idx, state, ret); - cq->comps[idx].status = state; if (state == ERROR) cq->comps[idx].errcode = -ret; + smp_wmb(); /* make sure errcode is visible first */ + cq->comps[idx].status = state; trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt, idx, state, ret); } diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index 39001714f551..e5b10aefe212 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -1,5 +1,7 @@ +#ifndef _HFI1_USER_SDMA_H +#define _HFI1_USER_SDMA_H /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -56,7 +58,7 @@ extern uint extended_psn; struct hfi1_user_sdma_pkt_q { struct list_head list; unsigned ctxt; - unsigned subctxt; + u16 subctxt; u16 n_max_reqs; atomic_t n_reqs; u16 reqidx; @@ -78,7 +80,11 @@ struct hfi1_user_sdma_comp_q { struct hfi1_sdma_comp_entry *comps; }; -int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *, struct file *); -int hfi1_user_sdma_free_queues(struct hfi1_filedata *); -int hfi1_user_sdma_process_request(struct file *, struct iovec *, unsigned long, - unsigned long *); +int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, + struct hfi1_filedata *fd); +int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd); +int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, + struct iovec *iovec, unsigned long dim, + unsigned long *count); + +#endif /* _HFI1_USER_SDMA_H */ diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 222315fadab1..90e7b77d68e8 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -60,6 +60,8 @@ #include "trace.h" #include "qp.h" #include "verbs_txreq.h" +#include "debugfs.h" +#include "vnic.h" static unsigned int hfi1_lkey_table_size = 16; module_param_named(lkey_table_size, hfi1_lkey_table_size, uint, @@ -297,6 +299,22 @@ static inline bool wss_exceeds_threshold(void) } /* + * Translate ib_wr_opcode into ib_wc_opcode. + */ +const enum ib_wc_opcode ib_hfi1_wc_opcode[] = { + [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, + [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, + [IB_WR_SEND] = IB_WC_SEND, + [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, + [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, + [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, + [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD, + [IB_WR_SEND_WITH_INV] = IB_WC_SEND, + [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV, + [IB_WR_REG_MR] = IB_WC_REG_MR +}; + +/* * Length of header by opcode, 0 --> not supported */ const u8 hdr_len_by_opcode[256] = { @@ -501,6 +519,35 @@ static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet) return NULL; } +static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc) +{ +#ifdef CONFIG_FAULT_INJECTION + if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP) + /* + * In order to drop non-IB traffic we + * set PbcInsertHrc to NONE (0x2). + * The packet will still be delivered + * to the receiving node but a + * KHdrHCRCErr (KDETH packet with a bad + * HCRC) will be triggered and the + * packet will not be delivered to the + * correct context. + */ + pbc |= (u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT; + else + /* + * In order to drop regular verbs + * traffic we set the PbcTestEbp + * flag. The packet will still be + * delivered to the receiving node but + * a 'late ebp error' will be + * triggered and will be dropped. + */ + pbc |= PBC_TEST_EBP; +#endif + return pbc; +} + /** * hfi1_ib_rcv - process an incoming packet * @packet: data packet information @@ -525,7 +572,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) u16 lid; /* Check for GRH */ - lnh = be16_to_cpu(hdr->lrh[0]) & 3; + lnh = ib_get_lnh(hdr); if (lnh == HFI1_LRH_BTH) { packet->ohdr = &hdr->u.oth; } else if (lnh == HFI1_LRH_GRH) { @@ -544,12 +591,12 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) trace_input_ibhdr(rcd->dd, hdr); - opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24); + opcode = ib_bth_get_opcode(packet->ohdr); inc_opstats(tlen, &rcd->opstats->stats[opcode]); /* Get the destination QP number. */ qp_num = be32_to_cpu(packet->ohdr->bth[1]) & RVT_QPN_MASK; - lid = be16_to_cpu(hdr->lrh[1]); + lid = ib_get_dlid(hdr); if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) && (lid != be16_to_cpu(IB_LID_PERMISSIVE)))) { struct rvt_mcast *mcast; @@ -557,7 +604,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) if (lnh != HFI1_LRH_GRH) goto drop; - mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid); + mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid, lid); if (!mcast) goto drop; list_for_each_entry_rcu(p, &mcast->qp_list, list) { @@ -583,6 +630,11 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) rcu_read_unlock(); goto drop; } + if (unlikely(hfi1_dbg_fault_opcode(packet->qp, opcode, + true))) { + rcu_read_unlock(); + goto drop; + } spin_lock_irqsave(&packet->qp->r_lock, flags); packet_handler = qp_ok(opcode, packet); if (likely(packet_handler)) @@ -781,7 +833,6 @@ static int build_verbs_tx_desc( if (ret) goto bail_txadd; } - /* add the ulp payload - if any. tx->ss can be NULL for acks */ if (tx->ss) ret = build_verbs_ulp_payload(sde, length, tx); @@ -800,7 +851,6 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, struct hfi1_ibdev *dev = ps->dev; struct hfi1_pportdata *ppd = ps->ppd; struct verbs_txreq *tx; - u64 pbc_flags = 0; u8 sc5 = priv->s_sc; int ret; @@ -809,12 +859,16 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, if (!sdma_txreq_built(&tx->txreq)) { if (likely(pbc == 0)) { u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5); + u8 opcode = get_opcode(&tx->phdr.hdr); + /* No vl15 here */ /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ - pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; + pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; + if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false))) + pbc = hfi1_fault_tx(qp, opcode, pbc); pbc = create_pbc(ppd, - pbc_flags, + pbc, qp->srate_mbps, vl, plen); @@ -917,7 +971,6 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u32 plen = hdrwords + dwords + 2; /* includes pbc */ struct hfi1_pportdata *ppd = ps->ppd; u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr; - u64 pbc_flags = 0; u8 sc5; unsigned long flags = 0; struct send_context *sc; @@ -942,9 +995,14 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, if (likely(pbc == 0)) { u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5); + struct verbs_txreq *tx = ps->s_txreq; + u8 opcode = get_opcode(&tx->phdr.hdr); + /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ - pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; - pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); + pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; + if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false))) + pbc = hfi1_fault_tx(qp, opcode, pbc); + pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen); } if (cb) iowait_pio_inc(&priv->s_iowait); @@ -1173,7 +1231,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) hdr = &ps->s_txreq->phdr.hdr; /* locate the pkey within the headers */ - lnh = be16_to_cpu(hdr->lrh[0]) & 3; + lnh = ib_get_lnh(hdr); if (lnh == HFI1_LRH_GRH) ohdr = &hdr->u.l.oth; else @@ -1220,17 +1278,20 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) static void hfi1_fill_device_attr(struct hfi1_devdata *dd) { struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; - u16 ver = dd->dc8051_ver; + u32 ver = dd->dc8051_ver; memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props)); - rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 16) | - (u64)dc8051_ver_min(ver); + rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 32) | + ((u64)(dc8051_ver_min(ver)) << 16) | + (u64)dc8051_ver_patch(ver); + rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE | - IB_DEVICE_MEM_MGT_EXTENSIONS; + IB_DEVICE_MEM_MGT_EXTENSIONS | + IB_DEVICE_RDMA_NETDEV_OPA_VNIC; rdi->dparms.props.page_size_cap = PAGE_SIZE; rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3; rdi->dparms.props.vendor_part_id = dd->pcidev->device; @@ -1398,14 +1459,14 @@ static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp, /* * convert ah port,sl to sc */ -u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah) +u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah) { - struct hfi1_ibport *ibp = to_iport(ibdev, ah->port_num); + struct hfi1_ibport *ibp = to_iport(ibdev, rdma_ah_get_port_num(ah)); - return ibp->sl_to_sc[ah->sl]; + return ibp->sl_to_sc[rdma_ah_get_sl(ah)]; } -static int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr) +static int hfi1_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr) { struct hfi1_ibport *ibp; struct hfi1_pportdata *ppd; @@ -1413,9 +1474,9 @@ static int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr) u8 sc5; /* test the mapping for validity */ - ibp = to_iport(ibdev, ah_attr->port_num); + ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr)); ppd = ppd_from_ibp(ibp); - sc5 = ibp->sl_to_sc[ah_attr->sl]; + sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)]; dd = dd_from_ppd(ppd); if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf) return -EINVAL; @@ -1423,7 +1484,7 @@ static int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr) } static void hfi1_notify_new_ah(struct ib_device *ibdev, - struct ib_ah_attr *ah_attr, + struct rdma_ah_attr *ah_attr, struct rvt_ah *ah) { struct hfi1_ibport *ibp; @@ -1436,9 +1497,9 @@ static void hfi1_notify_new_ah(struct ib_device *ibdev, * done being setup. We can however modify things which we need to set. */ - ibp = to_iport(ibdev, ah_attr->port_num); + ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr)); ppd = ppd_from_ibp(ibp); - sc5 = ibp->sl_to_sc[ah->attr.sl]; + sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)]; dd = dd_from_ppd(ppd); ah->vl = sc_to_vlt(dd, sc5); if (ah->vl < num_vls || ah->vl == 15) @@ -1447,17 +1508,21 @@ static void hfi1_notify_new_ah(struct ib_device *ibdev, struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid) { - struct ib_ah_attr attr; + struct rdma_ah_attr attr; struct ib_ah *ah = ERR_PTR(-EINVAL); struct rvt_qp *qp0; + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + struct hfi1_devdata *dd = dd_from_ppd(ppd); + u8 port_num = ppd->port; memset(&attr, 0, sizeof(attr)); - attr.dlid = dlid; - attr.port_num = ppd_from_ibp(ibp)->port; + attr.type = rdma_ah_find_type(&dd->verbs_dev.rdi.ibdev, port_num); + rdma_ah_set_dlid(&attr, dlid); + rdma_ah_set_port_num(&attr, ppd_from_ibp(ibp)->port); rcu_read_lock(); qp0 = rcu_dereference(ibp->rvp.qp[0]); if (qp0) - ah = ib_create_ah(qp0->ibqp.pd, &attr); + ah = rdma_create_ah(qp0->ibqp.pd, &attr); rcu_read_unlock(); return ah; } @@ -1504,10 +1569,10 @@ static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str, { struct rvt_dev_info *rdi = ib_to_rvt(ibdev); struct hfi1_ibdev *dev = dev_from_rdi(rdi); - u16 ver = dd_from_dev(dev)->dc8051_ver; + u32 ver = dd_from_dev(dev)->dc8051_ver; - snprintf(str, str_len, "%u.%u", dc8051_ver_maj(ver), - dc8051_ver_min(ver)); + snprintf(str, str_len, "%u.%u.%u", dc8051_ver_maj(ver), + dc8051_ver_min(ver), dc8051_ver_patch(ver)); } static const char * const driver_cntr_names[] = { @@ -1524,6 +1589,7 @@ static const char * const driver_cntr_names[] = { "DRIVER_EgrHdrFull" }; +static DEFINE_MUTEX(cntr_names_lock); /* protects the *_cntr_names bufers */ static const char **dev_cntr_names; static const char **port_cntr_names; static int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names); @@ -1578,6 +1644,7 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev, { int i, err; + mutex_lock(&cntr_names_lock); if (!cntr_names_initialized) { struct hfi1_devdata *dd = dd_from_ibdev(ibdev); @@ -1586,8 +1653,10 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev, num_driver_cntrs, &num_dev_cntrs, &dev_cntr_names); - if (err) + if (err) { + mutex_unlock(&cntr_names_lock); return NULL; + } for (i = 0; i < num_driver_cntrs; i++) dev_cntr_names[num_dev_cntrs + i] = @@ -1601,10 +1670,12 @@ static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev, if (err) { kfree(dev_cntr_names); dev_cntr_names = NULL; + mutex_unlock(&cntr_names_lock); return NULL; } cntr_names_initialized = 1; } + mutex_unlock(&cntr_names_lock); if (!port_num) return rdma_alloc_hw_stats_struct( @@ -1707,6 +1778,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->modify_device = modify_device; ibdev->alloc_hw_stats = alloc_hw_stats; ibdev->get_hw_stats = get_hw_stats; + ibdev->alloc_rdma_netdev = hfi1_vnic_alloc_rn; + ibdev->free_rdma_netdev = hfi1_vnic_free_rn; /* keep process mad in the driver */ ibdev->process_mad = hfi1_process_mad; @@ -1751,7 +1824,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free; dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps; dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset; - dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send; + dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send_from_rvt; dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send; dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _hfi1_schedule_send; dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr; @@ -1823,9 +1896,13 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd) del_timer_sync(&dev->mem_timer); verbs_txreq_exit(dev); + mutex_lock(&cntr_names_lock); kfree(dev_cntr_names); kfree(port_cntr_names); + dev_cntr_names = NULL; + port_cntr_names = NULL; cntr_names_initialized = 0; + mutex_unlock(&cntr_names_lock); } void hfi1_cnp_rcv(struct hfi1_packet *packet) @@ -1840,12 +1917,12 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet) switch (packet->qp->ibqp.qp_type) { case IB_QPT_UC: - rlid = qp->remote_ah_attr.dlid; + rlid = rdma_ah_get_dlid(&qp->remote_ah_attr); rqpn = qp->remote_qpn; svc_type = IB_CC_SVCTYPE_UC; break; case IB_QPT_RC: - rlid = qp->remote_ah_attr.dlid; + rlid = rdma_ah_get_dlid(&qp->remote_ah_attr); rqpn = qp->remote_qpn; svc_type = IB_CC_SVCTYPE_RC; break; @@ -1859,7 +1936,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet) return; } - sc5 = hdr2sc(hdr, packet->rhf); + sc5 = hfi1_9B_get_sc5(hdr, packet->rhf); sl = ibp->sc_to_sl[sc5]; lqpn = qp->ibqp.qp_num; diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 3a0b589e41c2..cd635d0c1d3b 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -125,7 +125,6 @@ struct hfi1_qp_priv { struct sdma_engine *s_sde; /* current sde */ struct send_context *s_sendcontext; /* current sendcontext */ u8 s_sc; /* SC[0..4] for next packet */ - u8 r_adefered; /* number of acks defered */ struct iowait s_iowait; struct rvt_qp *owner; }; @@ -140,6 +139,10 @@ struct hfi1_pkt_state { struct hfi1_pportdata *ppd; struct verbs_txreq *s_txreq; unsigned long flags; + unsigned long timeout; + unsigned long timeout_int; + int cpu; + bool in_thread; }; #define HFI1_PSN_CREDIT 16 @@ -195,6 +198,11 @@ struct hfi1_ibdev { struct dentry *hfi1_ibdev_dbg; /* per HFI symlinks to above */ struct dentry *hfi1_ibdev_link; +#ifdef CONFIG_FAULT_INJECTION + struct fault_opcode *fault_opcode; + struct fault_packet *fault_packet; + bool fault_suppress_err; +#endif #endif }; @@ -303,7 +311,7 @@ void hfi1_rc_hdrerr( u32 rcv_flags, struct rvt_qp *qp); -u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); +u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr); struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid); @@ -342,7 +350,7 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr, int has_grh, struct rvt_qp *qp, u32 bth0); u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, - struct ib_global_route *grh, u32 hwords, u32 nwords); + const struct ib_global_route *grh, u32 hwords, u32 nwords); void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, u32 bth0, u32 bth2, int middle, @@ -350,7 +358,9 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, void _hfi1_do_send(struct work_struct *work); -void hfi1_do_send(struct rvt_qp *qp); +void hfi1_do_send_from_rvt(struct rvt_qp *qp); + +void hfi1_do_send(struct rvt_qp *qp, bool in_thread); void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status); diff --git a/drivers/infiniband/hw/hfi1/vnic.h b/drivers/infiniband/hw/hfi1/vnic.h new file mode 100644 index 000000000000..e2c455299b53 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/vnic.h @@ -0,0 +1,184 @@ +#ifndef _HFI1_VNIC_H +#define _HFI1_VNIC_H +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include <rdma/opa_vnic.h> +#include "hfi.h" +#include "sdma.h" + +#define HFI1_VNIC_MAX_TXQ 16 +#define HFI1_VNIC_MAX_PAD 12 + +/* L2 header definitions */ +#define HFI1_L2_TYPE_OFFSET 0x7 +#define HFI1_L2_TYPE_SHFT 0x5 +#define HFI1_L2_TYPE_MASK 0x3 + +#define HFI1_GET_L2_TYPE(hdr) \ + ((*((u8 *)(hdr) + HFI1_L2_TYPE_OFFSET) >> HFI1_L2_TYPE_SHFT) & \ + HFI1_L2_TYPE_MASK) + +/* L4 type definitions */ +#define HFI1_L4_TYPE_OFFSET 8 + +#define HFI1_GET_L4_TYPE(data) \ + (*((u8 *)(data) + HFI1_L4_TYPE_OFFSET)) + +/* L4 header definitions */ +#define HFI1_VNIC_L4_HDR_OFFSET OPA_VNIC_L2_HDR_LEN + +#define HFI1_VNIC_GET_L4_HDR(data) \ + (*((u16 *)((u8 *)(data) + HFI1_VNIC_L4_HDR_OFFSET))) + +#define HFI1_VNIC_GET_VESWID(data) \ + (HFI1_VNIC_GET_L4_HDR(data) & 0xFFF) + +/* Service class */ +#define HFI1_VNIC_SC_OFFSET_LOW 6 +#define HFI1_VNIC_SC_OFFSET_HI 7 +#define HFI1_VNIC_SC_SHIFT 4 + +#define HFI1_VNIC_MAX_QUEUE 16 + +/** + * struct hfi1_vnic_sdma - VNIC per Tx ring SDMA information + * @dd - device data pointer + * @sde - sdma engine + * @vinfo - vnic info pointer + * @wait - iowait structure + * @stx - sdma tx request + * @state - vnic Tx ring SDMA state + * @q_idx - vnic Tx queue index + */ +struct hfi1_vnic_sdma { + struct hfi1_devdata *dd; + struct sdma_engine *sde; + struct hfi1_vnic_vport_info *vinfo; + struct iowait wait; + struct sdma_txreq stx; + unsigned int state; + u8 q_idx; +}; + +/** + * struct hfi1_vnic_rx_queue - HFI1 VNIC receive queue + * @idx: queue index + * @vinfo: pointer to vport information + * @netdev: network device + * @napi: netdev napi structure + * @skbq: queue of received socket buffers + */ +struct hfi1_vnic_rx_queue { + u8 idx; + struct hfi1_vnic_vport_info *vinfo; + struct net_device *netdev; + struct napi_struct napi; + struct sk_buff_head skbq; +}; + +/** + * struct hfi1_vnic_vport_info - HFI1 VNIC virtual port information + * @dd: device data pointer + * @netdev: net device pointer + * @flags: state flags + * @lock: vport lock + * @num_tx_q: number of transmit queues + * @num_rx_q: number of receive queues + * @vesw_id: virtual switch id + * @rxq: Array of receive queues + * @stats: per queue stats + * @sdma: VNIC SDMA structure per TXQ + */ +struct hfi1_vnic_vport_info { + struct hfi1_devdata *dd; + struct net_device *netdev; + unsigned long flags; + + /* Lock used around state updates */ + struct mutex lock; + + u8 num_tx_q; + u8 num_rx_q; + u16 vesw_id; + struct hfi1_vnic_rx_queue rxq[HFI1_NUM_VNIC_CTXT]; + + struct opa_vnic_stats stats[HFI1_VNIC_MAX_QUEUE]; + struct hfi1_vnic_sdma sdma[HFI1_VNIC_MAX_TXQ]; +}; + +#define v_dbg(format, arg...) \ + netdev_dbg(vinfo->netdev, format, ## arg) +#define v_err(format, arg...) \ + netdev_err(vinfo->netdev, format, ## arg) +#define v_info(format, arg...) \ + netdev_info(vinfo->netdev, format, ## arg) + +/* vnic hfi1 internal functions */ +void hfi1_vnic_setup(struct hfi1_devdata *dd); +void hfi1_vnic_cleanup(struct hfi1_devdata *dd); +int hfi1_vnic_txreq_init(struct hfi1_devdata *dd); +void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd); + +void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet); +void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo); +bool hfi1_vnic_sdma_write_avail(struct hfi1_vnic_vport_info *vinfo, + u8 q_idx); + +/* vnic rdma netdev operations */ +struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, + u8 port_num, + enum rdma_netdev_t type, + const char *name, + unsigned char name_assign_type, + void (*setup)(struct net_device *)); +void hfi1_vnic_free_rn(struct net_device *netdev); +int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx, + struct hfi1_vnic_vport_info *vinfo, + struct sk_buff *skb, u64 pbc, u8 plen); + +#endif /* _HFI1_VNIC_H */ diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c new file mode 100644 index 000000000000..b601c2929f8f --- /dev/null +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -0,0 +1,903 @@ +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * This file contains HFI1 support for VNIC functionality + */ + +#include <linux/io.h> +#include <linux/if_vlan.h> + +#include "vnic.h" + +#define HFI_TX_TIMEOUT_MS 1000 + +#define HFI1_VNIC_RCV_Q_SIZE 1024 + +#define HFI1_VNIC_UP 0 + +static DEFINE_SPINLOCK(vport_cntr_lock); + +static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt) +{ + unsigned int rcvctrl_ops = 0; + int ret; + + hfi1_init_ctxt(uctxt->sc); + + uctxt->do_interrupt = &handle_receive_interrupt; + + /* Now allocate the RcvHdr queue and eager buffers. */ + ret = hfi1_create_rcvhdrq(dd, uctxt); + if (ret) + goto done; + + ret = hfi1_setup_eagerbufs(uctxt); + if (ret) + goto done; + + if (uctxt->rcvhdrtail_kvaddr) + clear_rcvhdrtail(uctxt); + + rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB; + rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB; + + if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR)) + rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB; + if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL)) + rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB; + if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL)) + rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB; + if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL)) + rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB; + + hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt); + + uctxt->is_vnic = true; +done: + return ret; +} + +static int allocate_vnic_ctxt(struct hfi1_devdata *dd, + struct hfi1_ctxtdata **vnic_ctxt) +{ + struct hfi1_ctxtdata *uctxt; + unsigned int ctxt; + int ret; + + if (dd->flags & HFI1_FROZEN) + return -EIO; + + for (ctxt = dd->first_dyn_alloc_ctxt; + ctxt < dd->num_rcv_contexts; ctxt++) + if (!dd->rcd[ctxt]) + break; + + if (ctxt == dd->num_rcv_contexts) + return -EBUSY; + + uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, dd->node); + if (!uctxt) { + dd_dev_err(dd, "Unable to create ctxtdata, failing open\n"); + return -ENOMEM; + } + + uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) | + HFI1_CAP_KGET(NODROP_RHQ_FULL) | + HFI1_CAP_KGET(NODROP_EGR_FULL) | + HFI1_CAP_KGET(DMA_RTAIL); + uctxt->seq_cnt = 1; + + /* Allocate and enable a PIO send context */ + uctxt->sc = sc_alloc(dd, SC_VNIC, uctxt->rcvhdrqentsize, + uctxt->numa_id); + + ret = uctxt->sc ? 0 : -ENOMEM; + if (ret) + goto bail; + + dd_dev_dbg(dd, "allocated vnic send context %u(%u)\n", + uctxt->sc->sw_index, uctxt->sc->hw_context); + ret = sc_enable(uctxt->sc); + if (ret) + goto bail; + + if (dd->num_msix_entries) + hfi1_set_vnic_msix_info(uctxt); + + hfi1_stats.sps_ctxts++; + dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt); + *vnic_ctxt = uctxt; + + return ret; +bail: + /* + * hfi1_free_ctxtdata() also releases send_context + * structure if uctxt->sc is not null + */ + dd->rcd[uctxt->ctxt] = NULL; + hfi1_free_ctxtdata(dd, uctxt); + dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret); + return ret; +} + +static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, + struct hfi1_ctxtdata *uctxt) +{ + unsigned long flags; + + dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt); + flush_wc(); + + if (dd->num_msix_entries) + hfi1_reset_vnic_msix_info(uctxt); + + spin_lock_irqsave(&dd->uctxt_lock, flags); + /* + * Disable receive context and interrupt available, reset all + * RcvCtxtCtrl bits to default values. + */ + hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS | + HFI1_RCVCTRL_TIDFLOW_DIS | + HFI1_RCVCTRL_INTRAVAIL_DIS | + HFI1_RCVCTRL_ONE_PKT_EGR_DIS | + HFI1_RCVCTRL_NO_RHQ_DROP_DIS | + HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt); + /* + * VNIC contexts are allocated from user context pool. + * Release them back to user context pool. + * + * Reset context integrity checks to default. + * (writes to CSRs probably belong in chip.c) + */ + write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE, + hfi1_pkt_default_send_ctxt_mask(dd, SC_USER)); + sc_disable(uctxt->sc); + + dd->send_contexts[uctxt->sc->sw_index].type = SC_USER; + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + + dd->rcd[uctxt->ctxt] = NULL; + uctxt->event_flags = 0; + + hfi1_clear_tids(uctxt); + hfi1_clear_ctxt_pkey(dd, uctxt); + + hfi1_stats.sps_ctxts--; + hfi1_free_ctxtdata(dd, uctxt); +} + +void hfi1_vnic_setup(struct hfi1_devdata *dd) +{ + idr_init(&dd->vnic.vesw_idr); +} + +void hfi1_vnic_cleanup(struct hfi1_devdata *dd) +{ + idr_destroy(&dd->vnic.vesw_idr); +} + +#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do { \ + u64 *src64, *dst64; \ + for (src64 = &qstats->x_grp.unicast, \ + dst64 = &stats->x_grp.unicast; \ + dst64 <= &stats->x_grp.s_1519_max;) { \ + *dst64++ += *src64++; \ + } \ + } while (0) + +/* hfi1_vnic_update_stats - update statistics */ +static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo, + struct opa_vnic_stats *stats) +{ + struct net_device *netdev = vinfo->netdev; + u8 i; + + /* add tx counters on different queues */ + for (i = 0; i < vinfo->num_tx_q; i++) { + struct opa_vnic_stats *qstats = &vinfo->stats[i]; + struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats; + + stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors; + stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors; + stats->tx_drop_state += qstats->tx_drop_state; + stats->tx_dlid_zero += qstats->tx_dlid_zero; + + SUM_GRP_COUNTERS(stats, qstats, tx_grp); + stats->netstats.tx_packets += qnstats->tx_packets; + stats->netstats.tx_bytes += qnstats->tx_bytes; + } + + /* add rx counters on different queues */ + for (i = 0; i < vinfo->num_rx_q; i++) { + struct opa_vnic_stats *qstats = &vinfo->stats[i]; + struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats; + + stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors; + stats->netstats.rx_nohandler += qnstats->rx_nohandler; + stats->rx_drop_state += qstats->rx_drop_state; + stats->rx_oversize += qstats->rx_oversize; + stats->rx_runt += qstats->rx_runt; + + SUM_GRP_COUNTERS(stats, qstats, rx_grp); + stats->netstats.rx_packets += qnstats->rx_packets; + stats->netstats.rx_bytes += qnstats->rx_bytes; + } + + stats->netstats.tx_errors = stats->netstats.tx_fifo_errors + + stats->netstats.tx_carrier_errors + + stats->tx_drop_state + stats->tx_dlid_zero; + stats->netstats.tx_dropped = stats->netstats.tx_errors; + + stats->netstats.rx_errors = stats->netstats.rx_fifo_errors + + stats->netstats.rx_nohandler + + stats->rx_drop_state + stats->rx_oversize + + stats->rx_runt; + stats->netstats.rx_dropped = stats->netstats.rx_errors; + + netdev->stats.tx_packets = stats->netstats.tx_packets; + netdev->stats.tx_bytes = stats->netstats.tx_bytes; + netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors; + netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors; + netdev->stats.tx_errors = stats->netstats.tx_errors; + netdev->stats.tx_dropped = stats->netstats.tx_dropped; + + netdev->stats.rx_packets = stats->netstats.rx_packets; + netdev->stats.rx_bytes = stats->netstats.rx_bytes; + netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors; + netdev->stats.multicast = stats->rx_grp.mcastbcast; + netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt; + netdev->stats.rx_errors = stats->netstats.rx_errors; + netdev->stats.rx_dropped = stats->netstats.rx_dropped; +} + +/* update_len_counters - update pkt's len histogram counters */ +static inline void update_len_counters(struct opa_vnic_grp_stats *grp, + int len) +{ + /* account for 4 byte FCS */ + if (len >= 1515) + grp->s_1519_max++; + else if (len >= 1020) + grp->s_1024_1518++; + else if (len >= 508) + grp->s_512_1023++; + else if (len >= 252) + grp->s_256_511++; + else if (len >= 124) + grp->s_128_255++; + else if (len >= 61) + grp->s_65_127++; + else + grp->s_64++; +} + +/* hfi1_vnic_update_tx_counters - update transmit counters */ +static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo, + u8 q_idx, struct sk_buff *skb, int err) +{ + struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb); + struct opa_vnic_stats *stats = &vinfo->stats[q_idx]; + struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp; + u16 vlan_tci; + + stats->netstats.tx_packets++; + stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN; + + update_len_counters(tx_grp, skb->len); + + /* rest of the counts are for good packets only */ + if (unlikely(err)) + return; + + if (is_multicast_ether_addr(mac_hdr->h_dest)) + tx_grp->mcastbcast++; + else + tx_grp->unicast++; + + if (!__vlan_get_tag(skb, &vlan_tci)) + tx_grp->vlan++; + else + tx_grp->untagged++; +} + +/* hfi1_vnic_update_rx_counters - update receive counters */ +static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo, + u8 q_idx, struct sk_buff *skb, int err) +{ + struct ethhdr *mac_hdr = (struct ethhdr *)skb->data; + struct opa_vnic_stats *stats = &vinfo->stats[q_idx]; + struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp; + u16 vlan_tci; + + stats->netstats.rx_packets++; + stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN; + + update_len_counters(rx_grp, skb->len); + + /* rest of the counts are for good packets only */ + if (unlikely(err)) + return; + + if (is_multicast_ether_addr(mac_hdr->h_dest)) + rx_grp->mcastbcast++; + else + rx_grp->unicast++; + + if (!__vlan_get_tag(skb, &vlan_tci)) + rx_grp->vlan++; + else + rx_grp->untagged++; +} + +/* This function is overloaded for opa_vnic specific implementation */ +static void hfi1_vnic_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) +{ + struct opa_vnic_stats *vstats = (struct opa_vnic_stats *)stats; + struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); + + hfi1_vnic_update_stats(vinfo, vstats); +} + +static u64 create_bypass_pbc(u32 vl, u32 dw_len) +{ + u64 pbc; + + pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT) + | PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN + | PBC_PACKET_BYPASS + | ((vl & PBC_VL_MASK) << PBC_VL_SHIFT) + | (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT; + + return pbc; +} + +/* hfi1_vnic_maybe_stop_tx - stop tx queue if required */ +static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo, + u8 q_idx) +{ + netif_stop_subqueue(vinfo->netdev, q_idx); + if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx)) + return; + + netif_start_subqueue(vinfo->netdev, q_idx); +} + +static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb, + struct net_device *netdev) +{ + struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); + u8 pad_len, q_idx = skb->queue_mapping; + struct hfi1_devdata *dd = vinfo->dd; + struct opa_vnic_skb_mdata *mdata; + u32 pkt_len, total_len; + int err = -EINVAL; + u64 pbc; + + v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len); + if (unlikely(!netif_oper_up(netdev))) { + vinfo->stats[q_idx].tx_drop_state++; + goto tx_finish; + } + + /* take out meta data */ + mdata = (struct opa_vnic_skb_mdata *)skb->data; + skb_pull(skb, sizeof(*mdata)); + if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) { + vinfo->stats[q_idx].tx_dlid_zero++; + goto tx_finish; + } + + /* add tail padding (for 8 bytes size alignment) and icrc */ + pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7; + pad_len += OPA_VNIC_ICRC_TAIL_LEN; + + /* + * pkt_len is how much data we have to write, includes header and data. + * total_len is length of the packet in Dwords plus the PBC should not + * include the CRC. + */ + pkt_len = (skb->len + pad_len) >> 2; + total_len = pkt_len + 2; /* PBC + packet */ + + pbc = create_bypass_pbc(mdata->vl, total_len); + + skb_get(skb); + v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len); + err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len); + if (unlikely(err)) { + if (err == -ENOMEM) + vinfo->stats[q_idx].netstats.tx_fifo_errors++; + else if (err != -EBUSY) + vinfo->stats[q_idx].netstats.tx_carrier_errors++; + } + /* remove the header before updating tx counters */ + skb_pull(skb, OPA_VNIC_HDR_LEN); + + if (unlikely(err == -EBUSY)) { + hfi1_vnic_maybe_stop_tx(vinfo, q_idx); + dev_kfree_skb_any(skb); + return NETDEV_TX_BUSY; + } + +tx_finish: + /* update tx counters */ + hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err); + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} + +static u16 hfi1_vnic_select_queue(struct net_device *netdev, + struct sk_buff *skb, + void *accel_priv, + select_queue_fallback_t fallback) +{ + struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); + struct opa_vnic_skb_mdata *mdata; + struct sdma_engine *sde; + + mdata = (struct opa_vnic_skb_mdata *)skb->data; + sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl); + return sde->this_idx; +} + +/* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */ +static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq, + struct sk_buff *skb) +{ + struct hfi1_vnic_vport_info *vinfo = rxq->vinfo; + int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN; + int rc = -EFAULT; + + skb_pull(skb, OPA_VNIC_HDR_LEN); + + /* Validate Packet length */ + if (unlikely(skb->len > max_len)) + vinfo->stats[rxq->idx].rx_oversize++; + else if (unlikely(skb->len < ETH_ZLEN)) + vinfo->stats[rxq->idx].rx_runt++; + else + rc = 0; + return rc; +} + +static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq) +{ + unsigned char *pad_info; + struct sk_buff *skb; + + skb = skb_dequeue(&rxq->skbq); + if (unlikely(!skb)) + return NULL; + + /* remove tail padding and icrc */ + pad_info = skb->data + skb->len - 1; + skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN - + ((*pad_info) & 0x7))); + + return skb; +} + +/* hfi1_vnic_handle_rx - handle skb receive */ +static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq, + int *work_done, int work_to_do) +{ + struct hfi1_vnic_vport_info *vinfo = rxq->vinfo; + struct sk_buff *skb; + int rc; + + while (1) { + if (*work_done >= work_to_do) + break; + + skb = hfi1_vnic_get_skb(rxq); + if (unlikely(!skb)) + break; + + rc = hfi1_vnic_decap_skb(rxq, skb); + /* update rx counters */ + hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc); + if (unlikely(rc)) { + dev_kfree_skb_any(skb); + continue; + } + + skb_checksum_none_assert(skb); + skb->protocol = eth_type_trans(skb, rxq->netdev); + + napi_gro_receive(&rxq->napi, skb); + (*work_done)++; + } +} + +/* hfi1_vnic_napi - napi receive polling callback function */ +static int hfi1_vnic_napi(struct napi_struct *napi, int budget) +{ + struct hfi1_vnic_rx_queue *rxq = container_of(napi, + struct hfi1_vnic_rx_queue, napi); + struct hfi1_vnic_vport_info *vinfo = rxq->vinfo; + int work_done = 0; + + v_dbg("napi %d budget %d\n", rxq->idx, budget); + hfi1_vnic_handle_rx(rxq, &work_done, budget); + + v_dbg("napi %d work_done %d\n", rxq->idx, work_done); + if (work_done < budget) + napi_complete(napi); + + return work_done; +} + +void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet) +{ + struct hfi1_devdata *dd = packet->rcd->dd; + struct hfi1_vnic_vport_info *vinfo = NULL; + struct hfi1_vnic_rx_queue *rxq; + struct sk_buff *skb; + int l4_type, vesw_id = -1; + u8 q_idx; + + l4_type = HFI1_GET_L4_TYPE(packet->ebuf); + if (likely(l4_type == OPA_VNIC_L4_ETHR)) { + vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf); + vinfo = idr_find(&dd->vnic.vesw_idr, vesw_id); + + /* + * In case of invalid vesw id, count the error on + * the first available vport. + */ + if (unlikely(!vinfo)) { + struct hfi1_vnic_vport_info *vinfo_tmp; + int id_tmp = 0; + + vinfo_tmp = idr_get_next(&dd->vnic.vesw_idr, &id_tmp); + if (vinfo_tmp) { + spin_lock(&vport_cntr_lock); + vinfo_tmp->stats[0].netstats.rx_nohandler++; + spin_unlock(&vport_cntr_lock); + } + } + } + + if (unlikely(!vinfo)) { + dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n", + l4_type, vesw_id, packet->rcd->ctxt); + return; + } + + q_idx = packet->rcd->vnic_q_idx; + rxq = &vinfo->rxq[q_idx]; + if (unlikely(!netif_oper_up(vinfo->netdev))) { + vinfo->stats[q_idx].rx_drop_state++; + skb_queue_purge(&rxq->skbq); + return; + } + + if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) { + vinfo->stats[q_idx].netstats.rx_fifo_errors++; + return; + } + + skb = netdev_alloc_skb(vinfo->netdev, packet->tlen); + if (unlikely(!skb)) { + vinfo->stats[q_idx].netstats.rx_fifo_errors++; + return; + } + + memcpy(skb->data, packet->ebuf, packet->tlen); + skb_put(skb, packet->tlen); + skb_queue_tail(&rxq->skbq, skb); + + if (napi_schedule_prep(&rxq->napi)) { + v_dbg("napi %d scheduling\n", q_idx); + __napi_schedule(&rxq->napi); + } +} + +static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo) +{ + struct hfi1_devdata *dd = vinfo->dd; + struct net_device *netdev = vinfo->netdev; + int i, rc; + + /* ensure virtual eth switch id is valid */ + if (!vinfo->vesw_id) + return -EINVAL; + + rc = idr_alloc(&dd->vnic.vesw_idr, vinfo, vinfo->vesw_id, + vinfo->vesw_id + 1, GFP_NOWAIT); + if (rc < 0) + return rc; + + for (i = 0; i < vinfo->num_rx_q; i++) { + struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i]; + + skb_queue_head_init(&rxq->skbq); + napi_enable(&rxq->napi); + } + + netif_carrier_on(netdev); + netif_tx_start_all_queues(netdev); + set_bit(HFI1_VNIC_UP, &vinfo->flags); + + return 0; +} + +static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo) +{ + struct hfi1_devdata *dd = vinfo->dd; + u8 i; + + clear_bit(HFI1_VNIC_UP, &vinfo->flags); + netif_carrier_off(vinfo->netdev); + netif_tx_disable(vinfo->netdev); + idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id); + + /* ensure irqs see the change */ + hfi1_vnic_synchronize_irq(dd); + + /* remove unread skbs */ + for (i = 0; i < vinfo->num_rx_q; i++) { + struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i]; + + napi_disable(&rxq->napi); + skb_queue_purge(&rxq->skbq); + } +} + +static int hfi1_netdev_open(struct net_device *netdev) +{ + struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); + int rc; + + mutex_lock(&vinfo->lock); + rc = hfi1_vnic_up(vinfo); + mutex_unlock(&vinfo->lock); + return rc; +} + +static int hfi1_netdev_close(struct net_device *netdev) +{ + struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); + + mutex_lock(&vinfo->lock); + if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) + hfi1_vnic_down(vinfo); + mutex_unlock(&vinfo->lock); + return 0; +} + +static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd, + struct hfi1_ctxtdata **vnic_ctxt) +{ + int rc; + + rc = allocate_vnic_ctxt(dd, vnic_ctxt); + if (rc) { + dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc); + return rc; + } + + rc = setup_vnic_ctxt(dd, *vnic_ctxt); + if (rc) { + dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc); + deallocate_vnic_ctxt(dd, *vnic_ctxt); + *vnic_ctxt = NULL; + } + + return rc; +} + +static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo) +{ + struct hfi1_devdata *dd = vinfo->dd; + int i, rc = 0; + + mutex_lock(&hfi1_mutex); + if (!dd->vnic.num_vports) { + rc = hfi1_vnic_txreq_init(dd); + if (rc) + goto txreq_fail; + + dd->vnic.msix_idx = dd->first_dyn_msix_idx; + } + + for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) { + rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]); + if (rc) + break; + dd->vnic.ctxt[i]->vnic_q_idx = i; + } + + if (i < vinfo->num_rx_q) { + /* + * If required amount of contexts is not + * allocated successfully then remaining contexts + * are released. + */ + while (i-- > dd->vnic.num_ctxt) { + deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]); + dd->vnic.ctxt[i] = NULL; + } + goto alloc_fail; + } + + if (dd->vnic.num_ctxt != i) { + dd->vnic.num_ctxt = i; + hfi1_init_vnic_rsm(dd); + } + + dd->vnic.num_vports++; + hfi1_vnic_sdma_init(vinfo); +alloc_fail: + if (!dd->vnic.num_vports) + hfi1_vnic_txreq_deinit(dd); +txreq_fail: + mutex_unlock(&hfi1_mutex); + return rc; +} + +static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo) +{ + struct hfi1_devdata *dd = vinfo->dd; + int i; + + mutex_lock(&hfi1_mutex); + if (--dd->vnic.num_vports == 0) { + for (i = 0; i < dd->vnic.num_ctxt; i++) { + deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]); + dd->vnic.ctxt[i] = NULL; + } + hfi1_deinit_vnic_rsm(dd); + dd->vnic.num_ctxt = 0; + hfi1_vnic_txreq_deinit(dd); + } + mutex_unlock(&hfi1_mutex); +} + +static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id) +{ + struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); + bool reopen = false; + + /* + * If vesw_id is being changed, and if the vnic port is up, + * reset the vnic port to ensure new vesw_id gets picked up + */ + if (id != vinfo->vesw_id) { + mutex_lock(&vinfo->lock); + if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) { + hfi1_vnic_down(vinfo); + reopen = true; + } + + vinfo->vesw_id = id; + if (reopen) + hfi1_vnic_up(vinfo); + + mutex_unlock(&vinfo->lock); + } +} + +/* netdev ops */ +static const struct net_device_ops hfi1_netdev_ops = { + .ndo_open = hfi1_netdev_open, + .ndo_stop = hfi1_netdev_close, + .ndo_start_xmit = hfi1_netdev_start_xmit, + .ndo_select_queue = hfi1_vnic_select_queue, + .ndo_get_stats64 = hfi1_vnic_get_stats64, +}; + +struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, + u8 port_num, + enum rdma_netdev_t type, + const char *name, + unsigned char name_assign_type, + void (*setup)(struct net_device *)) +{ + struct hfi1_devdata *dd = dd_from_ibdev(device); + struct hfi1_vnic_vport_info *vinfo; + struct net_device *netdev; + struct rdma_netdev *rn; + int i, size, rc; + + if (!port_num || (port_num > dd->num_pports)) + return ERR_PTR(-EINVAL); + + if (type != RDMA_NETDEV_OPA_VNIC) + return ERR_PTR(-EOPNOTSUPP); + + size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo); + netdev = alloc_netdev_mqs(size, name, name_assign_type, setup, + dd->chip_sdma_engines, HFI1_NUM_VNIC_CTXT); + if (!netdev) + return ERR_PTR(-ENOMEM); + + rn = netdev_priv(netdev); + vinfo = opa_vnic_dev_priv(netdev); + vinfo->dd = dd; + vinfo->num_tx_q = dd->chip_sdma_engines; + vinfo->num_rx_q = HFI1_NUM_VNIC_CTXT; + vinfo->netdev = netdev; + rn->set_id = hfi1_vnic_set_vesw_id; + + netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG; + netdev->hw_features = netdev->features; + netdev->vlan_features = netdev->features; + netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS); + netdev->netdev_ops = &hfi1_netdev_ops; + mutex_init(&vinfo->lock); + + for (i = 0; i < vinfo->num_rx_q; i++) { + struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i]; + + rxq->idx = i; + rxq->vinfo = vinfo; + rxq->netdev = netdev; + netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64); + } + + rc = hfi1_vnic_init(vinfo); + if (rc) + goto init_fail; + + return netdev; +init_fail: + mutex_destroy(&vinfo->lock); + free_netdev(netdev); + return ERR_PTR(rc); +} + +void hfi1_vnic_free_rn(struct net_device *netdev) +{ + struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); + + hfi1_vnic_deinit(vinfo); + mutex_destroy(&vinfo->lock); + free_netdev(netdev); +} diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c new file mode 100644 index 000000000000..51a817d3aa14 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c @@ -0,0 +1,323 @@ +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * This file contains HFI1 support for VNIC SDMA functionality + */ + +#include "sdma.h" +#include "vnic.h" + +#define HFI1_VNIC_SDMA_Q_ACTIVE BIT(0) +#define HFI1_VNIC_SDMA_Q_DEFERRED BIT(1) + +#define HFI1_VNIC_TXREQ_NAME_LEN 32 +#define HFI1_VNIC_SDMA_DESC_WTRMRK 64 +#define HFI1_VNIC_SDMA_RETRY_COUNT 1 + +/* + * struct vnic_txreq - VNIC transmit descriptor + * @txreq: sdma transmit request + * @sdma: vnic sdma pointer + * @skb: skb to send + * @pad: pad buffer + * @plen: pad length + * @pbc_val: pbc value + * @retry_count: tx retry count + */ +struct vnic_txreq { + struct sdma_txreq txreq; + struct hfi1_vnic_sdma *sdma; + + struct sk_buff *skb; + unsigned char pad[HFI1_VNIC_MAX_PAD]; + u16 plen; + __le64 pbc_val; + + u32 retry_count; +}; + +static void vnic_sdma_complete(struct sdma_txreq *txreq, + int status) +{ + struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq); + struct hfi1_vnic_sdma *vnic_sdma = tx->sdma; + + sdma_txclean(vnic_sdma->dd, txreq); + dev_kfree_skb_any(tx->skb); + kmem_cache_free(vnic_sdma->dd->vnic.txreq_cache, tx); +} + +static noinline int build_vnic_ulp_payload(struct sdma_engine *sde, + struct vnic_txreq *tx) +{ + int i, ret = 0; + + ret = sdma_txadd_kvaddr( + sde->dd, + &tx->txreq, + tx->skb->data, + skb_headlen(tx->skb)); + if (unlikely(ret)) + goto bail_txadd; + + for (i = 0; i < skb_shinfo(tx->skb)->nr_frags; i++) { + struct skb_frag_struct *frag = &skb_shinfo(tx->skb)->frags[i]; + + /* combine physically continuous fragments later? */ + ret = sdma_txadd_page(sde->dd, + &tx->txreq, + skb_frag_page(frag), + frag->page_offset, + skb_frag_size(frag)); + if (unlikely(ret)) + goto bail_txadd; + } + + if (tx->plen) + ret = sdma_txadd_kvaddr(sde->dd, &tx->txreq, + tx->pad + HFI1_VNIC_MAX_PAD - tx->plen, + tx->plen); + +bail_txadd: + return ret; +} + +static int build_vnic_tx_desc(struct sdma_engine *sde, + struct vnic_txreq *tx, + u64 pbc) +{ + int ret = 0; + u16 hdrbytes = 2 << 2; /* PBC */ + + ret = sdma_txinit_ahg( + &tx->txreq, + 0, + hdrbytes + tx->skb->len + tx->plen, + 0, + 0, + NULL, + 0, + vnic_sdma_complete); + if (unlikely(ret)) + goto bail_txadd; + + /* add pbc */ + tx->pbc_val = cpu_to_le64(pbc); + ret = sdma_txadd_kvaddr( + sde->dd, + &tx->txreq, + &tx->pbc_val, + hdrbytes); + if (unlikely(ret)) + goto bail_txadd; + + /* add the ulp payload */ + ret = build_vnic_ulp_payload(sde, tx); +bail_txadd: + return ret; +} + +/* setup the last plen bypes of pad */ +static inline void hfi1_vnic_update_pad(unsigned char *pad, u8 plen) +{ + pad[HFI1_VNIC_MAX_PAD - 1] = plen - OPA_VNIC_ICRC_TAIL_LEN; +} + +int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx, + struct hfi1_vnic_vport_info *vinfo, + struct sk_buff *skb, u64 pbc, u8 plen) +{ + struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[q_idx]; + struct sdma_engine *sde = vnic_sdma->sde; + struct vnic_txreq *tx; + int ret = -ECOMM; + + if (unlikely(READ_ONCE(vnic_sdma->state) != HFI1_VNIC_SDMA_Q_ACTIVE)) + goto tx_err; + + if (unlikely(!sde || !sdma_running(sde))) + goto tx_err; + + tx = kmem_cache_alloc(dd->vnic.txreq_cache, GFP_ATOMIC); + if (unlikely(!tx)) { + ret = -ENOMEM; + goto tx_err; + } + + tx->sdma = vnic_sdma; + tx->skb = skb; + hfi1_vnic_update_pad(tx->pad, plen); + tx->plen = plen; + ret = build_vnic_tx_desc(sde, tx, pbc); + if (unlikely(ret)) + goto free_desc; + tx->retry_count = 0; + + ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq); + /* When -ECOMM, sdma callback will be called with ABORT status */ + if (unlikely(ret && unlikely(ret != -ECOMM))) + goto free_desc; + + return ret; + +free_desc: + sdma_txclean(dd, &tx->txreq); + kmem_cache_free(dd->vnic.txreq_cache, tx); +tx_err: + if (ret != -EBUSY) + dev_kfree_skb_any(skb); + return ret; +} + +/* + * hfi1_vnic_sdma_sleep - vnic sdma sleep function + * + * This function gets called from sdma_send_txreq() when there are not enough + * sdma descriptors available to send the packet. It adds Tx queue's wait + * structure to sdma engine's dmawait list to be woken up when descriptors + * become available. + */ +static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde, + struct iowait *wait, + struct sdma_txreq *txreq, + unsigned int seq) +{ + struct hfi1_vnic_sdma *vnic_sdma = + container_of(wait, struct hfi1_vnic_sdma, wait); + struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev; + struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq); + + if (sdma_progress(sde, seq, txreq)) + if (tx->retry_count++ < HFI1_VNIC_SDMA_RETRY_COUNT) + return -EAGAIN; + + vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED; + write_seqlock(&dev->iowait_lock); + if (list_empty(&vnic_sdma->wait.list)) + list_add_tail(&vnic_sdma->wait.list, &sde->dmawait); + write_sequnlock(&dev->iowait_lock); + return -EBUSY; +} + +/* + * hfi1_vnic_sdma_wakeup - vnic sdma wakeup function + * + * This function gets called when SDMA descriptors becomes available and Tx + * queue's wait structure was previously added to sdma engine's dmawait list. + * It notifies the upper driver about Tx queue wakeup. + */ +static void hfi1_vnic_sdma_wakeup(struct iowait *wait, int reason) +{ + struct hfi1_vnic_sdma *vnic_sdma = + container_of(wait, struct hfi1_vnic_sdma, wait); + struct hfi1_vnic_vport_info *vinfo = vnic_sdma->vinfo; + + vnic_sdma->state = HFI1_VNIC_SDMA_Q_ACTIVE; + if (__netif_subqueue_stopped(vinfo->netdev, vnic_sdma->q_idx)) + netif_wake_subqueue(vinfo->netdev, vnic_sdma->q_idx); +}; + +inline bool hfi1_vnic_sdma_write_avail(struct hfi1_vnic_vport_info *vinfo, + u8 q_idx) +{ + struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[q_idx]; + + return (READ_ONCE(vnic_sdma->state) == HFI1_VNIC_SDMA_Q_ACTIVE); +} + +void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo) +{ + int i; + + for (i = 0; i < vinfo->num_tx_q; i++) { + struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[i]; + + iowait_init(&vnic_sdma->wait, 0, NULL, hfi1_vnic_sdma_sleep, + hfi1_vnic_sdma_wakeup, NULL); + vnic_sdma->sde = &vinfo->dd->per_sdma[i]; + vnic_sdma->dd = vinfo->dd; + vnic_sdma->vinfo = vinfo; + vnic_sdma->q_idx = i; + vnic_sdma->state = HFI1_VNIC_SDMA_Q_ACTIVE; + + /* Add a free descriptor watermark for wakeups */ + if (vnic_sdma->sde->descq_cnt > HFI1_VNIC_SDMA_DESC_WTRMRK) { + INIT_LIST_HEAD(&vnic_sdma->stx.list); + vnic_sdma->stx.num_desc = HFI1_VNIC_SDMA_DESC_WTRMRK; + list_add_tail(&vnic_sdma->stx.list, + &vnic_sdma->wait.tx_head); + } + } +} + +static void hfi1_vnic_txreq_kmem_cache_ctor(void *obj) +{ + struct vnic_txreq *tx = (struct vnic_txreq *)obj; + + memset(tx, 0, sizeof(*tx)); +} + +int hfi1_vnic_txreq_init(struct hfi1_devdata *dd) +{ + char buf[HFI1_VNIC_TXREQ_NAME_LEN]; + + snprintf(buf, sizeof(buf), "hfi1_%u_vnic_txreq_cache", dd->unit); + dd->vnic.txreq_cache = kmem_cache_create(buf, + sizeof(struct vnic_txreq), + 0, SLAB_HWCACHE_ALIGN, + hfi1_vnic_txreq_kmem_cache_ctor); + if (!dd->vnic.txreq_cache) + return -ENOMEM; + return 0; +} + +void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd) +{ + kmem_cache_destroy(dd->vnic.txreq_cache); + dd->vnic.txreq_cache = NULL; +} |