From b03b99a329a14b7302f37c3ea6da3848db41c8c5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 27 Mar 2017 21:53:38 -0700 Subject: acpi, nfit, libnvdimm: fix interleave set cookie calculation (64-bit comparison) While reviewing the -stable patch for commit 86ef58a4e35e "nfit, libnvdimm: fix interleave set cookie calculation" Ben noted: "This is returning an int, thus it's effectively doing a 32-bit comparison and not the 64-bit comparison you say is needed." Update the compare operation to be immune to this integer demotion problem. Cc: Cc: Nicholas Moulin Fixes: 86ef58a4e35e ("nfit, libnvdimm: fix interleave set cookie calculation") Reported-by: Ben Hutchings Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 662036bdc65e..c8ea9d698cd0 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1617,7 +1617,11 @@ static int cmp_map(const void *m0, const void *m1) const struct nfit_set_info_map *map0 = m0; const struct nfit_set_info_map *map1 = m1; - return map0->region_offset - map1->region_offset; + if (map0->region_offset < map1->region_offset) + return -1; + else if (map0->region_offset > map1->region_offset) + return 1; + return 0; } /* Retrieve the nth entry referencing this spa */ -- cgit v1.2.3-59-g8ed1b From 62e24c5775ecb387a3eb33701378ccfa6dbc98ee Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Fri, 5 Feb 2016 13:41:39 +0100 Subject: reset: add exported __reset_control_get, return NULL if optional Rename the internal __reset_control_get/put functions to __reset_control_get/put_internal and add an exported __reset_control_get equivalent to __of_reset_control_get that takes a struct device parameter. This avoids the confusing call to __of_reset_control_get in the non-DT case and fixes the devm_reset_control_get_optional function to return NULL if RESET_CONTROLLER is enabled but dev->of_node == NULL. Fixes: bb475230b8e5 ("reset: make optional functions really optional") Reported-by: Andy Shevchenko Tested-by: Andy Shevchenko Cc: Ramiro Oliveira Signed-off-by: Philipp Zabel --- drivers/reset/core.c | 22 ++++++++++++++++------ include/linux/reset.h | 22 ++++++++++++++-------- 2 files changed, 30 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/reset/core.c b/drivers/reset/core.c index f1e5e65388bb..cd739d2fa160 100644 --- a/drivers/reset/core.c +++ b/drivers/reset/core.c @@ -275,7 +275,7 @@ int reset_control_status(struct reset_control *rstc) } EXPORT_SYMBOL_GPL(reset_control_status); -static struct reset_control *__reset_control_get( +static struct reset_control *__reset_control_get_internal( struct reset_controller_dev *rcdev, unsigned int index, bool shared) { @@ -308,7 +308,7 @@ static struct reset_control *__reset_control_get( return rstc; } -static void __reset_control_put(struct reset_control *rstc) +static void __reset_control_put_internal(struct reset_control *rstc) { lockdep_assert_held(&reset_list_mutex); @@ -377,7 +377,7 @@ struct reset_control *__of_reset_control_get(struct device_node *node, } /* reset_list_mutex also protects the rcdev's reset_control list */ - rstc = __reset_control_get(rcdev, rstc_id, shared); + rstc = __reset_control_get_internal(rcdev, rstc_id, shared); mutex_unlock(&reset_list_mutex); @@ -385,6 +385,17 @@ struct reset_control *__of_reset_control_get(struct device_node *node, } EXPORT_SYMBOL_GPL(__of_reset_control_get); +struct reset_control *__reset_control_get(struct device *dev, const char *id, + int index, bool shared, bool optional) +{ + if (dev->of_node) + return __of_reset_control_get(dev->of_node, id, index, shared, + optional); + + return optional ? NULL : ERR_PTR(-EINVAL); +} +EXPORT_SYMBOL_GPL(__reset_control_get); + /** * reset_control_put - free the reset controller * @rstc: reset controller @@ -396,7 +407,7 @@ void reset_control_put(struct reset_control *rstc) return; mutex_lock(&reset_list_mutex); - __reset_control_put(rstc); + __reset_control_put_internal(rstc); mutex_unlock(&reset_list_mutex); } EXPORT_SYMBOL_GPL(reset_control_put); @@ -417,8 +428,7 @@ struct reset_control *__devm_reset_control_get(struct device *dev, if (!ptr) return ERR_PTR(-ENOMEM); - rstc = __of_reset_control_get(dev ? dev->of_node : NULL, - id, index, shared, optional); + rstc = __reset_control_get(dev, id, index, shared, optional); if (!IS_ERR(rstc)) { *ptr = rstc; devres_add(dev, ptr); diff --git a/include/linux/reset.h b/include/linux/reset.h index 96fb139bdd08..13d8681210d5 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -15,6 +15,9 @@ int reset_control_status(struct reset_control *rstc); struct reset_control *__of_reset_control_get(struct device_node *node, const char *id, int index, bool shared, bool optional); +struct reset_control *__reset_control_get(struct device *dev, const char *id, + int index, bool shared, + bool optional); void reset_control_put(struct reset_control *rstc); struct reset_control *__devm_reset_control_get(struct device *dev, const char *id, int index, bool shared, @@ -72,6 +75,13 @@ static inline struct reset_control *__of_reset_control_get( return optional ? NULL : ERR_PTR(-ENOTSUPP); } +static inline struct reset_control *__reset_control_get( + struct device *dev, const char *id, + int index, bool shared, bool optional) +{ + return optional ? NULL : ERR_PTR(-ENOTSUPP); +} + static inline struct reset_control *__devm_reset_control_get( struct device *dev, const char *id, int index, bool shared, bool optional) @@ -102,8 +112,7 @@ __must_check reset_control_get_exclusive(struct device *dev, const char *id) #ifndef CONFIG_RESET_CONTROLLER WARN_ON(1); #endif - return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, false, - false); + return __reset_control_get(dev, id, 0, false, false); } /** @@ -131,22 +140,19 @@ __must_check reset_control_get_exclusive(struct device *dev, const char *id) static inline struct reset_control *reset_control_get_shared( struct device *dev, const char *id) { - return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, true, - false); + return __reset_control_get(dev, id, 0, true, false); } static inline struct reset_control *reset_control_get_optional_exclusive( struct device *dev, const char *id) { - return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, false, - true); + return __reset_control_get(dev, id, 0, false, true); } static inline struct reset_control *reset_control_get_optional_shared( struct device *dev, const char *id) { - return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, true, - true); + return __reset_control_get(dev, id, 0, true, true); } /** -- cgit v1.2.3-59-g8ed1b From fe514739d8538783749d3ce72f78e5a999ea5668 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 4 Apr 2017 15:08:36 -0700 Subject: libnvdimm: fix blk free space accounting Commit a1f3e4d6a0c3 "libnvdimm, region: update nd_region_available_dpa() for multi-pmem support" reworked blk dpa (DIMM Physical Address) accounting to comprehend multiple pmem namespace allocations aliasing with a given blk-dpa range. The following call trace is a result of failing to account for allocated blk capacity. WARNING: CPU: 1 PID: 2433 at tools/testing/nvdimm/../../../drivers/nvdimm/names 4 size_store+0x6f3/0x930 [libnvdimm] nd_region region5: allocation underrun: 0x0 of 0x1000000 bytes [..] Call Trace: dump_stack+0x86/0xc3 __warn+0xcb/0xf0 warn_slowpath_fmt+0x5f/0x80 size_store+0x6f3/0x930 [libnvdimm] dev_attr_store+0x18/0x30 If a given blk-dpa allocation does not alias with any pmem ranges then the full allocation should be accounted as busy space, not the size of the current pmem contribution to the region. The thinkos that led to this confusion was not realizing that the struct resource management is already guaranteeing no collisions between pmem allocations and blk allocations on the same dimm. Also, we do not try to support blk allocations in aliased pmem holes. This patch also fixes a case where the available blk goes negative. Cc: Fixes: a1f3e4d6a0c3 ("libnvdimm, region: update nd_region_available_dpa() for multi-pmem support"). Reported-by: Dariusz Dokupil Reported-by: Dave Jiang Reported-by: Vishal Verma Tested-by: Dave Jiang Tested-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/nvdimm/dimm_devs.c | 77 +++++++--------------------------------------- 1 file changed, 11 insertions(+), 66 deletions(-) (limited to 'drivers') diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 0eedc49e0d47..8b721321be5b 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -395,7 +395,7 @@ EXPORT_SYMBOL_GPL(nvdimm_create); int alias_dpa_busy(struct device *dev, void *data) { - resource_size_t map_end, blk_start, new, busy; + resource_size_t map_end, blk_start, new; struct blk_alloc_info *info = data; struct nd_mapping *nd_mapping; struct nd_region *nd_region; @@ -436,29 +436,19 @@ int alias_dpa_busy(struct device *dev, void *data) retry: /* * Find the free dpa from the end of the last pmem allocation to - * the end of the interleave-set mapping that is not already - * covered by a blk allocation. + * the end of the interleave-set mapping. */ - busy = 0; for_each_dpa_resource(ndd, res) { + if (strncmp(res->name, "pmem", 4) != 0) + continue; if ((res->start >= blk_start && res->start < map_end) || (res->end >= blk_start && res->end <= map_end)) { - if (strncmp(res->name, "pmem", 4) == 0) { - new = max(blk_start, min(map_end + 1, - res->end + 1)); - if (new != blk_start) { - blk_start = new; - goto retry; - } - } else - busy += min(map_end, res->end) - - max(nd_mapping->start, res->start) + 1; - } else if (nd_mapping->start > res->start - && map_end < res->end) { - /* total eclipse of the PMEM region mapping */ - busy += nd_mapping->size; - break; + new = max(blk_start, min(map_end + 1, res->end + 1)); + if (new != blk_start) { + blk_start = new; + goto retry; + } } } @@ -470,52 +460,11 @@ int alias_dpa_busy(struct device *dev, void *data) return 1; } - info->available -= blk_start - nd_mapping->start + busy; + info->available -= blk_start - nd_mapping->start; return 0; } -static int blk_dpa_busy(struct device *dev, void *data) -{ - struct blk_alloc_info *info = data; - struct nd_mapping *nd_mapping; - struct nd_region *nd_region; - resource_size_t map_end; - int i; - - if (!is_nd_pmem(dev)) - return 0; - - nd_region = to_nd_region(dev); - for (i = 0; i < nd_region->ndr_mappings; i++) { - nd_mapping = &nd_region->mapping[i]; - if (nd_mapping->nvdimm == info->nd_mapping->nvdimm) - break; - } - - if (i >= nd_region->ndr_mappings) - return 0; - - map_end = nd_mapping->start + nd_mapping->size - 1; - if (info->res->start >= nd_mapping->start - && info->res->start < map_end) { - if (info->res->end <= map_end) { - info->busy = 0; - return 1; - } else { - info->busy -= info->res->end - map_end; - return 0; - } - } else if (info->res->end >= nd_mapping->start - && info->res->end <= map_end) { - info->busy -= nd_mapping->start - info->res->start; - return 0; - } else { - info->busy -= nd_mapping->size; - return 0; - } -} - /** * nd_blk_available_dpa - account the unused dpa of BLK region * @nd_mapping: container of dpa-resource-root + labels @@ -545,11 +494,7 @@ resource_size_t nd_blk_available_dpa(struct nd_region *nd_region) for_each_dpa_resource(ndd, res) { if (strncmp(res->name, "blk", 3) != 0) continue; - - info.res = res; - info.busy = resource_size(res); - device_for_each_child(&nvdimm_bus->dev, &info, blk_dpa_busy); - info.available -= info.busy; + info.available -= resource_size(res); } return info.available; -- cgit v1.2.3-59-g8ed1b From 6780414519f91c2a84da9baa963a940ac916f803 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Tue, 28 Mar 2017 12:41:26 +0800 Subject: scsi: sd: Consider max_xfer_blocks if opt_xfer_blocks is unusable If device reports a small max_xfer_blocks and a zero opt_xfer_blocks, we end up using BLK_DEF_MAX_SECTORS, which is wrong and r/w of that size may get error. [mkp: tweaked to avoid setting rw_max twice and added typecast] Cc: # v4.4+ Fixes: ca369d51b3e ("block/sd: Fix device-imposed transfer length limits") Signed-off-by: Fam Zheng Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index fb9b4d29af0b..906cd6bfa2a9 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2956,7 +2956,8 @@ static int sd_revalidate_disk(struct gendisk *disk) q->limits.io_opt = logical_to_bytes(sdp, sdkp->opt_xfer_blocks); rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks); } else - rw_max = BLK_DEF_MAX_SECTORS; + rw_max = min_not_zero(logical_to_sectors(sdp, dev_max), + (sector_t)BLK_DEF_MAX_SECTORS); /* Combine with controller limits */ q->limits.max_sectors = min(rw_max, queue_max_hw_sectors(q)); -- cgit v1.2.3-59-g8ed1b From a00a7862513089f17209b732f230922f1942e0b9 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 17 Mar 2017 08:47:14 -0400 Subject: scsi: sr: Sanity check returned mode data Kefeng Wang discovered that old versions of the QEMU CD driver would return mangled mode data causing us to walk off the end of the buffer in an attempt to parse it. Sanity check the returned mode sense data. Cc: Reported-by: Kefeng Wang Tested-by: Kefeng Wang Signed-off-by: Martin K. Petersen --- drivers/scsi/sr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 0b29b9329b1c..a8f630213a1a 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -836,6 +836,7 @@ static void get_capabilities(struct scsi_cd *cd) unsigned char *buffer; struct scsi_mode_data data; struct scsi_sense_hdr sshdr; + unsigned int ms_len = 128; int rc, n; static const char *loadmech[] = @@ -862,10 +863,11 @@ static void get_capabilities(struct scsi_cd *cd) scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr); /* ask for mode page 0x2a */ - rc = scsi_mode_sense(cd->device, 0, 0x2a, buffer, 128, + rc = scsi_mode_sense(cd->device, 0, 0x2a, buffer, ms_len, SR_TIMEOUT, 3, &data, NULL); - if (!scsi_status_is_good(rc)) { + if (!scsi_status_is_good(rc) || data.length > ms_len || + data.header_length + data.block_descriptor_length > data.length) { /* failed, drive doesn't have capabilities mode page */ cd->cdi.speed = 1; cd->cdi.mask |= (CDC_CD_R | CDC_CD_RW | CDC_DVD_R | -- cgit v1.2.3-59-g8ed1b From 8eaf7dfcfcf222e56f7d1e0a9ffdd7be0f300c2f Mon Sep 17 00:00:00 2001 From: Chad Dupuis Date: Thu, 23 Mar 2017 06:58:47 -0700 Subject: scsi: qedf: Fix crash due to unsolicited FIP VLAN response. We need to initialize qedf->fipvlan_compl in __qedf_probe so that if we receive an unsolicited FIP VLAN response, the system doesn't crash due to trying to complete an uninitialized completion. Also add a check to see if there are any waiters on the completion so we don't inadvertantly kick start the discovery process due to the unsolicited frame. Fixed the crash: <1>BUG: unable to handle kernel NULL pointer dereference at (null) <1>IP: [] __wake_up_common+0x31/0x90 <4>PGD 0 <4>Oops: 0000 [#1] SMP <4>last sysfs file: /sys/devices/system/cpu/online <4>CPU 7 <4>Modules linked in: autofs4 nfs lockd fscache auth_rpcgss nfs_acl sunrpc target_core_iblock target_core_file target_core_pscsi target_core_mod configfs bnx2fc cnic fcoe 8021q garp stp llc ipt_REJECT nf_conntrack_ipv4 nf_defrag_ipv4 iptable_filter ip_tables ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables ipv6 vfat fat uinput ipmi_devintf microcode power_meter acpi_ipmi ipmi_si ipmi_msghandler iTCO_wdt iTCO_vendor_support dcdbas sg joydev sb_edac edac_core lpc_ich mfd_core shpchp tg3 ptp pps_core ext4 jbd2 mbcache sr_mod cdrom sd_mod crc_t10dif qedi(U) iscsi_boot_sysfs libiscsi scsi_transport_iscsi uio qedf(U) libfcoe libfc scsi_transport_fc scsi_tgt qede(U) qed(U) ahci megaraid_sas wmi dm_mirror dm_region_hash dm_log dm_mod [last unloaded: speedstep_lib] <4> <4>Pid: 1485, comm: qedf_11_ll2 Not tainted 2.6.32-642.el6.x86_64 #1 Dell Inc. PowerEdge R730/0599V5 <4>RIP: 0010:[] [] __wake_up_common+0x31/0x90 <4>RSP: 0018:ffff881068a83d50 EFLAGS: 00010086 <4>RAX: ffffffffffffffe8 RBX: ffff88106bf42de0 RCX: 0000000000000000 <4>RDX: 0000000000000000 RSI: 0000000000000003 RDI: ffff88106bf42de0 <4>RBP: ffff881068a83d90 R08: 0000000000000000 R09: 00000000fffffffe <4>R10: 0000000000000000 R11: 000000000000000b R12: 0000000000000286 <4>R13: ffff88106bf42de8 R14: 0000000000000000 R15: 0000000000000000 <4>FS: 0000000000000000(0000) GS:ffff88089c460000(0000) knlGS:0000000000000000 <4>CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b <4>CR2: 0000000000000000 CR3: 0000000001a8d000 CR4: 00000000001407e0 <4>DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 <4>DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 <4>Process qedf_11_ll2 (pid: 1485, threadinfo ffff881068a80000, task ffff881068a70040) <4>Stack: <4> ffff88106ef00090 0000000300000001 ffff881068a83d90 ffff88106bf42de0 <4> 0000000000000286 ffff88106bf42dd8 ffff88106bf40a50 0000000000000002 <4> ffff881068a83dc0 ffffffff810634c7 ffff881000000003 000000000000000b <4>Call Trace: <4> [] complete+0x47/0x60 <4> [] qedf_fip_recv+0x1c7/0x450 [qedf] <4> [] qedf_ll2_recv_thread+0x33b/0x510 [qedf] <4> [] ? qedf_ll2_recv_thread+0x0/0x510 [qedf] <4> [] kthread+0x9e/0xc0 <4> [] child_rip+0xa/0x20 <4> [] ? kthread+0x0/0xc0 <4> [] ? child_rip+0x0/0x20 <4>Code: 41 56 41 55 41 54 53 48 83 ec 18 0f 1f 44 00 00 89 75 cc 89 55 c8 4c 8d 6f 08 48 8b 57 08 41 89 cf 4d 89 c6 48 8d 42 e8 49 39 d5 <48> 8b 58 18 74 3f 48 83 eb 18 eb 0a 0f 1f 00 48 89 d8 48 8d 5a <1>RIP [] __wake_up_common+0x31/0x90 <4> RSP <4>CR2: 0000000000000000 Signed-off-by: Chad Dupuis Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_fip.c | 3 ++- drivers/scsi/qedf/qedf_main.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/scsi/qedf/qedf_fip.c b/drivers/scsi/qedf/qedf_fip.c index ed58b9104f58..e10b91cc3c62 100644 --- a/drivers/scsi/qedf/qedf_fip.c +++ b/drivers/scsi/qedf/qedf_fip.c @@ -99,7 +99,8 @@ static void qedf_fcoe_process_vlan_resp(struct qedf_ctx *qedf, qedf_set_vlan_id(qedf, vid); /* Inform waiter that it's ok to call fcoe_ctlr_link up() */ - complete(&qedf->fipvlan_compl); + if (!completion_done(&qedf->fipvlan_compl)) + complete(&qedf->fipvlan_compl); } } diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index 8e2a160490e6..cceddd995a4b 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -2803,6 +2803,7 @@ static int __qedf_probe(struct pci_dev *pdev, int mode) atomic_set(&qedf->num_offloads, 0); qedf->stop_io_on_error = false; pci_set_drvdata(pdev, qedf); + init_completion(&qedf->fipvlan_compl); QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_INFO, "QLogic FastLinQ FCoE Module qedf %s, " -- cgit v1.2.3-59-g8ed1b From bf6061b17a8d47ef0d9344d3ef576a4ff0edf793 Mon Sep 17 00:00:00 2001 From: Sawan Chandak Date: Fri, 31 Mar 2017 14:37:03 -0700 Subject: scsi: qla2xxx: Add fix to read correct register value for ISP82xx. Add fix to read correct register value for ISP82xx, during check for register disconnect.ISP82xx has different base register. Fixes: a465537ad1a4 ("qla2xxx: Disable the adapter and skip error recovery in case of register disconnect") Signed-off-by: Sawan Chandak Signed-off-by: Himanshu Madhani Cc: Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 579363a6f44f..c9e45d2befe5 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1126,8 +1126,13 @@ static inline uint32_t qla2x00_isp_reg_stat(struct qla_hw_data *ha) { struct device_reg_24xx __iomem *reg = &ha->iobase->isp24; + struct device_reg_82xx __iomem *reg82 = &ha->iobase->isp82; - return ((RD_REG_DWORD(®->host_status)) == ISP_REG_DISCONNECT); + if (IS_P3P_TYPE(ha)) + return ((RD_REG_DWORD(®82->host_int)) == ISP_REG_DISCONNECT); + else + return ((RD_REG_DWORD(®->host_status)) == + ISP_REG_DISCONNECT); } /************************************************************************** -- cgit v1.2.3-59-g8ed1b From 7c856152cb92f8eee2df29ef325a1b1f43161aff Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 4 Apr 2017 10:42:30 -0400 Subject: scsi: sd: Fix capacity calculation with 32-bit sector_t We previously made sure that the reported disk capacity was less than 0xffffffff blocks when the kernel was not compiled with large sector_t support (CONFIG_LBDAF). However, this check assumed that the capacity was reported in units of 512 bytes. Add a sanity check function to ensure that we only enable disks if the entire reported capacity can be expressed in terms of sector_t. Cc: Reported-by: Steve Magnani Cc: Bart Van Assche Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 906cd6bfa2a9..fe0f7997074e 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2102,6 +2102,22 @@ static void read_capacity_error(struct scsi_disk *sdkp, struct scsi_device *sdp, #define READ_CAPACITY_RETRIES_ON_RESET 10 +/* + * Ensure that we don't overflow sector_t when CONFIG_LBDAF is not set + * and the reported logical block size is bigger than 512 bytes. Note + * that last_sector is a u64 and therefore logical_to_sectors() is not + * applicable. + */ +static bool sd_addressable_capacity(u64 lba, unsigned int sector_size) +{ + u64 last_sector = (lba + 1ULL) << (ilog2(sector_size) - 9); + + if (sizeof(sector_t) == 4 && last_sector > U32_MAX) + return false; + + return true; +} + static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, unsigned char *buffer) { @@ -2167,7 +2183,7 @@ static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, return -ENODEV; } - if ((sizeof(sdkp->capacity) == 4) && (lba >= 0xffffffffULL)) { + if (!sd_addressable_capacity(lba, sector_size)) { sd_printk(KERN_ERR, sdkp, "Too big for this kernel. Use a " "kernel compiled with support for large block " "devices.\n"); @@ -2256,7 +2272,7 @@ static int read_capacity_10(struct scsi_disk *sdkp, struct scsi_device *sdp, return sector_size; } - if ((sizeof(sdkp->capacity) == 4) && (lba == 0xffffffff)) { + if (!sd_addressable_capacity(lba, sector_size)) { sd_printk(KERN_ERR, sdkp, "Too big for this kernel. Use a " "kernel compiled with support for large block " "devices.\n"); -- cgit v1.2.3-59-g8ed1b From 096e9e912ba8c973213e57701e0591e01064be26 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 6 Apr 2017 09:15:50 +0300 Subject: nvme-loop: Fix sqsize wrong assignment based on ctrl MQES capability both our sqsize and the controller MQES cap are a 0 based value, so making it 1 based is wrong. Reported-by: Trapp, Darren Reported-by: Daniel Verkamp Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/loop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 22f7bc6bac7f..c7b0b6a52708 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -392,7 +392,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) } ctrl->ctrl.sqsize = - min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize); + min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize); error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); if (error) -- cgit v1.2.3-59-g8ed1b From 1af76ddaa80e3bca8001b535e44aaaebd3e1907e Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 6 Apr 2017 09:15:51 +0300 Subject: nvme-rdma: Fix sqsize wrong assignment based on ctrl MQES capability both our sqsize and the controller MQES cap are a 0 based value, so making it 1 based is wrong. Reported-by: Trapp, Darren Reported-by: Daniel Verkamp Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 47a479f26e5d..16f84eb0b95e 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1606,7 +1606,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl) } ctrl->ctrl.sqsize = - min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize); + min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize); error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); if (error) -- cgit v1.2.3-59-g8ed1b From c6c64a942c87faa6fca68f5dd208094b8cf61236 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 6 Apr 2017 09:15:52 +0300 Subject: nvme-fc: Fix sqsize wrong assignment based on ctrl MQES capability both our sqsize and the controller MQES cap are a 0 based value, so making it 1 based is wrong. Reported-by: Trapp, Darren Reported-by: Daniel Verkamp Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 9690beb15e69..d996ca73d3be 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2023,7 +2023,7 @@ nvme_fc_configure_admin_queue(struct nvme_fc_ctrl *ctrl) } ctrl->ctrl.sqsize = - min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize); + min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize); error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); if (error) -- cgit v1.2.3-59-g8ed1b From 0beb2012a1722633515c8aaa263c73449636c893 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 7 Apr 2017 09:47:24 -0700 Subject: libnvdimm: fix reconfig_mutex, mmap_sem, and jbd2_handle lockdep splat Holding the reconfig_mutex over a potential userspace fault sets up a lockdep dependency chain between filesystem-DAX and the libnvdimm ioctl path. Move the user access outside of the lock. [ INFO: possible circular locking dependency detected ] 4.11.0-rc3+ #13 Tainted: G W O ------------------------------------------------------- fallocate/16656 is trying to acquire lock: (&nvdimm_bus->reconfig_mutex){+.+.+.}, at: [] nvdimm_bus_lock+0x21/0x30 [libnvdimm] but task is already holding lock: (jbd2_handle){++++..}, at: [] start_this_handle+0x104/0x460 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (jbd2_handle){++++..}: lock_acquire+0xbd/0x200 start_this_handle+0x16a/0x460 jbd2__journal_start+0xe9/0x2d0 __ext4_journal_start_sb+0x89/0x1c0 ext4_dirty_inode+0x32/0x70 __mark_inode_dirty+0x235/0x670 generic_update_time+0x87/0xd0 touch_atime+0xa9/0xd0 ext4_file_mmap+0x90/0xb0 mmap_region+0x370/0x5b0 do_mmap+0x415/0x4f0 vm_mmap_pgoff+0xd7/0x120 SyS_mmap_pgoff+0x1c5/0x290 SyS_mmap+0x22/0x30 entry_SYSCALL_64_fastpath+0x1f/0xc2 -> #1 (&mm->mmap_sem){++++++}: lock_acquire+0xbd/0x200 __might_fault+0x70/0xa0 __nd_ioctl+0x683/0x720 [libnvdimm] nvdimm_ioctl+0x8b/0xe0 [libnvdimm] do_vfs_ioctl+0xa8/0x740 SyS_ioctl+0x79/0x90 do_syscall_64+0x6c/0x200 return_from_SYSCALL_64+0x0/0x7a -> #0 (&nvdimm_bus->reconfig_mutex){+.+.+.}: __lock_acquire+0x16b6/0x1730 lock_acquire+0xbd/0x200 __mutex_lock+0x88/0x9b0 mutex_lock_nested+0x1b/0x20 nvdimm_bus_lock+0x21/0x30 [libnvdimm] nvdimm_forget_poison+0x25/0x50 [libnvdimm] nvdimm_clear_poison+0x106/0x140 [libnvdimm] pmem_do_bvec+0x1c2/0x2b0 [nd_pmem] pmem_make_request+0xf9/0x270 [nd_pmem] generic_make_request+0x118/0x3b0 submit_bio+0x75/0x150 Cc: Fixes: 62232e45f4a2 ("libnvdimm: control (ioctl) messages for nvdimm_bus and nvdimm devices") Cc: Dave Jiang Reported-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/nvdimm/bus.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers') diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 23d4a1728cdf..351bac8f6503 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -934,8 +934,14 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len, NULL); if (rc < 0) goto out_unlock; + nvdimm_bus_unlock(&nvdimm_bus->dev); + if (copy_to_user(p, buf, buf_len)) rc = -EFAULT; + + vfree(buf); + return rc; + out_unlock: nvdimm_bus_unlock(&nvdimm_bus->dev); out: -- cgit v1.2.3-59-g8ed1b From 4aa5615e080a9855e607accc75b07ab79b252dde Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 7 Apr 2017 12:25:52 -0700 Subject: libnvdimm: band aid btt vs clear poison locking The following warning results from holding a lane spinlock, preempt_disable(), or the btt map spinlock and then trying to take the reconfig_mutex to walk the poison list and potentially add new entries. BUG: sleeping function called from invalid context at kernel/locking/mutex.c:747 in_atomic(): 1, irqs_disabled(): 0, pid: 17159, name: dd [..] Call Trace: dump_stack+0x85/0xc8 ___might_sleep+0x184/0x250 __might_sleep+0x4a/0x90 __mutex_lock+0x58/0x9b0 ? nvdimm_bus_lock+0x21/0x30 [libnvdimm] ? __nvdimm_bus_badblocks_clear+0x2f/0x60 [libnvdimm] ? acpi_nfit_forget_poison+0x79/0x80 [nfit] ? _raw_spin_unlock+0x27/0x40 mutex_lock_nested+0x1b/0x20 nvdimm_bus_lock+0x21/0x30 [libnvdimm] nvdimm_forget_poison+0x25/0x50 [libnvdimm] nvdimm_clear_poison+0x106/0x140 [libnvdimm] nsio_rw_bytes+0x164/0x270 [libnvdimm] btt_write_pg+0x1de/0x3e0 [nd_btt] ? blk_queue_enter+0x30/0x290 btt_make_request+0x11a/0x310 [nd_btt] ? blk_queue_enter+0xb7/0x290 ? blk_queue_enter+0x30/0x290 generic_make_request+0x118/0x3b0 As a minimal fix, disable error clearing when the BTT is enabled for the namespace. For the final fix a larger rework of the poison list locking is needed. Note that this is not a problem in the blk case since that path never calls nvdimm_clear_poison(). Cc: Fixes: 82bf1037f2ca ("libnvdimm: check and clear poison before writing to pmem") Cc: Dave Jiang [jeff: dynamically disable error clearing in the btt case] Suggested-by: Jeff Moyer Reviewed-by: Jeff Moyer Reported-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/nvdimm/claim.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index b3323c0697f6..ca6d572c48fc 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -243,7 +243,15 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns, } if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) { - if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512)) { + /* + * FIXME: nsio_rw_bytes() may be called from atomic + * context in the btt case and nvdimm_clear_poison() + * takes a sleeping lock. Until the locking can be + * reworked this capability requires that the namespace + * is not claimed by btt. + */ + if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512) + && (!ndns->claim || !is_nd_btt(ndns->claim))) { long cleared; cleared = nvdimm_clear_poison(&ndns->dev, offset, size); -- cgit v1.2.3-59-g8ed1b From 911e572e98656056ebbbb4274d8fe61434188646 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Thu, 6 Apr 2017 18:12:09 -0300 Subject: scsi: aacraid: fix PCI error recovery path During a PCI error recovery, if aac_check_health() is not aware that a PCI error happened and we have an offline PCI channel, it might trigger some errors (like NULL pointer dereference) and inhibit the error recovery process to complete. This patch makes the health check procedure aware of PCI channel issues, and in case of error recovery process, the function aac_adapter_check_health() returns -1 and let the recovery process to complete successfully. This patch was tested on upstream kernel v4.11-rc5 in PowerPC ppc64le architecture with adapter 9005:028d (VID:DID) - the error recovery procedure was able to recover fine. Fixes: 5c63f7f710bd ("aacraid: Added EEH support") Cc: stable@vger.kernel.org # v4.6+ Signed-off-by: Guilherme G. Piccoli Reviewed-by: Dave Carroll Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/aacraid.h | 11 ++++++++--- drivers/scsi/aacraid/commsup.c | 3 ++- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index d036a806f31c..d281492009fb 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -1690,9 +1690,6 @@ struct aac_dev #define aac_adapter_sync_cmd(dev, command, p1, p2, p3, p4, p5, p6, status, r1, r2, r3, r4) \ (dev)->a_ops.adapter_sync_cmd(dev, command, p1, p2, p3, p4, p5, p6, status, r1, r2, r3, r4) -#define aac_adapter_check_health(dev) \ - (dev)->a_ops.adapter_check_health(dev) - #define aac_adapter_restart(dev, bled, reset_type) \ ((dev)->a_ops.adapter_restart(dev, bled, reset_type)) @@ -2615,6 +2612,14 @@ static inline unsigned int cap_to_cyls(sector_t capacity, unsigned divisor) return capacity; } +static inline int aac_adapter_check_health(struct aac_dev *dev) +{ + if (unlikely(pci_channel_offline(dev->pdev))) + return -1; + + return (dev)->a_ops.adapter_check_health(dev); +} + /* SCp.phase values */ #define AAC_OWNER_MIDLEVEL 0x101 #define AAC_OWNER_LOWLEVEL 0x102 diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index c8172f16cf33..1f4918355fdb 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -1873,7 +1873,8 @@ int aac_check_health(struct aac_dev * aac) spin_unlock_irqrestore(&aac->fib_lock, flagv); if (BlinkLED < 0) { - printk(KERN_ERR "%s: Host adapter dead %d\n", aac->name, BlinkLED); + printk(KERN_ERR "%s: Host adapter is dead (or got a PCI error) %d\n", + aac->name, BlinkLED); goto out; } -- cgit v1.2.3-59-g8ed1b From 785a470496d8e0a32e3d39f376984eb2c98ca5b3 Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Tue, 11 Apr 2017 11:46:04 -0300 Subject: scsi: ipr: do not set DID_PASSTHROUGH on CHECK CONDITION On a dual controller setup with multipath enabled, some MEDIUM ERRORs caused both paths to be failed, thus I/O got queued/blocked since the 'queue_if_no_path' feature is enabled by default on IPR controllers. This example disabled 'queue_if_no_path' so the I/O failure is seen at the sg_dd program. Notice that after the sg_dd test-case, both paths are in 'failed' state, and both path/priority groups are in 'enabled' state (not 'active') -- which would block I/O with 'queue_if_no_path'. # sg_dd if=/dev/dm-2 bs=4096 count=1 dio=1 verbose=4 blk_sgio=0 <...> read(unix): count=4096, res=-1 sg_dd: reading, skip=0 : Input/output error <...> # dmesg [...] sd 2:2:16:0: [sds] FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE [...] sd 2:2:16:0: [sds] Sense Key : Medium Error [current] [...] sd 2:2:16:0: [sds] Add. Sense: Unrecovered read error - recommend rewrite the data [...] sd 2:2:16:0: [sds] CDB: Read(10) 28 00 00 00 00 00 00 00 20 00 [...] blk_update_request: I/O error, dev sds, sector 0 [...] device-mapper: multipath: Failing path 65:32. <...> [...] device-mapper: multipath: Failing path 65:224. # multipath -l 1IBM_IPR-0_59C2AE0000001F80 dm-2 IBM ,IPR-0 59C2AE00 size=5.2T features='0' hwhandler='1 alua' wp=rw |-+- policy='service-time 0' prio=0 status=enabled | `- 2:2:16:0 sds 65:32 failed undef running `-+- policy='service-time 0' prio=0 status=enabled `- 1:2:7:0 sdae 65:224 failed undef running This is not the desired behavior. The dm-multipath explicitly checks for the MEDIUM ERROR case (and a few others) so not to fail the path (e.g., I/O to other sectors could potentially happen without problems). See dm-mpath.c :: do_end_io_bio() -> noretry_error() !->! fail_path(). The problem trace is: 1) ipr_scsi_done() // SENSE KEY/CHECK CONDITION detected, go to.. 2) ipr_erp_start() // ipr_is_gscsi() and masked_ioasc OK, go to.. 3) ipr_gen_sense() // masked_ioasc is IPR_IOASC_MED_DO_NOT_REALLOC, // so set DID_PASSTHROUGH. 4) scsi_decide_disposition() // check for DID_PASSTHROUGH and return // early on, faking a DID_OK.. *instead* // of reaching scsi_check_sense(). // Had it reached the latter, that would // set host_byte to DID_MEDIUM_ERROR. 5) scsi_finish_command() 6) scsi_io_completion() 7) __scsi_error_from_host_byte() // That would be converted to -ENODATA <...> 8) dm_softirq_done() 9) multipath_end_io() 10) do_end_io() 11) noretry_error() // And that is checked in dm-mpath :: noretry_error() // which would cause fail_path() not to be called. With this patch applied, the I/O is failed but the paths are not. This multipath device continues accepting more I/O requests without blocking. (and notice the different host byte/driver byte handling per SCSI layer). # dmesg [...] sd 2:2:7:0: [sdaf] Done: SUCCESS Result: hostbyte=0x13 driverbyte=DRIVER_OK [...] sd 2:2:7:0: [sdaf] CDB: Read(10) 28 00 00 00 00 00 00 00 40 00 [...] sd 2:2:7:0: [sdaf] Sense Key : Medium Error [current] [...] sd 2:2:7:0: [sdaf] Add. Sense: Unrecovered read error - recommend rewrite the data [...] blk_update_request: critical medium error, dev sdaf, sector 0 [...] blk_update_request: critical medium error, dev dm-6, sector 0 [...] sd 2:2:7:0: [sdaf] Done: SUCCESS Result: hostbyte=0x13 driverbyte=DRIVER_OK [...] sd 2:2:7:0: [sdaf] CDB: Read(10) 28 00 00 00 00 00 00 00 10 00 [...] sd 2:2:7:0: [sdaf] Sense Key : Medium Error [current] [...] sd 2:2:7:0: [sdaf] Add. Sense: Unrecovered read error - recommend rewrite the data [...] blk_update_request: critical medium error, dev sdaf, sector 0 [...] blk_update_request: critical medium error, dev dm-6, sector 0 [...] Buffer I/O error on dev dm-6, logical block 0, async page read # multipath -l 1IBM_IPR-0_59C2AE0000001F80 1IBM_IPR-0_59C2AE0000001F80 dm-6 IBM ,IPR-0 59C2AE00 size=5.2T features='1 queue_if_no_path' hwhandler='1 alua' wp=rw |-+- policy='service-time 0' prio=0 status=active | `- 2:2:7:0 sdaf 65:240 active undef running `-+- policy='service-time 0' prio=0 status=enabled `- 1:2:7:0 sdh 8:112 active undef running Signed-off-by: Mauricio Faria de Oliveira Acked-by: Brian King Signed-off-by: Martin K. Petersen --- drivers/scsi/ipr.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 835c59c777f2..0d782135ac55 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -6293,7 +6293,12 @@ static void ipr_erp_start(struct ipr_ioa_cfg *ioa_cfg, break; case IPR_IOASC_MED_DO_NOT_REALLOC: /* prevent retries */ case IPR_IOASA_IR_DUAL_IOA_DISABLED: - scsi_cmd->result |= (DID_PASSTHROUGH << 16); + /* + * exception: do not set DID_PASSTHROUGH on CHECK CONDITION + * so SCSI mid-layer and upper layers handle it accordingly. + */ + if (scsi_cmd->result != SAM_STAT_CHECK_CONDITION) + scsi_cmd->result |= (DID_PASSTHROUGH << 16); break; case IPR_IOASC_BUS_WAS_RESET: case IPR_IOASC_BUS_WAS_RESET_BY_OTHER: -- cgit v1.2.3-59-g8ed1b From 956a4cd2c957acf638ff29951aabaa9d8e92bbc2 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 7 Apr 2017 16:42:08 -0700 Subject: device-dax: switch to srcu, fix rcu_read_lock() vs pte allocation The following warning triggers with a new unit test that stresses the device-dax interface. =============================== [ ERR: suspicious RCU usage. ] 4.11.0-rc4+ #1049 Tainted: G O ------------------------------- ./include/linux/rcupdate.h:521 Illegal context switch in RCU read-side critical section! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 0 2 locks held by fio/9070: #0: (&mm->mmap_sem){++++++}, at: [] __do_page_fault+0x167/0x4f0 #1: (rcu_read_lock){......}, at: [] dax_dev_huge_fault+0x32/0x620 [dax] Call Trace: dump_stack+0x86/0xc3 lockdep_rcu_suspicious+0xd7/0x110 ___might_sleep+0xac/0x250 __might_sleep+0x4a/0x80 __alloc_pages_nodemask+0x23a/0x360 alloc_pages_current+0xa1/0x1f0 pte_alloc_one+0x17/0x80 __pte_alloc+0x1e/0x120 __get_locked_pte+0x1bf/0x1d0 insert_pfn.isra.70+0x3a/0x100 ? lookup_memtype+0xa6/0xd0 vm_insert_mixed+0x64/0x90 dax_dev_huge_fault+0x520/0x620 [dax] ? dax_dev_huge_fault+0x32/0x620 [dax] dax_dev_fault+0x10/0x20 [dax] __do_fault+0x1e/0x140 __handle_mm_fault+0x9af/0x10d0 handle_mm_fault+0x16d/0x370 ? handle_mm_fault+0x47/0x370 __do_page_fault+0x28c/0x4f0 trace_do_page_fault+0x58/0x2a0 do_async_page_fault+0x1a/0xa0 async_page_fault+0x28/0x30 Inserting a page table entry may trigger an allocation while we are holding a read lock to keep the device instance alive for the duration of the fault. Use srcu for this keep-alive protection. Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap") Cc: Signed-off-by: Dan Williams --- drivers/dax/Kconfig | 1 + drivers/dax/dax.c | 13 +++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig index 3e2ab3b14eea..9e95bf94eb13 100644 --- a/drivers/dax/Kconfig +++ b/drivers/dax/Kconfig @@ -2,6 +2,7 @@ menuconfig DEV_DAX tristate "DAX: direct access to differentiated memory" default m if NVDIMM_DAX depends on TRANSPARENT_HUGEPAGE + select SRCU help Support raw access to differentiated (persistence, bandwidth, latency...) memory via an mmap(2) capable character diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c index 80c6db279ae1..806f180c80d8 100644 --- a/drivers/dax/dax.c +++ b/drivers/dax/dax.c @@ -25,6 +25,7 @@ #include "dax.h" static dev_t dax_devt; +DEFINE_STATIC_SRCU(dax_srcu); static struct class *dax_class; static DEFINE_IDA(dax_minor_ida); static int nr_dax = CONFIG_NR_DEV_DAX; @@ -60,7 +61,7 @@ struct dax_region { * @region - parent region * @dev - device backing the character device * @cdev - core chardev data - * @alive - !alive + rcu grace period == no new mappings can be established + * @alive - !alive + srcu grace period == no new mappings can be established * @id - child id in the region * @num_resources - number of physical address extents in this device * @res - array of physical address ranges @@ -569,7 +570,7 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) static int dax_dev_huge_fault(struct vm_fault *vmf, enum page_entry_size pe_size) { - int rc; + int rc, id; struct file *filp = vmf->vma->vm_file; struct dax_dev *dax_dev = filp->private_data; @@ -578,7 +579,7 @@ static int dax_dev_huge_fault(struct vm_fault *vmf, ? "write" : "read", vmf->vma->vm_start, vmf->vma->vm_end); - rcu_read_lock(); + id = srcu_read_lock(&dax_srcu); switch (pe_size) { case PE_SIZE_PTE: rc = __dax_dev_pte_fault(dax_dev, vmf); @@ -592,7 +593,7 @@ static int dax_dev_huge_fault(struct vm_fault *vmf, default: return VM_FAULT_FALLBACK; } - rcu_read_unlock(); + srcu_read_unlock(&dax_srcu, id); return rc; } @@ -713,11 +714,11 @@ static void unregister_dax_dev(void *dev) * Note, rcu is not protecting the liveness of dax_dev, rcu is * ensuring that any fault handlers that might have seen * dax_dev->alive == true, have completed. Any fault handlers - * that start after synchronize_rcu() has started will abort + * that start after synchronize_srcu() has started will abort * upon seeing dax_dev->alive == false. */ dax_dev->alive = false; - synchronize_rcu(); + synchronize_srcu(&dax_srcu); unmap_mapping_range(dax_dev->inode->i_mapping, 0, 0, 1); cdev_del(cdev); device_unregister(dev); -- cgit v1.2.3-59-g8ed1b From a8983d01f9b7d600fbdb3916899edf395954cc83 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 14 Apr 2017 10:57:52 +0200 Subject: Revert "tty: don't panic on OOM in tty_set_ldisc()" This reverts commit 5362544bebe85071188dd9e479b5a5040841c895 as it is reported to cause a reproducable crash. Fixes: 5362544bebe8 ("tty: don't panic on OOM in tty_set_ldisc()") Reported-by: Vegard Nossum Cc: Dmitry Vyukov Cc: Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: Peter Hurley Cc: One Thousand Gnomes Cc: Linus Torvalds Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org --- drivers/tty/tty_ldisc.c | 85 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 69 insertions(+), 16 deletions(-) (limited to 'drivers') diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index b0500a0a87b8..e4603b09863a 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -491,6 +491,41 @@ static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld) tty_ldisc_debug(tty, "%p: closed\n", ld); } +/** + * tty_ldisc_restore - helper for tty ldisc change + * @tty: tty to recover + * @old: previous ldisc + * + * Restore the previous line discipline or N_TTY when a line discipline + * change fails due to an open error + */ + +static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old) +{ + struct tty_ldisc *new_ldisc; + int r; + + /* There is an outstanding reference here so this is safe */ + old = tty_ldisc_get(tty, old->ops->num); + WARN_ON(IS_ERR(old)); + tty->ldisc = old; + tty_set_termios_ldisc(tty, old->ops->num); + if (tty_ldisc_open(tty, old) < 0) { + tty_ldisc_put(old); + /* This driver is always present */ + new_ldisc = tty_ldisc_get(tty, N_TTY); + if (IS_ERR(new_ldisc)) + panic("n_tty: get"); + tty->ldisc = new_ldisc; + tty_set_termios_ldisc(tty, N_TTY); + r = tty_ldisc_open(tty, new_ldisc); + if (r < 0) + panic("Couldn't open N_TTY ldisc for " + "%s --- error %d.", + tty_name(tty), r); + } +} + /** * tty_set_ldisc - set line discipline * @tty: the terminal to set @@ -504,7 +539,12 @@ static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld) int tty_set_ldisc(struct tty_struct *tty, int disc) { - int retval, old_disc; + int retval; + struct tty_ldisc *old_ldisc, *new_ldisc; + + new_ldisc = tty_ldisc_get(tty, disc); + if (IS_ERR(new_ldisc)) + return PTR_ERR(new_ldisc); tty_lock(tty); retval = tty_ldisc_lock(tty, 5 * HZ); @@ -517,8 +557,7 @@ int tty_set_ldisc(struct tty_struct *tty, int disc) } /* Check the no-op case */ - old_disc = tty->ldisc->ops->num; - if (old_disc == disc) + if (tty->ldisc->ops->num == disc) goto out; if (test_bit(TTY_HUPPED, &tty->flags)) { @@ -527,25 +566,34 @@ int tty_set_ldisc(struct tty_struct *tty, int disc) goto out; } - retval = tty_ldisc_reinit(tty, disc); + old_ldisc = tty->ldisc; + + /* Shutdown the old discipline. */ + tty_ldisc_close(tty, old_ldisc); + + /* Now set up the new line discipline. */ + tty->ldisc = new_ldisc; + tty_set_termios_ldisc(tty, disc); + + retval = tty_ldisc_open(tty, new_ldisc); if (retval < 0) { /* Back to the old one or N_TTY if we can't */ - if (tty_ldisc_reinit(tty, old_disc) < 0) { - pr_err("tty: TIOCSETD failed, reinitializing N_TTY\n"); - if (tty_ldisc_reinit(tty, N_TTY) < 0) { - /* At this point we have tty->ldisc == NULL. */ - pr_err("tty: reinitializing N_TTY failed\n"); - } - } + tty_ldisc_put(new_ldisc); + tty_ldisc_restore(tty, old_ldisc); } - if (tty->ldisc && tty->ldisc->ops->num != old_disc && - tty->ops->set_ldisc) { + if (tty->ldisc->ops->num != old_ldisc->ops->num && tty->ops->set_ldisc) { down_read(&tty->termios_rwsem); tty->ops->set_ldisc(tty); up_read(&tty->termios_rwsem); } + /* At this point we hold a reference to the new ldisc and a + reference to the old ldisc, or we hold two references to + the old ldisc (if it was restored as part of error cleanup + above). In either case, releasing a single reference from + the old ldisc is correct. */ + new_ldisc = old_ldisc; out: tty_ldisc_unlock(tty); @@ -553,6 +601,7 @@ out: already running */ tty_buffer_restart_work(tty->port); err: + tty_ldisc_put(new_ldisc); /* drop the extra reference */ tty_unlock(tty); return retval; } @@ -613,8 +662,10 @@ int tty_ldisc_reinit(struct tty_struct *tty, int disc) int retval; ld = tty_ldisc_get(tty, disc); - if (IS_ERR(ld)) + if (IS_ERR(ld)) { + BUG_ON(disc == N_TTY); return PTR_ERR(ld); + } if (tty->ldisc) { tty_ldisc_close(tty, tty->ldisc); @@ -626,8 +677,10 @@ int tty_ldisc_reinit(struct tty_struct *tty, int disc) tty_set_termios_ldisc(tty, disc); retval = tty_ldisc_open(tty, tty->ldisc); if (retval) { - tty_ldisc_put(tty->ldisc); - tty->ldisc = NULL; + if (!WARN_ON(disc == N_TTY)) { + tty_ldisc_put(tty->ldisc); + tty->ldisc = NULL; + } } return retval; } -- cgit v1.2.3-59-g8ed1b From 271a8b428f8361f3ad4c599835ccd34dd458b212 Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Wed, 12 Apr 2017 17:06:52 +0100 Subject: sfc: limit the number of receive queues The number of rx queues is determined by the rss_cpus parameter or the cpu topology. If that is higher than EFX_MAX_RX_QUEUES the driver can corrupt state. Fixes: 8ceee660aacb ("New driver "sfc" for Solarstorm SFC4000 controller.") Signed-off-by: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/efx.c | 7 +++++++ drivers/net/ethernet/sfc/falcon/efx.c | 7 +++++++ 2 files changed, 14 insertions(+) (limited to 'drivers') diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 50d28261b6b9..b9cb697b2818 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -1371,6 +1371,13 @@ static unsigned int efx_wanted_parallelism(struct efx_nic *efx) free_cpumask_var(thread_mask); } + if (count > EFX_MAX_RX_QUEUES) { + netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn, + "Reducing number of rx queues from %u to %u.\n", + count, EFX_MAX_RX_QUEUES); + count = EFX_MAX_RX_QUEUES; + } + /* If RSS is requested for the PF *and* VFs then we can't write RSS * table entries that are inaccessible to VFs */ diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c index f5e5cd1659a1..29614da91cbf 100644 --- a/drivers/net/ethernet/sfc/falcon/efx.c +++ b/drivers/net/ethernet/sfc/falcon/efx.c @@ -1354,6 +1354,13 @@ static unsigned int ef4_wanted_parallelism(struct ef4_nic *efx) free_cpumask_var(thread_mask); } + if (count > EF4_MAX_RX_QUEUES) { + netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn, + "Reducing number of rx queues from %u to %u.\n", + count, EF4_MAX_RX_QUEUES); + count = EF4_MAX_RX_QUEUES; + } + return count; } -- cgit v1.2.3-59-g8ed1b From b47a57a273fc0c1a922fe8b6a139881971e3d8de Mon Sep 17 00:00:00 2001 From: George Cherian Date: Thu, 13 Apr 2017 07:25:01 +0000 Subject: net: thunderx: Fix set_max_bgx_per_node for 81xx rgx Add the PCI_SUBSYS_DEVID_81XX_RGX and use the same to set the max bgx per node count. This fixes the issue intoduced by following commit 78aacb6f6 net: thunderx: Fix invalid mac addresses for node1 interfaces With this commit the max_bgx_per_node for 81xx is set as 2 instead of 3 because of which num_vfs is always calculated as zero. Signed-off-by: George Cherian Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 1 + drivers/net/ethernet/cavium/thunder/thunder_bgx.h | 1 + 2 files changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 64a1095e4d14..a0ca68ce3fbb 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -134,6 +134,7 @@ static void set_max_bgx_per_node(struct pci_dev *pdev) pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &sdevid); switch (sdevid) { case PCI_SUBSYS_DEVID_81XX_BGX: + case PCI_SUBSYS_DEVID_81XX_RGX: max_bgx_per_node = MAX_BGX_PER_CN81XX; break; case PCI_SUBSYS_DEVID_83XX_BGX: diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h index c5080f2cead5..6b7fe6fdd13b 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -16,6 +16,7 @@ /* Subsystem device IDs */ #define PCI_SUBSYS_DEVID_88XX_BGX 0xA126 #define PCI_SUBSYS_DEVID_81XX_BGX 0xA226 +#define PCI_SUBSYS_DEVID_81XX_RGX 0xA254 #define PCI_SUBSYS_DEVID_83XX_BGX 0xA326 #define MAX_BGX_THUNDER 8 /* Max 2 nodes, 4 per node */ -- cgit v1.2.3-59-g8ed1b From 426c87caa2b4578b43cd3f689f02c65b743b2559 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 13 Apr 2017 10:57:15 -0600 Subject: net: vrf: Fix setting NLM_F_EXCL flag when adding l3mdev rule Only need 1 l3mdev FIB rule. Fix setting NLM_F_EXCL in the nlmsghdr. Fixes: 1aa6c4f6b8cd8 ("net: vrf: Add l3mdev rules on first device create") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index d6988db1930d..7d909c8183e9 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1128,7 +1128,7 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it) goto nla_put_failure; /* rule only needs to appear once */ - nlh->nlmsg_flags &= NLM_F_EXCL; + nlh->nlmsg_flags |= NLM_F_EXCL; frh = nlmsg_data(nlh); memset(frh, 0, sizeof(*frh)); -- cgit v1.2.3-59-g8ed1b From bfe72442578bb112626e476ffe1f276504d85b95 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Thu, 13 Apr 2017 14:11:27 -0500 Subject: net: phy: micrel: fix crash when statistic requested for KSZ9031 phy Now the command: ethtool --phy-statistics eth0 will cause system crash with meassage "Unable to handle kernel NULL pointer dereference at virtual address 00000010" from: (kszphy_get_stats) from [] (ethtool_get_phy_stats+0xd8/0x210) (ethtool_get_phy_stats) from [] (dev_ethtool+0x5b8/0x228c) (dev_ethtool) from [] (dev_ioctl+0x3fc/0x964) (dev_ioctl) from [] (sock_ioctl+0x170/0x2c0) (sock_ioctl) from [] (do_vfs_ioctl+0xa8/0x95c) (do_vfs_ioctl) from [] (SyS_ioctl+0x3c/0x64) (SyS_ioctl) from [] (ret_fast_syscall+0x0/0x44) The reason: phy_driver structure for KSZ9031 phy has no .probe() callback defined. As result, struct phy_device *phydev->priv pointer will not be initializes (null). This issue will affect also following phys: KSZ8795, KSZ886X, KSZ8873MLL, KSZ9031, KSZ9021, KSZ8061, KS8737 Fix it by: - adding .probe() = kszphy_probe() callback to KSZ9031, KSZ9021 phys. The kszphy_probe() can be re-used as it doesn't do any phy specific settings. - removing statistic callbacks from other phys (KSZ8795, KSZ886X, KSZ8873MLL, KSZ8061, KS8737) as they doesn't have corresponding statistic counters. Fixes: 2b2427d06426 ("phy: micrel: Add ethtool statistics counters") Signed-off-by: Grygorii Strashko Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 6742070ca676..1326d99771c1 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -798,9 +798,6 @@ static struct phy_driver ksphy_driver[] = { .read_status = genphy_read_status, .ack_interrupt = kszphy_ack_interrupt, .config_intr = kszphy_config_intr, - .get_sset_count = kszphy_get_sset_count, - .get_strings = kszphy_get_strings, - .get_stats = kszphy_get_stats, .suspend = genphy_suspend, .resume = genphy_resume, }, { @@ -940,9 +937,6 @@ static struct phy_driver ksphy_driver[] = { .read_status = genphy_read_status, .ack_interrupt = kszphy_ack_interrupt, .config_intr = kszphy_config_intr, - .get_sset_count = kszphy_get_sset_count, - .get_strings = kszphy_get_strings, - .get_stats = kszphy_get_stats, .suspend = genphy_suspend, .resume = genphy_resume, }, { @@ -952,6 +946,7 @@ static struct phy_driver ksphy_driver[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, .driver_data = &ksz9021_type, + .probe = kszphy_probe, .config_init = ksz9021_config_init, .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, @@ -971,6 +966,7 @@ static struct phy_driver ksphy_driver[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, .driver_data = &ksz9021_type, + .probe = kszphy_probe, .config_init = ksz9031_config_init, .config_aneg = genphy_config_aneg, .read_status = ksz9031_read_status, @@ -989,9 +985,6 @@ static struct phy_driver ksphy_driver[] = { .config_init = kszphy_config_init, .config_aneg = ksz8873mll_config_aneg, .read_status = ksz8873mll_read_status, - .get_sset_count = kszphy_get_sset_count, - .get_strings = kszphy_get_strings, - .get_stats = kszphy_get_stats, .suspend = genphy_suspend, .resume = genphy_resume, }, { @@ -1003,9 +996,6 @@ static struct phy_driver ksphy_driver[] = { .config_init = kszphy_config_init, .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, - .get_sset_count = kszphy_get_sset_count, - .get_strings = kszphy_get_strings, - .get_stats = kszphy_get_stats, .suspend = genphy_suspend, .resume = genphy_resume, }, { @@ -1017,9 +1007,6 @@ static struct phy_driver ksphy_driver[] = { .config_init = kszphy_config_init, .config_aneg = ksz8873mll_config_aneg, .read_status = ksz8873mll_read_status, - .get_sset_count = kszphy_get_sset_count, - .get_strings = kszphy_get_strings, - .get_stats = kszphy_get_stats, .suspend = genphy_suspend, .resume = genphy_resume, } }; -- cgit v1.2.3-59-g8ed1b From 81d2dd09ca11a2b834c8a915c6aabf8325d57ecf Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 14 Apr 2017 11:19:11 +0800 Subject: net: ethernet: mediatek: fix inconsistency between TXD and the used buffer Fix inconsistency between the TXD descriptor and the used buffer that would cause unexpected logic at mtk_tx_unmap() during skb housekeeping. Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 9e757684816d..50a6fe87e990 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -613,7 +613,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, struct mtk_mac *mac = netdev_priv(dev); struct mtk_eth *eth = mac->hw; struct mtk_tx_dma *itxd, *txd; - struct mtk_tx_buf *tx_buf; + struct mtk_tx_buf *itx_buf, *tx_buf; dma_addr_t mapped_addr; unsigned int nr_frags; int i, n_desc = 1; @@ -627,8 +627,8 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, fport = (mac->id + 1) << TX_DMA_FPORT_SHIFT; txd4 |= fport; - tx_buf = mtk_desc_to_tx_buf(ring, itxd); - memset(tx_buf, 0, sizeof(*tx_buf)); + itx_buf = mtk_desc_to_tx_buf(ring, itxd); + memset(itx_buf, 0, sizeof(*itx_buf)); if (gso) txd4 |= TX_DMA_TSO; @@ -647,9 +647,9 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, return -ENOMEM; WRITE_ONCE(itxd->txd1, mapped_addr); - tx_buf->flags |= MTK_TX_FLAGS_SINGLE0; - dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr); - dma_unmap_len_set(tx_buf, dma_len0, skb_headlen(skb)); + itx_buf->flags |= MTK_TX_FLAGS_SINGLE0; + dma_unmap_addr_set(itx_buf, dma_addr0, mapped_addr); + dma_unmap_len_set(itx_buf, dma_len0, skb_headlen(skb)); /* TX SG offload */ txd = itxd; @@ -685,10 +685,9 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, last_frag * TX_DMA_LS0)); WRITE_ONCE(txd->txd4, fport); - tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC; tx_buf = mtk_desc_to_tx_buf(ring, txd); memset(tx_buf, 0, sizeof(*tx_buf)); - + tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC; tx_buf->flags |= MTK_TX_FLAGS_PAGE0; dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr); dma_unmap_len_set(tx_buf, dma_len0, frag_map_size); @@ -698,7 +697,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, } /* store skb to cleanup */ - tx_buf->skb = skb; + itx_buf->skb = skb; WRITE_ONCE(itxd->txd4, txd4); WRITE_ONCE(itxd->txd3, (TX_DMA_SWC | TX_DMA_PLEN0(skb_headlen(skb)) | -- cgit v1.2.3-59-g8ed1b From 134d21525f5f7c89f1f6ce052a11ac09dc27b331 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Fri, 14 Apr 2017 11:19:12 +0800 Subject: net: ethernet: mediatek: fix inconsistency of port number carried in TXD Fix port inconsistency on TXD due to hardware BUG that would cause different port number is carried on the same TXD between tx_map() and tx_unmap() with the iperf test. It would cause confusing BQL logic which leads to kernel panic when dual GMAC runs concurrently. Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 14 +++++++++----- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 12 +++++++++--- 2 files changed, 18 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 50a6fe87e990..93949139e62c 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -648,6 +648,8 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, WRITE_ONCE(itxd->txd1, mapped_addr); itx_buf->flags |= MTK_TX_FLAGS_SINGLE0; + itx_buf->flags |= (!mac->id) ? MTK_TX_FLAGS_FPORT0 : + MTK_TX_FLAGS_FPORT1; dma_unmap_addr_set(itx_buf, dma_addr0, mapped_addr); dma_unmap_len_set(itx_buf, dma_len0, skb_headlen(skb)); @@ -689,6 +691,9 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, memset(tx_buf, 0, sizeof(*tx_buf)); tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC; tx_buf->flags |= MTK_TX_FLAGS_PAGE0; + tx_buf->flags |= (!mac->id) ? MTK_TX_FLAGS_FPORT0 : + MTK_TX_FLAGS_FPORT1; + dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr); dma_unmap_len_set(tx_buf, dma_len0, frag_map_size); frag_size -= frag_map_size; @@ -1011,17 +1016,16 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget) while ((cpu != dma) && budget) { u32 next_cpu = desc->txd2; - int mac; + int mac = 0; desc = mtk_qdma_phys_to_virt(ring, desc->txd2); if ((desc->txd3 & TX_DMA_OWNER_CPU) == 0) break; - mac = (desc->txd4 >> TX_DMA_FPORT_SHIFT) & - TX_DMA_FPORT_MASK; - mac--; - tx_buf = mtk_desc_to_tx_buf(ring, desc); + if (tx_buf->flags & MTK_TX_FLAGS_FPORT1) + mac = 1; + skb = tx_buf->skb; if (!skb) { condition = 1; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 99b1c8e9f16f..08285a96ff70 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -406,12 +406,18 @@ struct mtk_hw_stats { struct u64_stats_sync syncp; }; -/* PDMA descriptor can point at 1-2 segments. This enum allows us to track how - * memory was allocated so that it can be freed properly - */ enum mtk_tx_flags { + /* PDMA descriptor can point at 1-2 segments. This enum allows us to + * track how memory was allocated so that it can be freed properly. + */ MTK_TX_FLAGS_SINGLE0 = 0x01, MTK_TX_FLAGS_PAGE0 = 0x02, + + /* MTK_TX_FLAGS_FPORTx allows tracking which port the transmitted + * SKB out instead of looking up through hardware TX descriptor. + */ + MTK_TX_FLAGS_FPORT0 = 0x04, + MTK_TX_FLAGS_FPORT1 = 0x08, }; /* This enum allows us to identify how the clock is defined on the array of the -- cgit v1.2.3-59-g8ed1b From 1debdc8f9ebd07daf140e417b3841596911e0066 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Mon, 17 Apr 2017 15:55:22 +0300 Subject: sh_eth: unmap DMA buffers when freeing rings The DMA API debugging (when enabled) causes: WARNING: CPU: 0 PID: 1445 at lib/dma-debug.c:519 add_dma_entry+0xe0/0x12c DMA-API: exceeded 7 overlapping mappings of cacheline 0x01b2974d to be printed after repeated initialization of the Ether device, e.g. suspend/resume or 'ifconfig' up/down. This is because DMA buffers mapped using dma_map_single() in sh_eth_ring_format() and sh_eth_start_xmit() are never unmapped. Resolve this problem by unmapping the buffers when freeing the descriptor rings; in order to do it right, we'd have to add an extra parameter to sh_eth_txfree() (we rename this function to sh_eth_tx_free(), while at it). Based on the commit a47b70ea86bd ("ravb: unmap descriptors when freeing rings"). Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/sh_eth.c | 122 +++++++++++++++++++--------------- 1 file changed, 67 insertions(+), 55 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 54248775f227..f68c4db656ed 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1127,12 +1127,70 @@ static struct mdiobb_ops bb_ops = { .get_mdio_data = sh_get_mdio, }; +/* free Tx skb function */ +static int sh_eth_tx_free(struct net_device *ndev, bool sent_only) +{ + struct sh_eth_private *mdp = netdev_priv(ndev); + struct sh_eth_txdesc *txdesc; + int free_num = 0; + int entry; + bool sent; + + for (; mdp->cur_tx - mdp->dirty_tx > 0; mdp->dirty_tx++) { + entry = mdp->dirty_tx % mdp->num_tx_ring; + txdesc = &mdp->tx_ring[entry]; + sent = !(txdesc->status & cpu_to_le32(TD_TACT)); + if (sent_only && !sent) + break; + /* TACT bit must be checked before all the following reads */ + dma_rmb(); + netif_info(mdp, tx_done, ndev, + "tx entry %d status 0x%08x\n", + entry, le32_to_cpu(txdesc->status)); + /* Free the original skb. */ + if (mdp->tx_skbuff[entry]) { + dma_unmap_single(&ndev->dev, le32_to_cpu(txdesc->addr), + le32_to_cpu(txdesc->len) >> 16, + DMA_TO_DEVICE); + dev_kfree_skb_irq(mdp->tx_skbuff[entry]); + mdp->tx_skbuff[entry] = NULL; + free_num++; + } + txdesc->status = cpu_to_le32(TD_TFP); + if (entry >= mdp->num_tx_ring - 1) + txdesc->status |= cpu_to_le32(TD_TDLE); + + if (sent) { + ndev->stats.tx_packets++; + ndev->stats.tx_bytes += le32_to_cpu(txdesc->len) >> 16; + } + } + return free_num; +} + /* free skb and descriptor buffer */ static void sh_eth_ring_free(struct net_device *ndev) { struct sh_eth_private *mdp = netdev_priv(ndev); int ringsize, i; + if (mdp->rx_ring) { + for (i = 0; i < mdp->num_rx_ring; i++) { + if (mdp->rx_skbuff[i]) { + struct sh_eth_rxdesc *rxdesc = &mdp->rx_ring[i]; + + dma_unmap_single(&ndev->dev, + le32_to_cpu(rxdesc->addr), + ALIGN(mdp->rx_buf_sz, 32), + DMA_FROM_DEVICE); + } + } + ringsize = sizeof(struct sh_eth_rxdesc) * mdp->num_rx_ring; + dma_free_coherent(NULL, ringsize, mdp->rx_ring, + mdp->rx_desc_dma); + mdp->rx_ring = NULL; + } + /* Free Rx skb ringbuffer */ if (mdp->rx_skbuff) { for (i = 0; i < mdp->num_rx_ring; i++) @@ -1141,27 +1199,18 @@ static void sh_eth_ring_free(struct net_device *ndev) kfree(mdp->rx_skbuff); mdp->rx_skbuff = NULL; - /* Free Tx skb ringbuffer */ - if (mdp->tx_skbuff) { - for (i = 0; i < mdp->num_tx_ring; i++) - dev_kfree_skb(mdp->tx_skbuff[i]); - } - kfree(mdp->tx_skbuff); - mdp->tx_skbuff = NULL; - - if (mdp->rx_ring) { - ringsize = sizeof(struct sh_eth_rxdesc) * mdp->num_rx_ring; - dma_free_coherent(NULL, ringsize, mdp->rx_ring, - mdp->rx_desc_dma); - mdp->rx_ring = NULL; - } - if (mdp->tx_ring) { + sh_eth_tx_free(ndev, false); + ringsize = sizeof(struct sh_eth_txdesc) * mdp->num_tx_ring; dma_free_coherent(NULL, ringsize, mdp->tx_ring, mdp->tx_desc_dma); mdp->tx_ring = NULL; } + + /* Free Tx skb ringbuffer */ + kfree(mdp->tx_skbuff); + mdp->tx_skbuff = NULL; } /* format skb and descriptor buffer */ @@ -1409,43 +1458,6 @@ static void sh_eth_dev_exit(struct net_device *ndev) update_mac_address(ndev); } -/* free Tx skb function */ -static int sh_eth_txfree(struct net_device *ndev) -{ - struct sh_eth_private *mdp = netdev_priv(ndev); - struct sh_eth_txdesc *txdesc; - int free_num = 0; - int entry; - - for (; mdp->cur_tx - mdp->dirty_tx > 0; mdp->dirty_tx++) { - entry = mdp->dirty_tx % mdp->num_tx_ring; - txdesc = &mdp->tx_ring[entry]; - if (txdesc->status & cpu_to_le32(TD_TACT)) - break; - /* TACT bit must be checked before all the following reads */ - dma_rmb(); - netif_info(mdp, tx_done, ndev, - "tx entry %d status 0x%08x\n", - entry, le32_to_cpu(txdesc->status)); - /* Free the original skb. */ - if (mdp->tx_skbuff[entry]) { - dma_unmap_single(&ndev->dev, le32_to_cpu(txdesc->addr), - le32_to_cpu(txdesc->len) >> 16, - DMA_TO_DEVICE); - dev_kfree_skb_irq(mdp->tx_skbuff[entry]); - mdp->tx_skbuff[entry] = NULL; - free_num++; - } - txdesc->status = cpu_to_le32(TD_TFP); - if (entry >= mdp->num_tx_ring - 1) - txdesc->status |= cpu_to_le32(TD_TDLE); - - ndev->stats.tx_packets++; - ndev->stats.tx_bytes += le32_to_cpu(txdesc->len) >> 16; - } - return free_num; -} - /* Packet receive function */ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota) { @@ -1690,7 +1702,7 @@ static void sh_eth_error(struct net_device *ndev, u32 intr_status) intr_status, mdp->cur_tx, mdp->dirty_tx, (u32)ndev->state, edtrr); /* dirty buffer free */ - sh_eth_txfree(ndev); + sh_eth_tx_free(ndev, true); /* SH7712 BUG */ if (edtrr ^ sh_eth_get_edtrr_trns(mdp)) { @@ -1751,7 +1763,7 @@ static irqreturn_t sh_eth_interrupt(int irq, void *netdev) /* Clear Tx interrupts */ sh_eth_write(ndev, intr_status & cd->tx_check, EESR); - sh_eth_txfree(ndev); + sh_eth_tx_free(ndev, true); netif_wake_queue(ndev); } @@ -2412,7 +2424,7 @@ static int sh_eth_start_xmit(struct sk_buff *skb, struct net_device *ndev) spin_lock_irqsave(&mdp->lock, flags); if ((mdp->cur_tx - mdp->dirty_tx) >= (mdp->num_tx_ring - 4)) { - if (!sh_eth_txfree(ndev)) { + if (!sh_eth_tx_free(ndev, true)) { netif_warn(mdp, tx_queued, ndev, "TxFD exhausted.\n"); netif_stop_queue(ndev); spin_unlock_irqrestore(&mdp->lock, flags); -- cgit v1.2.3-59-g8ed1b