From fe2e082f5da5b4a0a92ae32978f81507ef37ec66 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Tue, 20 Aug 2019 00:16:40 -0500 Subject: ecryptfs: fix a memory leak bug in parse_tag_1_packet() In parse_tag_1_packet(), if tag 1 packet contains a key larger than ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES, no cleanup is executed, leading to a memory leak on the allocated 'auth_tok_list_item'. To fix this issue, go to the label 'out_free' to perform the cleanup work. Cc: stable@vger.kernel.org Fixes: dddfa461fc89 ("[PATCH] eCryptfs: Public key; packet management") Signed-off-by: Wenwen Wang Signed-off-by: Tyler Hicks --- fs/ecryptfs/keystore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 216fbe6a4837..4dc09638de8f 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -1304,7 +1304,7 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat, printk(KERN_WARNING "Tag 1 packet contains key larger " "than ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES\n"); rc = -EINVAL; - goto out; + goto out_free; } memcpy((*new_auth_tok)->session_key.encrypted_key, &data[(*packet_size)], (body_size - (ECRYPTFS_SIG_SIZE + 2))); -- cgit v1.2.3-59-g8ed1b From b4a81b87a4cfe2bb26a4a943b748d96a43ef20e8 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Tue, 20 Aug 2019 00:33:54 -0500 Subject: ecryptfs: fix a memory leak bug in ecryptfs_init_messaging() In ecryptfs_init_messaging(), if the allocation for 'ecryptfs_msg_ctx_arr' fails, the previously allocated 'ecryptfs_daemon_hash' is not deallocated, leading to a memory leak bug. To fix this issue, free 'ecryptfs_daemon_hash' before returning the error. Cc: stable@vger.kernel.org Fixes: 88b4a07e6610 ("[PATCH] eCryptfs: Public key transport mechanism") Signed-off-by: Wenwen Wang Signed-off-by: Tyler Hicks --- fs/ecryptfs/messaging.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index d668e60b85b5..c05ca39aa449 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -379,6 +379,7 @@ int __init ecryptfs_init_messaging(void) * ecryptfs_message_buf_len), GFP_KERNEL); if (!ecryptfs_msg_ctx_arr) { + kfree(ecryptfs_daemon_hash); rc = -ENOMEM; goto out; } -- cgit v1.2.3-59-g8ed1b From 4ab2bb3c311a45d80d31f2f189606871669ed792 Mon Sep 17 00:00:00 2001 From: Filipe Laíns Date: Sat, 11 Jan 2020 19:24:19 +0000 Subject: HID: logitech-hidpp: BatteryVoltage: only read chargeStatus if extPower is active MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the HID++ 2.0 function getBatteryInfo() from the BatteryVoltage (0x1001) feature, chargeStatus is only valid if extPower is active. Previously we were ignoring extPower, which resulted in wrong values. Example: With an unplugged mouse $ cat /sys/class/power_supply/hidpp_battery_0/status Charging This patch fixes that, it also renames charge_sts to flags as charge_sts can be confused with chargeStatus from the spec. Spec: +--------+-------------------------------------------------------------------------+ | byte | 2 | +--------+--------------+------------+------------+----------+----------+----------+ | bit | 0..2 | 3 | 4 | 5 | 6 | 7 | +--------+--------------+------------+------------+----------+----------+----------+ | buffer | chargeStatus | fastCharge | slowCharge | critical | (unused) | extPower | +--------+--------------+------------+------------+----------+----------+----------+ Table 1 - battery voltage (0x1001), getBatteryInfo() (ASE 0), 3rd byte +-------+--------------------------------------+ | value | meaning | +-------+--------------------------------------+ | 0 | Charging | +-------+--------------------------------------+ | 1 | End of charge (100% charged) | +-------+--------------------------------------+ | 2 | Charge stopped (any "normal" reason) | +-------+--------------------------------------+ | 7 | Hardware error | +-------+--------------------------------------+ Table 2 - chargeStatus value Signed-off-by: Filipe Laíns Tested-by: Pedro Vanzella Reviewed-by: Pedro Vanzella Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-logitech-hidpp.c | 43 ++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 70e1cb928bf0..094f4f1b6555 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -1256,36 +1256,35 @@ static int hidpp20_battery_map_status_voltage(u8 data[3], int *voltage, { int status; - long charge_sts = (long)data[2]; + long flags = (long) data[2]; - *level = POWER_SUPPLY_CAPACITY_LEVEL_UNKNOWN; - switch (data[2] & 0xe0) { - case 0x00: - status = POWER_SUPPLY_STATUS_CHARGING; - break; - case 0x20: - status = POWER_SUPPLY_STATUS_FULL; - *level = POWER_SUPPLY_CAPACITY_LEVEL_FULL; - break; - case 0x40: + if (flags & 0x80) + switch (flags & 0x07) { + case 0: + status = POWER_SUPPLY_STATUS_CHARGING; + break; + case 1: + status = POWER_SUPPLY_STATUS_FULL; + *level = POWER_SUPPLY_CAPACITY_LEVEL_FULL; + break; + case 2: + status = POWER_SUPPLY_STATUS_NOT_CHARGING; + break; + default: + status = POWER_SUPPLY_STATUS_UNKNOWN; + break; + } + else status = POWER_SUPPLY_STATUS_DISCHARGING; - break; - case 0xe0: - status = POWER_SUPPLY_STATUS_NOT_CHARGING; - break; - default: - status = POWER_SUPPLY_STATUS_UNKNOWN; - } *charge_type = POWER_SUPPLY_CHARGE_TYPE_STANDARD; - if (test_bit(3, &charge_sts)) { + if (test_bit(3, &flags)) { *charge_type = POWER_SUPPLY_CHARGE_TYPE_FAST; } - if (test_bit(4, &charge_sts)) { + if (test_bit(4, &flags)) { *charge_type = POWER_SUPPLY_CHARGE_TYPE_TRICKLE; } - - if (test_bit(5, &charge_sts)) { + if (test_bit(5, &flags)) { *level = POWER_SUPPLY_CAPACITY_LEVEL_CRITICAL; } -- cgit v1.2.3-59-g8ed1b From 8c8c5a4994a306c217fd061cbfc5903399fd4c1c Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Fri, 10 Jan 2020 18:19:33 +0100 Subject: dma-contiguous: CMA: give precedence to cmdline Although the device tree might contain a reserved-memory DT node dedicated as the default CMA pool, users might want to change CMA's parameters using the kernel command line for debugging purposes and whatnot. Honor this by bypassing the reserved memory CMA setup, which will ultimately end up freeing the memblock and allow the command line CMA configuration routine to run. Signed-off-by: Nicolas Saenz Julienne Reviewed-by: Phil Elwell Signed-off-by: Christoph Hellwig --- kernel/dma/contiguous.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index daa4e6eefdde..8bc6f2d670f9 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -302,9 +302,16 @@ static int __init rmem_cma_setup(struct reserved_mem *rmem) phys_addr_t align = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order); phys_addr_t mask = align - 1; unsigned long node = rmem->fdt_node; + bool default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL); struct cma *cma; int err; + if (size_cmdline != -1 && default_cma) { + pr_info("Reserved memory: bypass %s node, using cmdline CMA params instead\n", + rmem->name); + return -EBUSY; + } + if (!of_get_flat_dt_prop(node, "reusable", NULL) || of_get_flat_dt_prop(node, "no-map", NULL)) return -EINVAL; @@ -322,7 +329,7 @@ static int __init rmem_cma_setup(struct reserved_mem *rmem) /* Architecture specific contiguous memory fixup. */ dma_contiguous_early_fixup(rmem->base, rmem->size); - if (of_get_flat_dt_prop(node, "linux,cma-default", NULL)) + if (default_cma) dma_contiguous_set_default(cma); rmem->ops = &rmem_cma_ops; -- cgit v1.2.3-59-g8ed1b From beae56192a2570578ae45050e73c5ff9254f63e6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 1 Feb 2020 12:56:48 +0100 Subject: HID: ite: Only bind to keyboard USB interface on Acer SW5-012 keyboard dock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 8f18eca9ebc5 ("HID: ite: Add USB id match for Acer SW5-012 keyboard dock") added the USB id for the Acer SW5-012's keyboard dock to the hid-ite driver to fix the rfkill driver not working. Most keyboard docks with an ITE 8595 keyboard/touchpad controller have the "Wireless Radio Control" bits which need the special hid-ite driver on the second USB interface (the mouse interface) and their touchpad only supports mouse emulation, so using generic hid-input handling for anything but the "Wireless Radio Control" bits is fine. On these devices we simply bind to all USB interfaces. But unlike other ITE8595 using keyboard docks, the Acer Aspire Switch 10 (SW5-012)'s touchpad not only does mouse emulation it also supports HID-multitouch and all the keys including the "Wireless Radio Control" bits have been moved to the first USB interface (the keyboard intf). So we need hid-ite to handle the first (keyboard) USB interface and have it NOT bind to the second (mouse) USB interface so that that can be handled by hid-multitouch.c and we get proper multi-touch support. This commit changes the hid_device_id for the SW5-012 keyboard dock to only match on hid devices from the HID_GROUP_GENERIC group, this way hid-ite will not bind the the mouse/multi-touch interface which has HID_GROUP_MULTITOUCH_WIN_8 as group. This fixes the regression to mouse-emulation mode introduced by adding the keyboard dock USB id. Cc: stable@vger.kernel.org Fixes: 8f18eca9ebc5 ("HID: ite: Add USB id match for Acer SW5-012 keyboard dock") Reported-by: Zdeněk Rampas Signed-off-by: Hans de Goede Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-ite.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c index c436e12feb23..6c55682c5974 100644 --- a/drivers/hid/hid-ite.c +++ b/drivers/hid/hid-ite.c @@ -41,8 +41,9 @@ static const struct hid_device_id ite_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE8595) }, { HID_USB_DEVICE(USB_VENDOR_ID_258A, USB_DEVICE_ID_258A_6A88) }, /* ITE8595 USB kbd ctlr, with Synaptics touchpad connected to it. */ - { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, - USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) }, + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, + USB_VENDOR_ID_SYNAPTICS, + USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) }, { } }; MODULE_DEVICE_TABLE(hid, ite_devices); -- cgit v1.2.3-59-g8ed1b From 51b2569402a38e206d26728b0099eb059ab315b5 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Wed, 5 Feb 2020 08:41:46 -0500 Subject: KVM: arm/arm64: Fix up includes for trace.h Fedora kernel builds on armv7hl began failing recently because kvm_arm_exception_type and kvm_arm_exception_class were undeclared in trace.h. Add the missing include. Fixes: 0e20f5e25556 ("KVM: arm/arm64: Cleanup MMIO handling") Signed-off-by: Jeremy Cline Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200205134146.82678-1-jcline@redhat.com --- virt/kvm/arm/trace.h | 1 + 1 file changed, 1 insertion(+) diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h index 204d210d01c2..cc94ccc68821 100644 --- a/virt/kvm/arm/trace.h +++ b/virt/kvm/arm/trace.h @@ -4,6 +4,7 @@ #include #include +#include #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm -- cgit v1.2.3-59-g8ed1b From 91ef26f914171cf753330f13724fd9142b5b1640 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 3 Feb 2020 18:11:10 +0100 Subject: dma-direct: relax addressability checks in dma_direct_supported dma_direct_supported tries to find the minimum addressable bitmask based on the end pfn and optional magic that architectures can use to communicate the size of the magic ZONE_DMA that can be used for bounce buffering. But between the DMA offsets that can change per device (or sometimes even region), the fact the ZONE_DMA isn't even guaranteed to be the lowest addresses and failure of having proper interfaces to the MM code this fails at least for one arm subarchitecture. As all the legacy DMA implementations have supported 32-bit DMA masks, and 32-bit masks are guranteed to always work by the API contract (using bounce buffers if needed), we can short cut the complicated check and always return true without breaking existing assumptions. Hopefully we can properly clean up the interaction with the arch defined zones and the bootmem allocator eventually. Fixes: ad3c7b18c5b3 ("arm: use swiotlb for bounce buffering on LPAE configs") Reported-by: Peter Ujfalusi Signed-off-by: Christoph Hellwig Tested-by: Peter Ujfalusi --- kernel/dma/direct.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 6af7ae83c4ad..32ec69cdba54 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -472,28 +472,26 @@ int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma, } #endif /* CONFIG_MMU */ -/* - * Because 32-bit DMA masks are so common we expect every architecture to be - * able to satisfy them - either by not supporting more physical memory, or by - * providing a ZONE_DMA32. If neither is the case, the architecture needs to - * use an IOMMU instead of the direct mapping. - */ int dma_direct_supported(struct device *dev, u64 mask) { - u64 min_mask; - - if (IS_ENABLED(CONFIG_ZONE_DMA)) - min_mask = DMA_BIT_MASK(zone_dma_bits); - else - min_mask = DMA_BIT_MASK(32); + u64 min_mask = (max_pfn - 1) << PAGE_SHIFT; - min_mask = min_t(u64, min_mask, (max_pfn - 1) << PAGE_SHIFT); + /* + * Because 32-bit DMA masks are so common we expect every architecture + * to be able to satisfy them - either by not supporting more physical + * memory, or by providing a ZONE_DMA32. If neither is the case, the + * architecture needs to use an IOMMU instead of the direct mapping. + */ + if (mask >= DMA_BIT_MASK(32)) + return 1; /* * This check needs to be against the actual bit mask value, so * use __phys_to_dma() here so that the SME encryption mask isn't * part of the check. */ + if (IS_ENABLED(CONFIG_ZONE_DMA)) + min_mask = min_t(u64, min_mask, DMA_BIT_MASK(zone_dma_bits)); return mask >= __phys_to_dma(dev, min_mask); } -- cgit v1.2.3-59-g8ed1b From 4a47cbae04844f0c5e2365aa6c217b61850bb832 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 3 Feb 2020 14:44:38 +0100 Subject: dma-direct: improve swiotlb error reporting Untangle the way how dma_direct_map_page calls into swiotlb to be able to properly report errors where the swiotlb DMA address overflows the mask separately from overflows in the !swiotlb case. This means that siotlb_map now has to do a little more work that duplicates dma_direct_map_page, but doing so greatly simplifies the calling convention. Signed-off-by: Christoph Hellwig Reviewed-by: Konrad Rzeszutek Wilk --- include/linux/swiotlb.h | 11 +++-------- kernel/dma/direct.c | 16 +++++++--------- kernel/dma/swiotlb.c | 42 +++++++++++++++++++++++------------------- 3 files changed, 33 insertions(+), 36 deletions(-) diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index cde3dc18e21a..046bb94bd4d6 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -64,6 +64,9 @@ extern void swiotlb_tbl_sync_single(struct device *hwdev, size_t size, enum dma_data_direction dir, enum dma_sync_target target); +dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction dir, unsigned long attrs); + #ifdef CONFIG_SWIOTLB extern enum swiotlb_force swiotlb_force; extern phys_addr_t io_tlb_start, io_tlb_end; @@ -73,8 +76,6 @@ static inline bool is_swiotlb_buffer(phys_addr_t paddr) return paddr >= io_tlb_start && paddr < io_tlb_end; } -bool swiotlb_map(struct device *dev, phys_addr_t *phys, dma_addr_t *dma_addr, - size_t size, enum dma_data_direction dir, unsigned long attrs); void __init swiotlb_exit(void); unsigned int swiotlb_max_segment(void); size_t swiotlb_max_mapping_size(struct device *dev); @@ -85,12 +86,6 @@ static inline bool is_swiotlb_buffer(phys_addr_t paddr) { return false; } -static inline bool swiotlb_map(struct device *dev, phys_addr_t *phys, - dma_addr_t *dma_addr, size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - return false; -} static inline void swiotlb_exit(void) { } diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 32ec69cdba54..594bddd04e01 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -357,13 +357,6 @@ void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, EXPORT_SYMBOL(dma_direct_unmap_sg); #endif -static inline bool dma_direct_possible(struct device *dev, dma_addr_t dma_addr, - size_t size) -{ - return swiotlb_force != SWIOTLB_FORCE && - dma_capable(dev, dma_addr, size, true); -} - dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs) @@ -371,8 +364,13 @@ dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, phys_addr_t phys = page_to_phys(page) + offset; dma_addr_t dma_addr = phys_to_dma(dev, phys); - if (unlikely(!dma_direct_possible(dev, dma_addr, size)) && - !swiotlb_map(dev, &phys, &dma_addr, size, dir, attrs)) { + if (unlikely(swiotlb_force == SWIOTLB_FORCE)) + return swiotlb_map(dev, phys, size, dir, attrs); + + if (unlikely(!dma_capable(dev, dma_addr, size, true))) { + if (swiotlb_force != SWIOTLB_NO_FORCE) + return swiotlb_map(dev, phys, size, dir, attrs); + report_addr(dev, dma_addr, size); return DMA_MAPPING_ERROR; } diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 9280d6f8271e..c19379fabd20 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -656,35 +657,38 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, } /* - * Create a swiotlb mapping for the buffer at @phys, and in case of DMAing + * Create a swiotlb mapping for the buffer at @paddr, and in case of DMAing * to the device copy the data into it as well. */ -bool swiotlb_map(struct device *dev, phys_addr_t *phys, dma_addr_t *dma_addr, - size_t size, enum dma_data_direction dir, unsigned long attrs) +dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size, + enum dma_data_direction dir, unsigned long attrs) { - trace_swiotlb_bounced(dev, *dma_addr, size, swiotlb_force); + phys_addr_t swiotlb_addr; + dma_addr_t dma_addr; - if (unlikely(swiotlb_force == SWIOTLB_NO_FORCE)) { - dev_warn_ratelimited(dev, - "Cannot do DMA to address %pa\n", phys); - return false; - } + trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size, + swiotlb_force); - /* Oh well, have to allocate and map a bounce buffer. */ - *phys = swiotlb_tbl_map_single(dev, __phys_to_dma(dev, io_tlb_start), - *phys, size, size, dir, attrs); - if (*phys == (phys_addr_t)DMA_MAPPING_ERROR) - return false; + swiotlb_addr = swiotlb_tbl_map_single(dev, + __phys_to_dma(dev, io_tlb_start), + paddr, size, size, dir, attrs); + if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR) + return DMA_MAPPING_ERROR; /* Ensure that the address returned is DMA'ble */ - *dma_addr = __phys_to_dma(dev, *phys); - if (unlikely(!dma_capable(dev, *dma_addr, size, true))) { - swiotlb_tbl_unmap_single(dev, *phys, size, size, dir, + dma_addr = __phys_to_dma(dev, swiotlb_addr); + if (unlikely(!dma_capable(dev, dma_addr, size, true))) { + swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, size, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); - return false; + dev_WARN_ONCE(dev, 1, + "swiotlb addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", + &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); + return DMA_MAPPING_ERROR; } - return true; + if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + arch_sync_dma_for_device(swiotlb_addr, size, dir); + return dma_addr; } size_t swiotlb_max_mapping_size(struct device *dev) -- cgit v1.2.3-59-g8ed1b From 75467ee48a5e04cf3ae3cb39aea6adee73aeff91 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 3 Feb 2020 14:54:50 +0100 Subject: dma-direct: improve DMA mask overflow reporting Remove the unset dma_mask case as that won't get into mapping calls anymore, and also report the other errors unconditonally and with a slightly improved message. Remove the now pointless report_addr helper. Signed-off-by: Christoph Hellwig Reviewed-by: Konrad Rzeszutek Wilk --- kernel/dma/direct.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 594bddd04e01..ac7956c38f69 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -23,18 +23,6 @@ */ unsigned int zone_dma_bits __ro_after_init = 24; -static void report_addr(struct device *dev, dma_addr_t dma_addr, size_t size) -{ - if (!dev->dma_mask) { - dev_err_once(dev, "DMA map on device without dma_mask\n"); - } else if (*dev->dma_mask >= DMA_BIT_MASK(32) || dev->bus_dma_limit) { - dev_err_once(dev, - "overflow %pad+%zu of DMA mask %llx bus limit %llx\n", - &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); - } - WARN_ON_ONCE(1); -} - static inline dma_addr_t phys_to_dma_direct(struct device *dev, phys_addr_t phys) { @@ -371,7 +359,9 @@ dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, if (swiotlb_force != SWIOTLB_NO_FORCE) return swiotlb_map(dev, phys, size, dir, attrs); - report_addr(dev, dma_addr, size); + dev_WARN_ONCE(dev, 1, + "DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", + &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); return DMA_MAPPING_ERROR; } @@ -409,7 +399,10 @@ dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr, dma_addr_t dma_addr = paddr; if (unlikely(!dma_capable(dev, dma_addr, size, false))) { - report_addr(dev, dma_addr, size); + dev_err_once(dev, + "DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", + &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); + WARN_ON_ONCE(1); return DMA_MAPPING_ERROR; } -- cgit v1.2.3-59-g8ed1b From d1520889782dff58610c0b6b54d4cf3211ceb690 Mon Sep 17 00:00:00 2001 From: Oleksandr Suvorov Date: Wed, 5 Feb 2020 18:04:36 +0200 Subject: ASoC: fsl_sai: Fix exiting path on probing failure If the imx-sdma driver is built as a module, the fsl-sai device doesn't disable on probing failure, which causes the warning in the next probing: ================================================================== fsl-sai 308a0000.sai: Unbalanced pm_runtime_enable! fsl-sai 308a0000.sai: Unbalanced pm_runtime_enable! fsl-sai 308a0000.sai: Unbalanced pm_runtime_enable! fsl-sai 308a0000.sai: Unbalanced pm_runtime_enable! fsl-sai 308a0000.sai: Unbalanced pm_runtime_enable! fsl-sai 308a0000.sai: Unbalanced pm_runtime_enable! ================================================================== Disabling the device properly fixes the issue. Fixes: 812ad463e089 ("ASoC: fsl_sai: Add support for runtime pm") Signed-off-by: Oleksandr Suvorov Link: https://lore.kernel.org/r/20200205160436.3813642-1-oleksandr.suvorov@toradex.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_sai.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index 8c3ea7300972..9d436b0c5718 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -1020,12 +1020,24 @@ static int fsl_sai_probe(struct platform_device *pdev) ret = devm_snd_soc_register_component(&pdev->dev, &fsl_component, &fsl_sai_dai, 1); if (ret) - return ret; + goto err_pm_disable; - if (sai->soc_data->use_imx_pcm) - return imx_pcm_dma_init(pdev, IMX_SAI_DMABUF_SIZE); - else - return devm_snd_dmaengine_pcm_register(&pdev->dev, NULL, 0); + if (sai->soc_data->use_imx_pcm) { + ret = imx_pcm_dma_init(pdev, IMX_SAI_DMABUF_SIZE); + if (ret) + goto err_pm_disable; + } else { + ret = devm_snd_dmaengine_pcm_register(&pdev->dev, NULL, 0); + if (ret) + goto err_pm_disable; + } + + return ret; + +err_pm_disable: + pm_runtime_disable(&pdev->dev); + + return ret; } static int fsl_sai_remove(struct platform_device *pdev) -- cgit v1.2.3-59-g8ed1b From c4a3922d2d20c710f827d3a115ee338e8d0467df Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 2 Feb 2020 20:30:52 -0800 Subject: netfilter: xt_hashlimit: reduce hashlimit_mutex scope for htable_put() It is unnecessary to hold hashlimit_mutex for htable_destroy() as it is already removed from the global hashtable and its refcount is already zero. Also, switch hinfo->use to refcount_t so that we don't have to hold the mutex until it reaches zero in htable_put(). Reported-and-tested-by: syzbot+adf6c6c2be1c3a718121@syzkaller.appspotmail.com Acked-by: Florian Westphal Signed-off-by: Cong Wang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_hashlimit.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index bccd47cd7190..cc475a608f81 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #define XT_HASHLIMIT_ALL (XT_HASHLIMIT_HASH_DIP | XT_HASHLIMIT_HASH_DPT | \ @@ -114,7 +115,7 @@ struct dsthash_ent { struct xt_hashlimit_htable { struct hlist_node node; /* global list of all htables */ - int use; + refcount_t use; u_int8_t family; bool rnd_initialized; @@ -315,7 +316,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg, for (i = 0; i < hinfo->cfg.size; i++) INIT_HLIST_HEAD(&hinfo->hash[i]); - hinfo->use = 1; + refcount_set(&hinfo->use, 1); hinfo->count = 0; hinfo->family = family; hinfo->rnd_initialized = false; @@ -420,7 +421,7 @@ static struct xt_hashlimit_htable *htable_find_get(struct net *net, hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) { if (!strcmp(name, hinfo->name) && hinfo->family == family) { - hinfo->use++; + refcount_inc(&hinfo->use); return hinfo; } } @@ -429,12 +430,11 @@ static struct xt_hashlimit_htable *htable_find_get(struct net *net, static void htable_put(struct xt_hashlimit_htable *hinfo) { - mutex_lock(&hashlimit_mutex); - if (--hinfo->use == 0) { + if (refcount_dec_and_mutex_lock(&hinfo->use, &hashlimit_mutex)) { hlist_del(&hinfo->node); + mutex_unlock(&hashlimit_mutex); htable_destroy(hinfo); } - mutex_unlock(&hashlimit_mutex); } /* The algorithm used is the Simple Token Bucket Filter (TBF) -- cgit v1.2.3-59-g8ed1b From 8d0015a7ab76b8b1e89a3e5f5710a6e5103f2dd5 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 2 Feb 2020 20:30:53 -0800 Subject: netfilter: xt_hashlimit: limit the max size of hashtable The user-specified hashtable size is unbound, this could easily lead to an OOM or a hung task as we hold the global mutex while allocating and initializing the new hashtable. Add a max value to cap both cfg->size and cfg->max, as suggested by Florian. Reported-and-tested-by: syzbot+adf6c6c2be1c3a718121@syzkaller.appspotmail.com Signed-off-by: Cong Wang Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_hashlimit.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index cc475a608f81..7a2c4b8408c4 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -837,6 +837,8 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 3); } +#define HASHLIMIT_MAX_SIZE 1048576 + static int hashlimit_mt_check_common(const struct xt_mtchk_param *par, struct xt_hashlimit_htable **hinfo, struct hashlimit_cfg3 *cfg, @@ -847,6 +849,14 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par, if (cfg->gc_interval == 0 || cfg->expire == 0) return -EINVAL; + if (cfg->size > HASHLIMIT_MAX_SIZE) { + cfg->size = HASHLIMIT_MAX_SIZE; + pr_info_ratelimited("size too large, truncated to %u\n", cfg->size); + } + if (cfg->max > HASHLIMIT_MAX_SIZE) { + cfg->max = HASHLIMIT_MAX_SIZE; + pr_info_ratelimited("max too large, truncated to %u\n", cfg->max); + } if (par->family == NFPROTO_IPV4) { if (cfg->srcmask > 32 || cfg->dstmask > 32) return -EINVAL; -- cgit v1.2.3-59-g8ed1b From a7da92c2c8a1faf253a3b3e292fda6910deba540 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 3 Feb 2020 13:06:18 +0100 Subject: netfilter: flowtable: skip offload setup if disabled nftables test case tests/shell/testcases/flowtable/0001flowtable_0 results in a crash. After the refactor, if we leave early via nf_flowtable_hw_offload(), then "struct flow_block_offload" is left in an uninitialized state, but later users assume its initialised. Fixes: a7965d58ddab02 ("netfilter: flowtable: add nf_flow_table_offload_cmd()") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_offload.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 83e1db37c3b0..06f00cdc3891 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -847,9 +847,6 @@ static int nf_flow_table_offload_cmd(struct flow_block_offload *bo, { int err; - if (!nf_flowtable_hw_offload(flowtable)) - return 0; - if (!dev->netdev_ops->ndo_setup_tc) return -EOPNOTSUPP; @@ -876,6 +873,9 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, struct flow_block_offload bo; int err; + if (!nf_flowtable_hw_offload(flowtable)) + return 0; + err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd, &extack); if (err < 0) return err; -- cgit v1.2.3-59-g8ed1b From 3f9e12e0df012c4a9a7fd7eb0d3ae69b459d6b2c Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 6 Feb 2020 16:58:45 +0100 Subject: ACPI: watchdog: Allow disabling WDAT at boot In case the WDAT interface is broken, give the user an option to ignore it to let a native driver bind to the watchdog device instead. Signed-off-by: Jean Delvare Acked-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/kernel-parameters.txt | 4 ++++ drivers/acpi/acpi_watchdog.c | 12 +++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index dbc22d684627..c07815d230bc 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -136,6 +136,10 @@ dynamic table installation which will install SSDT tables to /sys/firmware/acpi/tables/dynamic. + acpi_no_watchdog [HW,ACPI,WDT] + Ignore the ACPI-based watchdog interface (WDAT) and let + a native driver control the watchdog device instead. + acpi_rsdp= [ACPI,EFI,KEXEC] Pass the RSDP address to the kernel, mostly used on machines running EFI runtime service to boot the diff --git a/drivers/acpi/acpi_watchdog.c b/drivers/acpi/acpi_watchdog.c index b5516b04ffc0..ab6e434b4cee 100644 --- a/drivers/acpi/acpi_watchdog.c +++ b/drivers/acpi/acpi_watchdog.c @@ -55,12 +55,14 @@ static bool acpi_watchdog_uses_rtc(const struct acpi_table_wdat *wdat) } #endif +static bool acpi_no_watchdog; + static const struct acpi_table_wdat *acpi_watchdog_get_wdat(void) { const struct acpi_table_wdat *wdat = NULL; acpi_status status; - if (acpi_disabled) + if (acpi_disabled || acpi_no_watchdog) return NULL; status = acpi_get_table(ACPI_SIG_WDAT, 0, @@ -88,6 +90,14 @@ bool acpi_has_watchdog(void) } EXPORT_SYMBOL_GPL(acpi_has_watchdog); +/* ACPI watchdog can be disabled on boot command line */ +static int __init disable_acpi_watchdog(char *str) +{ + acpi_no_watchdog = true; + return 1; +} +__setup("acpi_no_watchdog", disable_acpi_watchdog); + void __init acpi_watchdog_init(void) { const struct acpi_wdat_entry *entries; -- cgit v1.2.3-59-g8ed1b From c664a4fa8f69308b8f624cff4fa1294e9aef880d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 29 Jan 2020 20:30:37 +0300 Subject: USB: serial: ir-usb: Silence harmless uninitialized variable warning The "actual_length" variable might be uninitialized on some failure paths. It's harmless but static analysis tools like Smatch complain and at runtime the UBSan tool will likely complain as well. Fixes: e7542bc382f8 ("USB: serial: ir-usb: make set_termios synchronous") Signed-off-by: Dan Carpenter Signed-off-by: Johan Hovold --- drivers/usb/serial/ir-usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/ir-usb.c b/drivers/usb/serial/ir-usb.c index 79d0586e2b33..172261a908d8 100644 --- a/drivers/usb/serial/ir-usb.c +++ b/drivers/usb/serial/ir-usb.c @@ -448,7 +448,7 @@ static void ir_set_termios(struct tty_struct *tty, usb_sndbulkpipe(udev, port->bulk_out_endpointAddress), transfer_buffer, 1, &actual_length, 5000); if (ret || actual_length != 1) { - if (actual_length != 1) + if (!ret) ret = -EIO; dev_err(&port->dev, "failed to change line speed: %d\n", ret); } -- cgit v1.2.3-59-g8ed1b From 7c3d02285ad558691f27fde760bcd841baa27eab Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 6 Feb 2020 12:18:19 +0100 Subject: USB: serial: ch341: fix receiver regression While assumed not to make a difference, not using the factor-2 prescaler makes the receiver more susceptible to errors. Specifically, there have been reports of problems with devices that cannot generate a 115200 rate with a smaller error than 2.1% (e.g. 117647 bps). But this can also be reproduced with a low-speed RS232 tranceiver at 115200 when the input rate matches the nominal rate. So whenever possible, enable the factor-2 prescaler and halve the divisor in order to use settings closer to that of the previous algorithm. Fixes: 35714565089e ("USB: serial: ch341: reimplement line-speed handling") Cc: stable # 5.5 Reported-by: Jakub Nantl Tested-by: Jakub Nantl Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold --- drivers/usb/serial/ch341.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index d3f420f3a083..c5ecdcd51ffc 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -205,6 +205,16 @@ static int ch341_get_divisor(speed_t speed) 16 * speed - 16 * CH341_CLKRATE / (clk_div * (div + 1))) div++; + /* + * Prefer lower base clock (fact = 0) if even divisor. + * + * Note that this makes the receiver more tolerant to errors. + */ + if (fact == 1 && div % 2 == 0) { + div /= 2; + fact = 0; + } + return (0x100 - div) << 8 | fact << 2 | ps; } -- cgit v1.2.3-59-g8ed1b From 8a6483ac634acda3f599f50082c652d2d37199c7 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Mon, 9 Dec 2019 10:27:07 +0200 Subject: drm/bridge: tc358767: fix poll timeouts Link training fails with: Link training timeout waiting for LT_LOOPDONE! main link enable error: -110 This is caused by too tight timeouts, which were changed recently in aa92213f388b ("drm/bridge: tc358767: Simplify polling in tc_link_training()"). With a quick glance, the commit does not change the timeouts. However, the method of delaying/sleeping is different, and as the timeout in the previous implementation was not explicit, the new version in practice has much tighter timeout. The same change was made to other parts in the driver, but the link training timeout is the only one I have seen causing issues. Nevertheless, 1 us sleep is not very sane, and the timeouts look pretty tight, so lets fix all the timeouts. One exception was the aux busy poll, where the poll sleep was much longer than necessary (or optimal). I measured the times on my setup, and now the sleep times are set to such values that they result in multiple loops, but not too many (say, 5-10 loops). The timeouts were all increased to 100ms, which should be more than enough for all of these, but in case of bad errors, shouldn't stop the driver as multi-second timeouts could do. Signed-off-by: Tomi Valkeinen Fixes: aa92213f388b ("drm/bridge: tc358767: Simplify polling in tc_link_training()") Tested-by: Andrey Smirnov Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20191209082707.24531-1-tomi.valkeinen@ti.com --- drivers/gpu/drm/bridge/tc358767.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 8029478ffebb..b0b0ccbb059d 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -297,7 +297,7 @@ static inline int tc_poll_timeout(struct tc_data *tc, unsigned int addr, static int tc_aux_wait_busy(struct tc_data *tc) { - return tc_poll_timeout(tc, DP0_AUXSTATUS, AUX_BUSY, 0, 1000, 100000); + return tc_poll_timeout(tc, DP0_AUXSTATUS, AUX_BUSY, 0, 100, 100000); } static int tc_aux_write_data(struct tc_data *tc, const void *data, @@ -640,7 +640,7 @@ static int tc_aux_link_setup(struct tc_data *tc) if (ret) goto err; - ret = tc_poll_timeout(tc, DP_PHY_CTRL, PHY_RDY, PHY_RDY, 1, 1000); + ret = tc_poll_timeout(tc, DP_PHY_CTRL, PHY_RDY, PHY_RDY, 100, 100000); if (ret == -ETIMEDOUT) { dev_err(tc->dev, "Timeout waiting for PHY to become ready"); return ret; @@ -876,7 +876,7 @@ static int tc_wait_link_training(struct tc_data *tc) int ret; ret = tc_poll_timeout(tc, DP0_LTSTAT, LT_LOOPDONE, - LT_LOOPDONE, 1, 1000); + LT_LOOPDONE, 500, 100000); if (ret) { dev_err(tc->dev, "Link training timeout waiting for LT_LOOPDONE!\n"); return ret; @@ -949,7 +949,7 @@ static int tc_main_link_enable(struct tc_data *tc) dp_phy_ctrl &= ~(DP_PHY_RST | PHY_M1_RST | PHY_M0_RST); ret = regmap_write(tc->regmap, DP_PHY_CTRL, dp_phy_ctrl); - ret = tc_poll_timeout(tc, DP_PHY_CTRL, PHY_RDY, PHY_RDY, 1, 1000); + ret = tc_poll_timeout(tc, DP_PHY_CTRL, PHY_RDY, PHY_RDY, 500, 100000); if (ret) { dev_err(dev, "timeout waiting for phy become ready"); return ret; -- cgit v1.2.3-59-g8ed1b From 48bc281e4bf049abd3bb98371209315651bf4a14 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 15 Jan 2020 13:56:53 +0100 Subject: drm/bridge: ti-tfp410: Update drm_connector_init_with_ddc() error message The code was changed to call drm_connector_init_with_ddc() instead of drm_connector_init(), but the corresponding error message was not updated. Fixes: cfb444552926989f ("drm/bridge: ti-tfp410: Provide ddc symlink in connector sysfs directory") Signed-off-by: Geert Uytterhoeven Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20200115125653.5519-1-geert+renesas@glider.be --- drivers/gpu/drm/bridge/ti-tfp410.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/ti-tfp410.c b/drivers/gpu/drm/bridge/ti-tfp410.c index 6f6d6d1e60ae..f195a4732e0b 100644 --- a/drivers/gpu/drm/bridge/ti-tfp410.c +++ b/drivers/gpu/drm/bridge/ti-tfp410.c @@ -140,7 +140,8 @@ static int tfp410_attach(struct drm_bridge *bridge) dvi->connector_type, dvi->ddc); if (ret) { - dev_err(dvi->dev, "drm_connector_init() failed: %d\n", ret); + dev_err(dvi->dev, "drm_connector_init_with_ddc() failed: %d\n", + ret); return ret; } -- cgit v1.2.3-59-g8ed1b From e7598fac323aad0e502415edeffd567315994dd6 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 10 Feb 2020 10:36:56 +0100 Subject: iommu/vt-d: Fix compile warning from intel-svm.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The intel_svm_is_pasid_valid() needs to be marked inline, otherwise it causes the compile warning below: CC [M] drivers/dma/idxd/cdev.o In file included from drivers/dma/idxd/cdev.c:9:0: ./include/linux/intel-svm.h:125:12: warning: ‘intel_svm_is_pasid_valid’ defined but not used [-Wunused-function] static int intel_svm_is_pasid_valid(struct device *dev, int pasid) ^~~~~~~~~~~~~~~~~~~~~~~~ Reported-by: Borislav Petkov Fixes: 15060aba71711 ('iommu/vt-d: Helper function to query if a pasid has any active users') Signed-off-by: Joerg Roedel --- include/linux/intel-svm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 94f047a8a845..d7c403d0dd27 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -122,7 +122,7 @@ static inline int intel_svm_unbind_mm(struct device *dev, int pasid) BUG(); } -static int intel_svm_is_pasid_valid(struct device *dev, int pasid) +static inline int intel_svm_is_pasid_valid(struct device *dev, int pasid) { return -EINVAL; } -- cgit v1.2.3-59-g8ed1b From 9437bfda00f3b26eb5f475737ddaaf4dc07fee4f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 30 Jan 2020 15:05:45 +0200 Subject: ASoC: atmel: fix atmel_ssc_set_audio link failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ssc audio driver can call into both pdc and dma backends. With the latest rework, the logic to do this in a safe way avoiding link errors was removed, bringing back link errors that were fixed long ago in commit 061981ff8cc8 ("ASoC: atmel: properly select dma driver state") such as sound/soc/atmel/atmel_ssc_dai.o: In function `atmel_ssc_set_audio': atmel_ssc_dai.c:(.text+0xac): undefined reference to `atmel_pcm_pdc_platform_register' Fix it this time using Makefile hacks and a comment to prevent this from accidentally getting removed again rather than Kconfig hacks. Fixes: 18291410557f ("ASoC: atmel: enable SOC_SSC_PDC and SOC_SSC_DMA in Kconfig") Signed-off-by: Arnd Bergmann Signed-off-by: Codrin Ciubotariu Link: https://lore.kernel.org/r/20200130130545.31148-1-codrin.ciubotariu@microchip.com Reviewed-by: Michał Mirosław Signed-off-by: Mark Brown --- sound/soc/atmel/Kconfig | 4 ++-- sound/soc/atmel/Makefile | 10 ++++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/sound/soc/atmel/Kconfig b/sound/soc/atmel/Kconfig index d1dc8e6366dc..71f2d42188c4 100644 --- a/sound/soc/atmel/Kconfig +++ b/sound/soc/atmel/Kconfig @@ -10,11 +10,11 @@ config SND_ATMEL_SOC if SND_ATMEL_SOC config SND_ATMEL_SOC_PDC - tristate + bool depends on HAS_DMA config SND_ATMEL_SOC_DMA - tristate + bool select SND_SOC_GENERIC_DMAENGINE_PCM config SND_ATMEL_SOC_SSC diff --git a/sound/soc/atmel/Makefile b/sound/soc/atmel/Makefile index 1f6890ed3738..c7d2989791be 100644 --- a/sound/soc/atmel/Makefile +++ b/sound/soc/atmel/Makefile @@ -6,8 +6,14 @@ snd-soc-atmel_ssc_dai-objs := atmel_ssc_dai.o snd-soc-atmel-i2s-objs := atmel-i2s.o snd-soc-mchp-i2s-mcc-objs := mchp-i2s-mcc.o -obj-$(CONFIG_SND_ATMEL_SOC_PDC) += snd-soc-atmel-pcm-pdc.o -obj-$(CONFIG_SND_ATMEL_SOC_DMA) += snd-soc-atmel-pcm-dma.o +# pdc and dma need to both be built-in if any user of +# ssc is built-in. +ifdef CONFIG_SND_ATMEL_SOC_PDC +obj-$(CONFIG_SND_ATMEL_SOC_SSC) += snd-soc-atmel-pcm-pdc.o +endif +ifdef CONFIG_SND_ATMEL_SOC_DMA +obj-$(CONFIG_SND_ATMEL_SOC_SSC) += snd-soc-atmel-pcm-dma.o +endif obj-$(CONFIG_SND_ATMEL_SOC_SSC) += snd-soc-atmel_ssc_dai.o obj-$(CONFIG_SND_ATMEL_SOC_I2S) += snd-soc-atmel-i2s.o obj-$(CONFIG_SND_MCHP_SOC_I2S_MCC) += snd-soc-mchp-i2s-mcc.o -- cgit v1.2.3-59-g8ed1b From 43bcb1c0507858cdc95e425017dcc33f8105df39 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Thu, 6 Feb 2020 22:02:21 +0200 Subject: ALSA: hda: do not override bus codec_mask in link_get() snd_hdac_ext_bus_link_get() does not work correctly in case there are multiple codecs on the bus. It unconditionally resets the bus->codec_mask value. As per documentation in hdaudio.h and existing use in client code, this field should be used to store bit flag of detected codecs on the bus. By overwriting value of the codec_mask, information on all detected codecs is lost. No current user of hdac is impacted, but use of bus->codec_mask is planned in future patches for SOF. Signed-off-by: Kai Vehmanen Reviewed-by: Ranjani Sridharan Reviewed-by: Pierre-Louis Bossart Reviewed-by: Takashi Iwai Link: https://lore.kernel.org/r/20200206200223.7715-1-kai.vehmanen@linux.intel.com Signed-off-by: Mark Brown --- sound/hda/ext/hdac_ext_controller.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sound/hda/ext/hdac_ext_controller.c b/sound/hda/ext/hdac_ext_controller.c index cfab60d88c92..09ff209df4a3 100644 --- a/sound/hda/ext/hdac_ext_controller.c +++ b/sound/hda/ext/hdac_ext_controller.c @@ -254,6 +254,7 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_bus_link_power_down_all); int snd_hdac_ext_bus_link_get(struct hdac_bus *bus, struct hdac_ext_link *link) { + unsigned long codec_mask; int ret = 0; mutex_lock(&bus->lock); @@ -280,9 +281,11 @@ int snd_hdac_ext_bus_link_get(struct hdac_bus *bus, * HDA spec section 4.3 - Codec Discovery */ udelay(521); - bus->codec_mask = snd_hdac_chip_readw(bus, STATESTS); - dev_dbg(bus->dev, "codec_mask = 0x%lx\n", bus->codec_mask); - snd_hdac_chip_writew(bus, STATESTS, bus->codec_mask); + codec_mask = snd_hdac_chip_readw(bus, STATESTS); + dev_dbg(bus->dev, "codec_mask = 0x%lx\n", codec_mask); + snd_hdac_chip_writew(bus, STATESTS, codec_mask); + if (!bus->codec_mask) + bus->codec_mask = codec_mask; } mutex_unlock(&bus->lock); -- cgit v1.2.3-59-g8ed1b From 816938b272b0ac0203e25ce50483bd284ea4a2db Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Thu, 6 Feb 2020 22:02:22 +0200 Subject: ASoC: SOF: Intel: hda: fix ordering bug in resume flow When HDA controller is resumed from suspend, i915 HDMI/DP codec requires that following order of actions is kept: - i915 display power up and configuration of link params - hda link reset and setup Current SOF HDA code delegates display codec power control to the codec driver. This works most of the time, but in runtime PM sequences, the above constraint may be violated. On platforms where BIOS values for HDA link parameters do not match hardware reset defaults, this may lead to errors in HDA verb transactions after resume. Fix the issue by explicitly powering the display codec in the HDA controller resume/suspend calls, thus ensuring correct ordering. Special handling is needed for the D0i3 flow, where display power must be turned off even though DSP is left powered. Now that we have more invocations of the display power helper functions, the conditional checks surrounding each call have been moved inside hda_codec_i915_display_power(). The two special cases of display powering at initial probe are handled separately. The intent is to avoid powering the display whenever no display codecs are used. Note that early powering of display was removed in commit 687ae9e287b3 ("ASoC: intel: skl: Fix display power regression"). This change was also copied to the SOF driver. No failures have resulted as hardware default values for link parameters have worked out of the box. However with recent i915 driver changes like done in commit 87c1694533c9 ("drm/i915: save AUD_FREQ_CNTRL state at audio domain suspend"), this does not hold anymore and errors are hit. Cc: Takashi Iwai Signed-off-by: Kai Vehmanen Reviewed-by: Ranjani Sridharan Reviewed-by: Pierre-Louis Bossart Reviewed-by: Takashi Iwai Link: https://lore.kernel.org/r/20200206200223.7715-2-kai.vehmanen@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/hda-codec.c | 12 ++++++++---- sound/soc/sof/intel/hda-dsp.c | 11 +++++++++++ sound/soc/sof/intel/hda.c | 2 +- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/sound/soc/sof/intel/hda-codec.c b/sound/soc/sof/intel/hda-codec.c index 9106ab8dac6f..ff45075ef720 100644 --- a/sound/soc/sof/intel/hda-codec.c +++ b/sound/soc/sof/intel/hda-codec.c @@ -174,8 +174,10 @@ void hda_codec_i915_display_power(struct snd_sof_dev *sdev, bool enable) { struct hdac_bus *bus = sof_to_bus(sdev); - dev_dbg(bus->dev, "Turning i915 HDAC power %d\n", enable); - snd_hdac_display_power(bus, HDA_CODEC_IDX_CONTROLLER, enable); + if (HDA_IDISP_CODEC(bus->codec_mask)) { + dev_dbg(bus->dev, "Turning i915 HDAC power %d\n", enable); + snd_hdac_display_power(bus, HDA_CODEC_IDX_CONTROLLER, enable); + } } EXPORT_SYMBOL_NS(hda_codec_i915_display_power, SND_SOC_SOF_HDA_AUDIO_CODEC_I915); @@ -189,7 +191,8 @@ int hda_codec_i915_init(struct snd_sof_dev *sdev) if (ret < 0) return ret; - hda_codec_i915_display_power(sdev, true); + /* codec_mask not yet known, power up for probe */ + snd_hdac_display_power(bus, HDA_CODEC_IDX_CONTROLLER, true); return 0; } @@ -200,7 +203,8 @@ int hda_codec_i915_exit(struct snd_sof_dev *sdev) struct hdac_bus *bus = sof_to_bus(sdev); int ret; - hda_codec_i915_display_power(sdev, false); + /* power down unconditionally */ + snd_hdac_display_power(bus, HDA_CODEC_IDX_CONTROLLER, false); ret = snd_hdac_i915_exit(bus); diff --git a/sound/soc/sof/intel/hda-dsp.c b/sound/soc/sof/intel/hda-dsp.c index 4a4d318f97ff..0848b79967a9 100644 --- a/sound/soc/sof/intel/hda-dsp.c +++ b/sound/soc/sof/intel/hda-dsp.c @@ -428,6 +428,9 @@ static int hda_suspend(struct snd_sof_dev *sdev, bool runtime_suspend) return ret; } + /* display codec can powered off after link reset */ + hda_codec_i915_display_power(sdev, false); + return 0; } @@ -439,6 +442,9 @@ static int hda_resume(struct snd_sof_dev *sdev, bool runtime_resume) #endif int ret; + /* display codec must be powered before link reset */ + hda_codec_i915_display_power(sdev, true); + /* * clear TCSEL to clear playback on some HD Audio * codecs. PCI TCSEL is defined in the Intel manuals. @@ -482,6 +488,8 @@ int hda_dsp_resume(struct snd_sof_dev *sdev) struct pci_dev *pci = to_pci_dev(sdev->dev); if (sdev->s0_suspend) { + hda_codec_i915_display_power(sdev, true); + /* restore L1SEN bit */ if (hda->l1_support_changed) snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR, @@ -531,6 +539,9 @@ int hda_dsp_suspend(struct snd_sof_dev *sdev) int ret; if (sdev->s0_suspend) { + /* we can't keep a wakeref to display driver at suspend */ + hda_codec_i915_display_power(sdev, false); + /* enable L1SEN to make sure the system can enter S0Ix */ hda->l1_support_changed = snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR, diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index 65b86dd044f1..8fddafb5c1d4 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -381,7 +381,7 @@ static int hda_init_caps(struct snd_sof_dev *sdev) hda_codec_probe_bus(sdev, hda_codec_use_common_hdmi); if (!HDA_IDISP_CODEC(bus->codec_mask)) - hda_codec_i915_display_power(sdev, false); + hda_codec_i915_exit(sdev); /* * we are done probing so decrement link counts -- cgit v1.2.3-59-g8ed1b From af7aae1b1f6306a1cda4da393e920a1334eaa3d4 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Thu, 6 Feb 2020 22:02:23 +0200 Subject: ASoC: SOF: Intel: hda: move i915 init earlier To be compliant with i915 display driver requirements, i915 power-up must be done before any HDA communication takes place, including parsing the bus capabilities. Otherwise the initial codec probe may fail. Move i915 initialization earlier in the SOF HDA sequence. This sequence is now aligned with the snd-hda-intel driver where the display_power() call is before snd_hdac_bus_parse_capabilities() and rest of the capability parsing. Also remove unnecessary ifdef around hda_codec_i915_init(). There's a dummy implementation provided if CONFIG_SND_SOC_SOF_HDA is not enabled. Signed-off-by: Kai Vehmanen Reviewed-by: Ranjani Sridharan Reviewed-by: Pierre-Louis Bossart Reviewed-by: Takashi Iwai Link: https://lore.kernel.org/r/20200206200223.7715-3-kai.vehmanen@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/hda.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index 8fddafb5c1d4..25946a1c2822 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -286,6 +286,13 @@ static int hda_init(struct snd_sof_dev *sdev) /* HDA base */ sdev->bar[HDA_DSP_HDA_BAR] = bus->remap_addr; + /* init i915 and HDMI codecs */ + ret = hda_codec_i915_init(sdev); + if (ret < 0) { + dev_err(sdev->dev, "error: init i915 and HDMI codec failed\n"); + return ret; + } + /* get controller capabilities */ ret = hda_dsp_ctrl_get_caps(sdev); if (ret < 0) @@ -353,15 +360,6 @@ static int hda_init_caps(struct snd_sof_dev *sdev) if (bus->ppcap) dev_dbg(sdev->dev, "PP capability, will probe DSP later.\n"); -#if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA) - /* init i915 and HDMI codecs */ - ret = hda_codec_i915_init(sdev); - if (ret < 0) { - dev_err(sdev->dev, "error: init i915 and HDMI codec failed\n"); - return ret; - } -#endif - /* Init HDA controller after i915 init */ ret = hda_dsp_ctrl_init_chip(sdev, true); if (ret < 0) { @@ -611,6 +609,7 @@ free_streams: iounmap(sdev->bar[HDA_DSP_BAR]); hdac_bus_unmap: iounmap(bus->remap_addr); + hda_codec_i915_exit(sdev); err: return ret; } -- cgit v1.2.3-59-g8ed1b From f148b9f402ef002b57bcff3964d45abc8ffb6c3f Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 10 Feb 2020 15:45:50 +0200 Subject: xhci: Force Maximum Packet size for Full-speed bulk devices to valid range. A Full-speed bulk USB audio device (DJ-Tech CTRL) with a invalid Maximum Packet Size of 4 causes a xHC "Parameter Error" at enumeration. This is because valid Maximum packet sizes for Full-speed bulk endpoints are 8, 16, 32 and 64 bytes. Hosts are not required to support other values than these. See usb 2 specs section 5.8.3 for details. The device starts working after forcing the maximum packet size to 8. This is most likely the case with other devices as well, so force the maximum packet size to a valid range. Cc: stable@vger.kernel.org Reported-by: Rene D Obermueller Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20200210134553.9144-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 3b1388fa2f36..0e2701649369 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1475,9 +1475,15 @@ int xhci_endpoint_init(struct xhci_hcd *xhci, /* Allow 3 retries for everything but isoc, set CErr = 3 */ if (!usb_endpoint_xfer_isoc(&ep->desc)) err_count = 3; - /* Some devices get this wrong */ - if (usb_endpoint_xfer_bulk(&ep->desc) && udev->speed == USB_SPEED_HIGH) - max_packet = 512; + /* HS bulk max packet should be 512, FS bulk supports 8, 16, 32 or 64 */ + if (usb_endpoint_xfer_bulk(&ep->desc)) { + if (udev->speed == USB_SPEED_HIGH) + max_packet = 512; + if (udev->speed == USB_SPEED_FULL) { + max_packet = rounddown_pow_of_two(max_packet); + max_packet = clamp_val(max_packet, 8, 64); + } + } /* xHCI 1.0 and 1.1 indicates that ctrl ep avg TRB Length should be 8 */ if (usb_endpoint_xfer_control(&ep->desc) && xhci->hci_version >= 0x100) avg_trb_len = 8; -- cgit v1.2.3-59-g8ed1b From fc57313d1017dd6b6f37a94e88daa8df54368ecc Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 10 Feb 2020 15:45:51 +0200 Subject: xhci: Fix memory leak when caching protocol extended capability PSI tables xhci driver assumed that xHC controllers have at most one custom supported speed table (PSI) for all usb 3.x ports. Memory was allocated for one PSI table under the xhci hub structure. Turns out this is not the case, some controllers have a separate "supported protocol capability" entry with a PSI table for each port. This means each usb3 roothub port can in theory support different custom speeds. To solve this, cache all supported protocol capabilities with their PSI tables in an array, and add pointers to the xhci port structure so that every port points to its capability entry in the array. When creating the SuperSpeedPlus USB Device Capability BOS descriptor for the xhci USB 3.1 roothub we for now will use only data from the first USB 3.1 capable protocol capability entry in the array. This could be improved later, this patch focuses resolving the memory leak. Reported-by: Paul Menzel Reported-by: Sajja Venkateswara Rao Fixes: 47189098f8be ("xhci: parse xhci protocol speed ID list for usb 3.1 usage") Cc: stable # v4.4+ Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20200210134553.9144-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 25 ++++++++++++------- drivers/usb/host/xhci-mem.c | 58 ++++++++++++++++++++++++++++----------------- drivers/usb/host/xhci.h | 14 ++++++++--- 3 files changed, 64 insertions(+), 33 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 7a3a29e5e9d2..af92b2576fe9 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -55,6 +55,7 @@ static u8 usb_bos_descriptor [] = { static int xhci_create_usb3_bos_desc(struct xhci_hcd *xhci, char *buf, u16 wLength) { + struct xhci_port_cap *port_cap = NULL; int i, ssa_count; u32 temp; u16 desc_size, ssp_cap_size, ssa_size = 0; @@ -64,16 +65,24 @@ static int xhci_create_usb3_bos_desc(struct xhci_hcd *xhci, char *buf, ssp_cap_size = sizeof(usb_bos_descriptor) - desc_size; /* does xhci support USB 3.1 Enhanced SuperSpeed */ - if (xhci->usb3_rhub.min_rev >= 0x01) { + for (i = 0; i < xhci->num_port_caps; i++) { + if (xhci->port_caps[i].maj_rev == 0x03 && + xhci->port_caps[i].min_rev >= 0x01) { + usb3_1 = true; + port_cap = &xhci->port_caps[i]; + break; + } + } + + if (usb3_1) { /* does xhci provide a PSI table for SSA speed attributes? */ - if (xhci->usb3_rhub.psi_count) { + if (port_cap->psi_count) { /* two SSA entries for each unique PSI ID, RX and TX */ - ssa_count = xhci->usb3_rhub.psi_uid_count * 2; + ssa_count = port_cap->psi_uid_count * 2; ssa_size = ssa_count * sizeof(u32); ssp_cap_size -= 16; /* skip copying the default SSA */ } desc_size += ssp_cap_size; - usb3_1 = true; } memcpy(buf, &usb_bos_descriptor, min(desc_size, wLength)); @@ -99,7 +108,7 @@ static int xhci_create_usb3_bos_desc(struct xhci_hcd *xhci, char *buf, } /* If PSI table exists, add the custom speed attributes from it */ - if (usb3_1 && xhci->usb3_rhub.psi_count) { + if (usb3_1 && port_cap->psi_count) { u32 ssp_cap_base, bm_attrib, psi, psi_mant, psi_exp; int offset; @@ -111,7 +120,7 @@ static int xhci_create_usb3_bos_desc(struct xhci_hcd *xhci, char *buf, /* attribute count SSAC bits 4:0 and ID count SSIC bits 8:5 */ bm_attrib = (ssa_count - 1) & 0x1f; - bm_attrib |= (xhci->usb3_rhub.psi_uid_count - 1) << 5; + bm_attrib |= (port_cap->psi_uid_count - 1) << 5; put_unaligned_le32(bm_attrib, &buf[ssp_cap_base + 4]); if (wLength < desc_size + ssa_size) @@ -124,8 +133,8 @@ static int xhci_create_usb3_bos_desc(struct xhci_hcd *xhci, char *buf, * USB 3.1 requires two SSA entries (RX and TX) for every link */ offset = desc_size; - for (i = 0; i < xhci->usb3_rhub.psi_count; i++) { - psi = xhci->usb3_rhub.psi[i]; + for (i = 0; i < port_cap->psi_count; i++) { + psi = port_cap->psi[i]; psi &= ~USB_SSP_SUBLINK_SPEED_RSVD; psi_exp = XHCI_EXT_PORT_PSIE(psi); psi_mant = XHCI_EXT_PORT_PSIM(psi); diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 0e2701649369..bd5b152df6c0 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1915,17 +1915,16 @@ no_bw: xhci->usb3_rhub.num_ports = 0; xhci->num_active_eps = 0; kfree(xhci->usb2_rhub.ports); - kfree(xhci->usb2_rhub.psi); kfree(xhci->usb3_rhub.ports); - kfree(xhci->usb3_rhub.psi); kfree(xhci->hw_ports); kfree(xhci->rh_bw); kfree(xhci->ext_caps); + for (i = 0; i < xhci->num_port_caps; i++) + kfree(xhci->port_caps[i].psi); + kfree(xhci->port_caps); xhci->usb2_rhub.ports = NULL; - xhci->usb2_rhub.psi = NULL; xhci->usb3_rhub.ports = NULL; - xhci->usb3_rhub.psi = NULL; xhci->hw_ports = NULL; xhci->rh_bw = NULL; xhci->ext_caps = NULL; @@ -2126,6 +2125,7 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports, u8 major_revision, minor_revision; struct xhci_hub *rhub; struct device *dev = xhci_to_hcd(xhci)->self.sysdev; + struct xhci_port_cap *port_cap; temp = readl(addr); major_revision = XHCI_EXT_PORT_MAJOR(temp); @@ -2160,31 +2160,39 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports, /* WTF? "Valid values are ‘1’ to MaxPorts" */ return; - rhub->psi_count = XHCI_EXT_PORT_PSIC(temp); - if (rhub->psi_count) { - rhub->psi = kcalloc_node(rhub->psi_count, sizeof(*rhub->psi), - GFP_KERNEL, dev_to_node(dev)); - if (!rhub->psi) - rhub->psi_count = 0; + port_cap = &xhci->port_caps[xhci->num_port_caps++]; + if (xhci->num_port_caps > max_caps) + return; + + port_cap->maj_rev = major_revision; + port_cap->min_rev = minor_revision; + port_cap->psi_count = XHCI_EXT_PORT_PSIC(temp); + + if (port_cap->psi_count) { + port_cap->psi = kcalloc_node(port_cap->psi_count, + sizeof(*port_cap->psi), + GFP_KERNEL, dev_to_node(dev)); + if (!port_cap->psi) + port_cap->psi_count = 0; - rhub->psi_uid_count++; - for (i = 0; i < rhub->psi_count; i++) { - rhub->psi[i] = readl(addr + 4 + i); + port_cap->psi_uid_count++; + for (i = 0; i < port_cap->psi_count; i++) { + port_cap->psi[i] = readl(addr + 4 + i); /* count unique ID values, two consecutive entries can * have the same ID if link is assymetric */ - if (i && (XHCI_EXT_PORT_PSIV(rhub->psi[i]) != - XHCI_EXT_PORT_PSIV(rhub->psi[i - 1]))) - rhub->psi_uid_count++; + if (i && (XHCI_EXT_PORT_PSIV(port_cap->psi[i]) != + XHCI_EXT_PORT_PSIV(port_cap->psi[i - 1]))) + port_cap->psi_uid_count++; xhci_dbg(xhci, "PSIV:%d PSIE:%d PLT:%d PFD:%d LP:%d PSIM:%d\n", - XHCI_EXT_PORT_PSIV(rhub->psi[i]), - XHCI_EXT_PORT_PSIE(rhub->psi[i]), - XHCI_EXT_PORT_PLT(rhub->psi[i]), - XHCI_EXT_PORT_PFD(rhub->psi[i]), - XHCI_EXT_PORT_LP(rhub->psi[i]), - XHCI_EXT_PORT_PSIM(rhub->psi[i])); + XHCI_EXT_PORT_PSIV(port_cap->psi[i]), + XHCI_EXT_PORT_PSIE(port_cap->psi[i]), + XHCI_EXT_PORT_PLT(port_cap->psi[i]), + XHCI_EXT_PORT_PFD(port_cap->psi[i]), + XHCI_EXT_PORT_LP(port_cap->psi[i]), + XHCI_EXT_PORT_PSIM(port_cap->psi[i])); } } /* cache usb2 port capabilities */ @@ -2219,6 +2227,7 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports, continue; } hw_port->rhub = rhub; + hw_port->port_cap = port_cap; rhub->num_ports++; } /* FIXME: Should we disable ports not in the Extended Capabilities? */ @@ -2309,6 +2318,11 @@ static int xhci_setup_port_arrays(struct xhci_hcd *xhci, gfp_t flags) if (!xhci->ext_caps) return -ENOMEM; + xhci->port_caps = kcalloc_node(cap_count, sizeof(*xhci->port_caps), + flags, dev_to_node(dev)); + if (!xhci->port_caps) + return -ENOMEM; + offset = cap_start; while (offset) { diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 13d8838cd552..3ecee10fdcdc 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1702,12 +1702,20 @@ struct xhci_bus_state { * Intel Lynx Point LP xHCI host. */ #define XHCI_MAX_REXIT_TIMEOUT_MS 20 +struct xhci_port_cap { + u32 *psi; /* array of protocol speed ID entries */ + u8 psi_count; + u8 psi_uid_count; + u8 maj_rev; + u8 min_rev; +}; struct xhci_port { __le32 __iomem *addr; int hw_portnum; int hcd_portnum; struct xhci_hub *rhub; + struct xhci_port_cap *port_cap; }; struct xhci_hub { @@ -1719,9 +1727,6 @@ struct xhci_hub { /* supported prococol extended capabiliy values */ u8 maj_rev; u8 min_rev; - u32 *psi; /* array of protocol speed ID entries */ - u8 psi_count; - u8 psi_uid_count; }; /* There is one xhci_hcd structure per controller */ @@ -1880,6 +1885,9 @@ struct xhci_hcd { /* cached usb2 extened protocol capabilites */ u32 *ext_caps; unsigned int num_ext_caps; + /* cached extended protocol port capabilities */ + struct xhci_port_cap *port_caps; + unsigned int num_port_caps; /* Compliance Mode Recovery Data */ struct timer_list comp_mode_recovery_timer; u32 port_status_u0; -- cgit v1.2.3-59-g8ed1b From 024d411e9c5d49eb96c825af52a3ce2682895676 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 10 Feb 2020 15:45:52 +0200 Subject: xhci: fix runtime pm enabling for quirky Intel hosts Intel hosts that need the XHCI_PME_STUCK_QUIRK flag should enable runtime pm by calling xhci_pme_acpi_rtd3_enable() before usb_hcd_pci_probe() calls pci_dev_run_wake(). Otherwise usage count for the device won't be decreased, and runtime suspend is prevented. usb_hcd_pci_probe() only decreases the usage count if device can generate run-time wake-up events, i.e. when pci_dev_run_wake() returns true. This issue was exposed by pci_dev_run_wake() change in commit 8feaec33b986 ("PCI / PM: Always check PME wakeup capability for runtime wakeup support") and should be backported to kernels with that change Cc: # 4.13+ Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20200210134553.9144-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 4917c5b033fa..da7c2db41671 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -302,6 +302,9 @@ static int xhci_pci_setup(struct usb_hcd *hcd) if (!usb_hcd_is_primary_hcd(hcd)) return 0; + if (xhci->quirks & XHCI_PME_STUCK_QUIRK) + xhci_pme_acpi_rtd3_enable(pdev); + xhci_dbg(xhci, "Got SBRN %u\n", (unsigned int) xhci->sbrn); /* Find any debug ports */ @@ -359,9 +362,6 @@ static int xhci_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) HCC_MAX_PSA(xhci->hcc_params) >= 4) xhci->shared_hcd->can_do_streams = 1; - if (xhci->quirks & XHCI_PME_STUCK_QUIRK) - xhci_pme_acpi_rtd3_enable(dev); - /* USB-2 and USB-3 roothubs initialized, allow runtime pm suspend */ pm_runtime_put_noidle(&dev->dev); -- cgit v1.2.3-59-g8ed1b From a3ae87dce3a5abe0b57c811bab02b2564b574106 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 10 Feb 2020 15:45:53 +0200 Subject: xhci: apply XHCI_PME_STUCK_QUIRK to Intel Comet Lake platforms Intel Comet Lake based platform require the XHCI_PME_STUCK_QUIRK quirk as well. Without this xHC can not enter D3 in runtime suspend. Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20200210134553.9144-5-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index da7c2db41671..5e9b537df631 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -49,6 +49,7 @@ #define PCI_DEVICE_ID_INTEL_TITAN_RIDGE_4C_XHCI 0x15ec #define PCI_DEVICE_ID_INTEL_TITAN_RIDGE_DD_XHCI 0x15f0 #define PCI_DEVICE_ID_INTEL_ICE_LAKE_XHCI 0x8a13 +#define PCI_DEVICE_ID_INTEL_CML_XHCI 0xa3af #define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9 #define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba @@ -187,7 +188,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI)) { + pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_CML_XHCI)) { xhci->quirks |= XHCI_PME_STUCK_QUIRK; } if (pdev->vendor == PCI_VENDOR_ID_INTEL && -- cgit v1.2.3-59-g8ed1b From 93134df520f23f4e9998c425b8987edca7016817 Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Tue, 4 Feb 2020 19:34:02 +0000 Subject: staging: vt6656: fix sign of rx_dbm to bb_pre_ed_rssi. bb_pre_ed_rssi is an u8 rx_dm always returns negative signed values add minus operator to always yield positive. fixes issue where rx sensitivity is always set to maximum because the unsigned numbers were always greater then 100. Fixes: 63b9907f58f1 ("staging: vt6656: mac80211 conversion: create rx function.") Cc: stable Signed-off-by: Malcolm Priestley Link: https://lore.kernel.org/r/aceac98c-6e69-3ce1-dfec-2bf27b980221@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vt6656/dpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/vt6656/dpc.c b/drivers/staging/vt6656/dpc.c index 821aae8ca402..a0b60e7d1086 100644 --- a/drivers/staging/vt6656/dpc.c +++ b/drivers/staging/vt6656/dpc.c @@ -98,7 +98,7 @@ int vnt_rx_data(struct vnt_private *priv, struct vnt_rcb *ptr_rcb, vnt_rf_rssi_to_dbm(priv, tail->rssi, &rx_dbm); - priv->bb_pre_ed_rssi = (u8)rx_dbm + 1; + priv->bb_pre_ed_rssi = (u8)-rx_dbm + 1; priv->current_rssi = priv->bb_pre_ed_rssi; skb_pull(skb, sizeof(*head)); -- cgit v1.2.3-59-g8ed1b From 6d67b0290b4b84c477e6a2fc6e005e174d3c7786 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Mon, 27 Jan 2020 15:56:16 -0800 Subject: staging: android: ashmem: Disallow ashmem memory from being remapped When ashmem file is mmapped, the resulting vma->vm_file points to the backing shmem file with the generic fops that do not check ashmem permissions like fops of ashmem do. If an mremap is done on the ashmem region, then the permission checks will be skipped. Fix that by disallowing mapping operation on the backing shmem file. Reported-by: Jann Horn Signed-off-by: Suren Baghdasaryan Cc: stable # 4.4,4.9,4.14,4.18,5.4 Signed-off-by: Todd Kjos Reviewed-by: Joel Fernandes (Google) Link: https://lore.kernel.org/r/20200127235616.48920-1-tkjos@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ashmem.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 5891d0744a76..8044510d8ec6 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -351,8 +351,23 @@ static inline vm_flags_t calc_vm_may_flags(unsigned long prot) _calc_vm_trans(prot, PROT_EXEC, VM_MAYEXEC); } +static int ashmem_vmfile_mmap(struct file *file, struct vm_area_struct *vma) +{ + /* do not allow to mmap ashmem backing shmem file directly */ + return -EPERM; +} + +static unsigned long +ashmem_vmfile_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + return current->mm->get_unmapped_area(file, addr, len, pgoff, flags); +} + static int ashmem_mmap(struct file *file, struct vm_area_struct *vma) { + static struct file_operations vmfile_fops; struct ashmem_area *asma = file->private_data; int ret = 0; @@ -393,6 +408,19 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma) } vmfile->f_mode |= FMODE_LSEEK; asma->file = vmfile; + /* + * override mmap operation of the vmfile so that it can't be + * remapped which would lead to creation of a new vma with no + * asma permission checks. Have to override get_unmapped_area + * as well to prevent VM_BUG_ON check for f_ops modification. + */ + if (!vmfile_fops.mmap) { + vmfile_fops = *vmfile->f_op; + vmfile_fops.mmap = ashmem_vmfile_mmap; + vmfile_fops.get_unmapped_area = + ashmem_vmfile_get_unmapped_area; + } + vmfile->f_op = &vmfile_fops; } get_file(asma->file); -- cgit v1.2.3-59-g8ed1b From 8ae9a588ca35eb9c32dc03299c5e1f4a1e9a9617 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 26 Jan 2020 22:05:49 +0000 Subject: staging: rtl8723bs: fix copy of overlapping memory Currently the rtw_sprintf prints the contents of thread_name onto thread_name and this can lead to a potential copy of a string over itself. Avoid this by printing the literal string RTWHALXT instread of the contents of thread_name. Addresses-Coverity: ("copy of overlapping memory") Fixes: 554c0a3abf21 ("staging: Add rtl8723bs sdio wifi driver") Signed-off-by: Colin Ian King Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20200126220549.9849-1-colin.king@canonical.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c b/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c index b44e902ed338..b6d56cfb0a19 100644 --- a/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c +++ b/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c @@ -476,14 +476,13 @@ int rtl8723bs_xmit_thread(void *context) s32 ret; struct adapter *padapter; struct xmit_priv *pxmitpriv; - u8 thread_name[20] = "RTWHALXT"; - + u8 thread_name[20]; ret = _SUCCESS; padapter = context; pxmitpriv = &padapter->xmitpriv; - rtw_sprintf(thread_name, 20, "%s-"ADPT_FMT, thread_name, ADPT_ARG(padapter)); + rtw_sprintf(thread_name, 20, "RTWHALXT-" ADPT_FMT, ADPT_ARG(padapter)); thread_enter(thread_name); DBG_871X("start "FUNC_ADPT_FMT"\n", FUNC_ADPT_ARG(padapter)); -- cgit v1.2.3-59-g8ed1b From c3709b3285009e0c1448510b9460e96146cd5c9a Mon Sep 17 00:00:00 2001 From: Alistair Delva Date: Sun, 2 Feb 2020 20:22:54 -0800 Subject: staging: android: Delete the 'vsoc' driver The 'vsoc' driver was required for an early iteration of the Android 'cuttlefish' virtual platform, but this platform has been wholly converted to use virtio drivers instead. Delete this old driver. Cc: Greg Kroah-Hartman Cc: Joel Fernandes Cc: Greg Hartman Cc: kernel-team@android.com Cc: devel@driverdev.osuosl.org Signed-off-by: Alistair Delva Reviewed-by: Joel Fernandes (Google) Link: https://lore.kernel.org/r/20200203042254.80360-1-adelva@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/Kconfig | 8 - drivers/staging/android/Makefile | 1 - drivers/staging/android/TODO | 9 - drivers/staging/android/uapi/vsoc_shm.h | 295 -------- drivers/staging/android/vsoc.c | 1149 ------------------------------- 5 files changed, 1462 deletions(-) delete mode 100644 drivers/staging/android/uapi/vsoc_shm.h delete mode 100644 drivers/staging/android/vsoc.c diff --git a/drivers/staging/android/Kconfig b/drivers/staging/android/Kconfig index d6d605d5cbde..8d8fd5c29349 100644 --- a/drivers/staging/android/Kconfig +++ b/drivers/staging/android/Kconfig @@ -14,14 +14,6 @@ config ASHMEM It is, in theory, a good memory allocator for low-memory devices, because it can discard shared memory units when under memory pressure. -config ANDROID_VSOC - tristate "Android Virtual SoC support" - depends on PCI_MSI - help - This option adds support for the Virtual SoC driver needed to boot - a 'cuttlefish' Android image inside QEmu. The driver interacts with - a QEmu ivshmem device. If built as a module, it will be called vsoc. - source "drivers/staging/android/ion/Kconfig" endif # if ANDROID diff --git a/drivers/staging/android/Makefile b/drivers/staging/android/Makefile index 14bd9c6ce10d..3b66cd0b0ec5 100644 --- a/drivers/staging/android/Makefile +++ b/drivers/staging/android/Makefile @@ -4,4 +4,3 @@ ccflags-y += -I$(src) # needed for trace events obj-y += ion/ obj-$(CONFIG_ASHMEM) += ashmem.o -obj-$(CONFIG_ANDROID_VSOC) += vsoc.o diff --git a/drivers/staging/android/TODO b/drivers/staging/android/TODO index 767dd98fd92d..80eccfaf6db5 100644 --- a/drivers/staging/android/TODO +++ b/drivers/staging/android/TODO @@ -9,14 +9,5 @@ ion/ - Split /dev/ion up into multiple nodes (e.g. /dev/ion/heap0) - Better test framework (integration with VGEM was suggested) -vsoc.c, uapi/vsoc_shm.h - - The current driver uses the same wait queue for all of the futexes in a - region. This will cause false wakeups in regions with a large number of - waiting threads. We should eventually use multiple queues and select the - queue based on the region. - - Add debugfs support for examining the permissions of regions. - - Remove VSOC_WAIT_FOR_INCOMING_INTERRUPT ioctl. This functionality has been - superseded by the futex and is there for legacy reasons. - Please send patches to Greg Kroah-Hartman and Cc: Arve Hjønnevåg and Riley Andrews diff --git a/drivers/staging/android/uapi/vsoc_shm.h b/drivers/staging/android/uapi/vsoc_shm.h deleted file mode 100644 index 6291fb24efb2..000000000000 --- a/drivers/staging/android/uapi/vsoc_shm.h +++ /dev/null @@ -1,295 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2017 Google, Inc. - * - */ - -#ifndef _UAPI_LINUX_VSOC_SHM_H -#define _UAPI_LINUX_VSOC_SHM_H - -#include - -/** - * A permission is a token that permits a receiver to read and/or write an area - * of memory within a Vsoc region. - * - * An fd_scoped permission grants both read and write access, and can be - * attached to a file description (see open(2)). - * Ownership of the area can then be shared by passing a file descriptor - * among processes. - * - * begin_offset and end_offset define the area of memory that is controlled by - * the permission. owner_offset points to a word, also in shared memory, that - * controls ownership of the area. - * - * ownership of the region expires when the associated file description is - * released. - * - * At most one permission can be attached to each file description. - * - * This is useful when implementing HALs like gralloc that scope and pass - * ownership of shared resources via file descriptors. - * - * The caller is responsibe for doing any fencing. - * - * The calling process will normally identify a currently free area of - * memory. It will construct a proposed fd_scoped_permission_arg structure: - * - * begin_offset and end_offset describe the area being claimed - * - * owner_offset points to the location in shared memory that indicates the - * owner of the area. - * - * owned_value is the value that will be stored in owner_offset iff the - * permission can be granted. It must be different than VSOC_REGION_FREE. - * - * Two fd_scoped_permission structures are compatible if they vary only by - * their owned_value fields. - * - * The driver ensures that, for any group of simultaneous callers proposing - * compatible fd_scoped_permissions, it will accept exactly one of the - * propopsals. The other callers will get a failure with errno of EAGAIN. - * - * A process receiving a file descriptor can identify the region being - * granted using the VSOC_GET_FD_SCOPED_PERMISSION ioctl. - */ -struct fd_scoped_permission { - __u32 begin_offset; - __u32 end_offset; - __u32 owner_offset; - __u32 owned_value; -}; - -/* - * This value represents a free area of memory. The driver expects to see this - * value at owner_offset when creating a permission otherwise it will not do it, - * and will write this value back once the permission is no longer needed. - */ -#define VSOC_REGION_FREE ((__u32)0) - -/** - * ioctl argument for VSOC_CREATE_FD_SCOPE_PERMISSION - */ -struct fd_scoped_permission_arg { - struct fd_scoped_permission perm; - __s32 managed_region_fd; -}; - -#define VSOC_NODE_FREE ((__u32)0) - -/* - * Describes a signal table in shared memory. Each non-zero entry in the - * table indicates that the receiver should signal the futex at the given - * offset. Offsets are relative to the region, not the shared memory window. - * - * interrupt_signalled_offset is used to reliably signal interrupts across the - * vmm boundary. There are two roles: transmitter and receiver. For example, - * in the host_to_guest_signal_table the host is the transmitter and the - * guest is the receiver. The protocol is as follows: - * - * 1. The transmitter should convert the offset of the futex to an offset - * in the signal table [0, (1 << num_nodes_lg2)) - * The transmitter can choose any appropriate hashing algorithm, including - * hash = futex_offset & ((1 << num_nodes_lg2) - 1) - * - * 3. The transmitter should atomically compare and swap futex_offset with 0 - * at hash. There are 3 possible outcomes - * a. The swap fails because the futex_offset is already in the table. - * The transmitter should stop. - * b. Some other offset is in the table. This is a hash collision. The - * transmitter should move to another table slot and try again. One - * possible algorithm: - * hash = (hash + 1) & ((1 << num_nodes_lg2) - 1) - * c. The swap worked. Continue below. - * - * 3. The transmitter atomically swaps 1 with the value at the - * interrupt_signalled_offset. There are two outcomes: - * a. The prior value was 1. In this case an interrupt has already been - * posted. The transmitter is done. - * b. The prior value was 0, indicating that the receiver may be sleeping. - * The transmitter will issue an interrupt. - * - * 4. On waking the receiver immediately exchanges a 0 with the - * interrupt_signalled_offset. If it receives a 0 then this a spurious - * interrupt. That may occasionally happen in the current protocol, but - * should be rare. - * - * 5. The receiver scans the signal table by atomicaly exchanging 0 at each - * location. If a non-zero offset is returned from the exchange the - * receiver wakes all sleepers at the given offset: - * futex((int*)(region_base + old_value), FUTEX_WAKE, MAX_INT); - * - * 6. The receiver thread then does a conditional wait, waking immediately - * if the value at interrupt_signalled_offset is non-zero. This catches cases - * here additional signals were posted while the table was being scanned. - * On the guest the wait is handled via the VSOC_WAIT_FOR_INCOMING_INTERRUPT - * ioctl. - */ -struct vsoc_signal_table_layout { - /* log_2(Number of signal table entries) */ - __u32 num_nodes_lg2; - /* - * Offset to the first signal table entry relative to the start of the - * region - */ - __u32 futex_uaddr_table_offset; - /* - * Offset to an atomic_t / atomic uint32_t. A non-zero value indicates - * that one or more offsets are currently posted in the table. - * semi-unique access to an entry in the table - */ - __u32 interrupt_signalled_offset; -}; - -#define VSOC_REGION_WHOLE ((__s32)0) -#define VSOC_DEVICE_NAME_SZ 16 - -/** - * Each HAL would (usually) talk to a single device region - * Mulitple entities care about these regions: - * - The ivshmem_server will populate the regions in shared memory - * - The guest kernel will read the region, create minor device nodes, and - * allow interested parties to register for FUTEX_WAKE events in the region - * - HALs will access via the minor device nodes published by the guest kernel - * - Host side processes will access the region via the ivshmem_server: - * 1. Pass name to ivshmem_server at a UNIX socket - * 2. ivshmemserver will reply with 2 fds: - * - host->guest doorbell fd - * - guest->host doorbell fd - * - fd for the shared memory region - * - region offset - * 3. Start a futex receiver thread on the doorbell fd pointed at the - * signal_nodes - */ -struct vsoc_device_region { - __u16 current_version; - __u16 min_compatible_version; - __u32 region_begin_offset; - __u32 region_end_offset; - __u32 offset_of_region_data; - struct vsoc_signal_table_layout guest_to_host_signal_table; - struct vsoc_signal_table_layout host_to_guest_signal_table; - /* Name of the device. Must always be terminated with a '\0', so - * the longest supported device name is 15 characters. - */ - char device_name[VSOC_DEVICE_NAME_SZ]; - /* There are two ways that permissions to access regions are handled: - * - When subdivided_by is VSOC_REGION_WHOLE, any process that can - * open the device node for the region gains complete access to it. - * - When subdivided is set processes that open the region cannot - * access it. Access to a sub-region must be established by invoking - * the VSOC_CREATE_FD_SCOPE_PERMISSION ioctl on the region - * referenced in subdivided_by, providing a fileinstance - * (represented by a fd) opened on this region. - */ - __u32 managed_by; -}; - -/* - * The vsoc layout descriptor. - * The first 4K should be reserved for the shm header and region descriptors. - * The regions should be page aligned. - */ - -struct vsoc_shm_layout_descriptor { - __u16 major_version; - __u16 minor_version; - - /* size of the shm. This may be redundant but nice to have */ - __u32 size; - - /* number of shared memory regions */ - __u32 region_count; - - /* The offset to the start of region descriptors */ - __u32 vsoc_region_desc_offset; -}; - -/* - * This specifies the current version that should be stored in - * vsoc_shm_layout_descriptor.major_version and - * vsoc_shm_layout_descriptor.minor_version. - * It should be updated only if the vsoc_device_region and - * vsoc_shm_layout_descriptor structures have changed. - * Versioning within each region is transferred - * via the min_compatible_version and current_version fields in - * vsoc_device_region. The driver does not consult these fields: they are left - * for the HALs and host processes and will change independently of the layout - * version. - */ -#define CURRENT_VSOC_LAYOUT_MAJOR_VERSION 2 -#define CURRENT_VSOC_LAYOUT_MINOR_VERSION 0 - -#define VSOC_CREATE_FD_SCOPED_PERMISSION \ - _IOW(0xF5, 0, struct fd_scoped_permission) -#define VSOC_GET_FD_SCOPED_PERMISSION _IOR(0xF5, 1, struct fd_scoped_permission) - -/* - * This is used to signal the host to scan the guest_to_host_signal_table - * for new futexes to wake. This sends an interrupt if one is not already - * in flight. - */ -#define VSOC_MAYBE_SEND_INTERRUPT_TO_HOST _IO(0xF5, 2) - -/* - * When this returns the guest will scan host_to_guest_signal_table to - * check for new futexes to wake. - */ -/* TODO(ghartman): Consider moving this to the bottom half */ -#define VSOC_WAIT_FOR_INCOMING_INTERRUPT _IO(0xF5, 3) - -/* - * Guest HALs will use this to retrieve the region description after - * opening their device node. - */ -#define VSOC_DESCRIBE_REGION _IOR(0xF5, 4, struct vsoc_device_region) - -/* - * Wake any threads that may be waiting for a host interrupt on this region. - * This is mostly used during shutdown. - */ -#define VSOC_SELF_INTERRUPT _IO(0xF5, 5) - -/* - * This is used to signal the host to scan the guest_to_host_signal_table - * for new futexes to wake. This sends an interrupt unconditionally. - */ -#define VSOC_SEND_INTERRUPT_TO_HOST _IO(0xF5, 6) - -enum wait_types { - VSOC_WAIT_UNDEFINED = 0, - VSOC_WAIT_IF_EQUAL = 1, - VSOC_WAIT_IF_EQUAL_TIMEOUT = 2 -}; - -/* - * Wait for a condition to be true - * - * Note, this is sized and aligned so the 32 bit and 64 bit layouts are - * identical. - */ -struct vsoc_cond_wait { - /* Input: Offset of the 32 bit word to check */ - __u32 offset; - /* Input: Value that will be compared with the offset */ - __u32 value; - /* Monotonic time to wake at in seconds */ - __u64 wake_time_sec; - /* Input: Monotonic time to wait in nanoseconds */ - __u32 wake_time_nsec; - /* Input: Type of wait */ - __u32 wait_type; - /* Output: Number of times the thread woke before returning. */ - __u32 wakes; - /* Ensure that we're 8-byte aligned and 8 byte length for 32/64 bit - * compatibility. - */ - __u32 reserved_1; -}; - -#define VSOC_COND_WAIT _IOWR(0xF5, 7, struct vsoc_cond_wait) - -/* Wake any local threads waiting at the offset given in arg */ -#define VSOC_COND_WAKE _IO(0xF5, 8) - -#endif /* _UAPI_LINUX_VSOC_SHM_H */ diff --git a/drivers/staging/android/vsoc.c b/drivers/staging/android/vsoc.c deleted file mode 100644 index 1240bb0317d9..000000000000 --- a/drivers/staging/android/vsoc.c +++ /dev/null @@ -1,1149 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * drivers/android/staging/vsoc.c - * - * Android Virtual System on a Chip (VSoC) driver - * - * Copyright (C) 2017 Google, Inc. - * - * Author: ghartman@google.com - * - * Based on drivers/char/kvm_ivshmem.c - driver for KVM Inter-VM shared memory - * Copyright 2009 Cam Macdonell - * - * Based on cirrusfb.c and 8139cp.c: - * Copyright 1999-2001 Jeff Garzik - * Copyright 2001-2004 Jeff Garzik - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "uapi/vsoc_shm.h" - -#define VSOC_DEV_NAME "vsoc" - -/* - * Description of the ivshmem-doorbell PCI device used by QEmu. These - * constants follow docs/specs/ivshmem-spec.txt, which can be found in - * the QEmu repository. This was last reconciled with the version that - * came out with 2.8 - */ - -/* - * These constants are determined KVM Inter-VM shared memory device - * register offsets - */ -enum { - INTR_MASK = 0x00, /* Interrupt Mask */ - INTR_STATUS = 0x04, /* Interrupt Status */ - IV_POSITION = 0x08, /* VM ID */ - DOORBELL = 0x0c, /* Doorbell */ -}; - -static const int REGISTER_BAR; /* Equal to 0 */ -static const int MAX_REGISTER_BAR_LEN = 0x100; -/* - * The MSI-x BAR is not used directly. - * - * static const int MSI_X_BAR = 1; - */ -static const int SHARED_MEMORY_BAR = 2; - -struct vsoc_region_data { - char name[VSOC_DEVICE_NAME_SZ + 1]; - wait_queue_head_t interrupt_wait_queue; - /* TODO(b/73664181): Use multiple futex wait queues */ - wait_queue_head_t futex_wait_queue; - /* Flag indicating that an interrupt has been signalled by the host. */ - atomic_t *incoming_signalled; - /* Flag indicating the guest has signalled the host. */ - atomic_t *outgoing_signalled; - bool irq_requested; - bool device_created; -}; - -struct vsoc_device { - /* Kernel virtual address of REGISTER_BAR. */ - void __iomem *regs; - /* Physical address of SHARED_MEMORY_BAR. */ - phys_addr_t shm_phys_start; - /* Kernel virtual address of SHARED_MEMORY_BAR. */ - void __iomem *kernel_mapped_shm; - /* Size of the entire shared memory window in bytes. */ - size_t shm_size; - /* - * Pointer to the virtual address of the shared memory layout structure. - * This is probably identical to kernel_mapped_shm, but saving this - * here saves a lot of annoying casts. - */ - struct vsoc_shm_layout_descriptor *layout; - /* - * Points to a table of region descriptors in the kernel's virtual - * address space. Calculated from - * vsoc_shm_layout_descriptor.vsoc_region_desc_offset - */ - struct vsoc_device_region *regions; - /* Head of a list of permissions that have been granted. */ - struct list_head permissions; - struct pci_dev *dev; - /* Per-region (and therefore per-interrupt) information. */ - struct vsoc_region_data *regions_data; - /* - * Table of msi-x entries. This has to be separated from struct - * vsoc_region_data because the kernel deals with them as an array. - */ - struct msix_entry *msix_entries; - /* Mutex that protectes the permission list */ - struct mutex mtx; - /* Major number assigned by the kernel */ - int major; - /* Character device assigned by the kernel */ - struct cdev cdev; - /* Device class assigned by the kernel */ - struct class *class; - /* - * Flags that indicate what we've initialized. These are used to do an - * orderly cleanup of the device. - */ - bool enabled_device; - bool requested_regions; - bool cdev_added; - bool class_added; - bool msix_enabled; -}; - -static struct vsoc_device vsoc_dev; - -/* - * TODO(ghartman): Add a /sys filesystem entry that summarizes the permissions. - */ - -struct fd_scoped_permission_node { - struct fd_scoped_permission permission; - struct list_head list; -}; - -struct vsoc_private_data { - struct fd_scoped_permission_node *fd_scoped_permission_node; -}; - -static long vsoc_ioctl(struct file *, unsigned int, unsigned long); -static int vsoc_mmap(struct file *, struct vm_area_struct *); -static int vsoc_open(struct inode *, struct file *); -static int vsoc_release(struct inode *, struct file *); -static ssize_t vsoc_read(struct file *, char __user *, size_t, loff_t *); -static ssize_t vsoc_write(struct file *, const char __user *, size_t, loff_t *); -static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin); -static int -do_create_fd_scoped_permission(struct vsoc_device_region *region_p, - struct fd_scoped_permission_node *np, - struct fd_scoped_permission_arg __user *arg); -static void -do_destroy_fd_scoped_permission(struct vsoc_device_region *owner_region_p, - struct fd_scoped_permission *perm); -static long do_vsoc_describe_region(struct file *, - struct vsoc_device_region __user *); -static ssize_t vsoc_get_area(struct file *filp, __u32 *perm_off); - -/** - * Validate arguments on entry points to the driver. - */ -inline int vsoc_validate_inode(struct inode *inode) -{ - if (iminor(inode) >= vsoc_dev.layout->region_count) { - dev_err(&vsoc_dev.dev->dev, - "describe_region: invalid region %d\n", iminor(inode)); - return -ENODEV; - } - return 0; -} - -inline int vsoc_validate_filep(struct file *filp) -{ - int ret = vsoc_validate_inode(file_inode(filp)); - - if (ret) - return ret; - if (!filp->private_data) { - dev_err(&vsoc_dev.dev->dev, - "No private data on fd, region %d\n", - iminor(file_inode(filp))); - return -EBADFD; - } - return 0; -} - -/* Converts from shared memory offset to virtual address */ -static inline void *shm_off_to_virtual_addr(__u32 offset) -{ - return (void __force *)vsoc_dev.kernel_mapped_shm + offset; -} - -/* Converts from shared memory offset to physical address */ -static inline phys_addr_t shm_off_to_phys_addr(__u32 offset) -{ - return vsoc_dev.shm_phys_start + offset; -} - -/** - * Convenience functions to obtain the region from the inode or file. - * Dangerous to call before validating the inode/file. - */ -static -inline struct vsoc_device_region *vsoc_region_from_inode(struct inode *inode) -{ - return &vsoc_dev.regions[iminor(inode)]; -} - -static -inline struct vsoc_device_region *vsoc_region_from_filep(struct file *inode) -{ - return vsoc_region_from_inode(file_inode(inode)); -} - -static inline uint32_t vsoc_device_region_size(struct vsoc_device_region *r) -{ - return r->region_end_offset - r->region_begin_offset; -} - -static const struct file_operations vsoc_ops = { - .owner = THIS_MODULE, - .open = vsoc_open, - .mmap = vsoc_mmap, - .read = vsoc_read, - .unlocked_ioctl = vsoc_ioctl, - .compat_ioctl = vsoc_ioctl, - .write = vsoc_write, - .llseek = vsoc_lseek, - .release = vsoc_release, -}; - -static struct pci_device_id vsoc_id_table[] = { - {0x1af4, 0x1110, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, - {0}, -}; - -MODULE_DEVICE_TABLE(pci, vsoc_id_table); - -static void vsoc_remove_device(struct pci_dev *pdev); -static int vsoc_probe_device(struct pci_dev *pdev, - const struct pci_device_id *ent); - -static struct pci_driver vsoc_pci_driver = { - .name = "vsoc", - .id_table = vsoc_id_table, - .probe = vsoc_probe_device, - .remove = vsoc_remove_device, -}; - -static int -do_create_fd_scoped_permission(struct vsoc_device_region *region_p, - struct fd_scoped_permission_node *np, - struct fd_scoped_permission_arg __user *arg) -{ - struct file *managed_filp; - s32 managed_fd; - atomic_t *owner_ptr = NULL; - struct vsoc_device_region *managed_region_p; - - if (copy_from_user(&np->permission, - &arg->perm, sizeof(np->permission)) || - copy_from_user(&managed_fd, - &arg->managed_region_fd, sizeof(managed_fd))) { - return -EFAULT; - } - managed_filp = fdget(managed_fd).file; - /* Check that it's a valid fd, */ - if (!managed_filp || vsoc_validate_filep(managed_filp)) - return -EPERM; - /* EEXIST if the given fd already has a permission. */ - if (((struct vsoc_private_data *)managed_filp->private_data)-> - fd_scoped_permission_node) - return -EEXIST; - managed_region_p = vsoc_region_from_filep(managed_filp); - /* Check that the provided region is managed by this one */ - if (&vsoc_dev.regions[managed_region_p->managed_by] != region_p) - return -EPERM; - /* The area must be well formed and have non-zero size */ - if (np->permission.begin_offset >= np->permission.end_offset) - return -EINVAL; - /* The area must fit in the memory window */ - if (np->permission.end_offset > - vsoc_device_region_size(managed_region_p)) - return -ERANGE; - /* The area must be in the region data section */ - if (np->permission.begin_offset < - managed_region_p->offset_of_region_data) - return -ERANGE; - /* The area must be page aligned */ - if (!PAGE_ALIGNED(np->permission.begin_offset) || - !PAGE_ALIGNED(np->permission.end_offset)) - return -EINVAL; - /* Owner offset must be naturally aligned in the window */ - if (np->permission.owner_offset & - (sizeof(np->permission.owner_offset) - 1)) - return -EINVAL; - /* The owner flag must reside in the owner memory */ - if (np->permission.owner_offset + sizeof(np->permission.owner_offset) > - vsoc_device_region_size(region_p)) - return -ERANGE; - /* The owner flag must reside in the data section */ - if (np->permission.owner_offset < region_p->offset_of_region_data) - return -EINVAL; - /* The owner value must change to claim the memory */ - if (np->permission.owned_value == VSOC_REGION_FREE) - return -EINVAL; - owner_ptr = - (atomic_t *)shm_off_to_virtual_addr(region_p->region_begin_offset + - np->permission.owner_offset); - /* We've already verified that this is in the shared memory window, so - * it should be safe to write to this address. - */ - if (atomic_cmpxchg(owner_ptr, - VSOC_REGION_FREE, - np->permission.owned_value) != VSOC_REGION_FREE) { - return -EBUSY; - } - ((struct vsoc_private_data *)managed_filp->private_data)-> - fd_scoped_permission_node = np; - /* The file offset needs to be adjusted if the calling - * process did any read/write operations on the fd - * before creating the permission. - */ - if (managed_filp->f_pos) { - if (managed_filp->f_pos > np->permission.end_offset) { - /* If the offset is beyond the permission end, set it - * to the end. - */ - managed_filp->f_pos = np->permission.end_offset; - } else { - /* If the offset is within the permission interval - * keep it there otherwise reset it to zero. - */ - if (managed_filp->f_pos < np->permission.begin_offset) { - managed_filp->f_pos = 0; - } else { - managed_filp->f_pos -= - np->permission.begin_offset; - } - } - } - return 0; -} - -static void -do_destroy_fd_scoped_permission_node(struct vsoc_device_region *owner_region_p, - struct fd_scoped_permission_node *node) -{ - if (node) { - do_destroy_fd_scoped_permission(owner_region_p, - &node->permission); - mutex_lock(&vsoc_dev.mtx); - list_del(&node->list); - mutex_unlock(&vsoc_dev.mtx); - kfree(node); - } -} - -static void -do_destroy_fd_scoped_permission(struct vsoc_device_region *owner_region_p, - struct fd_scoped_permission *perm) -{ - atomic_t *owner_ptr = NULL; - int prev = 0; - - if (!perm) - return; - owner_ptr = (atomic_t *)shm_off_to_virtual_addr - (owner_region_p->region_begin_offset + perm->owner_offset); - prev = atomic_xchg(owner_ptr, VSOC_REGION_FREE); - if (prev != perm->owned_value) - dev_err(&vsoc_dev.dev->dev, - "%x-%x: owner (%s) %x: expected to be %x was %x", - perm->begin_offset, perm->end_offset, - owner_region_p->device_name, perm->owner_offset, - perm->owned_value, prev); -} - -static long do_vsoc_describe_region(struct file *filp, - struct vsoc_device_region __user *dest) -{ - struct vsoc_device_region *region_p; - int retval = vsoc_validate_filep(filp); - - if (retval) - return retval; - region_p = vsoc_region_from_filep(filp); - if (copy_to_user(dest, region_p, sizeof(*region_p))) - return -EFAULT; - return 0; -} - -/** - * Implements the inner logic of cond_wait. Copies to and from userspace are - * done in the helper function below. - */ -static int handle_vsoc_cond_wait(struct file *filp, struct vsoc_cond_wait *arg) -{ - DEFINE_WAIT(wait); - u32 region_number = iminor(file_inode(filp)); - struct vsoc_region_data *data = vsoc_dev.regions_data + region_number; - struct hrtimer_sleeper timeout, *to = NULL; - int ret = 0; - struct vsoc_device_region *region_p = vsoc_region_from_filep(filp); - atomic_t *address = NULL; - ktime_t wake_time; - - /* Ensure that the offset is aligned */ - if (arg->offset & (sizeof(uint32_t) - 1)) - return -EADDRNOTAVAIL; - /* Ensure that the offset is within shared memory */ - if (((uint64_t)arg->offset) + region_p->region_begin_offset + - sizeof(uint32_t) > region_p->region_end_offset) - return -E2BIG; - address = shm_off_to_virtual_addr(region_p->region_begin_offset + - arg->offset); - - /* Ensure that the type of wait is valid */ - switch (arg->wait_type) { - case VSOC_WAIT_IF_EQUAL: - break; - case VSOC_WAIT_IF_EQUAL_TIMEOUT: - to = &timeout; - break; - default: - return -EINVAL; - } - - if (to) { - /* Copy the user-supplied timesec into the kernel structure. - * We do things this way to flatten differences between 32 bit - * and 64 bit timespecs. - */ - if (arg->wake_time_nsec >= NSEC_PER_SEC) - return -EINVAL; - wake_time = ktime_set(arg->wake_time_sec, arg->wake_time_nsec); - - hrtimer_init_sleeper_on_stack(to, CLOCK_MONOTONIC, - HRTIMER_MODE_ABS); - hrtimer_set_expires_range_ns(&to->timer, wake_time, - current->timer_slack_ns); - } - - while (1) { - prepare_to_wait(&data->futex_wait_queue, &wait, - TASK_INTERRUPTIBLE); - /* - * Check the sentinel value after prepare_to_wait. If the value - * changes after this check the writer will call signal, - * changing the task state from INTERRUPTIBLE to RUNNING. That - * will ensure that schedule() will eventually schedule this - * task. - */ - if (atomic_read(address) != arg->value) { - ret = 0; - break; - } - if (to) { - hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS); - if (likely(to->task)) - freezable_schedule(); - hrtimer_cancel(&to->timer); - if (!to->task) { - ret = -ETIMEDOUT; - break; - } - } else { - freezable_schedule(); - } - /* Count the number of times that we woke up. This is useful - * for unit testing. - */ - ++arg->wakes; - if (signal_pending(current)) { - ret = -EINTR; - break; - } - } - finish_wait(&data->futex_wait_queue, &wait); - if (to) - destroy_hrtimer_on_stack(&to->timer); - return ret; -} - -/** - * Handles the details of copying from/to userspace to ensure that the copies - * happen on all of the return paths of cond_wait. - */ -static int do_vsoc_cond_wait(struct file *filp, - struct vsoc_cond_wait __user *untrusted_in) -{ - struct vsoc_cond_wait arg; - int rval = 0; - - if (copy_from_user(&arg, untrusted_in, sizeof(arg))) - return -EFAULT; - /* wakes is an out parameter. Initialize it to something sensible. */ - arg.wakes = 0; - rval = handle_vsoc_cond_wait(filp, &arg); - if (copy_to_user(untrusted_in, &arg, sizeof(arg))) - return -EFAULT; - return rval; -} - -static int do_vsoc_cond_wake(struct file *filp, uint32_t offset) -{ - struct vsoc_device_region *region_p = vsoc_region_from_filep(filp); - u32 region_number = iminor(file_inode(filp)); - struct vsoc_region_data *data = vsoc_dev.regions_data + region_number; - /* Ensure that the offset is aligned */ - if (offset & (sizeof(uint32_t) - 1)) - return -EADDRNOTAVAIL; - /* Ensure that the offset is within shared memory */ - if (((uint64_t)offset) + region_p->region_begin_offset + - sizeof(uint32_t) > region_p->region_end_offset) - return -E2BIG; - /* - * TODO(b/73664181): Use multiple futex wait queues. - * We need to wake every sleeper when the condition changes. Typically - * only a single thread will be waiting on the condition, but there - * are exceptions. The worst case is about 10 threads. - */ - wake_up_interruptible_all(&data->futex_wait_queue); - return 0; -} - -static long vsoc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -{ - int rv = 0; - struct vsoc_device_region *region_p; - u32 reg_num; - struct vsoc_region_data *reg_data; - int retval = vsoc_validate_filep(filp); - - if (retval) - return retval; - region_p = vsoc_region_from_filep(filp); - reg_num = iminor(file_inode(filp)); - reg_data = vsoc_dev.regions_data + reg_num; - switch (cmd) { - case VSOC_CREATE_FD_SCOPED_PERMISSION: - { - struct fd_scoped_permission_node *node = NULL; - - node = kzalloc(sizeof(*node), GFP_KERNEL); - /* We can't allocate memory for the permission */ - if (!node) - return -ENOMEM; - INIT_LIST_HEAD(&node->list); - rv = do_create_fd_scoped_permission - (region_p, - node, - (struct fd_scoped_permission_arg __user *)arg); - if (!rv) { - mutex_lock(&vsoc_dev.mtx); - list_add(&node->list, &vsoc_dev.permissions); - mutex_unlock(&vsoc_dev.mtx); - } else { - kfree(node); - return rv; - } - } - break; - - case VSOC_GET_FD_SCOPED_PERMISSION: - { - struct fd_scoped_permission_node *node = - ((struct vsoc_private_data *)filp->private_data)-> - fd_scoped_permission_node; - if (!node) - return -ENOENT; - if (copy_to_user - ((struct fd_scoped_permission __user *)arg, - &node->permission, sizeof(node->permission))) - return -EFAULT; - } - break; - - case VSOC_MAYBE_SEND_INTERRUPT_TO_HOST: - if (!atomic_xchg(reg_data->outgoing_signalled, 1)) { - writel(reg_num, vsoc_dev.regs + DOORBELL); - return 0; - } else { - return -EBUSY; - } - break; - - case VSOC_SEND_INTERRUPT_TO_HOST: - writel(reg_num, vsoc_dev.regs + DOORBELL); - return 0; - case VSOC_WAIT_FOR_INCOMING_INTERRUPT: - wait_event_interruptible - (reg_data->interrupt_wait_queue, - (atomic_read(reg_data->incoming_signalled) != 0)); - break; - - case VSOC_DESCRIBE_REGION: - return do_vsoc_describe_region - (filp, - (struct vsoc_device_region __user *)arg); - - case VSOC_SELF_INTERRUPT: - atomic_set(reg_data->incoming_signalled, 1); - wake_up_interruptible(®_data->interrupt_wait_queue); - break; - - case VSOC_COND_WAIT: - return do_vsoc_cond_wait(filp, - (struct vsoc_cond_wait __user *)arg); - case VSOC_COND_WAKE: - return do_vsoc_cond_wake(filp, arg); - - default: - return -EINVAL; - } - return 0; -} - -static ssize_t vsoc_read(struct file *filp, char __user *buffer, size_t len, - loff_t *poffset) -{ - __u32 area_off; - const void *area_p; - ssize_t area_len; - int retval = vsoc_validate_filep(filp); - - if (retval) - return retval; - area_len = vsoc_get_area(filp, &area_off); - area_p = shm_off_to_virtual_addr(area_off); - area_p += *poffset; - area_len -= *poffset; - if (area_len <= 0) - return 0; - if (area_len < len) - len = area_len; - if (copy_to_user(buffer, area_p, len)) - return -EFAULT; - *poffset += len; - return len; -} - -static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin) -{ - ssize_t area_len = 0; - int retval = vsoc_validate_filep(filp); - - if (retval) - return retval; - area_len = vsoc_get_area(filp, NULL); - switch (origin) { - case SEEK_SET: - break; - - case SEEK_CUR: - if (offset > 0 && offset + filp->f_pos < 0) - return -EOVERFLOW; - offset += filp->f_pos; - break; - - case SEEK_END: - if (offset > 0 && offset + area_len < 0) - return -EOVERFLOW; - offset += area_len; - break; - - case SEEK_DATA: - if (offset >= area_len) - return -EINVAL; - if (offset < 0) - offset = 0; - break; - - case SEEK_HOLE: - /* Next hole is always the end of the region, unless offset is - * beyond that - */ - if (offset < area_len) - offset = area_len; - break; - - default: - return -EINVAL; - } - - if (offset < 0 || offset > area_len) - return -EINVAL; - filp->f_pos = offset; - - return offset; -} - -static ssize_t vsoc_write(struct file *filp, const char __user *buffer, - size_t len, loff_t *poffset) -{ - __u32 area_off; - void *area_p; - ssize_t area_len; - int retval = vsoc_validate_filep(filp); - - if (retval) - return retval; - area_len = vsoc_get_area(filp, &area_off); - area_p = shm_off_to_virtual_addr(area_off); - area_p += *poffset; - area_len -= *poffset; - if (area_len <= 0) - return 0; - if (area_len < len) - len = area_len; - if (copy_from_user(area_p, buffer, len)) - return -EFAULT; - *poffset += len; - return len; -} - -static irqreturn_t vsoc_interrupt(int irq, void *region_data_v) -{ - struct vsoc_region_data *region_data = - (struct vsoc_region_data *)region_data_v; - int reg_num = region_data - vsoc_dev.regions_data; - - if (unlikely(!region_data)) - return IRQ_NONE; - - if (unlikely(reg_num < 0 || - reg_num >= vsoc_dev.layout->region_count)) { - dev_err(&vsoc_dev.dev->dev, - "invalid irq @%p reg_num=0x%04x\n", - region_data, reg_num); - return IRQ_NONE; - } - if (unlikely(vsoc_dev.regions_data + reg_num != region_data)) { - dev_err(&vsoc_dev.dev->dev, - "irq not aligned @%p reg_num=0x%04x\n", - region_data, reg_num); - return IRQ_NONE; - } - wake_up_interruptible(®ion_data->interrupt_wait_queue); - return IRQ_HANDLED; -} - -static int vsoc_probe_device(struct pci_dev *pdev, - const struct pci_device_id *ent) -{ - int result; - int i; - resource_size_t reg_size; - dev_t devt; - - vsoc_dev.dev = pdev; - result = pci_enable_device(pdev); - if (result) { - dev_err(&pdev->dev, - "pci_enable_device failed %s: error %d\n", - pci_name(pdev), result); - return result; - } - vsoc_dev.enabled_device = true; - result = pci_request_regions(pdev, "vsoc"); - if (result < 0) { - dev_err(&pdev->dev, "pci_request_regions failed\n"); - vsoc_remove_device(pdev); - return -EBUSY; - } - vsoc_dev.requested_regions = true; - /* Set up the control registers in BAR 0 */ - reg_size = pci_resource_len(pdev, REGISTER_BAR); - if (reg_size > MAX_REGISTER_BAR_LEN) - vsoc_dev.regs = - pci_iomap(pdev, REGISTER_BAR, MAX_REGISTER_BAR_LEN); - else - vsoc_dev.regs = pci_iomap(pdev, REGISTER_BAR, reg_size); - - if (!vsoc_dev.regs) { - dev_err(&pdev->dev, - "cannot map registers of size %zu\n", - (size_t)reg_size); - vsoc_remove_device(pdev); - return -EBUSY; - } - - /* Map the shared memory in BAR 2 */ - vsoc_dev.shm_phys_start = pci_resource_start(pdev, SHARED_MEMORY_BAR); - vsoc_dev.shm_size = pci_resource_len(pdev, SHARED_MEMORY_BAR); - - dev_info(&pdev->dev, "shared memory @ DMA %pa size=0x%zx\n", - &vsoc_dev.shm_phys_start, vsoc_dev.shm_size); - vsoc_dev.kernel_mapped_shm = pci_iomap_wc(pdev, SHARED_MEMORY_BAR, 0); - if (!vsoc_dev.kernel_mapped_shm) { - dev_err(&vsoc_dev.dev->dev, "cannot iomap region\n"); - vsoc_remove_device(pdev); - return -EBUSY; - } - - vsoc_dev.layout = (struct vsoc_shm_layout_descriptor __force *) - vsoc_dev.kernel_mapped_shm; - dev_info(&pdev->dev, "major_version: %d\n", - vsoc_dev.layout->major_version); - dev_info(&pdev->dev, "minor_version: %d\n", - vsoc_dev.layout->minor_version); - dev_info(&pdev->dev, "size: 0x%x\n", vsoc_dev.layout->size); - dev_info(&pdev->dev, "regions: %d\n", vsoc_dev.layout->region_count); - if (vsoc_dev.layout->major_version != - CURRENT_VSOC_LAYOUT_MAJOR_VERSION) { - dev_err(&vsoc_dev.dev->dev, - "driver supports only major_version %d\n", - CURRENT_VSOC_LAYOUT_MAJOR_VERSION); - vsoc_remove_device(pdev); - return -EBUSY; - } - result = alloc_chrdev_region(&devt, 0, vsoc_dev.layout->region_count, - VSOC_DEV_NAME); - if (result) { - dev_err(&vsoc_dev.dev->dev, "alloc_chrdev_region failed\n"); - vsoc_remove_device(pdev); - return -EBUSY; - } - vsoc_dev.major = MAJOR(devt); - cdev_init(&vsoc_dev.cdev, &vsoc_ops); - vsoc_dev.cdev.owner = THIS_MODULE; - result = cdev_add(&vsoc_dev.cdev, devt, vsoc_dev.layout->region_count); - if (result) { - dev_err(&vsoc_dev.dev->dev, "cdev_add error\n"); - vsoc_remove_device(pdev); - return -EBUSY; - } - vsoc_dev.cdev_added = true; - vsoc_dev.class = class_create(THIS_MODULE, VSOC_DEV_NAME); - if (IS_ERR(vsoc_dev.class)) { - dev_err(&vsoc_dev.dev->dev, "class_create failed\n"); - vsoc_remove_device(pdev); - return PTR_ERR(vsoc_dev.class); - } - vsoc_dev.class_added = true; - vsoc_dev.regions = (struct vsoc_device_region __force *) - ((void *)vsoc_dev.layout + - vsoc_dev.layout->vsoc_region_desc_offset); - vsoc_dev.msix_entries = - kcalloc(vsoc_dev.layout->region_count, - sizeof(vsoc_dev.msix_entries[0]), GFP_KERNEL); - if (!vsoc_dev.msix_entries) { - dev_err(&vsoc_dev.dev->dev, - "unable to allocate msix_entries\n"); - vsoc_remove_device(pdev); - return -ENOSPC; - } - vsoc_dev.regions_data = - kcalloc(vsoc_dev.layout->region_count, - sizeof(vsoc_dev.regions_data[0]), GFP_KERNEL); - if (!vsoc_dev.regions_data) { - dev_err(&vsoc_dev.dev->dev, - "unable to allocate regions' data\n"); - vsoc_remove_device(pdev); - return -ENOSPC; - } - for (i = 0; i < vsoc_dev.layout->region_count; ++i) - vsoc_dev.msix_entries[i].entry = i; - - result = pci_enable_msix_exact(vsoc_dev.dev, vsoc_dev.msix_entries, - vsoc_dev.layout->region_count); - if (result) { - dev_info(&pdev->dev, "pci_enable_msix failed: %d\n", result); - vsoc_remove_device(pdev); - return -ENOSPC; - } - /* Check that all regions are well formed */ - for (i = 0; i < vsoc_dev.layout->region_count; ++i) { - const struct vsoc_device_region *region = vsoc_dev.regions + i; - - if (!PAGE_ALIGNED(region->region_begin_offset) || - !PAGE_ALIGNED(region->region_end_offset)) { - dev_err(&vsoc_dev.dev->dev, - "region %d not aligned (%x:%x)", i, - region->region_begin_offset, - region->region_end_offset); - vsoc_remove_device(pdev); - return -EFAULT; - } - if (region->region_begin_offset >= region->region_end_offset || - region->region_end_offset > vsoc_dev.shm_size) { - dev_err(&vsoc_dev.dev->dev, - "region %d offsets are wrong: %x %x %zx", - i, region->region_begin_offset, - region->region_end_offset, vsoc_dev.shm_size); - vsoc_remove_device(pdev); - return -EFAULT; - } - if (region->managed_by >= vsoc_dev.layout->region_count) { - dev_err(&vsoc_dev.dev->dev, - "region %d has invalid owner: %u", - i, region->managed_by); - vsoc_remove_device(pdev); - return -EFAULT; - } - } - vsoc_dev.msix_enabled = true; - for (i = 0; i < vsoc_dev.layout->region_count; ++i) { - const struct vsoc_device_region *region = vsoc_dev.regions + i; - size_t name_sz = sizeof(vsoc_dev.regions_data[i].name) - 1; - const struct vsoc_signal_table_layout *h_to_g_signal_table = - ®ion->host_to_guest_signal_table; - const struct vsoc_signal_table_layout *g_to_h_signal_table = - ®ion->guest_to_host_signal_table; - - vsoc_dev.regions_data[i].name[name_sz] = '\0'; - memcpy(vsoc_dev.regions_data[i].name, region->device_name, - name_sz); - dev_info(&pdev->dev, "region %d name=%s\n", - i, vsoc_dev.regions_data[i].name); - init_waitqueue_head - (&vsoc_dev.regions_data[i].interrupt_wait_queue); - init_waitqueue_head(&vsoc_dev.regions_data[i].futex_wait_queue); - vsoc_dev.regions_data[i].incoming_signalled = - shm_off_to_virtual_addr(region->region_begin_offset) + - h_to_g_signal_table->interrupt_signalled_offset; - vsoc_dev.regions_data[i].outgoing_signalled = - shm_off_to_virtual_addr(region->region_begin_offset) + - g_to_h_signal_table->interrupt_signalled_offset; - result = request_irq(vsoc_dev.msix_entries[i].vector, - vsoc_interrupt, 0, - vsoc_dev.regions_data[i].name, - vsoc_dev.regions_data + i); - if (result) { - dev_info(&pdev->dev, - "request_irq failed irq=%d vector=%d\n", - i, vsoc_dev.msix_entries[i].vector); - vsoc_remove_device(pdev); - return -ENOSPC; - } - vsoc_dev.regions_data[i].irq_requested = true; - if (!device_create(vsoc_dev.class, NULL, - MKDEV(vsoc_dev.major, i), - NULL, vsoc_dev.regions_data[i].name)) { - dev_err(&vsoc_dev.dev->dev, "device_create failed\n"); - vsoc_remove_device(pdev); - return -EBUSY; - } - vsoc_dev.regions_data[i].device_created = true; - } - return 0; -} - -/* - * This should undo all of the allocations in the probe function in reverse - * order. - * - * Notes: - * - * The device may have been partially initialized, so double check - * that the allocations happened. - * - * This function may be called multiple times, so mark resources as freed - * as they are deallocated. - */ -static void vsoc_remove_device(struct pci_dev *pdev) -{ - int i; - /* - * pdev is the first thing to be set on probe and the last thing - * to be cleared here. If it's NULL then there is no cleanup. - */ - if (!pdev || !vsoc_dev.dev) - return; - dev_info(&pdev->dev, "remove_device\n"); - if (vsoc_dev.regions_data) { - for (i = 0; i < vsoc_dev.layout->region_count; ++i) { - if (vsoc_dev.regions_data[i].device_created) { - device_destroy(vsoc_dev.class, - MKDEV(vsoc_dev.major, i)); - vsoc_dev.regions_data[i].device_created = false; - } - if (vsoc_dev.regions_data[i].irq_requested) - free_irq(vsoc_dev.msix_entries[i].vector, NULL); - vsoc_dev.regions_data[i].irq_requested = false; - } - kfree(vsoc_dev.regions_data); - vsoc_dev.regions_data = NULL; - } - if (vsoc_dev.msix_enabled) { - pci_disable_msix(pdev); - vsoc_dev.msix_enabled = false; - } - kfree(vsoc_dev.msix_entries); - vsoc_dev.msix_entries = NULL; - vsoc_dev.regions = NULL; - if (vsoc_dev.class_added) { - class_destroy(vsoc_dev.class); - vsoc_dev.class_added = false; - } - if (vsoc_dev.cdev_added) { - cdev_del(&vsoc_dev.cdev); - vsoc_dev.cdev_added = false; - } - if (vsoc_dev.major && vsoc_dev.layout) { - unregister_chrdev_region(MKDEV(vsoc_dev.major, 0), - vsoc_dev.layout->region_count); - vsoc_dev.major = 0; - } - vsoc_dev.layout = NULL; - if (vsoc_dev.kernel_mapped_shm) { - pci_iounmap(pdev, vsoc_dev.kernel_mapped_shm); - vsoc_dev.kernel_mapped_shm = NULL; - } - if (vsoc_dev.regs) { - pci_iounmap(pdev, vsoc_dev.regs); - vsoc_dev.regs = NULL; - } - if (vsoc_dev.requested_regions) { - pci_release_regions(pdev); - vsoc_dev.requested_regions = false; - } - if (vsoc_dev.enabled_device) { - pci_disable_device(pdev); - vsoc_dev.enabled_device = false; - } - /* Do this last: it indicates that the device is not initialized. */ - vsoc_dev.dev = NULL; -} - -static void __exit vsoc_cleanup_module(void) -{ - vsoc_remove_device(vsoc_dev.dev); - pci_unregister_driver(&vsoc_pci_driver); -} - -static int __init vsoc_init_module(void) -{ - int err = -ENOMEM; - - INIT_LIST_HEAD(&vsoc_dev.permissions); - mutex_init(&vsoc_dev.mtx); - - err = pci_register_driver(&vsoc_pci_driver); - if (err < 0) - return err; - return 0; -} - -static int vsoc_open(struct inode *inode, struct file *filp) -{ - /* Can't use vsoc_validate_filep because filp is still incomplete */ - int ret = vsoc_validate_inode(inode); - - if (ret) - return ret; - filp->private_data = - kzalloc(sizeof(struct vsoc_private_data), GFP_KERNEL); - if (!filp->private_data) - return -ENOMEM; - return 0; -} - -static int vsoc_release(struct inode *inode, struct file *filp) -{ - struct vsoc_private_data *private_data = NULL; - struct fd_scoped_permission_node *node = NULL; - struct vsoc_device_region *owner_region_p = NULL; - int retval = vsoc_validate_filep(filp); - - if (retval) - return retval; - private_data = (struct vsoc_private_data *)filp->private_data; - if (!private_data) - return 0; - - node = private_data->fd_scoped_permission_node; - if (node) { - owner_region_p = vsoc_region_from_inode(inode); - if (owner_region_p->managed_by != VSOC_REGION_WHOLE) { - owner_region_p = - &vsoc_dev.regions[owner_region_p->managed_by]; - } - do_destroy_fd_scoped_permission_node(owner_region_p, node); - private_data->fd_scoped_permission_node = NULL; - } - kfree(private_data); - filp->private_data = NULL; - - return 0; -} - -/* - * Returns the device relative offset and length of the area specified by the - * fd scoped permission. If there is no fd scoped permission set, a default - * permission covering the entire region is assumed, unless the region is owned - * by another one, in which case the default is a permission with zero size. - */ -static ssize_t vsoc_get_area(struct file *filp, __u32 *area_offset) -{ - __u32 off = 0; - ssize_t length = 0; - struct vsoc_device_region *region_p; - struct fd_scoped_permission *perm; - - region_p = vsoc_region_from_filep(filp); - off = region_p->region_begin_offset; - perm = &((struct vsoc_private_data *)filp->private_data)-> - fd_scoped_permission_node->permission; - if (perm) { - off += perm->begin_offset; - length = perm->end_offset - perm->begin_offset; - } else if (region_p->managed_by == VSOC_REGION_WHOLE) { - /* No permission set and the regions is not owned by another, - * default to full region access. - */ - length = vsoc_device_region_size(region_p); - } else { - /* return zero length, access is denied. */ - length = 0; - } - if (area_offset) - *area_offset = off; - return length; -} - -static int vsoc_mmap(struct file *filp, struct vm_area_struct *vma) -{ - unsigned long len = vma->vm_end - vma->vm_start; - __u32 area_off; - phys_addr_t mem_off; - ssize_t area_len; - int retval = vsoc_validate_filep(filp); - - if (retval) - return retval; - area_len = vsoc_get_area(filp, &area_off); - /* Add the requested offset */ - area_off += (vma->vm_pgoff << PAGE_SHIFT); - area_len -= (vma->vm_pgoff << PAGE_SHIFT); - if (area_len < len) - return -EINVAL; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - mem_off = shm_off_to_phys_addr(area_off); - if (io_remap_pfn_range(vma, vma->vm_start, mem_off >> PAGE_SHIFT, - len, vma->vm_page_prot)) - return -EAGAIN; - return 0; -} - -module_init(vsoc_init_module); -module_exit(vsoc_cleanup_module); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Greg Hartman "); -MODULE_DESCRIPTION("VSoC interpretation of QEmu's ivshmem device"); -MODULE_VERSION("1.0"); -- cgit v1.2.3-59-g8ed1b From b7db58105b80fa9232719c8329b995b3addfab55 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 5 Feb 2020 15:32:17 +0300 Subject: staging: greybus: use after free in gb_audio_manager_remove_all() When we call kobject_put() and it's the last reference to the kobject then it calls gb_audio_module_release() and frees module. We dereference "module" on the next line which is a use after free. Fixes: c77f85bbc91a ("greybus: audio: Fix incorrect counting of 'ida'") Signed-off-by: Dan Carpenter Acked-by: Viresh Kumar Reviewed-by: Vaibhav Agarwal Link: https://lore.kernel.org/r/20200205123217.jreendkyxulqsool@kili.mountain Signed-off-by: Greg Kroah-Hartman --- drivers/staging/greybus/audio_manager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/greybus/audio_manager.c b/drivers/staging/greybus/audio_manager.c index 9b19ea9d3fa1..9a3f7c034ab4 100644 --- a/drivers/staging/greybus/audio_manager.c +++ b/drivers/staging/greybus/audio_manager.c @@ -92,8 +92,8 @@ void gb_audio_manager_remove_all(void) list_for_each_entry_safe(module, next, &modules_list, list) { list_del(&module->list); - kobject_put(&module->kobj); ida_simple_remove(&module_id, module->id); + kobject_put(&module->kobj); } is_empty = list_empty(&modules_list); -- cgit v1.2.3-59-g8ed1b From 499c405b2b80bb3a04425ba3541d20305e014d3e Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 10 Feb 2020 12:02:30 -0600 Subject: staging: rtl8188eu: Fix potential security hole In routine rtw_hostapd_ioctl(), the user-controlled p->length is assumed to be at least the size of struct ieee_param size, but this assumption is never checked. This could result in out-of-bounds read/write on kernel heap in case a p->length less than the size of struct ieee_param is specified by the user. If p->length is allowed to be greater than the size of the struct, then a malicious user could be wasting kernel memory. Fixes commit a2c60d42d97c ("Add files for new driver - part 16"). Reported by: Pietro Oliva Cc: Pietro Oliva Cc: Stable Fixes: a2c60d42d97c ("staging: r8188eu: Add files for new driver - part 16") Signed-off-by: Larry Finger Link: https://lore.kernel.org/r/20200210180235.21691-2-Larry.Finger@lwfinger.net Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/os_dep/ioctl_linux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c index 9b6ea86d1dcf..7d21f5799640 100644 --- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c @@ -2796,7 +2796,7 @@ static int rtw_hostapd_ioctl(struct net_device *dev, struct iw_point *p) goto out; } - if (!p->pointer) { + if (!p->pointer || p->length != sizeof(struct ieee_param)) { ret = -EINVAL; goto out; } -- cgit v1.2.3-59-g8ed1b From ac33597c0c0d1d819dccfe001bcd0acef7107e7c Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 10 Feb 2020 12:02:31 -0600 Subject: staging: rtl8723bs: Fix potential security hole In routine rtw_hostapd_ioctl(), the user-controlled p->length is assumed to be at least the size of struct ieee_param size, but this assumption is never checked. This could result in out-of-bounds read/write on kernel heap in case a p->length less than the size of struct ieee_param is specified by the user. If p->length is allowed to be greater than the size of the struct, then a malicious user could be wasting kernel memory. Fixes commit 554c0a3abf216 ("0taging: Add rtl8723bs sdio wifi driver"). Reported by: Pietro Oliva Cc: Pietro Oliva Cc: Stable Fixes 554c0a3abf216 ("0taging: Add rtl8723bs sdio wifi driver"). Signed-off-by: Larry Finger Link: https://lore.kernel.org/r/20200210180235.21691-3-Larry.Finger@lwfinger.net Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/os_dep/ioctl_linux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c index db6528a01229..3128766dd50e 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c @@ -4207,7 +4207,7 @@ static int rtw_hostapd_ioctl(struct net_device *dev, struct iw_point *p) /* if (p->length < sizeof(struct ieee_param) || !p->pointer) { */ - if (!p->pointer) { + if (!p->pointer || p->length != sizeof(*param)) { ret = -EINVAL; goto out; } -- cgit v1.2.3-59-g8ed1b From 4ddf8ab8d15ddbc52eefb44eb64e38466ce1f70f Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 10 Feb 2020 12:02:32 -0600 Subject: staging: rtl8188eu: Fix potential overuse of kernel memory In routine wpa_supplicant_ioctl(), the user-controlled p->length is checked to be at least the size of struct ieee_param size, but the code does not detect the case where p->length is greater than the size of the struct, thus a malicious user could be wasting kernel memory. Fixes commit a2c60d42d97c ("Add files for new driver - part 16"). Reported by: Pietro Oliva Cc: Pietro Oliva Cc: Stable Fixes commit a2c60d42d97c ("Add files for new driver - part 16"). Signed-off-by: Larry Finger Link: https://lore.kernel.org/r/20200210180235.21691-4-Larry.Finger@lwfinger.net Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/os_dep/ioctl_linux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c index 7d21f5799640..acca3ae8b254 100644 --- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c @@ -2009,7 +2009,7 @@ static int wpa_supplicant_ioctl(struct net_device *dev, struct iw_point *p) struct ieee_param *param; uint ret = 0; - if (p->length < sizeof(struct ieee_param) || !p->pointer) { + if (!p->pointer || p->length != sizeof(struct ieee_param)) { ret = -EINVAL; goto out; } -- cgit v1.2.3-59-g8ed1b From 23954cb078febfc63a755301fe77e06bccdb4d2a Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 10 Feb 2020 12:02:33 -0600 Subject: staging: rtl8723bs: Fix potential overuse of kernel memory In routine wpa_supplicant_ioctl(), the user-controlled p->length is checked to be at least the size of struct ieee_param size, but the code does not detect the case where p->length is greater than the size of the struct, thus a malicious user could be wasting kernel memory. Fixes commit 554c0a3abf216 ("staging: Add rtl8723bs sdio wifi driver"). Reported by: Pietro Oliva Cc: Pietro Oliva Cc: Stable Fixes: 554c0a3abf216 ("staging: Add rtl8723bs sdio wifi driver"). Signed-off-by: Larry Finger Link: https://lore.kernel.org/r/20200210180235.21691-5-Larry.Finger@lwfinger.net Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/os_dep/ioctl_linux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c index 3128766dd50e..2ac0d84f090e 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c @@ -3373,7 +3373,7 @@ static int wpa_supplicant_ioctl(struct net_device *dev, struct iw_point *p) /* down(&ieee->wx_sem); */ - if (p->length < sizeof(struct ieee_param) || !p->pointer) { + if (!p->pointer || p->length != sizeof(struct ieee_param)) { ret = -EINVAL; goto out; } -- cgit v1.2.3-59-g8ed1b From e40c6d0f8763fe67585227d4afc97171db861b3b Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 10 Feb 2020 12:02:34 -0600 Subject: staging: rtl8188eu: Remove some unneeded goto statements In routines rtw_hostapd_ioctl() and wpa_supplicant_ioctl(), several error conditions involve setting a variable indicating the error, followed by a goto. The code following the target of that goto merely returns the value. It is simpler, therefore to return the error value immediately, and eliminate the got target. Signed-off-by: Larry Finger Cc: Pietro Oliva Link: https://lore.kernel.org/r/20200210180235.21691-6-Larry.Finger@lwfinger.net Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/os_dep/ioctl_linux.c | 40 ++++++++------------------ 1 file changed, 12 insertions(+), 28 deletions(-) diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c index acca3ae8b254..ba53959e1303 100644 --- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c @@ -2009,21 +2009,16 @@ static int wpa_supplicant_ioctl(struct net_device *dev, struct iw_point *p) struct ieee_param *param; uint ret = 0; - if (!p->pointer || p->length != sizeof(struct ieee_param)) { - ret = -EINVAL; - goto out; - } + if (!p->pointer || p->length != sizeof(struct ieee_param)) + return -EINVAL; param = (struct ieee_param *)rtw_malloc(p->length); - if (!param) { - ret = -ENOMEM; - goto out; - } + if (!param) + return -ENOMEM; if (copy_from_user(param, p->pointer, p->length)) { kfree(param); - ret = -EFAULT; - goto out; + return -EFAULT; } switch (param->cmd) { @@ -2054,9 +2049,6 @@ static int wpa_supplicant_ioctl(struct net_device *dev, struct iw_point *p) ret = -EFAULT; kfree(param); - -out: - return ret; } @@ -2791,26 +2783,19 @@ static int rtw_hostapd_ioctl(struct net_device *dev, struct iw_point *p) * so, we just check hw_init_completed */ - if (!padapter->hw_init_completed) { - ret = -EPERM; - goto out; - } + if (!padapter->hw_init_completed) + return -EPERM; - if (!p->pointer || p->length != sizeof(struct ieee_param)) { - ret = -EINVAL; - goto out; - } + if (!p->pointer || p->length != sizeof(struct ieee_param)) + return -EINVAL; param = (struct ieee_param *)rtw_malloc(p->length); - if (!param) { - ret = -ENOMEM; - goto out; - } + if (!param) + return -ENOMEM; if (copy_from_user(param, p->pointer, p->length)) { kfree(param); - ret = -EFAULT; - goto out; + return -EFAULT; } switch (param->cmd) { @@ -2865,7 +2850,6 @@ static int rtw_hostapd_ioctl(struct net_device *dev, struct iw_point *p) if (ret == 0 && copy_to_user(p->pointer, param, p->length)) ret = -EFAULT; kfree(param); -out: return ret; } #endif -- cgit v1.2.3-59-g8ed1b From 9a4556bd8f23209c29f152e6a930b6a893b0fc81 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 10 Feb 2020 12:02:35 -0600 Subject: staging: rtl8723bs: Remove unneeded goto statements In routines rtw_hostapd_ioctl() and wpa_supplicant_ioctl(), several error conditions involve setting a variable indicating the error, followed by a goto. The code following the target of that goto merely returns the value. It is simpler, therefore to return the error value immediately, and eliminate the got target. Signed-off-by: Larry Finger Cc: Pietro Oliva Link: https://lore.kernel.org/r/20200210180235.21691-7-Larry.Finger@lwfinger.net Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/os_dep/ioctl_linux.c | 47 +++++++------------------- 1 file changed, 12 insertions(+), 35 deletions(-) diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c index 2ac0d84f090e..9b9038e7deb1 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c @@ -3373,21 +3373,16 @@ static int wpa_supplicant_ioctl(struct net_device *dev, struct iw_point *p) /* down(&ieee->wx_sem); */ - if (!p->pointer || p->length != sizeof(struct ieee_param)) { - ret = -EINVAL; - goto out; - } + if (!p->pointer || p->length != sizeof(struct ieee_param)) + return -EINVAL; param = rtw_malloc(p->length); - if (param == NULL) { - ret = -ENOMEM; - goto out; - } + if (param == NULL) + return -ENOMEM; if (copy_from_user(param, p->pointer, p->length)) { kfree(param); - ret = -EFAULT; - goto out; + return -EFAULT; } switch (param->cmd) { @@ -3421,12 +3416,8 @@ static int wpa_supplicant_ioctl(struct net_device *dev, struct iw_point *p) kfree(param); -out: - /* up(&ieee->wx_sem); */ - return ret; - } static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param, u32 param_len) @@ -4200,28 +4191,19 @@ static int rtw_hostapd_ioctl(struct net_device *dev, struct iw_point *p) * so, we just check hw_init_completed */ - if (!padapter->hw_init_completed) { - ret = -EPERM; - goto out; - } - + if (!padapter->hw_init_completed) + return -EPERM; - /* if (p->length < sizeof(struct ieee_param) || !p->pointer) { */ - if (!p->pointer || p->length != sizeof(*param)) { - ret = -EINVAL; - goto out; - } + if (!p->pointer || p->length != sizeof(*param)) + return -EINVAL; param = rtw_malloc(p->length); - if (param == NULL) { - ret = -ENOMEM; - goto out; - } + if (param == NULL) + return -ENOMEM; if (copy_from_user(param, p->pointer, p->length)) { kfree(param); - ret = -EFAULT; - goto out; + return -EFAULT; } /* DBG_871X("%s, cmd =%d\n", __func__, param->cmd); */ @@ -4321,13 +4303,8 @@ static int rtw_hostapd_ioctl(struct net_device *dev, struct iw_point *p) if (ret == 0 && copy_to_user(p->pointer, param, p->length)) ret = -EFAULT; - kfree(param); - -out: - return ret; - } static int rtw_wx_set_priv(struct net_device *dev, -- cgit v1.2.3-59-g8ed1b From 1208f9e1d758c991b0a46a1bd60c616b906bbe27 Mon Sep 17 00:00:00 2001 From: Hardik Gajjar Date: Thu, 6 Feb 2020 12:49:23 +0100 Subject: USB: hub: Fix the broken detection of USB3 device in SMSC hub Renesas R-Car H3ULCB + Kingfisher Infotainment Board is either not able to detect the USB3.0 mass storage devices or is detecting those as USB2.0 high speed devices. The explanation given by Renesas is that, due to a HW issue, the XHCI driver does not wake up after going to sleep on connecting a USB3.0 device. In order to mitigate that, disable the auto-suspend feature specifically for SMSC hubs from hub_probe() function, as a quirk. Renesas Kingfisher Infotainment Board has two USB3.0 ports (CN2) which are connected via USB5534B 4-port SuperSpeed/Hi-Speed, low-power, configurable hub controller. [1] SanDisk USB 3.0 device detected as USB-2.0 before the patch [ 74.036390] usb 5-1.1: new high-speed USB device number 4 using xhci-hcd [ 74.061598] usb 5-1.1: New USB device found, idVendor=0781, idProduct=5581, bcdDevice= 1.00 [ 74.069976] usb 5-1.1: New USB device strings: Mfr=1, Product=2, SerialNumber=3 [ 74.077303] usb 5-1.1: Product: Ultra [ 74.080980] usb 5-1.1: Manufacturer: SanDisk [ 74.085263] usb 5-1.1: SerialNumber: 4C530001110208116550 [2] SanDisk USB 3.0 device detected as USB-3.0 after the patch [ 34.565078] usb 6-1.1: new SuperSpeed Gen 1 USB device number 3 using xhci-hcd [ 34.588719] usb 6-1.1: New USB device found, idVendor=0781, idProduct=5581, bcdDevice= 1.00 [ 34.597098] usb 6-1.1: New USB device strings: Mfr=1, Product=2, SerialNumber=3 [ 34.604430] usb 6-1.1: Product: Ultra [ 34.608110] usb 6-1.1: Manufacturer: SanDisk [ 34.612397] usb 6-1.1: SerialNumber: 4C530001110208116550 Suggested-by: Alan Stern Signed-off-by: Hardik Gajjar Acked-by: Alan Stern Tested-by: Eugeniu Rosca Cc: stable Link: https://lore.kernel.org/r/1580989763-32291-1-git-send-email-hgajjar@de.adit-jv.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 15 +++++++++++++++ drivers/usb/core/hub.h | 1 + 2 files changed, 16 insertions(+) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 3405b146edc9..de94fa4a4ca7 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -38,7 +38,9 @@ #include "otg_whitelist.h" #define USB_VENDOR_GENESYS_LOGIC 0x05e3 +#define USB_VENDOR_SMSC 0x0424 #define HUB_QUIRK_CHECK_PORT_AUTOSUSPEND 0x01 +#define HUB_QUIRK_DISABLE_AUTOSUSPEND 0x02 #define USB_TP_TRANSMISSION_DELAY 40 /* ns */ #define USB_TP_TRANSMISSION_DELAY_MAX 65535 /* ns */ @@ -1731,6 +1733,10 @@ static void hub_disconnect(struct usb_interface *intf) kfree(hub->buffer); pm_suspend_ignore_children(&intf->dev, false); + + if (hub->quirk_disable_autosuspend) + usb_autopm_put_interface(intf); + kref_put(&hub->kref, hub_release); } @@ -1863,6 +1869,11 @@ static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id) if (id->driver_info & HUB_QUIRK_CHECK_PORT_AUTOSUSPEND) hub->quirk_check_port_auto_suspend = 1; + if (id->driver_info & HUB_QUIRK_DISABLE_AUTOSUSPEND) { + hub->quirk_disable_autosuspend = 1; + usb_autopm_get_interface(intf); + } + if (hub_configure(hub, &desc->endpoint[0].desc) >= 0) return 0; @@ -5599,6 +5610,10 @@ out_hdev_lock: } static const struct usb_device_id hub_id_table[] = { + { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_INT_CLASS, + .idVendor = USB_VENDOR_SMSC, + .bInterfaceClass = USB_CLASS_HUB, + .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND}, { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_INT_CLASS, .idVendor = USB_VENDOR_GENESYS_LOGIC, diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h index a9e24e4b8df1..a97dd1ba964e 100644 --- a/drivers/usb/core/hub.h +++ b/drivers/usb/core/hub.h @@ -61,6 +61,7 @@ struct usb_hub { unsigned quiescing:1; unsigned disconnected:1; unsigned in_reset:1; + unsigned quirk_disable_autosuspend:1; unsigned quirk_check_port_auto_suspend:1; -- cgit v1.2.3-59-g8ed1b From dddb40e83038ec72533b1b5721831caf90224a09 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 28 Jan 2020 16:29:56 +0200 Subject: MAINTAINERS: Sort entries in database for USB TYPEC Run parse-maintainers.pl and choose USB TYPEC records. Fix them accordingly. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200128142956.39604-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 38fe2f3f7b6f..1277cf33d413 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17392,11 +17392,14 @@ F: drivers/usb/ F: include/linux/usb.h F: include/linux/usb/ -USB TYPEC PI3USB30532 MUX DRIVER -M: Hans de Goede +USB TYPEC BUS FOR ALTERNATE MODES +M: Heikki Krogerus L: linux-usb@vger.kernel.org S: Maintained -F: drivers/usb/typec/mux/pi3usb30532.c +F: Documentation/ABI/testing/sysfs-bus-typec +F: Documentation/driver-api/usb/typec_bus.rst +F: drivers/usb/typec/altmodes/ +F: include/linux/usb/typec_altmode.h USB TYPEC CLASS M: Heikki Krogerus @@ -17407,14 +17410,11 @@ F: Documentation/driver-api/usb/typec.rst F: drivers/usb/typec/ F: include/linux/usb/typec.h -USB TYPEC BUS FOR ALTERNATE MODES -M: Heikki Krogerus +USB TYPEC PI3USB30532 MUX DRIVER +M: Hans de Goede L: linux-usb@vger.kernel.org S: Maintained -F: Documentation/ABI/testing/sysfs-bus-typec -F: Documentation/driver-api/usb/typec_bus.rst -F: drivers/usb/typec/altmodes/ -F: include/linux/usb/typec_altmode.h +F: drivers/usb/typec/mux/pi3usb30532.c USB TYPEC PORT CONTROLLER DRIVERS M: Guenter Roeck -- cgit v1.2.3-59-g8ed1b From 3e99862c05a9caa5a27969f41566b428696f5a9a Mon Sep 17 00:00:00 2001 From: EJ Hsu Date: Thu, 30 Jan 2020 01:25:06 -0800 Subject: usb: uas: fix a plug & unplug racing When a uas disk is plugged into an external hub, uas_probe() will be called by the hub thread to do the probe. It will first create a SCSI host and then do the scan for this host. During the scan, it will probe the LUN using SCSI INQUERY command which will be packed in the URB and submitted to uas disk. There might be a chance that this external hub with uas disk attached is unplugged during the scan. In this case, uas driver will fail to submit the URB (due to the NOTATTACHED state of uas device) and try to put this SCSI command back to request queue waiting for next chance to run. In normal case, this cycle will terminate when hub thread gets disconnection event and calls into uas_disconnect() accordingly. But in this case, uas_disconnect() will not be called because hub thread of external hub gets stuck waiting for the completion of this SCSI command. A deadlock happened. In this fix, uas will call scsi_scan_host() asynchronously to avoid the blocking of hub thread. Signed-off-by: EJ Hsu Acked-by: Oliver Neukum Cc: stable Link: https://lore.kernel.org/r/20200130092506.102760-1-ejh@nvidia.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 95bba3ba6ac6..3670fda02c34 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -45,6 +45,7 @@ struct uas_dev_info { struct scsi_cmnd *cmnd[MAX_CMNDS]; spinlock_t lock; struct work_struct work; + struct work_struct scan_work; /* for async scanning */ }; enum { @@ -114,6 +115,17 @@ out: spin_unlock_irqrestore(&devinfo->lock, flags); } +static void uas_scan_work(struct work_struct *work) +{ + struct uas_dev_info *devinfo = + container_of(work, struct uas_dev_info, scan_work); + struct Scsi_Host *shost = usb_get_intfdata(devinfo->intf); + + dev_dbg(&devinfo->intf->dev, "starting scan\n"); + scsi_scan_host(shost); + dev_dbg(&devinfo->intf->dev, "scan complete\n"); +} + static void uas_add_work(struct uas_cmd_info *cmdinfo) { struct scsi_pointer *scp = (void *)cmdinfo; @@ -982,6 +994,7 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id) init_usb_anchor(&devinfo->data_urbs); spin_lock_init(&devinfo->lock); INIT_WORK(&devinfo->work, uas_do_work); + INIT_WORK(&devinfo->scan_work, uas_scan_work); result = uas_configure_endpoints(devinfo); if (result) @@ -998,7 +1011,9 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id) if (result) goto free_streams; - scsi_scan_host(shost); + /* Submit the delayed_work for SCSI-device scanning */ + schedule_work(&devinfo->scan_work); + return result; free_streams: @@ -1166,6 +1181,12 @@ static void uas_disconnect(struct usb_interface *intf) usb_kill_anchored_urbs(&devinfo->data_urbs); uas_zap_pending(devinfo, DID_NO_CONNECT); + /* + * Prevent SCSI scanning (if it hasn't started yet) + * or wait for the SCSI-scanning routine to stop. + */ + cancel_work_sync(&devinfo->scan_work); + scsi_remove_host(shost); uas_free_streams(devinfo); scsi_host_put(shost); -- cgit v1.2.3-59-g8ed1b From ca4b43c14cd88d28cfc6467d2fa075aad6818f1d Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Sat, 1 Feb 2020 14:13:44 +0800 Subject: usb: charger: assign specific number for enum value To work properly on every architectures and compilers, the enum value needs to be specific numbers. Suggested-by: Greg KH Signed-off-by: Peter Chen Link: https://lore.kernel.org/r/1580537624-10179-1-git-send-email-peter.chen@nxp.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/usb/charger.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/uapi/linux/usb/charger.h b/include/uapi/linux/usb/charger.h index 5f72af35b3ed..ad22079125bf 100644 --- a/include/uapi/linux/usb/charger.h +++ b/include/uapi/linux/usb/charger.h @@ -14,18 +14,18 @@ * ACA (Accessory Charger Adapters) */ enum usb_charger_type { - UNKNOWN_TYPE, - SDP_TYPE, - DCP_TYPE, - CDP_TYPE, - ACA_TYPE, + UNKNOWN_TYPE = 0, + SDP_TYPE = 1, + DCP_TYPE = 2, + CDP_TYPE = 3, + ACA_TYPE = 4, }; /* USB charger state */ enum usb_charger_state { - USB_CHARGER_DEFAULT, - USB_CHARGER_PRESENT, - USB_CHARGER_ABSENT, + USB_CHARGER_DEFAULT = 0, + USB_CHARGER_PRESENT = 1, + USB_CHARGER_ABSENT = 2, }; #endif /* _UAPI__LINUX_USB_CHARGER_H */ -- cgit v1.2.3-59-g8ed1b From 8099f58f1ecddf4f374f4828a3dff8397c7cbd74 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 31 Jan 2020 10:39:26 -0500 Subject: USB: hub: Don't record a connect-change event during reset-resume Paul Zimmerman reports that his USB Bluetooth adapter sometimes crashes following system resume, when it receives a Get-Device-Descriptor request while it is busy doing something else. Such a request was added by commit a4f55d8b8c14 ("usb: hub: Check device descriptor before resusciation"). It gets sent when the hub driver's work thread checks whether a connect-change event on an enabled port really indicates a new device has been connected, as opposed to an old device momentarily disconnecting and then reconnecting (which can happen with xHCI host controllers, since they automatically enable connected ports). The same kind of thing occurs when a port's power session is lost during system suspend. When the system wakes up it sees a connect-change event on the port, and if the child device's persist_enabled flag was set then hub_activate() sets the device's reset_resume flag as well as the port's bit in hub->change_bits. The reset-resume code then takes responsibility for checking that the same device is still attached to the port, and it does this as part of the device's resume pathway. By the time the hub driver's work thread starts up again, the device has already been fully reinitialized and is busy doing its own thing. There's no need for the work thread to do the same check a second time, and in fact this unnecessary check is what caused the problem that Paul observed. Note that performing the unnecessary check is not actually a bug. Devices are supposed to be able to send descriptors back to the host even when they are busy doing something else. The underlying cause of Paul's problem lies in his Bluetooth adapter. Nevertheless, we shouldn't perform the same check twice in a row -- and as a nice side benefit, removing the extra check allows the Bluetooth adapter to work more reliably. The work thread performs its check when it sees that the port's bit is set in hub->change_bits. In this situation that bit is interpreted as though a connect-change event had occurred on the port _after_ the reset-resume, which is not what actually happened. One possible fix would be to make the reset-resume code clear the port's bit in hub->change_bits. But it seems simpler to just avoid setting the bit during hub_activate() in the first place. That's what this patch does. (Proving that the patch is correct when CONFIG_PM is disabled requires a little thought. In that setting hub_activate() will be called only for initialization and resets, since there won't be any resumes or reset-resumes. During initialization and hub resets the hub doesn't have any child devices, and so this code path never gets executed.) Reported-and-tested-by: Paul Zimmerman Signed-off-by: Alan Stern Link: https://marc.info/?t=157949360700001&r=1&w=2 CC: David Heinzelmann CC: Link: https://lore.kernel.org/r/Pine.LNX.4.44L0.2001311037460.1577-100000@iolanthe.rowland.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index de94fa4a4ca7..1d212f82c69b 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1219,11 +1219,6 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) #ifdef CONFIG_PM udev->reset_resume = 1; #endif - /* Don't set the change_bits when the device - * was powered off. - */ - if (test_bit(port1, hub->power_bits)) - set_bit(port1, hub->change_bits); } else { /* The power session is gone; tell hub_wq */ -- cgit v1.2.3-59-g8ed1b From a4a601948fc8d0a9661b83df03f5ee11e903efe6 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 3 Feb 2020 01:42:59 +0300 Subject: usb: phy: tegra: Add clarifying comments about the shared registers Tools like Coccinelle may erroneously recommend to use the devm_platform_ioremap_resource() API for the registers mapping because these tools are not aware about the implementation details of the driver. Let's add a clarifying comments to the code, which should help to stop future attempts to break the driver. Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Link: https://lore.kernel.org/r/20200202224259.29187-1-digetx@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/phy-tegra-usb.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/phy/phy-tegra-usb.c b/drivers/usb/phy/phy-tegra-usb.c index 037e8eee737d..6153cc35aba0 100644 --- a/drivers/usb/phy/phy-tegra-usb.c +++ b/drivers/usb/phy/phy-tegra-usb.c @@ -969,6 +969,10 @@ static int utmi_phy_probe(struct tegra_usb_phy *tegra_phy, return -ENXIO; } + /* + * Note that UTMI pad registers are shared by all PHYs, therefore + * devm_platform_ioremap_resource() can't be used here. + */ tegra_phy->pad_regs = devm_ioremap(&pdev->dev, res->start, resource_size(res)); if (!tegra_phy->pad_regs) { @@ -1087,6 +1091,10 @@ static int tegra_usb_phy_probe(struct platform_device *pdev) return -ENXIO; } + /* + * Note that PHY and USB controller are using shared registers, + * therefore devm_platform_ioremap_resource() can't be used here. + */ tegra_phy->regs = devm_ioremap(&pdev->dev, res->start, resource_size(res)); if (!tegra_phy->regs) { -- cgit v1.2.3-59-g8ed1b From b32196e35bd7bbc8038db1aba1fbf022dc469b6a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 10 Feb 2020 09:51:39 +0000 Subject: usb: dwc3: debug: fix string position formatting mixup with ret and len Currently the string formatting is mixing up the offset of ret and len. Re-work the code to use just len, remove ret and use scnprintf instead of snprintf and len position accumulation where required. Remove the -ve return check since scnprintf never returns a failure -ve size. Also break overly long lines to clean up checkpatch warnings. Addresses-Coverity: ("Unused value") Fixes: 1381a5113caf ("usb: dwc3: debug: purge usage of strcat") Signed-off-by: Colin Ian King Reviewed-by: Dan Carpenter Cc: stable Link: https://lore.kernel.org/r/20200210095139.328711-1-colin.king@canonical.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/debug.h | 39 +++++++++++++++------------------------ 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/drivers/usb/dwc3/debug.h b/drivers/usb/dwc3/debug.h index e56beb9d1e36..4a13ceaf4093 100644 --- a/drivers/usb/dwc3/debug.h +++ b/drivers/usb/dwc3/debug.h @@ -256,86 +256,77 @@ static inline const char *dwc3_ep_event_string(char *str, size_t size, u8 epnum = event->endpoint_number; size_t len; int status; - int ret; - ret = snprintf(str, size, "ep%d%s: ", epnum >> 1, + len = scnprintf(str, size, "ep%d%s: ", epnum >> 1, (epnum & 1) ? "in" : "out"); - if (ret < 0) - return "UNKNOWN"; status = event->status; switch (event->endpoint_event) { case DWC3_DEPEVT_XFERCOMPLETE: - len = strlen(str); - snprintf(str + len, size - len, "Transfer Complete (%c%c%c)", + len += scnprintf(str + len, size - len, + "Transfer Complete (%c%c%c)", status & DEPEVT_STATUS_SHORT ? 'S' : 's', status & DEPEVT_STATUS_IOC ? 'I' : 'i', status & DEPEVT_STATUS_LST ? 'L' : 'l'); - len = strlen(str); - if (epnum <= 1) - snprintf(str + len, size - len, " [%s]", + scnprintf(str + len, size - len, " [%s]", dwc3_ep0_state_string(ep0state)); break; case DWC3_DEPEVT_XFERINPROGRESS: - len = strlen(str); - - snprintf(str + len, size - len, "Transfer In Progress [%d] (%c%c%c)", + scnprintf(str + len, size - len, + "Transfer In Progress [%d] (%c%c%c)", event->parameters, status & DEPEVT_STATUS_SHORT ? 'S' : 's', status & DEPEVT_STATUS_IOC ? 'I' : 'i', status & DEPEVT_STATUS_LST ? 'M' : 'm'); break; case DWC3_DEPEVT_XFERNOTREADY: - len = strlen(str); - - snprintf(str + len, size - len, "Transfer Not Ready [%d]%s", + len += scnprintf(str + len, size - len, + "Transfer Not Ready [%d]%s", event->parameters, status & DEPEVT_STATUS_TRANSFER_ACTIVE ? " (Active)" : " (Not Active)"); - len = strlen(str); - /* Control Endpoints */ if (epnum <= 1) { int phase = DEPEVT_STATUS_CONTROL_PHASE(event->status); switch (phase) { case DEPEVT_STATUS_CONTROL_DATA: - snprintf(str + ret, size - ret, + scnprintf(str + len, size - len, " [Data Phase]"); break; case DEPEVT_STATUS_CONTROL_STATUS: - snprintf(str + ret, size - ret, + scnprintf(str + len, size - len, " [Status Phase]"); } } break; case DWC3_DEPEVT_RXTXFIFOEVT: - snprintf(str + ret, size - ret, "FIFO"); + scnprintf(str + len, size - len, "FIFO"); break; case DWC3_DEPEVT_STREAMEVT: status = event->status; switch (status) { case DEPEVT_STREAMEVT_FOUND: - snprintf(str + ret, size - ret, " Stream %d Found", + scnprintf(str + len, size - len, " Stream %d Found", event->parameters); break; case DEPEVT_STREAMEVT_NOTFOUND: default: - snprintf(str + ret, size - ret, " Stream Not Found"); + scnprintf(str + len, size - len, " Stream Not Found"); break; } break; case DWC3_DEPEVT_EPCMDCMPLT: - snprintf(str + ret, size - ret, "Endpoint Command Complete"); + scnprintf(str + len, size - len, "Endpoint Command Complete"); break; default: - snprintf(str, size, "UNKNOWN"); + scnprintf(str + len, size - len, "UNKNOWN"); } return str; -- cgit v1.2.3-59-g8ed1b From 73f8bda9b5dc1c69df2bc55c0cbb24461a6391a9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 3 Feb 2020 16:38:28 +0100 Subject: USB: core: add endpoint-blacklist quirk Add a new device quirk that can be used to blacklist endpoints. Since commit 3e4f8e21c4f2 ("USB: core: fix check for duplicate endpoints") USB core ignores any duplicate endpoints found during descriptor parsing. In order to handle devices where the first interfaces with duplicate endpoints are the ones that should have their endpoints ignored, we need to add a blacklist. Tested-by: edes Cc: stable Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20200203153830.26394-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 11 +++++++++++ drivers/usb/core/quirks.c | 32 ++++++++++++++++++++++++++++++++ drivers/usb/core/usb.h | 3 +++ include/linux/usb/quirks.h | 3 +++ 4 files changed, 49 insertions(+) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 26bc05e48d8a..7df22bcefa9d 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -256,6 +256,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, struct usb_host_interface *ifp, int num_ep, unsigned char *buffer, int size) { + struct usb_device *udev = to_usb_device(ddev); unsigned char *buffer0 = buffer; struct usb_endpoint_descriptor *d; struct usb_host_endpoint *endpoint; @@ -297,6 +298,16 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, goto skip_to_next_endpoint_or_interface_descriptor; } + /* Ignore blacklisted endpoints */ + if (udev->quirks & USB_QUIRK_ENDPOINT_BLACKLIST) { + if (usb_endpoint_is_blacklisted(udev, ifp, d)) { + dev_warn(ddev, "config %d interface %d altsetting %d has a blacklisted endpoint with address 0x%X, skipping\n", + cfgno, inum, asnum, + d->bEndpointAddress); + goto skip_to_next_endpoint_or_interface_descriptor; + } + } + endpoint = &ifp->endpoint[ifp->desc.bNumEndpoints]; ++ifp->desc.bNumEndpoints; diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 6b6413073584..56c8dffaf5f5 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -472,6 +472,38 @@ static const struct usb_device_id usb_amd_resume_quirk_list[] = { { } /* terminating entry must be last */ }; +/* + * Entries for blacklisted endpoints that should be ignored when parsing + * configuration descriptors. + * + * Matched for devices with USB_QUIRK_ENDPOINT_BLACKLIST. + */ +static const struct usb_device_id usb_endpoint_blacklist[] = { + { } +}; + +bool usb_endpoint_is_blacklisted(struct usb_device *udev, + struct usb_host_interface *intf, + struct usb_endpoint_descriptor *epd) +{ + const struct usb_device_id *id; + unsigned int address; + + for (id = usb_endpoint_blacklist; id->match_flags; ++id) { + if (!usb_match_device(udev, id)) + continue; + + if (!usb_match_one_id_intf(udev, intf, id)) + continue; + + address = id->driver_info; + if (address == epd->bEndpointAddress) + return true; + } + + return false; +} + static bool usb_match_any_interface(struct usb_device *udev, const struct usb_device_id *id) { diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h index cf4783cf661a..3ad0ee57e859 100644 --- a/drivers/usb/core/usb.h +++ b/drivers/usb/core/usb.h @@ -37,6 +37,9 @@ extern void usb_authorize_interface(struct usb_interface *); extern void usb_detect_quirks(struct usb_device *udev); extern void usb_detect_interface_quirks(struct usb_device *udev); extern void usb_release_quirk_list(void); +extern bool usb_endpoint_is_blacklisted(struct usb_device *udev, + struct usb_host_interface *intf, + struct usb_endpoint_descriptor *epd); extern int usb_remove_device(struct usb_device *udev); extern int usb_get_device_descriptor(struct usb_device *dev, diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index a1be64c9940f..22c1f579afe3 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h @@ -69,4 +69,7 @@ /* Hub needs extra delay after resetting its port. */ #define USB_QUIRK_HUB_SLOW_RESET BIT(14) +/* device has blacklisted endpoints */ +#define USB_QUIRK_ENDPOINT_BLACKLIST BIT(15) + #endif /* __LINUX_USB_QUIRKS_H */ -- cgit v1.2.3-59-g8ed1b From bdd1b147b8026df0e4260b387026b251d888ed01 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 3 Feb 2020 16:38:29 +0100 Subject: USB: quirks: blacklist duplicate ep on Sound Devices USBPre2 This device has a broken vendor-specific altsetting for interface 1, where endpoint 0x85 is declared as an isochronous endpoint despite being used by interface 2 for audio capture. Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 2.00 bDeviceClass 239 Miscellaneous Device bDeviceSubClass 2 bDeviceProtocol 1 Interface Association bMaxPacketSize0 64 idVendor 0x0926 idProduct 0x0202 bcdDevice 1.00 iManufacturer 1 Sound Devices iProduct 2 USBPre2 iSerial 3 [...] bNumConfigurations 1 [...] Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 1 bAlternateSetting 3 bNumEndpoints 2 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 0 bInterfaceProtocol 0 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x85 EP 5 IN bmAttributes 5 Transfer Type Isochronous Synch Type Asynchronous Usage Type Data wMaxPacketSize 0x0126 1x 294 bytes bInterval 1 [...] Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 2 bAlternateSetting 1 bNumEndpoints 1 bInterfaceClass 1 Audio bInterfaceSubClass 2 Streaming bInterfaceProtocol 0 iInterface 0 AudioStreaming Interface Descriptor: bLength 7 bDescriptorType 36 bDescriptorSubtype 1 (AS_GENERAL) bTerminalLink 4 bDelay 1 frames wFormatTag 0x0001 PCM AudioStreaming Interface Descriptor: bLength 26 bDescriptorType 36 bDescriptorSubtype 2 (FORMAT_TYPE) bFormatType 1 (FORMAT_TYPE_I) bNrChannels 2 bSubframeSize 2 bBitResolution 16 bSamFreqType 6 Discrete tSamFreq[ 0] 8000 tSamFreq[ 1] 16000 tSamFreq[ 2] 24000 tSamFreq[ 3] 32000 tSamFreq[ 4] 44100 tSamFreq[ 5] 48000 Endpoint Descriptor: bLength 9 bDescriptorType 5 bEndpointAddress 0x85 EP 5 IN bmAttributes 5 Transfer Type Isochronous Synch Type Asynchronous Usage Type Data wMaxPacketSize 0x0126 1x 294 bytes bInterval 4 bRefresh 0 bSynchAddress 0 AudioStreaming Endpoint Descriptor: bLength 7 bDescriptorType 37 bDescriptorSubtype 1 (EP_GENERAL) bmAttributes 0x01 Sampling Frequency bLockDelayUnits 2 Decoded PCM samples wLockDelay 0x0000 Since commit 3e4f8e21c4f2 ("USB: core: fix check for duplicate endpoints") USB core ignores any duplicate endpoints found during descriptor parsing, but in this case we need to ignore the first instance in order to avoid breaking the audio capture interface. Fixes: 3e4f8e21c4f2 ("USB: core: fix check for duplicate endpoints") Cc: stable Reported-by: edes Tested-by: edes Link: https://lore.kernel.org/r/20200201105829.5682c887@acme7.acmenet Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20200203153830.26394-3-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 56c8dffaf5f5..f27468966a3d 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -354,6 +354,10 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0904, 0x6103), .driver_info = USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL }, + /* Sound Devices USBPre2 */ + { USB_DEVICE(0x0926, 0x0202), .driver_info = + USB_QUIRK_ENDPOINT_BLACKLIST }, + /* Keytouch QWERTY Panel keyboard */ { USB_DEVICE(0x0926, 0x3333), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, @@ -479,6 +483,7 @@ static const struct usb_device_id usb_amd_resume_quirk_list[] = { * Matched for devices with USB_QUIRK_ENDPOINT_BLACKLIST. */ static const struct usb_device_id usb_endpoint_blacklist[] = { + { USB_DEVICE_INTERFACE_NUMBER(0x0926, 0x0202, 1), .driver_info = 0x85 }, { } }; -- cgit v1.2.3-59-g8ed1b From 7f1b92a6a7f2b96a8647a488370b9a851433df77 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 3 Feb 2020 16:38:30 +0100 Subject: USB: core: clean up endpoint-descriptor parsing Use the new usb-device pointer instead of back-casting when accessing the struct usb_device when parsing endpoints. Note that this introduces two lines that are longer than 80 chars on purpose. Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20200203153830.26394-4-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 7df22bcefa9d..b7918f695434 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -322,7 +322,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, j = 255; if (usb_endpoint_xfer_int(d)) { i = 1; - switch (to_usb_device(ddev)->speed) { + switch (udev->speed) { case USB_SPEED_SUPER_PLUS: case USB_SPEED_SUPER: case USB_SPEED_HIGH: @@ -343,8 +343,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, /* * This quirk fixes bIntervals reported in ms. */ - if (to_usb_device(ddev)->quirks & - USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL) { + if (udev->quirks & USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL) { n = clamp(fls(d->bInterval) + 3, i, j); i = j = n; } @@ -352,8 +351,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, * This quirk fixes bIntervals reported in * linear microframes. */ - if (to_usb_device(ddev)->quirks & - USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL) { + if (udev->quirks & USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL) { n = clamp(fls(d->bInterval), i, j); i = j = n; } @@ -370,7 +368,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, } else if (usb_endpoint_xfer_isoc(d)) { i = 1; j = 16; - switch (to_usb_device(ddev)->speed) { + switch (udev->speed) { case USB_SPEED_HIGH: n = 7; /* 8 ms = 2^(7-1) uframes */ break; @@ -392,8 +390,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, * explicitly forbidden by the USB spec. In an attempt to make * them usable, we will try treating them as Interrupt endpoints. */ - if (to_usb_device(ddev)->speed == USB_SPEED_LOW && - usb_endpoint_xfer_bulk(d)) { + if (udev->speed == USB_SPEED_LOW && usb_endpoint_xfer_bulk(d)) { dev_warn(ddev, "config %d interface %d altsetting %d " "endpoint 0x%X is Bulk; changing to Interrupt\n", cfgno, inum, asnum, d->bEndpointAddress); @@ -417,7 +414,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, /* Find the highest legal maxpacket size for this endpoint */ i = 0; /* additional transactions per microframe */ - switch (to_usb_device(ddev)->speed) { + switch (udev->speed) { case USB_SPEED_LOW: maxpacket_maxes = low_speed_maxpacket_maxes; break; @@ -453,8 +450,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, * maxpacket sizes other than 512. High speed HCDs may not * be able to handle that particular bug, so let's warn... */ - if (to_usb_device(ddev)->speed == USB_SPEED_HIGH - && usb_endpoint_xfer_bulk(d)) { + if (udev->speed == USB_SPEED_HIGH && usb_endpoint_xfer_bulk(d)) { if (maxp != 512) dev_warn(ddev, "config %d interface %d altsetting %d " "bulk endpoint 0x%X has invalid maxpacket %d\n", @@ -463,7 +459,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, } /* Parse a possible SuperSpeed endpoint companion descriptor */ - if (to_usb_device(ddev)->speed >= USB_SPEED_SUPER) + if (udev->speed >= USB_SPEED_SUPER) usb_parse_ss_endpoint_companion(ddev, cfgno, inum, asnum, endpoint, buffer, size); -- cgit v1.2.3-59-g8ed1b From 87c5cbf71ecbb9e289d60a2df22eb686c70bf196 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Fri, 7 Feb 2020 11:53:35 +0200 Subject: serial: ar933x_uart: set UART_CS_{RX,TX}_READY_ORIDE On AR934x this UART is usually not initialized by the bootloader as it is only used as a secondary serial port while the primary UART is a newly introduced NS16550-compatible. In order to make use of the ar933x-uart on AR934x without RTS/CTS hardware flow control, one needs to set the UART_CS_{RX,TX}_READY_ORIDE bits as other than on AR933x where this UART is used as primary/console, the bootloader on AR934x typically doesn't set those bits. Setting them explicitely on AR933x should not do any harm, so just set them unconditionally. Tested-by: Chuanhong Guo Signed-off-by: Daniel Golle Link: https://lore.kernel.org/r/20200207095335.GA179836@makrotopia.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/ar933x_uart.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/tty/serial/ar933x_uart.c b/drivers/tty/serial/ar933x_uart.c index 3bdd56a1021b..ea12f10610b6 100644 --- a/drivers/tty/serial/ar933x_uart.c +++ b/drivers/tty/serial/ar933x_uart.c @@ -286,6 +286,10 @@ static void ar933x_uart_set_termios(struct uart_port *port, ar933x_uart_rmw_set(up, AR933X_UART_CS_REG, AR933X_UART_CS_HOST_INT_EN); + /* enable RX and TX ready overide */ + ar933x_uart_rmw_set(up, AR933X_UART_CS_REG, + AR933X_UART_CS_TX_READY_ORIDE | AR933X_UART_CS_RX_READY_ORIDE); + /* reenable the UART */ ar933x_uart_rmw(up, AR933X_UART_CS_REG, AR933X_UART_CS_IF_MODE_M << AR933X_UART_CS_IF_MODE_S, @@ -418,6 +422,10 @@ static int ar933x_uart_startup(struct uart_port *port) ar933x_uart_rmw_set(up, AR933X_UART_CS_REG, AR933X_UART_CS_HOST_INT_EN); + /* enable RX and TX ready overide */ + ar933x_uart_rmw_set(up, AR933X_UART_CS_REG, + AR933X_UART_CS_TX_READY_ORIDE | AR933X_UART_CS_RX_READY_ORIDE); + /* Enable RX interrupts */ up->ier = AR933X_UART_INT_RX_VALID; ar933x_uart_write(up, AR933X_UART_INT_EN_REG, up->ier); -- cgit v1.2.3-59-g8ed1b From 0c5aae59270fb1f827acce182786094c9ccf598e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 10 Feb 2020 15:57:30 +0100 Subject: serdev: ttyport: restore client ops on deregistration The serdev tty-port controller driver should reset the tty-port client operations also on deregistration to avoid a NULL-pointer dereference in case the port is later re-registered as a normal tty device. Note that this can only happen with tty drivers such as 8250 which have statically allocated port structures that can end up being reused and where a later registration would not register a serdev controller (e.g. due to registration errors or if the devicetree has been changed in between). Specifically, this can be an issue for any statically defined ports that would be registered by 8250 core when an 8250 driver is being unbound. Fixes: bed35c6dfa6a ("serdev: add a tty port controller driver") Cc: stable # 4.11 Reported-by: Loic Poulain Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20200210145730.22762-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serdev/serdev-ttyport.c | 6 ++---- drivers/tty/tty_port.c | 5 +++-- include/linux/tty.h | 2 ++ 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/tty/serdev/serdev-ttyport.c b/drivers/tty/serdev/serdev-ttyport.c index d1cdd2ab8b4c..d367803e2044 100644 --- a/drivers/tty/serdev/serdev-ttyport.c +++ b/drivers/tty/serdev/serdev-ttyport.c @@ -265,7 +265,6 @@ struct device *serdev_tty_port_register(struct tty_port *port, struct device *parent, struct tty_driver *drv, int idx) { - const struct tty_port_client_operations *old_ops; struct serdev_controller *ctrl; struct serport *serport; int ret; @@ -284,7 +283,6 @@ struct device *serdev_tty_port_register(struct tty_port *port, ctrl->ops = &ctrl_ops; - old_ops = port->client_ops; port->client_ops = &client_ops; port->client_data = ctrl; @@ -297,7 +295,7 @@ struct device *serdev_tty_port_register(struct tty_port *port, err_reset_data: port->client_data = NULL; - port->client_ops = old_ops; + port->client_ops = &tty_port_default_client_ops; serdev_controller_put(ctrl); return ERR_PTR(ret); @@ -312,8 +310,8 @@ int serdev_tty_port_unregister(struct tty_port *port) return -ENODEV; serdev_controller_remove(ctrl); - port->client_ops = NULL; port->client_data = NULL; + port->client_ops = &tty_port_default_client_ops; serdev_controller_put(ctrl); return 0; diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c index 044c3cbdcfa4..ea80bf872f54 100644 --- a/drivers/tty/tty_port.c +++ b/drivers/tty/tty_port.c @@ -52,10 +52,11 @@ static void tty_port_default_wakeup(struct tty_port *port) } } -static const struct tty_port_client_operations default_client_ops = { +const struct tty_port_client_operations tty_port_default_client_ops = { .receive_buf = tty_port_default_receive_buf, .write_wakeup = tty_port_default_wakeup, }; +EXPORT_SYMBOL_GPL(tty_port_default_client_ops); void tty_port_init(struct tty_port *port) { @@ -68,7 +69,7 @@ void tty_port_init(struct tty_port *port) spin_lock_init(&port->lock); port->close_delay = (50 * HZ) / 100; port->closing_wait = (3000 * HZ) / 100; - port->client_ops = &default_client_ops; + port->client_ops = &tty_port_default_client_ops; kref_init(&port->kref); } EXPORT_SYMBOL(tty_port_init); diff --git a/include/linux/tty.h b/include/linux/tty.h index bfa4e2ee94a9..bd5fe0e907e8 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -225,6 +225,8 @@ struct tty_port_client_operations { void (*write_wakeup)(struct tty_port *port); }; +extern const struct tty_port_client_operations tty_port_default_client_ops; + struct tty_port { struct tty_bufhead buf; /* Locked internally */ struct tty_struct *tty; /* Back pointer */ -- cgit v1.2.3-59-g8ed1b From 04b5bfe3dc94e64d0590c54045815cb5183fb095 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Mon, 10 Feb 2020 16:20:53 +0100 Subject: tty/serial: atmel: manage shutdown in case of RS485 or ISO7816 mode In atmel_shutdown() we call atmel_stop_rx() and atmel_stop_tx() functions. Prevent the rx restart that is implemented in RS485 or ISO7816 modes when calling atmel_stop_tx() by using the atomic information tasklet_shutdown that is already in place for this purpose. Fixes: 98f2082c3ac4 ("tty/serial: atmel: enforce tasklet init and termination sequences") Signed-off-by: Nicolas Ferre Cc: stable Link: https://lore.kernel.org/r/20200210152053.8289-1-nicolas.ferre@microchip.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index c15c398c88a9..a39c87a7c2e1 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -570,7 +570,8 @@ static void atmel_stop_tx(struct uart_port *port) atmel_uart_writel(port, ATMEL_US_IDR, atmel_port->tx_done_mask); if (atmel_uart_is_half_duplex(port)) - atmel_start_rx(port); + if (!atomic_read(&atmel_port->tasklet_shutdown)) + atmel_start_rx(port); } -- cgit v1.2.3-59-g8ed1b From 1f69a1273b3f204a9c00dc3bbdcc4afcd0787428 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 9 Feb 2020 19:44:15 +0300 Subject: tty: serial: tegra: Handle RX transfer in PIO mode if DMA wasn't started It is possible to get an instant RX timeout or end-of-transfer interrupt before RX DMA was started, if transaction is less than 16 bytes. Transfer should be handled in PIO mode in this case because DMA can't handle it. This patch brings back the original behaviour of the driver that was changed by accident by a previous commit, it fixes occasional Bluetooth HW initialization failures which I started to notice recently. Fixes: d5e3fadb7012 ("tty: serial: tegra: Activate RX DMA transfer by request") Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20200209164415.9632-1-digetx@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial-tegra.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/drivers/tty/serial/serial-tegra.c b/drivers/tty/serial/serial-tegra.c index 33034b852a51..8de8bac9c6c7 100644 --- a/drivers/tty/serial/serial-tegra.c +++ b/drivers/tty/serial/serial-tegra.c @@ -692,11 +692,22 @@ static void tegra_uart_copy_rx_to_tty(struct tegra_uart_port *tup, count, DMA_TO_DEVICE); } +static void do_handle_rx_pio(struct tegra_uart_port *tup) +{ + struct tty_struct *tty = tty_port_tty_get(&tup->uport.state->port); + struct tty_port *port = &tup->uport.state->port; + + tegra_uart_handle_rx_pio(tup, port); + if (tty) { + tty_flip_buffer_push(port); + tty_kref_put(tty); + } +} + static void tegra_uart_rx_buffer_push(struct tegra_uart_port *tup, unsigned int residue) { struct tty_port *port = &tup->uport.state->port; - struct tty_struct *tty = tty_port_tty_get(port); unsigned int count; async_tx_ack(tup->rx_dma_desc); @@ -705,11 +716,7 @@ static void tegra_uart_rx_buffer_push(struct tegra_uart_port *tup, /* If we are here, DMA is stopped */ tegra_uart_copy_rx_to_tty(tup, port, count); - tegra_uart_handle_rx_pio(tup, port); - if (tty) { - tty_flip_buffer_push(port); - tty_kref_put(tty); - } + do_handle_rx_pio(tup); } static void tegra_uart_rx_dma_complete(void *args) @@ -749,8 +756,10 @@ static void tegra_uart_terminate_rx_dma(struct tegra_uart_port *tup) { struct dma_tx_state state; - if (!tup->rx_dma_active) + if (!tup->rx_dma_active) { + do_handle_rx_pio(tup); return; + } dmaengine_terminate_all(tup->rx_dma_chan); dmaengine_tx_status(tup->rx_dma_chan, tup->rx_cookie, &state); @@ -816,18 +825,6 @@ static void tegra_uart_handle_modem_signal_change(struct uart_port *u) uart_handle_cts_change(&tup->uport, msr & UART_MSR_CTS); } -static void do_handle_rx_pio(struct tegra_uart_port *tup) -{ - struct tty_struct *tty = tty_port_tty_get(&tup->uport.state->port); - struct tty_port *port = &tup->uport.state->port; - - tegra_uart_handle_rx_pio(tup, port); - if (tty) { - tty_flip_buffer_push(port); - tty_kref_put(tty); - } -} - static irqreturn_t tegra_uart_isr(int irq, void *data) { struct tegra_uart_port *tup = data; -- cgit v1.2.3-59-g8ed1b From ae7fce069bd7c8a54d920692f93f1d4eff2bff04 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 5 Feb 2020 00:16:27 +0000 Subject: Documentation/process: Add Arm contact for embargoed HW issues Adding myself to list after getting voluntold Cc: Catalin Marinas Signed-off-by: Grant Likely Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20200205001627.27356-1-grant.likely@arm.com Signed-off-by: Greg Kroah-Hartman --- Documentation/process/embargoed-hardware-issues.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index 33edae654599..572da66be43c 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -244,7 +244,7 @@ disclosure of a particular issue, unless requested by a response team or by an involved disclosed party. The current ambassadors list: ============= ======================================================== - ARM + ARM Grant Likely AMD Tom Lendacky IBM Intel Tony Luck -- cgit v1.2.3-59-g8ed1b From 485d5b75980dd6c62e02522a9e8c09b5d5529e76 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 5 Feb 2020 12:25:51 +0000 Subject: embargoed-hardware-issues: drop Amazon contact as the email address now bounces Peter's email address bounces, so remove him as the contact for Amazon. Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20200205122551.GA1185549@kroah.com Signed-off-by: Greg Kroah-Hartman --- Documentation/process/embargoed-hardware-issues.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index 572da66be43c..e405b4659d84 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -260,7 +260,7 @@ an involved disclosed party. The current ambassadors list: Red Hat Josh Poimboeuf SUSE Jiri Kosina - Amazon Peter Bowen + Amazon Google Kees Cook ============= ======================================================== -- cgit v1.2.3-59-g8ed1b From 4bc4f8128c48511b93e7285979a46cf0682cbb2f Mon Sep 17 00:00:00 2001 From: James Morris Date: Thu, 6 Feb 2020 10:08:34 +1100 Subject: Documentation/process: Change Microsoft contact for embargoed hardware issues Update Microsoft contact from Sasha to James. Cc: Sasha Levin Signed-off-by: James Morris Link: https://lore.kernel.org/r/alpine.LRH.2.21.2002061006350.22130@namei.org Signed-off-by: Greg Kroah-Hartman --- Documentation/process/embargoed-hardware-issues.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index e405b4659d84..64f375c02358 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -250,7 +250,7 @@ an involved disclosed party. The current ambassadors list: Intel Tony Luck Qualcomm Trilok Soni - Microsoft Sasha Levin + Microsoft James Morris VMware Xen Andrew Cooper -- cgit v1.2.3-59-g8ed1b From 74835c7db0322b6eddf091b8b062f127b8999a0a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 6 Feb 2020 16:48:00 +0100 Subject: COPYING: state that all contributions really are covered by this file Explicitly state that all contributions to the kernel source tree really are covered under this COPYING file in case someone thought otherwise. Lawyers love to be pedantic, even more so than software engineers at times, and this sentence makes them sleep easier. Reviewed-by: Thomas Gleixner Acked-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20200206154800.GA3754085@kroah.com Signed-off-by: Greg Kroah-Hartman --- COPYING | 2 ++ 1 file changed, 2 insertions(+) diff --git a/COPYING b/COPYING index da4cb28febe6..a635a38ef940 100644 --- a/COPYING +++ b/COPYING @@ -16,3 +16,5 @@ In addition, other licenses may also apply. Please see: Documentation/process/license-rules.rst for more details. + +All contributions to the Linux Kernel are subject to this COPYING file. -- cgit v1.2.3-59-g8ed1b From ea3d147a474cb522bfdfe68f1f2557750dcf41dd Mon Sep 17 00:00:00 2001 From: Brendan Higgins Date: Fri, 31 Jan 2020 14:18:32 +1030 Subject: fsi: aspeed: add unspecified HAS_IOMEM dependency Currently CONFIG_FSI_MASTER_ASPEED=y implicitly depends on CONFIG_HAS_IOMEM=y; consequently, on architectures without IOMEM we get the following build error: ld: drivers/fsi/fsi-master-aspeed.o: in function `fsi_master_aspeed_probe': drivers/fsi/fsi-master-aspeed.c:436: undefined reference to `devm_ioremap_resource' Fix the build error by adding the unspecified dependency. Fixes: 606397d67f41 ("fsi: Add ast2600 master driver") Cc: stable@vger.kernel.org Reported-by: Brendan Higgins Signed-off-by: Brendan Higgins Reviewed-by: Joel Stanley Signed-off-by: Joel Stanley Link: https://lore.kernel.org/r/20200131034832.294268-1-joel@jms.id.au Signed-off-by: Greg Kroah-Hartman --- drivers/fsi/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/fsi/Kconfig b/drivers/fsi/Kconfig index 92ce6d85802c..4cc0e630ab79 100644 --- a/drivers/fsi/Kconfig +++ b/drivers/fsi/Kconfig @@ -55,6 +55,7 @@ config FSI_MASTER_AST_CF config FSI_MASTER_ASPEED tristate "FSI ASPEED master" + depends on HAS_IOMEM help This option enables a FSI master that is present behind an OPB bridge in the AST2600. -- cgit v1.2.3-59-g8ed1b From 3f4ef485be9d54040b695f32ec76d0f1ea50bbf3 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 28 Jan 2020 12:50:33 -0500 Subject: vt: fix scrollback flushing on background consoles Commit a6dbe4427559 ("vt: perform safe console erase in the right order") provided fixes to an earlier commit by gathering all console scrollback flushing operations in a function of its own. This includes the invocation of vc_sw->con_switch() as previously done through a update_screen() call. That commit failed to carry over the con_is_visible() conditional though, as well as cursor handling, which caused problems when "\e[3J" was written to a background console. One could argue for preserving the call to update_screen(). However this does far more than we need, and it is best to remove scrollback assumptions from it. Instead let's gather the minimum needed to actually perform scrollback flushing properly in that one place. While at it, let's document the vc_sw->con_switch() side effect being relied upon. Signed-off-by: Nicolas Pitre Reported-and-tested-by: Lukas Wunner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/nycvar.YSQ.7.76.2001281205560.1655@knanqh.ubzr Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 35d21cdb60d0..0cfbb7182b5a 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -936,10 +936,21 @@ static void flush_scrollback(struct vc_data *vc) WARN_CONSOLE_UNLOCKED(); set_origin(vc); - if (vc->vc_sw->con_flush_scrollback) + if (vc->vc_sw->con_flush_scrollback) { vc->vc_sw->con_flush_scrollback(vc); - else + } else if (con_is_visible(vc)) { + /* + * When no con_flush_scrollback method is provided then the + * legacy way for flushing the scrollback buffer is to use + * a side effect of the con_switch method. We do it only on + * the foreground console as background consoles have no + * scrollback buffers in that case and we obviously don't + * want to switch to them. + */ + hide_cursor(vc); vc->vc_sw->con_switch(vc); + set_cursor(vc); + } } /* -- cgit v1.2.3-59-g8ed1b From f2e97dc126b712c0d21219ed0c42710006c1cf52 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sun, 9 Feb 2020 21:44:37 -0800 Subject: bpf: Selftests build error in sockmap_basic.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix following build error. We could push a tcp.h header into one of the include paths, but I think its easy enough to simply pull in the three defines we need here. If we end up using more of tcp.h at some point we can pull it in later. /home/john/git/bpf/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c: In function ‘connected_socket_v4’: /home/john/git/bpf/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c:20:11: error: ‘TCP_REPAIR_ON’ undeclared (first use in this function) repair = TCP_REPAIR_ON; ^ /home/john/git/bpf/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c:20:11: note: each undeclared identifier is reported only once for each function it appears in /home/john/git/bpf/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c:29:11: error: ‘TCP_REPAIR_OFF_NO_WP’ undeclared (first use in this function) repair = TCP_REPAIR_OFF_NO_WP; Then with fix, $ ./test_progs -n 44 #44/1 sockmap create_update_free:OK #44/2 sockhash create_update_free:OK #44 sockmap_basic:OK Fixes: 5d3919a953c3c ("selftests/bpf: Test freeing sockmap/sockhash with a socket in it") Signed-off-by: John Fastabend Signed-off-by: Alexei Starovoitov Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/158131347731.21414.12120493483848386652.stgit@john-Precision-5820-Tower --- tools/testing/selftests/bpf/prog_tests/sockmap_basic.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 07f5b462c2ef..aa43e0bd210c 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -3,6 +3,11 @@ #include "test_progs.h" +#define TCP_REPAIR 19 /* TCP sock is under repair right now */ + +#define TCP_REPAIR_ON 1 +#define TCP_REPAIR_OFF_NO_WP -1 /* Turn off without window probes */ + static int connected_socket_v4(void) { struct sockaddr_in addr = { -- cgit v1.2.3-59-g8ed1b From 2bf0eb9b3b0d099b20b2c4736436b666d78b94d5 Mon Sep 17 00:00:00 2001 From: Hongbo Yao Date: Mon, 10 Feb 2020 09:14:41 +0800 Subject: bpf: Make btf_check_func_type_match() static Fix the following sparse warning: kernel/bpf/btf.c:4131:5: warning: symbol 'btf_check_func_type_match' was not declared. Should it be static? Reported-by: Hulk Robot Signed-off-by: Hongbo Yao Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200210011441.147102-1-yaohongbo@huawei.com --- kernel/bpf/btf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 805c43b083e9..787140095e58 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -4142,9 +4142,9 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, * EFAULT - verifier bug * 0 - 99% match. The last 1% is validated by the verifier. */ -int btf_check_func_type_match(struct bpf_verifier_log *log, - struct btf *btf1, const struct btf_type *t1, - struct btf *btf2, const struct btf_type *t2) +static int btf_check_func_type_match(struct bpf_verifier_log *log, + struct btf *btf1, const struct btf_type *t1, + struct btf *btf2, const struct btf_type *t2) { const struct btf_param *args1, *args2; const char *fn1, *fn2, *s1, *s2; -- cgit v1.2.3-59-g8ed1b From 95ba79e89c107851bad4492ca23e9b9c399b8592 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 30 Jan 2020 14:55:15 +0100 Subject: MAINTAINERS: remove unnecessary ':' characters Commit e567cb3fef30 ("MAINTAINERS: add an entry for kfifo") added a new entry to MAINTAINERS. Following the example of the previous entry on the list I added a trailing ':' character at the end of the title line. This however results in rather strange looking output from scripts/get_maintainer.pl: $ ./scripts/get_maintainer.pl ./0001-kfifo.patch Stefani Seibold (maintainer:KFIFO:) linux-kernel@vger.kernel.org (open list) It turns out there are more entries like this. Fix the entire file by removing all trailing colons. Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20200130135515.30359-1-brgl@bgdev.pl Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 38fe2f3f7b6f..0e7e88879337 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3909,7 +3909,7 @@ S: Supported F: Documentation/filesystems/ceph.txt F: fs/ceph/ -CERTIFICATE HANDLING: +CERTIFICATE HANDLING M: David Howells M: David Woodhouse L: keyrings@vger.kernel.org @@ -3919,7 +3919,7 @@ F: certs/ F: scripts/sign-file.c F: scripts/extract-cert.c -CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM: +CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM L: devel@driverdev.osuosl.org S: Obsolete F: drivers/staging/wusbcore/ @@ -7047,7 +7047,7 @@ L: kvm@vger.kernel.org S: Supported F: drivers/uio/uio_pci_generic.c -GENERIC VDSO LIBRARY: +GENERIC VDSO LIBRARY M: Andy Lutomirski M: Thomas Gleixner M: Vincenzo Frascino @@ -9278,7 +9278,7 @@ F: include/keys/trusted-type.h F: security/keys/trusted.c F: include/keys/trusted.h -KEYS/KEYRINGS: +KEYS/KEYRINGS M: David Howells M: Jarkko Sakkinen L: keyrings@vger.kernel.org @@ -11484,7 +11484,7 @@ F: drivers/scsi/mac_scsi.* F: drivers/scsi/sun3_scsi.* F: drivers/scsi/sun3_scsi_vme.c -NCSI LIBRARY: +NCSI LIBRARY M: Samuel Mendoza-Jonas S: Maintained F: net/ncsi/ @@ -13512,7 +13512,7 @@ L: linuxppc-dev@lists.ozlabs.org S: Maintained F: drivers/block/ps3vram.c -PSAMPLE PACKET SAMPLING SUPPORT: +PSAMPLE PACKET SAMPLING SUPPORT M: Yotam Gigi S: Maintained F: net/psample @@ -17080,7 +17080,7 @@ S: Maintained F: Documentation/admin-guide/ufs.rst F: fs/ufs/ -UHID USERSPACE HID IO DRIVER: +UHID USERSPACE HID IO DRIVER M: David Herrmann L: linux-input@vger.kernel.org S: Maintained @@ -17094,18 +17094,18 @@ S: Maintained F: drivers/usb/common/ulpi.c F: include/linux/ulpi/ -ULTRA-WIDEBAND (UWB) SUBSYSTEM: +ULTRA-WIDEBAND (UWB) SUBSYSTEM L: devel@driverdev.osuosl.org S: Obsolete F: drivers/staging/uwb/ -UNICODE SUBSYSTEM: +UNICODE SUBSYSTEM M: Gabriel Krisman Bertazi L: linux-fsdevel@vger.kernel.org S: Supported F: fs/unicode/ -UNICORE32 ARCHITECTURE: +UNICORE32 ARCHITECTURE M: Guan Xuetao W: http://mprc.pku.edu.cn/~guanxuetao/linux S: Maintained @@ -17791,7 +17791,7 @@ F: include/linux/vbox_utils.h F: include/uapi/linux/vbox*.h F: drivers/virt/vboxguest/ -VIRTUAL BOX SHARED FOLDER VFS DRIVER: +VIRTUAL BOX SHARED FOLDER VFS DRIVER M: Hans de Goede L: linux-fsdevel@vger.kernel.org S: Maintained -- cgit v1.2.3-59-g8ed1b From e20d8e81a0e06c672e964c9f01100f07a64b1ce6 Mon Sep 17 00:00:00 2001 From: Brendan Higgins Date: Fri, 31 Jan 2020 16:01:02 -0800 Subject: Documentation: kunit: fixed sphinx error in code block Fix a missing newline in a code block that was causing a warning: Documentation/dev-tools/kunit/usage.rst:553: WARNING: Error in "code-block" directive: maximum 1 argument(s) allowed, 3 supplied. .. code-block:: bash modprobe example-test Signed-off-by: Brendan Higgins Reviewed-by: Alan Maguire Signed-off-by: Shuah Khan --- Documentation/dev-tools/kunit/usage.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/dev-tools/kunit/usage.rst b/Documentation/dev-tools/kunit/usage.rst index 7cd56a1993b1..607758a66a99 100644 --- a/Documentation/dev-tools/kunit/usage.rst +++ b/Documentation/dev-tools/kunit/usage.rst @@ -551,6 +551,7 @@ options to your ``.config``: Once the kernel is built and installed, a simple .. code-block:: bash + modprobe example-test ...will run the tests. -- cgit v1.2.3-59-g8ed1b From 7714d469dcba572bbfb9cc47217fed7e7ddeb051 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 16 Jan 2020 09:29:03 +0000 Subject: selftests: fix spelling mistaked "chaigned" -> "chained" There is a spelling mistake in a literal string, fix it. Signed-off-by: Colin Ian King Reviewed-by: Aleksa Sarai Signed-off-by: Shuah Khan --- tools/testing/selftests/openat2/resolve_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/openat2/resolve_test.c b/tools/testing/selftests/openat2/resolve_test.c index 7a94b1da8e7b..bbafad440893 100644 --- a/tools/testing/selftests/openat2/resolve_test.c +++ b/tools/testing/selftests/openat2/resolve_test.c @@ -230,7 +230,7 @@ void test_openat2_opath_tests(void) { .name = "[in_root] garbage link to /root", .path = "cheeky/garbageself", .how.resolve = RESOLVE_IN_ROOT, .out.path = "root", .pass = true }, - { .name = "[in_root] chainged garbage links to /root", + { .name = "[in_root] chained garbage links to /root", .path = "abscheeky/garbageself", .how.resolve = RESOLVE_IN_ROOT, .out.path = "root", .pass = true }, { .name = "[in_root] relative path to 'root'", -- cgit v1.2.3-59-g8ed1b From a098d9c82a0bb2f91e5cf6c780859bc00c15e1e4 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 30 Jan 2020 21:45:27 -0500 Subject: selftests/ftrace: Have pid filter test use instance flag While running the ftracetests, the pid filter test failed because the instance "foo" existed, and it was using it to rerun the test under a instance named foo. The collision caused the test to fail as the mkdir failed as the name already existed. As of commit b5b77be812de7 ("selftests: ftrace: Allow some tests to be run in a tracing instance") all a selftest needs to do to be tested in an instance is to set the "instance" flag. There's no reason a selftest needs to create an instance to run its test in an instance directly. Remove the open coded testing in an instance for the pid filter test and have it set the "instance" flag instead. Signed-off-by: Steven Rostedt (VMware) Acked-by: Masami Hiramatsu Signed-off-by: Shuah Khan --- tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc index 64cfcc75e3c1..f2ee1e889e13 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: ftrace - function pid filters +# flags: instance # Make sure that function pid matching filter works. # Also test it on an instance directory @@ -96,13 +97,6 @@ do_test() { } do_test - -mkdir instances/foo -cd instances/foo -do_test -cd ../../ -rmdir instances/foo - do_reset exit 0 -- cgit v1.2.3-59-g8ed1b From b32694cd0724d4ceca2c62cc7c3d3a8d1ffa11fc Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Fri, 31 Jan 2020 18:25:23 +0300 Subject: Kernel selftests: tpm2: check for tpm support tpm2 tests set fails if there is no /dev/tpm0 and /dev/tpmrm0 supported. Check if these files exist before run and mark test as skipped in case of absence. Signed-off-by: Nikita Sobolev Signed-off-by: Shuah Khan --- tools/testing/selftests/tpm2/test_smoke.sh | 13 +++++++++++-- tools/testing/selftests/tpm2/test_space.sh | 9 ++++++++- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/tpm2/test_smoke.sh b/tools/testing/selftests/tpm2/test_smoke.sh index 8155c2ea7ccb..b630c7b5950a 100755 --- a/tools/testing/selftests/tpm2/test_smoke.sh +++ b/tools/testing/selftests/tpm2/test_smoke.sh @@ -1,8 +1,17 @@ #!/bin/bash # SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +self.flags = flags -python -m unittest -v tpm2_tests.SmokeTest -python -m unittest -v tpm2_tests.AsyncTest +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + + +if [ -f /dev/tpm0 ] ; then + python -m unittest -v tpm2_tests.SmokeTest + python -m unittest -v tpm2_tests.AsyncTest +else + exit $ksft_skip +fi CLEAR_CMD=$(which tpm2_clear) if [ -n $CLEAR_CMD ]; then diff --git a/tools/testing/selftests/tpm2/test_space.sh b/tools/testing/selftests/tpm2/test_space.sh index a6f5e346635e..180b469c53b4 100755 --- a/tools/testing/selftests/tpm2/test_space.sh +++ b/tools/testing/selftests/tpm2/test_space.sh @@ -1,4 +1,11 @@ #!/bin/bash # SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) -python -m unittest -v tpm2_tests.SpaceTest +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +if [ -f /dev/tpmrm0 ] ; then + python -m unittest -v tpm2_tests.SpaceTest +else + exit $ksft_skip +fi -- cgit v1.2.3-59-g8ed1b From 9d235a558c689b0ecdd23bbd8beb2e0584f619ed Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 6 Feb 2020 09:40:00 +0100 Subject: selftests: allow detection of build failures Commit 5f70bde26a48 ("selftests: fix build behaviour on targets' failures") added a logic to track failure of builds of individual targets. However, it does exactly the opposite of what a distro kernel needs: we create a RPM package with a selected set of selftests and we need the build to fail if build of any of the targets fail. Both use cases are valid. A distribution kernel is in control of what is included in the kernel and what is being built; any error needs to be flagged and acted upon. A CI system that tries to build as many tests as possible on the best effort basis is not really interested in a failure here and there. Support both use cases by introducing a FORCE_TARGETS variable. It is switched off by default to make life for CI systems easier, distributions can easily switch it on while building their packages. Reported-by: Yauheni Kaliuta Signed-off-by: Jiri Benc Reviewed-by: Cristian Marussi Tested-by: Cristian Marussi Signed-off-by: Shuah Khan --- tools/testing/selftests/Makefile | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 63430e2664c2..6ec503912bea 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -77,6 +77,12 @@ ifneq ($(SKIP_TARGETS),) override TARGETS := $(TMP) endif +# User can set FORCE_TARGETS to 1 to require all targets to be successfully +# built; make will fail if any of the targets cannot be built. If +# FORCE_TARGETS is not set (the default), make will succeed if at least one +# of the targets gets built. +FORCE_TARGETS ?= + # Clear LDFLAGS and MAKEFLAGS if called from main # Makefile to avoid test build failures when test # Makefile doesn't have explicit build rules. @@ -151,7 +157,8 @@ all: khdr for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ mkdir $$BUILD_TARGET -p; \ - $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET; \ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET \ + $(if $(FORCE_TARGETS),|| exit); \ ret=$$((ret * $$?)); \ done; exit $$ret; @@ -205,7 +212,8 @@ ifdef INSTALL_PATH @ret=1; \ for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ - $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install \ + $(if $(FORCE_TARGETS),|| exit); \ ret=$$((ret * $$?)); \ done; exit $$ret; -- cgit v1.2.3-59-g8ed1b From c363eb48ada5cf732b3f489fab799fc881097842 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 6 Feb 2020 09:40:52 +0100 Subject: selftests: fix too long argument With some shells, the command construed for install of bpf selftests becomes too large due to long list of files: make[1]: execvp: /bin/sh: Argument list too long make[1]: *** [../lib.mk:73: install] Error 127 Currently, each of the file lists is replicated three times in the command: in the shell 'if' condition, in the 'echo' and in the 'rsync'. Reduce that by one instance by using make conditionals and separate the echo and rsync into two shell commands. (One would be inclined to just remove the '@' at the beginning of the rsync command and let 'make' echo it by itself; unfortunately, it appears that the '@' in the front of mkdir silences output also for the following commands.) Also, separate handling of each of the lists to its own shell command. The semantics of the makefile is unchanged before and after the patch. The ability of individual test directories to override INSTALL_RULE is retained. Reported-by: Yauheni Kaliuta Tested-by: Yauheni Kaliuta Signed-off-by: Jiri Benc Signed-off-by: Shuah Khan --- tools/testing/selftests/lib.mk | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 1c8a1963d03f..3ed0134a764d 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -83,17 +83,20 @@ else $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS)) endif +define INSTALL_SINGLE_RULE + $(if $(INSTALL_LIST),@mkdir -p $(INSTALL_PATH)) + $(if $(INSTALL_LIST),@echo rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/) + $(if $(INSTALL_LIST),@rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/) +endef + define INSTALL_RULE - @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then \ - mkdir -p ${INSTALL_PATH}; \ - echo "rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/"; \ - rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/; \ - fi - @if [ "X$(TEST_GEN_PROGS)$(TEST_CUSTOM_PROGS)$(TEST_GEN_PROGS_EXTENDED)$(TEST_GEN_FILES)" != "X" ]; then \ - mkdir -p ${INSTALL_PATH}; \ - echo "rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/"; \ - rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/; \ - fi + $(eval INSTALL_LIST = $(TEST_PROGS)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_PROGS_EXTENDED)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_FILES)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_GEN_PROGS)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_CUSTOM_PROGS)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_GEN_PROGS_EXTENDED)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_GEN_FILES)) $(INSTALL_SINGLE_RULE) endef install: all -- cgit v1.2.3-59-g8ed1b From 5ee858975b13a9b40db00f456989a689fdbb296c Mon Sep 17 00:00:00 2001 From: Anurag Kumar Vulisha Date: Mon, 27 Jan 2020 19:30:46 +0000 Subject: usb: dwc3: gadget: Check for IOC/LST bit in TRB->ctrl fields The current code in dwc3_gadget_ep_reclaim_completed_trb() will check for IOC/LST bit in the event->status and returns if IOC/LST bit is set. This logic doesn't work if multiple TRBs are queued per request and the IOC/LST bit is set on the last TRB of that request. Consider an example where a queued request has multiple queued TRBs and IOC/LST bit is set only for the last TRB. In this case, the core generates XferComplete/XferInProgress events only for the last TRB (since IOC/LST are set only for the last TRB). As per the logic in dwc3_gadget_ep_reclaim_completed_trb() event->status is checked for IOC/LST bit and returns on the first TRB. This leaves the remaining TRBs left unhandled. Similarly, if the gadget function enqueues an unaligned request with sglist already in it, it should fail the same way, since we will append another TRB to something that already uses more than one TRB. To aviod this, this patch changes the code to check for IOC/LST bits in TRB->ctrl instead. At a practical level, this patch resolves USB transfer stalls seen with adb on dwc3 based HiKey960 after functionfs gadget added scatter-gather support around v4.20. Cc: Felipe Balbi Cc: Yang Fei Cc: Thinh Nguyen Cc: Tejas Joglekar Cc: Andrzej Pietrasiewicz Cc: Jack Pham Cc: Todd Kjos Cc: Greg KH Cc: Linux USB List Cc: stable Tested-by: Tejas Joglekar Reviewed-by: Thinh Nguyen Signed-off-by: Anurag Kumar Vulisha [jstultz: forward ported to mainline, reworded commit log, reworked to only check trb->ctrl as suggested by Felipe] Signed-off-by: John Stultz Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 1b8014ab0b25..1b7d2f9cb673 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2429,7 +2429,8 @@ static int dwc3_gadget_ep_reclaim_completed_trb(struct dwc3_ep *dep, if (event->status & DEPEVT_STATUS_SHORT && !chain) return 1; - if (event->status & DEPEVT_STATUS_IOC) + if ((trb->ctrl & DWC3_TRB_CTRL_IOC) || + (trb->ctrl & DWC3_TRB_CTRL_LST)) return 1; return 0; -- cgit v1.2.3-59-g8ed1b From 904967c60d87393a3708fed2324b684cdb79b1ee Mon Sep 17 00:00:00 2001 From: John Keeping Date: Fri, 17 Jan 2020 10:40:22 +0000 Subject: usb: gadget: u_audio: Fix high-speed max packet size Prior to commit eb9fecb9e69b ("usb: gadget: f_uac2: split out audio core") the maximum packet size was calculated only from the high-speed descriptor but now we use the largest of the full-speed and high-speed descriptors. This is correct, but the full-speed value is likely to be higher than that for high-speed and this leads to submitting requests for OUT transfers (received by the gadget) which are larger than the endpoint's maximum packet size. These are rightly rejected by the gadget core. config_ep_by_speed() already sets up the correct maximum packet size for the enumerated speed in the usb_ep structure, so we can simply use this instead of the overall value that has been used to allocate buffers for requests. Note that the minimum period for ALSA is still set from the largest value, and this is unavoidable because it's possible to open the audio device before the gadget has been enumerated. Tested-by: Pavel Hofman Signed-off-by: John Keeping Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/u_audio.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/function/u_audio.c b/drivers/usb/gadget/function/u_audio.c index 6d956f190f5a..e6d32c536781 100644 --- a/drivers/usb/gadget/function/u_audio.c +++ b/drivers/usb/gadget/function/u_audio.c @@ -361,7 +361,7 @@ int u_audio_start_capture(struct g_audio *audio_dev) ep = audio_dev->out_ep; prm = &uac->c_prm; config_ep_by_speed(gadget, &audio_dev->func, ep); - req_len = prm->max_psize; + req_len = ep->maxpacket; prm->ep_enabled = true; usb_ep_enable(ep); @@ -379,7 +379,7 @@ int u_audio_start_capture(struct g_audio *audio_dev) req->context = &prm->ureq[i]; req->length = req_len; req->complete = u_audio_iso_complete; - req->buf = prm->rbuf + i * prm->max_psize; + req->buf = prm->rbuf + i * ep->maxpacket; } if (usb_ep_queue(ep, prm->ureq[i].req, GFP_ATOMIC)) @@ -430,9 +430,9 @@ int u_audio_start_playback(struct g_audio *audio_dev) uac->p_pktsize = min_t(unsigned int, uac->p_framesize * (params->p_srate / uac->p_interval), - prm->max_psize); + ep->maxpacket); - if (uac->p_pktsize < prm->max_psize) + if (uac->p_pktsize < ep->maxpacket) uac->p_pktsize_residue = uac->p_framesize * (params->p_srate % uac->p_interval); else @@ -457,7 +457,7 @@ int u_audio_start_playback(struct g_audio *audio_dev) req->context = &prm->ureq[i]; req->length = req_len; req->complete = u_audio_iso_complete; - req->buf = prm->rbuf + i * prm->max_psize; + req->buf = prm->rbuf + i * ep->maxpacket; } if (usb_ep_queue(ep, prm->ureq[i].req, GFP_ATOMIC)) -- cgit v1.2.3-59-g8ed1b From c724417baf162bd3e035659e22cdf990cfb0d917 Mon Sep 17 00:00:00 2001 From: Jack Pham Date: Thu, 30 Jan 2020 19:10:35 -0800 Subject: usb: gadget: composite: Fix bMaxPower for SuperSpeedPlus SuperSpeedPlus peripherals must report their bMaxPower of the configuration descriptor in units of 8mA as per the USB 3.2 specification. The current switch statement in encode_bMaxPower() only checks for USB_SPEED_SUPER but not USB_SPEED_SUPER_PLUS so the latter falls back to USB 2.0 encoding which uses 2mA units. Replace the switch with a simple if/else. Fixes: eae5820b852f ("usb: gadget: composite: Write SuperSpeedPlus config descriptors") Signed-off-by: Jack Pham Signed-off-by: Felipe Balbi --- drivers/usb/gadget/composite.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 3b4f67000315..cd303a3ea680 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -437,12 +437,10 @@ static u8 encode_bMaxPower(enum usb_device_speed speed, val = CONFIG_USB_GADGET_VBUS_DRAW; if (!val) return 0; - switch (speed) { - case USB_SPEED_SUPER: - return DIV_ROUND_UP(val, 8); - default: + if (speed < USB_SPEED_SUPER) return DIV_ROUND_UP(val, 2); - } + else + return DIV_ROUND_UP(val, 8); } static int config_buf(struct usb_configuration *config, -- cgit v1.2.3-59-g8ed1b From a2035411fa1d1206cea7d5dfe833e78481844a76 Mon Sep 17 00:00:00 2001 From: Jack Pham Date: Thu, 30 Jan 2020 19:10:36 -0800 Subject: usb: gadget: composite: Support more than 500mA MaxPower USB 3.x SuperSpeed peripherals can draw up to 900mA of VBUS power when in configured state. However, if a configuration wanting to take advantage of this is added with MaxPower greater than 500 (currently possible if using a ConfigFS gadget) the composite driver fails to accommodate this for a couple reasons: - usb_gadget_vbus_draw() when called from set_config() and composite_resume() will be passed the MaxPower value without regard for the current connection speed, resulting in a violation for USB 2.0 since the max is 500mA. - the bMaxPower of the configuration descriptor would be incorrectly encoded, again if the connection speed is only at USB 2.0 or below, likely wrapping around U8_MAX since the 2mA multiplier corresponds to a maximum of 510mA. Fix these by adding checks against the current gadget->speed when the c->MaxPower value is used (set_config() and composite_resume()) and appropriately limit based on whether it is currently at a low-/full-/high- or super-speed connection. Because 900 is not divisible by 8, with the round-up division currently used in encode_bMaxPower() a MaxPower of 900mA will result in an encoded value of 0x71. When a host stack (including Linux and Windows) enumerates this on a single port root hub, it reads this value back and decodes (multiplies by 8) to get 904mA which is strictly greater than 900mA that is typically budgeted for that port, causing it to reject the configuration. Instead, we should be using the round-down behavior of normal integral division so that 900 / 8 -> 0x70 or 896mA to stay within range. And we might as well change it for the high/full/low case as well for consistency. N.B. USB 3.2 Gen N x 2 allows for up to 1500mA but there doesn't seem to be any any peripheral controller supported by Linux that does two lane operation, so for now keeping the clamp at 900 should be fine. Signed-off-by: Jack Pham Signed-off-by: Felipe Balbi --- drivers/usb/gadget/composite.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index cd303a3ea680..223f72d4d9ed 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -438,9 +438,13 @@ static u8 encode_bMaxPower(enum usb_device_speed speed, if (!val) return 0; if (speed < USB_SPEED_SUPER) - return DIV_ROUND_UP(val, 2); + return min(val, 500U) / 2; else - return DIV_ROUND_UP(val, 8); + /* + * USB 3.x supports up to 900mA, but since 900 isn't divisible + * by 8 the integral division will effectively cap to 896mA. + */ + return min(val, 900U) / 8; } static int config_buf(struct usb_configuration *config, @@ -852,6 +856,10 @@ static int set_config(struct usb_composite_dev *cdev, /* when we return, be sure our power usage is valid */ power = c->MaxPower ? c->MaxPower : CONFIG_USB_GADGET_VBUS_DRAW; + if (gadget->speed < USB_SPEED_SUPER) + power = min(power, 500U); + else + power = min(power, 900U); done: usb_gadget_vbus_draw(gadget, power); if (result >= 0 && cdev->delayed_status) @@ -2278,7 +2286,7 @@ void composite_resume(struct usb_gadget *gadget) { struct usb_composite_dev *cdev = get_gadget_data(gadget); struct usb_function *f; - u16 maxpower; + unsigned maxpower; /* REVISIT: should we have config level * suspend/resume callbacks? @@ -2292,10 +2300,14 @@ void composite_resume(struct usb_gadget *gadget) f->resume(f); } - maxpower = cdev->config->MaxPower; + maxpower = cdev->config->MaxPower ? + cdev->config->MaxPower : CONFIG_USB_GADGET_VBUS_DRAW; + if (gadget->speed < USB_SPEED_SUPER) + maxpower = min(maxpower, 500U); + else + maxpower = min(maxpower, 900U); - usb_gadget_vbus_draw(gadget, maxpower ? - maxpower : CONFIG_USB_GADGET_VBUS_DRAW); + usb_gadget_vbus_draw(gadget, maxpower); } cdev->suspended = 0; -- cgit v1.2.3-59-g8ed1b From 860ef6cd3f90b84a1832f8a6485c90c34d3b588b Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Tue, 21 Jan 2020 14:24:04 +0400 Subject: usb: dwc2: Fix in ISOC request length checking Moved ISOC request length checking from dwc2_hsotg_start_req() function to dwc2_hsotg_ep_queue(). Fixes: 4fca54aa58293 ("usb: gadget: s3c-hsotg: add multi count support") Signed-off-by: Minas Harutyunyan Signed-off-by: Felipe Balbi --- drivers/usb/dwc2/gadget.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 88f7d6d4ff2d..7b40cf5bdc2f 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -1083,11 +1083,6 @@ static void dwc2_hsotg_start_req(struct dwc2_hsotg *hsotg, else packets = 1; /* send one packet if length is zero. */ - if (hs_ep->isochronous && length > (hs_ep->mc * hs_ep->ep.maxpacket)) { - dev_err(hsotg->dev, "req length > maxpacket*mc\n"); - return; - } - if (dir_in && index != 0) if (hs_ep->isochronous) epsize = DXEPTSIZ_MC(packets); @@ -1391,6 +1386,13 @@ static int dwc2_hsotg_ep_queue(struct usb_ep *ep, struct usb_request *req, req->actual = 0; req->status = -EINPROGRESS; + /* Don't queue ISOC request if length greater than mps*mc */ + if (hs_ep->isochronous && + req->length > (hs_ep->mc * hs_ep->ep.maxpacket)) { + dev_err(hs->dev, "req length > maxpacket*mc\n"); + return -EINVAL; + } + /* In DDMA mode for ISOC's don't queue request if length greater * than descriptor limits. */ -- cgit v1.2.3-59-g8ed1b From 9a0d6f7c0a83844baae1d6d85482863d2bf3b7a7 Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Tue, 21 Jan 2020 14:17:07 +0400 Subject: usb: dwc2: Fix SET/CLEAR_FEATURE and GET_STATUS flows SET/CLEAR_FEATURE for Remote Wakeup allowance not handled correctly. GET_STATUS handling provided not correct data on DATA Stage. Issue seen when gadget's dr_mode set to "otg" mode and connected to MacOS. Both are fixed and tested using USBCV Ch.9 tests. Signed-off-by: Minas Harutyunyan Fixes: fa389a6d7726 ("usb: dwc2: gadget: Add remote_wakeup_allowed flag") Tested-by: Jack Mitchell Cc: stable@vger.kernel.org Signed-off-by: Felipe Balbi --- drivers/usb/dwc2/gadget.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 7b40cf5bdc2f..92ed32ec1607 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -1634,6 +1634,7 @@ static int dwc2_hsotg_process_req_status(struct dwc2_hsotg *hsotg, struct dwc2_hsotg_ep *ep0 = hsotg->eps_out[0]; struct dwc2_hsotg_ep *ep; __le16 reply; + u16 status; int ret; dev_dbg(hsotg->dev, "%s: USB_REQ_GET_STATUS\n", __func__); @@ -1645,11 +1646,10 @@ static int dwc2_hsotg_process_req_status(struct dwc2_hsotg *hsotg, switch (ctrl->bRequestType & USB_RECIP_MASK) { case USB_RECIP_DEVICE: - /* - * bit 0 => self powered - * bit 1 => remote wakeup - */ - reply = cpu_to_le16(0); + status = 1 << USB_DEVICE_SELF_POWERED; + status |= hsotg->remote_wakeup_allowed << + USB_DEVICE_REMOTE_WAKEUP; + reply = cpu_to_le16(status); break; case USB_RECIP_INTERFACE: @@ -1760,7 +1760,10 @@ static int dwc2_hsotg_process_req_feature(struct dwc2_hsotg *hsotg, case USB_RECIP_DEVICE: switch (wValue) { case USB_DEVICE_REMOTE_WAKEUP: - hsotg->remote_wakeup_allowed = 1; + if (set) + hsotg->remote_wakeup_allowed = 1; + else + hsotg->remote_wakeup_allowed = 0; break; case USB_DEVICE_TEST_MODE: @@ -1770,16 +1773,17 @@ static int dwc2_hsotg_process_req_feature(struct dwc2_hsotg *hsotg, return -EINVAL; hsotg->test_mode = wIndex >> 8; - ret = dwc2_hsotg_send_reply(hsotg, ep0, NULL, 0); - if (ret) { - dev_err(hsotg->dev, - "%s: failed to send reply\n", __func__); - return ret; - } break; default: return -ENOENT; } + + ret = dwc2_hsotg_send_reply(hsotg, ep0, NULL, 0); + if (ret) { + dev_err(hsotg->dev, + "%s: failed to send reply\n", __func__); + return ret; + } break; case USB_RECIP_ENDPOINT: -- cgit v1.2.3-59-g8ed1b From 43d565727a3a6fd24e37c7c2116475106af71806 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Thu, 16 Jan 2020 15:29:01 +0200 Subject: usb: gadget: ffs: ffs_aio_cancel(): Save/restore IRQ flags ffs_aio_cancel() can be called from both interrupt and thread context. Make sure that the current IRQ state is saved and restored by using spin_{un,}lock_irq{save,restore}(). Otherwise undefined behavior might occur. Acked-by: Michal Nazarewicz Signed-off-by: Lars-Peter Clausen Signed-off-by: Alexandru Ardelean Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_fs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 6171d28331e6..571917677d35 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1162,18 +1162,19 @@ static int ffs_aio_cancel(struct kiocb *kiocb) { struct ffs_io_data *io_data = kiocb->private; struct ffs_epfile *epfile = kiocb->ki_filp->private_data; + unsigned long flags; int value; ENTER(); - spin_lock_irq(&epfile->ffs->eps_lock); + spin_lock_irqsave(&epfile->ffs->eps_lock, flags); if (likely(io_data && io_data->ep && io_data->req)) value = usb_ep_dequeue(io_data->ep, io_data->req); else value = -EINVAL; - spin_unlock_irq(&epfile->ffs->eps_lock); + spin_unlock_irqrestore(&epfile->ffs->eps_lock, flags); return value; } -- cgit v1.2.3-59-g8ed1b From e4bfded56cf39b8d02733c1e6ef546b97961e18a Mon Sep 17 00:00:00 2001 From: Sergey Organov Date: Wed, 29 Jan 2020 14:21:46 +0300 Subject: usb: gadget: serial: fix Tx stall after buffer overflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Symptom: application opens /dev/ttyGS0 and starts sending (writing) to it while either USB cable is not connected, or nobody listens on the other side of the cable. If driver circular buffer overflows before connection is established, no data will be written to the USB layer until/unless /dev/ttyGS0 is closed and re-opened again by the application (the latter besides having no means of being notified about the event of establishing of the connection.) Fix: on open and/or connect, kick Tx to flush circular buffer data to USB layer. Signed-off-by: Sergey Organov Reviewed-by: Michał Mirosław Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/u_serial.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index f986e5c55974..8167d379e115 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -561,8 +561,10 @@ static int gs_start_io(struct gs_port *port) port->n_read = 0; started = gs_start_rx(port); - /* unblock any pending writes into our circular buffer */ if (started) { + gs_start_tx(port); + /* Unblock any pending writes into our circular buffer, in case + * we didn't in gs_start_tx() */ tty_wakeup(port->port.tty); } else { gs_free_requests(ep, head, &port->read_allocated); -- cgit v1.2.3-59-g8ed1b From 42cd5ffe46c1037d5d9a253c72e71a024a7bfbef Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 10 Feb 2020 09:51:39 +0000 Subject: usb: dwc3: debug: fix string position formatting mixup with ret and len Currently the string formatting is mixing up the offset of ret and len. Re-work the code to use just len, remove ret and use scnprintf instead of snprintf and len position accumulation where required. Remove the -ve return check since scnprintf never returns a failure -ve size. Also break overly long lines to clean up checkpatch warnings. Addresses-Coverity: ("Unused value") Fixes: 1381a5113caf ("usb: dwc3: debug: purge usage of strcat") Reviewed-by: Dan Carpenter Signed-off-by: Colin Ian King Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/debug.h | 39 +++++++++++++++------------------------ 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/drivers/usb/dwc3/debug.h b/drivers/usb/dwc3/debug.h index e56beb9d1e36..4a13ceaf4093 100644 --- a/drivers/usb/dwc3/debug.h +++ b/drivers/usb/dwc3/debug.h @@ -256,86 +256,77 @@ static inline const char *dwc3_ep_event_string(char *str, size_t size, u8 epnum = event->endpoint_number; size_t len; int status; - int ret; - ret = snprintf(str, size, "ep%d%s: ", epnum >> 1, + len = scnprintf(str, size, "ep%d%s: ", epnum >> 1, (epnum & 1) ? "in" : "out"); - if (ret < 0) - return "UNKNOWN"; status = event->status; switch (event->endpoint_event) { case DWC3_DEPEVT_XFERCOMPLETE: - len = strlen(str); - snprintf(str + len, size - len, "Transfer Complete (%c%c%c)", + len += scnprintf(str + len, size - len, + "Transfer Complete (%c%c%c)", status & DEPEVT_STATUS_SHORT ? 'S' : 's', status & DEPEVT_STATUS_IOC ? 'I' : 'i', status & DEPEVT_STATUS_LST ? 'L' : 'l'); - len = strlen(str); - if (epnum <= 1) - snprintf(str + len, size - len, " [%s]", + scnprintf(str + len, size - len, " [%s]", dwc3_ep0_state_string(ep0state)); break; case DWC3_DEPEVT_XFERINPROGRESS: - len = strlen(str); - - snprintf(str + len, size - len, "Transfer In Progress [%d] (%c%c%c)", + scnprintf(str + len, size - len, + "Transfer In Progress [%d] (%c%c%c)", event->parameters, status & DEPEVT_STATUS_SHORT ? 'S' : 's', status & DEPEVT_STATUS_IOC ? 'I' : 'i', status & DEPEVT_STATUS_LST ? 'M' : 'm'); break; case DWC3_DEPEVT_XFERNOTREADY: - len = strlen(str); - - snprintf(str + len, size - len, "Transfer Not Ready [%d]%s", + len += scnprintf(str + len, size - len, + "Transfer Not Ready [%d]%s", event->parameters, status & DEPEVT_STATUS_TRANSFER_ACTIVE ? " (Active)" : " (Not Active)"); - len = strlen(str); - /* Control Endpoints */ if (epnum <= 1) { int phase = DEPEVT_STATUS_CONTROL_PHASE(event->status); switch (phase) { case DEPEVT_STATUS_CONTROL_DATA: - snprintf(str + ret, size - ret, + scnprintf(str + len, size - len, " [Data Phase]"); break; case DEPEVT_STATUS_CONTROL_STATUS: - snprintf(str + ret, size - ret, + scnprintf(str + len, size - len, " [Status Phase]"); } } break; case DWC3_DEPEVT_RXTXFIFOEVT: - snprintf(str + ret, size - ret, "FIFO"); + scnprintf(str + len, size - len, "FIFO"); break; case DWC3_DEPEVT_STREAMEVT: status = event->status; switch (status) { case DEPEVT_STREAMEVT_FOUND: - snprintf(str + ret, size - ret, " Stream %d Found", + scnprintf(str + len, size - len, " Stream %d Found", event->parameters); break; case DEPEVT_STREAMEVT_NOTFOUND: default: - snprintf(str + ret, size - ret, " Stream Not Found"); + scnprintf(str + len, size - len, " Stream Not Found"); break; } break; case DWC3_DEPEVT_EPCMDCMPLT: - snprintf(str + ret, size - ret, "Endpoint Command Complete"); + scnprintf(str + len, size - len, "Endpoint Command Complete"); break; default: - snprintf(str, size, "UNKNOWN"); + scnprintf(str + len, size - len, "UNKNOWN"); } return str; -- cgit v1.2.3-59-g8ed1b From 0cbb4f9c69827decf56519c2f63918f16904ede5 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 3 Feb 2020 09:46:19 -0800 Subject: platform/chrome: wilco_ec: Include asm/unaligned instead of linux/ path It seems that we shouldn't try to include the include/linux/ path to unaligned functions. Just include asm/unaligned.h instead so that we don't run into compilation warnings like below. In file included from drivers/platform/chrome/wilco_ec/properties.c:8:0: include/linux/unaligned/le_memmove.h:7:19: error: redefinition of 'get_unaligned_le16' static inline u16 get_unaligned_le16(const void *p) ^~~~~~~~~~~~~~~~~~ In file included from arch/ia64/include/asm/unaligned.h:5:0, from arch/ia64/include/asm/io.h:23, from arch/ia64/include/asm/smp.h:21, from include/linux/smp.h:68, from include/linux/percpu.h:7, from include/linux/arch_topology.h:9, from include/linux/topology.h:30, from include/linux/gfp.h:9, from include/linux/xarray.h:14, from include/linux/radix-tree.h:18, from include/linux/idr.h:15, from include/linux/kernfs.h:13, from include/linux/sysfs.h:16, from include/linux/kobject.h:20, from include/linux/device.h:16, from include/linux/platform_data/wilco-ec.h:11, from drivers/platform/chrome/wilco_ec/properties.c:6: include/linux/unaligned/le_struct.h:7:19: note: previous definition of 'get_unaligned_le16' was here static inline u16 get_unaligned_le16(const void *p) ^~~~~~~~~~~~~~~~~~ Reported-by: kbuild test robot Fixes: 60fb8a8e93ca ("platform/chrome: wilco_ec: Allow wilco to be compiled in COMPILE_TEST") Signed-off-by: Stephen Boyd Signed-off-by: Enric Balletbo i Serra --- drivers/platform/chrome/wilco_ec/properties.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/chrome/wilco_ec/properties.c b/drivers/platform/chrome/wilco_ec/properties.c index e69682c95ea2..62f27610dd33 100644 --- a/drivers/platform/chrome/wilco_ec/properties.c +++ b/drivers/platform/chrome/wilco_ec/properties.c @@ -5,7 +5,7 @@ #include #include -#include +#include /* Operation code; what the EC should do with the property */ enum ec_property_op { -- cgit v1.2.3-59-g8ed1b From 908087ffbe896c100ed73d5f0ce11a5b7264af4a Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 23 Dec 2019 17:51:48 +0200 Subject: habanalabs: halt the engines before hard-reset The driver must halt the engines before doing hard-reset, otherwise the device can go into undefined state. There is a place where the driver didn't do that and this patch fixes it. Reviewed-by: Tomer Tayar Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/device.c | 1 + drivers/misc/habanalabs/goya/goya.c | 42 +++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index b155e9549076..166883b64725 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -1189,6 +1189,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) if (hdev->asic_funcs->get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { dev_info(hdev->dev, "H/W state is dirty, must reset before initializing\n"); + hdev->asic_funcs->halt_engines(hdev, true); hdev->asic_funcs->hw_fini(hdev, true); } diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 7344e8a222ae..f24fe909b88d 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -895,6 +895,11 @@ void goya_init_dma_qmans(struct hl_device *hdev) */ static void goya_disable_external_queues(struct hl_device *hdev) { + struct goya_device *goya = hdev->asic_specific; + + if (!(goya->hw_cap_initialized & HW_CAP_DMA)) + return; + WREG32(mmDMA_QM_0_GLBL_CFG0, 0); WREG32(mmDMA_QM_1_GLBL_CFG0, 0); WREG32(mmDMA_QM_2_GLBL_CFG0, 0); @@ -956,6 +961,11 @@ static int goya_stop_external_queues(struct hl_device *hdev) { int rc, retval = 0; + struct goya_device *goya = hdev->asic_specific; + + if (!(goya->hw_cap_initialized & HW_CAP_DMA)) + return retval; + rc = goya_stop_queue(hdev, mmDMA_QM_0_GLBL_CFG1, mmDMA_QM_0_CP_STS, @@ -1744,9 +1754,18 @@ void goya_init_tpc_qmans(struct hl_device *hdev) */ static void goya_disable_internal_queues(struct hl_device *hdev) { + struct goya_device *goya = hdev->asic_specific; + + if (!(goya->hw_cap_initialized & HW_CAP_MME)) + goto disable_tpc; + WREG32(mmMME_QM_GLBL_CFG0, 0); WREG32(mmMME_CMDQ_GLBL_CFG0, 0); +disable_tpc: + if (!(goya->hw_cap_initialized & HW_CAP_TPC)) + return; + WREG32(mmTPC0_QM_GLBL_CFG0, 0); WREG32(mmTPC0_CMDQ_GLBL_CFG0, 0); @@ -1782,8 +1801,12 @@ static void goya_disable_internal_queues(struct hl_device *hdev) */ static int goya_stop_internal_queues(struct hl_device *hdev) { + struct goya_device *goya = hdev->asic_specific; int rc, retval = 0; + if (!(goya->hw_cap_initialized & HW_CAP_MME)) + goto stop_tpc; + /* * Each queue (QMAN) is a separate H/W logic. That means that each * QMAN can be stopped independently and failure to stop one does NOT @@ -1810,6 +1833,10 @@ static int goya_stop_internal_queues(struct hl_device *hdev) retval = -EIO; } +stop_tpc: + if (!(goya->hw_cap_initialized & HW_CAP_TPC)) + return retval; + rc = goya_stop_queue(hdev, mmTPC0_QM_GLBL_CFG1, mmTPC0_QM_CP_STS, @@ -1975,6 +2002,11 @@ static int goya_stop_internal_queues(struct hl_device *hdev) static void goya_dma_stall(struct hl_device *hdev) { + struct goya_device *goya = hdev->asic_specific; + + if (!(goya->hw_cap_initialized & HW_CAP_DMA)) + return; + WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT); WREG32(mmDMA_QM_1_GLBL_CFG1, 1 << DMA_QM_1_GLBL_CFG1_DMA_STOP_SHIFT); WREG32(mmDMA_QM_2_GLBL_CFG1, 1 << DMA_QM_2_GLBL_CFG1_DMA_STOP_SHIFT); @@ -1984,6 +2016,11 @@ static void goya_dma_stall(struct hl_device *hdev) static void goya_tpc_stall(struct hl_device *hdev) { + struct goya_device *goya = hdev->asic_specific; + + if (!(goya->hw_cap_initialized & HW_CAP_TPC)) + return; + WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC1_CFG_TPC_STALL_V_SHIFT); WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC2_CFG_TPC_STALL_V_SHIFT); @@ -1996,6 +2033,11 @@ static void goya_tpc_stall(struct hl_device *hdev) static void goya_mme_stall(struct hl_device *hdev) { + struct goya_device *goya = hdev->asic_specific; + + if (!(goya->hw_cap_initialized & HW_CAP_MME)) + return; + WREG32(mmMME_STALL, 0xFFFFFFFF); } -- cgit v1.2.3-59-g8ed1b From a37e47192dfa98f79a0cd5ab991c224b5980c982 Mon Sep 17 00:00:00 2001 From: Omer Shpigelman Date: Sun, 5 Jan 2020 09:05:45 +0000 Subject: habanalabs: do not halt CoreSight during hard reset During hard reset we must not write to the device. Hence avoid halting CoreSight during user context close if it is done during hard reset. In addition, we must not re-enable clock gating afterwards as it was deliberately disabled in the beginning of the hard reset flow. Signed-off-by: Omer Shpigelman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/device.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index 166883b64725..b680b0caa69b 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -598,7 +598,9 @@ int hl_device_set_debug_mode(struct hl_device *hdev, bool enable) goto out; } - hdev->asic_funcs->halt_coresight(hdev); + if (!hdev->hard_reset_pending) + hdev->asic_funcs->halt_coresight(hdev); + hdev->in_debug = 0; goto out; -- cgit v1.2.3-59-g8ed1b From cf01514c5c6efa2d521d35e68dff2e0674d08e91 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Thu, 23 Jan 2020 00:43:06 +0200 Subject: habanalabs: patched cb equals user cb in device memset During device memory memset, the driver allocates and use a CB (command buffer). To reuse existing code, it keeps a pointer to the CB in two variables, user_cb and patched_cb. Therefore, there is no need to "put" both the user_cb and patched_cb, as it will cause an underflow of the refcnt of the CB. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index f24fe909b88d..b8a8de24aaf7 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -4690,8 +4690,6 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, rc = goya_send_job_on_qman0(hdev, job); - hl_cb_put(job->patched_cb); - hl_debugfs_remove_job(hdev, job); kfree(job); cb->cs_cnt--; -- cgit v1.2.3-59-g8ed1b From 3d1e0b406de16508de96f4a07fc3f94cfc678372 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 3 Feb 2020 17:37:04 +0100 Subject: netfilter: conntrack: remove two args from resolve_clash ctinfo is whats taken from the skb, i.e. ct = nf_ct_get(skb, &ctinfo). We do not pass 'ct' and instead re-fetch it from the skb. Just do the same for both netns and ctinfo. Also add a comment on what clash resolution is supposed to do. While at it, one indent level can be removed. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 69 +++++++++++++++++++++++++++++---------- 1 file changed, 51 insertions(+), 18 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index d1305423640f..5e332b01f3c0 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -894,31 +894,64 @@ static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo, } } -/* Resolve race on insertion if this protocol allows this. */ +/** + * nf_ct_resolve_clash - attempt to handle clash without packet drop + * + * @skb: skb that causes the clash + * @h: tuplehash of the clashing entry already in table + * + * A conntrack entry can be inserted to the connection tracking table + * if there is no existing entry with an identical tuple. + * + * If there is one, @skb (and the assocated, unconfirmed conntrack) has + * to be dropped. In case @skb is retransmitted, next conntrack lookup + * will find the already-existing entry. + * + * The major problem with such packet drop is the extra delay added by + * the packet loss -- it will take some time for a retransmit to occur + * (or the sender to time out when waiting for a reply). + * + * This function attempts to handle the situation without packet drop. + * + * If @skb has no NAT transformation or if the colliding entries are + * exactly the same, only the to-be-confirmed conntrack entry is discarded + * and @skb is associated with the conntrack entry already in the table. + * + * Returns NF_DROP if the clash could not be resolved. + */ static __cold noinline int -nf_ct_resolve_clash(struct net *net, struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - struct nf_conntrack_tuple_hash *h) +nf_ct_resolve_clash(struct sk_buff *skb, struct nf_conntrack_tuple_hash *h) { /* This is the conntrack entry already in hashes that won race. */ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); const struct nf_conntrack_l4proto *l4proto; - enum ip_conntrack_info oldinfo; - struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo); + enum ip_conntrack_info ctinfo; + struct nf_conn *loser_ct; + struct net *net; + + loser_ct = nf_ct_get(skb, &ctinfo); l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct)); - if (l4proto->allow_clash && - !nf_ct_is_dying(ct) && - atomic_inc_not_zero(&ct->ct_general.use)) { - if (((ct->status & IPS_NAT_DONE_MASK) == 0) || - nf_ct_match(ct, loser_ct)) { - nf_ct_acct_merge(ct, ctinfo, loser_ct); - nf_conntrack_put(&loser_ct->ct_general); - nf_ct_set(skb, ct, oldinfo); - return NF_ACCEPT; - } - nf_ct_put(ct); + if (!l4proto->allow_clash) + goto drop; + + if (nf_ct_is_dying(ct)) + goto drop; + + if (!atomic_inc_not_zero(&ct->ct_general.use)) + goto drop; + + if (((ct->status & IPS_NAT_DONE_MASK) == 0) || + nf_ct_match(ct, loser_ct)) { + nf_ct_acct_merge(ct, ctinfo, loser_ct); + nf_conntrack_put(&loser_ct->ct_general); + nf_ct_set(skb, ct, ctinfo); + return NF_ACCEPT; } + + nf_ct_put(ct); +drop: + net = nf_ct_net(loser_ct); NF_CT_STAT_INC(net, drop); return NF_DROP; } @@ -1036,7 +1069,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) out: nf_ct_add_to_dying_list(ct); - ret = nf_ct_resolve_clash(net, skb, ctinfo, h); + ret = nf_ct_resolve_clash(skb, h); dying: nf_conntrack_double_unlock(hash, reply_hash); NF_CT_STAT_INC(net, insert_failed); -- cgit v1.2.3-59-g8ed1b From b1b32552c1d81f0cf6a8e79043a2a47e769ff071 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 3 Feb 2020 17:37:05 +0100 Subject: netfilter: conntrack: place confirm-bit setting in a helper ... so it can be re-used from clash resolution in followup patch. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 5e332b01f3c0..5fda5bd10160 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -894,6 +894,19 @@ static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo, } } +static void __nf_conntrack_insert_prepare(struct nf_conn *ct) +{ + struct nf_conn_tstamp *tstamp; + + atomic_inc(&ct->ct_general.use); + ct->status |= IPS_CONFIRMED; + + /* set conntrack timestamp, if enabled. */ + tstamp = nf_conn_tstamp_find(ct); + if (tstamp) + tstamp->start = ktime_get_real_ns(); +} + /** * nf_ct_resolve_clash - attempt to handle clash without packet drop * @@ -965,7 +978,6 @@ __nf_conntrack_confirm(struct sk_buff *skb) struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; struct nf_conn_help *help; - struct nf_conn_tstamp *tstamp; struct hlist_nulls_node *n; enum ip_conntrack_info ctinfo; struct net *net; @@ -1042,13 +1054,8 @@ __nf_conntrack_confirm(struct sk_buff *skb) setting time, otherwise we'd get timer wrap in weird delay cases. */ ct->timeout += nfct_time_stamp; - atomic_inc(&ct->ct_general.use); - ct->status |= IPS_CONFIRMED; - /* set conntrack timestamp, if enabled. */ - tstamp = nf_conn_tstamp_find(ct); - if (tstamp) - tstamp->start = ktime_get_real_ns(); + __nf_conntrack_insert_prepare(ct); /* Since the lookup is lockless, hash insertion must be done after * starting the timer and setting the CONFIRMED bit. The RCU barriers -- cgit v1.2.3-59-g8ed1b From bb89abe52bf426f1f40850c441efc77426cc31e1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 3 Feb 2020 17:37:06 +0100 Subject: netfilter: conntrack: split resolve_clash function Followup patch will need a helper function with the 'clashing entries refer to the identical tuple in both directions' resolution logic. This patch will add another resolve_clash helper where loser_ct must not be added to the dying list because it will be inserted into the table. Therefore this also moves the stat counters and dying-list insertion of the losing ct. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 58 +++++++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 5fda5bd10160..3f069eb0f0fc 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -907,6 +907,39 @@ static void __nf_conntrack_insert_prepare(struct nf_conn *ct) tstamp->start = ktime_get_real_ns(); } +static int __nf_ct_resolve_clash(struct sk_buff *skb, + struct nf_conntrack_tuple_hash *h) +{ + /* This is the conntrack entry already in hashes that won race. */ + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); + enum ip_conntrack_info ctinfo; + struct nf_conn *loser_ct; + + loser_ct = nf_ct_get(skb, &ctinfo); + + if (nf_ct_is_dying(ct)) + return NF_DROP; + + if (!atomic_inc_not_zero(&ct->ct_general.use)) + return NF_DROP; + + if (((ct->status & IPS_NAT_DONE_MASK) == 0) || + nf_ct_match(ct, loser_ct)) { + struct net *net = nf_ct_net(ct); + + nf_ct_acct_merge(ct, ctinfo, loser_ct); + nf_ct_add_to_dying_list(loser_ct); + nf_conntrack_put(&loser_ct->ct_general); + nf_ct_set(skb, ct, ctinfo); + + NF_CT_STAT_INC(net, insert_failed); + return NF_ACCEPT; + } + + nf_ct_put(ct); + return NF_DROP; +} + /** * nf_ct_resolve_clash - attempt to handle clash without packet drop * @@ -941,31 +974,23 @@ nf_ct_resolve_clash(struct sk_buff *skb, struct nf_conntrack_tuple_hash *h) enum ip_conntrack_info ctinfo; struct nf_conn *loser_ct; struct net *net; + int ret; loser_ct = nf_ct_get(skb, &ctinfo); + net = nf_ct_net(loser_ct); l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct)); if (!l4proto->allow_clash) goto drop; - if (nf_ct_is_dying(ct)) - goto drop; - - if (!atomic_inc_not_zero(&ct->ct_general.use)) - goto drop; - - if (((ct->status & IPS_NAT_DONE_MASK) == 0) || - nf_ct_match(ct, loser_ct)) { - nf_ct_acct_merge(ct, ctinfo, loser_ct); - nf_conntrack_put(&loser_ct->ct_general); - nf_ct_set(skb, ct, ctinfo); - return NF_ACCEPT; - } + ret = __nf_ct_resolve_clash(skb, h); + if (ret == NF_ACCEPT) + return ret; - nf_ct_put(ct); drop: - net = nf_ct_net(loser_ct); + nf_ct_add_to_dying_list(loser_ct); NF_CT_STAT_INC(net, drop); + NF_CT_STAT_INC(net, insert_failed); return NF_DROP; } @@ -1034,6 +1059,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) if (unlikely(nf_ct_is_dying(ct))) { nf_ct_add_to_dying_list(ct); + NF_CT_STAT_INC(net, insert_failed); goto dying; } @@ -1075,11 +1101,9 @@ __nf_conntrack_confirm(struct sk_buff *skb) return NF_ACCEPT; out: - nf_ct_add_to_dying_list(ct); ret = nf_ct_resolve_clash(skb, h); dying: nf_conntrack_double_unlock(hash, reply_hash); - NF_CT_STAT_INC(net, insert_failed); local_bh_enable(); return ret; } -- cgit v1.2.3-59-g8ed1b From da0f3e0201b87ee4bbd2175925dd57e1228c35fb Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 Feb 2020 17:23:02 +0200 Subject: MAINTAINERS: Sort entries in database for THUNDERBOLT Run parse-maintainers.pl and choose THUNDERBOLT record. Fix it accordingly. Signed-off-by: Andy Shevchenko Signed-off-by: Mika Westerberg --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 38fe2f3f7b6f..d6e118a8f96e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16552,8 +16552,8 @@ M: Michael Jamet M: Mika Westerberg M: Yehezkel Bernat L: linux-usb@vger.kernel.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git F: Documentation/admin-guide/thunderbolt.rst F: drivers/thunderbolt/ F: include/linux/thunderbolt.h -- cgit v1.2.3-59-g8ed1b From 30744a68626db6a0029aca9c646831c869c16d83 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Mon, 10 Feb 2020 16:27:12 +0100 Subject: xsk: Publish global consumer pointers when NAPI is finished The commit 4b638f13bab4 ("xsk: Eliminate the RX batch size") introduced a much more lazy way of updating the global consumer pointers from the kernel side, by only doing so when running out of entries in the fill or Tx rings (the rings consumed by the kernel). This can result in a deadlock with the user application if the kernel requires more than one entry to proceed and the application cannot put these entries in the fill ring because the kernel has not updated the global consumer pointer since the ring is not empty. Fix this by publishing the local kernel side consumer pointer whenever we have completed Rx or Tx processing in the kernel. This way, user space will have an up-to-date view of the consumer pointers whenever it gets to execute in the one core case (application and driver on the same core), or after a certain number of packets have been processed in the two core case (application and driver on different cores). A side effect of this patch is that the one core case gets better performance, but the two core case gets worse. The reason that the one core case improves is that updating the global consumer pointer is relatively cheap since the application by definition is not running when the kernel is (they are on the same core) and it is beneficial for the application, once it gets to run, to have pointers that are as up to date as possible since it then can operate on more packets and buffers. In the two core case, the most important performance aspect is to minimize the number of accesses to the global pointers since they are shared between two cores and bounces between the caches of those cores. This patch results in more updates to global state, which means lower performance in the two core case. Fixes: 4b638f13bab4 ("xsk: Eliminate the RX batch size") Reported-by: Ryan Goodfellow Reported-by: Maxim Mikityanskiy Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Acked-by: Jonathan Lemon Acked-by: Maxim Mikityanskiy Link: https://lore.kernel.org/bpf/1581348432-6747-1-git-send-email-magnus.karlsson@intel.com --- net/xdp/xsk.c | 2 ++ net/xdp/xsk_queue.h | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index df600487a68d..356f90e4522b 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -217,6 +217,7 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) static void xsk_flush(struct xdp_sock *xs) { xskq_prod_submit(xs->rx); + __xskq_cons_release(xs->umem->fq); sock_def_readable(&xs->sk); } @@ -304,6 +305,7 @@ void xsk_umem_consume_tx_done(struct xdp_umem *umem) rcu_read_lock(); list_for_each_entry_rcu(xs, &umem->xsk_list, list) { + __xskq_cons_release(xs->tx); xs->sk.sk_write_space(&xs->sk); } rcu_read_unlock(); diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index bec2af11853a..89a01ac4e079 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -271,7 +271,8 @@ static inline void xskq_cons_release(struct xsk_queue *q) { /* To improve performance, only update local state here. * Reflect this to global state when we get new entries - * from the ring in xskq_cons_get_entries(). + * from the ring in xskq_cons_get_entries() and whenever + * Rx or Tx processing are completed in the NAPI loop. */ q->cached_cons++; } -- cgit v1.2.3-59-g8ed1b From ef8c9809acb0805c991bba8bdd4749fc46d44a98 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Sat, 18 Jan 2020 15:41:20 -0500 Subject: drm/msm/mdp5: rate limit pp done timeout warnings Add rate limiting of the 'pp done time out' warnings since these warnings can quickly fill the dmesg buffer. Signed-off-by: Brian Masney Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c index 05cc04f729d6..e1cc541e0ef2 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c @@ -1109,8 +1109,8 @@ static void mdp5_crtc_wait_for_pp_done(struct drm_crtc *crtc) ret = wait_for_completion_timeout(&mdp5_crtc->pp_completion, msecs_to_jiffies(50)); if (ret == 0) - dev_warn(dev->dev, "pp done time out, lm=%d\n", - mdp5_cstate->pipeline.mixer->lm); + dev_warn_ratelimited(dev->dev, "pp done time out, lm=%d\n", + mdp5_cstate->pipeline.mixer->lm); } static void mdp5_crtc_wait_for_flush_done(struct drm_crtc *crtc) -- cgit v1.2.3-59-g8ed1b From e4f9bbe9f8beab9a1ce460e7e194595b76868595 Mon Sep 17 00:00:00 2001 From: Kalyan Thota Date: Thu, 23 Jan 2020 15:47:55 +0530 Subject: msm:disp:dpu1: add UBWC support for display on SC7180 Add UBWC global configuration for display on SC7180 target. Signed-off-by: Kalyan Thota Tested-by: Douglas Anderson Fixes: 73bfb790ac78 ("msm:disp:dpu1: setup display datapath for SC7180 target") Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c | 58 +++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c index 29705e773a4b..80d3cfc14007 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c @@ -12,6 +12,7 @@ #define to_dpu_mdss(x) container_of(x, struct dpu_mdss, base) +#define HW_REV 0x0 #define HW_INTR_STATUS 0x0010 /* Max BW defined in KBps */ @@ -22,6 +23,17 @@ struct dpu_irq_controller { struct irq_domain *domain; }; +struct dpu_hw_cfg { + u32 val; + u32 offset; +}; + +struct dpu_mdss_hw_init_handler { + u32 hw_rev; + u32 hw_reg_count; + struct dpu_hw_cfg* hw_cfg; +}; + struct dpu_mdss { struct msm_mdss base; void __iomem *mmio; @@ -32,6 +44,44 @@ struct dpu_mdss { u32 num_paths; }; +static struct dpu_hw_cfg hw_cfg[] = { + { + /* UBWC global settings */ + .val = 0x1E, + .offset = 0x144, + } +}; + +static struct dpu_mdss_hw_init_handler cfg_handler[] = { + { .hw_rev = DPU_HW_VER_620, + .hw_reg_count = ARRAY_SIZE(hw_cfg), + .hw_cfg = hw_cfg + }, +}; + +static void dpu_mdss_hw_init(struct dpu_mdss *dpu_mdss, u32 hw_rev) +{ + int i; + u32 count = 0; + struct dpu_hw_cfg *hw_cfg = NULL; + + for (i = 0; i < ARRAY_SIZE(cfg_handler); i++) { + if (cfg_handler[i].hw_rev == hw_rev) { + hw_cfg = cfg_handler[i].hw_cfg; + count = cfg_handler[i].hw_reg_count; + break; + } + } + + for (i = 0; i < count; i++ ) { + writel_relaxed(hw_cfg->val, + dpu_mdss->mmio + hw_cfg->offset); + hw_cfg++; + } + + return; +} + static int dpu_mdss_parse_data_bus_icc_path(struct drm_device *dev, struct dpu_mdss *dpu_mdss) { @@ -174,12 +224,18 @@ static int dpu_mdss_enable(struct msm_mdss *mdss) struct dpu_mdss *dpu_mdss = to_dpu_mdss(mdss); struct dss_module_power *mp = &dpu_mdss->mp; int ret; + u32 mdss_rev; dpu_mdss_icc_request_bw(mdss); ret = msm_dss_enable_clk(mp->clk_config, mp->num_clk, true); - if (ret) + if (ret) { DPU_ERROR("clock enable failed, ret:%d\n", ret); + return ret; + } + + mdss_rev = readl_relaxed(dpu_mdss->mmio + HW_REV); + dpu_mdss_hw_init(dpu_mdss, mdss_rev); return ret; } -- cgit v1.2.3-59-g8ed1b From e8e35c62ba517f73cca32bc9925d62f4c4981768 Mon Sep 17 00:00:00 2001 From: Akhil P Oommen Date: Fri, 24 Jan 2020 17:50:11 +0530 Subject: drm/msm/a6xx: Correct the highestbank configuration Highest bank bit configuration is different for a618 gpu. Update it with the correct configuration which is the reset value incidentally. Signed-off-by: Akhil P Oommen Signed-off-by: Sharat Masetty Fixes: e812744c5f95 ("drm: msm: a6xx: Add support for A618") Reviewed-by: Rob Clark Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index daf07800cde0..536d1960a188 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -470,10 +470,12 @@ static int a6xx_hw_init(struct msm_gpu *gpu) /* Select CP0 to always count cycles */ gpu_write(gpu, REG_A6XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT); - gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL, 2 << 1); - gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, 2 << 1); - gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, 2 << 1); - gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, 2 << 21); + if (adreno_is_a630(adreno_gpu)) { + gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL, 2 << 1); + gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, 2 << 1); + gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, 2 << 1); + gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, 2 << 21); + } /* Enable fault detection */ gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, -- cgit v1.2.3-59-g8ed1b From 7fd2dfc3694922eb7ace4801b7208cf9f62ebc7d Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 29 Jan 2020 20:12:44 +0000 Subject: drm: msm: Fix return type of dsi_mgr_connector_mode_valid for kCFI I was hitting kCFI crashes when building with clang, and after some digging finally narrowed it down to the dsi_mgr_connector_mode_valid() function being implemented as returning an int, instead of an enum drm_mode_status. This patch fixes it, and appeases the opaque word of the kCFI gods (seriously, clang inlining everything makes the kCFI backtraces only really rough estimates of where things went wrong). Thanks as always to Sami for his help narrowing this down. Cc: Rob Clark Cc: Sean Paul Cc: Sami Tolvanen Cc: Todd Kjos Cc: Alistair Delva Cc: Amit Pundir Cc: Sumit Semwal Cc: freedreno@lists.freedesktop.org Cc: clang-built-linux@googlegroups.com Signed-off-by: John Stultz Reviewed-by: Nick Desaulniers Tested-by: Amit Pundir Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dsi/dsi_manager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c index 104115d112eb..acc711fd14f8 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_manager.c +++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c @@ -336,7 +336,7 @@ static int dsi_mgr_connector_get_modes(struct drm_connector *connector) return num; } -static int dsi_mgr_connector_mode_valid(struct drm_connector *connector, +static enum drm_mode_status dsi_mgr_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { int id = dsi_mgr_connector_get_id(connector); -- cgit v1.2.3-59-g8ed1b From 56d977d5610bc6a83cf5f2d69cec91f3a2b91f77 Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Tue, 4 Feb 2020 10:42:28 -0700 Subject: drm/msm/a6xx: Remove unneeded GBIF unhalt Commit e812744c5f95 ("drm: msm: a6xx: Add support for A618") added a universal GBIF un-halt into a6xx_start(). This can cause problems for a630 targets which do not use GBIF and might have access protection enabled on the region now occupied by the GBIF registers. But it turns out that we didn't need to unhalt the GBIF in this path since the stop function already takes care of that after executing a flush but before turning off the headswitch. We should be confident that the GBIF is open for business when we restart the hardware. Signed-off-by: Jordan Crouse Tested-by: John Stultz Reviewed-by: Rob Clark Fixes: e812744c5f95 ("drm: msm: a6xx: Add support for A618") Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 536d1960a188..7c449d154b4d 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -378,18 +378,6 @@ static int a6xx_hw_init(struct msm_gpu *gpu) struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); int ret; - /* - * During a previous slumber, GBIF halt is asserted to ensure - * no further transaction can go through GPU before GPU - * headswitch is turned off. - * - * This halt is deasserted once headswitch goes off but - * incase headswitch doesn't goes off clear GBIF halt - * here to ensure GPU wake-up doesn't fail because of - * halted GPU transactions. - */ - gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0); - /* Make sure the GMU keeps the GPU on while we set it up */ a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); -- cgit v1.2.3-59-g8ed1b From 1636295a9f6931e8524c416ae333cd9ff7ef4661 Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Wed, 5 Feb 2020 10:01:21 -0700 Subject: drm/msm/a6xx: Update the GMU bus tables for sc7180 Fixup the GMU bus table values for the sc7180 target. Signed-off-by: Jordan Crouse Reviewed-by: Rob Clark Fixes: e812744c5f95 ("drm: msm: a6xx: Add support for A618") Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a6xx_hfi.c | 85 ++++++++++++++++++++++++----------- 1 file changed, 60 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c index eda11abc5f01..e450e0b97211 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c @@ -7,6 +7,7 @@ #include "a6xx_gmu.h" #include "a6xx_gmu.xml.h" +#include "a6xx_gpu.h" #define HFI_MSG_ID(val) [val] = #val @@ -216,48 +217,82 @@ static int a6xx_hfi_send_perf_table(struct a6xx_gmu *gmu) NULL, 0); } -static int a6xx_hfi_send_bw_table(struct a6xx_gmu *gmu) +static void a618_build_bw_table(struct a6xx_hfi_msg_bw_table *msg) { - struct a6xx_hfi_msg_bw_table msg = { 0 }; + /* Send a single "off" entry since the 618 GMU doesn't do bus scaling */ + msg->bw_level_num = 1; + + msg->ddr_cmds_num = 3; + msg->ddr_wait_bitmask = 0x01; + + msg->ddr_cmds_addrs[0] = 0x50000; + msg->ddr_cmds_addrs[1] = 0x5003c; + msg->ddr_cmds_addrs[2] = 0x5000c; + + msg->ddr_cmds_data[0][0] = 0x40000000; + msg->ddr_cmds_data[0][1] = 0x40000000; + msg->ddr_cmds_data[0][2] = 0x40000000; /* - * The sdm845 GMU doesn't do bus frequency scaling on its own but it - * does need at least one entry in the list because it might be accessed - * when the GMU is shutting down. Send a single "off" entry. + * These are the CX (CNOC) votes - these are used by the GMU but the + * votes are known and fixed for the target */ + msg->cnoc_cmds_num = 1; + msg->cnoc_wait_bitmask = 0x01; + + msg->cnoc_cmds_addrs[0] = 0x5007c; + msg->cnoc_cmds_data[0][0] = 0x40000000; + msg->cnoc_cmds_data[1][0] = 0x60000001; +} - msg.bw_level_num = 1; +static void a6xx_build_bw_table(struct a6xx_hfi_msg_bw_table *msg) +{ + /* Send a single "off" entry since the 630 GMU doesn't do bus scaling */ + msg->bw_level_num = 1; - msg.ddr_cmds_num = 3; - msg.ddr_wait_bitmask = 0x07; + msg->ddr_cmds_num = 3; + msg->ddr_wait_bitmask = 0x07; - msg.ddr_cmds_addrs[0] = 0x50000; - msg.ddr_cmds_addrs[1] = 0x5005c; - msg.ddr_cmds_addrs[2] = 0x5000c; + msg->ddr_cmds_addrs[0] = 0x50000; + msg->ddr_cmds_addrs[1] = 0x5005c; + msg->ddr_cmds_addrs[2] = 0x5000c; - msg.ddr_cmds_data[0][0] = 0x40000000; - msg.ddr_cmds_data[0][1] = 0x40000000; - msg.ddr_cmds_data[0][2] = 0x40000000; + msg->ddr_cmds_data[0][0] = 0x40000000; + msg->ddr_cmds_data[0][1] = 0x40000000; + msg->ddr_cmds_data[0][2] = 0x40000000; /* * These are the CX (CNOC) votes. This is used but the values for the * sdm845 GMU are known and fixed so we can hard code them. */ - msg.cnoc_cmds_num = 3; - msg.cnoc_wait_bitmask = 0x05; + msg->cnoc_cmds_num = 3; + msg->cnoc_wait_bitmask = 0x05; - msg.cnoc_cmds_addrs[0] = 0x50034; - msg.cnoc_cmds_addrs[1] = 0x5007c; - msg.cnoc_cmds_addrs[2] = 0x5004c; + msg->cnoc_cmds_addrs[0] = 0x50034; + msg->cnoc_cmds_addrs[1] = 0x5007c; + msg->cnoc_cmds_addrs[2] = 0x5004c; - msg.cnoc_cmds_data[0][0] = 0x40000000; - msg.cnoc_cmds_data[0][1] = 0x00000000; - msg.cnoc_cmds_data[0][2] = 0x40000000; + msg->cnoc_cmds_data[0][0] = 0x40000000; + msg->cnoc_cmds_data[0][1] = 0x00000000; + msg->cnoc_cmds_data[0][2] = 0x40000000; + + msg->cnoc_cmds_data[1][0] = 0x60000001; + msg->cnoc_cmds_data[1][1] = 0x20000001; + msg->cnoc_cmds_data[1][2] = 0x60000001; +} + + +static int a6xx_hfi_send_bw_table(struct a6xx_gmu *gmu) +{ + struct a6xx_hfi_msg_bw_table msg = { 0 }; + struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; - msg.cnoc_cmds_data[1][0] = 0x60000001; - msg.cnoc_cmds_data[1][1] = 0x20000001; - msg.cnoc_cmds_data[1][2] = 0x60000001; + if (adreno_is_a618(adreno_gpu)) + a618_build_bw_table(&msg); + else + a6xx_build_bw_table(&msg); return a6xx_hfi_send_msg(gmu, HFI_H2F_MSG_BW_TABLE, &msg, sizeof(msg), NULL, 0); -- cgit v1.2.3-59-g8ed1b From 9cc68ee1d92e3ab5bd5c821e5c1f387b0e16a669 Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Wed, 5 Feb 2020 13:48:17 -0700 Subject: drm/msm: Fix a6xx GMU shutdown sequence Commit e812744c5f95 ("drm: msm: a6xx: Add support for A618") missed updating the VBIF flush in a6xx_gmu_shutdown and instead inserted the new sequence into a6xx_pm_suspend along with a redundant GMU idle. Move a6xx_bus_clear_pending_transactions to a6xx_gmu.c and use it in the appropriate place in the shutdown routine and remove the redundant idle call. v2: Remove newly unused variable that was triggering a warning Signed-off-by: Jordan Crouse Reviewed-by: Rob Clark Fixes: e812744c5f95 ("drm: msm: a6xx: Add support for A618") Tested-by: Douglas Anderson Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 37 +++++++++++++++++++++++++----- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 43 ----------------------------------- 2 files changed, 31 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 983afeaee737..748cd379065f 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -796,12 +796,41 @@ bool a6xx_gmu_isidle(struct a6xx_gmu *gmu) return true; } +#define GBIF_CLIENT_HALT_MASK BIT(0) +#define GBIF_ARB_HALT_MASK BIT(1) + +static void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu) +{ + struct msm_gpu *gpu = &adreno_gpu->base; + + if (!a6xx_has_gbif(adreno_gpu)) { + gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0xf); + spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) & + 0xf) == 0xf); + gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0); + + return; + } + + /* Halt new client requests on GBIF */ + gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK); + spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) & + (GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK); + + /* Halt all AXI requests on GBIF */ + gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK); + spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) & + (GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK); + + /* The GBIF halt needs to be explicitly cleared */ + gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0); +} + /* Gracefully try to shut down the GMU and by extension the GPU */ static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu) { struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; - struct msm_gpu *gpu = &adreno_gpu->base; u32 val; /* @@ -819,11 +848,7 @@ static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu) return; } - /* Clear the VBIF pipe before shutting down */ - gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0xf); - spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) & 0xf) - == 0xf); - gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0); + a6xx_bus_clear_pending_transactions(adreno_gpu); /* tell the GMU we want to slumber */ a6xx_gmu_notify_slumber(gmu); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 7c449d154b4d..68af24150de5 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -738,39 +738,6 @@ static const u32 a6xx_register_offsets[REG_ADRENO_REGISTER_MAX] = { REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A6XX_CP_RB_CNTL), }; -#define GBIF_CLIENT_HALT_MASK BIT(0) -#define GBIF_ARB_HALT_MASK BIT(1) - -static void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu) -{ - struct msm_gpu *gpu = &adreno_gpu->base; - - if(!a6xx_has_gbif(adreno_gpu)){ - gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0xf); - spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) & - 0xf) == 0xf); - gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0); - - return; - } - - /* Halt new client requests on GBIF */ - gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK); - spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) & - (GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK); - - /* Halt all AXI requests on GBIF */ - gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK); - spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) & - (GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK); - - /* - * GMU needs DDR access in slumber path. Deassert GBIF halt now - * to allow for GMU to access system memory. - */ - gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0); -} - static int a6xx_pm_resume(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); @@ -795,16 +762,6 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu) devfreq_suspend_device(gpu->devfreq.devfreq); - /* - * Make sure the GMU is idle before continuing (because some transitions - * may use VBIF - */ - a6xx_gmu_wait_for_idle(&a6xx_gpu->gmu); - - /* Clear the VBIF pipe before shutting down */ - /* FIXME: This accesses the GPU - do we need to make sure it is on? */ - a6xx_bus_clear_pending_transactions(adreno_gpu); - return a6xx_gmu_stop(a6xx_gpu); } -- cgit v1.2.3-59-g8ed1b From ff5ac61ee83c13f516544d29847d28be093a40ee Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 1 Feb 2020 09:32:21 +0100 Subject: x86/ima: use correct identifier for SetupMode variable The IMA arch code attempts to inspect the "SetupMode" EFI variable by populating a variable called efi_SetupMode_name with the string "SecureBoot" and passing that to the EFI GetVariable service, which obviously does not yield the expected result. Given that the string is only referenced a single time, let's get rid of the intermediate variable, and pass the correct string as an immediate argument. While at it, do the same for "SecureBoot". Fixes: 399574c64eaf ("x86/ima: retry detecting secure boot mode") Fixes: 980ef4d22a95 ("x86/ima: check EFI SetupMode too") Cc: Matthew Garrett Signed-off-by: Ard Biesheuvel Cc: stable@vger.kernel.org # v5.3 Signed-off-by: Mimi Zohar --- arch/x86/kernel/ima_arch.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/ima_arch.c b/arch/x86/kernel/ima_arch.c index 4d4f5d9faac3..23054909c8dd 100644 --- a/arch/x86/kernel/ima_arch.c +++ b/arch/x86/kernel/ima_arch.c @@ -10,8 +10,6 @@ extern struct boot_params boot_params; static enum efi_secureboot_mode get_sb_mode(void) { - efi_char16_t efi_SecureBoot_name[] = L"SecureBoot"; - efi_char16_t efi_SetupMode_name[] = L"SecureBoot"; efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID; efi_status_t status; unsigned long size; @@ -25,7 +23,7 @@ static enum efi_secureboot_mode get_sb_mode(void) } /* Get variable contents into buffer */ - status = efi.get_variable(efi_SecureBoot_name, &efi_variable_guid, + status = efi.get_variable(L"SecureBoot", &efi_variable_guid, NULL, &size, &secboot); if (status == EFI_NOT_FOUND) { pr_info("ima: secureboot mode disabled\n"); @@ -38,7 +36,7 @@ static enum efi_secureboot_mode get_sb_mode(void) } size = sizeof(setupmode); - status = efi.get_variable(efi_SetupMode_name, &efi_variable_guid, + status = efi.get_variable(L"SetupMode", &efi_variable_guid, NULL, &size, &setupmode); if (status != EFI_SUCCESS) /* ignore unknown SetupMode */ -- cgit v1.2.3-59-g8ed1b From e433be929e63265b7412478eb7ff271467aee2d7 Mon Sep 17 00:00:00 2001 From: Mansour Behabadi Date: Wed, 29 Jan 2020 17:26:31 +1100 Subject: HID: apple: Add support for recent firmware on Magic Keyboards Magic Keyboards with more recent firmware (0x0100) report Fn key differently. Without this patch, Fn key may not behave as expected and may not be configurable via hid_apple fnmode module parameter. Signed-off-by: Mansour Behabadi Signed-off-by: Jiri Kosina --- drivers/hid/hid-apple.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index 6ac8becc2372..d732d1d10caf 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -340,7 +340,8 @@ static int apple_input_mapping(struct hid_device *hdev, struct hid_input *hi, unsigned long **bit, int *max) { if (usage->hid == (HID_UP_CUSTOM | 0x0003) || - usage->hid == (HID_UP_MSVENDOR | 0x0003)) { + usage->hid == (HID_UP_MSVENDOR | 0x0003) || + usage->hid == (HID_UP_HPVENDOR2 | 0x0003)) { /* The fn key on Apple USB keyboards */ set_bit(EV_REP, hi->input->evbit); hid_map_usage_clear(hi, usage, bit, max, EV_KEY, KEY_FN); -- cgit v1.2.3-59-g8ed1b From 5ebdffd25098898aff1249ae2f7dbfddd76d8f8f Mon Sep 17 00:00:00 2001 From: Johan Korsnes Date: Fri, 17 Jan 2020 13:08:35 +0100 Subject: HID: core: fix off-by-one memset in hid_report_raw_event() In case a report is greater than HID_MAX_BUFFER_SIZE, it is truncated, but the report-number byte is not correctly handled. This results in a off-by-one in the following memset, causing a kernel Oops and ensuing system crash. Note: With commit 8ec321e96e05 ("HID: Fix slab-out-of-bounds read in hid_field_extract") I no longer hit the kernel Oops as we instead fail "controlled" at probe if there is a report too long in the HID report-descriptor. hid_report_raw_event() is an exported symbol, so presumabely we cannot always rely on this being the case. Fixes: 966922f26c7f ("HID: fix a crash in hid_report_raw_event() function.") Signed-off-by: Johan Korsnes Cc: Armando Visconti Cc: Jiri Kosina Cc: Alan Stern Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 851fe54ea59e..359616e3efbb 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1741,7 +1741,9 @@ int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size, rsize = ((report->size - 1) >> 3) + 1; - if (rsize > HID_MAX_BUFFER_SIZE) + if (report_enum->numbered && rsize >= HID_MAX_BUFFER_SIZE) + rsize = HID_MAX_BUFFER_SIZE - 1; + else if (rsize > HID_MAX_BUFFER_SIZE) rsize = HID_MAX_BUFFER_SIZE; if (csize < rsize) { -- cgit v1.2.3-59-g8ed1b From 84a4062632462c4320704fcdf8e99e89e94c0aba Mon Sep 17 00:00:00 2001 From: Johan Korsnes Date: Fri, 17 Jan 2020 13:08:36 +0100 Subject: HID: core: increase HID report buffer size to 8KiB We have a HID touch device that reports its opens and shorts test results in HID buffers of size 8184 bytes. The maximum size of the HID buffer is currently set to 4096 bytes, causing probe of this device to fail. With this patch we increase the maximum size of the HID buffer to 8192 bytes, making device probe and acquisition of said buffers succeed. Signed-off-by: Johan Korsnes Cc: Alan Stern Cc: Armando Visconti Cc: Jiri Kosina Signed-off-by: Jiri Kosina --- include/linux/hid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/hid.h b/include/linux/hid.h index cd41f209043f..875f71132b14 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -492,7 +492,7 @@ struct hid_report_enum { }; #define HID_MIN_BUFFER_SIZE 64 /* make sure there is at least a packet size of space */ -#define HID_MAX_BUFFER_SIZE 4096 /* 4kb */ +#define HID_MAX_BUFFER_SIZE 8192 /* 8kb */ #define HID_CONTROL_FIFO_SIZE 256 /* to init devices with >100 reports */ #define HID_OUTPUT_FIFO_SIZE 64 -- cgit v1.2.3-59-g8ed1b From 5c02c447eaeda29d3da121a2e17b97ccaf579b51 Mon Sep 17 00:00:00 2001 From: "dan.carpenter@oracle.com" Date: Wed, 15 Jan 2020 20:46:28 +0300 Subject: HID: hiddev: Fix race in in hiddev_disconnect() Syzbot reports that "hiddev" is used after it's free in hiddev_disconnect(). The hiddev_disconnect() function sets "hiddev->exist = 0;" so hiddev_release() can free it as soon as we drop the "existancelock" lock. This patch moves the mutex_unlock(&hiddev->existancelock) until after we have finished using it. Reported-by: syzbot+784ccb935f9900cc7c9e@syzkaller.appspotmail.com Fixes: 7f77897ef2b6 ("HID: hiddev: fix potential use-after-free") Suggested-by: Alan Stern Signed-off-by: Dan Carpenter Signed-off-by: Jiri Kosina --- drivers/hid/usbhid/hiddev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index a970b809d778..4140dea693e9 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -932,9 +932,9 @@ void hiddev_disconnect(struct hid_device *hid) hiddev->exist = 0; if (hiddev->open) { - mutex_unlock(&hiddev->existancelock); hid_hw_close(hiddev->hid); wake_up_interruptible(&hiddev->wait); + mutex_unlock(&hiddev->existancelock); } else { mutex_unlock(&hiddev->existancelock); kfree(hiddev); -- cgit v1.2.3-59-g8ed1b From 8d2e77b39b8fecb794e19cd006a12f90b14dd077 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 4 Dec 2019 04:35:25 +0100 Subject: HID: alps: Fix an error handling path in 'alps_input_configured()' They are issues: - if 'input_allocate_device()' fails and return NULL, there is no need to free anything and 'input_free_device()' call is a no-op. It can be axed. - 'ret' is known to be 0 at this point, so we must set it to a meaningful value before returning Fixes: 2562756dde55 ("HID: add Alps I2C HID Touchpad-Stick support") Signed-off-by: Christophe JAILLET Signed-off-by: Jiri Kosina --- drivers/hid/hid-alps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-alps.c b/drivers/hid/hid-alps.c index ae79a7c66737..fa704153cb00 100644 --- a/drivers/hid/hid-alps.c +++ b/drivers/hid/hid-alps.c @@ -730,7 +730,7 @@ static int alps_input_configured(struct hid_device *hdev, struct hid_input *hi) if (data->has_sp) { input2 = input_allocate_device(); if (!input2) { - input_free_device(input2); + ret = -ENOMEM; goto exit; } -- cgit v1.2.3-59-g8ed1b From 2fe77100553f3ac6b2105db8ae14b5ea4b43c108 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 11 Feb 2020 09:59:10 -0800 Subject: selftests/bpf: Fix error checking on reading the tcp_fastopen sysctl There is a typo in checking the "saved_tcp_fo" and instead "saved_tcp_syncookie" is checked again. This patch fixes it and also breaks them into separate if statements such that the test will abort asap. Reported-by: David Binderman Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200211175910.3235321-1-kafai@fb.com --- tools/testing/selftests/bpf/prog_tests/select_reuseport.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c index 098bcae5f827..b577666d028e 100644 --- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c +++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c @@ -822,8 +822,10 @@ void test_select_reuseport(void) goto out; saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL); + if (saved_tcp_fo < 0) + goto out; saved_tcp_syncookie = read_int_sysctl(TCP_SYNCOOKIE_SYSCTL); - if (saved_tcp_syncookie < 0 || saved_tcp_syncookie < 0) + if (saved_tcp_syncookie < 0) goto out; if (enable_fastopen()) -- cgit v1.2.3-59-g8ed1b From eecd618b45166fdddea3b6366b18479c2be0e11c Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 12 Feb 2020 10:32:08 +0000 Subject: selftests/bpf: Mark SYN cookie test skipped for UDP sockets SYN cookie test with reuseport BPF doesn't make sense for UDP sockets. We don't run it but the test_progs test runner doesn't know about it. Mark the test as skipped so the test_progs can report correctly how many tests were skipped. Fixes: 7ee0d4e97b88 ("selftests/bpf: Switch reuseport tests for test_progs framework") Reported-by: Lorenz Bauer Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200212103208.438419-1-jakub@cloudflare.com --- tools/testing/selftests/bpf/prog_tests/select_reuseport.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c index b577666d028e..0800036ed654 100644 --- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c +++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c @@ -506,8 +506,10 @@ static void test_syncookie(int type, sa_family_t family) .pass_on_failure = 0, }; - if (type != SOCK_STREAM) + if (type != SOCK_STREAM) { + test__skip(); return; + } /* * +1 for TCP-SYN and -- cgit v1.2.3-59-g8ed1b From b50f4f940b736b63df1d8a0ddcd28f0a580233eb Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 10 Feb 2020 11:04:55 +0100 Subject: dt-bindings: display: sunxi: Fix compatible Commit f5a98bfe7b37 ("dt-bindings: display: Convert Allwinner display pipeline to schemas") introduced a YAML schema for the Allwinner TCON DT binding, but the H6 TCON-TV compatible was mistakenly set to fallback on the A83t's, while the initial documentation and the DT are using R40's. Fix that. Fixes: f5a98bfe7b37 ("dt-bindings: display: Convert Allwinner display pipeline to schemas") Signed-off-by: Maxime Ripard Acked-by: Rob Herring Link: https://patchwork.freedesktop.org/patch/msgid/20200210100455.78695-1-maxime@cerno.tech --- .../devicetree/bindings/display/allwinner,sun4i-a10-tcon.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tcon.yaml b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tcon.yaml index 86ad617d2327..5ff9cf26ca38 100644 --- a/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tcon.yaml +++ b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tcon.yaml @@ -43,9 +43,13 @@ properties: - enum: - allwinner,sun8i-h3-tcon-tv - allwinner,sun50i-a64-tcon-tv - - allwinner,sun50i-h6-tcon-tv - const: allwinner,sun8i-a83t-tcon-tv + - items: + - enum: + - allwinner,sun50i-h6-tcon-tv + - const: allwinner,sun8i-r40-tcon-tv + reg: maxItems: 1 -- cgit v1.2.3-59-g8ed1b From e6980a727154b793adb218fbc7b4d6af52a7e364 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Fri, 17 Jan 2020 16:34:28 +0100 Subject: drm/modes: Make sure to parse valid rotation value from cmdline A rotation value should have exactly one rotation angle. At the moment there is no validation for this when parsing video= parameters from the command line. This causes problems later on when we try to combine the command line rotation with the panel orientation. To make sure that we generate a valid rotation value: - Set DRM_MODE_ROTATE_0 by default (if no rotate= option is set) - Validate that there is exactly one rotation angle set (i.e. specifying the rotate= option multiple times is invalid) Signed-off-by: Stephan Gerhold Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20200117153429.54700-2-stephan@gerhold.net --- drivers/gpu/drm/drm_modes.c | 7 +++++++ drivers/gpu/drm/selftests/drm_cmdline_selftests.h | 1 + drivers/gpu/drm/selftests/test-drm_cmdline_parser.c | 15 +++++++++++++-- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 10336b144c72..d4d64518e11b 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -1698,6 +1698,13 @@ static int drm_mode_parse_cmdline_options(const char *str, if (rotation && freestanding) return -EINVAL; + if (!(rotation & DRM_MODE_ROTATE_MASK)) + rotation |= DRM_MODE_ROTATE_0; + + /* Make sure there is exactly one rotation defined */ + if (!is_power_of_2(rotation & DRM_MODE_ROTATE_MASK)) + return -EINVAL; + mode->rotation_reflection = rotation; return 0; diff --git a/drivers/gpu/drm/selftests/drm_cmdline_selftests.h b/drivers/gpu/drm/selftests/drm_cmdline_selftests.h index ceac7af9a172..29e367db6118 100644 --- a/drivers/gpu/drm/selftests/drm_cmdline_selftests.h +++ b/drivers/gpu/drm/selftests/drm_cmdline_selftests.h @@ -53,6 +53,7 @@ cmdline_test(drm_cmdline_test_rotate_0) cmdline_test(drm_cmdline_test_rotate_90) cmdline_test(drm_cmdline_test_rotate_180) cmdline_test(drm_cmdline_test_rotate_270) +cmdline_test(drm_cmdline_test_rotate_multiple) cmdline_test(drm_cmdline_test_rotate_invalid_val) cmdline_test(drm_cmdline_test_rotate_truncated) cmdline_test(drm_cmdline_test_hmirror) diff --git a/drivers/gpu/drm/selftests/test-drm_cmdline_parser.c b/drivers/gpu/drm/selftests/test-drm_cmdline_parser.c index 520f3e66a384..d96cd890def6 100644 --- a/drivers/gpu/drm/selftests/test-drm_cmdline_parser.c +++ b/drivers/gpu/drm/selftests/test-drm_cmdline_parser.c @@ -856,6 +856,17 @@ static int drm_cmdline_test_rotate_270(void *ignored) return 0; } +static int drm_cmdline_test_rotate_multiple(void *ignored) +{ + struct drm_cmdline_mode mode = { }; + + FAIL_ON(drm_mode_parse_command_line_for_connector("720x480,rotate=0,rotate=90", + &no_connector, + &mode)); + + return 0; +} + static int drm_cmdline_test_rotate_invalid_val(void *ignored) { struct drm_cmdline_mode mode = { }; @@ -888,7 +899,7 @@ static int drm_cmdline_test_hmirror(void *ignored) FAIL_ON(!mode.specified); FAIL_ON(mode.xres != 720); FAIL_ON(mode.yres != 480); - FAIL_ON(mode.rotation_reflection != DRM_MODE_REFLECT_X); + FAIL_ON(mode.rotation_reflection != (DRM_MODE_ROTATE_0 | DRM_MODE_REFLECT_X)); FAIL_ON(mode.refresh_specified); @@ -913,7 +924,7 @@ static int drm_cmdline_test_vmirror(void *ignored) FAIL_ON(!mode.specified); FAIL_ON(mode.xres != 720); FAIL_ON(mode.yres != 480); - FAIL_ON(mode.rotation_reflection != DRM_MODE_REFLECT_Y); + FAIL_ON(mode.rotation_reflection != (DRM_MODE_ROTATE_0 | DRM_MODE_REFLECT_Y)); FAIL_ON(mode.refresh_specified); -- cgit v1.2.3-59-g8ed1b From 5c320b6ce7510653bce68cecf80cf5b2d67e907f Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Fri, 17 Jan 2020 16:34:29 +0100 Subject: drm/modes: Allow DRM_MODE_ROTATE_0 when applying video mode parameters At the moment, only DRM_MODE_ROTATE_180 is allowed when we try to apply the rotation from the video mode parameters. It is also useful to allow DRM_MODE_ROTATE_0 in case there is only a reflect option in the video mode parameter (e.g. video=540x960,reflect_x). DRM_MODE_ROTATE_0 means "no rotation" and should therefore not require any special handling, so we can just add it to the if condition. Signed-off-by: Stephan Gerhold Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20200117153429.54700-3-stephan@gerhold.net --- drivers/gpu/drm/drm_client_modeset.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c index 6d4a29e99ae2..3035584f6dc7 100644 --- a/drivers/gpu/drm/drm_client_modeset.c +++ b/drivers/gpu/drm/drm_client_modeset.c @@ -951,7 +951,8 @@ bool drm_client_rotation(struct drm_mode_set *modeset, unsigned int *rotation) * depending on the hardware this may require the framebuffer * to be in a specific tiling format. */ - if ((*rotation & DRM_MODE_ROTATE_MASK) != DRM_MODE_ROTATE_180 || + if (((*rotation & DRM_MODE_ROTATE_MASK) != DRM_MODE_ROTATE_0 && + (*rotation & DRM_MODE_ROTATE_MASK) != DRM_MODE_ROTATE_180) || !plane->rotation_property) return false; -- cgit v1.2.3-59-g8ed1b From 67f68f977a12657028e866c013d43dd87320d210 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 12 Feb 2020 09:48:57 -0800 Subject: Revert "xhci: Fix memory leak when caching protocol extended capability PSI tables" This reverts commit fc57313d1017dd6b6f37a94e88daa8df54368ecc. Marek reports that it breaks things: This patch landed in today's linux-next (20200211) and causes NULL pointer dereference during second suspend/resume cycle on Samsung Exynos5422-based (arm 32bit) Odroid XU3lite board: A more complete fix will be added soon. Reported-by: Marek Szyprowski Fixes: fc57313d1017 ("xhci: Fix memory leak when caching protocol extended capability PSI tables") Cc: Paul Menzel Cc: Sajja Venkateswara Rao Cc: stable # v4.4+ Cc: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 25 +++++++------------ drivers/usb/host/xhci-mem.c | 58 +++++++++++++++++---------------------------- drivers/usb/host/xhci.h | 14 +++-------- 3 files changed, 33 insertions(+), 64 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index af92b2576fe9..7a3a29e5e9d2 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -55,7 +55,6 @@ static u8 usb_bos_descriptor [] = { static int xhci_create_usb3_bos_desc(struct xhci_hcd *xhci, char *buf, u16 wLength) { - struct xhci_port_cap *port_cap = NULL; int i, ssa_count; u32 temp; u16 desc_size, ssp_cap_size, ssa_size = 0; @@ -65,24 +64,16 @@ static int xhci_create_usb3_bos_desc(struct xhci_hcd *xhci, char *buf, ssp_cap_size = sizeof(usb_bos_descriptor) - desc_size; /* does xhci support USB 3.1 Enhanced SuperSpeed */ - for (i = 0; i < xhci->num_port_caps; i++) { - if (xhci->port_caps[i].maj_rev == 0x03 && - xhci->port_caps[i].min_rev >= 0x01) { - usb3_1 = true; - port_cap = &xhci->port_caps[i]; - break; - } - } - - if (usb3_1) { + if (xhci->usb3_rhub.min_rev >= 0x01) { /* does xhci provide a PSI table for SSA speed attributes? */ - if (port_cap->psi_count) { + if (xhci->usb3_rhub.psi_count) { /* two SSA entries for each unique PSI ID, RX and TX */ - ssa_count = port_cap->psi_uid_count * 2; + ssa_count = xhci->usb3_rhub.psi_uid_count * 2; ssa_size = ssa_count * sizeof(u32); ssp_cap_size -= 16; /* skip copying the default SSA */ } desc_size += ssp_cap_size; + usb3_1 = true; } memcpy(buf, &usb_bos_descriptor, min(desc_size, wLength)); @@ -108,7 +99,7 @@ static int xhci_create_usb3_bos_desc(struct xhci_hcd *xhci, char *buf, } /* If PSI table exists, add the custom speed attributes from it */ - if (usb3_1 && port_cap->psi_count) { + if (usb3_1 && xhci->usb3_rhub.psi_count) { u32 ssp_cap_base, bm_attrib, psi, psi_mant, psi_exp; int offset; @@ -120,7 +111,7 @@ static int xhci_create_usb3_bos_desc(struct xhci_hcd *xhci, char *buf, /* attribute count SSAC bits 4:0 and ID count SSIC bits 8:5 */ bm_attrib = (ssa_count - 1) & 0x1f; - bm_attrib |= (port_cap->psi_uid_count - 1) << 5; + bm_attrib |= (xhci->usb3_rhub.psi_uid_count - 1) << 5; put_unaligned_le32(bm_attrib, &buf[ssp_cap_base + 4]); if (wLength < desc_size + ssa_size) @@ -133,8 +124,8 @@ static int xhci_create_usb3_bos_desc(struct xhci_hcd *xhci, char *buf, * USB 3.1 requires two SSA entries (RX and TX) for every link */ offset = desc_size; - for (i = 0; i < port_cap->psi_count; i++) { - psi = port_cap->psi[i]; + for (i = 0; i < xhci->usb3_rhub.psi_count; i++) { + psi = xhci->usb3_rhub.psi[i]; psi &= ~USB_SSP_SUBLINK_SPEED_RSVD; psi_exp = XHCI_EXT_PORT_PSIE(psi); psi_mant = XHCI_EXT_PORT_PSIM(psi); diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index bd5b152df6c0..0e2701649369 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1915,16 +1915,17 @@ no_bw: xhci->usb3_rhub.num_ports = 0; xhci->num_active_eps = 0; kfree(xhci->usb2_rhub.ports); + kfree(xhci->usb2_rhub.psi); kfree(xhci->usb3_rhub.ports); + kfree(xhci->usb3_rhub.psi); kfree(xhci->hw_ports); kfree(xhci->rh_bw); kfree(xhci->ext_caps); - for (i = 0; i < xhci->num_port_caps; i++) - kfree(xhci->port_caps[i].psi); - kfree(xhci->port_caps); xhci->usb2_rhub.ports = NULL; + xhci->usb2_rhub.psi = NULL; xhci->usb3_rhub.ports = NULL; + xhci->usb3_rhub.psi = NULL; xhci->hw_ports = NULL; xhci->rh_bw = NULL; xhci->ext_caps = NULL; @@ -2125,7 +2126,6 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports, u8 major_revision, minor_revision; struct xhci_hub *rhub; struct device *dev = xhci_to_hcd(xhci)->self.sysdev; - struct xhci_port_cap *port_cap; temp = readl(addr); major_revision = XHCI_EXT_PORT_MAJOR(temp); @@ -2160,39 +2160,31 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports, /* WTF? "Valid values are ‘1’ to MaxPorts" */ return; - port_cap = &xhci->port_caps[xhci->num_port_caps++]; - if (xhci->num_port_caps > max_caps) - return; - - port_cap->maj_rev = major_revision; - port_cap->min_rev = minor_revision; - port_cap->psi_count = XHCI_EXT_PORT_PSIC(temp); - - if (port_cap->psi_count) { - port_cap->psi = kcalloc_node(port_cap->psi_count, - sizeof(*port_cap->psi), - GFP_KERNEL, dev_to_node(dev)); - if (!port_cap->psi) - port_cap->psi_count = 0; + rhub->psi_count = XHCI_EXT_PORT_PSIC(temp); + if (rhub->psi_count) { + rhub->psi = kcalloc_node(rhub->psi_count, sizeof(*rhub->psi), + GFP_KERNEL, dev_to_node(dev)); + if (!rhub->psi) + rhub->psi_count = 0; - port_cap->psi_uid_count++; - for (i = 0; i < port_cap->psi_count; i++) { - port_cap->psi[i] = readl(addr + 4 + i); + rhub->psi_uid_count++; + for (i = 0; i < rhub->psi_count; i++) { + rhub->psi[i] = readl(addr + 4 + i); /* count unique ID values, two consecutive entries can * have the same ID if link is assymetric */ - if (i && (XHCI_EXT_PORT_PSIV(port_cap->psi[i]) != - XHCI_EXT_PORT_PSIV(port_cap->psi[i - 1]))) - port_cap->psi_uid_count++; + if (i && (XHCI_EXT_PORT_PSIV(rhub->psi[i]) != + XHCI_EXT_PORT_PSIV(rhub->psi[i - 1]))) + rhub->psi_uid_count++; xhci_dbg(xhci, "PSIV:%d PSIE:%d PLT:%d PFD:%d LP:%d PSIM:%d\n", - XHCI_EXT_PORT_PSIV(port_cap->psi[i]), - XHCI_EXT_PORT_PSIE(port_cap->psi[i]), - XHCI_EXT_PORT_PLT(port_cap->psi[i]), - XHCI_EXT_PORT_PFD(port_cap->psi[i]), - XHCI_EXT_PORT_LP(port_cap->psi[i]), - XHCI_EXT_PORT_PSIM(port_cap->psi[i])); + XHCI_EXT_PORT_PSIV(rhub->psi[i]), + XHCI_EXT_PORT_PSIE(rhub->psi[i]), + XHCI_EXT_PORT_PLT(rhub->psi[i]), + XHCI_EXT_PORT_PFD(rhub->psi[i]), + XHCI_EXT_PORT_LP(rhub->psi[i]), + XHCI_EXT_PORT_PSIM(rhub->psi[i])); } } /* cache usb2 port capabilities */ @@ -2227,7 +2219,6 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports, continue; } hw_port->rhub = rhub; - hw_port->port_cap = port_cap; rhub->num_ports++; } /* FIXME: Should we disable ports not in the Extended Capabilities? */ @@ -2318,11 +2309,6 @@ static int xhci_setup_port_arrays(struct xhci_hcd *xhci, gfp_t flags) if (!xhci->ext_caps) return -ENOMEM; - xhci->port_caps = kcalloc_node(cap_count, sizeof(*xhci->port_caps), - flags, dev_to_node(dev)); - if (!xhci->port_caps) - return -ENOMEM; - offset = cap_start; while (offset) { diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 3ecee10fdcdc..13d8838cd552 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1702,20 +1702,12 @@ struct xhci_bus_state { * Intel Lynx Point LP xHCI host. */ #define XHCI_MAX_REXIT_TIMEOUT_MS 20 -struct xhci_port_cap { - u32 *psi; /* array of protocol speed ID entries */ - u8 psi_count; - u8 psi_uid_count; - u8 maj_rev; - u8 min_rev; -}; struct xhci_port { __le32 __iomem *addr; int hw_portnum; int hcd_portnum; struct xhci_hub *rhub; - struct xhci_port_cap *port_cap; }; struct xhci_hub { @@ -1727,6 +1719,9 @@ struct xhci_hub { /* supported prococol extended capabiliy values */ u8 maj_rev; u8 min_rev; + u32 *psi; /* array of protocol speed ID entries */ + u8 psi_count; + u8 psi_uid_count; }; /* There is one xhci_hcd structure per controller */ @@ -1885,9 +1880,6 @@ struct xhci_hcd { /* cached usb2 extened protocol capabilites */ u32 *ext_caps; unsigned int num_ext_caps; - /* cached extended protocol port capabilities */ - struct xhci_port_cap *port_caps; - unsigned int num_port_caps; /* Compliance Mode Recovery Data */ struct timer_list comp_mode_recovery_timer; u32 port_status_u0; -- cgit v1.2.3-59-g8ed1b From cf0ee7c60c89641f6e4d1d3c7867fe32b9e30300 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 11 Feb 2020 17:01:58 +0200 Subject: xhci: Fix memory leak when caching protocol extended capability PSI tables - take 2 xhci driver assumed that xHC controllers have at most one custom supported speed table (PSI) for all usb 3.x ports. Memory was allocated for one PSI table under the xhci hub structure. Turns out this is not the case, some controllers have a separate "supported protocol capability" entry with a PSI table for each port. This means each usb3 roothub port can in theory support different custom speeds. To solve this, cache all supported protocol capabilities with their PSI tables in an array, and add pointers to the xhci port structure so that every port points to its capability entry in the array. When creating the SuperSpeedPlus USB Device Capability BOS descriptor for the xhci USB 3.1 roothub we for now will use only data from the first USB 3.1 capable protocol capability entry in the array. This could be improved later, this patch focuses resolving the memory leak. Reported-by: Paul Menzel Reported-by: Sajja Venkateswara Rao Fixes: 47189098f8be ("xhci: parse xhci protocol speed ID list for usb 3.1 usage") Cc: stable # v4.4+ Signed-off-by: Mathias Nyman Tested-by: Marek Szyprowski Link: https://lore.kernel.org/r/20200211150158.14475-1-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 25 +++++++++++++------ drivers/usb/host/xhci-mem.c | 59 ++++++++++++++++++++++++++++----------------- drivers/usb/host/xhci.h | 14 ++++++++--- 3 files changed, 65 insertions(+), 33 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 7a3a29e5e9d2..af92b2576fe9 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -55,6 +55,7 @@ static u8 usb_bos_descriptor [] = { static int xhci_create_usb3_bos_desc(struct xhci_hcd *xhci, char *buf, u16 wLength) { + struct xhci_port_cap *port_cap = NULL; int i, ssa_count; u32 temp; u16 desc_size, ssp_cap_size, ssa_size = 0; @@ -64,16 +65,24 @@ static int xhci_create_usb3_bos_desc(struct xhci_hcd *xhci, char *buf, ssp_cap_size = sizeof(usb_bos_descriptor) - desc_size; /* does xhci support USB 3.1 Enhanced SuperSpeed */ - if (xhci->usb3_rhub.min_rev >= 0x01) { + for (i = 0; i < xhci->num_port_caps; i++) { + if (xhci->port_caps[i].maj_rev == 0x03 && + xhci->port_caps[i].min_rev >= 0x01) { + usb3_1 = true; + port_cap = &xhci->port_caps[i]; + break; + } + } + + if (usb3_1) { /* does xhci provide a PSI table for SSA speed attributes? */ - if (xhci->usb3_rhub.psi_count) { + if (port_cap->psi_count) { /* two SSA entries for each unique PSI ID, RX and TX */ - ssa_count = xhci->usb3_rhub.psi_uid_count * 2; + ssa_count = port_cap->psi_uid_count * 2; ssa_size = ssa_count * sizeof(u32); ssp_cap_size -= 16; /* skip copying the default SSA */ } desc_size += ssp_cap_size; - usb3_1 = true; } memcpy(buf, &usb_bos_descriptor, min(desc_size, wLength)); @@ -99,7 +108,7 @@ static int xhci_create_usb3_bos_desc(struct xhci_hcd *xhci, char *buf, } /* If PSI table exists, add the custom speed attributes from it */ - if (usb3_1 && xhci->usb3_rhub.psi_count) { + if (usb3_1 && port_cap->psi_count) { u32 ssp_cap_base, bm_attrib, psi, psi_mant, psi_exp; int offset; @@ -111,7 +120,7 @@ static int xhci_create_usb3_bos_desc(struct xhci_hcd *xhci, char *buf, /* attribute count SSAC bits 4:0 and ID count SSIC bits 8:5 */ bm_attrib = (ssa_count - 1) & 0x1f; - bm_attrib |= (xhci->usb3_rhub.psi_uid_count - 1) << 5; + bm_attrib |= (port_cap->psi_uid_count - 1) << 5; put_unaligned_le32(bm_attrib, &buf[ssp_cap_base + 4]); if (wLength < desc_size + ssa_size) @@ -124,8 +133,8 @@ static int xhci_create_usb3_bos_desc(struct xhci_hcd *xhci, char *buf, * USB 3.1 requires two SSA entries (RX and TX) for every link */ offset = desc_size; - for (i = 0; i < xhci->usb3_rhub.psi_count; i++) { - psi = xhci->usb3_rhub.psi[i]; + for (i = 0; i < port_cap->psi_count; i++) { + psi = port_cap->psi[i]; psi &= ~USB_SSP_SUBLINK_SPEED_RSVD; psi_exp = XHCI_EXT_PORT_PSIE(psi); psi_mant = XHCI_EXT_PORT_PSIM(psi); diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 0e2701649369..884c601bfa15 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1915,17 +1915,17 @@ no_bw: xhci->usb3_rhub.num_ports = 0; xhci->num_active_eps = 0; kfree(xhci->usb2_rhub.ports); - kfree(xhci->usb2_rhub.psi); kfree(xhci->usb3_rhub.ports); - kfree(xhci->usb3_rhub.psi); kfree(xhci->hw_ports); kfree(xhci->rh_bw); kfree(xhci->ext_caps); + for (i = 0; i < xhci->num_port_caps; i++) + kfree(xhci->port_caps[i].psi); + kfree(xhci->port_caps); + xhci->num_port_caps = 0; xhci->usb2_rhub.ports = NULL; - xhci->usb2_rhub.psi = NULL; xhci->usb3_rhub.ports = NULL; - xhci->usb3_rhub.psi = NULL; xhci->hw_ports = NULL; xhci->rh_bw = NULL; xhci->ext_caps = NULL; @@ -2126,6 +2126,7 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports, u8 major_revision, minor_revision; struct xhci_hub *rhub; struct device *dev = xhci_to_hcd(xhci)->self.sysdev; + struct xhci_port_cap *port_cap; temp = readl(addr); major_revision = XHCI_EXT_PORT_MAJOR(temp); @@ -2160,31 +2161,39 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports, /* WTF? "Valid values are ‘1’ to MaxPorts" */ return; - rhub->psi_count = XHCI_EXT_PORT_PSIC(temp); - if (rhub->psi_count) { - rhub->psi = kcalloc_node(rhub->psi_count, sizeof(*rhub->psi), - GFP_KERNEL, dev_to_node(dev)); - if (!rhub->psi) - rhub->psi_count = 0; + port_cap = &xhci->port_caps[xhci->num_port_caps++]; + if (xhci->num_port_caps > max_caps) + return; + + port_cap->maj_rev = major_revision; + port_cap->min_rev = minor_revision; + port_cap->psi_count = XHCI_EXT_PORT_PSIC(temp); + + if (port_cap->psi_count) { + port_cap->psi = kcalloc_node(port_cap->psi_count, + sizeof(*port_cap->psi), + GFP_KERNEL, dev_to_node(dev)); + if (!port_cap->psi) + port_cap->psi_count = 0; - rhub->psi_uid_count++; - for (i = 0; i < rhub->psi_count; i++) { - rhub->psi[i] = readl(addr + 4 + i); + port_cap->psi_uid_count++; + for (i = 0; i < port_cap->psi_count; i++) { + port_cap->psi[i] = readl(addr + 4 + i); /* count unique ID values, two consecutive entries can * have the same ID if link is assymetric */ - if (i && (XHCI_EXT_PORT_PSIV(rhub->psi[i]) != - XHCI_EXT_PORT_PSIV(rhub->psi[i - 1]))) - rhub->psi_uid_count++; + if (i && (XHCI_EXT_PORT_PSIV(port_cap->psi[i]) != + XHCI_EXT_PORT_PSIV(port_cap->psi[i - 1]))) + port_cap->psi_uid_count++; xhci_dbg(xhci, "PSIV:%d PSIE:%d PLT:%d PFD:%d LP:%d PSIM:%d\n", - XHCI_EXT_PORT_PSIV(rhub->psi[i]), - XHCI_EXT_PORT_PSIE(rhub->psi[i]), - XHCI_EXT_PORT_PLT(rhub->psi[i]), - XHCI_EXT_PORT_PFD(rhub->psi[i]), - XHCI_EXT_PORT_LP(rhub->psi[i]), - XHCI_EXT_PORT_PSIM(rhub->psi[i])); + XHCI_EXT_PORT_PSIV(port_cap->psi[i]), + XHCI_EXT_PORT_PSIE(port_cap->psi[i]), + XHCI_EXT_PORT_PLT(port_cap->psi[i]), + XHCI_EXT_PORT_PFD(port_cap->psi[i]), + XHCI_EXT_PORT_LP(port_cap->psi[i]), + XHCI_EXT_PORT_PSIM(port_cap->psi[i])); } } /* cache usb2 port capabilities */ @@ -2219,6 +2228,7 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports, continue; } hw_port->rhub = rhub; + hw_port->port_cap = port_cap; rhub->num_ports++; } /* FIXME: Should we disable ports not in the Extended Capabilities? */ @@ -2309,6 +2319,11 @@ static int xhci_setup_port_arrays(struct xhci_hcd *xhci, gfp_t flags) if (!xhci->ext_caps) return -ENOMEM; + xhci->port_caps = kcalloc_node(cap_count, sizeof(*xhci->port_caps), + flags, dev_to_node(dev)); + if (!xhci->port_caps) + return -ENOMEM; + offset = cap_start; while (offset) { diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 13d8838cd552..3ecee10fdcdc 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1702,12 +1702,20 @@ struct xhci_bus_state { * Intel Lynx Point LP xHCI host. */ #define XHCI_MAX_REXIT_TIMEOUT_MS 20 +struct xhci_port_cap { + u32 *psi; /* array of protocol speed ID entries */ + u8 psi_count; + u8 psi_uid_count; + u8 maj_rev; + u8 min_rev; +}; struct xhci_port { __le32 __iomem *addr; int hw_portnum; int hcd_portnum; struct xhci_hub *rhub; + struct xhci_port_cap *port_cap; }; struct xhci_hub { @@ -1719,9 +1727,6 @@ struct xhci_hub { /* supported prococol extended capabiliy values */ u8 maj_rev; u8 min_rev; - u32 *psi; /* array of protocol speed ID entries */ - u8 psi_count; - u8 psi_uid_count; }; /* There is one xhci_hcd structure per controller */ @@ -1880,6 +1885,9 @@ struct xhci_hcd { /* cached usb2 extened protocol capabilites */ u32 *ext_caps; unsigned int num_ext_caps; + /* cached extended protocol port capabilities */ + struct xhci_port_cap *port_caps; + unsigned int num_port_caps; /* Compliance Mode Recovery Data */ struct timer_list comp_mode_recovery_timer; u32 port_status_u0; -- cgit v1.2.3-59-g8ed1b From b692056db8ecc7f452b934f016c17348282b7699 Mon Sep 17 00:00:00 2001 From: Richard Dodd Date: Wed, 12 Feb 2020 14:22:18 +0000 Subject: USB: Fix novation SourceControl XL after suspend Currently, the SourceControl will stay in power-down mode after resuming from suspend. This patch resets the device after suspend to power it up. Signed-off-by: Richard Dodd Cc: stable Link: https://lore.kernel.org/r/20200212142220.36892-1-richard.o.dodd@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index f27468966a3d..2b24336a72e5 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -449,6 +449,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* INTEL VALUE SSD */ { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME }, + /* novation SoundControl XL */ + { USB_DEVICE(0x1235, 0x0061), .driver_info = USB_QUIRK_RESET_RESUME }, + { } /* terminating entry must be last */ }; -- cgit v1.2.3-59-g8ed1b From 461d8deb26a7d70254bc0391feb4fd8a95e674e8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 11 Feb 2020 20:04:21 -0800 Subject: USB: misc: iowarrior: add support for 2 OEMed devices Add support for two OEM devices that are identical to existing IO-Warrior devices, except for the USB device id. Cc: Christoph Jung Cc: stable Link: https://lore.kernel.org/r/20200212040422.2991-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/iowarrior.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index dce44fbf031f..990acbe14852 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -34,6 +34,10 @@ /* full speed iowarrior */ #define USB_DEVICE_ID_CODEMERCS_IOW56 0x1503 +/* OEMed devices */ +#define USB_DEVICE_ID_CODEMERCS_IOW24SAG 0x158a +#define USB_DEVICE_ID_CODEMERCS_IOW56AM 0x158b + /* Get a minor range for your devices from the usb maintainer */ #ifdef CONFIG_USB_DYNAMIC_MINORS #define IOWARRIOR_MINOR_BASE 0 @@ -133,6 +137,8 @@ static const struct usb_device_id iowarrior_ids[] = { {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOWPV1)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOWPV2)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW24SAG)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56AM)}, {} /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, iowarrior_ids); @@ -357,6 +363,7 @@ static ssize_t iowarrior_write(struct file *file, } switch (dev->product_id) { case USB_DEVICE_ID_CODEMERCS_IOW24: + case USB_DEVICE_ID_CODEMERCS_IOW24SAG: case USB_DEVICE_ID_CODEMERCS_IOWPV1: case USB_DEVICE_ID_CODEMERCS_IOWPV2: case USB_DEVICE_ID_CODEMERCS_IOW40: @@ -371,6 +378,7 @@ static ssize_t iowarrior_write(struct file *file, goto exit; break; case USB_DEVICE_ID_CODEMERCS_IOW56: + case USB_DEVICE_ID_CODEMERCS_IOW56AM: /* The IOW56 uses asynchronous IO and more urbs */ if (atomic_read(&dev->write_busy) == MAX_WRITES_IN_FLIGHT) { /* Wait until we are below the limit for submitted urbs */ @@ -493,6 +501,7 @@ static long iowarrior_ioctl(struct file *file, unsigned int cmd, switch (cmd) { case IOW_WRITE: if (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW24 || + dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW24SAG || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOWPV1 || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOWPV2 || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW40) { @@ -767,7 +776,8 @@ static int iowarrior_probe(struct usb_interface *interface, goto error; } - if (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) { + if ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM)) { res = usb_find_last_int_out_endpoint(iface_desc, &dev->int_out_endpoint); if (res) { @@ -780,7 +790,8 @@ static int iowarrior_probe(struct usb_interface *interface, /* we have to check the report_size often, so remember it in the endianness suitable for our machine */ dev->report_size = usb_endpoint_maxp(dev->int_in_endpoint); if ((dev->interface->cur_altsetting->desc.bInterfaceNumber == 0) && - (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56)) + ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM))) /* IOWarrior56 has wMaxPacketSize different from report size */ dev->report_size = 7; -- cgit v1.2.3-59-g8ed1b From 5f6f8da2d7b5a431d3f391d0d73ace8edfb42af7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 11 Feb 2020 20:04:22 -0800 Subject: USB: misc: iowarrior: add support for the 28 and 28L devices Add new device ids for the 28 and 28L devices. These have 4 interfaces instead of 2, but the driver binds the same, so the driver changes are minimal. Cc: Christoph Jung Cc: stable Link: https://lore.kernel.org/r/20200212040422.2991-2-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/iowarrior.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 990acbe14852..d20b60acfe8a 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -33,6 +33,9 @@ #define USB_DEVICE_ID_CODEMERCS_IOWPV2 0x1512 /* full speed iowarrior */ #define USB_DEVICE_ID_CODEMERCS_IOW56 0x1503 +/* fuller speed iowarrior */ +#define USB_DEVICE_ID_CODEMERCS_IOW28 0x1504 +#define USB_DEVICE_ID_CODEMERCS_IOW28L 0x1505 /* OEMed devices */ #define USB_DEVICE_ID_CODEMERCS_IOW24SAG 0x158a @@ -139,6 +142,8 @@ static const struct usb_device_id iowarrior_ids[] = { {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW24SAG)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56AM)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW28)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW28L)}, {} /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, iowarrior_ids); @@ -379,6 +384,8 @@ static ssize_t iowarrior_write(struct file *file, break; case USB_DEVICE_ID_CODEMERCS_IOW56: case USB_DEVICE_ID_CODEMERCS_IOW56AM: + case USB_DEVICE_ID_CODEMERCS_IOW28: + case USB_DEVICE_ID_CODEMERCS_IOW28L: /* The IOW56 uses asynchronous IO and more urbs */ if (atomic_read(&dev->write_busy) == MAX_WRITES_IN_FLIGHT) { /* Wait until we are below the limit for submitted urbs */ @@ -777,7 +784,9 @@ static int iowarrior_probe(struct usb_interface *interface, } if ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) || - (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM)) { + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L)) { res = usb_find_last_int_out_endpoint(iface_desc, &dev->int_out_endpoint); if (res) { @@ -791,7 +800,9 @@ static int iowarrior_probe(struct usb_interface *interface, dev->report_size = usb_endpoint_maxp(dev->int_in_endpoint); if ((dev->interface->cur_altsetting->desc.bInterfaceNumber == 0) && ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) || - (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM))) + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L))) /* IOWarrior56 has wMaxPacketSize different from report size */ dev->report_size = 7; -- cgit v1.2.3-59-g8ed1b From 1880b1f1d686b17387b5bf45654eb1d087ead918 Mon Sep 17 00:00:00 2001 From: Ravulapati Vishnu vardhan rao Date: Tue, 11 Feb 2020 18:42:28 +0530 Subject: ASoC: amd: Buffer Size instead of MAX Buffer Because of MAX BUFFER size in register,when user/app give small buffer size produces noise of old data in buffer. This patch rectifies this noise when using different buffer sizes less than MAX BUFFER. Signed-off-by: Ravulapati Vishnu vardhan rao Link: https://lore.kernel.org/r/1581426768-8937-1-git-send-email-Vishnuvardhanrao.Ravulapati@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/raven/acp3x-i2s.c | 8 ++++++++ sound/soc/amd/raven/acp3x-pcm-dma.c | 7 +------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/sound/soc/amd/raven/acp3x-i2s.c b/sound/soc/amd/raven/acp3x-i2s.c index 31cd4008e33f..91a388184e52 100644 --- a/sound/soc/amd/raven/acp3x-i2s.c +++ b/sound/soc/amd/raven/acp3x-i2s.c @@ -170,6 +170,7 @@ static int acp3x_i2s_trigger(struct snd_pcm_substream *substream, struct snd_soc_card *card; struct acp3x_platform_info *pinfo; u32 ret, val, period_bytes, reg_val, ier_val, water_val; + u32 buf_size, buf_reg; prtd = substream->private_data; rtd = substream->runtime->private_data; @@ -183,6 +184,8 @@ static int acp3x_i2s_trigger(struct snd_pcm_substream *substream, } period_bytes = frames_to_bytes(substream->runtime, substream->runtime->period_size); + buf_size = frames_to_bytes(substream->runtime, + substream->runtime->buffer_size); switch (cmd) { case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_RESUME: @@ -196,6 +199,7 @@ static int acp3x_i2s_trigger(struct snd_pcm_substream *substream, mmACP_BT_TX_INTR_WATERMARK_SIZE; reg_val = mmACP_BTTDM_ITER; ier_val = mmACP_BTTDM_IER; + buf_reg = mmACP_BT_TX_RINGBUFSIZE; break; case I2S_SP_INSTANCE: default: @@ -203,6 +207,7 @@ static int acp3x_i2s_trigger(struct snd_pcm_substream *substream, mmACP_I2S_TX_INTR_WATERMARK_SIZE; reg_val = mmACP_I2STDM_ITER; ier_val = mmACP_I2STDM_IER; + buf_reg = mmACP_I2S_TX_RINGBUFSIZE; } } else { switch (rtd->i2s_instance) { @@ -211,6 +216,7 @@ static int acp3x_i2s_trigger(struct snd_pcm_substream *substream, mmACP_BT_RX_INTR_WATERMARK_SIZE; reg_val = mmACP_BTTDM_IRER; ier_val = mmACP_BTTDM_IER; + buf_reg = mmACP_BT_RX_RINGBUFSIZE; break; case I2S_SP_INSTANCE: default: @@ -218,9 +224,11 @@ static int acp3x_i2s_trigger(struct snd_pcm_substream *substream, mmACP_I2S_RX_INTR_WATERMARK_SIZE; reg_val = mmACP_I2STDM_IRER; ier_val = mmACP_I2STDM_IER; + buf_reg = mmACP_I2S_RX_RINGBUFSIZE; } } rv_writel(period_bytes, rtd->acp3x_base + water_val); + rv_writel(buf_size, rtd->acp3x_base + buf_reg); val = rv_readl(rtd->acp3x_base + reg_val); val = val | BIT(0); rv_writel(val, rtd->acp3x_base + reg_val); diff --git a/sound/soc/amd/raven/acp3x-pcm-dma.c b/sound/soc/amd/raven/acp3x-pcm-dma.c index aecc3c061679..d62c0d90c41e 100644 --- a/sound/soc/amd/raven/acp3x-pcm-dma.c +++ b/sound/soc/amd/raven/acp3x-pcm-dma.c @@ -110,7 +110,7 @@ static void config_acp3x_dma(struct i2s_stream_instance *rtd, int direction) { u16 page_idx; u32 low, high, val, acp_fifo_addr, reg_fifo_addr; - u32 reg_ringbuf_size, reg_dma_size, reg_fifo_size; + u32 reg_dma_size, reg_fifo_size; dma_addr_t addr; addr = rtd->dma_addr; @@ -157,7 +157,6 @@ static void config_acp3x_dma(struct i2s_stream_instance *rtd, int direction) if (direction == SNDRV_PCM_STREAM_PLAYBACK) { switch (rtd->i2s_instance) { case I2S_BT_INSTANCE: - reg_ringbuf_size = mmACP_BT_TX_RINGBUFSIZE; reg_dma_size = mmACP_BT_TX_DMA_SIZE; acp_fifo_addr = ACP_SRAM_PTE_OFFSET + BT_PB_FIFO_ADDR_OFFSET; @@ -169,7 +168,6 @@ static void config_acp3x_dma(struct i2s_stream_instance *rtd, int direction) case I2S_SP_INSTANCE: default: - reg_ringbuf_size = mmACP_I2S_TX_RINGBUFSIZE; reg_dma_size = mmACP_I2S_TX_DMA_SIZE; acp_fifo_addr = ACP_SRAM_PTE_OFFSET + SP_PB_FIFO_ADDR_OFFSET; @@ -181,7 +179,6 @@ static void config_acp3x_dma(struct i2s_stream_instance *rtd, int direction) } else { switch (rtd->i2s_instance) { case I2S_BT_INSTANCE: - reg_ringbuf_size = mmACP_BT_RX_RINGBUFSIZE; reg_dma_size = mmACP_BT_RX_DMA_SIZE; acp_fifo_addr = ACP_SRAM_PTE_OFFSET + BT_CAPT_FIFO_ADDR_OFFSET; @@ -193,7 +190,6 @@ static void config_acp3x_dma(struct i2s_stream_instance *rtd, int direction) case I2S_SP_INSTANCE: default: - reg_ringbuf_size = mmACP_I2S_RX_RINGBUFSIZE; reg_dma_size = mmACP_I2S_RX_DMA_SIZE; acp_fifo_addr = ACP_SRAM_PTE_OFFSET + SP_CAPT_FIFO_ADDR_OFFSET; @@ -203,7 +199,6 @@ static void config_acp3x_dma(struct i2s_stream_instance *rtd, int direction) rtd->acp3x_base + mmACP_I2S_RX_RINGBUFADDR); } } - rv_writel(MAX_BUFFER, rtd->acp3x_base + reg_ringbuf_size); rv_writel(DMA_SIZE, rtd->acp3x_base + reg_dma_size); rv_writel(acp_fifo_addr, rtd->acp3x_base + reg_fifo_addr); rv_writel(FIFO_SIZE, rtd->acp3x_base + reg_fifo_size); -- cgit v1.2.3-59-g8ed1b From 57d7713196ccd83010fcaa82b9f02d740c9e6bb2 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Wed, 12 Feb 2020 11:59:47 +0100 Subject: usb: gadget: udc-xilinx: Fix xudc_stop() kernel-doc format The patch removes "driver" parameter which has been removed without updating kernel-doc format. Fixes: 22835b807e7c ("usb: gadget: remove unnecessary 'driver' argument") Signed-off-by: Michal Simek Link: https://lore.kernel.org/r/c753b529bdcdfdd40a3cf69121527ec8c63775cb.1581505183.git.michal.simek@xilinx.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/udc-xilinx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/gadget/udc/udc-xilinx.c b/drivers/usb/gadget/udc/udc-xilinx.c index 29d8e5f8bb58..b1cfc8279c3d 100644 --- a/drivers/usb/gadget/udc/udc-xilinx.c +++ b/drivers/usb/gadget/udc/udc-xilinx.c @@ -1399,7 +1399,6 @@ err: /** * xudc_stop - stops the device. * @gadget: pointer to the usb gadget structure - * @driver: pointer to usb gadget driver structure * * Return: zero always */ -- cgit v1.2.3-59-g8ed1b From ead68df94d248c80fdbae220ae5425eb5af2e753 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 12 Feb 2020 20:15:29 +0100 Subject: KVM: x86: enable -Werror Avoid more embarrassing mistakes. At least those that the compiler can catch. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index b19ef421084d..4654e97a05cc 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 ccflags-y += -Iarch/x86/kvm +ccflags-y += -Werror KVM := ../../../virt/kvm -- cgit v1.2.3-59-g8ed1b From fe154a2422333c4cd87bb0d8a19fb2df066cc6ee Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 3 Feb 2020 15:27:24 +0000 Subject: drm/panfrost: Remove set but not used variable 'bo' Fixes gcc '-Wunused-but-set-variable' warning: drivers/gpu/drm/panfrost/panfrost_job.c: In function 'panfrost_job_cleanup': drivers/gpu/drm/panfrost/panfrost_job.c:278:31: warning: variable 'bo' set but not used [-Wunused-but-set-variable] commit bdefca2d8dc0 ("drm/panfrost: Add the panfrost_gem_mapping concept") involved this unused variable. Reported-by: Hulk Robot Signed-off-by: YueHaibing Reviewed-by: Steven Price Reviewed-by: Alyssas Rosenzweig Signed-off-by: Rob Herring Link: https://patchwork.freedesktop.org/patch/msgid/20200203152724.42611-1-yuehaibing@huawei.com --- drivers/gpu/drm/panfrost/panfrost_job.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index 7157dfd7dea3..9a1a72a748e7 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -280,12 +280,8 @@ static void panfrost_job_cleanup(struct kref *ref) } if (job->bos) { - struct panfrost_gem_object *bo; - - for (i = 0; i < job->bo_count; i++) { - bo = to_panfrost_bo(job->bos[i]); + for (i = 0; i < job->bo_count; i++) drm_gem_object_put_unlocked(job->bos[i]); - } kvfree(job->bos); } -- cgit v1.2.3-59-g8ed1b From 6cd1ed50efd88261298577cd92a14f2768eddeeb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 Feb 2020 11:07:21 -0800 Subject: vt: vt_ioctl: fix race in VT_RESIZEX We need to make sure vc_cons[i].d is not NULL after grabbing console_lock(), or risk a crash. general protection fault, probably for non-canonical address 0xdffffc0000000068: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000340-0x0000000000000347] CPU: 1 PID: 19462 Comm: syz-executor.5 Not tainted 5.5.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:vt_ioctl+0x1f96/0x26d0 drivers/tty/vt/vt_ioctl.c:883 Code: 74 41 e8 bd a6 84 fd 48 89 d8 48 c1 e8 03 42 80 3c 28 00 0f 85 e4 04 00 00 48 8b 03 48 8d b8 40 03 00 00 48 89 fa 48 c1 ea 03 <42> 0f b6 14 2a 84 d2 74 09 80 fa 03 0f 8e b1 05 00 00 44 89 b8 40 RSP: 0018:ffffc900086d7bb0 EFLAGS: 00010202 RAX: 0000000000000000 RBX: ffffffff8c34ee88 RCX: ffffc9001415c000 RDX: 0000000000000068 RSI: ffffffff83f0e6e3 RDI: 0000000000000340 RBP: ffffc900086d7cd0 R08: ffff888054ce0100 R09: fffffbfff16a2f6d R10: ffff888054ce0998 R11: ffff888054ce0100 R12: 000000000000001d R13: dffffc0000000000 R14: 1ffff920010daf79 R15: 000000000000ff7f FS: 00007f7d13c12700(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffd477e3c38 CR3: 0000000095d0a000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: tty_ioctl+0xa37/0x14f0 drivers/tty/tty_io.c:2660 vfs_ioctl fs/ioctl.c:47 [inline] ksys_ioctl+0x123/0x180 fs/ioctl.c:763 __do_sys_ioctl fs/ioctl.c:772 [inline] __se_sys_ioctl fs/ioctl.c:770 [inline] __x64_sys_ioctl+0x73/0xb0 fs/ioctl.c:770 do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x45b399 Code: ad b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f7d13c11c78 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007f7d13c126d4 RCX: 000000000045b399 RDX: 0000000020000080 RSI: 000000000000560a RDI: 0000000000000003 RBP: 000000000075bf20 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 0000000000000666 R14: 00000000004c7f04 R15: 000000000075bf2c Modules linked in: ---[ end trace 80970faf7a67eb77 ]--- RIP: 0010:vt_ioctl+0x1f96/0x26d0 drivers/tty/vt/vt_ioctl.c:883 Code: 74 41 e8 bd a6 84 fd 48 89 d8 48 c1 e8 03 42 80 3c 28 00 0f 85 e4 04 00 00 48 8b 03 48 8d b8 40 03 00 00 48 89 fa 48 c1 ea 03 <42> 0f b6 14 2a 84 d2 74 09 80 fa 03 0f 8e b1 05 00 00 44 89 b8 40 RSP: 0018:ffffc900086d7bb0 EFLAGS: 00010202 RAX: 0000000000000000 RBX: ffffffff8c34ee88 RCX: ffffc9001415c000 RDX: 0000000000000068 RSI: ffffffff83f0e6e3 RDI: 0000000000000340 RBP: ffffc900086d7cd0 R08: ffff888054ce0100 R09: fffffbfff16a2f6d R10: ffff888054ce0998 R11: ffff888054ce0100 R12: 000000000000001d R13: dffffc0000000000 R14: 1ffff920010daf79 R15: 000000000000ff7f FS: 00007f7d13c12700(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffd477e3c38 CR3: 0000000095d0a000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Cc: stable Reported-by: syzbot Link: https://lore.kernel.org/r/20200210190721.200418-1-edumazet@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt_ioctl.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index 8b0ed139592f..ee6c91ef1f6c 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -876,15 +876,20 @@ int vt_ioctl(struct tty_struct *tty, return -EINVAL; for (i = 0; i < MAX_NR_CONSOLES; i++) { + struct vc_data *vcp; + if (!vc_cons[i].d) continue; console_lock(); - if (v.v_vlin) - vc_cons[i].d->vc_scan_lines = v.v_vlin; - if (v.v_clin) - vc_cons[i].d->vc_font.height = v.v_clin; - vc_cons[i].d->vc_resize_user = 1; - vc_resize(vc_cons[i].d, v.v_cols, v.v_rows); + vcp = vc_cons[i].d; + if (vcp) { + if (v.v_vlin) + vcp->vc_scan_lines = v.v_vlin; + if (v.v_clin) + vcp->vc_font.height = v.v_clin; + vcp->vc_resize_user = 1; + vc_resize(vcp, v.v_cols, v.v_rows); + } console_unlock(); } break; -- cgit v1.2.3-59-g8ed1b From f76707831829530ffdd3888bebc108aecefccaa0 Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Tue, 11 Feb 2020 14:16:01 +0800 Subject: tty: serial: imx: setup the correct sg entry for tx dma MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There has oops as below happen on i.MX8MP EVK platform that has 6G bytes DDR memory. when (xmit->tail < xmit->head) && (xmit->head == 0), it setups one sg entry with sg->length is zero: sg_set_buf(sgl + 1, xmit->buf, xmit->head); if xmit->buf is allocated from >4G address space, and SDMA only support <4G address space, then dma_map_sg() will call swiotlb_map() to do bounce buffer copying and mapping. But swiotlb_map() don't allow sg entry's length is zero, otherwise report BUG_ON(). So the patch is to correct the tx DMA scatter list. Oops: [ 287.675715] kernel BUG at kernel/dma/swiotlb.c:497! [ 287.680592] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [ 287.686075] Modules linked in: [ 287.689133] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.4.3-00016-g3fdc4e0-dirty #10 [ 287.696872] Hardware name: FSL i.MX8MP EVK (DT) [ 287.701402] pstate: 80000085 (Nzcv daIf -PAN -UAO) [ 287.706199] pc : swiotlb_tbl_map_single+0x1fc/0x310 [ 287.711076] lr : swiotlb_map+0x60/0x148 [ 287.714909] sp : ffff800010003c00 [ 287.718221] x29: ffff800010003c00 x28: 0000000000000000 [ 287.723533] x27: 0000000000000040 x26: ffff800011ae0000 [ 287.728844] x25: ffff800011ae09f8 x24: 0000000000000000 [ 287.734155] x23: 00000001b7af9000 x22: 0000000000000000 [ 287.739465] x21: ffff000176409c10 x20: 00000000001f7ffe [ 287.744776] x19: ffff000176409c10 x18: 000000000000002e [ 287.750087] x17: 0000000000000000 x16: 0000000000000000 [ 287.755397] x15: 0000000000000000 x14: 0000000000000000 [ 287.760707] x13: ffff00017f334000 x12: 0000000000000001 [ 287.766018] x11: 00000000001fffff x10: 0000000000000000 [ 287.771328] x9 : 0000000000000003 x8 : 0000000000000000 [ 287.776638] x7 : 0000000000000000 x6 : 0000000000000000 [ 287.781949] x5 : 0000000000200000 x4 : 0000000000000000 [ 287.787259] x3 : 0000000000000001 x2 : 00000001b7af9000 [ 287.792570] x1 : 00000000fbfff000 x0 : 0000000000000000 [ 287.797881] Call trace: [ 287.800328] swiotlb_tbl_map_single+0x1fc/0x310 [ 287.804859] swiotlb_map+0x60/0x148 [ 287.808347] dma_direct_map_page+0xf0/0x130 [ 287.812530] dma_direct_map_sg+0x78/0xe0 [ 287.816453] imx_uart_dma_tx+0x134/0x2f8 [ 287.820374] imx_uart_dma_tx_callback+0xd8/0x168 [ 287.824992] vchan_complete+0x194/0x200 [ 287.828828] tasklet_action_common.isra.0+0x154/0x1a0 [ 287.833879] tasklet_action+0x24/0x30 [ 287.837540] __do_softirq+0x120/0x23c [ 287.841202] irq_exit+0xb8/0xd8 [ 287.844343] __handle_domain_irq+0x64/0xb8 [ 287.848438] gic_handle_irq+0x5c/0x148 [ 287.852185] el1_irq+0xb8/0x180 [ 287.855327] cpuidle_enter_state+0x84/0x360 [ 287.859508] cpuidle_enter+0x34/0x48 [ 287.863083] call_cpuidle+0x18/0x38 [ 287.866571] do_idle+0x1e0/0x280 [ 287.869798] cpu_startup_entry+0x20/0x40 [ 287.873721] rest_init+0xd4/0xe0 [ 287.876949] arch_call_rest_init+0xc/0x14 [ 287.880958] start_kernel+0x420/0x44c [ 287.884622] Code: 9124c021 9417aff8 a94363f7 17ffffd5 (d4210000) [ 287.890718] ---[ end trace 5bc44c4ab6b009ce ]--- [ 287.895334] Kernel panic - not syncing: Fatal exception in interrupt [ 287.901686] SMP: stopping secondary CPUs [ 288.905607] SMP: failed to stop secondary CPUs 0-1 [ 288.910395] Kernel Offset: disabled [ 288.913882] CPU features: 0x0002,2000200c [ 288.917888] Memory Limit: none [ 288.920944] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- Reported-by: Eagle Zhou Tested-by: Eagle Zhou Signed-off-by: Fugang Duan Cc: stable Fixes: 7942f8577f2a ("serial: imx: TX DMA: clean up sg initialization") Reviewed-by: Uwe Kleine-König Link: https://lore.kernel.org/r/1581401761-6378-1-git-send-email-fugang.duan@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 0c6c63166250..d337782b3648 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -599,7 +599,7 @@ static void imx_uart_dma_tx(struct imx_port *sport) sport->tx_bytes = uart_circ_chars_pending(xmit); - if (xmit->tail < xmit->head) { + if (xmit->tail < xmit->head || xmit->head == 0) { sport->dma_tx_nents = 1; sg_init_one(sgl, xmit->buf + xmit->tail, sport->tx_bytes); } else { -- cgit v1.2.3-59-g8ed1b From 7febbcbc48fc92e3f33863b32ed715ba4aff18c4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 11 Feb 2020 15:55:59 +0200 Subject: serial: 8250: Check UPF_IRQ_SHARED in advance The commit 54e53b2e8081 ("tty: serial: 8250: pass IRQ shared flag to UART ports") nicely explained the problem: ---8<---8<--- On some systems IRQ lines between multiple UARTs might be shared. If so, the irqflags have to be configured accordingly. The reason is: The 8250 port startup code performs IRQ tests *before* the IRQ handler for that particular port is registered. This is performed in serial8250_do_startup(). This function checks whether IRQF_SHARED is configured and only then disables the IRQ line while testing. This test is performed upon each open() of the UART device. Imagine two UARTs share the same IRQ line: On is already opened and the IRQ is active. When the second UART is opened, the IRQ line has to be disabled while performing IRQ tests. Otherwise an IRQ might handler might be invoked, but the IRQ itself cannot be handled, because the corresponding handler isn't registered, yet. That's because the 8250 code uses a chain-handler and invokes the corresponding port's IRQ handling routines himself. Unfortunately this IRQF_SHARED flag isn't configured for UARTs probed via device tree even if the IRQs are shared. This way, the actual and shared IRQ line isn't disabled while performing tests and the kernel correctly detects a spurious IRQ. So, adding this flag to the DT probe solves the issue. Note: The UPF_SHARE_IRQ flag is configured unconditionally. Therefore, the IRQF_SHARED flag can be set unconditionally as well. Example stack trace by performing `echo 1 > /dev/ttyS2` on a non-patched system: |irq 85: nobody cared (try booting with the "irqpoll" option) | [...] |handlers: |[] irq_default_primary_handler threaded [] serial8250_interrupt |Disabling IRQ #85 ---8<---8<--- But unfortunately didn't fix the root cause. Let's try again here by moving IRQ flag assignment from serial_link_irq_chain() to serial8250_do_startup(). This should fix the similar issue reported for 8250_pnp case. Since this change we don't need to have custom solutions in 8250_aspeed_vuart and 8250_of drivers, thus, drop them. Fixes: 1c2f04937b3e ("serial: 8250: add IRQ trigger support") Reported-by: Li RongQing Cc: Kurt Kanzenbach Cc: Vikram Pandita Signed-off-by: Andy Shevchenko Cc: stable Acked-by: Kurt Kanzenbach Link: https://lore.kernel.org/r/20200211135559.85960-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_aspeed_vuart.c | 1 - drivers/tty/serial/8250/8250_core.c | 5 ++--- drivers/tty/serial/8250/8250_of.c | 1 - drivers/tty/serial/8250/8250_port.c | 4 ++++ 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serial/8250/8250_aspeed_vuart.c b/drivers/tty/serial/8250/8250_aspeed_vuart.c index d657aa14c3e4..c33e02cbde93 100644 --- a/drivers/tty/serial/8250/8250_aspeed_vuart.c +++ b/drivers/tty/serial/8250/8250_aspeed_vuart.c @@ -446,7 +446,6 @@ static int aspeed_vuart_probe(struct platform_device *pdev) port.port.line = rc; port.port.irq = irq_of_parse_and_map(np, 0); - port.port.irqflags = IRQF_SHARED; port.port.handle_irq = aspeed_vuart_handle_irq; port.port.iotype = UPIO_MEM; port.port.type = PORT_16550A; diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 0894a22fd702..f2a33c9082a6 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -174,7 +174,7 @@ static int serial_link_irq_chain(struct uart_8250_port *up) struct hlist_head *h; struct hlist_node *n; struct irq_info *i; - int ret, irq_flags = up->port.flags & UPF_SHARE_IRQ ? IRQF_SHARED : 0; + int ret; mutex_lock(&hash_mutex); @@ -209,9 +209,8 @@ static int serial_link_irq_chain(struct uart_8250_port *up) INIT_LIST_HEAD(&up->list); i->head = &up->list; spin_unlock_irq(&i->lock); - irq_flags |= up->port.irqflags; ret = request_irq(up->port.irq, serial8250_interrupt, - irq_flags, up->port.name, i); + up->port.irqflags, up->port.name, i); if (ret < 0) serial_do_unlink(i, up); } diff --git a/drivers/tty/serial/8250/8250_of.c b/drivers/tty/serial/8250/8250_of.c index 531ad67395e0..f6687756ec5e 100644 --- a/drivers/tty/serial/8250/8250_of.c +++ b/drivers/tty/serial/8250/8250_of.c @@ -202,7 +202,6 @@ static int of_platform_serial_setup(struct platform_device *ofdev, port->type = type; port->uartclk = clk; - port->irqflags |= IRQF_SHARED; if (of_property_read_bool(np, "no-loopback-test")) port->flags |= UPF_SKIP_TEST; diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 430e3467aff7..0325f2e53b74 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -2177,6 +2177,10 @@ int serial8250_do_startup(struct uart_port *port) } } + /* Check if we need to have shared IRQs */ + if (port->irq && (up->port.flags & UPF_SHARE_IRQ)) + up->port.irqflags |= IRQF_SHARED; + if (port->irq && !(up->port.flags & UPF_NO_THRE_TEST)) { unsigned char iir1; /* -- cgit v1.2.3-59-g8ed1b From 679aac5ead2f18d223554a52b543e1195e181811 Mon Sep 17 00:00:00 2001 From: satya priya Date: Tue, 11 Feb 2020 15:43:02 +0530 Subject: tty: serial: qcom_geni_serial: Fix RX cancel command failure RX cancel command fails when BT is switched on and off multiple times. To handle this, poll for the cancel bit in SE_GENI_S_IRQ_STATUS register instead of SE_GENI_S_CMD_CTRL_REG. As per the HPG update, handle the RX last bit after cancel command and flush out the RX FIFO buffer. Signed-off-by: satya priya Cc: stable Link: https://lore.kernel.org/r/1581415982-8793-1-git-send-email-skakit@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/qcom_geni_serial.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c index 191abb18fc2a..0bd1684cabb3 100644 --- a/drivers/tty/serial/qcom_geni_serial.c +++ b/drivers/tty/serial/qcom_geni_serial.c @@ -129,6 +129,7 @@ static int handle_rx_console(struct uart_port *uport, u32 bytes, bool drop); static int handle_rx_uart(struct uart_port *uport, u32 bytes, bool drop); static unsigned int qcom_geni_serial_tx_empty(struct uart_port *port); static void qcom_geni_serial_stop_rx(struct uart_port *uport); +static void qcom_geni_serial_handle_rx(struct uart_port *uport, bool drop); static const unsigned long root_freq[] = {7372800, 14745600, 19200000, 29491200, 32000000, 48000000, 64000000, 80000000, @@ -599,7 +600,7 @@ static void qcom_geni_serial_stop_rx(struct uart_port *uport) u32 irq_en; u32 status; struct qcom_geni_serial_port *port = to_dev_port(uport, uport); - u32 irq_clear = S_CMD_DONE_EN; + u32 s_irq_status; irq_en = readl(uport->membase + SE_GENI_S_IRQ_EN); irq_en &= ~(S_RX_FIFO_WATERMARK_EN | S_RX_FIFO_LAST_EN); @@ -615,10 +616,19 @@ static void qcom_geni_serial_stop_rx(struct uart_port *uport) return; geni_se_cancel_s_cmd(&port->se); - qcom_geni_serial_poll_bit(uport, SE_GENI_S_CMD_CTRL_REG, - S_GENI_CMD_CANCEL, false); + qcom_geni_serial_poll_bit(uport, SE_GENI_S_IRQ_STATUS, + S_CMD_CANCEL_EN, true); + /* + * If timeout occurs secondary engine remains active + * and Abort sequence is executed. + */ + s_irq_status = readl(uport->membase + SE_GENI_S_IRQ_STATUS); + /* Flush the Rx buffer */ + if (s_irq_status & S_RX_FIFO_LAST_EN) + qcom_geni_serial_handle_rx(uport, true); + writel(s_irq_status, uport->membase + SE_GENI_S_IRQ_CLEAR); + status = readl(uport->membase + SE_GENI_STATUS); - writel(irq_clear, uport->membase + SE_GENI_S_IRQ_CLEAR); if (status & S_GENI_CMD_ACTIVE) qcom_geni_serial_abort_rx(uport); } -- cgit v1.2.3-59-g8ed1b From dde2bb2da01e96c17f0a44b4a3cf72a30e66e3ef Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 6 Feb 2020 15:13:27 +0100 Subject: drm/panfrost: perfcnt: Reserve/use the AS attached to the perfcnt MMU context We need to use the AS attached to the opened FD when dumping counters. Reported-by: Antonio Caggiano Fixes: 7282f7645d06 ("drm/panfrost: Implement per FD address spaces") Cc: Signed-off-by: Boris Brezillon Reviewed-by: Steven Price Tested-by: Antonio Caggiano Signed-off-by: Rob Herring Link: https://patchwork.freedesktop.org/patch/msgid/20200206141327.446127-1-boris.brezillon@collabora.com --- drivers/gpu/drm/panfrost/panfrost_mmu.c | 7 ++++++- drivers/gpu/drm/panfrost/panfrost_perfcnt.c | 11 ++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index 763cfca886a7..3107b0738e40 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -151,7 +151,12 @@ u32 panfrost_mmu_as_get(struct panfrost_device *pfdev, struct panfrost_mmu *mmu) as = mmu->as; if (as >= 0) { int en = atomic_inc_return(&mmu->as_count); - WARN_ON(en >= NUM_JOB_SLOTS); + + /* + * AS can be retained by active jobs or a perfcnt context, + * hence the '+ 1' here. + */ + WARN_ON(en >= (NUM_JOB_SLOTS + 1)); list_move(&mmu->list, &pfdev->as_lru_list); goto out; diff --git a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c index 684820448be3..6913578d5aa7 100644 --- a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c +++ b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c @@ -73,7 +73,7 @@ static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev, struct panfrost_file_priv *user = file_priv->driver_priv; struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; struct drm_gem_shmem_object *bo; - u32 cfg; + u32 cfg, as; int ret; if (user == perfcnt->user) @@ -126,12 +126,8 @@ static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev, perfcnt->user = user; - /* - * Always use address space 0 for now. - * FIXME: this needs to be updated when we start using different - * address space. - */ - cfg = GPU_PERFCNT_CFG_AS(0) | + as = panfrost_mmu_as_get(pfdev, perfcnt->mapping->mmu); + cfg = GPU_PERFCNT_CFG_AS(as) | GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_MANUAL); /* @@ -195,6 +191,7 @@ static int panfrost_perfcnt_disable_locked(struct panfrost_device *pfdev, drm_gem_shmem_vunmap(&perfcnt->mapping->obj->base.base, perfcnt->buf); perfcnt->buf = NULL; panfrost_gem_close(&perfcnt->mapping->obj->base.base, file_priv); + panfrost_mmu_as_put(pfdev, perfcnt->mapping->mmu); panfrost_gem_mapping_put(perfcnt->mapping); perfcnt->mapping = NULL; pm_runtime_mark_last_busy(pfdev->dev); -- cgit v1.2.3-59-g8ed1b From c14335ebb92a98646ddbf447e6cacc66de5269ad Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sun, 9 Feb 2020 21:12:02 -0800 Subject: scsi: Revert "target/core: Inline transport_lun_remove_cmd()" Commit 83f85b8ec305 postponed the percpu_ref_put(&se_cmd->se_lun->lun_ref) call from command completion to the time when the final command reference is dropped. That approach is not compatible with the iSCSI target driver because the iSCSI target driver keeps the command with the highest stat_sn after it has completed until the next command is received (see also iscsit_ack_from_expstatsn()). Fix this regression by reverting commit 83f85b8ec305. Fixes: 83f85b8ec305 ("scsi: target/core: Inline transport_lun_remove_cmd()") Cc: Pavel Zakharov Cc: Mike Christie Cc: Link: https://lore.kernel.org/r/20200210051202.12934-1-bvanassche@acm.org Reported-by: Pavel Zakharov Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/target/target_core_transport.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index ea482d4b1f00..0ae9e60fc4d5 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -666,6 +666,11 @@ static int transport_cmd_check_stop_to_fabric(struct se_cmd *cmd) target_remove_from_state_list(cmd); + /* + * Clear struct se_cmd->se_lun before the handoff to FE. + */ + cmd->se_lun = NULL; + spin_lock_irqsave(&cmd->t_state_lock, flags); /* * Determine if frontend context caller is requesting the stopping of @@ -693,6 +698,17 @@ static int transport_cmd_check_stop_to_fabric(struct se_cmd *cmd) return cmd->se_tfo->check_stop_free(cmd); } +static void transport_lun_remove_cmd(struct se_cmd *cmd) +{ + struct se_lun *lun = cmd->se_lun; + + if (!lun) + return; + + if (cmpxchg(&cmd->lun_ref_active, true, false)) + percpu_ref_put(&lun->lun_ref); +} + static void target_complete_failure_work(struct work_struct *work) { struct se_cmd *cmd = container_of(work, struct se_cmd, work); @@ -783,6 +799,8 @@ static void target_handle_abort(struct se_cmd *cmd) WARN_ON_ONCE(kref_read(&cmd->cmd_kref) == 0); + transport_lun_remove_cmd(cmd); + transport_cmd_check_stop_to_fabric(cmd); } @@ -1708,6 +1726,7 @@ static void target_complete_tmr_failure(struct work_struct *work) se_cmd->se_tmr_req->response = TMR_LUN_DOES_NOT_EXIST; se_cmd->se_tfo->queue_tm_rsp(se_cmd); + transport_lun_remove_cmd(se_cmd); transport_cmd_check_stop_to_fabric(se_cmd); } @@ -1898,6 +1917,7 @@ void transport_generic_request_failure(struct se_cmd *cmd, goto queue_full; check_stop: + transport_lun_remove_cmd(cmd); transport_cmd_check_stop_to_fabric(cmd); return; @@ -2195,6 +2215,7 @@ queue_status: transport_handle_queue_full(cmd, cmd->se_dev, ret, false); return; } + transport_lun_remove_cmd(cmd); transport_cmd_check_stop_to_fabric(cmd); } @@ -2289,6 +2310,7 @@ static void target_complete_ok_work(struct work_struct *work) if (ret) goto queue_full; + transport_lun_remove_cmd(cmd); transport_cmd_check_stop_to_fabric(cmd); return; } @@ -2314,6 +2336,7 @@ static void target_complete_ok_work(struct work_struct *work) if (ret) goto queue_full; + transport_lun_remove_cmd(cmd); transport_cmd_check_stop_to_fabric(cmd); return; } @@ -2349,6 +2372,7 @@ queue_rsp: if (ret) goto queue_full; + transport_lun_remove_cmd(cmd); transport_cmd_check_stop_to_fabric(cmd); return; } @@ -2384,6 +2408,7 @@ queue_status: break; } + transport_lun_remove_cmd(cmd); transport_cmd_check_stop_to_fabric(cmd); return; @@ -2710,6 +2735,9 @@ int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks) */ if (cmd->state_active) target_remove_from_state_list(cmd); + + if (cmd->se_lun) + transport_lun_remove_cmd(cmd); } if (aborted) cmd->free_compl = &compl; @@ -2781,9 +2809,6 @@ static void target_release_cmd_kref(struct kref *kref) struct completion *abrt_compl = se_cmd->abrt_compl; unsigned long flags; - if (se_cmd->lun_ref_active) - percpu_ref_put(&se_cmd->se_lun->lun_ref); - if (se_sess) { spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); list_del_init(&se_cmd->se_cmd_list); -- cgit v1.2.3-59-g8ed1b From 0e99b2c625da181aebf1a3d13493e3f7a5057a9c Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Tue, 4 Feb 2020 16:24:13 +0100 Subject: scsi: megaraid_sas: silence a warning Add a flag to DMA memory allocation to silence a warning. This driver allocates DMA memory for IO frames. This allocation may exceed MAX_ORDER pages for few megaraid_sas controllers (controllers with very high queue depth). Consequently, the driver has logic to keep reducing the controller queue depth until the DMA memory allocation succeeds. On impacted megaraid_sas controllers there would be multiple DMA allocation failures until driver settled on an allocation that fit. These failed DMA allocation requests caused stack traces in system logs. These were not harmful and this patch silences those warnings/stack traces. [mkp: clarified commit desc] Link: https://lore.kernel.org/r/20200204152413.7107-1-thenzl@redhat.com Signed-off-by: Tomas Henzl Acked-by: Sumit Saxena Reviewed-by: Lee Duncan Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_fusion.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index f3b36fd0a0eb..b2ad96564484 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -623,7 +623,8 @@ retry_alloc: fusion->io_request_frames = dma_pool_alloc(fusion->io_request_frames_pool, - GFP_KERNEL, &fusion->io_request_frames_phys); + GFP_KERNEL | __GFP_NOWARN, + &fusion->io_request_frames_phys); if (!fusion->io_request_frames) { if (instance->max_fw_cmds >= (MEGASAS_REDUCE_QD_COUNT * 2)) { instance->max_fw_cmds -= MEGASAS_REDUCE_QD_COUNT; @@ -661,7 +662,7 @@ retry_alloc: fusion->io_request_frames = dma_pool_alloc(fusion->io_request_frames_pool, - GFP_KERNEL, + GFP_KERNEL | __GFP_NOWARN, &fusion->io_request_frames_phys); if (!fusion->io_request_frames) { -- cgit v1.2.3-59-g8ed1b From 9e661cedcc0a072d91a32cb88e0515ea26e35711 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 12 Feb 2020 10:35:30 +0100 Subject: i2c: jz4780: silence log flood on txabrt The printout for txabrt is way too talkative and is highly annoying with scanning programs like 'i2cdetect'. Reduce it to the minimum, the rest can be gained by I2C core debugging and datasheet information. Also, make it a debug printout, it won't help the regular user. Fixes: ba92222ed63a ("i2c: jz4780: Add i2c bus controller driver for Ingenic JZ4780") Reported-by: H. Nikolaus Schaller Tested-by: H. Nikolaus Schaller Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-jz4780.c | 36 ++---------------------------------- 1 file changed, 2 insertions(+), 34 deletions(-) diff --git a/drivers/i2c/busses/i2c-jz4780.c b/drivers/i2c/busses/i2c-jz4780.c index 16a67a64284a..b426fc956938 100644 --- a/drivers/i2c/busses/i2c-jz4780.c +++ b/drivers/i2c/busses/i2c-jz4780.c @@ -78,25 +78,6 @@ #define X1000_I2C_DC_STOP BIT(9) -static const char * const jz4780_i2c_abrt_src[] = { - "ABRT_7B_ADDR_NOACK", - "ABRT_10ADDR1_NOACK", - "ABRT_10ADDR2_NOACK", - "ABRT_XDATA_NOACK", - "ABRT_GCALL_NOACK", - "ABRT_GCALL_READ", - "ABRT_HS_ACKD", - "SBYTE_ACKDET", - "ABRT_HS_NORSTRT", - "SBYTE_NORSTRT", - "ABRT_10B_RD_NORSTRT", - "ABRT_MASTER_DIS", - "ARB_LOST", - "SLVFLUSH_TXFIFO", - "SLV_ARBLOST", - "SLVRD_INTX", -}; - #define JZ4780_I2C_INTST_IGC BIT(11) #define JZ4780_I2C_INTST_ISTT BIT(10) #define JZ4780_I2C_INTST_ISTP BIT(9) @@ -576,21 +557,8 @@ done: static void jz4780_i2c_txabrt(struct jz4780_i2c *i2c, int src) { - int i; - - dev_err(&i2c->adap.dev, "txabrt: 0x%08x\n", src); - dev_err(&i2c->adap.dev, "device addr=%x\n", - jz4780_i2c_readw(i2c, JZ4780_I2C_TAR)); - dev_err(&i2c->adap.dev, "send cmd count:%d %d\n", - i2c->cmd, i2c->cmd_buf[i2c->cmd]); - dev_err(&i2c->adap.dev, "receive data count:%d %d\n", - i2c->cmd, i2c->data_buf[i2c->cmd]); - - for (i = 0; i < 16; i++) { - if (src & BIT(i)) - dev_dbg(&i2c->adap.dev, "I2C TXABRT[%d]=%s\n", - i, jz4780_i2c_abrt_src[i]); - } + dev_dbg(&i2c->adap.dev, "txabrt: 0x%08x, cmd: %d, send: %d, recv: %d\n", + src, i2c->cmd, i2c->cmd_buf[i2c->cmd], i2c->data_buf[i2c->cmd]); } static inline int jz4780_i2c_xfer_read(struct jz4780_i2c *i2c, -- cgit v1.2.3-59-g8ed1b From 54498e8070e19e74498a72c7331348143e7e1f8c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 11 Feb 2020 08:47:04 -0600 Subject: i2c: altera: Fix potential integer overflow Factor out 100 from the equation and do 32-bit arithmetic (3 * clk_mhz / 10) instead of 64-bit. Notice that clk_mhz is MHz, so the multiplication will never wrap 32 bits and there is no need for div_u64(). Addresses-Coverity: 1458369 ("Unintentional integer overflow") Fixes: 0560ad576268 ("i2c: altera: Add Altera I2C Controller driver") Suggested-by: David Laight Signed-off-by: Gustavo A. R. Silva Reviewed-by: Thor Thayer Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-altera.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-altera.c b/drivers/i2c/busses/i2c-altera.c index 5255d3755411..1de23b4f3809 100644 --- a/drivers/i2c/busses/i2c-altera.c +++ b/drivers/i2c/busses/i2c-altera.c @@ -171,7 +171,7 @@ static void altr_i2c_init(struct altr_i2c_dev *idev) /* SCL Low Time */ writel(t_low, idev->base + ALTR_I2C_SCL_LOW); /* SDA Hold Time, 300ns */ - writel(div_u64(300 * clk_mhz, 1000), idev->base + ALTR_I2C_SDA_HOLD); + writel(3 * clk_mhz / 10, idev->base + ALTR_I2C_SDA_HOLD); /* Mask all master interrupt bits */ altr_i2c_int_enable(idev, ALTR_I2C_ALL_IRQ, false); -- cgit v1.2.3-59-g8ed1b From 872d92dec353a8d30fa186892cd5ea3e17ca75d3 Mon Sep 17 00:00:00 2001 From: Hongbo Yao Date: Wed, 22 Jan 2020 17:12:38 +0800 Subject: tee: amdtee: amdtee depends on CRYPTO_DEV_CCP_DD If CRYPTO_DEV_CCP_DD=m and AMDTEE=y, the following error is seen while building call.c or core.c drivers/tee/amdtee/call.o: In function `handle_unload_ta': call.c:(.text+0x35f): undefined reference to `psp_tee_process_cmd' drivers/tee/amdtee/core.o: In function `amdtee_driver_init': core.c:(.init.text+0xf): undefined reference to `psp_check_tee_status Fix the config dependency for AMDTEE here. Reported-by: Hulk Robot Fixes: 757cc3e9ff1d ("tee: add AMD-TEE driver") Signed-off-by: Hongbo Yao Reviewed-by: Rijo Thomas Acked-by: Jens Wiklander Signed-off-by: Herbert Xu --- drivers/tee/amdtee/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tee/amdtee/Kconfig b/drivers/tee/amdtee/Kconfig index 4e32b6413b41..191f9715fa9a 100644 --- a/drivers/tee/amdtee/Kconfig +++ b/drivers/tee/amdtee/Kconfig @@ -3,6 +3,6 @@ config AMDTEE tristate "AMD-TEE" default m - depends on CRYPTO_DEV_SP_PSP + depends on CRYPTO_DEV_SP_PSP && CRYPTO_DEV_CCP_DD help This implements AMD's Trusted Execution Environment (TEE) driver. -- cgit v1.2.3-59-g8ed1b From 03cd45d2e219301880cabc357e3cf478a500080f Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 13 Feb 2020 12:56:04 +0300 Subject: thunderbolt: Prevent crash if non-active NVMem file is read The driver does not populate .reg_read callback for the non-active NVMem because the file is supposed to be write-only. However, it turns out NVMem subsystem does not yet support this and expects that the .reg_read callback is provided. If user reads the binary attribute it triggers NULL pointer dereference like this one: BUG: kernel NULL pointer dereference, address: 0000000000000000 ... Call Trace: bin_attr_nvmem_read+0x64/0x80 kernfs_fop_read+0xa7/0x180 vfs_read+0xbd/0x170 ksys_read+0x5a/0xd0 do_syscall_64+0x43/0x150 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fix this in the driver by providing .reg_read callback that always returns an error. Reported-by: Nicholas Johnson Fixes: e6b245ccd524 ("thunderbolt: Add support for host and device NVM firmware upgrade") Signed-off-by: Mika Westerberg Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200213095604.1074-1-mika.westerberg@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/switch.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index ad5479f21174..7d6ecc342508 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -348,6 +348,12 @@ out: return ret; } +static int tb_switch_nvm_no_read(void *priv, unsigned int offset, void *val, + size_t bytes) +{ + return -EPERM; +} + static int tb_switch_nvm_write(void *priv, unsigned int offset, void *val, size_t bytes) { @@ -393,6 +399,7 @@ static struct nvmem_device *register_nvmem(struct tb_switch *sw, int id, config.read_only = true; } else { config.name = "nvm_non_active"; + config.reg_read = tb_switch_nvm_no_read; config.reg_write = tb_switch_nvm_write; config.root_only = true; } -- cgit v1.2.3-59-g8ed1b From b6570fdb96edf45bcf71884bd2644bd73d348d1a Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Thu, 13 Feb 2020 00:11:44 -0600 Subject: ASoC: codec2codec: avoid invalid/double-free of pcm runtime The PCM runtime was freed during PMU in the case that the event hook encountered an error. However, it is also unconditionally freed during PMD. Avoid a double-free by dropping the call to kfree in the PMU hook. Fixes: a72706ed8208 ("ASoC: codec2codec: remove ephemeral variables") Cc: stable@vger.kernel.org Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20200213061147.29386-2-samuel@sholland.org Signed-off-by: Mark Brown --- sound/soc/soc-dapm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index bc20ad9abf8b..8b24396675ec 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -3916,9 +3916,6 @@ snd_soc_dai_link_event_pre_pmu(struct snd_soc_dapm_widget *w, runtime->rate = params_rate(params); out: - if (ret < 0) - kfree(runtime); - kfree(params); return ret; } -- cgit v1.2.3-59-g8ed1b From 6e5cf31fbe651bed7ba1df768f2e123531132417 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 4 Feb 2020 13:28:41 +0100 Subject: x86/mce/amd: Publish the bank pointer only after setup has succeeded threshold_create_bank() creates a bank descriptor per MCA error thresholding counter which can be controlled over sysfs. It publishes the pointer to that bank in a per-CPU variable and then goes on to create additional thresholding blocks if the bank has such. However, that creation of additional blocks in allocate_threshold_blocks() can fail, leading to a use-after-free through the per-CPU pointer. Therefore, publish that pointer only after all blocks have been setup successfully. Fixes: 019f34fccfd5 ("x86, MCE, AMD: Move shared bank to node descriptor") Reported-by: Saar Amar Reported-by: Dan Carpenter Signed-off-by: Borislav Petkov Cc: Link: http://lkml.kernel.org/r/20200128140846.phctkvx5btiexvbx@kili.mountain --- arch/x86/kernel/cpu/mce/amd.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index b3a50d962851..e7313e5c497c 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -1198,8 +1198,9 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) return buf_mcatype; } -static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, - unsigned int block, u32 address) +static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb, + unsigned int bank, unsigned int block, + u32 address) { struct threshold_block *b = NULL; u32 low, high; @@ -1243,16 +1244,12 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, INIT_LIST_HEAD(&b->miscj); - if (per_cpu(threshold_banks, cpu)[bank]->blocks) { - list_add(&b->miscj, - &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj); - } else { - per_cpu(threshold_banks, cpu)[bank]->blocks = b; - } + if (tb->blocks) + list_add(&b->miscj, &tb->blocks->miscj); + else + tb->blocks = b; - err = kobject_init_and_add(&b->kobj, &threshold_ktype, - per_cpu(threshold_banks, cpu)[bank]->kobj, - get_name(bank, b)); + err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(bank, b)); if (err) goto out_free; recurse: @@ -1260,7 +1257,7 @@ recurse: if (!address) return 0; - err = allocate_threshold_blocks(cpu, bank, block, address); + err = allocate_threshold_blocks(cpu, tb, bank, block, address); if (err) goto out_free; @@ -1345,8 +1342,6 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) goto out_free; } - per_cpu(threshold_banks, cpu)[bank] = b; - if (is_shared_bank(bank)) { refcount_set(&b->cpus, 1); @@ -1357,9 +1352,13 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) } } - err = allocate_threshold_blocks(cpu, bank, 0, msr_ops.misc(bank)); - if (!err) - goto out; + err = allocate_threshold_blocks(cpu, b, bank, 0, msr_ops.misc(bank)); + if (err) + goto out_free; + + per_cpu(threshold_banks, cpu)[bank] = b; + + return 0; out_free: kfree(b); -- cgit v1.2.3-59-g8ed1b From 4508cf76b1ecdf20a456b6b161acbe78f3b23358 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 13 Feb 2020 12:43:42 +0100 Subject: serial: cpm_uart: call cpm_muram_init before registering console Christophe reports that powerpc 8xx silently fails to 5.6-rc1. It turns out I was wrong about nobody relying on the lazy initialization of the cpm/qe muram in commit b6231ea2b3c6 (soc: fsl: qe: drop broken lazy call of cpm_muram_init()). Rather than reinstating the somewhat dubious lazy call (initializing a currently held spinlock, and implicitly doing a GFP_KERNEL under that spinlock), make sure that cpm_muram_init() is called early enough - I thought the calls from the subsys_initcalls were good enough, but when used by console drivers, that's obviously not the case. cpm_muram_init() is safe to call twice (there's an early return if it is already initialized), so keep the call from cpm_init() - in case SERIAL_CPM_CONSOLE=n. Fixes: b6231ea2b3c6 (soc: fsl: qe: drop broken lazy call of cpm_muram_init()) Reported-by: Christophe Leroy Signed-off-by: Rasmus Villemoes Tested-by: Christophe Leroy Link: https://lore.kernel.org/r/20200213114342.21712-1-linux@rasmusvillemoes.dk Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/cpm_uart/cpm_uart_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/cpm_uart/cpm_uart_core.c b/drivers/tty/serial/cpm_uart/cpm_uart_core.c index 19d5a4cf29a6..d4b81b06e0cb 100644 --- a/drivers/tty/serial/cpm_uart/cpm_uart_core.c +++ b/drivers/tty/serial/cpm_uart/cpm_uart_core.c @@ -1373,6 +1373,7 @@ static struct console cpm_scc_uart_console = { static int __init cpm_uart_console_init(void) { + cpm_muram_init(); register_console(&cpm_scc_uart_console); return 0; } -- cgit v1.2.3-59-g8ed1b From 687bff0cd08f790d540cfb7b2349f0d876cdddec Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 10 Feb 2020 09:11:30 +0100 Subject: vt: selection, handle pending signals in paste_selection When pasting a selection to a vt, the task is set as INTERRUPTIBLE while waiting for a tty to unthrottle. But signals are not handled at all. Normally, this is not a problem as tty_ldisc_receive_buf receives all the goods and a user has no reason to interrupt the task. There are two scenarios where this matters: 1) when the tty is throttled and a signal is sent to the process, it spins on a CPU until the tty is unthrottled. schedule() does not really echedule, but returns immediately, of course. 2) when the sel_buffer becomes invalid, KASAN prevents any reads from it and the loop simply does not proceed and spins forever (causing the tty to throttle, but the code never sleeps, the same as above). This sometimes happens as there is a race in the sel_buffer handling code. So add signal handling to this ioctl (TIOCL_PASTESEL) and return -EINTR in case a signal is pending. Signed-off-by: Jiri Slaby Cc: stable Link: https://lore.kernel.org/r/20200210081131.23572-1-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/selection.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index 78732feaf65b..44d974d4159f 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -29,6 +29,8 @@ #include #include +#include + /* Don't take this from : 011-015 on the screen aren't spaces */ #define isspace(c) ((c) == ' ') @@ -350,6 +352,7 @@ int paste_selection(struct tty_struct *tty) unsigned int count; struct tty_ldisc *ld; DECLARE_WAITQUEUE(wait, current); + int ret = 0; console_lock(); poke_blanked_console(); @@ -363,6 +366,10 @@ int paste_selection(struct tty_struct *tty) add_wait_queue(&vc->paste_wait, &wait); while (sel_buffer && sel_buffer_lth > pasted) { set_current_state(TASK_INTERRUPTIBLE); + if (signal_pending(current)) { + ret = -EINTR; + break; + } if (tty_throttled(tty)) { schedule(); continue; @@ -378,6 +385,6 @@ int paste_selection(struct tty_struct *tty) tty_buffer_unlock_exclusive(&vc->port); tty_ldisc_deref(ld); - return 0; + return ret; } EXPORT_SYMBOL_GPL(paste_selection); -- cgit v1.2.3-59-g8ed1b From 07e6124a1a46b4b5a9b3cacc0c306b50da87abf5 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 10 Feb 2020 09:11:31 +0100 Subject: vt: selection, close sel_buffer race syzkaller reported this UAF: BUG: KASAN: use-after-free in n_tty_receive_buf_common+0x2481/0x2940 drivers/tty/n_tty.c:1741 Read of size 1 at addr ffff8880089e40e9 by task syz-executor.1/13184 CPU: 0 PID: 13184 Comm: syz-executor.1 Not tainted 5.4.7 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 Call Trace: ... kasan_report+0xe/0x20 mm/kasan/common.c:634 n_tty_receive_buf_common+0x2481/0x2940 drivers/tty/n_tty.c:1741 tty_ldisc_receive_buf+0xac/0x190 drivers/tty/tty_buffer.c:461 paste_selection+0x297/0x400 drivers/tty/vt/selection.c:372 tioclinux+0x20d/0x4e0 drivers/tty/vt/vt.c:3044 vt_ioctl+0x1bcf/0x28d0 drivers/tty/vt/vt_ioctl.c:364 tty_ioctl+0x525/0x15a0 drivers/tty/tty_io.c:2657 vfs_ioctl fs/ioctl.c:47 [inline] It is due to a race between parallel paste_selection (TIOCL_PASTESEL) and set_selection_user (TIOCL_SETSEL) invocations. One uses sel_buffer, while the other frees it and reallocates a new one for another selection. Add a mutex to close this race. The mutex takes care properly of sel_buffer and sel_buffer_lth only. The other selection global variables (like sel_start, sel_end, and sel_cons) are protected only in set_selection_user. The other functions need quite some more work to close the races of the variables there. This is going to happen later. This likely fixes (I am unsure as there is no reproducer provided) bug 206361 too. It was marked as CVE-2020-8648. Signed-off-by: Jiri Slaby Reported-by: syzbot+59997e8d5cbdc486e6f6@syzkaller.appspotmail.com References: https://bugzilla.kernel.org/show_bug.cgi?id=206361 Cc: stable Link: https://lore.kernel.org/r/20200210081131.23572-2-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/selection.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index 44d974d4159f..0c50d7410b31 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -45,6 +46,7 @@ static volatile int sel_start = -1; /* cleared by clear_selection */ static int sel_end; static int sel_buffer_lth; static char *sel_buffer; +static DEFINE_MUTEX(sel_lock); /* clear_selection, highlight and highlight_pointer can be called from interrupt (via scrollback/front) */ @@ -186,7 +188,7 @@ int set_selection_kernel(struct tiocl_selection *v, struct tty_struct *tty) char *bp, *obp; int i, ps, pe, multiplier; u32 c; - int mode; + int mode, ret = 0; poke_blanked_console(); @@ -212,6 +214,7 @@ int set_selection_kernel(struct tiocl_selection *v, struct tty_struct *tty) if (ps > pe) /* make sel_start <= sel_end */ swap(ps, pe); + mutex_lock(&sel_lock); if (sel_cons != vc_cons[fg_console].d) { clear_selection(); sel_cons = vc_cons[fg_console].d; @@ -257,9 +260,10 @@ int set_selection_kernel(struct tiocl_selection *v, struct tty_struct *tty) break; case TIOCL_SELPOINTER: highlight_pointer(pe); - return 0; + goto unlock; default: - return -EINVAL; + ret = -EINVAL; + goto unlock; } /* remove the pointer */ @@ -281,7 +285,7 @@ int set_selection_kernel(struct tiocl_selection *v, struct tty_struct *tty) else if (new_sel_start == sel_start) { if (new_sel_end == sel_end) /* no action required */ - return 0; + goto unlock; else if (new_sel_end > sel_end) /* extend to right */ highlight(sel_end + 2, new_sel_end); else /* contract from right */ @@ -309,7 +313,8 @@ int set_selection_kernel(struct tiocl_selection *v, struct tty_struct *tty) if (!bp) { printk(KERN_WARNING "selection: kmalloc() failed\n"); clear_selection(); - return -ENOMEM; + ret = -ENOMEM; + goto unlock; } kfree(sel_buffer); sel_buffer = bp; @@ -334,7 +339,9 @@ int set_selection_kernel(struct tiocl_selection *v, struct tty_struct *tty) } } sel_buffer_lth = bp - sel_buffer; - return 0; +unlock: + mutex_unlock(&sel_lock); + return ret; } EXPORT_SYMBOL_GPL(set_selection_kernel); @@ -364,6 +371,7 @@ int paste_selection(struct tty_struct *tty) tty_buffer_lock_exclusive(&vc->port); add_wait_queue(&vc->paste_wait, &wait); + mutex_lock(&sel_lock); while (sel_buffer && sel_buffer_lth > pasted) { set_current_state(TASK_INTERRUPTIBLE); if (signal_pending(current)) { @@ -371,7 +379,9 @@ int paste_selection(struct tty_struct *tty) break; } if (tty_throttled(tty)) { + mutex_unlock(&sel_lock); schedule(); + mutex_lock(&sel_lock); continue; } __set_current_state(TASK_RUNNING); @@ -380,6 +390,7 @@ int paste_selection(struct tty_struct *tty) count); pasted += count; } + mutex_unlock(&sel_lock); remove_wait_queue(&vc->paste_wait, &wait); __set_current_state(TASK_RUNNING); -- cgit v1.2.3-59-g8ed1b From 3e8393630e928767aeb23f4744518de4ea5cc35a Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 12 Feb 2020 14:00:40 +0000 Subject: selftests: use LDLIBS for libraries instead of LDFLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While building selftests, the following errors were observed: > tools/testing/selftests/timens' > gcc -Wall -Werror -pthread -lrt -ldl timens.c -o tools/testing/selftests/timens/timens > /usr/bin/ld: /tmp/ccGy5CST.o: in function `check_config_posix_timers': > timens.c:(.text+0x65a): undefined reference to `timer_create' > collect2: error: ld returned 1 exit status Quoting commit 870f193d48c2 ("selftests: net: use LDLIBS instead of LDFLAGS"): The default Makefile rule looks like: $(CC) $(CFLAGS) $(LDFLAGS) $@ $^ $(LDLIBS) When linking is done by gcc itself, no issue, but when it needs to be passed to proper ld, only LDLIBS follows and then ld cannot know what libs to link with. More detail: https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html LDFLAGS Extra flags to give to compilers when they are supposed to invoke the linker, ‘ld’, such as -L. Libraries (-lfoo) should be added to the LDLIBS variable instead. LDLIBS Library flags or names given to compilers when they are supposed to invoke the linker, ‘ld’. LOADLIBES is a deprecated (but still supported) alternative to LDLIBS. Non-library linker flags, such as -L, should go in the LDFLAGS variable. While at here, correct other selftests, not only timens ones. Reported-by: Shuah Khan Signed-off-by: Dmitry Safonov Tested-by: Shuah Khan Signed-off-by: Shuah Khan --- tools/testing/selftests/futex/functional/Makefile | 2 +- tools/testing/selftests/net/Makefile | 4 ++-- tools/testing/selftests/rtc/Makefile | 2 +- tools/testing/selftests/timens/Makefile | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile index 30996306cabc..23207829ec75 100644 --- a/tools/testing/selftests/futex/functional/Makefile +++ b/tools/testing/selftests/futex/functional/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 INCLUDES := -I../include -I../../ CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES) -LDFLAGS := $(LDFLAGS) -pthread -lrt +LDLIBS := -lpthread -lrt HEADERS := \ ../include/futextest.h \ diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index b5694196430a..287ae916ec0b 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -27,5 +27,5 @@ KSFT_KHDR_INSTALL := 1 include ../lib.mk $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma -$(OUTPUT)/tcp_mmap: LDFLAGS += -lpthread -$(OUTPUT)/tcp_inq: LDFLAGS += -lpthread +$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread +$(OUTPUT)/tcp_inq: LDLIBS += -lpthread diff --git a/tools/testing/selftests/rtc/Makefile b/tools/testing/selftests/rtc/Makefile index de9c8566672a..2d93d65723c9 100644 --- a/tools/testing/selftests/rtc/Makefile +++ b/tools/testing/selftests/rtc/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -O3 -Wl,-no-as-needed -Wall -LDFLAGS += -lrt -lpthread -lm +LDLIBS += -lrt -lpthread -lm TEST_GEN_PROGS = rtctest diff --git a/tools/testing/selftests/timens/Makefile b/tools/testing/selftests/timens/Makefile index e9fb30bd8aeb..b4fd9a934654 100644 --- a/tools/testing/selftests/timens/Makefile +++ b/tools/testing/selftests/timens/Makefile @@ -2,6 +2,6 @@ TEST_GEN_PROGS := timens timerfd timer clock_nanosleep procfs exec TEST_GEN_PROGS_EXTENDED := gettime_perf CFLAGS := -Wall -Werror -pthread -LDFLAGS := -lrt -ldl +LDLIBS := -lrt -ldl include ../lib.mk -- cgit v1.2.3-59-g8ed1b From 9a0584f05687947d5a0b87f046bcd2592a55e67c Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Thu, 13 Feb 2020 18:26:56 +1100 Subject: selftests: openat2: fix build error on newer glibc It appears that newer glibcs check that openat(O_CREAT) was provided a fourth argument (rather than passing garbage), resulting in the following build error: > In file included from /usr/include/fcntl.h:301, > from helpers.c:9: > In function 'openat', > inlined from 'touchat' at helpers.c:49:11: > /usr/include/x86_64-linux-gnu/bits/fcntl2.h:126:4: error: call to > '__openat_missing_mode' declared with attribute error: openat with O_CREAT > or O_TMPFILE in third argument needs 4 arguments > 126 | __openat_missing_mode (); > | ^~~~~~~~~~~~~~~~~~~~~~~~ Reported-by: Shuah Khan Signed-off-by: Aleksa Sarai Tested-by: Shuah Khan Signed-off-by: Shuah Khan --- tools/testing/selftests/openat2/helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/openat2/helpers.c b/tools/testing/selftests/openat2/helpers.c index e9a6557ab16f..5074681ffdc9 100644 --- a/tools/testing/selftests/openat2/helpers.c +++ b/tools/testing/selftests/openat2/helpers.c @@ -46,7 +46,7 @@ int sys_renameat2(int olddirfd, const char *oldpath, int touchat(int dfd, const char *path) { - int fd = openat(dfd, path, O_CREAT); + int fd = openat(dfd, path, O_CREAT, 0700); if (fd >= 0) close(fd); return fd; -- cgit v1.2.3-59-g8ed1b From a1028dcfd0dd97884072288d0c8ed7f30399b528 Mon Sep 17 00:00:00 2001 From: Harigovindan P Date: Thu, 6 Feb 2020 14:26:15 +0530 Subject: drm/msm/dsi: save pll state before dsi host is powered off Save pll state before dsi host is powered off. Without this change some register values gets resetted. Signed-off-by: Harigovindan P Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dsi/dsi_manager.c | 5 +++++ drivers/gpu/drm/msm/dsi/phy/dsi_phy.c | 4 ---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c index acc711fd14f8..4864b9558f65 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_manager.c +++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c @@ -506,6 +506,7 @@ static void dsi_mgr_bridge_post_disable(struct drm_bridge *bridge) struct msm_dsi *msm_dsi1 = dsi_mgr_get_dsi(DSI_1); struct mipi_dsi_host *host = msm_dsi->host; struct drm_panel *panel = msm_dsi->panel; + struct msm_dsi_pll *src_pll; bool is_dual_dsi = IS_DUAL_DSI(); int ret; @@ -539,6 +540,10 @@ static void dsi_mgr_bridge_post_disable(struct drm_bridge *bridge) id, ret); } + /* Save PLL status if it is a clock source */ + src_pll = msm_dsi_phy_get_pll(msm_dsi->phy); + msm_dsi_pll_save_state(src_pll); + ret = msm_dsi_host_power_off(host); if (ret) pr_err("%s: host %d power off failed,%d\n", __func__, id, ret); diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index b0cfa67d2a57..f509ebd77500 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -724,10 +724,6 @@ void msm_dsi_phy_disable(struct msm_dsi_phy *phy) if (!phy || !phy->cfg->ops.disable) return; - /* Save PLL status if it is a clock source */ - if (phy->usecase != MSM_DSI_PHY_SLAVE) - msm_dsi_pll_save_state(phy->pll); - phy->cfg->ops.disable(phy); dsi_phy_regulator_disable(phy); -- cgit v1.2.3-59-g8ed1b From c6659785dfb3f8d75f1fe637e4222ff8178f5280 Mon Sep 17 00:00:00 2001 From: Harigovindan P Date: Thu, 6 Feb 2020 14:42:01 +0530 Subject: drm/msm/dsi/pll: call vco set rate explicitly For a given byte clock, if VCO recalc value is exactly same as vco set rate value, vco_set_rate does not get called assuming VCO is already set to required value. But Due to GDSC toggle, VCO values are erased in the HW. To make sure VCO is programmed correctly, we forcefully call set_rate from vco_prepare. Signed-off-by: Harigovindan P Reviewed-by: Jeffrey Hugo Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dsi/pll/dsi_pll_10nm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/msm/dsi/pll/dsi_pll_10nm.c b/drivers/gpu/drm/msm/dsi/pll/dsi_pll_10nm.c index 1c894548dd72..6ac04fc303f5 100644 --- a/drivers/gpu/drm/msm/dsi/pll/dsi_pll_10nm.c +++ b/drivers/gpu/drm/msm/dsi/pll/dsi_pll_10nm.c @@ -411,6 +411,12 @@ static int dsi_pll_10nm_vco_prepare(struct clk_hw *hw) if (pll_10nm->slave) dsi_pll_enable_pll_bias(pll_10nm->slave); + rc = dsi_pll_10nm_vco_set_rate(hw,pll_10nm->vco_current_rate, 0); + if (rc) { + pr_err("vco_set_rate failed, rc=%d\n", rc); + return rc; + } + /* Start PLL */ pll_write(pll_10nm->phy_cmn_mmio + REG_DSI_10nm_PHY_CMN_PLL_CNTRL, 0x01); -- cgit v1.2.3-59-g8ed1b From 8fc7036ee652207ca992fbb9abb64090c355a9e0 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 13 Feb 2020 12:01:35 -0800 Subject: drm/msm/dpu: fix BGR565 vs RGB565 confusion The component order between the two was swapped, resulting in incorrect color when games with 565 visual hit the overlay path instead of GPU composition. Fixes: 25fdd5933e4c ("drm/msm: Add SDM845 DPU support") Signed-off-by: Rob Clark Reviewed-by: Sean Paul Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c index 528632690f1e..a05282dede91 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c @@ -255,13 +255,13 @@ static const struct dpu_format dpu_format_map[] = { INTERLEAVED_RGB_FMT(RGB565, 0, COLOR_5BIT, COLOR_6BIT, COLOR_5BIT, - C2_R_Cr, C0_G_Y, C1_B_Cb, 0, 3, + C1_B_Cb, C0_G_Y, C2_R_Cr, 0, 3, false, 2, 0, DPU_FETCH_LINEAR, 1), INTERLEAVED_RGB_FMT(BGR565, 0, COLOR_5BIT, COLOR_6BIT, COLOR_5BIT, - C1_B_Cb, C0_G_Y, C2_R_Cr, 0, 3, + C2_R_Cr, C0_G_Y, C1_B_Cb, 0, 3, false, 2, 0, DPU_FETCH_LINEAR, 1), -- cgit v1.2.3-59-g8ed1b From 1dade3a7048ccfc675650cd2cf13d578b095e5fb Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 12 Feb 2020 17:59:39 +0300 Subject: ACPICA: Introduce ACPI_ACCESS_BYTE_WIDTH() macro Sometimes it is useful to find the access_width field value in bytes and not in bits so add a helper that can be used for this purpose. Suggested-by: Jean Delvare Signed-off-by: Mika Westerberg Reviewed-by: Jean Delvare Cc: 4.16+ # 4.16+ Signed-off-by: Rafael J. Wysocki --- include/acpi/actypes.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index a2583c2bc054..4defed58ea33 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -532,11 +532,12 @@ typedef u64 acpi_integer; strnlen (a, ACPI_NAMESEG_SIZE) == ACPI_NAMESEG_SIZE) /* - * Algorithm to obtain access bit width. + * Algorithm to obtain access bit or byte width. * Can be used with access_width of struct acpi_generic_address and access_size of * struct acpi_resource_generic_register. */ #define ACPI_ACCESS_BIT_WIDTH(size) (1 << ((size) + 2)) +#define ACPI_ACCESS_BYTE_WIDTH(size) (1 << ((size) - 1)) /******************************************************************************* * -- cgit v1.2.3-59-g8ed1b From 2ba33a4e9e22ac4dda928d3e9b5978a3a2ded4e0 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 12 Feb 2020 17:59:40 +0300 Subject: ACPI: watchdog: Fix gas->access_width usage ACPI Generic Address Structure (GAS) access_width field is not in bytes as the driver seems to expect in few places so fix this by using the newly introduced macro ACPI_ACCESS_BYTE_WIDTH(). Fixes: b1abf6fc4982 ("ACPI / watchdog: Fix off-by-one error at resource assignment") Fixes: 058dfc767008 ("ACPI / watchdog: Add support for WDAT hardware watchdog") Reported-by: Jean Delvare Signed-off-by: Mika Westerberg Reviewed-by: Jean Delvare Cc: 4.16+ # 4.16+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_watchdog.c | 3 +-- drivers/watchdog/wdat_wdt.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/acpi_watchdog.c b/drivers/acpi/acpi_watchdog.c index ab6e434b4cee..6e9ec6e3fe47 100644 --- a/drivers/acpi/acpi_watchdog.c +++ b/drivers/acpi/acpi_watchdog.c @@ -136,12 +136,11 @@ void __init acpi_watchdog_init(void) gas = &entries[i].register_region; res.start = gas->address; + res.end = res.start + ACPI_ACCESS_BYTE_WIDTH(gas->access_width) - 1; if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { res.flags = IORESOURCE_MEM; - res.end = res.start + ALIGN(gas->access_width, 4) - 1; } else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { res.flags = IORESOURCE_IO; - res.end = res.start + gas->access_width - 1; } else { pr_warn("Unsupported address space: %u\n", gas->space_id); diff --git a/drivers/watchdog/wdat_wdt.c b/drivers/watchdog/wdat_wdt.c index b069349b52f5..e1b1fcfc02af 100644 --- a/drivers/watchdog/wdat_wdt.c +++ b/drivers/watchdog/wdat_wdt.c @@ -389,7 +389,7 @@ static int wdat_wdt_probe(struct platform_device *pdev) memset(&r, 0, sizeof(r)); r.start = gas->address; - r.end = r.start + gas->access_width - 1; + r.end = r.start + ACPI_ACCESS_BYTE_WIDTH(gas->access_width) - 1; if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { r.flags = IORESOURCE_MEM; } else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { -- cgit v1.2.3-59-g8ed1b From cabe17d0173ab04bd3f87b8199ae75f43f1ea473 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 12 Feb 2020 17:59:41 +0300 Subject: ACPI: watchdog: Set default timeout in probe If the BIOS default timeout for the watchdog is too small userspace may not have enough time to configure new timeout after opening the device before the system is already reset. For this reason program default timeout of 30 seconds in the driver probe and allow userspace to change this from command line or through module parameter (wdat_wdt.timeout). Reported-by: Jean Delvare Signed-off-by: Mika Westerberg Reviewed-by: Jean Delvare Signed-off-by: Rafael J. Wysocki --- drivers/watchdog/wdat_wdt.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/watchdog/wdat_wdt.c b/drivers/watchdog/wdat_wdt.c index e1b1fcfc02af..3065dd670a18 100644 --- a/drivers/watchdog/wdat_wdt.c +++ b/drivers/watchdog/wdat_wdt.c @@ -54,6 +54,13 @@ module_param(nowayout, bool, 0); MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); +#define WDAT_DEFAULT_TIMEOUT 30 + +static int timeout = WDAT_DEFAULT_TIMEOUT; +module_param(timeout, int, 0); +MODULE_PARM_DESC(timeout, "Watchdog timeout in seconds (default=" + __MODULE_STRING(WDAT_DEFAULT_TIMEOUT) ")"); + static int wdat_wdt_read(struct wdat_wdt *wdat, const struct wdat_instruction *instr, u32 *value) { @@ -438,6 +445,22 @@ static int wdat_wdt_probe(struct platform_device *pdev) platform_set_drvdata(pdev, wdat); + /* + * Set initial timeout so that userspace has time to configure the + * watchdog properly after it has opened the device. In some cases + * the BIOS default is too short and causes immediate reboot. + */ + if (timeout * 1000 < wdat->wdd.min_hw_heartbeat_ms || + timeout * 1000 > wdat->wdd.max_hw_heartbeat_ms) { + dev_warn(dev, "Invalid timeout %d given, using %d\n", + timeout, WDAT_DEFAULT_TIMEOUT); + timeout = WDAT_DEFAULT_TIMEOUT; + } + + ret = wdat_wdt_set_timeout(&wdat->wdd, timeout); + if (ret) + return ret; + watchdog_set_nowayout(&wdat->wdd, nowayout); return devm_watchdog_register_device(dev, &wdat->wdd); } -- cgit v1.2.3-59-g8ed1b From 3da627073b56955b4f1d028c4b8092af59375938 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Thu, 13 Feb 2020 21:48:42 +0000 Subject: Documentation/process: Swap out the ambassador for Canonical John Johansen will take over as the process ambassador for Canonical when dealing with embargoed hardware issues. Cc: John Johansen Cc: linux-kernel@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: Alex Shi Cc: Harry Wei Cc: Thomas Gleixner Cc: Greg Kroah-Hartman Cc: Jonathan Corbet Acked-by: John Johansen Signed-off-by: Tyler Hicks Link: https://lore.kernel.org/r/20200213214842.21312-1-tyhicks@canonical.com Signed-off-by: Greg Kroah-Hartman --- Documentation/process/embargoed-hardware-issues.rst | 2 +- Documentation/translations/zh_CN/process/embargoed-hardware-issues.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index 64f375c02358..a19d084f9b2c 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -254,7 +254,7 @@ an involved disclosed party. The current ambassadors list: VMware Xen Andrew Cooper - Canonical Tyler Hicks + Canonical John Johansen Debian Ben Hutchings Oracle Konrad Rzeszutek Wilk Red Hat Josh Poimboeuf diff --git a/Documentation/translations/zh_CN/process/embargoed-hardware-issues.rst b/Documentation/translations/zh_CN/process/embargoed-hardware-issues.rst index b93f1af68261..88273ebe7823 100644 --- a/Documentation/translations/zh_CN/process/embargoed-hardware-issues.rst +++ b/Documentation/translations/zh_CN/process/embargoed-hardware-issues.rst @@ -183,7 +183,7 @@ CVE分配 VMware Xen Andrew Cooper - Canonical Tyler Hicks + Canonical John Johansen Debian Ben Hutchings Oracle Konrad Rzeszutek Wilk Red Hat Josh Poimboeuf -- cgit v1.2.3-59-g8ed1b From c9cc0517bba9f0213f1e55172feceb99e5512daf Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 6 Feb 2020 12:42:01 +0100 Subject: crypto: chacha20poly1305 - prevent integer overflow on large input This code assigns src_len (size_t) to sl (int), which causes problems when src_len is very large. Probably nobody in the kernel should be passing this much data to chacha20poly1305 all in one go anyway, so I don't think we need to change the algorithm or introduce larger types or anything. But we should at least error out early in this case and print a warning so that we get reports if this does happen and can look into why anybody is possibly passing it that much data or if they're accidently passing -1 or similar. Fixes: d95312a3ccc0 ("crypto: lib/chacha20poly1305 - reimplement crypt_from_sg() routine") Cc: Ard Biesheuvel Cc: stable@vger.kernel.org # 5.5+ Signed-off-by: Jason A. Donenfeld Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- lib/crypto/chacha20poly1305.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/crypto/chacha20poly1305.c b/lib/crypto/chacha20poly1305.c index 6d83cafebc69..ad0699ce702f 100644 --- a/lib/crypto/chacha20poly1305.c +++ b/lib/crypto/chacha20poly1305.c @@ -235,6 +235,9 @@ bool chacha20poly1305_crypt_sg_inplace(struct scatterlist *src, __le64 lens[2]; } b __aligned(16); + if (WARN_ON(src_len > INT_MAX)) + return false; + chacha_load_key(b.k, key); b.iv[0] = 0; -- cgit v1.2.3-59-g8ed1b From 51dede9c05df2b78acd6dcf6a17d21f0877d2d7b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 13 Feb 2020 19:01:34 +0100 Subject: x86/mce/amd: Fix kobject lifetime Accessing the MCA thresholding controls in sysfs concurrently with CPU hotplug can lead to a couple of KASAN-reported issues: BUG: KASAN: use-after-free in sysfs_file_ops+0x155/0x180 Read of size 8 at addr ffff888367578940 by task grep/4019 and BUG: KASAN: use-after-free in show_error_count+0x15c/0x180 Read of size 2 at addr ffff888368a05514 by task grep/4454 for example. Both result from the fact that the threshold block creation/teardown code frees the descriptor memory itself instead of defining proper ->release function and leaving it to the driver core to take care of that, after all sysfs accesses have completed. Do that and get rid of the custom freeing code, fixing the above UAFs in the process. [ bp: write commit message. ] Fixes: 95268664390b ("[PATCH] x86_64: mce_amd support for family 0x10 processors") Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Link: https://lkml.kernel.org/r/20200214082801.13836-1-bp@alien8.de --- arch/x86/kernel/cpu/mce/amd.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index e7313e5c497c..52de616a8065 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -1163,9 +1163,12 @@ static const struct sysfs_ops threshold_ops = { .store = store, }; +static void threshold_block_release(struct kobject *kobj); + static struct kobj_type threshold_ktype = { .sysfs_ops = &threshold_ops, .default_attrs = default_attrs, + .release = threshold_block_release, }; static const char *get_name(unsigned int bank, struct threshold_block *b) @@ -1367,8 +1370,12 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) return err; } -static void deallocate_threshold_block(unsigned int cpu, - unsigned int bank) +static void threshold_block_release(struct kobject *kobj) +{ + kfree(to_block(kobj)); +} + +static void deallocate_threshold_block(unsigned int cpu, unsigned int bank) { struct threshold_block *pos = NULL; struct threshold_block *tmp = NULL; @@ -1378,13 +1385,11 @@ static void deallocate_threshold_block(unsigned int cpu, return; list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) { - kobject_put(&pos->kobj); list_del(&pos->miscj); - kfree(pos); + kobject_put(&pos->kobj); } - kfree(per_cpu(threshold_banks, cpu)[bank]->blocks); - per_cpu(threshold_banks, cpu)[bank]->blocks = NULL; + kobject_put(&head->blocks->kobj); } static void __threshold_remove_blocks(struct threshold_bank *b) -- cgit v1.2.3-59-g8ed1b From cba6437a1854fde5934098ec3bd0ee83af3129f5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 12 Feb 2020 12:19:41 +0100 Subject: genirq/proc: Reject invalid affinity masks (again) Qian Cai reported that the WARN_ON() in the x86/msi affinity setting code, which catches cases where the affinity setting is not done on the CPU which is the current target of the interrupt, triggers during CPU hotplug stress testing. It turns out that the warning which was added with the commit addressing the MSI affinity race unearthed yet another long standing bug. If user space writes a bogus affinity mask, i.e. it contains no online CPUs, then it calls irq_select_affinity_usr(). This was introduced for ALPHA in eee45269b0f5 ("[PATCH] Alpha: convert to generic irq framework (generic part)") and subsequently made available for all architectures in 18404756765c ("genirq: Expose default irq affinity mask (take 3)") which introduced the circumvention of the affinity setting restrictions for interrupt which cannot be moved in process context. The whole exercise is bogus in various aspects: 1) If the interrupt is already started up then there is absolutely no point to honour a bogus interrupt affinity setting from user space. The interrupt is already assigned to an online CPU and it does not make any sense to reassign it to some other randomly chosen online CPU. 2) If the interupt is not yet started up then there is no point either. A subsequent startup of the interrupt will invoke irq_setup_affinity() anyway which will chose a valid target CPU. So the only correct solution is to just return -EINVAL in case user space wrote an affinity mask which does not contain any online CPUs, except for ALPHA which has it's own magic sauce for this. Fixes: 18404756765c ("genirq: Expose default irq affinity mask (take 3)") Reported-by: Qian Cai Signed-off-by: Thomas Gleixner Tested-by: Qian Cai Link: https://lkml.kernel.org/r/878sl8xdbm.fsf@nanos.tec.linutronix.de --- kernel/irq/internals.h | 2 -- kernel/irq/manage.c | 18 ++---------------- kernel/irq/proc.c | 22 ++++++++++++++++++++++ 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 3924fbe829d4..c9d8eb7f5c02 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -128,8 +128,6 @@ static inline void unregister_handler_proc(unsigned int irq, extern bool irq_can_set_affinity_usr(unsigned int irq); -extern int irq_select_affinity_usr(unsigned int irq); - extern void irq_set_thread_affinity(struct irq_desc *desc); extern int irq_do_set_affinity(struct irq_data *data, diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 3089a60ea8f9..7eee98c38f25 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -481,23 +481,9 @@ int irq_setup_affinity(struct irq_desc *desc) { return irq_select_affinity(irq_desc_get_irq(desc)); } -#endif +#endif /* CONFIG_AUTO_IRQ_AFFINITY */ +#endif /* CONFIG_SMP */ -/* - * Called when a bogus affinity is set via /proc/irq - */ -int irq_select_affinity_usr(unsigned int irq) -{ - struct irq_desc *desc = irq_to_desc(irq); - unsigned long flags; - int ret; - - raw_spin_lock_irqsave(&desc->lock, flags); - ret = irq_setup_affinity(desc); - raw_spin_unlock_irqrestore(&desc->lock, flags); - return ret; -} -#endif /** * irq_set_vcpu_affinity - Set vcpu affinity for the interrupt diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 9e5783d98033..32c071d7bc03 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -111,6 +111,28 @@ static int irq_affinity_list_proc_show(struct seq_file *m, void *v) return show_irq_affinity(AFFINITY_LIST, m); } +#ifndef CONFIG_AUTO_IRQ_AFFINITY +static inline int irq_select_affinity_usr(unsigned int irq) +{ + /* + * If the interrupt is started up already then this fails. The + * interrupt is assigned to an online CPU already. There is no + * point to move it around randomly. Tell user space that the + * selected mask is bogus. + * + * If not then any change to the affinity is pointless because the + * startup code invokes irq_setup_affinity() which will select + * a online CPU anyway. + */ + return -EINVAL; +} +#else +/* ALPHA magic affinity auto selector. Keep it for historical reasons. */ +static inline int irq_select_affinity_usr(unsigned int irq) +{ + return irq_select_affinity(irq); +} +#endif static ssize_t write_irq_affinity(int type, struct file *file, const char __user *buffer, size_t count, loff_t *pos) -- cgit v1.2.3-59-g8ed1b From be0aba826c4a6ba5929def1962a90d6127871969 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 14 Feb 2020 14:53:07 +0800 Subject: HID: i2c-hid: add Trekstor Surfbook E11B to descriptor override The Surfbook E11B uses the SIPODEV SP1064 touchpad, which does not supply descriptors, so it has to be added to the override list. BugLink: https://bugs.launchpad.net/bugs/1858299 Signed-off-by: Kai-Heng Feng Reviewed-by: Hans de Goede Signed-off-by: Benjamin Tissoires --- drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c index d31ea82b84c1..a66f08041a1a 100644 --- a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c +++ b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c @@ -341,6 +341,14 @@ static const struct dmi_system_id i2c_hid_dmi_desc_override_table[] = { }, .driver_data = (void *)&sipodev_desc }, + { + .ident = "Trekstor SURFBOOK E11B", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "TREKSTOR"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "SURFBOOK E11B"), + }, + .driver_data = (void *)&sipodev_desc + }, { .ident = "Direkt-Tek DTLAPY116-2", .matches = { -- cgit v1.2.3-59-g8ed1b From bb51e669fa49feb5904f452b2991b240ef31bc97 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 14 Feb 2020 12:13:14 +0100 Subject: ALSA: seq: Avoid concurrent access to queue flags The queue flags are represented in bit fields and the concurrent access may result in unexpected results. Although the current code should be mostly OK as it's only reading a field while writing other fields as KCSAN reported, it's safer to cover both with a proper spinlock protection. This patch fixes the possible concurrent read by protecting with q->owner_lock. Also the queue owner field is protected as well since it's the field to be protected by the lock itself. Reported-by: syzbot+65c6c92d04304d0a8efc@syzkaller.appspotmail.com Reported-by: syzbot+e60ddfa48717579799dd@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20200214111316.26939-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/seq/seq_queue.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/sound/core/seq/seq_queue.c b/sound/core/seq/seq_queue.c index caf68bf42f13..20c552cf8398 100644 --- a/sound/core/seq/seq_queue.c +++ b/sound/core/seq/seq_queue.c @@ -392,6 +392,7 @@ int snd_seq_queue_check_access(int queueid, int client) int snd_seq_queue_set_owner(int queueid, int client, int locked) { struct snd_seq_queue *q = queueptr(queueid); + unsigned long flags; if (q == NULL) return -EINVAL; @@ -401,8 +402,10 @@ int snd_seq_queue_set_owner(int queueid, int client, int locked) return -EPERM; } + spin_lock_irqsave(&q->owner_lock, flags); q->locked = locked ? 1 : 0; q->owner = client; + spin_unlock_irqrestore(&q->owner_lock, flags); queue_access_unlock(q); queuefree(q); @@ -539,15 +542,17 @@ void snd_seq_queue_client_termination(int client) unsigned long flags; int i; struct snd_seq_queue *q; + bool matched; for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) { if ((q = queueptr(i)) == NULL) continue; spin_lock_irqsave(&q->owner_lock, flags); - if (q->owner == client) + matched = (q->owner == client); + if (matched) q->klocked = 1; spin_unlock_irqrestore(&q->owner_lock, flags); - if (q->owner == client) { + if (matched) { if (q->timer->running) snd_seq_timer_stop(q->timer); snd_seq_timer_reset(q->timer); @@ -739,6 +744,8 @@ void snd_seq_info_queues_read(struct snd_info_entry *entry, int i, bpm; struct snd_seq_queue *q; struct snd_seq_timer *tmr; + bool locked; + int owner; for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) { if ((q = queueptr(i)) == NULL) @@ -750,9 +757,14 @@ void snd_seq_info_queues_read(struct snd_info_entry *entry, else bpm = 0; + spin_lock_irq(&q->owner_lock); + locked = q->locked; + owner = q->owner; + spin_unlock_irq(&q->owner_lock); + snd_iprintf(buffer, "queue %d: [%s]\n", q->queue, q->name); - snd_iprintf(buffer, "owned by client : %d\n", q->owner); - snd_iprintf(buffer, "lock status : %s\n", q->locked ? "Locked" : "Free"); + snd_iprintf(buffer, "owned by client : %d\n", owner); + snd_iprintf(buffer, "lock status : %s\n", locked ? "Locked" : "Free"); snd_iprintf(buffer, "queued time events : %d\n", snd_seq_prioq_avail(q->timeq)); snd_iprintf(buffer, "queued tick events : %d\n", snd_seq_prioq_avail(q->tickq)); snd_iprintf(buffer, "timer state : %s\n", tmr->running ? "Running" : "Stopped"); -- cgit v1.2.3-59-g8ed1b From dc7497795e014d84699c3b8809ed6df35352dd74 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 14 Feb 2020 12:13:15 +0100 Subject: ALSA: seq: Fix concurrent access to queue current tick/time snd_seq_check_queue() passes the current tick and time of the given queue as a pointer to snd_seq_prioq_cell_out(), but those might be updated concurrently by the seq timer update. Fix it by retrieving the current tick and time via the proper helper functions at first, and pass those values to snd_seq_prioq_cell_out() later in the loops. snd_seq_timer_get_cur_time() takes a new argument and adjusts with the current system time only when it's requested so; this update isn't needed for snd_seq_check_queue(), as it's called either from the interrupt handler or right after queuing. Also, snd_seq_timer_get_cur_tick() is changed to read the value in the spinlock for the concurrency, too. Reported-by: syzbot+fd5e0eaa1a32999173b2@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20200214111316.26939-3-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/seq/seq_clientmgr.c | 4 ++-- sound/core/seq/seq_queue.c | 9 ++++++--- sound/core/seq/seq_timer.c | 13 ++++++++++--- sound/core/seq/seq_timer.h | 3 ++- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 6d9592f0ae1d..cc93157fa950 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -580,7 +580,7 @@ static int update_timestamp_of_queue(struct snd_seq_event *event, event->queue = queue; event->flags &= ~SNDRV_SEQ_TIME_STAMP_MASK; if (real_time) { - event->time.time = snd_seq_timer_get_cur_time(q->timer); + event->time.time = snd_seq_timer_get_cur_time(q->timer, true); event->flags |= SNDRV_SEQ_TIME_STAMP_REAL; } else { event->time.tick = snd_seq_timer_get_cur_tick(q->timer); @@ -1659,7 +1659,7 @@ static int snd_seq_ioctl_get_queue_status(struct snd_seq_client *client, tmr = queue->timer; status->events = queue->tickq->cells + queue->timeq->cells; - status->time = snd_seq_timer_get_cur_time(tmr); + status->time = snd_seq_timer_get_cur_time(tmr, true); status->tick = snd_seq_timer_get_cur_tick(tmr); status->running = tmr->running; diff --git a/sound/core/seq/seq_queue.c b/sound/core/seq/seq_queue.c index 20c552cf8398..71a6ea62c3be 100644 --- a/sound/core/seq/seq_queue.c +++ b/sound/core/seq/seq_queue.c @@ -238,6 +238,8 @@ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop) { unsigned long flags; struct snd_seq_event_cell *cell; + snd_seq_tick_time_t cur_tick; + snd_seq_real_time_t cur_time; if (q == NULL) return; @@ -254,17 +256,18 @@ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop) __again: /* Process tick queue... */ + cur_tick = snd_seq_timer_get_cur_tick(q->timer); for (;;) { - cell = snd_seq_prioq_cell_out(q->tickq, - &q->timer->tick.cur_tick); + cell = snd_seq_prioq_cell_out(q->tickq, &cur_tick); if (!cell) break; snd_seq_dispatch_event(cell, atomic, hop); } /* Process time queue... */ + cur_time = snd_seq_timer_get_cur_time(q->timer, false); for (;;) { - cell = snd_seq_prioq_cell_out(q->timeq, &q->timer->cur_time); + cell = snd_seq_prioq_cell_out(q->timeq, &cur_time); if (!cell) break; snd_seq_dispatch_event(cell, atomic, hop); diff --git a/sound/core/seq/seq_timer.c b/sound/core/seq/seq_timer.c index be59b59c9be4..1645e4142e30 100644 --- a/sound/core/seq/seq_timer.c +++ b/sound/core/seq/seq_timer.c @@ -428,14 +428,15 @@ int snd_seq_timer_continue(struct snd_seq_timer *tmr) } /* return current 'real' time. use timeofday() to get better granularity. */ -snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr) +snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr, + bool adjust_ktime) { snd_seq_real_time_t cur_time; unsigned long flags; spin_lock_irqsave(&tmr->lock, flags); cur_time = tmr->cur_time; - if (tmr->running) { + if (adjust_ktime && tmr->running) { struct timespec64 tm; ktime_get_ts64(&tm); @@ -452,7 +453,13 @@ snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr) high PPQ values) */ snd_seq_tick_time_t snd_seq_timer_get_cur_tick(struct snd_seq_timer *tmr) { - return tmr->tick.cur_tick; + snd_seq_tick_time_t cur_tick; + unsigned long flags; + + spin_lock_irqsave(&tmr->lock, flags); + cur_tick = tmr->tick.cur_tick; + spin_unlock_irqrestore(&tmr->lock, flags); + return cur_tick; } diff --git a/sound/core/seq/seq_timer.h b/sound/core/seq/seq_timer.h index 66c3e344eae3..4bec57df8158 100644 --- a/sound/core/seq/seq_timer.h +++ b/sound/core/seq/seq_timer.h @@ -120,7 +120,8 @@ int snd_seq_timer_set_tempo_ppq(struct snd_seq_timer *tmr, int tempo, int ppq); int snd_seq_timer_set_position_tick(struct snd_seq_timer *tmr, snd_seq_tick_time_t position); int snd_seq_timer_set_position_time(struct snd_seq_timer *tmr, snd_seq_real_time_t position); int snd_seq_timer_set_skew(struct snd_seq_timer *tmr, unsigned int skew, unsigned int base); -snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr); +snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr, + bool adjust_ktime); snd_seq_tick_time_t snd_seq_timer_get_cur_tick(struct snd_seq_timer *tmr); extern int seq_default_timer_class; -- cgit v1.2.3-59-g8ed1b From dfa9a5efe8b932a84b3b319250aa3ac60c20f876 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 14 Feb 2020 12:13:16 +0100 Subject: ALSA: rawmidi: Avoid bit fields for state flags The rawmidi state flags (opened, append, active_sensing) are stored in bit fields that can be potentially racy when concurrently accessed without any locks. Although the current code should be fine, there is also no any real benefit by keeping the bitfields for this kind of short number of members. This patch changes those bit fields flags to the simple bool fields. There should be no size increase of the snd_rawmidi_substream by this change. Reported-by: syzbot+576cc007eb9f2c968200@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20200214111316.26939-4-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/rawmidi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h index 40ab20439fee..a36b7227a15a 100644 --- a/include/sound/rawmidi.h +++ b/include/sound/rawmidi.h @@ -77,9 +77,9 @@ struct snd_rawmidi_substream { struct list_head list; /* list of all substream for given stream */ int stream; /* direction */ int number; /* substream number */ - unsigned int opened: 1, /* open flag */ - append: 1, /* append flag (merge more streams) */ - active_sensing: 1; /* send active sensing when close */ + bool opened; /* open flag */ + bool append; /* append flag (merge more streams) */ + bool active_sensing; /* send active sensing when close */ int use_count; /* use counter (for output) */ size_t bytes; struct snd_rawmidi *rmidi; -- cgit v1.2.3-59-g8ed1b From 4b8a1ca4628343829f373bf0d4e087fe50c451e5 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Fri, 14 Feb 2020 18:57:42 +0800 Subject: ASoC: max98090: revert invalid fix for handling SHDN Reverts commit 62d5ae4cafb7 ("ASoC: max98090: save and restore SHDN when changing sensitive registers"). A critical side-effect was observed: when keep playing something, the recorded sound has chance to break (clipping). Signed-off-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20200214105744.82258-2-tzungbi@google.com Signed-off-by: Mark Brown --- sound/soc/codecs/max98090.c | 434 +++++++++++++------------------------------- sound/soc/codecs/max98090.h | 3 +- 2 files changed, 124 insertions(+), 313 deletions(-) diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c index 5bc2c6411b33..032adc14562d 100644 --- a/sound/soc/codecs/max98090.c +++ b/sound/soc/codecs/max98090.c @@ -5,150 +5,24 @@ * Copyright 2011-2012 Maxim Integrated Products */ -#include -#include #include #include #include -#include #include #include #include #include #include +#include +#include #include -#include #include #include #include #include +#include #include "max98090.h" -static void max98090_shdn_save_locked(struct max98090_priv *max98090) -{ - int shdn = 0; - - /* saved_shdn, saved_count, SHDN are protected by card->dapm_mutex */ - regmap_read(max98090->regmap, M98090_REG_DEVICE_SHUTDOWN, &shdn); - max98090->saved_shdn |= shdn; - ++max98090->saved_count; - - if (shdn) - regmap_write(max98090->regmap, M98090_REG_DEVICE_SHUTDOWN, 0x0); -} - -static void max98090_shdn_restore_locked(struct max98090_priv *max98090) -{ - /* saved_shdn, saved_count, SHDN are protected by card->dapm_mutex */ - if (--max98090->saved_count == 0) { - if (max98090->saved_shdn) { - regmap_write(max98090->regmap, - M98090_REG_DEVICE_SHUTDOWN, - M98090_SHDNN_MASK); - max98090->saved_shdn = 0; - } - } -} - -static void max98090_shdn_save(struct max98090_priv *max98090) -{ - mutex_lock_nested(&max98090->component->card->dapm_mutex, - SND_SOC_DAPM_CLASS_RUNTIME); - max98090_shdn_save_locked(max98090); -} - -static void max98090_shdn_restore(struct max98090_priv *max98090) -{ - max98090_shdn_restore_locked(max98090); - mutex_unlock(&max98090->component->card->dapm_mutex); -} - -static int max98090_put_volsw(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_component *component = - snd_soc_kcontrol_component(kcontrol); - struct max98090_priv *max98090 = - snd_soc_component_get_drvdata(component); - int ret; - - max98090_shdn_save(max98090); - ret = snd_soc_put_volsw(kcontrol, ucontrol); - max98090_shdn_restore(max98090); - - return ret; -} - -static int max98090_dapm_put_enum_double(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_component *component = - snd_soc_dapm_kcontrol_component(kcontrol); - struct max98090_priv *max98090 = - snd_soc_component_get_drvdata(component); - int ret; - - max98090_shdn_save(max98090); - ret = snd_soc_dapm_put_enum_double_locked(kcontrol, ucontrol); - max98090_shdn_restore(max98090); - - return ret; -} - -static int max98090_put_enum_double(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_component *component = - snd_soc_kcontrol_component(kcontrol); - struct max98090_priv *max98090 = - snd_soc_component_get_drvdata(component); - int ret; - - max98090_shdn_save(max98090); - ret = snd_soc_put_enum_double(kcontrol, ucontrol); - max98090_shdn_restore(max98090); - - return ret; -} - -static int max98090_bytes_put(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_component *component = - snd_soc_kcontrol_component(kcontrol); - struct max98090_priv *max98090 = - snd_soc_component_get_drvdata(component); - int ret; - - max98090_shdn_save(max98090); - ret = snd_soc_bytes_put(kcontrol, ucontrol); - max98090_shdn_restore(max98090); - - return ret; -} - -static int max98090_dapm_event(struct snd_soc_dapm_widget *w, - struct snd_kcontrol *kcontrol, int event) -{ - struct snd_soc_component *component = - snd_soc_dapm_to_component(w->dapm); - struct max98090_priv *max98090 = - snd_soc_component_get_drvdata(component); - - switch (event) { - case SND_SOC_DAPM_PRE_PMU: - case SND_SOC_DAPM_PRE_PMD: - max98090_shdn_save_locked(max98090); - break; - case SND_SOC_DAPM_POST_PMU: - case SND_SOC_DAPM_POST_PMD: - max98090_shdn_restore_locked(max98090); - break; - } - - return 0; -} - /* Allows for sparsely populated register maps */ static const struct reg_default max98090_reg[] = { { 0x00, 0x00 }, /* 00 Software Reset */ @@ -632,13 +506,10 @@ static SOC_ENUM_SINGLE_DECL(max98090_adchp_enum, max98090_pwr_perf_text); static const struct snd_kcontrol_new max98090_snd_controls[] = { - SOC_ENUM_EXT("MIC Bias VCM Bandgap", max98090_vcmbandgap_enum, - snd_soc_get_enum_double, max98090_put_enum_double), + SOC_ENUM("MIC Bias VCM Bandgap", max98090_vcmbandgap_enum), - SOC_SINGLE_EXT("DMIC MIC Comp Filter Config", - M98090_REG_DIGITAL_MIC_CONFIG, - M98090_DMIC_COMP_SHIFT, M98090_DMIC_COMP_NUM - 1, 0, - snd_soc_get_volsw, max98090_put_volsw), + SOC_SINGLE("DMIC MIC Comp Filter Config", M98090_REG_DIGITAL_MIC_CONFIG, + M98090_DMIC_COMP_SHIFT, M98090_DMIC_COMP_NUM - 1, 0), SOC_SINGLE_EXT_TLV("MIC1 Boost Volume", M98090_REG_MIC1_INPUT_LEVEL, M98090_MIC_PA1EN_SHIFT, @@ -693,34 +564,24 @@ static const struct snd_kcontrol_new max98090_snd_controls[] = { M98090_AVR_SHIFT, M98090_AVR_NUM - 1, 1, max98090_av_tlv), - SOC_ENUM_EXT("ADC Oversampling Rate", max98090_osr128_enum, - snd_soc_get_enum_double, max98090_put_enum_double), - SOC_SINGLE_EXT("ADC Quantizer Dither", M98090_REG_ADC_CONTROL, - M98090_ADCDITHER_SHIFT, M98090_ADCDITHER_NUM - 1, 0, - snd_soc_get_volsw, max98090_put_volsw), - SOC_ENUM_EXT("ADC High Performance Mode", max98090_adchp_enum, - snd_soc_get_enum_double, max98090_put_enum_double), - - SOC_SINGLE_EXT("DAC Mono Mode", M98090_REG_IO_CONFIGURATION, - M98090_DMONO_SHIFT, M98090_DMONO_NUM - 1, 0, - snd_soc_get_volsw, max98090_put_volsw), - SOC_SINGLE_EXT("SDIN Mode", M98090_REG_IO_CONFIGURATION, - M98090_SDIEN_SHIFT, M98090_SDIEN_NUM - 1, 0, - snd_soc_get_volsw, max98090_put_volsw), - SOC_SINGLE_EXT("SDOUT Mode", M98090_REG_IO_CONFIGURATION, - M98090_SDOEN_SHIFT, M98090_SDOEN_NUM - 1, 0, - snd_soc_get_volsw, max98090_put_volsw), - SOC_SINGLE_EXT("SDOUT Hi-Z Mode", M98090_REG_IO_CONFIGURATION, - M98090_HIZOFF_SHIFT, M98090_HIZOFF_NUM - 1, 1, - snd_soc_get_volsw, max98090_put_volsw), - SOC_ENUM_EXT("Filter Mode", max98090_mode_enum, - snd_soc_get_enum_double, max98090_put_enum_double), - SOC_SINGLE_EXT("Record Path DC Blocking", M98090_REG_FILTER_CONFIG, - M98090_AHPF_SHIFT, M98090_AHPF_NUM - 1, 0, - snd_soc_get_volsw, max98090_put_volsw), - SOC_SINGLE_EXT("Playback Path DC Blocking", M98090_REG_FILTER_CONFIG, - M98090_DHPF_SHIFT, M98090_DHPF_NUM - 1, 0, - snd_soc_get_volsw, max98090_put_volsw), + SOC_ENUM("ADC Oversampling Rate", max98090_osr128_enum), + SOC_SINGLE("ADC Quantizer Dither", M98090_REG_ADC_CONTROL, + M98090_ADCDITHER_SHIFT, M98090_ADCDITHER_NUM - 1, 0), + SOC_ENUM("ADC High Performance Mode", max98090_adchp_enum), + + SOC_SINGLE("DAC Mono Mode", M98090_REG_IO_CONFIGURATION, + M98090_DMONO_SHIFT, M98090_DMONO_NUM - 1, 0), + SOC_SINGLE("SDIN Mode", M98090_REG_IO_CONFIGURATION, + M98090_SDIEN_SHIFT, M98090_SDIEN_NUM - 1, 0), + SOC_SINGLE("SDOUT Mode", M98090_REG_IO_CONFIGURATION, + M98090_SDOEN_SHIFT, M98090_SDOEN_NUM - 1, 0), + SOC_SINGLE("SDOUT Hi-Z Mode", M98090_REG_IO_CONFIGURATION, + M98090_HIZOFF_SHIFT, M98090_HIZOFF_NUM - 1, 1), + SOC_ENUM("Filter Mode", max98090_mode_enum), + SOC_SINGLE("Record Path DC Blocking", M98090_REG_FILTER_CONFIG, + M98090_AHPF_SHIFT, M98090_AHPF_NUM - 1, 0), + SOC_SINGLE("Playback Path DC Blocking", M98090_REG_FILTER_CONFIG, + M98090_DHPF_SHIFT, M98090_DHPF_NUM - 1, 0), SOC_SINGLE_TLV("Digital BQ Volume", M98090_REG_ADC_BIQUAD_LEVEL, M98090_AVBQ_SHIFT, M98090_AVBQ_NUM - 1, 1, max98090_dv_tlv), SOC_SINGLE_EXT_TLV("Digital Sidetone Volume", @@ -733,17 +594,13 @@ static const struct snd_kcontrol_new max98090_snd_controls[] = { SOC_SINGLE_TLV("Digital Volume", M98090_REG_DAI_PLAYBACK_LEVEL, M98090_DV_SHIFT, M98090_DV_NUM - 1, 1, max98090_dv_tlv), - SND_SOC_BYTES_E("EQ Coefficients", M98090_REG_EQUALIZER_BASE, 105, - snd_soc_bytes_get, max98090_bytes_put), - SOC_SINGLE_EXT("Digital EQ 3 Band Switch", M98090_REG_DSP_FILTER_ENABLE, - M98090_EQ3BANDEN_SHIFT, M98090_EQ3BANDEN_NUM - 1, 0, - snd_soc_get_volsw, max98090_put_volsw), - SOC_SINGLE_EXT("Digital EQ 5 Band Switch", M98090_REG_DSP_FILTER_ENABLE, - M98090_EQ5BANDEN_SHIFT, M98090_EQ5BANDEN_NUM - 1, 0, - snd_soc_get_volsw, max98090_put_volsw), - SOC_SINGLE_EXT("Digital EQ 7 Band Switch", M98090_REG_DSP_FILTER_ENABLE, - M98090_EQ7BANDEN_SHIFT, M98090_EQ7BANDEN_NUM - 1, 0, - snd_soc_get_volsw, max98090_put_volsw), + SND_SOC_BYTES("EQ Coefficients", M98090_REG_EQUALIZER_BASE, 105), + SOC_SINGLE("Digital EQ 3 Band Switch", M98090_REG_DSP_FILTER_ENABLE, + M98090_EQ3BANDEN_SHIFT, M98090_EQ3BANDEN_NUM - 1, 0), + SOC_SINGLE("Digital EQ 5 Band Switch", M98090_REG_DSP_FILTER_ENABLE, + M98090_EQ5BANDEN_SHIFT, M98090_EQ5BANDEN_NUM - 1, 0), + SOC_SINGLE("Digital EQ 7 Band Switch", M98090_REG_DSP_FILTER_ENABLE, + M98090_EQ7BANDEN_SHIFT, M98090_EQ7BANDEN_NUM - 1, 0), SOC_SINGLE("Digital EQ Clipping Detection", M98090_REG_DAI_PLAYBACK_LEVEL_EQ, M98090_EQCLPN_SHIFT, M98090_EQCLPN_NUM - 1, 1), @@ -751,34 +608,25 @@ static const struct snd_kcontrol_new max98090_snd_controls[] = { M98090_DVEQ_SHIFT, M98090_DVEQ_NUM - 1, 1, max98090_dv_tlv), - SOC_SINGLE_EXT("ALC Enable", M98090_REG_DRC_TIMING, - M98090_DRCEN_SHIFT, M98090_DRCEN_NUM - 1, 0, - snd_soc_get_volsw, max98090_put_volsw), - SOC_ENUM_EXT("ALC Attack Time", max98090_drcatk_enum, - snd_soc_get_enum_double, max98090_put_enum_double), - SOC_ENUM_EXT("ALC Release Time", max98090_drcrls_enum, - snd_soc_get_enum_double, max98090_put_enum_double), + SOC_SINGLE("ALC Enable", M98090_REG_DRC_TIMING, + M98090_DRCEN_SHIFT, M98090_DRCEN_NUM - 1, 0), + SOC_ENUM("ALC Attack Time", max98090_drcatk_enum), + SOC_ENUM("ALC Release Time", max98090_drcrls_enum), SOC_SINGLE_TLV("ALC Make Up Volume", M98090_REG_DRC_GAIN, M98090_DRCG_SHIFT, M98090_DRCG_NUM - 1, 0, max98090_alcmakeup_tlv), - SOC_ENUM_EXT("ALC Compression Ratio", max98090_alccmp_enum, - snd_soc_get_enum_double, max98090_put_enum_double), - SOC_ENUM_EXT("ALC Expansion Ratio", max98090_drcexp_enum, - snd_soc_get_enum_double, max98090_put_enum_double), - SOC_SINGLE_EXT_TLV("ALC Compression Threshold Volume", + SOC_ENUM("ALC Compression Ratio", max98090_alccmp_enum), + SOC_ENUM("ALC Expansion Ratio", max98090_drcexp_enum), + SOC_SINGLE_TLV("ALC Compression Threshold Volume", M98090_REG_DRC_COMPRESSOR, M98090_DRCTHC_SHIFT, - M98090_DRCTHC_NUM - 1, 1, - snd_soc_get_volsw, max98090_put_volsw, max98090_alccomp_tlv), - SOC_SINGLE_EXT_TLV("ALC Expansion Threshold Volume", + M98090_DRCTHC_NUM - 1, 1, max98090_alccomp_tlv), + SOC_SINGLE_TLV("ALC Expansion Threshold Volume", M98090_REG_DRC_EXPANDER, M98090_DRCTHE_SHIFT, - M98090_DRCTHE_NUM - 1, 1, - snd_soc_get_volsw, max98090_put_volsw, max98090_drcexp_tlv), + M98090_DRCTHE_NUM - 1, 1, max98090_drcexp_tlv), - SOC_ENUM_EXT("DAC HP Playback Performance Mode", - max98090_dac_perfmode_enum, - snd_soc_get_enum_double, max98090_put_enum_double), - SOC_ENUM_EXT("DAC High Performance Mode", max98090_dachp_enum, - snd_soc_get_enum_double, max98090_put_enum_double), + SOC_ENUM("DAC HP Playback Performance Mode", + max98090_dac_perfmode_enum), + SOC_ENUM("DAC High Performance Mode", max98090_dachp_enum), SOC_SINGLE_TLV("Headphone Left Mixer Volume", M98090_REG_HP_CONTROL, M98090_MIXHPLG_SHIFT, @@ -836,12 +684,9 @@ static const struct snd_kcontrol_new max98090_snd_controls[] = { SOC_SINGLE("Volume Adjustment Smoothing", M98090_REG_LEVEL_CONTROL, M98090_VSENN_SHIFT, M98090_VSENN_NUM - 1, 1), - SND_SOC_BYTES_E("Biquad Coefficients", - M98090_REG_RECORD_BIQUAD_BASE, 15, - snd_soc_bytes_get, max98090_bytes_put), - SOC_SINGLE_EXT("Biquad Switch", M98090_REG_DSP_FILTER_ENABLE, - M98090_ADCBQEN_SHIFT, M98090_ADCBQEN_NUM - 1, 0, - snd_soc_get_volsw, max98090_put_volsw), + SND_SOC_BYTES("Biquad Coefficients", M98090_REG_RECORD_BIQUAD_BASE, 15), + SOC_SINGLE("Biquad Switch", M98090_REG_DSP_FILTER_ENABLE, + M98090_ADCBQEN_SHIFT, M98090_ADCBQEN_NUM - 1, 0), }; static const struct snd_kcontrol_new max98091_snd_controls[] = { @@ -850,12 +695,10 @@ static const struct snd_kcontrol_new max98091_snd_controls[] = { M98090_DMIC34_ZEROPAD_SHIFT, M98090_DMIC34_ZEROPAD_NUM - 1, 0), - SOC_ENUM_EXT("Filter DMIC34 Mode", max98090_filter_dmic34mode_enum, - snd_soc_get_enum_double, max98090_put_enum_double), - SOC_SINGLE_EXT("DMIC34 DC Blocking", M98090_REG_FILTER_CONFIG, + SOC_ENUM("Filter DMIC34 Mode", max98090_filter_dmic34mode_enum), + SOC_SINGLE("DMIC34 DC Blocking", M98090_REG_FILTER_CONFIG, M98090_FLT_DMIC34HPF_SHIFT, - M98090_FLT_DMIC34HPF_NUM - 1, 0, - snd_soc_get_volsw, max98090_put_volsw), + M98090_FLT_DMIC34HPF_NUM - 1, 0), SOC_SINGLE_TLV("DMIC3 Boost Volume", M98090_REG_DMIC3_VOLUME, M98090_DMIC_AV3G_SHIFT, M98090_DMIC_AV3G_NUM - 1, 0, @@ -873,9 +716,8 @@ static const struct snd_kcontrol_new max98091_snd_controls[] = { SND_SOC_BYTES("DMIC34 Biquad Coefficients", M98090_REG_DMIC34_BIQUAD_BASE, 15), - SOC_SINGLE_EXT("DMIC34 Biquad Switch", M98090_REG_DSP_FILTER_ENABLE, - M98090_DMIC34BQEN_SHIFT, M98090_DMIC34BQEN_NUM - 1, 0, - snd_soc_get_volsw, max98090_put_volsw), + SOC_SINGLE("DMIC34 Biquad Switch", M98090_REG_DSP_FILTER_ENABLE, + M98090_DMIC34BQEN_SHIFT, M98090_DMIC34BQEN_NUM - 1, 0), SOC_SINGLE_TLV("DMIC34 BQ PreAttenuation Volume", M98090_REG_DMIC34_BQ_PREATTEN, M98090_AV34BQ_SHIFT, @@ -929,6 +771,19 @@ static int max98090_micinput_event(struct snd_soc_dapm_widget *w, return 0; } +static int max98090_shdn_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); + struct max98090_priv *max98090 = snd_soc_component_get_drvdata(component); + + if (event & SND_SOC_DAPM_POST_PMU) + max98090->shdn_pending = true; + + return 0; + +} + static const char *mic1_mux_text[] = { "IN12", "IN56" }; static SOC_ENUM_SINGLE_DECL(mic1_mux_enum, @@ -1029,14 +884,10 @@ static SOC_ENUM_SINGLE_DECL(ltenr_mux_enum, lten_mux_text); static const struct snd_kcontrol_new max98090_ltenl_mux = - SOC_DAPM_ENUM_EXT("LTENL Mux", ltenl_mux_enum, - snd_soc_dapm_get_enum_double, - max98090_dapm_put_enum_double); + SOC_DAPM_ENUM("LTENL Mux", ltenl_mux_enum); static const struct snd_kcontrol_new max98090_ltenr_mux = - SOC_DAPM_ENUM_EXT("LTENR Mux", ltenr_mux_enum, - snd_soc_dapm_get_enum_double, - max98090_dapm_put_enum_double); + SOC_DAPM_ENUM("LTENR Mux", ltenr_mux_enum); static const char *lben_mux_text[] = { "Normal", "Loopback" }; @@ -1051,14 +902,10 @@ static SOC_ENUM_SINGLE_DECL(lbenr_mux_enum, lben_mux_text); static const struct snd_kcontrol_new max98090_lbenl_mux = - SOC_DAPM_ENUM_EXT("LBENL Mux", lbenl_mux_enum, - snd_soc_dapm_get_enum_double, - max98090_dapm_put_enum_double); + SOC_DAPM_ENUM("LBENL Mux", lbenl_mux_enum); static const struct snd_kcontrol_new max98090_lbenr_mux = - SOC_DAPM_ENUM_EXT("LBENR Mux", lbenr_mux_enum, - snd_soc_dapm_get_enum_double, - max98090_dapm_put_enum_double); + SOC_DAPM_ENUM("LBENR Mux", lbenr_mux_enum); static const char *stenl_mux_text[] = { "Normal", "Sidetone Left" }; @@ -1225,25 +1072,21 @@ static const struct snd_soc_dapm_widget max98090_dapm_widgets[] = { SND_SOC_DAPM_INPUT("IN56"), SND_SOC_DAPM_SUPPLY("MICBIAS", M98090_REG_INPUT_ENABLE, - M98090_MBEN_SHIFT, 0, max98090_dapm_event, - SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD), + M98090_MBEN_SHIFT, 0, NULL, 0), SND_SOC_DAPM_SUPPLY("SHDN", M98090_REG_DEVICE_SHUTDOWN, M98090_SHDNN_SHIFT, 0, NULL, 0), SND_SOC_DAPM_SUPPLY("SDIEN", M98090_REG_IO_CONFIGURATION, - M98090_SDIEN_SHIFT, 0, max98090_dapm_event, - SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD), + M98090_SDIEN_SHIFT, 0, NULL, 0), SND_SOC_DAPM_SUPPLY("SDOEN", M98090_REG_IO_CONFIGURATION, - M98090_SDOEN_SHIFT, 0, max98090_dapm_event, - SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD), + M98090_SDOEN_SHIFT, 0, NULL, 0), SND_SOC_DAPM_SUPPLY("DMICL_ENA", M98090_REG_DIGITAL_MIC_ENABLE, - M98090_DIGMICL_SHIFT, 0, max98090_dapm_event, - SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD), + M98090_DIGMICL_SHIFT, 0, max98090_shdn_event, + SND_SOC_DAPM_POST_PMU), SND_SOC_DAPM_SUPPLY("DMICR_ENA", M98090_REG_DIGITAL_MIC_ENABLE, - M98090_DIGMICR_SHIFT, 0, max98090_dapm_event, - SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD), + M98090_DIGMICR_SHIFT, 0, max98090_shdn_event, + SND_SOC_DAPM_POST_PMU), SND_SOC_DAPM_SUPPLY("AHPF", M98090_REG_FILTER_CONFIG, - M98090_AHPF_SHIFT, 0, max98090_dapm_event, - SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD), + M98090_AHPF_SHIFT, 0, NULL, 0), /* * Note: Sysclk and misc power supplies are taken care of by SHDN @@ -1273,12 +1116,10 @@ static const struct snd_soc_dapm_widget max98090_dapm_widgets[] = { &max98090_lineb_mixer_controls[0], ARRAY_SIZE(max98090_lineb_mixer_controls)), - SND_SOC_DAPM_PGA_E("LINEA Input", M98090_REG_INPUT_ENABLE, - M98090_LINEAEN_SHIFT, 0, NULL, 0, max98090_dapm_event, - SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD), - SND_SOC_DAPM_PGA_E("LINEB Input", M98090_REG_INPUT_ENABLE, - M98090_LINEBEN_SHIFT, 0, NULL, 0, max98090_dapm_event, - SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD), + SND_SOC_DAPM_PGA("LINEA Input", M98090_REG_INPUT_ENABLE, + M98090_LINEAEN_SHIFT, 0, NULL, 0), + SND_SOC_DAPM_PGA("LINEB Input", M98090_REG_INPUT_ENABLE, + M98090_LINEBEN_SHIFT, 0, NULL, 0), SND_SOC_DAPM_MIXER("Left ADC Mixer", SND_SOC_NOPM, 0, 0, &max98090_left_adc_mixer_controls[0], @@ -1289,11 +1130,11 @@ static const struct snd_soc_dapm_widget max98090_dapm_widgets[] = { ARRAY_SIZE(max98090_right_adc_mixer_controls)), SND_SOC_DAPM_ADC_E("ADCL", NULL, M98090_REG_INPUT_ENABLE, - M98090_ADLEN_SHIFT, 0, max98090_dapm_event, - SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD), + M98090_ADLEN_SHIFT, 0, max98090_shdn_event, + SND_SOC_DAPM_POST_PMU), SND_SOC_DAPM_ADC_E("ADCR", NULL, M98090_REG_INPUT_ENABLE, - M98090_ADREN_SHIFT, 0, max98090_dapm_event, - SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD), + M98090_ADREN_SHIFT, 0, max98090_shdn_event, + SND_SOC_DAPM_POST_PMU), SND_SOC_DAPM_AIF_OUT("AIFOUTL", "HiFi Capture", 0, SND_SOC_NOPM, 0, 0), @@ -1321,12 +1162,10 @@ static const struct snd_soc_dapm_widget max98090_dapm_widgets[] = { SND_SOC_DAPM_AIF_IN("AIFINL", "HiFi Playback", 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("AIFINR", "HiFi Playback", 1, SND_SOC_NOPM, 0, 0), - SND_SOC_DAPM_DAC_E("DACL", NULL, M98090_REG_OUTPUT_ENABLE, - M98090_DALEN_SHIFT, 0, max98090_dapm_event, - SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD), - SND_SOC_DAPM_DAC_E("DACR", NULL, M98090_REG_OUTPUT_ENABLE, - M98090_DAREN_SHIFT, 0, max98090_dapm_event, - SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD), + SND_SOC_DAPM_DAC("DACL", NULL, M98090_REG_OUTPUT_ENABLE, + M98090_DALEN_SHIFT, 0), + SND_SOC_DAPM_DAC("DACR", NULL, M98090_REG_OUTPUT_ENABLE, + M98090_DAREN_SHIFT, 0), SND_SOC_DAPM_MIXER("Left Headphone Mixer", SND_SOC_NOPM, 0, 0, &max98090_left_hp_mixer_controls[0], @@ -1361,26 +1200,20 @@ static const struct snd_soc_dapm_widget max98090_dapm_widgets[] = { SND_SOC_DAPM_MUX("MIXHPRSEL Mux", SND_SOC_NOPM, 0, 0, &max98090_mixhprsel_mux), - SND_SOC_DAPM_PGA_E("HP Left Out", M98090_REG_OUTPUT_ENABLE, - M98090_HPLEN_SHIFT, 0, NULL, 0, max98090_dapm_event, - SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD), - SND_SOC_DAPM_PGA_E("HP Right Out", M98090_REG_OUTPUT_ENABLE, - M98090_HPREN_SHIFT, 0, NULL, 0, max98090_dapm_event, - SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD), - - SND_SOC_DAPM_PGA_E("SPK Left Out", M98090_REG_OUTPUT_ENABLE, - M98090_SPLEN_SHIFT, 0, NULL, 0, max98090_dapm_event, - SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD), - SND_SOC_DAPM_PGA_E("SPK Right Out", M98090_REG_OUTPUT_ENABLE, - M98090_SPREN_SHIFT, 0, NULL, 0, max98090_dapm_event, - SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD), - - SND_SOC_DAPM_PGA_E("RCV Left Out", M98090_REG_OUTPUT_ENABLE, - M98090_RCVLEN_SHIFT, 0, NULL, 0, max98090_dapm_event, - SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD), - SND_SOC_DAPM_PGA_E("RCV Right Out", M98090_REG_OUTPUT_ENABLE, - M98090_RCVREN_SHIFT, 0, NULL, 0, max98090_dapm_event, - SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD), + SND_SOC_DAPM_PGA("HP Left Out", M98090_REG_OUTPUT_ENABLE, + M98090_HPLEN_SHIFT, 0, NULL, 0), + SND_SOC_DAPM_PGA("HP Right Out", M98090_REG_OUTPUT_ENABLE, + M98090_HPREN_SHIFT, 0, NULL, 0), + + SND_SOC_DAPM_PGA("SPK Left Out", M98090_REG_OUTPUT_ENABLE, + M98090_SPLEN_SHIFT, 0, NULL, 0), + SND_SOC_DAPM_PGA("SPK Right Out", M98090_REG_OUTPUT_ENABLE, + M98090_SPREN_SHIFT, 0, NULL, 0), + + SND_SOC_DAPM_PGA("RCV Left Out", M98090_REG_OUTPUT_ENABLE, + M98090_RCVLEN_SHIFT, 0, NULL, 0), + SND_SOC_DAPM_PGA("RCV Right Out", M98090_REG_OUTPUT_ENABLE, + M98090_RCVREN_SHIFT, 0, NULL, 0), SND_SOC_DAPM_OUTPUT("HPL"), SND_SOC_DAPM_OUTPUT("HPR"), @@ -1395,11 +1228,9 @@ static const struct snd_soc_dapm_widget max98091_dapm_widgets[] = { SND_SOC_DAPM_INPUT("DMIC4"), SND_SOC_DAPM_SUPPLY("DMIC3_ENA", M98090_REG_DIGITAL_MIC_ENABLE, - M98090_DIGMIC3_SHIFT, 0, max98090_dapm_event, - SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD), + M98090_DIGMIC3_SHIFT, 0, NULL, 0), SND_SOC_DAPM_SUPPLY("DMIC4_ENA", M98090_REG_DIGITAL_MIC_ENABLE, - M98090_DIGMIC4_SHIFT, 0, max98090_dapm_event, - SND_SOC_DAPM_PRE_POST_PMU | SND_SOC_DAPM_PRE_POST_PMD), + M98090_DIGMIC4_SHIFT, 0, NULL, 0), }; static const struct snd_soc_dapm_route max98090_dapm_routes[] = { @@ -1670,11 +1501,6 @@ static void max98090_configure_bclk(struct snd_soc_component *component) return; } - /* - * Master mode: no need to save and restore SHDN for the following - * sensitive registers. - */ - /* Check for supported PCLK to LRCLK ratios */ for (i = 0; i < ARRAY_SIZE(pclk_rates); i++) { if ((pclk_rates[i] == max98090->sysclk) && @@ -1761,14 +1587,12 @@ static int max98090_dai_set_fmt(struct snd_soc_dai *codec_dai, switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { case SND_SOC_DAIFMT_CBS_CFS: /* Set to slave mode PLL - MAS mode off */ - max98090_shdn_save(max98090); snd_soc_component_write(component, M98090_REG_CLOCK_RATIO_NI_MSB, 0x00); snd_soc_component_write(component, M98090_REG_CLOCK_RATIO_NI_LSB, 0x00); snd_soc_component_update_bits(component, M98090_REG_CLOCK_MODE, M98090_USE_M1_MASK, 0); - max98090_shdn_restore(max98090); max98090->master = false; break; case SND_SOC_DAIFMT_CBM_CFM: @@ -1794,9 +1618,7 @@ static int max98090_dai_set_fmt(struct snd_soc_dai *codec_dai, dev_err(component->dev, "DAI clock mode unsupported"); return -EINVAL; } - max98090_shdn_save(max98090); snd_soc_component_write(component, M98090_REG_MASTER_MODE, regval); - max98090_shdn_restore(max98090); regval = 0; switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { @@ -1841,10 +1663,8 @@ static int max98090_dai_set_fmt(struct snd_soc_dai *codec_dai, if (max98090->tdm_slots > 1) regval ^= M98090_BCI_MASK; - max98090_shdn_save(max98090); snd_soc_component_write(component, M98090_REG_INTERFACE_FORMAT, regval); - max98090_shdn_restore(max98090); } return 0; @@ -1856,7 +1676,6 @@ static int max98090_set_tdm_slot(struct snd_soc_dai *codec_dai, struct snd_soc_component *component = codec_dai->component; struct max98090_priv *max98090 = snd_soc_component_get_drvdata(component); struct max98090_cdata *cdata; - cdata = &max98090->dai[0]; if (slots < 0 || slots > 4) @@ -1866,7 +1685,6 @@ static int max98090_set_tdm_slot(struct snd_soc_dai *codec_dai, max98090->tdm_width = slot_width; if (max98090->tdm_slots > 1) { - max98090_shdn_save(max98090); /* SLOTL SLOTR SLOTDLY */ snd_soc_component_write(component, M98090_REG_TDM_FORMAT, 0 << M98090_TDM_SLOTL_SHIFT | @@ -1877,7 +1695,6 @@ static int max98090_set_tdm_slot(struct snd_soc_dai *codec_dai, snd_soc_component_update_bits(component, M98090_REG_TDM_CONTROL, M98090_TDM_MASK, M98090_TDM_MASK); - max98090_shdn_restore(max98090); } /* @@ -2077,7 +1894,6 @@ static int max98090_configure_dmic(struct max98090_priv *max98090, dmic_freq = dmic_table[pclk_index].settings[micclk_index].freq; dmic_comp = dmic_table[pclk_index].settings[micclk_index].comp[i]; - max98090_shdn_save(max98090); regmap_update_bits(max98090->regmap, M98090_REG_DIGITAL_MIC_ENABLE, M98090_MICCLK_MASK, micclk_index << M98090_MICCLK_SHIFT); @@ -2086,7 +1902,6 @@ static int max98090_configure_dmic(struct max98090_priv *max98090, M98090_DMIC_COMP_MASK | M98090_DMIC_FREQ_MASK, dmic_comp << M98090_DMIC_COMP_SHIFT | dmic_freq << M98090_DMIC_FREQ_SHIFT); - max98090_shdn_restore(max98090); return 0; } @@ -2123,10 +1938,8 @@ static int max98090_dai_hw_params(struct snd_pcm_substream *substream, switch (params_width(params)) { case 16: - max98090_shdn_save(max98090); snd_soc_component_update_bits(component, M98090_REG_INTERFACE_FORMAT, M98090_WS_MASK, 0); - max98090_shdn_restore(max98090); break; default: return -EINVAL; @@ -2137,7 +1950,6 @@ static int max98090_dai_hw_params(struct snd_pcm_substream *substream, cdata->rate = max98090->lrclk; - max98090_shdn_save(max98090); /* Update filter mode */ if (max98090->lrclk < 24000) snd_soc_component_update_bits(component, M98090_REG_FILTER_CONFIG, @@ -2153,7 +1965,6 @@ static int max98090_dai_hw_params(struct snd_pcm_substream *substream, else snd_soc_component_update_bits(component, M98090_REG_FILTER_CONFIG, M98090_DHF_MASK, M98090_DHF_MASK); - max98090_shdn_restore(max98090); max98090_configure_dmic(max98090, max98090->dmic_freq, max98090->pclk, max98090->lrclk); @@ -2184,7 +1995,6 @@ static int max98090_dai_set_sysclk(struct snd_soc_dai *dai, * 0x02 (when master clk is 20MHz to 40MHz).. * 0x03 (when master clk is 40MHz to 60MHz).. */ - max98090_shdn_save(max98090); if ((freq >= 10000000) && (freq <= 20000000)) { snd_soc_component_write(component, M98090_REG_SYSTEM_CLOCK, M98090_PSCLK_DIV1); @@ -2199,10 +2009,8 @@ static int max98090_dai_set_sysclk(struct snd_soc_dai *dai, max98090->pclk = freq >> 2; } else { dev_err(component->dev, "Invalid master clock frequency\n"); - max98090_shdn_restore(max98090); return -EINVAL; } - max98090_shdn_restore(max98090); max98090->sysclk = freq; @@ -2314,12 +2122,10 @@ static void max98090_pll_work(struct max98090_priv *max98090) */ /* Toggle shutdown OFF then ON */ - mutex_lock(&component->card->dapm_mutex); snd_soc_component_update_bits(component, M98090_REG_DEVICE_SHUTDOWN, M98090_SHDNN_MASK, 0); snd_soc_component_update_bits(component, M98090_REG_DEVICE_SHUTDOWN, M98090_SHDNN_MASK, M98090_SHDNN_MASK); - mutex_unlock(&component->card->dapm_mutex); for (i = 0; i < 10; ++i) { /* Give PLL time to lock */ @@ -2642,12 +2448,7 @@ static int max98090_probe(struct snd_soc_component *component) */ snd_soc_component_read32(component, M98090_REG_DEVICE_STATUS); - /* - * SHDN should be 0 at the point, no need to save/restore for the - * following registers. - * - * High Performance is default - */ + /* High Performance is default */ snd_soc_component_update_bits(component, M98090_REG_DAC_CONTROL, M98090_DACHP_MASK, 1 << M98090_DACHP_SHIFT); @@ -2658,12 +2459,7 @@ static int max98090_probe(struct snd_soc_component *component) M98090_ADCHP_MASK, 1 << M98090_ADCHP_SHIFT); - /* - * SHDN should be 0 at the point, no need to save/restore for the - * following registers. - * - * Turn on VCM bandgap reference - */ + /* Turn on VCM bandgap reference */ snd_soc_component_write(component, M98090_REG_BIAS_CONTROL, M98090_VCM_MODE_MASK); @@ -2695,9 +2491,25 @@ static void max98090_remove(struct snd_soc_component *component) max98090->component = NULL; } +static void max98090_seq_notifier(struct snd_soc_component *component, + enum snd_soc_dapm_type event, int subseq) +{ + struct max98090_priv *max98090 = snd_soc_component_get_drvdata(component); + + if (max98090->shdn_pending) { + snd_soc_component_update_bits(component, M98090_REG_DEVICE_SHUTDOWN, + M98090_SHDNN_MASK, 0); + msleep(40); + snd_soc_component_update_bits(component, M98090_REG_DEVICE_SHUTDOWN, + M98090_SHDNN_MASK, M98090_SHDNN_MASK); + max98090->shdn_pending = false; + } +} + static const struct snd_soc_component_driver soc_component_dev_max98090 = { .probe = max98090_probe, .remove = max98090_remove, + .seq_notifier = max98090_seq_notifier, .set_bias_level = max98090_set_bias_level, .idle_bias_on = 1, .use_pmdown_time = 1, diff --git a/sound/soc/codecs/max98090.h b/sound/soc/codecs/max98090.h index 0a31708b7df7..a197114b0dad 100644 --- a/sound/soc/codecs/max98090.h +++ b/sound/soc/codecs/max98090.h @@ -1539,8 +1539,7 @@ struct max98090_priv { unsigned int pa2en; unsigned int sidetone; bool master; - int saved_count; - int saved_shdn; + bool shdn_pending; }; int max98090_mic_detect(struct snd_soc_component *component, -- cgit v1.2.3-59-g8ed1b From 8f486296459c084b106d907414540301bd9485fd Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Fri, 14 Feb 2020 18:57:43 +0800 Subject: ASoC: dapm: remove snd_soc_dapm_put_enum_double_locked Reverts commit 839284e79482 ("ASoC: dapm: add snd_soc_dapm_put_enum_double_locked"). Signed-off-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20200214105744.82258-3-tzungbi@google.com Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 2 -- sound/soc/soc-dapm.c | 54 ++++++++++++------------------------------------ 2 files changed, 13 insertions(+), 43 deletions(-) diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 2a306c6f3fbc..1b6afbc1a4ed 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -392,8 +392,6 @@ int snd_soc_dapm_get_enum_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); int snd_soc_dapm_put_enum_double(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol); -int snd_soc_dapm_put_enum_double_locked(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol); int snd_soc_dapm_info_pin_switch(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo); int snd_soc_dapm_get_pin_switch(struct snd_kcontrol *kcontrol, diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 8b24396675ec..9b130561d562 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -3441,8 +3441,17 @@ int snd_soc_dapm_get_enum_double(struct snd_kcontrol *kcontrol, } EXPORT_SYMBOL_GPL(snd_soc_dapm_get_enum_double); -static int __snd_soc_dapm_put_enum_double(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol, int locked) +/** + * snd_soc_dapm_put_enum_double - dapm enumerated double mixer set callback + * @kcontrol: mixer control + * @ucontrol: control element information + * + * Callback to set the value of a dapm enumerated double mixer control. + * + * Returns 0 for success. + */ +int snd_soc_dapm_put_enum_double(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) { struct snd_soc_dapm_context *dapm = snd_soc_dapm_kcontrol_dapm(kcontrol); struct snd_soc_card *card = dapm->card; @@ -3465,9 +3474,7 @@ static int __snd_soc_dapm_put_enum_double(struct snd_kcontrol *kcontrol, mask |= e->mask << e->shift_r; } - if (!locked) - mutex_lock_nested(&card->dapm_mutex, - SND_SOC_DAPM_CLASS_RUNTIME); + mutex_lock_nested(&card->dapm_mutex, SND_SOC_DAPM_CLASS_RUNTIME); change = dapm_kcontrol_set_value(kcontrol, val); @@ -3489,50 +3496,15 @@ static int __snd_soc_dapm_put_enum_double(struct snd_kcontrol *kcontrol, card->update = NULL; } - if (!locked) - mutex_unlock(&card->dapm_mutex); + mutex_unlock(&card->dapm_mutex); if (ret > 0) soc_dpcm_runtime_update(card); return change; } - -/** - * snd_soc_dapm_put_enum_double - dapm enumerated double mixer set callback - * @kcontrol: mixer control - * @ucontrol: control element information - * - * Callback to set the value of a dapm enumerated double mixer control. - * - * Returns 0 for success. - */ -int snd_soc_dapm_put_enum_double(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - return __snd_soc_dapm_put_enum_double(kcontrol, ucontrol, 0); -} EXPORT_SYMBOL_GPL(snd_soc_dapm_put_enum_double); -/** - * snd_soc_dapm_put_enum_double_locked - dapm enumerated double mixer set - * callback - * @kcontrol: mixer control - * @ucontrol: control element information - * - * Callback to set the value of a dapm enumerated double mixer control. - * Must acquire dapm_mutex before calling the function. - * - * Returns 0 for success. - */ -int snd_soc_dapm_put_enum_double_locked(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - dapm_assert_locked(snd_soc_dapm_kcontrol_dapm(kcontrol)); - return __snd_soc_dapm_put_enum_double(kcontrol, ucontrol, 1); -} -EXPORT_SYMBOL_GPL(snd_soc_dapm_put_enum_double_locked); - /** * snd_soc_dapm_info_pin_switch - Info for a pin switch * -- cgit v1.2.3-59-g8ed1b From 43064f5c8b8818e35fa254496ef00aabd63d547a Mon Sep 17 00:00:00 2001 From: Bhawanpreet Lakha Date: Thu, 6 Feb 2020 14:26:38 -0500 Subject: drm/amd/display: fix backwards byte order in rx_caps. We were using incorrect byte order after we started using the drm_defines So fix it. Fixes: 02837a91ae75 ("drm/amd/display: add and use defines from drm_hdcp.h") Signed-off-by: JinZe.Xu Signed-off-by: Bhawanpreet Lakha Reviewed-by: Wenjing Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c index f730b94ac3c0..55246711700b 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c @@ -46,8 +46,8 @@ static inline enum mod_hdcp_status check_hdcp2_capable(struct mod_hdcp *hdcp) enum mod_hdcp_status status; if (is_dp_hdcp(hdcp)) - status = (hdcp->auth.msg.hdcp2.rxcaps_dp[2] & HDCP_2_2_RX_CAPS_VERSION_VAL) && - HDCP_2_2_DP_HDCP_CAPABLE(hdcp->auth.msg.hdcp2.rxcaps_dp[0]) ? + status = (hdcp->auth.msg.hdcp2.rxcaps_dp[0] == HDCP_2_2_RX_CAPS_VERSION_VAL) && + HDCP_2_2_DP_HDCP_CAPABLE(hdcp->auth.msg.hdcp2.rxcaps_dp[2]) ? MOD_HDCP_STATUS_SUCCESS : MOD_HDCP_STATUS_HDCP2_NOT_CAPABLE; else -- cgit v1.2.3-59-g8ed1b From c6f8c440441029d5621ee5153676243234a4b76e Mon Sep 17 00:00:00 2001 From: Bhawanpreet Lakha Date: Fri, 7 Feb 2020 10:41:20 -0500 Subject: drm/amd/display: fix dtm unloading there was a type in the terminate command. We should be calling psp_dtm_unload() instead of psp_hdcp_unload() Fixes: 143f23053333 ("drm/amdgpu: psp DTM init") Signed-off-by: Bhawanpreet Lakha Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 3a1570dafe34..146f96661b6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1013,6 +1013,30 @@ static int psp_dtm_initialize(struct psp_context *psp) return 0; } +static int psp_dtm_unload(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + /* + * TODO: bypass the unloading in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + psp_prep_ta_unload_cmd_buf(cmd, psp->dtm_context.session_id); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + + kfree(cmd); + + return ret; +} + int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id) { /* @@ -1037,7 +1061,7 @@ static int psp_dtm_terminate(struct psp_context *psp) if (!psp->dtm_context.dtm_initialized) return 0; - ret = psp_hdcp_unload(psp); + ret = psp_dtm_unload(psp); if (ret) return ret; -- cgit v1.2.3-59-g8ed1b From aad4e2dbe543bc1633bc208ac7bddc4f0bb185ba Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 11 Feb 2020 12:39:53 +0800 Subject: drm/amd/powerplay: always refetch the enabled features status on dpm enablement Otherwise, the cached dpm features status may be inconsistent under some case(e.g. baco reset of Navi asic). Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index 0dc49479a7eb..b06c057a9002 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -898,6 +898,9 @@ int smu_v11_0_system_features_control(struct smu_context *smu, if (ret) return ret; + bitmap_zero(feature->enabled, feature->feature_num); + bitmap_zero(feature->supported, feature->feature_num); + if (en) { ret = smu_feature_get_enabled_mask(smu, feature_mask, 2); if (ret) @@ -907,9 +910,6 @@ int smu_v11_0_system_features_control(struct smu_context *smu, feature->feature_num); bitmap_copy(feature->supported, (unsigned long *)&feature_mask, feature->feature_num); - } else { - bitmap_zero(feature->enabled, feature->feature_num); - bitmap_zero(feature->supported, feature->feature_num); } return ret; -- cgit v1.2.3-59-g8ed1b From c657b936ea98630ef5ba4f130ab1ad5c534d0165 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 12 Feb 2020 01:46:16 -0500 Subject: drm/amdgpu/soc15: fix xclk for raven It's 25 Mhz (refclk / 4). This fixes the interpretation of the rlc clock counter. Acked-by: Evan Quan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/soc15.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 15f3424a1ff7..2b488dfb2f21 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -272,7 +272,12 @@ static u32 soc15_get_config_memsize(struct amdgpu_device *adev) static u32 soc15_get_xclk(struct amdgpu_device *adev) { - return adev->clock.spll.reference_freq; + u32 reference_clock = adev->clock.spll.reference_freq; + + if (adev->asic_type == CHIP_RAVEN) + return reference_clock / 4; + + return reference_clock; } -- cgit v1.2.3-59-g8ed1b From 120cf959308e1bda984e40a9edd25ee2d6262efd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 12 Feb 2020 08:51:29 -0500 Subject: drm/amdgpu/gfx9: disable gfxoff when reading rlc clock Otherwise we readback all ones. Fixes rlc counter readback while gfxoff is active. Reviewed-by: Xiaojie Yuan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index b33a4eb39193..6d6aca08d6fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3959,6 +3959,7 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) { uint64_t clock; + amdgpu_gfx_off_ctrl(adev, false); mutex_lock(&adev->gfx.gpu_clock_mutex); if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) { uint32_t tmp, lsb, msb, i = 0; @@ -3977,6 +3978,7 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); } mutex_unlock(&adev->gfx.gpu_clock_mutex); + amdgpu_gfx_off_ctrl(adev, true); return clock; } -- cgit v1.2.3-59-g8ed1b From b08c3ed609aabc4e76e74edc4404f0c26279d7ed Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 12 Feb 2020 08:52:32 -0500 Subject: drm/amdgpu/gfx10: disable gfxoff when reading rlc clock Otherwise we readback all ones. Fixes rlc counter readback while gfxoff is active. Reviewed-by: Xiaojie Yuan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 1785fdad6ecb..22bbb36c768e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3923,11 +3923,13 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) { uint64_t clock; + amdgpu_gfx_off_ctrl(adev, false); mutex_lock(&adev->gfx.gpu_clock_mutex); WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); mutex_unlock(&adev->gfx.gpu_clock_mutex); + amdgpu_gfx_off_ctrl(adev, true); return clock; } -- cgit v1.2.3-59-g8ed1b From 726464596b5d3f10b7c655129a62168e5c17d60c Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Wed, 12 Feb 2020 23:35:03 +0000 Subject: MAINTAINERS: eCryptfs: Update maintainer address and downgrade status Adjust my email address to a personal account. Downgrade the status of eCryptfs maintenance to 'Odd Fixes' since it has not been part of my work responsibilities recently and I've had little personal time to devote to it. eCryptfs hasn't seen active development in some time. New deployments of file level encryption should use more modern solutions, such as fscrypt, where possible. Signed-off-by: Tyler Hicks --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 08176d64eed5..04ee092e1940 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5740,12 +5740,12 @@ S: Maintained F: drivers/media/dvb-frontends/ec100* ECRYPT FILE SYSTEM -M: Tyler Hicks +M: Tyler Hicks L: ecryptfs@vger.kernel.org W: http://ecryptfs.org W: https://launchpad.net/ecryptfs T: git git://git.kernel.org/pub/scm/linux/kernel/git/tyhicks/ecryptfs.git -S: Supported +S: Odd Fixes F: Documentation/filesystems/ecryptfs.txt F: fs/ecryptfs/ -- cgit v1.2.3-59-g8ed1b From f8e48a8408f5e23dd514916fda128a87e34f8ffd Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Thu, 13 Feb 2020 21:25:54 +0000 Subject: eCryptfs: Replace deactivated email address Replace a recently deactived email address with one that I'll be able to personally control and keep alive. Signed-off-by: Tyler Hicks --- fs/ecryptfs/ecryptfs_kernel.h | 2 +- fs/ecryptfs/main.c | 2 +- fs/ecryptfs/messaging.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 1c1a56be7ea2..e6ac78c62ca4 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -8,7 +8,7 @@ * Copyright (C) 2004-2008 International Business Machines Corp. * Author(s): Michael A. Halcrow * Trevor S. Highland - * Tyler Hicks + * Tyler Hicks */ #ifndef ECRYPTFS_KERNEL_H diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index b8a7ce379ffe..e63259fdef28 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -7,7 +7,7 @@ * Copyright (C) 2004-2007 International Business Machines Corp. * Author(s): Michael A. Halcrow * Michael C. Thompson - * Tyler Hicks + * Tyler Hicks */ #include diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index c05ca39aa449..8646ba76def3 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -4,7 +4,7 @@ * * Copyright (C) 2004-2008 International Business Machines Corp. * Author(s): Michael A. Halcrow - * Tyler Hicks + * Tyler Hicks */ #include #include -- cgit v1.2.3-59-g8ed1b From 2c2a7552dd6465e8fde6bc9cccf8d66ed1c1eb72 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Fri, 14 Feb 2020 12:21:01 -0600 Subject: ecryptfs: replace BUG_ON with error handling code In crypt_scatterlist, if the crypt_stat argument is not set up correctly, the kernel crashes. Instead, by returning an error code upstream, the error is handled safely. The issue is detected via a static analysis tool written by us. Fixes: 237fead619984 (ecryptfs: fs/Makefile and fs/Kconfig) Signed-off-by: Aditya Pakki Signed-off-by: Tyler Hicks --- fs/ecryptfs/crypto.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index f91db24bbf3b..a064b408d841 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -311,8 +311,10 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, struct extent_crypt_result ecr; int rc = 0; - BUG_ON(!crypt_stat || !crypt_stat->tfm - || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED)); + if (!crypt_stat || !crypt_stat->tfm + || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED)) + return -EINVAL; + if (unlikely(ecryptfs_verbosity > 0)) { ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n", crypt_stat->key_size); -- cgit v1.2.3-59-g8ed1b From 76261ada16dcc3be610396a46d35acc3efbda682 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 12 Feb 2020 21:08:59 -0800 Subject: scsi: Revert "RDMA/isert: Fix a recently introduced regression related to logout" Since commit 04060db41178 introduces soft lockups when toggling network interfaces, revert it. Link: https://marc.info/?l=target-devel&m=158157054906196 Cc: Rahul Kundu Cc: Mike Marciniszyn Cc: Sagi Grimberg Reported-by: Dakshaja Uppalapati Fixes: 04060db41178 ("scsi: RDMA/isert: Fix a recently introduced regression related to logout") Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/infiniband/ulp/isert/ib_isert.c | 12 ++++++++++++ drivers/target/iscsi/iscsi_target.c | 6 +++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index b273e421e910..a1a035270cab 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -2575,6 +2575,17 @@ isert_wait4logout(struct isert_conn *isert_conn) } } +static void +isert_wait4cmds(struct iscsi_conn *conn) +{ + isert_info("iscsi_conn %p\n", conn); + + if (conn->sess) { + target_sess_cmd_list_set_waiting(conn->sess->se_sess); + target_wait_for_sess_cmds(conn->sess->se_sess); + } +} + /** * isert_put_unsol_pending_cmds() - Drop commands waiting for * unsolicitate dataout @@ -2622,6 +2633,7 @@ static void isert_wait_conn(struct iscsi_conn *conn) ib_drain_qp(isert_conn->qp); isert_put_unsol_pending_cmds(conn); + isert_wait4cmds(conn); isert_wait4logout(isert_conn); queue_work(isert_release_wq, &isert_conn->release_work); diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index b94ed4e30770..7251a87bb576 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -4149,6 +4149,9 @@ int iscsit_close_connection( iscsit_stop_nopin_response_timer(conn); iscsit_stop_nopin_timer(conn); + if (conn->conn_transport->iscsit_wait_conn) + conn->conn_transport->iscsit_wait_conn(conn); + /* * During Connection recovery drop unacknowledged out of order * commands for this connection, and prepare the other commands @@ -4234,9 +4237,6 @@ int iscsit_close_connection( target_sess_cmd_list_set_waiting(sess->se_sess); target_wait_for_sess_cmds(sess->se_sess); - if (conn->conn_transport->iscsit_wait_conn) - conn->conn_transport->iscsit_wait_conn(conn); - ahash_request_free(conn->conn_tx_hash); if (conn->conn_rx_hash) { struct crypto_ahash *tfm; -- cgit v1.2.3-59-g8ed1b From 807b9515b7d044cf77df31f1af9d842a76ecd5cb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 12 Feb 2020 21:09:00 -0800 Subject: scsi: Revert "target: iscsi: Wait for all commands to finish before freeing a session" Since commit e9d3009cb936 introduced a regression and since the fix for that regression was not perfect, revert this commit. Link: https://marc.info/?l=target-devel&m=158157054906195 Cc: Rahul Kundu Cc: Mike Marciniszyn Cc: Sagi Grimberg Reported-by: Dakshaja Uppalapati Fixes: e9d3009cb936 ("scsi: target: iscsi: Wait for all commands to finish before freeing a session") Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/target/iscsi/iscsi_target.c | 10 ++-------- include/scsi/iscsi_proto.h | 1 - 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 7251a87bb576..09e55ea0bf5d 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1165,9 +1165,7 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, hdr->cmdsn, be32_to_cpu(hdr->data_length), payload_length, conn->cid); - if (target_get_sess_cmd(&cmd->se_cmd, true) < 0) - return iscsit_add_reject_cmd(cmd, - ISCSI_REASON_WAITING_FOR_LOGOUT, buf); + target_get_sess_cmd(&cmd->se_cmd, true); cmd->sense_reason = transport_lookup_cmd_lun(&cmd->se_cmd, scsilun_to_int(&hdr->lun)); @@ -2004,9 +2002,7 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, conn->sess->se_sess, 0, DMA_NONE, TCM_SIMPLE_TAG, cmd->sense_buffer + 2); - if (target_get_sess_cmd(&cmd->se_cmd, true) < 0) - return iscsit_add_reject_cmd(cmd, - ISCSI_REASON_WAITING_FOR_LOGOUT, buf); + target_get_sess_cmd(&cmd->se_cmd, true); /* * TASK_REASSIGN for ERL=2 / connection stays inside of @@ -4234,8 +4230,6 @@ int iscsit_close_connection( * must wait until they have completed. */ iscsit_check_conn_usage_count(conn); - target_sess_cmd_list_set_waiting(sess->se_sess); - target_wait_for_sess_cmds(sess->se_sess); ahash_request_free(conn->conn_tx_hash); if (conn->conn_rx_hash) { diff --git a/include/scsi/iscsi_proto.h b/include/scsi/iscsi_proto.h index 533f56733ba8..b71b5c4f418c 100644 --- a/include/scsi/iscsi_proto.h +++ b/include/scsi/iscsi_proto.h @@ -627,7 +627,6 @@ struct iscsi_reject { #define ISCSI_REASON_BOOKMARK_INVALID 9 #define ISCSI_REASON_BOOKMARK_NO_RESOURCES 10 #define ISCSI_REASON_NEGOTIATION_RESET 11 -#define ISCSI_REASON_WAITING_FOR_LOGOUT 12 /* Max. number of Key=Value pairs in a text message */ #define MAX_KEY_VALUE_PAIRS 8192 -- cgit v1.2.3-59-g8ed1b From d3f703c4359ff06619b2322b91f69710453e6b6d Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Tue, 11 Feb 2020 11:24:33 -0800 Subject: mips: vdso: fix 'jalr t9' crash in vdso code Observed that when kernel is built with Yocto mips64-poky-linux-gcc, and mips64-poky-linux-gnun32-gcc toolchain, resulting vdso contains 'jalr t9' instructions in its code and since in vdso case nobody sets GOT table code crashes when instruction reached. On other hand observed that when kernel is built mips-poky-linux-gcc toolchain, the same 'jalr t9' instruction are replaced with PC relative function calls using 'bal' instructions. The difference boils down to -mrelax-pic-calls and -mexplicit-relocs gcc options that gets different default values depending on gcc target triplets and corresponding binutils. -mrelax-pic-calls got enabled by default only in mips-poky-linux-gcc case. MIPS binutils ld relies on R_MIPS_JALR relocation to convert 'jalr t9' into 'bal' and such relocation is generated only if -mrelax-pic-calls option is on. Please note 'jalr t9' conversion to 'bal' can happen only to static functions. These static PIC calls use mips local GOT entries that are supposed to be filled with start of DSO value by run-time linker (missing in VDSO case) and they do not have dynamic relocations. Global mips GOT entries must have dynamic relocations and they should be prevented by cmd_vdso_check Makefile rule. Solution call out -mrelax-pic-calls and -mexplicit-relocs options explicitly while compiling MIPS vdso code. That would get correct and consistent between different toolchains behaviour. Reported-by: Bruce Ashfield Signed-off-by: Victor Kamensky Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Ralf Baechle Cc: James Hogan Cc: Vincenzo Frascino Cc: richard.purdie@linuxfoundation.org --- arch/mips/vdso/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index aa89a41dc5dd..848baeaef1f8 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -33,6 +33,7 @@ endif cflags-vdso := $(ccflags-vdso) \ $(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \ -O3 -g -fPIC -fno-strict-aliasing -fno-common -fno-builtin -G 0 \ + -mrelax-pic-calls -mexplicit-relocs \ -fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \ $(call cc-option, -fno-asynchronous-unwind-tables) \ $(call cc-option, -fno-stack-protector) -- cgit v1.2.3-59-g8ed1b From 07015d7a103c4420b69a287b8ef4d2535c0f4106 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 15 Feb 2020 12:38:36 -0800 Subject: MIPS: Disable VDSO time functionality on microMIPS A check we're about to add to pick up on function calls that depend on bogus use of the GOT in the VDSO picked up on instances of such function calls in microMIPS builds. Since the code appears genuinely problematic, and given the relatively small amount of use & testing that microMIPS sees, go ahead & disable the VDSO for microMIPS builds. Signed-off-by: Paul Burton --- arch/mips/vdso/Makefile | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index 848baeaef1f8..b5e0bd82d47f 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -52,6 +52,8 @@ endif CFLAGS_REMOVE_vgettimeofday.o = -pg +DISABLE_VDSO := n + # # For the pre-R6 code in arch/mips/vdso/vdso.h for locating # the base address of VDSO, the linker will emit a R_MIPS_PC32 @@ -65,11 +67,24 @@ CFLAGS_REMOVE_vgettimeofday.o = -pg ifndef CONFIG_CPU_MIPSR6 ifeq ($(call ld-ifversion, -lt, 225000000, y),y) $(warning MIPS VDSO requires binutils >= 2.25) - obj-vdso-y := $(filter-out vgettimeofday.o, $(obj-vdso-y)) - ccflags-vdso += -DDISABLE_MIPS_VDSO + DISABLE_VDSO := y endif endif +# +# GCC (at least up to version 9.2) appears to emit function calls that make use +# of the GOT when targeting microMIPS, which we can't use in the VDSO due to +# the lack of relocations. As such, we disable the VDSO for microMIPS builds. +# +ifdef CONFIG_CPU_MICROMIPS + DISABLE_VDSO := y +endif + +ifeq ($(DISABLE_VDSO),y) + obj-vdso-y := $(filter-out vgettimeofday.o, $(obj-vdso-y)) + ccflags-vdso += -DDISABLE_MIPS_VDSO +endif + # VDSO linker flags. VDSO_LDFLAGS := \ -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1 \ -- cgit v1.2.3-59-g8ed1b From 976c23af3ee5bd3447a7bfb6c356ceb4acf264a6 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Tue, 11 Feb 2020 11:24:34 -0800 Subject: mips: vdso: add build time check that no 'jalr t9' calls left vdso shared object cannot have GOT based PIC 'jalr t9' calls because nobody set GOT table in vdso. Contributing into vdso .o files are compiled in PIC mode and as result for internal static functions calls compiler will generate 'jalr t9' instructions. Those are supposed to be converted into PC relative 'bal' calls by linker when relocation are processed. Mips global GOT entries do have dynamic relocations and they will be caught by cmd_vdso_check Makefile rule. Static PIC calls go through mips local GOT entries that do not have dynamic relocations. For those 'jalr t9' calls could be present but without dynamic relocations and they need to be converted to 'bal' calls by linker. Add additional build time check to make sure that no 'jalr t9' slip through because of some toolchain misconfiguration that prevents 'jalr t9' to 'bal' conversion. Signed-off-by: Victor Kamensky Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Ralf Baechle Cc: James Hogan Cc: Vincenzo Frascino Cc: bruce.ashfield@gmail.com Cc: richard.purdie@linuxfoundation.org --- arch/mips/vdso/Makefile | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index b5e0bd82d47f..77374c1f0c77 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -97,12 +97,18 @@ GCOV_PROFILE := n UBSAN_SANITIZE := n KCOV_INSTRUMENT := n +# Check that we don't have PIC 'jalr t9' calls left +quiet_cmd_vdso_mips_check = VDSOCHK $@ + cmd_vdso_mips_check = if $(OBJDUMP) --disassemble $@ | egrep -h "jalr.*t9" > /dev/null; \ + then (echo >&2 "$@: PIC 'jalr t9' calls are not supported"; \ + rm -f $@; /bin/false); fi + # # Shared build commands. # quiet_cmd_vdsold_and_vdso_check = LD $@ - cmd_vdsold_and_vdso_check = $(cmd_vdsold); $(cmd_vdso_check) + cmd_vdsold_and_vdso_check = $(cmd_vdsold); $(cmd_vdso_check); $(cmd_vdso_mips_check) quiet_cmd_vdsold = VDSO $@ cmd_vdsold = $(CC) $(c_flags) $(VDSO_LDFLAGS) \ -- cgit v1.2.3-59-g8ed1b From 97e914b7de3c943011779b979b8093fdc0d85722 Mon Sep 17 00:00:00 2001 From: Mark Tomlinson Date: Wed, 12 Feb 2020 10:24:55 +1300 Subject: MIPS: cavium_octeon: Fix syncw generation. The Cavium Octeon CPU uses a special sync instruction for implementing wmb, and due to a CPU bug, the instruction must appear twice. A macro had been defined to hide this: #define __SYNC_rpt(type) (1 + (type == __SYNC_wmb)) which was intended to evaluate to 2 for __SYNC_wmb, and 1 for any other type of sync. However, this expression is evaluated by the assembler, and not the compiler, and the result of '==' in the assembler is 0 or -1, not 0 or 1 as it is in C. The net result was wmb() producing no code at all. The simple fix in this patch is to change the '+' to '-'. Fixes: bf92927251b3 ("MIPS: barrier: Add __SYNC() infrastructure") Signed-off-by: Mark Tomlinson Tested-by: Chris Packham Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/include/asm/sync.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/mips/include/asm/sync.h b/arch/mips/include/asm/sync.h index 7c6a1095f556..aabd097933fe 100644 --- a/arch/mips/include/asm/sync.h +++ b/arch/mips/include/asm/sync.h @@ -155,9 +155,11 @@ * effective barrier as noted by commit 6b07d38aaa52 ("MIPS: Octeon: Use * optimized memory barrier primitives."). Here we specify that the affected * sync instructions should be emitted twice. + * Note that this expression is evaluated by the assembler (not the compiler), + * and that the assembler evaluates '==' as 0 or -1, not 0 or 1. */ #ifdef CONFIG_CPU_CAVIUM_OCTEON -# define __SYNC_rpt(type) (1 + (type == __SYNC_wmb)) +# define __SYNC_rpt(type) (1 - (type == __SYNC_wmb)) #else # define __SYNC_rpt(type) 1 #endif -- cgit v1.2.3-59-g8ed1b From bef8e2dfceed6daeb6ca3e8d33f9c9d43b926580 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 2 Feb 2020 21:19:22 +0100 Subject: MIPS: VPE: Fix a double free and a memory leak in 'release_vpe()' Pointer on the memory allocated by 'alloc_progmem()' is stored in 'v->load_addr'. So this is this memory that should be freed by 'release_progmem()'. 'release_progmem()' is only a call to 'kfree()'. With the current code, there is both a double free and a memory leak. Fix it by passing the correct pointer to 'release_progmem()'. Fixes: e01402b115ccc ("More AP / SP bits for the 34K, the Malta bits and things. Still wants") Signed-off-by: Christophe JAILLET Signed-off-by: Paul Burton Cc: ralf@linux-mips.org Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: kernel-janitors@vger.kernel.org --- arch/mips/kernel/vpe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c index 6176b9acba95..d0d832ab3d3b 100644 --- a/arch/mips/kernel/vpe.c +++ b/arch/mips/kernel/vpe.c @@ -134,7 +134,7 @@ void release_vpe(struct vpe *v) { list_del(&v->list); if (v->load_addr) - release_progmem(v); + release_progmem(v->load_addr); kfree(v); } -- cgit v1.2.3-59-g8ed1b From 7fbeb95d0f68e21e6ca61284f1ac681630976947 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 16 Feb 2020 01:01:18 +0300 Subject: io_uring: add missing io_req_cancelled() fallocate_finish() is missing cancellation check. Add it. It's safe to do that, as only flags setup and sqe fields copy are done before it gets into __io_fallocate(). Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 5a826017ebb8..29565d82291f 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2517,6 +2517,9 @@ static void io_fallocate_finish(struct io_wq_work **workptr) struct io_kiocb *nxt = NULL; int ret; + if (io_req_cancelled(req)) + return; + ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off, req->sync.len); if (ret < 0) @@ -2904,6 +2907,7 @@ static void io_close_finish(struct io_wq_work **workptr) struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work); struct io_kiocb *nxt = NULL; + /* not cancellable, don't do io_req_cancelled() */ __io_close_finish(req, &nxt); if (nxt) io_wq_assign_next(workptr, nxt); -- cgit v1.2.3-59-g8ed1b From d4f194ed9eb9841a8f978710e4d24296f791a85b Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Fri, 7 Feb 2020 15:57:31 +1100 Subject: powerpc/eeh: Fix deadlock handling dead PHB Recovering a dead PHB can currently cause a deadlock as the PCI rescan/remove lock is taken twice. This is caused as part of an existing bug in eeh_handle_special_event(). The pe is processed while traversing the PHBs even though the pe is unrelated to the loop. This causes the pe to be, incorrectly, processed more than once. Untangling this section can move the pe processing out of the loop and also outside the locked section, correcting both problems. Fixes: 2e25505147b8 ("powerpc/eeh: Fix crash when edev->pdev changes") Cc: stable@vger.kernel.org # 5.4+ Signed-off-by: Sam Bobroff Reviewed-by: Frederic Barrat Tested-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0547e82dbf90ee0729a2979a8cac5c91665c621f.1581051445.git.sbobroff@linux.ibm.com --- arch/powerpc/kernel/eeh_driver.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index a1eaffe868de..7b048cee767c 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -1184,6 +1184,17 @@ void eeh_handle_special_event(void) eeh_pe_state_mark(pe, EEH_PE_RECOVERING); eeh_handle_normal_event(pe); } else { + eeh_for_each_pe(pe, tmp_pe) + eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev) + edev->mode &= ~EEH_DEV_NO_HANDLER; + + /* Notify all devices to be down */ + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); + eeh_set_channel_state(pe, pci_channel_io_perm_failure); + eeh_pe_report( + "error_detected(permanent failure)", pe, + eeh_report_failure, NULL); + pci_lock_rescan_remove(); list_for_each_entry(hose, &hose_list, list_node) { phb_pe = eeh_phb_pe_get(hose); @@ -1192,16 +1203,6 @@ void eeh_handle_special_event(void) (phb_pe->state & EEH_PE_RECOVERING)) continue; - eeh_for_each_pe(pe, tmp_pe) - eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev) - edev->mode &= ~EEH_DEV_NO_HANDLER; - - /* Notify all devices to be down */ - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); - eeh_set_channel_state(pe, pci_channel_io_perm_failure); - eeh_pe_report( - "error_detected(permanent failure)", pe, - eeh_report_failure, NULL); bus = eeh_pe_bus_get(phb_pe); if (!bus) { pr_err("%s: Cannot find PCI bus for " -- cgit v1.2.3-59-g8ed1b From f2b67ef90b0d5eca0f2255e02cf2f620bc0ddcdb Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 6 Feb 2020 13:50:28 +0000 Subject: powerpc/hugetlb: Fix 512k hugepages on 8xx with 16k page size Commit 55c8fc3f4930 ("powerpc/8xx: reintroduce 16K pages with HW assistance") redefined pte_t as a struct of 4 pte_basic_t, because in 16K pages mode there are four identical entries in the page table. But the size of hugepage tables is calculated based of the size of (void *). Therefore, we end up with page tables of size 1k instead of 4k for 512k pages. As 512k hugepage tables are the same size as standard page tables, ie 4k, use the standard page tables instead of PGT_CACHE tables. Fixes: 3fb69c6a1a13 ("powerpc/8xx: Enable 512k hugepage support with HW assistance") Cc: stable@vger.kernel.org # v5.0+ Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/90ec56a2315be602494619ed0223bba3b0b8d619.1580997007.git.christophe.leroy@c-s.fr --- arch/powerpc/mm/hugetlbpage.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 73d4873fc7f8..33b3461d91e8 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -53,20 +53,24 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, if (pshift >= pdshift) { cachep = PGT_CACHE(PTE_T_ORDER); num_hugepd = 1 << (pshift - pdshift); + new = NULL; } else if (IS_ENABLED(CONFIG_PPC_8xx)) { - cachep = PGT_CACHE(PTE_INDEX_SIZE); + cachep = NULL; num_hugepd = 1; + new = pte_alloc_one(mm); } else { cachep = PGT_CACHE(pdshift - pshift); num_hugepd = 1; + new = NULL; } - if (!cachep) { + if (!cachep && !new) { WARN_ONCE(1, "No page table cache created for hugetlb tables"); return -ENOMEM; } - new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL)); + if (cachep) + new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL)); BUG_ON(pshift > HUGEPD_SHIFT_MASK); BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); @@ -97,7 +101,10 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, if (i < num_hugepd) { for (i = i - 1 ; i >= 0; i--, hpdp--) *hpdp = __hugepd(0); - kmem_cache_free(cachep, new); + if (cachep) + kmem_cache_free(cachep, new); + else + pte_free(mm, new); } else { kmemleak_ignore(new); } @@ -324,8 +331,7 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif if (shift >= pdshift) hugepd_free(tlb, hugepte); else if (IS_ENABLED(CONFIG_PPC_8xx)) - pgtable_free_tlb(tlb, hugepte, - get_hugepd_cache_index(PTE_INDEX_SIZE)); + pgtable_free_tlb(tlb, hugepte, 0); else pgtable_free_tlb(tlb, hugepte, get_hugepd_cache_index(pdshift - shift)); @@ -639,12 +645,13 @@ static int __init hugetlbpage_init(void) * if we have pdshift and shift value same, we don't * use pgt cache for hugepd. */ - if (pdshift > shift && IS_ENABLED(CONFIG_PPC_8xx)) - pgtable_cache_add(PTE_INDEX_SIZE); - else if (pdshift > shift) - pgtable_cache_add(pdshift - shift); - else if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) || IS_ENABLED(CONFIG_PPC_8xx)) + if (pdshift > shift) { + if (!IS_ENABLED(CONFIG_PPC_8xx)) + pgtable_cache_add(pdshift - shift); + } else if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) || + IS_ENABLED(CONFIG_PPC_8xx)) { pgtable_cache_add(PTE_T_ORDER); + } configured = true; } -- cgit v1.2.3-59-g8ed1b From 50a175dd18de7a647e72aca7daf4744e3a5a81e3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 9 Feb 2020 16:02:41 +0000 Subject: powerpc/hugetlb: Fix 8M hugepages on 8xx With HW assistance all page tables must be 4k aligned, the 8xx drops the last 12 bits during the walk. Redefine HUGEPD_SHIFT_MASK to mask last 12 bits out. HUGEPD_SHIFT_MASK is used to for alignment of page table cache. Fixes: 22569b881d37 ("powerpc/8xx: Enable 8M hugepage support with HW assistance") Cc: stable@vger.kernel.org # v5.0+ Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/778b1a248c4c7ca79640eeff7740044da6a220a0.1581264115.git.christophe.leroy@c-s.fr --- arch/powerpc/include/asm/page.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 86332080399a..080a0bf8e54b 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -295,8 +295,13 @@ static inline bool pfn_valid(unsigned long pfn) /* * Some number of bits at the level of the page table that points to * a hugepte are used to encode the size. This masks those bits. + * On 8xx, HW assistance requires 4k alignment for the hugepte. */ +#ifdef CONFIG_PPC_8xx +#define HUGEPD_SHIFT_MASK 0xfff +#else #define HUGEPD_SHIFT_MASK 0x3f +#endif #ifndef __ASSEMBLY__ -- cgit v1.2.3-59-g8ed1b From a4031afb9d10d97f4d0285844abbc0ab04245304 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 9 Feb 2020 18:14:42 +0000 Subject: powerpc/8xx: Fix clearing of bits 20-23 in ITLB miss In ITLB miss handled the line supposed to clear bits 20-23 on the L2 ITLB entry is buggy and does indeed nothing, leading to undefined value which could allow execution when it shouldn't. Properly do the clearing with the relevant instruction. Fixes: 74fabcadfd43 ("powerpc/8xx: don't use r12/SPRN_SPRG_SCRATCH2 in TLB Miss handlers") Cc: stable@vger.kernel.org # v5.0+ Signed-off-by: Christophe Leroy Reviewed-by: Leonardo Bras Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4f70c2778163affce8508a210f65d140e84524b4.1581272050.git.christophe.leroy@c-s.fr --- arch/powerpc/kernel/head_8xx.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 9922306ae512..073a651787df 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -256,7 +256,7 @@ InstructionTLBMiss: * set. All other Linux PTE bits control the behavior * of the MMU. */ - rlwimi r10, r10, 0, 0x0f00 /* Clear bits 20-23 */ + rlwinm r10, r10, 0, ~0x0f00 /* Clear bits 20-23 */ rlwimi r10, r10, 4, 0x0400 /* Copy _PAGE_EXEC into bit 21 */ ori r10, r10, RPN_PATTERN | 0x200 /* Set 22 and 24-27 */ mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ -- cgit v1.2.3-59-g8ed1b From e8023b030ce1748930e2dc76353a262fe47d4745 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 11 Feb 2020 15:32:56 +0800 Subject: selftests: forwarding: use proto icmp for {gretap, ip6gretap}_mac testing For tc ip_proto filter, when we extract the flow via __skb_flow_dissect() without flag FLOW_DISSECTOR_F_STOP_AT_ENCAP, we will continue extract to the inner proto. So for GRE + ICMP messages, we should not track GRE proto, but inner ICMP proto. For test mirror_gre.sh, it may make user confused if we capture ICMP message on $h3(since the flow is GRE message). So I move the capture dev to h3-gt{4,6}, and only capture ICMP message. Before the fix: ]# ./mirror_gre.sh TEST: ingress mirror to gretap (skip_hw) [ OK ] TEST: egress mirror to gretap (skip_hw) [ OK ] TEST: ingress mirror to ip6gretap (skip_hw) [ OK ] TEST: egress mirror to ip6gretap (skip_hw) [ OK ] TEST: ingress mirror to gretap: envelope MAC (skip_hw) [FAIL] Expected to capture 10 packets, got 0. TEST: egress mirror to gretap: envelope MAC (skip_hw) [FAIL] Expected to capture 10 packets, got 0. TEST: ingress mirror to ip6gretap: envelope MAC (skip_hw) [FAIL] Expected to capture 10 packets, got 0. TEST: egress mirror to ip6gretap: envelope MAC (skip_hw) [FAIL] Expected to capture 10 packets, got 0. TEST: two simultaneously configured mirrors (skip_hw) [ OK ] WARN: Could not test offloaded functionality After fix: ]# ./mirror_gre.sh TEST: ingress mirror to gretap (skip_hw) [ OK ] TEST: egress mirror to gretap (skip_hw) [ OK ] TEST: ingress mirror to ip6gretap (skip_hw) [ OK ] TEST: egress mirror to ip6gretap (skip_hw) [ OK ] TEST: ingress mirror to gretap: envelope MAC (skip_hw) [ OK ] TEST: egress mirror to gretap: envelope MAC (skip_hw) [ OK ] TEST: ingress mirror to ip6gretap: envelope MAC (skip_hw) [ OK ] TEST: egress mirror to ip6gretap: envelope MAC (skip_hw) [ OK ] TEST: two simultaneously configured mirrors (skip_hw) [ OK ] WARN: Could not test offloaded functionality Fixes: ba8d39871a10 ("selftests: forwarding: Add test for mirror to gretap") Signed-off-by: Hangbin Liu Reviewed-by: Petr Machata Tested-by: Petr Machata Signed-off-by: David S. Miller --- .../testing/selftests/net/forwarding/mirror_gre.sh | 25 +++++++++++----------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/mirror_gre.sh b/tools/testing/selftests/net/forwarding/mirror_gre.sh index e6fd7a18c655..0266443601bc 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre.sh @@ -63,22 +63,23 @@ test_span_gre_mac() { local tundev=$1; shift local direction=$1; shift - local prot=$1; shift local what=$1; shift - local swp3mac=$(mac_get $swp3) - local h3mac=$(mac_get $h3) + case "$direction" in + ingress) local src_mac=$(mac_get $h1); local dst_mac=$(mac_get $h2) + ;; + egress) local src_mac=$(mac_get $h2); local dst_mac=$(mac_get $h1) + ;; + esac RET=0 mirror_install $swp1 $direction $tundev "matchall $tcflags" - tc filter add dev $h3 ingress pref 77 prot $prot \ - flower ip_proto 0x2f src_mac $swp3mac dst_mac $h3mac \ - action pass + icmp_capture_install h3-${tundev} "src_mac $src_mac dst_mac $dst_mac" - mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10 + mirror_test v$h1 192.0.2.1 192.0.2.2 h3-${tundev} 100 10 - tc filter del dev $h3 ingress pref 77 + icmp_capture_uninstall h3-${tundev} mirror_uninstall $swp1 $direction log_test "$direction $what: envelope MAC ($tcflags)" @@ -120,14 +121,14 @@ test_ip6gretap() test_gretap_mac() { - test_span_gre_mac gt4 ingress ip "mirror to gretap" - test_span_gre_mac gt4 egress ip "mirror to gretap" + test_span_gre_mac gt4 ingress "mirror to gretap" + test_span_gre_mac gt4 egress "mirror to gretap" } test_ip6gretap_mac() { - test_span_gre_mac gt6 ingress ipv6 "mirror to ip6gretap" - test_span_gre_mac gt6 egress ipv6 "mirror to ip6gretap" + test_span_gre_mac gt6 ingress "mirror to ip6gretap" + test_span_gre_mac gt6 egress "mirror to ip6gretap" } test_all() -- cgit v1.2.3-59-g8ed1b From e404b8c7cfb31654c9024d497cec58a501501692 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Wed, 12 Feb 2020 10:41:06 +0900 Subject: ipv6: Fix route replacement with dev-only route After commit 27596472473a ("ipv6: fix ECMP route replacement") it is no longer possible to replace an ECMP-able route by a non ECMP-able route. For example, ip route add 2001:db8::1/128 via fe80::1 dev dummy0 ip route replace 2001:db8::1/128 dev dummy0 does not work as expected. Tweak the replacement logic so that point 3 in the log of the above commit becomes: 3. If the new route is not ECMP-able, and no matching non-ECMP-able route exists, replace matching ECMP-able route (if any) or add the new route. We can now summarize the entire replace semantics to: When doing a replace, prefer replacing a matching route of the same "ECMP-able-ness" as the replace argument. If there is no such candidate, fallback to the first route found. Fixes: 27596472473a ("ipv6: fix ECMP route replacement") Signed-off-by: Benjamin Poirier Reviewed-by: Michal Kubecek Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 7 ++++--- tools/testing/selftests/net/fib_tests.sh | 6 ++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 58fbde244381..72abf892302f 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1102,8 +1102,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, found++; break; } - if (rt_can_ecmp) - fallback_ins = fallback_ins ?: ins; + fallback_ins = fallback_ins ?: ins; goto next_iter; } @@ -1146,7 +1145,9 @@ next_iter: } if (fallback_ins && !found) { - /* No ECMP-able route found, replace first non-ECMP one */ + /* No matching route with same ecmp-able-ness found, replace + * first matching route + */ ins = fallback_ins; iter = rcu_dereference_protected(*ins, lockdep_is_held(&rt->fib6_table->tb6_lock)); diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 6dd403103800..60273f1bc7d9 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -910,6 +910,12 @@ ipv6_rt_replace_mpath() check_route6 "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024" log_test $? 0 "Multipath with single path via multipath attribute" + # multipath with dev-only + add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 dev veth1" + check_route6 "2001:db8:104::/64 dev veth1 metric 1024" + log_test $? 0 "Multipath with dev-only" + # route replace fails - invalid nexthop 1 add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:111::3 nexthop via 2001:db8:103::3" -- cgit v1.2.3-59-g8ed1b From afecdb376bd81d7e16578f0cfe82a1aec7ae18f3 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Wed, 12 Feb 2020 10:41:07 +0900 Subject: ipv6: Fix nlmsg_flags when splitting a multipath route When splitting an RTA_MULTIPATH request into multiple routes and adding the second and later components, we must not simply remove NLM_F_REPLACE but instead replace it by NLM_F_CREATE. Otherwise, it may look like the netlink message was malformed. For example, ip route add 2001:db8::1/128 dev dummy0 ip route change 2001:db8::1/128 nexthop via fe80::30:1 dev dummy0 \ nexthop via fe80::30:2 dev dummy0 results in the following warnings: [ 1035.057019] IPv6: RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE [ 1035.057517] IPv6: NLM_F_CREATE should be set when creating new route This patch makes the nlmsg sequence look equivalent for __ip6_ins_rt() to what it would get if the multipath route had been added in multiple netlink operations: ip route add 2001:db8::1/128 dev dummy0 ip route change 2001:db8::1/128 nexthop via fe80::30:1 dev dummy0 ip route append 2001:db8::1/128 nexthop via fe80::30:2 dev dummy0 Fixes: 27596472473a ("ipv6: fix ECMP route replacement") Signed-off-by: Benjamin Poirier Reviewed-by: Michal Kubecek Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4fbdc60b4e07..2931224b674e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5198,6 +5198,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, */ cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL | NLM_F_REPLACE); + cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE; nhn++; } -- cgit v1.2.3-59-g8ed1b From 0d4597c8c5abdeeaf50774066c16683f30184dc8 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 11 Feb 2020 19:03:55 -0800 Subject: net/rds: Track user mapped pages through special API Convert net/rds to use the newly introduces pin_user_pages() API, which properly sets FOLL_PIN. Setting FOLL_PIN is now required for code that requires tracking of pinned pages. Note that this effectively changes the code's behavior: it now ultimately calls set_page_dirty_lock(), instead of set_page_dirty(). This is probably more accurate. As Christoph Hellwig put it, "set_page_dirty() is only safe if we are dealing with a file backed page where we have reference on the inode it hangs off." [1] [1] https://lore.kernel.org/r/20190723153640.GB720@lst.de Cc: Hans Westgaard Ry Cc: Santosh Shilimkar Signed-off-by: Leon Romanovsky Signed-off-by: John Hubbard Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/rdma.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 3341eee87bf9..585e6b3b69ce 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -162,10 +162,9 @@ static int rds_pin_pages(unsigned long user_addr, unsigned int nr_pages, if (write) gup_flags |= FOLL_WRITE; - ret = get_user_pages_fast(user_addr, nr_pages, gup_flags, pages); + ret = pin_user_pages_fast(user_addr, nr_pages, gup_flags, pages); if (ret >= 0 && ret < nr_pages) { - while (ret--) - put_page(pages[ret]); + unpin_user_pages(pages, ret); ret = -EFAULT; } @@ -300,8 +299,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, * to release anything. */ if (!need_odp) { - for (i = 0 ; i < nents; i++) - put_page(sg_page(&sg[i])); + unpin_user_pages(pages, nr_pages); kfree(sg); } ret = PTR_ERR(trans_private); @@ -325,7 +323,12 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, if (cookie_ret) *cookie_ret = cookie; - if (args->cookie_addr && put_user(cookie, (u64 __user *)(unsigned long) args->cookie_addr)) { + if (args->cookie_addr && + put_user(cookie, (u64 __user *)(unsigned long)args->cookie_addr)) { + if (!need_odp) { + unpin_user_pages(pages, nr_pages); + kfree(sg); + } ret = -EFAULT; goto out; } @@ -496,9 +499,7 @@ void rds_rdma_free_op(struct rm_rdma_op *ro) * is the case for a RDMA_READ which copies from remote * to local memory */ - if (!ro->op_write) - set_page_dirty(page); - put_page(page); + unpin_user_pages_dirty_lock(&page, 1, !ro->op_write); } } @@ -515,8 +516,7 @@ void rds_atomic_free_op(struct rm_atomic_op *ao) /* Mark page dirty if it was possibly modified, which * is the case for a RDMA_READ which copies from remote * to local memory */ - set_page_dirty(page); - put_page(page); + unpin_user_pages_dirty_lock(&page, 1, true); kfree(ao->op_notifier); ao->op_notifier = NULL; @@ -944,7 +944,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, return ret; err: if (page) - put_page(page); + unpin_user_page(page); rm->atomic.op_active = 0; kfree(rm->atomic.op_notifier); -- cgit v1.2.3-59-g8ed1b From 540e585a79e9d643ede077b73bcc7aa2d7b4d919 Mon Sep 17 00:00:00 2001 From: Jethro Beekman Date: Wed, 12 Feb 2020 16:43:41 +0100 Subject: net: fib_rules: Correctly set table field when table number exceeds 8 bits In 709772e6e06564ed94ba740de70185ac3d792773, RT_TABLE_COMPAT was added to allow legacy software to deal with routing table numbers >= 256, but the same change to FIB rule queries was overlooked. Signed-off-by: Jethro Beekman Signed-off-by: David S. Miller --- net/core/fib_rules.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 3e7e15278c46..bd7eba9066f8 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -974,7 +974,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, frh = nlmsg_data(nlh); frh->family = ops->family; - frh->table = rule->table; + frh->table = rule->table < 256 ? rule->table : RT_TABLE_COMPAT; if (nla_put_u32(skb, FRA_TABLE, rule->table)) goto nla_put_failure; if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen)) -- cgit v1.2.3-59-g8ed1b From e6a41c23df0d5da01540d2abef41591589c0b4be Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 12 Feb 2020 17:45:38 +0100 Subject: net: macb: ensure interface is not suspended on at91rm9200 Because of autosuspend, at91ether_start is called with clocks disabled. Ensure that pm_runtime doesn't suspend the interface as soon as it is opened as there is no pm_runtime support is the other relevant parts of the platform support for at91rm9200. Fixes: d54f89af6cc4 ("net: macb: Add pm runtime support") Signed-off-by: Alexandre Belloni Reviewed-by: Claudiu Beznea Acked-by: Nicolas Ferre Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 4508f0d150da..def94e91883a 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3790,6 +3790,10 @@ static int at91ether_open(struct net_device *dev) u32 ctl; int ret; + ret = pm_runtime_get_sync(&lp->pdev->dev); + if (ret < 0) + return ret; + /* Clear internal statistics */ ctl = macb_readl(lp, NCR); macb_writel(lp, NCR, ctl | MACB_BIT(CLRSTAT)); @@ -3854,7 +3858,7 @@ static int at91ether_close(struct net_device *dev) q->rx_buffers, q->rx_buffers_dma); q->rx_buffers = NULL; - return 0; + return pm_runtime_put(&lp->pdev->dev); } /* Transmit packet */ -- cgit v1.2.3-59-g8ed1b From 44bfa9c5e5f06c72540273813e4c66beb5a8c213 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 12 Feb 2020 20:58:26 -0800 Subject: net: rtnetlink: fix bugs in rtnl_alt_ifname() Since IFLA_ALT_IFNAME is an NLA_STRING, we have no guarantee it is nul terminated. We should use nla_strdup() instead of kstrdup(), since this helper will make sure not accessing out-of-bounds data. BUG: KMSAN: uninit-value in strlen+0x5e/0xa0 lib/string.c:535 CPU: 1 PID: 19157 Comm: syz-executor.5 Not tainted 5.5.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x220 lib/dump_stack.c:118 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:118 __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:215 strlen+0x5e/0xa0 lib/string.c:535 kstrdup+0x7f/0x1a0 mm/util.c:59 rtnl_alt_ifname net/core/rtnetlink.c:3495 [inline] rtnl_linkprop+0x85d/0xc00 net/core/rtnetlink.c:3553 rtnl_newlinkprop+0x9d/0xb0 net/core/rtnetlink.c:3568 rtnetlink_rcv_msg+0x1153/0x1570 net/core/rtnetlink.c:5424 netlink_rcv_skb+0x451/0x650 net/netlink/af_netlink.c:2477 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:5442 netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] netlink_unicast+0xf9e/0x1100 net/netlink/af_netlink.c:1328 netlink_sendmsg+0x1248/0x14d0 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:639 [inline] sock_sendmsg net/socket.c:659 [inline] ____sys_sendmsg+0x12b6/0x1350 net/socket.c:2330 ___sys_sendmsg net/socket.c:2384 [inline] __sys_sendmsg+0x451/0x5f0 net/socket.c:2417 __do_sys_sendmsg net/socket.c:2426 [inline] __se_sys_sendmsg+0x97/0xb0 net/socket.c:2424 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2424 do_syscall_64+0xb8/0x160 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x45b3b9 Code: ad b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007ff1c7b1ac78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007ff1c7b1b6d4 RCX: 000000000045b3b9 RDX: 0000000000000000 RSI: 0000000020000040 RDI: 0000000000000003 RBP: 000000000075bf20 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 00000000000009cb R14: 00000000004cb3dd R15: 000000000075bf2c Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:144 [inline] kmsan_internal_poison_shadow+0x66/0xd0 mm/kmsan/kmsan.c:127 kmsan_slab_alloc+0x8a/0xe0 mm/kmsan/kmsan_hooks.c:82 slab_alloc_node mm/slub.c:2774 [inline] __kmalloc_node_track_caller+0xb40/0x1200 mm/slub.c:4382 __kmalloc_reserve net/core/skbuff.c:141 [inline] __alloc_skb+0x2fd/0xac0 net/core/skbuff.c:209 alloc_skb include/linux/skbuff.h:1049 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1174 [inline] netlink_sendmsg+0x7d3/0x14d0 net/netlink/af_netlink.c:1892 sock_sendmsg_nosec net/socket.c:639 [inline] sock_sendmsg net/socket.c:659 [inline] ____sys_sendmsg+0x12b6/0x1350 net/socket.c:2330 ___sys_sendmsg net/socket.c:2384 [inline] __sys_sendmsg+0x451/0x5f0 net/socket.c:2417 __do_sys_sendmsg net/socket.c:2426 [inline] __se_sys_sendmsg+0x97/0xb0 net/socket.c:2424 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2424 do_syscall_64+0xb8/0x160 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 36fbf1e52bd3 ("net: rtnetlink: add linkprop commands to add and delete alternative ifnames") Signed-off-by: Eric Dumazet Cc: Jiri Pirko Reported-by: syzbot Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 09c44bf2e1d2..e1152f4ffe33 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3504,27 +3504,25 @@ static int rtnl_alt_ifname(int cmd, struct net_device *dev, struct nlattr *attr, if (err) return err; - alt_ifname = nla_data(attr); + alt_ifname = nla_strdup(attr, GFP_KERNEL); + if (!alt_ifname) + return -ENOMEM; + if (cmd == RTM_NEWLINKPROP) { - alt_ifname = kstrdup(alt_ifname, GFP_KERNEL); - if (!alt_ifname) - return -ENOMEM; err = netdev_name_node_alt_create(dev, alt_ifname); - if (err) { - kfree(alt_ifname); - return err; - } + if (!err) + alt_ifname = NULL; } else if (cmd == RTM_DELLINKPROP) { err = netdev_name_node_alt_destroy(dev, alt_ifname); - if (err) - return err; } else { - WARN_ON(1); - return 0; + WARN_ON_ONCE(1); + err = -EINVAL; } - *changed = true; - return 0; + kfree(alt_ifname); + if (!err) + *changed = true; + return err; } static int rtnl_linkprop(int cmd, struct sk_buff *skb, struct nlmsghdr *nlh, -- cgit v1.2.3-59-g8ed1b From 4e867c9a50ff1a07ed0b86c3b1c8bc773933d728 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 13 Feb 2020 17:40:54 +0800 Subject: selftests: forwarding: vxlan_bridge_1d: fix tos value After commit 71130f29979c ("vxlan: fix tos value before xmit") we start strict vxlan xmit tos value by RT_TOS(), which limits the tos value less than 0x1E. With current value 0x40 the test will failed with "v1: Expected to capture 10 packets, got 0". So let's choose a smaller tos value for testing. Fixes: d417ecf533fe ("selftests: forwarding: vxlan_bridge_1d: Add a TOS test") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh index bb10e33690b2..353613fc1947 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -516,9 +516,9 @@ test_tos() RET=0 tc filter add dev v1 egress pref 77 prot ip \ - flower ip_tos 0x40 action pass - vxlan_ping_test $h1 192.0.2.3 "-Q 0x40" v1 egress 77 10 - vxlan_ping_test $h1 192.0.2.3 "-Q 0x30" v1 egress 77 0 + flower ip_tos 0x11 action pass + vxlan_ping_test $h1 192.0.2.3 "-Q 0x11" v1 egress 77 10 + vxlan_ping_test $h1 192.0.2.3 "-Q 0x12" v1 egress 77 0 tc filter del dev v1 egress pref 77 prot ip log_test "VXLAN: envelope TOS inheritance" -- cgit v1.2.3-59-g8ed1b From 15beab0a9d797be1b7c67458da007a62269be29a Mon Sep 17 00:00:00 2001 From: Dmitry Bezrukov Date: Fri, 14 Feb 2020 18:44:51 +0300 Subject: net: atlantic: checksum compat issue Yet another checksum offload compatibility issue was found. The known issue is that AQC HW marks tcp packets with 0xFFFF checksum as invalid (1). This is workarounded in driver, passing all the suspicious packets up to the stack for further csum validation. Another HW problem (2) is that it hides invalid csum of LRO aggregated packets inside of the individual descriptors. That was workarounded by forced scan of all LRO descriptors for checksum errors. However the scan logic was joint for both LRO and multi-descriptor packets (jumbos). And this causes the issue. We have to drop LRO packets with the detected bad checksum because of (2), but we have to pass jumbo packets to stack because of (1). When using windows tcp partner with jumbo frames but with LSO disabled driver discards such frames as bad checksummed. But only LRO frames should be dropped, not jumbos. On such a configurations tcp stream have a chance of drops and stucks. (1) 76f254d4afe2 ("net: aquantia: tcp checksum 0xffff being handled incorrectly") (2) d08b9a0a3ebd ("net: aquantia: do not pass lro session with invalid tcp checksum") Fixes: d08b9a0a3ebd ("net: aquantia: do not pass lro session with invalid tcp checksum") Signed-off-by: Dmitry Bezrukov Signed-off-by: Igor Russkikh Signed-off-by: Dmitry Bogdanov Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 3 ++- drivers/net/ethernet/aquantia/atlantic/aq_ring.h | 3 ++- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 5 +++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 951d86f8b66e..6941999ae845 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -351,7 +351,8 @@ int aq_ring_rx_clean(struct aq_ring_s *self, err = 0; goto err_exit; } - if (buff->is_error || buff->is_cso_err) { + if (buff->is_error || + (buff->is_lro && buff->is_cso_err)) { buff_ = buff; do { next_ = buff_->next, diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h index 991e4d31b094..2c96f20f6289 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h @@ -78,7 +78,8 @@ struct __packed aq_ring_buff_s { u32 is_cleaned:1; u32 is_error:1; u32 is_vlan:1; - u32 rsvd3:4; + u32 is_lro:1; + u32 rsvd3:3; u16 eop_index; u16 rsvd4; }; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index ec041f78d063..5784da26f868 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -823,6 +823,8 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self, } } + buff->is_lro = !!(HW_ATL_B0_RXD_WB_STAT2_RSCCNT & + rxd_wb->status); if (HW_ATL_B0_RXD_WB_STAT2_EOP & rxd_wb->status) { buff->len = rxd_wb->pkt_len % AQ_CFG_RX_FRAME_MAX; @@ -835,8 +837,7 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self, rxd_wb->pkt_len > AQ_CFG_RX_FRAME_MAX ? AQ_CFG_RX_FRAME_MAX : rxd_wb->pkt_len; - if (HW_ATL_B0_RXD_WB_STAT2_RSCCNT & - rxd_wb->status) { + if (buff->is_lro) { /* LRO */ buff->next = rxd_wb->next_desc_ptr; ++ring->stats.rx.lro_packets; -- cgit v1.2.3-59-g8ed1b From e7b5f97e6574dc4918e375d5f8d24ec31653cd6d Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Fri, 14 Feb 2020 18:44:52 +0300 Subject: net: atlantic: check rpc result and wait for rpc address Artificial HW reliability tests revealed a possible hangup in the driver. Normally, when device disappears from bus, all register reads returns 0xFFFFFFFF. At remote procedure invocation towards FW there is a logic where result is compared with -1 in a loop. That caused an infinite loop if hardware due to some issues disappears from bus. Add extra result checks to prevent this. Signed-off-by: Dmitry Bogdanov Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- .../ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c index f547baa6c954..354705f9bc49 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c @@ -22,6 +22,7 @@ #define HW_ATL_MIF_ADDR 0x0208U #define HW_ATL_MIF_VAL 0x020CU +#define HW_ATL_MPI_RPC_ADDR 0x0334U #define HW_ATL_RPC_CONTROL_ADR 0x0338U #define HW_ATL_RPC_STATE_ADR 0x033CU @@ -53,15 +54,14 @@ enum mcp_area { }; static int hw_atl_utils_ver_match(u32 ver_expected, u32 ver_actual); - static int hw_atl_utils_mpi_set_state(struct aq_hw_s *self, enum hal_atl_utils_fw_state_e state); - static u32 hw_atl_utils_get_mpi_mbox_tid(struct aq_hw_s *self); static u32 hw_atl_utils_mpi_get_state(struct aq_hw_s *self); static u32 hw_atl_utils_mif_cmd_get(struct aq_hw_s *self); static u32 hw_atl_utils_mif_addr_get(struct aq_hw_s *self); static u32 hw_atl_utils_rpc_state_get(struct aq_hw_s *self); +static u32 aq_fw1x_rpc_get(struct aq_hw_s *self); int hw_atl_utils_initfw(struct aq_hw_s *self, const struct aq_fw_ops **fw_ops) { @@ -476,6 +476,10 @@ static int hw_atl_utils_init_ucp(struct aq_hw_s *self, self, self->mbox_addr, self->mbox_addr != 0U, 1000U, 10000U); + err = readx_poll_timeout_atomic(aq_fw1x_rpc_get, self, + self->rpc_addr, + self->rpc_addr != 0U, + 1000U, 100000U); return err; } @@ -531,6 +535,12 @@ int hw_atl_utils_fw_rpc_wait(struct aq_hw_s *self, self, fw.val, sw.tid == fw.tid, 1000U, 100000U); + if (err < 0) + goto err_exit; + + err = aq_hw_err_from_flags(self); + if (err < 0) + goto err_exit; if (fw.len == 0xFFFFU) { err = hw_atl_utils_fw_rpc_call(self, sw.len); @@ -1025,6 +1035,11 @@ static u32 hw_atl_utils_rpc_state_get(struct aq_hw_s *self) return aq_hw_read_reg(self, HW_ATL_RPC_STATE_ADR); } +static u32 aq_fw1x_rpc_get(struct aq_hw_s *self) +{ + return aq_hw_read_reg(self, HW_ATL_MPI_RPC_ADDR); +} + const struct aq_fw_ops aq_fw_1x_ops = { .init = hw_atl_utils_mpi_create, .deinit = hw_atl_fw1x_deinit, -- cgit v1.2.3-59-g8ed1b From f08a464c27ca0a4050333baa271504b27ce834b7 Mon Sep 17 00:00:00 2001 From: Egor Pomozov Date: Fri, 14 Feb 2020 18:44:53 +0300 Subject: net: atlantic: ptp gpio adjustments Clock adjustment data should be passed to FW as well, otherwise in some cases a drift was observed when using GPIO features. Signed-off-by: Egor Pomozov Signed-off-by: Igor Russkikh Signed-off-by: Dmitry Bogdanov Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_hw.h | 2 ++ drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 4 +++- .../ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c | 12 ++++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h index cc70c606b6ef..251767c31f7e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h @@ -337,6 +337,8 @@ struct aq_fw_ops { void (*enable_ptp)(struct aq_hw_s *self, int enable); + void (*adjust_ptp)(struct aq_hw_s *self, uint64_t adj); + int (*set_eee_rate)(struct aq_hw_s *self, u32 speed); int (*get_eee_rate)(struct aq_hw_s *self, u32 *rate, diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 5784da26f868..9acdb3fbb750 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -1162,6 +1162,8 @@ static int hw_atl_b0_adj_sys_clock(struct aq_hw_s *self, s64 delta) { self->ptp_clk_offset += delta; + self->aq_fw_ops->adjust_ptp(self, self->ptp_clk_offset); + return 0; } @@ -1212,7 +1214,7 @@ static int hw_atl_b0_gpio_pulse(struct aq_hw_s *self, u32 index, fwreq.ptp_gpio_ctrl.index = index; fwreq.ptp_gpio_ctrl.period = period; /* Apply time offset */ - fwreq.ptp_gpio_ctrl.start = start - self->ptp_clk_offset; + fwreq.ptp_gpio_ctrl.start = start; size = sizeof(fwreq.msg_id) + sizeof(fwreq.ptp_gpio_ctrl); return self->aq_fw_ops->send_fw_request(self, &fwreq, size); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c index 97ebf849695f..77a4ed64830f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c @@ -30,6 +30,9 @@ #define HW_ATL_FW3X_EXT_CONTROL_ADDR 0x378 #define HW_ATL_FW3X_EXT_STATE_ADDR 0x37c +#define HW_ATL_FW3X_PTP_ADJ_LSW_ADDR 0x50a0 +#define HW_ATL_FW3X_PTP_ADJ_MSW_ADDR 0x50a4 + #define HW_ATL_FW2X_CAP_PAUSE BIT(CAPS_HI_PAUSE) #define HW_ATL_FW2X_CAP_ASYM_PAUSE BIT(CAPS_HI_ASYMMETRIC_PAUSE) #define HW_ATL_FW2X_CAP_SLEEP_PROXY BIT(CAPS_HI_SLEEP_PROXY) @@ -475,6 +478,14 @@ static void aq_fw3x_enable_ptp(struct aq_hw_s *self, int enable) aq_hw_write_reg(self, HW_ATL_FW3X_EXT_CONTROL_ADDR, ptp_opts); } +static void aq_fw3x_adjust_ptp(struct aq_hw_s *self, uint64_t adj) +{ + aq_hw_write_reg(self, HW_ATL_FW3X_PTP_ADJ_LSW_ADDR, + (adj >> 0) & 0xffffffff); + aq_hw_write_reg(self, HW_ATL_FW3X_PTP_ADJ_MSW_ADDR, + (adj >> 32) & 0xffffffff); +} + static int aq_fw2x_led_control(struct aq_hw_s *self, u32 mode) { if (self->fw_ver_actual < HW_ATL_FW_VER_LED) @@ -633,4 +644,5 @@ const struct aq_fw_ops aq_fw_2x_ops = { .enable_ptp = aq_fw3x_enable_ptp, .led_control = aq_fw2x_led_control, .set_phyloopback = aq_fw2x_set_phyloopback, + .adjust_ptp = aq_fw3x_adjust_ptp, }; -- cgit v1.2.3-59-g8ed1b From b42726fcf76e9367e524392e0ead7e672cc0791c Mon Sep 17 00:00:00 2001 From: Nikita Danilov Date: Fri, 14 Feb 2020 18:44:54 +0300 Subject: net: atlantic: better loopback mode handling Add checks to not enable multiple loopback modes simultaneously, It was also discovered that for dma loopback to function correctly promisc mode should be enabled on device. Fixes: ea4b4d7fc106 ("net: atlantic: loopback tests via private flags") Signed-off-by: Nikita Danilov Signed-off-by: Igor Russkikh Signed-off-by: Dmitry Bogdanov Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c | 5 +++++ drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 13 ++++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c index a1f99bef4a68..7b55633d2cb9 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c @@ -722,6 +722,11 @@ static int aq_ethtool_set_priv_flags(struct net_device *ndev, u32 flags) if (flags & ~AQ_PRIV_FLAGS_MASK) return -EOPNOTSUPP; + if (hweight32((flags | priv_flags) & AQ_HW_LOOPBACK_MASK) > 1) { + netdev_info(ndev, "Can't enable more than one loopback simultaneously\n"); + return -EINVAL; + } + cfg->priv_flags = flags; if ((priv_flags ^ flags) & BIT(AQ_HW_LOOPBACK_DMA_NET)) { diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 9acdb3fbb750..d20d91cdece8 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -885,13 +885,16 @@ static int hw_atl_b0_hw_packet_filter_set(struct aq_hw_s *self, { struct aq_nic_cfg_s *cfg = self->aq_nic_cfg; unsigned int i = 0U; + u32 vlan_promisc; + u32 l2_promisc; - hw_atl_rpfl2promiscuous_mode_en_set(self, - IS_FILTER_ENABLED(IFF_PROMISC)); + l2_promisc = IS_FILTER_ENABLED(IFF_PROMISC) || + !!(cfg->priv_flags & BIT(AQ_HW_LOOPBACK_DMA_NET)); + vlan_promisc = l2_promisc || cfg->is_vlan_force_promisc; - hw_atl_rpf_vlan_prom_mode_en_set(self, - IS_FILTER_ENABLED(IFF_PROMISC) || - cfg->is_vlan_force_promisc); + hw_atl_rpfl2promiscuous_mode_en_set(self, l2_promisc); + + hw_atl_rpf_vlan_prom_mode_en_set(self, vlan_promisc); hw_atl_rpfl2multicast_flr_en_set(self, IS_FILTER_ENABLED(IFF_ALLMULTI) && -- cgit v1.2.3-59-g8ed1b From a4980919ad6a7be548d499bc5338015e1a9191c6 Mon Sep 17 00:00:00 2001 From: Pavel Belous Date: Fri, 14 Feb 2020 18:44:55 +0300 Subject: net: atlantic: fix use after free kasan warn skb->len is used to calculate statistics after xmit invocation. Under a stress load it may happen that skb will be xmited, rx interrupt will come and skb will be freed, all before xmit function is even returned. Eventually, skb->len will access unallocated area. Moving stats calculation into tx_clean routine. Fixes: 018423e90bee ("net: ethernet: aquantia: Add ring support code") Reported-by: Christophe Vu-Brugier Signed-off-by: Igor Russkikh Signed-off-by: Pavel Belous Signed-off-by: Dmitry Bogdanov Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 4 ---- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 7 +++++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index c85e3e29012c..263beea1859c 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -655,10 +655,6 @@ int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb) if (likely(frags)) { err = self->aq_hw_ops->hw_ring_tx_xmit(self->aq_hw, ring, frags); - if (err >= 0) { - ++ring->stats.tx.packets; - ring->stats.tx.bytes += skb->len; - } } else { err = NETDEV_TX_BUSY; } diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 6941999ae845..bae95a618560 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -272,9 +272,12 @@ bool aq_ring_tx_clean(struct aq_ring_s *self) } } - if (unlikely(buff->is_eop)) - dev_kfree_skb_any(buff->skb); + if (unlikely(buff->is_eop)) { + ++self->stats.rx.packets; + self->stats.tx.bytes += buff->skb->len; + dev_kfree_skb_any(buff->skb); + } buff->pa = 0U; buff->eop_index = 0xffffU; self->sw_head = aq_ring_next_dx(self, self->sw_head); -- cgit v1.2.3-59-g8ed1b From 380ec5b9af7f0d57dbf6ac067fd9f33cff2fef71 Mon Sep 17 00:00:00 2001 From: Pavel Belous Date: Fri, 14 Feb 2020 18:44:56 +0300 Subject: net: atlantic: fix potential error handling Code inspection found that in case of mapping error we do return current 'ret' value. But beside error, it is used to count number of descriptors allocated for the packet. In that case map_skb function could return '1'. Changing it to return zero (number of mapped descriptors for skb) Fixes: 018423e90bee ("net: ethernet: aquantia: Add ring support code") Signed-off-by: Pavel Belous Signed-off-by: Igor Russkikh Signed-off-by: Dmitry Bogdanov Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 263beea1859c..e95f6a6bef73 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -533,8 +533,10 @@ unsigned int aq_nic_map_skb(struct aq_nic_s *self, struct sk_buff *skb, dx_buff->len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(aq_nic_get_dev(self), dx_buff->pa))) + if (unlikely(dma_mapping_error(aq_nic_get_dev(self), dx_buff->pa))) { + ret = 0; goto exit; + } first = dx_buff; dx_buff->len_pkt = skb->len; -- cgit v1.2.3-59-g8ed1b From 52a22f4d6ff95e8bdca557765c04893eb5dd83fd Mon Sep 17 00:00:00 2001 From: Pavel Belous Date: Fri, 14 Feb 2020 18:44:57 +0300 Subject: net: atlantic: possible fault in transition to hibernation during hibernation freeze, aq_nic_stop could be invoked on a stopped device. That may cause panic on access to not yet allocated vector/ring structures. Add a check to stop device if it is not yet stopped. Similiarly after freeze in hibernation thaw, aq_nic_start could be invoked on a not initialized net device. Result will be the same. Add a check to start device if it is initialized. In our case, this is the same as started. Fixes: 8aaa112a57c1 ("net: atlantic: refactoring pm logic") Signed-off-by: Pavel Belous Signed-off-by: Nikita Danilov Signed-off-by: Igor Russkikh Signed-off-by: Dmitry Bogdanov Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index 6b27af0db499..78b6f3248756 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -359,7 +359,8 @@ static int aq_suspend_common(struct device *dev, bool deep) netif_device_detach(nic->ndev); netif_tx_stop_all_queues(nic->ndev); - aq_nic_stop(nic); + if (netif_running(nic->ndev)) + aq_nic_stop(nic); if (deep) { aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol); @@ -375,7 +376,7 @@ static int atl_resume_common(struct device *dev, bool deep) { struct pci_dev *pdev = to_pci_dev(dev); struct aq_nic_s *nic; - int ret; + int ret = 0; nic = pci_get_drvdata(pdev); @@ -390,9 +391,11 @@ static int atl_resume_common(struct device *dev, bool deep) goto err_exit; } - ret = aq_nic_start(nic); - if (ret) - goto err_exit; + if (netif_running(nic->ndev)) { + ret = aq_nic_start(nic); + if (ret) + goto err_exit; + } netif_device_attach(nic->ndev); netif_tx_start_all_queues(nic->ndev); -- cgit v1.2.3-59-g8ed1b From 5a292c89a84d49b598f8978f154bdda48b1072c0 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Fri, 14 Feb 2020 18:44:58 +0300 Subject: net: atlantic: fix out of range usage of active_vlans array fix static checker warning: drivers/net/ethernet/aquantia/atlantic/aq_filters.c:166 aq_check_approve_fvlan() error: passing untrusted data to 'test_bit()' Reported-by: Dan Carpenter Fixes: 7975d2aff5af: ("net: aquantia: add support of rx-vlan-filter offload") Signed-off-by: Dmitry Bogdanov Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_filters.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c index 6102251bb909..03ff92bc4a7f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c @@ -163,7 +163,7 @@ aq_check_approve_fvlan(struct aq_nic_s *aq_nic, } if ((aq_nic->ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) && - (!test_bit(be16_to_cpu(fsp->h_ext.vlan_tci), + (!test_bit(be16_to_cpu(fsp->h_ext.vlan_tci) & VLAN_VID_MASK, aq_nic->active_vlans))) { netdev_err(aq_nic->ndev, "ethtool: unknown vlan-id specified"); -- cgit v1.2.3-59-g8ed1b From e08ad80551b4b33c02f2fce1522f6c227d3976cf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 14 Feb 2020 07:53:53 -0800 Subject: net: add strict checks in netdev_name_node_alt_destroy() netdev_name_node_alt_destroy() does a lookup over all device names of a namespace. We need to make sure the name belongs to the device of interest, and that we do not destroy its primary name, since we rely on it being not deleted : dev->name_node would indeed point to freed memory. syzbot report was the following : BUG: KASAN: use-after-free in dev_net include/linux/netdevice.h:2206 [inline] BUG: KASAN: use-after-free in mld_force_mld_version net/ipv6/mcast.c:1172 [inline] BUG: KASAN: use-after-free in mld_in_v2_mode_only net/ipv6/mcast.c:1180 [inline] BUG: KASAN: use-after-free in mld_in_v1_mode+0x203/0x230 net/ipv6/mcast.c:1190 Read of size 8 at addr ffff88809886c588 by task swapper/1/0 CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.6.0-rc1-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x197/0x210 lib/dump_stack.c:118 print_address_description.constprop.0.cold+0xd4/0x30b mm/kasan/report.c:374 __kasan_report.cold+0x1b/0x32 mm/kasan/report.c:506 kasan_report+0x12/0x20 mm/kasan/common.c:641 __asan_report_load8_noabort+0x14/0x20 mm/kasan/generic_report.c:135 dev_net include/linux/netdevice.h:2206 [inline] mld_force_mld_version net/ipv6/mcast.c:1172 [inline] mld_in_v2_mode_only net/ipv6/mcast.c:1180 [inline] mld_in_v1_mode+0x203/0x230 net/ipv6/mcast.c:1190 mld_send_initial_cr net/ipv6/mcast.c:2083 [inline] mld_dad_timer_expire+0x24/0x230 net/ipv6/mcast.c:2118 call_timer_fn+0x1ac/0x780 kernel/time/timer.c:1404 expire_timers kernel/time/timer.c:1449 [inline] __run_timers kernel/time/timer.c:1773 [inline] __run_timers kernel/time/timer.c:1740 [inline] run_timer_softirq+0x6c3/0x1790 kernel/time/timer.c:1786 __do_softirq+0x262/0x98c kernel/softirq.c:292 invoke_softirq kernel/softirq.c:373 [inline] irq_exit+0x19b/0x1e0 kernel/softirq.c:413 exiting_irq arch/x86/include/asm/apic.h:546 [inline] smp_apic_timer_interrupt+0x1a3/0x610 arch/x86/kernel/apic/apic.c:1146 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:829 RIP: 0010:native_safe_halt+0xe/0x10 arch/x86/include/asm/irqflags.h:61 Code: 68 73 c5 f9 eb 8a cc cc cc cc cc cc e9 07 00 00 00 0f 00 2d 94 be 59 00 f4 c3 66 90 e9 07 00 00 00 0f 00 2d 84 be 59 00 fb f4 cc 55 48 89 e5 41 57 41 56 41 55 41 54 53 e8 de 2a 74 f9 e8 09 RSP: 0018:ffffc90000d3fd68 EFLAGS: 00000282 ORIG_RAX: ffffffffffffff13 RAX: 1ffffffff136761a RBX: ffff8880a99fc340 RCX: 0000000000000000 RDX: dffffc0000000000 RSI: 0000000000000006 RDI: ffff8880a99fcbd4 RBP: ffffc90000d3fd98 R08: ffff8880a99fc340 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: dffffc0000000000 R13: ffffffff8aa5a1c0 R14: 0000000000000000 R15: 0000000000000001 arch_cpu_idle+0xa/0x10 arch/x86/kernel/process.c:686 default_idle_call+0x84/0xb0 kernel/sched/idle.c:94 cpuidle_idle_call kernel/sched/idle.c:154 [inline] do_idle+0x3c8/0x6e0 kernel/sched/idle.c:269 cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:361 start_secondary+0x2f4/0x410 arch/x86/kernel/smpboot.c:264 secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:242 Allocated by task 10229: save_stack+0x23/0x90 mm/kasan/common.c:72 set_track mm/kasan/common.c:80 [inline] __kasan_kmalloc mm/kasan/common.c:515 [inline] __kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:488 kasan_kmalloc+0x9/0x10 mm/kasan/common.c:529 __do_kmalloc_node mm/slab.c:3616 [inline] __kmalloc_node+0x4e/0x70 mm/slab.c:3623 kmalloc_node include/linux/slab.h:578 [inline] kvmalloc_node+0x68/0x100 mm/util.c:574 kvmalloc include/linux/mm.h:645 [inline] kvzalloc include/linux/mm.h:653 [inline] alloc_netdev_mqs+0x98/0xe40 net/core/dev.c:9797 rtnl_create_link+0x22d/0xaf0 net/core/rtnetlink.c:3047 __rtnl_newlink+0xf9f/0x1790 net/core/rtnetlink.c:3309 rtnl_newlink+0x69/0xa0 net/core/rtnetlink.c:3377 rtnetlink_rcv_msg+0x45e/0xaf0 net/core/rtnetlink.c:5438 netlink_rcv_skb+0x177/0x450 net/netlink/af_netlink.c:2477 rtnetlink_rcv+0x1d/0x30 net/core/rtnetlink.c:5456 netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] netlink_unicast+0x59e/0x7e0 net/netlink/af_netlink.c:1328 netlink_sendmsg+0x91c/0xea0 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg+0xd7/0x130 net/socket.c:672 __sys_sendto+0x262/0x380 net/socket.c:1998 __do_compat_sys_socketcall net/compat.c:771 [inline] __se_compat_sys_socketcall net/compat.c:719 [inline] __ia32_compat_sys_socketcall+0x530/0x710 net/compat.c:719 do_syscall_32_irqs_on arch/x86/entry/common.c:337 [inline] do_fast_syscall_32+0x27b/0xe16 arch/x86/entry/common.c:408 entry_SYSENTER_compat+0x70/0x7f arch/x86/entry/entry_64_compat.S:139 Freed by task 10229: save_stack+0x23/0x90 mm/kasan/common.c:72 set_track mm/kasan/common.c:80 [inline] kasan_set_free_info mm/kasan/common.c:337 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/common.c:476 kasan_slab_free+0xe/0x10 mm/kasan/common.c:485 __cache_free mm/slab.c:3426 [inline] kfree+0x10a/0x2c0 mm/slab.c:3757 __netdev_name_node_alt_destroy+0x1ff/0x2a0 net/core/dev.c:322 netdev_name_node_alt_destroy+0x57/0x80 net/core/dev.c:334 rtnl_alt_ifname net/core/rtnetlink.c:3518 [inline] rtnl_linkprop.isra.0+0x575/0x6f0 net/core/rtnetlink.c:3567 rtnl_dellinkprop+0x46/0x60 net/core/rtnetlink.c:3588 rtnetlink_rcv_msg+0x45e/0xaf0 net/core/rtnetlink.c:5438 netlink_rcv_skb+0x177/0x450 net/netlink/af_netlink.c:2477 rtnetlink_rcv+0x1d/0x30 net/core/rtnetlink.c:5456 netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] netlink_unicast+0x59e/0x7e0 net/netlink/af_netlink.c:1328 netlink_sendmsg+0x91c/0xea0 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg+0xd7/0x130 net/socket.c:672 ____sys_sendmsg+0x753/0x880 net/socket.c:2343 ___sys_sendmsg+0x100/0x170 net/socket.c:2397 __sys_sendmsg+0x105/0x1d0 net/socket.c:2430 __compat_sys_sendmsg net/compat.c:642 [inline] __do_compat_sys_sendmsg net/compat.c:649 [inline] __se_compat_sys_sendmsg net/compat.c:646 [inline] __ia32_compat_sys_sendmsg+0x7a/0xb0 net/compat.c:646 do_syscall_32_irqs_on arch/x86/entry/common.c:337 [inline] do_fast_syscall_32+0x27b/0xe16 arch/x86/entry/common.c:408 entry_SYSENTER_compat+0x70/0x7f arch/x86/entry/entry_64_compat.S:139 The buggy address belongs to the object at ffff88809886c000 which belongs to the cache kmalloc-4k of size 4096 The buggy address is located 1416 bytes inside of 4096-byte region [ffff88809886c000, ffff88809886d000) The buggy address belongs to the page: page:ffffea0002621b00 refcount:1 mapcount:0 mapping:ffff8880aa402000 index:0x0 compound_mapcount: 0 flags: 0xfffe0000010200(slab|head) raw: 00fffe0000010200 ffffea0002610d08 ffffea0002607608 ffff8880aa402000 raw: 0000000000000000 ffff88809886c000 0000000100000001 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88809886c480: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88809886c500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff88809886c580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88809886c600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88809886c680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fixes: 36fbf1e52bd3 ("net: rtnetlink: add linkprop commands to add and delete alternative ifnames") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Jiri Pirko Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/dev.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index a6316b336128..b6d13f3f1e5a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -331,6 +331,12 @@ int netdev_name_node_alt_destroy(struct net_device *dev, const char *name) name_node = netdev_name_node_lookup(net, name); if (!name_node) return -ENOENT; + /* lookup might have found our primary name or a name belonging + * to another device. + */ + if (name_node == dev->name_node || name_node->dev != dev) + return -EINVAL; + __netdev_name_node_alt_destroy(name_node); return 0; -- cgit v1.2.3-59-g8ed1b From 6f08e98d62799e53c89dbf2c9a49d77e20ca648c Mon Sep 17 00:00:00 2001 From: Arun Parameswaran Date: Fri, 14 Feb 2020 13:47:46 -0800 Subject: net: phy: restore mdio regs in the iproc mdio driver The mii management register in iproc mdio block does not have a retention register so it is lost on suspend. Save and restore value of register while resuming from suspend. Fixes: bb1a619735b4 ("net: phy: Initialize mdio clock at probe function") Signed-off-by: Arun Parameswaran Signed-off-by: Scott Branden Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/mdio-bcm-iproc.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/net/phy/mdio-bcm-iproc.c b/drivers/net/phy/mdio-bcm-iproc.c index 7e9975d25066..f1ded03f0229 100644 --- a/drivers/net/phy/mdio-bcm-iproc.c +++ b/drivers/net/phy/mdio-bcm-iproc.c @@ -178,6 +178,23 @@ static int iproc_mdio_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM_SLEEP +int iproc_mdio_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct iproc_mdio_priv *priv = platform_get_drvdata(pdev); + + /* restore the mii clock configuration */ + iproc_mdio_config_clk(priv->base); + + return 0; +} + +static const struct dev_pm_ops iproc_mdio_pm_ops = { + .resume = iproc_mdio_resume +}; +#endif /* CONFIG_PM_SLEEP */ + static const struct of_device_id iproc_mdio_of_match[] = { { .compatible = "brcm,iproc-mdio", }, { /* sentinel */ }, @@ -188,6 +205,9 @@ static struct platform_driver iproc_mdio_driver = { .driver = { .name = "iproc-mdio", .of_match_table = iproc_mdio_of_match, +#ifdef CONFIG_PM_SLEEP + .pm = &iproc_mdio_pm_ops, +#endif }, .probe = iproc_mdio_probe, .remove = iproc_mdio_remove, -- cgit v1.2.3-59-g8ed1b From b6e4a1aeeb14cad595f70b31cc376903d322c821 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Fri, 14 Feb 2020 14:14:29 -0800 Subject: mptcp: Protect subflow socket options before connection completes Userspace should not be able to directly manipulate subflow socket options before a connection is established since it is not yet known if it will be an MPTCP subflow or a TCP fallback subflow. TCP fallback subflows can be more directly controlled by userspace because they are regular TCP connections, while MPTCP subflow sockets need to be configured for the specific needs of MPTCP. Use the same logic as sendmsg/recvmsg to ensure that socket option calls are only passed through to known TCP fallback subflows. Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 48 +++++++++++++++++++----------------------------- 1 file changed, 19 insertions(+), 29 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 030dee668e0a..e9aa6807b5be 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -755,60 +755,50 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { struct mptcp_sock *msk = mptcp_sk(sk); - int ret = -EOPNOTSUPP; struct socket *ssock; - struct sock *ssk; pr_debug("msk=%p", msk); /* @@ the meaning of setsockopt() when the socket is connected and - * there are multiple subflows is not defined. + * there are multiple subflows is not yet defined. It is up to the + * MPTCP-level socket to configure the subflows until the subflow + * is in TCP fallback, when TCP socket options are passed through + * to the one remaining subflow. */ lock_sock(sk); - ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE); - if (IS_ERR(ssock)) { - release_sock(sk); - return ret; - } + ssock = __mptcp_tcp_fallback(msk); + if (ssock) + return tcp_setsockopt(ssock->sk, level, optname, optval, + optlen); - ssk = ssock->sk; - sock_hold(ssk); release_sock(sk); - ret = tcp_setsockopt(ssk, level, optname, optval, optlen); - sock_put(ssk); - - return ret; + return -EOPNOTSUPP; } static int mptcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *option) { struct mptcp_sock *msk = mptcp_sk(sk); - int ret = -EOPNOTSUPP; struct socket *ssock; - struct sock *ssk; pr_debug("msk=%p", msk); - /* @@ the meaning of getsockopt() when the socket is connected and - * there are multiple subflows is not defined. + /* @@ the meaning of setsockopt() when the socket is connected and + * there are multiple subflows is not yet defined. It is up to the + * MPTCP-level socket to configure the subflows until the subflow + * is in TCP fallback, when socket options are passed through + * to the one remaining subflow. */ lock_sock(sk); - ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE); - if (IS_ERR(ssock)) { - release_sock(sk); - return ret; - } + ssock = __mptcp_tcp_fallback(msk); + if (ssock) + return tcp_getsockopt(ssock->sk, level, optname, optval, + option); - ssk = ssock->sk; - sock_hold(ssk); release_sock(sk); - ret = tcp_getsockopt(ssk, level, optname, optval, option); - sock_put(ssk); - - return ret; + return -EOPNOTSUPP; } static int mptcp_get_port(struct sock *sk, unsigned short snum) -- cgit v1.2.3-59-g8ed1b From 04ddf1208f03e1dbc39a4619c40eba640051b950 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 14 Feb 2020 23:57:20 +0100 Subject: wireguard: selftests: reduce complexity and fix make races This gives us fewer dependencies and shortens build time, fixes up some hash checking race conditions, and also fixes missing directory creation that caused issues on massively parallel builds. Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- tools/testing/selftests/wireguard/qemu/Makefile | 38 +++++++++---------------- 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile index f10aa3590adc..28d477683e8a 100644 --- a/tools/testing/selftests/wireguard/qemu/Makefile +++ b/tools/testing/selftests/wireguard/qemu/Makefile @@ -38,19 +38,17 @@ endef define file_download = $(DISTFILES_PATH)/$(1): mkdir -p $(DISTFILES_PATH) - flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp' - if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi + flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi' endef $(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/,1370c9a812b2cf2a7d92802510cca0058cc37e66a7bedd70051f0a34015022a3)) -$(eval $(call tar_download,LIBMNL,libmnl,1.0.4,.tar.bz2,https://www.netfilter.org/projects/libmnl/files/,171f89699f286a5854b72b91d06e8f8e3683064c5901fb09d954a9ab6f551f81)) $(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c)) $(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d)) $(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae)) $(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c)) $(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa)) $(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a)) -$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20191226,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,aa8af0fdc9872d369d8c890a84dbc2a2466b55795dccd5b47721b2d97644b04f)) +$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20200206,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,f5207248c6a3c3e3bfc9ab30b91c1897b00802ed861e1f9faaed873366078c64)) KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug) rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) @@ -295,21 +293,13 @@ $(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS) $(MAKE) -C $(IPERF_PATH) $(STRIP) -s $@ -$(LIBMNL_PATH)/.installed: $(LIBMNL_TAR) - flock -s $<.lock tar -C $(BUILD_PATH) -xf $< - touch $@ - -$(LIBMNL_PATH)/src/.libs/libmnl.a: | $(LIBMNL_PATH)/.installed $(USERSPACE_DEPS) - cd $(LIBMNL_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared - $(MAKE) -C $(LIBMNL_PATH) - sed -i 's:prefix=.*:prefix=$(LIBMNL_PATH):' $(LIBMNL_PATH)/libmnl.pc - $(WIREGUARD_TOOLS_PATH)/.installed: $(WIREGUARD_TOOLS_TAR) + mkdir -p $(BUILD_PATH) flock -s $<.lock tar -C $(BUILD_PATH) -xf $< touch $@ -$(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) - LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg +$(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(USERSPACE_DEPS) + $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src wg $(STRIP) -s $@ $(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS) @@ -340,17 +330,17 @@ $(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS) $(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR) mkdir -p $(BUILD_PATH) flock -s $<.lock tar -C $(BUILD_PATH) -xf $< - printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=y\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS -DHAVE_LIBMNL -I$(LIBMNL_PATH)/include\nLDLIBS+=-lmnl' > $(IPROUTE2_PATH)/config.mk + printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=n\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS\n' > $(IPROUTE2_PATH)/config.mk printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile touch $@ -$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) - LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip - $(STRIP) -s $(IPROUTE2_PATH)/ip/ip +$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS) + $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip + $(STRIP) -s $@ -$(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) - LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss - $(STRIP) -s $(IPROUTE2_PATH)/misc/ss +$(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS) + $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss + $(STRIP) -s $@ $(IPTABLES_PATH)/.installed: $(IPTABLES_TAR) mkdir -p $(BUILD_PATH) @@ -358,8 +348,8 @@ $(IPTABLES_PATH)/.installed: $(IPTABLES_TAR) sed -i -e "/nfnetlink=[01]/s:=[01]:=0:" -e "/nfconntrack=[01]/s:=[01]:=0:" $(IPTABLES_PATH)/configure touch $@ -$(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) - cd $(IPTABLES_PATH) && PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --with-kernel=$(BUILD_PATH)/include +$(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(USERSPACE_DEPS) + cd $(IPTABLES_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --disable-connlabel --with-kernel=$(BUILD_PATH)/include $(MAKE) -C $(IPTABLES_PATH) $(STRIP) -s $@ -- cgit v1.2.3-59-g8ed1b From 2a8a4df36462aa85b0db87b7c5ea145ba67e34a8 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 14 Feb 2020 23:57:21 +0100 Subject: wireguard: receive: reset last_under_load to zero This is a small optimization that prevents more expensive comparisons from happening when they are no longer necessary, by clearing the last_under_load variable whenever we wind up in a state where we were under load but we no longer are. Signed-off-by: Jason A. Donenfeld Suggested-by: Matt Dunwoodie Signed-off-by: David S. Miller --- drivers/net/wireguard/receive.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c index 9c6bab9c981f..4a153894cee2 100644 --- a/drivers/net/wireguard/receive.c +++ b/drivers/net/wireguard/receive.c @@ -118,10 +118,13 @@ static void wg_receive_handshake_packet(struct wg_device *wg, under_load = skb_queue_len(&wg->incoming_handshakes) >= MAX_QUEUED_INCOMING_HANDSHAKES / 8; - if (under_load) + if (under_load) { last_under_load = ktime_get_coarse_boottime_ns(); - else if (last_under_load) + } else if (last_under_load) { under_load = !wg_birthdate_has_expired(last_under_load, 1); + if (!under_load) + last_under_load = 0; + } mac_state = wg_cookie_validate_packet(&wg->cookie_checker, skb, under_load); if ((under_load && mac_state == VALID_MAC_WITH_COOKIE) || -- cgit v1.2.3-59-g8ed1b From 175f1ca9a9ed8689d2028da1a7c624bb4fb4ff7e Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 14 Feb 2020 23:57:22 +0100 Subject: wireguard: send: account for mtu=0 devices It turns out there's an easy way to get packets queued up while still having an MTU of zero, and that's via persistent keep alive. This commit makes sure that in whatever condition, we don't wind up dividing by zero. Note that an MTU of zero for a wireguard interface is something quasi-valid, so I don't think the correct fix is to limit it via min_mtu. This can be reproduced easily with: ip link add wg0 type wireguard ip link add wg1 type wireguard ip link set wg0 up mtu 0 ip link set wg1 up wg set wg0 private-key <(wg genkey) wg set wg1 listen-port 1 private-key <(wg genkey) peer $(wg show wg0 public-key) wg set wg0 peer $(wg show wg1 public-key) persistent-keepalive 1 endpoint 127.0.0.1:1 However, while min_mtu=0 seems fine, it makes sense to restrict the max_mtu. This commit also restricts the maximum MTU to the greatest number for which rounding up to the padding multiple won't overflow a signed integer. Packets this large were always rejected anyway eventually, due to checks deeper in, but it seems more sound not to even let the administrator configure something that won't work anyway. We use this opportunity to clean up this function a bit so that it's clear which paths we're expecting. Signed-off-by: Jason A. Donenfeld Cc: Eric Dumazet Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/wireguard/device.c | 7 ++++--- drivers/net/wireguard/send.c | 16 +++++++++++----- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index 43db442b1373..cdc96968b0f4 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -258,6 +258,8 @@ static void wg_setup(struct net_device *dev) enum { WG_NETDEV_FEATURES = NETIF_F_HW_CSUM | NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO | NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA }; + const int overhead = MESSAGE_MINIMUM_LENGTH + sizeof(struct udphdr) + + max(sizeof(struct ipv6hdr), sizeof(struct iphdr)); dev->netdev_ops = &netdev_ops; dev->hard_header_len = 0; @@ -271,9 +273,8 @@ static void wg_setup(struct net_device *dev) dev->features |= WG_NETDEV_FEATURES; dev->hw_features |= WG_NETDEV_FEATURES; dev->hw_enc_features |= WG_NETDEV_FEATURES; - dev->mtu = ETH_DATA_LEN - MESSAGE_MINIMUM_LENGTH - - sizeof(struct udphdr) - - max(sizeof(struct ipv6hdr), sizeof(struct iphdr)); + dev->mtu = ETH_DATA_LEN - overhead; + dev->max_mtu = round_down(INT_MAX, MESSAGE_PADDING_MULTIPLE) - overhead; SET_NETDEV_DEVTYPE(dev, &device_type); diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c index c13260563446..7348c10cbae3 100644 --- a/drivers/net/wireguard/send.c +++ b/drivers/net/wireguard/send.c @@ -143,16 +143,22 @@ static void keep_key_fresh(struct wg_peer *peer) static unsigned int calculate_skb_padding(struct sk_buff *skb) { + unsigned int padded_size, last_unit = skb->len; + + if (unlikely(!PACKET_CB(skb)->mtu)) + return ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE) - last_unit; + /* We do this modulo business with the MTU, just in case the networking * layer gives us a packet that's bigger than the MTU. In that case, we * wouldn't want the final subtraction to overflow in the case of the - * padded_size being clamped. + * padded_size being clamped. Fortunately, that's very rarely the case, + * so we optimize for that not happening. */ - unsigned int last_unit = skb->len % PACKET_CB(skb)->mtu; - unsigned int padded_size = ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE); + if (unlikely(last_unit > PACKET_CB(skb)->mtu)) + last_unit %= PACKET_CB(skb)->mtu; - if (padded_size > PACKET_CB(skb)->mtu) - padded_size = PACKET_CB(skb)->mtu; + padded_size = min(PACKET_CB(skb)->mtu, + ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE)); return padded_size - last_unit; } -- cgit v1.2.3-59-g8ed1b From 1fbc33b0a7feb6ca72bf7dc8a05d81485ee8ee2e Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 14 Feb 2020 23:57:23 +0100 Subject: wireguard: socket: remove extra call to synchronize_net synchronize_net() is a wrapper around synchronize_rcu(), so there's no point in having synchronize_net and synchronize_rcu back to back, despite the documentation comment suggesting maybe it's somewhat useful, "Wait for packets currently being received to be done." This commit removes the extra call. Signed-off-by: Jason A. Donenfeld Suggested-by: Eric Dumazet Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/wireguard/socket.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c index 262f3b5c819d..b0d6541582d3 100644 --- a/drivers/net/wireguard/socket.c +++ b/drivers/net/wireguard/socket.c @@ -432,7 +432,6 @@ void wg_socket_reinit(struct wg_device *wg, struct sock *new4, wg->incoming_port = ntohs(inet_sk(new4)->inet_sport); mutex_unlock(&wg->socket_update_lock); synchronize_rcu(); - synchronize_net(); sock_free(old4); sock_free(old6); } -- cgit v1.2.3-59-g8ed1b From d965a5432d4c3e6b9c3d2bc1d4a800013bbf76f6 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 14 Feb 2020 15:26:19 -0800 Subject: net: dsa: b53: Ensure the default VID is untagged We need to ensure that the default VID is untagged otherwise the switch will be sending tagged frames and the results can be problematic. This is especially true with b53 switches that use VID 0 as their default VLAN since VID 0 has a special meaning. Fixes: fea83353177a ("net: dsa: b53: Fix default VLAN ID") Fixes: 061f6a505ac3 ("net: dsa: Add ndo_vlan_rx_{add, kill}_vid implementation") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 449a22172e07..1a69286daa8d 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1366,6 +1366,9 @@ void b53_vlan_add(struct dsa_switch *ds, int port, b53_get_vlan_entry(dev, vid, vl); + if (vid == 0 && vid == b53_default_pvid(dev)) + untagged = true; + vl->members |= BIT(port); if (untagged && !dsa_is_cpu_port(ds, port)) vl->untag |= BIT(port); -- cgit v1.2.3-59-g8ed1b From 6699170376ab941c1cc5c3bcefa766efc3575c73 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Sat, 15 Feb 2020 01:55:53 +0100 Subject: ethtool: fix application of verbose no_mask bitset A bitset without mask in a _SET request means we want exactly the bits in the bitset to be set. This works correctly for compact format but when verbose format is parsed, ethnl_update_bitset32_verbose() only sets the bits present in the request bitset but does not clear the rest. This can cause incorrect results like lion:~ # ethtool eth0 | grep Wake Supports Wake-on: pumbg Wake-on: g lion:~ # ethtool -s eth0 wol u lion:~ # ethtool eth0 | grep Wake Supports Wake-on: pumbg Wake-on: ug when the second ethtool command issues request ETHTOOL_MSG_WOL_SET ETHTOOL_A_WOL_HEADER ETHTOOL_A_HEADER_DEV_NAME = "eth0" ETHTOOL_A_WOL_MODES ETHTOOL_A_BITSET_NOMASK ETHTOOL_A_BITSET_BITS ETHTOOL_A_BITSET_BITS_BIT ETHTOOL_BITSET_BIT_INDEX = 1 Fix the logic by clearing the whole target bitmap before we start iterating through the request bits. Fixes: 10b518d4e6dd ("ethtool: netlink bitset handling") Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- net/ethtool/bitset.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ethtool/bitset.c b/net/ethtool/bitset.c index fce45dac4205..8977fe1f3946 100644 --- a/net/ethtool/bitset.c +++ b/net/ethtool/bitset.c @@ -447,7 +447,10 @@ ethnl_update_bitset32_verbose(u32 *bitmap, unsigned int nbits, "mask only allowed in compact bitset"); return -EINVAL; } + no_mask = tb[ETHTOOL_A_BITSET_NOMASK]; + if (no_mask) + ethnl_bitmap32_clear(bitmap, 0, nbits, mod); nla_for_each_nested(bit_attr, tb[ETHTOOL_A_BITSET_BITS], rem) { bool old_val, new_val; -- cgit v1.2.3-59-g8ed1b From c4c10784293ec89746721b1a40cb730b0106deea Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 15 Feb 2020 08:17:28 +0100 Subject: NFC: pn544: Fix a typo in a debug message The ending character of the string shoulb be \n, not \b. Fixes: 17936b43f0fd ("NFC: Standardize logging style") Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/nfc/pn544/pn544.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/pn544/pn544.c b/drivers/nfc/pn544/pn544.c index 2b83156efe3f..b788870473e8 100644 --- a/drivers/nfc/pn544/pn544.c +++ b/drivers/nfc/pn544/pn544.c @@ -682,7 +682,7 @@ static int pn544_hci_tm_send(struct nfc_hci_dev *hdev, struct sk_buff *skb) static int pn544_hci_check_presence(struct nfc_hci_dev *hdev, struct nfc_target *target) { - pr_debug("supported protocol %d\b", target->supported_protocols); + pr_debug("supported protocol %d\n", target->supported_protocols); if (target->supported_protocols & (NFC_PROTO_ISO14443_MASK | NFC_PROTO_ISO14443_B_MASK)) { return nfc_hci_send_cmd(hdev, target->hci_reader_gate, -- cgit v1.2.3-59-g8ed1b From 064ff66e2bef84f1153087612032b5b9eab005bd Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sat, 15 Feb 2020 10:50:08 +0000 Subject: bonding: add missing netdev_update_lockdep_key() After bond_release(), netdev_update_lockdep_key() should be called. But both ioctl path and attribute path don't call netdev_update_lockdep_key(). This patch adds missing netdev_update_lockdep_key(). Test commands: ip link add bond0 type bond ip link add bond1 type bond ifenslave bond0 bond1 ifenslave -d bond0 bond1 ifenslave bond1 bond0 Splat looks like: [ 29.501182][ T1046] WARNING: possible circular locking dependency detected [ 29.501945][ T1039] hardirqs last disabled at (1962): [] handle_mm_fault+0x13f/0x700 [ 29.503442][ T1046] 5.5.0+ #322 Not tainted [ 29.503447][ T1046] ------------------------------------------------------ [ 29.504277][ T1039] softirqs last enabled at (1180): [] __do_softirq+0x678/0x981 [ 29.505443][ T1046] ifenslave/1046 is trying to acquire lock: [ 29.505886][ T1039] softirqs last disabled at (1169): [] irq_exit+0x17a/0x1a0 [ 29.509997][ T1046] ffff88805d5da280 (&dev->addr_list_lock_key#3){+...}, at: dev_mc_sync_multiple+0x95/0x120 [ 29.511243][ T1046] [ 29.511243][ T1046] but task is already holding lock: [ 29.512192][ T1046] ffff8880460f2280 (&dev->addr_list_lock_key#4){+...}, at: bond_enslave+0x4482/0x47b0 [bonding] [ 29.514124][ T1046] [ 29.514124][ T1046] which lock already depends on the new lock. [ 29.514124][ T1046] [ 29.517297][ T1046] [ 29.517297][ T1046] the existing dependency chain (in reverse order) is: [ 29.518231][ T1046] [ 29.518231][ T1046] -> #1 (&dev->addr_list_lock_key#4){+...}: [ 29.519076][ T1046] _raw_spin_lock+0x30/0x70 [ 29.519588][ T1046] dev_mc_sync_multiple+0x95/0x120 [ 29.520208][ T1046] bond_enslave+0x448d/0x47b0 [bonding] [ 29.520862][ T1046] bond_option_slaves_set+0x1a3/0x370 [bonding] [ 29.521640][ T1046] __bond_opt_set+0x1ff/0xbb0 [bonding] [ 29.522438][ T1046] __bond_opt_set_notify+0x2b/0xf0 [bonding] [ 29.523251][ T1046] bond_opt_tryset_rtnl+0x92/0xf0 [bonding] [ 29.524082][ T1046] bonding_sysfs_store_option+0x8a/0xf0 [bonding] [ 29.524959][ T1046] kernfs_fop_write+0x276/0x410 [ 29.525620][ T1046] vfs_write+0x197/0x4a0 [ 29.526218][ T1046] ksys_write+0x141/0x1d0 [ 29.526818][ T1046] do_syscall_64+0x99/0x4f0 [ 29.527430][ T1046] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 29.528265][ T1046] [ 29.528265][ T1046] -> #0 (&dev->addr_list_lock_key#3){+...}: [ 29.529272][ T1046] __lock_acquire+0x2d8d/0x3de0 [ 29.529935][ T1046] lock_acquire+0x164/0x3b0 [ 29.530638][ T1046] _raw_spin_lock+0x30/0x70 [ 29.531187][ T1046] dev_mc_sync_multiple+0x95/0x120 [ 29.531790][ T1046] bond_enslave+0x448d/0x47b0 [bonding] [ 29.532451][ T1046] bond_option_slaves_set+0x1a3/0x370 [bonding] [ 29.533163][ T1046] __bond_opt_set+0x1ff/0xbb0 [bonding] [ 29.533789][ T1046] __bond_opt_set_notify+0x2b/0xf0 [bonding] [ 29.534595][ T1046] bond_opt_tryset_rtnl+0x92/0xf0 [bonding] [ 29.535500][ T1046] bonding_sysfs_store_option+0x8a/0xf0 [bonding] [ 29.536379][ T1046] kernfs_fop_write+0x276/0x410 [ 29.537057][ T1046] vfs_write+0x197/0x4a0 [ 29.537640][ T1046] ksys_write+0x141/0x1d0 [ 29.538251][ T1046] do_syscall_64+0x99/0x4f0 [ 29.538870][ T1046] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 29.539659][ T1046] [ 29.539659][ T1046] other info that might help us debug this: [ 29.539659][ T1046] [ 29.540953][ T1046] Possible unsafe locking scenario: [ 29.540953][ T1046] [ 29.541883][ T1046] CPU0 CPU1 [ 29.542540][ T1046] ---- ---- [ 29.543209][ T1046] lock(&dev->addr_list_lock_key#4); [ 29.543880][ T1046] lock(&dev->addr_list_lock_key#3); [ 29.544873][ T1046] lock(&dev->addr_list_lock_key#4); [ 29.545863][ T1046] lock(&dev->addr_list_lock_key#3); [ 29.546525][ T1046] [ 29.546525][ T1046] *** DEADLOCK *** [ 29.546525][ T1046] [ 29.547542][ T1046] 5 locks held by ifenslave/1046: [ 29.548196][ T1046] #0: ffff88806044c478 (sb_writers#5){.+.+}, at: vfs_write+0x3bb/0x4a0 [ 29.549248][ T1046] #1: ffff88805af00890 (&of->mutex){+.+.}, at: kernfs_fop_write+0x1cf/0x410 [ 29.550343][ T1046] #2: ffff88805b8b54b0 (kn->count#157){.+.+}, at: kernfs_fop_write+0x1f2/0x410 [ 29.551575][ T1046] #3: ffffffffaecf4cf0 (rtnl_mutex){+.+.}, at: bond_opt_tryset_rtnl+0x5f/0xf0 [bonding] [ 29.552819][ T1046] #4: ffff8880460f2280 (&dev->addr_list_lock_key#4){+...}, at: bond_enslave+0x4482/0x47b0 [bonding] [ 29.554175][ T1046] [ 29.554175][ T1046] stack backtrace: [ 29.554907][ T1046] CPU: 0 PID: 1046 Comm: ifenslave Not tainted 5.5.0+ #322 [ 29.555854][ T1046] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 29.557064][ T1046] Call Trace: [ 29.557504][ T1046] dump_stack+0x96/0xdb [ 29.558054][ T1046] check_noncircular+0x371/0x450 [ 29.558723][ T1046] ? print_circular_bug.isra.35+0x310/0x310 [ 29.559486][ T1046] ? hlock_class+0x130/0x130 [ 29.560100][ T1046] ? __lock_acquire+0x2d8d/0x3de0 [ 29.560761][ T1046] __lock_acquire+0x2d8d/0x3de0 [ 29.561366][ T1046] ? register_lock_class+0x14d0/0x14d0 [ 29.562045][ T1046] ? find_held_lock+0x39/0x1d0 [ 29.562641][ T1046] lock_acquire+0x164/0x3b0 [ 29.563199][ T1046] ? dev_mc_sync_multiple+0x95/0x120 [ 29.563872][ T1046] _raw_spin_lock+0x30/0x70 [ 29.564464][ T1046] ? dev_mc_sync_multiple+0x95/0x120 [ 29.565146][ T1046] dev_mc_sync_multiple+0x95/0x120 [ 29.565793][ T1046] bond_enslave+0x448d/0x47b0 [bonding] [ 29.566487][ T1046] ? bond_update_slave_arr+0x940/0x940 [bonding] [ 29.567279][ T1046] ? bstr_printf+0xc20/0xc20 [ 29.567857][ T1046] ? stack_trace_consume_entry+0x160/0x160 [ 29.568614][ T1046] ? deactivate_slab.isra.77+0x2c5/0x800 [ 29.569320][ T1046] ? check_chain_key+0x236/0x5d0 [ 29.569939][ T1046] ? sscanf+0x93/0xc0 [ 29.570442][ T1046] ? vsscanf+0x1e20/0x1e20 [ 29.571003][ T1046] bond_option_slaves_set+0x1a3/0x370 [bonding] [ ... ] Fixes: ab92d68fc22f ("net: core: add generic lockdep keys") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 ++ drivers/net/bonding/bond_options.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 48d5ec770b94..1e9d5d35fc78 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3640,6 +3640,8 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd case BOND_RELEASE_OLD: case SIOCBONDRELEASE: res = bond_release(bond_dev, slave_dev); + if (!res) + netdev_update_lockdep_key(slave_dev); break; case BOND_SETHWADDR_OLD: case SIOCBONDSETHWADDR: diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index ddb3916d3506..215c10923289 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -1398,6 +1398,8 @@ static int bond_option_slaves_set(struct bonding *bond, case '-': slave_dbg(bond->dev, dev, "Releasing interface\n"); ret = bond_release(bond->dev, dev); + if (!ret) + netdev_update_lockdep_key(dev); break; default: -- cgit v1.2.3-59-g8ed1b From 7151affeef8d527f50b4b68a871fd28bd660023f Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sat, 15 Feb 2020 10:50:21 +0000 Subject: net: export netdev_next_lower_dev_rcu() netdev_next_lower_dev_rcu() will be used to implement a function, which is to walk all lower interfaces. There are already functions that they walk their lower interface. (netdev_walk_all_lower_dev_rcu, netdev_walk_all_lower_dev()). But, there would be cases that couldn't be covered by given netdev_walk_all_lower_dev_{rcu}() function. So, some modules would want to implement own function, which is to walk all lower interfaces. In the next patch, netdev_next_lower_dev_rcu() will be used. In addition, this patch removes two unused prototypes in netdevice.h. Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 +++---- net/core/dev.c | 6 +++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9f1f633235f6..6c3f7032e8d9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -72,6 +72,8 @@ void netdev_set_default_ethtool_ops(struct net_device *dev, #define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ #define NET_RX_DROP 1 /* packet dropped */ +#define MAX_NEST_DEV 8 + /* * Transmit return codes: transmit return codes originate from three different * namespaces: @@ -4389,11 +4391,8 @@ void *netdev_lower_get_next(struct net_device *dev, ldev; \ ldev = netdev_lower_get_next(dev, &(iter))) -struct net_device *netdev_all_lower_get_next(struct net_device *dev, +struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, struct list_head **iter); -struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, - struct list_head **iter); - int netdev_walk_all_lower_dev(struct net_device *dev, int (*fn)(struct net_device *lower_dev, void *data), diff --git a/net/core/dev.c b/net/core/dev.c index b6d13f3f1e5a..2577ebfed293 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -146,7 +146,6 @@ #include "net-sysfs.h" #define MAX_GRO_SKBS 8 -#define MAX_NEST_DEV 8 /* This should be increased if a protocol with a bigger head is added. */ #define GRO_MAX_HEAD (MAX_HEADER + 128) @@ -7207,8 +7206,8 @@ static int __netdev_walk_all_lower_dev(struct net_device *dev, return 0; } -static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, - struct list_head **iter) +struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, + struct list_head **iter) { struct netdev_adjacent *lower; @@ -7220,6 +7219,7 @@ static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, return lower->dev; } +EXPORT_SYMBOL(netdev_next_lower_dev_rcu); static u8 __netdev_upper_depth(struct net_device *dev) { -- cgit v1.2.3-59-g8ed1b From b3e80d44f5b1b470dd9e2dbc6816e63a5c519709 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sat, 15 Feb 2020 10:50:40 +0000 Subject: bonding: fix lockdep warning in bond_get_stats() In the "struct bonding", there is stats_lock. This lock protects "bond_stats" in the "struct bonding". bond_stats is updated in the bond_get_stats() and this function would be executed concurrently. So, the lock is needed. Bonding interfaces would be nested. So, either stats_lock should use dynamic lockdep class key or stats_lock should be used by spin_lock_nested(). In the current code, stats_lock is using a dynamic lockdep class key. But there is no updating stats_lock_key routine So, lockdep warning will occur. Test commands: ip link add bond0 type bond ip link add bond1 type bond ip link set bond0 master bond1 ip link set bond0 nomaster ip link set bond1 master bond0 Splat looks like: [ 38.420603][ T957] 5.5.0+ #394 Not tainted [ 38.421074][ T957] ------------------------------------------------------ [ 38.421837][ T957] ip/957 is trying to acquire lock: [ 38.422399][ T957] ffff888063262cd8 (&bond->stats_lock_key#2){+.+.}, at: bond_get_stats+0x90/0x4d0 [bonding] [ 38.423528][ T957] [ 38.423528][ T957] but task is already holding lock: [ 38.424526][ T957] ffff888065fd2cd8 (&bond->stats_lock_key){+.+.}, at: bond_get_stats+0x90/0x4d0 [bonding] [ 38.426075][ T957] [ 38.426075][ T957] which lock already depends on the new lock. [ 38.426075][ T957] [ 38.428536][ T957] [ 38.428536][ T957] the existing dependency chain (in reverse order) is: [ 38.429475][ T957] [ 38.429475][ T957] -> #1 (&bond->stats_lock_key){+.+.}: [ 38.430273][ T957] _raw_spin_lock+0x30/0x70 [ 38.430812][ T957] bond_get_stats+0x90/0x4d0 [bonding] [ 38.431451][ T957] dev_get_stats+0x1ec/0x270 [ 38.432088][ T957] bond_get_stats+0x1a5/0x4d0 [bonding] [ 38.432767][ T957] dev_get_stats+0x1ec/0x270 [ 38.433322][ T957] rtnl_fill_stats+0x44/0xbe0 [ 38.433866][ T957] rtnl_fill_ifinfo+0xeb2/0x3720 [ 38.434474][ T957] rtmsg_ifinfo_build_skb+0xca/0x170 [ 38.435081][ T957] rtmsg_ifinfo_event.part.33+0x1b/0xb0 [ 38.436848][ T957] rtnetlink_event+0xcd/0x120 [ 38.437455][ T957] notifier_call_chain+0x90/0x160 [ 38.438067][ T957] netdev_change_features+0x74/0xa0 [ 38.438708][ T957] bond_compute_features.isra.45+0x4e6/0x6f0 [bonding] [ 38.439522][ T957] bond_enslave+0x3639/0x47b0 [bonding] [ 38.440225][ T957] do_setlink+0xaab/0x2ef0 [ 38.440786][ T957] __rtnl_newlink+0x9c5/0x1270 [ 38.441463][ T957] rtnl_newlink+0x65/0x90 [ 38.442075][ T957] rtnetlink_rcv_msg+0x4a8/0x890 [ 38.442774][ T957] netlink_rcv_skb+0x121/0x350 [ 38.443451][ T957] netlink_unicast+0x42e/0x610 [ 38.444282][ T957] netlink_sendmsg+0x65a/0xb90 [ 38.444992][ T957] ____sys_sendmsg+0x5ce/0x7a0 [ 38.445679][ T957] ___sys_sendmsg+0x10f/0x1b0 [ 38.446365][ T957] __sys_sendmsg+0xc6/0x150 [ 38.447007][ T957] do_syscall_64+0x99/0x4f0 [ 38.447668][ T957] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 38.448538][ T957] [ 38.448538][ T957] -> #0 (&bond->stats_lock_key#2){+.+.}: [ 38.449554][ T957] __lock_acquire+0x2d8d/0x3de0 [ 38.450148][ T957] lock_acquire+0x164/0x3b0 [ 38.450711][ T957] _raw_spin_lock+0x30/0x70 [ 38.451292][ T957] bond_get_stats+0x90/0x4d0 [bonding] [ 38.451950][ T957] dev_get_stats+0x1ec/0x270 [ 38.452425][ T957] bond_get_stats+0x1a5/0x4d0 [bonding] [ 38.453362][ T957] dev_get_stats+0x1ec/0x270 [ 38.453825][ T957] rtnl_fill_stats+0x44/0xbe0 [ 38.454390][ T957] rtnl_fill_ifinfo+0xeb2/0x3720 [ 38.456257][ T957] rtmsg_ifinfo_build_skb+0xca/0x170 [ 38.456998][ T957] rtmsg_ifinfo_event.part.33+0x1b/0xb0 [ 38.459351][ T957] rtnetlink_event+0xcd/0x120 [ 38.460086][ T957] notifier_call_chain+0x90/0x160 [ 38.460829][ T957] netdev_change_features+0x74/0xa0 [ 38.461752][ T957] bond_compute_features.isra.45+0x4e6/0x6f0 [bonding] [ 38.462705][ T957] bond_enslave+0x3639/0x47b0 [bonding] [ 38.463476][ T957] do_setlink+0xaab/0x2ef0 [ 38.464141][ T957] __rtnl_newlink+0x9c5/0x1270 [ 38.464897][ T957] rtnl_newlink+0x65/0x90 [ 38.465522][ T957] rtnetlink_rcv_msg+0x4a8/0x890 [ 38.466215][ T957] netlink_rcv_skb+0x121/0x350 [ 38.466895][ T957] netlink_unicast+0x42e/0x610 [ 38.467583][ T957] netlink_sendmsg+0x65a/0xb90 [ 38.468285][ T957] ____sys_sendmsg+0x5ce/0x7a0 [ 38.469202][ T957] ___sys_sendmsg+0x10f/0x1b0 [ 38.469884][ T957] __sys_sendmsg+0xc6/0x150 [ 38.470587][ T957] do_syscall_64+0x99/0x4f0 [ 38.471245][ T957] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 38.472093][ T957] [ 38.472093][ T957] other info that might help us debug this: [ 38.472093][ T957] [ 38.473438][ T957] Possible unsafe locking scenario: [ 38.473438][ T957] [ 38.474898][ T957] CPU0 CPU1 [ 38.476234][ T957] ---- ---- [ 38.480171][ T957] lock(&bond->stats_lock_key); [ 38.480808][ T957] lock(&bond->stats_lock_key#2); [ 38.481791][ T957] lock(&bond->stats_lock_key); [ 38.482754][ T957] lock(&bond->stats_lock_key#2); [ 38.483416][ T957] [ 38.483416][ T957] *** DEADLOCK *** [ 38.483416][ T957] [ 38.484505][ T957] 3 locks held by ip/957: [ 38.485048][ T957] #0: ffffffffbccf6230 (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x457/0x890 [ 38.486198][ T957] #1: ffff888065fd2cd8 (&bond->stats_lock_key){+.+.}, at: bond_get_stats+0x90/0x4d0 [bonding] [ 38.487625][ T957] #2: ffffffffbc9254c0 (rcu_read_lock){....}, at: bond_get_stats+0x5/0x4d0 [bonding] [ 38.488897][ T957] [ 38.488897][ T957] stack backtrace: [ 38.489646][ T957] CPU: 1 PID: 957 Comm: ip Not tainted 5.5.0+ #394 [ 38.490497][ T957] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 38.492810][ T957] Call Trace: [ 38.493219][ T957] dump_stack+0x96/0xdb [ 38.493709][ T957] check_noncircular+0x371/0x450 [ 38.494344][ T957] ? lookup_address+0x60/0x60 [ 38.494923][ T957] ? print_circular_bug.isra.35+0x310/0x310 [ 38.495699][ T957] ? hlock_class+0x130/0x130 [ 38.496334][ T957] ? __lock_acquire+0x2d8d/0x3de0 [ 38.496979][ T957] __lock_acquire+0x2d8d/0x3de0 [ 38.497607][ T957] ? register_lock_class+0x14d0/0x14d0 [ 38.498333][ T957] ? check_chain_key+0x236/0x5d0 [ 38.499003][ T957] lock_acquire+0x164/0x3b0 [ 38.499800][ T957] ? bond_get_stats+0x90/0x4d0 [bonding] [ 38.500706][ T957] _raw_spin_lock+0x30/0x70 [ 38.501435][ T957] ? bond_get_stats+0x90/0x4d0 [bonding] [ 38.502311][ T957] bond_get_stats+0x90/0x4d0 [bonding] [ ... ] But, there is another problem. The dynamic lockdep class key is protected by RTNL, but bond_get_stats() would be called outside of RTNL. So, it would use an invalid dynamic lockdep class key. In order to fix this issue, stats_lock uses spin_lock_nested() instead of a dynamic lockdep key. The bond_get_stats() calls bond_get_lowest_level_rcu() to get the correct nest level value, which will be used by spin_lock_nested(). The "dev->lower_level" indicates lower nest level value, but this value is invalid outside of RTNL. So, bond_get_lowest_level_rcu() returns valid lower nest level value in the RCU critical section. bond_get_lowest_level_rcu() will be work only when LOCKDEP is enabled. Fixes: 089bca2caed0 ("bonding: use dynamic lockdep key instead of subclass") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 53 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 1e9d5d35fc78..d10805e5e623 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3526,6 +3526,47 @@ static void bond_fold_stats(struct rtnl_link_stats64 *_res, } } +#ifdef CONFIG_LOCKDEP +static int bond_get_lowest_level_rcu(struct net_device *dev) +{ + struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; + struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; + int cur = 0, max = 0; + + now = dev; + iter = &dev->adj_list.lower; + + while (1) { + next = NULL; + while (1) { + ldev = netdev_next_lower_dev_rcu(now, &iter); + if (!ldev) + break; + + next = ldev; + niter = &ldev->adj_list.lower; + dev_stack[cur] = now; + iter_stack[cur++] = iter; + if (max <= cur) + max = cur; + break; + } + + if (!next) { + if (!cur) + return max; + next = dev_stack[--cur]; + niter = iter_stack[cur]; + } + + now = next; + iter = niter; + } + + return max; +} +#endif + static void bond_get_stats(struct net_device *bond_dev, struct rtnl_link_stats64 *stats) { @@ -3533,11 +3574,17 @@ static void bond_get_stats(struct net_device *bond_dev, struct rtnl_link_stats64 temp; struct list_head *iter; struct slave *slave; + int nest_level = 0; - spin_lock(&bond->stats_lock); - memcpy(stats, &bond->bond_stats, sizeof(*stats)); rcu_read_lock(); +#ifdef CONFIG_LOCKDEP + nest_level = bond_get_lowest_level_rcu(bond_dev); +#endif + + spin_lock_nested(&bond->stats_lock, nest_level); + memcpy(stats, &bond->bond_stats, sizeof(*stats)); + bond_for_each_slave_rcu(bond, slave, iter) { const struct rtnl_link_stats64 *new = dev_get_stats(slave->dev, &temp); @@ -3547,10 +3594,10 @@ static void bond_get_stats(struct net_device *bond_dev, /* save off the slave stats for the next run */ memcpy(&slave->slave_stats, new, sizeof(*new)); } - rcu_read_unlock(); memcpy(&bond->bond_stats, stats, sizeof(*stats)); spin_unlock(&bond->stats_lock); + rcu_read_unlock(); } static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd) -- cgit v1.2.3-59-g8ed1b From 357b41caf949c57e426f1c5f18574b6b46583406 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Sat, 15 Feb 2020 15:45:56 +0100 Subject: mptcp: select CRYPTO Without this modification and if CRYPTO is not selected, we have this warning: WARNING: unmet direct dependencies detected for CRYPTO_LIB_SHA256 Depends on [n]: CRYPTO [=n] Selected by [y]: - MPTCP [=y] && NET [=y] && INET [=y] MPTCP selects CRYPTO_LIB_SHA256 which seems to depend on CRYPTO. CRYPTO is now selected to avoid this issue. Even though the config system prints that warning, it looks like sha256.c is compiled and linked even without CONFIG_CRYPTO. Since MPTCP will end up needing CONFIG_CRYPTO anyway in future commits -- currently in preparation for net-next -- we propose to add it now to fix the warning. The dependency in the config system comes from the fact that CRYPTO_LIB_SHA256 is defined in "lib/crypto/Kconfig" which is sourced from "crypto/Kconfig" only if CRYPTO is selected. Fixes: 65492c5a6ab5 (mptcp: move from sha1 (v0) to sha256 (v1)) Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/mptcp/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig index 49f6054e7f4e..a9ed3bf1d93f 100644 --- a/net/mptcp/Kconfig +++ b/net/mptcp/Kconfig @@ -4,6 +4,7 @@ config MPTCP depends on INET select SKB_EXTENSIONS select CRYPTO_LIB_SHA256 + select CRYPTO help Multipath TCP (MPTCP) connections send and receive data over multiple subflows in order to utilize multiple network paths. Each subflow -- cgit v1.2.3-59-g8ed1b From 69233bba6543a37755158ca3382765387b8078df Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 15 Feb 2020 17:54:17 +0100 Subject: net: ks8851-ml: Remove 8-bit bus accessors This driver is mixing 8-bit and 16-bit bus accessors for reasons unknown, however the speculation is that this was some sort of attempt to support the 8-bit bus mode. As per the KS8851-16MLL documentation, all two registers accessed via the 8-bit accessors are internally 16-bit registers, so reading them using 16-bit accessors is fine. The KS_CCR read can be converted to 16-bit read outright, as it is already a concatenation of two 8-bit reads of that register. The KS_RXQCR accesses are 8-bit only, however writing the top 8 bits of the register is OK as well, since the driver caches the entire 16-bit register value anyway. Finally, the driver is not used by any hardware in the kernel right now. The only hardware available to me is one with 16-bit bus, so I have no way to test the 8-bit bus mode, however it is unlikely this ever really worked anyway. If the 8-bit bus mode is ever required, it can be easily added by adjusting the 16-bit accessors to do 2 consecutive accesses, which is how this should have been done from the beginning. Signed-off-by: Marek Vasut Cc: David S. Miller Cc: Lukas Wunner Cc: Petr Stetiar Cc: YueHaibing Signed-off-by: David S. Miller --- drivers/net/ethernet/micrel/ks8851_mll.c | 45 ++++---------------------------- 1 file changed, 5 insertions(+), 40 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index a41a90c589db..e2fb20154511 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -156,24 +156,6 @@ static int msg_enable; * chip is busy transferring packet data (RX/TX FIFO accesses). */ -/** - * ks_rdreg8 - read 8 bit register from device - * @ks : The chip information - * @offset: The register address - * - * Read a 8bit register from the chip, returning the result - */ -static u8 ks_rdreg8(struct ks_net *ks, int offset) -{ - u16 data; - u8 shift_bit = offset & 0x03; - u8 shift_data = (offset & 1) << 3; - ks->cmd_reg_cache = (u16) offset | (u16)(BE0 << shift_bit); - iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); - data = ioread16(ks->hw_addr); - return (u8)(data >> shift_data); -} - /** * ks_rdreg16 - read 16 bit register from device * @ks : The chip information @@ -189,22 +171,6 @@ static u16 ks_rdreg16(struct ks_net *ks, int offset) return ioread16(ks->hw_addr); } -/** - * ks_wrreg8 - write 8bit register value to chip - * @ks: The chip information - * @offset: The register address - * @value: The value to write - * - */ -static void ks_wrreg8(struct ks_net *ks, int offset, u8 value) -{ - u8 shift_bit = (offset & 0x03); - u16 value_write = (u16)(value << ((offset & 1) << 3)); - ks->cmd_reg_cache = (u16)offset | (BE0 << shift_bit); - iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); - iowrite16(value_write, ks->hw_addr); -} - /** * ks_wrreg16 - write 16bit register value to chip * @ks: The chip information @@ -324,8 +290,7 @@ static void ks_read_config(struct ks_net *ks) u16 reg_data = 0; /* Regardless of bus width, 8 bit read should always work.*/ - reg_data = ks_rdreg8(ks, KS_CCR) & 0x00FF; - reg_data |= ks_rdreg8(ks, KS_CCR+1) << 8; + reg_data = ks_rdreg16(ks, KS_CCR); /* addr/data bus are multiplexed */ ks->sharedbus = (reg_data & CCR_SHARED) == CCR_SHARED; @@ -429,7 +394,7 @@ static inline void ks_read_qmu(struct ks_net *ks, u16 *buf, u32 len) /* 1. set sudo DMA mode */ ks_wrreg16(ks, KS_RXFDPR, RXFDPR_RXFPAI); - ks_wrreg8(ks, KS_RXQCR, (ks->rc_rxqcr | RXQCR_SDA) & 0xff); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA); /* 2. read prepend data */ /** @@ -446,7 +411,7 @@ static inline void ks_read_qmu(struct ks_net *ks, u16 *buf, u32 len) ks_inblk(ks, buf, ALIGN(len, 4)); /* 4. reset sudo DMA Mode */ - ks_wrreg8(ks, KS_RXQCR, ks->rc_rxqcr); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr); } /** @@ -679,13 +644,13 @@ static void ks_write_qmu(struct ks_net *ks, u8 *pdata, u16 len) ks->txh.txw[1] = cpu_to_le16(len); /* 1. set sudo-DMA mode */ - ks_wrreg8(ks, KS_RXQCR, (ks->rc_rxqcr | RXQCR_SDA) & 0xff); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA); /* 2. write status/lenth info */ ks_outblk(ks, ks->txh.txw, 4); /* 3. write pkt data */ ks_outblk(ks, (u16 *)pdata, ALIGN(len, 4)); /* 4. reset sudo-DMA mode */ - ks_wrreg8(ks, KS_RXQCR, ks->rc_rxqcr); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr); /* 5. Enqueue Tx(move the pkt from TX buffer into TXQ) */ ks_wrreg16(ks, KS_TXQCR, TXQCR_METFE); /* 6. wait until TXQCR_METFE is auto-cleared */ -- cgit v1.2.3-59-g8ed1b From edacb098ea9c31589276152f09b4439052c0f2b1 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 15 Feb 2020 17:54:18 +0100 Subject: net: ks8851-ml: Fix 16-bit data access The packet data written to and read from Micrel KSZ8851-16MLLI must be byte-swapped in 16-bit mode, add this byte-swapping. Signed-off-by: Marek Vasut Cc: David S. Miller Cc: Lukas Wunner Cc: Petr Stetiar Cc: YueHaibing Signed-off-by: David S. Miller --- drivers/net/ethernet/micrel/ks8851_mll.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index e2fb20154511..5ae206ae5d2b 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -197,7 +197,7 @@ static inline void ks_inblk(struct ks_net *ks, u16 *wptr, u32 len) { len >>= 1; while (len--) - *wptr++ = (u16)ioread16(ks->hw_addr); + *wptr++ = be16_to_cpu(ioread16(ks->hw_addr)); } /** @@ -211,7 +211,7 @@ static inline void ks_outblk(struct ks_net *ks, u16 *wptr, u32 len) { len >>= 1; while (len--) - iowrite16(*wptr++, ks->hw_addr); + iowrite16(cpu_to_be16(*wptr++), ks->hw_addr); } static void ks_disable_int(struct ks_net *ks) -- cgit v1.2.3-59-g8ed1b From 58292104832fef6cb4a89f736012c0e0724c3442 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 15 Feb 2020 17:54:19 +0100 Subject: net: ks8851-ml: Fix 16-bit IO operation The Micrel KSZ8851-16MLLI datasheet DS00002357B page 12 states that BE[3:0] signals are active high. This contradicts the measurements of the behavior of the actual chip, where these signals behave as active low. For example, to read the CIDER register, the bus must expose 0xc0c0 during the address phase, which means BE[3:0]=4'b1100. Signed-off-by: Marek Vasut Cc: David S. Miller Cc: Lukas Wunner Cc: Petr Stetiar Cc: YueHaibing Signed-off-by: David S. Miller --- drivers/net/ethernet/micrel/ks8851_mll.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index 5ae206ae5d2b..1c9e70c8cc30 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -166,7 +166,7 @@ static int msg_enable; static u16 ks_rdreg16(struct ks_net *ks, int offset) { - ks->cmd_reg_cache = (u16)offset | ((BE1 | BE0) << (offset & 0x02)); + ks->cmd_reg_cache = (u16)offset | ((BE3 | BE2) >> (offset & 0x02)); iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); return ioread16(ks->hw_addr); } @@ -181,7 +181,7 @@ static u16 ks_rdreg16(struct ks_net *ks, int offset) static void ks_wrreg16(struct ks_net *ks, int offset, u16 value) { - ks->cmd_reg_cache = (u16)offset | ((BE1 | BE0) << (offset & 0x02)); + ks->cmd_reg_cache = (u16)offset | ((BE3 | BE2) >> (offset & 0x02)); iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); iowrite16(value, ks->hw_addr); } -- cgit v1.2.3-59-g8ed1b From 66256e0b15bd72e1e1c24c4cef4281a95636781c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 15 Feb 2020 11:42:37 -0800 Subject: net/sock.h: fix all kernel-doc warnings Fix all kernel-doc warnings for . Fixes these warnings: ../include/net/sock.h:232: warning: Function parameter or member 'skc_addrpair' not described in 'sock_common' ../include/net/sock.h:232: warning: Function parameter or member 'skc_portpair' not described in 'sock_common' ../include/net/sock.h:232: warning: Function parameter or member 'skc_ipv6only' not described in 'sock_common' ../include/net/sock.h:232: warning: Function parameter or member 'skc_net_refcnt' not described in 'sock_common' ../include/net/sock.h:232: warning: Function parameter or member 'skc_v6_daddr' not described in 'sock_common' ../include/net/sock.h:232: warning: Function parameter or member 'skc_v6_rcv_saddr' not described in 'sock_common' ../include/net/sock.h:232: warning: Function parameter or member 'skc_cookie' not described in 'sock_common' ../include/net/sock.h:232: warning: Function parameter or member 'skc_listener' not described in 'sock_common' ../include/net/sock.h:232: warning: Function parameter or member 'skc_tw_dr' not described in 'sock_common' ../include/net/sock.h:232: warning: Function parameter or member 'skc_rcv_wnd' not described in 'sock_common' ../include/net/sock.h:232: warning: Function parameter or member 'skc_tw_rcv_nxt' not described in 'sock_common' ../include/net/sock.h:498: warning: Function parameter or member 'sk_rx_skb_cache' not described in 'sock' ../include/net/sock.h:498: warning: Function parameter or member 'sk_wq_raw' not described in 'sock' ../include/net/sock.h:498: warning: Function parameter or member 'tcp_rtx_queue' not described in 'sock' ../include/net/sock.h:498: warning: Function parameter or member 'sk_tx_skb_cache' not described in 'sock' ../include/net/sock.h:498: warning: Function parameter or member 'sk_route_forced_caps' not described in 'sock' ../include/net/sock.h:498: warning: Function parameter or member 'sk_txtime_report_errors' not described in 'sock' ../include/net/sock.h:498: warning: Function parameter or member 'sk_validate_xmit_skb' not described in 'sock' ../include/net/sock.h:498: warning: Function parameter or member 'sk_bpf_storage' not described in 'sock' ../include/net/sock.h:2024: warning: No description found for return value of 'sk_wmem_alloc_get' ../include/net/sock.h:2035: warning: No description found for return value of 'sk_rmem_alloc_get' ../include/net/sock.h:2046: warning: No description found for return value of 'sk_has_allocations' ../include/net/sock.h:2082: warning: No description found for return value of 'skwq_has_sleeper' ../include/net/sock.h:2244: warning: No description found for return value of 'sk_page_frag' ../include/net/sock.h:2444: warning: Function parameter or member 'tcp_rx_skb_cache_key' not described in 'DECLARE_STATIC_KEY_FALSE' ../include/net/sock.h:2444: warning: Excess function parameter 'sk' description in 'DECLARE_STATIC_KEY_FALSE' ../include/net/sock.h:2444: warning: Excess function parameter 'skb' description in 'DECLARE_STATIC_KEY_FALSE' Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- include/net/sock.h | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 02162b0378f7..328564525526 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -117,19 +117,26 @@ typedef __u64 __bitwise __addrpair; * struct sock_common - minimal network layer representation of sockets * @skc_daddr: Foreign IPv4 addr * @skc_rcv_saddr: Bound local IPv4 addr + * @skc_addrpair: 8-byte-aligned __u64 union of @skc_daddr & @skc_rcv_saddr * @skc_hash: hash value used with various protocol lookup tables * @skc_u16hashes: two u16 hash values used by UDP lookup tables * @skc_dport: placeholder for inet_dport/tw_dport * @skc_num: placeholder for inet_num/tw_num + * @skc_portpair: __u32 union of @skc_dport & @skc_num * @skc_family: network address family * @skc_state: Connection state * @skc_reuse: %SO_REUSEADDR setting * @skc_reuseport: %SO_REUSEPORT setting + * @skc_ipv6only: socket is IPV6 only + * @skc_net_refcnt: socket is using net ref counting * @skc_bound_dev_if: bound device index if != 0 * @skc_bind_node: bind hash linkage for various protocol lookup tables * @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol * @skc_prot: protocol handlers inside a network family * @skc_net: reference to the network namespace of this socket + * @skc_v6_daddr: IPV6 destination address + * @skc_v6_rcv_saddr: IPV6 source address + * @skc_cookie: socket's cookie value * @skc_node: main hash linkage for various protocol lookup tables * @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol * @skc_tx_queue_mapping: tx queue number for this connection @@ -137,7 +144,15 @@ typedef __u64 __bitwise __addrpair; * @skc_flags: place holder for sk_flags * %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, * %SO_OOBINLINE settings, %SO_TIMESTAMPING settings + * @skc_listener: connection request listener socket (aka rsk_listener) + * [union with @skc_flags] + * @skc_tw_dr: (aka tw_dr) ptr to &struct inet_timewait_death_row + * [union with @skc_flags] * @skc_incoming_cpu: record/match cpu processing incoming packets + * @skc_rcv_wnd: (aka rsk_rcv_wnd) TCP receive window size (possibly scaled) + * [union with @skc_incoming_cpu] + * @skc_tw_rcv_nxt: (aka tw_rcv_nxt) TCP window next expected seq number + * [union with @skc_incoming_cpu] * @skc_refcnt: reference count * * This is the minimal network layer representation of sockets, the header @@ -245,6 +260,7 @@ struct bpf_sk_storage; * @sk_dst_cache: destination cache * @sk_dst_pending_confirm: need to confirm neighbour * @sk_policy: flow policy + * @sk_rx_skb_cache: cache copy of recently accessed RX skb * @sk_receive_queue: incoming packets * @sk_wmem_alloc: transmit queue bytes committed * @sk_tsq_flags: TCP Small Queues flags @@ -265,6 +281,8 @@ struct bpf_sk_storage; * @sk_no_check_rx: allow zero checksum in RX packets * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) * @sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK) + * @sk_route_forced_caps: static, forced route capabilities + * (set in tcp_init_sock()) * @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4) * @sk_gso_max_size: Maximum GSO segment size to build * @sk_gso_max_segs: Maximum number of GSO segments @@ -303,6 +321,8 @@ struct bpf_sk_storage; * @sk_frag: cached page frag * @sk_peek_off: current peek_offset value * @sk_send_head: front of stuff to transmit + * @tcp_rtx_queue: TCP re-transmit queue [union with @sk_send_head] + * @sk_tx_skb_cache: cache copy of recently accessed TX skb * @sk_security: used by security modules * @sk_mark: generic packet mark * @sk_cgrp_data: cgroup data for this cgroup @@ -313,11 +333,14 @@ struct bpf_sk_storage; * @sk_write_space: callback to indicate there is bf sending space available * @sk_error_report: callback to indicate errors (e.g. %MSG_ERRQUEUE) * @sk_backlog_rcv: callback to process the backlog + * @sk_validate_xmit_skb: ptr to an optional validate function * @sk_destruct: called at sock freeing time, i.e. when all refcnt == 0 * @sk_reuseport_cb: reuseport group container + * @sk_bpf_storage: ptr to cache and control for bpf_sk_storage * @sk_rcu: used during RCU grace period * @sk_clockid: clockid used by time-based scheduling (SO_TXTIME) * @sk_txtime_deadline_mode: set deadline mode for SO_TXTIME + * @sk_txtime_report_errors: set report errors mode for SO_TXTIME * @sk_txtime_unused: unused txtime flags */ struct sock { @@ -393,7 +416,9 @@ struct sock { struct sk_filter __rcu *sk_filter; union { struct socket_wq __rcu *sk_wq; + /* private: */ struct socket_wq *sk_wq_raw; + /* public: */ }; #ifdef CONFIG_XFRM struct xfrm_policy __rcu *sk_policy[2]; @@ -2017,7 +2042,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro * sk_wmem_alloc_get - returns write allocations * @sk: socket * - * Returns sk_wmem_alloc minus initial offset of one + * Return: sk_wmem_alloc minus initial offset of one */ static inline int sk_wmem_alloc_get(const struct sock *sk) { @@ -2028,7 +2053,7 @@ static inline int sk_wmem_alloc_get(const struct sock *sk) * sk_rmem_alloc_get - returns read allocations * @sk: socket * - * Returns sk_rmem_alloc + * Return: sk_rmem_alloc */ static inline int sk_rmem_alloc_get(const struct sock *sk) { @@ -2039,7 +2064,7 @@ static inline int sk_rmem_alloc_get(const struct sock *sk) * sk_has_allocations - check if allocations are outstanding * @sk: socket * - * Returns true if socket has write or read allocations + * Return: true if socket has write or read allocations */ static inline bool sk_has_allocations(const struct sock *sk) { @@ -2050,7 +2075,7 @@ static inline bool sk_has_allocations(const struct sock *sk) * skwq_has_sleeper - check if there are any waiting processes * @wq: struct socket_wq * - * Returns true if socket_wq has waiting processes + * Return: true if socket_wq has waiting processes * * The purpose of the skwq_has_sleeper and sock_poll_wait is to wrap the memory * barrier call. They were added due to the race found within the tcp code. @@ -2238,6 +2263,9 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest * inside other socket operations and end up recursing into sk_page_frag() * while it's already in use. + * + * Return: a per task page_frag if context allows that, + * otherwise a per socket one. */ static inline struct page_frag *sk_page_frag(struct sock *sk) { @@ -2432,6 +2460,7 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags) &skb_shinfo(skb)->tskey); } +DECLARE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key); /** * sk_eat_skb - Release a skb if it is no longer needed * @sk: socket to eat this skb from @@ -2440,7 +2469,6 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags) * This routine must be called with interrupts disabled or with the socket * locked so that the sk_buff queue operation is ok. */ -DECLARE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key); static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb) { __skb_unlink(skb, &sk->sk_receive_queue); -- cgit v1.2.3-59-g8ed1b From 8955b4357d6fc98734b53855b76ee37014a7e492 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 15 Feb 2020 13:41:12 -0800 Subject: skbuff: remove stale bit mask comments Remove stale comments since this flag is no longer a bit mask but is a bit field. Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 864cb9e9622f..1365a556152c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -467,7 +467,6 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, return NULL; } - /* use OR instead of assignment to avoid clearing of bits in mask */ if (pfmemalloc) skb->pfmemalloc = 1; skb->head_frag = 1; @@ -527,7 +526,6 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, return NULL; } - /* use OR instead of assignment to avoid clearing of bits in mask */ if (nc->page.pfmemalloc) skb->pfmemalloc = 1; skb->head_frag = 1; -- cgit v1.2.3-59-g8ed1b From d2f273f0a9205257b91af1d3d461ee29688c2f24 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 15 Feb 2020 15:34:07 -0800 Subject: skbuff.h: fix all kernel-doc warnings Fix all kernel-doc warnings in . Fixes these warnings: ../include/linux/skbuff.h:890: warning: Function parameter or member 'list' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'dev_scratch' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'ip_defrag_offset' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'skb_mstamp_ns' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member '__cloned_offset' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'head_frag' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member '__pkt_type_offset' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'encapsulation' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'encap_hdr_csum' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'csum_valid' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member '__pkt_vlan_present_offset' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'vlan_present' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'csum_complete_sw' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'csum_level' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'inner_protocol_type' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'remcsum_offload' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'sender_cpu' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'reserved_tailroom' not described in 'sk_buff' ../include/linux/skbuff.h:890: warning: Function parameter or member 'inner_ipproto' not described in 'sk_buff' Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- include/linux/skbuff.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ca8806b69388..5b50278c4bc8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -611,9 +611,15 @@ typedef unsigned char *sk_buff_data_t; * @next: Next buffer in list * @prev: Previous buffer in list * @tstamp: Time we arrived/left + * @skb_mstamp_ns: (aka @tstamp) earliest departure time; start point + * for retransmit timer * @rbnode: RB tree node, alternative to next/prev for netem/tcp + * @list: queue head * @sk: Socket we are owned by + * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in + * fragmentation management * @dev: Device we arrived on/are leaving by + * @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL * @cb: Control buffer. Free for use by every layer. Put private vars here * @_skb_refdst: destination entry (with norefcount bit) * @sp: the security path, used for xfrm @@ -632,6 +638,9 @@ typedef unsigned char *sk_buff_data_t; * @pkt_type: Packet class * @fclone: skbuff clone status * @ipvs_property: skbuff is owned by ipvs + * @inner_protocol_type: whether the inner protocol is + * ENCAP_TYPE_ETHER or ENCAP_TYPE_IPPROTO + * @remcsum_offload: remote checksum offload is enabled * @offload_fwd_mark: Packet was L2-forwarded in hardware * @offload_l3_fwd_mark: Packet was L3-forwarded in hardware * @tc_skip_classify: do not classify packet. set by IFB device @@ -650,6 +659,8 @@ typedef unsigned char *sk_buff_data_t; * @tc_index: Traffic control index * @hash: the packet hash * @queue_mapping: Queue mapping for multiqueue devices + * @head_frag: skb was allocated from page fragments, + * not allocated by kmalloc() or vmalloc(). * @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves * @active_extensions: active extensions (skb_ext_id types) * @ndisc_nodetype: router type (from link layer) @@ -660,15 +671,28 @@ typedef unsigned char *sk_buff_data_t; * @wifi_acked_valid: wifi_acked was set * @wifi_acked: whether frame was acked on wifi or not * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS + * @encapsulation: indicates the inner headers in the skbuff are valid + * @encap_hdr_csum: software checksum is needed + * @csum_valid: checksum is already valid * @csum_not_inet: use CRC32c to resolve CHECKSUM_PARTIAL + * @csum_complete_sw: checksum was completed by software + * @csum_level: indicates the number of consecutive checksums found in + * the packet minus one that have been verified as + * CHECKSUM_UNNECESSARY (max 3) * @dst_pending_confirm: need to confirm neighbour * @decrypted: Decrypted SKB * @napi_id: id of the NAPI struct this skb came from + * @sender_cpu: (aka @napi_id) source CPU in XPS * @secmark: security marking * @mark: Generic packet mark + * @reserved_tailroom: (aka @mark) number of bytes of free space available + * at the tail of an sk_buff + * @vlan_present: VLAN tag is present * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information * @inner_protocol: Protocol (encapsulation) + * @inner_ipproto: (aka @inner_protocol) stores ipproto when + * skb->inner_protocol_type == ENCAP_TYPE_IPPROTO; * @inner_transport_header: Inner transport layer header (encapsulation) * @inner_network_header: Network layer header (encapsulation) * @inner_mac_header: Link layer header (encapsulation) @@ -750,7 +774,9 @@ struct sk_buff { #endif #define CLONED_OFFSET() offsetof(struct sk_buff, __cloned_offset) + /* private: */ __u8 __cloned_offset[0]; + /* public: */ __u8 cloned:1, nohdr:1, fclone:2, @@ -775,7 +801,9 @@ struct sk_buff { #endif #define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset) + /* private: */ __u8 __pkt_type_offset[0]; + /* public: */ __u8 pkt_type:3; __u8 ignore_df:1; __u8 nf_trace:1; @@ -798,7 +826,9 @@ struct sk_buff { #define PKT_VLAN_PRESENT_BIT 0 #endif #define PKT_VLAN_PRESENT_OFFSET() offsetof(struct sk_buff, __pkt_vlan_present_offset) + /* private: */ __u8 __pkt_vlan_present_offset[0]; + /* public: */ __u8 vlan_present:1; __u8 csum_complete_sw:1; __u8 csum_level:2; -- cgit v1.2.3-59-g8ed1b From 9a6a0dea16177ccaecc116f560232e63bec115f1 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sun, 16 Feb 2020 16:39:43 -0300 Subject: net: ethernet: dm9000: Handle -EPROBE_DEFER in dm9000_parse_dt() The call to of_get_mac_address() can return -EPROBE_DEFER, for instance when the MAC address is read from a NVMEM driver that did not probe yet. Cc: H. Nikolaus Schaller Cc: Mathieu Malaterre Signed-off-by: Paul Cercueil Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/davicom/dm9000.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index 1ea3372775e6..e94ae9b94dbf 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -1405,6 +1405,8 @@ static struct dm9000_plat_data *dm9000_parse_dt(struct device *dev) mac_addr = of_get_mac_address(np); if (!IS_ERR(mac_addr)) ether_addr_copy(pdata->dev_addr, mac_addr); + else if (PTR_ERR(mac_addr) == -EPROBE_DEFER) + return ERR_CAST(mac_addr); return pdata; } -- cgit v1.2.3-59-g8ed1b From 072663f86d62571fe540d9e1d24eb873a1b1182f Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 15 Jan 2020 06:34:22 +1000 Subject: drm/nouveau/acr/tu11x: initial support Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 2 ++ drivers/gpu/drm/nouveau/nvkm/subdev/acr/tu102.c | 14 ++++++++++++++ drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c | 2 ++ 3 files changed, 18 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index c7d700916eae..4fe9b38db459 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2579,6 +2579,7 @@ nv166_chipset = { static const struct nvkm_device_chip nv167_chipset = { .name = "TU117", + .acr = tu102_acr_new, .bar = tu102_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, @@ -2615,6 +2616,7 @@ nv167_chipset = { static const struct nvkm_device_chip nv168_chipset = { .name = "TU116", + .acr = tu102_acr_new, .bar = tu102_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/tu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/tu102.c index 7f4b89d82d32..d28d8f36ae24 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/tu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/tu102.c @@ -107,6 +107,12 @@ MODULE_FIRMWARE("nvidia/tu104/acr/ucode_unload.bin"); MODULE_FIRMWARE("nvidia/tu106/acr/unload_bl.bin"); MODULE_FIRMWARE("nvidia/tu106/acr/ucode_unload.bin"); +MODULE_FIRMWARE("nvidia/tu116/acr/unload_bl.bin"); +MODULE_FIRMWARE("nvidia/tu116/acr/ucode_unload.bin"); + +MODULE_FIRMWARE("nvidia/tu117/acr/unload_bl.bin"); +MODULE_FIRMWARE("nvidia/tu117/acr/ucode_unload.bin"); + static const struct nvkm_acr_hsf_fwif tu102_acr_unload_fwif[] = { { 0, nvkm_acr_hsfw_load, &gp108_acr_unload_0 }, @@ -130,6 +136,8 @@ tu102_acr_asb_0 = { MODULE_FIRMWARE("nvidia/tu102/acr/ucode_asb.bin"); MODULE_FIRMWARE("nvidia/tu104/acr/ucode_asb.bin"); MODULE_FIRMWARE("nvidia/tu106/acr/ucode_asb.bin"); +MODULE_FIRMWARE("nvidia/tu116/acr/ucode_asb.bin"); +MODULE_FIRMWARE("nvidia/tu117/acr/ucode_asb.bin"); static const struct nvkm_acr_hsf_fwif tu102_acr_asb_fwif[] = { @@ -154,6 +162,12 @@ MODULE_FIRMWARE("nvidia/tu104/acr/ucode_ahesasc.bin"); MODULE_FIRMWARE("nvidia/tu106/acr/bl.bin"); MODULE_FIRMWARE("nvidia/tu106/acr/ucode_ahesasc.bin"); +MODULE_FIRMWARE("nvidia/tu116/acr/bl.bin"); +MODULE_FIRMWARE("nvidia/tu116/acr/ucode_ahesasc.bin"); + +MODULE_FIRMWARE("nvidia/tu117/acr/bl.bin"); +MODULE_FIRMWARE("nvidia/tu117/acr/ucode_ahesasc.bin"); + static const struct nvkm_acr_hsf_fwif tu102_acr_ahesasc_fwif[] = { { 0, nvkm_acr_hsfw_load, &tu102_acr_ahesasc_0 }, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c index 389bad312bf2..10ff5d053f7e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c @@ -51,3 +51,5 @@ MODULE_FIRMWARE("nvidia/gv100/nvdec/scrubber.bin"); MODULE_FIRMWARE("nvidia/tu102/nvdec/scrubber.bin"); MODULE_FIRMWARE("nvidia/tu104/nvdec/scrubber.bin"); MODULE_FIRMWARE("nvidia/tu106/nvdec/scrubber.bin"); +MODULE_FIRMWARE("nvidia/tu116/nvdec/scrubber.bin"); +MODULE_FIRMWARE("nvidia/tu117/nvdec/scrubber.bin"); -- cgit v1.2.3-59-g8ed1b From b99ef12b80cfe48a14e7918c2f799c37d2195aca Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 15 Jan 2020 06:34:22 +1000 Subject: drm/nouveau/gr/tu11x: initial support Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 2 ++ drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c | 26 +++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 4fe9b38db459..8ebbe1656008 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2608,6 +2608,7 @@ nv167_chipset = { .disp = tu102_disp_new, .dma = gv100_dma_new, .fifo = tu102_fifo_new, + .gr = tu102_gr_new, .nvdec[0] = gm107_nvdec_new, .nvenc[0] = gm107_nvenc_new, .sec2 = tu102_sec2_new, @@ -2645,6 +2646,7 @@ nv168_chipset = { .disp = tu102_disp_new, .dma = gv100_dma_new, .fifo = tu102_fifo_new, + .gr = tu102_gr_new, .nvdec[0] = gm107_nvdec_new, .nvenc[0] = gm107_nvenc_new, .sec2 = tu102_sec2_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c index 454668b1cf54..a9efa4d78be9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c @@ -164,6 +164,32 @@ MODULE_FIRMWARE("nvidia/tu106/gr/sw_nonctx.bin"); MODULE_FIRMWARE("nvidia/tu106/gr/sw_bundle_init.bin"); MODULE_FIRMWARE("nvidia/tu106/gr/sw_method_init.bin"); +MODULE_FIRMWARE("nvidia/tu117/gr/fecs_bl.bin"); +MODULE_FIRMWARE("nvidia/tu117/gr/fecs_inst.bin"); +MODULE_FIRMWARE("nvidia/tu117/gr/fecs_data.bin"); +MODULE_FIRMWARE("nvidia/tu117/gr/fecs_sig.bin"); +MODULE_FIRMWARE("nvidia/tu117/gr/gpccs_bl.bin"); +MODULE_FIRMWARE("nvidia/tu117/gr/gpccs_inst.bin"); +MODULE_FIRMWARE("nvidia/tu117/gr/gpccs_data.bin"); +MODULE_FIRMWARE("nvidia/tu117/gr/gpccs_sig.bin"); +MODULE_FIRMWARE("nvidia/tu117/gr/sw_ctx.bin"); +MODULE_FIRMWARE("nvidia/tu117/gr/sw_nonctx.bin"); +MODULE_FIRMWARE("nvidia/tu117/gr/sw_bundle_init.bin"); +MODULE_FIRMWARE("nvidia/tu117/gr/sw_method_init.bin"); + +MODULE_FIRMWARE("nvidia/tu116/gr/fecs_bl.bin"); +MODULE_FIRMWARE("nvidia/tu116/gr/fecs_inst.bin"); +MODULE_FIRMWARE("nvidia/tu116/gr/fecs_data.bin"); +MODULE_FIRMWARE("nvidia/tu116/gr/fecs_sig.bin"); +MODULE_FIRMWARE("nvidia/tu116/gr/gpccs_bl.bin"); +MODULE_FIRMWARE("nvidia/tu116/gr/gpccs_inst.bin"); +MODULE_FIRMWARE("nvidia/tu116/gr/gpccs_data.bin"); +MODULE_FIRMWARE("nvidia/tu116/gr/gpccs_sig.bin"); +MODULE_FIRMWARE("nvidia/tu116/gr/sw_ctx.bin"); +MODULE_FIRMWARE("nvidia/tu116/gr/sw_nonctx.bin"); +MODULE_FIRMWARE("nvidia/tu116/gr/sw_bundle_init.bin"); +MODULE_FIRMWARE("nvidia/tu116/gr/sw_method_init.bin"); + static const struct gf100_gr_fwif tu102_gr_fwif[] = { { 0, gm200_gr_load, &tu102_gr, &gp108_gr_fecs_acr, &gp108_gr_gpccs_acr }, -- cgit v1.2.3-59-g8ed1b From f287d3d19769b1d22cba4e51fa0487f2697713c9 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Wed, 12 Feb 2020 18:11:49 -0500 Subject: drm/nouveau/kms/gv100-: Re-set LUT after clearing for modesets While certain modeset operations on gv100+ need us to temporarily disable the LUT, we make the mistake of sometimes neglecting to reprogram the LUT after such modesets. In particular, moving a head from one encoder to another seems to trigger this quite often. GV100+ is very picky about having a LUT in most scenarios, so this causes the display engine to hang with the following error code: disp: chid 1 stat 00005080 reason 5 [INVALID_STATE] mthd 0200 data 00000001 code 0000002d) So, fix this by always re-programming the LUT if we're clearing it in a state where the wndw is still visible, and has a XLUT handle programmed. Signed-off-by: Lyude Paul Fixes: facaed62b4cb ("drm/nouveau/kms/gv100: initial support") Cc: # v4.18+ Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv50/wndw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c index 890315291b01..bb737f9281e6 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -458,6 +458,8 @@ nv50_wndw_atomic_check(struct drm_plane *plane, struct drm_plane_state *state) asyw->clr.ntfy = armw->ntfy.handle != 0; asyw->clr.sema = armw->sema.handle != 0; asyw->clr.xlut = armw->xlut.handle != 0; + if (asyw->clr.xlut && asyw->visible) + asyw->set.xlut = asyw->xlut.handle != 0; asyw->clr.csc = armw->csc.valid; if (wndw->func->image_clr) asyw->clr.image = armw->image.handle[0] != 0; -- cgit v1.2.3-59-g8ed1b From bab5417f5f0118ce914bc5b2f8381e959e891155 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 14 Feb 2020 08:11:48 -0800 Subject: USB: misc: iowarrior: add support for the 100 device Add a new device id for the 100 devie. It has 4 interfaces like the 28 and 28L devices but a larger endpoint so more I/O pins. Cc: Christoph Jung Cc: stable Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20200214161148.GA3963518@kroah.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/iowarrior.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index d20b60acfe8a..dce20301e367 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -36,6 +36,7 @@ /* fuller speed iowarrior */ #define USB_DEVICE_ID_CODEMERCS_IOW28 0x1504 #define USB_DEVICE_ID_CODEMERCS_IOW28L 0x1505 +#define USB_DEVICE_ID_CODEMERCS_IOW100 0x1506 /* OEMed devices */ #define USB_DEVICE_ID_CODEMERCS_IOW24SAG 0x158a @@ -144,6 +145,7 @@ static const struct usb_device_id iowarrior_ids[] = { {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56AM)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW28)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW28L)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW100)}, {} /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, iowarrior_ids); @@ -386,6 +388,7 @@ static ssize_t iowarrior_write(struct file *file, case USB_DEVICE_ID_CODEMERCS_IOW56AM: case USB_DEVICE_ID_CODEMERCS_IOW28: case USB_DEVICE_ID_CODEMERCS_IOW28L: + case USB_DEVICE_ID_CODEMERCS_IOW100: /* The IOW56 uses asynchronous IO and more urbs */ if (atomic_read(&dev->write_busy) == MAX_WRITES_IN_FLIGHT) { /* Wait until we are below the limit for submitted urbs */ @@ -786,7 +789,8 @@ static int iowarrior_probe(struct usb_interface *interface, if ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) || (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM) || (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28) || - (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L)) { + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW100)) { res = usb_find_last_int_out_endpoint(iface_desc, &dev->int_out_endpoint); if (res) { @@ -802,7 +806,8 @@ static int iowarrior_probe(struct usb_interface *interface, ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) || (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM) || (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28) || - (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L))) + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW100))) /* IOWarrior56 has wMaxPacketSize different from report size */ dev->report_size = 7; -- cgit v1.2.3-59-g8ed1b From 6a757c07e51f80ac34325fcd558490d2d1439e1b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 3 Feb 2020 17:37:07 +0100 Subject: netfilter: conntrack: allow insertion of clashing entries This patch further relaxes the need to drop an skb due to a clash with an existing conntrack entry. Current clash resolution handles the case where the clash occurs between two identical entries (distinct nf_conn objects with same tuples), i.e.: Original Reply existing: 10.2.3.4:42 -> 10.8.8.8:53 10.2.3.4:42 <- 10.0.0.6:5353 clashing: 10.2.3.4:42 -> 10.8.8.8:53 10.2.3.4:42 <- 10.0.0.6:5353 ... existing handling will discard the unconfirmed clashing entry and makes skb->_nfct point to the existing one. The skb can then be processed normally just as if the clash would not have existed in the first place. For other clashes, the skb needs to be dropped. This frequently happens with DNS resolvers that send A and AAAA queries back-to-back when NAT rules are present that cause packets to get different DNAT transformations applied, for example: -m statistics --mode random ... -j DNAT --dnat-to 10.0.0.6:5353 -m statistics --mode random ... -j DNAT --dnat-to 10.0.0.7:5353 In this case the A or AAAA query is dropped which incurs a costly delay during name resolution. This patch also allows this collision type: Original Reply existing: 10.2.3.4:42 -> 10.8.8.8:53 10.2.3.4:42 <- 10.0.0.6:5353 clashing: 10.2.3.4:42 -> 10.8.8.8:53 10.2.3.4:42 <- 10.0.0.7:5353 In this case, clash is in original direction -- the reply direction is still unique. The change makes it so that when the 2nd colliding packet is received, the clashing conntrack is tagged with new IPS_NAT_CLASH_BIT, gets a fixed 1 second timeout and is inserted in the reply direction only. The entry is hidden from 'conntrack -L', it will time out quickly and it can be early dropped because it will never progress to the ASSURED state. To avoid special-casing the delete code path to special case the ORIGINAL hlist_nulls node, a new helper, "hlist_nulls_add_fake", is added so hlist_nulls_del() will work. Example: CPU A: CPU B: 1. 10.2.3.4:42 -> 10.8.8.8:53 (A) 2. 10.2.3.4:42 -> 10.8.8.8:53 (AAAA) 3. Apply DNAT, reply changed to 10.0.0.6 4. 10.2.3.4:42 -> 10.8.8.8:53 (AAAA) 5. Apply DNAT, reply changed to 10.0.0.7 6. confirm/commit to conntrack table, no collisions 7. commit clashing entry Reply comes in: 10.2.3.4:42 <- 10.0.0.6:5353 (A) -> Finds a conntrack, DNAT is reversed & packet forwarded to 10.2.3.4:42 10.2.3.4:42 <- 10.0.0.7:5353 (AAAA) -> Finds a conntrack, DNAT is reversed & packet forwarded to 10.2.3.4:42 The conntrack entry is deleted from table, as it has the NAT_CLASH bit set. In case of a retransmit from ORIGINAL dir, all further packets will get the DNAT transformation to 10.0.0.6. I tried to come up with other solutions but they all have worse problems. Alternatives considered were: 1. Confirm ct entries at allocation time, not in postrouting. a. will cause uneccesarry work when the skb that creates the conntrack is dropped by ruleset. b. in case nat is applied, ct entry would need to be moved in the table, which requires another spinlock pair to be taken. c. breaks the 'unconfirmed entry is private to cpu' assumption: we would need to guard all nfct->ext allocation requests with ct->lock spinlock. 2. Make the unconfirmed list a hash table instead of a pcpu list. Shares drawback c) of the first alternative. 3. Document this is expected and force users to rearrange their ruleset (e.g. by using "-m cluster" instead of "-m statistics"). nft has the 'jhash' expression which can be used instead of 'numgen'. Major drawback: doesn't fix what I consider a bug, not very realistic and I believe its reasonable to have the existing rulesets to 'just work'. 4. Document this is expected and force users to steer problematic packets to the same CPU -- this would serialize the "allocate new conntrack entry/nat table evaluation/perform nat/confirm entry", so no race can occur. Similar drawback to 3. Another advantage of this patch compared to 1) and 2) is that there are no changes to the hot path; things are handled in the udp tracker and the clash resolution path. Cc: rcu@vger.kernel.org Cc: "Paul E. McKenney" Cc: Josh Triplett Cc: Jozsef Kadlecsik Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/rculist_nulls.h | 7 ++ include/uapi/linux/netfilter/nf_conntrack_common.h | 12 +++- net/netfilter/nf_conntrack_core.c | 76 +++++++++++++++++++++- net/netfilter/nf_conntrack_proto_udp.c | 20 ++++-- 4 files changed, 108 insertions(+), 7 deletions(-) diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index e5b752027a03..9670b54b484a 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -145,6 +145,13 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, } } +/* after that hlist_nulls_del will work */ +static inline void hlist_nulls_add_fake(struct hlist_nulls_node *n) +{ + n->pprev = &n->next; + n->next = (struct hlist_nulls_node *)NULLS_MARKER(NULL); +} + /** * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type * @tpos: the type * to use as a loop cursor. diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 336014bf8868..b6f0bb1dc799 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -97,6 +97,15 @@ enum ip_conntrack_status { IPS_UNTRACKED_BIT = 12, IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT), +#ifdef __KERNEL__ + /* Re-purposed for in-kernel use: + * Tags a conntrack entry that clashed with an existing entry + * on insert. + */ + IPS_NAT_CLASH_BIT = IPS_UNTRACKED_BIT, + IPS_NAT_CLASH = IPS_UNTRACKED, +#endif + /* Conntrack got a helper explicitly attached via CT target. */ IPS_HELPER_BIT = 13, IPS_HELPER = (1 << IPS_HELPER_BIT), @@ -110,7 +119,8 @@ enum ip_conntrack_status { */ IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK | IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING | - IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD), + IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_UNTRACKED | + IPS_OFFLOAD), __IPS_MAX_BIT = 15, }; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 3f069eb0f0fc..1927fc296f95 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -940,11 +940,71 @@ static int __nf_ct_resolve_clash(struct sk_buff *skb, return NF_DROP; } +/** + * nf_ct_resolve_clash_harder - attempt to insert clashing conntrack entry + * + * @skb: skb that causes the collision + * @repl_idx: hash slot for reply direction + * + * Called when origin or reply direction had a clash. + * The skb can be handled without packet drop provided the reply direction + * is unique or there the existing entry has the identical tuple in both + * directions. + * + * Caller must hold conntrack table locks to prevent concurrent updates. + * + * Returns NF_DROP if the clash could not be handled. + */ +static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx) +{ + struct nf_conn *loser_ct = (struct nf_conn *)skb_nfct(skb); + const struct nf_conntrack_zone *zone; + struct nf_conntrack_tuple_hash *h; + struct hlist_nulls_node *n; + struct net *net; + + zone = nf_ct_zone(loser_ct); + net = nf_ct_net(loser_ct); + + /* Reply direction must never result in a clash, unless both origin + * and reply tuples are identical. + */ + hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[repl_idx], hnnode) { + if (nf_ct_key_equal(h, + &loser_ct->tuplehash[IP_CT_DIR_REPLY].tuple, + zone, net)) + return __nf_ct_resolve_clash(skb, h); + } + + /* We want the clashing entry to go away real soon: 1 second timeout. */ + loser_ct->timeout = nfct_time_stamp + HZ; + + /* IPS_NAT_CLASH removes the entry automatically on the first + * reply. Also prevents UDP tracker from moving the entry to + * ASSURED state, i.e. the entry can always be evicted under + * pressure. + */ + loser_ct->status |= IPS_FIXED_TIMEOUT | IPS_NAT_CLASH; + + __nf_conntrack_insert_prepare(loser_ct); + + /* fake add for ORIGINAL dir: we want lookups to only find the entry + * already in the table. This also hides the clashing entry from + * ctnetlink iteration, i.e. conntrack -L won't show them. + */ + hlist_nulls_add_fake(&loser_ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); + + hlist_nulls_add_head_rcu(&loser_ct->tuplehash[IP_CT_DIR_REPLY].hnnode, + &nf_conntrack_hash[repl_idx]); + return NF_ACCEPT; +} + /** * nf_ct_resolve_clash - attempt to handle clash without packet drop * * @skb: skb that causes the clash * @h: tuplehash of the clashing entry already in table + * @hash_reply: hash slot for reply direction * * A conntrack entry can be inserted to the connection tracking table * if there is no existing entry with an identical tuple. @@ -963,10 +1023,18 @@ static int __nf_ct_resolve_clash(struct sk_buff *skb, * exactly the same, only the to-be-confirmed conntrack entry is discarded * and @skb is associated with the conntrack entry already in the table. * + * Failing that, the new, unconfirmed conntrack is still added to the table + * provided that the collision only occurs in the ORIGINAL direction. + * The new entry will be added after the existing one in the hash list, + * so packets in the ORIGINAL direction will continue to match the existing + * entry. The new entry will also have a fixed timeout so it expires -- + * due to the collision, it will not see bidirectional traffic. + * * Returns NF_DROP if the clash could not be resolved. */ static __cold noinline int -nf_ct_resolve_clash(struct sk_buff *skb, struct nf_conntrack_tuple_hash *h) +nf_ct_resolve_clash(struct sk_buff *skb, struct nf_conntrack_tuple_hash *h, + u32 reply_hash) { /* This is the conntrack entry already in hashes that won race. */ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); @@ -987,6 +1055,10 @@ nf_ct_resolve_clash(struct sk_buff *skb, struct nf_conntrack_tuple_hash *h) if (ret == NF_ACCEPT) return ret; + ret = nf_ct_resolve_clash_harder(skb, reply_hash); + if (ret == NF_ACCEPT) + return ret; + drop: nf_ct_add_to_dying_list(loser_ct); NF_CT_STAT_INC(net, drop); @@ -1101,7 +1173,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) return NF_ACCEPT; out: - ret = nf_ct_resolve_clash(skb, h); + ret = nf_ct_resolve_clash(skb, h, reply_hash); dying: nf_conntrack_double_unlock(hash, reply_hash); local_bh_enable(); diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 7365b43f8f98..760ca2422816 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -81,6 +81,18 @@ static bool udp_error(struct sk_buff *skb, return false; } +static void nf_conntrack_udp_refresh_unreplied(struct nf_conn *ct, + struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + u32 extra_jiffies) +{ + if (unlikely(ctinfo == IP_CT_ESTABLISHED_REPLY && + ct->status & IPS_NAT_CLASH)) + nf_ct_kill(ct); + else + nf_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies); +} + /* Returns verdict for packet, and may modify conntracktype */ int nf_conntrack_udp_packet(struct nf_conn *ct, struct sk_buff *skb, @@ -116,8 +128,8 @@ int nf_conntrack_udp_packet(struct nf_conn *ct, if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_ASSURED, ct); } else { - nf_ct_refresh_acct(ct, ctinfo, skb, - timeouts[UDP_CT_UNREPLIED]); + nf_conntrack_udp_refresh_unreplied(ct, skb, ctinfo, + timeouts[UDP_CT_UNREPLIED]); } return NF_ACCEPT; } @@ -198,8 +210,8 @@ int nf_conntrack_udplite_packet(struct nf_conn *ct, if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_ASSURED, ct); } else { - nf_ct_refresh_acct(ct, ctinfo, skb, - timeouts[UDP_CT_UNREPLIED]); + nf_conntrack_udp_refresh_unreplied(ct, skb, ctinfo, + timeouts[UDP_CT_UNREPLIED]); } return NF_ACCEPT; } -- cgit v1.2.3-59-g8ed1b From 5eee7c625d414fb62985439ed58ab755d8988c76 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 3 Feb 2020 08:10:29 -0800 Subject: watchdog: fix mtk_wdt.c RESET_CONTROLLER build error Fix build error when CONFIG_RESET_CONTROLLER is not set by selecting RESET_CONTROLLER. ld: drivers/watchdog/mtk_wdt.o: in function `mtk_wdt_probe': mtk_wdt.c:(.text+0x3ec): undefined reference to `devm_reset_controller_register' Signed-off-by: Randy Dunlap Fixes: c254e103082b74e ("watchdog: mtk_wdt: mt8183: Add reset controller") Cc: Wim Van Sebroeck Cc: Guenter Roeck Cc: Matthias Brugger Cc: linux-watchdog@vger.kernel.org Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/77c1e557-4941-3806-2933-6c3583576390@infradead.org Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index cec868f8db3f..c3c8e0786a99 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -841,6 +841,7 @@ config MEDIATEK_WATCHDOG tristate "Mediatek SoCs watchdog support" depends on ARCH_MEDIATEK || COMPILE_TEST select WATCHDOG_CORE + select RESET_CONTROLLER help Say Y here to include support for the watchdog timer in Mediatek SoCs. -- cgit v1.2.3-59-g8ed1b From d970a325561da5e611596cbb06475db3755ce823 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 13 Feb 2020 18:22:55 +0100 Subject: KVM: x86: fix missing prototypes Reported with "make W=1" due to -Wmissing-prototypes. Reported-by: Qian Cai Reviewed-by: Miaohe Lin Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e89eb67356cb..7944ad6ac10b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -889,6 +889,8 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu); +int kvm_arch_post_init_vm(struct kvm *kvm); +void kvm_arch_pre_destroy_vm(struct kvm *kvm); #ifndef __KVM_HAVE_ARCH_VM_ALLOC /* -- cgit v1.2.3-59-g8ed1b From 463bfeeead97416ad2b141421f51888054dc0e18 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Fri, 14 Feb 2020 10:44:05 +0800 Subject: KVM: nVMX: Fix some obsolete comments and grammar error Fix wrong variable names and grammar error in comment. Signed-off-by: Miaohe Lin Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 3589cd3c0fcc..a5757b0b80f9 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -3161,10 +3161,10 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, * or KVM_SET_NESTED_STATE). Otherwise it's called from vmlaunch/vmresume. * * Returns: - * NVMX_ENTRY_SUCCESS: Entered VMX non-root mode - * NVMX_ENTRY_VMFAIL: Consistency check VMFail - * NVMX_ENTRY_VMEXIT: Consistency check VMExit - * NVMX_ENTRY_KVM_INTERNAL_ERROR: KVM internal error + * NVMX_VMENTRY_SUCCESS: Entered VMX non-root mode + * NVMX_VMENTRY_VMFAIL: Consistency check VMFail + * NVMX_VMENTRY_VMEXIT: Consistency check VMExit + * NVMX_VMENTRY_KVM_INTERNAL_ERROR: KVM internal error */ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) @@ -5330,7 +5330,7 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, } /* - * Return 1 if we should exit from L2 to L1 to handle an MSR access access, + * Return 1 if we should exit from L2 to L1 to handle an MSR access, * rather than handle it ourselves in L0. I.e., check whether L1 expressed * disinterest in the current event (read or write a specific MSR) by using an * MSR bitmap. This may be the case even when L0 doesn't use MSR bitmaps. -- cgit v1.2.3-59-g8ed1b From e9a0e65eda3f78d0b04ec6136c591c000cbc3b76 Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Mon, 20 Jan 2020 10:17:29 +0100 Subject: watchdog: da9062: do not ping the hw during stop() The da9062 hw has a minimum ping cool down phase of at least 200ms. The driver takes that into account by setting the min_hw_heartbeat_ms to 300ms and the core guarantees that the hw limit is observed for the ping() calls. But the core can't guarantee the required minimum ping cool down phase if a stop() command is send immediately after the ping() command. So it is not allowed to ping the watchdog within the stop() command as the driver does. Remove the ping can be done without doubts because the watchdog gets disabled anyway and a (re)start resets the watchdog counter too. Signed-off-by: Marco Felsch Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200120091729.16256-1-m.felsch@pengutronix.de [groeck: Updated description] Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/da9062_wdt.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/watchdog/da9062_wdt.c b/drivers/watchdog/da9062_wdt.c index 47eefe072b40..777d7eec7f2e 100644 --- a/drivers/watchdog/da9062_wdt.c +++ b/drivers/watchdog/da9062_wdt.c @@ -95,13 +95,6 @@ static int da9062_wdt_stop(struct watchdog_device *wdd) struct da9062_watchdog *wdt = watchdog_get_drvdata(wdd); int ret; - ret = da9062_reset_watchdog_timer(wdt); - if (ret) { - dev_err(wdt->hw->dev, "Failed to ping the watchdog (err = %d)\n", - ret); - return ret; - } - ret = regmap_update_bits(wdt->hw->regmap, DA9062AA_CONTROL_D, DA9062AA_TWDSCALE_MASK, -- cgit v1.2.3-59-g8ed1b From 8541673d2a5f2faccff345e35991e2f9887779ea Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Fri, 7 Feb 2020 08:15:18 +0100 Subject: watchdog: da9062: fix power management ops This fixes commit f6c98b08381c ("watchdog: da9062: add power management ops"). During discussion [1] we agreed that this should be configurable because it is a device quirk if we can't use the hw watchdog auto suspend function. [1] https://lore.kernel.org/linux-watchdog/20191128171931.22563-1-m.felsch@pengutronix.de/ Signed-off-by: Marco Felsch Fixes: f6c98b08381c ("watchdog: da9062: add power management ops") Reviewed-by: Guenter Roeck Reviewed-by: Adam Thomson Link: https://lore.kernel.org/r/20200207071518.5559-1-m.felsch@pengutronix.de Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/da9062_wdt.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/watchdog/da9062_wdt.c b/drivers/watchdog/da9062_wdt.c index 777d7eec7f2e..0ad15d55071c 100644 --- a/drivers/watchdog/da9062_wdt.c +++ b/drivers/watchdog/da9062_wdt.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -31,6 +32,7 @@ static const unsigned int wdt_timeout[] = { 0, 2, 4, 8, 16, 32, 65, 131 }; struct da9062_watchdog { struct da9062 *hw; struct watchdog_device wdtdev; + bool use_sw_pm; }; static unsigned int da9062_wdt_timeout_to_sel(unsigned int secs) @@ -193,6 +195,8 @@ static int da9062_wdt_probe(struct platform_device *pdev) if (!wdt) return -ENOMEM; + wdt->use_sw_pm = device_property_present(dev, "dlg,use-sw-pm"); + wdt->hw = chip; wdt->wdtdev.info = &da9062_watchdog_info; @@ -219,6 +223,10 @@ static int da9062_wdt_probe(struct platform_device *pdev) static int __maybe_unused da9062_wdt_suspend(struct device *dev) { struct watchdog_device *wdd = dev_get_drvdata(dev); + struct da9062_watchdog *wdt = watchdog_get_drvdata(wdd); + + if (!wdt->use_sw_pm) + return 0; if (watchdog_active(wdd)) return da9062_wdt_stop(wdd); @@ -229,6 +237,10 @@ static int __maybe_unused da9062_wdt_suspend(struct device *dev) static int __maybe_unused da9062_wdt_resume(struct device *dev) { struct watchdog_device *wdd = dev_get_drvdata(dev); + struct da9062_watchdog *wdt = watchdog_get_drvdata(wdd); + + if (!wdt->use_sw_pm) + return 0; if (watchdog_active(wdd)) return da9062_wdt_start(wdd); -- cgit v1.2.3-59-g8ed1b From 44144c809e39d64ff9931c7e8956c42b2baa89e6 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 8 Feb 2020 05:08:03 -0800 Subject: watchdog: da9062: Add dependency on I2C Since commit 057b52b4b3d58 ("watchdog: da9062: make restart handler atomic safe"), the driver calls i2c functions directly. It now therefore depends on I2C. This is a hard dependency which overrides COMPILE_TEST. Reported-by: kbuild test robot Reported-by: Randy Dunlap Fixes: 057b52b4b3d58 ("watchdog: da9062: make restart handler atomic safe") Cc: Marco Felsch Cc: Adam Thomson Cc: Stefan Lengfeld Reviewed-by: Marco Felsch Acked-by: Randy Dunlap Acked-by: Geert Uytterhoeven Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index c3c8e0786a99..9ea2b43d4b01 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -207,6 +207,7 @@ config DA9063_WATCHDOG config DA9062_WATCHDOG tristate "Dialog DA9062/61 Watchdog" depends on MFD_DA9062 || COMPILE_TEST + depends on I2C select WATCHDOG_CORE help Support for the watchdog in the DA9062 and DA9061 PMICs. -- cgit v1.2.3-59-g8ed1b From 4aadf4b49ec7d80c5db842ca28479d07108c9484 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Mon, 17 Feb 2020 11:16:52 +0800 Subject: ASoC: hdmi-codec: set plugged_cb to NULL when component removing Sets plugged_cb to NULL when component removing to notify its consumers : no further plugged status report is required. Signed-off-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20200217105513.1.Icc323daaf71ad02f191fd8d91136b01b61eca5e3@changeid Signed-off-by: Mark Brown --- sound/soc/codecs/hdmi-codec.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c index 543363102d03..bc2903d27e6e 100644 --- a/sound/soc/codecs/hdmi-codec.c +++ b/sound/soc/codecs/hdmi-codec.c @@ -779,7 +779,17 @@ static int hdmi_of_xlate_dai_id(struct snd_soc_component *component, return ret; } +static void hdmi_remove(struct snd_soc_component *component) +{ + struct hdmi_codec_priv *hcp = snd_soc_component_get_drvdata(component); + + if (hcp->hcd.ops->hook_plugged_cb) + hcp->hcd.ops->hook_plugged_cb(component->dev->parent, + hcp->hcd.data, NULL, NULL); +} + static const struct snd_soc_component_driver hdmi_driver = { + .remove = hdmi_remove, .dapm_widgets = hdmi_widgets, .num_dapm_widgets = ARRAY_SIZE(hdmi_widgets), .of_xlate_dai_id = hdmi_of_xlate_dai_id, -- cgit v1.2.3-59-g8ed1b From 3bc7b6c15fffdf3f818df31198c8c040ad8f7ea9 Mon Sep 17 00:00:00 2001 From: Ravulapati Vishnu vardhan rao Date: Mon, 17 Feb 2020 16:09:19 +0530 Subject: ASoC: amd: ACP needs to be powered off in BIOS. Removed this logic because It is BIOS which needs to power off the ACP power domian through ACP_PGFSM_CTRL register when you De-initialize ACP Engine. Signed-off-by: Ravulapati Vishnu vardhan rao Link: https://lore.kernel.org/r/1581935964-15059-1-git-send-email-Vishnuvardhanrao.Ravulapati@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/raven/pci-acp3x.c | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/sound/soc/amd/raven/pci-acp3x.c b/sound/soc/amd/raven/pci-acp3x.c index 65330bb50e74..da60e2ec5535 100644 --- a/sound/soc/amd/raven/pci-acp3x.c +++ b/sound/soc/amd/raven/pci-acp3x.c @@ -45,23 +45,6 @@ static int acp3x_power_on(void __iomem *acp3x_base) return -ETIMEDOUT; } -static int acp3x_power_off(void __iomem *acp3x_base) -{ - u32 val; - int timeout; - - rv_writel(ACP_PGFSM_CNTL_POWER_OFF_MASK, - acp3x_base + mmACP_PGFSM_CONTROL); - timeout = 0; - while (++timeout < 500) { - val = rv_readl(acp3x_base + mmACP_PGFSM_STATUS); - if ((val & ACP_PGFSM_STATUS_MASK) == ACP_POWERED_OFF) - return 0; - udelay(1); - } - return -ETIMEDOUT; -} - static int acp3x_reset(void __iomem *acp3x_base) { u32 val; @@ -115,12 +98,6 @@ static int acp3x_deinit(void __iomem *acp3x_base) pr_err("ACP3x reset failed\n"); return ret; } - /* power off */ - ret = acp3x_power_off(acp3x_base); - if (ret) { - pr_err("ACP3x power off failed\n"); - return ret; - } return 0; } -- cgit v1.2.3-59-g8ed1b From b3f15ec3d809ccf2e171ca4e272a220d3c1a3e05 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 10 Feb 2020 11:47:57 +0000 Subject: kvm: arm/arm64: Fold VHE entry/exit work into kvm_vcpu_run_vhe() With VHE, running a vCPU always requires the sequence: 1. kvm_arm_vhe_guest_enter(); 2. kvm_vcpu_run_vhe(); 3. kvm_arm_vhe_guest_exit() ... and as we invoke this from the shared arm/arm64 KVM code, 32-bit arm has to provide stubs for all three functions. To simplify the common code, and make it easier to make further modifications to the arm64-specific portions in the near future, let's fold kvm_arm_vhe_guest_enter() and kvm_arm_vhe_guest_exit() into kvm_vcpu_run_vhe(). The 32-bit stubs for kvm_arm_vhe_guest_enter() and kvm_arm_vhe_guest_exit() are removed, as they are no longer used. The 32-bit stub for kvm_vcpu_run_vhe() is left as-is. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200210114757.2889-1-mark.rutland@arm.com --- arch/arm/include/asm/kvm_host.h | 3 --- arch/arm64/include/asm/kvm_host.h | 32 -------------------------------- arch/arm64/kvm/hyp/switch.c | 39 +++++++++++++++++++++++++++++++++++++-- virt/kvm/arm/arm.c | 2 -- 4 files changed, 37 insertions(+), 39 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index bd2233805d99..cbd26ae95e7e 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -394,9 +394,6 @@ static inline void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) {} static inline void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) {} static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {} -static inline void kvm_arm_vhe_guest_enter(void) {} -static inline void kvm_arm_vhe_guest_exit(void) {} - #define KVM_BP_HARDEN_UNKNOWN -1 #define KVM_BP_HARDEN_WA_NEEDED 0 #define KVM_BP_HARDEN_NOT_REQUIRED 1 diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f6a77ddab956..d740ec00ecd3 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -628,38 +628,6 @@ static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {} static inline void kvm_clr_pmu_events(u32 clr) {} #endif -static inline void kvm_arm_vhe_guest_enter(void) -{ - local_daif_mask(); - - /* - * Having IRQs masked via PMR when entering the guest means the GIC - * will not signal the CPU of interrupts of lower priority, and the - * only way to get out will be via guest exceptions. - * Naturally, we want to avoid this. - * - * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a - * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU. - */ - pmr_sync(); -} - -static inline void kvm_arm_vhe_guest_exit(void) -{ - /* - * local_daif_restore() takes care to properly restore PSTATE.DAIF - * and the GIC PMR if the host is using IRQ priorities. - */ - local_daif_restore(DAIF_PROCCTX_NOIRQ); - - /* - * When we exit from the guest we change a number of CPU configuration - * parameters, such as traps. Make sure these changes take effect - * before running the host or additional guests. - */ - isb(); -} - #define KVM_BP_HARDEN_UNKNOWN -1 #define KVM_BP_HARDEN_WA_NEEDED 0 #define KVM_BP_HARDEN_NOT_REQUIRED 1 diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 72fbbd86eb5e..457067706b75 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -617,7 +617,7 @@ static void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) } /* Switch to the guest for VHE systems running in EL2 */ -int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) +static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; struct kvm_cpu_context *guest_ctxt; @@ -670,7 +670,42 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) return exit_code; } -NOKPROBE_SYMBOL(kvm_vcpu_run_vhe); +NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe); + +int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) +{ + int ret; + + local_daif_mask(); + + /* + * Having IRQs masked via PMR when entering the guest means the GIC + * will not signal the CPU of interrupts of lower priority, and the + * only way to get out will be via guest exceptions. + * Naturally, we want to avoid this. + * + * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a + * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU. + */ + pmr_sync(); + + ret = __kvm_vcpu_run_vhe(vcpu); + + /* + * local_daif_restore() takes care to properly restore PSTATE.DAIF + * and the GIC PMR if the host is using IRQ priorities. + */ + local_daif_restore(DAIF_PROCCTX_NOIRQ); + + /* + * When we exit from the guest we change a number of CPU configuration + * parameters, such as traps. Make sure these changes take effect + * before running the host or additional guests. + */ + isb(); + + return ret; +} /* Switch to the guest for legacy non-VHE systems */ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index efda376ab3c5..560d6f258297 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -797,9 +797,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) guest_enter_irqoff(); if (has_vhe()) { - kvm_arm_vhe_guest_enter(); ret = kvm_vcpu_run_vhe(vcpu); - kvm_arm_vhe_guest_exit(); } else { ret = kvm_call_hyp_ret(__kvm_vcpu_run_nvhe, vcpu); } -- cgit v1.2.3-59-g8ed1b From a655e2b107d463ce2745188ce050d07daed09a71 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 17 Feb 2020 16:19:47 +0100 Subject: ALSA: hda/realtek - Apply quirk for MSI GP63, too The same quirk that was applied to MSI GL73 is needed for MSI GP63, too. Adding the entry with the SSID 1462:1228. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=206503 Cc: Link: https://lore.kernel.org/r/20200217151947.17528-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6c8cb4ce517e..82485e06dde1 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2447,6 +2447,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1071, 0x8258, "Evesham Voyaeger", ALC882_FIXUP_EAPD), SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE), SND_PCI_QUIRK(0x1458, 0xa0b8, "Gigabyte AZ370-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), + SND_PCI_QUIRK(0x1462, 0x1228, "MSI-GP63", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1276, "MSI-GL73", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD), SND_PCI_QUIRK(0x1462, 0xda57, "MSI Z270-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), -- cgit v1.2.3-59-g8ed1b From 52e29e331070cd7d52a64cbf1b0958212a340e28 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 17 Jan 2020 09:02:20 -0500 Subject: btrfs: don't set path->leave_spinning for truncate The only time we actually leave the path spinning is if we're truncating a small amount and don't actually free an extent, which is not a common occurrence. We have to set the path blocking in order to add the delayed ref anyway, so the first extent we find we set the path to blocking and stay blocking for the duration of the operation. With the upcoming file extent map stuff there will be another case that we have to have the path blocking, so just swap to blocking always. Note: this patch also fixes a warning after 28553fa992cb ("Btrfs: fix race between shrinking truncate and fiemap") got merged that inserts extent locks around truncation so the path must not leave spinning locks after btrfs_search_slot. [70.794783] BUG: sleeping function called from invalid context at mm/slab.h:565 [70.794834] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 1141, name: rsync [70.794863] 5 locks held by rsync/1141: [70.794876] #0: ffff888417b9c408 (sb_writers#17){.+.+}, at: mnt_want_write+0x20/0x50 [70.795030] #1: ffff888428de28e8 (&type->i_mutex_dir_key#13/1){+.+.}, at: lock_rename+0xf1/0x100 [70.795051] #2: ffff888417b9c608 (sb_internal#2){.+.+}, at: start_transaction+0x394/0x560 [70.795124] #3: ffff888403081768 (btrfs-fs-01){++++}, at: btrfs_try_tree_write_lock+0x2f/0x160 [70.795203] #4: ffff888403086568 (btrfs-fs-00){++++}, at: btrfs_try_tree_write_lock+0x2f/0x160 [70.795222] CPU: 5 PID: 1141 Comm: rsync Not tainted 5.6.0-rc2-backup+ #2 [70.795362] Call Trace: [70.795374] dump_stack+0x71/0xa0 [70.795445] ___might_sleep.part.96.cold.106+0xa6/0xb6 [70.795459] kmem_cache_alloc+0x1d3/0x290 [70.795471] alloc_extent_state+0x22/0x1c0 [70.795544] __clear_extent_bit+0x3ba/0x580 [70.795557] ? _raw_spin_unlock_irq+0x24/0x30 [70.795569] btrfs_truncate_inode_items+0x339/0xe50 [70.795647] btrfs_evict_inode+0x269/0x540 [70.795659] ? dput.part.38+0x29/0x460 [70.795671] evict+0xcd/0x190 [70.795682] __dentry_kill+0xd6/0x180 [70.795754] dput.part.38+0x2ad/0x460 [70.795765] do_renameat2+0x3cb/0x540 [70.795777] __x64_sys_rename+0x1c/0x20 Reported-by: Dave Jones Fixes: 28553fa992cb ("Btrfs: fix race between shrinking truncate and fiemap") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Reviewed-by: David Sterba [ add note ] Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7d26b4bfb2c6..36deef69f847 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4142,7 +4142,6 @@ search_again: goto out; } - path->leave_spinning = 1; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) goto out; @@ -4294,7 +4293,6 @@ delete: root == fs_info->tree_root)) { struct btrfs_ref ref = { 0 }; - btrfs_set_path_blocking(path); bytes_deleted += extent_num_bytes; btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, -- cgit v1.2.3-59-g8ed1b From e20d3a055a457a10a4c748ce5b7c2ed3173a1324 Mon Sep 17 00:00:00 2001 From: Johannes Krude Date: Wed, 12 Feb 2020 20:32:27 +0100 Subject: bpf, offload: Replace bitwise AND by logical AND in bpf_prog_offload_info_fill This if guards whether user-space wants a copy of the offload-jited bytecode and whether this bytecode exists. By erroneously doing a bitwise AND instead of a logical AND on user- and kernel-space buffer-size can lead to no data being copied to user-space especially when user-space size is a power of two and bigger then the kernel-space buffer. Fixes: fcfb126defda ("bpf: add new jited info fields in bpf_dev_offload and bpf_prog_info") Signed-off-by: Johannes Krude Signed-off-by: Daniel Borkmann Acked-by: Jakub Kicinski Link: https://lore.kernel.org/bpf/20200212193227.GA3769@phlox.h.transitiv.net --- kernel/bpf/offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 2c5dc6541ece..bd09290e3648 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -321,7 +321,7 @@ int bpf_prog_offload_info_fill(struct bpf_prog_info *info, ulen = info->jited_prog_len; info->jited_prog_len = aux->offload->jited_len; - if (info->jited_prog_len & ulen) { + if (info->jited_prog_len && ulen) { uinsns = u64_to_user_ptr(info->jited_prog_insns); ulen = min_t(u32, info->jited_prog_len, ulen); if (copy_to_user(uinsns, aux->offload->jited_image, ulen)) { -- cgit v1.2.3-59-g8ed1b From 8b101a5e14f2161869636ff9cb4907b7749dc0c2 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 24 Jan 2020 08:48:55 +0300 Subject: s390/cio: cio_ignore_proc_seq_next should increase position index if seq_file .next fuction does not change position index, read after some lseek can generate unexpected output. Link: https://bugzilla.kernel.org/show_bug.cgi?id=206283 Link: https://lore.kernel.org/r/d44c53a7-9bc1-15c7-6d4a-0c10cb9dffce@virtuozzo.com Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger Signed-off-by: Vasily Averin Signed-off-by: Vasily Gorbik --- drivers/s390/cio/blacklist.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/s390/cio/blacklist.c b/drivers/s390/cio/blacklist.c index da642e811f7f..4dd2eb634856 100644 --- a/drivers/s390/cio/blacklist.c +++ b/drivers/s390/cio/blacklist.c @@ -303,8 +303,10 @@ static void * cio_ignore_proc_seq_next(struct seq_file *s, void *it, loff_t *offset) { struct ccwdev_iter *iter; + loff_t p = *offset; - if (*offset >= (__MAX_SUBCHANNEL + 1) * (__MAX_SSID + 1)) + (*offset)++; + if (p >= (__MAX_SUBCHANNEL + 1) * (__MAX_SSID + 1)) return NULL; iter = it; if (iter->devno == __MAX_SUBCHANNEL) { @@ -314,7 +316,6 @@ cio_ignore_proc_seq_next(struct seq_file *s, void *it, loff_t *offset) return NULL; } else iter->devno++; - (*offset)++; return iter; } -- cgit v1.2.3-59-g8ed1b From b16c3724dd717a354d0f0257fd647ef5518982aa Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 13 Feb 2020 11:13:13 -0500 Subject: s390/defconfig: enable CONFIG_PROTECTED_VIRTUALIZATION_GUEST The guest support for protected virtualization is default on most distributions. Also refresh defconfig and debug_defconfig. Signed-off-by: Christian Borntraeger Signed-off-by: Vasily Gorbik --- arch/s390/configs/debug_defconfig | 28 +++++++++++++--------------- arch/s390/configs/defconfig | 11 +++++------ 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 2e60c80395ab..0c86ba19fa2b 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -53,6 +53,7 @@ CONFIG_VFIO_AP=m CONFIG_CRASH_DUMP=y CONFIG_HIBERNATION=y CONFIG_PM_DEBUG=y +CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y CONFIG_CMM=m CONFIG_APPLDATA_BASE=y CONFIG_KVM=m @@ -474,7 +475,6 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_EMULEX is not set # CONFIG_NET_VENDOR_EZCHIP is not set # CONFIG_NET_VENDOR_GOOGLE is not set -# CONFIG_NET_VENDOR_HP is not set # CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MARVELL is not set @@ -684,7 +684,6 @@ CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_XXHASH=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -748,7 +747,6 @@ CONFIG_DEBUG_INFO_DWARF4=y CONFIG_GDB_SCRIPTS=y CONFIG_FRAME_WARN=1024 CONFIG_HEADERS_INSTALL=y -CONFIG_HEADERS_CHECK=y CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_PAGEALLOC=y @@ -772,9 +770,9 @@ CONFIG_DEBUG_MEMORY_INIT=y CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m CONFIG_DEBUG_PER_CPU_MAPS=y CONFIG_DEBUG_SHIRQ=y +CONFIG_PANIC_ON_OOPS=y CONFIG_DETECT_HUNG_TASK=y CONFIG_WQ_WATCHDOG=y -CONFIG_PANIC_ON_OOPS=y CONFIG_DEBUG_TIMEKEEPING=y CONFIG_PROVE_LOCKING=y CONFIG_LOCK_STAT=y @@ -783,9 +781,20 @@ CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_DEBUG_LOCKING_API_SELFTESTS=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y +CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_DEBUG_CREDENTIALS=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=300 +CONFIG_LATENCYTOP=y +CONFIG_FUNCTION_PROFILER=y +CONFIG_STACK_TRACER=y +CONFIG_IRQSOFF_TRACER=y +CONFIG_PREEMPT_TRACER=y +CONFIG_SCHED_TRACER=y +CONFIG_FTRACE_SYSCALLS=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_HIST_TRIGGERS=y +CONFIG_S390_PTDUMP=y CONFIG_NOTIFIER_ERROR_INJECTION=m CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m CONFIG_FAULT_INJECTION=y @@ -796,15 +805,6 @@ CONFIG_FAIL_IO_TIMEOUT=y CONFIG_FAIL_FUTEX=y CONFIG_FAULT_INJECTION_DEBUG_FS=y CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y -CONFIG_LATENCYTOP=y -CONFIG_IRQSOFF_TRACER=y -CONFIG_PREEMPT_TRACER=y -CONFIG_SCHED_TRACER=y -CONFIG_FTRACE_SYSCALLS=y -CONFIG_STACK_TRACER=y -CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_FUNCTION_PROFILER=y -CONFIG_HIST_TRIGGERS=y CONFIG_LKDTM=m CONFIG_TEST_LIST_SORT=y CONFIG_TEST_SORT=y @@ -814,5 +814,3 @@ CONFIG_INTERVAL_TREE_TEST=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y CONFIG_TEST_BPF=m -CONFIG_BUG_ON_DATA_CORRUPTION=y -CONFIG_S390_PTDUMP=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 25f799849582..6b27d861a9a3 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -53,6 +53,7 @@ CONFIG_VFIO_AP=m CONFIG_CRASH_DUMP=y CONFIG_HIBERNATION=y CONFIG_PM_DEBUG=y +CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y CONFIG_CMM=m CONFIG_APPLDATA_BASE=y CONFIG_KVM=m @@ -470,7 +471,6 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_EMULEX is not set # CONFIG_NET_VENDOR_EZCHIP is not set # CONFIG_NET_VENDOR_GOOGLE is not set -# CONFIG_NET_VENDOR_HP is not set # CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MARVELL is not set @@ -677,7 +677,6 @@ CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_XXHASH=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m @@ -739,18 +738,18 @@ CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_PANIC_ON_OOPS=y +CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_LATENCYTOP=y +CONFIG_FUNCTION_PROFILER=y +CONFIG_STACK_TRACER=y CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y -CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_FUNCTION_PROFILER=y CONFIG_HIST_TRIGGERS=y +CONFIG_S390_PTDUMP=y CONFIG_LKDTM=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y CONFIG_TEST_BPF=m -CONFIG_BUG_ON_DATA_CORRUPTION=y -CONFIG_S390_PTDUMP=y -- cgit v1.2.3-59-g8ed1b From 0d730b57b95f7e703bb5789de91a7ece6d3a68ac Mon Sep 17 00:00:00 2001 From: chenqiwu Date: Fri, 14 Feb 2020 21:51:33 +0800 Subject: s390/cio: use kobj_to_dev() API Use kobj_to_dev() API instead of container_of(). Reviewed-by: Cornelia Huck Signed-off-by: chenqiwu Signed-off-by: chenqiwu Message-Id: <1581688293-17283-1-git-send-email-qiwuchen55@gmail.com> Signed-off-by: Vasily Gorbik --- drivers/s390/cio/chp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c index 51038ec309c1..dfcbe54591fb 100644 --- a/drivers/s390/cio/chp.c +++ b/drivers/s390/cio/chp.c @@ -135,7 +135,7 @@ static ssize_t chp_measurement_chars_read(struct file *filp, struct channel_path *chp; struct device *device; - device = container_of(kobj, struct device, kobj); + device = kobj_to_dev(kobj); chp = to_channelpath(device); if (chp->cmg == -1) return 0; @@ -184,7 +184,7 @@ static ssize_t chp_measurement_read(struct file *filp, struct kobject *kobj, struct device *device; unsigned int size; - device = container_of(kobj, struct device, kobj); + device = kobj_to_dev(kobj); chp = to_channelpath(device); css = to_css(chp->dev.parent); -- cgit v1.2.3-59-g8ed1b From 05ccaca003e4f26aceb7f075c073a49159af6e9e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 12 Feb 2020 09:46:22 +0100 Subject: s390/pkey/zcrypt: spelling s/crytp/crypt/ Fix typos in a comments. Link: https://lkml.kernel.org/r/20200212084622.9219-1-geert+renesas@glider.be Signed-off-by: Geert Uytterhoeven Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/zcrypt_ep11misc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/crypto/zcrypt_ep11misc.c b/drivers/s390/crypto/zcrypt_ep11misc.c index d4caf46ff9df..2afe2153b34e 100644 --- a/drivers/s390/crypto/zcrypt_ep11misc.c +++ b/drivers/s390/crypto/zcrypt_ep11misc.c @@ -887,7 +887,7 @@ static int ep11_unwrapkey(u16 card, u16 domain, /* empty pin tag */ *p++ = 0x04; *p++ = 0; - /* encrytped key value tag and bytes */ + /* encrypted key value tag and bytes */ p += asn1tag_write(p, 0x04, enckey, enckeysize); /* reply cprb and payload */ @@ -1095,7 +1095,7 @@ int ep11_clr2keyblob(u16 card, u16 domain, u32 keybitsize, u32 keygenflags, /* Step 1: generate AES 256 bit random kek key */ rc = ep11_genaeskey(card, domain, 256, - 0x00006c00, /* EN/DECRYTP, WRAP/UNWRAP */ + 0x00006c00, /* EN/DECRYPT, WRAP/UNWRAP */ kek, &keklen); if (rc) { DEBUG_ERR( -- cgit v1.2.3-59-g8ed1b From 380324734956c64cd060e1db4304f3117ac15809 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 13 Feb 2020 23:42:07 -0700 Subject: s390/mm: Explicitly compare PAGE_DEFAULT_KEY against zero in storage_key_init_range Clang warns: In file included from ../arch/s390/purgatory/purgatory.c:10: In file included from ../include/linux/kexec.h:18: In file included from ../include/linux/crash_core.h:6: In file included from ../include/linux/elfcore.h:5: In file included from ../include/linux/user.h:1: In file included from ../arch/s390/include/asm/user.h:11: ../arch/s390/include/asm/page.h:45:6: warning: converting the result of '<<' to a boolean always evaluates to false [-Wtautological-constant-compare] if (PAGE_DEFAULT_KEY) ^ ../arch/s390/include/asm/page.h:23:44: note: expanded from macro 'PAGE_DEFAULT_KEY' #define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4) ^ 1 warning generated. Explicitly compare this against zero to silence the warning as it is intended to be used in a boolean context. Fixes: de3fa841e429 ("s390/mm: fix compile for PAGE_DEFAULT_KEY != 0") Link: https://github.com/ClangBuiltLinux/linux/issues/860 Link: https://lkml.kernel.org/r/20200214064207.10381-1-natechancellor@gmail.com Acked-by: Christian Borntraeger Signed-off-by: Nathan Chancellor Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/page.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 85e944f04c70..1019efd85b9d 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -42,7 +42,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end); static inline void storage_key_init_range(unsigned long start, unsigned long end) { - if (PAGE_DEFAULT_KEY) + if (PAGE_DEFAULT_KEY != 0) __storage_key_init_range(start, end); } -- cgit v1.2.3-59-g8ed1b From 788d671517b5c81efbed9310ccbadb8cca86a08e Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sat, 8 Feb 2020 07:10:52 -0700 Subject: s390/kaslr: Fix casts in get_random Clang warns: ../arch/s390/boot/kaslr.c:78:25: warning: passing 'char *' to parameter of type 'const u8 *' (aka 'const unsigned char *') converts between pointers to integer types with different sign [-Wpointer-sign] (char *) entropy, (char *) entropy, ^~~~~~~~~~~~~~~~ ../arch/s390/include/asm/cpacf.h:280:28: note: passing argument to parameter 'src' here u8 *dest, const u8 *src, long src_len) ^ 2 warnings generated. Fix the cast to match what else is done in this function. Fixes: b2d24b97b2a9 ("s390/kernel: add support for kernel address space layout randomization (KASLR)") Link: https://github.com/ClangBuiltLinux/linux/issues/862 Link: https://lkml.kernel.org/r/20200208141052.48476-1-natechancellor@gmail.com Signed-off-by: Nathan Chancellor Signed-off-by: Vasily Gorbik --- arch/s390/boot/kaslr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index 5d12352545c5..5591243d673e 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -75,7 +75,7 @@ static unsigned long get_random(unsigned long limit) *(unsigned long *) prng.parm_block ^= seed; for (i = 0; i < 16; i++) { cpacf_kmc(CPACF_KMC_PRNG, prng.parm_block, - (char *) entropy, (char *) entropy, + (u8 *) entropy, (u8 *) entropy, sizeof(entropy)); memcpy(prng.parm_block, entropy, sizeof(entropy)); } -- cgit v1.2.3-59-g8ed1b From 94e90f727f7424d827256023cace829cad6896f4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 16 Feb 2020 23:48:29 +0900 Subject: s390: make 'install' not depend on vmlinux For the same reason as commit 19514fc665ff ("arm, kbuild: make "make install" not depend on vmlinux"), the install targets should never trigger the rebuild of the kernel. The variable, CONFIGURE, is not set by anyone. Remove it as well. Link: https://lkml.kernel.org/r/20200216144829.27023-1-masahiroy@kernel.org Signed-off-by: Masahiro Yamada Signed-off-by: Vasily Gorbik --- arch/s390/Makefile | 2 +- arch/s390/boot/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/Makefile b/arch/s390/Makefile index e0e3a465bbfd..8dfa2cf1f05c 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -146,7 +146,7 @@ all: bzImage #KBUILD_IMAGE is necessary for packaging targets like rpm-pkg, deb-pkg... KBUILD_IMAGE := $(boot)/bzImage -install: vmlinux +install: $(Q)$(MAKE) $(build)=$(boot) $@ bzImage: vmlinux diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index e2c47d3a1c89..0ff9261c915e 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -70,7 +70,7 @@ $(obj)/compressed/vmlinux: $(obj)/startup.a FORCE $(obj)/startup.a: $(OBJECTS) FORCE $(call if_changed,ar) -install: $(CONFIGURE) $(obj)/bzImage +install: sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \ System.map "$(INSTALL_PATH)" -- cgit v1.2.3-59-g8ed1b From dea8d5ce46d7e7f7270b9804df7d1174f88bfd99 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 30 Jan 2020 16:45:53 +0000 Subject: drm/i915/gem: Require per-engine reset support for non-persistent contexts To enable non-persistent contexts, we require a means of cancelling any inflight work from that context. This is first done "gracefully" by using preemption to kick the active context off the engine, and then forcefully by resetting the engine if it is active. If we are unable to reset the engine to remove hostile userspace, we should not allow userspace to opt into using non-persistent contexts. If the per-engine reset fails, we still do a full GPU reset, but that is rare and usually indicative of much deeper issues. The damage is already done. However, the goal of the interface to allow long running compute jobs without causing collateral damage elsewhere, and if we are unable to support that we should make that known by not providing the interface (and falsely pretending we can). Fixes: a0e047156cde ("drm/i915/gem: Make context persistence optional") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Jon Bloomfield Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20200130164553.1937718-1-chris@chris-wilson.co.uk (cherry picked from commit d1b9b5f127bc3797fc274cfa4f363e039f045c3a) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index a2e57e62af30..151a1e8ae36a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -565,6 +565,22 @@ static int __context_set_persistence(struct i915_gem_context *ctx, bool state) if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) return -ENODEV; + /* + * If the cancel fails, we then need to reset, cleanly! + * + * If the per-engine reset fails, all hope is lost! We resort + * to a full GPU reset in that unlikely case, but realistically + * if the engine could not reset, the full reset does not fare + * much better. The damage has been done. + * + * However, if we cannot reset an engine by itself, we cannot + * cleanup a hanging persistent context without causing + * colateral damage, and we should not pretend we can by + * exposing the interface. + */ + if (!intel_has_reset_engine(&ctx->i915->gt)) + return -ENODEV; + i915_gem_context_clear_persistence(ctx); } -- cgit v1.2.3-59-g8ed1b From c01e8da2cdb99303547d25b3dbffa3afec56738a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 3 Feb 2020 09:41:48 +0000 Subject: drm/i915: Initialise basic fence before acquiring seqno Inside the intel_timeline_get_seqno(), we currently track the retirement of the old cachelines by listening to the new request. This requires that the new request is ready to be used and so requires a minimum bit of initialisation prior to getting the new seqno. Fixes: b1e3177bd1d8 ("drm/i915: Coordinate i915_active with its own mutex") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Matthew Auld Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200203094152.4150550-2-chris@chris-wilson.co.uk (cherry picked from commit 855e39e65cfc33a73724f1cc644ffc5754864a20) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_request.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 78a5f5d3c070..f56b046a32de 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -595,6 +595,8 @@ static void __i915_request_ctor(void *arg) i915_sw_fence_init(&rq->submit, submit_notify); i915_sw_fence_init(&rq->semaphore, semaphore_notify); + dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, 0, 0); + rq->file_priv = NULL; rq->capture_list = NULL; @@ -653,25 +655,30 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) } } - ret = intel_timeline_get_seqno(tl, rq, &seqno); - if (ret) - goto err_free; - rq->i915 = ce->engine->i915; rq->context = ce; rq->engine = ce->engine; rq->ring = ce->ring; rq->execution_mask = ce->engine->mask; + kref_init(&rq->fence.refcount); + rq->fence.flags = 0; + rq->fence.error = 0; + INIT_LIST_HEAD(&rq->fence.cb_list); + + ret = intel_timeline_get_seqno(tl, rq, &seqno); + if (ret) + goto err_free; + + rq->fence.context = tl->fence_context; + rq->fence.seqno = seqno; + RCU_INIT_POINTER(rq->timeline, tl); RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline); rq->hwsp_seqno = tl->hwsp_seqno; rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */ - dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, - tl->fence_context, seqno); - /* We bump the ref for the fence chain */ i915_sw_fence_reinit(&i915_request_get(rq)->submit); i915_sw_fence_reinit(&i915_request_get(rq)->semaphore); -- cgit v1.2.3-59-g8ed1b From faaca0a0d48e7b122f6e7e2521f4f6fc487d0451 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Tue, 4 Feb 2020 14:16:27 +0200 Subject: tpm: Revert tpm_tis_spi_mod.ko to tpm_tis_spi.ko. Revert tpm_tis_spi_mod.ko back to tpm_tis_spi.ko as the rename could break user space scripts. This can be achieved by renaming tpm_tis_spi.c as tpm_tis_spi_main.c. Then tpm_tis_spi-y can be used inside the makefile. Cc: Andrey Pronin Cc: Stephen Boyd Cc: stable@vger.kernel.org # 5.5.x Fixes: 797c0113c9a4 ("tpm: tpm_tis_spi: Support cr50 devices") Reported-by: Alexander Steffen Tested-by: Alexander Steffen Reviewed-by: Stephen Boyd Signed-off-by: Jarkko Sakkinen --- drivers/char/tpm/Makefile | 8 +- drivers/char/tpm/tpm_tis_spi.c | 298 ------------------------------------ drivers/char/tpm/tpm_tis_spi_main.c | 298 ++++++++++++++++++++++++++++++++++++ 3 files changed, 303 insertions(+), 301 deletions(-) delete mode 100644 drivers/char/tpm/tpm_tis_spi.c create mode 100644 drivers/char/tpm/tpm_tis_spi_main.c diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile index 5a0d99d4fec0..9567e5197f74 100644 --- a/drivers/char/tpm/Makefile +++ b/drivers/char/tpm/Makefile @@ -21,9 +21,11 @@ tpm-$(CONFIG_EFI) += eventlog/efi.o tpm-$(CONFIG_OF) += eventlog/of.o obj-$(CONFIG_TCG_TIS_CORE) += tpm_tis_core.o obj-$(CONFIG_TCG_TIS) += tpm_tis.o -obj-$(CONFIG_TCG_TIS_SPI) += tpm_tis_spi_mod.o -tpm_tis_spi_mod-y := tpm_tis_spi.o -tpm_tis_spi_mod-$(CONFIG_TCG_TIS_SPI_CR50) += tpm_tis_spi_cr50.o + +obj-$(CONFIG_TCG_TIS_SPI) += tpm_tis_spi.o +tpm_tis_spi-y := tpm_tis_spi_main.o +tpm_tis_spi-$(CONFIG_TCG_TIS_SPI_CR50) += tpm_tis_spi_cr50.o + obj-$(CONFIG_TCG_TIS_I2C_ATMEL) += tpm_i2c_atmel.o obj-$(CONFIG_TCG_TIS_I2C_INFINEON) += tpm_i2c_infineon.o obj-$(CONFIG_TCG_TIS_I2C_NUVOTON) += tpm_i2c_nuvoton.o diff --git a/drivers/char/tpm/tpm_tis_spi.c b/drivers/char/tpm/tpm_tis_spi.c deleted file mode 100644 index d1754fd6c573..000000000000 --- a/drivers/char/tpm/tpm_tis_spi.c +++ /dev/null @@ -1,298 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2015 Infineon Technologies AG - * Copyright (C) 2016 STMicroelectronics SAS - * - * Authors: - * Peter Huewe - * Christophe Ricard - * - * Maintained by: - * - * Device driver for TCG/TCPA TPM (trusted platform module). - * Specifications at www.trustedcomputinggroup.org - * - * This device driver implements the TPM interface as defined in - * the TCG TPM Interface Spec version 1.3, revision 27 via _raw/native - * SPI access_. - * - * It is based on the original tpm_tis device driver from Leendert van - * Dorn and Kyleen Hall and Jarko Sakkinnen. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "tpm.h" -#include "tpm_tis_core.h" -#include "tpm_tis_spi.h" - -#define MAX_SPI_FRAMESIZE 64 - -/* - * TCG SPI flow control is documented in section 6.4 of the spec[1]. In short, - * keep trying to read from the device until MISO goes high indicating the - * wait state has ended. - * - * [1] https://trustedcomputinggroup.org/resource/pc-client-platform-tpm-profile-ptp-specification/ - */ -static int tpm_tis_spi_flow_control(struct tpm_tis_spi_phy *phy, - struct spi_transfer *spi_xfer) -{ - struct spi_message m; - int ret, i; - - if ((phy->iobuf[3] & 0x01) == 0) { - // handle SPI wait states - phy->iobuf[0] = 0; - - for (i = 0; i < TPM_RETRY; i++) { - spi_xfer->len = 1; - spi_message_init(&m); - spi_message_add_tail(spi_xfer, &m); - ret = spi_sync_locked(phy->spi_device, &m); - if (ret < 0) - return ret; - if (phy->iobuf[0] & 0x01) - break; - } - - if (i == TPM_RETRY) - return -ETIMEDOUT; - } - - return 0; -} - -int tpm_tis_spi_transfer(struct tpm_tis_data *data, u32 addr, u16 len, - u8 *in, const u8 *out) -{ - struct tpm_tis_spi_phy *phy = to_tpm_tis_spi_phy(data); - int ret = 0; - struct spi_message m; - struct spi_transfer spi_xfer; - u8 transfer_len; - - spi_bus_lock(phy->spi_device->master); - - while (len) { - transfer_len = min_t(u16, len, MAX_SPI_FRAMESIZE); - - phy->iobuf[0] = (in ? 0x80 : 0) | (transfer_len - 1); - phy->iobuf[1] = 0xd4; - phy->iobuf[2] = addr >> 8; - phy->iobuf[3] = addr; - - memset(&spi_xfer, 0, sizeof(spi_xfer)); - spi_xfer.tx_buf = phy->iobuf; - spi_xfer.rx_buf = phy->iobuf; - spi_xfer.len = 4; - spi_xfer.cs_change = 1; - - spi_message_init(&m); - spi_message_add_tail(&spi_xfer, &m); - ret = spi_sync_locked(phy->spi_device, &m); - if (ret < 0) - goto exit; - - ret = phy->flow_control(phy, &spi_xfer); - if (ret < 0) - goto exit; - - spi_xfer.cs_change = 0; - spi_xfer.len = transfer_len; - spi_xfer.delay_usecs = 5; - - if (in) { - spi_xfer.tx_buf = NULL; - } else if (out) { - spi_xfer.rx_buf = NULL; - memcpy(phy->iobuf, out, transfer_len); - out += transfer_len; - } - - spi_message_init(&m); - spi_message_add_tail(&spi_xfer, &m); - reinit_completion(&phy->ready); - ret = spi_sync_locked(phy->spi_device, &m); - if (ret < 0) - goto exit; - - if (in) { - memcpy(in, phy->iobuf, transfer_len); - in += transfer_len; - } - - len -= transfer_len; - } - -exit: - spi_bus_unlock(phy->spi_device->master); - return ret; -} - -static int tpm_tis_spi_read_bytes(struct tpm_tis_data *data, u32 addr, - u16 len, u8 *result) -{ - return tpm_tis_spi_transfer(data, addr, len, result, NULL); -} - -static int tpm_tis_spi_write_bytes(struct tpm_tis_data *data, u32 addr, - u16 len, const u8 *value) -{ - return tpm_tis_spi_transfer(data, addr, len, NULL, value); -} - -int tpm_tis_spi_read16(struct tpm_tis_data *data, u32 addr, u16 *result) -{ - __le16 result_le; - int rc; - - rc = data->phy_ops->read_bytes(data, addr, sizeof(u16), - (u8 *)&result_le); - if (!rc) - *result = le16_to_cpu(result_le); - - return rc; -} - -int tpm_tis_spi_read32(struct tpm_tis_data *data, u32 addr, u32 *result) -{ - __le32 result_le; - int rc; - - rc = data->phy_ops->read_bytes(data, addr, sizeof(u32), - (u8 *)&result_le); - if (!rc) - *result = le32_to_cpu(result_le); - - return rc; -} - -int tpm_tis_spi_write32(struct tpm_tis_data *data, u32 addr, u32 value) -{ - __le32 value_le; - int rc; - - value_le = cpu_to_le32(value); - rc = data->phy_ops->write_bytes(data, addr, sizeof(u32), - (u8 *)&value_le); - - return rc; -} - -int tpm_tis_spi_init(struct spi_device *spi, struct tpm_tis_spi_phy *phy, - int irq, const struct tpm_tis_phy_ops *phy_ops) -{ - phy->iobuf = devm_kmalloc(&spi->dev, MAX_SPI_FRAMESIZE, GFP_KERNEL); - if (!phy->iobuf) - return -ENOMEM; - - phy->spi_device = spi; - - return tpm_tis_core_init(&spi->dev, &phy->priv, irq, phy_ops, NULL); -} - -static const struct tpm_tis_phy_ops tpm_spi_phy_ops = { - .read_bytes = tpm_tis_spi_read_bytes, - .write_bytes = tpm_tis_spi_write_bytes, - .read16 = tpm_tis_spi_read16, - .read32 = tpm_tis_spi_read32, - .write32 = tpm_tis_spi_write32, -}; - -static int tpm_tis_spi_probe(struct spi_device *dev) -{ - struct tpm_tis_spi_phy *phy; - int irq; - - phy = devm_kzalloc(&dev->dev, sizeof(struct tpm_tis_spi_phy), - GFP_KERNEL); - if (!phy) - return -ENOMEM; - - phy->flow_control = tpm_tis_spi_flow_control; - - /* If the SPI device has an IRQ then use that */ - if (dev->irq > 0) - irq = dev->irq; - else - irq = -1; - - init_completion(&phy->ready); - return tpm_tis_spi_init(dev, phy, irq, &tpm_spi_phy_ops); -} - -typedef int (*tpm_tis_spi_probe_func)(struct spi_device *); - -static int tpm_tis_spi_driver_probe(struct spi_device *spi) -{ - const struct spi_device_id *spi_dev_id = spi_get_device_id(spi); - tpm_tis_spi_probe_func probe_func; - - probe_func = of_device_get_match_data(&spi->dev); - if (!probe_func && spi_dev_id) - probe_func = (tpm_tis_spi_probe_func)spi_dev_id->driver_data; - if (!probe_func) - return -ENODEV; - - return probe_func(spi); -} - -static SIMPLE_DEV_PM_OPS(tpm_tis_pm, tpm_pm_suspend, tpm_tis_spi_resume); - -static int tpm_tis_spi_remove(struct spi_device *dev) -{ - struct tpm_chip *chip = spi_get_drvdata(dev); - - tpm_chip_unregister(chip); - tpm_tis_remove(chip); - return 0; -} - -static const struct spi_device_id tpm_tis_spi_id[] = { - { "tpm_tis_spi", (unsigned long)tpm_tis_spi_probe }, - { "cr50", (unsigned long)cr50_spi_probe }, - {} -}; -MODULE_DEVICE_TABLE(spi, tpm_tis_spi_id); - -static const struct of_device_id of_tis_spi_match[] = { - { .compatible = "st,st33htpm-spi", .data = tpm_tis_spi_probe }, - { .compatible = "infineon,slb9670", .data = tpm_tis_spi_probe }, - { .compatible = "tcg,tpm_tis-spi", .data = tpm_tis_spi_probe }, - { .compatible = "google,cr50", .data = cr50_spi_probe }, - {} -}; -MODULE_DEVICE_TABLE(of, of_tis_spi_match); - -static const struct acpi_device_id acpi_tis_spi_match[] = { - {"SMO0768", 0}, - {} -}; -MODULE_DEVICE_TABLE(acpi, acpi_tis_spi_match); - -static struct spi_driver tpm_tis_spi_driver = { - .driver = { - .name = "tpm_tis_spi", - .pm = &tpm_tis_pm, - .of_match_table = of_match_ptr(of_tis_spi_match), - .acpi_match_table = ACPI_PTR(acpi_tis_spi_match), - }, - .probe = tpm_tis_spi_driver_probe, - .remove = tpm_tis_spi_remove, - .id_table = tpm_tis_spi_id, -}; -module_spi_driver(tpm_tis_spi_driver); - -MODULE_DESCRIPTION("TPM Driver for native SPI access"); -MODULE_LICENSE("GPL"); diff --git a/drivers/char/tpm/tpm_tis_spi_main.c b/drivers/char/tpm/tpm_tis_spi_main.c new file mode 100644 index 000000000000..d1754fd6c573 --- /dev/null +++ b/drivers/char/tpm/tpm_tis_spi_main.c @@ -0,0 +1,298 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 Infineon Technologies AG + * Copyright (C) 2016 STMicroelectronics SAS + * + * Authors: + * Peter Huewe + * Christophe Ricard + * + * Maintained by: + * + * Device driver for TCG/TCPA TPM (trusted platform module). + * Specifications at www.trustedcomputinggroup.org + * + * This device driver implements the TPM interface as defined in + * the TCG TPM Interface Spec version 1.3, revision 27 via _raw/native + * SPI access_. + * + * It is based on the original tpm_tis device driver from Leendert van + * Dorn and Kyleen Hall and Jarko Sakkinnen. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "tpm.h" +#include "tpm_tis_core.h" +#include "tpm_tis_spi.h" + +#define MAX_SPI_FRAMESIZE 64 + +/* + * TCG SPI flow control is documented in section 6.4 of the spec[1]. In short, + * keep trying to read from the device until MISO goes high indicating the + * wait state has ended. + * + * [1] https://trustedcomputinggroup.org/resource/pc-client-platform-tpm-profile-ptp-specification/ + */ +static int tpm_tis_spi_flow_control(struct tpm_tis_spi_phy *phy, + struct spi_transfer *spi_xfer) +{ + struct spi_message m; + int ret, i; + + if ((phy->iobuf[3] & 0x01) == 0) { + // handle SPI wait states + phy->iobuf[0] = 0; + + for (i = 0; i < TPM_RETRY; i++) { + spi_xfer->len = 1; + spi_message_init(&m); + spi_message_add_tail(spi_xfer, &m); + ret = spi_sync_locked(phy->spi_device, &m); + if (ret < 0) + return ret; + if (phy->iobuf[0] & 0x01) + break; + } + + if (i == TPM_RETRY) + return -ETIMEDOUT; + } + + return 0; +} + +int tpm_tis_spi_transfer(struct tpm_tis_data *data, u32 addr, u16 len, + u8 *in, const u8 *out) +{ + struct tpm_tis_spi_phy *phy = to_tpm_tis_spi_phy(data); + int ret = 0; + struct spi_message m; + struct spi_transfer spi_xfer; + u8 transfer_len; + + spi_bus_lock(phy->spi_device->master); + + while (len) { + transfer_len = min_t(u16, len, MAX_SPI_FRAMESIZE); + + phy->iobuf[0] = (in ? 0x80 : 0) | (transfer_len - 1); + phy->iobuf[1] = 0xd4; + phy->iobuf[2] = addr >> 8; + phy->iobuf[3] = addr; + + memset(&spi_xfer, 0, sizeof(spi_xfer)); + spi_xfer.tx_buf = phy->iobuf; + spi_xfer.rx_buf = phy->iobuf; + spi_xfer.len = 4; + spi_xfer.cs_change = 1; + + spi_message_init(&m); + spi_message_add_tail(&spi_xfer, &m); + ret = spi_sync_locked(phy->spi_device, &m); + if (ret < 0) + goto exit; + + ret = phy->flow_control(phy, &spi_xfer); + if (ret < 0) + goto exit; + + spi_xfer.cs_change = 0; + spi_xfer.len = transfer_len; + spi_xfer.delay_usecs = 5; + + if (in) { + spi_xfer.tx_buf = NULL; + } else if (out) { + spi_xfer.rx_buf = NULL; + memcpy(phy->iobuf, out, transfer_len); + out += transfer_len; + } + + spi_message_init(&m); + spi_message_add_tail(&spi_xfer, &m); + reinit_completion(&phy->ready); + ret = spi_sync_locked(phy->spi_device, &m); + if (ret < 0) + goto exit; + + if (in) { + memcpy(in, phy->iobuf, transfer_len); + in += transfer_len; + } + + len -= transfer_len; + } + +exit: + spi_bus_unlock(phy->spi_device->master); + return ret; +} + +static int tpm_tis_spi_read_bytes(struct tpm_tis_data *data, u32 addr, + u16 len, u8 *result) +{ + return tpm_tis_spi_transfer(data, addr, len, result, NULL); +} + +static int tpm_tis_spi_write_bytes(struct tpm_tis_data *data, u32 addr, + u16 len, const u8 *value) +{ + return tpm_tis_spi_transfer(data, addr, len, NULL, value); +} + +int tpm_tis_spi_read16(struct tpm_tis_data *data, u32 addr, u16 *result) +{ + __le16 result_le; + int rc; + + rc = data->phy_ops->read_bytes(data, addr, sizeof(u16), + (u8 *)&result_le); + if (!rc) + *result = le16_to_cpu(result_le); + + return rc; +} + +int tpm_tis_spi_read32(struct tpm_tis_data *data, u32 addr, u32 *result) +{ + __le32 result_le; + int rc; + + rc = data->phy_ops->read_bytes(data, addr, sizeof(u32), + (u8 *)&result_le); + if (!rc) + *result = le32_to_cpu(result_le); + + return rc; +} + +int tpm_tis_spi_write32(struct tpm_tis_data *data, u32 addr, u32 value) +{ + __le32 value_le; + int rc; + + value_le = cpu_to_le32(value); + rc = data->phy_ops->write_bytes(data, addr, sizeof(u32), + (u8 *)&value_le); + + return rc; +} + +int tpm_tis_spi_init(struct spi_device *spi, struct tpm_tis_spi_phy *phy, + int irq, const struct tpm_tis_phy_ops *phy_ops) +{ + phy->iobuf = devm_kmalloc(&spi->dev, MAX_SPI_FRAMESIZE, GFP_KERNEL); + if (!phy->iobuf) + return -ENOMEM; + + phy->spi_device = spi; + + return tpm_tis_core_init(&spi->dev, &phy->priv, irq, phy_ops, NULL); +} + +static const struct tpm_tis_phy_ops tpm_spi_phy_ops = { + .read_bytes = tpm_tis_spi_read_bytes, + .write_bytes = tpm_tis_spi_write_bytes, + .read16 = tpm_tis_spi_read16, + .read32 = tpm_tis_spi_read32, + .write32 = tpm_tis_spi_write32, +}; + +static int tpm_tis_spi_probe(struct spi_device *dev) +{ + struct tpm_tis_spi_phy *phy; + int irq; + + phy = devm_kzalloc(&dev->dev, sizeof(struct tpm_tis_spi_phy), + GFP_KERNEL); + if (!phy) + return -ENOMEM; + + phy->flow_control = tpm_tis_spi_flow_control; + + /* If the SPI device has an IRQ then use that */ + if (dev->irq > 0) + irq = dev->irq; + else + irq = -1; + + init_completion(&phy->ready); + return tpm_tis_spi_init(dev, phy, irq, &tpm_spi_phy_ops); +} + +typedef int (*tpm_tis_spi_probe_func)(struct spi_device *); + +static int tpm_tis_spi_driver_probe(struct spi_device *spi) +{ + const struct spi_device_id *spi_dev_id = spi_get_device_id(spi); + tpm_tis_spi_probe_func probe_func; + + probe_func = of_device_get_match_data(&spi->dev); + if (!probe_func && spi_dev_id) + probe_func = (tpm_tis_spi_probe_func)spi_dev_id->driver_data; + if (!probe_func) + return -ENODEV; + + return probe_func(spi); +} + +static SIMPLE_DEV_PM_OPS(tpm_tis_pm, tpm_pm_suspend, tpm_tis_spi_resume); + +static int tpm_tis_spi_remove(struct spi_device *dev) +{ + struct tpm_chip *chip = spi_get_drvdata(dev); + + tpm_chip_unregister(chip); + tpm_tis_remove(chip); + return 0; +} + +static const struct spi_device_id tpm_tis_spi_id[] = { + { "tpm_tis_spi", (unsigned long)tpm_tis_spi_probe }, + { "cr50", (unsigned long)cr50_spi_probe }, + {} +}; +MODULE_DEVICE_TABLE(spi, tpm_tis_spi_id); + +static const struct of_device_id of_tis_spi_match[] = { + { .compatible = "st,st33htpm-spi", .data = tpm_tis_spi_probe }, + { .compatible = "infineon,slb9670", .data = tpm_tis_spi_probe }, + { .compatible = "tcg,tpm_tis-spi", .data = tpm_tis_spi_probe }, + { .compatible = "google,cr50", .data = cr50_spi_probe }, + {} +}; +MODULE_DEVICE_TABLE(of, of_tis_spi_match); + +static const struct acpi_device_id acpi_tis_spi_match[] = { + {"SMO0768", 0}, + {} +}; +MODULE_DEVICE_TABLE(acpi, acpi_tis_spi_match); + +static struct spi_driver tpm_tis_spi_driver = { + .driver = { + .name = "tpm_tis_spi", + .pm = &tpm_tis_pm, + .of_match_table = of_match_ptr(of_tis_spi_match), + .acpi_match_table = ACPI_PTR(acpi_tis_spi_match), + }, + .probe = tpm_tis_spi_driver_probe, + .remove = tpm_tis_spi_remove, + .id_table = tpm_tis_spi_id, +}; +module_spi_driver(tpm_tis_spi_driver); + +MODULE_DESCRIPTION("TPM Driver for native SPI access"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3-59-g8ed1b From dc10e4181c05a2315ddc375e963b7c763b5ee0df Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Mon, 10 Feb 2020 11:00:41 +0100 Subject: tpm: Initialize crypto_id of allocated_banks to HASH_ALGO__LAST chip->allocated_banks, an array of tpm_bank_info structures, contains the list of TPM algorithm IDs of allocated PCR banks. It also contains the corresponding ID of the crypto subsystem, so that users of the TPM driver can calculate a digest for a PCR extend operation. However, if there is no mapping between TPM algorithm ID and crypto ID, the crypto_id field of tpm_bank_info remains set to zero (the array is allocated and initialized with kcalloc() in tpm2_get_pcr_allocation()). Zero should not be used as value for unknown mappings, as it is a valid crypto ID (HASH_ALGO_MD4). Thus, initialize crypto_id to HASH_ALGO__LAST. Cc: stable@vger.kernel.org # 5.1.x Fixes: 879b589210a9 ("tpm: retrieve digest size of unknown algorithms with PCR read") Signed-off-by: Roberto Sassu Reviewed-by: Petr Vorel Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- drivers/char/tpm/tpm2-cmd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c index 13696deceae8..760329598b99 100644 --- a/drivers/char/tpm/tpm2-cmd.c +++ b/drivers/char/tpm/tpm2-cmd.c @@ -525,6 +525,8 @@ static int tpm2_init_bank_info(struct tpm_chip *chip, u32 bank_index) return 0; } + bank->crypto_id = HASH_ALGO__LAST; + return tpm2_pcr_read(chip, 0, &digest, &bank->digest_size); } -- cgit v1.2.3-59-g8ed1b From 96228b7df33f8eb9006f8ae96949400aed9bd303 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 12 Feb 2020 18:04:33 +0200 Subject: MAINTAINERS: Update drm/i915 bug filing URL We've moved from bugzilla to gitlab. Cc: stable@vger.kernel.org Reviewed-by: Chris Wilson Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200212160434.6437-1-jani.nikula@intel.com (cherry picked from commit 3a6a4f0810c8ade6f1ff63c34aa9834176b9d88b) Signed-off-by: Jani Nikula --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a0d86490c2c6..19dd0d4ffdcc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8392,7 +8392,7 @@ M: Joonas Lahtinen M: Rodrigo Vivi L: intel-gfx@lists.freedesktop.org W: https://01.org/linuxgraphics/ -B: https://01.org/linuxgraphics/documentation/how-report-bugs +B: https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs C: irc://chat.freenode.net/intel-gfx Q: http://patchwork.freedesktop.org/project/intel-gfx/ T: git git://anongit.freedesktop.org/drm-intel -- cgit v1.2.3-59-g8ed1b From 7ddc7005a0aa2f43a826b71f5d6bd7d4b90f8f2a Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 12 Feb 2020 18:04:34 +0200 Subject: drm/i915: Update drm/i915 bug filing URL We've moved from bugzilla to gitlab. Cc: stable@vger.kernel.org Reviewed-by: Chris Wilson Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200212160434.6437-2-jani.nikula@intel.com (cherry picked from commit ddae4d7af0bbe3b2051f1603459a8b24e9a19324) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/Kconfig | 5 ++--- drivers/gpu/drm/i915/i915_gpu_error.c | 3 ++- drivers/gpu/drm/i915/i915_utils.c | 5 ++--- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index ba9595960bbe..907c4471f591 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -75,9 +75,8 @@ config DRM_I915_CAPTURE_ERROR help This option enables capturing the GPU state when a hang is detected. This information is vital for triaging hangs and assists in debugging. - Please report any hang to - https://bugs.freedesktop.org/enter_bug.cgi?product=DRI - for triaging. + Please report any hang for triaging according to: + https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs If in doubt, say "Y". diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 594341e27a47..9e401a5fcae8 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1852,7 +1852,8 @@ void i915_error_state_store(struct i915_gpu_coredump *error) if (!xchg(&warned, true) && ktime_get_real_seconds() - DRIVER_TIMESTAMP < DAY_AS_SECONDS(180)) { pr_info("GPU hangs can indicate a bug anywhere in the entire gfx stack, including userspace.\n"); - pr_info("Please file a _new_ bug report on bugs.freedesktop.org against DRI -> DRM/Intel\n"); + pr_info("Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/intel/issues/new.\n"); + pr_info("Please see https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs for details.\n"); pr_info("drm/i915 developers can then reassign to the right component if it's not a kernel issue.\n"); pr_info("The GPU crash dump is required to analyze GPU hangs, so please always attach it.\n"); pr_info("GPU crash dump saved to /sys/class/drm/card%d/error\n", diff --git a/drivers/gpu/drm/i915/i915_utils.c b/drivers/gpu/drm/i915/i915_utils.c index c47261ae86ea..632d6953c78d 100644 --- a/drivers/gpu/drm/i915/i915_utils.c +++ b/drivers/gpu/drm/i915/i915_utils.c @@ -8,9 +8,8 @@ #include "i915_drv.h" #include "i915_utils.h" -#define FDO_BUG_URL "https://bugs.freedesktop.org/enter_bug.cgi?product=DRI" -#define FDO_BUG_MSG "Please file a bug at " FDO_BUG_URL " against DRM/Intel " \ - "providing the dmesg log by booting with drm.debug=0xf" +#define FDO_BUG_URL "https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs" +#define FDO_BUG_MSG "Please file a bug on drm/i915; see " FDO_BUG_URL " for details." void __i915_printk(struct drm_i915_private *dev_priv, const char *level, -- cgit v1.2.3-59-g8ed1b From 58e9121c32a245fab47f29ab4ad29dd62470a7e8 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 6 Feb 2020 16:14:16 -0800 Subject: drm/i915/ehl: Update port clock voltage level requirements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Voltage level depends not only on the cdclk, but also on the DDI clock. Last time the bspec voltage level table for EHL was updated, we only updated the cdclk requirements, but forgot to account for the new port clock criteria. Bspec: 21809 Fixes: d147483884ed ("drm/i915/ehl: Update voltage level checks") Cc: José Roberto de Souza Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20200207001417.1229251-1-matthew.d.roper@intel.com Reviewed-by: José Roberto de Souza (cherry picked from commit 9d5fd37ed7e26efdbe90f492d7eb8b53dcdb61d6) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_ddi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 33f1dc3d7c1a..d9a61f341070 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -4251,7 +4251,9 @@ static bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv, void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv, struct intel_crtc_state *crtc_state) { - if (INTEL_GEN(dev_priv) >= 11 && crtc_state->port_clock > 594000) + if (IS_ELKHARTLAKE(dev_priv) && crtc_state->port_clock > 594000) + crtc_state->min_voltage_level = 3; + else if (INTEL_GEN(dev_priv) >= 11 && crtc_state->port_clock > 594000) crtc_state->min_voltage_level = 1; else if (IS_CANNONLAKE(dev_priv) && crtc_state->port_clock > 594000) crtc_state->min_voltage_level = 2; -- cgit v1.2.3-59-g8ed1b From 2e0a576511f656933adfe56ef03b9cf3e64b21b7 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 13 Feb 2020 16:04:11 +0200 Subject: drm/i915/dsc: force full modeset whenever DSC is enabled at probe We lack full state readout of DSC config, which may lead to DSC enable using a config that's all zeros, failing spectacularly. Force full modeset and thus compute config at probe to get a sane state, until we implement DSC state readout. Any fastset that did appear to work with DSC at probe, worked by coincidence. [1] is an example of a change that triggered the issue on TGL DSI DSC. [1] http://patchwork.freedesktop.org/patch/msgid/20200212150102.7600-1-ville.syrjala@linux.intel.com Cc: Manasi Navare Cc: Vandita Kulkarni Cc: Ville Syrjala Cc: stable@vger.kernel.org Fixes: fbacb15ea814 ("drm/i915/dsc: add basic hardware state readout support") Acked-by: Matt Roper Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200213140412.32697-3-stanislav.lisovskiy@intel.com (cherry picked from commit a4277aa398d76db109d6b8420934f68daf69a6c3) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 064dd99bbc49..e68ec25fc97c 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -17433,6 +17433,24 @@ retry: * have readout for pipe gamma enable. */ crtc_state->uapi.color_mgmt_changed = true; + + /* + * FIXME hack to force full modeset when DSC is being + * used. + * + * As long as we do not have full state readout and + * config comparison of crtc_state->dsc, we have no way + * to ensure reliable fastset. Remove once we have + * readout for DSC. + */ + if (crtc_state->dsc.compression_enable) { + ret = drm_atomic_add_affected_connectors(state, + &crtc->base); + if (ret) + goto out; + crtc_state->uapi.mode_changed = true; + drm_dbg_kms(dev, "Force full modeset for DSC\n"); + } } } -- cgit v1.2.3-59-g8ed1b From e543e370ec3160c06c2cd897477150dfb23f1afd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 6 Feb 2020 20:49:12 +0000 Subject: drm/i915/gt: Prevent queuing retire workers on the virtual engine Virtual engines are fleeting. They carry a reference count and may be freed when their last request is retired. This makes them unsuitable for the task of housing engine->retire.work so assert that it is not used. Tvrtko tracked down an instance where we did indeed violate this rule. In virtual_submit_request, we flush a completed request directly with __i915_request_submit and this causes us to queue that request on the veng's breadcrumb list and signal it. Leading us down a path where we should not attach the retire. Reported-by: Tvrtko Ursulin Fixes: dc93c9b69315 ("drm/i915/gt: Schedule request retirement when signaler idles") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200206204915.2636606-1-chris@chris-wilson.co.uk (cherry picked from commit f91d8156ab8afb32447cd2bf3189219bab943f18) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 3 +++ drivers/gpu/drm/i915/gt/intel_gt_requests.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index 0ba524a414c6..cbad7fe722ce 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -136,6 +136,9 @@ static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl) struct intel_engine_cs *engine = container_of(b, struct intel_engine_cs, breadcrumbs); + if (unlikely(intel_engine_is_virtual(engine))) + engine = intel_virtual_engine_get_sibling(engine, 0); + intel_engine_add_retire(engine, tl); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index 7ef1d37970f6..8a5054f21bf8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -99,6 +99,9 @@ static bool add_retire(struct intel_engine_cs *engine, void intel_engine_add_retire(struct intel_engine_cs *engine, struct intel_timeline *tl) { + /* We don't deal well with the engine disappearing beneath us */ + GEM_BUG_ON(intel_engine_is_virtual(engine)); + if (add_retire(engine, tl)) schedule_work(&engine->retire_work); } -- cgit v1.2.3-59-g8ed1b From 19b5f3b419a61808ff2713f1f30b8a88fe14ac9b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 6 Feb 2020 20:49:13 +0000 Subject: drm/i915/gt: Protect defer_request() from new waiters Mika spotted <4>[17436.705441] general protection fault: 0000 [#1] PREEMPT SMP PTI <4>[17436.705447] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 5.5.0+ #1 <4>[17436.705449] Hardware name: System manufacturer System Product Name/Z170M-PLUS, BIOS 3805 05/16/2018 <4>[17436.705512] RIP: 0010:__execlists_submission_tasklet+0xc4d/0x16e0 [i915] <4>[17436.705516] Code: c5 4c 8d 60 e0 75 17 e9 8c 07 00 00 49 8b 44 24 20 49 39 c5 4c 8d 60 e0 0f 84 7a 07 00 00 49 8b 5c 24 08 49 8b 87 80 00 00 00 <48> 39 83 d8 fe ff ff 75 d9 48 8b 83 88 fe ff ff a8 01 0f 84 b6 05 <4>[17436.705518] RSP: 0018:ffffc9000012ce80 EFLAGS: 00010083 <4>[17436.705521] RAX: ffff88822ae42000 RBX: 5a5a5a5a5a5a5a5a RCX: dead000000000122 <4>[17436.705523] RDX: ffff88822ae42588 RSI: ffff8881e32a7908 RDI: ffff8881c429fd48 <4>[17436.705525] RBP: ffffc9000012cf00 R08: ffff88822ae42588 R09: 00000000fffffffe <4>[17436.705527] R10: ffff8881c429fb80 R11: 00000000a677cf08 R12: ffff8881c42a0aa8 <4>[17436.705529] R13: ffff8881c429fd38 R14: ffff88822ae42588 R15: ffff8881c429fb80 <4>[17436.705532] FS: 0000000000000000(0000) GS:ffff88822ed00000(0000) knlGS:0000000000000000 <4>[17436.705534] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4>[17436.705536] CR2: 00007f858c76d000 CR3: 0000000005610003 CR4: 00000000003606e0 <4>[17436.705538] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 <4>[17436.705540] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 <4>[17436.705542] Call Trace: <4>[17436.705545] <4>[17436.705603] execlists_submission_tasklet+0xc0/0x130 [i915] which is us consuming a partially initialised new waiter in defer_requests(). We can prevent this by initialising the i915_dependency prior to making it visible, and since we are using a concurrent list_add/iterator mark them up to the compiler. Fixes: 8ee36e048c98 ("drm/i915/execlists: Minimalistic timeslicing") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200206204915.2636606-2-chris@chris-wilson.co.uk (cherry picked from commit f14f27b1663269a81ed62d3961fe70250a1a0623) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_lrc.c | 7 ++++++- drivers/gpu/drm/i915/i915_scheduler.c | 6 ++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index a13a8c4b65ab..0a8a2c8026f1 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1605,6 +1605,11 @@ last_active(const struct intel_engine_execlists *execlists) return *last; } +#define for_each_waiter(p__, rq__) \ + list_for_each_entry_lockless(p__, \ + &(rq__)->sched.waiters_list, \ + wait_link) + static void defer_request(struct i915_request *rq, struct list_head * const pl) { LIST_HEAD(list); @@ -1622,7 +1627,7 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl) GEM_BUG_ON(i915_request_is_active(rq)); list_move_tail(&rq->sched.link, pl); - list_for_each_entry(p, &rq->sched.waiters_list, wait_link) { + for_each_waiter(p, rq) { struct i915_request *w = container_of(p->waiter, typeof(*w), sched); diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 5d96cfba40f8..34b654b4e58a 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -423,8 +423,6 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node, if (!node_signaled(signal)) { INIT_LIST_HEAD(&dep->dfs_link); - list_add(&dep->wait_link, &signal->waiters_list); - list_add(&dep->signal_link, &node->signalers_list); dep->signaler = signal; dep->waiter = node; dep->flags = flags; @@ -434,6 +432,10 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node, !node_started(signal)) node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN; + /* All set, now publish. Beware the lockless walkers. */ + list_add(&dep->signal_link, &node->signalers_list); + list_add_rcu(&dep->wait_link, &signal->waiters_list); + /* * As we do not allow WAIT to preempt inflight requests, * once we have executed a request, along with triggering -- cgit v1.2.3-59-g8ed1b From 96781fd941b39e1f78098009344ebcd7af861c67 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Mon, 17 Feb 2020 00:42:22 -0600 Subject: ASoC: sun8i-codec: Fix setting DAI data format Use the correct mask for this two-bit field. This fixes setting the DAI data format to RIGHT_J or DSP_A. Fixes: 36c684936fae ("ASoC: Add sun8i digital audio codec") Signed-off-by: Samuel Holland Acked-by: Chen-Yu Tsai Cc: stable@kernel.org Link: https://lore.kernel.org/r/20200217064250.15516-7-samuel@sholland.org Signed-off-by: Mark Brown --- sound/soc/sunxi/sun8i-codec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/sunxi/sun8i-codec.c b/sound/soc/sunxi/sun8i-codec.c index 55798bc8eae2..686561df8e13 100644 --- a/sound/soc/sunxi/sun8i-codec.c +++ b/sound/soc/sunxi/sun8i-codec.c @@ -80,6 +80,7 @@ #define SUN8I_SYS_SR_CTRL_AIF1_FS_MASK GENMASK(15, 12) #define SUN8I_SYS_SR_CTRL_AIF2_FS_MASK GENMASK(11, 8) +#define SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT_MASK GENMASK(3, 2) #define SUN8I_AIF1CLK_CTRL_AIF1_WORD_SIZ_MASK GENMASK(5, 4) #define SUN8I_AIF1CLK_CTRL_AIF1_LRCK_DIV_MASK GENMASK(8, 6) #define SUN8I_AIF1CLK_CTRL_AIF1_BCLK_DIV_MASK GENMASK(12, 9) @@ -241,7 +242,7 @@ static int sun8i_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) return -EINVAL; } regmap_update_bits(scodec->regmap, SUN8I_AIF1CLK_CTRL, - BIT(SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT), + SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT_MASK, value << SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT); return 0; -- cgit v1.2.3-59-g8ed1b From a81541041ceb55bcec9a8bb8ad3482263f0a205a Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Mon, 17 Feb 2020 09:31:33 +0100 Subject: net: mscc: fix in frame extraction Each extracted frame on Ocelot has an IFH. The frame and IFH are extracted by reading chuncks of 4 bytes from a register. In case the IFH and frames were read corretly it would try to read the next frame. In case there are no more frames in the queue, it checks if there were any previous errors and in that case clear the queue. But this check will always succeed also when there are no errors. Because when extracting the IFH the error is checked against 4(number of bytes read) and then the error is set only if the extraction of the frame failed. So in a happy case where there are no errors the err variable is still 4. So it could be a case where after the check that there are no more frames in the queue, a frame will arrive in the queue but because the error is not reseted, it would try to flush the queue. So the frame will be lost. The fix consist in resetting the error after reading the IFH. Signed-off-by: Horatiu Vultur Acked-by: Alexandre Belloni Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot_board.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_board.c index b38820849faa..1135a18019c7 100644 --- a/drivers/net/ethernet/mscc/ocelot_board.c +++ b/drivers/net/ethernet/mscc/ocelot_board.c @@ -114,6 +114,14 @@ static irqreturn_t ocelot_xtr_irq_handler(int irq, void *arg) if (err != 4) break; + /* At this point the IFH was read correctly, so it is safe to + * presume that there is no error. The err needs to be reset + * otherwise a frame could come in CPU queue between the while + * condition and the check for error later on. And in that case + * the new frame is just removed and not processed. + */ + err = 0; + ocelot_parse_ifh(ifh, &info); ocelot_port = ocelot->ports[info.port]; -- cgit v1.2.3-59-g8ed1b From 0a923a76d615b4f54456ce6c52865bb0d5b5516d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 16 Feb 2020 20:44:09 -0800 Subject: Documentation/hwmon: fix xdpe12284 Sphinx warnings Fix Sphinx format warnings by adding a blank line. Documentation/hwmon/xdpe12284.rst:28: WARNING: Unexpected indentation. Documentation/hwmon/xdpe12284.rst:29: WARNING: Block quote ends without a blank line; unexpected unindent. Signed-off-by: Randy Dunlap Cc: Jean Delvare Cc: Guenter Roeck Cc: linux-hwmon@vger.kernel.org Cc: Vadim Pasternak Link: https://lore.kernel.org/r/0094c570-dd4c-dc0e-386d-ea1c39b6a582@infradead.org Signed-off-by: Guenter Roeck --- Documentation/hwmon/xdpe12284.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/hwmon/xdpe12284.rst b/Documentation/hwmon/xdpe12284.rst index 6b7ae98cc536..67d1f87808e5 100644 --- a/Documentation/hwmon/xdpe12284.rst +++ b/Documentation/hwmon/xdpe12284.rst @@ -24,6 +24,7 @@ This driver implements support for Infineon Multi-phase XDPE122 family dual loop voltage regulators. The family includes XDPE12284 and XDPE12254 devices. The devices from this family complaint with: + - Intel VR13 and VR13HC rev 1.3, IMVP8 rev 1.2 and IMPVP9 rev 1.3 DC-DC converter specification. - Intel SVID rev 1.9. protocol. -- cgit v1.2.3-59-g8ed1b From 8a9093c79863b58cc2f9874d7ae788f0d622a596 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 17 Feb 2020 15:38:09 -0500 Subject: net: sched: correct flower port blocking tc flower rules that are based on src or dst port blocking are sometimes ineffective due to uninitialized stack data. __skb_flow_dissect() extracts ports from the skb for tc flower to match against. However, the port dissection is not done when when the FLOW_DIS_IS_FRAGMENT bit is set in key_control->flags. All callers of __skb_flow_dissect(), zero-out the key_control field except for fl_classify() as used by the flower classifier. Thus, the FLOW_DIS_IS_FRAGMENT may be set on entry to __skb_flow_dissect(), since key_control is allocated on the stack and may not be initialized. Since key_basic and key_control are present for all flow keys, let's make sure they are initialized. Fixes: 62230715fd24 ("flow_dissector: do not dissect l4 ports for fragments") Co-developed-by: Eric Dumazet Signed-off-by: Eric Dumazet Acked-by: Cong Wang Signed-off-by: Jason Baron Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 9 +++++++++ net/sched/cls_flower.c | 1 + 2 files changed, 10 insertions(+) diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index e9391e877f9a..628383915827 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -5,6 +5,7 @@ #include #include #include +#include #include struct sk_buff; @@ -348,4 +349,12 @@ struct bpf_flow_dissector { void *data_end; }; +static inline void +flow_dissector_init_keys(struct flow_dissector_key_control *key_control, + struct flow_dissector_key_basic *key_basic) +{ + memset(key_control, 0, sizeof(*key_control)); + memset(key_basic, 0, sizeof(*key_basic)); +} + #endif diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 7e54d2ab5254..d32d4233d337 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -305,6 +305,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct cls_fl_filter *f; list_for_each_entry_rcu(mask, &head->masks, list) { + flow_dissector_init_keys(&skb_key.control, &skb_key.basic); fl_clear_masked_range(&skb_key, mask); skb_flow_dissect_meta(skb, &mask->dissector, &skb_key); -- cgit v1.2.3-59-g8ed1b From 245709ec8be89af46ea7ef0444c9c80913999d99 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 18 Feb 2020 12:07:53 +0800 Subject: sctp: move the format error check out of __sctp_sf_do_9_1_abort When T2 timer is to be stopped, the asoc should also be deleted, otherwise, there will be no chance to call sctp_association_free and the asoc could last in memory forever. However, in sctp_sf_shutdown_sent_abort(), after adding the cmd SCTP_CMD_TIMER_STOP for T2 timer, it may return error due to the format error from __sctp_sf_do_9_1_abort() and miss adding SCTP_CMD_ASSOC_FAILED where the asoc will be deleted. This patch is to fix it by moving the format error check out of __sctp_sf_do_9_1_abort(), and do it before adding the cmd SCTP_CMD_TIMER_STOP for T2 timer. Thanks Hangbin for reporting this issue by the fuzz testing. v1->v2: - improve the comment in the code as Marcelo's suggestion. Fixes: 96ca468b86b0 ("sctp: check invalid value of length parameter in error cause") Reported-by: Hangbin Liu Acked-by: Marcelo Ricardo Leitner Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/sm_statefuns.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 748e3b19ec1d..6a16af4b1ef6 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -170,6 +170,16 @@ static inline bool sctp_chunk_length_valid(struct sctp_chunk *chunk, return true; } +/* Check for format error in an ABORT chunk */ +static inline bool sctp_err_chunk_valid(struct sctp_chunk *chunk) +{ + struct sctp_errhdr *err; + + sctp_walk_errors(err, chunk->chunk_hdr); + + return (void *)err == (void *)chunk->chunk_end; +} + /********************************************************** * These are the state functions for handling chunk events. **********************************************************/ @@ -2255,6 +2265,9 @@ enum sctp_disposition sctp_sf_shutdown_pending_abort( sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + if (!sctp_err_chunk_valid(chunk)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands); } @@ -2298,6 +2311,9 @@ enum sctp_disposition sctp_sf_shutdown_sent_abort( sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + if (!sctp_err_chunk_valid(chunk)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Stop the T2-shutdown timer. */ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN)); @@ -2565,6 +2581,9 @@ enum sctp_disposition sctp_sf_do_9_1_abort( sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + if (!sctp_err_chunk_valid(chunk)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands); } @@ -2582,16 +2601,8 @@ static enum sctp_disposition __sctp_sf_do_9_1_abort( /* See if we have an error cause code in the chunk. */ len = ntohs(chunk->chunk_hdr->length); - if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) { - struct sctp_errhdr *err; - - sctp_walk_errors(err, chunk->chunk_hdr); - if ((void *)err != (void *)chunk->chunk_end) - return sctp_sf_pdiscard(net, ep, asoc, type, arg, - commands); - + if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) error = ((struct sctp_errhdr *)chunk->skb->data)->cause; - } sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET)); /* ASSOC_FAILED will DELETE_TCB. */ -- cgit v1.2.3-59-g8ed1b From 82969e6ef0430a31d58342009e64c7634512eb53 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Fri, 14 Feb 2020 15:32:24 +0100 Subject: net: cnic: fix spelling mistake "reserverd" -> "reserved" The reserved member should be named reserved3. Signed-off-by: Alexandre Belloni Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/cnic_defs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/cnic_defs.h b/drivers/net/ethernet/broadcom/cnic_defs.h index b38499774071..99e2c6d4d8c3 100644 --- a/drivers/net/ethernet/broadcom/cnic_defs.h +++ b/drivers/net/ethernet/broadcom/cnic_defs.h @@ -543,13 +543,13 @@ struct l4_kwq_update_pg { #define L4_KWQ_UPDATE_PG_RESERVERD2_SHIFT 2 #endif #if defined(__BIG_ENDIAN) - u16 reserverd3; + u16 reserved3; u8 da0; u8 da1; #elif defined(__LITTLE_ENDIAN) u8 da1; u8 da0; - u16 reserverd3; + u16 reserved3; #endif #if defined(__BIG_ENDIAN) u8 da2; -- cgit v1.2.3-59-g8ed1b From 9b64208f74fbd0e920475ecfe9326f8443fdc3a5 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 17 Feb 2020 11:43:15 +0800 Subject: selftests: forwarding: vxlan_bridge_1d: use more proper tos value 0x11 and 0x12 set the ECN bits based on RFC2474, it would be better to avoid that. 0x14 and 0x18 would be better and works as well. Reported-by: Petr Machata Fixes: 4e867c9a50ff ("selftests: forwarding: vxlan_bridge_1d: fix tos value") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh index 353613fc1947..ce6bea9675c0 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -516,9 +516,9 @@ test_tos() RET=0 tc filter add dev v1 egress pref 77 prot ip \ - flower ip_tos 0x11 action pass - vxlan_ping_test $h1 192.0.2.3 "-Q 0x11" v1 egress 77 10 - vxlan_ping_test $h1 192.0.2.3 "-Q 0x12" v1 egress 77 0 + flower ip_tos 0x14 action pass + vxlan_ping_test $h1 192.0.2.3 "-Q 0x14" v1 egress 77 10 + vxlan_ping_test $h1 192.0.2.3 "-Q 0x18" v1 egress 77 0 tc filter del dev v1 egress pref 77 prot ip log_test "VXLAN: envelope TOS inheritance" -- cgit v1.2.3-59-g8ed1b From aa3146193ae25d0fe4b96d815169a135db2e8f01 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 2 Feb 2020 15:39:34 +0000 Subject: drm/i915: Wean off drm_pci_alloc/drm_pci_free drm_pci_alloc and drm_pci_free are just very thin wrappers around dma_alloc_coherent, with a note that we should be removing them. Furthermore since commit de09d31dd38a50fdce106c15abd68432eebbd014 Author: Kirill A. Shutemov Date: Fri Jan 15 16:51:42 2016 -0800 page-flags: define PG_reserved behavior on compound pages As far as I can see there's no users of PG_reserved on compound pages. Let's use PF_NO_COMPOUND here. drm_pci_alloc has been declared broken since it mixes GFP_COMP and SetPageReserved. Avoid this conflict by weaning ourselves off using the abstraction and using the dma functions directly. Reported-by: Taketo Kabe Closes: https://gitlab.freedesktop.org/drm/intel/issues/1027 Fixes: de09d31dd38a ("page-flags: define PG_reserved behavior on compound pages") Signed-off-by: Chris Wilson Cc: # v4.5+ Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20200202153934.3899472-1-chris@chris-wilson.co.uk (cherry picked from commit c6790dc22312f592c1434577258b31c48c72d52a) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 3 - drivers/gpu/drm/i915/gem/i915_gem_phys.c | 98 ++++++++++++------------ drivers/gpu/drm/i915/i915_gem.c | 8 +- 4 files changed, 55 insertions(+), 56 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index e68ec25fc97c..aa453953908b 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -11087,7 +11087,7 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state) u32 base; if (INTEL_INFO(dev_priv)->display.cursor_needs_physical) - base = obj->phys_handle->busaddr; + base = sg_dma_address(obj->mm.pages->sgl); else base = intel_plane_ggtt_offset(plane_state); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index f64ad77e6b1e..c2174da35bb0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -285,9 +285,6 @@ struct drm_i915_gem_object { void *gvt_info; }; - - /** for phys allocated objects */ - struct drm_dma_handle *phys_handle; }; static inline struct drm_i915_gem_object * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index b1b7c1b3038a..b07bb40edd5a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -22,88 +22,87 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) { struct address_space *mapping = obj->base.filp->f_mapping; - struct drm_dma_handle *phys; - struct sg_table *st; struct scatterlist *sg; - char *vaddr; + struct sg_table *st; + dma_addr_t dma; + void *vaddr; + void *dst; int i; - int err; if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) return -EINVAL; - /* Always aligning to the object size, allows a single allocation + /* + * Always aligning to the object size, allows a single allocation * to handle all possible callers, and given typical object sizes, * the alignment of the buddy allocation will naturally match. */ - phys = drm_pci_alloc(obj->base.dev, - roundup_pow_of_two(obj->base.size), - roundup_pow_of_two(obj->base.size)); - if (!phys) + vaddr = dma_alloc_coherent(&obj->base.dev->pdev->dev, + roundup_pow_of_two(obj->base.size), + &dma, GFP_KERNEL); + if (!vaddr) return -ENOMEM; - vaddr = phys->vaddr; + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + goto err_pci; + + if (sg_alloc_table(st, 1, GFP_KERNEL)) + goto err_st; + + sg = st->sgl; + sg->offset = 0; + sg->length = obj->base.size; + + sg_assign_page(sg, (struct page *)vaddr); + sg_dma_address(sg) = dma; + sg_dma_len(sg) = obj->base.size; + + dst = vaddr; for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { struct page *page; - char *src; + void *src; page = shmem_read_mapping_page(mapping, i); - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto err_phys; - } + if (IS_ERR(page)) + goto err_st; src = kmap_atomic(page); - memcpy(vaddr, src, PAGE_SIZE); - drm_clflush_virt_range(vaddr, PAGE_SIZE); + memcpy(dst, src, PAGE_SIZE); + drm_clflush_virt_range(dst, PAGE_SIZE); kunmap_atomic(src); put_page(page); - vaddr += PAGE_SIZE; + dst += PAGE_SIZE; } intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt); - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) { - err = -ENOMEM; - goto err_phys; - } - - if (sg_alloc_table(st, 1, GFP_KERNEL)) { - kfree(st); - err = -ENOMEM; - goto err_phys; - } - - sg = st->sgl; - sg->offset = 0; - sg->length = obj->base.size; - - sg_dma_address(sg) = phys->busaddr; - sg_dma_len(sg) = obj->base.size; - - obj->phys_handle = phys; - __i915_gem_object_set_pages(obj, st, sg->length); return 0; -err_phys: - drm_pci_free(obj->base.dev, phys); - - return err; +err_st: + kfree(st); +err_pci: + dma_free_coherent(&obj->base.dev->pdev->dev, + roundup_pow_of_two(obj->base.size), + vaddr, dma); + return -ENOMEM; } static void i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, struct sg_table *pages) { + dma_addr_t dma = sg_dma_address(pages->sgl); + void *vaddr = sg_page(pages->sgl); + __i915_gem_object_release_shmem(obj, pages, false); if (obj->mm.dirty) { struct address_space *mapping = obj->base.filp->f_mapping; - char *vaddr = obj->phys_handle->vaddr; + void *src = vaddr; int i; for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { @@ -115,15 +114,16 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, continue; dst = kmap_atomic(page); - drm_clflush_virt_range(vaddr, PAGE_SIZE); - memcpy(dst, vaddr, PAGE_SIZE); + drm_clflush_virt_range(src, PAGE_SIZE); + memcpy(dst, src, PAGE_SIZE); kunmap_atomic(dst); set_page_dirty(page); if (obj->mm.madv == I915_MADV_WILLNEED) mark_page_accessed(page); put_page(page); - vaddr += PAGE_SIZE; + + src += PAGE_SIZE; } obj->mm.dirty = false; } @@ -131,7 +131,9 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, sg_free_table(pages); kfree(pages); - drm_pci_free(obj->base.dev, obj->phys_handle); + dma_free_coherent(&obj->base.dev->pdev->dev, + roundup_pow_of_two(obj->base.size), + vaddr, dma); } static void phys_release(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c2de2f45b459..5f6e63952821 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -180,7 +180,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, struct drm_i915_gem_pwrite *args, struct drm_file *file) { - void *vaddr = obj->phys_handle->vaddr + args->offset; + void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset; char __user *user_data = u64_to_user_ptr(args->data_ptr); /* @@ -844,10 +844,10 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, ret = i915_gem_gtt_pwrite_fast(obj, args); if (ret == -EFAULT || ret == -ENOSPC) { - if (obj->phys_handle) - ret = i915_gem_phys_pwrite(obj, args, file); - else + if (i915_gem_object_has_struct_page(obj)) ret = i915_gem_shmem_pwrite(obj, args); + else + ret = i915_gem_phys_pwrite(obj, args, file); } i915_gem_object_unpin_pages(obj); -- cgit v1.2.3-59-g8ed1b From b1339ecac661e1cf3e1dc78ac56bff3aeeaeb92c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 7 Feb 2020 21:14:52 +0000 Subject: drm/i915/execlists: Always force a context reload when rewinding RING_TAIL If we rewind the RING_TAIL on a context, due to a preemption event, we must force the context restore for the RING_TAIL update to be properly handled. Rather than note which preemption events may cause us to rewind the tail, compare the new request's tail with the previously submitted RING_TAIL, as it turns out that timeslicing was causing unexpected rewinds. -0 0d.s2 1280851190us : __execlists_submission_tasklet: 0000:00:02.0 rcs0: expired last=130:4698, prio=3, hint=3 -0 0d.s2 1280851192us : __i915_request_unsubmit: 0000:00:02.0 rcs0: fence 66:119966, current 119964 -0 0d.s2 1280851195us : __i915_request_unsubmit: 0000:00:02.0 rcs0: fence 130:4698, current 4695 -0 0d.s2 1280851198us : __i915_request_unsubmit: 0000:00:02.0 rcs0: fence 130:4696, current 4695 ^---- Note we unwind 2 requests from the same context -0 0d.s2 1280851208us : __i915_request_submit: 0000:00:02.0 rcs0: fence 130:4696, current 4695 -0 0d.s2 1280851213us : __i915_request_submit: 0000:00:02.0 rcs0: fence 134:1508, current 1506 ^---- But to apply the new timeslice, we have to replay the first request before the new client can start -- the unexpected RING_TAIL rewind -0 0d.s2 1280851219us : trace_ports: 0000:00:02.0 rcs0: submit { 130:4696*, 134:1508 } synmark2-5425 2..s. 1280851239us : process_csb: 0000:00:02.0 rcs0: cs-irq head=5, tail=0 synmark2-5425 2..s. 1280851240us : process_csb: 0000:00:02.0 rcs0: csb[0]: status=0x00008002:0x00000000 ^---- Preemption event for the ELSP update; note the lite-restore synmark2-5425 2..s. 1280851243us : trace_ports: 0000:00:02.0 rcs0: preempted { 130:4698, 66:119966 } synmark2-5425 2..s. 1280851246us : trace_ports: 0000:00:02.0 rcs0: promote { 130:4696*, 134:1508 } synmark2-5425 2.... 1280851462us : __i915_request_commit: 0000:00:02.0 rcs0: fence 130:4700, current 4695 synmark2-5425 2.... 1280852111us : __i915_request_commit: 0000:00:02.0 rcs0: fence 130:4702, current 4695 synmark2-5425 2.Ns1 1280852296us : process_csb: 0000:00:02.0 rcs0: cs-irq head=0, tail=2 synmark2-5425 2.Ns1 1280852297us : process_csb: 0000:00:02.0 rcs0: csb[1]: status=0x00000814:0x00000000 synmark2-5425 2.Ns1 1280852299us : trace_ports: 0000:00:02.0 rcs0: completed { 130:4696!, 134:1508 } synmark2-5425 2.Ns1 1280852301us : process_csb: 0000:00:02.0 rcs0: csb[2]: status=0x00000818:0x00000040 synmark2-5425 2.Ns1 1280852302us : trace_ports: 0000:00:02.0 rcs0: completed { 134:1508, 0:0 } synmark2-5425 2.Ns1 1280852313us : process_csb: process_csb:2336 GEM_BUG_ON(!i915_request_completed(*execlists->active) && !reset_in_progress(execlists)) Fixes: 8ee36e048c98 ("drm/i915/execlists: Minimalistic timeslicing") Referenecs: 82c69bf58650 ("drm/i915/gt: Detect if we miss WaIdleLiteRestore") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Cc: # v5.4+ Link: https://patchwork.freedesktop.org/patch/msgid/20200207211452.2860634-1-chris@chris-wilson.co.uk (cherry picked from commit 5ba32c7be81e53ea8a27190b0f6be98e6c6779af) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_lrc.c | 18 ++++++++---------- drivers/gpu/drm/i915/gt/intel_ring.c | 1 + drivers/gpu/drm/i915/gt/intel_ring.h | 8 ++++++++ drivers/gpu/drm/i915/gt/intel_ring_types.h | 1 + 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 0a8a2c8026f1..438d7a97d45c 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1321,7 +1321,7 @@ static u64 execlists_update_context(struct i915_request *rq) { struct intel_context *ce = rq->context; u64 desc = ce->lrc_desc; - u32 tail; + u32 tail, prev; /* * WaIdleLiteRestore:bdw,skl @@ -1334,9 +1334,15 @@ static u64 execlists_update_context(struct i915_request *rq) * subsequent resubmissions (for lite restore). Should that fail us, * and we try and submit the same tail again, force the context * reload. + * + * If we need to return to a preempted context, we need to skip the + * lite-restore and force it to reload the RING_TAIL. Otherwise, the + * HW has a tendency to ignore us rewinding the TAIL to the end of + * an earlier request. */ tail = intel_ring_set_tail(rq->ring, rq->tail); - if (unlikely(ce->lrc_reg_state[CTX_RING_TAIL] == tail)) + prev = ce->lrc_reg_state[CTX_RING_TAIL]; + if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0)) desc |= CTX_DESC_FORCE_RESTORE; ce->lrc_reg_state[CTX_RING_TAIL] = tail; rq->tail = rq->wa_tail; @@ -1839,14 +1845,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) */ __unwind_incomplete_requests(engine); - /* - * If we need to return to the preempted context, we - * need to skip the lite-restore and force it to - * reload the RING_TAIL. Otherwise, the HW has a - * tendency to ignore us rewinding the TAIL to the - * end of an earlier request. - */ - last->context->lrc_desc |= CTX_DESC_FORCE_RESTORE; last = NULL; } else if (need_timeslice(engine, last) && timer_expired(&engine->execlists.timer)) { diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index 374b28f13ca0..6ff803f397c4 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -145,6 +145,7 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size) kref_init(&ring->ref); ring->size = size; + ring->wrap = BITS_PER_TYPE(ring->size) - ilog2(size); /* * Workaround an erratum on the i830 which causes a hang if diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h index ea2839d9e044..5bdce24994aa 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.h +++ b/drivers/gpu/drm/i915/gt/intel_ring.h @@ -56,6 +56,14 @@ static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos) return pos & (ring->size - 1); } +static inline int intel_ring_direction(const struct intel_ring *ring, + u32 next, u32 prev) +{ + typecheck(typeof(ring->size), next); + typecheck(typeof(ring->size), prev); + return (next - prev) << ring->wrap; +} + static inline bool intel_ring_offset_valid(const struct intel_ring *ring, unsigned int pos) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_types.h b/drivers/gpu/drm/i915/gt/intel_ring_types.h index d9f17f38e0cc..3cd7fec7fd8d 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_types.h +++ b/drivers/gpu/drm/i915/gt/intel_ring_types.h @@ -45,6 +45,7 @@ struct intel_ring { u32 space; u32 size; + u32 wrap; u32 effective_size; }; -- cgit v1.2.3-59-g8ed1b From 15de9cb5c9c83a23be92b8f7a1178cead1486587 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 11 Feb 2020 12:01:31 +0000 Subject: drm/i915/gt: Avoid resetting ring->head outside of its timeline mutex We manipulate ring->head while active in i915_request_retire underneath the timeline manipulation. We cannot rely on a stable ring->head outside of the timeline->mutex, in particular while setting up the context for resume and reset. Closes: https://gitlab.freedesktop.org/drm/intel/issues/1126 Fixes: 0881954965e3 ("drm/i915: Introduce intel_context.pin_mutex for pin management") Fixes: e5dadff4b093 ("drm/i915: Protect request retirement with timeline->mutex") References: f3c0efc9fe7a ("drm/i915/execlists: Leave resetting ring to intel_ring") Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Andi Shyti Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200211120131.958949-1-chris@chris-wilson.co.uk (cherry picked from commit 42827350f75c56d0fe9f15d8425a1390528958b6) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_lrc.c | 36 ++++++++++++++---------------- drivers/gpu/drm/i915/gt/intel_ring_types.h | 6 ++--- drivers/gpu/drm/i915/gt/selftest_lrc.c | 2 +- 3 files changed, 21 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 438d7a97d45c..fe8a59aaa629 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -237,7 +237,8 @@ static void execlists_init_reg_state(u32 *reg_state, bool close); static void __execlists_update_reg_state(const struct intel_context *ce, - const struct intel_engine_cs *engine); + const struct intel_engine_cs *engine, + u32 head); static void mark_eio(struct i915_request *rq) { @@ -1186,12 +1187,11 @@ static void reset_active(struct i915_request *rq, head = rq->tail; else head = active_request(ce->timeline, rq)->head; - ce->ring->head = intel_ring_wrap(ce->ring, head); - intel_ring_update_space(ce->ring); + head = intel_ring_wrap(ce->ring, head); /* Scrub the context image to prevent replaying the previous batch */ restore_default_state(ce, engine); - __execlists_update_reg_state(ce, engine); + __execlists_update_reg_state(ce, engine, head); /* We've switched away, so this should be a no-op, but intent matters */ ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; @@ -2863,16 +2863,17 @@ static void execlists_context_unpin(struct intel_context *ce) static void __execlists_update_reg_state(const struct intel_context *ce, - const struct intel_engine_cs *engine) + const struct intel_engine_cs *engine, + u32 head) { struct intel_ring *ring = ce->ring; u32 *regs = ce->lrc_reg_state; - GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head)); + GEM_BUG_ON(!intel_ring_offset_valid(ring, head)); GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail)); regs[CTX_RING_START] = i915_ggtt_offset(ring->vma); - regs[CTX_RING_HEAD] = ring->head; + regs[CTX_RING_HEAD] = head; regs[CTX_RING_TAIL] = ring->tail; /* RPCS */ @@ -2901,7 +2902,7 @@ __execlists_context_pin(struct intel_context *ce, ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE; ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; - __execlists_update_reg_state(ce, engine); + __execlists_update_reg_state(ce, engine, ce->ring->tail); return 0; } @@ -2942,7 +2943,7 @@ static void execlists_context_reset(struct intel_context *ce) /* Scrub away the garbage */ execlists_init_reg_state(ce->lrc_reg_state, ce, ce->engine, ce->ring, true); - __execlists_update_reg_state(ce, ce->engine); + __execlists_update_reg_state(ce, ce->engine, ce->ring->tail); ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; } @@ -3497,6 +3498,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) struct intel_engine_execlists * const execlists = &engine->execlists; struct intel_context *ce; struct i915_request *rq; + u32 head; mb(); /* paranoia: read the CSB pointers from after the reset */ clflush(execlists->csb_write); @@ -3524,15 +3526,15 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) if (i915_request_completed(rq)) { /* Idle context; tidy up the ring so we can restart afresh */ - ce->ring->head = intel_ring_wrap(ce->ring, rq->tail); + head = intel_ring_wrap(ce->ring, rq->tail); goto out_replay; } /* Context has requests still in-flight; it should not be idle! */ GEM_BUG_ON(i915_active_is_idle(&ce->active)); rq = active_request(ce->timeline, rq); - ce->ring->head = intel_ring_wrap(ce->ring, rq->head); - GEM_BUG_ON(ce->ring->head == ce->ring->tail); + head = intel_ring_wrap(ce->ring, rq->head); + GEM_BUG_ON(head == ce->ring->tail); /* * If this request hasn't started yet, e.g. it is waiting on a @@ -3577,10 +3579,9 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) out_replay: ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n", - ce->ring->head, ce->ring->tail); - intel_ring_update_space(ce->ring); + head, ce->ring->tail); __execlists_reset_reg_state(ce, engine); - __execlists_update_reg_state(ce, engine); + __execlists_update_reg_state(ce, engine, head); ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */ unwind: @@ -5223,10 +5224,7 @@ void intel_lr_context_reset(struct intel_engine_cs *engine, restore_default_state(ce, engine); /* Rerun the request; its payload has been neutered (if guilty). */ - ce->ring->head = head; - intel_ring_update_space(ce->ring); - - __execlists_update_reg_state(ce, engine); + __execlists_update_reg_state(ce, engine, head); } bool diff --git a/drivers/gpu/drm/i915/gt/intel_ring_types.h b/drivers/gpu/drm/i915/gt/intel_ring_types.h index 3cd7fec7fd8d..1a189ea00fd8 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_types.h +++ b/drivers/gpu/drm/i915/gt/intel_ring_types.h @@ -39,9 +39,9 @@ struct intel_ring { */ atomic_t pin_count; - u32 head; - u32 tail; - u32 emit; + u32 head; /* updated during retire, loosely tracks RING_HEAD */ + u32 tail; /* updated on submission, used for RING_TAIL */ + u32 emit; /* updated during request construction */ u32 space; u32 size; diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 65718ca2326e..b292f8cbd0bf 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -186,7 +186,7 @@ static int live_unlite_restore(struct intel_gt *gt, int prio) } GEM_BUG_ON(!ce[1]->ring->size); intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2); - __execlists_update_reg_state(ce[1], engine); + __execlists_update_reg_state(ce[1], engine, ce[1]->ring->head); rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); if (IS_ERR(rq[0])) { -- cgit v1.2.3-59-g8ed1b From cc5049ae4d457194796f854eb2e38b9727ad8c2d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 18 Feb 2020 09:09:15 +0100 Subject: ALSA: hda/realtek - Apply quirk for yet another MSI laptop MSI GP65 laptop with SSID 1462:1293 requires the same quirk as other MSI models. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=204159 Cc: Link: https://lore.kernel.org/r/20200218080915.3433-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 82485e06dde1..477589e7ec1d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2449,6 +2449,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1458, 0xa0b8, "Gigabyte AZ370-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), SND_PCI_QUIRK(0x1462, 0x1228, "MSI-GP63", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1276, "MSI-GL73", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1462, 0x1293, "MSI-GP65", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD), SND_PCI_QUIRK(0x1462, 0xda57, "MSI Z270-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3), -- cgit v1.2.3-59-g8ed1b From 44eeb081b8630bb3ad3cd381d1ae1831463e48bb Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 18 Feb 2020 10:14:09 +0100 Subject: ALSA: hda: Use scnprintf() for printing texts for sysfs/procfs Some code in HD-audio driver calls snprintf() in a loop and still expects that the return value were actually written size, while snprintf() returns the expected would-be length instead. When the given buffer limit were small, this leads to a buffer overflow. Use scnprintf() for addressing those issues. It returns the actually written size unlike snprintf(). Cc: Link: https://lore.kernel.org/r/20200218091409.27162-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/hda/hdmi_chmap.c | 2 +- sound/pci/hda/hda_codec.c | 2 +- sound/pci/hda/hda_eld.c | 2 +- sound/pci/hda/hda_sysfs.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sound/hda/hdmi_chmap.c b/sound/hda/hdmi_chmap.c index 5fd6d575e123..aad5c4bf4d34 100644 --- a/sound/hda/hdmi_chmap.c +++ b/sound/hda/hdmi_chmap.c @@ -250,7 +250,7 @@ void snd_hdac_print_channel_allocation(int spk_alloc, char *buf, int buflen) for (i = 0, j = 0; i < ARRAY_SIZE(cea_speaker_allocation_names); i++) { if (spk_alloc & (1 << i)) - j += snprintf(buf + j, buflen - j, " %s", + j += scnprintf(buf + j, buflen - j, " %s", cea_speaker_allocation_names[i]); } buf[j] = '\0'; /* necessary when j == 0 */ diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 5dc42f932739..53e7732ef752 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -4022,7 +4022,7 @@ void snd_print_pcm_bits(int pcm, char *buf, int buflen) for (i = 0, j = 0; i < ARRAY_SIZE(bits); i++) if (pcm & (AC_SUPPCM_BITS_8 << i)) - j += snprintf(buf + j, buflen - j, " %d", bits[i]); + j += scnprintf(buf + j, buflen - j, " %d", bits[i]); buf[j] = '\0'; /* necessary when j == 0 */ } diff --git a/sound/pci/hda/hda_eld.c b/sound/pci/hda/hda_eld.c index bb46c89b7f63..136477ed46ae 100644 --- a/sound/pci/hda/hda_eld.c +++ b/sound/pci/hda/hda_eld.c @@ -360,7 +360,7 @@ static void hdmi_print_pcm_rates(int pcm, char *buf, int buflen) for (i = 0, j = 0; i < ARRAY_SIZE(alsa_rates); i++) if (pcm & (1 << i)) - j += snprintf(buf + j, buflen - j, " %d", + j += scnprintf(buf + j, buflen - j, " %d", alsa_rates[i]); buf[j] = '\0'; /* necessary when j == 0 */ diff --git a/sound/pci/hda/hda_sysfs.c b/sound/pci/hda/hda_sysfs.c index 0607ed5d1959..eb8ec109d7ad 100644 --- a/sound/pci/hda/hda_sysfs.c +++ b/sound/pci/hda/hda_sysfs.c @@ -222,7 +222,7 @@ static ssize_t init_verbs_show(struct device *dev, int i, len = 0; mutex_lock(&codec->user_mutex); snd_array_for_each(&codec->init_verbs, i, v) { - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "0x%02x 0x%03x 0x%04x\n", v->nid, v->verb, v->param); } @@ -272,7 +272,7 @@ static ssize_t hints_show(struct device *dev, int i, len = 0; mutex_lock(&codec->user_mutex); snd_array_for_each(&codec->hints, i, hint) { - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "%s = %s\n", hint->key, hint->val); } mutex_unlock(&codec->user_mutex); -- cgit v1.2.3-59-g8ed1b From 2464cc4c345699adea52c7aef75707207cb8a2f6 Mon Sep 17 00:00:00 2001 From: Gustavo Luiz Duarte Date: Tue, 11 Feb 2020 00:38:29 -0300 Subject: powerpc/tm: Fix clearing MSR[TS] in current when reclaiming on signal delivery After a treclaim, we expect to be in non-transactional state. If we don't clear the current thread's MSR[TS] before we get preempted, then tm_recheckpoint_new_task() will recheckpoint and we get rescheduled in suspended transaction state. When handling a signal caught in transactional state, handle_rt_signal64() calls get_tm_stackpointer() that treclaims the transaction using tm_reclaim_current() but without clearing the thread's MSR[TS]. This can cause the TM Bad Thing exception below if later we pagefault and get preempted trying to access the user's sigframe, using __put_user(). Afterwards, when we are rescheduled back into do_page_fault() (but now in suspended state since the thread's MSR[TS] was not cleared), upon executing 'rfid' after completion of the page fault handling, the exception is raised because a transition from suspended to non-transactional state is invalid. Unexpected TM Bad Thing exception at c00000000000de44 (msr 0x8000000302a03031) tm_scratch=800000010280b033 Oops: Unrecoverable exception, sig: 6 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries CPU: 25 PID: 15547 Comm: a.out Not tainted 5.4.0-rc2 #32 NIP: c00000000000de44 LR: c000000000034728 CTR: 0000000000000000 REGS: c00000003fe7bd70 TRAP: 0700 Not tainted (5.4.0-rc2) MSR: 8000000302a03031 CR: 44000884 XER: 00000000 CFAR: c00000000000dda4 IRQMASK: 0 PACATMSCRATCH: 800000010280b033 GPR00: c000000000034728 c000000f65a17c80 c000000001662800 00007fffacf3fd78 GPR04: 0000000000001000 0000000000001000 0000000000000000 c000000f611f8af0 GPR08: 0000000000000000 0000000078006001 0000000000000000 000c000000000000 GPR12: c000000f611f84b0 c00000003ffcb200 0000000000000000 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 0000000000000000 c000000f611f8140 GPR24: 0000000000000000 00007fffacf3fd68 c000000f65a17d90 c000000f611f7800 GPR28: c000000f65a17e90 c000000f65a17e90 c000000001685e18 00007fffacf3f000 NIP [c00000000000de44] fast_exception_return+0xf4/0x1b0 LR [c000000000034728] handle_rt_signal64+0x78/0xc50 Call Trace: [c000000f65a17c80] [c000000000034710] handle_rt_signal64+0x60/0xc50 (unreliable) [c000000f65a17d30] [c000000000023640] do_notify_resume+0x330/0x460 [c000000f65a17e20] [c00000000000dcc4] ret_from_except_lite+0x70/0x74 Instruction dump: 7c4ff120 e8410170 7c5a03a6 38400000 f8410060 e8010070 e8410080 e8610088 60000000 60000000 e8810090 e8210078 <4c000024> 48000000 e8610178 88ed0989 ---[ end trace 93094aa44b442f87 ]--- The simplified sequence of events that triggers the above exception is: ... # userspace in NON-TRANSACTIONAL state tbegin # userspace in TRANSACTIONAL state signal delivery # kernelspace in SUSPENDED state handle_rt_signal64() get_tm_stackpointer() treclaim # kernelspace in NON-TRANSACTIONAL state __put_user() page fault happens. We will never get back here because of the TM Bad Thing exception. page fault handling kicks in and we voluntarily preempt ourselves do_page_fault() __schedule() __switch_to(other_task) our task is rescheduled and we recheckpoint because the thread's MSR[TS] was not cleared __switch_to(our_task) switch_to_tm() tm_recheckpoint_new_task() trechkpt # kernelspace in SUSPENDED state The page fault handling resumes, but now we are in suspended transaction state do_page_fault() completes rfid <----- trying to get back where the page fault happened (we were non-transactional back then) TM Bad Thing # illegal transition from suspended to non-transactional This patch fixes that issue by clearing the current thread's MSR[TS] just after treclaim in get_tm_stackpointer() so that we stay in non-transactional state in case we are preempted. In order to make treclaim and clearing the thread's MSR[TS] atomic from a preemption perspective when CONFIG_PREEMPT is set, preempt_disable/enable() is used. It's also necessary to save the previous value of the thread's MSR before get_tm_stackpointer() is called so that it can be exposed to the signal handler later in setup_tm_sigcontexts() to inform the userspace MSR at the moment of the signal delivery. Found with tm-signal-context-force-tm kernel selftest. Fixes: 2b0a576d15e0 ("powerpc: Add new transactional memory state to the signal context") Cc: stable@vger.kernel.org # v3.9 Signed-off-by: Gustavo Luiz Duarte Acked-by: Michael Neuling Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200211033831.11165-1-gustavold@linux.ibm.com --- arch/powerpc/kernel/signal.c | 17 +++++++++++++++-- arch/powerpc/kernel/signal_32.c | 28 ++++++++++++++-------------- arch/powerpc/kernel/signal_64.c | 22 ++++++++++------------ 3 files changed, 39 insertions(+), 28 deletions(-) diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index e6c30cee6abf..d215f9554553 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -200,14 +200,27 @@ unsigned long get_tm_stackpointer(struct task_struct *tsk) * normal/non-checkpointed stack pointer. */ + unsigned long ret = tsk->thread.regs->gpr[1]; + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM BUG_ON(tsk != current); if (MSR_TM_ACTIVE(tsk->thread.regs->msr)) { + preempt_disable(); tm_reclaim_current(TM_CAUSE_SIGNAL); if (MSR_TM_TRANSACTIONAL(tsk->thread.regs->msr)) - return tsk->thread.ckpt_regs.gpr[1]; + ret = tsk->thread.ckpt_regs.gpr[1]; + + /* + * If we treclaim, we must clear the current thread's TM bits + * before re-enabling preemption. Otherwise we might be + * preempted and have the live MSR[TS] changed behind our back + * (tm_recheckpoint_new_task() would recheckpoint). Besides, we + * enter the signal handler in non-transactional state. + */ + tsk->thread.regs->msr &= ~MSR_TS_MASK; + preempt_enable(); } #endif - return tsk->thread.regs->gpr[1]; + return ret; } diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 98600b276f76..1b090a76b444 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -489,19 +489,11 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, */ static int save_tm_user_regs(struct pt_regs *regs, struct mcontext __user *frame, - struct mcontext __user *tm_frame, int sigret) + struct mcontext __user *tm_frame, int sigret, + unsigned long msr) { - unsigned long msr = regs->msr; - WARN_ON(tm_suspend_disabled); - /* Remove TM bits from thread's MSR. The MSR in the sigcontext - * just indicates to userland that we were doing a transaction, but we - * don't want to return in transactional state. This also ensures - * that flush_fp_to_thread won't set TIF_RESTORE_TM again. - */ - regs->msr &= ~MSR_TS_MASK; - /* Save both sets of general registers */ if (save_general_regs(¤t->thread.ckpt_regs, frame) || save_general_regs(regs, tm_frame)) @@ -912,6 +904,10 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, int sigret; unsigned long tramp; struct pt_regs *regs = tsk->thread.regs; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Save the thread's msr before get_tm_stackpointer() changes it */ + unsigned long msr = regs->msr; +#endif BUG_ON(tsk != current); @@ -944,13 +940,13 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, #ifdef CONFIG_PPC_TRANSACTIONAL_MEM tm_frame = &rt_sf->uc_transact.uc_mcontext; - if (MSR_TM_ACTIVE(regs->msr)) { + if (MSR_TM_ACTIVE(msr)) { if (__put_user((unsigned long)&rt_sf->uc_transact, &rt_sf->uc.uc_link) || __put_user((unsigned long)tm_frame, &rt_sf->uc_transact.uc_regs)) goto badframe; - if (save_tm_user_regs(regs, frame, tm_frame, sigret)) + if (save_tm_user_regs(regs, frame, tm_frame, sigret, msr)) goto badframe; } else @@ -1369,6 +1365,10 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, int sigret; unsigned long tramp; struct pt_regs *regs = tsk->thread.regs; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Save the thread's msr before get_tm_stackpointer() changes it */ + unsigned long msr = regs->msr; +#endif BUG_ON(tsk != current); @@ -1402,9 +1402,9 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, #ifdef CONFIG_PPC_TRANSACTIONAL_MEM tm_mctx = &frame->mctx_transact; - if (MSR_TM_ACTIVE(regs->msr)) { + if (MSR_TM_ACTIVE(msr)) { if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact, - sigret)) + sigret, msr)) goto badframe; } else diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 117515564ec7..84ed2e77ef9c 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -192,7 +192,8 @@ static long setup_sigcontext(struct sigcontext __user *sc, static long setup_tm_sigcontexts(struct sigcontext __user *sc, struct sigcontext __user *tm_sc, struct task_struct *tsk, - int signr, sigset_t *set, unsigned long handler) + int signr, sigset_t *set, unsigned long handler, + unsigned long msr) { /* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the * process never used altivec yet (MSR_VEC is zero in pt_regs of @@ -207,12 +208,11 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, elf_vrreg_t __user *tm_v_regs = sigcontext_vmx_regs(tm_sc); #endif struct pt_regs *regs = tsk->thread.regs; - unsigned long msr = tsk->thread.regs->msr; long err = 0; BUG_ON(tsk != current); - BUG_ON(!MSR_TM_ACTIVE(regs->msr)); + BUG_ON(!MSR_TM_ACTIVE(msr)); WARN_ON(tm_suspend_disabled); @@ -222,13 +222,6 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, */ msr |= tsk->thread.ckpt_regs.msr & (MSR_FP | MSR_VEC | MSR_VSX); - /* Remove TM bits from thread's MSR. The MSR in the sigcontext - * just indicates to userland that we were doing a transaction, but we - * don't want to return in transactional state. This also ensures - * that flush_fp_to_thread won't set TIF_RESTORE_TM again. - */ - regs->msr &= ~MSR_TS_MASK; - #ifdef CONFIG_ALTIVEC err |= __put_user(v_regs, &sc->v_regs); err |= __put_user(tm_v_regs, &tm_sc->v_regs); @@ -824,6 +817,10 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, unsigned long newsp = 0; long err = 0; struct pt_regs *regs = tsk->thread.regs; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Save the thread's msr before get_tm_stackpointer() changes it */ + unsigned long msr = regs->msr; +#endif BUG_ON(tsk != current); @@ -841,7 +838,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, err |= __put_user(0, &frame->uc.uc_flags); err |= __save_altstack(&frame->uc.uc_stack, regs->gpr[1]); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM - if (MSR_TM_ACTIVE(regs->msr)) { + if (MSR_TM_ACTIVE(msr)) { /* The ucontext_t passed to userland points to the second * ucontext_t (for transactional state) with its uc_link ptr. */ @@ -849,7 +846,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext, &frame->uc_transact.uc_mcontext, tsk, ksig->sig, NULL, - (unsigned long)ksig->ka.sa.sa_handler); + (unsigned long)ksig->ka.sa.sa_handler, + msr); } else #endif { -- cgit v1.2.3-59-g8ed1b From 232ca1eecafed8c54491017f0612c33d8c742d74 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 15 Feb 2020 10:14:25 +0000 Subject: powerpc/32s: Fix DSI and ISI exceptions for CONFIG_VMAP_STACK hash_page() needs to read page tables from kernel memory. When entire kernel memory is mapped by BATs, which is normally the case when CONFIG_STRICT_KERNEL_RWX is not set, it works even if the page hosting the page table is not referenced in the MMU hash table. However, if the page where the page table resides is not covered by a BAT, a DSI fault can be encountered from hash_page(), and it loops forever. This can happen when CONFIG_STRICT_KERNEL_RWX is selected and the alignment of the different regions is too small to allow covering the entire memory with BATs. This also happens when CONFIG_DEBUG_PAGEALLOC is selected or when booting with 'nobats' flag. Also, if the page containing the kernel stack is not present in the MMU hash table, registers cannot be saved and a recursive DSI fault is encountered. To allow hash_page() to properly do its job at all time and load the MMU hash table whenever needed, it must run with data MMU disabled. This means it must be called before re-enabling data MMU. To allow this, registers clobbered by hash_page() and create_hpte() have to be saved in the thread struct together with SRR0, SSR1, DAR and DSISR. It is also necessary to ensure that DSI prolog doesn't overwrite regs saved by prolog of the current running exception. That means: - DSI can only use SPRN_SPRG_SCRATCH0 - Exceptions must free SPRN_SPRG_SCRATCH0 before writing to the stack. This also fixes the Oops reported by Erhard when create_hpte() is called by add_hash_page(). Due to prolog size increase, a few more exceptions had to get split in two parts. Fixes: cd08f109e262 ("powerpc/32s: Enable CONFIG_VMAP_STACK") Reported-by: Erhard F. Signed-off-by: Christophe Leroy Tested-by: Erhard F. Tested-by: Larry Finger Signed-off-by: Michael Ellerman Link: https://bugzilla.kernel.org/show_bug.cgi?id=206501 Link: https://lore.kernel.org/r/64a4aa44686e9fd4b01333401367029771d9b231.1581761633.git.christophe.leroy@c-s.fr --- arch/powerpc/include/asm/processor.h | 4 + arch/powerpc/kernel/asm-offsets.c | 12 +++ arch/powerpc/kernel/head_32.S | 155 ++++++++++++++++++++++++++++++++-- arch/powerpc/kernel/head_32.h | 21 ++++- arch/powerpc/mm/book3s32/hash_low.S | 52 +++++------- arch/powerpc/mm/book3s32/mmu.c | 10 +-- arch/powerpc/mm/kasan/kasan_init_32.c | 3 +- 7 files changed, 212 insertions(+), 45 deletions(-) diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 8387698bd5b6..eedcbfb9a6ff 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -168,6 +168,10 @@ struct thread_struct { unsigned long srr1; unsigned long dar; unsigned long dsisr; +#ifdef CONFIG_PPC_BOOK3S_32 + unsigned long r0, r3, r4, r5, r6, r8, r9, r11; + unsigned long lr, ctr; +#endif #endif /* Debug Registers */ struct debug_reg debug; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index c25e562f1cd9..fcf24a365fc0 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -132,6 +132,18 @@ int main(void) OFFSET(SRR1, thread_struct, srr1); OFFSET(DAR, thread_struct, dar); OFFSET(DSISR, thread_struct, dsisr); +#ifdef CONFIG_PPC_BOOK3S_32 + OFFSET(THR0, thread_struct, r0); + OFFSET(THR3, thread_struct, r3); + OFFSET(THR4, thread_struct, r4); + OFFSET(THR5, thread_struct, r5); + OFFSET(THR6, thread_struct, r6); + OFFSET(THR8, thread_struct, r8); + OFFSET(THR9, thread_struct, r9); + OFFSET(THR11, thread_struct, r11); + OFFSET(THLR, thread_struct, lr); + OFFSET(THCTR, thread_struct, ctr); +#endif #endif #ifdef CONFIG_SPE OFFSET(THREAD_EVR0, thread_struct, evr[0]); diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 0493fcac6409..97c887950c3c 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -290,17 +290,55 @@ MachineCheck: 7: EXCEPTION_PROLOG_2 addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_CHRP - bne cr1,1f +#ifdef CONFIG_VMAP_STACK + mfspr r4, SPRN_SPRG_THREAD + tovirt(r4, r4) + lwz r4, RTAS_SP(r4) + cmpwi cr1, r4, 0 #endif - EXC_XFER_STD(0x200, machine_check_exception) -#ifdef CONFIG_PPC_CHRP -1: b machine_check_in_rtas + beq cr1, machine_check_tramp + b machine_check_in_rtas +#else + b machine_check_tramp #endif /* Data access exception. */ . = 0x300 DO_KVM 0x300 DataAccess: +#ifdef CONFIG_VMAP_STACK + mtspr SPRN_SPRG_SCRATCH0,r10 + mfspr r10, SPRN_SPRG_THREAD +BEGIN_MMU_FTR_SECTION + stw r11, THR11(r10) + mfspr r10, SPRN_DSISR + mfcr r11 +#ifdef CONFIG_PPC_KUAP + andis. r10, r10, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h +#else + andis. r10, r10, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h +#endif + mfspr r10, SPRN_SPRG_THREAD + beq hash_page_dsi +.Lhash_page_dsi_cont: + mtcr r11 + lwz r11, THR11(r10) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) + mtspr SPRN_SPRG_SCRATCH1,r11 + mfspr r11, SPRN_DAR + stw r11, DAR(r10) + mfspr r11, SPRN_DSISR + stw r11, DSISR(r10) + mfspr r11, SPRN_SRR0 + stw r11, SRR0(r10) + mfspr r11, SPRN_SRR1 /* check whether user or kernel */ + stw r11, SRR1(r10) + mfcr r10 + andi. r11, r11, MSR_PR + + EXCEPTION_PROLOG_1 + b handle_page_fault_tramp_1 +#else /* CONFIG_VMAP_STACK */ EXCEPTION_PROLOG handle_dar_dsisr=1 get_and_save_dar_dsisr_on_stack r4, r5, r11 BEGIN_MMU_FTR_SECTION @@ -316,11 +354,32 @@ BEGIN_MMU_FTR_SECTION FTR_SECTION_ELSE b handle_page_fault_tramp_2 ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) +#endif /* CONFIG_VMAP_STACK */ /* Instruction access exception. */ . = 0x400 DO_KVM 0x400 InstructionAccess: +#ifdef CONFIG_VMAP_STACK + mtspr SPRN_SPRG_SCRATCH0,r10 + mtspr SPRN_SPRG_SCRATCH1,r11 + mfspr r10, SPRN_SPRG_THREAD + mfspr r11, SPRN_SRR0 + stw r11, SRR0(r10) + mfspr r11, SPRN_SRR1 /* check whether user or kernel */ + stw r11, SRR1(r10) + mfcr r10 +BEGIN_MMU_FTR_SECTION + andis. r11, r11, SRR1_ISI_NOPT@h /* no pte found? */ + bne hash_page_isi +.Lhash_page_isi_cont: + mfspr r11, SPRN_SRR1 /* check whether user or kernel */ +END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) + andi. r11, r11, MSR_PR + + EXCEPTION_PROLOG_1 + EXCEPTION_PROLOG_2 +#else /* CONFIG_VMAP_STACK */ EXCEPTION_PROLOG andis. r0,r9,SRR1_ISI_NOPT@h /* no pte found? */ beq 1f /* if so, try to put a PTE */ @@ -329,6 +388,7 @@ InstructionAccess: BEGIN_MMU_FTR_SECTION bl hash_page END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) +#endif /* CONFIG_VMAP_STACK */ 1: mr r4,r12 andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ stw r4, _DAR(r11) @@ -344,7 +404,7 @@ Alignment: EXCEPTION_PROLOG handle_dar_dsisr=1 save_dar_dsisr_on_stack r4, r5, r11 addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_STD(0x600, alignment_exception) + b alignment_exception_tramp /* Program check exception */ EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) @@ -645,15 +705,100 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) . = 0x3000 +machine_check_tramp: + EXC_XFER_STD(0x200, machine_check_exception) + +alignment_exception_tramp: + EXC_XFER_STD(0x600, alignment_exception) + handle_page_fault_tramp_1: +#ifdef CONFIG_VMAP_STACK + EXCEPTION_PROLOG_2 handle_dar_dsisr=1 +#endif lwz r4, _DAR(r11) lwz r5, _DSISR(r11) /* fall through */ handle_page_fault_tramp_2: EXC_XFER_LITE(0x300, handle_page_fault) +#ifdef CONFIG_VMAP_STACK +.macro save_regs_thread thread + stw r0, THR0(\thread) + stw r3, THR3(\thread) + stw r4, THR4(\thread) + stw r5, THR5(\thread) + stw r6, THR6(\thread) + stw r8, THR8(\thread) + stw r9, THR9(\thread) + mflr r0 + stw r0, THLR(\thread) + mfctr r0 + stw r0, THCTR(\thread) +.endm + +.macro restore_regs_thread thread + lwz r0, THLR(\thread) + mtlr r0 + lwz r0, THCTR(\thread) + mtctr r0 + lwz r0, THR0(\thread) + lwz r3, THR3(\thread) + lwz r4, THR4(\thread) + lwz r5, THR5(\thread) + lwz r6, THR6(\thread) + lwz r8, THR8(\thread) + lwz r9, THR9(\thread) +.endm + +hash_page_dsi: + save_regs_thread r10 + mfdsisr r3 + mfdar r4 + mfsrr0 r5 + mfsrr1 r9 + rlwinm r3, r3, 32 - 15, _PAGE_RW /* DSISR_STORE -> _PAGE_RW */ + bl hash_page + mfspr r10, SPRN_SPRG_THREAD + restore_regs_thread r10 + b .Lhash_page_dsi_cont + +hash_page_isi: + mr r11, r10 + mfspr r10, SPRN_SPRG_THREAD + save_regs_thread r10 + li r3, 0 + lwz r4, SRR0(r10) + lwz r9, SRR1(r10) + bl hash_page + mfspr r10, SPRN_SPRG_THREAD + restore_regs_thread r10 + mr r10, r11 + b .Lhash_page_isi_cont + + .globl fast_hash_page_return +fast_hash_page_return: + andis. r10, r9, SRR1_ISI_NOPT@h /* Set on ISI, cleared on DSI */ + mfspr r10, SPRN_SPRG_THREAD + restore_regs_thread r10 + bne 1f + + /* DSI */ + mtcr r11 + lwz r11, THR11(r10) + mfspr r10, SPRN_SPRG_SCRATCH0 + SYNC + RFI + +1: /* ISI */ + mtcr r11 + mfspr r11, SPRN_SPRG_SCRATCH1 + mfspr r10, SPRN_SPRG_SCRATCH0 + SYNC + RFI + stack_overflow: vmap_stack_overflow_exception +#endif AltiVecUnavailable: EXCEPTION_PROLOG diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index a6a5fbbf8504..9db162f79fe6 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -64,11 +64,25 @@ .endm .macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0 +#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S) +BEGIN_MMU_FTR_SECTION + mtcr r10 +FTR_SECTION_ELSE + stw r10, _CCR(r11) +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) +#else stw r10,_CCR(r11) /* save registers */ +#endif + mfspr r10, SPRN_SPRG_SCRATCH0 stw r12,GPR12(r11) stw r9,GPR9(r11) - mfspr r10,SPRN_SPRG_SCRATCH0 stw r10,GPR10(r11) +#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S) +BEGIN_MMU_FTR_SECTION + mfcr r10 + stw r10, _CCR(r11) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) +#endif mfspr r12,SPRN_SPRG_SCRATCH1 stw r12,GPR11(r11) mflr r10 @@ -83,6 +97,11 @@ stw r10, _DSISR(r11) .endif lwz r9, SRR1(r12) +#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S) +BEGIN_MMU_FTR_SECTION + andi. r10, r9, MSR_PR +END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) +#endif lwz r12, SRR0(r12) #else mfspr r12,SPRN_SRR0 diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index c11b0a005196..2015c4f96238 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -25,12 +25,6 @@ #include #include -#ifdef CONFIG_VMAP_STACK -#define ADDR_OFFSET 0 -#else -#define ADDR_OFFSET PAGE_OFFSET -#endif - #ifdef CONFIG_SMP .section .bss .align 2 @@ -53,8 +47,8 @@ mmu_hash_lock: .text _GLOBAL(hash_page) #ifdef CONFIG_SMP - lis r8, (mmu_hash_lock - ADDR_OFFSET)@h - ori r8, r8, (mmu_hash_lock - ADDR_OFFSET)@l + lis r8, (mmu_hash_lock - PAGE_OFFSET)@h + ori r8, r8, (mmu_hash_lock - PAGE_OFFSET)@l lis r0,0x0fff b 10f 11: lwz r6,0(r8) @@ -72,12 +66,9 @@ _GLOBAL(hash_page) cmplw 0,r4,r0 ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */ mfspr r5, SPRN_SPRG_PGDIR /* phys page-table root */ -#ifdef CONFIG_VMAP_STACK - tovirt(r5, r5) -#endif blt+ 112f /* assume user more likely */ - lis r5, (swapper_pg_dir - ADDR_OFFSET)@ha /* if kernel address, use */ - addi r5 ,r5 ,(swapper_pg_dir - ADDR_OFFSET)@l /* kernel page table */ + lis r5, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ + addi r5 ,r5 ,(swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ rlwimi r3,r9,32-12,29,29 /* MSR_PR -> _PAGE_USER */ 112: #ifndef CONFIG_PTE_64BIT @@ -89,9 +80,6 @@ _GLOBAL(hash_page) lwzx r8,r8,r5 /* Get L1 entry */ rlwinm. r8,r8,0,0,20 /* extract pt base address */ #endif -#ifdef CONFIG_VMAP_STACK - tovirt(r8, r8) -#endif #ifdef CONFIG_SMP beq- hash_page_out /* return if no mapping */ #else @@ -143,30 +131,36 @@ retry: bne- retry /* retry if someone got there first */ mfsrin r3,r4 /* get segment reg for segment */ +#ifndef CONFIG_VMAP_STACK mfctr r0 stw r0,_CTR(r11) +#endif bl create_hpte /* add the hash table entry */ #ifdef CONFIG_SMP eieio - lis r8, (mmu_hash_lock - ADDR_OFFSET)@ha + lis r8, (mmu_hash_lock - PAGE_OFFSET)@ha li r0,0 - stw r0, (mmu_hash_lock - ADDR_OFFSET)@l(r8) + stw r0, (mmu_hash_lock - PAGE_OFFSET)@l(r8) #endif +#ifdef CONFIG_VMAP_STACK + b fast_hash_page_return +#else /* Return from the exception */ lwz r5,_CTR(r11) mtctr r5 lwz r0,GPR0(r11) lwz r8,GPR8(r11) b fast_exception_return +#endif #ifdef CONFIG_SMP hash_page_out: eieio - lis r8, (mmu_hash_lock - ADDR_OFFSET)@ha + lis r8, (mmu_hash_lock - PAGE_OFFSET)@ha li r0,0 - stw r0, (mmu_hash_lock - ADDR_OFFSET)@l(r8) + stw r0, (mmu_hash_lock - PAGE_OFFSET)@l(r8) blr #endif /* CONFIG_SMP */ @@ -341,7 +335,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) patch_site 1f, patch__hash_page_A1 patch_site 2f, patch__hash_page_A2 /* Get the address of the primary PTE group in the hash table (r3) */ -0: lis r0, (Hash_base - ADDR_OFFSET)@h /* base address of hash table */ +0: lis r0, (Hash_base - PAGE_OFFSET)@h /* base address of hash table */ 1: rlwimi r0,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */ 2: rlwinm r3,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */ xor r3,r3,r0 /* make primary hash */ @@ -355,10 +349,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) beq+ 10f /* no PTE: go look for an empty slot */ tlbie r4 - lis r4, (htab_hash_searches - ADDR_OFFSET)@ha - lwz r6, (htab_hash_searches - ADDR_OFFSET)@l(r4) + lis r4, (htab_hash_searches - PAGE_OFFSET)@ha + lwz r6, (htab_hash_searches - PAGE_OFFSET)@l(r4) addi r6,r6,1 /* count how many searches we do */ - stw r6, (htab_hash_searches - ADDR_OFFSET)@l(r4) + stw r6, (htab_hash_searches - PAGE_OFFSET)@l(r4) /* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */ mtctr r0 @@ -390,10 +384,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) beq+ found_empty /* update counter of times that the primary PTEG is full */ - lis r4, (primary_pteg_full - ADDR_OFFSET)@ha - lwz r6, (primary_pteg_full - ADDR_OFFSET)@l(r4) + lis r4, (primary_pteg_full - PAGE_OFFSET)@ha + lwz r6, (primary_pteg_full - PAGE_OFFSET)@l(r4) addi r6,r6,1 - stw r6, (primary_pteg_full - ADDR_OFFSET)@l(r4) + stw r6, (primary_pteg_full - PAGE_OFFSET)@l(r4) patch_site 0f, patch__hash_page_C /* Search the secondary PTEG for an empty slot */ @@ -427,8 +421,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) * lockup here but that shouldn't happen */ -1: lis r4, (next_slot - ADDR_OFFSET)@ha /* get next evict slot */ - lwz r6, (next_slot - ADDR_OFFSET)@l(r4) +1: lis r4, (next_slot - PAGE_OFFSET)@ha /* get next evict slot */ + lwz r6, (next_slot - PAGE_OFFSET)@l(r4) addi r6,r6,HPTE_SIZE /* search for candidate */ andi. r6,r6,7*HPTE_SIZE stw r6,next_slot@l(r4) diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 0a1c65a2c565..f888cbb109b9 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -413,7 +413,7 @@ void __init MMU_init_hw(void) void __init MMU_init_hw_patch(void) { unsigned int hmask = Hash_mask >> (16 - LG_HPTEG_SIZE); - unsigned int hash; + unsigned int hash = (unsigned int)Hash - PAGE_OFFSET; if (ppc_md.progress) ppc_md.progress("hash:patch", 0x345); @@ -425,11 +425,6 @@ void __init MMU_init_hw_patch(void) /* * Patch up the instructions in hashtable.S:create_hpte */ - if (IS_ENABLED(CONFIG_VMAP_STACK)) - hash = (unsigned int)Hash; - else - hash = (unsigned int)Hash - PAGE_OFFSET; - modify_instruction_site(&patch__hash_page_A0, 0xffff, hash >> 16); modify_instruction_site(&patch__hash_page_A1, 0x7c0, hash_mb << 6); modify_instruction_site(&patch__hash_page_A2, 0x7c0, hash_mb2 << 6); @@ -439,8 +434,7 @@ void __init MMU_init_hw_patch(void) /* * Patch up the instructions in hashtable.S:flush_hash_page */ - modify_instruction_site(&patch__flush_hash_A0, 0xffff, - ((unsigned int)Hash - PAGE_OFFSET) >> 16); + modify_instruction_site(&patch__flush_hash_A0, 0xffff, hash >> 16); modify_instruction_site(&patch__flush_hash_A1, 0x7c0, hash_mb << 6); modify_instruction_site(&patch__flush_hash_A2, 0x7c0, hash_mb2 << 6); modify_instruction_site(&patch__flush_hash_B, 0xffff, hmask); diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index 16dd95bd0749..db5664dde5ff 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -185,8 +185,7 @@ u8 __initdata early_hash[256 << 10] __aligned(256 << 10) = {0}; static void __init kasan_early_hash_table(void) { - unsigned int hash = IS_ENABLED(CONFIG_VMAP_STACK) ? (unsigned int)early_hash : - __pa(early_hash); + unsigned int hash = __pa(early_hash); modify_instruction_site(&patch__hash_page_A0, 0xffff, hash >> 16); modify_instruction_site(&patch__flush_hash_A0, 0xffff, hash >> 16); -- cgit v1.2.3-59-g8ed1b From 5a528eb67908bcf493f3583cb4726fbd8e129605 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 14 Feb 2020 08:39:50 +0000 Subject: powerpc/chrp: Fix enter_rtas() with CONFIG_VMAP_STACK With CONFIG_VMAP_STACK, data MMU has to be enabled to read data on the stack. Fixes: cd08f109e262 ("powerpc/32s: Enable CONFIG_VMAP_STACK") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d2330584f8c42d3039896e2b56f5d39676dc919c.1581669558.git.christophe.leroy@c-s.fr --- arch/powerpc/kernel/entry_32.S | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 0713daa651d9..bc056d906b51 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -1354,12 +1354,17 @@ _GLOBAL(enter_rtas) mtspr SPRN_SRR0,r8 mtspr SPRN_SRR1,r9 RFI -1: tophys(r9,r1) +1: tophys_novmstack r9, r1 +#ifdef CONFIG_VMAP_STACK + li r0, MSR_KERNEL & ~MSR_IR /* can take DTLB miss */ + mtmsr r0 + isync +#endif lwz r8,INT_FRAME_SIZE+4(r9) /* get return address */ lwz r9,8(r9) /* original msr value */ addi r1,r1,INT_FRAME_SIZE li r0,0 - tophys(r7, r2) + tophys_novmstack r7, r2 stw r0, THREAD + RTAS_SP(r7) mtspr SPRN_SRR0,r8 mtspr SPRN_SRR1,r9 -- cgit v1.2.3-59-g8ed1b From 477f3488a94e35380c82a7498d46f10fa5f3edd2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 14 Feb 2020 06:53:00 +0000 Subject: powerpc/6xx: Fix power_save_ppc32_restore() with CONFIG_VMAP_STACK power_save_ppc32_restore() is called during exception entry, before re-enabling the MMU. It substracts KERNELBASE from the address of nap_save_msscr0 to access it. With CONFIG_VMAP_STACK enabled, data MMU translation has already been re-enabled, so power_save_ppc32_restore() has to access nap_save_msscr0 by its virtual address. Reported-by: Larry Finger Signed-off-by: Christophe Leroy Fixes: cd08f109e262 ("powerpc/32s: Enable CONFIG_VMAP_STACK") Tested-by: Larry Finger Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/7bce32ccbab3ba3e3e0f27da6961bf6313df97ed.1581663140.git.christophe.leroy@c-s.fr --- arch/powerpc/kernel/idle_6xx.S | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S index 0ffdd18b9f26..433d97bea1f3 100644 --- a/arch/powerpc/kernel/idle_6xx.S +++ b/arch/powerpc/kernel/idle_6xx.S @@ -166,7 +166,11 @@ BEGIN_FTR_SECTION mfspr r9,SPRN_HID0 andis. r9,r9,HID0_NAP@h beq 1f +#ifdef CONFIG_VMAP_STACK + addis r9, r11, nap_save_msscr0@ha +#else addis r9,r11,(nap_save_msscr0-KERNELBASE)@ha +#endif lwz r9,nap_save_msscr0@l(r9) mtspr SPRN_MSSCR0, r9 sync @@ -174,7 +178,11 @@ BEGIN_FTR_SECTION 1: END_FTR_SECTION_IFSET(CPU_FTR_NAP_DISABLE_L2_PR) BEGIN_FTR_SECTION +#ifdef CONFIG_VMAP_STACK + addis r9, r11, nap_save_hid1@ha +#else addis r9,r11,(nap_save_hid1-KERNELBASE)@ha +#endif lwz r9,nap_save_hid1@l(r9) mtspr SPRN_HID1, r9 END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX) -- cgit v1.2.3-59-g8ed1b From 066bc3576e653b615ee3f5230a89d69c8ebeeb71 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Mon, 17 Feb 2020 15:13:43 +1100 Subject: powerpc/xmon: Fix whitespace handling in getstring() The ls (lookup symbol) and zr (reboot) commands use xmon's getstring() helper to read a string argument from the xmon prompt. This function skips over leading whitespace, but doesn't check if the first "non-whitespace" character is a newline which causes some odd behaviour ( indicates a the enter key was pressed): 0:mon> ls printk printk: c0000000001680c4 0:mon> ls printk Symbol ' printk' not found. 0:mon> With commit 2d9b332d99b ("powerpc/xmon: Allow passing an argument to ppc_md.restart()") we have a similar problem with the zr command. Previously zr took no arguments so "zr would trigger a reboot. With that patch applied a second newline needs to be sent in order for the reboot to occur. Fix this by checking if the leading whitespace ended on a newline: 0:mon> ls Symbol '' not found. Fixes: 2d9b332d99b2 ("powerpc/xmon: Allow passing an argument to ppc_md.restart()") Reported-by: Michael Ellerman Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200217041343.2454-1-oohall@gmail.com --- arch/powerpc/xmon/xmon.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index e8c84d265602..0ec9640335bb 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -3435,6 +3435,11 @@ getstring(char *s, int size) int c; c = skipbl(); + if (c == '\n') { + *s = 0; + return; + } + do { if( size > 1 ){ *s++ = c; -- cgit v1.2.3-59-g8ed1b From 3be54d558c75562e42bc83d665df024bd79d399b Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 17 Feb 2020 12:39:47 +0100 Subject: efi: Only print errors about failing to get certs if EFI vars are found If CONFIG_LOAD_UEFI_KEYS is enabled, the kernel attempts to load the certs from the db, dbx and MokListRT EFI variables into the appropriate keyrings. But it just assumes that the variables will be present and prints an error if the certs can't be loaded, even when is possible that the variables may not exist. For example the MokListRT variable will only be present if shim is used. So only print an error message about failing to get the certs list from an EFI variable if this is found. Otherwise these printed errors just pollute the kernel log ring buffer with confusing messages like the following: [ 5.427251] Couldn't get size: 0x800000000000000e [ 5.427261] MODSIGN: Couldn't get UEFI db list [ 5.428012] Couldn't get size: 0x800000000000000e [ 5.428023] Couldn't get UEFI MokListRT Reported-by: Hans de Goede Signed-off-by: Javier Martinez Canillas Tested-by: Hans de Goede Acked-by: Ard Biesheuvel Signed-off-by: Mimi Zohar --- security/integrity/platform_certs/load_uefi.c | 40 +++++++++++++++++---------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c index 111898aad56e..f0c908241966 100644 --- a/security/integrity/platform_certs/load_uefi.c +++ b/security/integrity/platform_certs/load_uefi.c @@ -35,16 +35,18 @@ static __init bool uefi_check_ignore_db(void) * Get a certificate list blob from the named EFI variable. */ static __init void *get_cert_list(efi_char16_t *name, efi_guid_t *guid, - unsigned long *size) + unsigned long *size, efi_status_t *status) { - efi_status_t status; unsigned long lsize = 4; unsigned long tmpdb[4]; void *db; - status = efi.get_variable(name, guid, NULL, &lsize, &tmpdb); - if (status != EFI_BUFFER_TOO_SMALL) { - pr_err("Couldn't get size: 0x%lx\n", status); + *status = efi.get_variable(name, guid, NULL, &lsize, &tmpdb); + if (*status == EFI_NOT_FOUND) + return NULL; + + if (*status != EFI_BUFFER_TOO_SMALL) { + pr_err("Couldn't get size: 0x%lx\n", *status); return NULL; } @@ -52,10 +54,10 @@ static __init void *get_cert_list(efi_char16_t *name, efi_guid_t *guid, if (!db) return NULL; - status = efi.get_variable(name, guid, NULL, &lsize, db); - if (status != EFI_SUCCESS) { + *status = efi.get_variable(name, guid, NULL, &lsize, db); + if (*status != EFI_SUCCESS) { kfree(db); - pr_err("Error reading db var: 0x%lx\n", status); + pr_err("Error reading db var: 0x%lx\n", *status); return NULL; } @@ -74,6 +76,7 @@ static int __init load_uefi_certs(void) efi_guid_t mok_var = EFI_SHIM_LOCK_GUID; void *db = NULL, *dbx = NULL, *mok = NULL; unsigned long dbsize = 0, dbxsize = 0, moksize = 0; + efi_status_t status; int rc = 0; if (!efi.get_variable) @@ -83,9 +86,12 @@ static int __init load_uefi_certs(void) * an error if we can't get them. */ if (!uefi_check_ignore_db()) { - db = get_cert_list(L"db", &secure_var, &dbsize); + db = get_cert_list(L"db", &secure_var, &dbsize, &status); if (!db) { - pr_err("MODSIGN: Couldn't get UEFI db list\n"); + if (status == EFI_NOT_FOUND) + pr_debug("MODSIGN: db variable wasn't found\n"); + else + pr_err("MODSIGN: Couldn't get UEFI db list\n"); } else { rc = parse_efi_signature_list("UEFI:db", db, dbsize, get_handler_for_db); @@ -96,9 +102,12 @@ static int __init load_uefi_certs(void) } } - mok = get_cert_list(L"MokListRT", &mok_var, &moksize); + mok = get_cert_list(L"MokListRT", &mok_var, &moksize, &status); if (!mok) { - pr_info("Couldn't get UEFI MokListRT\n"); + if (status == EFI_NOT_FOUND) + pr_debug("MokListRT variable wasn't found\n"); + else + pr_info("Couldn't get UEFI MokListRT\n"); } else { rc = parse_efi_signature_list("UEFI:MokListRT", mok, moksize, get_handler_for_db); @@ -107,9 +116,12 @@ static int __init load_uefi_certs(void) kfree(mok); } - dbx = get_cert_list(L"dbx", &secure_var, &dbxsize); + dbx = get_cert_list(L"dbx", &secure_var, &dbxsize, &status); if (!dbx) { - pr_info("Couldn't get UEFI dbx list\n"); + if (status == EFI_NOT_FOUND) + pr_debug("dbx variable wasn't found\n"); + else + pr_info("Couldn't get UEFI dbx list\n"); } else { rc = parse_efi_signature_list("UEFI:dbx", dbx, dbxsize, -- cgit v1.2.3-59-g8ed1b From 6a30e1b1dcad0ba94fae757f797812d7d8dcb72c Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Mon, 10 Feb 2020 20:44:39 +0800 Subject: crypto: rename sm3-256 to sm3 in hash_algo_name The name sm3-256 is defined in hash_algo_name in hash_info, but the algorithm name implemented in sm3_generic.c is sm3, which will cause the sm3-256 algorithm to be not found in some application scenarios of the hash algorithm, and an ENOENT error will occur. For example, IMA, keys, and other subsystems that reference hash_algo_name all use the hash algorithm of sm3. Fixes: 5ca4c20cfd37 ("keys, trusted: select hash algorithm for TPM2 chips") Signed-off-by: Tianjia Zhang Reviewed-by: Pascal van Leeuwen Signed-off-by: Mimi Zohar --- crypto/hash_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/hash_info.c b/crypto/hash_info.c index c754cb75dd1a..a49ff96bde77 100644 --- a/crypto/hash_info.c +++ b/crypto/hash_info.c @@ -26,7 +26,7 @@ const char *const hash_algo_name[HASH_ALGO__LAST] = { [HASH_ALGO_TGR_128] = "tgr128", [HASH_ALGO_TGR_160] = "tgr160", [HASH_ALGO_TGR_192] = "tgr192", - [HASH_ALGO_SM3_256] = "sm3-256", + [HASH_ALGO_SM3_256] = "sm3", [HASH_ALGO_STREEBOG_256] = "streebog256", [HASH_ALGO_STREEBOG_512] = "streebog512", }; -- cgit v1.2.3-59-g8ed1b From 5780b9abd530982c2bb1018e2c52c05ab3c30b45 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Mon, 10 Feb 2020 20:44:40 +0800 Subject: ima: add sm3 algorithm to hash algorithm configuration list sm3 has been supported by the ima hash algorithm, but it is not yet in the Kconfig configuration list. After adding, both ima and tpm2 can support sm3 well. Signed-off-by: Tianjia Zhang Signed-off-by: Mimi Zohar --- security/integrity/ima/Kconfig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig index 711ff10fa36e..3f3ee4e2eb0d 100644 --- a/security/integrity/ima/Kconfig +++ b/security/integrity/ima/Kconfig @@ -112,6 +112,10 @@ choice config IMA_DEFAULT_HASH_WP512 bool "WP512" depends on CRYPTO_WP512=y && !IMA_TEMPLATE + + config IMA_DEFAULT_HASH_SM3 + bool "SM3" + depends on CRYPTO_SM3=y && !IMA_TEMPLATE endchoice config IMA_DEFAULT_HASH @@ -121,6 +125,7 @@ config IMA_DEFAULT_HASH default "sha256" if IMA_DEFAULT_HASH_SHA256 default "sha512" if IMA_DEFAULT_HASH_SHA512 default "wp512" if IMA_DEFAULT_HASH_WP512 + default "sm3" if IMA_DEFAULT_HASH_SM3 config IMA_WRITE_POLICY bool "Enable multiple writes to the IMA policy" -- cgit v1.2.3-59-g8ed1b From 789a2c250340666220fa74bc6c8f58497e3863b3 Mon Sep 17 00:00:00 2001 From: Hanno Zulla Date: Tue, 18 Feb 2020 12:37:47 +0100 Subject: HID: hid-bigbenff: fix general protection fault caused by double kfree The struct *bigben was allocated via devm_kzalloc() and then used as a parameter in input_ff_create_memless(). This caused a double kfree during removal of the device, since both the managed resource API and ml_ff_destroy() in drivers/input/ff-memless.c would call kfree() on it. Signed-off-by: Hanno Zulla Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-bigbenff.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-bigbenff.c b/drivers/hid/hid-bigbenff.c index 3f6abd190df4..f7e85bacb688 100644 --- a/drivers/hid/hid-bigbenff.c +++ b/drivers/hid/hid-bigbenff.c @@ -220,10 +220,16 @@ static void bigben_worker(struct work_struct *work) static int hid_bigben_play_effect(struct input_dev *dev, void *data, struct ff_effect *effect) { - struct bigben_device *bigben = data; + struct hid_device *hid = input_get_drvdata(dev); + struct bigben_device *bigben = hid_get_drvdata(hid); u8 right_motor_on; u8 left_motor_force; + if (!bigben) { + hid_err(hid, "no device data\n"); + return 0; + } + if (effect->type != FF_RUMBLE) return 0; @@ -341,7 +347,7 @@ static int bigben_probe(struct hid_device *hid, INIT_WORK(&bigben->worker, bigben_worker); - error = input_ff_create_memless(hidinput->input, bigben, + error = input_ff_create_memless(hidinput->input, NULL, hid_bigben_play_effect); if (error) return error; -- cgit v1.2.3-59-g8ed1b From 976a54d0f4202cb412a3b1fc7f117e1d97db35f3 Mon Sep 17 00:00:00 2001 From: Hanno Zulla Date: Tue, 18 Feb 2020 12:38:34 +0100 Subject: HID: hid-bigbenff: call hid_hw_stop() in case of error It's required to call hid_hw_stop() once hid_hw_start() was called previously, so error cases need to handle this. Also, hid_hw_close() is not necessary during removal. Signed-off-by: Hanno Zulla Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-bigbenff.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/hid/hid-bigbenff.c b/drivers/hid/hid-bigbenff.c index f7e85bacb688..f8c552b64a89 100644 --- a/drivers/hid/hid-bigbenff.c +++ b/drivers/hid/hid-bigbenff.c @@ -305,7 +305,6 @@ static void bigben_remove(struct hid_device *hid) struct bigben_device *bigben = hid_get_drvdata(hid); cancel_work_sync(&bigben->worker); - hid_hw_close(hid); hid_hw_stop(hid); } @@ -350,7 +349,7 @@ static int bigben_probe(struct hid_device *hid, error = input_ff_create_memless(hidinput->input, NULL, hid_bigben_play_effect); if (error) - return error; + goto error_hw_stop; name_sz = strlen(dev_name(&hid->dev)) + strlen(":red:bigben#") + 1; @@ -360,8 +359,10 @@ static int bigben_probe(struct hid_device *hid, sizeof(struct led_classdev) + name_sz, GFP_KERNEL ); - if (!led) - return -ENOMEM; + if (!led) { + error = -ENOMEM; + goto error_hw_stop; + } name = (void *)(&led[1]); snprintf(name, name_sz, "%s:red:bigben%d", @@ -375,7 +376,7 @@ static int bigben_probe(struct hid_device *hid, bigben->leds[n] = led; error = devm_led_classdev_register(&hid->dev, led); if (error) - return error; + goto error_hw_stop; } /* initial state: LED1 is on, no rumble effect */ @@ -389,6 +390,10 @@ static int bigben_probe(struct hid_device *hid, hid_info(hid, "LED and force feedback support for BigBen gamepad\n"); return 0; + +error_hw_stop: + hid_hw_stop(hid); + return error; } static __u8 *bigben_report_fixup(struct hid_device *hid, __u8 *rdesc, -- cgit v1.2.3-59-g8ed1b From 4eb1b01de5b9d8596d6c103efcf1a15cfc1bedf7 Mon Sep 17 00:00:00 2001 From: Hanno Zulla Date: Tue, 18 Feb 2020 12:39:31 +0100 Subject: HID: hid-bigbenff: fix race condition for scheduled work during removal It's possible that there is scheduled work left while the device is already being removed, which can cause a kernel crash. Adding a flag will avoid this. Signed-off-by: Hanno Zulla Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-bigbenff.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/hid/hid-bigbenff.c b/drivers/hid/hid-bigbenff.c index f8c552b64a89..db6da21ade06 100644 --- a/drivers/hid/hid-bigbenff.c +++ b/drivers/hid/hid-bigbenff.c @@ -174,6 +174,7 @@ static __u8 pid0902_rdesc_fixed[] = { struct bigben_device { struct hid_device *hid; struct hid_report *report; + bool removed; u8 led_state; /* LED1 = 1 .. LED4 = 8 */ u8 right_motor_on; /* right motor off/on 0/1 */ u8 left_motor_force; /* left motor force 0-255 */ @@ -190,6 +191,9 @@ static void bigben_worker(struct work_struct *work) struct bigben_device, worker); struct hid_field *report_field = bigben->report->field[0]; + if (bigben->removed) + return; + if (bigben->work_led) { bigben->work_led = false; report_field->value[0] = 0x01; /* 1 = led message */ @@ -304,6 +308,7 @@ static void bigben_remove(struct hid_device *hid) { struct bigben_device *bigben = hid_get_drvdata(hid); + bigben->removed = true; cancel_work_sync(&bigben->worker); hid_hw_stop(hid); } @@ -324,6 +329,7 @@ static int bigben_probe(struct hid_device *hid, return -ENOMEM; hid_set_drvdata(hid, bigben); bigben->hid = hid; + bigben->removed = false; error = hid_parse(hid); if (error) { -- cgit v1.2.3-59-g8ed1b From f25975f42f2f8f2a01303054d6a70c7ceb1fcf54 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Tue, 18 Feb 2020 14:03:34 +0100 Subject: bpf, uapi: Remove text about bpf_redirect_map() giving higher performance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The performance of bpf_redirect() is now roughly the same as that of bpf_redirect_map(). However, David Ahern pointed out that the header file has not been updated to reflect this, and still says that a significant performance increase is possible when using bpf_redirect_map(). Remove this text from the bpf_redirect_map() description, and reword the description in bpf_redirect() slightly. Also fix the 'Return' section of the bpf_redirect_map() documentation. Fixes: 1d233886dd90 ("xdp: Use bulking for non-map XDP_REDIRECT and consolidate code paths") Reported-by: David Ahern Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20200218130334.29889-1-toke@redhat.com --- include/uapi/linux/bpf.h | 16 +++++++--------- tools/include/uapi/linux/bpf.h | 16 +++++++--------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f1d74a2bd234..22f235260a3a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1045,9 +1045,9 @@ union bpf_attr { * supports redirection to the egress interface, and accepts no * flag at all. * - * The same effect can be attained with the more generic - * **bpf_redirect_map**\ (), which requires specific maps to be - * used but offers better performance. + * The same effect can also be attained with the more generic + * **bpf_redirect_map**\ (), which uses a BPF map to store the + * redirect target instead of providing it directly to the helper. * Return * For XDP, the helper returns **XDP_REDIRECT** on success or * **XDP_ABORTED** on error. For other program types, the values @@ -1611,13 +1611,11 @@ union bpf_attr { * the caller. Any higher bits in the *flags* argument must be * unset. * - * When used to redirect packets to net devices, this helper - * provides a high performance increase over **bpf_redirect**\ (). - * This is due to various implementation details of the underlying - * mechanisms, one of which is the fact that **bpf_redirect_map**\ - * () tries to send packet as a "bulk" to the device. + * See also bpf_redirect(), which only supports redirecting to an + * ifindex, but doesn't require a map to do so. * Return - * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error. + * **XDP_REDIRECT** on success, or the value of the two lower bits + * of the **flags* argument on error. * * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) * Description diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index f1d74a2bd234..22f235260a3a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1045,9 +1045,9 @@ union bpf_attr { * supports redirection to the egress interface, and accepts no * flag at all. * - * The same effect can be attained with the more generic - * **bpf_redirect_map**\ (), which requires specific maps to be - * used but offers better performance. + * The same effect can also be attained with the more generic + * **bpf_redirect_map**\ (), which uses a BPF map to store the + * redirect target instead of providing it directly to the helper. * Return * For XDP, the helper returns **XDP_REDIRECT** on success or * **XDP_ABORTED** on error. For other program types, the values @@ -1611,13 +1611,11 @@ union bpf_attr { * the caller. Any higher bits in the *flags* argument must be * unset. * - * When used to redirect packets to net devices, this helper - * provides a high performance increase over **bpf_redirect**\ (). - * This is due to various implementation details of the underlying - * mechanisms, one of which is the fact that **bpf_redirect_map**\ - * () tries to send packet as a "bulk" to the device. + * See also bpf_redirect(), which only supports redirecting to an + * ifindex, but doesn't require a map to do so. * Return - * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error. + * **XDP_REDIRECT** on success, or the value of the two lower bits + * of the **flags* argument on error. * * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) * Description -- cgit v1.2.3-59-g8ed1b From 113e6b7e15e23dc45d5c66eb66bb91a627812e36 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Mon, 17 Feb 2020 18:17:01 +0100 Subject: libbpf: Sanitise internal map names so they are not rejected by the kernel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kernel only accepts map names with alphanumeric characters, underscores and periods in their name. However, the auto-generated internal map names used by libbpf takes their prefix from the user-supplied BPF object name, which has no such restriction. This can lead to "Invalid argument" errors when trying to load a BPF program using global variables. Fix this by sanitising the map names, replacing any non-allowed characters with underscores. Fixes: d859900c4c56 ("bpf, libbpf: support global data/bss/rodata sections") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200217171701.215215-1-toke@redhat.com --- tools/lib/bpf/libbpf.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 514b1a524abb..7469c7dcc15e 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -1283,7 +1284,7 @@ static size_t bpf_map_mmap_sz(const struct bpf_map *map) static char *internal_map_name(struct bpf_object *obj, enum libbpf_map_type type) { - char map_name[BPF_OBJ_NAME_LEN]; + char map_name[BPF_OBJ_NAME_LEN], *p; const char *sfx = libbpf_type_to_btf_name[type]; int sfx_len = max((size_t)7, strlen(sfx)); int pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, @@ -1292,6 +1293,11 @@ static char *internal_map_name(struct bpf_object *obj, snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name, sfx_len, libbpf_type_to_btf_name[type]); + /* sanitise map name to characters allowed by kernel */ + for (p = map_name; *p && p < map_name + sizeof(map_name); p++) + if (!isalnum(*p) && *p != '_' && *p != '.') + *p = '_'; + return strdup(map_name); } -- cgit v1.2.3-59-g8ed1b From 1d4615978f525b769990a4a4ef22fb1b9a04cdf1 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 17 Feb 2020 17:12:37 +0100 Subject: iommu/vt-d: Add attach_deferred() helper Implement a helper function to check whether a device's attach process is deferred. Fixes: 1ee0186b9a12 ("iommu/vt-d: Refactor find_domain() helper") Cc: stable@vger.kernel.org # v5.5 Reviewed-by: Jerry Snitselaar Acked-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 9dc37672bf89..80f2332a5466 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -762,6 +762,11 @@ static int iommu_dummy(struct device *dev) return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO; } +static bool attach_deferred(struct device *dev) +{ + return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO; +} + /** * is_downstream_to_pci_bridge - test if a device belongs to the PCI * sub-hierarchy of a candidate PCI-PCI bridge @@ -2510,8 +2515,7 @@ struct dmar_domain *find_domain(struct device *dev) { struct device_domain_info *info; - if (unlikely(dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO || - dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)) + if (unlikely(attach_deferred(dev) || iommu_dummy(dev))) return NULL; if (dev_is_pci(dev)) @@ -2527,7 +2531,7 @@ struct dmar_domain *find_domain(struct device *dev) static struct dmar_domain *deferred_attach_domain(struct device *dev) { - if (unlikely(dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO)) { + if (unlikely(attach_deferred(dev))) { struct iommu_domain *domain; dev->archdata.iommu = NULL; @@ -6133,7 +6137,7 @@ intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev) static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain, struct device *dev) { - return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO; + return attach_deferred(dev); } static int -- cgit v1.2.3-59-g8ed1b From 034d98cc0cdcde2415c6f598fa9125e3eaa02569 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 17 Feb 2020 17:16:19 +0100 Subject: iommu/vt-d: Move deferred device attachment into helper function Move the code that does the deferred device attachment into a separate helper function. Fixes: 1ee0186b9a12 ("iommu/vt-d: Refactor find_domain() helper") Cc: stable@vger.kernel.org # v5.5 Reviewed-by: Jerry Snitselaar Acked-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 80f2332a5466..42cdcce1602e 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2529,16 +2529,20 @@ struct dmar_domain *find_domain(struct device *dev) return NULL; } -static struct dmar_domain *deferred_attach_domain(struct device *dev) +static void do_deferred_attach(struct device *dev) { - if (unlikely(attach_deferred(dev))) { - struct iommu_domain *domain; + struct iommu_domain *domain; - dev->archdata.iommu = NULL; - domain = iommu_get_domain_for_dev(dev); - if (domain) - intel_iommu_attach_device(domain, dev); - } + dev->archdata.iommu = NULL; + domain = iommu_get_domain_for_dev(dev); + if (domain) + intel_iommu_attach_device(domain, dev); +} + +static struct dmar_domain *deferred_attach_domain(struct device *dev) +{ + if (unlikely(attach_deferred(dev))) + do_deferred_attach(dev); return find_domain(dev); } -- cgit v1.2.3-59-g8ed1b From a11bfde9c77df1fd350ea27169ab921f511bf5d0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 17 Feb 2020 17:20:59 +0100 Subject: iommu/vt-d: Do deferred attachment in iommu_need_mapping() The attachment of deferred devices needs to happen before the check whether the device is identity mapped or not. Otherwise the check will return wrong results, cause warnings boot failures in kdump kernels, like WARNING: CPU: 0 PID: 318 at ../drivers/iommu/intel-iommu.c:592 domain_get_iommu+0x61/0x70 [...] Call Trace: __intel_map_single+0x55/0x190 intel_alloc_coherent+0xac/0x110 dmam_alloc_attrs+0x50/0xa0 ahci_port_start+0xfb/0x1f0 [libahci] ata_host_start.part.39+0x104/0x1e0 [libata] With the earlier check the kdump boot succeeds and a crashdump is written. Fixes: 1ee0186b9a12 ("iommu/vt-d: Refactor find_domain() helper") Cc: stable@vger.kernel.org # v5.5 Reviewed-by: Jerry Snitselaar Acked-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 42cdcce1602e..723f615c6e84 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2541,9 +2541,6 @@ static void do_deferred_attach(struct device *dev) static struct dmar_domain *deferred_attach_domain(struct device *dev) { - if (unlikely(attach_deferred(dev))) - do_deferred_attach(dev); - return find_domain(dev); } @@ -3595,6 +3592,9 @@ static bool iommu_need_mapping(struct device *dev) if (iommu_dummy(dev)) return false; + if (unlikely(attach_deferred(dev))) + do_deferred_attach(dev); + ret = identity_mapping(dev); if (ret) { u64 dma_mask = *dev->dma_mask; @@ -3958,7 +3958,11 @@ bounce_map_single(struct device *dev, phys_addr_t paddr, size_t size, int prot = 0; int ret; + if (unlikely(attach_deferred(dev))) + do_deferred_attach(dev); + domain = deferred_attach_domain(dev); + if (WARN_ON(dir == DMA_NONE || !domain)) return DMA_MAPPING_ERROR; -- cgit v1.2.3-59-g8ed1b From 96d170f3b1a607612caf3618c534d5c64fc2d61b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 17 Feb 2020 17:27:44 +0100 Subject: iommu/vt-d: Remove deferred_attach_domain() The function is now only a wrapper around find_domain(). Remove the function and call find_domain() directly at the call-sites. Fixes: 1ee0186b9a12 ("iommu/vt-d: Refactor find_domain() helper") Cc: stable@vger.kernel.org # v5.5 Reviewed-by: Jerry Snitselaar Acked-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 723f615c6e84..69f1c6b8dfcf 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2539,11 +2539,6 @@ static void do_deferred_attach(struct device *dev) intel_iommu_attach_device(domain, dev); } -static struct dmar_domain *deferred_attach_domain(struct device *dev) -{ - return find_domain(dev); -} - static inline struct device_domain_info * dmar_search_domain_by_dev_info(int segment, int bus, int devfn) { @@ -3643,7 +3638,7 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, BUG_ON(dir == DMA_NONE); - domain = deferred_attach_domain(dev); + domain = find_domain(dev); if (!domain) return DMA_MAPPING_ERROR; @@ -3863,7 +3858,7 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele if (!iommu_need_mapping(dev)) return dma_direct_map_sg(dev, sglist, nelems, dir, attrs); - domain = deferred_attach_domain(dev); + domain = find_domain(dev); if (!domain) return 0; @@ -3961,7 +3956,7 @@ bounce_map_single(struct device *dev, phys_addr_t paddr, size_t size, if (unlikely(attach_deferred(dev))) do_deferred_attach(dev); - domain = deferred_attach_domain(dev); + domain = find_domain(dev); if (WARN_ON(dir == DMA_NONE || !domain)) return DMA_MAPPING_ERROR; -- cgit v1.2.3-59-g8ed1b From 1ddb32da4a629fa7f87873d0b6836c2e1feb7518 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 17 Feb 2020 17:29:55 +0100 Subject: iommu/vt-d: Simplify check in identity_mapping() The function only has one call-site and there it is never called with dummy or deferred devices. Simplify the check in the function to account for that. Fixes: 1ee0186b9a12 ("iommu/vt-d: Refactor find_domain() helper") Cc: stable@vger.kernel.org # v5.5 Reviewed-by: Jerry Snitselaar Acked-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 69f1c6b8dfcf..6fa6de2b6ad5 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2916,7 +2916,7 @@ static int identity_mapping(struct device *dev) struct device_domain_info *info; info = dev->archdata.iommu; - if (info && info != DUMMY_DEVICE_DOMAIN_INFO && info != DEFER_DEVICE_DOMAIN_INFO) + if (info) return (info->domain == si_domain); return 0; -- cgit v1.2.3-59-g8ed1b From c68a9032299e837b56d356de9250c93094f7e0e3 Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Thu, 9 Jan 2020 11:17:40 +0800 Subject: riscv: set pmp configuration if kernel is running in M-mode When the kernel is running in S-mode, the expectation is that the bootloader or SBI layer will configure the PMP to allow the kernel to access physical memory. But, when the kernel is running in M-mode and is started with the ELF "loader", there's probably no bootloader or SBI layer involved to configure the PMP. Thus, we need to configure the PMP ourselves to enable the kernel to access all regions. Signed-off-by: Greentime Hu Reviewed-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/csr.h | 12 ++++++++++++ arch/riscv/kernel/head.S | 6 ++++++ 2 files changed, 18 insertions(+) diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 435b65532e29..8e18d2c64399 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -72,6 +72,16 @@ #define EXC_LOAD_PAGE_FAULT 13 #define EXC_STORE_PAGE_FAULT 15 +/* PMP configuration */ +#define PMP_R 0x01 +#define PMP_W 0x02 +#define PMP_X 0x04 +#define PMP_A 0x18 +#define PMP_A_TOR 0x08 +#define PMP_A_NA4 0x10 +#define PMP_A_NAPOT 0x18 +#define PMP_L 0x80 + /* symbolic CSR names: */ #define CSR_CYCLE 0xc00 #define CSR_TIME 0xc01 @@ -100,6 +110,8 @@ #define CSR_MCAUSE 0x342 #define CSR_MTVAL 0x343 #define CSR_MIP 0x344 +#define CSR_PMPCFG0 0x3a0 +#define CSR_PMPADDR0 0x3b0 #define CSR_MHARTID 0xf14 #ifdef CONFIG_RISCV_M_MODE diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 271860fc2c3f..85f2073e7fe4 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -58,6 +58,12 @@ _start_kernel: /* Reset all registers except ra, a0, a1 */ call reset_regs + /* Setup a PMP to permit access to all of memory. */ + li a0, -1 + csrw CSR_PMPADDR0, a0 + li a0, (PMP_A_NAPOT | PMP_R | PMP_W | PMP_X) + csrw CSR_PMPCFG0, a0 + /* * The hartid in a0 is expected later on, and we have no firmware * to hand it to us. -- cgit v1.2.3-59-g8ed1b From dd1f6308b28edf0452dd5dc7877992903ec61e69 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Tue, 18 Feb 2020 16:49:06 +0000 Subject: arm64: lse: Fix LSE atomics with LLVM Commit e0d5896bd356 ("arm64: lse: fix LSE atomics with LLVM's integrated assembler") broke the build when clang is used in connjunction with the binutils assembler ("-no-integrated-as"). This happens because __LSE_PREAMBLE is defined as ".arch armv8-a+lse", which overrides the version of the CPU architecture passed via the "-march" paramter to gas: $ aarch64-none-linux-gnu-as -EL -I ./arch/arm64/include -I ./arch/arm64/include/generated -I ./include -I ./include -I ./arch/arm64/include/uapi -I ./arch/arm64/include/generated/uapi -I ./include/uapi -I ./include/generated/uapi -I ./init -I ./init -march=armv8.3-a -o init/do_mounts.o /tmp/do_mounts-d7992a.s /tmp/do_mounts-d7992a.s: Assembler messages: /tmp/do_mounts-d7992a.s:1959: Error: selected processor does not support `autiasp' /tmp/do_mounts-d7992a.s:2021: Error: selected processor does not support `paciasp' /tmp/do_mounts-d7992a.s:2157: Error: selected processor does not support `autiasp' /tmp/do_mounts-d7992a.s:2175: Error: selected processor does not support `paciasp' /tmp/do_mounts-d7992a.s:2494: Error: selected processor does not support `autiasp' Fix the issue by replacing ".arch armv8-a+lse" with ".arch_extension lse". Sami confirms that the clang integrated assembler does now support the '.arch_extension' directive, so this change will be fine even for LTO builds in future. Fixes: e0d5896bd356cd ("arm64: lse: fix LSE atomics with LLVM's integrated assembler") Cc: Catalin Marinas Cc: Will Deacon Reported-by: Amit Kachhap Tested-by: Sami Tolvanen Signed-off-by: Vincenzo Frascino Signed-off-by: Will Deacon --- arch/arm64/include/asm/lse.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index d429f7701c36..5d10051c3e62 100644 --- a/arch/arm64/include/asm/lse.h +++ b/arch/arm64/include/asm/lse.h @@ -6,7 +6,7 @@ #ifdef CONFIG_ARM64_LSE_ATOMICS -#define __LSE_PREAMBLE ".arch armv8-a+lse\n" +#define __LSE_PREAMBLE ".arch_extension lse\n" #include #include -- cgit v1.2.3-59-g8ed1b From 297a31e3e8318f533cff4fe33ffaefb74f72c6e2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 17 Feb 2020 17:39:45 +0300 Subject: io_uring: remove unnecessary NULL checks The "kmsg" pointer can't be NULL and we have already dereferenced it so a check here would be useless. Reviewed-by: Stefano Garzarella Signed-off-by: Dan Carpenter Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 29565d82291f..d35b45696c73 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3075,7 +3075,7 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt, if (req->io) return -EAGAIN; if (io_alloc_async_ctx(req)) { - if (kmsg && kmsg->iov != kmsg->fast_iov) + if (kmsg->iov != kmsg->fast_iov) kfree(kmsg->iov); return -ENOMEM; } @@ -3229,7 +3229,7 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt, if (req->io) return -EAGAIN; if (io_alloc_async_ctx(req)) { - if (kmsg && kmsg->iov != kmsg->fast_iov) + if (kmsg->iov != kmsg->fast_iov) kfree(kmsg->iov); return -ENOMEM; } -- cgit v1.2.3-59-g8ed1b From 6a1ce99dc4bde564e4a072936f9d41f4a439140e Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Sun, 2 Feb 2020 16:32:02 +0530 Subject: RISC-V: Don't enable all interrupts in trap_init() Historically, we have been enabling all interrupts for each HART in trap_init(). Ideally, we should only enable M-mode interrupts for M-mode kernel and S-mode interrupts for S-mode kernel in trap_init(). Currently, we get suprious S-mode interrupts on Kendryte K210 board running M-mode NO-MMU kernel because we are enabling all interrupts in trap_init(). To fix this, we only enable software and external interrupt in trap_init(). In future, trap_init() will only enable software interrupt and PLIC driver will enable external interrupt using CPU notifiers. Fixes: a4c3733d32a7 ("riscv: abstract out CSR names for supervisor vs machine mode") Signed-off-by: Anup Patel Reviewed-by: Atish Patra Tested-by: Palmer Dabbelt [QMEU virt machine with SMP] [Palmer: Move the Fixes up to a newer commit] Reviewed-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/traps.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index f4cad5163bf2..ffb3d94bf0cc 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -156,6 +156,6 @@ void __init trap_init(void) csr_write(CSR_SCRATCH, 0); /* Set the exception vector address */ csr_write(CSR_TVEC, &handle_exception); - /* Enable all interrupts */ - csr_write(CSR_IE, -1); + /* Enable interrupts */ + csr_write(CSR_IE, IE_SIE | IE_EIE); } -- cgit v1.2.3-59-g8ed1b From af6565adb02d3129d3fae4d9d5da945abaf4417a Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 17 Feb 2020 13:37:18 +0200 Subject: qede: Fix race between rdma destroy workqueue and link change event If an event is added while the rdma workqueue is being destroyed it could lead to several races, list corruption, null pointer dereference during queue_work or init_queue. This fixes the race between the two flows which can occur during shutdown. A kref object and a completion object are added to the rdma_dev structure, these are initialized before the workqueue is created. The refcnt is used to indicate work is being added to the workqueue and ensures the cleanup flow won't start while we're in the middle of adding the event. Once the work is added, the refcnt is decreased and the cleanup flow is safe to run. Fixes: cee9fbd8e2e ("qede: Add qedr framework") Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede.h | 2 ++ drivers/net/ethernet/qlogic/qede/qede_rdma.c | 29 +++++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index e8a1b27db84d..234c6f30effb 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -163,6 +163,8 @@ struct qede_rdma_dev { struct list_head entry; struct list_head rdma_event_list; struct workqueue_struct *rdma_wq; + struct kref refcnt; + struct completion event_comp; bool exp_recovery; }; diff --git a/drivers/net/ethernet/qlogic/qede/qede_rdma.c b/drivers/net/ethernet/qlogic/qede/qede_rdma.c index ffabc2d2f082..2d873ae8a234 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_rdma.c +++ b/drivers/net/ethernet/qlogic/qede/qede_rdma.c @@ -59,6 +59,9 @@ static void _qede_rdma_dev_add(struct qede_dev *edev) static int qede_rdma_create_wq(struct qede_dev *edev) { INIT_LIST_HEAD(&edev->rdma_info.rdma_event_list); + kref_init(&edev->rdma_info.refcnt); + init_completion(&edev->rdma_info.event_comp); + edev->rdma_info.rdma_wq = create_singlethread_workqueue("rdma_wq"); if (!edev->rdma_info.rdma_wq) { DP_NOTICE(edev, "qedr: Could not create workqueue\n"); @@ -83,8 +86,23 @@ static void qede_rdma_cleanup_event(struct qede_dev *edev) } } +static void qede_rdma_complete_event(struct kref *ref) +{ + struct qede_rdma_dev *rdma_dev = + container_of(ref, struct qede_rdma_dev, refcnt); + + /* no more events will be added after this */ + complete(&rdma_dev->event_comp); +} + static void qede_rdma_destroy_wq(struct qede_dev *edev) { + /* Avoid race with add_event flow, make sure it finishes before + * we start accessing the list and cleaning up the work + */ + kref_put(&edev->rdma_info.refcnt, qede_rdma_complete_event); + wait_for_completion(&edev->rdma_info.event_comp); + qede_rdma_cleanup_event(edev); destroy_workqueue(edev->rdma_info.rdma_wq); } @@ -310,15 +328,24 @@ static void qede_rdma_add_event(struct qede_dev *edev, if (!edev->rdma_info.qedr_dev) return; + /* We don't want the cleanup flow to start while we're allocating and + * scheduling the work + */ + if (!kref_get_unless_zero(&edev->rdma_info.refcnt)) + return; /* already being destroyed */ + event_node = qede_rdma_get_free_event_node(edev); if (!event_node) - return; + goto out; event_node->event = event; event_node->ptr = edev; INIT_WORK(&event_node->work, qede_rdma_handle_event); queue_work(edev->rdma_info.rdma_wq, &event_node->work); + +out: + kref_put(&edev->rdma_info.refcnt, qede_rdma_complete_event); } void qede_rdma_dev_event_open(struct qede_dev *edev) -- cgit v1.2.3-59-g8ed1b From d99bfed58d9698c0ea1dbf47e4fdf4b87cc7203f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 17 Feb 2020 16:54:38 +0100 Subject: mptcp: fix bogus socket flag values Dan Carpenter reports static checker warnings due to bogus BIT() usage: net/mptcp/subflow.c:571 subflow_write_space() warn: test_bit() takes a bit number net/mptcp/subflow.c:694 subflow_state_change() warn: test_bit() takes a bit number net/mptcp/protocol.c:261 ssk_check_wmem() warn: test_bit() takes a bit number [..] This is harmless (we use bits 1 & 2 instead of 0 and 1), but would break eventually when adding BIT(5) (or 6, depends on size of 'long'). Just use 0 and 1, the values are only passed to test/set/clear_bit functions. Fixes: 648ef4b88673 ("mptcp: Implement MPTCP receive path") Reported-by: Dan Carpenter Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/mptcp/protocol.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 8a99a2930284..9f8663b30456 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -56,8 +56,8 @@ #define MPTCP_DSS_FLAG_MASK (0x1F) /* MPTCP socket flags */ -#define MPTCP_DATA_READY BIT(0) -#define MPTCP_SEND_SPACE BIT(1) +#define MPTCP_DATA_READY 0 +#define MPTCP_SEND_SPACE 1 /* MPTCP connection sock */ struct mptcp_sock { -- cgit v1.2.3-59-g8ed1b From bf4498ad3f9a0f7202cf90e52b5ce9bb31700b91 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 17 Feb 2020 20:04:19 -0800 Subject: tmpfs: deny and force are not huge mount options 5.6-rc1 commit 2710c957a8ef ("fs_parse: get rid of ->enums") regressed the huge tmpfs mount options to an earlier state: "deny" and "force" are not valid there, and can crash the kernel. Delete those lines. Signed-off-by: Hugh Dickins Signed-off-by: Al Viro --- mm/shmem.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index c8f7540ef048..aad3ba74b0e9 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3386,8 +3386,6 @@ static const struct constant_table shmem_param_enums_huge[] = { {"always", SHMEM_HUGE_ALWAYS }, {"within_size", SHMEM_HUGE_WITHIN_SIZE }, {"advise", SHMEM_HUGE_ADVISE }, - {"deny", SHMEM_HUGE_DENY }, - {"force", SHMEM_HUGE_FORCE }, {} }; -- cgit v1.2.3-59-g8ed1b From 29f20dd6258a6f9c434992a0f1fc522caecda7ef Mon Sep 17 00:00:00 2001 From: Jonathan Neuschäfer Date: Tue, 18 Feb 2020 16:47:01 +0100 Subject: net: phy: broadcom: Fix a typo ("firsly") MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jonathan Neuschäfer Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/broadcom.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 7d68b28bb893..a62229a8b1a4 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -410,7 +410,7 @@ static int bcm5481_config_aneg(struct phy_device *phydev) struct device_node *np = phydev->mdio.dev.of_node; int ret; - /* Aneg firsly. */ + /* Aneg firstly. */ ret = genphy_config_aneg(phydev); /* Then we can set up the delay. */ @@ -463,7 +463,7 @@ static int bcm54616s_config_aneg(struct phy_device *phydev) { int ret; - /* Aneg firsly. */ + /* Aneg firstly. */ if (phydev->dev_flags & PHY_BCM_FLAGS_MODE_1000BX) ret = genphy_c37_config_aneg(phydev); else -- cgit v1.2.3-59-g8ed1b From 379349e9bc3b42b8b2f8f7a03f64a97623fff323 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 18 Feb 2020 18:15:44 +0100 Subject: Revert "net: dev: introduce support for sch BYPASS for lockless qdisc" This reverts commit ba27b4cdaaa66561aaedb2101876e563738d36fe Ahmed reported ouf-of-order issues bisected to commit ba27b4cdaaa6 ("net: dev: introduce support for sch BYPASS for lockless qdisc"). I can't find any working solution other than a plain revert. This will introduce some minor performance regressions for pfifo_fast qdisc. I plan to address them in net-next with more indirect call wrapper boilerplate for qdiscs. Reported-by: Ahmad Fatoum Fixes: ba27b4cdaaa6 ("net: dev: introduce support for sch BYPASS for lockless qdisc") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/core/dev.c | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 2577ebfed293..e10bd680dc03 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3662,26 +3662,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, qdisc_calculate_pkt_len(skb, q); if (q->flags & TCQ_F_NOLOCK) { - if ((q->flags & TCQ_F_CAN_BYPASS) && READ_ONCE(q->empty) && - qdisc_run_begin(q)) { - if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, - &q->state))) { - __qdisc_drop(skb, &to_free); - rc = NET_XMIT_DROP; - goto end_run; - } - qdisc_bstats_cpu_update(q, skb); - - rc = NET_XMIT_SUCCESS; - if (sch_direct_xmit(skb, q, dev, txq, NULL, true)) - __qdisc_run(q); - -end_run: - qdisc_run_end(q); - } else { - rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; - qdisc_run(q); - } + rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; + qdisc_run(q); if (unlikely(to_free)) kfree_skb_list(to_free); -- cgit v1.2.3-59-g8ed1b From 8c70c3d72833a08214ff8c8df1f7d9778509888d Mon Sep 17 00:00:00 2001 From: Madhuparna Bhowmik Date: Tue, 18 Feb 2020 23:47:18 +0530 Subject: net: netlabel: Use built-in RCU list checking list_for_each_entry_rcu() has built-in RCU and lock checking. Pass cond argument to list_for_each_entry_rcu() to silence false lockdep warning when CONFIG_PROVE_RCU_LIST is enabled by default. Signed-off-by: Madhuparna Bhowmik Signed-off-by: David S. Miller --- net/netlabel/netlabel_unlabeled.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index d2e4ab8d1cb1..77bb1bb22c3b 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -207,7 +207,8 @@ static struct netlbl_unlhsh_iface *netlbl_unlhsh_search_iface(int ifindex) bkt = netlbl_unlhsh_hash(ifindex); bkt_list = &netlbl_unlhsh_rcu_deref(netlbl_unlhsh)->tbl[bkt]; - list_for_each_entry_rcu(iter, bkt_list, list) + list_for_each_entry_rcu(iter, bkt_list, list, + lockdep_is_held(&netlbl_unlhsh_lock)) if (iter->valid && iter->ifindex == ifindex) return iter; -- cgit v1.2.3-59-g8ed1b From 9facfdb5467382a21fc0bd4211ced26c06f28832 Mon Sep 17 00:00:00 2001 From: Madhuparna Bhowmik Date: Wed, 19 Feb 2020 00:11:32 +0530 Subject: netlabel_domainhash.c: Use built-in RCU list checking list_for_each_entry_rcu() has built-in RCU and lock checking. Pass cond argument to list_for_each_entry_rcu() to silence false lockdep warning when CONFIG_PROVE_RCU_LIST is enabled by default. Signed-off-by: Madhuparna Bhowmik Signed-off-by: David S. Miller --- net/netlabel/netlabel_domainhash.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index f5d34da0646e..a1f2320ecc16 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -143,7 +143,8 @@ static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain, if (domain != NULL) { bkt = netlbl_domhsh_hash(domain); bkt_list = &netlbl_domhsh_rcu_deref(netlbl_domhsh)->tbl[bkt]; - list_for_each_entry_rcu(iter, bkt_list, list) + list_for_each_entry_rcu(iter, bkt_list, list, + lockdep_is_held(&netlbl_domhsh_lock)) if (iter->valid && netlbl_family_match(iter->family, family) && strcmp(iter->domain, domain) == 0) -- cgit v1.2.3-59-g8ed1b From 7790614616458b6dd3d90652acfa6b7443ee7041 Mon Sep 17 00:00:00 2001 From: Madhuparna Bhowmik Date: Wed, 19 Feb 2020 01:24:25 +0530 Subject: meter.c: Use built-in RCU list checking hlist_for_each_entry_rcu() has built-in RCU and lock checking. Pass cond argument to list_for_each_entry_rcu() to silence false lockdep warning when CONFIG_PROVE_RCU_LIST is enabled by default. Signed-off-by: Madhuparna Bhowmik Signed-off-by: David S. Miller --- net/openvswitch/meter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index 3323b79ff548..5010d1ddd4bd 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -61,7 +61,8 @@ static struct dp_meter *lookup_meter(const struct datapath *dp, struct hlist_head *head; head = meter_hash_bucket(dp, meter_id); - hlist_for_each_entry_rcu(meter, head, dp_hash_node) { + hlist_for_each_entry_rcu(meter, head, dp_hash_node, + lockdep_ovsl_is_held()) { if (meter->id == meter_id) return meter; } -- cgit v1.2.3-59-g8ed1b From fed48423f14d9fa184b262d7c35d9dc1c3698500 Mon Sep 17 00:00:00 2001 From: Madhuparna Bhowmik Date: Wed, 19 Feb 2020 01:27:42 +0530 Subject: vport.c: Use built-in RCU list checking hlist_for_each_entry_rcu() has built-in RCU and lock checking. Pass cond argument to list_for_each_entry_rcu() to silence false lockdep warning when CONFIG_PROVE_RCU_LIST is enabled by default. Signed-off-by: Madhuparna Bhowmik Signed-off-by: David S. Miller --- net/openvswitch/vport.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 5da9392b03d6..47febb4504f0 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -96,7 +96,8 @@ struct vport *ovs_vport_locate(const struct net *net, const char *name) struct hlist_head *bucket = hash_bucket(net, name); struct vport *vport; - hlist_for_each_entry_rcu(vport, bucket, hash_node) + hlist_for_each_entry_rcu(vport, bucket, hash_node, + lockdep_ovsl_is_held()) if (!strcmp(name, ovs_vport_name(vport)) && net_eq(ovs_dp_get_net(vport->dp), net)) return vport; -- cgit v1.2.3-59-g8ed1b From 53742e69e85d2eb7ed56f58d277bc3e682f8949e Mon Sep 17 00:00:00 2001 From: Madhuparna Bhowmik Date: Wed, 19 Feb 2020 01:28:02 +0530 Subject: datapath.c: Use built-in RCU list checking hlist_for_each_entry_rcu() has built-in RCU and lock checking. Pass cond argument to list_for_each_entry_rcu() to silence false lockdep warning when CONFIG_PROVE_RCU_LIST is enabled by default. Signed-off-by: Madhuparna Bhowmik Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 659c2a790fe7..c047afd12116 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -179,7 +179,8 @@ struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no) struct hlist_head *head; head = vport_hash_bucket(dp, port_no); - hlist_for_each_entry_rcu(vport, head, dp_hash_node) { + hlist_for_each_entry_rcu(vport, head, dp_hash_node, + lockdep_ovsl_is_held()) { if (vport->port_no == port_no) return vport; } @@ -2042,7 +2043,8 @@ static unsigned int ovs_get_max_headroom(struct datapath *dp) int i; for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { - hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) { + hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node, + lockdep_ovsl_is_held()) { dev = vport->dev; dev_headroom = netdev_get_fwd_headroom(dev); if (dev_headroom > max_headroom) @@ -2061,7 +2063,8 @@ static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom) dp->max_headroom = new_headroom; for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) - hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) + hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node, + lockdep_ovsl_is_held()) netdev_set_rx_headroom(vport->dev, new_headroom); } -- cgit v1.2.3-59-g8ed1b From a2cfb96cc3654c6d451020480a4bcfbbca564350 Mon Sep 17 00:00:00 2001 From: Madhuparna Bhowmik Date: Wed, 19 Feb 2020 01:28:20 +0530 Subject: flow_table.c: Use built-in RCU list checking hlist_for_each_entry_rcu() has built-in RCU and lock checking. Pass cond argument to list_for_each_entry_rcu() to silence false lockdep warning when CONFIG_PROVE_RCU_LIST is enabled by default. Signed-off-by: Madhuparna Bhowmik Signed-off-by: David S. Miller --- net/openvswitch/flow_table.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 5904e93e5765..fd8a01ca7a2d 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -585,7 +585,8 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, head = find_bucket(ti, hash); (*n_mask_hit)++; - hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) { + hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver], + lockdep_ovsl_is_held()) { if (flow->mask == mask && flow->flow_table.hash == hash && flow_cmp_masked_key(flow, &masked_key, &mask->range)) return flow; @@ -769,7 +770,8 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl, hash = ufid_hash(ufid); head = find_bucket(ti, hash); - hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver]) { + hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver], + lockdep_ovsl_is_held()) { if (flow->ufid_table.hash == hash && ovs_flow_cmp_ufid(flow, ufid)) return flow; -- cgit v1.2.3-59-g8ed1b From bd97ad51a7eb1b02049deca56bc26d96cabbac8a Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Fri, 14 Feb 2020 18:14:13 +0100 Subject: netfilter: nft_set_pipapo: Fix mapping table example in comments In both insertion and lookup examples, the two element pointers of rule mapping tables were swapped. Fix that. Reported-by: Pablo Neira Ayuso Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Signed-off-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_pipapo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index f0cb1e13af50..579600b39f39 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -203,7 +203,7 @@ * :: * * rule indices in last field: 0 1 - * map to elements: 0x42 0x66 + * map to elements: 0x66 0x42 * * * Matching @@ -298,7 +298,7 @@ * :: * * rule indices in last field: 0 1 - * map to elements: 0x42 0x66 + * map to elements: 0x66 0x42 * * the matching element is at 0x42. * -- cgit v1.2.3-59-g8ed1b From 9a7712048f9d43da5022e75eca3d6b81080e76d3 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Fri, 14 Feb 2020 18:14:14 +0100 Subject: netfilter: nft_set_pipapo: Don't abuse unlikely() in pipapo_refill() I originally used unlikely() in the if (match_only) clause, which we hit on the mapping table for the last field in a set, to ensure we avoid branching to the rest of for loop body, which is executed more frequently. However, Pablo reports, this is confusing as it gives the impression that this is not a common case, and it's actually not the intended usage of unlikely(). I couldn't observe any statistical difference in matching rates on x864_64 and aarch64 without it, so just drop it. Reported-by: Pablo Neira Ayuso Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Signed-off-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_pipapo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 579600b39f39..feac8553f6d9 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -503,7 +503,7 @@ static int pipapo_refill(unsigned long *map, int len, int rules, return -1; } - if (unlikely(match_only)) { + if (match_only) { bitmap_clear(map, i, 1); return i; } -- cgit v1.2.3-59-g8ed1b From 6551d5c56eb0d02db2274d7a8d26c333deba7fd2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 18 Feb 2020 10:12:58 -0800 Subject: pipe: make sure to wake up everybody when the last reader/writer closes Andrei Vagin reported that commit 0ddad21d3e99 ("pipe: use exclusive waits when reading or writing") broke one of the CRIU tests. He even has a trivial reproducer: #include #include #include int main() { int p[2]; pid_t p1, p2; int status; if (pipe(p) == -1) return 1; p1 = fork(); if (p1 == 0) { close(p[1]); read(p[0], &status, sizeof(status)); return 0; } p2 = fork(); if (p2 == 0) { close(p[1]); read(p[0], &status, sizeof(status)); return 0; } sleep(1); close(p[1]); wait(&status); wait(&status); return 0; } and the problem - once he points it out - is obvious. We use these nice exclusive waits, but when the last writer goes away, it then needs to wake up _every_ reader (and conversely, the last reader disappearing needs to wake every writer, of course). In fact, when going through this, we had several small oddities around how to wake things. We did in fact wake every reader when we changed the size of the pipe buffers. But that's entirely pointless, since that just acts as a possible source of new space - no new data to read. And when we change the size of the buffer, we don't need to wake all writers even when we add space - that case acts just as if somebody made space by reading, and any writer that finds itself not filling it up entirely will wake the next one. On the other hand, on the exit path, we tried to limit the wakeups with the proper poll keys etc, which is entirely pointless, because at that point we obviously need to wake up everybody. So don't do that: just wake up everybody - but only do that if the counts changed to zero. So fix those non-IO wakeups to be more proper: space change doesn't add any new data, but it might make room for writers, so it wakes up a writer. And the actual changes to reader/writer counts should wake up everybody, since everybody is affected (ie readers will all see EOF if the writers have gone away, and writers will all get EPIPE if all readers have gone away). Fixes: 0ddad21d3e99 ("pipe: use exclusive waits when reading or writing") Reported-and-tested-by: Andrei Vagin Cc: Josh Triplett Cc: Matthew Wilcox Signed-off-by: Linus Torvalds --- fs/pipe.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 5a34d6c22d4c..2144507447c5 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -722,9 +722,10 @@ pipe_release(struct inode *inode, struct file *file) if (file->f_mode & FMODE_WRITE) pipe->writers--; - if (pipe->readers || pipe->writers) { - wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM | EPOLLERR | EPOLLHUP); - wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM | EPOLLERR | EPOLLHUP); + /* Was that the last reader or writer, but not the other side? */ + if (!pipe->readers != !pipe->writers) { + wake_up_interruptible_all(&pipe->rd_wait); + wake_up_interruptible_all(&pipe->wr_wait); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } @@ -1026,8 +1027,8 @@ static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt) static void wake_up_partner(struct pipe_inode_info *pipe) { - wake_up_interruptible(&pipe->rd_wait); - wake_up_interruptible(&pipe->wr_wait); + wake_up_interruptible_all(&pipe->rd_wait); + wake_up_interruptible_all(&pipe->wr_wait); } static int fifo_open(struct inode *inode, struct file *filp) @@ -1144,7 +1145,7 @@ err_rd: err_wr: if (!--pipe->writers) - wake_up_interruptible(&pipe->rd_wait); + wake_up_interruptible_all(&pipe->rd_wait); ret = -ERESTARTSYS; goto err; @@ -1271,8 +1272,9 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) pipe->max_usage = nr_slots; pipe->tail = tail; pipe->head = head; - wake_up_interruptible_all(&pipe->rd_wait); - wake_up_interruptible_all(&pipe->wr_wait); + + /* This might have made more room for writers */ + wake_up_interruptible(&pipe->wr_wait); return pipe->max_usage * PAGE_SIZE; out_revert_acct: -- cgit v1.2.3-59-g8ed1b From 9eb425b2e04e0e3006adffea5bf5f227a896f128 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 18 Feb 2020 14:09:29 +0000 Subject: powerpc/entry: Fix an #if which should be an #ifdef in entry_32.S Fixes: 12c3f1fd87bf ("powerpc/32s: get rid of CPU_FTR_601 feature") Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a99fc0ad65b87a1ba51cfa3e0e9034ee294c3e07.1582034961.git.christophe.leroy@c-s.fr --- arch/powerpc/kernel/entry_32.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index bc056d906b51..16af0d8d90a8 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -783,7 +783,7 @@ fast_exception_return: 1: lis r3,exc_exit_restart_end@ha addi r3,r3,exc_exit_restart_end@l cmplw r12,r3 -#if CONFIG_PPC_BOOK3S_601 +#ifdef CONFIG_PPC_BOOK3S_601 bge 2b #else bge 3f @@ -791,7 +791,7 @@ fast_exception_return: lis r4,exc_exit_restart@ha addi r4,r4,exc_exit_restart@l cmplw r12,r4 -#if CONFIG_PPC_BOOK3S_601 +#ifdef CONFIG_PPC_BOOK3S_601 blt 2b #else blt 3f -- cgit v1.2.3-59-g8ed1b From 81f7eb00ff5bb8326e82503a32809421d14abb8a Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 11 Feb 2020 15:25:37 +0800 Subject: btrfs: destroy qgroup extent records on transaction abort We clean up the delayed references when we abort a transaction but we leave the pending qgroup extent records behind, leaking memory. This patch destroys the extent records when we destroy the delayed refs and makes sure ensure they're gone before releasing the transaction. Fixes: 3368d001ba5d ("btrfs: qgroup: Record possible quota-related extent for qgroup.") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Josef Bacik Signed-off-by: Jeff Mahoney [ Rebased to latest upstream, remove to_qgroup() helper, use rbtree_postorder_for_each_entry_safe() wrapper ] Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 1 + fs/btrfs/qgroup.c | 13 +++++++++++++ fs/btrfs/qgroup.h | 1 + fs/btrfs/transaction.c | 2 ++ 4 files changed, 17 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 89422aa8e9d1..d7fec89974cb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4276,6 +4276,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, cond_resched(); spin_lock(&delayed_refs->lock); } + btrfs_qgroup_destroy_extent_records(trans); spin_unlock(&delayed_refs->lock); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 98d9a50352d6..ff1870ff3474 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -4002,3 +4002,16 @@ out: } return ret; } + +void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans) +{ + struct btrfs_qgroup_extent_record *entry; + struct btrfs_qgroup_extent_record *next; + struct rb_root *root; + + root = &trans->delayed_refs.dirty_extent_root; + rbtree_postorder_for_each_entry_safe(entry, next, root, node) { + ulist_free(entry->old_roots); + kfree(entry); + } +} diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index 236f12224d52..1bc654459469 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -414,5 +414,6 @@ int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans, u64 last_snapshot); int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *eb); +void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans); #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 33dcc88b428a..beb6c69cd1e5 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -121,6 +121,8 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction) BUG_ON(!list_empty(&transaction->list)); WARN_ON(!RB_EMPTY_ROOT( &transaction->delayed_refs.href_root.rb_root)); + WARN_ON(!RB_EMPTY_ROOT( + &transaction->delayed_refs.dirty_extent_root)); if (transaction->delayed_refs.pending_csums) btrfs_err(transaction->fs_info, "pending csums is %llu", -- cgit v1.2.3-59-g8ed1b From 315bf8ef914f31d51d084af950703aa1e09a728c Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 13 Feb 2020 10:47:28 -0500 Subject: btrfs: reset fs_root to NULL on error in open_ctree While running my error injection script I hit a panic when we tried to clean up the fs_root when freeing the fs_root. This is because fs_info->fs_root == PTR_ERR(-EIO), which isn't great. Fix this by setting fs_info->fs_root = NULL; if we fail to read the root. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Nikolay Borisov Reviewed-by: Johannes Thumshirn Reviewed-by: Qu Wenruo Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d7fec89974cb..197352f23534 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3200,6 +3200,7 @@ int __cold open_ctree(struct super_block *sb, if (IS_ERR(fs_info->fs_root)) { err = PTR_ERR(fs_info->fs_root); btrfs_warn(fs_info, "failed to read fs tree: %d", err); + fs_info->fs_root = NULL; goto fail_qgroup; } -- cgit v1.2.3-59-g8ed1b From 1e90315149f3fe148e114a5de86f0196d1c21fa5 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 13 Feb 2020 10:47:29 -0500 Subject: btrfs: do not check delayed items are empty for single transaction cleanup btrfs_assert_delayed_root_empty() will check if the delayed root is completely empty, but this is a filesystem-wide check. On cleanup we may have allowed other transactions to begin, for whatever reason, and thus the delayed root is not empty. So remove this check from cleanup_one_transation(). This however can stay in btrfs_cleanup_transaction(), because it checks only after all of the transactions have been properly cleaned up, and thus is valid. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Johannes Thumshirn Reviewed-by: Nikolay Borisov Reviewed-by: Qu Wenruo Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 197352f23534..c6c9a6a8e6c8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4503,7 +4503,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, wake_up(&fs_info->transaction_wait); btrfs_destroy_delayed_inodes(fs_info); - btrfs_assert_delayed_root_empty(fs_info); btrfs_destroy_marked_extents(fs_info, &cur_trans->dirty_pages, EXTENT_DIRTY); -- cgit v1.2.3-59-g8ed1b From bd727173e4432fe6cb70ba108dc1f3602c5409d7 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 13 Feb 2020 10:47:30 -0500 Subject: btrfs: handle logged extent failure properly If we're allocating a logged extent we attempt to insert an extent record for the file extent directly. We increase space_info->bytes_reserved, because the extent entry addition will call btrfs_update_block_group(), which will convert the ->bytes_reserved to ->bytes_used. However if we fail at any point while inserting the extent entry we will bail and leave space on ->bytes_reserved, which will trigger a WARN_ON() on umount. Fix this by pinning the space if we fail to insert, which is what happens in every other failure case that involves adding the extent entry. CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Johannes Thumshirn Reviewed-by: Nikolay Borisov Reviewed-by: Qu Wenruo Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0163fdd59f8f..a7bc66121330 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4430,6 +4430,8 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner, offset, ins, 1); + if (ret) + btrfs_pin_extent(fs_info, ins->objectid, ins->offset, 1); btrfs_put_block_group(block_group); return ret; } -- cgit v1.2.3-59-g8ed1b From b778cf962d71a0e737923d55d0432f3bd287258e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 13 Feb 2020 10:47:31 -0500 Subject: btrfs: fix bytes_may_use underflow in prealloc error condtition I hit the following warning while running my error injection stress testing: WARNING: CPU: 3 PID: 1453 at fs/btrfs/space-info.h:108 btrfs_free_reserved_data_space_noquota+0xfd/0x160 [btrfs] RIP: 0010:btrfs_free_reserved_data_space_noquota+0xfd/0x160 [btrfs] Call Trace: btrfs_free_reserved_data_space+0x4f/0x70 [btrfs] __btrfs_prealloc_file_range+0x378/0x470 [btrfs] elfcorehdr_read+0x40/0x40 ? elfcorehdr_read+0x40/0x40 ? btrfs_commit_transaction+0xca/0xa50 [btrfs] ? dput+0xb4/0x2a0 ? btrfs_log_dentry_safe+0x55/0x70 [btrfs] ? btrfs_sync_file+0x30e/0x420 [btrfs] ? do_fsync+0x38/0x70 ? __x64_sys_fdatasync+0x13/0x20 ? do_syscall_64+0x5b/0x1b0 ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 This happens if we fail to insert our reserved file extent. At this point we've already converted our reservation from ->bytes_may_use to ->bytes_reserved. However once we break we will attempt to free everything from [cur_offset, end] from ->bytes_may_use, but our extent reservation will overlap part of this. Fix this problem by adding ins.offset (our extent allocation size) to cur_offset so we remove the actual remaining part from ->bytes_may_use. I validated this fix using my inject-error.py script python inject-error.py -o should_fail_bio -t cache_save_setup -t \ __btrfs_prealloc_file_range \ -t insert_reserved_file_extent.constprop.0 \ -r "-5" ./run-fsstress.sh where run-fsstress.sh simply mounts and runs fsstress on a disk. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Qu Wenruo Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 36deef69f847..4f47ba652b31 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9824,6 +9824,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key ins; u64 cur_offset = start; + u64 clear_offset = start; u64 i_size; u64 cur_bytes; u64 last_alloc = (u64)-1; @@ -9858,6 +9859,15 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, btrfs_end_transaction(trans); break; } + + /* + * We've reserved this space, and thus converted it from + * ->bytes_may_use to ->bytes_reserved. Any error that happens + * from here on out we will only need to clear our reservation + * for the remaining unreserved area, so advance our + * clear_offset by our extent size. + */ + clear_offset += ins.offset; btrfs_dec_block_group_reservations(fs_info, ins.objectid); last_alloc = ins.offset; @@ -9937,9 +9947,9 @@ next: if (own_trans) btrfs_end_transaction(trans); } - if (cur_offset < end) - btrfs_free_reserved_data_space(inode, NULL, cur_offset, - end - cur_offset + 1); + if (clear_offset < end) + btrfs_free_reserved_data_space(inode, NULL, clear_offset, + end - clear_offset + 1); return ret; } -- cgit v1.2.3-59-g8ed1b From e75fd33b3f744f644061a4f9662bd63f5434f806 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 13 Feb 2020 12:29:50 +0000 Subject: Btrfs: fix btrfs_wait_ordered_range() so that it waits for all ordered extents In btrfs_wait_ordered_range() once we find an ordered extent that has finished with an error we exit the loop and don't wait for any other ordered extents that might be still in progress. All the users of btrfs_wait_ordered_range() expect that there are no more ordered extents in progress after that function returns. So past fixes such like the ones from the two following commits: ff612ba7849964 ("btrfs: fix panic during relocation after ENOSPC before writeback happens") 28aeeac1dd3080 ("Btrfs: fix panic when starting bg cache writeout after IO error") don't work when there are multiple ordered extents in the range. Fix that by making btrfs_wait_ordered_range() wait for all ordered extents even after it finds one that had an error. Link: https://github.com/kdave/btrfs-progs/issues/228#issuecomment-569777554 CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Qu Wenruo Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/ordered-data.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index ecb9fb6a6fe0..a65f189a5b94 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -679,10 +679,15 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) } btrfs_start_ordered_extent(inode, ordered, 1); end = ordered->file_offset; + /* + * If the ordered extent had an error save the error but don't + * exit without waiting first for all other ordered extents in + * the range to complete. + */ if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)) ret = -EIO; btrfs_put_ordered_extent(ordered); - if (ret || end == 0 || end == start) + if (end == 0 || end == start) break; end--; } -- cgit v1.2.3-59-g8ed1b From 929a3af90f0f4bd7132d83552c1a98c83f60ef7e Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 19 Feb 2020 00:19:09 +0300 Subject: io_uring: fix use-after-free by io_cleanup_req() io_cleanup_req() should be called before req->io is freed, and so shouldn't be after __io_free_req() -> __io_req_aux_free(). Also, it will be ignored for in io_free_req_many(), which use __io_req_aux_free(). Place cleanup_req() into __io_req_aux_free(). Fixes: 99bc4c38537d774 ("io_uring: fix iovec leaks") Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index d35b45696c73..6e249aa97ba3 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1260,6 +1260,9 @@ static void __io_req_aux_free(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; + if (req->flags & REQ_F_NEED_CLEANUP) + io_cleanup_req(req); + kfree(req->io); if (req->file) { if (req->flags & REQ_F_FIXED_FILE) @@ -1275,9 +1278,6 @@ static void __io_free_req(struct io_kiocb *req) { __io_req_aux_free(req); - if (req->flags & REQ_F_NEED_CLEANUP) - io_cleanup_req(req); - if (req->flags & REQ_F_INFLIGHT) { struct io_ring_ctx *ctx = req->ctx; unsigned long flags; -- cgit v1.2.3-59-g8ed1b From 3d9c5e023a0dbf3e117bb416cfefd9405bf5af0c Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Mon, 3 Feb 2020 16:32:18 -0600 Subject: net/mlx5: Fix sleep while atomic in mlx5_eswitch_get_vepa rtnl_bridge_getlink is protected by rcu lock, so mlx5_eswitch_get_vepa cannot take mutex lock. Two possible issues can happen: 1. User at the same time change vepa mode via RTM_SETLINK command. 2. User at the same time change the switchdev mode via devlink netlink interface. Case 1 cannot happen because rtnl executes one message in order. Case 2 can happen but we do not expect user to change the switchdev mode when changing vepa. Even if a user does it, so he will read a value which is no longer valid. Fixes: 8da202b24913 ("net/mlx5: E-Switch, Add support for VEPA in legacy mode.") Signed-off-by: Huy Nguyen Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 5acf60b1bbfe..564d42605892 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -2452,25 +2452,17 @@ out: int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting) { - int err = 0; - if (!esw) return -EOPNOTSUPP; if (!ESW_ALLOWED(esw)) return -EPERM; - mutex_lock(&esw->state_lock); - if (esw->mode != MLX5_ESWITCH_LEGACY) { - err = -EOPNOTSUPP; - goto out; - } + if (esw->mode != MLX5_ESWITCH_LEGACY) + return -EOPNOTSUPP; *setting = esw->fdb_table.legacy.vepa_uplink_rule ? 1 : 0; - -out: - mutex_unlock(&esw->state_lock); - return err; + return 0; } int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, -- cgit v1.2.3-59-g8ed1b From 5ee090ed0da649b1febae2b7c285ac77d1e55a0c Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Mon, 9 Dec 2019 14:08:18 +0200 Subject: net/mlx5e: Reset RQ doorbell counter before moving RQ state from RST to RDY Initialize RQ doorbell counters to zero prior to moving an RQ from RST to RDY state. Per HW spec, when RQ is back to RDY state, the descriptor ID on the completion is reset. The doorbell record must comply. Fixes: 8276ea1353a4 ("net/mlx5e: Report and recover from CQE with error on RQ") Signed-off-by: Aya Levin Reported-by: Tariq Toukan Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 8 +++++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 ++ drivers/net/ethernet/mellanox/mlx5/core/wq.c | 39 +++++++++++++++++------ drivers/net/ethernet/mellanox/mlx5/core/wq.h | 2 ++ 4 files changed, 43 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 7c8796d9743f..a226277b0980 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -179,6 +179,14 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) } } +static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq) +{ + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + mlx5_wq_ll_reset(&rq->mpwqe.wq); + else + mlx5_wq_cyc_reset(&rq->wqe.wq); +} + /* SW parser related functions */ struct mlx5e_swp_spec { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 454d3459bd8b..966983674663 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -712,6 +712,9 @@ int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state) if (!in) return -ENOMEM; + if (curr_state == MLX5_RQC_STATE_RST && next_state == MLX5_RQC_STATE_RDY) + mlx5e_rqwq_reset(rq); + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); MLX5_SET(modify_rq_in, in, rq_state, curr_state); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c index 02f7e4a39578..01f075fac276 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -94,6 +94,13 @@ void mlx5_wq_cyc_wqe_dump(struct mlx5_wq_cyc *wq, u16 ix, u8 nstrides) print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, wqe, len, false); } +void mlx5_wq_cyc_reset(struct mlx5_wq_cyc *wq) +{ + wq->wqe_ctr = 0; + wq->cur_sz = 0; + mlx5_wq_cyc_update_db_record(wq); +} + int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *qpc, struct mlx5_wq_qp *wq, struct mlx5_wq_ctrl *wq_ctrl) @@ -192,6 +199,19 @@ err_db_free: return err; } +static void mlx5_wq_ll_init_list(struct mlx5_wq_ll *wq) +{ + struct mlx5_wqe_srq_next_seg *next_seg; + int i; + + for (i = 0; i < wq->fbc.sz_m1; i++) { + next_seg = mlx5_wq_ll_get_wqe(wq, i); + next_seg->next_wqe_index = cpu_to_be16(i + 1); + } + next_seg = mlx5_wq_ll_get_wqe(wq, i); + wq->tail_next = &next_seg->next_wqe_index; +} + int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *wqc, struct mlx5_wq_ll *wq, struct mlx5_wq_ctrl *wq_ctrl) @@ -199,9 +219,7 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, u8 log_wq_stride = MLX5_GET(wq, wqc, log_wq_stride); u8 log_wq_sz = MLX5_GET(wq, wqc, log_wq_sz); struct mlx5_frag_buf_ctrl *fbc = &wq->fbc; - struct mlx5_wqe_srq_next_seg *next_seg; int err; - int i; err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { @@ -220,13 +238,7 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, fbc); - for (i = 0; i < fbc->sz_m1; i++) { - next_seg = mlx5_wq_ll_get_wqe(wq, i); - next_seg->next_wqe_index = cpu_to_be16(i + 1); - } - next_seg = mlx5_wq_ll_get_wqe(wq, i); - wq->tail_next = &next_seg->next_wqe_index; - + mlx5_wq_ll_init_list(wq); wq_ctrl->mdev = mdev; return 0; @@ -237,6 +249,15 @@ err_db_free: return err; } +void mlx5_wq_ll_reset(struct mlx5_wq_ll *wq) +{ + wq->head = 0; + wq->wqe_ctr = 0; + wq->cur_sz = 0; + mlx5_wq_ll_init_list(wq); + mlx5_wq_ll_update_db_record(wq); +} + void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl) { mlx5_frag_buf_free(wq_ctrl->mdev, &wq_ctrl->buf); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index d9a94bc223c0..4cadc336593f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -80,6 +80,7 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *wqc, struct mlx5_wq_cyc *wq, struct mlx5_wq_ctrl *wq_ctrl); void mlx5_wq_cyc_wqe_dump(struct mlx5_wq_cyc *wq, u16 ix, u8 nstrides); +void mlx5_wq_cyc_reset(struct mlx5_wq_cyc *wq); int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *qpc, struct mlx5_wq_qp *wq, @@ -92,6 +93,7 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *wqc, struct mlx5_wq_ll *wq, struct mlx5_wq_ctrl *wq_ctrl); +void mlx5_wq_ll_reset(struct mlx5_wq_ll *wq); void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl); -- cgit v1.2.3-59-g8ed1b From 1ad6c43c6a7b8627240c6cc19c69e31fedc596a7 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Wed, 12 Feb 2020 15:17:25 +0200 Subject: net/mlx5e: Fix crash in recovery flow without devlink reporter When health reporters are not supported, recovery function is invoked directly, not via devlink health reporters. In this direct flow, the recover function input parameter was passed incorrectly and is causing a kernel oops. This patch is fixing the input parameter. Following call trace is observed on rx error health reporting. Internal error: Oops: 96000007 [#1] PREEMPT SMP Process kworker/u16:4 (pid: 4584, stack limit = 0x00000000c9e45703) Call trace: mlx5e_rx_reporter_err_rq_cqe_recover+0x30/0x164 [mlx5_core] mlx5e_health_report+0x60/0x6c [mlx5_core] mlx5e_reporter_rq_cqe_err+0x6c/0x90 [mlx5_core] mlx5e_rq_err_cqe_work+0x20/0x2c [mlx5_core] process_one_work+0x168/0x3d0 worker_thread+0x58/0x3d0 kthread+0x108/0x134 Fixes: c50de4af1d63 ("net/mlx5e: Generalize tx reporter's functionality") Signed-off-by: Aya Levin Signed-off-by: Parav Pandit Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/health.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c index 3a975641f902..20b907dc1e29 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c @@ -200,7 +200,7 @@ int mlx5e_health_report(struct mlx5e_priv *priv, netdev_err(priv->netdev, err_str); if (!reporter) - return err_ctx->recover(&err_ctx->ctx); + return err_ctx->recover(err_ctx->ctx); return devlink_health_report(reporter, err_str, err_ctx); } -- cgit v1.2.3-59-g8ed1b From 52d214976d4f64504c1bbb52d47b46a5a3d5ee42 Mon Sep 17 00:00:00 2001 From: Hamdan Igbaria Date: Wed, 5 Feb 2020 14:31:12 +0200 Subject: net/mlx5: DR, Fix matching on vport gvmi Set vport gvmi in the tag, only when source gvmi is set in the bit mask. Fixes: 26d688e3 ("net/mlx5: DR, Add Steering entry (STE) utilities") Signed-off-by: Hamdan Igbaria Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c index c6c7d1defbd7..aade62a9ee5c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -2307,7 +2307,9 @@ static int dr_ste_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, struct mlx5dr_cmd_vport_cap *vport_cap; struct mlx5dr_domain *dmn = sb->dmn; struct mlx5dr_cmd_caps *caps; + u8 *bit_mask = sb->bit_mask; u8 *tag = hw_ste->tag; + bool source_gvmi_set; DR_STE_SET_TAG(src_gvmi_qp, tag, source_qp, misc, source_sqn); @@ -2328,7 +2330,8 @@ static int dr_ste_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, if (!vport_cap) return -EINVAL; - if (vport_cap->vport_gvmi) + source_gvmi_set = MLX5_GET(ste_src_gvmi_qp, bit_mask, source_gvmi); + if (vport_cap->vport_gvmi && source_gvmi_set) MLX5_SET(ste_src_gvmi_qp, tag, source_gvmi, vport_cap->vport_gvmi); misc->source_eswitch_owner_vhca_id = 0; -- cgit v1.2.3-59-g8ed1b From 383de108157c881074f32914b61125e299820bd2 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 12 Feb 2020 11:32:39 +0200 Subject: net/mlx5e: Don't clear the whole vf config when switching modes There is no need to reset all vf config (except link state) between legacy and switchdev modes changes. Also, set link state to AUTO, when legacy enabled. Fixes: 3b83b6c2e024 ("net/mlx5e: Clear VF config when switching modes") Signed-off-by: Dmytro Linkin Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 6 +++++- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 564d42605892..e49acd0c5da5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -459,12 +459,16 @@ static void esw_destroy_legacy_table(struct mlx5_eswitch *esw) static int esw_legacy_enable(struct mlx5_eswitch *esw) { - int ret; + struct mlx5_vport *vport; + int ret, i; ret = esw_create_legacy_table(esw); if (ret) return ret; + mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; + ret = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_LEGACY_SRIOV_VPORT_EVENTS); if (ret) esw_destroy_legacy_table(esw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 979f13bdc203..1a57b2bd74b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1172,7 +1172,7 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, return -EINVAL; } - mlx5_eswitch_disable(esw, true); + mlx5_eswitch_disable(esw, false); mlx5_eswitch_update_num_of_vfs(esw, esw->dev->priv.sriov.num_vfs); err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS); if (err) { @@ -2065,7 +2065,7 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, { int err, err1; - mlx5_eswitch_disable(esw, true); + mlx5_eswitch_disable(esw, false); err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy"); -- cgit v1.2.3-59-g8ed1b From 76781623f009d5615b67f0675230ef90eaa9272a Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 31 Dec 2019 17:04:15 +0200 Subject: net/mlx5: Fix lowest FDB pool size The pool sizes represent the pool sizes in the fw. when we request a pool size from fw, it will return the next possible group. We track how many pools the fw has left and start requesting groups from the big to the small. When we start request 4k group, which doesn't exists in fw, fw wants to allocate the next possible size, 64k, but will fail since its exhausted. The correct smallest pool size in fw is 128 and not 4k. Fixes: 39ac237ce009 ("net/mlx5: E-Switch, Refactor chains and priorities") Signed-off-by: Paul Blakey Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c index c5a446e295aa..4276194b633f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c @@ -35,7 +35,7 @@ static const unsigned int ESW_POOLS[] = { 4 * 1024 * 1024, 1 * 1024 * 1024, 64 * 1024, - 4 * 1024, }; + 128 }; struct mlx5_esw_chains_priv { struct rhashtable chains_ht; -- cgit v1.2.3-59-g8ed1b From 13a7e459a41a56d788ab33d825c6205379bbb711 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Tue, 14 Jan 2020 09:27:27 +0200 Subject: net/mlx5: DR, Handle reformat capability over sw-steering tables On flow table creation, send the relevant flags according to what the FW currently supports. When FW doesn't support reformat option over SW-steering managed table, the driver shouldn't pass this. Fixes: 988fd6b32d07 ("net/mlx5: DR, Pass table flags at creation to lower layer") Signed-off-by: Erez Shitrit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c | 9 +++++++-- include/linux/mlx5/mlx5_ifc.h | 5 ++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index 3abfc8125926..c2027192e21e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -66,15 +66,20 @@ static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *next_ft) { struct mlx5dr_table *tbl; + u32 flags; int err; if (mlx5_dr_is_fw_table(ft->flags)) return mlx5_fs_cmd_get_fw_cmds()->create_flow_table(ns, ft, log_size, next_ft); + flags = ft->flags; + /* turn off encap/decap if not supported for sw-str by fw */ + if (!MLX5_CAP_FLOWTABLE(ns->dev, sw_owner_reformat_supported)) + flags = ft->flags & ~(MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); - tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain, - ft->level, ft->flags); + tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain, ft->level, flags); if (!tbl) { mlx5_core_err(ns->dev, "Failed creating dr flow_table\n"); return -EINVAL; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ff8c9d527bb4..bfdf41537cf1 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -688,7 +688,10 @@ struct mlx5_ifc_flow_table_nic_cap_bits { u8 nic_rx_multi_path_tirs[0x1]; u8 nic_rx_multi_path_tirs_fts[0x1]; u8 allow_sniffer_and_nic_rx_shared_tir[0x1]; - u8 reserved_at_3[0x1d]; + u8 reserved_at_3[0x4]; + u8 sw_owner_reformat_supported[0x1]; + u8 reserved_at_8[0x18]; + u8 encap_general_header[0x1]; u8 reserved_at_21[0xa]; u8 log_max_packet_reformat_context[0x5]; -- cgit v1.2.3-59-g8ed1b From 3dfee47b215e49788cfc80e474820ea2e948c031 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Mon, 10 Feb 2020 15:51:15 +0800 Subject: iommu/amd: Disable IOMMU on Stoney Ridge systems Serious screen flickering when Stoney Ridge outputs to a 4K monitor. Use identity-mapping and PCI ATS doesn't help this issue. According to Alex Deucher, IOMMU isn't enabled on Windows, so let's do the same here to avoid screen flickering on 4K monitor. Cc: Alex Deucher Bug: https://gitlab.freedesktop.org/drm/amd/issues/961 Signed-off-by: Kai-Heng Feng Acked-by: Alex Deucher Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 2759a8d57b7f..6be3853a5d97 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -2523,6 +2523,7 @@ static int __init early_amd_iommu_init(void) struct acpi_table_header *ivrs_base; acpi_status status; int i, remap_cache_sz, ret = 0; + u32 pci_id; if (!amd_iommu_detected) return -ENODEV; @@ -2610,6 +2611,16 @@ static int __init early_amd_iommu_init(void) if (ret) goto out; + /* Disable IOMMU if there's Stoney Ridge graphics */ + for (i = 0; i < 32; i++) { + pci_id = read_pci_config(0, i, 0, 0); + if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) { + pr_info("Disable IOMMU on Stoney Ridge\n"); + amd_iommu_disabled = true; + break; + } + } + /* Disable any previously enabled IOMMUs */ if (!is_kdump_kernel() || amd_iommu_disabled) disable_iommus(); @@ -2718,7 +2729,7 @@ static int __init state_next(void) ret = early_amd_iommu_init(); init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED; if (init_state == IOMMU_ACPI_FINISHED && amd_iommu_disabled) { - pr_info("AMD IOMMU disabled on kernel command-line\n"); + pr_info("AMD IOMMU disabled\n"); init_state = IOMMU_CMDLINE_DISABLED; ret = -EINVAL; } -- cgit v1.2.3-59-g8ed1b From faf305c51aeabd1ea2d7131e798ef5f55f4a7750 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 18 Feb 2020 18:12:41 +0000 Subject: iommu/qcom: Fix bogus detach logic Currently, the implementation of qcom_iommu_domain_free() is guaranteed to do one of two things: WARN() and leak everything, or dereference NULL and crash. That alone is terrible, but in fact the whole idea of trying to track the liveness of a domain via the qcom_domain->iommu pointer as a sanity check is full of fundamentally flawed assumptions. Make things robust and actually functional by not trying to be quite so clever. Reported-by: Brian Masney Tested-by: Brian Masney Reported-by: Naresh Kamboju Fixes: 0ae349a0f33f ("iommu/qcom: Add qcom_iommu") Signed-off-by: Robin Murphy Tested-by: Stephan Gerhold Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: Joerg Roedel --- drivers/iommu/qcom_iommu.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index 39759db4f003..4328da0b0a9f 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -344,21 +344,19 @@ static void qcom_iommu_domain_free(struct iommu_domain *domain) { struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); - if (WARN_ON(qcom_domain->iommu)) /* forgot to detach? */ - return; - iommu_put_dma_cookie(domain); - /* NOTE: unmap can be called after client device is powered off, - * for example, with GPUs or anything involving dma-buf. So we - * cannot rely on the device_link. Make sure the IOMMU is on to - * avoid unclocked accesses in the TLB inv path: - */ - pm_runtime_get_sync(qcom_domain->iommu->dev); - - free_io_pgtable_ops(qcom_domain->pgtbl_ops); - - pm_runtime_put_sync(qcom_domain->iommu->dev); + if (qcom_domain->iommu) { + /* + * NOTE: unmap can be called after client device is powered + * off, for example, with GPUs or anything involving dma-buf. + * So we cannot rely on the device_link. Make sure the IOMMU + * is on to avoid unclocked accesses in the TLB inv path: + */ + pm_runtime_get_sync(qcom_domain->iommu->dev); + free_io_pgtable_ops(qcom_domain->pgtbl_ops); + pm_runtime_put_sync(qcom_domain->iommu->dev); + } kfree(qcom_domain); } @@ -404,7 +402,7 @@ static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *de struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); unsigned i; - if (!qcom_domain->iommu) + if (WARN_ON(!qcom_domain->iommu)) return; pm_runtime_get_sync(qcom_iommu->dev); @@ -417,8 +415,6 @@ static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *de ctx->domain = NULL; } pm_runtime_put_sync(qcom_iommu->dev); - - qcom_domain->iommu = NULL; } static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova, -- cgit v1.2.3-59-g8ed1b From ab362fffa0feb0da23191111e60b641d39130053 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 18 Feb 2020 17:27:56 +0000 Subject: iommu/arm-smmu: Restore naming of driver parameter prefix Extending the Arm SMMU driver to allow for modular builds changed KBUILD_MODNAME to be "arm_smmu_mod" so that a single module could be built from the multiple existing object files without the need to rename any source files. This inadvertently changed the name of the driver parameters, which may lead to runtime issues if bootloaders are relying on the old names for correctness (e.g. "arm-smmu.disable_bypass=0"). Although MODULE_PARAM_PREFIX can be overridden to restore the old naming for builtin parameters, only the new name is matched by modprobe and so loading the driver as a module would cause parameters specified on the kernel command line to be ignored. Instead, rename "arm_smmu_mod" to "arm_smmu". Whilst it's a bit of a bodge, this allows us to create a single module without renaming any files and makes use of the fact that underscores and hyphens can be used interchangeably in parameter names. Cc: Robin Murphy Cc: Russell King Reported-by: Li Yang Fixes: cd221bd24ff5 ("iommu/arm-smmu: Allow building as a module") Signed-off-by: Will Deacon Reviewed-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 2104fb8afc06..9f33fdb3bb05 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -14,8 +14,8 @@ obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o amd_iommu_quirks.o obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += amd_iommu_debugfs.o obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o -obj-$(CONFIG_ARM_SMMU) += arm-smmu-mod.o -arm-smmu-mod-objs += arm-smmu.o arm-smmu-impl.o arm-smmu-qcom.o +obj-$(CONFIG_ARM_SMMU) += arm_smmu.o +arm_smmu-objs += arm-smmu.o arm-smmu-impl.o arm-smmu-qcom.o obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o obj-$(CONFIG_DMAR_TABLE) += dmar.o obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o intel-pasid.o -- cgit v1.2.3-59-g8ed1b From 140588bfed2727e9a4814211617a79401d74922f Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Fri, 14 Feb 2020 18:26:28 +0100 Subject: s390: remove obsolete ieee_emulation_warnings s390 math emulation was removed with commit 5a79859ae0f3 ("s390: remove 31 bit support"), rendering ieee_emulation_warnings useless. The code still built because it was protected by CONFIG_MATHEMU, which was no longer selectable. This patch removes the sysctl_ieee_emulation_warnings declaration and the sysctl entry declaration. Link: https://lkml.kernel.org/r/20200214172628.3598516-1-steve@sk2.org Reviewed-by: Vasily Gorbik Signed-off-by: Stephen Kitt Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/processor.h | 1 - kernel/sysctl.c | 9 --------- 2 files changed, 10 deletions(-) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 361ef5eda468..aadb3d0e2adc 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -84,7 +84,6 @@ void s390_update_cpu_mhz(void); void cpu_detect_mhz_feature(void); extern const struct seq_operations cpuinfo_op; -extern int sysctl_ieee_emulation_warnings; extern void execve_tail(void); extern void __bpon(void); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index d396aaaf19a3..ad5b88a53c5a 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -805,15 +805,6 @@ static struct ctl_table kern_table[] = { .extra2 = &maxolduid, }, #ifdef CONFIG_S390 -#ifdef CONFIG_MATHEMU - { - .procname = "ieee_emulation_warnings", - .data = &sysctl_ieee_emulation_warnings, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, -#endif { .procname = "userprocess_debug", .data = &show_unhandled_signals, -- cgit v1.2.3-59-g8ed1b From d0022c0ef29b78bcbe8a5c5894bd2307143afce1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 19 Feb 2020 10:19:13 +0000 Subject: arm64: memory: Add missing brackets to untagged_addr() macro Add brackets around the evaluation of the 'addr' parameter to the untagged_addr() macro so that the cast to 'u64' applies to the result of the expression. Cc: Fixes: 597399d0cb91 ("arm64: tags: Preserve tags for addresses translated via TTBR1") Reported-by: Linus Torvalds Signed-off-by: Will Deacon --- arch/arm64/include/asm/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index a4f9ca5479b0..4d94676e5a8b 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -213,7 +213,7 @@ static inline unsigned long kaslr_offset(void) ((__force __typeof__(addr))sign_extend64((__force u64)(addr), 55)) #define untagged_addr(addr) ({ \ - u64 __addr = (__force u64)addr; \ + u64 __addr = (__force u64)(addr); \ __addr &= __untagged_addr(__addr); \ (__force __typeof__(addr))__addr; \ }) -- cgit v1.2.3-59-g8ed1b From 1fae37accfc5872af3905d4ba71dc6ab15829be7 Mon Sep 17 00:00:00 2001 From: Shyjumon N Date: Thu, 6 Feb 2020 13:17:25 -0700 Subject: nvme/pci: Add sleep quirk for Samsung and Toshiba drives The Samsung SSD SM981/PM981 and Toshiba SSD KBG40ZNT256G on the Lenovo C640 platform experience runtime resume issues when the SSDs are kept in sleep/suspend mode for long time. This patch applies the 'Simple Suspend' quirk to these configurations. With this patch, the issue had not been observed in a 1+ day test. Reviewed-by: Jon Derrick Reviewed-by: Christoph Hellwig Signed-off-by: Shyjumon N Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 9c80f9f08149..b0434b687b17 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2747,6 +2747,18 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) (dmi_match(DMI_BOARD_NAME, "PRIME B350M-A") || dmi_match(DMI_BOARD_NAME, "PRIME Z370-A"))) return NVME_QUIRK_NO_APST; + } else if ((pdev->vendor == 0x144d && (pdev->device == 0xa801 || + pdev->device == 0xa808 || pdev->device == 0xa809)) || + (pdev->vendor == 0x1e0f && pdev->device == 0x0001)) { + /* + * Forcing to use host managed nvme power settings for + * lowest idle power with quick resume latency on + * Samsung and Toshiba SSDs based on suspend behavior + * on Coffee Lake board for LENOVO C640 + */ + if ((dmi_match(DMI_BOARD_VENDOR, "LENOVO")) && + dmi_match(DMI_BOARD_NAME, "LNVNB161216")) + return NVME_QUIRK_SIMPLE_SUSPEND; } return 0; -- cgit v1.2.3-59-g8ed1b From 98f7b86a0becc1154b1a6df6e75c9695dfd87e0d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 12 Feb 2020 12:32:18 +0200 Subject: nvme-pci: Use single IRQ vector for old Apple models People reported that old Apple machines are not working properly if the non-first IRQ vector is in use. Set quirk for that models to limit IRQ to use first vector only. Based on original patch by GitHub user npx001. Link: https://github.com/Dunedan/mbp-2016-linux/issues/9 Cc: Benjamin Herrenschmidt Cc: Leif Liddy Signed-off-by: Andy Shevchenko Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b0434b687b17..ace4dd9e953c 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3121,7 +3121,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_NO_DEEPEST_PS | NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, - { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) }, + { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001), + .driver_data = NVME_QUIRK_SINGLE_VECTOR }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2005), .driver_data = NVME_QUIRK_SINGLE_VECTOR | -- cgit v1.2.3-59-g8ed1b From debcf83770073f90c9b075134650fdc758ff3033 Mon Sep 17 00:00:00 2001 From: changzhu Date: Fri, 14 Feb 2020 10:57:17 +0800 Subject: drm/amdgpu: add is_raven_kicker judgement for raven1 The rlc version of raven_kicer_rlc is different from the legacy rlc version of raven_rlc. So it needs to add a judgement function for raven_kicer_rlc and avoid disable GFXOFF when loading raven_kicer_rlc. Signed-off-by: changzhu Reviewed-by: Huang Rui Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 6d6aca08d6fa..3afdbbd6aaad 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1193,6 +1193,14 @@ static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev) return false; } +static bool is_raven_kicker(struct amdgpu_device *adev) +{ + if (adev->pm.fw_version >= 0x41e2b) + return true; + else + return false; +} + static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) { if (gfx_v9_0_should_disable_gfxoff(adev->pdev)) @@ -1205,9 +1213,8 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) break; case CHIP_RAVEN: if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) && - ((adev->gfx.rlc_fw_version != 106 && + ((!is_raven_kicker(adev) && adev->gfx.rlc_fw_version < 531) || - (adev->gfx.rlc_fw_version == 53815) || (adev->gfx.rlc_feature_version < 1) || !adev->gfx.rlc.is_rlc_v2_1)) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; -- cgit v1.2.3-59-g8ed1b From 6c62ce8073daf27ae3fd03b6929d6cea3887eeb2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 18 Feb 2020 13:20:30 -0500 Subject: drm/amdgpu/display: clean up hdcp workqueue handling Use the existence of the workqueue itself to determine when to enable HDCP features rather than sprinkling asic checks all over the code. Also add a check for the existence of the hdcp workqueue in the irq handling on the off chance we get and HPD RX interrupt with the CP bit set. This avoids a crash if the driver doesn't support HDCP for a particular asic. Fixes: 96a3b32e67236f ("drm/amd/display: only enable HDCP for DCN+") Bug: https://bugzilla.kernel.org/show_bug.cgi?id=206519 Reviewed-by: Bhawanpreet Lakha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 63e8a12a74bc..e8f66fbf399e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1911,7 +1911,7 @@ static void handle_hpd_irq(void *param) mutex_lock(&aconnector->hpd_lock); #ifdef CONFIG_DRM_AMD_DC_HDCP - if (adev->asic_type >= CHIP_RAVEN) + if (adev->dm.hdcp_workqueue) hdcp_reset_display(adev->dm.hdcp_workqueue, aconnector->dc_link->link_index); #endif if (aconnector->fake_enable) @@ -2088,8 +2088,10 @@ static void handle_hpd_rx_irq(void *param) } } #ifdef CONFIG_DRM_AMD_DC_HDCP - if (hpd_irq_data.bytes.device_service_irq.bits.CP_IRQ) - hdcp_handle_cpirq(adev->dm.hdcp_workqueue, aconnector->base.index); + if (hpd_irq_data.bytes.device_service_irq.bits.CP_IRQ) { + if (adev->dm.hdcp_workqueue) + hdcp_handle_cpirq(adev->dm.hdcp_workqueue, aconnector->base.index); + } #endif if ((dc_link->cur_link_settings.lane_count != LANE_COUNT_UNKNOWN) || (dc_link->type == dc_connection_mst_branch)) @@ -5702,7 +5704,7 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm, drm_connector_attach_vrr_capable_property( &aconnector->base); #ifdef CONFIG_DRM_AMD_DC_HDCP - if (adev->asic_type >= CHIP_RAVEN) + if (adev->dm.hdcp_workqueue) drm_connector_attach_content_protection_property(&aconnector->base, true); #endif } -- cgit v1.2.3-59-g8ed1b From df6d4f9db79c1a5d6f48b59db35ccd1e9ff9adfc Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Thu, 16 Jan 2020 12:46:51 -0800 Subject: x86/boot/compressed: Don't declare __force_order in kaslr_64.c GCC 10 changed the default to -fno-common, which leads to LD arch/x86/boot/compressed/vmlinux ld: arch/x86/boot/compressed/pgtable_64.o:(.bss+0x0): multiple definition of `__force_order'; \ arch/x86/boot/compressed/kaslr_64.o:(.bss+0x0): first defined here make[2]: *** [arch/x86/boot/compressed/Makefile:119: arch/x86/boot/compressed/vmlinux] Error 1 Since __force_order is already provided in pgtable_64.c, there is no need to declare __force_order in kaslr_64.c. Signed-off-by: H.J. Lu Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200124181811.4780-1-hjl.tools@gmail.com --- arch/x86/boot/compressed/kaslr_64.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/boot/compressed/kaslr_64.c b/arch/x86/boot/compressed/kaslr_64.c index 748456c365f4..9557c5a15b91 100644 --- a/arch/x86/boot/compressed/kaslr_64.c +++ b/arch/x86/boot/compressed/kaslr_64.c @@ -29,9 +29,6 @@ #define __PAGE_OFFSET __PAGE_OFFSET_BASE #include "../../mm/ident_map.c" -/* Used by pgtable.h asm code to force instruction serialization. */ -unsigned long __force_order; - /* Used to track our page table allocation area. */ struct alloc_pgt_data { unsigned char *pgt_buf; -- cgit v1.2.3-59-g8ed1b From e9091ffd6a0aaced111b5d6ead5eaab5cd7101bc Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 10 Feb 2020 10:48:11 +0100 Subject: s390/qdio: fill SL with absolute addresses As the comment says, sl->sbal holds an absolute address. qeth currently solves this through wild casting, while zfcp doesn't care. Handle this properly in the code that actually builds the SL. Signed-off-by: Julian Wiedmann Reviewed-by: Alexandra Winter Reviewed-by: Steffen Maier [for qdio] Reviewed-by: Benjamin Block Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/qdio.h | 2 +- drivers/s390/cio/qdio_setup.c | 3 ++- drivers/s390/net/qeth_core_main.c | 23 +++++++++++------------ 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 71e3f0146cda..7870cf834533 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -227,7 +227,7 @@ struct qdio_buffer { * @sbal: absolute SBAL address */ struct sl_element { - unsigned long sbal; + u64 sbal; } __attribute__ ((packed)); /** diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index 3ab8e80d7bbc..e115623b86b2 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include "cio.h" @@ -205,7 +206,7 @@ static void setup_storage_lists(struct qdio_q *q, struct qdio_irq *irq_ptr, /* fill in sl */ for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++) - q->sl->element[j].sbal = (unsigned long)q->sbal[j]; + q->sl->element[j].sbal = virt_to_phys(q->sbal[j]); } static void setup_queues(struct qdio_irq *irq_ptr, diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 9639938581f5..2c83d489f099 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -4746,10 +4746,10 @@ static void qeth_qdio_establish_cq(struct qeth_card *card, if (card->options.cq == QETH_CQ_ENABLED) { int offset = QDIO_MAX_BUFFERS_PER_Q * (card->qdio.no_in_queues - 1); - for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; ++i) { - in_sbal_ptrs[offset + i] = (struct qdio_buffer *) - virt_to_phys(card->qdio.c_q->bufs[i].buffer); - } + + for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; i++) + in_sbal_ptrs[offset + i] = + card->qdio.c_q->bufs[i].buffer; queue_start_poll[card->qdio.no_in_queues - 1] = NULL; } @@ -4783,10 +4783,9 @@ static int qeth_qdio_establish(struct qeth_card *card) rc = -ENOMEM; goto out_free_qib_param; } - for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; ++i) { - in_sbal_ptrs[i] = (struct qdio_buffer *) - virt_to_phys(card->qdio.in_q->bufs[i].buffer); - } + + for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; i++) + in_sbal_ptrs[i] = card->qdio.in_q->bufs[i].buffer; queue_start_poll = kcalloc(card->qdio.no_in_queues, sizeof(void *), GFP_KERNEL); @@ -4807,11 +4806,11 @@ static int qeth_qdio_establish(struct qeth_card *card) rc = -ENOMEM; goto out_free_queue_start_poll; } + for (i = 0, k = 0; i < card->qdio.no_out_queues; ++i) - for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j, ++k) { - out_sbal_ptrs[k] = (struct qdio_buffer *)virt_to_phys( - card->qdio.out_qs[i]->bufs[j]->buffer); - } + for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++, k++) + out_sbal_ptrs[k] = + card->qdio.out_qs[i]->bufs[j]->buffer; memset(&init_data, 0, sizeof(struct qdio_initialize)); init_data.cdev = CARD_DDEV(card); -- cgit v1.2.3-59-g8ed1b From 2db01da8d25f0420c411e788a9e1ba39269ae37b Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 11 Feb 2020 09:27:38 +0100 Subject: s390/qdio: fill SBALEs with absolute addresses sbale->addr holds an absolute address (or for some FCP usage, an opaque request ID), and should only be used with proper virt/phys translation. Signed-off-by: Julian Wiedmann Reviewed-by: Benjamin Block Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/qdio.h | 4 ++-- drivers/s390/net/qeth_core_main.c | 23 ++++++++++++----------- drivers/s390/scsi/zfcp_fsf.c | 2 +- drivers/s390/scsi/zfcp_qdio.c | 6 +++--- drivers/s390/scsi/zfcp_qdio.h | 6 +++--- 5 files changed, 21 insertions(+), 20 deletions(-) diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 7870cf834533..1e3517b0518b 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -201,7 +201,7 @@ struct slib { * @scount: SBAL count * @sflags: whole SBAL flags * @length: length - * @addr: address + * @addr: absolute data address */ struct qdio_buffer_element { u8 eflags; @@ -211,7 +211,7 @@ struct qdio_buffer_element { u8 scount; u8 sflags; u32 length; - void *addr; + u64 addr; } __attribute__ ((packed, aligned(16))); /** diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 2c83d489f099..03935466265c 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1128,9 +1128,10 @@ static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue, qeth_tx_complete_buf(buf, error, budget); for (i = 0; i < queue->max_elements; ++i) { - if (buf->buffer->element[i].addr && buf->is_header[i]) - kmem_cache_free(qeth_core_header_cache, - buf->buffer->element[i].addr); + void *data = phys_to_virt(buf->buffer->element[i].addr); + + if (data && buf->is_header[i]) + kmem_cache_free(qeth_core_header_cache, data); buf->is_header[i] = 0; } @@ -2641,7 +2642,8 @@ static int qeth_init_input_buffer(struct qeth_card *card, buf->pool_entry = pool_entry; for (i = 0; i < QETH_MAX_BUFFER_ELEMENTS(card); ++i) { buf->buffer->element[i].length = PAGE_SIZE; - buf->buffer->element[i].addr = pool_entry->elements[i]; + buf->buffer->element[i].addr = + virt_to_phys(pool_entry->elements[i]); if (i == QETH_MAX_BUFFER_ELEMENTS(card) - 1) buf->buffer->element[i].eflags = SBAL_EFLAGS_LAST_ENTRY; else @@ -3459,9 +3461,8 @@ static void qeth_qdio_cq_handler(struct qeth_card *card, unsigned int qdio_err, while ((e < QDIO_MAX_ELEMENTS_PER_BUFFER) && buffer->element[e].addr) { - unsigned long phys_aob_addr; + unsigned long phys_aob_addr = buffer->element[e].addr; - phys_aob_addr = (unsigned long) buffer->element[e].addr; qeth_qdio_handle_aob(card, phys_aob_addr); ++e; } @@ -3750,7 +3751,7 @@ static unsigned int __qeth_fill_buffer(struct sk_buff *skb, elem_length = min_t(unsigned int, length, PAGE_SIZE - offset_in_page(data)); - buffer->element[element].addr = data; + buffer->element[element].addr = virt_to_phys(data); buffer->element[element].length = elem_length; length -= elem_length; if (is_first_elem) { @@ -3780,7 +3781,7 @@ static unsigned int __qeth_fill_buffer(struct sk_buff *skb, elem_length = min_t(unsigned int, length, PAGE_SIZE - offset_in_page(data)); - buffer->element[element].addr = data; + buffer->element[element].addr = virt_to_phys(data); buffer->element[element].length = elem_length; buffer->element[element].eflags = SBAL_EFLAGS_MIDDLE_FRAG; @@ -3820,7 +3821,7 @@ static unsigned int qeth_fill_buffer(struct qeth_qdio_out_buffer *buf, int element = buf->next_element_to_fill; is_first_elem = false; - buffer->element[element].addr = hdr; + buffer->element[element].addr = virt_to_phys(hdr); buffer->element[element].length = hd_len; buffer->element[element].eflags = SBAL_EFLAGS_FIRST_FRAG; /* remember to free cache-allocated qeth_hdr: */ @@ -5288,7 +5289,7 @@ next_packet: offset = 0; } - hdr = element->addr + offset; + hdr = phys_to_virt(element->addr) + offset; offset += sizeof(*hdr); skb = NULL; @@ -5387,7 +5388,7 @@ use_skb: walk_packet: while (skb_len) { int data_len = min(skb_len, (int)(element->length - offset)); - char *data = element->addr + offset; + char *data = phys_to_virt(element->addr) + offset; skb_len -= data_len; offset += data_len; diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 223a805f0b0b..cae9b7ff79b0 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -2510,7 +2510,7 @@ void zfcp_fsf_reqid_check(struct zfcp_qdio *qdio, int sbal_idx) for (idx = 0; idx < QDIO_MAX_ELEMENTS_PER_BUFFER; idx++) { sbale = &sbal->element[idx]; - req_id = (unsigned long) sbale->addr; + req_id = sbale->addr; fsf_req = zfcp_reqlist_find_rm(adapter->req_list, req_id); if (!fsf_req) { diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c index 661436a92f8e..f0d6296e673b 100644 --- a/drivers/s390/scsi/zfcp_qdio.c +++ b/drivers/s390/scsi/zfcp_qdio.c @@ -98,7 +98,7 @@ static void zfcp_qdio_int_resp(struct ccw_device *cdev, unsigned int qdio_err, memset(pl, 0, ZFCP_QDIO_MAX_SBALS_PER_REQ * sizeof(void *)); sbale = qdio->res_q[idx]->element; - req_id = (u64) sbale->addr; + req_id = sbale->addr; scount = min(sbale->scount + 1, ZFCP_QDIO_MAX_SBALS_PER_REQ + 1); /* incl. signaling SBAL */ @@ -199,7 +199,7 @@ int zfcp_qdio_sbals_from_sg(struct zfcp_qdio *qdio, struct zfcp_qdio_req *q_req, q_req->sbal_number); return -EINVAL; } - sbale->addr = sg_virt(sg); + sbale->addr = sg_phys(sg); sbale->length = sg->length; } return 0; @@ -418,7 +418,7 @@ int zfcp_qdio_open(struct zfcp_qdio *qdio) sbale->length = 0; sbale->eflags = SBAL_EFLAGS_LAST_ENTRY; sbale->sflags = 0; - sbale->addr = NULL; + sbale->addr = 0; } if (do_QDIO(cdev, QDIO_FLAG_SYNC_INPUT, 0, 0, QDIO_MAX_BUFFERS_PER_Q)) diff --git a/drivers/s390/scsi/zfcp_qdio.h b/drivers/s390/scsi/zfcp_qdio.h index 2a816a37b3c0..6b43d6b254be 100644 --- a/drivers/s390/scsi/zfcp_qdio.h +++ b/drivers/s390/scsi/zfcp_qdio.h @@ -122,14 +122,14 @@ void zfcp_qdio_req_init(struct zfcp_qdio *qdio, struct zfcp_qdio_req *q_req, % QDIO_MAX_BUFFERS_PER_Q; sbale = zfcp_qdio_sbale_req(qdio, q_req); - sbale->addr = (void *) req_id; + sbale->addr = req_id; sbale->eflags = 0; sbale->sflags = SBAL_SFLAGS0_COMMAND | sbtype; if (unlikely(!data)) return; sbale++; - sbale->addr = data; + sbale->addr = virt_to_phys(data); sbale->length = len; } @@ -152,7 +152,7 @@ void zfcp_qdio_fill_next(struct zfcp_qdio *qdio, struct zfcp_qdio_req *q_req, BUG_ON(q_req->sbale_curr == qdio->max_sbale_per_sbal - 1); q_req->sbale_curr++; sbale = zfcp_qdio_sbale_curr(qdio, q_req); - sbale->addr = data; + sbale->addr = virt_to_phys(data); sbale->length = len; } -- cgit v1.2.3-59-g8ed1b From 15755854d53b4bbb0bb37a0fce66f0156cfc8a17 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 20 Feb 2020 00:59:36 +0900 Subject: nvme: Fix uninitialized-variable warning gcc may detect a false positive on nvme using an unintialized variable if setting features fails. Since this is not a fast path, explicitly initialize this variable to suppress the warning. Reported-by: Arnd Bergmann Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ada59df642d2..a4d8c90ee7cc 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1165,8 +1165,8 @@ static int nvme_identify_ns(struct nvme_ctrl *ctrl, static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid, unsigned int dword11, void *buffer, size_t buflen, u32 *result) { + union nvme_result res = { 0 }; struct nvme_command c; - union nvme_result res; int ret; memset(&c, 0, sizeof(c)); -- cgit v1.2.3-59-g8ed1b From 72cf3b3df423c1bbd8fa1056fed009d3a260f8a9 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 17 Feb 2020 14:11:49 -0700 Subject: MIPS: vdso: Wrap -mexplicit-relocs in cc-option Clang does not support this option and errors out: clang-11: error: unknown argument: '-mexplicit-relocs' Clang does not appear to need this flag like GCC does because the jalr check that was added in commit 976c23af3ee5 ("mips: vdso: add build time check that no 'jalr t9' calls left") passes just fine with $ make ARCH=mips CC=clang CROSS_COMPILE=mipsel-linux-gnu- malta_defconfig arch/mips/vdso/ even before commit d3f703c4359f ("mips: vdso: fix 'jalr t9' crash in vdso code"). -mrelax-pic-calls has been supported since clang 9, which is the earliest version that could build a working MIPS kernel, and it is the default for clang so just leave it be. Fixes: d3f703c4359f ("mips: vdso: fix 'jalr t9' crash in vdso code") Link: https://github.com/ClangBuiltLinux/linux/issues/890 Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: clang-built-linux@googlegroups.com --- arch/mips/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index 77374c1f0c77..d7fe8408603e 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -33,7 +33,7 @@ endif cflags-vdso := $(ccflags-vdso) \ $(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \ -O3 -g -fPIC -fno-strict-aliasing -fno-common -fno-builtin -G 0 \ - -mrelax-pic-calls -mexplicit-relocs \ + -mrelax-pic-calls $(call cc-option, -mexplicit-relocs) \ -fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \ $(call cc-option, -fno-asynchronous-unwind-tables) \ $(call cc-option, -fno-stack-protector) -- cgit v1.2.3-59-g8ed1b From eb41113870c94dd7e519c69184efc171b7327699 Mon Sep 17 00:00:00 2001 From: "周琰杰 (Zhou Yanjie)" Date: Sun, 16 Feb 2020 20:10:28 +0800 Subject: MIPS: X1000: Fix clock of watchdog node. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The devicetree ABI was broken on purpose by commit 6d532143c915 ("watchdog: jz4740: Use regmap provided by TCU driver"), and commit 1d9c30745455 ("watchdog: jz4740: Use WDT clock provided by TCU driver"). The commit message of the latter explains why the ABI was broken. However, the current devicetree files were not updated to the new ABI described in Documentation/devicetree/bindings/timer/ingenic,tcu.txt, so the watchdog driver would not probe. Fix this problem by updating the clock of watchdog node from "&cgu X1000_CLK_RTCLK" to "&tcu TCU_CLK_WDT" to comply with the new ABI. Fixes: 7a16ccd300c2 ("[v8,1/4] MIPS: Ingenic: Add Ingenic X1000 support."). Signed-off-by: 周琰杰 (Zhou Yanjie) Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: devicetree@vger.kernel.org Cc: paul@crapouillou.net Cc: robh+dt@kernel.org Cc: mark.rutland@arm.com Cc: ralf@linux-mips.org Cc: sernia.zhou@foxmail.com Cc: zhenwenjin@gmail.com Cc: dongsheng.qiu@ingenic.com --- arch/mips/boot/dts/ingenic/x1000.dtsi | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/mips/boot/dts/ingenic/x1000.dtsi b/arch/mips/boot/dts/ingenic/x1000.dtsi index 4994c695a1a7..147f7d5c243a 100644 --- a/arch/mips/boot/dts/ingenic/x1000.dtsi +++ b/arch/mips/boot/dts/ingenic/x1000.dtsi @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include @@ -72,7 +73,7 @@ compatible = "ingenic,x1000-watchdog", "ingenic,jz4780-watchdog"; reg = <0x0 0x10>; - clocks = <&cgu X1000_CLK_RTCLK>; + clocks = <&tcu TCU_CLK_WDT>; clock-names = "wdt"; }; }; @@ -158,7 +159,6 @@ i2c0: i2c-controller@10050000 { compatible = "ingenic,x1000-i2c"; reg = <0x10050000 0x1000>; - #address-cells = <1>; #size-cells = <0>; @@ -173,7 +173,6 @@ i2c1: i2c-controller@10051000 { compatible = "ingenic,x1000-i2c"; reg = <0x10051000 0x1000>; - #address-cells = <1>; #size-cells = <0>; @@ -188,7 +187,6 @@ i2c2: i2c-controller@10052000 { compatible = "ingenic,x1000-i2c"; reg = <0x10052000 0x1000>; - #address-cells = <1>; #size-cells = <0>; -- cgit v1.2.3-59-g8ed1b From 11479e8e3cd896673a15af21cd0f145a4752f01a Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Tue, 11 Feb 2020 11:53:37 -0300 Subject: MIPS: ingenic: DTS: Fix watchdog nodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The devicetree ABI was broken on purpose by commit 6d532143c915 ("watchdog: jz4740: Use regmap provided by TCU driver"), and commit 1d9c30745455 ("watchdog: jz4740: Use WDT clock provided by TCU driver"). The commit message of the latter explains why the ABI was broken. However, the current devicetree files were not updated to the new ABI described in Documentation/devicetree/bindings/timer/ingenic,tcu.txt, so the watchdog driver would not probe. Fix this problem by updating the watchdog nodes to comply with the new ABI. Fixes: 6d532143c915 ("watchdog: jz4740: Use regmap provided by TCU driver") Signed-off-by: Paul Cercueil Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: Rob Herring Cc: Mark Rutland Cc: Zhou Yanjie Cc: od@zcrc.me Cc: linux-mips@vger.kernel.org Cc: devicetree@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: # v5.5+ --- arch/mips/boot/dts/ingenic/jz4740.dtsi | 17 +++++++++-------- arch/mips/boot/dts/ingenic/jz4780.dtsi | 17 +++++++++-------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/arch/mips/boot/dts/ingenic/jz4740.dtsi b/arch/mips/boot/dts/ingenic/jz4740.dtsi index 5accda2767be..a3301bab9231 100644 --- a/arch/mips/boot/dts/ingenic/jz4740.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4740.dtsi @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include / { #address-cells = <1>; @@ -45,14 +46,6 @@ #clock-cells = <1>; }; - watchdog: watchdog@10002000 { - compatible = "ingenic,jz4740-watchdog"; - reg = <0x10002000 0x10>; - - clocks = <&cgu JZ4740_CLK_RTC>; - clock-names = "rtc"; - }; - tcu: timer@10002000 { compatible = "ingenic,jz4740-tcu", "simple-mfd"; reg = <0x10002000 0x1000>; @@ -73,6 +66,14 @@ interrupt-parent = <&intc>; interrupts = <23 22 21>; + + watchdog: watchdog@0 { + compatible = "ingenic,jz4740-watchdog"; + reg = <0x0 0xc>; + + clocks = <&tcu TCU_CLK_WDT>; + clock-names = "wdt"; + }; }; rtc_dev: rtc@10003000 { diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi index f928329b034b..bb89653d16a3 100644 --- a/arch/mips/boot/dts/ingenic/jz4780.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include / { @@ -67,6 +68,14 @@ interrupt-parent = <&intc>; interrupts = <27 26 25>; + + watchdog: watchdog@0 { + compatible = "ingenic,jz4780-watchdog"; + reg = <0x0 0xc>; + + clocks = <&tcu TCU_CLK_WDT>; + clock-names = "wdt"; + }; }; rtc_dev: rtc@10003000 { @@ -348,14 +357,6 @@ status = "disabled"; }; - watchdog: watchdog@10002000 { - compatible = "ingenic,jz4780-watchdog"; - reg = <0x10002000 0x10>; - - clocks = <&cgu JZ4780_CLK_RTCLK>; - clock-names = "rtc"; - }; - nemc: nemc@13410000 { compatible = "ingenic,jz4780-nemc"; reg = <0x13410000 0x10000>; -- cgit v1.2.3-59-g8ed1b From a7a9456e8d28e81030f7cf6f1f59f907089916a9 Mon Sep 17 00:00:00 2001 From: Amol Grover Date: Wed, 19 Feb 2020 15:30:11 +0530 Subject: net: hsr: Pass lockdep expression to RCU lists node_db is traversed using list_for_each_entry_rcu outside an RCU read-side critical section but under the protection of hsr->list_lock. Hence, add corresponding lockdep expression to silence false-positive warnings, and harden RCU lists. Signed-off-by: Amol Grover Signed-off-by: David S. Miller --- net/hsr/hsr_framereg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 364ea2cc028e..3ba7f61be107 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -155,7 +155,8 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr, new_node->seq_out[i] = seq_out; spin_lock_bh(&hsr->list_lock); - list_for_each_entry_rcu(node, node_db, mac_list) { + list_for_each_entry_rcu(node, node_db, mac_list, + lockdep_is_held(&hsr->list_lock)) { if (ether_addr_equal(node->macaddress_A, addr)) goto out; if (ether_addr_equal(node->macaddress_B, addr)) -- cgit v1.2.3-59-g8ed1b From 21b5ee59ef18e27d85810584caf1f7ddc705ea83 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Wed, 19 Feb 2020 18:52:43 +0100 Subject: x86/cpu/amd: Enable the fixed Instructions Retired counter IRPERF Commit aaf248848db50 ("perf/x86/msr: Add AMD IRPERF (Instructions Retired) performance counter") added support for access to the free-running counter via 'perf -e msr/irperf/', but when exercised, it always returns a 0 count: BEFORE: $ perf stat -e instructions,msr/irperf/ true Performance counter stats for 'true': 624,833 instructions 0 msr/irperf/ Simply set its enable bit - HWCR bit 30 - to make it start counting. Enablement is restricted to all machines advertising IRPERF capability, except those susceptible to an erratum that makes the IRPERF return bad values. That erratum occurs in Family 17h models 00-1fh [1], but not in F17h models 20h and above [2]. AFTER (on a family 17h model 31h machine): $ perf stat -e instructions,msr/irperf/ true Performance counter stats for 'true': 621,690 instructions 622,490 msr/irperf/ [1] Revision Guide for AMD Family 17h Models 00h-0Fh Processors [2] Revision Guide for AMD Family 17h Models 30h-3Fh Processors The revision guides are available from the bugzilla Link below. [ bp: Massage commit message. ] Fixes: aaf248848db50 ("perf/x86/msr: Add AMD IRPERF (Instructions Retired) performance counter") Signed-off-by: Kim Phillips Signed-off-by: Borislav Petkov Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 Link: http://lkml.kernel.org/r/20200214201805.13830-1-kim.phillips@amd.com --- arch/x86/include/asm/msr-index.h | 2 ++ arch/x86/kernel/cpu/amd.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ebe1685e92dd..d5e517d1c3dd 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -512,6 +512,8 @@ #define MSR_K7_HWCR 0xc0010015 #define MSR_K7_HWCR_SMMLOCK_BIT 0 #define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT) +#define MSR_K7_HWCR_IRPERF_EN_BIT 30 +#define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT) #define MSR_K7_FID_VID_CTL 0xc0010041 #define MSR_K7_FID_VID_STATUS 0xc0010042 diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index ac83a0fef628..1f875fbe1384 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -28,6 +28,7 @@ static const int amd_erratum_383[]; static const int amd_erratum_400[]; +static const int amd_erratum_1054[]; static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum); /* @@ -972,6 +973,15 @@ static void init_amd(struct cpuinfo_x86 *c) /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */ if (!cpu_has(c, X86_FEATURE_XENPV)) set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); + + /* + * Turn on the Instructions Retired free counter on machines not + * susceptible to erratum #1054 "Instructions Retired Performance + * Counter May Be Inaccurate". + */ + if (cpu_has(c, X86_FEATURE_IRPERF) && + !cpu_has_amd_erratum(c, amd_erratum_1054)) + msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT); } #ifdef CONFIG_X86_32 @@ -1099,6 +1109,10 @@ static const int amd_erratum_400[] = static const int amd_erratum_383[] = AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf)); +/* #1054: Instructions Retired Performance Counter May Be Inaccurate */ +static const int amd_erratum_1054[] = + AMD_OSVW_ERRATUM(0, AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf)); + static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) { -- cgit v1.2.3-59-g8ed1b From c3331d2fe3fd4d5e321f2467d01f72de7edfb5d0 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 19 Feb 2020 18:01:22 +0300 Subject: nfc: pn544: Fix occasional HW initialization failure The PN544 driver checks the "enable" polarity during of driver's probe and it's doing that by turning ON and OFF NFC with different polarities until enabling succeeds. It takes some time for the hardware to power-down, and thus, to deassert the IRQ that is raised by turning ON the hardware. Since the delay after last power-down of the polarity-checking process is missed in the code, the interrupt may trigger immediately after installing the IRQ handler (right after the checking is done), which results in IRQ handler trying to touch the disabled HW and ends with marking NFC as 'DEAD' during of the driver's probe: pn544_hci_i2c 1-002a: NFC: nfc_en polarity : active high pn544_hci_i2c 1-002a: NFC: invalid len byte shdlc: llc_shdlc_recv_frame: NULL Frame -> link is dead This patch fixes the occasional NFC initialization failure on Nexus 7 device. Signed-off-by: Dmitry Osipenko Signed-off-by: David S. Miller --- drivers/nfc/pn544/i2c.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c index 720c89d6066e..4ac8cb262559 100644 --- a/drivers/nfc/pn544/i2c.c +++ b/drivers/nfc/pn544/i2c.c @@ -225,6 +225,7 @@ static void pn544_hci_i2c_platform_init(struct pn544_i2c_phy *phy) out: gpiod_set_value_cansleep(phy->gpiod_en, !phy->en_polarity); + usleep_range(10000, 15000); } static void pn544_hci_i2c_enable_mode(struct pn544_i2c_phy *phy, int run_mode) -- cgit v1.2.3-59-g8ed1b From 33c4acbe2f4e8f2866914b1fb90ce74fc7216c21 Mon Sep 17 00:00:00 2001 From: Madhuparna Bhowmik Date: Wed, 19 Feb 2020 20:47:46 +0530 Subject: bridge: br_stp: Use built-in RCU list checking list_for_each_entry_rcu() has built-in RCU and lock checking. Pass cond argument to list_for_each_entry_rcu() to silence false lockdep warning when CONFIG_PROVE_RCU_LIST is enabled by default. Signed-off-by: Madhuparna Bhowmik Signed-off-by: David S. Miller --- net/bridge/br_stp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 6856a6d9282b..1f14b8455345 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -63,7 +63,8 @@ struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no) { struct net_bridge_port *p; - list_for_each_entry_rcu(p, &br->port_list, list) { + list_for_each_entry_rcu(p, &br->port_list, list, + lockdep_is_held(&br->lock)) { if (p->port_no == port_no) return p; } -- cgit v1.2.3-59-g8ed1b From 840f8ad0aaf20044e2fb099095bbce27c02f58da Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Thu, 13 Feb 2020 13:31:23 -0800 Subject: ice: Don't reject odd values of usecs set by user Currently if a user sets an odd [tx|rx]-usecs value through ethtool, the request is denied because the hardware is set to have an ITR granularity of 2us. This caused poor customer experience. Fix this by aligning to a register allowed value, which results in rounding down. Also, print a once per ring container type message to be clear about our intentions. Also, change the ITR_TO_REG define to be the bitwise and of the ITR setting and the ICE_ITR_MASK. This makes the purpose of ITR_TO_REG more obvious. Signed-off-by: Brett Creeley Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 49 +++++++++++++++++++++------- drivers/net/ethernet/intel/ice/ice_txrx.h | 2 +- 2 files changed, 39 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index b002ab4e5838..a88763066681 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3489,21 +3489,13 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, return -EINVAL; } - /* hardware only supports an ITR granularity of 2us */ - if (coalesce_usecs % 2 != 0) { - netdev_info(vsi->netdev, "Invalid value, %s-usecs must be even\n", - c_type_str); - return -EINVAL; - } - if (use_adaptive_coalesce) { rc->itr_setting |= ICE_ITR_DYNAMIC; } else { - /* store user facing value how it was set */ + /* save the user set usecs */ rc->itr_setting = coalesce_usecs; - /* set to static and convert to value HW understands */ - rc->target_itr = - ITR_TO_REG(ITR_REG_ALIGN(rc->itr_setting)); + /* device ITR granularity is in 2 usec increments */ + rc->target_itr = ITR_REG_ALIGN(rc->itr_setting); } return 0; @@ -3596,6 +3588,30 @@ ice_is_coalesce_param_invalid(struct net_device *netdev, return 0; } +/** + * ice_print_if_odd_usecs - print message if user tries to set odd [tx|rx]-usecs + * @netdev: netdev used for print + * @itr_setting: previous user setting + * @use_adaptive_coalesce: if adaptive coalesce is enabled or being enabled + * @coalesce_usecs: requested value of [tx|rx]-usecs + * @c_type_str: either "rx" or "tx" to match user set field of [tx|rx]-usecs + */ +static void +ice_print_if_odd_usecs(struct net_device *netdev, u16 itr_setting, + u32 use_adaptive_coalesce, u32 coalesce_usecs, + const char *c_type_str) +{ + if (use_adaptive_coalesce) + return; + + itr_setting = ITR_TO_REG(itr_setting); + + if (itr_setting != coalesce_usecs && (coalesce_usecs % 2)) + netdev_info(netdev, "User set %s-usecs to %d, device only supports even values. Rounding down and attempting to set %s-usecs to %d\n", + c_type_str, coalesce_usecs, c_type_str, + ITR_REG_ALIGN(coalesce_usecs)); +} + /** * __ice_set_coalesce - set ITR/INTRL values for the device * @netdev: pointer to the netdev associated with this query @@ -3616,8 +3632,19 @@ __ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, return -EINVAL; if (q_num < 0) { + struct ice_q_vector *q_vector = vsi->q_vectors[0]; int v_idx; + if (q_vector) { + ice_print_if_odd_usecs(netdev, q_vector->rx.itr_setting, + ec->use_adaptive_rx_coalesce, + ec->rx_coalesce_usecs, "rx"); + + ice_print_if_odd_usecs(netdev, q_vector->tx.itr_setting, + ec->use_adaptive_tx_coalesce, + ec->tx_coalesce_usecs, "tx"); + } + ice_for_each_q_vector(vsi, v_idx) { /* In some cases if DCB is configured the num_[rx|tx]q * can be less than vsi->num_q_vectors. This check diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 14a1bf445889..7ee00a128663 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -222,7 +222,7 @@ enum ice_rx_dtype { #define ICE_ITR_GRAN_S 1 /* ITR granularity is always 2us */ #define ICE_ITR_GRAN_US BIT(ICE_ITR_GRAN_S) #define ICE_ITR_MASK 0x1FFE /* ITR register value alignment mask */ -#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~ICE_ITR_MASK) +#define ITR_REG_ALIGN(setting) ((setting) & ICE_ITR_MASK) #define ICE_ITR_ADAPTIVE_MIN_INC 0x0002 #define ICE_ITR_ADAPTIVE_MIN_USECS 0x0002 -- cgit v1.2.3-59-g8ed1b From 8a55c08d3bbc9ffc9639f69f742e59ebd99f913b Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Thu, 13 Feb 2020 13:31:24 -0800 Subject: ice: Don't tell the OS that link is going down Remove code that tell the OS that link is going down when user change flow control via ethtool. When link is up it isn't certain that link goes down after 0x0605 aq command. If link doesn't go down, OS thinks that link is down, but physical link is up. To reset this state user have to take interface down and up. If link goes down after 0x0605 command, FW send information about that and after that driver tells the OS that the link goes down. So this code in ethtool is unnecessary. Signed-off-by: Michal Swiatkowski Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index a88763066681..77c412a7e7a4 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -2936,13 +2936,6 @@ ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) else return -EINVAL; - /* Tell the OS link is going down, the link will go back up when fw - * says it is ready asynchronously - */ - ice_print_link_msg(vsi, false); - netif_carrier_off(netdev); - netif_tx_stop_all_queues(netdev); - /* Set the FC mode and only restart AN if link is up */ status = ice_set_fc(pi, &aq_failures, link_up); -- cgit v1.2.3-59-g8ed1b From c54d209c78b8a3d0a75e710993833ebe1eb3273b Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Tue, 18 Feb 2020 13:22:06 -0800 Subject: ice: Wait for VF to be reset/ready before configuration The configuration/command below is failing when the VF in the xml file is already bound to the host iavf driver. pci_0000_af_0_0.xml:
> virsh attach-device domain_name pci_0000_af_0_0.xml error: Failed to attach device from pci_0000_af_0_0.xml error: Cannot set interface MAC/vlanid to 00:de:ad:00:11:01/0 for ifname ens1f1 vf 0: Device or resource busy This is failing because the VF has not been completely removed/reset after being unbound (via the virsh command above) from the host iavf driver and ice_set_vf_mac() checks if the VF is disabled before waiting for the reset to finish. Fix this by waiting for the VF remove/reset process to happen before checking if the VF is disabled. Also, since many functions for VF administration on the PF were more or less calling the same 3 functions (ice_wait_on_vf_reset(), ice_is_vf_disabled(), and ice_check_vf_init()) move these into the helper function ice_check_vf_ready_for_cfg(). Then call this function in any flow that attempts to configure/query a VF from the PF. Lastly, increase the maximum wait time in ice_wait_on_vf_reset() to 800ms, and modify/add the #define(s) that determine the wait time. This was done for robustness because in rare/stress cases VF removal can take a max of ~800ms and previously the wait was a max of ~300ms. Signed-off-by: Brett Creeley Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 134 +++++++++++++---------- drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h | 3 +- 2 files changed, 76 insertions(+), 61 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 262714d5f54a..75c70d432c72 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -1873,6 +1873,48 @@ error_param: NULL, 0); } +/** + * ice_wait_on_vf_reset - poll to make sure a given VF is ready after reset + * @vf: The VF being resseting + * + * The max poll time is about ~800ms, which is about the maximum time it takes + * for a VF to be reset and/or a VF driver to be removed. + */ +static void ice_wait_on_vf_reset(struct ice_vf *vf) +{ + int i; + + for (i = 0; i < ICE_MAX_VF_RESET_TRIES; i++) { + if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) + break; + msleep(ICE_MAX_VF_RESET_SLEEP_MS); + } +} + +/** + * ice_check_vf_ready_for_cfg - check if VF is ready to be configured/queried + * @vf: VF to check if it's ready to be configured/queried + * + * The purpose of this function is to make sure the VF is not in reset, not + * disabled, and initialized so it can be configured and/or queried by a host + * administrator. + */ +static int ice_check_vf_ready_for_cfg(struct ice_vf *vf) +{ + struct ice_pf *pf; + + ice_wait_on_vf_reset(vf); + + if (ice_is_vf_disabled(vf)) + return -EINVAL; + + pf = vf->pf; + if (ice_check_vf_init(pf, vf)) + return -EBUSY; + + return 0; +} + /** * ice_set_vf_spoofchk * @netdev: network interface device structure @@ -1890,16 +1932,16 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena) enum ice_status status; struct device *dev; struct ice_vf *vf; - int ret = 0; + int ret; dev = ice_pf_to_dev(pf); if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; vf = &pf->vf[vf_id]; - - if (ice_check_vf_init(pf, vf)) - return -EBUSY; + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + return ret; vf_vsi = pf->vsi[vf->lan_vsi_idx]; if (!vf_vsi) { @@ -2696,7 +2738,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, struct ice_vsi *vsi; struct device *dev; struct ice_vf *vf; - int ret = 0; + int ret; dev = ice_pf_to_dev(pf); if (ice_validate_vf_id(pf, vf_id)) @@ -2714,13 +2756,15 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, vf = &pf->vf[vf_id]; vsi = pf->vsi[vf->lan_vsi_idx]; - if (ice_check_vf_init(pf, vf)) - return -EBUSY; + + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + return ret; if (le16_to_cpu(vsi->info.pvid) == vlanprio) { /* duplicate request, so just return success */ dev_dbg(dev, "Duplicate pvid %d request\n", vlanprio); - return ret; + return 0; } /* If PVID, then remove all filters on the old VLAN */ @@ -2731,7 +2775,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, if (vlan_id || qos) { ret = ice_vsi_manage_pvid(vsi, vlanprio, true); if (ret) - goto error_set_pvid; + return ret; } else { ice_vsi_manage_pvid(vsi, 0, false); vsi->info.pvid = 0; @@ -2744,7 +2788,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, /* add new VLAN filter for each MAC */ ret = ice_vsi_add_vlan(vsi, vlan_id); if (ret) - goto error_set_pvid; + return ret; } /* The Port VLAN needs to be saved across resets the same as the @@ -2752,8 +2796,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, */ vf->port_vlan_id = le16_to_cpu(vsi->info.pvid); -error_set_pvid: - return ret; + return 0; } /** @@ -3236,23 +3279,6 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi) return 0; } -/** - * ice_wait_on_vf_reset - * @vf: The VF being resseting - * - * Poll to make sure a given VF is ready after reset - */ -static void ice_wait_on_vf_reset(struct ice_vf *vf) -{ - int i; - - for (i = 0; i < ICE_MAX_VF_RESET_WAIT; i++) { - if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) - break; - msleep(20); - } -} - /** * ice_set_vf_mac * @netdev: network interface device structure @@ -3265,29 +3291,21 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) { struct ice_pf *pf = ice_netdev_to_pf(netdev); struct ice_vf *vf; - int ret = 0; + int ret; if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; - vf = &pf->vf[vf_id]; - /* Don't set MAC on disabled VF */ - if (ice_is_vf_disabled(vf)) - return -EINVAL; - - /* In case VF is in reset mode, wait until it is completed. Depending - * on factors like queue disabling routine, this could take ~250ms - */ - ice_wait_on_vf_reset(vf); - - if (ice_check_vf_init(pf, vf)) - return -EBUSY; - if (is_zero_ether_addr(mac) || is_multicast_ether_addr(mac)) { netdev_err(netdev, "%pM not a valid unicast address\n", mac); return -EINVAL; } + vf = &pf->vf[vf_id]; + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + return ret; + /* copy MAC into dflt_lan_addr and trigger a VF reset. The reset * flow will use the updated dflt_lan_addr and add a MAC filter * using ice_add_mac. Also set pf_set_mac to indicate that the PF has @@ -3299,7 +3317,7 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) vf_id, mac); ice_vc_reset_vf(vf); - return ret; + return 0; } /** @@ -3314,22 +3332,15 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted) { struct ice_pf *pf = ice_netdev_to_pf(netdev); struct ice_vf *vf; + int ret; if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; vf = &pf->vf[vf_id]; - /* Don't set Trusted Mode on disabled VF */ - if (ice_is_vf_disabled(vf)) - return -EINVAL; - - /* In case VF is in reset mode, wait until it is completed. Depending - * on factors like queue disabling routine, this could take ~250ms - */ - ice_wait_on_vf_reset(vf); - - if (ice_check_vf_init(pf, vf)) - return -EBUSY; + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + return ret; /* Check if already trusted */ if (trusted == vf->trusted) @@ -3355,13 +3366,15 @@ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state) { struct ice_pf *pf = ice_netdev_to_pf(netdev); struct ice_vf *vf; + int ret; if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; vf = &pf->vf[vf_id]; - if (ice_check_vf_init(pf, vf)) - return -EBUSY; + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + return ret; switch (link_state) { case IFLA_VF_LINK_STATE_AUTO: @@ -3397,14 +3410,15 @@ int ice_get_vf_stats(struct net_device *netdev, int vf_id, struct ice_eth_stats *stats; struct ice_vsi *vsi; struct ice_vf *vf; + int ret; if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; vf = &pf->vf[vf_id]; - - if (ice_check_vf_init(pf, vf)) - return -EBUSY; + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + return ret; vsi = pf->vsi[vf->lan_vsi_idx]; if (!vsi) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h index 4647d636ed36..ac67982751df 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h @@ -38,7 +38,8 @@ #define ICE_MAX_POLICY_INTR_PER_VF 33 #define ICE_MIN_INTR_PER_VF (ICE_MIN_QS_PER_VF + 1) #define ICE_DFLT_INTR_PER_VF (ICE_DFLT_QS_PER_VF + 1) -#define ICE_MAX_VF_RESET_WAIT 15 +#define ICE_MAX_VF_RESET_TRIES 40 +#define ICE_MAX_VF_RESET_SLEEP_MS 20 #define ice_for_each_vf(pf, i) \ for ((i) = 0; (i) < (pf)->num_alloc_vfs; (i)++) -- cgit v1.2.3-59-g8ed1b From dde54b9492a8ba46bcd7e7e26172adf2bfcea817 Mon Sep 17 00:00:00 2001 From: Heidi Fahim Date: Tue, 26 Nov 2019 14:36:16 -0800 Subject: kunit: test: Improve error messages for kunit_tool when kunitconfig is invalid Previous error message for invalid kunitconfig was vague. Added to it so that it lists invalid fields and prompts for them to be removed. Added validate_config function returning whether or not this kconfig is valid. Signed-off-by: Heidi Fahim Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_kernel.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index cc5d844ecca1..d99ae75ef72f 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -93,6 +93,20 @@ class LinuxSourceTree(object): return False return True + def validate_config(self, build_dir): + kconfig_path = get_kconfig_path(build_dir) + validated_kconfig = kunit_config.Kconfig() + validated_kconfig.read_from_file(kconfig_path) + if not self._kconfig.is_subset_of(validated_kconfig): + invalid = self._kconfig.entries() - validated_kconfig.entries() + message = 'Provided Kconfig is not contained in validated .config. Following fields found in kunitconfig, ' \ + 'but not in .config: %s' % ( + ', '.join([str(e) for e in invalid]) + ) + logging.error(message) + return False + return True + def build_config(self, build_dir): kconfig_path = get_kconfig_path(build_dir) if build_dir and not os.path.exists(build_dir): @@ -103,12 +117,7 @@ class LinuxSourceTree(object): except ConfigError as e: logging.error(e) return False - validated_kconfig = kunit_config.Kconfig() - validated_kconfig.read_from_file(kconfig_path) - if not self._kconfig.is_subset_of(validated_kconfig): - logging.error('Provided Kconfig is not contained in validated .config!') - return False - return True + return self.validate_config(build_dir) def build_reconfig(self, build_dir): """Creates a new .config if it is not a subset of the .kunitconfig.""" @@ -133,12 +142,7 @@ class LinuxSourceTree(object): except (ConfigError, BuildError) as e: logging.error(e) return False - used_kconfig = kunit_config.Kconfig() - used_kconfig.read_from_file(get_kconfig_path(build_dir)) - if not self._kconfig.is_subset_of(used_kconfig): - logging.error('Provided Kconfig is not contained in final config!') - return False - return True + return self.validate_config(build_dir) def run_kernel(self, args=[], timeout=None, build_dir=''): args.extend(['mem=256M']) -- cgit v1.2.3-59-g8ed1b From be886ba90cce2fb2f5a4dbcda8f3be3fd1b2f484 Mon Sep 17 00:00:00 2001 From: Heidi Fahim Date: Tue, 18 Feb 2020 14:19:16 -0800 Subject: kunit: run kunit_tool from any directory Implemented small fix so that the script changes work directories to the root of the linux kernel source tree from which kunit.py is run. This enables the user to run kunit from any working directory. Originally considered using os.path.join but this is more error prone as we would have to find all file path usages and modify them accordingly. Using os.chdir ensures that the entire script is run within /linux. Signed-off-by: Heidi Fahim Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index e59eb9e7f923..180ad1e1b04f 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -24,6 +24,8 @@ KunitResult = namedtuple('KunitResult', ['status','result']) KunitRequest = namedtuple('KunitRequest', ['raw_output','timeout', 'jobs', 'build_dir', 'defconfig']) +KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0] + class KunitStatus(Enum): SUCCESS = auto() CONFIG_FAILURE = auto() @@ -35,6 +37,13 @@ def create_default_kunitconfig(): shutil.copyfile('arch/um/configs/kunit_defconfig', kunit_kernel.kunitconfig_path) +def get_kernel_root_path(): + parts = sys.argv[0] if not __file__ else __file__ + parts = os.path.realpath(parts).split('tools/testing/kunit') + if len(parts) != 2: + sys.exit(1) + return parts[0] + def run_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitRequest) -> KunitResult: config_start = time.time() @@ -114,6 +123,9 @@ def main(argv, linux=None): cli_args = parser.parse_args(argv) if cli_args.subcommand == 'run': + if get_kernel_root_path(): + os.chdir(get_kernel_root_path()) + if cli_args.build_dir: if not os.path.exists(cli_args.build_dir): os.mkdir(cli_args.build_dir) -- cgit v1.2.3-59-g8ed1b From 492e0d0d6f2eb4badfd2868addf9da0f651eba0e Mon Sep 17 00:00:00 2001 From: Brian Vazquez Date: Tue, 18 Feb 2020 09:25:52 -0800 Subject: bpf: Do not grab the bucket spinlock by default on htab batch ops Grabbing the spinlock for every bucket even if it's empty, was causing significant perfomance cost when traversing htab maps that have only a few entries. This patch addresses the issue by checking first the bucket_cnt, if the bucket has some entries then we go and grab the spinlock and proceed with the batching. Tested with a htab of size 50K and different value of populated entries. Before: Benchmark Time(ns) CPU(ns) --------------------------------------------- BM_DumpHashMap/1 2759655 2752033 BM_DumpHashMap/10 2933722 2930825 BM_DumpHashMap/200 3171680 3170265 BM_DumpHashMap/500 3639607 3635511 BM_DumpHashMap/1000 4369008 4364981 BM_DumpHashMap/5k 11171919 11134028 BM_DumpHashMap/20k 69150080 69033496 BM_DumpHashMap/39k 190501036 190226162 After: Benchmark Time(ns) CPU(ns) --------------------------------------------- BM_DumpHashMap/1 202707 200109 BM_DumpHashMap/10 213441 210569 BM_DumpHashMap/200 478641 472350 BM_DumpHashMap/500 980061 967102 BM_DumpHashMap/1000 1863835 1839575 BM_DumpHashMap/5k 8961836 8902540 BM_DumpHashMap/20k 69761497 69322756 BM_DumpHashMap/39k 187437830 186551111 Fixes: 057996380a42 ("bpf: Add batch ops to all htab bpf map") Signed-off-by: Brian Vazquez Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200218172552.215077-1-brianvv@google.com --- kernel/bpf/hashtab.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 2d182c4ee9d9..9194479a2fa7 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -1259,7 +1259,8 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map, u64 elem_map_flags, map_flags; struct hlist_nulls_head *head; struct hlist_nulls_node *n; - unsigned long flags; + unsigned long flags = 0; + bool locked = false; struct htab_elem *l; struct bucket *b; int ret = 0; @@ -1319,15 +1320,25 @@ again_nocopy: dst_val = values; b = &htab->buckets[batch]; head = &b->head; - raw_spin_lock_irqsave(&b->lock, flags); + /* do not grab the lock unless need it (bucket_cnt > 0). */ + if (locked) + raw_spin_lock_irqsave(&b->lock, flags); bucket_cnt = 0; hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) bucket_cnt++; + if (bucket_cnt && !locked) { + locked = true; + goto again_nocopy; + } + if (bucket_cnt > (max_count - total)) { if (total == 0) ret = -ENOSPC; + /* Note that since bucket_cnt > 0 here, it is implicit + * that the locked was grabbed, so release it. + */ raw_spin_unlock_irqrestore(&b->lock, flags); rcu_read_unlock(); this_cpu_dec(bpf_prog_active); @@ -1337,6 +1348,9 @@ again_nocopy: if (bucket_cnt > bucket_size) { bucket_size = bucket_cnt; + /* Note that since bucket_cnt > 0 here, it is implicit + * that the locked was grabbed, so release it. + */ raw_spin_unlock_irqrestore(&b->lock, flags); rcu_read_unlock(); this_cpu_dec(bpf_prog_active); @@ -1346,6 +1360,10 @@ again_nocopy: goto alloc; } + /* Next block is only safe to run if you have grabbed the lock */ + if (!locked) + goto next_batch; + hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { memcpy(dst_key, l->key, key_size); @@ -1380,6 +1398,8 @@ again_nocopy: } raw_spin_unlock_irqrestore(&b->lock, flags); + locked = false; +next_batch: /* If we are not copying data, we can go to next bucket and avoid * unlocking the rcu. */ -- cgit v1.2.3-59-g8ed1b From b9aff38de2cb166476988020428985c5f7412ffc Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 19 Feb 2020 15:47:57 -0800 Subject: bpf: Fix a potential deadlock with bpf_map_do_batch Commit 057996380a42 ("bpf: Add batch ops to all htab bpf map") added lookup_and_delete batch operation for hash table. The current implementation has bpf_lru_push_free() inside the bucket lock, which may cause a deadlock. syzbot reports: -> #2 (&htab->buckets[i].lock#2){....}: __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] _raw_spin_lock_irqsave+0x95/0xcd kernel/locking/spinlock.c:159 htab_lru_map_delete_node+0xce/0x2f0 kernel/bpf/hashtab.c:593 __bpf_lru_list_shrink_inactive kernel/bpf/bpf_lru_list.c:220 [inline] __bpf_lru_list_shrink+0xf9/0x470 kernel/bpf/bpf_lru_list.c:266 bpf_lru_list_pop_free_to_local kernel/bpf/bpf_lru_list.c:340 [inline] bpf_common_lru_pop_free kernel/bpf/bpf_lru_list.c:447 [inline] bpf_lru_pop_free+0x87c/0x1670 kernel/bpf/bpf_lru_list.c:499 prealloc_lru_pop+0x2c/0xa0 kernel/bpf/hashtab.c:132 __htab_lru_percpu_map_update_elem+0x67e/0xa90 kernel/bpf/hashtab.c:1069 bpf_percpu_hash_update+0x16e/0x210 kernel/bpf/hashtab.c:1585 bpf_map_update_value.isra.0+0x2d7/0x8e0 kernel/bpf/syscall.c:181 generic_map_update_batch+0x41f/0x610 kernel/bpf/syscall.c:1319 bpf_map_do_batch+0x3f5/0x510 kernel/bpf/syscall.c:3348 __do_sys_bpf+0x9b7/0x41e0 kernel/bpf/syscall.c:3460 __se_sys_bpf kernel/bpf/syscall.c:3355 [inline] __x64_sys_bpf+0x73/0xb0 kernel/bpf/syscall.c:3355 do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe -> #0 (&loc_l->lock){....}: check_prev_add kernel/locking/lockdep.c:2475 [inline] check_prevs_add kernel/locking/lockdep.c:2580 [inline] validate_chain kernel/locking/lockdep.c:2970 [inline] __lock_acquire+0x2596/0x4a00 kernel/locking/lockdep.c:3954 lock_acquire+0x190/0x410 kernel/locking/lockdep.c:4484 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] _raw_spin_lock_irqsave+0x95/0xcd kernel/locking/spinlock.c:159 bpf_common_lru_push_free kernel/bpf/bpf_lru_list.c:516 [inline] bpf_lru_push_free+0x250/0x5b0 kernel/bpf/bpf_lru_list.c:555 __htab_map_lookup_and_delete_batch+0x8d4/0x1540 kernel/bpf/hashtab.c:1374 htab_lru_map_lookup_and_delete_batch+0x34/0x40 kernel/bpf/hashtab.c:1491 bpf_map_do_batch+0x3f5/0x510 kernel/bpf/syscall.c:3348 __do_sys_bpf+0x1f7d/0x41e0 kernel/bpf/syscall.c:3456 __se_sys_bpf kernel/bpf/syscall.c:3355 [inline] __x64_sys_bpf+0x73/0xb0 kernel/bpf/syscall.c:3355 do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe Possible unsafe locking scenario: CPU0 CPU2 ---- ---- lock(&htab->buckets[i].lock#2); lock(&l->lock); lock(&htab->buckets[i].lock#2); lock(&loc_l->lock); *** DEADLOCK *** To fix the issue, for htab_lru_map_lookup_and_delete_batch() in CPU0, let us do bpf_lru_push_free() out of the htab bucket lock. This can avoid the above deadlock scenario. Fixes: 057996380a42 ("bpf: Add batch ops to all htab bpf map") Reported-by: syzbot+a38ff3d9356388f2fb83@syzkaller.appspotmail.com Reported-by: syzbot+122b5421d14e68f29cd1@syzkaller.appspotmail.com Suggested-by: Hillf Danton Suggested-by: Martin KaFai Lau Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Reviewed-by: Jakub Sitnicki Acked-by: Brian Vazquez Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200219234757.3544014-1-yhs@fb.com --- kernel/bpf/hashtab.c | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 9194479a2fa7..a1468e3f5af2 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -56,6 +56,7 @@ struct htab_elem { union { struct bpf_htab *htab; struct pcpu_freelist_node fnode; + struct htab_elem *batch_flink; }; }; }; @@ -126,6 +127,17 @@ free_elems: bpf_map_area_free(htab->elems); } +/* The LRU list has a lock (lru_lock). Each htab bucket has a lock + * (bucket_lock). If both locks need to be acquired together, the lock + * order is always lru_lock -> bucket_lock and this only happens in + * bpf_lru_list.c logic. For example, certain code path of + * bpf_lru_pop_free(), which is called by function prealloc_lru_pop(), + * will acquire lru_lock first followed by acquiring bucket_lock. + * + * In hashtab.c, to avoid deadlock, lock acquisition of + * bucket_lock followed by lru_lock is not allowed. In such cases, + * bucket_lock needs to be released first before acquiring lru_lock. + */ static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key, u32 hash) { @@ -1256,6 +1268,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map, void __user *ukeys = u64_to_user_ptr(attr->batch.keys); void *ubatch = u64_to_user_ptr(attr->batch.in_batch); u32 batch, max_count, size, bucket_size; + struct htab_elem *node_to_free = NULL; u64 elem_map_flags, map_flags; struct hlist_nulls_head *head; struct hlist_nulls_node *n; @@ -1388,10 +1401,18 @@ again_nocopy: } if (do_delete) { hlist_nulls_del_rcu(&l->hash_node); - if (is_lru_map) - bpf_lru_push_free(&htab->lru, &l->lru_node); - else + + /* bpf_lru_push_free() will acquire lru_lock, which + * may cause deadlock. See comments in function + * prealloc_lru_pop(). Let us do bpf_lru_push_free() + * after releasing the bucket lock. + */ + if (is_lru_map) { + l->batch_flink = node_to_free; + node_to_free = l; + } else { free_htab_elem(htab, l); + } } dst_key += key_size; dst_val += value_size; @@ -1399,6 +1420,13 @@ again_nocopy: raw_spin_unlock_irqrestore(&b->lock, flags); locked = false; + + while (node_to_free) { + l = node_to_free; + node_to_free = node_to_free->batch_flink; + bpf_lru_push_free(&htab->lru, &l->lru_node); + } + next_batch: /* If we are not copying data, we can go to next bucket and avoid * unlocking the rcu. -- cgit v1.2.3-59-g8ed1b From e7167043ee508739fc9c5030494f94bea48cee23 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 12 Feb 2020 19:34:23 +0900 Subject: riscv: Fix gitignore Tell git to not track the compiled boot/loader and boot/loader.lds files. Signed-off-by: Damien Le Moal Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/.gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/boot/.gitignore b/arch/riscv/boot/.gitignore index 8dab0bb6ae66..8a45a37d2af4 100644 --- a/arch/riscv/boot/.gitignore +++ b/arch/riscv/boot/.gitignore @@ -1,2 +1,4 @@ Image Image.gz +loader +loader.lds -- cgit v1.2.3-59-g8ed1b From 88d5271c1efbd89713092b962070cf9af2fcd7c4 Mon Sep 17 00:00:00 2001 From: Tomas Paukrt Date: Wed, 22 Jan 2020 21:10:39 +0100 Subject: dt-bindings: mmc: omap-hsmmc: Fix SDIO interrupt SDIO interrupt must be specified correctly as IRQ_TYPE_LEVEL_LOW instead of GPIO_ACTIVE_LOW. Signed-off-by: Tomas Paukrt Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt b/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt index 19f5508a7569..4a9145ef15d6 100644 --- a/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt +++ b/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt @@ -124,7 +124,7 @@ not every application needs SDIO irq, e.g. MMC cards. pinctrl-1 = <&mmc1_idle>; pinctrl-2 = <&mmc1_sleep>; ... - interrupts-extended = <&intc 64 &gpio2 28 GPIO_ACTIVE_LOW>; + interrupts-extended = <&intc 64 &gpio2 28 IRQ_TYPE_LEVEL_LOW>; }; mmc1_idle : pinmux_cirq_pin { -- cgit v1.2.3-59-g8ed1b From 06f5201c6392f998a49ca9c9173e2930c8eb51d8 Mon Sep 17 00:00:00 2001 From: Rohit Maheshwari Date: Wed, 19 Feb 2020 09:40:22 +0530 Subject: net/tls: Fix to avoid gettig invalid tls record Current code doesn't check if tcp sequence number is starting from (/after) 1st record's start sequnce number. It only checks if seq number is before 1st record's end sequnce number. This problem will always be a possibility in re-transmit case. If a record which belongs to a requested seq number is already deleted, tls_get_record will start looking into list and as per the check it will look if seq number is before the end seq of 1st record, which will always be true and will return 1st record always, it should in fact return NULL. As part of the fix, start looking each record only if the sequence number lies in the list else return NULL. There is one more check added, driver look for the start marker record to handle tcp packets which are before the tls offload start sequence number, hence return 1st record if the record is tls start marker and seq number is before the 1st record's starting sequence number. Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure") Signed-off-by: Rohit Maheshwari Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/tls/tls_device.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 1ba5a92832bb..1c5574e2e058 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -593,7 +593,7 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context, u32 seq, u64 *p_record_sn) { u64 record_sn = context->hint_record_sn; - struct tls_record_info *info; + struct tls_record_info *info, *last; info = context->retransmit_hint; if (!info || @@ -605,6 +605,24 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context, struct tls_record_info, list); if (!info) return NULL; + /* send the start_marker record if seq number is before the + * tls offload start marker sequence number. This record is + * required to handle TCP packets which are before TLS offload + * started. + * And if it's not start marker, look if this seq number + * belongs to the list. + */ + if (likely(!tls_record_is_start_marker(info))) { + /* we have the first record, get the last record to see + * if this seq number belongs to the list. + */ + last = list_last_entry(&context->records_list, + struct tls_record_info, list); + + if (!between(seq, tls_record_start_seq(info), + last->end_seq)) + return NULL; + } record_sn = context->unacked_record_sn; } -- cgit v1.2.3-59-g8ed1b From 303d0403b8c25e994e4a6e45389e173cf8706fb5 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 19 Feb 2020 14:16:32 -0500 Subject: udp: rehash on disconnect As of the below commit, udp sockets bound to a specific address can coexist with one bound to the any addr for the same port. The commit also phased out the use of socket hashing based only on port (hslot), in favor of always hashing on {addr, port} (hslot2). The change broke the following behavior with disconnect (AF_UNSPEC): server binds to 0.0.0.0:1337 server connects to 127.0.0.1:80 server disconnects client connects to 127.0.0.1:1337 client sends "hello" server reads "hello" // times out, packet did not find sk On connect the server acquires a specific source addr suitable for routing to its destination. On disconnect it reverts to the any addr. The connect call triggers a rehash to a different hslot2. On disconnect, add the same to return to the original hslot2. Skip this step if the socket is going to be unhashed completely. Fixes: 4cdeeee9252a ("net: udp: prefer listeners bound to an address") Reported-by: Pavel Roskin Signed-off-by: Willem de Bruijn Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index db76b9609299..08a41f1e1cd2 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1857,8 +1857,12 @@ int __udp_disconnect(struct sock *sk, int flags) inet->inet_dport = 0; sock_rps_reset_rxhash(sk); sk->sk_bound_dev_if = 0; - if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) + if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) { inet_reset_saddr(sk); + if (sk->sk_prot->rehash && + (sk->sk_userlocks & SOCK_BINDPORT_LOCK)) + sk->sk_prot->rehash(sk); + } if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) { sk->sk_prot->unhash(sk); -- cgit v1.2.3-59-g8ed1b From 3044d9891bdb98ebae5c9e6fb1bd7f007bf7cad6 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 14 Jan 2020 13:38:21 +0100 Subject: dt-bindings: memory-controller: Update example for Tegra124 EMC The example in the Tegra124 EMC device tree binding looks like an old version that doesn't contain all the required fields. Update it with a version from the current DTS files to fix the make dt_binding_check target. Reported-by: Rob Herring Signed-off-by: Thierry Reding [robh: also fix missing '#reset-cells'] Signed-off-by: Rob Herring --- .../memory-controllers/nvidia,tegra124-emc.yaml | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra124-emc.yaml b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra124-emc.yaml index dd1843489ad1..3e0a8a92d652 100644 --- a/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra124-emc.yaml +++ b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra124-emc.yaml @@ -347,6 +347,7 @@ examples: interrupts = ; #iommu-cells = <1>; + #reset-cells = <1>; }; external-memory-controller@7001b000 { @@ -363,20 +364,23 @@ examples: timing-0 { clock-frequency = <12750000>; - nvidia,emc-zcal-cnt-long = <0x00000042>; - nvidia,emc-auto-cal-interval = <0x001fffff>; - nvidia,emc-ctt-term-ctrl = <0x00000802>; - nvidia,emc-cfg = <0x73240000>; - nvidia,emc-cfg-2 = <0x000008c5>; - nvidia,emc-sel-dpd-ctrl = <0x00040128>; - nvidia,emc-bgbias-ctl0 = <0x00000008>; nvidia,emc-auto-cal-config = <0xa1430000>; nvidia,emc-auto-cal-config2 = <0x00000000>; nvidia,emc-auto-cal-config3 = <0x00000000>; - nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-bgbias-ctl0 = <0x00000008>; + nvidia,emc-cfg = <0x73240000>; + nvidia,emc-cfg-2 = <0x000008c5>; + nvidia,emc-ctt-term-ctrl = <0x00000802>; nvidia,emc-mode-1 = <0x80100003>; nvidia,emc-mode-2 = <0x80200008>; nvidia,emc-mode-4 = <0x00000000>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-mrs-wait-cnt = <0x000e000e>; + nvidia,emc-sel-dpd-ctrl = <0x00040128>; + nvidia,emc-xm2dqspadctrl2 = <0x0130b118>; + nvidia,emc-zcal-cnt-long = <0x00000042>; + nvidia,emc-zcal-interval = <0x00000000>; nvidia,emc-configuration = < 0x00000000 /* EMC_RC */ -- cgit v1.2.3-59-g8ed1b From 867c1859590f3bbc13d9cfb46c90c8202769d0e5 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 14 Feb 2020 21:44:08 +0200 Subject: dt-bindings: net: mdio: remove compatible string from example Remove vendor specific compatible string from example, otherwise DT YAML schemas validation may trigger warnings specific to TI ti,davinci_mdio and not to the generic MDIO example. For example, the "bus_freq" is required for davinci_mdio, but not required for generic mdio example. As result following warning will be produced: mdio.example.dt.yaml: mdio@5c030000: 'bus_freq' is a required property Signed-off-by: Grygorii Strashko Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/net/mdio.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/mdio.yaml b/Documentation/devicetree/bindings/net/mdio.yaml index 5d08d2ffd4eb..50c3397a82bc 100644 --- a/Documentation/devicetree/bindings/net/mdio.yaml +++ b/Documentation/devicetree/bindings/net/mdio.yaml @@ -56,7 +56,6 @@ patternProperties: examples: - | davinci_mdio: mdio@5c030000 { - compatible = "ti,davinci_mdio"; reg = <0x5c030000 0x1000>; #address-cells = <1>; #size-cells = <0>; -- cgit v1.2.3-59-g8ed1b From 303d37b4b0522150c0c7701d84934bc13199bebc Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 10 Feb 2020 11:04:16 +0100 Subject: dt-bindings: media: csi: Add interconnects properties The Allwinner CSI controller is sitting beside the MBUS that is represented as an interconnect. Make sure that the interconnect properties are valid in the binding. Fixes: 7866d6903ce8 ("media: dt-bindings: media: sun4i-csi: Add compatible for CSI0 on R40") Signed-off-by: Maxime Ripard Signed-off-by: Rob Herring --- .../devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml b/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml index 9af873b43acd..afde17d9dab1 100644 --- a/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml +++ b/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml @@ -51,6 +51,16 @@ properties: resets: maxItems: 1 + # FIXME: This should be made required eventually once every SoC will + # have the MBUS declared. + interconnects: + maxItems: 1 + + # FIXME: This should be made required eventually once every SoC will + # have the MBUS declared. + interconnect-names: + const: dma-mem + # See ./video-interfaces.txt for details port: type: object -- cgit v1.2.3-59-g8ed1b From 854bdbae9058bcf09b0add70b6047bc9ca776de2 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 10 Feb 2020 11:04:17 +0100 Subject: dt-bindings: media: csi: Fix clocks description Commit 1de243b07666 ("media: dt-bindings: media: sun4i-csi: Add compatible for CSI1 on A10/A20") introduced support for the CSI1 controller on A10 and A20 that unlike CSI0 doesn't have an ISP and therefore only have two clocks, the bus and module clocks. The clocks and clock-names properties have thus been modified to allow either two or tree clocks. However, the current list has the ISP clock at the second position, which means the bindings expects a list of either bus and isp, or bus, isp and mod. The initial intent of the patch was obviously to have bus and mod in the former case. Let's fix the binding so that it validates properly. Fixes: 1de243b07666 ("media: dt-bindings: media: sun4i-csi: Add compatible for CSI1 on A10/A20") Signed-off-by: Maxime Ripard Signed-off-by: Rob Herring --- .../bindings/media/allwinner,sun4i-a10-csi.yaml | 30 +++++++++++++--------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml b/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml index afde17d9dab1..8453ee340b9f 100644 --- a/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml +++ b/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml @@ -33,20 +33,26 @@ properties: maxItems: 1 clocks: - minItems: 2 - maxItems: 3 - items: - - description: The CSI interface clock - - description: The CSI ISP clock - - description: The CSI DRAM clock + oneOf: + - items: + - description: The CSI interface clock + - description: The CSI DRAM clock + + - items: + - description: The CSI interface clock + - description: The CSI ISP clock + - description: The CSI DRAM clock clock-names: - minItems: 2 - maxItems: 3 - items: - - const: bus - - const: isp - - const: ram + oneOf: + - items: + - const: bus + - const: ram + + - items: + - const: bus + - const: isp + - const: ram resets: maxItems: 1 -- cgit v1.2.3-59-g8ed1b From badcd4546d52ae4318f2bcfda0e47a1394b60e38 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 19 Feb 2020 14:36:14 -0800 Subject: hwmon: (acpi_power_meter) Fix lockdep splat Damien Le Moal reports a lockdep splat with the acpi_power_meter, observed with Linux v5.5 and later. ====================================================== WARNING: possible circular locking dependency detected 5.6.0-rc2+ #629 Not tainted ------------------------------------------------------ python/1397 is trying to acquire lock: ffff888619080070 (&resource->lock){+.+.}, at: show_power+0x3c/0xa0 [acpi_power_meter] but task is already holding lock: ffff88881643f188 (kn->count#119){++++}, at: kernfs_seq_start+0x6a/0x160 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (kn->count#119){++++}: __kernfs_remove+0x626/0x7e0 kernfs_remove_by_name_ns+0x41/0x80 remove_attrs+0xcb/0x3c0 [acpi_power_meter] acpi_power_meter_notify+0x1f7/0x310 [acpi_power_meter] acpi_ev_notify_dispatch+0x198/0x1f3 acpi_os_execute_deferred+0x4d/0x70 process_one_work+0x7c8/0x1340 worker_thread+0x94/0xc70 kthread+0x2ed/0x3f0 ret_from_fork+0x24/0x30 -> #0 (&resource->lock){+.+.}: __lock_acquire+0x20be/0x49b0 lock_acquire+0x127/0x340 __mutex_lock+0x15b/0x1350 show_power+0x3c/0xa0 [acpi_power_meter] dev_attr_show+0x3f/0x80 sysfs_kf_seq_show+0x216/0x410 seq_read+0x407/0xf90 vfs_read+0x152/0x2c0 ksys_read+0xf3/0x1d0 do_syscall_64+0x95/0x1010 entry_SYSCALL_64_after_hwframe+0x49/0xbe other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(kn->count#119); lock(&resource->lock); lock(kn->count#119); lock(&resource->lock); *** DEADLOCK *** 4 locks held by python/1397: #0: ffff8890242d64e0 (&f->f_pos_lock){+.+.}, at: __fdget_pos+0x9b/0xb0 #1: ffff889040be74e0 (&p->lock){+.+.}, at: seq_read+0x6b/0xf90 #2: ffff8890448eb880 (&of->mutex){+.+.}, at: kernfs_seq_start+0x47/0x160 #3: ffff88881643f188 (kn->count#119){++++}, at: kernfs_seq_start+0x6a/0x160 stack backtrace: CPU: 10 PID: 1397 Comm: python Not tainted 5.6.0-rc2+ #629 Hardware name: Supermicro Super Server/X11DPL-i, BIOS 3.1 05/21/2019 Call Trace: dump_stack+0x97/0xe0 check_noncircular+0x32e/0x3e0 ? print_circular_bug.isra.0+0x1e0/0x1e0 ? unwind_next_frame+0xb9a/0x1890 ? entry_SYSCALL_64_after_hwframe+0x49/0xbe ? graph_lock+0x79/0x170 ? __lockdep_reset_lock+0x3c0/0x3c0 ? mark_lock+0xbc/0x1150 __lock_acquire+0x20be/0x49b0 ? mark_held_locks+0xe0/0xe0 ? stack_trace_save+0x91/0xc0 lock_acquire+0x127/0x340 ? show_power+0x3c/0xa0 [acpi_power_meter] ? device_remove_bin_file+0x10/0x10 ? device_remove_bin_file+0x10/0x10 __mutex_lock+0x15b/0x1350 ? show_power+0x3c/0xa0 [acpi_power_meter] ? show_power+0x3c/0xa0 [acpi_power_meter] ? mutex_lock_io_nested+0x11f0/0x11f0 ? lock_downgrade+0x6a0/0x6a0 ? kernfs_seq_start+0x47/0x160 ? lock_acquire+0x127/0x340 ? kernfs_seq_start+0x6a/0x160 ? device_remove_bin_file+0x10/0x10 ? show_power+0x3c/0xa0 [acpi_power_meter] show_power+0x3c/0xa0 [acpi_power_meter] dev_attr_show+0x3f/0x80 ? memset+0x20/0x40 sysfs_kf_seq_show+0x216/0x410 seq_read+0x407/0xf90 ? security_file_permission+0x16f/0x2c0 vfs_read+0x152/0x2c0 Problem is that reading an attribute takes the kernfs lock in the kernfs code, then resource->lock in the driver. During an ACPI notification, the opposite happens: The resource lock is taken first, followed by the kernfs lock when sysfs attributes are removed and re-created. Presumably this is now seen due to some locking related changes in kernfs after v5.4, but it was likely always a problem. Fix the problem by not blindly acquiring the lock in the notification function. It is only needed to protect the various update functions. However, those update functions are called anyway when sysfs attributes are read. This means that we can just stop calling those functions from the notifier, and the resource lock in the notifier function is no longer needed. That leaves two situations: First, METER_NOTIFY_CONFIG removes and re-allocates capability strings. While it did so under the resource lock, _displaying_ those strings was not protected, creating a race condition. To solve this problem, selectively protect both removal/creation and reporting of capability attributes with the resource lock. Second, removing and re-creating the attribute files is no longer protected by the resource lock. That doesn't matter since access to each individual attribute is protected by the kernfs lock. Userspace may get messed up if attributes disappear and reappear under its nose, but that is not different than today, and there is nothing we can do about it without major driver restructuring. Last but not least, when removing the driver, remove attribute functions first, then release capability strings. This avoids yet another race condition. Reported-by: Damien Le Moal Cc: Damien Le Moal Cc: stable@vger.kernel.org # v5.5+ Tested-by: Damien Le Moal Signed-off-by: Guenter Roeck --- drivers/hwmon/acpi_power_meter.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/hwmon/acpi_power_meter.c b/drivers/hwmon/acpi_power_meter.c index 4cf25458f0b9..0db8ef4fd6e1 100644 --- a/drivers/hwmon/acpi_power_meter.c +++ b/drivers/hwmon/acpi_power_meter.c @@ -355,7 +355,9 @@ static ssize_t show_str(struct device *dev, struct acpi_device *acpi_dev = to_acpi_device(dev); struct acpi_power_meter_resource *resource = acpi_dev->driver_data; acpi_string val; + int ret; + mutex_lock(&resource->lock); switch (attr->index) { case 0: val = resource->model_number; @@ -372,8 +374,9 @@ static ssize_t show_str(struct device *dev, val = ""; break; } - - return sprintf(buf, "%s\n", val); + ret = sprintf(buf, "%s\n", val); + mutex_unlock(&resource->lock); + return ret; } static ssize_t show_val(struct device *dev, @@ -817,11 +820,12 @@ static void acpi_power_meter_notify(struct acpi_device *device, u32 event) resource = acpi_driver_data(device); - mutex_lock(&resource->lock); switch (event) { case METER_NOTIFY_CONFIG: + mutex_lock(&resource->lock); free_capabilities(resource); res = read_capabilities(resource); + mutex_unlock(&resource->lock); if (res) break; @@ -830,15 +834,12 @@ static void acpi_power_meter_notify(struct acpi_device *device, u32 event) break; case METER_NOTIFY_TRIP: sysfs_notify(&device->dev.kobj, NULL, POWER_AVERAGE_NAME); - update_meter(resource); break; case METER_NOTIFY_CAP: sysfs_notify(&device->dev.kobj, NULL, POWER_CAP_NAME); - update_cap(resource); break; case METER_NOTIFY_INTERVAL: sysfs_notify(&device->dev.kobj, NULL, POWER_AVG_INTERVAL_NAME); - update_avg_interval(resource); break; case METER_NOTIFY_CAPPING: sysfs_notify(&device->dev.kobj, NULL, POWER_ALARM_NAME); @@ -848,7 +849,6 @@ static void acpi_power_meter_notify(struct acpi_device *device, u32 event) WARN(1, "Unexpected event %d\n", event); break; } - mutex_unlock(&resource->lock); acpi_bus_generate_netlink_event(ACPI_POWER_METER_CLASS, dev_name(&device->dev), event, 0); @@ -912,8 +912,8 @@ static int acpi_power_meter_remove(struct acpi_device *device) resource = acpi_driver_data(device); hwmon_device_unregister(resource->hwmon_dev); - free_capabilities(resource); remove_attrs(resource); + free_capabilities(resource); kfree(resource); return 0; -- cgit v1.2.3-59-g8ed1b From 35df4299a6487f323b0aca120ea3f485dfee2ae3 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 7 Feb 2020 09:29:11 -0500 Subject: ext4: fix a data race in EXT4_I(inode)->i_disksize EXT4_I(inode)->i_disksize could be accessed concurrently as noticed by KCSAN, BUG: KCSAN: data-race in ext4_write_end [ext4] / ext4_writepages [ext4] write to 0xffff91c6713b00f8 of 8 bytes by task 49268 on cpu 127: ext4_write_end+0x4e3/0x750 [ext4] ext4_update_i_disksize at fs/ext4/ext4.h:3032 (inlined by) ext4_update_inode_size at fs/ext4/ext4.h:3046 (inlined by) ext4_write_end at fs/ext4/inode.c:1287 generic_perform_write+0x208/0x2a0 ext4_buffered_write_iter+0x11f/0x210 [ext4] ext4_file_write_iter+0xce/0x9e0 [ext4] new_sync_write+0x29c/0x3b0 __vfs_write+0x92/0xa0 vfs_write+0x103/0x260 ksys_write+0x9d/0x130 __x64_sys_write+0x4c/0x60 do_syscall_64+0x91/0xb47 entry_SYSCALL_64_after_hwframe+0x49/0xbe read to 0xffff91c6713b00f8 of 8 bytes by task 24872 on cpu 37: ext4_writepages+0x10ac/0x1d00 [ext4] mpage_map_and_submit_extent at fs/ext4/inode.c:2468 (inlined by) ext4_writepages at fs/ext4/inode.c:2772 do_writepages+0x5e/0x130 __writeback_single_inode+0xeb/0xb20 writeback_sb_inodes+0x429/0x900 __writeback_inodes_wb+0xc4/0x150 wb_writeback+0x4bd/0x870 wb_workfn+0x6b4/0x960 process_one_work+0x54c/0xbe0 worker_thread+0x80/0x650 kthread+0x1e0/0x200 ret_from_fork+0x27/0x50 Reported by Kernel Concurrency Sanitizer on: CPU: 37 PID: 24872 Comm: kworker/u261:2 Tainted: G W O L 5.5.0-next-20200204+ #5 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019 Workqueue: writeback wb_workfn (flush-7:0) Since only the read is operating as lockless (outside of the "i_data_sem"), load tearing could introduce a logic bug. Fix it by adding READ_ONCE() for the read and WRITE_ONCE() for the write. Signed-off-by: Qian Cai Link: https://lore.kernel.org/r/1581085751-31793-1-git-send-email-cai@lca.pw Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/ext4.h | 2 +- fs/ext4/inode.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 4441331d06cc..480badcf2783 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3032,7 +3032,7 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) !inode_is_locked(inode)); down_write(&EXT4_I(inode)->i_data_sem); if (newsize > EXT4_I(inode)->i_disksize) - EXT4_I(inode)->i_disksize = newsize; + WRITE_ONCE(EXT4_I(inode)->i_disksize, newsize); up_write(&EXT4_I(inode)->i_data_sem); } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e60aca791d3f..6e1d81ed44ad 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2465,7 +2465,7 @@ update_disksize: * truncate are avoided by checking i_size under i_data_sem. */ disksize = ((loff_t)mpd->first_page) << PAGE_SHIFT; - if (disksize > EXT4_I(inode)->i_disksize) { + if (disksize > READ_ONCE(EXT4_I(inode)->i_disksize)) { int err2; loff_t i_size; -- cgit v1.2.3-59-g8ed1b From 9424ef56e13a1f14c57ea161eed3ecfdc7b2770e Mon Sep 17 00:00:00 2001 From: Shijie Luo Date: Sat, 15 Feb 2020 03:02:06 -0500 Subject: ext4: add cond_resched() to __ext4_find_entry() We tested a soft lockup problem in linux 4.19 which could also be found in linux 5.x. When dir inode takes up a large number of blocks, and if the directory is growing when we are searching, it's possible the restart branch could be called many times, and the do while loop could hold cpu a long time. Here is the call trace in linux 4.19. [ 473.756186] Call trace: [ 473.756196] dump_backtrace+0x0/0x198 [ 473.756199] show_stack+0x24/0x30 [ 473.756205] dump_stack+0xa4/0xcc [ 473.756210] watchdog_timer_fn+0x300/0x3e8 [ 473.756215] __hrtimer_run_queues+0x114/0x358 [ 473.756217] hrtimer_interrupt+0x104/0x2d8 [ 473.756222] arch_timer_handler_virt+0x38/0x58 [ 473.756226] handle_percpu_devid_irq+0x90/0x248 [ 473.756231] generic_handle_irq+0x34/0x50 [ 473.756234] __handle_domain_irq+0x68/0xc0 [ 473.756236] gic_handle_irq+0x6c/0x150 [ 473.756238] el1_irq+0xb8/0x140 [ 473.756286] ext4_es_lookup_extent+0xdc/0x258 [ext4] [ 473.756310] ext4_map_blocks+0x64/0x5c0 [ext4] [ 473.756333] ext4_getblk+0x6c/0x1d0 [ext4] [ 473.756356] ext4_bread_batch+0x7c/0x1f8 [ext4] [ 473.756379] ext4_find_entry+0x124/0x3f8 [ext4] [ 473.756402] ext4_lookup+0x8c/0x258 [ext4] [ 473.756407] __lookup_hash+0x8c/0xe8 [ 473.756411] filename_create+0xa0/0x170 [ 473.756413] do_mkdirat+0x6c/0x140 [ 473.756415] __arm64_sys_mkdirat+0x28/0x38 [ 473.756419] el0_svc_common+0x78/0x130 [ 473.756421] el0_svc_handler+0x38/0x78 [ 473.756423] el0_svc+0x8/0xc [ 485.755156] watchdog: BUG: soft lockup - CPU#2 stuck for 22s! [tmp:5149] Add cond_resched() to avoid soft lockup and to provide a better system responding. Link: https://lore.kernel.org/r/20200215080206.13293-1-luoshijie1@huawei.com Signed-off-by: Shijie Luo Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Cc: stable@kernel.org --- fs/ext4/namei.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index ceff4b4b1877..b05ea72f38fd 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1511,6 +1511,7 @@ restart: /* * We deal with the read-ahead logic here. */ + cond_resched(); if (ra_ptr >= ra_max) { /* Refill the readahead buffer */ ra_ptr = 0; -- cgit v1.2.3-59-g8ed1b From ce4a64e1f656138e2a1481049ea554720f86b43a Mon Sep 17 00:00:00 2001 From: Scott Branden Date: Wed, 19 Feb 2020 14:14:03 -0800 Subject: docs: arm64: fix trivial spelling enought to enough in memory.rst Fix trivial spelling error enought to enough in memory.rst. Cc: trivial@kernel.org Signed-off-by: Scott Branden Signed-off-by: Will Deacon --- Documentation/arm64/memory.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst index 02e02175e6f5..cf03b3290800 100644 --- a/Documentation/arm64/memory.rst +++ b/Documentation/arm64/memory.rst @@ -129,7 +129,7 @@ this logic. As a single binary will need to support both 48-bit and 52-bit VA spaces, the VMEMMAP must be sized large enough for 52-bit VAs and -also must be sized large enought to accommodate a fixed PAGE_OFFSET. +also must be sized large enough to accommodate a fixed PAGE_OFFSET. Most code in the kernel should not need to consider the VA_BITS, for code that does need to know the VA size the variables are -- cgit v1.2.3-59-g8ed1b From dcde237319e626d1ec3c9d8b7613032f0fd4663a Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 19 Feb 2020 12:31:56 +0000 Subject: mm: Avoid creating virtual address aliases in brk()/mmap()/mremap() Currently the arm64 kernel ignores the top address byte passed to brk(), mmap() and mremap(). When the user is not aware of the 56-bit address limit or relies on the kernel to return an error, untagging such pointers has the potential to create address aliases in user-space. Passing a tagged address to munmap(), madvise() is permitted since the tagged pointer is expected to be inside an existing mapping. The current behaviour breaks the existing glibc malloc() implementation which relies on brk() with an address beyond 56-bit to be rejected by the kernel. Remove untagging in the above functions by partially reverting commit ce18d171cb73 ("mm: untag user pointers in mmap/munmap/mremap/brk"). In addition, update the arm64 tagged-address-abi.rst document accordingly. Link: https://bugzilla.redhat.com/1797052 Fixes: ce18d171cb73 ("mm: untag user pointers in mmap/munmap/mremap/brk") Cc: # 5.4.x- Cc: Florian Weimer Reviewed-by: Andrew Morton Reported-by: Victor Stinner Acked-by: Will Deacon Acked-by: Andrey Konovalov Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon --- Documentation/arm64/tagged-address-abi.rst | 11 +++++++++-- mm/mmap.c | 4 ---- mm/mremap.c | 1 - 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/Documentation/arm64/tagged-address-abi.rst b/Documentation/arm64/tagged-address-abi.rst index d4a85d535bf9..4a9d9c794ee5 100644 --- a/Documentation/arm64/tagged-address-abi.rst +++ b/Documentation/arm64/tagged-address-abi.rst @@ -44,8 +44,15 @@ The AArch64 Tagged Address ABI has two stages of relaxation depending how the user addresses are used by the kernel: 1. User addresses not accessed by the kernel but used for address space - management (e.g. ``mmap()``, ``mprotect()``, ``madvise()``). The use - of valid tagged pointers in this context is always allowed. + management (e.g. ``mprotect()``, ``madvise()``). The use of valid + tagged pointers in this context is allowed with the exception of + ``brk()``, ``mmap()`` and the ``new_address`` argument to + ``mremap()`` as these have the potential to alias with existing + user addresses. + + NOTE: This behaviour changed in v5.6 and so some earlier kernels may + incorrectly accept valid tagged pointers for the ``brk()``, + ``mmap()`` and ``mremap()`` system calls. 2. User addresses accessed by the kernel (e.g. ``write()``). This ABI relaxation is disabled by default and the application thread needs to diff --git a/mm/mmap.c b/mm/mmap.c index 6756b8bb0033..d681a20eb4ea 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -195,8 +195,6 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) bool downgraded = false; LIST_HEAD(uf); - brk = untagged_addr(brk); - if (down_write_killable(&mm->mmap_sem)) return -EINTR; @@ -1557,8 +1555,6 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, struct file *file = NULL; unsigned long retval; - addr = untagged_addr(addr); - if (!(flags & MAP_ANONYMOUS)) { audit_mmap_fd(fd, flags); file = fget(fd); diff --git a/mm/mremap.c b/mm/mremap.c index 122938dcec15..af363063ea23 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -607,7 +607,6 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, LIST_HEAD(uf_unmap); addr = untagged_addr(addr); - new_addr = untagged_addr(new_addr); if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) return ret; -- cgit v1.2.3-59-g8ed1b From 14ba91c74782f4d470f4d7c7cf585e29d4761035 Mon Sep 17 00:00:00 2001 From: Jonathan Neuschäfer Date: Tue, 18 Feb 2020 15:58:18 +0100 Subject: Documentation: power: Drop reference to interface.rst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It has been merged into sleep-states.rst. Fixes: c21502efdaed ("Documentation: admin-guide: PM: Update sleep states documentation") Signed-off-by: Jonathan Neuschäfer Signed-off-by: Rafael J. Wysocki --- Documentation/power/index.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/power/index.rst b/Documentation/power/index.rst index 002e42745263..ced8a8007434 100644 --- a/Documentation/power/index.rst +++ b/Documentation/power/index.rst @@ -13,7 +13,6 @@ Power Management drivers-testing energy-model freezing-of-tasks - interface opp pci pm_qos_interface -- cgit v1.2.3-59-g8ed1b From b0c609ab2057d0953fa05e7566f0c0e8a28fa9e1 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Fri, 14 Feb 2020 15:06:21 +0100 Subject: PM / hibernate: fix typo "reserverd_size" -> "reserved_size" Fix a mistake in a variable name in a comment. Signed-off-by: Alexandre Belloni Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index ddade80ad276..d82b7b88d616 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1681,7 +1681,7 @@ static unsigned long minimum_image_size(unsigned long saveable) * hibernation for allocations made while saving the image and for device * drivers, in case they need to allocate memory from their hibernation * callbacks (these two numbers are given by PAGES_FOR_IO (which is a rough - * estimate) and reserverd_size divided by PAGE_SIZE (which is tunable through + * estimate) and reserved_size divided by PAGE_SIZE (which is tunable through * /sys/power/reserved_size, respectively). To make this happen, we compute the * total number of available page frames and allocate at least * -- cgit v1.2.3-59-g8ed1b From 68ca0fd272dac9a9f78fbf14b5e65de34f12c1b4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 6 Feb 2020 08:11:39 +0000 Subject: selftest/lkdtm: Don't pollute 'git status' Commit 46d1a0f03d66 ("selftests/lkdtm: Add tests for LKDTM targets") added generation of lkdtm test scripts. Ignore those generated scripts when performing 'git status' Fixes: 46d1a0f03d66 ("selftests/lkdtm: Add tests for LKDTM targets") Signed-off-by: Christophe Leroy Signed-off-by: Shuah Khan --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 72ef86a5570d..2763fce8766c 100644 --- a/.gitignore +++ b/.gitignore @@ -100,6 +100,10 @@ modules.order /include/ksym/ /arch/*/include/generated/ +# Generated lkdtm tests +/tools/testing/selftests/lkdtm/*.sh +!/tools/testing/selftests/lkdtm/run.sh + # stgit generated dirs patches-* -- cgit v1.2.3-59-g8ed1b From b9167c8078c3527de6da241c8a1a75a9224ed90a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 20 Feb 2020 15:42:41 +1100 Subject: selftests: Install settings files to fix TIMEOUT failures Commit 852c8cbf34d3 ("selftests/kselftest/runner.sh: Add 45 second timeout per test") added a 45 second timeout for tests, and also added a way for tests to customise the timeout via a settings file. For example the ftrace tests take multiple minutes to run, so they were given longer in commit b43e78f65b1d ("tracing/selftests: Turn off timeout setting"). This works when the tests are run from the source tree. However if the tests are installed with "make -C tools/testing/selftests install", the settings files are not copied into the install directory. When the tests are then run from the install directory the longer timeouts are not applied and the tests timeout incorrectly. So add the settings files to TEST_FILES of the appropriate Makefiles to cause the settings files to be installed using the existing install logic. Fixes: 852c8cbf34d3 ("selftests/kselftest/runner.sh: Add 45 second timeout per test") Signed-off-by: Michael Ellerman Signed-off-by: Shuah Khan --- tools/testing/selftests/ftrace/Makefile | 2 +- tools/testing/selftests/livepatch/Makefile | 2 ++ tools/testing/selftests/net/mptcp/Makefile | 2 ++ tools/testing/selftests/rseq/Makefile | 2 ++ tools/testing/selftests/rtc/Makefile | 2 ++ 5 files changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile index cd1f5b3a7774..d6e106fbce11 100644 --- a/tools/testing/selftests/ftrace/Makefile +++ b/tools/testing/selftests/ftrace/Makefile @@ -2,7 +2,7 @@ all: TEST_PROGS := ftracetest -TEST_FILES := test.d +TEST_FILES := test.d settings EXTRA_CLEAN := $(OUTPUT)/logs/* include ../lib.mk diff --git a/tools/testing/selftests/livepatch/Makefile b/tools/testing/selftests/livepatch/Makefile index 3876d8d62494..1acc9e1fa3fb 100644 --- a/tools/testing/selftests/livepatch/Makefile +++ b/tools/testing/selftests/livepatch/Makefile @@ -8,4 +8,6 @@ TEST_PROGS := \ test-state.sh \ test-ftrace.sh +TEST_FILES := settings + include ../lib.mk diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index 93de52016dde..ba450e62dc5b 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -8,6 +8,8 @@ TEST_PROGS := mptcp_connect.sh TEST_GEN_FILES = mptcp_connect +TEST_FILES := settings + EXTRA_CLEAN := *.pcap include ../../lib.mk diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile index d6469535630a..f1053630bb6f 100644 --- a/tools/testing/selftests/rseq/Makefile +++ b/tools/testing/selftests/rseq/Makefile @@ -19,6 +19,8 @@ TEST_GEN_PROGS_EXTENDED = librseq.so TEST_PROGS = run_param_test.sh +TEST_FILES := settings + include ../lib.mk $(OUTPUT)/librseq.so: rseq.c rseq.h rseq-*.h diff --git a/tools/testing/selftests/rtc/Makefile b/tools/testing/selftests/rtc/Makefile index 2d93d65723c9..55198ecc04db 100644 --- a/tools/testing/selftests/rtc/Makefile +++ b/tools/testing/selftests/rtc/Makefile @@ -6,4 +6,6 @@ TEST_GEN_PROGS = rtctest TEST_GEN_PROGS_EXTENDED = setdate +TEST_FILES := settings + include ../lib.mk -- cgit v1.2.3-59-g8ed1b From ef89d0545132d685f73da6f58b7e7fe002536f91 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 20 Feb 2020 22:37:48 +1100 Subject: selftests/rseq: Fix out-of-tree compilation Currently if you build with O=... the rseq tests don't build: $ make O=$PWD/output -C tools/testing/selftests/ TARGETS=rseq make: Entering directory '/linux/tools/testing/selftests' ... make[1]: Entering directory '/linux/tools/testing/selftests/rseq' gcc -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./ -shared -fPIC rseq.c -lpthread -o /linux/output/rseq/librseq.so gcc -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./ basic_test.c -lpthread -lrseq -o /linux/output/rseq/basic_test /usr/bin/ld: cannot find -lrseq collect2: error: ld returned 1 exit status This is because the library search path points to the source directory, not the output. We can fix it by changing the library search path to $(OUTPUT). Signed-off-by: Michael Ellerman Signed-off-by: Shuah Khan --- tools/testing/selftests/rseq/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile index f1053630bb6f..2af9d39a9716 100644 --- a/tools/testing/selftests/rseq/Makefile +++ b/tools/testing/selftests/rseq/Makefile @@ -4,7 +4,7 @@ ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),) CLANG_FLAGS += -no-integrated-as endif -CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./ \ +CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -L$(OUTPUT) -Wl,-rpath=./ \ $(CLANG_FLAGS) LDLIBS += -lpthread -- cgit v1.2.3-59-g8ed1b From 9038ec99ceb94fb8d93ade5e236b2928f0792c7c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 19 Feb 2020 22:23:18 -0800 Subject: x86/xen: Distribute switch variables for initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Variables declared in a switch statement before any case statements cannot be automatically initialized with compiler instrumentation (as they are not part of any execution flow). With GCC's proposed automatic stack variable initialization feature, this triggers a warning (and they don't get initialized). Clang's automatic stack variable initialization (via CONFIG_INIT_STACK_ALL=y) doesn't throw a warning, but it also doesn't initialize such variables[1]. Note that these warnings (or silent skipping) happen before the dead-store elimination optimization phase, so even when the automatic initializations are later elided in favor of direct initializations, the warnings remain. To avoid these problems, move such variables into the "case" where they're used or lift them up into the main function body. arch/x86/xen/enlighten_pv.c: In function ‘xen_write_msr_safe’: arch/x86/xen/enlighten_pv.c:904:12: warning: statement will never be executed [-Wswitch-unreachable] 904 | unsigned which; | ^~~~~ [1] https://bugs.llvm.org/show_bug.cgi?id=44916 Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20200220062318.69299-1-keescook@chromium.org Reviewed-by: Juergen Gross [boris: made @which an 'unsigned int'] Signed-off-by: Boris Ostrovsky --- arch/x86/xen/enlighten_pv.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index ae4a41ca19f6..3fbbc50bb032 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -896,14 +896,15 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err) static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) { int ret; +#ifdef CONFIG_X86_64 + unsigned int which; + u64 base; +#endif ret = 0; switch (msr) { #ifdef CONFIG_X86_64 - unsigned which; - u64 base; - case MSR_FS_BASE: which = SEGBASE_FS; goto set; case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set; case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set; -- cgit v1.2.3-59-g8ed1b From 147f1a1fe5d7e6b01b8df4d0cbd6f9eaf6b6c73b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 13 Feb 2020 18:24:48 +0100 Subject: KVM: x86: fix incorrect comparison in trace event The "u" field in the event has three states, -1/0/1. Using u8 however means that comparison with -1 will always fail, so change to signed char. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmutrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 3c6522b84ff1..ffcd96fc02d0 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -339,7 +339,7 @@ TRACE_EVENT( /* These depend on page entry type, so compute them now. */ __field(bool, r) __field(bool, x) - __field(u8, u) + __field(signed char, u) ), TP_fast_assign( -- cgit v1.2.3-59-g8ed1b From b78a8552d77f8efb7c4fbd92a91b890c32b89528 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Mon, 17 Feb 2020 11:48:26 -0500 Subject: kvm/emulate: fix a -Werror=cast-function-type arch/x86/kvm/emulate.c: In function 'x86_emulate_insn': arch/x86/kvm/emulate.c:5686:22: error: cast between incompatible function types from 'int (*)(struct x86_emulate_ctxt *)' to 'void (*)(struct fastop *)' [-Werror=cast-function-type] rc = fastop(ctxt, (fastop_t)ctxt->execute); Fix it by using an unnamed union of a (*execute) function pointer and a (*fastop) function pointer. Fixes: 3009afc6e39e ("KVM: x86: Use a typedef for fastop functions") Suggested-by: Paolo Bonzini Signed-off-by: Qian Cai Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_emulate.h | 13 ++++++++++++- arch/x86/kvm/emulate.c | 36 ++++++++++++++---------------------- 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 03946eb3e2b9..2a8f2bd2e5cf 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -292,6 +292,14 @@ enum x86emul_mode { #define X86EMUL_SMM_MASK (1 << 6) #define X86EMUL_SMM_INSIDE_NMI_MASK (1 << 7) +/* + * fastop functions are declared as taking a never-defined fastop parameter, + * so they can't be called from C directly. + */ +struct fastop; + +typedef void (*fastop_t)(struct fastop *); + struct x86_emulate_ctxt { const struct x86_emulate_ops *ops; @@ -324,7 +332,10 @@ struct x86_emulate_ctxt { struct operand src; struct operand src2; struct operand dst; - int (*execute)(struct x86_emulate_ctxt *ctxt); + union { + int (*execute)(struct x86_emulate_ctxt *ctxt); + fastop_t fop; + }; int (*check_perm)(struct x86_emulate_ctxt *ctxt); /* * The following six fields are cleared together, diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index ddbc61984227..dd19fb3539e0 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -191,25 +191,6 @@ #define NR_FASTOP (ilog2(sizeof(ulong)) + 1) #define FASTOP_SIZE 8 -/* - * fastop functions have a special calling convention: - * - * dst: rax (in/out) - * src: rdx (in/out) - * src2: rcx (in) - * flags: rflags (in/out) - * ex: rsi (in:fastop pointer, out:zero if exception) - * - * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for - * different operand sizes can be reached by calculation, rather than a jump - * table (which would be bigger than the code). - * - * fastop functions are declared as taking a never-defined fastop parameter, - * so they can't be called from C directly. - */ - -struct fastop; - struct opcode { u64 flags : 56; u64 intercept : 8; @@ -311,8 +292,19 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) #define ON64(x) #endif -typedef void (*fastop_t)(struct fastop *); - +/* + * fastop functions have a special calling convention: + * + * dst: rax (in/out) + * src: rdx (in/out) + * src2: rcx (in) + * flags: rflags (in/out) + * ex: rsi (in:fastop pointer, out:zero if exception) + * + * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for + * different operand sizes can be reached by calculation, rather than a jump + * table (which would be bigger than the code). + */ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); #define __FOP_FUNC(name) \ @@ -5683,7 +5675,7 @@ special_insn: if (ctxt->execute) { if (ctxt->d & Fastop) - rc = fastop(ctxt, (fastop_t)ctxt->execute); + rc = fastop(ctxt, ctxt->fop); else rc = ctxt->execute(ctxt); if (rc != X86EMUL_CONTINUE) -- cgit v1.2.3-59-g8ed1b From 161d179261f95ac56f61f94f89304e0620534230 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 19 Feb 2020 22:23:04 -0800 Subject: net: core: Distribute switch variables for initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Variables declared in a switch statement before any case statements cannot be automatically initialized with compiler instrumentation (as they are not part of any execution flow). With GCC's proposed automatic stack variable initialization feature, this triggers a warning (and they don't get initialized). Clang's automatic stack variable initialization (via CONFIG_INIT_STACK_ALL=y) doesn't throw a warning, but it also doesn't initialize such variables[1]. Note that these warnings (or silent skipping) happen before the dead-store elimination optimization phase, so even when the automatic initializations are later elided in favor of direct initializations, the warnings remain. To avoid these problems, move such variables into the "case" where they're used or lift them up into the main function body. net/core/skbuff.c: In function ‘skb_checksum_setup_ip’: net/core/skbuff.c:4809:7: warning: statement will never be executed [-Wswitch-unreachable] 4809 | int err; | ^~~ [1] https://bugs.llvm.org/show_bug.cgi?id=44916 Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- net/core/skbuff.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1365a556152c..e1101a4f90a6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4803,9 +4803,9 @@ static __sum16 *skb_checksum_setup_ip(struct sk_buff *skb, typeof(IPPROTO_IP) proto, unsigned int off) { - switch (proto) { - int err; + int err; + switch (proto) { case IPPROTO_TCP: err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr), off + MAX_TCP_HDR_LEN); -- cgit v1.2.3-59-g8ed1b From 46d30cb1045c2ab1ada269702c8c84d6446baf81 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 19 Feb 2020 22:23:07 -0800 Subject: net: ip6_gre: Distribute switch variables for initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Variables declared in a switch statement before any case statements cannot be automatically initialized with compiler instrumentation (as they are not part of any execution flow). With GCC's proposed automatic stack variable initialization feature, this triggers a warning (and they don't get initialized). Clang's automatic stack variable initialization (via CONFIG_INIT_STACK_ALL=y) doesn't throw a warning, but it also doesn't initialize such variables[1]. Note that these warnings (or silent skipping) happen before the dead-store elimination optimization phase, so even when the automatic initializations are later elided in favor of direct initializations, the warnings remain. To avoid these problems, move such variables into the "case" where they're used or lift them up into the main function body. net/ipv6/ip6_gre.c: In function ‘ip6gre_err’: net/ipv6/ip6_gre.c:440:32: warning: statement will never be executed [-Wswitch-unreachable] 440 | struct ipv6_tlv_tnl_enc_lim *tel; | ^~~ net/ipv6/ip6_tunnel.c: In function ‘ip6_tnl_err’: net/ipv6/ip6_tunnel.c:520:32: warning: statement will never be executed [-Wswitch-unreachable] 520 | struct ipv6_tlv_tnl_enc_lim *tel; | ^~~ [1] https://bugs.llvm.org/show_bug.cgi?id=44916 Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 8 +++++--- net/ipv6/ip6_tunnel.c | 13 +++++++++---- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 55bfc5149d0c..781ca8c07a0d 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -437,8 +437,6 @@ static int ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return -ENOENT; switch (type) { - struct ipv6_tlv_tnl_enc_lim *tel; - __u32 teli; case ICMPV6_DEST_UNREACH: net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n", t->parms.name); @@ -452,7 +450,10 @@ static int ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, break; } return 0; - case ICMPV6_PARAMPROB: + case ICMPV6_PARAMPROB: { + struct ipv6_tlv_tnl_enc_lim *tel; + __u32 teli; + teli = 0; if (code == ICMPV6_HDR_FIELD) teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data); @@ -468,6 +469,7 @@ static int ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, t->parms.name); } return 0; + } case ICMPV6_PKT_TOOBIG: ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); return 0; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 5d65436ad5ad..4703b09808d0 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -517,8 +517,6 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, err = 0; switch (*type) { - struct ipv6_tlv_tnl_enc_lim *tel; - __u32 mtu, teli; case ICMPV6_DEST_UNREACH: net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n", t->parms.name); @@ -531,7 +529,10 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, rel_msg = 1; } break; - case ICMPV6_PARAMPROB: + case ICMPV6_PARAMPROB: { + struct ipv6_tlv_tnl_enc_lim *tel; + __u32 teli; + teli = 0; if ((*code) == ICMPV6_HDR_FIELD) teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data); @@ -548,7 +549,10 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, t->parms.name); } break; - case ICMPV6_PKT_TOOBIG: + } + case ICMPV6_PKT_TOOBIG: { + __u32 mtu; + ip6_update_pmtu(skb, net, htonl(*info), 0, 0, sock_net_uid(net, NULL)); mtu = *info - offset; @@ -562,6 +566,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, rel_msg = 1; } break; + } case NDISC_REDIRECT: ip6_redirect(skb, net, skb->dev->ifindex, 0, sock_net_uid(net, NULL)); -- cgit v1.2.3-59-g8ed1b From 16a556eeb7ed2dc3709fe2c5be76accdfa4901ab Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 19 Feb 2020 22:23:09 -0800 Subject: openvswitch: Distribute switch variables for initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Variables declared in a switch statement before any case statements cannot be automatically initialized with compiler instrumentation (as they are not part of any execution flow). With GCC's proposed automatic stack variable initialization feature, this triggers a warning (and they don't get initialized). Clang's automatic stack variable initialization (via CONFIG_INIT_STACK_ALL=y) doesn't throw a warning, but it also doesn't initialize such variables[1]. Note that these warnings (or silent skipping) happen before the dead-store elimination optimization phase, so even when the automatic initializations are later elided in favor of direct initializations, the warnings remain. To avoid these problems, move such variables into the "case" where they're used or lift them up into the main function body. net/openvswitch/flow_netlink.c: In function ‘validate_set’: net/openvswitch/flow_netlink.c:2711:29: warning: statement will never be executed [-Wswitch-unreachable] 2711 | const struct ovs_key_ipv4 *ipv4_key; | ^~~~~~~~ [1] https://bugs.llvm.org/show_bug.cgi?id=44916 Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- net/openvswitch/flow_netlink.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 7da4230627f5..288122eec7c8 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2708,10 +2708,6 @@ static int validate_set(const struct nlattr *a, return -EINVAL; switch (key_type) { - const struct ovs_key_ipv4 *ipv4_key; - const struct ovs_key_ipv6 *ipv6_key; - int err; - case OVS_KEY_ATTR_PRIORITY: case OVS_KEY_ATTR_SKB_MARK: case OVS_KEY_ATTR_CT_MARK: @@ -2723,7 +2719,9 @@ static int validate_set(const struct nlattr *a, return -EINVAL; break; - case OVS_KEY_ATTR_TUNNEL: + case OVS_KEY_ATTR_TUNNEL: { + int err; + if (masked) return -EINVAL; /* Masked tunnel set not supported. */ @@ -2732,8 +2730,10 @@ static int validate_set(const struct nlattr *a, if (err) return err; break; + } + case OVS_KEY_ATTR_IPV4: { + const struct ovs_key_ipv4 *ipv4_key; - case OVS_KEY_ATTR_IPV4: if (eth_type != htons(ETH_P_IP)) return -EINVAL; @@ -2753,8 +2753,10 @@ static int validate_set(const struct nlattr *a, return -EINVAL; } break; + } + case OVS_KEY_ATTR_IPV6: { + const struct ovs_key_ipv6 *ipv6_key; - case OVS_KEY_ATTR_IPV6: if (eth_type != htons(ETH_P_IPV6)) return -EINVAL; @@ -2781,7 +2783,7 @@ static int validate_set(const struct nlattr *a, return -EINVAL; break; - + } case OVS_KEY_ATTR_TCP: if ((eth_type != htons(ETH_P_IP) && eth_type != htons(ETH_P_IPV6)) || -- cgit v1.2.3-59-g8ed1b From 6f3846f0955308b6d1b219419da42b8de2c08845 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Thu, 20 Feb 2020 15:54:54 +0100 Subject: s390/qeth: vnicc Fix EOPNOTSUPP precedence When getting or setting VNICC parameters, the error code EOPNOTSUPP should have precedence over EBUSY. EBUSY is used because vnicc feature and bridgeport feature are mutually exclusive, which is a temporary condition. Whereas EOPNOTSUPP indicates that the HW does not support all or parts of the vnicc feature. This issue causes the vnicc sysfs params to show 'blocked by bridgeport' for HW that does not support VNICC at all. Fixes: caa1f0b10d18 ("s390/qeth: add VNICC enable/disable support") Signed-off-by: Alexandra Winter Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l2_main.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 692bd2623401..9972d96820f3 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1707,15 +1707,14 @@ int qeth_l2_vnicc_set_state(struct qeth_card *card, u32 vnicc, bool state) QETH_CARD_TEXT(card, 2, "vniccsch"); - /* do not change anything if BridgePort is enabled */ - if (qeth_bridgeport_is_in_use(card)) - return -EBUSY; - /* check if characteristic and enable/disable are supported */ if (!(card->options.vnicc.sup_chars & vnicc) || !(card->options.vnicc.set_char_sup & vnicc)) return -EOPNOTSUPP; + if (qeth_bridgeport_is_in_use(card)) + return -EBUSY; + /* set enable/disable command and store wanted characteristic */ if (state) { cmd = IPA_VNICC_ENABLE; @@ -1761,14 +1760,13 @@ int qeth_l2_vnicc_get_state(struct qeth_card *card, u32 vnicc, bool *state) QETH_CARD_TEXT(card, 2, "vniccgch"); - /* do not get anything if BridgePort is enabled */ - if (qeth_bridgeport_is_in_use(card)) - return -EBUSY; - /* check if characteristic is supported */ if (!(card->options.vnicc.sup_chars & vnicc)) return -EOPNOTSUPP; + if (qeth_bridgeport_is_in_use(card)) + return -EBUSY; + /* if card is ready, query current VNICC state */ if (qeth_card_hw_is_reachable(card)) rc = qeth_l2_vnicc_query_chars(card); @@ -1786,15 +1784,14 @@ int qeth_l2_vnicc_set_timeout(struct qeth_card *card, u32 timeout) QETH_CARD_TEXT(card, 2, "vniccsto"); - /* do not change anything if BridgePort is enabled */ - if (qeth_bridgeport_is_in_use(card)) - return -EBUSY; - /* check if characteristic and set_timeout are supported */ if (!(card->options.vnicc.sup_chars & QETH_VNICC_LEARNING) || !(card->options.vnicc.getset_timeout_sup & QETH_VNICC_LEARNING)) return -EOPNOTSUPP; + if (qeth_bridgeport_is_in_use(card)) + return -EBUSY; + /* do we need to do anything? */ if (card->options.vnicc.learning_timeout == timeout) return rc; @@ -1823,14 +1820,14 @@ int qeth_l2_vnicc_get_timeout(struct qeth_card *card, u32 *timeout) QETH_CARD_TEXT(card, 2, "vniccgto"); - /* do not get anything if BridgePort is enabled */ - if (qeth_bridgeport_is_in_use(card)) - return -EBUSY; - /* check if characteristic and get_timeout are supported */ if (!(card->options.vnicc.sup_chars & QETH_VNICC_LEARNING) || !(card->options.vnicc.getset_timeout_sup & QETH_VNICC_LEARNING)) return -EOPNOTSUPP; + + if (qeth_bridgeport_is_in_use(card)) + return -EBUSY; + /* if card is ready, get timeout. Otherwise, just return stored value */ *timeout = card->options.vnicc.learning_timeout; if (qeth_card_hw_is_reachable(card)) -- cgit v1.2.3-59-g8ed1b From 420579dba126c6111b5a3dea062f21a7e4e647c6 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 20 Feb 2020 15:54:55 +0100 Subject: s390/qeth: don't warn for napi with 0 budget Calling napi->poll() with 0 budget is a legitimate use by netpoll. Fixes: a1c3ed4c9ca0 ("qeth: NAPI support for l2 and l3 discipline") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 9639938581f5..2264c6619def 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5447,7 +5447,6 @@ static int qeth_extract_skbs(struct qeth_card *card, int budget, { int work_done = 0; - WARN_ON_ONCE(!budget); *done = false; while (budget) { -- cgit v1.2.3-59-g8ed1b From 54a61fbc020fd2e305680871c453abcf7fc0339b Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 20 Feb 2020 15:54:56 +0100 Subject: s390/qeth: fix off-by-one in RX copybreak check The RX copybreak is intended as the _max_ value where the frame's data should be copied. So for frame_len == copybreak, don't build an SG skb. Fixes: 4a71df50047f ("qeth: new qeth device driver") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 2264c6619def..5efcaa43615b 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5344,7 +5344,7 @@ next_packet: } use_rx_sg = (card->options.cq == QETH_CQ_ENABLED) || - ((skb_len >= card->options.rx_sg_cb) && + (skb_len > card->options.rx_sg_cb && !atomic_read(&card->force_alloc_skb) && !IS_OSN(card)); -- cgit v1.2.3-59-g8ed1b From 279eef0531928a6669230879a6eed081513ad5a3 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Fri, 14 Feb 2020 16:56:38 -0600 Subject: tracing: Make sure synth_event_trace() example always uses u64 synth_event_trace() is the varargs version of synth_event_trace_array(), which takes an array of u64, as do synth_event_add_val() et al. To not only be consistent with those, but also to address the fact that synth_event_trace() expects every arg to be of the same type since it doesn't also pass in e.g. a format string, the caller needs to make sure all args are of the same type, u64. u64 is used because it needs to accomodate the largest type available in synthetic events, which is u64. This fixes the bug reported by the kernel test robot/Rong Chen. Link: https://lore.kernel.org/lkml/20200212113444.GS12867@shao2-debian/ Link: http://lkml.kernel.org/r/894c4e955558b521210ee0642ba194a9e603354c.1581720155.git.zanussi@kernel.org Fixes: 9fe41efaca084 ("tracing: Add synth event generation test module") Reported-by: kernel test robot Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/synth_event_gen_test.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/kernel/trace/synth_event_gen_test.c b/kernel/trace/synth_event_gen_test.c index 4aefe003cb7c..6866280a9b10 100644 --- a/kernel/trace/synth_event_gen_test.c +++ b/kernel/trace/synth_event_gen_test.c @@ -111,11 +111,11 @@ static int __init test_gen_synth_cmd(void) /* Create some bogus values just for testing */ vals[0] = 777; /* next_pid_field */ - vals[1] = (u64)"hula hoops"; /* next_comm_field */ + vals[1] = (u64)(long)"hula hoops"; /* next_comm_field */ vals[2] = 1000000; /* ts_ns */ vals[3] = 1000; /* ts_ms */ vals[4] = smp_processor_id(); /* cpu */ - vals[5] = (u64)"thneed"; /* my_string_field */ + vals[5] = (u64)(long)"thneed"; /* my_string_field */ vals[6] = 598; /* my_int_field */ /* Now generate a gen_synth_test event */ @@ -218,11 +218,11 @@ static int __init test_empty_synth_event(void) /* Create some bogus values just for testing */ vals[0] = 777; /* next_pid_field */ - vals[1] = (u64)"tiddlywinks"; /* next_comm_field */ + vals[1] = (u64)(long)"tiddlywinks"; /* next_comm_field */ vals[2] = 1000000; /* ts_ns */ vals[3] = 1000; /* ts_ms */ vals[4] = smp_processor_id(); /* cpu */ - vals[5] = (u64)"thneed_2.0"; /* my_string_field */ + vals[5] = (u64)(long)"thneed_2.0"; /* my_string_field */ vals[6] = 399; /* my_int_field */ /* Now trace an empty_synth_test event */ @@ -290,11 +290,11 @@ static int __init test_create_synth_event(void) /* Create some bogus values just for testing */ vals[0] = 777; /* next_pid_field */ - vals[1] = (u64)"tiddlywinks"; /* next_comm_field */ + vals[1] = (u64)(long)"tiddlywinks"; /* next_comm_field */ vals[2] = 1000000; /* ts_ns */ vals[3] = 1000; /* ts_ms */ vals[4] = smp_processor_id(); /* cpu */ - vals[5] = (u64)"thneed"; /* my_string_field */ + vals[5] = (u64)(long)"thneed"; /* my_string_field */ vals[6] = 398; /* my_int_field */ /* Now generate a create_synth_test event */ @@ -330,7 +330,7 @@ static int __init test_add_next_synth_val(void) goto out; /* next_comm_field */ - ret = synth_event_add_next_val((u64)"slinky", &trace_state); + ret = synth_event_add_next_val((u64)(long)"slinky", &trace_state); if (ret) goto out; @@ -350,7 +350,7 @@ static int __init test_add_next_synth_val(void) goto out; /* my_string_field */ - ret = synth_event_add_next_val((u64)"thneed_2.01", &trace_state); + ret = synth_event_add_next_val((u64)(long)"thneed_2.01", &trace_state); if (ret) goto out; @@ -396,12 +396,12 @@ static int __init test_add_synth_val(void) if (ret) goto out; - ret = synth_event_add_val("next_comm_field", (u64)"silly putty", + ret = synth_event_add_val("next_comm_field", (u64)(long)"silly putty", &trace_state); if (ret) goto out; - ret = synth_event_add_val("my_string_field", (u64)"thneed_9", + ret = synth_event_add_val("my_string_field", (u64)(long)"thneed_9", &trace_state); if (ret) goto out; @@ -423,13 +423,13 @@ static int __init test_trace_synth_event(void) /* Trace some bogus values just for testing */ ret = synth_event_trace(create_synth_test, 7, /* number of values */ - 444, /* next_pid_field */ - (u64)"clackers", /* next_comm_field */ - 1000000, /* ts_ns */ - 1000, /* ts_ms */ - smp_processor_id(), /* cpu */ - (u64)"Thneed", /* my_string_field */ - 999); /* my_int_field */ + (u64)444, /* next_pid_field */ + (u64)(long)"clackers", /* next_comm_field */ + (u64)1000000, /* ts_ns */ + (u64)1000, /* ts_ms */ + (u64)smp_processor_id(),/* cpu */ + (u64)(long)"Thneed", /* my_string_field */ + (u64)999); /* my_int_field */ return ret; } -- cgit v1.2.3-59-g8ed1b From 1d9d4c90194a8c3b2f7da9f4bf3f8ba2ed810656 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Fri, 14 Feb 2020 16:56:39 -0600 Subject: tracing: Make synth_event trace functions endian-correct synth_event_trace(), synth_event_trace_array() and __synth_event_add_val() write directly into the trace buffer and need to take endianness into account, like trace_event_raw_event_synth() does. Link: http://lkml.kernel.org/r/2011354355e405af9c9d28abba430d1f5ff7771a.1581720155.git.zanussi@kernel.org Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 62 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 58 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 65b54d6a1422..6a380fb83864 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1891,7 +1891,25 @@ int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...) strscpy(str_field, str_val, STR_VAR_LEN_MAX); n_u64 += STR_VAR_LEN_MAX / sizeof(u64); } else { - state.entry->fields[n_u64] = val; + struct synth_field *field = state.event->fields[i]; + + switch (field->size) { + case 1: + *(u8 *)&state.entry->fields[n_u64] = (u8)val; + break; + + case 2: + *(u16 *)&state.entry->fields[n_u64] = (u16)val; + break; + + case 4: + *(u32 *)&state.entry->fields[n_u64] = (u32)val; + break; + + default: + state.entry->fields[n_u64] = val; + break; + } n_u64++; } } @@ -1943,7 +1961,26 @@ int synth_event_trace_array(struct trace_event_file *file, u64 *vals, strscpy(str_field, str_val, STR_VAR_LEN_MAX); n_u64 += STR_VAR_LEN_MAX / sizeof(u64); } else { - state.entry->fields[n_u64] = vals[i]; + struct synth_field *field = state.event->fields[i]; + u64 val = vals[i]; + + switch (field->size) { + case 1: + *(u8 *)&state.entry->fields[n_u64] = (u8)val; + break; + + case 2: + *(u16 *)&state.entry->fields[n_u64] = (u16)val; + break; + + case 4: + *(u32 *)&state.entry->fields[n_u64] = (u32)val; + break; + + default: + state.entry->fields[n_u64] = val; + break; + } n_u64++; } } @@ -2062,8 +2099,25 @@ static int __synth_event_add_val(const char *field_name, u64 val, str_field = (char *)&entry->fields[field->offset]; strscpy(str_field, str_val, STR_VAR_LEN_MAX); - } else - entry->fields[field->offset] = val; + } else { + switch (field->size) { + case 1: + *(u8 *)&trace_state->entry->fields[field->offset] = (u8)val; + break; + + case 2: + *(u16 *)&trace_state->entry->fields[field->offset] = (u16)val; + break; + + case 4: + *(u32 *)&trace_state->entry->fields[field->offset] = (u32)val; + break; + + default: + trace_state->entry->fields[field->offset] = val; + break; + } + } out: return ret; } -- cgit v1.2.3-59-g8ed1b From 3843083772dc2afde790a6d7160658b00a808da1 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Fri, 14 Feb 2020 16:56:40 -0600 Subject: tracing: Check that number of vals matches number of synth event fields Commit 7276531d4036('tracing: Consolidate trace() functions') inadvertently dropped the synth_event_trace() and synth_event_trace_array() checks that verify the number of values passed in matches the number of fields in the synthetic event being traced, so add them back. Link: http://lkml.kernel.org/r/32819cac708714693669e0dfe10fe9d935e94a16.1581720155.git.zanussi@kernel.org Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 6a380fb83864..45622194a34d 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1878,6 +1878,11 @@ int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...) return ret; } + if (n_vals != state.event->n_fields) { + ret = -EINVAL; + goto out; + } + va_start(args, n_vals); for (i = 0, n_u64 = 0; i < state.event->n_fields; i++) { u64 val; @@ -1914,7 +1919,7 @@ int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...) } } va_end(args); - +out: __synth_event_trace_end(&state); return ret; @@ -1953,6 +1958,11 @@ int synth_event_trace_array(struct trace_event_file *file, u64 *vals, return ret; } + if (n_vals != state.event->n_fields) { + ret = -EINVAL; + goto out; + } + for (i = 0, n_u64 = 0; i < state.event->n_fields; i++) { if (state.event->fields[i]->is_string) { char *str_val = (char *)(long)vals[i]; @@ -1984,7 +1994,7 @@ int synth_event_trace_array(struct trace_event_file *file, u64 *vals, n_u64++; } } - +out: __synth_event_trace_end(&state); return ret; -- cgit v1.2.3-59-g8ed1b From 784bd0847eda032ed2f3522f87250655a18c0190 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Fri, 14 Feb 2020 16:56:41 -0600 Subject: tracing: Fix number printing bug in print_synth_event() Fix a varargs-related bug in print_synth_event() which resulted in strange output and oopses on 32-bit x86 systems. The problem is that trace_seq_printf() expects the varargs to match the format string, but print_synth_event() was always passing u64 values regardless. This results in unspecified behavior when unpacking with va_arg() in trace_seq_printf(). Add a function that takes the size into account when calling trace_seq_printf(). Before: modprobe-1731 [003] .... 919.039758: gen_synth_test: next_pid_field=777(null)next_comm_field=hula hoops ts_ns=1000000 ts_ms=1000 cpu=3(null)my_string_field=thneed my_int_field=598(null) After: insmod-1136 [001] .... 36.634590: gen_synth_test: next_pid_field=777 next_comm_field=hula hoops ts_ns=1000000 ts_ms=1000 cpu=1 my_string_field=thneed my_int_field=598 Link: http://lkml.kernel.org/r/a9b59eb515dbbd7d4abe53b347dccf7a8e285657.1581720155.git.zanussi@kernel.org Reported-by: Steven Rostedt (VMware) Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 45622194a34d..f068d55bd37f 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -820,6 +820,29 @@ static const char *synth_field_fmt(char *type) return fmt; } +static void print_synth_event_num_val(struct trace_seq *s, + char *print_fmt, char *name, + int size, u64 val, char *space) +{ + switch (size) { + case 1: + trace_seq_printf(s, print_fmt, name, (u8)val, space); + break; + + case 2: + trace_seq_printf(s, print_fmt, name, (u16)val, space); + break; + + case 4: + trace_seq_printf(s, print_fmt, name, (u32)val, space); + break; + + default: + trace_seq_printf(s, print_fmt, name, val, space); + break; + } +} + static enum print_line_t print_synth_event(struct trace_iterator *iter, int flags, struct trace_event *event) @@ -858,10 +881,13 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter, } else { struct trace_print_flags __flags[] = { __def_gfpflag_names, {-1, NULL} }; + char *space = (i == se->n_fields - 1 ? "" : " "); - trace_seq_printf(s, print_fmt, se->fields[i]->name, - entry->fields[n_u64], - i == se->n_fields - 1 ? "" : " "); + print_synth_event_num_val(s, print_fmt, + se->fields[i]->name, + se->fields[i]->size, + entry->fields[n_u64], + space); if (strcmp(se->fields[i]->type, "gfp_t") == 0) { trace_seq_puts(s, " ("); -- cgit v1.2.3-59-g8ed1b From 8645e56a4ad6dcbf504872db7f14a2f67db88ef2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 19 Feb 2020 18:30:26 +0100 Subject: xen: Enable interrupts when calling _cond_resched() xen_maybe_preempt_hcall() is called from the exception entry point xen_do_hypervisor_callback with interrupts disabled. _cond_resched() evades the might_sleep() check in cond_resched() which would have caught that and schedule_debug() unfortunately lacks a check for irqs_disabled(). Enable interrupts around the call and use cond_resched() to catch future issues. Fixes: fdfd811ddde3 ("x86/xen: allow privcmd hypercalls to be preempted") Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/878skypjrh.fsf@nanos.tec.linutronix.de Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky --- drivers/xen/preempt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/xen/preempt.c b/drivers/xen/preempt.c index 8b9919c26095..456a164364a2 100644 --- a/drivers/xen/preempt.c +++ b/drivers/xen/preempt.c @@ -33,7 +33,9 @@ asmlinkage __visible void xen_maybe_preempt_hcall(void) * cpu. */ __this_cpu_write(xen_in_preemptible_hcall, false); - _cond_resched(); + local_irq_enable(); + cond_resched(); + local_irq_disable(); __this_cpu_write(xen_in_preemptible_hcall, true); } } -- cgit v1.2.3-59-g8ed1b From 3c18a9be7c9d4f53239795282c5d927f73f534b3 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 20 Feb 2020 16:29:50 -0500 Subject: tracing: Have synthetic event test use raw_smp_processor_id() The test code that tests synthetic event creation pushes in as one of its test fields the current CPU using "smp_processor_id()". As this is just something to see if the value is correctly passed in, and the actual CPU used does not matter, use raw_smp_processor_id(), otherwise with debug preemption enabled, a warning happens as the smp_processor_id() is called without preemption enabled. Link: http://lkml.kernel.org/r/20200220162950.35162579@gandalf.local.home Reviewed-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/synth_event_gen_test.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/trace/synth_event_gen_test.c b/kernel/trace/synth_event_gen_test.c index 6866280a9b10..7d56d621ffea 100644 --- a/kernel/trace/synth_event_gen_test.c +++ b/kernel/trace/synth_event_gen_test.c @@ -114,7 +114,7 @@ static int __init test_gen_synth_cmd(void) vals[1] = (u64)(long)"hula hoops"; /* next_comm_field */ vals[2] = 1000000; /* ts_ns */ vals[3] = 1000; /* ts_ms */ - vals[4] = smp_processor_id(); /* cpu */ + vals[4] = raw_smp_processor_id(); /* cpu */ vals[5] = (u64)(long)"thneed"; /* my_string_field */ vals[6] = 598; /* my_int_field */ @@ -221,7 +221,7 @@ static int __init test_empty_synth_event(void) vals[1] = (u64)(long)"tiddlywinks"; /* next_comm_field */ vals[2] = 1000000; /* ts_ns */ vals[3] = 1000; /* ts_ms */ - vals[4] = smp_processor_id(); /* cpu */ + vals[4] = raw_smp_processor_id(); /* cpu */ vals[5] = (u64)(long)"thneed_2.0"; /* my_string_field */ vals[6] = 399; /* my_int_field */ @@ -293,7 +293,7 @@ static int __init test_create_synth_event(void) vals[1] = (u64)(long)"tiddlywinks"; /* next_comm_field */ vals[2] = 1000000; /* ts_ns */ vals[3] = 1000; /* ts_ms */ - vals[4] = smp_processor_id(); /* cpu */ + vals[4] = raw_smp_processor_id(); /* cpu */ vals[5] = (u64)(long)"thneed"; /* my_string_field */ vals[6] = 398; /* my_int_field */ @@ -345,7 +345,7 @@ static int __init test_add_next_synth_val(void) goto out; /* cpu */ - ret = synth_event_add_next_val(smp_processor_id(), &trace_state); + ret = synth_event_add_next_val(raw_smp_processor_id(), &trace_state); if (ret) goto out; @@ -388,7 +388,7 @@ static int __init test_add_synth_val(void) if (ret) goto out; - ret = synth_event_add_val("cpu", smp_processor_id(), &trace_state); + ret = synth_event_add_val("cpu", raw_smp_processor_id(), &trace_state); if (ret) goto out; @@ -427,7 +427,7 @@ static int __init test_trace_synth_event(void) (u64)(long)"clackers", /* next_comm_field */ (u64)1000000, /* ts_ns */ (u64)1000, /* ts_ms */ - (u64)smp_processor_id(),/* cpu */ + (u64)raw_smp_processor_id(), /* cpu */ (u64)(long)"Thneed", /* my_string_field */ (u64)999); /* my_int_field */ return ret; -- cgit v1.2.3-59-g8ed1b From 78041c0c9e935d9ce4086feeff6c569ed88ddfd4 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 20 Feb 2020 15:38:01 -0500 Subject: tracing: Disable trace_printk() on post poned tests The tracing seftests checks various aspects of the tracing infrastructure, and one is filtering. If trace_printk() is active during a self test, it can cause the filtering to fail, which will disable that part of the trace. To keep the selftests from failing because of trace_printk() calls, trace_printk() checks the variable tracing_selftest_running, and if set, it does not write to the tracing buffer. As some tracers were registered earlier in boot, the selftest they triggered would fail because not all the infrastructure was set up for the full selftest. Thus, some of the tests were post poned to when their infrastructure was ready (namely file system code). The postpone code did not set the tracing_seftest_running variable, and could fail if a trace_printk() was added and executed during their run. Cc: stable@vger.kernel.org Fixes: 9afecfbb95198 ("tracing: Postpone tracer start-up tests till the system is more robust") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 183b031a3828..a89c562ffb8f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1837,6 +1837,7 @@ static __init int init_trace_selftests(void) pr_info("Running postponed tracer tests:\n"); + tracing_selftest_running = true; list_for_each_entry_safe(p, n, &postponed_selftests, list) { /* This loop can take minutes when sanitizers are enabled, so * lets make sure we allow RCU processing. @@ -1859,6 +1860,7 @@ static __init int init_trace_selftests(void) list_del(&p->list); kfree(p); } + tracing_selftest_running = false; out: mutex_unlock(&trace_types_lock); -- cgit v1.2.3-59-g8ed1b From 08d9e686426f7557d3f1cda219ff907397c89d53 Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Sun, 16 Feb 2020 19:28:31 +0800 Subject: bootconfig: Mark boot_config_checksum() static In fact, this function is only used in this file, so mark it with 'static'. Link: http://lkml.kernel.org/r/1581852511-14163-1-git-send-email-hqjagain@gmail.com Acked-by: Masami Hiramatsu Signed-off-by: Qiujun Huang Signed-off-by: Steven Rostedt (VMware) --- init/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/main.c b/init/main.c index 59248717c925..48c87f47a444 100644 --- a/init/main.c +++ b/init/main.c @@ -335,7 +335,7 @@ static char * __init xbc_make_cmdline(const char *key) return new_cmdline; } -u32 boot_config_checksum(unsigned char *p, u32 size) +static u32 boot_config_checksum(unsigned char *p, u32 size) { u32 ret = 0; -- cgit v1.2.3-59-g8ed1b From 7ab215f22d04067094de8c81c20ba4c565ff8dd4 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 17 Feb 2020 18:52:39 +0900 Subject: tracing: Clear trace_state when starting trace Clear trace_state data structure when starting trace in __synth_event_trace_start() internal function. Currently trace_state is initialized only in the synth_event_trace_start() API, but the trace_state in synth_event_trace() and synth_event_trace_array() are on the stack without initialization. This means those APIs will see wrong parameters and wil skip closing process in __synth_event_trace_end() because trace_state->disabled may be !0. Link: http://lkml.kernel.org/r/158193315899.8868.1781259176894639952.stgit@devnote2 Reviewed-by: Tom Zanussi Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index f068d55bd37f..9d87aa1f0b79 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1824,6 +1824,8 @@ __synth_event_trace_start(struct trace_event_file *file, int entry_size, fields_size = 0; int ret = 0; + memset(trace_state, '\0', sizeof(*trace_state)); + /* * Normal event tracing doesn't get called at all unless the * ENABLED bit is set (which attaches the probe thus allowing @@ -2063,8 +2065,6 @@ int synth_event_trace_start(struct trace_event_file *file, if (!trace_state) return -EINVAL; - memset(trace_state, '\0', sizeof(*trace_state)); - ret = __synth_event_trace_start(file, trace_state); if (ret == -ENOENT) ret = 0; /* just disabled, not really an error */ -- cgit v1.2.3-59-g8ed1b From d8a953ddde5ec30a36810d0a892c3949b50849e9 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Feb 2020 21:18:33 +0900 Subject: bootconfig: Set CONFIG_BOOT_CONFIG=n by default Set CONFIG_BOOT_CONFIG=n by default. This also warns user if CONFIG_BOOT_CONFIG=n but "bootconfig" is given in the kernel command line. Link: http://lkml.kernel.org/r/158220111291.26565.9036889083940367969.stgit@devnote2 Suggested-by: Steven Rostedt Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- init/Kconfig | 1 - init/main.c | 8 ++++++++ kernel/trace/Kconfig | 3 ++- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index 4a672c6629d0..f586878410d2 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1218,7 +1218,6 @@ endif config BOOT_CONFIG bool "Boot config support" depends on BLK_DEV_INITRD - default y help Extra boot config allows system admin to pass a config file as complemental extension of kernel cmdline when booting. diff --git a/init/main.c b/init/main.c index 48c87f47a444..d96cc5f65022 100644 --- a/init/main.c +++ b/init/main.c @@ -418,6 +418,14 @@ not_found: } #else #define setup_boot_config(cmdline) do { } while (0) + +static int __init warn_bootconfig(char *str) +{ + pr_warn("WARNING: 'bootconfig' found on the kernel command line but CONFIG_BOOTCONFIG is not set.\n"); + return 0; +} +early_param("bootconfig", warn_bootconfig); + #endif /* Change NUL term back to "=", to make "param" the whole string. */ diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 91e885194dbc..795c3e02d3f1 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -143,7 +143,8 @@ if FTRACE config BOOTTIME_TRACING bool "Boot-time Tracing support" - depends on BOOT_CONFIG && TRACING + depends on TRACING + select BOOT_CONFIG default y help Enable developer to setup ftrace subsystem via supplemental -- cgit v1.2.3-59-g8ed1b From 85c46b78da58398be1c5166f55063c0512decd39 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Feb 2020 21:18:42 +0900 Subject: bootconfig: Add bootconfig magic word for indicating bootconfig explicitly Add bootconfig magic word to the end of bootconfig on initrd image for indicating explicitly the bootconfig is there. Also tools/bootconfig treats wrong size or wrong checksum or parse error as an error, because if there is a bootconfig magic word, there must be a bootconfig. The bootconfig magic word is "#BOOTCONFIG\n", 12 bytes word. Thus the block image of the initrd file with bootconfig is as follows. [Initrd][bootconfig][size][csum][#BOOTCONFIG\n] Link: http://lkml.kernel.org/r/158220112263.26565.3944814205960612841.stgit@devnote2 Suggested-by: Steven Rostedt Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- Documentation/admin-guide/bootconfig.rst | 10 +++++--- include/linux/bootconfig.h | 3 +++ init/Kconfig | 2 +- init/main.c | 6 ++++- tools/bootconfig/main.c | 43 ++++++++++++++++++++++++-------- tools/bootconfig/test-bootconfig.sh | 2 +- 6 files changed, 49 insertions(+), 17 deletions(-) diff --git a/Documentation/admin-guide/bootconfig.rst b/Documentation/admin-guide/bootconfig.rst index b342a6796392..5e7609936507 100644 --- a/Documentation/admin-guide/bootconfig.rst +++ b/Documentation/admin-guide/bootconfig.rst @@ -102,9 +102,13 @@ Boot Kernel With a Boot Config ============================== Since the boot configuration file is loaded with initrd, it will be added -to the end of the initrd (initramfs) image file. The Linux kernel decodes -the last part of the initrd image in memory to get the boot configuration -data. +to the end of the initrd (initramfs) image file with size, checksum and +12-byte magic word as below. + +[initrd][bootconfig][size(u32)][checksum(u32)][#BOOTCONFIG\n] + +The Linux kernel decodes the last part of the initrd image in memory to +get the boot configuration data. Because of this "piggyback" method, there is no need to change or update the boot loader and the kernel image itself. diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h index 7e18c939663e..d11e183fcb54 100644 --- a/include/linux/bootconfig.h +++ b/include/linux/bootconfig.h @@ -10,6 +10,9 @@ #include #include +#define BOOTCONFIG_MAGIC "#BOOTCONFIG\n" +#define BOOTCONFIG_MAGIC_LEN 12 + /* XBC tree node */ struct xbc_node { u16 next; diff --git a/init/Kconfig b/init/Kconfig index f586878410d2..a84e7aa89a29 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1222,7 +1222,7 @@ config BOOT_CONFIG Extra boot config allows system admin to pass a config file as complemental extension of kernel cmdline when booting. The boot config file must be attached at the end of initramfs - with checksum and size. + with checksum, size and magic word. See for details. If unsure, say Y. diff --git a/init/main.c b/init/main.c index d96cc5f65022..2fe8dec93e68 100644 --- a/init/main.c +++ b/init/main.c @@ -374,7 +374,11 @@ static void __init setup_boot_config(const char *cmdline) if (!initrd_end) goto not_found; - hdr = (u32 *)(initrd_end - 8); + data = (char *)initrd_end - BOOTCONFIG_MAGIC_LEN; + if (memcmp(data, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN)) + goto not_found; + + hdr = (u32 *)(data - 8); size = hdr[0]; csum = hdr[1]; diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index e18eeb070562..742271f019a9 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -131,15 +131,26 @@ int load_xbc_from_initrd(int fd, char **buf) struct stat stat; int ret; u32 size = 0, csum = 0, rcsum; + char magic[BOOTCONFIG_MAGIC_LEN]; ret = fstat(fd, &stat); if (ret < 0) return -errno; - if (stat.st_size < 8) + if (stat.st_size < 8 + BOOTCONFIG_MAGIC_LEN) return 0; - if (lseek(fd, -8, SEEK_END) < 0) { + if (lseek(fd, -BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0) { + pr_err("Failed to lseek: %d\n", -errno); + return -errno; + } + if (read(fd, magic, BOOTCONFIG_MAGIC_LEN) < 0) + return -errno; + /* Check the bootconfig magic bytes */ + if (memcmp(magic, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN) != 0) + return 0; + + if (lseek(fd, -(8 + BOOTCONFIG_MAGIC_LEN), SEEK_END) < 0) { pr_err("Failed to lseek: %d\n", -errno); return -errno; } @@ -150,11 +161,14 @@ int load_xbc_from_initrd(int fd, char **buf) if (read(fd, &csum, sizeof(u32)) < 0) return -errno; - /* Wrong size, maybe no boot config here */ - if (stat.st_size < size + 8) - return 0; + /* Wrong size error */ + if (stat.st_size < size + 8 + BOOTCONFIG_MAGIC_LEN) { + pr_err("bootconfig size is too big\n"); + return -E2BIG; + } - if (lseek(fd, stat.st_size - 8 - size, SEEK_SET) < 0) { + if (lseek(fd, stat.st_size - (size + 8 + BOOTCONFIG_MAGIC_LEN), + SEEK_SET) < 0) { pr_err("Failed to lseek: %d\n", -errno); return -errno; } @@ -163,17 +177,17 @@ int load_xbc_from_initrd(int fd, char **buf) if (ret < 0) return ret; - /* Wrong Checksum, maybe no boot config here */ + /* Wrong Checksum */ rcsum = checksum((unsigned char *)*buf, size); if (csum != rcsum) { pr_err("checksum error: %d != %d\n", csum, rcsum); - return 0; + return -EINVAL; } ret = xbc_init(*buf); - /* Wrong data, maybe no boot config here */ + /* Wrong data */ if (ret < 0) - return 0; + return ret; return size; } @@ -226,7 +240,8 @@ int delete_xbc(const char *path) } else if (size > 0) { ret = fstat(fd, &stat); if (!ret) - ret = ftruncate(fd, stat.st_size - size - 8); + ret = ftruncate(fd, stat.st_size + - size - 8 - BOOTCONFIG_MAGIC_LEN); if (ret) ret = -errno; } /* Ignore if there is no boot config in initrd */ @@ -295,6 +310,12 @@ int apply_xbc(const char *path, const char *xbc_path) pr_err("Failed to apply a boot config: %d\n", ret); return ret; } + /* Write a magic word of the bootconfig */ + ret = write(fd, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN); + if (ret < 0) { + pr_err("Failed to apply a boot config magic: %d\n", ret); + return ret; + } close(fd); free(data); diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh index 1de06de328e2..adafb7c50940 100755 --- a/tools/bootconfig/test-bootconfig.sh +++ b/tools/bootconfig/test-bootconfig.sh @@ -49,7 +49,7 @@ xpass $BOOTCONF -a $TEMPCONF $INITRD new_size=$(stat -c %s $INITRD) echo "File size check" -xpass test $new_size -eq $(expr $bconf_size + $initrd_size + 9) +xpass test $new_size -eq $(expr $bconf_size + $initrd_size + 9 + 12) echo "Apply command repeat test" xpass $BOOTCONF -a $TEMPCONF $INITRD -- cgit v1.2.3-59-g8ed1b From 15e95037b45f24f9ab6d4f0bd101d4df0be24c1d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Feb 2020 21:18:52 +0900 Subject: tools/bootconfig: Remove unneeded error message silencer Remove error message silent knob, we don't need it anymore because we can check if there is a bootconfig by checking the magic word. If there is a magic word, but failed to load a bootconfig from initrd, there is a real problem. Link: http://lkml.kernel.org/r/158220113256.26565.14264598654427773104.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/include/linux/printk.h | 5 +---- tools/bootconfig/main.c | 8 -------- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/tools/bootconfig/include/linux/printk.h b/tools/bootconfig/include/linux/printk.h index e978a63d3222..036e667596eb 100644 --- a/tools/bootconfig/include/linux/printk.h +++ b/tools/bootconfig/include/linux/printk.h @@ -4,10 +4,7 @@ #include -/* controllable printf */ -extern int pr_output; -#define printk(fmt, ...) \ - (pr_output ? printf(fmt, ##__VA_ARGS__) : 0) +#define printk(fmt, ...) printf(fmt, ##__VA_ARGS__) #define pr_err printk #define pr_warn printk diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index 742271f019a9..a9b97814d1a9 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -14,8 +14,6 @@ #include #include -int pr_output = 1; - static int xbc_show_array(struct xbc_node *node) { const char *val; @@ -227,13 +225,7 @@ int delete_xbc(const char *path) return -errno; } - /* - * Suppress error messages in xbc_init() because it can be just a - * data which concidentally matches the size and checksum footer. - */ - pr_output = 0; size = load_xbc_from_initrd(fd, &buf); - pr_output = 1; if (size < 0) { ret = size; pr_err("Failed to load a boot config from initrd: %d\n", ret); -- cgit v1.2.3-59-g8ed1b From a24d286f36104ed45108a5a36f3868938434772f Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Feb 2020 21:19:12 +0900 Subject: bootconfig: Reject subkey and value on same parent key Reject if a value node is mixed with subkey node on same parent key node. A value node can not co-exist with subkey node under some key node, e.g. key = value key.subkey = another-value This is not be allowed because bootconfig API is not designed to handle such case. Link: http://lkml.kernel.org/r/158220115232.26565.7792340045009731803.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- Documentation/admin-guide/bootconfig.rst | 7 +++++++ lib/bootconfig.c | 16 ++++++++++++---- tools/bootconfig/samples/bad-mixed-kv1.bconf | 3 +++ tools/bootconfig/samples/bad-mixed-kv2.bconf | 3 +++ 4 files changed, 25 insertions(+), 4 deletions(-) create mode 100644 tools/bootconfig/samples/bad-mixed-kv1.bconf create mode 100644 tools/bootconfig/samples/bad-mixed-kv2.bconf diff --git a/Documentation/admin-guide/bootconfig.rst b/Documentation/admin-guide/bootconfig.rst index 5e7609936507..dfeffa73dca3 100644 --- a/Documentation/admin-guide/bootconfig.rst +++ b/Documentation/admin-guide/bootconfig.rst @@ -62,6 +62,13 @@ Or more shorter, written as following:: In both styles, same key words are automatically merged when parsing it at boot time. So you can append similar trees or key-values. +Note that a sub-key and a value can not co-exist under a parent key. +For example, following config is NOT allowed.:: + + foo = value1 + foo.bar = value2 # !ERROR! subkey "bar" and value "value1" can NOT co-exist + + Comments -------- diff --git a/lib/bootconfig.c b/lib/bootconfig.c index 3ea601a2eba5..54ac623ca781 100644 --- a/lib/bootconfig.c +++ b/lib/bootconfig.c @@ -533,7 +533,7 @@ struct xbc_node *find_match_node(struct xbc_node *node, char *k) static int __init __xbc_add_key(char *k) { - struct xbc_node *node; + struct xbc_node *node, *child; if (!xbc_valid_keyword(k)) return xbc_parse_error("Invalid keyword", k); @@ -543,8 +543,12 @@ static int __init __xbc_add_key(char *k) if (!last_parent) /* the first level */ node = find_match_node(xbc_nodes, k); - else - node = find_match_node(xbc_node_get_child(last_parent), k); + else { + child = xbc_node_get_child(last_parent); + if (child && xbc_node_is_value(child)) + return xbc_parse_error("Subkey is mixed with value", k); + node = find_match_node(child, k); + } if (node) last_parent = node; @@ -577,7 +581,7 @@ static int __init __xbc_parse_keys(char *k) static int __init xbc_parse_kv(char **k, char *v) { struct xbc_node *prev_parent = last_parent; - struct xbc_node *node; + struct xbc_node *node, *child; char *next; int c, ret; @@ -585,6 +589,10 @@ static int __init xbc_parse_kv(char **k, char *v) if (ret) return ret; + child = xbc_node_get_child(last_parent); + if (child && xbc_node_is_key(child)) + return xbc_parse_error("Value is mixed with subkey", v); + c = __xbc_parse_value(&v, &next); if (c < 0) return c; diff --git a/tools/bootconfig/samples/bad-mixed-kv1.bconf b/tools/bootconfig/samples/bad-mixed-kv1.bconf new file mode 100644 index 000000000000..1761547dd05c --- /dev/null +++ b/tools/bootconfig/samples/bad-mixed-kv1.bconf @@ -0,0 +1,3 @@ +# value -> subkey pattern +key = value +key.subkey = another-value diff --git a/tools/bootconfig/samples/bad-mixed-kv2.bconf b/tools/bootconfig/samples/bad-mixed-kv2.bconf new file mode 100644 index 000000000000..6b32e0c3878c --- /dev/null +++ b/tools/bootconfig/samples/bad-mixed-kv2.bconf @@ -0,0 +1,3 @@ +# subkey -> value pattern +key.subkey = value +key = another-value -- cgit v1.2.3-59-g8ed1b From 88b913718db94697497028b85acbec8b180a4333 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 20 Feb 2020 21:19:42 +0900 Subject: bootconfig: Print array as multiple commands for legacy command line Print arraied values as multiple same options for legacy kernel command line. With this rule, if the "kernel.*" and "init.*" array entries in bootconfig are printed out as multiple same options, e.g. kernel { console = "ttyS0,115200" console += "tty0" } will be correctly converted to console="ttyS0,115200" console="tty0" in the kernel command line. Link: http://lkml.kernel.org/r/158220118213.26565.8163300497009463916.stgit@devnote2 Reported-by: Borislav Petkov Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- init/main.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/init/main.c b/init/main.c index 2fe8dec93e68..c9b1ee6bbb8d 100644 --- a/init/main.c +++ b/init/main.c @@ -268,7 +268,6 @@ static int __init xbc_snprint_cmdline(char *buf, size_t size, { struct xbc_node *knode, *vnode; char *end = buf + size; - char c = '\"'; const char *val; int ret; @@ -279,25 +278,20 @@ static int __init xbc_snprint_cmdline(char *buf, size_t size, return ret; vnode = xbc_node_get_child(knode); - ret = snprintf(buf, rest(buf, end), "%s%c", xbc_namebuf, - vnode ? '=' : ' '); - if (ret < 0) - return ret; - buf += ret; - if (!vnode) + if (!vnode) { + ret = snprintf(buf, rest(buf, end), "%s ", xbc_namebuf); + if (ret < 0) + return ret; + buf += ret; continue; - - c = '\"'; + } xbc_array_for_each_value(vnode, val) { - ret = snprintf(buf, rest(buf, end), "%c%s", c, val); + ret = snprintf(buf, rest(buf, end), "%s=\"%s\" ", + xbc_namebuf, val); if (ret < 0) return ret; buf += ret; - c = ','; } - if (rest(buf, end) > 2) - strcpy(buf, "\" "); - buf += 2; } return buf - (end - size); -- cgit v1.2.3-59-g8ed1b From ac2fcfa9fd26db67d7000677c05629c34cc94564 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 19 Feb 2020 15:15:51 +0100 Subject: net: macb: Properly handle phylink on at91rm9200 at91ether_init was handling the phy mode and speed but since the switch to phylink, the NCFGR register got overwritten by macb_mac_config(). The issue is that the RM9200_RMII bit and the MACB_CLK_DIV32 field are cleared but never restored as they conflict with the PAE, GBE and PCSSEL bits. Add new capability to differentiate between EMAC and the other versions of the IP and use it to set and avoid clearing the relevant bits. Also, this fixes a NULL pointer dereference in macb_mac_link_up as the EMAC doesn't use any rings/bufffers/queues. Fixes: 7897b071ac3b ("net: macb: convert to phylink") Signed-off-by: Alexandre Belloni Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb.h | 1 + drivers/net/ethernet/cadence/macb_main.c | 60 +++++++++++++++++--------------- 2 files changed, 33 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index dbf7070fcdba..a3f0f27fc79a 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -652,6 +652,7 @@ #define MACB_CAPS_GEM_HAS_PTP 0x00000040 #define MACB_CAPS_BD_RD_PREFETCH 0x00000080 #define MACB_CAPS_NEEDS_RSTONUBR 0x00000100 +#define MACB_CAPS_MACB_IS_EMAC 0x08000000 #define MACB_CAPS_FIFO_MODE 0x10000000 #define MACB_CAPS_GIGABIT_MODE_AVAILABLE 0x20000000 #define MACB_CAPS_SG_DISABLED 0x40000000 diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index def94e91883a..2c28da1737fe 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -572,8 +572,21 @@ static void macb_mac_config(struct phylink_config *config, unsigned int mode, old_ctrl = ctrl = macb_or_gem_readl(bp, NCFGR); /* Clear all the bits we might set later */ - ctrl &= ~(GEM_BIT(GBE) | MACB_BIT(SPD) | MACB_BIT(FD) | MACB_BIT(PAE) | - GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL)); + ctrl &= ~(MACB_BIT(SPD) | MACB_BIT(FD) | MACB_BIT(PAE)); + + if (bp->caps & MACB_CAPS_MACB_IS_EMAC) { + if (state->interface == PHY_INTERFACE_MODE_RMII) + ctrl |= MACB_BIT(RM9200_RMII); + } else { + ctrl &= ~(GEM_BIT(GBE) | GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL)); + + /* We do not support MLO_PAUSE_RX yet */ + if (state->pause & MLO_PAUSE_TX) + ctrl |= MACB_BIT(PAE); + + if (state->interface == PHY_INTERFACE_MODE_SGMII) + ctrl |= GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL); + } if (state->speed == SPEED_1000) ctrl |= GEM_BIT(GBE); @@ -583,13 +596,6 @@ static void macb_mac_config(struct phylink_config *config, unsigned int mode, if (state->duplex) ctrl |= MACB_BIT(FD); - /* We do not support MLO_PAUSE_RX yet */ - if (state->pause & MLO_PAUSE_TX) - ctrl |= MACB_BIT(PAE); - - if (state->interface == PHY_INTERFACE_MODE_SGMII) - ctrl |= GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL); - /* Apply the new configuration, if any */ if (old_ctrl ^ ctrl) macb_or_gem_writel(bp, NCFGR, ctrl); @@ -608,9 +614,10 @@ static void macb_mac_link_down(struct phylink_config *config, unsigned int mode, unsigned int q; u32 ctrl; - for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) - queue_writel(queue, IDR, - bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP)); + if (!(bp->caps & MACB_CAPS_MACB_IS_EMAC)) + for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) + queue_writel(queue, IDR, + bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP)); /* Disable Rx and Tx */ ctrl = macb_readl(bp, NCR) & ~(MACB_BIT(RE) | MACB_BIT(TE)); @@ -627,17 +634,19 @@ static void macb_mac_link_up(struct phylink_config *config, unsigned int mode, struct macb_queue *queue; unsigned int q; - macb_set_tx_clk(bp->tx_clk, bp->speed, ndev); + if (!(bp->caps & MACB_CAPS_MACB_IS_EMAC)) { + macb_set_tx_clk(bp->tx_clk, bp->speed, ndev); - /* Initialize rings & buffers as clearing MACB_BIT(TE) in link down - * cleared the pipeline and control registers. - */ - bp->macbgem_ops.mog_init_rings(bp); - macb_init_buffers(bp); + /* Initialize rings & buffers as clearing MACB_BIT(TE) in link down + * cleared the pipeline and control registers. + */ + bp->macbgem_ops.mog_init_rings(bp); + macb_init_buffers(bp); - for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) - queue_writel(queue, IER, - bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP)); + for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) + queue_writel(queue, IER, + bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP)); + } /* Enable Rx and Tx */ macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(RE) | MACB_BIT(TE)); @@ -4041,7 +4050,6 @@ static int at91ether_init(struct platform_device *pdev) struct net_device *dev = platform_get_drvdata(pdev); struct macb *bp = netdev_priv(dev); int err; - u32 reg; bp->queues[0].bp = bp; @@ -4055,11 +4063,7 @@ static int at91ether_init(struct platform_device *pdev) macb_writel(bp, NCR, 0); - reg = MACB_BF(CLK, MACB_CLK_DIV32) | MACB_BIT(BIG); - if (bp->phy_interface == PHY_INTERFACE_MODE_RMII) - reg |= MACB_BIT(RM9200_RMII); - - macb_writel(bp, NCFGR, reg); + macb_writel(bp, NCFGR, MACB_BF(CLK, MACB_CLK_DIV32) | MACB_BIT(BIG)); return 0; } @@ -4218,7 +4222,7 @@ static const struct macb_config sama5d4_config = { }; static const struct macb_config emac_config = { - .caps = MACB_CAPS_NEEDS_RSTONUBR, + .caps = MACB_CAPS_NEEDS_RSTONUBR | MACB_CAPS_MACB_IS_EMAC, .clk_init = at91ether_clk_init, .init = at91ether_init, }; -- cgit v1.2.3-59-g8ed1b From 98bda63e20daab95bdc084ce00459a4f622a0505 Mon Sep 17 00:00:00 2001 From: Roman Kiryanov Date: Wed, 19 Feb 2020 13:40:06 -0800 Subject: net: disable BRIDGE_NETFILTER by default The description says 'If unsure, say N.' but the module is built as M by default (once the dependencies are satisfied). When the module is selected (Y or M), it enables NETFILTER_FAMILY_BRIDGE and SKB_EXTENSIONS which alter kernel internal structures. We (Android Studio Emulator) currently do not use this module and think this it is more consistent to have it disabled by default as opposite to disabling it explicitly to prevent enabling NETFILTER_FAMILY_BRIDGE and SKB_EXTENSIONS. Signed-off-by: Roman Kiryanov Acked-by: Florian Westphal Signed-off-by: David S. Miller --- net/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/net/Kconfig b/net/Kconfig index b0937a700f01..2eeb0e55f7c9 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -189,7 +189,6 @@ config BRIDGE_NETFILTER depends on NETFILTER_ADVANCED select NETFILTER_FAMILY_BRIDGE select SKB_EXTENSIONS - default m ---help--- Enabling this option will let arptables resp. iptables see bridged ARP resp. IP traffic. If you want a bridging firewall, you probably -- cgit v1.2.3-59-g8ed1b From 68b759a75d6257759d1e37ff13f2d0659baf1112 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Wed, 19 Feb 2020 14:59:42 -0800 Subject: ionic: fix fw_status read The fw_status field is only 8 bits, so fix the read. Also, we only want to look at the one status bit, to allow for future use of the other bits, and watch for a bad PCI read. Fixes: 97ca486592c0 ("ionic: add heartbeat check") Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_dev.c | 11 +++++++---- drivers/net/ethernet/pensando/ionic/ionic_if.h | 1 + 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c index 87f82f36812f..46107de5e6c3 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c @@ -103,7 +103,7 @@ int ionic_heartbeat_check(struct ionic *ionic) { struct ionic_dev *idev = &ionic->idev; unsigned long hb_time; - u32 fw_status; + u8 fw_status; u32 hb; /* wait a little more than one second before testing again */ @@ -111,9 +111,12 @@ int ionic_heartbeat_check(struct ionic *ionic) if (time_before(hb_time, (idev->last_hb_time + ionic->watchdog_period))) return 0; - /* firmware is useful only if fw_status is non-zero */ - fw_status = ioread32(&idev->dev_info_regs->fw_status); - if (!fw_status) + /* firmware is useful only if the running bit is set and + * fw_status != 0xff (bad PCI read) + */ + fw_status = ioread8(&idev->dev_info_regs->fw_status); + if (fw_status == 0xff || + !(fw_status & IONIC_FW_STS_F_RUNNING)) return -ENXIO; /* early FW has no heartbeat, else FW will return non-zero */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h index ce07c2931a72..54547d53b0f2 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_if.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h @@ -2445,6 +2445,7 @@ union ionic_dev_info_regs { u8 version; u8 asic_type; u8 asic_rev; +#define IONIC_FW_STS_F_RUNNING 0x1 u8 fw_status; u32 fw_heartbeat; char fw_version[IONIC_DEVINFO_FWVERS_BUFLEN]; -- cgit v1.2.3-59-g8ed1b From 971617c3b761c876d686a2188220a33898c90e99 Mon Sep 17 00:00:00 2001 From: Tim Harvey Date: Wed, 19 Feb 2020 15:19:36 -0800 Subject: net: thunderx: workaround BGX TX Underflow issue While it is not yet understood why a TX underflow can easily occur for SGMII interfaces resulting in a TX wedge. It has been found that disabling/re-enabling the LMAC resolves the issue. Signed-off-by: Tim Harvey Reviewed-by: Robert Jones Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 62 +++++++++++++++++++++-- drivers/net/ethernet/cavium/thunder/thunder_bgx.h | 9 ++++ 2 files changed, 68 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 17a4110c2e49..8ff28ed04b7f 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -410,10 +410,19 @@ void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable) lmac = &bgx->lmac[lmacid]; cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG); - if (enable) + if (enable) { cfg |= CMR_PKT_RX_EN | CMR_PKT_TX_EN; - else + + /* enable TX FIFO Underflow interrupt */ + bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_INT_ENA_W1S, + GMI_TXX_INT_UNDFLW); + } else { cfg &= ~(CMR_PKT_RX_EN | CMR_PKT_TX_EN); + + /* Disable TX FIFO Underflow interrupt */ + bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_INT_ENA_W1C, + GMI_TXX_INT_UNDFLW); + } bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg); if (bgx->is_rgx) @@ -1535,6 +1544,48 @@ static int bgx_init_phy(struct bgx *bgx) return bgx_init_of_phy(bgx); } +static irqreturn_t bgx_intr_handler(int irq, void *data) +{ + struct bgx *bgx = (struct bgx *)data; + u64 status, val; + int lmac; + + for (lmac = 0; lmac < bgx->lmac_count; lmac++) { + status = bgx_reg_read(bgx, lmac, BGX_GMP_GMI_TXX_INT); + if (status & GMI_TXX_INT_UNDFLW) { + pci_err(bgx->pdev, "BGX%d lmac%d UNDFLW\n", + bgx->bgx_id, lmac); + val = bgx_reg_read(bgx, lmac, BGX_CMRX_CFG); + val &= ~CMR_EN; + bgx_reg_write(bgx, lmac, BGX_CMRX_CFG, val); + val |= CMR_EN; + bgx_reg_write(bgx, lmac, BGX_CMRX_CFG, val); + } + /* clear interrupts */ + bgx_reg_write(bgx, lmac, BGX_GMP_GMI_TXX_INT, status); + } + + return IRQ_HANDLED; +} + +static void bgx_register_intr(struct pci_dev *pdev) +{ + struct bgx *bgx = pci_get_drvdata(pdev); + int ret; + + ret = pci_alloc_irq_vectors(pdev, BGX_LMAC_VEC_OFFSET, + BGX_LMAC_VEC_OFFSET, PCI_IRQ_ALL_TYPES); + if (ret < 0) { + pci_err(pdev, "Req for #%d msix vectors failed\n", + BGX_LMAC_VEC_OFFSET); + return; + } + ret = pci_request_irq(pdev, GMPX_GMI_TX_INT, bgx_intr_handler, NULL, + bgx, "BGX%d", bgx->bgx_id); + if (ret) + pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); +} + static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { int err; @@ -1550,7 +1601,7 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, bgx); - err = pci_enable_device(pdev); + err = pcim_enable_device(pdev); if (err) { dev_err(dev, "Failed to enable PCI device\n"); pci_set_drvdata(pdev, NULL); @@ -1604,6 +1655,8 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) bgx_init_hw(bgx); + bgx_register_intr(pdev); + /* Enable all LMACs */ for (lmac = 0; lmac < bgx->lmac_count; lmac++) { err = bgx_lmac_enable(bgx, lmac); @@ -1620,6 +1673,7 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err_enable: bgx_vnic[bgx->bgx_id] = NULL; + pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); err_release_regions: pci_release_regions(pdev); err_disable_device: @@ -1637,6 +1691,8 @@ static void bgx_remove(struct pci_dev *pdev) for (lmac = 0; lmac < bgx->lmac_count; lmac++) bgx_lmac_disable(bgx, lmac); + pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); + bgx_vnic[bgx->bgx_id] = NULL; pci_release_regions(pdev); pci_disable_device(pdev); diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h index 25888706bdcd..cdea49392185 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -180,6 +180,15 @@ #define BGX_GMP_GMI_TXX_BURST 0x38228 #define BGX_GMP_GMI_TXX_MIN_PKT 0x38240 #define BGX_GMP_GMI_TXX_SGMII_CTL 0x38300 +#define BGX_GMP_GMI_TXX_INT 0x38500 +#define BGX_GMP_GMI_TXX_INT_W1S 0x38508 +#define BGX_GMP_GMI_TXX_INT_ENA_W1C 0x38510 +#define BGX_GMP_GMI_TXX_INT_ENA_W1S 0x38518 +#define GMI_TXX_INT_PTP_LOST BIT_ULL(4) +#define GMI_TXX_INT_LATE_COL BIT_ULL(3) +#define GMI_TXX_INT_XSDEF BIT_ULL(2) +#define GMI_TXX_INT_XSCOL BIT_ULL(1) +#define GMI_TXX_INT_UNDFLW BIT_ULL(0) #define BGX_MSIX_VEC_0_29_ADDR 0x400000 /* +(0..29) << 4 */ #define BGX_MSIX_VEC_0_29_CTL 0x400008 -- cgit v1.2.3-59-g8ed1b From 3a20773beeeeadec41477a5ba872175b778ff752 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 20 Feb 2020 16:42:13 +0200 Subject: net: netlink: cap max groups which will be considered in netlink_bind() Since nl_groups is a u32 we can't bind more groups via ->bind (netlink_bind) call, but netlink has supported more groups via setsockopt() for a long time and thus nlk->ngroups could be over 32. Recently I added support for per-vlan notifications and increased the groups to 33 for NETLINK_ROUTE which exposed an old bug in the netlink_bind() code causing out-of-bounds access on archs where unsigned long is 32 bits via test_bit() on a local variable. Fix this by capping the maximum groups in netlink_bind() to BITS_PER_TYPE(u32), effectively capping them at 32 which is the minimum of allocated groups and the maximum groups which can be bound via netlink_bind(). CC: Christophe Leroy CC: Richard Guy Briggs Fixes: 4f520900522f ("netlink: have netlink per-protocol bind function return an error code.") Reported-by: Erhard F. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 4e31721e7293..edf3e285e242 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1014,7 +1014,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (nlk->netlink_bind && groups) { int group; - for (group = 0; group < nlk->ngroups; group++) { + /* nl_groups is a u32, so cap the maximum groups we can bind */ + for (group = 0; group < BITS_PER_TYPE(u32); group++) { if (!test_bit(group, &groups)) continue; err = nlk->netlink_bind(net, group + 1); @@ -1033,7 +1034,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, netlink_insert(sk, nladdr->nl_pid) : netlink_autobind(sock); if (err) { - netlink_undo_bind(nlk->ngroups, groups, sk); + netlink_undo_bind(BITS_PER_TYPE(u32), groups, sk); goto unlock; } } -- cgit v1.2.3-59-g8ed1b From 5567ae4a8d569d996d0d88d0eceb76205e4c7ce5 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Thu, 20 Feb 2020 17:26:34 -0500 Subject: bnxt_en: Improve device shutdown method. Especially when bnxt_shutdown() is called during kexec, we need to disable MSIX and disable Bus Master to completely quiesce the device. Make these 2 calls unconditionally in the shutdown method. Fixes: c20dc142dd7b ("bnxt_en: Disable bus master during PCI shutdown and driver unload.") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 597e6fd5bfea..2ad007e5ee7f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11983,10 +11983,10 @@ static void bnxt_shutdown(struct pci_dev *pdev) dev_close(dev); bnxt_ulp_shutdown(bp); + bnxt_clear_int_mode(bp); + pci_disable_device(pdev); if (system_state == SYSTEM_POWER_OFF) { - bnxt_clear_int_mode(bp); - pci_disable_device(pdev); pci_wake_from_d3(pdev, bp->wol); pci_set_power_state(pdev, PCI_D3hot); } -- cgit v1.2.3-59-g8ed1b From 8743db4a9acfd51f805ac0c87bcaae92c42d1061 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Thu, 20 Feb 2020 17:26:35 -0500 Subject: bnxt_en: Issue PCIe FLR in kdump kernel to cleanup pending DMAs. If crashed kernel does not shutdown the NIC properly, PCIe FLR is required in the kdump kernel in order to initialize all the functions properly. Fixes: d629522e1d66 ("bnxt_en: Reduce memory usage when running in kdump kernel.") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 2ad007e5ee7f..fd6e0e48cd51 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11786,6 +11786,14 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (version_printed++ == 0) pr_info("%s", version); + /* Clear any pending DMA transactions from crash kernel + * while loading driver in capture kernel. + */ + if (is_kdump_kernel()) { + pci_clear_master(pdev); + pcie_flr(pdev); + } + max_irqs = bnxt_get_max_irq(pdev); dev = alloc_etherdev_mq(sizeof(*bp), max_irqs); if (!dev) -- cgit v1.2.3-59-g8ed1b From 1d0c3924a92e69bfa91163bda83c12a994b4d106 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 15 Feb 2020 16:40:37 -0500 Subject: ext4: fix potential race between online resizing and write operations During an online resize an array of pointers to buffer heads gets replaced so it can get enlarged. If there is a racing block allocation or deallocation which uses the old array, and the old array has gotten reused this can lead to a GPF or some other random kernel memory getting modified. Link: https://bugzilla.kernel.org/show_bug.cgi?id=206443 Link: https://lore.kernel.org/r/20200221053458.730016-2-tytso@mit.edu Reported-by: Suraj Jitindar Singh Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/balloc.c | 14 +++++++++++--- fs/ext4/ext4.h | 20 +++++++++++++++++++- fs/ext4/resize.c | 55 ++++++++++++++++++++++++++++++++++++++++++++----------- fs/ext4/super.c | 33 +++++++++++++++++++++++---------- 4 files changed, 97 insertions(+), 25 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 5f993a411251..8fd0b3cdab4c 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -270,6 +270,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, ext4_group_t ngroups = ext4_get_groups_count(sb); struct ext4_group_desc *desc; struct ext4_sb_info *sbi = EXT4_SB(sb); + struct buffer_head *bh_p; if (block_group >= ngroups) { ext4_error(sb, "block_group >= groups_count - block_group = %u," @@ -280,7 +281,14 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); - if (!sbi->s_group_desc[group_desc]) { + bh_p = sbi_array_rcu_deref(sbi, s_group_desc, group_desc); + /* + * sbi_array_rcu_deref returns with rcu unlocked, this is ok since + * the pointer being dereferenced won't be dereferenced again. By + * looking at the usage in add_new_gdb() the value isn't modified, + * just the pointer, and so it remains valid. + */ + if (!bh_p) { ext4_error(sb, "Group descriptor not loaded - " "block_group = %u, group_desc = %u, desc = %u", block_group, group_desc, offset); @@ -288,10 +296,10 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, } desc = (struct ext4_group_desc *)( - (__u8 *)sbi->s_group_desc[group_desc]->b_data + + (__u8 *)bh_p->b_data + offset * EXT4_DESC_SIZE(sb)); if (bh) - *bh = sbi->s_group_desc[group_desc]; + *bh = bh_p; return desc; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 480badcf2783..b51003f75568 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1400,7 +1400,7 @@ struct ext4_sb_info { loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ struct buffer_head * s_sbh; /* Buffer containing the super block */ struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ - struct buffer_head **s_group_desc; + struct buffer_head * __rcu *s_group_desc; unsigned int s_mount_opt; unsigned int s_mount_opt2; unsigned int s_mount_flags; @@ -1576,6 +1576,23 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); } +/* + * Returns: sbi->field[index] + * Used to access an array element from the following sbi fields which require + * rcu protection to avoid dereferencing an invalid pointer due to reassignment + * - s_group_desc + * - s_group_info + * - s_flex_group + */ +#define sbi_array_rcu_deref(sbi, field, index) \ +({ \ + typeof(*((sbi)->field)) _v; \ + rcu_read_lock(); \ + _v = ((typeof(_v)*)rcu_dereference((sbi)->field))[index]; \ + rcu_read_unlock(); \ + _v; \ +}) + /* * Simulate_fail codes */ @@ -2730,6 +2747,7 @@ extern int ext4_generic_delete_entry(handle_t *handle, extern bool ext4_empty_dir(struct inode *inode); /* resize.c */ +extern void ext4_kvfree_array_rcu(void *to_free); extern int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input); extern int ext4_group_extend(struct super_block *sb, diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 86a2500ed292..536cc9f38091 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -17,6 +17,33 @@ #include "ext4_jbd2.h" +struct ext4_rcu_ptr { + struct rcu_head rcu; + void *ptr; +}; + +static void ext4_rcu_ptr_callback(struct rcu_head *head) +{ + struct ext4_rcu_ptr *ptr; + + ptr = container_of(head, struct ext4_rcu_ptr, rcu); + kvfree(ptr->ptr); + kfree(ptr); +} + +void ext4_kvfree_array_rcu(void *to_free) +{ + struct ext4_rcu_ptr *ptr = kzalloc(sizeof(*ptr), GFP_KERNEL); + + if (ptr) { + ptr->ptr = to_free; + call_rcu(&ptr->rcu, ext4_rcu_ptr_callback); + return; + } + synchronize_rcu(); + kvfree(to_free); +} + int ext4_resize_begin(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -542,8 +569,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb, brelse(gdb); goto out; } - memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data, - gdb->b_size); + memcpy(gdb->b_data, sbi_array_rcu_deref(sbi, + s_group_desc, j)->b_data, gdb->b_size); set_buffer_uptodate(gdb); err = ext4_handle_dirty_metadata(handle, NULL, gdb); @@ -860,13 +887,15 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, } brelse(dind); - o_group_desc = EXT4_SB(sb)->s_group_desc; + rcu_read_lock(); + o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc); memcpy(n_group_desc, o_group_desc, EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); + rcu_read_unlock(); n_group_desc[gdb_num] = gdb_bh; - EXT4_SB(sb)->s_group_desc = n_group_desc; + rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc); EXT4_SB(sb)->s_gdb_count++; - kvfree(o_group_desc); + ext4_kvfree_array_rcu(o_group_desc); le16_add_cpu(&es->s_reserved_gdt_blocks, -1); err = ext4_handle_dirty_super(handle, sb); @@ -909,9 +938,11 @@ static int add_new_gdb_meta_bg(struct super_block *sb, return err; } - o_group_desc = EXT4_SB(sb)->s_group_desc; + rcu_read_lock(); + o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc); memcpy(n_group_desc, o_group_desc, EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); + rcu_read_unlock(); n_group_desc[gdb_num] = gdb_bh; BUFFER_TRACE(gdb_bh, "get_write_access"); @@ -922,9 +953,9 @@ static int add_new_gdb_meta_bg(struct super_block *sb, return err; } - EXT4_SB(sb)->s_group_desc = n_group_desc; + rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc); EXT4_SB(sb)->s_gdb_count++; - kvfree(o_group_desc); + ext4_kvfree_array_rcu(o_group_desc); return err; } @@ -1188,7 +1219,8 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, * use non-sparse filesystems anymore. This is already checked above. */ if (gdb_off) { - gdb_bh = sbi->s_group_desc[gdb_num]; + gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, + gdb_num); BUFFER_TRACE(gdb_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, gdb_bh); @@ -1270,7 +1302,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, /* * get_write_access() has been called on gdb_bh by ext4_add_new_desc(). */ - gdb_bh = sbi->s_group_desc[gdb_num]; + gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, gdb_num); /* Update group descriptor block for new group */ gdp = (struct ext4_group_desc *)(gdb_bh->b_data + gdb_off * EXT4_DESC_SIZE(sb)); @@ -1497,7 +1529,8 @@ exit_journal: for (; gdb_num <= gdb_num_end; gdb_num++) { struct buffer_head *gdb_bh; - gdb_bh = sbi->s_group_desc[gdb_num]; + gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, + gdb_num); if (old_gdb == gdb_bh->b_blocknr) continue; update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f464dff09774..e00bcc19099f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1014,6 +1014,7 @@ static void ext4_put_super(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; + struct buffer_head **group_desc; int aborted = 0; int i, err; @@ -1046,9 +1047,12 @@ static void ext4_put_super(struct super_block *sb) if (!sb_rdonly(sb)) ext4_commit_super(sb, 1); + rcu_read_lock(); + group_desc = rcu_dereference(sbi->s_group_desc); for (i = 0; i < sbi->s_gdb_count; i++) - brelse(sbi->s_group_desc[i]); - kvfree(sbi->s_group_desc); + brelse(group_desc[i]); + kvfree(group_desc); + rcu_read_unlock(); kvfree(sbi->s_flex_groups); percpu_counter_destroy(&sbi->s_freeclusters_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); @@ -3634,7 +3638,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) { struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev); char *orig_data = kstrdup(data, GFP_KERNEL); - struct buffer_head *bh; + struct buffer_head *bh, **group_desc; struct ext4_super_block *es = NULL; struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); ext4_fsblk_t block; @@ -4290,9 +4294,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } } - sbi->s_group_desc = kvmalloc_array(db_count, - sizeof(struct buffer_head *), - GFP_KERNEL); + rcu_assign_pointer(sbi->s_group_desc, + kvmalloc_array(db_count, + sizeof(struct buffer_head *), + GFP_KERNEL)); if (sbi->s_group_desc == NULL) { ext4_msg(sb, KERN_ERR, "not enough memory"); ret = -ENOMEM; @@ -4308,14 +4313,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } for (i = 0; i < db_count; i++) { + struct buffer_head *bh; + block = descriptor_loc(sb, logical_sb_block, i); - sbi->s_group_desc[i] = sb_bread_unmovable(sb, block); - if (!sbi->s_group_desc[i]) { + bh = sb_bread_unmovable(sb, block); + if (!bh) { ext4_msg(sb, KERN_ERR, "can't read group descriptor %d", i); db_count = i; goto failed_mount2; } + rcu_read_lock(); + rcu_dereference(sbi->s_group_desc)[i] = bh; + rcu_read_unlock(); } sbi->s_gdb_count = db_count; if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) { @@ -4717,9 +4727,12 @@ failed_mount3: if (sbi->s_mmp_tsk) kthread_stop(sbi->s_mmp_tsk); failed_mount2: + rcu_read_lock(); + group_desc = rcu_dereference(sbi->s_group_desc); for (i = 0; i < db_count; i++) - brelse(sbi->s_group_desc[i]); - kvfree(sbi->s_group_desc); + brelse(group_desc[i]); + kvfree(group_desc); + rcu_read_unlock(); failed_mount: if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); -- cgit v1.2.3-59-g8ed1b From df3da4ea5a0fc5d115c90d5aa6caa4dd433750a7 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Tue, 18 Feb 2020 19:08:50 -0800 Subject: ext4: fix potential race between s_group_info online resizing and access During an online resize an array of pointers to s_group_info gets replaced so it can get enlarged. If there is a concurrent access to the array in ext4_get_group_info() and this memory has been reused then this can lead to an invalid memory access. Link: https://bugzilla.kernel.org/show_bug.cgi?id=206443 Link: https://lore.kernel.org/r/20200221053458.730016-3-tytso@mit.edu Signed-off-by: Suraj Jitindar Singh Signed-off-by: Theodore Ts'o Reviewed-by: Balbir Singh Cc: stable@kernel.org --- fs/ext4/ext4.h | 8 ++++---- fs/ext4/mballoc.c | 52 +++++++++++++++++++++++++++++++++++----------------- 2 files changed, 39 insertions(+), 21 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index b51003f75568..b1ece5329738 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1462,7 +1462,7 @@ struct ext4_sb_info { #endif /* for buddy allocator */ - struct ext4_group_info ***s_group_info; + struct ext4_group_info ** __rcu *s_group_info; struct inode *s_buddy_cache; spinlock_t s_md_lock; unsigned short *s_mb_offsets; @@ -2994,13 +2994,13 @@ static inline struct ext4_group_info *ext4_get_group_info(struct super_block *sb, ext4_group_t group) { - struct ext4_group_info ***grp_info; + struct ext4_group_info **grp_info; long indexv, indexh; BUG_ON(group >= EXT4_SB(sb)->s_groups_count); - grp_info = EXT4_SB(sb)->s_group_info; indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); - return grp_info[indexv][indexh]; + grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv); + return grp_info[indexh]; } /* diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index f64838187559..1b46fb63692a 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2356,7 +2356,7 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups) { struct ext4_sb_info *sbi = EXT4_SB(sb); unsigned size; - struct ext4_group_info ***new_groupinfo; + struct ext4_group_info ***old_groupinfo, ***new_groupinfo; size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); @@ -2369,13 +2369,16 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups) ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group"); return -ENOMEM; } - if (sbi->s_group_info) { - memcpy(new_groupinfo, sbi->s_group_info, + rcu_read_lock(); + old_groupinfo = rcu_dereference(sbi->s_group_info); + if (old_groupinfo) + memcpy(new_groupinfo, old_groupinfo, sbi->s_group_info_size * sizeof(*sbi->s_group_info)); - kvfree(sbi->s_group_info); - } - sbi->s_group_info = new_groupinfo; + rcu_read_unlock(); + rcu_assign_pointer(sbi->s_group_info, new_groupinfo); sbi->s_group_info_size = size / sizeof(*sbi->s_group_info); + if (old_groupinfo) + ext4_kvfree_array_rcu(old_groupinfo); ext4_debug("allocated s_groupinfo array for %d meta_bg's\n", sbi->s_group_info_size); return 0; @@ -2387,6 +2390,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, { int i; int metalen = 0; + int idx = group >> EXT4_DESC_PER_BLOCK_BITS(sb); struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_group_info **meta_group_info; struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); @@ -2405,12 +2409,12 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, "for a buddy group"); goto exit_meta_group_info; } - sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = - meta_group_info; + rcu_read_lock(); + rcu_dereference(sbi->s_group_info)[idx] = meta_group_info; + rcu_read_unlock(); } - meta_group_info = - sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; + meta_group_info = sbi_array_rcu_deref(sbi, s_group_info, idx); i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS); @@ -2458,8 +2462,13 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, exit_group_info: /* If a meta_group_info table has been allocated, release it now */ if (group % EXT4_DESC_PER_BLOCK(sb) == 0) { - kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]); - sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL; + struct ext4_group_info ***group_info; + + rcu_read_lock(); + group_info = rcu_dereference(sbi->s_group_info); + kfree(group_info[idx]); + group_info[idx] = NULL; + rcu_read_unlock(); } exit_meta_group_info: return -ENOMEM; @@ -2472,6 +2481,7 @@ static int ext4_mb_init_backend(struct super_block *sb) struct ext4_sb_info *sbi = EXT4_SB(sb); int err; struct ext4_group_desc *desc; + struct ext4_group_info ***group_info; struct kmem_cache *cachep; err = ext4_mb_alloc_groupinfo(sb, ngroups); @@ -2507,11 +2517,16 @@ err_freebuddy: while (i-- > 0) kmem_cache_free(cachep, ext4_get_group_info(sb, i)); i = sbi->s_group_info_size; + rcu_read_lock(); + group_info = rcu_dereference(sbi->s_group_info); while (i-- > 0) - kfree(sbi->s_group_info[i]); + kfree(group_info[i]); + rcu_read_unlock(); iput(sbi->s_buddy_cache); err_freesgi: - kvfree(sbi->s_group_info); + rcu_read_lock(); + kvfree(rcu_dereference(sbi->s_group_info)); + rcu_read_unlock(); return -ENOMEM; } @@ -2700,7 +2715,7 @@ int ext4_mb_release(struct super_block *sb) ext4_group_t ngroups = ext4_get_groups_count(sb); ext4_group_t i; int num_meta_group_infos; - struct ext4_group_info *grinfo; + struct ext4_group_info *grinfo, ***group_info; struct ext4_sb_info *sbi = EXT4_SB(sb); struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); @@ -2719,9 +2734,12 @@ int ext4_mb_release(struct super_block *sb) num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); + rcu_read_lock(); + group_info = rcu_dereference(sbi->s_group_info); for (i = 0; i < num_meta_group_infos; i++) - kfree(sbi->s_group_info[i]); - kvfree(sbi->s_group_info); + kfree(group_info[i]); + kvfree(group_info); + rcu_read_unlock(); } kfree(sbi->s_mb_offsets); kfree(sbi->s_mb_maxs); -- cgit v1.2.3-59-g8ed1b From fd1d98650ac0042d475155116e65fd17eb379542 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Tue, 8 Oct 2019 14:37:02 +0800 Subject: MAINTAINERS: csky: Add mailing list for csky Add mailing list and it's convenient for maintain C-SKY subsystem. Signed-off-by: Guo Ren --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index a0d86490c2c6..a4b5858b29bd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3649,6 +3649,7 @@ F: sound/pci/oxygen/ C-SKY ARCHITECTURE M: Guo Ren +L: linux-csky@vger.kernel.org T: git https://github.com/c-sky/csky-linux.git S: Supported F: arch/csky/ -- cgit v1.2.3-59-g8ed1b From 2f78c73f78c39dabc5c44ad8dd61fd6ec65636d6 Mon Sep 17 00:00:00 2001 From: Mao Han Date: Fri, 11 Oct 2019 10:56:55 +0800 Subject: csky: Initial stack protector support This is a basic -fstack-protector support without per-task canary switching. The protector will report something like when stack corruption is detected: It's tested with strcpy local array overflow in sys_kill and get: stack-protector: Kernel stack is corrupted in: sys_kill+0x23c/0x23c TODO: - Support task switch for different cannary Signed-off-by: Mao Han Signed-off-by: Guo Ren --- arch/csky/Kconfig | 1 + arch/csky/include/asm/stackprotector.h | 29 +++++++++++++++++++++++++++++ arch/csky/kernel/process.c | 6 ++++++ 3 files changed, 36 insertions(+) create mode 100644 arch/csky/include/asm/stackprotector.h diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index da09c884cc30..ea7f3b890d03 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -49,6 +49,7 @@ config CSKY select HAVE_PERF_USER_STACK_DUMP select HAVE_DMA_API_DEBUG select HAVE_DMA_CONTIGUOUS + select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS select MAY_HAVE_SPARSE_IRQ select MODULES_USE_ELF_RELA if MODULES diff --git a/arch/csky/include/asm/stackprotector.h b/arch/csky/include/asm/stackprotector.h new file mode 100644 index 000000000000..d7cd4e51edd9 --- /dev/null +++ b/arch/csky/include/asm/stackprotector.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_STACKPROTECTOR_H +#define _ASM_STACKPROTECTOR_H 1 + +#include +#include + +extern unsigned long __stack_chk_guard; + +/* + * Initialize the stackprotector canary value. + * + * NOTE: this must only be called from functions that never return, + * and it must always be inlined. + */ +static __always_inline void boot_init_stack_canary(void) +{ + unsigned long canary; + + /* Try to get a semi random initial value. */ + get_random_bytes(&canary, sizeof(canary)); + canary ^= LINUX_VERSION_CODE; + canary &= CANARY_MASK; + + current->stack_canary = canary; + __stack_chk_guard = current->stack_canary; +} + +#endif /* __ASM_SH_STACKPROTECTOR_H */ diff --git a/arch/csky/kernel/process.c b/arch/csky/kernel/process.c index f320d9248a22..5349cd8c0f30 100644 --- a/arch/csky/kernel/process.c +++ b/arch/csky/kernel/process.c @@ -16,6 +16,12 @@ struct cpuinfo_csky cpu_data[NR_CPUS]; +#ifdef CONFIG_STACKPROTECTOR +#include +unsigned long __stack_chk_guard __read_mostly; +EXPORT_SYMBOL(__stack_chk_guard); +#endif + asmlinkage void ret_from_fork(void); asmlinkage void ret_from_kernel_thread(void); -- cgit v1.2.3-59-g8ed1b From f525bb2c9e7cf1e3c43ab57704c9e1c836d30b34 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Wed, 27 Nov 2019 08:44:33 +0800 Subject: csky: Tightly-Coupled Memory or Sram support The implementation are not only used by TCM but also used by sram on SOC bus. It follow existed linux tcm software interface, so that old tcm application codes could be re-used directly. Software interface list in asm/tcm.h: - Variables/Const: __tcmdata, __tcmconst - Functions: __tcmfunc, __tcmlocalfunc - Malloc/Free: tcm_alloc, tcm_free In linux menuconfig: - Choose a TCM contain instrctions + data or separated in ITCM/DTCM. - Determine TCM_BASE (DTCM_BASE) in phyiscal address. - Determine size of TCM or ITCM(DTCM) in page counts. Here is hello tcm example from Documentation/arm/tcm.rst which could be directly used: /* Uninitialized data */ static u32 __tcmdata tcmvar; /* Initialized data */ static u32 __tcmdata tcmassigned = 0x2BADBABEU; /* Constant */ static const u32 __tcmconst tcmconst = 0xCAFEBABEU; static void __tcmlocalfunc tcm_to_tcm(void) { int i; for (i = 0; i < 100; i++) tcmvar ++; } static void __tcmfunc hello_tcm(void) { /* Some abstract code that runs in ITCM */ int i; for (i = 0; i < 100; i++) { tcmvar ++; } tcm_to_tcm(); } static void __init test_tcm(void) { u32 *tcmem; int i; hello_tcm(); printk("Hello TCM executed from ITCM RAM\n"); printk("TCM variable from testrun: %u @ %p\n", tcmvar, &tcmvar); tcmvar = 0xDEADBEEFU; printk("TCM variable: 0x%x @ %p\n", tcmvar, &tcmvar); printk("TCM assigned variable: 0x%x @ %p\n", tcmassigned, &tcmassigned); printk("TCM constant: 0x%x @ %p\n", tcmconst, &tcmconst); /* Allocate some TCM memory from the pool */ tcmem = tcm_alloc(20); if (tcmem) { printk("TCM Allocated 20 bytes of TCM @ %p\n", tcmem); tcmem[0] = 0xDEADBEEFU; tcmem[1] = 0x2BADBABEU; tcmem[2] = 0xCAFEBABEU; tcmem[3] = 0xDEADBEEFU; tcmem[4] = 0x2BADBABEU; for (i = 0; i < 5; i++) printk("TCM tcmem[%d] = %08x\n", i, tcmem[i]); tcm_free(tcmem, 20); } } TODO: - Separate fixup mapping from highmem - Support abiv1 Signed-off-by: Guo Ren --- arch/csky/Kconfig | 35 +++++++++ arch/csky/include/asm/fixmap.h | 5 +- arch/csky/include/asm/memory.h | 22 ++++++ arch/csky/include/asm/tcm.h | 24 ++++++ arch/csky/kernel/vmlinux.lds.S | 49 ++++++++++++ arch/csky/mm/Makefile | 1 + arch/csky/mm/tcm.c | 169 +++++++++++++++++++++++++++++++++++++++++ 7 files changed, 304 insertions(+), 1 deletion(-) create mode 100644 arch/csky/include/asm/memory.h create mode 100644 arch/csky/include/asm/tcm.h create mode 100644 arch/csky/mm/tcm.c diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index ea7f3b890d03..fc92f08f0017 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -189,6 +189,41 @@ config CPU_PM_STOP bool "stop" endchoice +menuconfig HAVE_TCM + bool "Tightly-Coupled/Sram Memory" + depends on HIGHMEM + select GENERIC_ALLOCATOR + help + The implementation are not only used by TCM (Tightly-Coupled Meory) + but also used by sram on SOC bus. It follow existed linux tcm + software interface, so that old tcm application codes could be + re-used directly. + +if HAVE_TCM +config ITCM_RAM_BASE + hex "ITCM ram base" + default 0xffffffff + +config ITCM_NR_PAGES + int "Page count of ITCM size: NR*4KB" + range 1 256 + default 32 + +config HAVE_DTCM + bool "DTCM Support" + +config DTCM_RAM_BASE + hex "DTCM ram base" + depends on HAVE_DTCM + default 0xffffffff + +config DTCM_NR_PAGES + int "Page count of DTCM size: NR*4KB" + depends on HAVE_DTCM + range 1 256 + default 32 +endif + config CPU_HAS_VDSP bool "CPU has VDSP coprocessor" depends on CPU_HAS_FPU && CPU_HAS_FPUV2 diff --git a/arch/csky/include/asm/fixmap.h b/arch/csky/include/asm/fixmap.h index 380ff0a307df..4350578faa1c 100644 --- a/arch/csky/include/asm/fixmap.h +++ b/arch/csky/include/asm/fixmap.h @@ -5,12 +5,16 @@ #define __ASM_CSKY_FIXMAP_H #include +#include #ifdef CONFIG_HIGHMEM #include #include #endif enum fixed_addresses { +#ifdef CONFIG_HAVE_TCM + FIX_TCM = TCM_NR_PAGES, +#endif #ifdef CONFIG_HIGHMEM FIX_KMAP_BEGIN, FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_TYPE_NR * NR_CPUS) - 1, @@ -18,7 +22,6 @@ enum fixed_addresses { __end_of_fixed_addresses }; -#define FIXADDR_TOP 0xffffc000 #define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) diff --git a/arch/csky/include/asm/memory.h b/arch/csky/include/asm/memory.h new file mode 100644 index 000000000000..cdffbd0a500b --- /dev/null +++ b/arch/csky/include/asm/memory.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASM_CSKY_MEMORY_H +#define __ASM_CSKY_MEMORY_H + +#include +#include +#include +#include + +#define FIXADDR_TOP _AC(0xffffc000, UL) + +#ifdef CONFIG_HAVE_TCM +#ifdef CONFIG_HAVE_DTCM +#define TCM_NR_PAGES (CONFIG_ITCM_NR_PAGES + CONFIG_DTCM_NR_PAGES) +#else +#define TCM_NR_PAGES (CONFIG_ITCM_NR_PAGES) +#endif +#define FIXADDR_TCM _AC(FIXADDR_TOP - (TCM_NR_PAGES * PAGE_SIZE), UL) +#endif + +#endif diff --git a/arch/csky/include/asm/tcm.h b/arch/csky/include/asm/tcm.h new file mode 100644 index 000000000000..2b135cefb73f --- /dev/null +++ b/arch/csky/include/asm/tcm.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASM_CSKY_TCM_H +#define __ASM_CSKY_TCM_H + +#ifndef CONFIG_HAVE_TCM +#error "You should not be including tcm.h unless you have a TCM!" +#endif + +#include + +/* Tag variables with this */ +#define __tcmdata __section(.tcm.data) +/* Tag constants with this */ +#define __tcmconst __section(.tcm.rodata) +/* Tag functions inside TCM called from outside TCM with this */ +#define __tcmfunc __section(.tcm.text) noinline +/* Tag function inside TCM called from inside TCM with this */ +#define __tcmlocalfunc __section(.tcm.text) + +void *tcm_alloc(size_t len); +void tcm_free(void *addr, size_t len); + +#endif diff --git a/arch/csky/kernel/vmlinux.lds.S b/arch/csky/kernel/vmlinux.lds.S index 2ff37beaf2bf..f05b413df328 100644 --- a/arch/csky/kernel/vmlinux.lds.S +++ b/arch/csky/kernel/vmlinux.lds.S @@ -2,6 +2,7 @@ #include #include +#include OUTPUT_ARCH(csky) ENTRY(_start) @@ -53,6 +54,54 @@ SECTIONS RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) _edata = .; +#ifdef CONFIG_HAVE_TCM + .tcm_start : { + . = ALIGN(PAGE_SIZE); + __tcm_start = .; + } + + .text_data_tcm FIXADDR_TCM : AT(__tcm_start) + { + . = ALIGN(4); + __stcm_text_data = .; + *(.tcm.text) + *(.tcm.rodata) +#ifndef CONFIG_HAVE_DTCM + *(.tcm.data) +#endif + . = ALIGN(4); + __etcm_text_data = .; + } + + . = ADDR(.tcm_start) + SIZEOF(.tcm_start) + SIZEOF(.text_data_tcm); + +#ifdef CONFIG_HAVE_DTCM + #define ITCM_SIZE CONFIG_ITCM_NR_PAGES * PAGE_SIZE + + .dtcm_start : { + __dtcm_start = .; + } + + .data_tcm FIXADDR_TCM + ITCM_SIZE : AT(__dtcm_start) + { + . = ALIGN(4); + __stcm_data = .; + *(.tcm.data) + . = ALIGN(4); + __etcm_data = .; + } + + . = ADDR(.dtcm_start) + SIZEOF(.data_tcm); + + .tcm_end : AT(ADDR(.dtcm_start) + SIZEOF(.data_tcm)) { +#else + .tcm_end : AT(ADDR(.tcm_start) + SIZEOF(.text_data_tcm)) { +#endif + . = ALIGN(PAGE_SIZE); + __tcm_end = .; + } +#endif + EXCEPTION_TABLE(L1_CACHE_BYTES) BSS_SECTION(L1_CACHE_BYTES, PAGE_SIZE, L1_CACHE_BYTES) VBR_BASE diff --git a/arch/csky/mm/Makefile b/arch/csky/mm/Makefile index c94ef6481098..2c51918eb4fc 100644 --- a/arch/csky/mm/Makefile +++ b/arch/csky/mm/Makefile @@ -14,3 +14,4 @@ obj-y += syscache.o obj-y += tlb.o obj-y += asid.o obj-y += context.o +obj-$(CONFIG_HAVE_TCM) += tcm.o diff --git a/arch/csky/mm/tcm.c b/arch/csky/mm/tcm.c new file mode 100644 index 000000000000..ddeb36328819 --- /dev/null +++ b/arch/csky/mm/tcm.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include + +#if (CONFIG_ITCM_RAM_BASE == 0xffffffff) +#error "You should define ITCM_RAM_BASE" +#endif + +#ifdef CONFIG_HAVE_DTCM +#if (CONFIG_DTCM_RAM_BASE == 0xffffffff) +#error "You should define DTCM_RAM_BASE" +#endif + +#if (CONFIG_DTCM_RAM_BASE == CONFIG_ITCM_RAM_BASE) +#error "You should define correct DTCM_RAM_BASE" +#endif +#endif + +extern char __tcm_start, __tcm_end, __dtcm_start; + +static struct gen_pool *tcm_pool; + +static void __init tcm_mapping_init(void) +{ + pte_t *tcm_pte; + unsigned long vaddr, paddr; + int i; + + paddr = CONFIG_ITCM_RAM_BASE; + + if (pfn_valid(PFN_DOWN(CONFIG_ITCM_RAM_BASE))) + goto panic; + +#ifndef CONFIG_HAVE_DTCM + for (i = 0; i < TCM_NR_PAGES; i++) { +#else + for (i = 0; i < CONFIG_ITCM_NR_PAGES; i++) { +#endif + vaddr = __fix_to_virt(FIX_TCM - i); + + tcm_pte = + pte_offset_kernel((pmd_t *)pgd_offset_k(vaddr), vaddr); + + set_pte(tcm_pte, pfn_pte(__phys_to_pfn(paddr), PAGE_KERNEL)); + + flush_tlb_one(vaddr); + + paddr = paddr + PAGE_SIZE; + } + +#ifdef CONFIG_HAVE_DTCM + if (pfn_valid(PFN_DOWN(CONFIG_DTCM_RAM_BASE))) + goto panic; + + paddr = CONFIG_DTCM_RAM_BASE; + + for (i = 0; i < CONFIG_DTCM_NR_PAGES; i++) { + vaddr = __fix_to_virt(FIX_TCM - CONFIG_ITCM_NR_PAGES - i); + + tcm_pte = + pte_offset_kernel((pmd_t *) pgd_offset_k(vaddr), vaddr); + + set_pte(tcm_pte, pfn_pte(__phys_to_pfn(paddr), PAGE_KERNEL)); + + flush_tlb_one(vaddr); + + paddr = paddr + PAGE_SIZE; + } +#endif + +#ifndef CONFIG_HAVE_DTCM + memcpy((void *)__fix_to_virt(FIX_TCM), + &__tcm_start, &__tcm_end - &__tcm_start); + + pr_info("%s: mapping tcm va:0x%08lx to pa:0x%08x\n", + __func__, __fix_to_virt(FIX_TCM), CONFIG_ITCM_RAM_BASE); + + pr_info("%s: __tcm_start va:0x%08lx size:%d\n", + __func__, (unsigned long)&__tcm_start, &__tcm_end - &__tcm_start); +#else + memcpy((void *)__fix_to_virt(FIX_TCM), + &__tcm_start, &__dtcm_start - &__tcm_start); + + pr_info("%s: mapping itcm va:0x%08lx to pa:0x%08x\n", + __func__, __fix_to_virt(FIX_TCM), CONFIG_ITCM_RAM_BASE); + + pr_info("%s: __itcm_start va:0x%08lx size:%d\n", + __func__, (unsigned long)&__tcm_start, &__dtcm_start - &__tcm_start); + + memcpy((void *)__fix_to_virt(FIX_TCM - CONFIG_ITCM_NR_PAGES), + &__dtcm_start, &__tcm_end - &__dtcm_start); + + pr_info("%s: mapping dtcm va:0x%08lx to pa:0x%08x\n", + __func__, __fix_to_virt(FIX_TCM - CONFIG_ITCM_NR_PAGES), + CONFIG_DTCM_RAM_BASE); + + pr_info("%s: __dtcm_start va:0x%08lx size:%d\n", + __func__, (unsigned long)&__dtcm_start, &__tcm_end - &__dtcm_start); + +#endif + return; +panic: + panic("TCM init error"); +} + +void *tcm_alloc(size_t len) +{ + unsigned long vaddr; + + if (!tcm_pool) + return NULL; + + vaddr = gen_pool_alloc(tcm_pool, len); + if (!vaddr) + return NULL; + + return (void *) vaddr; +} +EXPORT_SYMBOL(tcm_alloc); + +void tcm_free(void *addr, size_t len) +{ + gen_pool_free(tcm_pool, (unsigned long) addr, len); +} +EXPORT_SYMBOL(tcm_free); + +static int __init tcm_setup_pool(void) +{ +#ifndef CONFIG_HAVE_DTCM + u32 pool_size = (u32) (TCM_NR_PAGES * PAGE_SIZE) + - (u32) (&__tcm_end - &__tcm_start); + + u32 tcm_pool_start = __fix_to_virt(FIX_TCM) + + (u32) (&__tcm_end - &__tcm_start); +#else + u32 pool_size = (u32) (CONFIG_DTCM_NR_PAGES * PAGE_SIZE) + - (u32) (&__tcm_end - &__dtcm_start); + + u32 tcm_pool_start = __fix_to_virt(FIX_TCM - CONFIG_ITCM_NR_PAGES) + + (u32) (&__tcm_end - &__dtcm_start); +#endif + int ret; + + tcm_pool = gen_pool_create(2, -1); + + ret = gen_pool_add(tcm_pool, tcm_pool_start, pool_size, -1); + if (ret) { + pr_err("%s: gen_pool add failed!\n", __func__); + return ret; + } + + pr_info("%s: Added %d bytes @ 0x%08x to memory pool\n", + __func__, pool_size, tcm_pool_start); + + return 0; +} + +static int __init tcm_init(void) +{ + tcm_mapping_init(); + + tcm_setup_pool(); + + return 0; +} +arch_initcall(tcm_init); -- cgit v1.2.3-59-g8ed1b From f136008f31e91060d6b6e6e031cb2c827448e280 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Sun, 1 Dec 2019 22:34:19 +0800 Subject: csky: Separate fixaddr_init from highmem After fixaddr_init is separated from highmem, we could use tcm without highmem selected. (610 (abiv1) don't support highmem, but it could use tcm now.) Signed-off-by: Guo Ren --- arch/csky/Kconfig | 1 - arch/csky/include/asm/fixmap.h | 4 +++ arch/csky/include/asm/memory.h | 3 ++ arch/csky/include/asm/pgtable.h | 6 +--- arch/csky/kernel/setup.c | 2 ++ arch/csky/mm/highmem.c | 64 +++-------------------------------------- arch/csky/mm/init.c | 47 ++++++++++++++++++++++++++++++ 7 files changed, 61 insertions(+), 66 deletions(-) diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index fc92f08f0017..00ac7bc5558a 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -191,7 +191,6 @@ endchoice menuconfig HAVE_TCM bool "Tightly-Coupled/Sram Memory" - depends on HIGHMEM select GENERIC_ALLOCATOR help The implementation are not only used by TCM (Tightly-Coupled Meory) diff --git a/arch/csky/include/asm/fixmap.h b/arch/csky/include/asm/fixmap.h index 4350578faa1c..81f9477d5330 100644 --- a/arch/csky/include/asm/fixmap.h +++ b/arch/csky/include/asm/fixmap.h @@ -27,4 +27,8 @@ enum fixed_addresses { #include +extern void fixrange_init(unsigned long start, unsigned long end, + pgd_t *pgd_base); +extern void __init fixaddr_init(void); + #endif /* __ASM_CSKY_FIXMAP_H */ diff --git a/arch/csky/include/asm/memory.h b/arch/csky/include/asm/memory.h index cdffbd0a500b..a65c6759f537 100644 --- a/arch/csky/include/asm/memory.h +++ b/arch/csky/include/asm/memory.h @@ -9,6 +9,9 @@ #include #define FIXADDR_TOP _AC(0xffffc000, UL) +#define PKMAP_BASE _AC(0xff800000, UL) +#define VMALLOC_START _AC(0xc0008000, UL) +#define VMALLOC_END (PKMAP_BASE - (PAGE_SIZE * 2)) #ifdef CONFIG_HAVE_TCM #ifdef CONFIG_HAVE_DTCM diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h index 4b2a41e15f2e..9b7764cb7645 100644 --- a/arch/csky/include/asm/pgtable.h +++ b/arch/csky/include/asm/pgtable.h @@ -5,6 +5,7 @@ #define __ASM_CSKY_PGTABLE_H #include +#include #include #include #include @@ -16,11 +17,6 @@ #define USER_PTRS_PER_PGD (0x80000000UL/PGDIR_SIZE) #define FIRST_USER_ADDRESS 0UL -#define PKMAP_BASE (0xff800000) - -#define VMALLOC_START (0xc0008000) -#define VMALLOC_END (PKMAP_BASE - 2*PAGE_SIZE) - /* * C-SKY is two-level paging structure: */ diff --git a/arch/csky/kernel/setup.c b/arch/csky/kernel/setup.c index 52eaf31ba27f..4fc7b16b77d2 100644 --- a/arch/csky/kernel/setup.c +++ b/arch/csky/kernel/setup.c @@ -133,6 +133,8 @@ void __init setup_arch(char **cmdline_p) sparse_init(); + fixaddr_init(); + #ifdef CONFIG_HIGHMEM kmap_init(); #endif diff --git a/arch/csky/mm/highmem.c b/arch/csky/mm/highmem.c index 3317b774f6dc..813129145f3d 100644 --- a/arch/csky/mm/highmem.c +++ b/arch/csky/mm/highmem.c @@ -117,85 +117,29 @@ struct page *kmap_atomic_to_page(void *ptr) return pte_page(*pte); } -static void __init fixrange_init(unsigned long start, unsigned long end, - pgd_t *pgd_base) +static void __init kmap_pages_init(void) { -#ifdef CONFIG_HIGHMEM - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - int i, j, k; unsigned long vaddr; - - vaddr = start; - i = __pgd_offset(vaddr); - j = __pud_offset(vaddr); - k = __pmd_offset(vaddr); - pgd = pgd_base + i; - - for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) { - pud = (pud_t *)pgd; - for ( ; (j < PTRS_PER_PUD) && (vaddr != end); pud++, j++) { - pmd = (pmd_t *)pud; - for (; (k < PTRS_PER_PMD) && (vaddr != end); pmd++, k++) { - if (pmd_none(*pmd)) { - pte = (pte_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); - if (!pte) - panic("%s: Failed to allocate %lu bytes align=%lx\n", - __func__, PAGE_SIZE, - PAGE_SIZE); - - set_pmd(pmd, __pmd(__pa(pte))); - BUG_ON(pte != pte_offset_kernel(pmd, 0)); - } - vaddr += PMD_SIZE; - } - k = 0; - } - j = 0; - } -#endif -} - -void __init fixaddr_kmap_pages_init(void) -{ - unsigned long vaddr; - pgd_t *pgd_base; -#ifdef CONFIG_HIGHMEM pgd_t *pgd; pmd_t *pmd; pud_t *pud; pte_t *pte; -#endif - pgd_base = swapper_pg_dir; - - /* - * Fixed mappings: - */ - vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; - fixrange_init(vaddr, 0, pgd_base); - -#ifdef CONFIG_HIGHMEM - /* - * Permanent kmaps: - */ + vaddr = PKMAP_BASE; - fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); + fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, swapper_pg_dir); pgd = swapper_pg_dir + __pgd_offset(vaddr); pud = (pud_t *)pgd; pmd = pmd_offset(pud, vaddr); pte = pte_offset_kernel(pmd, vaddr); pkmap_page_table = pte; -#endif } void __init kmap_init(void) { unsigned long vaddr; - fixaddr_kmap_pages_init(); + kmap_pages_init(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN); diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c index d4c2292ea46b..322eb7bd7962 100644 --- a/arch/csky/mm/init.c +++ b/arch/csky/mm/init.c @@ -101,3 +101,50 @@ void __init pre_mmu_init(void) /* Setup page mask to 4k */ write_mmu_pagemask(0); } + +void __init fixrange_init(unsigned long start, unsigned long end, + pgd_t *pgd_base) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + int i, j, k; + unsigned long vaddr; + + vaddr = start; + i = __pgd_offset(vaddr); + j = __pud_offset(vaddr); + k = __pmd_offset(vaddr); + pgd = pgd_base + i; + + for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) { + pud = (pud_t *)pgd; + for ( ; (j < PTRS_PER_PUD) && (vaddr != end); pud++, j++) { + pmd = (pmd_t *)pud; + for (; (k < PTRS_PER_PMD) && (vaddr != end); pmd++, k++) { + if (pmd_none(*pmd)) { + pte = (pte_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); + if (!pte) + panic("%s: Failed to allocate %lu bytes align=%lx\n", + __func__, PAGE_SIZE, + PAGE_SIZE); + + set_pmd(pmd, __pmd(__pa(pte))); + BUG_ON(pte != pte_offset_kernel(pmd, 0)); + } + vaddr += PMD_SIZE; + } + k = 0; + } + j = 0; + } +} + +void __init fixaddr_init(void) +{ + unsigned long vaddr; + + vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; + fixrange_init(vaddr, vaddr + PMD_SIZE, swapper_pg_dir); +} -- cgit v1.2.3-59-g8ed1b From 7f4a567332f035ab16b29010fbd04a0f10183c77 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Mon, 30 Dec 2019 15:53:37 +0800 Subject: csky/mm: Fixup export invalid_pte_table symbol There is no present bit in csky pmd hardware, so we need to prepare invalid_pte_table for empty pmd entry and the functions (pmd_none & pmd_present) in pgtable.h need invalid_pte_talbe to get result. If a module use these functions, we need export the symbol for it. Signed-off-by: Guo Ren Cc: Mo Qihui Cc: Zhange Jian --- arch/csky/mm/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c index 322eb7bd7962..dbb4b2dfe4b7 100644 --- a/arch/csky/mm/init.c +++ b/arch/csky/mm/init.c @@ -31,6 +31,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss; pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned_bss; +EXPORT_SYMBOL(invalid_pte_table); unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); -- cgit v1.2.3-59-g8ed1b From f8e17c17b81070f38062dce79ca7f4541851dadd Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Tue, 17 Dec 2019 11:12:55 +0800 Subject: csky: Set regs->usp to kernel sp, when the exception is from kernel In the past, we didn't care about kernel sp when saving pt_reg. But in some cases, we still need pt_reg->usp to represent the kernel stack before enter exception. For cmpxhg in atomic.S, we need save and restore usp for above. Signed-off-by: Guo Ren --- arch/csky/abiv1/inc/abi/entry.h | 19 ++++++++++++++----- arch/csky/abiv2/inc/abi/entry.h | 11 +++++++++++ arch/csky/kernel/atomic.S | 8 ++++++-- 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/arch/csky/abiv1/inc/abi/entry.h b/arch/csky/abiv1/inc/abi/entry.h index 7ab78bd0f3b1..f35a9f3315ee 100644 --- a/arch/csky/abiv1/inc/abi/entry.h +++ b/arch/csky/abiv1/inc/abi/entry.h @@ -16,14 +16,16 @@ #define LSAVE_A4 40 #define LSAVE_A5 44 +#define usp ss1 + .macro USPTOKSP - mtcr sp, ss1 + mtcr sp, usp mfcr sp, ss0 .endm .macro KSPTOUSP mtcr sp, ss0 - mfcr sp, ss1 + mfcr sp, usp .endm .macro SAVE_ALL epc_inc @@ -45,7 +47,13 @@ add lr, r13 stw lr, (sp, 8) + mov lr, sp + addi lr, 32 + addi lr, 32 + addi lr, 16 + bt 2f mfcr lr, ss1 +2: stw lr, (sp, 16) stw a0, (sp, 20) @@ -79,9 +87,10 @@ ldw a0, (sp, 12) mtcr a0, epsr btsti a0, 31 + bt 1f ldw a0, (sp, 16) mtcr a0, ss1 - +1: ldw a0, (sp, 24) ldw a1, (sp, 28) ldw a2, (sp, 32) @@ -102,9 +111,9 @@ addi sp, 32 addi sp, 8 - bt 1f + bt 2f KSPTOUSP -1: +2: rte .endm diff --git a/arch/csky/abiv2/inc/abi/entry.h b/arch/csky/abiv2/inc/abi/entry.h index 9897a16b45e5..94a7a58765df 100644 --- a/arch/csky/abiv2/inc/abi/entry.h +++ b/arch/csky/abiv2/inc/abi/entry.h @@ -31,7 +31,13 @@ mfcr lr, epsr stw lr, (sp, 12) + btsti lr, 31 + bf 1f + addi lr, sp, 152 + br 2f +1: mfcr lr, usp +2: stw lr, (sp, 16) stw a0, (sp, 20) @@ -64,8 +70,10 @@ mtcr a0, epc ldw a0, (sp, 12) mtcr a0, epsr + btsti a0, 31 ldw a0, (sp, 16) mtcr a0, usp + mtcr a0, ss0 #ifdef CONFIG_CPU_HAS_HILO ldw a0, (sp, 140) @@ -86,6 +94,9 @@ addi sp, 40 ldm r16-r30, (sp) addi sp, 72 + bf 1f + mfcr sp, ss0 +1: rte .endm diff --git a/arch/csky/kernel/atomic.S b/arch/csky/kernel/atomic.S index 5b84f11485ae..3821ef9b7567 100644 --- a/arch/csky/kernel/atomic.S +++ b/arch/csky/kernel/atomic.S @@ -17,10 +17,12 @@ ENTRY(csky_cmpxchg) mfcr a3, epc addi a3, TRAP0_SIZE - subi sp, 8 + subi sp, 16 stw a3, (sp, 0) mfcr a3, epsr stw a3, (sp, 4) + mfcr a3, usp + stw a3, (sp, 8) psrset ee #ifdef CONFIG_CPU_HAS_LDSTEX @@ -47,7 +49,9 @@ ENTRY(csky_cmpxchg) mtcr a3, epc ldw a3, (sp, 4) mtcr a3, epsr - addi sp, 8 + ldw a3, (sp, 8) + mtcr a3, usp + addi sp, 16 KSPTOUSP rte END(csky_cmpxchg) -- cgit v1.2.3-59-g8ed1b From c9492737b25ca32679ba3163609d938c9abfd508 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Tue, 7 Jan 2020 12:21:25 +0800 Subject: csky/smp: Fixup boot failed when CONFIG_SMP If we use a non-ipi-support interrupt controller, it will cause panic here. We should let cpu up and work with CONFIG_SMP, when we use a non-ipi intc. Signed-off-by: Guo Ren --- arch/csky/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/csky/kernel/smp.c b/arch/csky/kernel/smp.c index b753d382e4ce..0bb0954d5570 100644 --- a/arch/csky/kernel/smp.c +++ b/arch/csky/kernel/smp.c @@ -120,7 +120,7 @@ void __init setup_smp_ipi(void) int rc; if (ipi_irq == 0) - panic("%s IRQ mapping failed\n", __func__); + return; rc = request_percpu_irq(ipi_irq, handle_ipi, "IPI Interrupt", &ipi_dummy_dev); -- cgit v1.2.3-59-g8ed1b From a736fa1ed772e3640e1bfaab36032c5a285d6a7b Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Sat, 11 Jan 2020 13:44:32 +0800 Subject: csky/Kconfig: Add Kconfig.platforms to support some drivers Such as snps,dw-apb-ictl Signed-off-by: Guo Ren --- arch/csky/Kconfig | 2 ++ arch/csky/Kconfig.platforms | 9 +++++++++ 2 files changed, 11 insertions(+) create mode 100644 arch/csky/Kconfig.platforms diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 00ac7bc5558a..1c723cb11cc4 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -270,4 +270,6 @@ config HOTPLUG_CPU Say N if you want to disable CPU hotplug. endmenu +source "arch/csky/Kconfig.platforms" + source "kernel/Kconfig.hz" diff --git a/arch/csky/Kconfig.platforms b/arch/csky/Kconfig.platforms new file mode 100644 index 000000000000..639e17f4eacb --- /dev/null +++ b/arch/csky/Kconfig.platforms @@ -0,0 +1,9 @@ +menu "Platform drivers selection" + +config ARCH_CSKY_DW_APB_ICTL + bool "Select dw-apb interrupt controller" + select DW_APB_ICTL + default y + help + This enables support for snps dw-apb-ictl +endmenu -- cgit v1.2.3-59-g8ed1b From 761b4f694cb90b63ca2739ac8a8a176342636e5e Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Wed, 22 Jan 2020 11:15:14 +0800 Subject: csky: Support icache flush without specific instructions Some CPUs don't support icache specific instructions to flush icache lines in broadcast way. We use cpu control registers to flush local icache and use IPI to notify other cores. Signed-off-by: Guo Ren --- arch/csky/Kconfig | 4 ++++ arch/csky/include/asm/cache.h | 1 + arch/csky/mm/cachev1.c | 5 +++++ arch/csky/mm/cachev2.c | 29 +++++++++++++++++++++-------- 4 files changed, 31 insertions(+), 8 deletions(-) diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 1c723cb11cc4..111474b50f69 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -231,6 +231,10 @@ config CPU_HAS_FPU bool "CPU has FPU coprocessor" depends on CPU_CK807 || CPU_CK810 || CPU_CK860 +config CPU_HAS_ICACHE_INS + bool "CPU has Icache invalidate instructions" + depends on CPU_HAS_CACHEV2 + config CPU_HAS_TEE bool "CPU has Trusted Execution Environment" depends on CPU_CK810 diff --git a/arch/csky/include/asm/cache.h b/arch/csky/include/asm/cache.h index 1d5fc2f78fd7..4b5c09bf1d25 100644 --- a/arch/csky/include/asm/cache.h +++ b/arch/csky/include/asm/cache.h @@ -16,6 +16,7 @@ void dcache_wb_line(unsigned long start); void icache_inv_range(unsigned long start, unsigned long end); void icache_inv_all(void); +void local_icache_inv_all(void *priv); void dcache_wb_range(unsigned long start, unsigned long end); void dcache_wbinv_all(void); diff --git a/arch/csky/mm/cachev1.c b/arch/csky/mm/cachev1.c index 494ec912abff..5a5a9804a0e3 100644 --- a/arch/csky/mm/cachev1.c +++ b/arch/csky/mm/cachev1.c @@ -94,6 +94,11 @@ void icache_inv_all(void) cache_op_all(INS_CACHE|CACHE_INV, 0); } +void local_icache_inv_all(void *priv) +{ + cache_op_all(INS_CACHE|CACHE_INV, 0); +} + void dcache_wb_range(unsigned long start, unsigned long end) { cache_op_range(start, end, DATA_CACHE|CACHE_CLR, 0); diff --git a/arch/csky/mm/cachev2.c b/arch/csky/mm/cachev2.c index b61be6518e21..909fdd0f5995 100644 --- a/arch/csky/mm/cachev2.c +++ b/arch/csky/mm/cachev2.c @@ -3,15 +3,25 @@ #include #include +#include #include #include -inline void dcache_wb_line(unsigned long start) +#define INS_CACHE (1 << 0) +#define CACHE_INV (1 << 4) + +void local_icache_inv_all(void *priv) { - asm volatile("dcache.cval1 %0\n"::"r"(start):"memory"); + mtcr("cr17", INS_CACHE|CACHE_INV); sync_is(); } +void icache_inv_all(void) +{ + on_each_cpu(local_icache_inv_all, NULL, 1); +} + +#ifdef CONFIG_CPU_HAS_ICACHE_INS void icache_inv_range(unsigned long start, unsigned long end) { unsigned long i = start & ~(L1_CACHE_BYTES - 1); @@ -20,10 +30,16 @@ void icache_inv_range(unsigned long start, unsigned long end) asm volatile("icache.iva %0\n"::"r"(i):"memory"); sync_is(); } +#else +void icache_inv_range(unsigned long start, unsigned long end) +{ + icache_inv_all(); +} +#endif -void icache_inv_all(void) +inline void dcache_wb_line(unsigned long start) { - asm volatile("icache.ialls\n":::"memory"); + asm volatile("dcache.cval1 %0\n"::"r"(start):"memory"); sync_is(); } @@ -53,10 +69,7 @@ void cache_wbinv_range(unsigned long start, unsigned long end) asm volatile("dcache.cval1 %0\n"::"r"(i):"memory"); sync_is(); - i = start & ~(L1_CACHE_BYTES - 1); - for (; i < end; i += L1_CACHE_BYTES) - asm volatile("icache.iva %0\n"::"r"(i):"memory"); - sync_is(); + icache_inv_range(start, end); } EXPORT_SYMBOL(cache_wbinv_range); -- cgit v1.2.3-59-g8ed1b From a1176734132c630b50908c36563e05fb3599682c Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Sat, 25 Jan 2020 00:37:09 +0800 Subject: csky: Remove unnecessary flush_icache_* implementation The abiv2 CPUs are all PIPT cache, so there is no need to implement flush_icache_page function. The function flush_icache_user_range hasn't been used, so just remove it. The function flush_cache_range is not necessary for PIPT cache when tlb mapping changed. Signed-off-by: Guo Ren --- arch/csky/abiv1/inc/abi/cacheflush.h | 3 --- arch/csky/abiv2/cacheflush.c | 23 ----------------------- arch/csky/abiv2/inc/abi/cacheflush.h | 13 ++----------- 3 files changed, 2 insertions(+), 37 deletions(-) diff --git a/arch/csky/abiv1/inc/abi/cacheflush.h b/arch/csky/abiv1/inc/abi/cacheflush.h index 79ef9e8c1afd..a73702704f38 100644 --- a/arch/csky/abiv1/inc/abi/cacheflush.h +++ b/arch/csky/abiv1/inc/abi/cacheflush.h @@ -49,9 +49,6 @@ extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, u #define flush_icache_page(vma, page) do {} while (0); #define flush_icache_range(start, end) cache_wbinv_range(start, end) -#define flush_icache_user_range(vma,page,addr,len) \ - flush_dcache_page(page) - #define copy_from_user_page(vma, page, vaddr, dst, src, len) \ do { \ memcpy(dst, src, len); \ diff --git a/arch/csky/abiv2/cacheflush.c b/arch/csky/abiv2/cacheflush.c index 5bb887b275e1..f64b415f6fde 100644 --- a/arch/csky/abiv2/cacheflush.c +++ b/arch/csky/abiv2/cacheflush.c @@ -6,29 +6,6 @@ #include #include -void flush_icache_page(struct vm_area_struct *vma, struct page *page) -{ - unsigned long start; - - start = (unsigned long) kmap_atomic(page); - - cache_wbinv_range(start, start + PAGE_SIZE); - - kunmap_atomic((void *)start); -} - -void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, - unsigned long vaddr, int len) -{ - unsigned long kaddr; - - kaddr = (unsigned long) kmap_atomic(page) + (vaddr & ~PAGE_MASK); - - cache_wbinv_range(kaddr, kaddr + len); - - kunmap_atomic((void *)kaddr); -} - void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *pte) { diff --git a/arch/csky/abiv2/inc/abi/cacheflush.h b/arch/csky/abiv2/inc/abi/cacheflush.h index b8db5e0b2fe3..62a9031fffd8 100644 --- a/arch/csky/abiv2/inc/abi/cacheflush.h +++ b/arch/csky/abiv2/inc/abi/cacheflush.h @@ -13,25 +13,16 @@ #define flush_cache_all() do { } while (0) #define flush_cache_mm(mm) do { } while (0) #define flush_cache_dup_mm(mm) do { } while (0) - -#define flush_cache_range(vma, start, end) \ - do { \ - if (vma->vm_flags & VM_EXEC) \ - icache_inv_all(); \ - } while (0) - +#define flush_cache_range(vma, start, end) do { } while (0) #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 #define flush_dcache_page(page) do { } while (0) #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) +#define flush_icache_page(vma, page) do { } while (0) #define flush_icache_range(start, end) cache_wbinv_range(start, end) -void flush_icache_page(struct vm_area_struct *vma, struct page *page); -void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, - unsigned long vaddr, int len); - #define flush_cache_vmap(start, end) do { } while (0) #define flush_cache_vunmap(start, end) do { } while (0) -- cgit v1.2.3-59-g8ed1b From d936a7e708dcf22344c4420e8b0e36f5d5f8c073 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Mon, 27 Jan 2020 01:20:36 +0800 Subject: csky: Enable defer flush_dcache_page for abiv2 cpus (807/810/860) Instead of flushing cache per update_mmu_cache() called, we use flush_dcache_page to reduce the frequency of flashing the cache. As abiv2 cpus are all PIPT for icache & dcache, we needn't handle dcache aliasing problem. But their icache can't snoop dcache, so we still need sync_icache_dcache in update_mmu_cache(). Signed-off-by: Guo Ren --- arch/csky/abiv2/cacheflush.c | 14 ++++++++------ arch/csky/abiv2/inc/abi/cacheflush.h | 12 ++++++++++-- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/arch/csky/abiv2/cacheflush.c b/arch/csky/abiv2/cacheflush.c index f64b415f6fde..ba469953a16e 100644 --- a/arch/csky/abiv2/cacheflush.c +++ b/arch/csky/abiv2/cacheflush.c @@ -9,20 +9,22 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *pte) { - unsigned long addr, pfn; + unsigned long addr; struct page *page; - pfn = pte_pfn(*pte); - if (unlikely(!pfn_valid(pfn))) + page = pfn_to_page(pte_pfn(*pte)); + if (page == ZERO_PAGE(0)) return; - page = pfn_to_page(pfn); - if (page == ZERO_PAGE(0)) + if (test_and_set_bit(PG_dcache_clean, &page->flags)) return; addr = (unsigned long) kmap_atomic(page); - cache_wbinv_range(addr, addr + PAGE_SIZE); + dcache_wb_range(addr, addr + PAGE_SIZE); + + if (vma->vm_flags & VM_EXEC) + icache_inv_range(addr, addr + PAGE_SIZE); kunmap_atomic((void *) addr); } diff --git a/arch/csky/abiv2/inc/abi/cacheflush.h b/arch/csky/abiv2/inc/abi/cacheflush.h index 62a9031fffd8..acd7c6c55d61 100644 --- a/arch/csky/abiv2/inc/abi/cacheflush.h +++ b/arch/csky/abiv2/inc/abi/cacheflush.h @@ -15,8 +15,16 @@ #define flush_cache_dup_mm(mm) do { } while (0) #define flush_cache_range(vma, start, end) do { } while (0) #define flush_cache_page(vma, vmaddr, pfn) do { } while (0) -#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 -#define flush_dcache_page(page) do { } while (0) + +#define PG_dcache_clean PG_arch_1 + +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 +static inline void flush_dcache_page(struct page *page) +{ + if (test_bit(PG_dcache_clean, &page->flags)) + clear_bit(PG_dcache_clean, &page->flags); +} + #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) #define flush_icache_page(vma, page) do { } while (0) -- cgit v1.2.3-59-g8ed1b From cc1f6563a92ced0889775d0587316d725b6e1a68 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Mon, 27 Jan 2020 19:57:29 +0800 Subject: csky: Optimize abiv2 copy_to_user_page with VM_EXEC Only when vma is for VM_EXEC, we need sync dcache & icache. eg: - gdb ptrace modify user space instruction code area. Add VM_EXEC condition to reduce unnecessary cache flush. The abiv1 cpus' cache are all VIPT, so we still need to deal with dcache aliasing problem. But there is optimized way to use cache color, just like what's done in arch/csky/abiv1/inc/abi/page.h. Signed-off-by: Guo Ren --- arch/csky/abiv2/inc/abi/cacheflush.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/csky/abiv2/inc/abi/cacheflush.h b/arch/csky/abiv2/inc/abi/cacheflush.h index acd7c6c55d61..28b7c3233175 100644 --- a/arch/csky/abiv2/inc/abi/cacheflush.h +++ b/arch/csky/abiv2/inc/abi/cacheflush.h @@ -37,7 +37,9 @@ static inline void flush_dcache_page(struct page *page) #define copy_to_user_page(vma, page, vaddr, dst, src, len) \ do { \ memcpy(dst, src, len); \ - cache_wbinv_range((unsigned long)dst, (unsigned long)dst + len); \ + if (vma->vm_flags & VM_EXEC) \ + cache_wbinv_range((unsigned long)dst, \ + (unsigned long)dst + len); \ } while (0) #define copy_from_user_page(vma, page, vaddr, dst, src, len) \ memcpy(dst, src, len) -- cgit v1.2.3-59-g8ed1b From 997153b9a75c08d545ad45e6f8ceb432435d2425 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Fri, 31 Jan 2020 20:33:10 +0800 Subject: csky: Add flush_icache_mm to defer flush icache all Some CPUs don't support icache.va instruction to maintain the whole smp cores' icache. Using icache.all + IPI casue a lot on performace and using defer mechanism could reduce the number of calling icache _flush_all functions. Signed-off-by: Guo Ren --- arch/csky/abiv1/inc/abi/cacheflush.h | 2 ++ arch/csky/abiv2/cacheflush.c | 55 ++++++++++++++++++++++++++++++++++++ arch/csky/abiv2/inc/abi/cacheflush.h | 14 +++++++-- arch/csky/include/asm/cacheflush.h | 1 + arch/csky/include/asm/mmu.h | 1 + arch/csky/include/asm/mmu_context.h | 2 ++ arch/csky/mm/syscache.c | 13 ++++----- 7 files changed, 77 insertions(+), 11 deletions(-) diff --git a/arch/csky/abiv1/inc/abi/cacheflush.h b/arch/csky/abiv1/inc/abi/cacheflush.h index a73702704f38..d3e04208d53c 100644 --- a/arch/csky/abiv1/inc/abi/cacheflush.h +++ b/arch/csky/abiv1/inc/abi/cacheflush.h @@ -48,6 +48,8 @@ extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, u #define flush_icache_page(vma, page) do {} while (0); #define flush_icache_range(start, end) cache_wbinv_range(start, end) +#define flush_icache_mm_range(mm, start, end) cache_wbinv_range(start, end) +#define flush_icache_deferred(mm) do {} while (0); #define copy_from_user_page(vma, page, vaddr, dst, src, len) \ do { \ diff --git a/arch/csky/abiv2/cacheflush.c b/arch/csky/abiv2/cacheflush.c index ba469953a16e..790f1ebfba44 100644 --- a/arch/csky/abiv2/cacheflush.c +++ b/arch/csky/abiv2/cacheflush.c @@ -28,3 +28,58 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, kunmap_atomic((void *) addr); } + +void flush_icache_deferred(struct mm_struct *mm) +{ + unsigned int cpu = smp_processor_id(); + cpumask_t *mask = &mm->context.icache_stale_mask; + + if (cpumask_test_cpu(cpu, mask)) { + cpumask_clear_cpu(cpu, mask); + /* + * Ensure the remote hart's writes are visible to this hart. + * This pairs with a barrier in flush_icache_mm. + */ + smp_mb(); + local_icache_inv_all(NULL); + } +} + +void flush_icache_mm_range(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + unsigned int cpu; + cpumask_t others, *mask; + + preempt_disable(); + +#ifdef CONFIG_CPU_HAS_ICACHE_INS + if (mm == current->mm) { + icache_inv_range(start, end); + preempt_enable(); + return; + } +#endif + + /* Mark every hart's icache as needing a flush for this MM. */ + mask = &mm->context.icache_stale_mask; + cpumask_setall(mask); + + /* Flush this hart's I$ now, and mark it as flushed. */ + cpu = smp_processor_id(); + cpumask_clear_cpu(cpu, mask); + local_icache_inv_all(NULL); + + /* + * Flush the I$ of other harts concurrently executing, and mark them as + * flushed. + */ + cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu)); + + if (mm != current->active_mm || !cpumask_empty(&others)) { + on_each_cpu_mask(&others, local_icache_inv_all, NULL, 1); + cpumask_clear(mask); + } + + preempt_enable(); +} diff --git a/arch/csky/abiv2/inc/abi/cacheflush.h b/arch/csky/abiv2/inc/abi/cacheflush.h index 28b7c3233175..a565e00c3f70 100644 --- a/arch/csky/abiv2/inc/abi/cacheflush.h +++ b/arch/csky/abiv2/inc/abi/cacheflush.h @@ -31,15 +31,23 @@ static inline void flush_dcache_page(struct page *page) #define flush_icache_range(start, end) cache_wbinv_range(start, end) +void flush_icache_mm_range(struct mm_struct *mm, + unsigned long start, unsigned long end); +void flush_icache_deferred(struct mm_struct *mm); + #define flush_cache_vmap(start, end) do { } while (0) #define flush_cache_vunmap(start, end) do { } while (0) #define copy_to_user_page(vma, page, vaddr, dst, src, len) \ do { \ memcpy(dst, src, len); \ - if (vma->vm_flags & VM_EXEC) \ - cache_wbinv_range((unsigned long)dst, \ - (unsigned long)dst + len); \ + if (vma->vm_flags & VM_EXEC) { \ + dcache_wb_range((unsigned long)dst, \ + (unsigned long)dst + len); \ + flush_icache_mm_range(current->mm, \ + (unsigned long)dst, \ + (unsigned long)dst + len); \ + } \ } while (0) #define copy_from_user_page(vma, page, vaddr, dst, src, len) \ memcpy(dst, src, len) diff --git a/arch/csky/include/asm/cacheflush.h b/arch/csky/include/asm/cacheflush.h index a96da67261ae..f0b8f25429a2 100644 --- a/arch/csky/include/asm/cacheflush.h +++ b/arch/csky/include/asm/cacheflush.h @@ -4,6 +4,7 @@ #ifndef __ASM_CSKY_CACHEFLUSH_H #define __ASM_CSKY_CACHEFLUSH_H +#include #include #endif /* __ASM_CSKY_CACHEFLUSH_H */ diff --git a/arch/csky/include/asm/mmu.h b/arch/csky/include/asm/mmu.h index b382a14ea4ec..26fbb1d15df0 100644 --- a/arch/csky/include/asm/mmu.h +++ b/arch/csky/include/asm/mmu.h @@ -7,6 +7,7 @@ typedef struct { atomic64_t asid; void *vdso; + cpumask_t icache_stale_mask; } mm_context_t; #endif /* __ASM_CSKY_MMU_H */ diff --git a/arch/csky/include/asm/mmu_context.h b/arch/csky/include/asm/mmu_context.h index 0285b0ad18b6..abdf1f1cb6ec 100644 --- a/arch/csky/include/asm/mmu_context.h +++ b/arch/csky/include/asm/mmu_context.h @@ -43,5 +43,7 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, TLBMISS_HANDLER_SETUP_PGD(next->pgd); write_mmu_entryhi(next->context.asid.counter); + + flush_icache_deferred(next); } #endif /* __ASM_CSKY_MMU_CONTEXT_H */ diff --git a/arch/csky/mm/syscache.c b/arch/csky/mm/syscache.c index c4645e4e97f4..ffade2f9a4c8 100644 --- a/arch/csky/mm/syscache.c +++ b/arch/csky/mm/syscache.c @@ -3,7 +3,7 @@ #include #include -#include +#include #include SYSCALL_DEFINE3(cacheflush, @@ -13,17 +13,14 @@ SYSCALL_DEFINE3(cacheflush, { switch (cache) { case ICACHE: - icache_inv_range((unsigned long)addr, - (unsigned long)addr + bytes); - break; + case BCACHE: + flush_icache_mm_range(current->mm, + (unsigned long)addr, + (unsigned long)addr + bytes); case DCACHE: dcache_wb_range((unsigned long)addr, (unsigned long)addr + bytes); break; - case BCACHE: - cache_wbinv_range((unsigned long)addr, - (unsigned long)addr + bytes); - break; default: return -EINVAL; } -- cgit v1.2.3-59-g8ed1b From 359ae00d12589c31cf103894d0f32588d523ca83 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Sun, 2 Feb 2020 09:58:42 +0800 Subject: csky: Fixup ftrace modify panic During ftrace init, linux will replace all function prologues (call_mcout) with nops, but it need flush_dcache and invalidate_icache to make it work. So flush_cache functions couldn't be nested called by ftrace framework. Signed-off-by: Guo Ren --- arch/csky/mm/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/csky/mm/Makefile b/arch/csky/mm/Makefile index 2c51918eb4fc..6e7696e55f71 100644 --- a/arch/csky/mm/Makefile +++ b/arch/csky/mm/Makefile @@ -1,8 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only ifeq ($(CONFIG_CPU_HAS_CACHEV2),y) obj-y += cachev2.o +CFLAGS_REMOVE_cachev2.o = $(CC_FLAGS_FTRACE) else obj-y += cachev1.o +CFLAGS_REMOVE_cachev1.o = $(CC_FLAGS_FTRACE) endif obj-y += dma-mapping.o -- cgit v1.2.3-59-g8ed1b From 9025fd48a8aeddade845afa353d4bbab7f19dbf2 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Sun, 2 Feb 2020 10:58:38 +0800 Subject: csky: Remove unused cache implementation Only for coding convention, these codes are unnecessary for abiv2. Signed-off-by: Guo Ren --- arch/csky/mm/cachev2.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/arch/csky/mm/cachev2.c b/arch/csky/mm/cachev2.c index 909fdd0f5995..bc419f8039d3 100644 --- a/arch/csky/mm/cachev2.c +++ b/arch/csky/mm/cachev2.c @@ -52,23 +52,9 @@ void dcache_wb_range(unsigned long start, unsigned long end) sync_is(); } -void dcache_inv_range(unsigned long start, unsigned long end) -{ - unsigned long i = start & ~(L1_CACHE_BYTES - 1); - - for (; i < end; i += L1_CACHE_BYTES) - asm volatile("dcache.civa %0\n"::"r"(i):"memory"); - sync_is(); -} - void cache_wbinv_range(unsigned long start, unsigned long end) { - unsigned long i = start & ~(L1_CACHE_BYTES - 1); - - for (; i < end; i += L1_CACHE_BYTES) - asm volatile("dcache.cval1 %0\n"::"r"(i):"memory"); - sync_is(); - + dcache_wb_range(start, end); icache_inv_range(start, end); } EXPORT_SYMBOL(cache_wbinv_range); -- cgit v1.2.3-59-g8ed1b From 2305f60b76110cb3e8658a4ae85d1f7eb0c66a5b Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Sun, 2 Feb 2020 14:11:11 +0800 Subject: csky: Fixup compile warning for three unimplemented syscalls Implement fstat64, fstatat64, clone3 syscalls to fixup checksyscalls.sh compile warnings. Signed-off-by: Guo Ren --- arch/csky/include/uapi/asm/unistd.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/csky/include/uapi/asm/unistd.h b/arch/csky/include/uapi/asm/unistd.h index 211c983c7282..ba4018929733 100644 --- a/arch/csky/include/uapi/asm/unistd.h +++ b/arch/csky/include/uapi/asm/unistd.h @@ -1,7 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. +#define __ARCH_WANT_STAT64 +#define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_CLONE3 #define __ARCH_WANT_SET_GET_RLIMIT #define __ARCH_WANT_TIME32_SYSCALLS #include -- cgit v1.2.3-59-g8ed1b From bebd26ab623616728d6e72b5c74a47bfff5287d8 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 31 Jan 2020 17:52:30 -0800 Subject: arch/csky: fix some Kconfig typos Fix wording in help text for the CPU_HAS_LDSTEX symbol. Signed-off-by: Randy Dunlap Signed-off-by: Guo Ren Signed-off-by: Guo Ren --- arch/csky/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 111474b50f69..f1e73543feec 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -76,7 +76,7 @@ config CPU_HAS_TLBI config CPU_HAS_LDSTEX bool help - For SMP, CPU needs "ldex&stex" instrcutions to atomic operations. + For SMP, CPU needs "ldex&stex" instructions for atomic operations. config CPU_NEED_TLBSYNC bool -- cgit v1.2.3-59-g8ed1b From 4ec575b78521b4573e85aec451930cb040582455 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 30 Jan 2020 20:22:40 +0100 Subject: csky: Cleanup old Kconfig options CONFIG_CLKSRC_OF is gone since commit bb0eb050a577 ("clocksource/drivers: Rename CLKSRC_OF to TIMER_OF"). The platform already selects TIMER_OF. CONFIG_HAVE_DMA_API_DEBUG is gone since commit 6e88628d03dd ("dma-debug: remove CONFIG_HAVE_DMA_API_DEBUG"). CONFIG_DEFAULT_DEADLINE is gone since commit f382fb0bcef4 ("block: remove legacy IO schedulers"). Signed-off-by: Krzysztof Kozlowski Signed-off-by: Guo Ren --- arch/csky/Kconfig | 2 -- arch/csky/configs/defconfig | 1 - 2 files changed, 3 deletions(-) diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index f1e73543feec..bf246b036dee 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -9,7 +9,6 @@ config CSKY select ARCH_USE_QUEUED_RWLOCKS if NR_CPUS>2 select COMMON_CLK select CLKSRC_MMIO - select CLKSRC_OF select CSKY_MPINTC if CPU_CK860 select CSKY_MP_TIMER if CPU_CK860 select CSKY_APB_INTC @@ -47,7 +46,6 @@ config CSKY select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP - select HAVE_DMA_API_DEBUG select HAVE_DMA_CONTIGUOUS select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS diff --git a/arch/csky/configs/defconfig b/arch/csky/configs/defconfig index 7ef42895dfb0..3b540539dbe6 100644 --- a/arch/csky/configs/defconfig +++ b/arch/csky/configs/defconfig @@ -10,7 +10,6 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y -CONFIG_DEFAULT_DEADLINE=y CONFIG_CPU_CK807=y CONFIG_CPU_HAS_FPU=y CONFIG_NET=y -- cgit v1.2.3-59-g8ed1b From d46869aaab7981f9153d3271228005a52ef18591 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Tue, 8 Oct 2019 14:25:13 +0800 Subject: csky: Add setup_initrd check code We should give some necessary check for initrd just like other architectures and it seems that setup_initrd() could be a common code for all architectures. Signed-off-by: Guo Ren --- arch/csky/kernel/setup.c | 3 --- arch/csky/mm/init.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/arch/csky/kernel/setup.c b/arch/csky/kernel/setup.c index 4fc7b16b77d2..3821e55742f4 100644 --- a/arch/csky/kernel/setup.c +++ b/arch/csky/kernel/setup.c @@ -47,9 +47,6 @@ static void __init csky_memblock_init(void) signed long size; memblock_reserve(__pa(_stext), _end - _stext); -#ifdef CONFIG_BLK_DEV_INITRD - memblock_reserve(__pa(initrd_start), initrd_end - initrd_start); -#endif early_init_fdt_reserve_self(); early_init_fdt_scan_reserved_mem(); diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c index dbb4b2dfe4b7..cb64d8647a78 100644 --- a/arch/csky/mm/init.c +++ b/arch/csky/mm/init.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -36,6 +37,45 @@ unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); +#ifdef CONFIG_BLK_DEV_INITRD +static void __init setup_initrd(void) +{ + unsigned long size; + + if (initrd_start >= initrd_end) { + pr_err("initrd not found or empty"); + goto disable; + } + + if (__pa(initrd_end) > PFN_PHYS(max_low_pfn)) { + pr_err("initrd extends beyond end of memory"); + goto disable; + } + + size = initrd_end - initrd_start; + + if (memblock_is_region_reserved(__pa(initrd_start), size)) { + pr_err("INITRD: 0x%08lx+0x%08lx overlaps in-use memory region", + __pa(initrd_start), size); + goto disable; + } + + memblock_reserve(__pa(initrd_start), size); + + pr_info("Initial ramdisk at: 0x%p (%lu bytes)\n", + (void *)(initrd_start), size); + + initrd_below_start_ok = 1; + + return; + +disable: + initrd_start = initrd_end = 0; + + pr_err(" - disabling initrd\n"); +} +#endif + void __init mem_init(void) { #ifdef CONFIG_HIGHMEM @@ -47,6 +87,10 @@ void __init mem_init(void) #endif high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); +#ifdef CONFIG_BLK_DEV_INITRD + setup_initrd(); +#endif + memblock_free_all(); #ifdef CONFIG_HIGHMEM -- cgit v1.2.3-59-g8ed1b From dc2efc0028dd5c4bf0f7324806c9a196958aaad3 Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Sun, 2 Feb 2020 17:56:58 +0800 Subject: csky: Minimize defconfig to support buildroot config.fragment Some bsp (eg: buildroot) has defconfig.fragment design to add more configs into the defconfig in linux source code tree. For example, we could put different cpu configs into different defconfig.fragments, but they all use the same defconfig in Linux. Signed-off-by: Ma Jun Signed-off-by: Guo Ren --- arch/csky/configs/defconfig | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/csky/configs/defconfig b/arch/csky/configs/defconfig index 3b540539dbe6..af722e4dfb47 100644 --- a/arch/csky/configs/defconfig +++ b/arch/csky/configs/defconfig @@ -10,8 +10,6 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y -CONFIG_CPU_CK807=y -CONFIG_CPU_HAS_FPU=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -26,10 +24,7 @@ CONFIG_SERIAL_NONSTANDARD=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y -CONFIG_TTY_PRINTK=y # CONFIG_VGA_CONSOLE is not set -CONFIG_CSKY_MPTIMER=y -CONFIG_GX6605S_TIMER=y CONFIG_PM_DEVFREQ=y CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y CONFIG_DEVFREQ_GOV_PERFORMANCE=y @@ -55,6 +50,4 @@ CONFIG_CRAMFS=y CONFIG_ROMFS_FS=y CONFIG_NFS_FS=y CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y -CONFIG_DEBUG_FS=y CONFIG_MAGIC_SYSRQ=y -- cgit v1.2.3-59-g8ed1b From 5b49c82dadfe0f3741778f57385f964ec1514863 Mon Sep 17 00:00:00 2001 From: MaJun Date: Mon, 27 Jan 2020 10:56:21 +0800 Subject: csky: Add PCI support Add the pci related code for csky arch to support basic pci virtual function, such as qemu virt-pci-9pfs. Signed-off-by: MaJun Signed-off-by: Guo Ren --- arch/csky/Kconfig | 5 +++++ arch/csky/include/asm/Kbuild | 1 - arch/csky/include/asm/pci.h | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 arch/csky/include/asm/pci.h diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index bf246b036dee..72b2999a889a 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -58,6 +58,11 @@ config CSKY select TIMER_OF select USB_ARCH_HAS_EHCI select USB_ARCH_HAS_OHCI + select GENERIC_PCI_IOMAP + select HAVE_PCI + select PCI_DOMAINS_GENERIC if PCI + select PCI_SYSCALL if PCI + select PCI_MSI if PCI config CPU_HAS_CACHEV2 bool diff --git a/arch/csky/include/asm/Kbuild b/arch/csky/include/asm/Kbuild index bc15a26c782f..4130e3eaa766 100644 --- a/arch/csky/include/asm/Kbuild +++ b/arch/csky/include/asm/Kbuild @@ -28,7 +28,6 @@ generic-y += local64.h generic-y += mm-arch-hooks.h generic-y += mmiowb.h generic-y += module.h -generic-y += pci.h generic-y += percpu.h generic-y += preempt.h generic-y += qrwlock.h diff --git a/arch/csky/include/asm/pci.h b/arch/csky/include/asm/pci.h new file mode 100644 index 000000000000..ebc765b1f78b --- /dev/null +++ b/arch/csky/include/asm/pci.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ASM_CSKY_PCI_H +#define __ASM_CSKY_PCI_H + +#include +#include +#include + +#include + +#define PCIBIOS_MIN_IO 0 +#define PCIBIOS_MIN_MEM 0 + +/* C-SKY shim does not initialize PCI bus */ +#define pcibios_assign_all_busses() 1 + +extern int isa_dma_bridge_buggy; + +#ifdef CONFIG_PCI +static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) +{ + /* no legacy IRQ on csky */ + return -ENODEV; +} + +static inline int pci_proc_domain(struct pci_bus *bus) +{ + /* always show the domain in /proc */ + return 1; +} +#endif /* CONFIG_PCI */ + +#endif /* __ASM_CSKY_PCI_H */ -- cgit v1.2.3-59-g8ed1b From 0b9f386c4be6493d282aab0af6f9b70c62142777 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Wed, 12 Feb 2020 10:24:52 +0800 Subject: csky: Implement copy_thread_tls This is required for clone3 which passes the TLS value through a struct rather than a register. Cc: Amanieu d'Antras Signed-off-by: Guo Ren --- arch/csky/Kconfig | 1 + arch/csky/kernel/process.c | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 72b2999a889a..047427f71d83 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -36,6 +36,7 @@ config CSKY select GX6605S_TIMER if CPU_CK610 select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_AUDITSYSCALL + select HAVE_COPY_THREAD_TLS select HAVE_DYNAMIC_FTRACE select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER diff --git a/arch/csky/kernel/process.c b/arch/csky/kernel/process.c index 5349cd8c0f30..f7b231ca269a 100644 --- a/arch/csky/kernel/process.c +++ b/arch/csky/kernel/process.c @@ -40,10 +40,11 @@ unsigned long thread_saved_pc(struct task_struct *tsk) return sw->r15; } -int copy_thread(unsigned long clone_flags, +int copy_thread_tls(unsigned long clone_flags, unsigned long usp, unsigned long kthread_arg, - struct task_struct *p) + struct task_struct *p, + unsigned long tls) { struct switch_stack *childstack; struct pt_regs *childregs = task_pt_regs(p); @@ -70,7 +71,7 @@ int copy_thread(unsigned long clone_flags, childregs->usp = usp; if (clone_flags & CLONE_SETTLS) task_thread_info(p)->tp_value = childregs->tls - = childregs->regs[0]; + = tls; childregs->a0 = 0; childstack->r15 = (unsigned long) ret_from_fork; -- cgit v1.2.3-59-g8ed1b From 4c5fd3b791a06021084b42d5610400f846d206b5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 19 Feb 2020 17:28:21 -0800 Subject: zonefs: fix documentation typos etc. Fix typos, spellos, etc. in zonefs.txt. Signed-off-by: Randy Dunlap Cc: Damien Le Moal Reviewed-by: Chaitanya Kulkarni Signed-off-by: Damien Le Moal --- Documentation/filesystems/zonefs.txt | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Documentation/filesystems/zonefs.txt b/Documentation/filesystems/zonefs.txt index 935bf22031ca..d54fa98ac158 100644 --- a/Documentation/filesystems/zonefs.txt +++ b/Documentation/filesystems/zonefs.txt @@ -134,7 +134,7 @@ Sequential zone files can only be written sequentially, starting from the file end, that is, write operations can only be append writes. Zonefs makes no attempt at accepting random writes and will fail any write request that has a start offset not corresponding to the end of the file, or to the end of the last -write issued and still in-flight (for asynchrnous I/O operations). +write issued and still in-flight (for asynchronous I/O operations). Since dirty page writeback by the page cache does not guarantee a sequential write pattern, zonefs prevents buffered writes and writeable shared mappings @@ -142,7 +142,7 @@ on sequential files. Only direct I/O writes are accepted for these files. zonefs relies on the sequential delivery of write I/O requests to the device implemented by the block layer elevator. An elevator implementing the sequential write feature for zoned block device (ELEVATOR_F_ZBD_SEQ_WRITE elevator feature) -must be used. This type of elevator (e.g. mq-deadline) is the set by default +must be used. This type of elevator (e.g. mq-deadline) is set by default for zoned block devices on device initialization. There are no restrictions on the type of I/O used for read operations in @@ -196,7 +196,7 @@ additional conditions that result in I/O errors. may still happen in the case of a partial failure of a very large direct I/O operation split into multiple BIOs/requests or asynchronous I/O operations. If one of the write request within the set of sequential write requests - issued to the device fails, all write requests after queued after it will + issued to the device fails, all write requests queued after it will become unaligned and fail. * Delayed write errors: similarly to regular block devices, if the device side @@ -207,7 +207,7 @@ additional conditions that result in I/O errors. causing all data to be dropped after the sector that caused the error. All I/O errors detected by zonefs are notified to the user with an error code -return for the system call that trigered or detected the error. The recovery +return for the system call that triggered or detected the error. The recovery actions taken by zonefs in response to I/O errors depend on the I/O type (read vs write) and on the reason for the error (bad sector, unaligned writes or zone condition change). @@ -222,7 +222,7 @@ condition change). * A zone condition change to read-only or offline also always triggers zonefs I/O error recovery. -Zonefs minimal I/O error recovery may change a file size and a file access +Zonefs minimal I/O error recovery may change a file size and file access permissions. * File size changes: @@ -237,7 +237,7 @@ permissions. A file size may also be reduced to reflect a delayed write error detected on fsync(): in this case, the amount of data effectively written in the zone may be less than originally indicated by the file inode size. After such I/O - error, zonefs always fixes a file inode size to reflect the amount of data + error, zonefs always fixes the file inode size to reflect the amount of data persistently stored in the file zone. * Access permission changes: @@ -281,11 +281,11 @@ Further notes: permissions to read-only applies to all files. The file system is remounted read-only. * Access permission and file size changes due to the device transitioning zones - to the offline condition are permanent. Remounting or reformating the device + to the offline condition are permanent. Remounting or reformatting the device with mkfs.zonefs (mkzonefs) will not change back offline zone files to a good state. * File access permission changes to read-only due to the device transitioning - zones to the read-only condition are permanent. Remounting or reformating + zones to the read-only condition are permanent. Remounting or reformatting the device will not re-enable file write access. * File access permission changes implied by the remount-ro, zone-ro and zone-offline mount options are temporary for zones in a good condition. @@ -301,13 +301,13 @@ Mount options zonefs define the "errors=" mount option to allow the user to specify zonefs behavior in response to I/O errors, inode size inconsistencies or zone -condition chages. The defined behaviors are as follow: +condition changes. The defined behaviors are as follow: * remount-ro (default) * zone-ro * zone-offline * repair -The I/O error actions defined for each behavior is detailed in the previous +The I/O error actions defined for each behavior are detailed in the previous section. Zonefs User Space Tools -- cgit v1.2.3-59-g8ed1b From 9951ebfcdf2b97dbb28a5d930458424341e61aa2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 21 Feb 2020 10:41:43 +0100 Subject: nl80211: fix potential leak in AP start If nl80211_parse_he_obss_pd() fails, we leak the previously allocated ACL memory. Free it in this case. Fixes: 796e90f42b7e ("cfg80211: add support for parsing OBBS_PD attributes") Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20200221104142.835aba4cdd14.I1923b55ba9989c57e13978f91f40bfdc45e60cbd@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index cedf17d4933f..46be40e19e7f 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4800,8 +4800,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) err = nl80211_parse_he_obss_pd( info->attrs[NL80211_ATTR_HE_OBSS_PD], ¶ms.he_obss_pd); - if (err) - return err; + goto out; } nl80211_calculate_ap_params(¶ms); @@ -4823,6 +4822,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) } wdev_unlock(wdev); +out: kfree(params.acl); return err; -- cgit v1.2.3-59-g8ed1b From a7ee7d44b57c9ae174088e53a668852b7f4f452d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 21 Feb 2020 10:44:50 +0100 Subject: cfg80211: check reg_rule for NULL in handle_channel_custom() We may end up with a NULL reg_rule after the loop in handle_channel_custom() if the bandwidth didn't fit, check if this is the case and bail out if so. Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20200221104449.3b558a50201c.I4ad3725c4dacaefd2d18d3cc65ba6d18acd5dbfe@changeid Signed-off-by: Johannes Berg --- net/wireless/reg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index fff9a74891fc..1a8218f1bbe0 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2276,7 +2276,7 @@ static void handle_channel_custom(struct wiphy *wiphy, break; } - if (IS_ERR(reg_rule)) { + if (IS_ERR_OR_NULL(reg_rule)) { pr_debug("Disabling freq %d MHz as custom regd has no rule that fits it\n", chan->center_freq); if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) { -- cgit v1.2.3-59-g8ed1b From 0daa63ed4c6c4302790ce67b7a90c0997ceb7514 Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Fri, 21 Feb 2020 10:47:20 +0100 Subject: mac80211: Remove a redundant mutex unlock The below-mentioned commit changed the code to unlock *inside* the function, but previously the unlock was *outside*. It failed to remove the outer unlock, however, leading to double unlock. Fix this. Fixes: 33483a6b88e4 ("mac80211: fix missing unlock on error in ieee80211_mark_sta_auth()") Signed-off-by: Andrei Otcheretianski Link: https://lore.kernel.org/r/20200221104719.cce4741cf6eb.I671567b185c8a4c2409377e483fd149ce590f56d@changeid [rewrite commit message to better explain what happened] Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e041af2f021a..88d7a692a965 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2959,7 +2959,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, (auth_transaction == 2 && ifmgd->auth_data->expected_transaction == 2)) { if (!ieee80211_mark_sta_auth(sdata, bssid)) - goto out_err; + return; /* ignore frame -- wait for timeout */ } else if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE && auth_transaction == 2) { sdata_info(sdata, "SAE peer confirmed\n"); @@ -2967,10 +2967,6 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, } cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); - return; - out_err: - mutex_unlock(&sdata->local->sta_mtx); - /* ignore frame -- wait for timeout */ } #define case_WLAN(type) \ -- cgit v1.2.3-59-g8ed1b From 2546287c5fb363a0165933ae2181c92f03e701d0 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Fri, 21 Feb 2020 10:07:25 +0800 Subject: genirq/irqdomain: Make sure all irq domain flags are distinct This was noticed when printing debugfs for MSIs on my ARM64 server. The new dstate IRQD_MSI_NOMASK_QUIRK came out surprisingly while it should only be the x86 stuff for the time being... The new MSI quirk flag uses the same bit as IRQ_DOMAIN_NAME_ALLOCATED which is oddly defined as bit 6 for no good reason. Switch it to the non used bit 1. Fixes: 6f1a4891a592 ("x86/apic/msi: Plug non-maskable MSI affinity race") Signed-off-by: Zenghui Yu Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20200221020725.2038-1-yuzenghui@huawei.com --- include/linux/irqdomain.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index b2d47571ab67..8d062e86d954 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -192,7 +192,7 @@ enum { IRQ_DOMAIN_FLAG_HIERARCHY = (1 << 0), /* Irq domain name was allocated in __irq_domain_add() */ - IRQ_DOMAIN_NAME_ALLOCATED = (1 << 6), + IRQ_DOMAIN_NAME_ALLOCATED = (1 << 1), /* Irq domain is an IPI domain with virq per cpu */ IRQ_DOMAIN_FLAG_IPI_PER_CPU = (1 << 2), -- cgit v1.2.3-59-g8ed1b From 3b7830904e17202524bad1974505a9bfc718d31f Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 20 Feb 2020 13:29:53 -0700 Subject: nvme-multipath: Fix memory leak with ana_log_buf kmemleak reports a memory leak with the ana_log_buf allocated by nvme_mpath_init(): unreferenced object 0xffff888120e94000 (size 8208): comm "nvme", pid 6884, jiffies 4295020435 (age 78786.312s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 ................ 01 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000e2360188>] kmalloc_order+0x97/0xc0 [<0000000079b18dd4>] kmalloc_order_trace+0x24/0x100 [<00000000f50c0406>] __kmalloc+0x24c/0x2d0 [<00000000f31a10b9>] nvme_mpath_init+0x23c/0x2b0 [<000000005802589e>] nvme_init_identify+0x75f/0x1600 [<0000000058ef911b>] nvme_loop_configure_admin_queue+0x26d/0x280 [<00000000673774b9>] nvme_loop_create_ctrl+0x2a7/0x710 [<00000000f1c7a233>] nvmf_dev_write+0xc66/0x10b9 [<000000004199f8d0>] __vfs_write+0x50/0xa0 [<0000000065466fef>] vfs_write+0xf3/0x280 [<00000000b0db9a8b>] ksys_write+0xc6/0x160 [<0000000082156b91>] __x64_sys_write+0x43/0x50 [<00000000c34fbb6d>] do_syscall_64+0x77/0x2f0 [<00000000bbc574c9>] entry_SYSCALL_64_after_hwframe+0x49/0xbe nvme_mpath_init() is called by nvme_init_identify() which is called in multiple places (nvme_reset_work(), nvme_passthru_end(), etc). This means nvme_mpath_init() may be called multiple times before nvme_mpath_uninit() (which is only called on nvme_free_ctrl()). When nvme_mpath_init() is called multiple times, it overwrites the ana_log_buf pointer with a new allocation, thus leaking the previous allocation. To fix this, free ana_log_buf before allocating a new one. Fixes: 0d0b660f214dc490 ("nvme: add ANA support") Cc: Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Logan Gunthorpe Signed-off-by: Keith Busch --- drivers/nvme/host/multipath.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 797c18337d96..a11900cf3a36 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -715,6 +715,7 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) } INIT_WORK(&ctrl->ana_work, nvme_ana_work); + kfree(ctrl->ana_log_buf); ctrl->ana_log_buf = kmalloc(ctrl->ana_log_size, GFP_KERNEL); if (!ctrl->ana_log_buf) { error = -ENOMEM; -- cgit v1.2.3-59-g8ed1b From 4e4694d8729f7cd6381f6691e8f83e378fce3160 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 21 Feb 2020 17:13:42 +0900 Subject: bootconfig: Prohibit re-defining value on same key Currently, bootconfig adds a new value on the existing key to the tail of an array. But this looks a bit confusing because an admin can easily rewrite the original value in the same config file. This rejects the following value re-definition. key = value1 ... key = value2 You should rewrite value1 to value2 in this case. Link: http://lkml.kernel.org/r/158227282199.12842.10110929876059658601.stgit@devnote2 Suggested-by: Steven Rostedt (VMware) Signed-off-by: Masami Hiramatsu [ Fixed spelling of arraies to arrays ] Signed-off-by: Steven Rostedt (VMware) --- Documentation/admin-guide/bootconfig.rst | 11 ++++++++++- lib/bootconfig.c | 13 ++++++++----- tools/bootconfig/samples/bad-samekey.bconf | 6 ++++++ 3 files changed, 24 insertions(+), 6 deletions(-) create mode 100644 tools/bootconfig/samples/bad-samekey.bconf diff --git a/Documentation/admin-guide/bootconfig.rst b/Documentation/admin-guide/bootconfig.rst index dfeffa73dca3..57119fb69d36 100644 --- a/Documentation/admin-guide/bootconfig.rst +++ b/Documentation/admin-guide/bootconfig.rst @@ -62,7 +62,16 @@ Or more shorter, written as following:: In both styles, same key words are automatically merged when parsing it at boot time. So you can append similar trees or key-values. -Note that a sub-key and a value can not co-exist under a parent key. +Same-key Values +--------------- + +It is prohibited that two or more values or arrays share a same-key. +For example,:: + + foo = bar, baz + foo = qux # !ERROR! we can not re-define same key + +Also, a sub-key and a value can not co-exist under a parent key. For example, following config is NOT allowed.:: foo = value1 diff --git a/lib/bootconfig.c b/lib/bootconfig.c index 54ac623ca781..2ef304db31f2 100644 --- a/lib/bootconfig.c +++ b/lib/bootconfig.c @@ -581,7 +581,7 @@ static int __init __xbc_parse_keys(char *k) static int __init xbc_parse_kv(char **k, char *v) { struct xbc_node *prev_parent = last_parent; - struct xbc_node *node, *child; + struct xbc_node *child; char *next; int c, ret; @@ -590,15 +590,18 @@ static int __init xbc_parse_kv(char **k, char *v) return ret; child = xbc_node_get_child(last_parent); - if (child && xbc_node_is_key(child)) - return xbc_parse_error("Value is mixed with subkey", v); + if (child) { + if (xbc_node_is_key(child)) + return xbc_parse_error("Value is mixed with subkey", v); + else + return xbc_parse_error("Value is redefined", v); + } c = __xbc_parse_value(&v, &next); if (c < 0) return c; - node = xbc_add_sibling(v, XBC_VALUE); - if (!node) + if (!xbc_add_sibling(v, XBC_VALUE)) return -ENOMEM; if (c == ',') { /* Array */ diff --git a/tools/bootconfig/samples/bad-samekey.bconf b/tools/bootconfig/samples/bad-samekey.bconf new file mode 100644 index 000000000000..e8d983a4563c --- /dev/null +++ b/tools/bootconfig/samples/bad-samekey.bconf @@ -0,0 +1,6 @@ +# Same key value is not allowed +key { + foo = value + bar = value2 +} +key.foo = value -- cgit v1.2.3-59-g8ed1b From 5f811c57c99205e048926293bb812c750a6ea562 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 21 Feb 2020 17:13:52 +0900 Subject: bootconfig: Add append value operator support Add append value operator "+=" support to bootconfig syntax. With this operator, user can add new value to the key as an entry of array instead of overwriting. For example, foo = bar ... foo += baz Then the key "foo" has "bar" and "baz" values as an array. Link: http://lkml.kernel.org/r/158227283195.12842.8310503105963275584.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- Documentation/admin-guide/bootconfig.rst | 10 +++++++++- lib/bootconfig.c | 15 +++++++++++---- tools/bootconfig/test-bootconfig.sh | 16 ++++++++++++++-- 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/Documentation/admin-guide/bootconfig.rst b/Documentation/admin-guide/bootconfig.rst index 57119fb69d36..cf2edcd09183 100644 --- a/Documentation/admin-guide/bootconfig.rst +++ b/Documentation/admin-guide/bootconfig.rst @@ -71,7 +71,15 @@ For example,:: foo = bar, baz foo = qux # !ERROR! we can not re-define same key -Also, a sub-key and a value can not co-exist under a parent key. +If you want to append the value to existing key as an array member, +you can use ``+=`` operator. For example:: + + foo = bar, baz + foo += qux + +In this case, the key ``foo`` has ``bar``, ``baz`` and ``qux``. + +However, a sub-key and a value can not co-exist under a parent key. For example, following config is NOT allowed.:: foo = value1 diff --git a/lib/bootconfig.c b/lib/bootconfig.c index 2ef304db31f2..ec3ce7fd299f 100644 --- a/lib/bootconfig.c +++ b/lib/bootconfig.c @@ -578,7 +578,7 @@ static int __init __xbc_parse_keys(char *k) return __xbc_add_key(k); } -static int __init xbc_parse_kv(char **k, char *v) +static int __init xbc_parse_kv(char **k, char *v, int op) { struct xbc_node *prev_parent = last_parent; struct xbc_node *child; @@ -593,7 +593,7 @@ static int __init xbc_parse_kv(char **k, char *v) if (child) { if (xbc_node_is_key(child)) return xbc_parse_error("Value is mixed with subkey", v); - else + else if (op == '=') return xbc_parse_error("Value is redefined", v); } @@ -774,7 +774,7 @@ int __init xbc_init(char *buf) p = buf; do { - q = strpbrk(p, "{}=;\n#"); + q = strpbrk(p, "{}=+;\n#"); if (!q) { p = skip_spaces(p); if (*p != '\0') @@ -785,8 +785,15 @@ int __init xbc_init(char *buf) c = *q; *q++ = '\0'; switch (c) { + case '+': + if (*q++ != '=') { + ret = xbc_parse_error("Wrong '+' operator", + q - 2); + break; + } + /* Fall through */ case '=': - ret = xbc_parse_kv(&p, q); + ret = xbc_parse_kv(&p, q, c); break; case '{': ret = xbc_open_brace(&p, q); diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh index adafb7c50940..1411f4c3454f 100755 --- a/tools/bootconfig/test-bootconfig.sh +++ b/tools/bootconfig/test-bootconfig.sh @@ -9,7 +9,7 @@ TEMPCONF=`mktemp temp-XXXX.bconf` NG=0 cleanup() { - rm -f $INITRD $TEMPCONF + rm -f $INITRD $TEMPCONF $OUTFILE exit $NG } @@ -71,7 +71,6 @@ printf " \0\0\0 \0\0\0" >> $INITRD $BOOTCONF -a $TEMPCONF $INITRD > $OUTFILE 2>&1 xfail grep -i "failed" $OUTFILE xfail grep -i "error" $OUTFILE -rm $OUTFILE echo "Max node number check" @@ -96,6 +95,19 @@ truncate -s 32764 $TEMPCONF echo "\"" >> $TEMPCONF # add 2 bytes + terminal ('\"\n\0') xpass $BOOTCONF -a $TEMPCONF $INITRD +echo "Adding same-key values" +cat > $TEMPCONF << EOF +key = bar, baz +key += qux +EOF +echo > $INITRD + +xpass $BOOTCONF -a $TEMPCONF $INITRD +$BOOTCONF $INITRD > $OUTFILE +xpass grep -q "bar" $OUTFILE +xpass grep -q "baz" $OUTFILE +xpass grep -q "qux" $OUTFILE + echo "=== expected failure cases ===" for i in samples/bad-* ; do xfail $BOOTCONF -a $i $INITRD -- cgit v1.2.3-59-g8ed1b From a5ae50dea9111db63d30d700766dd5509602f7ad Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 20 Feb 2020 13:29:49 +0000 Subject: Btrfs: fix deadlock during fast fsync when logging prealloc extents beyond eof While logging the prealloc extents of an inode during a fast fsync we call btrfs_truncate_inode_items(), through btrfs_log_prealloc_extents(), while holding a read lock on a leaf of the inode's root (not the log root, the fs/subvol root), and then that function locks the file range in the inode's iotree. This can lead to a deadlock when: * the fsync is ranged * the file has prealloc extents beyond eof * writeback for a range different from the fsync range starts during the fsync * the size of the file is not sector size aligned Because when finishing an ordered extent we lock first a file range and then try to COW the fs/subvol tree to insert an extent item. The following diagram shows how the deadlock can happen. CPU 1 CPU 2 btrfs_sync_file() --> for range [0, 1MiB) --> inode has a size of 1MiB and has 1 prealloc extent beyond the i_size, starting at offset 4MiB flushes all delalloc for the range [0MiB, 1MiB) and waits for the respective ordered extents to complete --> before task at CPU 1 locks the inode, a write into file range [1MiB, 2MiB + 1KiB) is made --> i_size is updated to 2MiB + 1KiB --> writeback is started for that range, [1MiB, 2MiB + 4KiB) --> end offset rounded up to be sector size aligned btrfs_log_dentry_safe() btrfs_log_inode_parent() btrfs_log_inode() btrfs_log_changed_extents() btrfs_log_prealloc_extents() --> does a search on the inode's root --> holds a read lock on leaf X btrfs_finish_ordered_io() --> locks range [1MiB, 2MiB + 4KiB) --> end offset rounded up to be sector size aligned --> tries to cow leaf X, through insert_reserved_file_extent() --> already locked by the task at CPU 1 btrfs_truncate_inode_items() --> gets an i_size of 2MiB + 1KiB, which is not sector size aligned --> tries to lock file range [2MiB, (u64)-1) --> the start range is rounded down from 2MiB + 1K to 2MiB to be sector size aligned --> but the subrange [2MiB, 2MiB + 4KiB) is already locked by task at CPU 2 which is waiting to get a write lock on leaf X for which we are holding a read lock *** deadlock *** This results in a stack trace like the following, triggered by test case generic/561 from fstests: [ 2779.973608] INFO: task kworker/u8:6:247 blocked for more than 120 seconds. [ 2779.979536] Not tainted 5.6.0-rc2-btrfs-next-53 #1 [ 2779.984503] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 2779.990136] kworker/u8:6 D 0 247 2 0x80004000 [ 2779.990457] Workqueue: btrfs-endio-write btrfs_work_helper [btrfs] [ 2779.990466] Call Trace: [ 2779.990491] ? __schedule+0x384/0xa30 [ 2779.990521] schedule+0x33/0xe0 [ 2779.990616] btrfs_tree_read_lock+0x19e/0x2e0 [btrfs] [ 2779.990632] ? remove_wait_queue+0x60/0x60 [ 2779.990730] btrfs_read_lock_root_node+0x2f/0x40 [btrfs] [ 2779.990782] btrfs_search_slot+0x510/0x1000 [btrfs] [ 2779.990869] btrfs_lookup_file_extent+0x4a/0x70 [btrfs] [ 2779.990944] __btrfs_drop_extents+0x161/0x1060 [btrfs] [ 2779.990987] ? mark_held_locks+0x6d/0xc0 [ 2779.990994] ? __slab_alloc.isra.49+0x99/0x100 [ 2779.991060] ? insert_reserved_file_extent.constprop.19+0x64/0x300 [btrfs] [ 2779.991145] insert_reserved_file_extent.constprop.19+0x97/0x300 [btrfs] [ 2779.991222] ? start_transaction+0xdd/0x5c0 [btrfs] [ 2779.991291] btrfs_finish_ordered_io+0x4f4/0x840 [btrfs] [ 2779.991405] btrfs_work_helper+0xaa/0x720 [btrfs] [ 2779.991432] process_one_work+0x26d/0x6a0 [ 2779.991460] worker_thread+0x4f/0x3e0 [ 2779.991481] ? process_one_work+0x6a0/0x6a0 [ 2779.991489] kthread+0x103/0x140 [ 2779.991499] ? kthread_create_worker_on_cpu+0x70/0x70 [ 2779.991515] ret_from_fork+0x3a/0x50 (...) [ 2780.026211] INFO: task fsstress:17375 blocked for more than 120 seconds. [ 2780.027480] Not tainted 5.6.0-rc2-btrfs-next-53 #1 [ 2780.028482] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 2780.030035] fsstress D 0 17375 17373 0x00004000 [ 2780.030038] Call Trace: [ 2780.030044] ? __schedule+0x384/0xa30 [ 2780.030052] schedule+0x33/0xe0 [ 2780.030075] lock_extent_bits+0x20c/0x320 [btrfs] [ 2780.030094] ? btrfs_truncate_inode_items+0xf4/0x1150 [btrfs] [ 2780.030098] ? rcu_read_lock_sched_held+0x59/0xa0 [ 2780.030102] ? remove_wait_queue+0x60/0x60 [ 2780.030122] btrfs_truncate_inode_items+0x133/0x1150 [btrfs] [ 2780.030151] ? btrfs_set_path_blocking+0xb2/0x160 [btrfs] [ 2780.030165] ? btrfs_search_slot+0x379/0x1000 [btrfs] [ 2780.030195] btrfs_log_changed_extents.isra.8+0x841/0x93e [btrfs] [ 2780.030202] ? do_raw_spin_unlock+0x49/0xc0 [ 2780.030215] ? btrfs_get_num_csums+0x10/0x10 [btrfs] [ 2780.030239] btrfs_log_inode+0xf83/0x1124 [btrfs] [ 2780.030251] ? __mutex_unlock_slowpath+0x45/0x2a0 [ 2780.030275] btrfs_log_inode_parent+0x2a0/0xe40 [btrfs] [ 2780.030282] ? dget_parent+0xa1/0x370 [ 2780.030309] btrfs_log_dentry_safe+0x4a/0x70 [btrfs] [ 2780.030329] btrfs_sync_file+0x3f3/0x490 [btrfs] [ 2780.030339] do_fsync+0x38/0x60 [ 2780.030343] __x64_sys_fdatasync+0x13/0x20 [ 2780.030345] do_syscall_64+0x5c/0x280 [ 2780.030348] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 2780.030356] RIP: 0033:0x7f2d80f6d5f0 [ 2780.030361] Code: Bad RIP value. [ 2780.030362] RSP: 002b:00007ffdba3c8548 EFLAGS: 00000246 ORIG_RAX: 000000000000004b [ 2780.030364] RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f2d80f6d5f0 [ 2780.030365] RDX: 00007ffdba3c84b0 RSI: 00007ffdba3c84b0 RDI: 0000000000000003 [ 2780.030367] RBP: 000000000000004a R08: 0000000000000001 R09: 00007ffdba3c855c [ 2780.030368] R10: 0000000000000078 R11: 0000000000000246 R12: 00000000000001f4 [ 2780.030369] R13: 0000000051eb851f R14: 00007ffdba3c85f0 R15: 0000557a49220d90 So fix this by making btrfs_truncate_inode_items() not lock the range in the inode's iotree when the target root is a log root, since it's not needed to lock the range for log roots as the protection from the inode's lock and log_mutex are all that's needed. Fixes: 28553fa992cb28 ("Btrfs: fix race between shrinking truncate and fiemap") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/inode.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4f47ba652b31..1ccb3f8d528d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4103,8 +4103,9 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, return -ENOMEM; path->reada = READA_BACK; - lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, (u64)-1, - &cached_state); + if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) + lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, (u64)-1, + &cached_state); /* * We want to drop from the next block forward in case this new size is @@ -4368,11 +4369,10 @@ out: if (!ret && last_size > new_size) last_size = new_size; btrfs_ordered_update_i_size(inode, last_size, NULL); + unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, + (u64)-1, &cached_state); } - unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, (u64)-1, - &cached_state); - btrfs_free_path(path); return ret; } -- cgit v1.2.3-59-g8ed1b From 7143b5ac5750f404ff3a594b34fdf3fc2f99f828 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 21 Feb 2020 16:42:16 +0100 Subject: io_uring: prevent sq_thread from spinning when it should stop This patch drops 'cur_mm' before calling cond_resched(), to prevent the sq_thread from spinning even when the user process is finished. Before this patch, if the user process ended without closing the io_uring fd, the sq_thread continues to spin until the 'sq_thread_idle' timeout ends. In the worst case where the 'sq_thread_idle' parameter is bigger than INT_MAX, the sq_thread will spin forever. Fixes: 6c271ce2f1d5 ("io_uring: add submission polling") Signed-off-by: Stefano Garzarella Signed-off-by: Jens Axboe --- fs/io_uring.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 6e249aa97ba3..b43467b3a8dc 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5142,6 +5142,18 @@ static int io_sq_thread(void *data) * to enter the kernel to reap and flush events. */ if (!to_submit || ret == -EBUSY) { + /* + * Drop cur_mm before scheduling, we can't hold it for + * long periods (or over schedule()). Do this before + * adding ourselves to the waitqueue, as the unuse/drop + * may sleep. + */ + if (cur_mm) { + unuse_mm(cur_mm); + mmput(cur_mm); + cur_mm = NULL; + } + /* * We're polling. If we're within the defined idle * period, then let us spin without work before going @@ -5156,18 +5168,6 @@ static int io_sq_thread(void *data) continue; } - /* - * Drop cur_mm before scheduling, we can't hold it for - * long periods (or over schedule()). Do this before - * adding ourselves to the waitqueue, as the unuse/drop - * may sleep. - */ - if (cur_mm) { - unuse_mm(cur_mm); - mmput(cur_mm); - cur_mm = NULL; - } - prepare_to_wait(&ctx->sqo_wait, &wait, TASK_INTERRUPTIBLE); -- cgit v1.2.3-59-g8ed1b From 7455a8327674e1a7c9a1f5dd1b0743ab6713f6d1 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Fri, 14 Feb 2020 10:32:38 +0800 Subject: KVM: x86: don't notify userspace IOAPIC on edge-triggered interrupt EOI Commit 13db77347db1 ("KVM: x86: don't notify userspace IOAPIC on edge EOI") said, edge-triggered interrupts don't set a bit in TMR, which means that IOAPIC isn't notified on EOI. And var level indicates level-triggered interrupt. But commit 3159d36ad799 ("KVM: x86: use generic function for MSI parsing") replace var level with irq.level by mistake. Fix it by changing irq.level to irq.trig_mode. Cc: stable@vger.kernel.org Fixes: 3159d36ad799 ("KVM: x86: use generic function for MSI parsing") Signed-off-by: Miaohe Lin Signed-off-by: Paolo Bonzini --- arch/x86/kvm/irq_comm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 79afa0bb5f41..c47d2acec529 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -417,7 +417,7 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, kvm_set_msi_irq(vcpu->kvm, entry, &irq); - if (irq.level && + if (irq.trig_mode && kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT, irq.dest_id, irq.dest_mode)) __set_bit(irq.vector, ioapic_handled_vectors); -- cgit v1.2.3-59-g8ed1b From c9dfd3fb08352d439f0399b6fabe697681d2638c Mon Sep 17 00:00:00 2001 From: wanpeng li Date: Mon, 17 Feb 2020 18:37:43 +0800 Subject: KVM: nVMX: Hold KVM's srcu lock when syncing vmcs12->shadow For the duration of mapping eVMCS, it derefences ->memslots without holding ->srcu or ->slots_lock when accessing hv assist page. This patch fixes it by moving nested_sync_vmcs12_to_shadow to prepare_guest_switch, where the SRCU is already taken. It can be reproduced by running kvm's evmcs_test selftest. ============================= warning: suspicious rcu usage 5.6.0-rc1+ #53 tainted: g w ioe ----------------------------- ./include/linux/kvm_host.h:623 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by evmcs_test/8507: #0: ffff9ddd156d00d0 (&vcpu->mutex){+.+.}, at: kvm_vcpu_ioctl+0x85/0x680 [kvm] stack backtrace: cpu: 6 pid: 8507 comm: evmcs_test tainted: g w ioe 5.6.0-rc1+ #53 hardware name: dell inc. optiplex 7040/0jctf8, bios 1.4.9 09/12/2016 call trace: dump_stack+0x68/0x9b kvm_read_guest_cached+0x11d/0x150 [kvm] kvm_hv_get_assist_page+0x33/0x40 [kvm] nested_enlightened_vmentry+0x2c/0x60 [kvm_intel] nested_vmx_handle_enlightened_vmptrld.part.52+0x32/0x1c0 [kvm_intel] nested_sync_vmcs12_to_shadow+0x439/0x680 [kvm_intel] vmx_vcpu_run+0x67a/0xe60 [kvm_intel] vcpu_enter_guest+0x35e/0x1bc0 [kvm] kvm_arch_vcpu_ioctl_run+0x40b/0x670 [kvm] kvm_vcpu_ioctl+0x370/0x680 [kvm] ksys_ioctl+0x235/0x850 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x77/0x780 entry_syscall_64_after_hwframe+0x49/0xbe Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 3be25ecae145..dafe4df893c8 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1175,6 +1175,10 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) vmx->guest_msrs[i].mask); } + + if (vmx->nested.need_vmcs12_to_shadow_sync) + nested_sync_vmcs12_to_shadow(vcpu); + if (vmx->guest_state_loaded) return; @@ -6482,8 +6486,11 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) vmcs_write32(PLE_WINDOW, vmx->ple_window); } - if (vmx->nested.need_vmcs12_to_shadow_sync) - nested_sync_vmcs12_to_shadow(vcpu); + /* + * We did this in prepare_switch_to_guest, because it needs to + * be within srcu_read_lock. + */ + WARN_ON_ONCE(vmx->nested.need_vmcs12_to_shadow_sync); if (kvm_register_is_dirty(vcpu, VCPU_REGS_RSP)) vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); -- cgit v1.2.3-59-g8ed1b From 624e18f92f2ec449c2b3a6d388c0dccc45e4c4d7 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Sun, 16 Feb 2020 18:48:57 +0800 Subject: KVM: VMX: Add VMX_FEATURE_USR_WAIT_PAUSE Commit 159348784ff0 ("x86/vmx: Introduce VMX_FEATURES_*") missed bit 26 (enable user wait and pause) of Secondary Processor-based VM-Execution Controls. Add VMX_FEATURE_USR_WAIT_PAUSE flag so that it shows up in /proc/cpuinfo, and use it to define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE to make them uniform. Signed-off-by: Xiaoyao Li Reviewed-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/vmx.h | 2 +- arch/x86/include/asm/vmxfeatures.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 2a85287b3685..8521af3fef27 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -72,7 +72,7 @@ #define SECONDARY_EXEC_MODE_BASED_EPT_EXEC VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC) #define SECONDARY_EXEC_PT_USE_GPA VMCS_CONTROL_BIT(PT_USE_GPA) #define SECONDARY_EXEC_TSC_SCALING VMCS_CONTROL_BIT(TSC_SCALING) -#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE 0x04000000 +#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE VMCS_CONTROL_BIT(USR_WAIT_PAUSE) #define PIN_BASED_EXT_INTR_MASK VMCS_CONTROL_BIT(INTR_EXITING) #define PIN_BASED_NMI_EXITING VMCS_CONTROL_BIT(NMI_EXITING) diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h index a50e4a0de315..9915990fd8cf 100644 --- a/arch/x86/include/asm/vmxfeatures.h +++ b/arch/x86/include/asm/vmxfeatures.h @@ -81,6 +81,7 @@ #define VMX_FEATURE_MODE_BASED_EPT_EXEC ( 2*32+ 22) /* "ept_mode_based_exec" Enable separate EPT EXEC bits for supervisor vs. user */ #define VMX_FEATURE_PT_USE_GPA ( 2*32+ 24) /* "" Processor Trace logs GPAs */ #define VMX_FEATURE_TSC_SCALING ( 2*32+ 25) /* Scale hardware TSC when read in guest */ +#define VMX_FEATURE_USR_WAIT_PAUSE ( 2*32+ 26) /* Enable TPAUSE, UMONITOR, UMWAIT in guest */ #define VMX_FEATURE_ENCLV_EXITING ( 2*32+ 28) /* "" VM-Exit on ENCLV (leaf dependent) */ #endif /* _ASM_X86_VMXFEATURES_H */ -- cgit v1.2.3-59-g8ed1b From 93fd9666c269877fffb74e14f52792d9c000c1f2 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Fri, 21 Feb 2020 08:52:17 -0600 Subject: kvm: x86: svm: Fix NULL pointer dereference when AVIC not enabled Launching VM w/ AVIC disabled together with pass-through device results in NULL pointer dereference bug with the following call trace. RIP: 0010:svm_refresh_apicv_exec_ctrl+0x17e/0x1a0 [kvm_amd] Call Trace: kvm_vcpu_update_apicv+0x44/0x60 [kvm] kvm_arch_vcpu_ioctl_run+0x3f4/0x1c80 [kvm] kvm_vcpu_ioctl+0x3d8/0x650 [kvm] do_vfs_ioctl+0xaa/0x660 ? tomoyo_file_ioctl+0x19/0x20 ksys_ioctl+0x67/0x90 __x64_sys_ioctl+0x1a/0x20 do_syscall_64+0x57/0x190 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Investigation shows that this is due to the uninitialized usage of struct vapu_svm.ir_list in the svm_set_pi_irte_mode(), which is called from svm_refresh_apicv_exec_ctrl(). The ir_list is initialized only if AVIC is enabled. So, fixes by adding a check if AVIC is enabled in the svm_refresh_apicv_exec_ctrl(). Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=206579 Fixes: 8937d762396d ("kvm: x86: svm: Add support to (de)activate posted interrupts.") Signed-off-by: Suravee Suthikulpanit Tested-by: Alex Williamson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index bef0ba35f121..a391b29138f0 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -5232,6 +5232,9 @@ static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) struct vmcb *vmcb = svm->vmcb; bool activated = kvm_vcpu_apicv_active(vcpu); + if (!avic) + return; + if (activated) { /** * During AVIC temporary deactivation, guest could update -- cgit v1.2.3-59-g8ed1b From 91a5f413af596ad01097e59bf487eb07cb3f1331 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 20 Feb 2020 18:22:05 +0100 Subject: KVM: nVMX: handle nested posted interrupts when apicv is disabled for L1 Even when APICv is disabled for L1 it can (and, actually, is) still available for L2, this means we need to always call vmx_deliver_nested_posted_interrupt() when attempting an interrupt delivery. Suggested-by: Paolo Bonzini Signed-off-by: Vitaly Kuznetsov Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/lapic.c | 5 +---- arch/x86/kvm/svm.c | 7 ++++++- arch/x86/kvm/vmx/vmx.c | 13 +++++++++---- 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 40a0c0fd95ca..a84e8c5acda8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1146,7 +1146,7 @@ struct kvm_x86_ops { void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); - void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); + int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index afcd30d44cbb..cc8ee8125712 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1046,11 +1046,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, apic->regs + APIC_TMR); } - if (vcpu->arch.apicv_active) - kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); - else { + if (kvm_x86_ops->deliver_posted_interrupt(vcpu, vector)) { kvm_lapic_set_irr(vector, apic); - kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_vcpu_kick(vcpu); } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a391b29138f0..8787a123b8e7 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -5258,8 +5258,11 @@ static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) return; } -static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) +static int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) { + if (!vcpu->arch.apicv_active) + return -1; + kvm_lapic_set_irr(vec, vcpu->arch.apic); smp_mb__after_atomic(); @@ -5271,6 +5274,8 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) put_cpu(); } else kvm_vcpu_wake_up(vcpu); + + return 0; } static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index dafe4df893c8..63ccc435a602 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3822,24 +3822,29 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, * 2. If target vcpu isn't running(root mode), kick it to pick up the * interrupt from PIR in next vmentry. */ -static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) +static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) { struct vcpu_vmx *vmx = to_vmx(vcpu); int r; r = vmx_deliver_nested_posted_interrupt(vcpu, vector); if (!r) - return; + return 0; + + if (!vcpu->arch.apicv_active) + return -1; if (pi_test_and_set_pir(vector, &vmx->pi_desc)) - return; + return 0; /* If a previous notification has sent the IPI, nothing to do. */ if (pi_test_and_set_on(&vmx->pi_desc)) - return; + return 0; if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false)) kvm_vcpu_kick(vcpu); + + return 0; } /* -- cgit v1.2.3-59-g8ed1b From a4443267800af240072280c44521caab61924e55 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 20 Feb 2020 18:22:04 +0100 Subject: KVM: nVMX: clear PIN_BASED_POSTED_INTR from nested pinbased_ctls only when apicv is globally disabled When apicv is disabled on a vCPU (e.g. by enabling KVM_CAP_HYPERV_SYNIC*), nothing happens to VMX MSRs on the already existing vCPUs, however, all new ones are created with PIN_BASED_POSTED_INTR filtered out. This is very confusing and results in the following picture inside the guest: $ rdmsr -ax 0x48d ff00000016 7f00000016 7f00000016 7f00000016 This is observed with QEMU and 4-vCPU guest: QEMU creates vCPU0, does KVM_CAP_HYPERV_SYNIC2 and then creates the remaining three. L1 hypervisor may only check CPU0's controls to find out what features are available and it will be very confused later. Switch to setting PIN_BASED_POSTED_INTR control based on global 'enable_apicv' setting. Signed-off-by: Vitaly Kuznetsov Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/capabilities.h | 1 + arch/x86/kvm/vmx/nested.c | 5 ++--- arch/x86/kvm/vmx/nested.h | 3 +-- arch/x86/kvm/vmx/vmx.c | 10 ++++------ 4 files changed, 8 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 283bdb7071af..f486e2606247 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -12,6 +12,7 @@ extern bool __read_mostly enable_ept; extern bool __read_mostly enable_unrestricted_guest; extern bool __read_mostly enable_ept_ad_bits; extern bool __read_mostly enable_pml; +extern bool __read_mostly enable_apicv; extern int __read_mostly pt_mode; #define PT_MODE_SYSTEM 0 diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index a5757b0b80f9..2b3ba7d27be4 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5949,8 +5949,7 @@ void nested_vmx_set_vmcs_shadowing_bitmap(void) * bit in the high half is on if the corresponding bit in the control field * may be on. See also vmx_control_verify(). */ -void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps, - bool apicv) +void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps) { /* * Note that as a general rule, the high half of the MSRs (bits in @@ -5977,7 +5976,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps, PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS | - (apicv ? PIN_BASED_POSTED_INTR : 0); + (enable_apicv ? PIN_BASED_POSTED_INTR : 0); msrs->pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | PIN_BASED_VMX_PREEMPTION_TIMER; diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index fc874d4ead0f..1c5fbff45d69 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -17,8 +17,7 @@ enum nvmx_vmentry_status { }; void vmx_leave_nested(struct kvm_vcpu *vcpu); -void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps, - bool apicv); +void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps); void nested_vmx_hardware_unsetup(void); __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)); void nested_vmx_set_vmcs_shadowing_bitmap(void); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 63ccc435a602..404dafedd778 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -95,7 +95,7 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO); static bool __read_mostly fasteoi = 1; module_param(fasteoi, bool, S_IRUGO); -static bool __read_mostly enable_apicv = 1; +bool __read_mostly enable_apicv = 1; module_param(enable_apicv, bool, S_IRUGO); /* @@ -6769,8 +6769,7 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) if (nested) nested_vmx_setup_ctls_msrs(&vmx->nested.msrs, - vmx_capability.ept, - kvm_vcpu_apicv_active(vcpu)); + vmx_capability.ept); else memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs)); @@ -6851,8 +6850,7 @@ static int __init vmx_check_processor_compat(void) if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) return -EIO; if (nested) - nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept, - enable_apicv); + nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept); if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) { printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n", smp_processor_id()); @@ -7714,7 +7712,7 @@ static __init int hardware_setup(void) if (nested) { nested_vmx_setup_ctls_msrs(&vmcs_config.nested, - vmx_capability.ept, enable_apicv); + vmx_capability.ept); r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers); if (r) -- cgit v1.2.3-59-g8ed1b From 23520b2def95205f132e167cf5b25c609975e959 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Fri, 21 Feb 2020 22:04:46 +0800 Subject: KVM: apic: avoid calculating pending eoi from an uninitialized val When pv_eoi_get_user() fails, 'val' may remain uninitialized and the return value of pv_eoi_get_pending() becomes random. Fix the issue by initializing the variable. Reviewed-by: Vitaly Kuznetsov Signed-off-by: Miaohe Lin Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index cc8ee8125712..e3099c642fec 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -627,9 +627,11 @@ static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu) static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu) { u8 val; - if (pv_eoi_get_user(vcpu, &val) < 0) + if (pv_eoi_get_user(vcpu, &val) < 0) { printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n", (unsigned long long)vcpu->arch.pv_eoi.msr_val); + return false; + } return val & 0x1; } -- cgit v1.2.3-59-g8ed1b From d80b64ff297e40c2b6f7d7abc1b3eba70d22a068 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Sat, 4 Jan 2020 16:56:49 +0800 Subject: KVM: SVM: Fix potential memory leak in svm_cpu_init() When kmalloc memory for sd->sev_vmcbs failed, we forget to free the page held by sd->save_area. Also get rid of the var r as '-ENOMEM' is actually the only possible outcome here. Reviewed-by: Liran Alon Reviewed-by: Vitaly Kuznetsov Signed-off-by: Miaohe Lin Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8787a123b8e7..ff02aeb23616 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1005,33 +1005,32 @@ static void svm_cpu_uninit(int cpu) static int svm_cpu_init(int cpu) { struct svm_cpu_data *sd; - int r; sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL); if (!sd) return -ENOMEM; sd->cpu = cpu; - r = -ENOMEM; sd->save_area = alloc_page(GFP_KERNEL); if (!sd->save_area) - goto err_1; + goto free_cpu_data; if (svm_sev_enabled()) { - r = -ENOMEM; sd->sev_vmcbs = kmalloc_array(max_sev_asid + 1, sizeof(void *), GFP_KERNEL); if (!sd->sev_vmcbs) - goto err_1; + goto free_save_area; } per_cpu(svm_data, cpu) = sd; return 0; -err_1: +free_save_area: + __free_page(sd->save_area); +free_cpu_data: kfree(sd); - return r; + return -ENOMEM; } -- cgit v1.2.3-59-g8ed1b From e61d2392253220477813b43412090dccdcac601f Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 20 Feb 2020 06:29:48 -0800 Subject: hwmon: (w83627ehf) Fix crash seen with W83627DHG-P Loading the driver on a system with W83627DHG-P crashes as follows. w83627ehf: Found W83627DHG-P chip at 0x290 BUG: kernel NULL pointer dereference, address: 0000000000000000 PGD 0 P4D 0 Oops: 0000 [#1] SMP NOPTI CPU: 0 PID: 604 Comm: sensors Not tainted 5.6.0-rc2-00055-gca7e1fd1026c #29 Hardware name: /D425KT, BIOS MWPNT10N.86A.0132.2013.0726.1534 07/26/2013 RIP: 0010:w83627ehf_read_string+0x27/0x70 [w83627ehf] Code: [... ] RSP: 0018:ffffb95980657df8 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff96caaa7f5218 RCX: 0000000000000000 RDX: 0000000000000015 RSI: 0000000000000001 RDI: ffff96caa736ec08 RBP: 0000000000000000 R08: ffffb95980657e20 R09: 0000000000000001 R10: ffff96caaa635cc0 R11: 0000000000000000 R12: ffff96caa9f7cf00 R13: ffff96caa9ec3d00 R14: ffff96caa9ec3d28 R15: ffff96caa9ec3d40 FS: 00007fbc7c4e2740(0000) GS:ffff96caabc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000129d58000 CR4: 00000000000006f0 Call Trace: ? cp_new_stat+0x12d/0x160 hwmon_attr_show_string+0x37/0x70 [hwmon] dev_attr_show+0x14/0x50 sysfs_kf_seq_show+0xb5/0x1b0 seq_read+0xcf/0x460 vfs_read+0x9b/0x150 ksys_read+0x5f/0xe0 do_syscall_64+0x48/0x190 entry_SYSCALL_64_after_hwframe+0x44/0xa9 ... Temperature labels are not always present. Adjust sysfs attribute visibility accordingly. Reported-by: Meelis Roos Suggested-by: Dr. David Alan Gilbert Reviewed-by: Dr. David Alan Gilbert Cc: Meelis Roos Cc: Dr. David Alan Gilbert Fixes: 266cd5835947 ("hwmon: (w83627ehf) convert to with_info interface") Signed-off-by: Guenter Roeck --- drivers/hwmon/w83627ehf.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/w83627ehf.c b/drivers/hwmon/w83627ehf.c index 7ffadc2da57b..5a5120121e50 100644 --- a/drivers/hwmon/w83627ehf.c +++ b/drivers/hwmon/w83627ehf.c @@ -1346,8 +1346,13 @@ w83627ehf_is_visible(const void *drvdata, enum hwmon_sensor_types type, /* channel 0.., name 1.. */ if (!(data->have_temp & (1 << channel))) return 0; - if (attr == hwmon_temp_input || attr == hwmon_temp_label) + if (attr == hwmon_temp_input) return 0444; + if (attr == hwmon_temp_label) { + if (data->temp_label) + return 0444; + return 0; + } if (channel == 2 && data->temp3_val_only) return 0; if (attr == hwmon_temp_max) { -- cgit v1.2.3-59-g8ed1b From 63fb9623427fbb44e3782233b6e4714057b76ff2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 21 Feb 2020 01:46:18 +0100 Subject: ACPI: PM: s2idle: Check fixed wakeup events in acpi_s2idle_wake() Commit fdde0ff8590b ("ACPI: PM: s2idle: Prevent spurious SCIs from waking up the system") overlooked the fact that fixed events can wake up the system too and broke RTC wakeup from suspend-to-idle as a result. Fix this issue by checking the fixed events in acpi_s2idle_wake() in addition to checking wakeup GPEs and break out of the suspend-to-idle loop if the status bits of any enabled fixed events are set then. Fixes: fdde0ff8590b ("ACPI: PM: s2idle: Prevent spurious SCIs from waking up the system") Reported-and-tested-by: Chris Wilson Cc: 5.4+ # 5.4+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Linus Torvalds --- drivers/acpi/acpica/evevent.c | 45 +++++++++++++++++++++++++++++++++++++++++++ drivers/acpi/sleep.c | 7 +++++++ include/acpi/acpixf.h | 1 + 3 files changed, 53 insertions(+) diff --git a/drivers/acpi/acpica/evevent.c b/drivers/acpi/acpica/evevent.c index 8c83d8c620dc..789d5e920aaf 100644 --- a/drivers/acpi/acpica/evevent.c +++ b/drivers/acpi/acpica/evevent.c @@ -265,4 +265,49 @@ static u32 acpi_ev_fixed_event_dispatch(u32 event) handler) (acpi_gbl_fixed_event_handlers[event].context)); } +/******************************************************************************* + * + * FUNCTION: acpi_any_fixed_event_status_set + * + * PARAMETERS: None + * + * RETURN: TRUE or FALSE + * + * DESCRIPTION: Checks the PM status register for active fixed events + * + ******************************************************************************/ + +u32 acpi_any_fixed_event_status_set(void) +{ + acpi_status status; + u32 in_status; + u32 in_enable; + u32 i; + + status = acpi_hw_register_read(ACPI_REGISTER_PM1_ENABLE, &in_enable); + if (ACPI_FAILURE(status)) { + return (FALSE); + } + + status = acpi_hw_register_read(ACPI_REGISTER_PM1_STATUS, &in_status); + if (ACPI_FAILURE(status)) { + return (FALSE); + } + + /* + * Check for all possible Fixed Events and dispatch those that are active + */ + for (i = 0; i < ACPI_NUM_FIXED_EVENTS; i++) { + + /* Both the status and enable bits must be on for this event */ + + if ((in_status & acpi_gbl_fixed_event_info[i].status_bit_mask) && + (in_enable & acpi_gbl_fixed_event_info[i].enable_bit_mask)) { + return (TRUE); + } + } + + return (FALSE); +} + #endif /* !ACPI_REDUCED_HARDWARE */ diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 152f7fc0b200..e5f95922bc21 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -1005,6 +1005,13 @@ static bool acpi_s2idle_wake(void) if (irqd_is_wakeup_armed(irq_get_irq_data(acpi_sci_irq))) return true; + /* + * If the status bit of any enabled fixed event is set, the + * wakeup is regarded as valid. + */ + if (acpi_any_fixed_event_status_set()) + return true; + /* * If there are no EC events to process and at least one of the * other enabled GPEs is active, the wakeup is regarded as a diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 5867777bb7d0..8e8be989c2a6 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -753,6 +753,7 @@ ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_disable_all_gpes(void)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_runtime_gpes(void)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_wakeup_gpes(void)) ACPI_HW_DEPENDENT_RETURN_UINT32(u32 acpi_any_gpe_status_set(void)) +ACPI_HW_DEPENDENT_RETURN_UINT32(u32 acpi_any_fixed_event_status_set(void)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_get_gpe_device(u32 gpe_index, -- cgit v1.2.3-59-g8ed1b From 595abbaff5db121428247a2e6ab368734472e101 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 20 Feb 2020 20:03:50 -0800 Subject: y2038: remove ktime to/from timespec/timeval conversion A couple of helpers are now obsolete and can be removed, so drivers can no longer start using them and instead use y2038-safe interfaces. Link: http://lkml.kernel.org/r/20200110154232.4104492-2-arnd@arndb.de Signed-off-by: Arnd Bergmann Acked-by: Thomas Gleixner Cc: Deepa Dinamani Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ktime.h | 37 ------------------------------------- 1 file changed, 37 deletions(-) diff --git a/include/linux/ktime.h b/include/linux/ktime.h index b2bb44f87f5a..d1fb05135665 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -66,33 +66,15 @@ static inline ktime_t ktime_set(const s64 secs, const unsigned long nsecs) */ #define ktime_sub_ns(kt, nsval) ((kt) - (nsval)) -/* convert a timespec to ktime_t format: */ -static inline ktime_t timespec_to_ktime(struct timespec ts) -{ - return ktime_set(ts.tv_sec, ts.tv_nsec); -} - /* convert a timespec64 to ktime_t format: */ static inline ktime_t timespec64_to_ktime(struct timespec64 ts) { return ktime_set(ts.tv_sec, ts.tv_nsec); } -/* convert a timeval to ktime_t format: */ -static inline ktime_t timeval_to_ktime(struct timeval tv) -{ - return ktime_set(tv.tv_sec, tv.tv_usec * NSEC_PER_USEC); -} - -/* Map the ktime_t to timespec conversion to ns_to_timespec function */ -#define ktime_to_timespec(kt) ns_to_timespec((kt)) - /* Map the ktime_t to timespec conversion to ns_to_timespec function */ #define ktime_to_timespec64(kt) ns_to_timespec64((kt)) -/* Map the ktime_t to timeval conversion to ns_to_timeval function */ -#define ktime_to_timeval(kt) ns_to_timeval((kt)) - /* Convert ktime_t to nanoseconds */ static inline s64 ktime_to_ns(const ktime_t kt) { @@ -215,25 +197,6 @@ static inline ktime_t ktime_sub_ms(const ktime_t kt, const u64 msec) extern ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs); -/** - * ktime_to_timespec_cond - convert a ktime_t variable to timespec - * format only if the variable contains data - * @kt: the ktime_t variable to convert - * @ts: the timespec variable to store the result in - * - * Return: %true if there was a successful conversion, %false if kt was 0. - */ -static inline __must_check bool ktime_to_timespec_cond(const ktime_t kt, - struct timespec *ts) -{ - if (kt) { - *ts = ktime_to_timespec(kt); - return true; - } else { - return false; - } -} - /** * ktime_to_timespec64_cond - convert a ktime_t variable to timespec64 * format only if the variable contains data -- cgit v1.2.3-59-g8ed1b From 412c53a680a97cb1ae2c0ab60230e193bee86387 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 20 Feb 2020 20:03:54 -0800 Subject: y2038: remove unused time32 interfaces No users remain, so kill these off before we grow new ones. Link: http://lkml.kernel.org/r/20200110154232.4104492-3-arnd@arndb.de Signed-off-by: Arnd Bergmann Acked-by: Thomas Gleixner Cc: Deepa Dinamani Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compat.h | 29 -------- include/linux/time32.h | 154 +----------------------------------------- include/linux/timekeeping32.h | 32 --------- include/linux/types.h | 5 -- kernel/compat.c | 64 ------------------ kernel/time/time.c | 43 ------------ 6 files changed, 1 insertion(+), 326 deletions(-) diff --git a/include/linux/compat.h b/include/linux/compat.h index 11083d84eb23..df2475be134a 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -248,15 +248,6 @@ typedef struct compat_siginfo { } _sifields; } compat_siginfo_t; -/* - * These functions operate on 32- or 64-bit specs depending on - * COMPAT_USE_64BIT_TIME, hence the void user pointer arguments. - */ -extern int compat_get_timespec(struct timespec *, const void __user *); -extern int compat_put_timespec(const struct timespec *, void __user *); -extern int compat_get_timeval(struct timeval *, const void __user *); -extern int compat_put_timeval(const struct timeval *, void __user *); - struct compat_iovec { compat_uptr_t iov_base; compat_size_t iov_len; @@ -416,26 +407,6 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *to, const kernel_siginf int get_compat_sigevent(struct sigevent *event, const struct compat_sigevent __user *u_event); -static inline int old_timeval32_compare(struct old_timeval32 *lhs, - struct old_timeval32 *rhs) -{ - if (lhs->tv_sec < rhs->tv_sec) - return -1; - if (lhs->tv_sec > rhs->tv_sec) - return 1; - return lhs->tv_usec - rhs->tv_usec; -} - -static inline int old_timespec32_compare(struct old_timespec32 *lhs, - struct old_timespec32 *rhs) -{ - if (lhs->tv_sec < rhs->tv_sec) - return -1; - if (lhs->tv_sec > rhs->tv_sec) - return 1; - return lhs->tv_nsec - rhs->tv_nsec; -} - extern int get_compat_sigset(sigset_t *set, const compat_sigset_t __user *compat); /* diff --git a/include/linux/time32.h b/include/linux/time32.h index cad4c3186002..cf9320cd2d0b 100644 --- a/include/linux/time32.h +++ b/include/linux/time32.h @@ -12,8 +12,6 @@ #include #include -#define TIME_T_MAX (__kernel_old_time_t)((1UL << ((sizeof(__kernel_old_time_t) << 3) - 1)) - 1) - typedef s32 old_time32_t; struct old_timespec32 { @@ -73,162 +71,12 @@ struct __kernel_timex; int get_old_timex32(struct __kernel_timex *, const struct old_timex32 __user *); int put_old_timex32(struct old_timex32 __user *, const struct __kernel_timex *); -#if __BITS_PER_LONG == 64 - -/* timespec64 is defined as timespec here */ -static inline struct timespec timespec64_to_timespec(const struct timespec64 ts64) -{ - return *(const struct timespec *)&ts64; -} - -static inline struct timespec64 timespec_to_timespec64(const struct timespec ts) -{ - return *(const struct timespec64 *)&ts; -} - -#else -static inline struct timespec timespec64_to_timespec(const struct timespec64 ts64) -{ - struct timespec ret; - - ret.tv_sec = (time_t)ts64.tv_sec; - ret.tv_nsec = ts64.tv_nsec; - return ret; -} - -static inline struct timespec64 timespec_to_timespec64(const struct timespec ts) -{ - struct timespec64 ret; - - ret.tv_sec = ts.tv_sec; - ret.tv_nsec = ts.tv_nsec; - return ret; -} -#endif - -static inline int timespec_equal(const struct timespec *a, - const struct timespec *b) -{ - return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec); -} - -/* - * lhs < rhs: return <0 - * lhs == rhs: return 0 - * lhs > rhs: return >0 - */ -static inline int timespec_compare(const struct timespec *lhs, const struct timespec *rhs) -{ - if (lhs->tv_sec < rhs->tv_sec) - return -1; - if (lhs->tv_sec > rhs->tv_sec) - return 1; - return lhs->tv_nsec - rhs->tv_nsec; -} - -/* - * Returns true if the timespec is norm, false if denorm: - */ -static inline bool timespec_valid(const struct timespec *ts) -{ - /* Dates before 1970 are bogus */ - if (ts->tv_sec < 0) - return false; - /* Can't have more nanoseconds then a second */ - if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) - return false; - return true; -} - -/** - * timespec_to_ns - Convert timespec to nanoseconds - * @ts: pointer to the timespec variable to be converted - * - * Returns the scalar nanosecond representation of the timespec - * parameter. - */ -static inline s64 timespec_to_ns(const struct timespec *ts) -{ - return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; -} - /** - * ns_to_timespec - Convert nanoseconds to timespec - * @nsec: the nanoseconds value to be converted - * - * Returns the timespec representation of the nsec parameter. - */ -extern struct timespec ns_to_timespec(const s64 nsec); - -/** - * timespec_add_ns - Adds nanoseconds to a timespec - * @a: pointer to timespec to be incremented - * @ns: unsigned nanoseconds value to be added - * - * This must always be inlined because its used from the x86-64 vdso, - * which cannot call other kernel functions. - */ -static __always_inline void timespec_add_ns(struct timespec *a, u64 ns) -{ - a->tv_sec += __iter_div_u64_rem(a->tv_nsec + ns, NSEC_PER_SEC, &ns); - a->tv_nsec = ns; -} - -static inline unsigned long mktime(const unsigned int year, - const unsigned int mon, const unsigned int day, - const unsigned int hour, const unsigned int min, - const unsigned int sec) -{ - return mktime64(year, mon, day, hour, min, sec); -} - -static inline bool timeval_valid(const struct timeval *tv) -{ - /* Dates before 1970 are bogus */ - if (tv->tv_sec < 0) - return false; - - /* Can't have more microseconds then a second */ - if (tv->tv_usec < 0 || tv->tv_usec >= USEC_PER_SEC) - return false; - - return true; -} - -/** - * timeval_to_ns - Convert timeval to nanoseconds - * @ts: pointer to the timeval variable to be converted - * - * Returns the scalar nanosecond representation of the timeval - * parameter. - */ -static inline s64 timeval_to_ns(const struct timeval *tv) -{ - return ((s64) tv->tv_sec * NSEC_PER_SEC) + - tv->tv_usec * NSEC_PER_USEC; -} - -/** - * ns_to_timeval - Convert nanoseconds to timeval + * ns_to_kernel_old_timeval - Convert nanoseconds to timeval * @nsec: the nanoseconds value to be converted * * Returns the timeval representation of the nsec parameter. */ -extern struct timeval ns_to_timeval(const s64 nsec); extern struct __kernel_old_timeval ns_to_kernel_old_timeval(s64 nsec); -/* - * Old names for the 32-bit time_t interfaces, these will be removed - * when everything uses the new names. - */ -#define compat_time_t old_time32_t -#define compat_timeval old_timeval32 -#define compat_timespec old_timespec32 -#define compat_itimerspec old_itimerspec32 -#define ns_to_compat_timeval ns_to_old_timeval32 -#define get_compat_itimerspec64 get_old_itimerspec32 -#define put_compat_itimerspec64 put_old_itimerspec32 -#define compat_get_timespec64 get_old_timespec32 -#define compat_put_timespec64 put_old_timespec32 - #endif diff --git a/include/linux/timekeeping32.h b/include/linux/timekeeping32.h index cc59cc9e0e84..266017fc9ee9 100644 --- a/include/linux/timekeeping32.h +++ b/include/linux/timekeeping32.h @@ -11,36 +11,4 @@ static inline unsigned long get_seconds(void) return ktime_get_real_seconds(); } -static inline void getnstimeofday(struct timespec *ts) -{ - struct timespec64 ts64; - - ktime_get_real_ts64(&ts64); - *ts = timespec64_to_timespec(ts64); -} - -static inline void ktime_get_ts(struct timespec *ts) -{ - struct timespec64 ts64; - - ktime_get_ts64(&ts64); - *ts = timespec64_to_timespec(ts64); -} - -static inline void getrawmonotonic(struct timespec *ts) -{ - struct timespec64 ts64; - - ktime_get_raw_ts64(&ts64); - *ts = timespec64_to_timespec(ts64); -} - -static inline void getboottime(struct timespec *ts) -{ - struct timespec64 ts64; - - getboottime64(&ts64); - *ts = timespec64_to_timespec(ts64); -} - #endif diff --git a/include/linux/types.h b/include/linux/types.h index eb870ad42919..d3021c879179 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -65,11 +65,6 @@ typedef __kernel_ssize_t ssize_t; typedef __kernel_ptrdiff_t ptrdiff_t; #endif -#ifndef _TIME_T -#define _TIME_T -typedef __kernel_old_time_t time_t; -#endif - #ifndef _CLOCK_T #define _CLOCK_T typedef __kernel_clock_t clock_t; diff --git a/kernel/compat.c b/kernel/compat.c index 95005f849c68..843dd17e6078 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -26,70 +26,6 @@ #include -static int __compat_get_timeval(struct timeval *tv, const struct old_timeval32 __user *ctv) -{ - return (!access_ok(ctv, sizeof(*ctv)) || - __get_user(tv->tv_sec, &ctv->tv_sec) || - __get_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0; -} - -static int __compat_put_timeval(const struct timeval *tv, struct old_timeval32 __user *ctv) -{ - return (!access_ok(ctv, sizeof(*ctv)) || - __put_user(tv->tv_sec, &ctv->tv_sec) || - __put_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0; -} - -static int __compat_get_timespec(struct timespec *ts, const struct old_timespec32 __user *cts) -{ - return (!access_ok(cts, sizeof(*cts)) || - __get_user(ts->tv_sec, &cts->tv_sec) || - __get_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0; -} - -static int __compat_put_timespec(const struct timespec *ts, struct old_timespec32 __user *cts) -{ - return (!access_ok(cts, sizeof(*cts)) || - __put_user(ts->tv_sec, &cts->tv_sec) || - __put_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0; -} - -int compat_get_timeval(struct timeval *tv, const void __user *utv) -{ - if (COMPAT_USE_64BIT_TIME) - return copy_from_user(tv, utv, sizeof(*tv)) ? -EFAULT : 0; - else - return __compat_get_timeval(tv, utv); -} -EXPORT_SYMBOL_GPL(compat_get_timeval); - -int compat_put_timeval(const struct timeval *tv, void __user *utv) -{ - if (COMPAT_USE_64BIT_TIME) - return copy_to_user(utv, tv, sizeof(*tv)) ? -EFAULT : 0; - else - return __compat_put_timeval(tv, utv); -} -EXPORT_SYMBOL_GPL(compat_put_timeval); - -int compat_get_timespec(struct timespec *ts, const void __user *uts) -{ - if (COMPAT_USE_64BIT_TIME) - return copy_from_user(ts, uts, sizeof(*ts)) ? -EFAULT : 0; - else - return __compat_get_timespec(ts, uts); -} -EXPORT_SYMBOL_GPL(compat_get_timespec); - -int compat_put_timespec(const struct timespec *ts, void __user *uts) -{ - if (COMPAT_USE_64BIT_TIME) - return copy_to_user(uts, ts, sizeof(*ts)) ? -EFAULT : 0; - else - return __compat_put_timespec(ts, uts); -} -EXPORT_SYMBOL_GPL(compat_put_timespec); - #ifdef __ARCH_WANT_SYS_SIGPROCMASK /* diff --git a/kernel/time/time.c b/kernel/time/time.c index cdd7386115ff..3985b2b32d08 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -449,49 +449,6 @@ time64_t mktime64(const unsigned int year0, const unsigned int mon0, } EXPORT_SYMBOL(mktime64); -/** - * ns_to_timespec - Convert nanoseconds to timespec - * @nsec: the nanoseconds value to be converted - * - * Returns the timespec representation of the nsec parameter. - */ -struct timespec ns_to_timespec(const s64 nsec) -{ - struct timespec ts; - s32 rem; - - if (!nsec) - return (struct timespec) {0, 0}; - - ts.tv_sec = div_s64_rem(nsec, NSEC_PER_SEC, &rem); - if (unlikely(rem < 0)) { - ts.tv_sec--; - rem += NSEC_PER_SEC; - } - ts.tv_nsec = rem; - - return ts; -} -EXPORT_SYMBOL(ns_to_timespec); - -/** - * ns_to_timeval - Convert nanoseconds to timeval - * @nsec: the nanoseconds value to be converted - * - * Returns the timeval representation of the nsec parameter. - */ -struct timeval ns_to_timeval(const s64 nsec) -{ - struct timespec ts = ns_to_timespec(nsec); - struct timeval tv; - - tv.tv_sec = ts.tv_sec; - tv.tv_usec = (suseconds_t) ts.tv_nsec / 1000; - - return tv; -} -EXPORT_SYMBOL(ns_to_timeval); - struct __kernel_old_timeval ns_to_kernel_old_timeval(const s64 nsec) { struct timespec64 ts = ns_to_timespec64(nsec); -- cgit v1.2.3-59-g8ed1b From c766d1472c70d25ad475cf56042af1652e792b23 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 20 Feb 2020 20:03:57 -0800 Subject: y2038: hide timeval/timespec/itimerval/itimerspec types There are no in-kernel users remaining, but there may still be users that include linux/time.h instead of sys/time.h from user space, so leave the types available to user space while hiding them from kernel space. Only the __kernel_old_* versions of these types remain now. Link: http://lkml.kernel.org/r/20200110154232.4104492-4-arnd@arndb.de Signed-off-by: Arnd Bergmann Acked-by: Thomas Gleixner Cc: Deepa Dinamani Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/asm-generic/posix_types.h | 2 ++ include/uapi/linux/time.h | 22 ++++++++++++---------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/include/uapi/asm-generic/posix_types.h b/include/uapi/asm-generic/posix_types.h index 2f9c80595ba7..b5f7594eee7a 100644 --- a/include/uapi/asm-generic/posix_types.h +++ b/include/uapi/asm-generic/posix_types.h @@ -87,7 +87,9 @@ typedef struct { typedef __kernel_long_t __kernel_off_t; typedef long long __kernel_loff_t; typedef __kernel_long_t __kernel_old_time_t; +#ifndef __KERNEL__ typedef __kernel_long_t __kernel_time_t; +#endif typedef long long __kernel_time64_t; typedef __kernel_long_t __kernel_clock_t; typedef int __kernel_timer_t; diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h index a655aa28dc6e..4f4b6e48e01c 100644 --- a/include/uapi/linux/time.h +++ b/include/uapi/linux/time.h @@ -5,6 +5,7 @@ #include #include +#ifndef __KERNEL__ #ifndef _STRUCT_TIMESPEC #define _STRUCT_TIMESPEC struct timespec { @@ -18,6 +19,17 @@ struct timeval { __kernel_suseconds_t tv_usec; /* microseconds */ }; +struct itimerspec { + struct timespec it_interval;/* timer period */ + struct timespec it_value; /* timer expiration */ +}; + +struct itimerval { + struct timeval it_interval;/* timer interval */ + struct timeval it_value; /* current value */ +}; +#endif + struct timezone { int tz_minuteswest; /* minutes west of Greenwich */ int tz_dsttime; /* type of dst correction */ @@ -31,16 +43,6 @@ struct timezone { #define ITIMER_VIRTUAL 1 #define ITIMER_PROF 2 -struct itimerspec { - struct timespec it_interval; /* timer period */ - struct timespec it_value; /* timer expiration */ -}; - -struct itimerval { - struct timeval it_interval; /* timer interval */ - struct timeval it_value; /* current value */ -}; - /* * The IDs of the various system clocks (for POSIX.1b interval timers): */ -- cgit v1.2.3-59-g8ed1b From edf28f4061afe4c2d9eb1c3323d90e882c1d6800 Mon Sep 17 00:00:00 2001 From: Ioanna Alifieraki Date: Thu, 20 Feb 2020 20:04:00 -0800 Subject: Revert "ipc,sem: remove uneeded sem_undo_list lock usage in exit_sem()" This reverts commit a97955844807e327df11aa33869009d14d6b7de0. Commit a97955844807 ("ipc,sem: remove uneeded sem_undo_list lock usage in exit_sem()") removes a lock that is needed. This leads to a process looping infinitely in exit_sem() and can also lead to a crash. There is a reproducer available in [1] and with the commit reverted the issue does not reproduce anymore. Using the reproducer found in [1] is fairly easy to reach a point where one of the child processes is looping infinitely in exit_sem between for(;;) and if (semid == -1) block, while it's trying to free its last sem_undo structure which has already been freed by freeary(). Each sem_undo struct is on two lists: one per semaphore set (list_id) and one per process (list_proc). The list_id list tracks undos by semaphore set, and the list_proc by process. Undo structures are removed either by freeary() or by exit_sem(). The freeary function is invoked when the user invokes a syscall to remove a semaphore set. During this operation freeary() traverses the list_id associated with the semaphore set and removes the undo structures from both the list_id and list_proc lists. For this case, exit_sem() is called at process exit. Each process contains a struct sem_undo_list (referred to as "ulp") which contains the head for the list_proc list. When the process exits, exit_sem() traverses this list to remove each sem_undo struct. As in freeary(), whenever a sem_undo struct is removed from list_proc, it is also removed from the list_id list. Removing elements from list_id is safe for both exit_sem() and freeary() due to sem_lock(). Removing elements from list_proc is not safe; freeary() locks &un->ulp->lock when it performs list_del_rcu(&un->list_proc) but exit_sem() does not (locking was removed by commit a97955844807 ("ipc,sem: remove uneeded sem_undo_list lock usage in exit_sem()"). This can result in the following situation while executing the reproducer [1] : Consider a child process in exit_sem() and the parent in freeary() (because of semctl(sid[i], NSEM, IPC_RMID)). - The list_proc for the child contains the last two undo structs A and B (the rest have been removed either by exit_sem() or freeary()). - The semid for A is 1 and semid for B is 2. - exit_sem() removes A and at the same time freeary() removes B. - Since A and B have different semid sem_lock() will acquire different locks for each process and both can proceed. The bug is that they remove A and B from the same list_proc at the same time because only freeary() acquires the ulp lock. When exit_sem() removes A it makes ulp->list_proc.next to point at B and at the same time freeary() removes B setting B->semid=-1. At the next iteration of for(;;) loop exit_sem() will try to remove B. The only way to break from for(;;) is for (&un->list_proc == &ulp->list_proc) to be true which is not. Then exit_sem() will check if B->semid=-1 which is and will continue looping in for(;;) until the memory for B is reallocated and the value at B->semid is changed. At that point, exit_sem() will crash attempting to unlink B from the lists (this can be easily triggered by running the reproducer [1] a second time). To prove this scenario instrumentation was added to keep information about each sem_undo (un) struct that is removed per process and per semaphore set (sma). CPU0 CPU1 [caller holds sem_lock(sma for A)] ... freeary() exit_sem() ... ... ... sem_lock(sma for B) spin_lock(A->ulp->lock) ... list_del_rcu(un_A->list_proc) list_del_rcu(un_B->list_proc) Undo structures A and B have different semid and sem_lock() operations proceed. However they belong to the same list_proc list and they are removed at the same time. This results into ulp->list_proc.next pointing to the address of B which is already removed. After reverting commit a97955844807 ("ipc,sem: remove uneeded sem_undo_list lock usage in exit_sem()") the issue was no longer reproducible. [1] https://bugzilla.redhat.com/show_bug.cgi?id=1694779 Link: http://lkml.kernel.org/r/20191211191318.11860-1-ioanna-maria.alifieraki@canonical.com Fixes: a97955844807 ("ipc,sem: remove uneeded sem_undo_list lock usage in exit_sem()") Signed-off-by: Ioanna Alifieraki Acked-by: Manfred Spraul Acked-by: Herton R. Krzesinski Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Cc: Joel Fernandes (Google) Cc: Davidlohr Bueso Cc: Jay Vosburgh Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/sem.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ipc/sem.c b/ipc/sem.c index 4f4303f32077..3687b71151b3 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -2384,11 +2384,9 @@ void exit_sem(struct task_struct *tsk) ipc_assert_locked_object(&sma->sem_perm); list_del(&un->list_id); - /* we are the last process using this ulp, acquiring ulp->lock - * isn't required. Besides that, we are also protected against - * IPC_RMID as we hold sma->sem_perm lock now - */ + spin_lock(&ulp->lock); list_del_rcu(&un->list_proc); + spin_unlock(&ulp->lock); /* perform adjustments registered in un */ for (i = 0; i < sma->sem_nsems; i++) { -- cgit v1.2.3-59-g8ed1b From 467d12f5c7842896d2de3ced74e4147ee29e97c8 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 20 Feb 2020 20:04:03 -0800 Subject: include/uapi/linux/swab.h: fix userspace breakage, use __BITS_PER_LONG for swap QEMU has a funny new build error message when I use the upstream kernel headers: CC block/file-posix.o In file included from /home/cborntra/REPOS/qemu/include/qemu/timer.h:4, from /home/cborntra/REPOS/qemu/include/qemu/timed-average.h:29, from /home/cborntra/REPOS/qemu/include/block/accounting.h:28, from /home/cborntra/REPOS/qemu/include/block/block_int.h:27, from /home/cborntra/REPOS/qemu/block/file-posix.c:30: /usr/include/linux/swab.h: In function `__swab': /home/cborntra/REPOS/qemu/include/qemu/bitops.h:20:34: error: "sizeof" is not defined, evaluates to 0 [-Werror=undef] 20 | #define BITS_PER_LONG (sizeof (unsigned long) * BITS_PER_BYTE) | ^~~~~~ /home/cborntra/REPOS/qemu/include/qemu/bitops.h:20:41: error: missing binary operator before token "(" 20 | #define BITS_PER_LONG (sizeof (unsigned long) * BITS_PER_BYTE) | ^ cc1: all warnings being treated as errors make: *** [/home/cborntra/REPOS/qemu/rules.mak:69: block/file-posix.o] Error 1 rm tests/qemu-iotests/socket_scm_helper.o This was triggered by commit d5767057c9a ("uapi: rename ext2_swab() to swab() and share globally in swab.h"). That patch is doing #include but it uses BITS_PER_LONG. The kernel file asm/bitsperlong.h provide only __BITS_PER_LONG. Let us use the __ variant in swap.h Link: http://lkml.kernel.org/r/20200213142147.17604-1-borntraeger@de.ibm.com Fixes: d5767057c9a ("uapi: rename ext2_swab() to swab() and share globally in swab.h") Signed-off-by: Christian Borntraeger Cc: Yury Norov Cc: Allison Randal Cc: Joe Perches Cc: Thomas Gleixner Cc: William Breathitt Gray Cc: Torsten Hilbrich Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/swab.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h index fa7f97da5b76..7272f85d6d6a 100644 --- a/include/uapi/linux/swab.h +++ b/include/uapi/linux/swab.h @@ -135,9 +135,9 @@ static inline __attribute_const__ __u32 __fswahb32(__u32 val) static __always_inline unsigned long __swab(const unsigned long y) { -#if BITS_PER_LONG == 64 +#if __BITS_PER_LONG == 64 return __swab64(y); -#else /* BITS_PER_LONG == 32 */ +#else /* __BITS_PER_LONG == 32 */ return __swab32(y); #endif } -- cgit v1.2.3-59-g8ed1b From 9e69fa46275b63f670ced0f11095af1841c73fca Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 20 Feb 2020 20:04:06 -0800 Subject: selftests/vm: add missed tests in run_vmtests The commits introducing 'mlock-random-test'[1], 'map_fiex_noreplace'[2], and 'thuge-gen'[3] have not added those in the 'run_vmtests' script and thus the 'run_tests' command of kselftests doesn't run those. This commit adds those in the script. 'gup_benchmark' and 'transhuge-stress' are also not included in the 'run_vmtests', but this commit does not add those because those are for performance measurement rather than pass/fail tests. [1] commit 26b4224d9961 ("selftests: expanding more mlock selftest") [2] commit 91cbacc34512 ("tools/testing/selftests/vm/map_fixed_noreplace.c: add test for MAP_FIXED_NOREPLACE") [3] commit fcc1f2d5dd34 ("selftests: add a test program for variable huge page sizes in mmap/shmget") Link: http://lkml.kernel.org/r/20200206085144.29126-1-sj38.park@gmail.com Signed-off-by: SeongJae Park Cc: Uladzislau Rezki (Sony) Cc: Masami Hiramatsu Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/run_vmtests | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests index a692ea828317..f33714843198 100755 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests @@ -112,6 +112,17 @@ echo "NOTE: The above hugetlb tests provide minimal coverage. Use" echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" echo " hugetlb regression testing." +echo "---------------------------" +echo "running map_fixed_noreplace" +echo "---------------------------" +./map_fixed_noreplace +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + echo "-------------------" echo "running userfaultfd" echo "-------------------" @@ -186,6 +197,17 @@ else echo "[PASS]" fi +echo "-------------------------" +echo "running mlock-random-test" +echo "-------------------------" +./mlock-random-test +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + echo "--------------------" echo "running mlock2-tests" echo "--------------------" @@ -197,6 +219,17 @@ else echo "[PASS]" fi +echo "-----------------" +echo "running thuge-gen" +echo "-----------------" +./thuge-gen +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + if [ $VADDR64 -ne 0 ]; then echo "-----------------------------" echo "running virtual_address_range" -- cgit v1.2.3-59-g8ed1b From ef0c08192ac09e29ddd676b3ca6c4a501f277f10 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 20 Feb 2020 20:04:09 -0800 Subject: get_maintainer: remove uses of P: for maintainer name Commit 1ca84ed6425f ("MAINTAINERS: Reclaim the P: tag for Maintainer Entry Profile") changed the use of the "P:" tag from "Person" to "Profile (ie: special subsystem coding styles and characteristics)" Change how get_maintainer.pl parses the "P:" tag to match. Link: http://lkml.kernel.org/r/ca53823fc5d25c0be32ad937d0207a0589c08643.camel@perches.com Signed-off-by: Joe Perches Acked-by: Dan Williams Cc: Jonathan Corbet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/get_maintainer.pl | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 34085d146fa2..a00156855354 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -1341,35 +1341,11 @@ sub add_categories { } } } elsif ($ptype eq "M") { - my ($name, $address) = parse_email($pvalue); - if ($name eq "") { - if ($i > 0) { - my $tv = $typevalue[$i - 1]; - if ($tv =~ m/^([A-Z]):\s*(.*)/) { - if ($1 eq "P") { - $name = $2; - $pvalue = format_email($name, $address, $email_usename); - } - } - } - } if ($email_maintainer) { my $role = get_maintainer_role($i); push_email_addresses($pvalue, $role); } } elsif ($ptype eq "R") { - my ($name, $address) = parse_email($pvalue); - if ($name eq "") { - if ($i > 0) { - my $tv = $typevalue[$i - 1]; - if ($tv =~ m/^([A-Z]):\s*(.*)/) { - if ($1 eq "P") { - $name = $2; - $pvalue = format_email($name, $address, $email_usename); - } - } - } - } if ($email_reviewer) { my $subsystem = get_subsystem_name($i); push_email_addresses($pvalue, "reviewer:$subsystem"); -- cgit v1.2.3-59-g8ed1b From 0ef82fcefb99300ede6f4d38a8100845b2dc8e30 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 20 Feb 2020 20:04:12 -0800 Subject: scripts/get_maintainer.pl: deprioritize old Fixes: addresses Recently, I found that get_maintainer was causing me to send emails to the old addresses for maintainers. Since I usually just trust the output of get_maintainer to know the right email address, I didn't even look carefully and fired off two patch series that went to the wrong place. Oops. The problem was introduced recently when trying to add signatures from Fixes. The problem was that these email addresses were added too early in the process of compiling our list of places to send. Things added to the list earlier are considered more canonical and when we later added maintainer entries we ended up deduplicating to the old address. Here are two examples using mainline commits (to make it easier to replicate) for the two maintainers that I messed up recently: $ git format-patch d8549bcd0529~..d8549bcd0529 $ ./scripts/get_maintainer.pl 0001-clk-Add-clk_hw*.patch | grep Boyd Stephen Boyd ... $ git format-patch 6d1238aa3395~..6d1238aa3395 $ ./scripts/get_maintainer.pl 0001-arm64-dts-qcom-qcs404*.patch | grep Andy Andy Gross Let's move the adding of addresses from Fixes: to the end since the email addresses from these are much more likely to be older. After this patch the above examples get the right addresses for the two examples. Link: http://lkml.kernel.org/r/20200127095001.1.I41fba9f33590bfd92cd01960161d8384268c6569@changeid Fixes: 2f5bd343694e ("scripts/get_maintainer.pl: add signatures from Fixes: lines in commit message") Signed-off-by: Douglas Anderson Acked-by: Joe Perches Cc: Stephen Boyd Cc: Bjorn Andersson Cc: Andy Gross Cc: Kees Cook Cc: Dan Carpenter Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/get_maintainer.pl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index a00156855354..6cbcd1a3e113 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -932,10 +932,6 @@ sub get_maintainers { } } - foreach my $fix (@fixes) { - vcs_add_commit_signers($fix, "blamed_fixes"); - } - foreach my $email (@email_to, @list_to) { $email->[0] = deduplicate_email($email->[0]); } @@ -974,6 +970,10 @@ sub get_maintainers { } } + foreach my $fix (@fixes) { + vcs_add_commit_signers($fix, "blamed_fixes"); + } + my @to = (); if ($email || $email_list) { if ($email) { -- cgit v1.2.3-59-g8ed1b From fed98ef4d8b665316479dd35cbd92d3e2ff470a3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Feb 2020 20:04:15 -0800 Subject: mm/swapfile.c: fix a comment in sys_swapon() claim_swapfile now always takes i_rwsem. Link: http://lkml.kernel.org/r/20200114161225.309792-2-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swapfile.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 2c33ff456ed5..b2a2e45c9a36 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3157,7 +3157,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) mapping = swap_file->f_mapping; inode = mapping->host; - /* If S_ISREG(inode->i_mode) will do inode_lock(inode); */ + /* will take i_rwsem; */ error = claim_swapfile(p, inode); if (unlikely(error)) goto bad_swap; -- cgit v1.2.3-59-g8ed1b From 75866af62b439859d5146b7093ceb6b482852683 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 20 Feb 2020 20:04:18 -0800 Subject: mm/memcontrol.c: lost css_put in memcg_expand_shrinker_maps() for_each_mem_cgroup() increases css reference counter for memory cgroup and requires to use mem_cgroup_iter_break() if the walk is cancelled. Link: http://lkml.kernel.org/r/c98414fb-7e1f-da0f-867a-9340ec4bd30b@virtuozzo.com Fixes: 0a4465d34028 ("mm, memcg: assign memcg-aware shrinkers bitmap to memcg") Signed-off-by: Vasily Averin Acked-by: Kirill Tkhai Acked-by: Michal Hocko Reviewed-by: Roman Gushchin Cc: Johannes Weiner Cc: Vladimir Davydov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6f6dc8712e39..d09776cd6e10 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -409,8 +409,10 @@ int memcg_expand_shrinker_maps(int new_id) if (mem_cgroup_is_root(memcg)) continue; ret = memcg_expand_one_shrinker_map(memcg, size, old_size); - if (ret) + if (ret) { + mem_cgroup_iter_break(NULL, memcg); goto unlock; + } } unlock: if (!ret) -- cgit v1.2.3-59-g8ed1b From c11d3fa0116a6bc832a9e387427caa16f8de5ef2 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Thu, 20 Feb 2020 20:04:21 -0800 Subject: lib/string.c: update match_string() doc-strings with correct behavior There were a few attempts at changing behavior of the match_string() helpers (i.e. 'match_string()' & 'sysfs_match_string()'), to change & extend the behavior according to the doc-string. But the simplest approach is to just fix the doc-strings. The current behavior is fine as-is, and some bugs were introduced trying to fix it. As for extending the behavior, new helpers can always be introduced if needed. The match_string() helpers behave more like 'strncmp()' in the sense that they go up to n elements or until the first NULL element in the array of strings. This change updates the doc-strings with this info. Link: http://lkml.kernel.org/r/20200213072722.8249-1-alexandru.ardelean@analog.com Signed-off-by: Alexandru Ardelean Acked-by: Andy Shevchenko Cc: Kees Cook Cc: "Tobin C . Harding" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/string.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/lib/string.c b/lib/string.c index f607b967d978..6012c385fb31 100644 --- a/lib/string.c +++ b/lib/string.c @@ -699,6 +699,14 @@ EXPORT_SYMBOL(sysfs_streq); * @n: number of strings in the array or -1 for NULL terminated arrays * @string: string to match with * + * This routine will look for a string in an array of strings up to the + * n-th element in the array or until the first NULL element. + * + * Historically the value of -1 for @n, was used to search in arrays that + * are NULL terminated. However, the function does not make a distinction + * when finishing the search: either @n elements have been compared OR + * the first NULL element was found. + * * Return: * index of a @string in the @array if matches, or %-EINVAL otherwise. */ @@ -727,6 +735,14 @@ EXPORT_SYMBOL(match_string); * * Returns index of @str in the @array or -EINVAL, just like match_string(). * Uses sysfs_streq instead of strcmp for matching. + * + * This routine will look for a string in an array of strings up to the + * n-th element in the array or until the first NULL element. + * + * Historically the value of -1 for @n, was used to search in arrays that + * are NULL terminated. However, the function does not make a distinction + * when finishing the search: either @n elements have been compared OR + * the first NULL element was found. */ int __sysfs_match_string(const char * const *array, size_t n, const char *str) { -- cgit v1.2.3-59-g8ed1b From 76073c646f5f4999d763f471df9e38a5a912d70d Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 20 Feb 2020 20:04:24 -0800 Subject: mm/vmscan.c: don't round up scan size for online memory cgroup Commit 68600f623d69 ("mm: don't miss the last page because of round-off error") makes the scan size round up to @denominator regardless of the memory cgroup's state, online or offline. This affects the overall reclaiming behavior: the corresponding LRU list is eligible for reclaiming only when its size logically right shifted by @sc->priority is bigger than zero in the former formula. For example, the inactive anonymous LRU list should have at least 0x4000 pages to be eligible for reclaiming when we have 60/12 for swappiness/priority and without taking scan/rotation ratio into account. After the roundup is applied, the inactive anonymous LRU list becomes eligible for reclaiming when its size is bigger than or equal to 0x1000 in the same condition. (0x4000 >> 12) * 60 / (60 + 140 + 1) = 1 ((0x1000 >> 12) * 60) + 200) / (60 + 140 + 1) = 1 aarch64 has 512MB huge page size when the base page size is 64KB. The memory cgroup that has a huge page is always eligible for reclaiming in that case. The reclaiming is likely to stop after the huge page is reclaimed, meaing the further iteration on @sc->priority and the silbing and child memory cgroups will be skipped. The overall behaviour has been changed. This fixes the issue by applying the roundup to offlined memory cgroups only, to give more preference to reclaim memory from offlined memory cgroup. It sounds reasonable as those memory is unlikedly to be used by anyone. The issue was found by starting up 8 VMs on a Ampere Mustang machine, which has 8 CPUs and 16 GB memory. Each VM is given with 2 vCPUs and 2GB memory. It took 264 seconds for all VMs to be completely up and 784MB swap is consumed after that. With this patch applied, it took 236 seconds and 60MB swap to do same thing. So there is 10% performance improvement for my case. Note that KSM is disable while THP is enabled in the testing. total used free shared buff/cache available Mem: 16196 10065 2049 16 4081 3749 Swap: 8175 784 7391 total used free shared buff/cache available Mem: 16196 11324 3656 24 1215 2936 Swap: 8175 60 8115 Link: http://lkml.kernel.org/r/20200211024514.8730-1-gshan@redhat.com Fixes: 68600f623d69 ("mm: don't miss the last page because of round-off error") Signed-off-by: Gavin Shan Acked-by: Roman Gushchin Cc: [4.20+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index c05eb9efec07..876370565455 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2415,10 +2415,13 @@ out: /* * Scan types proportional to swappiness and * their relative recent reclaim efficiency. - * Make sure we don't miss the last page - * because of a round-off error. + * Make sure we don't miss the last page on + * the offlined memory cgroups because of a + * round-off error. */ - scan = DIV64_U64_ROUND_UP(scan * fraction[file], + scan = mem_cgroup_online(memcg) ? + div64_u64(scan * fraction[file], denominator) : + DIV64_U64_ROUND_UP(scan * fraction[file], denominator); break; case SCAN_FILE: -- cgit v1.2.3-59-g8ed1b From 18e19f195cd888f65643a77a0c6aee8f5be6439a Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 20 Feb 2020 20:04:27 -0800 Subject: mm/sparsemem: pfn_to_page is not valid yet on SPARSEMEM When we use SPARSEMEM instead of SPARSEMEM_VMEMMAP, pfn_to_page() doesn't work before sparse_init_one_section() is called. This leads to a crash when hotplug memory: BUG: unable to handle page fault for address: 0000000006400000 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 0 P4D 0 Oops: 0002 [#1] SMP PTI CPU: 3 PID: 221 Comm: kworker/u16:1 Tainted: G W 5.5.0-next-20200205+ #343 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015 Workqueue: kacpi_hotplug acpi_hotplug_work_fn RIP: 0010:__memset+0x24/0x30 Code: cc cc cc cc cc cc 0f 1f 44 00 00 49 89 f9 48 89 d1 83 e2 07 48 c1 e9 03 40 0f b6 f6 48 b8 01 01 01 01 01 01 01 01 48 0f af c6 48 ab 89 d1 f3 aa 4c 89 c8 c3 90 49 89 f9 40 88 f0 48 89 d1 f3 RSP: 0018:ffffb43ac0373c80 EFLAGS: 00010a87 RAX: ffffffffffffffff RBX: ffff8a1518800000 RCX: 0000000000050000 RDX: 0000000000000000 RSI: 00000000000000ff RDI: 0000000006400000 RBP: 0000000000140000 R08: 0000000000100000 R09: 0000000006400000 R10: 0000000000000000 R11: 0000000000000002 R12: 0000000000000000 R13: 0000000000000028 R14: 0000000000000000 R15: ffff8a153ffd9280 FS: 0000000000000000(0000) GS:ffff8a153ab00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000006400000 CR3: 0000000136fca000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: sparse_add_section+0x1c9/0x26a __add_pages+0xbf/0x150 add_pages+0x12/0x60 add_memory_resource+0xc8/0x210 __add_memory+0x62/0xb0 acpi_memory_device_add+0x13f/0x300 acpi_bus_attach+0xf6/0x200 acpi_bus_scan+0x43/0x90 acpi_device_hotplug+0x275/0x3d0 acpi_hotplug_work_fn+0x1a/0x30 process_one_work+0x1a7/0x370 worker_thread+0x30/0x380 kthread+0x112/0x130 ret_from_fork+0x35/0x40 We should use memmap as it did. On x86 the impact is limited to x86_32 builds, or x86_64 configurations that override the default setting for SPARSEMEM_VMEMMAP. Other memory hotplug archs (arm64, ia64, and ppc) also default to SPARSEMEM_VMEMMAP=y. [dan.j.williams@intel.com: changelog update] {rppt@linux.ibm.com: changelog update] Link: http://lkml.kernel.org/r/20200219030454.4844-1-bhe@redhat.com Fixes: ba72b4c8cf60 ("mm/sparsemem: support sub-section hotplug") Signed-off-by: Wei Yang Signed-off-by: Baoquan He Acked-by: David Hildenbrand Reviewed-by: Baoquan He Reviewed-by: Dan Williams Acked-by: Michal Hocko Cc: Mike Rapoport Cc: Oscar Salvador Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/sparse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/sparse.c b/mm/sparse.c index c184b69460b7..596b2a45b100 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -876,7 +876,7 @@ int __meminit sparse_add_section(int nid, unsigned long start_pfn, * Poison uninitialized struct pages in order to catch invalid flags * combinations. */ - page_init_poison(pfn_to_page(start_pfn), sizeof(struct page) * nr_pages); + page_init_poison(memmap, sizeof(struct page) * nr_pages); ms = __nr_to_section(section_nr); set_section_nid(section_nr, nid); -- cgit v1.2.3-59-g8ed1b From 305e519ce48e935702c32241f07d393c3c8fed3e Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Thu, 20 Feb 2020 20:04:30 -0800 Subject: lib/stackdepot.c: fix global out-of-bounds in stack_slabs Walter Wu has reported a potential case in which init_stack_slab() is called after stack_slabs[STACK_ALLOC_MAX_SLABS - 1] has already been initialized. In that case init_stack_slab() will overwrite stack_slabs[STACK_ALLOC_MAX_SLABS], which may result in a memory corruption. Link: http://lkml.kernel.org/r/20200218102950.260263-1-glider@google.com Fixes: cd11016e5f521 ("mm, kasan: stackdepot implementation. Enable stackdepot for SLAB") Signed-off-by: Alexander Potapenko Reported-by: Walter Wu Cc: Dmitry Vyukov Cc: Matthias Brugger Cc: Thomas Gleixner Cc: Josh Poimboeuf Cc: Kate Stewart Cc: Greg Kroah-Hartman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/stackdepot.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index ed717dd08ff3..81c69c08d1d1 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -83,15 +83,19 @@ static bool init_stack_slab(void **prealloc) return true; if (stack_slabs[depot_index] == NULL) { stack_slabs[depot_index] = *prealloc; + *prealloc = NULL; } else { - stack_slabs[depot_index + 1] = *prealloc; + /* If this is the last depot slab, do not touch the next one. */ + if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) { + stack_slabs[depot_index + 1] = *prealloc; + *prealloc = NULL; + } /* * This smp_store_release pairs with smp_load_acquire() from * |next_slab_inited| above and in stack_depot_save(). */ smp_store_release(&next_slab_inited, 1); } - *prealloc = NULL; return true; } -- cgit v1.2.3-59-g8ed1b From bb8d00ff51a0c5e58cebd2e698a778f4e3d34d48 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 20 Feb 2020 20:04:33 -0800 Subject: MAINTAINERS: use tabs for SAFESETID Use tabs for indentation instead of spaces for SAFESETID. All (!) other entries in MAINTAINERS use tabs (according to my simple grepping). Link: http://lkml.kernel.org/r/2bb2e52a-2694-816d-57b4-6cabfadd6c1a@infradead.org Signed-off-by: Randy Dunlap Cc: Micah Morton Cc: James Morris Cc: "Serge E. Hallyn" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 4beb8dc4c7eb..4e9bdb3c63b2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14582,10 +14582,10 @@ F: drivers/media/pci/saa7146/ F: include/media/drv-intf/saa7146* SAFESETID SECURITY MODULE -M: Micah Morton -S: Supported -F: security/safesetid/ -F: Documentation/admin-guide/LSM/SafeSetID.rst +M: Micah Morton +S: Supported +F: security/safesetid/ +F: Documentation/admin-guide/LSM/SafeSetID.rst SAMSUNG AUDIO (ASoC) DRIVERS M: Krzysztof Kozlowski -- cgit v1.2.3-59-g8ed1b From ff6993bb79b9f99bdac0b5378169052931b65432 Mon Sep 17 00:00:00 2001 From: Igor Druzhinin Date: Tue, 14 Jan 2020 14:43:19 +0000 Subject: scsi: libfc: free response frame from GPN_ID fc_disc_gpn_id_resp() should be the last function using it so free it here to avoid memory leak. Link: https://lore.kernel.org/r/1579013000-14570-2-git-send-email-igor.druzhinin@citrix.com Reviewed-by: Hannes Reinecke Signed-off-by: Igor Druzhinin Signed-off-by: Martin K. Petersen --- drivers/scsi/libfc/fc_disc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/libfc/fc_disc.c b/drivers/scsi/libfc/fc_disc.c index 9c5f7c9178c6..2b865c6423e2 100644 --- a/drivers/scsi/libfc/fc_disc.c +++ b/drivers/scsi/libfc/fc_disc.c @@ -628,6 +628,8 @@ redisc: } out: kref_put(&rdata->kref, fc_rport_destroy); + if (!IS_ERR(fp)) + fc_frame_free(fp); } /** -- cgit v1.2.3-59-g8ed1b From 7c990728b99ed6fbe9c75fc202fce1172d9916da Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Tue, 18 Feb 2020 19:08:51 -0800 Subject: ext4: fix potential race between s_flex_groups online resizing and access During an online resize an array of s_flex_groups structures gets replaced so it can get enlarged. If there is a concurrent access to the array and this memory has been reused then this can lead to an invalid memory access. The s_flex_group array has been converted into an array of pointers rather than an array of structures. This is to ensure that the information contained in the structures cannot get out of sync during a resize due to an accessor updating the value in the old structure after it has been copied but before the array pointer is updated. Since the structures them- selves are no longer copied but only the pointers to them this case is mitigated. Link: https://bugzilla.kernel.org/show_bug.cgi?id=206443 Link: https://lore.kernel.org/r/20200221053458.730016-4-tytso@mit.edu Signed-off-by: Suraj Jitindar Singh Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/ext4.h | 2 +- fs/ext4/ialloc.c | 23 +++++++++++------- fs/ext4/mballoc.c | 9 ++++--- fs/ext4/resize.c | 7 ++++-- fs/ext4/super.c | 72 ++++++++++++++++++++++++++++++++++++++----------------- 5 files changed, 76 insertions(+), 37 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index b1ece5329738..614fefa7dc7a 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1512,7 +1512,7 @@ struct ext4_sb_info { unsigned int s_extent_max_zeroout_kb; unsigned int s_log_groups_per_flex; - struct flex_groups *s_flex_groups; + struct flex_groups * __rcu *s_flex_groups; ext4_group_t s_flex_groups_allocated; /* workqueue for reserved extent conversions (buffered io) */ diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index c66e8f9451a2..f95ee99091e4 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -328,11 +328,13 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) percpu_counter_inc(&sbi->s_freeinodes_counter); if (sbi->s_log_groups_per_flex) { - ext4_group_t f = ext4_flex_group(sbi, block_group); + struct flex_groups *fg; - atomic_inc(&sbi->s_flex_groups[f].free_inodes); + fg = sbi_array_rcu_deref(sbi, s_flex_groups, + ext4_flex_group(sbi, block_group)); + atomic_inc(&fg->free_inodes); if (is_directory) - atomic_dec(&sbi->s_flex_groups[f].used_dirs); + atomic_dec(&fg->used_dirs); } BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata"); fatal = ext4_handle_dirty_metadata(handle, NULL, bh2); @@ -368,12 +370,13 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g, int flex_size, struct orlov_stats *stats) { struct ext4_group_desc *desc; - struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups; if (flex_size > 1) { - stats->free_inodes = atomic_read(&flex_group[g].free_inodes); - stats->free_clusters = atomic64_read(&flex_group[g].free_clusters); - stats->used_dirs = atomic_read(&flex_group[g].used_dirs); + struct flex_groups *fg = sbi_array_rcu_deref(EXT4_SB(sb), + s_flex_groups, g); + stats->free_inodes = atomic_read(&fg->free_inodes); + stats->free_clusters = atomic64_read(&fg->free_clusters); + stats->used_dirs = atomic_read(&fg->used_dirs); return; } @@ -1054,7 +1057,8 @@ got: if (sbi->s_log_groups_per_flex) { ext4_group_t f = ext4_flex_group(sbi, group); - atomic_inc(&sbi->s_flex_groups[f].used_dirs); + atomic_inc(&sbi_array_rcu_deref(sbi, s_flex_groups, + f)->used_dirs); } } if (ext4_has_group_desc_csum(sb)) { @@ -1077,7 +1081,8 @@ got: if (sbi->s_log_groups_per_flex) { flex_group = ext4_flex_group(sbi, group); - atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes); + atomic_dec(&sbi_array_rcu_deref(sbi, s_flex_groups, + flex_group)->free_inodes); } inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 1b46fb63692a..51a78eb65f3c 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3038,7 +3038,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, ext4_group_t flex_group = ext4_flex_group(sbi, ac->ac_b_ex.fe_group); atomic64_sub(ac->ac_b_ex.fe_len, - &sbi->s_flex_groups[flex_group].free_clusters); + &sbi_array_rcu_deref(sbi, s_flex_groups, + flex_group)->free_clusters); } err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); @@ -4936,7 +4937,8 @@ do_more: if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); atomic64_add(count_clusters, - &sbi->s_flex_groups[flex_group].free_clusters); + &sbi_array_rcu_deref(sbi, s_flex_groups, + flex_group)->free_clusters); } /* @@ -5093,7 +5095,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); atomic64_add(clusters_freed, - &sbi->s_flex_groups[flex_group].free_clusters); + &sbi_array_rcu_deref(sbi, s_flex_groups, + flex_group)->free_clusters); } ext4_mb_unload_buddy(&e4b); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 536cc9f38091..a50b51270ea9 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1430,11 +1430,14 @@ static void ext4_update_super(struct super_block *sb, percpu_counter_read(&sbi->s_freeclusters_counter)); if (ext4_has_feature_flex_bg(sb) && sbi->s_log_groups_per_flex) { ext4_group_t flex_group; + struct flex_groups *fg; + flex_group = ext4_flex_group(sbi, group_data[0].group); + fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group); atomic64_add(EXT4_NUM_B2C(sbi, free_blocks), - &sbi->s_flex_groups[flex_group].free_clusters); + &fg->free_clusters); atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count, - &sbi->s_flex_groups[flex_group].free_inodes); + &fg->free_inodes); } /* diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e00bcc19099f..6b7e628b7903 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1015,6 +1015,7 @@ static void ext4_put_super(struct super_block *sb) struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; struct buffer_head **group_desc; + struct flex_groups **flex_groups; int aborted = 0; int i, err; @@ -1052,8 +1053,13 @@ static void ext4_put_super(struct super_block *sb) for (i = 0; i < sbi->s_gdb_count; i++) brelse(group_desc[i]); kvfree(group_desc); + flex_groups = rcu_dereference(sbi->s_flex_groups); + if (flex_groups) { + for (i = 0; i < sbi->s_flex_groups_allocated; i++) + kvfree(flex_groups[i]); + kvfree(flex_groups); + } rcu_read_unlock(); - kvfree(sbi->s_flex_groups); percpu_counter_destroy(&sbi->s_freeclusters_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); @@ -2384,8 +2390,8 @@ done: int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) { struct ext4_sb_info *sbi = EXT4_SB(sb); - struct flex_groups *new_groups; - int size; + struct flex_groups **old_groups, **new_groups; + int size, i; if (!sbi->s_log_groups_per_flex) return 0; @@ -2394,22 +2400,37 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) if (size <= sbi->s_flex_groups_allocated) return 0; - size = roundup_pow_of_two(size * sizeof(struct flex_groups)); - new_groups = kvzalloc(size, GFP_KERNEL); + new_groups = kvzalloc(roundup_pow_of_two(size * + sizeof(*sbi->s_flex_groups)), GFP_KERNEL); if (!new_groups) { - ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups", - size / (int) sizeof(struct flex_groups)); + ext4_msg(sb, KERN_ERR, + "not enough memory for %d flex group pointers", size); return -ENOMEM; } - - if (sbi->s_flex_groups) { - memcpy(new_groups, sbi->s_flex_groups, - (sbi->s_flex_groups_allocated * - sizeof(struct flex_groups))); - kvfree(sbi->s_flex_groups); + for (i = sbi->s_flex_groups_allocated; i < size; i++) { + new_groups[i] = kvzalloc(roundup_pow_of_two( + sizeof(struct flex_groups)), + GFP_KERNEL); + if (!new_groups[i]) { + for (i--; i >= sbi->s_flex_groups_allocated; i--) + kvfree(new_groups[i]); + kvfree(new_groups); + ext4_msg(sb, KERN_ERR, + "not enough memory for %d flex groups", size); + return -ENOMEM; + } } - sbi->s_flex_groups = new_groups; - sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups); + rcu_read_lock(); + old_groups = rcu_dereference(sbi->s_flex_groups); + if (old_groups) + memcpy(new_groups, old_groups, + (sbi->s_flex_groups_allocated * + sizeof(struct flex_groups *))); + rcu_read_unlock(); + rcu_assign_pointer(sbi->s_flex_groups, new_groups); + sbi->s_flex_groups_allocated = size; + if (old_groups) + ext4_kvfree_array_rcu(old_groups); return 0; } @@ -2417,6 +2438,7 @@ static int ext4_fill_flex_info(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_group_desc *gdp = NULL; + struct flex_groups *fg; ext4_group_t flex_group; int i, err; @@ -2434,12 +2456,11 @@ static int ext4_fill_flex_info(struct super_block *sb) gdp = ext4_get_group_desc(sb, i, NULL); flex_group = ext4_flex_group(sbi, i); - atomic_add(ext4_free_inodes_count(sb, gdp), - &sbi->s_flex_groups[flex_group].free_inodes); + fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group); + atomic_add(ext4_free_inodes_count(sb, gdp), &fg->free_inodes); atomic64_add(ext4_free_group_clusters(sb, gdp), - &sbi->s_flex_groups[flex_group].free_clusters); - atomic_add(ext4_used_dirs_count(sb, gdp), - &sbi->s_flex_groups[flex_group].used_dirs); + &fg->free_clusters); + atomic_add(ext4_used_dirs_count(sb, gdp), &fg->used_dirs); } return 1; @@ -3641,6 +3662,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) struct buffer_head *bh, **group_desc; struct ext4_super_block *es = NULL; struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + struct flex_groups **flex_groups; ext4_fsblk_t block; ext4_fsblk_t sb_block = get_sb_block(&data); ext4_fsblk_t logical_sb_block; @@ -4692,8 +4714,14 @@ failed_mount7: ext4_unregister_li_request(sb); failed_mount6: ext4_mb_release(sb); - if (sbi->s_flex_groups) - kvfree(sbi->s_flex_groups); + rcu_read_lock(); + flex_groups = rcu_dereference(sbi->s_flex_groups); + if (flex_groups) { + for (i = 0; i < sbi->s_flex_groups_allocated; i++) + kvfree(flex_groups[i]); + kvfree(flex_groups); + } + rcu_read_unlock(); percpu_counter_destroy(&sbi->s_freeclusters_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); -- cgit v1.2.3-59-g8ed1b From bbd55937de8f2754adc5792b0f8e5ff7d9c0420e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 19 Feb 2020 10:30:46 -0800 Subject: ext4: rename s_journal_flag_rwsem to s_writepages_rwsem In preparation for making s_journal_flag_rwsem synchronize ext4_writepages() with changes to both the EXTENTS and JOURNAL_DATA flags (rather than just JOURNAL_DATA as it does currently), rename it to s_writepages_rwsem. Link: https://lore.kernel.org/r/20200219183047.47417-2-ebiggers@kernel.org Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Cc: stable@kernel.org --- fs/ext4/ext4.h | 2 +- fs/ext4/inode.c | 14 +++++++------- fs/ext4/super.c | 6 +++--- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 614fefa7dc7a..4b986ad42b9d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1553,7 +1553,7 @@ struct ext4_sb_info { struct ratelimit_state s_msg_ratelimit_state; /* Barrier between changing inodes' journal flags and writepages ops. */ - struct percpu_rw_semaphore s_journal_flag_rwsem; + struct percpu_rw_semaphore s_writepages_rwsem; struct dax_device *s_daxdev; #ifdef CONFIG_EXT4_DEBUG unsigned long s_simulate_fail; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 6e1d81ed44ad..fa0ff78dc033 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2628,7 +2628,7 @@ static int ext4_writepages(struct address_space *mapping, if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) return -EIO; - percpu_down_read(&sbi->s_journal_flag_rwsem); + percpu_down_read(&sbi->s_writepages_rwsem); trace_ext4_writepages(inode, wbc); /* @@ -2849,7 +2849,7 @@ unplug: out_writepages: trace_ext4_writepages_result(inode, wbc, ret, nr_to_write - wbc->nr_to_write); - percpu_up_read(&sbi->s_journal_flag_rwsem); + percpu_up_read(&sbi->s_writepages_rwsem); return ret; } @@ -2864,13 +2864,13 @@ static int ext4_dax_writepages(struct address_space *mapping, if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) return -EIO; - percpu_down_read(&sbi->s_journal_flag_rwsem); + percpu_down_read(&sbi->s_writepages_rwsem); trace_ext4_writepages(inode, wbc); ret = dax_writeback_mapping_range(mapping, sbi->s_daxdev, wbc); trace_ext4_writepages_result(inode, wbc, ret, nr_to_write - wbc->nr_to_write); - percpu_up_read(&sbi->s_journal_flag_rwsem); + percpu_up_read(&sbi->s_writepages_rwsem); return ret; } @@ -5861,7 +5861,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) } } - percpu_down_write(&sbi->s_journal_flag_rwsem); + percpu_down_write(&sbi->s_writepages_rwsem); jbd2_journal_lock_updates(journal); /* @@ -5878,7 +5878,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) err = jbd2_journal_flush(journal); if (err < 0) { jbd2_journal_unlock_updates(journal); - percpu_up_write(&sbi->s_journal_flag_rwsem); + percpu_up_write(&sbi->s_writepages_rwsem); return err; } ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); @@ -5886,7 +5886,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) ext4_set_aops(inode); jbd2_journal_unlock_updates(journal); - percpu_up_write(&sbi->s_journal_flag_rwsem); + percpu_up_write(&sbi->s_writepages_rwsem); if (val) up_write(&EXT4_I(inode)->i_mmap_sem); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6b7e628b7903..6928fc229799 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1064,7 +1064,7 @@ static void ext4_put_super(struct super_block *sb) percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirtyclusters_counter); - percpu_free_rwsem(&sbi->s_journal_flag_rwsem); + percpu_free_rwsem(&sbi->s_writepages_rwsem); #ifdef CONFIG_QUOTA for (i = 0; i < EXT4_MAXQUOTAS; i++) kfree(get_qf_name(sb, sbi, i)); @@ -4626,7 +4626,7 @@ no_journal: err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0, GFP_KERNEL); if (!err) - err = percpu_init_rwsem(&sbi->s_journal_flag_rwsem); + err = percpu_init_rwsem(&sbi->s_writepages_rwsem); if (err) { ext4_msg(sb, KERN_ERR, "insufficient memory"); @@ -4726,7 +4726,7 @@ failed_mount6: percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirtyclusters_counter); - percpu_free_rwsem(&sbi->s_journal_flag_rwsem); + percpu_free_rwsem(&sbi->s_writepages_rwsem); failed_mount5: ext4_ext_release(sb); ext4_release_system_zone(sb); -- cgit v1.2.3-59-g8ed1b From cb85f4d23f794e24127f3e562cb3b54b0803f456 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 19 Feb 2020 10:30:47 -0800 Subject: ext4: fix race between writepages and enabling EXT4_EXTENTS_FL If EXT4_EXTENTS_FL is set on an inode while ext4_writepages() is running on it, the following warning in ext4_add_complete_io() can be hit: WARNING: CPU: 1 PID: 0 at fs/ext4/page-io.c:234 ext4_put_io_end_defer+0xf0/0x120 Here's a minimal reproducer (not 100% reliable) (root isn't required): while true; do sync done & while true; do rm -f file touch file chattr -e file echo X >> file chattr +e file done The problem is that in ext4_writepages(), ext4_should_dioread_nolock() (which only returns true on extent-based files) is checked once to set the number of reserved journal credits, and also again later to select the flags for ext4_map_blocks() and copy the reserved journal handle to ext4_io_end::handle. But if EXT4_EXTENTS_FL is being concurrently set, the first check can see dioread_nolock disabled while the later one can see it enabled, causing the reserved handle to unexpectedly be NULL. Since changing EXT4_EXTENTS_FL is uncommon, and there may be other races related to doing so as well, fix this by synchronizing changing EXT4_EXTENTS_FL with ext4_writepages() via the existing s_writepages_rwsem (previously called s_journal_flag_rwsem). This was originally reported by syzbot without a reproducer at https://syzkaller.appspot.com/bug?extid=2202a584a00fffd19fbf, but now that dioread_nolock is the default I also started seeing this when running syzkaller locally. Link: https://lore.kernel.org/r/20200219183047.47417-3-ebiggers@kernel.org Reported-by: syzbot+2202a584a00fffd19fbf@syzkaller.appspotmail.com Fixes: 6b523df4fb5a ("ext4: use transaction reservation for extent conversion in ext4_end_io") Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Cc: stable@kernel.org --- fs/ext4/ext4.h | 5 ++++- fs/ext4/migrate.c | 27 +++++++++++++++++++-------- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 4b986ad42b9d..61b37a052052 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1552,7 +1552,10 @@ struct ext4_sb_info { struct ratelimit_state s_warning_ratelimit_state; struct ratelimit_state s_msg_ratelimit_state; - /* Barrier between changing inodes' journal flags and writepages ops. */ + /* + * Barrier between writepages ops and changing any inode's JOURNAL_DATA + * or EXTENTS flag. + */ struct percpu_rw_semaphore s_writepages_rwsem; struct dax_device *s_daxdev; #ifdef CONFIG_EXT4_DEBUG diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 89725fa42573..fb6520f37135 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -407,6 +407,7 @@ static int free_ext_block(handle_t *handle, struct inode *inode) int ext4_ext_migrate(struct inode *inode) { + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); handle_t *handle; int retval = 0, i; __le32 *i_data; @@ -431,6 +432,8 @@ int ext4_ext_migrate(struct inode *inode) */ return retval; + percpu_down_write(&sbi->s_writepages_rwsem); + /* * Worst case we can touch the allocation bitmaps, a bgd * block, and a block to link in the orphan list. We do need @@ -441,7 +444,7 @@ int ext4_ext_migrate(struct inode *inode) if (IS_ERR(handle)) { retval = PTR_ERR(handle); - return retval; + goto out_unlock; } goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) * EXT4_INODES_PER_GROUP(inode->i_sb)) + 1; @@ -452,7 +455,7 @@ int ext4_ext_migrate(struct inode *inode) if (IS_ERR(tmp_inode)) { retval = PTR_ERR(tmp_inode); ext4_journal_stop(handle); - return retval; + goto out_unlock; } i_size_write(tmp_inode, i_size_read(inode)); /* @@ -494,7 +497,7 @@ int ext4_ext_migrate(struct inode *inode) */ ext4_orphan_del(NULL, tmp_inode); retval = PTR_ERR(handle); - goto out; + goto out_tmp_inode; } ei = EXT4_I(inode); @@ -576,10 +579,11 @@ err_out: ext4_ext_tree_init(handle, tmp_inode); out_stop: ext4_journal_stop(handle); -out: +out_tmp_inode: unlock_new_inode(tmp_inode); iput(tmp_inode); - +out_unlock: + percpu_up_write(&sbi->s_writepages_rwsem); return retval; } @@ -589,7 +593,8 @@ out: int ext4_ind_migrate(struct inode *inode) { struct ext4_extent_header *eh; - struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + struct ext4_super_block *es = sbi->s_es; struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_extent *ex; unsigned int i, len; @@ -613,9 +618,13 @@ int ext4_ind_migrate(struct inode *inode) if (test_opt(inode->i_sb, DELALLOC)) ext4_alloc_da_blocks(inode); + percpu_down_write(&sbi->s_writepages_rwsem); + handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1); - if (IS_ERR(handle)) - return PTR_ERR(handle); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out_unlock; + } down_write(&EXT4_I(inode)->i_data_sem); ret = ext4_ext_check_inode(inode); @@ -650,5 +659,7 @@ int ext4_ind_migrate(struct inode *inode) errout: ext4_journal_stop(handle); up_write(&EXT4_I(inode)->i_data_sem); +out_unlock: + percpu_up_write(&sbi->s_writepages_rwsem); return ret; } -- cgit v1.2.3-59-g8ed1b From 8eedabfd66b68a4623beec0789eac54b8c9d0fb6 Mon Sep 17 00:00:00 2001 From: wangyan Date: Thu, 20 Feb 2020 21:46:14 +0800 Subject: jbd2: fix ocfs2 corrupt when clearing block group bits I found a NULL pointer dereference in ocfs2_block_group_clear_bits(). The running environment: kernel version: 4.19 A cluster with two nodes, 5 luns mounted on two nodes, and do some file operations like dd/fallocate/truncate/rm on every lun with storage network disconnection. The fallocate operation on dm-23-45 caused an null pointer dereference. The information of NULL pointer dereference as follows: [577992.878282] JBD2: Error -5 detected when updating journal superblock for dm-23-45. [577992.878290] Aborting journal on device dm-23-45. ... [577992.890778] JBD2: Error -5 detected when updating journal superblock for dm-24-46. [577992.890908] __journal_remove_journal_head: freeing b_committed_data [577992.890916] (fallocate,88392,52):ocfs2_extend_trans:474 ERROR: status = -30 [577992.890918] __journal_remove_journal_head: freeing b_committed_data [577992.890920] (fallocate,88392,52):ocfs2_rotate_tree_right:2500 ERROR: status = -30 [577992.890922] __journal_remove_journal_head: freeing b_committed_data [577992.890924] (fallocate,88392,52):ocfs2_do_insert_extent:4382 ERROR: status = -30 [577992.890928] (fallocate,88392,52):ocfs2_insert_extent:4842 ERROR: status = -30 [577992.890928] __journal_remove_journal_head: freeing b_committed_data [577992.890930] (fallocate,88392,52):ocfs2_add_clusters_in_btree:4947 ERROR: status = -30 [577992.890933] __journal_remove_journal_head: freeing b_committed_data [577992.890939] __journal_remove_journal_head: freeing b_committed_data [577992.890949] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000020 [577992.890950] Mem abort info: [577992.890951] ESR = 0x96000004 [577992.890952] Exception class = DABT (current EL), IL = 32 bits [577992.890952] SET = 0, FnV = 0 [577992.890953] EA = 0, S1PTW = 0 [577992.890954] Data abort info: [577992.890955] ISV = 0, ISS = 0x00000004 [577992.890956] CM = 0, WnR = 0 [577992.890958] user pgtable: 4k pages, 48-bit VAs, pgdp = 00000000f8da07a9 [577992.890960] [0000000000000020] pgd=0000000000000000 [577992.890964] Internal error: Oops: 96000004 [#1] SMP [577992.890965] Process fallocate (pid: 88392, stack limit = 0x00000000013db2fd) [577992.890968] CPU: 52 PID: 88392 Comm: fallocate Kdump: loaded Tainted: G W OE 4.19.36 #1 [577992.890969] Hardware name: Huawei TaiShan 2280 V2/BC82AMDD, BIOS 0.98 08/25/2019 [577992.890971] pstate: 60400009 (nZCv daif +PAN -UAO) [577992.891054] pc : _ocfs2_free_suballoc_bits+0x63c/0x968 [ocfs2] [577992.891082] lr : _ocfs2_free_suballoc_bits+0x618/0x968 [ocfs2] [577992.891084] sp : ffff0000c8e2b810 [577992.891085] x29: ffff0000c8e2b820 x28: 0000000000000000 [577992.891087] x27: 00000000000006f3 x26: ffffa07957b02e70 [577992.891089] x25: ffff807c59d50000 x24: 00000000000006f2 [577992.891091] x23: 0000000000000001 x22: ffff807bd39abc30 [577992.891093] x21: ffff0000811d9000 x20: ffffa07535d6a000 [577992.891097] x19: ffff000001681638 x18: ffffffffffffffff [577992.891098] x17: 0000000000000000 x16: ffff000080a03df0 [577992.891100] x15: ffff0000811d9708 x14: 203d207375746174 [577992.891101] x13: 73203a524f525245 x12: 20373439343a6565 [577992.891103] x11: 0000000000000038 x10: 0101010101010101 [577992.891106] x9 : ffffa07c68a85d70 x8 : 7f7f7f7f7f7f7f7f [577992.891109] x7 : 0000000000000000 x6 : 0000000000000080 [577992.891110] x5 : 0000000000000000 x4 : 0000000000000002 [577992.891112] x3 : ffff000001713390 x2 : 2ff90f88b1c22f00 [577992.891114] x1 : ffff807bd39abc30 x0 : 0000000000000000 [577992.891116] Call trace: [577992.891139] _ocfs2_free_suballoc_bits+0x63c/0x968 [ocfs2] [577992.891162] _ocfs2_free_clusters+0x100/0x290 [ocfs2] [577992.891185] ocfs2_free_clusters+0x50/0x68 [ocfs2] [577992.891206] ocfs2_add_clusters_in_btree+0x198/0x5e0 [ocfs2] [577992.891227] ocfs2_add_inode_data+0x94/0xc8 [ocfs2] [577992.891248] ocfs2_extend_allocation+0x1bc/0x7a8 [ocfs2] [577992.891269] ocfs2_allocate_extents+0x14c/0x338 [ocfs2] [577992.891290] __ocfs2_change_file_space+0x3f8/0x610 [ocfs2] [577992.891309] ocfs2_fallocate+0xe4/0x128 [ocfs2] [577992.891316] vfs_fallocate+0x11c/0x250 [577992.891317] ksys_fallocate+0x54/0x88 [577992.891319] __arm64_sys_fallocate+0x28/0x38 [577992.891323] el0_svc_common+0x78/0x130 [577992.891325] el0_svc_handler+0x38/0x78 [577992.891327] el0_svc+0x8/0xc My analysis process as follows: ocfs2_fallocate __ocfs2_change_file_space ocfs2_allocate_extents ocfs2_extend_allocation ocfs2_add_inode_data ocfs2_add_clusters_in_btree ocfs2_insert_extent ocfs2_do_insert_extent ocfs2_rotate_tree_right ocfs2_extend_rotate_transaction ocfs2_extend_trans jbd2_journal_restart jbd2__journal_restart /* handle->h_transaction is NULL, * is_handle_aborted(handle) is true */ handle->h_transaction = NULL; start_this_handle return -EROFS; ocfs2_free_clusters _ocfs2_free_clusters _ocfs2_free_suballoc_bits ocfs2_block_group_clear_bits ocfs2_journal_access_gd __ocfs2_journal_access jbd2_journal_get_undo_access /* I think jbd2_write_access_granted() will * return true, because do_get_write_access() * will return -EROFS. */ if (jbd2_write_access_granted(...)) return 0; do_get_write_access /* handle->h_transaction is NULL, it will * return -EROFS here, so do_get_write_access() * was not called. */ if (is_handle_aborted(handle)) return -EROFS; /* bh2jh(group_bh) is NULL, caused NULL pointer dereference */ undo_bg = (struct ocfs2_group_desc *) bh2jh(group_bh)->b_committed_data; If handle->h_transaction == NULL, then jbd2_write_access_granted() does not really guarantee that journal_head will stay around, not even speaking of its b_committed_data. The bh2jh(group_bh) can be removed after ocfs2_journal_access_gd() and before call "bh2jh(group_bh)->b_committed_data". So, we should move is_handle_aborted() check from do_get_write_access() into jbd2_journal_get_undo_access() and jbd2_journal_get_write_access() before the call to jbd2_write_access_granted(). Link: https://lore.kernel.org/r/f72a623f-b3f1-381a-d91d-d22a1c83a336@huawei.com Signed-off-by: Yan Wang Signed-off-by: Theodore Ts'o Reviewed-by: Jun Piao Reviewed-by: Jan Kara Cc: stable@kernel.org --- fs/jbd2/transaction.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 2dd848a743ed..d181948c0390 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -936,8 +936,6 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, char *frozen_buffer = NULL; unsigned long start_lock, time_lock; - if (is_handle_aborted(handle)) - return -EROFS; journal = transaction->t_journal; jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy); @@ -1189,6 +1187,9 @@ int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh) struct journal_head *jh; int rc; + if (is_handle_aborted(handle)) + return -EROFS; + if (jbd2_write_access_granted(handle, bh, false)) return 0; @@ -1326,6 +1327,9 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh) struct journal_head *jh; char *committed_data = NULL; + if (is_handle_aborted(handle)) + return -EROFS; + if (jbd2_write_access_granted(handle, bh, true)) return 0; -- cgit v1.2.3-59-g8ed1b From 9db176bceb5c5df4990486709da386edadc6bd1d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 21 Feb 2020 11:08:35 +0100 Subject: ext4: fix mount failure with quota configured as module When CONFIG_QFMT_V2 is configured as a module, the test in ext4_feature_set_ok() fails and so mount of filesystems with quota or project features fails. Fix the test to use IS_ENABLED macro which works properly even for modules. Link: https://lore.kernel.org/r/20200221100835.9332-1-jack@suse.cz Fixes: d65d87a07476 ("ext4: improve explanation of a mount failure caused by a misconfigured kernel") Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6928fc229799..ff1b764b0c0e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3034,7 +3034,7 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) return 0; } -#if !defined(CONFIG_QUOTA) || !defined(CONFIG_QFMT_V2) +#if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2) if (!readonly && (ext4_has_feature_quota(sb) || ext4_has_feature_project(sb))) { ext4_msg(sb, KERN_ERR, -- cgit v1.2.3-59-g8ed1b From f66ee0410b1c3481ee75e5db9b34547b4d582465 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 11 Feb 2020 23:20:43 +0100 Subject: netfilter: ipset: Fix "INFO: rcu detected stall in hash_xxx" reports In the case of huge hash:* types of sets, due to the single spinlock of a set the processing of the whole set under spinlock protection could take too long. There were four places where the whole hash table of the set was processed from bucket to bucket under holding the spinlock: - During resizing a set, the original set was locked to exclude kernel side add/del element operations (userspace add/del is excluded by the nfnetlink mutex). The original set is actually just read during the resize, so the spinlocking is replaced with rcu locking of regions. However, thus there can be parallel kernel side add/del of entries. In order not to loose those operations a backlog is added and replayed after the successful resize. - Garbage collection of timed out entries was also protected by the spinlock. In order not to lock too long, region locking is introduced and a single region is processed in one gc go. Also, the simple timer based gc running is replaced with a workqueue based solution. The internal book-keeping (number of elements, size of extensions) is moved to region level due to the region locking. - Adding elements: when the max number of the elements is reached, the gc was called to evict the timed out entries. The new approach is that the gc is called just for the matching region, assuming that if the region (proportionally) seems to be full, then the whole set does. We could scan the other regions to check every entry under rcu locking, but for huge sets it'd mean a slowdown at adding elements. - Listing the set header data: when the set was defined with timeout support, the garbage collector was called to clean up timed out entries to get the correct element numbers and set size values. Now the set is scanned to check non-timed out entries, without actually calling the gc for the whole set. Thanks to Florian Westphal for helping me to solve the SOFTIRQ-safe -> SOFTIRQ-unsafe lock order issues during working on the patch. Reported-by: syzbot+4b0e9d4ff3cf117837e5@syzkaller.appspotmail.com Reported-by: syzbot+c27b8d5010f45c666ed1@syzkaller.appspotmail.com Reported-by: syzbot+68a806795ac89df3aa1c@syzkaller.appspotmail.com Fixes: 23c42a403a9c ("netfilter: ipset: Introduction of new commands and protocol version 7") Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 11 +- net/netfilter/ipset/ip_set_core.c | 34 +- net/netfilter/ipset/ip_set_hash_gen.h | 633 +++++++++++++++++++++++---------- 3 files changed, 472 insertions(+), 206 deletions(-) diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 908d38dbcb91..5448c8b443db 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -121,6 +121,7 @@ struct ip_set_ext { u32 timeout; u8 packets_op; u8 bytes_op; + bool target; }; struct ip_set; @@ -187,6 +188,14 @@ struct ip_set_type_variant { /* Return true if "b" set is the same as "a" * according to the create set parameters */ bool (*same_set)(const struct ip_set *a, const struct ip_set *b); + /* Region-locking is used */ + bool region_lock; +}; + +struct ip_set_region { + spinlock_t lock; /* Region lock */ + size_t ext_size; /* Size of the dynamic extensions */ + u32 elements; /* Number of elements vs timeout */ }; /* The core set type structure */ @@ -501,7 +510,7 @@ ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo, } #define IP_SET_INIT_KEXT(skb, opt, set) \ - { .bytes = (skb)->len, .packets = 1, \ + { .bytes = (skb)->len, .packets = 1, .target = true,\ .timeout = ip_set_adt_opt_timeout(opt, set) } #define IP_SET_INIT_UEXT(set) \ diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 69c107f9ba8d..8dd17589217d 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -723,6 +723,20 @@ ip_set_rcu_get(struct net *net, ip_set_id_t index) return set; } +static inline void +ip_set_lock(struct ip_set *set) +{ + if (!set->variant->region_lock) + spin_lock_bh(&set->lock); +} + +static inline void +ip_set_unlock(struct ip_set *set) +{ + if (!set->variant->region_lock) + spin_unlock_bh(&set->lock); +} + int ip_set_test(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) @@ -744,9 +758,9 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb, if (ret == -EAGAIN) { /* Type requests element to be completed */ pr_debug("element must be completed, ADD is triggered\n"); - spin_lock_bh(&set->lock); + ip_set_lock(set); set->variant->kadt(set, skb, par, IPSET_ADD, opt); - spin_unlock_bh(&set->lock); + ip_set_unlock(set); ret = 1; } else { /* --return-nomatch: invert matched element */ @@ -775,9 +789,9 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb, !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) return -IPSET_ERR_TYPE_MISMATCH; - spin_lock_bh(&set->lock); + ip_set_lock(set); ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt); - spin_unlock_bh(&set->lock); + ip_set_unlock(set); return ret; } @@ -797,9 +811,9 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb, !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) return -IPSET_ERR_TYPE_MISMATCH; - spin_lock_bh(&set->lock); + ip_set_lock(set); ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt); - spin_unlock_bh(&set->lock); + ip_set_unlock(set); return ret; } @@ -1264,9 +1278,9 @@ ip_set_flush_set(struct ip_set *set) { pr_debug("set: %s\n", set->name); - spin_lock_bh(&set->lock); + ip_set_lock(set); set->variant->flush(set); - spin_unlock_bh(&set->lock); + ip_set_unlock(set); } static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb, @@ -1713,9 +1727,9 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, bool eexist = flags & IPSET_FLAG_EXIST, retried = false; do { - spin_lock_bh(&set->lock); + ip_set_lock(set); ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried); - spin_unlock_bh(&set->lock); + ip_set_unlock(set); retried = true; } while (ret == -EAGAIN && set->variant->resize && diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 7480ce55b5c8..71e93eac0831 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -7,13 +7,21 @@ #include #include #include +#include #include -#define __ipset_dereference_protected(p, c) rcu_dereference_protected(p, c) -#define ipset_dereference_protected(p, set) \ - __ipset_dereference_protected(p, lockdep_is_held(&(set)->lock)) - -#define rcu_dereference_bh_nfnl(p) rcu_dereference_bh_check(p, 1) +#define __ipset_dereference(p) \ + rcu_dereference_protected(p, 1) +#define ipset_dereference_nfnl(p) \ + rcu_dereference_protected(p, \ + lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET)) +#define ipset_dereference_set(p, set) \ + rcu_dereference_protected(p, \ + lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) || \ + lockdep_is_held(&(set)->lock)) +#define ipset_dereference_bh_nfnl(p) \ + rcu_dereference_bh_check(p, \ + lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET)) /* Hashing which uses arrays to resolve clashing. The hash table is resized * (doubled) when searching becomes too long. @@ -72,11 +80,35 @@ struct hbucket { __aligned(__alignof__(u64)); }; +/* Region size for locking == 2^HTABLE_REGION_BITS */ +#define HTABLE_REGION_BITS 10 +#define ahash_numof_locks(htable_bits) \ + ((htable_bits) < HTABLE_REGION_BITS ? 1 \ + : jhash_size((htable_bits) - HTABLE_REGION_BITS)) +#define ahash_sizeof_regions(htable_bits) \ + (ahash_numof_locks(htable_bits) * sizeof(struct ip_set_region)) +#define ahash_region(n, htable_bits) \ + ((n) % ahash_numof_locks(htable_bits)) +#define ahash_bucket_start(h, htable_bits) \ + ((htable_bits) < HTABLE_REGION_BITS ? 0 \ + : (h) * jhash_size(HTABLE_REGION_BITS)) +#define ahash_bucket_end(h, htable_bits) \ + ((htable_bits) < HTABLE_REGION_BITS ? jhash_size(htable_bits) \ + : ((h) + 1) * jhash_size(HTABLE_REGION_BITS)) + +struct htable_gc { + struct delayed_work dwork; + struct ip_set *set; /* Set the gc belongs to */ + u32 region; /* Last gc run position */ +}; + /* The hash table: the table size stored here in order to make resizing easy */ struct htable { atomic_t ref; /* References for resizing */ - atomic_t uref; /* References for dumping */ + atomic_t uref; /* References for dumping and gc */ u8 htable_bits; /* size of hash table == 2^htable_bits */ + u32 maxelem; /* Maxelem per region */ + struct ip_set_region *hregion; /* Region locks and ext sizes */ struct hbucket __rcu *bucket[0]; /* hashtable buckets */ }; @@ -162,6 +194,10 @@ htable_bits(u32 hashsize) #define NLEN 0 #endif /* IP_SET_HASH_WITH_NETS */ +#define SET_ELEM_EXPIRED(set, d) \ + (SET_WITH_TIMEOUT(set) && \ + ip_set_timeout_expired(ext_timeout(d, set))) + #endif /* _IP_SET_HASH_GEN_H */ #ifndef MTYPE @@ -205,10 +241,12 @@ htable_bits(u32 hashsize) #undef mtype_test_cidrs #undef mtype_test #undef mtype_uref -#undef mtype_expire #undef mtype_resize +#undef mtype_ext_size +#undef mtype_resize_ad #undef mtype_head #undef mtype_list +#undef mtype_gc_do #undef mtype_gc #undef mtype_gc_init #undef mtype_variant @@ -247,10 +285,12 @@ htable_bits(u32 hashsize) #define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs) #define mtype_test IPSET_TOKEN(MTYPE, _test) #define mtype_uref IPSET_TOKEN(MTYPE, _uref) -#define mtype_expire IPSET_TOKEN(MTYPE, _expire) #define mtype_resize IPSET_TOKEN(MTYPE, _resize) +#define mtype_ext_size IPSET_TOKEN(MTYPE, _ext_size) +#define mtype_resize_ad IPSET_TOKEN(MTYPE, _resize_ad) #define mtype_head IPSET_TOKEN(MTYPE, _head) #define mtype_list IPSET_TOKEN(MTYPE, _list) +#define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do) #define mtype_gc IPSET_TOKEN(MTYPE, _gc) #define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init) #define mtype_variant IPSET_TOKEN(MTYPE, _variant) @@ -275,8 +315,7 @@ htable_bits(u32 hashsize) /* The generic hash structure */ struct htype { struct htable __rcu *table; /* the hash table */ - struct timer_list gc; /* garbage collection when timeout enabled */ - struct ip_set *set; /* attached to this ip_set */ + struct htable_gc gc; /* gc workqueue */ u32 maxelem; /* max elements in the hash */ u32 initval; /* random jhash init value */ #ifdef IP_SET_HASH_WITH_MARKMASK @@ -288,21 +327,33 @@ struct htype { #ifdef IP_SET_HASH_WITH_NETMASK u8 netmask; /* netmask value for subnets to store */ #endif + struct list_head ad; /* Resize add|del backlist */ struct mtype_elem next; /* temporary storage for uadd */ #ifdef IP_SET_HASH_WITH_NETS struct net_prefixes nets[NLEN]; /* book-keeping of prefixes */ #endif }; +/* ADD|DEL entries saved during resize */ +struct mtype_resize_ad { + struct list_head list; + enum ipset_adt ad; /* ADD|DEL element */ + struct mtype_elem d; /* Element value */ + struct ip_set_ext ext; /* Extensions for ADD */ + struct ip_set_ext mext; /* Target extensions for ADD */ + u32 flags; /* Flags for ADD */ +}; + #ifdef IP_SET_HASH_WITH_NETS /* Network cidr size book keeping when the hash stores different * sized networks. cidr == real cidr + 1 to support /0. */ static void -mtype_add_cidr(struct htype *h, u8 cidr, u8 n) +mtype_add_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n) { int i, j; + spin_lock_bh(&set->lock); /* Add in increasing prefix order, so larger cidr first */ for (i = 0, j = -1; i < NLEN && h->nets[i].cidr[n]; i++) { if (j != -1) { @@ -311,7 +362,7 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 n) j = i; } else if (h->nets[i].cidr[n] == cidr) { h->nets[CIDR_POS(cidr)].nets[n]++; - return; + goto unlock; } } if (j != -1) { @@ -320,24 +371,29 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 n) } h->nets[i].cidr[n] = cidr; h->nets[CIDR_POS(cidr)].nets[n] = 1; +unlock: + spin_unlock_bh(&set->lock); } static void -mtype_del_cidr(struct htype *h, u8 cidr, u8 n) +mtype_del_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n) { u8 i, j, net_end = NLEN - 1; + spin_lock_bh(&set->lock); for (i = 0; i < NLEN; i++) { if (h->nets[i].cidr[n] != cidr) continue; h->nets[CIDR_POS(cidr)].nets[n]--; if (h->nets[CIDR_POS(cidr)].nets[n] > 0) - return; + goto unlock; for (j = i; j < net_end && h->nets[j].cidr[n]; j++) h->nets[j].cidr[n] = h->nets[j + 1].cidr[n]; h->nets[j].cidr[n] = 0; - return; + goto unlock; } +unlock: + spin_unlock_bh(&set->lock); } #endif @@ -345,7 +401,7 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 n) static size_t mtype_ahash_memsize(const struct htype *h, const struct htable *t) { - return sizeof(*h) + sizeof(*t); + return sizeof(*h) + sizeof(*t) + ahash_sizeof_regions(t->htable_bits); } /* Get the ith element from the array block n */ @@ -369,24 +425,29 @@ mtype_flush(struct ip_set *set) struct htype *h = set->data; struct htable *t; struct hbucket *n; - u32 i; - - t = ipset_dereference_protected(h->table, set); - for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = __ipset_dereference_protected(hbucket(t, i), 1); - if (!n) - continue; - if (set->extensions & IPSET_EXT_DESTROY) - mtype_ext_cleanup(set, n); - /* FIXME: use slab cache */ - rcu_assign_pointer(hbucket(t, i), NULL); - kfree_rcu(n, rcu); + u32 r, i; + + t = ipset_dereference_nfnl(h->table); + for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) { + spin_lock_bh(&t->hregion[r].lock); + for (i = ahash_bucket_start(r, t->htable_bits); + i < ahash_bucket_end(r, t->htable_bits); i++) { + n = __ipset_dereference(hbucket(t, i)); + if (!n) + continue; + if (set->extensions & IPSET_EXT_DESTROY) + mtype_ext_cleanup(set, n); + /* FIXME: use slab cache */ + rcu_assign_pointer(hbucket(t, i), NULL); + kfree_rcu(n, rcu); + } + t->hregion[r].ext_size = 0; + t->hregion[r].elements = 0; + spin_unlock_bh(&t->hregion[r].lock); } #ifdef IP_SET_HASH_WITH_NETS memset(h->nets, 0, sizeof(h->nets)); #endif - set->elements = 0; - set->ext_size = 0; } /* Destroy the hashtable part of the set */ @@ -397,7 +458,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy) u32 i; for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = __ipset_dereference_protected(hbucket(t, i), 1); + n = __ipset_dereference(hbucket(t, i)); if (!n) continue; if (set->extensions & IPSET_EXT_DESTROY && ext_destroy) @@ -406,6 +467,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy) kfree(n); } + ip_set_free(t->hregion); ip_set_free(t); } @@ -414,28 +476,21 @@ static void mtype_destroy(struct ip_set *set) { struct htype *h = set->data; + struct list_head *l, *lt; if (SET_WITH_TIMEOUT(set)) - del_timer_sync(&h->gc); + cancel_delayed_work_sync(&h->gc.dwork); - mtype_ahash_destroy(set, - __ipset_dereference_protected(h->table, 1), true); + mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true); + list_for_each_safe(l, lt, &h->ad) { + list_del(l); + kfree(l); + } kfree(h); set->data = NULL; } -static void -mtype_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t)) -{ - struct htype *h = set->data; - - timer_setup(&h->gc, gc, 0); - mod_timer(&h->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ); - pr_debug("gc initialized, run in every %u\n", - IPSET_GC_PERIOD(set->timeout)); -} - static bool mtype_same_set(const struct ip_set *a, const struct ip_set *b) { @@ -454,11 +509,9 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b) a->extensions == b->extensions; } -/* Delete expired elements from the hashtable */ static void -mtype_expire(struct ip_set *set, struct htype *h) +mtype_gc_do(struct ip_set *set, struct htype *h, struct htable *t, u32 r) { - struct htable *t; struct hbucket *n, *tmp; struct mtype_elem *data; u32 i, j, d; @@ -466,10 +519,12 @@ mtype_expire(struct ip_set *set, struct htype *h) #ifdef IP_SET_HASH_WITH_NETS u8 k; #endif + u8 htable_bits = t->htable_bits; - t = ipset_dereference_protected(h->table, set); - for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = __ipset_dereference_protected(hbucket(t, i), 1); + spin_lock_bh(&t->hregion[r].lock); + for (i = ahash_bucket_start(r, htable_bits); + i < ahash_bucket_end(r, htable_bits); i++) { + n = __ipset_dereference(hbucket(t, i)); if (!n) continue; for (j = 0, d = 0; j < n->pos; j++) { @@ -485,58 +540,100 @@ mtype_expire(struct ip_set *set, struct htype *h) smp_mb__after_atomic(); #ifdef IP_SET_HASH_WITH_NETS for (k = 0; k < IPSET_NET_COUNT; k++) - mtype_del_cidr(h, + mtype_del_cidr(set, h, NCIDR_PUT(DCIDR_GET(data->cidr, k)), k); #endif + t->hregion[r].elements--; ip_set_ext_destroy(set, data); - set->elements--; d++; } if (d >= AHASH_INIT_SIZE) { if (d >= n->size) { + t->hregion[r].ext_size -= + ext_size(n->size, dsize); rcu_assign_pointer(hbucket(t, i), NULL); kfree_rcu(n, rcu); continue; } tmp = kzalloc(sizeof(*tmp) + - (n->size - AHASH_INIT_SIZE) * dsize, - GFP_ATOMIC); + (n->size - AHASH_INIT_SIZE) * dsize, + GFP_ATOMIC); if (!tmp) - /* Still try to delete expired elements */ + /* Still try to delete expired elements. */ continue; tmp->size = n->size - AHASH_INIT_SIZE; for (j = 0, d = 0; j < n->pos; j++) { if (!test_bit(j, n->used)) continue; data = ahash_data(n, j, dsize); - memcpy(tmp->value + d * dsize, data, dsize); + memcpy(tmp->value + d * dsize, + data, dsize); set_bit(d, tmp->used); d++; } tmp->pos = d; - set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize); + t->hregion[r].ext_size -= + ext_size(AHASH_INIT_SIZE, dsize); rcu_assign_pointer(hbucket(t, i), tmp); kfree_rcu(n, rcu); } } + spin_unlock_bh(&t->hregion[r].lock); } static void -mtype_gc(struct timer_list *t) +mtype_gc(struct work_struct *work) { - struct htype *h = from_timer(h, t, gc); - struct ip_set *set = h->set; + struct htable_gc *gc; + struct ip_set *set; + struct htype *h; + struct htable *t; + u32 r, numof_locks; + unsigned int next_run; + + gc = container_of(work, struct htable_gc, dwork.work); + set = gc->set; + h = set->data; - pr_debug("called\n"); spin_lock_bh(&set->lock); - mtype_expire(set, h); + t = ipset_dereference_set(h->table, set); + atomic_inc(&t->uref); + numof_locks = ahash_numof_locks(t->htable_bits); + r = gc->region++; + if (r >= numof_locks) { + r = gc->region = 0; + } + next_run = (IPSET_GC_PERIOD(set->timeout) * HZ) / numof_locks; + if (next_run < HZ/10) + next_run = HZ/10; spin_unlock_bh(&set->lock); - h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; - add_timer(&h->gc); + mtype_gc_do(set, h, t, r); + + if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { + pr_debug("Table destroy after resize by expire: %p\n", t); + mtype_ahash_destroy(set, t, false); + } + + queue_delayed_work(system_power_efficient_wq, &gc->dwork, next_run); + +} + +static void +mtype_gc_init(struct htable_gc *gc) +{ + INIT_DEFERRABLE_WORK(&gc->dwork, mtype_gc); + queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ); } +static int +mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags); +static int +mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags); + /* Resize a hash: create a new hash table with doubling the hashsize * and inserting the elements to it. Repeat until we succeed or * fail due to memory pressures. @@ -547,7 +644,7 @@ mtype_resize(struct ip_set *set, bool retried) struct htype *h = set->data; struct htable *t, *orig; u8 htable_bits; - size_t extsize, dsize = set->dsize; + size_t dsize = set->dsize; #ifdef IP_SET_HASH_WITH_NETS u8 flags; struct mtype_elem *tmp; @@ -555,7 +652,9 @@ mtype_resize(struct ip_set *set, bool retried) struct mtype_elem *data; struct mtype_elem *d; struct hbucket *n, *m; - u32 i, j, key; + struct list_head *l, *lt; + struct mtype_resize_ad *x; + u32 i, j, r, nr, key; int ret; #ifdef IP_SET_HASH_WITH_NETS @@ -563,10 +662,8 @@ mtype_resize(struct ip_set *set, bool retried) if (!tmp) return -ENOMEM; #endif - rcu_read_lock_bh(); - orig = rcu_dereference_bh_nfnl(h->table); + orig = ipset_dereference_bh_nfnl(h->table); htable_bits = orig->htable_bits; - rcu_read_unlock_bh(); retry: ret = 0; @@ -583,88 +680,124 @@ retry: ret = -ENOMEM; goto out; } + t->hregion = ip_set_alloc(ahash_sizeof_regions(htable_bits)); + if (!t->hregion) { + kfree(t); + ret = -ENOMEM; + goto out; + } t->htable_bits = htable_bits; + t->maxelem = h->maxelem / ahash_numof_locks(htable_bits); + for (i = 0; i < ahash_numof_locks(htable_bits); i++) + spin_lock_init(&t->hregion[i].lock); - spin_lock_bh(&set->lock); - orig = __ipset_dereference_protected(h->table, 1); - /* There can't be another parallel resizing, but dumping is possible */ + /* There can't be another parallel resizing, + * but dumping, gc, kernel side add/del are possible + */ + orig = ipset_dereference_bh_nfnl(h->table); atomic_set(&orig->ref, 1); atomic_inc(&orig->uref); - extsize = 0; pr_debug("attempt to resize set %s from %u to %u, t %p\n", set->name, orig->htable_bits, htable_bits, orig); - for (i = 0; i < jhash_size(orig->htable_bits); i++) { - n = __ipset_dereference_protected(hbucket(orig, i), 1); - if (!n) - continue; - for (j = 0; j < n->pos; j++) { - if (!test_bit(j, n->used)) + for (r = 0; r < ahash_numof_locks(orig->htable_bits); r++) { + /* Expire may replace a hbucket with another one */ + rcu_read_lock_bh(); + for (i = ahash_bucket_start(r, orig->htable_bits); + i < ahash_bucket_end(r, orig->htable_bits); i++) { + n = __ipset_dereference(hbucket(orig, i)); + if (!n) continue; - data = ahash_data(n, j, dsize); + for (j = 0; j < n->pos; j++) { + if (!test_bit(j, n->used)) + continue; + data = ahash_data(n, j, dsize); + if (SET_ELEM_EXPIRED(set, data)) + continue; #ifdef IP_SET_HASH_WITH_NETS - /* We have readers running parallel with us, - * so the live data cannot be modified. - */ - flags = 0; - memcpy(tmp, data, dsize); - data = tmp; - mtype_data_reset_flags(data, &flags); + /* We have readers running parallel with us, + * so the live data cannot be modified. + */ + flags = 0; + memcpy(tmp, data, dsize); + data = tmp; + mtype_data_reset_flags(data, &flags); #endif - key = HKEY(data, h->initval, htable_bits); - m = __ipset_dereference_protected(hbucket(t, key), 1); - if (!m) { - m = kzalloc(sizeof(*m) + + key = HKEY(data, h->initval, htable_bits); + m = __ipset_dereference(hbucket(t, key)); + nr = ahash_region(key, htable_bits); + if (!m) { + m = kzalloc(sizeof(*m) + AHASH_INIT_SIZE * dsize, GFP_ATOMIC); - if (!m) { - ret = -ENOMEM; - goto cleanup; - } - m->size = AHASH_INIT_SIZE; - extsize += ext_size(AHASH_INIT_SIZE, dsize); - RCU_INIT_POINTER(hbucket(t, key), m); - } else if (m->pos >= m->size) { - struct hbucket *ht; - - if (m->size >= AHASH_MAX(h)) { - ret = -EAGAIN; - } else { - ht = kzalloc(sizeof(*ht) + + if (!m) { + ret = -ENOMEM; + goto cleanup; + } + m->size = AHASH_INIT_SIZE; + t->hregion[nr].ext_size += + ext_size(AHASH_INIT_SIZE, + dsize); + RCU_INIT_POINTER(hbucket(t, key), m); + } else if (m->pos >= m->size) { + struct hbucket *ht; + + if (m->size >= AHASH_MAX(h)) { + ret = -EAGAIN; + } else { + ht = kzalloc(sizeof(*ht) + (m->size + AHASH_INIT_SIZE) * dsize, GFP_ATOMIC); - if (!ht) - ret = -ENOMEM; + if (!ht) + ret = -ENOMEM; + } + if (ret < 0) + goto cleanup; + memcpy(ht, m, sizeof(struct hbucket) + + m->size * dsize); + ht->size = m->size + AHASH_INIT_SIZE; + t->hregion[nr].ext_size += + ext_size(AHASH_INIT_SIZE, + dsize); + kfree(m); + m = ht; + RCU_INIT_POINTER(hbucket(t, key), ht); } - if (ret < 0) - goto cleanup; - memcpy(ht, m, sizeof(struct hbucket) + - m->size * dsize); - ht->size = m->size + AHASH_INIT_SIZE; - extsize += ext_size(AHASH_INIT_SIZE, dsize); - kfree(m); - m = ht; - RCU_INIT_POINTER(hbucket(t, key), ht); - } - d = ahash_data(m, m->pos, dsize); - memcpy(d, data, dsize); - set_bit(m->pos++, m->used); + d = ahash_data(m, m->pos, dsize); + memcpy(d, data, dsize); + set_bit(m->pos++, m->used); + t->hregion[nr].elements++; #ifdef IP_SET_HASH_WITH_NETS - mtype_data_reset_flags(d, &flags); + mtype_data_reset_flags(d, &flags); #endif + } } + rcu_read_unlock_bh(); } - rcu_assign_pointer(h->table, t); - set->ext_size = extsize; - spin_unlock_bh(&set->lock); + /* There can't be any other writer. */ + rcu_assign_pointer(h->table, t); /* Give time to other readers of the set */ synchronize_rcu(); pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name, orig->htable_bits, orig, t->htable_bits, t); - /* If there's nobody else dumping the table, destroy it */ + /* Add/delete elements processed by the SET target during resize. + * Kernel-side add cannot trigger a resize and userspace actions + * are serialized by the mutex. + */ + list_for_each_safe(l, lt, &h->ad) { + x = list_entry(l, struct mtype_resize_ad, list); + if (x->ad == IPSET_ADD) { + mtype_add(set, &x->d, &x->ext, &x->mext, x->flags); + } else { + mtype_del(set, &x->d, NULL, NULL, 0); + } + list_del(l); + kfree(l); + } + /* If there's nobody else using the table, destroy it */ if (atomic_dec_and_test(&orig->uref)) { pr_debug("Table destroy by resize %p\n", orig); mtype_ahash_destroy(set, orig, false); @@ -677,15 +810,44 @@ out: return ret; cleanup: + rcu_read_unlock_bh(); atomic_set(&orig->ref, 0); atomic_dec(&orig->uref); - spin_unlock_bh(&set->lock); mtype_ahash_destroy(set, t, false); if (ret == -EAGAIN) goto retry; goto out; } +/* Get the current number of elements and ext_size in the set */ +static void +mtype_ext_size(struct ip_set *set, u32 *elements, size_t *ext_size) +{ + struct htype *h = set->data; + const struct htable *t; + u32 i, j, r; + struct hbucket *n; + struct mtype_elem *data; + + t = rcu_dereference_bh(h->table); + for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) { + for (i = ahash_bucket_start(r, t->htable_bits); + i < ahash_bucket_end(r, t->htable_bits); i++) { + n = rcu_dereference_bh(hbucket(t, i)); + if (!n) + continue; + for (j = 0; j < n->pos; j++) { + if (!test_bit(j, n->used)) + continue; + data = ahash_data(n, j, set->dsize); + if (!SET_ELEM_EXPIRED(set, data)) + (*elements)++; + } + } + *ext_size += t->hregion[r].ext_size; + } +} + /* Add an element to a hash and update the internal counters when succeeded, * otherwise report the proper error code. */ @@ -698,32 +860,49 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, const struct mtype_elem *d = value; struct mtype_elem *data; struct hbucket *n, *old = ERR_PTR(-ENOENT); - int i, j = -1; + int i, j = -1, ret; bool flag_exist = flags & IPSET_FLAG_EXIST; bool deleted = false, forceadd = false, reuse = false; - u32 key, multi = 0; + u32 r, key, multi = 0, elements, maxelem; - if (set->elements >= h->maxelem) { - if (SET_WITH_TIMEOUT(set)) - /* FIXME: when set is full, we slow down here */ - mtype_expire(set, h); - if (set->elements >= h->maxelem && SET_WITH_FORCEADD(set)) + rcu_read_lock_bh(); + t = rcu_dereference_bh(h->table); + key = HKEY(value, h->initval, t->htable_bits); + r = ahash_region(key, t->htable_bits); + atomic_inc(&t->uref); + elements = t->hregion[r].elements; + maxelem = t->maxelem; + if (elements >= maxelem) { + u32 e; + if (SET_WITH_TIMEOUT(set)) { + rcu_read_unlock_bh(); + mtype_gc_do(set, h, t, r); + rcu_read_lock_bh(); + } + maxelem = h->maxelem; + elements = 0; + for (e = 0; e < ahash_numof_locks(t->htable_bits); e++) + elements += t->hregion[e].elements; + if (elements >= maxelem && SET_WITH_FORCEADD(set)) forceadd = true; } + rcu_read_unlock_bh(); - t = ipset_dereference_protected(h->table, set); - key = HKEY(value, h->initval, t->htable_bits); - n = __ipset_dereference_protected(hbucket(t, key), 1); + spin_lock_bh(&t->hregion[r].lock); + n = rcu_dereference_bh(hbucket(t, key)); if (!n) { - if (forceadd || set->elements >= h->maxelem) + if (forceadd || elements >= maxelem) goto set_full; old = NULL; n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize, GFP_ATOMIC); - if (!n) - return -ENOMEM; + if (!n) { + ret = -ENOMEM; + goto unlock; + } n->size = AHASH_INIT_SIZE; - set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize); + t->hregion[r].ext_size += + ext_size(AHASH_INIT_SIZE, set->dsize); goto copy_elem; } for (i = 0; i < n->pos; i++) { @@ -737,19 +916,16 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, } data = ahash_data(n, i, set->dsize); if (mtype_data_equal(data, d, &multi)) { - if (flag_exist || - (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, set)))) { + if (flag_exist || SET_ELEM_EXPIRED(set, data)) { /* Just the extensions could be overwritten */ j = i; goto overwrite_extensions; } - return -IPSET_ERR_EXIST; + ret = -IPSET_ERR_EXIST; + goto unlock; } /* Reuse first timed out entry */ - if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, set)) && - j == -1) { + if (SET_ELEM_EXPIRED(set, data) && j == -1) { j = i; reuse = true; } @@ -759,16 +935,16 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (!deleted) { #ifdef IP_SET_HASH_WITH_NETS for (i = 0; i < IPSET_NET_COUNT; i++) - mtype_del_cidr(h, + mtype_del_cidr(set, h, NCIDR_PUT(DCIDR_GET(data->cidr, i)), i); #endif ip_set_ext_destroy(set, data); - set->elements--; + t->hregion[r].elements--; } goto copy_data; } - if (set->elements >= h->maxelem) + if (elements >= maxelem) goto set_full; /* Create a new slot */ if (n->pos >= n->size) { @@ -776,28 +952,32 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (n->size >= AHASH_MAX(h)) { /* Trigger rehashing */ mtype_data_next(&h->next, d); - return -EAGAIN; + ret = -EAGAIN; + goto resize; } old = n; n = kzalloc(sizeof(*n) + (old->size + AHASH_INIT_SIZE) * set->dsize, GFP_ATOMIC); - if (!n) - return -ENOMEM; + if (!n) { + ret = -ENOMEM; + goto unlock; + } memcpy(n, old, sizeof(struct hbucket) + old->size * set->dsize); n->size = old->size + AHASH_INIT_SIZE; - set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize); + t->hregion[r].ext_size += + ext_size(AHASH_INIT_SIZE, set->dsize); } copy_elem: j = n->pos++; data = ahash_data(n, j, set->dsize); copy_data: - set->elements++; + t->hregion[r].elements++; #ifdef IP_SET_HASH_WITH_NETS for (i = 0; i < IPSET_NET_COUNT; i++) - mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i); + mtype_add_cidr(set, h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i); #endif memcpy(data, d, sizeof(struct mtype_elem)); overwrite_extensions: @@ -820,13 +1000,41 @@ overwrite_extensions: if (old) kfree_rcu(old, rcu); } + ret = 0; +resize: + spin_unlock_bh(&t->hregion[r].lock); + if (atomic_read(&t->ref) && ext->target) { + /* Resize is in process and kernel side add, save values */ + struct mtype_resize_ad *x; + + x = kzalloc(sizeof(struct mtype_resize_ad), GFP_ATOMIC); + if (!x) + /* Don't bother */ + goto out; + x->ad = IPSET_ADD; + memcpy(&x->d, value, sizeof(struct mtype_elem)); + memcpy(&x->ext, ext, sizeof(struct ip_set_ext)); + memcpy(&x->mext, mext, sizeof(struct ip_set_ext)); + x->flags = flags; + spin_lock_bh(&set->lock); + list_add_tail(&x->list, &h->ad); + spin_unlock_bh(&set->lock); + } + goto out; - return 0; set_full: if (net_ratelimit()) pr_warn("Set %s is full, maxelem %u reached\n", - set->name, h->maxelem); - return -IPSET_ERR_HASH_FULL; + set->name, maxelem); + ret = -IPSET_ERR_HASH_FULL; +unlock: + spin_unlock_bh(&t->hregion[r].lock); +out: + if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { + pr_debug("Table destroy after resize by add: %p\n", t); + mtype_ahash_destroy(set, t, false); + } + return ret; } /* Delete an element from the hash and free up space if possible. @@ -840,13 +1048,23 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, const struct mtype_elem *d = value; struct mtype_elem *data; struct hbucket *n; - int i, j, k, ret = -IPSET_ERR_EXIST; + struct mtype_resize_ad *x = NULL; + int i, j, k, r, ret = -IPSET_ERR_EXIST; u32 key, multi = 0; size_t dsize = set->dsize; - t = ipset_dereference_protected(h->table, set); + /* Userspace add and resize is excluded by the mutex. + * Kernespace add does not trigger resize. + */ + rcu_read_lock_bh(); + t = rcu_dereference_bh(h->table); key = HKEY(value, h->initval, t->htable_bits); - n = __ipset_dereference_protected(hbucket(t, key), 1); + r = ahash_region(key, t->htable_bits); + atomic_inc(&t->uref); + rcu_read_unlock_bh(); + + spin_lock_bh(&t->hregion[r].lock); + n = rcu_dereference_bh(hbucket(t, key)); if (!n) goto out; for (i = 0, k = 0; i < n->pos; i++) { @@ -857,8 +1075,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, data = ahash_data(n, i, dsize); if (!mtype_data_equal(data, d, &multi)) continue; - if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, set))) + if (SET_ELEM_EXPIRED(set, data)) goto out; ret = 0; @@ -866,20 +1083,33 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, smp_mb__after_atomic(); if (i + 1 == n->pos) n->pos--; - set->elements--; + t->hregion[r].elements--; #ifdef IP_SET_HASH_WITH_NETS for (j = 0; j < IPSET_NET_COUNT; j++) - mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, j)), - j); + mtype_del_cidr(set, h, + NCIDR_PUT(DCIDR_GET(d->cidr, j)), j); #endif ip_set_ext_destroy(set, data); + if (atomic_read(&t->ref) && ext->target) { + /* Resize is in process and kernel side del, + * save values + */ + x = kzalloc(sizeof(struct mtype_resize_ad), + GFP_ATOMIC); + if (x) { + x->ad = IPSET_DEL; + memcpy(&x->d, value, + sizeof(struct mtype_elem)); + x->flags = flags; + } + } for (; i < n->pos; i++) { if (!test_bit(i, n->used)) k++; } if (n->pos == 0 && k == 0) { - set->ext_size -= ext_size(n->size, dsize); + t->hregion[r].ext_size -= ext_size(n->size, dsize); rcu_assign_pointer(hbucket(t, key), NULL); kfree_rcu(n, rcu); } else if (k >= AHASH_INIT_SIZE) { @@ -898,7 +1128,8 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, k++; } tmp->pos = k; - set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize); + t->hregion[r].ext_size -= + ext_size(AHASH_INIT_SIZE, dsize); rcu_assign_pointer(hbucket(t, key), tmp); kfree_rcu(n, rcu); } @@ -906,6 +1137,16 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, } out: + spin_unlock_bh(&t->hregion[r].lock); + if (x) { + spin_lock_bh(&set->lock); + list_add(&x->list, &h->ad); + spin_unlock_bh(&set->lock); + } + if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { + pr_debug("Table destroy after resize by del: %p\n", t); + mtype_ahash_destroy(set, t, false); + } return ret; } @@ -991,6 +1232,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, int i, ret = 0; u32 key, multi = 0; + rcu_read_lock_bh(); t = rcu_dereference_bh(h->table); #ifdef IP_SET_HASH_WITH_NETS /* If we test an IP address and not a network address, @@ -1022,6 +1264,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, goto out; } out: + rcu_read_unlock_bh(); return ret; } @@ -1033,23 +1276,14 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) const struct htable *t; struct nlattr *nested; size_t memsize; + u32 elements = 0; + size_t ext_size = 0; u8 htable_bits; - /* If any members have expired, set->elements will be wrong - * mytype_expire function will update it with the right count. - * we do not hold set->lock here, so grab it first. - * set->elements can still be incorrect in the case of a huge set, - * because elements might time out during the listing. - */ - if (SET_WITH_TIMEOUT(set)) { - spin_lock_bh(&set->lock); - mtype_expire(set, h); - spin_unlock_bh(&set->lock); - } - rcu_read_lock_bh(); - t = rcu_dereference_bh_nfnl(h->table); - memsize = mtype_ahash_memsize(h, t) + set->ext_size; + t = rcu_dereference_bh(h->table); + mtype_ext_size(set, &elements, &ext_size); + memsize = mtype_ahash_memsize(h, t) + ext_size + set->ext_size; htable_bits = t->htable_bits; rcu_read_unlock_bh(); @@ -1071,7 +1305,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) #endif if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || - nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements))) + nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(elements))) goto nla_put_failure; if (unlikely(ip_set_put_flags(skb, set))) goto nla_put_failure; @@ -1091,15 +1325,15 @@ mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start) if (start) { rcu_read_lock_bh(); - t = rcu_dereference_bh_nfnl(h->table); + t = ipset_dereference_bh_nfnl(h->table); atomic_inc(&t->uref); cb->args[IPSET_CB_PRIVATE] = (unsigned long)t; rcu_read_unlock_bh(); } else if (cb->args[IPSET_CB_PRIVATE]) { t = (struct htable *)cb->args[IPSET_CB_PRIVATE]; if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { - /* Resizing didn't destroy the hash table */ - pr_debug("Table destroy by dump: %p\n", t); + pr_debug("Table destroy after resize " + " by dump: %p\n", t); mtype_ahash_destroy(set, t, false); } cb->args[IPSET_CB_PRIVATE] = 0; @@ -1141,8 +1375,7 @@ mtype_list(const struct ip_set *set, if (!test_bit(i, n->used)) continue; e = ahash_data(n, i, set->dsize); - if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, set))) + if (SET_ELEM_EXPIRED(set, e)) continue; pr_debug("list hash %lu hbucket %p i %u, data %p\n", cb->args[IPSET_CB_ARG0], n, i, e); @@ -1208,6 +1441,7 @@ static const struct ip_set_type_variant mtype_variant = { .uref = mtype_uref, .resize = mtype_resize, .same_set = mtype_same_set, + .region_lock = true, }; #ifdef IP_SET_EMIT_CREATE @@ -1226,6 +1460,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, size_t hsize; struct htype *h; struct htable *t; + u32 i; pr_debug("Create set %s with family %s\n", set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6"); @@ -1294,6 +1529,15 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, kfree(h); return -ENOMEM; } + t->hregion = ip_set_alloc(ahash_sizeof_regions(hbits)); + if (!t->hregion) { + kfree(t); + kfree(h); + return -ENOMEM; + } + h->gc.set = set; + for (i = 0; i < ahash_numof_locks(hbits); i++) + spin_lock_init(&t->hregion[i].lock); h->maxelem = maxelem; #ifdef IP_SET_HASH_WITH_NETMASK h->netmask = netmask; @@ -1304,9 +1548,10 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, get_random_bytes(&h->initval, sizeof(h->initval)); t->htable_bits = hbits; + t->maxelem = h->maxelem / ahash_numof_locks(hbits); RCU_INIT_POINTER(h->table, t); - h->set = set; + INIT_LIST_HEAD(&h->ad); set->data = h; #ifndef IP_SET_PROTO_UNDEF if (set->family == NFPROTO_IPV4) { @@ -1329,12 +1574,10 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, #ifndef IP_SET_PROTO_UNDEF if (set->family == NFPROTO_IPV4) #endif - IPSET_TOKEN(HTYPE, 4_gc_init)(set, - IPSET_TOKEN(HTYPE, 4_gc)); + IPSET_TOKEN(HTYPE, 4_gc_init)(&h->gc); #ifndef IP_SET_PROTO_UNDEF else - IPSET_TOKEN(HTYPE, 6_gc_init)(set, - IPSET_TOKEN(HTYPE, 6_gc)); + IPSET_TOKEN(HTYPE, 6_gc_init)(&h->gc); #endif } pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", -- cgit v1.2.3-59-g8ed1b From 5c37f1ae1c335800d16b207cb578009c695dcd39 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 20 Feb 2020 16:58:37 +0000 Subject: KVM: arm64: Ask the compiler to __always_inline functions used at HYP On non VHE CPUs, KVM's __hyp_text contains code run at EL2 while the rest of the kernel runs at EL1. This code lives in its own section with start and end markers so we can map it to EL2. The compiler may decide not to inline static-inline functions from the header file. It may also decide not to put these out-of-line functions in the same section, meaning they aren't mapped when called at EL2. Clang-9 does exactly this with __kern_hyp_va() and a few others when x18 is reserved for the shadow call stack. Add the additional __always_ hint to all the static-inlines that are called from a hyp file. Signed-off-by: James Morse Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200220165839.256881-2-james.morse@arm.com ---- kvm_get_hyp_vector() pulls in all the regular per-cpu accessors and this_cpu_has_cap(), fortunately its only called for VHE. --- arch/arm64/include/asm/arch_gicv3.h | 2 +- arch/arm64/include/asm/cpufeature.h | 2 +- arch/arm64/include/asm/kvm_emulate.h | 48 ++++++++++++++++++------------------ arch/arm64/include/asm/kvm_mmu.h | 3 ++- arch/arm64/include/asm/virt.h | 2 +- 5 files changed, 29 insertions(+), 28 deletions(-) diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 89e4c8b79349..07597028bb00 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -32,7 +32,7 @@ static inline void gic_write_eoir(u32 irq) isb(); } -static inline void gic_write_dir(u32 irq) +static __always_inline void gic_write_dir(u32 irq) { write_sysreg_s(irq, SYS_ICC_DIR_EL1); isb(); diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 4261d55e8506..0e6d03c7e368 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -581,7 +581,7 @@ static inline bool system_supports_sve(void) cpus_have_const_cap(ARM64_SVE); } -static inline bool system_supports_cnp(void) +static __always_inline bool system_supports_cnp(void) { return IS_ENABLED(CONFIG_ARM64_CNP) && cpus_have_const_cap(ARM64_HAS_CNP); diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 688c63412cc2..f658dda12364 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -36,7 +36,7 @@ void kvm_inject_undef32(struct kvm_vcpu *vcpu); void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr); -static inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) +static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) { return !(vcpu->arch.hcr_el2 & HCR_RW); } @@ -127,7 +127,7 @@ static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr) vcpu->arch.vsesr_el2 = vsesr; } -static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) +static __always_inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) { return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; } @@ -153,17 +153,17 @@ static inline void vcpu_write_elr_el1(const struct kvm_vcpu *vcpu, unsigned long *__vcpu_elr_el1(vcpu) = v; } -static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu) +static __always_inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu) { return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate; } -static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu) +static __always_inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu) { return !!(*vcpu_cpsr(vcpu) & PSR_MODE32_BIT); } -static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu) +static __always_inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu) { if (vcpu_mode_is_32bit(vcpu)) return kvm_condition_valid32(vcpu); @@ -181,13 +181,13 @@ static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) * coming from a read of ESR_EL2. Otherwise, it may give the wrong result on * AArch32 with banked registers. */ -static inline unsigned long vcpu_get_reg(const struct kvm_vcpu *vcpu, +static __always_inline unsigned long vcpu_get_reg(const struct kvm_vcpu *vcpu, u8 reg_num) { return (reg_num == 31) ? 0 : vcpu_gp_regs(vcpu)->regs.regs[reg_num]; } -static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num, +static __always_inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num, unsigned long val) { if (reg_num != 31) @@ -264,12 +264,12 @@ static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) return mode != PSR_MODE_EL0t; } -static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu) +static __always_inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu) { return vcpu->arch.fault.esr_el2; } -static inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu) +static __always_inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu) { u32 esr = kvm_vcpu_get_hsr(vcpu); @@ -279,12 +279,12 @@ static inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu) return -1; } -static inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vcpu) +static __always_inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vcpu) { return vcpu->arch.fault.far_el2; } -static inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu) +static __always_inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu) { return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8; } @@ -299,7 +299,7 @@ static inline u32 kvm_vcpu_hvc_get_imm(const struct kvm_vcpu *vcpu) return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_xVC_IMM_MASK; } -static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu) +static __always_inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu) { return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV); } @@ -319,17 +319,17 @@ static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu) return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SF); } -static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu) +static __always_inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu) { return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; } -static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu) +static __always_inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu) { return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW); } -static inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu) +static __always_inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu) { return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WNR) || kvm_vcpu_dabt_iss1tw(vcpu); /* AF/DBM update */ @@ -340,18 +340,18 @@ static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu) return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_CM); } -static inline unsigned int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu) +static __always_inline unsigned int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu) { return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT); } /* This one is not specific to Data Abort */ -static inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu) +static __always_inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu) { return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_IL); } -static inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu) +static __always_inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu) { return ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu)); } @@ -361,17 +361,17 @@ static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu) return kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_IABT_LOW; } -static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) +static __always_inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) { return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC; } -static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu) +static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu) { return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE; } -static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) +static __always_inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) { switch (kvm_vcpu_trap_get_fault(vcpu)) { case FSC_SEA: @@ -390,7 +390,7 @@ static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) } } -static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) +static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) { u32 esr = kvm_vcpu_get_hsr(vcpu); return ESR_ELx_SYS64_ISS_RT(esr); @@ -504,7 +504,7 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, return data; /* Leave LE untouched */ } -static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) +static __always_inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) { if (vcpu_mode_is_32bit(vcpu)) kvm_skip_instr32(vcpu, is_wide_instr); @@ -519,7 +519,7 @@ static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) * Skip an instruction which has been emulated at hyp while most guest sysregs * are live. */ -static inline void __hyp_text __kvm_skip_instr(struct kvm_vcpu *vcpu) +static __always_inline void __hyp_text __kvm_skip_instr(struct kvm_vcpu *vcpu) { *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 53d846f1bfe7..785762860c63 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -93,7 +93,7 @@ void kvm_update_va_mask(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); void kvm_compute_layout(void); -static inline unsigned long __kern_hyp_va(unsigned long v) +static __always_inline unsigned long __kern_hyp_va(unsigned long v) { asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n" "ror %0, %0, #1\n" @@ -473,6 +473,7 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa, extern void *__kvm_bp_vect_base; extern int __kvm_harden_el2_vector_slot; +/* This is only called on a VHE system */ static inline void *kvm_get_hyp_vector(void) { struct bp_hardening_data *data = arm64_get_bp_hardening_data(); diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 0958ed6191aa..61fd26752adc 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -83,7 +83,7 @@ static inline bool is_kernel_in_hyp_mode(void) return read_sysreg(CurrentEL) == CurrentEL_EL2; } -static inline bool has_vhe(void) +static __always_inline bool has_vhe(void) { if (cpus_have_const_cap(ARM64_HAS_VIRT_HOST_EXTN)) return true; -- cgit v1.2.3-59-g8ed1b From 8c2d146ee7a2e0782eea4dd70fddc1c837140136 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 20 Feb 2020 16:58:38 +0000 Subject: KVM: arm64: Define our own swab32() to avoid a uapi static inline KVM uses swab32() when mediating GIC MMIO accesses if the GICV is badly aligned, and the host and guest differ in endianness. arm64 doesn't provide a __arch_swab32(), so __fswab32() is always backed by the macro implementation that the compiler reduces to a single instruction. But the static-inline causes problems for KVM if the compiler chooses not to inline this function, it may not be located in the __hyp_text where __vgic_v2_perform_cpuif_access() needs it. Create our own __kvm_swab32() macro that calls ___constant_swab32() directly. This way we know it will always be inlined. Signed-off-by: James Morse Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200220165839.256881-3-james.morse@arm.com --- arch/arm64/include/asm/kvm_hyp.h | 7 +++++++ arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 97f21cc66657..5fde137b5150 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -47,6 +47,13 @@ #define read_sysreg_el2(r) read_sysreg_elx(r, _EL2, _EL1) #define write_sysreg_el2(v,r) write_sysreg_elx(v, r, _EL2, _EL1) +/* + * Without an __arch_swab32(), we fall back to ___constant_swab32(), but the + * static inline can allow the compiler to out-of-line this. KVM always wants + * the macro version as its always inlined. + */ +#define __kvm_swab32(x) ___constant_swab32(x) + int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu); void __vgic_v3_save_state(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c index 29ee1feba4eb..4f3a087e36d5 100644 --- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c +++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c @@ -69,14 +69,14 @@ int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) u32 data = vcpu_get_reg(vcpu, rd); if (__is_be(vcpu)) { /* guest pre-swabbed data, undo this for writel() */ - data = swab32(data); + data = __kvm_swab32(data); } writel_relaxed(data, addr); } else { u32 data = readl_relaxed(addr); if (__is_be(vcpu)) { /* guest expects swabbed data */ - data = swab32(data); + data = __kvm_swab32(data); } vcpu_set_reg(vcpu, rd, data); } -- cgit v1.2.3-59-g8ed1b From e43f1331e2ef913b8c566920c9af75e0ccdd1d3f Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 20 Feb 2020 16:58:39 +0000 Subject: arm64: Ask the compiler to __always_inline functions used by KVM at HYP KVM uses some of the static-inline helpers like icache_is_vipt() from its HYP code. This assumes the function is inlined so that the code is mapped to EL2. The compiler may decide not to inline these, and the out-of-line version may not be in the __hyp_text section. Add the additional __always_ hint to these static-inlines that are used by KVM. Signed-off-by: James Morse Signed-off-by: Marc Zyngier Acked-by: Will Deacon Link: https://lore.kernel.org/r/20200220165839.256881-4-james.morse@arm.com --- arch/arm64/include/asm/cache.h | 2 +- arch/arm64/include/asm/cacheflush.h | 2 +- arch/arm64/include/asm/cpufeature.h | 8 ++++---- arch/arm64/include/asm/io.h | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 806e9dc2a852..a4d1b5f771f6 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -69,7 +69,7 @@ static inline int icache_is_aliasing(void) return test_bit(ICACHEF_ALIASING, &__icache_flags); } -static inline int icache_is_vpipt(void) +static __always_inline int icache_is_vpipt(void) { return test_bit(ICACHEF_VPIPT, &__icache_flags); } diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 665c78e0665a..e6cca3d4acf7 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -145,7 +145,7 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *, #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *); -static inline void __flush_icache_all(void) +static __always_inline void __flush_icache_all(void) { if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC)) return; diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 0e6d03c7e368..be078699ac4b 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -435,13 +435,13 @@ cpuid_feature_extract_signed_field(u64 features, int field) return cpuid_feature_extract_signed_field_width(features, field, 4); } -static inline unsigned int __attribute_const__ +static __always_inline unsigned int __attribute_const__ cpuid_feature_extract_unsigned_field_width(u64 features, int field, int width) { return (u64)(features << (64 - width - field)) >> (64 - width); } -static inline unsigned int __attribute_const__ +static __always_inline unsigned int __attribute_const__ cpuid_feature_extract_unsigned_field(u64 features, int field) { return cpuid_feature_extract_unsigned_field_width(features, field, 4); @@ -564,7 +564,7 @@ static inline bool system_supports_mixed_endian(void) return val == 0x1; } -static inline bool system_supports_fpsimd(void) +static __always_inline bool system_supports_fpsimd(void) { return !cpus_have_const_cap(ARM64_HAS_NO_FPSIMD); } @@ -575,7 +575,7 @@ static inline bool system_uses_ttbr0_pan(void) !cpus_have_const_cap(ARM64_HAS_PAN); } -static inline bool system_supports_sve(void) +static __always_inline bool system_supports_sve(void) { return IS_ENABLED(CONFIG_ARM64_SVE) && cpus_have_const_cap(ARM64_SVE); diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 4e531f57147d..6facd1308e7c 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -34,7 +34,7 @@ static inline void __raw_writew(u16 val, volatile void __iomem *addr) } #define __raw_writel __raw_writel -static inline void __raw_writel(u32 val, volatile void __iomem *addr) +static __always_inline void __raw_writel(u32 val, volatile void __iomem *addr) { asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr)); } @@ -69,7 +69,7 @@ static inline u16 __raw_readw(const volatile void __iomem *addr) } #define __raw_readl __raw_readl -static inline u32 __raw_readl(const volatile void __iomem *addr) +static __always_inline u32 __raw_readl(const volatile void __iomem *addr) { u32 val; asm volatile(ALTERNATIVE("ldr %w0, [%1]", -- cgit v1.2.3-59-g8ed1b From 8af1c6fbd9239877998c7f5a591cb2c88d41fb66 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 22 Feb 2020 12:01:43 +0100 Subject: netfilter: ipset: Fix forceadd evaluation path When the forceadd option is enabled, the hash:* types should find and replace the first entry in the bucket with the new one if there are no reuseable (deleted or timed out) entries. However, the position index was just not set to zero and remained the invalid -1 if there were no reuseable entries. Reported-by: syzbot+6a86565c74ebe30aea18@syzkaller.appspotmail.com Fixes: 23c42a403a9c ("netfilter: ipset: Introduction of new commands and protocol version 7") Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_hash_gen.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 71e93eac0831..e52d7b7597a0 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -931,6 +931,8 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, } } if (reuse || forceadd) { + if (j == -1) + j = 0; data = ahash_data(n, j, set->dsize); if (!deleted) { #ifdef IP_SET_HASH_WITH_NETS -- cgit v1.2.3-59-g8ed1b From c7849be9cc2dd2754c48ddbaca27c2de6d80a95d Mon Sep 17 00:00:00 2001 From: Xiaoguang Wang Date: Sat, 22 Feb 2020 14:46:05 +0800 Subject: io_uring: fix __io_iopoll_check deadlock in io_sq_thread Since commit a3a0e43fd770 ("io_uring: don't enter poll loop if we have CQEs pending"), if we already events pending, we won't enter poll loop. In case SETUP_IOPOLL and SETUP_SQPOLL are both enabled, if app has been terminated and don't reap pending events which are already in cq ring, and there are some reqs in poll_list, io_sq_thread will enter __io_iopoll_check(), and find pending events, then return, this loop will never have a chance to exit. I have seen this issue in fio stress tests, to fix this issue, let io_sq_thread call io_iopoll_getevents() with argument 'min' being zero, and remove __io_iopoll_check(). Fixes: a3a0e43fd770 ("io_uring: don't enter poll loop if we have CQEs pending") Signed-off-by: Xiaoguang Wang Signed-off-by: Jens Axboe --- fs/io_uring.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index b43467b3a8dc..de650df9ac53 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1672,11 +1672,17 @@ static void io_iopoll_reap_events(struct io_ring_ctx *ctx) mutex_unlock(&ctx->uring_lock); } -static int __io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, - long min) +static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, + long min) { int iters = 0, ret = 0; + /* + * We disallow the app entering submit/complete with polling, but we + * still need to lock the ring to prevent racing with polled issue + * that got punted to a workqueue. + */ + mutex_lock(&ctx->uring_lock); do { int tmin = 0; @@ -1712,21 +1718,6 @@ static int __io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, ret = 0; } while (min && !*nr_events && !need_resched()); - return ret; -} - -static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, - long min) -{ - int ret; - - /* - * We disallow the app entering submit/complete with polling, but we - * still need to lock the ring to prevent racing with polled issue - * that got punted to a workqueue. - */ - mutex_lock(&ctx->uring_lock); - ret = __io_iopoll_check(ctx, nr_events, min); mutex_unlock(&ctx->uring_lock); return ret; } @@ -5118,7 +5109,7 @@ static int io_sq_thread(void *data) */ mutex_lock(&ctx->uring_lock); if (!list_empty(&ctx->poll_list)) - __io_iopoll_check(ctx, &nr_events, 0); + io_iopoll_getevents(ctx, &nr_events, 0); else inflight = 0; mutex_unlock(&ctx->uring_lock); -- cgit v1.2.3-59-g8ed1b From 2ad3e17ebf94b7b7f3f64c050ff168f9915345eb Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Sat, 22 Feb 2020 20:36:47 -0500 Subject: audit: fix error handling in audit_data_to_entry() Commit 219ca39427bf ("audit: use union for audit_field values since they are mutually exclusive") combined a number of separate fields in the audit_field struct into a single union. Generally this worked just fine because they are generally mutually exclusive. Unfortunately in audit_data_to_entry() the overlap can be a problem when a specific error case is triggered that causes the error path code to attempt to cleanup an audit_field struct and the cleanup involves attempting to free a stored LSM string (the lsm_str field). Currently the code always has a non-NULL value in the audit_field.lsm_str field as the top of the for-loop transfers a value into audit_field.val (both .lsm_str and .val are part of the same union); if audit_data_to_entry() fails and the audit_field struct is specified to contain a LSM string, but the audit_field.lsm_str has not yet been properly set, the error handling code will attempt to free the bogus audit_field.lsm_str value that was set with audit_field.val at the top of the for-loop. This patch corrects this by ensuring that the audit_field.val is only set when needed (it is cleared when the audit_field struct is allocated with kcalloc()). It also corrects a few other issues to ensure that in case of error the proper error code is returned. Cc: stable@vger.kernel.org Fixes: 219ca39427bf ("audit: use union for audit_field values since they are mutually exclusive") Reported-by: syzbot+1f4d90ead370d72e450b@syzkaller.appspotmail.com Signed-off-by: Paul Moore --- kernel/auditfilter.c | 71 +++++++++++++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 32 deletions(-) diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index b0126e9c0743..026e34da4ace 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -456,6 +456,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, bufp = data->buf; for (i = 0; i < data->field_count; i++) { struct audit_field *f = &entry->rule.fields[i]; + u32 f_val; err = -EINVAL; @@ -464,12 +465,12 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, goto exit_free; f->type = data->fields[i]; - f->val = data->values[i]; + f_val = data->values[i]; /* Support legacy tests for a valid loginuid */ - if ((f->type == AUDIT_LOGINUID) && (f->val == AUDIT_UID_UNSET)) { + if ((f->type == AUDIT_LOGINUID) && (f_val == AUDIT_UID_UNSET)) { f->type = AUDIT_LOGINUID_SET; - f->val = 0; + f_val = 0; entry->rule.pflags |= AUDIT_LOGINUID_LEGACY; } @@ -485,7 +486,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, case AUDIT_SUID: case AUDIT_FSUID: case AUDIT_OBJ_UID: - f->uid = make_kuid(current_user_ns(), f->val); + f->uid = make_kuid(current_user_ns(), f_val); if (!uid_valid(f->uid)) goto exit_free; break; @@ -494,11 +495,12 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, case AUDIT_SGID: case AUDIT_FSGID: case AUDIT_OBJ_GID: - f->gid = make_kgid(current_user_ns(), f->val); + f->gid = make_kgid(current_user_ns(), f_val); if (!gid_valid(f->gid)) goto exit_free; break; case AUDIT_ARCH: + f->val = f_val; entry->rule.arch_f = f; break; case AUDIT_SUBJ_USER: @@ -511,11 +513,13 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, case AUDIT_OBJ_TYPE: case AUDIT_OBJ_LEV_LOW: case AUDIT_OBJ_LEV_HIGH: - str = audit_unpack_string(&bufp, &remain, f->val); - if (IS_ERR(str)) + str = audit_unpack_string(&bufp, &remain, f_val); + if (IS_ERR(str)) { + err = PTR_ERR(str); goto exit_free; - entry->rule.buflen += f->val; - + } + entry->rule.buflen += f_val; + f->lsm_str = str; err = security_audit_rule_init(f->type, f->op, str, (void **)&f->lsm_rule); /* Keep currently invalid fields around in case they @@ -524,68 +528,71 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, pr_warn("audit rule for LSM \'%s\' is invalid\n", str); err = 0; - } - if (err) { - kfree(str); + } else if (err) goto exit_free; - } else - f->lsm_str = str; break; case AUDIT_WATCH: - str = audit_unpack_string(&bufp, &remain, f->val); - if (IS_ERR(str)) + str = audit_unpack_string(&bufp, &remain, f_val); + if (IS_ERR(str)) { + err = PTR_ERR(str); goto exit_free; - entry->rule.buflen += f->val; - - err = audit_to_watch(&entry->rule, str, f->val, f->op); + } + err = audit_to_watch(&entry->rule, str, f_val, f->op); if (err) { kfree(str); goto exit_free; } + entry->rule.buflen += f_val; break; case AUDIT_DIR: - str = audit_unpack_string(&bufp, &remain, f->val); - if (IS_ERR(str)) + str = audit_unpack_string(&bufp, &remain, f_val); + if (IS_ERR(str)) { + err = PTR_ERR(str); goto exit_free; - entry->rule.buflen += f->val; - + } err = audit_make_tree(&entry->rule, str, f->op); kfree(str); if (err) goto exit_free; + entry->rule.buflen += f_val; break; case AUDIT_INODE: + f->val = f_val; err = audit_to_inode(&entry->rule, f); if (err) goto exit_free; break; case AUDIT_FILTERKEY: - if (entry->rule.filterkey || f->val > AUDIT_MAX_KEY_LEN) + if (entry->rule.filterkey || f_val > AUDIT_MAX_KEY_LEN) goto exit_free; - str = audit_unpack_string(&bufp, &remain, f->val); - if (IS_ERR(str)) + str = audit_unpack_string(&bufp, &remain, f_val); + if (IS_ERR(str)) { + err = PTR_ERR(str); goto exit_free; - entry->rule.buflen += f->val; + } + entry->rule.buflen += f_val; entry->rule.filterkey = str; break; case AUDIT_EXE: - if (entry->rule.exe || f->val > PATH_MAX) + if (entry->rule.exe || f_val > PATH_MAX) goto exit_free; - str = audit_unpack_string(&bufp, &remain, f->val); + str = audit_unpack_string(&bufp, &remain, f_val); if (IS_ERR(str)) { err = PTR_ERR(str); goto exit_free; } - entry->rule.buflen += f->val; - - audit_mark = audit_alloc_mark(&entry->rule, str, f->val); + audit_mark = audit_alloc_mark(&entry->rule, str, f_val); if (IS_ERR(audit_mark)) { kfree(str); err = PTR_ERR(audit_mark); goto exit_free; } + entry->rule.buflen += f_val; entry->rule.exe = audit_mark; break; + default: + f->val = f_val; + break; } } -- cgit v1.2.3-59-g8ed1b From 99db590b083fa2bc60adfcb5c839a62db4ef1d79 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 12 Feb 2020 11:10:58 +0100 Subject: csky: Replace by The C-Sky platform code is not a clock provider, and just needs to call of_clk_init(). Hence it can include instead of . Signed-off-by: Geert Uytterhoeven Signed-off-by: Guo Ren --- arch/csky/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/csky/kernel/time.c b/arch/csky/kernel/time.c index b5fc9447d93f..52379d866fe4 100644 --- a/arch/csky/kernel/time.c +++ b/arch/csky/kernel/time.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. -#include #include +#include void __init time_init(void) { -- cgit v1.2.3-59-g8ed1b From 42d84c8490f9f0931786f1623191fcab397c3d64 Mon Sep 17 00:00:00 2001 From: Eugenio Pérez Date: Fri, 21 Feb 2020 12:06:56 +0100 Subject: vhost: Check docket sk_family instead of call getname MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Doing so, we save one call to get data we already have in the struct. Also, since there is no guarantee that getname use sockaddr_ll parameter beyond its size, we add a little bit of security here. It should do not do beyond MAX_ADDR_LEN, but syzbot found that ax25_getname writes more (72 bytes, the size of full_sockaddr_ax25, versus 20 + 32 bytes of sockaddr_ll + MAX_ADDR_LEN in syzbot repro). Fixes: 3a4d5c94e9593 ("vhost_net: a kernel-level virtio server") Reported-by: syzbot+f2a62d07a5198c819c7b@syzkaller.appspotmail.com Signed-off-by: Eugenio Pérez Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/vhost/net.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index e158159671fa..18e205eeb9af 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1414,10 +1414,6 @@ static int vhost_net_release(struct inode *inode, struct file *f) static struct socket *get_raw_socket(int fd) { - struct { - struct sockaddr_ll sa; - char buf[MAX_ADDR_LEN]; - } uaddr; int r; struct socket *sock = sockfd_lookup(fd, &r); @@ -1430,11 +1426,7 @@ static struct socket *get_raw_socket(int fd) goto err; } - r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa, 0); - if (r < 0) - goto err; - - if (uaddr.sa.sll_family != AF_PACKET) { + if (sock->sk->sk_family != AF_PACKET) { r = -EPFNOSUPPORT; goto err; } -- cgit v1.2.3-59-g8ed1b From 3e72dfdf8227b052393f71d820ec7599909dddc2 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Fri, 21 Feb 2020 12:28:38 +0100 Subject: ipv4: ensure rcu_read_lock() in cipso_v4_error() Similarly to commit c543cb4a5f07 ("ipv4: ensure rcu_read_lock() in ipv4_link_failure()"), __ip_options_compile() must be called under rcu protection. Fixes: 3da1ed7ac398 ("net: avoid use IPCB in cipso_v4_error") Suggested-by: Guillaume Nault Signed-off-by: Matteo Croce Acked-by: Paul Moore Signed-off-by: David S. Miller --- net/ipv4/cipso_ipv4.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 376882215919..0bd10a1f477f 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1724,6 +1724,7 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) { unsigned char optbuf[sizeof(struct ip_options) + 40]; struct ip_options *opt = (struct ip_options *)optbuf; + int res; if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES) return; @@ -1735,7 +1736,11 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) memset(opt, 0, sizeof(struct ip_options)); opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); - if (__ip_options_compile(dev_net(skb->dev), opt, skb, NULL)) + rcu_read_lock(); + res = __ip_options_compile(dev_net(skb->dev), opt, skb, NULL); + rcu_read_unlock(); + + if (res) return; if (gateway) -- cgit v1.2.3-59-g8ed1b From 39f3b41aa7cae917f928ef9f31d09da28188e5ed Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 21 Feb 2020 19:42:13 +0100 Subject: net: genetlink: return the error code when attribute parsing fails. Currently if attribute parsing fails and the genl family does not support parallel operation, the error code returned by __nlmsg_parse() is discarded by genl_family_rcv_msg_attrs_parse(). Be sure to report the error for all genl families. Fixes: c10e6cf85e7d ("net: genetlink: push attrbuf allocation and parsing to a separate function") Fixes: ab5b526da048 ("net: genetlink: always allocate separate attrs for dumpit ops") Signed-off-by: Paolo Abeni Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 0522b2b1fd95..9f357aa22b94 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -497,8 +497,9 @@ genl_family_rcv_msg_attrs_parse(const struct genl_family *family, err = __nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr, family->policy, validate, extack); - if (err && parallel) { - kfree(attrbuf); + if (err) { + if (parallel) + kfree(attrbuf); return ERR_PTR(err); } return attrbuf; -- cgit v1.2.3-59-g8ed1b From dd58f3c95c98e6e2cf30d9e562cae0503c5f2713 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Sun, 23 Feb 2020 16:13:12 +0800 Subject: KVM: fix error handling in svm_hardware_setup rename svm_hardware_unsetup as svm_hardware_teardown, move it before svm_hardware_setup, and call it to free all memory if fail to setup in svm_hardware_setup, otherwise memory will be leaked remove __exit attribute for it since it is called in __init function Signed-off-by: Li RongQing Reviewed-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ff02aeb23616..d9b5add5a211 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1349,6 +1349,24 @@ static __init void svm_adjust_mmio_mask(void) kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK); } +static void svm_hardware_teardown(void) +{ + int cpu; + + if (svm_sev_enabled()) { + bitmap_free(sev_asid_bitmap); + bitmap_free(sev_reclaim_asid_bitmap); + + sev_flush_asids(); + } + + for_each_possible_cpu(cpu) + svm_cpu_uninit(cpu); + + __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); + iopm_base = 0; +} + static __init int svm_hardware_setup(void) { int cpu; @@ -1462,29 +1480,10 @@ static __init int svm_hardware_setup(void) return 0; err: - __free_pages(iopm_pages, IOPM_ALLOC_ORDER); - iopm_base = 0; + svm_hardware_teardown(); return r; } -static __exit void svm_hardware_unsetup(void) -{ - int cpu; - - if (svm_sev_enabled()) { - bitmap_free(sev_asid_bitmap); - bitmap_free(sev_reclaim_asid_bitmap); - - sev_flush_asids(); - } - - for_each_possible_cpu(cpu) - svm_cpu_uninit(cpu); - - __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); - iopm_base = 0; -} - static void init_seg(struct vmcb_seg *seg) { seg->selector = 0; @@ -7385,7 +7384,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, .hardware_setup = svm_hardware_setup, - .hardware_unsetup = svm_hardware_unsetup, + .hardware_unsetup = svm_hardware_teardown, .check_processor_compatibility = svm_check_processor_compat, .hardware_enable = svm_hardware_enable, .hardware_disable = svm_hardware_disable, -- cgit v1.2.3-59-g8ed1b From 5ef8acbdd687c9d72582e2c05c0b9756efb37863 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 7 Feb 2020 02:36:07 -0800 Subject: KVM: nVMX: Emulate MTF when performing instruction emulation Since commit 5f3d45e7f282 ("kvm/x86: add support for MONITOR_TRAP_FLAG"), KVM has allowed an L1 guest to use the monitor trap flag processor-based execution control for its L2 guest. KVM simply forwards any MTF VM-exits to the L1 guest, which works for normal instruction execution. However, when KVM needs to emulate an instruction on the behalf of an L2 guest, the monitor trap flag is not emulated. Add the necessary logic to kvm_skip_emulated_instruction() to synthesize an MTF VM-exit to L1 upon instruction emulation for L2. Fixes: 5f3d45e7f282 ("kvm/x86: add support for MONITOR_TRAP_FLAG") Signed-off-by: Oliver Upton Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/include/uapi/asm/kvm.h | 1 + arch/x86/kvm/svm.c | 1 + arch/x86/kvm/vmx/nested.c | 35 ++++++++++++++++++++++++++++++++++- arch/x86/kvm/vmx/nested.h | 5 +++++ arch/x86/kvm/vmx/vmx.c | 37 ++++++++++++++++++++++++++++++++++++- arch/x86/kvm/vmx/vmx.h | 3 +++ arch/x86/kvm/x86.c | 2 ++ 8 files changed, 83 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a84e8c5acda8..98959e8cd448 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1122,6 +1122,7 @@ struct kvm_x86_ops { int (*handle_exit)(struct kvm_vcpu *vcpu, enum exit_fastpath_completion exit_fastpath); int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); + void (*update_emulated_instruction)(struct kvm_vcpu *vcpu); void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu); void (*patch_hypercall)(struct kvm_vcpu *vcpu, diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 503d3f42da16..3f3f780c8c65 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -390,6 +390,7 @@ struct kvm_sync_regs { #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_RUN_PENDING 0x00000002 #define KVM_STATE_NESTED_EVMCS 0x00000004 +#define KVM_STATE_NESTED_MTF_PENDING 0x00000008 #define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_SMM_VMXON 0x00000002 diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d9b5add5a211..ad3f5b178a03 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -7439,6 +7439,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .run = svm_vcpu_run, .handle_exit = handle_exit, .skip_emulated_instruction = skip_emulated_instruction, + .update_emulated_instruction = NULL, .set_interrupt_shadow = svm_set_interrupt_shadow, .get_interrupt_shadow = svm_get_interrupt_shadow, .patch_hypercall = svm_patch_hypercall, diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 2b3ba7d27be4..50d8dbb3616d 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -3609,8 +3609,15 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) unsigned long exit_qual; bool block_nested_events = vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu); + bool mtf_pending = vmx->nested.mtf_pending; struct kvm_lapic *apic = vcpu->arch.apic; + /* + * Clear the MTF state. If a higher priority VM-exit is delivered first, + * this state is discarded. + */ + vmx->nested.mtf_pending = false; + if (lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &apic->pending_events)) { if (block_nested_events) @@ -3621,8 +3628,28 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) return 0; } + /* + * Process any exceptions that are not debug traps before MTF. + */ + if (vcpu->arch.exception.pending && + !vmx_pending_dbg_trap(vcpu) && + nested_vmx_check_exception(vcpu, &exit_qual)) { + if (block_nested_events) + return -EBUSY; + nested_vmx_inject_exception_vmexit(vcpu, exit_qual); + return 0; + } + + if (mtf_pending) { + if (block_nested_events) + return -EBUSY; + nested_vmx_update_pending_dbg(vcpu); + nested_vmx_vmexit(vcpu, EXIT_REASON_MONITOR_TRAP_FLAG, 0, 0); + return 0; + } + if (vcpu->arch.exception.pending && - nested_vmx_check_exception(vcpu, &exit_qual)) { + nested_vmx_check_exception(vcpu, &exit_qual)) { if (block_nested_events) return -EBUSY; nested_vmx_inject_exception_vmexit(vcpu, exit_qual); @@ -5712,6 +5739,9 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, if (vmx->nested.nested_run_pending) kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING; + + if (vmx->nested.mtf_pending) + kvm_state.flags |= KVM_STATE_NESTED_MTF_PENDING; } } @@ -5892,6 +5922,9 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, vmx->nested.nested_run_pending = !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING); + vmx->nested.mtf_pending = + !!(kvm_state->flags & KVM_STATE_NESTED_MTF_PENDING); + ret = -EINVAL; if (nested_cpu_has_shadow_vmcs(vmcs12) && vmcs12->vmcs_link_pointer != -1ull) { diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index 1c5fbff45d69..1db388f2a444 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -174,6 +174,11 @@ static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12) return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS; } +static inline int nested_cpu_has_mtf(struct vmcs12 *vmcs12) +{ + return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_TRAP_FLAG); +} + static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12) { return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 404dafedd778..dcca514ffd42 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1603,6 +1603,40 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu) return 1; } + +/* + * Recognizes a pending MTF VM-exit and records the nested state for later + * delivery. + */ +static void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!is_guest_mode(vcpu)) + return; + + /* + * Per the SDM, MTF takes priority over debug-trap exceptions besides + * T-bit traps. As instruction emulation is completed (i.e. at the + * instruction boundary), any #DB exception pending delivery must be a + * debug-trap. Record the pending MTF state to be delivered in + * vmx_check_nested_events(). + */ + if (nested_cpu_has_mtf(vmcs12) && + (!vcpu->arch.exception.pending || + vcpu->arch.exception.nr == DB_VECTOR)) + vmx->nested.mtf_pending = true; + else + vmx->nested.mtf_pending = false; +} + +static int vmx_skip_emulated_instruction(struct kvm_vcpu *vcpu) +{ + vmx_update_emulated_instruction(vcpu); + return skip_emulated_instruction(vcpu); +} + static void vmx_clear_hlt(struct kvm_vcpu *vcpu) { /* @@ -7796,7 +7830,8 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .run = vmx_vcpu_run, .handle_exit = vmx_handle_exit, - .skip_emulated_instruction = skip_emulated_instruction, + .skip_emulated_instruction = vmx_skip_emulated_instruction, + .update_emulated_instruction = vmx_update_emulated_instruction, .set_interrupt_shadow = vmx_set_interrupt_shadow, .get_interrupt_shadow = vmx_get_interrupt_shadow, .patch_hypercall = vmx_patch_hypercall, diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 7f42cf3dcd70..e64da06c7009 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -150,6 +150,9 @@ struct nested_vmx { /* L2 must run next, and mustn't decide to exit to L1. */ bool nested_run_pending; + /* Pending MTF VM-exit into L1. */ + bool mtf_pending; + struct loaded_vmcs vmcs02; /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fb5d64ebc35d..359fcd395132 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6891,6 +6891,8 @@ restart: kvm_rip_write(vcpu, ctxt->eip); if (r && ctxt->tf) r = kvm_vcpu_do_singlestep(vcpu); + if (kvm_x86_ops->update_emulated_instruction) + kvm_x86_ops->update_emulated_instruction(vcpu); __kvm_set_rflags(vcpu, ctxt->eflags); } -- cgit v1.2.3-59-g8ed1b From 07721feee46b4b248402133228235318199b05ec Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 4 Feb 2020 15:26:29 -0800 Subject: KVM: nVMX: Don't emulate instructions in guest mode vmx_check_intercept is not yet fully implemented. To avoid emulating instructions disallowed by the L1 hypervisor, refuse to emulate instructions by default. Cc: stable@vger.kernel.org [Made commit, added commit msg - Oliver] Signed-off-by: Oliver Upton Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index dcca514ffd42..5801a86f9c24 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7164,7 +7164,7 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, } /* TODO: check more intercepts... */ - return X86EMUL_CONTINUE; + return X86EMUL_UNHANDLEABLE; } #ifdef CONFIG_X86_64 -- cgit v1.2.3-59-g8ed1b From e71237d3ff1abf9f3388337cfebf53b96df2020d Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Feb 2020 15:26:30 -0800 Subject: KVM: nVMX: Refactor IO bitmap checks into helper function Checks against the IO bitmap are useful for both instruction emulation and VM-exit reflection. Refactor the IO bitmap checks into a helper function. Signed-off-by: Oliver Upton Reviewed-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 39 +++++++++++++++++++++++++-------------- arch/x86/kvm/vmx/nested.h | 2 ++ 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 50d8dbb3616d..f979832c394d 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5312,24 +5312,17 @@ fail: return 1; } - -static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, - struct vmcs12 *vmcs12) +/* + * Return true if an IO instruction with the specified port and size should cause + * a VM-exit into L1. + */ +bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port, + int size) { - unsigned long exit_qualification; + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); gpa_t bitmap, last_bitmap; - unsigned int port; - int size; u8 b; - if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) - return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING); - - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - - port = exit_qualification >> 16; - size = (exit_qualification & 7) + 1; - last_bitmap = (gpa_t)-1; b = -1; @@ -5356,6 +5349,24 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, return false; } +static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + unsigned long exit_qualification; + unsigned int port; + int size; + + if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) + return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING); + + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + + port = exit_qualification >> 16; + size = (exit_qualification & 7) + 1; + + return nested_vmx_check_io_bitmaps(vcpu, port, size); +} + /* * Return 1 if we should exit from L2 to L1 to handle an MSR access, * rather than handle it ourselves in L0. I.e., check whether L1 expressed diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index 1db388f2a444..9aeda46f473e 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -33,6 +33,8 @@ int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata); int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, u32 vmx_instruction_info, bool wr, int len, gva_t *ret); void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu); +bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port, + int size); static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) { -- cgit v1.2.3-59-g8ed1b From 35a571346a94fb93b5b3b6a599675ef3384bc75c Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 4 Feb 2020 15:26:31 -0800 Subject: KVM: nVMX: Check IO instruction VM-exit conditions Consult the 'unconditional IO exiting' and 'use IO bitmaps' VM-execution controls when checking instruction interception. If the 'use IO bitmaps' VM-execution control is 1, check the instruction access against the IO bitmaps to determine if the instruction causes a VM-exit. Signed-off-by: Oliver Upton Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 2 +- arch/x86/kvm/vmx/vmx.c | 57 ++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 52 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index f979832c394d..e920d7834d73 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5353,7 +5353,7 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { unsigned long exit_qualification; - unsigned int port; + unsigned short port; int size; if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 5801a86f9c24..63aaf44edd1f 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7145,6 +7145,39 @@ static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu) to_vmx(vcpu)->req_immediate_exit = true; } +static int vmx_check_intercept_io(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + unsigned short port; + bool intercept; + int size; + + if (info->intercept == x86_intercept_in || + info->intercept == x86_intercept_ins) { + port = info->src_val; + size = info->dst_bytes; + } else { + port = info->dst_val; + size = info->src_bytes; + } + + /* + * If the 'use IO bitmaps' VM-execution control is 0, IO instruction + * VM-exits depend on the 'unconditional IO exiting' VM-execution + * control. + * + * Otherwise, IO instruction VM-exits are controlled by the IO bitmaps. + */ + if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) + intercept = nested_cpu_has(vmcs12, + CPU_BASED_UNCOND_IO_EXITING); + else + intercept = nested_vmx_check_io_bitmaps(vcpu, port, size); + + return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; +} + static int vmx_check_intercept(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, enum x86_intercept_stage stage) @@ -7152,18 +7185,30 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12 = get_vmcs12(vcpu); struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; + switch (info->intercept) { /* * RDPID causes #UD if disabled through secondary execution controls. * Because it is marked as EmulateOnUD, we need to intercept it here. */ - if (info->intercept == x86_intercept_rdtscp && - !nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) { - ctxt->exception.vector = UD_VECTOR; - ctxt->exception.error_code_valid = false; - return X86EMUL_PROPAGATE_FAULT; - } + case x86_intercept_rdtscp: + if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) { + ctxt->exception.vector = UD_VECTOR; + ctxt->exception.error_code_valid = false; + return X86EMUL_PROPAGATE_FAULT; + } + break; + + case x86_intercept_in: + case x86_intercept_ins: + case x86_intercept_out: + case x86_intercept_outs: + return vmx_check_intercept_io(vcpu, info); /* TODO: check more intercepts... */ + default: + break; + } + return X86EMUL_UNHANDLEABLE; } -- cgit v1.2.3-59-g8ed1b From eae7172f8141eb98e64e6e81acc9e9d5b2add127 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Fri, 21 Feb 2020 14:17:05 +0100 Subject: net: usb: qmi_wwan: restore mtu min/max values after raw_ip switch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit usbnet creates network interfaces with min_mtu = 0 and max_mtu = ETH_MAX_MTU. These values are not modified by qmi_wwan when the network interface is created initially, allowing, for example, to set mtu greater than 1500. When a raw_ip switch is done (raw_ip set to 'Y', then set to 'N') the mtu values for the network interface are set through ether_setup, with min_mtu = ETH_MIN_MTU and max_mtu = ETH_DATA_LEN, not allowing anymore to set mtu greater than 1500 (error: mtu greater than device maximum). The patch restores the original min/max mtu values set by usbnet after a raw_ip switch. Signed-off-by: Daniele Palmas Acked-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 3b7a3b8a5e06..5754bb6ca0ee 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -337,6 +337,9 @@ static void qmi_wwan_netdev_setup(struct net_device *net) netdev_dbg(net, "mode: raw IP\n"); } else if (!net->header_ops) { /* don't bother if already set */ ether_setup(net); + /* Restoring min/max mtu values set originally by usbnet */ + net->min_mtu = 0; + net->max_mtu = ETH_MAX_MTU; clear_bit(EVENT_NO_IP_ALIGN, &dev->flags); netdev_dbg(net, "mode: Ethernet\n"); } -- cgit v1.2.3-59-g8ed1b From f8788d86ab28f61f7b46eb6be375f8a726783636 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 23 Feb 2020 16:17:42 -0800 Subject: Linux 5.6-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index aab38cb02b24..0914049d2929 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 6 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Kleptomaniac Octopus # *DOCUMENTATION* -- cgit v1.2.3-59-g8ed1b From f6f13c125e05603f68f5bf31f045b95e6d493598 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 21 Feb 2020 08:32:18 -0800 Subject: hv_netvsc: Fix unwanted wakeup in netvsc_attach() When netvsc_attach() is called by operations like changing MTU, etc., an extra wakeup may happen while netvsc_attach() calling rndis_filter_device_add() which sends rndis messages when queue is stopped in netvsc_detach(). The completion message will wake up queue 0. We can reproduce the issue by changing MTU etc., then the wake_queue counter from "ethtool -S" will increase beyond stop_queue counter: stop_queue: 0 wake_queue: 1 The issue causes queue wake up, and counter increment, no other ill effects in current code. So we didn't see any network problem for now. To fix this, initialize tx_disable to true, and set it to false when the NIC is ready to be attached or registered. Fixes: 7b2ee50c0cd5 ("hv_netvsc: common detach logic") Signed-off-by: Haiyang Zhang Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 2 +- drivers/net/hyperv/netvsc_drv.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index ae3f3084c2ed..1b320bcf150a 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -99,7 +99,7 @@ static struct netvsc_device *alloc_net_device(void) init_waitqueue_head(&net_device->wait_drain); net_device->destroy = false; - net_device->tx_disable = false; + net_device->tx_disable = true; net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT; net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 65e12cb07f45..2c0a24c606fc 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1068,6 +1068,7 @@ static int netvsc_attach(struct net_device *ndev, } /* In any case device is now ready */ + nvdev->tx_disable = false; netif_device_attach(ndev); /* Note: enable and attach happen when sub-channels setup */ @@ -2476,6 +2477,8 @@ static int netvsc_probe(struct hv_device *dev, else net->max_mtu = ETH_DATA_LEN; + nvdev->tx_disable = false; + ret = register_netdevice(net); if (ret != 0) { pr_err("Unable to register netdev.\n"); -- cgit v1.2.3-59-g8ed1b From dad8cea7add96a353fa1898b5ccefbb72da66f29 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sat, 22 Feb 2020 11:21:15 -0500 Subject: tcp: fix TFO SYNACK undo to avoid double-timestamp-undo In a rare corner case the new logic for undo of SYNACK RTO could result in triggering the warning in tcp_fastretrans_alert() that says: WARN_ON(tp->retrans_out != 0); The warning looked like: WARNING: CPU: 1 PID: 1 at net/ipv4/tcp_input.c:2818 tcp_ack+0x13e0/0x3270 The sequence that tickles this bug is: - Fast Open server receives TFO SYN with data, sends SYNACK - (client receives SYNACK and sends ACK, but ACK is lost) - server app sends some data packets - (N of the first data packets are lost) - server receives client ACK that has a TS ECR matching first SYNACK, and also SACKs suggesting the first N data packets were lost - server performs TS undo of SYNACK RTO, then immediately enters recovery - buggy behavior then performed a *second* undo that caused the connection to be in CA_Open with retrans_out != 0 Basically, the incoming ACK packet with SACK blocks causes us to first undo the cwnd reduction from the SYNACK RTO, but then immediately enters fast recovery, which then makes us eligible for undo again. And then tcp_rcv_synrecv_state_fastopen() accidentally performs an undo using a "mash-up" of state from two different loss recovery phases: it uses the timestamp info from the ACK of the original SYNACK, and the undo_marker from the fast recovery. This fix refines the logic to only invoke the tcp_try_undo_loss() inside tcp_rcv_synrecv_state_fastopen() if the connection is still in CA_Loss. If peer SACKs triggered fast recovery, then tcp_rcv_synrecv_state_fastopen() can't safely undo. Fixes: 794200d66273 ("tcp: undo cwnd on Fast Open spurious SYNACK retransmit") Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 316ebdf8151d..6b6b57000dad 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6124,7 +6124,11 @@ static void tcp_rcv_synrecv_state_fastopen(struct sock *sk) { struct request_sock *req; - tcp_try_undo_loss(sk, false); + /* If we are still handling the SYNACK RTO, see if timestamp ECR allows + * undo. If peer SACKs triggered fast recovery, we can't undo here. + */ + if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) + tcp_try_undo_loss(sk, false); /* Reset rtx states to prevent spurious retransmits_timed_out() */ tcp_sk(sk)->retrans_stamp = 0; -- cgit v1.2.3-59-g8ed1b From 66d0e797bf095d407479c89952d42b1d96ef0a7f Mon Sep 17 00:00:00 2001 From: Orson Zhai Date: Fri, 21 Feb 2020 01:37:04 +0800 Subject: Revert "PM / devfreq: Modify the device name as devfreq(X) for sysfs" This reverts commit 4585fbcb5331fc910b7e553ad3efd0dd7b320d14. The name changing as devfreq(X) breaks some user space applications, such as Android HAL from Unisoc and Hikey [1]. The device name will be changed unexpectly after every boot depending on module init sequence. It will make trouble to setup some system configuration like selinux for Android. So we'd like to revert it back to old naming rule before any better way being found. [1] https://lkml.org/lkml/2018/5/8/1042 Cc: John Stultz Cc: Greg Kroah-Hartman Cc: stable@vger.kernel.org Signed-off-by: Orson Zhai Acked-by: Greg Kroah-Hartman Signed-off-by: Chanwoo Choi --- drivers/devfreq/devfreq.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index cceee8bc3c2f..7dcf2093e531 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -738,7 +738,6 @@ struct devfreq *devfreq_add_device(struct device *dev, { struct devfreq *devfreq; struct devfreq_governor *governor; - static atomic_t devfreq_no = ATOMIC_INIT(-1); int err = 0; if (!dev || !profile || !governor_name) { @@ -800,8 +799,7 @@ struct devfreq *devfreq_add_device(struct device *dev, devfreq->suspend_freq = dev_pm_opp_get_suspend_opp_freq(dev); atomic_set(&devfreq->suspend_count, 0); - dev_set_name(&devfreq->dev, "devfreq%d", - atomic_inc_return(&devfreq_no)); + dev_set_name(&devfreq->dev, "%s", dev_name(dev)); err = device_register(&devfreq->dev); if (err) { mutex_unlock(&devfreq->lock); -- cgit v1.2.3-59-g8ed1b From 193155c8c9429f57400daf1f2ef0075016767112 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 22 Feb 2020 23:22:19 -0700 Subject: io_uring: handle multiple personalities in link chains If we have a chain of requests and they don't all use the same credentials, then the head of the chain will be issued with the credentails of the tail of the chain. Ensure __io_queue_sqe() overrides the credentials, if they are different. Once we do that, we can clean up the creds handling as well, by only having io_submit_sqe() do the lookup of a personality. It doesn't need to assign it, since __io_queue_sqe() now always does the right thing. Fixes: 75c6a03904e0 ("io_uring: support using a registered personality for commands") Reported-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index de650df9ac53..7d0be264527d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4705,11 +4705,21 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_kiocb *linked_timeout; struct io_kiocb *nxt = NULL; + const struct cred *old_creds = NULL; int ret; again: linked_timeout = io_prep_linked_timeout(req); + if (req->work.creds && req->work.creds != current_cred()) { + if (old_creds) + revert_creds(old_creds); + if (old_creds == req->work.creds) + old_creds = NULL; /* restored original creds */ + else + old_creds = override_creds(req->work.creds); + } + ret = io_issue_sqe(req, sqe, &nxt, true); /* @@ -4759,6 +4769,8 @@ done_req: goto punt; goto again; } + if (old_creds) + revert_creds(old_creds); } static void io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe) @@ -4803,7 +4815,6 @@ static inline void io_queue_link_head(struct io_kiocb *req) static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, struct io_submit_state *state, struct io_kiocb **link) { - const struct cred *old_creds = NULL; struct io_ring_ctx *ctx = req->ctx; unsigned int sqe_flags; int ret, id; @@ -4818,14 +4829,12 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, id = READ_ONCE(sqe->personality); if (id) { - const struct cred *personality_creds; - - personality_creds = idr_find(&ctx->personality_idr, id); - if (unlikely(!personality_creds)) { + req->work.creds = idr_find(&ctx->personality_idr, id); + if (unlikely(!req->work.creds)) { ret = -EINVAL; goto err_req; } - old_creds = override_creds(personality_creds); + get_cred(req->work.creds); } /* same numerical values with corresponding REQ_F_*, safe to copy */ @@ -4837,8 +4846,6 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, err_req: io_cqring_add_event(req, ret); io_double_put_req(req); - if (old_creds) - revert_creds(old_creds); return false; } @@ -4899,8 +4906,6 @@ err_req: } } - if (old_creds) - revert_creds(old_creds); return true; } -- cgit v1.2.3-59-g8ed1b From 52df1e564eb0470b2ecd1481e457932f09f49f5d Mon Sep 17 00:00:00 2001 From: Jonathan Neuschäfer Date: Sun, 23 Feb 2020 18:46:31 +0100 Subject: docs: networking: phy: Rephrase paragraph for clarity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's make it a little easier to read. Signed-off-by: Jonathan Neuschäfer Signed-off-by: David S. Miller --- Documentation/networking/phy.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/phy.rst b/Documentation/networking/phy.rst index 1e4735cc0553..256106054c8c 100644 --- a/Documentation/networking/phy.rst +++ b/Documentation/networking/phy.rst @@ -487,8 +487,9 @@ phy_register_fixup_for_id():: The stubs set one of the two matching criteria, and set the other one to match anything. -When phy_register_fixup() or \*_for_uid()/\*_for_id() is called at module, -unregister fixup and free allocate memory are required. +When phy_register_fixup() or \*_for_uid()/\*_for_id() is called at module load +time, the module needs to unregister the fixup and free allocated memory when +it's unloaded. Call one of following function before unloading module:: -- cgit v1.2.3-59-g8ed1b From 44343418d0f2f623cb9da6f5000df793131cbe3b Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 23 Feb 2020 14:38:40 +0100 Subject: net: ks8851-ml: Fix IRQ handling and locking The KS8851 requires that packet RX and TX are mutually exclusive. Currently, the driver hopes to achieve this by disabling interrupt from the card by writing the card registers and by disabling the interrupt on the interrupt controller. This however is racy on SMP. Replace this approach by expanding the spinlock used around the ks_start_xmit() TX path to ks_irq() RX path to assure true mutual exclusion and remove the interrupt enabling/disabling, which is now not needed anymore. Furthermore, disable interrupts also in ks_net_stop(), which was missing before. Note that a massive improvement here would be to re-use the KS8851 driver approach, which is to move the TX path into a worker thread, interrupt handling to threaded interrupt, and synchronize everything with mutexes, but that would be a much bigger rework, for a separate patch. Signed-off-by: Marek Vasut Cc: David S. Miller Cc: Lukas Wunner Cc: Petr Stetiar Cc: YueHaibing Signed-off-by: David S. Miller --- drivers/net/ethernet/micrel/ks8851_mll.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index 1c9e70c8cc30..58579baf3f7a 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -513,14 +513,17 @@ static irqreturn_t ks_irq(int irq, void *pw) { struct net_device *netdev = pw; struct ks_net *ks = netdev_priv(netdev); + unsigned long flags; u16 status; + spin_lock_irqsave(&ks->statelock, flags); /*this should be the first in IRQ handler */ ks_save_cmd_reg(ks); status = ks_rdreg16(ks, KS_ISR); if (unlikely(!status)) { ks_restore_cmd_reg(ks); + spin_unlock_irqrestore(&ks->statelock, flags); return IRQ_NONE; } @@ -546,6 +549,7 @@ static irqreturn_t ks_irq(int irq, void *pw) ks->netdev->stats.rx_over_errors++; /* this should be the last in IRQ handler*/ ks_restore_cmd_reg(ks); + spin_unlock_irqrestore(&ks->statelock, flags); return IRQ_HANDLED; } @@ -615,6 +619,7 @@ static int ks_net_stop(struct net_device *netdev) /* shutdown RX/TX QMU */ ks_disable_qmu(ks); + ks_disable_int(ks); /* set powermode to soft power down to save power */ ks_set_powermode(ks, PMECR_PM_SOFTDOWN); @@ -671,10 +676,9 @@ static netdev_tx_t ks_start_xmit(struct sk_buff *skb, struct net_device *netdev) { netdev_tx_t retv = NETDEV_TX_OK; struct ks_net *ks = netdev_priv(netdev); + unsigned long flags; - disable_irq(netdev->irq); - ks_disable_int(ks); - spin_lock(&ks->statelock); + spin_lock_irqsave(&ks->statelock, flags); /* Extra space are required: * 4 byte for alignment, 4 for status/length, 4 for CRC @@ -688,9 +692,7 @@ static netdev_tx_t ks_start_xmit(struct sk_buff *skb, struct net_device *netdev) dev_kfree_skb(skb); } else retv = NETDEV_TX_BUSY; - spin_unlock(&ks->statelock); - ks_enable_int(ks); - enable_irq(netdev->irq); + spin_unlock_irqrestore(&ks->statelock, flags); return retv; } -- cgit v1.2.3-59-g8ed1b From 503ba7c6961034ff0047707685644cad9287c226 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 20 Feb 2020 15:34:53 -0800 Subject: net: phy: Avoid multiple suspends It is currently possible for a PHY device to be suspended as part of a network device driver's suspend call while it is still being attached to that net_device, either via phy_suspend() or implicitly via phy_stop(). Later on, when the MDIO bus controller get suspended, we would attempt to suspend again the PHY because it is still attached to a network device. This is both a waste of time and creates an opportunity for improper clock/power management bugs to creep in. Fixes: 803dd9c77ac3 ("net: phy: avoid suspending twice a PHY") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 6a5056e0ae77..6131aca79823 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -247,7 +247,7 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) * MDIO bus driver and clock gated at this point. */ if (!netdev) - return !phydev->suspended; + goto out; if (netdev->wol_enabled) return false; @@ -267,7 +267,8 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) if (device_may_wakeup(&netdev->dev)) return false; - return true; +out: + return !phydev->suspended; } static int mdio_bus_phy_suspend(struct device *dev) -- cgit v1.2.3-59-g8ed1b From 6132c1d9033d158bd0464e90bc46544fbe0bd6bc Mon Sep 17 00:00:00 2001 From: Madhuparna Bhowmik Date: Sun, 23 Feb 2020 16:52:33 +0530 Subject: net: core: devlink.c: Hold devlink->lock from the beginning of devlink_dpipe_table_register() devlink_dpipe_table_find() should be called under either rcu_read_lock() or devlink->lock. devlink_dpipe_table_register() calls devlink_dpipe_table_find() without holding the lock and acquires it later. Therefore hold the devlink->lock from the beginning of devlink_dpipe_table_register(). Suggested-by: Jiri Pirko Signed-off-by: Madhuparna Bhowmik Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/devlink.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index 549ee56b7a21..8d0b558be942 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -6878,26 +6878,33 @@ int devlink_dpipe_table_register(struct devlink *devlink, void *priv, bool counter_control_extern) { struct devlink_dpipe_table *table; - - if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name)) - return -EEXIST; + int err = 0; if (WARN_ON(!table_ops->size_get)) return -EINVAL; + mutex_lock(&devlink->lock); + + if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name)) { + err = -EEXIST; + goto unlock; + } + table = kzalloc(sizeof(*table), GFP_KERNEL); - if (!table) - return -ENOMEM; + if (!table) { + err = -ENOMEM; + goto unlock; + } table->name = table_name; table->table_ops = table_ops; table->priv = priv; table->counter_control_extern = counter_control_extern; - mutex_lock(&devlink->lock); list_add_tail_rcu(&table->list, &devlink->dpipe_table_list); +unlock: mutex_unlock(&devlink->lock); - return 0; + return err; } EXPORT_SYMBOL_GPL(devlink_dpipe_table_register); -- cgit v1.2.3-59-g8ed1b From e3ae39edbce6dc933fb1393490d1b5d76d3edb90 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 24 Feb 2020 09:38:15 +0100 Subject: nl80211: explicitly include if_vlan.h We use that here, and do seem to get it through some recursive include, but better include it explicitly. Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20200224093814.1b9c258fec67.I45ac150d4e11c72eb263abec9f1f0c7add9bef2b@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 46be40e19e7f..5b19e9fac4aa 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3-59-g8ed1b From 253216ffb2a002a682c6f68bd3adff5b98b71de8 Mon Sep 17 00:00:00 2001 From: Madhuparna Bhowmik Date: Sun, 23 Feb 2020 20:03:02 +0530 Subject: mac80211: rx: avoid RCU list traversal under mutex local->sta_mtx is held in __ieee80211_check_fast_rx_iface(). No need to use list_for_each_entry_rcu() as it also requires a cond argument to avoid false lockdep warnings when not used in RCU read-side section (with CONFIG_PROVE_RCU_LIST). Therefore use list_for_each_entry(); Signed-off-by: Madhuparna Bhowmik Link: https://lore.kernel.org/r/20200223143302.15390-1-madhuparnabhowmik10@gmail.com Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 0e05ff037672..0ba98ad9bc85 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4114,7 +4114,7 @@ void __ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata) lockdep_assert_held(&local->sta_mtx); - list_for_each_entry_rcu(sta, &local->sta_list, list) { + list_for_each_entry(sta, &local->sta_list, list) { if (sdata != sta->sdata && (!sta->sdata->bss || sta->sdata->bss != sdata->bss)) continue; -- cgit v1.2.3-59-g8ed1b From 3eb55e6f753a379e293395de8d5f3be28351a7f8 Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Fri, 21 Feb 2020 10:32:34 +0800 Subject: drm/i915/gvt: Separate display reset from ALL_ENGINES reset ALL_ENGINES reset doesn't clobber display with the current gvt-g supported platforms. Thus ALL_ENGINES reset shouldn't reset the display engine registers emulated by gvt-g. This fixes guest warning like [ 14.622026] [drm] Initialized i915 1.6.0 20200114 for 0000:00:03.0 on minor 0 [ 14.967917] fbcon: i915drmfb (fb0) is primary device [ 25.100188] [drm:drm_atomic_helper_wait_for_dependencies [drm_kms_helper]] E RROR [CRTC:51:pipe A] flip_done timed out [ 25.100860] -----------[ cut here ]----------- [ 25.100861] pll on state mismatch (expected 0, found 1) [ 25.101024] WARNING: CPU: 1 PID: 30 at drivers/gpu/drm/i915/display/intel_dis play.c:14382 verify_single_dpll_state.isra.115+0x28f/0x320 [i915] [ 25.101025] Modules linked in: intel_rapl_msr intel_rapl_common kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel i915 aesni_intel cr ypto_simd cryptd glue_helper cec rc_core video drm_kms_helper joydev drm input_l eds i2c_algo_bit serio_raw fb_sys_fops syscopyarea sysfillrect sysimgblt mac_hid qemu_fw_cfg sch_fq_codel parport_pc ppdev lp parport ip_tables x_tables autofs4 e1000 psmouse i2c_piix4 pata_acpi floppy [ 25.101052] CPU: 1 PID: 30 Comm: kworker/u4:1 Not tainted 5.5.0+ #1 [ 25.101053] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1 .12.1-0-ga5cab58 04/01/2014 [ 25.101055] Workqueue: events_unbound async_run_entry_fn [ 25.101092] RIP: 0010:verify_single_dpll_state.isra.115+0x28f/0x320 [i915] [ 25.101093] Code: e0 d9 ff e9 a3 fe ff ff 80 3d e9 c2 11 00 00 44 89 f6 48 c7 c7 c0 9d 88 c0 75 3b e8 eb df d9 ff e9 c7 fe ff ff e8 d1 e0 ae c4 <0f> 0b e9 7a fe ff ff 80 3d c0 c2 11 00 00 8d 71 41 89 c2 48 c7 c7 [ 25.101093] RSP: 0018:ffffb1de80107878 EFLAGS: 00010286 [ 25.101094] RAX: 0000000000000000 RBX: ffffb1de80107884 RCX: 0000000000000007 [ 25.101095] RDX: 0000000000000000 RSI: 0000000000000002 RDI: ffff94fdfdd19740 [ 25.101095] RBP: ffffb1de80107938 R08: 0000000d6bfdc7b4 R09: 000000000000002b [ 25.101096] R10: ffff94fdf82dc000 R11: 0000000000000225 R12: 00000000000001f8 [ 25.101096] R13: ffff94fdb3ca6a90 R14: ffff94fdb3ca0000 R15: 0000000000000000 [ 25.101097] FS: 0000000000000000(0000) GS:ffff94fdfdd00000(0000) knlGS:00000 00000000000 [ 25.101098] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 25.101098] CR2: 00007fbc3e2be9c8 CR3: 000000003339a003 CR4: 0000000000360ee0 [ 25.101101] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 25.101101] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 25.101102] Call Trace: [ 25.101139] intel_atomic_commit_tail+0xde4/0x1520 [i915] [ 25.101141] ? flush_workqueue_prep_pwqs+0xfa/0x130 [ 25.101142] ? flush_workqueue+0x198/0x3c0 [ 25.101174] intel_atomic_commit+0x2ad/0x320 [i915] [ 25.101209] drm_atomic_commit+0x4a/0x50 [drm] [ 25.101220] drm_client_modeset_commit_atomic+0x1c4/0x200 [drm] [ 25.101231] drm_client_modeset_commit_force+0x47/0x170 [drm] [ 25.101250] drm_fb_helper_restore_fbdev_mode_unlocked+0x4e/0xa0 [drm_kms_hel per] [ 25.101255] drm_fb_helper_set_par+0x2d/0x60 [drm_kms_helper] [ 25.101287] intel_fbdev_set_par+0x1a/0x40 [i915] [ 25.101289] ? con_is_visible+0x2e/0x60 [ 25.101290] fbcon_init+0x378/0x600 [ 25.101292] visual_init+0xd5/0x130 [ 25.101296] do_bind_con_driver+0x217/0x430 [ 25.101297] do_take_over_console+0x7d/0x1b0 [ 25.101298] do_fbcon_takeover+0x5c/0xb0 [ 25.101299] fbcon_fb_registered+0x199/0x1a0 [ 25.101301] register_framebuffer+0x22c/0x330 [ 25.101306] __drm_fb_helper_initial_config_and_unlock+0x31a/0x520 [drm_kms_h elper] [ 25.101311] drm_fb_helper_initial_config+0x35/0x40 [drm_kms_helper] [ 25.101341] intel_fbdev_initial_config+0x18/0x30 [i915] [ 25.101342] async_run_entry_fn+0x3c/0x150 [ 25.101343] process_one_work+0x1fd/0x3f0 [ 25.101344] worker_thread+0x34/0x410 [ 25.101346] kthread+0x121/0x140 [ 25.101346] ? process_one_work+0x3f0/0x3f0 [ 25.101347] ? kthread_park+0x90/0x90 [ 25.101350] ret_from_fork+0x35/0x40 [ 25.101351] --[ end trace b5b47d44cd998ba1 ]-- Fixes: 6294b61ba769 ("drm/i915/gvt: add missing display part reset for vGPU reset") Signed-off-by: Tina Zhang Reviewed-by: Zhenyu Wang Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20200221023234.28635-1-tina.zhang@intel.com --- drivers/gpu/drm/i915/gvt/vgpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 85bd9bf4f6ee..487af6ea9972 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -560,9 +560,9 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, intel_vgpu_reset_mmio(vgpu, dmlr); populate_pvinfo_page(vgpu); - intel_vgpu_reset_display(vgpu); if (dmlr) { + intel_vgpu_reset_display(vgpu); intel_vgpu_reset_cfg_space(vgpu); /* only reset the failsafe mode when dmlr reset */ vgpu->failsafe = false; -- cgit v1.2.3-59-g8ed1b From 41726c9a50e7464beca7112d0aebf3a0090c62d2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 23 Feb 2020 13:11:42 -0700 Subject: io_uring: fix personality idr leak We somehow never free the idr, even though we init it for every ctx. Free it when the rest of the ring data is freed. Fixes: 071698e13ac6 ("io_uring: allow registering credentials") Reviewed-by: Stefano Garzarella Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 7d0be264527d..d961945cb332 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6339,6 +6339,7 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx) io_sqe_buffer_unregister(ctx); io_sqe_files_unregister(ctx); io_eventfd_unregister(ctx); + idr_destroy(&ctx->personality_idr); #if defined(CONFIG_UNIX) if (ctx->ring_sock) { -- cgit v1.2.3-59-g8ed1b From 51fdaa0490241e8cd41b40cbf43a336d1a014460 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 19 Feb 2020 15:38:00 +0900 Subject: scsi: sd_sbc: Fix sd_zbc_report_zones() The block layer generic blk_revalidate_disk_zones() checks the validity of zone descriptors reported by a disk using the blk_revalidate_zone_cb() callback function executed for each zone descriptor. If a ZBC disk reports invalid zone descriptors, blk_revalidate_disk_zones() returns an error and sd_zbc_read_zones() changes the disk capacity to 0, which in turn results in the gendisk structure capacity to be set to 0. This all works well for the first revalidate pass on a disk and the block layer detects the capactiy change. On the second revalidate pass, blk_revalidate_disk_zones() is called again and sd_zbc_report_zones() executed to check the zones a second time. However, for this second pass, the gendisk capacity is now 0, which results in sd_zbc_report_zones() to do nothing and to report success and no zones. blk_revalidate_disk_zones() in turn returns success and sets the disk queue chunk_sectors limit with zero as no zones were checked, causing a oops to trigger on the BUG_ON(!is_power_of_2(chunk_sectors)) in blk_queue_chunk_sectors(). Fix this by using the sdkp capacity field rather than the gendisk capacity for the report zones loop in sd_zbc_report_zones(). Also add a check to return immediately an error if the sdkp capacity is 0. With this fix, invalid/buggy ZBC disk scan does not trigger a oops and are exposed with a 0 capacity. This change also preserve the chance for the disk to be correctly revalidated on the second revalidate pass as the scsi disk structure capacity field is always set to the disk reported value when sd_zbc_report_zones() is called. Link: https://lore.kernel.org/r/20200219063800.880834-1-damien.lemoal@wdc.com Fixes: d41003513e61 ("block: rework zone reporting") Cc: Cc: # v5.5 Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/sd_zbc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index e4282bce5834..f45c22b09726 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -161,6 +161,7 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data) { struct scsi_disk *sdkp = scsi_disk(disk); + sector_t capacity = logical_to_sectors(sdkp->device, sdkp->capacity); unsigned int nr, i; unsigned char *buf; size_t offset, buflen = 0; @@ -171,11 +172,15 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, /* Not a zoned device */ return -EOPNOTSUPP; + if (!capacity) + /* Device gone or invalid */ + return -ENODEV; + buf = sd_zbc_alloc_report_buffer(sdkp, nr_zones, &buflen); if (!buf) return -ENOMEM; - while (zone_idx < nr_zones && sector < get_capacity(disk)) { + while (zone_idx < nr_zones && sector < capacity) { ret = sd_zbc_do_report_zones(sdkp, buf, buflen, sectors_to_logical(sdkp->device, sector), true); if (ret) -- cgit v1.2.3-59-g8ed1b From a3fd4bfe85fbb67cf4ec1232d0af625ece3c508b Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Wed, 19 Feb 2020 16:09:25 +0100 Subject: scsi: zfcp: fix wrong data and display format of SFP+ temperature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When implementing support for retrieval of local diagnostic data from the FCP channel, the wrong data format was assumed for the temperature of the local SFP+ connector. The Fibre Channel Link Services (FC-LS-3) specification is not clear on the format of the stored integer, and only after consulting the SNIA specification SFF-8472 did we realize it is stored as two's complement. Thus, the used data and display format is wrong, and highly misleading for users when the temperature should drop below 0°C (however unlikely that may be). To fix this, change the data format in `struct fsf_qtcb_bottom_port` from unsigned to signed, and change the printf format string used to generate `zfcp_sysfs_adapter_diag_sfp_temperature_show()` from `%hu` to `%hd`. Link: https://lore.kernel.org/r/d6e3be5428da5c9490cfff4df7cae868bc9f1a7e.1582039501.git.bblock@linux.ibm.com Fixes: a10a61e807b0 ("scsi: zfcp: support retrieval of SFP Data via Exchange Port Data") Fixes: 6028f7c4cd87 ("scsi: zfcp: introduce sysfs interface for diagnostics of local SFP transceiver") Cc: # 5.5+ Reviewed-by: Jens Remus Reviewed-by: Fedor Loshakov Reviewed-by: Steffen Maier Signed-off-by: Benjamin Block Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_fsf.h | 2 +- drivers/s390/scsi/zfcp_sysfs.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/scsi/zfcp_fsf.h b/drivers/s390/scsi/zfcp_fsf.h index 2b1e4da1944f..4bfb79f20588 100644 --- a/drivers/s390/scsi/zfcp_fsf.h +++ b/drivers/s390/scsi/zfcp_fsf.h @@ -410,7 +410,7 @@ struct fsf_qtcb_bottom_port { u8 cb_util; u8 a_util; u8 res2; - u16 temperature; + s16 temperature; u16 vcc; u16 tx_bias; u16 tx_power; diff --git a/drivers/s390/scsi/zfcp_sysfs.c b/drivers/s390/scsi/zfcp_sysfs.c index 494b9fe9cc94..a711a0d15100 100644 --- a/drivers/s390/scsi/zfcp_sysfs.c +++ b/drivers/s390/scsi/zfcp_sysfs.c @@ -800,7 +800,7 @@ static ZFCP_DEV_ATTR(adapter_diag, b2b_credit, 0400, static ZFCP_DEV_ATTR(adapter_diag_sfp, _name, 0400, \ zfcp_sysfs_adapter_diag_sfp_##_name##_show, NULL) -ZFCP_DEFINE_DIAG_SFP_ATTR(temperature, temperature, 5, "%hu"); +ZFCP_DEFINE_DIAG_SFP_ATTR(temperature, temperature, 6, "%hd"); ZFCP_DEFINE_DIAG_SFP_ATTR(vcc, vcc, 5, "%hu"); ZFCP_DEFINE_DIAG_SFP_ATTR(tx_bias, tx_bias, 5, "%hu"); ZFCP_DEFINE_DIAG_SFP_ATTR(tx_power, tx_power, 5, "%hu"); -- cgit v1.2.3-59-g8ed1b From a93236fcbe1d0248461b29c0f87cb0b510c94e6f Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 24 Feb 2020 11:15:59 +0100 Subject: KVM: s390: rstify new ioctls in api.rst We also need to rstify the new ioctls that we added in parallel to the rstification of the kvm docs. Signed-off-by: Christian Borntraeger Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 97a72a53fa4b..ebd383fba939 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -4611,35 +4611,38 @@ unpins the VPA pages and releases all the device pages that are used to track the secure pages by hypervisor. 4.122 KVM_S390_NORMAL_RESET +--------------------------- -Capability: KVM_CAP_S390_VCPU_RESETS -Architectures: s390 -Type: vcpu ioctl -Parameters: none -Returns: 0 +:Capability: KVM_CAP_S390_VCPU_RESETS +:Architectures: s390 +:Type: vcpu ioctl +:Parameters: none +:Returns: 0 This ioctl resets VCPU registers and control structures according to the cpu reset definition in the POP (Principles Of Operation). 4.123 KVM_S390_INITIAL_RESET +---------------------------- -Capability: none -Architectures: s390 -Type: vcpu ioctl -Parameters: none -Returns: 0 +:Capability: none +:Architectures: s390 +:Type: vcpu ioctl +:Parameters: none +:Returns: 0 This ioctl resets VCPU registers and control structures according to the initial cpu reset definition in the POP. However, the cpu is not put into ESA mode. This reset is a superset of the normal reset. 4.124 KVM_S390_CLEAR_RESET +-------------------------- -Capability: KVM_CAP_S390_VCPU_RESETS -Architectures: s390 -Type: vcpu ioctl -Parameters: none -Returns: 0 +:Capability: KVM_CAP_S390_VCPU_RESETS +:Architectures: s390 +:Type: vcpu ioctl +:Parameters: none +:Returns: 0 This ioctl resets VCPU registers and control structures according to the clear cpu reset definition in the POP. However, the cpu is not put -- cgit v1.2.3-59-g8ed1b From 84823ff80f7403752b59e00bb198724100dc611c Mon Sep 17 00:00:00 2001 From: Esben Haabendal Date: Fri, 21 Feb 2020 07:47:21 +0100 Subject: net: ll_temac: Fix race condition causing TX hang It is possible that the interrupt handler fires and frees up space in the TX ring in between checking for sufficient TX ring space and stopping the TX queue in temac_start_xmit. If this happens, the queue wake from the interrupt handler will occur before the queue is stopped, causing a lost wakeup and the adapter's transmit hanging. To avoid this, after stopping the queue, check again whether there is sufficient space in the TX ring. If so, wake up the queue again. This is a port of the similar fix in axienet driver, commit 7de44285c1f6 ("net: axienet: Fix race condition causing TX hang"). Fixes: 23ecc4bde21f ("net: ll_temac: fix checksum offload logic") Signed-off-by: Esben Haabendal Signed-off-by: David S. Miller --- drivers/net/ethernet/xilinx/ll_temac_main.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 6f11f52c9a9e..996004ef8bd4 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -788,6 +788,9 @@ static void temac_start_xmit_done(struct net_device *ndev) stat = be32_to_cpu(cur_p->app0); } + /* Matches barrier in temac_start_xmit */ + smp_mb(); + netif_wake_queue(ndev); } @@ -830,9 +833,19 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev) cur_p = &lp->tx_bd_v[lp->tx_bd_tail]; if (temac_check_tx_bd_space(lp, num_frag + 1)) { - if (!netif_queue_stopped(ndev)) - netif_stop_queue(ndev); - return NETDEV_TX_BUSY; + if (netif_queue_stopped(ndev)) + return NETDEV_TX_BUSY; + + netif_stop_queue(ndev); + + /* Matches barrier in temac_start_xmit_done */ + smp_mb(); + + /* Space might have just been freed - check again */ + if (temac_check_tx_bd_space(lp, num_frag)) + return NETDEV_TX_BUSY; + + netif_wake_queue(ndev); } cur_p->app0 = 0; -- cgit v1.2.3-59-g8ed1b From d07c849cd2b97d6809430dfb7e738ad31088037a Mon Sep 17 00:00:00 2001 From: Esben Haabendal Date: Fri, 21 Feb 2020 07:47:33 +0100 Subject: net: ll_temac: Add more error handling of dma_map_single() calls This adds error handling to the remaining dma_map_single() calls, so that behavior is well defined if/when we run out of DMA memory. Fixes: 92744989533c ("net: add Xilinx ll_temac device driver") Signed-off-by: Esben Haabendal Signed-off-by: David S. Miller --- drivers/net/ethernet/xilinx/ll_temac_main.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 996004ef8bd4..c368c3914bda 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -367,6 +367,8 @@ static int temac_dma_bd_init(struct net_device *ndev) skb_dma_addr = dma_map_single(ndev->dev.parent, skb->data, XTE_MAX_JUMBO_FRAME_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(ndev->dev.parent, skb_dma_addr)) + goto out; lp->rx_bd_v[i].phys = cpu_to_be32(skb_dma_addr); lp->rx_bd_v[i].len = cpu_to_be32(XTE_MAX_JUMBO_FRAME_SIZE); lp->rx_bd_v[i].app0 = cpu_to_be32(STS_CTRL_APP0_IRQONEND); @@ -863,12 +865,13 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev) skb_dma_addr = dma_map_single(ndev->dev.parent, skb->data, skb_headlen(skb), DMA_TO_DEVICE); cur_p->len = cpu_to_be32(skb_headlen(skb)); + if (WARN_ON_ONCE(dma_mapping_error(ndev->dev.parent, skb_dma_addr))) + return NETDEV_TX_BUSY; cur_p->phys = cpu_to_be32(skb_dma_addr); ptr_to_txbd((void *)skb, cur_p); for (ii = 0; ii < num_frag; ii++) { - lp->tx_bd_tail++; - if (lp->tx_bd_tail >= TX_BD_NUM) + if (++lp->tx_bd_tail >= TX_BD_NUM) lp->tx_bd_tail = 0; cur_p = &lp->tx_bd_v[lp->tx_bd_tail]; @@ -876,6 +879,25 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev) skb_frag_address(frag), skb_frag_size(frag), DMA_TO_DEVICE); + if (dma_mapping_error(ndev->dev.parent, skb_dma_addr)) { + if (--lp->tx_bd_tail < 0) + lp->tx_bd_tail = TX_BD_NUM - 1; + cur_p = &lp->tx_bd_v[lp->tx_bd_tail]; + while (--ii >= 0) { + --frag; + dma_unmap_single(ndev->dev.parent, + be32_to_cpu(cur_p->phys), + skb_frag_size(frag), + DMA_TO_DEVICE); + if (--lp->tx_bd_tail < 0) + lp->tx_bd_tail = TX_BD_NUM - 1; + cur_p = &lp->tx_bd_v[lp->tx_bd_tail]; + } + dma_unmap_single(ndev->dev.parent, + be32_to_cpu(cur_p->phys), + skb_headlen(skb), DMA_TO_DEVICE); + return NETDEV_TX_BUSY; + } cur_p->phys = cpu_to_be32(skb_dma_addr); cur_p->len = cpu_to_be32(skb_frag_size(frag)); cur_p->app0 = 0; -- cgit v1.2.3-59-g8ed1b From 770d9c67974c4c71af4beb786dc43162ad2a15ba Mon Sep 17 00:00:00 2001 From: Esben Haabendal Date: Fri, 21 Feb 2020 07:47:45 +0100 Subject: net: ll_temac: Fix RX buffer descriptor handling on GFP_ATOMIC pressure Failures caused by GFP_ATOMIC memory pressure have been observed, and due to the missing error handling, results in kernel crash such as [1876998.350133] kernel BUG at mm/slub.c:3952! [1876998.350141] invalid opcode: 0000 [#1] PREEMPT SMP PTI [1876998.350147] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 5.3.0-scnxt #1 [1876998.350150] Hardware name: N/A N/A/COMe-bIP2, BIOS CCR2R920 03/01/2017 [1876998.350160] RIP: 0010:kfree+0x1ca/0x220 [1876998.350164] Code: 85 db 74 49 48 8b 95 68 01 00 00 48 31 c2 48 89 10 e9 d7 fe ff ff 49 8b 04 24 a9 00 00 01 00 75 0b 49 8b 44 24 08 a8 01 75 02 <0f> 0b 49 8b 04 24 31 f6 a9 00 00 01 00 74 06 41 0f b6 74 24 5b [1876998.350172] RSP: 0018:ffffc900000f0df0 EFLAGS: 00010246 [1876998.350177] RAX: ffffea00027f0708 RBX: ffff888008d78000 RCX: 0000000000391372 [1876998.350181] RDX: 0000000000000000 RSI: ffffe8ffffd01400 RDI: ffff888008d78000 [1876998.350185] RBP: ffff8881185a5d00 R08: ffffc90000087dd8 R09: 000000000000280a [1876998.350189] R10: 0000000000000002 R11: 0000000000000000 R12: ffffea0000235e00 [1876998.350193] R13: ffff8881185438a0 R14: 0000000000000000 R15: ffff888118543870 [1876998.350198] FS: 0000000000000000(0000) GS:ffff88811f300000(0000) knlGS:0000000000000000 [1876998.350203] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 s#1 Part1 [1876998.350206] CR2: 00007f8dac7b09f0 CR3: 000000011e20a006 CR4: 00000000001606e0 [1876998.350210] Call Trace: [1876998.350215] [1876998.350224] ? __netif_receive_skb_core+0x70a/0x920 [1876998.350229] kfree_skb+0x32/0xb0 [1876998.350234] __netif_receive_skb_core+0x70a/0x920 [1876998.350240] __netif_receive_skb_one_core+0x36/0x80 [1876998.350245] process_backlog+0x8b/0x150 [1876998.350250] net_rx_action+0xf7/0x340 [1876998.350255] __do_softirq+0x10f/0x353 [1876998.350262] irq_exit+0xb2/0xc0 [1876998.350265] do_IRQ+0x77/0xd0 [1876998.350271] common_interrupt+0xf/0xf [1876998.350274] In order to handle such failures more graceful, this change splits the receive loop into one for consuming the received buffers, and one for allocating new buffers. When GFP_ATOMIC allocations fail, the receive will continue with the buffers that is still there, and with the expectation that the allocations will succeed in a later call to receive. Fixes: 92744989533c ("net: add Xilinx ll_temac device driver") Signed-off-by: Esben Haabendal Signed-off-by: David S. Miller --- drivers/net/ethernet/xilinx/ll_temac.h | 1 + drivers/net/ethernet/xilinx/ll_temac_main.c | 112 ++++++++++++++++++++-------- 2 files changed, 82 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/xilinx/ll_temac.h b/drivers/net/ethernet/xilinx/ll_temac.h index 276292bca334..99fe059e5c7f 100644 --- a/drivers/net/ethernet/xilinx/ll_temac.h +++ b/drivers/net/ethernet/xilinx/ll_temac.h @@ -375,6 +375,7 @@ struct temac_local { int tx_bd_next; int tx_bd_tail; int rx_bd_ci; + int rx_bd_tail; /* DMA channel control setup */ u32 tx_chnl_ctrl; diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index c368c3914bda..255207f2fd27 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -389,12 +389,13 @@ static int temac_dma_bd_init(struct net_device *ndev) lp->tx_bd_next = 0; lp->tx_bd_tail = 0; lp->rx_bd_ci = 0; + lp->rx_bd_tail = RX_BD_NUM - 1; /* Enable RX DMA transfers */ wmb(); lp->dma_out(lp, RX_CURDESC_PTR, lp->rx_bd_p); lp->dma_out(lp, RX_TAILDESC_PTR, - lp->rx_bd_p + (sizeof(*lp->rx_bd_v) * (RX_BD_NUM - 1))); + lp->rx_bd_p + (sizeof(*lp->rx_bd_v) * lp->rx_bd_tail)); /* Prepare for TX DMA transfer */ lp->dma_out(lp, TX_CURDESC_PTR, lp->tx_bd_p); @@ -923,27 +924,41 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev) static void ll_temac_recv(struct net_device *ndev) { struct temac_local *lp = netdev_priv(ndev); - struct sk_buff *skb, *new_skb; - unsigned int bdstat; - struct cdmac_bd *cur_p; - dma_addr_t tail_p, skb_dma_addr; - int length; unsigned long flags; + int rx_bd; + bool update_tail = false; spin_lock_irqsave(&lp->rx_lock, flags); - tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci; - cur_p = &lp->rx_bd_v[lp->rx_bd_ci]; - - bdstat = be32_to_cpu(cur_p->app0); - while ((bdstat & STS_CTRL_APP0_CMPLT)) { + /* Process all received buffers, passing them on network + * stack. After this, the buffer descriptors will be in an + * un-allocated stage, where no skb is allocated for it, and + * they are therefore not available for TEMAC/DMA. + */ + do { + struct cdmac_bd *bd = &lp->rx_bd_v[lp->rx_bd_ci]; + struct sk_buff *skb = lp->rx_skb[lp->rx_bd_ci]; + unsigned int bdstat = be32_to_cpu(bd->app0); + int length; + + /* While this should not normally happen, we can end + * here when GFP_ATOMIC allocations fail, and we + * therefore have un-allocated buffers. + */ + if (!skb) + break; - skb = lp->rx_skb[lp->rx_bd_ci]; - length = be32_to_cpu(cur_p->app4) & 0x3FFF; + /* Loop over all completed buffer descriptors */ + if (!(bdstat & STS_CTRL_APP0_CMPLT)) + break; - dma_unmap_single(ndev->dev.parent, be32_to_cpu(cur_p->phys), + dma_unmap_single(ndev->dev.parent, be32_to_cpu(bd->phys), XTE_MAX_JUMBO_FRAME_SIZE, DMA_FROM_DEVICE); + /* The buffer is not valid for DMA anymore */ + bd->phys = 0; + bd->len = 0; + length = be32_to_cpu(bd->app4) & 0x3FFF; skb_put(skb, length); skb->protocol = eth_type_trans(skb, ndev); skb_checksum_none_assert(skb); @@ -958,39 +973,74 @@ static void ll_temac_recv(struct net_device *ndev) * (back) for proper IP checksum byte order * (be16). */ - skb->csum = htons(be32_to_cpu(cur_p->app3) & 0xFFFF); + skb->csum = htons(be32_to_cpu(bd->app3) & 0xFFFF); skb->ip_summed = CHECKSUM_COMPLETE; } if (!skb_defer_rx_timestamp(skb)) netif_rx(skb); + /* The skb buffer is now owned by network stack above */ + lp->rx_skb[lp->rx_bd_ci] = NULL; ndev->stats.rx_packets++; ndev->stats.rx_bytes += length; - new_skb = netdev_alloc_skb_ip_align(ndev, - XTE_MAX_JUMBO_FRAME_SIZE); - if (!new_skb) { - spin_unlock_irqrestore(&lp->rx_lock, flags); - return; + rx_bd = lp->rx_bd_ci; + if (++lp->rx_bd_ci >= RX_BD_NUM) + lp->rx_bd_ci = 0; + } while (rx_bd != lp->rx_bd_tail); + + /* Allocate new buffers for those buffer descriptors that were + * passed to network stack. Note that GFP_ATOMIC allocations + * can fail (e.g. when a larger burst of GFP_ATOMIC + * allocations occurs), so while we try to allocate all + * buffers in the same interrupt where they were processed, we + * continue with what we could get in case of allocation + * failure. Allocation of remaining buffers will be retried + * in following calls. + */ + while (1) { + struct sk_buff *skb; + struct cdmac_bd *bd; + dma_addr_t skb_dma_addr; + + rx_bd = lp->rx_bd_tail + 1; + if (rx_bd >= RX_BD_NUM) + rx_bd = 0; + bd = &lp->rx_bd_v[rx_bd]; + + if (bd->phys) + break; /* All skb's allocated */ + + skb = netdev_alloc_skb_ip_align(ndev, XTE_MAX_JUMBO_FRAME_SIZE); + if (!skb) { + dev_warn(&ndev->dev, "skb alloc failed\n"); + break; } - cur_p->app0 = cpu_to_be32(STS_CTRL_APP0_IRQONEND); - skb_dma_addr = dma_map_single(ndev->dev.parent, new_skb->data, + skb_dma_addr = dma_map_single(ndev->dev.parent, skb->data, XTE_MAX_JUMBO_FRAME_SIZE, DMA_FROM_DEVICE); - cur_p->phys = cpu_to_be32(skb_dma_addr); - cur_p->len = cpu_to_be32(XTE_MAX_JUMBO_FRAME_SIZE); - lp->rx_skb[lp->rx_bd_ci] = new_skb; + if (WARN_ON_ONCE(dma_mapping_error(ndev->dev.parent, + skb_dma_addr))) { + dev_kfree_skb_any(skb); + break; + } - lp->rx_bd_ci++; - if (lp->rx_bd_ci >= RX_BD_NUM) - lp->rx_bd_ci = 0; + bd->phys = cpu_to_be32(skb_dma_addr); + bd->len = cpu_to_be32(XTE_MAX_JUMBO_FRAME_SIZE); + bd->app0 = cpu_to_be32(STS_CTRL_APP0_IRQONEND); + lp->rx_skb[rx_bd] = skb; + + lp->rx_bd_tail = rx_bd; + update_tail = true; + } - cur_p = &lp->rx_bd_v[lp->rx_bd_ci]; - bdstat = be32_to_cpu(cur_p->app0); + /* Move tail pointer when buffers have been allocated */ + if (update_tail) { + lp->dma_out(lp, RX_TAILDESC_PTR, + lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_tail); } - lp->dma_out(lp, RX_TAILDESC_PTR, tail_p); spin_unlock_irqrestore(&lp->rx_lock, flags); } -- cgit v1.2.3-59-g8ed1b From 1d63b8d66d146deaaedbe16c80de105f685ea012 Mon Sep 17 00:00:00 2001 From: Esben Haabendal Date: Fri, 21 Feb 2020 07:47:58 +0100 Subject: net: ll_temac: Handle DMA halt condition caused by buffer underrun The SDMA engine used by TEMAC halts operation when it has finished processing of the last buffer descriptor in the buffer ring. Unfortunately, no interrupt event is generated when this happens, so we need to setup another mechanism to make sure DMA operation is restarted when enough buffers have been added to the ring. Fixes: 92744989533c ("net: add Xilinx ll_temac device driver") Signed-off-by: Esben Haabendal Signed-off-by: David S. Miller --- drivers/net/ethernet/xilinx/ll_temac.h | 3 ++ drivers/net/ethernet/xilinx/ll_temac_main.c | 58 ++++++++++++++++++++++++++--- 2 files changed, 56 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/xilinx/ll_temac.h b/drivers/net/ethernet/xilinx/ll_temac.h index 99fe059e5c7f..53fb8141f1a6 100644 --- a/drivers/net/ethernet/xilinx/ll_temac.h +++ b/drivers/net/ethernet/xilinx/ll_temac.h @@ -380,6 +380,9 @@ struct temac_local { /* DMA channel control setup */ u32 tx_chnl_ctrl; u32 rx_chnl_ctrl; + u8 coalesce_count_rx; + + struct delayed_work restart_work; }; /* Wrappers for temac_ior()/temac_iow() function pointers above */ diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 255207f2fd27..9461acec6f70 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -866,8 +867,11 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev) skb_dma_addr = dma_map_single(ndev->dev.parent, skb->data, skb_headlen(skb), DMA_TO_DEVICE); cur_p->len = cpu_to_be32(skb_headlen(skb)); - if (WARN_ON_ONCE(dma_mapping_error(ndev->dev.parent, skb_dma_addr))) - return NETDEV_TX_BUSY; + if (WARN_ON_ONCE(dma_mapping_error(ndev->dev.parent, skb_dma_addr))) { + dev_kfree_skb_any(skb); + ndev->stats.tx_dropped++; + return NETDEV_TX_OK; + } cur_p->phys = cpu_to_be32(skb_dma_addr); ptr_to_txbd((void *)skb, cur_p); @@ -897,7 +901,9 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev) dma_unmap_single(ndev->dev.parent, be32_to_cpu(cur_p->phys), skb_headlen(skb), DMA_TO_DEVICE); - return NETDEV_TX_BUSY; + dev_kfree_skb_any(skb); + ndev->stats.tx_dropped++; + return NETDEV_TX_OK; } cur_p->phys = cpu_to_be32(skb_dma_addr); cur_p->len = cpu_to_be32(skb_frag_size(frag)); @@ -920,6 +926,17 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev) return NETDEV_TX_OK; } +static int ll_temac_recv_buffers_available(struct temac_local *lp) +{ + int available; + + if (!lp->rx_skb[lp->rx_bd_ci]) + return 0; + available = 1 + lp->rx_bd_tail - lp->rx_bd_ci; + if (available <= 0) + available += RX_BD_NUM; + return available; +} static void ll_temac_recv(struct net_device *ndev) { @@ -990,6 +1007,18 @@ static void ll_temac_recv(struct net_device *ndev) lp->rx_bd_ci = 0; } while (rx_bd != lp->rx_bd_tail); + /* DMA operations will halt when the last buffer descriptor is + * processed (ie. the one pointed to by RX_TAILDESC_PTR). + * When that happens, no more interrupt events will be + * generated. No IRQ_COAL or IRQ_DLY, and not even an + * IRQ_ERR. To avoid stalling, we schedule a delayed work + * when there is a potential risk of that happening. The work + * will call this function, and thus re-schedule itself until + * enough buffers are available again. + */ + if (ll_temac_recv_buffers_available(lp) < lp->coalesce_count_rx) + schedule_delayed_work(&lp->restart_work, HZ / 1000); + /* Allocate new buffers for those buffer descriptors that were * passed to network stack. Note that GFP_ATOMIC allocations * can fail (e.g. when a larger burst of GFP_ATOMIC @@ -1045,6 +1074,18 @@ static void ll_temac_recv(struct net_device *ndev) spin_unlock_irqrestore(&lp->rx_lock, flags); } +/* Function scheduled to ensure a restart in case of DMA halt + * condition caused by running out of buffer descriptors. + */ +static void ll_temac_restart_work_func(struct work_struct *work) +{ + struct temac_local *lp = container_of(work, struct temac_local, + restart_work.work); + struct net_device *ndev = lp->ndev; + + ll_temac_recv(ndev); +} + static irqreturn_t ll_temac_tx_irq(int irq, void *_ndev) { struct net_device *ndev = _ndev; @@ -1137,6 +1178,8 @@ static int temac_stop(struct net_device *ndev) dev_dbg(&ndev->dev, "temac_close()\n"); + cancel_delayed_work_sync(&lp->restart_work); + free_irq(lp->tx_irq, ndev); free_irq(lp->rx_irq, ndev); @@ -1258,6 +1301,7 @@ static int temac_probe(struct platform_device *pdev) lp->dev = &pdev->dev; lp->options = XTE_OPTION_DEFAULTS; spin_lock_init(&lp->rx_lock); + INIT_DELAYED_WORK(&lp->restart_work, ll_temac_restart_work_func); /* Setup mutex for synchronization of indirect register access */ if (pdata) { @@ -1364,6 +1408,7 @@ static int temac_probe(struct platform_device *pdev) */ lp->tx_chnl_ctrl = 0x10220000; lp->rx_chnl_ctrl = 0xff070000; + lp->coalesce_count_rx = 0x07; /* Finished with the DMA node; drop the reference */ of_node_put(dma_np); @@ -1395,11 +1440,14 @@ static int temac_probe(struct platform_device *pdev) (pdata->tx_irq_count << 16); else lp->tx_chnl_ctrl = 0x10220000; - if (pdata->rx_irq_timeout || pdata->rx_irq_count) + if (pdata->rx_irq_timeout || pdata->rx_irq_count) { lp->rx_chnl_ctrl = (pdata->rx_irq_timeout << 24) | (pdata->rx_irq_count << 16); - else + lp->coalesce_count_rx = pdata->rx_irq_count; + } else { lp->rx_chnl_ctrl = 0xff070000; + lp->coalesce_count_rx = 0x07; + } } /* Error handle returned DMA RX and TX interrupts */ -- cgit v1.2.3-59-g8ed1b From 823d81b0fa2cd83a640734e74caee338b5d3c093 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 24 Feb 2020 18:46:22 +0200 Subject: net: bridge: fix stale eth hdr pointer in br_dev_xmit In br_dev_xmit() we perform vlan filtering in br_allowed_ingress() but if the packet has the vlan header inside (e.g. bridge with disabled tx-vlan-offload) then the vlan filtering code will use skb_vlan_untag() to extract the vid before filtering which in turn calls pskb_may_pull() and we may end up with a stale eth pointer. Moreover the cached eth header pointer will generally be wrong after that operation. Remove the eth header caching and just use eth_hdr() directly, the compiler does the right thing and calculates it only once so we don't lose anything. Fixes: 057658cb33fb ("bridge: suppress arp pkts on BR_NEIGH_SUPPRESS ports") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_device.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index dc3d2c1dd9d5..0e3dbc5f3c34 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -34,7 +34,6 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) const struct nf_br_ops *nf_ops; u8 state = BR_STATE_FORWARDING; const unsigned char *dest; - struct ethhdr *eth; u16 vid = 0; rcu_read_lock(); @@ -54,15 +53,14 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) BR_INPUT_SKB_CB(skb)->frag_max_size = 0; skb_reset_mac_header(skb); - eth = eth_hdr(skb); skb_pull(skb, ETH_HLEN); if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid, &state)) goto out; if (IS_ENABLED(CONFIG_INET) && - (eth->h_proto == htons(ETH_P_ARP) || - eth->h_proto == htons(ETH_P_RARP)) && + (eth_hdr(skb)->h_proto == htons(ETH_P_ARP) || + eth_hdr(skb)->h_proto == htons(ETH_P_RARP)) && br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) { br_do_proxy_suppress_arp(skb, br, vid, NULL); } else if (IS_ENABLED(CONFIG_IPV6) && -- cgit v1.2.3-59-g8ed1b From 2e90ca68b0d2f5548804f22f0dd61145516171e3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 21 Feb 2020 12:43:35 -0800 Subject: floppy: check FDC index for errors before assigning it Jordy Zomer reported a KASAN out-of-bounds read in the floppy driver in wait_til_ready(). Which on the face of it can't happen, since as Willy Tarreau points out, the function does no particular memory access. Except through the FDCS macro, which just indexes a static allocation through teh current fdc, which is always checked against N_FDC. Except the checking happens after we've already assigned the value. The floppy driver is a disgrace (a lot of it going back to my original horrd "design"), and has no real maintainer. Nobody has the hardware, and nobody really cares. But it still gets used in virtual environment because it's one of those things that everybody supports. The whole thing should be re-written, or at least parts of it should be seriously cleaned up. The 'current fdc' index, which is used by the FDCS macro, and which is often shadowed by a local 'fdc' variable, is a prime example of how not to write code. But because nobody has the hardware or the motivation, let's just fix up the immediate problem with a nasty band-aid: test the fdc index before actually assigning it to the static 'fdc' variable. Reported-by: Jordy Zomer Cc: Willy Tarreau Cc: Dan Carpenter Signed-off-by: Linus Torvalds --- drivers/block/floppy.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index cd3612e4e2e1..8ef65c085640 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -853,14 +853,17 @@ static void reset_fdc_info(int mode) /* selects the fdc and drive, and enables the fdc's input/dma. */ static void set_fdc(int drive) { + unsigned int new_fdc = fdc; + if (drive >= 0 && drive < N_DRIVE) { - fdc = FDC(drive); + new_fdc = FDC(drive); current_drive = drive; } - if (fdc != 1 && fdc != 0) { + if (new_fdc >= N_FDC) { pr_info("bad fdc value\n"); return; } + fdc = new_fdc; set_dor(fdc, ~0, 8); #if N_FDC > 1 set_dor(1 - fdc, ~8, 0); -- cgit v1.2.3-59-g8ed1b From 03264ddde2453f6877a7d637d84068079632a3c5 Mon Sep 17 00:00:00 2001 From: Adam Williamson Date: Wed, 19 Feb 2020 17:50:07 +0100 Subject: scsi: compat_ioctl: cdrom: Replace .ioctl with .compat_ioctl in four appropriate places Arnd Bergmann inadvertently typoed these in d320a9551e394 and 64cbfa96551a; they seem to be the cause of https://bugzilla.redhat.com/show_bug.cgi?id=1801353 , invalid SCSI commands when udev tries to query a DVD drive. [arnd] Found another instance of the same bug, also introduced in my compat_ioctl series. Link: https://bugzilla.redhat.com/show_bug.cgi?id=1801353 Link: https://lore.kernel.org/r/20200219165139.3467320-1-arnd@arndb.de Fixes: c103d6ee69f9 ("compat_ioctl: ide: floppy: add handler") Fixes: 64cbfa96551a ("compat_ioctl: move cdrom commands into cdrom.c") Fixes: d320a9551e39 ("compat_ioctl: scsi: move ioctl handling into drivers") Bisected-by: Chris Murphy Signed-off-by: Arnd Bergmann Signed-off-by: Adam Williamson Signed-off-by: Martin K. Petersen --- drivers/block/paride/pcd.c | 2 +- drivers/cdrom/gdrom.c | 2 +- drivers/ide/ide-gd.c | 2 +- drivers/scsi/sr.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 117cfc8cd05a..cda5cf917e9a 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -276,7 +276,7 @@ static const struct block_device_operations pcd_bdops = { .release = pcd_block_release, .ioctl = pcd_block_ioctl, #ifdef CONFIG_COMPAT - .ioctl = blkdev_compat_ptr_ioctl, + .compat_ioctl = blkdev_compat_ptr_ioctl, #endif .check_events = pcd_block_check_events, }; diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 886b2638c730..c51292c2a131 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -519,7 +519,7 @@ static const struct block_device_operations gdrom_bdops = { .check_events = gdrom_bdops_check_events, .ioctl = gdrom_bdops_ioctl, #ifdef CONFIG_COMPAT - .ioctl = blkdev_compat_ptr_ioctl, + .compat_ioctl = blkdev_compat_ptr_ioctl, #endif }; diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c index 1bb99b556393..05c26986637b 100644 --- a/drivers/ide/ide-gd.c +++ b/drivers/ide/ide-gd.c @@ -361,7 +361,7 @@ static const struct block_device_operations ide_gd_ops = { .release = ide_gd_release, .ioctl = ide_gd_ioctl, #ifdef CONFIG_COMPAT - .ioctl = ide_gd_compat_ioctl, + .compat_ioctl = ide_gd_compat_ioctl, #endif .getgeo = ide_gd_getgeo, .check_events = ide_gd_check_events, diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 0fbb8fe6e521..e4240e4ae8bb 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -688,7 +688,7 @@ static const struct block_device_operations sr_bdops = .release = sr_block_release, .ioctl = sr_block_ioctl, #ifdef CONFIG_COMPAT - .ioctl = sr_block_compat_ioctl, + .compat_ioctl = sr_block_compat_ioctl, #endif .check_events = sr_block_check_events, .revalidate_disk = sr_block_revalidate_disk, -- cgit v1.2.3-59-g8ed1b From a0a31fd84f8f66828790d860545d4167777d58c6 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Fri, 7 Feb 2020 17:52:44 +0800 Subject: riscv: allocate a complete page size for each page table Each page table should be created by allocating a complete page size for it. Otherwise, the content of the page table would be corrupted somewhere through memory allocation which allocates the memory at the middle of the page table for other use. Signed-off-by: Zong Li Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/kasan_init.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index f0cc86040587..f8eaf7e73a23 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -46,29 +46,34 @@ asmlinkage void __init kasan_early_init(void) static void __init populate(void *start, void *end) { - unsigned long i; + unsigned long i, offset; unsigned long vaddr = (unsigned long)start & PAGE_MASK; unsigned long vend = PAGE_ALIGN((unsigned long)end); unsigned long n_pages = (vend - vaddr) / PAGE_SIZE; + unsigned long n_ptes = + ((n_pages + PTRS_PER_PTE) & -PTRS_PER_PTE) / PTRS_PER_PTE; unsigned long n_pmds = - (n_pages % PTRS_PER_PTE) ? n_pages / PTRS_PER_PTE + 1 : - n_pages / PTRS_PER_PTE; + ((n_ptes + PTRS_PER_PMD) & -PTRS_PER_PMD) / PTRS_PER_PMD; + + pte_t *pte = + memblock_alloc(n_ptes * PTRS_PER_PTE * sizeof(pte_t), PAGE_SIZE); + pmd_t *pmd = + memblock_alloc(n_pmds * PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE); pgd_t *pgd = pgd_offset_k(vaddr); - pmd_t *pmd = memblock_alloc(n_pmds * sizeof(pmd_t), PAGE_SIZE); - pte_t *pte = memblock_alloc(n_pages * sizeof(pte_t), PAGE_SIZE); for (i = 0; i < n_pages; i++) { phys_addr_t phys = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); - - set_pte(pte + i, pfn_pte(PHYS_PFN(phys), PAGE_KERNEL)); + set_pte(&pte[i], pfn_pte(PHYS_PFN(phys), PAGE_KERNEL)); } - for (i = 0; i < n_pmds; ++pgd, i += PTRS_PER_PMD) - set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(((uintptr_t)(pmd + i)))), + for (i = 0, offset = 0; i < n_ptes; i++, offset += PTRS_PER_PTE) + set_pmd(&pmd[i], + pfn_pmd(PFN_DOWN(__pa(&pte[offset])), __pgprot(_PAGE_TABLE))); - for (i = 0; i < n_pages; ++pmd, i += PTRS_PER_PTE) - set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa((uintptr_t)(pte + i))), + for (i = 0, offset = 0; i < n_pmds; i++, offset += PTRS_PER_PMD) + set_pgd(&pgd[i], + pfn_pgd(PFN_DOWN(__pa(&pmd[offset])), __pgprot(_PAGE_TABLE))); flush_tlb_all(); -- cgit v1.2.3-59-g8ed1b From 8458ca147c204e7db124e8baa8fede219006e80d Mon Sep 17 00:00:00 2001 From: Zong Li Date: Fri, 7 Feb 2020 17:52:45 +0800 Subject: riscv: adjust the indent Adjust the indent to match Linux coding style. Signed-off-by: Zong Li Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/kasan_init.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index f8eaf7e73a23..ec0ca90dd900 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -19,18 +19,20 @@ asmlinkage void __init kasan_early_init(void) for (i = 0; i < PTRS_PER_PTE; ++i) set_pte(kasan_early_shadow_pte + i, mk_pte(virt_to_page(kasan_early_shadow_page), - PAGE_KERNEL)); + PAGE_KERNEL)); for (i = 0; i < PTRS_PER_PMD; ++i) set_pmd(kasan_early_shadow_pmd + i, - pfn_pmd(PFN_DOWN(__pa((uintptr_t)kasan_early_shadow_pte)), - __pgprot(_PAGE_TABLE))); + pfn_pmd(PFN_DOWN + (__pa((uintptr_t) kasan_early_shadow_pte)), + __pgprot(_PAGE_TABLE))); for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END; i += PGDIR_SIZE, ++pgd) set_pgd(pgd, - pfn_pgd(PFN_DOWN(__pa(((uintptr_t)kasan_early_shadow_pmd))), - __pgprot(_PAGE_TABLE))); + pfn_pgd(PFN_DOWN + (__pa(((uintptr_t) kasan_early_shadow_pmd))), + __pgprot(_PAGE_TABLE))); /* init for swapper_pg_dir */ pgd = pgd_offset_k(KASAN_SHADOW_START); @@ -38,8 +40,9 @@ asmlinkage void __init kasan_early_init(void) for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END; i += PGDIR_SIZE, ++pgd) set_pgd(pgd, - pfn_pgd(PFN_DOWN(__pa(((uintptr_t)kasan_early_shadow_pmd))), - __pgprot(_PAGE_TABLE))); + pfn_pgd(PFN_DOWN + (__pa(((uintptr_t) kasan_early_shadow_pmd))), + __pgprot(_PAGE_TABLE))); flush_tlb_all(); } @@ -86,7 +89,8 @@ void __init kasan_init(void) unsigned long i; kasan_populate_early_shadow((void *)KASAN_SHADOW_START, - (void *)kasan_mem_to_shadow((void *)VMALLOC_END)); + (void *)kasan_mem_to_shadow((void *) + VMALLOC_END)); for_each_memblock(memory, reg) { void *start = (void *)__va(reg->base); @@ -95,14 +99,14 @@ void __init kasan_init(void) if (start >= end) break; - populate(kasan_mem_to_shadow(start), - kasan_mem_to_shadow(end)); + populate(kasan_mem_to_shadow(start), kasan_mem_to_shadow(end)); }; for (i = 0; i < PTRS_PER_PTE; i++) set_pte(&kasan_early_shadow_pte[i], mk_pte(virt_to_page(kasan_early_shadow_page), - __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_ACCESSED))); + __pgprot(_PAGE_PRESENT | _PAGE_READ | + _PAGE_ACCESSED))); memset(kasan_early_shadow_page, 0, PAGE_SIZE); init_task.kasan_depth = 0; -- cgit v1.2.3-59-g8ed1b From 756125289285f6e55a03861bf4b6257aa3d19a93 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 24 Feb 2020 16:38:57 -0500 Subject: audit: always check the netlink payload length in audit_receive_msg() This patch ensures that we always check the netlink payload length in audit_receive_msg() before we take any action on the payload itself. Cc: stable@vger.kernel.org Reported-by: syzbot+399c44bf1f43b8747403@syzkaller.appspotmail.com Reported-by: syzbot+e4b12d8d202701f08b6d@syzkaller.appspotmail.com Signed-off-by: Paul Moore --- kernel/audit.c | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/kernel/audit.c b/kernel/audit.c index 17b0d523afb3..9ddfe2aa6671 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1101,13 +1101,11 @@ static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature audit_log_end(ab); } -static int audit_set_feature(struct sk_buff *skb) +static int audit_set_feature(struct audit_features *uaf) { - struct audit_features *uaf; int i; BUILD_BUG_ON(AUDIT_LAST_FEATURE + 1 > ARRAY_SIZE(audit_feature_names)); - uaf = nlmsg_data(nlmsg_hdr(skb)); /* if there is ever a version 2 we should handle that here */ @@ -1175,6 +1173,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { u32 seq; void *data; + int data_len; int err; struct audit_buffer *ab; u16 msg_type = nlh->nlmsg_type; @@ -1188,6 +1187,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) seq = nlh->nlmsg_seq; data = nlmsg_data(nlh); + data_len = nlmsg_len(nlh); switch (msg_type) { case AUDIT_GET: { @@ -1211,7 +1211,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) struct audit_status s; memset(&s, 0, sizeof(s)); /* guard against past and future API changes */ - memcpy(&s, data, min_t(size_t, sizeof(s), nlmsg_len(nlh))); + memcpy(&s, data, min_t(size_t, sizeof(s), data_len)); if (s.mask & AUDIT_STATUS_ENABLED) { err = audit_set_enabled(s.enabled); if (err < 0) @@ -1315,7 +1315,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return err; break; case AUDIT_SET_FEATURE: - err = audit_set_feature(skb); + if (data_len < sizeof(struct audit_features)) + return -EINVAL; + err = audit_set_feature(data); if (err) return err; break; @@ -1327,6 +1329,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) err = audit_filter(msg_type, AUDIT_FILTER_USER); if (err == 1) { /* match or error */ + char *str = data; + err = 0; if (msg_type == AUDIT_USER_TTY) { err = tty_audit_push(); @@ -1334,26 +1338,24 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) break; } audit_log_user_recv_msg(&ab, msg_type); - if (msg_type != AUDIT_USER_TTY) + if (msg_type != AUDIT_USER_TTY) { + /* ensure NULL termination */ + str[data_len - 1] = '\0'; audit_log_format(ab, " msg='%.*s'", AUDIT_MESSAGE_TEXT_MAX, - (char *)data); - else { - int size; - + str); + } else { audit_log_format(ab, " data="); - size = nlmsg_len(nlh); - if (size > 0 && - ((unsigned char *)data)[size - 1] == '\0') - size--; - audit_log_n_untrustedstring(ab, data, size); + if (data_len > 0 && str[data_len - 1] == '\0') + data_len--; + audit_log_n_untrustedstring(ab, str, data_len); } audit_log_end(ab); } break; case AUDIT_ADD_RULE: case AUDIT_DEL_RULE: - if (nlmsg_len(nlh) < sizeof(struct audit_rule_data)) + if (data_len < sizeof(struct audit_rule_data)) return -EINVAL; if (audit_enabled == AUDIT_LOCKED) { audit_log_common_recv_msg(audit_context(), &ab, @@ -1365,7 +1367,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) audit_log_end(ab); return -EPERM; } - err = audit_rule_change(msg_type, seq, data, nlmsg_len(nlh)); + err = audit_rule_change(msg_type, seq, data, data_len); break; case AUDIT_LIST_RULES: err = audit_list_rules_send(skb, seq); @@ -1380,7 +1382,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) case AUDIT_MAKE_EQUIV: { void *bufp = data; u32 sizes[2]; - size_t msglen = nlmsg_len(nlh); + size_t msglen = data_len; char *old, *new; err = -EINVAL; @@ -1456,7 +1458,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) memset(&s, 0, sizeof(s)); /* guard against past and future API changes */ - memcpy(&s, data, min_t(size_t, sizeof(s), nlmsg_len(nlh))); + memcpy(&s, data, min_t(size_t, sizeof(s), data_len)); /* check if new data is valid */ if ((s.enabled != 0 && s.enabled != 1) || (s.log_passwd != 0 && s.log_passwd != 1)) -- cgit v1.2.3-59-g8ed1b From 01e99aeca3979600302913cef3f89076786f32c8 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 25 Feb 2020 09:04:32 +0800 Subject: blk-mq: insert passthrough request into hctx->dispatch directly For some reason, device may be in one situation which can't handle FS request, so STS_RESOURCE is always returned and the FS request will be added to hctx->dispatch. However passthrough request may be required at that time for fixing the problem. If passthrough request is added to scheduler queue, there isn't any chance for blk-mq to dispatch it given we prioritize requests in hctx->dispatch. Then the FS IO request may never be completed, and IO hang is caused. So passthrough request has to be added to hctx->dispatch directly for fixing the IO hang. Fix this issue by inserting passthrough request into hctx->dispatch directly together withing adding FS request to the tail of hctx->dispatch in blk_mq_dispatch_rq_list(). Actually we add FS request to tail of hctx->dispatch at default, see blk_mq_request_bypass_insert(). Then it becomes consistent with original legacy IO request path, in which passthrough request is always added to q->queue_head. Cc: Dongli Zhang Cc: Christoph Hellwig Cc: Ewan D. Milne Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-flush.c | 2 +- block/blk-mq-sched.c | 22 +++++++++++++++------- block/blk-mq.c | 18 +++++++++++------- block/blk-mq.h | 3 ++- 4 files changed, 29 insertions(+), 16 deletions(-) diff --git a/block/blk-flush.c b/block/blk-flush.c index 3f977c517960..5cc775bdb06a 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -412,7 +412,7 @@ void blk_insert_flush(struct request *rq) */ if ((policy & REQ_FSEQ_DATA) && !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { - blk_mq_request_bypass_insert(rq, false); + blk_mq_request_bypass_insert(rq, false, false); return; } diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index ca22afd47b3d..856356b1619e 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -361,13 +361,19 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, bool has_sched, struct request *rq) { - /* dispatch flush rq directly */ - if (rq->rq_flags & RQF_FLUSH_SEQ) { - spin_lock(&hctx->lock); - list_add(&rq->queuelist, &hctx->dispatch); - spin_unlock(&hctx->lock); + /* + * dispatch flush and passthrough rq directly + * + * passthrough request has to be added to hctx->dispatch directly. + * For some reason, device may be in one situation which can't + * handle FS request, so STS_RESOURCE is always returned and the + * FS request will be added to hctx->dispatch. However passthrough + * request may be required at that time for fixing the problem. If + * passthrough request is added to scheduler queue, there isn't any + * chance to dispatch it given we prioritize requests in hctx->dispatch. + */ + if ((rq->rq_flags & RQF_FLUSH_SEQ) || blk_rq_is_passthrough(rq)) return true; - } if (has_sched) rq->rq_flags |= RQF_SORTED; @@ -391,8 +397,10 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head, WARN_ON(e && (rq->tag != -1)); - if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) + if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) { + blk_mq_request_bypass_insert(rq, at_head, false); goto run; + } if (e && e->type->ops.insert_requests) { LIST_HEAD(list); diff --git a/block/blk-mq.c b/block/blk-mq.c index a12b1763508d..5e1e4151cb51 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -735,7 +735,7 @@ static void blk_mq_requeue_work(struct work_struct *work) * merge. */ if (rq->rq_flags & RQF_DONTPREP) - blk_mq_request_bypass_insert(rq, false); + blk_mq_request_bypass_insert(rq, false, false); else blk_mq_sched_insert_request(rq, true, false, false); } @@ -1286,7 +1286,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, q->mq_ops->commit_rqs(hctx); spin_lock(&hctx->lock); - list_splice_init(list, &hctx->dispatch); + list_splice_tail_init(list, &hctx->dispatch); spin_unlock(&hctx->lock); /* @@ -1677,12 +1677,16 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, * Should only be used carefully, when the caller knows we want to * bypass a potential IO scheduler on the target device. */ -void blk_mq_request_bypass_insert(struct request *rq, bool run_queue) +void blk_mq_request_bypass_insert(struct request *rq, bool at_head, + bool run_queue) { struct blk_mq_hw_ctx *hctx = rq->mq_hctx; spin_lock(&hctx->lock); - list_add_tail(&rq->queuelist, &hctx->dispatch); + if (at_head) + list_add(&rq->queuelist, &hctx->dispatch); + else + list_add_tail(&rq->queuelist, &hctx->dispatch); spin_unlock(&hctx->lock); if (run_queue) @@ -1849,7 +1853,7 @@ insert: if (bypass_insert) return BLK_STS_RESOURCE; - blk_mq_request_bypass_insert(rq, run_queue); + blk_mq_request_bypass_insert(rq, false, run_queue); return BLK_STS_OK; } @@ -1876,7 +1880,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false, true); if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) - blk_mq_request_bypass_insert(rq, true); + blk_mq_request_bypass_insert(rq, false, true); else if (ret != BLK_STS_OK) blk_mq_end_request(rq, ret); @@ -1910,7 +1914,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, if (ret != BLK_STS_OK) { if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) { - blk_mq_request_bypass_insert(rq, + blk_mq_request_bypass_insert(rq, false, list_empty(list)); break; } diff --git a/block/blk-mq.h b/block/blk-mq.h index eaaca8fc1c28..c0fa34378eb2 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -66,7 +66,8 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, */ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, bool at_head); -void blk_mq_request_bypass_insert(struct request *rq, bool run_queue); +void blk_mq_request_bypass_insert(struct request *rq, bool at_head, + bool run_queue); void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, struct list_head *list); -- cgit v1.2.3-59-g8ed1b From 3234f4ed3066a58cd5ce8edcf752fa4fe0c95cb5 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 22 Feb 2020 09:04:17 -0800 Subject: MAINTAINERS: Hand MIPS over to Thomas MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My time with MIPS the company has reached its end, and so at best I'll have little time spend on maintaining arch/mips/. Ralf last authored a patch over 2 years ago, the last time he committed one is even further back & activity was sporadic for a while before that. The reality is that he isn't active. Having a new maintainer with time to do things properly will be beneficial all round. Thomas Bogendoerfer has been involved in MIPS development for a long time & has offered to step up as maintainer, so add Thomas and remove myself & Ralf from the MIPS entry. Ralf already has an entry in CREDITS to honor his contributions, so this just adds one for me. Signed-off-by: Paul Burton Reviewed-by: Philippe Mathieu-Daudé Acked-by: Thomas Bogendoerfer Cc: Ralf Baechle Cc: linux-kernel@vger.kernel.org Cc: linux-mips@vger.kernel.org --- CREDITS | 5 +++++ MAINTAINERS | 6 ++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/CREDITS b/CREDITS index a97d3280a627..032b5994f476 100644 --- a/CREDITS +++ b/CREDITS @@ -567,6 +567,11 @@ D: Original author of Amiga FFS filesystem S: Orlando, Florida S: USA +N: Paul Burton +E: paulburton@kernel.org +W: https://pburton.com +D: MIPS maintainer 2018-2020 + N: Lennert Buytenhek E: kernel@wantstofly.org D: Original (2.4) rewrite of the ethernet bridging code diff --git a/MAINTAINERS b/MAINTAINERS index 38fe2f3f7b6f..2c546977fb88 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11114,14 +11114,12 @@ S: Maintained F: drivers/usb/image/microtek.* MIPS -M: Ralf Baechle -M: Paul Burton +M: Thomas Bogendoerfer L: linux-mips@vger.kernel.org W: http://www.linux-mips.org/ -T: git git://git.linux-mips.org/pub/scm/ralf/linux.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/mips/linux.git Q: http://patchwork.linux-mips.org/project/linux-mips/list/ -S: Supported +S: Maintained F: Documentation/devicetree/bindings/mips/ F: Documentation/mips/ F: arch/mips/ -- cgit v1.2.3-59-g8ed1b From b549c252b1292aea959cd9b83537fcb9384a6112 Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Tue, 25 Feb 2020 13:35:27 +0800 Subject: drm/i915/gvt: Fix orphan vgpu dmabuf_objs' lifetime Deleting dmabuf item's list head after releasing its container can lead to KASAN-reported issue: BUG: KASAN: use-after-free in __list_del_entry_valid+0x15/0xf0 Read of size 8 at addr ffff88818a4598a8 by task kworker/u8:3/13119 So fix this issue by puting deleting dmabuf_objs ahead of releasing its container. Fixes: dfb6ae4e14bd6 ("drm/i915/gvt: Handle orphan dmabuf_objs") Signed-off-by: Tina Zhang Reviewed-by: Zhenyu Wang Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20200225053527.8336-2-tina.zhang@intel.com --- drivers/gpu/drm/i915/gvt/dmabuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index 2477a1e5a166..ae139f0877ae 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -151,12 +151,12 @@ static void dmabuf_gem_object_free(struct kref *kref) dmabuf_obj = container_of(pos, struct intel_vgpu_dmabuf_obj, list); if (dmabuf_obj == obj) { + list_del(pos); intel_gvt_hypervisor_put_vfio_device(vgpu); idr_remove(&vgpu->object_idr, dmabuf_obj->dmabuf_id); kfree(dmabuf_obj->info); kfree(dmabuf_obj); - list_del(pos); break; } } -- cgit v1.2.3-59-g8ed1b From 53ace1195263b30fd593677dd67559e879ed9aa2 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Fri, 21 Feb 2020 21:57:33 +0100 Subject: docs: remove MPX from the x86 toc MPX was removed in commit 45fc24e89b7c ("x86/mpx: remove MPX from arch/x86"), this removes the corresponding entry in the x86 toc. This was suggested by a Sphinx warning. Signed-off-by: Stephen Kitt Fixes: 45fc24e89b7cc ("x86/mpx: remove MPX from arch/x86") Acked-by: Dave Hansen Signed-off-by: Jonathan Corbet --- Documentation/x86/index.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst index a8de2fbc1caa..265d9e9a093b 100644 --- a/Documentation/x86/index.rst +++ b/Documentation/x86/index.rst @@ -19,7 +19,6 @@ x86-specific Documentation tlb mtrr pat - intel_mpx intel-iommu intel_txt amd-memory-encryption -- cgit v1.2.3-59-g8ed1b From adc10f5b0a03606e30c704cff1f0283a696d0260 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 21 Feb 2020 16:02:39 -0800 Subject: docs: Fix empty parallelism argument When there was no parallelism (no top-level -j arg and a pre-1.7 sphinx-build), the argument passed would be empty ("") instead of just being missing, which would (understandably) badly confuse sphinx-build. Fix this by removing the quotes. Reported-by: Rafael J. Wysocki Fixes: 51e46c7a4007 ("docs, parallelism: Rearrange how jobserver reservations are made") Cc: stable@vger.kernel.org # v5.5 only Signed-off-by: Kees Cook Signed-off-by: Jonathan Corbet --- Documentation/sphinx/parallel-wrapper.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/sphinx/parallel-wrapper.sh b/Documentation/sphinx/parallel-wrapper.sh index 7daf5133bdd3..e54c44ce117d 100644 --- a/Documentation/sphinx/parallel-wrapper.sh +++ b/Documentation/sphinx/parallel-wrapper.sh @@ -30,4 +30,4 @@ if [ -n "$parallel" ] ; then parallel="-j$parallel" fi -exec "$sphinx" "$parallel" "$@" +exec "$sphinx" $parallel "$@" -- cgit v1.2.3-59-g8ed1b From d0820556507bd7aef4f3a615b1b6eb66eb9785fe Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Fri, 21 Feb 2020 03:11:56 +0100 Subject: selftests: nft_concat_range: Move option for 'list ruleset' before command Before nftables commit fb9cea50e8b3 ("main: enforce options before commands"), 'nft list ruleset -a' happened to work, but it's wrong and won't work anymore. Replace it by 'nft -a list ruleset'. Reported-by: Chen Yi Fixes: 611973c1e06f ("selftests: netfilter: Introduce tests for sets with range concatenation") Signed-off-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/nft_concat_range.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh index aca21dde102a..5c1033ee1b39 100755 --- a/tools/testing/selftests/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/netfilter/nft_concat_range.sh @@ -1025,7 +1025,7 @@ format_noconcat() { add() { if ! nft add element inet filter test "${1}"; then err "Failed to add ${1} given ruleset:" - err "$(nft list ruleset -a)" + err "$(nft -a list ruleset)" return 1 fi } @@ -1045,7 +1045,7 @@ add_perf() { add_perf_norange() { if ! nft add element netdev perf norange "${1}"; then err "Failed to add ${1} given ruleset:" - err "$(nft list ruleset -a)" + err "$(nft -a list ruleset)" return 1 fi } @@ -1054,7 +1054,7 @@ add_perf_norange() { add_perf_noconcat() { if ! nft add element netdev perf noconcat "${1}"; then err "Failed to add ${1} given ruleset:" - err "$(nft list ruleset -a)" + err "$(nft -a list ruleset)" return 1 fi } @@ -1063,7 +1063,7 @@ add_perf_noconcat() { del() { if ! nft delete element inet filter test "${1}"; then err "Failed to delete ${1} given ruleset:" - err "$(nft list ruleset -a)" + err "$(nft -a list ruleset)" return 1 fi } @@ -1134,7 +1134,7 @@ send_match() { err " $(for f in ${src}; do eval format_\$f "${2}"; printf ' '; done)" err "should have matched ruleset:" - err "$(nft list ruleset -a)" + err "$(nft -a list ruleset)" return 1 fi nft reset counter inet filter test >/dev/null @@ -1160,7 +1160,7 @@ send_nomatch() { err " $(for f in ${src}; do eval format_\$f "${2}"; printf ' '; done)" err "should not have matched ruleset:" - err "$(nft list ruleset -a)" + err "$(nft -a list ruleset)" return 1 fi } -- cgit v1.2.3-59-g8ed1b From c780e86dd48ef6467a1146cf7d0fe1e05a635039 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 6 Feb 2020 15:28:12 +0100 Subject: blktrace: Protect q->blk_trace with RCU KASAN is reporting that __blk_add_trace() has a use-after-free issue when accessing q->blk_trace. Indeed the switching of block tracing (and thus eventual freeing of q->blk_trace) is completely unsynchronized with the currently running tracing and thus it can happen that the blk_trace structure is being freed just while __blk_add_trace() works on it. Protect accesses to q->blk_trace by RCU during tracing and make sure we wait for the end of RCU grace period when shutting down tracing. Luckily that is rare enough event that we can afford that. Note that postponing the freeing of blk_trace to an RCU callback should better be avoided as it could have unexpected user visible side-effects as debugfs files would be still existing for a short while block tracing has been shut down. Link: https://bugzilla.kernel.org/show_bug.cgi?id=205711 CC: stable@vger.kernel.org Reviewed-by: Chaitanya Kulkarni Reviewed-by: Ming Lei Tested-by: Ming Lei Reviewed-by: Bart Van Assche Reported-by: Tristan Madani Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- include/linux/blktrace_api.h | 18 +++++-- kernel/trace/blktrace.c | 114 +++++++++++++++++++++++++++++++------------ 3 files changed, 97 insertions(+), 37 deletions(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 053ea4b51988..10455b2bbbb4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -524,7 +524,7 @@ struct request_queue { unsigned int sg_reserved_size; int node; #ifdef CONFIG_BLK_DEV_IO_TRACE - struct blk_trace *blk_trace; + struct blk_trace __rcu *blk_trace; struct mutex blk_trace_mutex; #endif /* diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 7bb2d8de9f30..3b6ff5902edc 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -51,9 +51,13 @@ void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *f **/ #define blk_add_cgroup_trace_msg(q, cg, fmt, ...) \ do { \ - struct blk_trace *bt = (q)->blk_trace; \ + struct blk_trace *bt; \ + \ + rcu_read_lock(); \ + bt = rcu_dereference((q)->blk_trace); \ if (unlikely(bt)) \ __trace_note_message(bt, cg, fmt, ##__VA_ARGS__);\ + rcu_read_unlock(); \ } while (0) #define blk_add_trace_msg(q, fmt, ...) \ blk_add_cgroup_trace_msg(q, NULL, fmt, ##__VA_ARGS__) @@ -61,10 +65,14 @@ void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *f static inline bool blk_trace_note_message_enabled(struct request_queue *q) { - struct blk_trace *bt = q->blk_trace; - if (likely(!bt)) - return false; - return bt->act_mask & BLK_TC_NOTIFY; + struct blk_trace *bt; + bool ret; + + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); + ret = bt && (bt->act_mask & BLK_TC_NOTIFY); + rcu_read_unlock(); + return ret; } extern void blk_add_driver_data(struct request_queue *q, struct request *rq, diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 0735ae8545d8..4560878f0bac 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -335,6 +335,7 @@ static void put_probe_ref(void) static void blk_trace_cleanup(struct blk_trace *bt) { + synchronize_rcu(); blk_trace_free(bt); put_probe_ref(); } @@ -629,8 +630,10 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name, static int __blk_trace_startstop(struct request_queue *q, int start) { int ret; - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + bt = rcu_dereference_protected(q->blk_trace, + lockdep_is_held(&q->blk_trace_mutex)); if (bt == NULL) return -EINVAL; @@ -740,8 +743,8 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) void blk_trace_shutdown(struct request_queue *q) { mutex_lock(&q->blk_trace_mutex); - - if (q->blk_trace) { + if (rcu_dereference_protected(q->blk_trace, + lockdep_is_held(&q->blk_trace_mutex))) { __blk_trace_startstop(q, 0); __blk_trace_remove(q); } @@ -752,8 +755,10 @@ void blk_trace_shutdown(struct request_queue *q) #ifdef CONFIG_BLK_CGROUP static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + /* We don't use the 'bt' value here except as an optimization... */ + bt = rcu_dereference_protected(q->blk_trace, 1); if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP)) return 0; @@ -796,10 +801,14 @@ blk_trace_request_get_cgid(struct request_queue *q, struct request *rq) static void blk_add_trace_rq(struct request *rq, int error, unsigned int nr_bytes, u32 what, u64 cgid) { - struct blk_trace *bt = rq->q->blk_trace; + struct blk_trace *bt; - if (likely(!bt)) + rcu_read_lock(); + bt = rcu_dereference(rq->q->blk_trace); + if (likely(!bt)) { + rcu_read_unlock(); return; + } if (blk_rq_is_passthrough(rq)) what |= BLK_TC_ACT(BLK_TC_PC); @@ -808,6 +817,7 @@ static void blk_add_trace_rq(struct request *rq, int error, __blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq), rq->cmd_flags, what, error, 0, NULL, cgid); + rcu_read_unlock(); } static void blk_add_trace_rq_insert(void *ignore, @@ -853,14 +863,19 @@ static void blk_add_trace_rq_complete(void *ignore, struct request *rq, static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, u32 what, int error) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; - if (likely(!bt)) + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); + if (likely(!bt)) { + rcu_read_unlock(); return; + } __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf, what, error, 0, NULL, blk_trace_bio_get_cgid(q, bio)); + rcu_read_unlock(); } static void blk_add_trace_bio_bounce(void *ignore, @@ -905,11 +920,14 @@ static void blk_add_trace_getrq(void *ignore, if (bio) blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0); else { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); if (bt) __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0, NULL, 0); + rcu_read_unlock(); } } @@ -921,27 +939,35 @@ static void blk_add_trace_sleeprq(void *ignore, if (bio) blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0); else { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); if (bt) __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ, 0, 0, NULL, 0); + rcu_read_unlock(); } } static void blk_add_trace_plug(void *ignore, struct request_queue *q) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); if (bt) __blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, 0); + rcu_read_unlock(); } static void blk_add_trace_unplug(void *ignore, struct request_queue *q, unsigned int depth, bool explicit) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); if (bt) { __be64 rpdu = cpu_to_be64(depth); u32 what; @@ -953,14 +979,17 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q, __blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, 0); } + rcu_read_unlock(); } static void blk_add_trace_split(void *ignore, struct request_queue *q, struct bio *bio, unsigned int pdu) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); if (bt) { __be64 rpdu = cpu_to_be64(pdu); @@ -969,6 +998,7 @@ static void blk_add_trace_split(void *ignore, BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu), &rpdu, blk_trace_bio_get_cgid(q, bio)); } + rcu_read_unlock(); } /** @@ -988,11 +1018,15 @@ static void blk_add_trace_bio_remap(void *ignore, struct request_queue *q, struct bio *bio, dev_t dev, sector_t from) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; struct blk_io_trace_remap r; - if (likely(!bt)) + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); + if (likely(!bt)) { + rcu_read_unlock(); return; + } r.device_from = cpu_to_be32(dev); r.device_to = cpu_to_be32(bio_dev(bio)); @@ -1001,6 +1035,7 @@ static void blk_add_trace_bio_remap(void *ignore, __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status, sizeof(r), &r, blk_trace_bio_get_cgid(q, bio)); + rcu_read_unlock(); } /** @@ -1021,11 +1056,15 @@ static void blk_add_trace_rq_remap(void *ignore, struct request *rq, dev_t dev, sector_t from) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; struct blk_io_trace_remap r; - if (likely(!bt)) + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); + if (likely(!bt)) { + rcu_read_unlock(); return; + } r.device_from = cpu_to_be32(dev); r.device_to = cpu_to_be32(disk_devt(rq->rq_disk)); @@ -1034,6 +1073,7 @@ static void blk_add_trace_rq_remap(void *ignore, __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rq_data_dir(rq), 0, BLK_TA_REMAP, 0, sizeof(r), &r, blk_trace_request_get_cgid(q, rq)); + rcu_read_unlock(); } /** @@ -1051,14 +1091,19 @@ void blk_add_driver_data(struct request_queue *q, struct request *rq, void *data, size_t len) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; - if (likely(!bt)) + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); + if (likely(!bt)) { + rcu_read_unlock(); return; + } __blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0, BLK_TA_DRV_DATA, 0, len, data, blk_trace_request_get_cgid(q, rq)); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(blk_add_driver_data); @@ -1597,6 +1642,7 @@ static int blk_trace_remove_queue(struct request_queue *q) return -EINVAL; put_probe_ref(); + synchronize_rcu(); blk_trace_free(bt); return 0; } @@ -1758,6 +1804,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, struct hd_struct *p = dev_to_part(dev); struct request_queue *q; struct block_device *bdev; + struct blk_trace *bt; ssize_t ret = -ENXIO; bdev = bdget(part_devt(p)); @@ -1770,21 +1817,23 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, mutex_lock(&q->blk_trace_mutex); + bt = rcu_dereference_protected(q->blk_trace, + lockdep_is_held(&q->blk_trace_mutex)); if (attr == &dev_attr_enable) { - ret = sprintf(buf, "%u\n", !!q->blk_trace); + ret = sprintf(buf, "%u\n", !!bt); goto out_unlock_bdev; } - if (q->blk_trace == NULL) + if (bt == NULL) ret = sprintf(buf, "disabled\n"); else if (attr == &dev_attr_act_mask) - ret = blk_trace_mask2str(buf, q->blk_trace->act_mask); + ret = blk_trace_mask2str(buf, bt->act_mask); else if (attr == &dev_attr_pid) - ret = sprintf(buf, "%u\n", q->blk_trace->pid); + ret = sprintf(buf, "%u\n", bt->pid); else if (attr == &dev_attr_start_lba) - ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba); + ret = sprintf(buf, "%llu\n", bt->start_lba); else if (attr == &dev_attr_end_lba) - ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba); + ret = sprintf(buf, "%llu\n", bt->end_lba); out_unlock_bdev: mutex_unlock(&q->blk_trace_mutex); @@ -1801,6 +1850,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, struct block_device *bdev; struct request_queue *q; struct hd_struct *p; + struct blk_trace *bt; u64 value; ssize_t ret = -EINVAL; @@ -1831,8 +1881,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, mutex_lock(&q->blk_trace_mutex); + bt = rcu_dereference_protected(q->blk_trace, + lockdep_is_held(&q->blk_trace_mutex)); if (attr == &dev_attr_enable) { - if (!!value == !!q->blk_trace) { + if (!!value == !!bt) { ret = 0; goto out_unlock_bdev; } @@ -1844,18 +1896,18 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, } ret = 0; - if (q->blk_trace == NULL) + if (bt == NULL) ret = blk_trace_setup_queue(q, bdev); if (ret == 0) { if (attr == &dev_attr_act_mask) - q->blk_trace->act_mask = value; + bt->act_mask = value; else if (attr == &dev_attr_pid) - q->blk_trace->pid = value; + bt->pid = value; else if (attr == &dev_attr_start_lba) - q->blk_trace->start_lba = value; + bt->start_lba = value; else if (attr == &dev_attr_end_lba) - q->blk_trace->end_lba = value; + bt->end_lba = value; } out_unlock_bdev: -- cgit v1.2.3-59-g8ed1b From bdcd3eab2a9ae0ac93f27275b6895dd95e5bf360 Mon Sep 17 00:00:00 2001 From: Xiaoguang Wang Date: Tue, 25 Feb 2020 22:12:08 +0800 Subject: io_uring: fix poll_list race for SETUP_IOPOLL|SETUP_SQPOLL After making ext4 support iopoll method: let ext4_file_operations's iopoll method be iomap_dio_iopoll(), we found fio can easily hang in fio_ioring_getevents() with below fio job: rm -f testfile; sync; sudo fio -name=fiotest -filename=testfile -iodepth=128 -thread -rw=write -ioengine=io_uring -hipri=1 -sqthread_poll=1 -direct=1 -bs=4k -size=10G -numjobs=8 -runtime=2000 -group_reporting with IORING_SETUP_SQPOLL and IORING_SETUP_IOPOLL enabled. There are two issues that results in this hang, one reason is that when IORING_SETUP_SQPOLL and IORING_SETUP_IOPOLL are enabled, fio does not use io_uring_enter to get completed events, it relies on kernel io_sq_thread to poll for completed events. Another reason is that there is a race: when io_submit_sqes() in io_sq_thread() submits a batch of sqes, variable 'inflight' will record the number of submitted reqs, then io_sq_thread will poll for reqs which have been added to poll_list. But note, if some previous reqs have been punted to io worker, these reqs will won't be in poll_list timely. io_sq_thread() will only poll for a part of previous submitted reqs, and then find poll_list is empty, reset variable 'inflight' to be zero. If app just waits these deferred reqs and does not wake up io_sq_thread again, then hang happens. For app that entirely relies on io_sq_thread to poll completed requests, let io_iopoll_req_issued() wake up io_sq_thread properly when adding new element to poll_list, and when io_sq_thread prepares to sleep, check whether poll_list is empty again, if not empty, continue to poll. Signed-off-by: Xiaoguang Wang Signed-off-by: Jens Axboe --- fs/io_uring.c | 59 +++++++++++++++++++++++++++-------------------------------- 1 file changed, 27 insertions(+), 32 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index d961945cb332..ffd9bfa84d86 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1821,6 +1821,10 @@ static void io_iopoll_req_issued(struct io_kiocb *req) list_add(&req->list, &ctx->poll_list); else list_add_tail(&req->list, &ctx->poll_list); + + if ((ctx->flags & IORING_SETUP_SQPOLL) && + wq_has_sleeper(&ctx->sqo_wait)) + wake_up(&ctx->sqo_wait); } static void io_file_put(struct io_submit_state *state) @@ -5086,9 +5090,8 @@ static int io_sq_thread(void *data) const struct cred *old_cred; mm_segment_t old_fs; DEFINE_WAIT(wait); - unsigned inflight; unsigned long timeout; - int ret; + int ret = 0; complete(&ctx->completions[1]); @@ -5096,39 +5099,19 @@ static int io_sq_thread(void *data) set_fs(USER_DS); old_cred = override_creds(ctx->creds); - ret = timeout = inflight = 0; + timeout = jiffies + ctx->sq_thread_idle; while (!kthread_should_park()) { unsigned int to_submit; - if (inflight) { + if (!list_empty(&ctx->poll_list)) { unsigned nr_events = 0; - if (ctx->flags & IORING_SETUP_IOPOLL) { - /* - * inflight is the count of the maximum possible - * entries we submitted, but it can be smaller - * if we dropped some of them. If we don't have - * poll entries available, then we know that we - * have nothing left to poll for. Reset the - * inflight count to zero in that case. - */ - mutex_lock(&ctx->uring_lock); - if (!list_empty(&ctx->poll_list)) - io_iopoll_getevents(ctx, &nr_events, 0); - else - inflight = 0; - mutex_unlock(&ctx->uring_lock); - } else { - /* - * Normal IO, just pretend everything completed. - * We don't have to poll completions for that. - */ - nr_events = inflight; - } - - inflight -= nr_events; - if (!inflight) + mutex_lock(&ctx->uring_lock); + if (!list_empty(&ctx->poll_list)) + io_iopoll_getevents(ctx, &nr_events, 0); + else timeout = jiffies + ctx->sq_thread_idle; + mutex_unlock(&ctx->uring_lock); } to_submit = io_sqring_entries(ctx); @@ -5157,7 +5140,7 @@ static int io_sq_thread(void *data) * more IO, we should wait for the application to * reap events and wake us up. */ - if (inflight || + if (!list_empty(&ctx->poll_list) || (!time_after(jiffies, timeout) && ret != -EBUSY && !percpu_ref_is_dying(&ctx->refs))) { cond_resched(); @@ -5167,6 +5150,19 @@ static int io_sq_thread(void *data) prepare_to_wait(&ctx->sqo_wait, &wait, TASK_INTERRUPTIBLE); + /* + * While doing polled IO, before going to sleep, we need + * to check if there are new reqs added to poll_list, it + * is because reqs may have been punted to io worker and + * will be added to poll_list later, hence check the + * poll_list again. + */ + if ((ctx->flags & IORING_SETUP_IOPOLL) && + !list_empty_careful(&ctx->poll_list)) { + finish_wait(&ctx->sqo_wait, &wait); + continue; + } + /* Tell userspace we may need a wakeup call */ ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP; /* make sure to read SQ tail after writing flags */ @@ -5194,8 +5190,7 @@ static int io_sq_thread(void *data) mutex_lock(&ctx->uring_lock); ret = io_submit_sqes(ctx, to_submit, NULL, -1, &cur_mm, true); mutex_unlock(&ctx->uring_lock); - if (ret > 0) - inflight += ret; + timeout = jiffies + ctx->sq_thread_idle; } set_fs(old_fs); -- cgit v1.2.3-59-g8ed1b From 3030fd4cb783377eca0e2a3eee63724a5c66ee15 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 25 Feb 2020 08:47:30 -0700 Subject: io-wq: remove spin-for-work optimization Andres reports that buffered IO seems to suck up more cycles than we would like, and he narrowed it down to the fact that the io-wq workers will briefly spin for more work on completion of a work item. This was a win on the networking side, but apparently some other cases take a hit because of it. Remove the optimization to avoid burning more CPU than we have to for disk IO. Reported-by: Andres Freund Signed-off-by: Jens Axboe --- fs/io-wq.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 0a5ab1a8f69a..bf8ed1b0b90a 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -535,42 +535,23 @@ next: } while (1); } -static inline void io_worker_spin_for_work(struct io_wqe *wqe) -{ - int i = 0; - - while (++i < 1000) { - if (io_wqe_run_queue(wqe)) - break; - if (need_resched()) - break; - cpu_relax(); - } -} - static int io_wqe_worker(void *data) { struct io_worker *worker = data; struct io_wqe *wqe = worker->wqe; struct io_wq *wq = wqe->wq; - bool did_work; io_worker_start(wqe, worker); - did_work = false; while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) { set_current_state(TASK_INTERRUPTIBLE); loop: - if (did_work) - io_worker_spin_for_work(wqe); spin_lock_irq(&wqe->lock); if (io_wqe_run_queue(wqe)) { __set_current_state(TASK_RUNNING); io_worker_handle_work(worker); - did_work = true; goto loop; } - did_work = false; /* drops the lock on success, retry */ if (__io_worker_idle(wqe, worker)) { __release(&wqe->lock); -- cgit v1.2.3-59-g8ed1b From 4829f89855f1d3a3d8014e74cceab51b421503db Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Sat, 8 Feb 2020 19:01:21 +0800 Subject: drm/amdgpu: fix memory leak during TDR test(v2) fix system memory leak v2: fix coding style Signed-off-by: Monk Liu Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index b06c057a9002..c9e5ce135fd4 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -978,8 +978,12 @@ int smu_v11_0_init_max_sustainable_clocks(struct smu_context *smu) struct smu_11_0_max_sustainable_clocks *max_sustainable_clocks; int ret = 0; - max_sustainable_clocks = kzalloc(sizeof(struct smu_11_0_max_sustainable_clocks), + if (!smu->smu_table.max_sustainable_clocks) + max_sustainable_clocks = kzalloc(sizeof(struct smu_11_0_max_sustainable_clocks), GFP_KERNEL); + else + max_sustainable_clocks = smu->smu_table.max_sustainable_clocks; + smu->smu_table.max_sustainable_clocks = (void *)max_sustainable_clocks; max_sustainable_clocks->uclock = smu->smu_table.boot_values.uclk / 100; -- cgit v1.2.3-59-g8ed1b From a3ed353cf8015ba84a0407a5dc3ffee038166ab0 Mon Sep 17 00:00:00 2001 From: Shirish S Date: Mon, 27 Jan 2020 16:35:24 +0530 Subject: amdgpu/gmc_v9: save/restore sdpif regs during S3 fixes S3 issue with IOMMU + S/G enabled @ 64M VRAM. Suggested-by: Alex Deucher Signed-off-by: Shirish S Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 37 +++++++++++++++++++++- .../drm/amd/include/asic_reg/dce/dce_12_0_offset.h | 2 ++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index d3c27a3c43f6..7546da0cc70c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -195,6 +195,7 @@ struct amdgpu_gmc { uint32_t srbm_soft_reset; bool prt_warning; uint64_t stolen_size; + uint32_t sdpif_register; /* apertures */ u64 shared_aperture_start; u64 shared_aperture_end; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 90216abf14a4..cc0c273a86f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1271,6 +1271,19 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev) } } +/** + * gmc_v9_0_restore_registers - restores regs + * + * @adev: amdgpu_device pointer + * + * This restores register values, saved at suspend. + */ +static void gmc_v9_0_restore_registers(struct amdgpu_device *adev) +{ + if (adev->asic_type == CHIP_RAVEN) + WREG32(mmDCHUBBUB_SDPIF_MMIO_CNTRL_0, adev->gmc.sdpif_register); +} + /** * gmc_v9_0_gart_enable - gart enable * @@ -1376,6 +1389,20 @@ static int gmc_v9_0_hw_init(void *handle) return r; } +/** + * gmc_v9_0_save_registers - saves regs + * + * @adev: amdgpu_device pointer + * + * This saves potential register values that should be + * restored upon resume + */ +static void gmc_v9_0_save_registers(struct amdgpu_device *adev) +{ + if (adev->asic_type == CHIP_RAVEN) + adev->gmc.sdpif_register = RREG32(mmDCHUBBUB_SDPIF_MMIO_CNTRL_0); +} + /** * gmc_v9_0_gart_disable - gart disable * @@ -1412,9 +1439,16 @@ static int gmc_v9_0_hw_fini(void *handle) static int gmc_v9_0_suspend(void *handle) { + int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - return gmc_v9_0_hw_fini(adev); + r = gmc_v9_0_hw_fini(adev); + if (r) + return r; + + gmc_v9_0_save_registers(adev); + + return 0; } static int gmc_v9_0_resume(void *handle) @@ -1422,6 +1456,7 @@ static int gmc_v9_0_resume(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + gmc_v9_0_restore_registers(adev); r = gmc_v9_0_hw_init(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h index b6f74bf4af02..27bb8c1ab858 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h @@ -7376,6 +7376,8 @@ #define mmCRTC4_CRTC_DRR_CONTROL 0x0f3e #define mmCRTC4_CRTC_DRR_CONTROL_BASE_IDX 2 +#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0 0x395d +#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0_BASE_IDX 2 // addressBlock: dce_dc_fmt4_dispdec // base address: 0x2000 -- cgit v1.2.3-59-g8ed1b From 93d7c3185893b185e7f4347f8986b9b521254a6e Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Mon, 24 Feb 2020 10:39:11 -0800 Subject: null_blk: remove unused fields in 'nullb_cmd' 'list', 'll_list' and 'csd' are no longer used. The 'list' is not used since it was introduced by commit f2298c0403b0 ("null_blk: multi queue aware block test driver"). The 'll_list' is no longer used since commit 3c395a969acc ("null_blk: set a separate timer for each command"). The 'csd' is no longer used since commit ce2c350b2cfe ("null_blk: use blk_complete_request and blk_mq_complete_request"). Reviewed-by: Bart Van Assche Signed-off-by: Dongli Zhang Signed-off-by: Jens Axboe --- drivers/block/null_blk.h | 3 --- drivers/block/null_blk_main.c | 2 -- 2 files changed, 5 deletions(-) diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h index bc837862b767..62b660821dbc 100644 --- a/drivers/block/null_blk.h +++ b/drivers/block/null_blk.h @@ -14,9 +14,6 @@ #include struct nullb_cmd { - struct list_head list; - struct llist_node ll_list; - struct __call_single_data csd; struct request *rq; struct bio *bio; unsigned int tag; diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index 16510795e377..133060431dbd 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -1518,8 +1518,6 @@ static int setup_commands(struct nullb_queue *nq) for (i = 0; i < nq->queue_depth; i++) { cmd = &nq->cmds[i]; - INIT_LIST_HEAD(&cmd->list); - cmd->ll_list.next = NULL; cmd->tag = -1U; } -- cgit v1.2.3-59-g8ed1b From a8e41f6033a0c5633d55d6e35993c9e2005d872f Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 25 Feb 2020 18:05:35 +0800 Subject: icmp: allow icmpv6_ndo_send to work with CONFIG_IPV6=n The icmpv6_send function has long had a static inline implementation with an empty body for CONFIG_IPV6=n, so that code calling it doesn't need to be ifdef'd. The new icmpv6_ndo_send function, which is intended for drivers as a drop-in replacement with an identical function signature, should follow the same pattern. Without this patch, drivers that used to work with CONFIG_IPV6=n now result in a linker error. Cc: Chen Zhou Reported-by: Hulk Robot Fixes: 0b41713b6066 ("icmp: introduce helper for nat'd source address in network device context") Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- include/linux/icmpv6.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index 93338fd54af8..33d379602314 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -22,19 +22,23 @@ extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn); int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, unsigned int data_len); +#if IS_ENABLED(CONFIG_NF_NAT) +void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info); +#else +#define icmpv6_ndo_send icmpv6_send +#endif + #else static inline void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) { - } -#endif -#if IS_ENABLED(CONFIG_NF_NAT) -void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info); -#else -#define icmpv6_ndo_send icmpv6_send +static inline void icmpv6_ndo_send(struct sk_buff *skb, + u8 type, u8 code, __u32 info) +{ +} #endif extern int icmpv6_init(void); -- cgit v1.2.3-59-g8ed1b From 2d141dd2caa78fbaf87b57c27769bdc14975ab3d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 25 Feb 2020 11:52:56 -0700 Subject: io-wq: ensure work->task_pid is cleared on init We use ->task_pid for exit cancellation, but we need to ensure it's cleared to zero for io_req_work_grab_env() to do the right thing. Take a suggestion from Bart and clear the whole thing, just setting the function passed in. This makes it more future proof as well. Fixes: 36282881a795 ("io-wq: add io_wq_cancel_pid() to cancel based on a specific pid") Signed-off-by: Jens Axboe --- fs/io-wq.h | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/fs/io-wq.h b/fs/io-wq.h index ccc7d84af57d..33baba4370c5 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -79,16 +79,10 @@ struct io_wq_work { pid_t task_pid; }; -#define INIT_IO_WORK(work, _func) \ - do { \ - (work)->list.next = NULL; \ - (work)->func = _func; \ - (work)->files = NULL; \ - (work)->mm = NULL; \ - (work)->creds = NULL; \ - (work)->fs = NULL; \ - (work)->flags = 0; \ - } while (0) \ +#define INIT_IO_WORK(work, _func) \ + do { \ + *(work) = (struct io_wq_work){ .func = _func }; \ + } while (0) \ typedef void (get_work_fn)(struct io_wq_work *); typedef void (put_work_fn)(struct io_wq_work *); -- cgit v1.2.3-59-g8ed1b From 2910b5aa6f545c044173a5cab3dbb7f43e23916d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 25 Feb 2020 23:36:41 +0900 Subject: bootconfig: Fix CONFIG_BOOTTIME_TRACING dependency issue Since commit d8a953ddde5e ("bootconfig: Set CONFIG_BOOT_CONFIG=n by default") also changed the CONFIG_BOOTTIME_TRACING to select CONFIG_BOOT_CONFIG to show the boot-time tracing on the menu, it introduced wrong dependencies with BLK_DEV_INITRD as below. WARNING: unmet direct dependencies detected for BOOT_CONFIG Depends on [n]: BLK_DEV_INITRD [=n] Selected by [y]: - BOOTTIME_TRACING [=y] && TRACING_SUPPORT [=y] && FTRACE [=y] && TRACING [=y] This makes the CONFIG_BOOT_CONFIG selects CONFIG_BLK_DEV_INITRD to fix this error and make CONFIG_BOOTTIME_TRACING=n by default, so that both boot-time tracing and boot configuration off but those appear on the menu list. Link: http://lkml.kernel.org/r/158264140162.23842.11237423518607465535.stgit@devnote2 Fixes: d8a953ddde5e ("bootconfig: Set CONFIG_BOOT_CONFIG=n by default") Reported-by: Randy Dunlap Compiled-tested-by: Randy Dunlap Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- init/Kconfig | 2 +- kernel/trace/Kconfig | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index a84e7aa89a29..8b4c3e8c05ea 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1217,7 +1217,7 @@ endif config BOOT_CONFIG bool "Boot config support" - depends on BLK_DEV_INITRD + select BLK_DEV_INITRD help Extra boot config allows system admin to pass a config file as complemental extension of kernel cmdline when booting. diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 795c3e02d3f1..402eef84c859 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -145,7 +145,6 @@ config BOOTTIME_TRACING bool "Boot-time Tracing support" depends on TRACING select BOOT_CONFIG - default y help Enable developer to setup ftrace subsystem via supplemental kernel cmdline at boot time for debugging (tracing) driver -- cgit v1.2.3-59-g8ed1b From 7c69eb84d98a28c428f902318c20c53cf29c9084 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 21 Feb 2020 06:37:23 -0800 Subject: zonefs: fix IOCB_NOWAIT handling IOCB_NOWAIT can't just be ignored as it breaks applications expecting it not to block. Just refuse the operation as applications must handle that (e.g. by falling back to a thread pool). Fixes: 8dcc1a9d90c1 ("fs: New zonefs file system") Signed-off-by: Christoph Hellwig Signed-off-by: Damien Le Moal --- fs/zonefs/super.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 8bc6ef82d693..69aee3dfb660 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -601,13 +601,13 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) ssize_t ret; /* - * For async direct IOs to sequential zone files, ignore IOCB_NOWAIT + * For async direct IOs to sequential zone files, refuse IOCB_NOWAIT * as this can cause write reordering (e.g. the first aio gets EAGAIN * on the inode lock but the second goes through but is now unaligned). */ - if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !is_sync_kiocb(iocb) - && (iocb->ki_flags & IOCB_NOWAIT)) - iocb->ki_flags &= ~IOCB_NOWAIT; + if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !is_sync_kiocb(iocb) && + (iocb->ki_flags & IOCB_NOWAIT)) + return -EOPNOTSUPP; if (iocb->ki_flags & IOCB_NOWAIT) { if (!inode_trylock(inode)) -- cgit v1.2.3-59-g8ed1b From 0dda2ddb7ded1395189e95d43106469687c07795 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 25 Feb 2020 22:03:33 +0100 Subject: zonefs: select FS_IOMAP Zonefs makes use of iomap internally, so it should also select iomap in Kconfig. Signed-off-by: Johannes Thumshirn Signed-off-by: Damien Le Moal --- fs/zonefs/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/zonefs/Kconfig b/fs/zonefs/Kconfig index fb87ad372e29..ef2697b78820 100644 --- a/fs/zonefs/Kconfig +++ b/fs/zonefs/Kconfig @@ -2,6 +2,7 @@ config ZONEFS_FS tristate "zonefs filesystem support" depends on BLOCK depends on BLK_DEV_ZONED + select FS_IOMAP help zonefs is a simple file system which exposes zones of a zoned block device (e.g. host-managed or host-aware SMR disk drives) as files. -- cgit v1.2.3-59-g8ed1b From b5dacc8fb52c690e2cdf7df3ae36bd1cf20e63dd Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 21 Feb 2020 12:54:14 +0200 Subject: drm/i915: fix header test with GCOV $(CC) with $(CFLAGS_GCOV) assumes the output filename with .gcno suffix appended is writable. This is not the case when the output filename is /dev/null: HDRTEST drivers/gpu/drm/i915/display/intel_frontbuffer.h /dev/null:1:0: error: cannot open /dev/null.gcno HDRTEST drivers/gpu/drm/i915/display/intel_ddi.h /dev/null:1:0: error: cannot open /dev/null.gcno make[5]: *** [../drivers/gpu/drm/i915/Makefile:307: drivers/gpu/drm/i915/display/intel_ddi.hdrtest] Error 1 make[5]: *** Waiting for unfinished jobs.... make[5]: *** [../drivers/gpu/drm/i915/Makefile:307: drivers/gpu/drm/i915/display/intel_frontbuffer.hdrtest] Error 1 Filter out $(CFLAGS_GVOC) from the header test $(c_flags) as they don't make sense here anyway. References: http://lore.kernel.org/r/d8112767-4089-4c58-d7d3-2ce03139858a@infradead.org Reported-by: Randy Dunlap Fixes: c6d4a099a240 ("drm/i915: reimplement header test feature") Cc: Masahiro Yamada Acked-by: Randy Dunlap Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200221105414.14358-1-jani.nikula@intel.com (cherry picked from commit 408c1b3253dab93da175690dc0e21dd8bccf3371) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index b8c5f8934dbd..a1f2411aa21b 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -294,7 +294,7 @@ extra-$(CONFIG_DRM_I915_WERROR) += \ $(shell cd $(srctree)/$(src) && find * -name '*.h'))) quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@) - cmd_hdrtest = $(CC) $(c_flags) -S -o /dev/null -x c /dev/null -include $<; touch $@ + cmd_hdrtest = $(CC) $(filter-out $(CFLAGS_GCOV), $(c_flags)) -S -o /dev/null -x c /dev/null -include $<; touch $@ $(obj)/%.hdrtest: $(src)/%.h FORCE $(call if_changed_dep,hdrtest) -- cgit v1.2.3-59-g8ed1b From eee18939e5767dbe3a98b3ea172e7fd7ba7d403c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 24 Feb 2020 10:11:20 +0000 Subject: drm/i915/gtt: Downgrade gen7 (ivb, byt, hsw) back to aliasing-ppgtt Full-ppgtt on gen7 is proving to be highly unstable and not robust. Closes: https://gitlab.freedesktop.org/drm/intel/issues/694 Fixes: 3cd6e8860ecd ("drm/i915/gen7: Re-enable full-ppgtt for ivb & hsw") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Jani Nikula Cc: Dave Airlie Acked-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20200224101120.4024481-1-chris@chris-wilson.co.uk (cherry picked from commit 4fbe112a569526e46fa2accb5763c069f78cb431) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 83f01401b8b5..f631f6d21127 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -437,7 +437,7 @@ static const struct intel_device_info snb_m_gt2_info = { .has_rc6 = 1, \ .has_rc6p = 1, \ .has_rps = true, \ - .ppgtt_type = INTEL_PPGTT_FULL, \ + .ppgtt_type = INTEL_PPGTT_ALIASING, \ .ppgtt_size = 31, \ IVB_PIPE_OFFSETS, \ IVB_CURSOR_OFFSETS, \ @@ -494,7 +494,7 @@ static const struct intel_device_info vlv_info = { .has_rps = true, .display.has_gmch = 1, .display.has_hotplug = 1, - .ppgtt_type = INTEL_PPGTT_FULL, + .ppgtt_type = INTEL_PPGTT_ALIASING, .ppgtt_size = 31, .has_snoop = true, .has_coherent_ggtt = false, -- cgit v1.2.3-59-g8ed1b From 19ee5e8da6129d8d828201a12264ab3d09153ec4 Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Wed, 19 Feb 2020 17:18:21 +0100 Subject: drm/i915/pmu: Avoid using globals for CPU hotplug state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Attempting to bind / unbind module from devices where we have both integrated and discreete GPU handled by i915 can lead to leaks and warnings from cpuhp: Error: Removing state XXX which has instances left. Let's move the state to i915_pmu. Fixes: 05488673a4d4 ("drm/i915/pmu: Support multiple GPUs") Signed-off-by: Michał Winiarski Cc: Chris Wilson Cc: Michal Wajdeczko Cc: Tvrtko Ursulin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200219161822.24592-1-michal.winiarski@intel.com (cherry picked from commit f5a179d4687d4e7bfadd7cbda7ee5d0bad76761f) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_pmu.c | 18 +++++++++--------- drivers/gpu/drm/i915/i915_pmu.h | 7 +++++-- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index ec0299490dd4..84301004d5c0 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -1042,7 +1042,7 @@ static void free_event_attributes(struct i915_pmu *pmu) static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) { - struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node); + struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node); GEM_BUG_ON(!pmu->base.event_init); @@ -1055,7 +1055,7 @@ static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) { - struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node); + struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node); unsigned int target; GEM_BUG_ON(!pmu->base.event_init); @@ -1072,8 +1072,6 @@ static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) return 0; } -static enum cpuhp_state cpuhp_slot = CPUHP_INVALID; - static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu) { enum cpuhp_state slot; @@ -1087,21 +1085,22 @@ static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu) return ret; slot = ret; - ret = cpuhp_state_add_instance(slot, &pmu->node); + ret = cpuhp_state_add_instance(slot, &pmu->cpuhp.node); if (ret) { cpuhp_remove_multi_state(slot); return ret; } - cpuhp_slot = slot; + pmu->cpuhp.slot = slot; return 0; } static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu) { - WARN_ON(cpuhp_slot == CPUHP_INVALID); - WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &pmu->node)); - cpuhp_remove_multi_state(cpuhp_slot); + WARN_ON(pmu->cpuhp.slot == CPUHP_INVALID); + WARN_ON(cpuhp_state_remove_instance(pmu->cpuhp.slot, &pmu->cpuhp.node)); + cpuhp_remove_multi_state(pmu->cpuhp.slot); + pmu->cpuhp.slot = CPUHP_INVALID; } static bool is_igp(struct drm_i915_private *i915) @@ -1128,6 +1127,7 @@ void i915_pmu_register(struct drm_i915_private *i915) spin_lock_init(&pmu->lock); hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); pmu->timer.function = i915_sample; + pmu->cpuhp.slot = CPUHP_INVALID; if (!is_igp(i915)) { pmu->name = kasprintf(GFP_KERNEL, diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index 6c1647c5daf2..207058391cec 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -39,9 +39,12 @@ struct i915_pmu_sample { struct i915_pmu { /** - * @node: List node for CPU hotplug handling. + * @cpuhp: Struct used for CPU hotplug handling. */ - struct hlist_node node; + struct { + struct hlist_node node; + enum cpuhp_state slot; + } cpuhp; /** * @base: PMU base. */ -- cgit v1.2.3-59-g8ed1b From 2de0147d77168d6a227c00eb9c5a49374e1582a3 Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Wed, 19 Feb 2020 17:18:22 +0100 Subject: drm/i915/pmu: Avoid using globals for PMU events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Attempting to bind / unbind module from devices where we have both integrated and discreete GPU handled by i915, will cause us to try and double free the global state, hitting null ptr deref in free_event_attributes. Let's move it to i915_pmu. Fixes: 05488673a4d4 ("drm/i915/pmu: Support multiple GPUs") Signed-off-by: Michał Winiarski Cc: Chris Wilson Cc: Michal Wajdeczko Cc: Tvrtko Ursulin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200219161822.24592-2-michal.winiarski@intel.com (cherry picked from commit 46129dc10f47c5c2b51c93a82b7b2aca46574ae0) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_pmu.c | 41 ++++++++++++++++++++++------------------- drivers/gpu/drm/i915/i915_pmu.h | 4 ++++ 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 84301004d5c0..aa729d04abe2 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -822,11 +822,6 @@ static ssize_t i915_pmu_event_show(struct device *dev, return sprintf(buf, "config=0x%lx\n", eattr->val); } -static struct attribute_group i915_pmu_events_attr_group = { - .name = "events", - /* Patch in attrs at runtime. */ -}; - static ssize_t i915_pmu_get_attr_cpumask(struct device *dev, struct device_attribute *attr, @@ -846,13 +841,6 @@ static const struct attribute_group i915_pmu_cpumask_attr_group = { .attrs = i915_cpumask_attrs, }; -static const struct attribute_group *i915_pmu_attr_groups[] = { - &i915_pmu_format_attr_group, - &i915_pmu_events_attr_group, - &i915_pmu_cpumask_attr_group, - NULL -}; - #define __event(__config, __name, __unit) \ { \ .config = (__config), \ @@ -1026,16 +1014,16 @@ err_alloc: static void free_event_attributes(struct i915_pmu *pmu) { - struct attribute **attr_iter = i915_pmu_events_attr_group.attrs; + struct attribute **attr_iter = pmu->events_attr_group.attrs; for (; *attr_iter; attr_iter++) kfree((*attr_iter)->name); - kfree(i915_pmu_events_attr_group.attrs); + kfree(pmu->events_attr_group.attrs); kfree(pmu->i915_attr); kfree(pmu->pmu_attr); - i915_pmu_events_attr_group.attrs = NULL; + pmu->events_attr_group.attrs = NULL; pmu->i915_attr = NULL; pmu->pmu_attr = NULL; } @@ -1117,6 +1105,13 @@ static bool is_igp(struct drm_i915_private *i915) void i915_pmu_register(struct drm_i915_private *i915) { struct i915_pmu *pmu = &i915->pmu; + const struct attribute_group *attr_groups[] = { + &i915_pmu_format_attr_group, + &pmu->events_attr_group, + &i915_pmu_cpumask_attr_group, + NULL + }; + int ret = -ENOMEM; if (INTEL_GEN(i915) <= 2) { @@ -1143,11 +1138,16 @@ void i915_pmu_register(struct drm_i915_private *i915) if (!pmu->name) goto err; - i915_pmu_events_attr_group.attrs = create_event_attributes(pmu); - if (!i915_pmu_events_attr_group.attrs) + pmu->events_attr_group.name = "events"; + pmu->events_attr_group.attrs = create_event_attributes(pmu); + if (!pmu->events_attr_group.attrs) goto err_name; - pmu->base.attr_groups = i915_pmu_attr_groups; + pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups), + GFP_KERNEL); + if (!pmu->base.attr_groups) + goto err_attr; + pmu->base.task_ctx_nr = perf_invalid_context; pmu->base.event_init = i915_pmu_event_init; pmu->base.add = i915_pmu_event_add; @@ -1159,7 +1159,7 @@ void i915_pmu_register(struct drm_i915_private *i915) ret = perf_pmu_register(&pmu->base, pmu->name, -1); if (ret) - goto err_attr; + goto err_groups; ret = i915_pmu_register_cpuhp_state(pmu); if (ret) @@ -1169,6 +1169,8 @@ void i915_pmu_register(struct drm_i915_private *i915) err_unreg: perf_pmu_unregister(&pmu->base); +err_groups: + kfree(pmu->base.attr_groups); err_attr: pmu->base.event_init = NULL; free_event_attributes(pmu); @@ -1194,6 +1196,7 @@ void i915_pmu_unregister(struct drm_i915_private *i915) perf_pmu_unregister(&pmu->base); pmu->base.event_init = NULL; + kfree(pmu->base.attr_groups); if (!is_igp(i915)) kfree(pmu->name); free_event_attributes(pmu); diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index 207058391cec..f1d6cad0d7d5 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -107,6 +107,10 @@ struct i915_pmu { * @sleep_last: Last time GT parked for RC6 estimation. */ ktime_t sleep_last; + /** + * @events_attr_group: Device events attribute group. + */ + struct attribute_group events_attr_group; /** * @i915_attr: Memory block holding device attributes. */ -- cgit v1.2.3-59-g8ed1b From 238734262142075056653b4de091458e0ca858f2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 21 Feb 2020 22:18:18 +0000 Subject: drm/i915: Avoid recursing onto active vma from the shrinker We mark the vma as active while binding it in order to protect outselves from being shrunk under mempressure. This only works if we are strict in not attempting to shrink active objects. <6> [472.618968] Workqueue: events_unbound fence_work [i915] <4> [472.618970] Call Trace: <4> [472.618974] ? __schedule+0x2e5/0x810 <4> [472.618978] schedule+0x37/0xe0 <4> [472.618982] schedule_preempt_disabled+0xf/0x20 <4> [472.618984] __mutex_lock+0x281/0x9c0 <4> [472.618987] ? mark_held_locks+0x49/0x70 <4> [472.618989] ? _raw_spin_unlock_irqrestore+0x47/0x60 <4> [472.619038] ? i915_vma_unbind+0xae/0x110 [i915] <4> [472.619084] ? i915_vma_unbind+0xae/0x110 [i915] <4> [472.619122] i915_vma_unbind+0xae/0x110 [i915] <4> [472.619165] i915_gem_object_unbind+0x1dc/0x400 [i915] <4> [472.619208] i915_gem_shrink+0x328/0x660 [i915] <4> [472.619250] ? i915_gem_shrink_all+0x38/0x60 [i915] <4> [472.619282] i915_gem_shrink_all+0x38/0x60 [i915] <4> [472.619325] vm_alloc_page.constprop.25+0x1aa/0x240 [i915] <4> [472.619330] ? rcu_read_lock_sched_held+0x4d/0x80 <4> [472.619363] ? __alloc_pd+0xb/0x30 [i915] <4> [472.619366] ? module_assert_mutex_or_preempt+0xf/0x30 <4> [472.619368] ? __module_address+0x23/0xe0 <4> [472.619371] ? is_module_address+0x26/0x40 <4> [472.619374] ? static_obj+0x34/0x50 <4> [472.619376] ? lockdep_init_map+0x4d/0x1e0 <4> [472.619407] setup_page_dma+0xd/0x90 [i915] <4> [472.619437] alloc_pd+0x29/0x50 [i915] <4> [472.619470] __gen8_ppgtt_alloc+0x443/0x6b0 [i915] <4> [472.619503] gen8_ppgtt_alloc+0xd7/0x300 [i915] <4> [472.619535] ppgtt_bind_vma+0x2a/0xe0 [i915] <4> [472.619577] __vma_bind+0x26/0x40 [i915] <4> [472.619611] fence_work+0x1c/0x90 [i915] <4> [472.619617] process_one_work+0x26a/0x620 Fixes: 2850748ef876 ("drm/i915: Pull i915_vma_pin under the vm->mutex") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200221221818.2861432-1-chris@chris-wilson.co.uk (cherry picked from commit 6f24e41022f28061368776ea1514db0a6e67a9b1) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index f7e4b39c734f..59b387ade49c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -256,8 +256,7 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *i915) with_intel_runtime_pm(&i915->runtime_pm, wakeref) { freed = i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_ACTIVE); + I915_SHRINK_UNBOUND); } return freed; @@ -336,7 +335,6 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) freed_pages = 0; with_intel_runtime_pm(&i915->runtime_pm, wakeref) freed_pages += i915_gem_shrink(i915, -1UL, NULL, - I915_SHRINK_ACTIVE | I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_WRITEBACK); -- cgit v1.2.3-59-g8ed1b From 212d58c106fd0f2704664be2bb173e14cb4e86d3 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Fri, 21 Feb 2020 03:04:21 +0100 Subject: nft_set_pipapo: Actually fetch key data in nft_pipapo_remove() Phil reports that adding elements, flushing and re-adding them right away: nft add table t '{ set s { type ipv4_addr . inet_service; flags interval; }; }' nft add element t s '{ 10.0.0.1 . 22-25, 10.0.0.1 . 10-20 }' nft flush set t s nft add element t s '{ 10.0.0.1 . 10-20, 10.0.0.1 . 22-25 }' triggers, almost reliably, a crash like this one: [ 71.319848] general protection fault, probably for non-canonical address 0x6f6b6e696c2e756e: 0000 [#1] PREEMPT SMP PTI [ 71.321540] CPU: 3 PID: 1201 Comm: kworker/3:2 Not tainted 5.6.0-rc1-00377-g2bb07f4e1d861 #192 [ 71.322746] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190711_202441-buildvm-armv7-10.arm.fedoraproject.org-2.fc31 04/01/2014 [ 71.324430] Workqueue: events nf_tables_trans_destroy_work [nf_tables] [ 71.325387] RIP: 0010:nft_set_elem_destroy+0xa5/0x110 [nf_tables] [ 71.326164] Code: 89 d4 84 c0 74 0e 8b 77 44 0f b6 f8 48 01 df e8 41 ff ff ff 45 84 e4 74 36 44 0f b6 63 08 45 84 e4 74 2c 49 01 dc 49 8b 04 24 <48> 8b 40 38 48 85 c0 74 4f 48 89 e7 4c 8b [ 71.328423] RSP: 0018:ffffc9000226fd90 EFLAGS: 00010282 [ 71.329225] RAX: 6f6b6e696c2e756e RBX: ffff88813ab79f60 RCX: ffff88813931b5a0 [ 71.330365] RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff88813ab79f9a [ 71.331473] RBP: ffff88813ab79f60 R08: 0000000000000008 R09: 0000000000000000 [ 71.332627] R10: 000000000000021c R11: 0000000000000000 R12: ffff88813ab79fc2 [ 71.333615] R13: ffff88813b3adf50 R14: dead000000000100 R15: ffff88813931b8a0 [ 71.334596] FS: 0000000000000000(0000) GS:ffff88813bd80000(0000) knlGS:0000000000000000 [ 71.335780] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 71.336577] CR2: 000055ac683710f0 CR3: 000000013a222003 CR4: 0000000000360ee0 [ 71.337533] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 71.338557] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 71.339718] Call Trace: [ 71.340093] nft_pipapo_destroy+0x7a/0x170 [nf_tables_set] [ 71.340973] nft_set_destroy+0x20/0x50 [nf_tables] [ 71.341879] nf_tables_trans_destroy_work+0x246/0x260 [nf_tables] [ 71.342916] process_one_work+0x1d5/0x3c0 [ 71.343601] worker_thread+0x4a/0x3c0 [ 71.344229] kthread+0xfb/0x130 [ 71.344780] ? process_one_work+0x3c0/0x3c0 [ 71.345477] ? kthread_park+0x90/0x90 [ 71.346129] ret_from_fork+0x35/0x40 [ 71.346748] Modules linked in: nf_tables_set nf_tables nfnetlink 8021q [last unloaded: nfnetlink] [ 71.348153] ---[ end trace 2eaa8149ca759bcc ]--- [ 71.349066] RIP: 0010:nft_set_elem_destroy+0xa5/0x110 [nf_tables] [ 71.350016] Code: 89 d4 84 c0 74 0e 8b 77 44 0f b6 f8 48 01 df e8 41 ff ff ff 45 84 e4 74 36 44 0f b6 63 08 45 84 e4 74 2c 49 01 dc 49 8b 04 24 <48> 8b 40 38 48 85 c0 74 4f 48 89 e7 4c 8b [ 71.350017] RSP: 0018:ffffc9000226fd90 EFLAGS: 00010282 [ 71.350019] RAX: 6f6b6e696c2e756e RBX: ffff88813ab79f60 RCX: ffff88813931b5a0 [ 71.350019] RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff88813ab79f9a [ 71.350020] RBP: ffff88813ab79f60 R08: 0000000000000008 R09: 0000000000000000 [ 71.350021] R10: 000000000000021c R11: 0000000000000000 R12: ffff88813ab79fc2 [ 71.350022] R13: ffff88813b3adf50 R14: dead000000000100 R15: ffff88813931b8a0 [ 71.350025] FS: 0000000000000000(0000) GS:ffff88813bd80000(0000) knlGS:0000000000000000 [ 71.350026] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 71.350027] CR2: 000055ac683710f0 CR3: 000000013a222003 CR4: 0000000000360ee0 [ 71.350028] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 71.350028] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 71.350030] Kernel panic - not syncing: Fatal exception [ 71.350412] Kernel Offset: disabled [ 71.365922] ---[ end Kernel panic - not syncing: Fatal exception ]--- which is caused by dangling elements that have been deactivated, but never removed. On a flush operation, nft_pipapo_walk() walks through all the elements in the mapping table, which are then deactivated by nft_flush_set(), one by one, and added to the commit list for removal. Element data is then freed. On transaction commit, nft_pipapo_remove() is called, and failed to remove these elements, leading to the stale references in the mapping. The first symptom of this, revealed by KASan, is a one-byte use-after-free in subsequent calls to nft_pipapo_walk(), which is usually not enough to trigger a panic. When stale elements are used more heavily, though, such as double-free via nft_pipapo_destroy() as in Phil's case, the problem becomes more noticeable. The issue comes from that fact that, on a flush operation, nft_pipapo_remove() won't get the actual key data via elem->key, elements to be deleted upon commit won't be found by the lookup via pipapo_get(), and removal will be skipped. Key data should be fetched via nft_set_ext_key(), instead. Reported-by: Phil Sutter Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Signed-off-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_pipapo.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index feac8553f6d9..4fc0c924ed5d 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1766,11 +1766,13 @@ static bool pipapo_match_field(struct nft_pipapo_field *f, static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { - const u8 *data = (const u8 *)elem->key.val.data; struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *m = priv->clone; + struct nft_pipapo_elem *e = elem->priv; int rules_f0, first_rule = 0; - struct nft_pipapo_elem *e; + const u8 *data; + + data = (const u8 *)nft_set_ext_key(&e->ext); e = pipapo_get(net, set, data, 0); if (IS_ERR(e)) -- cgit v1.2.3-59-g8ed1b From 0954df70fba743d8cdaa09ccf6ba8e4ad09628de Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Fri, 21 Feb 2020 03:04:22 +0100 Subject: selftests: nft_concat_range: Add test for reported add/flush/add issue Add a specific test for the crash reported by Phil Sutter and addressed in the previous patch. The test cases that, in my intention, should have covered these cases, that is, the ones from the 'concurrency' section, don't run these sequences tightly enough and spectacularly failed to catch this. While at it, define a convenient way to add these kind of tests, by adding a "reported issues" test section. It's more convenient, for this particular test, to execute the set setup in its own function. However, future test cases like this one might need to call setup functions, and will typically need no tools other than nft, so allow for this in check_tools(). The original form of the reproducer used here was provided by Phil. Reported-by: Phil Sutter Signed-off-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso --- .../selftests/netfilter/nft_concat_range.sh | 43 ++++++++++++++++++++-- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh index 5c1033ee1b39..5a4938d6dcf2 100755 --- a/tools/testing/selftests/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/netfilter/nft_concat_range.sh @@ -13,11 +13,12 @@ KSELFTEST_SKIP=4 # Available test groups: +# - reported_issues: check for issues that were reported in the past # - correctness: check that packets match given entries, and only those # - concurrency: attempt races between insertion, deletion and lookup # - timeout: check that packets match entries until they expire # - performance: estimate matching rate, compare with rbtree and hash baselines -TESTS="correctness concurrency timeout" +TESTS="reported_issues correctness concurrency timeout" [ "${quicktest}" != "1" ] && TESTS="${TESTS} performance" # Set types, defined by TYPE_ variables below @@ -25,6 +26,9 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto net_port_net net_mac net_mac_icmp net6_mac_icmp net6_port_net6_port net_port_mac_proto_net" +# Reported bugs, also described by TYPE_ variables below +BUGS="flush_remove_add" + # List of possible paths to pktgen script from kernel tree for performance tests PKTGEN_SCRIPT_PATHS=" ../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh @@ -327,6 +331,12 @@ flood_spec ip daddr . tcp dport . meta l4proto . ip saddr perf_duration 0 " +# Definition of tests for bugs reported in the past: +# display display text for test report +TYPE_flush_remove_add=" +display Add two elements, flush, re-add +" + # Set template for all tests, types and rules are filled in depending on test set_template=' flush ruleset @@ -440,6 +450,8 @@ setup_set() { # Check that at least one of the needed tools is available check_tools() { + [ -z "${tools}" ] && return 0 + __tools= for tool in ${tools}; do if [ "${tool}" = "nc" ] && [ "${proto}" = "udp6" ] && \ @@ -1430,6 +1442,23 @@ test_performance() { kill "${perf_pid}" } +test_bug_flush_remove_add() { + set_cmd='{ set s { type ipv4_addr . inet_service; flags interval; }; }' + elem1='{ 10.0.0.1 . 22-25, 10.0.0.1 . 10-20 }' + elem2='{ 10.0.0.1 . 10-20, 10.0.0.1 . 22-25 }' + for i in `seq 1 100`; do + nft add table t ${set_cmd} || return ${KSELFTEST_SKIP} + nft add element t s ${elem1} 2>/dev/null || return 1 + nft flush set t s 2>/dev/null || return 1 + nft add element t s ${elem2} 2>/dev/null || return 1 + done + nft flush ruleset +} + +test_reported_issues() { + eval test_bug_"${subtest}" +} + # Run everything in a separate network namespace [ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; } tmp="$(mktemp)" @@ -1438,9 +1467,15 @@ trap cleanup EXIT # Entry point for test runs passed=0 for name in ${TESTS}; do - printf "TEST: %s\n" "${name}" - for type in ${TYPES}; do - eval desc=\$TYPE_"${type}" + printf "TEST: %s\n" "$(echo ${name} | tr '_' ' ')" + if [ "${name}" = "reported_issues" ]; then + SUBTESTS="${BUGS}" + else + SUBTESTS="${TYPES}" + fi + + for subtest in ${SUBTESTS}; do + eval desc=\$TYPE_"${subtest}" IFS=' ' for __line in ${desc}; do -- cgit v1.2.3-59-g8ed1b From 2a44f46781617c5040372b59da33553a02b1f46d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 25 Feb 2020 13:25:41 -0700 Subject: io_uring: pick up link work on submit reference drop If work completes inline, then we should pick up a dependent link item in __io_queue_sqe() as well. If we don't do so, we're forced to go async with that item, which is suboptimal. This also fixes an issue with io_put_req_find_next(), which always looks up the next work item. That should only be done if we're dropping the last reference to the request, to prevent multiple lookups of the same work item. Outside of being a fix, this also enables a good cleanup series for 5.7, where we never have to pass 'nxt' around or into the work handlers. Reviewed-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index ffd9bfa84d86..f79ca494bb56 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1483,10 +1483,10 @@ static void io_free_req(struct io_kiocb *req) __attribute__((nonnull)) static void io_put_req_find_next(struct io_kiocb *req, struct io_kiocb **nxtptr) { - io_req_find_next(req, nxtptr); - - if (refcount_dec_and_test(&req->refs)) + if (refcount_dec_and_test(&req->refs)) { + io_req_find_next(req, nxtptr); __io_free_req(req); + } } static void io_put_req(struct io_kiocb *req) @@ -4749,7 +4749,7 @@ punt: err: /* drop submission reference */ - io_put_req(req); + io_put_req_find_next(req, &nxt); if (linked_timeout) { if (!ret) -- cgit v1.2.3-59-g8ed1b From 3a9015988b3d41027cda61f4fdeaaeee73be8b24 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 25 Feb 2020 17:48:55 -0700 Subject: io_uring: import_single_range() returns 0/-ERROR Unlike the other core import helpers, import_single_range() returns 0 on success, not the length imported. This means that links that depend on the result of non-vec based IORING_OP_{READ,WRITE} that were added for 5.5 get errored when they should not be. Fixes: 3a6820f2bb8a ("io_uring: add non-vectored read/write commands") Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index f79ca494bb56..36917c0101fd 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2075,7 +2075,7 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req, ssize_t ret; ret = import_single_range(rw, buf, sqe_len, *iovec, iter); *iovec = NULL; - return ret; + return ret < 0 ? ret : sqe_len; } if (req->io) { -- cgit v1.2.3-59-g8ed1b From 505b12b3861bc79d1b81c815faaf4910469a7006 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 12 Feb 2020 20:40:57 -0800 Subject: kbuild: add comment for V=2 mode Complete the comments for valid values of KBUILD_VERBOSE, specifically for KBUILD_VERBOSE=2. Signed-off-by: Randy Dunlap Signed-off-by: Masahiro Yamada --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 0914049d2929..2afa692b39ff 100644 --- a/Makefile +++ b/Makefile @@ -68,6 +68,7 @@ unexport GREP_OPTIONS # # If KBUILD_VERBOSE equals 0 then the above command will be hidden. # If KBUILD_VERBOSE equals 1 then the above command is displayed. +# If KBUILD_VERBOSE equals 2 then give the reason why each target is rebuilt. # # To put more focus on warnings, be less verbose as default # Use 'make V=1' to see the full commands -- cgit v1.2.3-59-g8ed1b From eccbde4f6c2b6ebc52b3e9103e6f2f73f5a9f79a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 19 Feb 2020 10:15:19 +0900 Subject: kbuild: remove wrong documentation about mandatory-y This sentence does not make sense in the section about mandatory-y. This seems to be a copy-paste mistake of commit fcc8487d477a ("uapi: export all headers under uapi directories"). The correct description would be "The convention is to list one mandatory-y per line ...". I just removed it instead of fixing it. If such information is needed, it could be commented in include/asm-generic/Kbuild and include/uapi/asm-generic/Kbuild. Signed-off-by: Masahiro Yamada --- Documentation/kbuild/makefiles.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst index 0e0eb2c8da7d..4018ad7c7a11 100644 --- a/Documentation/kbuild/makefiles.rst +++ b/Documentation/kbuild/makefiles.rst @@ -1379,9 +1379,6 @@ See subsequent chapter for the syntax of the Kbuild file. in arch/$(ARCH)/include/(uapi/)/asm, Kbuild will automatically generate a wrapper of the asm-generic one. - The convention is to list one subdir per line and - preferably in alphabetic order. - 8 Kbuild Variables ================== -- cgit v1.2.3-59-g8ed1b From 7a04960560640ac5b0b89461f7757322b57d0c7a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 23 Feb 2020 04:04:31 +0900 Subject: kbuild: fix DT binding schema rule to detect command line changes This if_change_rule is not working properly; it cannot detect any command line change. The reason is because cmd-check in scripts/Kbuild.include compares $(cmd_$@) and $(cmd_$1), but cmd_dtc_dt_yaml does not exist here. For if_change_rule to work properly, the stem part of cmd_* and rule_* must match. Because this cmd_and_fixdep invokes cmd_dtc, this rule must be named rule_dtc. Fixes: 4f0e3a57d6eb ("kbuild: Add support for DT binding schema checks") Signed-off-by: Masahiro Yamada Acked-by: Rob Herring --- scripts/Makefile.lib | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index bae62549e3d2..64b938c10039 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -302,13 +302,13 @@ DT_TMP_SCHEMA := $(objtree)/$(DT_BINDING_DIR)/processed-schema.yaml quiet_cmd_dtb_check = CHECK $@ cmd_dtb_check = $(DT_CHECKER) -u $(srctree)/$(DT_BINDING_DIR) -p $(DT_TMP_SCHEMA) $@ ; -define rule_dtc_dt_yaml +define rule_dtc $(call cmd_and_fixdep,dtc,yaml) $(call cmd,dtb_check) endef $(obj)/%.dt.yaml: $(src)/%.dts $(DTC) $(DT_TMP_SCHEMA) FORCE - $(call if_changed_rule,dtc_dt_yaml) + $(call if_changed_rule,dtc) dtc-tmp = $(subst $(comma),_,$(dot-target).dts.tmp) -- cgit v1.2.3-59-g8ed1b From fd63fab48f143f73b534821408a303241ed174f9 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 23 Feb 2020 04:04:32 +0900 Subject: kbuild: remove unneeded semicolon at the end of cmd_dtb_check This trailing semicolon is unneeded. Signed-off-by: Masahiro Yamada Acked-by: Rob Herring --- scripts/Makefile.lib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 64b938c10039..752ff0a225a9 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -300,7 +300,7 @@ DT_BINDING_DIR := Documentation/devicetree/bindings DT_TMP_SCHEMA := $(objtree)/$(DT_BINDING_DIR)/processed-schema.yaml quiet_cmd_dtb_check = CHECK $@ - cmd_dtb_check = $(DT_CHECKER) -u $(srctree)/$(DT_BINDING_DIR) -p $(DT_TMP_SCHEMA) $@ ; + cmd_dtb_check = $(DT_CHECKER) -u $(srctree)/$(DT_BINDING_DIR) -p $(DT_TMP_SCHEMA) $@ define rule_dtc $(call cmd_and_fixdep,dtc,yaml) -- cgit v1.2.3-59-g8ed1b From 964a596db8db8c77c9903dd05655696696e6b3ad Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 23 Feb 2020 04:04:33 +0900 Subject: kbuild: add dtbs_check to PHONY The dtbs_check should be a phony target, but currently it is not specified so. 'make dtbs_check' works even if a file named 'dtbs_check' exists because it depends on another phony target, scripts_dtc, but we should not rely on it. Add dtbs_check to PHONY. Signed-off-by: Masahiro Yamada Acked-by: Rob Herring --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2afa692b39ff..51df13599211 100644 --- a/Makefile +++ b/Makefile @@ -1239,7 +1239,7 @@ ifneq ($(dtstree),) %.dtb: include/config/kernel.release scripts_dtc $(Q)$(MAKE) $(build)=$(dtstree) $(dtstree)/$@ -PHONY += dtbs dtbs_install dt_binding_check +PHONY += dtbs dtbs_install dtbs_check dt_binding_check dtbs dtbs_check: include/config/kernel.release scripts_dtc $(Q)$(MAKE) $(build)=$(dtstree) -- cgit v1.2.3-59-g8ed1b From c473a8d03ea8e03ca9d649b0b6d72fbcf6212c05 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 23 Feb 2020 04:04:34 +0900 Subject: kbuild: add dt_binding_check to PHONY in a correct place The dt_binding_check is added to PHONY, but it is invisible when $(dtstree) is empty. So, it is not specified as phony for ARCH=x86 etc. Add it to PHONY outside the ifneq ... endif block. Signed-off-by: Masahiro Yamada Acked-by: Rob Herring --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 51df13599211..1a1a0d271697 100644 --- a/Makefile +++ b/Makefile @@ -1239,7 +1239,7 @@ ifneq ($(dtstree),) %.dtb: include/config/kernel.release scripts_dtc $(Q)$(MAKE) $(build)=$(dtstree) $(dtstree)/$@ -PHONY += dtbs dtbs_install dtbs_check dt_binding_check +PHONY += dtbs dtbs_install dtbs_check dtbs dtbs_check: include/config/kernel.release scripts_dtc $(Q)$(MAKE) $(build)=$(dtstree) @@ -1259,6 +1259,7 @@ PHONY += scripts_dtc scripts_dtc: scripts_basic $(Q)$(MAKE) $(build)=scripts/dtc +PHONY += dt_binding_check dt_binding_check: scripts_dtc $(Q)$(MAKE) $(build)=Documentation/devicetree/bindings -- cgit v1.2.3-59-g8ed1b From cae740a04b4d6d5166f19ee5faf04ea2a1f34b3d Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 26 Feb 2020 20:10:15 +0800 Subject: blk-mq: Remove some unused function arguments The struct blk_mq_hw_ctx pointer argument in blk_mq_put_tag(), blk_mq_poll_nsecs(), and blk_mq_poll_hybrid_sleep() is unused, so remove it. Overall obj code size shows a minor reduction, before: text data bss dec hex filename 27306 1312 0 28618 6fca block/blk-mq.o 4303 272 0 4575 11df block/blk-mq-tag.o after: 27282 1312 0 28594 6fb2 block/blk-mq.o 4311 272 0 4583 11e7 block/blk-mq-tag.o Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: John Garry -- This minor patch had been carried as part of the blk-mq shared tags RFC, I'd rather not carry it anymore as it required rebasing, so now or never.. Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 4 ++-- block/blk-mq-tag.h | 4 ++-- block/blk-mq.c | 10 ++++------ block/blk-mq.h | 2 +- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index fbacde454718..586c9d6e904a 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -183,8 +183,8 @@ found_tag: return tag + tag_offset; } -void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags, - struct blk_mq_ctx *ctx, unsigned int tag) +void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, + unsigned int tag) { if (!blk_mq_tag_is_reserved(tags, tag)) { const int real_tag = tag - tags->nr_reserved_tags; diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 15bc74acb57e..2b8321efb682 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -26,8 +26,8 @@ extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int r extern void blk_mq_free_tags(struct blk_mq_tags *tags); extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data); -extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags, - struct blk_mq_ctx *ctx, unsigned int tag); +extern void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, + unsigned int tag); extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags **tags, unsigned int depth, bool can_grow); diff --git a/block/blk-mq.c b/block/blk-mq.c index 5e1e4151cb51..d92088dec6c3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -477,9 +477,9 @@ static void __blk_mq_free_request(struct request *rq) blk_pm_mark_last_busy(rq); rq->mq_hctx = NULL; if (rq->tag != -1) - blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag); + blk_mq_put_tag(hctx->tags, ctx, rq->tag); if (sched_tag != -1) - blk_mq_put_tag(hctx, hctx->sched_tags, ctx, sched_tag); + blk_mq_put_tag(hctx->sched_tags, ctx, sched_tag); blk_mq_sched_restart(hctx); blk_queue_exit(q); } @@ -3402,7 +3402,6 @@ static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb) } static unsigned long blk_mq_poll_nsecs(struct request_queue *q, - struct blk_mq_hw_ctx *hctx, struct request *rq) { unsigned long ret = 0; @@ -3435,7 +3434,6 @@ static unsigned long blk_mq_poll_nsecs(struct request_queue *q, } static bool blk_mq_poll_hybrid_sleep(struct request_queue *q, - struct blk_mq_hw_ctx *hctx, struct request *rq) { struct hrtimer_sleeper hs; @@ -3455,7 +3453,7 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q, if (q->poll_nsec > 0) nsecs = q->poll_nsec; else - nsecs = blk_mq_poll_nsecs(q, hctx, rq); + nsecs = blk_mq_poll_nsecs(q, rq); if (!nsecs) return false; @@ -3510,7 +3508,7 @@ static bool blk_mq_poll_hybrid(struct request_queue *q, return false; } - return blk_mq_poll_hybrid_sleep(q, hctx, rq); + return blk_mq_poll_hybrid_sleep(q, rq); } /** diff --git a/block/blk-mq.h b/block/blk-mq.h index c0fa34378eb2..10bfdfb494fa 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -200,7 +200,7 @@ static inline bool blk_mq_get_dispatch_budget(struct blk_mq_hw_ctx *hctx) static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, struct request *rq) { - blk_mq_put_tag(hctx, hctx->tags, rq->mq_ctx, rq->tag); + blk_mq_put_tag(hctx->tags, rq->mq_ctx, rq->tag); rq->tag = -1; if (rq->rq_flags & RQF_MQ_INFLIGHT) { -- cgit v1.2.3-59-g8ed1b From dd3db2a34cff14e152da7c8e320297719a35abf9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 26 Feb 2020 10:23:43 -0700 Subject: io_uring: drop file set ref put/get on switch Dan reports that he triggered a warning on ring exit doing some testing: percpu ref (io_file_data_ref_zero) <= 0 (0) after switching to atomic WARNING: CPU: 3 PID: 0 at lib/percpu-refcount.c:160 percpu_ref_switch_to_atomic_rcu+0xe8/0xf0 Modules linked in: CPU: 3 PID: 0 Comm: swapper/3 Not tainted 5.6.0-rc3+ #5648 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 RIP: 0010:percpu_ref_switch_to_atomic_rcu+0xe8/0xf0 Code: e7 ff 55 e8 eb d2 80 3d bd 02 d2 00 00 75 8b 48 8b 55 d8 48 c7 c7 e8 70 e6 81 c6 05 a9 02 d2 00 01 48 8b 75 e8 e8 3a d0 c5 ff <0f> 0b e9 69 ff ff ff 90 55 48 89 fd 53 48 89 f3 48 83 ec 28 48 83 RSP: 0018:ffffc90000110ef8 EFLAGS: 00010292 RAX: 0000000000000045 RBX: 7fffffffffffffff RCX: 0000000000000000 RDX: 0000000000000045 RSI: ffffffff825be7a5 RDI: ffffffff825bc32c RBP: ffff8881b75eac38 R08: 000000042364b941 R09: 0000000000000045 R10: ffffffff825beb40 R11: ffffffff825be78a R12: 0000607e46005aa0 R13: ffff888107dcdd00 R14: 0000000000000000 R15: 0000000000000009 FS: 0000000000000000(0000) GS:ffff8881b9d80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f49e6a5ea20 CR3: 00000001b747c004 CR4: 00000000001606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: rcu_core+0x1e4/0x4d0 __do_softirq+0xdb/0x2f1 irq_exit+0xa0/0xb0 smp_apic_timer_interrupt+0x60/0x140 apic_timer_interrupt+0xf/0x20 RIP: 0010:default_idle+0x23/0x170 Code: ff eb ab cc cc cc cc 0f 1f 44 00 00 41 54 55 53 65 8b 2d 10 96 92 7e 0f 1f 44 00 00 e9 07 00 00 00 0f 00 2d 21 d0 51 00 fb f4 <65> 8b 2d f6 95 92 7e 0f 1f 44 00 00 5b 5d 41 5c c3 65 8b 05 e5 95 Turns out that this is due to percpu_ref_switch_to_atomic() only grabbing a reference to the percpu refcount if it's not already in atomic mode. io_uring drops a ref and re-gets it when switching back to percpu mode. We attempt to protect against this with the FFD_F_ATOMIC bit, but that isn't reliable. We don't actually need to juggle these refcounts between atomic and percpu switch, we can just do them when we've switched to atomic mode. This removes the need for FFD_F_ATOMIC, which wasn't reliable. Fixes: 05f3fb3c5397 ("io_uring: avoid ring quiesce for fixed file set unregister and update") Reported-by: Dan Melnic Signed-off-by: Jens Axboe --- fs/io_uring.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 36917c0101fd..e412a1761d93 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -183,17 +183,12 @@ struct fixed_file_table { struct file **files; }; -enum { - FFD_F_ATOMIC, -}; - struct fixed_file_data { struct fixed_file_table *table; struct io_ring_ctx *ctx; struct percpu_ref refs; struct llist_head put_llist; - unsigned long state; struct work_struct ref_work; struct completion done; }; @@ -5595,7 +5590,6 @@ static void io_ring_file_ref_switch(struct work_struct *work) data = container_of(work, struct fixed_file_data, ref_work); io_ring_file_ref_flush(data); - percpu_ref_get(&data->refs); percpu_ref_switch_to_percpu(&data->refs); } @@ -5771,8 +5765,13 @@ static void io_atomic_switch(struct percpu_ref *ref) { struct fixed_file_data *data; + /* + * Juggle reference to ensure we hit zero, if needed, so we can + * switch back to percpu mode + */ data = container_of(ref, struct fixed_file_data, refs); - clear_bit(FFD_F_ATOMIC, &data->state); + percpu_ref_put(&data->refs); + percpu_ref_get(&data->refs); } static bool io_queue_file_removal(struct fixed_file_data *data, @@ -5795,11 +5794,7 @@ static bool io_queue_file_removal(struct fixed_file_data *data, llist_add(&pfile->llist, &data->put_llist); if (pfile == &pfile_stack) { - if (!test_and_set_bit(FFD_F_ATOMIC, &data->state)) { - percpu_ref_put(&data->refs); - percpu_ref_switch_to_atomic(&data->refs, - io_atomic_switch); - } + percpu_ref_switch_to_atomic(&data->refs, io_atomic_switch); wait_for_completion(&done); flush_work(&data->ref_work); return false; @@ -5873,10 +5868,8 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, up->offset++; } - if (ref_switch && !test_and_set_bit(FFD_F_ATOMIC, &data->state)) { - percpu_ref_put(&data->refs); + if (ref_switch) percpu_ref_switch_to_atomic(&data->refs, io_atomic_switch); - } return done ? done : err; } -- cgit v1.2.3-59-g8ed1b From fda31c50292a5062332fa0343c084bd9f46604d9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 24 Feb 2020 12:47:14 -0800 Subject: signal: avoid double atomic counter increments for user accounting When queueing a signal, we increment both the users count of pending signals (for RLIMIT_SIGPENDING tracking) and we increment the refcount of the user struct itself (because we keep a reference to the user in the signal structure in order to correctly account for it when freeing). That turns out to be fairly expensive, because both of them are atomic updates, and particularly under extreme signal handling pressure on big machines, you can get a lot of cache contention on the user struct. That can then cause horrid cacheline ping-pong when you do these multiple accesses. So change the reference counting to only pin the user for the _first_ pending signal, and to unpin it when the last pending signal is dequeued. That means that when a user sees a lot of concurrent signal queuing - which is the only situation when this matters - the only atomic access needed is generally the 'sigpending' count update. This was noticed because of a particularly odd timing artifact on a dual-socket 96C/192T Cascade Lake platform: when you get into bad contention, on that machine for some reason seems to be much worse when the contention happens in the upper 32-byte half of the cacheline. As a result, the kernel test robot will-it-scale 'signal1' benchmark had an odd performance regression simply due to random alignment of the 'struct user_struct' (and pointed to a completely unrelated and apparently nonsensical commit for the regression). Avoiding the double increments (and decrements on the dequeueing side, of course) makes for much less contention and hugely improved performance on that will-it-scale microbenchmark. Quoting Feng Tang: "It makes a big difference, that the performance score is tripled! bump from original 17000 to 54000. Also the gap between 5.0-rc6 and 5.0-rc6+Jiri's patch is reduced to around 2%" [ The "2% gap" is the odd cacheline placement difference on that platform: under the extreme contention case, the effect of which half of the cacheline was hot was 5%, so with the reduced contention the odd timing artifact is reduced too ] It does help in the non-contended case too, but is not nearly as noticeable. Reported-and-tested-by: Feng Tang Cc: Eric W. Biederman Cc: Huang, Ying Cc: Philip Li Cc: Andi Kleen Cc: Jiri Olsa Cc: Peter Zijlstra Signed-off-by: Linus Torvalds --- kernel/signal.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index 9ad8dea93dbb..5b2396350dd1 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -413,27 +413,32 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi { struct sigqueue *q = NULL; struct user_struct *user; + int sigpending; /* * Protect access to @t credentials. This can go away when all * callers hold rcu read lock. + * + * NOTE! A pending signal will hold on to the user refcount, + * and we get/put the refcount only when the sigpending count + * changes from/to zero. */ rcu_read_lock(); - user = get_uid(__task_cred(t)->user); - atomic_inc(&user->sigpending); + user = __task_cred(t)->user; + sigpending = atomic_inc_return(&user->sigpending); + if (sigpending == 1) + get_uid(user); rcu_read_unlock(); - if (override_rlimit || - atomic_read(&user->sigpending) <= - task_rlimit(t, RLIMIT_SIGPENDING)) { + if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) { q = kmem_cache_alloc(sigqueue_cachep, flags); } else { print_dropped_signal(sig); } if (unlikely(q == NULL)) { - atomic_dec(&user->sigpending); - free_uid(user); + if (atomic_dec_and_test(&user->sigpending)) + free_uid(user); } else { INIT_LIST_HEAD(&q->list); q->flags = 0; @@ -447,8 +452,8 @@ static void __sigqueue_free(struct sigqueue *q) { if (q->flags & SIGQUEUE_PREALLOC) return; - atomic_dec(&q->user->sigpending); - free_uid(q->user); + if (atomic_dec_and_test(&q->user->sigpending)) + free_uid(q->user); kmem_cache_free(sigqueue_cachep, q); } -- cgit v1.2.3-59-g8ed1b From cfe2ce49b9da3959015e94b08f7494ade3ee0c49 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Feb 2020 07:39:29 -0800 Subject: Revert "KVM: x86: enable -Werror" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit ead68df94d248c80fdbae220ae5425eb5af2e753. Using the -Werror flag breaks the build for me due to mostly harmless KASAN or similar warnings: arch/x86/kvm/x86.c: In function ‘kvm_timer_init’: arch/x86/kvm/x86.c:7209:1: error: the frame size of 1112 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] Feel free to add a CONFIG_WERROR if you care strong enough, but don't break peoples builds for absolutely no good reason. Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- arch/x86/kvm/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 4654e97a05cc..b19ef421084d 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 ccflags-y += -Iarch/x86/kvm -ccflags-y += -Werror KVM := ../../../virt/kvm -- cgit v1.2.3-59-g8ed1b From 8a3bddf67ce88b96531fb22c5a75d7f4dc41d155 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Sat, 22 Feb 2020 18:54:31 +0100 Subject: drm/amdgpu: Drop DRIVER_USE_AGP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This doesn't do anything except auto-init drm_agp support when you call drm_get_pci_dev(). Which amdgpu stopped doing with commit b58c11314a1706bf094c489ef5cb28f76478c704 Author: Alex Deucher Date: Fri Jun 2 17:16:31 2017 -0400 drm/amdgpu: drop deprecated drm_get_pci_dev and drm_put_dev No idea whether this was intentional or accidental breakage, but I guess anyone who manages to boot a this modern gpu behind an agp bridge deserves a price. A price I never expect anyone to ever collect :-) Cc: Alex Deucher Cc: "Christian König" Cc: Hawking Zhang Cc: Xiaojie Yuan Cc: Evan Quan Cc: "Tianci.Yin" Cc: "Marek Olšák" Cc: Hans de Goede Reviewed-by: Emil Velikov Reviewed-by: Alex Deucher Signed-off-by: Daniel Vetter Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 94e2fd758e01..42f4febe24c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1389,7 +1389,7 @@ amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe, static struct drm_driver kms_driver = { .driver_features = - DRIVER_USE_AGP | DRIVER_ATOMIC | + DRIVER_ATOMIC | DRIVER_GEM | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE, -- cgit v1.2.3-59-g8ed1b From eb12c957735b582607e5842a06d1f4c62e185c1d Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Sat, 22 Feb 2020 18:54:32 +0100 Subject: drm/radeon: Inline drm_get_pci_dev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's the last user, and more importantly, it's the last non-legacy user of anything in drm_pci.c. The only tricky bit is the agp initialization. But a close look shows that radeon does not use the drm_agp midlayer (the main use of that is drm_bufs for legacy drivers), and instead could use the agp subsystem directly (like nouveau does already). Hence we can just pull this in too. A further step would be to entirely drop the use of drm_device->agp, but feels like too much churn just for this patch. Signed-off-by: Daniel Vetter Cc: Alex Deucher Cc: "Christian König" Cc: "David (ChunMing) Zhou" Cc: amd-gfx@lists.freedesktop.org Reviewed-by: Alex Deucher Reviewed-by: Emil Velikov Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_drv.c | 43 +++++++++++++++++++++++++++++++++++-- drivers/gpu/drm/radeon/radeon_kms.c | 6 ++++++ 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index fd74e2611185..8696af1ee14d 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -37,6 +37,7 @@ #include #include +#include #include #include #include @@ -325,6 +326,7 @@ static int radeon_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { unsigned long flags = 0; + struct drm_device *dev; int ret; if (!ent) @@ -365,7 +367,44 @@ static int radeon_pci_probe(struct pci_dev *pdev, if (ret) return ret; - return drm_get_pci_dev(pdev, ent, &kms_driver); + dev = drm_dev_alloc(&kms_driver, &pdev->dev); + if (IS_ERR(dev)) + return PTR_ERR(dev); + + ret = pci_enable_device(pdev); + if (ret) + goto err_free; + + dev->pdev = pdev; +#ifdef __alpha__ + dev->hose = pdev->sysdata; +#endif + + pci_set_drvdata(pdev, dev); + + if (pci_find_capability(dev->pdev, PCI_CAP_ID_AGP)) + dev->agp = drm_agp_init(dev); + if (dev->agp) { + dev->agp->agp_mtrr = arch_phys_wc_add( + dev->agp->agp_info.aper_base, + dev->agp->agp_info.aper_size * + 1024 * 1024); + } + + ret = drm_dev_register(dev, ent->driver_data); + if (ret) + goto err_agp; + + return 0; + +err_agp: + if (dev->agp) + arch_phys_wc_del(dev->agp->agp_mtrr); + kfree(dev->agp); + pci_disable_device(pdev); +err_free: + drm_dev_put(dev); + return ret; } static void @@ -575,7 +614,7 @@ radeon_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe, static struct drm_driver kms_driver = { .driver_features = - DRIVER_USE_AGP | DRIVER_GEM | DRIVER_RENDER, + DRIVER_GEM | DRIVER_RENDER, .load = radeon_driver_load_kms, .open = radeon_driver_open_kms, .postclose = radeon_driver_postclose_kms, diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index d24f23a81656..dd2f19b8022b 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -32,6 +32,7 @@ #include #include +#include #include #include #include @@ -77,6 +78,11 @@ void radeon_driver_unload_kms(struct drm_device *dev) radeon_modeset_fini(rdev); radeon_device_fini(rdev); + if (dev->agp) + arch_phys_wc_del(dev->agp->agp_mtrr); + kfree(dev->agp); + dev->agp = NULL; + done_free: kfree(rdev); dev->dev_private = NULL; -- cgit v1.2.3-59-g8ed1b From 6e11d1578fba8d09d03a286740ffcf336d53928c Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Mon, 24 Feb 2020 10:56:00 -0800 Subject: net: Fix Tx hash bound checking Fixes the lower and upper bounds when there are multiple TCs and traffic is on the the same TC on the same device. The lower bound is represented by 'qoffset' and the upper limit for hash value is 'qcount + qoffset'. This gives a clean Rx to Tx queue mapping when there are multiple TCs, as the queue indices for upper TCs will be offset by 'qoffset'. v2: Fixed commit description based on comments. Fixes: 1b837d489e06 ("net: Revoke export for __skb_tx_hash, update it to just be static skb_tx_hash") Fixes: eadec877ce9c ("net: Add support for subordinate traffic classes to netdev_pick_tx") Signed-off-by: Amritha Nambiar Reviewed-by: Alexander Duyck Reviewed-by: Sridhar Samudrala Signed-off-by: David S. Miller --- net/core/dev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index e10bd680dc03..c6c985fe7b1b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3076,6 +3076,8 @@ static u16 skb_tx_hash(const struct net_device *dev, if (skb_rx_queue_recorded(skb)) { hash = skb_get_rx_queue(skb); + if (hash >= qoffset) + hash -= qoffset; while (unlikely(hash >= qcount)) hash -= qcount; return hash + qoffset; -- cgit v1.2.3-59-g8ed1b From e34f1753eebc428c312527662eb1b529cf260240 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Mon, 24 Feb 2020 20:42:12 +0100 Subject: ethtool: limit bitset size Syzbot reported that ethnl_compact_sanity_checks() can be tricked into reading past the end of ETHTOOL_A_BITSET_VALUE and ETHTOOL_A_BITSET_MASK attributes and even the message by passing a value between (u32)(-31) and (u32)(-1) as ETHTOOL_A_BITSET_SIZE. The problem is that DIV_ROUND_UP(attr_nbits, 32) is 0 for such values so that zero length ETHTOOL_A_BITSET_VALUE will pass the length check but ethnl_bitmap32_not_zero() check would try to access up to 512 MB of attribute "payload". Prevent this overflow byt limiting the bitset size. Technically, compact bitset format would allow bitset sizes up to almost 2^18 (so that the nest size does not exceed U16_MAX) but bitsets used by ethtool are much shorter. S16_MAX, the largest value which can be directly used as an upper limit in policy, should be a reasonable compromise. Fixes: 10b518d4e6dd ("ethtool: netlink bitset handling") Reported-by: syzbot+7fd4ed5b4234ab1fdccd@syzkaller.appspotmail.com Reported-by: syzbot+709b7a64d57978247e44@syzkaller.appspotmail.com Reported-by: syzbot+983cb8fb2d17a7af549d@syzkaller.appspotmail.com Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- net/ethtool/bitset.c | 3 ++- net/ethtool/bitset.h | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ethtool/bitset.c b/net/ethtool/bitset.c index 8977fe1f3946..ef9197541cb3 100644 --- a/net/ethtool/bitset.c +++ b/net/ethtool/bitset.c @@ -305,7 +305,8 @@ nla_put_failure: static const struct nla_policy bitset_policy[ETHTOOL_A_BITSET_MAX + 1] = { [ETHTOOL_A_BITSET_UNSPEC] = { .type = NLA_REJECT }, [ETHTOOL_A_BITSET_NOMASK] = { .type = NLA_FLAG }, - [ETHTOOL_A_BITSET_SIZE] = { .type = NLA_U32 }, + [ETHTOOL_A_BITSET_SIZE] = NLA_POLICY_MAX(NLA_U32, + ETHNL_MAX_BITSET_SIZE), [ETHTOOL_A_BITSET_BITS] = { .type = NLA_NESTED }, [ETHTOOL_A_BITSET_VALUE] = { .type = NLA_BINARY }, [ETHTOOL_A_BITSET_MASK] = { .type = NLA_BINARY }, diff --git a/net/ethtool/bitset.h b/net/ethtool/bitset.h index b8247e34109d..b849f9d19676 100644 --- a/net/ethtool/bitset.h +++ b/net/ethtool/bitset.h @@ -3,6 +3,8 @@ #ifndef _NET_ETHTOOL_BITSET_H #define _NET_ETHTOOL_BITSET_H +#define ETHNL_MAX_BITSET_SIZE S16_MAX + typedef const char (*const ethnl_string_array_t)[ETH_GSTRING_LEN]; int ethnl_bitset_is_compact(const struct nlattr *bitset, bool *compact); -- cgit v1.2.3-59-g8ed1b From 99b79c3900d4627672c85d9f344b5b0f06bc2a4d Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 12 Feb 2020 22:53:52 -0800 Subject: netfilter: xt_hashlimit: unregister proc file before releasing mutex Before releasing the global mutex, we only unlink the hashtable from the hash list, its proc file is still not unregistered at this point. So syzbot could trigger a race condition where a parallel htable_create() could register the same file immediately after the mutex is released. Move htable_remove_proc_entry() back to mutex protection to fix this. And, fold htable_destroy() into htable_put() to make the code slightly easier to understand. Reported-and-tested-by: syzbot+d195fd3b9a364ddd6731@syzkaller.appspotmail.com Fixes: c4a3922d2d20 ("netfilter: xt_hashlimit: reduce hashlimit_mutex scope for htable_put()") Signed-off-by: Cong Wang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_hashlimit.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 7a2c4b8408c4..8c835ad63729 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -402,15 +402,6 @@ static void htable_remove_proc_entry(struct xt_hashlimit_htable *hinfo) remove_proc_entry(hinfo->name, parent); } -static void htable_destroy(struct xt_hashlimit_htable *hinfo) -{ - cancel_delayed_work_sync(&hinfo->gc_work); - htable_remove_proc_entry(hinfo); - htable_selective_cleanup(hinfo, true); - kfree(hinfo->name); - vfree(hinfo); -} - static struct xt_hashlimit_htable *htable_find_get(struct net *net, const char *name, u_int8_t family) @@ -432,8 +423,13 @@ static void htable_put(struct xt_hashlimit_htable *hinfo) { if (refcount_dec_and_mutex_lock(&hinfo->use, &hashlimit_mutex)) { hlist_del(&hinfo->node); + htable_remove_proc_entry(hinfo); mutex_unlock(&hashlimit_mutex); - htable_destroy(hinfo); + + cancel_delayed_work_sync(&hinfo->gc_work); + htable_selective_cleanup(hinfo, true); + kfree(hinfo->name); + vfree(hinfo); } } -- cgit v1.2.3-59-g8ed1b From 9a005c3898aa07cd5cdca77b7096814e6c478c92 Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Mon, 24 Feb 2020 15:29:09 -0800 Subject: bnxt_en: add newline to netdev_*() format strings Add missing newlines to netdev_* format strings so the lines aren't buffered by the printk subsystem. Nitpicked-by: Jakub Kicinski Signed-off-by: Jonathan Lemon Acked-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 +- drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 10 ++--- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 4 +- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 48 +++++++++++------------ drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c | 10 ++--- 5 files changed, 38 insertions(+), 38 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index fd6e0e48cd51..f9a8151f092c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11252,7 +11252,7 @@ static void bnxt_cfg_ntp_filters(struct bnxt *bp) } } if (test_and_clear_bit(BNXT_HWRM_PF_UNLOAD_SP_EVENT, &bp->sp_event)) - netdev_info(bp->dev, "Receive PF driver unload event!"); + netdev_info(bp->dev, "Receive PF driver unload event!\n"); } #else @@ -11759,7 +11759,7 @@ static int bnxt_pcie_dsn_get(struct bnxt *bp, u8 dsn[]) u32 dw; if (!pos) { - netdev_info(bp->dev, "Unable do read adapter's DSN"); + netdev_info(bp->dev, "Unable do read adapter's DSN\n"); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index eec0168330b7..d3c93ccee86a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -641,14 +641,14 @@ static int bnxt_dl_params_register(struct bnxt *bp) rc = devlink_params_register(bp->dl, bnxt_dl_params, ARRAY_SIZE(bnxt_dl_params)); if (rc) { - netdev_warn(bp->dev, "devlink_params_register failed. rc=%d", + netdev_warn(bp->dev, "devlink_params_register failed. rc=%d\n", rc); return rc; } rc = devlink_port_params_register(&bp->dl_port, bnxt_dl_port_params, ARRAY_SIZE(bnxt_dl_port_params)); if (rc) { - netdev_err(bp->dev, "devlink_port_params_register failed"); + netdev_err(bp->dev, "devlink_port_params_register failed\n"); devlink_params_unregister(bp->dl, bnxt_dl_params, ARRAY_SIZE(bnxt_dl_params)); return rc; @@ -679,7 +679,7 @@ int bnxt_dl_register(struct bnxt *bp) else dl = devlink_alloc(&bnxt_vf_dl_ops, sizeof(struct bnxt_dl)); if (!dl) { - netdev_warn(bp->dev, "devlink_alloc failed"); + netdev_warn(bp->dev, "devlink_alloc failed\n"); return -ENOMEM; } @@ -692,7 +692,7 @@ int bnxt_dl_register(struct bnxt *bp) rc = devlink_register(dl, &bp->pdev->dev); if (rc) { - netdev_warn(bp->dev, "devlink_register failed. rc=%d", rc); + netdev_warn(bp->dev, "devlink_register failed. rc=%d\n", rc); goto err_dl_free; } @@ -704,7 +704,7 @@ int bnxt_dl_register(struct bnxt *bp) sizeof(bp->dsn)); rc = devlink_port_register(dl, &bp->dl_port, bp->pf.port_id); if (rc) { - netdev_err(bp->dev, "devlink_port_register failed"); + netdev_err(bp->dev, "devlink_port_register failed\n"); goto err_dl_unreg; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 6171fa8b3677..e8fc1671c581 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2028,7 +2028,7 @@ int bnxt_flash_package_from_file(struct net_device *dev, const char *filename, } if (fw->size > item_len) { - netdev_err(dev, "PKG insufficient update area in nvram: %lu", + netdev_err(dev, "PKG insufficient update area in nvram: %lu\n", (unsigned long)fw->size); rc = -EFBIG; } else { @@ -3338,7 +3338,7 @@ err: kfree(coredump.data); *dump_len += sizeof(struct bnxt_coredump_record); if (rc == -ENOBUFS) - netdev_err(bp->dev, "Firmware returned large coredump buffer"); + netdev_err(bp->dev, "Firmware returned large coredump buffer\n"); return rc; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 0cc6ec51f45f..9bec256b0934 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -50,7 +50,7 @@ static u16 bnxt_flow_get_dst_fid(struct bnxt *pf_bp, struct net_device *dev) /* check if dev belongs to the same switch */ if (!netdev_port_same_parent_id(pf_bp->dev, dev)) { - netdev_info(pf_bp->dev, "dev(ifindex=%d) not on same switch", + netdev_info(pf_bp->dev, "dev(ifindex=%d) not on same switch\n", dev->ifindex); return BNXT_FID_INVALID; } @@ -70,7 +70,7 @@ static int bnxt_tc_parse_redir(struct bnxt *bp, struct net_device *dev = act->dev; if (!dev) { - netdev_info(bp->dev, "no dev in mirred action"); + netdev_info(bp->dev, "no dev in mirred action\n"); return -EINVAL; } @@ -106,7 +106,7 @@ static int bnxt_tc_parse_tunnel_set(struct bnxt *bp, const struct ip_tunnel_key *tun_key = &tun_info->key; if (ip_tunnel_info_af(tun_info) != AF_INET) { - netdev_info(bp->dev, "only IPv4 tunnel-encap is supported"); + netdev_info(bp->dev, "only IPv4 tunnel-encap is supported\n"); return -EOPNOTSUPP; } @@ -295,7 +295,7 @@ static int bnxt_tc_parse_actions(struct bnxt *bp, int i, rc; if (!flow_action_has_entries(flow_action)) { - netdev_info(bp->dev, "no actions"); + netdev_info(bp->dev, "no actions\n"); return -EINVAL; } @@ -370,7 +370,7 @@ static int bnxt_tc_parse_flow(struct bnxt *bp, /* KEY_CONTROL and KEY_BASIC are needed for forming a meaningful key */ if ((dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL)) == 0 || (dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_BASIC)) == 0) { - netdev_info(bp->dev, "cannot form TC key: used_keys = 0x%x", + netdev_info(bp->dev, "cannot form TC key: used_keys = 0x%x\n", dissector->used_keys); return -EOPNOTSUPP; } @@ -508,7 +508,7 @@ static int bnxt_hwrm_cfa_flow_free(struct bnxt *bp, rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (rc) - netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc); + netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc); return rc; } @@ -841,7 +841,7 @@ static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp, resp = bnxt_get_hwrm_resp_addr(bp, &req); *decap_filter_handle = resp->decap_filter_id; } else { - netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc); + netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc); } mutex_unlock(&bp->hwrm_cmd_lock); @@ -859,7 +859,7 @@ static int hwrm_cfa_decap_filter_free(struct bnxt *bp, rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (rc) - netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc); + netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc); return rc; } @@ -906,7 +906,7 @@ static int hwrm_cfa_encap_record_alloc(struct bnxt *bp, resp = bnxt_get_hwrm_resp_addr(bp, &req); *encap_record_handle = resp->encap_record_id; } else { - netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc); + netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc); } mutex_unlock(&bp->hwrm_cmd_lock); @@ -924,7 +924,7 @@ static int hwrm_cfa_encap_record_free(struct bnxt *bp, rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (rc) - netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc); + netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc); return rc; } @@ -943,7 +943,7 @@ static int bnxt_tc_put_l2_node(struct bnxt *bp, tc_info->l2_ht_params); if (rc) netdev_err(bp->dev, - "Error: %s: rhashtable_remove_fast: %d", + "Error: %s: rhashtable_remove_fast: %d\n", __func__, rc); kfree_rcu(l2_node, rcu); } @@ -972,7 +972,7 @@ bnxt_tc_get_l2_node(struct bnxt *bp, struct rhashtable *l2_table, if (rc) { kfree_rcu(l2_node, rcu); netdev_err(bp->dev, - "Error: %s: rhashtable_insert_fast: %d", + "Error: %s: rhashtable_insert_fast: %d\n", __func__, rc); return NULL; } @@ -1031,7 +1031,7 @@ static bool bnxt_tc_can_offload(struct bnxt *bp, struct bnxt_tc_flow *flow) if ((flow->flags & BNXT_TC_FLOW_FLAGS_PORTS) && (flow->l4_key.ip_proto != IPPROTO_TCP && flow->l4_key.ip_proto != IPPROTO_UDP)) { - netdev_info(bp->dev, "Cannot offload non-TCP/UDP (%d) ports", + netdev_info(bp->dev, "Cannot offload non-TCP/UDP (%d) ports\n", flow->l4_key.ip_proto); return false; } @@ -1088,7 +1088,7 @@ static int bnxt_tc_put_tunnel_node(struct bnxt *bp, rc = rhashtable_remove_fast(tunnel_table, &tunnel_node->node, *ht_params); if (rc) { - netdev_err(bp->dev, "rhashtable_remove_fast rc=%d", rc); + netdev_err(bp->dev, "rhashtable_remove_fast rc=%d\n", rc); rc = -1; } kfree_rcu(tunnel_node, rcu); @@ -1129,7 +1129,7 @@ bnxt_tc_get_tunnel_node(struct bnxt *bp, struct rhashtable *tunnel_table, tunnel_node->refcount++; return tunnel_node; err: - netdev_info(bp->dev, "error rc=%d", rc); + netdev_info(bp->dev, "error rc=%d\n", rc); return NULL; } @@ -1187,7 +1187,7 @@ static void bnxt_tc_put_decap_l2_node(struct bnxt *bp, &decap_l2_node->node, tc_info->decap_l2_ht_params); if (rc) - netdev_err(bp->dev, "rhashtable_remove_fast rc=%d", rc); + netdev_err(bp->dev, "rhashtable_remove_fast rc=%d\n", rc); kfree_rcu(decap_l2_node, rcu); } } @@ -1227,7 +1227,7 @@ static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp, rt = ip_route_output_key(dev_net(real_dst_dev), &flow); if (IS_ERR(rt)) { - netdev_info(bp->dev, "no route to %pI4b", &flow.daddr); + netdev_info(bp->dev, "no route to %pI4b\n", &flow.daddr); return -EOPNOTSUPP; } @@ -1241,7 +1241,7 @@ static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp, if (vlan->real_dev != real_dst_dev) { netdev_info(bp->dev, - "dst_dev(%s) doesn't use PF-if(%s)", + "dst_dev(%s) doesn't use PF-if(%s)\n", netdev_name(dst_dev), netdev_name(real_dst_dev)); rc = -EOPNOTSUPP; @@ -1253,7 +1253,7 @@ static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp, #endif } else if (dst_dev != real_dst_dev) { netdev_info(bp->dev, - "dst_dev(%s) for %pI4b is not PF-if(%s)", + "dst_dev(%s) for %pI4b is not PF-if(%s)\n", netdev_name(dst_dev), &flow.daddr, netdev_name(real_dst_dev)); rc = -EOPNOTSUPP; @@ -1262,7 +1262,7 @@ static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp, nbr = dst_neigh_lookup(&rt->dst, &flow.daddr); if (!nbr) { - netdev_info(bp->dev, "can't lookup neighbor for %pI4b", + netdev_info(bp->dev, "can't lookup neighbor for %pI4b\n", &flow.daddr); rc = -EOPNOTSUPP; goto put_rt; @@ -1472,7 +1472,7 @@ static int __bnxt_tc_del_flow(struct bnxt *bp, rc = rhashtable_remove_fast(&tc_info->flow_table, &flow_node->node, tc_info->flow_ht_params); if (rc) - netdev_err(bp->dev, "Error: %s: rhashtable_remove_fast rc=%d", + netdev_err(bp->dev, "Error: %s: rhashtable_remove_fast rc=%d\n", __func__, rc); kfree_rcu(flow_node, rcu); @@ -1587,7 +1587,7 @@ unlock: free_node: kfree_rcu(new_node, rcu); done: - netdev_err(bp->dev, "Error: %s: cookie=0x%lx error=%d", + netdev_err(bp->dev, "Error: %s: cookie=0x%lx error=%d\n", __func__, tc_flow_cmd->cookie, rc); return rc; } @@ -1700,7 +1700,7 @@ bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, int num_flows, le64_to_cpu(resp_bytes[i]); } } else { - netdev_info(bp->dev, "error rc=%d", rc); + netdev_info(bp->dev, "error rc=%d\n", rc); } mutex_unlock(&bp->hwrm_cmd_lock); @@ -1970,7 +1970,7 @@ static int bnxt_tc_indr_block_event(struct notifier_block *nb, bp); if (rc) netdev_info(bp->dev, - "Failed to register indirect blk: dev: %s", + "Failed to register indirect blk: dev: %s\n", netdev->name); break; case NETDEV_UNREGISTER: diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c index b010b34cdaf8..6f2faf81c1ae 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c @@ -43,7 +43,7 @@ static int hwrm_cfa_vfr_alloc(struct bnxt *bp, u16 vf_idx, netdev_dbg(bp->dev, "tx_cfa_action=0x%x, rx_cfa_code=0x%x", *tx_cfa_action, *rx_cfa_code); } else { - netdev_info(bp->dev, "%s error rc=%d", __func__, rc); + netdev_info(bp->dev, "%s error rc=%d\n", __func__, rc); } mutex_unlock(&bp->hwrm_cmd_lock); @@ -60,7 +60,7 @@ static int hwrm_cfa_vfr_free(struct bnxt *bp, u16 vf_idx) rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (rc) - netdev_info(bp->dev, "%s error rc=%d", __func__, rc); + netdev_info(bp->dev, "%s error rc=%d\n", __func__, rc); return rc; } @@ -465,7 +465,7 @@ static int bnxt_vf_reps_create(struct bnxt *bp) return 0; err: - netdev_info(bp->dev, "%s error=%d", __func__, rc); + netdev_info(bp->dev, "%s error=%d\n", __func__, rc); kfree(cfa_code_map); __bnxt_vf_reps_destroy(bp); return rc; @@ -488,7 +488,7 @@ int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode, mutex_lock(&bp->sriov_lock); if (bp->eswitch_mode == mode) { - netdev_info(bp->dev, "already in %s eswitch mode", + netdev_info(bp->dev, "already in %s eswitch mode\n", mode == DEVLINK_ESWITCH_MODE_LEGACY ? "legacy" : "switchdev"); rc = -EINVAL; @@ -508,7 +508,7 @@ int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode, } if (pci_num_vf(bp->pdev) == 0) { - netdev_info(bp->dev, "Enable VFs before setting switchdev mode"); + netdev_info(bp->dev, "Enable VFs before setting switchdev mode\n"); rc = -EPERM; goto done; } -- cgit v1.2.3-59-g8ed1b From 98c5f7d44fef309e692c24c6d71131ee0f0871fb Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 24 Feb 2020 15:56:32 -0800 Subject: net: dsa: bcm_sf2: Forcibly configure IMP port for 1Gb/sec We are still experiencing some packet loss with the existing advanced congestion buffering (ACB) settings with the IMP port configured for 2Gb/sec, so revert to conservative link speeds that do not produce packet loss until this is resolved. Fixes: 8f1880cbe8d0 ("net: dsa: bcm_sf2: Configure IMP port for 2Gb/sec") Fixes: de34d7084edd ("net: dsa: bcm_sf2: Only 7278 supports 2Gb/sec IMP port") Signed-off-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index d1955543acd1..b0f5280a83cb 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -69,8 +69,7 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port) /* Force link status for IMP port */ reg = core_readl(priv, offset); reg |= (MII_SW_OR | LINK_STS); - if (priv->type == BCM7278_DEVICE_ID) - reg |= GMII_SPEED_UP_2G; + reg &= ~GMII_SPEED_UP_2G; core_writel(priv, reg, offset); /* Enable Broadcast, Multicast, Unicast forwarding to IMP port */ -- cgit v1.2.3-59-g8ed1b From 2eb51c75dcb354f8aef03d7648318b24630632e1 Mon Sep 17 00:00:00 2001 From: Madhuparna Bhowmik Date: Tue, 25 Feb 2020 17:57:45 +0530 Subject: net: core: devlink.c: Use built-in RCU list checking list_for_each_entry_rcu() has built-in RCU and lock checking. Pass cond argument to list_for_each_entry_rcu() to silence false lockdep warning when CONFIG_PROVE_RCU_LIST is enabled. The devlink->lock is held when devlink_dpipe_table_find() is called in non RCU read side section. Therefore, pass struct devlink to devlink_dpipe_table_find() for lockdep checking. Signed-off-by: Madhuparna Bhowmik Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/devlink.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index 8d0b558be942..5e220809844c 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2103,11 +2103,11 @@ err_action_values_put: static struct devlink_dpipe_table * devlink_dpipe_table_find(struct list_head *dpipe_tables, - const char *table_name) + const char *table_name, struct devlink *devlink) { struct devlink_dpipe_table *table; - - list_for_each_entry_rcu(table, dpipe_tables, list) { + list_for_each_entry_rcu(table, dpipe_tables, list, + lockdep_is_held(&devlink->lock)) { if (!strcmp(table->name, table_name)) return table; } @@ -2226,7 +2226,7 @@ static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb, table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]); table = devlink_dpipe_table_find(&devlink->dpipe_table_list, - table_name); + table_name, devlink); if (!table) return -EINVAL; @@ -2382,7 +2382,7 @@ static int devlink_dpipe_table_counters_set(struct devlink *devlink, struct devlink_dpipe_table *table; table = devlink_dpipe_table_find(&devlink->dpipe_table_list, - table_name); + table_name, devlink); if (!table) return -EINVAL; @@ -6854,7 +6854,7 @@ bool devlink_dpipe_table_counter_enabled(struct devlink *devlink, rcu_read_lock(); table = devlink_dpipe_table_find(&devlink->dpipe_table_list, - table_name); + table_name, devlink); enabled = false; if (table) enabled = table->counters_enabled; @@ -6885,7 +6885,8 @@ int devlink_dpipe_table_register(struct devlink *devlink, mutex_lock(&devlink->lock); - if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name)) { + if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name, + devlink)) { err = -EEXIST; goto unlock; } @@ -6921,7 +6922,7 @@ void devlink_dpipe_table_unregister(struct devlink *devlink, mutex_lock(&devlink->lock); table = devlink_dpipe_table_find(&devlink->dpipe_table_list, - table_name); + table_name, devlink); if (!table) goto unlock; list_del_rcu(&table->list); @@ -7078,7 +7079,7 @@ int devlink_dpipe_table_resource_set(struct devlink *devlink, mutex_lock(&devlink->lock); table = devlink_dpipe_table_find(&devlink->dpipe_table_list, - table_name); + table_name, devlink); if (!table) { err = -EINVAL; goto out; -- cgit v1.2.3-59-g8ed1b From eabc8bcb292fb9a5757b0c8ab7751f41b0a104f8 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 27 Feb 2020 02:44:58 +0900 Subject: kbuild: get rid of trailing slash from subdir- example obj-* needs a trailing slash for a directory, but subdir-* does not. Signed-off-by: Masahiro Yamada --- Documentation/kbuild/makefiles.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst index 4018ad7c7a11..6bc126a14b3d 100644 --- a/Documentation/kbuild/makefiles.rst +++ b/Documentation/kbuild/makefiles.rst @@ -765,7 +765,7 @@ is not sufficient this sometimes needs to be explicit. Example:: #arch/x86/boot/Makefile - subdir- := compressed/ + subdir- := compressed The above assignment instructs kbuild to descend down in the directory compressed/ when "make clean" is executed. -- cgit v1.2.3-59-g8ed1b From 1521a67e6016664941f0917d50cb20053a8826a2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 25 Feb 2020 13:54:12 +0100 Subject: sched: act: count in the size of action flags bitfield The put of the flags was added by the commit referenced in fixes tag, however the size of the message was not extended accordingly. Fix this by adding size of the flags bitfield to the message size. Fixes: e38226786022 ("net: sched: update action implementations to support flags") Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/act_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 90a31b15585f..8c466a712cda 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -186,6 +186,7 @@ static size_t tcf_action_shared_attrs_size(const struct tc_action *act) + nla_total_size(IFNAMSIZ) /* TCA_ACT_KIND */ + cookie_len /* TCA_ACT_COOKIE */ + nla_total_size(0) /* TCA_ACT_STATS nested */ + + nla_total_size(sizeof(struct nla_bitfield32)) /* TCA_ACT_FLAGS */ /* TCA_STATS_BASIC */ + nla_total_size_64bit(sizeof(struct gnet_stats_basic)) /* TCA_STATS_PKT64 */ -- cgit v1.2.3-59-g8ed1b From 402482a6a78e5c61d8a2ec6311fc5b4aca392cd6 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Tue, 25 Feb 2020 14:11:59 +0100 Subject: net: bcmgenet: Clear ID_MODE_DIS in EXT_RGMII_OOB_CTRL when not needed Outdated Raspberry Pi 4 firmware might configure the external PHY as rgmii although the kernel currently sets it as rgmii-rxid. This makes connections unreliable as ID_MODE_DIS is left enabled. To avoid this, explicitly clear that bit whenever we don't need it. Fixes: da38802211cc ("net: bcmgenet: Add RGMII_RXID support") Signed-off-by: Nicolas Saenz Julienne Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmmii.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 6392a2530183..10244941a7a6 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -294,6 +294,7 @@ int bcmgenet_mii_config(struct net_device *dev, bool init) */ if (priv->ext_phy) { reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL); + reg &= ~ID_MODE_DIS; reg |= id_mode_dis; if (GENET_IS_V1(priv) || GENET_IS_V2(priv) || GENET_IS_V3(priv)) reg |= RGMII_MODE_EN_V123; -- cgit v1.2.3-59-g8ed1b From 51e3dfa8906ace90c809235b3d3afebc166b6433 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Tue, 25 Feb 2020 16:34:36 +0100 Subject: net/smc: fix cleanup for linkgroup setup failures If an SMC connection to a certain peer is setup the first time, a new linkgroup is created. In case of setup failures, such a linkgroup is unusable and should disappear. As a first step the linkgroup is removed from the linkgroup list in smc_lgr_forget(). There are 2 problems: smc_listen_decline() might be called before linkgroup creation resulting in a crash due to calling smc_lgr_forget() with parameter NULL. If a setup failure occurs after linkgroup creation, the connection is never unregistered from the linkgroup, preventing linkgroup freeing. This patch introduces an enhanced smc_lgr_cleanup_early() function which * contains a linkgroup check for early smc_listen_decline() invocations * invokes smc_conn_free() to guarantee unregistering of the connection. * schedules fast linkgroup removal of the unusable linkgroup And the unused function smcd_conn_free() is removed from smc_core.h. Fixes: 3b2dec2603d5b ("net/smc: restructure client and server code in af_smc") Fixes: 2a0674fffb6bc ("net/smc: improve abnormal termination of link groups") Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/af_smc.c | 25 +++++++++++++++---------- net/smc/smc_core.c | 12 ++++++++++++ net/smc/smc_core.h | 2 +- 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 90988a511cd5..6fd44bdb0fc3 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -512,15 +512,18 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code) static int smc_connect_abort(struct smc_sock *smc, int reason_code, int local_contact) { + bool is_smcd = smc->conn.lgr->is_smcd; + if (local_contact == SMC_FIRST_CONTACT) - smc_lgr_forget(smc->conn.lgr); - if (smc->conn.lgr->is_smcd) + smc_lgr_cleanup_early(&smc->conn); + else + smc_conn_free(&smc->conn); + if (is_smcd) /* there is only one lgr role for SMC-D; use server lock */ mutex_unlock(&smc_server_lgr_pending); else mutex_unlock(&smc_client_lgr_pending); - smc_conn_free(&smc->conn); smc->connect_nonblock = 0; return reason_code; } @@ -1091,7 +1094,6 @@ static void smc_listen_out_err(struct smc_sock *new_smc) if (newsmcsk->sk_state == SMC_INIT) sock_put(&new_smc->sk); /* passive closing */ newsmcsk->sk_state = SMC_CLOSED; - smc_conn_free(&new_smc->conn); smc_listen_out(new_smc); } @@ -1102,12 +1104,13 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code, { /* RDMA setup failed, switch back to TCP */ if (local_contact == SMC_FIRST_CONTACT) - smc_lgr_forget(new_smc->conn.lgr); + smc_lgr_cleanup_early(&new_smc->conn); + else + smc_conn_free(&new_smc->conn); if (reason_code < 0) { /* error, no fallback possible */ smc_listen_out_err(new_smc); return; } - smc_conn_free(&new_smc->conn); smc_switch_to_fallback(new_smc); new_smc->fallback_rsn = reason_code; if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) { @@ -1170,16 +1173,18 @@ static int smc_listen_ism_init(struct smc_sock *new_smc, new_smc->conn.lgr->vlan_id, new_smc->conn.lgr->smcd)) { if (ini->cln_first_contact == SMC_FIRST_CONTACT) - smc_lgr_forget(new_smc->conn.lgr); - smc_conn_free(&new_smc->conn); + smc_lgr_cleanup_early(&new_smc->conn); + else + smc_conn_free(&new_smc->conn); return SMC_CLC_DECL_SMCDNOTALK; } /* Create send and receive buffers */ if (smc_buf_create(new_smc, true)) { if (ini->cln_first_contact == SMC_FIRST_CONTACT) - smc_lgr_forget(new_smc->conn.lgr); - smc_conn_free(&new_smc->conn); + smc_lgr_cleanup_early(&new_smc->conn); + else + smc_conn_free(&new_smc->conn); return SMC_CLC_DECL_MEM; } diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 2249de5379ee..5b085efa3bce 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -162,6 +162,18 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn) conn->lgr = NULL; } +void smc_lgr_cleanup_early(struct smc_connection *conn) +{ + struct smc_link_group *lgr = conn->lgr; + + if (!lgr) + return; + + smc_conn_free(conn); + smc_lgr_forget(lgr); + smc_lgr_schedule_free_work_fast(lgr); +} + /* Send delete link, either as client to request the initiation * of the DELETE LINK sequence from server; or as server to * initiate the delete processing. See smc_llc_rx_delete_link(). diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index c472e12951d1..234ae25f0025 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -296,6 +296,7 @@ struct smc_clc_msg_accept_confirm; struct smc_clc_msg_local; void smc_lgr_forget(struct smc_link_group *lgr); +void smc_lgr_cleanup_early(struct smc_connection *conn); void smc_lgr_terminate(struct smc_link_group *lgr, bool soft); void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport); void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, @@ -316,7 +317,6 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini); void smc_conn_free(struct smc_connection *conn); int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini); -void smcd_conn_free(struct smc_connection *conn); void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr); int smc_core_init(void); void smc_core_exit(void); -- cgit v1.2.3-59-g8ed1b From b6f6118901d1e867ac9177bbff3b00b185bd4fdc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Feb 2020 11:52:29 -0800 Subject: ipv6: restrict IPV6_ADDRFORM operation IPV6_ADDRFORM is able to transform IPv6 socket to IPv4 one. While this operation sounds illogical, we have to support it. One of the things it does for TCP socket is to switch sk->sk_prot to tcp_prot. We now have other layers playing with sk->sk_prot, so we should make sure to not interfere with them. This patch makes sure sk_prot is the default pointer for TCP IPv6 socket. syzbot reported : BUG: kernel NULL pointer dereference, address: 0000000000000000 PGD a0113067 P4D a0113067 PUD a8771067 PMD 0 Oops: 0010 [#1] PREEMPT SMP KASAN CPU: 0 PID: 10686 Comm: syz-executor.0 Not tainted 5.6.0-rc2-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:0x0 Code: Bad RIP value. RSP: 0018:ffffc9000281fce0 EFLAGS: 00010246 RAX: 1ffffffff15f48ac RBX: ffffffff8afa4560 RCX: dffffc0000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff8880a69a8f40 RBP: ffffc9000281fd10 R08: ffffffff86ed9b0c R09: ffffed1014d351f5 R10: ffffed1014d351f5 R11: 0000000000000000 R12: ffff8880920d3098 R13: 1ffff1101241a613 R14: ffff8880a69a8f40 R15: 0000000000000000 FS: 00007f2ae75db700(0000) GS:ffff8880aea00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffffffffd6 CR3: 00000000a3b85000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: inet_release+0x165/0x1c0 net/ipv4/af_inet.c:427 __sock_release net/socket.c:605 [inline] sock_close+0xe1/0x260 net/socket.c:1283 __fput+0x2e4/0x740 fs/file_table.c:280 ____fput+0x15/0x20 fs/file_table.c:313 task_work_run+0x176/0x1b0 kernel/task_work.c:113 tracehook_notify_resume include/linux/tracehook.h:188 [inline] exit_to_usermode_loop arch/x86/entry/common.c:164 [inline] prepare_exit_to_usermode+0x480/0x5b0 arch/x86/entry/common.c:195 syscall_return_slowpath+0x113/0x4a0 arch/x86/entry/common.c:278 do_syscall_64+0x11f/0x1c0 arch/x86/entry/common.c:304 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x45c429 Code: ad b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f2ae75dac78 EFLAGS: 00000246 ORIG_RAX: 0000000000000036 RAX: 0000000000000000 RBX: 00007f2ae75db6d4 RCX: 000000000045c429 RDX: 0000000000000001 RSI: 000000000000011a RDI: 0000000000000004 RBP: 000000000076bf20 R08: 0000000000000038 R09: 0000000000000000 R10: 0000000020000180 R11: 0000000000000246 R12: 00000000ffffffff R13: 0000000000000a9d R14: 00000000004ccfb4 R15: 000000000076bf2c Modules linked in: CR2: 0000000000000000 ---[ end trace 82567b5207e87bae ]--- RIP: 0010:0x0 Code: Bad RIP value. RSP: 0018:ffffc9000281fce0 EFLAGS: 00010246 RAX: 1ffffffff15f48ac RBX: ffffffff8afa4560 RCX: dffffc0000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff8880a69a8f40 RBP: ffffc9000281fd10 R08: ffffffff86ed9b0c R09: ffffed1014d351f5 R10: ffffed1014d351f5 R11: 0000000000000000 R12: ffff8880920d3098 R13: 1ffff1101241a613 R14: ffff8880a69a8f40 R15: 0000000000000000 FS: 00007f2ae75db700(0000) GS:ffff8880aea00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffffffffd6 CR3: 00000000a3b85000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: Eric Dumazet Reported-by: syzbot+1938db17e275e85dc328@syzkaller.appspotmail.com Cc: Daniel Borkmann Signed-off-by: David S. Miller --- net/ipv6/ipv6_sockglue.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 79fc012dd2ca..debdaeba5d8c 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -183,9 +183,15 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, retv = -EBUSY; break; } - } else if (sk->sk_protocol != IPPROTO_TCP) + } else if (sk->sk_protocol == IPPROTO_TCP) { + if (sk->sk_prot != &tcpv6_prot) { + retv = -EBUSY; + break; + } break; - + } else { + break; + } if (sk->sk_state != TCP_ESTABLISHED) { retv = -ENOTCONN; break; -- cgit v1.2.3-59-g8ed1b From f596c87005f7b1baeb7d62d9a9e25d68c3dfae10 Mon Sep 17 00:00:00 2001 From: yangerkun Date: Wed, 26 Feb 2020 11:54:35 +0800 Subject: slip: not call free_netdev before rtnl_unlock in slip_open As the description before netdev_run_todo, we cannot call free_netdev before rtnl_unlock, fix it by reorder the code. Signed-off-by: yangerkun Reviewed-by: Oliver Hartkopp Signed-off-by: David S. Miller --- drivers/net/slip/slip.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index 6f4d7ba8b109..babb01888b78 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -863,7 +863,10 @@ err_free_chan: tty->disc_data = NULL; clear_bit(SLF_INUSE, &sl->flags); sl_free_netdev(sl->dev); + /* do not call free_netdev before rtnl_unlock */ + rtnl_unlock(); free_netdev(sl->dev); + return err; err_exit: rtnl_unlock(); -- cgit v1.2.3-59-g8ed1b From 4f31c532ad400a34dbdd836c204ed964d1ec2da5 Mon Sep 17 00:00:00 2001 From: Sudheesh Mavila Date: Wed, 26 Feb 2020 12:40:45 +0530 Subject: net: phy: corrected the return value for genphy_check_and_restart_aneg and genphy_c45_check_and_restart_aneg When auto-negotiation is not required, return value should be zero. Changes v1->v2: - improved comments and code as Andrew Lunn and Heiner Kallweit suggestion - fixed issue in genphy_c45_check_and_restart_aneg as Russell King suggestion. Fixes: 2a10ab043ac5 ("net: phy: add genphy_check_and_restart_aneg()") Fixes: 1af9f16840e9 ("net: phy: add genphy_c45_check_and_restart_aneg()") Signed-off-by: Sudheesh Mavila Reviewed-by: Heiner Kallweit Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy-c45.c | 6 +++--- drivers/net/phy/phy_device.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index a1caeee12236..dd2e23fb67c0 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -167,7 +167,7 @@ EXPORT_SYMBOL_GPL(genphy_c45_restart_aneg); */ int genphy_c45_check_and_restart_aneg(struct phy_device *phydev, bool restart) { - int ret = 0; + int ret; if (!restart) { /* Configure and restart aneg if it wasn't set before */ @@ -180,9 +180,9 @@ int genphy_c45_check_and_restart_aneg(struct phy_device *phydev, bool restart) } if (restart) - ret = genphy_c45_restart_aneg(phydev); + return genphy_c45_restart_aneg(phydev); - return ret; + return 0; } EXPORT_SYMBOL_GPL(genphy_c45_check_and_restart_aneg); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 6131aca79823..c8b0c34030d3 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1793,7 +1793,7 @@ EXPORT_SYMBOL(genphy_restart_aneg); */ int genphy_check_and_restart_aneg(struct phy_device *phydev, bool restart) { - int ret = 0; + int ret; if (!restart) { /* Advertisement hasn't changed, but maybe aneg was never on to @@ -1808,9 +1808,9 @@ int genphy_check_and_restart_aneg(struct phy_device *phydev, bool restart) } if (restart) - ret = genphy_restart_aneg(phydev); + return genphy_restart_aneg(phydev); - return ret; + return 0; } EXPORT_SYMBOL(genphy_check_and_restart_aneg); -- cgit v1.2.3-59-g8ed1b From dc24f8b4ecd3d6c4153a1ec1bc2006ab32a41b8d Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 26 Feb 2020 12:19:03 +0100 Subject: mptcp: add dummy icsk_sync_mss() syzbot noted that the master MPTCP socket lacks the icsk_sync_mss callback, and was able to trigger a null pointer dereference: BUG: kernel NULL pointer dereference, address: 0000000000000000 PGD 8e171067 P4D 8e171067 PUD 93fa2067 PMD 0 Oops: 0010 [#1] PREEMPT SMP KASAN CPU: 0 PID: 8984 Comm: syz-executor066 Not tainted 5.6.0-rc2-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:0x0 Code: Bad RIP value. RSP: 0018:ffffc900020b7b80 EFLAGS: 00010246 RAX: 1ffff110124ba600 RBX: 0000000000000000 RCX: ffff88809fefa600 RDX: ffff8880994cdb18 RSI: 0000000000000000 RDI: ffff8880925d3140 RBP: ffffc900020b7bd8 R08: ffffffff870225be R09: fffffbfff140652a R10: fffffbfff140652a R11: 0000000000000000 R12: ffff8880925d35d0 R13: ffff8880925d3140 R14: dffffc0000000000 R15: 1ffff110124ba6ba FS: 0000000001a0b880(0000) GS:ffff8880aea00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffffffffd6 CR3: 00000000a6d6f000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: cipso_v4_sock_setattr+0x34b/0x470 net/ipv4/cipso_ipv4.c:1888 netlbl_sock_setattr+0x2a7/0x310 net/netlabel/netlabel_kapi.c:989 smack_netlabel security/smack/smack_lsm.c:2425 [inline] smack_inode_setsecurity+0x3da/0x4a0 security/smack/smack_lsm.c:2716 security_inode_setsecurity+0xb2/0x140 security/security.c:1364 __vfs_setxattr_noperm+0x16f/0x3e0 fs/xattr.c:197 vfs_setxattr fs/xattr.c:224 [inline] setxattr+0x335/0x430 fs/xattr.c:451 __do_sys_fsetxattr fs/xattr.c:506 [inline] __se_sys_fsetxattr+0x130/0x1b0 fs/xattr.c:495 __x64_sys_fsetxattr+0xbf/0xd0 fs/xattr.c:495 do_syscall_64+0xf7/0x1c0 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x440199 Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 fb 13 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007ffcadc19e48 EFLAGS: 00000246 ORIG_RAX: 00000000000000be RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 0000000000440199 RDX: 0000000020000200 RSI: 00000000200001c0 RDI: 0000000000000003 RBP: 00000000006ca018 R08: 0000000000000003 R09: 00000000004002c8 R10: 0000000000000009 R11: 0000000000000246 R12: 0000000000401a20 R13: 0000000000401ab0 R14: 0000000000000000 R15: 0000000000000000 Modules linked in: CR2: 0000000000000000 Address the issue adding a dummy icsk_sync_mss callback. To properly sync the subflows mss and options list we need some additional infrastructure, which will land to net-next. Reported-by: syzbot+f4dfece964792d80b139@syzkaller.appspotmail.com Fixes: 2303f994b3e1 ("mptcp: Associate MPTCP context with TCP socket") Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index e9aa6807b5be..3c19a8efdcea 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -543,6 +543,11 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, } } +static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu) +{ + return 0; +} + static int __mptcp_init_sock(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); @@ -551,6 +556,7 @@ static int __mptcp_init_sock(struct sock *sk) __set_bit(MPTCP_SEND_SPACE, &msk->flags); msk->first = NULL; + inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss; return 0; } -- cgit v1.2.3-59-g8ed1b From c87a9d6fc6d555e4981f2ded77d9a8cce950743e Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 26 Feb 2020 16:26:50 +0100 Subject: net: phy: mscc: fix firmware paths The firmware paths for the VSC8584 PHYs not not contain the leading 'microchip/' directory, as used in linux-firmware, resulting in an error when probing the driver. This patch fixes it. Fixes: a5afc1678044 ("net: phy: mscc: add support for VSC8584 PHY") Signed-off-by: Antoine Tenart Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/mscc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c index 937ac7da2789..f686f40f6bdc 100644 --- a/drivers/net/phy/mscc.c +++ b/drivers/net/phy/mscc.c @@ -345,11 +345,11 @@ enum macsec_bank { BIT(VSC8531_FORCE_LED_OFF) | \ BIT(VSC8531_FORCE_LED_ON)) -#define MSCC_VSC8584_REVB_INT8051_FW "mscc_vsc8584_revb_int8051_fb48.bin" +#define MSCC_VSC8584_REVB_INT8051_FW "microchip/mscc_vsc8584_revb_int8051_fb48.bin" #define MSCC_VSC8584_REVB_INT8051_FW_START_ADDR 0xe800 #define MSCC_VSC8584_REVB_INT8051_FW_CRC 0xfb48 -#define MSCC_VSC8574_REVB_INT8051_FW "mscc_vsc8574_revb_int8051_29e8.bin" +#define MSCC_VSC8574_REVB_INT8051_FW "microchip/mscc_vsc8574_revb_int8051_29e8.bin" #define MSCC_VSC8574_REVB_INT8051_FW_START_ADDR 0x4000 #define MSCC_VSC8574_REVB_INT8051_FW_CRC 0x29e8 -- cgit v1.2.3-59-g8ed1b From 474a31e13a4e9749fb3ee55794d69d0f17ee0998 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Wed, 26 Feb 2020 18:49:01 +0200 Subject: net: stmmac: fix notifier registration We cannot register the same netdev notifier multiple times when probing stmmac devices. Register the notifier only once in module init, and also make debugfs creation/deletion safe against simultaneous notifier call. Fixes: 481a7d154cbb ("stmmac: debugfs entry name is not be changed when udev rename device name.") Signed-off-by: Aaro Koskinen Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 5836b21edd7e..7da18c9afa01 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4405,6 +4405,8 @@ static void stmmac_init_fs(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); + rtnl_lock(); + /* Create per netdev entries */ priv->dbgfs_dir = debugfs_create_dir(dev->name, stmmac_fs_dir); @@ -4416,14 +4418,13 @@ static void stmmac_init_fs(struct net_device *dev) debugfs_create_file("dma_cap", 0444, priv->dbgfs_dir, dev, &stmmac_dma_cap_fops); - register_netdevice_notifier(&stmmac_notifier); + rtnl_unlock(); } static void stmmac_exit_fs(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); - unregister_netdevice_notifier(&stmmac_notifier); debugfs_remove_recursive(priv->dbgfs_dir); } #endif /* CONFIG_DEBUG_FS */ @@ -4940,14 +4941,14 @@ int stmmac_dvr_remove(struct device *dev) netdev_info(priv->dev, "%s: removing driver", __func__); -#ifdef CONFIG_DEBUG_FS - stmmac_exit_fs(ndev); -#endif stmmac_stop_all_dma(priv); stmmac_mac_set(priv, priv->ioaddr, false); netif_carrier_off(ndev); unregister_netdev(ndev); +#ifdef CONFIG_DEBUG_FS + stmmac_exit_fs(ndev); +#endif phylink_destroy(priv->phylink); if (priv->plat->stmmac_rst) reset_control_assert(priv->plat->stmmac_rst); @@ -5166,6 +5167,7 @@ static int __init stmmac_init(void) /* Create debugfs main directory if it doesn't exist yet */ if (!stmmac_fs_dir) stmmac_fs_dir = debugfs_create_dir(STMMAC_RESOURCE_NAME, NULL); + register_netdevice_notifier(&stmmac_notifier); #endif return 0; @@ -5174,6 +5176,7 @@ static int __init stmmac_init(void) static void __exit stmmac_exit(void) { #ifdef CONFIG_DEBUG_FS + unregister_netdevice_notifier(&stmmac_notifier); debugfs_remove_recursive(stmmac_fs_dir); #endif } -- cgit v1.2.3-59-g8ed1b From a2f2ef4a54c0d97aa6a8386f4ff23f36ebb488cf Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 26 Feb 2020 17:52:46 +0100 Subject: net/smc: check for valid ib_client_data In smc_ib_remove_dev() check if the provided ib device was actually initialized for SMC before. Reported-by: syzbot+84484ccebdd4e5451d91@syzkaller.appspotmail.com Fixes: a4cf0443c414 ("smc: introduce SMC as an IB-client") Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_ib.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 548632621f4b..d6ba186f67e2 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -573,6 +573,8 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data) struct smc_ib_device *smcibdev; smcibdev = ib_get_client_data(ibdev, &smc_ib_client); + if (!smcibdev || smcibdev->ibdev != ibdev) + return; ib_set_client_data(ibdev, &smc_ib_client, NULL); spin_lock(&smc_ib_devices.lock); list_del_init(&smcibdev->list); /* remove from smc_ib_devices */ -- cgit v1.2.3-59-g8ed1b From f5739cb0b56590d68d8df8a44659893b6d0084c3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 26 Feb 2020 22:39:27 +0100 Subject: cpufreq: Fix policy initialization for internal governor drivers Before commit 1e4f63aecb53 ("cpufreq: Avoid creating excessively large stack frames") the initial value of the policy field in struct cpufreq_policy set by the driver's ->init() callback was implicitly passed from cpufreq_init_policy() to cpufreq_set_policy() if the default governor was neither "performance" nor "powersave". After that commit, however, cpufreq_init_policy() must take that case into consideration explicitly and handle it as appropriate, so make that happen. Fixes: 1e4f63aecb53 ("cpufreq: Avoid creating excessively large stack frames") Link: https://lore.kernel.org/linux-pm/39fb762880c27da110086741315ca8b111d781cd.camel@gmail.com/ Reported-by: Artem Bityutskiy Cc: 5.4+ # 5.4+ Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index cbe6c94bf158..808874bccf4a 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1076,9 +1076,17 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy) pol = policy->last_policy; } else if (def_gov) { pol = cpufreq_parse_policy(def_gov->name); - } else { - return -ENODATA; + /* + * In case the default governor is neiter "performance" + * nor "powersave", fall back to the initial policy + * value set by the driver. + */ + if (pol == CPUFREQ_POLICY_UNKNOWN) + pol = policy->policy; } + if (pol != CPUFREQ_POLICY_PERFORMANCE && + pol != CPUFREQ_POLICY_POWERSAVE) + return -ENODATA; } return cpufreq_set_policy(policy, gov, pol); -- cgit v1.2.3-59-g8ed1b From bebdb65e077267957f48e43d205d4a16cc7b8161 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Wed, 26 Feb 2020 18:38:32 +0100 Subject: io_uring: define and set show_fdinfo only if procfs is enabled Follow the pattern used with other *_show_fdinfo functions and only define and use io_uring_show_fdinfo and its helper functions if CONFIG_PROC_FS is set. Fixes: 87ce955b24c9 ("io_uring: add ->show_fdinfo() for the io_uring file descriptor") Signed-off-by: Tobias Klauser Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index e412a1761d93..05eea06f5421 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6641,6 +6641,7 @@ out_fput: return submitted ? submitted : ret; } +#ifdef CONFIG_PROC_FS static int io_uring_show_cred(int id, void *p, void *data) { const struct cred *cred = p; @@ -6714,6 +6715,7 @@ static void io_uring_show_fdinfo(struct seq_file *m, struct file *f) percpu_ref_put(&ctx->refs); } } +#endif static const struct file_operations io_uring_fops = { .release = io_uring_release, @@ -6725,7 +6727,9 @@ static const struct file_operations io_uring_fops = { #endif .poll = io_uring_poll, .fasync = io_uring_fasync, +#ifdef CONFIG_PROC_FS .show_fdinfo = io_uring_show_fdinfo, +#endif }; static int io_allocate_scq_urings(struct io_ring_ctx *ctx, -- cgit v1.2.3-59-g8ed1b From 73a7a271b3eee7b83f29b13866163776f1cbef89 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 27 Feb 2020 12:51:46 +0100 Subject: PCI: brcmstb: Fix build on 32bit ARM platforms with older compilers Some older compilers have no implementation for the helper for 64-bit unsigned division/modulo, so linking pcie-brcmstb driver causes the "undefined reference to `__aeabi_uldivmod'" error. *rc_bar2_size is always a power of two, because it is calculated as: "1ULL << fls64(entry->res->end - entry->res->start)", so the modulo operation in the subsequent check can be replaced by a simple logical AND with a proper mask. Link: https://lore.kernel.org/r/20200227115146.24515-1-m.szyprowski@samsung.com Fixes: c0452137034b ("PCI: brcmstb: Add Broadcom STB PCIe host controller driver") Signed-off-by: Marek Szyprowski Signed-off-by: Bjorn Helgaas Acked-by: Nicolas Saenz Julienne Acked-by: Lorenzo Pieralisi --- drivers/pci/controller/pcie-brcmstb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c index d20aabc26273..3a10e678c7f4 100644 --- a/drivers/pci/controller/pcie-brcmstb.c +++ b/drivers/pci/controller/pcie-brcmstb.c @@ -670,7 +670,7 @@ static inline int brcm_pcie_get_rc_bar2_size_and_offset(struct brcm_pcie *pcie, * outbound memory @ 3GB). So instead it will start at the 1x * multiple of its size */ - if (!*rc_bar2_size || *rc_bar2_offset % *rc_bar2_size || + if (!*rc_bar2_size || (*rc_bar2_offset & (*rc_bar2_size - 1)) || (*rc_bar2_offset < SZ_4G && *rc_bar2_offset > SZ_2G)) { dev_err(dev, "Invalid rc_bar2_offset/size: size 0x%llx, off 0x%llx\n", *rc_bar2_size, *rc_bar2_offset); -- cgit v1.2.3-59-g8ed1b From 9515743bfb39c61aaf3d4f3219a645c8d1fe9a0e Mon Sep 17 00:00:00 2001 From: Bijan Mottahedeh Date: Wed, 26 Feb 2020 18:53:43 -0800 Subject: nvme-pci: Hold cq_poll_lock while completing CQEs Completions need to consumed in the same order the controller submitted them, otherwise future completion entries may overwrite ones we haven't handled yet. Hold the nvme queue's poll lock while completing new CQEs to prevent another thread from freeing command tags for reuse out-of-order. Fixes: dabcefab45d3 ("nvme: provide optimized poll function for separate poll queues") Signed-off-by: Bijan Mottahedeh Reviewed-by: Sagi Grimberg Reviewed-by: Jens Axboe Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index ace4dd9e953c..d3f23d6254e4 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1078,9 +1078,9 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx) spin_lock(&nvmeq->cq_poll_lock); found = nvme_process_cq(nvmeq, &start, &end, -1); + nvme_complete_cqes(nvmeq, start, end); spin_unlock(&nvmeq->cq_poll_lock); - nvme_complete_cqes(nvmeq, start, end); return found; } -- cgit v1.2.3-59-g8ed1b From 0bff777bd0cba73ad4cd0145696ad284d7e6a99f Mon Sep 17 00:00:00 2001 From: Luo bin Date: Thu, 27 Feb 2020 06:34:42 +0000 Subject: hinic: fix a irq affinity bug can not use a local variable as an input parameter of irq_set_affinity_hint Signed-off-by: Luo bin Signed-off-by: David S. Miller --- drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h | 1 + drivers/net/ethernet/huawei/hinic/hinic_rx.c | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h index f4a339b10b10..79091e131418 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h @@ -94,6 +94,7 @@ struct hinic_rq { struct hinic_wq *wq; + struct cpumask affinity_mask; u32 irq; u16 msix_entry; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c index 56ea6d692f1c..2695ad69fca6 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c @@ -475,7 +475,6 @@ static int rx_request_irq(struct hinic_rxq *rxq) struct hinic_hwdev *hwdev = nic_dev->hwdev; struct hinic_rq *rq = rxq->rq; struct hinic_qp *qp; - struct cpumask mask; int err; rx_add_napi(rxq); @@ -492,8 +491,8 @@ static int rx_request_irq(struct hinic_rxq *rxq) } qp = container_of(rq, struct hinic_qp, rq); - cpumask_set_cpu(qp->q_id % num_online_cpus(), &mask); - return irq_set_affinity_hint(rq->irq, &mask); + cpumask_set_cpu(qp->q_id % num_online_cpus(), &rq->affinity_mask); + return irq_set_affinity_hint(rq->irq, &rq->affinity_mask); } static void rx_free_irq(struct hinic_rxq *rxq) -- cgit v1.2.3-59-g8ed1b From d2ed69ce9ed3477e2a9527e6b89fe4689d99510e Mon Sep 17 00:00:00 2001 From: Luo bin Date: Thu, 27 Feb 2020 06:34:43 +0000 Subject: hinic: fix a bug of setting hw_ioctxt a reserved field is used to signify prime physical function index in the latest firmware version, so we must assign a value to it correctly Signed-off-by: Luo bin Signed-off-by: David S. Miller --- drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c | 1 + drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h | 2 +- drivers/net/ethernet/huawei/hinic/hinic_hw_if.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c index 6f2cf569a283..79b3d53f2fbf 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c @@ -297,6 +297,7 @@ static int set_hw_ioctxt(struct hinic_hwdev *hwdev, unsigned int rq_depth, } hw_ioctxt.func_idx = HINIC_HWIF_FUNC_IDX(hwif); + hw_ioctxt.ppf_idx = HINIC_HWIF_PPF_IDX(hwif); hw_ioctxt.set_cmdq_depth = HW_IOCTXT_SET_CMDQ_DEPTH_DEFAULT; hw_ioctxt.cmdq_depth = 0; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h index b069045de416..66fd2340d447 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h @@ -151,8 +151,8 @@ struct hinic_cmd_hw_ioctxt { u8 lro_en; u8 rsvd3; + u8 ppf_idx; u8 rsvd4; - u8 rsvd5; u16 rq_depth; u16 rx_buf_sz_idx; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h index 517794509eb2..c7bb9ceca72c 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h @@ -137,6 +137,7 @@ #define HINIC_HWIF_FUNC_IDX(hwif) ((hwif)->attr.func_idx) #define HINIC_HWIF_PCI_INTF(hwif) ((hwif)->attr.pci_intf_idx) #define HINIC_HWIF_PF_IDX(hwif) ((hwif)->attr.pf_idx) +#define HINIC_HWIF_PPF_IDX(hwif) ((hwif)->attr.ppf_idx) #define HINIC_FUNC_TYPE(hwif) ((hwif)->attr.func_type) #define HINIC_IS_PF(hwif) (HINIC_FUNC_TYPE(hwif) == HINIC_PF) -- cgit v1.2.3-59-g8ed1b From 386d4716fd91869e07c731657f2cde5a33086516 Mon Sep 17 00:00:00 2001 From: Luo bin Date: Thu, 27 Feb 2020 06:34:44 +0000 Subject: hinic: fix a bug of rss configuration should use real receive queue number to configure hw rss indirect table rather than maximal queue number Signed-off-by: Luo bin Signed-off-by: David S. Miller --- drivers/net/ethernet/huawei/hinic/hinic_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index 02a14f5e7fe3..13560975c103 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -356,7 +356,8 @@ static void hinic_enable_rss(struct hinic_dev *nic_dev) if (!num_cpus) num_cpus = num_online_cpus(); - nic_dev->num_qps = min_t(u16, nic_dev->max_qps, num_cpus); + nic_dev->num_qps = hinic_hwdev_num_qps(hwdev); + nic_dev->num_qps = min_t(u16, nic_dev->num_qps, num_cpus); nic_dev->rss_limit = nic_dev->num_qps; nic_dev->num_rss = nic_dev->num_qps; -- cgit v1.2.3-59-g8ed1b From 3a12500ed5dd21a63da779ac73503f11085bbc1c Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Wed, 26 Feb 2020 18:29:53 +0100 Subject: unix: define and set show_fdinfo only if procfs is enabled Follow the pattern used with other *_show_fdinfo functions and only define unix_show_fdinfo and set it in proto_ops if CONFIG_PROCFS is set. Fixes: 3c32da19a858 ("unix: Show number of pending scm files of receive queue in fdinfo") Signed-off-by: Tobias Klauser Reviewed-by: Kirill Tkhai Signed-off-by: David S. Miller --- net/unix/af_unix.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 62c12cb5763e..aa6e2530e1ec 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -682,6 +682,7 @@ static int unix_set_peek_off(struct sock *sk, int val) return 0; } +#ifdef CONFIG_PROCFS static void unix_show_fdinfo(struct seq_file *m, struct socket *sock) { struct sock *sk = sock->sk; @@ -692,6 +693,9 @@ static void unix_show_fdinfo(struct seq_file *m, struct socket *sock) seq_printf(m, "scm_fds: %u\n", READ_ONCE(u->scm_stat.nr_fds)); } } +#else +#define unix_show_fdinfo NULL +#endif static const struct proto_ops unix_stream_ops = { .family = PF_UNIX, -- cgit v1.2.3-59-g8ed1b From e387f7d5fccf95299135c88b799184c3bef6a705 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 27 Feb 2020 08:22:10 +0100 Subject: mlx5: register lag notifier for init network namespace only The current code causes problems when the unregistering netdevice could be different then the registering one. Since the check in mlx5_lag_netdev_event() does not allow any other network namespace anyway, fix this by registerting the lag notifier per init network namespace only. Fixes: d48834f9d4b4 ("mlx5: Use dev_net netdevice notifier registrations") Signed-off-by: Jiri Pirko Tested-by: Aya Levin Acked-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 +-- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 3 +-- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 11 +++-------- drivers/net/ethernet/mellanox/mlx5/core/lag.h | 1 - drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 +- 5 files changed, 6 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 966983674663..21de4764d4c0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5147,7 +5147,6 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) static void mlx5e_nic_disable(struct mlx5e_priv *priv) { - struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; #ifdef CONFIG_MLX5_CORE_EN_DCB @@ -5168,7 +5167,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) mlx5e_monitor_counter_cleanup(priv); mlx5e_disable_async_events(priv); - mlx5_lag_remove(mdev, netdev); + mlx5_lag_remove(mdev); } int mlx5e_update_nic_rx(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 7b48ccacebe2..6ed307d7f191 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1861,7 +1861,6 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) { - struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -1870,7 +1869,7 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) #endif mlx5_notifier_unregister(mdev, &priv->events_nb); cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work); - mlx5_lag_remove(mdev, netdev); + mlx5_lag_remove(mdev); } static MLX5E_DEFINE_STATS_GRP(sw_rep, 0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index b91eabc09fbc..8e19f6ab8393 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -464,9 +464,6 @@ static int mlx5_lag_netdev_event(struct notifier_block *this, struct mlx5_lag *ldev; int changed = 0; - if (!net_eq(dev_net(ndev), &init_net)) - return NOTIFY_DONE; - if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE)) return NOTIFY_DONE; @@ -586,8 +583,7 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) if (!ldev->nb.notifier_call) { ldev->nb.notifier_call = mlx5_lag_netdev_event; - if (register_netdevice_notifier_dev_net(netdev, &ldev->nb, - &ldev->nn)) { + if (register_netdevice_notifier_net(&init_net, &ldev->nb)) { ldev->nb.notifier_call = NULL; mlx5_core_err(dev, "Failed to register LAG netdev notifier\n"); } @@ -600,7 +596,7 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) } /* Must be called with intf_mutex held */ -void mlx5_lag_remove(struct mlx5_core_dev *dev, struct net_device *netdev) +void mlx5_lag_remove(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; int i; @@ -620,8 +616,7 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev, struct net_device *netdev) if (i == MLX5_MAX_PORTS) { if (ldev->nb.notifier_call) - unregister_netdevice_notifier_dev_net(netdev, &ldev->nb, - &ldev->nn); + unregister_netdevice_notifier_net(&init_net, &ldev->nb); mlx5_lag_mp_cleanup(ldev); cancel_delayed_work_sync(&ldev->bond_work); mlx5_lag_dev_free(ldev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag.h index 316ab09e2664..f1068aac6406 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.h @@ -44,7 +44,6 @@ struct mlx5_lag { struct workqueue_struct *wq; struct delayed_work bond_work; struct notifier_block nb; - struct netdev_net_notifier nn; struct lag_mp lag_mp; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index fcce9e0fc82c..da67b28d6e23 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -157,7 +157,7 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam, u8 feature_group, u8 access_reg_group); void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev); -void mlx5_lag_remove(struct mlx5_core_dev *dev, struct net_device *netdev); +void mlx5_lag_remove(struct mlx5_core_dev *dev); int mlx5_irq_table_init(struct mlx5_core_dev *dev); void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev); -- cgit v1.2.3-59-g8ed1b From b82cf17ff1957ec35eaee7dc519c365ecd06ba38 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 27 Feb 2020 09:44:49 +0000 Subject: net: phy: marvell: don't interpret PHY status unless resolved Don't attempt to interpret the PHY specific status register unless the PHY is indicating that the resolution is valid. Reviewed-by: Andrew Lunn Signed-off-by: Russell King Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/marvell.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 28e33ece4ce1..9a8badafea8a 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1306,6 +1306,9 @@ static int marvell_read_status_page_an(struct phy_device *phydev, } } + if (!(status & MII_M1011_PHY_STATUS_RESOLVED)) + return 0; + if (status & MII_M1011_PHY_STATUS_FULLDUPLEX) phydev->duplex = DUPLEX_FULL; else @@ -1365,6 +1368,8 @@ static int marvell_read_status_page(struct phy_device *phydev, int page) linkmode_zero(phydev->lp_advertising); phydev->pause = 0; phydev->asym_pause = 0; + phydev->speed = SPEED_UNKNOWN; + phydev->duplex = DUPLEX_UNKNOWN; if (phydev->autoneg == AUTONEG_ENABLE) err = marvell_read_status_page_an(phydev, fiber, status); -- cgit v1.2.3-59-g8ed1b From 93b5cbfa9636d385126f211dca9efa7e3f683202 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 27 Feb 2020 12:23:52 +0000 Subject: net: rmnet: fix NULL pointer dereference in rmnet_newlink() rmnet registers IFLA_LINK interface as a lower interface. But, IFLA_LINK could be NULL. In the current code, rmnet doesn't check IFLA_LINK. So, panic would occur. Test commands: modprobe rmnet ip link add rmnet0 type rmnet mux_id 1 Splat looks like: [ 36.826109][ T1115] general protection fault, probably for non-canonical address 0xdffffc0000000000I [ 36.838817][ T1115] KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] [ 36.839908][ T1115] CPU: 1 PID: 1115 Comm: ip Not tainted 5.6.0-rc1+ #447 [ 36.840569][ T1115] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 36.841408][ T1115] RIP: 0010:rmnet_newlink+0x54/0x510 [rmnet] [ 36.841986][ T1115] Code: 83 ec 18 48 c1 e9 03 80 3c 01 00 0f 85 d4 03 00 00 48 8b 6a 28 48 b8 00 00 00 00 00 c [ 36.843923][ T1115] RSP: 0018:ffff8880b7e0f1c0 EFLAGS: 00010247 [ 36.844756][ T1115] RAX: dffffc0000000000 RBX: ffff8880d14cca00 RCX: 1ffff11016fc1e99 [ 36.845859][ T1115] RDX: 0000000000000000 RSI: ffff8880c3d04000 RDI: 0000000000000004 [ 36.846961][ T1115] RBP: 0000000000000000 R08: ffff8880b7e0f8b0 R09: ffff8880b6ac2d90 [ 36.848020][ T1115] R10: ffffffffc0589a40 R11: ffffed1016d585b7 R12: ffffffff88ceaf80 [ 36.848788][ T1115] R13: ffff8880c3d04000 R14: ffff8880b7e0f8b0 R15: ffff8880c3d04000 [ 36.849546][ T1115] FS: 00007f50ab3360c0(0000) GS:ffff8880da000000(0000) knlGS:0000000000000000 [ 36.851784][ T1115] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 36.852422][ T1115] CR2: 000055871afe5ab0 CR3: 00000000ae246001 CR4: 00000000000606e0 [ 36.853181][ T1115] Call Trace: [ 36.853514][ T1115] __rtnl_newlink+0xbdb/0x1270 [ 36.853967][ T1115] ? lock_downgrade+0x6e0/0x6e0 [ 36.854420][ T1115] ? rtnl_link_unregister+0x220/0x220 [ 36.854936][ T1115] ? lock_acquire+0x164/0x3b0 [ 36.855376][ T1115] ? is_bpf_image_address+0xff/0x1d0 [ 36.855884][ T1115] ? rtnl_newlink+0x4c/0x90 [ 36.856304][ T1115] ? kernel_text_address+0x111/0x140 [ 36.856857][ T1115] ? __kernel_text_address+0xe/0x30 [ 36.857440][ T1115] ? unwind_get_return_address+0x5f/0xa0 [ 36.858063][ T1115] ? create_prof_cpu_mask+0x20/0x20 [ 36.858644][ T1115] ? arch_stack_walk+0x83/0xb0 [ 36.859171][ T1115] ? stack_trace_save+0x82/0xb0 [ 36.859710][ T1115] ? stack_trace_consume_entry+0x160/0x160 [ 36.860357][ T1115] ? deactivate_slab.isra.78+0x2c5/0x800 [ 36.860928][ T1115] ? kasan_unpoison_shadow+0x30/0x40 [ 36.861520][ T1115] ? kmem_cache_alloc_trace+0x135/0x350 [ 36.862125][ T1115] ? rtnl_newlink+0x4c/0x90 [ 36.864073][ T1115] rtnl_newlink+0x65/0x90 [ ... ] Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 06de59521fc4..471e3b2a1403 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -135,6 +135,11 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, int err = 0; u16 mux_id; + if (!tb[IFLA_LINK]) { + NL_SET_ERR_MSG_MOD(extack, "link not specified"); + return -EINVAL; + } + real_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); if (!real_dev || !dev) return -ENODEV; -- cgit v1.2.3-59-g8ed1b From 1eb1f43a6e37282348a41e3d68f5e9a6a4359212 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 27 Feb 2020 12:24:26 +0000 Subject: net: rmnet: fix NULL pointer dereference in rmnet_changelink() In the rmnet_changelink(), it uses IFLA_LINK without checking NULL pointer. tb[IFLA_LINK] could be NULL pointer. So, NULL-ptr-deref could occur. rmnet already has a lower interface (real_dev). So, after this patch, rmnet_changelink() does not use IFLA_LINK anymore. Test commands: modprobe rmnet ip link add dummy0 type dummy ip link add rmnet0 link dummy0 type rmnet mux_id 1 ip link set rmnet0 type rmnet mux_id 2 Splat looks like: [ 90.578726][ T1131] general protection fault, probably for non-canonical address 0xdffffc0000000000I [ 90.581121][ T1131] KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] [ 90.582380][ T1131] CPU: 2 PID: 1131 Comm: ip Not tainted 5.6.0-rc1+ #447 [ 90.584285][ T1131] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 90.587506][ T1131] RIP: 0010:rmnet_changelink+0x5a/0x8a0 [rmnet] [ 90.588546][ T1131] Code: 83 ec 20 48 c1 ea 03 80 3c 02 00 0f 85 6f 07 00 00 48 8b 5e 28 48 b8 00 00 00 00 00 0 [ 90.591447][ T1131] RSP: 0018:ffff8880ce78f1b8 EFLAGS: 00010247 [ 90.592329][ T1131] RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffff8880ce78f8b0 [ 90.593253][ T1131] RDX: 0000000000000000 RSI: ffff8880ce78f4a0 RDI: 0000000000000004 [ 90.594058][ T1131] RBP: ffff8880cf543e00 R08: 0000000000000002 R09: 0000000000000002 [ 90.594859][ T1131] R10: ffffffffc0586a40 R11: 0000000000000000 R12: ffff8880ca47c000 [ 90.595690][ T1131] R13: ffff8880ca47c000 R14: ffff8880cf545000 R15: 0000000000000000 [ 90.596553][ T1131] FS: 00007f21f6c7e0c0(0000) GS:ffff8880da400000(0000) knlGS:0000000000000000 [ 90.597504][ T1131] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 90.599418][ T1131] CR2: 0000556e413db458 CR3: 00000000c917a002 CR4: 00000000000606e0 [ 90.600289][ T1131] Call Trace: [ 90.600631][ T1131] __rtnl_newlink+0x922/0x1270 [ 90.601194][ T1131] ? lock_downgrade+0x6e0/0x6e0 [ 90.601724][ T1131] ? rtnl_link_unregister+0x220/0x220 [ 90.602309][ T1131] ? lock_acquire+0x164/0x3b0 [ 90.602784][ T1131] ? is_bpf_image_address+0xff/0x1d0 [ 90.603331][ T1131] ? rtnl_newlink+0x4c/0x90 [ 90.603810][ T1131] ? kernel_text_address+0x111/0x140 [ 90.604419][ T1131] ? __kernel_text_address+0xe/0x30 [ 90.604981][ T1131] ? unwind_get_return_address+0x5f/0xa0 [ 90.605616][ T1131] ? create_prof_cpu_mask+0x20/0x20 [ 90.606304][ T1131] ? arch_stack_walk+0x83/0xb0 [ 90.606985][ T1131] ? stack_trace_save+0x82/0xb0 [ 90.607656][ T1131] ? stack_trace_consume_entry+0x160/0x160 [ 90.608503][ T1131] ? deactivate_slab.isra.78+0x2c5/0x800 [ 90.609336][ T1131] ? kasan_unpoison_shadow+0x30/0x40 [ 90.610096][ T1131] ? kmem_cache_alloc_trace+0x135/0x350 [ 90.610889][ T1131] ? rtnl_newlink+0x4c/0x90 [ 90.611512][ T1131] rtnl_newlink+0x65/0x90 [ ... ] Fixes: 23790ef12082 ("net: qualcomm: rmnet: Allow to configure flags for existing devices") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 471e3b2a1403..ac58f584190b 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -300,10 +300,8 @@ static int rmnet_changelink(struct net_device *dev, struct nlattr *tb[], if (!dev) return -ENODEV; - real_dev = __dev_get_by_index(dev_net(dev), - nla_get_u32(tb[IFLA_LINK])); - - if (!real_dev || !rmnet_is_real_dev_registered(real_dev)) + real_dev = priv->real_dev; + if (!rmnet_is_real_dev_registered(real_dev)) return -ENODEV; port = rmnet_get_port_rtnl(real_dev); -- cgit v1.2.3-59-g8ed1b From 102210f7664442d8c0ce332c006ea90626df745b Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 27 Feb 2020 12:24:45 +0000 Subject: net: rmnet: fix suspicious RCU usage rmnet_get_port() internally calls rcu_dereference_rtnl(), which checks RTNL. But rmnet_get_port() could be called by packet path. The packet path is not protected by RTNL. So, the suspicious RCU usage problem occurs. Test commands: modprobe rmnet ip netns add nst ip link add veth0 type veth peer name veth1 ip link set veth1 netns nst ip link add rmnet0 link veth0 type rmnet mux_id 1 ip netns exec nst ip link add rmnet1 link veth1 type rmnet mux_id 1 ip netns exec nst ip link set veth1 up ip netns exec nst ip link set rmnet1 up ip netns exec nst ip a a 192.168.100.2/24 dev rmnet1 ip link set veth0 up ip link set rmnet0 up ip a a 192.168.100.1/24 dev rmnet0 ping 192.168.100.2 Splat looks like: [ 146.630958][ T1174] WARNING: suspicious RCU usage [ 146.631735][ T1174] 5.6.0-rc1+ #447 Not tainted [ 146.632387][ T1174] ----------------------------- [ 146.633151][ T1174] drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c:386 suspicious rcu_dereference_check() ! [ 146.634742][ T1174] [ 146.634742][ T1174] other info that might help us debug this: [ 146.634742][ T1174] [ 146.645992][ T1174] [ 146.645992][ T1174] rcu_scheduler_active = 2, debug_locks = 1 [ 146.646937][ T1174] 5 locks held by ping/1174: [ 146.647609][ T1174] #0: ffff8880c31dea70 (sk_lock-AF_INET){+.+.}, at: raw_sendmsg+0xab8/0x2980 [ 146.662463][ T1174] #1: ffffffff93925660 (rcu_read_lock_bh){....}, at: ip_finish_output2+0x243/0x2150 [ 146.671696][ T1174] #2: ffffffff93925660 (rcu_read_lock_bh){....}, at: __dev_queue_xmit+0x213/0x2940 [ 146.673064][ T1174] #3: ffff8880c19ecd58 (&dev->qdisc_running_key#7){+...}, at: ip_finish_output2+0x714/0x2150 [ 146.690358][ T1174] #4: ffff8880c5796898 (&dev->qdisc_xmit_lock_key#3){+.-.}, at: sch_direct_xmit+0x1e2/0x1020 [ 146.699875][ T1174] [ 146.699875][ T1174] stack backtrace: [ 146.701091][ T1174] CPU: 0 PID: 1174 Comm: ping Not tainted 5.6.0-rc1+ #447 [ 146.705215][ T1174] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 146.706565][ T1174] Call Trace: [ 146.707102][ T1174] dump_stack+0x96/0xdb [ 146.708007][ T1174] rmnet_get_port.part.9+0x76/0x80 [rmnet] [ 146.709233][ T1174] rmnet_egress_handler+0x107/0x420 [rmnet] [ 146.710492][ T1174] ? sch_direct_xmit+0x1e2/0x1020 [ 146.716193][ T1174] rmnet_vnd_start_xmit+0x3d/0xa0 [rmnet] [ 146.717012][ T1174] dev_hard_start_xmit+0x160/0x740 [ 146.717854][ T1174] sch_direct_xmit+0x265/0x1020 [ 146.718577][ T1174] ? register_lock_class+0x14d0/0x14d0 [ 146.719429][ T1174] ? dev_watchdog+0xac0/0xac0 [ 146.723738][ T1174] ? __dev_queue_xmit+0x15fd/0x2940 [ 146.724469][ T1174] ? lock_acquire+0x164/0x3b0 [ 146.725172][ T1174] __dev_queue_xmit+0x20c7/0x2940 [ ... ] Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 13 ++++++------- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h | 2 +- drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 4 ++-- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index ac58f584190b..fc68ecdd804b 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -382,11 +382,10 @@ struct rtnl_link_ops rmnet_link_ops __read_mostly = { .fill_info = rmnet_fill_info, }; -/* Needs either rcu_read_lock() or rtnl lock */ -struct rmnet_port *rmnet_get_port(struct net_device *real_dev) +struct rmnet_port *rmnet_get_port_rcu(struct net_device *real_dev) { if (rmnet_is_real_dev_registered(real_dev)) - return rcu_dereference_rtnl(real_dev->rx_handler_data); + return rcu_dereference_bh(real_dev->rx_handler_data); else return NULL; } @@ -412,7 +411,7 @@ int rmnet_add_bridge(struct net_device *rmnet_dev, struct rmnet_port *port, *slave_port; int err; - port = rmnet_get_port(real_dev); + port = rmnet_get_port_rtnl(real_dev); /* If there is more than one rmnet dev attached, its probably being * used for muxing. Skip the briding in that case @@ -427,7 +426,7 @@ int rmnet_add_bridge(struct net_device *rmnet_dev, if (err) return -EBUSY; - slave_port = rmnet_get_port(slave_dev); + slave_port = rmnet_get_port_rtnl(slave_dev); slave_port->rmnet_mode = RMNET_EPMODE_BRIDGE; slave_port->bridge_ep = real_dev; @@ -445,11 +444,11 @@ int rmnet_del_bridge(struct net_device *rmnet_dev, struct net_device *real_dev = priv->real_dev; struct rmnet_port *port, *slave_port; - port = rmnet_get_port(real_dev); + port = rmnet_get_port_rtnl(real_dev); port->rmnet_mode = RMNET_EPMODE_VND; port->bridge_ep = NULL; - slave_port = rmnet_get_port(slave_dev); + slave_port = rmnet_get_port_rtnl(slave_dev); rmnet_unregister_real_device(slave_dev, slave_port); netdev_dbg(slave_dev, "removed from rmnet as slave\n"); diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h index cd0a6bcbe74a..0d568dcfd65a 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h @@ -65,7 +65,7 @@ struct rmnet_priv { struct rmnet_priv_stats stats; }; -struct rmnet_port *rmnet_get_port(struct net_device *real_dev); +struct rmnet_port *rmnet_get_port_rcu(struct net_device *real_dev); struct rmnet_endpoint *rmnet_get_endpoint(struct rmnet_port *port, u8 mux_id); int rmnet_add_bridge(struct net_device *rmnet_dev, struct net_device *slave_dev, diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index 1b74bc160402..074a8b326c30 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -184,7 +184,7 @@ rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb) return RX_HANDLER_PASS; dev = skb->dev; - port = rmnet_get_port(dev); + port = rmnet_get_port_rcu(dev); switch (port->rmnet_mode) { case RMNET_EPMODE_VND: @@ -217,7 +217,7 @@ void rmnet_egress_handler(struct sk_buff *skb) skb->dev = priv->real_dev; mux_id = priv->mux_id; - port = rmnet_get_port(skb->dev); + port = rmnet_get_port_rcu(skb->dev); if (!port) goto drop; -- cgit v1.2.3-59-g8ed1b From c026d970102e9af9958edefb4a015702c6aab636 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 27 Feb 2020 12:25:05 +0000 Subject: net: rmnet: remove rcu_read_lock in rmnet_force_unassociate_device() The notifier_call() of the slave interface removes rmnet interface with unregister_netdevice_queue(). But, before calling unregister_netdevice_queue(), it acquires rcu readlock. In the RCU critical section, sleeping isn't be allowed. But, unregister_netdevice_queue() internally calls synchronize_net(), which would sleep. So, suspicious RCU usage warning occurs. Test commands: modprobe rmnet ip link add dummy0 type dummy ip link add dummy1 type dummy ip link add rmnet0 link dummy0 type rmnet mux_id 1 ip link set dummy1 master rmnet0 ip link del dummy0 Splat looks like: [ 79.639245][ T1195] ============================= [ 79.640134][ T1195] WARNING: suspicious RCU usage [ 79.640852][ T1195] 5.6.0-rc1+ #447 Not tainted [ 79.641657][ T1195] ----------------------------- [ 79.642472][ T1195] ./include/linux/rcupdate.h:273 Illegal context switch in RCU read-side critical section! [ 79.644043][ T1195] [ 79.644043][ T1195] other info that might help us debug this: [ 79.644043][ T1195] [ 79.645682][ T1195] [ 79.645682][ T1195] rcu_scheduler_active = 2, debug_locks = 1 [ 79.646980][ T1195] 2 locks held by ip/1195: [ 79.647629][ T1195] #0: ffffffffa3cf64f0 (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x457/0x890 [ 79.649312][ T1195] #1: ffffffffa39256c0 (rcu_read_lock){....}, at: rmnet_config_notify_cb+0xf0/0x590 [rmnet] [ 79.651717][ T1195] [ 79.651717][ T1195] stack backtrace: [ 79.652650][ T1195] CPU: 3 PID: 1195 Comm: ip Not tainted 5.6.0-rc1+ #447 [ 79.653702][ T1195] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 79.655037][ T1195] Call Trace: [ 79.655560][ T1195] dump_stack+0x96/0xdb [ 79.656252][ T1195] ___might_sleep+0x345/0x440 [ 79.656994][ T1195] synchronize_net+0x18/0x30 [ 79.661132][ T1195] netdev_rx_handler_unregister+0x40/0xb0 [ 79.666266][ T1195] rmnet_unregister_real_device+0x42/0xb0 [rmnet] [ 79.667211][ T1195] rmnet_config_notify_cb+0x1f7/0x590 [rmnet] [ 79.668121][ T1195] ? rmnet_unregister_bridge.isra.6+0xf0/0xf0 [rmnet] [ 79.669166][ T1195] ? rmnet_unregister_bridge.isra.6+0xf0/0xf0 [rmnet] [ 79.670286][ T1195] ? __module_text_address+0x13/0x140 [ 79.671139][ T1195] notifier_call_chain+0x90/0x160 [ 79.671973][ T1195] rollback_registered_many+0x660/0xcf0 [ 79.672893][ T1195] ? netif_set_real_num_tx_queues+0x780/0x780 [ 79.675091][ T1195] ? __lock_acquire+0xdfe/0x3de0 [ 79.675825][ T1195] ? memset+0x1f/0x40 [ 79.676367][ T1195] ? __nla_validate_parse+0x98/0x1ab0 [ 79.677290][ T1195] unregister_netdevice_many.part.133+0x13/0x1b0 [ 79.678163][ T1195] rtnl_delete_link+0xbc/0x100 [ ... ] Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index fc68ecdd804b..0ad64aa66592 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -230,7 +230,6 @@ static void rmnet_force_unassociate_device(struct net_device *dev) port = rmnet_get_port_rtnl(dev); - rcu_read_lock(); rmnet_unregister_bridge(dev, port); hash_for_each_safe(port->muxed_ep, bkt_ep, tmp_ep, ep, hlnode) { @@ -241,7 +240,6 @@ static void rmnet_force_unassociate_device(struct net_device *dev) kfree(ep); } - rcu_read_unlock(); unregister_netdevice_many(&list); rmnet_unregister_real_device(real_dev, port); -- cgit v1.2.3-59-g8ed1b From 1dc49e9d164cd7e11c81279c83db84a147e14740 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 27 Feb 2020 12:25:19 +0000 Subject: net: rmnet: do not allow to change mux id if mux id is duplicated Basically, duplicate mux id isn't be allowed. So, the creation of rmnet will be failed if there is duplicate mux id is existing. But, changelink routine doesn't check duplicate mux id. Test commands: modprobe rmnet ip link add dummy0 type dummy ip link add rmnet0 link dummy0 type rmnet mux_id 1 ip link add rmnet1 link dummy0 type rmnet mux_id 2 ip link set rmnet1 type rmnet mux_id 1 Fixes: 23790ef12082 ("net: qualcomm: rmnet: Allow to configure flags for existing devices") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 0ad64aa66592..3c0e6d24d083 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -306,6 +306,10 @@ static int rmnet_changelink(struct net_device *dev, struct nlattr *tb[], if (data[IFLA_RMNET_MUX_ID]) { mux_id = nla_get_u16(data[IFLA_RMNET_MUX_ID]); + if (rmnet_get_endpoint(port, mux_id)) { + NL_SET_ERR_MSG_MOD(extack, "MUX ID already exists"); + return -EINVAL; + } ep = rmnet_get_endpoint(port, priv->mux_id); if (!ep) return -ENODEV; -- cgit v1.2.3-59-g8ed1b From 037f9cdf72fb8a7ff9ec2b5dd05336ec1492bdf1 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 27 Feb 2020 12:25:43 +0000 Subject: net: rmnet: use upper/lower device infrastructure netdev_upper_dev_link() is useful to manage lower/upper interfaces. And this function internally validates looping, maximum depth. All or most virtual interfaces that could have a real interface (e.g. macsec, macvlan, ipvlan etc.) use lower/upper infrastructure. Test commands: modprobe rmnet ip link add dummy0 type dummy ip link add rmnet1 link dummy0 type rmnet mux_id 1 for i in {2..100} do let A=$i-1 ip link add rmnet$i link rmnet$A type rmnet mux_id $i done ip link del dummy0 The purpose of the test commands is to make stack overflow. Splat looks like: [ 52.411438][ T1395] BUG: KASAN: slab-out-of-bounds in find_busiest_group+0x27e/0x2c00 [ 52.413218][ T1395] Write of size 64 at addr ffff8880c774bde0 by task ip/1395 [ 52.414841][ T1395] [ 52.430720][ T1395] CPU: 1 PID: 1395 Comm: ip Not tainted 5.6.0-rc1+ #447 [ 52.496511][ T1395] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 52.513597][ T1395] Call Trace: [ 52.546516][ T1395] [ 52.558773][ T1395] Allocated by task 3171537984: [ 52.588290][ T1395] BUG: unable to handle page fault for address: ffffffffb999e260 [ 52.589311][ T1395] #PF: supervisor read access in kernel mode [ 52.590529][ T1395] #PF: error_code(0x0000) - not-present page [ 52.591374][ T1395] PGD d6818067 P4D d6818067 PUD d6819063 PMD 0 [ 52.592288][ T1395] Thread overran stack, or stack corrupted [ 52.604980][ T1395] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI [ 52.605856][ T1395] CPU: 1 PID: 1395 Comm: ip Not tainted 5.6.0-rc1+ #447 [ 52.611764][ T1395] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 52.621520][ T1395] RIP: 0010:stack_depot_fetch+0x10/0x30 [ 52.622296][ T1395] Code: ff e9 f9 fe ff ff 48 89 df e8 9c 1d 91 ff e9 ca fe ff ff cc cc cc cc cc cc cc 89 f8 0 [ 52.627887][ T1395] RSP: 0018:ffff8880c774bb60 EFLAGS: 00010006 [ 52.628735][ T1395] RAX: 00000000001f8880 RBX: ffff8880c774d140 RCX: 0000000000000000 [ 52.631773][ T1395] RDX: 000000000000001d RSI: ffff8880c774bb68 RDI: 0000000000003ff0 [ 52.649584][ T1395] RBP: ffffea00031dd200 R08: ffffed101b43e403 R09: ffffed101b43e403 [ 52.674857][ T1395] R10: 0000000000000001 R11: ffffed101b43e402 R12: ffff8880d900e5c0 [ 52.678257][ T1395] R13: ffff8880c774c000 R14: 0000000000000000 R15: dffffc0000000000 [ 52.694541][ T1395] FS: 00007fe867f6e0c0(0000) GS:ffff8880da000000(0000) knlGS:0000000000000000 [ 52.764039][ T1395] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 52.815008][ T1395] CR2: ffffffffb999e260 CR3: 00000000c26aa005 CR4: 00000000000606e0 [ 52.862312][ T1395] Call Trace: [ 52.887133][ T1395] Modules linked in: dummy rmnet veth openvswitch nsh nf_conncount nf_nat nf_conntrack nf_dex [ 52.936749][ T1395] CR2: ffffffffb999e260 [ 52.965695][ T1395] ---[ end trace 7e32ca99482dbb31 ]--- [ 52.966556][ T1395] RIP: 0010:stack_depot_fetch+0x10/0x30 [ 52.971083][ T1395] Code: ff e9 f9 fe ff ff 48 89 df e8 9c 1d 91 ff e9 ca fe ff ff cc cc cc cc cc cc cc 89 f8 0 [ 53.003650][ T1395] RSP: 0018:ffff8880c774bb60 EFLAGS: 00010006 [ 53.043183][ T1395] RAX: 00000000001f8880 RBX: ffff8880c774d140 RCX: 0000000000000000 [ 53.076480][ T1395] RDX: 000000000000001d RSI: ffff8880c774bb68 RDI: 0000000000003ff0 [ 53.093858][ T1395] RBP: ffffea00031dd200 R08: ffffed101b43e403 R09: ffffed101b43e403 [ 53.112795][ T1395] R10: 0000000000000001 R11: ffffed101b43e402 R12: ffff8880d900e5c0 [ 53.139837][ T1395] R13: ffff8880c774c000 R14: 0000000000000000 R15: dffffc0000000000 [ 53.141500][ T1395] FS: 00007fe867f6e0c0(0000) GS:ffff8880da000000(0000) knlGS:0000000000000000 [ 53.143343][ T1395] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 53.152007][ T1395] CR2: ffffffffb999e260 CR3: 00000000c26aa005 CR4: 00000000000606e0 [ 53.156459][ T1395] Kernel panic - not syncing: Fatal exception [ 54.213570][ T1395] Shutting down cpus with NMI [ 54.354112][ T1395] Kernel Offset: 0x33000000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0x) [ 54.355687][ T1395] Rebooting in 5 seconds.. Fixes: b37f78f234bf ("net: qualcomm: rmnet: Fix crash on real dev unregistration") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 35 ++++++++++------------ 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 3c0e6d24d083..e3fbf2331b96 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -61,9 +61,6 @@ static int rmnet_unregister_real_device(struct net_device *real_dev, kfree(port); - /* release reference on real_dev */ - dev_put(real_dev); - netdev_dbg(real_dev, "Removed from rmnet\n"); return 0; } @@ -89,9 +86,6 @@ static int rmnet_register_real_device(struct net_device *real_dev) return -EBUSY; } - /* hold on to real dev for MAP data */ - dev_hold(real_dev); - for (entry = 0; entry < RMNET_MAX_LOGICAL_EP; entry++) INIT_HLIST_HEAD(&port->muxed_ep[entry]); @@ -162,6 +156,10 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, if (err) goto err1; + err = netdev_upper_dev_link(real_dev, dev, extack); + if (err < 0) + goto err2; + port->rmnet_mode = mode; hlist_add_head_rcu(&ep->hlnode, &port->muxed_ep[mux_id]); @@ -178,6 +176,8 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, return 0; +err2: + unregister_netdevice(dev); err1: rmnet_unregister_real_device(real_dev, port); err0: @@ -209,33 +209,30 @@ static void rmnet_dellink(struct net_device *dev, struct list_head *head) rmnet_vnd_dellink(mux_id, port, ep); kfree(ep); } + netdev_upper_dev_unlink(real_dev, dev); rmnet_unregister_real_device(real_dev, port); unregister_netdevice_queue(dev, head); } -static void rmnet_force_unassociate_device(struct net_device *dev) +static void rmnet_force_unassociate_device(struct net_device *real_dev) { - struct net_device *real_dev = dev; struct hlist_node *tmp_ep; struct rmnet_endpoint *ep; struct rmnet_port *port; unsigned long bkt_ep; LIST_HEAD(list); - if (!rmnet_is_real_dev_registered(real_dev)) - return; - ASSERT_RTNL(); - port = rmnet_get_port_rtnl(dev); + port = rmnet_get_port_rtnl(real_dev); - rmnet_unregister_bridge(dev, port); + rmnet_unregister_bridge(real_dev, port); hash_for_each_safe(port->muxed_ep, bkt_ep, tmp_ep, ep, hlnode) { + netdev_upper_dev_unlink(real_dev, ep->egress_dev); unregister_netdevice_queue(ep->egress_dev, &list); rmnet_vnd_dellink(ep->mux_id, port, ep); - hlist_del_init_rcu(&ep->hlnode); kfree(ep); } @@ -248,15 +245,15 @@ static void rmnet_force_unassociate_device(struct net_device *dev) static int rmnet_config_notify_cb(struct notifier_block *nb, unsigned long event, void *data) { - struct net_device *dev = netdev_notifier_info_to_dev(data); + struct net_device *real_dev = netdev_notifier_info_to_dev(data); - if (!dev) + if (!rmnet_is_real_dev_registered(real_dev)) return NOTIFY_DONE; switch (event) { case NETDEV_UNREGISTER: - netdev_dbg(dev, "Kernel unregister\n"); - rmnet_force_unassociate_device(dev); + netdev_dbg(real_dev, "Kernel unregister\n"); + rmnet_force_unassociate_device(real_dev); break; default: @@ -477,8 +474,8 @@ static int __init rmnet_init(void) static void __exit rmnet_exit(void) { - unregister_netdevice_notifier(&rmnet_dev_notifier); rtnl_link_unregister(&rmnet_link_ops); + unregister_netdevice_notifier(&rmnet_dev_notifier); } module_init(rmnet_init) -- cgit v1.2.3-59-g8ed1b From d939b6d30bea1a2322bc536b12be0a7c4c2bccd7 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 27 Feb 2020 12:26:02 +0000 Subject: net: rmnet: fix bridge mode bugs In order to attach a bridge interface to the rmnet interface, "master" operation is used. (e.g. ip link set dummy1 master rmnet0) But, in the rmnet_add_bridge(), which is a callback of ->ndo_add_slave() doesn't register lower interface. So, ->ndo_del_slave() doesn't work. There are other problems too. 1. It couldn't detect circular upper/lower interface relationship. 2. It couldn't prevent stack overflow because of too deep depth of upper/lower interface 3. It doesn't check the number of lower interfaces. 4. Panics because of several reasons. The root problem of these issues is actually the same. So, in this patch, these all problems will be fixed. Test commands: modprobe rmnet ip link add dummy0 type dummy ip link add rmnet0 link dummy0 type rmnet mux_id 1 ip link add dummy1 master rmnet0 type dummy ip link add dummy2 master rmnet0 type dummy ip link del rmnet0 ip link del dummy2 ip link del dummy1 Splat looks like: [ 41.867595][ T1164] general protection fault, probably for non-canonical address 0xdffffc0000000101I [ 41.869993][ T1164] KASAN: null-ptr-deref in range [0x0000000000000808-0x000000000000080f] [ 41.872950][ T1164] CPU: 0 PID: 1164 Comm: ip Not tainted 5.6.0-rc1+ #447 [ 41.873915][ T1164] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 41.875161][ T1164] RIP: 0010:rmnet_unregister_bridge.isra.6+0x71/0xf0 [rmnet] [ 41.876178][ T1164] Code: 48 89 ef 48 89 c6 5b 5d e9 fc fe ff ff e8 f7 f3 ff ff 48 8d b8 08 08 00 00 48 ba 00 7 [ 41.878925][ T1164] RSP: 0018:ffff8880c4d0f188 EFLAGS: 00010202 [ 41.879774][ T1164] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000101 [ 41.887689][ T1164] RDX: dffffc0000000000 RSI: ffffffffb8cf64f0 RDI: 0000000000000808 [ 41.888727][ T1164] RBP: ffff8880c40e4000 R08: ffffed101b3c0e3c R09: 0000000000000001 [ 41.889749][ T1164] R10: 0000000000000001 R11: ffffed101b3c0e3b R12: 1ffff110189a1e3c [ 41.890783][ T1164] R13: ffff8880c4d0f200 R14: ffffffffb8d56160 R15: ffff8880ccc2c000 [ 41.891794][ T1164] FS: 00007f4300edc0c0(0000) GS:ffff8880d9c00000(0000) knlGS:0000000000000000 [ 41.892953][ T1164] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 41.893800][ T1164] CR2: 00007f43003bc8c0 CR3: 00000000ca53e001 CR4: 00000000000606f0 [ 41.894824][ T1164] Call Trace: [ 41.895274][ T1164] ? rcu_is_watching+0x2c/0x80 [ 41.895895][ T1164] rmnet_config_notify_cb+0x1f7/0x590 [rmnet] [ 41.896687][ T1164] ? rmnet_unregister_bridge.isra.6+0xf0/0xf0 [rmnet] [ 41.897611][ T1164] ? rmnet_unregister_bridge.isra.6+0xf0/0xf0 [rmnet] [ 41.898508][ T1164] ? __module_text_address+0x13/0x140 [ 41.899162][ T1164] notifier_call_chain+0x90/0x160 [ 41.899814][ T1164] rollback_registered_many+0x660/0xcf0 [ 41.900544][ T1164] ? netif_set_real_num_tx_queues+0x780/0x780 [ 41.901316][ T1164] ? __lock_acquire+0xdfe/0x3de0 [ 41.901958][ T1164] ? memset+0x1f/0x40 [ 41.902468][ T1164] ? __nla_validate_parse+0x98/0x1ab0 [ 41.903166][ T1164] unregister_netdevice_many.part.133+0x13/0x1b0 [ 41.903988][ T1164] rtnl_delete_link+0xbc/0x100 [ ... ] Fixes: 60d58f971c10 ("net: qualcomm: rmnet: Implement bridge mode") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 131 ++++++++++----------- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h | 1 + drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c | 8 -- drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h | 1 - 4 files changed, 64 insertions(+), 77 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index e3fbf2331b96..fbf4cbcf1a65 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -13,25 +13,6 @@ #include "rmnet_vnd.h" #include "rmnet_private.h" -/* Locking scheme - - * The shared resource which needs to be protected is realdev->rx_handler_data. - * For the writer path, this is using rtnl_lock(). The writer paths are - * rmnet_newlink(), rmnet_dellink() and rmnet_force_unassociate_device(). These - * paths are already called with rtnl_lock() acquired in. There is also an - * ASSERT_RTNL() to ensure that we are calling with rtnl acquired. For - * dereference here, we will need to use rtnl_dereference(). Dev list writing - * needs to happen with rtnl_lock() acquired for netdev_master_upper_dev_link(). - * For the reader path, the real_dev->rx_handler_data is called in the TX / RX - * path. We only need rcu_read_lock() for these scenarios. In these cases, - * the rcu_read_lock() is held in __dev_queue_xmit() and - * netif_receive_skb_internal(), so readers need to use rcu_dereference_rtnl() - * to get the relevant information. For dev list reading, we again acquire - * rcu_read_lock() in rmnet_dellink() for netdev_master_upper_dev_get_rcu(). - * We also use unregister_netdevice_many() to free all rmnet devices in - * rmnet_force_unassociate_device() so we dont lose the rtnl_lock() and free in - * same context. - */ - /* Local Definitions and Declarations */ static const struct nla_policy rmnet_policy[IFLA_RMNET_MAX + 1] = { @@ -51,9 +32,10 @@ rmnet_get_port_rtnl(const struct net_device *real_dev) return rtnl_dereference(real_dev->rx_handler_data); } -static int rmnet_unregister_real_device(struct net_device *real_dev, - struct rmnet_port *port) +static int rmnet_unregister_real_device(struct net_device *real_dev) { + struct rmnet_port *port = rmnet_get_port_rtnl(real_dev); + if (port->nr_rmnet_devs) return -EINVAL; @@ -93,28 +75,33 @@ static int rmnet_register_real_device(struct net_device *real_dev) return 0; } -static void rmnet_unregister_bridge(struct net_device *dev, - struct rmnet_port *port) +static void rmnet_unregister_bridge(struct rmnet_port *port) { - struct rmnet_port *bridge_port; - struct net_device *bridge_dev; + struct net_device *bridge_dev, *real_dev, *rmnet_dev; + struct rmnet_port *real_port; if (port->rmnet_mode != RMNET_EPMODE_BRIDGE) return; - /* bridge slave handling */ + rmnet_dev = port->rmnet_dev; if (!port->nr_rmnet_devs) { - bridge_dev = port->bridge_ep; + /* bridge device */ + real_dev = port->bridge_ep; + bridge_dev = port->dev; - bridge_port = rmnet_get_port_rtnl(bridge_dev); - bridge_port->bridge_ep = NULL; - bridge_port->rmnet_mode = RMNET_EPMODE_VND; + real_port = rmnet_get_port_rtnl(real_dev); + real_port->bridge_ep = NULL; + real_port->rmnet_mode = RMNET_EPMODE_VND; } else { + /* real device */ bridge_dev = port->bridge_ep; - bridge_port = rmnet_get_port_rtnl(bridge_dev); - rmnet_unregister_real_device(bridge_dev, bridge_port); + port->bridge_ep = NULL; + port->rmnet_mode = RMNET_EPMODE_VND; } + + netdev_upper_dev_unlink(bridge_dev, rmnet_dev); + rmnet_unregister_real_device(bridge_dev); } static int rmnet_newlink(struct net *src_net, struct net_device *dev, @@ -161,6 +148,7 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, goto err2; port->rmnet_mode = mode; + port->rmnet_dev = dev; hlist_add_head_rcu(&ep->hlnode, &port->muxed_ep[mux_id]); @@ -178,8 +166,9 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, err2: unregister_netdevice(dev); + rmnet_vnd_dellink(mux_id, port, ep); err1: - rmnet_unregister_real_device(real_dev, port); + rmnet_unregister_real_device(real_dev); err0: kfree(ep); return err; @@ -188,30 +177,32 @@ err0: static void rmnet_dellink(struct net_device *dev, struct list_head *head) { struct rmnet_priv *priv = netdev_priv(dev); - struct net_device *real_dev; + struct net_device *real_dev, *bridge_dev; + struct rmnet_port *real_port, *bridge_port; struct rmnet_endpoint *ep; - struct rmnet_port *port; - u8 mux_id; + u8 mux_id = priv->mux_id; real_dev = priv->real_dev; - if (!real_dev || !rmnet_is_real_dev_registered(real_dev)) + if (!rmnet_is_real_dev_registered(real_dev)) return; - port = rmnet_get_port_rtnl(real_dev); - - mux_id = rmnet_vnd_get_mux(dev); + real_port = rmnet_get_port_rtnl(real_dev); + bridge_dev = real_port->bridge_ep; + if (bridge_dev) { + bridge_port = rmnet_get_port_rtnl(bridge_dev); + rmnet_unregister_bridge(bridge_port); + } - ep = rmnet_get_endpoint(port, mux_id); + ep = rmnet_get_endpoint(real_port, mux_id); if (ep) { hlist_del_init_rcu(&ep->hlnode); - rmnet_unregister_bridge(dev, port); - rmnet_vnd_dellink(mux_id, port, ep); + rmnet_vnd_dellink(mux_id, real_port, ep); kfree(ep); } - netdev_upper_dev_unlink(real_dev, dev); - rmnet_unregister_real_device(real_dev, port); + netdev_upper_dev_unlink(real_dev, dev); + rmnet_unregister_real_device(real_dev); unregister_netdevice_queue(dev, head); } @@ -223,23 +214,23 @@ static void rmnet_force_unassociate_device(struct net_device *real_dev) unsigned long bkt_ep; LIST_HEAD(list); - ASSERT_RTNL(); - port = rmnet_get_port_rtnl(real_dev); - rmnet_unregister_bridge(real_dev, port); - - hash_for_each_safe(port->muxed_ep, bkt_ep, tmp_ep, ep, hlnode) { - netdev_upper_dev_unlink(real_dev, ep->egress_dev); - unregister_netdevice_queue(ep->egress_dev, &list); - rmnet_vnd_dellink(ep->mux_id, port, ep); - hlist_del_init_rcu(&ep->hlnode); - kfree(ep); + if (port->nr_rmnet_devs) { + /* real device */ + rmnet_unregister_bridge(port); + hash_for_each_safe(port->muxed_ep, bkt_ep, tmp_ep, ep, hlnode) { + unregister_netdevice_queue(ep->egress_dev, &list); + netdev_upper_dev_unlink(real_dev, ep->egress_dev); + rmnet_vnd_dellink(ep->mux_id, port, ep); + hlist_del_init_rcu(&ep->hlnode); + kfree(ep); + } + rmnet_unregister_real_device(real_dev); + unregister_netdevice_many(&list); + } else { + rmnet_unregister_bridge(port); } - - unregister_netdevice_many(&list); - - rmnet_unregister_real_device(real_dev, port); } static int rmnet_config_notify_cb(struct notifier_block *nb, @@ -418,6 +409,9 @@ int rmnet_add_bridge(struct net_device *rmnet_dev, if (port->nr_rmnet_devs > 1) return -EINVAL; + if (port->rmnet_mode != RMNET_EPMODE_VND) + return -EINVAL; + if (rmnet_is_real_dev_registered(slave_dev)) return -EBUSY; @@ -425,9 +419,17 @@ int rmnet_add_bridge(struct net_device *rmnet_dev, if (err) return -EBUSY; + err = netdev_master_upper_dev_link(slave_dev, rmnet_dev, NULL, NULL, + extack); + if (err) { + rmnet_unregister_real_device(slave_dev); + return err; + } + slave_port = rmnet_get_port_rtnl(slave_dev); slave_port->rmnet_mode = RMNET_EPMODE_BRIDGE; slave_port->bridge_ep = real_dev; + slave_port->rmnet_dev = rmnet_dev; port->rmnet_mode = RMNET_EPMODE_BRIDGE; port->bridge_ep = slave_dev; @@ -439,16 +441,9 @@ int rmnet_add_bridge(struct net_device *rmnet_dev, int rmnet_del_bridge(struct net_device *rmnet_dev, struct net_device *slave_dev) { - struct rmnet_priv *priv = netdev_priv(rmnet_dev); - struct net_device *real_dev = priv->real_dev; - struct rmnet_port *port, *slave_port; - - port = rmnet_get_port_rtnl(real_dev); - port->rmnet_mode = RMNET_EPMODE_VND; - port->bridge_ep = NULL; + struct rmnet_port *port = rmnet_get_port_rtnl(slave_dev); - slave_port = rmnet_get_port_rtnl(slave_dev); - rmnet_unregister_real_device(slave_dev, slave_port); + rmnet_unregister_bridge(port); netdev_dbg(slave_dev, "removed from rmnet as slave\n"); return 0; diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h index 0d568dcfd65a..be515982d628 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h @@ -28,6 +28,7 @@ struct rmnet_port { u8 rmnet_mode; struct hlist_head muxed_ep[RMNET_MAX_LOGICAL_EP]; struct net_device *bridge_ep; + struct net_device *rmnet_dev; }; extern struct rtnl_link_ops rmnet_link_ops; diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c index 509dfc895a33..26ad40f19c64 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c @@ -266,14 +266,6 @@ int rmnet_vnd_dellink(u8 id, struct rmnet_port *port, return 0; } -u8 rmnet_vnd_get_mux(struct net_device *rmnet_dev) -{ - struct rmnet_priv *priv; - - priv = netdev_priv(rmnet_dev); - return priv->mux_id; -} - int rmnet_vnd_do_flow_control(struct net_device *rmnet_dev, int enable) { netdev_dbg(rmnet_dev, "Setting VND TX queue state to %d\n", enable); diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h index 54cbaf3c3bc4..14d77c709d4a 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h @@ -16,6 +16,5 @@ int rmnet_vnd_dellink(u8 id, struct rmnet_port *port, struct rmnet_endpoint *ep); void rmnet_vnd_rx_fixup(struct sk_buff *skb, struct net_device *dev); void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device *dev); -u8 rmnet_vnd_get_mux(struct net_device *rmnet_dev); void rmnet_vnd_setup(struct net_device *dev); #endif /* _RMNET_VND_H_ */ -- cgit v1.2.3-59-g8ed1b From ad3cc31b599ea80f06b29ebdc18b3a39878a48d6 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 27 Feb 2020 12:26:15 +0000 Subject: net: rmnet: fix packet forwarding in rmnet bridge mode Packet forwarding is not working in rmnet bridge mode. Because when a packet is forwarded, skb_push() for an ethernet header is needed. But it doesn't call skb_push(). So, the ethernet header will be lost. Test commands: modprobe rmnet ip netns add nst ip netns add nst2 ip link add veth0 type veth peer name veth1 ip link add veth2 type veth peer name veth3 ip link set veth1 netns nst ip link set veth3 netns nst2 ip link add rmnet0 link veth0 type rmnet mux_id 1 ip link set veth2 master rmnet0 ip link set veth0 up ip link set veth2 up ip link set rmnet0 up ip a a 192.168.100.1/24 dev rmnet0 ip netns exec nst ip link set veth1 up ip netns exec nst ip a a 192.168.100.2/24 dev veth1 ip netns exec nst2 ip link set veth3 up ip netns exec nst2 ip a a 192.168.100.3/24 dev veth3 ip netns exec nst2 ping 192.168.100.2 Fixes: 60d58f971c10 ("net: qualcomm: rmnet: Implement bridge mode") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index 074a8b326c30..29a7bfa2584d 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -159,6 +159,9 @@ static int rmnet_map_egress_handler(struct sk_buff *skb, static void rmnet_bridge_handler(struct sk_buff *skb, struct net_device *bridge_dev) { + if (skb_mac_header_was_set(skb)) + skb_push(skb, skb->mac_len); + if (bridge_dev) { skb->dev = bridge_dev; dev_queue_xmit(skb); -- cgit v1.2.3-59-g8ed1b From 5c05a164d441a1792791175e4959ea9df12f7e2b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 27 Feb 2020 11:52:35 -0800 Subject: unix: It's CONFIG_PROC_FS not CONFIG_PROCFS Fixes: 3a12500ed5dd ("unix: define and set show_fdinfo only if procfs is enabled") Signed-off-by: David S. Miller --- net/unix/af_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index aa6e2530e1ec..68debcb28fa4 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -682,7 +682,7 @@ static int unix_set_peek_off(struct sock *sk, int val) return 0; } -#ifdef CONFIG_PROCFS +#ifdef CONFIG_PROC_FS static void unix_show_fdinfo(struct seq_file *m, struct socket *sock) { struct sock *sk = sock->sk; -- cgit v1.2.3-59-g8ed1b From 3f74957fcbeab703297ed0f135430414ed7e0dd0 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Wed, 26 Feb 2020 11:58:18 +0100 Subject: vsock: fix potential deadlock in transport->release() Some transports (hyperv, virtio) acquire the sock lock during the .release() callback. In the vsock_stream_connect() we call vsock_assign_transport(); if the socket was previously assigned to another transport, the vsk->transport->release() is called, but the sock lock is already held in the vsock_stream_connect(), causing a deadlock reported by syzbot: INFO: task syz-executor280:9768 blocked for more than 143 seconds. Not tainted 5.6.0-rc1-syzkaller #0 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. syz-executor280 D27912 9768 9766 0x00000000 Call Trace: context_switch kernel/sched/core.c:3386 [inline] __schedule+0x934/0x1f90 kernel/sched/core.c:4082 schedule+0xdc/0x2b0 kernel/sched/core.c:4156 __lock_sock+0x165/0x290 net/core/sock.c:2413 lock_sock_nested+0xfe/0x120 net/core/sock.c:2938 virtio_transport_release+0xc4/0xd60 net/vmw_vsock/virtio_transport_common.c:832 vsock_assign_transport+0xf3/0x3b0 net/vmw_vsock/af_vsock.c:454 vsock_stream_connect+0x2b3/0xc70 net/vmw_vsock/af_vsock.c:1288 __sys_connect_file+0x161/0x1c0 net/socket.c:1857 __sys_connect+0x174/0x1b0 net/socket.c:1874 __do_sys_connect net/socket.c:1885 [inline] __se_sys_connect net/socket.c:1882 [inline] __x64_sys_connect+0x73/0xb0 net/socket.c:1882 do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe To avoid this issue, this patch remove the lock acquiring in the .release() callback of hyperv and virtio transports, and it holds the lock when we call vsk->transport->release() in the vsock core. Reported-by: syzbot+731710996d79d0d58fbc@syzkaller.appspotmail.com Fixes: 408624af4c89 ("vsock: use local transport when it is loaded") Signed-off-by: Stefano Garzarella Reviewed-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 20 ++++++++++++-------- net/vmw_vsock/hyperv_transport.c | 3 --- net/vmw_vsock/virtio_transport_common.c | 2 -- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 9c5b2a91baad..a5f28708e0e7 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -451,6 +451,12 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) if (vsk->transport == new_transport) return 0; + /* transport->release() must be called with sock lock acquired. + * This path can only be taken during vsock_stream_connect(), + * where we have already held the sock lock. + * In the other cases, this function is called on a new socket + * which is not assigned to any transport. + */ vsk->transport->release(vsk); vsock_deassign_transport(vsk); } @@ -753,20 +759,18 @@ static void __vsock_release(struct sock *sk, int level) vsk = vsock_sk(sk); pending = NULL; /* Compiler warning. */ - /* The release call is supposed to use lock_sock_nested() - * rather than lock_sock(), if a sock lock should be acquired. - */ - if (vsk->transport) - vsk->transport->release(vsk); - else if (sk->sk_type == SOCK_STREAM) - vsock_remove_sock(vsk); - /* When "level" is SINGLE_DEPTH_NESTING, use the nested * version to avoid the warning "possible recursive locking * detected". When "level" is 0, lock_sock_nested(sk, level) * is the same as lock_sock(sk). */ lock_sock_nested(sk, level); + + if (vsk->transport) + vsk->transport->release(vsk); + else if (sk->sk_type == SOCK_STREAM) + vsock_remove_sock(vsk); + sock_orphan(sk); sk->sk_shutdown = SHUTDOWN_MASK; diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 3492c021925f..630b851f8150 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -526,12 +526,9 @@ static bool hvs_close_lock_held(struct vsock_sock *vsk) static void hvs_release(struct vsock_sock *vsk) { - struct sock *sk = sk_vsock(vsk); bool remove_sock; - lock_sock_nested(sk, SINGLE_DEPTH_NESTING); remove_sock = hvs_close_lock_held(vsk); - release_sock(sk); if (remove_sock) vsock_remove_sock(vsk); } diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index d9f0c9c5425a..f3c4bab2f737 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -829,7 +829,6 @@ void virtio_transport_release(struct vsock_sock *vsk) struct sock *sk = &vsk->sk; bool remove_sock = true; - lock_sock_nested(sk, SINGLE_DEPTH_NESTING); if (sk->sk_type == SOCK_STREAM) remove_sock = virtio_transport_close(vsk); @@ -837,7 +836,6 @@ void virtio_transport_release(struct vsock_sock *vsk) list_del(&pkt->list); virtio_transport_free_pkt(pkt); } - release_sock(sk); if (remove_sock) vsock_remove_sock(vsk); -- cgit v1.2.3-59-g8ed1b From 23797b98909f34b75fd130369bde86f760db69d0 Mon Sep 17 00:00:00 2001 From: "Alex Maftei (amaftei)" Date: Wed, 26 Feb 2020 17:33:19 +0000 Subject: sfc: fix timestamp reconstruction at 16-bit rollover points We can't just use the top bits of the last sync event as they could be off-by-one every 65,536 seconds, giving an error in reconstruction of 65,536 seconds. This patch uses the difference in the bottom 16 bits (mod 2^16) to calculate an offset that needs to be applied to the last sync event to get to the current time. Signed-off-by: Alexandru-Mihai Maftei Acked-by: Martin Habets Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ptp.c | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index af15a737c675..59b4f16896a8 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -560,13 +560,45 @@ efx_ptp_mac_nic_to_ktime_correction(struct efx_nic *efx, u32 nic_major, u32 nic_minor, s32 correction) { + u32 sync_timestamp; ktime_t kt = { 0 }; + s16 delta; if (!(nic_major & 0x80000000)) { WARN_ON_ONCE(nic_major >> 16); - /* Use the top bits from the latest sync event. */ - nic_major &= 0xffff; - nic_major |= (last_sync_timestamp_major(efx) & 0xffff0000); + + /* Medford provides 48 bits of timestamp, so we must get the top + * 16 bits from the timesync event state. + * + * We only have the lower 16 bits of the time now, but we do + * have a full resolution timestamp at some point in past. As + * long as the difference between the (real) now and the sync + * is less than 2^15, then we can reconstruct the difference + * between those two numbers using only the lower 16 bits of + * each. + * + * Put another way + * + * a - b = ((a mod k) - b) mod k + * + * when -k/2 < (a-b) < k/2. In our case k is 2^16. We know + * (a mod k) and b, so can calculate the delta, a - b. + * + */ + sync_timestamp = last_sync_timestamp_major(efx); + + /* Because delta is s16 this does an implicit mask down to + * 16 bits which is what we need, assuming + * MEDFORD_TX_SECS_EVENT_BITS is 16. delta is signed so that + * we can deal with the (unlikely) case of sync timestamps + * arriving from the future. + */ + delta = nic_major - sync_timestamp; + + /* Recover the fully specified time now, by applying the offset + * to the (fully specified) sync time. + */ + nic_major = sync_timestamp + delta; kt = ptp->nic_to_kernel_time(nic_major, nic_minor, correction); -- cgit v1.2.3-59-g8ed1b From ac004e84164e27d69017731a97b11402a69d854b Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Thu, 27 Feb 2020 21:07:53 +0100 Subject: mlxsw: pci: Wait longer before accessing the device after reset During initialization the driver issues a reset to the device and waits for 100ms before checking if the firmware is ready. The waiting is necessary because before that the device is irresponsive and the first read can result in a completion timeout. While 100ms is sufficient for Spectrum-1 and Spectrum-2, it is insufficient for Spectrum-3. Fix this by increasing the timeout to 200ms. Fixes: da382875c616 ("mlxsw: spectrum: Extend to support Spectrum-3 ASIC") Signed-off-by: Amit Cohen Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/pci_hw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index e0d7d2d9a0c8..43fa8c85b5d9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -28,7 +28,7 @@ #define MLXSW_PCI_SW_RESET 0xF0010 #define MLXSW_PCI_SW_RESET_RST_BIT BIT(0) #define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 900000 -#define MLXSW_PCI_SW_RESET_WAIT_MSECS 100 +#define MLXSW_PCI_SW_RESET_WAIT_MSECS 200 #define MLXSW_PCI_FW_READY 0xA1844 #define MLXSW_PCI_FW_READY_MASK 0xFFFF #define MLXSW_PCI_FW_READY_MAGIC 0x5E -- cgit v1.2.3-59-g8ed1b From 3ee339eb28959629db33aaa2b8cde4c63c6289eb Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 27 Feb 2020 21:20:49 +0100 Subject: net: dsa: mv88e6xxx: Fix masking of egress port Add missing ~ to the usage of the mask. Reported-by: Kevin Benson Reported-by: Chris Healy Fixes: 5c74c54ce6ff ("net: dsa: mv88e6xxx: Split monitor port configuration") Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/global1.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c index b016cc205f81..ca3a7a7a73c3 100644 --- a/drivers/net/dsa/mv88e6xxx/global1.c +++ b/drivers/net/dsa/mv88e6xxx/global1.c @@ -278,13 +278,13 @@ int mv88e6095_g1_set_egress_port(struct mv88e6xxx_chip *chip, switch (direction) { case MV88E6XXX_EGRESS_DIR_INGRESS: dest_port_chip = &chip->ingress_dest_port; - reg &= MV88E6185_G1_MONITOR_CTL_INGRESS_DEST_MASK; + reg &= ~MV88E6185_G1_MONITOR_CTL_INGRESS_DEST_MASK; reg |= port << __bf_shf(MV88E6185_G1_MONITOR_CTL_INGRESS_DEST_MASK); break; case MV88E6XXX_EGRESS_DIR_EGRESS: dest_port_chip = &chip->egress_dest_port; - reg &= MV88E6185_G1_MONITOR_CTL_EGRESS_DEST_MASK; + reg &= ~MV88E6185_G1_MONITOR_CTL_EGRESS_DEST_MASK; reg |= port << __bf_shf(MV88E6185_G1_MONITOR_CTL_EGRESS_DEST_MASK); break; -- cgit v1.2.3-59-g8ed1b From d876836204897b6d7d911f942084f69a1e9d5c4d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 27 Feb 2020 14:17:49 -0700 Subject: io_uring: fix 32-bit compatability with sendmsg/recvmsg We must set MSG_CMSG_COMPAT if we're in compatability mode, otherwise the iovec import for these commands will not do the right thing and fail the command with -EINVAL. Found by running the test suite compiled as 32-bit. Cc: stable@vger.kernel.org Fixes: aa1fa28fc73e ("io_uring: add support for recvmsg()") Fixes: 0fa03c624d8f ("io_uring: add support for sendmsg()") Signed-off-by: Jens Axboe --- fs/io_uring.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 05eea06f5421..6a595c13e108 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3001,6 +3001,11 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr)); sr->len = READ_ONCE(sqe->len); +#ifdef CONFIG_COMPAT + if (req->ctx->compat) + sr->msg_flags |= MSG_CMSG_COMPAT; +#endif + if (!io || req->opcode == IORING_OP_SEND) return 0; /* iovec is already imported */ @@ -3153,6 +3158,11 @@ static int io_recvmsg_prep(struct io_kiocb *req, sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr)); sr->len = READ_ONCE(sqe->len); +#ifdef CONFIG_COMPAT + if (req->ctx->compat) + sr->msg_flags |= MSG_CMSG_COMPAT; +#endif + if (!io || req->opcode == IORING_OP_RECV) return 0; /* iovec is already imported */ -- cgit v1.2.3-59-g8ed1b From 5901b51f3e5d9129da3e59b10cc76e4cc983e940 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Fri, 21 Feb 2020 19:54:02 +0100 Subject: MAINTAINERS: Correct Cadence PCI driver path de80f95ccb9c ("PCI: cadence: Move all files to per-device cadence directory") moved files of the PCI cadence drivers, but did not update the MAINTAINERS entry. Since then, ./scripts/get_maintainer.pl --self-test complains: warning: no file matches F: drivers/pci/controller/pcie-cadence* Repair the MAINTAINERS entry. Link: https://lore.kernel.org/r/20200221185402.4703-1-lukas.bulwahn@gmail.com Signed-off-by: Lukas Bulwahn Signed-off-by: Bjorn Helgaas --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 38fe2f3f7b6f..8dd7ae98c574 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12740,7 +12740,7 @@ M: Tom Joseph L: linux-pci@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/pci/cdns,*.txt -F: drivers/pci/controller/pcie-cadence* +F: drivers/pci/controller/cadence/ PCI DRIVER FOR FREESCALE LAYERSCAPE M: Minghuan Lian -- cgit v1.2.3-59-g8ed1b From 7943f4acea3caf0b6d5b6cdfce7d5a2b4a9aa608 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 25 Feb 2020 08:54:26 +0100 Subject: KVM: SVM: allocate AVIC data structures based on kvm_amd module parameter Even if APICv is disabled at startup, the backing page and ir_list need to be initialized in case they are needed later. The only case in which this can be skipped is for userspace irqchip, and that must be done because avic_init_backing_page dereferences vcpu->arch.apic (which is NULL for userspace irqchip). Tested-by: rmuncrief@humanavance.com Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=206579 Reviewed-by: Miaohe Lin Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ad3f5b178a03..bd02526300ab 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2194,8 +2194,9 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) static int avic_init_vcpu(struct vcpu_svm *svm) { int ret; + struct kvm_vcpu *vcpu = &svm->vcpu; - if (!kvm_vcpu_apicv_active(&svm->vcpu)) + if (!avic || !irqchip_in_kernel(vcpu->kvm)) return 0; ret = avic_init_backing_page(&svm->vcpu); -- cgit v1.2.3-59-g8ed1b From fcd07f9adc7dacc2532695cf9dd2284d49e716ff Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 28 Feb 2020 09:49:41 +0100 Subject: KVM: let declaration of kvm_get_running_vcpus match implementation Sparse notices that declaration and implementation do not match: arch/s390/kvm/../../../virt/kvm/kvm_main.c:4435:17: warning: incorrect type in return expression (different address spaces) arch/s390/kvm/../../../virt/kvm/kvm_main.c:4435:17: expected struct kvm_vcpu [noderef] ** arch/s390/kvm/../../../virt/kvm/kvm_main.c:4435:17: got struct kvm_vcpu *[noderef] * Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7944ad6ac10b..bcb9b2ac0791 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1344,7 +1344,7 @@ static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val) #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ struct kvm_vcpu *kvm_get_running_vcpu(void); -struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); +struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS bool kvm_arch_has_irq_bypass(void); -- cgit v1.2.3-59-g8ed1b From a262bca3aba03f0696995beb223c610e47533db3 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 18 Feb 2020 09:08:23 +0800 Subject: KVM: Introduce pv check helpers Introduce some pv check helpers for consistency. Suggested-by: Vitaly Kuznetsov Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvm.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index d817f255aed8..7bc0fff3f8e6 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -425,7 +425,27 @@ static void __init sev_map_percpu_data(void) } } +static bool pv_tlb_flush_supported(void) +{ + return (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && + !kvm_para_has_hint(KVM_HINTS_REALTIME) && + kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)); +} + #ifdef CONFIG_SMP + +static bool pv_ipi_supported(void) +{ + return kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI); +} + +static bool pv_sched_yield_supported(void) +{ + return (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) && + !kvm_para_has_hint(KVM_HINTS_REALTIME) && + kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)); +} + #define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG) static void __send_ipi_mask(const struct cpumask *mask, int vector) @@ -619,9 +639,7 @@ static void __init kvm_guest_init(void) pv_ops.time.steal_clock = kvm_steal_clock; } - if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && - !kvm_para_has_hint(KVM_HINTS_REALTIME) && - kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { + if (pv_tlb_flush_supported()) { pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others; pv_ops.mmu.tlb_remove_table = tlb_remove_table; } @@ -632,9 +650,7 @@ static void __init kvm_guest_init(void) #ifdef CONFIG_SMP smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus; smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; - if (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) && - !kvm_para_has_hint(KVM_HINTS_REALTIME) && - kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { + if (pv_sched_yield_supported()) { smp_ops.send_call_func_ipi = kvm_smp_send_call_func_ipi; pr_info("KVM setup pv sched yield\n"); } @@ -700,7 +716,7 @@ static uint32_t __init kvm_detect(void) static void __init kvm_apic_init(void) { #if defined(CONFIG_SMP) - if (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI)) + if (pv_ipi_supported()) kvm_setup_pv_ipi(); #endif } @@ -739,9 +755,7 @@ static __init int kvm_setup_pv_tlb_flush(void) if (!kvm_para_available() || nopv) return 0; - if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && - !kvm_para_has_hint(KVM_HINTS_REALTIME) && - kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { + if (pv_tlb_flush_supported()) { for_each_possible_cpu(cpu) { zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu), GFP_KERNEL, cpu_to_node(cpu)); -- cgit v1.2.3-59-g8ed1b From 8a9442f49c72bde43f982e53b74526ac37d3565b Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 18 Feb 2020 09:08:24 +0800 Subject: KVM: Pre-allocate 1 cpumask variable per cpu for both pv tlb and pv ipis Nick Desaulniers Reported: When building with: $ make CC=clang arch/x86/ CFLAGS=-Wframe-larger-than=1000 The following warning is observed: arch/x86/kernel/kvm.c:494:13: warning: stack frame size of 1064 bytes in function 'kvm_send_ipi_mask_allbutself' [-Wframe-larger-than=] static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector) ^ Debugging with: https://github.com/ClangBuiltLinux/frame-larger-than via: $ python3 frame_larger_than.py arch/x86/kernel/kvm.o \ kvm_send_ipi_mask_allbutself points to the stack allocated `struct cpumask newmask` in `kvm_send_ipi_mask_allbutself`. The size of a `struct cpumask` is potentially large, as it's CONFIG_NR_CPUS divided by BITS_PER_LONG for the target architecture. CONFIG_NR_CPUS for X86_64 can be as high as 8192, making a single instance of a `struct cpumask` 1024 B. This patch fixes it by pre-allocate 1 cpumask variable per cpu and use it for both pv tlb and pv ipis.. Reported-by: Nick Desaulniers Acked-by: Nick Desaulniers Reviewed-by: Vitaly Kuznetsov Cc: Peter Zijlstra Cc: Nick Desaulniers Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvm.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 7bc0fff3f8e6..6efe0410fb72 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -432,6 +432,8 @@ static bool pv_tlb_flush_supported(void) kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)); } +static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask); + #ifdef CONFIG_SMP static bool pv_ipi_supported(void) @@ -510,12 +512,12 @@ static void kvm_send_ipi_mask(const struct cpumask *mask, int vector) static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector) { unsigned int this_cpu = smp_processor_id(); - struct cpumask new_mask; + struct cpumask *new_mask = this_cpu_cpumask_var_ptr(__pv_cpu_mask); const struct cpumask *local_mask; - cpumask_copy(&new_mask, mask); - cpumask_clear_cpu(this_cpu, &new_mask); - local_mask = &new_mask; + cpumask_copy(new_mask, mask); + cpumask_clear_cpu(this_cpu, new_mask); + local_mask = new_mask; __send_ipi_mask(local_mask, vector); } @@ -595,7 +597,6 @@ static void __init kvm_apf_trap_init(void) update_intr_gate(X86_TRAP_PF, async_page_fault); } -static DEFINE_PER_CPU(cpumask_var_t, __pv_tlb_mask); static void kvm_flush_tlb_others(const struct cpumask *cpumask, const struct flush_tlb_info *info) @@ -603,7 +604,7 @@ static void kvm_flush_tlb_others(const struct cpumask *cpumask, u8 state; int cpu; struct kvm_steal_time *src; - struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_tlb_mask); + struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_cpu_mask); cpumask_copy(flushmask, cpumask); /* @@ -642,6 +643,7 @@ static void __init kvm_guest_init(void) if (pv_tlb_flush_supported()) { pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others; pv_ops.mmu.tlb_remove_table = tlb_remove_table; + pr_info("KVM setup pv remote TLB flush\n"); } if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) @@ -748,24 +750,31 @@ static __init int activate_jump_labels(void) } arch_initcall(activate_jump_labels); -static __init int kvm_setup_pv_tlb_flush(void) +static __init int kvm_alloc_cpumask(void) { int cpu; + bool alloc = false; if (!kvm_para_available() || nopv) return 0; - if (pv_tlb_flush_supported()) { + if (pv_tlb_flush_supported()) + alloc = true; + +#if defined(CONFIG_SMP) + if (pv_ipi_supported()) + alloc = true; +#endif + + if (alloc) for_each_possible_cpu(cpu) { - zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu), + zalloc_cpumask_var_node(per_cpu_ptr(&__pv_cpu_mask, cpu), GFP_KERNEL, cpu_to_node(cpu)); } - pr_info("KVM setup pv remote TLB flush\n"); - } return 0; } -arch_initcall(kvm_setup_pv_tlb_flush); +arch_initcall(kvm_alloc_cpumask); #ifdef CONFIG_PARAVIRT_SPINLOCKS -- cgit v1.2.3-59-g8ed1b From 575b255c1663c8fccc41fe965dcac281e3113c65 Mon Sep 17 00:00:00 2001 From: Valdis Klētnieks Date: Thu, 27 Feb 2020 21:49:52 -0500 Subject: KVM: x86: allow compiling as non-module with W=1 Compile error with CONFIG_KVM_INTEL=y and W=1: CC arch/x86/kvm/vmx/vmx.o arch/x86/kvm/vmx/vmx.c:68:32: error: 'vmx_cpu_id' defined but not used [-Werror=unused-const-variable=] 68 | static const struct x86_cpu_id vmx_cpu_id[] = { | ^~~~~~~~~~ cc1: all warnings being treated as errors When building with =y, the MODULE_DEVICE_TABLE macro doesn't generate a reference to the structure (or any code at all). This makes W=1 compiles unhappy. Wrap both in a #ifdef to avoid the issue. Signed-off-by: Valdis Kletnieks [Do the same for CONFIG_KVM_AMD. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 2 ++ arch/x86/kvm/vmx/vmx.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index bd02526300ab..24c0b2ba8fb9 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -57,11 +57,13 @@ MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); +#ifdef MODULE static const struct x86_cpu_id svm_cpu_id[] = { X86_FEATURE_MATCH(X86_FEATURE_SVM), {} }; MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id); +#endif #define IOPM_ALLOC_ORDER 2 #define MSRPM_ALLOC_ORDER 1 diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 63aaf44edd1f..ce70a71037ed 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -64,11 +64,13 @@ MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); +#ifdef MODULE static const struct x86_cpu_id vmx_cpu_id[] = { X86_FEATURE_MATCH(X86_FEATURE_VMX), {} }; MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id); +#endif bool __read_mostly enable_vpid = 1; module_param_named(vpid, enable_vpid, bool, 0444); -- cgit v1.2.3-59-g8ed1b From 4f337faf1c55e55bdc49df13fcb3a3c45655899e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 28 Feb 2020 10:42:31 +0100 Subject: KVM: allow disabling -Werror Restrict -Werror to well-tested configurations and allow disabling it via Kconfig. Reported-by: Christoph Hellwig Signed-off-by: Paolo Bonzini --- arch/x86/kvm/Kconfig | 13 +++++++++++++ arch/x86/kvm/Makefile | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 991019d5eee1..1bb4927030af 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -59,6 +59,19 @@ config KVM If unsure, say N. +config KVM_WERROR + bool "Compile KVM with -Werror" + # KASAN may cause the build to fail due to larger frames + default y if X86_64 && !KASAN + # We use the dependency on !COMPILE_TEST to not be enabled + # blindly in allmodconfig or allyesconfig configurations + depends on (X86_64 && !KASAN) || !COMPILE_TEST + depends on EXPERT + help + Add -Werror to the build flags for (and only for) i915.ko. + + If in doubt, say "N". + config KVM_INTEL tristate "KVM for Intel (and compatible) processors support" depends on KVM && IA32_FEAT_CTL diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 4654e97a05cc..e553f0fdd87d 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 ccflags-y += -Iarch/x86/kvm -ccflags-y += -Werror +ccflags-$(CONFIG_KVM_WERROR) += -Werror KVM := ../../../virt/kvm -- cgit v1.2.3-59-g8ed1b From aaec7c03de92c35a96966631989950e6e27662db Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 28 Feb 2020 10:49:10 +0100 Subject: KVM: x86: avoid useless copy of cpufreq policy struct cpufreq_policy is quite big and it is not a good idea to allocate one on the stack. Just use cpufreq_cpu_get and cpufreq_cpu_put which is even simpler. Reported-by: Christoph Hellwig Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 359fcd395132..bcb6b676608b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7190,15 +7190,15 @@ static void kvm_timer_init(void) if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { #ifdef CONFIG_CPU_FREQ - struct cpufreq_policy policy; + struct cpufreq_policy *policy; int cpu; - memset(&policy, 0, sizeof(policy)); cpu = get_cpu(); - cpufreq_get_policy(&policy, cpu); - if (policy.cpuinfo.max_freq) - max_tsc_khz = policy.cpuinfo.max_freq; + policy = cpufreq_cpu_get(cpu); + if (policy && policy->cpuinfo.max_freq) + max_tsc_khz = policy->cpuinfo.max_freq; put_cpu(); + cpufreq_cpu_put(policy); #endif cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); -- cgit v1.2.3-59-g8ed1b From ef935c25fd648a17c27af5d1738b1884f78c5b75 Mon Sep 17 00:00:00 2001 From: Erwan Velu Date: Thu, 27 Feb 2020 19:00:46 +0100 Subject: kvm: x86: Limit the number of "kvm: disabled by bios" messages In older version of systemd(219), at boot time, udevadm is called with : /usr/bin/udevadm trigger --type=devices --action=add" This program generates an echo "add" in /sys/devices/system/cpu/cpu/uevent, leading to the "kvm: disabled by bios" message in case of your Bios disabled the virtualization extensions. On a modern system running up to 256 CPU threads, this pollutes the Kernel logs. This patch offers to ratelimit this message to avoid any userspace program triggering this uevent printing this message too often. This patch is only a workaround but greatly reduce the pollution without breaking the current behavior of printing a message if some try to instantiate KVM on a system that doesn't support it. Note that recent versions of systemd (>239) do not have trigger this behavior. This patch will be useful at least for some using older systemd with recent Kernels. Signed-off-by: Erwan Velu Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bcb6b676608b..5de200663f51 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7308,12 +7308,12 @@ int kvm_arch_init(void *opaque) } if (!ops->cpu_has_kvm_support()) { - printk(KERN_ERR "kvm: no hardware support\n"); + pr_err_ratelimited("kvm: no hardware support\n"); r = -EOPNOTSUPP; goto out; } if (ops->disabled_by_bios()) { - printk(KERN_ERR "kvm: disabled by bios\n"); + pr_err_ratelimited("kvm: disabled by bios\n"); r = -EOPNOTSUPP; goto out; } -- cgit v1.2.3-59-g8ed1b From 6c5d911249290f41f7b50b43344a7520605b1acb Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 21 Feb 2020 23:31:11 -0500 Subject: jbd2: fix data races at struct journal_head journal_head::b_transaction and journal_head::b_next_transaction could be accessed concurrently as noticed by KCSAN, LTP: starting fsync04 /dev/zero: Can't open blockdev EXT4-fs (loop0): mounting ext3 file system using the ext4 subsystem EXT4-fs (loop0): mounted filesystem with ordered data mode. Opts: (null) ================================================================== BUG: KCSAN: data-race in __jbd2_journal_refile_buffer [jbd2] / jbd2_write_access_granted [jbd2] write to 0xffff99f9b1bd0e30 of 8 bytes by task 25721 on cpu 70: __jbd2_journal_refile_buffer+0xdd/0x210 [jbd2] __jbd2_journal_refile_buffer at fs/jbd2/transaction.c:2569 jbd2_journal_commit_transaction+0x2d15/0x3f20 [jbd2] (inlined by) jbd2_journal_commit_transaction at fs/jbd2/commit.c:1034 kjournald2+0x13b/0x450 [jbd2] kthread+0x1cd/0x1f0 ret_from_fork+0x27/0x50 read to 0xffff99f9b1bd0e30 of 8 bytes by task 25724 on cpu 68: jbd2_write_access_granted+0x1b2/0x250 [jbd2] jbd2_write_access_granted at fs/jbd2/transaction.c:1155 jbd2_journal_get_write_access+0x2c/0x60 [jbd2] __ext4_journal_get_write_access+0x50/0x90 [ext4] ext4_mb_mark_diskspace_used+0x158/0x620 [ext4] ext4_mb_new_blocks+0x54f/0xca0 [ext4] ext4_ind_map_blocks+0xc79/0x1b40 [ext4] ext4_map_blocks+0x3b4/0x950 [ext4] _ext4_get_block+0xfc/0x270 [ext4] ext4_get_block+0x3b/0x50 [ext4] __block_write_begin_int+0x22e/0xae0 __block_write_begin+0x39/0x50 ext4_write_begin+0x388/0xb50 [ext4] generic_perform_write+0x15d/0x290 ext4_buffered_write_iter+0x11f/0x210 [ext4] ext4_file_write_iter+0xce/0x9e0 [ext4] new_sync_write+0x29c/0x3b0 __vfs_write+0x92/0xa0 vfs_write+0x103/0x260 ksys_write+0x9d/0x130 __x64_sys_write+0x4c/0x60 do_syscall_64+0x91/0xb05 entry_SYSCALL_64_after_hwframe+0x49/0xbe 5 locks held by fsync04/25724: #0: ffff99f9911093f8 (sb_writers#13){.+.+}, at: vfs_write+0x21c/0x260 #1: ffff99f9db4c0348 (&sb->s_type->i_mutex_key#15){+.+.}, at: ext4_buffered_write_iter+0x65/0x210 [ext4] #2: ffff99f5e7dfcf58 (jbd2_handle){++++}, at: start_this_handle+0x1c1/0x9d0 [jbd2] #3: ffff99f9db4c0168 (&ei->i_data_sem){++++}, at: ext4_map_blocks+0x176/0x950 [ext4] #4: ffffffff99086b40 (rcu_read_lock){....}, at: jbd2_write_access_granted+0x4e/0x250 [jbd2] irq event stamp: 1407125 hardirqs last enabled at (1407125): [] __find_get_block+0x107/0x790 hardirqs last disabled at (1407124): [] __find_get_block+0x49/0x790 softirqs last enabled at (1405528): [] __do_softirq+0x34c/0x57c softirqs last disabled at (1405521): [] irq_exit+0xa2/0xc0 Reported by Kernel Concurrency Sanitizer on: CPU: 68 PID: 25724 Comm: fsync04 Tainted: G L 5.6.0-rc2-next-20200221+ #7 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019 The plain reads are outside of jh->b_state_lock critical section which result in data races. Fix them by adding pairs of READ|WRITE_ONCE(). Reviewed-by: Jan Kara Signed-off-by: Qian Cai Link: https://lore.kernel.org/r/20200222043111.2227-1-cai@lca.pw Signed-off-by: Theodore Ts'o --- fs/jbd2/transaction.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index d181948c0390..3dccc23cf010 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1150,8 +1150,8 @@ static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh, /* For undo access buffer must have data copied */ if (undo && !jh->b_committed_data) goto out; - if (jh->b_transaction != handle->h_transaction && - jh->b_next_transaction != handle->h_transaction) + if (READ_ONCE(jh->b_transaction) != handle->h_transaction && + READ_ONCE(jh->b_next_transaction) != handle->h_transaction) goto out; /* * There are two reasons for the barrier here: @@ -2569,8 +2569,8 @@ bool __jbd2_journal_refile_buffer(struct journal_head *jh) * our jh reference and thus __jbd2_journal_file_buffer() must not * take a new one. */ - jh->b_transaction = jh->b_next_transaction; - jh->b_next_transaction = NULL; + WRITE_ONCE(jh->b_transaction, jh->b_next_transaction); + WRITE_ONCE(jh->b_next_transaction, NULL); if (buffer_freed(bh)) jlist = BJ_Forget; else if (jh->b_modified) -- cgit v1.2.3-59-g8ed1b From 38b17afb0ebb9ecd41418d3c08bcf9198af4349d Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 25 Feb 2020 15:12:29 +0100 Subject: macintosh: therm_windtunnel: fix regression when instantiating devices Removing attach_adapter from this driver caused a regression for at least some machines. Those machines had the sensors described in their DT, too, so they didn't need manual creation of the sensor devices. The old code worked, though, because manual creation came first. Creation of DT devices then failed later and caused error logs, but the sensors worked nonetheless because of the manually created devices. When removing attach_adaper, manual creation now comes later and loses the race. The sensor devices were already registered via DT, yet with another binding, so the driver could not be bound to it. This fix refactors the code to remove the race and only manually creates devices if there are no DT nodes present. Also, the DT binding is updated to match both, the DT and manually created devices. Because we don't know which device creation will be used at runtime, the code to start the kthread is moved to do_probe() which will be called by both methods. Fixes: 3e7bed52719d ("macintosh: therm_windtunnel: drop using attach_adapter") Link: https://bugzilla.kernel.org/show_bug.cgi?id=201723 Reported-by: Erhard Furtner Tested-by: Erhard Furtner Acked-by: Michael Ellerman (powerpc) Signed-off-by: Wolfram Sang Cc: stable@kernel.org # v4.19+ --- drivers/macintosh/therm_windtunnel.c | 52 +++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/drivers/macintosh/therm_windtunnel.c b/drivers/macintosh/therm_windtunnel.c index 8c744578122a..a0d87ed9da69 100644 --- a/drivers/macintosh/therm_windtunnel.c +++ b/drivers/macintosh/therm_windtunnel.c @@ -300,9 +300,11 @@ static int control_loop(void *dummy) /* i2c probing and setup */ /************************************************************************/ -static int -do_attach( struct i2c_adapter *adapter ) +static void do_attach(struct i2c_adapter *adapter) { + struct i2c_board_info info = { }; + struct device_node *np; + /* scan 0x48-0x4f (DS1775) and 0x2c-2x2f (ADM1030) */ static const unsigned short scan_ds1775[] = { 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, @@ -313,25 +315,24 @@ do_attach( struct i2c_adapter *adapter ) I2C_CLIENT_END }; - if( strncmp(adapter->name, "uni-n", 5) ) - return 0; - - if( !x.running ) { - struct i2c_board_info info; + if (x.running || strncmp(adapter->name, "uni-n", 5)) + return; - memset(&info, 0, sizeof(struct i2c_board_info)); - strlcpy(info.type, "therm_ds1775", I2C_NAME_SIZE); + np = of_find_compatible_node(adapter->dev.of_node, NULL, "MAC,ds1775"); + if (np) { + of_node_put(np); + } else { + strlcpy(info.type, "MAC,ds1775", I2C_NAME_SIZE); i2c_new_probed_device(adapter, &info, scan_ds1775, NULL); + } - strlcpy(info.type, "therm_adm1030", I2C_NAME_SIZE); + np = of_find_compatible_node(adapter->dev.of_node, NULL, "MAC,adm1030"); + if (np) { + of_node_put(np); + } else { + strlcpy(info.type, "MAC,adm1030", I2C_NAME_SIZE); i2c_new_probed_device(adapter, &info, scan_adm1030, NULL); - - if( x.thermostat && x.fan ) { - x.running = 1; - x.poll_task = kthread_run(control_loop, NULL, "g4fand"); - } } - return 0; } static int @@ -404,8 +405,8 @@ out: enum chip { ds1775, adm1030 }; static const struct i2c_device_id therm_windtunnel_id[] = { - { "therm_ds1775", ds1775 }, - { "therm_adm1030", adm1030 }, + { "MAC,ds1775", ds1775 }, + { "MAC,adm1030", adm1030 }, { } }; MODULE_DEVICE_TABLE(i2c, therm_windtunnel_id); @@ -414,6 +415,7 @@ static int do_probe(struct i2c_client *cl, const struct i2c_device_id *id) { struct i2c_adapter *adapter = cl->adapter; + int ret = 0; if( !i2c_check_functionality(adapter, I2C_FUNC_SMBUS_WORD_DATA | I2C_FUNC_SMBUS_WRITE_BYTE) ) @@ -421,11 +423,19 @@ do_probe(struct i2c_client *cl, const struct i2c_device_id *id) switch (id->driver_data) { case adm1030: - return attach_fan( cl ); + ret = attach_fan(cl); + break; case ds1775: - return attach_thermostat(cl); + ret = attach_thermostat(cl); + break; } - return 0; + + if (!x.running && x.thermostat && x.fan) { + x.running = 1; + x.poll_task = kthread_run(control_loop, NULL, "g4fand"); + } + + return ret; } static struct i2c_driver g4fan_driver = { -- cgit v1.2.3-59-g8ed1b From 37b0b6b8b99c0e1c1f11abbe7cf49b6d03795b3f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 28 Feb 2020 12:22:56 +0300 Subject: ext4: potential crash on allocation error in ext4_alloc_flex_bg_array() If sbi->s_flex_groups_allocated is zero and the first allocation fails then this code will crash. The problem is that "i--" will set "i" to -1 but when we compare "i >= sbi->s_flex_groups_allocated" then the -1 is type promoted to unsigned and becomes UINT_MAX. Since UINT_MAX is more than zero, the condition is true so we call kvfree(new_groups[-1]). The loop will carry on freeing invalid memory until it crashes. Fixes: 7c990728b99e ("ext4: fix potential race between s_flex_groups online resizing and access") Reviewed-by: Suraj Jitindar Singh Signed-off-by: Dan Carpenter Cc: stable@kernel.org Link: https://lore.kernel.org/r/20200228092142.7irbc44yaz3by7nb@kili.mountain Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ff1b764b0c0e..0c7c4adb664e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2391,7 +2391,7 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct flex_groups **old_groups, **new_groups; - int size, i; + int size, i, j; if (!sbi->s_log_groups_per_flex) return 0; @@ -2412,8 +2412,8 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) sizeof(struct flex_groups)), GFP_KERNEL); if (!new_groups[i]) { - for (i--; i >= sbi->s_flex_groups_allocated; i--) - kvfree(new_groups[i]); + for (j = sbi->s_flex_groups_allocated; j < i; j++) + kvfree(new_groups[j]); kvfree(new_groups); ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups", size); -- cgit v1.2.3-59-g8ed1b From 86f7e90ce840aa1db407d3ea6e9b3a52b2ce923c Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Sat, 29 Feb 2020 11:30:14 -0800 Subject: KVM: VMX: check descriptor table exits on instruction emulation KVM emulates UMIP on hardware that doesn't support it by setting the 'descriptor table exiting' VM-execution control and performing instruction emulation. When running nested, this emulation is broken as KVM refuses to emulate L2 instructions by default. Correct this regression by allowing the emulation of descriptor table instructions if L1 hasn't requested 'descriptor table exiting'. Fixes: 07721feee46b ("KVM: nVMX: Don't emulate instructions in guest mode") Reported-by: Jan Kiszka Cc: stable@vger.kernel.org Cc: Paolo Bonzini Cc: Jim Mattson Signed-off-by: Oliver Upton Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index ce70a71037ed..40b1e6138cd5 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7177,6 +7177,7 @@ static int vmx_check_intercept_io(struct kvm_vcpu *vcpu, else intercept = nested_vmx_check_io_bitmaps(vcpu, port, size); + /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */ return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; } @@ -7206,6 +7207,20 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, case x86_intercept_outs: return vmx_check_intercept_io(vcpu, info); + case x86_intercept_lgdt: + case x86_intercept_lidt: + case x86_intercept_lldt: + case x86_intercept_ltr: + case x86_intercept_sgdt: + case x86_intercept_sidt: + case x86_intercept_sldt: + case x86_intercept_str: + if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC)) + return X86EMUL_CONTINUE; + + /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */ + break; + /* TODO: check more intercepts... */ default: break; -- cgit v1.2.3-59-g8ed1b From 98d54f81e36ba3bf92172791eba5ca5bd813989b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 1 Mar 2020 16:38:46 -0600 Subject: Linux 5.6-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1a1a0d271697..86035d866f2c 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 6 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Kleptomaniac Octopus # *DOCUMENTATION* -- cgit v1.2.3-59-g8ed1b From 8c867387160e89c9ffd12459f38e56844312a7a7 Mon Sep 17 00:00:00 2001 From: Ley Foon Tan Date: Thu, 27 Feb 2020 04:20:14 +0800 Subject: arm64: dts: socfpga: agilex: Fix gmac compatible Fix gmac compatible string to "altr,socfpga-stmmac-a10-s10". Gmac for Agilex should use same compatible as Stratix 10. Fixes: 4b36daf9ada3 ("arm64: dts: agilex: Add initial support for Intel's Agilex SoCFPGA") Cc: stable@vger.kernel.org Signed-off-by: Ley Foon Tan Signed-off-by: Dinh Nguyen --- arch/arm64/boot/dts/intel/socfpga_agilex.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi index e1d357eaad7c..d8c44d3ca15a 100644 --- a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi +++ b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi @@ -102,7 +102,7 @@ }; gmac0: ethernet@ff800000 { - compatible = "altr,socfpga-stmmac", "snps,dwmac-3.74a", "snps,dwmac"; + compatible = "altr,socfpga-stmmac-a10-s10", "snps,dwmac-3.74a", "snps,dwmac"; reg = <0xff800000 0x2000>; interrupts = <0 90 4>; interrupt-names = "macirq"; @@ -118,7 +118,7 @@ }; gmac1: ethernet@ff802000 { - compatible = "altr,socfpga-stmmac", "snps,dwmac-3.74a", "snps,dwmac"; + compatible = "altr,socfpga-stmmac-a10-s10", "snps,dwmac-3.74a", "snps,dwmac"; reg = <0xff802000 0x2000>; interrupts = <0 91 4>; interrupt-names = "macirq"; @@ -134,7 +134,7 @@ }; gmac2: ethernet@ff804000 { - compatible = "altr,socfpga-stmmac", "snps,dwmac-3.74a", "snps,dwmac"; + compatible = "altr,socfpga-stmmac-a10-s10", "snps,dwmac-3.74a", "snps,dwmac"; reg = <0xff804000 0x2000>; interrupts = <0 92 4>; interrupt-names = "macirq"; -- cgit v1.2.3-59-g8ed1b