From 1c32ca5dc6d00012f0c964e5fdd7042fcc71efb1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 14 Apr 2020 15:10:08 +0100 Subject: KVM: arm: vgic: Fix limit condition when writing to GICD_I[CS]ACTIVER MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When deciding whether a guest has to be stopped we check whether this is a private interrupt or not. Unfortunately, there's an off-by-one bug here, and we fail to recognize a whole range of interrupts as being global (GICv2 SPIs 32-63). Fix the condition from > to be >=. Cc: stable@vger.kernel.org Fixes: abd7229626b93 ("KVM: arm/arm64: Simplify active_change_prepare and plug race") Reported-by: André Przywara Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-mmio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index 2199302597fa..d085e047953f 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -444,7 +444,7 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, static void vgic_change_active_prepare(struct kvm_vcpu *vcpu, u32 intid) { if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 || - intid > VGIC_NR_PRIVATE_IRQS) + intid >= VGIC_NR_PRIVATE_IRQS) kvm_arm_halt_guest(vcpu->kvm); } @@ -452,7 +452,7 @@ static void vgic_change_active_prepare(struct kvm_vcpu *vcpu, u32 intid) static void vgic_change_active_finish(struct kvm_vcpu *vcpu, u32 intid) { if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 || - intid > VGIC_NR_PRIVATE_IRQS) + intid >= VGIC_NR_PRIVATE_IRQS) kvm_arm_resume_guest(vcpu->kvm); } -- cgit v1.2.3-59-g8ed1b From 2890ac993daa7bdcc57a92da2f07bcdec243666b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 1 Apr 2020 12:25:05 +0100 Subject: KVM: arm64: PSCI: Narrow input registers when using 32bit functions When a guest delibarately uses an SMC32 function number (which is allowed), we should make sure we drop the top 32bits from the input arguments, as they could legitimately be junk. Reported-by: Christoffer Dall Reviewed-by: Christoffer Dall Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- virt/kvm/arm/psci.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index 14a162e295a9..3772717efe3e 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -186,6 +186,18 @@ static void kvm_psci_system_reset(struct kvm_vcpu *vcpu) kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET); } +static void kvm_psci_narrow_to_32bit(struct kvm_vcpu *vcpu) +{ + int i; + + /* + * Zero the input registers' upper 32 bits. They will be fully + * zeroed on exit, so we're fine changing them in place. + */ + for (i = 1; i < 4; i++) + vcpu_set_reg(vcpu, i, lower_32_bits(vcpu_get_reg(vcpu, i))); +} + static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; @@ -210,12 +222,16 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) val = PSCI_RET_SUCCESS; break; case PSCI_0_2_FN_CPU_ON: + kvm_psci_narrow_to_32bit(vcpu); + fallthrough; case PSCI_0_2_FN64_CPU_ON: mutex_lock(&kvm->lock); val = kvm_psci_vcpu_on(vcpu); mutex_unlock(&kvm->lock); break; case PSCI_0_2_FN_AFFINITY_INFO: + kvm_psci_narrow_to_32bit(vcpu); + fallthrough; case PSCI_0_2_FN64_AFFINITY_INFO: val = kvm_psci_vcpu_affinity_info(vcpu); break; -- cgit v1.2.3-59-g8ed1b From fdc9999e20cd038d9d5bf54496c1f0226bc2b64e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 1 Apr 2020 12:38:49 +0100 Subject: KVM: arm64: PSCI: Forbid 64bit functions for 32bit guests Implementing (and even advertising) 64bit PSCI functions to 32bit guests is at least a bit odd, if not altogether violating the spec which says ("5.2.1 Register usage in arguments and return values"): "Adherence to the SMC Calling Conventions implies that any AArch32 caller of an SMC64 function will get a return code of 0xFFFFFFFF(int32). This matches the NOT_SUPPORTED error code used in PSCI" Tighten the implementation by pretending these functions are not there for 32bit guests. Reviewed-by: Christoffer Dall Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- virt/kvm/arm/psci.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index 3772717efe3e..ae364716ee40 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -198,6 +198,21 @@ static void kvm_psci_narrow_to_32bit(struct kvm_vcpu *vcpu) vcpu_set_reg(vcpu, i, lower_32_bits(vcpu_get_reg(vcpu, i))); } +static unsigned long kvm_psci_check_allowed_function(struct kvm_vcpu *vcpu, u32 fn) +{ + switch(fn) { + case PSCI_0_2_FN64_CPU_SUSPEND: + case PSCI_0_2_FN64_CPU_ON: + case PSCI_0_2_FN64_AFFINITY_INFO: + /* Disallow these functions for 32bit guests */ + if (vcpu_mode_is_32bit(vcpu)) + return PSCI_RET_NOT_SUPPORTED; + break; + } + + return 0; +} + static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; @@ -205,6 +220,10 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) unsigned long val; int ret = 1; + val = kvm_psci_check_allowed_function(vcpu, psci_fn); + if (val) + goto out; + switch (psci_fn) { case PSCI_0_2_FN_PSCI_VERSION: /* @@ -272,6 +291,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) break; } +out: smccc_set_retval(vcpu, val, 0, 0, 0); return ret; } @@ -289,6 +309,10 @@ static int kvm_psci_1_0_call(struct kvm_vcpu *vcpu) break; case PSCI_1_0_FN_PSCI_FEATURES: feature = smccc_get_arg1(vcpu); + val = kvm_psci_check_allowed_function(vcpu, feature); + if (val) + break; + switch(feature) { case PSCI_0_2_FN_PSCI_VERSION: case PSCI_0_2_FN_CPU_SUSPEND: -- cgit v1.2.3-59-g8ed1b From e9b3c610a05c1cdf8e959a6d89c38807ff758ee6 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 15 Apr 2020 16:03:04 +0200 Subject: USB: serial: garmin_gps: add sanity checking for data length We must not process packets shorter than a packet ID Signed-off-by: Oliver Neukum Reported-and-tested-by: syzbot+d29e9263e13ce0b9f4fd@syzkaller.appspotmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/garmin_gps.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c index ffd984142171..d63072fee099 100644 --- a/drivers/usb/serial/garmin_gps.c +++ b/drivers/usb/serial/garmin_gps.c @@ -1138,8 +1138,8 @@ static void garmin_read_process(struct garmin_data *garmin_data_p, send it directly to the tty port */ if (garmin_data_p->flags & FLAGS_QUEUING) { pkt_add(garmin_data_p, data, data_length); - } else if (bulk_data || - getLayerId(data) == GARMIN_LAYERID_APPL) { + } else if (bulk_data || (data_length >= sizeof(u32) && + getLayerId(data) == GARMIN_LAYERID_APPL)) { spin_lock_irqsave(&garmin_data_p->lock, flags); garmin_data_p->flags |= APP_RESP_SEEN; -- cgit v1.2.3-59-g8ed1b From 9a50ebbffa9862db7604345f5fd763122b0f6fed Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 6 Apr 2020 16:21:20 +0100 Subject: KVM: arm: vgic: Synchronize the whole guest on GIC{D,R}_I{S,C}ACTIVER read When a guest tries to read the active state of its interrupts, we currently just return whatever state we have in memory. This means that if such an interrupt lives in a List Register on another CPU, we fail to obsertve the latest active state for this interrupt. In order to remedy this, stop all the other vcpus so that they exit and we can observe the most recent value for the state. This is similar to what we are doing for the write side of the same registers, and results in new MMIO handlers for userspace (which do not need to stop the guest, as it is supposed to be stopped already). Reported-by: Julien Grall Reviewed-by: Andre Przywara Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-mmio-v2.c | 4 +- virt/kvm/arm/vgic/vgic-mmio-v3.c | 12 ++--- virt/kvm/arm/vgic/vgic-mmio.c | 100 +++++++++++++++++++++++++-------------- virt/kvm/arm/vgic/vgic-mmio.h | 3 ++ 4 files changed, 75 insertions(+), 44 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index 5945f062d749..d63881f60e1a 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -422,11 +422,11 @@ static const struct vgic_register_region vgic_v2_dist_registers[] = { VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET, vgic_mmio_read_active, vgic_mmio_write_sactive, - NULL, vgic_mmio_uaccess_write_sactive, 1, + vgic_uaccess_read_active, vgic_mmio_uaccess_write_sactive, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_CLEAR, vgic_mmio_read_active, vgic_mmio_write_cactive, - NULL, vgic_mmio_uaccess_write_cactive, 1, + vgic_uaccess_read_active, vgic_mmio_uaccess_write_cactive, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PRI, vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL, diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index e72dcc454247..f2b37a081f26 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -553,11 +553,11 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = { VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER, vgic_mmio_read_active, vgic_mmio_write_sactive, - NULL, vgic_mmio_uaccess_write_sactive, 1, + vgic_uaccess_read_active, vgic_mmio_uaccess_write_sactive, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICACTIVER, vgic_mmio_read_active, vgic_mmio_write_cactive, - NULL, vgic_mmio_uaccess_write_cactive, + vgic_uaccess_read_active, vgic_mmio_uaccess_write_cactive, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IPRIORITYR, vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL, @@ -625,12 +625,12 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = { VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISACTIVER0, vgic_mmio_read_active, vgic_mmio_write_sactive, - NULL, vgic_mmio_uaccess_write_sactive, - 4, VGIC_ACCESS_32bit), + vgic_uaccess_read_active, vgic_mmio_uaccess_write_sactive, 4, + VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICACTIVER0, vgic_mmio_read_active, vgic_mmio_write_cactive, - NULL, vgic_mmio_uaccess_write_cactive, - 4, VGIC_ACCESS_32bit), + vgic_uaccess_read_active, vgic_mmio_uaccess_write_cactive, 4, + VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IPRIORITYR0, vgic_mmio_read_priority, vgic_mmio_write_priority, 32, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index d085e047953f..b38e94e8f74a 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -348,8 +348,39 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, } } -unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len) + +/* + * If we are fiddling with an IRQ's active state, we have to make sure the IRQ + * is not queued on some running VCPU's LRs, because then the change to the + * active state can be overwritten when the VCPU's state is synced coming back + * from the guest. + * + * For shared interrupts as well as GICv3 private interrupts, we have to + * stop all the VCPUs because interrupts can be migrated while we don't hold + * the IRQ locks and we don't want to be chasing moving targets. + * + * For GICv2 private interrupts we don't have to do anything because + * userspace accesses to the VGIC state already require all VCPUs to be + * stopped, and only the VCPU itself can modify its private interrupts + * active state, which guarantees that the VCPU is not running. + */ +static void vgic_access_active_prepare(struct kvm_vcpu *vcpu, u32 intid) +{ + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 || + intid >= VGIC_NR_PRIVATE_IRQS) + kvm_arm_halt_guest(vcpu->kvm); +} + +/* See vgic_access_active_prepare */ +static void vgic_access_active_finish(struct kvm_vcpu *vcpu, u32 intid) +{ + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 || + intid >= VGIC_NR_PRIVATE_IRQS) + kvm_arm_resume_guest(vcpu->kvm); +} + +static unsigned long __vgic_mmio_read_active(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); u32 value = 0; @@ -359,6 +390,10 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, for (i = 0; i < len * 8; i++) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + /* + * Even for HW interrupts, don't evaluate the HW state as + * all the guest is interested in is the virtual state. + */ if (irq->active) value |= (1U << i); @@ -368,6 +403,29 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, return value; } +unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + u32 val; + + mutex_lock(&vcpu->kvm->lock); + vgic_access_active_prepare(vcpu, intid); + + val = __vgic_mmio_read_active(vcpu, addr, len); + + vgic_access_active_finish(vcpu, intid); + mutex_unlock(&vcpu->kvm->lock); + + return val; +} + +unsigned long vgic_uaccess_read_active(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + return __vgic_mmio_read_active(vcpu, addr, len); +} + /* Must be called with irq->irq_lock held */ static void vgic_hw_irq_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, bool active, bool is_uaccess) @@ -426,36 +484,6 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, raw_spin_unlock_irqrestore(&irq->irq_lock, flags); } -/* - * If we are fiddling with an IRQ's active state, we have to make sure the IRQ - * is not queued on some running VCPU's LRs, because then the change to the - * active state can be overwritten when the VCPU's state is synced coming back - * from the guest. - * - * For shared interrupts, we have to stop all the VCPUs because interrupts can - * be migrated while we don't hold the IRQ locks and we don't want to be - * chasing moving targets. - * - * For private interrupts we don't have to do anything because userspace - * accesses to the VGIC state already require all VCPUs to be stopped, and - * only the VCPU itself can modify its private interrupts active state, which - * guarantees that the VCPU is not running. - */ -static void vgic_change_active_prepare(struct kvm_vcpu *vcpu, u32 intid) -{ - if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 || - intid >= VGIC_NR_PRIVATE_IRQS) - kvm_arm_halt_guest(vcpu->kvm); -} - -/* See vgic_change_active_prepare */ -static void vgic_change_active_finish(struct kvm_vcpu *vcpu, u32 intid) -{ - if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 || - intid >= VGIC_NR_PRIVATE_IRQS) - kvm_arm_resume_guest(vcpu->kvm); -} - static void __vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) @@ -477,11 +505,11 @@ void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, u32 intid = VGIC_ADDR_TO_INTID(addr, 1); mutex_lock(&vcpu->kvm->lock); - vgic_change_active_prepare(vcpu, intid); + vgic_access_active_prepare(vcpu, intid); __vgic_mmio_write_cactive(vcpu, addr, len, val); - vgic_change_active_finish(vcpu, intid); + vgic_access_active_finish(vcpu, intid); mutex_unlock(&vcpu->kvm->lock); } @@ -514,11 +542,11 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, u32 intid = VGIC_ADDR_TO_INTID(addr, 1); mutex_lock(&vcpu->kvm->lock); - vgic_change_active_prepare(vcpu, intid); + vgic_access_active_prepare(vcpu, intid); __vgic_mmio_write_sactive(vcpu, addr, len, val); - vgic_change_active_finish(vcpu, intid); + vgic_access_active_finish(vcpu, intid); mutex_unlock(&vcpu->kvm->lock); } diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h index 5af2aefad435..30713a44e3fa 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.h +++ b/virt/kvm/arm/vgic/vgic-mmio.h @@ -152,6 +152,9 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len); +unsigned long vgic_uaccess_read_active(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val); -- cgit v1.2.3-59-g8ed1b From 41ee52ecbcdc98eb2a03241923b1a7d46f476bb3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 9 Apr 2020 13:05:26 +0100 Subject: KVM: arm: vgic: Only use the virtual state when userspace accesses enable bits There is no point in accessing the HW when writing to any of the ISENABLER/ICENABLER registers from userspace, as only the guest should be allowed to change the HW state. Introduce new userspace-specific accessors that deal solely with the virtual state. Reported-by: James Morse Tested-by: James Morse Reviewed-by: James Morse Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-mmio-v2.c | 6 ++++-- virt/kvm/arm/vgic/vgic-mmio-v3.c | 16 +++++++++------ virt/kvm/arm/vgic/vgic-mmio.c | 42 ++++++++++++++++++++++++++++++++++++++++ virt/kvm/arm/vgic/vgic-mmio.h | 8 ++++++++ 4 files changed, 64 insertions(+), 8 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index d63881f60e1a..f51c6e939c76 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -409,10 +409,12 @@ static const struct vgic_register_region vgic_v2_dist_registers[] = { NULL, vgic_mmio_uaccess_write_v2_group, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_SET, - vgic_mmio_read_enable, vgic_mmio_write_senable, NULL, NULL, 1, + vgic_mmio_read_enable, vgic_mmio_write_senable, + NULL, vgic_uaccess_write_senable, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_CLEAR, - vgic_mmio_read_enable, vgic_mmio_write_cenable, NULL, NULL, 1, + vgic_mmio_read_enable, vgic_mmio_write_cenable, + NULL, vgic_uaccess_write_cenable, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET, vgic_mmio_read_pending, vgic_mmio_write_spending, NULL, NULL, 1, diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index f2b37a081f26..416613f2400c 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -538,10 +538,12 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = { vgic_mmio_read_group, vgic_mmio_write_group, NULL, NULL, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISENABLER, - vgic_mmio_read_enable, vgic_mmio_write_senable, NULL, NULL, 1, + vgic_mmio_read_enable, vgic_mmio_write_senable, + NULL, vgic_uaccess_write_senable, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICENABLER, - vgic_mmio_read_enable, vgic_mmio_write_cenable, NULL, NULL, 1, + vgic_mmio_read_enable, vgic_mmio_write_cenable, + NULL, vgic_uaccess_write_cenable, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR, vgic_mmio_read_pending, vgic_mmio_write_spending, @@ -609,11 +611,13 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = { REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IGROUPR0, vgic_mmio_read_group, vgic_mmio_write_group, 4, VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ISENABLER0, - vgic_mmio_read_enable, vgic_mmio_write_senable, 4, + REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISENABLER0, + vgic_mmio_read_enable, vgic_mmio_write_senable, + NULL, vgic_uaccess_write_senable, 4, VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ICENABLER0, - vgic_mmio_read_enable, vgic_mmio_write_cenable, 4, + REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICENABLER0, + vgic_mmio_read_enable, vgic_mmio_write_cenable, + NULL, vgic_uaccess_write_cenable, 4, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISPENDR0, vgic_mmio_read_pending, vgic_mmio_write_spending, diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index b38e94e8f74a..6e30034d1464 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -184,6 +184,48 @@ void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu, } } +int vgic_uaccess_write_senable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->enabled = true; + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + + vgic_put_irq(vcpu->kvm, irq); + } + + return 0; +} + +int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->enabled = false; + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + vgic_put_irq(vcpu->kvm, irq); + } + + return 0; +} + unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len) { diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h index 30713a44e3fa..327d0a6938e4 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.h +++ b/virt/kvm/arm/vgic/vgic-mmio.h @@ -138,6 +138,14 @@ void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val); +int vgic_uaccess_write_senable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len); -- cgit v1.2.3-59-g8ed1b From ba1ed9e17b581c9a204ec1d72d40472dd8557edd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 9 Apr 2020 13:05:26 +0100 Subject: KVM: arm: vgic-v2: Only use the virtual state when userspace accesses pending bits There is no point in accessing the HW when writing to any of the ISPENDR/ICPENDR registers from userspace, as only the guest should be allowed to change the HW state. Introduce new userspace-specific accessors that deal solely with the virtual state. Note that the API differs from that of GICv3, where userspace exclusively uses ISPENDR to set the state. Too bad we can't reuse it. Fixes: 82e40f558de56 ("KVM: arm/arm64: vgic-v2: Handle SGI bits in GICD_I{S,C}PENDR0 as WI") Reviewed-by: James Morse Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-mmio-v2.c | 6 ++- virt/kvm/arm/vgic/vgic-mmio.c | 86 ++++++++++++++++++++++++++++++---------- virt/kvm/arm/vgic/vgic-mmio.h | 8 ++++ 3 files changed, 76 insertions(+), 24 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index f51c6e939c76..a016f07adc28 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -417,10 +417,12 @@ static const struct vgic_register_region vgic_v2_dist_registers[] = { NULL, vgic_uaccess_write_cenable, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET, - vgic_mmio_read_pending, vgic_mmio_write_spending, NULL, NULL, 1, + vgic_mmio_read_pending, vgic_mmio_write_spending, + NULL, vgic_uaccess_write_spending, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR, - vgic_mmio_read_pending, vgic_mmio_write_cpending, NULL, NULL, 1, + vgic_mmio_read_pending, vgic_mmio_write_cpending, + NULL, vgic_uaccess_write_cpending, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET, vgic_mmio_read_active, vgic_mmio_write_sactive, diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index 6e30034d1464..b2d73fc0d1ef 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -261,17 +261,6 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, return value; } -/* Must be called with irq->irq_lock held */ -static void vgic_hw_irq_spending(struct kvm_vcpu *vcpu, struct vgic_irq *irq, - bool is_uaccess) -{ - if (is_uaccess) - return; - - irq->pending_latch = true; - vgic_irq_set_phys_active(irq, true); -} - static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq) { return (vgic_irq_is_sgi(irq->intid) && @@ -282,7 +271,6 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) { - bool is_uaccess = !kvm_get_running_vcpu(); u32 intid = VGIC_ADDR_TO_INTID(addr, 1); int i; unsigned long flags; @@ -312,22 +300,48 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, continue; } + irq->pending_latch = true; if (irq->hw) - vgic_hw_irq_spending(vcpu, irq, is_uaccess); - else - irq->pending_latch = true; + vgic_irq_set_phys_active(irq, true); + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); vgic_put_irq(vcpu->kvm, irq); } } -/* Must be called with irq->irq_lock held */ -static void vgic_hw_irq_cpending(struct kvm_vcpu *vcpu, struct vgic_irq *irq, - bool is_uaccess) +int vgic_uaccess_write_spending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) { - if (is_uaccess) - return; + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->pending_latch = true; + + /* + * GICv2 SGIs are terribly broken. We can't restore + * the source of the interrupt, so just pick the vcpu + * itself as the source... + */ + if (is_vgic_v2_sgi(vcpu, irq)) + irq->source |= BIT(vcpu->vcpu_id); + + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + + vgic_put_irq(vcpu->kvm, irq); + } + + return 0; +} + +/* Must be called with irq->irq_lock held */ +static void vgic_hw_irq_cpending(struct kvm_vcpu *vcpu, struct vgic_irq *irq) +{ irq->pending_latch = false; /* @@ -350,7 +364,6 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) { - bool is_uaccess = !kvm_get_running_vcpu(); u32 intid = VGIC_ADDR_TO_INTID(addr, 1); int i; unsigned long flags; @@ -381,7 +394,7 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, } if (irq->hw) - vgic_hw_irq_cpending(vcpu, irq, is_uaccess); + vgic_hw_irq_cpending(vcpu, irq); else irq->pending_latch = false; @@ -390,6 +403,35 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, } } +int vgic_uaccess_write_cpending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + /* + * More fun with GICv2 SGIs! If we're clearing one of them + * from userspace, which source vcpu to clear? Let's not + * even think of it, and blow the whole set. + */ + if (is_vgic_v2_sgi(vcpu, irq)) + irq->source = 0; + + irq->pending_latch = false; + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + vgic_put_irq(vcpu->kvm, irq); + } + + return 0; +} /* * If we are fiddling with an IRQ's active state, we have to make sure the IRQ diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h index 327d0a6938e4..fefcca2b14dc 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.h +++ b/virt/kvm/arm/vgic/vgic-mmio.h @@ -157,6 +157,14 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val); +int vgic_uaccess_write_spending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +int vgic_uaccess_write_cpending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len); -- cgit v1.2.3-59-g8ed1b From 969ce8b5260d8ec01e6f1949d2927a86419663ce Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Tue, 14 Apr 2020 11:03:47 +0800 Subject: KVM: arm64: vgic-v3: Retire all pending LPIs on vcpu destroy It's likely that the vcpu fails to handle all virtual interrupts if userspace decides to destroy it, leaving the pending ones stay in the ap_list. If the un-handled one is a LPI, its vgic_irq structure will be eventually leaked because of an extra refcount increment in vgic_queue_irq_unlock(). This was detected by kmemleak on almost every guest destroy, the backtrace is as follows: unreferenced object 0xffff80725aed5500 (size 128): comm "CPU 5/KVM", pid 40711, jiffies 4298024754 (age 166366.512s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 08 01 a9 73 6d 80 ff ff ...........sm... c8 61 ee a9 00 20 ff ff 28 1e 55 81 6c 80 ff ff .a... ..(.U.l... backtrace: [<000000004bcaa122>] kmem_cache_alloc_trace+0x2dc/0x418 [<0000000069c7dabb>] vgic_add_lpi+0x88/0x418 [<00000000bfefd5c5>] vgic_its_cmd_handle_mapi+0x4dc/0x588 [<00000000cf993975>] vgic_its_process_commands.part.5+0x484/0x1198 [<000000004bd3f8e3>] vgic_its_process_commands+0x50/0x80 [<00000000b9a65b2b>] vgic_mmio_write_its_cwriter+0xac/0x108 [<0000000009641ebb>] dispatch_mmio_write+0xd0/0x188 [<000000008f79d288>] __kvm_io_bus_write+0x134/0x240 [<00000000882f39ac>] kvm_io_bus_write+0xe0/0x150 [<0000000078197602>] io_mem_abort+0x484/0x7b8 [<0000000060954e3c>] kvm_handle_guest_abort+0x4cc/0xa58 [<00000000e0d0cd65>] handle_exit+0x24c/0x770 [<00000000b44a7fad>] kvm_arch_vcpu_ioctl_run+0x460/0x1988 [<0000000025fb897c>] kvm_vcpu_ioctl+0x4f8/0xee0 [<000000003271e317>] do_vfs_ioctl+0x160/0xcd8 [<00000000e7f39607>] ksys_ioctl+0x98/0xd8 Fix it by retiring all pending LPIs in the ap_list on the destroy path. p.s. I can also reproduce it on a normal guest shutdown. It is because userspace still send LPIs to vcpu (through KVM_SIGNAL_MSI ioctl) while the guest is being shutdown and unable to handle it. A little strange though and haven't dig further... Reviewed-by: James Morse Signed-off-by: Zenghui Yu [maz: moved the distributor deallocation down to avoid an UAF splat] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200414030349.625-2-yuzenghui@huawei.com --- virt/kvm/arm/vgic/vgic-init.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index a963b9d766b7..30dbec9fe0b4 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -348,6 +348,12 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + /* + * Retire all pending LPIs on this vcpu anyway as we're + * going to destroy it. + */ + vgic_flush_pending_lpis(vcpu); + INIT_LIST_HEAD(&vgic_cpu->ap_list_head); } @@ -359,10 +365,10 @@ static void __kvm_vgic_destroy(struct kvm *kvm) vgic_debug_destroy(kvm); - kvm_vgic_dist_destroy(kvm); - kvm_for_each_vcpu(i, vcpu, kvm) kvm_vgic_vcpu_destroy(vcpu); + + kvm_vgic_dist_destroy(kvm); } void kvm_vgic_destroy(struct kvm *kvm) -- cgit v1.2.3-59-g8ed1b From 57bdb436ce869a45881d8aa4bc5dac8e072dd2b6 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Tue, 14 Apr 2020 11:03:48 +0800 Subject: KVM: arm64: vgic-its: Fix memory leak on the error path of vgic_add_lpi() If we're going to fail out the vgic_add_lpi(), let's make sure the allocated vgic_irq memory is also freed. Though it seems that both cases are unlikely to fail. Signed-off-by: Zenghui Yu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20200414030349.625-3-yuzenghui@huawei.com --- virt/kvm/arm/vgic/vgic-its.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index d53d34a33e35..c012a52b19f5 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -96,14 +96,21 @@ out_unlock: * We "cache" the configuration table entries in our struct vgic_irq's. * However we only have those structs for mapped IRQs, so we read in * the respective config data from memory here upon mapping the LPI. + * + * Should any of these fail, behave as if we couldn't create the LPI + * by dropping the refcount and returning the error. */ ret = update_lpi_config(kvm, irq, NULL, false); - if (ret) + if (ret) { + vgic_put_irq(kvm, irq); return ERR_PTR(ret); + } ret = vgic_v3_lpi_sync_pending_status(kvm, irq); - if (ret) + if (ret) { + vgic_put_irq(kvm, irq); return ERR_PTR(ret); + } return irq; } -- cgit v1.2.3-59-g8ed1b From dc87f6dd058a648cd2a35e4aa04592dccdc9f0c2 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Sat, 11 Apr 2020 20:33:52 -0500 Subject: gpio: pca953x: Fix pca953x_gpio_set_config pca953x_gpio_set_config is setup to support pull-up/down bias. Currently the driver uses a variable called 'config' to determine which options to use. Unfortunately, this is incorrect. This patch uses function pinconf_to_config_param(config), which converts this 'config' parameter back to pinconfig to determine which option to use. Fixes: 15add06841a3 ("gpio: pca953x: add ->set_config implementation") Signed-off-by: Adam Ford Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pca953x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 5638b4e5355f..4269ea9a817e 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -531,7 +531,7 @@ static int pca953x_gpio_set_config(struct gpio_chip *gc, unsigned int offset, { struct pca953x_chip *chip = gpiochip_get_data(gc); - switch (config) { + switch (pinconf_to_config_param(config)) { case PIN_CONFIG_BIAS_PULL_UP: case PIN_CONFIG_BIAS_PULL_DOWN: return pca953x_gpio_set_pull_up_down(chip, offset, config); -- cgit v1.2.3-59-g8ed1b From 4e1541593017d76cfa04bd666de8e4dbcfe3105d Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 15 Apr 2020 10:23:59 +0200 Subject: gpiolib: improve the robustness of watch/unwatch ioctl() This makes the new ioctl() a bit more robust - we now check if a line is already being watched and return -EBUSY if the user-space tries to start watching it again. Same for unwatch - return -EBUSY if user-space tries to unwatch a line that's not being watched. Fixes: 51c1064e82e7 ("gpiolib: add new ioctl() for monitoring changes in line info") Signed-off-by: Bartosz Golaszewski Reviewed-by: Linus Walleij --- drivers/gpio/gpiolib.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 40f2d7f69be2..29f6abe84d2e 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1227,6 +1227,7 @@ static long gpio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) void __user *ip = (void __user *)arg; struct gpio_desc *desc; __u32 offset; + int hwgpio; /* We fail any subsequent ioctl():s when the chip is gone */ if (!gc) @@ -1259,13 +1260,19 @@ static long gpio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (IS_ERR(desc)) return PTR_ERR(desc); + hwgpio = gpio_chip_hwgpio(desc); + + if (cmd == GPIO_GET_LINEINFO_WATCH_IOCTL && + test_bit(hwgpio, priv->watched_lines)) + return -EBUSY; + gpio_desc_to_lineinfo(desc, &lineinfo); if (copy_to_user(ip, &lineinfo, sizeof(lineinfo))) return -EFAULT; if (cmd == GPIO_GET_LINEINFO_WATCH_IOCTL) - set_bit(gpio_chip_hwgpio(desc), priv->watched_lines); + set_bit(hwgpio, priv->watched_lines); return 0; } else if (cmd == GPIO_GET_LINEHANDLE_IOCTL) { @@ -1280,7 +1287,12 @@ static long gpio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (IS_ERR(desc)) return PTR_ERR(desc); - clear_bit(gpio_chip_hwgpio(desc), priv->watched_lines); + hwgpio = gpio_chip_hwgpio(desc); + + if (!test_bit(hwgpio, priv->watched_lines)) + return -EBUSY; + + clear_bit(hwgpio, priv->watched_lines); return 0; } return -EINVAL; -- cgit v1.2.3-59-g8ed1b From 6409d049ce28bef35e13dfb8699fc7ee27469ba1 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 17 Apr 2020 09:01:51 +0200 Subject: gpiolib: don't call sleeping functions with a spinlock taken We must not call pinctrl_gpio_can_use_line() with the gpio_lock taken as it takes a mutex internally. Let's move the call before taking the spinlock and store the return value. This isn't perfect - there's a moment between calling pinctrl_gpio_can_use_line() and taking the spinlock where the situation can change but it isn't a regression either: previously this part wasn't protected at all and it only affects the information user-space is seeing. Reported-by: Geert Uytterhoeven Fixes: d2ac25798208 ("gpiolib: provide a dedicated function for setting lineinfo") Signed-off-by: Bartosz Golaszewski Acked-by: Linus Walleij --- drivers/gpio/gpiolib.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 29f6abe84d2e..bf9732ab7f91 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1158,8 +1158,19 @@ static void gpio_desc_to_lineinfo(struct gpio_desc *desc, struct gpioline_info *info) { struct gpio_chip *gc = desc->gdev->chip; + bool ok_for_pinctrl; unsigned long flags; + /* + * This function takes a mutex so we must check this before taking + * the spinlock. + * + * FIXME: find a non-racy way to retrieve this information. Maybe a + * lock common to both frameworks? + */ + ok_for_pinctrl = + pinctrl_gpio_can_use_line(gc->base + info->line_offset); + spin_lock_irqsave(&gpio_lock, flags); if (desc->name) { @@ -1186,7 +1197,7 @@ static void gpio_desc_to_lineinfo(struct gpio_desc *desc, test_bit(FLAG_USED_AS_IRQ, &desc->flags) || test_bit(FLAG_EXPORT, &desc->flags) || test_bit(FLAG_SYSFS, &desc->flags) || - !pinctrl_gpio_can_use_line(gc->base + info->line_offset)) + !ok_for_pinctrl) info->flags |= GPIOLINE_FLAG_KERNEL; if (test_bit(FLAG_IS_OUT, &desc->flags)) info->flags |= GPIOLINE_FLAG_IS_OUT; -- cgit v1.2.3-59-g8ed1b From b61ad5c0e21cc54107ca65b0b14bb57c2d39a3b6 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 30 Mar 2020 12:10:38 +0200 Subject: phy: tegra: Select USB_COMMON for usb_get_maximum_speed() The usb_get_maximum_speed() function is part of the usb-common module, so enable it by selecting the corresponding Kconfig symbol. While at it, also make sure to depend on USB_SUPPORT because USB_PHY requires that. This can lead to Kconfig conflicts if USB_SUPPORT is not enabled while attempting to enable PHY_TEGRA_XUSB. Reported-by: kbuild test robot Suggested-by: Nathan Chancellor Signed-off-by: Thierry Reding Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/tegra/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/phy/tegra/Kconfig b/drivers/phy/tegra/Kconfig index a208aca4ba7b..c591c958f1eb 100644 --- a/drivers/phy/tegra/Kconfig +++ b/drivers/phy/tegra/Kconfig @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only config PHY_TEGRA_XUSB tristate "NVIDIA Tegra XUSB pad controller driver" - depends on ARCH_TEGRA + depends on ARCH_TEGRA && USB_SUPPORT + select USB_COMMON select USB_CONN_GPIO select USB_PHY help -- cgit v1.2.3-59-g8ed1b From 45a76264c26fd8cfd0c9746196892d9b7e2657ee Mon Sep 17 00:00:00 2001 From: Arun Easi Date: Tue, 31 Mar 2020 03:40:14 -0700 Subject: scsi: qla2xxx: Fix hang when issuing nvme disconnect-all in NPIV In NPIV environment, a NPIV host may use a queue pair created by base host or other NPIVs, so the check for a queue pair created by this NPIV is not correct, and can cause an abort to fail, which in turn means the NVME command not returned. This leads to hang in nvme_fc layer in nvme_fc_delete_association() which waits for all I/Os to be returned, which is seen as hang in the application. Link: https://lore.kernel.org/r/20200331104015.24868-3-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Arun Easi Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_mbx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 4ed90437e8c4..d6c991bd1bde 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -3153,7 +3153,7 @@ qla24xx_abort_command(srb_t *sp) ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x108c, "Entered %s.\n", __func__); - if (vha->flags.qpairs_available && sp->qpair) + if (sp->qpair) req = sp->qpair->req; else return QLA_FUNCTION_FAILED; -- cgit v1.2.3-59-g8ed1b From c48f849d3f7a4ec1025105f446e29d395c4dcc2f Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 31 Mar 2020 03:40:15 -0700 Subject: scsi: qla2xxx: Delete all sessions before unregister local nvme port Delete all sessions before unregistering local nvme port. This allows nvme layer to decrement all active rport count down to zero. Once the count is down to zero, nvme would call qla to continue with the npiv port deletion. PID: 27448 TASK: ffff9e34b777c1c0 CPU: 0 COMMAND: "qaucli" 0 [ffff9e25e84abbd8] __schedule at ffffffff977858ca 1 [ffff9e25e84abc68] schedule at ffffffff97785d79 2 [ffff9e25e84abc78] schedule_timeout at ffffffff97783881 3 [ffff9e25e84abd28] wait_for_completion at ffffffff9778612d 4 [ffff9e25e84abd88] qla_nvme_delete at ffffffffc0e3024e [qla2xxx] 5 [ffff9e25e84abda8] qla24xx_vport_delete at ffffffffc0e024b9 [qla2xxx] 6 [ffff9e25e84abdf0] fc_vport_terminate at ffffffffc011c247 [scsi_transport_fc] 7 [ffff9e25e84abe28] store_fc_host_vport_delete at ffffffffc011cd94 [scsi_transport_fc] 8 [ffff9e25e84abe70] dev_attr_store at ffffffff974b376b 9 [ffff9e25e84abe80] sysfs_kf_write at ffffffff972d9a92 10 [ffff9e25e84abe90] kernfs_fop_write at ffffffff972d907b 11 [ffff9e25e84abec8] vfs_write at ffffffff9724c790 12 [ffff9e25e84abf08] sys_write at ffffffff9724d55f 13 [ffff9e25e84abf50] system_call_fastpath at ffffffff97792ed2 RIP: 00007fc0bd81a6fd RSP: 00007ffff78d9648 RFLAGS: 00010202 RAX: 0000000000000001 RBX: 0000000000000022 RCX: 00007ffff78d96e0 RDX: 0000000000000022 RSI: 00007ffff78d94e0 RDI: 0000000000000008 RBP: 00007ffff78d9440 R8: 0000000000000000 R9: 00007fc0bd48b2cd R10: 0000000000000017 R11: 0000000000000293 R12: 0000000000000000 R13: 00005624e4dac840 R14: 00005624e4da9a10 R15: 0000000000000000 ORIG_RAX: 0000000000000001 CS: 0033 SS: 002b Link: https://lore.kernel.org/r/20200331104015.24868-4-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_attr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 97cabd7e0014..33255968f03a 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -3031,11 +3031,11 @@ qla24xx_vport_delete(struct fc_vport *fc_vport) test_bit(FCPORT_UPDATE_NEEDED, &vha->dpc_flags)) msleep(1000); - qla_nvme_delete(vha); qla24xx_disable_vp(vha); qla2x00_wait_for_sess_deletion(vha); + qla_nvme_delete(vha); vha->flags.delete_progress = 1; qlt_remove_target(ha, vha); -- cgit v1.2.3-59-g8ed1b From d8dd25a461e4eec7190cb9d66616aceacc5110ad Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Sat, 25 Apr 2020 05:03:00 -0500 Subject: objtool: Fix stack offset tracking for indirect CFAs When the current frame address (CFA) is stored on the stack (i.e., cfa->base == CFI_SP_INDIRECT), objtool neglects to adjust the stack offset when there are subsequent pushes or pops. This results in bad ORC data at the end of the ENTER_IRQ_STACK macro, when it puts the previous stack pointer on the stack and does a subsequent push. This fixes the following unwinder warning: WARNING: can't dereference registers at 00000000f0a6bdba for ip interrupt_entry+0x9f/0xa0 Fixes: 627fce14809b ("objtool: Add ORC unwind table generation") Reported-by: Vince Weaver Reported-by: Dave Jones Reported-by: Steven Rostedt Reported-by: Vegard Nossum Reported-by: Joe Mario Reviewed-by: Miroslav Benes Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Jann Horn Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lore.kernel.org/r/853d5d691b29e250333332f09b8e27410b2d9924.1587808742.git.jpoimboe@redhat.com --- tools/objtool/check.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 4b170fd08a28..e7184641a40c 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1449,7 +1449,7 @@ static int update_insn_state_regs(struct instruction *insn, struct insn_state *s struct cfi_reg *cfa = &state->cfa; struct stack_op *op = &insn->stack_op; - if (cfa->base != CFI_SP) + if (cfa->base != CFI_SP && cfa->base != CFI_SP_INDIRECT) return 0; /* push */ -- cgit v1.2.3-59-g8ed1b From 06a9750edcffa808494d56da939085c35904e618 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Sat, 25 Apr 2020 05:03:01 -0500 Subject: x86/entry/64: Fix unwind hints in register clearing code The PUSH_AND_CLEAR_REGS macro zeroes each register immediately after pushing it. If an NMI or exception hits after a register is cleared, but before the UNWIND_HINT_REGS annotation, the ORC unwinder will wrongly think the previous value of the register was zero. This can confuse the unwinding process and cause it to exit early. Because ORC is simpler than DWARF, there are a limited number of unwind annotation states, so it's not possible to add an individual unwind hint after each push/clear combination. Instead, the register clearing instructions need to be consolidated and moved to after the UNWIND_HINT_REGS annotation. Fixes: 3f01daecd545 ("x86/entry/64: Introduce the PUSH_AND_CLEAN_REGS macro") Reviewed-by: Miroslav Benes Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Dave Jones Cc: Jann Horn Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Link: https://lore.kernel.org/r/68fd3d0bc92ae2d62ff7879d15d3684217d51f08.1587808742.git.jpoimboe@redhat.com --- arch/x86/entry/calling.h | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 0789e13ece90..1c7f13bb6728 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -98,13 +98,6 @@ For 32-bit we have the following conventions - kernel is built with #define SIZEOF_PTREGS 21*8 .macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0 - /* - * Push registers and sanitize registers of values that a - * speculation attack might otherwise want to exploit. The - * lower registers are likely clobbered well before they - * could be put to use in a speculative execution gadget. - * Interleave XOR with PUSH for better uop scheduling: - */ .if \save_ret pushq %rsi /* pt_regs->si */ movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */ @@ -114,34 +107,43 @@ For 32-bit we have the following conventions - kernel is built with pushq %rsi /* pt_regs->si */ .endif pushq \rdx /* pt_regs->dx */ - xorl %edx, %edx /* nospec dx */ pushq %rcx /* pt_regs->cx */ - xorl %ecx, %ecx /* nospec cx */ pushq \rax /* pt_regs->ax */ pushq %r8 /* pt_regs->r8 */ - xorl %r8d, %r8d /* nospec r8 */ pushq %r9 /* pt_regs->r9 */ - xorl %r9d, %r9d /* nospec r9 */ pushq %r10 /* pt_regs->r10 */ - xorl %r10d, %r10d /* nospec r10 */ pushq %r11 /* pt_regs->r11 */ - xorl %r11d, %r11d /* nospec r11*/ pushq %rbx /* pt_regs->rbx */ - xorl %ebx, %ebx /* nospec rbx*/ pushq %rbp /* pt_regs->rbp */ - xorl %ebp, %ebp /* nospec rbp*/ pushq %r12 /* pt_regs->r12 */ - xorl %r12d, %r12d /* nospec r12*/ pushq %r13 /* pt_regs->r13 */ - xorl %r13d, %r13d /* nospec r13*/ pushq %r14 /* pt_regs->r14 */ - xorl %r14d, %r14d /* nospec r14*/ pushq %r15 /* pt_regs->r15 */ - xorl %r15d, %r15d /* nospec r15*/ UNWIND_HINT_REGS + .if \save_ret pushq %rsi /* return address on top of stack */ .endif + + /* + * Sanitize registers of values that a speculation attack might + * otherwise want to exploit. The lower registers are likely clobbered + * well before they could be put to use in a speculative execution + * gadget. + */ + xorl %edx, %edx /* nospec dx */ + xorl %ecx, %ecx /* nospec cx */ + xorl %r8d, %r8d /* nospec r8 */ + xorl %r9d, %r9d /* nospec r9 */ + xorl %r10d, %r10d /* nospec r10 */ + xorl %r11d, %r11d /* nospec r11 */ + xorl %ebx, %ebx /* nospec rbx */ + xorl %ebp, %ebp /* nospec rbp */ + xorl %r12d, %r12d /* nospec r12 */ + xorl %r13d, %r13d /* nospec r13 */ + xorl %r14d, %r14d /* nospec r14 */ + xorl %r15d, %r15d /* nospec r15 */ + .endm .macro POP_REGS pop_rdi=1 skip_r11rcx=0 -- cgit v1.2.3-59-g8ed1b From 1fb143634a38095b641a3a21220774799772dc4c Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Sat, 25 Apr 2020 05:03:02 -0500 Subject: x86/entry/64: Fix unwind hints in kernel exit path In swapgs_restore_regs_and_return_to_usermode, after the stack is switched to the trampoline stack, the existing UNWIND_HINT_REGS hint is no longer valid, which can result in the following ORC unwinder warning: WARNING: can't dereference registers at 000000003aeb0cdd for ip swapgs_restore_regs_and_return_to_usermode+0x93/0xa0 For full correctness, we could try to add complicated unwind hints so the unwinder could continue to find the registers, but when when it's this close to kernel exit, unwind hints aren't really needed anymore and it's fine to just use an empty hint which tells the unwinder to stop. For consistency, also move the UNWIND_HINT_EMPTY in entry_SYSCALL_64_after_hwframe to a similar location. Fixes: 3e3b9293d392 ("x86/entry/64: Return to userspace from the trampoline stack") Reported-by: Vince Weaver Reported-by: Dave Jones Reported-by: Dr. David Alan Gilbert Reported-by: Joe Mario Reported-by: Jann Horn Reported-by: Linus Torvalds Reviewed-by: Miroslav Benes Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lore.kernel.org/r/60ea8f562987ed2d9ace2977502fe481c0d7c9a0.1587808742.git.jpoimboe@redhat.com --- arch/x86/entry/entry_64.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 0e9504fabe52..6b0d679efd6b 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -249,7 +249,6 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) */ syscall_return_via_sysret: /* rcx and r11 are already restored (see code above) */ - UNWIND_HINT_EMPTY POP_REGS pop_rdi=0 skip_r11rcx=1 /* @@ -258,6 +257,7 @@ syscall_return_via_sysret: */ movq %rsp, %rdi movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp + UNWIND_HINT_EMPTY pushq RSP-RDI(%rdi) /* RSP */ pushq (%rdi) /* RDI */ @@ -637,6 +637,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) */ movq %rsp, %rdi movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp + UNWIND_HINT_EMPTY /* Copy the IRET frame to the trampoline stack. */ pushq 6*8(%rdi) /* SS */ -- cgit v1.2.3-59-g8ed1b From 96c64806b4bf35f5edb465cafa6cec490e424a30 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Sat, 25 Apr 2020 05:03:03 -0500 Subject: x86/entry/64: Fix unwind hints in __switch_to_asm() UNWIND_HINT_FUNC has some limitations: specifically, it doesn't reset all the registers to undefined. This causes objtool to get confused about the RBP push in __switch_to_asm(), resulting in bad ORC data. While __switch_to_asm() does do some stack magic, it's otherwise a normal callable-from-C function, so just annotate it as a function, which makes objtool happy and allows it to produces the correct hints automatically. Fixes: 8c1f75587a18 ("x86/entry/64: Add unwind hint annotations") Reviewed-by: Miroslav Benes Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Dave Jones Cc: Jann Horn Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Link: https://lore.kernel.org/r/03d0411920d10f7418f2e909210d8e9a3b2ab081.1587808742.git.jpoimboe@redhat.com --- arch/x86/entry/entry_64.S | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 6b0d679efd6b..34a588950fe1 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -279,8 +279,7 @@ SYM_CODE_END(entry_SYSCALL_64) * %rdi: prev task * %rsi: next task */ -SYM_CODE_START(__switch_to_asm) - UNWIND_HINT_FUNC +SYM_FUNC_START(__switch_to_asm) /* * Save callee-saved registers * This must match the order in inactive_task_frame @@ -321,7 +320,7 @@ SYM_CODE_START(__switch_to_asm) popq %rbp jmp __switch_to -SYM_CODE_END(__switch_to_asm) +SYM_FUNC_END(__switch_to_asm) /* * A newly forked process directly context switches into this address. -- cgit v1.2.3-59-g8ed1b From f977df7b7ca45a4ac4b66d30a8931d0434c394b1 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Sat, 25 Apr 2020 05:03:04 -0500 Subject: x86/entry/64: Fix unwind hints in rewind_stack_do_exit() The LEAQ instruction in rewind_stack_do_exit() moves the stack pointer directly below the pt_regs at the top of the task stack before calling do_exit(). Tell the unwinder to expect pt_regs. Fixes: 8c1f75587a18 ("x86/entry/64: Add unwind hint annotations") Reviewed-by: Miroslav Benes Signed-off-by: Jann Horn Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Dave Jones Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Link: https://lore.kernel.org/r/68c33e17ae5963854916a46f522624f8e1d264f2.1587808742.git.jpoimboe@redhat.com --- arch/x86/entry/entry_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 34a588950fe1..9fe0d5cad8e4 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1739,7 +1739,7 @@ SYM_CODE_START(rewind_stack_do_exit) movq PER_CPU_VAR(cpu_current_top_of_stack), %rax leaq -PTREGS_SIZE(%rax), %rsp - UNWIND_HINT_FUNC sp_offset=PTREGS_SIZE + UNWIND_HINT_REGS call do_exit SYM_CODE_END(rewind_stack_do_exit) -- cgit v1.2.3-59-g8ed1b From 153eb2223c794251b28400f3f74862e090d23f16 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Sat, 25 Apr 2020 05:03:05 -0500 Subject: x86/unwind/orc: Convert global variables to static These variables aren't used outside of unwind_orc.c, make them static. Also annotate some of them with '__ro_after_init', as applicable. Reviewed-by: Miroslav Benes Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Dave Jones Cc: Jann Horn Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Link: https://lore.kernel.org/r/43ae310bf7822b9862e571f36ae3474cfde8f301.1587808742.git.jpoimboe@redhat.com --- arch/x86/kernel/unwind_orc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index e9cc182aa97e..64889da666f4 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -15,12 +15,12 @@ extern int __stop_orc_unwind_ip[]; extern struct orc_entry __start_orc_unwind[]; extern struct orc_entry __stop_orc_unwind[]; -static DEFINE_MUTEX(sort_mutex); -int *cur_orc_ip_table = __start_orc_unwind_ip; -struct orc_entry *cur_orc_table = __start_orc_unwind; +static bool orc_init __ro_after_init; +static unsigned int lookup_num_blocks __ro_after_init; -unsigned int lookup_num_blocks; -bool orc_init; +static DEFINE_MUTEX(sort_mutex); +static int *cur_orc_ip_table = __start_orc_unwind_ip; +static struct orc_entry *cur_orc_table = __start_orc_unwind; static inline unsigned long orc_ip(const int *ip) { -- cgit v1.2.3-59-g8ed1b From b08418b54831255a7e3700d6bf7dfc2bdae25cd7 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Sat, 25 Apr 2020 05:03:06 -0500 Subject: x86/unwind: Prevent false warnings for non-current tasks There's some daring kernel code out there which dumps the stack of another task without first making sure the task is inactive. If the task happens to be running while the unwinder is reading the stack, unusual unwinder warnings can result. There's no race-free way for the unwinder to know whether such a warning is legitimate, so just disable unwinder warnings for all non-current tasks. Reviewed-by: Miroslav Benes Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Dave Jones Cc: Jann Horn Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Link: https://lore.kernel.org/r/ec424a2aea1d461eb30cab48a28c6433de2ab784.1587808742.git.jpoimboe@redhat.com --- arch/x86/kernel/dumpstack_64.c | 3 ++- arch/x86/kernel/unwind_frame.c | 3 +++ arch/x86/kernel/unwind_orc.c | 40 +++++++++++++++++++++++----------------- 3 files changed, 28 insertions(+), 18 deletions(-) diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 87b97897a881..460ae7f66818 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -183,7 +183,8 @@ recursion_check: */ if (visit_mask) { if (*visit_mask & (1UL << info->type)) { - printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type); + if (task == current) + printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type); goto unknown; } *visit_mask |= 1UL << info->type; diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index a224b5ab103f..54226110bc7f 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -344,6 +344,9 @@ bad_address: if (IS_ENABLED(CONFIG_X86_32)) goto the_end; + if (state->task != current) + goto the_end; + if (state->regs) { printk_deferred_once(KERN_WARNING "WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n", diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 64889da666f4..45166fd50be3 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -8,7 +8,13 @@ #include #define orc_warn(fmt, ...) \ - printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__) + printk_deferred_once(KERN_WARNING "WARNING: " fmt, ##__VA_ARGS__) + +#define orc_warn_current(args...) \ +({ \ + if (state->task == current) \ + orc_warn(args); \ +}) extern int __start_orc_unwind_ip[]; extern int __stop_orc_unwind_ip[]; @@ -446,8 +452,8 @@ bool unwind_next_frame(struct unwind_state *state) case ORC_REG_R10: if (!state->regs || !state->full_regs) { - orc_warn("missing regs for base reg R10 at ip %pB\n", - (void *)state->ip); + orc_warn_current("missing R10 value at %pB\n", + (void *)state->ip); goto err; } sp = state->regs->r10; @@ -455,8 +461,8 @@ bool unwind_next_frame(struct unwind_state *state) case ORC_REG_R13: if (!state->regs || !state->full_regs) { - orc_warn("missing regs for base reg R13 at ip %pB\n", - (void *)state->ip); + orc_warn_current("missing R13 value at %pB\n", + (void *)state->ip); goto err; } sp = state->regs->r13; @@ -464,8 +470,8 @@ bool unwind_next_frame(struct unwind_state *state) case ORC_REG_DI: if (!state->regs || !state->full_regs) { - orc_warn("missing regs for base reg DI at ip %pB\n", - (void *)state->ip); + orc_warn_current("missing RDI value at %pB\n", + (void *)state->ip); goto err; } sp = state->regs->di; @@ -473,15 +479,15 @@ bool unwind_next_frame(struct unwind_state *state) case ORC_REG_DX: if (!state->regs || !state->full_regs) { - orc_warn("missing regs for base reg DX at ip %pB\n", - (void *)state->ip); + orc_warn_current("missing DX value at %pB\n", + (void *)state->ip); goto err; } sp = state->regs->dx; break; default: - orc_warn("unknown SP base reg %d for ip %pB\n", + orc_warn("unknown SP base reg %d at %pB\n", orc->sp_reg, (void *)state->ip); goto err; } @@ -509,8 +515,8 @@ bool unwind_next_frame(struct unwind_state *state) case ORC_TYPE_REGS: if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) { - orc_warn("can't dereference registers at %p for ip %pB\n", - (void *)sp, (void *)orig_ip); + orc_warn_current("can't access registers at %pB\n", + (void *)orig_ip); goto err; } @@ -521,8 +527,8 @@ bool unwind_next_frame(struct unwind_state *state) case ORC_TYPE_REGS_IRET: if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) { - orc_warn("can't dereference iret registers at %p for ip %pB\n", - (void *)sp, (void *)orig_ip); + orc_warn_current("can't access iret registers at %pB\n", + (void *)orig_ip); goto err; } @@ -532,7 +538,7 @@ bool unwind_next_frame(struct unwind_state *state) break; default: - orc_warn("unknown .orc_unwind entry type %d for ip %pB\n", + orc_warn("unknown .orc_unwind entry type %d at %pB\n", orc->type, (void *)orig_ip); break; } @@ -564,8 +570,8 @@ bool unwind_next_frame(struct unwind_state *state) if (state->stack_info.type == prev_type && on_stack(&state->stack_info, (void *)state->sp, sizeof(long)) && state->sp <= prev_sp) { - orc_warn("stack going in the wrong direction? ip=%pB\n", - (void *)orig_ip); + orc_warn_current("stack going in the wrong direction? at %pB\n", + (void *)orig_ip); goto err; } -- cgit v1.2.3-59-g8ed1b From f1d9a2abff66aa8156fbc1493abed468db63ea48 Mon Sep 17 00:00:00 2001 From: Miroslav Benes Date: Sat, 25 Apr 2020 05:03:07 -0500 Subject: x86/unwind/orc: Don't skip the first frame for inactive tasks When unwinding an inactive task, the ORC unwinder skips the first frame by default. If both the 'regs' and 'first_frame' parameters of unwind_start() are NULL, 'state->sp' and 'first_frame' are later initialized to the same value for an inactive task. Given there is a "less than or equal to" comparison used at the end of __unwind_start() for skipping stack frames, the first frame is skipped. Drop the equal part of the comparison and make the behavior equivalent to the frame pointer unwinder. Fixes: ee9f8fce9964 ("x86/unwind: Add the ORC unwinder") Reviewed-by: Miroslav Benes Signed-off-by: Miroslav Benes Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Dave Jones Cc: Jann Horn Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Link: https://lore.kernel.org/r/7f08db872ab59e807016910acdbe82f744de7065.1587808742.git.jpoimboe@redhat.com --- arch/x86/kernel/unwind_orc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 45166fd50be3..e9f5a20c69c6 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -657,7 +657,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, /* Otherwise, skip ahead to the user-specified starting frame: */ while (!unwind_done(state) && (!on_stack(&state->stack_info, first_frame, sizeof(long)) || - state->sp <= (unsigned long)first_frame)) + state->sp < (unsigned long)first_frame)) unwind_next_frame(state); return; -- cgit v1.2.3-59-g8ed1b From 98d0c8ebf77e0ba7c54a9ae05ea588f0e9e3f46e Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Sat, 25 Apr 2020 05:03:08 -0500 Subject: x86/unwind/orc: Prevent unwinding before ORC initialization If the unwinder is called before the ORC data has been initialized, orc_find() returns NULL, and it tries to fall back to using frame pointers. This can cause some unexpected warnings during boot. Move the 'orc_init' check from orc_find() to __unwind_init(), so that it doesn't even try to unwind from an uninitialized state. Fixes: ee9f8fce9964 ("x86/unwind: Add the ORC unwinder") Reviewed-by: Miroslav Benes Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Dave Jones Cc: Jann Horn Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Link: https://lore.kernel.org/r/069d1499ad606d85532eb32ce39b2441679667d5.1587808742.git.jpoimboe@redhat.com --- arch/x86/kernel/unwind_orc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index e9f5a20c69c6..cb11567361cc 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -148,9 +148,6 @@ static struct orc_entry *orc_find(unsigned long ip) { static struct orc_entry *orc; - if (!orc_init) - return NULL; - if (ip == 0) return &null_orc_entry; @@ -591,6 +588,9 @@ EXPORT_SYMBOL_GPL(unwind_next_frame); void __unwind_start(struct unwind_state *state, struct task_struct *task, struct pt_regs *regs, unsigned long *first_frame) { + if (!orc_init) + goto done; + memset(state, 0, sizeof(*state)); state->task = task; -- cgit v1.2.3-59-g8ed1b From a0f81bf26888048100bf017fadf438a5bdffa8d8 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Sat, 25 Apr 2020 05:06:13 -0500 Subject: x86/unwind/orc: Fix error path for bad ORC entry type If the ORC entry type is unknown, nothing else can be done other than reporting an error. Exit the function instead of breaking out of the switch statement. Fixes: ee9f8fce9964 ("x86/unwind: Add the ORC unwinder") Reviewed-by: Miroslav Benes Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Dave Jones Cc: Jann Horn Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Link: https://lore.kernel.org/r/a7fa668ca6eabbe81ab18b2424f15adbbfdc810a.1587808742.git.jpoimboe@redhat.com --- arch/x86/kernel/unwind_orc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index cb11567361cc..33b80a7f998f 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -537,7 +537,7 @@ bool unwind_next_frame(struct unwind_state *state) default: orc_warn("unknown .orc_unwind entry type %d at %pB\n", orc->type, (void *)orig_ip); - break; + goto err; } /* Find BP: */ -- cgit v1.2.3-59-g8ed1b From 81b67439d147677d844d492fcbd03712ea438f42 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Sat, 25 Apr 2020 05:06:14 -0500 Subject: x86/unwind/orc: Fix premature unwind stoppage due to IRET frames The following execution path is possible: fsnotify() [ realign the stack and store previous SP in R10 ] [ only IRET regs saved ] common_interrupt() interrupt_entry() [ full pt_regs saved ] ... [ unwind stack ] When the unwinder goes through the NMI and the IRQ on the stack, and then sees fsnotify(), it doesn't have access to the value of R10, because it only has the five IRET registers. So the unwind stops prematurely. However, because the interrupt_entry() code is careful not to clobber R10 before saving the full regs, the unwinder should be able to read R10 from the previously saved full pt_regs associated with the NMI. Handle this case properly. When encountering an IRET regs frame immediately after a full pt_regs frame, use the pt_regs as a backup which can be used to get the C register values. Also, note that a call frame resets the 'prev_regs' value, because a function is free to clobber the registers. For this fix to work, the IRET and full regs frames must be adjacent, with no FUNC frames in between. So replace the FUNC hint in interrupt_entry() with an IRET_REGS hint. Fixes: ee9f8fce9964 ("x86/unwind: Add the ORC unwinder") Reviewed-by: Miroslav Benes Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Dave Jones Cc: Jann Horn Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Link: https://lore.kernel.org/r/97a408167cc09f1cfa0de31a7b70dd88868d743f.1587808742.git.jpoimboe@redhat.com --- arch/x86/entry/entry_64.S | 4 ++-- arch/x86/include/asm/unwind.h | 2 +- arch/x86/kernel/unwind_orc.c | 51 +++++++++++++++++++++++++++++++++---------- 3 files changed, 43 insertions(+), 14 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 9fe0d5cad8e4..3063aa9090f9 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -511,7 +511,7 @@ SYM_CODE_END(spurious_entries_start) * +----------------------------------------------------+ */ SYM_CODE_START(interrupt_entry) - UNWIND_HINT_FUNC + UNWIND_HINT_IRET_REGS offset=16 ASM_CLAC cld @@ -543,9 +543,9 @@ SYM_CODE_START(interrupt_entry) pushq 5*8(%rdi) /* regs->eflags */ pushq 4*8(%rdi) /* regs->cs */ pushq 3*8(%rdi) /* regs->ip */ + UNWIND_HINT_IRET_REGS pushq 2*8(%rdi) /* regs->orig_ax */ pushq 8(%rdi) /* return address */ - UNWIND_HINT_FUNC movq (%rdi), %rdi jmp 2f diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h index 499578f7e6d7..70fc159ebe69 100644 --- a/arch/x86/include/asm/unwind.h +++ b/arch/x86/include/asm/unwind.h @@ -19,7 +19,7 @@ struct unwind_state { #if defined(CONFIG_UNWINDER_ORC) bool signal, full_regs; unsigned long sp, bp, ip; - struct pt_regs *regs; + struct pt_regs *regs, *prev_regs; #elif defined(CONFIG_UNWINDER_FRAME_POINTER) bool got_irq; unsigned long *bp, *orig_sp, ip; diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 33b80a7f998f..0ebc11a8bb45 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -384,9 +384,38 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr return true; } +/* + * If state->regs is non-NULL, and points to a full pt_regs, just get the reg + * value from state->regs. + * + * Otherwise, if state->regs just points to IRET regs, and the previous frame + * had full regs, it's safe to get the value from the previous regs. This can + * happen when early/late IRQ entry code gets interrupted by an NMI. + */ +static bool get_reg(struct unwind_state *state, unsigned int reg_off, + unsigned long *val) +{ + unsigned int reg = reg_off/8; + + if (!state->regs) + return false; + + if (state->full_regs) { + *val = ((unsigned long *)state->regs)[reg]; + return true; + } + + if (state->prev_regs) { + *val = ((unsigned long *)state->prev_regs)[reg]; + return true; + } + + return false; +} + bool unwind_next_frame(struct unwind_state *state) { - unsigned long ip_p, sp, orig_ip = state->ip, prev_sp = state->sp; + unsigned long ip_p, sp, tmp, orig_ip = state->ip, prev_sp = state->sp; enum stack_type prev_type = state->stack_info.type; struct orc_entry *orc; bool indirect = false; @@ -448,39 +477,35 @@ bool unwind_next_frame(struct unwind_state *state) break; case ORC_REG_R10: - if (!state->regs || !state->full_regs) { + if (!get_reg(state, offsetof(struct pt_regs, r10), &sp)) { orc_warn_current("missing R10 value at %pB\n", (void *)state->ip); goto err; } - sp = state->regs->r10; break; case ORC_REG_R13: - if (!state->regs || !state->full_regs) { + if (!get_reg(state, offsetof(struct pt_regs, r13), &sp)) { orc_warn_current("missing R13 value at %pB\n", (void *)state->ip); goto err; } - sp = state->regs->r13; break; case ORC_REG_DI: - if (!state->regs || !state->full_regs) { + if (!get_reg(state, offsetof(struct pt_regs, di), &sp)) { orc_warn_current("missing RDI value at %pB\n", (void *)state->ip); goto err; } - sp = state->regs->di; break; case ORC_REG_DX: - if (!state->regs || !state->full_regs) { + if (!get_reg(state, offsetof(struct pt_regs, dx), &sp)) { orc_warn_current("missing DX value at %pB\n", (void *)state->ip); goto err; } - sp = state->regs->dx; break; default: @@ -507,6 +532,7 @@ bool unwind_next_frame(struct unwind_state *state) state->sp = sp; state->regs = NULL; + state->prev_regs = NULL; state->signal = false; break; @@ -518,6 +544,7 @@ bool unwind_next_frame(struct unwind_state *state) } state->regs = (struct pt_regs *)sp; + state->prev_regs = NULL; state->full_regs = true; state->signal = true; break; @@ -529,6 +556,8 @@ bool unwind_next_frame(struct unwind_state *state) goto err; } + if (state->full_regs) + state->prev_regs = state->regs; state->regs = (void *)sp - IRET_FRAME_OFFSET; state->full_regs = false; state->signal = true; @@ -543,8 +572,8 @@ bool unwind_next_frame(struct unwind_state *state) /* Find BP: */ switch (orc->bp_reg) { case ORC_REG_UNDEFINED: - if (state->regs && state->full_regs) - state->bp = state->regs->bp; + if (get_reg(state, offsetof(struct pt_regs, bp), &tmp)) + state->bp = tmp; break; case ORC_REG_PREV_SP: -- cgit v1.2.3-59-g8ed1b From 53fb6e990d782ded62d7c76d566e107c03393b74 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Sat, 25 Apr 2020 14:19:01 -0500 Subject: objtool: Fix infinite loop in for_offset_range() Randy reported that objtool got stuck in an infinite loop when processing drivers/i2c/busses/i2c-parport.o. It was caused by the following code: 00000000000001fd : 1fd: 48 b8 00 00 00 00 00 movabs $0x0,%rax 204: 00 00 00 1ff: R_X86_64_64 .rodata-0x8 207: 41 55 push %r13 209: 41 89 f5 mov %esi,%r13d 20c: 41 54 push %r12 20e: 49 89 fc mov %rdi,%r12 211: 55 push %rbp 212: 48 89 d5 mov %rdx,%rbp 215: 53 push %rbx 216: 0f b6 5a 01 movzbl 0x1(%rdx),%ebx 21a: 48 8d 34 dd 00 00 00 lea 0x0(,%rbx,8),%rsi 221: 00 21e: R_X86_64_32S .rodata 222: 48 89 f1 mov %rsi,%rcx 225: 48 29 c1 sub %rax,%rcx find_jump_table() saw the .rodata reference and tried to find a jump table associated with it (though there wasn't one). The -0x8 rela addend is unusual. It caused find_jump_table() to send a negative table_offset (unsigned 0xfffffffffffffff8) to find_rela_by_dest(). The negative offset should have been harmless, but it actually threw for_offset_range() for a loop... literally. When the mask value got incremented past the end value, it also wrapped to zero, causing the loop exit condition to remain true forever. Prevent this scenario from happening by ensuring the incremented value is always >= the starting value. Fixes: 74b873e49d92 ("objtool: Optimize find_rela_by_dest_range()") Reported-by: Randy Dunlap Tested-by: Randy Dunlap Acked-by: Randy Dunlap Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Julien Thierry Cc: Miroslav Benes Cc: Peter Zijlstra Link: https://lore.kernel.org/r/02b719674b031800b61e33c30b2e823183627c19.1587842122.git.jpoimboe@redhat.com --- tools/objtool/elf.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index ebbb10c61e24..c227a2e55751 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -87,9 +87,10 @@ struct elf { #define OFFSET_STRIDE (1UL << OFFSET_STRIDE_BITS) #define OFFSET_STRIDE_MASK (~(OFFSET_STRIDE - 1)) -#define for_offset_range(_offset, _start, _end) \ - for (_offset = ((_start) & OFFSET_STRIDE_MASK); \ - _offset <= ((_end) & OFFSET_STRIDE_MASK); \ +#define for_offset_range(_offset, _start, _end) \ + for (_offset = ((_start) & OFFSET_STRIDE_MASK); \ + _offset >= ((_start) & OFFSET_STRIDE_MASK) && \ + _offset <= ((_end) & OFFSET_STRIDE_MASK); \ _offset += OFFSET_STRIDE) static inline u32 sec_offset_hash(struct section *sec, unsigned long offset) -- cgit v1.2.3-59-g8ed1b From 8aebfffacfa379ba400da573a5bf9e49634e38cb Mon Sep 17 00:00:00 2001 From: Xiyu Yang Date: Sat, 25 Apr 2020 20:52:26 +0800 Subject: configfs: fix config_item refcnt leak in configfs_rmdir() configfs_rmdir() invokes configfs_get_config_item(), which returns a reference of the specified config_item object to "parent_item" with increased refcnt. When configfs_rmdir() returns, local variable "parent_item" becomes invalid, so the refcount should be decreased to keep refcount balanced. The reference counting issue happens in one exception handling path of configfs_rmdir(). When down_write_killable() fails, the function forgets to decrease the refcnt increased by configfs_get_config_item(), causing a refcnt leak. Fix this issue by calling config_item_put() when down_write_killable() fails. Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Signed-off-by: Christoph Hellwig --- fs/configfs/dir.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index cf7b7e1d5bd7..cb733652ecca 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1519,6 +1519,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) spin_lock(&configfs_dirent_lock); configfs_detach_rollback(dentry); spin_unlock(&configfs_dirent_lock); + config_item_put(parent_item); return -EINTR; } frag->frag_dead = true; -- cgit v1.2.3-59-g8ed1b From 67321e02fb2da91a6a6c3fb059bf89d10ccda8ad Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 21 Apr 2020 04:18:15 +0000 Subject: phy: qcom-qusb2: Re add "qcom,sdm845-qusb2-phy" compat string This patch fixes a regression in 5.7-rc1+ In commit 8fe75cd4cddf ("phy: qcom-qusb2: Add generic QUSB2 V2 PHY support"), the change was made to add "qcom,qusb2-v2-phy" as a generic compat string. However the change also removed the "qcom,sdm845-qusb2-phy" compat string, which is documented in the binding and already in use. This patch re-adds the "qcom,sdm845-qusb2-phy" compat string which allows the driver to continue to work with existing dts entries such as found on the db845c. Cc: Andy Gross Cc: Bjorn Andersson Cc: Rob Herring Cc: Mark Rutland Cc: Doug Anderson Cc: Manu Gautam Cc: Sandeep Maheswaram Cc: Matthias Kaehlcke Cc: Stephen Boyd Cc: Kishon Vijay Abraham I Cc: linux-arm-msm@vger.kernel.org Cc: devicetree@vger.kernel.org Reviewed-by: Stephen Boyd Reviewed-by: Douglas Anderson Reviewed-by: Bjorn Andersson Fixes: 8fe75cd4cddf ("phy: qcom-qusb2: Add generic QUSB2 V2 PHY support") Reported-by: YongQin Liu Signed-off-by: John Stultz Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/qualcomm/phy-qcom-qusb2.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qusb2.c b/drivers/phy/qualcomm/phy-qcom-qusb2.c index 3708d43b7508..393011a05b48 100644 --- a/drivers/phy/qualcomm/phy-qcom-qusb2.c +++ b/drivers/phy/qualcomm/phy-qcom-qusb2.c @@ -815,6 +815,13 @@ static const struct of_device_id qusb2_phy_of_match_table[] = { }, { .compatible = "qcom,msm8998-qusb2-phy", .data = &msm8998_phy_cfg, + }, { + /* + * Deprecated. Only here to support legacy device + * trees that didn't include "qcom,qusb2-v2-phy" + */ + .compatible = "qcom,sdm845-qusb2-phy", + .data = &qusb2_v2_phy_cfg, }, { .compatible = "qcom,qusb2-v2-phy", .data = &qusb2_v2_phy_cfg, -- cgit v1.2.3-59-g8ed1b From 31c9590ae468478fe47dc0f5f0d3562b2f69450e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 18 Apr 2020 21:06:23 -0400 Subject: SUNRPC: Add "@len" parameter to gss_unwrap() Refactor: This is a pre-requisite to fixing the client-side ralign computation in gss_unwrap_resp_priv(). The length value is passed in explicitly rather that as the value of buf->len. This will subsequently allow gss_unwrap_kerberos_v1() to compute a slack and align value, instead of computing it in gss_unwrap_resp_priv(). Fixes: 35e77d21baa0 ("SUNRPC: Add rpc_auth::au_ralign field") Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_api.h | 2 ++ include/linux/sunrpc/gss_krb5.h | 6 +++--- net/sunrpc/auth_gss/auth_gss.c | 4 ++-- net/sunrpc/auth_gss/gss_krb5_crypto.c | 8 ++++---- net/sunrpc/auth_gss/gss_krb5_wrap.c | 26 +++++++++++++++----------- net/sunrpc/auth_gss/gss_mech_switch.c | 3 ++- net/sunrpc/auth_gss/svcauth_gss.c | 8 ++------ 7 files changed, 30 insertions(+), 27 deletions(-) diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index 48c1b1674cbf..e9a79518d652 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -66,6 +66,7 @@ u32 gss_wrap( u32 gss_unwrap( struct gss_ctx *ctx_id, int offset, + int len, struct xdr_buf *inbuf); u32 gss_delete_sec_context( struct gss_ctx **ctx_id); @@ -126,6 +127,7 @@ struct gss_api_ops { u32 (*gss_unwrap)( struct gss_ctx *ctx_id, int offset, + int len, struct xdr_buf *buf); void (*gss_delete_sec_context)( void *internal_ctx_id); diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index c1d77dd8ed41..e8f8ffe7448b 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -83,7 +83,7 @@ struct gss_krb5_enctype { u32 (*encrypt_v2) (struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, struct page **pages); /* v2 encryption function */ - u32 (*decrypt_v2) (struct krb5_ctx *kctx, u32 offset, + u32 (*decrypt_v2) (struct krb5_ctx *kctx, u32 offset, u32 len, struct xdr_buf *buf, u32 *headskip, u32 *tailskip); /* v2 decryption function */ }; @@ -255,7 +255,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx_id, int offset, struct xdr_buf *outbuf, struct page **pages); u32 -gss_unwrap_kerberos(struct gss_ctx *ctx_id, int offset, +gss_unwrap_kerberos(struct gss_ctx *ctx_id, int offset, int len, struct xdr_buf *buf); @@ -312,7 +312,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, struct page **pages); u32 -gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, +gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len, struct xdr_buf *buf, u32 *plainoffset, u32 *plainlen); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 25fbd8d9de74..7885f37e3688 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -2043,9 +2043,9 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred, offset = (u8 *)(p) - (u8 *)head->iov_base; if (offset + opaque_len > rcv_buf->len) goto unwrap_failed; - rcv_buf->len = offset + opaque_len; - maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf); + maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, + offset + opaque_len, rcv_buf); if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); if (maj_stat != GSS_S_COMPLETE) diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 6f2d30d7b766..e7180da1fc6a 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -851,8 +851,8 @@ out_err: } u32 -gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, - u32 *headskip, u32 *tailskip) +gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len, + struct xdr_buf *buf, u32 *headskip, u32 *tailskip) { struct xdr_buf subbuf; u32 ret = 0; @@ -881,7 +881,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, /* create a segment skipping the header and leaving out the checksum */ xdr_buf_subsegment(buf, &subbuf, offset + GSS_KRB5_TOK_HDR_LEN, - (buf->len - offset - GSS_KRB5_TOK_HDR_LEN - + (len - offset - GSS_KRB5_TOK_HDR_LEN - kctx->gk5e->cksumlength)); nblocks = (subbuf.len + blocksize - 1) / blocksize; @@ -926,7 +926,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, goto out_err; /* Get the packet's hmac value */ - ret = read_bytes_from_xdr_buf(buf, buf->len - kctx->gk5e->cksumlength, + ret = read_bytes_from_xdr_buf(buf, len - kctx->gk5e->cksumlength, pkt_hmac, kctx->gk5e->cksumlength); if (ret) goto out_err; diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 6c1920eed771..c7589e35d5d9 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -261,7 +261,8 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, } static u32 -gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) +gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, int len, + struct xdr_buf *buf) { int signalg; int sealalg; @@ -284,7 +285,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) ptr = (u8 *)buf->head[0].iov_base + offset; if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr, - buf->len - offset)) + len - offset)) return GSS_S_DEFECTIVE_TOKEN; if ((ptr[0] != ((KG_TOK_WRAP_MSG >> 8) & 0xff)) || @@ -324,6 +325,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) (!kctx->initiate && direction != 0)) return GSS_S_BAD_SIG; + buf->len = len; if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) { struct crypto_sync_skcipher *cipher; int err; @@ -376,7 +378,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start; memmove(orig_start, data_start, data_len); buf->head[0].iov_len -= (data_start - orig_start); - buf->len -= (data_start - orig_start); + buf->len = len - (data_start - orig_start); if (gss_krb5_remove_padding(buf, blocksize)) return GSS_S_DEFECTIVE_TOKEN; @@ -486,7 +488,8 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset, } static u32 -gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) +gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, int len, + struct xdr_buf *buf) { time64_t now; u8 *ptr; @@ -532,7 +535,7 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) if (rrc != 0) rotate_left(offset + 16, buf, rrc); - err = (*kctx->gk5e->decrypt_v2)(kctx, offset, buf, + err = (*kctx->gk5e->decrypt_v2)(kctx, offset, len, buf, &headskip, &tailskip); if (err) return GSS_S_FAILURE; @@ -542,7 +545,7 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) * it against the original */ err = read_bytes_from_xdr_buf(buf, - buf->len - GSS_KRB5_TOK_HDR_LEN - tailskip, + len - GSS_KRB5_TOK_HDR_LEN - tailskip, decrypted_hdr, GSS_KRB5_TOK_HDR_LEN); if (err) { dprintk("%s: error %u getting decrypted_hdr\n", __func__, err); @@ -568,14 +571,14 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) * Note that buf->head[0].iov_len may indicate the available * head buffer space rather than that actually occupied. */ - movelen = min_t(unsigned int, buf->head[0].iov_len, buf->len); + movelen = min_t(unsigned int, buf->head[0].iov_len, len); movelen -= offset + GSS_KRB5_TOK_HDR_LEN + headskip; if (offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen > buf->head[0].iov_len) return GSS_S_FAILURE; memmove(ptr, ptr + GSS_KRB5_TOK_HDR_LEN + headskip, movelen); buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip; - buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip; + buf->len = len - GSS_KRB5_TOK_HDR_LEN + headskip; /* Trim off the trailing "extra count" and checksum blob */ buf->len -= ec + GSS_KRB5_TOK_HDR_LEN + tailskip; @@ -603,7 +606,8 @@ gss_wrap_kerberos(struct gss_ctx *gctx, int offset, } u32 -gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, struct xdr_buf *buf) +gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, + int len, struct xdr_buf *buf) { struct krb5_ctx *kctx = gctx->internal_ctx_id; @@ -613,9 +617,9 @@ gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, struct xdr_buf *buf) case ENCTYPE_DES_CBC_RAW: case ENCTYPE_DES3_CBC_RAW: case ENCTYPE_ARCFOUR_HMAC: - return gss_unwrap_kerberos_v1(kctx, offset, buf); + return gss_unwrap_kerberos_v1(kctx, offset, len, buf); case ENCTYPE_AES128_CTS_HMAC_SHA1_96: case ENCTYPE_AES256_CTS_HMAC_SHA1_96: - return gss_unwrap_kerberos_v2(kctx, offset, buf); + return gss_unwrap_kerberos_v2(kctx, offset, len, buf); } } diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index db550bfc2642..69316ab1b9fa 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -411,10 +411,11 @@ gss_wrap(struct gss_ctx *ctx_id, u32 gss_unwrap(struct gss_ctx *ctx_id, int offset, + int len, struct xdr_buf *buf) { return ctx_id->mech_type->gm_ops - ->gss_unwrap(ctx_id, offset, buf); + ->gss_unwrap(ctx_id, offset, len, buf); } diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 54ae5be62f6a..d0a2f084e5a4 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -934,7 +934,7 @@ static int unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) { u32 priv_len, maj_stat; - int pad, saved_len, remaining_len, offset; + int pad, remaining_len, offset; clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); @@ -954,12 +954,8 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs buf->len -= pad; fix_priv_head(buf, pad); - /* Maybe it would be better to give gss_unwrap a length parameter: */ - saved_len = buf->len; - buf->len = priv_len; - maj_stat = gss_unwrap(ctx, 0, buf); + maj_stat = gss_unwrap(ctx, 0, priv_len, buf); pad = priv_len - buf->len; - buf->len = saved_len; buf->len -= pad; /* The upper layers assume the buffer is aligned on 4-byte boundaries. * In the krb5p case, at least, the data ends up offset, so we need to -- cgit v1.2.3-59-g8ed1b From a7e429a6fa6d612d1dacde96c885dc1bb4a9f400 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 18 Apr 2020 14:38:19 -0400 Subject: SUNRPC: Fix GSS privacy computation of auth->au_ralign When the au_ralign field was added to gss_unwrap_resp_priv, the wrong calculation was used. Setting au_rslack == au_ralign is probably correct for kerberos_v1 privacy, but kerberos_v2 privacy adds additional GSS data after the clear text RPC message. au_ralign needs to be smaller than au_rslack in that fairly common case. When xdr_buf_trim() is restored to gss_unwrap_kerberos_v2(), it does exactly what I feared it would: it trims off part of the clear text RPC message. However, that's because rpc_prepare_reply_pages() does not set up the rq_rcv_buf's tail correctly because au_ralign is too large. Fixing the au_ralign computation also corrects the alignment of rq_rcv_buf->pages so that the client does not have to shift reply data payloads after they are received. Fixes: 35e77d21baa0 ("SUNRPC: Add rpc_auth::au_ralign field") Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_api.h | 1 + net/sunrpc/auth_gss/auth_gss.c | 8 +++----- net/sunrpc/auth_gss/gss_krb5_wrap.c | 19 +++++++++++++++---- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index e9a79518d652..bc07e51f20d1 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -21,6 +21,7 @@ struct gss_ctx { struct gss_api_mech *mech_type; void *internal_ctx_id; + unsigned int slack, align; }; #define GSS_C_NO_BUFFER ((struct xdr_netobj) 0) diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 7885f37e3688..ac5cac0dd24b 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -2032,7 +2032,6 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred, struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf; struct kvec *head = rqstp->rq_rcv_buf.head; struct rpc_auth *auth = cred->cr_auth; - unsigned int savedlen = rcv_buf->len; u32 offset, opaque_len, maj_stat; __be32 *p; @@ -2059,10 +2058,9 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred, */ xdr_init_decode(xdr, rcv_buf, p, rqstp); - auth->au_rslack = auth->au_verfsize + 2 + - XDR_QUADLEN(savedlen - rcv_buf->len); - auth->au_ralign = auth->au_verfsize + 2 + - XDR_QUADLEN(savedlen - rcv_buf->len); + auth->au_rslack = auth->au_verfsize + 2 + ctx->gc_gss_ctx->slack; + auth->au_ralign = auth->au_verfsize + 2 + ctx->gc_gss_ctx->align; + return 0; unwrap_failed: trace_rpcgss_unwrap_failed(task); diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index c7589e35d5d9..4905652e7567 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -262,7 +262,8 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, static u32 gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, int len, - struct xdr_buf *buf) + struct xdr_buf *buf, unsigned int *slack, + unsigned int *align) { int signalg; int sealalg; @@ -280,6 +281,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, int len, u32 conflen = kctx->gk5e->conflen; int crypt_offset; u8 *cksumkey; + unsigned int saved_len = buf->len; dprintk("RPC: gss_unwrap_kerberos\n"); @@ -383,6 +385,10 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, int len, if (gss_krb5_remove_padding(buf, blocksize)) return GSS_S_DEFECTIVE_TOKEN; + /* slack must include room for krb5 padding */ + *slack = XDR_QUADLEN(saved_len - buf->len); + /* The GSS blob always precedes the RPC message payload */ + *align = *slack; return GSS_S_COMPLETE; } @@ -489,7 +495,8 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset, static u32 gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, int len, - struct xdr_buf *buf) + struct xdr_buf *buf, unsigned int *slack, + unsigned int *align) { time64_t now; u8 *ptr; @@ -583,6 +590,8 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, int len, /* Trim off the trailing "extra count" and checksum blob */ buf->len -= ec + GSS_KRB5_TOK_HDR_LEN + tailskip; + *align = XDR_QUADLEN(GSS_KRB5_TOK_HDR_LEN + headskip); + *slack = *align + XDR_QUADLEN(ec + GSS_KRB5_TOK_HDR_LEN + tailskip); return GSS_S_COMPLETE; } @@ -617,9 +626,11 @@ gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, case ENCTYPE_DES_CBC_RAW: case ENCTYPE_DES3_CBC_RAW: case ENCTYPE_ARCFOUR_HMAC: - return gss_unwrap_kerberos_v1(kctx, offset, len, buf); + return gss_unwrap_kerberos_v1(kctx, offset, len, buf, + &gctx->slack, &gctx->align); case ENCTYPE_AES128_CTS_HMAC_SHA1_96: case ENCTYPE_AES256_CTS_HMAC_SHA1_96: - return gss_unwrap_kerberos_v2(kctx, offset, len, buf); + return gss_unwrap_kerberos_v2(kctx, offset, len, buf, + &gctx->slack, &gctx->align); } } -- cgit v1.2.3-59-g8ed1b From 0a8e7b7d08466b5fc52f8e96070acc116d82a8bb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 15 Apr 2020 17:36:22 -0400 Subject: SUNRPC: Revert 241b1f419f0e ("SUNRPC: Remove xdr_buf_trim()") I've noticed that when krb5i or krb5p security is in use, retransmitted requests are missing the server's duplicate reply cache. The computed checksum on the retransmitted request does not match the cached checksum, resulting in the server performing the retransmitted request again instead of returning the cached reply. The assumptions made when removing xdr_buf_trim() were not correct. In the send paths, the upper layer has already set the segment lengths correctly, and shorting the buffer's content is simply a matter of reducing buf->len. xdr_buf_trim() is the right answer in the receive/unwrap path on both the client and the server. The buffer segment lengths have to be shortened one-by-one. On the server side in particular, head.iov_len needs to be updated correctly to enable nfsd_cache_csum() to work correctly. The simple buf->len computation doesn't do that, and that results in checksumming stale data in the buffer. The problem isn't noticed until there's significant instability of the RPC transport. At that point, the reliability of retransmit detection on the server becomes crucial. Fixes: 241b1f419f0e ("SUNRPC: Remove xdr_buf_trim()") Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 1 + net/sunrpc/auth_gss/gss_krb5_wrap.c | 7 +++---- net/sunrpc/auth_gss/svcauth_gss.c | 2 +- net/sunrpc/xdr.c | 41 +++++++++++++++++++++++++++++++++++++ 4 files changed, 46 insertions(+), 5 deletions(-) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 01bb41908c93..22c207b2425f 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -184,6 +184,7 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p) extern void xdr_shift_buf(struct xdr_buf *, size_t); extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); +extern void xdr_buf_trim(struct xdr_buf *, unsigned int); extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 4905652e7567..cf0fd170ac18 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -580,15 +580,14 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, int len, */ movelen = min_t(unsigned int, buf->head[0].iov_len, len); movelen -= offset + GSS_KRB5_TOK_HDR_LEN + headskip; - if (offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen > - buf->head[0].iov_len) - return GSS_S_FAILURE; + BUG_ON(offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen > + buf->head[0].iov_len); memmove(ptr, ptr + GSS_KRB5_TOK_HDR_LEN + headskip, movelen); buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip; buf->len = len - GSS_KRB5_TOK_HDR_LEN + headskip; /* Trim off the trailing "extra count" and checksum blob */ - buf->len -= ec + GSS_KRB5_TOK_HDR_LEN + tailskip; + xdr_buf_trim(buf, ec + GSS_KRB5_TOK_HDR_LEN + tailskip); *align = XDR_QUADLEN(GSS_KRB5_TOK_HDR_LEN + headskip); *slack = *align + XDR_QUADLEN(ec + GSS_KRB5_TOK_HDR_LEN + tailskip); diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index d0a2f084e5a4..50d93c49ef1a 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -906,7 +906,7 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g if (svc_getnl(&buf->head[0]) != seq) goto out; /* trim off the mic and padding at the end before returning */ - buf->len -= 4 + round_up_to_quad(mic.len); + xdr_buf_trim(buf, round_up_to_quad(mic.len) + 4); stat = 0; out: kfree(mic.data); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 15b58c5144f9..6f7d82fb1eb0 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -1150,6 +1150,47 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, } EXPORT_SYMBOL_GPL(xdr_buf_subsegment); +/** + * xdr_buf_trim - lop at most "len" bytes off the end of "buf" + * @buf: buf to be trimmed + * @len: number of bytes to reduce "buf" by + * + * Trim an xdr_buf by the given number of bytes by fixing up the lengths. Note + * that it's possible that we'll trim less than that amount if the xdr_buf is + * too small, or if (for instance) it's all in the head and the parser has + * already read too far into it. + */ +void xdr_buf_trim(struct xdr_buf *buf, unsigned int len) +{ + size_t cur; + unsigned int trim = len; + + if (buf->tail[0].iov_len) { + cur = min_t(size_t, buf->tail[0].iov_len, trim); + buf->tail[0].iov_len -= cur; + trim -= cur; + if (!trim) + goto fix_len; + } + + if (buf->page_len) { + cur = min_t(unsigned int, buf->page_len, trim); + buf->page_len -= cur; + trim -= cur; + if (!trim) + goto fix_len; + } + + if (buf->head[0].iov_len) { + cur = min_t(size_t, buf->head[0].iov_len, trim); + buf->head[0].iov_len -= cur; + trim -= cur; + } +fix_len: + buf->len -= (len - trim); +} +EXPORT_SYMBOL_GPL(xdr_buf_trim); + static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len) { unsigned int this_len; -- cgit v1.2.3-59-g8ed1b From 18f02ad19e2c2a1d9e1d55a4e1c0cbf51419151c Mon Sep 17 00:00:00 2001 From: Xiyu Yang Date: Sun, 26 Apr 2020 11:35:15 +0800 Subject: bpf: Fix sk_psock refcnt leak when receiving message tcp_bpf_recvmsg() invokes sk_psock_get(), which returns a reference of the specified sk_psock object to "psock" with increased refcnt. When tcp_bpf_recvmsg() returns, local variable "psock" becomes invalid, so the refcount should be decreased to keep refcount balanced. The reference counting issue happens in several exception handling paths of tcp_bpf_recvmsg(). When those error scenarios occur such as "flags" includes MSG_ERRQUEUE, the function forgets to decrease the refcnt increased by sk_psock_get(), causing a refcnt leak. Fix this issue by calling sk_psock_put() or pulling up the error queue read handling when those error scenarios occur. Fixes: e7a5f1f1cd000 ("bpf/sockmap: Read psock ingress_msg before sk_receive_queue") Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/1587872115-42805-1-git-send-email-xiyuyang19@fudan.edu.cn --- net/ipv4/tcp_bpf.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 5a05327f97c1..ff96466ea6da 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -262,14 +262,17 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, struct sk_psock *psock; int copied, ret; + if (unlikely(flags & MSG_ERRQUEUE)) + return inet_recv_error(sk, msg, len, addr_len); + psock = sk_psock_get(sk); if (unlikely(!psock)) return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); - if (unlikely(flags & MSG_ERRQUEUE)) - return inet_recv_error(sk, msg, len, addr_len); if (!skb_queue_empty(&sk->sk_receive_queue) && - sk_psock_queue_empty(psock)) + sk_psock_queue_empty(psock)) { + sk_psock_put(sk, psock); return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); + } lock_sock(sk); msg_bytes_ready: copied = __tcp_bpf_recvmsg(sk, psock, msg, len, flags); -- cgit v1.2.3-59-g8ed1b From 66bb7fa81e28514ffd2cee9d07526fbb7484c029 Mon Sep 17 00:00:00 2001 From: Brian King Date: Mon, 27 Apr 2020 16:48:24 -0500 Subject: scsi: ibmvfc: Don't send implicit logouts prior to NPIV login Commit ed830385a2b1 ("scsi: ibmvfc: Avoid loss of all paths during SVC node reboot") introduced a regression where when the client resets or re-enables its CRQ with the hypervisor there is a chance that if the server side doesn't issue its INIT handshake quick enough the client can issue an Implicit Logout prior to doing an NPIV Login. The server treats this scenario as a protocol violation and closes the CRQ on its end forcing the client through a reset that gets the client host state and next host action out of agreement leading to a BUG assert. ibmvfc 30000003: Partner initialization complete ibmvfc 30000002: Partner initialization complete ibmvfc 30000002: Host partner adapter deregistered or failed (rc=2) ibmvfc 30000002: Partner initialized ------------[ cut here ]------------ kernel BUG at ../drivers/scsi/ibmvscsi/ibmvfc.c:4489! Oops: Exception in kernel mode, sig: 5 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries Supported: No, Unreleased kernel CPU: 16 PID: 1290 Comm: ibmvfc_0 Tainted: G OE X 5.3.18-12-default NIP: c00800000d84a2b4 LR: c00800000d84a040 CTR: c00800000d84a2a0 REGS: c00000000cb57a00 TRAP: 0700 Tainted: G OE X (5.3.18-12-default) MSR: 800000000282b033 CR: 24000848 XER: 00000001 CFAR: c00800000d84a070 IRQMASK: 1 GPR00: c00800000d84a040 c00000000cb57c90 c00800000d858e00 0000000000000000 GPR04: 0000000000000000 0000000000000000 0000000000000000 00000000000000a0 GPR08: c00800000d84a074 0000000000000001 0000000000000014 c00800000d84d7d0 GPR12: 0000000000000000 c00000001ea28200 c00000000016cd98 0000000000000000 GPR16: c00800000d84b7b8 0000000000000000 0000000000000000 c00000542c706d68 GPR20: 0000000000000005 c00000542c706d88 5deadbeef0000100 5deadbeef0000122 GPR24: 000000000000000c 000000000000000b c00800000d852180 0000000000000001 GPR28: 0000000000000000 c00000542c706da0 c00000542c706860 c00000542c706828 NIP [c00800000d84a2b4] ibmvfc_work+0x3ac/0xc90 [ibmvfc] LR [c00800000d84a040] ibmvfc_work+0x138/0xc90 [ibmvfc] This scenario can be prevented by rejecting any attempt to send an Implicit Logout if the client adapter is not logged in yet. Link: https://lore.kernel.org/r/20200427214824.6890-1-tyreld@linux.ibm.com Fixes: ed830385a2b1 ("scsi: ibmvfc: Avoid loss of all paths during SVC node reboot") Signed-off-by: Brian King Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 7da9e060b270..635f6f9cffc4 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -3640,6 +3640,11 @@ static void ibmvfc_tgt_implicit_logout_and_del(struct ibmvfc_target *tgt) struct ibmvfc_host *vhost = tgt->vhost; struct ibmvfc_event *evt; + if (!vhost->logged_in) { + ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT); + return; + } + if (vhost->discovery_threads >= disc_threads) return; -- cgit v1.2.3-59-g8ed1b From 501be6c1c72417eab05e7413671a38ea991a8ebc Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 25 Mar 2020 21:16:03 +0100 Subject: drm/tegra: Fix SMMU support on Tegra124 and Tegra210 When testing whether or not to enable the use of the SMMU, consult the supported DMA mask rather than the actually configured DMA mask, since the latter might already have been restricted. Fixes: 2d9384ff9177 ("drm/tegra: Relax IOMMU usage criteria on old Tegra") Tested-by: Jon Hunter Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/drm.c | 3 ++- drivers/gpu/host1x/dev.c | 13 +++++++++++++ include/linux/host1x.h | 3 +++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index bd268028fb3d..583cd6e0ae27 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -1039,6 +1039,7 @@ void tegra_drm_free(struct tegra_drm *tegra, size_t size, void *virt, static bool host1x_drm_wants_iommu(struct host1x_device *dev) { + struct host1x *host1x = dev_get_drvdata(dev->dev.parent); struct iommu_domain *domain; /* @@ -1076,7 +1077,7 @@ static bool host1x_drm_wants_iommu(struct host1x_device *dev) * sufficient and whether or not the host1x is attached to an IOMMU * doesn't matter. */ - if (!domain && dma_get_mask(dev->dev.parent) <= DMA_BIT_MASK(32)) + if (!domain && host1x_get_dma_mask(host1x) <= DMA_BIT_MASK(32)) return true; return domain != NULL; diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 388bcc2889aa..40a4b9f8b861 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -502,6 +502,19 @@ static void __exit tegra_host1x_exit(void) } module_exit(tegra_host1x_exit); +/** + * host1x_get_dma_mask() - query the supported DMA mask for host1x + * @host1x: host1x instance + * + * Note that this returns the supported DMA mask for host1x, which can be + * different from the applicable DMA mask under certain circumstances. + */ +u64 host1x_get_dma_mask(struct host1x *host1x) +{ + return host1x->info->dma_mask; +} +EXPORT_SYMBOL(host1x_get_dma_mask); + MODULE_AUTHOR("Thierry Reding "); MODULE_AUTHOR("Terje Bergstrom "); MODULE_DESCRIPTION("Host1x driver for Tegra products"); diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 62d216ff1097..c230b4e70d75 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -17,9 +17,12 @@ enum host1x_class { HOST1X_CLASS_GR3D = 0x60, }; +struct host1x; struct host1x_client; struct iommu_group; +u64 host1x_get_dma_mask(struct host1x *host1x); + /** * struct host1x_client_ops - host1x client operations * @init: host1x client initialization code -- cgit v1.2.3-59-g8ed1b From 4010e729349fcab69183f338fe3743df17a473a0 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 25 Mar 2020 21:16:04 +0100 Subject: gpu: host1x: Use SMMU on Tegra124 and Tegra210 Tegra124 and Tegra210 support addressing more than 32 bits of physical memory. However, since their host1x does not support the wide GATHER opcode, they should use the SMMU if at all possible to ensure that all the system memory can be used for command buffers, irrespective of whether or not the host1x firewall is enabled. Tested-by: Jon Hunter Signed-off-by: Thierry Reding --- drivers/gpu/host1x/dev.c | 46 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 40a4b9f8b861..d24344e91922 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -192,17 +192,55 @@ static void host1x_setup_sid_table(struct host1x *host) } } +static bool host1x_wants_iommu(struct host1x *host1x) +{ + /* + * If we support addressing a maximum of 32 bits of physical memory + * and if the host1x firewall is enabled, there's no need to enable + * IOMMU support. This can happen for example on Tegra20, Tegra30 + * and Tegra114. + * + * Tegra124 and later can address up to 34 bits of physical memory and + * many platforms come equipped with more than 2 GiB of system memory, + * which requires crossing the 4 GiB boundary. But there's a catch: on + * SoCs before Tegra186 (i.e. Tegra124 and Tegra210), the host1x can + * only address up to 32 bits of memory in GATHER opcodes, which means + * that command buffers need to either be in the first 2 GiB of system + * memory (which could quickly lead to memory exhaustion), or command + * buffers need to be treated differently from other buffers (which is + * not possible with the current ABI). + * + * A third option is to use the IOMMU in these cases to make sure all + * buffers will be mapped into a 32-bit IOVA space that host1x can + * address. This allows all of the system memory to be used and works + * within the limitations of the host1x on these SoCs. + * + * In summary, default to enable IOMMU on Tegra124 and later. For any + * of the earlier SoCs, only use the IOMMU for additional safety when + * the host1x firewall is disabled. + */ + if (host1x->info->dma_mask <= DMA_BIT_MASK(32)) { + if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) + return false; + } + + return true; +} + static struct iommu_domain *host1x_iommu_attach(struct host1x *host) { struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev); int err; /* - * If the host1x firewall is enabled, there's no need to enable IOMMU - * support. Similarly, if host1x is already attached to an IOMMU (via - * the DMA API), don't try to attach again. + * We may not always want to enable IOMMU support (for example if the + * host1x firewall is already enabled and we don't support addressing + * more than 32 bits of physical memory), so check for that first. + * + * Similarly, if host1x is already attached to an IOMMU (via the DMA + * API), don't try to attach again. */ - if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) || domain) + if (!host1x_wants_iommu(host) || domain) return domain; host->group = iommu_group_get(host->dev); -- cgit v1.2.3-59-g8ed1b From 522587e7c008c24f19ef5ef34806268992c5e5a6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 7 Apr 2020 12:31:33 +0300 Subject: bus: mhi: core: Fix a NULL vs IS_ERR check in mhi_create_devices() The mhi_alloc_device() function never returns NULL, it returns error pointers. Fixes: da1c4f856924 ("bus: mhi: core: Add support for creating and destroying MHI devices") Signed-off-by: Dan Carpenter Acked-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200407093133.GM68494@mwanda Signed-off-by: Greg Kroah-Hartman --- drivers/bus/mhi/core/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bus/mhi/core/main.c b/drivers/bus/mhi/core/main.c index eb4256b81406..55928feea0c9 100644 --- a/drivers/bus/mhi/core/main.c +++ b/drivers/bus/mhi/core/main.c @@ -294,7 +294,7 @@ void mhi_create_devices(struct mhi_controller *mhi_cntrl) !(mhi_chan->ee_mask & BIT(mhi_cntrl->ee))) continue; mhi_dev = mhi_alloc_device(mhi_cntrl); - if (!mhi_dev) + if (IS_ERR(mhi_dev)) return; mhi_dev->dev_type = MHI_DEVICE_XFER; -- cgit v1.2.3-59-g8ed1b From 5e56bc06e18dfc8a66180fa369384b36e2ab621a Mon Sep 17 00:00:00 2001 From: Christian Gromm Date: Fri, 24 Apr 2020 17:16:34 +0200 Subject: most: core: use function subsys_initcall() This patch replaces function module_init() with subsys_initcall(). It is needed to ensure that the core module of the driver is initialized before a component tries to register with the core. This leads to a NULL pointer dereference if the driver is configured as in-tree. Signed-off-by: Christian Gromm Reported-by: kernel test robot Link: https://lore.kernel.org/r/1587741394-22021-1-git-send-email-christian.gromm@microchip.com Signed-off-by: Greg Kroah-Hartman --- drivers/most/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/most/core.c b/drivers/most/core.c index 06426fc5c990..f781c46cd4af 100644 --- a/drivers/most/core.c +++ b/drivers/most/core.c @@ -1483,7 +1483,7 @@ static void __exit most_exit(void) ida_destroy(&mdev_id); } -module_init(most_init); +subsys_initcall(most_init); module_exit(most_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Christian Gromm "); -- cgit v1.2.3-59-g8ed1b From 8650b6099da5cab203624bf56af200191e260f68 Mon Sep 17 00:00:00 2001 From: David Gow Date: Fri, 24 Apr 2020 21:46:55 -0700 Subject: gpio: of: Build fails if CONFIG_OF_DYNAMIC enabled without CONFIG_OF_GPIO The symbol 'gpio_of_notifier' doesn't exist without both CONFIG_OF_GPIO and CONFIG_OF_DYNAMIC enabled, but is referenced when only CONFIG_OF_DYNAMIC is enabled. This broke building with 'make ARCH=um allyesconfig': --------------- /usr/bin/ld: drivers/gpio/gpiolib.o: in function `gpiolib_dev_init': ./drivers/gpio/gpiolib.c:5293: undefined reference to `gpio_of_notifier' collect2: error: ld returned 1 exit status --------------- Fixes: 63636d956c45 ("gpio: of: Add DT overlay support for GPIO hogs") Signed-off-by: David Gow Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20200425044655.166257-1-davidgow@google.com Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index bf9732ab7f91..182136d98b97 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -5312,8 +5312,9 @@ static int __init gpiolib_dev_init(void) gpiolib_initialized = true; gpiochip_setup_devs(); - if (IS_ENABLED(CONFIG_OF_DYNAMIC)) - WARN_ON(of_reconfig_notifier_register(&gpio_of_notifier)); +#if IS_ENABLED(CONFIG_OF_DYNAMIC) && IS_ENABLED(CONFIG_OF_GPIO) + WARN_ON(of_reconfig_notifier_register(&gpio_of_notifier)); +#endif /* CONFIG_OF_DYNAMIC && CONFIG_OF_GPIO */ return ret; } -- cgit v1.2.3-59-g8ed1b From 0cf253eed5d2bdf7bb3152457b38f39b012955f7 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Mon, 27 Apr 2020 17:26:05 -0600 Subject: gpio: tegra: mask GPIO IRQs during IRQ shutdown The driver currently leaves GPIO IRQs unmasked even when the GPIO IRQ client has released the GPIO IRQ. This allows the HW to raise IRQs, and SW to process them, after shutdown. Fix this by masking the IRQ when it's shut down. This is usually taken care of by the irqchip core, but since this driver has a custom irq_shutdown implementation, it must do this explicitly itself. Signed-off-by: Stephen Warren Link: https://lore.kernel.org/r/20200427232605.11608-1-swarren@wwwdotorg.org Signed-off-by: Linus Walleij --- drivers/gpio/gpio-tegra.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c index acb99eff9939..86568154cdb3 100644 --- a/drivers/gpio/gpio-tegra.c +++ b/drivers/gpio/gpio-tegra.c @@ -368,6 +368,7 @@ static void tegra_gpio_irq_shutdown(struct irq_data *d) struct tegra_gpio_info *tgi = bank->tgi; unsigned int gpio = d->hwirq; + tegra_gpio_irq_mask(d); gpiochip_unlock_as_irq(&tgi->gc, gpio); } -- cgit v1.2.3-59-g8ed1b From 9495b7e92f716ab2bd6814fab5e97ab4a39adfdd Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 22 Apr 2020 12:09:54 +0200 Subject: driver core: platform: Initialize dma_parms for platform devices It's currently the platform driver's responsibility to initialize the pointer, dma_parms, for its corresponding struct device. The benefit with this approach allows us to avoid the initialization and to not waste memory for the struct device_dma_parameters, as this can be decided on a case by case basis. However, it has turned out that this approach is not very practical. Not only does it lead to open coding, but also to real errors. In principle callers of dma_set_max_seg_size() doesn't check the error code, but just assumes it succeeds. For these reasons, let's do the initialization from the common platform bus at the device registration point. This also follows the way the PCI devices are being managed, see pci_device_add(). Suggested-by: Christoph Hellwig Cc: Tested-by: Haibo Chen Reviewed-by: Arnd Bergmann Signed-off-by: Ulf Hansson Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20200422100954.31211-1-ulf.hansson@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 2 ++ include/linux/platform_device.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 5255550b7c34..b27d0f6c18c9 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -380,6 +380,8 @@ struct platform_object { */ static void setup_pdev_dma_masks(struct platform_device *pdev) { + pdev->dev.dma_parms = &pdev->dma_parms; + if (!pdev->dev.coherent_dma_mask) pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32); if (!pdev->dev.dma_mask) { diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index bdc35753ef7c..77a2aada106d 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -25,6 +25,7 @@ struct platform_device { bool id_auto; struct device dev; u64 platform_dma_mask; + struct device_dma_parameters dma_parms; u32 num_resources; struct resource *resource; -- cgit v1.2.3-59-g8ed1b From f458488425f1cc9a396aa1d09bb00c48783936da Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 22 Apr 2020 12:10:13 +0200 Subject: amba: Initialize dma_parms for amba devices It's currently the amba driver's responsibility to initialize the pointer, dma_parms, for its corresponding struct device. The benefit with this approach allows us to avoid the initialization and to not waste memory for the struct device_dma_parameters, as this can be decided on a case by case basis. However, it has turned out that this approach is not very practical. Not only does it lead to open coding, but also to real errors. In principle callers of dma_set_max_seg_size() doesn't check the error code, but just assumes it succeeds. For these reasons, let's do the initialization from the common amba bus at the device registration point. This also follows the way the PCI devices are being managed, see pci_device_add(). Suggested-by: Christoph Hellwig Cc: Russell King Cc: Tested-by: Haibo Chen Reviewed-by: Arnd Bergmann Signed-off-by: Ulf Hansson Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20200422101013.31267-1-ulf.hansson@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/amba/bus.c | 1 + include/linux/amba/bus.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index fe1523664816..8558b629880b 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -645,6 +645,7 @@ static void amba_device_initialize(struct amba_device *dev, const char *name) dev->dev.release = amba_device_release; dev->dev.bus = &amba_bustype; dev->dev.dma_mask = &dev->dev.coherent_dma_mask; + dev->dev.dma_parms = &dev->dma_parms; dev->res.name = dev_name(&dev->dev); } diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 26f0ecf401ea..0bbfd647f5c6 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -65,6 +65,7 @@ struct amba_device { struct device dev; struct resource res; struct clk *pclk; + struct device_dma_parameters dma_parms; unsigned int periphid; unsigned int cid; struct amba_cs_uci_id uci; -- cgit v1.2.3-59-g8ed1b From 3740d93e37902b31159a82da2d5c8812ed825404 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Thu, 16 Apr 2020 16:28:59 +0000 Subject: coredump: fix crash when umh is disabled Commit 64e90a8acb859 ("Introduce STATIC_USERMODEHELPER to mediate call_usermodehelper()") added the optiont to disable all call_usermodehelper() calls by setting STATIC_USERMODEHELPER_PATH to an empty string. When this is done, and crashdump is triggered, it will crash on null pointer dereference, since we make assumptions over what call_usermodehelper_exec() did. This has been reported by Sergey when one triggers a a coredump with the following configuration: ``` CONFIG_STATIC_USERMODEHELPER=y CONFIG_STATIC_USERMODEHELPER_PATH="" kernel.core_pattern = |/usr/lib/systemd/systemd-coredump %P %u %g %s %t %c %h %e ``` The way disabling the umh was designed was that call_usermodehelper_exec() would just return early, without an error. But coredump assumes certain variables are set up for us when this happens, and calls ile_start_write(cprm.file) with a NULL file. [ 2.819676] BUG: kernel NULL pointer dereference, address: 0000000000000020 [ 2.819859] #PF: supervisor read access in kernel mode [ 2.820035] #PF: error_code(0x0000) - not-present page [ 2.820188] PGD 0 P4D 0 [ 2.820305] Oops: 0000 [#1] SMP PTI [ 2.820436] CPU: 2 PID: 89 Comm: a Not tainted 5.7.0-rc1+ #7 [ 2.820680] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190711_202441-buildvm-armv7-10.arm.fedoraproject.org-2.fc31 04/01/2014 [ 2.821150] RIP: 0010:do_coredump+0xd80/0x1060 [ 2.821385] Code: e8 95 11 ed ff 48 c7 c6 cc a7 b4 81 48 8d bd 28 ff ff ff 89 c2 e8 70 f1 ff ff 41 89 c2 85 c0 0f 84 72 f7 ff ff e9 b4 fe ff ff <48> 8b 57 20 0f b7 02 66 25 00 f0 66 3d 00 8 0 0f 84 9c 01 00 00 44 [ 2.822014] RSP: 0000:ffffc9000029bcb8 EFLAGS: 00010246 [ 2.822339] RAX: 0000000000000000 RBX: ffff88803f860000 RCX: 000000000000000a [ 2.822746] RDX: 0000000000000009 RSI: 0000000000000282 RDI: 0000000000000000 [ 2.823141] RBP: ffffc9000029bde8 R08: 0000000000000000 R09: ffffc9000029bc00 [ 2.823508] R10: 0000000000000001 R11: ffff88803dec90be R12: ffffffff81c39da0 [ 2.823902] R13: ffff88803de84400 R14: 0000000000000000 R15: 0000000000000000 [ 2.824285] FS: 00007fee08183540(0000) GS:ffff88803e480000(0000) knlGS:0000000000000000 [ 2.824767] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 2.825111] CR2: 0000000000000020 CR3: 000000003f856005 CR4: 0000000000060ea0 [ 2.825479] Call Trace: [ 2.825790] get_signal+0x11e/0x720 [ 2.826087] do_signal+0x1d/0x670 [ 2.826361] ? force_sig_info_to_task+0xc1/0xf0 [ 2.826691] ? force_sig_fault+0x3c/0x40 [ 2.826996] ? do_trap+0xc9/0x100 [ 2.827179] exit_to_usermode_loop+0x49/0x90 [ 2.827359] prepare_exit_to_usermode+0x77/0xb0 [ 2.827559] ? invalid_op+0xa/0x30 [ 2.827747] ret_from_intr+0x20/0x20 [ 2.827921] RIP: 0033:0x55e2c76d2129 [ 2.828107] Code: 2d ff ff ff e8 68 ff ff ff 5d c6 05 18 2f 00 00 01 c3 0f 1f 80 00 00 00 00 c3 0f 1f 80 00 00 00 00 e9 7b ff ff ff 55 48 89 e5 <0f> 0b b8 00 00 00 00 5d c3 66 2e 0f 1f 84 0 0 00 00 00 00 0f 1f 40 [ 2.828603] RSP: 002b:00007fffeba5e080 EFLAGS: 00010246 [ 2.828801] RAX: 000055e2c76d2125 RBX: 0000000000000000 RCX: 00007fee0817c718 [ 2.829034] RDX: 00007fffeba5e188 RSI: 00007fffeba5e178 RDI: 0000000000000001 [ 2.829257] RBP: 00007fffeba5e080 R08: 0000000000000000 R09: 00007fee08193c00 [ 2.829482] R10: 0000000000000009 R11: 0000000000000000 R12: 000055e2c76d2040 [ 2.829727] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 [ 2.829964] CR2: 0000000000000020 [ 2.830149] ---[ end trace ceed83d8c68a1bf1 ]--- ``` Cc: # v4.11+ Fixes: 64e90a8acb85 ("Introduce STATIC_USERMODEHELPER to mediate call_usermodehelper()") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=199795 Reported-by: Tony Vroon Reported-by: Sergey Kvachonok Tested-by: Sergei Trofimovich Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20200416162859.26518-1-mcgrof@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/coredump.c | 8 ++++++++ kernel/umh.c | 5 +++++ 2 files changed, 13 insertions(+) diff --git a/fs/coredump.c b/fs/coredump.c index 408418e6aa13..478a0d810136 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -788,6 +788,14 @@ void do_coredump(const kernel_siginfo_t *siginfo) if (displaced) put_files_struct(displaced); if (!dump_interrupted()) { + /* + * umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would + * have this set to NULL. + */ + if (!cprm.file) { + pr_info("Core dump to |%s disabled\n", cn.corename); + goto close_fail; + } file_start_write(cprm.file); core_dumped = binfmt->core_dump(&cprm); file_end_write(cprm.file); diff --git a/kernel/umh.c b/kernel/umh.c index 7f255b5a8845..11bf5eea474c 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -544,6 +544,11 @@ EXPORT_SYMBOL_GPL(fork_usermode_blob); * Runs a user-space application. The application is started * asynchronously if wait is not set, and runs as a child of system workqueues. * (ie. it runs with full root capabilities and optimized affinity). + * + * Note: successful return value does not guarantee the helper was called at + * all. You can't rely on sub_info->{init,cleanup} being called even for + * UMH_WAIT_* wait modes as STATIC_USERMODEHELPER_PATH="" turns all helpers + * into a successful no-op. */ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) { -- cgit v1.2.3-59-g8ed1b From 00b247557858bc0651a646710d90aba186bfeed4 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Mon, 30 Mar 2020 19:28:32 -0700 Subject: driver core: Fix handling of fw_devlink=permissive When commit 8375e74f2bca ("driver core: Add fw_devlink kernel commandline option") added fw_devlink, it didn't implement "permissive" mode correctly. That commit got the device links flags correct to make sure unprobed suppliers don't block the probing of a consumer. However, if a consumer is waiting for mandatory suppliers to register, that could still block a consumer from probing. This commit fixes that by making sure in permissive mode, all suppliers to a consumer are treated as a optional suppliers. So, even if a consumer is waiting for suppliers to register and link itself (using the DL_FLAG_SYNC_STATE_ONLY flag) to the supplier, the consumer is never blocked from probing. Fixes: 8375e74f2bca ("driver core: Add fw_devlink kernel commandline option") Reported-by: Marek Szyprowski Signed-off-by: Saravana Kannan Tested-by: Marek Szyprowski Link: https://lore.kernel.org/r/20200331022832.209618-1-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 139cdf7e7327..073045cb214e 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2370,6 +2370,11 @@ u32 fw_devlink_get_flags(void) return fw_devlink_flags; } +static bool fw_devlink_is_permissive(void) +{ + return fw_devlink_flags == DL_FLAG_SYNC_STATE_ONLY; +} + /** * device_add - add device to device hierarchy. * @dev: device. @@ -2524,7 +2529,7 @@ int device_add(struct device *dev) if (fw_devlink_flags && is_fwnode_dev && fwnode_has_op(dev->fwnode, add_links)) { fw_ret = fwnode_call_int_op(dev->fwnode, add_links, dev); - if (fw_ret == -ENODEV) + if (fw_ret == -ENODEV && !fw_devlink_is_permissive()) device_link_wait_for_mandatory_supplier(dev); else if (fw_ret) device_link_wait_for_optional_supplier(dev); -- cgit v1.2.3-59-g8ed1b From 7706b0a76a9697021e2bf395f3f065c18f51043d Mon Sep 17 00:00:00 2001 From: James Hilliard Date: Sat, 11 Apr 2020 13:02:41 -0600 Subject: component: Silence bind error on -EPROBE_DEFER If a component fails to bind due to -EPROBE_DEFER we should not log an error as this is not a real failure. Fixes messages like: vc4-drm soc:gpu: failed to bind 3f902000.hdmi (ops vc4_hdmi_ops): -517 vc4-drm soc:gpu: master bind failed: -517 Signed-off-by: James Hilliard Link: https://lore.kernel.org/r/20200411190241.89404-1-james.hilliard1@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/component.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/base/component.c b/drivers/base/component.c index e97704104784..dcfbe7251dc4 100644 --- a/drivers/base/component.c +++ b/drivers/base/component.c @@ -256,7 +256,8 @@ static int try_to_bring_up_master(struct master *master, ret = master->ops->bind(master->dev); if (ret < 0) { devres_release_group(master->dev, NULL); - dev_info(master->dev, "master bind failed: %d\n", ret); + if (ret != -EPROBE_DEFER) + dev_info(master->dev, "master bind failed: %d\n", ret); return ret; } @@ -611,8 +612,9 @@ static int component_bind(struct component *component, struct master *master, devres_release_group(component->dev, NULL); devres_release_group(master->dev, NULL); - dev_err(master->dev, "failed to bind %s (ops %ps): %d\n", - dev_name(component->dev), component->ops, ret); + if (ret != -EPROBE_DEFER) + dev_err(master->dev, "failed to bind %s (ops %ps): %d\n", + dev_name(component->dev), component->ops, ret); } return ret; -- cgit v1.2.3-59-g8ed1b From ce68929f07de0780c1afbbeef9aa6cf3550163d6 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 22 Apr 2020 20:32:43 +0000 Subject: driver core: Revert default driver_deferred_probe_timeout value to 0 This patch addresses a regression in 5.7-rc1+ In commit c8c43cee29f6 ("driver core: Fix driver_deferred_probe_check_state() logic"), we both cleaned up the logic and also set the default driver_deferred_probe_timeout value to 30 seconds to allow for drivers that are missing dependencies to have some time so that the dependency may be loaded from userland after initcalls_done is set. However, Yoshihiro Shimoda reported that on his device that expects to have unmet dependencies (due to "optional links" in its devicetree), was failing to mount the NFS root. In digging further, it seemed the problem was that while the device properly probes after waiting 30 seconds for any missing modules to load, the ip_auto_config() had already failed, resulting in NFS to fail. This was due to ip_auto_config() calling wait_for_device_probe() which doesn't wait for the driver_deferred_probe_timeout to fire. Fixing that issue is possible, but could also introduce 30 second delays in bootups for users who don't have any missing dependencies, which is not ideal. So I think the best solution to avoid any regressions is to revert back to a default timeout value of zero, and allow systems that need to utilize the timeout in order for userland to load any modules that supply misisng dependencies in the dts to specify the timeout length via the exiting documented boot argument. Thanks to Geert for chasing down that ip_auto_config was why NFS was failing in this case! Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: Hideaki YOSHIFUJI Cc: Jakub Kicinski Cc: Rafael J. Wysocki Cc: Rob Herring Cc: Geert Uytterhoeven Cc: Yoshihiro Shimoda Cc: Robin Murphy Cc: Andy Shevchenko Cc: Sudeep Holla Cc: Andy Shevchenko Cc: Naresh Kamboju Cc: Basil Eljuse Cc: Ferry Toth Cc: Arnd Bergmann Cc: Anders Roxell Reported-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Tested-by: Geert Uytterhoeven Fixes: c8c43cee29f6 ("driver core: Fix driver_deferred_probe_check_state() logic") Signed-off-by: John Stultz Link: https://lore.kernel.org/r/20200422203245.83244-2-john.stultz@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 06ec0e851fa1..908ae4d7805e 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -224,16 +224,7 @@ static int deferred_devs_show(struct seq_file *s, void *data) } DEFINE_SHOW_ATTRIBUTE(deferred_devs); -#ifdef CONFIG_MODULES -/* - * In the case of modules, set the default probe timeout to - * 30 seconds to give userland some time to load needed modules - */ -int driver_deferred_probe_timeout = 30; -#else -/* In the case of !modules, no probe timeout needed */ -int driver_deferred_probe_timeout = -1; -#endif +int driver_deferred_probe_timeout; EXPORT_SYMBOL_GPL(driver_deferred_probe_timeout); static int __init deferred_probe_timeout_setup(char *str) @@ -266,7 +257,7 @@ int driver_deferred_probe_check_state(struct device *dev) return -ENODEV; } - if (!driver_deferred_probe_timeout) { + if (!driver_deferred_probe_timeout && initcalls_done) { dev_WARN(dev, "deferred probe timeout, ignoring dependency"); return -ETIMEDOUT; } -- cgit v1.2.3-59-g8ed1b From 4ccc03e28ec309173f434fa38b48b6ad65ff5d1b Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 22 Apr 2020 20:32:44 +0000 Subject: driver core: Use dev_warn() instead of dev_WARN() for deferred_probe_timeout warnings In commit c8c43cee29f6 ("driver core: Fix driver_deferred_probe_check_state() logic") and following changes the logic was changes slightly so that if there is no driver to match whats found in the dtb, we wait the sepcified seconds for modules to be loaded by userland, and then timeout, where as previously we'd print "ignoring dependency for device, assuming no driver" and immediately return -ENODEV after initcall_done. However, in the timeout case (which previously existed but was practicaly un-used without a boot argument), the timeout message uses dev_WARN(). This means folks are now seeing a big backtrace in their boot logs if there a entry in their dts that doesn't have a driver. To fix this, lets use dev_warn(), instead of dev_WARN() to match the previous error path. Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: Hideaki YOSHIFUJI Cc: Jakub Kicinski Cc: Rafael J. Wysocki Cc: Rob Herring Cc: Geert Uytterhoeven Cc: Yoshihiro Shimoda Cc: Robin Murphy Cc: Andy Shevchenko Cc: Sudeep Holla Cc: Andy Shevchenko Cc: Naresh Kamboju Cc: Basil Eljuse Cc: Ferry Toth Cc: Arnd Bergmann Cc: Anders Roxell Cc: linux-pm@vger.kernel.org Reviewed-by: Yoshihiro Shimoda Fixes: c8c43cee29f6 ("driver core: Fix driver_deferred_probe_check_state() logic") Signed-off-by: John Stultz Link: https://lore.kernel.org/r/20200422203245.83244-3-john.stultz@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 908ae4d7805e..9c88afa5c74a 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -258,7 +258,7 @@ int driver_deferred_probe_check_state(struct device *dev) } if (!driver_deferred_probe_timeout && initcalls_done) { - dev_WARN(dev, "deferred probe timeout, ignoring dependency"); + dev_warn(dev, "deferred probe timeout, ignoring dependency"); return -ETIMEDOUT; } -- cgit v1.2.3-59-g8ed1b From 35a672363ab3e8dfe4ebcadb4dd0b2d06bb85ebe Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 22 Apr 2020 20:32:45 +0000 Subject: driver core: Ensure wait_for_device_probe() waits until the deferred_probe_timeout fires In commit c8c43cee29f6 ("driver core: Fix driver_deferred_probe_check_state() logic"), we set the default driver_deferred_probe_timeout value to 30 seconds to allow for drivers that are missing dependencies to have some time so that the dependency may be loaded from userland after initcalls_done is set. However, Yoshihiro Shimoda reported that on his device that expects to have unmet dependencies (due to "optional links" in its devicetree), was failing to mount the NFS root. In digging further, it seemed the problem was that while the device properly probes after waiting 30 seconds for any missing modules to load, the ip_auto_config() had already failed, resulting in NFS to fail. This was due to ip_auto_config() calling wait_for_device_probe() which doesn't wait for the driver_deferred_probe_timeout to fire. This patch tries to fix the issue by creating a waitqueue for the driver_deferred_probe_timeout, and calling wait_event() to make sure driver_deferred_probe_timeout is zero in wait_for_device_probe() to make sure all the probing is finished. The downside to this solution is that kernel functionality that uses wait_for_device_probe(), will block until the driver_deferred_probe_timeout fires, regardless of if there is any missing dependencies. However, the previous patch reverts the default timeout value to zero, so this side-effect will only affect users who specify a driver_deferred_probe_timeout= value as a boot argument, where the additional delay would be beneficial to allow modules to load later during boot. Thanks to Geert for chasing down that ip_auto_config was why NFS was failing in this case! Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: Hideaki YOSHIFUJI Cc: Jakub Kicinski Cc: Rafael J. Wysocki Cc: Rob Herring Cc: Geert Uytterhoeven Cc: Yoshihiro Shimoda Cc: Robin Murphy Cc: Andy Shevchenko Cc: Sudeep Holla Cc: Andy Shevchenko Cc: Naresh Kamboju Cc: Basil Eljuse Cc: Ferry Toth Cc: Arnd Bergmann Cc: Anders Roxell Cc: linux-pm@vger.kernel.org Reported-by: Yoshihiro Shimoda Tested-by: Geert Uytterhoeven Tested-by: Yoshihiro Shimoda Fixes: c8c43cee29f6 ("driver core: Fix driver_deferred_probe_check_state() logic") Signed-off-by: John Stultz Link: https://lore.kernel.org/r/20200422203245.83244-4-john.stultz@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 9c88afa5c74a..94037be7f5d7 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -226,6 +226,7 @@ DEFINE_SHOW_ATTRIBUTE(deferred_devs); int driver_deferred_probe_timeout; EXPORT_SYMBOL_GPL(driver_deferred_probe_timeout); +static DECLARE_WAIT_QUEUE_HEAD(probe_timeout_waitqueue); static int __init deferred_probe_timeout_setup(char *str) { @@ -275,6 +276,7 @@ static void deferred_probe_timeout_work_func(struct work_struct *work) list_for_each_entry_safe(private, p, &deferred_probe_pending_list, deferred_probe) dev_info(private->device, "deferred probe pending"); + wake_up(&probe_timeout_waitqueue); } static DECLARE_DELAYED_WORK(deferred_probe_timeout_work, deferred_probe_timeout_work_func); @@ -649,6 +651,9 @@ int driver_probe_done(void) */ void wait_for_device_probe(void) { + /* wait for probe timeout */ + wait_event(probe_timeout_waitqueue, !driver_deferred_probe_timeout); + /* wait for the deferred probe workqueue to finish */ flush_work(&deferred_probe_work); -- cgit v1.2.3-59-g8ed1b From c3bf9930921b33edb31909006607e478751a6f5e Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 9 Apr 2020 10:18:10 +0300 Subject: thunderbolt: Check return value of tb_sw_read() in usb4_switch_op() The function misses checking return value of tb_sw_read() before it accesses the value that was read. Fix this by checking the return value first. Fixes: b04079837b20 ("thunderbolt: Add initial support for USB4") Signed-off-by: Mika Westerberg Reviewed-by: Yehezkel Bernat Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/usb4.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c index 3d084cec136f..50c7534ba31e 100644 --- a/drivers/thunderbolt/usb4.c +++ b/drivers/thunderbolt/usb4.c @@ -182,6 +182,9 @@ static int usb4_switch_op(struct tb_switch *sw, u16 opcode, u8 *status) return ret; ret = tb_sw_read(sw, &val, TB_CFG_SWITCH, ROUTER_CS_26, 1); + if (ret) + return ret; + if (val & ROUTER_CS_26_ONS) return -EOPNOTSUPP; -- cgit v1.2.3-59-g8ed1b From b36522150e5b85045f868768d46fbaaa034174b2 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Mon, 27 Apr 2020 15:49:53 -0700 Subject: scsi: ibmvscsi: Fix WARN_ON during event pool release While removing an ibmvscsi client adapter a WARN_ON like the following is seen in the kernel log: drmgr: drmgr: -r -c slot -s U9080.M9S.783AEC8-V11-C11 -w 5 -d 1 WARNING: CPU: 9 PID: 24062 at ../kernel/dma/mapping.c:311 dma_free_attrs+0x78/0x110 Supported: No, Unreleased kernel CPU: 9 PID: 24062 Comm: drmgr Kdump: loaded Tainted: G X 5.3.18-12-default NIP: c0000000001fa758 LR: c0000000001fa744 CTR: c0000000001fa6e0 REGS: c0000002173375d0 TRAP: 0700 Tainted: G X (5.3.18-12-default) MSR: 8000000000029033 CR: 28088282 XER: 20000000 CFAR: c0000000001fbf0c IRQMASK: 1 GPR00: c0000000001fa744 c000000217337860 c00000000161ab00 0000000000000000 GPR04: 0000000000000000 c000011e12250000 0000000018010000 0000000000000000 GPR08: 0000000000000000 0000000000000001 0000000000000001 c0080000190f4fa8 GPR12: c0000000001fa6e0 c000000007fc2a00 0000000000000000 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR24: 000000011420e310 0000000000000000 0000000000000000 0000000018010000 GPR28: c00000000159de50 c000011e12250000 0000000000006600 c000011e5c994848 NIP [c0000000001fa758] dma_free_attrs+0x78/0x110 LR [c0000000001fa744] dma_free_attrs+0x64/0x110 Call Trace: [c000000217337860] [000000011420e310] 0x11420e310 (unreliable) [c0000002173378b0] [c0080000190f0280] release_event_pool+0xd8/0x120 [ibmvscsi] [c000000217337930] [c0080000190f3f74] ibmvscsi_remove+0x6c/0x160 [ibmvscsi] [c000000217337960] [c0000000000f3cac] vio_bus_remove+0x5c/0x100 [c0000002173379a0] [c00000000087a0a4] device_release_driver_internal+0x154/0x280 [c0000002173379e0] [c0000000008777cc] bus_remove_device+0x11c/0x220 [c000000217337a60] [c000000000870fc4] device_del+0x1c4/0x470 [c000000217337b10] [c0000000008712a0] device_unregister+0x30/0xa0 [c000000217337b80] [c0000000000f39ec] vio_unregister_device+0x2c/0x60 [c000000217337bb0] [c00800001a1d0964] dlpar_remove_slot+0x14c/0x250 [rpadlpar_io] [c000000217337c50] [c00800001a1d0bcc] remove_slot_store+0xa4/0x110 [rpadlpar_io] [c000000217337cd0] [c000000000c091a0] kobj_attr_store+0x30/0x50 [c000000217337cf0] [c00000000057c934] sysfs_kf_write+0x64/0x90 [c000000217337d10] [c00000000057be10] kernfs_fop_write+0x1b0/0x290 [c000000217337d60] [c000000000488c4c] __vfs_write+0x3c/0x70 [c000000217337d80] [c00000000048c648] vfs_write+0xd8/0x260 [c000000217337dd0] [c00000000048ca8c] ksys_write+0xdc/0x130 [c000000217337e20] [c00000000000b488] system_call+0x5c/0x70 Instruction dump: 7c840074 f8010010 f821ffb1 20840040 eb830218 7c8407b4 48002019 60000000 2fa30000 409e003c 892d0988 792907e0 <0b090000> 2fbd0000 419e0028 2fbc0000 ---[ end trace 5955b3c0cc079942 ]--- rpadlpar_io: slot U9080.M9S.783AEC8-V11-C11 removed This is tripped as a result of irqs being disabled during the call to dma_free_coherent() by release_event_pool(). At this point in the code path we have quiesced the adapter and it is overly paranoid to be holding the host lock. [mkp: fixed build warning reported by sfr] Link: https://lore.kernel.org/r/1588027793-17952-1-git-send-email-tyreld@linux.ibm.com Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvscsi.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index 7f66a7783209..59f0f1030c54 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -2320,16 +2320,12 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id) static int ibmvscsi_remove(struct vio_dev *vdev) { struct ibmvscsi_host_data *hostdata = dev_get_drvdata(&vdev->dev); - unsigned long flags; srp_remove_host(hostdata->host); scsi_remove_host(hostdata->host); purge_requests(hostdata, DID_ERROR); - - spin_lock_irqsave(hostdata->host->host_lock, flags); release_event_pool(&hostdata->pool, hostdata); - spin_unlock_irqrestore(hostdata->host->host_lock, flags); ibmvscsi_release_crq_queue(&hostdata->queue, hostdata, max_events); -- cgit v1.2.3-59-g8ed1b From 0c1d3f2c9a0dc49d96754148b74a44eaa37bfbd8 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Wed, 29 Apr 2020 06:21:16 +0200 Subject: MAINTAINERS: remove entry after hp100 driver removal Commit a10079c66290 ("staging: remove hp100 driver") removed all files from ./drivers/staging/hp/, but missed to adjust MAINTAINERS. Since then, ./scripts/get_maintainer.pl --self-test=patterns complains: warning: no file matches F: drivers/staging/hp/hp100.* So, drop HP100 Driver entry in MAINTAINERS now. Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20200429042116.29126-1-lukas.bulwahn@gmail.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 5 ----- 1 file changed, 5 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 26f281d9f32a..41e2b577488f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7746,11 +7746,6 @@ L: platform-driver-x86@vger.kernel.org S: Orphan F: drivers/platform/x86/tc1100-wmi.c -HP100: Driver for HP 10/100 Mbit/s Voice Grade Network Adapter Series -M: Jaroslav Kysela -S: Obsolete -F: drivers/staging/hp/hp100.* - HPET: High Precision Event Timers driver M: Clemens Ladisch S: Maintained -- cgit v1.2.3-59-g8ed1b From 5409e0cca53af8d2529f8398c9b05c36e25f9d2f Mon Sep 17 00:00:00 2001 From: ChenTao Date: Wed, 29 Apr 2020 13:19:04 +0300 Subject: interconnect: qcom: Move the static keyword to the front of declaration Fix the following warning: Move the static keyword to the front of declaration of sdm845_icc_osm_l3 sdm845_aggre1_noc sc7180_icc_osm_l3 sdm845_aggre2_noc sdm845_config_noc sdm845_dc_noc sdm845_gladiator_noc sdm845_mem_noc sdm845_mmss_noc and sdm845_system_noc, resolve the following compiler warning that can be when building with warnings enabled (W=1): drivers/interconnect/qcom/osm-l3.c:81:1: warning: const static struct qcom_icc_desc sdm845_icc_osm_l3 = { drivers/interconnect/qcom/osm-l3.c:94:1: warning: const static struct qcom_icc_desc sc7180_icc_osm_l3 = { drivers/interconnect/qcom/sdm845.c:195:1: warning: const static struct qcom_icc_desc sdm845_aggre1_noc = { drivers/interconnect/qcom/sdm845.c:223:1: warning: const static struct qcom_icc_desc sdm845_aggre2_noc = { drivers/interconnect/qcom/sdm845.c:284:1: warning: const static struct qcom_icc_desc sdm845_config_noc = { drivers/interconnect/qcom/sdm845.c:300:1: warning: const static struct qcom_icc_desc sdm845_dc_noc = { drivers/interconnect/qcom/sdm845.c:318:1: warning: const static struct qcom_icc_desc sdm845_gladiator_noc = { drivers/interconnect/qcom/sdm845.c:353:1: warning: const static struct qcom_icc_desc sdm845_mem_noc = { drivers/interconnect/qcom/sdm845.c:387:1: warning: const static struct qcom_icc_desc sdm845_mmss_noc = { drivers/interconnect/qcom/sdm845.c:433:1: warning: const static struct qcom_icc_desc sdm845_system_noc = { Reported-by: Hulk Robot Signed-off-by: ChenTao Link: https://lore.kernel.org/r/20200423132142.45174-1-chentao107@huawei.com Signed-off-by: Georgi Djakov Link: https://lore.kernel.org/r/20200429101904.5771-2-georgi.djakov@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/interconnect/qcom/osm-l3.c | 4 ++-- drivers/interconnect/qcom/sdm845.c | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/interconnect/qcom/osm-l3.c b/drivers/interconnect/qcom/osm-l3.c index a03c6d6833df..96fb9ff5ff2e 100644 --- a/drivers/interconnect/qcom/osm-l3.c +++ b/drivers/interconnect/qcom/osm-l3.c @@ -78,7 +78,7 @@ static struct qcom_icc_node *sdm845_osm_l3_nodes[] = { [SLAVE_OSM_L3] = &sdm845_osm_l3, }; -const static struct qcom_icc_desc sdm845_icc_osm_l3 = { +static const struct qcom_icc_desc sdm845_icc_osm_l3 = { .nodes = sdm845_osm_l3_nodes, .num_nodes = ARRAY_SIZE(sdm845_osm_l3_nodes), }; @@ -91,7 +91,7 @@ static struct qcom_icc_node *sc7180_osm_l3_nodes[] = { [SLAVE_OSM_L3] = &sc7180_osm_l3, }; -const static struct qcom_icc_desc sc7180_icc_osm_l3 = { +static const struct qcom_icc_desc sc7180_icc_osm_l3 = { .nodes = sc7180_osm_l3_nodes, .num_nodes = ARRAY_SIZE(sc7180_osm_l3_nodes), }; diff --git a/drivers/interconnect/qcom/sdm845.c b/drivers/interconnect/qcom/sdm845.c index b013b80caa45..f6c7b969520d 100644 --- a/drivers/interconnect/qcom/sdm845.c +++ b/drivers/interconnect/qcom/sdm845.c @@ -192,7 +192,7 @@ static struct qcom_icc_node *aggre1_noc_nodes[] = { [SLAVE_ANOC_PCIE_A1NOC_SNOC] = &qns_pcie_a1noc_snoc, }; -const static struct qcom_icc_desc sdm845_aggre1_noc = { +static const struct qcom_icc_desc sdm845_aggre1_noc = { .nodes = aggre1_noc_nodes, .num_nodes = ARRAY_SIZE(aggre1_noc_nodes), .bcms = aggre1_noc_bcms, @@ -220,7 +220,7 @@ static struct qcom_icc_node *aggre2_noc_nodes[] = { [SLAVE_SERVICE_A2NOC] = &srvc_aggre2_noc, }; -const static struct qcom_icc_desc sdm845_aggre2_noc = { +static const struct qcom_icc_desc sdm845_aggre2_noc = { .nodes = aggre2_noc_nodes, .num_nodes = ARRAY_SIZE(aggre2_noc_nodes), .bcms = aggre2_noc_bcms, @@ -281,7 +281,7 @@ static struct qcom_icc_node *config_noc_nodes[] = { [SLAVE_SERVICE_CNOC] = &srvc_cnoc, }; -const static struct qcom_icc_desc sdm845_config_noc = { +static const struct qcom_icc_desc sdm845_config_noc = { .nodes = config_noc_nodes, .num_nodes = ARRAY_SIZE(config_noc_nodes), .bcms = config_noc_bcms, @@ -297,7 +297,7 @@ static struct qcom_icc_node *dc_noc_nodes[] = { [SLAVE_MEM_NOC_CFG] = &qhs_memnoc, }; -const static struct qcom_icc_desc sdm845_dc_noc = { +static const struct qcom_icc_desc sdm845_dc_noc = { .nodes = dc_noc_nodes, .num_nodes = ARRAY_SIZE(dc_noc_nodes), .bcms = dc_noc_bcms, @@ -315,7 +315,7 @@ static struct qcom_icc_node *gladiator_noc_nodes[] = { [SLAVE_SERVICE_GNOC] = &srvc_gnoc, }; -const static struct qcom_icc_desc sdm845_gladiator_noc = { +static const struct qcom_icc_desc sdm845_gladiator_noc = { .nodes = gladiator_noc_nodes, .num_nodes = ARRAY_SIZE(gladiator_noc_nodes), .bcms = gladiator_noc_bcms, @@ -350,7 +350,7 @@ static struct qcom_icc_node *mem_noc_nodes[] = { [SLAVE_EBI1] = &ebi, }; -const static struct qcom_icc_desc sdm845_mem_noc = { +static const struct qcom_icc_desc sdm845_mem_noc = { .nodes = mem_noc_nodes, .num_nodes = ARRAY_SIZE(mem_noc_nodes), .bcms = mem_noc_bcms, @@ -384,7 +384,7 @@ static struct qcom_icc_node *mmss_noc_nodes[] = { [SLAVE_CAMNOC_UNCOMP] = &qns_camnoc_uncomp, }; -const static struct qcom_icc_desc sdm845_mmss_noc = { +static const struct qcom_icc_desc sdm845_mmss_noc = { .nodes = mmss_noc_nodes, .num_nodes = ARRAY_SIZE(mmss_noc_nodes), .bcms = mmss_noc_bcms, @@ -430,7 +430,7 @@ static struct qcom_icc_node *system_noc_nodes[] = { [SLAVE_TCU] = &xs_sys_tcu_cfg, }; -const static struct qcom_icc_desc sdm845_system_noc = { +static const struct qcom_icc_desc sdm845_system_noc = { .nodes = system_noc_nodes, .num_nodes = ARRAY_SIZE(system_noc_nodes), .bcms = system_noc_bcms, -- cgit v1.2.3-59-g8ed1b From 2a15483b401c0b07e44b43b95414e36f32c02f32 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 29 Apr 2020 17:23:49 +0000 Subject: regulator: Revert "Use driver_deferred_probe_timeout for regulator_init_complete_work" This reverts commit dca0b44957e5 ("regulator: Use driver_deferred_probe_timeout for regulator_init_complete_work"), as we ended up reverting the default deferred_probe_timeout value back to zero, to preserve behavior with 5.6 we need to decouple the regulator timeout which was previously 30 seconds. This avoids breaking some systems that depend on the regulator timeout but don't require the deferred probe timeout. Cc: linux-pm@vger.kernel.org Cc: Linus Walleij Cc: Thierry Reding Cc: Liam Girdwood Cc: Bjorn Andersson Cc: Saravana Kannan Cc: Todd Kjos Cc: Len Brown Cc: Pavel Machek Cc: Ulf Hansson Cc: Kevin Hilman Cc: "Rafael J. Wysocki" Cc: Rob Herring Reported-by: Marek Szyprowski Suggested-by: Mark Brown Signed-off-by: John Stultz Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20200429172349.55979-1-john.stultz@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/core.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index c340505150b6..7486f6e4e613 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -5754,10 +5754,6 @@ static DECLARE_DELAYED_WORK(regulator_init_complete_work, static int __init regulator_init_complete(void) { - int delay = driver_deferred_probe_timeout; - - if (delay < 0) - delay = 0; /* * Since DT doesn't provide an idiomatic mechanism for * enabling full constraints and since it's much more natural @@ -5768,17 +5764,18 @@ static int __init regulator_init_complete(void) has_full_constraints = true; /* - * If driver_deferred_probe_timeout is set, we punt - * completion for that many seconds since systems like - * distros will load many drivers from userspace so consumers - * might not always be ready yet, this is particularly an - * issue with laptops where this might bounce the display off - * then on. Ideally we'd get a notification from userspace - * when this happens but we don't so just wait a bit and hope - * we waited long enough. It'd be better if we'd only do - * this on systems that need it. + * We punt completion for an arbitrary amount of time since + * systems like distros will load many drivers from userspace + * so consumers might not always be ready yet, this is + * particularly an issue with laptops where this might bounce + * the display off then on. Ideally we'd get a notification + * from userspace when this happens but we don't so just wait + * a bit and hope we waited long enough. It'd be better if + * we'd only do this on systems that need it, and a kernel + * command line option might be useful. */ - schedule_delayed_work(®ulator_init_complete_work, delay * HZ); + schedule_delayed_work(®ulator_init_complete_work, + msecs_to_jiffies(30000)); return 0; } -- cgit v1.2.3-59-g8ed1b From 6f8280cec1c9dfcd69c98e124ae6e0b61115158b Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 24 Apr 2020 11:26:38 +0530 Subject: MAINTAINERS: Add Vinod Koul as Generic PHY co-maintainer Add Vinod Koul as Generic PHY Subsystem co-maintainer and move the linux-phy to a shared repository. Cc: Vinod Koul Acked-By: Vinod Koul Signed-off-by: Kishon Vijay Abraham I --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index e64e5db31497..6c7adc14c389 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7115,9 +7115,10 @@ F: include/uapi/asm-generic/ GENERIC PHY FRAMEWORK M: Kishon Vijay Abraham I +M: Vinod Koul L: linux-kernel@vger.kernel.org S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/kishon/linux-phy.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/phy/linux-phy.git F: Documentation/devicetree/bindings/phy/ F: drivers/phy/ F: include/linux/phy/ -- cgit v1.2.3-59-g8ed1b From 820eeb9de62f1f479c04fc6575d874611b1e2095 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 7 Apr 2020 18:28:54 -0700 Subject: phy: qualcomm: usb-hs-28nm: Prepare clocks in init The AHB clock must be on for qcom_snps_hsphy_init() to be able to write the initialization sequence to the hardware, so move the clock enablement to phy init and exit. Fixes: 67b27dbeac4d ("phy: qualcomm: Add Synopsys 28nm Hi-Speed USB PHY driver") Signed-off-by: Bjorn Andersson Reviewed-by: Bryan O'Donoghue Signed-off-by: Vinod Koul Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/qualcomm/phy-qcom-usb-hs-28nm.c | 32 +++++++++++++++++++---------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-usb-hs-28nm.c b/drivers/phy/qualcomm/phy-qcom-usb-hs-28nm.c index d998e65c89c8..a52a9bf13b75 100644 --- a/drivers/phy/qualcomm/phy-qcom-usb-hs-28nm.c +++ b/drivers/phy/qualcomm/phy-qcom-usb-hs-28nm.c @@ -160,18 +160,11 @@ static int qcom_snps_hsphy_power_on(struct phy *phy) ret = regulator_bulk_enable(VREG_NUM, priv->vregs); if (ret) return ret; - ret = clk_bulk_prepare_enable(priv->num_clks, priv->clks); - if (ret) - goto err_disable_regulator; + qcom_snps_hsphy_disable_hv_interrupts(priv); qcom_snps_hsphy_exit_retention(priv); return 0; - -err_disable_regulator: - regulator_bulk_disable(VREG_NUM, priv->vregs); - - return ret; } static int qcom_snps_hsphy_power_off(struct phy *phy) @@ -180,7 +173,6 @@ static int qcom_snps_hsphy_power_off(struct phy *phy) qcom_snps_hsphy_enter_retention(priv); qcom_snps_hsphy_enable_hv_interrupts(priv); - clk_bulk_disable_unprepare(priv->num_clks, priv->clks); regulator_bulk_disable(VREG_NUM, priv->vregs); return 0; @@ -266,21 +258,39 @@ static int qcom_snps_hsphy_init(struct phy *phy) struct hsphy_priv *priv = phy_get_drvdata(phy); int ret; - ret = qcom_snps_hsphy_reset(priv); + ret = clk_bulk_prepare_enable(priv->num_clks, priv->clks); if (ret) return ret; + ret = qcom_snps_hsphy_reset(priv); + if (ret) + goto disable_clocks; + qcom_snps_hsphy_init_sequence(priv); ret = qcom_snps_hsphy_por_reset(priv); if (ret) - return ret; + goto disable_clocks; + + return 0; + +disable_clocks: + clk_bulk_disable_unprepare(priv->num_clks, priv->clks); + return ret; +} + +static int qcom_snps_hsphy_exit(struct phy *phy) +{ + struct hsphy_priv *priv = phy_get_drvdata(phy); + + clk_bulk_disable_unprepare(priv->num_clks, priv->clks); return 0; } static const struct phy_ops qcom_snps_hsphy_ops = { .init = qcom_snps_hsphy_init, + .exit = qcom_snps_hsphy_exit, .power_on = qcom_snps_hsphy_power_on, .power_off = qcom_snps_hsphy_power_off, .set_mode = qcom_snps_hsphy_set_mode, -- cgit v1.2.3-59-g8ed1b From 9f04db234af691007bb785342a06abab5fb34474 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 29 Apr 2020 17:52:18 +0200 Subject: USB: uas: add quirk for LaCie 2Big Quadra MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This device needs US_FL_NO_REPORT_OPCODES to avoid going through prolonged error handling on enumeration. Signed-off-by: Oliver Neukum Reported-by: Julian Groß Cc: stable Link: https://lore.kernel.org/r/20200429155218.7308-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_uas.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index 1b23741036ee..37157ed9a881 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -28,6 +28,13 @@ * and don't forget to CC: the USB development list */ +/* Reported-by: Julian Groß */ +UNUSUAL_DEV(0x059f, 0x105f, 0x0000, 0x9999, + "LaCie", + "2Big Quadra USB3", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_REPORT_OPCODES), + /* * Apricorn USB3 dongle sometimes returns "USBSUSBSUSBS" in response to SCSI * commands in UAS mode. Observed with the 1.28 firmware; are there others? -- cgit v1.2.3-59-g8ed1b From 6aea9e050394e83ac9fbd9fb0cb77c173e5bcae1 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 13 Apr 2020 16:10:16 -0700 Subject: KVM: arm64: Delete duplicated label in invalid_vector SYM_CODE_START defines \label , so it is redundant to define \label again. A redefinition at the same place is accepted by GNU as (https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=159fbb6088f17a341bcaaac960623cab881b4981) but rejected by the clang integrated assembler. Fixes: 617a2f392c92 ("arm64: kvm: Annotate assembly using modern annoations") Signed-off-by: Fangrui Song Signed-off-by: Marc Zyngier Tested-by: Nick Desaulniers Reviewed-by: Nick Desaulniers Link: https://github.com/ClangBuiltLinux/linux/issues/988 Link: https://lore.kernel.org/r/20200413231016.250737-1-maskray@google.com --- arch/arm64/kvm/hyp/hyp-entry.S | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index c2a13ab3c471..9c5cfb04170e 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -198,7 +198,6 @@ SYM_CODE_END(__hyp_panic) .macro invalid_vector label, target = __hyp_panic .align 2 SYM_CODE_START(\label) -\label: b \target SYM_CODE_END(\label) .endm -- cgit v1.2.3-59-g8ed1b From 6e977984f6d8e5689e079de1fd2e337cd17dcca5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 24 Apr 2020 14:24:34 +0100 Subject: KVM: arm64: Save/restore sp_el0 as part of __guest_enter We currently save/restore sp_el0 in C code. This is a bit unsafe, as a lot of the C code expects 'current' to be accessible from there (and the opportunity to run kernel code in HYP is specially great with VHE). Instead, let's move the save/restore of sp_el0 to the assembly code (in __guest_enter), making sure that sp_el0 is correct very early on when we exit the guest, and is preserved as long as possible to its host value when we enter the guest. Reviewed-by: Andrew Jones Acked-by: Mark Rutland Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/entry.S | 23 +++++++++++++++++++++++ arch/arm64/kvm/hyp/sysreg-sr.c | 17 +++-------------- 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index d22d0534dd60..90186cf6473e 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -18,6 +18,7 @@ #define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) #define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) +#define CPU_SP_EL0_OFFSET (CPU_XREG_OFFSET(30) + 8) .text .pushsection .hyp.text, "ax" @@ -47,6 +48,16 @@ ldp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)] .endm +.macro save_sp_el0 ctxt, tmp + mrs \tmp, sp_el0 + str \tmp, [\ctxt, #CPU_SP_EL0_OFFSET] +.endm + +.macro restore_sp_el0 ctxt, tmp + ldr \tmp, [\ctxt, #CPU_SP_EL0_OFFSET] + msr sp_el0, \tmp +.endm + /* * u64 __guest_enter(struct kvm_vcpu *vcpu, * struct kvm_cpu_context *host_ctxt); @@ -60,6 +71,9 @@ SYM_FUNC_START(__guest_enter) // Store the host regs save_callee_saved_regs x1 + // Save the host's sp_el0 + save_sp_el0 x1, x2 + // Now the host state is stored if we have a pending RAS SError it must // affect the host. If any asynchronous exception is pending we defer // the guest entry. The DSB isn't necessary before v8.2 as any SError @@ -83,6 +97,9 @@ alternative_else_nop_endif // when this feature is enabled for kernel code. ptrauth_switch_to_guest x29, x0, x1, x2 + // Restore the guest's sp_el0 + restore_sp_el0 x29, x0 + // Restore guest regs x0-x17 ldp x0, x1, [x29, #CPU_XREG_OFFSET(0)] ldp x2, x3, [x29, #CPU_XREG_OFFSET(2)] @@ -130,6 +147,9 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL) // Store the guest regs x18-x29, lr save_callee_saved_regs x1 + // Store the guest's sp_el0 + save_sp_el0 x1, x2 + get_host_ctxt x2, x3 // Macro ptrauth_switch_to_guest format: @@ -139,6 +159,9 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL) // when this feature is enabled for kernel code. ptrauth_switch_to_host x1, x2, x3, x4, x5 + // Restore the hosts's sp_el0 + restore_sp_el0 x2, x3 + // Now restore the host regs restore_callee_saved_regs x2 diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 75b1925763f1..6d2df9fe0b5d 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -15,8 +15,9 @@ /* * Non-VHE: Both host and guest must save everything. * - * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and pstate, - * which are handled as part of the el2 return state) on every switch. + * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and + * pstate, which are handled as part of the el2 return state) on every + * switch (sp_el0 is being dealt with in the assembly code). * tpidr_el0 and tpidrro_el0 only need to be switched when going * to host userspace or a different VCPU. EL1 registers only need to be * switched when potentially going to run a different VCPU. The latter two @@ -26,12 +27,6 @@ static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt) { ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1); - - /* - * The host arm64 Linux uses sp_el0 to point to 'current' and it must - * therefore be saved/restored on every entry/exit to/from the guest. - */ - ctxt->gp_regs.regs.sp = read_sysreg(sp_el0); } static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt) @@ -99,12 +94,6 @@ NOKPROBE_SYMBOL(sysreg_save_guest_state_vhe); static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt) { write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); - - /* - * The host arm64 Linux uses sp_el0 to point to 'current' and it must - * therefore be saved/restored on every entry/exit to/from the guest. - */ - write_sysreg(ctxt->gp_regs.regs.sp, sp_el0); } static void __hyp_text __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) -- cgit v1.2.3-59-g8ed1b From 958e8e14fd24c0aa1fe6a6beb2b985cb638577d6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 24 Apr 2020 15:30:30 +0100 Subject: KVM: arm64: vgic-v4: Initialize GICv4.1 even in the absence of a virtual ITS KVM now expects to be able to use HW-accelerated delivery of vSGIs as soon as the guest has enabled thm. Unfortunately, we only initialize the GICv4 context if we have a virtual ITS exposed to the guest. Fix it by always initializing the GICv4.1 context if it is available on the host. Fixes: 2291ff2f2a56 ("KVM: arm64: GICv4.1: Plumb SGI implementation selection in the distributor") Reviewed-by: Zenghui Yu Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-init.c | 9 ++++++++- virt/kvm/arm/vgic/vgic-mmio-v3.c | 3 ++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 30dbec9fe0b4..32e32d67a127 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -294,8 +294,15 @@ int vgic_init(struct kvm *kvm) } } - if (vgic_has_its(kvm)) { + if (vgic_has_its(kvm)) vgic_lpi_translation_cache_init(kvm); + + /* + * If we have GICv4.1 enabled, unconditionnaly request enable the + * v4 support so that we get HW-accelerated vSGIs. Otherwise, only + * enable it if we present a virtual ITS to the guest. + */ + if (vgic_supports_direct_msis(kvm)) { ret = vgic_v4_init(kvm); if (ret) goto out; diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 416613f2400c..89a14ec8b33b 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -50,7 +50,8 @@ bool vgic_has_its(struct kvm *kvm) bool vgic_supports_direct_msis(struct kvm *kvm) { - return kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm); + return (kvm_vgic_global_state.has_gicv4_1 || + (kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm))); } /* -- cgit v1.2.3-59-g8ed1b From 7f645462ca01d01abb94d75e6768c8b3ed3a188b Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 30 Apr 2020 08:18:51 +0000 Subject: bpf: Fix error return code in map_lookup_and_delete_elem() Fix to return negative error code -EFAULT from the copy_to_user() error handling case instead of 0, as done elsewhere in this function. Fixes: bd513cd08f10 ("bpf: add MAP_LOOKUP_AND_DELETE_ELEM syscall") Signed-off-by: Wei Yongjun Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200430081851.166996-1-weiyongjun1@huawei.com --- kernel/bpf/syscall.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 7626b8024471..2843bbba9ca1 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1485,8 +1485,10 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr) if (err) goto free_value; - if (copy_to_user(uvalue, value, value_size) != 0) + if (copy_to_user(uvalue, value, value_size) != 0) { + err = -EFAULT; goto free_value; + } err = 0; -- cgit v1.2.3-59-g8ed1b From ab5130186d7476dcee0d4e787d19a521ca552ce9 Mon Sep 17 00:00:00 2001 From: Rick Edgecombe Date: Wed, 22 Apr 2020 20:13:55 -0700 Subject: x86/mm/cpa: Flush direct map alias during cpa As an optimization, cpa_flush() was changed to optionally only flush the range in @cpa if it was small enough. However, this range does not include any direct map aliases changed in cpa_process_alias(). So small set_memory_() calls that touch that alias don't get the direct map changes flushed. This situation can happen when the virtual address taking variants are passed an address in vmalloc or modules space. In these cases, force a full TLB flush. Note this issue does not extend to cases where the set_memory_() calls are passed a direct map address, or page array, etc, as the primary target. In those cases the direct map would be flushed. Fixes: 935f5839827e ("x86/mm/cpa: Optimize cpa_flush_array() TLB invalidation") Signed-off-by: Rick Edgecombe Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200424105343.GA20730@hirez.programming.kicks-ass.net --- arch/x86/mm/pat/set_memory.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 59eca6a94ce7..b8c55a2e402d 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -43,7 +43,8 @@ struct cpa_data { unsigned long pfn; unsigned int flags; unsigned int force_split : 1, - force_static_prot : 1; + force_static_prot : 1, + force_flush_all : 1; struct page **pages; }; @@ -355,10 +356,10 @@ static void cpa_flush(struct cpa_data *data, int cache) return; } - if (cpa->numpages <= tlb_single_page_flush_ceiling) - on_each_cpu(__cpa_flush_tlb, cpa, 1); - else + if (cpa->force_flush_all || cpa->numpages > tlb_single_page_flush_ceiling) flush_tlb_all(); + else + on_each_cpu(__cpa_flush_tlb, cpa, 1); if (!cache) return; @@ -1598,6 +1599,8 @@ static int cpa_process_alias(struct cpa_data *cpa) alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); alias_cpa.curpage = 0; + cpa->force_flush_all = 1; + ret = __change_page_attr_set_clr(&alias_cpa, 0); if (ret) return ret; @@ -1618,6 +1621,7 @@ static int cpa_process_alias(struct cpa_data *cpa) alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); alias_cpa.curpage = 0; + cpa->force_flush_all = 1; /* * The high mapping range is imprecise, so ignore the * return value. -- cgit v1.2.3-59-g8ed1b From 54261af473be4c5481f6196064445d2945f2bdab Mon Sep 17 00:00:00 2001 From: KP Singh Date: Thu, 30 Apr 2020 17:52:40 +0200 Subject: security: Fix the default value of fs_context_parse_param hook security_fs_context_parse_param is called by vfs_parse_fs_param and a succussful return value (i.e 0) implies that a parameter will be consumed by the LSM framework. This stops all further parsing of the parmeter by VFS. Furthermore, if an LSM hook returns a success, the remaining LSM hooks are not invoked for the parameter. The current default behavior of returning success means that all the parameters are expected to be parsed by the LSM hook and none of them end up being populated by vfs in fs_context This was noticed when lsm=bpf is supplied on the command line before any other LSM. As the bpf lsm uses this default value to implement a default hook, this resulted in a failure to parse any fs_context parameters and a failure to mount the root filesystem. Fixes: 98e828a0650f ("security: Refactor declaration of LSM hooks") Reported-by: Mikko Ylinen Signed-off-by: KP Singh Signed-off-by: James Morris --- include/linux/lsm_hook_defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 9cd4455528e5..1bdd027766d4 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -55,7 +55,7 @@ LSM_HOOK(void, LSM_RET_VOID, bprm_committing_creds, struct linux_binprm *bprm) LSM_HOOK(void, LSM_RET_VOID, bprm_committed_creds, struct linux_binprm *bprm) LSM_HOOK(int, 0, fs_context_dup, struct fs_context *fc, struct fs_context *src_sc) -LSM_HOOK(int, 0, fs_context_parse_param, struct fs_context *fc, +LSM_HOOK(int, -ENOPARAM, fs_context_parse_param, struct fs_context *fc, struct fs_parameter *param) LSM_HOOK(int, 0, sb_alloc_security, struct super_block *sb) LSM_HOOK(void, LSM_RET_VOID, sb_free_security, struct super_block *sb) -- cgit v1.2.3-59-g8ed1b From 0225fd5e0a6a32af7af0aefac45c8ebf19dc5183 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 29 Apr 2020 11:21:55 +0100 Subject: KVM: arm64: Fix 32bit PC wrap-around In the unlikely event that a 32bit vcpu traps into the hypervisor on an instruction that is located right at the end of the 32bit range, the emulation of that instruction is going to increment PC past the 32bit range. This isn't great, as userspace can then observe this value and get a bit confused. Conversly, userspace can do things like (in the context of a 64bit guest that is capable of 32bit EL0) setting PSTATE to AArch64-EL0, set PC to a 64bit value, change PSTATE to AArch32-USR, and observe that PC hasn't been truncated. More confusion. Fix both by: - truncating PC increments for 32bit guests - sanitizing all 32bit regs every time a core reg is changed by userspace, and that PSTATE indicates a 32bit mode. Cc: stable@vger.kernel.org Acked-by: Will Deacon Signed-off-by: Marc Zyngier --- arch/arm64/kvm/guest.c | 7 +++++++ virt/kvm/arm/hyp/aarch32.c | 8 ++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 23ebe51410f0..50a279d3ddd7 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -200,6 +200,13 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) } memcpy((u32 *)regs + off, valp, KVM_REG_SIZE(reg->id)); + + if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) { + int i; + + for (i = 0; i < 16; i++) + *vcpu_reg32(vcpu, i) = (u32)*vcpu_reg32(vcpu, i); + } out: return err; } diff --git a/virt/kvm/arm/hyp/aarch32.c b/virt/kvm/arm/hyp/aarch32.c index d31f267961e7..25c0e47d57cb 100644 --- a/virt/kvm/arm/hyp/aarch32.c +++ b/virt/kvm/arm/hyp/aarch32.c @@ -125,12 +125,16 @@ static void __hyp_text kvm_adjust_itstate(struct kvm_vcpu *vcpu) */ void __hyp_text kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) { + u32 pc = *vcpu_pc(vcpu); bool is_thumb; is_thumb = !!(*vcpu_cpsr(vcpu) & PSR_AA32_T_BIT); if (is_thumb && !is_wide_instr) - *vcpu_pc(vcpu) += 2; + pc += 2; else - *vcpu_pc(vcpu) += 4; + pc += 4; + + *vcpu_pc(vcpu) = pc; + kvm_adjust_itstate(vcpu); } -- cgit v1.2.3-59-g8ed1b From 073919e09ca445d4486968e3f851372ff44cf2b5 Mon Sep 17 00:00:00 2001 From: Jesus Ramos Date: Mon, 27 Apr 2020 06:21:39 -0700 Subject: ALSA: usb-audio: Add control message quirk delay for Kingston HyperX headset Kingston HyperX headset with 0951:16ad also needs the same quirk for delaying the frequency controls. Signed-off-by: Jesus Ramos Cc: Link: https://lore.kernel.org/r/BY5PR19MB3634BA68C7CCA23D8DF428E796AF0@BY5PR19MB3634.namprd19.prod.outlook.com Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 848a4cc25bed..d8a765be5dfe 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1636,13 +1636,14 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe, && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) msleep(20); - /* Zoom R16/24, Logitech H650e, Jabra 550a needs a tiny delay here, - * otherwise requests like get/set frequency return as failed despite - * actually succeeding. + /* Zoom R16/24, Logitech H650e, Jabra 550a, Kingston HyperX needs a tiny + * delay here, otherwise requests like get/set frequency return as + * failed despite actually succeeding. */ if ((chip->usb_id == USB_ID(0x1686, 0x00dd) || chip->usb_id == USB_ID(0x046d, 0x0a46) || - chip->usb_id == USB_ID(0x0b0e, 0x0349)) && + chip->usb_id == USB_ID(0x0b0e, 0x0349) || + chip->usb_id == USB_ID(0x0951, 0x16ad)) && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) usleep_range(1000, 2000); } -- cgit v1.2.3-59-g8ed1b From c457a273e118bb96e1db8d1825f313e6cafe4258 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 24 Apr 2020 15:32:41 +0800 Subject: drm/amdgpu: move kfd suspend after ip_suspend_phase1 This sequence change should be safe as what did in ip_suspend_phase1 is to suspend DCE only. And this is a prerequisite for coming redundant cg/pg ungate dropping. Fixes: 487eca11a321ef ("drm/amdgpu: fix gfx hang during suspend with video playback (v2)") Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f84f9e35a73b..957d4872a855 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3375,12 +3375,12 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); - amdgpu_amdkfd_suspend(adev, !fbcon); - amdgpu_ras_suspend(adev); r = amdgpu_device_ip_suspend_phase1(adev); + amdgpu_amdkfd_suspend(adev, !fbcon); + /* evict vram memory */ amdgpu_bo_evict_vram(adev); -- cgit v1.2.3-59-g8ed1b From f7b52890daba570bc8162d43c96b5583bbdd4edd Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 24 Apr 2020 15:36:22 +0800 Subject: drm/amdgpu: drop redundant cg/pg ungate on runpm enter CG/PG ungate is already performed in ip_suspend_phase1. Otherwise, the CG/PG ungate will be performed twice. That will cause gfxoff disablement is performed twice also on runpm enter while gfxoff enablemnt once on rump exit. That will put gfxoff into disabled state. Fixes: b2a7e9735ab286 ("drm/amdgpu: fix the hw hang during perform system reboot and reset") Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 957d4872a855..affde2de2a0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3372,9 +3372,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) } } - amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); - amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); - amdgpu_ras_suspend(adev); r = amdgpu_device_ip_suspend_phase1(adev); -- cgit v1.2.3-59-g8ed1b From f33a6dec4e12fda128fde8f6b2fcc2252b4b59d1 Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Wed, 29 Apr 2020 18:28:01 +0200 Subject: drm/amdgpu/dc: Use WARN_ON_ONCE for ASSERT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Once should generally be enough for diagnosing what lead up to it, repeating it over and over can be pretty annoying. Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/os_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/os_types.h b/drivers/gpu/drm/amd/display/dc/os_types.h index c34eba19860a..6d7bca562eec 100644 --- a/drivers/gpu/drm/amd/display/dc/os_types.h +++ b/drivers/gpu/drm/amd/display/dc/os_types.h @@ -108,7 +108,7 @@ #define ASSERT(expr) ASSERT_CRITICAL(expr) #else -#define ASSERT(expr) WARN_ON(!(expr)) +#define ASSERT(expr) WARN_ON_ONCE(!(expr)) #endif #define BREAK_TO_DEBUGGER() ASSERT(0) -- cgit v1.2.3-59-g8ed1b From 59dfb0c64d3853d20dc84f4561f28d4f5a2ddc7d Mon Sep 17 00:00:00 2001 From: Daniel Kolesa Date: Wed, 29 Apr 2020 17:02:36 +0200 Subject: drm/amd/display: work around fp code being emitted outside of DC_FP_START/END The dcn20_validate_bandwidth function would have code touching the incorrect registers emitted outside of the boundaries of the DC_FP_START/END macros, at least on ppc64le. Work around the problem by wrapping the whole function instead. Signed-off-by: Daniel Kolesa Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.6.x --- .../gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 31 ++++++++++++++++------ 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index c3535bd3965b..e4348e3b6389 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -3068,25 +3068,32 @@ validate_out: return out; } - -bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, - bool fast_validate) +/* + * This must be noinline to ensure anything that deals with FP registers + * is contained within this call; previously our compiling with hard-float + * would result in fp instructions being emitted outside of the boundaries + * of the DC_FP_START/END macros, which makes sense as the compiler has no + * idea about what is wrapped and what is not + * + * This is largely just a workaround to avoid breakage introduced with 5.6, + * ideally all fp-using code should be moved into its own file, only that + * should be compiled with hard-float, and all code exported from there + * should be strictly wrapped with DC_FP_START/END + */ +static noinline bool dcn20_validate_bandwidth_fp(struct dc *dc, + struct dc_state *context, bool fast_validate) { bool voltage_supported = false; bool full_pstate_supported = false; bool dummy_pstate_supported = false; double p_state_latency_us; - DC_FP_START(); p_state_latency_us = context->bw_ctx.dml.soc.dram_clock_change_latency_us; context->bw_ctx.dml.soc.disable_dram_clock_change_vactive_support = dc->debug.disable_dram_clock_change_vactive_support; if (fast_validate) { - voltage_supported = dcn20_validate_bandwidth_internal(dc, context, true); - - DC_FP_END(); - return voltage_supported; + return dcn20_validate_bandwidth_internal(dc, context, true); } // Best case, we support full UCLK switch latency @@ -3115,7 +3122,15 @@ bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, restore_dml_state: context->bw_ctx.dml.soc.dram_clock_change_latency_us = p_state_latency_us; + return voltage_supported; +} +bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, + bool fast_validate) +{ + bool voltage_supported = false; + DC_FP_START(); + voltage_supported = dcn20_validate_bandwidth_fp(dc, context, fast_validate); DC_FP_END(); return voltage_supported; } -- cgit v1.2.3-59-g8ed1b From 57c4cfd4a2eef8f94052bd7c0fce0981f74fb213 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Wed, 19 Feb 2020 09:33:29 +0000 Subject: ftrace/selftests: workaround cgroup RT scheduling issues wakeup_rt.tc and wakeup.tc tests in tracers/ subdirectory fail due to the chrt command returning: chrt: failed to set pid 0's policy: Operation not permitted. To work around this, temporarily disable grout RT scheduling during ftracetest execution. Restore original value on test run completion. With these changes in place, both tests consistently pass. Fixes: c575dea2c1a5 ("selftests/ftrace: Add wakeup_rt tracer testcase") Fixes: c1edd060b413 ("selftests/ftrace: Add wakeup tracer testcase") Signed-off-by: Alan Maguire Acked-by: Steven Rostedt (VMware) Signed-off-by: Shuah Khan --- tools/testing/selftests/ftrace/ftracetest | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest index 063ecb290a5a..144308a757b7 100755 --- a/tools/testing/selftests/ftrace/ftracetest +++ b/tools/testing/selftests/ftrace/ftracetest @@ -29,8 +29,25 @@ err_ret=1 # kselftest skip code is 4 err_skip=4 +# cgroup RT scheduling prevents chrt commands from succeeding, which +# induces failures in test wakeup tests. Disable for the duration of +# the tests. + +readonly sched_rt_runtime=/proc/sys/kernel/sched_rt_runtime_us + +sched_rt_runtime_orig=$(cat $sched_rt_runtime) + +setup() { + echo -1 > $sched_rt_runtime +} + +cleanup() { + echo $sched_rt_runtime_orig > $sched_rt_runtime +} + errexit() { # message echo "Error: $1" 1>&2 + cleanup exit $err_ret } @@ -39,6 +56,8 @@ if [ `id -u` -ne 0 ]; then errexit "this must be run by root user" fi +setup + # Utilities absdir() { # file_path (cd `dirname $1`; pwd) @@ -235,6 +254,7 @@ TOTAL_RESULT=0 INSTANCE= CASENO=0 + testcase() { # testfile CASENO=$((CASENO+1)) desc=`grep "^#[ \t]*description:" $1 | cut -f2 -d:` @@ -406,5 +426,7 @@ prlog "# of unsupported: " `echo $UNSUPPORTED_CASES | wc -w` prlog "# of xfailed: " `echo $XFAILED_CASES | wc -w` prlog "# of undefined(test bug): " `echo $UNDEFINED_CASES | wc -w` +cleanup + # if no error, return 0 exit $TOTAL_RESULT -- cgit v1.2.3-59-g8ed1b From b730d668138cb3dd9ce78f8003986d1adae5523a Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Wed, 19 Feb 2020 09:33:30 +0000 Subject: ftrace/selftest: make unresolved cases cause failure if --fail-unresolved set Currently, ftracetest will return 1 (failure) if any unresolved cases are encountered. The unresolved status results from modules and programs not being available, and as such does not indicate any issues with ftrace itself. As such, change the behaviour of ftracetest in line with unsupported cases; if unsupported cases happen, ftracetest still returns 0 unless --fail-unsupported. Here --fail-unresolved is added and the default is to return 0 if unresolved results occur. Signed-off-by: Alan Maguire Acked-by: Masami Hiramatsu Acked-by: Steven Rostedt (VMware) Signed-off-by: Shuah Khan --- tools/testing/selftests/ftrace/ftracetest | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest index 144308a757b7..19e9236dec5e 100755 --- a/tools/testing/selftests/ftrace/ftracetest +++ b/tools/testing/selftests/ftrace/ftracetest @@ -17,6 +17,7 @@ echo " -v|--verbose Increase verbosity of test messages" echo " -vv Alias of -v -v (Show all results in stdout)" echo " -vvv Alias of -v -v -v (Show all commands immediately)" echo " --fail-unsupported Treat UNSUPPORTED as a failure" +echo " --fail-unresolved Treat UNRESOLVED as a failure" echo " -d|--debug Debug mode (trace all shell commands)" echo " -l|--logdir Save logs on the " echo " If is -, all logs output in console only" @@ -112,6 +113,10 @@ parse_opts() { # opts UNSUPPORTED_RESULT=1 shift 1 ;; + --fail-unresolved) + UNRESOLVED_RESULT=1 + shift 1 + ;; --logdir|-l) LOG_DIR=$2 shift 2 @@ -176,6 +181,7 @@ KEEP_LOG=0 DEBUG=0 VERBOSE=0 UNSUPPORTED_RESULT=0 +UNRESOLVED_RESULT=0 STOP_FAILURE=0 # Parse command-line options parse_opts $* @@ -280,7 +286,7 @@ eval_result() { # sigval $UNRESOLVED) prlog " [${color_blue}UNRESOLVED${color_reset}]" UNRESOLVED_CASES="$UNRESOLVED_CASES $CASENO" - return 1 # this is a kind of bug.. something happened. + return $UNRESOLVED_RESULT # depends on use case ;; $UNTESTED) prlog " [${color_blue}UNTESTED${color_reset}]" -- cgit v1.2.3-59-g8ed1b From 6734d211feaec0c24f90da77313dbe704437d8a8 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 1 May 2020 22:37:41 +0900 Subject: selftests/ftrace: Make XFAIL green color Since XFAIL (Expected Failure) is expected to fail the test, which means that test case works as we expected. IOW, XFAIL is same as PASS. So make it green. Signed-off-by: Masami Hiramatsu Acked-by: Steven Rostedt (VMware) Signed-off-by: Shuah Khan --- tools/testing/selftests/ftrace/ftracetest | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest index 19e9236dec5e..a4605b5ee66d 100755 --- a/tools/testing/selftests/ftrace/ftracetest +++ b/tools/testing/selftests/ftrace/ftracetest @@ -299,7 +299,7 @@ eval_result() { # sigval return $UNSUPPORTED_RESULT # depends on use case ;; $XFAIL) - prlog " [${color_red}XFAIL${color_reset}]" + prlog " [${color_green}XFAIL${color_reset}]" XFAILED_CASES="$XFAILED_CASES $CASENO" return 0 ;; -- cgit v1.2.3-59-g8ed1b From 66d69e081b526b6a6031f0d3ca8ddff71e5707a5 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 27 Apr 2020 18:11:07 -0600 Subject: selftests: fix kvm relocatable native/cross builds and installs kvm test Makefile doesn't fully support cross-builds and installs. UNAME_M = $(shell uname -m) variable is used to define the target programs and libraries to be built from arch specific sources in sub-directories. For cross-builds to work, UNAME_M has to map to ARCH and arch specific directories and targets in this Makefile. UNAME_M variable to used to run the compiles pointing to the right arch directories and build the right targets for these supported architectures. TEST_GEN_PROGS and LIBKVM are set using UNAME_M variable. LINUX_TOOL_ARCH_INCLUDE is set using ARCH variable. x86_64 targets are named to include x86_64 as a suffix and directories for includes are in x86_64 sub-directory. s390x and aarch64 follow the same convention. "uname -m" doesn't result in the correct mapping for s390x and aarch64. Fix it to set UNAME_M correctly for s390x and aarch64 cross-builds. In addition, Makefile doesn't create arch sub-directories in the case of relocatable builds and test programs under s390x and x86_64 directories fail to build. This is a problem for native and cross-builds. Fix it to create all necessary directories keying off of TEST_GEN_PROGS. The following use-cases work with this change: Native x86_64: make O=/tmp/kselftest -C tools/testing/selftests TARGETS=kvm install \ INSTALL_PATH=$HOME/x86_64 arm64 cross-build: make O=$HOME/arm64_build/ ARCH=arm64 HOSTCC=gcc \ CROSS_COMPILE=aarch64-linux-gnu- defconfig make O=$HOME/arm64_build/ ARCH=arm64 HOSTCC=gcc \ CROSS_COMPILE=aarch64-linux-gnu- all make kselftest-install TARGETS=kvm O=$HOME/arm64_build ARCH=arm64 \ HOSTCC=gcc CROSS_COMPILE=aarch64-linux-gnu- s390x cross-build: make O=$HOME/s390x_build/ ARCH=s390 HOSTCC=gcc \ CROSS_COMPILE=s390x-linux-gnu- defconfig make O=$HOME/s390x_build/ ARCH=s390 HOSTCC=gcc \ CROSS_COMPILE=s390x-linux-gnu- all make kselftest-install TARGETS=kvm O=$HOME/s390x_build/ ARCH=s390 \ HOSTCC=gcc CROSS_COMPILE=s390x-linux-gnu- all No regressions in the following use-cases: make -C tools/testing/selftests TARGETS=kvm make kselftest-all TARGETS=kvm Signed-off-by: Shuah Khan --- tools/testing/selftests/kvm/Makefile | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 712a2ddd2a27..b728c0a0f9b2 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -5,8 +5,34 @@ all: top_srcdir = ../../../.. KSFT_KHDR_INSTALL := 1 + +# For cross-builds to work, UNAME_M has to map to ARCH and arch specific +# directories and targets in this Makefile. "uname -m" doesn't map to +# arch specific sub-directory names. +# +# UNAME_M variable to used to run the compiles pointing to the right arch +# directories and build the right targets for these supported architectures. +# +# TEST_GEN_PROGS and LIBKVM are set using UNAME_M variable. +# LINUX_TOOL_ARCH_INCLUDE is set using ARCH variable. +# +# x86_64 targets are named to include x86_64 as a suffix and directories +# for includes are in x86_64 sub-directory. s390x and aarch64 follow the +# same convention. "uname -m" doesn't result in the correct mapping for +# s390x and aarch64. +# +# No change necessary for x86_64 UNAME_M := $(shell uname -m) +# Set UNAME_M for arm64 compile/install to work +ifeq ($(ARCH),arm64) + UNAME_M := aarch64 +endif +# Set UNAME_M s390x compile/install to work +ifeq ($(ARCH),s390) + UNAME_M := s390x +endif + LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c @@ -53,7 +79,7 @@ LIBKVM += $(LIBKVM_$(UNAME_M)) INSTALL_HDR_PATH = $(top_srcdir)/usr LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include -LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/x86/include +LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \ @@ -84,6 +110,7 @@ $(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c $(OUTPUT)/libkvm.a: $(LIBKVM_OBJ) $(AR) crs $@ $^ +x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS)))) all: $(STATIC_LIBS) $(TEST_GEN_PROGS): $(STATIC_LIBS) -- cgit v1.2.3-59-g8ed1b From fdc63ff0e49c588884992b4b2656345a5e878b32 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 8 Apr 2020 21:13:10 +0300 Subject: ftrace/x86: Fix trace event registration for syscalls without arguments The refactoring of SYSCALL_DEFINE0() macros removed the ABI stubs and simply defines __abi_sys_$NAME as alias of __do_sys_$NAME. As a result kallsyms_lookup() returns "__do_sys_$NAME" which does not match with the declared trace event name. See also commit 1c758a2202a6 ("tracing/x86: Update syscall trace events to handle new prefixed syscall func names"). Add __do_sys_ to the valid prefixes which are checked in arch_syscall_match_sym_name(). Fixes: d2b5de495ee9 ("x86/entry: Refactor SYSCALL_DEFINE0 macros") Signed-off-by: Konstantin Khlebnikov Signed-off-by: Thomas Gleixner Acked-by: Steven Rostedt (VMware) Link: https://lkml.kernel.org/r/158636958997.7900.16485049455470033557.stgit@buzz --- arch/x86/include/asm/ftrace.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 85be2f506272..70b96cae5b42 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -61,11 +61,12 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name { /* * Compare the symbol name with the system call name. Skip the - * "__x64_sys", "__ia32_sys" or simple "sys" prefix. + * "__x64_sys", "__ia32_sys", "__do_sys" or simple "sys" prefix. */ return !strcmp(sym + 3, name + 3) || (!strncmp(sym, "__x64_", 6) && !strcmp(sym + 9, name + 3)) || - (!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3)); + (!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3)) || + (!strncmp(sym, "__do_sys", 8) && !strcmp(sym + 8, name + 3)); } #ifndef COMPILE_OFFSETS -- cgit v1.2.3-59-g8ed1b From c84cb3735fd53c91101ccdb191f2e3331a9262cb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 27 Apr 2020 16:55:57 +0200 Subject: x86/apic: Move TSC deadline timer debug printk Leon reported that the printk_once() in __setup_APIC_LVTT() triggers a lockdep splat due to a lock order violation between hrtimer_base::lock and console_sem, when the 'once' condition is reset via /sys/kernel/debug/clear_warn_once after boot. The initial printk cannot trigger this because that happens during boot when the local APIC timer is set up on the boot CPU. Prevent it by moving the printk to a place which is guaranteed to be only called once during boot. Mark the deadline timer check related functions and data __init while at it. Reported-by: Leon Romanovsky Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/87y2qhoshi.fsf@nanos.tec.linutronix.de --- arch/x86/kernel/apic/apic.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 81b9c63dae1b..e53dda210cd7 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -352,8 +352,6 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) * According to Intel, MFENCE can do the serialization here. */ asm volatile("mfence" : : : "memory"); - - printk_once(KERN_DEBUG "TSC deadline timer enabled\n"); return; } @@ -546,7 +544,7 @@ static struct clock_event_device lapic_clockevent = { }; static DEFINE_PER_CPU(struct clock_event_device, lapic_events); -static u32 hsx_deadline_rev(void) +static __init u32 hsx_deadline_rev(void) { switch (boot_cpu_data.x86_stepping) { case 0x02: return 0x3a; /* EP */ @@ -556,7 +554,7 @@ static u32 hsx_deadline_rev(void) return ~0U; } -static u32 bdx_deadline_rev(void) +static __init u32 bdx_deadline_rev(void) { switch (boot_cpu_data.x86_stepping) { case 0x02: return 0x00000011; @@ -568,7 +566,7 @@ static u32 bdx_deadline_rev(void) return ~0U; } -static u32 skx_deadline_rev(void) +static __init u32 skx_deadline_rev(void) { switch (boot_cpu_data.x86_stepping) { case 0x03: return 0x01000136; @@ -581,7 +579,7 @@ static u32 skx_deadline_rev(void) return ~0U; } -static const struct x86_cpu_id deadline_match[] = { +static const struct x86_cpu_id deadline_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL( HASWELL_X, &hsx_deadline_rev), X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_X, 0x0b000020), X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_D, &bdx_deadline_rev), @@ -603,18 +601,19 @@ static const struct x86_cpu_id deadline_match[] = { {}, }; -static void apic_check_deadline_errata(void) +static __init bool apic_validate_deadline_timer(void) { const struct x86_cpu_id *m; u32 rev; - if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) || - boot_cpu_has(X86_FEATURE_HYPERVISOR)) - return; + if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) + return false; + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + return true; m = x86_match_cpu(deadline_match); if (!m) - return; + return true; /* * Function pointers will have the MSB set due to address layout, @@ -626,11 +625,12 @@ static void apic_check_deadline_errata(void) rev = (u32)m->driver_data; if (boot_cpu_data.microcode >= rev) - return; + return true; setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; " "please update microcode to version: 0x%x (or later)\n", rev); + return false; } /* @@ -2092,7 +2092,8 @@ void __init init_apic_mappings(void) { unsigned int new_apicid; - apic_check_deadline_errata(); + if (apic_validate_deadline_timer()) + pr_debug("TSC deadline timer available\n"); if (x2apic_mode) { boot_cpu_physical_apicid = read_apic_id(); -- cgit v1.2.3-59-g8ed1b From 1034872123a06b759aba772b1c99612ccb8e632a Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 3 May 2020 13:57:18 +0900 Subject: ALSA: firewire-lib: fix 'function sizeof not defined' error of tracepoints format The snd-firewire-lib.ko has 'amdtp-packet' event of tracepoints. Current printk format for the event includes 'sizeof(u8)' macro expected to be extended in compilation time. However, this is not done. As a result, perf tools cannot parse the event for printing: $ mount -l -t debugfs debugfs on /sys/kernel/debug type debugfs (rw,nosuid,nodev,noexec,relatime) $ cat /sys/kernel/debug/tracing/events/snd_firewire_lib/amdtp_packet/format ... print fmt: "%02u %04u %04x %04x %02d %03u %02u %03u %02u %01u %02u %s", REC->second, REC->cycle, REC->src, REC->dest, REC->channel, REC->payload_quadlets, REC->data_blocks, REC->data_block_counter, REC->packet_index, REC->irq, REC->index, __print_array(__get_dynamic_array(cip_header), __get_dynamic_array_len(cip_header), sizeof(u8)) $ sudo perf record -e snd_firewire_lib:amdtp_packet [snd_firewire_lib:amdtp_packet] function sizeof not defined Error: expected type 5 but read 0 This commit fixes it by obsoleting the macro with actual size. Cc: Fixes: bde2bbdb307a ("ALSA: firewire-lib: use dynamic array for CIP header of tracing events") Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20200503045718.86337-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai --- sound/firewire/amdtp-stream-trace.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/firewire/amdtp-stream-trace.h b/sound/firewire/amdtp-stream-trace.h index 16c7f6605511..26e7cb555d3c 100644 --- a/sound/firewire/amdtp-stream-trace.h +++ b/sound/firewire/amdtp-stream-trace.h @@ -66,8 +66,7 @@ TRACE_EVENT(amdtp_packet, __entry->irq, __entry->index, __print_array(__get_dynamic_array(cip_header), - __get_dynamic_array_len(cip_header), - sizeof(u8))) + __get_dynamic_array_len(cip_header), 1)) ); #endif -- cgit v1.2.3-59-g8ed1b From fb9cbbc895eb6e986dc90c928a35c793d75f435a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 28 Apr 2020 02:16:40 -0500 Subject: x86/unwind/orc: Move ORC sorting variables under !CONFIG_MODULES Fix the following warnings seen with !CONFIG_MODULES: arch/x86/kernel/unwind_orc.c:29:26: warning: 'cur_orc_table' defined but not used [-Wunused-variable] 29 | static struct orc_entry *cur_orc_table = __start_orc_unwind; | ^~~~~~~~~~~~~ arch/x86/kernel/unwind_orc.c:28:13: warning: 'cur_orc_ip_table' defined but not used [-Wunused-variable] 28 | static int *cur_orc_ip_table = __start_orc_unwind_ip; | ^~~~~~~~~~~~~~~~ Fixes: 153eb2223c79 ("x86/unwind/orc: Convert global variables to static") Reported-by: Stephen Rothwell Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linux Next Mailing List Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lore.kernel.org/r/20200428071640.psn5m7eh3zt2in4v@treble --- arch/x86/kernel/unwind_orc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 0ebc11a8bb45..5b0bd8581fe6 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -24,10 +24,6 @@ extern struct orc_entry __stop_orc_unwind[]; static bool orc_init __ro_after_init; static unsigned int lookup_num_blocks __ro_after_init; -static DEFINE_MUTEX(sort_mutex); -static int *cur_orc_ip_table = __start_orc_unwind_ip; -static struct orc_entry *cur_orc_table = __start_orc_unwind; - static inline unsigned long orc_ip(const int *ip) { return (unsigned long)ip + *ip; @@ -192,6 +188,10 @@ static struct orc_entry *orc_find(unsigned long ip) #ifdef CONFIG_MODULES +static DEFINE_MUTEX(sort_mutex); +static int *cur_orc_ip_table = __start_orc_unwind_ip; +static struct orc_entry *cur_orc_table = __start_orc_unwind; + static void orc_sort_swap(void *_a, void *_b, int size) { struct orc_entry *orc_a, *orc_b; -- cgit v1.2.3-59-g8ed1b From f41224efcf8aafe80ea47ac870c5e32f3209ffc8 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Sun, 3 May 2020 23:24:46 +0800 Subject: Revert "ALSA: hda/realtek: Fix pop noise on ALC225" This reverts commit 3b36b13d5e69d6f51ff1c55d1b404a74646c9757. Enable power save node breaks some systems with ACL225. Revert the patch and use a platform specific quirk for the original issue isntead. Fixes: 3b36b13d5e69 ("ALSA: hda/realtek: Fix pop noise on ALC225") BugLink: https://bugs.launchpad.net/bugs/1875916 Signed-off-by: Kai-Heng Feng Link: https://lore.kernel.org/r/20200503152449.22761-1-kai.heng.feng@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index c16f63957c5a..7bb025fb120a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8209,8 +8209,6 @@ static int patch_alc269(struct hda_codec *codec) spec->gen.mixer_nid = 0; break; case 0x10ec0225: - codec->power_save_node = 1; - /* fall through */ case 0x10ec0295: case 0x10ec0299: spec->codec_variant = ALC269_TYPE_ALC225; -- cgit v1.2.3-59-g8ed1b From 52e4e36807aeac1cdd07b14e509c8a64101e1a09 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Sun, 3 May 2020 23:24:47 +0800 Subject: ALSA: hda/realtek - Fix S3 pop noise on Dell Wyse Commit 317d9313925c ("ALSA: hda/realtek - Set default power save node to 0") makes the ALC225 have pop noise on S3 resume and cold boot. The previous fix enable power save node universally for ALC225, however it makes some ALC225 systems unable to produce any sound. So let's only enable power save node for the affected Dell Wyse platform. Fixes: 317d9313925c ("ALSA: hda/realtek - Set default power save node to 0") BugLink: https://bugs.launchpad.net/bugs/1866357 Signed-off-by: Kai-Heng Feng Link: https://lore.kernel.org/r/20200503152449.22761-2-kai.heng.feng@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 7bb025fb120a..1f4b9e387d9f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5856,6 +5856,15 @@ static void alc233_alc662_fixup_lenovo_dual_codecs(struct hda_codec *codec, } } +static void alc225_fixup_s3_pop_noise(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + if (action != HDA_FIXUP_ACT_PRE_PROBE) + return; + + codec->power_save_node = 1; +} + /* Forcibly assign NID 0x03 to HP/LO while NID 0x02 to SPK for EQ */ static void alc274_fixup_bind_dacs(struct hda_codec *codec, const struct hda_fixup *fix, int action) @@ -6045,6 +6054,7 @@ enum { ALC233_FIXUP_ACER_HEADSET_MIC, ALC294_FIXUP_LENOVO_MIC_LOCATION, ALC225_FIXUP_DELL_WYSE_MIC_NO_PRESENCE, + ALC225_FIXUP_S3_POP_NOISE, ALC700_FIXUP_INTEL_REFERENCE, ALC274_FIXUP_DELL_BIND_DACS, ALC274_FIXUP_DELL_AIO_LINEOUT_VERB, @@ -6932,6 +6942,12 @@ static const struct hda_fixup alc269_fixups[] = { { } }, .chained = true, + .chain_id = ALC225_FIXUP_S3_POP_NOISE + }, + [ALC225_FIXUP_S3_POP_NOISE] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc225_fixup_s3_pop_noise, + .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC }, [ALC700_FIXUP_INTEL_REFERENCE] = { -- cgit v1.2.3-59-g8ed1b From da7a8f1a8fc3e14c6dcc52b4098bddb8f20390be Mon Sep 17 00:00:00 2001 From: Andrew Oakley Date: Sun, 3 May 2020 15:16:39 +0100 Subject: ALSA: usb-audio: add mapping for ASRock TRX40 Creator This is another TRX40 based motherboard with ALC1220-VB USB-audio that requires a static mapping table. This motherboard also has a PCI device which advertises no codecs. The PCI ID is 1022:1487 and PCI SSID is 1022:d102. As this is using the AMD vendor ID, don't blacklist for now in case other boards have a working audio device with the same ssid. alsa-info.sh report for this board: http://alsa-project.org/db/?f=0a742f89066527497b77ce16bca486daccf8a70c Signed-off-by: Andrew Oakley Link: https://lore.kernel.org/r/20200503141639.35519-1-andrew@adoakley.name Signed-off-by: Takashi Iwai --- sound/usb/mixer_maps.c | 5 +++++ sound/usb/quirks-table.h | 1 + 2 files changed, 6 insertions(+) diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c index 0260c750e156..bfdc6ad52785 100644 --- a/sound/usb/mixer_maps.c +++ b/sound/usb/mixer_maps.c @@ -549,6 +549,11 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = { .map = trx40_mobo_map, .connector_map = trx40_mobo_connector_map, }, + { /* Asrock TRX40 Creator */ + .id = USB_ID(0x26ce, 0x0a01), + .map = trx40_mobo_map, + .connector_map = trx40_mobo_connector_map, + }, { 0 } /* terminator */ }; diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index a1df4c5b4f8c..6313c30f5c85 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -3563,6 +3563,7 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), ALC1220_VB_DESKTOP(0x0414, 0xa002), /* Gigabyte TRX40 Aorus Pro WiFi */ ALC1220_VB_DESKTOP(0x0db0, 0x0d64), /* MSI TRX40 Creator */ ALC1220_VB_DESKTOP(0x0db0, 0x543d), /* MSI TRX40 */ +ALC1220_VB_DESKTOP(0x26ce, 0x0a01), /* Asrock TRX40 Creator */ #undef ALC1220_VB_DESKTOP #undef USB_DEVICE_VENDOR_SPEC -- cgit v1.2.3-59-g8ed1b From c3e2850a9b68a62e7949633102d0351773846136 Mon Sep 17 00:00:00 2001 From: Gurchetan Singh Date: Fri, 1 May 2020 11:55:57 -0700 Subject: drm/virtio: create context before RESOURCE_CREATE_2D in 3D mode If 3D is enabled, but userspace requests a dumb buffer, we will call CTX_ATTACH_RESOURCE before actually creating the context. Fixes: 72b48ae800da ("drm/virtio: enqueue virtio_gpu_create_context after the first 3D ioctl") Signed-off-by: Gurchetan Singh Link: http://patchwork.freedesktop.org/patch/msgid/20200501185557.740-1-gurchetansingh@chromium.org Signed-off-by: Gerd Hoffmann --- drivers/gpu/drm/virtio/virtgpu_drv.h | 1 + drivers/gpu/drm/virtio/virtgpu_gem.c | 3 +++ drivers/gpu/drm/virtio/virtgpu_ioctl.c | 3 +-- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index c1824bdf2418..7879ff58236f 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -221,6 +221,7 @@ struct virtio_gpu_fpriv { /* virtio_ioctl.c */ #define DRM_VIRTIO_NUM_IOCTLS 10 extern struct drm_ioctl_desc virtio_gpu_ioctls[DRM_VIRTIO_NUM_IOCTLS]; +void virtio_gpu_create_context(struct drm_device *dev, struct drm_file *file); /* virtio_kms.c */ int virtio_gpu_init(struct drm_device *dev); diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c b/drivers/gpu/drm/virtio/virtgpu_gem.c index 0d6152c99a27..f0d5a8974677 100644 --- a/drivers/gpu/drm/virtio/virtgpu_gem.c +++ b/drivers/gpu/drm/virtio/virtgpu_gem.c @@ -39,6 +39,9 @@ int virtio_gpu_gem_create(struct drm_file *file, int ret; u32 handle; + if (vgdev->has_virgl_3d) + virtio_gpu_create_context(dev, file); + ret = virtio_gpu_object_create(vgdev, params, &obj, NULL); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index 336cc9143205..70ba13e8e58a 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -33,8 +33,7 @@ #include "virtgpu_drv.h" -static void virtio_gpu_create_context(struct drm_device *dev, - struct drm_file *file) +void virtio_gpu_create_context(struct drm_device *dev, struct drm_file *file) { struct virtio_gpu_device *vgdev = dev->dev_private; struct virtio_gpu_fpriv *vfpriv = file->driver_priv; -- cgit v1.2.3-59-g8ed1b From 092a9f59bc05904d4555fa012db12e768734ba1a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 30 Apr 2020 18:39:04 -0700 Subject: Revert "tty: serial: bcm63xx: fix missing clk_put() in bcm63xx_uart" This reverts commit 580d952e44de5509c69c8f9346180ecaa78ebeec ("tty: serial: bcm63xx: fix missing clk_put() in bcm63xx_uart") because we should not be doing a clk_put() if we were not successful in getting a valid clock reference via clk_get() in the first place. Fixes: 580d952e44de ("tty: serial: bcm63xx: fix missing clk_put() in bcm63xx_uart") Signed-off-by: Florian Fainelli Cc: stable Link: https://lore.kernel.org/r/20200501013904.1394-1-f.fainelli@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/bcm63xx_uart.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/tty/serial/bcm63xx_uart.c b/drivers/tty/serial/bcm63xx_uart.c index ed0aa5c0d9b7..5674da2b76f0 100644 --- a/drivers/tty/serial/bcm63xx_uart.c +++ b/drivers/tty/serial/bcm63xx_uart.c @@ -843,10 +843,8 @@ static int bcm_uart_probe(struct platform_device *pdev) if (IS_ERR(clk) && pdev->dev.of_node) clk = of_clk_get(pdev->dev.of_node, 0); - if (IS_ERR(clk)) { - clk_put(clk); + if (IS_ERR(clk)) return -ENODEV; - } port->iotype = UPIO_MEM; port->irq = res_irq->start; -- cgit v1.2.3-59-g8ed1b From 57d38f26d81e4275748b69372f31df545dcd9b71 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Sat, 2 May 2020 11:01:07 -0400 Subject: vt: fix unicode console freeing with a common interface By directly using kfree() in different places we risk missing one if it is switched to using vfree(), especially if the corresponding vmalloc() is hidden away within a common abstraction. Oh wait, that's exactly what happened here. So let's fix this by creating a common abstraction for the free case as well. Signed-off-by: Nicolas Pitre Reported-by: syzbot+0bfda3ade1ee9288a1be@syzkaller.appspotmail.com Fixes: 9a98e7a80f95 ("vt: don't use kmalloc() for the unicode screen buffer") Cc: Reviewed-by: Sam Ravnborg Link: https://lore.kernel.org/r/nycvar.YSQ.7.76.2005021043110.2671@knanqh.ubzr Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index e5ffed795e4c..48a8199f7845 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -365,9 +365,14 @@ static struct uni_screen *vc_uniscr_alloc(unsigned int cols, unsigned int rows) return uniscr; } +static void vc_uniscr_free(struct uni_screen *uniscr) +{ + vfree(uniscr); +} + static void vc_uniscr_set(struct vc_data *vc, struct uni_screen *new_uniscr) { - vfree(vc->vc_uni_screen); + vc_uniscr_free(vc->vc_uni_screen); vc->vc_uni_screen = new_uniscr; } @@ -1230,7 +1235,7 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc, err = resize_screen(vc, new_cols, new_rows, user); if (err) { kfree(newscreen); - kfree(new_uniscr); + vc_uniscr_free(new_uniscr); return err; } -- cgit v1.2.3-59-g8ed1b From c59359a02d14a7256cd508a4886b7d2012df2363 Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Mon, 4 May 2020 08:35:12 +0200 Subject: drm: ingenic-drm: add MODULE_DEVICE_TABLE so that the driver can load by matching the device tree if compiled as module. Cc: stable@vger.kernel.org # v5.3+ Fixes: 90b86fcc47b4 ("DRM: Add KMS driver for the Ingenic JZ47xx SoCs") Signed-off-by: H. Nikolaus Schaller Signed-off-by: Paul Cercueil Link: https://patchwork.freedesktop.org/patch/msgid/1694a29b7a3449b6b662cec33d1b33f2ee0b174a.1588574111.git.hns@goldelico.com --- drivers/gpu/drm/ingenic/ingenic-drm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/ingenic/ingenic-drm.c b/drivers/gpu/drm/ingenic/ingenic-drm.c index 9dfe7cb530e1..1754c0547069 100644 --- a/drivers/gpu/drm/ingenic/ingenic-drm.c +++ b/drivers/gpu/drm/ingenic/ingenic-drm.c @@ -843,6 +843,7 @@ static const struct of_device_id ingenic_drm_of_match[] = { { .compatible = "ingenic,jz4770-lcd", .data = &jz4770_soc_info }, { /* sentinel */ }, }; +MODULE_DEVICE_TABLE(of, ingenic_drm_of_match); static struct platform_driver ingenic_drm_driver = { .driver = { -- cgit v1.2.3-59-g8ed1b From 3a3a71f97c30983f1627c2c550d43566e9b634d2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Apr 2020 23:50:51 +0200 Subject: sun6i: dsi: fix gcc-4.8 Older compilers warn about initializers with incorrect curly braces: drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c: In function 'sun6i_dsi_encoder_enable': drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c:720:8: error: missing braces around initializer [-Werror=missing-braces] union phy_configure_opts opts = { 0 }; ^ drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c:720:8: error: (near initialization for 'opts.mipi_dphy') [-Werror=missing-braces] Use the GNU empty initializer extension to avoid this. Fixes: bb3b6fcb6849 ("sun6i: dsi: Convert to generic phy handling") Reviewed-by: Paul Kocialkowski Signed-off-by: Arnd Bergmann Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20200428215105.3928459-1-arnd@arndb.de --- drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c index 059939789730..3eb89f1eb0e1 100644 --- a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c +++ b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c @@ -717,7 +717,7 @@ static void sun6i_dsi_encoder_enable(struct drm_encoder *encoder) struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode; struct sun6i_dsi *dsi = encoder_to_sun6i_dsi(encoder); struct mipi_dsi_device *device = dsi->device; - union phy_configure_opts opts = { 0 }; + union phy_configure_opts opts = { }; struct phy_configure_opts_mipi_dphy *cfg = &opts.mipi_dphy; u16 delay; int err; -- cgit v1.2.3-59-g8ed1b From d8f1b9716cfd1a1f74c0fedad40c5f65a25aa208 Mon Sep 17 00:00:00 2001 From: Xiaoguang Wang Date: Sun, 26 Apr 2020 15:54:43 +0800 Subject: io_uring: fix mismatched finish_wait() calls in io_uring_cancel_files() The prepare_to_wait() and finish_wait() calls in io_uring_cancel_files() are mismatched. Currently I don't see any issues related this bug, just find it by learning codes. Signed-off-by: Xiaoguang Wang Signed-off-by: Jens Axboe --- fs/io_uring.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 0b91b0631173..6d52ff98279d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7360,11 +7360,9 @@ static int io_uring_release(struct inode *inode, struct file *file) static void io_uring_cancel_files(struct io_ring_ctx *ctx, struct files_struct *files) { - struct io_kiocb *req; - DEFINE_WAIT(wait); - while (!list_empty_careful(&ctx->inflight_list)) { - struct io_kiocb *cancel_req = NULL; + struct io_kiocb *cancel_req = NULL, *req; + DEFINE_WAIT(wait); spin_lock_irq(&ctx->inflight_lock); list_for_each_entry(req, &ctx->inflight_list, inflight_entry) { @@ -7404,6 +7402,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, */ if (refcount_sub_and_test(2, &cancel_req->refs)) { io_put_req(cancel_req); + finish_wait(&ctx->inflight_wait, &wait); continue; } } @@ -7411,8 +7410,8 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, io_wq_cancel_work(ctx->io_wq, &cancel_req->work); io_put_req(cancel_req); schedule(); + finish_wait(&ctx->inflight_wait, &wait); } - finish_wait(&ctx->inflight_wait, &wait); } static int io_uring_flush(struct file *file, void *data) -- cgit v1.2.3-59-g8ed1b From f9336e3281880b683137bc18f91848ac34af84c3 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 4 May 2020 08:35:06 -0700 Subject: KVM: nVMX: Replace a BUG_ON(1) with BUG() to squash clang warning Use BUG() in the impossible-to-hit default case when switching on the scope of INVEPT to squash a warning with clang 11 due to clang treating the BUG_ON() as conditional. >> arch/x86/kvm/vmx/nested.c:5246:3: warning: variable 'roots_to_free' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] BUG_ON(1); Reported-by: kbuild test robot Fixes: ce8fe7b77bd8 ("KVM: nVMX: Free only the affected contexts when emulating INVEPT") Signed-off-by: Sean Christopherson Message-Id: <20200504153506.28898-1-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index fd78ffbde644..e44f33c82332 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5165,7 +5165,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) */ break; default: - BUG_ON(1); + BUG(); break; } -- cgit v1.2.3-59-g8ed1b From dee919d15dcf70f4ce84b7da9b77bdc1c307454c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 4 May 2020 09:34:10 -0400 Subject: KVM: SVM: fill in kvm_run->debug.arch.dr[67] The corresponding code was added for VMX in commit 42dbaa5a057 ("KVM: x86: Virtualize debug registers, 2008-12-15) but never for AMD. Fix this. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 2f379bacbb26..38f6aeefeb55 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1752,6 +1752,8 @@ static int db_interception(struct vcpu_svm *svm) if (svm->vcpu.guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) { kvm_run->exit_reason = KVM_EXIT_DEBUG; + kvm_run->debug.arch.dr6 = svm->vmcb->save.dr6; + kvm_run->debug.arch.dr7 = svm->vmcb->save.dr7; kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; kvm_run->debug.arch.exception = DB_VECTOR; -- cgit v1.2.3-59-g8ed1b From 637543a8d61c6afe4e9be64bfb43c78701a83375 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Tue, 7 Apr 2020 01:13:09 -0500 Subject: KVM: x86: Fixes posted interrupt check for IRQs delivery modes Current logic incorrectly uses the enum ioapic_irq_destination_types to check the posted interrupt destination types. However, the value was set using APIC_DM_XXX macros, which are left-shifted by 8 bits. Fixes by using the APIC_DM_FIXED and APIC_DM_LOWEST instead. Fixes: (fdcf75621375 'KVM: x86: Disable posted interrupts for non-standard IRQs delivery modes') Cc: Alexander Graf Signed-off-by: Suravee Suthikulpanit Message-Id: <1586239989-58305-1-git-send-email-suravee.suthikulpanit@amd.com> Reviewed-by: Maxim Levitsky Tested-by: Maxim Levitsky Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 42a2d0d3984a..0dea9f122bb9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1663,8 +1663,8 @@ void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq) { /* We can only post Fixed and LowPrio IRQs */ - return (irq->delivery_mode == dest_Fixed || - irq->delivery_mode == dest_LowestPrio); + return (irq->delivery_mode == APIC_DM_FIXED || + irq->delivery_mode == APIC_DM_LOWEST); } static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) -- cgit v1.2.3-59-g8ed1b From 78d6de3cfbd342918d31cf68d0d2eda401338aef Mon Sep 17 00:00:00 2001 From: Matt Jolly Date: Sun, 3 May 2020 01:03:47 +1000 Subject: USB: serial: qcserial: Add DW5816e support Add support for Dell Wireless 5816e to drivers/usb/serial/qcserial.c Signed-off-by: Matt Jolly Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/qcserial.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 613f91add03d..ce0401d3137f 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -173,6 +173,7 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x413c, 0x81b3)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */ {DEVICE_SWI(0x413c, 0x81b5)}, /* Dell Wireless 5811e QDL */ {DEVICE_SWI(0x413c, 0x81b6)}, /* Dell Wireless 5811e QDL */ + {DEVICE_SWI(0x413c, 0x81cc)}, /* Dell Wireless 5816e */ {DEVICE_SWI(0x413c, 0x81cf)}, /* Dell Wireless 5819 */ {DEVICE_SWI(0x413c, 0x81d0)}, /* Dell Wireless 5819 */ {DEVICE_SWI(0x413c, 0x81d1)}, /* Dell Wireless 5818 */ -- cgit v1.2.3-59-g8ed1b From 8be8f932e3db5fe4ed178b8892eeffeab530273a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 4 May 2020 12:19:45 -0400 Subject: kvm: ioapic: Restrict lazy EOI update to edge-triggered interrupts Commit f458d039db7e ("kvm: ioapic: Lazy update IOAPIC EOI") introduces the following infinite loop: BUG: stack guard page was hit at 000000008f595917 \ (stack is 00000000bdefe5a4..00000000ae2b06f5) kernel stack overflow (double-fault): 0000 [#1] SMP NOPTI RIP: 0010:kvm_set_irq+0x51/0x160 [kvm] Call Trace: irqfd_resampler_ack+0x32/0x90 [kvm] kvm_notify_acked_irq+0x62/0xd0 [kvm] kvm_ioapic_update_eoi_one.isra.0+0x30/0x120 [kvm] ioapic_set_irq+0x20e/0x240 [kvm] kvm_ioapic_set_irq+0x5c/0x80 [kvm] kvm_set_irq+0xbb/0x160 [kvm] ? kvm_hv_set_sint+0x20/0x20 [kvm] irqfd_resampler_ack+0x32/0x90 [kvm] kvm_notify_acked_irq+0x62/0xd0 [kvm] kvm_ioapic_update_eoi_one.isra.0+0x30/0x120 [kvm] ioapic_set_irq+0x20e/0x240 [kvm] kvm_ioapic_set_irq+0x5c/0x80 [kvm] kvm_set_irq+0xbb/0x160 [kvm] ? kvm_hv_set_sint+0x20/0x20 [kvm] .... The re-entrancy happens because the irq state is the OR of the interrupt state and the resamplefd state. That is, we don't want to show the state as 0 until we've had a chance to set the resamplefd. But if the interrupt has _not_ gone low then ioapic_set_irq is invoked again, causing an infinite loop. This can only happen for a level-triggered interrupt, otherwise irqfd_inject would immediately set the KVM_USERSPACE_IRQ_SOURCE_ID high and then low. Fortunately, in the case of level-triggered interrupts the VMEXIT already happens because TMR is set. Thus, fix the bug by restricting the lazy invocation of the ack notifier to edge-triggered interrupts, the only ones that need it. Tested-by: Suravee Suthikulpanit Reported-by: borisvk@bstnet.org Suggested-by: Paolo Bonzini Link: https://www.spinics.net/lists/kvm/msg213512.html Fixes: f458d039db7e ("kvm: ioapic: Lazy update IOAPIC EOI") Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=207489 Signed-off-by: Paolo Bonzini --- arch/x86/kvm/ioapic.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 750ff0b29404..d057376bd3d3 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -225,12 +225,12 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq, } /* - * AMD SVM AVIC accelerate EOI write and do not trap, - * in-kernel IOAPIC will not be able to receive the EOI. - * In this case, we do lazy update of the pending EOI when - * trying to set IOAPIC irq. + * AMD SVM AVIC accelerate EOI write iff the interrupt is edge + * triggered, in which case the in-kernel IOAPIC will not be able + * to receive the EOI. In this case, we do a lazy update of the + * pending EOI when trying to set IOAPIC irq. */ - if (kvm_apicv_activated(ioapic->kvm)) + if (edge && kvm_apicv_activated(ioapic->kvm)) ioapic_lazy_update_eoi(ioapic, irq); /* -- cgit v1.2.3-59-g8ed1b From 2ae11c46d5fdc46cb396e35911c713d271056d35 Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Mon, 4 May 2020 16:27:28 +0200 Subject: tty: xilinx_uartps: Fix missing id assignment to the console When serial console has been assigned to ttyPS1 (which is serial1 alias) console index is not updated property and pointing to index -1 (statically initialized) which ends up in situation where nothing has been printed on the port. The commit 18cc7ac8a28e ("Revert "serial: uartps: Register own uart console and driver structures"") didn't contain this line which was removed by accident. Fixes: 18cc7ac8a28e ("Revert "serial: uartps: Register own uart console and driver structures"") Signed-off-by: Shubhrajyoti Datta Cc: stable Signed-off-by: Michal Simek Link: https://lore.kernel.org/r/ed3111533ef5bd342ee5ec504812240b870f0853.1588602446.git.michal.simek@xilinx.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/xilinx_uartps.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index ac137b6a1dc1..35e9e8faf8de 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -1459,6 +1459,7 @@ static int cdns_uart_probe(struct platform_device *pdev) cdns_uart_uart_driver.nr = CDNS_UART_NR_PORTS; #ifdef CONFIG_SERIAL_XILINX_PS_UART_CONSOLE cdns_uart_uart_driver.cons = &cdns_uart_console; + cdns_uart_console.index = id; #endif rc = uart_register_driver(&cdns_uart_uart_driver); -- cgit v1.2.3-59-g8ed1b From 37e31d2d26a4124506c24e95434e9baf3405a23a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 22 Apr 2020 12:22:11 +0300 Subject: i40iw: Fix error handling in i40iw_manage_arp_cache() The i40iw_arp_table() function can return -EOVERFLOW if i40iw_alloc_resource() fails so we can't just test for "== -1". Fixes: 4e9042e647ff ("i40iw: add hw and utils files") Link: https://lore.kernel.org/r/20200422092211.GA195357@mwanda Signed-off-by: Dan Carpenter Acked-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw_hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index 55a1fbf0e670..ae8b97c30665 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -534,7 +534,7 @@ void i40iw_manage_arp_cache(struct i40iw_device *iwdev, int arp_index; arp_index = i40iw_arp_table(iwdev, ip_addr, ipv4, mac_addr, action); - if (arp_index == -1) + if (arp_index < 0) return; cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false); if (!cqp_request) -- cgit v1.2.3-59-g8ed1b From 0fa8263367db9287aa0632f96c1a5f93cc478150 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 28 Apr 2020 08:10:22 -0400 Subject: ceph: fix endianness bug when handling MDS session feature bits Eduard reported a problem mounting cephfs on s390 arch. The feature mask sent by the MDS is little-endian, so we need to convert it before storing and testing against it. Cc: stable@vger.kernel.org Reported-and-Tested-by: Eduard Shishkin Signed-off-by: Jeff Layton Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 486f91f9685b..7c63abf5bea9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3251,8 +3251,7 @@ static void handle_session(struct ceph_mds_session *session, void *end = p + msg->front.iov_len; struct ceph_mds_session_head *h; u32 op; - u64 seq; - unsigned long features = 0; + u64 seq, features = 0; int wake = 0; bool blacklisted = false; @@ -3271,9 +3270,8 @@ static void handle_session(struct ceph_mds_session *session, goto bad; /* version >= 3, feature bits */ ceph_decode_32_safe(&p, end, len, bad); - ceph_decode_need(&p, end, len, bad); - memcpy(&features, p, min_t(size_t, len, sizeof(features))); - p += len; + ceph_decode_64_safe(&p, end, features, bad); + p += len - sizeof(features); } mutex_lock(&mdsc->mutex); -- cgit v1.2.3-59-g8ed1b From 7d8976afad18d4548ee472e526b126ab74012807 Mon Sep 17 00:00:00 2001 From: Wu Bo Date: Wed, 29 Apr 2020 10:01:55 +0800 Subject: ceph: fix special error code in ceph_try_get_caps() There are 3 speical error codes: -EAGAIN/-EFBIG/-ESTALE. After calling try_get_cap_refs, ceph_try_get_caps test for the -EAGAIN twice. Ensure that it tests for -ESTALE instead. Signed-off-by: Wu Bo Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 185db76300b3..1a8e20ef35bf 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2749,7 +2749,7 @@ int ceph_try_get_caps(struct inode *inode, int need, int want, ret = try_get_cap_refs(inode, need, want, 0, flags, got); /* three special error codes */ - if (ret == -EAGAIN || ret == -EFBIG || ret == -EAGAIN) + if (ret == -EAGAIN || ret == -EFBIG || ret == -ESTALE) ret = 0; return ret; } -- cgit v1.2.3-59-g8ed1b From 4d8e28ff3106b093d98bfd2eceb9b430c70a8758 Mon Sep 17 00:00:00 2001 From: Wu Bo Date: Thu, 30 Apr 2020 14:12:49 +0800 Subject: ceph: fix double unlock in handle_cap_export() If the ceph_mdsc_open_export_target_session() return fails, it will do a "goto retry", but the session mutex has already been unlocked. Re-lock the mutex in that case to ensure that we don't unlock it twice. Signed-off-by: Wu Bo Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 1a8e20ef35bf..5f3aa4d607de 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -3746,6 +3746,7 @@ retry: WARN_ON(1); tsession = NULL; target = -1; + mutex_lock(&session->s_mutex); } goto retry; -- cgit v1.2.3-59-g8ed1b From 3a5ccecd9af71220e8b303f439ebccbc48385305 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 20 Apr 2020 13:44:25 +0000 Subject: MAINTAINERS: remove myself as ceph co-maintainer Jeff, Ilya, and Dongsheng are doing all of the Ceph maintainance these days. [ idryomov: Remove Sage's git tree too, it hasn't been pushed to in years. ] Signed-off-by: Sage Weil Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- MAINTAINERS | 6 ------ 1 file changed, 6 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2926327e4976..db4cc08583dd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3936,11 +3936,9 @@ F: arch/powerpc/platforms/cell/ CEPH COMMON CODE (LIBCEPH) M: Ilya Dryomov M: Jeff Layton -M: Sage Weil L: ceph-devel@vger.kernel.org S: Supported W: http://ceph.com/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git T: git git://github.com/ceph/ceph-client.git F: include/linux/ceph/ F: include/linux/crush/ @@ -3948,12 +3946,10 @@ F: net/ceph/ CEPH DISTRIBUTED FILE SYSTEM CLIENT (CEPH) M: Jeff Layton -M: Sage Weil M: Ilya Dryomov L: ceph-devel@vger.kernel.org S: Supported W: http://ceph.com/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git T: git git://github.com/ceph/ceph-client.git F: Documentation/filesystems/ceph.rst F: fs/ceph/ @@ -14102,12 +14098,10 @@ F: drivers/media/radio/radio-tea5777.c RADOS BLOCK DEVICE (RBD) M: Ilya Dryomov -M: Sage Weil R: Dongsheng Yang L: ceph-devel@vger.kernel.org S: Supported W: http://ceph.com/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git T: git git://github.com/ceph/ceph-client.git F: Documentation/ABI/testing/sysfs-bus-rbd F: drivers/block/rbd.c -- cgit v1.2.3-59-g8ed1b From 30523408c0232d4f33ca0719004e5bffaf04220c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 28 Apr 2020 10:02:55 +0100 Subject: drm/i915: Avoid dereferencing a dead context Once the intel_context is closed, the GEM context may be freed and so the link from intel_context.gem_context is invalid. <3>[ 219.782944] BUG: KASAN: use-after-free in intel_engine_coredump_alloc+0x1bc3/0x2250 [i915] <3>[ 219.782996] Read of size 8 at addr ffff8881d7dff0b8 by task kworker/0:1/12 <4>[ 219.783052] CPU: 0 PID: 12 Comm: kworker/0:1 Tainted: G U 5.7.0-rc2-g1f3ffd7683d54-kasan_118+ #1 <4>[ 219.783055] Hardware name: System manufacturer System Product Name/Z170 PRO GAMING, BIOS 3402 04/26/2017 <4>[ 219.783105] Workqueue: events heartbeat [i915] <4>[ 219.783109] Call Trace: <4>[ 219.783113] <4>[ 219.783119] dump_stack+0x96/0xdb <4>[ 219.783177] ? intel_engine_coredump_alloc+0x1bc3/0x2250 [i915] <4>[ 219.783182] print_address_description.constprop.6+0x16/0x310 <4>[ 219.783239] ? intel_engine_coredump_alloc+0x1bc3/0x2250 [i915] <4>[ 219.783295] ? intel_engine_coredump_alloc+0x1bc3/0x2250 [i915] <4>[ 219.783300] __kasan_report+0x137/0x190 <4>[ 219.783359] ? intel_engine_coredump_alloc+0x1bc3/0x2250 [i915] <4>[ 219.783366] kasan_report+0x32/0x50 <4>[ 219.783426] intel_engine_coredump_alloc+0x1bc3/0x2250 [i915] <4>[ 219.783481] execlists_reset+0x39c/0x13d0 [i915] <4>[ 219.783494] ? mark_held_locks+0x9e/0xe0 <4>[ 219.783546] ? execlists_hold+0xfc0/0xfc0 [i915] <4>[ 219.783551] ? lockdep_hardirqs_on+0x348/0x5f0 <4>[ 219.783557] ? _raw_spin_unlock_irqrestore+0x34/0x60 <4>[ 219.783606] ? execlists_submission_tasklet+0x118/0x3a0 [i915] <4>[ 219.783615] tasklet_action_common.isra.14+0x13b/0x410 <4>[ 219.783623] ? __do_softirq+0x1e4/0x9a7 <4>[ 219.783630] __do_softirq+0x226/0x9a7 <4>[ 219.783643] do_softirq_own_stack+0x2a/0x40 <4>[ 219.783647] <4>[ 219.783692] ? heartbeat+0x3e2/0x10f0 [i915] <4>[ 219.783696] do_softirq.part.13+0x49/0x50 <4>[ 219.783700] __local_bh_enable_ip+0x1a2/0x1e0 <4>[ 219.783748] heartbeat+0x409/0x10f0 [i915] <4>[ 219.783801] ? __live_idle_pulse+0x9f0/0x9f0 [i915] <4>[ 219.783806] ? lock_acquire+0x1ac/0x8a0 <4>[ 219.783811] ? process_one_work+0x811/0x1870 <4>[ 219.783827] ? rcu_read_lock_sched_held+0x9c/0xd0 <4>[ 219.783832] ? rcu_read_lock_bh_held+0xb0/0xb0 <4>[ 219.783836] ? _raw_spin_unlock_irq+0x1f/0x40 <4>[ 219.783845] process_one_work+0x8ca/0x1870 <4>[ 219.783848] ? lock_acquire+0x1ac/0x8a0 <4>[ 219.783852] ? worker_thread+0x1d0/0xb80 <4>[ 219.783864] ? pwq_dec_nr_in_flight+0x2c0/0x2c0 <4>[ 219.783870] ? do_raw_spin_lock+0x129/0x290 <4>[ 219.783886] worker_thread+0x82/0xb80 <4>[ 219.783895] ? __kthread_parkme+0xaf/0x1b0 <4>[ 219.783902] ? process_one_work+0x1870/0x1870 <4>[ 219.783906] kthread+0x34e/0x420 <4>[ 219.783911] ? kthread_create_on_node+0xc0/0xc0 <4>[ 219.783918] ret_from_fork+0x3a/0x50 <3>[ 219.783950] Allocated by task 1264: <4>[ 219.783975] save_stack+0x19/0x40 <4>[ 219.783978] __kasan_kmalloc.constprop.3+0xa0/0xd0 <4>[ 219.784029] i915_gem_create_context+0xa2/0xab8 [i915] <4>[ 219.784081] i915_gem_context_create_ioctl+0x1fa/0x450 [i915] <4>[ 219.784085] drm_ioctl_kernel+0x1d8/0x270 <4>[ 219.784088] drm_ioctl+0x676/0x930 <4>[ 219.784092] ksys_ioctl+0xb7/0xe0 <4>[ 219.784096] __x64_sys_ioctl+0x6a/0xb0 <4>[ 219.784100] do_syscall_64+0x94/0x530 <4>[ 219.784103] entry_SYSCALL_64_after_hwframe+0x49/0xb3 <3>[ 219.784120] Freed by task 12: <4>[ 219.784141] save_stack+0x19/0x40 <4>[ 219.784145] __kasan_slab_free+0x130/0x180 <4>[ 219.784148] kmem_cache_free_bulk+0x1bd/0x500 <4>[ 219.784152] kfree_rcu_work+0x1d8/0x890 <4>[ 219.784155] process_one_work+0x8ca/0x1870 <4>[ 219.784158] worker_thread+0x82/0xb80 <4>[ 219.784162] kthread+0x34e/0x420 <4>[ 219.784165] ret_from_fork+0x3a/0x50 Fixes: 2e46a2a0b014 ("drm/i915: Use explicit flag to mark unreachable intel_context") Signed-off-by: Chris Wilson Acked-by: Akeem G Abodunrin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200428090255.10035-1-chris@chris-wilson.co.uk (cherry picked from commit 24aac336ff78eb55aedda043ed982c61dc3ac49a) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_gpu_error.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 2a4cd0ba5464..5c8e51d2ba5b 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1207,8 +1207,6 @@ static void engine_record_registers(struct intel_engine_coredump *ee) static void record_request(const struct i915_request *request, struct i915_request_coredump *erq) { - const struct i915_gem_context *ctx; - erq->flags = request->fence.flags; erq->context = request->fence.context; erq->seqno = request->fence.seqno; @@ -1219,9 +1217,13 @@ static void record_request(const struct i915_request *request, erq->pid = 0; rcu_read_lock(); - ctx = rcu_dereference(request->context->gem_context); - if (ctx) - erq->pid = pid_nr(ctx->pid); + if (!intel_context_is_closed(request->context)) { + const struct i915_gem_context *ctx; + + ctx = rcu_dereference(request->context->gem_context); + if (ctx) + erq->pid = pid_nr(ctx->pid); + } rcu_read_unlock(); } -- cgit v1.2.3-59-g8ed1b From fe5a708267911d55cce42910d93e303924b088fd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 1 May 2020 13:22:49 +0100 Subject: drm/i915/gt: Make timeslicing an explicit engine property In order to allow userspace to rely on timeslicing to reorder their batches, we must support preemption of those user batches. Declare timeslicing as an explicit property that is a combination of having the kernel support and HW support. Suggested-by: Tvrtko Ursulin Fixes: 8ee36e048c98 ("drm/i915/execlists: Minimalistic timeslicing") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200501122249.12417-1-chris@chris-wilson.co.uk (cherry picked from commit a211da9c771bf97395a3ced83a3aa383372b13a7) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_engine.h | 9 --------- drivers/gpu/drm/i915/gt/intel_engine_types.h | 18 ++++++++++++++---- drivers/gpu/drm/i915/gt/intel_lrc.c | 5 ++++- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index b469de0dd9b6..a1aa0d3e8be1 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -333,13 +333,4 @@ intel_engine_has_preempt_reset(const struct intel_engine_cs *engine) return intel_engine_has_preemption(engine); } -static inline bool -intel_engine_has_timeslices(const struct intel_engine_cs *engine) -{ - if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) - return false; - - return intel_engine_has_semaphores(engine); -} - #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 80cdde712842..bf4d13e08887 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -483,10 +483,11 @@ struct intel_engine_cs { #define I915_ENGINE_SUPPORTS_STATS BIT(1) #define I915_ENGINE_HAS_PREEMPTION BIT(2) #define I915_ENGINE_HAS_SEMAPHORES BIT(3) -#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4) -#define I915_ENGINE_IS_VIRTUAL BIT(5) -#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6) -#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7) +#define I915_ENGINE_HAS_TIMESLICES BIT(4) +#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(5) +#define I915_ENGINE_IS_VIRTUAL BIT(6) +#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(7) +#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(8) unsigned int flags; /* @@ -584,6 +585,15 @@ intel_engine_has_semaphores(const struct intel_engine_cs *engine) return engine->flags & I915_ENGINE_HAS_SEMAPHORES; } +static inline bool +intel_engine_has_timeslices(const struct intel_engine_cs *engine) +{ + if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + return false; + + return engine->flags & I915_ENGINE_HAS_TIMESLICES; +} + static inline bool intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine) { diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 683014e7bc51..017fd5bf9c69 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -4369,8 +4369,11 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) engine->flags |= I915_ENGINE_SUPPORTS_STATS; if (!intel_vgpu_active(engine->i915)) { engine->flags |= I915_ENGINE_HAS_SEMAPHORES; - if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) { engine->flags |= I915_ENGINE_HAS_PREEMPTION; + if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + engine->flags |= I915_ENGINE_HAS_TIMESLICES; + } } if (INTEL_GEN(engine->i915) >= 12) -- cgit v1.2.3-59-g8ed1b From 421abe200321a2c907ede1a6208c558284ba0b75 Mon Sep 17 00:00:00 2001 From: Sultan Alsawaf Date: Thu, 30 Apr 2020 14:46:54 -0700 Subject: drm/i915: Don't enable WaIncreaseLatencyIPCEnabled when IPC is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 5a7d202b1574, a logical AND was erroneously changed to an OR, causing WaIncreaseLatencyIPCEnabled to be enabled unconditionally for kabylake and coffeelake, even when IPC is disabled. Fix the logic so that WaIncreaseLatencyIPCEnabled is only used when IPC is enabled. Fixes: 5a7d202b1574 ("drm/i915: Drop WaIncreaseLatencyIPCEnabled/1140 for cnl") Cc: stable@vger.kernel.org # 5.3.x+ Signed-off-by: Sultan Alsawaf Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200430214654.51314-1-sultan@kerneltoast.com (cherry picked from commit 690d22dafa88b82453516387b475664047a6bd14) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 8375054ba27d..a52986a9e7a6 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4992,7 +4992,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state, * WaIncreaseLatencyIPCEnabled: kbl,cfl * Display WA #1141: kbl,cfl */ - if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) || + if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) && dev_priv->ipc_enabled) latency += 4; -- cgit v1.2.3-59-g8ed1b From 82152d424b6cb6fc1ede7d03d69c04e786688740 Mon Sep 17 00:00:00 2001 From: Peter Jones Date: Fri, 6 Jul 2018 15:04:24 -0400 Subject: Make the "Reducing compressed framebufer size" message be DRM_INFO_ONCE() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was sort of annoying me: random:~$ dmesg | tail -1 [523884.039227] [drm] Reducing the compressed framebuffer size. This may lead to less power savings than a non-reduced-size. Try to increase stolen memory size if available in BIOS. random:~$ dmesg | grep -c "Reducing the compressed" 47 This patch makes it DRM_INFO_ONCE() just like the similar message farther down in that function is pr_info_once(). Cc: stable@vger.kernel.org Signed-off-by: Peter Jones Acked-by: Rodrigo Vivi Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1745 Link: https://patchwork.freedesktop.org/patch/msgid/20180706190424.29194-1-pjones@redhat.com [vsyrjala: Rebase due to per-device logging] Signed-off-by: Ville Syrjälä (cherry picked from commit 6b7fc6a3e6af4ff5773949d0fed70d8e7f68d5ce) [Rodrigo: port back to DRM_INFO_ONCE] Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_fbc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 2e5d835a9eaa..c125ca9ab9b3 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -485,8 +485,7 @@ static int intel_fbc_alloc_cfb(struct drm_i915_private *dev_priv, if (!ret) goto err_llb; else if (ret > 1) { - DRM_INFO("Reducing the compressed framebuffer size. This may lead to less power savings than a non-reduced-size. Try to increase stolen memory size if available in BIOS.\n"); - + DRM_INFO_ONCE("Reducing the compressed framebuffer size. This may lead to less power savings than a non-reduced-size. Try to increase stolen memory size if available in BIOS.\n"); } fbc->threshold = ret; -- cgit v1.2.3-59-g8ed1b From 7391efa48d88c8555a802bac562d02a38567127c Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Fri, 24 Apr 2020 10:29:26 +0530 Subject: RISC-V: Export riscv_cpuid_to_hartid_mask() API The riscv_cpuid_to_hartid_mask() API should be exported to allow building KVM RISC-V as loadable module. Signed-off-by: Anup Patel Reviewed-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/smp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index e0a6293093f1..a65a8fa0c22d 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -63,6 +64,7 @@ void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out) for_each_cpu(cpu, in) cpumask_set_cpu(cpuid_to_hartid_map(cpu), out); } +EXPORT_SYMBOL_GPL(riscv_cpuid_to_hartid_mask); bool arch_match_cpu_phys_id(int cpu, u64 phys_id) { -- cgit v1.2.3-59-g8ed1b From 6bcff51539ccae5431a01f60293419dbae21100f Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Fri, 24 Apr 2020 10:29:27 +0530 Subject: RISC-V: Add bitmap reprensenting ISA features common across CPUs This patch adds riscv_isa bitmap which represents Host ISA features common across all Host CPUs. The riscv_isa is not same as elf_hwcap because elf_hwcap will only have ISA features relevant for user-space apps whereas riscv_isa will have ISA features relevant to both kernel and user-space apps. One of the use-case for riscv_isa bitmap is in KVM hypervisor where we will use it to do following operations: 1. Check whether hypervisor extension is available 2. Find ISA features that need to be virtualized (e.g. floating point support, vector extension, etc.) Signed-off-by: Anup Patel Signed-off-by: Atish Patra Reviewed-by: Alexander Graf Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/hwcap.h | 22 +++++++++++ arch/riscv/kernel/cpufeature.c | 83 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 102 insertions(+), 3 deletions(-) diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index 1bb0cd04aec3..5ce50468aff1 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -8,6 +8,7 @@ #ifndef _ASM_RISCV_HWCAP_H #define _ASM_RISCV_HWCAP_H +#include #include #ifndef __ASSEMBLY__ @@ -22,6 +23,27 @@ enum { }; extern unsigned long elf_hwcap; + +#define RISCV_ISA_EXT_a ('a' - 'a') +#define RISCV_ISA_EXT_c ('c' - 'a') +#define RISCV_ISA_EXT_d ('d' - 'a') +#define RISCV_ISA_EXT_f ('f' - 'a') +#define RISCV_ISA_EXT_h ('h' - 'a') +#define RISCV_ISA_EXT_i ('i' - 'a') +#define RISCV_ISA_EXT_m ('m' - 'a') +#define RISCV_ISA_EXT_s ('s' - 'a') +#define RISCV_ISA_EXT_u ('u' - 'a') + +#define RISCV_ISA_EXT_MAX 64 + +unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap); + +#define riscv_isa_extension_mask(ext) BIT_MASK(RISCV_ISA_EXT_##ext) + +bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit); +#define riscv_isa_extension_available(isa_bitmap, ext) \ + __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext) + #endif #endif /* _ASM_RISCV_HWCAP_H */ diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index a5ad00043104..ac202f44a670 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -6,6 +6,7 @@ * Copyright (C) 2017 SiFive */ +#include #include #include #include @@ -13,15 +14,57 @@ #include unsigned long elf_hwcap __read_mostly; + +/* Host ISA bitmap */ +static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly; + #ifdef CONFIG_FPU bool has_fpu __read_mostly; #endif +/** + * riscv_isa_extension_base() - Get base extension word + * + * @isa_bitmap: ISA bitmap to use + * Return: base extension word as unsigned long value + * + * NOTE: If isa_bitmap is NULL then Host ISA bitmap will be used. + */ +unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap) +{ + if (!isa_bitmap) + return riscv_isa[0]; + return isa_bitmap[0]; +} +EXPORT_SYMBOL_GPL(riscv_isa_extension_base); + +/** + * __riscv_isa_extension_available() - Check whether given extension + * is available or not + * + * @isa_bitmap: ISA bitmap to use + * @bit: bit position of the desired extension + * Return: true or false + * + * NOTE: If isa_bitmap is NULL then Host ISA bitmap will be used. + */ +bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit) +{ + const unsigned long *bmap = (isa_bitmap) ? isa_bitmap : riscv_isa; + + if (bit >= RISCV_ISA_EXT_MAX) + return false; + + return test_bit(bit, bmap) ? true : false; +} +EXPORT_SYMBOL_GPL(__riscv_isa_extension_available); + void riscv_fill_hwcap(void) { struct device_node *node; const char *isa; - size_t i; + char print_str[BITS_PER_LONG + 1]; + size_t i, j, isa_len; static unsigned long isa2hwcap[256] = {0}; isa2hwcap['i'] = isa2hwcap['I'] = COMPAT_HWCAP_ISA_I; @@ -33,8 +76,11 @@ void riscv_fill_hwcap(void) elf_hwcap = 0; + bitmap_zero(riscv_isa, RISCV_ISA_EXT_MAX); + for_each_of_cpu_node(node) { unsigned long this_hwcap = 0; + unsigned long this_isa = 0; if (riscv_of_processor_hartid(node) < 0) continue; @@ -44,8 +90,24 @@ void riscv_fill_hwcap(void) continue; } - for (i = 0; i < strlen(isa); ++i) + i = 0; + isa_len = strlen(isa); +#if IS_ENABLED(CONFIG_32BIT) + if (!strncmp(isa, "rv32", 4)) + i += 4; +#elif IS_ENABLED(CONFIG_64BIT) + if (!strncmp(isa, "rv64", 4)) + i += 4; +#endif + for (; i < isa_len; ++i) { this_hwcap |= isa2hwcap[(unsigned char)(isa[i])]; + /* + * TODO: X, Y and Z extension parsing for Host ISA + * bitmap will be added in-future. + */ + if ('a' <= isa[i] && isa[i] < 'x') + this_isa |= (1UL << (isa[i] - 'a')); + } /* * All "okay" hart should have same isa. Set HWCAP based on @@ -56,6 +118,11 @@ void riscv_fill_hwcap(void) elf_hwcap &= this_hwcap; else elf_hwcap = this_hwcap; + + if (riscv_isa[0]) + riscv_isa[0] &= this_isa; + else + riscv_isa[0] = this_isa; } /* We don't support systems with F but without D, so mask those out @@ -65,7 +132,17 @@ void riscv_fill_hwcap(void) elf_hwcap &= ~COMPAT_HWCAP_ISA_F; } - pr_info("elf_hwcap is 0x%lx\n", elf_hwcap); + memset(print_str, 0, sizeof(print_str)); + for (i = 0, j = 0; i < BITS_PER_LONG; i++) + if (riscv_isa[0] & BIT_MASK(i)) + print_str[j++] = (char)('a' + i); + pr_info("riscv: ISA extensions %s\n", print_str); + + memset(print_str, 0, sizeof(print_str)); + for (i = 0, j = 0; i < BITS_PER_LONG; i++) + if (elf_hwcap & BIT_MASK(i)) + print_str[j++] = (char)('a' + i); + pr_info("riscv: ELF capabilities %s\n", print_str); #ifdef CONFIG_FPU if (elf_hwcap & (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D)) -- cgit v1.2.3-59-g8ed1b From a2da5b181f888b3cdb4727b6c60a8755cedce272 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Fri, 24 Apr 2020 10:29:28 +0530 Subject: RISC-V: Remove N-extension related defines The RISC-V N-extension is still in draft state hence remove N-extension related defines from asm/csr.h. Signed-off-by: Anup Patel Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/csr.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 8e18d2c64399..cec462e198ce 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -51,13 +51,10 @@ #define CAUSE_IRQ_FLAG (_AC(1, UL) << (__riscv_xlen - 1)) /* Interrupt causes (minus the high bit) */ -#define IRQ_U_SOFT 0 #define IRQ_S_SOFT 1 #define IRQ_M_SOFT 3 -#define IRQ_U_TIMER 4 #define IRQ_S_TIMER 5 #define IRQ_M_TIMER 7 -#define IRQ_U_EXT 8 #define IRQ_S_EXT 9 #define IRQ_M_EXT 11 -- cgit v1.2.3-59-g8ed1b From c749bb2d554825e007cbc43b791f54e124dadfce Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Mon, 27 Apr 2020 14:59:24 +0800 Subject: riscv: set max_pfn to the PFN of the last page The current max_pfn equals to zero. In this case, I found it caused users cannot get some page information through /proc such as kpagecount in v5.6 kernel because of new sanity checks. The following message is displayed by stress-ng test suite with the command "stress-ng --verbose --physpage 1 -t 1" on HiFive unleashed board. # stress-ng --verbose --physpage 1 -t 1 stress-ng: debug: [109] 4 processors online, 4 processors configured stress-ng: info: [109] dispatching hogs: 1 physpage stress-ng: debug: [109] cache allocate: reducing cache level from L3 (too high) to L0 stress-ng: debug: [109] get_cpu_cache: invalid cache_level: 0 stress-ng: info: [109] cache allocate: using built-in defaults as no suitable cache found stress-ng: debug: [109] cache allocate: default cache size: 2048K stress-ng: debug: [109] starting stressors stress-ng: debug: [109] 1 stressor spawned stress-ng: debug: [110] stress-ng-physpage: started [110] (instance 0) stress-ng: error: [110] stress-ng-physpage: cannot read page count for address 0x3fd34de000 in /proc/kpagecount, errno=0 (Success) stress-ng: error: [110] stress-ng-physpage: cannot read page count for address 0x3fd32db078 in /proc/kpagecount, errno=0 (Success) ... stress-ng: error: [110] stress-ng-physpage: cannot read page count for address 0x3fd32db078 in /proc/kpagecount, errno=0 (Success) stress-ng: debug: [110] stress-ng-physpage: exited [110] (instance 0) stress-ng: debug: [109] process [110] terminated stress-ng: info: [109] successful run completed in 1.00s # After applying this patch, the kernel can pass the test. # stress-ng --verbose --physpage 1 -t 1 stress-ng: debug: [104] 4 processors online, 4 processors configured stress-ng: info: [104] dispatching hogs: 1 physpage stress-ng: info: [104] cache allocate: using defaults, can't determine cache details from sysfs stress-ng: debug: [104] cache allocate: default cache size: 2048K stress-ng: debug: [104] starting stressors stress-ng: debug: [104] 1 stressor spawned stress-ng: debug: [105] stress-ng-physpage: started [105] (instance 0) stress-ng: debug: [105] stress-ng-physpage: exited [105] (instance 0) stress-ng: debug: [104] process [105] terminated stress-ng: info: [104] successful run completed in 1.01s # Cc: stable@vger.kernel.org Signed-off-by: Vincent Chen Reviewed-by: Anup Patel Reviewed-by: Yash Shah Tested-by: Yash Shah Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index b55be44ff9bd..5b813532db59 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -150,7 +150,8 @@ void __init setup_bootmem(void) memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); set_max_mapnr(PFN_DOWN(mem_size)); - max_low_pfn = PFN_DOWN(memblock_end_of_DRAM()); + max_pfn = PFN_DOWN(memblock_end_of_DRAM()); + max_low_pfn = max_pfn; #ifdef CONFIG_BLK_DEV_INITRD setup_initrd(); -- cgit v1.2.3-59-g8ed1b From 0a9f2a6161dcd6be057f6c501453d28b3c4a3b0c Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Mon, 27 Apr 2020 17:13:34 +0200 Subject: riscv: add Linux note to vdso The Linux note in the vdso allows glibc to check the running kernel version without having to issue the uname syscall. Signed-off-by: Andreas Schwab Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vdso/Makefile | 2 +- arch/riscv/kernel/vdso/note.S | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 arch/riscv/kernel/vdso/note.S diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index a4ee3a0e7d20..4c8b2a4a6a70 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -12,7 +12,7 @@ vdso-syms += getcpu vdso-syms += flush_icache # Files to link into the vdso -obj-vdso = $(patsubst %, %.o, $(vdso-syms)) +obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o # Build rules targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds vdso-dummy.o diff --git a/arch/riscv/kernel/vdso/note.S b/arch/riscv/kernel/vdso/note.S new file mode 100644 index 000000000000..2a956c942211 --- /dev/null +++ b/arch/riscv/kernel/vdso/note.S @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include +#include + +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END -- cgit v1.2.3-59-g8ed1b From d6d5161280b3d57163f5310f0a0007cdeb729984 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Mon, 4 May 2020 11:54:48 +0800 Subject: riscv: force __cpu_up_ variables to put in data section Put __cpu_up_stack_pointer and __cpu_up_task_pointer in data section. Currently, these two variables are put in bss section, there is a potential risk that secondary harts get the uninitialized value before main hart finishing the bss clearing. In this case, all secondary harts would pass the waiting loop and enable the MMU before main hart set up the page table. This issue happens on random booting of multiple harts, which means it will manifest for BBL and OpenSBI v0.6 (or older version). In OpenSBI v0.7 (or higher version), we have HSM extension so all the secondary harts are brought-up by Linux kernel in an orderly fashion. This means we don't need this change for OpenSBI v0.7 (or higher version). Signed-off-by: Zong Li Reviewed-by: Greentime Hu Reviewed-by: Anup Patel Reviewed-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpu_ops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/cpu_ops.c b/arch/riscv/kernel/cpu_ops.c index c4c33bf02369..0ec22354018c 100644 --- a/arch/riscv/kernel/cpu_ops.c +++ b/arch/riscv/kernel/cpu_ops.c @@ -15,8 +15,8 @@ const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init; -void *__cpu_up_stack_pointer[NR_CPUS]; -void *__cpu_up_task_pointer[NR_CPUS]; +void *__cpu_up_stack_pointer[NR_CPUS] __section(.data); +void *__cpu_up_task_pointer[NR_CPUS] __section(.data); extern const struct cpu_operations cpu_ops_sbi; extern const struct cpu_operations cpu_ops_spinwait; -- cgit v1.2.3-59-g8ed1b From 5615e74f48dcc982655543e979b6c3f3f877e6f6 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 5 May 2020 09:27:15 +0200 Subject: KVM: s390: Remove false WARN_ON_ONCE for the PQAP instruction In LPAR we will only get an intercept for FC==3 for the PQAP instruction. Running nested under z/VM can result in other intercepts as well as ECA_APIE is an effective bit: If one hypervisor layer has turned this bit off, the end result will be that we will get intercepts for all function codes. Usually the first one will be a query like PQAP(QCI). So the WARN_ON_ONCE is not right. Let us simply remove it. Cc: Pierre Morel Cc: Tony Krowiak Cc: stable@vger.kernel.org # v5.3+ Fixes: e5282de93105 ("s390: ap: kvm: add PQAP interception for AQIC") Link: https://lore.kernel.org/kvm/20200505083515.2720-1-borntraeger@de.ibm.com Reported-by: Qian Cai Signed-off-by: Christian Borntraeger Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/kvm/priv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 69a824f9ef0b..893893642415 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -626,10 +626,12 @@ static int handle_pqap(struct kvm_vcpu *vcpu) * available for the guest are AQIC and TAPQ with the t bit set * since we do not set IC.3 (FIII) we currently will only intercept * the AQIC function code. + * Note: running nested under z/VM can result in intercepts for other + * function codes, e.g. PQAP(QCI). We do not support this and bail out. */ reg0 = vcpu->run->s.regs.gprs[0]; fc = (reg0 >> 24) & 0xff; - if (WARN_ON_ONCE(fc != 0x03)) + if (fc != 0x03) return -EOPNOTSUPP; /* PQAP instruction is allowed for guest kernel only */ -- cgit v1.2.3-59-g8ed1b From 3c5c0805ac925f2f8c68eeb1ba0f8a796a7b4525 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 2 May 2020 13:26:44 +0200 Subject: staging: ks7010: remove me from CC list I lost interest in this driver years ago because I could't keep up with testing the incoming janitorial patches. So, drop me from CC. Signed-off-by: Wolfram Sang Link: https://lore.kernel.org/r/20200502112648.6942-1-wsa@the-dreams.de Signed-off-by: Greg Kroah-Hartman --- drivers/staging/ks7010/TODO | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/staging/ks7010/TODO b/drivers/staging/ks7010/TODO index 87a6dac4890d..ab6f39175d99 100644 --- a/drivers/staging/ks7010/TODO +++ b/drivers/staging/ks7010/TODO @@ -30,5 +30,4 @@ Now the TODOs: Please send any patches to: Greg Kroah-Hartman -Wolfram Sang Linux Driver Project Developer List -- cgit v1.2.3-59-g8ed1b From 769acc3656d93aaacada814939743361d284fd87 Mon Sep 17 00:00:00 2001 From: Oscar Carter Date: Fri, 1 May 2020 17:51:18 +0200 Subject: staging: gasket: Check the return value of gasket_get_bar_index() Check the return value of gasket_get_bar_index function as it can return a negative one (-EINVAL). If this happens, a negative index is used in the "gasket_dev->bar_data" array. Addresses-Coverity-ID: 1438542 ("Negative array index read") Fixes: 9a69f5087ccc2 ("drivers/staging: Gasket driver framework + Apex driver") Signed-off-by: Oscar Carter Cc: stable Reviewed-by: Richard Yeh Link: https://lore.kernel.org/r/20200501155118.13380-1-oscar.carter@gmx.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gasket/gasket_core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/staging/gasket/gasket_core.c b/drivers/staging/gasket/gasket_core.c index 8e0575fcb4c8..67325fbaf760 100644 --- a/drivers/staging/gasket/gasket_core.c +++ b/drivers/staging/gasket/gasket_core.c @@ -925,6 +925,10 @@ do_map_region(const struct gasket_dev *gasket_dev, struct vm_area_struct *vma, gasket_get_bar_index(gasket_dev, (vma->vm_pgoff << PAGE_SHIFT) + driver_desc->legacy_mmap_address_offset); + + if (bar_index < 0) + return DO_MAP_REGION_INVALID; + phys_base = gasket_dev->bar_data[bar_index].phys_base + phys_offset; while (mapped_bytes < map_length) { /* -- cgit v1.2.3-59-g8ed1b From ac854131d9844f79e2fdcef67a7707227538d78a Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 1 May 2020 16:07:28 -0400 Subject: USB: core: Fix misleading driver bug report The syzbot fuzzer found a race between URB submission to endpoint 0 and device reset. Namely, during the reset we call usb_ep0_reinit() because the characteristics of ep0 may have changed (if the reset follows a firmware update, for example). While usb_ep0_reinit() is running there is a brief period during which the pointers stored in udev->ep_in[0] and udev->ep_out[0] are set to NULL, and if an URB is submitted to ep0 during that period, usb_urb_ep_type_check() will report it as a driver bug. In the absence of those pointers, the routine thinks that the endpoint doesn't exist. The log message looks like this: ------------[ cut here ]------------ usb 2-1: BOGUS urb xfer, pipe 2 != type 2 WARNING: CPU: 0 PID: 9241 at drivers/usb/core/urb.c:478 usb_submit_urb+0x1188/0x1460 drivers/usb/core/urb.c:478 Now, although submitting an URB while the device is being reset is a questionable thing to do, it shouldn't count as a driver bug as severe as submitting an URB for an endpoint that doesn't exist. Indeed, endpoint 0 always exists, even while the device is in its unconfigured state. To prevent these misleading driver bug reports, this patch updates usb_disable_endpoint() to avoid clearing the ep_in[] and ep_out[] pointers when the endpoint being disabled is ep0. There's no danger of leaving a stale pointer in place, because the usb_host_endpoint structure being pointed to is stored permanently in udev->ep0; it doesn't get deallocated until the entire usb_device structure does. Reported-and-tested-by: syzbot+db339689b2101f6f6071@syzkaller.appspotmail.com Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/Pine.LNX.4.44L0.2005011558590.903-100000@netrider.rowland.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/message.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index a48678a0c83a..6197938dcc2d 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -1144,11 +1144,11 @@ void usb_disable_endpoint(struct usb_device *dev, unsigned int epaddr, if (usb_endpoint_out(epaddr)) { ep = dev->ep_out[epnum]; - if (reset_hardware) + if (reset_hardware && epnum != 0) dev->ep_out[epnum] = NULL; } else { ep = dev->ep_in[epnum]; - if (reset_hardware) + if (reset_hardware && epnum != 0) dev->ep_in[epnum] = NULL; } if (ep) { -- cgit v1.2.3-59-g8ed1b From e283f5e89f44a80ca536e4a12903c64e9e9a82e4 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Thu, 30 Apr 2020 16:56:57 +0300 Subject: usb: typec: intel_pmc_mux: Fix the property names The device property names for the port index number are "usb2-port-number" and "usb3-port-number", not "usb2-port" and "usb3-port". Fixes: 6701adfa9693 ("usb: typec: driver for Intel PMC mux control") Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20200430135657.45169-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/mux/intel_pmc_mux.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/mux/intel_pmc_mux.c b/drivers/usb/typec/mux/intel_pmc_mux.c index f5c5e0aef66f..bb23886c1768 100644 --- a/drivers/usb/typec/mux/intel_pmc_mux.c +++ b/drivers/usb/typec/mux/intel_pmc_mux.c @@ -298,11 +298,11 @@ static int pmc_usb_register_port(struct pmc_usb *pmc, int index, struct typec_mux_desc mux_desc = { }; int ret; - ret = fwnode_property_read_u8(fwnode, "usb2-port", &port->usb2_port); + ret = fwnode_property_read_u8(fwnode, "usb2-port-number", &port->usb2_port); if (ret) return ret; - ret = fwnode_property_read_u8(fwnode, "usb3-port", &port->usb3_port); + ret = fwnode_property_read_u8(fwnode, "usb3-port-number", &port->usb3_port); if (ret) return ret; -- cgit v1.2.3-59-g8ed1b From 2bef9aed6f0e22391c8d4570749b1acc9bc3981e Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Mon, 4 May 2020 15:13:48 -0500 Subject: usb: usbfs: correct kernel->user page attribute mismatch On some architectures (e.g. arm64) requests for IO coherent memory may use non-cachable attributes if the relevant device isn't cache coherent. If these pages are then remapped into userspace as cacheable, they may not be coherent with the non-cacheable mappings. In particular this happens with libusb, when it attempts to create zero-copy buffers for use by rtl-sdr (https://github.com/osmocom/rtl-sdr/). On low end arm devices with non-coherent USB ports, the application will be unexpectedly killed, while continuing to work fine on arm machines with coherent USB controllers. This bug has been discovered/reported a few times over the last few years. In the case of rtl-sdr a compile time option to enable/disable zero copy was implemented to work around it. Rather than relaying on application specific workarounds, dma_mmap_coherent() can be used instead of remap_pfn_range(). The page cache/etc attributes will then be correctly set in userspace to match the kernel mapping. Signed-off-by: Jeremy Linton Cc: stable Link: https://lore.kernel.org/r/20200504201348.1183246-1-jeremy.linton@arm.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 6833c918abce..b9db9812d6c5 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -217,6 +217,7 @@ static int usbdev_mmap(struct file *file, struct vm_area_struct *vma) { struct usb_memory *usbm = NULL; struct usb_dev_state *ps = file->private_data; + struct usb_hcd *hcd = bus_to_hcd(ps->dev->bus); size_t size = vma->vm_end - vma->vm_start; void *mem; unsigned long flags; @@ -250,9 +251,7 @@ static int usbdev_mmap(struct file *file, struct vm_area_struct *vma) usbm->vma_use_count = 1; INIT_LIST_HEAD(&usbm->memlist); - if (remap_pfn_range(vma, vma->vm_start, - virt_to_phys(usbm->mem) >> PAGE_SHIFT, - size, vma->vm_page_prot) < 0) { + if (dma_mmap_coherent(hcd->self.sysdev, vma, mem, dma_handle, size)) { dec_usb_memory_use_count(usbm, &usbm->vma_use_count); return -EAGAIN; } -- cgit v1.2.3-59-g8ed1b From 7990be48ef4d87163940d6c04c349c93f0bd9ae7 Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Tue, 28 Apr 2020 22:44:28 -0700 Subject: usb: typec: mux: intel: Handle alt mode HPD_HIGH MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the PMC Type C Subsystem (TCSS) Mux programming guide rev 0.6, when a device is transitioning to DP Alternate Mode state, if the HPD_STATE (bit 7) field in the status update command VDO is set to HPD_HIGH, the HPD_HIGH field in the Alternate Mode request “mode_data” field (bit 14) should also be set. Ensure the bit is correctly handled while issuing the Alternate Mode request. Signed-off-by: Prashant Malani Fixes: 6701adfa9693 ("usb: typec: driver for Intel PMC mux control") Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20200429054432.134178-1-pmalani@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/mux/intel_pmc_mux.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/typec/mux/intel_pmc_mux.c b/drivers/usb/typec/mux/intel_pmc_mux.c index bb23886c1768..67c5139cfa0d 100644 --- a/drivers/usb/typec/mux/intel_pmc_mux.c +++ b/drivers/usb/typec/mux/intel_pmc_mux.c @@ -157,6 +157,10 @@ pmc_usb_mux_dp(struct pmc_usb_port *port, struct typec_mux_state *state) req.mode_data |= (state->mode - TYPEC_STATE_MODAL) << PMC_USB_ALTMODE_DP_MODE_SHIFT; + if (data->status & DP_STATUS_HPD_STATE) + req.mode_data |= PMC_USB_DP_HPD_LVL << + PMC_USB_ALTMODE_DP_MODE_SHIFT; + return pmc_usb_command(port, (void *)&req, sizeof(req)); } -- cgit v1.2.3-59-g8ed1b From eb791aa70b90c559eeb371d807c8813d569393f0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 May 2020 14:54:09 +0200 Subject: iommu/amd: Fix race in increase_address_space()/fetch_pte() The 'pt_root' and 'mode' struct members of 'struct protection_domain' need to be get/set atomically, otherwise the page-table of the domain can get corrupted. Merge the fields into one atomic64_t struct member which can be get/set atomically. Fixes: 92d420ec028d ("iommu/amd: Relax locking in dma_ops path") Reported-by: Qian Cai Signed-off-by: Joerg Roedel Tested-by: Qian Cai Link: https://lore.kernel.org/r/20200504125413.16798-2-joro@8bytes.org Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 140 ++++++++++++++++++++++++++++++---------- drivers/iommu/amd_iommu_types.h | 9 ++- 2 files changed, 112 insertions(+), 37 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 20cce366e951..28229a38af4d 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -151,6 +151,26 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom) return container_of(dom, struct protection_domain, domain); } +static void amd_iommu_domain_get_pgtable(struct protection_domain *domain, + struct domain_pgtable *pgtable) +{ + u64 pt_root = atomic64_read(&domain->pt_root); + + pgtable->root = (u64 *)(pt_root & PAGE_MASK); + pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */ +} + +static u64 amd_iommu_domain_encode_pgtable(u64 *root, int mode) +{ + u64 pt_root; + + /* lowest 3 bits encode pgtable mode */ + pt_root = mode & 7; + pt_root |= (u64)root; + + return pt_root; +} + static struct iommu_dev_data *alloc_dev_data(u16 devid) { struct iommu_dev_data *dev_data; @@ -1397,13 +1417,18 @@ static struct page *free_sub_pt(unsigned long root, int mode, static void free_pagetable(struct protection_domain *domain) { - unsigned long root = (unsigned long)domain->pt_root; + struct domain_pgtable pgtable; struct page *freelist = NULL; + unsigned long root; + + amd_iommu_domain_get_pgtable(domain, &pgtable); + atomic64_set(&domain->pt_root, 0); - BUG_ON(domain->mode < PAGE_MODE_NONE || - domain->mode > PAGE_MODE_6_LEVEL); + BUG_ON(pgtable.mode < PAGE_MODE_NONE || + pgtable.mode > PAGE_MODE_6_LEVEL); - freelist = free_sub_pt(root, domain->mode, freelist); + root = (unsigned long)pgtable.root; + freelist = free_sub_pt(root, pgtable.mode, freelist); free_page_list(freelist); } @@ -1417,24 +1442,28 @@ static bool increase_address_space(struct protection_domain *domain, unsigned long address, gfp_t gfp) { + struct domain_pgtable pgtable; unsigned long flags; bool ret = false; - u64 *pte; + u64 *pte, root; spin_lock_irqsave(&domain->lock, flags); - if (address <= PM_LEVEL_SIZE(domain->mode) || - WARN_ON_ONCE(domain->mode == PAGE_MODE_6_LEVEL)) + amd_iommu_domain_get_pgtable(domain, &pgtable); + + if (address <= PM_LEVEL_SIZE(pgtable.mode) || + WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL)) goto out; pte = (void *)get_zeroed_page(gfp); if (!pte) goto out; - *pte = PM_LEVEL_PDE(domain->mode, - iommu_virt_to_phys(domain->pt_root)); - domain->pt_root = pte; - domain->mode += 1; + *pte = PM_LEVEL_PDE(pgtable.mode, iommu_virt_to_phys(pgtable.root)); + + root = amd_iommu_domain_encode_pgtable(pte, pgtable.mode + 1); + + atomic64_set(&domain->pt_root, root); ret = true; @@ -1451,16 +1480,22 @@ static u64 *alloc_pte(struct protection_domain *domain, gfp_t gfp, bool *updated) { + struct domain_pgtable pgtable; int level, end_lvl; u64 *pte, *page; BUG_ON(!is_power_of_2(page_size)); - while (address > PM_LEVEL_SIZE(domain->mode)) + amd_iommu_domain_get_pgtable(domain, &pgtable); + + while (address > PM_LEVEL_SIZE(pgtable.mode)) { *updated = increase_address_space(domain, address, gfp) || *updated; + amd_iommu_domain_get_pgtable(domain, &pgtable); + } + - level = domain->mode - 1; - pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; + level = pgtable.mode - 1; + pte = &pgtable.root[PM_LEVEL_INDEX(level, address)]; address = PAGE_SIZE_ALIGN(address, page_size); end_lvl = PAGE_SIZE_LEVEL(page_size); @@ -1536,16 +1571,19 @@ static u64 *fetch_pte(struct protection_domain *domain, unsigned long address, unsigned long *page_size) { + struct domain_pgtable pgtable; int level; u64 *pte; *page_size = 0; - if (address > PM_LEVEL_SIZE(domain->mode)) + amd_iommu_domain_get_pgtable(domain, &pgtable); + + if (address > PM_LEVEL_SIZE(pgtable.mode)) return NULL; - level = domain->mode - 1; - pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; + level = pgtable.mode - 1; + pte = &pgtable.root[PM_LEVEL_INDEX(level, address)]; *page_size = PTE_LEVEL_PAGE_SIZE(level); while (level > 0) { @@ -1806,6 +1844,7 @@ static void dma_ops_domain_free(struct protection_domain *domain) static struct protection_domain *dma_ops_domain_alloc(void) { struct protection_domain *domain; + u64 *pt_root, root; domain = kzalloc(sizeof(struct protection_domain), GFP_KERNEL); if (!domain) @@ -1814,12 +1853,14 @@ static struct protection_domain *dma_ops_domain_alloc(void) if (protection_domain_init(domain)) goto free_domain; - domain->mode = PAGE_MODE_3_LEVEL; - domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); - domain->flags = PD_DMA_OPS_MASK; - if (!domain->pt_root) + pt_root = (void *)get_zeroed_page(GFP_KERNEL); + if (!pt_root) goto free_domain; + root = amd_iommu_domain_encode_pgtable(pt_root, PAGE_MODE_3_LEVEL); + atomic64_set(&domain->pt_root, root); + domain->flags = PD_DMA_OPS_MASK; + if (iommu_get_dma_cookie(&domain->domain) == -ENOMEM) goto free_domain; @@ -1843,14 +1884,17 @@ static bool dma_ops_domain(struct protection_domain *domain) static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats, bool ppr) { + struct domain_pgtable pgtable; u64 pte_root = 0; u64 flags = 0; u32 old_domid; - if (domain->mode != PAGE_MODE_NONE) - pte_root = iommu_virt_to_phys(domain->pt_root); + amd_iommu_domain_get_pgtable(domain, &pgtable); + + if (pgtable.mode != PAGE_MODE_NONE) + pte_root = iommu_virt_to_phys(pgtable.root); - pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) + pte_root |= (pgtable.mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV; @@ -2375,6 +2419,7 @@ out_err: static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) { struct protection_domain *pdomain; + u64 *pt_root, root; switch (type) { case IOMMU_DOMAIN_UNMANAGED: @@ -2382,13 +2427,15 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) if (!pdomain) return NULL; - pdomain->mode = PAGE_MODE_3_LEVEL; - pdomain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); - if (!pdomain->pt_root) { + pt_root = (void *)get_zeroed_page(GFP_KERNEL); + if (!pt_root) { protection_domain_free(pdomain); return NULL; } + root = amd_iommu_domain_encode_pgtable(pt_root, PAGE_MODE_3_LEVEL); + atomic64_set(&pdomain->pt_root, root); + pdomain->domain.geometry.aperture_start = 0; pdomain->domain.geometry.aperture_end = ~0ULL; pdomain->domain.geometry.force_aperture = true; @@ -2406,7 +2453,7 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) if (!pdomain) return NULL; - pdomain->mode = PAGE_MODE_NONE; + atomic64_set(&pdomain->pt_root, PAGE_MODE_NONE); break; default: return NULL; @@ -2418,6 +2465,7 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) static void amd_iommu_domain_free(struct iommu_domain *dom) { struct protection_domain *domain; + struct domain_pgtable pgtable; domain = to_pdomain(dom); @@ -2435,7 +2483,9 @@ static void amd_iommu_domain_free(struct iommu_domain *dom) dma_ops_domain_free(domain); break; default: - if (domain->mode != PAGE_MODE_NONE) + amd_iommu_domain_get_pgtable(domain, &pgtable); + + if (pgtable.mode != PAGE_MODE_NONE) free_pagetable(domain); if (domain->flags & PD_IOMMUV2_MASK) @@ -2518,10 +2568,12 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, gfp_t gfp) { struct protection_domain *domain = to_pdomain(dom); + struct domain_pgtable pgtable; int prot = 0; int ret; - if (domain->mode == PAGE_MODE_NONE) + amd_iommu_domain_get_pgtable(domain, &pgtable); + if (pgtable.mode == PAGE_MODE_NONE) return -EINVAL; if (iommu_prot & IOMMU_READ) @@ -2541,8 +2593,10 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, struct iommu_iotlb_gather *gather) { struct protection_domain *domain = to_pdomain(dom); + struct domain_pgtable pgtable; - if (domain->mode == PAGE_MODE_NONE) + amd_iommu_domain_get_pgtable(domain, &pgtable); + if (pgtable.mode == PAGE_MODE_NONE) return 0; return iommu_unmap_page(domain, iova, page_size); @@ -2553,9 +2607,11 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, { struct protection_domain *domain = to_pdomain(dom); unsigned long offset_mask, pte_pgsize; + struct domain_pgtable pgtable; u64 *pte, __pte; - if (domain->mode == PAGE_MODE_NONE) + amd_iommu_domain_get_pgtable(domain, &pgtable); + if (pgtable.mode == PAGE_MODE_NONE) return iova; pte = fetch_pte(domain, iova, &pte_pgsize); @@ -2708,16 +2764,26 @@ EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier); void amd_iommu_domain_direct_map(struct iommu_domain *dom) { struct protection_domain *domain = to_pdomain(dom); + struct domain_pgtable pgtable; unsigned long flags; + u64 pt_root; spin_lock_irqsave(&domain->lock, flags); + /* First save pgtable configuration*/ + amd_iommu_domain_get_pgtable(domain, &pgtable); + /* Update data structure */ - domain->mode = PAGE_MODE_NONE; + pt_root = amd_iommu_domain_encode_pgtable(NULL, PAGE_MODE_NONE); + atomic64_set(&domain->pt_root, pt_root); /* Make changes visible to IOMMUs */ update_domain(domain); + /* Restore old pgtable in domain->ptroot to free page-table */ + pt_root = amd_iommu_domain_encode_pgtable(pgtable.root, pgtable.mode); + atomic64_set(&domain->pt_root, pt_root); + /* Page-table is not visible to IOMMU anymore, so free it */ free_pagetable(domain); @@ -2908,9 +2974,11 @@ static u64 *__get_gcr3_pte(u64 *root, int level, int pasid, bool alloc) static int __set_gcr3(struct protection_domain *domain, int pasid, unsigned long cr3) { + struct domain_pgtable pgtable; u64 *pte; - if (domain->mode != PAGE_MODE_NONE) + amd_iommu_domain_get_pgtable(domain, &pgtable); + if (pgtable.mode != PAGE_MODE_NONE) return -EINVAL; pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true); @@ -2924,9 +2992,11 @@ static int __set_gcr3(struct protection_domain *domain, int pasid, static int __clear_gcr3(struct protection_domain *domain, int pasid) { + struct domain_pgtable pgtable; u64 *pte; - if (domain->mode != PAGE_MODE_NONE) + amd_iommu_domain_get_pgtable(domain, &pgtable); + if (pgtable.mode != PAGE_MODE_NONE) return -EINVAL; pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false); diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index ca8c4522045b..7a8fdec138bd 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -468,8 +468,7 @@ struct protection_domain { iommu core code */ spinlock_t lock; /* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ - int mode; /* paging mode (0-6 levels) */ - u64 *pt_root; /* page table root pointer */ + atomic64_t pt_root; /* pgtable root and pgtable mode */ int glx; /* Number of levels for GCR3 table */ u64 *gcr3_tbl; /* Guest CR3 table */ unsigned long flags; /* flags to find out type of domain */ @@ -477,6 +476,12 @@ struct protection_domain { unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ }; +/* For decocded pt_root */ +struct domain_pgtable { + int mode; + u64 *root; +}; + /* * Structure where we save information about one hardware AMD IOMMU in the * system. -- cgit v1.2.3-59-g8ed1b From 5b8a9a047b6cad361405c7900c1e1cdd378c4589 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 May 2020 14:54:10 +0200 Subject: iommu/amd: Do not loop forever when trying to increase address space When increase_address_space() fails to allocate memory, alloc_pte() will call it again until it succeeds. Do not loop forever while trying to increase the address space and just return an error instead. Signed-off-by: Joerg Roedel Tested-by: Qian Cai Link: https://lore.kernel.org/r/20200504125413.16798-3-joro@8bytes.org Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 28229a38af4d..68da484a69dd 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1489,8 +1489,19 @@ static u64 *alloc_pte(struct protection_domain *domain, amd_iommu_domain_get_pgtable(domain, &pgtable); while (address > PM_LEVEL_SIZE(pgtable.mode)) { - *updated = increase_address_space(domain, address, gfp) || *updated; + bool upd = increase_address_space(domain, address, gfp); + + /* Read new values to check if update was successful */ amd_iommu_domain_get_pgtable(domain, &pgtable); + + /* + * Return an error if there is no memory to update the + * page-table. + */ + if (!upd && (address > PM_LEVEL_SIZE(pgtable.mode))) + return NULL; + + *updated = *updated || upd; } -- cgit v1.2.3-59-g8ed1b From f44a4d7e4f1cdef73c90b1dc749c4d8a7372a8eb Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 May 2020 14:54:11 +0200 Subject: iommu/amd: Call domain_flush_complete() in update_domain() The update_domain() function is expected to also inform the hardware about domain changes. This needs a COMPLETION_WAIT command to be sent to all IOMMUs which use the domain. Signed-off-by: Joerg Roedel Tested-by: Qian Cai Link: https://lore.kernel.org/r/20200504125413.16798-4-joro@8bytes.org Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 68da484a69dd..d2499c86d395 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2321,6 +2321,7 @@ static void update_domain(struct protection_domain *domain) domain_flush_devices(domain); domain_flush_tlb_pde(domain); + domain_flush_complete(domain); } int __init amd_iommu_init_api(void) -- cgit v1.2.3-59-g8ed1b From 19c6978fba68a2cdedee7d55fb8c3063d47982d9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 May 2020 14:54:12 +0200 Subject: iommu/amd: Update Device Table in increase_address_space() The Device Table needs to be updated before the new page-table root can be published in domain->pt_root. Otherwise a concurrent call to fetch_pte might fetch a PTE which is not reachable through the Device Table Entry. Fixes: 92d420ec028d ("iommu/amd: Relax locking in dma_ops path") Reported-by: Qian Cai Signed-off-by: Joerg Roedel Tested-by: Qian Cai Link: https://lore.kernel.org/r/20200504125413.16798-5-joro@8bytes.org Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 49 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index d2499c86d395..2ae1daac888a 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -101,6 +101,8 @@ struct kmem_cache *amd_iommu_irq_cache; static void update_domain(struct protection_domain *domain); static int protection_domain_init(struct protection_domain *domain); static void detach_device(struct device *dev); +static void update_and_flush_device_table(struct protection_domain *domain, + struct domain_pgtable *pgtable); /**************************************************************************** * @@ -1461,8 +1463,16 @@ static bool increase_address_space(struct protection_domain *domain, *pte = PM_LEVEL_PDE(pgtable.mode, iommu_virt_to_phys(pgtable.root)); - root = amd_iommu_domain_encode_pgtable(pte, pgtable.mode + 1); + pgtable.root = pte; + pgtable.mode += 1; + update_and_flush_device_table(domain, &pgtable); + domain_flush_complete(domain); + /* + * Device Table needs to be updated and flushed before the new root can + * be published. + */ + root = amd_iommu_domain_encode_pgtable(pte, pgtable.mode); atomic64_set(&domain->pt_root, root); ret = true; @@ -1893,19 +1903,17 @@ static bool dma_ops_domain(struct protection_domain *domain) } static void set_dte_entry(u16 devid, struct protection_domain *domain, + struct domain_pgtable *pgtable, bool ats, bool ppr) { - struct domain_pgtable pgtable; u64 pte_root = 0; u64 flags = 0; u32 old_domid; - amd_iommu_domain_get_pgtable(domain, &pgtable); + if (pgtable->mode != PAGE_MODE_NONE) + pte_root = iommu_virt_to_phys(pgtable->root); - if (pgtable.mode != PAGE_MODE_NONE) - pte_root = iommu_virt_to_phys(pgtable.root); - - pte_root |= (pgtable.mode & DEV_ENTRY_MODE_MASK) + pte_root |= (pgtable->mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV; @@ -1978,6 +1986,7 @@ static void clear_dte_entry(u16 devid) static void do_attach(struct iommu_dev_data *dev_data, struct protection_domain *domain) { + struct domain_pgtable pgtable; struct amd_iommu *iommu; bool ats; @@ -1993,7 +2002,9 @@ static void do_attach(struct iommu_dev_data *dev_data, domain->dev_cnt += 1; /* Update device table */ - set_dte_entry(dev_data->devid, domain, ats, dev_data->iommu_v2); + amd_iommu_domain_get_pgtable(domain, &pgtable); + set_dte_entry(dev_data->devid, domain, &pgtable, + ats, dev_data->iommu_v2); clone_aliases(dev_data->pdev); device_flush_dte(dev_data); @@ -2304,22 +2315,34 @@ static int amd_iommu_domain_get_attr(struct iommu_domain *domain, * *****************************************************************************/ -static void update_device_table(struct protection_domain *domain) +static void update_device_table(struct protection_domain *domain, + struct domain_pgtable *pgtable) { struct iommu_dev_data *dev_data; list_for_each_entry(dev_data, &domain->dev_list, list) { - set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled, - dev_data->iommu_v2); + set_dte_entry(dev_data->devid, domain, pgtable, + dev_data->ats.enabled, dev_data->iommu_v2); clone_aliases(dev_data->pdev); } } +static void update_and_flush_device_table(struct protection_domain *domain, + struct domain_pgtable *pgtable) +{ + update_device_table(domain, pgtable); + domain_flush_devices(domain); +} + static void update_domain(struct protection_domain *domain) { - update_device_table(domain); + struct domain_pgtable pgtable; - domain_flush_devices(domain); + /* Update device table */ + amd_iommu_domain_get_pgtable(domain, &pgtable); + update_and_flush_device_table(domain, &pgtable); + + /* Flush domain TLB(s) and wait for completion */ domain_flush_tlb_pde(domain); domain_flush_complete(domain); } -- cgit v1.2.3-59-g8ed1b From 119b2b2c3e256f4bcff7439acc25ac1e9f1aaa4e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 4 May 2020 14:54:13 +0200 Subject: iommu/amd: Do not flush Device Table in iommu_map_page() The flush of the Device Table Entries for the domain has already happened in increase_address_space(), if necessary. Do no flush them again in iommu_map_page(). Signed-off-by: Joerg Roedel Tested-by: Qian Cai Link: https://lore.kernel.org/r/20200504125413.16798-6-joro@8bytes.org Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 2ae1daac888a..1dc3718560d0 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1446,15 +1446,18 @@ static bool increase_address_space(struct protection_domain *domain, { struct domain_pgtable pgtable; unsigned long flags; - bool ret = false; + bool ret = true; u64 *pte, root; spin_lock_irqsave(&domain->lock, flags); amd_iommu_domain_get_pgtable(domain, &pgtable); - if (address <= PM_LEVEL_SIZE(pgtable.mode) || - WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL)) + if (address <= PM_LEVEL_SIZE(pgtable.mode)) + goto out; + + ret = false; + if (WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL)) goto out; pte = (void *)get_zeroed_page(gfp); @@ -1499,19 +1502,15 @@ static u64 *alloc_pte(struct protection_domain *domain, amd_iommu_domain_get_pgtable(domain, &pgtable); while (address > PM_LEVEL_SIZE(pgtable.mode)) { - bool upd = increase_address_space(domain, address, gfp); - - /* Read new values to check if update was successful */ - amd_iommu_domain_get_pgtable(domain, &pgtable); - /* * Return an error if there is no memory to update the * page-table. */ - if (!upd && (address > PM_LEVEL_SIZE(pgtable.mode))) + if (!increase_address_space(domain, address, gfp)) return NULL; - *updated = *updated || upd; + /* Read new values to check if update was successful */ + amd_iommu_domain_get_pgtable(domain, &pgtable); } @@ -1719,7 +1718,13 @@ out: unsigned long flags; spin_lock_irqsave(&dom->lock, flags); - update_domain(dom); + /* + * Flush domain TLB(s) and wait for completion. Any Device-Table + * Updates and flushing already happened in + * increase_address_space(). + */ + domain_flush_tlb_pde(dom); + domain_flush_complete(dom); spin_unlock_irqrestore(&dom->lock, flags); } -- cgit v1.2.3-59-g8ed1b From d76bc8200f9cf8b6746e66b37317ba477eda25c4 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Wed, 29 Apr 2020 00:12:00 +0300 Subject: mei: me: disable mei interface on LBG servers. Disable the MEI driver on LBG SPS (server) platforms, some corner flows such as recovery mode does not work, and the driver doesn't have working use cases. Cc: Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20200428211200.12200-1-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me.c | 8 ++++++++ drivers/misc/mei/hw-me.h | 4 ++++ drivers/misc/mei/pci-me.c | 2 +- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index 668418d7ea77..f620442addf5 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -1465,6 +1465,13 @@ static const struct mei_cfg mei_me_pch12_cfg = { MEI_CFG_DMA_128, }; +/* LBG with quirk for SPS Firmware exclusion */ +static const struct mei_cfg mei_me_pch12_sps_cfg = { + MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, + MEI_CFG_FW_SPS, +}; + /* Tiger Lake and newer devices */ static const struct mei_cfg mei_me_pch15_cfg = { MEI_CFG_PCH8_HFS, @@ -1487,6 +1494,7 @@ static const struct mei_cfg *const mei_cfg_list[] = { [MEI_ME_PCH8_CFG] = &mei_me_pch8_cfg, [MEI_ME_PCH8_SPS_CFG] = &mei_me_pch8_sps_cfg, [MEI_ME_PCH12_CFG] = &mei_me_pch12_cfg, + [MEI_ME_PCH12_SPS_CFG] = &mei_me_pch12_sps_cfg, [MEI_ME_PCH15_CFG] = &mei_me_pch15_cfg, }; diff --git a/drivers/misc/mei/hw-me.h b/drivers/misc/mei/hw-me.h index 4a8d4dcd5a91..b6b94e211464 100644 --- a/drivers/misc/mei/hw-me.h +++ b/drivers/misc/mei/hw-me.h @@ -80,6 +80,9 @@ struct mei_me_hw { * servers platforms with quirk for * SPS firmware exclusion. * @MEI_ME_PCH12_CFG: Platform Controller Hub Gen12 and newer + * @MEI_ME_PCH12_SPS_CFG: Platform Controller Hub Gen12 and newer + * servers platforms with quirk for + * SPS firmware exclusion. * @MEI_ME_PCH15_CFG: Platform Controller Hub Gen15 and newer * @MEI_ME_NUM_CFG: Upper Sentinel. */ @@ -93,6 +96,7 @@ enum mei_cfg_idx { MEI_ME_PCH8_CFG, MEI_ME_PCH8_SPS_CFG, MEI_ME_PCH12_CFG, + MEI_ME_PCH12_SPS_CFG, MEI_ME_PCH15_CFG, MEI_ME_NUM_CFG, }; diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 0c390fe421ad..a1ed375fed37 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -70,7 +70,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_2, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, MEI_ME_PCH8_SPS_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, MEI_ME_PCH8_SPS_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_LBG, MEI_ME_PCH12_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_LBG, MEI_ME_PCH12_SPS_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_BXT_M, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_APL_I, MEI_ME_PCH8_CFG)}, -- cgit v1.2.3-59-g8ed1b From 115f32512f13c0280161908e9de45a97a87673bb Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Fri, 1 May 2020 00:35:50 +0530 Subject: bus: mhi: Fix parsing of mhi_flags With the current parsing of mhi_flags, the following statement always return false: eob = !!(flags & MHI_EOB); This is due to the fact that 'enum mhi_flags' starts with index 0 and we are using direct AND operation to extract each bit. Fix this by using BIT() macros for defining the flags so that the reset of the code need not be touched. Fixes: 189ff97cca53 ("bus: mhi: core: Add support for data transfer") Reported-by: Dan Carpenter Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200430190555.32741-2-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/mhi.h b/include/linux/mhi.h index ad1996001965..5642806360f3 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -53,9 +53,9 @@ enum mhi_callback { * @MHI_CHAIN: Linked transfer */ enum mhi_flags { - MHI_EOB, - MHI_EOT, - MHI_CHAIN, + MHI_EOB = BIT(0), + MHI_EOT = BIT(1), + MHI_CHAIN = BIT(2), }; /** -- cgit v1.2.3-59-g8ed1b From ce312258084ea0c828e11c831c7b9e8b42b02ef8 Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Fri, 1 May 2020 00:35:51 +0530 Subject: bus: mhi: core: Make sure to powerdown if mhi_sync_power_up fails Powerdown is necessary if mhi_sync_power_up fails due to a timeout, to clean up the resources. Otherwise a BUG could be triggered when attempting to clean up MSIs because the IRQ is still active from a request_irq(). Signed-off-by: Jeffrey Hugo Reviewed-by: Hemant Kumar Reviewed-by: Manivannan Sadhasivam Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200430190555.32741-3-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/bus/mhi/core/pm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/bus/mhi/core/pm.c b/drivers/bus/mhi/core/pm.c index 52690cb5c89c..dc83d65f7784 100644 --- a/drivers/bus/mhi/core/pm.c +++ b/drivers/bus/mhi/core/pm.c @@ -902,7 +902,11 @@ int mhi_sync_power_up(struct mhi_controller *mhi_cntrl) MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state), msecs_to_jiffies(mhi_cntrl->timeout_ms)); - return (MHI_IN_MISSION_MODE(mhi_cntrl->ee)) ? 0 : -EIO; + ret = (MHI_IN_MISSION_MODE(mhi_cntrl->ee)) ? 0 : -ETIMEDOUT; + if (ret) + mhi_power_down(mhi_cntrl, false); + + return ret; } EXPORT_SYMBOL(mhi_sync_power_up); -- cgit v1.2.3-59-g8ed1b From 85a087df4a719ebab940efa3c79625e68161f57b Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Fri, 1 May 2020 00:35:52 +0530 Subject: bus: mhi: core: Remove link_status() callback If the MHI core detects invalid data due to a PCI read, it calls into the controller via link_status() to double check that the link is infact down. All in all, this is pretty pointless, and racy. There are no good reasons for this, and only drawbacks. Its pointless because chances are, the controller is going to do the same thing to determine if the link is down - attempt a PCI access and compare the result. This does not make the link status decision any smarter. Its racy because its possible that the link was down at the time of the MHI core access, but then recovered before the controller access. In this case, the controller will indicate the link is not down, and the MHI core will precede to use a bad value as the MHI core does not attempt to retry the access. Retrying the access in the MHI core is a bad idea because again, it is racy - what if the link is down again? Furthermore, there may be some higher level state associated with the link status, that is now invalid because the link went down. The only reason why the MHI core could see "invalid" data when doing a PCI access, that is actually valid, is if the register actually contained the PCI spec defined sentinel for an invalid access. In this case, it is arguable that the MHI implementation broken, and should be fixed, not worked around. Therefore, remove the link_status() callback before anyone attempts to implement it. Signed-off-by: Jeffrey Hugo Reviewed-by: Manivannan Sadhasivam Reviewed-by: Hemant Kumar Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200430190555.32741-4-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/bus/mhi/core/init.c | 6 ++---- drivers/bus/mhi/core/main.c | 5 ++--- include/linux/mhi.h | 2 -- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/bus/mhi/core/init.c b/drivers/bus/mhi/core/init.c index b38359c480ea..2af08d57ec28 100644 --- a/drivers/bus/mhi/core/init.c +++ b/drivers/bus/mhi/core/init.c @@ -812,10 +812,8 @@ int mhi_register_controller(struct mhi_controller *mhi_cntrl, if (!mhi_cntrl) return -EINVAL; - if (!mhi_cntrl->runtime_get || !mhi_cntrl->runtime_put) - return -EINVAL; - - if (!mhi_cntrl->status_cb || !mhi_cntrl->link_status) + if (!mhi_cntrl->runtime_get || !mhi_cntrl->runtime_put || + !mhi_cntrl->status_cb) return -EINVAL; ret = parse_config(mhi_cntrl, config); diff --git a/drivers/bus/mhi/core/main.c b/drivers/bus/mhi/core/main.c index 55928feea0c9..f8401535e61a 100644 --- a/drivers/bus/mhi/core/main.c +++ b/drivers/bus/mhi/core/main.c @@ -20,9 +20,8 @@ int __must_check mhi_read_reg(struct mhi_controller *mhi_cntrl, { u32 tmp = readl(base + offset); - /* If there is any unexpected value, query the link status */ - if (PCI_INVALID_READ(tmp) && - mhi_cntrl->link_status(mhi_cntrl)) + /* If the value is invalid, the link is down */ + if (PCI_INVALID_READ(tmp)) return -EIO; *out = tmp; diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 5642806360f3..c80ba559face 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -335,7 +335,6 @@ struct mhi_controller_config { * @syserr_worker: System error worker * @state_event: State change event * @status_cb: CB function to notify power states of the device (required) - * @link_status: CB function to query link status of the device (required) * @wake_get: CB function to assert device wake (optional) * @wake_put: CB function to de-assert device wake (optional) * @wake_toggle: CB function to assert and de-assert device wake (optional) @@ -417,7 +416,6 @@ struct mhi_controller { void (*status_cb)(struct mhi_controller *mhi_cntrl, enum mhi_callback cb); - int (*link_status)(struct mhi_controller *mhi_cntrl); void (*wake_get)(struct mhi_controller *mhi_cntrl, bool override); void (*wake_put)(struct mhi_controller *mhi_cntrl, bool override); void (*wake_toggle)(struct mhi_controller *mhi_cntrl); -- cgit v1.2.3-59-g8ed1b From 45723a44845c90c8e859fd0e2b0bb492322b5d0b Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Fri, 1 May 2020 00:35:53 +0530 Subject: bus: mhi: core: Offload register accesses to the controller When reading or writing MHI registers, the core assumes that the physical link is a memory mapped PCI link. This assumption may not hold for all MHI devices. The controller knows what is the physical link (ie PCI, I2C, SPI, etc), and therefore knows the proper methods to access that link. The controller can also handle link specific error scenarios, such as reading -1 when the PCI link went down. Therefore, it is appropriate that the MHI core requests the controller to make register accesses on behalf of the core, which abstracts the core from link specifics, and end up removing an unnecessary assumption. Signed-off-by: Jeffrey Hugo Reviewed-by: Hemant Kumar Reviewed-by: Manivannan Sadhasivam Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200430190555.32741-5-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/bus/mhi/core/init.c | 3 ++- drivers/bus/mhi/core/internal.h | 3 --- drivers/bus/mhi/core/main.c | 12 ++---------- include/linux/mhi.h | 6 ++++++ 4 files changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/bus/mhi/core/init.c b/drivers/bus/mhi/core/init.c index 2af08d57ec28..eb2ab058a01d 100644 --- a/drivers/bus/mhi/core/init.c +++ b/drivers/bus/mhi/core/init.c @@ -813,7 +813,8 @@ int mhi_register_controller(struct mhi_controller *mhi_cntrl, return -EINVAL; if (!mhi_cntrl->runtime_get || !mhi_cntrl->runtime_put || - !mhi_cntrl->status_cb) + !mhi_cntrl->status_cb || !mhi_cntrl->read_reg || + !mhi_cntrl->write_reg) return -EINVAL; ret = parse_config(mhi_cntrl, config); diff --git a/drivers/bus/mhi/core/internal.h b/drivers/bus/mhi/core/internal.h index 5deadfaa053a..095d95bc0e37 100644 --- a/drivers/bus/mhi/core/internal.h +++ b/drivers/bus/mhi/core/internal.h @@ -11,9 +11,6 @@ extern struct bus_type mhi_bus_type; -/* MHI MMIO register mapping */ -#define PCI_INVALID_READ(val) (val == U32_MAX) - #define MHIREGLEN (0x0) #define MHIREGLEN_MHIREGLEN_MASK (0xFFFFFFFF) #define MHIREGLEN_MHIREGLEN_SHIFT (0) diff --git a/drivers/bus/mhi/core/main.c b/drivers/bus/mhi/core/main.c index f8401535e61a..2aceb69f6ce8 100644 --- a/drivers/bus/mhi/core/main.c +++ b/drivers/bus/mhi/core/main.c @@ -18,15 +18,7 @@ int __must_check mhi_read_reg(struct mhi_controller *mhi_cntrl, void __iomem *base, u32 offset, u32 *out) { - u32 tmp = readl(base + offset); - - /* If the value is invalid, the link is down */ - if (PCI_INVALID_READ(tmp)) - return -EIO; - - *out = tmp; - - return 0; + return mhi_cntrl->read_reg(mhi_cntrl, base + offset, out); } int __must_check mhi_read_reg_field(struct mhi_controller *mhi_cntrl, @@ -48,7 +40,7 @@ int __must_check mhi_read_reg_field(struct mhi_controller *mhi_cntrl, void mhi_write_reg(struct mhi_controller *mhi_cntrl, void __iomem *base, u32 offset, u32 val) { - writel(val, base + offset); + mhi_cntrl->write_reg(mhi_cntrl, base + offset, val); } void mhi_write_reg_field(struct mhi_controller *mhi_cntrl, void __iomem *base, diff --git a/include/linux/mhi.h b/include/linux/mhi.h index c80ba559face..84a6c9e72f52 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -342,6 +342,8 @@ struct mhi_controller_config { * @runtimet_put: CB function to decrement pm usage (required) * @map_single: CB function to create TRE buffer * @unmap_single: CB function to destroy TRE buffer + * @read_reg: Read a MHI register via the physical link (required) + * @write_reg: Write a MHI register via the physical link (required) * @buffer_len: Bounce buffer length * @bounce_buf: Use of bounce buffer * @fbc_download: MHI host needs to do complete image transfer (optional) @@ -425,6 +427,10 @@ struct mhi_controller { struct mhi_buf_info *buf); void (*unmap_single)(struct mhi_controller *mhi_cntrl, struct mhi_buf_info *buf); + int (*read_reg)(struct mhi_controller *mhi_cntrl, void __iomem *addr, + u32 *out); + void (*write_reg)(struct mhi_controller *mhi_cntrl, void __iomem *addr, + u32 val); size_t buffer_len; bool bounce_buf; -- cgit v1.2.3-59-g8ed1b From af2e58818082ac0db29539444ca17eb1e77f6000 Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Fri, 1 May 2020 00:35:54 +0530 Subject: bus: mhi: core: Fix typo in comment There is a typo - "runtimet" should be "runtime". Fix it. Signed-off-by: Jeffrey Hugo Reviewed-by: Hemant Kumar Reviewed-by: Manivannan Sadhasivam Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200430190555.32741-6-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mhi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 84a6c9e72f52..3d7c3c26eeb9 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -339,7 +339,7 @@ struct mhi_controller_config { * @wake_put: CB function to de-assert device wake (optional) * @wake_toggle: CB function to assert and de-assert device wake (optional) * @runtime_get: CB function to controller runtime resume (required) - * @runtimet_put: CB function to decrement pm usage (required) + * @runtime_put: CB function to decrement pm usage (required) * @map_single: CB function to create TRE buffer * @unmap_single: CB function to destroy TRE buffer * @read_reg: Read a MHI register via the physical link (required) -- cgit v1.2.3-59-g8ed1b From f0e1d3ac2d7c16a5d2c9d67f5a61133db7681af8 Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Fri, 1 May 2020 00:35:55 +0530 Subject: bus: mhi: core: Fix channel device name conflict When multiple instances of the same MHI product are present in a system, we can see a splat from mhi_create_devices() - "sysfs: cannot create duplicate filename". This is because the device names assigned to the MHI channel devices are non-unique. They consist of the channel's name, and the channel's pipe id. For identical products, each instance is going to have the same set of channel (both in name and pipe id). To fix this, we prepend the device name of the parent device that the MHI channels belong to. Since different instances of the same product should have unique device names, this makes the MHI channel devices for each product also unique. Additionally, remove the pipe id from the MHI channel device name. This is an internal detail to the MHI product that provides little value, and imposes too much device specific internal details to userspace. It is expected that channel with a specific name (ie "SAHARA") has a specific client, and it does not matter what pipe id that channel is enumerated on. The pipe id is an internal detail between the MHI bus, and the hardware. The client is not expected to make decisions based on the pipe id, and to do so would require the client to have intimate knowledge of the hardware, which is inappropiate as it may violate the layering provided by the MHI bus. The limitation of doing this is that each product may only have one instance of a channel by a unique name. This limitation is appropriate given the usecases of MHI channels. Signed-off-by: Jeffrey Hugo Reviewed-by: Hemant Kumar Reviewed-by: Manivannan Sadhasivam Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200430190555.32741-7-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/bus/mhi/core/main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/bus/mhi/core/main.c b/drivers/bus/mhi/core/main.c index 2aceb69f6ce8..97e06cc586e4 100644 --- a/drivers/bus/mhi/core/main.c +++ b/drivers/bus/mhi/core/main.c @@ -327,7 +327,8 @@ void mhi_create_devices(struct mhi_controller *mhi_cntrl) /* Channel name is same for both UL and DL */ mhi_dev->chan_name = mhi_chan->name; - dev_set_name(&mhi_dev->dev, "%04x_%s", mhi_chan->chan, + dev_set_name(&mhi_dev->dev, "%s_%s", + dev_name(mhi_cntrl->cntrl_dev), mhi_dev->chan_name); /* Init wakeup source if available */ -- cgit v1.2.3-59-g8ed1b From 0b80f9866e6bbfb905140ed8787ff2af03652c0c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 4 May 2020 19:27:54 -0400 Subject: iocost: protect iocg->abs_vdebt with iocg->waitq.lock abs_vdebt is an atomic_64 which tracks how much over budget a given cgroup is and controls the activation of use_delay mechanism. Once a cgroup goes over budget from forced IOs, it has to pay it back with its future budget. The progress guarantee on debt paying comes from the iocg being active - active iocgs are processed by the periodic timer, which ensures that as time passes the debts dissipate and the iocg returns to normal operation. However, both iocg activation and vdebt handling are asynchronous and a sequence like the following may happen. 1. The iocg is in the process of being deactivated by the periodic timer. 2. A bio enters ioc_rqos_throttle(), calls iocg_activate() which returns without anything because it still sees that the iocg is already active. 3. The iocg is deactivated. 4. The bio from #2 is over budget but needs to be forced. It increases abs_vdebt and goes over the threshold and enables use_delay. 5. IO control is enabled for the iocg's subtree and now IOs are attributed to the descendant cgroups and the iocg itself no longer issues IOs. This leaves the iocg with stuck abs_vdebt - it has debt but inactive and no further IOs which can activate it. This can end up unduly punishing all the descendants cgroups. The usual throttling path has the same issue - the iocg must be active while throttled to ensure that future event will wake it up - and solves the problem by synchronizing the throttling path with a spinlock. abs_vdebt handling is another form of overage handling and shares a lot of characteristics including the fact that it isn't in the hottest path. This patch fixes the above and other possible races by strictly synchronizing abs_vdebt and use_delay handling with iocg->waitq.lock. Signed-off-by: Tejun Heo Reported-by: Vlad Dmitriev Cc: stable@vger.kernel.org # v5.4+ Fixes: e1518f63f246 ("blk-iocost: Don't let merges push vtime into the future") Signed-off-by: Jens Axboe --- block/blk-iocost.c | 117 +++++++++++++++++++++++++---------------- tools/cgroup/iocost_monitor.py | 7 ++- 2 files changed, 77 insertions(+), 47 deletions(-) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 3ab0c1c704b6..7c1fe605d0d6 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -466,7 +466,7 @@ struct ioc_gq { */ atomic64_t vtime; atomic64_t done_vtime; - atomic64_t abs_vdebt; + u64 abs_vdebt; u64 last_vtime; /* @@ -1142,7 +1142,7 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, struct ioc_now *now) struct iocg_wake_ctx ctx = { .iocg = iocg }; u64 margin_ns = (u64)(ioc->period_us * WAITQ_TIMER_MARGIN_PCT / 100) * NSEC_PER_USEC; - u64 abs_vdebt, vdebt, vshortage, expires, oexpires; + u64 vdebt, vshortage, expires, oexpires; s64 vbudget; u32 hw_inuse; @@ -1152,18 +1152,15 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, struct ioc_now *now) vbudget = now->vnow - atomic64_read(&iocg->vtime); /* pay off debt */ - abs_vdebt = atomic64_read(&iocg->abs_vdebt); - vdebt = abs_cost_to_cost(abs_vdebt, hw_inuse); + vdebt = abs_cost_to_cost(iocg->abs_vdebt, hw_inuse); if (vdebt && vbudget > 0) { u64 delta = min_t(u64, vbudget, vdebt); u64 abs_delta = min(cost_to_abs_cost(delta, hw_inuse), - abs_vdebt); + iocg->abs_vdebt); atomic64_add(delta, &iocg->vtime); atomic64_add(delta, &iocg->done_vtime); - atomic64_sub(abs_delta, &iocg->abs_vdebt); - if (WARN_ON_ONCE(atomic64_read(&iocg->abs_vdebt) < 0)) - atomic64_set(&iocg->abs_vdebt, 0); + iocg->abs_vdebt -= abs_delta; } /* @@ -1219,12 +1216,18 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now, u64 cost) u64 expires, oexpires; u32 hw_inuse; + lockdep_assert_held(&iocg->waitq.lock); + /* debt-adjust vtime */ current_hweight(iocg, NULL, &hw_inuse); - vtime += abs_cost_to_cost(atomic64_read(&iocg->abs_vdebt), hw_inuse); + vtime += abs_cost_to_cost(iocg->abs_vdebt, hw_inuse); - /* clear or maintain depending on the overage */ - if (time_before_eq64(vtime, now->vnow)) { + /* + * Clear or maintain depending on the overage. Non-zero vdebt is what + * guarantees that @iocg is online and future iocg_kick_delay() will + * clear use_delay. Don't leave it on when there's no vdebt. + */ + if (!iocg->abs_vdebt || time_before_eq64(vtime, now->vnow)) { blkcg_clear_delay(blkg); return false; } @@ -1258,9 +1261,12 @@ static enum hrtimer_restart iocg_delay_timer_fn(struct hrtimer *timer) { struct ioc_gq *iocg = container_of(timer, struct ioc_gq, delay_timer); struct ioc_now now; + unsigned long flags; + spin_lock_irqsave(&iocg->waitq.lock, flags); ioc_now(iocg->ioc, &now); iocg_kick_delay(iocg, &now, 0); + spin_unlock_irqrestore(&iocg->waitq.lock, flags); return HRTIMER_NORESTART; } @@ -1368,14 +1374,13 @@ static void ioc_timer_fn(struct timer_list *timer) * should have woken up in the last period and expire idle iocgs. */ list_for_each_entry_safe(iocg, tiocg, &ioc->active_iocgs, active_list) { - if (!waitqueue_active(&iocg->waitq) && - !atomic64_read(&iocg->abs_vdebt) && !iocg_is_idle(iocg)) + if (!waitqueue_active(&iocg->waitq) && iocg->abs_vdebt && + !iocg_is_idle(iocg)) continue; spin_lock(&iocg->waitq.lock); - if (waitqueue_active(&iocg->waitq) || - atomic64_read(&iocg->abs_vdebt)) { + if (waitqueue_active(&iocg->waitq) || iocg->abs_vdebt) { /* might be oversleeping vtime / hweight changes, kick */ iocg_kick_waitq(iocg, &now); iocg_kick_delay(iocg, &now, 0); @@ -1718,28 +1723,49 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio) * tests are racy but the races aren't systemic - we only miss once * in a while which is fine. */ - if (!waitqueue_active(&iocg->waitq) && - !atomic64_read(&iocg->abs_vdebt) && + if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt && time_before_eq64(vtime + cost, now.vnow)) { iocg_commit_bio(iocg, bio, cost); return; } /* - * We're over budget. If @bio has to be issued regardless, - * remember the abs_cost instead of advancing vtime. - * iocg_kick_waitq() will pay off the debt before waking more IOs. + * We activated above but w/o any synchronization. Deactivation is + * synchronized with waitq.lock and we won't get deactivated as long + * as we're waiting or has debt, so we're good if we're activated + * here. In the unlikely case that we aren't, just issue the IO. + */ + spin_lock_irq(&iocg->waitq.lock); + + if (unlikely(list_empty(&iocg->active_list))) { + spin_unlock_irq(&iocg->waitq.lock); + iocg_commit_bio(iocg, bio, cost); + return; + } + + /* + * We're over budget. If @bio has to be issued regardless, remember + * the abs_cost instead of advancing vtime. iocg_kick_waitq() will pay + * off the debt before waking more IOs. + * * This way, the debt is continuously paid off each period with the - * actual budget available to the cgroup. If we just wound vtime, - * we would incorrectly use the current hw_inuse for the entire - * amount which, for example, can lead to the cgroup staying - * blocked for a long time even with substantially raised hw_inuse. + * actual budget available to the cgroup. If we just wound vtime, we + * would incorrectly use the current hw_inuse for the entire amount + * which, for example, can lead to the cgroup staying blocked for a + * long time even with substantially raised hw_inuse. + * + * An iocg with vdebt should stay online so that the timer can keep + * deducting its vdebt and [de]activate use_delay mechanism + * accordingly. We don't want to race against the timer trying to + * clear them and leave @iocg inactive w/ dangling use_delay heavily + * penalizing the cgroup and its descendants. */ if (bio_issue_as_root_blkg(bio) || fatal_signal_pending(current)) { - atomic64_add(abs_cost, &iocg->abs_vdebt); + iocg->abs_vdebt += abs_cost; if (iocg_kick_delay(iocg, &now, cost)) blkcg_schedule_throttle(rqos->q, (bio->bi_opf & REQ_SWAP) == REQ_SWAP); + spin_unlock_irq(&iocg->waitq.lock); return; } @@ -1756,20 +1782,6 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio) * All waiters are on iocg->waitq and the wait states are * synchronized using waitq.lock. */ - spin_lock_irq(&iocg->waitq.lock); - - /* - * We activated above but w/o any synchronization. Deactivation is - * synchronized with waitq.lock and we won't get deactivated as - * long as we're waiting, so we're good if we're activated here. - * In the unlikely case that we are deactivated, just issue the IO. - */ - if (unlikely(list_empty(&iocg->active_list))) { - spin_unlock_irq(&iocg->waitq.lock); - iocg_commit_bio(iocg, bio, cost); - return; - } - init_waitqueue_func_entry(&wait.wait, iocg_wake_fn); wait.wait.private = current; wait.bio = bio; @@ -1801,6 +1813,7 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq, struct ioc_now now; u32 hw_inuse; u64 abs_cost, cost; + unsigned long flags; /* bypass if disabled or for root cgroup */ if (!ioc->enabled || !iocg->level) @@ -1820,15 +1833,28 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq, iocg->cursor = bio_end; /* - * Charge if there's enough vtime budget and the existing request - * has cost assigned. Otherwise, account it as debt. See debt - * handling in ioc_rqos_throttle() for details. + * Charge if there's enough vtime budget and the existing request has + * cost assigned. */ if (rq->bio && rq->bio->bi_iocost_cost && - time_before_eq64(atomic64_read(&iocg->vtime) + cost, now.vnow)) + time_before_eq64(atomic64_read(&iocg->vtime) + cost, now.vnow)) { iocg_commit_bio(iocg, bio, cost); - else - atomic64_add(abs_cost, &iocg->abs_vdebt); + return; + } + + /* + * Otherwise, account it as debt if @iocg is online, which it should + * be for the vast majority of cases. See debt handling in + * ioc_rqos_throttle() for details. + */ + spin_lock_irqsave(&iocg->waitq.lock, flags); + if (likely(!list_empty(&iocg->active_list))) { + iocg->abs_vdebt += abs_cost; + iocg_kick_delay(iocg, &now, cost); + } else { + iocg_commit_bio(iocg, bio, cost); + } + spin_unlock_irqrestore(&iocg->waitq.lock, flags); } static void ioc_rqos_done_bio(struct rq_qos *rqos, struct bio *bio) @@ -1998,7 +2024,6 @@ static void ioc_pd_init(struct blkg_policy_data *pd) iocg->ioc = ioc; atomic64_set(&iocg->vtime, now.vnow); atomic64_set(&iocg->done_vtime, now.vnow); - atomic64_set(&iocg->abs_vdebt, 0); atomic64_set(&iocg->active_period, atomic64_read(&ioc->cur_period)); INIT_LIST_HEAD(&iocg->active_list); iocg->hweight_active = HWEIGHT_WHOLE; diff --git a/tools/cgroup/iocost_monitor.py b/tools/cgroup/iocost_monitor.py index 7427a5ee761b..9d8e9613008a 100644 --- a/tools/cgroup/iocost_monitor.py +++ b/tools/cgroup/iocost_monitor.py @@ -159,7 +159,12 @@ class IocgStat: else: self.inflight_pct = 0 - self.debt_ms = iocg.abs_vdebt.counter.value_() / VTIME_PER_USEC / 1000 + # vdebt used to be an atomic64_t and is now u64, support both + try: + self.debt_ms = iocg.abs_vdebt.counter.value_() / VTIME_PER_USEC / 1000 + except: + self.debt_ms = iocg.abs_vdebt.value_() / VTIME_PER_USEC / 1000 + self.use_delay = blkg.use_delay.counter.value_() self.delay_ms = blkg.delay_nsec.counter.value_() / 1_000_000 -- cgit v1.2.3-59-g8ed1b From 5fe89a6acd668cbd1817fcdef5caa9fee568c2e8 Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Tue, 14 Apr 2020 15:02:55 -0400 Subject: drm: Fix HDCP failures when SRM fw is missing The SRM cleanup in 79643fddd6eb2 ("drm/hdcp: optimizing the srm handling") inadvertently altered the behavior of HDCP auth when the SRM firmware is missing. Before that patch, missing SRM was interpreted as the device having no revoked keys. With that patch, if the SRM fw file is missing we reject _all_ keys. This patch fixes that regression by returning success if the file cannot be found. It also checks the return value from request_srm such that we won't end up trying to parse the ksv list if there is an error fetching it. Fixes: 79643fddd6eb ("drm/hdcp: optimizing the srm handling") Cc: stable@vger.kernel.org Cc: Ramalingam C Cc: Sean Paul Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Daniel Vetter Cc: dri-devel@lists.freedesktop.org Reviewed-by: Ramalingam C Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20200414190258.38873-1-sean@poorly.run Changes in v2: -Noticed a couple other things to clean up Reviewed-by: Ramalingam C --- drivers/gpu/drm/drm_hdcp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_hdcp.c b/drivers/gpu/drm/drm_hdcp.c index 7f386adcf872..910108ccaae1 100644 --- a/drivers/gpu/drm/drm_hdcp.c +++ b/drivers/gpu/drm/drm_hdcp.c @@ -241,8 +241,12 @@ static int drm_hdcp_request_srm(struct drm_device *drm_dev, ret = request_firmware_direct(&fw, (const char *)fw_name, drm_dev->dev); - if (ret < 0) + if (ret < 0) { + *revoked_ksv_cnt = 0; + *revoked_ksv_list = NULL; + ret = 0; goto exit; + } if (fw->size && fw->data) ret = drm_hdcp_srm_update(fw->data, fw->size, revoked_ksv_list, @@ -287,6 +291,8 @@ int drm_hdcp_check_ksvs_revoked(struct drm_device *drm_dev, u8 *ksvs, ret = drm_hdcp_request_srm(drm_dev, &revoked_ksv_list, &revoked_ksv_cnt); + if (ret) + return ret; /* revoked_ksv_cnt will be zero when above function failed */ for (i = 0; i < revoked_ksv_cnt; i++) -- cgit v1.2.3-59-g8ed1b From 7f13657d141346125f4d0bb93eab4777f40c406e Mon Sep 17 00:00:00 2001 From: Xiaoguang Wang Date: Tue, 5 May 2020 16:28:53 +0800 Subject: io_uring: handle -EFAULT properly in io_uring_setup() If copy_to_user() in io_uring_setup() failed, we'll leak many kernel resources, which will be recycled until process terminates. This bug can be reproduced by using mprotect to set params to PROT_READ. To fix this issue, refactor io_uring_create() a bit to add a new 'struct io_uring_params __user *params' parameter and move the copy_to_user() in io_uring_setup() to io_uring_setup(), if copy_to_user() failed, we can free kernel resource properly. Suggested-by: Jens Axboe Signed-off-by: Xiaoguang Wang Signed-off-by: Jens Axboe --- fs/io_uring.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 6d52ff98279d..dd680eb153cb 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7760,7 +7760,8 @@ err: return ret; } -static int io_uring_create(unsigned entries, struct io_uring_params *p) +static int io_uring_create(unsigned entries, struct io_uring_params *p, + struct io_uring_params __user *params) { struct user_struct *user = NULL; struct io_ring_ctx *ctx; @@ -7852,6 +7853,14 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p) p->cq_off.overflow = offsetof(struct io_rings, cq_overflow); p->cq_off.cqes = offsetof(struct io_rings, cqes); + p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP | + IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS | + IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL; + + if (copy_to_user(params, p, sizeof(*p))) { + ret = -EFAULT; + goto err; + } /* * Install ring fd as the very last thing, so we don't risk someone * having closed it before we finish setup @@ -7860,9 +7869,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p) if (ret < 0) goto err; - p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP | - IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS | - IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL; trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags); return ret; err: @@ -7878,7 +7884,6 @@ err: static long io_uring_setup(u32 entries, struct io_uring_params __user *params) { struct io_uring_params p; - long ret; int i; if (copy_from_user(&p, params, sizeof(p))) @@ -7893,14 +7898,7 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params) IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ)) return -EINVAL; - ret = io_uring_create(entries, &p); - if (ret < 0) - return ret; - - if (copy_to_user(params, &p, sizeof(p))) - return -EFAULT; - - return ret; + return io_uring_create(entries, &p, params); } SYSCALL_DEFINE2(io_uring_setup, u32, entries, -- cgit v1.2.3-59-g8ed1b From b95e51eb9f2ee7b6d6c3203a2f75122349aa77be Mon Sep 17 00:00:00 2001 From: Sung Lee Date: Mon, 20 Apr 2020 11:38:30 -0400 Subject: drm/amd/display: Update DCN2.1 DV Code Revision [WHY & HOW] There is a problem in hscale_pixel_rate, the bug causes DCN to be more optimistic (more likely to underflow) in upscale cases during prefetch. This commit ports the fix from DV code to address these issues. Signed-off-by: Sung Lee Reviewed-by: Dmytro Laktyushkin Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c index a38baa73d484..b8ec08e3b7a3 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c @@ -1200,7 +1200,7 @@ static void dml_rq_dlg_get_dlg_params( min_hratio_fact_l = 1.0; min_hratio_fact_c = 1.0; - if (htaps_l <= 1) + if (hratio_l <= 1) min_hratio_fact_l = 2.0; else if (htaps_l <= 6) { if ((hratio_l * 2.0) > 4.0) @@ -1216,7 +1216,7 @@ static void dml_rq_dlg_get_dlg_params( hscale_pixel_rate_l = min_hratio_fact_l * dppclk_freq_in_mhz; - if (htaps_c <= 1) + if (hratio_c <= 1) min_hratio_fact_c = 2.0; else if (htaps_c <= 6) { if ((hratio_c * 2.0) > 4.0) @@ -1522,8 +1522,8 @@ static void dml_rq_dlg_get_dlg_params( disp_dlg_regs->refcyc_per_vm_group_vblank = get_refcyc_per_vm_group_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; disp_dlg_regs->refcyc_per_vm_group_flip = get_refcyc_per_vm_group_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; - disp_dlg_regs->refcyc_per_vm_req_vblank = get_refcyc_per_vm_req_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; - disp_dlg_regs->refcyc_per_vm_req_flip = get_refcyc_per_vm_req_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; + disp_dlg_regs->refcyc_per_vm_req_vblank = get_refcyc_per_vm_req_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10); + disp_dlg_regs->refcyc_per_vm_req_flip = get_refcyc_per_vm_req_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10); // Clamp to max for now if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int)dml_pow(2, 23)) -- cgit v1.2.3-59-g8ed1b From 80797dd6f1a525d1160c463d6a9f9d29af182cbb Mon Sep 17 00:00:00 2001 From: Roman Li Date: Wed, 26 Feb 2020 17:30:29 -0500 Subject: drm/amd/display: fix counter in wait_for_no_pipes_pending [Why] Wait counter is not being reset for each pipe. [How] Move counter reset into pipe loop scope. Signed-off-by: Roman Li Reviewed-by: Zhan Liu Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 8489f1e56892..47431ca6986d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -834,11 +834,10 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) static void wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context) { int i; - int count = 0; - struct pipe_ctx *pipe; PERF_TRACE(); for (i = 0; i < MAX_PIPES; i++) { - pipe = &context->res_ctx.pipe_ctx[i]; + int count = 0; + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; if (!pipe->plane_state) continue; -- cgit v1.2.3-59-g8ed1b From e6142dd511425cb827b5db869f489eb81f5f994d Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Wed, 22 Apr 2020 14:37:33 -0400 Subject: drm/amd/display: Prevent dpcd reads with passive dongles [why] During hotplug, a DP port may be connected to the sink through passive adapter which does not support DPCD reads. Issuing reads without checking for this condition will result in errors [how] Ensure the link is in aux_mode before initiating operation that result in a DPCD read. Signed-off-by: Aurabindo Pillai Reviewed-by: Harry Wentland Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 94c29b74c086..9c83c1303f08 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2008,17 +2008,22 @@ void amdgpu_dm_update_connector_after_detect( dc_sink_retain(aconnector->dc_sink); if (sink->dc_edid.length == 0) { aconnector->edid = NULL; - drm_dp_cec_unset_edid(&aconnector->dm_dp_aux.aux); + if (aconnector->dc_link->aux_mode) { + drm_dp_cec_unset_edid( + &aconnector->dm_dp_aux.aux); + } } else { aconnector->edid = - (struct edid *) sink->dc_edid.raw_edid; - + (struct edid *)sink->dc_edid.raw_edid; drm_connector_update_edid_property(connector, - aconnector->edid); - drm_dp_cec_set_edid(&aconnector->dm_dp_aux.aux, - aconnector->edid); + aconnector->edid); + + if (aconnector->dc_link->aux_mode) + drm_dp_cec_set_edid(&aconnector->dm_dp_aux.aux, + aconnector->edid); } + amdgpu_dm_update_freesync_caps(connector, aconnector->edid); update_connector_ext_caps(aconnector); } else { -- cgit v1.2.3-59-g8ed1b From 3e104c23816220919ea1b3fd93fabe363c67c484 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 4 May 2020 10:21:23 -0700 Subject: bpf, sockmap: msg_pop_data can incorrecty set an sge length When sk_msg_pop() is called where the pop operation is working on the end of a sge element and there is no additional trailing data and there _is_ data in front of pop, like the following case, |____________a_____________|__pop__| We have out of order operations where we incorrectly set the pop variable so that instead of zero'ing pop we incorrectly leave it untouched, effectively. This can cause later logic to shift the buffers around believing it should pop extra space. The result is we have 'popped' more data then we expected potentially breaking program logic. It took us a while to hit this case because typically we pop headers which seem to rarely be at the end of a scatterlist elements but we can't rely on this. Fixes: 7246d8ed4dcce ("bpf: helper to pop data from messages") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/158861288359.14306.7654891716919968144.stgit@john-Precision-5820-Tower --- net/core/filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index 7d6ceaa54d21..5cc9276f1023 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2590,8 +2590,8 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, } pop = 0; } else if (pop >= sge->length - a) { - sge->length = a; pop -= (sge->length - a); + sge->length = a; } } -- cgit v1.2.3-59-g8ed1b From 81aabbb9fb7b4b1efd073b62f0505d3adad442f3 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 4 May 2020 10:21:44 -0700 Subject: bpf, sockmap: bpf_tcp_ingress needs to subtract bytes from sg.size In bpf_tcp_ingress we used apply_bytes to subtract bytes from sg.size which is used to track total bytes in a message. But this is not correct because apply_bytes is itself modified in the main loop doing the mem_charge. Then at the end of this we have sg.size incorrectly set and out of sync with actual sk values. Then we can get a splat if we try to cork the data later and again try to redirect the msg to ingress. To fix instead of trying to track msg.size do the easy thing and include it as part of the sk_msg_xfer logic so that when the msg is moved the sg.size is always correct. To reproduce the below users will need ingress + cork and hit an error path that will then try to 'free' the skmsg. [ 173.699981] BUG: KASAN: null-ptr-deref in sk_msg_free_elem+0xdd/0x120 [ 173.699987] Read of size 8 at addr 0000000000000008 by task test_sockmap/5317 [ 173.700000] CPU: 2 PID: 5317 Comm: test_sockmap Tainted: G I 5.7.0-rc1+ #43 [ 173.700005] Hardware name: Dell Inc. Precision 5820 Tower/002KVM, BIOS 1.9.2 01/24/2019 [ 173.700009] Call Trace: [ 173.700021] dump_stack+0x8e/0xcb [ 173.700029] ? sk_msg_free_elem+0xdd/0x120 [ 173.700034] ? sk_msg_free_elem+0xdd/0x120 [ 173.700042] __kasan_report+0x102/0x15f [ 173.700052] ? sk_msg_free_elem+0xdd/0x120 [ 173.700060] kasan_report+0x32/0x50 [ 173.700070] sk_msg_free_elem+0xdd/0x120 [ 173.700080] __sk_msg_free+0x87/0x150 [ 173.700094] tcp_bpf_send_verdict+0x179/0x4f0 [ 173.700109] tcp_bpf_sendpage+0x3ce/0x5d0 Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/158861290407.14306.5327773422227552482.stgit@john-Precision-5820-Tower --- include/linux/skmsg.h | 1 + net/ipv4/tcp_bpf.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 8a709f63c5e5..ad31c9fb7158 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -187,6 +187,7 @@ static inline void sk_msg_xfer(struct sk_msg *dst, struct sk_msg *src, dst->sg.data[which] = src->sg.data[which]; dst->sg.data[which].length = size; dst->sg.size += size; + src->sg.size -= size; src->sg.data[which].length -= size; src->sg.data[which].offset += size; } diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index ff96466ea6da..629aaa9a1eb9 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -125,7 +125,6 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock, if (!ret) { msg->sg.start = i; - msg->sg.size -= apply_bytes; sk_psock_queue_msg(psock, tmp); sk_psock_data_ready(sk, psock); } else { -- cgit v1.2.3-59-g8ed1b From 73cb8e2a5863ccc5215660f5123db621bd57dff7 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Sun, 3 May 2020 21:03:19 -0700 Subject: RISC-V: Remove unused code from STRICT_KERNEL_RWX This patch removes the unused functions set_kernel_text_rw/ro. Currently, it is not being invoked from anywhere and no other architecture (except arm) uses this code. Even in ARM, these functions are not invoked from anywhere currently. Fixes: d27c3c90817e ("riscv: add STRICT_KERNEL_RWX support") Signed-off-by: Atish Patra Reviewed-by: Zong Li Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/set_memory.h | 8 -------- arch/riscv/mm/init.c | 16 ---------------- 2 files changed, 24 deletions(-) diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h index c38df4771c09..4c5bae7ca01c 100644 --- a/arch/riscv/include/asm/set_memory.h +++ b/arch/riscv/include/asm/set_memory.h @@ -22,14 +22,6 @@ static inline int set_memory_x(unsigned long addr, int numpages) { return 0; } static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; } #endif -#ifdef CONFIG_STRICT_KERNEL_RWX -void set_kernel_text_ro(void); -void set_kernel_text_rw(void); -#else -static inline void set_kernel_text_ro(void) { } -static inline void set_kernel_text_rw(void) { } -#endif - int set_direct_map_invalid_noflush(struct page *page); int set_direct_map_default_noflush(struct page *page); diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 5b813532db59..27a334106708 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -502,22 +502,6 @@ static inline void setup_vm_final(void) #endif /* CONFIG_MMU */ #ifdef CONFIG_STRICT_KERNEL_RWX -void set_kernel_text_rw(void) -{ - unsigned long text_start = (unsigned long)_text; - unsigned long text_end = (unsigned long)_etext; - - set_memory_rw(text_start, (text_end - text_start) >> PAGE_SHIFT); -} - -void set_kernel_text_ro(void) -{ - unsigned long text_start = (unsigned long)_text; - unsigned long text_end = (unsigned long)_etext; - - set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT); -} - void mark_rodata_ro(void) { unsigned long text_start = (unsigned long)_text; -- cgit v1.2.3-59-g8ed1b From 27abe57770ff1c4c23a60993816f02657ba94bd1 Mon Sep 17 00:00:00 2001 From: Kashyap Chamarthy Date: Tue, 5 May 2020 13:28:39 +0200 Subject: docs/virt/kvm: Document configuring and running nested guests This is a rewrite of this[1] Wiki page with further enhancements. The doc also includes a section on debugging problems in nested environments, among other improvements. [1] https://www.linux-kvm.org/page/Nested_Guests Signed-off-by: Kashyap Chamarthy Message-Id: <20200505112839.30534-1-kchamart@redhat.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/index.rst | 2 + Documentation/virt/kvm/running-nested-guests.rst | 276 +++++++++++++++++++++++ 2 files changed, 278 insertions(+) create mode 100644 Documentation/virt/kvm/running-nested-guests.rst diff --git a/Documentation/virt/kvm/index.rst b/Documentation/virt/kvm/index.rst index dcc252634cf9..b6833c7bb474 100644 --- a/Documentation/virt/kvm/index.rst +++ b/Documentation/virt/kvm/index.rst @@ -28,3 +28,5 @@ KVM arm/index devices/index + + running-nested-guests diff --git a/Documentation/virt/kvm/running-nested-guests.rst b/Documentation/virt/kvm/running-nested-guests.rst new file mode 100644 index 000000000000..d0a1fc754c84 --- /dev/null +++ b/Documentation/virt/kvm/running-nested-guests.rst @@ -0,0 +1,276 @@ +============================== +Running nested guests with KVM +============================== + +A nested guest is the ability to run a guest inside another guest (it +can be KVM-based or a different hypervisor). The straightforward +example is a KVM guest that in turn runs on a KVM guest (the rest of +this document is built on this example):: + + .----------------. .----------------. + | | | | + | L2 | | L2 | + | (Nested Guest) | | (Nested Guest) | + | | | | + |----------------'--'----------------| + | | + | L1 (Guest Hypervisor) | + | KVM (/dev/kvm) | + | | + .------------------------------------------------------. + | L0 (Host Hypervisor) | + | KVM (/dev/kvm) | + |------------------------------------------------------| + | Hardware (with virtualization extensions) | + '------------------------------------------------------' + +Terminology: + +- L0 – level-0; the bare metal host, running KVM + +- L1 – level-1 guest; a VM running on L0; also called the "guest + hypervisor", as it itself is capable of running KVM. + +- L2 – level-2 guest; a VM running on L1, this is the "nested guest" + +.. note:: The above diagram is modelled after the x86 architecture; + s390x, ppc64 and other architectures are likely to have + a different design for nesting. + + For example, s390x always has an LPAR (LogicalPARtition) + hypervisor running on bare metal, adding another layer and + resulting in at least four levels in a nested setup — L0 (bare + metal, running the LPAR hypervisor), L1 (host hypervisor), L2 + (guest hypervisor), L3 (nested guest). + + This document will stick with the three-level terminology (L0, + L1, and L2) for all architectures; and will largely focus on + x86. + + +Use Cases +--------- + +There are several scenarios where nested KVM can be useful, to name a +few: + +- As a developer, you want to test your software on different operating + systems (OSes). Instead of renting multiple VMs from a Cloud + Provider, using nested KVM lets you rent a large enough "guest + hypervisor" (level-1 guest). This in turn allows you to create + multiple nested guests (level-2 guests), running different OSes, on + which you can develop and test your software. + +- Live migration of "guest hypervisors" and their nested guests, for + load balancing, disaster recovery, etc. + +- VM image creation tools (e.g. ``virt-install``, etc) often run + their own VM, and users expect these to work inside a VM. + +- Some OSes use virtualization internally for security (e.g. to let + applications run safely in isolation). + + +Enabling "nested" (x86) +----------------------- + +From Linux kernel v4.19 onwards, the ``nested`` KVM parameter is enabled +by default for Intel and AMD. (Though your Linux distribution might +override this default.) + +In case you are running a Linux kernel older than v4.19, to enable +nesting, set the ``nested`` KVM module parameter to ``Y`` or ``1``. To +persist this setting across reboots, you can add it in a config file, as +shown below: + +1. On the bare metal host (L0), list the kernel modules and ensure that + the KVM modules:: + + $ lsmod | grep -i kvm + kvm_intel 133627 0 + kvm 435079 1 kvm_intel + +2. Show information for ``kvm_intel`` module:: + + $ modinfo kvm_intel | grep -i nested + parm: nested:bool + +3. For the nested KVM configuration to persist across reboots, place the + below in ``/etc/modprobed/kvm_intel.conf`` (create the file if it + doesn't exist):: + + $ cat /etc/modprobe.d/kvm_intel.conf + options kvm-intel nested=y + +4. Unload and re-load the KVM Intel module:: + + $ sudo rmmod kvm-intel + $ sudo modprobe kvm-intel + +5. Verify if the ``nested`` parameter for KVM is enabled:: + + $ cat /sys/module/kvm_intel/parameters/nested + Y + +For AMD hosts, the process is the same as above, except that the module +name is ``kvm-amd``. + + +Additional nested-related kernel parameters (x86) +------------------------------------------------- + +If your hardware is sufficiently advanced (Intel Haswell processor or +higher, which has newer hardware virt extensions), the following +additional features will also be enabled by default: "Shadow VMCS +(Virtual Machine Control Structure)", APIC Virtualization on your bare +metal host (L0). Parameters for Intel hosts:: + + $ cat /sys/module/kvm_intel/parameters/enable_shadow_vmcs + Y + + $ cat /sys/module/kvm_intel/parameters/enable_apicv + Y + + $ cat /sys/module/kvm_intel/parameters/ept + Y + +.. note:: If you suspect your L2 (i.e. nested guest) is running slower, + ensure the above are enabled (particularly + ``enable_shadow_vmcs`` and ``ept``). + + +Starting a nested guest (x86) +----------------------------- + +Once your bare metal host (L0) is configured for nesting, you should be +able to start an L1 guest with:: + + $ qemu-kvm -cpu host [...] + +The above will pass through the host CPU's capabilities as-is to the +gues); or for better live migration compatibility, use a named CPU +model supported by QEMU. e.g.:: + + $ qemu-kvm -cpu Haswell-noTSX-IBRS,vmx=on + +then the guest hypervisor will subsequently be capable of running a +nested guest with accelerated KVM. + + +Enabling "nested" (s390x) +------------------------- + +1. On the host hypervisor (L0), enable the ``nested`` parameter on + s390x:: + + $ rmmod kvm + $ modprobe kvm nested=1 + +.. note:: On s390x, the kernel parameter ``hpage`` is mutually exclusive + with the ``nested`` paramter — i.e. to be able to enable + ``nested``, the ``hpage`` parameter *must* be disabled. + +2. The guest hypervisor (L1) must be provided with the ``sie`` CPU + feature — with QEMU, this can be done by using "host passthrough" + (via the command-line ``-cpu host``). + +3. Now the KVM module can be loaded in the L1 (guest hypervisor):: + + $ modprobe kvm + + +Live migration with nested KVM +------------------------------ + +Migrating an L1 guest, with a *live* nested guest in it, to another +bare metal host, works as of Linux kernel 5.3 and QEMU 4.2.0 for +Intel x86 systems, and even on older versions for s390x. + +On AMD systems, once an L1 guest has started an L2 guest, the L1 guest +should no longer be migrated or saved (refer to QEMU documentation on +"savevm"/"loadvm") until the L2 guest shuts down. Attempting to migrate +or save-and-load an L1 guest while an L2 guest is running will result in +undefined behavior. You might see a ``kernel BUG!`` entry in ``dmesg``, a +kernel 'oops', or an outright kernel panic. Such a migrated or loaded L1 +guest can no longer be considered stable or secure, and must be restarted. +Migrating an L1 guest merely configured to support nesting, while not +actually running L2 guests, is expected to function normally even on AMD +systems but may fail once guests are started. + +Migrating an L2 guest is always expected to succeed, so all the following +scenarios should work even on AMD systems: + +- Migrating a nested guest (L2) to another L1 guest on the *same* bare + metal host. + +- Migrating a nested guest (L2) to another L1 guest on a *different* + bare metal host. + +- Migrating a nested guest (L2) to a bare metal host. + +Reporting bugs from nested setups +----------------------------------- + +Debugging "nested" problems can involve sifting through log files across +L0, L1 and L2; this can result in tedious back-n-forth between the bug +reporter and the bug fixer. + +- Mention that you are in a "nested" setup. If you are running any kind + of "nesting" at all, say so. Unfortunately, this needs to be called + out because when reporting bugs, people tend to forget to even + *mention* that they're using nested virtualization. + +- Ensure you are actually running KVM on KVM. Sometimes people do not + have KVM enabled for their guest hypervisor (L1), which results in + them running with pure emulation or what QEMU calls it as "TCG", but + they think they're running nested KVM. Thus confusing "nested Virt" + (which could also mean, QEMU on KVM) with "nested KVM" (KVM on KVM). + +Information to collect (generic) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The following is not an exhaustive list, but a very good starting point: + + - Kernel, libvirt, and QEMU version from L0 + + - Kernel, libvirt and QEMU version from L1 + + - QEMU command-line of L1 -- when using libvirt, you'll find it here: + ``/var/log/libvirt/qemu/instance.log`` + + - QEMU command-line of L2 -- as above, when using libvirt, get the + complete libvirt-generated QEMU command-line + + - ``cat /sys/cpuinfo`` from L0 + + - ``cat /sys/cpuinfo`` from L1 + + - ``lscpu`` from L0 + + - ``lscpu`` from L1 + + - Full ``dmesg`` output from L0 + + - Full ``dmesg`` output from L1 + +x86-specific info to collect +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Both the below commands, ``x86info`` and ``dmidecode``, should be +available on most Linux distributions with the same name: + + - Output of: ``x86info -a`` from L0 + + - Output of: ``x86info -a`` from L1 + + - Output of: ``dmidecode`` from L0 + + - Output of: ``dmidecode`` from L1 + +s390x-specific info to collect +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Along with the earlier mentioned generic details, the below is +also recommended: + + - ``/proc/sysinfo`` from L1; this will also include the info from L0 -- cgit v1.2.3-59-g8ed1b From c7cb2d650c9e78c03bd2d1c0db89891825f8c0f4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 5 May 2020 20:53:55 -0700 Subject: KVM: VMX: Explicitly clear RFLAGS.CF and RFLAGS.ZF in VM-Exit RSB path Clear CF and ZF in the VM-Exit path after doing __FILL_RETURN_BUFFER so that KVM doesn't interpret clobbered RFLAGS as a VM-Fail. Filling the RSB has always clobbered RFLAGS, its current incarnation just happens clear CF and ZF in the processs. Relying on the macro to clear CF and ZF is extremely fragile, e.g. commit 089dd8e53126e ("x86/speculation: Change FILL_RETURN_BUFFER to work with objtool") tweaks the loop such that the ZF flag is always set. Reported-by: Qian Cai Cc: Rick Edgecombe Cc: Peter Zijlstra (Intel) Cc: Josh Poimboeuf Cc: stable@vger.kernel.org Fixes: f2fde6a5bcfcf ("KVM: VMX: Move RSB stuffing to before the first RET after VM-Exit") Signed-off-by: Sean Christopherson Message-Id: <20200506035355.2242-1-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmenter.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 87f3f24fef37..51d1a82742fd 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -82,6 +82,9 @@ SYM_FUNC_START(vmx_vmexit) /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE + /* Clear RFLAGS.CF and RFLAGS.ZF to preserve VM-Exit, i.e. !VM-Fail. */ + or $1, %_ASM_AX + pop %_ASM_AX .Lvmexit_skip_rsb: #endif -- cgit v1.2.3-59-g8ed1b From 139f7425fdf54f054463e7524b9f54c41af8407f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 5 May 2020 09:40:46 -0400 Subject: kvm: x86: Use KVM CPU capabilities to determine CR4 reserved bits Using CPUID data can be useful for the processor compatibility check, but that's it. Using it to compute guest-reserved bits can have both false positives (such as LA57 and UMIP which we are already handling) and false negatives: in particular, with this patch we don't allow anymore a KVM guest to set CR4.PKE when CR4.PKE is clear on the host. Fixes: b9dd21e104bc ("KVM: x86: simplify handling of PKRU") Reported-by: Jim Mattson Tested-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c5835f9cb9ad..8d296e3d0d56 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -926,19 +926,6 @@ EXPORT_SYMBOL_GPL(kvm_set_xcr); __reserved_bits; \ }) -static u64 kvm_host_cr4_reserved_bits(struct cpuinfo_x86 *c) -{ - u64 reserved_bits = __cr4_reserved_bits(cpu_has, c); - - if (kvm_cpu_cap_has(X86_FEATURE_LA57)) - reserved_bits &= ~X86_CR4_LA57; - - if (kvm_cpu_cap_has(X86_FEATURE_UMIP)) - reserved_bits &= ~X86_CR4_UMIP; - - return reserved_bits; -} - static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { if (cr4 & cr4_reserved_bits) @@ -9675,7 +9662,9 @@ int kvm_arch_hardware_setup(void *opaque) if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES)) supported_xss = 0; - cr4_reserved_bits = kvm_host_cr4_reserved_bits(&boot_cpu_data); +#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f) + cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_); +#undef __kvm_cpu_cap_has if (kvm_has_tsc_control) { /* @@ -9707,7 +9696,8 @@ int kvm_arch_check_processor_compat(void *opaque) WARN_ON(!irqs_disabled()); - if (kvm_host_cr4_reserved_bits(c) != cr4_reserved_bits) + if (__cr4_reserved_bits(cpu_has, c) != + __cr4_reserved_bits(cpu_has, &boot_cpu_data)) return -EIO; return ops->check_processor_compatibility(); -- cgit v1.2.3-59-g8ed1b From 8ffdaf9155ebe517cdec5edbcca19ba6e7ee9c3c Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 4 May 2020 18:06:07 -0400 Subject: KVM: selftests: Fix build for evmcs.h I got this error when building kvm selftests: /usr/bin/ld: /home/xz/git/linux/tools/testing/selftests/kvm/libkvm.a(vmx.o):/home/xz/git/linux/tools/testing/selftests/kvm/include/evmcs.h:222: multiple definition of `current_evmcs'; /tmp/cco1G48P.o:/home/xz/git/linux/tools/testing/selftests/kvm/include/evmcs.h:222: first defined here /usr/bin/ld: /home/xz/git/linux/tools/testing/selftests/kvm/libkvm.a(vmx.o):/home/xz/git/linux/tools/testing/selftests/kvm/include/evmcs.h:223: multiple definition of `current_vp_assist'; /tmp/cco1G48P.o:/home/xz/git/linux/tools/testing/selftests/kvm/include/evmcs.h:223: first defined here I think it's because evmcs.h is included both in a test file and a lib file so the structs have multiple declarations when linking. After all it's not a good habit to declare structs in the header files. Cc: Vitaly Kuznetsov Signed-off-by: Peter Xu Message-Id: <20200504220607.99627-1-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/evmcs.h | 4 ++-- tools/testing/selftests/kvm/lib/x86_64/vmx.c | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/include/evmcs.h b/tools/testing/selftests/kvm/include/evmcs.h index d8f4d6bfe05d..a034438b6266 100644 --- a/tools/testing/selftests/kvm/include/evmcs.h +++ b/tools/testing/selftests/kvm/include/evmcs.h @@ -219,8 +219,8 @@ struct hv_enlightened_vmcs { #define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \ (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) -struct hv_enlightened_vmcs *current_evmcs; -struct hv_vp_assist_page *current_vp_assist; +extern struct hv_enlightened_vmcs *current_evmcs; +extern struct hv_vp_assist_page *current_vp_assist; int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id); diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index 6f17f69394be..4ae104f6ce69 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -17,6 +17,9 @@ bool enable_evmcs; +struct hv_enlightened_vmcs *current_evmcs; +struct hv_vp_assist_page *current_vp_assist; + struct eptPageTableEntry { uint64_t readable:1; uint64_t writable:1; -- cgit v1.2.3-59-g8ed1b From 495907ec36def1d28a44e2d1b5a51affe716aacf Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 5 May 2020 11:47:50 -0400 Subject: KVM: X86: Declare KVM_CAP_SET_GUEST_DEBUG properly KVM_CAP_SET_GUEST_DEBUG should be supported for x86 however it's not declared as supported. My wild guess is that userspaces like QEMU are using "#ifdef KVM_CAP_SET_GUEST_DEBUG" to check for the capability instead, but that could be wrong because the compilation host may not be the runtime host. The userspace might still want to keep the old "#ifdef" though to not break the guest debug on old kernels. Signed-off-by: Peter Xu Message-Id: <20200505154750.126300-1-peterx@redhat.com> [Do the same for PPC and s390. - Paolo] Signed-off-by: Paolo Bonzini --- arch/powerpc/kvm/powerpc.c | 1 + arch/s390/kvm/kvm-s390.c | 1 + arch/x86/kvm/x86.c | 1 + 3 files changed, 3 insertions(+) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index e15166b0a16d..ad2f172c26a6 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -521,6 +521,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_IOEVENTFD: case KVM_CAP_DEVICE_CTRL: case KVM_CAP_IMMEDIATE_EXIT: + case KVM_CAP_SET_GUEST_DEBUG: r = 1; break; case KVM_CAP_PPC_GUEST_DEBUG_SSTEP: diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 5dcf9ff12828..d05bb040fd42 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -545,6 +545,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_AIS: case KVM_CAP_S390_AIS_MIGRATION: case KVM_CAP_S390_VCPU_RESETS: + case KVM_CAP_SET_GUEST_DEBUG: r = 1; break; case KVM_CAP_S390_HPAGE_1M: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8d296e3d0d56..d786c7d27ce5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3372,6 +3372,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_GET_MSR_FEATURES: case KVM_CAP_MSR_PLATFORM_INFO: case KVM_CAP_EXCEPTION_PAYLOAD: + case KVM_CAP_SET_GUEST_DEBUG: r = 1; break; case KVM_CAP_SYNC_REGS: -- cgit v1.2.3-59-g8ed1b From de462e5f10718517bacf2f84c8aa2804567ef7df Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sun, 26 Apr 2020 15:53:30 +0900 Subject: bootconfig: Fix to remove bootconfig data from initrd while boot If there is a bootconfig data in the tail of initrd/initramfs, initrd image sanity check caused an error while decompression stage as follows. [ 0.883882] Unpacking initramfs... [ 2.696429] Initramfs unpacking failed: invalid magic at start of compressed archive This error will be ignored if CONFIG_BLK_DEV_RAM=n, but CONFIG_BLK_DEV_RAM=y the kernel failed to mount rootfs and causes a panic. To fix this issue, shrink down the initrd_end for removing tailing bootconfig data while boot the kernel. Link: http://lkml.kernel.org/r/158788401014.24243.17424755854115077915.stgit@devnote2 Cc: Borislav Petkov Cc: Kees Cook Cc: Ingo Molnar Cc: Andrew Morton Cc: stable@vger.kernel.org Fixes: 7684b8582c24 ("bootconfig: Load boot config from the tail of initrd") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- init/main.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 52 insertions(+), 17 deletions(-) diff --git a/init/main.c b/init/main.c index a48617f2e5e5..1a5da2c2660c 100644 --- a/init/main.c +++ b/init/main.c @@ -257,6 +257,47 @@ static int __init loglevel(char *str) early_param("loglevel", loglevel); +#ifdef CONFIG_BLK_DEV_INITRD +static void * __init get_boot_config_from_initrd(u32 *_size, u32 *_csum) +{ + u32 size, csum; + char *data; + u32 *hdr; + + if (!initrd_end) + return NULL; + + data = (char *)initrd_end - BOOTCONFIG_MAGIC_LEN; + if (memcmp(data, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN)) + return NULL; + + hdr = (u32 *)(data - 8); + size = hdr[0]; + csum = hdr[1]; + + data = ((void *)hdr) - size; + if ((unsigned long)data < initrd_start) { + pr_err("bootconfig size %d is greater than initrd size %ld\n", + size, initrd_end - initrd_start); + return NULL; + } + + /* Remove bootconfig from initramfs/initrd */ + initrd_end = (unsigned long)data; + if (_size) + *_size = size; + if (_csum) + *_csum = csum; + + return data; +} +#else +static void * __init get_boot_config_from_initrd(u32 *_size, u32 *_csum) +{ + return NULL; +} +#endif + #ifdef CONFIG_BOOT_CONFIG char xbc_namebuf[XBC_KEYLEN_MAX] __initdata; @@ -357,9 +398,12 @@ static void __init setup_boot_config(const char *cmdline) int pos; u32 size, csum; char *data, *copy; - u32 *hdr; int ret; + data = get_boot_config_from_initrd(&size, &csum); + if (!data) + goto not_found; + strlcpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE); parse_args("bootconfig", tmp_cmdline, NULL, 0, 0, 0, NULL, bootconfig_params); @@ -367,27 +411,12 @@ static void __init setup_boot_config(const char *cmdline) if (!bootconfig_found) return; - if (!initrd_end) - goto not_found; - - data = (char *)initrd_end - BOOTCONFIG_MAGIC_LEN; - if (memcmp(data, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN)) - goto not_found; - - hdr = (u32 *)(data - 8); - size = hdr[0]; - csum = hdr[1]; - if (size >= XBC_DATA_MAX) { pr_err("bootconfig size %d greater than max size %d\n", size, XBC_DATA_MAX); return; } - data = ((void *)hdr) - size; - if ((unsigned long)data < initrd_start) - goto not_found; - if (boot_config_checksum((unsigned char *)data, size) != csum) { pr_err("bootconfig checksum failed\n"); return; @@ -420,8 +449,14 @@ static void __init setup_boot_config(const char *cmdline) not_found: pr_err("'bootconfig' found on command line, but no bootconfig found\n"); } + #else -#define setup_boot_config(cmdline) do { } while (0) + +static void __init setup_boot_config(const char *cmdline) +{ + /* Remove bootconfig data from initrd */ + get_boot_config_from_initrd(NULL, NULL); +} static int __init warn_bootconfig(char *str) { -- cgit v1.2.3-59-g8ed1b From dcbd21c9fca5e954fd4e3d91884907eb6d47187e Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 25 Apr 2020 14:49:09 +0900 Subject: tracing/kprobes: Fix a double initialization typo Fix a typo that resulted in an unnecessary double initialization to addr. Link: http://lkml.kernel.org/r/158779374968.6082.2337484008464939919.stgit@devnote2 Cc: Tom Zanussi Cc: Ingo Molnar Cc: stable@vger.kernel.org Fixes: c7411a1a126f ("tracing/kprobe: Check whether the non-suffixed symbol is notrace") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index d0568af4a0ef..0d9300c3b084 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -453,7 +453,7 @@ static bool __within_notrace_func(unsigned long addr) static bool within_notrace_func(struct trace_kprobe *tk) { - unsigned long addr = addr = trace_kprobe_address(tk); + unsigned long addr = trace_kprobe_address(tk); char symname[KSYM_NAME_LEN], *p; if (!__within_notrace_func(addr)) -- cgit v1.2.3-59-g8ed1b From da0f1f4167e3af69e1d8b32d6d65195ddd2bfb64 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 25 Apr 2020 14:49:17 +0900 Subject: tracing/boottime: Fix kprobe event API usage Fix boottime kprobe events to use API correctly for multiple events. For example, when we set a multiprobe kprobe events in bootconfig like below, ftrace.event.kprobes.myevent { probes = "vfs_read $arg1 $arg2", "vfs_write $arg1 $arg2" } This cause an error; trace_boot: Failed to add probe: p:kprobes/myevent (null) vfs_read $arg1 $arg2 vfs_write $arg1 $arg2 This shows the 1st argument becomes NULL and multiprobes are merged to 1 probe. Link: http://lkml.kernel.org/r/158779375766.6082.201939936008972838.stgit@devnote2 Cc: Ingo Molnar Cc: stable@vger.kernel.org Fixes: 29a154810546 ("tracing: Change trace_boot to use kprobe_event interface") Reviewed-by: Tom Zanussi Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_boot.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 06d7feb5255f..9de29bb45a27 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -95,24 +95,20 @@ trace_boot_add_kprobe_event(struct xbc_node *node, const char *event) struct xbc_node *anode; char buf[MAX_BUF_LEN]; const char *val; - int ret; + int ret = 0; - kprobe_event_cmd_init(&cmd, buf, MAX_BUF_LEN); + xbc_node_for_each_array_value(node, "probes", anode, val) { + kprobe_event_cmd_init(&cmd, buf, MAX_BUF_LEN); - ret = kprobe_event_gen_cmd_start(&cmd, event, NULL); - if (ret) - return ret; + ret = kprobe_event_gen_cmd_start(&cmd, event, val); + if (ret) + break; - xbc_node_for_each_array_value(node, "probes", anode, val) { - ret = kprobe_event_add_field(&cmd, val); + ret = kprobe_event_gen_cmd_end(&cmd); if (ret) - return ret; + pr_err("Failed to add probe: %s\n", buf); } - ret = kprobe_event_gen_cmd_end(&cmd); - if (ret) - pr_err("Failed to add probe: %s\n", buf); - return ret; } #else -- cgit v1.2.3-59-g8ed1b From 5b4dcd2d201a395ad4054067bfae4a07554fbd65 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 25 Apr 2020 14:49:26 +0900 Subject: tracing/kprobes: Reject new event if loc is NULL Reject the new event which has NULL location for kprobes. For kprobes, user must specify at least the location. Link: http://lkml.kernel.org/r/158779376597.6082.1411212055469099461.stgit@devnote2 Cc: Tom Zanussi Cc: Ingo Molnar Cc: stable@vger.kernel.org Fixes: 2a588dd1d5d6 ("tracing: Add kprobe event command generation functions") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 0d9300c3b084..35989383ae11 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -940,6 +940,9 @@ EXPORT_SYMBOL_GPL(kprobe_event_cmd_init); * complete command or only the first part of it; in the latter case, * kprobe_event_add_fields() can be used to add more fields following this. * + * Unlikely the synth_event_gen_cmd_start(), @loc must be specified. This + * returns -EINVAL if @loc == NULL. + * * Return: 0 if successful, error otherwise. */ int __kprobe_event_gen_cmd_start(struct dynevent_cmd *cmd, bool kretprobe, @@ -953,6 +956,9 @@ int __kprobe_event_gen_cmd_start(struct dynevent_cmd *cmd, bool kretprobe, if (cmd->type != DYNEVENT_TYPE_KPROBE) return -EINVAL; + if (!loc) + return -EINVAL; + if (kretprobe) snprintf(buf, MAX_EVENT_NAME_LEN, "r:kprobes/%s", name); else -- cgit v1.2.3-59-g8ed1b From 4457a9db2bdec2360ddb15242341696108167886 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 4 May 2020 10:58:28 +0300 Subject: drm/i915/tgl+: Fix interrupt handling for DP AUX transactions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unmask/enable AUX interrupts on all ports on TGL+. So far the interrupts worked only on port A, which meant each transaction on other ports took 10ms. Cc: # v5.4+ Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200504075828.20348-1-imre.deak@intel.com (cherry picked from commit 054318c7e35f1d7d06b216143fff5f32405047ee) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_irq.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index d91557d842dc..8a2b83807ffc 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3361,7 +3361,7 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) u32 de_pipe_masked = gen8_de_pipe_fault_mask(dev_priv) | GEN8_PIPE_CDCLK_CRC_DONE; u32 de_pipe_enables; - u32 de_port_masked = GEN8_AUX_CHANNEL_A; + u32 de_port_masked = gen8_de_port_aux_mask(dev_priv); u32 de_port_enables; u32 de_misc_masked = GEN8_DE_EDP_PSR; enum pipe pipe; @@ -3369,18 +3369,8 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) <= 10) de_misc_masked |= GEN8_DE_MISC_GSE; - if (INTEL_GEN(dev_priv) >= 9) { - de_port_masked |= GEN9_AUX_CHANNEL_B | GEN9_AUX_CHANNEL_C | - GEN9_AUX_CHANNEL_D; - if (IS_GEN9_LP(dev_priv)) - de_port_masked |= BXT_DE_PORT_GMBUS; - } - - if (INTEL_GEN(dev_priv) >= 11) - de_port_masked |= ICL_AUX_CHANNEL_E; - - if (IS_CNL_WITH_PORT_F(dev_priv) || INTEL_GEN(dev_priv) >= 11) - de_port_masked |= CNL_AUX_CHANNEL_F; + if (IS_GEN9_LP(dev_priv)) + de_port_masked |= BXT_DE_PORT_GMBUS; de_pipe_enables = de_pipe_masked | GEN8_PIPE_VBLANK | GEN8_PIPE_FIFO_UNDERRUN; -- cgit v1.2.3-59-g8ed1b From ac9155842829a811c12d3e1868a133fdb8300df0 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 22 Apr 2020 14:14:30 -0500 Subject: gfs2: fix withdraw sequence deadlock After a gfs2 file system withdraw, any attempt to read metadata is automatically rejected by function gfs2_meta_read() except for reads of the journal inode. This turns out to be a problem because function signal_our_withdraw() repeatedly calls check_journal_clean() which reads the metadata (both its dinode and indirect blocks) to see if the entire journal is mapped. The dinode read works, but reading the indirect blocks returns -EIO which gets sent back up and causes a consistency error. This results in withdraw-from-withdraw, which becomes a deadlock. This patch changes the test in gfs2_meta_read() to allow all metadata reads for the journal. Instead of checking the journal block, it now checks for the journal inode glock which is the same for all blocks in the journal. This allows check_journal_clean() to properly check the journal without trying to withdraw recursively. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/meta_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 4b72abcf83b2..9856cc2e0795 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -252,7 +252,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, int num = 0; if (unlikely(gfs2_withdrawn(sdp)) && - (!sdp->sd_jdesc || (blkno != sdp->sd_jdesc->jd_no_addr))) { + (!sdp->sd_jdesc || gl != sdp->sd_jinode_gl)) { *bhp = NULL; return -EIO; } -- cgit v1.2.3-59-g8ed1b From af23facc38c26ac188b6adac2cae2dafaca0c82d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 3 Apr 2020 13:01:50 +0100 Subject: drm/i915: Check current i915_vma.pin_count status first on unbind Do an early rejection of a i915_vma_unbind() attempt if the i915_vma is currently pinned, without waiting to see if the inflight operations may unpin it. We see this problem with the shrinker trying to unbind the active vma from inside its bind worker: <6> [472.618968] Workqueue: events_unbound fence_work [i915] <4> [472.618970] Call Trace: <4> [472.618974] ? __schedule+0x2e5/0x810 <4> [472.618978] schedule+0x37/0xe0 <4> [472.618982] schedule_preempt_disabled+0xf/0x20 <4> [472.618984] __mutex_lock+0x281/0x9c0 <4> [472.618987] ? mark_held_locks+0x49/0x70 <4> [472.618989] ? _raw_spin_unlock_irqrestore+0x47/0x60 <4> [472.619038] ? i915_vma_unbind+0xae/0x110 [i915] <4> [472.619084] ? i915_vma_unbind+0xae/0x110 [i915] <4> [472.619122] i915_vma_unbind+0xae/0x110 [i915] <4> [472.619165] i915_gem_object_unbind+0x1dc/0x400 [i915] <4> [472.619208] i915_gem_shrink+0x328/0x660 [i915] <4> [472.619250] ? i915_gem_shrink_all+0x38/0x60 [i915] <4> [472.619282] i915_gem_shrink_all+0x38/0x60 [i915] <4> [472.619325] vm_alloc_page.constprop.25+0x1aa/0x240 [i915] <4> [472.619330] ? rcu_read_lock_sched_held+0x4d/0x80 <4> [472.619363] ? __alloc_pd+0xb/0x30 [i915] <4> [472.619366] ? module_assert_mutex_or_preempt+0xf/0x30 <4> [472.619368] ? __module_address+0x23/0xe0 <4> [472.619371] ? is_module_address+0x26/0x40 <4> [472.619374] ? static_obj+0x34/0x50 <4> [472.619376] ? lockdep_init_map+0x4d/0x1e0 <4> [472.619407] setup_page_dma+0xd/0x90 [i915] <4> [472.619437] alloc_pd+0x29/0x50 [i915] <4> [472.619470] __gen8_ppgtt_alloc+0x443/0x6b0 [i915] <4> [472.619503] gen8_ppgtt_alloc+0xd7/0x300 [i915] <4> [472.619535] ppgtt_bind_vma+0x2a/0xe0 [i915] <4> [472.619577] __vma_bind+0x26/0x40 [i915] <4> [472.619611] fence_work+0x1c/0x90 [i915] <4> [472.619617] process_one_work+0x26a/0x620 Fixes: 2850748ef876 ("drm/i915: Pull i915_vma_pin under the vm->mutex") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200403120150.17091-1-chris@chris-wilson.co.uk (cherry picked from commit 614654abe847a42fc75d7eb5096e46f796a438b6) (cherry picked from commit dd086cf516d9bea3878abb267f62ccc53acd764b) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_vma.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 82e3bc280622..2cd7a7e87c0a 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1228,18 +1228,6 @@ int __i915_vma_unbind(struct i915_vma *vma) lockdep_assert_held(&vma->vm->mutex); - /* - * First wait upon any activity as retiring the request may - * have side-effects such as unpinning or even unbinding this vma. - * - * XXX Actually waiting under the vm->mutex is a hinderance and - * should be pipelined wherever possible. In cases where that is - * unavoidable, we should lift the wait to before the mutex. - */ - ret = i915_vma_sync(vma); - if (ret) - return ret; - if (i915_vma_is_pinned(vma)) { vma_print_allocator(vma, "is pinned"); return -EAGAIN; @@ -1313,15 +1301,20 @@ int i915_vma_unbind(struct i915_vma *vma) if (!drm_mm_node_allocated(&vma->node)) return 0; - if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) - /* XXX not always required: nop_clear_range */ - wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm); - /* Optimistic wait before taking the mutex */ err = i915_vma_sync(vma); if (err) goto out_rpm; + if (i915_vma_is_pinned(vma)) { + vma_print_allocator(vma, "is pinned"); + return -EAGAIN; + } + + if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) + /* XXX not always required: nop_clear_range */ + wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm); + err = mutex_lock_interruptible(&vm->mutex); if (err) goto out_rpm; -- cgit v1.2.3-59-g8ed1b From 220dcfc1485bb79e300f08a28b475869feccbb53 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Apr 2020 14:08:11 +0100 Subject: drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore If we find ourselves waiting on a MI_SEMAPHORE_WAIT, either within the user batch or in our own preamble, the engine raises a GT_WAIT_ON_SEMAPHORE interrupt. We can unmask that interrupt and so respond to a semaphore wait by yielding the timeslice, if we have another context to yield to! The only real complication is that the interrupt is only generated for the start of the semaphore wait, and is asynchronous to our process_csb() -- that is, we may not have registered the timeslice before we see the interrupt. To ensure we don't miss a potential semaphore blocking forward progress (e.g. selftests/live_timeslice_preempt) we mark the interrupt and apply it to the next timeslice regardless of whether it was active at the time. v2: We use semaphores in preempt-to-busy, within the timeslicing implementation itself! Ergo, when we do insert a preemption due to an expired timeslice, the new context may start with the missed semaphore flagged by the retired context and be yielded, ad infinitum. To avoid this, read the context id at the time of the semaphore interrupt and only yield if that context is still active. Fixes: 8ee36e048c98 ("drm/i915/execlists: Minimalistic timeslicing") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Kenneth Graunke Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200407130811.17321-1-chris@chris-wilson.co.uk (cherry picked from commit c4e8ba7390346a77ffe33ec3f210bc62e0b6c8c6) (cherry picked from commit cd60e4ac4738a6921592c4f7baf87f9a3499f0e2) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 6 ++++ drivers/gpu/drm/i915/gt/intel_engine_types.h | 9 ++++++ drivers/gpu/drm/i915/gt/intel_gt_irq.c | 15 ++++++++-- drivers/gpu/drm/i915/gt/intel_lrc.c | 41 ++++++++++++++++++++++++---- drivers/gpu/drm/i915/gt/selftest_lrc.c | 34 ++++++++++++----------- drivers/gpu/drm/i915/i915_reg.h | 1 + 6 files changed, 82 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 3aa8a652c16d..883a9b7fe88d 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1295,6 +1295,12 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7)) drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID)); + if (HAS_EXECLISTS(dev_priv)) { + drm_printf(m, "\tEL_STAT_HI: 0x%08x\n", + ENGINE_READ(engine, RING_EXECLIST_STATUS_HI)); + drm_printf(m, "\tEL_STAT_LO: 0x%08x\n", + ENGINE_READ(engine, RING_EXECLIST_STATUS_LO)); + } drm_printf(m, "\tRING_START: 0x%08x\n", ENGINE_READ(engine, RING_START)); drm_printf(m, "\tRING_HEAD: 0x%08x\n", diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index bf4d13e08887..763f8f530ded 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -156,6 +156,15 @@ struct intel_engine_execlists { */ struct i915_priolist default_priolist; + /** + * @yield: CCID at the time of the last semaphore-wait interrupt. + * + * Instead of leaving a semaphore busy-spinning on an engine, we would + * like to switch to another ready context, i.e. yielding the semaphore + * timeslice. + */ + u32 yield; + /** * @error_interrupt: CS Master EIR * diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index f0e7fd95165a..0cc7dd54f4f9 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -39,6 +39,15 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir) } } + if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) { + WRITE_ONCE(engine->execlists.yield, + ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI)); + ENGINE_TRACE(engine, "semaphore yield: %08x\n", + engine->execlists.yield); + if (del_timer(&engine->execlists.timer)) + tasklet = true; + } + if (iir & GT_CONTEXT_SWITCH_INTERRUPT) tasklet = true; @@ -228,7 +237,8 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt) const u32 irqs = GT_CS_MASTER_ERROR_INTERRUPT | GT_RENDER_USER_INTERRUPT | - GT_CONTEXT_SWITCH_INTERRUPT; + GT_CONTEXT_SWITCH_INTERRUPT | + GT_WAIT_SEMAPHORE_INTERRUPT; struct intel_uncore *uncore = gt->uncore; const u32 dmask = irqs << 16 | irqs; const u32 smask = irqs << 16; @@ -366,7 +376,8 @@ void gen8_gt_irq_postinstall(struct intel_gt *gt) const u32 irqs = GT_CS_MASTER_ERROR_INTERRUPT | GT_RENDER_USER_INTERRUPT | - GT_CONTEXT_SWITCH_INTERRUPT; + GT_CONTEXT_SWITCH_INTERRUPT | + GT_WAIT_SEMAPHORE_INTERRUPT; const u32 gt_interrupts[] = { irqs << GEN8_RCS_IRQ_SHIFT | irqs << GEN8_BCS_IRQ_SHIFT, irqs << GEN8_VCS0_IRQ_SHIFT | irqs << GEN8_VCS1_IRQ_SHIFT, diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 017fd5bf9c69..a21b962c736f 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1754,7 +1754,8 @@ static void defer_active(struct intel_engine_cs *engine) } static bool -need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq) +need_timeslice(const struct intel_engine_cs *engine, + const struct i915_request *rq) { int hint; @@ -1768,6 +1769,32 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq) return hint >= effective_prio(rq); } +static bool +timeslice_yield(const struct intel_engine_execlists *el, + const struct i915_request *rq) +{ + /* + * Once bitten, forever smitten! + * + * If the active context ever busy-waited on a semaphore, + * it will be treated as a hog until the end of its timeslice (i.e. + * until it is scheduled out and replaced by a new submission, + * possibly even its own lite-restore). The HW only sends an interrupt + * on the first miss, and we do know if that semaphore has been + * signaled, or even if it is now stuck on another semaphore. Play + * safe, yield if it might be stuck -- it will be given a fresh + * timeslice in the near future. + */ + return upper_32_bits(rq->context->lrc_desc) == READ_ONCE(el->yield); +} + +static bool +timeslice_expired(const struct intel_engine_execlists *el, + const struct i915_request *rq) +{ + return timer_expired(&el->timer) || timeslice_yield(el, rq); +} + static int switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq) { @@ -1783,8 +1810,7 @@ timeslice(const struct intel_engine_cs *engine) return READ_ONCE(engine->props.timeslice_duration_ms); } -static unsigned long -active_timeslice(const struct intel_engine_cs *engine) +static unsigned long active_timeslice(const struct intel_engine_cs *engine) { const struct intel_engine_execlists *execlists = &engine->execlists; const struct i915_request *rq = *execlists->active; @@ -1946,13 +1972,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine) last = NULL; } else if (need_timeslice(engine, last) && - timer_expired(&engine->execlists.timer)) { + timeslice_expired(execlists, last)) { ENGINE_TRACE(engine, - "expired last=%llx:%lld, prio=%d, hint=%d\n", + "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n", last->fence.context, last->fence.seqno, last->sched.attr.priority, - execlists->queue_priority_hint); + execlists->queue_priority_hint, + yesno(timeslice_yield(execlists, last))); ring_set_paused(engine, 1); defer_active(engine); @@ -2213,6 +2240,7 @@ done: } clear_ports(port + 1, last_port - port); + WRITE_ONCE(execlists->yield, -1); execlists_submit_ports(engine); set_preempt_timeout(engine, *active); } else { @@ -4452,6 +4480,7 @@ logical_ring_default_irqs(struct intel_engine_cs *engine) engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift; engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift; + engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift; } static void rcs_submission_override(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 6f06ba750a0a..f95ae15ce865 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -929,7 +929,7 @@ create_rewinder(struct intel_context *ce, goto err; } - cs = intel_ring_begin(rq, 10); + cs = intel_ring_begin(rq, 14); if (IS_ERR(cs)) { err = PTR_ERR(cs); goto err; @@ -941,8 +941,8 @@ create_rewinder(struct intel_context *ce, *cs++ = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_GLOBAL_GTT | MI_SEMAPHORE_POLL | - MI_SEMAPHORE_SAD_NEQ_SDD; - *cs++ = 0; + MI_SEMAPHORE_SAD_GTE_SDD; + *cs++ = idx; *cs++ = offset; *cs++ = 0; @@ -951,6 +951,11 @@ create_rewinder(struct intel_context *ce, *cs++ = offset + idx * sizeof(u32); *cs++ = 0; + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = offset; + *cs++ = 0; + *cs++ = idx + 1; + intel_ring_advance(rq, cs); rq->sched.attr.priority = I915_PRIORITY_MASK; @@ -984,7 +989,7 @@ static int live_timeslice_rewind(void *arg) for_each_engine(engine, gt, id) { enum { A1, A2, B1 }; - enum { X = 1, Y, Z }; + enum { X = 1, Z, Y }; struct i915_request *rq[3] = {}; struct intel_context *ce; unsigned long heartbeat; @@ -1017,13 +1022,13 @@ static int live_timeslice_rewind(void *arg) goto err; } - rq[0] = create_rewinder(ce, NULL, slot, 1); + rq[0] = create_rewinder(ce, NULL, slot, X); if (IS_ERR(rq[0])) { intel_context_put(ce); goto err; } - rq[1] = create_rewinder(ce, NULL, slot, 2); + rq[1] = create_rewinder(ce, NULL, slot, Y); intel_context_put(ce); if (IS_ERR(rq[1])) goto err; @@ -1041,7 +1046,7 @@ static int live_timeslice_rewind(void *arg) goto err; } - rq[2] = create_rewinder(ce, rq[0], slot, 3); + rq[2] = create_rewinder(ce, rq[0], slot, Z); intel_context_put(ce); if (IS_ERR(rq[2])) goto err; @@ -1055,15 +1060,12 @@ static int live_timeslice_rewind(void *arg) GEM_BUG_ON(!timer_pending(&engine->execlists.timer)); /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */ - GEM_BUG_ON(!i915_request_is_active(rq[A1])); - GEM_BUG_ON(!i915_request_is_active(rq[A2])); - GEM_BUG_ON(!i915_request_is_active(rq[B1])); - - /* Wait for the timeslice to kick in */ - del_timer(&engine->execlists.timer); - tasklet_hi_schedule(&engine->execlists.tasklet); - intel_engine_flush_submission(engine); - + if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */ + /* Wait for the timeslice to kick in */ + del_timer(&engine->execlists.timer); + tasklet_hi_schedule(&engine->execlists.tasklet); + intel_engine_flush_submission(engine); + } /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */ GEM_BUG_ON(!i915_request_is_active(rq[A1])); GEM_BUG_ON(!i915_request_is_active(rq[B1])); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e0c6021fdaf9..6e12000c4b6b 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3094,6 +3094,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GT_BSD_CS_ERROR_INTERRUPT (1 << 15) #define GT_BSD_USER_INTERRUPT (1 << 12) #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1 (1 << 11) /* hsw+; rsvd on snb, ivb, vlv */ +#define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11) /* bdw+ */ #define GT_CONTEXT_SWITCH_INTERRUPT (1 << 8) #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT (1 << 5) /* !snb */ #define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT (1 << 4) -- cgit v1.2.3-59-g8ed1b From 47bf7b7a7151bad568b9523d14477a353a450066 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 20 Apr 2020 13:53:55 +0100 Subject: drm/i915/gem: Remove object_is_locked assertion from unpin_from_display_plane Since moving the obj->vma.list to a spin_lock, and the vm->bound_list to its vm->mutex, along with tracking shrinkable status under its own spinlock, we no long require the object to be locked by the caller. This is fortunate as it appears we can be called with the lock along an error path in flipping: <4> [139.942851] WARN_ON(debug_locks && !lock_is_held(&(&((obj)->base.resv)->lock.base)->dep_map)) <4> [139.943242] WARNING: CPU: 0 PID: 1203 at drivers/gpu/drm/i915/gem/i915_gem_domain.c:405 i915_gem_object_unpin_from_display_plane+0x70/0x130 [i915] <4> [139.943263] Modules linked in: snd_hda_intel i915 vgem snd_hda_codec_realtek snd_hda_codec_generic coretemp snd_intel_dspcfg snd_hda_codec snd_hwdep snd_hda_core r8169 lpc_ich snd_pcm realtek prime_numbers [last unloaded: i915] <4> [139.943347] CPU: 0 PID: 1203 Comm: kms_flip Tainted: G U 5.6.0-gd0fda5c2cf3f1-drmtip_474+ #1 <4> [139.943363] Hardware name: /D510MO, BIOS MOPNV10J.86A.0311.2010.0802.2346 08/02/2010 <4> [139.943589] RIP: 0010:i915_gem_object_unpin_from_display_plane+0x70/0x130 [i915] <4> [139.943589] Code: 85 28 01 00 00 be ff ff ff ff 48 8d 78 60 e8 d7 9b f0 e2 85 c0 75 b9 48 c7 c6 50 b9 38 c0 48 c7 c7 e9 48 3c c0 e8 20 d4 e9 e2 <0f> 0b eb a2 48 c7 c1 08 bb 38 c0 ba 0a 01 00 00 48 c7 c6 88 a3 35 <4> [139.943589] RSP: 0018:ffffb774c0603b48 EFLAGS: 00010282 <4> [139.943589] RAX: 0000000000000000 RBX: ffff9a142fa36e80 RCX: 0000000000000006 <4> [139.943589] RDX: 000000000000160d RSI: ffff9a142c1a88f8 RDI: ffffffffa434a64d <4> [139.943589] RBP: ffff9a1410a513c0 R08: ffff9a142c1a88f8 R09: 0000000000000000 <4> [139.943589] R10: 0000000000000000 R11: 0000000000000000 R12: ffff9a1436ee94b8 <4> [139.943589] R13: 0000000000000001 R14: 00000000ffffffff R15: ffff9a1410960000 <4> [139.943589] FS: 00007fc73a744e40(0000) GS:ffff9a143da00000(0000) knlGS:0000000000000000 <4> [139.943589] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4> [139.943589] CR2: 00007fc73997e098 CR3: 000000002f5fe000 CR4: 00000000000006f0 <4> [139.943589] Call Trace: <4> [139.943589] intel_pin_and_fence_fb_obj+0x1c9/0x1f0 [i915] <4> [139.943589] intel_plane_pin_fb+0x3f/0xd0 [i915] <4> [139.943589] intel_prepare_plane_fb+0x13b/0x5c0 [i915] <4> [139.943589] drm_atomic_helper_prepare_planes+0x85/0x110 <4> [139.943589] intel_atomic_commit+0xda/0x390 [i915] <4> [139.943589] drm_atomic_helper_page_flip+0x9c/0xd0 <4> [139.943589] ? drm_event_reserve_init+0x46/0x60 <4> [139.943589] drm_mode_page_flip_ioctl+0x587/0x5d0 This completes the symmetry lost in commit 8b1c78e06e61 ("drm/i915: Avoid calling i915_gem_object_unbind holding object lock"). Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1743 Fixes: 8b1c78e06e61 ("drm/i915: Avoid calling i915_gem_object_unbind holding object lock") Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Andi Shyti Cc: # v5.6+ Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200420125356.26614-1-chris@chris-wilson.co.uk (cherry picked from commit a95f3ac21d64d62c746f836598d1467d5837fa28) (cherry picked from commit 2208b85fa1766ee4821a9435d548578b67090531) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 0cc40e77bbd2..4f96c8788a2e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -368,7 +368,6 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_vma *vma; - GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); if (!atomic_read(&obj->bind_count)) return; @@ -400,12 +399,8 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) { - struct drm_i915_gem_object *obj = vma->obj; - - assert_object_held(obj); - /* Bump the LRU to try and avoid premature eviction whilst flipping */ - i915_gem_object_bump_inactive_ggtt(obj); + i915_gem_object_bump_inactive_ggtt(vma->obj); i915_vma_unpin(vma); } -- cgit v1.2.3-59-g8ed1b From 53b2622e774681943230fb9f31096512fff99fe7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 28 Apr 2020 19:47:49 +0100 Subject: drm/i915/execlists: Avoid reusing the same logical CCID The bspec is confusing on the nature of the upper 32bits of the LRC descriptor. Once upon a time, it said that it uses the upper 32b to decide if it should perform a lite-restore, and so we must ensure that each unique context submitted to HW is given a unique CCID [for the duration of it being on the HW]. Currently, this is achieved by using a small circular tag, and assigning every context submitted to HW a new id. However, this tag is being cleared on repinning an inflight context such that we end up re-using the 0 tag for multiple contexts. To avoid accidentally clearing the CCID in the upper 32bits of the LRC descriptor, split the descriptor into two dwords so we can update the GGTT address separately from the CCID. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1796 Fixes: 2935ed5339c4 ("drm/i915: Remove logical HW ID") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: # v5.5+ Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200428184751.11257-1-chris@chris-wilson.co.uk (cherry picked from commit 2632f174a2e1a5fd40a70404fa8ccfd0b1f79ebd) (cherry picked from commit a4b70fcc587860f4b972f68217d8ebebe295ec15) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_context_types.h | 8 +++- drivers/gpu/drm/i915/gt/intel_engine_types.h | 5 +++ drivers/gpu/drm/i915/gt/intel_lrc.c | 50 ++++++++++------------- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 +- drivers/gpu/drm/i915/gvt/scheduler.c | 4 +- drivers/gpu/drm/i915/i915_perf.c | 3 +- 6 files changed, 37 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 07cb83a0d017..ca0d4f4f3615 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -69,7 +69,13 @@ struct intel_context { #define CONTEXT_NOPREEMPT 7 u32 *lrc_reg_state; - u64 lrc_desc; + union { + struct { + u32 lrca; + u32 ccid; + }; + u64 desc; + } lrc; u32 tag; /* cookie passed to HW to track this context on submission */ /* Time on GPU as tracked by the hw. */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 763f8f530ded..8dd210a2c340 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -156,6 +156,11 @@ struct intel_engine_execlists { */ struct i915_priolist default_priolist; + /** + * @ccid: identifier for contexts submitted to this engine + */ + u32 ccid; + /** * @yield: CCID at the time of the last semaphore-wait interrupt. * diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index a21b962c736f..e8b02f84aa3d 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -456,10 +456,10 @@ assert_priority_queue(const struct i915_request *prev, * engine info, SW context ID and SW counter need to form a unique number * (Context ID) per lrc. */ -static u64 +static u32 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) { - u64 desc; + u32 desc; desc = INTEL_LEGACY_32B_CONTEXT; if (i915_vm_is_4lvl(ce->vm)) @@ -470,21 +470,7 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) if (IS_GEN(engine->i915, 8)) desc |= GEN8_CTX_L3LLC_COHERENT; - desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */ - /* - * The following 32bits are copied into the OA reports (dword 2). - * Consider updating oa_get_render_ctx_id in i915_perf.c when changing - * anything below. - */ - if (INTEL_GEN(engine->i915) >= 11) { - desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT; - /* bits 48-53 */ - - desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT; - /* bits 61-63 */ - } - - return desc; + return i915_ggtt_offset(ce->state) | desc; } static inline unsigned int dword_in_page(void *addr) @@ -1192,7 +1178,7 @@ static void reset_active(struct i915_request *rq, __execlists_update_reg_state(ce, engine, head); /* We've switched away, so this should be a no-op, but intent matters */ - ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; + ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; } static u32 intel_context_get_runtime(const struct intel_context *ce) @@ -1251,18 +1237,19 @@ __execlists_schedule_in(struct i915_request *rq) if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) execlists_check_context(ce, engine); - ce->lrc_desc &= ~GENMASK_ULL(47, 37); if (ce->tag) { /* Use a fixed tag for OA and friends */ - ce->lrc_desc |= (u64)ce->tag << 32; + ce->lrc.ccid = ce->tag; } else { /* We don't need a strict matching tag, just different values */ - ce->lrc_desc |= - (u64)(++engine->context_tag % NUM_CONTEXT_TAG) << - GEN11_SW_CTX_ID_SHIFT; + ce->lrc.ccid = + (++engine->context_tag % NUM_CONTEXT_TAG) << + (GEN11_SW_CTX_ID_SHIFT - 32); BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID); } + ce->lrc.ccid |= engine->execlists.ccid; + __intel_gt_pm_get(engine->gt); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); intel_engine_context_in(engine); @@ -1361,7 +1348,7 @@ execlists_schedule_out(struct i915_request *rq) static u64 execlists_update_context(struct i915_request *rq) { struct intel_context *ce = rq->context; - u64 desc = ce->lrc_desc; + u64 desc = ce->lrc.desc; u32 tail, prev; /* @@ -1400,7 +1387,7 @@ static u64 execlists_update_context(struct i915_request *rq) */ wmb(); - ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE; + ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE; return desc; } @@ -1785,7 +1772,7 @@ timeslice_yield(const struct intel_engine_execlists *el, * safe, yield if it might be stuck -- it will be given a fresh * timeslice in the near future. */ - return upper_32_bits(rq->context->lrc_desc) == READ_ONCE(el->yield); + return rq->context->lrc.ccid == READ_ONCE(el->yield); } static bool @@ -3071,7 +3058,7 @@ __execlists_context_pin(struct intel_context *ce, if (IS_ERR(vaddr)) return PTR_ERR(vaddr); - ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE; + ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE; ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; __execlists_update_reg_state(ce, engine, ce->ring->tail); @@ -3100,7 +3087,7 @@ static void execlists_context_reset(struct intel_context *ce) ce, ce->engine, ce->ring, true); __execlists_update_reg_state(ce, ce->engine, ce->ring->tail); - ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; + ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; } static const struct intel_context_ops execlists_context_ops = { @@ -3781,7 +3768,7 @@ out_replay: head, ce->ring->tail); __execlists_reset_reg_state(ce, engine); __execlists_update_reg_state(ce, engine, head); - ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */ + ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */ unwind: /* Push back any incomplete requests for replay after the reset. */ @@ -4548,6 +4535,11 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) else execlists->csb_size = GEN11_CSB_ENTRIES; + if (INTEL_GEN(engine->i915) >= 11) { + execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32); + execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32); + } + reset_csb_pointers(engine); /* Finally, take ownership and responsibility for cleanup! */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index fe7778c28d2d..aa6d56e25a10 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -217,7 +217,7 @@ static void guc_wq_item_append(struct intel_guc *guc, static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; - u32 ctx_desc = lower_32_bits(rq->context->lrc_desc); + u32 ctx_desc = rq->context->lrc.ccid; u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64); guc_wq_item_append(guc, engine->guc_id, ctx_desc, diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index cb11c3184085..6eb6710b35e9 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -290,7 +290,7 @@ static void shadow_context_descriptor_update(struct intel_context *ce, struct intel_vgpu_workload *workload) { - u64 desc = ce->lrc_desc; + u64 desc = ce->lrc.desc; /* * Update bits 0-11 of the context descriptor which includes flags @@ -300,7 +300,7 @@ shadow_context_descriptor_update(struct intel_context *ce, desc |= (u64)workload->ctx_desc.addressing_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT; - ce->lrc_desc = desc; + ce->lrc.desc = desc; } static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 66a46e41d5ef..b5030192be3e 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1310,8 +1310,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) * dropped by GuC. They won't be part of the context * ID in the OA reports, so squash those lower bits. */ - stream->specific_ctx_id = - lower_32_bits(ce->lrc_desc) >> 12; + stream->specific_ctx_id = ce->lrc.lrca >> 12; /* * GuC uses the top bit to signal proxy submission, so -- cgit v1.2.3-59-g8ed1b From 1bc6a60143a4f9264cc6e09ceb9919f4e813a872 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 28 Apr 2020 19:47:50 +0100 Subject: drm/i915/execlists: Track inflight CCID The presumption is that by using a circular counter that is twice as large as the maximum ELSP submission, we would never reuse the same CCID for two inflight contexts. However, if we continually preempt an active context such that it always remains inflight, it can be resubmitted with an arbitrary number of paired contexts. As each of its paired contexts will use a new CCID, eventually it will wrap and submit two ELSP with the same CCID. Rather than use a simple circular counter, switch over to a small bitmap of inflight ids so we can avoid reusing one that is still potentially active. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1796 Fixes: 2935ed5339c4 ("drm/i915: Remove logical HW ID") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: # v5.5+ Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200428184751.11257-2-chris@chris-wilson.co.uk (cherry picked from commit 5c4a53e3b1cbc38d0906e382f1037290658759bb) (cherry picked from commit 134711240307d5586ae8e828d2699db70a8b74f2) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_engine_types.h | 3 +-- drivers/gpu/drm/i915/gt/intel_lrc.c | 29 +++++++++++++++++++++------- drivers/gpu/drm/i915/i915_perf.c | 3 +-- drivers/gpu/drm/i915/selftests/i915_vma.c | 2 +- 4 files changed, 25 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 8dd210a2c340..0be674ae1cf6 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -309,8 +309,7 @@ struct intel_engine_cs { u32 context_size; u32 mmio_base; - unsigned int context_tag; -#define NUM_CONTEXT_TAG roundup_pow_of_two(2 * EXECLIST_MAX_PORTS) + unsigned long context_tag; struct rb_node uabi_node; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index e8b02f84aa3d..77420372d813 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1239,13 +1239,17 @@ __execlists_schedule_in(struct i915_request *rq) if (ce->tag) { /* Use a fixed tag for OA and friends */ + GEM_BUG_ON(ce->tag <= BITS_PER_LONG); ce->lrc.ccid = ce->tag; } else { /* We don't need a strict matching tag, just different values */ - ce->lrc.ccid = - (++engine->context_tag % NUM_CONTEXT_TAG) << - (GEN11_SW_CTX_ID_SHIFT - 32); - BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID); + unsigned int tag = ffs(engine->context_tag); + + GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG); + clear_bit(tag - 1, &engine->context_tag); + ce->lrc.ccid = tag << (GEN11_SW_CTX_ID_SHIFT - 32); + + BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID); } ce->lrc.ccid |= engine->execlists.ccid; @@ -1289,7 +1293,8 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce) static inline void __execlists_schedule_out(struct i915_request *rq, - struct intel_engine_cs * const engine) + struct intel_engine_cs * const engine, + unsigned int ccid) { struct intel_context * const ce = rq->context; @@ -1307,6 +1312,14 @@ __execlists_schedule_out(struct i915_request *rq, i915_request_completed(rq)) intel_engine_add_retire(engine, ce->timeline); + ccid >>= GEN11_SW_CTX_ID_SHIFT - 32; + ccid &= GEN12_MAX_CONTEXT_HW_ID; + if (ccid < BITS_PER_LONG) { + GEM_BUG_ON(ccid == 0); + GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag)); + set_bit(ccid - 1, &engine->context_tag); + } + intel_context_update_runtime(ce); intel_engine_context_out(engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); @@ -1332,15 +1345,17 @@ execlists_schedule_out(struct i915_request *rq) { struct intel_context * const ce = rq->context; struct intel_engine_cs *cur, *old; + u32 ccid; trace_i915_request_out(rq); + ccid = rq->context->lrc.ccid; old = READ_ONCE(ce->inflight); do cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL; while (!try_cmpxchg(&ce->inflight, &old, cur)); if (!cur) - __execlists_schedule_out(rq, old); + __execlists_schedule_out(rq, old, ccid); i915_request_put(rq); } @@ -3556,7 +3571,7 @@ static void enable_execlists(struct intel_engine_cs *engine) enable_error_interrupt(engine); - engine->context_tag = 0; + engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0); } static bool unexpected_starting_state(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index b5030192be3e..cf2c01f17da8 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1327,11 +1327,10 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32); /* * Pick an unused context id - * 0 - (NUM_CONTEXT_TAG - 1) are used by other contexts + * 0 - BITS_PER_LONG are used by other contexts * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context */ stream->specific_ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32); - BUILD_BUG_ON((GEN12_MAX_CONTEXT_HW_ID - 1) < NUM_CONTEXT_TAG); break; } diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index 58b5f40a07dd..af89c7fc8f59 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -173,7 +173,7 @@ static int igt_vma_create(void *arg) } nc = 0; - for_each_prime_number(num_ctx, 2 * NUM_CONTEXT_TAG) { + for_each_prime_number(num_ctx, 2 * BITS_PER_LONG) { for (; nc < num_ctx; nc++) { ctx = mock_context(i915, "mock"); if (!ctx) -- cgit v1.2.3-59-g8ed1b From 8101b5a1531f3390b3a69fa7934c70a8fd6566ad Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Apr 2020 11:07:22 +0200 Subject: ARM: futex: Address build warning Stephen reported the following build warning on a ARM multi_v7_defconfig build with GCC 9.2.1: kernel/futex.c: In function 'do_futex': kernel/futex.c:1676:17: warning: 'oldval' may be used uninitialized in this function [-Wmaybe-uninitialized] 1676 | return oldval == cmparg; | ~~~~~~~^~~~~~~~~ kernel/futex.c:1652:6: note: 'oldval' was declared here 1652 | int oldval, ret; | ^~~~~~ introduced by commit a08971e9488d ("futex: arch_futex_atomic_op_inuser() calling conventions change"). While that change should not make any difference it confuses GCC which fails to work out that oldval is not referenced when the return value is not zero. GCC fails to properly analyze arch_futex_atomic_op_inuser(). It's not the early return, the issue is with the assembly macros. GCC fails to detect that those either set 'ret' to 0 and set oldval or set 'ret' to -EFAULT which makes oldval uninteresting. The store to the callsite supplied oldval pointer is conditional on ret == 0. The straight forward way to solve this is to make the store unconditional. Aside of addressing the build warning this makes sense anyway because it removes the conditional from the fastpath. In the error case the stored value is uninteresting and the extra store does not matter at all. Reported-by: Stephen Rothwell Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/87pncao2ph.fsf@nanos.tec.linutronix.de --- arch/arm/include/asm/futex.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index e133da303a98..a9151884bc85 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h @@ -165,8 +165,13 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) preempt_enable(); #endif - if (!ret) - *oval = oldval; + /* + * Store unconditionally. If ret != 0 the extra store is the least + * of the worries but GCC cannot figure out that __futex_atomic_op() + * is either setting ret to -EFAULT or storing the old value in + * oldval which results in a uninitialized warning at the call site. + */ + *oval = oldval; return ret; } -- cgit v1.2.3-59-g8ed1b From 91edf63d5022bd0464788ffb4acc3d5febbaf81d Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Thu, 7 May 2020 08:49:18 +0800 Subject: usb: chipidea: msm: Ensure proper controller reset using role switch API Currently we check to make sure there is no error state on the extcon handle for VBUS when writing to the HS_PHY_GENCONFIG_2 register. When using the USB role-switch API we still need to write to this register absent an extcon handle. This patch makes the appropriate update to ensure the write happens if role-switching is true. Fixes: 05559f10ed79 ("usb: chipidea: add role switch class support") Cc: stable Cc: Greg Kroah-Hartman Cc: Philipp Zabel Cc: linux-usb@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Stephen Boyd Signed-off-by: Bryan O'Donoghue Signed-off-by: Peter Chen Link: https://lore.kernel.org/r/20200507004918.25975-2-peter.chen@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/ci_hdrc_msm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/chipidea/ci_hdrc_msm.c b/drivers/usb/chipidea/ci_hdrc_msm.c index af648ba6544d..46105457e1ca 100644 --- a/drivers/usb/chipidea/ci_hdrc_msm.c +++ b/drivers/usb/chipidea/ci_hdrc_msm.c @@ -114,7 +114,7 @@ static int ci_hdrc_msm_notify_event(struct ci_hdrc *ci, unsigned event) hw_write_id_reg(ci, HS_PHY_GENCONFIG_2, HS_PHY_ULPI_TX_PKT_EN_CLR_FIX, 0); - if (!IS_ERR(ci->platdata->vbus_extcon.edev)) { + if (!IS_ERR(ci->platdata->vbus_extcon.edev) || ci->role_switch) { hw_write_id_reg(ci, HS_PHY_GENCONFIG_2, HS_PHY_SESS_VLD_CTRL_EN, HS_PHY_SESS_VLD_CTRL_EN); -- cgit v1.2.3-59-g8ed1b From 027d0c7101f50cf03aeea9eebf484afd4920c8d3 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 5 May 2020 13:59:30 +0100 Subject: arm64: hugetlb: avoid potential NULL dereference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The static analyzer in GCC 10 spotted that in huge_pte_alloc() we may pass a NULL pmdp into pte_alloc_map() when pmd_alloc() returns NULL: | CC arch/arm64/mm/pageattr.o | CC arch/arm64/mm/hugetlbpage.o | from arch/arm64/mm/hugetlbpage.c:10: | arch/arm64/mm/hugetlbpage.c: In function ‘huge_pte_alloc’: | ./arch/arm64/include/asm/pgtable-types.h:28:24: warning: dereference of NULL ‘pmdp’ [CWE-690] [-Wanalyzer-null-dereference] | ./arch/arm64/include/asm/pgtable.h:436:26: note: in expansion of macro ‘pmd_val’ | arch/arm64/mm/hugetlbpage.c:242:10: note: in expansion of macro ‘pte_alloc_map’ | |arch/arm64/mm/hugetlbpage.c:232:10: | |./arch/arm64/include/asm/pgtable-types.h:28:24: | ./arch/arm64/include/asm/pgtable.h:436:26: note: in expansion of macro ‘pmd_val’ | arch/arm64/mm/hugetlbpage.c:242:10: note: in expansion of macro ‘pte_alloc_map’ This can only occur when the kernel cannot allocate a page, and so is unlikely to happen in practice before other systems start failing. We can avoid this by bailing out if pmd_alloc() fails, as we do earlier in the function if pud_alloc() fails. Fixes: 66b3923a1a0f ("arm64: hugetlb: add support for PTE contiguous bit") Signed-off-by: Mark Rutland Reported-by: Kyrill Tkachov Cc: # 4.5.x- Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/hugetlbpage.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index bbeb6a5a6ba6..0be3355e3499 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -230,6 +230,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, ptep = (pte_t *)pudp; } else if (sz == (CONT_PTE_SIZE)) { pmdp = pmd_alloc(mm, pudp, addr); + if (!pmdp) + return NULL; WARN_ON(addr & (sz - 1)); /* -- cgit v1.2.3-59-g8ed1b From 282ede76e47048eebc8ce5324b412890f0ec0a69 Mon Sep 17 00:00:00 2001 From: Ben Chuang Date: Mon, 27 Apr 2020 18:30:48 +0800 Subject: mmc: sdhci-pci-gli: Fix no irq handler from suspend The kernel prints a message similar to "[ 28.881959] do_IRQ: 5.36 No irq handler for vector" when GL975x resumes from suspend. Implement a resume callback to fix this. Fixes: 31e43f31890c ("mmc: sdhci-pci-gli: Enable MSI interrupt for GL975x") Co-developed-by: Renius Chen Signed-off-by: Renius Chen Tested-by: Dave Flogeras Signed-off-by: Ben Chuang Tested-by: Vineeth Pillai Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20200427103048.20785-1-benchuanggli@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Samuel Zou [Samuel Zou: Make sdhci_pci_gli_resume() static] Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-gli.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c index ce15a05f23d4..ff39d81a5742 100644 --- a/drivers/mmc/host/sdhci-pci-gli.c +++ b/drivers/mmc/host/sdhci-pci-gli.c @@ -334,6 +334,18 @@ static u32 sdhci_gl9750_readl(struct sdhci_host *host, int reg) return value; } +#ifdef CONFIG_PM_SLEEP +static int sdhci_pci_gli_resume(struct sdhci_pci_chip *chip) +{ + struct sdhci_pci_slot *slot = chip->slots[0]; + + pci_free_irq_vectors(slot->chip->pdev); + gli_pcie_enable_msi(slot); + + return sdhci_pci_resume_host(chip); +} +#endif + static const struct sdhci_ops sdhci_gl9755_ops = { .set_clock = sdhci_set_clock, .enable_dma = sdhci_pci_enable_dma, @@ -348,6 +360,9 @@ const struct sdhci_pci_fixes sdhci_gl9755 = { .quirks2 = SDHCI_QUIRK2_BROKEN_DDR50, .probe_slot = gli_probe_slot_gl9755, .ops = &sdhci_gl9755_ops, +#ifdef CONFIG_PM_SLEEP + .resume = sdhci_pci_gli_resume, +#endif }; static const struct sdhci_ops sdhci_gl9750_ops = { @@ -366,4 +381,7 @@ const struct sdhci_pci_fixes sdhci_gl9750 = { .quirks2 = SDHCI_QUIRK2_BROKEN_DDR50, .probe_slot = gli_probe_slot_gl9750, .ops = &sdhci_gl9750_ops, +#ifdef CONFIG_PM_SLEEP + .resume = sdhci_pci_gli_resume, +#endif }; -- cgit v1.2.3-59-g8ed1b From 7c277dd2b0ff6a16f1732a66c2c52a29f067163e Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 26 Apr 2020 22:23:55 +0200 Subject: mmc: alcor: Fix a resource leak in the error path for ->probe() If devm_request_threaded_irq() fails, the allocated struct mmc_host needs to be freed via calling mmc_free_host(), so let's do that. Fixes: c5413ad815a6 ("mmc: add new Alcor Micro Cardreader SD/MMC driver") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/20200426202355.43055-1-christophe.jaillet@wanadoo.fr Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/alcor.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/alcor.c b/drivers/mmc/host/alcor.c index 1aee485d56d4..026ca9194ce5 100644 --- a/drivers/mmc/host/alcor.c +++ b/drivers/mmc/host/alcor.c @@ -1104,7 +1104,7 @@ static int alcor_pci_sdmmc_drv_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "Failed to get irq for data line\n"); - return ret; + goto free_host; } mutex_init(&host->cmd_mutex); @@ -1116,6 +1116,10 @@ static int alcor_pci_sdmmc_drv_probe(struct platform_device *pdev) dev_set_drvdata(&pdev->dev, host); mmc_add_host(mmc); return 0; + +free_host: + mmc_free_host(mmc); + return ret; } static int alcor_pci_sdmmc_drv_remove(struct platform_device *pdev) -- cgit v1.2.3-59-g8ed1b From b56ff195c317ad28c05d354aeecbb9995b8e08c1 Mon Sep 17 00:00:00 2001 From: Ben Chuang Date: Mon, 4 May 2020 14:39:57 +0800 Subject: mmc: sdhci-pci-gli: Fix can not access GL9750 after reboot from Windows 10 Need to clear some bits in a vendor-defined register after reboot from Windows 10. Fixes: e51df6ce668a ("mmc: host: sdhci-pci: Add Genesys Logic GL975x support") Reported-by: Grzegorz Kowal Signed-off-by: Ben Chuang Acked-by: Adrian Hunter Tested-by: Grzegorz Kowal Link: https://lore.kernel.org/r/20200504063957.6638-1-benchuanggli@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-gli.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c index ff39d81a5742..fd76aa672e02 100644 --- a/drivers/mmc/host/sdhci-pci-gli.c +++ b/drivers/mmc/host/sdhci-pci-gli.c @@ -26,6 +26,9 @@ #define SDHCI_GLI_9750_DRIVING_2 GENMASK(27, 26) #define GLI_9750_DRIVING_1_VALUE 0xFFF #define GLI_9750_DRIVING_2_VALUE 0x3 +#define SDHCI_GLI_9750_SEL_1 BIT(29) +#define SDHCI_GLI_9750_SEL_2 BIT(31) +#define SDHCI_GLI_9750_ALL_RST (BIT(24)|BIT(25)|BIT(28)|BIT(30)) #define SDHCI_GLI_9750_PLL 0x864 #define SDHCI_GLI_9750_PLL_TX2_INV BIT(23) @@ -122,6 +125,8 @@ static void gli_set_9750(struct sdhci_host *host) GLI_9750_DRIVING_1_VALUE); driving_value |= FIELD_PREP(SDHCI_GLI_9750_DRIVING_2, GLI_9750_DRIVING_2_VALUE); + driving_value &= ~(SDHCI_GLI_9750_SEL_1|SDHCI_GLI_9750_SEL_2|SDHCI_GLI_9750_ALL_RST); + driving_value |= SDHCI_GLI_9750_SEL_2; sdhci_writel(host, driving_value, SDHCI_GLI_9750_DRIVING); sw_ctrl_value &= ~SDHCI_GLI_9750_SW_CTRL_4; -- cgit v1.2.3-59-g8ed1b From 156c75737255b8db0aa887abcb66b709856cf453 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 May 2020 14:47:53 +0200 Subject: vboxsf: don't use the source name in the bdi name Simplify the bdi name to mirror what we are doing elsewhere, and drop them name in favor of just using a number. This avoids a potentially very long bdi name. Signed-off-by: Christoph Hellwig Reviewed-by: Hans de Goede Signed-off-by: Jens Axboe --- fs/vboxsf/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c index 675e26989376..8fe03b4a0d2b 100644 --- a/fs/vboxsf/super.c +++ b/fs/vboxsf/super.c @@ -164,7 +164,7 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc) goto fail_free; } - err = super_setup_bdi_name(sb, "vboxsf-%s.%d", fc->source, sbi->bdi_id); + err = super_setup_bdi_name(sb, "vboxsf-%d", sbi->bdi_id); if (err) goto fail_free; -- cgit v1.2.3-59-g8ed1b From eb7ae5e06bb6e6ac6bb86872d27c43ebab92f6b2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 May 2020 14:47:54 +0200 Subject: bdi: move bdi_dev_name out of line bdi_dev_name is not a fast path function, move it out of line. This prepares for using it from modular callers without having to export an implementation detail like bdi_unknown_name. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Greg Kroah-Hartman Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 9 +-------- mm/backing-dev.c | 10 +++++++++- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index f88197c1ffc2..c9ad5c3b7b4b 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -505,13 +505,6 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi) (1 << WB_async_congested)); } -extern const char *bdi_unknown_name; - -static inline const char *bdi_dev_name(struct backing_dev_info *bdi) -{ - if (!bdi || !bdi->dev) - return bdi_unknown_name; - return dev_name(bdi->dev); -} +const char *bdi_dev_name(struct backing_dev_info *bdi); #endif /* _LINUX_BACKING_DEV_H */ diff --git a/mm/backing-dev.c b/mm/backing-dev.c index c81b4f3a7268..c2c44c89ee5d 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -21,7 +21,7 @@ struct backing_dev_info noop_backing_dev_info = { EXPORT_SYMBOL_GPL(noop_backing_dev_info); static struct class *bdi_class; -const char *bdi_unknown_name = "(unknown)"; +static const char *bdi_unknown_name = "(unknown)"; /* * bdi_lock protects bdi_tree and updates to bdi_list. bdi_list has RCU @@ -1043,6 +1043,14 @@ void bdi_put(struct backing_dev_info *bdi) } EXPORT_SYMBOL(bdi_put); +const char *bdi_dev_name(struct backing_dev_info *bdi) +{ + if (!bdi || !bdi->dev) + return bdi_unknown_name; + return dev_name(bdi->dev); +} +EXPORT_SYMBOL_GPL(bdi_dev_name); + static wait_queue_head_t congestion_wqh[2] = { __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]), __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1]) -- cgit v1.2.3-59-g8ed1b From 01c3259818a11f3cc3cd767adbae6b45849c03c1 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 7 May 2020 03:25:56 -0400 Subject: virtio_net: fix lockdep warning on 32 bit When we fill up a receive VQ, try_fill_recv currently tries to count kicks using a 64 bit stats counter. Turns out, on a 32 bit kernel that uses a seqcount. sequence counts are "lock" constructs where you need to make sure that writers are serialized. In turn, this means that we mustn't run two try_fill_recv concurrently. Which of course we don't. We do run try_fill_recv sometimes from a softirq napi context, and sometimes from a fully preemptible context, but the later always runs with napi disabled. However, when it comes to the seqcount, lockdep is trying to enforce the rule that the same lock isn't accessed from preemptible and softirq context - it doesn't know about napi being enabled/disabled. This causes a false-positive warning: WARNING: inconsistent lock state ... inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. As a work around, shut down the warning by switching to u64_stats_update_begin_irqsave - that works by disabling interrupts on 32 bit only, is a NOP on 64 bit. Reported-by: Thomas Gleixner Suggested-by: Eric Dumazet Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 11f722460513..ce07f52d89e7 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1243,9 +1243,11 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq, break; } while (rq->vq->num_free); if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) { - u64_stats_update_begin(&rq->stats.syncp); + unsigned long flags; + + flags = u64_stats_update_begin_irqsave(&rq->stats.syncp); rq->stats.kicks++; - u64_stats_update_end(&rq->stats.syncp); + u64_stats_update_end_irqrestore(&rq->stats.syncp, flags); } return !oom; -- cgit v1.2.3-59-g8ed1b From 64082b67ba724c5a5acfff38d352068c4992d089 Mon Sep 17 00:00:00 2001 From: Maciej Żenczykowski Date: Thu, 7 May 2020 00:58:05 -0700 Subject: net: remove spurious declaration of tcp_default_init_rwnd() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit it doesn't actually exist... Test: builds and 'git grep tcp_default_init_rwnd' comes up empty Signed-off-by: Maciej Żenczykowski Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index dcf9a72eeaa6..64f84683feae 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1376,7 +1376,6 @@ static inline void tcp_sack_reset(struct tcp_options_received *rx_opt) rx_opt->num_sacks = 0; } -u32 tcp_default_init_rwnd(u32 mss); void tcp_cwnd_restart(struct sock *sk, s32 delta); static inline void tcp_slow_start_after_idle_check(struct sock *sk) -- cgit v1.2.3-59-g8ed1b From 1119d265bc20226c241e5540fc8a246d9e30b272 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 28 Apr 2020 16:45:16 -0500 Subject: objtool: Fix infinite loop in find_jump_table() Kristen found a hang in objtool when building with -ffunction-sections. It was caused by evergreen_pcie_gen2_enable.cold() being laid out immediately before evergreen_pcie_gen2_enable(). Since their "pfunc" is always the same, find_jump_table() got into an infinite loop because it didn't recognize the boundary between the two functions. Fix that with a new prev_insn_same_sym() helper, which doesn't cross subfunction boundaries. Reported-by: Kristen Carlson Accardi Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/378b51c9d9c894dc3294bc460b4b0869e950b7c5.1588110291.git.jpoimboe@redhat.com --- tools/objtool/check.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 4b170fd08a28..0e8f9a32e4d1 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -72,6 +72,17 @@ static struct instruction *next_insn_same_func(struct objtool_file *file, return find_insn(file, func->cfunc->sec, func->cfunc->offset); } +static struct instruction *prev_insn_same_sym(struct objtool_file *file, + struct instruction *insn) +{ + struct instruction *prev = list_prev_entry(insn, list); + + if (&prev->list != &file->insn_list && prev->func == insn->func) + return prev; + + return NULL; +} + #define func_for_each_insn(file, func, insn) \ for (insn = find_insn(file, func->sec, func->offset); \ insn; \ @@ -1050,8 +1061,8 @@ static struct rela *find_jump_table(struct objtool_file *file, * it. */ for (; - &insn->list != &file->insn_list && insn->func && insn->func->pfunc == func; - insn = insn->first_jump_src ?: list_prev_entry(insn, list)) { + insn && insn->func && insn->func->pfunc == func; + insn = insn->first_jump_src ?: prev_insn_same_sym(file, insn)) { if (insn != orig_insn && insn->type == INSN_JUMP_DYNAMIC) break; -- cgit v1.2.3-59-g8ed1b From 90da2e3f25c8b4d742b2687b8fed8fc4eb8851da Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 4 May 2020 22:39:35 +0300 Subject: splice: move f_mode checks to do_{splice,tee}() do_splice() is used by io_uring, as will be do_tee(). Move f_mode checks from sys_{splice,tee}() to do_{splice,tee}(), so they're enforced for io_uring as well. Fixes: 7d67af2c0134 ("io_uring: add splice(2) support") Reported-by: Jann Horn Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/splice.c | 45 ++++++++++++++++++--------------------------- 1 file changed, 18 insertions(+), 27 deletions(-) diff --git a/fs/splice.c b/fs/splice.c index 4735defc46ee..fd0a1e7e5959 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1118,6 +1118,10 @@ long do_splice(struct file *in, loff_t __user *off_in, loff_t offset; long ret; + if (unlikely(!(in->f_mode & FMODE_READ) || + !(out->f_mode & FMODE_WRITE))) + return -EBADF; + ipipe = get_pipe_info(in); opipe = get_pipe_info(out); @@ -1125,12 +1129,6 @@ long do_splice(struct file *in, loff_t __user *off_in, if (off_in || off_out) return -ESPIPE; - if (!(in->f_mode & FMODE_READ)) - return -EBADF; - - if (!(out->f_mode & FMODE_WRITE)) - return -EBADF; - /* Splicing to self would be fun, but... */ if (ipipe == opipe) return -EINVAL; @@ -1153,9 +1151,6 @@ long do_splice(struct file *in, loff_t __user *off_in, offset = out->f_pos; } - if (unlikely(!(out->f_mode & FMODE_WRITE))) - return -EBADF; - if (unlikely(out->f_flags & O_APPEND)) return -EINVAL; @@ -1440,15 +1435,11 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in, error = -EBADF; in = fdget(fd_in); if (in.file) { - if (in.file->f_mode & FMODE_READ) { - out = fdget(fd_out); - if (out.file) { - if (out.file->f_mode & FMODE_WRITE) - error = do_splice(in.file, off_in, - out.file, off_out, - len, flags); - fdput(out); - } + out = fdget(fd_out); + if (out.file) { + error = do_splice(in.file, off_in, out.file, off_out, + len, flags); + fdput(out); } fdput(in); } @@ -1770,6 +1761,10 @@ static long do_tee(struct file *in, struct file *out, size_t len, struct pipe_inode_info *opipe = get_pipe_info(out); int ret = -EINVAL; + if (unlikely(!(in->f_mode & FMODE_READ) || + !(out->f_mode & FMODE_WRITE))) + return -EBADF; + /* * Duplicate the contents of ipipe to opipe without actually * copying the data. @@ -1795,7 +1790,7 @@ static long do_tee(struct file *in, struct file *out, size_t len, SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) { - struct fd in; + struct fd in, out; int error; if (unlikely(flags & ~SPLICE_F_ALL)) @@ -1807,14 +1802,10 @@ SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) error = -EBADF; in = fdget(fdin); if (in.file) { - if (in.file->f_mode & FMODE_READ) { - struct fd out = fdget(fdout); - if (out.file) { - if (out.file->f_mode & FMODE_WRITE) - error = do_tee(in.file, out.file, - len, flags); - fdput(out); - } + out = fdget(fdout); + if (out.file) { + error = do_tee(in.file, out.file, len, flags); + fdput(out); } fdput(in); } -- cgit v1.2.3-59-g8ed1b From d16a8c31077e75ecb9427fbfea59b74eed00f698 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 6 May 2020 10:20:10 -0400 Subject: tracing: Wait for preempt irq delay thread to finish Running on a slower machine, it is possible that the preempt delay kernel thread may still be executing if the module was immediately removed after added, and this can cause the kernel to crash as the kernel thread might be executing after its code has been removed. There's no reason that the caller of the code shouldn't just wait for the delay thread to finish, as the thread can also be created by a trigger in the sysfs code, which also has the same issues. Link: http://lore.kernel.org/r/5EA2B0C8.2080706@cn.fujitsu.com Cc: stable@vger.kernel.org Fixes: 793937236d1ee ("lib: Add module for testing preemptoff/irqsoff latency tracers") Reported-by: Xiao Yang Reviewed-by: Xiao Yang Reviewed-by: Joel Fernandes Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/preemptirq_delay_test.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c index 31c0fad4cb9e..c4c86de63cf9 100644 --- a/kernel/trace/preemptirq_delay_test.c +++ b/kernel/trace/preemptirq_delay_test.c @@ -113,22 +113,42 @@ static int preemptirq_delay_run(void *data) for (i = 0; i < s; i++) (testfuncs[i])(i); + + set_current_state(TASK_INTERRUPTIBLE); + while (!kthread_should_stop()) { + schedule(); + set_current_state(TASK_INTERRUPTIBLE); + } + + __set_current_state(TASK_RUNNING); + return 0; } -static struct task_struct *preemptirq_start_test(void) +static int preemptirq_run_test(void) { + struct task_struct *task; + char task_name[50]; snprintf(task_name, sizeof(task_name), "%s_test", test_mode); - return kthread_run(preemptirq_delay_run, NULL, task_name); + task = kthread_run(preemptirq_delay_run, NULL, task_name); + if (IS_ERR(task)) + return PTR_ERR(task); + if (task) + kthread_stop(task); + return 0; } static ssize_t trigger_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { - preemptirq_start_test(); + ssize_t ret; + + ret = preemptirq_run_test(); + if (ret) + return ret; return count; } @@ -148,11 +168,9 @@ static struct kobject *preemptirq_delay_kobj; static int __init preemptirq_delay_init(void) { - struct task_struct *test_task; int retval; - test_task = preemptirq_start_test(); - retval = PTR_ERR_OR_ZERO(test_task); + retval = preemptirq_run_test(); if (retval != 0) return retval; -- cgit v1.2.3-59-g8ed1b From 11f5efc3ab66284f7aaacc926e9351d658e2577b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 6 May 2020 10:36:18 -0400 Subject: tracing: Add a vmalloc_sync_mappings() for safe measure x86_64 lazily maps in the vmalloc pages, and the way this works with per_cpu areas can be complex, to say the least. Mappings may happen at boot up, and if nothing synchronizes the page tables, those page mappings may not be synced till they are used. This causes issues for anything that might touch one of those mappings in the path of the page fault handler. When one of those unmapped mappings is touched in the page fault handler, it will cause another page fault, which in turn will cause a page fault, and leave us in a loop of page faults. Commit 763802b53a42 ("x86/mm: split vmalloc_sync_all()") split vmalloc_sync_all() into vmalloc_sync_unmappings() and vmalloc_sync_mappings(), as on system exit, it did not need to do a full sync on x86_64 (although it still needed to be done on x86_32). By chance, the vmalloc_sync_all() would synchronize the page mappings done at boot up and prevent the per cpu area from being a problem for tracing in the page fault handler. But when that synchronization in the exit of a task became a nop, it caused the problem to appear. Link: https://lore.kernel.org/r/20200429054857.66e8e333@oasis.local.home Cc: stable@vger.kernel.org Fixes: 737223fbca3b1 ("tracing: Consolidate buffer allocation code") Reported-by: "Tzvetomir Stoyanov (VMware)" Suggested-by: Joerg Roedel Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8d2b98812625..9ed6d92768af 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8525,6 +8525,19 @@ static int allocate_trace_buffers(struct trace_array *tr, int size) */ allocate_snapshot = false; #endif + + /* + * Because of some magic with the way alloc_percpu() works on + * x86_64, we need to synchronize the pgd of all the tables, + * otherwise the trace events that happen in x86_64 page fault + * handlers can't cope with accessing the chance that a + * alloc_percpu()'d memory might be touched in the page fault trace + * event. Oh, and we need to audit all other alloc_percpu() and vmalloc() + * calls in tracing, because something might get triggered within a + * page fault trace event! + */ + vmalloc_sync_mappings(); + return 0; } -- cgit v1.2.3-59-g8ed1b From 386c82a70319d42dba4f1b30e5e7076f2b4d8c2f Mon Sep 17 00:00:00 2001 From: Yiwei Zhang Date: Tue, 28 Apr 2020 15:08:25 -0700 Subject: gpu/trace: Minor comment updates for gpu_mem_total tracepoint This change updates the improper comment for the 'size' attribute in the tracepoint definition. Most gfx drivers pre-fault in physical pages instead of making virtual allocations. So we drop the 'Virtual' keyword here and leave this to the implementations. Link: http://lkml.kernel.org/r/20200428220825.169606-1-zzyiwei@google.com Signed-off-by: Yiwei Zhang Signed-off-by: Steven Rostedt (VMware) --- include/trace/events/gpu_mem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/gpu_mem.h b/include/trace/events/gpu_mem.h index 1897822a9150..26d871f96e94 100644 --- a/include/trace/events/gpu_mem.h +++ b/include/trace/events/gpu_mem.h @@ -24,7 +24,7 @@ * * @pid: Put 0 for global total, while positive pid for process total. * - * @size: Virtual size of the allocation in bytes. + * @size: Size of the allocation in bytes. * */ TRACE_EVENT(gpu_mem_total, -- cgit v1.2.3-59-g8ed1b From f094a233e1d5b1c61cc797d204aa28b611058827 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 28 Apr 2020 21:49:59 +0000 Subject: tracing: Fix doc mistakes in trace sample As the example below shows, DECLARE_EVENT_CLASS() is used instead of DEFINE_EVENT_CLASS(). Link: http://lkml.kernel.org/r/20200428214959.11259-1-richard.weiyang@gmail.com Signed-off-by: Wei Yang Signed-off-by: Steven Rostedt (VMware) --- samples/trace_events/trace-events-sample.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h index 80b4a70315b6..13a35f7cbe66 100644 --- a/samples/trace_events/trace-events-sample.h +++ b/samples/trace_events/trace-events-sample.h @@ -416,7 +416,7 @@ TRACE_EVENT_FN(foo_bar_with_fn, * Note, TRACE_EVENT() itself is simply defined as: * * #define TRACE_EVENT(name, proto, args, tstruct, assign, printk) \ - * DEFINE_EVENT_CLASS(name, proto, args, tstruct, assign, printk); \ + * DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, printk); \ * DEFINE_EVENT(name, name, proto, args) * * The DEFINE_EVENT() also can be declared with conditions and reg functions: -- cgit v1.2.3-59-g8ed1b From 192b7993b3ff92b62b687e940e5e88fa0218d764 Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Thu, 23 Apr 2020 12:08:25 +0800 Subject: tracing: Make tracing_snapshot_instance_cond() static Fix the following sparse warning: kernel/trace/trace.c:950:6: warning: symbol 'tracing_snapshot_instance_cond' was not declared. Should it be static? Link: http://lkml.kernel.org/r/1587614905-48692-1-git-send-email-zou_wei@huawei.com Reported-by: Hulk Robot Signed-off-by: Zou Wei Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9ed6d92768af..29615f15a820 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -947,7 +947,8 @@ int __trace_bputs(unsigned long ip, const char *str) EXPORT_SYMBOL_GPL(__trace_bputs); #ifdef CONFIG_TRACER_SNAPSHOT -void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data) +static void tracing_snapshot_instance_cond(struct trace_array *tr, + void *cond_data) { struct tracer *tracer = tr->current_trace; unsigned long flags; -- cgit v1.2.3-59-g8ed1b From 8842604446d1f005abcbf8c63c12eabdb5695094 Mon Sep 17 00:00:00 2001 From: Yunfeng Ye Date: Thu, 7 May 2020 17:23:36 +0800 Subject: tools/bootconfig: Fix resource leak in apply_xbc() Fix the @data and @fd allocations that are leaked in the error path of apply_xbc(). Link: http://lkml.kernel.org/r/583a49c9-c27a-931d-e6c2-6f63a4b18bea@huawei.com Acked-by: Masami Hiramatsu Signed-off-by: Yunfeng Ye Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index 16b9a420e6fd..001076c51712 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -314,6 +314,7 @@ int apply_xbc(const char *path, const char *xbc_path) ret = delete_xbc(path); if (ret < 0) { pr_err("Failed to delete previous boot config: %d\n", ret); + free(data); return ret; } @@ -321,24 +322,26 @@ int apply_xbc(const char *path, const char *xbc_path) fd = open(path, O_RDWR | O_APPEND); if (fd < 0) { pr_err("Failed to open %s: %d\n", path, fd); + free(data); return fd; } /* TODO: Ensure the @path is initramfs/initrd image */ ret = write(fd, data, size + 8); if (ret < 0) { pr_err("Failed to apply a boot config: %d\n", ret); - return ret; + goto out; } /* Write a magic word of the bootconfig */ ret = write(fd, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN); if (ret < 0) { pr_err("Failed to apply a boot config magic: %d\n", ret); - return ret; + goto out; } +out: close(fd); free(data); - return 0; + return ret; } int usage(void) -- cgit v1.2.3-59-g8ed1b From 1a10186e598a5be5bc1cd5e55d9e9598a7c079ac Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 7 May 2020 11:49:49 +0200 Subject: usb: hso: correct debug message If you do not find the OUT endpoint, you should say so, rather than copy the error message for the IN endpoint. Presumably a copy and paste error. Signed-off-by: Oliver Neukum Signed-off-by: David S. Miller --- drivers/net/usb/hso.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 417e42c9fd03..bb8c34d746ab 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2659,7 +2659,7 @@ static struct hso_device *hso_create_bulk_serial_device( if (! (serial->out_endp = hso_get_ep(interface, USB_ENDPOINT_XFER_BULK, USB_DIR_OUT))) { - dev_err(&interface->dev, "Failed to find BULK IN ep\n"); + dev_err(&interface->dev, "Failed to find BULK OUT ep\n"); goto exit2; } -- cgit v1.2.3-59-g8ed1b From 8aef199481df0d2b6d110b8be321d5358511fe01 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 7 May 2020 13:45:11 +0200 Subject: net: hisilicon: Make CONFIG_HNS invisible The HNS config symbol enables the framework support for the Hisilicon Network Subsystem. It is already selected by all of its users, so there is no reason to make it visible. Signed-off-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig index 3892a2062404..2fff43509098 100644 --- a/drivers/net/ethernet/hisilicon/Kconfig +++ b/drivers/net/ethernet/hisilicon/Kconfig @@ -64,7 +64,7 @@ config HNS_MDIO the PHY config HNS - tristate "Hisilicon Network Subsystem Support (Framework)" + tristate ---help--- This selects the framework support for Hisilicon Network Subsystem. It is needed by any driver which provides HNS acceleration engine or make -- cgit v1.2.3-59-g8ed1b From c1f6e3c818dd734c30f6a7eeebf232ba2cf3181d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 7 May 2020 13:44:56 +0200 Subject: ALSA: rawmidi: Fix racy buffer resize under concurrent accesses The rawmidi core allows user to resize the runtime buffer via ioctl, and this may lead to UAF when performed during concurrent reads or writes: the read/write functions unlock the runtime lock temporarily during copying form/to user-space, and that's the race window. This patch fixes the hole by introducing a reference counter for the runtime buffer read/write access and returns -EBUSY error when the resize is performed concurrently against read/write. Note that the ref count field is a simple integer instead of refcount_t here, since the all contexts accessing the buffer is basically protected with a spinlock, hence we need no expensive atomic ops. Also, note that this busy check is needed only against read / write functions, and not in receive/transmit callbacks; the race can happen only at the spinlock hole mentioned in the above, while the whole function is protected for receive / transmit callbacks. Reported-by: butt3rflyh4ck Cc: Link: https://lore.kernel.org/r/CAFcO6XMWpUVK_yzzCpp8_XP7+=oUpQvuBeCbMffEDkpe8jWrfg@mail.gmail.com Link: https://lore.kernel.org/r/s5heerw3r5z.wl-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/rawmidi.h | 1 + sound/core/rawmidi.c | 31 +++++++++++++++++++++++++++---- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h index a36b7227a15a..334842daa904 100644 --- a/include/sound/rawmidi.h +++ b/include/sound/rawmidi.h @@ -61,6 +61,7 @@ struct snd_rawmidi_runtime { size_t avail_min; /* min avail for wakeup */ size_t avail; /* max used buffer for wakeup */ size_t xruns; /* over/underruns counter */ + int buffer_ref; /* buffer reference count */ /* misc */ spinlock_t lock; wait_queue_head_t sleep; diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index 20dd08e1f675..2a688b711a9a 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -120,6 +120,17 @@ static void snd_rawmidi_input_event_work(struct work_struct *work) runtime->event(runtime->substream); } +/* buffer refcount management: call with runtime->lock held */ +static inline void snd_rawmidi_buffer_ref(struct snd_rawmidi_runtime *runtime) +{ + runtime->buffer_ref++; +} + +static inline void snd_rawmidi_buffer_unref(struct snd_rawmidi_runtime *runtime) +{ + runtime->buffer_ref--; +} + static int snd_rawmidi_runtime_create(struct snd_rawmidi_substream *substream) { struct snd_rawmidi_runtime *runtime; @@ -669,6 +680,11 @@ static int resize_runtime_buffer(struct snd_rawmidi_runtime *runtime, if (!newbuf) return -ENOMEM; spin_lock_irq(&runtime->lock); + if (runtime->buffer_ref) { + spin_unlock_irq(&runtime->lock); + kvfree(newbuf); + return -EBUSY; + } oldbuf = runtime->buffer; runtime->buffer = newbuf; runtime->buffer_size = params->buffer_size; @@ -1019,8 +1035,10 @@ static long snd_rawmidi_kernel_read1(struct snd_rawmidi_substream *substream, long result = 0, count1; struct snd_rawmidi_runtime *runtime = substream->runtime; unsigned long appl_ptr; + int err = 0; spin_lock_irqsave(&runtime->lock, flags); + snd_rawmidi_buffer_ref(runtime); while (count > 0 && runtime->avail) { count1 = runtime->buffer_size - runtime->appl_ptr; if (count1 > count) @@ -1039,16 +1057,19 @@ static long snd_rawmidi_kernel_read1(struct snd_rawmidi_substream *substream, if (userbuf) { spin_unlock_irqrestore(&runtime->lock, flags); if (copy_to_user(userbuf + result, - runtime->buffer + appl_ptr, count1)) { - return result > 0 ? result : -EFAULT; - } + runtime->buffer + appl_ptr, count1)) + err = -EFAULT; spin_lock_irqsave(&runtime->lock, flags); + if (err) + goto out; } result += count1; count -= count1; } + out: + snd_rawmidi_buffer_unref(runtime); spin_unlock_irqrestore(&runtime->lock, flags); - return result; + return result > 0 ? result : err; } long snd_rawmidi_kernel_read(struct snd_rawmidi_substream *substream, @@ -1342,6 +1363,7 @@ static long snd_rawmidi_kernel_write1(struct snd_rawmidi_substream *substream, return -EAGAIN; } } + snd_rawmidi_buffer_ref(runtime); while (count > 0 && runtime->avail > 0) { count1 = runtime->buffer_size - runtime->appl_ptr; if (count1 > count) @@ -1373,6 +1395,7 @@ static long snd_rawmidi_kernel_write1(struct snd_rawmidi_substream *substream, } __end: count1 = runtime->avail < runtime->buffer_size; + snd_rawmidi_buffer_unref(runtime); spin_unlock_irqrestore(&runtime->lock, flags); if (count1) snd_rawmidi_output_trigger(substream, 1); -- cgit v1.2.3-59-g8ed1b From 63ff822358b276137059520cf16e587e8073e80f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 7 May 2020 14:56:15 -0600 Subject: io_uring: don't use 'fd' for openat/openat2/statx We currently make some guesses as when to open this fd, but in reality we have no business (or need) to do so at all. In fact, it makes certain things fail, like O_PATH. Remove the fd lookup from these opcodes, we're just passing the 'fd' to generic helpers anyway. With that, we can also remove the special casing of fd values in io_req_needs_file(), and the 'fd_non_neg' check that we have. And we can ensure that we only read sqe->fd once. This fixes O_PATH usage with openat/openat2, and ditto statx path side oddities. Cc: stable@vger.kernel.org: # v5.6 Reported-by: Max Kellermann Signed-off-by: Jens Axboe --- fs/io_uring.c | 32 +++++++------------------------- 1 file changed, 7 insertions(+), 25 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index dd680eb153cb..979d9f977409 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -680,8 +680,6 @@ struct io_op_def { unsigned needs_mm : 1; /* needs req->file assigned */ unsigned needs_file : 1; - /* needs req->file assigned IFF fd is >= 0 */ - unsigned fd_non_neg : 1; /* hash wq insertion if file is a regular file */ unsigned hash_reg_file : 1; /* unbound wq insertion if file is a non-regular file */ @@ -784,8 +782,6 @@ static const struct io_op_def io_op_defs[] = { .needs_file = 1, }, [IORING_OP_OPENAT] = { - .needs_file = 1, - .fd_non_neg = 1, .file_table = 1, .needs_fs = 1, }, @@ -799,8 +795,6 @@ static const struct io_op_def io_op_defs[] = { }, [IORING_OP_STATX] = { .needs_mm = 1, - .needs_file = 1, - .fd_non_neg = 1, .needs_fs = 1, .file_table = 1, }, @@ -837,8 +831,6 @@ static const struct io_op_def io_op_defs[] = { .buffer_select = 1, }, [IORING_OP_OPENAT2] = { - .needs_file = 1, - .fd_non_neg = 1, .file_table = 1, .needs_fs = 1, }, @@ -5368,15 +5360,6 @@ static void io_wq_submit_work(struct io_wq_work **workptr) io_steal_work(req, workptr); } -static int io_req_needs_file(struct io_kiocb *req, int fd) -{ - if (!io_op_defs[req->opcode].needs_file) - return 0; - if ((fd == -1 || fd == AT_FDCWD) && io_op_defs[req->opcode].fd_non_neg) - return 0; - return 1; -} - static inline struct file *io_file_from_index(struct io_ring_ctx *ctx, int index) { @@ -5414,14 +5397,11 @@ static int io_file_get(struct io_submit_state *state, struct io_kiocb *req, } static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req, - int fd, unsigned int flags) + int fd) { bool fixed; - if (!io_req_needs_file(req, fd)) - return 0; - - fixed = (flags & IOSQE_FIXED_FILE); + fixed = (req->flags & REQ_F_FIXED_FILE) != 0; if (unlikely(!fixed && req->needs_fixed_file)) return -EBADF; @@ -5798,7 +5778,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, struct io_submit_state *state, bool async) { unsigned int sqe_flags; - int id, fd; + int id; /* * All io need record the previous position, if LINK vs DARIN, @@ -5850,8 +5830,10 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, IOSQE_ASYNC | IOSQE_FIXED_FILE | IOSQE_BUFFER_SELECT | IOSQE_IO_LINK); - fd = READ_ONCE(sqe->fd); - return io_req_set_file(state, req, fd, sqe_flags); + if (!io_op_defs[req->opcode].needs_file) + return 0; + + return io_req_set_file(state, req, READ_ONCE(sqe->fd)); } static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr, -- cgit v1.2.3-59-g8ed1b From ee2875566868687a95b8ec23913c0d6bce220efd Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 May 2020 19:22:14 +0200 Subject: net: bareudp: avoid uninitialized variable warning clang points out that building without IPv6 would lead to returning an uninitialized variable if a packet with family!=AF_INET is passed into bareudp_udp_encap_recv(): drivers/net/bareudp.c:139:6: error: variable 'err' is used uninitialized whenever 'if' condition is false [-Werror,-Wsometimes-uninitialized] if (family == AF_INET) ^~~~~~~~~~~~~~~~~ drivers/net/bareudp.c:146:15: note: uninitialized use occurs here if (unlikely(err)) { ^~~ include/linux/compiler.h:78:42: note: expanded from macro 'unlikely' # define unlikely(x) __builtin_expect(!!(x), 0) ^ drivers/net/bareudp.c:139:2: note: remove the 'if' if its condition is always true if (family == AF_INET) ^~~~~~~~~~~~~~~~~~~~~~ This cannot happen in practice, so change the condition in a way that gcc sees the IPv4 case as unconditionally true here. For consistency, change all the similar constructs in this file the same way, using "if(IS_ENABLED())" instead of #if IS_ENABLED()". Fixes: 571912c69f0e ("net: UDP tunnel encapsulation module for tunnelling different protocols like MPLS, IP, NSH etc.") Signed-off-by: Arnd Bergmann Reviewed-by: Nathan Chancellor Signed-off-by: David S. Miller --- drivers/net/bareudp.c | 18 ++++-------------- include/net/udp_tunnel.h | 2 -- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index cc0703c3d57f..efd1a1d1f35e 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -136,25 +136,21 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) oiph = skb_network_header(skb); skb_reset_network_header(skb); - if (family == AF_INET) + if (!IS_ENABLED(CONFIG_IPV6) || family == AF_INET) err = IP_ECN_decapsulate(oiph, skb); -#if IS_ENABLED(CONFIG_IPV6) else err = IP6_ECN_decapsulate(oiph, skb); -#endif if (unlikely(err)) { if (log_ecn_error) { - if (family == AF_INET) + if (!IS_ENABLED(CONFIG_IPV6) || family == AF_INET) net_info_ratelimited("non-ECT from %pI4 " "with TOS=%#x\n", &((struct iphdr *)oiph)->saddr, ((struct iphdr *)oiph)->tos); -#if IS_ENABLED(CONFIG_IPV6) else net_info_ratelimited("non-ECT from %pI6\n", &((struct ipv6hdr *)oiph)->saddr); -#endif } if (err > 1) { ++bareudp->dev->stats.rx_frame_errors; @@ -350,7 +346,6 @@ free_dst: return err; } -#if IS_ENABLED(CONFIG_IPV6) static int bareudp6_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct bareudp_dev *bareudp, const struct ip_tunnel_info *info) @@ -411,7 +406,6 @@ free_dst: dst_release(dst); return err; } -#endif static netdev_tx_t bareudp_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -435,11 +429,9 @@ static netdev_tx_t bareudp_xmit(struct sk_buff *skb, struct net_device *dev) } rcu_read_lock(); -#if IS_ENABLED(CONFIG_IPV6) - if (info->mode & IP_TUNNEL_INFO_IPV6) + if (IS_ENABLED(CONFIG_IPV6) && info->mode & IP_TUNNEL_INFO_IPV6) err = bareudp6_xmit_skb(skb, dev, bareudp, info); else -#endif err = bareudp_xmit_skb(skb, dev, bareudp, info); rcu_read_unlock(); @@ -467,7 +459,7 @@ static int bareudp_fill_metadata_dst(struct net_device *dev, use_cache = ip_tunnel_dst_cache_usable(skb, info); - if (ip_tunnel_info_af(info) == AF_INET) { + if (!IS_ENABLED(CONFIG_IPV6) || ip_tunnel_info_af(info) == AF_INET) { struct rtable *rt; __be32 saddr; @@ -478,7 +470,6 @@ static int bareudp_fill_metadata_dst(struct net_device *dev, ip_rt_put(rt); info->key.u.ipv4.src = saddr; -#if IS_ENABLED(CONFIG_IPV6) } else if (ip_tunnel_info_af(info) == AF_INET6) { struct dst_entry *dst; struct in6_addr saddr; @@ -492,7 +483,6 @@ static int bareudp_fill_metadata_dst(struct net_device *dev, dst_release(dst); info->key.u.ipv6.src = saddr; -#endif } else { return -EINVAL; } diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 4b1f95e08307..e7312ceb2794 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -143,14 +143,12 @@ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb __be16 df, __be16 src_port, __be16 dst_port, bool xnet, bool nocheck); -#if IS_ENABLED(CONFIG_IPV6) int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, struct net_device *dev, struct in6_addr *saddr, struct in6_addr *daddr, __u8 prio, __u8 ttl, __be32 label, __be16 src_port, __be16 dst_port, bool nocheck); -#endif void udp_tunnel_sock_release(struct socket *sock); -- cgit v1.2.3-59-g8ed1b From 09454fd0a4ce23cb3d8af65066c91a1bf27120dd Mon Sep 17 00:00:00 2001 From: Maciej Żenczykowski Date: Tue, 5 May 2020 11:57:23 -0700 Subject: Revert "ipv6: add mtu lock check in __ip6_rt_update_pmtu" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 19bda36c4299ce3d7e5bce10bebe01764a655a6d: | ipv6: add mtu lock check in __ip6_rt_update_pmtu | | Prior to this patch, ipv6 didn't do mtu lock check in ip6_update_pmtu. | It leaded to that mtu lock doesn't really work when receiving the pkt | of ICMPV6_PKT_TOOBIG. | | This patch is to add mtu lock check in __ip6_rt_update_pmtu just as ipv4 | did in __ip_rt_update_pmtu. The above reasoning is incorrect. IPv6 *requires* icmp based pmtu to work. There's already a comment to this effect elsewhere in the kernel: $ git grep -p -B1 -A3 'RTAX_MTU lock' net/ipv6/route.c=4813= static int rt6_mtu_change_route(struct fib6_info *f6i, void *p_arg) ... /* In IPv6 pmtu discovery is not optional, so that RTAX_MTU lock cannot disable it. We still use this lock to block changes caused by addrconf/ndisc. */ This reverts to the pre-4.9 behaviour. Cc: Eric Dumazet Cc: Willem de Bruijn Cc: Xin Long Cc: Hannes Frederic Sowa Signed-off-by: Maciej Żenczykowski Fixes: 19bda36c4299 ("ipv6: add mtu lock check in __ip6_rt_update_pmtu") Signed-off-by: David S. Miller --- net/ipv6/route.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8d418038fe32..ff847a324220 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2722,8 +2722,10 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, const struct in6_addr *daddr, *saddr; struct rt6_info *rt6 = (struct rt6_info *)dst; - if (dst_metric_locked(dst, RTAX_MTU)) - return; + /* Note: do *NOT* check dst_metric_locked(dst, RTAX_MTU) + * IPv6 pmtu discovery isn't optional, so 'mtu lock' cannot disable it. + * [see also comment in rt6_mtu_change_route()] + */ if (iph) { daddr = &iph->daddr; -- cgit v1.2.3-59-g8ed1b From ff8ce319e9c25e920d994cc35236f0bb32dfc8f3 Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Thu, 7 May 2020 23:13:20 +0800 Subject: net: microchip: encx24j600: add missed kthread_stop This driver calls kthread_run() in probe, but forgets to call kthread_stop() in probe failure and remove. Add the missed kthread_stop() to fix it. Signed-off-by: Chuhong Yuan Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/encx24j600.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c index 39925e4bf2ec..b25a13da900a 100644 --- a/drivers/net/ethernet/microchip/encx24j600.c +++ b/drivers/net/ethernet/microchip/encx24j600.c @@ -1070,7 +1070,7 @@ static int encx24j600_spi_probe(struct spi_device *spi) if (unlikely(ret)) { netif_err(priv, probe, ndev, "Error %d initializing card encx24j600 card\n", ret); - goto out_free; + goto out_stop; } eidled = encx24j600_read_reg(priv, EIDLED); @@ -1088,6 +1088,8 @@ static int encx24j600_spi_probe(struct spi_device *spi) out_unregister: unregister_netdev(priv->ndev); +out_stop: + kthread_stop(priv->kworker_task); out_free: free_netdev(ndev); @@ -1100,6 +1102,7 @@ static int encx24j600_spi_remove(struct spi_device *spi) struct encx24j600_priv *priv = dev_get_drvdata(&spi->dev); unregister_netdev(priv->ndev); + kthread_stop(priv->kworker_task); free_netdev(priv->ndev); -- cgit v1.2.3-59-g8ed1b From 7d14b0d2b9b317cfc14161143e2006b95a5da9b1 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 7 May 2020 18:53:24 +0200 Subject: mptcp: set correct vfs info for subflows When a subflow is created via mptcp_subflow_create_socket(), a new 'struct socket' is allocated, with a new i_ino value. When inspecting TCP sockets via the procfs and or the diag interface, the above ones are not related to the process owning the MPTCP master socket, even if they are a logical part of it ('ss -p' shows an empty process field) Additionally, subflows created by the path manager get the uid/gid from the running workqueue. Subflows are part of the owning MPTCP master socket, let's adjust the vfs info to reflect this. After this patch, 'ss' correctly displays subflows as belonging to the msk socket creator. Fixes: 2303f994b3e1 ("mptcp: Associate MPTCP context with TCP socket") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/subflow.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 67a4e35d4838..4931a29a6f08 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1012,6 +1012,16 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock) if (err) return err; + /* the newly created socket really belongs to the owning MPTCP master + * socket, even if for additional subflows the allocation is performed + * by a kernel workqueue. Adjust inode references, so that the + * procfs/diag interaces really show this one belonging to the correct + * user. + */ + SOCK_INODE(sf)->i_ino = SOCK_INODE(sk->sk_socket)->i_ino; + SOCK_INODE(sf)->i_uid = SOCK_INODE(sk->sk_socket)->i_uid; + SOCK_INODE(sf)->i_gid = SOCK_INODE(sk->sk_socket)->i_gid; + subflow = mptcp_subflow_ctx(sf->sk); pr_debug("subflow=%p", subflow); -- cgit v1.2.3-59-g8ed1b From dd912306ff008891c82cd9f63e8181e47a9cb2fb Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 7 May 2020 12:19:03 -0700 Subject: net: fix a potential recursive NETDEV_FEAT_CHANGE syzbot managed to trigger a recursive NETDEV_FEAT_CHANGE event between bonding master and slave. I managed to find a reproducer for this: ip li set bond0 up ifenslave bond0 eth0 brctl addbr br0 ethtool -K eth0 lro off brctl addif br0 bond0 ip li set br0 up When a NETDEV_FEAT_CHANGE event is triggered on a bonding slave, it captures this and calls bond_compute_features() to fixup its master's and other slaves' features. However, when syncing with its lower devices by netdev_sync_lower_features() this event is triggered again on slaves when the LRO feature fails to change, so it goes back and forth recursively until the kernel stack is exhausted. Commit 17b85d29e82c intentionally lets __netdev_update_features() return -1 for such a failure case, so we have to just rely on the existing check inside netdev_sync_lower_features() and skip NETDEV_FEAT_CHANGE event only for this specific failure case. Fixes: fd867d51f889 ("net/core: generic support for disabling netdev features down stack") Reported-by: syzbot+e73ceacfd8560cc8a3ca@syzkaller.appspotmail.com Reported-by: syzbot+c2fb6f9ddcea95ba49b5@syzkaller.appspotmail.com Cc: Jarod Wilson Cc: Nikolay Aleksandrov Cc: Josh Poimboeuf Cc: Jann Horn Reviewed-by: Jay Vosburgh Signed-off-by: Cong Wang Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/core/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 522288177bbd..6d327b7aa813 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8907,11 +8907,13 @@ static void netdev_sync_lower_features(struct net_device *upper, netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n", &feature, lower->name); lower->wanted_features &= ~feature; - netdev_update_features(lower); + __netdev_update_features(lower); if (unlikely(lower->features & feature)) netdev_WARN(upper, "failed to disable %pNF on %s!\n", &feature, lower->name); + else + netdev_features_change(lower); } } } -- cgit v1.2.3-59-g8ed1b From b5f2006144c6ae941726037120fa1001ddede784 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 7 May 2020 18:35:39 -0700 Subject: ipc/mqueue.c: change __do_notify() to bypass check_kill_permission() Commit cc731525f26a ("signal: Remove kernel interal si_code magic") changed the value of SI_FROMUSER(SI_MESGQ), this means that mq_notify() no longer works if the sender doesn't have rights to send a signal. Change __do_notify() to use do_send_sig_info() instead of kill_pid_info() to avoid check_kill_permission(). This needs the additional notify.sigev_signo != 0 check, shouldn't we change do_mq_notify() to deny sigev_signo == 0 ? Test-case: #include #include #include #include #include static int notified; static void sigh(int sig) { notified = 1; } int main(void) { signal(SIGIO, sigh); int fd = mq_open("/mq", O_RDWR|O_CREAT, 0666, NULL); assert(fd >= 0); struct sigevent se = { .sigev_notify = SIGEV_SIGNAL, .sigev_signo = SIGIO, }; assert(mq_notify(fd, &se) == 0); if (!fork()) { assert(setuid(1) == 0); mq_send(fd, "",1,0); return 0; } wait(NULL); mq_unlink("/mq"); assert(notified); return 0; } [manfred@colorfullife.com: 1) Add self_exec_id evaluation so that the implementation matches do_notify_parent 2) use PIDTYPE_TGID everywhere] Fixes: cc731525f26a ("signal: Remove kernel interal si_code magic") Reported-by: Yoji Signed-off-by: Oleg Nesterov Signed-off-by: Manfred Spraul Signed-off-by: Andrew Morton Acked-by: "Eric W. Biederman" Cc: Davidlohr Bueso Cc: Markus Elfring Cc: <1vier1@web.de> Cc: Link: http://lkml.kernel.org/r/e2a782e4-eab9-4f5c-c749-c07a8f7a4e66@colorfullife.com Signed-off-by: Linus Torvalds --- ipc/mqueue.c | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/ipc/mqueue.c b/ipc/mqueue.c index dc8307bf2d74..beff0cfcd1e8 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -142,6 +142,7 @@ struct mqueue_inode_info { struct sigevent notify; struct pid *notify_owner; + u32 notify_self_exec_id; struct user_namespace *notify_user_ns; struct user_struct *user; /* user who created, for accounting */ struct sock *notify_sock; @@ -773,28 +774,44 @@ static void __do_notify(struct mqueue_inode_info *info) * synchronously. */ if (info->notify_owner && info->attr.mq_curmsgs == 1) { - struct kernel_siginfo sig_i; switch (info->notify.sigev_notify) { case SIGEV_NONE: break; - case SIGEV_SIGNAL: - /* sends signal */ + case SIGEV_SIGNAL: { + struct kernel_siginfo sig_i; + struct task_struct *task; + + /* do_mq_notify() accepts sigev_signo == 0, why?? */ + if (!info->notify.sigev_signo) + break; clear_siginfo(&sig_i); sig_i.si_signo = info->notify.sigev_signo; sig_i.si_errno = 0; sig_i.si_code = SI_MESGQ; sig_i.si_value = info->notify.sigev_value; - /* map current pid/uid into info->owner's namespaces */ rcu_read_lock(); + /* map current pid/uid into info->owner's namespaces */ sig_i.si_pid = task_tgid_nr_ns(current, ns_of_pid(info->notify_owner)); - sig_i.si_uid = from_kuid_munged(info->notify_user_ns, current_uid()); + sig_i.si_uid = from_kuid_munged(info->notify_user_ns, + current_uid()); + /* + * We can't use kill_pid_info(), this signal should + * bypass check_kill_permission(). It is from kernel + * but si_fromuser() can't know this. + * We do check the self_exec_id, to avoid sending + * signals to programs that don't expect them. + */ + task = pid_task(info->notify_owner, PIDTYPE_TGID); + if (task && task->self_exec_id == + info->notify_self_exec_id) { + do_send_sig_info(info->notify.sigev_signo, + &sig_i, task, PIDTYPE_TGID); + } rcu_read_unlock(); - - kill_pid_info(info->notify.sigev_signo, - &sig_i, info->notify_owner); break; + } case SIGEV_THREAD: set_cookie(info->notify_cookie, NOTIFY_WOKENUP); netlink_sendskb(info->notify_sock, info->notify_cookie); @@ -1383,6 +1400,7 @@ retry: info->notify.sigev_signo = notification->sigev_signo; info->notify.sigev_value = notification->sigev_value; info->notify.sigev_notify = SIGEV_SIGNAL; + info->notify_self_exec_id = current->self_exec_id; break; } -- cgit v1.2.3-59-g8ed1b From 11d6761218d19ca06ae5387f4e3692c4fa9e7493 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Thu, 7 May 2020 18:35:43 -0700 Subject: mm, memcg: fix error return value of mem_cgroup_css_alloc() When I run my memcg testcase which creates lots of memcgs, I found there're unexpected out of memory logs while there're still enough available free memory. The error log is mkdir: cannot create directory 'foo.65533': Cannot allocate memory The reason is when we try to create more than MEM_CGROUP_ID_MAX memcgs, an -ENOMEM errno will be set by mem_cgroup_css_alloc(), but the right errno should be -ENOSPC "No space left on device", which is an appropriate errno for userspace's failed mkdir. As the errno really misled me, we should make it right. After this patch, the error log will be mkdir: cannot create directory 'foo.65533': No space left on device [akpm@linux-foundation.org: s/EBUSY/ENOSPC/, per Michal] [akpm@linux-foundation.org: s/EBUSY/ENOSPC/, per Michal] Fixes: 73f576c04b94 ("mm: memcontrol: fix cgroup creation failure after many small jobs") Suggested-by: Matthew Wilcox Signed-off-by: Yafang Shao Signed-off-by: Andrew Morton Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: Vladimir Davydov Link: http://lkml.kernel.org/r/20200407063621.GA18914@dhcp22.suse.cz Link: http://lkml.kernel.org/r/1586192163-20099-1-git-send-email-laoar.shao@gmail.com Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 5beea03dd58a..a3b97f103966 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4990,19 +4990,22 @@ static struct mem_cgroup *mem_cgroup_alloc(void) unsigned int size; int node; int __maybe_unused i; + long error = -ENOMEM; size = sizeof(struct mem_cgroup); size += nr_node_ids * sizeof(struct mem_cgroup_per_node *); memcg = kzalloc(size, GFP_KERNEL); if (!memcg) - return NULL; + return ERR_PTR(error); memcg->id.id = idr_alloc(&mem_cgroup_idr, NULL, 1, MEM_CGROUP_ID_MAX, GFP_KERNEL); - if (memcg->id.id < 0) + if (memcg->id.id < 0) { + error = memcg->id.id; goto fail; + } memcg->vmstats_local = alloc_percpu(struct memcg_vmstats_percpu); if (!memcg->vmstats_local) @@ -5046,7 +5049,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void) fail: mem_cgroup_id_remove(memcg); __mem_cgroup_free(memcg); - return NULL; + return ERR_PTR(error); } static struct cgroup_subsys_state * __ref @@ -5057,8 +5060,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) long error = -ENOMEM; memcg = mem_cgroup_alloc(); - if (!memcg) - return ERR_PTR(error); + if (IS_ERR(memcg)) + return ERR_CAST(memcg); WRITE_ONCE(memcg->high, PAGE_COUNTER_MAX); memcg->soft_limit = PAGE_COUNTER_MAX; @@ -5108,7 +5111,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) fail: mem_cgroup_id_remove(memcg); mem_cgroup_free(memcg); - return ERR_PTR(-ENOMEM); + return ERR_PTR(error); } static int mem_cgroup_css_online(struct cgroup_subsys_state *css) -- cgit v1.2.3-59-g8ed1b From e84fe99b68ce353c37ceeecc95dce9696c976556 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 7 May 2020 18:35:46 -0700 Subject: mm/page_alloc: fix watchdog soft lockups during set_zone_contiguous() Without CONFIG_PREEMPT, it can happen that we get soft lockups detected, e.g., while booting up. watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [swapper/0:1] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.6.0-next-20200331+ #4 Hardware name: Red Hat KVM, BIOS 1.11.1-4.module+el8.1.0+4066+0f1aadab 04/01/2014 RIP: __pageblock_pfn_to_page+0x134/0x1c0 Call Trace: set_zone_contiguous+0x56/0x70 page_alloc_init_late+0x166/0x176 kernel_init_freeable+0xfa/0x255 kernel_init+0xa/0x106 ret_from_fork+0x35/0x40 The issue becomes visible when having a lot of memory (e.g., 4TB) assigned to a single NUMA node - a system that can easily be created using QEMU. Inside VMs on a hypervisor with quite some memory overcommit, this is fairly easy to trigger. Signed-off-by: David Hildenbrand Signed-off-by: Andrew Morton Reviewed-by: Pavel Tatashin Reviewed-by: Pankaj Gupta Reviewed-by: Baoquan He Reviewed-by: Shile Zhang Acked-by: Michal Hocko Cc: Kirill Tkhai Cc: Shile Zhang Cc: Pavel Tatashin Cc: Daniel Jordan Cc: Michal Hocko Cc: Alexander Duyck Cc: Baoquan He Cc: Oscar Salvador Cc: Link: http://lkml.kernel.org/r/20200416073417.5003-1-david@redhat.com Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 69827d4fa052..f31eda080823 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1607,6 +1607,7 @@ void set_zone_contiguous(struct zone *zone) if (!__pageblock_pfn_to_page(block_start_pfn, block_end_pfn, zone)) return; + cond_resched(); } /* We confirm that there is no hole */ -- cgit v1.2.3-59-g8ed1b From 324cfb19567c80ed71d7a02f1d5ff4621902f4c3 Mon Sep 17 00:00:00 2001 From: Maciej Grochowski Date: Thu, 7 May 2020 18:35:49 -0700 Subject: kernel/kcov.c: fix typos in kcov_remote_start documentation Signed-off-by: Maciej Grochowski Signed-off-by: Andrew Morton Reviewed-by: Andrey Konovalov Link: http://lkml.kernel.org/r/20200420030259.31674-1-maciek.grochowski@gmail.com Signed-off-by: Linus Torvalds --- kernel/kcov.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index f50354202dbe..8accc9722a81 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -740,8 +740,8 @@ static const struct file_operations kcov_fops = { * kcov_remote_handle() with KCOV_SUBSYSTEM_COMMON as the subsystem id and an * arbitrary 4-byte non-zero number as the instance id). This common handle * then gets saved into the task_struct of the process that issued the - * KCOV_REMOTE_ENABLE ioctl. When this proccess issues system calls that spawn - * kernel threads, the common handle must be retrived via kcov_common_handle() + * KCOV_REMOTE_ENABLE ioctl. When this process issues system calls that spawn + * kernel threads, the common handle must be retrieved via kcov_common_handle() * and passed to the spawned threads via custom annotations. Those kernel * threads must in turn be annotated with kcov_remote_start(common_handle) and * kcov_remote_stop(). All of the threads that are spawned by the same process -- cgit v1.2.3-59-g8ed1b From e08df079b23e2e982df15aa340bfbaf50f297504 Mon Sep 17 00:00:00 2001 From: Ivan Delalande Date: Thu, 7 May 2020 18:35:53 -0700 Subject: scripts/decodecode: fix trapping instruction formatting If the trapping instruction contains a ':', for a memory access through segment registers for example, the sed substitution will insert the '*' marker in the middle of the instruction instead of the line address: 2b: 65 48 0f c7 0f cmpxchg16b %gs:*(%rdi) <-- trapping instruction I started to think I had forgotten some quirk of the assembly syntax before noticing that it was actually coming from the script. Fix it to add the address marker at the right place for these instructions: 28: 49 8b 06 mov (%r14),%rax 2b:* 65 48 0f c7 0f cmpxchg16b %gs:(%rdi) <-- trapping instruction 30: 0f 94 c0 sete %al Fixes: 18ff44b189e2 ("scripts/decodecode: make faulting insn ptr more robust") Signed-off-by: Ivan Delalande Signed-off-by: Andrew Morton Reviewed-by: Borislav Petkov Link: http://lkml.kernel.org/r/20200419223653.GA31248@visor Signed-off-by: Linus Torvalds --- scripts/decodecode | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/decodecode b/scripts/decodecode index ba8b8d5834e6..fbdb325cdf4f 100755 --- a/scripts/decodecode +++ b/scripts/decodecode @@ -126,7 +126,7 @@ faultlinenum=$(( $(wc -l $T.oo | cut -d" " -f1) - \ faultline=`cat $T.dis | head -1 | cut -d":" -f2-` faultline=`echo "$faultline" | sed -e 's/\[/\\\[/g; s/\]/\\\]/g'` -cat $T.oo | sed -e "${faultlinenum}s/^\(.*:\)\(.*\)/\1\*\2\t\t<-- trapping instruction/" +cat $T.oo | sed -e "${faultlinenum}s/^\([^:]*:\)\(.*\)/\1\*\2\t\t<-- trapping instruction/" echo cat $T.aa cleanup -- cgit v1.2.3-59-g8ed1b From 996ed22c7a5251d76dcdfe5026ef8230e90066d9 Mon Sep 17 00:00:00 2001 From: Janakarajan Natarajan Date: Thu, 7 May 2020 18:35:56 -0700 Subject: arch/x86/kvm/svm/sev.c: change flag passed to GUP fast in sev_pin_memory() When trying to lock read-only pages, sev_pin_memory() fails because FOLL_WRITE is used as the flag for get_user_pages_fast(). Commit 73b0140bf0fe ("mm/gup: change GUP fast to use flags rather than a write 'bool'") updated the get_user_pages_fast() call sites to use flags, but incorrectly updated the call in sev_pin_memory(). As the original coding of this call was correct, revert the change made by that commit. Fixes: 73b0140bf0fe ("mm/gup: change GUP fast to use flags rather than a write 'bool'") Signed-off-by: Janakarajan Natarajan Signed-off-by: Andrew Morton Reviewed-by: Ira Weiny Cc: Paolo Bonzini Cc: Sean Christopherson Cc: Vitaly Kuznetsov Cc: Wanpeng Li Cc: Jim Mattson Cc: Joerg Roedel Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H . Peter Anvin" Cc: Mike Marshall Cc: Brijesh Singh Link: http://lkml.kernel.org/r/20200423152419.87202-1-Janakarajan.Natarajan@amd.com Signed-off-by: Linus Torvalds --- arch/x86/kvm/svm/sev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index cf912b4aaba8..89f7f3aebd31 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -345,7 +345,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr, return NULL; /* Pin the user virtual address. */ - npinned = get_user_pages_fast(uaddr, npages, FOLL_WRITE, pages); + npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages); if (npinned != npages) { pr_err("SEV: Failure locking %lu pages.\n", npages); goto err; -- cgit v1.2.3-59-g8ed1b From 0c54a6a44bf3d41e76ce3f583a6ece267618df2e Mon Sep 17 00:00:00 2001 From: Khazhismel Kumykov Date: Thu, 7 May 2020 18:35:59 -0700 Subject: eventpoll: fix missing wakeup for ovflist in ep_poll_callback In the event that we add to ovflist, before commit 339ddb53d373 ("fs/epoll: remove unnecessary wakeups of nested epoll") we would be woken up by ep_scan_ready_list, and did no wakeup in ep_poll_callback. With that wakeup removed, if we add to ovflist here, we may never wake up. Rather than adding back the ep_scan_ready_list wakeup - which was resulting in unnecessary wakeups, trigger a wake-up in ep_poll_callback. We noticed that one of our workloads was missing wakeups starting with 339ddb53d373 and upon manual inspection, this wakeup seemed missing to me. With this patch added, we no longer see missing wakeups. I haven't yet tried to make a small reproducer, but the existing kselftests in filesystem/epoll passed for me with this patch. [khazhy@google.com: use if/elif instead of goto + cleanup suggested by Roman] Link: http://lkml.kernel.org/r/20200424190039.192373-1-khazhy@google.com Fixes: 339ddb53d373 ("fs/epoll: remove unnecessary wakeups of nested epoll") Signed-off-by: Khazhismel Kumykov Signed-off-by: Andrew Morton Reviewed-by: Roman Penyaev Cc: Alexander Viro Cc: Roman Penyaev Cc: Heiher Cc: Jason Baron Cc: Link: http://lkml.kernel.org/r/20200424025057.118641-1-khazhy@google.com Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 8c596641a72b..d6ba0e52439b 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1171,6 +1171,10 @@ static inline bool chain_epi_lockless(struct epitem *epi) { struct eventpoll *ep = epi->ep; + /* Fast preliminary check */ + if (epi->next != EP_UNACTIVE_PTR) + return false; + /* Check that the same epi has not been just chained from another CPU */ if (cmpxchg(&epi->next, EP_UNACTIVE_PTR, NULL) != EP_UNACTIVE_PTR) return false; @@ -1237,16 +1241,12 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v * chained in ep->ovflist and requeued later on. */ if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) { - if (epi->next == EP_UNACTIVE_PTR && - chain_epi_lockless(epi)) + if (chain_epi_lockless(epi)) + ep_pm_stay_awake_rcu(epi); + } else if (!ep_is_linked(epi)) { + /* In the usual case, add event to ready list. */ + if (list_add_tail_lockless(&epi->rdllink, &ep->rdllist)) ep_pm_stay_awake_rcu(epi); - goto out_unlock; - } - - /* If this file is already in the ready list we exit soon */ - if (!ep_is_linked(epi) && - list_add_tail_lockless(&epi->rdllink, &ep->rdllist)) { - ep_pm_stay_awake_rcu(epi); } /* -- cgit v1.2.3-59-g8ed1b From 50e36be1fb9572b2e4f2753340bdce3116bf2ce7 Mon Sep 17 00:00:00 2001 From: Aymeric Agon-Rambosson Date: Thu, 7 May 2020 18:36:03 -0700 Subject: scripts/gdb: repair rb_first() and rb_last() The current implementations of the rb_first() and rb_last() gdb functions have a variable that references itself in its instanciation, which causes the function to throw an error if a specific condition on the argument is met. The original author rather intended to reference the argument and made a typo. Referring the argument instead makes the function work as intended. Signed-off-by: Aymeric Agon-Rambosson Signed-off-by: Andrew Morton Reviewed-by: Stephen Boyd Cc: Jan Kiszka Cc: Kieran Bingham Cc: Douglas Anderson Cc: Nikolay Borisov Cc: Jackie Liu Cc: Jason Wessel Link: http://lkml.kernel.org/r/20200427051029.354840-1-aymeric.agon@yandex.com Signed-off-by: Linus Torvalds --- scripts/gdb/linux/rbtree.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/gdb/linux/rbtree.py b/scripts/gdb/linux/rbtree.py index 39db889b874c..c4b991607917 100644 --- a/scripts/gdb/linux/rbtree.py +++ b/scripts/gdb/linux/rbtree.py @@ -12,7 +12,7 @@ rb_node_type = utils.CachedType("struct rb_node") def rb_first(root): if root.type == rb_root_type.get_type(): - node = node.address.cast(rb_root_type.get_type().pointer()) + node = root.address.cast(rb_root_type.get_type().pointer()) elif root.type != rb_root_type.get_type().pointer(): raise gdb.GdbError("Must be struct rb_root not {}".format(root.type)) @@ -28,7 +28,7 @@ def rb_first(root): def rb_last(root): if root.type == rb_root_type.get_type(): - node = node.address.cast(rb_root_type.get_type().pointer()) + node = root.address.cast(rb_root_type.get_type().pointer()) elif root.type != rb_root_type.get_type().pointer(): raise gdb.GdbError("Must be struct rb_root not {}".format(root.type)) -- cgit v1.2.3-59-g8ed1b From cbfc35a48609ceac978791e3ab9dde0c01f8cb20 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 7 May 2020 18:36:06 -0700 Subject: mm/slub: fix incorrect interpretation of s->offset In a couple of places in the slub memory allocator, the code uses "s->offset" as a check to see if the free pointer is put right after the object. That check is no longer true with commit 3202fa62fb43 ("slub: relocate freelist pointer to middle of object"). As a result, echoing "1" into the validate sysfs file, e.g. of dentry, may cause a bunch of "Freepointer corrupt" error reports like the following to appear with the system in panic afterwards. ============================================================================= BUG dentry(666:pmcd.service) (Tainted: G B): Freepointer corrupt ----------------------------------------------------------------------------- To fix it, use the check "s->offset == s->inuse" in the new helper function freeptr_outside_object() instead. Also add another helper function get_info_end() to return the end of info block (inuse + free pointer if not overlapping with object). Fixes: 3202fa62fb43 ("slub: relocate freelist pointer to middle of object") Signed-off-by: Waiman Long Signed-off-by: Andrew Morton Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Kees Cook Acked-by: Rafael Aquini Cc: Christoph Lameter Cc: Vitaly Nikolenko Cc: Silvio Cesare Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Markus Elfring Cc: Changbin Du Link: http://lkml.kernel.org/r/20200429135328.26976-1-longman@redhat.com Signed-off-by: Linus Torvalds --- mm/slub.c | 45 ++++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 9bf44955c4f1..b762450fc9f0 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -551,15 +551,32 @@ static void print_section(char *level, char *text, u8 *addr, metadata_access_disable(); } +/* + * See comment in calculate_sizes(). + */ +static inline bool freeptr_outside_object(struct kmem_cache *s) +{ + return s->offset >= s->inuse; +} + +/* + * Return offset of the end of info block which is inuse + free pointer if + * not overlapping with object. + */ +static inline unsigned int get_info_end(struct kmem_cache *s) +{ + if (freeptr_outside_object(s)) + return s->inuse + sizeof(void *); + else + return s->inuse; +} + static struct track *get_track(struct kmem_cache *s, void *object, enum track_item alloc) { struct track *p; - if (s->offset) - p = object + s->offset + sizeof(void *); - else - p = object + s->inuse; + p = object + get_info_end(s); return p + alloc; } @@ -686,10 +703,7 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) print_section(KERN_ERR, "Redzone ", p + s->object_size, s->inuse - s->object_size); - if (s->offset) - off = s->offset + sizeof(void *); - else - off = s->inuse; + off = get_info_end(s); if (s->flags & SLAB_STORE_USER) off += 2 * sizeof(struct track); @@ -782,7 +796,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page, * object address * Bytes of the object to be managed. * If the freepointer may overlay the object then the free - * pointer is the first word of the object. + * pointer is at the middle of the object. * * Poisoning uses 0x6b (POISON_FREE) and the last byte is * 0xa5 (POISON_END) @@ -816,11 +830,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page, static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p) { - unsigned long off = s->inuse; /* The end of info */ - - if (s->offset) - /* Freepointer is placed after the object. */ - off += sizeof(void *); + unsigned long off = get_info_end(s); /* The end of info */ if (s->flags & SLAB_STORE_USER) /* We also have user information there */ @@ -907,7 +917,7 @@ static int check_object(struct kmem_cache *s, struct page *page, check_pad_bytes(s, page, p); } - if (!s->offset && val == SLUB_RED_ACTIVE) + if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE) /* * Object and freepointer overlap. Cannot check * freepointer while object is allocated. @@ -3587,6 +3597,11 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) * * This is the case if we do RCU, have a constructor or * destructor or are poisoning the objects. + * + * The assumption that s->offset >= s->inuse means free + * pointer is outside of the object is used in the + * freeptr_outside_object() function. If that is no + * longer true, the function needs to be modified. */ s->offset = size; size += sizeof(void *); -- cgit v1.2.3-59-g8ed1b From 28307d938fb2e4056ed4c982c06d1503d7719813 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 7 May 2020 18:36:10 -0700 Subject: percpu: make pcpu_alloc() aware of current gfp context Since 5.7-rc1, on btrfs we have a percpu counter initialization for which we always pass a GFP_KERNEL gfp_t argument (this happens since commit 2992df73268f78 ("btrfs: Implement DREW lock")). That is safe in some contextes but not on others where allowing fs reclaim could lead to a deadlock because we are either holding some btrfs lock needed for a transaction commit or holding a btrfs transaction handle open. Because of that we surround the call to the function that initializes the percpu counter with a NOFS context using memalloc_nofs_save() (this is done at btrfs_init_fs_root()). However it turns out that this is not enough to prevent a possible deadlock because percpu_alloc() determines if it is in an atomic context by looking exclusively at the gfp flags passed to it (GFP_KERNEL in this case) and it is not aware that a NOFS context is set. Because percpu_alloc() thinks it is in a non atomic context it locks the pcpu_alloc_mutex. This can result in a btrfs deadlock when pcpu_balance_workfn() is running, has acquired that mutex and is waiting for reclaim, while the btrfs task that called percpu_counter_init() (and therefore percpu_alloc()) is holding either the btrfs commit_root semaphore or a transaction handle (done fs/btrfs/backref.c: iterate_extent_inodes()), which prevents reclaim from finishing as an attempt to commit the current btrfs transaction will deadlock. Lockdep reports this issue with the following trace: ====================================================== WARNING: possible circular locking dependency detected 5.6.0-rc7-btrfs-next-77 #1 Not tainted ------------------------------------------------------ kswapd0/91 is trying to acquire lock: ffff8938a3b3fdc8 (&delayed_node->mutex){+.+.}, at: __btrfs_release_delayed_node.part.0+0x3f/0x320 [btrfs] but task is already holding lock: ffffffffb4f0dbc0 (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x5/0x30 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #4 (fs_reclaim){+.+.}: fs_reclaim_acquire.part.0+0x25/0x30 __kmalloc+0x5f/0x3a0 pcpu_create_chunk+0x19/0x230 pcpu_balance_workfn+0x56a/0x680 process_one_work+0x235/0x5f0 worker_thread+0x50/0x3b0 kthread+0x120/0x140 ret_from_fork+0x3a/0x50 -> #3 (pcpu_alloc_mutex){+.+.}: __mutex_lock+0xa9/0xaf0 pcpu_alloc+0x480/0x7c0 __percpu_counter_init+0x50/0xd0 btrfs_drew_lock_init+0x22/0x70 [btrfs] btrfs_get_fs_root+0x29c/0x5c0 [btrfs] resolve_indirect_refs+0x120/0xa30 [btrfs] find_parent_nodes+0x50b/0xf30 [btrfs] btrfs_find_all_leafs+0x60/0xb0 [btrfs] iterate_extent_inodes+0x139/0x2f0 [btrfs] iterate_inodes_from_logical+0xa1/0xe0 [btrfs] btrfs_ioctl_logical_to_ino+0xb4/0x190 [btrfs] btrfs_ioctl+0x165a/0x3130 [btrfs] ksys_ioctl+0x87/0xc0 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x5c/0x260 entry_SYSCALL_64_after_hwframe+0x49/0xbe -> #2 (&fs_info->commit_root_sem){++++}: down_write+0x38/0x70 btrfs_cache_block_group+0x2ec/0x500 [btrfs] find_free_extent+0xc6a/0x1600 [btrfs] btrfs_reserve_extent+0x9b/0x180 [btrfs] btrfs_alloc_tree_block+0xc1/0x350 [btrfs] alloc_tree_block_no_bg_flush+0x4a/0x60 [btrfs] __btrfs_cow_block+0x122/0x5a0 [btrfs] btrfs_cow_block+0x106/0x240 [btrfs] commit_cowonly_roots+0x55/0x310 [btrfs] btrfs_commit_transaction+0x509/0xb20 [btrfs] sync_filesystem+0x74/0x90 generic_shutdown_super+0x22/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x93/0xc0 exit_to_usermode_loop+0xf9/0x100 do_syscall_64+0x20d/0x260 entry_SYSCALL_64_after_hwframe+0x49/0xbe -> #1 (&space_info->groups_sem){++++}: down_read+0x3c/0x140 find_free_extent+0xef6/0x1600 [btrfs] btrfs_reserve_extent+0x9b/0x180 [btrfs] btrfs_alloc_tree_block+0xc1/0x350 [btrfs] alloc_tree_block_no_bg_flush+0x4a/0x60 [btrfs] __btrfs_cow_block+0x122/0x5a0 [btrfs] btrfs_cow_block+0x106/0x240 [btrfs] btrfs_search_slot+0x50c/0xd60 [btrfs] btrfs_lookup_inode+0x3a/0xc0 [btrfs] __btrfs_update_delayed_inode+0x90/0x280 [btrfs] __btrfs_commit_inode_delayed_items+0x81f/0x870 [btrfs] __btrfs_run_delayed_items+0x8e/0x180 [btrfs] btrfs_commit_transaction+0x31b/0xb20 [btrfs] iterate_supers+0x87/0xf0 ksys_sync+0x60/0xb0 __ia32_sys_sync+0xa/0x10 do_syscall_64+0x5c/0x260 entry_SYSCALL_64_after_hwframe+0x49/0xbe -> #0 (&delayed_node->mutex){+.+.}: __lock_acquire+0xef0/0x1c80 lock_acquire+0xa2/0x1d0 __mutex_lock+0xa9/0xaf0 __btrfs_release_delayed_node.part.0+0x3f/0x320 [btrfs] btrfs_evict_inode+0x40d/0x560 [btrfs] evict+0xd9/0x1c0 dispose_list+0x48/0x70 prune_icache_sb+0x54/0x80 super_cache_scan+0x124/0x1a0 do_shrink_slab+0x176/0x440 shrink_slab+0x23a/0x2c0 shrink_node+0x188/0x6e0 balance_pgdat+0x31d/0x7f0 kswapd+0x238/0x550 kthread+0x120/0x140 ret_from_fork+0x3a/0x50 other info that might help us debug this: Chain exists of: &delayed_node->mutex --> pcpu_alloc_mutex --> fs_reclaim Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(fs_reclaim); lock(pcpu_alloc_mutex); lock(fs_reclaim); lock(&delayed_node->mutex); *** DEADLOCK *** 3 locks held by kswapd0/91: #0: (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x5/0x30 #1: (shrinker_rwsem){++++}, at: shrink_slab+0x12f/0x2c0 #2: (&type->s_umount_key#43){++++}, at: trylock_super+0x16/0x50 stack backtrace: CPU: 1 PID: 91 Comm: kswapd0 Not tainted 5.6.0-rc7-btrfs-next-77 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8f/0xd0 check_noncircular+0x170/0x190 __lock_acquire+0xef0/0x1c80 lock_acquire+0xa2/0x1d0 __mutex_lock+0xa9/0xaf0 __btrfs_release_delayed_node.part.0+0x3f/0x320 [btrfs] btrfs_evict_inode+0x40d/0x560 [btrfs] evict+0xd9/0x1c0 dispose_list+0x48/0x70 prune_icache_sb+0x54/0x80 super_cache_scan+0x124/0x1a0 do_shrink_slab+0x176/0x440 shrink_slab+0x23a/0x2c0 shrink_node+0x188/0x6e0 balance_pgdat+0x31d/0x7f0 kswapd+0x238/0x550 kthread+0x120/0x140 ret_from_fork+0x3a/0x50 This could be fixed by making btrfs pass GFP_NOFS instead of GFP_KERNEL to percpu_counter_init() in contextes where it is not reclaim safe, however that type of approach is discouraged since memalloc_[nofs|noio]_save() were introduced. Therefore this change makes pcpu_alloc() look up into an existing nofs/noio context before deciding whether it is in an atomic context or not. Signed-off-by: Filipe Manana Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Acked-by: Tejun Heo Acked-by: Dennis Zhou Cc: Tejun Heo Cc: Christoph Lameter Link: http://lkml.kernel.org/r/20200430164356.15543-1-fdmanana@kernel.org Signed-off-by: Linus Torvalds --- mm/percpu.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index d7e3bc649f4e..7da7d7737dab 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -80,6 +80,7 @@ #include #include #include +#include #include #include @@ -1557,10 +1558,9 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, gfp_t gfp) { - /* whitelisted flags that can be passed to the backing allocators */ - gfp_t pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); - bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL; - bool do_warn = !(gfp & __GFP_NOWARN); + gfp_t pcpu_gfp; + bool is_atomic; + bool do_warn; static int warn_limit = 10; struct pcpu_chunk *chunk, *next; const char *err; @@ -1569,6 +1569,12 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, void __percpu *ptr; size_t bits, bit_align; + gfp = current_gfp_context(gfp); + /* whitelisted flags that can be passed to the backing allocators */ + pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); + is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL; + do_warn = !(gfp & __GFP_NOWARN); + /* * There is now a minimum allocation size of PCPU_MIN_ALLOC_SIZE, * therefore alignment must be a minimum of that many bytes. -- cgit v1.2.3-59-g8ed1b From 474328c06e3ee75bb6b92826fec90fdc8ef3c573 Mon Sep 17 00:00:00 2001 From: Roman Penyaev Date: Thu, 7 May 2020 18:36:13 -0700 Subject: kselftests: introduce new epoll60 testcase for catching lost wakeups This test case catches lost wake up introduced by commit 339ddb53d373 ("fs/epoll: remove unnecessary wakeups of nested epoll") The test is simple: we have 10 threads and 10 event fds. Each thread can harvest only 1 event. 1 producer fires all 10 events at once and waits that all 10 events will be observed by 10 threads. In case of lost wakeup epoll_wait() will timeout and 0 will be returned. Test case catches two sort of problems: forgotten wakeup on event, which hits the ->ovflist list, this problem was fixed by: 5a2513239750 ("eventpoll: fix missing wakeup for ovflist in ep_poll_callback") the other problem is when several sequential events hit the same waiting thread, thus other waiters get no wakeups. Problem is fixed in the following patch. Signed-off-by: Roman Penyaev Signed-off-by: Andrew Morton Cc: Khazhismel Kumykov Cc: Alexander Viro Cc: Heiher Cc: Jason Baron Link: http://lkml.kernel.org/r/20200430130326.1368509-1-rpenyaev@suse.de Signed-off-by: Linus Torvalds --- .../filesystems/epoll/epoll_wakeup_test.c | 146 +++++++++++++++++++++ 1 file changed, 146 insertions(+) diff --git a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c index 11eee0b60040..d979ff14775a 100644 --- a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c +++ b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c @@ -3,6 +3,7 @@ #define _GNU_SOURCE #include #include +#include #include #include #include @@ -3136,4 +3137,149 @@ TEST(epoll59) close(ctx.sfd[0]); } +enum { + EPOLL60_EVENTS_NR = 10, +}; + +struct epoll60_ctx { + volatile int stopped; + int ready; + int waiters; + int epfd; + int evfd[EPOLL60_EVENTS_NR]; +}; + +static void *epoll60_wait_thread(void *ctx_) +{ + struct epoll60_ctx *ctx = ctx_; + struct epoll_event e; + sigset_t sigmask; + uint64_t v; + int ret; + + /* Block SIGUSR1 */ + sigemptyset(&sigmask); + sigaddset(&sigmask, SIGUSR1); + sigprocmask(SIG_SETMASK, &sigmask, NULL); + + /* Prepare empty mask for epoll_pwait() */ + sigemptyset(&sigmask); + + while (!ctx->stopped) { + /* Mark we are ready */ + __atomic_fetch_add(&ctx->ready, 1, __ATOMIC_ACQUIRE); + + /* Start when all are ready */ + while (__atomic_load_n(&ctx->ready, __ATOMIC_ACQUIRE) && + !ctx->stopped); + + /* Account this waiter */ + __atomic_fetch_add(&ctx->waiters, 1, __ATOMIC_ACQUIRE); + + ret = epoll_pwait(ctx->epfd, &e, 1, 2000, &sigmask); + if (ret != 1) { + /* We expect only signal delivery on stop */ + assert(ret < 0 && errno == EINTR && "Lost wakeup!\n"); + assert(ctx->stopped); + break; + } + + ret = read(e.data.fd, &v, sizeof(v)); + /* Since we are on ET mode, thus each thread gets its own fd. */ + assert(ret == sizeof(v)); + + __atomic_fetch_sub(&ctx->waiters, 1, __ATOMIC_RELEASE); + } + + return NULL; +} + +static inline unsigned long long msecs(void) +{ + struct timespec ts; + unsigned long long msecs; + + clock_gettime(CLOCK_REALTIME, &ts); + msecs = ts.tv_sec * 1000ull; + msecs += ts.tv_nsec / 1000000ull; + + return msecs; +} + +static inline int count_waiters(struct epoll60_ctx *ctx) +{ + return __atomic_load_n(&ctx->waiters, __ATOMIC_ACQUIRE); +} + +TEST(epoll60) +{ + struct epoll60_ctx ctx = { 0 }; + pthread_t waiters[ARRAY_SIZE(ctx.evfd)]; + struct epoll_event e; + int i, n, ret; + + signal(SIGUSR1, signal_handler); + + ctx.epfd = epoll_create1(0); + ASSERT_GE(ctx.epfd, 0); + + /* Create event fds */ + for (i = 0; i < ARRAY_SIZE(ctx.evfd); i++) { + ctx.evfd[i] = eventfd(0, EFD_NONBLOCK); + ASSERT_GE(ctx.evfd[i], 0); + + e.events = EPOLLIN | EPOLLET; + e.data.fd = ctx.evfd[i]; + ASSERT_EQ(epoll_ctl(ctx.epfd, EPOLL_CTL_ADD, ctx.evfd[i], &e), 0); + } + + /* Create waiter threads */ + for (i = 0; i < ARRAY_SIZE(waiters); i++) + ASSERT_EQ(pthread_create(&waiters[i], NULL, + epoll60_wait_thread, &ctx), 0); + + for (i = 0; i < 300; i++) { + uint64_t v = 1, ms; + + /* Wait for all to be ready */ + while (__atomic_load_n(&ctx.ready, __ATOMIC_ACQUIRE) != + ARRAY_SIZE(ctx.evfd)) + ; + + /* Steady, go */ + __atomic_fetch_sub(&ctx.ready, ARRAY_SIZE(ctx.evfd), + __ATOMIC_ACQUIRE); + + /* Wait all have gone to kernel */ + while (count_waiters(&ctx) != ARRAY_SIZE(ctx.evfd)) + ; + + /* 1ms should be enough to schedule away */ + usleep(1000); + + /* Quickly signal all handles at once */ + for (n = 0; n < ARRAY_SIZE(ctx.evfd); n++) { + ret = write(ctx.evfd[n], &v, sizeof(v)); + ASSERT_EQ(ret, sizeof(v)); + } + + /* Busy loop for 1s and wait for all waiters to wake up */ + ms = msecs(); + while (count_waiters(&ctx) && msecs() < ms + 1000) + ; + + ASSERT_EQ(count_waiters(&ctx), 0); + } + ctx.stopped = 1; + /* Stop waiters */ + for (i = 0; i < ARRAY_SIZE(waiters); i++) + ret = pthread_kill(waiters[i], SIGUSR1); + for (i = 0; i < ARRAY_SIZE(waiters); i++) + pthread_join(waiters[i], NULL); + + for (i = 0; i < ARRAY_SIZE(waiters); i++) + close(ctx.evfd[i]); + close(ctx.epfd); +} + TEST_HARNESS_MAIN -- cgit v1.2.3-59-g8ed1b From 412895f03cbf9633298111cb4dfde13b7720e2c5 Mon Sep 17 00:00:00 2001 From: Roman Penyaev Date: Thu, 7 May 2020 18:36:16 -0700 Subject: epoll: atomically remove wait entry on wake up This patch does two things: - fixes a lost wakeup introduced by commit 339ddb53d373 ("fs/epoll: remove unnecessary wakeups of nested epoll") - improves performance for events delivery. The description of the problem is the following: if N (>1) threads are waiting on ep->wq for new events and M (>1) events come, it is quite likely that >1 wakeups hit the same wait queue entry, because there is quite a big window between __add_wait_queue_exclusive() and the following __remove_wait_queue() calls in ep_poll() function. This can lead to lost wakeups, because thread, which was woken up, can handle not all the events in ->rdllist. (in better words the problem is described here: https://lkml.org/lkml/2019/10/7/905) The idea of the current patch is to use init_wait() instead of init_waitqueue_entry(). Internally init_wait() sets autoremove_wake_function as a callback, which removes the wait entry atomically (under the wq locks) from the list, thus the next coming wakeup hits the next wait entry in the wait queue, thus preventing lost wakeups. Problem is very well reproduced by the epoll60 test case [1]. Wait entry removal on wakeup has also performance benefits, because there is no need to take a ep->lock and remove wait entry from the queue after the successful wakeup. Here is the timing output of the epoll60 test case: With explicit wakeup from ep_scan_ready_list() (the state of the code prior 339ddb53d373): real 0m6.970s user 0m49.786s sys 0m0.113s After this patch: real 0m5.220s user 0m36.879s sys 0m0.019s The other testcase is the stress-epoll [2], where one thread consumes all the events and other threads produce many events: With explicit wakeup from ep_scan_ready_list() (the state of the code prior 339ddb53d373): threads events/ms run-time ms 8 5427 1474 16 6163 2596 32 6824 4689 64 7060 9064 128 6991 18309 After this patch: threads events/ms run-time ms 8 5598 1429 16 7073 2262 32 7502 4265 64 7640 8376 128 7634 16767 (number of "events/ms" represents event bandwidth, thus higher is better; number of "run-time ms" represents overall time spent doing the benchmark, thus lower is better) [1] tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c [2] https://github.com/rouming/test-tools/blob/master/stress-epoll.c Signed-off-by: Roman Penyaev Signed-off-by: Andrew Morton Reviewed-by: Jason Baron Cc: Khazhismel Kumykov Cc: Alexander Viro Cc: Heiher Cc: Link: http://lkml.kernel.org/r/20200430130326.1368509-2-rpenyaev@suse.de Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index d6ba0e52439b..aba03ee749f8 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1822,7 +1822,6 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, { int res = 0, eavail, timed_out = 0; u64 slack = 0; - bool waiter = false; wait_queue_entry_t wait; ktime_t expires, *to = NULL; @@ -1867,21 +1866,23 @@ fetch_events: */ ep_reset_busy_poll_napi_id(ep); - /* - * We don't have any available event to return to the caller. We need - * to sleep here, and we will be woken by ep_poll_callback() when events - * become available. - */ - if (!waiter) { - waiter = true; - init_waitqueue_entry(&wait, current); - + do { + /* + * Internally init_wait() uses autoremove_wake_function(), + * thus wait entry is removed from the wait queue on each + * wakeup. Why it is important? In case of several waiters + * each new wakeup will hit the next waiter, giving it the + * chance to harvest new event. Otherwise wakeup can be + * lost. This is also good performance-wise, because on + * normal wakeup path no need to call __remove_wait_queue() + * explicitly, thus ep->lock is not taken, which halts the + * event delivery. + */ + init_wait(&wait); write_lock_irq(&ep->lock); __add_wait_queue_exclusive(&ep->wq, &wait); write_unlock_irq(&ep->lock); - } - for (;;) { /* * We don't want to sleep if the ep_poll_callback() sends us * a wakeup in between. That's why we set the task state @@ -1911,10 +1912,20 @@ fetch_events: timed_out = 1; break; } - } + + /* We were woken up, thus go and try to harvest some events */ + eavail = 1; + + } while (0); __set_current_state(TASK_RUNNING); + if (!list_empty_careful(&wait.entry)) { + write_lock_irq(&ep->lock); + __remove_wait_queue(&ep->wq, &wait); + write_unlock_irq(&ep->lock); + } + send_events: /* * Try to transfer events to user space. In case we get 0 events and @@ -1925,12 +1936,6 @@ send_events: !(res = ep_send_events(ep, events, maxevents)) && !timed_out) goto fetch_events; - if (waiter) { - write_lock_irq(&ep->lock); - __remove_wait_queue(&ep->wq, &wait); - write_unlock_irq(&ep->lock); - } - return res; } -- cgit v1.2.3-59-g8ed1b From 17e34526f0a8f81a214d1ee6f7d8ad2a9c9bae33 Mon Sep 17 00:00:00 2001 From: Qiwu Chen Date: Thu, 7 May 2020 18:36:20 -0700 Subject: mm/vmscan: remove unnecessary argument description of isolate_lru_pages() Since commit a9e7c39fa9fd9 ("mm/vmscan.c: remove 7th argument of isolate_lru_pages()"), the explanation of 'mode' argument has been unnecessary. Let's remove it. Signed-off-by: Qiwu Chen Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Link: http://lkml.kernel.org/r/20200501090346.2894-1-chenqiwu@xiaomi.com Signed-off-by: Linus Torvalds --- mm/vmscan.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index b06868fc4926..a37c87b5aee2 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1625,7 +1625,6 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec, * @dst: The temp list to put pages on to. * @nr_scanned: The number of pages that were scanned. * @sc: The scan_control struct for this reclaim session - * @mode: One of the LRU isolation modes * @lru: LRU list id for isolating * * returns how many pages were moved onto *@dst. -- cgit v1.2.3-59-g8ed1b From 8d58f222e85f01da0c0e1fc1e77986c86de889e2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 7 May 2020 18:36:23 -0700 Subject: ubsan: disable UBSAN_ALIGNMENT under COMPILE_TEST The documentation for UBSAN_ALIGNMENT already mentions that it should not be used on all*config builds (and for efficient-unaligned-access architectures), so just refactor the Kconfig to correctly implement this so randconfigs will stop creating insane images that freak out objtool under CONFIG_UBSAN_TRAP (due to the false positives producing functions that never return, etc). Link: http://lkml.kernel.org/r/202005011433.C42EA3E2D@keescook Fixes: 0887a7ebc977 ("ubsan: add trap instrumentation option") Signed-off-by: Kees Cook Reported-by: Randy Dunlap Link: https://lore.kernel.org/linux-next/202004231224.D6B3B650@keescook/ Signed-off-by: Andrew Morton Cc: Josh Poimboeuf Cc: Stephen Rothwell Cc: Peter Zijlstra Cc: Andrey Ryabinin Signed-off-by: Linus Torvalds --- lib/Kconfig.ubsan | 15 ++++++--------- tools/testing/selftests/wireguard/qemu/debug.config | 1 - 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan index 48469c95d78e..929211039bac 100644 --- a/lib/Kconfig.ubsan +++ b/lib/Kconfig.ubsan @@ -60,18 +60,15 @@ config UBSAN_SANITIZE_ALL Enabling this option will get kernel image size increased significantly. -config UBSAN_NO_ALIGNMENT - bool "Disable checking of pointers alignment" - default y if HAVE_EFFICIENT_UNALIGNED_ACCESS +config UBSAN_ALIGNMENT + bool "Enable checks for pointers alignment" + default !HAVE_EFFICIENT_UNALIGNED_ACCESS + depends on !X86 || !COMPILE_TEST help - This option disables the check of unaligned memory accesses. - This option should be used when building allmodconfig. - Disabling this option on architectures that support unaligned + This option enables the check of unaligned memory accesses. + Enabling this option on architectures that support unaligned accesses may produce a lot of false positives. -config UBSAN_ALIGNMENT - def_bool !UBSAN_NO_ALIGNMENT - config TEST_UBSAN tristate "Module for testing for undefined behavior detection" depends on m diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config index 5909e7ef2a5c..9803dbb54181 100644 --- a/tools/testing/selftests/wireguard/qemu/debug.config +++ b/tools/testing/selftests/wireguard/qemu/debug.config @@ -25,7 +25,6 @@ CONFIG_KASAN=y CONFIG_KASAN_INLINE=y CONFIG_UBSAN=y CONFIG_UBSAN_SANITIZE_ALL=y -CONFIG_UBSAN_NO_ALIGNMENT=y CONFIG_UBSAN_NULL=y CONFIG_DEBUG_KMEMLEAK=y CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=8192 -- cgit v1.2.3-59-g8ed1b From 14f69140ff9c92a0928547ceefb153a842e8492c Mon Sep 17 00:00:00 2001 From: Henry Willard Date: Thu, 7 May 2020 18:36:27 -0700 Subject: mm: limit boost_watermark on small zones Commit 1c30844d2dfe ("mm: reclaim small amounts of memory when an external fragmentation event occurs") adds a boost_watermark() function which increases the min watermark in a zone by at least pageblock_nr_pages or the number of pages in a page block. On Arm64, with 64K pages and 512M huge pages, this is 8192 pages or 512M. It does this regardless of the number of managed pages managed in the zone or the likelihood of success. This can put the zone immediately under water in terms of allocating pages from the zone, and can cause a small machine to fail immediately due to OoM. Unlike set_recommended_min_free_kbytes(), which substantially increases min_free_kbytes and is tied to THP, boost_watermark() can be called even if THP is not active. The problem is most likely to appear on architectures such as Arm64 where pageblock_nr_pages is very large. It is desirable to run the kdump capture kernel in as small a space as possible to avoid wasting memory. In some architectures, such as Arm64, there are restrictions on where the capture kernel can run, and therefore, the space available. A capture kernel running in 768M can fail due to OoM immediately after boost_watermark() sets the min in zone DMA32, where most of the memory is, to 512M. It fails even though there is over 500M of free memory. With boost_watermark() suppressed, the capture kernel can run successfully in 448M. This patch limits boost_watermark() to boosting a zone's min watermark only when there are enough pages that the boost will produce positive results. In this case that is estimated to be four times as many pages as pageblock_nr_pages. Mel said: : There is no harm in marking it stable. Clearly it does not happen very : often but it's not impossible. 32-bit x86 is a lot less common now : which would previously have been vulnerable to triggering this easily. : ppc64 has a larger base page size but typically only has one zone. : arm64 is likely the most vulnerable, particularly when CMA is : configured with a small movable zone. Fixes: 1c30844d2dfe ("mm: reclaim small amounts of memory when an external fragmentation event occurs") Signed-off-by: Henry Willard Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Acked-by: Mel Gorman Cc: Vlastimil Babka Cc: Link: http://lkml.kernel.org/r/1588294148-6586-1-git-send-email-henry.willard@oracle.com Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f31eda080823..13cc653122b7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2401,6 +2401,14 @@ static inline void boost_watermark(struct zone *zone) if (!watermark_boost_factor) return; + /* + * Don't bother in zones that are unlikely to produce results. + * On small machines, including kdump capture kernels running + * in a small area, boosting the watermark can cause an out of + * memory situation immediately. + */ + if ((pageblock_nr_pages * 4) > zone_managed_pages(zone)) + return; max_boost = mult_frac(zone->_watermark[WMARK_HIGH], watermark_boost_factor, 10000); -- cgit v1.2.3-59-g8ed1b From e6bfb1bf00852b55f4c771f47ae67004c04d3c87 Mon Sep 17 00:00:00 2001 From: Veerabhadrarao Badiganti Date: Wed, 6 May 2020 20:04:02 +0530 Subject: mmc: core: Check request type before completing the request In the request completion path with CQE, request type is being checked after the request is getting completed. This is resulting in returning the wrong request type and leading to the IO hang issue. ASYNC request type is getting returned for DCMD type requests. Because of this mismatch, mq->cqe_busy flag is never getting cleared and the driver is not invoking blk_mq_hw_run_queue. So requests are not getting dispatched to the LLD from the block layer. All these eventually leading to IO hang issues. So, get the request type before completing the request. Cc: Fixes: 1e8e55b67030 ("mmc: block: Add CQE support") Signed-off-by: Veerabhadrarao Badiganti Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/1588775643-18037-2-git-send-email-vbadigan@codeaurora.org Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 8499b56a15a8..c5367e2c8487 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1370,6 +1370,7 @@ static void mmc_blk_cqe_complete_rq(struct mmc_queue *mq, struct request *req) struct mmc_request *mrq = &mqrq->brq.mrq; struct request_queue *q = req->q; struct mmc_host *host = mq->card->host; + enum mmc_issue_type issue_type = mmc_issue_type(mq, req); unsigned long flags; bool put_card; int err; @@ -1399,7 +1400,7 @@ static void mmc_blk_cqe_complete_rq(struct mmc_queue *mq, struct request *req) spin_lock_irqsave(&mq->lock, flags); - mq->in_flight[mmc_issue_type(mq, req)] -= 1; + mq->in_flight[issue_type] -= 1; put_card = (mmc_tot_in_flight(mq) == 0); -- cgit v1.2.3-59-g8ed1b From 39a22f73744d5baee30b5f134ae2e30b668b66ed Mon Sep 17 00:00:00 2001 From: Sarthak Garg Date: Thu, 7 May 2020 21:45:33 +0530 Subject: mmc: core: Fix recursive locking issue in CQE recovery path Consider the following stack trace -001|raw_spin_lock_irqsave -002|mmc_blk_cqe_complete_rq -003|__blk_mq_complete_request(inline) -003|blk_mq_complete_request(rq) -004|mmc_cqe_timed_out(inline) -004|mmc_mq_timed_out mmc_mq_timed_out acquires the queue_lock for the first time. The mmc_blk_cqe_complete_rq function also tries to acquire the same queue lock resulting in recursive locking where the task is spinning for the same lock which it has already acquired leading to watchdog bark. Fix this issue with the lock only for the required critical section. Cc: Fixes: 1e8e55b67030 ("mmc: block: Add CQE support") Suggested-by: Sahitya Tummala Signed-off-by: Sarthak Garg Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/1588868135-31783-1-git-send-email-vbadigan@codeaurora.org Signed-off-by: Ulf Hansson --- drivers/mmc/core/queue.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 25bee3daf9e2..b5fd3bc7eb58 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -107,7 +107,7 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req) case MMC_ISSUE_DCMD: if (host->cqe_ops->cqe_timeout(host, mrq, &recovery_needed)) { if (recovery_needed) - __mmc_cqe_recovery_notifier(mq); + mmc_cqe_recovery_notifier(mrq); return BLK_EH_RESET_TIMER; } /* No timeout (XXX: huh? comment doesn't make much sense) */ @@ -127,18 +127,13 @@ static enum blk_eh_timer_return mmc_mq_timed_out(struct request *req, struct mmc_card *card = mq->card; struct mmc_host *host = card->host; unsigned long flags; - int ret; + bool ignore_tout; spin_lock_irqsave(&mq->lock, flags); - - if (mq->recovery_needed || !mq->use_cqe || host->hsq_enabled) - ret = BLK_EH_RESET_TIMER; - else - ret = mmc_cqe_timed_out(req); - + ignore_tout = mq->recovery_needed || !mq->use_cqe || host->hsq_enabled; spin_unlock_irqrestore(&mq->lock, flags); - return ret; + return ignore_tout ? BLK_EH_RESET_TIMER : mmc_cqe_timed_out(req); } static void mmc_mq_recovery_handler(struct work_struct *work) -- cgit v1.2.3-59-g8ed1b From c077dc5e0620508a29497dac63a2822324ece52a Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 8 May 2020 09:22:27 +0300 Subject: mmc: block: Fix request completion in the CQE timeout path First, it should be noted that the CQE timeout (60 seconds) is substantial so a CQE request that times out is really stuck, and the race between timeout and completion is extremely unlikely. Nevertheless this patch fixes an issue with it. Commit ad73d6feadbd7b ("mmc: complete requests from ->timeout") preserved the existing functionality, to complete the request. However that had only been necessary because the block layer timeout handler had been marking the request to prevent it from being completed normally. That restriction was removed at the same time, the result being that a request that has gone will have been completed anyway. That is, the completion was unnecessary. At the time, the unnecessary completion was harmless because the block layer would ignore it, although that changed in kernel v5.0. Note for stable, this patch will not apply cleanly without patch "mmc: core: Fix recursive locking issue in CQE recovery path" Signed-off-by: Adrian Hunter Fixes: ad73d6feadbd7b ("mmc: complete requests from ->timeout") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200508062227.23144-1-adrian.hunter@intel.com Signed-off-by: Ulf Hansson --- drivers/mmc/core/queue.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index b5fd3bc7eb58..4b1eb89b401d 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -110,8 +110,7 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req) mmc_cqe_recovery_notifier(mrq); return BLK_EH_RESET_TIMER; } - /* No timeout (XXX: huh? comment doesn't make much sense) */ - blk_mq_complete_request(req); + /* The request has gone already */ return BLK_EH_DONE; default: /* Timeout is handled by mmc core */ -- cgit v1.2.3-59-g8ed1b From a8b7528b69d4dc7e94d0338851ff8c929231fc4b Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 23 Apr 2020 14:30:57 -0500 Subject: gfs2: Fix error exit in do_xmote Before this patch, if an error was detected from glock function go_sync by function do_xmote, it would return. But the function had temporarily unlocked the gl_lockref spin_lock, and it never re-locked it. When the caller of do_xmote tried to unlock it again, it was already unlocked, which resulted in a corrupted spin_lock value. This patch makes sure the gl_lockref spin_lock is re-locked after it is unlocked. Thanks to Wu Bo for reporting this problem. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 29f9b6684b74..a1c5f245553f 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -613,7 +613,7 @@ __acquires(&gl->gl_lockref.lock) fs_err(sdp, "Error %d syncing glock \n", ret); gfs2_dump_glock(NULL, gl, true); } - return; + goto out; } } if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) { -- cgit v1.2.3-59-g8ed1b From 53af80ce0eaeb0fc4ce4b565c30e3a16e8e05de0 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 24 Apr 2020 12:15:21 -0500 Subject: gfs2: Fix BUG during unmount after file system withdraw Before this patch, when the logd daemon was forced to withdraw, it would try to request its journal be recovered by another cluster node. However, in single-user cases with lock_nolock, there are no other nodes to recover the journal. Function signal_our_withdraw() was recognizing the lock_nolock situation, but not until after it had evicted its journal inode. Since the journal descriptor that points to the inode was never removed from the master list, when the unmount occurred, it did another iput on the evicted inode, which resulted in a BUG_ON(inode->i_state & I_CLEAR). This patch moves the check for this situation earlier in function signal_our_withdraw(), which avoids the extra iput, so the unmount may happen normally. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/util.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 9b64d40ab379..aa087a5675af 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -119,6 +119,12 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) if (!sb_rdonly(sdp->sd_vfs)) ret = gfs2_make_fs_ro(sdp); + if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */ + if (!ret) + ret = -EIO; + clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); + goto skip_recovery; + } /* * Drop the glock for our journal so another node can recover it. */ @@ -159,10 +165,6 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) wait_on_bit(&gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE); } - if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */ - clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); - goto skip_recovery; - } /* * Dequeue the "live" glock, but keep a reference so it's never freed. */ -- cgit v1.2.3-59-g8ed1b From d22f69a08dcb0f469170cda1976d5938cb0e5dcf Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 24 Apr 2020 12:17:33 -0500 Subject: gfs2: Fix use-after-free in gfs2_logd after withdraw When the gfs2_logd daemon withdrew, the withdraw sequence called into make_fs_ro() to make the file system read-only. That caused the journal descriptors to be freed. However, those journal descriptors were used by gfs2_logd's call to gfs2_ail_flush_reqd(). This caused a use-after free and NULL pointer dereference. This patch changes function gfs2_logd() so that it stops all logd work until the thread is told to stop. Once a withdraw is done, it only does an interruptible sleep. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 3a75843ae580..cf0b80c78c82 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -1131,6 +1131,10 @@ int gfs2_logd(void *data) while (!kthread_should_stop()) { + if (gfs2_withdrawn(sdp)) { + msleep_interruptible(HZ); + continue; + } /* Check for errors writing to the journal */ if (sdp->sd_log_error) { gfs2_lm(sdp, @@ -1139,6 +1143,7 @@ int gfs2_logd(void *data) "prevent further damage.\n", sdp->sd_fsname, sdp->sd_log_error); gfs2_withdraw(sdp); + continue; } did_flush = false; -- cgit v1.2.3-59-g8ed1b From 566a2ab3c9005f62e784bd39022d58d34ef4365c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 20 Apr 2020 19:42:04 +0200 Subject: gfs2: Another gfs2_walk_metadata fix Make sure we don't walk past the end of the metadata in gfs2_walk_metadata: the inode holds fewer pointers than indirect blocks. Slightly clean up gfs2_iomap_get. Fixes: a27a0c9b6a20 ("gfs2: gfs2_walk_metadata fix") Cc: stable@vger.kernel.org # v5.3+ Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson --- fs/gfs2/bmap.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 936a8ec6b48e..6306eaae378b 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -528,10 +528,12 @@ lower_metapath: /* Advance in metadata tree. */ (mp->mp_list[hgt])++; - if (mp->mp_list[hgt] >= sdp->sd_inptrs) { - if (!hgt) + if (hgt) { + if (mp->mp_list[hgt] >= sdp->sd_inptrs) + goto lower_metapath; + } else { + if (mp->mp_list[hgt] >= sdp->sd_diptrs) break; - goto lower_metapath; } fill_up_metapath: @@ -876,10 +878,9 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length, ret = -ENOENT; goto unlock; } else { - /* report a hole */ iomap->offset = pos; iomap->length = length; - goto do_alloc; + goto hole_found; } } iomap->length = size; @@ -933,8 +934,6 @@ unlock: return ret; do_alloc: - iomap->addr = IOMAP_NULL_ADDR; - iomap->type = IOMAP_HOLE; if (flags & IOMAP_REPORT) { if (pos >= size) ret = -ENOENT; @@ -956,6 +955,9 @@ do_alloc: if (pos < size && height == ip->i_height) ret = gfs2_hole_size(inode, lblock, len, mp, iomap); } +hole_found: + iomap->addr = IOMAP_NULL_ADDR; + iomap->type = IOMAP_HOLE; goto out; } -- cgit v1.2.3-59-g8ed1b From aa83da7f47b26c9587bade6c4bc4736ffa308f0a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 28 Apr 2020 01:15:41 +0200 Subject: gfs2: More gfs2_find_jhead fixes It turns out that when extending an existing bio, gfs2_find_jhead fails to check if the block number is consecutive, which leads to incorrect reads for fragmented journals. In addition, limit the maximum bio size to an arbitrary value of 2 megabytes: since commit 07173c3ec276 ("block: enable multipage bvecs"), if we just keep adding pages until bio_add_page fails, bios will grow much larger than useful, which pins more memory than necessary with barely any additional performance gains. Fixes: f4686c26ecc3 ("gfs2: read journal in large chunks") Cc: stable@vger.kernel.org # v5.2+ Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson --- fs/gfs2/lops.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 5ea96757afc4..48b54ec1c793 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -263,7 +263,7 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno, struct super_block *sb = sdp->sd_vfs; struct bio *bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); - bio->bi_iter.bi_sector = blkno << (sb->s_blocksize_bits - 9); + bio->bi_iter.bi_sector = blkno << sdp->sd_fsb2bb_shift; bio_set_dev(bio, sb->s_bdev); bio->bi_end_io = end_io; bio->bi_private = sdp; @@ -509,7 +509,7 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, unsigned int bsize = sdp->sd_sb.sb_bsize, off; unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift; unsigned int shift = PAGE_SHIFT - bsize_shift; - unsigned int readahead_blocks = BIO_MAX_PAGES << shift; + unsigned int max_bio_size = 2 * 1024 * 1024; struct gfs2_journal_extent *je; int sz, ret = 0; struct bio *bio = NULL; @@ -537,12 +537,17 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, off = 0; } - if (!bio || (bio_chained && !off)) { + if (!bio || (bio_chained && !off) || + bio->bi_iter.bi_size >= max_bio_size) { /* start new bio */ } else { - sz = bio_add_page(bio, page, bsize, off); - if (sz == bsize) - goto block_added; + sector_t sector = dblock << sdp->sd_fsb2bb_shift; + + if (bio_end_sector(bio) == sector) { + sz = bio_add_page(bio, page, bsize, off); + if (sz == bsize) + goto block_added; + } if (off) { unsigned int blocks = (PAGE_SIZE - off) >> bsize_shift; @@ -568,7 +573,7 @@ block_added: off += bsize; if (off == PAGE_SIZE) page = NULL; - if (blocks_submitted < blocks_read + readahead_blocks) { + if (blocks_submitted < 2 * max_bio_size >> bsize_shift) { /* Keep at least one bio in flight */ continue; } -- cgit v1.2.3-59-g8ed1b From fb3637a113349f53830f7d6ca45891b7192cd28f Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 5 May 2020 20:47:47 +0200 Subject: iommu/virtio: Reverse arguments to list_add Elsewhere in the file, there is a list_for_each_entry with &vdev->resv_regions as the second argument, suggesting that &vdev->resv_regions is the list head. So exchange the arguments on the list_add call to put the list head in the second argument. Fixes: 2a5a31487445 ("iommu/virtio: Add probe request") Signed-off-by: Julia Lawall Reviewed-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/1588704467-13431-1-git-send-email-Julia.Lawall@inria.fr Signed-off-by: Joerg Roedel --- drivers/iommu/virtio-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index d5cac4f46ca5..4e1d11af23c8 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -453,7 +453,7 @@ static int viommu_add_resv_mem(struct viommu_endpoint *vdev, if (!region) return -ENOMEM; - list_add(&vdev->resv_regions, ®ion->list); + list_add(®ion->list, &vdev->resv_regions); return 0; } -- cgit v1.2.3-59-g8ed1b From 3f2c788a13143620c5471ac96ac4f033fc9ac3f3 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 7 May 2020 12:32:14 +0200 Subject: fork: prevent accidental access to clone3 features Jan reported an issue where an interaction between sign-extending clone's flag argument on ppc64le and the new CLONE_INTO_CGROUP feature causes clone() to consistently fail with EBADF. The whole story is a little longer. The legacy clone() syscall is odd in a bunch of ways and here two things interact. First, legacy clone's flag argument is word-size dependent, i.e. it's an unsigned long whereas most system calls with flag arguments use int or unsigned int. Second, legacy clone() ignores unknown and deprecated flags. The two of them taken together means that users on 64bit systems can pass garbage for the upper 32bit of the clone() syscall since forever and things would just work fine. Just try this on a 64bit kernel prior to v5.7-rc1 where this will succeed and on v5.7-rc1 where this will fail with EBADF: int main(int argc, char *argv[]) { pid_t pid; /* Note that legacy clone() has different argument ordering on * different architectures so this won't work everywhere. * * Only set the upper 32 bits. */ pid = syscall(__NR_clone, 0xffffffff00000000 | SIGCHLD, NULL, NULL, NULL, NULL); if (pid < 0) exit(EXIT_FAILURE); if (pid == 0) exit(EXIT_SUCCESS); if (wait(NULL) != pid) exit(EXIT_FAILURE); exit(EXIT_SUCCESS); } Since legacy clone() couldn't be extended this was not a problem so far and nobody really noticed or cared since nothing in the kernel ever bothered to look at the upper 32 bits. But once we introduced clone3() and expanded the flag argument in struct clone_args to 64 bit we opened this can of worms. With the first flag-based extension to clone3() making use of the upper 32 bits of the flag argument we've effectively made it possible for the legacy clone() syscall to reach clone3() only flags. The sign extension scenario is just the odd corner-case that we needed to figure this out. The reason we just realized this now and not already when we introduced CLONE_CLEAR_SIGHAND was that CLONE_INTO_CGROUP assumes that a valid cgroup file descriptor has been given. So the sign extension (or the user accidently passing garbage for the upper 32 bits) caused the CLONE_INTO_CGROUP bit to be raised and the kernel to error out when it didn't find a valid cgroup file descriptor. Let's fix this by always capping the upper 32 bits for all codepaths that are not aware of clone3() features. This ensures that we can't reach clone3() only features by accident via legacy clone as with the sign extension case and also that legacy clone() works exactly like before, i.e. ignoring any unknown flags. This solution risks no regressions and is also pretty clean. Fixes: 7f192e3cd316 ("fork: add clone3") Fixes: ef2c41cf38a7 ("clone3: allow spawning processes into cgroups") Reported-by: Jan Stancek Signed-off-by: Christian Brauner Cc: Arnd Bergmann Cc: Dmitry V. Levin Cc: Andreas Schwab Cc: Florian Weimer Cc: libc-alpha@sourceware.org Cc: stable@vger.kernel.org # 5.3+ Link: https://sourceware.org/pipermail/libc-alpha/2020-May/113596.html Link: https://lore.kernel.org/r/20200507103214.77218-1-christian.brauner@ubuntu.com --- kernel/fork.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 8c700f881d92..48ed22774efa 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2486,11 +2486,11 @@ long do_fork(unsigned long clone_flags, int __user *child_tidptr) { struct kernel_clone_args args = { - .flags = (clone_flags & ~CSIGNAL), + .flags = (lower_32_bits(clone_flags) & ~CSIGNAL), .pidfd = parent_tidptr, .child_tid = child_tidptr, .parent_tid = parent_tidptr, - .exit_signal = (clone_flags & CSIGNAL), + .exit_signal = (lower_32_bits(clone_flags) & CSIGNAL), .stack = stack_start, .stack_size = stack_size, }; @@ -2508,8 +2508,9 @@ long do_fork(unsigned long clone_flags, pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) { struct kernel_clone_args args = { - .flags = ((flags | CLONE_VM | CLONE_UNTRACED) & ~CSIGNAL), - .exit_signal = (flags & CSIGNAL), + .flags = ((lower_32_bits(flags) | CLONE_VM | + CLONE_UNTRACED) & ~CSIGNAL), + .exit_signal = (lower_32_bits(flags) & CSIGNAL), .stack = (unsigned long)fn, .stack_size = (unsigned long)arg, }; @@ -2570,11 +2571,11 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, #endif { struct kernel_clone_args args = { - .flags = (clone_flags & ~CSIGNAL), + .flags = (lower_32_bits(clone_flags) & ~CSIGNAL), .pidfd = parent_tidptr, .child_tid = child_tidptr, .parent_tid = parent_tidptr, - .exit_signal = (clone_flags & CSIGNAL), + .exit_signal = (lower_32_bits(clone_flags) & CSIGNAL), .stack = newsp, .tls = tls, }; -- cgit v1.2.3-59-g8ed1b From 4bb9d46d47b105a774f9dca642f5271375bca4b2 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 8 May 2020 04:56:10 +0000 Subject: kselftests: dmabuf-heaps: Fix confused return value on expected error testing When I added the expected error testing, I forgot I need to set the return to zero when we successfully see an error. Without this change we only end up testing a single heap before the test quits. Cc: Shuah Khan Cc: Sumit Semwal Cc: Benjamin Gaignard Cc: Brian Starkey Cc: Laura Abbott Cc: "Andrew F. Davis" Cc: linux-kselftest@vger.kernel.org Signed-off-by: John Stultz Signed-off-by: Shuah Khan --- tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c index cd5e1f602ac9..909da9cdda97 100644 --- a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c +++ b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c @@ -351,6 +351,7 @@ static int test_alloc_errors(char *heap_name) } printf("Expected error checking passed\n"); + ret = 0; out: if (dmabuf_fd >= 0) close(dmabuf_fd); -- cgit v1.2.3-59-g8ed1b From d8238f9eb6e0c175c8b657af20164eef6b30c71c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 May 2020 13:56:08 -0500 Subject: tools/testing: Replace zero-length array with flexible-array The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] sizeof(flexible-array-member) triggers a warning because flexible array members have incomplete type[1]. There are some instances of code in which the sizeof operator is being incorrectly/erroneously applied to zero-length arrays and the result is zero. Such instances may be hiding some bugs. So, this work (flexible-array member conversions) will also help to get completely rid of those sorts of issues. This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Shuah Khan --- tools/testing/selftests/nsfs/pidns.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/nsfs/pidns.c b/tools/testing/selftests/nsfs/pidns.c index e0d86e1668c0..e3c772c6a7c7 100644 --- a/tools/testing/selftests/nsfs/pidns.c +++ b/tools/testing/selftests/nsfs/pidns.c @@ -27,7 +27,7 @@ #define __stack_aligned__ __attribute__((aligned(16))) struct cr_clone_arg { char stack[128] __stack_aligned__; - char stack_ptr[0]; + char stack_ptr[]; }; static int child(void *args) -- cgit v1.2.3-59-g8ed1b From adb571649c7ce7ee16022fa302b62043d1812b4b Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Tue, 5 May 2020 18:14:45 +0800 Subject: selftests/ftrace: mark irqsoff_tracer.tc test as unresolved if the test module does not exist The UNRESOLVED state is much more apporiate than the UNSUPPORTED state for the absence of the test module, as it matches "test was set up incorrectly" situation in the README file. A possible scenario is that the function was enabled (supported by the kernel) but the module was not installed properly, in this case we cannot call this as UNSUPPORTED. This change also make it consistent with other module-related tests in ftrace. Signed-off-by: Po-Hsu Lin Acked-by: Steven Rostedt (VMware) Acked-by: Masami Hiramatsu Signed-off-by: Shuah Khan --- .../testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc b/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc index cbd174334a48..2b82c80edf69 100644 --- a/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc +++ b/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc @@ -17,7 +17,14 @@ unsup() { #msg exit_unsupported } -modprobe $MOD || unsup "$MOD module not available" +unres() { #msg + reset_tracer + rmmod $MOD || true + echo $1 + exit_unresolved +} + +modprobe $MOD || unres "$MOD module not available" rmmod $MOD grep -q "preemptoff" available_tracers || unsup "preemptoff tracer not enabled" -- cgit v1.2.3-59-g8ed1b From f131d9edc29d527df4b8f6840c340415f559f095 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 8 May 2020 16:53:55 +1000 Subject: selftests/lkdtm: Don't clear dmesg when running tests It is Very Rude to clear dmesg in test scripts. That's because the script may be part of a larger test run, and clearing dmesg potentially destroys the output of other tests. We can avoid using dmesg -c by saving the content of dmesg before the test, and then using diff to compare that to the dmesg afterward, producing a log with just the added lines. Signed-off-by: Michael Ellerman Acked-by: Kees Cook Signed-off-by: Shuah Khan --- tools/testing/selftests/lkdtm/run.sh | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/lkdtm/run.sh b/tools/testing/selftests/lkdtm/run.sh index dadf819148a4..0b409e187c7b 100755 --- a/tools/testing/selftests/lkdtm/run.sh +++ b/tools/testing/selftests/lkdtm/run.sh @@ -59,23 +59,25 @@ if [ -z "$expect" ]; then expect="call trace:" fi -# Clear out dmesg for output reporting -dmesg -c >/dev/null - # Prepare log for report checking -LOG=$(mktemp --tmpdir -t lkdtm-XXXXXX) +LOG=$(mktemp --tmpdir -t lkdtm-log-XXXXXX) +DMESG=$(mktemp --tmpdir -t lkdtm-dmesg-XXXXXX) cleanup() { - rm -f "$LOG" + rm -f "$LOG" "$DMESG" } trap cleanup EXIT +# Save existing dmesg so we can detect new content below +dmesg > "$DMESG" + # Most shells yell about signals and we're expecting the "cat" process # to usually be killed by the kernel. So we have to run it in a sub-shell # and silence errors. ($SHELL -c 'cat <(echo '"$test"') >'"$TRIGGER" 2>/dev/null) || true # Record and dump the results -dmesg -c >"$LOG" +dmesg | diff --changed-group-format='%>' --unchanged-group-format='' "$DMESG" - > "$LOG" || true + cat "$LOG" # Check for expected output if egrep -qi "$expect" "$LOG" ; then -- cgit v1.2.3-59-g8ed1b From 851c4df54dc1bcae41d07e46e3d89e035b0a7140 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 8 May 2020 16:53:56 +1000 Subject: selftests/lkdtm: Use grep -E instead of egrep shellcheck complains that egrep is deprecated, and the grep man page agrees. Use grep -E instead. Signed-off-by: Michael Ellerman Acked-by: Kees Cook Signed-off-by: Shuah Khan --- tools/testing/selftests/lkdtm/run.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/lkdtm/run.sh b/tools/testing/selftests/lkdtm/run.sh index 0b409e187c7b..ee64ff8df8f4 100755 --- a/tools/testing/selftests/lkdtm/run.sh +++ b/tools/testing/selftests/lkdtm/run.sh @@ -25,13 +25,13 @@ fi # Figure out which test to run from our script name. test=$(basename $0 .sh) # Look up details about the test from master list of LKDTM tests. -line=$(egrep '^#?'"$test"'\b' tests.txt) +line=$(grep -E '^#?'"$test"'\b' tests.txt) if [ -z "$line" ]; then echo "Skipped: missing test '$test' in tests.txt" exit $KSELFTEST_SKIP_TEST fi # Check that the test is known to LKDTM. -if ! egrep -q '^'"$test"'$' "$TRIGGER" ; then +if ! grep -E -q '^'"$test"'$' "$TRIGGER" ; then echo "Skipped: test '$test' missing in $TRIGGER!" exit $KSELFTEST_SKIP_TEST fi @@ -80,11 +80,11 @@ dmesg | diff --changed-group-format='%>' --unchanged-group-format='' "$DMESG" - cat "$LOG" # Check for expected output -if egrep -qi "$expect" "$LOG" ; then +if grep -E -qi "$expect" "$LOG" ; then echo "$test: saw '$expect': ok" exit 0 else - if egrep -qi XFAIL: "$LOG" ; then + if grep -E -qi XFAIL: "$LOG" ; then echo "$test: saw 'XFAIL': [SKIP]" exit $KSELFTEST_SKIP_TEST else -- cgit v1.2.3-59-g8ed1b From 12ae44a40a1be891bdc6463f8c7072b4ede746ef Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Tue, 5 May 2020 13:59:02 +0100 Subject: ceph: demote quotarealm lookup warning to a debug message A misconfigured cephx can easily result in having the kernel client flooding the logs with: ceph: Can't lookup inode 1 (err: -13) Change this message to debug level. Cc: stable@vger.kernel.org URL: https://tracker.ceph.com/issues/44546 Signed-off-by: Luis Henriques Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/quota.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index de56dee60540..19507e2fdb57 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -159,8 +159,8 @@ static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc, } if (IS_ERR(in)) { - pr_warn("Can't lookup inode %llx (err: %ld)\n", - realm->ino, PTR_ERR(in)); + dout("Can't lookup inode %llx (err: %ld)\n", + realm->ino, PTR_ERR(in)); qri->timeout = jiffies + msecs_to_jiffies(60 * 1000); /* XXX */ } else { qri->timeout = 0; -- cgit v1.2.3-59-g8ed1b From 2297ab6144c2e85c418d0fd47b2f24e294b55dca Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Mon, 4 May 2020 10:18:43 -0500 Subject: gfs2: Fix problems regarding gfs2_qa_get and _put This patch fixes a couple of places in which gfs2_qa_get and gfs2_qa_put are not balanced: we now keep references around whenever a file is open for writing (see gfs2_open_common and gfs2_release), so we need to put all references we grab in function gfs2_create_inode. This was broken in the successful case and on one error path. This also means that we don't have a reference to put in gfs2_evict_inode. In addition, gfs2_qa_put was called for the wrong inode in gfs2_link. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/inode.c | 7 ++++--- fs/gfs2/super.c | 1 - 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 70b2d3a1e866..5acd3ce30759 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -622,7 +622,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, error = finish_no_open(file, NULL); } gfs2_glock_dq_uninit(ghs); - return error; + goto fail; } else if (error != -ENOENT) { goto fail_gunlock; } @@ -764,9 +764,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, error = finish_open(file, dentry, gfs2_open_common); } gfs2_glock_dq_uninit(ghs); + gfs2_qa_put(ip); gfs2_glock_dq_uninit(ghs + 1); clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags); gfs2_glock_put(io_gl); + gfs2_qa_put(dip); return error; fail_gunlock3: @@ -776,7 +778,6 @@ fail_gunlock2: clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags); gfs2_glock_put(io_gl); fail_free_inode: - gfs2_qa_put(ip); if (ip->i_gl) { glock_clear_object(ip->i_gl, ip); gfs2_glock_put(ip->i_gl); @@ -1005,7 +1006,7 @@ out_gunlock: out_child: gfs2_glock_dq(ghs); out_parent: - gfs2_qa_put(ip); + gfs2_qa_put(dip); gfs2_holder_uninit(ghs); gfs2_holder_uninit(ghs + 1); return error; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 37fc41632aa2..956fced0a8ec 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1404,7 +1404,6 @@ out: if (ip->i_qadata) gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0); gfs2_rs_delete(ip, NULL); - gfs2_qa_put(ip); gfs2_ordered_del_inode(ip); clear_inode(inode); gfs2_dir_hash_inval(ip); -- cgit v1.2.3-59-g8ed1b From f9615fe3113f1067093f7f68d46d281b4c9a5978 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 5 May 2020 11:50:24 -0500 Subject: gfs2: Change BUG_ON to an assert_withdraw in gfs2_quota_change Before this patch, gfs2_quota_change() would BUG_ON if the qa_ref counter was not a positive number. This patch changes it to be a withdraw instead. That way we can debug things more easily. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/quota.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index cc0c4b5800be..a62be4267c17 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1270,7 +1270,9 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change, if (ip->i_diskflags & GFS2_DIF_SYSTEM) return; - BUG_ON(ip->i_qadata->qa_ref <= 0); + if (gfs2_assert_withdraw(sdp, ip->i_qadata && + ip->i_qadata->qa_ref > 0)) + return; for (x = 0; x < ip->i_qadata->qa_qd_num; x++) { qd = ip->i_qadata->qa_qd[x]; -- cgit v1.2.3-59-g8ed1b From e6ce26e571a813e6992c5148b27f2a4b17952080 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 5 May 2020 11:52:51 -0500 Subject: gfs2: remove check for quotas on in gfs2_quota_check This patch removes a check from gfs2_quota_check for whether quotas are enabled by the superblock. There is a test just prior for the GIF_QD_LOCKED bit in the inode, and that can only be set by functions that already check that quotas are enabled in the superblock. Therefore, the check is redundant. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/quota.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index a62be4267c17..c997cadc8d9a 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1210,9 +1210,6 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid, if (!test_bit(GIF_QD_LOCKED, &ip->i_flags)) return 0; - if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) - return 0; - for (x = 0; x < ip->i_qadata->qa_qd_num; x++) { qd = ip->i_qadata->qa_qd[x]; -- cgit v1.2.3-59-g8ed1b From 4ed0c30811cb4d30ef89850b787a53a84d5d2bcb Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 5 May 2020 11:53:21 -0500 Subject: gfs2: move privileged user check to gfs2_quota_lock_check Before this patch, function gfs2_quota_lock checked if it was called from a privileged user, and if so, it bypassed the quota check: superuser can operate outside the quotas. That's the wrong place for the check because the lock/unlock functions are separate from the lock_check function, and you can do lock and unlock without actually checking the quotas. This patch moves the check to gfs2_quota_lock_check. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/quota.c | 3 +-- fs/gfs2/quota.h | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index c997cadc8d9a..ed2e488f98b3 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1051,8 +1051,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) u32 x; int error = 0; - if (capable(CAP_SYS_RESOURCE) || - sdp->sd_args.ar_quota != GFS2_QUOTA_ON) + if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) return 0; error = gfs2_quota_hold(ip, uid, gid); diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 7f9ca8ef40fc..21ada332d555 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -44,7 +44,8 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip, int ret; ap->allowed = UINT_MAX; /* Assume we are permitted a whole lot */ - if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) + if (capable(CAP_SYS_RESOURCE) || + sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) return 0; ret = gfs2_quota_lock(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); if (ret) -- cgit v1.2.3-59-g8ed1b From c9cb9e381985bbbe8acd2695bbe6bd24bf06b81c Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 5 May 2020 11:55:03 -0500 Subject: gfs2: don't call quota_unhold if quotas are not locked Before this patch, function gfs2_quota_unlock checked if quotas are turned off, and if so, it branched to label out, which called gfs2_quota_unhold. With the new system of gfs2_qa_get and put, we no longer want to call gfs2_quota_unhold or we won't balance our gets and puts. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/quota.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index ed2e488f98b3..8259fef3f986 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1124,7 +1124,7 @@ void gfs2_quota_unlock(struct gfs2_inode *ip) int found; if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags)) - goto out; + return; for (x = 0; x < ip->i_qadata->qa_qd_num; x++) { struct gfs2_quota_data *qd; @@ -1161,7 +1161,6 @@ void gfs2_quota_unlock(struct gfs2_inode *ip) qd_unlock(qda[x]); } -out: gfs2_quota_unhold(ip); } -- cgit v1.2.3-59-g8ed1b From f4e2f5e1a527ce58fc9f85145b03704779a3123e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 5 May 2020 11:56:46 -0500 Subject: gfs2: Grab glock reference sooner in gfs2_add_revoke This patch rearranges gfs2_add_revoke so that the extra glock reference is added earlier on in the function to avoid races in which the glock is freed before the new reference is taken. Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson --- fs/gfs2/log.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index cf0b80c78c82..0644e58c6191 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -669,13 +669,13 @@ void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) struct buffer_head *bh = bd->bd_bh; struct gfs2_glock *gl = bd->bd_gl; + sdp->sd_log_num_revoke++; + if (atomic_inc_return(&gl->gl_revokes) == 1) + gfs2_glock_hold(gl); bh->b_private = NULL; bd->bd_blkno = bh->b_blocknr; gfs2_remove_from_ail(bd); /* drops ref on bh */ bd->bd_bh = NULL; - sdp->sd_log_num_revoke++; - if (atomic_inc_return(&gl->gl_revokes) == 1) - gfs2_glock_hold(gl); set_bit(GLF_LFLUSH, &gl->gl_flags); list_add(&bd->bd_list, &sdp->sd_log_revokes); } -- cgit v1.2.3-59-g8ed1b From ee79be181aee2f59522fdf81c580aaed5753e3af Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 30 Apr 2020 11:15:19 +0800 Subject: drm/amdgpu: disable MGCG/MGLS also on gfx CG ungate Otherwise, MGCG/MGLS will be left enabled. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index f92c158d89a1..0c98f705da0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4273,7 +4273,7 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, /* === CGCG /CGLS for GFX 3D Only === */ gfx_v10_0_update_3d_clock_gating(adev, enable); /* === MGCG + MGLS === */ - /* gfx_v10_0_update_medium_grain_clock_gating(adev, enable); */ + gfx_v10_0_update_medium_grain_clock_gating(adev, enable); } if (adev->cg_flags & -- cgit v1.2.3-59-g8ed1b From 1fe48ec08d9f2e26d893a6c05bd6c99a3490f9ef Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 30 Apr 2020 11:24:02 +0800 Subject: drm/amdgpu: drop unnecessary cancel_delayed_work_sync on PG ungate As this is already properly handled in amdgpu_gfx_off_ctrl(). In fact, this unnecessary cancel_delayed_work_sync may leave a small time window for race condition and is dangerous. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 6 +----- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 12 +++--------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 0c98f705da0e..d35ac7407c8e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4353,11 +4353,7 @@ static int gfx_v10_0_set_powergating_state(void *handle, switch (adev->asic_type) { case CHIP_NAVI10: case CHIP_NAVI14: - if (!enable) { - amdgpu_gfx_off_ctrl(adev, false); - cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); - } else - amdgpu_gfx_off_ctrl(adev, true); + amdgpu_gfx_off_ctrl(adev, enable); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 0c390485bc10..0127f1ce16e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -5025,10 +5025,9 @@ static int gfx_v9_0_set_powergating_state(void *handle, switch (adev->asic_type) { case CHIP_RAVEN: case CHIP_RENOIR: - if (!enable) { + if (!enable) amdgpu_gfx_off_ctrl(adev, false); - cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); - } + if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); @@ -5052,12 +5051,7 @@ static int gfx_v9_0_set_powergating_state(void *handle, amdgpu_gfx_off_ctrl(adev, true); break; case CHIP_VEGA12: - if (!enable) { - amdgpu_gfx_off_ctrl(adev, false); - cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); - } else { - amdgpu_gfx_off_ctrl(adev, true); - } + amdgpu_gfx_off_ctrl(adev, enable); break; default: break; -- cgit v1.2.3-59-g8ed1b From f4fcfa4282c1a1bf51475ebb0ffda623eebf1191 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 30 Apr 2020 14:38:39 +0800 Subject: drm/amd/powerplay: perform PG ungate prior to CG ungate Since gfxoff should be disabled first before trying to access those GC registers. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 6 +++--- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index e4e5a53b2b4e..8e2acb4df860 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -319,12 +319,12 @@ static void pp_dpm_en_umd_pstate(struct pp_hwmgr *hwmgr, if (*level & profile_mode_mask) { hwmgr->saved_dpm_level = hwmgr->dpm_level; hwmgr->en_umd_pstate = true; - amdgpu_device_ip_set_clockgating_state(hwmgr->adev, - AMD_IP_BLOCK_TYPE_GFX, - AMD_CG_STATE_UNGATE); amdgpu_device_ip_set_powergating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE); + amdgpu_device_ip_set_clockgating_state(hwmgr->adev, + AMD_IP_BLOCK_TYPE_GFX, + AMD_CG_STATE_UNGATE); } } else { /* exit umd pstate, restore level, enable gfx cg*/ diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index e8b27fab6aa1..09fa685b811b 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -1744,12 +1744,12 @@ static int smu_enable_umd_pstate(void *handle, if (*level & profile_mode_mask) { smu_dpm_ctx->saved_dpm_level = smu_dpm_ctx->dpm_level; smu_dpm_ctx->enable_umd_pstate = true; - amdgpu_device_ip_set_clockgating_state(smu->adev, - AMD_IP_BLOCK_TYPE_GFX, - AMD_CG_STATE_UNGATE); amdgpu_device_ip_set_powergating_state(smu->adev, AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE); + amdgpu_device_ip_set_clockgating_state(smu->adev, + AMD_IP_BLOCK_TYPE_GFX, + AMD_CG_STATE_UNGATE); } } else { /* exit umd pstate, restore level, enable gfx cg*/ -- cgit v1.2.3-59-g8ed1b From a6aacb2b26e85aa619cf0c6f98d0ca77314cd2a1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 5 May 2020 09:42:26 -0400 Subject: drm/amdgpu: force fbdev into vram MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We set the fb smem pointer to the offset into the BAR, so keep the fbdev bo in vram. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=207581 Fixes: 6c8d74caa2fa33 ("drm/amdgpu: Enable scatter gather display support") Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 9ae7b61f696a..25ddb482466a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -133,8 +133,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, u32 cpp; u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED | - AMDGPU_GEM_CREATE_CPU_GTT_USWC; + AMDGPU_GEM_CREATE_VRAM_CLEARED; info = drm_get_format_info(adev->ddev, mode_cmd); cpp = info->cpp[0]; -- cgit v1.2.3-59-g8ed1b From 39b3128d7ffd44e400e581e6f49e88cb42bef9a1 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 5 May 2020 14:02:43 -0400 Subject: drm/amdgpu: Use GEM obj reference for KFD BOs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Releasing the AMDGPU BO ref directly leads to problems when BOs were exported as DMA bufs. Releasing the GEM reference makes sure that the AMDGPU/TTM BO is not freed too early. Also take a GEM reference when importing BOs from DMABufs to keep references to imported BOs balances properly. Signed-off-by: Felix Kuehling Tested-by: Alex Sierra Acked-by: Christian König Reviewed-by: Alex Sierra Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 9dff792c9290..6a5b91d23fd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1343,7 +1343,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( } /* Free the BO*/ - amdgpu_bo_unref(&mem->bo); + drm_gem_object_put_unlocked(&mem->bo->tbo.base); mutex_destroy(&mem->lock); kfree(mem); @@ -1688,7 +1688,8 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE; - (*mem)->bo = amdgpu_bo_ref(bo); + drm_gem_object_get(&bo->tbo.base); + (*mem)->bo = bo; (*mem)->va = va; (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; -- cgit v1.2.3-59-g8ed1b From b2b6290a23986a5c88384887b8a589a3c4ebe292 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 7 May 2020 18:17:55 +0800 Subject: drm/amdgpu: enable hibernate support on Navi1X BACO is needed to support hibernate on Navi1X. Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 ++ drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 2992a49ad4a5..8ac1581a6b53 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -945,6 +945,7 @@ struct amdgpu_device { /* s3/s4 mask */ bool in_suspend; + bool in_hibernate; /* record last mm index being written through WREG32*/ unsigned long last_mm_index; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 466bfe541e45..a735d79a717b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1181,7 +1181,9 @@ static int amdgpu_pmops_freeze(struct device *dev) struct amdgpu_device *adev = drm_dev->dev_private; int r; + adev->in_hibernate = true; r = amdgpu_device_suspend(drm_dev, true); + adev->in_hibernate = false; if (r) return r; return amdgpu_asic_reset(adev); diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 09fa685b811b..e77046931e4c 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -1476,7 +1476,7 @@ static int smu_disable_dpm(struct smu_context *smu) bool use_baco = !smu->is_apu && ((adev->in_gpu_reset && (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)) || - (adev->in_runpm && amdgpu_asic_supports_baco(adev))); + ((adev->in_runpm || adev->in_hibernate) && amdgpu_asic_supports_baco(adev))); ret = smu_get_smc_version(smu, NULL, &smu_version); if (ret) { -- cgit v1.2.3-59-g8ed1b From bff1a6112b09a810d8a1c0c0ea306e58810d4f0b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 8 May 2020 14:33:09 -0400 Subject: drm/amdgpu: implement soft_recovery for gfx10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Same as gfx9. This allows us to kill the waves for hung shaders. Acked-by: Evan Quan Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index d35ac7407c8e..0e0daf0021b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4914,6 +4914,19 @@ static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, ref, mask); } +static void gfx_v10_0_ring_soft_recovery(struct amdgpu_ring *ring, + unsigned vmid) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t value = 0; + + value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); + value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); + value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); + value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); + WREG32_SOC15(GC, 0, mmSQ_CMD, value); +} + static void gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, uint32_t me, uint32_t pipe, @@ -5305,6 +5318,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, + .soft_recovery = gfx_v10_0_ring_soft_recovery, }; static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { -- cgit v1.2.3-59-g8ed1b From b11e1a84f370866a8f47e85040687b49c1eb8705 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 7 May 2020 12:12:23 -0500 Subject: gfs2: If go_sync returns error, withdraw but skip invalidate Before this patch, if the go_sync operation returned an error during the do_xmote process (such as unable to sync metadata to the journal) the code did goto out. That kept the glock locked, so it could not be given away, which correctly avoids file system corruption. However, it never set the withdraw bit or requeueing the glock work. So it would hang forever, unable to ever demote the glock. This patch changes to goto to a new label, skip_inval, so that errors from go_sync are treated the same way as errors from go_inval: The delayed withdraw bit is set and the work is requeued. That way, the logd should eventually figure out there's a problem and withdraw properly there. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index a1c5f245553f..5239098fcce6 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -613,7 +613,7 @@ __acquires(&gl->gl_lockref.lock) fs_err(sdp, "Error %d syncing glock \n", ret); gfs2_dump_glock(NULL, gl, true); } - goto out; + goto skip_inval; } } if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) { @@ -633,6 +633,7 @@ __acquires(&gl->gl_lockref.lock) clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); } +skip_inval: gfs2_glock_hold(gl); /* * Check for an error encountered since we called go_sync and go_inval. -- cgit v1.2.3-59-g8ed1b From b14c94908b1b884276a6608dea3d0b1b510338b7 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 8 May 2020 15:01:25 -0500 Subject: Revert "gfs2: Don't demote a glock until its revokes are written" This reverts commit df5db5f9ee112e76b5202fbc331f990a0fc316d6. This patch fixes a regression: patch df5db5f9ee112 allowed function run_queue() to bypass its call to do_xmote() if revokes were queued for the glock. That's wrong because its call to do_xmote() is what is responsible for calling the go_sync() glops functions to sync both the ail list and any revokes queued for it. By bypassing the call, gfs2 could get into a stand-off where the glock could not be demoted until its revokes are written back, but the revokes would not be written back because do_xmote() was never called. It "sort of" works, however, because there are other mechanisms like the log flush daemon (logd) that can sync the ail items and revokes, if it deems it necessary. The problem is: without file system pressure, it might never deem it necessary. Signed-off-by: Bob Peterson --- fs/gfs2/glock.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 5239098fcce6..bf70e3b14938 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -723,9 +723,6 @@ __acquires(&gl->gl_lockref.lock) goto out_unlock; if (nonblock) goto out_sched; - smp_mb(); - if (atomic_read(&gl->gl_revokes) != 0) - goto out_sched; set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE); gl->gl_target = gl->gl_demote_state; -- cgit v1.2.3-59-g8ed1b From 2346ef47e871536cf4e36b77859bfdeb39b49024 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 6 May 2020 15:47:54 -0400 Subject: drm/amd/display: Fix vblank and pageflip event handling for FreeSync [Why] We're sending the drm vblank event a frame too early in the case where the pageflip happens close to VUPDATE and ends up blocking the signal. The implementation in DM was previously correct *before* we started sending vblank events from VSTARTUP unconditionally to handle cases where HUBP was off, OTG was ON and userspace was still requesting some DRM planes enabled. As part of that patch series we dropped VUPDATE since it was deemed close enough to VSTARTUP, but there's a key difference betweeen VSTARTUP and VUPDATE - the VUPDATE signal can be blocked if we're holding the pipe lock. There was a fix recently to revert the unconditional behavior for the DCN VSTARTUP vblank event since it was sending the pageflip event on the wrong frame - once again, due to blocking VUPDATE and having the address start scanning out two frames later. The problem with this fix is it didn't update the logic that calls drm_crtc_handle_vblank(), so the timestamps are totally bogus now. [How] Essentially reverts most of the original VSTARTUP series but retains the behavior to send back events when active planes == 0. Some refactoring/cleanup was done to not have duplicated code in both the handlers. Fixes: 16f17eda8bad ("drm/amd/display: Send vblank and user events at vsartup for DCN") Fixes: 3a2ce8d66a4b ("drm/amd/display: Disable VUpdate interrupt for DCN hardware") Fixes: 2b5aed9ac3f7 ("drm/amd/display: Fix pageflip event race condition for DCN.") Signed-off-by: Nicholas Kazlauskas Reviewed-and-Tested-by: Mario Kleiner Reviewed-by: Leo Li Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.6.x --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 137 +++++++++------------- 1 file changed, 55 insertions(+), 82 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 9c83c1303f08..c3df6ef9f101 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -441,7 +441,7 @@ static void dm_vupdate_high_irq(void *interrupt_params) /** * dm_crtc_high_irq() - Handles CRTC interrupt - * @interrupt_params: ignored + * @interrupt_params: used for determining the CRTC instance * * Handles the CRTC/VSYNC interrupt by notfying DRM's VBLANK * event handler. @@ -455,70 +455,6 @@ static void dm_crtc_high_irq(void *interrupt_params) unsigned long flags; acrtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - IRQ_TYPE_VBLANK); - - if (acrtc) { - acrtc_state = to_dm_crtc_state(acrtc->base.state); - - DRM_DEBUG_VBL("crtc:%d, vupdate-vrr:%d\n", - acrtc->crtc_id, - amdgpu_dm_vrr_active(acrtc_state)); - - /* Core vblank handling at start of front-porch is only possible - * in non-vrr mode, as only there vblank timestamping will give - * valid results while done in front-porch. Otherwise defer it - * to dm_vupdate_high_irq after end of front-porch. - */ - if (!amdgpu_dm_vrr_active(acrtc_state)) - drm_crtc_handle_vblank(&acrtc->base); - - /* Following stuff must happen at start of vblank, for crc - * computation and below-the-range btr support in vrr mode. - */ - amdgpu_dm_crtc_handle_crc_irq(&acrtc->base); - - if (acrtc_state->stream && adev->family >= AMDGPU_FAMILY_AI && - acrtc_state->vrr_params.supported && - acrtc_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE) { - spin_lock_irqsave(&adev->ddev->event_lock, flags); - mod_freesync_handle_v_update( - adev->dm.freesync_module, - acrtc_state->stream, - &acrtc_state->vrr_params); - - dc_stream_adjust_vmin_vmax( - adev->dm.dc, - acrtc_state->stream, - &acrtc_state->vrr_params.adjust); - spin_unlock_irqrestore(&adev->ddev->event_lock, flags); - } - } -} - -#if defined(CONFIG_DRM_AMD_DC_DCN) -/** - * dm_dcn_crtc_high_irq() - Handles VStartup interrupt for DCN generation ASICs - * @interrupt params - interrupt parameters - * - * Notify DRM's vblank event handler at VSTARTUP - * - * Unlike DCE hardware, we trigger the handler at VSTARTUP. at which: - * * We are close enough to VUPDATE - the point of no return for hw - * * We are in the fixed portion of variable front porch when vrr is enabled - * * We are before VUPDATE, where double-buffered vrr registers are swapped - * - * It is therefore the correct place to signal vblank, send user flip events, - * and update VRR. - */ -static void dm_dcn_crtc_high_irq(void *interrupt_params) -{ - struct common_irq_params *irq_params = interrupt_params; - struct amdgpu_device *adev = irq_params->adev; - struct amdgpu_crtc *acrtc; - struct dm_crtc_state *acrtc_state; - unsigned long flags; - - acrtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - IRQ_TYPE_VBLANK); - if (!acrtc) return; @@ -528,22 +464,35 @@ static void dm_dcn_crtc_high_irq(void *interrupt_params) amdgpu_dm_vrr_active(acrtc_state), acrtc_state->active_planes); + /** + * Core vblank handling at start of front-porch is only possible + * in non-vrr mode, as only there vblank timestamping will give + * valid results while done in front-porch. Otherwise defer it + * to dm_vupdate_high_irq after end of front-porch. + */ + if (!amdgpu_dm_vrr_active(acrtc_state)) + drm_crtc_handle_vblank(&acrtc->base); + + /** + * Following stuff must happen at start of vblank, for crc + * computation and below-the-range btr support in vrr mode. + */ amdgpu_dm_crtc_handle_crc_irq(&acrtc->base); - drm_crtc_handle_vblank(&acrtc->base); + + /* BTR updates need to happen before VUPDATE on Vega and above. */ + if (adev->family < AMDGPU_FAMILY_AI) + return; spin_lock_irqsave(&adev->ddev->event_lock, flags); - if (acrtc_state->vrr_params.supported && + if (acrtc_state->stream && acrtc_state->vrr_params.supported && acrtc_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE) { - mod_freesync_handle_v_update( - adev->dm.freesync_module, - acrtc_state->stream, - &acrtc_state->vrr_params); + mod_freesync_handle_v_update(adev->dm.freesync_module, + acrtc_state->stream, + &acrtc_state->vrr_params); - dc_stream_adjust_vmin_vmax( - adev->dm.dc, - acrtc_state->stream, - &acrtc_state->vrr_params.adjust); + dc_stream_adjust_vmin_vmax(adev->dm.dc, acrtc_state->stream, + &acrtc_state->vrr_params.adjust); } /* @@ -556,7 +505,8 @@ static void dm_dcn_crtc_high_irq(void *interrupt_params) * avoid race conditions between flip programming and completion, * which could cause too early flip completion events. */ - if (acrtc->pflip_status == AMDGPU_FLIP_SUBMITTED && + if (adev->family >= AMDGPU_FAMILY_RV && + acrtc->pflip_status == AMDGPU_FLIP_SUBMITTED && acrtc_state->active_planes == 0) { if (acrtc->event) { drm_crtc_send_vblank_event(&acrtc->base, acrtc->event); @@ -568,7 +518,6 @@ static void dm_dcn_crtc_high_irq(void *interrupt_params) spin_unlock_irqrestore(&adev->ddev->event_lock, flags); } -#endif static int dm_set_clockgating_state(void *handle, enum amd_clockgating_state state) @@ -2445,8 +2394,36 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) c_irq_params->adev = adev; c_irq_params->irq_src = int_params.irq_source; + amdgpu_dm_irq_register_interrupt( + adev, &int_params, dm_crtc_high_irq, c_irq_params); + } + + /* Use VUPDATE_NO_LOCK interrupt on DCN, which seems to correspond to + * the regular VUPDATE interrupt on DCE. We want DC_IRQ_SOURCE_VUPDATEx + * to trigger at end of each vblank, regardless of state of the lock, + * matching DCE behaviour. + */ + for (i = DCN_1_0__SRCID__OTG0_IHC_V_UPDATE_NO_LOCK_INTERRUPT; + i <= DCN_1_0__SRCID__OTG0_IHC_V_UPDATE_NO_LOCK_INTERRUPT + adev->mode_info.num_crtc - 1; + i++) { + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->vupdate_irq); + + if (r) { + DRM_ERROR("Failed to add vupdate irq id!\n"); + return r; + } + + int_params.int_context = INTERRUPT_HIGH_IRQ_CONTEXT; + int_params.irq_source = + dc_interrupt_to_irq_source(dc, i, 0); + + c_irq_params = &adev->dm.vupdate_params[int_params.irq_source - DC_IRQ_SOURCE_VUPDATE1]; + + c_irq_params->adev = adev; + c_irq_params->irq_src = int_params.irq_source; + amdgpu_dm_irq_register_interrupt(adev, &int_params, - dm_dcn_crtc_high_irq, c_irq_params); + dm_vupdate_high_irq, c_irq_params); } /* Use GRPH_PFLIP interrupt */ @@ -4453,10 +4430,6 @@ static inline int dm_set_vupdate_irq(struct drm_crtc *crtc, bool enable) struct amdgpu_device *adev = crtc->dev->dev_private; int rc; - /* Do not set vupdate for DCN hardware */ - if (adev->family > AMDGPU_FAMILY_AI) - return 0; - irq_source = IRQ_TYPE_VUPDATE + acrtc->otg_inst; rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY; -- cgit v1.2.3-59-g8ed1b From 626bf90fe03fa080d8df06bb0397c95c53ae8e27 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Mon, 30 Mar 2020 09:23:21 +0000 Subject: drm/amd/display: add basic atomic check for cursor plane This patch adds a basic cursor check when an atomic test-only commit is performed. The position and size of the cursor plane is checked. This should fix user-space relying on atomic checks to assign buffers to planes. Signed-off-by: Simon Ser Reported-by: Roman Gilg References: https://github.com/emersion/libliftoff/issues/46 Cc: Alex Deucher Cc: Harry Wentland Reviewed-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 26 +++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index c3df6ef9f101..28e651b173ab 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -7855,6 +7855,7 @@ static int dm_update_plane_state(struct dc *dc, struct drm_crtc_state *old_crtc_state, *new_crtc_state; struct dm_crtc_state *dm_new_crtc_state, *dm_old_crtc_state; struct dm_plane_state *dm_new_plane_state, *dm_old_plane_state; + struct amdgpu_crtc *new_acrtc; bool needs_reset; int ret = 0; @@ -7864,9 +7865,30 @@ static int dm_update_plane_state(struct dc *dc, dm_new_plane_state = to_dm_plane_state(new_plane_state); dm_old_plane_state = to_dm_plane_state(old_plane_state); - /*TODO Implement atomic check for cursor plane */ - if (plane->type == DRM_PLANE_TYPE_CURSOR) + /*TODO Implement better atomic check for cursor plane */ + if (plane->type == DRM_PLANE_TYPE_CURSOR) { + if (!enable || !new_plane_crtc || + drm_atomic_plane_disabling(plane->state, new_plane_state)) + return 0; + + new_acrtc = to_amdgpu_crtc(new_plane_crtc); + + if ((new_plane_state->crtc_w > new_acrtc->max_cursor_width) || + (new_plane_state->crtc_h > new_acrtc->max_cursor_height)) { + DRM_DEBUG_ATOMIC("Bad cursor size %d x %d\n", + new_plane_state->crtc_w, new_plane_state->crtc_h); + return -EINVAL; + } + + if (new_plane_state->crtc_x <= -new_acrtc->max_cursor_width || + new_plane_state->crtc_y <= -new_acrtc->max_cursor_height) { + DRM_DEBUG_ATOMIC("Bad cursor position %d, %d\n", + new_plane_state->crtc_x, new_plane_state->crtc_y); + return -EINVAL; + } + return 0; + } needs_reset = should_reset_plane(state, plane, old_plane_state, new_plane_state); -- cgit v1.2.3-59-g8ed1b From cc4de047b33be247f9c8150d3e496743a49642b8 Mon Sep 17 00:00:00 2001 From: Kelly Littlepage Date: Fri, 8 May 2020 19:58:46 +0000 Subject: net: tcp: fix rx timestamp behavior for tcp_recvmsg The stated intent of the original commit is to is to "return the timestamp corresponding to the highest sequence number data returned." The current implementation returns the timestamp for the last byte of the last fully read skb, which is not necessarily the last byte in the recv buffer. This patch converts behavior to the original definition, and to the behavior of the previous draft versions of commit 98aaa913b4ed ("tcp: Extend SOF_TIMESTAMPING_RX_SOFTWARE to TCP recvmsg") which also match this behavior. Fixes: 98aaa913b4ed ("tcp: Extend SOF_TIMESTAMPING_RX_SOFTWARE to TCP recvmsg") Co-developed-by: Iris Liu Signed-off-by: Iris Liu Signed-off-by: Kelly Littlepage Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6d87de434377..e72bd651d21a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2154,13 +2154,15 @@ skip_copy: tp->urg_data = 0; tcp_fast_path_check(sk); } - if (used + offset < skb->len) - continue; if (TCP_SKB_CB(skb)->has_rxtstamp) { tcp_update_recv_tstamps(skb, &tss); cmsg_flags |= 2; } + + if (used + offset < skb->len) + continue; + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) goto found_fin_ok; if (!(flags & MSG_PEEK)) -- cgit v1.2.3-59-g8ed1b From 1f8492df081bd66255764f3ce82ba1b2c37def49 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 8 May 2020 08:24:14 +0200 Subject: r8169: re-establish support for RTL8401 chip version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit r8169 never had native support for the RTL8401, however it reportedly worked with the fallback to RTL8101e [0]. Therefore let's add this as an explicit assignment. [0] https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=956868 Fixes: b4cc2dcc9c7c ("r8169: remove default chip versions") Reported-by: Camaleón Signed-off-by: Heiner Kallweit Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index bf5bf05970a2..78e15cc00e0a 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2127,6 +2127,8 @@ static enum mac_version rtl8169_get_mac_version(u16 xid, bool gmii) { 0x7cf, 0x348, RTL_GIGA_MAC_VER_07 }, { 0x7cf, 0x248, RTL_GIGA_MAC_VER_07 }, { 0x7cf, 0x340, RTL_GIGA_MAC_VER_13 }, + /* RTL8401, reportedly works if treated as RTL8101e */ + { 0x7cf, 0x240, RTL_GIGA_MAC_VER_13 }, { 0x7cf, 0x343, RTL_GIGA_MAC_VER_10 }, { 0x7cf, 0x342, RTL_GIGA_MAC_VER_16 }, { 0x7c8, 0x348, RTL_GIGA_MAC_VER_09 }, -- cgit v1.2.3-59-g8ed1b From 5099dea0a59f1c89525bb0ceac36689178a4c125 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 8 May 2020 07:27:35 +0000 Subject: nfp: abm: fix error return code in nfp_abm_vnic_alloc() Fix to return negative error code -ENOMEM from the kzalloc() error handling case instead of 0, as done elsewhere in this function. Fixes: 174ab544e3bc ("nfp: abm: add cls_u32 offload for simple band classification") Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/abm/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.c b/drivers/net/ethernet/netronome/nfp/abm/main.c index 354efffac0f9..bdbf0726145e 100644 --- a/drivers/net/ethernet/netronome/nfp/abm/main.c +++ b/drivers/net/ethernet/netronome/nfp/abm/main.c @@ -333,8 +333,10 @@ nfp_abm_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id) goto err_free_alink; alink->prio_map = kzalloc(abm->prio_map_len, GFP_KERNEL); - if (!alink->prio_map) + if (!alink->prio_map) { + err = -ENOMEM; goto err_free_alink; + } /* This is a multi-host app, make sure MAC/PHY is up, but don't * make the MAC/PHY state follow the state of any of the ports. -- cgit v1.2.3-59-g8ed1b From 6d32a5119811d2e9b5caa284181944c6f1f192ed Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 8 May 2020 17:37:20 +0300 Subject: dpaa2-eth: prevent array underflow in update_cls_rule() The "location" is controlled by the user via the ethtool_set_rxnfc() function. This update_cls_rule() function checks for array overflows but it doesn't check if the value is negative. I have changed the type to unsigned to prevent array underflows. Fixes: afb90dbb5f78 ("dpaa2-eth: Add ethtool support for flow classification") Signed-off-by: Dan Carpenter Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c index 94347c695233..b7141fdc279e 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c @@ -635,7 +635,7 @@ static int num_rules(struct dpaa2_eth_priv *priv) static int update_cls_rule(struct net_device *net_dev, struct ethtool_rx_flow_spec *new_fs, - int location) + unsigned int location) { struct dpaa2_eth_priv *priv = netdev_priv(net_dev); struct dpaa2_eth_cls_rule *rule; -- cgit v1.2.3-59-g8ed1b From db803036ada7d61d096783726f9771b3fc540370 Mon Sep 17 00:00:00 2001 From: Vincent Minet Date: Fri, 8 May 2020 00:14:22 +0200 Subject: umh: fix memory leak on execve failure If a UMH process created by fork_usermode_blob() fails to execute, a pair of struct file allocated by umh_pipe_setup() will leak. Under normal conditions, the caller (like bpfilter) needs to manage the lifetime of the UMH and its two pipes. But when fork_usermode_blob() fails, the caller doesn't really have a way to know what needs to be done. It seems better to do the cleanup ourselves in this case. Fixes: 449325b52b7a ("umh: introduce fork_usermode_blob() helper") Signed-off-by: Vincent Minet Signed-off-by: Jakub Kicinski --- kernel/umh.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/umh.c b/kernel/umh.c index 7f255b5a8845..20d51e0957e0 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -475,6 +475,12 @@ static void umh_clean_and_save_pid(struct subprocess_info *info) { struct umh_info *umh_info = info->data; + /* cleanup if umh_pipe_setup() was successful but exec failed */ + if (info->pid && info->retval) { + fput(umh_info->pipe_to_umh); + fput(umh_info->pipe_from_umh); + } + argv_free(info->argv); umh_info->pid = info->pid; } -- cgit v1.2.3-59-g8ed1b From f965b68188ab59a40a421ced1b05a2fea638465c Mon Sep 17 00:00:00 2001 From: Colin Xu Date: Fri, 8 May 2020 14:05:06 +0800 Subject: drm/i915/gvt: Init DPLL/DDI vreg for virtual display instead of inheritance. Init value of some display vregs rea inherited from host pregs. When host display in different status, i.e. all monitors unpluged, different display configurations, etc., GVT virtual display setup don't consistent thus may lead to guest driver consider display goes malfunctional. The added init vreg values are based on PRMs and fixed by calcuation from current configuration (only PIPE_A) and the virtual EDID. Fixes: 04d348ae3f0a ("drm/i915/gvt: vGPU display virtualization") Acked-by: Zhenyu Wang Signed-off-by: Colin Xu Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20200508060506.216250-1-colin.xu@intel.com --- drivers/gpu/drm/i915/gvt/display.c | 49 ++++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index a83df2f84eb9..a1696e9ce4b6 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -208,14 +208,41 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) SKL_FUSE_PG_DIST_STATUS(SKL_PG0) | SKL_FUSE_PG_DIST_STATUS(SKL_PG1) | SKL_FUSE_PG_DIST_STATUS(SKL_PG2); - vgpu_vreg_t(vgpu, LCPLL1_CTL) |= - LCPLL_PLL_ENABLE | - LCPLL_PLL_LOCK; - vgpu_vreg_t(vgpu, LCPLL2_CTL) |= LCPLL_PLL_ENABLE; - + /* + * Only 1 PIPE enabled in current vGPU display and PIPE_A is + * tied to TRANSCODER_A in HW, so it's safe to assume PIPE_A, + * TRANSCODER_A can be enabled. PORT_x depends on the input of + * setup_virtual_dp_monitor, we can bind DPLL0 to any PORT_x + * so we fixed to DPLL0 here. + * Setup DPLL0: DP link clk 1620 MHz, non SSC, DP Mode + */ + vgpu_vreg_t(vgpu, DPLL_CTRL1) = + DPLL_CTRL1_OVERRIDE(DPLL_ID_SKL_DPLL0); + vgpu_vreg_t(vgpu, DPLL_CTRL1) |= + DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1620, DPLL_ID_SKL_DPLL0); + vgpu_vreg_t(vgpu, LCPLL1_CTL) = + LCPLL_PLL_ENABLE | LCPLL_PLL_LOCK; + vgpu_vreg_t(vgpu, DPLL_STATUS) = DPLL_LOCK(DPLL_ID_SKL_DPLL0); + /* + * Golden M/N are calculated based on: + * 24 bpp, 4 lanes, 154000 pixel clk (from virtual EDID), + * DP link clk 1620 MHz and non-constant_n. + * TODO: calculate DP link symbol clk and stream clk m/n. + */ + vgpu_vreg_t(vgpu, PIPE_DATA_M1(TRANSCODER_A)) = 63 << TU_SIZE_SHIFT; + vgpu_vreg_t(vgpu, PIPE_DATA_M1(TRANSCODER_A)) |= 0x5b425e; + vgpu_vreg_t(vgpu, PIPE_DATA_N1(TRANSCODER_A)) = 0x800000; + vgpu_vreg_t(vgpu, PIPE_LINK_M1(TRANSCODER_A)) = 0x3cd6e; + vgpu_vreg_t(vgpu, PIPE_LINK_N1(TRANSCODER_A)) = 0x80000; } if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) { + vgpu_vreg_t(vgpu, DPLL_CTRL2) &= + ~DPLL_CTRL2_DDI_CLK_OFF(PORT_B); + vgpu_vreg_t(vgpu, DPLL_CTRL2) |= + DPLL_CTRL2_DDI_CLK_SEL(DPLL_ID_SKL_DPLL0, PORT_B); + vgpu_vreg_t(vgpu, DPLL_CTRL2) |= + DPLL_CTRL2_DDI_SEL_OVERRIDE(PORT_B); vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIB_DETECTED; vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &= ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK | @@ -236,6 +263,12 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) } if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) { + vgpu_vreg_t(vgpu, DPLL_CTRL2) &= + ~DPLL_CTRL2_DDI_CLK_OFF(PORT_C); + vgpu_vreg_t(vgpu, DPLL_CTRL2) |= + DPLL_CTRL2_DDI_CLK_SEL(DPLL_ID_SKL_DPLL0, PORT_C); + vgpu_vreg_t(vgpu, DPLL_CTRL2) |= + DPLL_CTRL2_DDI_SEL_OVERRIDE(PORT_C); vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTC_HOTPLUG_CPT; vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &= ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK | @@ -256,6 +289,12 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) } if (intel_vgpu_has_monitor_on_port(vgpu, PORT_D)) { + vgpu_vreg_t(vgpu, DPLL_CTRL2) &= + ~DPLL_CTRL2_DDI_CLK_OFF(PORT_D); + vgpu_vreg_t(vgpu, DPLL_CTRL2) |= + DPLL_CTRL2_DDI_CLK_SEL(DPLL_ID_SKL_DPLL0, PORT_D); + vgpu_vreg_t(vgpu, DPLL_CTRL2) |= + DPLL_CTRL2_DDI_SEL_OVERRIDE(PORT_D); vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT; vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &= ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK | -- cgit v1.2.3-59-g8ed1b From 9302bead664f49ef73040fa06fa64552d02fee42 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 8 May 2020 02:15:19 +0000 Subject: octeontx2-vf: Fix error return code in otx2vf_probe() Fix to return negative error code -ENOMEM from the alloc failed error handling case instead of 0, as done elsewhere in this function. Fixes: 3184fb5ba96e ("octeontx2-vf: Virtual function driver support") Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 187c633a7af5..f4227517dc8e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -497,13 +497,17 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) hw->irq_name = devm_kmalloc_array(&hw->pdev->dev, num_vec, NAME_SIZE, GFP_KERNEL); - if (!hw->irq_name) + if (!hw->irq_name) { + err = -ENOMEM; goto err_free_netdev; + } hw->affinity_mask = devm_kcalloc(&hw->pdev->dev, num_vec, sizeof(cpumask_var_t), GFP_KERNEL); - if (!hw->affinity_mask) + if (!hw->affinity_mask) { + err = -ENOMEM; goto err_free_netdev; + } err = pci_alloc_irq_vectors(hw->pdev, num_vec, num_vec, PCI_IRQ_MSIX); if (err < 0) { -- cgit v1.2.3-59-g8ed1b From 57644431a6c2faac5d754ebd35780cf43a531b1a Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 8 May 2020 19:28:34 +0200 Subject: net: ipv4: really enforce backoff for redirects In commit b406472b5ad7 ("net: ipv4: avoid mixed n_redirects and rate_tokens usage") I missed the fact that a 0 'rate_tokens' will bypass the backoff algorithm. Since rate_tokens is cleared after a redirect silence, and never incremented on redirects, if the host keeps receiving packets requiring redirect it will reply ignoring the backoff. Additionally, the 'rate_last' field will be updated with the cadence of the ingress packet requiring redirect. If that rate is high enough, that will prevent the host from generating any other kind of ICMP messages The check for a zero 'rate_tokens' value was likely a shortcut to avoid the more complex backoff algorithm after a redirect silence period. Address the issue checking for 'n_redirects' instead, which is incremented on successful redirect, and does not interfere with other ICMP replies. Fixes: b406472b5ad7 ("net: ipv4: avoid mixed n_redirects and rate_tokens usage") Reported-and-tested-by: Colin Walters Signed-off-by: Paolo Abeni Signed-off-by: Jakub Kicinski --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 788c69d9bfe0..fa829f31a3f5 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -915,7 +915,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) /* Check for load limit; set rate_last to the latest sent * redirect. */ - if (peer->rate_tokens == 0 || + if (peer->n_redirects == 0 || time_after(jiffies, (peer->rate_last + (ip_rt_redirect_load << peer->n_redirects)))) { -- cgit v1.2.3-59-g8ed1b From bcb543cc3d4034da3f3fd8bc4296a26dfeadf47d Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 9 May 2020 13:13:33 -0700 Subject: hwmon: (drivetemp) Fix SCT support if SCT data tables are not supported If SCT is supported but SCT data tables are not, the driver unnecessarily tries to fall back to SMART. Use SCT without data tables instead in this situation. Fixes: 5b46903d8bf3 ("hwmon: Driver for disk and solid state drives with temperature sensors") Signed-off-by: Guenter Roeck --- drivers/hwmon/drivetemp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/drivetemp.c b/drivers/hwmon/drivetemp.c index 9179460c2d9d..0d4f3d97ffc6 100644 --- a/drivers/hwmon/drivetemp.c +++ b/drivers/hwmon/drivetemp.c @@ -346,7 +346,7 @@ static int drivetemp_identify_sata(struct drivetemp_data *st) st->have_temp_highest = temp_is_valid(buf[SCT_STATUS_TEMP_HIGHEST]); if (!have_sct_data_table) - goto skip_sct; + goto skip_sct_data; /* Request and read temperature history table */ memset(buf, '\0', sizeof(st->smartdata)); -- cgit v1.2.3-59-g8ed1b From 78a5255ffb6a1af189a83e493d916ba1c54d8c75 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 9 May 2020 13:57:10 -0700 Subject: Stop the ad-hoc games with -Wno-maybe-initialized We have some rather random rules about when we accept the "maybe-initialized" warnings, and when we don't. For example, we consider it unreliable for gcc versions < 4.9, but also if -O3 is enabled, or if optimizing for size. And then various kernel config options disabled it, because they know that they trigger that warning by confusing gcc sufficiently (ie PROFILE_ALL_BRANCHES). And now gcc-10 seems to be introducing a lot of those warnings too, so it falls under the same heading as 4.9 did. At the same time, we have a very straightforward way to _enable_ that warning when wanted: use "W=2" to enable more warnings. So stop playing these ad-hoc games, and just disable that warning by default, with the known and straight-forward "if you want to work on the extra compiler warnings, use W=123". Would it be great to have code that is always so obvious that it never confuses the compiler whether a variable is used initialized or not? Yes, it would. In a perfect world, the compilers would be smarter, and our source code would be simpler. That's currently not the world we live in, though. Signed-off-by: Linus Torvalds --- Makefile | 7 +++---- init/Kconfig | 18 ------------------ kernel/trace/Kconfig | 1 - 3 files changed, 3 insertions(+), 23 deletions(-) diff --git a/Makefile b/Makefile index 3512f7b243dc..1e94f4f15d92 100644 --- a/Makefile +++ b/Makefile @@ -729,10 +729,6 @@ else ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE KBUILD_CFLAGS += -Os endif -ifdef CONFIG_CC_DISABLE_WARN_MAYBE_UNINITIALIZED -KBUILD_CFLAGS += -Wno-maybe-uninitialized -endif - # Tell gcc to never replace conditional load with a non-conditional one KBUILD_CFLAGS += $(call cc-option,--param=allow-store-data-races=0) KBUILD_CFLAGS += $(call cc-option,-fno-allow-store-data-races) @@ -881,6 +877,9 @@ KBUILD_CFLAGS += -Wno-pointer-sign # disable stringop warnings in gcc 8+ KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation) +# Enabled with W=2, disabled by default as noisy +KBUILD_CFLAGS += $(call cc-disable-warning, maybe-uninitialized) + # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow) diff --git a/init/Kconfig b/init/Kconfig index 9e22ee8fbd75..9278a603d399 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -39,22 +39,6 @@ config TOOLS_SUPPORT_RELR config CC_HAS_ASM_INLINE def_bool $(success,echo 'void foo(void) { asm inline (""); }' | $(CC) -x c - -c -o /dev/null) -config CC_HAS_WARN_MAYBE_UNINITIALIZED - def_bool $(cc-option,-Wmaybe-uninitialized) - help - GCC >= 4.7 supports this option. - -config CC_DISABLE_WARN_MAYBE_UNINITIALIZED - bool - depends on CC_HAS_WARN_MAYBE_UNINITIALIZED - default CC_IS_GCC && GCC_VERSION < 40900 # unreliable for GCC < 4.9 - help - GCC's -Wmaybe-uninitialized is not reliable by definition. - Lots of false positive warnings are produced in some cases. - - If this option is enabled, -Wno-maybe-uninitialzed is passed - to the compiler to suppress maybe-uninitialized warnings. - config CONSTRUCTORS bool depends on !UML @@ -1257,14 +1241,12 @@ config CC_OPTIMIZE_FOR_PERFORMANCE config CC_OPTIMIZE_FOR_PERFORMANCE_O3 bool "Optimize more for performance (-O3)" depends on ARC - imply CC_DISABLE_WARN_MAYBE_UNINITIALIZED # avoid false positives help Choosing this option will pass "-O3" to your compiler to optimize the kernel yet more for performance. config CC_OPTIMIZE_FOR_SIZE bool "Optimize for size (-Os)" - imply CC_DISABLE_WARN_MAYBE_UNINITIALIZED # avoid false positives help Choosing this option will pass "-Os" to your compiler resulting in a smaller kernel. diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 402eef84c859..743647005f64 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -466,7 +466,6 @@ config PROFILE_ANNOTATED_BRANCHES config PROFILE_ALL_BRANCHES bool "Profile all if conditionals" if !FORTIFY_SOURCE select TRACE_BRANCH_PROFILING - imply CC_DISABLE_WARN_MAYBE_UNINITIALIZED # avoid false positives help This tracer profiles all branch conditions. Every if () taken in the kernel is recorded whether it hit or miss. -- cgit v1.2.3-59-g8ed1b From 5c45de21a2223fe46cf9488c99a7fbcf01527670 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 9 May 2020 14:30:29 -0700 Subject: gcc-10: disable 'zero-length-bounds' warning for now This is a fine warning, but we still have a number of zero-length arrays in the kernel that come from the traditional gcc extension. Yes, they are getting converted to flexible arrays, but in the meantime the gcc-10 warning about zero-length bounds is very verbose, and is hiding other issues. I missed one actual build failure because it was hidden among hundreds of lines of warning. Thankfully I caught it on the second go before pushing things out, but it convinced me that I really need to disable the new warnings for now. We'll hopefully be all done with our conversion to flexible arrays in the not too distant future, and we can then re-enable this warning. Signed-off-by: Linus Torvalds --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index 1e94f4f15d92..68a781326dd7 100644 --- a/Makefile +++ b/Makefile @@ -877,6 +877,9 @@ KBUILD_CFLAGS += -Wno-pointer-sign # disable stringop warnings in gcc 8+ KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation) +# We'll want to enable this eventually, but it's not going away for 5.7 at least +KBUILD_CFLAGS += $(call cc-disable-warning, zero-length-bounds) + # Enabled with W=2, disabled by default as noisy KBUILD_CFLAGS += $(call cc-disable-warning, maybe-uninitialized) -- cgit v1.2.3-59-g8ed1b From 44720996e2d79e47d508b0abe99b931a726a3197 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 9 May 2020 14:52:44 -0700 Subject: gcc-10: disable 'array-bounds' warning for now This is another fine warning, related to the 'zero-length-bounds' one, but hitting the same historical code in the kernel. Because C didn't historically support flexible array members, we have code that instead uses a one-sized array, the same way we have cases of zero-sized arrays. The one-sized arrays come from either not wanting to use the gcc zero-sized array extension, or from a slight convenience-feature, where particularly for strings, the size of the structure now includes the allocation for the final NUL character. So with a "char name[1];" at the end of a structure, you can do things like v = my_malloc(sizeof(struct vendor) + strlen(name)); and avoid the "+1" for the terminator. Yes, the modern way to do that is with a flexible array, and using 'offsetof()' instead of 'sizeof()', and adding the "+1" by hand. That also technically gets the size "more correct" in that it avoids any alignment (and thus padding) issues, but this is another long-term cleanup thing that will not happen for 5.7. So disable the warning for now, even though it's potentially quite useful. Having a slew of warnings that then hide more urgent new issues is not an improvement. Signed-off-by: Linus Torvalds --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 68a781326dd7..b4c88f0eb6fe 100644 --- a/Makefile +++ b/Makefile @@ -879,6 +879,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation) # We'll want to enable this eventually, but it's not going away for 5.7 at least KBUILD_CFLAGS += $(call cc-disable-warning, zero-length-bounds) +KBUILD_CFLAGS += $(call cc-disable-warning, array-bounds) # Enabled with W=2, disabled by default as noisy KBUILD_CFLAGS += $(call cc-disable-warning, maybe-uninitialized) -- cgit v1.2.3-59-g8ed1b From d51cfc53ade3189455a1b88ec7a2ff0c24597cf8 Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Mon, 4 May 2020 14:47:55 +0200 Subject: bdi: use bdi_dev_name() to get device name Use the common interface bdi_dev_name() to get device name. Signed-off-by: Yufen Yu Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jan Kara Reviewed-by: Bart Van Assche Add missing include BFQ Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 6 ++++-- block/blk-cgroup.c | 2 +- fs/ceph/debugfs.c | 2 +- include/trace/events/wbt.h | 8 ++++---- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 78ba57efd16b..3d411716d7ee 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -123,6 +123,7 @@ #include #include #include +#include #include "blk.h" #include "blk-mq.h" @@ -4976,8 +4977,9 @@ bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic) ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio); switch (ioprio_class) { default: - dev_err(bfqq->bfqd->queue->backing_dev_info->dev, - "bfq: bad prio class %d\n", ioprio_class); + pr_err("bdi %s: bfq: bad prio class %d\n", + bdi_dev_name(bfqq->bfqd->queue->backing_dev_info), + ioprio_class); /* fall through */ case IOPRIO_CLASS_NONE: /* diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index c5dc833212e1..930212c1a512 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -496,7 +496,7 @@ const char *blkg_dev_name(struct blkcg_gq *blkg) { /* some drivers (floppy) instantiate a queue w/o disk registered */ if (blkg->q->backing_dev_info->dev) - return dev_name(blkg->q->backing_dev_info->dev); + return bdi_dev_name(blkg->q->backing_dev_info); return NULL; } diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 481ac97b4d25..dcaed75de9e6 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -271,7 +271,7 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) &congestion_kb_fops); snprintf(name, sizeof(name), "../../bdi/%s", - dev_name(fsc->sb->s_bdi->dev)); + bdi_dev_name(fsc->sb->s_bdi)); fsc->debugfs_bdi = debugfs_create_symlink("bdi", fsc->client->debugfs_dir, diff --git a/include/trace/events/wbt.h b/include/trace/events/wbt.h index 784814160197..9c66e59d859c 100644 --- a/include/trace/events/wbt.h +++ b/include/trace/events/wbt.h @@ -33,7 +33,7 @@ TRACE_EVENT(wbt_stat, ), TP_fast_assign( - strlcpy(__entry->name, dev_name(bdi->dev), + strlcpy(__entry->name, bdi_dev_name(bdi), ARRAY_SIZE(__entry->name)); __entry->rmean = stat[0].mean; __entry->rmin = stat[0].min; @@ -68,7 +68,7 @@ TRACE_EVENT(wbt_lat, ), TP_fast_assign( - strlcpy(__entry->name, dev_name(bdi->dev), + strlcpy(__entry->name, bdi_dev_name(bdi), ARRAY_SIZE(__entry->name)); __entry->lat = div_u64(lat, 1000); ), @@ -105,7 +105,7 @@ TRACE_EVENT(wbt_step, ), TP_fast_assign( - strlcpy(__entry->name, dev_name(bdi->dev), + strlcpy(__entry->name, bdi_dev_name(bdi), ARRAY_SIZE(__entry->name)); __entry->msg = msg; __entry->step = step; @@ -141,7 +141,7 @@ TRACE_EVENT(wbt_timer, ), TP_fast_assign( - strlcpy(__entry->name, dev_name(bdi->dev), + strlcpy(__entry->name, bdi_dev_name(bdi), ARRAY_SIZE(__entry->name)); __entry->status = status; __entry->step = step; -- cgit v1.2.3-59-g8ed1b From 6bd87eec23cbc9ed222bed0f5b5b02bf300e9a8d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 May 2020 14:47:56 +0200 Subject: bdi: add a ->dev_name field to struct backing_dev_info Cache a copy of the name for the life time of the backing_dev_info structure so that we can reference it even after unregistering. Fixes: 68f23b89067f ("memcg: fix a crash in wb_workfn when a device disappears") Reported-by: Yufen Yu Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 1 + mm/backing-dev.c | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index ee577a83cfe6..7367150f962a 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -219,6 +219,7 @@ struct backing_dev_info { wait_queue_head_t wb_waitq; struct device *dev; + char dev_name[64]; struct device *owner; struct timer_list laptop_mode_wb_timer; diff --git a/mm/backing-dev.c b/mm/backing-dev.c index c2c44c89ee5d..efc5b83acd2d 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -938,7 +938,8 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args) if (bdi->dev) /* The driver needs to use separate queues per device */ return 0; - dev = device_create_vargs(bdi_class, NULL, MKDEV(0, 0), bdi, fmt, args); + vsnprintf(bdi->dev_name, sizeof(bdi->dev_name), fmt, args); + dev = device_create(bdi_class, NULL, MKDEV(0, 0), bdi, bdi->dev_name); if (IS_ERR(dev)) return PTR_ERR(dev); @@ -1047,7 +1048,7 @@ const char *bdi_dev_name(struct backing_dev_info *bdi) { if (!bdi || !bdi->dev) return bdi_unknown_name; - return dev_name(bdi->dev); + return bdi->dev_name; } EXPORT_SYMBOL_GPL(bdi_dev_name); -- cgit v1.2.3-59-g8ed1b From a8de6639169b90e3dc4f27e752a3c5abac5e90da Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 7 May 2020 23:07:04 +0300 Subject: nvme-pci: fix "slimmer CQ head update" Pre-incrementing ->cq_head can't be done in memory because OOB value can be observed by another context. This devalues space savings compared to original code :-\ $ ./scripts/bloat-o-meter ../vmlinux-000 ../obj/vmlinux add/remove: 0/0 grow/shrink: 0/4 up/down: 0/-32 (-32) Function old new delta nvme_poll_irqdisable 464 456 -8 nvme_poll 455 447 -8 nvme_irq 388 380 -8 nvme_dev_disable 955 947 -8 But the code is minimal now: one read for head, one read for q_depth, one increment, one comparison, single instruction phase bit update and one write for new head. Signed-off-by: Alexey Dobriyan Reported-by: John Garry Tested-by: John Garry Fixes: e2a366a4b0feaeb ("nvme-pci: slimmer CQ head update") Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 4e79e412b276..e13c370de830 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -973,9 +973,13 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) static inline void nvme_update_cq_head(struct nvme_queue *nvmeq) { - if (++nvmeq->cq_head == nvmeq->q_depth) { + u16 tmp = nvmeq->cq_head + 1; + + if (tmp == nvmeq->q_depth) { nvmeq->cq_head = 0; nvmeq->cq_phase ^= 1; + } else { + nvmeq->cq_head = tmp; } } -- cgit v1.2.3-59-g8ed1b From 59c7c3caaaf8750df4ec3255082f15eb4e371514 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 6 May 2020 15:44:02 -0700 Subject: nvme: fix possible hang when ns scanning fails during error recovery When the controller is reconnecting, the host fails I/O and admin commands as the host cannot reach the controller. ns scanning may revalidate namespaces during that period and it is wrong to remove namespaces due to these failures as we may hang (see 205da2434301). One command that may fail is nvme_identify_ns_descs. Since we return success due to having ns identify descriptor list optional, we continue to compare ns identifiers in nvme_revalidate_disk, obviously fail and return -ENODEV to nvme_validate_ns, which will remove the namespace. Exactly what we don't want to happen. Fixes: 22802bf742c2 ("nvme: Namepace identification descriptor list is optional") Tested-by: Anton Eidelman Signed-off-by: Sagi Grimberg Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f2adea96b04c..f3c037f5a9ba 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1110,7 +1110,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid, * Don't treat an error as fatal, as we potentially already * have a NGUID or EUI-64. */ - if (status > 0) + if (status > 0 && !(status & NVME_SC_DNR)) status = 0; goto free_data; } -- cgit v1.2.3-59-g8ed1b From 5a76021c2eff7fcf2f0918a08fd8a37ce7922921 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 9 May 2020 15:40:52 -0700 Subject: gcc-10: disable 'stringop-overflow' warning for now This is the final array bounds warning removal for gcc-10 for now. Again, the warning is good, and we should re-enable all these warnings when we have converted all the legacy array declaration cases to flexible arrays. But in the meantime, it's just noise. Signed-off-by: Linus Torvalds --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index b4c88f0eb6fe..abdb355d3f46 100644 --- a/Makefile +++ b/Makefile @@ -880,6 +880,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation) # We'll want to enable this eventually, but it's not going away for 5.7 at least KBUILD_CFLAGS += $(call cc-disable-warning, zero-length-bounds) KBUILD_CFLAGS += $(call cc-disable-warning, array-bounds) +KBUILD_CFLAGS += $(call cc-disable-warning, stringop-overflow) # Enabled with W=2, disabled by default as noisy KBUILD_CFLAGS += $(call cc-disable-warning, maybe-uninitialized) -- cgit v1.2.3-59-g8ed1b From adc71920969870dfa54e8f40dac8616284832d02 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 9 May 2020 15:45:21 -0700 Subject: gcc-10: disable 'restrict' warning for now gcc-10 now warns about passing aliasing pointers to functions that take restricted pointers. That's actually a great warning, and if we ever start using 'restrict' in the kernel, it might be quite useful. But right now we don't, and it turns out that the only thing this warns about is an idiom where we have declared a few functions to be "printf-like" (which seems to make gcc pick up the restricted pointer thing), and then we print to the same buffer that we also use as an input. And people do that as an odd concatenation pattern, with code like this: #define sysfs_show_gen_prop(buffer, fmt, ...) \ snprintf(buffer, PAGE_SIZE, "%s"fmt, buffer, __VA_ARGS__) where we have 'buffer' as both the destination of the final result, and as the initial argument. Yes, it's a bit questionable. And outside of the kernel, people do have standard declarations like int snprintf( char *restrict buffer, size_t bufsz, const char *restrict format, ... ); where that output buffer is marked as a restrict pointer that cannot alias with any other arguments. But in the context of the kernel, that 'use snprintf() to concatenate to the end result' does work, and the pattern shows up in multiple places. And we have not marked our own version of snprintf() as taking restrict pointers, so the warning is incorrect for now, and gcc picks it up on its own. If we do start using 'restrict' in the kernel (and it might be a good idea if people find places where it matters), we'll need to figure out how to avoid this issue for snprintf and friends. But in the meantime, this warning is not useful. Signed-off-by: Linus Torvalds --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index abdb355d3f46..a45d8d09f354 100644 --- a/Makefile +++ b/Makefile @@ -882,6 +882,9 @@ KBUILD_CFLAGS += $(call cc-disable-warning, zero-length-bounds) KBUILD_CFLAGS += $(call cc-disable-warning, array-bounds) KBUILD_CFLAGS += $(call cc-disable-warning, stringop-overflow) +# Another good warning that we'll want to enable eventually +KBUILD_CFLAGS += $(call cc-disable-warning, restrict) + # Enabled with W=2, disabled by default as noisy KBUILD_CFLAGS += $(call cc-disable-warning, maybe-uninitialized) -- cgit v1.2.3-59-g8ed1b From 99352c79af3e5f2e4724abf37fa5a2a3299b1c81 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 9 May 2020 14:04:52 +0200 Subject: net: freescale: select CONFIG_FIXED_PHY where needed I ran into a randconfig build failure with CONFIG_FIXED_PHY=m and CONFIG_GIANFAR=y: x86_64-linux-ld: drivers/net/ethernet/freescale/gianfar.o:(.rodata+0x418): undefined reference to `fixed_phy_change_carrier' It seems the same thing can happen with dpaa and ucc_geth, so change all three to do an explicit 'select FIXED_PHY'. The fixed-phy driver actually has an alternative stub function that theoretically allows building network drivers when fixed-phy is disabled, but I don't see how that would help here, as the drivers presumably would not work then. Signed-off-by: Arnd Bergmann Acked-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/Kconfig | 2 ++ drivers/net/ethernet/freescale/dpaa/Kconfig | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig index 2bd7ace0a953..bfc6bfe94d0a 100644 --- a/drivers/net/ethernet/freescale/Kconfig +++ b/drivers/net/ethernet/freescale/Kconfig @@ -77,6 +77,7 @@ config UCC_GETH depends on QUICC_ENGINE && PPC32 select FSL_PQ_MDIO select PHYLIB + select FIXED_PHY ---help--- This driver supports the Gigabit Ethernet mode of the QUICC Engine, which is available on some Freescale SOCs. @@ -90,6 +91,7 @@ config GIANFAR depends on HAS_DMA select FSL_PQ_MDIO select PHYLIB + select FIXED_PHY select CRC32 ---help--- This driver supports the Gigabit TSEC on the MPC83xx, MPC85xx, diff --git a/drivers/net/ethernet/freescale/dpaa/Kconfig b/drivers/net/ethernet/freescale/dpaa/Kconfig index 3b325733a4f8..0a54c7e0e4ae 100644 --- a/drivers/net/ethernet/freescale/dpaa/Kconfig +++ b/drivers/net/ethernet/freescale/dpaa/Kconfig @@ -3,6 +3,7 @@ menuconfig FSL_DPAA_ETH tristate "DPAA Ethernet" depends on FSL_DPAA && FSL_FMAN select PHYLIB + select FIXED_PHY select FSL_FMAN_MAC ---help--- Data Path Acceleration Architecture Ethernet driver, -- cgit v1.2.3-59-g8ed1b From 1a263ae60b04de959d9ce9caea4889385eefcc7b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 9 May 2020 15:58:04 -0700 Subject: gcc-10: avoid shadowing standard library 'free()' in crypto MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc-10 has started warning about conflicting types for a few new built-in functions, particularly 'free()'. This results in warnings like: crypto/xts.c:325:13: warning: conflicting types for built-in function ‘free’; expected ‘void(void *)’ [-Wbuiltin-declaration-mismatch] because the crypto layer had its local freeing functions called 'free()'. Gcc-10 is in the wrong here, since that function is marked 'static', and thus there is no chance of confusion with any standard library function namespace. But the simplest thing to do is to just use a different name here, and avoid this gcc mis-feature. [ Side note: gcc knowing about 'free()' is in itself not the mis-feature: the semantics of 'free()' are special enough that a compiler can validly do special things when seeing it. So the mis-feature here is that gcc thinks that 'free()' is some restricted name, and you can't shadow it as a local static function. Making the special 'free()' semantics be a function attribute rather than tied to the name would be the much better model ] Signed-off-by: Linus Torvalds --- crypto/lrw.c | 6 +++--- crypto/xts.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/crypto/lrw.c b/crypto/lrw.c index 376d7ed3f1f8..3c734b81b3a2 100644 --- a/crypto/lrw.c +++ b/crypto/lrw.c @@ -287,7 +287,7 @@ static void exit_tfm(struct crypto_skcipher *tfm) crypto_free_skcipher(ctx->child); } -static void free(struct skcipher_instance *inst) +static void free_inst(struct skcipher_instance *inst) { crypto_drop_skcipher(skcipher_instance_ctx(inst)); kfree(inst); @@ -400,12 +400,12 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb) inst->alg.encrypt = encrypt; inst->alg.decrypt = decrypt; - inst->free = free; + inst->free = free_inst; err = skcipher_register_instance(tmpl, inst); if (err) { err_free_inst: - free(inst); + free_inst(inst); } return err; } diff --git a/crypto/xts.c b/crypto/xts.c index dbdd8af629e6..6d8cea94b3cf 100644 --- a/crypto/xts.c +++ b/crypto/xts.c @@ -322,7 +322,7 @@ static void exit_tfm(struct crypto_skcipher *tfm) crypto_free_cipher(ctx->tweak); } -static void free(struct skcipher_instance *inst) +static void free_inst(struct skcipher_instance *inst) { crypto_drop_skcipher(skcipher_instance_ctx(inst)); kfree(inst); @@ -434,12 +434,12 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb) inst->alg.encrypt = encrypt; inst->alg.decrypt = decrypt; - inst->free = free; + inst->free = free_inst; err = skcipher_register_instance(tmpl, inst); if (err) { err_free_inst: - free(inst); + free_inst(inst); } return err; } -- cgit v1.2.3-59-g8ed1b From e99332e7b4cda6e60f5b5916cf9943a79dbef902 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 9 May 2020 17:50:03 -0700 Subject: gcc-10: mark more functions __init to avoid section mismatch warnings It seems that for whatever reason, gcc-10 ends up not inlining a couple of functions that used to be inlined before. Even if they only have one single callsite - it looks like gcc may have decided that the code was unlikely, and not worth inlining. The code generation difference is harmless, but caused a few new section mismatch errors, since the (now no longer inlined) function wasn't in the __init section, but called other init functions: Section mismatch in reference from the function kexec_free_initrd() to the function .init.text:free_initrd_mem() Section mismatch in reference from the function tpm2_calc_event_log_size() to the function .init.text:early_memremap() Section mismatch in reference from the function tpm2_calc_event_log_size() to the function .init.text:early_memunmap() So add the appropriate __init annotation to make modpost not complain. In both cases there were trivially just a single callsite from another __init function. Signed-off-by: Linus Torvalds --- drivers/firmware/efi/tpm.c | 2 +- init/initramfs.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/tpm.c b/drivers/firmware/efi/tpm.c index 31f9f0e369b9..55b031d2c989 100644 --- a/drivers/firmware/efi/tpm.c +++ b/drivers/firmware/efi/tpm.c @@ -16,7 +16,7 @@ int efi_tpm_final_log_size; EXPORT_SYMBOL(efi_tpm_final_log_size); -static int tpm2_calc_event_log_size(void *data, int count, void *size_info) +static int __init tpm2_calc_event_log_size(void *data, int count, void *size_info) { struct tcg_pcr_event2_head *header; int event_size, size = 0; diff --git a/init/initramfs.c b/init/initramfs.c index 8ec1be4d7d51..7a38012e1af7 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -542,7 +542,7 @@ void __weak free_initrd_mem(unsigned long start, unsigned long end) } #ifdef CONFIG_KEXEC_CORE -static bool kexec_free_initrd(void) +static bool __init kexec_free_initrd(void) { unsigned long crashk_start = (unsigned long)__va(crashk_res.start); unsigned long crashk_end = (unsigned long)__va(crashk_res.end); -- cgit v1.2.3-59-g8ed1b From 090e28b229af92dc5b40786ca673999d59e73056 Mon Sep 17 00:00:00 2001 From: Zefan Li Date: Sat, 9 May 2020 11:32:10 +0800 Subject: netprio_cgroup: Fix unlimited memory leak of v2 cgroups If systemd is configured to use hybrid mode which enables the use of both cgroup v1 and v2, systemd will create new cgroup on both the default root (v2) and netprio_cgroup hierarchy (v1) for a new session and attach task to the two cgroups. If the task does some network thing then the v2 cgroup can never be freed after the session exited. One of our machines ran into OOM due to this memory leak. In the scenario described above when sk_alloc() is called cgroup_sk_alloc() thought it's in v2 mode, so it stores the cgroup pointer in sk->sk_cgrp_data and increments the cgroup refcnt, but then sock_update_netprioidx() thought it's in v1 mode, so it stores netprioidx value in sk->sk_cgrp_data, so the cgroup refcnt will never be freed. Currently we do the mode switch when someone writes to the ifpriomap cgroup control file. The easiest fix is to also do the switch when a task is attached to a new cgroup. Fixes: bd1060a1d671 ("sock, cgroup: add sock->sk_cgroup") Reported-by: Yang Yingliang Tested-by: Yang Yingliang Signed-off-by: Zefan Li Acked-by: Tejun Heo Signed-off-by: Jakub Kicinski --- net/core/netprio_cgroup.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 8881dd943dd0..9bd4cab7d510 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -236,6 +236,8 @@ static void net_prio_attach(struct cgroup_taskset *tset) struct task_struct *p; struct cgroup_subsys_state *css; + cgroup_sk_alloc_disable(); + cgroup_taskset_for_each(p, css, tset) { void *v = (void *)(unsigned long)css->id; -- cgit v1.2.3-59-g8ed1b From 14425f1f521fdfe274a7bb390637c786432e08b4 Mon Sep 17 00:00:00 2001 From: Mike Pozulp Date: Sat, 9 May 2020 20:28:37 -0700 Subject: ALSA: hda/realtek: Add quirk for Samsung Notebook Some models of the Samsung Notebook 9 have very quiet and distorted headphone output. This quirk changes the VREF value of the ALC298 codec NID 0x1a from default HIZ to new 100. [ adjusted to 5.7-base and rearranged in SSID order -- tiwai ] Signed-off-by: Mike Pozulp BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=207423 Link: https://lore.kernel.org/r/20200510032838.1989130-1-pozulp.kernel@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 1f4b9e387d9f..188ba94c5cee 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6093,6 +6093,7 @@ enum { ALC285_FIXUP_HP_GPIO_LED, ALC285_FIXUP_HP_MUTE_LED, ALC236_FIXUP_HP_MUTE_LED, + ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, }; static const struct hda_fixup alc269_fixups[] = { @@ -7232,6 +7233,13 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc236_fixup_hp_mute_led, }, + [ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + { 0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, 0xc5 }, + { } + }, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -7426,6 +7434,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x10ec, 0x10f2, "Intel Reference board", ALC700_FIXUP_INTEL_REFERENCE), SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_HEADSET_MODE), SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC), + SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), + SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET), SND_PCI_QUIRK(0x144d, 0xc740, "Samsung Ativ book 8 (NP870Z5G)", ALC269_FIXUP_ATIV_BOOK_8), SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), -- cgit v1.2.3-59-g8ed1b From 3047211ca11bf77b3ecbce045c0aa544d934b945 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 9 May 2020 16:45:44 -0700 Subject: net: dsa: loop: Add module soft dependency There is a soft dependency against dsa_loop_bdinfo.ko which sets up the MDIO device registration, since there are no symbols referenced by dsa_loop.ko, there is no automatic loading of dsa_loop_bdinfo.ko which is needed. Fixes: 98cd1552ea27 ("net: dsa: Mock-up driver") Signed-off-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/dsa/dsa_loop.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c index fdcb70b9f0e4..400207c5c7de 100644 --- a/drivers/net/dsa/dsa_loop.c +++ b/drivers/net/dsa/dsa_loop.c @@ -360,6 +360,7 @@ static void __exit dsa_loop_exit(void) } module_exit(dsa_loop_exit); +MODULE_SOFTDEP("pre: dsa_loop_bdinfo"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Florian Fainelli"); MODULE_DESCRIPTION("DSA loopback driver"); -- cgit v1.2.3-59-g8ed1b From 2c407aca64977ede9b9f35158e919773cae2082f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 30 Apr 2020 23:30:48 +0200 Subject: netfilter: conntrack: avoid gcc-10 zero-length-bounds warning gcc-10 warns around a suspicious access to an empty struct member: net/netfilter/nf_conntrack_core.c: In function '__nf_conntrack_alloc': net/netfilter/nf_conntrack_core.c:1522:9: warning: array subscript 0 is outside the bounds of an interior zero-length array 'u8[0]' {aka 'unsigned char[0]'} [-Wzero-length-bounds] 1522 | memset(&ct->__nfct_init_offset[0], 0, | ^~~~~~~~~~~~~~~~~~~~~~~~~~ In file included from net/netfilter/nf_conntrack_core.c:37: include/net/netfilter/nf_conntrack.h:90:5: note: while referencing '__nfct_init_offset' 90 | u8 __nfct_init_offset[0]; | ^~~~~~~~~~~~~~~~~~ The code is correct but a bit unusual. Rework it slightly in a way that does not trigger the warning, using an empty struct instead of an empty array. There are probably more elegant ways to do this, but this is the smallest change. Fixes: c41884ce0562 ("netfilter: conntrack: avoid zeroing timer") Signed-off-by: Arnd Bergmann Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 2 +- net/netfilter/nf_conntrack_core.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 9f551f3b69c6..90690e37a56f 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -87,7 +87,7 @@ struct nf_conn { struct hlist_node nat_bysource; #endif /* all members below initialized via memset */ - u8 __nfct_init_offset[0]; + struct { } __nfct_init_offset; /* If we were expected by an expectation, this will be it */ struct nf_conn *master; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index c4582eb71766..0173398f4ced 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1519,9 +1519,9 @@ __nf_conntrack_alloc(struct net *net, ct->status = 0; ct->timeout = 0; write_pnet(&ct->ct_net, net); - memset(&ct->__nfct_init_offset[0], 0, + memset(&ct->__nfct_init_offset, 0, offsetof(struct nf_conn, proto) - - offsetof(struct nf_conn, __nfct_init_offset[0])); + offsetof(struct nf_conn, __nfct_init_offset)); nf_ct_zone_add(ct, zone); -- cgit v1.2.3-59-g8ed1b From 2ef96a5bb12be62ef75b5828c0aab838ebb29cb8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 10 May 2020 15:16:58 -0700 Subject: Linux 5.7-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a45d8d09f354..11fe9b1535de 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 7 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Kleptomaniac Octopus # *DOCUMENTATION* -- cgit v1.2.3-59-g8ed1b From e8a1b0efd632d1c9db7d4e93da66377c7b524862 Mon Sep 17 00:00:00 2001 From: Luo bin Date: Sun, 10 May 2020 19:01:08 +0000 Subject: hinic: fix a bug of ndo_stop if some function in ndo_stop interface returns failure because of hardware fault, must go on excuting rest steps rather than return failure directly, otherwise will cause memory leak.And bump the timeout for SET_FUNC_STATE to ensure that cmd won't return failure when hw is busy. Otherwise hw may stomp host memory if we free memory regardless of the return value of SET_FUNC_STATE. Fixes: 51ba902a16e6 ("net-next/hinic: Initialize hw interface") Signed-off-by: Luo bin Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c | 16 ++++++++++++---- drivers/net/ethernet/huawei/hinic/hinic_main.c | 16 ++-------------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c index 8995e32dd1c0..992908e6eebf 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c @@ -45,6 +45,8 @@ #define MGMT_MSG_TIMEOUT 5000 +#define SET_FUNC_PORT_MGMT_TIMEOUT 25000 + #define mgmt_to_pfhwdev(pf_mgmt) \ container_of(pf_mgmt, struct hinic_pfhwdev, pf_to_mgmt) @@ -238,12 +240,13 @@ static int msg_to_mgmt_sync(struct hinic_pf_to_mgmt *pf_to_mgmt, u8 *buf_in, u16 in_size, u8 *buf_out, u16 *out_size, enum mgmt_direction_type direction, - u16 resp_msg_id) + u16 resp_msg_id, u32 timeout) { struct hinic_hwif *hwif = pf_to_mgmt->hwif; struct pci_dev *pdev = hwif->pdev; struct hinic_recv_msg *recv_msg; struct completion *recv_done; + unsigned long timeo; u16 msg_id; int err; @@ -267,8 +270,9 @@ static int msg_to_mgmt_sync(struct hinic_pf_to_mgmt *pf_to_mgmt, goto unlock_sync_msg; } - if (!wait_for_completion_timeout(recv_done, - msecs_to_jiffies(MGMT_MSG_TIMEOUT))) { + timeo = msecs_to_jiffies(timeout ? timeout : MGMT_MSG_TIMEOUT); + + if (!wait_for_completion_timeout(recv_done, timeo)) { dev_err(&pdev->dev, "MGMT timeout, MSG id = %d\n", msg_id); err = -ETIMEDOUT; goto unlock_sync_msg; @@ -342,6 +346,7 @@ int hinic_msg_to_mgmt(struct hinic_pf_to_mgmt *pf_to_mgmt, { struct hinic_hwif *hwif = pf_to_mgmt->hwif; struct pci_dev *pdev = hwif->pdev; + u32 timeout = 0; if (sync != HINIC_MGMT_MSG_SYNC) { dev_err(&pdev->dev, "Invalid MGMT msg type\n"); @@ -353,9 +358,12 @@ int hinic_msg_to_mgmt(struct hinic_pf_to_mgmt *pf_to_mgmt, return -EINVAL; } + if (cmd == HINIC_PORT_CMD_SET_FUNC_STATE) + timeout = SET_FUNC_PORT_MGMT_TIMEOUT; + return msg_to_mgmt_sync(pf_to_mgmt, mod, cmd, buf_in, in_size, buf_out, out_size, MGMT_DIRECT_SEND, - MSG_NOT_RESP); + MSG_NOT_RESP, timeout); } /** diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index 13560975c103..63b92f6cc856 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -483,7 +483,6 @@ static int hinic_close(struct net_device *netdev) { struct hinic_dev *nic_dev = netdev_priv(netdev); unsigned int flags; - int err; down(&nic_dev->mgmt_lock); @@ -497,20 +496,9 @@ static int hinic_close(struct net_device *netdev) up(&nic_dev->mgmt_lock); - err = hinic_port_set_func_state(nic_dev, HINIC_FUNC_PORT_DISABLE); - if (err) { - netif_err(nic_dev, drv, netdev, - "Failed to set func port state\n"); - nic_dev->flags |= (flags & HINIC_INTF_UP); - return err; - } + hinic_port_set_state(nic_dev, HINIC_PORT_DISABLE); - err = hinic_port_set_state(nic_dev, HINIC_PORT_DISABLE); - if (err) { - netif_err(nic_dev, drv, netdev, "Failed to set port state\n"); - nic_dev->flags |= (flags & HINIC_INTF_UP); - return err; - } + hinic_port_set_func_state(nic_dev, HINIC_FUNC_PORT_DISABLE); if (nic_dev->flags & HINIC_RSS_ENABLE) { hinic_rss_deinit(nic_dev); -- cgit v1.2.3-59-g8ed1b From 7b301750f7f8f6503e11f1af4a03832525f58c66 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 9 May 2020 10:44:41 +0200 Subject: ACPI: EC: PM: Avoid premature returns from acpi_s2idle_wake() If the EC GPE status is not set after checking all of the other GPEs, acpi_s2idle_wake() returns 'false', to indicate that the SCI event that has just triggered is not a system wakeup one, but it does that without canceling the pending wakeup and re-arming the SCI for system wakeup which is a mistake, because it may cause s2idle_loop() to busy spin until the next valid wakeup event. [If that happens, the first spurious wakeup is still pending after acpi_s2idle_wake() has returned, so s2idle_enter() does nothing, acpi_s2idle_wake() is called again and it sees that the SCI has triggered, but no GPEs are active, so 'false' is returned again, and so on.] Fix that by moving all of the GPE checking logic from acpi_s2idle_wake() to acpi_ec_dispatch_gpe() and making the latter return 'true' only if a non-EC GPE has triggered and 'false' otherwise, which will cause acpi_s2idle_wake() to cancel the pending SCI wakeup and re-arm the SCI for system wakeup regardless of the EC GPE status. This also addresses a lockup observed on an Elitegroup EF20EA laptop after attempting to wake it up from suspend-to-idle by a key press. Fixes: d5406284ff80 ("ACPI: PM: s2idle: Refine active GPEs check") Link: https://bugzilla.kernel.org/show_bug.cgi?id=207603 Reported-by: Todd Brandt Fixes: fdde0ff8590b ("ACPI: PM: s2idle: Prevent spurious SCIs from waking up the system") Link: https://lore.kernel.org/linux-acpi/CAB4CAwdqo7=MvyG_PE+PGVfeA17AHF5i5JucgaKqqMX6mjArbQ@mail.gmail.com/ Reported-by: Chris Chiu Tested-by: Chris Chiu Cc: 5.4+ # 5.4+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/ec.c | 24 ++++++++++++++++-------- drivers/acpi/internal.h | 1 - drivers/acpi/sleep.c | 14 ++------------ 3 files changed, 18 insertions(+), 21 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index b4c0152e92aa..145ec0b6f20b 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1994,23 +1994,31 @@ void acpi_ec_set_gpe_wake_mask(u8 action) acpi_set_gpe_wake_mask(NULL, first_ec->gpe, action); } -bool acpi_ec_other_gpes_active(void) -{ - return acpi_any_gpe_status_set(first_ec ? first_ec->gpe : U32_MAX); -} - bool acpi_ec_dispatch_gpe(void) { u32 ret; if (!first_ec) + return acpi_any_gpe_status_set(U32_MAX); + + /* + * Report wakeup if the status bit is set for any enabled GPE other + * than the EC one. + */ + if (acpi_any_gpe_status_set(first_ec->gpe)) + return true; + + if (ec_no_wakeup) return false; + /* + * Dispatch the EC GPE in-band, but do not report wakeup in any case + * to allow the caller to process events properly after that. + */ ret = acpi_dispatch_gpe(NULL, first_ec->gpe); - if (ret == ACPI_INTERRUPT_HANDLED) { + if (ret == ACPI_INTERRUPT_HANDLED) pm_pr_dbg("EC GPE dispatched\n"); - return true; - } + return false; } #endif /* CONFIG_PM_SLEEP */ diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index e387517d3354..43411a7457cd 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -202,7 +202,6 @@ void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit); #ifdef CONFIG_PM_SLEEP void acpi_ec_flush_work(void); -bool acpi_ec_other_gpes_active(void); bool acpi_ec_dispatch_gpe(void); #endif diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 4edc8a3ce40f..3850704570c0 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -1013,20 +1013,10 @@ static bool acpi_s2idle_wake(void) if (acpi_check_wakeup_handlers()) return true; - /* - * If the status bit is set for any enabled GPE other than the - * EC one, the wakeup is regarded as a genuine one. - */ - if (acpi_ec_other_gpes_active()) + /* Check non-EC GPE wakeups and dispatch the EC GPE. */ + if (acpi_ec_dispatch_gpe()) return true; - /* - * If the EC GPE status bit has not been set, the wakeup is - * regarded as a spurious one. - */ - if (!acpi_ec_dispatch_gpe()) - return false; - /* * Cancel the wakeup and process all pending events in case * there are any wakeup ones in there. -- cgit v1.2.3-59-g8ed1b From 45a3fe3bf93b7cfeddc28ef7386555e05dc57f06 Mon Sep 17 00:00:00 2001 From: Raul E Rangel Date: Fri, 8 May 2020 16:54:21 -0600 Subject: mmc: sdhci-acpi: Add SDHCI_QUIRK2_BROKEN_64_BIT_DMA for AMDI0040 The AMD eMMC 5.0 controller does not support 64 bit DMA. Fixes: 34597a3f60b1 ("mmc: sdhci-acpi: Add support for ACPI HID of AMD Controller with HS400") Signed-off-by: Raul E Rangel Link: https://marc.info/?l=linux-mmc&m=158879884514552&w=2 Reviewed-by: Andy Shevchenko Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20200508165344.1.Id5bb8b1ae7ea576f26f9d91c761df7ccffbf58c5@changeid Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-acpi.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c index faba53cf139b..d8b76cb8698a 100644 --- a/drivers/mmc/host/sdhci-acpi.c +++ b/drivers/mmc/host/sdhci-acpi.c @@ -605,10 +605,12 @@ static int sdhci_acpi_emmc_amd_probe_slot(struct platform_device *pdev, } static const struct sdhci_acpi_slot sdhci_acpi_slot_amd_emmc = { - .chip = &sdhci_acpi_chip_amd, - .caps = MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE, - .quirks = SDHCI_QUIRK_32BIT_DMA_ADDR | SDHCI_QUIRK_32BIT_DMA_SIZE | - SDHCI_QUIRK_32BIT_ADMA_SIZE, + .chip = &sdhci_acpi_chip_amd, + .caps = MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE, + .quirks = SDHCI_QUIRK_32BIT_DMA_ADDR | + SDHCI_QUIRK_32BIT_DMA_SIZE | + SDHCI_QUIRK_32BIT_ADMA_SIZE, + .quirks2 = SDHCI_QUIRK2_BROKEN_64_BIT_DMA, .probe_slot = sdhci_acpi_emmc_amd_probe_slot, }; -- cgit v1.2.3-59-g8ed1b From 72a7a9925e2beea09b109dffb3384c9bf920d9da Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Wed, 6 May 2020 17:59:18 +0800 Subject: drm/i915/gvt: Fix kernel oops for 3-level ppgtt guest As i915 won't allocate extra PDP for current default PML4 table, so for 3-level ppgtt guest, we would hit kernel pointer access failure on extra PDP pointers. So this trys to bypass that now. It won't impact real shadow PPGTT setup, so guest context still works. This is verified on 4.15 guest kernel with i915.enable_ppgtt=1 to force on old aliasing ppgtt behavior. Fixes: 4f15665ccbba ("drm/i915: Add ppgtt to GVT GEM context") Reviewed-by: Xiong Zhang Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20200506095918.124913-1-zhenyuw@linux.intel.com --- drivers/gpu/drm/i915/gvt/scheduler.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index cb11c3184085..0f1d6998cf9c 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -379,7 +379,11 @@ static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, for (i = 0; i < GVT_RING_CTX_NR_PDPS; i++) { struct i915_page_directory * const pd = i915_pd_entry(ppgtt->pd, i); - + /* skip now as current i915 ppgtt alloc won't allocate + top level pdp for non 4-level table, won't impact + shadow ppgtt. */ + if (!pd) + break; px_dma(pd) = mm->ppgtt_mm.shadow_pdps[i]; } } -- cgit v1.2.3-59-g8ed1b From d51c214541c5154dda3037289ee895ea3ded5ebd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 10 May 2020 09:54:41 +0200 Subject: arm64: fix the flush_icache_range arguments in machine_kexec The second argument is the end "pointer", not the length. Fixes: d28f6df1305a ("arm64/kexec: Add core kexec support") Cc: # 4.8.x- Signed-off-by: Christoph Hellwig Signed-off-by: Catalin Marinas --- arch/arm64/kernel/machine_kexec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 8e9c924423b4..a0b144cfaea7 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -177,6 +177,7 @@ void machine_kexec(struct kimage *kimage) * the offline CPUs. Therefore, we must use the __* variant here. */ __flush_icache_range((uintptr_t)reboot_code_buffer, + (uintptr_t)reboot_code_buffer + arm64_relocate_new_kernel_size); /* Flush the kimage list and its buffers. */ -- cgit v1.2.3-59-g8ed1b From 2c8897953f3b2ff5498f3f275708a742bfcdbc24 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Wed, 6 May 2020 14:24:39 +0300 Subject: netfilter: flowtable: Add pending bit for offload work Gc step can queue offloaded flow del work or stats work. Those work items can race each other and a flow could be freed before the stats work is executed and querying it. To avoid that, add a pending bit that if a work exists for a flow don't queue another work for it. This will also avoid adding multiple stats works in case stats work didn't complete but gc step started again. Signed-off-by: Paul Blakey Reviewed-by: Roi Dayan Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_flow_table.h | 1 + net/netfilter/nf_flow_table_offload.c | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 6bf69652f57d..c54a7f707e50 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -127,6 +127,7 @@ enum nf_flow_flags { NF_FLOW_HW_DYING, NF_FLOW_HW_DEAD, NF_FLOW_HW_REFRESH, + NF_FLOW_HW_PENDING, }; enum flow_offload_type { diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index e3b099c14eff..3d4ca62c81f9 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -817,6 +817,7 @@ static void flow_offload_work_handler(struct work_struct *work) WARN_ON_ONCE(1); } + clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags); kfree(offload); } @@ -831,9 +832,14 @@ nf_flow_offload_work_alloc(struct nf_flowtable *flowtable, { struct flow_offload_work *offload; + if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags)) + return NULL; + offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC); - if (!offload) + if (!offload) { + clear_bit(NF_FLOW_HW_PENDING, &flow->flags); return NULL; + } offload->cmd = cmd; offload->flow = flow; -- cgit v1.2.3-59-g8ed1b From 1d10da0eb09484ae087836da28258316ef4a02be Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 10 May 2020 13:55:43 +0300 Subject: netfilter: flowtable: Remove WQ_MEM_RECLAIM from workqueue This workqueue is in charge of handling offloaded flow tasks like add/del/stats we should not use WQ_MEM_RECLAIM flag. The flag can result in the following warning. [ 485.557189] ------------[ cut here ]------------ [ 485.562976] workqueue: WQ_MEM_RECLAIM nf_flow_table_offload:flow_offload_worr [ 485.562985] WARNING: CPU: 7 PID: 3731 at kernel/workqueue.c:2610 check_flush0 [ 485.590191] Kernel panic - not syncing: panic_on_warn set ... [ 485.597100] CPU: 7 PID: 3731 Comm: kworker/u112:8 Not tainted 5.7.0-rc1.21802 [ 485.606629] Hardware name: Dell Inc. PowerEdge R730/072T6D, BIOS 2.4.3 01/177 [ 485.615487] Workqueue: nf_flow_table_offload flow_offload_work_handler [nf_f] [ 485.624834] Call Trace: [ 485.628077] dump_stack+0x50/0x70 [ 485.632280] panic+0xfb/0x2d7 [ 485.636083] ? check_flush_dependency+0x110/0x130 [ 485.641830] __warn.cold.12+0x20/0x2a [ 485.646405] ? check_flush_dependency+0x110/0x130 [ 485.652154] ? check_flush_dependency+0x110/0x130 [ 485.657900] report_bug+0xb8/0x100 [ 485.662187] ? sched_clock_cpu+0xc/0xb0 [ 485.666974] do_error_trap+0x9f/0xc0 [ 485.671464] do_invalid_op+0x36/0x40 [ 485.675950] ? check_flush_dependency+0x110/0x130 [ 485.681699] invalid_op+0x28/0x30 Fixes: 7da182a998d6 ("netfilter: flowtable: Use work entry per offload command") Reported-by: Marcelo Ricardo Leitner Signed-off-by: Roi Dayan Reviewed-by: Paul Blakey Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 3d4ca62c81f9..2276a73ccba2 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -1062,7 +1062,7 @@ static struct flow_indr_block_entry block_ing_entry = { int nf_flow_table_offload_init(void) { nf_flow_offload_wq = alloc_workqueue("nf_flow_table_offload", - WQ_UNBOUND | WQ_MEM_RECLAIM, 0); + WQ_UNBOUND, 0); if (!nf_flow_offload_wq) return -ENOMEM; -- cgit v1.2.3-59-g8ed1b From 54ab49fde95605a1077f759ce454d94e84b5ca45 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 10 May 2020 14:28:07 +0200 Subject: netfilter: conntrack: fix infinite loop on rmmod 'rmmod nf_conntrack' can hang forever, because the netns exit gets stuck in nf_conntrack_cleanup_net_list(): i_see_dead_people: busy = 0; list_for_each_entry(net, net_exit_list, exit_list) { nf_ct_iterate_cleanup(kill_all, net, 0, 0); if (atomic_read(&net->ct.count) != 0) busy = 1; } if (busy) { schedule(); goto i_see_dead_people; } When nf_ct_iterate_cleanup iterates the conntrack table, all nf_conn structures can be found twice: once for the original tuple and once for the conntracks reply tuple. get_next_corpse() only calls the iterator when the entry is in original direction -- the idea was to avoid unneeded invocations of the iterator callback. When support for clashing entries was added, the assumption that all nf_conn objects are added twice, once in original, once for reply tuple no longer holds -- NF_CLASH_BIT entries are only added in the non-clashing reply direction. Thus, if at least one NF_CLASH entry is in the list then nf_conntrack_cleanup_net_list() always skips it completely. During normal netns destruction, this causes a hang of several seconds, until the gc worker removes the entry (NF_CLASH entries always have a 1 second timeout). But in the rmmod case, the gc worker has already been stopped, so ct.count never becomes 0. We can fix this in two ways: 1. Add a second test for CLASH_BIT and call iterator for those entries as well, or: 2. Skip the original tuple direction and use the reply tuple. 2) is simpler, so do that. Fixes: 6a757c07e51f80ac ("netfilter: conntrack: allow insertion of clashing entries") Reported-by: Chen Yi Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0173398f4ced..1d57b95d3481 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2139,8 +2139,19 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data), nf_conntrack_lock(lockp); if (*bucket < nf_conntrack_htable_size) { hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) { - if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) + if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY) continue; + /* All nf_conn objects are added to hash table twice, one + * for original direction tuple, once for the reply tuple. + * + * Exception: In the IPS_NAT_CLASH case, only the reply + * tuple is added (the original tuple already existed for + * a different object). + * + * We only need to call the iterator once for each + * conntrack, so we just use the 'reply' direction + * tuple while iterating. + */ ct = nf_ct_tuplehash_to_ctrack(h); if (iter(ct, data)) goto found; -- cgit v1.2.3-59-g8ed1b From 995b819f291e872b191893a2e8b0f9c9d8a570d9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 11 May 2020 10:48:53 -0700 Subject: drm: fix trivial field description cut-and-paste error As reported by Amarnath Baliyase, the drm_mode_status enumeration documentation describes MODE_V_ILLEGAL as "mode has illegal horizontal timings". But that's just a cut-and-paste error from the previous line. The "V" stands for vertical, of course. I'm just fixing this directly rather than bothering with going through the proper channels. Less work for everybody. Reported-by: Amarnath Baliyase Signed-off-by: Linus Torvalds --- include/drm/drm_modes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/drm/drm_modes.h b/include/drm/drm_modes.h index 99134d4f35eb..320f8112a0f8 100644 --- a/include/drm/drm_modes.h +++ b/include/drm/drm_modes.h @@ -48,7 +48,7 @@ struct videomode; * @MODE_HSYNC: hsync out of range * @MODE_VSYNC: vsync out of range * @MODE_H_ILLEGAL: mode has illegal horizontal timings - * @MODE_V_ILLEGAL: mode has illegal horizontal timings + * @MODE_V_ILLEGAL: mode has illegal vertical timings * @MODE_BAD_WIDTH: requires an unsupported linepitch * @MODE_NOMODE: no mode with a matching name * @MODE_NO_INTERLACE: interlaced mode not supported -- cgit v1.2.3-59-g8ed1b From bc850943486887e3859597a266767f95db90aa72 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 6 May 2020 17:21:36 +0100 Subject: drm/i915: Propagate error from completed fences We need to preserve fatal errors from fences that are being terminated as we hook them up. Fixes: ef4688497512 ("drm/i915: Propagate fence errors") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200506162136.3325-1-chris@chris-wilson.co.uk (cherry picked from commit 24fe5f2ab2478053d50a3bc629ada895903a5cbc) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_request.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index c0df71d7d0ff..182a2995f674 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1017,8 +1017,10 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) GEM_BUG_ON(to == from); GEM_BUG_ON(to->timeline == from->timeline); - if (i915_request_completed(from)) + if (i915_request_completed(from)) { + i915_sw_fence_set_error_once(&to->submit, from->fence.error); return 0; + } if (to->engine->schedule) { ret = i915_sched_node_add_dependency(&to->sched, &from->sched); -- cgit v1.2.3-59-g8ed1b From a9d094dcf7845af85f82adcad9f793e51e4d14c8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 7 May 2020 16:51:09 +0100 Subject: drm/i915: Mark concurrent submissions with a weak-dependency We recorded the dependencies for WAIT_FOR_SUBMIT in order that we could correctly perform priority inheritance from the parallel branches to the common trunk. However, for the purpose of timeslicing and reset handling, the dependency is weak -- as we the pair of requests are allowed to run in parallel and not in strict succession. The real significance though is that this allows us to rearrange groups of WAIT_FOR_SUBMIT linked requests along the single engine, and so can resolve user level inter-batch scheduling dependencies from user semaphores. Fixes: c81471f5e95c ("drm/i915: Copy across scheduler behaviour flags across submit fences") Testcase: igt/gem_exec_fence/submit Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: # v5.6+ Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200507155109.8892-1-chris@chris-wilson.co.uk (cherry picked from commit 6b6cd2ebd8d071e55998e32b648bb8081f7f02bb) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_lrc.c | 3 +++ drivers/gpu/drm/i915/i915_request.c | 8 ++++++-- drivers/gpu/drm/i915/i915_scheduler.c | 6 +++--- drivers/gpu/drm/i915/i915_scheduler.h | 3 ++- drivers/gpu/drm/i915/i915_scheduler_types.h | 1 + 5 files changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 77420372d813..2dfaddb8811e 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1721,6 +1721,9 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl) struct i915_request *w = container_of(p->waiter, typeof(*w), sched); + if (p->flags & I915_DEPENDENCY_WEAK) + continue; + /* Leave semaphores spinning on the other engines */ if (w->engine != rq->engine) continue; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 182a2995f674..e2b78db685ea 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1023,7 +1023,9 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) } if (to->engine->schedule) { - ret = i915_sched_node_add_dependency(&to->sched, &from->sched); + ret = i915_sched_node_add_dependency(&to->sched, + &from->sched, + I915_DEPENDENCY_EXTERNAL); if (ret < 0) return ret; } @@ -1185,7 +1187,9 @@ __i915_request_await_execution(struct i915_request *to, /* Couple the dependency tree for PI on this exposed to->fence */ if (to->engine->schedule) { - err = i915_sched_node_add_dependency(&to->sched, &from->sched); + err = i915_sched_node_add_dependency(&to->sched, + &from->sched, + I915_DEPENDENCY_WEAK); if (err < 0) return err; } diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 68b06a7ba667..f0a9e8958ca0 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -456,7 +456,8 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node, } int i915_sched_node_add_dependency(struct i915_sched_node *node, - struct i915_sched_node *signal) + struct i915_sched_node *signal, + unsigned long flags) { struct i915_dependency *dep; @@ -465,8 +466,7 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node, return -ENOMEM; if (!__i915_sched_node_add_dependency(node, signal, dep, - I915_DEPENDENCY_EXTERNAL | - I915_DEPENDENCY_ALLOC)) + flags | I915_DEPENDENCY_ALLOC)) i915_dependency_free(dep); return 0; diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index d1dc4efef77b..6f0bf00fc569 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -34,7 +34,8 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node, unsigned long flags); int i915_sched_node_add_dependency(struct i915_sched_node *node, - struct i915_sched_node *signal); + struct i915_sched_node *signal, + unsigned long flags); void i915_sched_node_fini(struct i915_sched_node *node); diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index d18e70550054..7186875088a0 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -78,6 +78,7 @@ struct i915_dependency { unsigned long flags; #define I915_DEPENDENCY_ALLOC BIT(0) #define I915_DEPENDENCY_EXTERNAL BIT(1) +#define I915_DEPENDENCY_WEAK BIT(2) }; #endif /* _I915_SCHEDULER_TYPES_H_ */ -- cgit v1.2.3-59-g8ed1b From e7b146a8bfba50e263745bbdefc11833c3766664 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 7 May 2020 23:04:44 +0800 Subject: riscv: perf_event: Make some funciton static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following warning detected when running make with W=1, ../arch/riscv/kernel/perf_event.c:150:5: warning: no previous prototype for ‘riscv_map_cache_decode’ [-Wmissing-prototypes] int riscv_map_cache_decode(u64 config, unsigned int *type, ^~~~~~~~~~~~~~~~~~~~~~ ../arch/riscv/kernel/perf_event.c:345:13: warning: no previous prototype for ‘riscv_base_pmu_handle_irq’ [-Wmissing-prototypes] irqreturn_t riscv_base_pmu_handle_irq(int irq_num, void *dev) ^~~~~~~~~~~~~~~~~~~~~~~~~ ../arch/riscv/kernel/perf_event.c:364:6: warning: no previous prototype for ‘release_pmc_hardware’ [-Wmissing-prototypes] void release_pmc_hardware(void) ^~~~~~~~~~~~~~~~~~~~ ../arch/riscv/kernel/perf_event.c:467:12: warning: no previous prototype for ‘init_hw_perf_events’ [-Wmissing-prototypes] int __init init_hw_perf_events(void) ^~~~~~~~~~~~~~~~~~~ Signed-off-by: Kefeng Wang Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/perf_event.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/riscv/kernel/perf_event.c b/arch/riscv/kernel/perf_event.c index 91626d9ae5f2..c835f0362d94 100644 --- a/arch/riscv/kernel/perf_event.c +++ b/arch/riscv/kernel/perf_event.c @@ -147,7 +147,7 @@ static int riscv_map_hw_event(u64 config) return riscv_pmu->hw_events[config]; } -int riscv_map_cache_decode(u64 config, unsigned int *type, +static int riscv_map_cache_decode(u64 config, unsigned int *type, unsigned int *op, unsigned int *result) { return -ENOENT; @@ -342,7 +342,7 @@ static void riscv_pmu_del(struct perf_event *event, int flags) static DEFINE_MUTEX(pmc_reserve_mutex); -irqreturn_t riscv_base_pmu_handle_irq(int irq_num, void *dev) +static irqreturn_t riscv_base_pmu_handle_irq(int irq_num, void *dev) { return IRQ_NONE; } @@ -361,7 +361,7 @@ static int reserve_pmc_hardware(void) return err; } -void release_pmc_hardware(void) +static void release_pmc_hardware(void) { mutex_lock(&pmc_reserve_mutex); if (riscv_pmu->irq >= 0) @@ -464,7 +464,7 @@ static const struct of_device_id riscv_pmu_of_ids[] = { { /* sentinel value */ } }; -int __init init_hw_perf_events(void) +static int __init init_hw_perf_events(void) { struct device_node *node = of_find_node_by_type(NULL, "pmu"); const struct of_device_id *of_id; -- cgit v1.2.3-59-g8ed1b From 9d82ccda2bc5c148060543d249d54f8703741bb4 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 8 May 2020 11:07:56 -0400 Subject: tools/bootconfig: Fix apply_xbc() to return zero on success The return of apply_xbc() returns the result of the last write() call, which is not what is expected. It should only return zero on success. Link: https://lore.kernel.org/r/20200508093059.GF9365@kadam Fixes: 8842604446d1 ("tools/bootconfig: Fix resource leak in apply_xbc()") Reported-by: Dan Carpenter Acked-by: Masami Hiramatsu Tested-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index 001076c51712..0efaf45f7367 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -337,6 +337,7 @@ int apply_xbc(const char *path, const char *xbc_path) pr_err("Failed to apply a boot config magic: %d\n", ret); goto out; } + ret = 0; out: close(fd); free(data); -- cgit v1.2.3-59-g8ed1b From c781e1d4f3e23d5fc138dad2fd98ca0e0dd9c592 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 7 May 2020 14:14:03 -0500 Subject: net: ipa: set DMA length in gsi_trans_cmd_add() When a command gets added to a transaction for the AP->command channel we set the DMA address of its scatterlist entry, but not its DMA length. Fix this bug. Signed-off-by: Alex Elder Signed-off-by: David S. Miller --- drivers/net/ipa/gsi_trans.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ipa/gsi_trans.c b/drivers/net/ipa/gsi_trans.c index 2fd21d75367d..bdbfeed359db 100644 --- a/drivers/net/ipa/gsi_trans.c +++ b/drivers/net/ipa/gsi_trans.c @@ -399,13 +399,14 @@ void gsi_trans_cmd_add(struct gsi_trans *trans, void *buf, u32 size, /* assert(which < trans->tre_count); */ /* Set the page information for the buffer. We also need to fill in - * the DMA address for the buffer (something dma_map_sg() normally - * does). + * the DMA address and length for the buffer (something dma_map_sg() + * normally does). */ sg = &trans->sgl[which]; sg_set_buf(sg, buf, size); sg_dma_address(sg) = addr; + sg_dma_len(sg) = sg->length; info = &trans->info[which]; info->opcode = opcode; -- cgit v1.2.3-59-g8ed1b From 2c4bb8093c3bfebc8113c683a5de83c64d43514c Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 7 May 2020 14:14:04 -0500 Subject: net: ipa: use tag process on modem crash One part of recovering from a modem crash is performing a "tag sequence" of several IPA immediate commands, to clear the hardware pipeline. The sequence ends with a data transfer request on the command endpoint (which is not otherwise done). Unfortunately, attempting to do the data transfer led to a hang, so that request plus two other commands were commented out. The previous commit fixes the bug that was causing that hang. And with that bug fixed we can properly issue the tag sequence when the modem crashes, to return the hardware to a known state. Signed-off-by: Alex Elder Signed-off-by: David S. Miller --- drivers/net/ipa/ipa_cmd.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/net/ipa/ipa_cmd.c b/drivers/net/ipa/ipa_cmd.c index d226b858742d..cee417181f98 100644 --- a/drivers/net/ipa/ipa_cmd.c +++ b/drivers/net/ipa/ipa_cmd.c @@ -628,23 +628,15 @@ static void ipa_cmd_transfer_add(struct gsi_trans *trans, u16 size) void ipa_cmd_tag_process_add(struct gsi_trans *trans) { - ipa_cmd_register_write_add(trans, 0, 0, 0, true); -#if 1 - /* Reference these functions to avoid a compile error */ - (void)ipa_cmd_ip_packet_init_add; - (void)ipa_cmd_ip_tag_status_add; - (void) ipa_cmd_transfer_add; -#else struct ipa *ipa = container_of(trans->gsi, struct ipa, gsi); - struct gsi_endpoint *endpoint; + struct ipa_endpoint *endpoint; endpoint = ipa->name_map[IPA_ENDPOINT_AP_LAN_RX]; - ipa_cmd_ip_packet_init_add(trans, endpoint->endpoint_id); + ipa_cmd_register_write_add(trans, 0, 0, 0, true); + ipa_cmd_ip_packet_init_add(trans, endpoint->endpoint_id); ipa_cmd_ip_tag_status_add(trans, 0xcba987654321); - ipa_cmd_transfer_add(trans, 4); -#endif } /* Returns the number of commands required for the tag process */ -- cgit v1.2.3-59-g8ed1b From 8b1fac2e73e84ef0d6391051880a8e1d7044c847 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 10 May 2020 11:35:10 -0400 Subject: tracing: Wait for preempt irq delay thread to execute A bug report was posted that running the preempt irq delay module on a slow machine, and removing it quickly could lead to the thread created by the modlue to execute after the module is removed, and this could cause the kernel to crash. The fix for this was to call kthread_stop() after creating the thread to make sure it finishes before allowing the module to be removed. Now this caused the opposite problem on fast machines. What now happens is the kthread_stop() can cause the kthread never to execute and the test never to run. To fix this, add a completion and wait for the kthread to execute, then wait for it to end. This issue caused the ftracetest selftests to fail on the preemptirq tests. Link: https://lore.kernel.org/r/20200510114210.15d9e4af@oasis.local.home Cc: stable@vger.kernel.org Fixes: d16a8c31077e ("tracing: Wait for preempt irq delay thread to finish") Reviewed-by: Joel Fernandes (Google) Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/preemptirq_delay_test.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c index c4c86de63cf9..312d1a0ca3b6 100644 --- a/kernel/trace/preemptirq_delay_test.c +++ b/kernel/trace/preemptirq_delay_test.c @@ -16,6 +16,7 @@ #include #include #include +#include static ulong delay = 100; static char test_mode[12] = "irq"; @@ -28,6 +29,8 @@ MODULE_PARM_DESC(delay, "Period in microseconds (100 us default)"); MODULE_PARM_DESC(test_mode, "Mode of the test such as preempt, irq, or alternate (default irq)"); MODULE_PARM_DESC(burst_size, "The size of a burst (default 1)"); +static struct completion done; + #define MIN(x, y) ((x) < (y) ? (x) : (y)) static void busy_wait(ulong time) @@ -114,6 +117,8 @@ static int preemptirq_delay_run(void *data) for (i = 0; i < s; i++) (testfuncs[i])(i); + complete(&done); + set_current_state(TASK_INTERRUPTIBLE); while (!kthread_should_stop()) { schedule(); @@ -128,15 +133,18 @@ static int preemptirq_delay_run(void *data) static int preemptirq_run_test(void) { struct task_struct *task; - char task_name[50]; + init_completion(&done); + snprintf(task_name, sizeof(task_name), "%s_test", test_mode); task = kthread_run(preemptirq_delay_run, NULL, task_name); if (IS_ERR(task)) return PTR_ERR(task); - if (task) + if (task) { + wait_for_completion(&done); kthread_stop(task); + } return 0; } -- cgit v1.2.3-59-g8ed1b From 9e43342b464f1de570a3ad8256ac77645749ef45 Mon Sep 17 00:00:00 2001 From: Chris Chiu Date: Tue, 12 May 2020 14:15:24 +0800 Subject: ALSA: hda/realtek - Enable headset mic of ASUS GL503VM with ALC295 The ASUS laptop GL503VM with ALC295 can't detect the headset microphone. The headset microphone does not work until pin 0x19 is enabled for it. Signed-off-by: Chris Chiu Signed-off-by: Daniel Drake Signed-off-by: Jian-Hong Pan Link: https://lore.kernel.org/r/20200512061525.133985-1-jian-hong@endlessm.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 188ba94c5cee..6baee9fd23e3 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6094,6 +6094,7 @@ enum { ALC285_FIXUP_HP_MUTE_LED, ALC236_FIXUP_HP_MUTE_LED, ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, + ALC295_FIXUP_ASUS_MIC_NO_PRESENCE, }; static const struct hda_fixup alc269_fixups[] = { @@ -7240,6 +7241,15 @@ static const struct hda_fixup alc269_fixups[] = { { } }, }, + [ALC295_FIXUP_ASUS_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -8019,6 +8029,14 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0x90a60130}, {0x17, 0x90170110}, {0x21, 0x03211020}), + SND_HDA_PIN_QUIRK(0x10ec0295, 0x1043, "ASUS", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE, + {0x12, 0x90a60130}, + {0x17, 0x90170110}, + {0x21, 0x03211020}), + SND_HDA_PIN_QUIRK(0x10ec0295, 0x1043, "ASUS", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE, + {0x12, 0x90a60130}, + {0x17, 0x90170110}, + {0x21, 0x03211020}), SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE, {0x14, 0x90170110}, {0x21, 0x04211020}), -- cgit v1.2.3-59-g8ed1b From ad97d667854c2fbce05a004e107f358ef4b04cf6 Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Tue, 12 May 2020 14:15:26 +0800 Subject: ALSA: hda/realtek - Enable headset mic of ASUS UX550GE with ALC295 The ASUS laptop UX550GE with ALC295 can't detect the headset microphone until ALC295_FIXUP_ASUS_MIC_NO_PRESENCE quirk applied. Signed-off-by: Jian-Hong Pan Signed-off-by: Daniel Drake Link: https://lore.kernel.org/r/20200512061525.133985-2-jian-hong@endlessm.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6baee9fd23e3..11d30c6a030e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8029,6 +8029,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0x90a60130}, {0x17, 0x90170110}, {0x21, 0x03211020}), + SND_HDA_PIN_QUIRK(0x10ec0295, 0x1043, "ASUS", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE, + {0x12, 0x90a60120}, + {0x17, 0x90170110}, + {0x21, 0x04211030}), SND_HDA_PIN_QUIRK(0x10ec0295, 0x1043, "ASUS", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE, {0x12, 0x90a60130}, {0x17, 0x90170110}, -- cgit v1.2.3-59-g8ed1b From 7900e81797613b92f855f9921392a7430cbdf88c Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Tue, 12 May 2020 14:15:28 +0800 Subject: ALSA: hda/realtek: Enable headset mic of ASUS UX581LV with ALC295 The ASUS UX581LV laptop's audio (1043:19e1) with ALC295 can't detect the headset microphone until ALC295_FIXUP_ASUS_MIC_NO_PRESENCE quirk applied. Signed-off-by: Jian-Hong Pan Link: https://lore.kernel.org/r/20200512061525.133985-3-jian-hong@endlessm.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 11d30c6a030e..f1cf2567d33d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7417,6 +7417,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x18f1, "Asus FX505DT", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x19ce, "ASUS B9450FA", ALC294_FIXUP_ASUS_HPE), + SND_PCI_QUIRK(0x1043, 0x19e1, "ASUS UX581LV", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW), SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC), -- cgit v1.2.3-59-g8ed1b From 1b94e59d30afecf18254ad413e953e7587645a20 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 May 2020 09:32:03 +0200 Subject: ALSA: hda/realtek - Add COEF workaround for ASUS ZenBook UX431DA ASUS ZenBook UX431DA requires an additional COEF setup when booted from the recent Windows 10, otherwise it produces the noisy output. The quirk turns on COEF 0x1b bit 10 that has been cleared supposedly due to the pop noise reduction. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=207553 Cc: Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20200512073203.14091-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f1cf2567d33d..4080b492ecc0 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6090,6 +6090,7 @@ enum { ALC294_FIXUP_ASUS_DUAL_SPK, ALC285_FIXUP_THINKPAD_HEADSET_JACK, ALC294_FIXUP_ASUS_HPE, + ALC294_FIXUP_ASUS_COEF_1B, ALC285_FIXUP_HP_GPIO_LED, ALC285_FIXUP_HP_MUTE_LED, ALC236_FIXUP_HP_MUTE_LED, @@ -7222,6 +7223,17 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC294_FIXUP_ASUS_HEADSET_MIC }, + [ALC294_FIXUP_ASUS_COEF_1B] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + /* Set bit 10 to correct noisy output after reboot from + * Windows 10 (due to pop noise reduction?) + */ + { 0x20, AC_VERB_SET_COEF_INDEX, 0x1b }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x4e4b }, + { } + }, + }, [ALC285_FIXUP_HP_GPIO_LED] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_hp_gpio_led, @@ -7420,6 +7432,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x19e1, "ASUS UX581LV", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW), SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC), + SND_PCI_QUIRK(0x1043, 0x1b11, "ASUS UX431DA", ALC294_FIXUP_ASUS_COEF_1B), SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1c23, "Asus X55U", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), -- cgit v1.2.3-59-g8ed1b From 9ed81c8e0deb7bd2aa0d69371e4a0f9a7b31205d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 11 May 2020 11:54:31 +0200 Subject: netfilter: flowtable: set NF_FLOW_TEARDOWN flag on entry expiration If the flow timer expires, the gc sets on the NF_FLOW_TEARDOWN flag. Otherwise, the flowtable software path might race to refresh the timeout, leaving the state machine in inconsistent state. Fixes: c29f74e0df7a ("netfilter: nf_flow_table: hardware offload support") Reported-by: Paul Blakey Reviewed-by: Roi Dayan Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 4344e572b7f9..42da6e337276 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -284,7 +284,7 @@ static void flow_offload_del(struct nf_flowtable *flow_table, if (nf_flow_has_expired(flow)) flow_offload_fixup_ct(flow->ct); - else if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) + else flow_offload_fixup_ct_timeout(flow->ct); flow_offload_free(flow); @@ -361,8 +361,10 @@ static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data) { struct nf_flowtable *flow_table = data; - if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct) || - test_bit(NF_FLOW_TEARDOWN, &flow->flags)) { + if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct)) + set_bit(NF_FLOW_TEARDOWN, &flow->flags); + + if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) { if (test_bit(NF_FLOW_HW, &flow->flags)) { if (!test_bit(NF_FLOW_HW_DYING, &flow->flags)) nf_flow_offload_del(flow_table, flow); -- cgit v1.2.3-59-g8ed1b From 340eaff651160234bdbce07ef34b92a8e45cd540 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Mon, 11 May 2020 15:31:41 +0200 Subject: netfilter: nft_set_rbtree: Add missing expired checks Expired intervals would still match and be dumped to user space until garbage collection wiped them out. Make sure they stop matching and disappear (from users' perspective) as soon as they expire. Fixes: 8d8540c4f5e03 ("netfilter: nft_set_rbtree: add timeout support") Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_rbtree.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 3ffef454d469..62f416bc0579 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -79,6 +79,10 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set parent = rcu_dereference_raw(parent->rb_left); continue; } + + if (nft_set_elem_expired(&rbe->ext)) + return false; + if (nft_rbtree_interval_end(rbe)) { if (nft_set_is_anonymous(set)) return false; @@ -94,6 +98,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set if (set->flags & NFT_SET_INTERVAL && interval != NULL && nft_set_elem_active(&interval->ext, genmask) && + !nft_set_elem_expired(&interval->ext) && nft_rbtree_interval_start(interval)) { *ext = &interval->ext; return true; @@ -154,6 +159,9 @@ static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set, continue; } + if (nft_set_elem_expired(&rbe->ext)) + return false; + if (!nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) || (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) == (flags & NFT_SET_ELEM_INTERVAL_END)) { @@ -170,6 +178,7 @@ static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set, if (set->flags & NFT_SET_INTERVAL && interval != NULL && nft_set_elem_active(&interval->ext, genmask) && + !nft_set_elem_expired(&interval->ext) && ((!nft_rbtree_interval_end(interval) && !(flags & NFT_SET_ELEM_INTERVAL_END)) || (nft_rbtree_interval_end(interval) && @@ -418,6 +427,8 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, if (iter->count < iter->skip) goto cont; + if (nft_set_elem_expired(&rbe->ext)) + goto cont; if (!nft_set_elem_active(&rbe->ext, iter->genmask)) goto cont; -- cgit v1.2.3-59-g8ed1b From 975f543e7522e17b8a4bf34d7daeac44819aee5a Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Thu, 7 May 2020 08:35:40 -0400 Subject: drm/amd/amdgpu: add raven1 part to the gfxoff quirk list On my raven1 system (rev c6) with VBIOS 113-RAVEN-114 GFXOFF is not stable (resulting in large block tiling noise in some applications). Disabling GFXOFF via the quirk list fixes the problems for me. Signed-off-by: Tom St Denis Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 0127f1ce16e6..d2d9dce68c2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1236,6 +1236,8 @@ static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = { { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 }, /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */ { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 }, + /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */ + { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 }, { 0, 0, 0, 0, 0 }, }; -- cgit v1.2.3-59-g8ed1b From 650e723cecf2738dee828564396f3239829aba83 Mon Sep 17 00:00:00 2001 From: "Leo (Hanghong) Ma" Date: Fri, 8 May 2020 14:18:07 -0400 Subject: drm/amd/amdgpu: Update update_config() logic [Why] For MST case: when update_config is called to disable a stream, this clears the settings for all the streams on that link. We should only clear the settings for the stream that was disabled. [How] Clear the settings after the call to remove display is called. Reviewed-by: Harry Wentland Reviewed-by: Bhawanpreet Lakha Signed-off-by: Leo (Hanghong) Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 78e1c11d4ae5..dcf84a61de37 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -398,15 +398,15 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) struct mod_hdcp_display *display = &hdcp_work[link_index].display; struct mod_hdcp_link *link = &hdcp_work[link_index].link; - memset(display, 0, sizeof(*display)); - memset(link, 0, sizeof(*link)); - - display->index = aconnector->base.index; - if (config->dpms_off) { hdcp_remove_display(hdcp_work, link_index, aconnector); return; } + + memset(display, 0, sizeof(*display)); + memset(link, 0, sizeof(*link)); + + display->index = aconnector->base.index; display->state = MOD_HDCP_DISPLAY_ACTIVE; if (aconnector->dc_sink != NULL) -- cgit v1.2.3-59-g8ed1b From 611d0a95d46b0977a530b4d538948c69d447b001 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 11 May 2020 10:39:24 +0900 Subject: bootconfig: Fix to prevent warning message if no bootconfig option Commit de462e5f1071 ("bootconfig: Fix to remove bootconfig data from initrd while boot") causes a cosmetic regression on dmesg, which warns "no bootconfig data" message without bootconfig cmdline option. Fix setup_boot_config() by moving no bootconfig check after commandline option check. Link: http://lkml.kernel.org/r/9b1ba335-071d-c983-89a4-2677b522dcc8@molgen.mpg.de Link: http://lkml.kernel.org/r/158916116468.21787.14558782332170588206.stgit@devnote2 Fixes: de462e5f1071 ("bootconfig: Fix to remove bootconfig data from initrd while boot") Reported-by: Paul Menzel Reviewed-by: Paul Menzel Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- init/main.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/init/main.c b/init/main.c index 1a5da2c2660c..5803ecb411ab 100644 --- a/init/main.c +++ b/init/main.c @@ -400,9 +400,8 @@ static void __init setup_boot_config(const char *cmdline) char *data, *copy; int ret; + /* Cut out the bootconfig data even if we have no bootconfig option */ data = get_boot_config_from_initrd(&size, &csum); - if (!data) - goto not_found; strlcpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE); parse_args("bootconfig", tmp_cmdline, NULL, 0, 0, 0, NULL, @@ -411,6 +410,11 @@ static void __init setup_boot_config(const char *cmdline) if (!bootconfig_found) return; + if (!data) { + pr_err("'bootconfig' found on command line, but no bootconfig found\n"); + return; + } + if (size >= XBC_DATA_MAX) { pr_err("bootconfig size %d greater than max size %d\n", size, XBC_DATA_MAX); @@ -446,8 +450,6 @@ static void __init setup_boot_config(const char *cmdline) extra_init_args = xbc_make_cmdline("init"); } return; -not_found: - pr_err("'bootconfig' found on command line, but no bootconfig found\n"); } #else -- cgit v1.2.3-59-g8ed1b From bb43c8e382e5da0ee253e3105d4099820ff4d922 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Sun, 26 Apr 2020 00:35:45 +0100 Subject: RDMA/rxe: Always return ERR_PTR from rxe_create_mmap_info() The commit below modified rxe_create_mmap_info() to return ERR_PTR's but didn't update the callers to handle them. Modify rxe_create_mmap_info() to only return ERR_PTR and fix all error checking after rxe_create_mmap_info() is called. Ensure that all other exit paths properly set the error return. Fixes: ff23dfa13457 ("IB: Pass only ib_udata in function prototypes") Link: https://lore.kernel.org/r/20200425233545.17210-1-sudipm.mukherjee@gmail.com Link: https://lore.kernel.org/r/20200511183742.GB225608@mwanda Cc: stable@vger.kernel.org [5.4+] Signed-off-by: Sudip Mukherjee Signed-off-by: Dan Carpenter Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mmap.c | 2 +- drivers/infiniband/sw/rxe/rxe_queue.c | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c index 48f48122ddcb..6a413d73b95d 100644 --- a/drivers/infiniband/sw/rxe/rxe_mmap.c +++ b/drivers/infiniband/sw/rxe/rxe_mmap.c @@ -151,7 +151,7 @@ struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *rxe, u32 size, ip = kmalloc(sizeof(*ip), GFP_KERNEL); if (!ip) - return NULL; + return ERR_PTR(-ENOMEM); size = PAGE_ALIGN(size); diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c index ff92704de32f..245040c3a35d 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.c +++ b/drivers/infiniband/sw/rxe/rxe_queue.c @@ -45,12 +45,15 @@ int do_mmap_info(struct rxe_dev *rxe, struct mminfo __user *outbuf, if (outbuf) { ip = rxe_create_mmap_info(rxe, buf_size, udata, buf); - if (!ip) + if (IS_ERR(ip)) { + err = PTR_ERR(ip); goto err1; + } - err = copy_to_user(outbuf, &ip->info, sizeof(ip->info)); - if (err) + if (copy_to_user(outbuf, &ip->info, sizeof(ip->info))) { + err = -EFAULT; goto err2; + } spin_lock_bh(&rxe->pending_lock); list_add(&ip->pending_mmaps, &rxe->pending_mmaps); @@ -64,7 +67,7 @@ int do_mmap_info(struct rxe_dev *rxe, struct mminfo __user *outbuf, err2: kfree(ip); err1: - return -EINVAL; + return err; } inline void rxe_queue_reset(struct rxe_queue *q) -- cgit v1.2.3-59-g8ed1b From 6693ca95bd4330a0ad7326967e1f9bcedd6b0800 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Sun, 26 Apr 2020 10:59:21 +0300 Subject: IB/mlx4: Test return value of calls to ib_get_cached_pkey In the mlx4_ib_post_send() flow, some functions call ib_get_cached_pkey() without checking its return value. If ib_get_cached_pkey() returns an error code, these functions should return failure. Fixes: 1ffeb2eb8be9 ("IB/mlx4: SR-IOV IB context objects and proxy/tunnel SQP support") Fixes: 225c7b1feef1 ("IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters") Fixes: e622f2f4ad21 ("IB: split struct ib_send_wr") Link: https://lore.kernel.org/r/20200426075921.130074-1-leon@kernel.org Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx4/qp.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 2f9f78912267..cf51e3cbd969 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2891,6 +2891,7 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, int send_size; int header_size; int spc; + int err; int i; if (wr->wr.opcode != IB_WR_SEND) @@ -2925,7 +2926,9 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, sqp->ud_header.lrh.virtual_lane = 0; sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED); - ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey); + err = ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey); + if (err) + return err; sqp->ud_header.bth.pkey = cpu_to_be16(pkey); if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER) sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn); @@ -3212,9 +3215,14 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr, } sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED); if (!sqp->qp.ibqp.qp_num) - ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey); + err = ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, + &pkey); else - ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->pkey_index, &pkey); + err = ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->pkey_index, + &pkey); + if (err) + return err; + sqp->ud_header.bth.pkey = cpu_to_be16(pkey); sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn); sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); -- cgit v1.2.3-59-g8ed1b From 856ec7f64688387b100b7083cdf480ce3ac41227 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 28 Apr 2020 16:15:11 +0300 Subject: IB/i40iw: Remove bogus call to netdev_master_upper_dev_get() Local variable netdev is not used in these calls. It should be noted, that this change is required to work in bonded mode. Otherwise we would get the following assert: "RTNL: assertion failed at net/core/dev.c (5665)" With the calltrace as follows: dump_stack+0x19/0x1b netdev_master_upper_dev_get+0x61/0x70 i40iw_addr_resolve_neigh+0x1e8/0x220 i40iw_make_cm_node+0x296/0x700 ? i40iw_find_listener.isra.10+0xcc/0x110 i40iw_receive_ilq+0x3d4/0x810 i40iw_puda_poll_completion+0x341/0x420 i40iw_process_ceq+0xa5/0x280 i40iw_ceq_dpc+0x1e/0x40 tasklet_action+0x83/0x140 __do_softirq+0x125/0x2bb call_softirq+0x1c/0x30 do_softirq+0x65/0xa0 irq_exit+0x105/0x110 do_IRQ+0x56/0xf0 common_interrupt+0x16a/0x16a ? cpuidle_enter_state+0x57/0xd0 cpuidle_idle_call+0xde/0x230 arch_cpu_idle+0xe/0xc0 cpu_startup_entry+0x14a/0x1e0 start_secondary+0x1f7/0x270 start_cpu+0x5/0x14 Link: https://lore.kernel.org/r/20200428131511.11049-1-den@openvz.org Signed-off-by: Denis V. Lunev Acked-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index bb78d3280acc..fa7a5ff498c7 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -1987,7 +1987,6 @@ static int i40iw_addr_resolve_neigh(struct i40iw_device *iwdev, struct rtable *rt; struct neighbour *neigh; int rc = arpindex; - struct net_device *netdev = iwdev->netdev; __be32 dst_ipaddr = htonl(dst_ip); __be32 src_ipaddr = htonl(src_ip); @@ -1997,9 +1996,6 @@ static int i40iw_addr_resolve_neigh(struct i40iw_device *iwdev, return rc; } - if (netif_is_bond_slave(netdev)) - netdev = netdev_master_upper_dev_get(netdev); - neigh = dst_neigh_lookup(&rt->dst, &dst_ipaddr); rcu_read_lock(); @@ -2065,7 +2061,6 @@ static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev, { struct neighbour *neigh; int rc = arpindex; - struct net_device *netdev = iwdev->netdev; struct dst_entry *dst; struct sockaddr_in6 dst_addr; struct sockaddr_in6 src_addr; @@ -2086,9 +2081,6 @@ static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev, return rc; } - if (netif_is_bond_slave(netdev)) - netdev = netdev_master_upper_dev_get(netdev); - neigh = dst_neigh_lookup(dst, dst_addr.sin6_addr.in6_u.u6_addr32); rcu_read_lock(); -- cgit v1.2.3-59-g8ed1b From fa8dac3968635dec8518a13ac78d662f2aa88e4d Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 4 May 2020 09:09:17 -0400 Subject: IB/hfi1: Fix another case where pq is left on waitlist The commit noted below fixed a case where a pq is left on the sdma wait list. It however missed another case. user_sdma_send_pkts() has two calls from hfi1_user_sdma_process_request(). If the first one fails as indicated by -EBUSY, the pq will be placed on the waitlist as by design. If the second call then succeeds, the pq is still on the waitlist setting up a race with the interrupt handler if a subsequent request uses a different SDMA engine Fix by deleting the first call. The use of pcount and the intent to send a short burst of packets followed by the larger balance of packets was never correctly implemented, because the two calls always send pcount packets no matter what. A subsequent patch will correct that issue. Fixes: 9a293d1e21a6 ("IB/hfi1: Ensure pq is not left on waitlist") Link: https://lore.kernel.org/r/20200504130917.175613.43231.stgit@awfm-01.aw.intel.com Cc: Reviewed-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/user_sdma.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 13e4203497b3..a92346e88628 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -589,10 +589,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, set_comp_state(pq, cq, info.comp_idx, QUEUED, 0); pq->state = SDMA_PKT_Q_ACTIVE; - /* Send the first N packets in the request to buy us some time */ - ret = user_sdma_send_pkts(req, pcount); - if (unlikely(ret < 0 && ret != -EBUSY)) - goto free_req; /* * This is a somewhat blocking send implementation. -- cgit v1.2.3-59-g8ed1b From 1901b91f99821955eac2bd48fe25ee983385dc00 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 7 May 2020 10:10:12 +0300 Subject: IB/core: Fix potential NULL pointer dereference in pkey cache The IB core pkey cache is populated by procedure ib_cache_update(). Initially, the pkey cache pointer is NULL. ib_cache_update allocates a buffer and populates it with the device's pkeys, via repeated calls to procedure ib_query_pkey(). If there is a failure in populating the pkey buffer via ib_query_pkey(), ib_cache_update does not replace the old pkey buffer cache with the updated one -- it leaves the old cache as is. Since initially the pkey buffer cache is NULL, when calling ib_cache_update the first time, a failure in ib_query_pkey() will cause the pkey buffer cache pointer to remain NULL. In this situation, any calls subsequent to ib_get_cached_pkey(), ib_find_cached_pkey(), or ib_find_cached_pkey_exact() will try to dereference the NULL pkey cache pointer, causing a kernel panic. Fix this by checking the ib_cache_update() return value. Fixes: 8faea9fd4a39 ("RDMA/cache: Move the cache per-port data into the main ib_port_data") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Link: https://lore.kernel.org/r/20200507071012.100594-1-leon@kernel.org Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 717b798cddad..a670209bbce6 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -1553,8 +1553,11 @@ int ib_cache_setup_one(struct ib_device *device) if (err) return err; - rdma_for_each_port (device, p) - ib_cache_update(device, p, true); + rdma_for_each_port (device, p) { + err = ib_cache_update(device, p, true); + if (err) + return err; + } return 0; } -- cgit v1.2.3-59-g8ed1b From 50bbe3d34fea74b7c0fabe553c40c2f4a48bb9c3 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 7 May 2020 09:29:42 +0300 Subject: RDMA/core: Fix double put of resource Do not decrease the reference count of resource tracker object twice in the error flow of res_get_common_doit. Fixes: c5dfe0ea6ffa ("RDMA/nldev: Add resource tracker doit callback") Link: https://lore.kernel.org/r/20200507062942.98305-1-leon@kernel.org Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/nldev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 9eec26d10d7b..e16105be2eb2 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -1292,11 +1292,10 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN); ret = fill_func(msg, has_cap_net_admin, res, port); - - rdma_restrack_put(res); if (ret) goto err_free; + rdma_restrack_put(res); nlmsg_end(msg, nlh); ib_device_put(device); return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); -- cgit v1.2.3-59-g8ed1b From c8b1f340e54158662acfa41d6dee274846370282 Mon Sep 17 00:00:00 2001 From: Potnuri Bharat Teja Date: Tue, 12 May 2020 00:26:08 +0530 Subject: RDMA/iw_cxgb4: Fix incorrect function parameters While reading the TCB field in t4_tcb_get_field32() the wrong mask is passed as a parameter which leads the driver eventually to a kernel panic/app segfault from access to an illegal SRQ index while flushing the SRQ completions during connection teardown. Fixes: 11a27e2121a5 ("iw_cxgb4: complete the cached SRQ buffers") Link: https://lore.kernel.org/r/20200511185608.5202-1-bharat@chelsio.com Signed-off-by: Potnuri Bharat Teja Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/cm.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index d69dece3b1d5..30e08bcc9afb 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2891,8 +2891,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) srqidx = ABORT_RSS_SRQIDX_G( be32_to_cpu(req->srqidx_status)); if (srqidx) { - complete_cached_srq_buffers(ep, - req->srqidx_status); + complete_cached_srq_buffers(ep, srqidx); } else { /* Hold ep ref until finish_peer_abort() */ c4iw_get_ep(&ep->com); @@ -3878,8 +3877,8 @@ static int read_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } - ep->srqe_idx = t4_tcb_get_field32(tcb, TCB_RQ_START_W, TCB_RQ_START_W, - TCB_RQ_START_S); + ep->srqe_idx = t4_tcb_get_field32(tcb, TCB_RQ_START_W, TCB_RQ_START_M, + TCB_RQ_START_S); cleanup: pr_debug("ep %p tid %u %016x\n", ep, ep->hwtid, ep->srqe_idx); -- cgit v1.2.3-59-g8ed1b From 64d950ae0b01eae96eb668b789c6d145c38ac41c Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Mon, 11 May 2020 09:24:42 -0700 Subject: mptcp: Initialize map_seq upon subflow establishment When the other MPTCP-peer uses 32-bit data-sequence numbers, we rely on map_seq to indicate how to expand to a 64-bit data-sequence number in expand_seq() when receiving data. For new subflows, this field is not initialized, thus results in an "invalid" mapping being discarded. Fix this by initializing map_seq upon subflow establishment time. Fixes: f296234c98a8 ("mptcp: Add handling of incoming MP_JOIN requests") Signed-off-by: Christoph Paasch Reviewed-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index e1f23016ed3f..32ea8d35489a 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1629,6 +1629,8 @@ bool mptcp_finish_join(struct sock *sk) ret = mptcp_pm_allow_new_subflow(msk); if (ret) { + subflow->map_seq = msk->ack_seq; + /* active connections are already on conn_list */ spin_lock_bh(&msk->join_list_lock); if (!WARN_ON_ONCE(!list_empty(&subflow->node))) -- cgit v1.2.3-59-g8ed1b From 2c864c78c2386ada7433268cdfa8cb77cfe31bf3 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 11 May 2020 14:02:15 -0700 Subject: ptp: fix struct member comment for do_aux_work The do_aux_work callback had documentation in the structure comment which referred to it as "do_work". Signed-off-by: Jacob Keller Cc: Richard Cochran Acked-by: Richard Cochran Signed-off-by: David S. Miller --- include/linux/ptp_clock_kernel.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 121a7eda4593..c602670bbffb 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -105,10 +105,10 @@ struct ptp_system_timestamp { * parameter func: the desired function to use. * parameter chan: the function channel index to use. * - * @do_work: Request driver to perform auxiliary (periodic) operations - * Driver should return delay of the next auxiliary work scheduling - * time (>=0) or negative value in case further scheduling - * is not required. + * @do_aux_work: Request driver to perform auxiliary (periodic) operations + * Driver should return delay of the next auxiliary work + * scheduling time (>=0) or negative value in case further + * scheduling is not required. * * Drivers should embed their ptp_clock_info within a private * structure, obtaining a reference to it using container_of(). -- cgit v1.2.3-59-g8ed1b From f20a4d404122b183b7d66cadea917fcd1340c05b Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 11 May 2020 14:04:44 -0700 Subject: ionic: leave netdev mac alone after fw-upgrade When running in a bond setup, or some other potential configurations, the netdev mac may have been changed from the default device mac. Since the userland doesn't know about the changes going on under the covers in a fw-upgrade it doesn't know the re-push the mac filter. The driver needs to leave the netdev mac filter alone when rebuilding after the fw-upgrade. Fixes: c672412f6172 ("ionic: remove lifs on fw reset") Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index d5293bfded29..f8c626444da0 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -2348,7 +2348,17 @@ static int ionic_station_set(struct ionic_lif *lif) if (is_zero_ether_addr(ctx.comp.lif_getattr.mac)) return 0; - if (!ether_addr_equal(ctx.comp.lif_getattr.mac, netdev->dev_addr)) { + if (!is_zero_ether_addr(netdev->dev_addr)) { + /* If the netdev mac is non-zero and doesn't match the default + * device address, it was set by something earlier and we're + * likely here again after a fw-upgrade reset. We need to be + * sure the netdev mac is in our filter list. + */ + if (!ether_addr_equal(ctx.comp.lif_getattr.mac, + netdev->dev_addr)) + ionic_lif_addr(lif, netdev->dev_addr, true); + } else { + /* Update the netdev mac with the device's mac */ memcpy(addr.sa_data, ctx.comp.lif_getattr.mac, netdev->addr_len); addr.sa_family = AF_INET; err = eth_prepare_mac_addr_change(netdev, &addr); @@ -2358,12 +2368,6 @@ static int ionic_station_set(struct ionic_lif *lif) return 0; } - if (!is_zero_ether_addr(netdev->dev_addr)) { - netdev_dbg(lif->netdev, "deleting station MAC addr %pM\n", - netdev->dev_addr); - ionic_lif_addr(lif, netdev->dev_addr, false); - } - eth_commit_mac_addr_change(netdev, &addr); } -- cgit v1.2.3-59-g8ed1b From ddc5911b9bd228dd69de976a4545292420f45e73 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 11 May 2020 14:04:45 -0700 Subject: ionic: call ionic_port_init after fw-upgrade Since the fw has been re-inited, we need to refresh the port information dma address so we can see fresh port information. Let's call ionic_port_init again, and tweak it to allow for a call to simply refresh the existing dma address. Fixes: c672412f6172 ("ionic: remove lifs on fw reset") Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 1 + drivers/net/ethernet/pensando/ionic/ionic_main.c | 18 +++++++++--------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index f8c626444da0..f8a9c1bcffc9 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -2118,6 +2118,7 @@ static void ionic_lif_handle_fw_up(struct ionic_lif *lif) dev_info(ionic->dev, "FW Up: restarting LIFs\n"); ionic_init_devinfo(ionic); + ionic_port_init(ionic); err = ionic_qcqs_alloc(lif); if (err) goto err_out; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index 588c62e9add7..3344bc1f7671 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -509,16 +509,16 @@ int ionic_port_init(struct ionic *ionic) size_t sz; int err; - if (idev->port_info) - return 0; - - idev->port_info_sz = ALIGN(sizeof(*idev->port_info), PAGE_SIZE); - idev->port_info = dma_alloc_coherent(ionic->dev, idev->port_info_sz, - &idev->port_info_pa, - GFP_KERNEL); if (!idev->port_info) { - dev_err(ionic->dev, "Failed to allocate port info, aborting\n"); - return -ENOMEM; + idev->port_info_sz = ALIGN(sizeof(*idev->port_info), PAGE_SIZE); + idev->port_info = dma_alloc_coherent(ionic->dev, + idev->port_info_sz, + &idev->port_info_pa, + GFP_KERNEL); + if (!idev->port_info) { + dev_err(ionic->dev, "Failed to allocate port info\n"); + return -ENOMEM; + } } sz = min(sizeof(ident->port.config), sizeof(idev->dev_cmd_regs->data)); -- cgit v1.2.3-59-g8ed1b From 92db978f0d686468e527d49268e7c7e8d97d334b Mon Sep 17 00:00:00 2001 From: Clay McClure Date: Tue, 12 May 2020 13:02:30 +0300 Subject: net: ethernet: ti: Remove TI_CPTS_MOD workaround My recent commit b6d49cab44b5 ("net: Make PTP-specific drivers depend on PTP_1588_CLOCK") exposes a missing dependency in defconfigs that select TI_CPTS without selecting PTP_1588_CLOCK, leading to linker errors of the form: drivers/net/ethernet/ti/cpsw.o: in function `cpsw_ndo_stop': cpsw.c:(.text+0x680): undefined reference to `cpts_unregister' ... That's because TI_CPTS_MOD (which is the symbol gating the _compilation_ of cpts.c) now depends on PTP_1588_CLOCK, and so is not enabled in these configurations, but TI_CPTS (which is the symbol gating _calls_ to the cpts functions) _is_ enabled. So we end up compiling calls to functions that don't exist, resulting in the linker errors. This patch fixes build errors and restores previous behavior by: - ensure PTP_1588_CLOCK=y in TI specific configs and CPTS will be built - remove TI_CPTS_MOD and, instead, add dependencies from CPTS in TI_CPSW/TI_KEYSTONE_NETCP/TI_CPSW_SWITCHDEV as below: config TI_CPSW_SWITCHDEV ... depends on TI_CPTS || !TI_CPTS which will ensure proper dependencies PTP_1588_CLOCK -> TI_CPTS -> TI_CPSW/TI_KEYSTONE_NETCP/TI_CPSW_SWITCHDEV and build type selection. Note. For NFS boot + CPTS all of above configs have to be built-in. Cc: Arnd Bergmann Cc: Dan Murphy Cc: Tony Lindgren Fixes: b6d49cab44b5 ("net: Make PTP-specific drivers depend on PTP_1588_CLOCK") Reported-by: kbuild test robot Signed-off-by: Clay McClure [grygorii.strashko@ti.com: rewording, add deps cpsw/netcp from cpts, drop IS_REACHABLE] Signed-off-by: Grygorii Strashko Reviewed-by: Arnd Bergmann Tested-by: Tony Lindgren Signed-off-by: David S. Miller --- arch/arm/configs/keystone_defconfig | 1 + arch/arm/configs/omap2plus_defconfig | 1 + drivers/net/ethernet/ti/Kconfig | 16 ++++++---------- drivers/net/ethernet/ti/Makefile | 2 +- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/arch/arm/configs/keystone_defconfig b/arch/arm/configs/keystone_defconfig index 11e2211f9007..84a3b055f253 100644 --- a/arch/arm/configs/keystone_defconfig +++ b/arch/arm/configs/keystone_defconfig @@ -147,6 +147,7 @@ CONFIG_I2C_DAVINCI=y CONFIG_SPI=y CONFIG_SPI_DAVINCI=y CONFIG_SPI_SPIDEV=y +CONFIG_PTP_1588_CLOCK=y CONFIG_PINCTRL_SINGLE=y CONFIG_GPIOLIB=y CONFIG_GPIO_SYSFS=y diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 3cc3ca5fa027..8b83d4a5d309 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -274,6 +274,7 @@ CONFIG_SPI_TI_QSPI=m CONFIG_HSI=m CONFIG_OMAP_SSI=m CONFIG_SSI_PROTOCOL=m +CONFIG_PTP_1588_CLOCK=y CONFIG_PINCTRL_SINGLE=y CONFIG_DEBUG_GPIO=y CONFIG_GPIO_SYSFS=y diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index 8e348780efb6..62f809b67469 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -49,6 +49,7 @@ config TI_CPSW_PHY_SEL config TI_CPSW tristate "TI CPSW Switch Support" depends on ARCH_DAVINCI || ARCH_OMAP2PLUS || COMPILE_TEST + depends on TI_CPTS || !TI_CPTS select TI_DAVINCI_MDIO select MFD_SYSCON select PAGE_POOL @@ -64,6 +65,7 @@ config TI_CPSW_SWITCHDEV tristate "TI CPSW Switch Support with switchdev" depends on ARCH_DAVINCI || ARCH_OMAP2PLUS || COMPILE_TEST depends on NET_SWITCHDEV + depends on TI_CPTS || !TI_CPTS select PAGE_POOL select TI_DAVINCI_MDIO select MFD_SYSCON @@ -77,23 +79,16 @@ config TI_CPSW_SWITCHDEV will be called cpsw_new. config TI_CPTS - bool "TI Common Platform Time Sync (CPTS) Support" - depends on TI_CPSW || TI_KEYSTONE_NETCP || TI_CPSW_SWITCHDEV || COMPILE_TEST + tristate "TI Common Platform Time Sync (CPTS) Support" + depends on ARCH_OMAP2PLUS || ARCH_KEYSTONE || COMPILE_TEST depends on COMMON_CLK - depends on POSIX_TIMERS + depends on PTP_1588_CLOCK ---help--- This driver supports the Common Platform Time Sync unit of the CPSW Ethernet Switch and Keystone 2 1g/10g Switch Subsystem. The unit can time stamp PTP UDP/IPv4 and Layer 2 packets, and the driver offers a PTP Hardware Clock. -config TI_CPTS_MOD - tristate - depends on TI_CPTS - depends on PTP_1588_CLOCK - default y if TI_CPSW=y || TI_KEYSTONE_NETCP=y || TI_CPSW_SWITCHDEV=y - default m - config TI_K3_AM65_CPSW_NUSS tristate "TI K3 AM654x/J721E CPSW Ethernet driver" depends on ARCH_K3 && OF && TI_K3_UDMA_GLUE_LAYER @@ -114,6 +109,7 @@ config TI_KEYSTONE_NETCP select TI_DAVINCI_MDIO depends on OF depends on KEYSTONE_NAVIGATOR_DMA && KEYSTONE_NAVIGATOR_QMSS + depends on TI_CPTS || !TI_CPTS ---help--- This driver supports TI's Keystone NETCP Core. diff --git a/drivers/net/ethernet/ti/Makefile b/drivers/net/ethernet/ti/Makefile index 53792190e9c2..cb26a9d21869 100644 --- a/drivers/net/ethernet/ti/Makefile +++ b/drivers/net/ethernet/ti/Makefile @@ -13,7 +13,7 @@ obj-$(CONFIG_TI_DAVINCI_EMAC) += ti_davinci_emac.o ti_davinci_emac-y := davinci_emac.o davinci_cpdma.o obj-$(CONFIG_TI_DAVINCI_MDIO) += davinci_mdio.o obj-$(CONFIG_TI_CPSW_PHY_SEL) += cpsw-phy-sel.o -obj-$(CONFIG_TI_CPTS_MOD) += cpts.o +obj-$(CONFIG_TI_CPTS) += cpts.o obj-$(CONFIG_TI_CPSW) += ti_cpsw.o ti_cpsw-y := cpsw.o davinci_cpdma.o cpsw_ale.o cpsw_priv.o cpsw_sl.o cpsw_ethtool.o obj-$(CONFIG_TI_CPSW_SWITCHDEV) += ti_cpsw_new.o -- cgit v1.2.3-59-g8ed1b From 24adbc1676af4e134e709ddc7f34cf2adc2131e4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 May 2020 06:54:30 -0700 Subject: tcp: fix SO_RCVLOWAT hangs with fat skbs We autotune rcvbuf whenever SO_RCVLOWAT is set to account for 100% overhead in tcp_set_rcvlowat() This works well when skb->len/skb->truesize ratio is bigger than 0.5 But if we receive packets with small MSS, we can end up in a situation where not enough bytes are available in the receive queue to satisfy RCVLOWAT setting. As our sk_rcvbuf limit is hit, we send zero windows in ACK packets, preventing remote peer from sending more data. Even autotuning does not help, because it only triggers at the time user process drains the queue. If no EPOLLIN is generated, this can not happen. Note poll() has a similar issue, after commit c7004482e8dc ("tcp: Respect SO_RCVLOWAT in tcp_poll().") Fixes: 03f45c883c6f ("tcp: avoid extra wakeups for SO_RCVLOWAT users") Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/net/tcp.h | 13 +++++++++++++ net/ipv4/tcp.c | 14 +++++++++++--- net/ipv4/tcp_input.c | 3 ++- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 64f84683feae..6f8e60c6fbc7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1420,6 +1420,19 @@ static inline int tcp_full_space(const struct sock *sk) return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf)); } +/* We provision sk_rcvbuf around 200% of sk_rcvlowat. + * If 87.5 % (7/8) of the space has been consumed, we want to override + * SO_RCVLOWAT constraint, since we are receiving skbs with too small + * len/truesize ratio. + */ +static inline bool tcp_rmem_pressure(const struct sock *sk) +{ + int rcvbuf = READ_ONCE(sk->sk_rcvbuf); + int threshold = rcvbuf - (rcvbuf >> 3); + + return atomic_read(&sk->sk_rmem_alloc) > threshold; +} + extern void tcp_openreq_init_rwin(struct request_sock *req, const struct sock *sk_listener, const struct dst_entry *dst); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e72bd651d21a..a385fcaaa03b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -476,9 +476,17 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags) static inline bool tcp_stream_is_readable(const struct tcp_sock *tp, int target, struct sock *sk) { - return (READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq) >= target) || - (sk->sk_prot->stream_memory_read ? - sk->sk_prot->stream_memory_read(sk) : false); + int avail = READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq); + + if (avail > 0) { + if (avail >= target) + return true; + if (tcp_rmem_pressure(sk)) + return true; + } + if (sk->sk_prot->stream_memory_read) + return sk->sk_prot->stream_memory_read(sk); + return false; } /* diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b996dc1069c5..29c6fc8c7716 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4757,7 +4757,8 @@ void tcp_data_ready(struct sock *sk) const struct tcp_sock *tp = tcp_sk(sk); int avail = tp->rcv_nxt - tp->copied_seq; - if (avail < sk->sk_rcvlowat && !sock_flag(sk, SOCK_DONE)) + if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) && + !sock_flag(sk, SOCK_DONE)) return; sk->sk_data_ready(sk); -- cgit v1.2.3-59-g8ed1b From c485b19d52c4ba269dfd027945dee81755fdd530 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 7 May 2020 09:33:47 +0300 Subject: RDMA/uverbs: Do not discard the IB_EVENT_DEVICE_FATAL event The commit below moved all of the destruction to the disassociate step and cleaned up the event channel during destroy_uobj. However, when ib_uverbs_free_hw_resources() pushes IB_EVENT_DEVICE_FATAL and then immediately goes to destroy all uobjects this causes ib_uverbs_free_event_queue() to discard the queued event if userspace hasn't already read() it. Unlike all other event queues async FD needs to defer the ib_uverbs_free_event_queue() until FD release. This still unregisters the handler from the IB device during disassociation. Fixes: 3e032c0e92aa ("RDMA/core: Make ib_uverbs_async_event_file into a uobject") Link: https://lore.kernel.org/r/20200507063348.98713-2-leon@kernel.org Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 3 ++- drivers/infiniband/core/uverbs.h | 1 + drivers/infiniband/core/uverbs_main.c | 2 +- .../infiniband/core/uverbs_std_types_async_fd.c | 26 +++++++++++++++++++++- 4 files changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 177333d8bcda..bf8e149d3191 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -459,7 +459,8 @@ alloc_begin_fd_uobject(const struct uverbs_api_object *obj, struct ib_uobject *uobj; struct file *filp; - if (WARN_ON(fd_type->fops->release != &uverbs_uobject_fd_release)) + if (WARN_ON(fd_type->fops->release != &uverbs_uobject_fd_release && + fd_type->fops->release != &uverbs_async_event_release)) return ERR_PTR(-EINVAL); new_fd = get_unused_fd_flags(O_CLOEXEC); diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 7df71983212d..2673cb1cd655 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -219,6 +219,7 @@ void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue); void ib_uverbs_init_async_event_file(struct ib_uverbs_async_event_file *ev_file); void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue); void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res); +int uverbs_async_event_release(struct inode *inode, struct file *filp); int ib_alloc_ucontext(struct uverbs_attr_bundle *attrs); int ib_init_ucontext(struct uverbs_attr_bundle *attrs); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 17fc25db0311..cb5b59123d8f 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -346,7 +346,7 @@ const struct file_operations uverbs_async_event_fops = { .owner = THIS_MODULE, .read = ib_uverbs_async_event_read, .poll = ib_uverbs_async_event_poll, - .release = uverbs_uobject_fd_release, + .release = uverbs_async_event_release, .fasync = ib_uverbs_async_event_fasync, .llseek = no_llseek, }; diff --git a/drivers/infiniband/core/uverbs_std_types_async_fd.c b/drivers/infiniband/core/uverbs_std_types_async_fd.c index 82ec0806b34b..462deb506b16 100644 --- a/drivers/infiniband/core/uverbs_std_types_async_fd.c +++ b/drivers/infiniband/core/uverbs_std_types_async_fd.c @@ -26,10 +26,34 @@ static int uverbs_async_event_destroy_uobj(struct ib_uobject *uobj, container_of(uobj, struct ib_uverbs_async_event_file, uobj); ib_unregister_event_handler(&event_file->event_handler); - ib_uverbs_free_event_queue(&event_file->ev_queue); return 0; } +int uverbs_async_event_release(struct inode *inode, struct file *filp) +{ + struct ib_uverbs_async_event_file *event_file; + struct ib_uobject *uobj = filp->private_data; + int ret; + + if (!uobj) + return uverbs_uobject_fd_release(inode, filp); + + event_file = + container_of(uobj, struct ib_uverbs_async_event_file, uobj); + + /* + * The async event FD has to deliver IB_EVENT_DEVICE_FATAL even after + * disassociation, so cleaning the event list must only happen after + * release. The user knows it has reached the end of the event stream + * when it sees IB_EVENT_DEVICE_FATAL. + */ + uverbs_uobject_get(uobj); + ret = uverbs_uobject_fd_release(inode, filp); + ib_uverbs_free_event_queue(&event_file->ev_queue); + uverbs_uobject_put(uobj); + return ret; +} + DECLARE_UVERBS_NAMED_METHOD( UVERBS_METHOD_ASYNC_EVENT_ALLOC, UVERBS_ATTR_FD(UVERBS_ATTR_ASYNC_EVENT_ALLOC_FD_HANDLE, -- cgit v1.2.3-59-g8ed1b From ccfdbaa5cf4601b9b71601893029dcc9245c002b Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 7 May 2020 09:33:48 +0300 Subject: RDMA/uverbs: Move IB_EVENT_DEVICE_FATAL to destroy_uobj When multiple async FDs were allowed to exist the idea was for all broadcast events to be delivered to all async FDs, however IB_EVENT_DEVICE_FATAL was missed. Instead of having ib_uverbs_free_hw_resources() special case the global async_fd, have it cause the event during the uobject destruction. Every async fd is now a uobject so simply generate the IB_EVENT_DEVICE_FATAL while destroying the async fd uobject. This ensures every async FD gets a copy of the event. Fixes: d680e88e2013 ("RDMA/core: Add UVERBS_METHOD_ASYNC_EVENT_ALLOC") Link: https://lore.kernel.org/r/20200507063348.98713-3-leon@kernel.org Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs.h | 3 +++ drivers/infiniband/core/uverbs_main.c | 10 +++------- drivers/infiniband/core/uverbs_std_types_async_fd.c | 4 ++++ 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 2673cb1cd655..3d189c7ee59e 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -228,6 +228,9 @@ void ib_uverbs_release_ucq(struct ib_uverbs_completion_event_file *ev_file, struct ib_ucq_object *uobj); void ib_uverbs_release_uevent(struct ib_uevent_object *uobj); void ib_uverbs_release_file(struct kref *ref); +void ib_uverbs_async_handler(struct ib_uverbs_async_event_file *async_file, + __u64 element, __u64 event, + struct list_head *obj_list, u32 *counter); void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context); void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index cb5b59123d8f..1bab8de14757 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -386,10 +386,9 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN); } -static void -ib_uverbs_async_handler(struct ib_uverbs_async_event_file *async_file, - __u64 element, __u64 event, struct list_head *obj_list, - u32 *counter) +void ib_uverbs_async_handler(struct ib_uverbs_async_event_file *async_file, + __u64 element, __u64 event, + struct list_head *obj_list, u32 *counter) { struct ib_uverbs_event *entry; unsigned long flags; @@ -1187,9 +1186,6 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, */ mutex_unlock(&uverbs_dev->lists_mutex); - ib_uverbs_async_handler(READ_ONCE(file->async_file), 0, - IB_EVENT_DEVICE_FATAL, NULL, NULL); - uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE); kref_put(&file->ref, ib_uverbs_release_file); diff --git a/drivers/infiniband/core/uverbs_std_types_async_fd.c b/drivers/infiniband/core/uverbs_std_types_async_fd.c index 462deb506b16..61899eaf1f91 100644 --- a/drivers/infiniband/core/uverbs_std_types_async_fd.c +++ b/drivers/infiniband/core/uverbs_std_types_async_fd.c @@ -26,6 +26,10 @@ static int uverbs_async_event_destroy_uobj(struct ib_uobject *uobj, container_of(uobj, struct ib_uverbs_async_event_file, uobj); ib_unregister_event_handler(&event_file->event_handler); + + if (why == RDMA_REMOVE_DRIVER_REMOVE) + ib_uverbs_async_handler(event_file, 0, IB_EVENT_DEVICE_FATAL, + NULL, NULL); return 0; } -- cgit v1.2.3-59-g8ed1b From 59566b0b622e3e6ea928c0b8cac8a5601b00b383 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 30 Apr 2020 20:21:47 -0400 Subject: x86/ftrace: Have ftrace trampolines turn read-only at the end of system boot up Booting one of my machines, it triggered the following crash: Kernel/User page tables isolation: enabled ftrace: allocating 36577 entries in 143 pages Starting tracer 'function' BUG: unable to handle page fault for address: ffffffffa000005c #PF: supervisor write access in kernel mode #PF: error_code(0x0003) - permissions violation PGD 2014067 P4D 2014067 PUD 2015063 PMD 7b253067 PTE 7b252061 Oops: 0003 [#1] PREEMPT SMP PTI CPU: 0 PID: 0 Comm: swapper Not tainted 5.4.0-test+ #24 Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS SDBLI944.86P 05/08/2007 RIP: 0010:text_poke_early+0x4a/0x58 Code: 34 24 48 89 54 24 08 e8 bf 72 0b 00 48 8b 34 24 48 8b 4c 24 08 84 c0 74 0b 48 89 df f3 a4 48 83 c4 10 5b c3 9c 58 fa 48 89 df a4 50 9d 48 83 c4 10 5b e9 d6 f9 ff ff 0 41 57 49 RSP: 0000:ffffffff82003d38 EFLAGS: 00010046 RAX: 0000000000000046 RBX: ffffffffa000005c RCX: 0000000000000005 RDX: 0000000000000005 RSI: ffffffff825b9a90 RDI: ffffffffa000005c RBP: ffffffffa000005c R08: 0000000000000000 R09: ffffffff8206e6e0 R10: ffff88807b01f4c0 R11: ffffffff8176c106 R12: ffffffff8206e6e0 R13: ffffffff824f2440 R14: 0000000000000000 R15: ffffffff8206eac0 FS: 0000000000000000(0000) GS:ffff88807d400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffa000005c CR3: 0000000002012000 CR4: 00000000000006b0 Call Trace: text_poke_bp+0x27/0x64 ? mutex_lock+0x36/0x5d arch_ftrace_update_trampoline+0x287/0x2d5 ? ftrace_replace_code+0x14b/0x160 ? ftrace_update_ftrace_func+0x65/0x6c __register_ftrace_function+0x6d/0x81 ftrace_startup+0x23/0xc1 register_ftrace_function+0x20/0x37 func_set_flag+0x59/0x77 __set_tracer_option.isra.19+0x20/0x3e trace_set_options+0xd6/0x13e apply_trace_boot_options+0x44/0x6d register_tracer+0x19e/0x1ac early_trace_init+0x21b/0x2c9 start_kernel+0x241/0x518 ? load_ucode_intel_bsp+0x21/0x52 secondary_startup_64+0xa4/0xb0 I was able to trigger it on other machines, when I added to the kernel command line of both "ftrace=function" and "trace_options=func_stack_trace". The cause is the "ftrace=function" would register the function tracer and create a trampoline, and it will set it as executable and read-only. Then the "trace_options=func_stack_trace" would then update the same trampoline to include the stack tracer version of the function tracer. But since the trampoline already exists, it updates it with text_poke_bp(). The problem is that text_poke_bp() called while system_state == SYSTEM_BOOTING, it will simply do a memcpy() and not the page mapping, as it would think that the text is still read-write. But in this case it is not, and we take a fault and crash. Instead, lets keep the ftrace trampolines read-write during boot up, and then when the kernel executable text is set to read-only, the ftrace trampolines get set to read-only as well. Link: https://lkml.kernel.org/r/20200430202147.4dc6e2de@oasis.local.home Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Josh Poimboeuf Cc: "H. Peter Anvin" Cc: stable@vger.kernel.org Fixes: 768ae4406a5c ("x86/ftrace: Use text_poke()") Acked-by: Peter Zijlstra Signed-off-by: Steven Rostedt (VMware) --- arch/x86/include/asm/ftrace.h | 6 ++++++ arch/x86/kernel/ftrace.c | 29 ++++++++++++++++++++++++++++- arch/x86/mm/init_64.c | 3 +++ include/linux/ftrace.h | 23 +++++++++++++++++++++++ kernel/trace/ftrace_internal.h | 22 ---------------------- 5 files changed, 60 insertions(+), 23 deletions(-) diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 85be2f506272..89af0d2c62aa 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -56,6 +56,12 @@ struct dyn_arch_ftrace { #ifndef __ASSEMBLY__ +#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE) +extern void set_ftrace_ops_ro(void); +#else +static inline void set_ftrace_ops_ro(void) { } +#endif + #define ARCH_HAS_SYSCALL_MATCH_SYM_NAME static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) { diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 37a0aeaf89e7..b0e641793be4 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -407,7 +407,8 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) set_vm_flush_reset_perms(trampoline); - set_memory_ro((unsigned long)trampoline, npages); + if (likely(system_state != SYSTEM_BOOTING)) + set_memory_ro((unsigned long)trampoline, npages); set_memory_x((unsigned long)trampoline, npages); return (unsigned long)trampoline; fail: @@ -415,6 +416,32 @@ fail: return 0; } +void set_ftrace_ops_ro(void) +{ + struct ftrace_ops *ops; + unsigned long start_offset; + unsigned long end_offset; + unsigned long npages; + unsigned long size; + + do_for_each_ftrace_op(ops, ftrace_ops_list) { + if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) + continue; + + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { + start_offset = (unsigned long)ftrace_regs_caller; + end_offset = (unsigned long)ftrace_regs_caller_end; + } else { + start_offset = (unsigned long)ftrace_caller; + end_offset = (unsigned long)ftrace_epilogue; + } + size = end_offset - start_offset; + size = size + RET_SIZE + sizeof(void *); + npages = DIV_ROUND_UP(size, PAGE_SIZE); + set_memory_ro((unsigned long)ops->trampoline, npages); + } while_for_each_ftrace_op(ops); +} + static unsigned long calc_trampoline_call_offset(bool save_regs) { unsigned long start_offset; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 3b289c2f75cd..8b5f73f5e207 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -54,6 +54,7 @@ #include #include #include +#include #include "mm_internal.h" @@ -1291,6 +1292,8 @@ void mark_rodata_ro(void) all_end = roundup((unsigned long)_brk_end, PMD_SIZE); set_memory_nx(text_end, (all_end - text_end) >> PAGE_SHIFT); + set_ftrace_ops_ro(); + #ifdef CONFIG_CPA_DEBUG printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, end); set_memory_rw(start, (end-start) >> PAGE_SHIFT); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index db95244a62d4..ab4bd15cbcdb 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -210,6 +210,29 @@ struct ftrace_ops { #endif }; +extern struct ftrace_ops __rcu *ftrace_ops_list; +extern struct ftrace_ops ftrace_list_end; + +/* + * Traverse the ftrace_global_list, invoking all entries. The reason that we + * can use rcu_dereference_raw_check() is that elements removed from this list + * are simply leaked, so there is no need to interact with a grace-period + * mechanism. The rcu_dereference_raw_check() calls are needed to handle + * concurrent insertions into the ftrace_global_list. + * + * Silly Alpha and silly pointer-speculation compiler optimizations! + */ +#define do_for_each_ftrace_op(op, list) \ + op = rcu_dereference_raw_check(list); \ + do + +/* + * Optimized for just a single item in the list (as that is the normal case). + */ +#define while_for_each_ftrace_op(op) \ + while (likely(op = rcu_dereference_raw_check((op)->next)) && \ + unlikely((op) != &ftrace_list_end)) + /* * Type of the current tracing. */ diff --git a/kernel/trace/ftrace_internal.h b/kernel/trace/ftrace_internal.h index 0456e0a3dab1..382775edf690 100644 --- a/kernel/trace/ftrace_internal.h +++ b/kernel/trace/ftrace_internal.h @@ -4,28 +4,6 @@ #ifdef CONFIG_FUNCTION_TRACER -/* - * Traverse the ftrace_global_list, invoking all entries. The reason that we - * can use rcu_dereference_raw_check() is that elements removed from this list - * are simply leaked, so there is no need to interact with a grace-period - * mechanism. The rcu_dereference_raw_check() calls are needed to handle - * concurrent insertions into the ftrace_global_list. - * - * Silly Alpha and silly pointer-speculation compiler optimizations! - */ -#define do_for_each_ftrace_op(op, list) \ - op = rcu_dereference_raw_check(list); \ - do - -/* - * Optimized for just a single item in the list (as that is the normal case). - */ -#define while_for_each_ftrace_op(op) \ - while (likely(op = rcu_dereference_raw_check((op)->next)) && \ - unlikely((op) != &ftrace_list_end)) - -extern struct ftrace_ops __rcu *ftrace_ops_list; -extern struct ftrace_ops ftrace_list_end; extern struct mutex ftrace_lock; extern struct ftrace_ops global_ops; -- cgit v1.2.3-59-g8ed1b From 48084c3595cb7429f6ba734cfea1313573b9a7fa Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 7 May 2020 23:04:45 +0800 Subject: riscv: perf: RISCV_BASE_PMU should be independent Selecting PERF_EVENTS without selecting RISCV_BASE_PMU results in a build error. Signed-off-by: Kefeng Wang [Palmer: commit text] Fixes: 178e9fc47aae("perf: riscv: preliminary RISC-V support") Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/perf_event.h | 8 ++------ arch/riscv/kernel/Makefile | 2 +- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h index 0234048b12bc..062efd3a1d5d 100644 --- a/arch/riscv/include/asm/perf_event.h +++ b/arch/riscv/include/asm/perf_event.h @@ -12,19 +12,14 @@ #include #include +#ifdef CONFIG_RISCV_BASE_PMU #define RISCV_BASE_COUNTERS 2 /* * The RISCV_MAX_COUNTERS parameter should be specified. */ -#ifdef CONFIG_RISCV_BASE_PMU #define RISCV_MAX_COUNTERS 2 -#endif - -#ifndef RISCV_MAX_COUNTERS -#error "Please provide a valid RISCV_MAX_COUNTERS for the PMU." -#endif /* * These are the indexes of bits in counteren register *minus* 1, @@ -82,6 +77,7 @@ struct riscv_pmu { int irq; }; +#endif #ifdef CONFIG_PERF_EVENTS #define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs #endif diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 86c83081044f..d8bbd3207100 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -43,7 +43,7 @@ obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o -obj-$(CONFIG_PERF_EVENTS) += perf_event.o +obj-$(CONFIG_RISCV_BASE_PMU) += perf_event.o obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o obj-$(CONFIG_RISCV_SBI) += sbi.o -- cgit v1.2.3-59-g8ed1b From ab7fbad0c7d7a4f9b320a059a171a92a34b6d409 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Mon, 11 May 2020 10:19:52 +0800 Subject: riscv: Fix unmet direct dependencies built based on SOC_VIRT Fix unmet direct dependencies Warning and fix Kconfig indent. WARNING: unmet direct dependencies detected for POWER_RESET_SYSCON Depends on [n]: POWER_RESET [=n] && OF [=y] && HAS_IOMEM [=y] Selected by [y]: - SOC_VIRT [=y] WARNING: unmet direct dependencies detected for POWER_RESET_SYSCON_POWEROFF Depends on [n]: POWER_RESET [=n] && OF [=y] && HAS_IOMEM [=y] Selected by [y]: - SOC_VIRT [=y] WARNING: unmet direct dependencies detected for RTC_DRV_GOLDFISH Depends on [n]: RTC_CLASS [=n] && OF [=y] && HAS_IOMEM [=y] && (GOLDFISH [=y] || COMPILE_TEST [=n]) Selected by [y]: - SOC_VIRT [=y] Reported-by: Hulk Robot Signed-off-by: Kefeng Wang Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig.socs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index 216286db81c9..d646332e44f1 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -11,14 +11,15 @@ config SOC_SIFIVE This enables support for SiFive SoC platform hardware. config SOC_VIRT - bool "QEMU Virt Machine" - select POWER_RESET_SYSCON - select POWER_RESET_SYSCON_POWEROFF - select GOLDFISH - select RTC_DRV_GOLDFISH - select SIFIVE_PLIC - help - This enables support for QEMU Virt Machine. + bool "QEMU Virt Machine" + select POWER_RESET + select POWER_RESET_SYSCON + select POWER_RESET_SYSCON_POWEROFF + select GOLDFISH + select RTC_DRV_GOLDFISH if RTC_CLASS + select SIFIVE_PLIC + help + This enables support for QEMU Virt Machine. config SOC_KENDRYTE bool "Kendryte K210 SoC" -- cgit v1.2.3-59-g8ed1b From 0502bee37cdef755d63eee60236562e5605e2480 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Mon, 11 May 2020 10:19:53 +0800 Subject: riscv: stacktrace: Fix undefined reference to `walk_stackframe' Drop static declaration to fix following build error if FRAME_POINTER disabled, riscv64-linux-ld: arch/riscv/kernel/perf_callchain.o: in function `.L0': perf_callchain.c:(.text+0x2b8): undefined reference to `walk_stackframe' Reported-by: Hulk Robot Signed-off-by: Kefeng Wang Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/stacktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 6c854875ac74..837b9b38f825 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -65,7 +65,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, #else /* !CONFIG_FRAME_POINTER */ -static void notrace walk_stackframe(struct task_struct *task, +void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, bool (*fn)(unsigned long, void *), void *arg) { unsigned long sp, pc; -- cgit v1.2.3-59-g8ed1b From fa8174aa225fe3d53b37552e5066e6f0301dbabd Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Mon, 11 May 2020 10:19:54 +0800 Subject: riscv: Add pgprot_writecombine/device and PAGE_SHARED defination if NOMMU Some drivers use PAGE_SHARED, pgprot_writecombine()/pgprot_device(), add the defination to fix build error if NOMMU. Reported-by: Hulk Robot Signed-off-by: Kefeng Wang Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/mmio.h | 2 ++ arch/riscv/include/asm/pgtable.h | 1 + 2 files changed, 3 insertions(+) diff --git a/arch/riscv/include/asm/mmio.h b/arch/riscv/include/asm/mmio.h index a2c809df2733..56053c9838b2 100644 --- a/arch/riscv/include/asm/mmio.h +++ b/arch/riscv/include/asm/mmio.h @@ -16,6 +16,8 @@ #ifndef CONFIG_MMU #define pgprot_noncached(x) (x) +#define pgprot_writecombine(x) (x) +#define pgprot_device(x) (x) #endif /* CONFIG_MMU */ /* Generic IO read/write. These perform native-endian accesses. */ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 9c188ad2e52d..f225e2fbdfb4 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -470,6 +470,7 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, #else /* CONFIG_MMU */ +#define PAGE_SHARED __pgprot(0) #define PAGE_KERNEL __pgprot(0) #define swapper_pg_dir NULL #define VMALLOC_START 0 -- cgit v1.2.3-59-g8ed1b From 21e2414083e2bad62956312f5c47fc8cbba76d7d Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Mon, 11 May 2020 10:19:57 +0800 Subject: riscv: Disable ARCH_HAS_DEBUG_VIRTUAL if NOMMU DEBUG_VIRTUAL should only used when MMU enabled, add the dependence. Signed-off-by: Kefeng Wang Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 74f82cf4f781..24f5a8345477 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -54,7 +54,7 @@ config RISCV select GENERIC_ARCH_TOPOLOGY if SMP select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_MMIOWB - select ARCH_HAS_DEBUG_VIRTUAL + select ARCH_HAS_DEBUG_VIRTUAL if MMU select HAVE_EBPF_JIT if MMU select EDAC_SUPPORT select ARCH_HAS_GIGANTIC_PAGE -- cgit v1.2.3-59-g8ed1b From 69868418e14873638f7fb54e79b3390624af4824 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Mon, 11 May 2020 10:19:58 +0800 Subject: riscv: Make SYS_SUPPORTS_HUGETLBFS depends on MMU HUGETLBFS only used when MMU enabled, add the dependency. Reported-by: Hulk Robot Signed-off-by: Kefeng Wang Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 24f5a8345477..a31e1a41913a 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -136,6 +136,7 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y config SYS_SUPPORTS_HUGETLBFS + depends on MMU def_bool y config STACKTRACE_SUPPORT -- cgit v1.2.3-59-g8ed1b From eead1c2ea2509fd754c6da893a94f0e69e83ebe4 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 12 May 2020 14:43:14 +0200 Subject: netlabel: cope with NULL catmap The cipso and calipso code can set the MLS_CAT attribute on successful parsing, even if the corresponding catmap has not been allocated, as per current configuration and external input. Later, selinux code tries to access the catmap if the MLS_CAT flag is present via netlbl_catmap_getlong(). That may cause null ptr dereference while processing incoming network traffic. Address the issue setting the MLS_CAT flag only if the catmap is really allocated. Additionally let netlbl_catmap_getlong() cope with NULL catmap. Reported-by: Matthew Sheets Fixes: 4b8feff251da ("netlabel: fix the horribly broken catmap functions") Fixes: ceba1832b1b2 ("calipso: Set the calipso socket label to match the secattr.") Signed-off-by: Paolo Abeni Acked-by: Paul Moore Signed-off-by: David S. Miller --- net/ipv4/cipso_ipv4.c | 6 ++++-- net/ipv6/calipso.c | 3 ++- net/netlabel/netlabel_kapi.c | 6 ++++++ 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 0bd10a1f477f..a23094b050f8 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1258,7 +1258,8 @@ static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def, return ret_val; } - secattr->flags |= NETLBL_SECATTR_MLS_CAT; + if (secattr->attr.mls.cat) + secattr->flags |= NETLBL_SECATTR_MLS_CAT; } return 0; @@ -1439,7 +1440,8 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def, return ret_val; } - secattr->flags |= NETLBL_SECATTR_MLS_CAT; + if (secattr->attr.mls.cat) + secattr->flags |= NETLBL_SECATTR_MLS_CAT; } return 0; diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c index 221c81f85cbf..8d3f66c310db 100644 --- a/net/ipv6/calipso.c +++ b/net/ipv6/calipso.c @@ -1047,7 +1047,8 @@ static int calipso_opt_getattr(const unsigned char *calipso, goto getattr_return; } - secattr->flags |= NETLBL_SECATTR_MLS_CAT; + if (secattr->attr.mls.cat) + secattr->flags |= NETLBL_SECATTR_MLS_CAT; } secattr->type = NETLBL_NLTYPE_CALIPSO; diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 409a3ae47ce2..5e1239cef000 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -734,6 +734,12 @@ int netlbl_catmap_getlong(struct netlbl_lsm_catmap *catmap, if ((off & (BITS_PER_LONG - 1)) != 0) return -EINVAL; + /* a null catmap is equivalent to an empty one */ + if (!catmap) { + *offset = (u32)-1; + return 0; + } + if (off < catmap->startbit) { off = catmap->startbit; *offset = off; -- cgit v1.2.3-59-g8ed1b From c54a8f1f329197d83d941ad84c4aa38bf282cbbd Mon Sep 17 00:00:00 2001 From: Bernard Zhao Date: Tue, 28 Apr 2020 06:17:47 -0700 Subject: drm/meson: pm resume add return errno branch pm_resump api did not handle drm_mode_config_helper_resume error. This change add handle to return drm_mode_config_helper_resume`s error number. This code logic is aligned with api pm_suspend. After this change, the code maybe a bit readable. Signed-off-by: Bernard Zhao Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20200428131747.2099-1-bernard@vivo.com --- drivers/gpu/drm/meson/meson_drv.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index b5f5eb7b4bb9..8c2e1b47e81a 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -412,9 +412,7 @@ static int __maybe_unused meson_drv_pm_resume(struct device *dev) if (priv->afbcd.ops) priv->afbcd.ops->init(priv); - drm_mode_config_helper_resume(priv->drm); - - return 0; + return drm_mode_config_helper_resume(priv->drm); } static int compare_of(struct device *dev, void *data) -- cgit v1.2.3-59-g8ed1b From 6d44e43f225f96eea2e610f87070718032ad8aaa Mon Sep 17 00:00:00 2001 From: Amy Shih Date: Tue, 12 May 2020 02:25:23 +0000 Subject: hwmon: (nct7904) Read all SMI status registers in probe function When nct7904 power up, it compares current sensor readings against the default threshold immediately. This results in false alarms on startup. Read all SMI status registers in probe function to clear the alarms. Signed-off-by: Amy Shih [groeck: Reworded description] Signed-off-by: Guenter Roeck --- drivers/hwmon/nct7904.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/hwmon/nct7904.c b/drivers/hwmon/nct7904.c index 1f5743d68984..8d59cff37755 100644 --- a/drivers/hwmon/nct7904.c +++ b/drivers/hwmon/nct7904.c @@ -41,6 +41,7 @@ #define FANCTL_MAX 4 /* Counted from 1 */ #define TCPU_MAX 8 /* Counted from 1 */ #define TEMP_MAX 4 /* Counted from 1 */ +#define SMI_STS_MAX 10 /* Counted from 1 */ #define VT_ADC_CTRL0_REG 0x20 /* Bank 0 */ #define VT_ADC_CTRL1_REG 0x21 /* Bank 0 */ @@ -1009,6 +1010,13 @@ static int nct7904_probe(struct i2c_client *client, data->fan_mode[i] = ret; } + /* Read all of SMI status register to clear alarms */ + for (i = 0; i < SMI_STS_MAX; i++) { + ret = nct7904_read_reg(data, BANK_0, SMI_STS1_REG + i); + if (ret < 0) + return ret; + } + hwmon_dev = devm_hwmon_device_register_with_info(dev, client->name, data, &nct7904_chip_info, NULL); -- cgit v1.2.3-59-g8ed1b From 7b2fd270af27edaf02acb41a7babe805a9441914 Mon Sep 17 00:00:00 2001 From: Amy Shih Date: Tue, 12 May 2020 09:38:06 +0000 Subject: hwmon: (nct7904) Fix incorrect range of temperature limit registers The format of temperature limitation registers are 8-bit 2's complement and the range is -128~127. Converts the reading value to signed char to fix the incorrect range of temperature limitation registers. Signed-off-by: Amy Shih Signed-off-by: Guenter Roeck --- drivers/hwmon/nct7904.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/nct7904.c b/drivers/hwmon/nct7904.c index 8d59cff37755..a7eb10d2a053 100644 --- a/drivers/hwmon/nct7904.c +++ b/drivers/hwmon/nct7904.c @@ -362,6 +362,7 @@ static int nct7904_read_temp(struct device *dev, u32 attr, int channel, struct nct7904_data *data = dev_get_drvdata(dev); int ret, temp; unsigned int reg1, reg2, reg3; + s8 temps; switch (attr) { case hwmon_temp_input: @@ -467,7 +468,8 @@ static int nct7904_read_temp(struct device *dev, u32 attr, int channel, if (ret < 0) return ret; - *val = ret * 1000; + temps = ret; + *val = temps * 1000; return 0; } -- cgit v1.2.3-59-g8ed1b From 333e22db228f0bd0c839553015a6a8d3db4ba569 Mon Sep 17 00:00:00 2001 From: Samu Nuutamo Date: Mon, 11 May 2020 13:02:19 +0200 Subject: hwmon: (da9052) Synchronize access with mfd When tsi-as-adc is configured it is possible for in7[0123]_input read to return an incorrect value if a concurrent read to in[456]_input is performed. This is caused by a concurrent manipulation of the mux channel without proper locking as hwmon and mfd use different locks for synchronization. Switch hwmon to use the same lock as mfd when accessing the TSI channel. Fixes: 4f16cab19a3d5 ("hwmon: da9052: Add support for TSI channel") Signed-off-by: Samu Nuutamo [rebase to current master, reword commit message slightly] Signed-off-by: Sebastian Reichel Signed-off-by: Guenter Roeck --- drivers/hwmon/da9052-hwmon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/da9052-hwmon.c b/drivers/hwmon/da9052-hwmon.c index 53b517dbe7e6..4af2fc309c28 100644 --- a/drivers/hwmon/da9052-hwmon.c +++ b/drivers/hwmon/da9052-hwmon.c @@ -244,9 +244,9 @@ static ssize_t da9052_tsi_show(struct device *dev, int channel = to_sensor_dev_attr(devattr)->index; int ret; - mutex_lock(&hwmon->hwmon_lock); + mutex_lock(&hwmon->da9052->auxadc_lock); ret = __da9052_read_tsi(dev, channel); - mutex_unlock(&hwmon->hwmon_lock); + mutex_unlock(&hwmon->da9052->auxadc_lock); if (ret < 0) return ret; -- cgit v1.2.3-59-g8ed1b From 29b74cb75e3572d83708745e81e24d37837415f9 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 13 May 2020 09:42:29 +0200 Subject: s390/ism: fix error return code in ism_probe() Fix to return negative error code -ENOMEM from the smcd_alloc_dev() error handling case instead of 0, as done elsewhere in this function. Fixes: 684b89bc39ce ("s390/ism: add device driver for internal shared memory") Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- drivers/s390/net/ism_drv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c index c75112ee7b97..c7fade836d83 100644 --- a/drivers/s390/net/ism_drv.c +++ b/drivers/s390/net/ism_drv.c @@ -521,8 +521,10 @@ static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id) ism->smcd = smcd_alloc_dev(&pdev->dev, dev_name(&pdev->dev), &ism_ops, ISM_NR_DMBS); - if (!ism->smcd) + if (!ism->smcd) { + ret = -ENOMEM; goto err_resource; + } ism->smcd->priv = ism; ret = ism_dev_init(ism); -- cgit v1.2.3-59-g8ed1b From be7fa20f057e05b31fc08d1999394c43cf2c1a9a Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 13 May 2020 09:42:30 +0200 Subject: MAINTAINERS: add Karsten Graul as S390 NETWORK DRIVERS maintainer Add Karsten as additional maintainer for drivers/s390/net . One of his focal points is the ism driver. Cc: Julian Wiedmann Acked-by: Julian Wiedmann Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 88bf36ab2b22..85894787825e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14644,6 +14644,7 @@ F: drivers/iommu/s390-iommu.c S390 IUCV NETWORK LAYER M: Julian Wiedmann +M: Karsten Graul M: Ursula Braun L: linux-s390@vger.kernel.org S: Supported -- cgit v1.2.3-59-g8ed1b From c72685894506a3fec5978b9b11b94069a7d1c995 Mon Sep 17 00:00:00 2001 From: Tuong Lien Date: Wed, 13 May 2020 19:33:16 +0700 Subject: tipc: fix large latency in smart Nagle streaming Currently when a connection is in Nagle mode, we set the 'ack_required' bit in the last sending buffer and wait for the corresponding ACK prior to pushing more data. However, on the receiving side, the ACK is issued only when application really reads the whole data. Even if part of the last buffer is received, we will not do the ACK as required. This might cause an unnecessary delay since the receiver does not always fetch the message as fast as the sender, resulting in a large latency in the user message sending, which is: [one RTT + the receiver processing time]. The commit makes Nagle ACK as soon as possible i.e. when a message with the 'ack_required' arrives in the receiving side's stack even before it is processed or put in the socket receive queue... This way, we can limit the streaming latency to one RTT as committed in Nagle mode. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Tuong Lien Signed-off-by: David S. Miller --- net/tipc/socket.c | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 87466607097f..e370ad0edd76 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1739,22 +1739,21 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct sk_buff *skb, return 0; } -static void tipc_sk_send_ack(struct tipc_sock *tsk) +static struct sk_buff *tipc_sk_build_ack(struct tipc_sock *tsk) { struct sock *sk = &tsk->sk; - struct net *net = sock_net(sk); struct sk_buff *skb = NULL; struct tipc_msg *msg; u32 peer_port = tsk_peer_port(tsk); u32 dnode = tsk_peer_node(tsk); if (!tipc_sk_connected(sk)) - return; + return NULL; skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, dnode, tsk_own_node(tsk), peer_port, tsk->portid, TIPC_OK); if (!skb) - return; + return NULL; msg = buf_msg(skb); msg_set_conn_ack(msg, tsk->rcv_unacked); tsk->rcv_unacked = 0; @@ -1764,7 +1763,19 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk) tsk->rcv_win = tsk_adv_blocks(tsk->sk.sk_rcvbuf); msg_set_adv_win(msg, tsk->rcv_win); } - tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg)); + return skb; +} + +static void tipc_sk_send_ack(struct tipc_sock *tsk) +{ + struct sk_buff *skb; + + skb = tipc_sk_build_ack(tsk); + if (!skb) + return; + + tipc_node_xmit_skb(sock_net(&tsk->sk), skb, tsk_peer_node(tsk), + msg_link_selector(buf_msg(skb))); } static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) @@ -1938,7 +1949,6 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m, bool peek = flags & MSG_PEEK; int offset, required, copy, copied = 0; int hlen, dlen, err, rc; - bool ack = false; long timeout; /* Catch invalid receive attempts */ @@ -1983,7 +1993,6 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m, /* Copy data if msg ok, otherwise return error/partial data */ if (likely(!err)) { - ack = msg_ack_required(hdr); offset = skb_cb->bytes_read; copy = min_t(int, dlen - offset, buflen - copied); rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy); @@ -2011,7 +2020,7 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m, /* Send connection flow control advertisement when applicable */ tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen); - if (ack || tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE) + if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE) tipc_sk_send_ack(tsk); /* Exit if all requested data or FIN/error received */ @@ -2105,9 +2114,11 @@ static void tipc_sk_proto_rcv(struct sock *sk, * tipc_sk_filter_connect - check incoming message for a connection-based socket * @tsk: TIPC socket * @skb: pointer to message buffer. + * @xmitq: for Nagle ACK if any * Returns true if message should be added to receive queue, false otherwise */ -static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) +static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb, + struct sk_buff_head *xmitq) { struct sock *sk = &tsk->sk; struct net *net = sock_net(sk); @@ -2171,8 +2182,17 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) if (!skb_queue_empty(&sk->sk_write_queue)) tipc_sk_push_backlog(tsk); /* Accept only connection-based messages sent by peer */ - if (likely(con_msg && !err && pport == oport && pnode == onode)) + if (likely(con_msg && !err && pport == oport && + pnode == onode)) { + if (msg_ack_required(hdr)) { + struct sk_buff *skb; + + skb = tipc_sk_build_ack(tsk); + if (skb) + __skb_queue_tail(xmitq, skb); + } return true; + } if (!tsk_peer_msg(tsk, hdr)) return false; if (!err) @@ -2267,7 +2287,7 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, while ((skb = __skb_dequeue(&inputq))) { hdr = buf_msg(skb); limit = rcvbuf_limit(sk, skb); - if ((sk_conn && !tipc_sk_filter_connect(tsk, skb)) || + if ((sk_conn && !tipc_sk_filter_connect(tsk, skb, xmitq)) || (!sk_conn && msg_connected(hdr)) || (!grp && msg_in_group(hdr))) err = TIPC_ERR_NO_PORT; -- cgit v1.2.3-59-g8ed1b From 0771d7df819284d46cf5cfb57698621b503ec17f Mon Sep 17 00:00:00 2001 From: Tuong Lien Date: Wed, 13 May 2020 19:33:17 +0700 Subject: tipc: fix memory leak in service subscripting Upon receipt of a service subscription request from user via a topology connection, one 'sub' object will be allocated in kernel, so it will be able to send an event of the service if any to the user correspondingly then. Also, in case of any failure, the connection will be shutdown and all the pertaining 'sub' objects will be freed. However, there is a race condition as follows resulting in memory leak: receive-work connection send-work | | | sub-1 |<------//-------| | sub-2 |<------//-------| | | |<---------------| evt for sub-x sub-3 |<------//-------| | : : : : : : | /--------| | | | * peer closed | | | | | | | |<-------X-------| evt for sub-y | | |<===============| sub-n |<------/ X shutdown | -> orphan | | That is, the 'receive-work' may get the last subscription request while the 'send-work' is shutting down the connection due to peer close. We had a 'lock' on the connection, so the two actions cannot be carried out simultaneously. If the last subscription is allocated e.g. 'sub-n', before the 'send-work' closes the connection, there will be no issue at all, the 'sub' objects will be freed. In contrast the last subscription will become orphan since the connection was closed, and we released all references. This commit fixes the issue by simply adding one test if the connection remains in 'connected' state right after we obtain the connection lock, then a subscription object can be created as usual, otherwise we ignore it. Acked-by: Ying Xue Acked-by: Jon Maloy Reported-by: Thang Ngo Signed-off-by: Tuong Lien Signed-off-by: David S. Miller --- net/tipc/topsrv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index 73dbed0c4b6b..931c426673c0 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -400,7 +400,9 @@ static int tipc_conn_rcv_from_sock(struct tipc_conn *con) return -EWOULDBLOCK; if (ret == sizeof(s)) { read_lock_bh(&sk->sk_callback_lock); - ret = tipc_conn_rcv_sub(srv, con, &s); + /* RACE: the connection can be closed in the meantime */ + if (likely(connected(con))) + ret = tipc_conn_rcv_sub(srv, con, &s); read_unlock_bh(&sk->sk_callback_lock); if (!ret) return 0; -- cgit v1.2.3-59-g8ed1b From 88690b1079d473a44bf4183dfee9b03d4afae866 Mon Sep 17 00:00:00 2001 From: Tuong Lien Date: Wed, 13 May 2020 19:33:18 +0700 Subject: tipc: fix failed service subscription deletion When a service subscription is expired or canceled by user, it needs to be deleted from the subscription list, so that new subscriptions can be registered (max = 65535 per net). However, there are two issues in code that can cause such an unused subscription to persist: 1) The 'tipc_conn_delete_sub()' has a loop on the subscription list but it makes a break shortly when the 1st subscription differs from the one specified, so the subscription will not be deleted. 2) In case a subscription is canceled, the code to remove the 'TIPC_SUB_CANCEL' flag from the subscription filter does not work if it is a local subscription (i.e. the little endian isn't involved). So, it will be no matches when looking for the subscription to delete later. The subscription(s) will be removed eventually when the user terminates its topology connection but that could be a long time later. Meanwhile, the number of available subscriptions may be exhausted. This commit fixes the two issues above, so as needed a subscription can be deleted correctly. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Tuong Lien Signed-off-by: David S. Miller --- net/tipc/subscr.h | 10 ++++++++++ net/tipc/topsrv.c | 9 +++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index aa015c233898..6ebbec1bedd1 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -96,6 +96,16 @@ void tipc_sub_get(struct tipc_subscription *subscription); (swap_ ? swab32(val__) : val__); \ }) +/* tipc_sub_write - write val_ to field_ of struct sub_ in user endian format + */ +#define tipc_sub_write(sub_, field_, val_) \ + ({ \ + struct tipc_subscr *sub__ = sub_; \ + u32 val__ = val_; \ + int swap_ = !((sub__)->filter & TIPC_FILTER_MASK); \ + (sub__)->field_ = swap_ ? swab32(val__) : val__; \ + }) + /* tipc_evt_write - write val_ to field_ of struct evt_ in user endian format */ #define tipc_evt_write(evt_, field_, val_) \ diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index 931c426673c0..446af7bbd13e 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -237,8 +237,8 @@ static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s) if (!s || !memcmp(s, &sub->evt.s, sizeof(*s))) { tipc_sub_unsubscribe(sub); atomic_dec(&tn->subscription_count); - } else if (s) { - break; + if (s) + break; } } spin_unlock_bh(&con->sub_lock); @@ -362,9 +362,10 @@ static int tipc_conn_rcv_sub(struct tipc_topsrv *srv, { struct tipc_net *tn = tipc_net(srv->net); struct tipc_subscription *sub; + u32 s_filter = tipc_sub_read(s, filter); - if (tipc_sub_read(s, filter) & TIPC_SUB_CANCEL) { - s->filter &= __constant_ntohl(~TIPC_SUB_CANCEL); + if (s_filter & TIPC_SUB_CANCEL) { + tipc_sub_write(s, filter, s_filter & ~TIPC_SUB_CANCEL); tipc_conn_delete_sub(con, s); return 0; } -- cgit v1.2.3-59-g8ed1b From 99addbe31f5524494f4d7077bcb3f6fa64c5d160 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 13 May 2020 08:51:51 -0700 Subject: net: broadcom: Select BROADCOM_PHY for BCMGENET The GENET controller on the Raspberry Pi 4 (2711) is typically interfaced with an external Broadcom PHY via a RGMII electrical interface. To make sure that delays are properly configured at the PHY side, ensure that we the dedicated Broadcom PHY driver (CONFIG_BROADCOM_PHY) is enabled for this to happen. Fixes: 402482a6a78e ("net: bcmgenet: Clear ID_MODE_DIS in EXT_RGMII_OOB_CTRL when not needed") Reported-by: Marek Szyprowski Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index 53055ce5dfd6..2a69c0d06f3c 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -69,6 +69,7 @@ config BCMGENET select BCM7XXX_PHY select MDIO_BCM_UNIMAC select DIMLIB + select BROADCOM_PHY if ARCH_BCM2835 help This driver supports the built-in Ethernet MACs found in the Broadcom BCM7xxx Set Top Box family chipset. -- cgit v1.2.3-59-g8ed1b From 955da9d77435acac066139e9d7f7723ce7204a1d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 9 May 2020 12:52:17 +0100 Subject: drm/i915: Handle idling during i915_gem_evict_something busy loops i915_gem_evict_something() is charged with finding a slot within the GTT that we may reuse. Since our goal is not to stall, we first look for a slot that only overlaps idle vma. To this end, on the first pass we move any active vma to the end of the search list. However, we only stopped moving active vma after we see the first active vma twice. If during the search, that first active vma completed, we would not notice and keep on extending the search list. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1746 Fixes: 2850748ef876 ("drm/i915: Pull i915_vma_pin under the vm->mutex") Fixes: b1e3177bd1d8 ("drm/i915: Coordinate i915_active with its own mutex") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: # v5.5+ Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200509115217.26853-1-chris@chris-wilson.co.uk (cherry picked from commit 73e28cc40bf00b5d168cb8f5cff1ae63e9097446) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_gem_evict.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 4518b9b35c3d..02ad1acd117c 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -128,6 +128,13 @@ search_again: active = NULL; INIT_LIST_HEAD(&eviction_list); list_for_each_entry_safe(vma, next, &vm->bound_list, vm_link) { + if (vma == active) { /* now seen this vma twice */ + if (flags & PIN_NONBLOCK) + break; + + active = ERR_PTR(-EAGAIN); + } + /* * We keep this list in a rough least-recently scanned order * of active elements (inactive elements are cheap to reap). @@ -143,21 +150,12 @@ search_again: * To notice when we complete one full cycle, we record the * first active element seen, before moving it to the tail. */ - if (i915_vma_is_active(vma)) { - if (vma == active) { - if (flags & PIN_NONBLOCK) - break; - - active = ERR_PTR(-EAGAIN); - } - - if (active != ERR_PTR(-EAGAIN)) { - if (!active) - active = vma; + if (active != ERR_PTR(-EAGAIN) && i915_vma_is_active(vma)) { + if (!active) + active = vma; - list_move_tail(&vma->vm_link, &vm->bound_list); - continue; - } + list_move_tail(&vma->vm_link, &vm->bound_list); + continue; } if (mark_free(&scan, vma, flags, &eviction_list)) -- cgit v1.2.3-59-g8ed1b From 9de5d235b60a7cdfcdd5461e70c5663e713fde87 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 12 May 2020 21:45:53 +0200 Subject: net: phy: fix aneg restart in phy_ethtool_set_eee phy_restart_aneg() enables aneg in the PHY. That's not what we want if phydev->autoneg is disabled. In this case still update EEE advertisement register, but don't enable aneg and don't trigger an aneg restart. Fixes: f75abeb8338e ("net: phy: restart phy autonegotiation after EEE advertisment change") Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 72c69a9c8a98..20ca6418f7bc 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1132,9 +1132,11 @@ int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data) /* Restart autonegotiation so the new modes get sent to the * link partner. */ - ret = phy_restart_aneg(phydev); - if (ret < 0) - return ret; + if (phydev->autoneg == AUTONEG_ENABLE) { + ret = phy_restart_aneg(phydev); + if (ret < 0) + return ret; + } } return 0; -- cgit v1.2.3-59-g8ed1b From 9a6630aef93394ac54494c7e273e9bc026509375 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Mon, 11 May 2020 10:19:59 +0800 Subject: riscv: pgtable: Fix __kernel_map_pages build error if NOMMU riscv64-none-linux-gnu-ld: mm/page_alloc.o: in function `.L0 ': page_alloc.c:(.text+0xd34): undefined reference to `__kernel_map_pages' riscv64-none-linux-gnu-ld: page_alloc.c:(.text+0x104a): undefined reference to `__kernel_map_pages' riscv64-none-linux-gnu-ld: mm/page_alloc.o: in function `__pageblock_pfn_to_page': page_alloc.c:(.text+0x145e): undefined reference to `__kernel_map_pages' Reported-by: Hulk Robot Signed-off-by: Kefeng Wang Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgtable.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index f225e2fbdfb4..35b60035b6b0 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -477,6 +477,8 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, #define TASK_SIZE 0xffffffffUL +static inline void __kernel_map_pages(struct page *page, int numpages, int enable) {} + #endif /* !CONFIG_MMU */ #define kern_addr_valid(addr) (1) /* FIXME */ -- cgit v1.2.3-59-g8ed1b From ed1ed4c0da5447c5e322481ce2ef9f03336c6ffb Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Mon, 11 May 2020 10:20:01 +0800 Subject: riscv: mmiowb: Fix implicit declaration of function 'smp_processor_id' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In file included from ./../include/linux/compiler_types.h:68, from : ../include/asm-generic/mmiowb.h: In function ‘mmiowb_set_pending’: ../include/asm-generic/percpu.h:34:38: error: implicit declaration of function ‘smp_processor_id’; did you mean ‘raw_smp_processor_id’? [-Werror=implicit-function-declaration] #define my_cpu_offset per_cpu_offset(smp_processor_id()) ^~~~~~~~~~~~~~~~ ../include/linux/compiler-gcc.h:58:26: note: in definition of macro ‘RELOC_HIDE’ (typeof(ptr)) (__ptr + (off)); \ ^~~ ../include/linux/percpu-defs.h:249:2: note: in expansion of macro ‘SHIFT_PERCPU_PTR’ SHIFT_PERCPU_PTR(ptr, my_cpu_offset); \ ^~~~~~~~~~~~~~~~ ../include/asm-generic/percpu.h:34:23: note: in expansion of macro ‘per_cpu_offset’ #define my_cpu_offset per_cpu_offset(smp_processor_id()) ^~~~~~~~~~~~~~ ../include/linux/percpu-defs.h:249:24: note: in expansion of macro ‘my_cpu_offset’ SHIFT_PERCPU_PTR(ptr, my_cpu_offset); \ ^~~~~~~~~~~~~ ../include/asm-generic/mmiowb.h:30:26: note: in expansion of macro ‘this_cpu_ptr’ #define __mmiowb_state() this_cpu_ptr(&__mmiowb_state) ^~~~~~~~~~~~ ../include/asm-generic/mmiowb.h:37:28: note: in expansion of macro ‘__mmiowb_state’ struct mmiowb_state *ms = __mmiowb_state(); ^~~~~~~~~~~~~~ Reported-by: Hulk Robot Signed-off-by: Kefeng Wang Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/mmiowb.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/include/asm/mmiowb.h b/arch/riscv/include/asm/mmiowb.h index bb4091ff4a21..0b2333e71fdc 100644 --- a/arch/riscv/include/asm/mmiowb.h +++ b/arch/riscv/include/asm/mmiowb.h @@ -9,6 +9,7 @@ */ #define mmiowb() __asm__ __volatile__ ("fence o,w" : : : "memory"); +#include #include #endif /* _ASM_RISCV_MMIOWB_H */ -- cgit v1.2.3-59-g8ed1b From 3d2353de81061cab4b9d68b3e1dc69cbec1451ea Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 13 May 2020 15:18:01 -0400 Subject: ring-buffer: Don't deactivate the ring buffer on failed iterator reads If the function tracer is running and the trace file is read (which uses the ring buffer iterator), the iterator can get in sync with the writes, and caues it to fail to find a page with content it can read three times. This causes a warning and deactivation of the ring buffer code. Looking at the other cases of failure to get an event, it appears that there's a chance that the writer could cause them too. Since the iterator is a "best effort" to read the ring buffer if there's an active writer (the consumer reader is made for this case "see trace_pipe"), if it fails to get an event after three tries, simply give up and return NULL. Don't warn, nor disable the ring buffer on this failure. Link: https://lore.kernel.org/r/20200429090508.GG5770@shao2-debian Reported-by: kernel test robot Fixes: ff84c50cfb4b ("ring-buffer: Do not die if rb_iter_peek() fails more than thrice") Tested-by: Sven Schnelle Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 6f0b42ceeb00..448d5f528764 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -4034,7 +4034,6 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; int nr_loops = 0; - bool failed = false; if (ts) *ts = 0; @@ -4056,19 +4055,14 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) return NULL; /* - * We repeat when a time extend is encountered or we hit - * the end of the page. Since the time extend is always attached - * to a data event, we should never loop more than three times. - * Once for going to next page, once on time extend, and - * finally once to get the event. - * We should never hit the following condition more than thrice, - * unless the buffer is very small, and there's a writer - * that is causing the reader to fail getting an event. + * As the writer can mess with what the iterator is trying + * to read, just give up if we fail to get an event after + * three tries. The iterator is not as reliable when reading + * the ring buffer with an active write as the consumer is. + * Do not warn if the three failures is reached. */ - if (++nr_loops > 3) { - RB_WARN_ON(cpu_buffer, !failed); + if (++nr_loops > 3) return NULL; - } if (rb_per_cpu_empty(cpu_buffer)) return NULL; @@ -4079,10 +4073,8 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) } event = rb_iter_head_event(iter); - if (!event) { - failed = true; + if (!event) goto again; - } switch (event->type_len) { case RINGBUF_TYPE_PADDING: -- cgit v1.2.3-59-g8ed1b From da4d401a6b8fda7414033f81982f64ade02c0e27 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 13 May 2020 15:36:22 -0400 Subject: ring-buffer: Remove all BUG() calls There's a lot of checks to make sure the ring buffer is working, and if an anomaly is detected, it safely shuts itself down. But there's a few cases that it will call BUG(), which defeats the point of being safe (it crashes the kernel when an anomaly is found!). There's no reason for them. Switch them all to either WARN_ON_ONCE() (when no ring buffer descriptor is present), or to RB_WARN_ON() (when a ring buffer descriptor is present). Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 448d5f528764..b8e1ca48be50 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -193,7 +193,7 @@ rb_event_length(struct ring_buffer_event *event) case RINGBUF_TYPE_DATA: return rb_event_data_length(event); default: - BUG(); + WARN_ON_ONCE(1); } /* not hit */ return 0; @@ -249,7 +249,7 @@ rb_event_data(struct ring_buffer_event *event) { if (extended_time(event)) event = skip_time_extend(event); - BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); + WARN_ON_ONCE(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); /* If length is in len field, then array[0] has the data */ if (event->type_len) return (void *)&event->array[0]; @@ -3727,7 +3727,7 @@ rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer, return; default: - BUG(); + RB_WARN_ON(cpu_buffer, 1); } return; } @@ -3757,7 +3757,7 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter, return; default: - BUG(); + RB_WARN_ON(iter->cpu_buffer, 1); } return; } @@ -4020,7 +4020,7 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts, return event; default: - BUG(); + RB_WARN_ON(cpu_buffer, 1); } return NULL; @@ -4109,7 +4109,7 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) return event; default: - BUG(); + RB_WARN_ON(cpu_buffer, 1); } return NULL; -- cgit v1.2.3-59-g8ed1b From b590b38ca305d6d7902ec7c4f7e273e0069f3bcc Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 14 May 2020 18:05:33 +0200 Subject: ALSA: hda/realtek - Limit int mic boost for Thinkpad T530 Lenovo Thinkpad T530 seems to have a sensitive internal mic capture that needs to limit the mic boost like a few other Thinkpad models. Although we may change the quirk for ALC269_FIXUP_LENOVO_DOCK, this hits way too many other laptop models, so let's add a new fixup model that limits the internal mic boost on top of the existing quirk and apply to only T530. BugLink: https://bugzilla.suse.com/show_bug.cgi?id=1171293 Cc: Link: https://lore.kernel.org/r/20200514160533.10337-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 4080b492ecc0..dc2302171a71 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5969,6 +5969,7 @@ enum { ALC269_FIXUP_HP_LINE1_MIC1_LED, ALC269_FIXUP_INV_DMIC, ALC269_FIXUP_LENOVO_DOCK, + ALC269_FIXUP_LENOVO_DOCK_LIMIT_BOOST, ALC269_FIXUP_NO_SHUTUP, ALC286_FIXUP_SONY_MIC_NO_PRESENCE, ALC269_FIXUP_PINCFG_NO_HP_TO_LINEOUT, @@ -6293,6 +6294,12 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_PINCFG_NO_HP_TO_LINEOUT }, + [ALC269_FIXUP_LENOVO_DOCK_LIMIT_BOOST] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc269_fixup_limit_int_mic_boost, + .chained = true, + .chain_id = ALC269_FIXUP_LENOVO_DOCK, + }, [ALC269_FIXUP_PINCFG_NO_HP_TO_LINEOUT] = { .type = HDA_FIXUP_FUNC, .v.func = alc269_fixup_pincfg_no_hp_to_lineout, @@ -7476,7 +7483,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x21b8, "Thinkpad Edge 14", ALC269_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x17aa, 0x21ca, "Thinkpad L412", ALC269_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x17aa, 0x21e9, "Thinkpad Edge 15", ALC269_FIXUP_SKU_IGNORE), - SND_PCI_QUIRK(0x17aa, 0x21f6, "Thinkpad T530", ALC269_FIXUP_LENOVO_DOCK), + SND_PCI_QUIRK(0x17aa, 0x21f6, "Thinkpad T530", ALC269_FIXUP_LENOVO_DOCK_LIMIT_BOOST), SND_PCI_QUIRK(0x17aa, 0x21fa, "Thinkpad X230", ALC269_FIXUP_LENOVO_DOCK), SND_PCI_QUIRK(0x17aa, 0x21f3, "Thinkpad T430", ALC269_FIXUP_LENOVO_DOCK), SND_PCI_QUIRK(0x17aa, 0x21fb, "Thinkpad T430s", ALC269_FIXUP_LENOVO_DOCK), @@ -7615,6 +7622,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC269_FIXUP_HEADSET_MODE, .name = "headset-mode"}, {.id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC, .name = "headset-mode-no-hp-mic"}, {.id = ALC269_FIXUP_LENOVO_DOCK, .name = "lenovo-dock"}, + {.id = ALC269_FIXUP_LENOVO_DOCK_LIMIT_BOOST, .name = "lenovo-dock-limit-boost"}, {.id = ALC269_FIXUP_HP_GPIO_LED, .name = "hp-gpio-led"}, {.id = ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED, .name = "hp-dock-gpio-mic1-led"}, {.id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, .name = "dell-headset-multi"}, -- cgit v1.2.3-59-g8ed1b From 04fd61a4e01028210a91f0efc408c8bc61a3018c Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Wed, 13 May 2020 17:50:34 -0700 Subject: mm, memcg: fix inconsistent oom event behavior A recent commit 9852ae3fe529 ("mm, memcg: consider subtrees in memory.events") changed the behavior of memcg events, which will now consider subtrees in memory.events. But oom_kill event is a special one as it is used in both cgroup1 and cgroup2. In cgroup1, it is displayed in memory.oom_control. The file memory.oom_control is in both root memcg and non root memcg, that is different with memory.event as it only in non-root memcg. That commit is okay for cgroup2, but it is not okay for cgroup1 as it will cause inconsistent behavior between root memcg and non-root memcg. Here's an example on why this behavior is inconsistent in cgroup1. root memcg / memcg foo / memcg bar Suppose there's an oom_kill in memcg bar, then the oon_kill will be root memcg : memory.oom_control(oom_kill) 0 / memcg foo : memory.oom_control(oom_kill) 1 / memcg bar : memory.oom_control(oom_kill) 1 For the non-root memcg, its memory.oom_control(oom_kill) includes its descendants' oom_kill, but for root memcg, it doesn't include its descendants' oom_kill. That means, memory.oom_control(oom_kill) has different meanings in different memcgs. That is inconsistent. Then the user has to know whether the memcg is root or not. If we can't fully support it in cgroup1, for example by adding memory.events.local into cgroup1 as well, then let's don't touch its original behavior. Fixes: 9852ae3fe529 ("mm, memcg: consider subtrees in memory.events") Reported-by: Randy Dunlap Signed-off-by: Yafang Shao Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Acked-by: Johannes Weiner Acked-by: Chris Down Acked-by: Michal Hocko Cc: Link: http://lkml.kernel.org/r/20200502141055.7378-1-laoar.shao@gmail.com Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d275c72c4f8e..977edd3b7bd8 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -783,6 +783,8 @@ static inline void memcg_memory_event(struct mem_cgroup *memcg, atomic_long_inc(&memcg->memory_events[event]); cgroup_file_notify(&memcg->events_file); + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) + break; if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS) break; } while ((memcg = parent_mem_cgroup(memcg)) && -- cgit v1.2.3-59-g8ed1b From 65759097d804d2a9ad2b687db436319704ba7019 Mon Sep 17 00:00:00 2001 From: Roman Penyaev Date: Wed, 13 May 2020 17:50:38 -0700 Subject: epoll: call final ep_events_available() check under the lock There is a possible race when ep_scan_ready_list() leaves ->rdllist and ->obflist empty for a short period of time although some events are pending. It is quite likely that ep_events_available() observes empty lists and goes to sleep. Since commit 339ddb53d373 ("fs/epoll: remove unnecessary wakeups of nested epoll") we are conservative in wakeups (there is only one place for wakeup and this is ep_poll_callback()), thus ep_events_available() must always observe correct state of two lists. The easiest and correct way is to do the final check under the lock. This does not impact the performance, since lock is taken anyway for adding a wait entry to the wait queue. The discussion of the problem can be found here: https://lore.kernel.org/linux-fsdevel/a2f22c3c-c25a-4bda-8339-a7bdaf17849e@akamai.com/ In this patch barrierless __set_current_state() is used. This is safe since waitqueue_active() is called under the same lock on wakeup side. Short-circuit for fatal signals (i.e. fatal_signal_pending() check) is moved to the line just before actual events harvesting routine. This is fully compliant to what is said in the comment of the patch where the actual fatal_signal_pending() check was added: c257a340ede0 ("fs, epoll: short circuit fetching events if thread has been killed"). Fixes: 339ddb53d373 ("fs/epoll: remove unnecessary wakeups of nested epoll") Reported-by: Jason Baron Reported-by: Randy Dunlap Signed-off-by: Roman Penyaev Signed-off-by: Andrew Morton Reviewed-by: Jason Baron Cc: Khazhismel Kumykov Cc: Alexander Viro Cc: Link: http://lkml.kernel.org/r/20200505145609.1865152-1-rpenyaev@suse.de Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 48 ++++++++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index aba03ee749f8..12eebcdea9c8 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1879,34 +1879,33 @@ fetch_events: * event delivery. */ init_wait(&wait); - write_lock_irq(&ep->lock); - __add_wait_queue_exclusive(&ep->wq, &wait); - write_unlock_irq(&ep->lock); + write_lock_irq(&ep->lock); /* - * We don't want to sleep if the ep_poll_callback() sends us - * a wakeup in between. That's why we set the task state - * to TASK_INTERRUPTIBLE before doing the checks. + * Barrierless variant, waitqueue_active() is called under + * the same lock on wakeup ep_poll_callback() side, so it + * is safe to avoid an explicit barrier. */ - set_current_state(TASK_INTERRUPTIBLE); + __set_current_state(TASK_INTERRUPTIBLE); + /* - * Always short-circuit for fatal signals to allow - * threads to make a timely exit without the chance of - * finding more events available and fetching - * repeatedly. + * Do the final check under the lock. ep_scan_ready_list() + * plays with two lists (->rdllist and ->ovflist) and there + * is always a race when both lists are empty for short + * period of time although events are pending, so lock is + * important. */ - if (fatal_signal_pending(current)) { - res = -EINTR; - break; + eavail = ep_events_available(ep); + if (!eavail) { + if (signal_pending(current)) + res = -EINTR; + else + __add_wait_queue_exclusive(&ep->wq, &wait); } + write_unlock_irq(&ep->lock); - eavail = ep_events_available(ep); - if (eavail) - break; - if (signal_pending(current)) { - res = -EINTR; + if (eavail || res) break; - } if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) { timed_out = 1; @@ -1927,6 +1926,15 @@ fetch_events: } send_events: + if (fatal_signal_pending(current)) { + /* + * Always short-circuit for fatal signals to allow + * threads to make a timely exit without the chance of + * finding more events available and fetching + * repeatedly. + */ + res = -EINTR; + } /* * Try to transfer events to user space. In case we get 0 events and * there's still timeout left over, we go trying again in search of -- cgit v1.2.3-59-g8ed1b From 475f4dfc021c5fde69f3b7d3287bde0a50477b05 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 13 May 2020 17:50:41 -0700 Subject: mm/gup: fix fixup_user_fault() on multiple retries This part was overlooked when reworking the gup code on multiple retries. When we get the 2nd+ retry, we'll be with TRIED flag set. Current code will bail out on the 2nd retry because the !TRIED check will fail so the retry logic will be skipped. What's worse is that, it will also return zero which errornously hints the caller that the page is faulted in while it's not. The !TRIED flag check seems to not be needed even before the mutliple retries change because if we get a VM_FAULT_RETRY, it must be the 1st retry, and we should not have TRIED set for that. Fix it by removing the !TRIED check, at the meantime check against fatal signals properly before the page fault so we can still properly respond to the user killing the process during retries. Fixes: 4426e945df58 ("mm/gup: allow VM_FAULT_RETRY for multiple times") Reported-by: Randy Dunlap Signed-off-by: Peter Xu Signed-off-by: Andrew Morton Cc: Alex Williamson Cc: Brian Geffon Link: http://lkml.kernel.org/r/20200502003523.8204-1-peterx@redhat.com Signed-off-by: Linus Torvalds --- mm/gup.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index 50681f0286de..87a6a59fe667 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1218,6 +1218,10 @@ retry: if (!vma_permits_fault(vma, fault_flags)) return -EFAULT; + if ((fault_flags & FAULT_FLAG_KILLABLE) && + fatal_signal_pending(current)) + return -EINTR; + ret = handle_mm_fault(vma, address, fault_flags); major |= ret & VM_FAULT_MAJOR; if (ret & VM_FAULT_ERROR) { @@ -1230,11 +1234,9 @@ retry: if (ret & VM_FAULT_RETRY) { down_read(&mm->mmap_sem); - if (!(fault_flags & FAULT_FLAG_TRIED)) { - *unlocked = true; - fault_flags |= FAULT_FLAG_TRIED; - goto retry; - } + *unlocked = true; + fault_flags |= FAULT_FLAG_TRIED; + goto retry; } if (tsk) { -- cgit v1.2.3-59-g8ed1b From d1564926066115fb47ca06b2b9d23ed506ca9608 Mon Sep 17 00:00:00 2001 From: Brian Geffon Date: Wed, 13 May 2020 17:50:44 -0700 Subject: userfaultfd: fix remap event with MREMAP_DONTUNMAP A user is not required to set a new address when using MREMAP_DONTUNMAP as it can be used without MREMAP_FIXED. When doing so the remap event will use new_addr which may not have been set and we didn't propagate it back other then in the return value of remap_to. Because ret is always the new address it's probably more correct to use it rather than new_addr on the remap_event_complete call, and it resolves this bug. Fixes: e346b3813067d4b ("mm/mremap: add MREMAP_DONTUNMAP to mremap()") Reported-by: Randy Dunlap Signed-off-by: Brian Geffon Signed-off-by: Andrew Morton Cc: Lokesh Gidra Cc: Minchan Kim Cc: Kirill A. Shutemov Cc: Vlastimil Babka Cc: "Michael S . Tsirkin" Cc: Andrea Arcangeli Cc: Sonny Rao Cc: Joel Fernandes Link: http://lkml.kernel.org/r/20200506172158.218366-1-bgeffon@google.com Signed-off-by: Linus Torvalds --- mm/mremap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mremap.c b/mm/mremap.c index c881abeba0bf..6aa6ea605068 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -794,7 +794,7 @@ out: if (locked && new_len > old_len) mm_populate(new_addr + old_len, new_len - old_len); userfaultfd_unmap_complete(mm, &uf_unmap_early); - mremap_userfaultfd_complete(&uf, addr, new_addr, old_len); + mremap_userfaultfd_complete(&uf, addr, ret, old_len); userfaultfd_unmap_complete(mm, &uf_unmap); return ret; } -- cgit v1.2.3-59-g8ed1b From 5e698222c70257d13ae0816720dde57c56f81e15 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 13 May 2020 17:50:48 -0700 Subject: ipc/util.c: sysvipc_find_ipc() incorrectly updates position index Commit 89163f93c6f9 ("ipc/util.c: sysvipc_find_ipc() should increase position index") is causing this bug (seen on 5.6.8): # ipcs -q ------ Message Queues -------- key msqid owner perms used-bytes messages # ipcmk -Q Message queue id: 0 # ipcs -q ------ Message Queues -------- key msqid owner perms used-bytes messages 0x82db8127 0 root 644 0 0 # ipcmk -Q Message queue id: 1 # ipcs -q ------ Message Queues -------- key msqid owner perms used-bytes messages 0x82db8127 0 root 644 0 0 0x76d1fb2a 1 root 644 0 0 # ipcrm -q 0 # ipcs -q ------ Message Queues -------- key msqid owner perms used-bytes messages 0x76d1fb2a 1 root 644 0 0 0x76d1fb2a 1 root 644 0 0 # ipcmk -Q Message queue id: 2 # ipcrm -q 2 # ipcs -q ------ Message Queues -------- key msqid owner perms used-bytes messages 0x76d1fb2a 1 root 644 0 0 0x76d1fb2a 1 root 644 0 0 # ipcmk -Q Message queue id: 3 # ipcrm -q 1 # ipcs -q ------ Message Queues -------- key msqid owner perms used-bytes messages 0x7c982867 3 root 644 0 0 0x7c982867 3 root 644 0 0 0x7c982867 3 root 644 0 0 0x7c982867 3 root 644 0 0 Whenever an IPC item with a low id is deleted, the items with higher ids are duplicated, as if filling a hole. new_pos should jump through hole of unused ids, pos can be updated inside "for" cycle. Fixes: 89163f93c6f9 ("ipc/util.c: sysvipc_find_ipc() should increase position index") Reported-by: Andreas Schwab Reported-by: Randy Dunlap Signed-off-by: Vasily Averin Signed-off-by: Andrew Morton Acked-by: Waiman Long Cc: NeilBrown Cc: Steven Rostedt Cc: Ingo Molnar Cc: Peter Oberparleiter Cc: Davidlohr Bueso Cc: Manfred Spraul Cc: Link: http://lkml.kernel.org/r/4921fe9b-9385-a2b4-1dc4-1099be6d2e39@virtuozzo.com Signed-off-by: Linus Torvalds --- ipc/util.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ipc/util.c b/ipc/util.c index 7acccfded7cb..cfa0045e748d 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -764,21 +764,21 @@ static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos, total++; } - *new_pos = pos + 1; + ipc = NULL; if (total >= ids->in_use) - return NULL; + goto out; for (; pos < ipc_mni; pos++) { ipc = idr_find(&ids->ipcs_idr, pos); if (ipc != NULL) { rcu_read_lock(); ipc_lock_object(ipc); - return ipc; + break; } } - - /* Out of range - return NULL to terminate iteration */ - return NULL; +out: + *new_pos = pos + 1; + return ipc; } static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos) -- cgit v1.2.3-59-g8ed1b From 8a16c09edc58982d56c49ab577fdcdf830fbc3a5 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 13 May 2020 17:50:51 -0700 Subject: kasan: consistently disable debugging features KASAN is incompatible with some kernel debugging/tracing features. There's been multiple patches that disable those feature for some of KASAN files one by one. Instead of prolonging that, disable these features for all KASAN files at once. Reported-by: Randy Dunlap Signed-off-by: Andrey Konovalov Signed-off-by: Andrew Morton Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Leon Romanovsky Link: http://lkml.kernel.org/r/29bd753d5ff5596425905b0b07f51153e2345cc1.1589297433.git.andreyknvl@google.com Signed-off-by: Linus Torvalds --- mm/kasan/Makefile | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile index 08b43de2383b..434d503a6525 100644 --- a/mm/kasan/Makefile +++ b/mm/kasan/Makefile @@ -1,23 +1,28 @@ # SPDX-License-Identifier: GPL-2.0 KASAN_SANITIZE := n -UBSAN_SANITIZE_common.o := n -UBSAN_SANITIZE_generic.o := n -UBSAN_SANITIZE_generic_report.o := n -UBSAN_SANITIZE_tags.o := n +UBSAN_SANITIZE := n KCOV_INSTRUMENT := n +# Disable ftrace to avoid recursion. CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_generic.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_generic_report.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_quarantine.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_report.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_tags.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_tags_report.o = $(CC_FLAGS_FTRACE) # Function splitter causes unnecessary splits in __asan_load1/__asan_store1 # see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63533 - CFLAGS_common.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) CFLAGS_generic.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) CFLAGS_generic_report.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) +CFLAGS_init.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) +CFLAGS_quarantine.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) +CFLAGS_report.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) CFLAGS_tags.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) +CFLAGS_tags_report.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) obj-$(CONFIG_KASAN) := common.o init.o report.o obj-$(CONFIG_KASAN_GENERIC) += generic.o generic_report.o quarantine.o -- cgit v1.2.3-59-g8ed1b From 13cf04880235cd70d408d70426ada31408ded2ee Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 13 May 2020 17:50:54 -0700 Subject: kasan: add missing functions declarations to kasan.h KASAN is currently missing declarations for __asan_report* and __hwasan* functions. This can lead to compiler warnings. Reported-by: Leon Romanovsky Reported-by: Randy Dunlap Signed-off-by: Andrey Konovalov Signed-off-by: Andrew Morton Tested-by: Leon Romanovsky Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Link: http://lkml.kernel.org/r/45b445a76a79208918f0cc44bfabebaea909b54d.1589297433.git.andreyknvl@google.com Signed-off-by: Linus Torvalds --- mm/kasan/kasan.h | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index e8f37199d885..cfade6413528 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -212,8 +212,6 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag) asmlinkage void kasan_unpoison_task_stack_below(const void *watermark); void __asan_register_globals(struct kasan_global *globals, size_t size); void __asan_unregister_globals(struct kasan_global *globals, size_t size); -void __asan_loadN(unsigned long addr, size_t size); -void __asan_storeN(unsigned long addr, size_t size); void __asan_handle_no_return(void); void __asan_alloca_poison(unsigned long addr, size_t size); void __asan_allocas_unpoison(const void *stack_top, const void *stack_bottom); @@ -228,6 +226,8 @@ void __asan_load8(unsigned long addr); void __asan_store8(unsigned long addr); void __asan_load16(unsigned long addr); void __asan_store16(unsigned long addr); +void __asan_loadN(unsigned long addr, size_t size); +void __asan_storeN(unsigned long addr, size_t size); void __asan_load1_noabort(unsigned long addr); void __asan_store1_noabort(unsigned long addr); @@ -239,6 +239,21 @@ void __asan_load8_noabort(unsigned long addr); void __asan_store8_noabort(unsigned long addr); void __asan_load16_noabort(unsigned long addr); void __asan_store16_noabort(unsigned long addr); +void __asan_loadN_noabort(unsigned long addr, size_t size); +void __asan_storeN_noabort(unsigned long addr, size_t size); + +void __asan_report_load1_noabort(unsigned long addr); +void __asan_report_store1_noabort(unsigned long addr); +void __asan_report_load2_noabort(unsigned long addr); +void __asan_report_store2_noabort(unsigned long addr); +void __asan_report_load4_noabort(unsigned long addr); +void __asan_report_store4_noabort(unsigned long addr); +void __asan_report_load8_noabort(unsigned long addr); +void __asan_report_store8_noabort(unsigned long addr); +void __asan_report_load16_noabort(unsigned long addr); +void __asan_report_store16_noabort(unsigned long addr); +void __asan_report_load_n_noabort(unsigned long addr, size_t size); +void __asan_report_store_n_noabort(unsigned long addr, size_t size); void __asan_set_shadow_00(const void *addr, size_t size); void __asan_set_shadow_f1(const void *addr, size_t size); @@ -247,4 +262,19 @@ void __asan_set_shadow_f3(const void *addr, size_t size); void __asan_set_shadow_f5(const void *addr, size_t size); void __asan_set_shadow_f8(const void *addr, size_t size); +void __hwasan_load1_noabort(unsigned long addr); +void __hwasan_store1_noabort(unsigned long addr); +void __hwasan_load2_noabort(unsigned long addr); +void __hwasan_store2_noabort(unsigned long addr); +void __hwasan_load4_noabort(unsigned long addr); +void __hwasan_store4_noabort(unsigned long addr); +void __hwasan_load8_noabort(unsigned long addr); +void __hwasan_store8_noabort(unsigned long addr); +void __hwasan_load16_noabort(unsigned long addr); +void __hwasan_store16_noabort(unsigned long addr); +void __hwasan_loadN_noabort(unsigned long addr, size_t size); +void __hwasan_storeN_noabort(unsigned long addr, size_t size); + +void __hwasan_tag_memory(unsigned long addr, u8 tag, unsigned long size); + #endif -- cgit v1.2.3-59-g8ed1b From 23ad04669f81f958e9a4121b0266228d2eb3c357 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Mon, 11 May 2020 13:32:34 +0200 Subject: samples: bpf: Fix build error GCC 10 is very strict about symbol clash, and lwt_len_hist_user contains a symbol which clashes with libbpf: /usr/bin/ld: samples/bpf/lwt_len_hist_user.o:(.bss+0x0): multiple definition of `bpf_log_buf'; samples/bpf/bpf_load.o:(.bss+0x8c0): first defined here collect2: error: ld returned 1 exit status bpf_log_buf here seems to be a leftover, so removing it. Signed-off-by: Matteo Croce Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200511113234.80722-1-mcroce@redhat.com --- samples/bpf/lwt_len_hist_user.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/samples/bpf/lwt_len_hist_user.c b/samples/bpf/lwt_len_hist_user.c index 587b68b1f8dd..430a4b7e353e 100644 --- a/samples/bpf/lwt_len_hist_user.c +++ b/samples/bpf/lwt_len_hist_user.c @@ -15,8 +15,6 @@ #define MAX_INDEX 64 #define MAX_STARS 38 -char bpf_log_buf[BPF_LOG_BUF_SIZE]; - static void stars(char *str, long val, long max, int width) { int i; -- cgit v1.2.3-59-g8ed1b From 333291ce5055f2039afc907badaf5b66bc1adfdc Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 12 May 2020 16:59:25 -0700 Subject: bpf: Fix bug in mmap() implementation for BPF array map mmap() subsystem allows user-space application to memory-map region with initial page offset. This wasn't taken into account in initial implementation of BPF array memory-mapping. This would result in wrong pages, not taking into account requested page shift, being memory-mmaped into user-space. This patch fixes this gap and adds a test for such scenario. Fixes: fc9702273e2e ("bpf: Add mmap() support for BPF_MAP_TYPE_ARRAY") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200512235925.3817805-1-andriin@fb.com --- kernel/bpf/arraymap.c | 7 ++++++- tools/testing/selftests/bpf/prog_tests/mmap.c | 8 ++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 95d77770353c..1d6120fd5ba6 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -486,7 +486,12 @@ static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma) if (!(map->map_flags & BPF_F_MMAPABLE)) return -EINVAL; - return remap_vmalloc_range(vma, array_map_vmalloc_addr(array), pgoff); + if (vma->vm_pgoff * PAGE_SIZE + (vma->vm_end - vma->vm_start) > + PAGE_ALIGN((u64)array->map.max_entries * array->elem_size)) + return -EINVAL; + + return remap_vmalloc_range(vma, array_map_vmalloc_addr(array), + vma->vm_pgoff + pgoff); } const struct bpf_map_ops array_map_ops = { diff --git a/tools/testing/selftests/bpf/prog_tests/mmap.c b/tools/testing/selftests/bpf/prog_tests/mmap.c index 56d80adcf4bd..6b9dce431d41 100644 --- a/tools/testing/selftests/bpf/prog_tests/mmap.c +++ b/tools/testing/selftests/bpf/prog_tests/mmap.c @@ -217,6 +217,14 @@ void test_mmap(void) munmap(tmp2, 4 * page_size); + /* map all 4 pages, but with pg_off=1 page, should fail */ + tmp1 = mmap(NULL, 4 * page_size, PROT_READ, MAP_SHARED | MAP_FIXED, + data_map_fd, page_size /* initial page shift */); + if (CHECK(tmp1 != MAP_FAILED, "adv_mmap7", "unexpected success")) { + munmap(tmp1, 4 * page_size); + goto cleanup; + } + tmp1 = mmap(NULL, map_sz, PROT_READ, MAP_SHARED, data_map_fd, 0); if (CHECK(tmp1 == MAP_FAILED, "last_mmap", "failed %d\n", errno)) goto cleanup; -- cgit v1.2.3-59-g8ed1b From 516d8d497c017a6820019c76acbb7912a24ec57b Mon Sep 17 00:00:00 2001 From: Sumanth Korikkar Date: Wed, 13 May 2020 17:44:14 +0200 Subject: libbpf: Fix register naming in PT_REGS s390 macros Fix register naming in PT_REGS s390 macros Fixes: b8ebce86ffe6 ("libbpf: Provide CO-RE variants of PT_REGS macros") Signed-off-by: Sumanth Korikkar Signed-off-by: Alexei Starovoitov Reviewed-by: Julian Wiedmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200513154414.29972-1-sumanthk@linux.ibm.com --- tools/lib/bpf/bpf_tracing.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index f3f3c3fb98cb..48a9c7c69ef1 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -148,11 +148,11 @@ struct pt_regs; #define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[4]) #define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[5]) #define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[6]) -#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), grps[14]) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[14]) #define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[11]) #define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2]) #define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[15]) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), pdw.addr) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), psw.addr) #elif defined(bpf_target_arm) -- cgit v1.2.3-59-g8ed1b From 625236ba3832ae947cb3ebb7acc1f30788b274ef Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Tue, 12 May 2020 19:46:07 +0200 Subject: security: Fix the default value of secid_to_secctx hook security_secid_to_secctx is called by the bpf_lsm hook and a successful return value (i.e 0) implies that the parameter will be consumed by the LSM framework. The current behaviour return success when the pointer isn't initialized when CONFIG_BPF_LSM is enabled, with the default return from kernel/bpf/bpf_lsm.c. This is the internal error: [ 1229.341488][ T2659] usercopy: Kernel memory exposure attempt detected from null address (offset 0, size 280)! [ 1229.374977][ T2659] ------------[ cut here ]------------ [ 1229.376813][ T2659] kernel BUG at mm/usercopy.c:99! [ 1229.378398][ T2659] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [ 1229.380348][ T2659] Modules linked in: [ 1229.381654][ T2659] CPU: 0 PID: 2659 Comm: systemd-journal Tainted: G B W 5.7.0-rc5-next-20200511-00019-g864e0c6319b8-dirty #13 [ 1229.385429][ T2659] Hardware name: linux,dummy-virt (DT) [ 1229.387143][ T2659] pstate: 80400005 (Nzcv daif +PAN -UAO BTYPE=--) [ 1229.389165][ T2659] pc : usercopy_abort+0xc8/0xcc [ 1229.390705][ T2659] lr : usercopy_abort+0xc8/0xcc [ 1229.392225][ T2659] sp : ffff000064247450 [ 1229.393533][ T2659] x29: ffff000064247460 x28: 0000000000000000 [ 1229.395449][ T2659] x27: 0000000000000118 x26: 0000000000000000 [ 1229.397384][ T2659] x25: ffffa000127049e0 x24: ffffa000127049e0 [ 1229.399306][ T2659] x23: ffffa000127048e0 x22: ffffa000127048a0 [ 1229.401241][ T2659] x21: ffffa00012704b80 x20: ffffa000127049e0 [ 1229.403163][ T2659] x19: ffffa00012704820 x18: 0000000000000000 [ 1229.405094][ T2659] x17: 0000000000000000 x16: 0000000000000000 [ 1229.407008][ T2659] x15: 0000000000000000 x14: 003d090000000000 [ 1229.408942][ T2659] x13: ffff80000d5b25b2 x12: 1fffe0000d5b25b1 [ 1229.410859][ T2659] x11: 1fffe0000d5b25b1 x10: ffff80000d5b25b1 [ 1229.412791][ T2659] x9 : ffffa0001034bee0 x8 : ffff00006ad92d8f [ 1229.414707][ T2659] x7 : 0000000000000000 x6 : ffffa00015eacb20 [ 1229.416642][ T2659] x5 : ffff0000693c8040 x4 : 0000000000000000 [ 1229.418558][ T2659] x3 : ffffa0001034befc x2 : d57a7483a01c6300 [ 1229.420610][ T2659] x1 : 0000000000000000 x0 : 0000000000000059 [ 1229.422526][ T2659] Call trace: [ 1229.423631][ T2659] usercopy_abort+0xc8/0xcc [ 1229.425091][ T2659] __check_object_size+0xdc/0x7d4 [ 1229.426729][ T2659] put_cmsg+0xa30/0xa90 [ 1229.428132][ T2659] unix_dgram_recvmsg+0x80c/0x930 [ 1229.429731][ T2659] sock_recvmsg+0x9c/0xc0 [ 1229.431123][ T2659] ____sys_recvmsg+0x1cc/0x5f8 [ 1229.432663][ T2659] ___sys_recvmsg+0x100/0x160 [ 1229.434151][ T2659] __sys_recvmsg+0x110/0x1a8 [ 1229.435623][ T2659] __arm64_sys_recvmsg+0x58/0x70 [ 1229.437218][ T2659] el0_svc_common.constprop.1+0x29c/0x340 [ 1229.438994][ T2659] do_el0_svc+0xe8/0x108 [ 1229.440587][ T2659] el0_svc+0x74/0x88 [ 1229.441917][ T2659] el0_sync_handler+0xe4/0x8b4 [ 1229.443464][ T2659] el0_sync+0x17c/0x180 [ 1229.444920][ T2659] Code: aa1703e2 aa1603e1 910a8260 97ecc860 (d4210000) [ 1229.447070][ T2659] ---[ end trace 400497d91baeaf51 ]--- [ 1229.448791][ T2659] Kernel panic - not syncing: Fatal exception [ 1229.450692][ T2659] Kernel Offset: disabled [ 1229.452061][ T2659] CPU features: 0x240002,20002004 [ 1229.453647][ T2659] Memory Limit: none [ 1229.455015][ T2659] ---[ end Kernel panic - not syncing: Fatal exception ]--- Rework the so the default return value is -EOPNOTSUPP. There are likely other callbacks such as security_inode_getsecctx() that may have the same problem, and that someone that understand the code better needs to audit them. Thank you Arnd for helping me figure out what went wrong. Fixes: 98e828a0650f ("security: Refactor declaration of LSM hooks") Signed-off-by: Anders Roxell Signed-off-by: Alexei Starovoitov Acked-by: James Morris Cc: Arnd Bergmann Link: https://lore.kernel.org/bpf/20200512174607.9630-1-anders.roxell@linaro.org --- include/linux/lsm_hook_defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 9cd4455528e5..21f4fff9e4cd 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -243,7 +243,7 @@ LSM_HOOK(int, -EINVAL, getprocattr, struct task_struct *p, char *name, char **value) LSM_HOOK(int, -EINVAL, setprocattr, const char *name, void *value, size_t size) LSM_HOOK(int, 0, ismaclabel, const char *name) -LSM_HOOK(int, 0, secid_to_secctx, u32 secid, char **secdata, +LSM_HOOK(int, -EOPNOTSUPP, secid_to_secctx, u32 secid, char **secdata, u32 *seclen) LSM_HOOK(int, 0, secctx_to_secid, const char *secdata, u32 seclen, u32 *secid) LSM_HOOK(void, LSM_RET_VOID, release_secctx, char *secdata, u32 seclen) -- cgit v1.2.3-59-g8ed1b From fd4a5177382230d39e0d95632d98103fb2938383 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Thu, 14 May 2020 11:58:36 +0530 Subject: net: stmmac: fix num_por initialization Driver missed initializing num_por which is one of the por values that driver configures to hardware. In order to get these values, add a new structure ethqos_emac_driver_data which holds por and num_por values and populate that in driver probe. Fixes: a7c30e62d4b8 ("net: stmmac: Add driver for Qualcomm ethqos") Reported-by: Rahul Ankushrao Kawadgave Signed-off-by: Vinod Koul Reviewed-by: Amit Kucheria Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index e0a5fe83d8e0..bfc4a92f1d92 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -75,6 +75,11 @@ struct ethqos_emac_por { unsigned int value; }; +struct ethqos_emac_driver_data { + const struct ethqos_emac_por *por; + unsigned int num_por; +}; + struct qcom_ethqos { struct platform_device *pdev; void __iomem *rgmii_base; @@ -171,6 +176,11 @@ static const struct ethqos_emac_por emac_v2_3_0_por[] = { { .offset = RGMII_IO_MACRO_CONFIG2, .value = 0x00002060 }, }; +static const struct ethqos_emac_driver_data emac_v2_3_0_data = { + .por = emac_v2_3_0_por, + .num_por = ARRAY_SIZE(emac_v2_3_0_por), +}; + static int ethqos_dll_configure(struct qcom_ethqos *ethqos) { unsigned int val; @@ -442,6 +452,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev) struct device_node *np = pdev->dev.of_node; struct plat_stmmacenet_data *plat_dat; struct stmmac_resources stmmac_res; + const struct ethqos_emac_driver_data *data; struct qcom_ethqos *ethqos; struct resource *res; int ret; @@ -471,7 +482,9 @@ static int qcom_ethqos_probe(struct platform_device *pdev) goto err_mem; } - ethqos->por = of_device_get_match_data(&pdev->dev); + data = of_device_get_match_data(&pdev->dev); + ethqos->por = data->por; + ethqos->num_por = data->num_por; ethqos->rgmii_clk = devm_clk_get(&pdev->dev, "rgmii"); if (IS_ERR(ethqos->rgmii_clk)) { @@ -526,7 +539,7 @@ static int qcom_ethqos_remove(struct platform_device *pdev) } static const struct of_device_id qcom_ethqos_match[] = { - { .compatible = "qcom,qcs404-ethqos", .data = &emac_v2_3_0_por}, + { .compatible = "qcom,qcs404-ethqos", .data = &emac_v2_3_0_data}, { } }; MODULE_DEVICE_TABLE(of, qcom_ethqos_match); -- cgit v1.2.3-59-g8ed1b From e92888c72fbdc6f9d07b3b0604c012e81d7c0da7 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 13 May 2020 22:32:05 -0700 Subject: bpf: Enforce returning 0 for fentry/fexit progs Currently, tracing/fentry and tracing/fexit prog return values are not enforced. In trampoline codes, the fentry/fexit prog return values are ignored. Let us enforce it to be 0 to avoid confusion and allows potential future extension. This patch also explicitly added return value checking for tracing/raw_tp, tracing/fmod_ret, and freplace programs such that these program return values can be anything. The purpose are two folds: 1. to make it explicit about return value expectations for these programs in verifier. 2. for tracing prog_type, if a future attach type is added, the default is -ENOTSUPP which will enforce to specify return value ranges explicitly. Fixes: fec56f5890d9 ("bpf: Introduce BPF trampoline") Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200514053206.1298415-1-yhs@fb.com --- kernel/bpf/verifier.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index fa1d8245b925..a44ba6672688 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7059,6 +7059,23 @@ static int check_return_code(struct bpf_verifier_env *env) return 0; range = tnum_const(0); break; + case BPF_PROG_TYPE_TRACING: + switch (env->prog->expected_attach_type) { + case BPF_TRACE_FENTRY: + case BPF_TRACE_FEXIT: + range = tnum_const(0); + break; + case BPF_TRACE_RAW_TP: + case BPF_MODIFY_RETURN: + return 0; + default: + return -ENOTSUPP; + } + break; + case BPF_PROG_TYPE_EXT: + /* freplace program can return anything as its return value + * depends on the to-be-replaced kernel func or bpf program. + */ default: return 0; } -- cgit v1.2.3-59-g8ed1b From 6d74f64b922b8394dccc52576659cb0dc0a1da7b Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 13 May 2020 22:32:07 -0700 Subject: selftests/bpf: Enforce returning 0 for fentry/fexit programs There are a few fentry/fexit programs returning non-0. The tests with these programs will break with the previous patch which enfoced return-0 rules. Fix them properly. Fixes: ac065870d928 ("selftests/bpf: Add BPF_PROG, BPF_KPROBE, and BPF_KRETPROBE macros") Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200514053207.1298479-1-yhs@fb.com --- tools/testing/selftests/bpf/progs/test_overhead.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/test_overhead.c b/tools/testing/selftests/bpf/progs/test_overhead.c index 56a50b25cd33..abb7344b531f 100644 --- a/tools/testing/selftests/bpf/progs/test_overhead.c +++ b/tools/testing/selftests/bpf/progs/test_overhead.c @@ -30,13 +30,13 @@ int prog3(struct bpf_raw_tracepoint_args *ctx) SEC("fentry/__set_task_comm") int BPF_PROG(prog4, struct task_struct *tsk, const char *buf, bool exec) { - return !tsk; + return 0; } SEC("fexit/__set_task_comm") int BPF_PROG(prog5, struct task_struct *tsk, const char *buf, bool exec) { - return !tsk; + return 0; } char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-59-g8ed1b From b8c158395119be62294da73646a3953c29ac974b Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Thu, 14 May 2020 12:15:39 +0200 Subject: pppoe: only process PADT targeted at local interfaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't want to disconnect a session because of a stray PADT arriving while the interface is in promiscuous mode. Furthermore, multicast and broadcast packets make no sense here, so only PACKET_HOST is accepted. Reported-by: David Balažic Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- drivers/net/ppp/pppoe.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index d760a36db28c..beedaad08255 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -490,6 +490,9 @@ static int pppoe_disc_rcv(struct sk_buff *skb, struct net_device *dev, if (!skb) goto out; + if (skb->pkt_type != PACKET_HOST) + goto abort; + if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr))) goto abort; -- cgit v1.2.3-59-g8ed1b From 16bb1b505c3c6ec6c0ec5943cef67b23f228979a Mon Sep 17 00:00:00 2001 From: Wang Wenhu Date: Thu, 14 May 2020 04:02:22 -0700 Subject: drivers: ipa: fix typos for ipa_smp2p structure doc Remove the duplicate "mutex", and change "Motex" to "Mutex". Also I recommend it's easier for understanding to make the "ready-interrupt" a bundle for it is a parallel description as "shutdown" which is appended after the slash. Signed-off-by: Wang Wenhu Cc: Alex Elder Signed-off-by: David S. Miller --- drivers/net/ipa/ipa_smp2p.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipa/ipa_smp2p.c b/drivers/net/ipa/ipa_smp2p.c index 4d33aa7ebfbb..a5f7a79a1923 100644 --- a/drivers/net/ipa/ipa_smp2p.c +++ b/drivers/net/ipa/ipa_smp2p.c @@ -53,7 +53,7 @@ * @clock_on: Whether IPA clock is on * @notified: Whether modem has been notified of clock state * @disabled: Whether setup ready interrupt handling is disabled - * @mutex mutex: Motex protecting ready interrupt/shutdown interlock + * @mutex: Mutex protecting ready-interrupt/shutdown interlock * @panic_notifier: Panic notifier structure */ struct ipa_smp2p { -- cgit v1.2.3-59-g8ed1b From 865e525db666767eb7bbcfc7d4ce7326b2b19271 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 14 May 2020 13:45:12 +0200 Subject: MAINTAINERS: another add of Karsten Graul for S390 networking Complete adding of Karsten as maintainer for all S390 networking parts in the kernel. Cc: Julian Wiedmann Acked-by: Julian Wiedmann Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 85894787825e..391e7eea6a3e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14655,6 +14655,7 @@ F: net/iucv/ S390 NETWORK DRIVERS M: Julian Wiedmann +M: Karsten Graul M: Ursula Braun L: linux-s390@vger.kernel.org S: Supported -- cgit v1.2.3-59-g8ed1b From c9e2053d4b1c634064bd3160689fe4ed52978d31 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 14 May 2020 13:13:07 -0700 Subject: MAINTAINERS: Add Jakub to networking drivers. Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 391e7eea6a3e..4b270dbdf09b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11718,6 +11718,7 @@ F: net/core/drop_monitor.c NETWORKING DRIVERS M: "David S. Miller" +M: Jakub Kicinski L: netdev@vger.kernel.org S: Odd Fixes W: http://www.linuxfoundation.org/en/Net -- cgit v1.2.3-59-g8ed1b From e776af608f692a7a647455106295fa34469e7475 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 May 2020 13:58:13 -0700 Subject: tcp: fix error recovery in tcp_zerocopy_receive() If user provides wrong virtual address in TCP_ZEROCOPY_RECEIVE operation we want to return -EINVAL error. But depending on zc->recv_skip_hint content, we might return -EIO error if the socket has SOCK_DONE set. Make sure to return -EINVAL in this case. BUG: KMSAN: uninit-value in tcp_zerocopy_receive net/ipv4/tcp.c:1833 [inline] BUG: KMSAN: uninit-value in do_tcp_getsockopt+0x4494/0x6320 net/ipv4/tcp.c:3685 CPU: 1 PID: 625 Comm: syz-executor.0 Not tainted 5.7.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x220 lib/dump_stack.c:118 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:121 __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:215 tcp_zerocopy_receive net/ipv4/tcp.c:1833 [inline] do_tcp_getsockopt+0x4494/0x6320 net/ipv4/tcp.c:3685 tcp_getsockopt+0xf8/0x1f0 net/ipv4/tcp.c:3728 sock_common_getsockopt+0x13f/0x180 net/core/sock.c:3131 __sys_getsockopt+0x533/0x7b0 net/socket.c:2177 __do_sys_getsockopt net/socket.c:2192 [inline] __se_sys_getsockopt+0xe1/0x100 net/socket.c:2189 __x64_sys_getsockopt+0x62/0x80 net/socket.c:2189 do_syscall_64+0xb8/0x160 arch/x86/entry/common.c:297 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x45c829 Code: 0d b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 db b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f1deeb72c78 EFLAGS: 00000246 ORIG_RAX: 0000000000000037 RAX: ffffffffffffffda RBX: 00000000004e01e0 RCX: 000000000045c829 RDX: 0000000000000023 RSI: 0000000000000006 RDI: 0000000000000009 RBP: 000000000078bf00 R08: 0000000020000200 R09: 0000000000000000 R10: 00000000200001c0 R11: 0000000000000246 R12: 00000000ffffffff R13: 00000000000001d8 R14: 00000000004d3038 R15: 00007f1deeb736d4 Local variable ----zc@do_tcp_getsockopt created at: do_tcp_getsockopt+0x1a74/0x6320 net/ipv4/tcp.c:3670 do_tcp_getsockopt+0x1a74/0x6320 net/ipv4/tcp.c:3670 Fixes: 05255b823a61 ("tcp: add TCP_ZEROCOPY_RECEIVE support for zerocopy receive") Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a385fcaaa03b..dd401757eea1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1764,10 +1764,11 @@ static int tcp_zerocopy_receive(struct sock *sk, down_read(¤t->mm->mmap_sem); - ret = -EINVAL; vma = find_vma(current->mm, address); - if (!vma || vma->vm_start > address || vma->vm_ops != &tcp_vm_ops) - goto out; + if (!vma || vma->vm_start > address || vma->vm_ops != &tcp_vm_ops) { + up_read(¤t->mm->mmap_sem); + return -EINVAL; + } zc->length = min_t(unsigned long, zc->length, vma->vm_end - address); tp = tcp_sk(sk); -- cgit v1.2.3-59-g8ed1b From cc8a677a76f419016b5e231207d09b073f9b1d3f Mon Sep 17 00:00:00 2001 From: Kevin Lo Date: Thu, 14 May 2020 08:57:33 +0800 Subject: net: phy: broadcom: fix BCM54XX_SHD_SCR3_TRDDAPD value for BCM54810 Set the correct bit when checking for PHY_BRCM_DIS_TXCRXC_NOENRGY on the BCM54810 PHY. Fixes: 0ececcfc9267 ("net: phy: broadcom: Allow BCM54810 to use bcm54xx_adjust_rxrefclk()") Signed-off-by: Kevin Lo Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/broadcom.c | 8 ++++++-- include/linux/brcmphy.h | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index ae4873f2f86e..d14d91b759b7 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -225,8 +225,12 @@ static void bcm54xx_adjust_rxrefclk(struct phy_device *phydev) else val |= BCM54XX_SHD_SCR3_DLLAPD_DIS; - if (phydev->dev_flags & PHY_BRCM_DIS_TXCRXC_NOENRGY) - val |= BCM54XX_SHD_SCR3_TRDDAPD; + if (phydev->dev_flags & PHY_BRCM_DIS_TXCRXC_NOENRGY) { + if (BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54810) + val |= BCM54810_SHD_SCR3_TRDDAPD; + else + val |= BCM54XX_SHD_SCR3_TRDDAPD; + } if (orig != val) bcm_phy_write_shadow(phydev, BCM54XX_SHD_SCR3, val); diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 6462c5447872..f4b77018c625 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -245,6 +245,7 @@ #define BCM54810_EXP_BROADREACH_LRE_MISC_CTL_EN (1 << 0) #define BCM54810_SHD_CLK_CTL 0x3 #define BCM54810_SHD_CLK_CTL_GTXCLK_EN (1 << 9) +#define BCM54810_SHD_SCR3_TRDDAPD 0x0100 /* BCM54612E Registers */ #define BCM54612E_EXP_SPARE0 (MII_BCM54XX_EXP_SEL_ETC + 0x34) -- cgit v1.2.3-59-g8ed1b From 95f59bf88bb75281cc626e283ecefdd5d5641427 Mon Sep 17 00:00:00 2001 From: Madhuparna Bhowmik Date: Thu, 14 May 2020 19:41:15 +0530 Subject: drivers: net: hamradio: Fix suspicious RCU usage warning in bpqether.c This patch fixes the following warning: ============================= WARNING: suspicious RCU usage 5.7.0-rc5-next-20200514-syzkaller #0 Not tainted ----------------------------- drivers/net/hamradio/bpqether.c:149 RCU-list traversed in non-reader section!! Since rtnl lock is held, pass this cond in list_for_each_entry_rcu(). Reported-by: syzbot+bb82cafc737c002d11ca@syzkaller.appspotmail.com Signed-off-by: Madhuparna Bhowmik Signed-off-by: David S. Miller --- drivers/net/hamradio/bpqether.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index fbea6f232819..e2ad3c2e8df5 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -127,7 +127,8 @@ static inline struct net_device *bpq_get_ax25_dev(struct net_device *dev) { struct bpqdev *bpq; - list_for_each_entry_rcu(bpq, &bpq_devices, bpq_list) { + list_for_each_entry_rcu(bpq, &bpq_devices, bpq_list, + lockdep_rtnl_is_held()) { if (bpq->ethdev == dev) return bpq->axdev; } -- cgit v1.2.3-59-g8ed1b From a14fbcd4f157a5a97b6013289205559d246c5021 Mon Sep 17 00:00:00 2001 From: Amol Grover Date: Thu, 14 May 2020 23:31:02 +0530 Subject: ipmr: Fix RCU list debugging warning ipmr_for_each_table() macro uses list_for_each_entry_rcu() for traversing outside of an RCU read side critical section but under the protection of rtnl_mutex. Hence, add the corresponding lockdep expression to silence the following false-positive warning at boot: [ 4.319347] ============================= [ 4.319349] WARNING: suspicious RCU usage [ 4.319351] 5.5.4-stable #17 Tainted: G E [ 4.319352] ----------------------------- [ 4.319354] net/ipv4/ipmr.c:1757 RCU-list traversed in non-reader section!! Fixes: f0ad0860d01e ("ipv4: ipmr: support multiple tables") Signed-off-by: Amol Grover Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9cf83cc85e4a..4897f7420c8f 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -110,7 +110,8 @@ static void ipmr_expire_process(struct timer_list *t); #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES #define ipmr_for_each_table(mrt, net) \ - list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list) + list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list, \ + lockdep_rtnl_is_held()) static struct mr_table *ipmr_mr_table_iter(struct net *net, struct mr_table *mrt) -- cgit v1.2.3-59-g8ed1b From 7013908c2db285cd6b48fcd427a56354beac2233 Mon Sep 17 00:00:00 2001 From: Amol Grover Date: Thu, 14 May 2020 23:31:03 +0530 Subject: ipmr: Add lockdep expression to ipmr_for_each_table macro During the initialization process, ipmr_new_table() is called to create new tables which in turn calls ipmr_get_table() which traverses net->ipv4.mr_tables without holding the writer lock. However, this is safe to do so as no tables exist at this time. Hence add a suitable lockdep expression to silence the following false-positive warning: ============================= WARNING: suspicious RCU usage 5.7.0-rc3-next-20200428-syzkaller #0 Not tainted ----------------------------- net/ipv4/ipmr.c:136 RCU-list traversed in non-reader section!! ipmr_get_table+0x130/0x160 net/ipv4/ipmr.c:136 ipmr_new_table net/ipv4/ipmr.c:403 [inline] ipmr_rules_init net/ipv4/ipmr.c:248 [inline] ipmr_net_init+0x133/0x430 net/ipv4/ipmr.c:3089 Fixes: f0ad0860d01e ("ipv4: ipmr: support multiple tables") Reported-by: syzbot+1519f497f2f9f08183c6@syzkaller.appspotmail.com Suggested-by: Jakub Kicinski Signed-off-by: Amol Grover Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 4897f7420c8f..5c218db2dede 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -109,9 +109,10 @@ static void mroute_clean_tables(struct mr_table *mrt, int flags); static void ipmr_expire_process(struct timer_list *t); #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES -#define ipmr_for_each_table(mrt, net) \ - list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list, \ - lockdep_rtnl_is_held()) +#define ipmr_for_each_table(mrt, net) \ + list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list, \ + lockdep_rtnl_is_held() || \ + list_empty(&net->ipv4.mr_tables)) static struct mr_table *ipmr_mr_table_iter(struct net *net, struct mr_table *mrt) -- cgit v1.2.3-59-g8ed1b From 207b584d0ab87e492412fcd69030e34873c7ed5e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 14 May 2020 18:04:41 -0700 Subject: MAINTAINERS: Mark networking drivers as Maintained. Suggested-by: Andrew Lunn Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 4b270dbdf09b..2c59cc557f2b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11720,7 +11720,7 @@ NETWORKING DRIVERS M: "David S. Miller" M: Jakub Kicinski L: netdev@vger.kernel.org -S: Odd Fixes +S: Maintained W: http://www.linuxfoundation.org/en/Net Q: http://patchwork.ozlabs.org/project/netdev/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git -- cgit v1.2.3-59-g8ed1b From 0ebeea8ca8a4d1d453ad299aef0507dab04f6e8d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 15 May 2020 12:11:16 +0200 Subject: bpf: Restrict bpf_probe_read{, str}() only to archs where they work Given the legacy bpf_probe_read{,str}() BPF helpers are broken on archs with overlapping address ranges, we should really take the next step to disable them from BPF use there. To generally fix the situation, we've recently added new helper variants bpf_probe_read_{user,kernel}() and bpf_probe_read_{user,kernel}_str(). For details on them, see 6ae08ae3dea2 ("bpf: Add probe_read_{user, kernel} and probe_read_{user,kernel}_str helpers"). Given bpf_probe_read{,str}() have been around for ~5 years by now, there are plenty of users at least on x86 still relying on them today, so we cannot remove them entirely w/o breaking the BPF tracing ecosystem. However, their use should be restricted to archs with non-overlapping address ranges where they are working in their current form. Therefore, move this behind a CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE and have x86, arm64, arm select it (other archs supporting it can follow-up on it as well). For the remaining archs, they can workaround easily by relying on the feature probe from bpftool which spills out defines that can be used out of BPF C code to implement the drop-in replacement for old/new kernels via: bpftool feature probe macro Suggested-by: Linus Torvalds Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Reviewed-by: Masami Hiramatsu Acked-by: Linus Torvalds Cc: Brendan Gregg Cc: Christoph Hellwig Link: https://lore.kernel.org/bpf/20200515101118.6508-2-daniel@iogearbox.net --- arch/arm/Kconfig | 1 + arch/arm64/Kconfig | 1 + arch/x86/Kconfig | 1 + init/Kconfig | 3 +++ kernel/trace/bpf_trace.c | 6 ++++-- 5 files changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 66a04f6f4775..c77c93c485a0 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -12,6 +12,7 @@ config ARM select ARCH_HAS_KEEPINITRD select ARCH_HAS_KCOV select ARCH_HAS_MEMBARRIER_SYNC_CORE + select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PTE_SPECIAL if ARM_LPAE select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_SETUP_DMA_OPS diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 40fb05d96c60..5d513f461957 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -20,6 +20,7 @@ config ARM64 select ARCH_HAS_KCOV select ARCH_HAS_KEEPINITRD select ARCH_HAS_MEMBARRIER_SYNC_CORE + select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PTE_DEVMAP select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SETUP_DMA_OPS diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1197b5596d5a..2d3f963fd6f1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -68,6 +68,7 @@ config X86 select ARCH_HAS_KCOV if X86_64 select ARCH_HAS_MEM_ENCRYPT select ARCH_HAS_MEMBARRIER_SYNC_CORE + select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_PTE_DEVMAP if X86_64 select ARCH_HAS_PTE_SPECIAL diff --git a/init/Kconfig b/init/Kconfig index 9e22ee8fbd75..6fd13a051342 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -2279,6 +2279,9 @@ config ASN1 source "kernel/Kconfig.locks" +config ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE + bool + config ARCH_HAS_SYNC_CORE_BEFORE_USERMODE bool diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index ca1796747a77..b83bdaa31c7b 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -825,14 +825,16 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_probe_read_user_proto; case BPF_FUNC_probe_read_kernel: return &bpf_probe_read_kernel_proto; - case BPF_FUNC_probe_read: - return &bpf_probe_read_compat_proto; case BPF_FUNC_probe_read_user_str: return &bpf_probe_read_user_str_proto; case BPF_FUNC_probe_read_kernel_str: return &bpf_probe_read_kernel_str_proto; +#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE + case BPF_FUNC_probe_read: + return &bpf_probe_read_compat_proto; case BPF_FUNC_probe_read_str: return &bpf_probe_read_compat_str_proto; +#endif #ifdef CONFIG_CGROUPS case BPF_FUNC_get_current_cgroup_id: return &bpf_get_current_cgroup_id_proto; -- cgit v1.2.3-59-g8ed1b From 47cc0ed574abcbbde0cf143ddb21a0baed1aa2df Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 15 May 2020 12:11:17 +0200 Subject: bpf: Add bpf_probe_read_{user, kernel}_str() to do_refine_retval_range Given bpf_probe_read{,str}() BPF helpers are now only available under CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE, we need to add the drop-in replacements of bpf_probe_read_{kernel,user}_str() to do_refine_retval_range() as well to avoid hitting the same issue as in 849fa50662fbc ("bpf/verifier: refine retval R0 state for bpf_get_stack helper"). Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200515101118.6508-3-daniel@iogearbox.net --- kernel/bpf/verifier.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a44ba6672688..8d7ee40e2748 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4340,7 +4340,9 @@ static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type, if (ret_type != RET_INTEGER || (func_id != BPF_FUNC_get_stack && - func_id != BPF_FUNC_probe_read_str)) + func_id != BPF_FUNC_probe_read_str && + func_id != BPF_FUNC_probe_read_kernel_str && + func_id != BPF_FUNC_probe_read_user_str)) return; ret_reg->smax_value = meta->msize_max_value; -- cgit v1.2.3-59-g8ed1b From b2a5212fb634561bb734c6356904e37f6665b955 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 15 May 2020 12:11:18 +0200 Subject: bpf: Restrict bpf_trace_printk()'s %s usage and add %pks, %pus specifier Usage of plain %s conversion specifier in bpf_trace_printk() suffers from the very same issue as bpf_probe_read{,str}() helpers, that is, it is broken on archs with overlapping address ranges. While the helpers have been addressed through work in 6ae08ae3dea2 ("bpf: Add probe_read_{user, kernel} and probe_read_{user, kernel}_str helpers"), we need an option for bpf_trace_printk() as well to fix it. Similarly as with the helpers, force users to make an explicit choice by adding %pks and %pus specifier to bpf_trace_printk() which will then pick the corresponding strncpy_from_unsafe*() variant to perform the access under KERNEL_DS or USER_DS. The %pk* (kernel specifier) and %pu* (user specifier) can later also be extended for other objects aside strings that are probed and printed under tracing, and reused out of other facilities like bpf_seq_printf() or BTF based type printing. Existing behavior of %s for current users is still kept working for archs where it is not broken and therefore gated through CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE. For archs not having this property we fall-back to pick probing under KERNEL_DS as a sensible default. Fixes: 8d3b7dce8622 ("bpf: add support for %s specifier to bpf_trace_printk()") Reported-by: Linus Torvalds Reported-by: Christoph Hellwig Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Cc: Masami Hiramatsu Cc: Brendan Gregg Link: https://lore.kernel.org/bpf/20200515101118.6508-4-daniel@iogearbox.net --- Documentation/core-api/printk-formats.rst | 14 +++++ kernel/trace/bpf_trace.c | 94 ++++++++++++++++++++----------- lib/vsprintf.c | 12 ++++ 3 files changed, 88 insertions(+), 32 deletions(-) diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst index 8ebe46b1af39..5dfcc4592b23 100644 --- a/Documentation/core-api/printk-formats.rst +++ b/Documentation/core-api/printk-formats.rst @@ -112,6 +112,20 @@ used when printing stack backtraces. The specifier takes into consideration the effect of compiler optimisations which may occur when tail-calls are used and marked with the noreturn GCC attribute. +Probed Pointers from BPF / tracing +---------------------------------- + +:: + + %pks kernel string + %pus user string + +The ``k`` and ``u`` specifiers are used for printing prior probed memory from +either kernel memory (k) or user memory (u). The subsequent ``s`` specifier +results in printing a string. For direct use in regular vsnprintf() the (k) +and (u) annotation is ignored, however, when used out of BPF's bpf_trace_printk(), +for example, it reads the memory it is pointing to without faulting. + Kernel Pointers --------------- diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index b83bdaa31c7b..a010edc37ee0 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -323,17 +323,15 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void) /* * Only limited trace_printk() conversion specifiers allowed: - * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %s + * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pks %pus %s */ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, u64, arg2, u64, arg3) { + int i, mod[3] = {}, fmt_cnt = 0; + char buf[64], fmt_ptype; + void *unsafe_ptr = NULL; bool str_seen = false; - int mod[3] = {}; - int fmt_cnt = 0; - u64 unsafe_addr; - char buf[64]; - int i; /* * bpf_check()->check_func_arg()->check_stack_boundary() @@ -359,40 +357,71 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, if (fmt[i] == 'l') { mod[fmt_cnt]++; i++; - } else if (fmt[i] == 'p' || fmt[i] == 's') { + } else if (fmt[i] == 'p') { mod[fmt_cnt]++; + if ((fmt[i + 1] == 'k' || + fmt[i + 1] == 'u') && + fmt[i + 2] == 's') { + fmt_ptype = fmt[i + 1]; + i += 2; + goto fmt_str; + } + /* disallow any further format extensions */ if (fmt[i + 1] != 0 && !isspace(fmt[i + 1]) && !ispunct(fmt[i + 1])) return -EINVAL; - fmt_cnt++; - if (fmt[i] == 's') { - if (str_seen) - /* allow only one '%s' per fmt string */ - return -EINVAL; - str_seen = true; - - switch (fmt_cnt) { - case 1: - unsafe_addr = arg1; - arg1 = (long) buf; - break; - case 2: - unsafe_addr = arg2; - arg2 = (long) buf; - break; - case 3: - unsafe_addr = arg3; - arg3 = (long) buf; - break; - } - buf[0] = 0; - strncpy_from_unsafe(buf, - (void *) (long) unsafe_addr, + + goto fmt_next; + } else if (fmt[i] == 's') { + mod[fmt_cnt]++; + fmt_ptype = fmt[i]; +fmt_str: + if (str_seen) + /* allow only one '%s' per fmt string */ + return -EINVAL; + str_seen = true; + + if (fmt[i + 1] != 0 && + !isspace(fmt[i + 1]) && + !ispunct(fmt[i + 1])) + return -EINVAL; + + switch (fmt_cnt) { + case 0: + unsafe_ptr = (void *)(long)arg1; + arg1 = (long)buf; + break; + case 1: + unsafe_ptr = (void *)(long)arg2; + arg2 = (long)buf; + break; + case 2: + unsafe_ptr = (void *)(long)arg3; + arg3 = (long)buf; + break; + } + + buf[0] = 0; + switch (fmt_ptype) { + case 's': +#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE + strncpy_from_unsafe(buf, unsafe_ptr, sizeof(buf)); + break; +#endif + case 'k': + strncpy_from_unsafe_strict(buf, unsafe_ptr, + sizeof(buf)); + break; + case 'u': + strncpy_from_unsafe_user(buf, + (__force void __user *)unsafe_ptr, + sizeof(buf)); + break; } - continue; + goto fmt_next; } if (fmt[i] == 'l') { @@ -403,6 +432,7 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' && fmt[i] != 'x') return -EINVAL; +fmt_next: fmt_cnt++; } diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 7c488a1ce318..532b6606a18a 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -2168,6 +2168,10 @@ char *fwnode_string(char *buf, char *end, struct fwnode_handle *fwnode, * f full name * P node name, including a possible unit address * - 'x' For printing the address. Equivalent to "%lx". + * - '[ku]s' For a BPF/tracing related format specifier, e.g. used out of + * bpf_trace_printk() where [ku] prefix specifies either kernel (k) + * or user (u) memory to probe, and: + * s a string, equivalent to "%s" on direct vsnprintf() use * * ** When making changes please also update: * Documentation/core-api/printk-formats.rst @@ -2251,6 +2255,14 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, if (!IS_ERR(ptr)) break; return err_ptr(buf, end, ptr, spec); + case 'u': + case 'k': + switch (fmt[1]) { + case 's': + return string(buf, end, ptr, spec); + default: + return error_string(buf, end, "(einval)", spec); + } } /* default is to _not_ leak addresses, hash before printing */ -- cgit v1.2.3-59-g8ed1b From efa6a7d07523ffbbf6503c1a7eeb52201c15c0e3 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Fri, 15 May 2020 15:30:22 +0300 Subject: dpaa2-eth: properly handle buffer size restrictions Depending on the WRIOP version, the buffer size on the RX path must by a multiple of 64 or 256. Handle this restriction properly by aligning down the buffer size to the necessary value. Also, use the new buffer size dynamically computed instead of the compile time one. Fixes: 27c874867c4e ("dpaa2-eth: Use a single page per Rx buffer") Signed-off-by: Ioana Ciornei Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 29 ++++++++++++++---------- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h | 1 + 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index b6c46639aa4c..d97c320a2dc0 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -86,7 +86,7 @@ static void free_rx_fd(struct dpaa2_eth_priv *priv, for (i = 1; i < DPAA2_ETH_MAX_SG_ENTRIES; i++) { addr = dpaa2_sg_get_addr(&sgt[i]); sg_vaddr = dpaa2_iova_to_virt(priv->iommu_domain, addr); - dma_unmap_page(dev, addr, DPAA2_ETH_RX_BUF_SIZE, + dma_unmap_page(dev, addr, priv->rx_buf_size, DMA_BIDIRECTIONAL); free_pages((unsigned long)sg_vaddr, 0); @@ -144,7 +144,7 @@ static struct sk_buff *build_frag_skb(struct dpaa2_eth_priv *priv, /* Get the address and length from the S/G entry */ sg_addr = dpaa2_sg_get_addr(sge); sg_vaddr = dpaa2_iova_to_virt(priv->iommu_domain, sg_addr); - dma_unmap_page(dev, sg_addr, DPAA2_ETH_RX_BUF_SIZE, + dma_unmap_page(dev, sg_addr, priv->rx_buf_size, DMA_BIDIRECTIONAL); sg_length = dpaa2_sg_get_len(sge); @@ -185,7 +185,7 @@ static struct sk_buff *build_frag_skb(struct dpaa2_eth_priv *priv, (page_address(page) - page_address(head_page)); skb_add_rx_frag(skb, i - 1, head_page, page_offset, - sg_length, DPAA2_ETH_RX_BUF_SIZE); + sg_length, priv->rx_buf_size); } if (dpaa2_sg_is_final(sge)) @@ -211,7 +211,7 @@ static void free_bufs(struct dpaa2_eth_priv *priv, u64 *buf_array, int count) for (i = 0; i < count; i++) { vaddr = dpaa2_iova_to_virt(priv->iommu_domain, buf_array[i]); - dma_unmap_page(dev, buf_array[i], DPAA2_ETH_RX_BUF_SIZE, + dma_unmap_page(dev, buf_array[i], priv->rx_buf_size, DMA_BIDIRECTIONAL); free_pages((unsigned long)vaddr, 0); } @@ -335,7 +335,7 @@ static u32 run_xdp(struct dpaa2_eth_priv *priv, break; case XDP_REDIRECT: dma_unmap_page(priv->net_dev->dev.parent, addr, - DPAA2_ETH_RX_BUF_SIZE, DMA_BIDIRECTIONAL); + priv->rx_buf_size, DMA_BIDIRECTIONAL); ch->buf_count--; xdp.data_hard_start = vaddr; err = xdp_do_redirect(priv->net_dev, &xdp, xdp_prog); @@ -374,7 +374,7 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv, trace_dpaa2_rx_fd(priv->net_dev, fd); vaddr = dpaa2_iova_to_virt(priv->iommu_domain, addr); - dma_sync_single_for_cpu(dev, addr, DPAA2_ETH_RX_BUF_SIZE, + dma_sync_single_for_cpu(dev, addr, priv->rx_buf_size, DMA_BIDIRECTIONAL); fas = dpaa2_get_fas(vaddr, false); @@ -393,13 +393,13 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv, return; } - dma_unmap_page(dev, addr, DPAA2_ETH_RX_BUF_SIZE, + dma_unmap_page(dev, addr, priv->rx_buf_size, DMA_BIDIRECTIONAL); skb = build_linear_skb(ch, fd, vaddr); } else if (fd_format == dpaa2_fd_sg) { WARN_ON(priv->xdp_prog); - dma_unmap_page(dev, addr, DPAA2_ETH_RX_BUF_SIZE, + dma_unmap_page(dev, addr, priv->rx_buf_size, DMA_BIDIRECTIONAL); skb = build_frag_skb(priv, ch, buf_data); free_pages((unsigned long)vaddr, 0); @@ -974,7 +974,7 @@ static int add_bufs(struct dpaa2_eth_priv *priv, if (!page) goto err_alloc; - addr = dma_map_page(dev, page, 0, DPAA2_ETH_RX_BUF_SIZE, + addr = dma_map_page(dev, page, 0, priv->rx_buf_size, DMA_BIDIRECTIONAL); if (unlikely(dma_mapping_error(dev, addr))) goto err_map; @@ -984,7 +984,7 @@ static int add_bufs(struct dpaa2_eth_priv *priv, /* tracing point */ trace_dpaa2_eth_buf_seed(priv->net_dev, page, DPAA2_ETH_RX_BUF_RAW_SIZE, - addr, DPAA2_ETH_RX_BUF_SIZE, + addr, priv->rx_buf_size, bpid); } @@ -1720,7 +1720,7 @@ static bool xdp_mtu_valid(struct dpaa2_eth_priv *priv, int mtu) int mfl, linear_mfl; mfl = DPAA2_ETH_L2_MAX_FRM(mtu); - linear_mfl = DPAA2_ETH_RX_BUF_SIZE - DPAA2_ETH_RX_HWA_SIZE - + linear_mfl = priv->rx_buf_size - DPAA2_ETH_RX_HWA_SIZE - dpaa2_eth_rx_head_room(priv) - XDP_PACKET_HEADROOM; if (mfl > linear_mfl) { @@ -2462,6 +2462,11 @@ static int set_buffer_layout(struct dpaa2_eth_priv *priv) else rx_buf_align = DPAA2_ETH_RX_BUF_ALIGN; + /* We need to ensure that the buffer size seen by WRIOP is a multiple + * of 64 or 256 bytes depending on the WRIOP version. + */ + priv->rx_buf_size = ALIGN_DOWN(DPAA2_ETH_RX_BUF_SIZE, rx_buf_align); + /* tx buffer */ buf_layout.private_data_size = DPAA2_ETH_SWA_SIZE; buf_layout.pass_timestamp = true; @@ -3126,7 +3131,7 @@ static int bind_dpni(struct dpaa2_eth_priv *priv) pools_params.num_dpbp = 1; pools_params.pools[0].dpbp_id = priv->dpbp_dev->obj_desc.id; pools_params.pools[0].backup_pool = 0; - pools_params.pools[0].buffer_size = DPAA2_ETH_RX_BUF_SIZE; + pools_params.pools[0].buffer_size = priv->rx_buf_size; err = dpni_set_pools(priv->mc_io, 0, priv->mc_token, &pools_params); if (err) { dev_err(dev, "dpni_set_pools() failed\n"); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h index 7635db3ef903..13242bf5b427 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h @@ -382,6 +382,7 @@ struct dpaa2_eth_priv { u16 tx_data_offset; struct fsl_mc_device *dpbp_dev; + u16 rx_buf_size; u16 bpid; struct iommu_domain *iommu_domain; -- cgit v1.2.3-59-g8ed1b From 9a2dbb59ebd17a91f99a20b0286dfb6445981ecf Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Fri, 15 May 2020 17:54:41 +0200 Subject: selftests: mptcp: pm: rm the right tmp file "$err" is a variable pointing to a temp file. "$out" is not: only used as a local variable in "check()" and representing the output of a command line. Fixes: eedbc685321b (selftests: add PM netlink functional tests) Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/pm_netlink.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 9172746b6cf0..15f4f46ca3a9 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -30,7 +30,7 @@ ret=0 cleanup() { - rm -f $out + rm -f $err ip netns del $ns1 } -- cgit v1.2.3-59-g8ed1b