From e23b257c293ce4bcc8cabb2aa3097b6ed8a8261a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 14 Jan 2016 08:43:38 +0100 Subject: x86/irq: Call chip->irq_set_affinity in proper context setup_ioapic_dest() calls irqchip->irq_set_affinity() completely unprotected. That's wrong in several aspects: - it opens a race window where irq_set_affinity() can be interrupted and the irq chip left in unconsistent state. - it triggers a lockdep splat when we fix the vector race for 4.3+ because vector lock is taken with interrupts enabled. The proper calling convention is irq descriptor lock held and interrupts disabled. Reported-and-tested-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: Joe Lawrence Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1601140919420.3575@nanos Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/io_apic.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index f25321894ad2..fdb0fbfb1197 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2521,6 +2521,7 @@ void __init setup_ioapic_dest(void) { int pin, ioapic, irq, irq_entry; const struct cpumask *mask; + struct irq_desc *desc; struct irq_data *idata; struct irq_chip *chip; @@ -2536,7 +2537,9 @@ void __init setup_ioapic_dest(void) if (irq < 0 || !mp_init_irq_at_boot(ioapic, irq)) continue; - idata = irq_get_irq_data(irq); + desc = irq_to_desc(irq); + raw_spin_lock_irq(&desc->lock); + idata = irq_desc_get_irq_data(desc); /* * Honour affinities which have been set in early boot @@ -2550,6 +2553,7 @@ void __init setup_ioapic_dest(void) /* Might be lapic_chip for irq 0 */ if (chip->irq_set_affinity) chip->irq_set_affinity(idata, mask, false); + raw_spin_unlock_irq(&desc->lock); } } #endif -- cgit v1.2.3-59-g8ed1b From 111abeba67e0dbdc26537429de9155e4f1d807d8 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 31 Dec 2015 16:30:44 +0000 Subject: x86/irq: Fix a race in x86_vector_free_irqs() There's a race condition between x86_vector_free_irqs() { free_apic_chip_data(irq_data->chip_data); xxxxx //irq_data->chip_data has been freed, but the pointer //hasn't been reset yet irq_domain_reset_irq_data(irq_data); } and smp_irq_move_cleanup_interrupt() { raw_spin_lock(&vector_lock); data = apic_chip_data(irq_desc_get_irq_data(desc)); access data->xxxx // may access freed memory raw_spin_unlock(&desc->lock); } which may cause smp_irq_move_cleanup_interrupt() to access freed memory. Call irq_domain_reset_irq_data(), which clears the pointer with vector lock held. [ tglx: Free memory outside of lock held region. ] Signed-off-by: Jiang Liu Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: stable@vger.kernel.org #4.3+ Link: http://lkml.kernel.org/r/1450880014-11741-3-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 908cb37da171..cf1e325b67ee 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -226,10 +226,8 @@ static int assign_irq_vector_policy(int irq, int node, static void clear_irq_vector(int irq, struct apic_chip_data *data) { struct irq_desc *desc; - unsigned long flags; int cpu, vector; - raw_spin_lock_irqsave(&vector_lock, flags); BUG_ON(!data->cfg.vector); vector = data->cfg.vector; @@ -239,10 +237,8 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data) data->cfg.vector = 0; cpumask_clear(data->domain); - if (likely(!data->move_in_progress)) { - raw_spin_unlock_irqrestore(&vector_lock, flags); + if (likely(!data->move_in_progress)) return; - } desc = irq_to_desc(irq); for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) { @@ -255,7 +251,6 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data) } } data->move_in_progress = 0; - raw_spin_unlock_irqrestore(&vector_lock, flags); } void init_irq_alloc_info(struct irq_alloc_info *info, @@ -276,19 +271,24 @@ void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src) static void x86_vector_free_irqs(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs) { + struct apic_chip_data *apic_data; struct irq_data *irq_data; + unsigned long flags; int i; for (i = 0; i < nr_irqs; i++) { irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i); if (irq_data && irq_data->chip_data) { + raw_spin_lock_irqsave(&vector_lock, flags); clear_irq_vector(virq + i, irq_data->chip_data); - free_apic_chip_data(irq_data->chip_data); + apic_data = irq_data->chip_data; + irq_domain_reset_irq_data(irq_data); + raw_spin_unlock_irqrestore(&vector_lock, flags); + free_apic_chip_data(apic_data); #ifdef CONFIG_X86_IO_APIC if (virq + i < nr_legacy_irqs()) legacy_irq_data[virq + i] = NULL; #endif - irq_domain_reset_irq_data(irq_data); } } } -- cgit v1.2.3-59-g8ed1b From 36f34c8c63da3e272fd66f91089228c22d2b6e8b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:45 +0000 Subject: x86/irq: Validate that irq descriptor is still active In fixup_irqs() we unconditionally dereference the irq chip of an irq descriptor. The descriptor might still be valid, but already cleaned up, i.e. the chip removed. Add a check for this condition. Signed-off-by: Thomas Gleixner Cc: Jiang Liu Cc: Joe Lawrence Cc: Jeremiah Mahler Cc: Borislav Petkov Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: stable@vger.kernel.org #4.3+ Link: http://lkml.kernel.org/r/20151231160106.236423282@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/irq.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index f8062aaf5df9..c0b58dd1ca04 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -470,6 +470,15 @@ void fixup_irqs(void) } chip = irq_data_get_irq_chip(data); + /* + * The interrupt descriptor might have been cleaned up + * already, but it is not yet removed from the radix tree + */ + if (!chip) { + raw_spin_unlock(&desc->lock); + continue; + } + if (!irqd_can_move_in_process_context(data) && chip->irq_mask) chip->irq_mask(data); -- cgit v1.2.3-59-g8ed1b From 8a580f70f6936ec095da217018cdeeb5835c0207 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 31 Dec 2015 16:30:46 +0000 Subject: x86/irq: Do not use apic_chip_data.old_domain as temporary buffer Function __assign_irq_vector() makes use of apic_chip_data.old_domain as a temporary buffer, which is in the way of using apic_chip_data.old_domain for synchronizing the vector cleanup with the vector assignement code. Use a proper temporary cpumask for this. [ tglx: Renamed the mask to searched_cpumask for clarity ] Signed-off-by: Jiang Liu Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: stable@vger.kernel.org #4.3+ Link: http://lkml.kernel.org/r/1450880014-11741-1-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index cf1e325b67ee..19082cf56616 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -31,7 +31,7 @@ struct apic_chip_data { struct irq_domain *x86_vector_domain; EXPORT_SYMBOL_GPL(x86_vector_domain); static DEFINE_RAW_SPINLOCK(vector_lock); -static cpumask_var_t vector_cpumask; +static cpumask_var_t vector_cpumask, searched_cpumask; static struct irq_chip lapic_controller; #ifdef CONFIG_X86_IO_APIC static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY]; @@ -126,6 +126,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, /* Only try and allocate irqs on cpus that are present */ err = -ENOSPC; cpumask_clear(d->old_domain); + cpumask_clear(searched_cpumask); cpu = cpumask_first_and(mask, cpu_online_mask); while (cpu < nr_cpu_ids) { int new_cpu, vector, offset; @@ -159,9 +160,9 @@ next: } if (unlikely(current_vector == vector)) { - cpumask_or(d->old_domain, d->old_domain, + cpumask_or(searched_cpumask, searched_cpumask, vector_cpumask); - cpumask_andnot(vector_cpumask, mask, d->old_domain); + cpumask_andnot(vector_cpumask, mask, searched_cpumask); cpu = cpumask_first_and(vector_cpumask, cpu_online_mask); continue; @@ -406,6 +407,7 @@ int __init arch_early_irq_init(void) arch_init_htirq_domain(x86_vector_domain); BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL)); + BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL)); return arch_early_ioapic_init(); } -- cgit v1.2.3-59-g8ed1b From 433cbd57d190a1cdd02f243df41c3d7f55ec4b94 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:46 +0000 Subject: x86/irq: Reorganize the return path in assign_irq_vector Use an explicit goto for the cases where we have success in the search/update and return -ENOSPC if the search loop ends due to no space. Preparatory patch for fixes. No functional change. Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: stable@vger.kernel.org #4.3+ Link: http://lkml.kernel.org/r/20151231160106.403491024@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 19082cf56616..613b1cd8eecb 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -118,13 +118,12 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, */ static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; static int current_offset = VECTOR_OFFSET_START % 16; - int cpu, err; + int cpu; if (d->move_in_progress) return -EBUSY; /* Only try and allocate irqs on cpus that are present */ - err = -ENOSPC; cpumask_clear(d->old_domain); cpumask_clear(searched_cpumask); cpu = cpumask_first_and(mask, cpu_online_mask); @@ -134,9 +133,8 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, apic->vector_allocation_domain(cpu, vector_cpumask, mask); if (cpumask_subset(vector_cpumask, d->domain)) { - err = 0; if (cpumask_equal(vector_cpumask, d->domain)) - break; + goto success; /* * New cpumask using the vector is a proper subset of * the current in use mask. So cleanup the vector @@ -147,7 +145,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, d->move_in_progress = cpumask_intersects(d->old_domain, cpu_online_mask); cpumask_and(d->domain, d->domain, vector_cpumask); - break; + goto success; } vector = current_vector; @@ -187,17 +185,13 @@ next: per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq); d->cfg.vector = vector; cpumask_copy(d->domain, vector_cpumask); - err = 0; - break; + goto success; } + return -ENOSPC; - if (!err) { - /* cache destination APIC IDs into cfg->dest_apicid */ - err = apic->cpu_mask_to_apicid_and(mask, d->domain, - &d->cfg.dest_apicid); - } - - return err; +success: + /* cache destination APIC IDs into cfg->dest_apicid */ + return apic->cpu_mask_to_apicid_and(mask, d->domain, &d->cfg.dest_apicid); } static int assign_irq_vector(int irq, struct apic_chip_data *data, -- cgit v1.2.3-59-g8ed1b From 95ffeb4b5baca266e1d0d2bc90f1513e6f419cdd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:47 +0000 Subject: x86/irq: Reorganize the search in assign_irq_vector Split out the code which advances the target cpu for the search so we can reuse it for the next patch which adds an early validation check for the vectormask which we get from the apic. Add comments while at it. Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: stable@vger.kernel.org #4.3+ Link: http://lkml.kernel.org/r/20151231160106.484562040@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 613b1cd8eecb..cef31955ab18 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -157,14 +157,9 @@ next: vector = FIRST_EXTERNAL_VECTOR + offset; } - if (unlikely(current_vector == vector)) { - cpumask_or(searched_cpumask, searched_cpumask, - vector_cpumask); - cpumask_andnot(vector_cpumask, mask, searched_cpumask); - cpu = cpumask_first_and(vector_cpumask, - cpu_online_mask); - continue; - } + /* If the search wrapped around, try the next cpu */ + if (unlikely(current_vector == vector)) + goto next_cpu; if (test_bit(vector, used_vectors)) goto next; @@ -186,6 +181,19 @@ next: d->cfg.vector = vector; cpumask_copy(d->domain, vector_cpumask); goto success; + +next_cpu: + /* + * We exclude the current @vector_cpumask from the requested + * @mask and try again with the next online cpu in the + * result. We cannot modify @mask, so we use @vector_cpumask + * as a temporary buffer here as it will be reassigned when + * calling apic->vector_allocation_domain() above. + */ + cpumask_or(searched_cpumask, searched_cpumask, vector_cpumask); + cpumask_andnot(vector_cpumask, mask, searched_cpumask); + cpu = cpumask_first_and(vector_cpumask, cpu_online_mask); + continue; } return -ENOSPC; -- cgit v1.2.3-59-g8ed1b From 3716fd27a604d61a91cda47083504971486b80f1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:48 +0000 Subject: x86/irq: Check vector allocation early __assign_irq_vector() uses the vector_cpumask which is assigned by apic->vector_allocation_domain() without doing basic sanity checks. That can result in a situation where the final assignement of a newly found vector fails in apic->cpu_mask_to_apicid_and(). So we have to do rollbacks for no reason. apic->cpu_mask_to_apicid_and() only fails if vector_cpumask & requested_cpumask & cpu_online_mask is empty. Check for this condition right away and if the result is empty try immediately the next possible cpu in the requested mask. So in case of a failure the old setting is unchanged and we can remove the rollback code. Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: stable@vger.kernel.org #4.3+ Link: http://lkml.kernel.org/r/20151231160106.561877324@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index cef31955ab18..940e18d4dbcd 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -31,7 +31,7 @@ struct apic_chip_data { struct irq_domain *x86_vector_domain; EXPORT_SYMBOL_GPL(x86_vector_domain); static DEFINE_RAW_SPINLOCK(vector_lock); -static cpumask_var_t vector_cpumask, searched_cpumask; +static cpumask_var_t vector_cpumask, vector_searchmask, searched_cpumask; static struct irq_chip lapic_controller; #ifdef CONFIG_X86_IO_APIC static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY]; @@ -130,8 +130,20 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, while (cpu < nr_cpu_ids) { int new_cpu, vector, offset; + /* Get the possible target cpus for @mask/@cpu from the apic */ apic->vector_allocation_domain(cpu, vector_cpumask, mask); + /* + * Clear the offline cpus from @vector_cpumask for searching + * and verify whether the result overlaps with @mask. If true, + * then the call to apic->cpu_mask_to_apicid_and() will + * succeed as well. If not, no point in trying to find a + * vector in this mask. + */ + cpumask_and(vector_searchmask, vector_cpumask, cpu_online_mask); + if (!cpumask_intersects(vector_searchmask, mask)) + goto next_cpu; + if (cpumask_subset(vector_cpumask, d->domain)) { if (cpumask_equal(vector_cpumask, d->domain)) goto success; @@ -164,7 +176,7 @@ next: if (test_bit(vector, used_vectors)) goto next; - for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) { + for_each_cpu(new_cpu, vector_searchmask) { if (!IS_ERR_OR_NULL(per_cpu(vector_irq, new_cpu)[vector])) goto next; } @@ -176,7 +188,7 @@ next: d->move_in_progress = cpumask_intersects(d->old_domain, cpu_online_mask); } - for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) + for_each_cpu(new_cpu, vector_searchmask) per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq); d->cfg.vector = vector; cpumask_copy(d->domain, vector_cpumask); @@ -198,8 +210,14 @@ next_cpu: return -ENOSPC; success: - /* cache destination APIC IDs into cfg->dest_apicid */ - return apic->cpu_mask_to_apicid_and(mask, d->domain, &d->cfg.dest_apicid); + /* + * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail + * as we already established, that mask & d->domain & cpu_online_mask + * is not empty. + */ + BUG_ON(apic->cpu_mask_to_apicid_and(mask, d->domain, + &d->cfg.dest_apicid)); + return 0; } static int assign_irq_vector(int irq, struct apic_chip_data *data, @@ -409,6 +427,7 @@ int __init arch_early_irq_init(void) arch_init_htirq_domain(x86_vector_domain); BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL)); + BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL)); return arch_early_ioapic_init(); @@ -498,14 +517,7 @@ static int apic_set_affinity(struct irq_data *irq_data, return -EINVAL; err = assign_irq_vector(irq, data, dest); - if (err) { - if (assign_irq_vector(irq, data, - irq_data_get_affinity_mask(irq_data))) - pr_err("Failed to recover vector for irq %d\n", irq); - return err; - } - - return IRQ_SET_MASK_OK; + return err ? err : IRQ_SET_MASK_OK; } static struct irq_chip lapic_controller = { -- cgit v1.2.3-59-g8ed1b From 9ac15b7a8af4cf3337a101498c0ed690d23ade75 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:49 +0000 Subject: x86/irq: Copy vectormask instead of an AND operation In the case that the new vector mask is a subset of the existing mask there is no point to do a AND operation of currentmask & newmask. The result is newmask. So we can simply copy the new mask to the current mask and be done with it. Preparatory patch for further consolidation. Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: stable@vger.kernel.org #4.3+ Link: http://lkml.kernel.org/r/20151231160106.640253454@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 940e18d4dbcd..1bd29c624531 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -156,7 +156,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, vector_cpumask); d->move_in_progress = cpumask_intersects(d->old_domain, cpu_online_mask); - cpumask_and(d->domain, d->domain, vector_cpumask); + cpumask_copy(d->domain, vector_cpumask); goto success; } -- cgit v1.2.3-59-g8ed1b From ab25ac02148b600e645f77cfb8b8ea415ed75bb4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:49 +0000 Subject: x86/irq: Get rid of code duplication Reusing an existing vector and assigning a new vector has duplicated code. Consolidate it. This is also a preparatory patch for finally plugging the cleanup race. Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: stable@vger.kernel.org #4.3+ Link: http://lkml.kernel.org/r/20151231160106.721599216@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 1bd29c624531..fccfa3f5545c 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -118,7 +118,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, */ static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; static int current_offset = VECTOR_OFFSET_START % 16; - int cpu; + int cpu, vector; if (d->move_in_progress) return -EBUSY; @@ -128,7 +128,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, cpumask_clear(searched_cpumask); cpu = cpumask_first_and(mask, cpu_online_mask); while (cpu < nr_cpu_ids) { - int new_cpu, vector, offset; + int new_cpu, offset; /* Get the possible target cpus for @mask/@cpu from the apic */ apic->vector_allocation_domain(cpu, vector_cpumask, mask); @@ -148,16 +148,12 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, if (cpumask_equal(vector_cpumask, d->domain)) goto success; /* - * New cpumask using the vector is a proper subset of - * the current in use mask. So cleanup the vector - * allocation for the members that are not used anymore. + * Mark the cpus which are not longer in the mask for + * cleanup. */ - cpumask_andnot(d->old_domain, d->domain, - vector_cpumask); - d->move_in_progress = - cpumask_intersects(d->old_domain, cpu_online_mask); - cpumask_copy(d->domain, vector_cpumask); - goto success; + cpumask_andnot(d->old_domain, d->domain, vector_cpumask); + vector = d->cfg.vector; + goto update; } vector = current_vector; @@ -183,16 +179,12 @@ next: /* Found one! */ current_vector = vector; current_offset = offset; - if (d->cfg.vector) { + /* Schedule the old vector for cleanup on all cpus */ + if (d->cfg.vector) cpumask_copy(d->old_domain, d->domain); - d->move_in_progress = - cpumask_intersects(d->old_domain, cpu_online_mask); - } for_each_cpu(new_cpu, vector_searchmask) per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq); - d->cfg.vector = vector; - cpumask_copy(d->domain, vector_cpumask); - goto success; + goto update; next_cpu: /* @@ -209,6 +201,11 @@ next_cpu: } return -ENOSPC; +update: + /* Cleanup required ? */ + d->move_in_progress = cpumask_intersects(d->old_domain, cpu_online_mask); + d->cfg.vector = vector; + cpumask_copy(d->domain, vector_cpumask); success: /* * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail -- cgit v1.2.3-59-g8ed1b From 847667ef10356b824a11c853fc8a8b1b437b6a8d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:50 +0000 Subject: x86/irq: Remove offline cpus from vector cleanup No point of keeping offline cpus in the cleanup mask. Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: stable@vger.kernel.org #4.3+ Link: http://lkml.kernel.org/r/20151231160106.808642683@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index fccfa3f5545c..68d18b338e3a 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -202,8 +202,12 @@ next_cpu: return -ENOSPC; update: - /* Cleanup required ? */ - d->move_in_progress = cpumask_intersects(d->old_domain, cpu_online_mask); + /* + * Exclude offline cpus from the cleanup mask and set the + * move_in_progress flag when the result is not empty. + */ + cpumask_and(d->old_domain, d->old_domain, cpu_online_mask); + d->move_in_progress = !cpumask_empty(d->old_domain); d->cfg.vector = vector; cpumask_copy(d->domain, vector_cpumask); success: -- cgit v1.2.3-59-g8ed1b From c1684f5035b60e9f98566493e869496fb5de1d89 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:51 +0000 Subject: x86/irq: Clear move_in_progress before sending cleanup IPI send_cleanup_vector() fiddles with the old_domain mask unprotected because it relies on the protection by the move_in_progress flag. But this is fatal, as the flag is reset after the IPI has been sent. So a cpu which receives the IPI can still see the flag set and therefor ignores the cleanup request. If no other cleanup request happens then the vector stays stale on that cpu and in case of an irq removal the vector still persists. That can lead to use after free when the next cleanup IPI happens. Protect the code with vector_lock and clear move_in_progress before sending the IPI. This does not plug the race which Joe reported because: CPU0 CPU1 CPU2 lock_vector() data->move_in_progress=0 sendIPI() unlock_vector() set_affinity() assign_irq_vector() lock_vector() handle_IPI move_in_progress = 1 lock_vector() unlock_vector() move_in_progress == 1 The full fix comes with a later patch. Reported-and-tested-by: Joe Lawrence Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: stable@vger.kernel.org #4.3+ Link: http://lkml.kernel.org/r/20151231160106.892412198@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 68d18b338e3a..ed62f9c3f785 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -532,6 +532,8 @@ static void __send_cleanup_vector(struct apic_chip_data *data) { cpumask_var_t cleanup_mask; + raw_spin_lock(&vector_lock); + data->move_in_progress = 0; if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { unsigned int i; @@ -543,7 +545,7 @@ static void __send_cleanup_vector(struct apic_chip_data *data) apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); free_cpumask_var(cleanup_mask); } - data->move_in_progress = 0; + raw_spin_unlock(&vector_lock); } void send_cleanup_vector(struct irq_cfg *cfg) -- cgit v1.2.3-59-g8ed1b From 5da0c1217f05d2ccc9a8ed6e6e5c23a8a1d24dd6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:52 +0000 Subject: x86/irq: Remove the cpumask allocation from send_cleanup_vector() There is no need to allocate a new cpumask for sending the cleanup vector. The old_domain mask is now protected by the vector_lock, so we can safely remove the offline cpus from it and send the IPI with the resulting mask. Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: stable@vger.kernel.org #4.3+ Link: http://lkml.kernel.org/r/20151231160106.967993932@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index ed62f9c3f785..91dc2742cfb1 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -530,21 +530,11 @@ static struct irq_chip lapic_controller = { #ifdef CONFIG_SMP static void __send_cleanup_vector(struct apic_chip_data *data) { - cpumask_var_t cleanup_mask; - raw_spin_lock(&vector_lock); + cpumask_and(data->old_domain, data->old_domain, cpu_online_mask); data->move_in_progress = 0; - if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { - unsigned int i; - - for_each_cpu_and(i, data->old_domain, cpu_online_mask) - apic->send_IPI_mask(cpumask_of(i), - IRQ_MOVE_CLEANUP_VECTOR); - } else { - cpumask_and(cleanup_mask, data->old_domain, cpu_online_mask); - apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - free_cpumask_var(cleanup_mask); - } + if (!cpumask_empty(data->old_domain)) + apic->send_IPI_mask(data->old_domain, IRQ_MOVE_CLEANUP_VECTOR); raw_spin_unlock(&vector_lock); } -- cgit v1.2.3-59-g8ed1b From 56d7d2f4bbd00fb198b7907cb3ab657d06115a42 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:52 +0000 Subject: x86/irq: Remove outgoing CPU from vector cleanup mask We want to synchronize new vector assignments with a pending cleanup. Remove a dying cpu from a pending cleanup mask. Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: stable@vger.kernel.org #4.3+ Link: http://lkml.kernel.org/r/20151231160107.045961667@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 91dc2742cfb1..a7fa11e49582 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -633,9 +633,23 @@ void irq_complete_move(struct irq_cfg *cfg) void irq_force_complete_move(int irq) { struct irq_cfg *cfg = irq_cfg(irq); + struct apic_chip_data *data; + + if (!cfg) + return; - if (cfg) - __irq_complete_move(cfg, cfg->vector); + __irq_complete_move(cfg, cfg->vector); + + /* + * Remove this cpu from the cleanup mask. The IPI might have been sent + * just before the cpu was removed from the offline mask, but has not + * been processed because the CPU has interrupts disabled and is on + * the way out. + */ + raw_spin_lock(&vector_lock); + data = container_of(cfg, struct apic_chip_data, cfg); + cpumask_clear_cpu(smp_processor_id(), data->old_domain); + raw_spin_unlock(&vector_lock); } #endif -- cgit v1.2.3-59-g8ed1b From 90a2282e23f0522e4b3f797ad447c5e91bf7fe32 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:53 +0000 Subject: x86/irq: Call irq_force_move_complete with irq descriptor First of all there is no point in looking up the irq descriptor again, but we also need the descriptor for the final cleanup race fix in the next patch. Make that change seperate. No functional difference. Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: stable@vger.kernel.org #4.3+ Link: http://lkml.kernel.org/r/20151231160107.125211743@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/irq.h | 5 +++-- arch/x86/kernel/apic/vector.c | 11 +++++++---- arch/x86/kernel/irq.c | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 881b4768644a..e7de5c9a4fbd 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -23,11 +23,13 @@ extern void irq_ctx_init(int cpu); #define __ARCH_HAS_DO_SOFTIRQ +struct irq_desc; + #ifdef CONFIG_HOTPLUG_CPU #include extern int check_irq_vectors_for_cpu_disable(void); extern void fixup_irqs(void); -extern void irq_force_complete_move(int); +extern void irq_force_complete_move(struct irq_desc *desc); #endif #ifdef CONFIG_HAVE_KVM @@ -37,7 +39,6 @@ extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)); extern void (*x86_platform_ipi_callback)(void); extern void native_init_IRQ(void); -struct irq_desc; extern bool handle_irq(struct irq_desc *desc, struct pt_regs *regs); extern __visible unsigned int do_IRQ(struct pt_regs *regs); diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index a7fa11e49582..5f7883578880 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -630,10 +630,14 @@ void irq_complete_move(struct irq_cfg *cfg) __irq_complete_move(cfg, ~get_irq_regs()->orig_ax); } -void irq_force_complete_move(int irq) +/* + * Called with @desc->lock held and interrupts disabled. + */ +void irq_force_complete_move(struct irq_desc *desc) { - struct irq_cfg *cfg = irq_cfg(irq); - struct apic_chip_data *data; + struct irq_data *irqdata = irq_desc_get_irq_data(desc); + struct apic_chip_data *data = apic_chip_data(irqdata); + struct irq_cfg *cfg = data ? &data->cfg : NULL; if (!cfg) return; @@ -647,7 +651,6 @@ void irq_force_complete_move(int irq) * the way out. */ raw_spin_lock(&vector_lock); - data = container_of(cfg, struct apic_chip_data, cfg); cpumask_clear_cpu(smp_processor_id(), data->old_domain); raw_spin_unlock(&vector_lock); } diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index c0b58dd1ca04..61521dc19c10 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -462,7 +462,7 @@ void fixup_irqs(void) * non intr-remapping case, we can't wait till this interrupt * arrives at this cpu before completing the irq move. */ - irq_force_complete_move(irq); + irq_force_complete_move(desc); if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { break_affinity = 1; -- cgit v1.2.3-59-g8ed1b From 98229aa36caa9c769b13565523de9b813013c703 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:54 +0000 Subject: x86/irq: Plug vector cleanup race We still can end up with a stale vector due to the following: CPU0 CPU1 CPU2 lock_vector() data->move_in_progress=0 sendIPI() unlock_vector() set_affinity() assign_irq_vector() lock_vector() handle_IPI move_in_progress = 1 lock_vector() unlock_vector() move_in_progress == 1 So we need to serialize the vector assignment against a pending cleanup. The solution is rather simple now. We not only check for the move_in_progress flag in assign_irq_vector(), we also check whether there is still a cleanup pending in the old_domain cpumask. If so, we return -EBUSY to the caller and let him deal with it. Though we have to be careful in the cpu unplug case. If the cleanout has not yet completed then the following setaffinity() call would return -EBUSY. Add code which prevents this. Full context is here: http://lkml.kernel.org/r/5653B688.4050809@stratus.com Reported-and-tested-by: Joe Lawrence Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: stable@vger.kernel.org #4.3+ Link: http://lkml.kernel.org/r/20151231160107.207265407@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 63 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 53 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 5f7883578880..3b670df4ba7b 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -120,7 +120,12 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, static int current_offset = VECTOR_OFFSET_START % 16; int cpu, vector; - if (d->move_in_progress) + /* + * If there is still a move in progress or the previous move has not + * been cleaned up completely, tell the caller to come back later. + */ + if (d->move_in_progress || + cpumask_intersects(d->old_domain, cpu_online_mask)) return -EBUSY; /* Only try and allocate irqs on cpus that are present */ @@ -259,7 +264,12 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data) data->cfg.vector = 0; cpumask_clear(data->domain); - if (likely(!data->move_in_progress)) + /* + * If move is in progress or the old_domain mask is not empty, + * i.e. the cleanup IPI has not been processed yet, we need to remove + * the old references to desc from all cpus vector tables. + */ + if (!data->move_in_progress && cpumask_empty(data->old_domain)) return; desc = irq_to_desc(irq); @@ -579,12 +589,25 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) goto unlock; /* - * Check if the irq migration is in progress. If so, we - * haven't received the cleanup request yet for this irq. + * Nothing to cleanup if irq migration is in progress + * or this cpu is not set in the cleanup mask. */ - if (data->move_in_progress) + if (data->move_in_progress || + !cpumask_test_cpu(me, data->old_domain)) goto unlock; + /* + * We have two cases to handle here: + * 1) vector is unchanged but the target mask got reduced + * 2) vector and the target mask has changed + * + * #1 is obvious, but in #2 we have two vectors with the same + * irq descriptor: the old and the new vector. So we need to + * make sure that we only cleanup the old vector. The new + * vector has the current @vector number in the config and + * this cpu is part of the target mask. We better leave that + * one alone. + */ if (vector == data->cfg.vector && cpumask_test_cpu(me, data->domain)) goto unlock; @@ -602,6 +625,7 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) goto unlock; } __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); + cpumask_clear_cpu(me, data->old_domain); unlock: raw_spin_unlock(&desc->lock); } @@ -645,13 +669,32 @@ void irq_force_complete_move(struct irq_desc *desc) __irq_complete_move(cfg, cfg->vector); /* - * Remove this cpu from the cleanup mask. The IPI might have been sent - * just before the cpu was removed from the offline mask, but has not - * been processed because the CPU has interrupts disabled and is on - * the way out. + * This is tricky. If the cleanup of @data->old_domain has not been + * done yet, then the following setaffinity call will fail with + * -EBUSY. This can leave the interrupt in a stale state. + * + * The cleanup cannot make progress because we hold @desc->lock. So in + * case @data->old_domain is not yet cleaned up, we need to drop the + * lock and acquire it again. @desc cannot go away, because the + * hotplug code holds the sparse irq lock. */ raw_spin_lock(&vector_lock); - cpumask_clear_cpu(smp_processor_id(), data->old_domain); + /* Clean out all offline cpus (including ourself) first. */ + cpumask_and(data->old_domain, data->old_domain, cpu_online_mask); + while (!cpumask_empty(data->old_domain)) { + raw_spin_unlock(&vector_lock); + raw_spin_unlock(&desc->lock); + cpu_relax(); + raw_spin_lock(&desc->lock); + /* + * Reevaluate apic_chip_data. It might have been cleared after + * we dropped @desc->lock. + */ + data = apic_chip_data(irqdata); + if (!data) + return; + raw_spin_lock(&vector_lock); + } raw_spin_unlock(&vector_lock); } #endif -- cgit v1.2.3-59-g8ed1b From 3fda5bb420e79b357328b358409e4c547d8f0a18 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 15 Jan 2016 22:11:07 +0200 Subject: x86/platform/intel-mid: Enable 64-bit build Intel Tangier SoC is known to have 64-bit dual core CPU. Enable 64-bit build for it. The kernel has been tested on Intel Edison board: Linux buildroot 4.4.0-next-20160115+ #25 SMP Fri Jan 15 22:03:19 EET 2016 x86_64 GNU/Linux processor : 0 vendor_id : GenuineIntel cpu family : 6 model : 74 model name : Genuine Intel(R) CPU 4000 @ 500MHz stepping : 8 Signed-off-by: Andy Shevchenko Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Mika Westerberg Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1452888668-147116-1-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 3 +-- arch/x86/kernel/head64.c | 8 ++++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 258965d56beb..07459a6b417d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -495,11 +495,10 @@ config X86_INTEL_CE config X86_INTEL_MID bool "Intel MID platform support" - depends on X86_32 depends on X86_EXTENDED_PLATFORM depends on X86_PLATFORM_DEVICES depends on PCI - depends on PCI_GOANY + depends on X86_64 || (PCI_GOANY && X86_32) depends on X86_IO_APIC select SFI select I2C diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index f129a9af6357..2c0f3407bd1f 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -192,5 +192,13 @@ void __init x86_64_start_reservations(char *real_mode_data) reserve_ebda_region(); + switch (boot_params.hdr.hardware_subarch) { + case X86_SUBARCH_INTEL_MID: + x86_intel_mid_early_setup(); + break; + default: + break; + } + start_kernel(); } -- cgit v1.2.3-59-g8ed1b From b000de5848441bc4e99c662fe1fd1b854151a84e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 15 Jan 2016 22:11:08 +0200 Subject: x86/platform/intel-mid: Join string and fix SoC name Join string back to make grepping a bit easier. While here, lowering case for Penwell SoC name in one case to be aligned with the rest messages. Signed-off-by: Andy Shevchenko Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Mika Westerberg Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1452888668-147116-2-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-mid/intel-mid.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c index 1bbc21e2e4ae..90bb997ed0a2 100644 --- a/arch/x86/platform/intel-mid/intel-mid.c +++ b/arch/x86/platform/intel-mid/intel-mid.c @@ -138,7 +138,7 @@ static void intel_mid_arch_setup(void) intel_mid_ops = get_intel_mid_ops[__intel_mid_cpu_chip](); else { intel_mid_ops = get_intel_mid_ops[INTEL_MID_CPU_CHIP_PENWELL](); - pr_info("ARCH: Unknown SoC, assuming PENWELL!\n"); + pr_info("ARCH: Unknown SoC, assuming Penwell!\n"); } out: @@ -214,12 +214,10 @@ static inline int __init setup_x86_intel_mid_timer(char *arg) else if (strcmp("lapic_and_apbt", arg) == 0) intel_mid_timer_options = INTEL_MID_TIMER_LAPIC_APBT; else { - pr_warn("X86 INTEL_MID timer option %s not recognised" - " use x86_intel_mid_timer=apbt_only or lapic_and_apbt\n", - arg); + pr_warn("X86 INTEL_MID timer option %s not recognised use x86_intel_mid_timer=apbt_only or lapic_and_apbt\n", + arg); return -EINVAL; } return 0; } __setup("x86_intel_mid_timer=", setup_x86_intel_mid_timer); - -- cgit v1.2.3-59-g8ed1b From d394f2d9d8e1e7b4959819344baf67b5995da9b0 Mon Sep 17 00:00:00 2001 From: Alex Thorlton Date: Fri, 11 Dec 2015 14:59:45 -0600 Subject: x86/platform/UV: Remove EFI memmap quirk for UV2+ Commit a5d90c923bcf ("x86/efi: Quirk out SGI UV") added a quirk to efi_apply_memmap_quirks to force SGI UV systems to fall back to the old EFI memmap mechanism. We have a BIOS fix for this issue on all systems except for UV1. This commit fixes up the EFI quirk/MMR mapping code so that we only apply the special case to UV1 hardware. Signed-off-by: Alex Thorlton Reviewed-by: Matt Fleming Cc: Dimitri Sivanich Cc: H. Peter Anvin Cc: Hedi Berriche Cc: Len Brown Cc: Linus Torvalds Cc: Mike Travis Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1449867585-189233-2-git-send-email-athorlton@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 5 ++++- arch/x86/platform/efi/quirks.c | 17 +++++++++++++---- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index d760c6bb37b5..624db00583f4 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -889,7 +889,10 @@ void __init uv_system_init(void) return; } pr_info("UV: Found %s hub\n", hub); - map_low_mmrs(); + + /* We now only need to map the MMRs on UV1 */ + if (is_uv1_hub()) + map_low_mmrs(); m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR ); m_val = m_n_config.s.m_skt; diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 1c7380da65ff..2d66db8f80f9 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -248,6 +249,16 @@ out: return ret; } +static const struct dmi_system_id sgi_uv1_dmi[] = { + { NULL, "SGI UV1", + { DMI_MATCH(DMI_PRODUCT_NAME, "Stoutland Platform"), + DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"), + DMI_MATCH(DMI_BIOS_VENDOR, "SGI.COM"), + } + }, + { } /* NULL entry stops DMI scanning */ +}; + void __init efi_apply_memmap_quirks(void) { /* @@ -260,10 +271,8 @@ void __init efi_apply_memmap_quirks(void) efi_unmap_memmap(); } - /* - * UV doesn't support the new EFI pagetable mapping yet. - */ - if (is_uv_system()) + /* UV2+ BIOS has a fix for this issue. UV1 still needs the quirk. */ + if (dmi_check_system(sgi_uv1_dmi)) set_bit(EFI_OLD_MEMMAP, &efi.flags); } -- cgit v1.2.3-59-g8ed1b From 22c43f36b5cf22a7e4506cb3a53f3ad2a43f60f7 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 20 Jan 2016 22:13:41 +0200 Subject: x86/platform/quark: Print boundaries correctly When we print values, such as @size, we have to understand that it's derived from [begin .. end] as: size = end - begin + 1 On the opposite the @end is derived from the rest as: end = begin + size - 1 Correct the IMR code to print values correctly. Note that @__end_rodata actually points to the next address after the aligned .rodata section. Signed-off-by: Andy Shevchenko Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Ong, Boon Leong Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1453320821-64328-1-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-quark/imr.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c index 0ee619f9fcb7..ad5ec6f784e8 100644 --- a/arch/x86/platform/intel-quark/imr.c +++ b/arch/x86/platform/intel-quark/imr.c @@ -228,11 +228,12 @@ static int imr_dbgfs_state_show(struct seq_file *s, void *unused) if (imr_is_enabled(&imr)) { base = imr_to_phys(imr.addr_lo); end = imr_to_phys(imr.addr_hi) + IMR_MASK; + size = end - base + 1; } else { base = 0; end = 0; + size = 0; } - size = end - base; seq_printf(s, "imr%02i: base=%pa, end=%pa, size=0x%08zx " "rmask=0x%08x, wmask=0x%08x, %s, %s\n", i, &base, &end, size, imr.rmask, imr.wmask, @@ -587,6 +588,7 @@ static void __init imr_fixup_memmap(struct imr_device *idev) { phys_addr_t base = virt_to_phys(&_text); size_t size = virt_to_phys(&__end_rodata) - base; + unsigned long start, end; int i; int ret; @@ -594,18 +596,24 @@ static void __init imr_fixup_memmap(struct imr_device *idev) for (i = 0; i < idev->max_imr; i++) imr_clear(i); + start = (unsigned long)_text; + end = (unsigned long)__end_rodata - 1; + /* * Setup a locked IMR around the physical extent of the kernel * from the beginning of the .text secton to the end of the * .rodata section as one physically contiguous block. + * + * We don't round up @size since it is already PAGE_SIZE aligned. + * See vmlinux.lds.S for details. */ ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, true); if (ret < 0) { - pr_err("unable to setup IMR for kernel: (%p - %p)\n", - &_text, &__end_rodata); + pr_err("unable to setup IMR for kernel: %zu KiB (%lx - %lx)\n", + size / 1024, start, end); } else { - pr_info("protecting kernel .text - .rodata: %zu KiB (%p - %p)\n", - size / 1024, &_text, &__end_rodata); + pr_info("protecting kernel .text - .rodata: %zu KiB (%lx - %lx)\n", + size / 1024, start, end); } } -- cgit v1.2.3-59-g8ed1b From 3625c2c234ef66acf21a72d47a5ffa94f6c5ebf2 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 26 Jan 2016 04:15:18 -0700 Subject: x86/mm: Fix types used in pgprot cacheability flags translations For PAE kernels "unsigned long" is not suitable to hold page protection flags, since _PAGE_NX doesn't fit there. This is the reason for quite a few W+X pages getting reported as insecure during boot (observed namely for the entire initrd range). Fixes: 281d4078be ("x86: Make page cache mode a real type") Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/56A7635602000078000CAFF1@prv-mh.provo.novell.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/pgtable_types.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index a471cadb9630..79c91853e50e 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -363,20 +363,18 @@ static inline enum page_cache_mode pgprot2cachemode(pgprot_t pgprot) } static inline pgprot_t pgprot_4k_2_large(pgprot_t pgprot) { + pgprotval_t val = pgprot_val(pgprot); pgprot_t new; - unsigned long val; - val = pgprot_val(pgprot); pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | ((val & _PAGE_PAT) << (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT)); return new; } static inline pgprot_t pgprot_large_2_4k(pgprot_t pgprot) { + pgprotval_t val = pgprot_val(pgprot); pgprot_t new; - unsigned long val; - val = pgprot_val(pgprot); pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | ((val & _PAGE_PAT_LARGE) >> (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT)); -- cgit v1.2.3-59-g8ed1b From 742563777e8da62197d6cb4b99f4027f59454735 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 29 Jan 2016 11:36:10 +0000 Subject: x86/mm/pat: Avoid truncation when converting cpa->numpages to address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are a couple of nasty truncation bugs lurking in the pageattr code that can be triggered when mapping EFI regions, e.g. when we pass a cpa->pgd pointer. Because cpa->numpages is a 32-bit value, shifting left by PAGE_SHIFT will truncate the resultant address to 32-bits. Viorel-Cătălin managed to trigger this bug on his Dell machine that provides a ~5GB EFI region which requires 1236992 pages to be mapped. When calling populate_pud() the end of the region gets calculated incorrectly in the following buggy expression, end = start + (cpa->numpages << PAGE_SHIFT); And only 188416 pages are mapped. Next, populate_pud() gets invoked for a second time because of the loop in __change_page_attr_set_clr(), only this time no pages get mapped because shifting the remaining number of pages (1048576) by PAGE_SHIFT is zero. At which point the loop in __change_page_attr_set_clr() spins forever because we fail to map progress. Hitting this bug depends very much on the virtual address we pick to map the large region at and how many pages we map on the initial run through the loop. This explains why this issue was only recently hit with the introduction of commit a5caa209ba9c ("x86/efi: Fix boot crash by mapping EFI memmap entries bottom-up at runtime, instead of top-down") It's interesting to note that safe uses of cpa->numpages do exist in the pageattr code. If instead of shifting ->numpages we multiply by PAGE_SIZE, no truncation occurs because PAGE_SIZE is a UL value, and so the result is unsigned long. To avoid surprises when users try to convert very large cpa->numpages values to addresses, change the data type from 'int' to 'unsigned long', thereby making it suitable for shifting by PAGE_SHIFT without any type casting. The alternative would be to make liberal use of casting, but that is far more likely to cause problems in the future when someone adds more code and fails to cast properly; this bug was difficult enough to track down in the first place. Reported-and-tested-by: Viorel-Cătălin Răpițeanu Acked-by: Borislav Petkov Cc: Sai Praneeth Prakhya Cc: Signed-off-by: Matt Fleming Link: https://bugzilla.kernel.org/show_bug.cgi?id=110131 Link: http://lkml.kernel.org/r/1454067370-10374-1-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index fc6a4c8f6e2a..2440814b0069 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -33,7 +33,7 @@ struct cpa_data { pgd_t *pgd; pgprot_t mask_set; pgprot_t mask_clr; - int numpages; + unsigned long numpages; int flags; unsigned long pfn; unsigned force_split : 1; @@ -1350,7 +1350,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) * CPA operation. Either a large page has been * preserved or a single page update happened. */ - BUG_ON(cpa->numpages > numpages); + BUG_ON(cpa->numpages > numpages || !cpa->numpages); numpages -= cpa->numpages; if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) cpa->curpage++; -- cgit v1.2.3-59-g8ed1b