From e23b257c293ce4bcc8cabb2aa3097b6ed8a8261a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 14 Jan 2016 08:43:38 +0100
Subject: x86/irq: Call chip->irq_set_affinity in proper context

setup_ioapic_dest() calls irqchip->irq_set_affinity() completely
unprotected. That's wrong in several aspects:

 - it opens a race window where irq_set_affinity() can be interrupted and the
   irq chip left in unconsistent state.

 - it triggers a lockdep splat when we fix the vector race for 4.3+ because
   vector lock is taken with interrupts enabled.

The proper calling convention is irq descriptor lock held and interrupts
disabled.

Reported-and-tested-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Joe Lawrence <joe.lawrence@stratus.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1601140919420.3575@nanos
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/io_apic.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index f25321894ad2..fdb0fbfb1197 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2521,6 +2521,7 @@ void __init setup_ioapic_dest(void)
 {
 	int pin, ioapic, irq, irq_entry;
 	const struct cpumask *mask;
+	struct irq_desc *desc;
 	struct irq_data *idata;
 	struct irq_chip *chip;
 
@@ -2536,7 +2537,9 @@ void __init setup_ioapic_dest(void)
 		if (irq < 0 || !mp_init_irq_at_boot(ioapic, irq))
 			continue;
 
-		idata = irq_get_irq_data(irq);
+		desc = irq_to_desc(irq);
+		raw_spin_lock_irq(&desc->lock);
+		idata = irq_desc_get_irq_data(desc);
 
 		/*
 		 * Honour affinities which have been set in early boot
@@ -2550,6 +2553,7 @@ void __init setup_ioapic_dest(void)
 		/* Might be lapic_chip for irq 0 */
 		if (chip->irq_set_affinity)
 			chip->irq_set_affinity(idata, mask, false);
+		raw_spin_unlock_irq(&desc->lock);
 	}
 }
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 111abeba67e0dbdc26537429de9155e4f1d807d8 Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Thu, 31 Dec 2015 16:30:44 +0000
Subject: x86/irq: Fix a race in x86_vector_free_irqs()

There's a race condition between

x86_vector_free_irqs()
{
	free_apic_chip_data(irq_data->chip_data);
	xxxxx	//irq_data->chip_data has been freed, but the pointer
		//hasn't been reset yet
	irq_domain_reset_irq_data(irq_data);
}

and

smp_irq_move_cleanup_interrupt()
{
	raw_spin_lock(&vector_lock);
	data = apic_chip_data(irq_desc_get_irq_data(desc));
	access data->xxxx	// may access freed memory
	raw_spin_unlock(&desc->lock);
}

which may cause smp_irq_move_cleanup_interrupt() to access freed memory.

Call irq_domain_reset_irq_data(), which clears the pointer with vector lock
held.

[ tglx: Free memory outside of lock held region. ]

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/1450880014-11741-3-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 908cb37da171..cf1e325b67ee 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -226,10 +226,8 @@ static int assign_irq_vector_policy(int irq, int node,
 static void clear_irq_vector(int irq, struct apic_chip_data *data)
 {
 	struct irq_desc *desc;
-	unsigned long flags;
 	int cpu, vector;
 
-	raw_spin_lock_irqsave(&vector_lock, flags);
 	BUG_ON(!data->cfg.vector);
 
 	vector = data->cfg.vector;
@@ -239,10 +237,8 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
 	data->cfg.vector = 0;
 	cpumask_clear(data->domain);
 
-	if (likely(!data->move_in_progress)) {
-		raw_spin_unlock_irqrestore(&vector_lock, flags);
+	if (likely(!data->move_in_progress))
 		return;
-	}
 
 	desc = irq_to_desc(irq);
 	for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) {
@@ -255,7 +251,6 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
 		}
 	}
 	data->move_in_progress = 0;
-	raw_spin_unlock_irqrestore(&vector_lock, flags);
 }
 
 void init_irq_alloc_info(struct irq_alloc_info *info,
@@ -276,19 +271,24 @@ void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src)
 static void x86_vector_free_irqs(struct irq_domain *domain,
 				 unsigned int virq, unsigned int nr_irqs)
 {
+	struct apic_chip_data *apic_data;
 	struct irq_data *irq_data;
+	unsigned long flags;
 	int i;
 
 	for (i = 0; i < nr_irqs; i++) {
 		irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i);
 		if (irq_data && irq_data->chip_data) {
+			raw_spin_lock_irqsave(&vector_lock, flags);
 			clear_irq_vector(virq + i, irq_data->chip_data);
-			free_apic_chip_data(irq_data->chip_data);
+			apic_data = irq_data->chip_data;
+			irq_domain_reset_irq_data(irq_data);
+			raw_spin_unlock_irqrestore(&vector_lock, flags);
+			free_apic_chip_data(apic_data);
 #ifdef	CONFIG_X86_IO_APIC
 			if (virq + i < nr_legacy_irqs())
 				legacy_irq_data[virq + i] = NULL;
 #endif
-			irq_domain_reset_irq_data(irq_data);
 		}
 	}
 }
-- 
cgit v1.2.3-59-g8ed1b


From 36f34c8c63da3e272fd66f91089228c22d2b6e8b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:45 +0000
Subject: x86/irq: Validate that irq descriptor is still active

In fixup_irqs() we unconditionally dereference the irq chip of an irq
descriptor. The descriptor might still be valid, but already cleaned up,
i.e. the chip removed. Add a check for this condition.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160106.236423282@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/irq.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index f8062aaf5df9..c0b58dd1ca04 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -470,6 +470,15 @@ void fixup_irqs(void)
 		}
 
 		chip = irq_data_get_irq_chip(data);
+		/*
+		 * The interrupt descriptor might have been cleaned up
+		 * already, but it is not yet removed from the radix tree
+		 */
+		if (!chip) {
+			raw_spin_unlock(&desc->lock);
+			continue;
+		}
+
 		if (!irqd_can_move_in_process_context(data) && chip->irq_mask)
 			chip->irq_mask(data);
 
-- 
cgit v1.2.3-59-g8ed1b


From 8a580f70f6936ec095da217018cdeeb5835c0207 Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Thu, 31 Dec 2015 16:30:46 +0000
Subject: x86/irq: Do not use apic_chip_data.old_domain as temporary buffer

Function __assign_irq_vector() makes use of apic_chip_data.old_domain as a
temporary buffer, which is in the way of using apic_chip_data.old_domain for
synchronizing the vector cleanup with the vector assignement code.

Use a proper temporary cpumask for this.

[ tglx: Renamed the mask to searched_cpumask for clarity ]

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/1450880014-11741-1-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index cf1e325b67ee..19082cf56616 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -31,7 +31,7 @@ struct apic_chip_data {
 struct irq_domain *x86_vector_domain;
 EXPORT_SYMBOL_GPL(x86_vector_domain);
 static DEFINE_RAW_SPINLOCK(vector_lock);
-static cpumask_var_t vector_cpumask;
+static cpumask_var_t vector_cpumask, searched_cpumask;
 static struct irq_chip lapic_controller;
 #ifdef	CONFIG_X86_IO_APIC
 static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY];
@@ -126,6 +126,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 	/* Only try and allocate irqs on cpus that are present */
 	err = -ENOSPC;
 	cpumask_clear(d->old_domain);
+	cpumask_clear(searched_cpumask);
 	cpu = cpumask_first_and(mask, cpu_online_mask);
 	while (cpu < nr_cpu_ids) {
 		int new_cpu, vector, offset;
@@ -159,9 +160,9 @@ next:
 		}
 
 		if (unlikely(current_vector == vector)) {
-			cpumask_or(d->old_domain, d->old_domain,
+			cpumask_or(searched_cpumask, searched_cpumask,
 				   vector_cpumask);
-			cpumask_andnot(vector_cpumask, mask, d->old_domain);
+			cpumask_andnot(vector_cpumask, mask, searched_cpumask);
 			cpu = cpumask_first_and(vector_cpumask,
 						cpu_online_mask);
 			continue;
@@ -406,6 +407,7 @@ int __init arch_early_irq_init(void)
 	arch_init_htirq_domain(x86_vector_domain);
 
 	BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL));
+	BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL));
 
 	return arch_early_ioapic_init();
 }
-- 
cgit v1.2.3-59-g8ed1b


From 433cbd57d190a1cdd02f243df41c3d7f55ec4b94 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:46 +0000
Subject: x86/irq: Reorganize the return path in assign_irq_vector

Use an explicit goto for the cases where we have success in the search/update
and return -ENOSPC if the search loop ends due to no space.

Preparatory patch for fixes. No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160106.403491024@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 19082cf56616..613b1cd8eecb 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -118,13 +118,12 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 	 */
 	static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
 	static int current_offset = VECTOR_OFFSET_START % 16;
-	int cpu, err;
+	int cpu;
 
 	if (d->move_in_progress)
 		return -EBUSY;
 
 	/* Only try and allocate irqs on cpus that are present */
-	err = -ENOSPC;
 	cpumask_clear(d->old_domain);
 	cpumask_clear(searched_cpumask);
 	cpu = cpumask_first_and(mask, cpu_online_mask);
@@ -134,9 +133,8 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 		apic->vector_allocation_domain(cpu, vector_cpumask, mask);
 
 		if (cpumask_subset(vector_cpumask, d->domain)) {
-			err = 0;
 			if (cpumask_equal(vector_cpumask, d->domain))
-				break;
+				goto success;
 			/*
 			 * New cpumask using the vector is a proper subset of
 			 * the current in use mask. So cleanup the vector
@@ -147,7 +145,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 			d->move_in_progress =
 			   cpumask_intersects(d->old_domain, cpu_online_mask);
 			cpumask_and(d->domain, d->domain, vector_cpumask);
-			break;
+			goto success;
 		}
 
 		vector = current_vector;
@@ -187,17 +185,13 @@ next:
 			per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq);
 		d->cfg.vector = vector;
 		cpumask_copy(d->domain, vector_cpumask);
-		err = 0;
-		break;
+		goto success;
 	}
+	return -ENOSPC;
 
-	if (!err) {
-		/* cache destination APIC IDs into cfg->dest_apicid */
-		err = apic->cpu_mask_to_apicid_and(mask, d->domain,
-						   &d->cfg.dest_apicid);
-	}
-
-	return err;
+success:
+	/* cache destination APIC IDs into cfg->dest_apicid */
+	return apic->cpu_mask_to_apicid_and(mask, d->domain, &d->cfg.dest_apicid);
 }
 
 static int assign_irq_vector(int irq, struct apic_chip_data *data,
-- 
cgit v1.2.3-59-g8ed1b


From 95ffeb4b5baca266e1d0d2bc90f1513e6f419cdd Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:47 +0000
Subject: x86/irq: Reorganize the search in assign_irq_vector

Split out the code which advances the target cpu for the search so we can
reuse it for the next patch which adds an early validation check for the
vectormask which we get from the apic.

Add comments while at it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160106.484562040@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 613b1cd8eecb..cef31955ab18 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -157,14 +157,9 @@ next:
 			vector = FIRST_EXTERNAL_VECTOR + offset;
 		}
 
-		if (unlikely(current_vector == vector)) {
-			cpumask_or(searched_cpumask, searched_cpumask,
-				   vector_cpumask);
-			cpumask_andnot(vector_cpumask, mask, searched_cpumask);
-			cpu = cpumask_first_and(vector_cpumask,
-						cpu_online_mask);
-			continue;
-		}
+		/* If the search wrapped around, try the next cpu */
+		if (unlikely(current_vector == vector))
+			goto next_cpu;
 
 		if (test_bit(vector, used_vectors))
 			goto next;
@@ -186,6 +181,19 @@ next:
 		d->cfg.vector = vector;
 		cpumask_copy(d->domain, vector_cpumask);
 		goto success;
+
+next_cpu:
+		/*
+		 * We exclude the current @vector_cpumask from the requested
+		 * @mask and try again with the next online cpu in the
+		 * result. We cannot modify @mask, so we use @vector_cpumask
+		 * as a temporary buffer here as it will be reassigned when
+		 * calling apic->vector_allocation_domain() above.
+		 */
+		cpumask_or(searched_cpumask, searched_cpumask, vector_cpumask);
+		cpumask_andnot(vector_cpumask, mask, searched_cpumask);
+		cpu = cpumask_first_and(vector_cpumask, cpu_online_mask);
+		continue;
 	}
 	return -ENOSPC;
 
-- 
cgit v1.2.3-59-g8ed1b


From 3716fd27a604d61a91cda47083504971486b80f1 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:48 +0000
Subject: x86/irq: Check vector allocation early

__assign_irq_vector() uses the vector_cpumask which is assigned by
apic->vector_allocation_domain() without doing basic sanity checks. That can
result in a situation where the final assignement of a newly found vector
fails in apic->cpu_mask_to_apicid_and(). So we have to do rollbacks for no
reason.

apic->cpu_mask_to_apicid_and() only fails if

  vector_cpumask & requested_cpumask & cpu_online_mask

is empty.

Check for this condition right away and if the result is empty try immediately
the next possible cpu in the requested mask. So in case of a failure the old
setting is unchanged and we can remove the rollback code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160106.561877324@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 38 +++++++++++++++++++++++++-------------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index cef31955ab18..940e18d4dbcd 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -31,7 +31,7 @@ struct apic_chip_data {
 struct irq_domain *x86_vector_domain;
 EXPORT_SYMBOL_GPL(x86_vector_domain);
 static DEFINE_RAW_SPINLOCK(vector_lock);
-static cpumask_var_t vector_cpumask, searched_cpumask;
+static cpumask_var_t vector_cpumask, vector_searchmask, searched_cpumask;
 static struct irq_chip lapic_controller;
 #ifdef	CONFIG_X86_IO_APIC
 static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY];
@@ -130,8 +130,20 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 	while (cpu < nr_cpu_ids) {
 		int new_cpu, vector, offset;
 
+		/* Get the possible target cpus for @mask/@cpu from the apic */
 		apic->vector_allocation_domain(cpu, vector_cpumask, mask);
 
+		/*
+		 * Clear the offline cpus from @vector_cpumask for searching
+		 * and verify whether the result overlaps with @mask. If true,
+		 * then the call to apic->cpu_mask_to_apicid_and() will
+		 * succeed as well. If not, no point in trying to find a
+		 * vector in this mask.
+		 */
+		cpumask_and(vector_searchmask, vector_cpumask, cpu_online_mask);
+		if (!cpumask_intersects(vector_searchmask, mask))
+			goto next_cpu;
+
 		if (cpumask_subset(vector_cpumask, d->domain)) {
 			if (cpumask_equal(vector_cpumask, d->domain))
 				goto success;
@@ -164,7 +176,7 @@ next:
 		if (test_bit(vector, used_vectors))
 			goto next;
 
-		for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) {
+		for_each_cpu(new_cpu, vector_searchmask) {
 			if (!IS_ERR_OR_NULL(per_cpu(vector_irq, new_cpu)[vector]))
 				goto next;
 		}
@@ -176,7 +188,7 @@ next:
 			d->move_in_progress =
 			   cpumask_intersects(d->old_domain, cpu_online_mask);
 		}
-		for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask)
+		for_each_cpu(new_cpu, vector_searchmask)
 			per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq);
 		d->cfg.vector = vector;
 		cpumask_copy(d->domain, vector_cpumask);
@@ -198,8 +210,14 @@ next_cpu:
 	return -ENOSPC;
 
 success:
-	/* cache destination APIC IDs into cfg->dest_apicid */
-	return apic->cpu_mask_to_apicid_and(mask, d->domain, &d->cfg.dest_apicid);
+	/*
+	 * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail
+	 * as we already established, that mask & d->domain & cpu_online_mask
+	 * is not empty.
+	 */
+	BUG_ON(apic->cpu_mask_to_apicid_and(mask, d->domain,
+					    &d->cfg.dest_apicid));
+	return 0;
 }
 
 static int assign_irq_vector(int irq, struct apic_chip_data *data,
@@ -409,6 +427,7 @@ int __init arch_early_irq_init(void)
 	arch_init_htirq_domain(x86_vector_domain);
 
 	BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL));
+	BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
 	BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL));
 
 	return arch_early_ioapic_init();
@@ -498,14 +517,7 @@ static int apic_set_affinity(struct irq_data *irq_data,
 		return -EINVAL;
 
 	err = assign_irq_vector(irq, data, dest);
-	if (err) {
-		if (assign_irq_vector(irq, data,
-				      irq_data_get_affinity_mask(irq_data)))
-			pr_err("Failed to recover vector for irq %d\n", irq);
-		return err;
-	}
-
-	return IRQ_SET_MASK_OK;
+	return err ? err : IRQ_SET_MASK_OK;
 }
 
 static struct irq_chip lapic_controller = {
-- 
cgit v1.2.3-59-g8ed1b


From 9ac15b7a8af4cf3337a101498c0ed690d23ade75 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:49 +0000
Subject: x86/irq: Copy vectormask instead of an AND operation

In the case that the new vector mask is a subset of the existing mask there is
no point to do a AND operation of currentmask & newmask. The result is
newmask. So we can simply copy the new mask to the current mask and be done
with it. Preparatory patch for further consolidation.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160106.640253454@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 940e18d4dbcd..1bd29c624531 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -156,7 +156,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 				       vector_cpumask);
 			d->move_in_progress =
 			   cpumask_intersects(d->old_domain, cpu_online_mask);
-			cpumask_and(d->domain, d->domain, vector_cpumask);
+			cpumask_copy(d->domain, vector_cpumask);
 			goto success;
 		}
 
-- 
cgit v1.2.3-59-g8ed1b


From ab25ac02148b600e645f77cfb8b8ea415ed75bb4 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:49 +0000
Subject: x86/irq: Get rid of code duplication

Reusing an existing vector and assigning a new vector has duplicated
code. Consolidate it.

This is also a preparatory patch for finally plugging the cleanup race.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160106.721599216@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 33 +++++++++++++++------------------
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 1bd29c624531..fccfa3f5545c 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -118,7 +118,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 	 */
 	static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
 	static int current_offset = VECTOR_OFFSET_START % 16;
-	int cpu;
+	int cpu, vector;
 
 	if (d->move_in_progress)
 		return -EBUSY;
@@ -128,7 +128,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 	cpumask_clear(searched_cpumask);
 	cpu = cpumask_first_and(mask, cpu_online_mask);
 	while (cpu < nr_cpu_ids) {
-		int new_cpu, vector, offset;
+		int new_cpu, offset;
 
 		/* Get the possible target cpus for @mask/@cpu from the apic */
 		apic->vector_allocation_domain(cpu, vector_cpumask, mask);
@@ -148,16 +148,12 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 			if (cpumask_equal(vector_cpumask, d->domain))
 				goto success;
 			/*
-			 * New cpumask using the vector is a proper subset of
-			 * the current in use mask. So cleanup the vector
-			 * allocation for the members that are not used anymore.
+			 * Mark the cpus which are not longer in the mask for
+			 * cleanup.
 			 */
-			cpumask_andnot(d->old_domain, d->domain,
-				       vector_cpumask);
-			d->move_in_progress =
-			   cpumask_intersects(d->old_domain, cpu_online_mask);
-			cpumask_copy(d->domain, vector_cpumask);
-			goto success;
+			cpumask_andnot(d->old_domain, d->domain, vector_cpumask);
+			vector = d->cfg.vector;
+			goto update;
 		}
 
 		vector = current_vector;
@@ -183,16 +179,12 @@ next:
 		/* Found one! */
 		current_vector = vector;
 		current_offset = offset;
-		if (d->cfg.vector) {
+		/* Schedule the old vector for cleanup on all cpus */
+		if (d->cfg.vector)
 			cpumask_copy(d->old_domain, d->domain);
-			d->move_in_progress =
-			   cpumask_intersects(d->old_domain, cpu_online_mask);
-		}
 		for_each_cpu(new_cpu, vector_searchmask)
 			per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq);
-		d->cfg.vector = vector;
-		cpumask_copy(d->domain, vector_cpumask);
-		goto success;
+		goto update;
 
 next_cpu:
 		/*
@@ -209,6 +201,11 @@ next_cpu:
 	}
 	return -ENOSPC;
 
+update:
+	/* Cleanup required ? */
+	d->move_in_progress = cpumask_intersects(d->old_domain, cpu_online_mask);
+	d->cfg.vector = vector;
+	cpumask_copy(d->domain, vector_cpumask);
 success:
 	/*
 	 * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail
-- 
cgit v1.2.3-59-g8ed1b


From 847667ef10356b824a11c853fc8a8b1b437b6a8d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:50 +0000
Subject: x86/irq: Remove offline cpus from vector cleanup

No point of keeping offline cpus in the cleanup mask.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160106.808642683@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index fccfa3f5545c..68d18b338e3a 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -202,8 +202,12 @@ next_cpu:
 	return -ENOSPC;
 
 update:
-	/* Cleanup required ? */
-	d->move_in_progress = cpumask_intersects(d->old_domain, cpu_online_mask);
+	/*
+	 * Exclude offline cpus from the cleanup mask and set the
+	 * move_in_progress flag when the result is not empty.
+	 */
+	cpumask_and(d->old_domain, d->old_domain, cpu_online_mask);
+	d->move_in_progress = !cpumask_empty(d->old_domain);
 	d->cfg.vector = vector;
 	cpumask_copy(d->domain, vector_cpumask);
 success:
-- 
cgit v1.2.3-59-g8ed1b


From c1684f5035b60e9f98566493e869496fb5de1d89 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:51 +0000
Subject: x86/irq: Clear move_in_progress before sending cleanup IPI

send_cleanup_vector() fiddles with the old_domain mask unprotected because it
relies on the protection by the move_in_progress flag. But this is fatal, as
the flag is reset after the IPI has been sent. So a cpu which receives the IPI
can still see the flag set and therefor ignores the cleanup request. If no
other cleanup request happens then the vector stays stale on that cpu and in
case of an irq removal the vector still persists. That can lead to use after
free when the next cleanup IPI happens.

Protect the code with vector_lock and clear move_in_progress before sending
the IPI.

This does not plug the race which Joe reported because:

CPU0                          CPU1                      CPU2
lock_vector()
data->move_in_progress=0
sendIPI()
unlock_vector()
                              set_affinity()
                              assign_irq_vector()
                              lock_vector()             handle_IPI
                              move_in_progress = 1      lock_vector()
                              unlock_vector()
                                                        move_in_progress == 1

The full fix comes with a later patch.

Reported-and-tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160106.892412198@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 68d18b338e3a..ed62f9c3f785 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -532,6 +532,8 @@ static void __send_cleanup_vector(struct apic_chip_data *data)
 {
 	cpumask_var_t cleanup_mask;
 
+	raw_spin_lock(&vector_lock);
+	data->move_in_progress = 0;
 	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
 		unsigned int i;
 
@@ -543,7 +545,7 @@ static void __send_cleanup_vector(struct apic_chip_data *data)
 		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
 		free_cpumask_var(cleanup_mask);
 	}
-	data->move_in_progress = 0;
+	raw_spin_unlock(&vector_lock);
 }
 
 void send_cleanup_vector(struct irq_cfg *cfg)
-- 
cgit v1.2.3-59-g8ed1b


From 5da0c1217f05d2ccc9a8ed6e6e5c23a8a1d24dd6 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:52 +0000
Subject: x86/irq: Remove the cpumask allocation from send_cleanup_vector()

There is no need to allocate a new cpumask for sending the cleanup vector. The
old_domain mask is now protected by the vector_lock, so we can safely remove
the offline cpus from it and send the IPI with the resulting mask.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160106.967993932@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index ed62f9c3f785..91dc2742cfb1 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -530,21 +530,11 @@ static struct irq_chip lapic_controller = {
 #ifdef CONFIG_SMP
 static void __send_cleanup_vector(struct apic_chip_data *data)
 {
-	cpumask_var_t cleanup_mask;
-
 	raw_spin_lock(&vector_lock);
+	cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
 	data->move_in_progress = 0;
-	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
-		unsigned int i;
-
-		for_each_cpu_and(i, data->old_domain, cpu_online_mask)
-			apic->send_IPI_mask(cpumask_of(i),
-					    IRQ_MOVE_CLEANUP_VECTOR);
-	} else {
-		cpumask_and(cleanup_mask, data->old_domain, cpu_online_mask);
-		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		free_cpumask_var(cleanup_mask);
-	}
+	if (!cpumask_empty(data->old_domain))
+		apic->send_IPI_mask(data->old_domain, IRQ_MOVE_CLEANUP_VECTOR);
 	raw_spin_unlock(&vector_lock);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 56d7d2f4bbd00fb198b7907cb3ab657d06115a42 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:52 +0000
Subject: x86/irq: Remove outgoing CPU from vector cleanup mask

We want to synchronize new vector assignments with a pending cleanup. Remove a
dying cpu from a pending cleanup mask.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160107.045961667@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 91dc2742cfb1..a7fa11e49582 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -633,9 +633,23 @@ void irq_complete_move(struct irq_cfg *cfg)
 void irq_force_complete_move(int irq)
 {
 	struct irq_cfg *cfg = irq_cfg(irq);
+	struct apic_chip_data *data;
+
+	if (!cfg)
+		return;
 
-	if (cfg)
-		__irq_complete_move(cfg, cfg->vector);
+	__irq_complete_move(cfg, cfg->vector);
+
+	/*
+	 * Remove this cpu from the cleanup mask. The IPI might have been sent
+	 * just before the cpu was removed from the offline mask, but has not
+	 * been processed because the CPU has interrupts disabled and is on
+	 * the way out.
+	 */
+	raw_spin_lock(&vector_lock);
+	data = container_of(cfg, struct apic_chip_data, cfg);
+	cpumask_clear_cpu(smp_processor_id(), data->old_domain);
+	raw_spin_unlock(&vector_lock);
 }
 #endif
 
-- 
cgit v1.2.3-59-g8ed1b


From 90a2282e23f0522e4b3f797ad447c5e91bf7fe32 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:53 +0000
Subject: x86/irq: Call irq_force_move_complete with irq descriptor

First of all there is no point in looking up the irq descriptor again, but we
also need the descriptor for the final cleanup race fix in the next
patch. Make that change seperate. No functional difference.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160107.125211743@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/irq.h    |  5 +++--
 arch/x86/kernel/apic/vector.c | 11 +++++++----
 arch/x86/kernel/irq.c         |  2 +-
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 881b4768644a..e7de5c9a4fbd 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -23,11 +23,13 @@ extern void irq_ctx_init(int cpu);
 
 #define __ARCH_HAS_DO_SOFTIRQ
 
+struct irq_desc;
+
 #ifdef CONFIG_HOTPLUG_CPU
 #include <linux/cpumask.h>
 extern int check_irq_vectors_for_cpu_disable(void);
 extern void fixup_irqs(void);
-extern void irq_force_complete_move(int);
+extern void irq_force_complete_move(struct irq_desc *desc);
 #endif
 
 #ifdef CONFIG_HAVE_KVM
@@ -37,7 +39,6 @@ extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void));
 extern void (*x86_platform_ipi_callback)(void);
 extern void native_init_IRQ(void);
 
-struct irq_desc;
 extern bool handle_irq(struct irq_desc *desc, struct pt_regs *regs);
 
 extern __visible unsigned int do_IRQ(struct pt_regs *regs);
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index a7fa11e49582..5f7883578880 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -630,10 +630,14 @@ void irq_complete_move(struct irq_cfg *cfg)
 	__irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
 }
 
-void irq_force_complete_move(int irq)
+/*
+ * Called with @desc->lock held and interrupts disabled.
+ */
+void irq_force_complete_move(struct irq_desc *desc)
 {
-	struct irq_cfg *cfg = irq_cfg(irq);
-	struct apic_chip_data *data;
+	struct irq_data *irqdata = irq_desc_get_irq_data(desc);
+	struct apic_chip_data *data = apic_chip_data(irqdata);
+	struct irq_cfg *cfg = data ? &data->cfg : NULL;
 
 	if (!cfg)
 		return;
@@ -647,7 +651,6 @@ void irq_force_complete_move(int irq)
 	 * the way out.
 	 */
 	raw_spin_lock(&vector_lock);
-	data = container_of(cfg, struct apic_chip_data, cfg);
 	cpumask_clear_cpu(smp_processor_id(), data->old_domain);
 	raw_spin_unlock(&vector_lock);
 }
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index c0b58dd1ca04..61521dc19c10 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -462,7 +462,7 @@ void fixup_irqs(void)
 		 * non intr-remapping case, we can't wait till this interrupt
 		 * arrives at this cpu before completing the irq move.
 		 */
-		irq_force_complete_move(irq);
+		irq_force_complete_move(desc);
 
 		if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
 			break_affinity = 1;
-- 
cgit v1.2.3-59-g8ed1b


From 98229aa36caa9c769b13565523de9b813013c703 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Dec 2015 16:30:54 +0000
Subject: x86/irq: Plug vector cleanup race

We still can end up with a stale vector due to the following:

CPU0                          CPU1                      CPU2
lock_vector()
data->move_in_progress=0
sendIPI()
unlock_vector()
                              set_affinity()
                              assign_irq_vector()
                              lock_vector()             handle_IPI
                              move_in_progress = 1      lock_vector()
                              unlock_vector()
                                                        move_in_progress == 1

So we need to serialize the vector assignment against a pending cleanup. The
solution is rather simple now. We not only check for the move_in_progress flag
in assign_irq_vector(), we also check whether there is still a cleanup pending
in the old_domain cpumask. If so, we return -EBUSY to the caller and let him
deal with it. Though we have to be careful in the cpu unplug case. If the
cleanout has not yet completed then the following setaffinity() call would
return -EBUSY. Add code which prevents this.

Full context is here: http://lkml.kernel.org/r/5653B688.4050809@stratus.com

Reported-and-tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160107.207265407@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/vector.c | 63 ++++++++++++++++++++++++++++++++++++-------
 1 file changed, 53 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 5f7883578880..3b670df4ba7b 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -120,7 +120,12 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 	static int current_offset = VECTOR_OFFSET_START % 16;
 	int cpu, vector;
 
-	if (d->move_in_progress)
+	/*
+	 * If there is still a move in progress or the previous move has not
+	 * been cleaned up completely, tell the caller to come back later.
+	 */
+	if (d->move_in_progress ||
+	    cpumask_intersects(d->old_domain, cpu_online_mask))
 		return -EBUSY;
 
 	/* Only try and allocate irqs on cpus that are present */
@@ -259,7 +264,12 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
 	data->cfg.vector = 0;
 	cpumask_clear(data->domain);
 
-	if (likely(!data->move_in_progress))
+	/*
+	 * If move is in progress or the old_domain mask is not empty,
+	 * i.e. the cleanup IPI has not been processed yet, we need to remove
+	 * the old references to desc from all cpus vector tables.
+	 */
+	if (!data->move_in_progress && cpumask_empty(data->old_domain))
 		return;
 
 	desc = irq_to_desc(irq);
@@ -579,12 +589,25 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
 			goto unlock;
 
 		/*
-		 * Check if the irq migration is in progress. If so, we
-		 * haven't received the cleanup request yet for this irq.
+		 * Nothing to cleanup if irq migration is in progress
+		 * or this cpu is not set in the cleanup mask.
 		 */
-		if (data->move_in_progress)
+		if (data->move_in_progress ||
+		    !cpumask_test_cpu(me, data->old_domain))
 			goto unlock;
 
+		/*
+		 * We have two cases to handle here:
+		 * 1) vector is unchanged but the target mask got reduced
+		 * 2) vector and the target mask has changed
+		 *
+		 * #1 is obvious, but in #2 we have two vectors with the same
+		 * irq descriptor: the old and the new vector. So we need to
+		 * make sure that we only cleanup the old vector. The new
+		 * vector has the current @vector number in the config and
+		 * this cpu is part of the target mask. We better leave that
+		 * one alone.
+		 */
 		if (vector == data->cfg.vector &&
 		    cpumask_test_cpu(me, data->domain))
 			goto unlock;
@@ -602,6 +625,7 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
 			goto unlock;
 		}
 		__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
+		cpumask_clear_cpu(me, data->old_domain);
 unlock:
 		raw_spin_unlock(&desc->lock);
 	}
@@ -645,13 +669,32 @@ void irq_force_complete_move(struct irq_desc *desc)
 	__irq_complete_move(cfg, cfg->vector);
 
 	/*
-	 * Remove this cpu from the cleanup mask. The IPI might have been sent
-	 * just before the cpu was removed from the offline mask, but has not
-	 * been processed because the CPU has interrupts disabled and is on
-	 * the way out.
+	 * This is tricky. If the cleanup of @data->old_domain has not been
+	 * done yet, then the following setaffinity call will fail with
+	 * -EBUSY. This can leave the interrupt in a stale state.
+	 *
+	 * The cleanup cannot make progress because we hold @desc->lock. So in
+	 * case @data->old_domain is not yet cleaned up, we need to drop the
+	 * lock and acquire it again. @desc cannot go away, because the
+	 * hotplug code holds the sparse irq lock.
 	 */
 	raw_spin_lock(&vector_lock);
-	cpumask_clear_cpu(smp_processor_id(), data->old_domain);
+	/* Clean out all offline cpus (including ourself) first. */
+	cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
+	while (!cpumask_empty(data->old_domain)) {
+		raw_spin_unlock(&vector_lock);
+		raw_spin_unlock(&desc->lock);
+		cpu_relax();
+		raw_spin_lock(&desc->lock);
+		/*
+		 * Reevaluate apic_chip_data. It might have been cleared after
+		 * we dropped @desc->lock.
+		 */
+		data = apic_chip_data(irqdata);
+		if (!data)
+			return;
+		raw_spin_lock(&vector_lock);
+	}
 	raw_spin_unlock(&vector_lock);
 }
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 3fda5bb420e79b357328b358409e4c547d8f0a18 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 15 Jan 2016 22:11:07 +0200
Subject: x86/platform/intel-mid: Enable 64-bit build

Intel Tangier SoC is known to have 64-bit dual core CPU. Enable
64-bit build for it.

The kernel has been tested on Intel Edison board:

	Linux buildroot 4.4.0-next-20160115+ #25 SMP Fri Jan 15 22:03:19 EET 2016 x86_64 GNU/Linux

	processor       : 0
	vendor_id       : GenuineIntel
	cpu family      : 6
	model           : 74
	model name      : Genuine Intel(R) CPU   4000  @  500MHz
	stepping        : 8

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1452888668-147116-1-git-send-email-andriy.shevchenko@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/Kconfig         | 3 +--
 arch/x86/kernel/head64.c | 8 ++++++++
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 258965d56beb..07459a6b417d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -495,11 +495,10 @@ config X86_INTEL_CE
 
 config X86_INTEL_MID
 	bool "Intel MID platform support"
-	depends on X86_32
 	depends on X86_EXTENDED_PLATFORM
 	depends on X86_PLATFORM_DEVICES
 	depends on PCI
-	depends on PCI_GOANY
+	depends on X86_64 || (PCI_GOANY && X86_32)
 	depends on X86_IO_APIC
 	select SFI
 	select I2C
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index f129a9af6357..2c0f3407bd1f 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -192,5 +192,13 @@ void __init x86_64_start_reservations(char *real_mode_data)
 
 	reserve_ebda_region();
 
+	switch (boot_params.hdr.hardware_subarch) {
+	case X86_SUBARCH_INTEL_MID:
+		x86_intel_mid_early_setup();
+		break;
+	default:
+		break;
+	}
+
 	start_kernel();
 }
-- 
cgit v1.2.3-59-g8ed1b


From b000de5848441bc4e99c662fe1fd1b854151a84e Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 15 Jan 2016 22:11:08 +0200
Subject: x86/platform/intel-mid: Join string and fix SoC name

Join string back to make grepping a bit easier. While here,
lowering case for Penwell SoC name in one case to be aligned
with the rest messages.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1452888668-147116-2-git-send-email-andriy.shevchenko@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/platform/intel-mid/intel-mid.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index 1bbc21e2e4ae..90bb997ed0a2 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -138,7 +138,7 @@ static void intel_mid_arch_setup(void)
 		intel_mid_ops = get_intel_mid_ops[__intel_mid_cpu_chip]();
 	else {
 		intel_mid_ops = get_intel_mid_ops[INTEL_MID_CPU_CHIP_PENWELL]();
-		pr_info("ARCH: Unknown SoC, assuming PENWELL!\n");
+		pr_info("ARCH: Unknown SoC, assuming Penwell!\n");
 	}
 
 out:
@@ -214,12 +214,10 @@ static inline int __init setup_x86_intel_mid_timer(char *arg)
 	else if (strcmp("lapic_and_apbt", arg) == 0)
 		intel_mid_timer_options = INTEL_MID_TIMER_LAPIC_APBT;
 	else {
-		pr_warn("X86 INTEL_MID timer option %s not recognised"
-			   " use x86_intel_mid_timer=apbt_only or lapic_and_apbt\n",
-			   arg);
+		pr_warn("X86 INTEL_MID timer option %s not recognised use x86_intel_mid_timer=apbt_only or lapic_and_apbt\n",
+			arg);
 		return -EINVAL;
 	}
 	return 0;
 }
 __setup("x86_intel_mid_timer=", setup_x86_intel_mid_timer);
-
-- 
cgit v1.2.3-59-g8ed1b


From d394f2d9d8e1e7b4959819344baf67b5995da9b0 Mon Sep 17 00:00:00 2001
From: Alex Thorlton <athorlton@sgi.com>
Date: Fri, 11 Dec 2015 14:59:45 -0600
Subject: x86/platform/UV: Remove EFI memmap quirk for UV2+

Commit a5d90c923bcf ("x86/efi: Quirk out SGI UV") added a quirk
to efi_apply_memmap_quirks to force SGI UV systems to fall back
to the old EFI memmap mechanism.  We have a BIOS fix for this
issue on all systems except for UV1.  This commit fixes up the
EFI quirk/MMR mapping code so that we only apply the special
case to UV1 hardware.

Signed-off-by: Alex Thorlton <athorlton@sgi.com>
Reviewed-by: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Dimitri Sivanich <sivanich@sgi.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Hedi Berriche <hedi@sgi.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Travis <travis@sgi.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/1449867585-189233-2-git-send-email-athorlton@sgi.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/apic/x2apic_uv_x.c |  5 ++++-
 arch/x86/platform/efi/quirks.c     | 17 +++++++++++++----
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index d760c6bb37b5..624db00583f4 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -889,7 +889,10 @@ void __init uv_system_init(void)
 		return;
 	}
 	pr_info("UV: Found %s hub\n", hub);
-	map_low_mmrs();
+
+	/* We now only need to map the MMRs on UV1 */
+	if (is_uv1_hub())
+		map_low_mmrs();
 
 	m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
 	m_val = m_n_config.s.m_skt;
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 1c7380da65ff..2d66db8f80f9 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -8,6 +8,7 @@
 #include <linux/memblock.h>
 #include <linux/bootmem.h>
 #include <linux/acpi.h>
+#include <linux/dmi.h>
 #include <asm/efi.h>
 #include <asm/uv/uv.h>
 
@@ -248,6 +249,16 @@ out:
 	return ret;
 }
 
+static const struct dmi_system_id sgi_uv1_dmi[] = {
+	{ NULL, "SGI UV1",
+		{	DMI_MATCH(DMI_PRODUCT_NAME,	"Stoutland Platform"),
+			DMI_MATCH(DMI_PRODUCT_VERSION,	"1.0"),
+			DMI_MATCH(DMI_BIOS_VENDOR,	"SGI.COM"),
+		}
+	},
+	{ } /* NULL entry stops DMI scanning */
+};
+
 void __init efi_apply_memmap_quirks(void)
 {
 	/*
@@ -260,10 +271,8 @@ void __init efi_apply_memmap_quirks(void)
 		efi_unmap_memmap();
 	}
 
-	/*
-	 * UV doesn't support the new EFI pagetable mapping yet.
-	 */
-	if (is_uv_system())
+	/* UV2+ BIOS has a fix for this issue.  UV1 still needs the quirk. */
+	if (dmi_check_system(sgi_uv1_dmi))
 		set_bit(EFI_OLD_MEMMAP, &efi.flags);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 22c43f36b5cf22a7e4506cb3a53f3ad2a43f60f7 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 20 Jan 2016 22:13:41 +0200
Subject: x86/platform/quark: Print boundaries correctly

When we print values, such as @size, we have to understand that
it's derived from [begin .. end] as:

	size = end - begin + 1

On the opposite the @end is derived from the rest as:

	end = begin + size - 1

Correct the IMR code to print values correctly.

Note that @__end_rodata actually points to the next address
after the aligned .rodata section.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Ong, Boon Leong <boon.leong.ong@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1453320821-64328-1-git-send-email-andriy.shevchenko@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/platform/intel-quark/imr.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c
index 0ee619f9fcb7..ad5ec6f784e8 100644
--- a/arch/x86/platform/intel-quark/imr.c
+++ b/arch/x86/platform/intel-quark/imr.c
@@ -228,11 +228,12 @@ static int imr_dbgfs_state_show(struct seq_file *s, void *unused)
 		if (imr_is_enabled(&imr)) {
 			base = imr_to_phys(imr.addr_lo);
 			end = imr_to_phys(imr.addr_hi) + IMR_MASK;
+			size = end - base + 1;
 		} else {
 			base = 0;
 			end = 0;
+			size = 0;
 		}
-		size = end - base;
 		seq_printf(s, "imr%02i: base=%pa, end=%pa, size=0x%08zx "
 			   "rmask=0x%08x, wmask=0x%08x, %s, %s\n", i,
 			   &base, &end, size, imr.rmask, imr.wmask,
@@ -587,6 +588,7 @@ static void __init imr_fixup_memmap(struct imr_device *idev)
 {
 	phys_addr_t base = virt_to_phys(&_text);
 	size_t size = virt_to_phys(&__end_rodata) - base;
+	unsigned long start, end;
 	int i;
 	int ret;
 
@@ -594,18 +596,24 @@ static void __init imr_fixup_memmap(struct imr_device *idev)
 	for (i = 0; i < idev->max_imr; i++)
 		imr_clear(i);
 
+	start = (unsigned long)_text;
+	end = (unsigned long)__end_rodata - 1;
+
 	/*
 	 * Setup a locked IMR around the physical extent of the kernel
 	 * from the beginning of the .text secton to the end of the
 	 * .rodata section as one physically contiguous block.
+	 *
+	 * We don't round up @size since it is already PAGE_SIZE aligned.
+	 * See vmlinux.lds.S for details.
 	 */
 	ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, true);
 	if (ret < 0) {
-		pr_err("unable to setup IMR for kernel: (%p - %p)\n",
-			&_text, &__end_rodata);
+		pr_err("unable to setup IMR for kernel: %zu KiB (%lx - %lx)\n",
+			size / 1024, start, end);
 	} else {
-		pr_info("protecting kernel .text - .rodata: %zu KiB (%p - %p)\n",
-			size / 1024, &_text, &__end_rodata);
+		pr_info("protecting kernel .text - .rodata: %zu KiB (%lx - %lx)\n",
+			size / 1024, start, end);
 	}
 
 }
-- 
cgit v1.2.3-59-g8ed1b


From 3625c2c234ef66acf21a72d47a5ffa94f6c5ebf2 Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Tue, 26 Jan 2016 04:15:18 -0700
Subject: x86/mm: Fix types used in pgprot cacheability flags translations

For PAE kernels "unsigned long" is not suitable to hold page protection
flags, since _PAGE_NX doesn't fit there. This is the reason for quite a
few W+X pages getting reported as insecure during boot (observed namely
for the entire initrd range).

Fixes: 281d4078be ("x86: Make page cache mode a real type")
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Juergen Gross <JGross@suse.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/56A7635602000078000CAFF1@prv-mh.provo.novell.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/pgtable_types.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index a471cadb9630..79c91853e50e 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -363,20 +363,18 @@ static inline enum page_cache_mode pgprot2cachemode(pgprot_t pgprot)
 }
 static inline pgprot_t pgprot_4k_2_large(pgprot_t pgprot)
 {
+	pgprotval_t val = pgprot_val(pgprot);
 	pgprot_t new;
-	unsigned long val;
 
-	val = pgprot_val(pgprot);
 	pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) |
 		((val & _PAGE_PAT) << (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT));
 	return new;
 }
 static inline pgprot_t pgprot_large_2_4k(pgprot_t pgprot)
 {
+	pgprotval_t val = pgprot_val(pgprot);
 	pgprot_t new;
-	unsigned long val;
 
-	val = pgprot_val(pgprot);
 	pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) |
 			  ((val & _PAGE_PAT_LARGE) >>
 			   (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT));
-- 
cgit v1.2.3-59-g8ed1b


From 742563777e8da62197d6cb4b99f4027f59454735 Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt@codeblueprint.co.uk>
Date: Fri, 29 Jan 2016 11:36:10 +0000
Subject: x86/mm/pat: Avoid truncation when converting cpa->numpages to address
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are a couple of nasty truncation bugs lurking in the pageattr
code that can be triggered when mapping EFI regions, e.g. when we pass
a cpa->pgd pointer. Because cpa->numpages is a 32-bit value, shifting
left by PAGE_SHIFT will truncate the resultant address to 32-bits.

Viorel-Cătălin managed to trigger this bug on his Dell machine that
provides a ~5GB EFI region which requires 1236992 pages to be mapped.
When calling populate_pud() the end of the region gets calculated
incorrectly in the following buggy expression,

  end = start + (cpa->numpages << PAGE_SHIFT);

And only 188416 pages are mapped. Next, populate_pud() gets invoked
for a second time because of the loop in __change_page_attr_set_clr(),
only this time no pages get mapped because shifting the remaining
number of pages (1048576) by PAGE_SHIFT is zero. At which point the
loop in __change_page_attr_set_clr() spins forever because we fail to
map progress.

Hitting this bug depends very much on the virtual address we pick to
map the large region at and how many pages we map on the initial run
through the loop. This explains why this issue was only recently hit
with the introduction of commit

  a5caa209ba9c ("x86/efi: Fix boot crash by mapping EFI memmap
   entries bottom-up at runtime, instead of top-down")

It's interesting to note that safe uses of cpa->numpages do exist in
the pageattr code. If instead of shifting ->numpages we multiply by
PAGE_SIZE, no truncation occurs because PAGE_SIZE is a UL value, and
so the result is unsigned long.

To avoid surprises when users try to convert very large cpa->numpages
values to addresses, change the data type from 'int' to 'unsigned
long', thereby making it suitable for shifting by PAGE_SHIFT without
any type casting.

The alternative would be to make liberal use of casting, but that is
far more likely to cause problems in the future when someone adds more
code and fails to cast properly; this bug was difficult enough to
track down in the first place.

Reported-and-tested-by: Viorel-Cătălin Răpițeanu <rapiteanu.catalin@gmail.com>
Acked-by: Borislav Petkov <bp@alien8.de>
Cc: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=110131
Link: http://lkml.kernel.org/r/1454067370-10374-1-git-send-email-matt@codeblueprint.co.uk
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index fc6a4c8f6e2a..2440814b0069 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -33,7 +33,7 @@ struct cpa_data {
 	pgd_t		*pgd;
 	pgprot_t	mask_set;
 	pgprot_t	mask_clr;
-	int		numpages;
+	unsigned long	numpages;
 	int		flags;
 	unsigned long	pfn;
 	unsigned	force_split : 1;
@@ -1350,7 +1350,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
 		 * CPA operation. Either a large page has been
 		 * preserved or a single page update happened.
 		 */
-		BUG_ON(cpa->numpages > numpages);
+		BUG_ON(cpa->numpages > numpages || !cpa->numpages);
 		numpages -= cpa->numpages;
 		if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY))
 			cpa->curpage++;
-- 
cgit v1.2.3-59-g8ed1b