Diffstat (limited to 'arch/x86/kernel/apic/vector.c')
-rw-r--r--	arch/x86/kernel/apic/vector.c	221
1 file changed, 145 insertions, 76 deletions
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 908cb37da171..3b670df4ba7b 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -31,7 +31,7 @@ struct apic_chip_data {
 struct irq_domain *x86_vector_domain;
 EXPORT_SYMBOL_GPL(x86_vector_domain);
 static DEFINE_RAW_SPINLOCK(vector_lock);
-static cpumask_var_t vector_cpumask;
+static cpumask_var_t vector_cpumask, vector_searchmask, searched_cpumask;
 static struct irq_chip lapic_controller;
 #ifdef	CONFIG_X86_IO_APIC
 static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY];
@@ -118,35 +118,47 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 	 */
 	static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
 	static int current_offset = VECTOR_OFFSET_START % 16;
-	int cpu, err;
+	int cpu, vector;
 
-	if (d->move_in_progress)
+	/*
+	 * If there is still a move in progress or the previous move has not
+	 * been cleaned up completely, tell the caller to come back later.
+	 */
+	if (d->move_in_progress ||
+	    cpumask_intersects(d->old_domain, cpu_online_mask))
 		return -EBUSY;
 
 	/* Only try and allocate irqs on cpus that are present */
-	err = -ENOSPC;
 	cpumask_clear(d->old_domain);
+	cpumask_clear(searched_cpumask);
 	cpu = cpumask_first_and(mask, cpu_online_mask);
 	while (cpu < nr_cpu_ids) {
-		int new_cpu, vector, offset;
+		int new_cpu, offset;
 
+		/* Get the possible target cpus for @mask/@cpu from the apic */
 		apic->vector_allocation_domain(cpu, vector_cpumask, mask);
 
+		/*
+		 * Clear the offline cpus from @vector_cpumask for searching
+		 * and verify whether the result overlaps with @mask. If true,
+		 * then the call to apic->cpu_mask_to_apicid_and() will
+		 * succeed as well. If not, no point in trying to find a
+		 * vector in this mask.
+		 */
+		cpumask_and(vector_searchmask, vector_cpumask, cpu_online_mask);
+		if (!cpumask_intersects(vector_searchmask, mask))
+			goto next_cpu;
+
 		if (cpumask_subset(vector_cpumask, d->domain)) {
-			err = 0;
 			if (cpumask_equal(vector_cpumask, d->domain))
-				break;
+				goto success;
 			/*
-			 * New cpumask using the vector is a proper subset of
-			 * the current in use mask. So cleanup the vector
-			 * allocation for the members that are not used anymore.
+			 * Mark the cpus which are not longer in the mask for
+			 * cleanup.
 			 */
-			cpumask_andnot(d->old_domain, d->domain,
-				       vector_cpumask);
-			d->move_in_progress =
-			   cpumask_intersects(d->old_domain, cpu_online_mask);
-			cpumask_and(d->domain, d->domain, vector_cpumask);
-			break;
+			cpumask_andnot(d->old_domain, d->domain, vector_cpumask);
+			vector = d->cfg.vector;
+			goto update;
 		}
 
 		vector = current_vector;
@@ -158,45 +170,60 @@ next:
 			vector = FIRST_EXTERNAL_VECTOR + offset;
 		}
 
-		if (unlikely(current_vector == vector)) {
-			cpumask_or(d->old_domain, d->old_domain,
-				   vector_cpumask);
-			cpumask_andnot(vector_cpumask, mask, d->old_domain);
-			cpu = cpumask_first_and(vector_cpumask,
-						cpu_online_mask);
-			continue;
-		}
+		/* If the search wrapped around, try the next cpu */
+		if (unlikely(current_vector == vector))
+			goto next_cpu;
 
 		if (test_bit(vector, used_vectors))
 			goto next;
-		for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) {
+		for_each_cpu(new_cpu, vector_searchmask) {
 			if (!IS_ERR_OR_NULL(per_cpu(vector_irq, new_cpu)[vector]))
 				goto next;
 		}
 		/* Found one! */
 		current_vector = vector;
 		current_offset = offset;
-		if (d->cfg.vector) {
+		/* Schedule the old vector for cleanup on all cpus */
+		if (d->cfg.vector)
 			cpumask_copy(d->old_domain, d->domain);
-			d->move_in_progress =
-			   cpumask_intersects(d->old_domain, cpu_online_mask);
-		}
-		for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask)
+		for_each_cpu(new_cpu, vector_searchmask)
 			per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq);
-		d->cfg.vector = vector;
-		cpumask_copy(d->domain, vector_cpumask);
-		err = 0;
-		break;
-	}
+		goto update;
 
-	if (!err) {
-		/* cache destination APIC IDs into cfg->dest_apicid */
-		err = apic->cpu_mask_to_apicid_and(mask, d->domain,
-						   &d->cfg.dest_apicid);
+next_cpu:
+		/*
+		 * We exclude the current @vector_cpumask from the requested
+		 * @mask and try again with the next online cpu in the
+		 * result. We cannot modify @mask, so we use @vector_cpumask
+		 * as a temporary buffer here as it will be reassigned when
+		 * calling apic->vector_allocation_domain() above.
+		 */
+		cpumask_or(searched_cpumask, searched_cpumask, vector_cpumask);
+		cpumask_andnot(vector_cpumask, mask, searched_cpumask);
+		cpu = cpumask_first_and(vector_cpumask, cpu_online_mask);
+		continue;
 	}
+	return -ENOSPC;
 
-	return err;
+update:
+	/*
+	 * Exclude offline cpus from the cleanup mask and set the
+	 * move_in_progress flag when the result is not empty.
+	 */
+	cpumask_and(d->old_domain, d->old_domain, cpu_online_mask);
+	d->move_in_progress = !cpumask_empty(d->old_domain);
+	d->cfg.vector = vector;
+	cpumask_copy(d->domain, vector_cpumask);
success:
+	/*
+	 * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail
+	 * as we already established, that mask & d->domain & cpu_online_mask
+	 * is not empty.
+	 */
+	BUG_ON(apic->cpu_mask_to_apicid_and(mask, d->domain,
+					    &d->cfg.dest_apicid));
+	return 0;
 }
 
 static int assign_irq_vector(int irq, struct apic_chip_data *data,
@@ -226,10 +253,8 @@ static int assign_irq_vector_policy(int irq, int node,
 static void clear_irq_vector(int irq, struct apic_chip_data *data)
 {
 	struct irq_desc *desc;
-	unsigned long flags;
 	int cpu, vector;
 
-	raw_spin_lock_irqsave(&vector_lock, flags);
 	BUG_ON(!data->cfg.vector);
 
 	vector = data->cfg.vector;
@@ -239,10 +264,13 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
 	data->cfg.vector = 0;
 	cpumask_clear(data->domain);
 
-	if (likely(!data->move_in_progress)) {
-		raw_spin_unlock_irqrestore(&vector_lock, flags);
+	/*
+	 * If move is in progress or the old_domain mask is not empty,
+	 * i.e. the cleanup IPI has not been processed yet, we need to remove
+	 * the old references to desc from all cpus vector tables.
+	 */
+	if (!data->move_in_progress && cpumask_empty(data->old_domain))
 		return;
-	}
 
 	desc = irq_to_desc(irq);
 	for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) {
@@ -255,7 +283,6 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
 		}
 	}
 	data->move_in_progress = 0;
-	raw_spin_unlock_irqrestore(&vector_lock, flags);
 }
 
 void init_irq_alloc_info(struct irq_alloc_info *info,
@@ -276,19 +303,24 @@ void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src)
 static void x86_vector_free_irqs(struct irq_domain *domain,
 				 unsigned int virq, unsigned int nr_irqs)
 {
+	struct apic_chip_data *apic_data;
 	struct irq_data *irq_data;
+	unsigned long flags;
 	int i;
 
 	for (i = 0; i < nr_irqs; i++) {
 		irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i);
 		if (irq_data && irq_data->chip_data) {
+			raw_spin_lock_irqsave(&vector_lock, flags);
 			clear_irq_vector(virq + i, irq_data->chip_data);
-			free_apic_chip_data(irq_data->chip_data);
+			apic_data = irq_data->chip_data;
+			irq_domain_reset_irq_data(irq_data);
+			raw_spin_unlock_irqrestore(&vector_lock, flags);
+			free_apic_chip_data(apic_data);
 #ifdef	CONFIG_X86_IO_APIC
 			if (virq + i < nr_legacy_irqs())
 				legacy_irq_data[virq + i] = NULL;
 #endif
-			irq_domain_reset_irq_data(irq_data);
 		}
 	}
 }
@@ -406,6 +438,8 @@ int __init arch_early_irq_init(void)
 	arch_init_htirq_domain(x86_vector_domain);
 
 	BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL));
+	BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
+	BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL));
 
 	return arch_early_ioapic_init();
 }
@@ -494,14 +528,7 @@ static int apic_set_affinity(struct irq_data *irq_data,
 		return -EINVAL;
 
 	err = assign_irq_vector(irq, data, dest);
-	if (err) {
-		if (assign_irq_vector(irq, data,
-				      irq_data_get_affinity_mask(irq_data)))
-			pr_err("Failed to recover vector for irq %d\n", irq);
-		return err;
-	}
-
-	return IRQ_SET_MASK_OK;
+	return err ? err : IRQ_SET_MASK_OK;
 }
 
 static struct irq_chip lapic_controller = {
@@ -513,20 +540,12 @@ static struct irq_chip lapic_controller = {
 #ifdef CONFIG_SMP
 static void __send_cleanup_vector(struct apic_chip_data *data)
 {
-	cpumask_var_t cleanup_mask;
-
-	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
-		unsigned int i;
-
-		for_each_cpu_and(i, data->old_domain, cpu_online_mask)
-			apic->send_IPI_mask(cpumask_of(i),
-					    IRQ_MOVE_CLEANUP_VECTOR);
-	} else {
-		cpumask_and(cleanup_mask, data->old_domain, cpu_online_mask);
-		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		free_cpumask_var(cleanup_mask);
-	}
+	raw_spin_lock(&vector_lock);
+	cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
 	data->move_in_progress = 0;
+	if (!cpumask_empty(data->old_domain))
+		apic->send_IPI_mask(data->old_domain, IRQ_MOVE_CLEANUP_VECTOR);
+	raw_spin_unlock(&vector_lock);
 }
 
 void send_cleanup_vector(struct irq_cfg *cfg)
@@ -570,12 +589,25 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
 			goto unlock;
 
 		/*
-		 * Check if the irq migration is in progress. If so, we
-		 * haven't received the cleanup request yet for this irq.
+		 * Nothing to cleanup if irq migration is in progress
+		 * or this cpu is not set in the cleanup mask.
 		 */
-		if (data->move_in_progress)
+		if (data->move_in_progress ||
+		    !cpumask_test_cpu(me, data->old_domain))
 			goto unlock;
 
+		/*
+		 * We have two cases to handle here:
+		 * 1) vector is unchanged but the target mask got reduced
+		 * 2) vector and the target mask has changed
+		 *
+		 * #1 is obvious, but in #2 we have two vectors with the same
+		 * irq descriptor: the old and the new vector. So we need to
+		 * make sure that we only cleanup the old vector. The new
+		 * vector has the current @vector number in the config and
+		 * this cpu is part of the target mask. We better leave that
+		 * one alone.
+		 */
 		if (vector == data->cfg.vector &&
 		    cpumask_test_cpu(me, data->domain))
 			goto unlock;
@@ -593,6 +625,7 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
 			goto unlock;
 		}
 		__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
+		cpumask_clear_cpu(me, data->old_domain);
 unlock:
 		raw_spin_unlock(&desc->lock);
 	}
@@ -621,12 +654,48 @@ void irq_complete_move(struct irq_cfg *cfg)
 	__irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
 }
 
-void irq_force_complete_move(int irq)
+/*
+ * Called with @desc->lock held and interrupts disabled.
+ */
+void irq_force_complete_move(struct irq_desc *desc)
 {
-	struct irq_cfg *cfg = irq_cfg(irq);
+	struct irq_data *irqdata = irq_desc_get_irq_data(desc);
+	struct apic_chip_data *data = apic_chip_data(irqdata);
+	struct irq_cfg *cfg = data ? &data->cfg : NULL;
 
-	if (cfg)
-		__irq_complete_move(cfg, cfg->vector);
+	if (!cfg)
+		return;
+
+	__irq_complete_move(cfg, cfg->vector);
+
+	/*
+	 * This is tricky. If the cleanup of @data->old_domain has not been
+	 * done yet, then the following setaffinity call will fail with
+	 * -EBUSY. This can leave the interrupt in a stale state.
+	 *
+	 * The cleanup cannot make progress because we hold @desc->lock. So in
+	 * case @data->old_domain is not yet cleaned up, we need to drop the
+	 * lock and acquire it again. @desc cannot go away, because the
+	 * hotplug code holds the sparse irq lock.
+	 */
+	raw_spin_lock(&vector_lock);
+	/* Clean out all offline cpus (including ourself) first. */
+	cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
+	while (!cpumask_empty(data->old_domain)) {
+		raw_spin_unlock(&vector_lock);
+		raw_spin_unlock(&desc->lock);
+		cpu_relax();
+		raw_spin_lock(&desc->lock);
+		/*
+		 * Reevaluate apic_chip_data. It might have been cleared after
+		 * we dropped @desc->lock.
+		 */
+		data = apic_chip_data(irqdata);
+		if (!data)
+			return;
+		raw_spin_lock(&vector_lock);
+	}
+	raw_spin_unlock(&vector_lock);
 }
 #endif
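The subtlest part of the patch is the retry loop at the end of irq_force_complete_move(): the caller already holds desc->lock, the pending cleanup needs that same lock to make progress, so the function must drop both locks, wait, reacquire them in the original order and revalidate the shared state before checking again. The sketch below is only a userspace analogue of that locking pattern, not kernel code; all names in it (outer_lock, inner_lock, cleanup_pending, wait_for_cleanup, cleanup_side) are invented for the example.

/* Userspace analogue of the drop-locks-and-retry pattern; build with -pthread. */
#include <pthread.h>
#include <sched.h>
#include <stdbool.h>

static pthread_mutex_t outer_lock = PTHREAD_MUTEX_INITIALIZER;	/* plays the role of desc->lock */
static pthread_mutex_t inner_lock = PTHREAD_MUTEX_INITIALIZER;	/* plays the role of vector_lock */
static bool cleanup_pending = true;	/* stands in for !cpumask_empty(old_domain) */

/* Called with outer_lock held, mirroring the kernel function's calling convention. */
static void wait_for_cleanup(void)
{
	pthread_mutex_lock(&inner_lock);
	while (cleanup_pending) {
		/* Drop both locks so the cleanup side can make progress. */
		pthread_mutex_unlock(&inner_lock);
		pthread_mutex_unlock(&outer_lock);
		sched_yield();	/* userspace stand-in for cpu_relax() */
		/* Reacquire in the original order and revalidate the state. */
		pthread_mutex_lock(&outer_lock);
		pthread_mutex_lock(&inner_lock);
	}
	pthread_mutex_unlock(&inner_lock);
}

/* The "cleanup" side needs the same locks before it can clear the pending state. */
static void *cleanup_side(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&outer_lock);
	pthread_mutex_lock(&inner_lock);
	cleanup_pending = false;
	pthread_mutex_unlock(&inner_lock);
	pthread_mutex_unlock(&outer_lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, cleanup_side, NULL);
	pthread_mutex_lock(&outer_lock);	/* caller holds the outer lock, as in the patch */
	wait_for_cleanup();
	pthread_mutex_unlock(&outer_lock);
	pthread_join(t, NULL);
	return 0;
}

Without the unlock/relock dance the waiter would spin forever while the other side blocks on the outer lock, which is exactly the livelock the patch avoids in irq_force_complete_move().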
