aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-08-14 12:19:59 +0200
committerIngo Molnar <mingo@elte.hu>2008-08-14 12:19:59 +0200
commit8d7ccaa545490cdffdfaff0842436a8dd85cf47b (patch)
tree8129b5907161bc6ae26deb3645ce1e280c5e1f51 /arch/x86/kernel
parentX86_SMP: tlb_XX.c declare smp_invalidate_interrupt before they get used (diff)
parentLinux 2.6.27-rc3 (diff)
downloadlinux-dev-8d7ccaa545490cdffdfaff0842436a8dd85cf47b.tar.xz
linux-dev-8d7ccaa545490cdffdfaff0842436a8dd85cf47b.zip
Merge commit 'v2.6.27-rc3' into x86/prototypes
Conflicts: include/asm-x86/dma-mapping.h Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/acpi/sleep.c4
-rw-r--r--arch/x86/kernel/amd_iommu.c17
-rw-r--r--arch/x86/kernel/amd_iommu_init.c4
-rw-r--r--arch/x86/kernel/apic_32.c14
-rw-r--r--arch/x86/kernel/apm_32.c1
-rw-r--r--arch/x86/kernel/cpu/bugs.c6
-rw-r--r--arch/x86/kernel/cpu/cpufreq/Kconfig4
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c6
-rw-r--r--arch/x86/kernel/cpu/cpufreq/elanfreq.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/p4-clockmod.c6
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c120
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.h3
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c149
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-ich.c4
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_64.c20
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd_64.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c1
-rw-r--r--arch/x86/kernel/cpu/proc.c2
-rw-r--r--arch/x86/kernel/cpuid.c4
-rw-r--r--arch/x86/kernel/entry_32.S55
-rw-r--r--arch/x86/kernel/entry_64.S55
-rw-r--r--arch/x86/kernel/genapic_64.c1
-rw-r--r--arch/x86/kernel/genapic_flat_64.c2
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c4
-rw-r--r--arch/x86/kernel/head_32.S8
-rw-r--r--arch/x86/kernel/hpet.c10
-rw-r--r--arch/x86/kernel/io_apic_32.c6
-rw-r--r--arch/x86/kernel/io_apic_64.c37
-rw-r--r--arch/x86/kernel/irqinit_64.c5
-rw-r--r--arch/x86/kernel/kprobes.c6
-rw-r--r--arch/x86/kernel/ldt.c6
-rw-r--r--arch/x86/kernel/machine_kexec_32.c39
-rw-r--r--arch/x86/kernel/machine_kexec_64.c2
-rw-r--r--arch/x86/kernel/microcode.c14
-rw-r--r--arch/x86/kernel/module_64.c1
-rw-r--r--arch/x86/kernel/mpparse.c11
-rw-r--r--arch/x86/kernel/msr.c4
-rw-r--r--arch/x86/kernel/paravirt.c2
-rw-r--r--arch/x86/kernel/pci-calgary_64.c158
-rw-r--r--arch/x86/kernel/pci-dma.c163
-rw-r--r--arch/x86/kernel/pci-gart_64.c18
-rw-r--r--arch/x86/kernel/pci-nommu.c14
-rw-r--r--arch/x86/kernel/pci-swiotlb_64.c2
-rw-r--r--arch/x86/kernel/process_32.c2
-rw-r--r--arch/x86/kernel/process_64.c2
-rw-r--r--arch/x86/kernel/reboot.c3
-rw-r--r--arch/x86/kernel/relocate_kernel_32.S174
-rw-r--r--arch/x86/kernel/setup.c42
-rw-r--r--arch/x86/kernel/setup_percpu.c21
-rw-r--r--arch/x86/kernel/signal_32.c3
-rw-r--r--arch/x86/kernel/signal_64.c56
-rw-r--r--arch/x86/kernel/smpboot.c30
-rw-r--r--arch/x86/kernel/syscall_table_32.S6
-rw-r--r--arch/x86/kernel/vmi_32.c3
55 files changed, 825 insertions, 513 deletions
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index a3ddad18aaa3..fa2161d5003b 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -150,6 +150,10 @@ static int __init acpi_sleep_setup(char *str)
acpi_realmode_flags |= 2;
if (strncmp(str, "s3_beep", 7) == 0)
acpi_realmode_flags |= 4;
+#ifdef CONFIG_HIBERNATION
+ if (strncmp(str, "s4_nohwsig", 10) == 0)
+ acpi_no_s4_hw_signature();
+#endif
if (strncmp(str, "old_ordering", 12) == 0)
acpi_old_suspend_ordering();
str = strchr(str, ',');
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index c25210e6ac88..22d7d050905d 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -29,9 +29,6 @@
#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
-#define to_pages(addr, size) \
- (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
-
#define EXIT_LOOP_COUNT 10000000
static DEFINE_RWLOCK(amd_iommu_devtable_lock);
@@ -185,7 +182,7 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
u64 address, size_t size)
{
int s = 0;
- unsigned pages = to_pages(address, size);
+ unsigned pages = iommu_num_pages(address, size);
address &= PAGE_MASK;
@@ -557,8 +554,8 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
if (iommu->exclusion_start &&
iommu->exclusion_start < dma_dom->aperture_size) {
unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
- int pages = to_pages(iommu->exclusion_start,
- iommu->exclusion_length);
+ int pages = iommu_num_pages(iommu->exclusion_start,
+ iommu->exclusion_length);
dma_ops_reserve_addresses(dma_dom, startpage, pages);
}
@@ -667,7 +664,7 @@ static int get_device_resources(struct device *dev,
_bdf = calc_devid(pcidev->bus->number, pcidev->devfn);
/* device not translated by any IOMMU in the system? */
- if (_bdf >= amd_iommu_last_bdf) {
+ if (_bdf > amd_iommu_last_bdf) {
*iommu = NULL;
*domain = NULL;
*bdf = 0xffff;
@@ -767,7 +764,7 @@ static dma_addr_t __map_single(struct device *dev,
unsigned int pages;
int i;
- pages = to_pages(paddr, size);
+ pages = iommu_num_pages(paddr, size);
paddr &= PAGE_MASK;
address = dma_ops_alloc_addresses(dev, dma_dom, pages);
@@ -802,7 +799,7 @@ static void __unmap_single(struct amd_iommu *iommu,
if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size))
return;
- pages = to_pages(dma_addr, size);
+ pages = iommu_num_pages(dma_addr, size);
dma_addr &= PAGE_MASK;
start = dma_addr;
@@ -1085,7 +1082,7 @@ void prealloc_protection_domains(void)
while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
devid = (dev->bus->number << 8) | dev->devfn;
- if (devid >= amd_iommu_last_bdf)
+ if (devid > amd_iommu_last_bdf)
continue;
devid = amd_iommu_alias_table[devid];
if (domain_for_device(devid))
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index c9d8ff2eb130..d9a9da597e79 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -732,7 +732,7 @@ static int __init init_exclusion_range(struct ivmd_header *m)
set_device_exclusion_range(m->devid, m);
break;
case ACPI_IVMD_TYPE_ALL:
- for (i = 0; i < amd_iommu_last_bdf; ++i)
+ for (i = 0; i <= amd_iommu_last_bdf; ++i)
set_device_exclusion_range(i, m);
break;
case ACPI_IVMD_TYPE_RANGE:
@@ -934,7 +934,7 @@ int __init amd_iommu_init(void)
/*
* let all alias entries point to itself
*/
- for (i = 0; i < amd_iommu_last_bdf; ++i)
+ for (i = 0; i <= amd_iommu_last_bdf; ++i)
amd_iommu_alias_table[i] = i;
/*
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index d6c898358371..039a8d4aaf62 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1720,15 +1720,19 @@ static int __init parse_lapic_timer_c2_ok(char *arg)
}
early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
-static int __init apic_set_verbosity(char *str)
+static int __init apic_set_verbosity(char *arg)
{
- if (strcmp("debug", str) == 0)
+ if (!arg)
+ return -EINVAL;
+
+ if (strcmp(arg, "debug") == 0)
apic_verbosity = APIC_DEBUG;
- else if (strcmp("verbose", str) == 0)
+ else if (strcmp(arg, "verbose") == 0)
apic_verbosity = APIC_VERBOSE;
- return 1;
+
+ return 0;
}
-__setup("apic=", apic_set_verbosity);
+early_param("apic", apic_set_verbosity);
static int __init lapic_insert_resource(void)
{
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index bf9b441331e9..9ee24e6bc4b0 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -219,7 +219,6 @@
#include <linux/time.h>
#include <linux/sched.h>
#include <linux/pm.h>
-#include <linux/pm_legacy.h>
#include <linux/capability.h>
#include <linux/device.h>
#include <linux/kernel.h>
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index c9b58a806e85..c8e315f1aa83 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -50,6 +50,8 @@ static double __initdata y = 3145727.0;
*/
static void __init check_fpu(void)
{
+ s32 fdiv_bug;
+
if (!boot_cpu_data.hard_math) {
#ifndef CONFIG_MATH_EMULATION
printk(KERN_EMERG "No coprocessor found and no math emulation present.\n");
@@ -74,8 +76,10 @@ static void __init check_fpu(void)
"fistpl %0\n\t"
"fwait\n\t"
"fninit"
- : "=m" (*&boot_cpu_data.fdiv_bug)
+ : "=m" (*&fdiv_bug)
: "m" (*&x), "m" (*&y));
+
+ boot_cpu_data.fdiv_bug = fdiv_bug;
if (boot_cpu_data.fdiv_bug)
printk("Hmm, FPU with FDIV bug.\n");
}
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index cb7a5715596d..efae3b22a0ff 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -235,9 +235,9 @@ config X86_LONGHAUL
If in doubt, say N.
config X86_E_POWERSAVER
- tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)"
+ tristate "VIA C7 Enhanced PowerSaver"
select CPU_FREQ_TABLE
- depends on X86_32 && EXPERIMENTAL
+ depends on X86_32
help
This adds the CPUFreq driver for VIA C7 processors.
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index b0c8208df9fa..dd097b835839 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -202,7 +202,7 @@ static void drv_write(struct drv_cmd *cmd)
cpumask_t saved_mask = current->cpus_allowed;
unsigned int i;
- for_each_cpu_mask(i, cmd->mask) {
+ for_each_cpu_mask_nr(i, cmd->mask) {
set_cpus_allowed_ptr(current, &cpumask_of_cpu(i));
do_drv_write(cmd);
}
@@ -451,7 +451,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
freqs.old = perf->states[perf->state].core_frequency * 1000;
freqs.new = data->freq_table[next_state].frequency;
- for_each_cpu_mask(i, cmd.mask) {
+ for_each_cpu_mask_nr(i, cmd.mask) {
freqs.cpu = i;
cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
}
@@ -466,7 +466,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
}
}
- for_each_cpu_mask(i, cmd.mask) {
+ for_each_cpu_mask_nr(i, cmd.mask) {
freqs.cpu = i;
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
index 94619c22f563..e4a4bf870e94 100644
--- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
@@ -44,7 +44,7 @@ struct s_elan_multiplier {
* It is important that the frequencies
* are listed in ascending order here!
*/
-struct s_elan_multiplier elan_multiplier[] = {
+static struct s_elan_multiplier elan_multiplier[] = {
{1000, 0x02, 0x18},
{2000, 0x02, 0x10},
{4000, 0x02, 0x08},
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index 199e4e05e5dc..f1685fb91fbd 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -122,7 +122,7 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy,
return 0;
/* notifiers */
- for_each_cpu_mask(i, policy->cpus) {
+ for_each_cpu_mask_nr(i, policy->cpus) {
freqs.cpu = i;
cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
}
@@ -130,11 +130,11 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy,
/* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software
* Developer's Manual, Volume 3
*/
- for_each_cpu_mask(i, policy->cpus)
+ for_each_cpu_mask_nr(i, policy->cpus)
cpufreq_p4_setdc(i, p4clockmod_table[newstate].index);
/* notifiers */
- for_each_cpu_mask(i, policy->cpus) {
+ for_each_cpu_mask_nr(i, policy->cpus) {
freqs.cpu = i;
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 206791eb46e3..4e7271999a74 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -66,7 +66,6 @@ static u32 find_freq_from_fid(u32 fid)
return 800 + (fid * 100);
}
-
/* Return a frequency in KHz, given an input fid */
static u32 find_khz_freq_from_fid(u32 fid)
{
@@ -78,7 +77,6 @@ static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 p
return data[pstate].frequency;
}
-
/* Return the vco fid for an input fid
*
* Each "low" fid has corresponding "high" fid, and you can get to "low" fids
@@ -166,7 +164,6 @@ static void fidvid_msr_init(void)
wrmsr(MSR_FIDVID_CTL, lo, hi);
}
-
/* write the new fid value along with the other control fields to the msr */
static int write_new_fid(struct powernow_k8_data *data, u32 fid)
{
@@ -740,44 +737,63 @@ static int find_psb_table(struct powernow_k8_data *data)
#ifdef CONFIG_X86_POWERNOW_K8_ACPI
static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index)
{
- if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
+ if (!data->acpi_data->state_count || (cpu_family == CPU_HW_PSTATE))
return;
- data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK;
- data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK;
- data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
- data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK;
- data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK);
- data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK;
+ data->irt = (data->acpi_data->states[index].control >> IRT_SHIFT) & IRT_MASK;
+ data->rvo = (data->acpi_data->states[index].control >> RVO_SHIFT) & RVO_MASK;
+ data->exttype = (data->acpi_data->states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
+ data->plllock = (data->acpi_data->states[index].control >> PLL_L_SHIFT) & PLL_L_MASK;
+ data->vidmvs = 1 << ((data->acpi_data->states[index].control >> MVS_SHIFT) & MVS_MASK);
+ data->vstable = (data->acpi_data->states[index].control >> VST_SHIFT) & VST_MASK;
+}
+
+
+static struct acpi_processor_performance *acpi_perf_data;
+static int preregister_valid;
+
+static int powernow_k8_cpu_preinit_acpi(void)
+{
+ acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
+ if (!acpi_perf_data)
+ return -ENODEV;
+
+ if (acpi_processor_preregister_performance(acpi_perf_data))
+ return -ENODEV;
+ else
+ preregister_valid = 1;
+ return 0;
}
static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
{
struct cpufreq_frequency_table *powernow_table;
int ret_val;
+ int cpu = 0;
- if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
+ data->acpi_data = percpu_ptr(acpi_perf_data, cpu);
+ if (acpi_processor_register_performance(data->acpi_data, data->cpu)) {
dprintk("register performance failed: bad ACPI data\n");
return -EIO;
}
/* verify the data contained in the ACPI structures */
- if (data->acpi_data.state_count <= 1) {
+ if (data->acpi_data->state_count <= 1) {
dprintk("No ACPI P-States\n");
goto err_out;
}
- if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
- (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
+ if ((data->acpi_data->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
+ (data->acpi_data->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
dprintk("Invalid control/status registers (%x - %x)\n",
- data->acpi_data.control_register.space_id,
- data->acpi_data.status_register.space_id);
+ data->acpi_data->control_register.space_id,
+ data->acpi_data->status_register.space_id);
goto err_out;
}
/* fill in data->powernow_table */
powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
- * (data->acpi_data.state_count + 1)), GFP_KERNEL);
+ * (data->acpi_data->state_count + 1)), GFP_KERNEL);
if (!powernow_table) {
dprintk("powernow_table memory alloc failure\n");
goto err_out;
@@ -790,12 +806,12 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
if (ret_val)
goto err_out_mem;
- powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END;
- powernow_table[data->acpi_data.state_count].index = 0;
+ powernow_table[data->acpi_data->state_count].frequency = CPUFREQ_TABLE_END;
+ powernow_table[data->acpi_data->state_count].index = 0;
data->powernow_table = powernow_table;
/* fill in data */
- data->numps = data->acpi_data.state_count;
+ data->numps = data->acpi_data->state_count;
if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu)
print_basics(data);
powernow_k8_acpi_pst_values(data, 0);
@@ -803,16 +819,31 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
/* notify BIOS that we exist */
acpi_processor_notify_smm(THIS_MODULE);
+ /* determine affinity, from ACPI if available */
+ if (preregister_valid) {
+ if ((data->acpi_data->shared_type == CPUFREQ_SHARED_TYPE_ALL) ||
+ (data->acpi_data->shared_type == CPUFREQ_SHARED_TYPE_ANY))
+ data->starting_core_affinity = data->acpi_data->shared_cpu_map;
+ else
+ data->starting_core_affinity = cpumask_of_cpu(data->cpu);
+ } else {
+ /* best guess from family if not */
+ if (cpu_family == CPU_HW_PSTATE)
+ data->starting_core_affinity = cpumask_of_cpu(data->cpu);
+ else
+ data->starting_core_affinity = per_cpu(cpu_core_map, data->cpu);
+ }
+
return 0;
err_out_mem:
kfree(powernow_table);
err_out:
- acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+ acpi_processor_unregister_performance(data->acpi_data, data->cpu);
/* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */
- data->acpi_data.state_count = 0;
+ data->acpi_data->state_count = 0;
return -ENODEV;
}
@@ -824,10 +855,10 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf
rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo);
data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT;
- for (i = 0; i < data->acpi_data.state_count; i++) {
+ for (i = 0; i < data->acpi_data->state_count; i++) {
u32 index;
- index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
+ index = data->acpi_data->states[i].control & HW_PSTATE_MASK;
if (index > data->max_hw_pstate) {
printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index);
printk(KERN_ERR PFX "Please report to BIOS manufacturer\n");
@@ -843,7 +874,7 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf
powernow_table[i].index = index;
- powernow_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000;
+ powernow_table[i].frequency = data->acpi_data->states[i].core_frequency * 1000;
}
return 0;
}
@@ -852,16 +883,16 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf
{
int i;
int cntlofreq = 0;
- for (i = 0; i < data->acpi_data.state_count; i++) {
+ for (i = 0; i < data->acpi_data->state_count; i++) {
u32 fid;
u32 vid;
if (data->exttype) {
- fid = data->acpi_data.states[i].status & EXT_FID_MASK;
- vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK;
+ fid = data->acpi_data->states[i].status & EXT_FID_MASK;
+ vid = (data->acpi_data->states[i].status >> VID_SHIFT) & EXT_VID_MASK;
} else {
- fid = data->acpi_data.states[i].control & FID_MASK;
- vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK;
+ fid = data->acpi_data->states[i].control & FID_MASK;
+ vid = (data->acpi_data->states[i].control >> VID_SHIFT) & VID_MASK;
}
dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
@@ -902,10 +933,10 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf
cntlofreq = i;
}
- if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) {
+ if (powernow_table[i].frequency != (data->acpi_data->states[i].core_frequency * 1000)) {
printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n",
powernow_table[i].frequency,
- (unsigned int) (data->acpi_data.states[i].core_frequency * 1000));
+ (unsigned int) (data->acpi_data->states[i].core_frequency * 1000));
powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
continue;
}
@@ -915,11 +946,12 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf
static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
{
- if (data->acpi_data.state_count)
- acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+ if (data->acpi_data->state_count)
+ acpi_processor_unregister_performance(data->acpi_data, data->cpu);
}
#else
+static int powernow_k8_cpu_preinit_acpi(void) { return -ENODEV; }
static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; }
static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; }
static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; }
@@ -966,7 +998,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i
freqs.old = find_khz_freq_from_fid(data->currfid);
freqs.new = find_khz_freq_from_fid(fid);
- for_each_cpu_mask(i, *(data->available_cores)) {
+ for_each_cpu_mask_nr(i, *(data->available_cores)) {
freqs.cpu = i;
cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
}
@@ -974,7 +1006,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i
res = transition_fid_vid(data, fid, vid);
freqs.new = find_khz_freq_from_fid(data->currfid);
- for_each_cpu_mask(i, *(data->available_cores)) {
+ for_each_cpu_mask_nr(i, *(data->available_cores)) {
freqs.cpu = i;
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}
@@ -997,7 +1029,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
freqs.old = find_khz_freq_from_pstate(data->powernow_table, data->currpstate);
freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
- for_each_cpu_mask(i, *(data->available_cores)) {
+ for_each_cpu_mask_nr(i, *(data->available_cores)) {
freqs.cpu = i;
cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
}
@@ -1005,7 +1037,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
res = transition_pstate(data, pstate);
freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
- for_each_cpu_mask(i, *(data->available_cores)) {
+ for_each_cpu_mask_nr(i, *(data->available_cores)) {
freqs.cpu = i;
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}
@@ -1104,7 +1136,7 @@ static int powernowk8_verify(struct cpufreq_policy *pol)
static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
{
struct powernow_k8_data *data;
- cpumask_t oldmask;
+ cpumask_t oldmask = CPU_MASK_ALL;
int rc;
if (!cpu_online(pol->cpu))
@@ -1177,10 +1209,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
/* run on any CPU again */
set_cpus_allowed_ptr(current, &oldmask);
- if (cpu_family == CPU_HW_PSTATE)
- pol->cpus = cpumask_of_cpu(pol->cpu);
- else
- pol->cpus = per_cpu(cpu_core_map, pol->cpu);
+ pol->cpus = data->starting_core_affinity;
data->available_cores = &(pol->cpus);
/* Take a crude guess here.
@@ -1303,6 +1332,7 @@ static int __cpuinit powernowk8_init(void)
}
if (supported_cpus == num_online_cpus()) {
+ powernow_k8_cpu_preinit_acpi();
printk(KERN_INFO PFX "Found %d %s "
"processors (%d cpu cores) (" VERSION ")\n",
num_online_nodes(),
@@ -1319,6 +1349,10 @@ static void __exit powernowk8_exit(void)
dprintk("exit\n");
cpufreq_unregister_driver(&cpufreq_amd64_driver);
+
+#ifdef CONFIG_X86_POWERNOW_K8_ACPI
+ free_percpu(acpi_perf_data);
+#endif
}
MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com>");
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index ab48cfed4d96..a62612cd4be8 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -33,12 +33,13 @@ struct powernow_k8_data {
#ifdef CONFIG_X86_POWERNOW_K8_ACPI
/* the acpi table needs to be kept. it's only available if ACPI was
* used to determine valid frequency/vid/fid states */
- struct acpi_processor_performance acpi_data;
+ struct acpi_processor_performance *acpi_data;
#endif
/* we need to keep track of associated cores, but let cpufreq
* handle hotplug events - so just point at cpufreq pol->cpus
* structure */
cpumask_t *available_cores;
+ cpumask_t starting_core_affinity;
};
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index 908dd347c67e..15e13c01cc36 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -28,7 +28,8 @@
#define PFX "speedstep-centrino: "
#define MAINTAINER "cpufreq@lists.linux.org.uk"
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg)
+#define dprintk(msg...) \
+ cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg)
#define INTEL_MSR_RANGE (0xffff)
@@ -66,11 +67,12 @@ struct cpu_model
struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */
};
-static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x);
+static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c,
+ const struct cpu_id *x);
/* Operating points for current CPU */
-static struct cpu_model *centrino_model[NR_CPUS];
-static const struct cpu_id *centrino_cpu[NR_CPUS];
+static DEFINE_PER_CPU(struct cpu_model *, centrino_model);
+static DEFINE_PER_CPU(const struct cpu_id *, centrino_cpu);
static struct cpufreq_driver centrino_driver;
@@ -255,7 +257,7 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy)
return -ENOENT;
}
- centrino_model[policy->cpu] = model;
+ per_cpu(centrino_model, policy->cpu) = model;
dprintk("found \"%s\": max frequency: %dkHz\n",
model->model_name, model->max_freq);
@@ -264,10 +266,14 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy)
}
#else
-static inline int centrino_cpu_init_table(struct cpufreq_policy *policy) { return -ENODEV; }
+static inline int centrino_cpu_init_table(struct cpufreq_policy *policy)
+{
+ return -ENODEV;
+}
#endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */
-static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x)
+static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c,
+ const struct cpu_id *x)
{
if ((c->x86 == x->x86) &&
(c->x86_model == x->x86_model) &&
@@ -286,23 +292,28 @@ static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe)
* for centrino, as some DSDTs are buggy.
* Ideally, this can be done using the acpi_data structure.
*/
- if ((centrino_cpu[cpu] == &cpu_ids[CPU_BANIAS]) ||
- (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_A1]) ||
- (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_B0])) {
+ if ((per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_BANIAS]) ||
+ (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_A1]) ||
+ (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_B0])) {
msr = (msr >> 8) & 0xff;
return msr * 100000;
}
- if ((!centrino_model[cpu]) || (!centrino_model[cpu]->op_points))
+ if ((!per_cpu(centrino_model, cpu)) ||
+ (!per_cpu(centrino_model, cpu)->op_points))
return 0;
msr &= 0xffff;
- for (i=0;centrino_model[cpu]->op_points[i].frequency != CPUFREQ_TABLE_END; i++) {
- if (msr == centrino_model[cpu]->op_points[i].index)
- return centrino_model[cpu]->op_points[i].frequency;
+ for (i = 0;
+ per_cpu(centrino_model, cpu)->op_points[i].frequency
+ != CPUFREQ_TABLE_END;
+ i++) {
+ if (msr == per_cpu(centrino_model, cpu)->op_points[i].index)
+ return per_cpu(centrino_model, cpu)->
+ op_points[i].frequency;
}
if (failsafe)
- return centrino_model[cpu]->op_points[i-1].frequency;
+ return per_cpu(centrino_model, cpu)->op_points[i-1].frequency;
else
return 0;
}
@@ -347,7 +358,8 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
int i;
/* Only Intel makes Enhanced Speedstep-capable CPUs */
- if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST))
+ if (cpu->x86_vendor != X86_VENDOR_INTEL ||
+ !cpu_has(cpu, X86_FEATURE_EST))
return -ENODEV;
if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC))
@@ -361,9 +373,9 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
break;
if (i != N_IDS)
- centrino_cpu[policy->cpu] = &cpu_ids[i];
+ per_cpu(centrino_cpu, policy->cpu) = &cpu_ids[i];
- if (!centrino_cpu[policy->cpu]) {
+ if (!per_cpu(centrino_cpu, policy->cpu)) {
dprintk("found unsupported CPU with "
"Enhanced SpeedStep: send /proc/cpuinfo to "
MAINTAINER "\n");
@@ -386,23 +398,26 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
/* check to see if it stuck */
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
if (!(l & (1<<16))) {
- printk(KERN_INFO PFX "couldn't enable Enhanced SpeedStep\n");
+ printk(KERN_INFO PFX
+ "couldn't enable Enhanced SpeedStep\n");
return -ENODEV;
}
}
freq = get_cur_freq(policy->cpu);
-
- policy->cpuinfo.transition_latency = 10000; /* 10uS transition latency */
+ policy->cpuinfo.transition_latency = 10000;
+ /* 10uS transition latency */
policy->cur = freq;
dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur);
- ret = cpufreq_frequency_table_cpuinfo(policy, centrino_model[policy->cpu]->op_points);
+ ret = cpufreq_frequency_table_cpuinfo(policy,
+ per_cpu(centrino_model, policy->cpu)->op_points);
if (ret)
return (ret);
- cpufreq_frequency_table_get_attr(centrino_model[policy->cpu]->op_points, policy->cpu);
+ cpufreq_frequency_table_get_attr(
+ per_cpu(centrino_model, policy->cpu)->op_points, policy->cpu);
return 0;
}
@@ -411,12 +426,12 @@ static int centrino_cpu_exit(struct cpufreq_policy *policy)
{
unsigned int cpu = policy->cpu;
- if (!centrino_model[cpu])
+ if (!per_cpu(centrino_model, cpu))
return -ENODEV;
cpufreq_frequency_table_put_attr(cpu);
- centrino_model[cpu] = NULL;
+ per_cpu(centrino_model, cpu) = NULL;
return 0;
}
@@ -430,17 +445,26 @@ static int centrino_cpu_exit(struct cpufreq_policy *policy)
*/
static int centrino_verify (struct cpufreq_policy *policy)
{
- return cpufreq_frequency_table_verify(policy, centrino_model[policy->cpu]->op_points);
+ return cpufreq_frequency_table_verify(policy,
+ per_cpu(centrino_model, policy->cpu)->op_points);
}
/**
* centrino_setpolicy - set a new CPUFreq policy
* @policy: new policy
* @target_freq: the target frequency
- * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
+ * @relation: how that frequency relates to achieved frequency
+ * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
*
* Sets a new CPUFreq policy.
*/
+struct allmasks {
+ cpumask_t online_policy_cpus;
+ cpumask_t saved_mask;
+ cpumask_t set_mask;
+ cpumask_t covered_cpus;
+};
+
static int centrino_target (struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation)
@@ -448,48 +472,55 @@ static int centrino_target (struct cpufreq_policy *policy,
unsigned int newstate = 0;
unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu;
struct cpufreq_freqs freqs;
- cpumask_t online_policy_cpus;
- cpumask_t saved_mask;
- cpumask_t set_mask;
- cpumask_t covered_cpus;
int retval = 0;
unsigned int j, k, first_cpu, tmp;
-
- if (unlikely(centrino_model[cpu] == NULL))
- return -ENODEV;
+ CPUMASK_ALLOC(allmasks);
+ CPUMASK_PTR(online_policy_cpus, allmasks);
+ CPUMASK_PTR(saved_mask, allmasks);
+ CPUMASK_PTR(set_mask, allmasks);
+ CPUMASK_PTR(covered_cpus, allmasks);
+
+ if (unlikely(allmasks == NULL))
+ return -ENOMEM;
+
+ if (unlikely(per_cpu(centrino_model, cpu) == NULL)) {
+ retval = -ENODEV;
+ goto out;
+ }
if (unlikely(cpufreq_frequency_table_target(policy,
- centrino_model[cpu]->op_points,
+ per_cpu(centrino_model, cpu)->op_points,
target_freq,
relation,
&newstate))) {
- return -EINVAL;
+ retval = -EINVAL;
+ goto out;
}
#ifdef CONFIG_HOTPLUG_CPU
/* cpufreq holds the hotplug lock, so we are safe from here on */
- cpus_and(online_policy_cpus, cpu_online_map, policy->cpus);
+ cpus_and(*online_policy_cpus, cpu_online_map, policy->cpus);
#else
- online_policy_cpus = policy->cpus;
+ *online_policy_cpus = policy->cpus;
#endif
- saved_mask = current->cpus_allowed;
+ *saved_mask = current->cpus_allowed;
first_cpu = 1;
- cpus_clear(covered_cpus);
- for_each_cpu_mask(j, online_policy_cpus) {
+ cpus_clear(*covered_cpus);
+ for_each_cpu_mask_nr(j, *online_policy_cpus) {
/*
* Support for SMP systems.
* Make sure we are running on CPU that wants to change freq
*/
- cpus_clear(set_mask);
+ cpus_clear(*set_mask);
if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
- cpus_or(set_mask, set_mask, online_policy_cpus);
+ cpus_or(*set_mask, *set_mask, *online_policy_cpus);
else
- cpu_set(j, set_mask);
+ cpu_set(j, *set_mask);
- set_cpus_allowed_ptr(current, &set_mask);
+ set_cpus_allowed_ptr(current, set_mask);
preempt_disable();
- if (unlikely(!cpu_isset(smp_processor_id(), set_mask))) {
+ if (unlikely(!cpu_isset(smp_processor_id(), *set_mask))) {
dprintk("couldn't limit to CPUs in this domain\n");
retval = -EAGAIN;
if (first_cpu) {
@@ -500,7 +531,7 @@ static int centrino_target (struct cpufreq_policy *policy,
break;
}
- msr = centrino_model[cpu]->op_points[newstate].index;
+ msr = per_cpu(centrino_model, cpu)->op_points[newstate].index;
if (first_cpu) {
rdmsr(MSR_IA32_PERF_CTL, oldmsr, h);
@@ -517,7 +548,7 @@ static int centrino_target (struct cpufreq_policy *policy,
dprintk("target=%dkHz old=%d new=%d msr=%04x\n",
target_freq, freqs.old, freqs.new, msr);
- for_each_cpu_mask(k, online_policy_cpus) {
+ for_each_cpu_mask_nr(k, *online_policy_cpus) {
freqs.cpu = k;
cpufreq_notify_transition(&freqs,
CPUFREQ_PRECHANGE);
@@ -536,11 +567,11 @@ static int centrino_target (struct cpufreq_policy *policy,
break;
}
- cpu_set(j, covered_cpus);
+ cpu_set(j, *covered_cpus);
preempt_enable();
}
- for_each_cpu_mask(k, online_policy_cpus) {
+ for_each_cpu_mask_nr(k, *online_policy_cpus) {
freqs.cpu = k;
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}
@@ -553,30 +584,32 @@ static int centrino_target (struct cpufreq_policy *policy,
* Best effort undo..
*/
- if (!cpus_empty(covered_cpus)) {
- for_each_cpu_mask(j, covered_cpus) {
+ if (!cpus_empty(*covered_cpus))
+ for_each_cpu_mask_nr(j, *covered_cpus) {
set_cpus_allowed_ptr(current,
&cpumask_of_cpu(j));
wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
}
- }
tmp = freqs.new;
freqs.new = freqs.old;
freqs.old = tmp;
- for_each_cpu_mask(j, online_policy_cpus) {
+ for_each_cpu_mask_nr(j, *online_policy_cpus) {
freqs.cpu = j;
cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}
}
- set_cpus_allowed_ptr(current, &saved_mask);
- return 0;
+ set_cpus_allowed_ptr(current, saved_mask);
+ retval = 0;
+ goto out;
migrate_end:
preempt_enable();
- set_cpus_allowed_ptr(current, &saved_mask);
- return 0;
+ set_cpus_allowed_ptr(current, saved_mask);
+out:
+ CPUMASK_FREE(allmasks);
+ return retval;
}
static struct freq_attr* centrino_attr[] = {
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 1b50244b1fdf..191f7263c61d 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -279,7 +279,7 @@ static int speedstep_target (struct cpufreq_policy *policy,
cpus_allowed = current->cpus_allowed;
- for_each_cpu_mask(i, policy->cpus) {
+ for_each_cpu_mask_nr(i, policy->cpus) {
freqs.cpu = i;
cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
}
@@ -292,7 +292,7 @@ static int speedstep_target (struct cpufreq_policy *policy,
/* allow to be run on all CPUs */
set_cpus_allowed_ptr(current, &cpus_allowed);
- for_each_cpu_mask(i, policy->cpus) {
+ for_each_cpu_mask_nr(i, policy->cpus) {
freqs.cpu = i;
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
}
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index ff517f0b8cc4..6b0a10b002f1 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -489,7 +489,7 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
int sibling;
this_leaf = CPUID4_INFO_IDX(cpu, index);
- for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) {
+ for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) {
sibling_leaf = CPUID4_INFO_IDX(sibling, index);
cpu_clear(cpu, sibling_leaf->shared_cpu_map);
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index c4a7ec31394c..65a339678ece 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -580,7 +580,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
char __user *buf = ubuf;
int i, err;
- cpu_tsc = kmalloc(NR_CPUS * sizeof(long), GFP_KERNEL);
+ cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL);
if (!cpu_tsc)
return -ENOMEM;
@@ -762,10 +762,14 @@ DEFINE_PER_CPU(struct sys_device, device_mce);
/* Why are there no generic functions for this? */
#define ACCESSOR(name, var, start) \
- static ssize_t show_ ## name(struct sys_device *s, char *buf) { \
+ static ssize_t show_ ## name(struct sys_device *s, \
+ struct sysdev_attribute *attr, \
+ char *buf) { \
return sprintf(buf, "%lx\n", (unsigned long)var); \
} \
- static ssize_t set_ ## name(struct sys_device *s,const char *buf,size_t siz) { \
+ static ssize_t set_ ## name(struct sys_device *s, \
+ struct sysdev_attribute *attr, \
+ const char *buf, size_t siz) { \
char *end; \
unsigned long new = simple_strtoul(buf, &end, 0); \
if (end == buf) return -EINVAL; \
@@ -786,14 +790,16 @@ ACCESSOR(bank3ctl,bank[3],mce_restart())
ACCESSOR(bank4ctl,bank[4],mce_restart())
ACCESSOR(bank5ctl,bank[5],mce_restart())
-static ssize_t show_trigger(struct sys_device *s, char *buf)
+static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr,
+ char *buf)
{
strcpy(buf, trigger);
strcat(buf, "\n");
return strlen(trigger) + 1;
}
-static ssize_t set_trigger(struct sys_device *s,const char *buf,size_t siz)
+static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
+ const char *buf,size_t siz)
{
char *p;
int len;
@@ -806,12 +812,12 @@ static ssize_t set_trigger(struct sys_device *s,const char *buf,size_t siz)
}
static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
-ACCESSOR(tolerant,tolerant,)
+static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
ACCESSOR(check_interval,check_interval,mce_restart())
static struct sysdev_attribute *mce_attributes[] = {
&attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl,
&attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl,
- &attr_tolerant, &attr_check_interval, &attr_trigger,
+ &attr_tolerant.attr, &attr_check_interval, &attr_trigger,
NULL
};
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 7c9a813e1193..88736cadbaa6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -527,7 +527,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
if (err)
goto out_free;
- for_each_cpu_mask(i, b->cpus) {
+ for_each_cpu_mask_nr(i, b->cpus) {
if (i == cpu)
continue;
@@ -617,7 +617,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
#endif
/* remove all sibling symlinks before unregistering */
- for_each_cpu_mask(i, b->cpus) {
+ for_each_cpu_mask_nr(i, b->cpus) {
if (i == cpu)
continue;
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 1f4cc48c14c6..d5ae2243f0b9 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -35,6 +35,7 @@ atomic_t therm_throt_en = ATOMIC_INIT(0);
#define define_therm_throt_sysdev_show_func(name) \
static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \
+ struct sysdev_attribute *attr, \
char *buf) \
{ \
unsigned int cpu = dev->id; \
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 0d0d9057e7c0..a26c480b9491 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -160,7 +160,7 @@ static void *c_start(struct seq_file *m, loff_t *pos)
{
if (*pos == 0) /* just in case, cpu 0 is not the first */
*pos = first_cpu(cpu_online_map);
- if ((*pos) < NR_CPUS && cpu_online(*pos))
+ if ((*pos) < nr_cpu_ids && cpu_online(*pos))
return &cpu_data(*pos);
return NULL;
}
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 2de5fa2bbf77..14b11b3be31c 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -141,8 +141,8 @@ static __cpuinit int cpuid_device_create(int cpu)
{
struct device *dev;
- dev = device_create(cpuid_class, NULL, MKDEV(CPUID_MAJOR, cpu),
- "cpu%d", cpu);
+ dev = device_create_drvdata(cpuid_class, NULL, MKDEV(CPUID_MAJOR, cpu),
+ NULL, "cpu%d", cpu);
return IS_ERR(dev) ? PTR_ERR(dev) : 0;
}
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index cdfd94cc6b14..109792bc7cfa 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -54,6 +54,16 @@
#include <asm/ftrace.h>
#include <asm/irq_vectors.h>
+/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
+#include <linux/elf-em.h>
+#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE)
+#define __AUDIT_ARCH_LE 0x40000000
+
+#ifndef CONFIG_AUDITSYSCALL
+#define sysenter_audit syscall_trace_entry
+#define sysexit_audit syscall_exit_work
+#endif
+
/*
* We use macros for low-level operations which need to be overridden
* for paravirtualization. The following will never clobber any registers:
@@ -333,7 +343,8 @@ sysenter_past_esp:
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
- jnz syscall_trace_entry
+ jnz sysenter_audit
+sysenter_do_call:
cmpl $(nr_syscalls), %eax
jae syscall_badsys
call *sys_call_table(,%eax,4)
@@ -343,7 +354,8 @@ sysenter_past_esp:
TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx
testw $_TIF_ALLWORK_MASK, %cx
- jne syscall_exit_work
+ jne sysexit_audit
+sysenter_exit:
/* if something modifies registers it must also disable sysexit */
movl PT_EIP(%esp), %edx
movl PT_OLDESP(%esp), %ecx
@@ -351,6 +363,45 @@ sysenter_past_esp:
TRACE_IRQS_ON
1: mov PT_FS(%esp), %fs
ENABLE_INTERRUPTS_SYSEXIT
+
+#ifdef CONFIG_AUDITSYSCALL
+sysenter_audit:
+ testw $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
+ jnz syscall_trace_entry
+ addl $4,%esp
+ CFI_ADJUST_CFA_OFFSET -4
+ /* %esi already in 8(%esp) 6th arg: 4th syscall arg */
+ /* %edx already in 4(%esp) 5th arg: 3rd syscall arg */
+ /* %ecx already in 0(%esp) 4th arg: 2nd syscall arg */
+ movl %ebx,%ecx /* 3rd arg: 1st syscall arg */
+ movl %eax,%edx /* 2nd arg: syscall number */
+ movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */
+ call audit_syscall_entry
+ pushl %ebx
+ CFI_ADJUST_CFA_OFFSET 4
+ movl PT_EAX(%esp),%eax /* reload syscall number */
+ jmp sysenter_do_call
+
+sysexit_audit:
+ testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx
+ jne syscall_exit_work
+ TRACE_IRQS_ON
+ ENABLE_INTERRUPTS(CLBR_ANY)
+ movl %eax,%edx /* second arg, syscall return value */
+ cmpl $0,%eax /* is it < 0? */
+ setl %al /* 1 if so, 0 if not */
+ movzbl %al,%eax /* zero-extend that */
+ inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
+ call audit_syscall_exit
+ DISABLE_INTERRUPTS(CLBR_ANY)
+ TRACE_IRQS_OFF
+ movl TI_flags(%ebp), %ecx
+ testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx
+ jne syscall_exit_work
+ movl PT_EAX(%esp),%eax /* reload syscall return value */
+ jmp sysenter_exit
+#endif
+
CFI_ENDPROC
.pushsection .fixup,"ax"
2: movl $0,PT_FS(%esp)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 8410e26f4183..89434d439605 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -53,6 +53,12 @@
#include <asm/paravirt.h>
#include <asm/ftrace.h>
+/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
+#include <linux/elf-em.h>
+#define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
+#define __AUDIT_ARCH_64BIT 0x80000000
+#define __AUDIT_ARCH_LE 0x40000000
+
.code64
#ifdef CONFIG_FTRACE
@@ -351,6 +357,7 @@ ENTRY(system_call_after_swapgs)
GET_THREAD_INFO(%rcx)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
jnz tracesys
+system_call_fastpath:
cmpq $__NR_syscall_max,%rax
ja badsys
movq %r10,%rcx
@@ -402,16 +409,16 @@ sysret_careful:
sysret_signal:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
- testl $_TIF_DO_NOTIFY_MASK,%edx
- jz 1f
-
- /* Really a signal */
+#ifdef CONFIG_AUDITSYSCALL
+ bt $TIF_SYSCALL_AUDIT,%edx
+ jc sysret_audit
+#endif
/* edx: work flags (arg3) */
leaq do_notify_resume(%rip),%rax
leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
xorl %esi,%esi # oldset -> arg2
call ptregscall_common
-1: movl $_TIF_WORK_MASK,%edi
+ movl $_TIF_WORK_MASK,%edi
/* Use IRET because user could have changed frame. This
works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
DISABLE_INTERRUPTS(CLBR_NONE)
@@ -422,8 +429,45 @@ badsys:
movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
jmp ret_from_sys_call
+#ifdef CONFIG_AUDITSYSCALL
+ /*
+ * Fast path for syscall audit without full syscall trace.
+ * We just call audit_syscall_entry() directly, and then
+ * jump back to the normal fast path.
+ */
+auditsys:
+ movq %r10,%r9 /* 6th arg: 4th syscall arg */
+ movq %rdx,%r8 /* 5th arg: 3rd syscall arg */
+ movq %rsi,%rcx /* 4th arg: 2nd syscall arg */
+ movq %rdi,%rdx /* 3rd arg: 1st syscall arg */
+ movq %rax,%rsi /* 2nd arg: syscall number */
+ movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */
+ call audit_syscall_entry
+ LOAD_ARGS 0 /* reload call-clobbered registers */
+ jmp system_call_fastpath
+
+ /*
+ * Return fast path for syscall audit. Call audit_syscall_exit()
+ * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
+ * masked off.
+ */
+sysret_audit:
+ movq %rax,%rsi /* second arg, syscall return value */
+ cmpq $0,%rax /* is it < 0? */
+ setl %al /* 1 if so, 0 if not */
+ movzbl %al,%edi /* zero-extend that into %edi */
+ inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
+ call audit_syscall_exit
+ movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
+ jmp sysret_check
+#endif /* CONFIG_AUDITSYSCALL */
+
/* Do syscall tracing */
tracesys:
+#ifdef CONFIG_AUDITSYSCALL
+ testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
+ jz auditsys
+#endif
SAVE_REST
movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
FIXUP_TOP_OF_STACK %rdi
@@ -448,6 +492,7 @@ tracesys:
* Has correct top of stack, but partial stack frame.
*/
.globl int_ret_from_sys_call
+ .globl int_with_check
int_ret_from_sys_call:
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 1fa8be5bd217..eaff0bbb1444 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -99,3 +99,4 @@ int is_uv_system(void)
{
return uv_system_type != UV_NONE;
}
+EXPORT_SYMBOL_GPL(is_uv_system);
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 1a9c68845ee8..786548a62d38 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -168,7 +168,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
* May as well be the first.
*/
cpu = first_cpu(cpumask);
- if ((unsigned)cpu < NR_CPUS)
+ if ((unsigned)cpu < nr_cpu_ids)
return per_cpu(x86_cpu_to_apicid, cpu);
else
return BAD_APICID;
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 3c3929340692..2cfcbded888a 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -98,7 +98,7 @@ static void uv_send_IPI_mask(cpumask_t mask, int vector)
{
unsigned int cpu;
- for (cpu = 0; cpu < NR_CPUS; ++cpu)
+ for_each_possible_cpu(cpu)
if (cpu_isset(cpu, mask))
uv_send_IPI_one(cpu, vector);
}
@@ -132,7 +132,7 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
* May as well be the first.
*/
cpu = first_cpu(cpumask);
- if ((unsigned)cpu < NR_CPUS)
+ if ((unsigned)cpu < nr_cpu_ids)
return per_cpu(x86_cpu_to_apicid, cpu);
else
return BAD_APICID;
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index f67e93441caf..a7010c3a377a 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -456,9 +456,6 @@ is386: movl $2,%ecx # set MP
1:
#endif /* CONFIG_SMP */
jmp *(initial_code)
-.align 4
-ENTRY(initial_code)
- .long i386_start_kernel
/*
* We depend on ET to be correct. This checks for 287/387.
@@ -601,6 +598,11 @@ ignore_int:
#endif
iret
+.section .cpuinit.data,"wa"
+.align 4
+ENTRY(initial_code)
+ .long i386_start_kernel
+
.section .text
/*
* Real beginning of normal "text" segment
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 0ea6a19bfdfe..ad2b15a1334d 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -468,7 +468,7 @@ void hpet_disable(void)
#define RTC_NUM_INTS 1
static unsigned long hpet_rtc_flags;
-static unsigned long hpet_prev_update_sec;
+static int hpet_prev_update_sec;
static struct rtc_time hpet_alarm_time;
static unsigned long hpet_pie_count;
static unsigned long hpet_t1_cmp;
@@ -575,6 +575,9 @@ int hpet_set_rtc_irq_bit(unsigned long bit_mask)
hpet_rtc_flags |= bit_mask;
+ if ((bit_mask & RTC_UIE) && !(oldbits & RTC_UIE))
+ hpet_prev_update_sec = -1;
+
if (!oldbits)
hpet_rtc_timer_init();
@@ -652,7 +655,7 @@ static void hpet_rtc_timer_reinit(void)
if (hpet_rtc_flags & RTC_PIE)
hpet_pie_count += lost_ints;
if (printk_ratelimit())
- printk(KERN_WARNING "rtc: lost %d interrupts\n",
+ printk(KERN_WARNING "hpet1: lost %d rtc interrupts\n",
lost_ints);
}
}
@@ -670,7 +673,8 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
if (hpet_rtc_flags & RTC_UIE &&
curr_time.tm_sec != hpet_prev_update_sec) {
- rtc_int_flag = RTC_UF;
+ if (hpet_prev_update_sec >= 0)
+ rtc_int_flag = RTC_UF;
hpet_prev_update_sec = curr_time.tm_sec;
}
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index de9aa0e3a9c5..09cddb57bec4 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -57,7 +57,7 @@ atomic_t irq_mis_count;
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
static DEFINE_SPINLOCK(ioapic_lock);
-static DEFINE_SPINLOCK(vector_lock);
+DEFINE_SPINLOCK(vector_lock);
int timer_through_8259 __initdata;
@@ -1209,10 +1209,6 @@ static int assign_irq_vector(int irq)
return vector;
}
-void setup_vector_irq(int cpu)
-{
-}
-
static struct irq_chip ioapic_chip;
#define IOAPIC_AUTO -1
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 64a46affd858..61a83b70c18f 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -101,7 +101,7 @@ int timer_through_8259 __initdata;
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
static DEFINE_SPINLOCK(ioapic_lock);
-DEFINE_SPINLOCK(vector_lock);
+static DEFINE_SPINLOCK(vector_lock);
/*
* # of IRQ routing registers
@@ -697,6 +697,19 @@ static int pin_2_irq(int idx, int apic, int pin)
return irq;
}
+void lock_vector_lock(void)
+{
+ /* Used to the online set of cpus does not change
+ * during assign_irq_vector.
+ */
+ spin_lock(&vector_lock);
+}
+
+void unlock_vector_lock(void)
+{
+ spin_unlock(&vector_lock);
+}
+
static int __assign_irq_vector(int irq, cpumask_t mask)
{
/*
@@ -732,7 +745,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
return 0;
}
- for_each_cpu_mask(cpu, mask) {
+ for_each_cpu_mask_nr(cpu, mask) {
cpumask_t domain, new_mask;
int new_cpu;
int vector, offset;
@@ -753,7 +766,7 @@ next:
continue;
if (vector == IA32_SYSCALL_VECTOR)
goto next;
- for_each_cpu_mask(new_cpu, new_mask)
+ for_each_cpu_mask_nr(new_cpu, new_mask)
if (per_cpu(vector_irq, new_cpu)[vector] != -1)
goto next;
/* Found one! */
@@ -763,7 +776,7 @@ next:
cfg->move_in_progress = 1;
cfg->old_domain = cfg->domain;
}
- for_each_cpu_mask(new_cpu, new_mask)
+ for_each_cpu_mask_nr(new_cpu, new_mask)
per_cpu(vector_irq, new_cpu)[vector] = irq;
cfg->vector = vector;
cfg->domain = domain;
@@ -795,14 +808,14 @@ static void __clear_irq_vector(int irq)
vector = cfg->vector;
cpus_and(mask, cfg->domain, cpu_online_map);
- for_each_cpu_mask(cpu, mask)
+ for_each_cpu_mask_nr(cpu, mask)
per_cpu(vector_irq, cpu)[vector] = -1;
cfg->vector = 0;
cpus_clear(cfg->domain);
}
-static void __setup_vector_irq(int cpu)
+void __setup_vector_irq(int cpu)
{
/* Initialize vector_irq on a new cpu */
/* This function must be called with vector_lock held */
@@ -825,14 +838,6 @@ static void __setup_vector_irq(int cpu)
}
}
-void setup_vector_irq(int cpu)
-{
- spin_lock(&vector_lock);
- __setup_vector_irq(smp_processor_id());
- spin_unlock(&vector_lock);
-}
-
-
static struct irq_chip ioapic_chip;
static void ioapic_register_intr(int irq, unsigned long trigger)
@@ -1373,12 +1378,10 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
static int ioapic_retrigger_irq(unsigned int irq)
{
struct irq_cfg *cfg = &irq_cfg[irq];
- cpumask_t mask;
unsigned long flags;
spin_lock_irqsave(&vector_lock, flags);
- mask = cpumask_of_cpu(first_cpu(cfg->domain));
- send_IPI_mask(mask, cfg->vector);
+ send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
spin_unlock_irqrestore(&vector_lock, flags);
return 1;
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 0373e88de95a..1f26fd9ec4f4 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -43,10 +43,11 @@
#define BUILD_IRQ(nr) \
asmlinkage void IRQ_NAME(nr); \
- asm("\n.p2align\n" \
+ asm("\n.text\n.p2align\n" \
"IRQ" #nr "_interrupt:\n\t" \
"push $~(" #nr ") ; " \
- "jmp common_interrupt");
+ "jmp common_interrupt\n" \
+ ".previous");
#define BI(x,y) \
BUILD_IRQ(x##y)
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 43c019f85f0d..6c27679ec6aa 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -431,7 +431,6 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
regs->ip = (unsigned long)p->ainsn.insn;
}
-/* Called with kretprobe_lock held */
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
@@ -682,8 +681,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
INIT_HLIST_HEAD(&empty_rp);
- spin_lock_irqsave(&kretprobe_lock, flags);
- head = kretprobe_inst_table_head(current);
+ kretprobe_hash_lock(current, &head, &flags);
/* fixup registers */
#ifdef CONFIG_X86_64
regs->cs = __KERNEL_CS;
@@ -732,7 +730,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
kretprobe_assert(ri, orig_ret_address, trampoline_address);
- spin_unlock_irqrestore(&kretprobe_lock, flags);
+ kretprobe_hash_unlock(current, &flags);
hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
hlist_del(&ri->hlist);
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index c49ff2dc8f83..0ed5f939b905 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -63,12 +63,10 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
if (reload) {
#ifdef CONFIG_SMP
- cpumask_t mask;
-
preempt_disable();
load_LDT(pc);
- mask = cpumask_of_cpu(smp_processor_id());
- if (!cpus_equal(current->mm->cpu_vm_mask, mask))
+ if (!cpus_equal(current->mm->cpu_vm_mask,
+ cpumask_of_cpu(smp_processor_id())))
smp_call_function(flush_ldt, current->mm, 1);
preempt_enable();
#else
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 8864230d55af..9fe478d98406 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -22,6 +22,7 @@
#include <asm/cpufeature.h>
#include <asm/desc.h>
#include <asm/system.h>
+#include <asm/cacheflush.h>
#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
static u32 kexec_pgd[1024] PAGE_ALIGNED;
@@ -85,10 +86,12 @@ static void load_segments(void)
* reboot code buffer to allow us to avoid allocations
* later.
*
- * Currently nothing.
+ * Make control page executable.
*/
int machine_kexec_prepare(struct kimage *image)
{
+ if (nx_enabled)
+ set_pages_x(image->control_code_page, 1);
return 0;
}
@@ -98,27 +101,48 @@ int machine_kexec_prepare(struct kimage *image)
*/
void machine_kexec_cleanup(struct kimage *image)
{
+ if (nx_enabled)
+ set_pages_nx(image->control_code_page, 1);
}
/*
* Do not allocate memory (or fail in any way) in machine_kexec().
* We are past the point of no return, committed to rebooting now.
*/
-NORET_TYPE void machine_kexec(struct kimage *image)
+void machine_kexec(struct kimage *image)
{
unsigned long page_list[PAGES_NR];
void *control_page;
+ asmlinkage unsigned long
+ (*relocate_kernel_ptr)(unsigned long indirection_page,
+ unsigned long control_page,
+ unsigned long start_address,
+ unsigned int has_pae,
+ unsigned int preserve_context);
tracer_disable();
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
+ if (image->preserve_context) {
+#ifdef CONFIG_X86_IO_APIC
+ /* We need to put APICs in legacy mode so that we can
+ * get timer interrupts in second kernel. kexec/kdump
+ * paths already have calls to disable_IO_APIC() in
+ * one form or other. kexec jump path also need
+ * one.
+ */
+ disable_IO_APIC();
+#endif
+ }
+
control_page = page_address(image->control_code_page);
- memcpy(control_page, relocate_kernel, PAGE_SIZE);
+ memcpy(control_page, relocate_kernel, PAGE_SIZE/2);
+ relocate_kernel_ptr = control_page;
page_list[PA_CONTROL_PAGE] = __pa(control_page);
- page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
+ page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
page_list[PA_PGD] = __pa(kexec_pgd);
page_list[VA_PGD] = (unsigned long)kexec_pgd;
#ifdef CONFIG_X86_PAE
@@ -131,6 +155,7 @@ NORET_TYPE void machine_kexec(struct kimage *image)
page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
page_list[PA_PTE_1] = __pa(kexec_pte1);
page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
+ page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) << PAGE_SHIFT);
/* The segment registers are funny things, they have both a
* visible and an invisible part. Whenever the visible part is
@@ -149,8 +174,10 @@ NORET_TYPE void machine_kexec(struct kimage *image)
set_idt(phys_to_virt(0),0);
/* now call it */
- relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
- image->start, cpu_has_pae);
+ image->start = relocate_kernel_ptr((unsigned long)image->head,
+ (unsigned long)page_list,
+ image->start, cpu_has_pae,
+ image->preserve_context);
}
void arch_crash_save_vmcoreinfo(void)
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 9dd9262693a3..c43caa3a91f3 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -181,7 +181,7 @@ void machine_kexec_cleanup(struct kimage *image)
* Do not allocate memory (or fail in any way) in machine_kexec().
* We are past the point of no return, committed to rebooting now.
*/
-NORET_TYPE void machine_kexec(struct kimage *image)
+void machine_kexec(struct kimage *image)
{
unsigned long page_list[PAGES_NR];
void *control_page;
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index 56b933119a04..652fa5c38ebe 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -644,7 +644,9 @@ static void microcode_fini_cpu(int cpu)
mutex_unlock(&microcode_mutex);
}
-static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t sz)
+static ssize_t reload_store(struct sys_device *dev,
+ struct sysdev_attribute *attr,
+ const char *buf, size_t sz)
{
struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
char *end;
@@ -655,9 +657,7 @@ static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t sz)
if (end == buf)
return -EINVAL;
if (val == 1) {
- cpumask_t old;
-
- old = current->cpus_allowed;
+ cpumask_t old = current->cpus_allowed;
get_online_cpus();
set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
@@ -674,14 +674,16 @@ static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t sz)
return sz;
}
-static ssize_t version_show(struct sys_device *dev, char *buf)
+static ssize_t version_show(struct sys_device *dev,
+ struct sysdev_attribute *attr, char *buf)
{
struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
return sprintf(buf, "0x%x\n", uci->rev);
}
-static ssize_t pf_show(struct sys_device *dev, char *buf)
+static ssize_t pf_show(struct sys_device *dev,
+ struct sysdev_attribute *attr, char *buf)
{
struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c
index 0e867676b5a5..6ba87830d4b1 100644
--- a/arch/x86/kernel/module_64.c
+++ b/arch/x86/kernel/module_64.c
@@ -22,6 +22,7 @@
#include <linux/fs.h>
#include <linux/string.h>
#include <linux/kernel.h>
+#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/bug.h>
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 6ae005ccaed8..678090508a62 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -83,7 +83,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
if (x86_quirks->mpc_oem_bus_info)
x86_quirks->mpc_oem_bus_info(m, str);
else
- printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str);
+ apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->mpc_busid, str);
#if MAX_MP_BUSSES < 256
if (m->mpc_busid >= MAX_MP_BUSSES) {
@@ -154,7 +154,7 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m)
static void print_MP_intsrc_info(struct mpc_config_intsrc *m)
{
- printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x,"
+ apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
" IRQ %02x, APIC ID %x, APIC INT %02x\n",
m->mpc_irqtype, m->mpc_irqflag & 3,
(m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
@@ -163,7 +163,7 @@ static void print_MP_intsrc_info(struct mpc_config_intsrc *m)
static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq)
{
- printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x,"
+ apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
" IRQ %02x, APIC ID %x, APIC INT %02x\n",
mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3,
(mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus,
@@ -235,7 +235,7 @@ static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m)
{
- printk(KERN_INFO "Lint: type %d, pol %d, trig %d, bus %02x,"
+ apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x,"
" IRQ %02x, APIC ID %x, APIC LINT %02x\n",
m->mpc_irqtype, m->mpc_irqflag & 3,
(m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid,
@@ -695,7 +695,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
unsigned int *bp = phys_to_virt(base);
struct intel_mp_floating *mpf;
- printk(KERN_DEBUG "Scan SMP from %p for %ld bytes.\n", bp, length);
+ apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n",
+ bp, length);
BUILD_BUG_ON(sizeof(*mpf) != 16);
while (length > 0) {
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index a153b3905f60..9fd809552447 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -149,8 +149,8 @@ static int __cpuinit msr_device_create(int cpu)
{
struct device *dev;
- dev = device_create(msr_class, NULL, MKDEV(MSR_MAJOR, cpu),
- "msr%d", cpu);
+ dev = device_create_drvdata(msr_class, NULL, MKDEV(MSR_MAJOR, cpu),
+ NULL, "msr%d", cpu);
return IS_ERR(dev) ? PTR_ERR(dev) : 0;
}
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 097d8a6797fa..94da4d52d798 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -443,7 +443,7 @@ struct pv_mmu_ops pv_mmu_ops = {
#endif /* PAGETABLE_LEVELS >= 3 */
.pte_val = native_pte_val,
- .pte_flags = native_pte_val,
+ .pte_flags = native_pte_flags,
.pgd_val = native_pgd_val,
.make_pte = native_make_pte,
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 151f2d171f7c..02d19328525d 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -29,6 +29,7 @@
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/string.h>
+#include <linux/crash_dump.h>
#include <linux/dma-mapping.h>
#include <linux/bitops.h>
#include <linux/pci_ids.h>
@@ -36,6 +37,7 @@
#include <linux/delay.h>
#include <linux/scatterlist.h>
#include <linux/iommu-helper.h>
+
#include <asm/iommu.h>
#include <asm/calgary.h>
#include <asm/tce.h>
@@ -167,6 +169,8 @@ static void calgary_dump_error_regs(struct iommu_table *tbl);
static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev);
static void calioc2_tce_cache_blast(struct iommu_table *tbl);
static void calioc2_dump_error_regs(struct iommu_table *tbl);
+static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl);
+static void get_tce_space_from_tar(void);
static struct cal_chipset_ops calgary_chip_ops = {
.handle_quirks = calgary_handle_quirks,
@@ -410,22 +414,6 @@ static void calgary_unmap_sg(struct device *dev,
}
}
-static int calgary_nontranslate_map_sg(struct device* dev,
- struct scatterlist *sg, int nelems, int direction)
-{
- struct scatterlist *s;
- int i;
-
- for_each_sg(sg, s, nelems, i) {
- struct page *p = sg_page(s);
-
- BUG_ON(!p);
- s->dma_address = virt_to_bus(sg_virt(s));
- s->dma_length = s->length;
- }
- return nelems;
-}
-
static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
int nelems, int direction)
{
@@ -436,9 +424,6 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
unsigned long entry;
int i;
- if (!translation_enabled(tbl))
- return calgary_nontranslate_map_sg(dev, sg, nelems, direction);
-
for_each_sg(sg, s, nelems, i) {
BUG_ON(!sg_page(s));
@@ -474,7 +459,6 @@ error:
static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr,
size_t size, int direction)
{
- dma_addr_t dma_handle = bad_dma_address;
void *vaddr = phys_to_virt(paddr);
unsigned long uaddr;
unsigned int npages;
@@ -483,12 +467,7 @@ static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr,
uaddr = (unsigned long)vaddr;
npages = num_dma_pages(uaddr, size);
- if (translation_enabled(tbl))
- dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction);
- else
- dma_handle = virt_to_bus(vaddr);
-
- return dma_handle;
+ return iommu_alloc(dev, tbl, vaddr, npages, direction);
}
static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle,
@@ -497,9 +476,6 @@ static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle,
struct iommu_table *tbl = find_iommu_table(dev);
unsigned int npages;
- if (!translation_enabled(tbl))
- return;
-
npages = num_dma_pages(dma_handle, size);
iommu_free(tbl, dma_handle, npages);
}
@@ -522,18 +498,12 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size,
goto error;
memset(ret, 0, size);
- if (translation_enabled(tbl)) {
- /* set up tces to cover the allocated range */
- mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL);
- if (mapping == bad_dma_address)
- goto free;
-
- *dma_handle = mapping;
- } else /* non translated slot */
- *dma_handle = virt_to_bus(ret);
-
+ /* set up tces to cover the allocated range */
+ mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL);
+ if (mapping == bad_dma_address)
+ goto free;
+ *dma_handle = mapping;
return ret;
-
free:
free_pages((unsigned long)ret, get_order(size));
ret = NULL;
@@ -541,7 +511,7 @@ error:
return ret;
}
-static const struct dma_mapping_ops calgary_dma_ops = {
+static struct dma_mapping_ops calgary_dma_ops = {
.alloc_coherent = calgary_alloc_coherent,
.map_single = calgary_map_single,
.unmap_single = calgary_unmap_single,
@@ -830,7 +800,11 @@ static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar)
tbl = pci_iommu(dev->bus);
tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space;
- tce_free(tbl, 0, tbl->it_size);
+
+ if (is_kdump_kernel())
+ calgary_init_bitmap_from_tce_table(tbl);
+ else
+ tce_free(tbl, 0, tbl->it_size);
if (is_calgary(dev->device))
tbl->chip_ops = &calgary_chip_ops;
@@ -1209,6 +1183,10 @@ static int __init calgary_init(void)
if (ret)
return ret;
+ /* Purely for kdump kernel case */
+ if (is_kdump_kernel())
+ get_tce_space_from_tar();
+
do {
dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev);
if (!dev)
@@ -1230,6 +1208,16 @@ static int __init calgary_init(void)
goto error;
} while (1);
+ dev = NULL;
+ for_each_pci_dev(dev) {
+ struct iommu_table *tbl;
+
+ tbl = find_iommu_table(&dev->dev);
+
+ if (translation_enabled(tbl))
+ dev->dev.archdata.dma_ops = &calgary_dma_ops;
+ }
+
return ret;
error:
@@ -1251,6 +1239,7 @@ error:
calgary_disable_translation(dev);
calgary_free_bus(dev);
pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */
+ dev->dev.archdata.dma_ops = NULL;
} while (1);
return ret;
@@ -1339,6 +1328,61 @@ static int __init calgary_bus_has_devices(int bus, unsigned short pci_dev)
return (val != 0xffffffff);
}
+/*
+ * calgary_init_bitmap_from_tce_table():
+ * Funtion for kdump case. In the second/kdump kernel initialize
+ * the bitmap based on the tce table entries obtained from first kernel
+ */
+static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl)
+{
+ u64 *tp;
+ unsigned int index;
+ tp = ((u64 *)tbl->it_base);
+ for (index = 0 ; index < tbl->it_size; index++) {
+ if (*tp != 0x0)
+ set_bit(index, tbl->it_map);
+ tp++;
+ }
+}
+
+/*
+ * get_tce_space_from_tar():
+ * Function for kdump case. Get the tce tables from first kernel
+ * by reading the contents of the base adress register of calgary iommu
+ */
+static void get_tce_space_from_tar(void)
+{
+ int bus;
+ void __iomem *target;
+ unsigned long tce_space;
+
+ for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
+ struct calgary_bus_info *info = &bus_info[bus];
+ unsigned short pci_device;
+ u32 val;
+
+ val = read_pci_config(bus, 0, 0, 0);
+ pci_device = (val & 0xFFFF0000) >> 16;
+
+ if (!is_cal_pci_dev(pci_device))
+ continue;
+ if (info->translation_disabled)
+ continue;
+
+ if (calgary_bus_has_devices(bus, pci_device) ||
+ translate_empty_slots) {
+ target = calgary_reg(bus_info[bus].bbar,
+ tar_offset(bus));
+ tce_space = be64_to_cpu(readq(target));
+ tce_space = tce_space & TAR_SW_BITS;
+
+ tce_space = tce_space & (~specified_table_size);
+ info->tce_space = (u64 *)__va(tce_space);
+ }
+ }
+ return;
+}
+
void __init detect_calgary(void)
{
int bus;
@@ -1394,7 +1438,8 @@ void __init detect_calgary(void)
return;
}
- specified_table_size = determine_tce_table_size(max_pfn * PAGE_SIZE);
+ specified_table_size = determine_tce_table_size((is_kdump_kernel() ?
+ saved_max_pfn : max_pfn) * PAGE_SIZE);
for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
struct calgary_bus_info *info = &bus_info[bus];
@@ -1412,10 +1457,16 @@ void __init detect_calgary(void)
if (calgary_bus_has_devices(bus, pci_device) ||
translate_empty_slots) {
- tbl = alloc_tce_table();
- if (!tbl)
- goto cleanup;
- info->tce_space = tbl;
+ /*
+ * If it is kdump kernel, find and use tce tables
+ * from first kernel, else allocate tce tables here
+ */
+ if (!is_kdump_kernel()) {
+ tbl = alloc_tce_table();
+ if (!tbl)
+ goto cleanup;
+ info->tce_space = tbl;
+ }
calgary_found = 1;
}
}
@@ -1430,6 +1481,10 @@ void __init detect_calgary(void)
printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, "
"CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size,
debugging ? "enabled" : "disabled");
+
+ /* swiotlb for devices that aren't behind the Calgary. */
+ if (max_pfn > MAX_DMA32_PFN)
+ swiotlb = 1;
}
return;
@@ -1446,7 +1501,7 @@ int __init calgary_iommu_init(void)
{
int ret;
- if (no_iommu || swiotlb)
+ if (no_iommu || (swiotlb && !calgary_detected))
return -ENODEV;
if (!calgary_detected)
@@ -1459,15 +1514,14 @@ int __init calgary_iommu_init(void)
if (ret) {
printk(KERN_ERR "PCI-DMA: Calgary init failed %d, "
"falling back to no_iommu\n", ret);
- if (max_pfn > MAX_DMA32_PFN)
- printk(KERN_ERR "WARNING more than 4GB of memory, "
- "32bit PCI may malfunction.\n");
return ret;
}
force_iommu = 1;
bad_dma_address = 0x0;
- dma_ops = &calgary_dma_ops;
+ /* dma_ops is set to swiotlb or nommu */
+ if (!dma_ops)
+ dma_ops = &nommu_dma_ops;
return 0;
}
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index a4213c00dffc..87d4d6964ec2 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -11,7 +11,7 @@
static int forbid_dac __read_mostly;
-const struct dma_mapping_ops *dma_ops;
+struct dma_mapping_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);
static int iommu_sac_force __read_mostly;
@@ -123,6 +123,14 @@ void __init pci_iommu_alloc(void)
pci_swiotlb_init();
}
+
+unsigned long iommu_num_pages(unsigned long addr, unsigned long len)
+{
+ unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE);
+
+ return size >> PAGE_SHIFT;
+}
+EXPORT_SYMBOL(iommu_num_pages);
#endif
/*
@@ -192,136 +200,19 @@ static __init int iommu_setup(char *p)
}
early_param("iommu", iommu_setup);
-#ifdef CONFIG_X86_32
-int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
- dma_addr_t device_addr, size_t size, int flags)
-{
- void __iomem *mem_base = NULL;
- int pages = size >> PAGE_SHIFT;
- int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
-
- if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
- goto out;
- if (!size)
- goto out;
- if (dev->dma_mem)
- goto out;
-
- /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
-
- mem_base = ioremap(bus_addr, size);
- if (!mem_base)
- goto out;
-
- dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
- if (!dev->dma_mem)
- goto out;
- dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
- if (!dev->dma_mem->bitmap)
- goto free1_out;
-
- dev->dma_mem->virt_base = mem_base;
- dev->dma_mem->device_base = device_addr;
- dev->dma_mem->size = pages;
- dev->dma_mem->flags = flags;
-
- if (flags & DMA_MEMORY_MAP)
- return DMA_MEMORY_MAP;
-
- return DMA_MEMORY_IO;
-
- free1_out:
- kfree(dev->dma_mem);
- out:
- if (mem_base)
- iounmap(mem_base);
- return 0;
-}
-EXPORT_SYMBOL(dma_declare_coherent_memory);
-
-void dma_release_declared_memory(struct device *dev)
-{
- struct dma_coherent_mem *mem = dev->dma_mem;
-
- if (!mem)
- return;
- dev->dma_mem = NULL;
- iounmap(mem->virt_base);
- kfree(mem->bitmap);
- kfree(mem);
-}
-EXPORT_SYMBOL(dma_release_declared_memory);
-
-void *dma_mark_declared_memory_occupied(struct device *dev,
- dma_addr_t device_addr, size_t size)
-{
- struct dma_coherent_mem *mem = dev->dma_mem;
- int pos, err;
- int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1);
-
- pages >>= PAGE_SHIFT;
-
- if (!mem)
- return ERR_PTR(-EINVAL);
-
- pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
- err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages));
- if (err != 0)
- return ERR_PTR(err);
- return mem->virt_base + (pos << PAGE_SHIFT);
-}
-EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
-
-static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size,
- dma_addr_t *dma_handle, void **ret)
-{
- struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
- int order = get_order(size);
-
- if (mem) {
- int page = bitmap_find_free_region(mem->bitmap, mem->size,
- order);
- if (page >= 0) {
- *dma_handle = mem->device_base + (page << PAGE_SHIFT);
- *ret = mem->virt_base + (page << PAGE_SHIFT);
- memset(*ret, 0, size);
- }
- if (mem->flags & DMA_MEMORY_EXCLUSIVE)
- *ret = NULL;
- }
- return (mem != NULL);
-}
-
-static int dma_release_coherent(struct device *dev, int order, void *vaddr)
-{
- struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
-
- if (mem && vaddr >= mem->virt_base && vaddr <
- (mem->virt_base + (mem->size << PAGE_SHIFT))) {
- int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
-
- bitmap_release_region(mem->bitmap, page, order);
- return 1;
- }
- return 0;
-}
-#else
-#define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0)
-#define dma_release_coherent(dev, order, vaddr) (0)
-#endif /* CONFIG_X86_32 */
-
int dma_supported(struct device *dev, u64 mask)
{
+ struct dma_mapping_ops *ops = get_dma_ops(dev);
+
#ifdef CONFIG_PCI
if (mask > 0xffffffff && forbid_dac > 0) {
- printk(KERN_INFO "PCI: Disallowing DAC for device %s\n",
- dev->bus_id);
+ dev_info(dev, "PCI: Disallowing DAC for device\n");
return 0;
}
#endif
- if (dma_ops->dma_supported)
- return dma_ops->dma_supported(dev, mask);
+ if (ops->dma_supported)
+ return ops->dma_supported(dev, mask);
/* Copied from i386. Doesn't make much sense, because it will
only work for pci_alloc_coherent.
@@ -342,8 +233,7 @@ int dma_supported(struct device *dev, u64 mask)
type. Normally this doesn't make any difference, but gives
more gentle handling of IOMMU overflow. */
if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {
- printk(KERN_INFO "%s: Force SAC with mask %Lx\n",
- dev->bus_id, mask);
+ dev_info(dev, "Force SAC with mask %Lx\n", mask);
return 0;
}
@@ -369,6 +259,7 @@ void *
dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t gfp)
{
+ struct dma_mapping_ops *ops = get_dma_ops(dev);
void *memory = NULL;
struct page *page;
unsigned long dma_mask = 0;
@@ -378,7 +269,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
/* ignore region specifiers */
gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
- if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory))
+ if (dma_alloc_from_coherent(dev, size, dma_handle, &memory))
return memory;
if (!dev) {
@@ -437,8 +328,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
/* Let low level make its own zone decisions */
gfp &= ~(GFP_DMA32|GFP_DMA);
- if (dma_ops->alloc_coherent)
- return dma_ops->alloc_coherent(dev, size,
+ if (ops->alloc_coherent)
+ return ops->alloc_coherent(dev, size,
dma_handle, gfp);
return NULL;
}
@@ -450,14 +341,14 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
}
}
- if (dma_ops->alloc_coherent) {
+ if (ops->alloc_coherent) {
free_pages((unsigned long)memory, get_order(size));
gfp &= ~(GFP_DMA|GFP_DMA32);
- return dma_ops->alloc_coherent(dev, size, dma_handle, gfp);
+ return ops->alloc_coherent(dev, size, dma_handle, gfp);
}
- if (dma_ops->map_simple) {
- *dma_handle = dma_ops->map_simple(dev, virt_to_phys(memory),
+ if (ops->map_simple) {
+ *dma_handle = ops->map_simple(dev, virt_to_phys(memory),
size,
PCI_DMA_BIDIRECTIONAL);
if (*dma_handle != bad_dma_address)
@@ -479,12 +370,14 @@ EXPORT_SYMBOL(dma_alloc_coherent);
void dma_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t bus)
{
+ struct dma_mapping_ops *ops = get_dma_ops(dev);
+
int order = get_order(size);
WARN_ON(irqs_disabled()); /* for portability */
- if (dma_release_coherent(dev, order, vaddr))
+ if (dma_release_from_coherent(dev, order, vaddr))
return;
- if (dma_ops->unmap_single)
- dma_ops->unmap_single(dev, bus, size, 0);
+ if (ops->unmap_single)
+ ops->unmap_single(dev, bus, size, 0);
free_pages((unsigned long)vaddr, order);
}
EXPORT_SYMBOL(dma_free_coherent);
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index be60961f8695..49285f8fd4d5 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -67,9 +67,6 @@ static u32 gart_unmapped_entry;
(((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))
-#define to_pages(addr, size) \
- (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
-
#define EMERGENCY_PAGES 32 /* = 128KB */
#ifdef CONFIG_AGP
@@ -198,9 +195,7 @@ static void iommu_full(struct device *dev, size_t size, int dir)
* out. Hopefully no network devices use single mappings that big.
*/
- printk(KERN_ERR
- "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n",
- size, dev->bus_id);
+ dev_err(dev, "PCI-DMA: Out of IOMMU space for %lu bytes\n", size);
if (size > PAGE_SIZE*EMERGENCY_PAGES) {
if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
@@ -243,7 +238,7 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
size_t size, int dir)
{
- unsigned long npages = to_pages(phys_mem, size);
+ unsigned long npages = iommu_num_pages(phys_mem, size);
unsigned long iommu_page = alloc_iommu(dev, npages);
int i;
@@ -306,7 +301,7 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
return;
iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
- npages = to_pages(dma_addr, size);
+ npages = iommu_num_pages(dma_addr, size);
for (i = 0; i < npages; i++) {
iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
CLEAR_LEAK(iommu_page + i);
@@ -389,7 +384,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start,
}
addr = phys_addr;
- pages = to_pages(s->offset, s->length);
+ pages = iommu_num_pages(s->offset, s->length);
while (pages--) {
iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
SET_LEAK(iommu_page);
@@ -472,7 +467,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
seg_size += s->length;
need = nextneed;
- pages += to_pages(s->offset, s->length);
+ pages += iommu_num_pages(s->offset, s->length);
ps = s;
}
if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0)
@@ -694,8 +689,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
extern int agp_amd64_init(void);
-static const struct dma_mapping_ops gart_dma_ops = {
- .mapping_error = NULL,
+static struct dma_mapping_ops gart_dma_ops = {
.map_single = gart_map_single,
.map_simple = gart_map_simple,
.unmap_single = gart_unmap_single,
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index 792b9179eff3..3f91f71cdc3e 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -72,21 +72,9 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
return nents;
}
-/* Make sure we keep the same behaviour */
-static int nommu_mapping_error(dma_addr_t dma_addr)
-{
-#ifdef CONFIG_X86_32
- return 0;
-#else
- return (dma_addr == bad_dma_address);
-#endif
-}
-
-
-const struct dma_mapping_ops nommu_dma_ops = {
+struct dma_mapping_ops nommu_dma_ops = {
.map_single = nommu_map_single,
.map_sg = nommu_map_sg,
- .mapping_error = nommu_mapping_error,
.is_phys = 1,
};
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 20df839b9c20..c4ce0332759e 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -18,7 +18,7 @@ swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size,
return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction);
}
-const struct dma_mapping_ops swiotlb_dma_ops = {
+struct dma_mapping_ops swiotlb_dma_ops = {
.mapping_error = swiotlb_dma_mapping_error,
.alloc_coherent = swiotlb_alloc_coherent,
.free_coherent = swiotlb_free_coherent,
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 7218bccd1766..7b6e44a7c624 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -130,7 +130,7 @@ void cpu_idle(void)
/* endless idle loop with no priority at all */
while (1) {
- tick_nohz_stop_sched_tick();
+ tick_nohz_stop_sched_tick(1);
while (!need_resched()) {
check_pgt_cache();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index c78090dd0c52..87d7dfdcf46c 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -121,7 +121,7 @@ void cpu_idle(void)
current_thread_info()->status |= TS_POLLING;
/* endless idle loop with no priority at all */
while (1) {
- tick_nohz_stop_sched_tick();
+ tick_nohz_stop_sched_tick(1);
while (!need_resched()) {
rmb();
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 9dcf39c02972..724adfc63cb9 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -411,10 +411,9 @@ void native_machine_shutdown(void)
{
/* Stop the cpus and apics */
#ifdef CONFIG_SMP
- int reboot_cpu_id;
/* The boot cpu is always logical cpu 0 */
- reboot_cpu_id = 0;
+ int reboot_cpu_id = 0;
#ifdef CONFIG_X86_32
/* See if there has been given a command line override */
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index c30fe25d470d..703310a99023 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -20,11 +20,44 @@
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
#define PAE_PGD_ATTR (_PAGE_PRESENT)
+/* control_page + PAGE_SIZE/2 ~ control_page + PAGE_SIZE * 3/4 are
+ * used to save some data for jumping back
+ */
+#define DATA(offset) (PAGE_SIZE/2+(offset))
+
+/* Minimal CPU state */
+#define ESP DATA(0x0)
+#define CR0 DATA(0x4)
+#define CR3 DATA(0x8)
+#define CR4 DATA(0xc)
+
+/* other data */
+#define CP_VA_CONTROL_PAGE DATA(0x10)
+#define CP_PA_PGD DATA(0x14)
+#define CP_PA_SWAP_PAGE DATA(0x18)
+#define CP_PA_BACKUP_PAGES_MAP DATA(0x1c)
+
.text
.align PAGE_SIZE
.globl relocate_kernel
relocate_kernel:
- movl 8(%esp), %ebp /* list of pages */
+ /* Save the CPU context, used for jumping back */
+
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ pushl %ebp
+ pushf
+
+ movl 20+8(%esp), %ebp /* list of pages */
+ movl PTR(VA_CONTROL_PAGE)(%ebp), %edi
+ movl %esp, ESP(%edi)
+ movl %cr0, %eax
+ movl %eax, CR0(%edi)
+ movl %cr3, %eax
+ movl %eax, CR3(%edi)
+ movl %cr4, %eax
+ movl %eax, CR4(%edi)
#ifdef CONFIG_X86_PAE
/* map the control page at its virtual address */
@@ -138,15 +171,25 @@ relocate_kernel:
relocate_new_kernel:
/* read the arguments and say goodbye to the stack */
- movl 4(%esp), %ebx /* page_list */
- movl 8(%esp), %ebp /* list of pages */
- movl 12(%esp), %edx /* start address */
- movl 16(%esp), %ecx /* cpu_has_pae */
+ movl 20+4(%esp), %ebx /* page_list */
+ movl 20+8(%esp), %ebp /* list of pages */
+ movl 20+12(%esp), %edx /* start address */
+ movl 20+16(%esp), %ecx /* cpu_has_pae */
+ movl 20+20(%esp), %esi /* preserve_context */
/* zero out flags, and disable interrupts */
pushl $0
popfl
+ /* save some information for jumping back */
+ movl PTR(VA_CONTROL_PAGE)(%ebp), %edi
+ movl %edi, CP_VA_CONTROL_PAGE(%edi)
+ movl PTR(PA_PGD)(%ebp), %eax
+ movl %eax, CP_PA_PGD(%edi)
+ movl PTR(PA_SWAP_PAGE)(%ebp), %eax
+ movl %eax, CP_PA_SWAP_PAGE(%edi)
+ movl %ebx, CP_PA_BACKUP_PAGES_MAP(%edi)
+
/* get physical address of control page now */
/* this is impossible after page table switch */
movl PTR(PA_CONTROL_PAGE)(%ebp), %edi
@@ -197,8 +240,90 @@ identity_mapped:
xorl %eax, %eax
movl %eax, %cr3
+ movl CP_PA_SWAP_PAGE(%edi), %eax
+ pushl %eax
+ pushl %ebx
+ call swap_pages
+ addl $8, %esp
+
+ /* To be certain of avoiding problems with self-modifying code
+ * I need to execute a serializing instruction here.
+ * So I flush the TLB, it's handy, and not processor dependent.
+ */
+ xorl %eax, %eax
+ movl %eax, %cr3
+
+ /* set all of the registers to known values */
+ /* leave %esp alone */
+
+ testl %esi, %esi
+ jnz 1f
+ xorl %edi, %edi
+ xorl %eax, %eax
+ xorl %ebx, %ebx
+ xorl %ecx, %ecx
+ xorl %edx, %edx
+ xorl %esi, %esi
+ xorl %ebp, %ebp
+ ret
+1:
+ popl %edx
+ movl CP_PA_SWAP_PAGE(%edi), %esp
+ addl $PAGE_SIZE, %esp
+2:
+ call *%edx
+
+ /* get the re-entry point of the peer system */
+ movl 0(%esp), %ebp
+ call 1f
+1:
+ popl %ebx
+ subl $(1b - relocate_kernel), %ebx
+ movl CP_VA_CONTROL_PAGE(%ebx), %edi
+ lea PAGE_SIZE(%ebx), %esp
+ movl CP_PA_SWAP_PAGE(%ebx), %eax
+ movl CP_PA_BACKUP_PAGES_MAP(%ebx), %edx
+ pushl %eax
+ pushl %edx
+ call swap_pages
+ addl $8, %esp
+ movl CP_PA_PGD(%ebx), %eax
+ movl %eax, %cr3
+ movl %cr0, %eax
+ orl $(1<<31), %eax
+ movl %eax, %cr0
+ lea PAGE_SIZE(%edi), %esp
+ movl %edi, %eax
+ addl $(virtual_mapped - relocate_kernel), %eax
+ pushl %eax
+ ret
+
+virtual_mapped:
+ movl CR4(%edi), %eax
+ movl %eax, %cr4
+ movl CR3(%edi), %eax
+ movl %eax, %cr3
+ movl CR0(%edi), %eax
+ movl %eax, %cr0
+ movl ESP(%edi), %esp
+ movl %ebp, %eax
+
+ popf
+ popl %ebp
+ popl %edi
+ popl %esi
+ popl %ebx
+ ret
+
/* Do the copies */
- movl %ebx, %ecx
+swap_pages:
+ movl 8(%esp), %edx
+ movl 4(%esp), %ecx
+ pushl %ebp
+ pushl %ebx
+ pushl %edi
+ pushl %esi
+ movl %ecx, %ebx
jmp 1f
0: /* top, read another word from the indirection page */
@@ -226,27 +351,28 @@ identity_mapped:
movl %ecx, %esi /* For every source page do a copy */
andl $0xfffff000, %esi
+ movl %edi, %eax
+ movl %esi, %ebp
+
+ movl %edx, %edi
movl $1024, %ecx
rep ; movsl
- jmp 0b
-3:
-
- /* To be certain of avoiding problems with self-modifying code
- * I need to execute a serializing instruction here.
- * So I flush the TLB, it's handy, and not processor dependent.
- */
- xorl %eax, %eax
- movl %eax, %cr3
+ movl %ebp, %edi
+ movl %eax, %esi
+ movl $1024, %ecx
+ rep ; movsl
- /* set all of the registers to known values */
- /* leave %esp alone */
+ movl %eax, %edi
+ movl %edx, %esi
+ movl $1024, %ecx
+ rep ; movsl
- xorl %eax, %eax
- xorl %ebx, %ebx
- xorl %ecx, %ecx
- xorl %edx, %edx
- xorl %esi, %esi
- xorl %edi, %edi
- xorl %ebp, %ebp
+ lea PAGE_SIZE(%ebp), %esi
+ jmp 0b
+3:
+ popl %esi
+ popl %edi
+ popl %ebx
+ popl %ebp
ret
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ec952aa5394a..68b48e3fbcbd 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -597,13 +597,21 @@ void __init setup_arch(char **cmdline_p)
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
visws_early_detect();
pre_setup_arch_hook();
- early_cpu_init();
#else
printk(KERN_INFO "Command line: %s\n", boot_command_line);
#endif
+ early_cpu_init();
early_ioremap_init();
+#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
+ /*
+ * Must be before kernel pagetables are setup
+ * or fixmap area is touched.
+ */
+ vmi_init();
+#endif
+
ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
screen_info = boot_params.screen_info;
edid_info = boot_params.edid_info;
@@ -665,9 +673,6 @@ void __init setup_arch(char **cmdline_p)
bss_resource.start = virt_to_phys(&__bss_start);
bss_resource.end = virt_to_phys(&__bss_stop)-1;
-#ifdef CONFIG_X86_64
- early_cpu_init();
-#endif
strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line;
@@ -680,7 +685,7 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_X86_LOCAL_APIC
disable_apic = 1;
#endif
- clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
+ setup_clear_cpu_cap(X86_FEATURE_APIC);
}
#ifdef CONFIG_PCI
@@ -791,10 +796,6 @@ void __init setup_arch(char **cmdline_p)
initmem_init(0, max_pfn);
-#ifdef CONFIG_X86_64
- dma32_reserve_bootmem();
-#endif
-
#ifdef CONFIG_ACPI_SLEEP
/*
* Reserve low memory region for sleep support.
@@ -809,20 +810,21 @@ void __init setup_arch(char **cmdline_p)
#endif
reserve_crashkernel();
+#ifdef CONFIG_X86_64
+ /*
+ * dma32_reserve_bootmem() allocates bootmem which may conflict
+ * with the crashkernel command line, so do that after
+ * reserve_crashkernel()
+ */
+ dma32_reserve_bootmem();
+#endif
+
reserve_ibft_region();
#ifdef CONFIG_KVM_CLOCK
kvmclock_init();
#endif
-#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
- /*
- * Must be after max_low_pfn is determined, and before kernel
- * pagetables are setup.
- */
- vmi_init();
-#endif
-
paravirt_pagetable_setup_start(swapper_pg_dir);
paging_init();
paravirt_pagetable_setup_done(swapper_pg_dir);
@@ -859,12 +861,6 @@ void __init setup_arch(char **cmdline_p)
init_apic_mappings();
ioapic_init_mappings();
-#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) && defined(CONFIG_X86_32)
- if (def_to_bigsmp)
- printk(KERN_WARNING "More than 8 CPUs detected and "
- "CONFIG_X86_PC cannot handle it.\nUse "
- "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
-#endif
kvm_guest_init();
e820_reserve_resources();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index f7745f94c006..76e305e064f9 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -80,24 +80,6 @@ static void __init setup_per_cpu_maps(void)
#endif
}
-#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
-cpumask_t *cpumask_of_cpu_map __read_mostly;
-EXPORT_SYMBOL(cpumask_of_cpu_map);
-
-/* requires nr_cpu_ids to be initialized */
-static void __init setup_cpumask_of_cpu(void)
-{
- int i;
-
- /* alloc_bootmem zeroes memory */
- cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids);
- for (i = 0; i < nr_cpu_ids; i++)
- cpu_set(i, cpumask_of_cpu_map[i]);
-}
-#else
-static inline void setup_cpumask_of_cpu(void) { }
-#endif
-
#ifdef CONFIG_X86_32
/*
* Great future not-so-futuristic plan: make i386 and x86_64 do it
@@ -197,9 +179,6 @@ void __init setup_per_cpu_areas(void)
/* Setup node to cpumask map */
setup_node_to_cpumask_map();
-
- /* Setup cpumask_of_cpu map */
- setup_cpumask_of_cpu();
}
#endif
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 5cede1045ce7..0c727f64e79b 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -662,8 +662,5 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
if (thread_info_flags & _TIF_SIGPENDING)
do_signal(regs);
- if (thread_info_flags & _TIF_HRTICK_RESCHED)
- hrtick_resched();
-
clear_thread_flag(TIF_IRET);
}
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index b95a0a609053..02b02583f5b5 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -54,6 +54,59 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
return do_sigaltstack(uss, uoss, regs->sp);
}
+/*
+ * Signal frame handlers.
+ */
+
+static inline int save_i387(struct _fpstate __user *buf)
+{
+ struct task_struct *tsk = current;
+ int err = 0;
+
+ BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
+ sizeof(tsk->thread.xstate->fxsave));
+
+ if ((unsigned long)buf % 16)
+ printk("save_i387: bad fpstate %p\n", buf);
+
+ if (!used_math())
+ return 0;
+ clear_used_math(); /* trigger finit */
+ if (task_thread_info(tsk)->status & TS_USEDFPU) {
+ err = save_i387_checking((struct i387_fxsave_struct __user *)
+ buf);
+ if (err)
+ return err;
+ task_thread_info(tsk)->status &= ~TS_USEDFPU;
+ stts();
+ } else {
+ if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
+ sizeof(struct i387_fxsave_struct)))
+ return -1;
+ }
+ return 1;
+}
+
+/*
+ * This restores directly out of user space. Exceptions are handled.
+ */
+static inline int restore_i387(struct _fpstate __user *buf)
+{
+ struct task_struct *tsk = current;
+ int err;
+
+ if (!used_math()) {
+ err = init_fpu(tsk);
+ if (err)
+ return err;
+ }
+
+ if (!(task_thread_info(current)->status & TS_USEDFPU)) {
+ clts();
+ task_thread_info(current)->status |= TS_USEDFPU;
+ }
+ return restore_fpu_checking((__force struct i387_fxsave_struct *)buf);
+}
/*
* Do a signal return; undo the signal stack.
@@ -497,9 +550,6 @@ void do_notify_resume(struct pt_regs *regs, void *unused,
/* deal with pending signal delivery */
if (thread_info_flags & _TIF_SIGPENDING)
do_signal(regs);
-
- if (thread_info_flags & _TIF_HRTICK_RESCHED)
- hrtick_resched();
}
void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index a9331a42f76f..6112c3632345 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -326,12 +326,16 @@ static void __cpuinit start_secondary(void *unused)
* for which cpus receive the IPI. Holding this
* lock helps us to not include this cpu in a currently in progress
* smp_call_function().
+ *
+ * We need to hold vector_lock so there the set of online cpus
+ * does not change while we are assigning vectors to cpus. Holding
+ * this lock ensures we don't half assign or remove an irq from a cpu.
*/
ipi_call_lock_irq();
-#ifdef CONFIG_X86_IO_APIC
- setup_vector_irq(smp_processor_id());
-#endif
+ lock_vector_lock();
+ __setup_vector_irq(smp_processor_id());
cpu_set(smp_processor_id(), cpu_online_map);
+ unlock_vector_lock();
ipi_call_unlock_irq();
per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
@@ -438,7 +442,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
cpu_set(cpu, cpu_sibling_setup_map);
if (smp_num_siblings > 1) {
- for_each_cpu_mask(i, cpu_sibling_setup_map) {
+ for_each_cpu_mask_nr(i, cpu_sibling_setup_map) {
if (c->phys_proc_id == cpu_data(i).phys_proc_id &&
c->cpu_core_id == cpu_data(i).cpu_core_id) {
cpu_set(i, per_cpu(cpu_sibling_map, cpu));
@@ -461,7 +465,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
return;
}
- for_each_cpu_mask(i, cpu_sibling_setup_map) {
+ for_each_cpu_mask_nr(i, cpu_sibling_setup_map) {
if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
cpu_set(i, c->llc_shared_map);
@@ -990,7 +994,17 @@ int __cpuinit native_cpu_up(unsigned int cpu)
flush_tlb_all();
low_mappings = 1;
+#ifdef CONFIG_X86_PC
+ if (def_to_bigsmp && apicid > 8) {
+ printk(KERN_WARNING
+ "More than 8 CPUs detected - skipping them.\n"
+ "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n");
+ err = -1;
+ } else
+ err = do_boot_cpu(apicid, cpu);
+#else
err = do_boot_cpu(apicid, cpu);
+#endif
zap_low_mappings();
low_mappings = 0;
@@ -1219,7 +1233,7 @@ static void remove_siblinginfo(int cpu)
int sibling;
struct cpuinfo_x86 *c = &cpu_data(cpu);
- for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) {
+ for_each_cpu_mask_nr(sibling, per_cpu(cpu_core_map, cpu)) {
cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
/*/
* last thread sibling in this cpu core going down
@@ -1228,7 +1242,7 @@ static void remove_siblinginfo(int cpu)
cpu_data(sibling).booted_cores--;
}
- for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu))
+ for_each_cpu_mask_nr(sibling, per_cpu(cpu_sibling_map, cpu))
cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
cpus_clear(per_cpu(cpu_sibling_map, cpu));
cpus_clear(per_cpu(cpu_core_map, cpu));
@@ -1336,7 +1350,9 @@ int __cpu_disable(void)
remove_siblinginfo(cpu);
/* It's now safe to remove this processor from the online map */
+ lock_vector_lock();
remove_cpu_from_maps(cpu);
+ unlock_vector_lock();
fixup_irqs(cpu_online_map);
return 0;
}
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index adff5562f5fd..d44395ff34c3 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -326,3 +326,9 @@ ENTRY(sys_call_table)
.long sys_fallocate
.long sys_timerfd_settime /* 325 */
.long sys_timerfd_gettime
+ .long sys_signalfd4
+ .long sys_eventfd2
+ .long sys_epoll_create1
+ .long sys_dup3 /* 330 */
+ .long sys_pipe2
+ .long sys_inotify_init1
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 0a1b1a9d922d..6ca515d6db54 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -37,6 +37,7 @@
#include <asm/timer.h>
#include <asm/vmi_time.h>
#include <asm/kmap_types.h>
+#include <asm/setup.h>
/* Convenient for calling VMI functions indirectly in the ROM */
typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void);
@@ -683,7 +684,7 @@ void vmi_bringup(void)
{
/* We must establish the lowmem mapping for MMU ops to work */
if (vmi_ops.set_linear_mapping)
- vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, max_low_pfn, 0);
+ vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, MAXMEM_PFN, 0);
}
/*