aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS6
-rw-r--r--arch/i386/Kconfig41
-rw-r--r--arch/i386/kernel/acpi/boot.c2
-rw-r--r--arch/i386/kernel/cpu/common.c2
-rw-r--r--arch/i386/kernel/smpboot.c4
-rw-r--r--arch/i386/kernel/trampoline.S5
-rw-r--r--drivers/acpi/ec.c2
-rw-r--r--drivers/acpi/toshiba_acpi.c9
-rw-r--r--drivers/ata/Kconfig2
-rw-r--r--drivers/ata/pata_hpt37x.c6
-rw-r--r--drivers/block/pktcdvd.c49
-rw-r--r--drivers/char/ip2/i2ellis.h4
-rw-r--r--drivers/connector/cn_proc.c11
-rw-r--r--drivers/hid/Kconfig18
-rw-r--r--drivers/ide/pci/atiixp.c18
-rw-r--r--drivers/ide/pci/via82cxxx.c138
-rw-r--r--drivers/kvm/kvm.h106
-rw-r--r--drivers/kvm/kvm_main.c155
-rw-r--r--drivers/kvm/mmu.c1114
-rw-r--r--drivers/kvm/paging_tmpl.h260
-rw-r--r--drivers/kvm/svm.c113
-rw-r--r--drivers/kvm/vmx.c175
-rw-r--r--drivers/kvm/x86_emulate.c2
-rw-r--r--drivers/leds/leds-s3c24xx.c2
-rw-r--r--drivers/macintosh/via-pmu.c1
-rw-r--r--drivers/net/Space.c11
-rw-r--r--drivers/net/ifb.c4
-rw-r--r--drivers/net/loopback.c4
-rw-r--r--drivers/net/sungem.c3
-rw-r--r--drivers/net/sungem_phy.c179
-rw-r--r--drivers/net/sungem_phy.h7
-rw-r--r--drivers/pci/search.c24
-rw-r--r--drivers/rtc/rtc-at91rm9200.c2
-rw-r--r--drivers/rtc/rtc-rs5c372.c535
-rw-r--r--drivers/usb/input/Kconfig6
-rw-r--r--drivers/video/backlight/corgi_bl.c2
-rw-r--r--drivers/video/backlight/hp680_bl.c2
-rw-r--r--drivers/video/backlight/locomolcd.c2
-rw-r--r--fs/adfs/dir_f.c2
-rw-r--r--fs/bad_inode.c330
-rw-r--r--fs/ufs/balloc.c25
-rw-r--r--fs/ufs/inode.c41
-rw-r--r--include/acpi/acconfig.h2
-rw-r--r--include/asm-i386/boot.h3
-rw-r--r--include/linux/kvm.h11
-rw-r--r--include/linux/magic.h1
-rw-r--r--include/linux/swap.h2
-rw-r--r--include/net/tcp.h2
-rw-r--r--include/net/x25.h1
-rw-r--r--init/main.c5
-rw-r--r--kernel/params.c6
-rw-r--r--kernel/power/swap.c9
-rw-r--r--kernel/power/user.c7
-rw-r--r--kernel/profile.c2
-rw-r--r--mm/oom_kill.c12
-rw-r--r--mm/page_alloc.c7
-rw-r--r--mm/slab.c4
-rw-r--r--mm/swapfile.c8
-rw-r--r--mm/vmscan.c33
-rw-r--r--net/bridge/netfilter/ebtables.c3
-rw-r--r--net/core/pktgen.c156
-rw-r--r--net/ipv4/devinet.c5
-rw-r--r--net/ipv4/netfilter.c7
-rw-r--r--net/ipv4/netfilter/Kconfig4
-rw-r--r--net/ipv4/netfilter/ip_tables.c10
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c5
-rw-r--r--net/ipv6/addrconf.c4
-rw-r--r--net/netfilter/Kconfig25
-rw-r--r--net/netfilter/xt_hashlimit.c2
-rw-r--r--net/netlink/af_netlink.c3
-rw-r--r--net/x25/af_x25.c2
-rw-r--r--net/xfrm/xfrm_user.c73
-rw-r--r--scripts/kconfig/qconf.cc12
-rw-r--r--scripts/kconfig/qconf.h2
-rw-r--r--sound/sparc/cs4231.c26
75 files changed, 2944 insertions, 934 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 0d54cc45ea3e..2bd34ef58ffa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -532,13 +532,13 @@ L: netdev@vger.kernel.org
S: Maintained
ASUS ACPI EXTRAS DRIVER
+P: Corentin Chary
+M: corentincj@iksaif.net
P: Karol Kozimor
M: sziwan@users.sourceforge.net
-P: Julien Lerouge
-M: julien.lerouge@free.fr
L: acpi4asus-user@lists.sourceforge.net
W: http://sourceforge.net/projects/acpi4asus
-W: http://julien.lerouge.free.fr
+W: http://xf.iksaif.net/acpi4asus
S: Maintained
ATA OVER ETHERNET DRIVER
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 0d67a0a1151e..0dfee812811a 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -777,6 +777,47 @@ config CRASH_DUMP
PHYSICAL_START.
For more details see Documentation/kdump/kdump.txt
+config PHYSICAL_START
+ hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
+ default "0x100000"
+ help
+ This gives the physical address where the kernel is loaded.
+
+ If kernel is a not relocatable (CONFIG_RELOCATABLE=n) then
+ bzImage will decompress itself to above physical address and
+ run from there. Otherwise, bzImage will run from the address where
+ it has been loaded by the boot loader and will ignore above physical
+ address.
+
+ In normal kdump cases one does not have to set/change this option
+ as now bzImage can be compiled as a completely relocatable image
+ (CONFIG_RELOCATABLE=y) and be used to load and run from a different
+ address. This option is mainly useful for the folks who don't want
+ to use a bzImage for capturing the crash dump and want to use a
+ vmlinux instead. vmlinux is not relocatable hence a kernel needs
+ to be specifically compiled to run from a specific memory area
+ (normally a reserved region) and this option comes handy.
+
+ So if you are using bzImage for capturing the crash dump, leave
+ the value here unchanged to 0x100000 and set CONFIG_RELOCATABLE=y.
+ Otherwise if you plan to use vmlinux for capturing the crash dump
+ change this value to start of the reserved region (Typically 16MB
+ 0x1000000). In other words, it can be set based on the "X" value as
+ specified in the "crashkernel=YM@XM" command line boot parameter
+ passed to the panic-ed kernel. Typically this parameter is set as
+ crashkernel=64M@16M. Please take a look at
+ Documentation/kdump/kdump.txt for more details about crash dumps.
+
+ Usage of bzImage for capturing the crash dump is recommended as
+ one does not have to build two kernels. Same kernel can be used
+ as production kernel and capture kernel. Above option should have
+ gone away after relocatable bzImage support is introduced. But it
+ is present because there are users out there who continue to use
+ vmlinux for dump capture. This option should go away down the
+ line.
+
+ Don't change this unless you know what you are doing.
+
config RELOCATABLE
bool "Build a relocatable kernel(EXPERIMENTAL)"
depends on EXPERIMENTAL
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index 094300b3a81f..cbcb2c27f48b 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -333,7 +333,7 @@ acpi_parse_ioapic(acpi_table_entry_header * header, const unsigned long end)
/*
* Parse Interrupt Source Override for the ACPI SCI
*/
-static void acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger)
+static void __init acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger)
{
if (trigger == 0) /* compatible SCI trigger is level */
trigger = 3;
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index 1b34c56f8123..8689d62abd4a 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -54,7 +54,7 @@ static struct cpu_dev __cpuinitdata default_cpu = {
.c_init = default_init,
.c_vendor = "Unknown",
};
-static struct cpu_dev * this_cpu = &default_cpu;
+static struct cpu_dev * this_cpu __cpuinitdata = &default_cpu;
static int __init cachesize_setup(char *str)
{
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index aef39be81361..300d9b38d02e 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -227,7 +227,7 @@ static struct {
atomic_t count_start;
atomic_t count_stop;
unsigned long long values[NR_CPUS];
-} tsc __initdata = {
+} tsc __cpuinitdata = {
.start_flag = ATOMIC_INIT(0),
.count_start = ATOMIC_INIT(0),
.count_stop = ATOMIC_INIT(0),
@@ -332,7 +332,7 @@ static void __init synchronize_tsc_bp(void)
printk("passed.\n");
}
-static void __init synchronize_tsc_ap(void)
+static void __cpuinit synchronize_tsc_ap(void)
{
int i;
diff --git a/arch/i386/kernel/trampoline.S b/arch/i386/kernel/trampoline.S
index fcce0e61b0e7..2f1814c5cfd7 100644
--- a/arch/i386/kernel/trampoline.S
+++ b/arch/i386/kernel/trampoline.S
@@ -38,6 +38,11 @@
.data
+/* We can free up trampoline after bootup if cpu hotplug is not supported. */
+#ifndef CONFIG_HOTPLUG_CPU
+.section ".init.data","aw",@progbits
+#endif
+
.code16
ENTRY(trampoline_data)
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 9c52d87d6f04..4144d5dd442e 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -424,7 +424,7 @@ static void acpi_ec_gpe_query(void *ec_cxt)
snprintf(object_name, 8, "_Q%2.2X", value);
- printk(KERN_INFO PREFIX "evaluating %s\n", object_name);
+ ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Evaluating %s", object_name));
acpi_evaluate_object(ec->handle, object_name, NULL, NULL);
}
diff --git a/drivers/acpi/toshiba_acpi.c b/drivers/acpi/toshiba_acpi.c
index 88aeccbafaaf..d9b651ffcdc0 100644
--- a/drivers/acpi/toshiba_acpi.c
+++ b/drivers/acpi/toshiba_acpi.c
@@ -321,13 +321,16 @@ static int set_lcd_status(struct backlight_device *bd)
static unsigned long write_lcd(const char *buffer, unsigned long count)
{
int value;
- int ret = count;
+ int ret;
if (sscanf(buffer, " brightness : %i", &value) == 1 &&
- value >= 0 && value < HCI_LCD_BRIGHTNESS_LEVELS)
+ value >= 0 && value < HCI_LCD_BRIGHTNESS_LEVELS) {
ret = set_lcd(value);
- else
+ if (ret == 0)
+ ret = count;
+ } else {
ret = -EINVAL;
+ }
return ret;
}
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index b34e0a958d0f..da21552d2b1c 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -381,7 +381,7 @@ config PATA_OPTI
If unsure, say N.
config PATA_OPTIDMA
- tristate "OPTI FireStar PATA support (Veyr Experimental)"
+ tristate "OPTI FireStar PATA support (Very Experimental)"
depends on PCI && EXPERIMENTAL
help
This option enables DMA/PIO support for the later OPTi
diff --git a/drivers/ata/pata_hpt37x.c b/drivers/ata/pata_hpt37x.c
index 47082df7199e..dfb306057cf4 100644
--- a/drivers/ata/pata_hpt37x.c
+++ b/drivers/ata/pata_hpt37x.c
@@ -25,7 +25,7 @@
#include <linux/libata.h>
#define DRV_NAME "pata_hpt37x"
-#define DRV_VERSION "0.5.1"
+#define DRV_VERSION "0.5.2"
struct hpt_clock {
u8 xfer_speed;
@@ -416,7 +416,7 @@ static const char *bad_ata100_5[] = {
static unsigned long hpt370_filter(const struct ata_port *ap, struct ata_device *adev, unsigned long mask)
{
- if (adev->class != ATA_DEV_ATA) {
+ if (adev->class == ATA_DEV_ATA) {
if (hpt_dma_blacklisted(adev, "UDMA", bad_ata33))
mask &= ~ATA_MASK_UDMA;
if (hpt_dma_blacklisted(adev, "UDMA100", bad_ata100_5))
@@ -749,7 +749,7 @@ static void hpt37x_bmdma_stop(struct ata_queued_cmd *qc)
{
struct ata_port *ap = qc->ap;
struct pci_dev *pdev = to_pci_dev(ap->host->dev);
- int mscreg = 0x50 + 2 * ap->port_no;
+ int mscreg = 0x50 + 4 * ap->port_no;
u8 bwsr_stat, msc_stat;
pci_read_config_byte(pdev, 0x6A, &bwsr_stat);
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 7c95c762950f..62462190e07e 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -765,47 +765,34 @@ static inline struct bio *pkt_get_list_first(struct bio **list_head, struct bio
*/
static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *cgc)
{
- char sense[SCSI_SENSE_BUFFERSIZE];
- request_queue_t *q;
+ request_queue_t *q = bdev_get_queue(pd->bdev);
struct request *rq;
- DECLARE_COMPLETION_ONSTACK(wait);
- int err = 0;
+ int ret = 0;
- q = bdev_get_queue(pd->bdev);
+ rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ?
+ WRITE : READ, __GFP_WAIT);
+
+ if (cgc->buflen) {
+ if (blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen, __GFP_WAIT))
+ goto out;
+ }
+
+ rq->cmd_len = COMMAND_SIZE(rq->cmd[0]);
+ memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE);
+ if (sizeof(rq->cmd) > CDROM_PACKET_SIZE)
+ memset(rq->cmd + CDROM_PACKET_SIZE, 0, sizeof(rq->cmd) - CDROM_PACKET_SIZE);
- rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ? WRITE : READ,
- __GFP_WAIT);
- rq->errors = 0;
- rq->rq_disk = pd->bdev->bd_disk;
- rq->bio = NULL;
- rq->buffer = NULL;
rq->timeout = 60*HZ;
- rq->data = cgc->buffer;
- rq->data_len = cgc->buflen;
- rq->sense = sense;
- memset(sense, 0, sizeof(sense));
- rq->sense_len = 0;
rq->cmd_type = REQ_TYPE_BLOCK_PC;
rq->cmd_flags |= REQ_HARDBARRIER;
if (cgc->quiet)
rq->cmd_flags |= REQ_QUIET;
- memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE);
- if (sizeof(rq->cmd) > CDROM_PACKET_SIZE)
- memset(rq->cmd + CDROM_PACKET_SIZE, 0, sizeof(rq->cmd) - CDROM_PACKET_SIZE);
- rq->cmd_len = COMMAND_SIZE(rq->cmd[0]);
-
- rq->ref_count++;
- rq->end_io_data = &wait;
- rq->end_io = blk_end_sync_rq;
- elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
- generic_unplug_device(q);
- wait_for_completion(&wait);
-
- if (rq->errors)
- err = -EIO;
+ blk_execute_rq(rq->q, pd->bdev->bd_disk, rq, 0);
+ ret = rq->errors;
+out:
blk_put_request(rq);
- return err;
+ return ret;
}
/*
diff --git a/drivers/char/ip2/i2ellis.h b/drivers/char/ip2/i2ellis.h
index 5eabe47b0bc8..433305062fb8 100644
--- a/drivers/char/ip2/i2ellis.h
+++ b/drivers/char/ip2/i2ellis.h
@@ -606,9 +606,9 @@ static int iiDownloadAll(i2eBordStrPtr, loadHdrStrPtr, int, int);
// code and returning.
//
#define COMPLETE(pB,code) \
- if(1){ \
+ do { \
pB->i2eError = code; \
return (code == I2EE_GOOD);\
- }
+ } while (0)
#endif // I2ELLIS_H
diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
index 3ece69231343..5c9f67f98d10 100644
--- a/drivers/connector/cn_proc.c
+++ b/drivers/connector/cn_proc.c
@@ -28,6 +28,7 @@
#include <linux/init.h>
#include <linux/connector.h>
#include <asm/atomic.h>
+#include <asm/unaligned.h>
#include <linux/cn_proc.h>
@@ -60,7 +61,7 @@ void proc_fork_connector(struct task_struct *task)
ev = (struct proc_event*)msg->data;
get_seq(&msg->seq, &ev->cpu);
ktime_get_ts(&ts); /* get high res monotonic timestamp */
- ev->timestamp_ns = timespec_to_ns(&ts);
+ put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns);
ev->what = PROC_EVENT_FORK;
ev->event_data.fork.parent_pid = task->real_parent->pid;
ev->event_data.fork.parent_tgid = task->real_parent->tgid;
@@ -88,7 +89,7 @@ void proc_exec_connector(struct task_struct *task)
ev = (struct proc_event*)msg->data;
get_seq(&msg->seq, &ev->cpu);
ktime_get_ts(&ts); /* get high res monotonic timestamp */
- ev->timestamp_ns = timespec_to_ns(&ts);
+ put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns);
ev->what = PROC_EVENT_EXEC;
ev->event_data.exec.process_pid = task->pid;
ev->event_data.exec.process_tgid = task->tgid;
@@ -124,7 +125,7 @@ void proc_id_connector(struct task_struct *task, int which_id)
return;
get_seq(&msg->seq, &ev->cpu);
ktime_get_ts(&ts); /* get high res monotonic timestamp */
- ev->timestamp_ns = timespec_to_ns(&ts);
+ put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns);
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = 0; /* not used */
@@ -146,7 +147,7 @@ void proc_exit_connector(struct task_struct *task)
ev = (struct proc_event*)msg->data;
get_seq(&msg->seq, &ev->cpu);
ktime_get_ts(&ts); /* get high res monotonic timestamp */
- ev->timestamp_ns = timespec_to_ns(&ts);
+ put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns);
ev->what = PROC_EVENT_EXIT;
ev->event_data.exit.process_pid = task->pid;
ev->event_data.exit.process_tgid = task->tgid;
@@ -181,7 +182,7 @@ static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack)
ev = (struct proc_event*)msg->data;
msg->seq = rcvd_seq;
ktime_get_ts(&ts); /* get high res monotonic timestamp */
- ev->timestamp_ns = timespec_to_ns(&ts);
+ put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns);
ev->cpu = -1;
ev->what = PROC_EVENT_NONE;
ev->event_data.ack.err = err;
diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 96d4a0bb2203..ec796ad087df 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -6,13 +6,21 @@ menu "HID Devices"
config HID
tristate "Generic HID support"
+ depends on INPUT
default y
---help---
- Say Y here if you want generic HID support to connect keyboards,
- mice, joysticks, graphic tablets, or any other HID based devices
- to your computer. You also need to select particular types of
- HID devices you want to compile support for, in the particular
- driver menu (USB, Bluetooth)
+ A human interface device (HID) is a type of computer device that
+ interacts directly with and takes input from humans. The term "HID"
+ most commonly used to refer to the USB-HID specification, but other
+ devices (such as, but not strictly limited to, Bluetooth) are
+ designed using HID specification (this involves certain keyboards,
+ mice, tablets, etc). This option compiles into kernel the generic
+ HID layer code (parser, usages, etc.), which can then be used by
+ transport-specific HID implementation (like USB or Bluetooth).
+
+ For docs and specs, see http://www.usb.org/developers/hidpage/
+
+ If unsure, say Y
endmenu
diff --git a/drivers/ide/pci/atiixp.c b/drivers/ide/pci/atiixp.c
index ffdffb6379ef..524e65de4398 100644
--- a/drivers/ide/pci/atiixp.c
+++ b/drivers/ide/pci/atiixp.c
@@ -46,6 +46,8 @@ static atiixp_ide_timing mdma_timing[] = {
static int save_mdma_mode[4];
+static DEFINE_SPINLOCK(atiixp_lock);
+
/**
* atiixp_ratemask - compute rate mask for ATIIXP IDE
* @drive: IDE drive to compute for
@@ -105,7 +107,7 @@ static int atiixp_ide_dma_host_on(ide_drive_t *drive)
unsigned long flags;
u16 tmp16;
- spin_lock_irqsave(&ide_lock, flags);
+ spin_lock_irqsave(&atiixp_lock, flags);
pci_read_config_word(dev, ATIIXP_IDE_UDMA_CONTROL, &tmp16);
if (save_mdma_mode[drive->dn])
@@ -114,7 +116,7 @@ static int atiixp_ide_dma_host_on(ide_drive_t *drive)
tmp16 |= (1 << drive->dn);
pci_write_config_word(dev, ATIIXP_IDE_UDMA_CONTROL, tmp16);
- spin_unlock_irqrestore(&ide_lock, flags);
+ spin_unlock_irqrestore(&atiixp_lock, flags);
return __ide_dma_host_on(drive);
}
@@ -125,13 +127,13 @@ static int atiixp_ide_dma_host_off(ide_drive_t *drive)
unsigned long flags;
u16 tmp16;
- spin_lock_irqsave(&ide_lock, flags);
+ spin_lock_irqsave(&atiixp_lock, flags);
pci_read_config_word(dev, ATIIXP_IDE_UDMA_CONTROL, &tmp16);
tmp16 &= ~(1 << drive->dn);
pci_write_config_word(dev, ATIIXP_IDE_UDMA_CONTROL, tmp16);
- spin_unlock_irqrestore(&ide_lock, flags);
+ spin_unlock_irqrestore(&atiixp_lock, flags);
return __ide_dma_host_off(drive);
}
@@ -152,7 +154,7 @@ static void atiixp_tuneproc(ide_drive_t *drive, u8 pio)
u32 pio_timing_data;
u16 pio_mode_data;
- spin_lock_irqsave(&ide_lock, flags);
+ spin_lock_irqsave(&atiixp_lock, flags);
pci_read_config_word(dev, ATIIXP_IDE_PIO_MODE, &pio_mode_data);
pio_mode_data &= ~(0x07 << (drive->dn * 4));
@@ -165,7 +167,7 @@ static void atiixp_tuneproc(ide_drive_t *drive, u8 pio)
(pio_timing[pio].command_width << (timing_shift + 4));
pci_write_config_dword(dev, ATIIXP_IDE_PIO_TIMING, pio_timing_data);
- spin_unlock_irqrestore(&ide_lock, flags);
+ spin_unlock_irqrestore(&atiixp_lock, flags);
}
/**
@@ -189,7 +191,7 @@ static int atiixp_speedproc(ide_drive_t *drive, u8 xferspeed)
speed = ide_rate_filter(atiixp_ratemask(drive), xferspeed);
- spin_lock_irqsave(&ide_lock, flags);
+ spin_lock_irqsave(&atiixp_lock, flags);
save_mdma_mode[drive->dn] = 0;
if (speed >= XFER_UDMA_0) {
@@ -208,7 +210,7 @@ static int atiixp_speedproc(ide_drive_t *drive, u8 xferspeed)
}
}
- spin_unlock_irqrestore(&ide_lock, flags);
+ spin_unlock_irqrestore(&atiixp_lock, flags);
if (speed >= XFER_SW_DMA_0)
pio = atiixp_dma_2_pio(speed);
diff --git a/drivers/ide/pci/via82cxxx.c b/drivers/ide/pci/via82cxxx.c
index 61f1a9665a7f..381cc6f101ce 100644
--- a/drivers/ide/pci/via82cxxx.c
+++ b/drivers/ide/pci/via82cxxx.c
@@ -123,7 +123,7 @@ struct via82cxxx_dev
static void via_set_speed(ide_hwif_t *hwif, u8 dn, struct ide_timing *timing)
{
struct pci_dev *dev = hwif->pci_dev;
- struct via82cxxx_dev *vdev = ide_get_hwifdata(hwif);
+ struct via82cxxx_dev *vdev = pci_get_drvdata(hwif->pci_dev);
u8 t;
if (~vdev->via_config->flags & VIA_BAD_AST) {
@@ -162,7 +162,7 @@ static void via_set_speed(ide_hwif_t *hwif, u8 dn, struct ide_timing *timing)
static int via_set_drive(ide_drive_t *drive, u8 speed)
{
ide_drive_t *peer = HWIF(drive)->drives + (~drive->dn & 1);
- struct via82cxxx_dev *vdev = ide_get_hwifdata(drive->hwif);
+ struct via82cxxx_dev *vdev = pci_get_drvdata(drive->hwif->pci_dev);
struct ide_timing t, p;
unsigned int T, UT;
@@ -225,7 +225,7 @@ static void via82cxxx_tune_drive(ide_drive_t *drive, u8 pio)
static int via82cxxx_ide_dma_check (ide_drive_t *drive)
{
ide_hwif_t *hwif = HWIF(drive);
- struct via82cxxx_dev *vdev = ide_get_hwifdata(hwif);
+ struct via82cxxx_dev *vdev = pci_get_drvdata(hwif->pci_dev);
u16 w80 = hwif->udma_four;
u16 speed = ide_find_best_mode(drive,
@@ -262,6 +262,53 @@ static struct via_isa_bridge *via_config_find(struct pci_dev **isa)
return via_config;
}
+/*
+ * Check and handle 80-wire cable presence
+ */
+static void __devinit via_cable_detect(struct via82cxxx_dev *vdev, u32 u)
+{
+ int i;
+
+ switch (vdev->via_config->flags & VIA_UDMA) {
+ case VIA_UDMA_66:
+ for (i = 24; i >= 0; i -= 8)
+ if (((u >> (i & 16)) & 8) &&
+ ((u >> i) & 0x20) &&
+ (((u >> i) & 7) < 2)) {
+ /*
+ * 2x PCI clock and
+ * UDMA w/ < 3T/cycle
+ */
+ vdev->via_80w |= (1 << (1 - (i >> 4)));
+ }
+ break;
+
+ case VIA_UDMA_100:
+ for (i = 24; i >= 0; i -= 8)
+ if (((u >> i) & 0x10) ||
+ (((u >> i) & 0x20) &&
+ (((u >> i) & 7) < 4))) {
+ /* BIOS 80-wire bit or
+ * UDMA w/ < 60ns/cycle
+ */
+ vdev->via_80w |= (1 << (1 - (i >> 4)));
+ }
+ break;
+
+ case VIA_UDMA_133:
+ for (i = 24; i >= 0; i -= 8)
+ if (((u >> i) & 0x10) ||
+ (((u >> i) & 0x20) &&
+ (((u >> i) & 7) < 6))) {
+ /* BIOS 80-wire bit or
+ * UDMA w/ < 60ns/cycle
+ */
+ vdev->via_80w |= (1 << (1 - (i >> 4)));
+ }
+ break;
+ }
+}
+
/**
* init_chipset_via82cxxx - initialization handler
* @dev: PCI device
@@ -274,14 +321,22 @@ static struct via_isa_bridge *via_config_find(struct pci_dev **isa)
static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev, const char *name)
{
struct pci_dev *isa = NULL;
+ struct via82cxxx_dev *vdev;
struct via_isa_bridge *via_config;
u8 t, v;
- unsigned int u;
+ u32 u;
+
+ vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
+ if (!vdev) {
+ printk(KERN_ERR "VP_IDE: out of memory :(\n");
+ return -ENOMEM;
+ }
+ pci_set_drvdata(dev, vdev);
/*
* Find the ISA bridge to see how good the IDE is.
*/
- via_config = via_config_find(&isa);
+ vdev->via_config = via_config = via_config_find(&isa);
/* We checked this earlier so if it fails here deeep badness
is involved */
@@ -289,16 +344,17 @@ static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev, const
BUG_ON(!via_config->id);
/*
- * Setup or disable Clk66 if appropriate
+ * Detect cable and configure Clk66
*/
+ pci_read_config_dword(dev, VIA_UDMA_TIMING, &u);
+
+ via_cable_detect(vdev, u);
if ((via_config->flags & VIA_UDMA) == VIA_UDMA_66) {
/* Enable Clk66 */
- pci_read_config_dword(dev, VIA_UDMA_TIMING, &u);
pci_write_config_dword(dev, VIA_UDMA_TIMING, u|0x80008);
} else if (via_config->flags & VIA_BAD_CLK66) {
/* Would cause trouble on 596a and 686 */
- pci_read_config_dword(dev, VIA_UDMA_TIMING, &u);
pci_write_config_dword(dev, VIA_UDMA_TIMING, u & ~0x80008);
}
@@ -367,75 +423,11 @@ static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev, const
return 0;
}
-/*
- * Check and handle 80-wire cable presence
- */
-static void __devinit via_cable_detect(struct pci_dev *dev, struct via82cxxx_dev *vdev)
-{
- unsigned int u;
- int i;
- pci_read_config_dword(dev, VIA_UDMA_TIMING, &u);
-
- switch (vdev->via_config->flags & VIA_UDMA) {
-
- case VIA_UDMA_66:
- for (i = 24; i >= 0; i -= 8)
- if (((u >> (i & 16)) & 8) &&
- ((u >> i) & 0x20) &&
- (((u >> i) & 7) < 2)) {
- /*
- * 2x PCI clock and
- * UDMA w/ < 3T/cycle
- */
- vdev->via_80w |= (1 << (1 - (i >> 4)));
- }
- break;
-
- case VIA_UDMA_100:
- for (i = 24; i >= 0; i -= 8)
- if (((u >> i) & 0x10) ||
- (((u >> i) & 0x20) &&
- (((u >> i) & 7) < 4))) {
- /* BIOS 80-wire bit or
- * UDMA w/ < 60ns/cycle
- */
- vdev->via_80w |= (1 << (1 - (i >> 4)));
- }
- break;
-
- case VIA_UDMA_133:
- for (i = 24; i >= 0; i -= 8)
- if (((u >> i) & 0x10) ||
- (((u >> i) & 0x20) &&
- (((u >> i) & 7) < 6))) {
- /* BIOS 80-wire bit or
- * UDMA w/ < 60ns/cycle
- */
- vdev->via_80w |= (1 << (1 - (i >> 4)));
- }
- break;
-
- }
-}
-
static void __devinit init_hwif_via82cxxx(ide_hwif_t *hwif)
{
- struct via82cxxx_dev *vdev = kmalloc(sizeof(struct via82cxxx_dev),
- GFP_KERNEL);
- struct pci_dev *isa = NULL;
+ struct via82cxxx_dev *vdev = pci_get_drvdata(hwif->pci_dev);
int i;
- if (vdev == NULL) {
- printk(KERN_ERR "VP_IDE: out of memory :(\n");
- return;
- }
-
- memset(vdev, 0, sizeof(struct via82cxxx_dev));
- ide_set_hwifdata(hwif, vdev);
-
- vdev->via_config = via_config_find(&isa);
- via_cable_detect(hwif->pci_dev, vdev);
-
hwif->autodma = 0;
hwif->tuneproc = &via82cxxx_tune_drive;
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 100df6f38d92..91e0c75aca8f 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -52,6 +52,8 @@
#define KVM_MAX_VCPUS 1
#define KVM_MEMORY_SLOTS 4
#define KVM_NUM_MMU_PAGES 256
+#define KVM_MIN_FREE_MMU_PAGES 5
+#define KVM_REFILL_PAGES 25
#define FX_IMAGE_SIZE 512
#define FX_IMAGE_ALIGN 16
@@ -89,14 +91,54 @@ typedef unsigned long hva_t;
typedef u64 hpa_t;
typedef unsigned long hfn_t;
+#define NR_PTE_CHAIN_ENTRIES 5
+
+struct kvm_pte_chain {
+ u64 *parent_ptes[NR_PTE_CHAIN_ENTRIES];
+ struct hlist_node link;
+};
+
+/*
+ * kvm_mmu_page_role, below, is defined as:
+ *
+ * bits 0:3 - total guest paging levels (2-4, or zero for real mode)
+ * bits 4:7 - page table level for this shadow (1-4)
+ * bits 8:9 - page table quadrant for 2-level guests
+ * bit 16 - "metaphysical" - gfn is not a real page (huge page/real mode)
+ */
+union kvm_mmu_page_role {
+ unsigned word;
+ struct {
+ unsigned glevels : 4;
+ unsigned level : 4;
+ unsigned quadrant : 2;
+ unsigned pad_for_nice_hex_output : 6;
+ unsigned metaphysical : 1;
+ };
+};
+
struct kvm_mmu_page {
struct list_head link;
+ struct hlist_node hash_link;
+
+ /*
+ * The following two entries are used to key the shadow page in the
+ * hash table.
+ */
+ gfn_t gfn;
+ union kvm_mmu_page_role role;
+
hpa_t page_hpa;
unsigned long slot_bitmap; /* One bit set per slot which has memory
* in this shadow page.
*/
int global; /* Set if all ptes in this page are global */
- u64 *parent_pte;
+ int multimapped; /* More than one parent_pte? */
+ int root_count; /* Currently serving as active root */
+ union {
+ u64 *parent_pte; /* !multimapped */
+ struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */
+ };
};
struct vmcs {
@@ -117,14 +159,26 @@ struct kvm_vcpu;
struct kvm_mmu {
void (*new_cr3)(struct kvm_vcpu *vcpu);
int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
- void (*inval_page)(struct kvm_vcpu *vcpu, gva_t gva);
void (*free)(struct kvm_vcpu *vcpu);
gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva);
hpa_t root_hpa;
int root_level;
int shadow_root_level;
+
+ u64 *pae_root;
+};
+
+#define KVM_NR_MEM_OBJS 20
+
+struct kvm_mmu_memory_cache {
+ int nobjs;
+ void *objects[KVM_NR_MEM_OBJS];
};
+/*
+ * We don't want allocation failures within the mmu code, so we preallocate
+ * enough memory for a single page fault in a cache.
+ */
struct kvm_guest_debug {
int enabled;
unsigned long bp[4];
@@ -173,6 +227,7 @@ struct kvm_vcpu {
struct mutex mutex;
int cpu;
int launched;
+ int interrupt_window_open;
unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
unsigned long irq_pending[NR_IRQ_WORDS];
@@ -184,6 +239,7 @@ struct kvm_vcpu {
unsigned long cr3;
unsigned long cr4;
unsigned long cr8;
+ u64 pdptrs[4]; /* pae */
u64 shadow_efer;
u64 apic_base;
int nmsrs;
@@ -194,6 +250,12 @@ struct kvm_vcpu {
struct kvm_mmu_page page_header_buf[KVM_NUM_MMU_PAGES];
struct kvm_mmu mmu;
+ struct kvm_mmu_memory_cache mmu_pte_chain_cache;
+ struct kvm_mmu_memory_cache mmu_rmap_desc_cache;
+
+ gfn_t last_pt_write_gfn;
+ int last_pt_write_count;
+
struct kvm_guest_debug guest_debug;
char fx_buf[FX_BUF_SIZE];
@@ -231,10 +293,16 @@ struct kvm {
spinlock_t lock; /* protects everything except vcpus */
int nmemslots;
struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS];
+ /*
+ * Hash table of struct kvm_mmu_page.
+ */
struct list_head active_mmu_pages;
+ int n_free_mmu_pages;
+ struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
struct kvm_vcpu vcpus[KVM_MAX_VCPUS];
int memory_config_version;
int busy;
+ unsigned long rmap_overflow;
};
struct kvm_stat {
@@ -247,6 +315,9 @@ struct kvm_stat {
u32 io_exits;
u32 mmio_exits;
u32 signal_exits;
+ u32 irq_window_exits;
+ u32 halt_exits;
+ u32 request_irq_exits;
u32 irq_exits;
};
@@ -279,6 +350,7 @@ struct kvm_arch_ops {
void (*set_segment)(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
+ void (*decache_cr0_cr4_guest_bits)(struct kvm_vcpu *vcpu);
void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
void (*set_cr0_no_modeswitch)(struct kvm_vcpu *vcpu,
unsigned long cr0);
@@ -323,7 +395,7 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu);
int kvm_mmu_setup(struct kvm_vcpu *vcpu);
int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
-void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
+void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot);
hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa);
#define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
@@ -396,6 +468,19 @@ int kvm_write_guest(struct kvm_vcpu *vcpu,
unsigned long segment_base(u16 selector);
+void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes);
+void kvm_mmu_post_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes);
+int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
+void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
+
+static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
+ u32 error_code)
+{
+ if (unlikely(vcpu->kvm->n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
+ kvm_mmu_free_some_pages(vcpu);
+ return vcpu->mmu.page_fault(vcpu, gva, error_code);
+}
+
static inline struct page *_gfn_to_page(struct kvm *kvm, gfn_t gfn)
{
struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
@@ -541,19 +626,4 @@ static inline u32 get_rdx_init_val(void)
#define TSS_REDIRECTION_SIZE (256 / 8)
#define RMODE_TSS_SIZE (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1)
-#ifdef CONFIG_X86_64
-
-/*
- * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64. Therefore
- * we need to allocate shadow page tables in the first 4GB of memory, which
- * happens to fit the DMA32 zone.
- */
-#define GFP_KVM_MMU (GFP_KERNEL | __GFP_DMA32)
-
-#else
-
-#define GFP_KVM_MMU GFP_KERNEL
-
-#endif
-
#endif
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index ce7fe640f18d..67c1154960f0 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -58,6 +58,9 @@ static struct kvm_stats_debugfs_item {
{ "io_exits", &kvm_stat.io_exits },
{ "mmio_exits", &kvm_stat.mmio_exits },
{ "signal_exits", &kvm_stat.signal_exits },
+ { "irq_window", &kvm_stat.irq_window_exits },
+ { "halt_exits", &kvm_stat.halt_exits },
+ { "request_irq", &kvm_stat.request_irq_exits },
{ "irq_exits", &kvm_stat.irq_exits },
{ 0, 0 }
};
@@ -227,6 +230,7 @@ static int kvm_dev_open(struct inode *inode, struct file *filp)
struct kvm_vcpu *vcpu = &kvm->vcpus[i];
mutex_init(&vcpu->mutex);
+ vcpu->kvm = kvm;
vcpu->mmu.root_hpa = INVALID_PAGE;
INIT_LIST_HEAD(&vcpu->free_pages);
}
@@ -268,8 +272,8 @@ static void kvm_free_physmem(struct kvm *kvm)
static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
{
- kvm_arch_ops->vcpu_free(vcpu);
kvm_mmu_destroy(vcpu);
+ kvm_arch_ops->vcpu_free(vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
@@ -295,14 +299,17 @@ static void inject_gp(struct kvm_vcpu *vcpu)
kvm_arch_ops->inject_gp(vcpu, 0);
}
-static int pdptrs_have_reserved_bits_set(struct kvm_vcpu *vcpu,
- unsigned long cr3)
+/*
+ * Load the pae pdptrs. Return true is they are all valid.
+ */
+static int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
{
gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
- unsigned offset = (cr3 & (PAGE_SIZE-1)) >> 5;
+ unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
int i;
u64 pdpte;
u64 *pdpt;
+ int ret;
struct kvm_memory_slot *memslot;
spin_lock(&vcpu->kvm->lock);
@@ -310,16 +317,23 @@ static int pdptrs_have_reserved_bits_set(struct kvm_vcpu *vcpu,
/* FIXME: !memslot - emulate? 0xff? */
pdpt = kmap_atomic(gfn_to_page(memslot, pdpt_gfn), KM_USER0);
+ ret = 1;
for (i = 0; i < 4; ++i) {
pdpte = pdpt[offset + i];
- if ((pdpte & 1) && (pdpte & 0xfffffff0000001e6ull))
- break;
+ if ((pdpte & 1) && (pdpte & 0xfffffff0000001e6ull)) {
+ ret = 0;
+ goto out;
+ }
}
+ for (i = 0; i < 4; ++i)
+ vcpu->pdptrs[i] = pdpt[offset + i];
+
+out:
kunmap_atomic(pdpt, KM_USER0);
spin_unlock(&vcpu->kvm->lock);
- return i != 4;
+ return ret;
}
void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
@@ -365,8 +379,7 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
}
} else
#endif
- if (is_pae(vcpu) &&
- pdptrs_have_reserved_bits_set(vcpu, vcpu->cr3)) {
+ if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->cr3)) {
printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
"reserved bits\n");
inject_gp(vcpu);
@@ -387,6 +400,7 @@ EXPORT_SYMBOL_GPL(set_cr0);
void lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
{
+ kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu);
set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f));
}
EXPORT_SYMBOL_GPL(lmsw);
@@ -407,7 +421,7 @@ void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
return;
}
} else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & CR4_PAE_MASK)
- && pdptrs_have_reserved_bits_set(vcpu, vcpu->cr3)) {
+ && !load_pdptrs(vcpu, vcpu->cr3)) {
printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
inject_gp(vcpu);
}
@@ -439,7 +453,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
return;
}
if (is_paging(vcpu) && is_pae(vcpu) &&
- pdptrs_have_reserved_bits_set(vcpu, cr3)) {
+ !load_pdptrs(vcpu, cr3)) {
printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
"reserved bits\n");
inject_gp(vcpu);
@@ -449,7 +463,19 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
vcpu->cr3 = cr3;
spin_lock(&vcpu->kvm->lock);
- vcpu->mmu.new_cr3(vcpu);
+ /*
+ * Does the new cr3 value map to physical memory? (Note, we
+ * catch an invalid cr3 even in real-mode, because it would
+ * cause trouble later on when we turn on paging anyway.)
+ *
+ * A real CPU would silently accept an invalid cr3 and would
+ * attempt to use it - with largely undefined (and often hard
+ * to debug) behavior on the guest side.
+ */
+ if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
+ inject_gp(vcpu);
+ else
+ vcpu->mmu.new_cr3(vcpu);
spin_unlock(&vcpu->kvm->lock);
}
EXPORT_SYMBOL_GPL(set_cr3);
@@ -517,7 +543,6 @@ static int kvm_dev_ioctl_create_vcpu(struct kvm *kvm, int n)
vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE;
vcpu->cpu = -1; /* First load will set up TR */
- vcpu->kvm = kvm;
r = kvm_arch_ops->vcpu_create(vcpu);
if (r < 0)
goto out_free_vcpus;
@@ -634,6 +659,7 @@ raced:
| __GFP_ZERO);
if (!new.phys_mem[i])
goto out_free;
+ new.phys_mem[i]->private = 0;
}
}
@@ -688,6 +714,13 @@ out:
return r;
}
+static void do_remove_write_access(struct kvm_vcpu *vcpu, int slot)
+{
+ spin_lock(&vcpu->kvm->lock);
+ kvm_mmu_slot_remove_write_access(vcpu, slot);
+ spin_unlock(&vcpu->kvm->lock);
+}
+
/*
* Get (and clear) the dirty memory log for a memory slot.
*/
@@ -697,6 +730,7 @@ static int kvm_dev_ioctl_get_dirty_log(struct kvm *kvm,
struct kvm_memory_slot *memslot;
int r, i;
int n;
+ int cleared;
unsigned long any = 0;
spin_lock(&kvm->lock);
@@ -727,15 +761,17 @@ static int kvm_dev_ioctl_get_dirty_log(struct kvm *kvm,
if (any) {
- spin_lock(&kvm->lock);
- kvm_mmu_slot_remove_write_access(kvm, log->slot);
- spin_unlock(&kvm->lock);
- memset(memslot->dirty_bitmap, 0, n);
+ cleared = 0;
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
struct kvm_vcpu *vcpu = vcpu_load(kvm, i);
if (!vcpu)
continue;
+ if (!cleared) {
+ do_remove_write_access(vcpu, log->slot);
+ memset(memslot->dirty_bitmap, 0, n);
+ cleared = 1;
+ }
kvm_arch_ops->tlb_flush(vcpu);
vcpu_put(vcpu);
}
@@ -863,6 +899,27 @@ static int emulator_read_emulated(unsigned long addr,
}
}
+static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
+ unsigned long val, int bytes)
+{
+ struct kvm_memory_slot *m;
+ struct page *page;
+ void *virt;
+
+ if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT))
+ return 0;
+ m = gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT);
+ if (!m)
+ return 0;
+ page = gfn_to_page(m, gpa >> PAGE_SHIFT);
+ kvm_mmu_pre_write(vcpu, gpa, bytes);
+ virt = kmap_atomic(page, KM_USER0);
+ memcpy(virt + offset_in_page(gpa), &val, bytes);
+ kunmap_atomic(virt, KM_USER0);
+ kvm_mmu_post_write(vcpu, gpa, bytes);
+ return 1;
+}
+
static int emulator_write_emulated(unsigned long addr,
unsigned long val,
unsigned int bytes,
@@ -874,6 +931,9 @@ static int emulator_write_emulated(unsigned long addr,
if (gpa == UNMAPPED_GVA)
return X86EMUL_PROPAGATE_FAULT;
+ if (emulator_write_phys(vcpu, gpa, val, bytes))
+ return X86EMUL_CONTINUE;
+
vcpu->mmio_needed = 1;
vcpu->mmio_phys_addr = gpa;
vcpu->mmio_size = bytes;
@@ -898,6 +958,30 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
return emulator_write_emulated(addr, new, bytes, ctxt);
}
+#ifdef CONFIG_X86_32
+
+static int emulator_cmpxchg8b_emulated(unsigned long addr,
+ unsigned long old_lo,
+ unsigned long old_hi,
+ unsigned long new_lo,
+ unsigned long new_hi,
+ struct x86_emulate_ctxt *ctxt)
+{
+ static int reported;
+ int r;
+
+ if (!reported) {
+ reported = 1;
+ printk(KERN_WARNING "kvm: emulating exchange8b as write\n");
+ }
+ r = emulator_write_emulated(addr, new_lo, 4, ctxt);
+ if (r != X86EMUL_CONTINUE)
+ return r;
+ return emulator_write_emulated(addr+4, new_hi, 4, ctxt);
+}
+
+#endif
+
static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
{
return kvm_arch_ops->get_segment_base(vcpu, seg);
@@ -905,18 +989,15 @@ static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
{
- spin_lock(&vcpu->kvm->lock);
- vcpu->mmu.inval_page(vcpu, address);
- spin_unlock(&vcpu->kvm->lock);
- kvm_arch_ops->invlpg(vcpu, address);
return X86EMUL_CONTINUE;
}
int emulate_clts(struct kvm_vcpu *vcpu)
{
- unsigned long cr0 = vcpu->cr0;
+ unsigned long cr0;
- cr0 &= ~CR0_TS_MASK;
+ kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu);
+ cr0 = vcpu->cr0 & ~CR0_TS_MASK;
kvm_arch_ops->set_cr0(vcpu, cr0);
return X86EMUL_CONTINUE;
}
@@ -975,6 +1056,9 @@ struct x86_emulate_ops emulate_ops = {
.read_emulated = emulator_read_emulated,
.write_emulated = emulator_write_emulated,
.cmpxchg_emulated = emulator_cmpxchg_emulated,
+#ifdef CONFIG_X86_32
+ .cmpxchg8b_emulated = emulator_cmpxchg8b_emulated,
+#endif
};
int emulate_instruction(struct kvm_vcpu *vcpu,
@@ -1024,6 +1108,8 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
}
if (r) {
+ if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
+ return EMULATE_DONE;
if (!vcpu->mmio_needed) {
report_emulation_failure(&emulate_ctxt);
return EMULATE_FAIL;
@@ -1069,6 +1155,7 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
{
+ kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu);
switch (cr) {
case 0:
return vcpu->cr0;
@@ -1403,6 +1490,7 @@ static int kvm_dev_ioctl_get_sregs(struct kvm *kvm, struct kvm_sregs *sregs)
sregs->gdt.limit = dt.limit;
sregs->gdt.base = dt.base;
+ kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu);
sregs->cr0 = vcpu->cr0;
sregs->cr2 = vcpu->cr2;
sregs->cr3 = vcpu->cr3;
@@ -1467,11 +1555,15 @@ static int kvm_dev_ioctl_set_sregs(struct kvm *kvm, struct kvm_sregs *sregs)
#endif
vcpu->apic_base = sregs->apic_base;
+ kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu);
+
mmu_reset_needed |= vcpu->cr0 != sregs->cr0;
kvm_arch_ops->set_cr0_no_modeswitch(vcpu, sregs->cr0);
mmu_reset_needed |= vcpu->cr4 != sregs->cr4;
kvm_arch_ops->set_cr4(vcpu, sregs->cr4);
+ if (!is_long_mode(vcpu) && is_pae(vcpu))
+ load_pdptrs(vcpu, vcpu->cr3);
if (mmu_reset_needed)
kvm_mmu_reset_context(vcpu);
@@ -1693,12 +1785,12 @@ static long kvm_dev_ioctl(struct file *filp,
if (copy_from_user(&kvm_run, (void *)arg, sizeof kvm_run))
goto out;
r = kvm_dev_ioctl_run(kvm, &kvm_run);
- if (r < 0)
+ if (r < 0 && r != -EINTR)
goto out;
- r = -EFAULT;
- if (copy_to_user((void *)arg, &kvm_run, sizeof kvm_run))
+ if (copy_to_user((void *)arg, &kvm_run, sizeof kvm_run)) {
+ r = -EFAULT;
goto out;
- r = 0;
+ }
break;
}
case KVM_GET_REGS: {
@@ -1842,6 +1934,7 @@ static long kvm_dev_ioctl(struct file *filp,
num_msrs_to_save * sizeof(u32)))
goto out;
r = 0;
+ break;
}
default:
;
@@ -1944,17 +2037,17 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
return -EEXIST;
}
- kvm_arch_ops = ops;
-
- if (!kvm_arch_ops->cpu_has_kvm_support()) {
+ if (!ops->cpu_has_kvm_support()) {
printk(KERN_ERR "kvm: no hardware support\n");
return -EOPNOTSUPP;
}
- if (kvm_arch_ops->disabled_by_bios()) {
+ if (ops->disabled_by_bios()) {
printk(KERN_ERR "kvm: disabled by bios\n");
return -EOPNOTSUPP;
}
+ kvm_arch_ops = ops;
+
r = kvm_arch_ops->hardware_setup();
if (r < 0)
return r;
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 790423c5f23d..c6f972914f08 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -26,7 +26,31 @@
#include "vmx.h"
#include "kvm.h"
+#undef MMU_DEBUG
+
+#undef AUDIT
+
+#ifdef AUDIT
+static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg);
+#else
+static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {}
+#endif
+
+#ifdef MMU_DEBUG
+
+#define pgprintk(x...) do { if (dbg) printk(x); } while (0)
+#define rmap_printk(x...) do { if (dbg) printk(x); } while (0)
+
+#else
+
#define pgprintk(x...) do { } while (0)
+#define rmap_printk(x...) do { } while (0)
+
+#endif
+
+#if defined(MMU_DEBUG) || defined(AUDIT)
+static int dbg = 1;
+#endif
#define ASSERT(x) \
if (!(x)) { \
@@ -34,8 +58,10 @@
__FILE__, __LINE__, #x); \
}
-#define PT64_ENT_PER_PAGE 512
-#define PT32_ENT_PER_PAGE 1024
+#define PT64_PT_BITS 9
+#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
+#define PT32_PT_BITS 10
+#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)
#define PT_WRITABLE_SHIFT 1
@@ -125,6 +151,13 @@
#define PT_DIRECTORY_LEVEL 2
#define PT_PAGE_TABLE_LEVEL 1
+#define RMAP_EXT 4
+
+struct kvm_rmap_desc {
+ u64 *shadow_ptes[RMAP_EXT];
+ struct kvm_rmap_desc *more;
+};
+
static int is_write_protection(struct kvm_vcpu *vcpu)
{
return vcpu->cr0 & CR0_WP_MASK;
@@ -150,32 +183,272 @@ static int is_io_pte(unsigned long pte)
return pte & PT_SHADOW_IO_MARK;
}
+static int is_rmap_pte(u64 pte)
+{
+ return (pte & (PT_WRITABLE_MASK | PT_PRESENT_MASK))
+ == (PT_WRITABLE_MASK | PT_PRESENT_MASK);
+}
+
+static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
+ size_t objsize, int min)
+{
+ void *obj;
+
+ if (cache->nobjs >= min)
+ return 0;
+ while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
+ obj = kzalloc(objsize, GFP_NOWAIT);
+ if (!obj)
+ return -ENOMEM;
+ cache->objects[cache->nobjs++] = obj;
+ }
+ return 0;
+}
+
+static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
+{
+ while (mc->nobjs)
+ kfree(mc->objects[--mc->nobjs]);
+}
+
+static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
+{
+ int r;
+
+ r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache,
+ sizeof(struct kvm_pte_chain), 4);
+ if (r)
+ goto out;
+ r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache,
+ sizeof(struct kvm_rmap_desc), 1);
+out:
+ return r;
+}
+
+static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
+{
+ mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache);
+ mmu_free_memory_cache(&vcpu->mmu_rmap_desc_cache);
+}
+
+static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
+ size_t size)
+{
+ void *p;
+
+ BUG_ON(!mc->nobjs);
+ p = mc->objects[--mc->nobjs];
+ memset(p, 0, size);
+ return p;
+}
+
+static void mmu_memory_cache_free(struct kvm_mmu_memory_cache *mc, void *obj)
+{
+ if (mc->nobjs < KVM_NR_MEM_OBJS)
+ mc->objects[mc->nobjs++] = obj;
+ else
+ kfree(obj);
+}
+
+static struct kvm_pte_chain *mmu_alloc_pte_chain(struct kvm_vcpu *vcpu)
+{
+ return mmu_memory_cache_alloc(&vcpu->mmu_pte_chain_cache,
+ sizeof(struct kvm_pte_chain));
+}
+
+static void mmu_free_pte_chain(struct kvm_vcpu *vcpu,
+ struct kvm_pte_chain *pc)
+{
+ mmu_memory_cache_free(&vcpu->mmu_pte_chain_cache, pc);
+}
+
+static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu)
+{
+ return mmu_memory_cache_alloc(&vcpu->mmu_rmap_desc_cache,
+ sizeof(struct kvm_rmap_desc));
+}
+
+static void mmu_free_rmap_desc(struct kvm_vcpu *vcpu,
+ struct kvm_rmap_desc *rd)
+{
+ mmu_memory_cache_free(&vcpu->mmu_rmap_desc_cache, rd);
+}
+
+/*
+ * Reverse mapping data structures:
+ *
+ * If page->private bit zero is zero, then page->private points to the
+ * shadow page table entry that points to page_address(page).
+ *
+ * If page->private bit zero is one, (then page->private & ~1) points
+ * to a struct kvm_rmap_desc containing more mappings.
+ */
+static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte)
+{
+ struct page *page;
+ struct kvm_rmap_desc *desc;
+ int i;
+
+ if (!is_rmap_pte(*spte))
+ return;
+ page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
+ if (!page->private) {
+ rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
+ page->private = (unsigned long)spte;
+ } else if (!(page->private & 1)) {
+ rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte);
+ desc = mmu_alloc_rmap_desc(vcpu);
+ desc->shadow_ptes[0] = (u64 *)page->private;
+ desc->shadow_ptes[1] = spte;
+ page->private = (unsigned long)desc | 1;
+ } else {
+ rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
+ desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
+ while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)
+ desc = desc->more;
+ if (desc->shadow_ptes[RMAP_EXT-1]) {
+ desc->more = mmu_alloc_rmap_desc(vcpu);
+ desc = desc->more;
+ }
+ for (i = 0; desc->shadow_ptes[i]; ++i)
+ ;
+ desc->shadow_ptes[i] = spte;
+ }
+}
+
+static void rmap_desc_remove_entry(struct kvm_vcpu *vcpu,
+ struct page *page,
+ struct kvm_rmap_desc *desc,
+ int i,
+ struct kvm_rmap_desc *prev_desc)
+{
+ int j;
+
+ for (j = RMAP_EXT - 1; !desc->shadow_ptes[j] && j > i; --j)
+ ;
+ desc->shadow_ptes[i] = desc->shadow_ptes[j];
+ desc->shadow_ptes[j] = 0;
+ if (j != 0)
+ return;
+ if (!prev_desc && !desc->more)
+ page->private = (unsigned long)desc->shadow_ptes[0];
+ else
+ if (prev_desc)
+ prev_desc->more = desc->more;
+ else
+ page->private = (unsigned long)desc->more | 1;
+ mmu_free_rmap_desc(vcpu, desc);
+}
+
+static void rmap_remove(struct kvm_vcpu *vcpu, u64 *spte)
+{
+ struct page *page;
+ struct kvm_rmap_desc *desc;
+ struct kvm_rmap_desc *prev_desc;
+ int i;
+
+ if (!is_rmap_pte(*spte))
+ return;
+ page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
+ if (!page->private) {
+ printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
+ BUG();
+ } else if (!(page->private & 1)) {
+ rmap_printk("rmap_remove: %p %llx 1->0\n", spte, *spte);
+ if ((u64 *)page->private != spte) {
+ printk(KERN_ERR "rmap_remove: %p %llx 1->BUG\n",
+ spte, *spte);
+ BUG();
+ }
+ page->private = 0;
+ } else {
+ rmap_printk("rmap_remove: %p %llx many->many\n", spte, *spte);
+ desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
+ prev_desc = NULL;
+ while (desc) {
+ for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i)
+ if (desc->shadow_ptes[i] == spte) {
+ rmap_desc_remove_entry(vcpu, page,
+ desc, i,
+ prev_desc);
+ return;
+ }
+ prev_desc = desc;
+ desc = desc->more;
+ }
+ BUG();
+ }
+}
+
+static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct page *page;
+ struct kvm_memory_slot *slot;
+ struct kvm_rmap_desc *desc;
+ u64 *spte;
+
+ slot = gfn_to_memslot(kvm, gfn);
+ BUG_ON(!slot);
+ page = gfn_to_page(slot, gfn);
+
+ while (page->private) {
+ if (!(page->private & 1))
+ spte = (u64 *)page->private;
+ else {
+ desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
+ spte = desc->shadow_ptes[0];
+ }
+ BUG_ON(!spte);
+ BUG_ON((*spte & PT64_BASE_ADDR_MASK) !=
+ page_to_pfn(page) << PAGE_SHIFT);
+ BUG_ON(!(*spte & PT_PRESENT_MASK));
+ BUG_ON(!(*spte & PT_WRITABLE_MASK));
+ rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
+ rmap_remove(vcpu, spte);
+ kvm_arch_ops->tlb_flush(vcpu);
+ *spte &= ~(u64)PT_WRITABLE_MASK;
+ }
+}
+
+static int is_empty_shadow_page(hpa_t page_hpa)
+{
+ u64 *pos;
+ u64 *end;
+
+ for (pos = __va(page_hpa), end = pos + PAGE_SIZE / sizeof(u64);
+ pos != end; pos++)
+ if (*pos != 0) {
+ printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__,
+ pos, *pos);
+ return 0;
+ }
+ return 1;
+}
+
static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa)
{
struct kvm_mmu_page *page_head = page_header(page_hpa);
+ ASSERT(is_empty_shadow_page(page_hpa));
list_del(&page_head->link);
page_head->page_hpa = page_hpa;
list_add(&page_head->link, &vcpu->free_pages);
+ ++vcpu->kvm->n_free_mmu_pages;
}
-static int is_empty_shadow_page(hpa_t page_hpa)
+static unsigned kvm_page_table_hashfn(gfn_t gfn)
{
- u32 *pos;
- u32 *end;
- for (pos = __va(page_hpa), end = pos + PAGE_SIZE / sizeof(u32);
- pos != end; pos++)
- if (*pos != 0)
- return 0;
- return 1;
+ return gfn;
}
-static hpa_t kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, u64 *parent_pte)
+static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
+ u64 *parent_pte)
{
struct kvm_mmu_page *page;
if (list_empty(&vcpu->free_pages))
- return INVALID_PAGE;
+ return NULL;
page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link);
list_del(&page->link);
@@ -183,8 +456,239 @@ static hpa_t kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, u64 *parent_pte)
ASSERT(is_empty_shadow_page(page->page_hpa));
page->slot_bitmap = 0;
page->global = 1;
+ page->multimapped = 0;
page->parent_pte = parent_pte;
- return page->page_hpa;
+ --vcpu->kvm->n_free_mmu_pages;
+ return page;
+}
+
+static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu,
+ struct kvm_mmu_page *page, u64 *parent_pte)
+{
+ struct kvm_pte_chain *pte_chain;
+ struct hlist_node *node;
+ int i;
+
+ if (!parent_pte)
+ return;
+ if (!page->multimapped) {
+ u64 *old = page->parent_pte;
+
+ if (!old) {
+ page->parent_pte = parent_pte;
+ return;
+ }
+ page->multimapped = 1;
+ pte_chain = mmu_alloc_pte_chain(vcpu);
+ INIT_HLIST_HEAD(&page->parent_ptes);
+ hlist_add_head(&pte_chain->link, &page->parent_ptes);
+ pte_chain->parent_ptes[0] = old;
+ }
+ hlist_for_each_entry(pte_chain, node, &page->parent_ptes, link) {
+ if (pte_chain->parent_ptes[NR_PTE_CHAIN_ENTRIES-1])
+ continue;
+ for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i)
+ if (!pte_chain->parent_ptes[i]) {
+ pte_chain->parent_ptes[i] = parent_pte;
+ return;
+ }
+ }
+ pte_chain = mmu_alloc_pte_chain(vcpu);
+ BUG_ON(!pte_chain);
+ hlist_add_head(&pte_chain->link, &page->parent_ptes);
+ pte_chain->parent_ptes[0] = parent_pte;
+}
+
+static void mmu_page_remove_parent_pte(struct kvm_vcpu *vcpu,
+ struct kvm_mmu_page *page,
+ u64 *parent_pte)
+{
+ struct kvm_pte_chain *pte_chain;
+ struct hlist_node *node;
+ int i;
+
+ if (!page->multimapped) {
+ BUG_ON(page->parent_pte != parent_pte);
+ page->parent_pte = NULL;
+ return;
+ }
+ hlist_for_each_entry(pte_chain, node, &page->parent_ptes, link)
+ for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) {
+ if (!pte_chain->parent_ptes[i])
+ break;
+ if (pte_chain->parent_ptes[i] != parent_pte)
+ continue;
+ while (i + 1 < NR_PTE_CHAIN_ENTRIES
+ && pte_chain->parent_ptes[i + 1]) {
+ pte_chain->parent_ptes[i]
+ = pte_chain->parent_ptes[i + 1];
+ ++i;
+ }
+ pte_chain->parent_ptes[i] = NULL;
+ if (i == 0) {
+ hlist_del(&pte_chain->link);
+ mmu_free_pte_chain(vcpu, pte_chain);
+ if (hlist_empty(&page->parent_ptes)) {
+ page->multimapped = 0;
+ page->parent_pte = NULL;
+ }
+ }
+ return;
+ }
+ BUG();
+}
+
+static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm_vcpu *vcpu,
+ gfn_t gfn)
+{
+ unsigned index;
+ struct hlist_head *bucket;
+ struct kvm_mmu_page *page;
+ struct hlist_node *node;
+
+ pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
+ index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
+ bucket = &vcpu->kvm->mmu_page_hash[index];
+ hlist_for_each_entry(page, node, bucket, hash_link)
+ if (page->gfn == gfn && !page->role.metaphysical) {
+ pgprintk("%s: found role %x\n",
+ __FUNCTION__, page->role.word);
+ return page;
+ }
+ return NULL;
+}
+
+static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
+ gfn_t gfn,
+ gva_t gaddr,
+ unsigned level,
+ int metaphysical,
+ u64 *parent_pte)
+{
+ union kvm_mmu_page_role role;
+ unsigned index;
+ unsigned quadrant;
+ struct hlist_head *bucket;
+ struct kvm_mmu_page *page;
+ struct hlist_node *node;
+
+ role.word = 0;
+ role.glevels = vcpu->mmu.root_level;
+ role.level = level;
+ role.metaphysical = metaphysical;
+ if (vcpu->mmu.root_level <= PT32_ROOT_LEVEL) {
+ quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
+ quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
+ role.quadrant = quadrant;
+ }
+ pgprintk("%s: looking gfn %lx role %x\n", __FUNCTION__,
+ gfn, role.word);
+ index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
+ bucket = &vcpu->kvm->mmu_page_hash[index];
+ hlist_for_each_entry(page, node, bucket, hash_link)
+ if (page->gfn == gfn && page->role.word == role.word) {
+ mmu_page_add_parent_pte(vcpu, page, parent_pte);
+ pgprintk("%s: found\n", __FUNCTION__);
+ return page;
+ }
+ page = kvm_mmu_alloc_page(vcpu, parent_pte);
+ if (!page)
+ return page;
+ pgprintk("%s: adding gfn %lx role %x\n", __FUNCTION__, gfn, role.word);
+ page->gfn = gfn;
+ page->role = role;
+ hlist_add_head(&page->hash_link, bucket);
+ if (!metaphysical)
+ rmap_write_protect(vcpu, gfn);
+ return page;
+}
+
+static void kvm_mmu_page_unlink_children(struct kvm_vcpu *vcpu,
+ struct kvm_mmu_page *page)
+{
+ unsigned i;
+ u64 *pt;
+ u64 ent;
+
+ pt = __va(page->page_hpa);
+
+ if (page->role.level == PT_PAGE_TABLE_LEVEL) {
+ for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
+ if (pt[i] & PT_PRESENT_MASK)
+ rmap_remove(vcpu, &pt[i]);
+ pt[i] = 0;
+ }
+ kvm_arch_ops->tlb_flush(vcpu);
+ return;
+ }
+
+ for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
+ ent = pt[i];
+
+ pt[i] = 0;
+ if (!(ent & PT_PRESENT_MASK))
+ continue;
+ ent &= PT64_BASE_ADDR_MASK;
+ mmu_page_remove_parent_pte(vcpu, page_header(ent), &pt[i]);
+ }
+}
+
+static void kvm_mmu_put_page(struct kvm_vcpu *vcpu,
+ struct kvm_mmu_page *page,
+ u64 *parent_pte)
+{
+ mmu_page_remove_parent_pte(vcpu, page, parent_pte);
+}
+
+static void kvm_mmu_zap_page(struct kvm_vcpu *vcpu,
+ struct kvm_mmu_page *page)
+{
+ u64 *parent_pte;
+
+ while (page->multimapped || page->parent_pte) {
+ if (!page->multimapped)
+ parent_pte = page->parent_pte;
+ else {
+ struct kvm_pte_chain *chain;
+
+ chain = container_of(page->parent_ptes.first,
+ struct kvm_pte_chain, link);
+ parent_pte = chain->parent_ptes[0];
+ }
+ BUG_ON(!parent_pte);
+ kvm_mmu_put_page(vcpu, page, parent_pte);
+ *parent_pte = 0;
+ }
+ kvm_mmu_page_unlink_children(vcpu, page);
+ if (!page->root_count) {
+ hlist_del(&page->hash_link);
+ kvm_mmu_free_page(vcpu, page->page_hpa);
+ } else {
+ list_del(&page->link);
+ list_add(&page->link, &vcpu->kvm->active_mmu_pages);
+ }
+}
+
+static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+ unsigned index;
+ struct hlist_head *bucket;
+ struct kvm_mmu_page *page;
+ struct hlist_node *node, *n;
+ int r;
+
+ pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
+ r = 0;
+ index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
+ bucket = &vcpu->kvm->mmu_page_hash[index];
+ hlist_for_each_entry_safe(page, node, n, bucket, hash_link)
+ if (page->gfn == gfn && !page->role.metaphysical) {
+ pgprintk("%s: gfn %lx role %x\n", __FUNCTION__, gfn,
+ page->role.word);
+ kvm_mmu_zap_page(vcpu, page);
+ r = 1;
+ }
+ return r;
}
static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa)
@@ -225,35 +729,6 @@ hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva)
return gpa_to_hpa(vcpu, gpa);
}
-
-static void release_pt_page_64(struct kvm_vcpu *vcpu, hpa_t page_hpa,
- int level)
-{
- ASSERT(vcpu);
- ASSERT(VALID_PAGE(page_hpa));
- ASSERT(level <= PT64_ROOT_LEVEL && level > 0);
-
- if (level == 1)
- memset(__va(page_hpa), 0, PAGE_SIZE);
- else {
- u64 *pos;
- u64 *end;
-
- for (pos = __va(page_hpa), end = pos + PT64_ENT_PER_PAGE;
- pos != end; pos++) {
- u64 current_ent = *pos;
-
- *pos = 0;
- if (is_present_pte(current_ent))
- release_pt_page_64(vcpu,
- current_ent &
- PT64_BASE_ADDR_MASK,
- level - 1);
- }
- }
- kvm_mmu_free_page(vcpu, page_hpa);
-}
-
static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
{
}
@@ -266,52 +741,109 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
for (; ; level--) {
u32 index = PT64_INDEX(v, level);
u64 *table;
+ u64 pte;
ASSERT(VALID_PAGE(table_addr));
table = __va(table_addr);
if (level == 1) {
+ pte = table[index];
+ if (is_present_pte(pte) && is_writeble_pte(pte))
+ return 0;
mark_page_dirty(vcpu->kvm, v >> PAGE_SHIFT);
page_header_update_slot(vcpu->kvm, table, v);
table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK |
PT_USER_MASK;
+ rmap_add(vcpu, &table[index]);
return 0;
}
if (table[index] == 0) {
- hpa_t new_table = kvm_mmu_alloc_page(vcpu,
- &table[index]);
-
- if (!VALID_PAGE(new_table)) {
+ struct kvm_mmu_page *new_table;
+ gfn_t pseudo_gfn;
+
+ pseudo_gfn = (v & PT64_DIR_BASE_ADDR_MASK)
+ >> PAGE_SHIFT;
+ new_table = kvm_mmu_get_page(vcpu, pseudo_gfn,
+ v, level - 1,
+ 1, &table[index]);
+ if (!new_table) {
pgprintk("nonpaging_map: ENOMEM\n");
return -ENOMEM;
}
- if (level == PT32E_ROOT_LEVEL)
- table[index] = new_table | PT_PRESENT_MASK;
- else
- table[index] = new_table | PT_PRESENT_MASK |
- PT_WRITABLE_MASK | PT_USER_MASK;
+ table[index] = new_table->page_hpa | PT_PRESENT_MASK
+ | PT_WRITABLE_MASK | PT_USER_MASK;
}
table_addr = table[index] & PT64_BASE_ADDR_MASK;
}
}
-static void nonpaging_flush(struct kvm_vcpu *vcpu)
+static void mmu_free_roots(struct kvm_vcpu *vcpu)
{
- hpa_t root = vcpu->mmu.root_hpa;
+ int i;
+ struct kvm_mmu_page *page;
- ++kvm_stat.tlb_flush;
- pgprintk("nonpaging_flush\n");
- ASSERT(VALID_PAGE(root));
- release_pt_page_64(vcpu, root, vcpu->mmu.shadow_root_level);
- root = kvm_mmu_alloc_page(vcpu, NULL);
- ASSERT(VALID_PAGE(root));
- vcpu->mmu.root_hpa = root;
- if (is_paging(vcpu))
- root |= (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK));
- kvm_arch_ops->set_cr3(vcpu, root);
- kvm_arch_ops->tlb_flush(vcpu);
+#ifdef CONFIG_X86_64
+ if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) {
+ hpa_t root = vcpu->mmu.root_hpa;
+
+ ASSERT(VALID_PAGE(root));
+ page = page_header(root);
+ --page->root_count;
+ vcpu->mmu.root_hpa = INVALID_PAGE;
+ return;
+ }
+#endif
+ for (i = 0; i < 4; ++i) {
+ hpa_t root = vcpu->mmu.pae_root[i];
+
+ ASSERT(VALID_PAGE(root));
+ root &= PT64_BASE_ADDR_MASK;
+ page = page_header(root);
+ --page->root_count;
+ vcpu->mmu.pae_root[i] = INVALID_PAGE;
+ }
+ vcpu->mmu.root_hpa = INVALID_PAGE;
+}
+
+static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
+{
+ int i;
+ gfn_t root_gfn;
+ struct kvm_mmu_page *page;
+
+ root_gfn = vcpu->cr3 >> PAGE_SHIFT;
+
+#ifdef CONFIG_X86_64
+ if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) {
+ hpa_t root = vcpu->mmu.root_hpa;
+
+ ASSERT(!VALID_PAGE(root));
+ page = kvm_mmu_get_page(vcpu, root_gfn, 0,
+ PT64_ROOT_LEVEL, 0, NULL);
+ root = page->page_hpa;
+ ++page->root_count;
+ vcpu->mmu.root_hpa = root;
+ return;
+ }
+#endif
+ for (i = 0; i < 4; ++i) {
+ hpa_t root = vcpu->mmu.pae_root[i];
+
+ ASSERT(!VALID_PAGE(root));
+ if (vcpu->mmu.root_level == PT32E_ROOT_LEVEL)
+ root_gfn = vcpu->pdptrs[i] >> PAGE_SHIFT;
+ else if (vcpu->mmu.root_level == 0)
+ root_gfn = 0;
+ page = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
+ PT32_ROOT_LEVEL, !is_paging(vcpu),
+ NULL);
+ root = page->page_hpa;
+ ++page->root_count;
+ vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK;
+ }
+ vcpu->mmu.root_hpa = __pa(vcpu->mmu.pae_root);
}
static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
@@ -322,43 +854,29 @@ static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
u32 error_code)
{
- int ret;
gpa_t addr = gva;
+ hpa_t paddr;
+ int r;
+
+ r = mmu_topup_memory_caches(vcpu);
+ if (r)
+ return r;
ASSERT(vcpu);
ASSERT(VALID_PAGE(vcpu->mmu.root_hpa));
- for (;;) {
- hpa_t paddr;
-
- paddr = gpa_to_hpa(vcpu , addr & PT64_BASE_ADDR_MASK);
- if (is_error_hpa(paddr))
- return 1;
+ paddr = gpa_to_hpa(vcpu , addr & PT64_BASE_ADDR_MASK);
- ret = nonpaging_map(vcpu, addr & PAGE_MASK, paddr);
- if (ret) {
- nonpaging_flush(vcpu);
- continue;
- }
- break;
- }
- return ret;
-}
+ if (is_error_hpa(paddr))
+ return 1;
-static void nonpaging_inval_page(struct kvm_vcpu *vcpu, gva_t addr)
-{
+ return nonpaging_map(vcpu, addr & PAGE_MASK, paddr);
}
static void nonpaging_free(struct kvm_vcpu *vcpu)
{
- hpa_t root;
-
- ASSERT(vcpu);
- root = vcpu->mmu.root_hpa;
- if (VALID_PAGE(root))
- release_pt_page_64(vcpu, root, vcpu->mmu.shadow_root_level);
- vcpu->mmu.root_hpa = INVALID_PAGE;
+ mmu_free_roots(vcpu);
}
static int nonpaging_init_context(struct kvm_vcpu *vcpu)
@@ -367,40 +885,31 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu)
context->new_cr3 = nonpaging_new_cr3;
context->page_fault = nonpaging_page_fault;
- context->inval_page = nonpaging_inval_page;
context->gva_to_gpa = nonpaging_gva_to_gpa;
context->free = nonpaging_free;
- context->root_level = PT32E_ROOT_LEVEL;
+ context->root_level = 0;
context->shadow_root_level = PT32E_ROOT_LEVEL;
- context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL);
+ mmu_alloc_roots(vcpu);
ASSERT(VALID_PAGE(context->root_hpa));
kvm_arch_ops->set_cr3(vcpu, context->root_hpa);
return 0;
}
-
static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
{
- struct kvm_mmu_page *page, *npage;
-
- list_for_each_entry_safe(page, npage, &vcpu->kvm->active_mmu_pages,
- link) {
- if (page->global)
- continue;
-
- if (!page->parent_pte)
- continue;
-
- *page->parent_pte = 0;
- release_pt_page_64(vcpu, page->page_hpa, 1);
- }
++kvm_stat.tlb_flush;
kvm_arch_ops->tlb_flush(vcpu);
}
static void paging_new_cr3(struct kvm_vcpu *vcpu)
{
+ pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->cr3);
+ mmu_free_roots(vcpu);
+ if (unlikely(vcpu->kvm->n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
+ kvm_mmu_free_some_pages(vcpu);
+ mmu_alloc_roots(vcpu);
kvm_mmu_flush_tlb(vcpu);
+ kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
}
static void mark_pagetable_nonglobal(void *shadow_pte)
@@ -412,7 +921,8 @@ static inline void set_pte_common(struct kvm_vcpu *vcpu,
u64 *shadow_pte,
gpa_t gaddr,
int dirty,
- u64 access_bits)
+ u64 access_bits,
+ gfn_t gfn)
{
hpa_t paddr;
@@ -420,13 +930,10 @@ static inline void set_pte_common(struct kvm_vcpu *vcpu,
if (!dirty)
access_bits &= ~PT_WRITABLE_MASK;
- if (access_bits & PT_WRITABLE_MASK)
- mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT);
+ paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK);
*shadow_pte |= access_bits;
- paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK);
-
if (!(*shadow_pte & PT_GLOBAL_MASK))
mark_pagetable_nonglobal(shadow_pte);
@@ -434,10 +941,31 @@ static inline void set_pte_common(struct kvm_vcpu *vcpu,
*shadow_pte |= gaddr;
*shadow_pte |= PT_SHADOW_IO_MARK;
*shadow_pte &= ~PT_PRESENT_MASK;
- } else {
- *shadow_pte |= paddr;
- page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
+ return;
+ }
+
+ *shadow_pte |= paddr;
+
+ if (access_bits & PT_WRITABLE_MASK) {
+ struct kvm_mmu_page *shadow;
+
+ shadow = kvm_mmu_lookup_page(vcpu, gfn);
+ if (shadow) {
+ pgprintk("%s: found shadow page for %lx, marking ro\n",
+ __FUNCTION__, gfn);
+ access_bits &= ~PT_WRITABLE_MASK;
+ if (is_writeble_pte(*shadow_pte)) {
+ *shadow_pte &= ~PT_WRITABLE_MASK;
+ kvm_arch_ops->tlb_flush(vcpu);
+ }
+ }
}
+
+ if (access_bits & PT_WRITABLE_MASK)
+ mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT);
+
+ page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
+ rmap_add(vcpu, shadow_pte);
}
static void inject_page_fault(struct kvm_vcpu *vcpu,
@@ -474,41 +1002,6 @@ static int may_access(u64 pte, int write, int user)
return 1;
}
-/*
- * Remove a shadow pte.
- */
-static void paging_inval_page(struct kvm_vcpu *vcpu, gva_t addr)
-{
- hpa_t page_addr = vcpu->mmu.root_hpa;
- int level = vcpu->mmu.shadow_root_level;
-
- ++kvm_stat.invlpg;
-
- for (; ; level--) {
- u32 index = PT64_INDEX(addr, level);
- u64 *table = __va(page_addr);
-
- if (level == PT_PAGE_TABLE_LEVEL ) {
- table[index] = 0;
- return;
- }
-
- if (!is_present_pte(table[index]))
- return;
-
- page_addr = table[index] & PT64_BASE_ADDR_MASK;
-
- if (level == PT_DIRECTORY_LEVEL &&
- (table[index] & PT_SHADOW_PS_MARK)) {
- table[index] = 0;
- release_pt_page_64(vcpu, page_addr, PT_PAGE_TABLE_LEVEL);
-
- kvm_arch_ops->tlb_flush(vcpu);
- return;
- }
- }
-}
-
static void paging_free(struct kvm_vcpu *vcpu)
{
nonpaging_free(vcpu);
@@ -522,37 +1015,40 @@ static void paging_free(struct kvm_vcpu *vcpu)
#include "paging_tmpl.h"
#undef PTTYPE
-static int paging64_init_context(struct kvm_vcpu *vcpu)
+static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
{
struct kvm_mmu *context = &vcpu->mmu;
ASSERT(is_pae(vcpu));
context->new_cr3 = paging_new_cr3;
context->page_fault = paging64_page_fault;
- context->inval_page = paging_inval_page;
context->gva_to_gpa = paging64_gva_to_gpa;
context->free = paging_free;
- context->root_level = PT64_ROOT_LEVEL;
- context->shadow_root_level = PT64_ROOT_LEVEL;
- context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL);
+ context->root_level = level;
+ context->shadow_root_level = level;
+ mmu_alloc_roots(vcpu);
ASSERT(VALID_PAGE(context->root_hpa));
kvm_arch_ops->set_cr3(vcpu, context->root_hpa |
(vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)));
return 0;
}
+static int paging64_init_context(struct kvm_vcpu *vcpu)
+{
+ return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
+}
+
static int paging32_init_context(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *context = &vcpu->mmu;
context->new_cr3 = paging_new_cr3;
context->page_fault = paging32_page_fault;
- context->inval_page = paging_inval_page;
context->gva_to_gpa = paging32_gva_to_gpa;
context->free = paging_free;
context->root_level = PT32_ROOT_LEVEL;
context->shadow_root_level = PT32E_ROOT_LEVEL;
- context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL);
+ mmu_alloc_roots(vcpu);
ASSERT(VALID_PAGE(context->root_hpa));
kvm_arch_ops->set_cr3(vcpu, context->root_hpa |
(vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)));
@@ -561,14 +1057,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
static int paging32E_init_context(struct kvm_vcpu *vcpu)
{
- int ret;
-
- if ((ret = paging64_init_context(vcpu)))
- return ret;
-
- vcpu->mmu.root_level = PT32E_ROOT_LEVEL;
- vcpu->mmu.shadow_root_level = PT32E_ROOT_LEVEL;
- return 0;
+ return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL);
}
static int init_kvm_mmu(struct kvm_vcpu *vcpu)
@@ -597,41 +1086,161 @@ static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
{
+ int r;
+
destroy_kvm_mmu(vcpu);
- return init_kvm_mmu(vcpu);
+ r = init_kvm_mmu(vcpu);
+ if (r < 0)
+ goto out;
+ r = mmu_topup_memory_caches(vcpu);
+out:
+ return r;
}
-static void free_mmu_pages(struct kvm_vcpu *vcpu)
+void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
{
- while (!list_empty(&vcpu->free_pages)) {
+ gfn_t gfn = gpa >> PAGE_SHIFT;
+ struct kvm_mmu_page *page;
+ struct kvm_mmu_page *child;
+ struct hlist_node *node, *n;
+ struct hlist_head *bucket;
+ unsigned index;
+ u64 *spte;
+ u64 pte;
+ unsigned offset = offset_in_page(gpa);
+ unsigned pte_size;
+ unsigned page_offset;
+ unsigned misaligned;
+ int level;
+ int flooded = 0;
+
+ pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
+ if (gfn == vcpu->last_pt_write_gfn) {
+ ++vcpu->last_pt_write_count;
+ if (vcpu->last_pt_write_count >= 3)
+ flooded = 1;
+ } else {
+ vcpu->last_pt_write_gfn = gfn;
+ vcpu->last_pt_write_count = 1;
+ }
+ index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
+ bucket = &vcpu->kvm->mmu_page_hash[index];
+ hlist_for_each_entry_safe(page, node, n, bucket, hash_link) {
+ if (page->gfn != gfn || page->role.metaphysical)
+ continue;
+ pte_size = page->role.glevels == PT32_ROOT_LEVEL ? 4 : 8;
+ misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
+ if (misaligned || flooded) {
+ /*
+ * Misaligned accesses are too much trouble to fix
+ * up; also, they usually indicate a page is not used
+ * as a page table.
+ *
+ * If we're seeing too many writes to a page,
+ * it may no longer be a page table, or we may be
+ * forking, in which case it is better to unmap the
+ * page.
+ */
+ pgprintk("misaligned: gpa %llx bytes %d role %x\n",
+ gpa, bytes, page->role.word);
+ kvm_mmu_zap_page(vcpu, page);
+ continue;
+ }
+ page_offset = offset;
+ level = page->role.level;
+ if (page->role.glevels == PT32_ROOT_LEVEL) {
+ page_offset <<= 1; /* 32->64 */
+ page_offset &= ~PAGE_MASK;
+ }
+ spte = __va(page->page_hpa);
+ spte += page_offset / sizeof(*spte);
+ pte = *spte;
+ if (is_present_pte(pte)) {
+ if (level == PT_PAGE_TABLE_LEVEL)
+ rmap_remove(vcpu, spte);
+ else {
+ child = page_header(pte & PT64_BASE_ADDR_MASK);
+ mmu_page_remove_parent_pte(vcpu, child, spte);
+ }
+ }
+ *spte = 0;
+ }
+}
+
+void kvm_mmu_post_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
+{
+}
+
+int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
+{
+ gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);
+
+ return kvm_mmu_unprotect_page(vcpu, gpa >> PAGE_SHIFT);
+}
+
+void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
+{
+ while (vcpu->kvm->n_free_mmu_pages < KVM_REFILL_PAGES) {
struct kvm_mmu_page *page;
+ page = container_of(vcpu->kvm->active_mmu_pages.prev,
+ struct kvm_mmu_page, link);
+ kvm_mmu_zap_page(vcpu, page);
+ }
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_free_some_pages);
+
+static void free_mmu_pages(struct kvm_vcpu *vcpu)
+{
+ struct kvm_mmu_page *page;
+
+ while (!list_empty(&vcpu->kvm->active_mmu_pages)) {
+ page = container_of(vcpu->kvm->active_mmu_pages.next,
+ struct kvm_mmu_page, link);
+ kvm_mmu_zap_page(vcpu, page);
+ }
+ while (!list_empty(&vcpu->free_pages)) {
page = list_entry(vcpu->free_pages.next,
struct kvm_mmu_page, link);
list_del(&page->link);
__free_page(pfn_to_page(page->page_hpa >> PAGE_SHIFT));
page->page_hpa = INVALID_PAGE;
}
+ free_page((unsigned long)vcpu->mmu.pae_root);
}
static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
{
+ struct page *page;
int i;
ASSERT(vcpu);
for (i = 0; i < KVM_NUM_MMU_PAGES; i++) {
- struct page *page;
struct kvm_mmu_page *page_header = &vcpu->page_header_buf[i];
INIT_LIST_HEAD(&page_header->link);
- if ((page = alloc_page(GFP_KVM_MMU)) == NULL)
+ if ((page = alloc_page(GFP_KERNEL)) == NULL)
goto error_1;
page->private = (unsigned long)page_header;
page_header->page_hpa = (hpa_t)page_to_pfn(page) << PAGE_SHIFT;
memset(__va(page_header->page_hpa), 0, PAGE_SIZE);
list_add(&page_header->link, &vcpu->free_pages);
+ ++vcpu->kvm->n_free_mmu_pages;
}
+
+ /*
+ * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
+ * Therefore we need to allocate shadow page tables in the first
+ * 4GB of memory, which happens to fit the DMA32 zone.
+ */
+ page = alloc_page(GFP_KERNEL | __GFP_DMA32);
+ if (!page)
+ goto error_1;
+ vcpu->mmu.pae_root = page_address(page);
+ for (i = 0; i < 4; ++i)
+ vcpu->mmu.pae_root[i] = INVALID_PAGE;
+
return 0;
error_1:
@@ -663,10 +1272,12 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
destroy_kvm_mmu(vcpu);
free_mmu_pages(vcpu);
+ mmu_free_memory_caches(vcpu);
}
-void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
+void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot)
{
+ struct kvm *kvm = vcpu->kvm;
struct kvm_mmu_page *page;
list_for_each_entry(page, &kvm->active_mmu_pages, link) {
@@ -679,8 +1290,169 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
pt = __va(page->page_hpa);
for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
/* avoid RMW */
- if (pt[i] & PT_WRITABLE_MASK)
+ if (pt[i] & PT_WRITABLE_MASK) {
+ rmap_remove(vcpu, &pt[i]);
pt[i] &= ~PT_WRITABLE_MASK;
+ }
+ }
+}
+
+#ifdef AUDIT
+
+static const char *audit_msg;
+
+static gva_t canonicalize(gva_t gva)
+{
+#ifdef CONFIG_X86_64
+ gva = (long long)(gva << 16) >> 16;
+#endif
+ return gva;
+}
+static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
+ gva_t va, int level)
+{
+ u64 *pt = __va(page_pte & PT64_BASE_ADDR_MASK);
+ int i;
+ gva_t va_delta = 1ul << (PAGE_SHIFT + 9 * (level - 1));
+
+ for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) {
+ u64 ent = pt[i];
+
+ if (!ent & PT_PRESENT_MASK)
+ continue;
+
+ va = canonicalize(va);
+ if (level > 1)
+ audit_mappings_page(vcpu, ent, va, level - 1);
+ else {
+ gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, va);
+ hpa_t hpa = gpa_to_hpa(vcpu, gpa);
+
+ if ((ent & PT_PRESENT_MASK)
+ && (ent & PT64_BASE_ADDR_MASK) != hpa)
+ printk(KERN_ERR "audit error: (%s) levels %d"
+ " gva %lx gpa %llx hpa %llx ent %llx\n",
+ audit_msg, vcpu->mmu.root_level,
+ va, gpa, hpa, ent);
+ }
}
}
+
+static void audit_mappings(struct kvm_vcpu *vcpu)
+{
+ int i;
+
+ if (vcpu->mmu.root_level == 4)
+ audit_mappings_page(vcpu, vcpu->mmu.root_hpa, 0, 4);
+ else
+ for (i = 0; i < 4; ++i)
+ if (vcpu->mmu.pae_root[i] & PT_PRESENT_MASK)
+ audit_mappings_page(vcpu,
+ vcpu->mmu.pae_root[i],
+ i << 30,
+ 2);
+}
+
+static int count_rmaps(struct kvm_vcpu *vcpu)
+{
+ int nmaps = 0;
+ int i, j, k;
+
+ for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
+ struct kvm_memory_slot *m = &vcpu->kvm->memslots[i];
+ struct kvm_rmap_desc *d;
+
+ for (j = 0; j < m->npages; ++j) {
+ struct page *page = m->phys_mem[j];
+
+ if (!page->private)
+ continue;
+ if (!(page->private & 1)) {
+ ++nmaps;
+ continue;
+ }
+ d = (struct kvm_rmap_desc *)(page->private & ~1ul);
+ while (d) {
+ for (k = 0; k < RMAP_EXT; ++k)
+ if (d->shadow_ptes[k])
+ ++nmaps;
+ else
+ break;
+ d = d->more;
+ }
+ }
+ }
+ return nmaps;
+}
+
+static int count_writable_mappings(struct kvm_vcpu *vcpu)
+{
+ int nmaps = 0;
+ struct kvm_mmu_page *page;
+ int i;
+
+ list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) {
+ u64 *pt = __va(page->page_hpa);
+
+ if (page->role.level != PT_PAGE_TABLE_LEVEL)
+ continue;
+
+ for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
+ u64 ent = pt[i];
+
+ if (!(ent & PT_PRESENT_MASK))
+ continue;
+ if (!(ent & PT_WRITABLE_MASK))
+ continue;
+ ++nmaps;
+ }
+ }
+ return nmaps;
+}
+
+static void audit_rmap(struct kvm_vcpu *vcpu)
+{
+ int n_rmap = count_rmaps(vcpu);
+ int n_actual = count_writable_mappings(vcpu);
+
+ if (n_rmap != n_actual)
+ printk(KERN_ERR "%s: (%s) rmap %d actual %d\n",
+ __FUNCTION__, audit_msg, n_rmap, n_actual);
+}
+
+static void audit_write_protection(struct kvm_vcpu *vcpu)
+{
+ struct kvm_mmu_page *page;
+
+ list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) {
+ hfn_t hfn;
+ struct page *pg;
+
+ if (page->role.metaphysical)
+ continue;
+
+ hfn = gpa_to_hpa(vcpu, (gpa_t)page->gfn << PAGE_SHIFT)
+ >> PAGE_SHIFT;
+ pg = pfn_to_page(hfn);
+ if (pg->private)
+ printk(KERN_ERR "%s: (%s) shadow page has writable"
+ " mappings: gfn %lx role %x\n",
+ __FUNCTION__, audit_msg, page->gfn,
+ page->role.word);
+ }
+}
+
+static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg)
+{
+ int olddbg = dbg;
+
+ dbg = 0;
+ audit_msg = msg;
+ audit_rmap(vcpu);
+ audit_write_protection(vcpu);
+ audit_mappings(vcpu);
+ dbg = olddbg;
+}
+
+#endif
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index 09bb9b4ed12d..2dbf4307ed9e 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -32,6 +32,11 @@
#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
#define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
#define PT_PTE_COPY_MASK PT64_PTE_COPY_MASK
+ #ifdef CONFIG_X86_64
+ #define PT_MAX_FULL_LEVELS 4
+ #else
+ #define PT_MAX_FULL_LEVELS 2
+ #endif
#elif PTTYPE == 32
#define pt_element_t u32
#define guest_walker guest_walker32
@@ -42,6 +47,7 @@
#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
#define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
#define PT_PTE_COPY_MASK PT32_PTE_COPY_MASK
+ #define PT_MAX_FULL_LEVELS 2
#else
#error Invalid PTTYPE value
#endif
@@ -52,93 +58,126 @@
*/
struct guest_walker {
int level;
+ gfn_t table_gfn[PT_MAX_FULL_LEVELS];
pt_element_t *table;
+ pt_element_t *ptep;
pt_element_t inherited_ar;
+ gfn_t gfn;
};
-static void FNAME(init_walker)(struct guest_walker *walker,
- struct kvm_vcpu *vcpu)
+/*
+ * Fetch a guest pte for a guest virtual address
+ */
+static void FNAME(walk_addr)(struct guest_walker *walker,
+ struct kvm_vcpu *vcpu, gva_t addr)
{
hpa_t hpa;
struct kvm_memory_slot *slot;
+ pt_element_t *ptep;
+ pt_element_t root;
+ gfn_t table_gfn;
+ pgprintk("%s: addr %lx\n", __FUNCTION__, addr);
walker->level = vcpu->mmu.root_level;
- slot = gfn_to_memslot(vcpu->kvm,
- (vcpu->cr3 & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
- hpa = safe_gpa_to_hpa(vcpu, vcpu->cr3 & PT64_BASE_ADDR_MASK);
+ walker->table = NULL;
+ root = vcpu->cr3;
+#if PTTYPE == 64
+ if (!is_long_mode(vcpu)) {
+ walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3];
+ root = *walker->ptep;
+ if (!(root & PT_PRESENT_MASK))
+ return;
+ --walker->level;
+ }
+#endif
+ table_gfn = (root & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
+ walker->table_gfn[walker->level - 1] = table_gfn;
+ pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
+ walker->level - 1, table_gfn);
+ slot = gfn_to_memslot(vcpu->kvm, table_gfn);
+ hpa = safe_gpa_to_hpa(vcpu, root & PT64_BASE_ADDR_MASK);
walker->table = kmap_atomic(pfn_to_page(hpa >> PAGE_SHIFT), KM_USER0);
ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
(vcpu->cr3 & ~(PAGE_MASK | CR3_FLAGS_MASK)) == 0);
- walker->table = (pt_element_t *)( (unsigned long)walker->table |
- (unsigned long)(vcpu->cr3 & ~(PAGE_MASK | CR3_FLAGS_MASK)) );
walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK;
+
+ for (;;) {
+ int index = PT_INDEX(addr, walker->level);
+ hpa_t paddr;
+
+ ptep = &walker->table[index];
+ ASSERT(((unsigned long)walker->table & PAGE_MASK) ==
+ ((unsigned long)ptep & PAGE_MASK));
+
+ if (is_present_pte(*ptep) && !(*ptep & PT_ACCESSED_MASK))
+ *ptep |= PT_ACCESSED_MASK;
+
+ if (!is_present_pte(*ptep))
+ break;
+
+ if (walker->level == PT_PAGE_TABLE_LEVEL) {
+ walker->gfn = (*ptep & PT_BASE_ADDR_MASK)
+ >> PAGE_SHIFT;
+ break;
+ }
+
+ if (walker->level == PT_DIRECTORY_LEVEL
+ && (*ptep & PT_PAGE_SIZE_MASK)
+ && (PTTYPE == 64 || is_pse(vcpu))) {
+ walker->gfn = (*ptep & PT_DIR_BASE_ADDR_MASK)
+ >> PAGE_SHIFT;
+ walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL);
+ break;
+ }
+
+ if (walker->level != 3 || is_long_mode(vcpu))
+ walker->inherited_ar &= walker->table[index];
+ table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
+ paddr = safe_gpa_to_hpa(vcpu, *ptep & PT_BASE_ADDR_MASK);
+ kunmap_atomic(walker->table, KM_USER0);
+ walker->table = kmap_atomic(pfn_to_page(paddr >> PAGE_SHIFT),
+ KM_USER0);
+ --walker->level;
+ walker->table_gfn[walker->level - 1 ] = table_gfn;
+ pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
+ walker->level - 1, table_gfn);
+ }
+ walker->ptep = ptep;
+ pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep);
}
static void FNAME(release_walker)(struct guest_walker *walker)
{
- kunmap_atomic(walker->table, KM_USER0);
+ if (walker->table)
+ kunmap_atomic(walker->table, KM_USER0);
}
static void FNAME(set_pte)(struct kvm_vcpu *vcpu, u64 guest_pte,
- u64 *shadow_pte, u64 access_bits)
+ u64 *shadow_pte, u64 access_bits, gfn_t gfn)
{
ASSERT(*shadow_pte == 0);
access_bits &= guest_pte;
*shadow_pte = (guest_pte & PT_PTE_COPY_MASK);
set_pte_common(vcpu, shadow_pte, guest_pte & PT_BASE_ADDR_MASK,
- guest_pte & PT_DIRTY_MASK, access_bits);
+ guest_pte & PT_DIRTY_MASK, access_bits, gfn);
}
static void FNAME(set_pde)(struct kvm_vcpu *vcpu, u64 guest_pde,
- u64 *shadow_pte, u64 access_bits,
- int index)
+ u64 *shadow_pte, u64 access_bits, gfn_t gfn)
{
gpa_t gaddr;
ASSERT(*shadow_pte == 0);
access_bits &= guest_pde;
- gaddr = (guest_pde & PT_DIR_BASE_ADDR_MASK) + PAGE_SIZE * index;
+ gaddr = (gpa_t)gfn << PAGE_SHIFT;
if (PTTYPE == 32 && is_cpuid_PSE36())
gaddr |= (guest_pde & PT32_DIR_PSE36_MASK) <<
(32 - PT32_DIR_PSE36_SHIFT);
*shadow_pte = guest_pde & PT_PTE_COPY_MASK;
set_pte_common(vcpu, shadow_pte, gaddr,
- guest_pde & PT_DIRTY_MASK, access_bits);
-}
-
-/*
- * Fetch a guest pte from a specific level in the paging hierarchy.
- */
-static pt_element_t *FNAME(fetch_guest)(struct kvm_vcpu *vcpu,
- struct guest_walker *walker,
- int level,
- gva_t addr)
-{
-
- ASSERT(level > 0 && level <= walker->level);
-
- for (;;) {
- int index = PT_INDEX(addr, walker->level);
- hpa_t paddr;
-
- ASSERT(((unsigned long)walker->table & PAGE_MASK) ==
- ((unsigned long)&walker->table[index] & PAGE_MASK));
- if (level == walker->level ||
- !is_present_pte(walker->table[index]) ||
- (walker->level == PT_DIRECTORY_LEVEL &&
- (walker->table[index] & PT_PAGE_SIZE_MASK) &&
- (PTTYPE == 64 || is_pse(vcpu))))
- return &walker->table[index];
- if (walker->level != 3 || is_long_mode(vcpu))
- walker->inherited_ar &= walker->table[index];
- paddr = safe_gpa_to_hpa(vcpu, walker->table[index] & PT_BASE_ADDR_MASK);
- kunmap_atomic(walker->table, KM_USER0);
- walker->table = kmap_atomic(pfn_to_page(paddr >> PAGE_SHIFT),
- KM_USER0);
- --walker->level;
- }
+ guest_pde & PT_DIRTY_MASK, access_bits, gfn);
}
/*
@@ -150,15 +189,26 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
hpa_t shadow_addr;
int level;
u64 *prev_shadow_ent = NULL;
+ pt_element_t *guest_ent = walker->ptep;
+
+ if (!is_present_pte(*guest_ent))
+ return NULL;
shadow_addr = vcpu->mmu.root_hpa;
level = vcpu->mmu.shadow_root_level;
+ if (level == PT32E_ROOT_LEVEL) {
+ shadow_addr = vcpu->mmu.pae_root[(addr >> 30) & 3];
+ shadow_addr &= PT64_BASE_ADDR_MASK;
+ --level;
+ }
for (; ; level--) {
u32 index = SHADOW_PT_INDEX(addr, level);
u64 *shadow_ent = ((u64 *)__va(shadow_addr)) + index;
- pt_element_t *guest_ent;
+ struct kvm_mmu_page *shadow_page;
u64 shadow_pte;
+ int metaphysical;
+ gfn_t table_gfn;
if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) {
if (level == PT_PAGE_TABLE_LEVEL)
@@ -168,21 +218,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
continue;
}
- if (PTTYPE == 32 && level > PT32_ROOT_LEVEL) {
- ASSERT(level == PT32E_ROOT_LEVEL);
- guest_ent = FNAME(fetch_guest)(vcpu, walker,
- PT32_ROOT_LEVEL, addr);
- } else
- guest_ent = FNAME(fetch_guest)(vcpu, walker,
- level, addr);
-
- if (!is_present_pte(*guest_ent))
- return NULL;
-
- /* Don't set accessed bit on PAE PDPTRs */
- if (vcpu->mmu.root_level != 3 || walker->level != 3)
- *guest_ent |= PT_ACCESSED_MASK;
-
if (level == PT_PAGE_TABLE_LEVEL) {
if (walker->level == PT_DIRECTORY_LEVEL) {
@@ -190,21 +225,30 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
*prev_shadow_ent |= PT_SHADOW_PS_MARK;
FNAME(set_pde)(vcpu, *guest_ent, shadow_ent,
walker->inherited_ar,
- PT_INDEX(addr, PT_PAGE_TABLE_LEVEL));
+ walker->gfn);
} else {
ASSERT(walker->level == PT_PAGE_TABLE_LEVEL);
- FNAME(set_pte)(vcpu, *guest_ent, shadow_ent, walker->inherited_ar);
+ FNAME(set_pte)(vcpu, *guest_ent, shadow_ent,
+ walker->inherited_ar,
+ walker->gfn);
}
return shadow_ent;
}
- shadow_addr = kvm_mmu_alloc_page(vcpu, shadow_ent);
- if (!VALID_PAGE(shadow_addr))
- return ERR_PTR(-ENOMEM);
- shadow_pte = shadow_addr | PT_PRESENT_MASK;
- if (vcpu->mmu.root_level > 3 || level != 3)
- shadow_pte |= PT_ACCESSED_MASK
- | PT_WRITABLE_MASK | PT_USER_MASK;
+ if (level - 1 == PT_PAGE_TABLE_LEVEL
+ && walker->level == PT_DIRECTORY_LEVEL) {
+ metaphysical = 1;
+ table_gfn = (*guest_ent & PT_BASE_ADDR_MASK)
+ >> PAGE_SHIFT;
+ } else {
+ metaphysical = 0;
+ table_gfn = walker->table_gfn[level - 2];
+ }
+ shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
+ metaphysical, shadow_ent);
+ shadow_addr = shadow_page->page_hpa;
+ shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK
+ | PT_WRITABLE_MASK | PT_USER_MASK;
*shadow_ent = shadow_pte;
prev_shadow_ent = shadow_ent;
}
@@ -221,11 +265,13 @@ static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu,
u64 *shadow_ent,
struct guest_walker *walker,
gva_t addr,
- int user)
+ int user,
+ int *write_pt)
{
pt_element_t *guest_ent;
int writable_shadow;
gfn_t gfn;
+ struct kvm_mmu_page *page;
if (is_writeble_pte(*shadow_ent))
return 0;
@@ -250,17 +296,35 @@ static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu,
*shadow_ent &= ~PT_USER_MASK;
}
- guest_ent = FNAME(fetch_guest)(vcpu, walker, PT_PAGE_TABLE_LEVEL, addr);
+ guest_ent = walker->ptep;
if (!is_present_pte(*guest_ent)) {
*shadow_ent = 0;
return 0;
}
- gfn = (*guest_ent & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
+ gfn = walker->gfn;
+
+ if (user) {
+ /*
+ * Usermode page faults won't be for page table updates.
+ */
+ while ((page = kvm_mmu_lookup_page(vcpu, gfn)) != NULL) {
+ pgprintk("%s: zap %lx %x\n",
+ __FUNCTION__, gfn, page->role.word);
+ kvm_mmu_zap_page(vcpu, page);
+ }
+ } else if (kvm_mmu_lookup_page(vcpu, gfn)) {
+ pgprintk("%s: found shadow page for %lx, marking ro\n",
+ __FUNCTION__, gfn);
+ *guest_ent |= PT_DIRTY_MASK;
+ *write_pt = 1;
+ return 0;
+ }
mark_page_dirty(vcpu->kvm, gfn);
*shadow_ent |= PT_WRITABLE_MASK;
*guest_ent |= PT_DIRTY_MASK;
+ rmap_add(vcpu, shadow_ent);
return 1;
}
@@ -276,7 +340,8 @@ static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu,
* - normal guest page fault due to the guest pte marked not present, not
* writable, or not executable
*
- * Returns: 1 if we need to emulate the instruction, 0 otherwise
+ * Returns: 1 if we need to emulate the instruction, 0 otherwise, or
+ * a negative value on error.
*/
static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
u32 error_code)
@@ -287,39 +352,47 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
struct guest_walker walker;
u64 *shadow_pte;
int fixed;
+ int write_pt = 0;
+ int r;
+
+ pgprintk("%s: addr %lx err %x\n", __FUNCTION__, addr, error_code);
+ kvm_mmu_audit(vcpu, "pre page fault");
+
+ r = mmu_topup_memory_caches(vcpu);
+ if (r)
+ return r;
/*
* Look up the shadow pte for the faulting address.
*/
- for (;;) {
- FNAME(init_walker)(&walker, vcpu);
- shadow_pte = FNAME(fetch)(vcpu, addr, &walker);
- if (IS_ERR(shadow_pte)) { /* must be -ENOMEM */
- nonpaging_flush(vcpu);
- FNAME(release_walker)(&walker);
- continue;
- }
- break;
- }
+ FNAME(walk_addr)(&walker, vcpu, addr);
+ shadow_pte = FNAME(fetch)(vcpu, addr, &walker);
/*
* The page is not mapped by the guest. Let the guest handle it.
*/
if (!shadow_pte) {
+ pgprintk("%s: not mapped\n", __FUNCTION__);
inject_page_fault(vcpu, addr, error_code);
FNAME(release_walker)(&walker);
return 0;
}
+ pgprintk("%s: shadow pte %p %llx\n", __FUNCTION__,
+ shadow_pte, *shadow_pte);
+
/*
* Update the shadow pte.
*/
if (write_fault)
fixed = FNAME(fix_write_pf)(vcpu, shadow_pte, &walker, addr,
- user_fault);
+ user_fault, &write_pt);
else
fixed = fix_read_pf(shadow_pte);
+ pgprintk("%s: updated shadow pte %p %llx\n", __FUNCTION__,
+ shadow_pte, *shadow_pte);
+
FNAME(release_walker)(&walker);
/*
@@ -331,20 +404,23 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
pgprintk("%s: io work, no access\n", __FUNCTION__);
inject_page_fault(vcpu, addr,
error_code | PFERR_PRESENT_MASK);
+ kvm_mmu_audit(vcpu, "post page fault (io)");
return 0;
}
/*
* pte not present, guest page fault.
*/
- if (pte_present && !fixed) {
+ if (pte_present && !fixed && !write_pt) {
inject_page_fault(vcpu, addr, error_code);
+ kvm_mmu_audit(vcpu, "post page fault (guest)");
return 0;
}
++kvm_stat.pf_fixed;
+ kvm_mmu_audit(vcpu, "post page fault (fixed)");
- return 0;
+ return write_pt;
}
static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
@@ -353,9 +429,8 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
pt_element_t guest_pte;
gpa_t gpa;
- FNAME(init_walker)(&walker, vcpu);
- guest_pte = *FNAME(fetch_guest)(vcpu, &walker, PT_PAGE_TABLE_LEVEL,
- vaddr);
+ FNAME(walk_addr)(&walker, vcpu, vaddr);
+ guest_pte = *walker.ptep;
FNAME(release_walker)(&walker);
if (!is_present_pte(guest_pte))
@@ -389,3 +464,4 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
#undef PT_PTE_COPY_MASK
#undef PT_NON_PTE_COPY_MASK
#undef PT_DIR_BASE_ADDR_MASK
+#undef PT_MAX_FULL_LEVELS
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index fa0428735717..ccc06b1b91b5 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -235,6 +235,8 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
vcpu->rip = vcpu->svm->vmcb->save.rip = vcpu->svm->next_rip;
vcpu->svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
+
+ vcpu->interrupt_window_open = 1;
}
static int has_svm(void)
@@ -495,7 +497,6 @@ static void init_vmcb(struct vmcb *vmcb)
/* (1ULL << INTERCEPT_SELECTIVE_CR0) | */
(1ULL << INTERCEPT_CPUID) |
(1ULL << INTERCEPT_HLT) |
- (1ULL << INTERCEPT_INVLPG) |
(1ULL << INTERCEPT_INVLPGA) |
(1ULL << INTERCEPT_IOIO_PROT) |
(1ULL << INTERCEPT_MSR_PROT) |
@@ -700,6 +701,10 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
vcpu->svm->vmcb->save.gdtr.base = dt->base ;
}
+static void svm_decache_cr0_cr4_guest_bits(struct kvm_vcpu *vcpu)
+{
+}
+
static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
#ifdef CONFIG_X86_64
@@ -847,6 +852,7 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
u64 fault_address;
u32 error_code;
enum emulation_result er;
+ int r;
if (is_external_interrupt(exit_int_info))
push_irq(vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);
@@ -855,7 +861,12 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
fault_address = vcpu->svm->vmcb->control.exit_info_2;
error_code = vcpu->svm->vmcb->control.exit_info_1;
- if (!vcpu->mmu.page_fault(vcpu, fault_address, error_code)) {
+ r = kvm_mmu_page_fault(vcpu, fault_address, error_code);
+ if (r < 0) {
+ spin_unlock(&vcpu->kvm->lock);
+ return r;
+ }
+ if (!r) {
spin_unlock(&vcpu->kvm->lock);
return 1;
}
@@ -1031,10 +1042,11 @@ static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 1;
skip_emulated_instruction(vcpu);
- if (vcpu->irq_summary && (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF))
+ if (vcpu->irq_summary)
return 1;
kvm_run->exit_reason = KVM_EXIT_HLT;
+ ++kvm_stat.halt_exits;
return 0;
}
@@ -1186,6 +1198,23 @@ static int msr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return rdmsr_interception(vcpu, kvm_run);
}
+static int interrupt_window_interception(struct kvm_vcpu *vcpu,
+ struct kvm_run *kvm_run)
+{
+ /*
+ * If the user space waits to inject interrupts, exit as soon as
+ * possible
+ */
+ if (kvm_run->request_interrupt_window &&
+ !vcpu->irq_summary) {
+ ++kvm_stat.irq_window_exits;
+ kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
+ return 0;
+ }
+
+ return 1;
+}
+
static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run) = {
[SVM_EXIT_READ_CR0] = emulate_on_interception,
@@ -1210,6 +1239,7 @@ static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu,
[SVM_EXIT_NMI] = nop_on_interception,
[SVM_EXIT_SMI] = nop_on_interception,
[SVM_EXIT_INIT] = nop_on_interception,
+ [SVM_EXIT_VINTR] = interrupt_window_interception,
/* [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, */
[SVM_EXIT_CPUID] = cpuid_interception,
[SVM_EXIT_HLT] = halt_interception,
@@ -1278,15 +1308,11 @@ static void pre_svm_run(struct kvm_vcpu *vcpu)
}
-static inline void kvm_try_inject_irq(struct kvm_vcpu *vcpu)
+static inline void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
{
struct vmcb_control_area *control;
- if (!vcpu->irq_summary)
- return;
-
control = &vcpu->svm->vmcb->control;
-
control->int_vector = pop_irq(vcpu);
control->int_ctl &= ~V_INTR_PRIO_MASK;
control->int_ctl |= V_IRQ_MASK |
@@ -1301,6 +1327,59 @@ static void kvm_reput_irq(struct kvm_vcpu *vcpu)
control->int_ctl &= ~V_IRQ_MASK;
push_irq(vcpu, control->int_vector);
}
+
+ vcpu->interrupt_window_open =
+ !(control->int_state & SVM_INTERRUPT_SHADOW_MASK);
+}
+
+static void do_interrupt_requests(struct kvm_vcpu *vcpu,
+ struct kvm_run *kvm_run)
+{
+ struct vmcb_control_area *control = &vcpu->svm->vmcb->control;
+
+ vcpu->interrupt_window_open =
+ (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
+ (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF));
+
+ if (vcpu->interrupt_window_open && vcpu->irq_summary)
+ /*
+ * If interrupts enabled, and not blocked by sti or mov ss. Good.
+ */
+ kvm_do_inject_irq(vcpu);
+
+ /*
+ * Interrupts blocked. Wait for unblock.
+ */
+ if (!vcpu->interrupt_window_open &&
+ (vcpu->irq_summary || kvm_run->request_interrupt_window)) {
+ control->intercept |= 1ULL << INTERCEPT_VINTR;
+ } else
+ control->intercept &= ~(1ULL << INTERCEPT_VINTR);
+}
+
+static void post_kvm_run_save(struct kvm_vcpu *vcpu,
+ struct kvm_run *kvm_run)
+{
+ kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
+ vcpu->irq_summary == 0);
+ kvm_run->if_flag = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF) != 0;
+ kvm_run->cr8 = vcpu->cr8;
+ kvm_run->apic_base = vcpu->apic_base;
+}
+
+/*
+ * Check if userspace requested an interrupt window, and that the
+ * interrupt window is open.
+ *
+ * No need to exit to userspace if we already have an interrupt queued.
+ */
+static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
+ struct kvm_run *kvm_run)
+{
+ return (!vcpu->irq_summary &&
+ kvm_run->request_interrupt_window &&
+ vcpu->interrupt_window_open &&
+ (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF));
}
static void save_db_regs(unsigned long *db_regs)
@@ -1324,9 +1403,10 @@ static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
u16 fs_selector;
u16 gs_selector;
u16 ldt_selector;
+ int r;
again:
- kvm_try_inject_irq(vcpu);
+ do_interrupt_requests(vcpu, kvm_run);
clgi();
@@ -1487,18 +1567,28 @@ again:
if (vcpu->svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY;
kvm_run->exit_reason = vcpu->svm->vmcb->control.exit_code;
+ post_kvm_run_save(vcpu, kvm_run);
return 0;
}
- if (handle_exit(vcpu, kvm_run)) {
+ r = handle_exit(vcpu, kvm_run);
+ if (r > 0) {
if (signal_pending(current)) {
++kvm_stat.signal_exits;
+ post_kvm_run_save(vcpu, kvm_run);
+ return -EINTR;
+ }
+
+ if (dm_request_for_irq_injection(vcpu, kvm_run)) {
+ ++kvm_stat.request_irq_exits;
+ post_kvm_run_save(vcpu, kvm_run);
return -EINTR;
}
kvm_resched(vcpu);
goto again;
}
- return 0;
+ post_kvm_run_save(vcpu, kvm_run);
+ return r;
}
static void svm_flush_tlb(struct kvm_vcpu *vcpu)
@@ -1565,6 +1655,7 @@ static struct kvm_arch_ops svm_arch_ops = {
.get_segment = svm_get_segment,
.set_segment = svm_set_segment,
.get_cs_db_l_bits = svm_get_cs_db_l_bits,
+ .decache_cr0_cr4_guest_bits = svm_decache_cr0_cr4_guest_bits,
.set_cr0 = svm_set_cr0,
.set_cr0_no_modeswitch = svm_set_cr0,
.set_cr3 = svm_set_cr3,
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index d0a2c2d5342a..d4701cb4c654 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -116,7 +116,7 @@ static void vmcs_clear(struct vmcs *vmcs)
static void __vcpu_clear(void *arg)
{
struct kvm_vcpu *vcpu = arg;
- int cpu = smp_processor_id();
+ int cpu = raw_smp_processor_id();
if (vcpu->cpu == cpu)
vmcs_clear(vcpu->vmcs);
@@ -152,15 +152,21 @@ static u64 vmcs_read64(unsigned long field)
#endif
}
+static noinline void vmwrite_error(unsigned long field, unsigned long value)
+{
+ printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n",
+ field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
+ dump_stack();
+}
+
static void vmcs_writel(unsigned long field, unsigned long value)
{
u8 error;
asm volatile (ASM_VMX_VMWRITE_RAX_RDX "; setna %0"
: "=q"(error) : "a"(value), "d"(field) : "cc" );
- if (error)
- printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n",
- field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
+ if (unlikely(error))
+ vmwrite_error(field, value);
}
static void vmcs_write16(unsigned long field, u16 value)
@@ -263,6 +269,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
if (interruptibility & 3)
vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
interruptibility & ~3);
+ vcpu->interrupt_window_open = 1;
}
static void vmx_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code)
@@ -541,7 +548,7 @@ static struct vmcs *alloc_vmcs_cpu(int cpu)
static struct vmcs *alloc_vmcs(void)
{
- return alloc_vmcs_cpu(smp_processor_id());
+ return alloc_vmcs_cpu(raw_smp_processor_id());
}
static void free_vmcs(struct vmcs *vmcs)
@@ -736,6 +743,15 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
#endif
+static void vmx_decache_cr0_cr4_guest_bits(struct kvm_vcpu *vcpu)
+{
+ vcpu->cr0 &= KVM_GUEST_CR0_MASK;
+ vcpu->cr0 |= vmcs_readl(GUEST_CR0) & ~KVM_GUEST_CR0_MASK;
+
+ vcpu->cr4 &= KVM_GUEST_CR4_MASK;
+ vcpu->cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK;
+}
+
static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
if (vcpu->rmode.active && (cr0 & CR0_PE_MASK))
@@ -1011,8 +1027,6 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
vmcs_writel(GUEST_RIP, 0xfff0);
vmcs_writel(GUEST_RSP, 0);
- vmcs_writel(GUEST_CR3, 0);
-
//todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0
vmcs_writel(GUEST_DR7, 0x400);
@@ -1049,7 +1063,6 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
| CPU_BASED_CR8_LOAD_EXITING /* 20.6.2 */
| CPU_BASED_CR8_STORE_EXITING /* 20.6.2 */
| CPU_BASED_UNCOND_IO_EXITING /* 20.6.2 */
- | CPU_BASED_INVDPG_EXITING
| CPU_BASED_MOV_DR_EXITING
| CPU_BASED_USE_TSC_OFFSETING /* 21.3 */
);
@@ -1094,14 +1107,6 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
rdmsrl(MSR_IA32_SYSENTER_EIP, a);
vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */
- ret = -ENOMEM;
- vcpu->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (!vcpu->guest_msrs)
- goto out;
- vcpu->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (!vcpu->host_msrs)
- goto out_free_guest_msrs;
-
for (i = 0; i < NR_VMX_MSR; ++i) {
u32 index = vmx_msr_index[i];
u32 data_low, data_high;
@@ -1155,8 +1160,6 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
return 0;
-out_free_guest_msrs:
- kfree(vcpu->guest_msrs);
out:
return ret;
}
@@ -1224,21 +1227,34 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
}
-static void kvm_try_inject_irq(struct kvm_vcpu *vcpu)
+
+static void do_interrupt_requests(struct kvm_vcpu *vcpu,
+ struct kvm_run *kvm_run)
{
- if ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)
- && (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0)
+ u32 cpu_based_vm_exec_control;
+
+ vcpu->interrupt_window_open =
+ ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
+ (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
+
+ if (vcpu->interrupt_window_open &&
+ vcpu->irq_summary &&
+ !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK))
/*
- * Interrupts enabled, and not blocked by sti or mov ss. Good.
+ * If interrupts enabled, and not blocked by sti or mov ss. Good.
*/
kvm_do_inject_irq(vcpu);
- else
+
+ cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ if (!vcpu->interrupt_window_open &&
+ (vcpu->irq_summary || kvm_run->request_interrupt_window))
/*
* Interrupts blocked. Wait for unblock.
*/
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
- vmcs_read32(CPU_BASED_VM_EXEC_CONTROL)
- | CPU_BASED_VIRTUAL_INTR_PENDING);
+ cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
+ else
+ cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
+ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
}
static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu)
@@ -1277,6 +1293,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
unsigned long cr2, rip;
u32 vect_info;
enum emulation_result er;
+ int r;
vect_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
@@ -1305,7 +1322,12 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
cr2 = vmcs_readl(EXIT_QUALIFICATION);
spin_lock(&vcpu->kvm->lock);
- if (!vcpu->mmu.page_fault(vcpu, cr2, error_code)) {
+ r = kvm_mmu_page_fault(vcpu, cr2, error_code);
+ if (r < 0) {
+ spin_unlock(&vcpu->kvm->lock);
+ return r;
+ }
+ if (!r) {
spin_unlock(&vcpu->kvm->lock);
return 1;
}
@@ -1425,17 +1447,6 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 0;
}
-static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
- u64 address = vmcs_read64(EXIT_QUALIFICATION);
- int instruction_length = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
- spin_lock(&vcpu->kvm->lock);
- vcpu->mmu.inval_page(vcpu, address);
- spin_unlock(&vcpu->kvm->lock);
- vmcs_writel(GUEST_RIP, vmcs_readl(GUEST_RIP) + instruction_length);
- return 1;
-}
-
static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
u64 exit_qualification;
@@ -1575,23 +1586,40 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 1;
}
+static void post_kvm_run_save(struct kvm_vcpu *vcpu,
+ struct kvm_run *kvm_run)
+{
+ kvm_run->if_flag = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) != 0;
+ kvm_run->cr8 = vcpu->cr8;
+ kvm_run->apic_base = vcpu->apic_base;
+ kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
+ vcpu->irq_summary == 0);
+}
+
static int handle_interrupt_window(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
- /* Turn off interrupt window reporting. */
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
- vmcs_read32(CPU_BASED_VM_EXEC_CONTROL)
- & ~CPU_BASED_VIRTUAL_INTR_PENDING);
+ /*
+ * If the user space waits to inject interrupts, exit as soon as
+ * possible
+ */
+ if (kvm_run->request_interrupt_window &&
+ !vcpu->irq_summary) {
+ kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
+ ++kvm_stat.irq_window_exits;
+ return 0;
+ }
return 1;
}
static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
skip_emulated_instruction(vcpu);
- if (vcpu->irq_summary && (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF))
+ if (vcpu->irq_summary)
return 1;
kvm_run->exit_reason = KVM_EXIT_HLT;
+ ++kvm_stat.halt_exits;
return 0;
}
@@ -1605,7 +1633,6 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
[EXIT_REASON_EXCEPTION_NMI] = handle_exception,
[EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
[EXIT_REASON_IO_INSTRUCTION] = handle_io,
- [EXIT_REASON_INVLPG] = handle_invlpg,
[EXIT_REASON_CR_ACCESS] = handle_cr,
[EXIT_REASON_DR_ACCESS] = handle_dr,
[EXIT_REASON_CPUID] = handle_cpuid,
@@ -1642,11 +1669,27 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
return 0;
}
+/*
+ * Check if userspace requested an interrupt window, and that the
+ * interrupt window is open.
+ *
+ * No need to exit to userspace if we already have an interrupt queued.
+ */
+static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
+ struct kvm_run *kvm_run)
+{
+ return (!vcpu->irq_summary &&
+ kvm_run->request_interrupt_window &&
+ vcpu->interrupt_window_open &&
+ (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
+}
+
static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
u8 fail;
u16 fs_sel, gs_sel, ldt_sel;
int fs_gs_ldt_reload_needed;
+ int r;
again:
/*
@@ -1673,9 +1716,7 @@ again:
vmcs_writel(HOST_GS_BASE, segment_base(gs_sel));
#endif
- if (vcpu->irq_summary &&
- !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK))
- kvm_try_inject_irq(vcpu);
+ do_interrupt_requests(vcpu, kvm_run);
if (vcpu->guest_debug.enabled)
kvm_guest_debug_pre(vcpu);
@@ -1812,6 +1853,7 @@ again:
fx_save(vcpu->guest_fx_image);
fx_restore(vcpu->host_fx_image);
+ vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0;
#ifndef CONFIG_X86_64
asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
@@ -1821,6 +1863,7 @@ again:
if (fail) {
kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY;
kvm_run->exit_reason = vmcs_read32(VM_INSTRUCTION_ERROR);
+ r = 0;
} else {
if (fs_gs_ldt_reload_needed) {
load_ldt(ldt_sel);
@@ -1840,17 +1883,28 @@ again:
}
vcpu->launched = 1;
kvm_run->exit_type = KVM_EXIT_TYPE_VM_EXIT;
- if (kvm_handle_exit(kvm_run, vcpu)) {
+ r = kvm_handle_exit(kvm_run, vcpu);
+ if (r > 0) {
/* Give scheduler a change to reschedule. */
if (signal_pending(current)) {
++kvm_stat.signal_exits;
+ post_kvm_run_save(vcpu, kvm_run);
+ return -EINTR;
+ }
+
+ if (dm_request_for_irq_injection(vcpu, kvm_run)) {
+ ++kvm_stat.request_irq_exits;
+ post_kvm_run_save(vcpu, kvm_run);
return -EINTR;
}
+
kvm_resched(vcpu);
goto again;
}
}
- return 0;
+
+ post_kvm_run_save(vcpu, kvm_run);
+ return r;
}
static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
@@ -1906,13 +1960,33 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
{
struct vmcs *vmcs;
+ vcpu->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!vcpu->guest_msrs)
+ return -ENOMEM;
+
+ vcpu->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!vcpu->host_msrs)
+ goto out_free_guest_msrs;
+
vmcs = alloc_vmcs();
if (!vmcs)
- return -ENOMEM;
+ goto out_free_msrs;
+
vmcs_clear(vmcs);
vcpu->vmcs = vmcs;
vcpu->launched = 0;
+
return 0;
+
+out_free_msrs:
+ kfree(vcpu->host_msrs);
+ vcpu->host_msrs = NULL;
+
+out_free_guest_msrs:
+ kfree(vcpu->guest_msrs);
+ vcpu->guest_msrs = NULL;
+
+ return -ENOMEM;
}
static struct kvm_arch_ops vmx_arch_ops = {
@@ -1936,6 +2010,7 @@ static struct kvm_arch_ops vmx_arch_ops = {
.get_segment = vmx_get_segment,
.set_segment = vmx_set_segment,
.get_cs_db_l_bits = vmx_get_cs_db_l_bits,
+ .decache_cr0_cr4_guest_bits = vmx_decache_cr0_cr4_guest_bits,
.set_cr0 = vmx_set_cr0,
.set_cr0_no_modeswitch = vmx_set_cr0_no_modeswitch,
.set_cr3 = vmx_set_cr3,
diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index 1bff3e925fda..be70795b4822 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -1323,7 +1323,7 @@ twobyte_special_insn:
ctxt)) != 0))
goto done;
if ((old_lo != _regs[VCPU_REGS_RAX])
- || (old_hi != _regs[VCPU_REGS_RDI])) {
+ || (old_hi != _regs[VCPU_REGS_RDX])) {
_regs[VCPU_REGS_RAX] = old_lo;
_regs[VCPU_REGS_RDX] = old_hi;
_eflags &= ~EFLG_ZF;
diff --git a/drivers/leds/leds-s3c24xx.c b/drivers/leds/leds-s3c24xx.c
index fb1edc1c9edb..50914439d861 100644
--- a/drivers/leds/leds-s3c24xx.c
+++ b/drivers/leds/leds-s3c24xx.c
@@ -16,7 +16,7 @@
#include <linux/platform_device.h>
#include <linux/leds.h>
-#include <asm/arch/hardware.h>
+#include <asm/hardware.h>
#include <asm/arch/regs-gpio.h>
#include <asm/arch/leds-gpio.h>
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index c8558d4ed506..8ca75e52f637 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -44,6 +44,7 @@
#include <linux/sysdev.h>
#include <linux/freezer.h>
#include <linux/syscalls.h>
+#include <linux/suspend.h>
#include <linux/cpu.h>
#include <asm/prom.h>
#include <asm/machdep.h>
diff --git a/drivers/net/Space.c b/drivers/net/Space.c
index 602ed31a5dd9..9305eb9b1b98 100644
--- a/drivers/net/Space.c
+++ b/drivers/net/Space.c
@@ -349,22 +349,11 @@ static void __init trif_probe2(int unit)
#endif
-/*
- * The loopback device is global so it can be directly referenced
- * by the network code. Also, it must be first on device list.
- */
-extern int loopback_init(void);
-
/* Statically configured drivers -- order matters here. */
static int __init net_olddevs_init(void)
{
int num;
- if (loopback_init()) {
- printk(KERN_ERR "Network loopback device setup failed\n");
- }
-
-
#ifdef CONFIG_SBNI
for (num = 0; num < 8; ++num)
sbni_probe(num);
diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index c26a4b8e552a..ca2b21f9d444 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -154,8 +154,8 @@ static int ifb_xmit(struct sk_buff *skb, struct net_device *dev)
int ret = 0;
u32 from = G_TC_FROM(skb->tc_verd);
- stats->tx_packets++;
- stats->tx_bytes+=skb->len;
+ stats->rx_packets++;
+ stats->rx_bytes+=skb->len;
if (!from || !skb->input_dev) {
dropped:
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 82c10dec1b5a..2b739fd584f1 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -229,9 +229,11 @@ struct net_device loopback_dev = {
};
/* Setup and register the loopback device. */
-int __init loopback_init(void)
+static int __init loopback_init(void)
{
return register_netdev(&loopback_dev);
};
+module_init(loopback_init);
+
EXPORT_SYMBOL(loopback_dev);
diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index 785e4a535f9e..616be8d0fa85 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -90,7 +90,8 @@
#define ADVERTISE_MASK (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | \
SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | \
- SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full)
+ SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full | \
+ SUPPORTED_Pause | SUPPORTED_Autoneg)
#define DRV_NAME "sungem"
#define DRV_VERSION "0.98"
diff --git a/drivers/net/sungem_phy.c b/drivers/net/sungem_phy.c
index 49800b25907d..d21991ee88c4 100644
--- a/drivers/net/sungem_phy.c
+++ b/drivers/net/sungem_phy.c
@@ -3,10 +3,9 @@
*
* This file could be shared with other drivers.
*
- * (c) 2002, Benjamin Herrenscmidt (benh@kernel.crashing.org)
+ * (c) 2002-2007, Benjamin Herrenscmidt (benh@kernel.crashing.org)
*
* TODO:
- * - Implement WOL
* - Add support for PHYs that provide an IRQ line
* - Eventually moved the entire polling state machine in
* there (out of the eth driver), so that it can easily be
@@ -152,6 +151,44 @@ static int bcm5221_suspend(struct mii_phy* phy)
return 0;
}
+static int bcm5241_init(struct mii_phy* phy)
+{
+ u16 data;
+
+ data = phy_read(phy, MII_BCM5221_TEST);
+ phy_write(phy, MII_BCM5221_TEST,
+ data | MII_BCM5221_TEST_ENABLE_SHADOWS);
+
+ data = phy_read(phy, MII_BCM5221_SHDOW_AUX_STAT2);
+ phy_write(phy, MII_BCM5221_SHDOW_AUX_STAT2,
+ data | MII_BCM5221_SHDOW_AUX_STAT2_APD);
+
+ data = phy_read(phy, MII_BCM5221_SHDOW_AUX_MODE4);
+ phy_write(phy, MII_BCM5221_SHDOW_AUX_MODE4,
+ data & ~MII_BCM5241_SHDOW_AUX_MODE4_STANDBYPWR);
+
+ data = phy_read(phy, MII_BCM5221_TEST);
+ phy_write(phy, MII_BCM5221_TEST,
+ data & ~MII_BCM5221_TEST_ENABLE_SHADOWS);
+
+ return 0;
+}
+
+static int bcm5241_suspend(struct mii_phy* phy)
+{
+ u16 data;
+
+ data = phy_read(phy, MII_BCM5221_TEST);
+ phy_write(phy, MII_BCM5221_TEST,
+ data | MII_BCM5221_TEST_ENABLE_SHADOWS);
+
+ data = phy_read(phy, MII_BCM5221_SHDOW_AUX_MODE4);
+ phy_write(phy, MII_BCM5221_SHDOW_AUX_MODE4,
+ data | MII_BCM5241_SHDOW_AUX_MODE4_STANDBYPWR);
+
+ return 0;
+}
+
static int bcm5400_init(struct mii_phy* phy)
{
u16 data;
@@ -373,6 +410,10 @@ static int bcm54xx_setup_aneg(struct mii_phy *phy, u32 advertise)
adv |= ADVERTISE_100HALF;
if (advertise & ADVERTISED_100baseT_Full)
adv |= ADVERTISE_100FULL;
+ if (advertise & ADVERTISED_Pause)
+ adv |= ADVERTISE_PAUSE_CAP;
+ if (advertise & ADVERTISED_Asym_Pause)
+ adv |= ADVERTISE_PAUSE_ASYM;
phy_write(phy, MII_ADVERTISE, adv);
/* Setup 1000BT advertise */
@@ -436,12 +477,15 @@ static int bcm54xx_read_link(struct mii_phy *phy)
val = phy_read(phy, MII_BCM5400_AUXSTATUS);
link_mode = ((val & MII_BCM5400_AUXSTATUS_LINKMODE_MASK) >>
MII_BCM5400_AUXSTATUS_LINKMODE_SHIFT);
- phy->duplex = phy_BCM5400_link_table[link_mode][0] ? DUPLEX_FULL : DUPLEX_HALF;
+ phy->duplex = phy_BCM5400_link_table[link_mode][0] ?
+ DUPLEX_FULL : DUPLEX_HALF;
phy->speed = phy_BCM5400_link_table[link_mode][2] ?
SPEED_1000 :
- (phy_BCM5400_link_table[link_mode][1] ? SPEED_100 : SPEED_10);
+ (phy_BCM5400_link_table[link_mode][1] ?
+ SPEED_100 : SPEED_10);
val = phy_read(phy, MII_LPA);
- phy->pause = ((val & LPA_PAUSE) != 0);
+ phy->pause = (phy->duplex == DUPLEX_FULL) &&
+ ((val & LPA_PAUSE) != 0);
}
/* On non-aneg, we assume what we put in BMCR is the speed,
* though magic-aneg shouldn't prevent this case from occurring
@@ -450,6 +494,28 @@ static int bcm54xx_read_link(struct mii_phy *phy)
return 0;
}
+static int marvell88e1111_init(struct mii_phy* phy)
+{
+ u16 rev;
+
+ /* magic init sequence for rev 0 */
+ rev = phy_read(phy, MII_PHYSID2) & 0x000f;
+ if (rev == 0) {
+ phy_write(phy, 0x1d, 0x000a);
+ phy_write(phy, 0x1e, 0x0821);
+
+ phy_write(phy, 0x1d, 0x0006);
+ phy_write(phy, 0x1e, 0x8600);
+
+ phy_write(phy, 0x1d, 0x000b);
+ phy_write(phy, 0x1e, 0x0100);
+
+ phy_write(phy, 0x1d, 0x0004);
+ phy_write(phy, 0x1e, 0x4850);
+ }
+ return 0;
+}
+
static int marvell_setup_aneg(struct mii_phy *phy, u32 advertise)
{
u16 ctl, adv;
@@ -471,6 +537,10 @@ static int marvell_setup_aneg(struct mii_phy *phy, u32 advertise)
adv |= ADVERTISE_100HALF;
if (advertise & ADVERTISED_100baseT_Full)
adv |= ADVERTISE_100FULL;
+ if (advertise & ADVERTISED_Pause)
+ adv |= ADVERTISE_PAUSE_CAP;
+ if (advertise & ADVERTISED_Asym_Pause)
+ adv |= ADVERTISE_PAUSE_ASYM;
phy_write(phy, MII_ADVERTISE, adv);
/* Setup 1000BT advertise & enable crossover detect
@@ -549,7 +619,7 @@ static int marvell_setup_forced(struct mii_phy *phy, int speed, int fd)
static int marvell_read_link(struct mii_phy *phy)
{
- u16 status;
+ u16 status, pmask;
if (phy->autoneg) {
status = phy_read(phy, MII_M1011_PHY_SPEC_STATUS);
@@ -565,7 +635,9 @@ static int marvell_read_link(struct mii_phy *phy)
phy->duplex = DUPLEX_FULL;
else
phy->duplex = DUPLEX_HALF;
- phy->pause = 0; /* XXX Check against spec ! */
+ pmask = MII_M1011_PHY_SPEC_STATUS_TX_PAUSE |
+ MII_M1011_PHY_SPEC_STATUS_RX_PAUSE;
+ phy->pause = (status & pmask) == pmask;
}
/* On non-aneg, we assume what we put in BMCR is the speed,
* though magic-aneg shouldn't prevent this case from occurring
@@ -595,6 +667,10 @@ static int genmii_setup_aneg(struct mii_phy *phy, u32 advertise)
adv |= ADVERTISE_100HALF;
if (advertise & ADVERTISED_100baseT_Full)
adv |= ADVERTISE_100FULL;
+ if (advertise & ADVERTISED_Pause)
+ adv |= ADVERTISE_PAUSE_CAP;
+ if (advertise & ADVERTISED_Asym_Pause)
+ adv |= ADVERTISE_PAUSE_ASYM;
phy_write(phy, MII_ADVERTISE, adv);
/* Start/Restart aneg */
@@ -666,7 +742,8 @@ static int genmii_read_link(struct mii_phy *phy)
phy->speed = SPEED_100;
else
phy->speed = SPEED_10;
- phy->pause = 0;
+ phy->pause = (phy->duplex == DUPLEX_FULL) &&
+ ((lpa & LPA_PAUSE) != 0);
}
/* On non-aneg, we assume what we put in BMCR is the speed,
* though magic-aneg shouldn't prevent this case from occurring
@@ -676,11 +753,19 @@ static int genmii_read_link(struct mii_phy *phy)
}
-#define MII_BASIC_FEATURES (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | \
- SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | \
- SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII)
-#define MII_GBIT_FEATURES (MII_BASIC_FEATURES | \
- SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full)
+#define MII_BASIC_FEATURES \
+ (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | \
+ SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | \
+ SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII | \
+ SUPPORTED_Pause)
+
+/* On gigabit capable PHYs, we advertise Pause support but not asym pause
+ * support for now as I'm not sure it's supported and Darwin doesn't do
+ * it neither. --BenH.
+ */
+#define MII_GBIT_FEATURES \
+ (MII_BASIC_FEATURES | \
+ SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full)
/* Broadcom BCM 5201 */
static struct mii_phy_ops bcm5201_phy_ops = {
@@ -720,6 +805,24 @@ static struct mii_phy_def bcm5221_phy_def = {
.ops = &bcm5221_phy_ops
};
+/* Broadcom BCM 5241 */
+static struct mii_phy_ops bcm5241_phy_ops = {
+ .suspend = bcm5241_suspend,
+ .init = bcm5241_init,
+ .setup_aneg = genmii_setup_aneg,
+ .setup_forced = genmii_setup_forced,
+ .poll_link = genmii_poll_link,
+ .read_link = genmii_read_link,
+};
+static struct mii_phy_def bcm5241_phy_def = {
+ .phy_id = 0x0143bc30,
+ .phy_id_mask = 0xfffffff0,
+ .name = "BCM5241",
+ .features = MII_BASIC_FEATURES,
+ .magic_aneg = 1,
+ .ops = &bcm5241_phy_ops
+};
+
/* Broadcom BCM 5400 */
static struct mii_phy_ops bcm5400_phy_ops = {
.init = bcm5400_init,
@@ -854,11 +957,8 @@ static struct mii_phy_def bcm5462V_phy_def = {
.ops = &bcm5462V_phy_ops
};
-/* Marvell 88E1101 (Apple seem to deal with 2 different revs,
- * I masked out the 8 last bits to get both, but some specs
- * would be useful here) --BenH.
- */
-static struct mii_phy_ops marvell_phy_ops = {
+/* Marvell 88E1101 amd 88E1111 */
+static struct mii_phy_ops marvell88e1101_phy_ops = {
.suspend = generic_suspend,
.setup_aneg = marvell_setup_aneg,
.setup_forced = marvell_setup_forced,
@@ -866,13 +966,41 @@ static struct mii_phy_ops marvell_phy_ops = {
.read_link = marvell_read_link
};
-static struct mii_phy_def marvell_phy_def = {
- .phy_id = 0x01410c00,
- .phy_id_mask = 0xffffff00,
- .name = "Marvell 88E1101",
+static struct mii_phy_ops marvell88e1111_phy_ops = {
+ .init = marvell88e1111_init,
+ .suspend = generic_suspend,
+ .setup_aneg = marvell_setup_aneg,
+ .setup_forced = marvell_setup_forced,
+ .poll_link = genmii_poll_link,
+ .read_link = marvell_read_link
+};
+
+/* two revs in darwin for the 88e1101 ... I could use a datasheet
+ * to get the proper names...
+ */
+static struct mii_phy_def marvell88e1101v1_phy_def = {
+ .phy_id = 0x01410c20,
+ .phy_id_mask = 0xfffffff0,
+ .name = "Marvell 88E1101v1",
+ .features = MII_GBIT_FEATURES,
+ .magic_aneg = 1,
+ .ops = &marvell88e1101_phy_ops
+};
+static struct mii_phy_def marvell88e1101v2_phy_def = {
+ .phy_id = 0x01410c60,
+ .phy_id_mask = 0xfffffff0,
+ .name = "Marvell 88E1101v2",
+ .features = MII_GBIT_FEATURES,
+ .magic_aneg = 1,
+ .ops = &marvell88e1101_phy_ops
+};
+static struct mii_phy_def marvell88e1111_phy_def = {
+ .phy_id = 0x01410cc0,
+ .phy_id_mask = 0xfffffff0,
+ .name = "Marvell 88E1111",
.features = MII_GBIT_FEATURES,
.magic_aneg = 1,
- .ops = &marvell_phy_ops
+ .ops = &marvell88e1111_phy_ops
};
/* Generic implementation for most 10/100 PHYs */
@@ -895,6 +1023,7 @@ static struct mii_phy_def genmii_phy_def = {
static struct mii_phy_def* mii_phy_table[] = {
&bcm5201_phy_def,
&bcm5221_phy_def,
+ &bcm5241_phy_def,
&bcm5400_phy_def,
&bcm5401_phy_def,
&bcm5411_phy_def,
@@ -902,7 +1031,9 @@ static struct mii_phy_def* mii_phy_table[] = {
&bcm5421k2_phy_def,
&bcm5461_phy_def,
&bcm5462V_phy_def,
- &marvell_phy_def,
+ &marvell88e1101v1_phy_def,
+ &marvell88e1101v2_phy_def,
+ &marvell88e1111_phy_def,
&genmii_phy_def,
NULL
};
diff --git a/drivers/net/sungem_phy.h b/drivers/net/sungem_phy.h
index 8ee1ca0471cf..1d70ba6f9f10 100644
--- a/drivers/net/sungem_phy.h
+++ b/drivers/net/sungem_phy.h
@@ -30,7 +30,7 @@ struct mii_phy_def
struct mii_phy
{
struct mii_phy_def* def;
- int advertising;
+ u32 advertising;
int mii_id;
/* 1: autoneg enabled, 0: disabled */
@@ -85,6 +85,9 @@ extern int mii_phy_probe(struct mii_phy *phy, int mii_id);
#define MII_BCM5221_SHDOW_AUX_MODE4_IDDQMODE 0x0001
#define MII_BCM5221_SHDOW_AUX_MODE4_CLKLOPWR 0x0004
+/* MII BCM5241 Additional registers */
+#define MII_BCM5241_SHDOW_AUX_MODE4_STANDBYPWR 0x0008
+
/* MII BCM5400 1000-BASET Control register */
#define MII_BCM5400_GB_CONTROL 0x09
#define MII_BCM5400_GB_CONTROL_FULLDUPLEXCAP 0x0200
@@ -115,5 +118,7 @@ extern int mii_phy_probe(struct mii_phy *phy, int mii_id);
#define MII_M1011_PHY_SPEC_STATUS_SPD_MASK 0xc000
#define MII_M1011_PHY_SPEC_STATUS_FULLDUPLEX 0x2000
#define MII_M1011_PHY_SPEC_STATUS_RESOLVED 0x0800
+#define MII_M1011_PHY_SPEC_STATUS_TX_PAUSE 0x0008
+#define MII_M1011_PHY_SPEC_STATUS_RX_PAUSE 0x0004
#endif /* __SUNGEM_PHY_H__ */
diff --git a/drivers/pci/search.c b/drivers/pci/search.c
index 45f2b20ef513..fab381ed853c 100644
--- a/drivers/pci/search.c
+++ b/drivers/pci/search.c
@@ -193,6 +193,18 @@ static struct pci_dev * pci_find_subsys(unsigned int vendor,
struct pci_dev *dev;
WARN_ON(in_interrupt());
+
+ /*
+ * pci_find_subsys() can be called on the ide_setup() path, super-early
+ * in boot. But the down_read() will enable local interrupts, which
+ * can cause some machines to crash. So here we detect and flag that
+ * situation and bail out early.
+ */
+ if (unlikely(list_empty(&pci_devices))) {
+ printk(KERN_INFO "pci_find_subsys() called while pci_devices "
+ "is still empty\n");
+ return NULL;
+ }
down_read(&pci_bus_sem);
n = from ? from->global_list.next : pci_devices.next;
@@ -259,6 +271,18 @@ pci_get_subsys(unsigned int vendor, unsigned int device,
struct pci_dev *dev;
WARN_ON(in_interrupt());
+
+ /*
+ * pci_get_subsys() can potentially be called by drivers super-early
+ * in boot. But the down_read() will enable local interrupts, which
+ * can cause some machines to crash. So here we detect and flag that
+ * situation and bail out early.
+ */
+ if (unlikely(list_empty(&pci_devices))) {
+ printk(KERN_NOTICE "pci_get_subsys() called while pci_devices "
+ "is still empty\n");
+ return NULL;
+ }
down_read(&pci_bus_sem);
n = from ? from->global_list.next : pci_devices.next;
diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c
index 4f654c901c64..a724ab49a797 100644
--- a/drivers/rtc/rtc-at91rm9200.c
+++ b/drivers/rtc/rtc-at91rm9200.c
@@ -33,6 +33,8 @@
#include <asm/mach/time.h>
+#include <asm/arch/at91_rtc.h>
+
#define AT91_RTC_FREQ 1
#define AT91_RTC_EPOCH 1900UL /* just like arch/arm/common/rtctime.c */
diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c
index 1460f6b769f2..e7851e3739ab 100644
--- a/drivers/rtc/rtc-rs5c372.c
+++ b/drivers/rtc/rtc-rs5c372.c
@@ -1,5 +1,5 @@
/*
- * An I2C driver for the Ricoh RS5C372 RTC
+ * An I2C driver for Ricoh RS5C372 and RV5C38[67] RTCs
*
* Copyright (C) 2005 Pavel Mironchik <pmironchik@optifacio.net>
* Copyright (C) 2006 Tower Technologies
@@ -13,7 +13,7 @@
#include <linux/rtc.h>
#include <linux/bcd.h>
-#define DRV_VERSION "0.3"
+#define DRV_VERSION "0.4"
/* Addresses to scan */
static unsigned short normal_i2c[] = { /* 0x32,*/ I2C_CLIENT_END };
@@ -21,6 +21,13 @@ static unsigned short normal_i2c[] = { /* 0x32,*/ I2C_CLIENT_END };
/* Insmod parameters */
I2C_CLIENT_INSMOD;
+
+/*
+ * Ricoh has a family of I2C based RTCs, which differ only slightly from
+ * each other. Differences center on pinout (e.g. how many interrupts,
+ * output clock, etc) and how the control registers are used. The '372
+ * is significant only because that's the one this driver first supported.
+ */
#define RS5C372_REG_SECS 0
#define RS5C372_REG_MINS 1
#define RS5C372_REG_HOURS 2
@@ -29,59 +36,142 @@ I2C_CLIENT_INSMOD;
#define RS5C372_REG_MONTH 5
#define RS5C372_REG_YEAR 6
#define RS5C372_REG_TRIM 7
+# define RS5C372_TRIM_XSL 0x80
+# define RS5C372_TRIM_MASK 0x7F
+
+#define RS5C_REG_ALARM_A_MIN 8 /* or ALARM_W */
+#define RS5C_REG_ALARM_A_HOURS 9
+#define RS5C_REG_ALARM_A_WDAY 10
+
+#define RS5C_REG_ALARM_B_MIN 11 /* or ALARM_D */
+#define RS5C_REG_ALARM_B_HOURS 12
+#define RS5C_REG_ALARM_B_WDAY 13 /* (ALARM_B only) */
+
+#define RS5C_REG_CTRL1 14
+# define RS5C_CTRL1_AALE (1 << 7) /* or WALE */
+# define RS5C_CTRL1_BALE (1 << 6) /* or DALE */
+# define RV5C387_CTRL1_24 (1 << 5)
+# define RS5C372A_CTRL1_SL1 (1 << 5)
+# define RS5C_CTRL1_CT_MASK (7 << 0)
+# define RS5C_CTRL1_CT0 (0 << 0) /* no periodic irq */
+# define RS5C_CTRL1_CT4 (4 << 0) /* 1 Hz level irq */
+#define RS5C_REG_CTRL2 15
+# define RS5C372_CTRL2_24 (1 << 5)
+# define RS5C_CTRL2_XSTP (1 << 4)
+# define RS5C_CTRL2_CTFG (1 << 2)
+# define RS5C_CTRL2_AAFG (1 << 1) /* or WAFG */
+# define RS5C_CTRL2_BAFG (1 << 0) /* or DAFG */
+
+
+/* to read (style 1) or write registers starting at R */
+#define RS5C_ADDR(R) (((R) << 4) | 0)
+
+
+enum rtc_type {
+ rtc_undef = 0,
+ rtc_rs5c372a,
+ rtc_rs5c372b,
+ rtc_rv5c386,
+ rtc_rv5c387a,
+};
-#define RS5C372_TRIM_XSL 0x80
-#define RS5C372_TRIM_MASK 0x7F
+/* REVISIT: this assumes that:
+ * - we're in the 21st century, so it's safe to ignore the century
+ * bit for rv5c38[67] (REG_MONTH bit 7);
+ * - we should use ALARM_A not ALARM_B (may be wrong on some boards)
+ */
+struct rs5c372 {
+ struct i2c_client *client;
+ struct rtc_device *rtc;
+ enum rtc_type type;
+ unsigned time24:1;
+ unsigned has_irq:1;
+ char buf[17];
+ char *regs;
+
+ /* on conversion to a "new style" i2c driver, this vanishes */
+ struct i2c_client dev;
+};
-#define RS5C372_REG_BASE 0
+static int rs5c_get_regs(struct rs5c372 *rs5c)
+{
+ struct i2c_client *client = rs5c->client;
+ struct i2c_msg msgs[] = {
+ { client->addr, I2C_M_RD, sizeof rs5c->buf, rs5c->buf },
+ };
+
+ /* This implements the third reading method from the datasheet, using
+ * an internal address that's reset after each transaction (by STOP)
+ * to 0x0f ... so we read extra registers, and skip the first one.
+ *
+ * The first method doesn't work with the iop3xx adapter driver, on at
+ * least 80219 chips; this works around that bug.
+ */
+ if ((i2c_transfer(client->adapter, msgs, 1)) != 1) {
+ pr_debug("%s: can't read registers\n", rs5c->rtc->name);
+ return -EIO;
+ }
-static int rs5c372_attach(struct i2c_adapter *adapter);
-static int rs5c372_detach(struct i2c_client *client);
-static int rs5c372_probe(struct i2c_adapter *adapter, int address, int kind);
+ dev_dbg(&client->dev,
+ "%02x %02x %02x (%02x) %02x %02x %02x (%02x), "
+ "%02x %02x %02x, %02x %02x %02x; %02x %02x\n",
+ rs5c->regs[0], rs5c->regs[1], rs5c->regs[2], rs5c->regs[3],
+ rs5c->regs[4], rs5c->regs[5], rs5c->regs[6], rs5c->regs[7],
+ rs5c->regs[8], rs5c->regs[9], rs5c->regs[10], rs5c->regs[11],
+ rs5c->regs[12], rs5c->regs[13], rs5c->regs[14], rs5c->regs[15]);
-struct rs5c372 {
- u8 reg_addr;
- u8 regs[17];
- struct i2c_msg msg[1];
- struct i2c_client client;
- struct rtc_device *rtc;
-};
+ return 0;
+}
-static struct i2c_driver rs5c372_driver = {
- .driver = {
- .name = "rs5c372",
- },
- .attach_adapter = &rs5c372_attach,
- .detach_client = &rs5c372_detach,
-};
+static unsigned rs5c_reg2hr(struct rs5c372 *rs5c, unsigned reg)
+{
+ unsigned hour;
-static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm)
+ if (rs5c->time24)
+ return BCD2BIN(reg & 0x3f);
+
+ hour = BCD2BIN(reg & 0x1f);
+ if (hour == 12)
+ hour = 0;
+ if (reg & 0x20)
+ hour += 12;
+ return hour;
+}
+
+static unsigned rs5c_hr2reg(struct rs5c372 *rs5c, unsigned hour)
{
+ if (rs5c->time24)
+ return BIN2BCD(hour);
+
+ if (hour > 12)
+ return 0x20 | BIN2BCD(hour - 12);
+ if (hour == 12)
+ return 0x20 | BIN2BCD(12);
+ if (hour == 0)
+ return BIN2BCD(12);
+ return BIN2BCD(hour);
+}
- struct rs5c372 *rs5c372 = i2c_get_clientdata(client);
- u8 *buf = &(rs5c372->regs[1]);
+static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm)
+{
+ struct rs5c372 *rs5c = i2c_get_clientdata(client);
+ int status = rs5c_get_regs(rs5c);
- /* this implements the 3rd reading method, according
- * to the datasheet. rs5c372 defaults to internal
- * address 0xF, so 0x0 is in regs[1]
- */
+ if (status < 0)
+ return status;
- if ((i2c_transfer(client->adapter, rs5c372->msg, 1)) != 1) {
- dev_err(&client->dev, "%s: read error\n", __FUNCTION__);
- return -EIO;
- }
+ tm->tm_sec = BCD2BIN(rs5c->regs[RS5C372_REG_SECS] & 0x7f);
+ tm->tm_min = BCD2BIN(rs5c->regs[RS5C372_REG_MINS] & 0x7f);
+ tm->tm_hour = rs5c_reg2hr(rs5c, rs5c->regs[RS5C372_REG_HOURS]);
- tm->tm_sec = BCD2BIN(buf[RS5C372_REG_SECS] & 0x7f);
- tm->tm_min = BCD2BIN(buf[RS5C372_REG_MINS] & 0x7f);
- tm->tm_hour = BCD2BIN(buf[RS5C372_REG_HOURS] & 0x3f);
- tm->tm_wday = BCD2BIN(buf[RS5C372_REG_WDAY] & 0x07);
- tm->tm_mday = BCD2BIN(buf[RS5C372_REG_DAY] & 0x3f);
+ tm->tm_wday = BCD2BIN(rs5c->regs[RS5C372_REG_WDAY] & 0x07);
+ tm->tm_mday = BCD2BIN(rs5c->regs[RS5C372_REG_DAY] & 0x3f);
/* tm->tm_mon is zero-based */
- tm->tm_mon = BCD2BIN(buf[RS5C372_REG_MONTH] & 0x1f) - 1;
+ tm->tm_mon = BCD2BIN(rs5c->regs[RS5C372_REG_MONTH] & 0x1f) - 1;
/* year is 1900 + tm->tm_year */
- tm->tm_year = BCD2BIN(buf[RS5C372_REG_YEAR]) + 100;
+ tm->tm_year = BCD2BIN(rs5c->regs[RS5C372_REG_YEAR]) + 100;
dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d, "
"mday=%d, mon=%d, year=%d, wday=%d\n",
@@ -89,22 +179,25 @@ static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm)
tm->tm_sec, tm->tm_min, tm->tm_hour,
tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
- return 0;
+ /* rtc might need initialization */
+ return rtc_valid_tm(tm);
}
static int rs5c372_set_datetime(struct i2c_client *client, struct rtc_time *tm)
{
- unsigned char buf[8] = { RS5C372_REG_BASE };
+ struct rs5c372 *rs5c = i2c_get_clientdata(client);
+ unsigned char buf[8];
- dev_dbg(&client->dev,
- "%s: secs=%d, mins=%d, hours=%d "
+ dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d "
"mday=%d, mon=%d, year=%d, wday=%d\n",
- __FUNCTION__, tm->tm_sec, tm->tm_min, tm->tm_hour,
+ __FUNCTION__,
+ tm->tm_sec, tm->tm_min, tm->tm_hour,
tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
+ buf[0] = RS5C_ADDR(RS5C372_REG_SECS);
buf[1] = BIN2BCD(tm->tm_sec);
buf[2] = BIN2BCD(tm->tm_min);
- buf[3] = BIN2BCD(tm->tm_hour);
+ buf[3] = rs5c_hr2reg(rs5c, tm->tm_hour);
buf[4] = BIN2BCD(tm->tm_wday);
buf[5] = BIN2BCD(tm->tm_mday);
buf[6] = BIN2BCD(tm->tm_mon + 1);
@@ -118,21 +211,43 @@ static int rs5c372_set_datetime(struct i2c_client *client, struct rtc_time *tm)
return 0;
}
+#if defined(CONFIG_RTC_INTF_PROC) || defined(CONFIG_RTC_INTF_PROC_MODULE)
+#define NEED_TRIM
+#endif
+
+#if defined(CONFIG_RTC_INTF_SYSFS) || defined(CONFIG_RTC_INTF_SYSFS_MODULE)
+#define NEED_TRIM
+#endif
+
+#ifdef NEED_TRIM
static int rs5c372_get_trim(struct i2c_client *client, int *osc, int *trim)
{
struct rs5c372 *rs5c372 = i2c_get_clientdata(client);
- u8 tmp = rs5c372->regs[RS5C372_REG_TRIM + 1];
+ u8 tmp = rs5c372->regs[RS5C372_REG_TRIM];
if (osc)
*osc = (tmp & RS5C372_TRIM_XSL) ? 32000 : 32768;
if (trim) {
- *trim = tmp & RS5C372_TRIM_MASK;
- dev_dbg(&client->dev, "%s: raw trim=%x\n", __FUNCTION__, *trim);
+ dev_dbg(&client->dev, "%s: raw trim=%x\n", __FUNCTION__, tmp);
+ tmp &= RS5C372_TRIM_MASK;
+ if (tmp & 0x3e) {
+ int t = tmp & 0x3f;
+
+ if (tmp & 0x40)
+ t = (~t | (s8)0xc0) + 1;
+ else
+ t = t - 1;
+
+ tmp = t * 2;
+ } else
+ tmp = 0;
+ *trim = tmp;
}
return 0;
}
+#endif
static int rs5c372_rtc_read_time(struct device *dev, struct rtc_time *tm)
{
@@ -144,25 +259,190 @@ static int rs5c372_rtc_set_time(struct device *dev, struct rtc_time *tm)
return rs5c372_set_datetime(to_i2c_client(dev), tm);
}
+#if defined(CONFIG_RTC_INTF_DEV) || defined(CONFIG_RTC_INTF_DEV_MODULE)
+
+static int
+rs5c_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct rs5c372 *rs5c = i2c_get_clientdata(client);
+ unsigned char buf[2];
+ int status;
+
+ buf[1] = rs5c->regs[RS5C_REG_CTRL1];
+ switch (cmd) {
+ case RTC_UIE_OFF:
+ case RTC_UIE_ON:
+ /* some 327a modes use a different IRQ pin for 1Hz irqs */
+ if (rs5c->type == rtc_rs5c372a
+ && (buf[1] & RS5C372A_CTRL1_SL1))
+ return -ENOIOCTLCMD;
+ case RTC_AIE_OFF:
+ case RTC_AIE_ON:
+ /* these irq management calls only make sense for chips
+ * which are wired up to an IRQ.
+ */
+ if (!rs5c->has_irq)
+ return -ENOIOCTLCMD;
+ break;
+ default:
+ return -ENOIOCTLCMD;
+ }
+
+ status = rs5c_get_regs(rs5c);
+ if (status < 0)
+ return status;
+
+ buf[0] = RS5C_ADDR(RS5C_REG_CTRL1);
+ switch (cmd) {
+ case RTC_AIE_OFF: /* alarm off */
+ buf[1] &= ~RS5C_CTRL1_AALE;
+ break;
+ case RTC_AIE_ON: /* alarm on */
+ buf[1] |= RS5C_CTRL1_AALE;
+ break;
+ case RTC_UIE_OFF: /* update off */
+ buf[1] &= ~RS5C_CTRL1_CT_MASK;
+ break;
+ case RTC_UIE_ON: /* update on */
+ buf[1] &= ~RS5C_CTRL1_CT_MASK;
+ buf[1] |= RS5C_CTRL1_CT4;
+ break;
+ }
+ if ((i2c_master_send(client, buf, 2)) != 2) {
+ printk(KERN_WARNING "%s: can't update alarm\n",
+ rs5c->rtc->name);
+ status = -EIO;
+ } else
+ rs5c->regs[RS5C_REG_CTRL1] = buf[1];
+ return status;
+}
+
+#else
+#define rs5c_rtc_ioctl NULL
+#endif
+
+
+/* NOTE: Since RTC_WKALM_{RD,SET} were originally defined for EFI,
+ * which only exposes a polled programming interface; and since
+ * these calls map directly to those EFI requests; we don't demand
+ * we have an IRQ for this chip when we go through this API.
+ *
+ * The older x86_pc derived RTC_ALM_{READ,SET} calls require irqs
+ * though, managed through RTC_AIE_{ON,OFF} requests.
+ */
+
+static int rs5c_read_alarm(struct device *dev, struct rtc_wkalrm *t)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct rs5c372 *rs5c = i2c_get_clientdata(client);
+ int status;
+
+ status = rs5c_get_regs(rs5c);
+ if (status < 0)
+ return status;
+
+ /* report alarm time */
+ t->time.tm_sec = 0;
+ t->time.tm_min = BCD2BIN(rs5c->regs[RS5C_REG_ALARM_A_MIN] & 0x7f);
+ t->time.tm_hour = rs5c_reg2hr(rs5c, rs5c->regs[RS5C_REG_ALARM_A_HOURS]);
+ t->time.tm_mday = -1;
+ t->time.tm_mon = -1;
+ t->time.tm_year = -1;
+ t->time.tm_wday = -1;
+ t->time.tm_yday = -1;
+ t->time.tm_isdst = -1;
+
+ /* ... and status */
+ t->enabled = !!(rs5c->regs[RS5C_REG_CTRL1] & RS5C_CTRL1_AALE);
+ t->pending = !!(rs5c->regs[RS5C_REG_CTRL2] & RS5C_CTRL2_AAFG);
+
+ return 0;
+}
+
+static int rs5c_set_alarm(struct device *dev, struct rtc_wkalrm *t)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct rs5c372 *rs5c = i2c_get_clientdata(client);
+ int status;
+ unsigned char buf[4];
+
+ /* only handle up to 24 hours in the future, like RTC_ALM_SET */
+ if (t->time.tm_mday != -1
+ || t->time.tm_mon != -1
+ || t->time.tm_year != -1)
+ return -EINVAL;
+
+ /* REVISIT: round up tm_sec */
+
+ /* if needed, disable irq (clears pending status) */
+ status = rs5c_get_regs(rs5c);
+ if (status < 0)
+ return status;
+ if (rs5c->regs[RS5C_REG_CTRL1] & RS5C_CTRL1_AALE) {
+ buf[0] = RS5C_ADDR(RS5C_REG_CTRL1);
+ buf[1] = rs5c->regs[RS5C_REG_CTRL1] & ~RS5C_CTRL1_AALE;
+ if (i2c_master_send(client, buf, 2) != 2) {
+ pr_debug("%s: can't disable alarm\n", rs5c->rtc->name);
+ return -EIO;
+ }
+ rs5c->regs[RS5C_REG_CTRL1] = buf[1];
+ }
+
+ /* set alarm */
+ buf[0] = RS5C_ADDR(RS5C_REG_ALARM_A_MIN);
+ buf[1] = BIN2BCD(t->time.tm_min);
+ buf[2] = rs5c_hr2reg(rs5c, t->time.tm_hour);
+ buf[3] = 0x7f; /* any/all days */
+ if ((i2c_master_send(client, buf, 4)) != 4) {
+ pr_debug("%s: can't set alarm time\n", rs5c->rtc->name);
+ return -EIO;
+ }
+
+ /* ... and maybe enable its irq */
+ if (t->enabled) {
+ buf[0] = RS5C_ADDR(RS5C_REG_CTRL1);
+ buf[1] = rs5c->regs[RS5C_REG_CTRL1] | RS5C_CTRL1_AALE;
+ if ((i2c_master_send(client, buf, 2)) != 2)
+ printk(KERN_WARNING "%s: can't enable alarm\n",
+ rs5c->rtc->name);
+ rs5c->regs[RS5C_REG_CTRL1] = buf[1];
+ }
+
+ return 0;
+}
+
+#if defined(CONFIG_RTC_INTF_PROC) || defined(CONFIG_RTC_INTF_PROC_MODULE)
+
static int rs5c372_rtc_proc(struct device *dev, struct seq_file *seq)
{
int err, osc, trim;
err = rs5c372_get_trim(to_i2c_client(dev), &osc, &trim);
if (err == 0) {
- seq_printf(seq, "%d.%03d KHz\n", osc / 1000, osc % 1000);
- seq_printf(seq, "trim\t: %d\n", trim);
+ seq_printf(seq, "crystal\t\t: %d.%03d KHz\n",
+ osc / 1000, osc % 1000);
+ seq_printf(seq, "trim\t\t: %d\n", trim);
}
return 0;
}
+#else
+#define rs5c372_rtc_proc NULL
+#endif
+
static const struct rtc_class_ops rs5c372_rtc_ops = {
.proc = rs5c372_rtc_proc,
+ .ioctl = rs5c_rtc_ioctl,
.read_time = rs5c372_rtc_read_time,
.set_time = rs5c372_rtc_set_time,
+ .read_alarm = rs5c_read_alarm,
+ .set_alarm = rs5c_set_alarm,
};
+#if defined(CONFIG_RTC_INTF_SYSFS) || defined(CONFIG_RTC_INTF_SYSFS_MODULE)
+
static ssize_t rs5c372_sysfs_show_trim(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -172,7 +452,7 @@ static ssize_t rs5c372_sysfs_show_trim(struct device *dev,
if (err)
return err;
- return sprintf(buf, "0x%2x\n", trim);
+ return sprintf(buf, "%d\n", trim);
}
static DEVICE_ATTR(trim, S_IRUGO, rs5c372_sysfs_show_trim, NULL);
@@ -189,16 +469,35 @@ static ssize_t rs5c372_sysfs_show_osc(struct device *dev,
}
static DEVICE_ATTR(osc, S_IRUGO, rs5c372_sysfs_show_osc, NULL);
-static int rs5c372_attach(struct i2c_adapter *adapter)
+static int rs5c_sysfs_register(struct device *dev)
{
- return i2c_probe(adapter, &addr_data, rs5c372_probe);
+ int err;
+
+ err = device_create_file(dev, &dev_attr_trim);
+ if (err)
+ return err;
+ err = device_create_file(dev, &dev_attr_osc);
+ if (err)
+ device_remove_file(dev, &dev_attr_trim);
+
+ return err;
+}
+
+#else
+static int rs5c_sysfs_register(struct device *dev)
+{
+ return 0;
}
+#endif /* SYSFS */
+
+static struct i2c_driver rs5c372_driver;
static int rs5c372_probe(struct i2c_adapter *adapter, int address, int kind)
{
int err = 0;
struct i2c_client *client;
struct rs5c372 *rs5c372;
+ struct rtc_time tm;
dev_dbg(adapter->class_dev.dev, "%s\n", __FUNCTION__);
@@ -211,7 +510,15 @@ static int rs5c372_probe(struct i2c_adapter *adapter, int address, int kind)
err = -ENOMEM;
goto exit;
}
- client = &rs5c372->client;
+
+ /* we read registers 0x0f then 0x00-0x0f; skip the first one */
+ rs5c372->regs=&rs5c372->buf[1];
+
+ /* On conversion to a "new style" i2c driver, we'll be handed
+ * the i2c_client (we won't create it)
+ */
+ client = &rs5c372->dev;
+ rs5c372->client = client;
/* I2C client */
client->addr = address;
@@ -222,16 +529,99 @@ static int rs5c372_probe(struct i2c_adapter *adapter, int address, int kind)
i2c_set_clientdata(client, rs5c372);
- rs5c372->msg[0].addr = address;
- rs5c372->msg[0].flags = I2C_M_RD;
- rs5c372->msg[0].len = sizeof(rs5c372->regs);
- rs5c372->msg[0].buf = rs5c372->regs;
-
/* Inform the i2c layer */
if ((err = i2c_attach_client(client)))
goto exit_kfree;
- dev_info(&client->dev, "chip found, driver version " DRV_VERSION "\n");
+ err = rs5c_get_regs(rs5c372);
+ if (err < 0)
+ goto exit_detach;
+
+ /* For "new style" drivers, irq is in i2c_client and chip type
+ * info comes from i2c_client.dev.platform_data. Meanwhile:
+ *
+ * STICK BOARD-SPECIFIC SETUP CODE RIGHT HERE
+ */
+ if (rs5c372->type == rtc_undef) {
+ rs5c372->type = rtc_rs5c372b;
+ dev_warn(&client->dev, "assuming rs5c372b\n");
+ }
+
+ /* clock may be set for am/pm or 24 hr time */
+ switch (rs5c372->type) {
+ case rtc_rs5c372a:
+ case rtc_rs5c372b:
+ /* alarm uses ALARM_A; and nINTRA on 372a, nINTR on 372b.
+ * so does periodic irq, except some 327a modes.
+ */
+ if (rs5c372->regs[RS5C_REG_CTRL2] & RS5C372_CTRL2_24)
+ rs5c372->time24 = 1;
+ break;
+ case rtc_rv5c386:
+ case rtc_rv5c387a:
+ if (rs5c372->regs[RS5C_REG_CTRL1] & RV5C387_CTRL1_24)
+ rs5c372->time24 = 1;
+ /* alarm uses ALARM_W; and nINTRB for alarm and periodic
+ * irq, on both 386 and 387
+ */
+ break;
+ default:
+ dev_err(&client->dev, "unknown RTC type\n");
+ goto exit_detach;
+ }
+
+ /* if the oscillator lost power and no other software (like
+ * the bootloader) set it up, do it here.
+ */
+ if (rs5c372->regs[RS5C_REG_CTRL2] & RS5C_CTRL2_XSTP) {
+ unsigned char buf[3];
+
+ rs5c372->regs[RS5C_REG_CTRL2] &= ~RS5C_CTRL2_XSTP;
+
+ buf[0] = RS5C_ADDR(RS5C_REG_CTRL1);
+ buf[1] = rs5c372->regs[RS5C_REG_CTRL1];
+ buf[2] = rs5c372->regs[RS5C_REG_CTRL2];
+
+ /* use 24hr mode */
+ switch (rs5c372->type) {
+ case rtc_rs5c372a:
+ case rtc_rs5c372b:
+ buf[2] |= RS5C372_CTRL2_24;
+ rs5c372->time24 = 1;
+ break;
+ case rtc_rv5c386:
+ case rtc_rv5c387a:
+ buf[1] |= RV5C387_CTRL1_24;
+ rs5c372->time24 = 1;
+ break;
+ default:
+ /* impossible */
+ break;
+ }
+
+ if ((i2c_master_send(client, buf, 3)) != 3) {
+ dev_err(&client->dev, "setup error\n");
+ goto exit_detach;
+ }
+ rs5c372->regs[RS5C_REG_CTRL1] = buf[1];
+ rs5c372->regs[RS5C_REG_CTRL2] = buf[2];
+ }
+
+ if (rs5c372_get_datetime(client, &tm) < 0)
+ dev_warn(&client->dev, "clock needs to be set\n");
+
+ dev_info(&client->dev, "%s found, %s, driver version " DRV_VERSION "\n",
+ ({ char *s; switch (rs5c372->type) {
+ case rtc_rs5c372a: s = "rs5c372a"; break;
+ case rtc_rs5c372b: s = "rs5c372b"; break;
+ case rtc_rv5c386: s = "rv5c386"; break;
+ case rtc_rv5c387a: s = "rv5c387a"; break;
+ default: s = "chip"; break;
+ }; s;}),
+ rs5c372->time24 ? "24hr" : "am/pm"
+ );
+
+ /* FIXME when client->irq exists, use it to register alarm irq */
rs5c372->rtc = rtc_device_register(rs5c372_driver.driver.name,
&client->dev, &rs5c372_rtc_ops, THIS_MODULE);
@@ -241,18 +631,12 @@ static int rs5c372_probe(struct i2c_adapter *adapter, int address, int kind)
goto exit_detach;
}
- err = device_create_file(&client->dev, &dev_attr_trim);
+ err = rs5c_sysfs_register(&client->dev);
if (err)
goto exit_devreg;
- err = device_create_file(&client->dev, &dev_attr_osc);
- if (err)
- goto exit_trim;
return 0;
-exit_trim:
- device_remove_file(&client->dev, &dev_attr_trim);
-
exit_devreg:
rtc_device_unregister(rs5c372->rtc);
@@ -266,6 +650,11 @@ exit:
return err;
}
+static int rs5c372_attach(struct i2c_adapter *adapter)
+{
+ return i2c_probe(adapter, &addr_data, rs5c372_probe);
+}
+
static int rs5c372_detach(struct i2c_client *client)
{
int err;
@@ -274,6 +663,8 @@ static int rs5c372_detach(struct i2c_client *client)
if (rs5c372->rtc)
rtc_device_unregister(rs5c372->rtc);
+ /* REVISIT properly destroy the sysfs files ... */
+
if ((err = i2c_detach_client(client)))
return err;
@@ -281,6 +672,14 @@ static int rs5c372_detach(struct i2c_client *client)
return 0;
}
+static struct i2c_driver rs5c372_driver = {
+ .driver = {
+ .name = "rtc-rs5c372",
+ },
+ .attach_adapter = &rs5c372_attach,
+ .detach_client = &rs5c372_detach,
+};
+
static __init int rs5c372_init(void)
{
return i2c_add_driver(&rs5c372_driver);
diff --git a/drivers/usb/input/Kconfig b/drivers/usb/input/Kconfig
index f877cd4f317a..258a5d09d3dc 100644
--- a/drivers/usb/input/Kconfig
+++ b/drivers/usb/input/Kconfig
@@ -12,10 +12,8 @@ config USB_HID
---help---
Say Y here if you want full HID support to connect USB keyboards,
mice, joysticks, graphic tablets, or any other HID based devices
- to your computer via USB. You also need to select HID Input layer
- support (below) if you want to use keyboards, mice, joysticks and
- the like ... as well as Uninterruptible Power Supply (UPS) and
- monitor control devices.
+ to your computer via USB, as well as Uninterruptible Power Supply
+ (UPS) and monitor control devices.
You can't use this driver and the HIDBP (Boot Protocol) keyboard
and mouse drivers at the same time. More information is available:
diff --git a/drivers/video/backlight/corgi_bl.c b/drivers/video/backlight/corgi_bl.c
index 61587ca2cdbb..fde1d9518123 100644
--- a/drivers/video/backlight/corgi_bl.c
+++ b/drivers/video/backlight/corgi_bl.c
@@ -121,7 +121,7 @@ static int corgibl_probe(struct platform_device *pdev)
machinfo->limit_mask = -1;
corgi_backlight_device = backlight_device_register ("corgi-bl",
- NULL, &corgibl_data);
+ &pdev->dev, NULL, &corgibl_data);
if (IS_ERR (corgi_backlight_device))
return PTR_ERR (corgi_backlight_device);
diff --git a/drivers/video/backlight/hp680_bl.c b/drivers/video/backlight/hp680_bl.c
index 1c569fb543ae..c07d8207fb54 100644
--- a/drivers/video/backlight/hp680_bl.c
+++ b/drivers/video/backlight/hp680_bl.c
@@ -105,7 +105,7 @@ static struct backlight_properties hp680bl_data = {
static int __init hp680bl_probe(struct platform_device *dev)
{
hp680_backlight_device = backlight_device_register ("hp680-bl",
- NULL, &hp680bl_data);
+ &dev->dev, NULL, &hp680bl_data);
if (IS_ERR (hp680_backlight_device))
return PTR_ERR (hp680_backlight_device);
diff --git a/drivers/video/backlight/locomolcd.c b/drivers/video/backlight/locomolcd.c
index 2d7905410b2a..fc812d96c31d 100644
--- a/drivers/video/backlight/locomolcd.c
+++ b/drivers/video/backlight/locomolcd.c
@@ -184,7 +184,7 @@ static int locomolcd_probe(struct locomo_dev *ldev)
local_irq_restore(flags);
- locomolcd_bl_device = backlight_device_register("locomo-bl", NULL, &locomobl_data);
+ locomolcd_bl_device = backlight_device_register("locomo-bl", &ldev->dev, NULL, &locomobl_data);
if (IS_ERR (locomolcd_bl_device))
return PTR_ERR (locomolcd_bl_device);
diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c
index bbfc86259272..b9b2b27b68c3 100644
--- a/fs/adfs/dir_f.c
+++ b/fs/adfs/dir_f.c
@@ -53,7 +53,7 @@ static inline int adfs_readname(char *buf, char *ptr, int maxlen)
{
char *old_buf = buf;
- while (*ptr >= ' ' && maxlen--) {
+ while ((unsigned char)*ptr >= ' ' && maxlen--) {
if (*ptr == '/')
*buf++ = '.';
else
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 34e6d7b220c3..869f5193ecc2 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -14,59 +14,307 @@
#include <linux/time.h>
#include <linux/smp_lock.h>
#include <linux/namei.h>
+#include <linux/poll.h>
-static int return_EIO(void)
+
+static loff_t bad_file_llseek(struct file *file, loff_t offset, int origin)
+{
+ return -EIO;
+}
+
+static ssize_t bad_file_read(struct file *filp, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ return -EIO;
+}
+
+static ssize_t bad_file_write(struct file *filp, const char __user *buf,
+ size_t siz, loff_t *ppos)
+{
+ return -EIO;
+}
+
+static ssize_t bad_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ return -EIO;
+}
+
+static ssize_t bad_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ return -EIO;
+}
+
+static int bad_file_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+ return -EIO;
+}
+
+static unsigned int bad_file_poll(struct file *filp, poll_table *wait)
+{
+ return POLLERR;
+}
+
+static int bad_file_ioctl (struct inode *inode, struct file *filp,
+ unsigned int cmd, unsigned long arg)
+{
+ return -EIO;
+}
+
+static long bad_file_unlocked_ioctl(struct file *file, unsigned cmd,
+ unsigned long arg)
+{
+ return -EIO;
+}
+
+static long bad_file_compat_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ return -EIO;
+}
+
+static int bad_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ return -EIO;
+}
+
+static int bad_file_open(struct inode *inode, struct file *filp)
+{
+ return -EIO;
+}
+
+static int bad_file_flush(struct file *file, fl_owner_t id)
+{
+ return -EIO;
+}
+
+static int bad_file_release(struct inode *inode, struct file *filp)
+{
+ return -EIO;
+}
+
+static int bad_file_fsync(struct file *file, struct dentry *dentry,
+ int datasync)
+{
+ return -EIO;
+}
+
+static int bad_file_aio_fsync(struct kiocb *iocb, int datasync)
+{
+ return -EIO;
+}
+
+static int bad_file_fasync(int fd, struct file *filp, int on)
+{
+ return -EIO;
+}
+
+static int bad_file_lock(struct file *file, int cmd, struct file_lock *fl)
+{
+ return -EIO;
+}
+
+static ssize_t bad_file_sendfile(struct file *in_file, loff_t *ppos,
+ size_t count, read_actor_t actor, void *target)
+{
+ return -EIO;
+}
+
+static ssize_t bad_file_sendpage(struct file *file, struct page *page,
+ int off, size_t len, loff_t *pos, int more)
+{
+ return -EIO;
+}
+
+static unsigned long bad_file_get_unmapped_area(struct file *file,
+ unsigned long addr, unsigned long len,
+ unsigned long pgoff, unsigned long flags)
+{
+ return -EIO;
+}
+
+static int bad_file_check_flags(int flags)
{
return -EIO;
}
-#define EIO_ERROR ((void *) (return_EIO))
+static int bad_file_dir_notify(struct file *file, unsigned long arg)
+{
+ return -EIO;
+}
+
+static int bad_file_flock(struct file *filp, int cmd, struct file_lock *fl)
+{
+ return -EIO;
+}
+
+static ssize_t bad_file_splice_write(struct pipe_inode_info *pipe,
+ struct file *out, loff_t *ppos, size_t len,
+ unsigned int flags)
+{
+ return -EIO;
+}
+
+static ssize_t bad_file_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags)
+{
+ return -EIO;
+}
static const struct file_operations bad_file_ops =
{
- .llseek = EIO_ERROR,
- .aio_read = EIO_ERROR,
- .read = EIO_ERROR,
- .write = EIO_ERROR,
- .aio_write = EIO_ERROR,
- .readdir = EIO_ERROR,
- .poll = EIO_ERROR,
- .ioctl = EIO_ERROR,
- .mmap = EIO_ERROR,
- .open = EIO_ERROR,
- .flush = EIO_ERROR,
- .release = EIO_ERROR,
- .fsync = EIO_ERROR,
- .aio_fsync = EIO_ERROR,
- .fasync = EIO_ERROR,
- .lock = EIO_ERROR,
- .sendfile = EIO_ERROR,
- .sendpage = EIO_ERROR,
- .get_unmapped_area = EIO_ERROR,
+ .llseek = bad_file_llseek,
+ .read = bad_file_read,
+ .write = bad_file_write,
+ .aio_read = bad_file_aio_read,
+ .aio_write = bad_file_aio_write,
+ .readdir = bad_file_readdir,
+ .poll = bad_file_poll,
+ .ioctl = bad_file_ioctl,
+ .unlocked_ioctl = bad_file_unlocked_ioctl,
+ .compat_ioctl = bad_file_compat_ioctl,
+ .mmap = bad_file_mmap,
+ .open = bad_file_open,
+ .flush = bad_file_flush,
+ .release = bad_file_release,
+ .fsync = bad_file_fsync,
+ .aio_fsync = bad_file_aio_fsync,
+ .fasync = bad_file_fasync,
+ .lock = bad_file_lock,
+ .sendfile = bad_file_sendfile,
+ .sendpage = bad_file_sendpage,
+ .get_unmapped_area = bad_file_get_unmapped_area,
+ .check_flags = bad_file_check_flags,
+ .dir_notify = bad_file_dir_notify,
+ .flock = bad_file_flock,
+ .splice_write = bad_file_splice_write,
+ .splice_read = bad_file_splice_read,
};
+static int bad_inode_create (struct inode *dir, struct dentry *dentry,
+ int mode, struct nameidata *nd)
+{
+ return -EIO;
+}
+
+static struct dentry *bad_inode_lookup(struct inode *dir,
+ struct dentry *dentry, struct nameidata *nd)
+{
+ return ERR_PTR(-EIO);
+}
+
+static int bad_inode_link (struct dentry *old_dentry, struct inode *dir,
+ struct dentry *dentry)
+{
+ return -EIO;
+}
+
+static int bad_inode_unlink(struct inode *dir, struct dentry *dentry)
+{
+ return -EIO;
+}
+
+static int bad_inode_symlink (struct inode *dir, struct dentry *dentry,
+ const char *symname)
+{
+ return -EIO;
+}
+
+static int bad_inode_mkdir(struct inode *dir, struct dentry *dentry,
+ int mode)
+{
+ return -EIO;
+}
+
+static int bad_inode_rmdir (struct inode *dir, struct dentry *dentry)
+{
+ return -EIO;
+}
+
+static int bad_inode_mknod (struct inode *dir, struct dentry *dentry,
+ int mode, dev_t rdev)
+{
+ return -EIO;
+}
+
+static int bad_inode_rename (struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry)
+{
+ return -EIO;
+}
+
+static int bad_inode_readlink(struct dentry *dentry, char __user *buffer,
+ int buflen)
+{
+ return -EIO;
+}
+
+static int bad_inode_permission(struct inode *inode, int mask,
+ struct nameidata *nd)
+{
+ return -EIO;
+}
+
+static int bad_inode_getattr(struct vfsmount *mnt, struct dentry *dentry,
+ struct kstat *stat)
+{
+ return -EIO;
+}
+
+static int bad_inode_setattr(struct dentry *direntry, struct iattr *attrs)
+{
+ return -EIO;
+}
+
+static int bad_inode_setxattr(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags)
+{
+ return -EIO;
+}
+
+static ssize_t bad_inode_getxattr(struct dentry *dentry, const char *name,
+ void *buffer, size_t size)
+{
+ return -EIO;
+}
+
+static ssize_t bad_inode_listxattr(struct dentry *dentry, char *buffer,
+ size_t buffer_size)
+{
+ return -EIO;
+}
+
+static int bad_inode_removexattr(struct dentry *dentry, const char *name)
+{
+ return -EIO;
+}
+
static struct inode_operations bad_inode_ops =
{
- .create = EIO_ERROR,
- .lookup = EIO_ERROR,
- .link = EIO_ERROR,
- .unlink = EIO_ERROR,
- .symlink = EIO_ERROR,
- .mkdir = EIO_ERROR,
- .rmdir = EIO_ERROR,
- .mknod = EIO_ERROR,
- .rename = EIO_ERROR,
- .readlink = EIO_ERROR,
+ .create = bad_inode_create,
+ .lookup = bad_inode_lookup,
+ .link = bad_inode_link,
+ .unlink = bad_inode_unlink,
+ .symlink = bad_inode_symlink,
+ .mkdir = bad_inode_mkdir,
+ .rmdir = bad_inode_rmdir,
+ .mknod = bad_inode_mknod,
+ .rename = bad_inode_rename,
+ .readlink = bad_inode_readlink,
/* follow_link must be no-op, otherwise unmounting this inode
won't work */
- .truncate = EIO_ERROR,
- .permission = EIO_ERROR,
- .getattr = EIO_ERROR,
- .setattr = EIO_ERROR,
- .setxattr = EIO_ERROR,
- .getxattr = EIO_ERROR,
- .listxattr = EIO_ERROR,
- .removexattr = EIO_ERROR,
+ /* put_link returns void */
+ /* truncate returns void */
+ .permission = bad_inode_permission,
+ .getattr = bad_inode_getattr,
+ .setattr = bad_inode_setattr,
+ .setxattr = bad_inode_setxattr,
+ .getxattr = bad_inode_getxattr,
+ .listxattr = bad_inode_listxattr,
+ .removexattr = bad_inode_removexattr,
+ /* truncate_range returns void */
};
@@ -88,7 +336,7 @@ static struct inode_operations bad_inode_ops =
* on it to fail from this point on.
*/
-void make_bad_inode(struct inode * inode)
+void make_bad_inode(struct inode *inode)
{
remove_inode_hash(inode);
@@ -113,7 +361,7 @@ EXPORT_SYMBOL(make_bad_inode);
* Returns true if the inode in question has been marked as bad.
*/
-int is_bad_inode(struct inode * inode)
+int is_bad_inode(struct inode *inode)
{
return (inode->i_op == &bad_inode_ops);
}
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index b82381475779..2e0021e8f366 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -275,6 +275,25 @@ static void ufs_change_blocknr(struct inode *inode, unsigned int baseblk,
UFSD("EXIT\n");
}
+static void ufs_clear_frags(struct inode *inode, sector_t beg, unsigned int n,
+ int sync)
+{
+ struct buffer_head *bh;
+ sector_t end = beg + n;
+
+ for (; beg < end; ++beg) {
+ bh = sb_getblk(inode->i_sb, beg);
+ lock_buffer(bh);
+ memset(bh->b_data, 0, inode->i_sb->s_blocksize);
+ set_buffer_uptodate(bh);
+ mark_buffer_dirty(bh);
+ unlock_buffer(bh);
+ if (IS_SYNC(inode) || sync)
+ sync_dirty_buffer(bh);
+ brelse(bh);
+ }
+}
+
unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment,
unsigned goal, unsigned count, int * err, struct page *locked_page)
{
@@ -350,6 +369,8 @@ unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment,
*p = cpu_to_fs32(sb, result);
*err = 0;
UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count);
+ ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
+ locked_page != NULL);
}
unlock_super(sb);
UFSD("EXIT, result %u\n", result);
@@ -363,6 +384,8 @@ unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment,
if (result) {
*err = 0;
UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count);
+ ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
+ locked_page != NULL);
unlock_super(sb);
UFSD("EXIT, result %u\n", result);
return result;
@@ -398,6 +421,8 @@ unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment,
*p = cpu_to_fs32(sb, result);
*err = 0;
UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count);
+ ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
+ locked_page != NULL);
unlock_super(sb);
if (newcount < request)
ufs_free_fragments (inode, result + newcount, request - newcount);
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index ee1eaa6f4ec2..2fbab0aab688 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -156,36 +156,6 @@ out:
return ret;
}
-static void ufs_clear_frag(struct inode *inode, struct buffer_head *bh)
-{
- lock_buffer(bh);
- memset(bh->b_data, 0, inode->i_sb->s_blocksize);
- set_buffer_uptodate(bh);
- mark_buffer_dirty(bh);
- unlock_buffer(bh);
- if (IS_SYNC(inode))
- sync_dirty_buffer(bh);
-}
-
-static struct buffer_head *
-ufs_clear_frags(struct inode *inode, sector_t beg,
- unsigned int n, sector_t want)
-{
- struct buffer_head *res = NULL, *bh;
- sector_t end = beg + n;
-
- for (; beg < end; ++beg) {
- bh = sb_getblk(inode->i_sb, beg);
- ufs_clear_frag(inode, bh);
- if (want != beg)
- brelse(bh);
- else
- res = bh;
- }
- BUG_ON(!res);
- return res;
-}
-
/**
* ufs_inode_getfrag() - allocate new fragment(s)
* @inode - pointer to inode
@@ -302,7 +272,7 @@ repeat:
}
if (!phys) {
- result = ufs_clear_frags(inode, tmp, required, tmp + blockoff);
+ result = sb_getblk(sb, tmp + blockoff);
} else {
*phys = tmp + blockoff;
result = NULL;
@@ -403,8 +373,7 @@ repeat:
if (!phys) {
- result = ufs_clear_frags(inode, tmp, uspi->s_fpb,
- tmp + blockoff);
+ result = sb_getblk(sb, tmp + blockoff);
} else {
*phys = tmp + blockoff;
*new = 1;
@@ -471,13 +440,13 @@ int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head
#define GET_INODE_DATABLOCK(x) \
ufs_inode_getfrag(inode, x, fragment, 1, &err, &phys, &new, bh_result->b_page)
#define GET_INODE_PTR(x) \
- ufs_inode_getfrag(inode, x, fragment, uspi->s_fpb, &err, NULL, NULL, bh_result->b_page)
+ ufs_inode_getfrag(inode, x, fragment, uspi->s_fpb, &err, NULL, NULL, NULL)
#define GET_INDIRECT_DATABLOCK(x) \
ufs_inode_getblock(inode, bh, x, fragment, \
- &err, &phys, &new, bh_result->b_page);
+ &err, &phys, &new, bh_result->b_page)
#define GET_INDIRECT_PTR(x) \
ufs_inode_getblock(inode, bh, x, fragment, \
- &err, NULL, NULL, bh_result->b_page);
+ &err, NULL, NULL, NULL)
if (ptr < UFS_NDIR_FRAGMENT) {
bh = GET_INODE_DATABLOCK(ptr);
diff --git a/include/acpi/acconfig.h b/include/acpi/acconfig.h
index 9e6c23c360b2..ebc1f697615a 100644
--- a/include/acpi/acconfig.h
+++ b/include/acpi/acconfig.h
@@ -105,7 +105,7 @@
/* Maximum object reference count (detects object deletion issues) */
-#define ACPI_MAX_REFERENCE_COUNT 0x800
+#define ACPI_MAX_REFERENCE_COUNT 0x1000
/* Size of cached memory mapping for system memory operation region */
diff --git a/include/asm-i386/boot.h b/include/asm-i386/boot.h
index 8ce79a6fa891..e7686d0a8413 100644
--- a/include/asm-i386/boot.h
+++ b/include/asm-i386/boot.h
@@ -13,7 +13,8 @@
#define ASK_VGA 0xfffd /* ask for it at bootup */
/* Physical address where kenrel should be loaded. */
-#define LOAD_PHYSICAL_ADDR ((0x100000 + CONFIG_PHYSICAL_ALIGN - 1) \
+#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \
+ + (CONFIG_PHYSICAL_ALIGN - 1)) \
& ~(CONFIG_PHYSICAL_ALIGN - 1))
#endif /* _LINUX_BOOT_H */
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 28fdce1ac1db..bc8b4616bad7 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -11,7 +11,7 @@
#include <asm/types.h>
#include <linux/ioctl.h>
-#define KVM_API_VERSION 1
+#define KVM_API_VERSION 2
/*
* Architectural interrupt line count, and the size of the bitmap needed
@@ -45,6 +45,7 @@ enum kvm_exit_reason {
KVM_EXIT_DEBUG = 4,
KVM_EXIT_HLT = 5,
KVM_EXIT_MMIO = 6,
+ KVM_EXIT_IRQ_WINDOW_OPEN = 7,
};
/* for KVM_RUN */
@@ -53,11 +54,19 @@ struct kvm_run {
__u32 vcpu;
__u32 emulated; /* skip current instruction */
__u32 mmio_completed; /* mmio request completed */
+ __u8 request_interrupt_window;
+ __u8 padding1[3];
/* out */
__u32 exit_type;
__u32 exit_reason;
__u32 instruction_length;
+ __u8 ready_for_interrupt_injection;
+ __u8 if_flag;
+ __u16 padding2;
+ __u64 cr8;
+ __u64 apic_base;
+
union {
/* KVM_EXIT_UNKNOWN */
struct {
diff --git a/include/linux/magic.h b/include/linux/magic.h
index 156c40fc664e..b78bbf42135a 100644
--- a/include/linux/magic.h
+++ b/include/linux/magic.h
@@ -3,6 +3,7 @@
#define ADFS_SUPER_MAGIC 0xadf5
#define AFFS_SUPER_MAGIC 0xadff
+#define AFS_SUPER_MAGIC 0x5346414F
#define AUTOFS_SUPER_MAGIC 0x0187
#define CODA_SUPER_MAGIC 0x73757245
#define EFS_SUPER_MAGIC 0x414A53
diff --git a/include/linux/swap.h b/include/linux/swap.h
index add51cebc8d9..5423559a44a6 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -245,7 +245,7 @@ extern int swap_duplicate(swp_entry_t);
extern int valid_swaphandles(swp_entry_t, unsigned long *);
extern void swap_free(swp_entry_t);
extern void free_swap_and_cache(swp_entry_t);
-extern int swap_type_of(dev_t, sector_t);
+extern int swap_type_of(dev_t, sector_t, struct block_device **);
extern unsigned int count_swap_pages(int, int);
extern sector_t map_swap_page(struct swap_info_struct *, pgoff_t);
extern sector_t swapdev_block(int, pgoff_t);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index b7d8317f22ac..cd8fa0c858ae 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -242,7 +242,7 @@ extern int tcp_memory_pressure;
static inline int before(__u32 seq1, __u32 seq2)
{
- return (__s32)(seq2-seq1) > 0;
+ return (__s32)(seq1-seq2) < 0;
}
#define after(seq2, seq1) before(seq1, seq2)
diff --git a/include/net/x25.h b/include/net/x25.h
index 0ad90ebcf86e..e47fe440d9d7 100644
--- a/include/net/x25.h
+++ b/include/net/x25.h
@@ -259,6 +259,7 @@ extern int x25_decode(struct sock *, struct sk_buff *, int *, int *, int *, int
extern void x25_disconnect(struct sock *, int, unsigned char, unsigned char);
/* x25_timer.c */
+extern void x25_init_timers(struct sock *sk);
extern void x25_start_heartbeat(struct sock *);
extern void x25_start_t2timer(struct sock *);
extern void x25_start_t21timer(struct sock *);
diff --git a/init/main.c b/init/main.c
index 2b1cdaab45e6..bc27d72bbb19 100644
--- a/init/main.c
+++ b/init/main.c
@@ -538,6 +538,11 @@ asmlinkage void __init start_kernel(void)
parse_args("Booting kernel", command_line, __start___param,
__stop___param - __start___param,
&unknown_bootoption);
+ if (!irqs_disabled()) {
+ printk(KERN_WARNING "start_kernel(): bug: interrupts were "
+ "enabled *very* early, fixing it\n");
+ local_irq_disable();
+ }
sort_main_extable();
trap_init();
rcu_init();
diff --git a/kernel/params.c b/kernel/params.c
index f406655d6653..718945da8f58 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -143,9 +143,15 @@ int parse_args(const char *name,
while (*args) {
int ret;
+ int irq_was_disabled;
args = next_arg(args, &param, &val);
+ irq_was_disabled = irqs_disabled();
ret = parse_one(param, val, params, num, unknown);
+ if (irq_was_disabled && !irqs_disabled()) {
+ printk(KERN_WARNING "parse_args(): option '%s' enabled "
+ "irq's!\n", param);
+ }
switch (ret) {
case -ENOENT:
printk(KERN_ERR "%s: Unknown parameter `%s'\n",
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index f133d4a6d817..3581f8f86acd 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -165,14 +165,15 @@ static int swsusp_swap_check(void) /* This is called before saving image */
{
int res;
- res = swap_type_of(swsusp_resume_device, swsusp_resume_block);
+ res = swap_type_of(swsusp_resume_device, swsusp_resume_block,
+ &resume_bdev);
if (res < 0)
return res;
root_swap = res;
- resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_WRITE);
- if (IS_ERR(resume_bdev))
- return PTR_ERR(resume_bdev);
+ res = blkdev_get(resume_bdev, FMODE_WRITE, O_RDWR);
+ if (res)
+ return res;
res = set_blocksize(resume_bdev, PAGE_SIZE);
if (res < 0)
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 89443b85163b..f7b7a785a5c6 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -57,7 +57,7 @@ static int snapshot_open(struct inode *inode, struct file *filp)
memset(&data->handle, 0, sizeof(struct snapshot_handle));
if ((filp->f_flags & O_ACCMODE) == O_RDONLY) {
data->swap = swsusp_resume_device ?
- swap_type_of(swsusp_resume_device, 0) : -1;
+ swap_type_of(swsusp_resume_device, 0, NULL) : -1;
data->mode = O_RDONLY;
} else {
data->swap = -1;
@@ -268,7 +268,8 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
* so we need to recode them
*/
if (old_decode_dev(arg)) {
- data->swap = swap_type_of(old_decode_dev(arg), 0);
+ data->swap = swap_type_of(old_decode_dev(arg),
+ 0, NULL);
if (data->swap < 0)
error = -ENODEV;
} else {
@@ -365,7 +366,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
swdev = old_decode_dev(swap_area.dev);
if (swdev) {
offset = swap_area.offset;
- data->swap = swap_type_of(swdev, offset);
+ data->swap = swap_type_of(swdev, offset, NULL);
if (data->swap < 0)
error = -ENODEV;
} else {
diff --git a/kernel/profile.c b/kernel/profile.c
index fb5e03d57e9d..11550b2290b6 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -63,7 +63,7 @@ static int __init profile_setup(char * str)
printk(KERN_INFO
"kernel sleep profiling enabled (shift: %ld)\n",
prof_shift);
- } else if (!strncmp(str, sleepstr, strlen(sleepstr))) {
+ } else if (!strncmp(str, schedstr, strlen(schedstr))) {
prof_on = SCHED_PROFILING;
if (str[strlen(schedstr)] == ',')
str += strlen(schedstr) + 1;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 6969cfb33901..b278b8d60eee 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -61,12 +61,6 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
}
/*
- * swapoff can easily use up all memory, so kill those first.
- */
- if (p->flags & PF_SWAPOFF)
- return ULONG_MAX;
-
- /*
* The memory size of the process is the basis for the badness.
*/
points = mm->total_vm;
@@ -77,6 +71,12 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
task_unlock(p);
/*
+ * swapoff can easily use up all memory, so kill those first.
+ */
+ if (p->flags & PF_SWAPOFF)
+ return ULONG_MAX;
+
+ /*
* Processes which fork a lot of child processes are likely
* a good choice. We add half the vmsize of the children if they
* have an own mm. This prevents forking servers to flood the
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8c1a116875bc..a49f96b7ea43 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -711,6 +711,9 @@ static void __drain_pages(unsigned int cpu)
for_each_zone(zone) {
struct per_cpu_pageset *pset;
+ if (!populated_zone(zone))
+ continue;
+
pset = zone_pcp(zone, cpu);
for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
struct per_cpu_pages *pcp;
@@ -3321,6 +3324,10 @@ void *__init alloc_large_system_hash(const char *tablename,
numentries >>= (scale - PAGE_SHIFT);
else
numentries <<= (PAGE_SHIFT - scale);
+
+ /* Make sure we've got at least a 0-order allocation.. */
+ if (unlikely((numentries * bucketsize) < PAGE_SIZE))
+ numentries = PAGE_SIZE / bucketsize;
}
numentries = roundup_pow_of_two(numentries);
diff --git a/mm/slab.c b/mm/slab.c
index 0d4e57431de4..c6100628a6ef 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3281,7 +3281,7 @@ retry:
flags | GFP_THISNODE, nid);
}
- if (!obj) {
+ if (!obj && !(flags & __GFP_NO_GROW)) {
/*
* This allocation will be performed within the constraints
* of the current cpuset / memory policy requirements.
@@ -3310,7 +3310,7 @@ retry:
*/
goto retry;
} else {
- kmem_freepages(cache, obj);
+ /* cache_grow already freed obj */
obj = NULL;
}
}
diff --git a/mm/swapfile.c b/mm/swapfile.c
index b9fc0e5de6d5..a2d9bb4e80df 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -434,7 +434,7 @@ void free_swap_and_cache(swp_entry_t entry)
*
* This is needed for the suspend to disk (aka swsusp).
*/
-int swap_type_of(dev_t device, sector_t offset)
+int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
{
struct block_device *bdev = NULL;
int i;
@@ -450,6 +450,9 @@ int swap_type_of(dev_t device, sector_t offset)
continue;
if (!bdev) {
+ if (bdev_p)
+ *bdev_p = sis->bdev;
+
spin_unlock(&swap_lock);
return i;
}
@@ -459,6 +462,9 @@ int swap_type_of(dev_t device, sector_t offset)
se = list_entry(sis->extent_list.next,
struct swap_extent, list);
if (se->start_block == offset) {
+ if (bdev_p)
+ *bdev_p = sis->bdev;
+
spin_unlock(&swap_lock);
bdput(bdev);
return i;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 40fea4918390..7430df68cb64 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1406,6 +1406,16 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
return ret;
}
+static unsigned long count_lru_pages(void)
+{
+ struct zone *zone;
+ unsigned long ret = 0;
+
+ for_each_zone(zone)
+ ret += zone->nr_active + zone->nr_inactive;
+ return ret;
+}
+
/*
* Try to free `nr_pages' of memory, system-wide, and return the number of
* freed pages.
@@ -1420,7 +1430,6 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
unsigned long ret = 0;
int pass;
struct reclaim_state reclaim_state;
- struct zone *zone;
struct scan_control sc = {
.gfp_mask = GFP_KERNEL,
.may_swap = 0,
@@ -1431,10 +1440,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
current->reclaim_state = &reclaim_state;
- lru_pages = 0;
- for_each_zone(zone)
- lru_pages += zone->nr_active + zone->nr_inactive;
-
+ lru_pages = count_lru_pages();
nr_slab = global_page_state(NR_SLAB_RECLAIMABLE);
/* If slab caches are huge, it's better to hit them first */
while (nr_slab >= lru_pages) {
@@ -1461,13 +1467,6 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
for (pass = 0; pass < 5; pass++) {
int prio;
- /* Needed for shrinking slab caches later on */
- if (!lru_pages)
- for_each_zone(zone) {
- lru_pages += zone->nr_active;
- lru_pages += zone->nr_inactive;
- }
-
/* Force reclaiming mapped pages in the passes #3 and #4 */
if (pass > 2) {
sc.may_swap = 1;
@@ -1483,7 +1482,8 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
goto out;
reclaim_state.reclaimed_slab = 0;
- shrink_slab(sc.nr_scanned, sc.gfp_mask, lru_pages);
+ shrink_slab(sc.nr_scanned, sc.gfp_mask,
+ count_lru_pages());
ret += reclaim_state.reclaimed_slab;
if (ret >= nr_pages)
goto out;
@@ -1491,20 +1491,19 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
if (sc.nr_scanned && prio < DEF_PRIORITY - 2)
congestion_wait(WRITE, HZ / 10);
}
-
- lru_pages = 0;
}
/*
* If ret = 0, we could not shrink LRUs, but there may be something
* in slab caches
*/
- if (!ret)
+ if (!ret) {
do {
reclaim_state.reclaimed_slab = 0;
- shrink_slab(nr_pages, sc.gfp_mask, lru_pages);
+ shrink_slab(nr_pages, sc.gfp_mask, count_lru_pages());
ret += reclaim_state.reclaimed_slab;
} while (ret < nr_pages && reclaim_state.reclaimed_slab > 0);
+ }
out:
current->reclaim_state = NULL;
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index bee558a41800..6c84ccb8c9d7 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -610,7 +610,7 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo,
struct ebt_entry_target *t;
struct ebt_target *target;
unsigned int i, j, hook = 0, hookmask = 0;
- size_t gap = e->next_offset - e->target_offset;
+ size_t gap;
int ret;
/* don't mess with the struct ebt_entries */
@@ -660,6 +660,7 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo,
if (ret != 0)
goto cleanup_watchers;
t = (struct ebt_entry_target *)(((char *)e) + e->target_offset);
+ gap = e->next_offset - e->target_offset;
target = find_target_lock(t->u.name, &ret, &ebt_mutex);
if (!target)
goto cleanup_watchers;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 1897a3a385d8..04d4b93c68eb 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -148,6 +148,7 @@
#include <linux/seq_file.h>
#include <linux/wait.h>
#include <linux/etherdevice.h>
+#include <linux/kthread.h>
#include <net/checksum.h>
#include <net/ipv6.h>
#include <net/addrconf.h>
@@ -360,8 +361,7 @@ struct pktgen_thread {
spinlock_t if_lock;
struct list_head if_list; /* All device here */
struct list_head th_list;
- int removed;
- char name[32];
+ struct task_struct *tsk;
char result[512];
u32 max_before_softirq; /* We'll call do_softirq to prevent starvation. */
@@ -1689,7 +1689,7 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
BUG_ON(!t);
seq_printf(seq, "Name: %s max_before_softirq: %d\n",
- t->name, t->max_before_softirq);
+ t->tsk->comm, t->max_before_softirq);
seq_printf(seq, "Running: ");
@@ -3112,7 +3112,7 @@ static void pktgen_rem_thread(struct pktgen_thread *t)
{
/* Remove from the thread list */
- remove_proc_entry(t->name, pg_proc_dir);
+ remove_proc_entry(t->tsk->comm, pg_proc_dir);
mutex_lock(&pktgen_thread_lock);
@@ -3260,58 +3260,40 @@ out:;
* Main loop of the thread goes here
*/
-static void pktgen_thread_worker(struct pktgen_thread *t)
+static int pktgen_thread_worker(void *arg)
{
DEFINE_WAIT(wait);
+ struct pktgen_thread *t = arg;
struct pktgen_dev *pkt_dev = NULL;
int cpu = t->cpu;
- sigset_t tmpsig;
u32 max_before_softirq;
u32 tx_since_softirq = 0;
- daemonize("pktgen/%d", cpu);
-
- /* Block all signals except SIGKILL, SIGSTOP and SIGTERM */
-
- spin_lock_irq(&current->sighand->siglock);
- tmpsig = current->blocked;
- siginitsetinv(&current->blocked,
- sigmask(SIGKILL) | sigmask(SIGSTOP) | sigmask(SIGTERM));
-
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
-
- /* Migrate to the right CPU */
- set_cpus_allowed(current, cpumask_of_cpu(cpu));
- if (smp_processor_id() != cpu)
- BUG();
+ BUG_ON(smp_processor_id() != cpu);
init_waitqueue_head(&t->queue);
- t->control &= ~(T_TERMINATE);
- t->control &= ~(T_RUN);
- t->control &= ~(T_STOP);
- t->control &= ~(T_REMDEVALL);
- t->control &= ~(T_REMDEV);
-
t->pid = current->pid;
PG_DEBUG(printk("pktgen: starting pktgen/%d: pid=%d\n", cpu, current->pid));
max_before_softirq = t->max_before_softirq;
- __set_current_state(TASK_INTERRUPTIBLE);
- mb();
+ set_current_state(TASK_INTERRUPTIBLE);
- while (1) {
-
- __set_current_state(TASK_RUNNING);
+ while (!kthread_should_stop()) {
+ pkt_dev = next_to_run(t);
- /*
- * Get next dev to xmit -- if any.
- */
+ if (!pkt_dev &&
+ (t->control & (T_STOP | T_RUN | T_REMDEVALL | T_REMDEV))
+ == 0) {
+ prepare_to_wait(&(t->queue), &wait,
+ TASK_INTERRUPTIBLE);
+ schedule_timeout(HZ / 10);
+ finish_wait(&(t->queue), &wait);
+ }
- pkt_dev = next_to_run(t);
+ __set_current_state(TASK_RUNNING);
if (pkt_dev) {
@@ -3329,21 +3311,8 @@ static void pktgen_thread_worker(struct pktgen_thread *t)
do_softirq();
tx_since_softirq = 0;
}
- } else {
- prepare_to_wait(&(t->queue), &wait, TASK_INTERRUPTIBLE);
- schedule_timeout(HZ / 10);
- finish_wait(&(t->queue), &wait);
}
- /*
- * Back from sleep, either due to the timeout or signal.
- * We check if we have any "posted" work for us.
- */
-
- if (t->control & T_TERMINATE || signal_pending(current))
- /* we received a request to terminate ourself */
- break;
-
if (t->control & T_STOP) {
pktgen_stop(t);
t->control &= ~(T_STOP);
@@ -3364,20 +3333,19 @@ static void pktgen_thread_worker(struct pktgen_thread *t)
t->control &= ~(T_REMDEV);
}
- if (need_resched())
- schedule();
+ set_current_state(TASK_INTERRUPTIBLE);
}
- PG_DEBUG(printk("pktgen: %s stopping all device\n", t->name));
+ PG_DEBUG(printk("pktgen: %s stopping all device\n", t->tsk->comm));
pktgen_stop(t);
- PG_DEBUG(printk("pktgen: %s removing all device\n", t->name));
+ PG_DEBUG(printk("pktgen: %s removing all device\n", t->tsk->comm));
pktgen_rem_all_ifs(t);
- PG_DEBUG(printk("pktgen: %s removing thread.\n", t->name));
+ PG_DEBUG(printk("pktgen: %s removing thread.\n", t->tsk->comm));
pktgen_rem_thread(t);
- t->removed = 1;
+ return 0;
}
static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
@@ -3495,37 +3463,11 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
return add_dev_to_thread(t, pkt_dev);
}
-static struct pktgen_thread *__init pktgen_find_thread(const char *name)
+static int __init pktgen_create_thread(int cpu)
{
struct pktgen_thread *t;
-
- mutex_lock(&pktgen_thread_lock);
-
- list_for_each_entry(t, &pktgen_threads, th_list)
- if (strcmp(t->name, name) == 0) {
- mutex_unlock(&pktgen_thread_lock);
- return t;
- }
-
- mutex_unlock(&pktgen_thread_lock);
- return NULL;
-}
-
-static int __init pktgen_create_thread(const char *name, int cpu)
-{
- int err;
- struct pktgen_thread *t = NULL;
struct proc_dir_entry *pe;
-
- if (strlen(name) > 31) {
- printk("pktgen: ERROR: Thread name cannot be more than 31 characters.\n");
- return -EINVAL;
- }
-
- if (pktgen_find_thread(name)) {
- printk("pktgen: ERROR: thread: %s already exists\n", name);
- return -EINVAL;
- }
+ struct task_struct *p;
t = kzalloc(sizeof(struct pktgen_thread), GFP_KERNEL);
if (!t) {
@@ -3533,14 +3475,29 @@ static int __init pktgen_create_thread(const char *name, int cpu)
return -ENOMEM;
}
- strcpy(t->name, name);
spin_lock_init(&t->if_lock);
t->cpu = cpu;
- pe = create_proc_entry(t->name, 0600, pg_proc_dir);
+ INIT_LIST_HEAD(&t->if_list);
+
+ list_add_tail(&t->th_list, &pktgen_threads);
+
+ p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu);
+ if (IS_ERR(p)) {
+ printk("pktgen: kernel_thread() failed for cpu %d\n", t->cpu);
+ list_del(&t->th_list);
+ kfree(t);
+ return PTR_ERR(p);
+ }
+ kthread_bind(p, cpu);
+ t->tsk = p;
+
+ pe = create_proc_entry(t->tsk->comm, 0600, pg_proc_dir);
if (!pe) {
printk("pktgen: cannot create %s/%s procfs entry.\n",
- PG_PROC_DIR, t->name);
+ PG_PROC_DIR, t->tsk->comm);
+ kthread_stop(p);
+ list_del(&t->th_list);
kfree(t);
return -EINVAL;
}
@@ -3548,21 +3505,7 @@ static int __init pktgen_create_thread(const char *name, int cpu)
pe->proc_fops = &pktgen_thread_fops;
pe->data = t;
- INIT_LIST_HEAD(&t->if_list);
-
- list_add_tail(&t->th_list, &pktgen_threads);
-
- t->removed = 0;
-
- err = kernel_thread((void *)pktgen_thread_worker, (void *)t,
- CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
- if (err < 0) {
- printk("pktgen: kernel_thread() failed for cpu %d\n", t->cpu);
- remove_proc_entry(t->name, pg_proc_dir);
- list_del(&t->th_list);
- kfree(t);
- return err;
- }
+ wake_up_process(p);
return 0;
}
@@ -3643,10 +3586,8 @@ static int __init pg_init(void)
for_each_online_cpu(cpu) {
int err;
- char buf[30];
- sprintf(buf, "kpktgend_%i", cpu);
- err = pktgen_create_thread(buf, cpu);
+ err = pktgen_create_thread(cpu);
if (err)
printk("pktgen: WARNING: Cannot create thread for cpu %d (%d)\n",
cpu, err);
@@ -3674,9 +3615,8 @@ static void __exit pg_cleanup(void)
list_for_each_safe(q, n, &pktgen_threads) {
t = list_entry(q, struct pktgen_thread, th_list);
- t->control |= (T_TERMINATE);
-
- wait_event_interruptible_timeout(queue, (t->removed == 1), HZ);
+ kthread_stop(t->tsk);
+ kfree(t);
}
/* Un-register us from receiving netdevice events */
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 84bed40273ad..25c8a42965df 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -165,9 +165,8 @@ struct in_device *inetdev_init(struct net_device *dev)
NET_IPV4_NEIGH, "ipv4", NULL, NULL);
#endif
- /* Account for reference dev->ip_ptr */
+ /* Account for reference dev->ip_ptr (below) */
in_dev_hold(in_dev);
- rcu_assign_pointer(dev->ip_ptr, in_dev);
#ifdef CONFIG_SYSCTL
devinet_sysctl_register(in_dev, &in_dev->cnf);
@@ -176,6 +175,8 @@ struct in_device *inetdev_init(struct net_device *dev)
if (dev->flags & IFF_UP)
ip_mc_up(in_dev);
out:
+ /* we can receive as soon as ip_ptr is set -- do this last */
+ rcu_assign_pointer(dev->ip_ptr, in_dev);
return in_dev;
out_kfree:
kfree(in_dev);
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index a68966059b50..c47ce7076bd5 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -15,16 +15,19 @@ int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type)
struct flowi fl = {};
struct dst_entry *odst;
unsigned int hh_len;
+ unsigned int type;
+ type = inet_addr_type(iph->saddr);
if (addr_type == RTN_UNSPEC)
- addr_type = inet_addr_type(iph->saddr);
+ addr_type = type;
/* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
* packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook.
*/
if (addr_type == RTN_LOCAL) {
fl.nl_u.ip4_u.daddr = iph->daddr;
- fl.nl_u.ip4_u.saddr = iph->saddr;
+ if (type == RTN_LOCAL)
+ fl.nl_u.ip4_u.saddr = iph->saddr;
fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0;
fl.mark = (*pskb)->mark;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index f6026d4ac428..47bd3ad18b71 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -6,8 +6,8 @@ menu "IP: Netfilter Configuration"
depends on INET && NETFILTER
config NF_CONNTRACK_IPV4
- tristate "IPv4 connection tracking support (required for NAT) (EXPERIMENTAL)"
- depends on EXPERIMENTAL && NF_CONNTRACK
+ tristate "IPv4 connection tracking support (required for NAT)"
+ depends on NF_CONNTRACK
---help---
Connection tracking keeps a record of what packets have passed
through your machine, in order to figure out how they are related
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 09696f16aa95..fc1f153c86ba 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -919,13 +919,13 @@ copy_entries_to_user(unsigned int total_size,
#ifdef CONFIG_COMPAT
struct compat_delta {
struct compat_delta *next;
- u_int16_t offset;
+ unsigned int offset;
short delta;
};
static struct compat_delta *compat_offsets = NULL;
-static int compat_add_offset(u_int16_t offset, short delta)
+static int compat_add_offset(unsigned int offset, short delta)
{
struct compat_delta *tmp;
@@ -957,7 +957,7 @@ static void compat_flush_offsets(void)
}
}
-static short compat_calc_jump(u_int16_t offset)
+static short compat_calc_jump(unsigned int offset)
{
struct compat_delta *tmp;
short delta;
@@ -997,7 +997,7 @@ static int compat_calc_entry(struct ipt_entry *e, struct xt_table_info *info,
void *base, struct xt_table_info *newinfo)
{
struct ipt_entry_target *t;
- u_int16_t entry_offset;
+ unsigned int entry_offset;
int off, i, ret;
off = 0;
@@ -1467,7 +1467,7 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e,
{
struct ipt_entry_target *t;
struct ipt_target *target;
- u_int16_t entry_offset;
+ unsigned int entry_offset;
int ret, off, h, j;
duprintf("check_compat_entry_size_and_hooks %p\n", e);
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 28b9233956b5..d669685afd04 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -127,10 +127,13 @@ masquerade_target(struct sk_buff **pskb,
static inline int
device_cmp(struct ip_conntrack *i, void *ifindex)
{
+ int ret;
#ifdef CONFIG_NF_NAT_NEEDED
struct nf_conn_nat *nat = nfct_nat(i);
+
+ if (!nat)
+ return 0;
#endif
- int ret;
read_lock_bh(&masq_lock);
#ifdef CONFIG_NF_NAT_NEEDED
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 9b0a90643151..171e5b55d7d6 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -413,8 +413,6 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
if (netif_carrier_ok(dev))
ndev->if_flags |= IF_READY;
- /* protected by rtnl_lock */
- rcu_assign_pointer(dev->ip6_ptr, ndev);
ipv6_mc_init_dev(ndev);
ndev->tstamp = jiffies;
@@ -425,6 +423,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
NULL);
addrconf_sysctl_register(ndev, &ndev->cnf);
#endif
+ /* protected by rtnl_lock */
+ rcu_assign_pointer(dev->ip6_ptr, ndev);
return ndev;
}
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 1b853c34d301..cd10e44db015 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -44,8 +44,7 @@ choice
depends on NF_CONNTRACK_ENABLED
config NF_CONNTRACK_SUPPORT
- bool "Layer 3 Independent Connection tracking (EXPERIMENTAL)"
- depends on EXPERIMENTAL
+ bool "Layer 3 Independent Connection tracking"
help
Layer 3 independent connection tracking is experimental scheme
which generalize ip_conntrack to support other layer 3 protocols.
@@ -122,7 +121,7 @@ config NF_CONNTRACK_EVENTS
config NF_CT_PROTO_GRE
tristate
- depends on EXPERIMENTAL && NF_CONNTRACK
+ depends on NF_CONNTRACK
config NF_CT_PROTO_SCTP
tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)'
@@ -136,8 +135,8 @@ config NF_CT_PROTO_SCTP
Documentation/modules.txt. If unsure, say `N'.
config NF_CONNTRACK_AMANDA
- tristate "Amanda backup protocol support (EXPERIMENTAL)"
- depends on EXPERIMENTAL && NF_CONNTRACK
+ tristate "Amanda backup protocol support"
+ depends on NF_CONNTRACK
select TEXTSEARCH
select TEXTSEARCH_KMP
help
@@ -151,8 +150,8 @@ config NF_CONNTRACK_AMANDA
To compile it as a module, choose M here. If unsure, say N.
config NF_CONNTRACK_FTP
- tristate "FTP protocol support (EXPERIMENTAL)"
- depends on EXPERIMENTAL && NF_CONNTRACK
+ tristate "FTP protocol support"
+ depends on NF_CONNTRACK
help
Tracking FTP connections is problematic: special helpers are
required for tracking them, and doing masquerading and other forms
@@ -184,8 +183,8 @@ config NF_CONNTRACK_H323
To compile it as a module, choose M here. If unsure, say N.
config NF_CONNTRACK_IRC
- tristate "IRC protocol support (EXPERIMENTAL)"
- depends on EXPERIMENTAL && NF_CONNTRACK
+ tristate "IRC protocol support"
+ depends on NF_CONNTRACK
help
There is a commonly-used extension to IRC called
Direct Client-to-Client Protocol (DCC). This enables users to send
@@ -218,8 +217,8 @@ config NF_CONNTRACK_NETBIOS_NS
To compile it as a module, choose M here. If unsure, say N.
config NF_CONNTRACK_PPTP
- tristate "PPtP protocol support (EXPERIMENTAL)"
- depends on EXPERIMENTAL && NF_CONNTRACK
+ tristate "PPtP protocol support"
+ depends on NF_CONNTRACK
select NF_CT_PROTO_GRE
help
This module adds support for PPTP (Point to Point Tunnelling
@@ -249,8 +248,8 @@ config NF_CONNTRACK_SIP
To compile it as a module, choose M here. If unsure, say N.
config NF_CONNTRACK_TFTP
- tristate "TFTP protocol support (EXPERIMENTAL)"
- depends on EXPERIMENTAL && NF_CONNTRACK
+ tristate "TFTP protocol support"
+ depends on NF_CONNTRACK
help
TFTP connection tracking helper, this is required depending
on how restrictive your ruleset is.
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index a5a6e192ac2d..f28bf69d3d42 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -745,7 +745,7 @@ static int __init xt_hashlimit_init(void)
}
hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", proc_net);
if (!hashlimit_procdir6) {
- printk(KERN_ERR "xt_hashlimit: tnable to create proc dir "
+ printk(KERN_ERR "xt_hashlimit: unable to create proc dir "
"entry\n");
goto err4;
}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 276131fe56dd..383dd4e82ee1 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -472,8 +472,7 @@ static int netlink_release(struct socket *sock)
NETLINK_URELEASE, &n);
}
- if (nlk->module)
- module_put(nlk->module);
+ module_put(nlk->module);
netlink_table_grab();
if (nlk->flags & NETLINK_KERNEL_SOCKET) {
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 52a2726d327f..b5c80b189902 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -484,8 +484,6 @@ out:
return sk;
}
-void x25_init_timers(struct sock *sk);
-
static int x25_create(struct socket *sock, int protocol)
{
struct sock *sk;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index e5372b11fc8f..82f36d396fca 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -434,18 +434,19 @@ error_no_put:
return NULL;
}
-static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct rtattr **xfrma)
{
struct xfrm_usersa_info *p = NLMSG_DATA(nlh);
struct xfrm_state *x;
int err;
struct km_event c;
- err = verify_newsa_info(p, (struct rtattr **)xfrma);
+ err = verify_newsa_info(p, xfrma);
if (err)
return err;
- x = xfrm_state_construct(p, (struct rtattr **)xfrma, &err);
+ x = xfrm_state_construct(p, xfrma, &err);
if (!x)
return err;
@@ -507,14 +508,15 @@ static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p,
return x;
}
-static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct rtattr **xfrma)
{
struct xfrm_state *x;
int err = -ESRCH;
struct km_event c;
struct xfrm_usersa_id *p = NLMSG_DATA(nlh);
- x = xfrm_user_state_lookup(p, (struct rtattr **)xfrma, &err);
+ x = xfrm_user_state_lookup(p, xfrma, &err);
if (x == NULL)
return err;
@@ -672,14 +674,15 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb,
return skb;
}
-static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct rtattr **xfrma)
{
struct xfrm_usersa_id *p = NLMSG_DATA(nlh);
struct xfrm_state *x;
struct sk_buff *resp_skb;
int err = -ESRCH;
- x = xfrm_user_state_lookup(p, (struct rtattr **)xfrma, &err);
+ x = xfrm_user_state_lookup(p, xfrma, &err);
if (x == NULL)
goto out_noput;
@@ -718,7 +721,8 @@ static int verify_userspi_info(struct xfrm_userspi_info *p)
return 0;
}
-static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct rtattr **xfrma)
{
struct xfrm_state *x;
struct xfrm_userspi_info *p;
@@ -1013,7 +1017,8 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p,
return NULL;
}
-static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct rtattr **xfrma)
{
struct xfrm_userpolicy_info *p = NLMSG_DATA(nlh);
struct xfrm_policy *xp;
@@ -1024,11 +1029,11 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
err = verify_newpolicy_info(p);
if (err)
return err;
- err = verify_sec_ctx_len((struct rtattr **)xfrma);
+ err = verify_sec_ctx_len(xfrma);
if (err)
return err;
- xp = xfrm_policy_construct(p, (struct rtattr **)xfrma, &err);
+ xp = xfrm_policy_construct(p, xfrma, &err);
if (!xp)
return err;
@@ -1227,7 +1232,8 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb,
return skb;
}
-static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct rtattr **xfrma)
{
struct xfrm_policy *xp;
struct xfrm_userpolicy_id *p;
@@ -1239,7 +1245,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
p = NLMSG_DATA(nlh);
delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY;
- err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma);
+ err = copy_from_user_policy_type(&type, xfrma);
if (err)
return err;
@@ -1250,11 +1256,10 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
if (p->index)
xp = xfrm_policy_byid(type, p->dir, p->index, delete);
else {
- struct rtattr **rtattrs = (struct rtattr **)xfrma;
- struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1];
+ struct rtattr *rt = xfrma[XFRMA_SEC_CTX-1];
struct xfrm_policy tmp;
- err = verify_sec_ctx_len(rtattrs);
+ err = verify_sec_ctx_len(xfrma);
if (err)
return err;
@@ -1302,7 +1307,8 @@ out:
return err;
}
-static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct rtattr **xfrma)
{
struct km_event c;
struct xfrm_usersa_flush *p = NLMSG_DATA(nlh);
@@ -1367,7 +1373,8 @@ nlmsg_failure:
return -1;
}
-static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct rtattr **xfrma)
{
struct xfrm_state *x;
struct sk_buff *r_skb;
@@ -1415,7 +1422,8 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
return err;
}
-static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct rtattr **xfrma)
{
struct xfrm_state *x;
struct km_event c;
@@ -1439,7 +1447,7 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
goto out;
spin_lock_bh(&x->lock);
- err = xfrm_update_ae_params(x,(struct rtattr **)xfrma);
+ err = xfrm_update_ae_params(x, xfrma);
spin_unlock_bh(&x->lock);
if (err < 0)
goto out;
@@ -1455,14 +1463,15 @@ out:
return err;
}
-static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct rtattr **xfrma)
{
struct km_event c;
u8 type = XFRM_POLICY_TYPE_MAIN;
int err;
struct xfrm_audit audit_info;
- err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma);
+ err = copy_from_user_policy_type(&type, xfrma);
if (err)
return err;
@@ -1477,7 +1486,8 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **x
return 0;
}
-static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct rtattr **xfrma)
{
struct xfrm_policy *xp;
struct xfrm_user_polexpire *up = NLMSG_DATA(nlh);
@@ -1485,18 +1495,17 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void *
u8 type = XFRM_POLICY_TYPE_MAIN;
int err = -ENOENT;
- err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma);
+ err = copy_from_user_policy_type(&type, xfrma);
if (err)
return err;
if (p->index)
xp = xfrm_policy_byid(type, p->dir, p->index, 0);
else {
- struct rtattr **rtattrs = (struct rtattr **)xfrma;
- struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1];
+ struct rtattr *rt = xfrma[XFRMA_SEC_CTX-1];
struct xfrm_policy tmp;
- err = verify_sec_ctx_len(rtattrs);
+ err = verify_sec_ctx_len(xfrma);
if (err)
return err;
@@ -1537,7 +1546,8 @@ out:
return err;
}
-static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct rtattr **xfrma)
{
struct xfrm_state *x;
int err;
@@ -1568,7 +1578,8 @@ out:
return err;
}
-static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
+static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct rtattr **xfrma)
{
struct xfrm_policy *xp;
struct xfrm_user_tmpl *ut;
@@ -1647,7 +1658,7 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = {
#undef XMSGSIZE
static struct xfrm_link {
- int (*doit)(struct sk_buff *, struct nlmsghdr *, void **);
+ int (*doit)(struct sk_buff *, struct nlmsghdr *, struct rtattr **);
int (*dump)(struct sk_buff *, struct netlink_callback *);
} xfrm_dispatch[XFRM_NR_MSGTYPES] = {
[XFRM_MSG_NEWSA - XFRM_MSG_BASE] = { .doit = xfrm_add_sa },
@@ -1735,7 +1746,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *err
if (link->doit == NULL)
goto err_einval;
- *errp = link->doit(skb, nlh, (void **) &xfrma);
+ *errp = link->doit(skb, nlh, xfrma);
return *errp;
diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc
index 0b2fcc417f59..a8ffc329666a 100644
--- a/scripts/kconfig/qconf.cc
+++ b/scripts/kconfig/qconf.cc
@@ -925,6 +925,8 @@ ConfigInfoView::ConfigInfoView(QWidget* parent, const char *name)
configSettings->endGroup();
connect(configApp, SIGNAL(aboutToQuit()), SLOT(saveSettings()));
}
+
+ has_dbg_info = 0;
}
void ConfigInfoView::saveSettings(void)
@@ -953,10 +955,13 @@ void ConfigInfoView::setInfo(struct menu *m)
if (menu == m)
return;
menu = m;
- if (!menu)
+ if (!menu) {
+ has_dbg_info = 0;
clear();
- else
+ } else {
+ has_dbg_info = 1;
menuInfo();
+ }
}
void ConfigInfoView::setSource(const QString& name)
@@ -991,6 +996,9 @@ void ConfigInfoView::symbolInfo(void)
{
QString str;
+ if (!has_dbg_info)
+ return;
+
str += "<big>Symbol: <b>";
str += print_filter(sym->name);
str += "</b></big><br><br>value: ";
diff --git a/scripts/kconfig/qconf.h b/scripts/kconfig/qconf.h
index 6fc1c5f14425..a397edb5adcf 100644
--- a/scripts/kconfig/qconf.h
+++ b/scripts/kconfig/qconf.h
@@ -273,6 +273,8 @@ protected:
struct symbol *sym;
struct menu *menu;
bool _showDebug;
+
+ int has_dbg_info;
};
class ConfigSearchWindow : public QDialog {
diff --git a/sound/sparc/cs4231.c b/sound/sparc/cs4231.c
index edeb3d3c4c7e..f5956d557f70 100644
--- a/sound/sparc/cs4231.c
+++ b/sound/sparc/cs4231.c
@@ -1268,7 +1268,7 @@ static struct snd_pcm_hardware snd_cs4231_playback =
.channels_min = 1,
.channels_max = 2,
.buffer_bytes_max = (32*1024),
- .period_bytes_min = 4096,
+ .period_bytes_min = 64,
.period_bytes_max = (32*1024),
.periods_min = 1,
.periods_max = 1024,
@@ -1288,7 +1288,7 @@ static struct snd_pcm_hardware snd_cs4231_capture =
.channels_min = 1,
.channels_max = 2,
.buffer_bytes_max = (32*1024),
- .period_bytes_min = 4096,
+ .period_bytes_min = 64,
.period_bytes_max = (32*1024),
.periods_min = 1,
.periods_max = 1024,
@@ -1796,7 +1796,7 @@ static irqreturn_t snd_cs4231_sbus_interrupt(int irq, void *dev_id)
snd_cs4231_outm(chip, CS4231_IRQ_STATUS, ~CS4231_ALL_IRQS | ~status, 0);
spin_unlock_irqrestore(&chip->lock, flags);
- return 0;
+ return IRQ_HANDLED;
}
/*
@@ -1821,7 +1821,6 @@ static int sbus_dma_request(struct cs4231_dma_control *dma_cont, dma_addr_t bus_
if (!(csr & test))
goto out;
err = -EBUSY;
- csr = sbus_readl(base->regs + APCCSR);
test = APC_XINT_CNVA;
if ( base->dir == APC_PLAY )
test = APC_XINT_PNVA;
@@ -1862,17 +1861,16 @@ static void sbus_dma_enable(struct cs4231_dma_control *dma_cont, int on)
spin_lock_irqsave(&base->lock, flags);
if (!on) {
- if (base->dir == APC_PLAY) {
- sbus_writel(0, base->regs + base->dir + APCNVA);
- sbus_writel(1, base->regs + base->dir + APCC);
- }
- else
- {
- sbus_writel(0, base->regs + base->dir + APCNC);
- sbus_writel(0, base->regs + base->dir + APCVA);
- }
+ sbus_writel(0, base->regs + base->dir + APCNC);
+ sbus_writel(0, base->regs + base->dir + APCNVA);
+ sbus_writel(0, base->regs + base->dir + APCC);
+ sbus_writel(0, base->regs + base->dir + APCVA);
+
+ /* ACK any APC interrupts. */
+ csr = sbus_readl(base->regs + APCCSR);
+ sbus_writel(csr, base->regs + APCCSR);
}
- udelay(600);
+ udelay(1000);
csr = sbus_readl(base->regs + APCCSR);
shift = 0;
if ( base->dir == APC_PLAY )