aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/bootmem.h100
-rw-r--r--include/linux/compiler.h5
-rw-r--r--include/linux/console.h5
-rw-r--r--include/linux/cpu.h8
-rw-r--r--include/linux/device.h99
-rw-r--r--include/linux/elf-em.h1
-rw-r--r--include/linux/elfnote.h90
-rw-r--r--include/linux/gfp.h36
-rw-r--r--include/linux/highmem.h5
-rw-r--r--include/linux/irq.h6
-rw-r--r--include/linux/kernel.h1
-rw-r--r--include/linux/kobject.h16
-rw-r--r--include/linux/mempolicy.h4
-rw-r--r--include/linux/mm.h128
-rw-r--r--include/linux/mmzone.h120
-rw-r--r--include/linux/page-flags.h35
-rw-r--r--include/linux/pagemap.h15
-rw-r--r--include/linux/pci.h36
-rw-r--r--include/linux/percpu.h89
-rw-r--r--include/linux/platform_device.h2
-rw-r--r--include/linux/pm.h63
-rw-r--r--include/linux/resume-trace.h24
-rw-r--r--include/linux/rmap.h14
-rw-r--r--include/linux/selinux.h29
-rw-r--r--include/linux/slab.h29
-rw-r--r--include/linux/smp.h3
-rw-r--r--include/linux/suspend.h32
-rw-r--r--include/linux/swap.h12
-rw-r--r--include/linux/sysctl.h1
-rw-r--r--include/linux/sysfs.h28
-rw-r--r--include/linux/vmalloc.h2
-rw-r--r--include/linux/vmstat.h18
-rw-r--r--include/linux/writeback.h1
33 files changed, 728 insertions, 329 deletions
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index e319c649e4fd..31e9abb6d977 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -4,11 +4,8 @@
#ifndef _LINUX_BOOTMEM_H
#define _LINUX_BOOTMEM_H
-#include <asm/pgtable.h>
-#include <asm/dma.h>
-#include <linux/cache.h>
-#include <linux/init.h>
#include <linux/mmzone.h>
+#include <asm/dma.h>
/*
* simple boot-time physical memory area allocator.
@@ -41,45 +38,64 @@ typedef struct bootmem_data {
struct list_head list;
} bootmem_data_t;
-extern unsigned long __init bootmem_bootmap_pages (unsigned long);
-extern unsigned long __init init_bootmem (unsigned long addr, unsigned long memend);
-extern void __init free_bootmem (unsigned long addr, unsigned long size);
-extern void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal);
-extern void * __init __alloc_bootmem_nopanic (unsigned long size, unsigned long align, unsigned long goal);
-extern void * __init __alloc_bootmem_low(unsigned long size,
- unsigned long align,
- unsigned long goal);
-extern void * __init __alloc_bootmem_low_node(pg_data_t *pgdat,
- unsigned long size,
- unsigned long align,
- unsigned long goal);
-extern void * __init __alloc_bootmem_core(struct bootmem_data *bdata,
- unsigned long size, unsigned long align, unsigned long goal,
- unsigned long limit);
+extern unsigned long bootmem_bootmap_pages(unsigned long);
+extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
+extern void free_bootmem(unsigned long addr, unsigned long size);
+extern void *__alloc_bootmem(unsigned long size,
+ unsigned long align,
+ unsigned long goal);
+extern void *__alloc_bootmem_nopanic(unsigned long size,
+ unsigned long align,
+ unsigned long goal);
+extern void *__alloc_bootmem_low(unsigned long size,
+ unsigned long align,
+ unsigned long goal);
+extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
+ unsigned long size,
+ unsigned long align,
+ unsigned long goal);
+extern void *__alloc_bootmem_core(struct bootmem_data *bdata,
+ unsigned long size,
+ unsigned long align,
+ unsigned long goal,
+ unsigned long limit);
+
#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
-extern void __init reserve_bootmem (unsigned long addr, unsigned long size);
+extern void reserve_bootmem(unsigned long addr, unsigned long size);
#define alloc_bootmem(x) \
- __alloc_bootmem((x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+ __alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_low(x) \
- __alloc_bootmem_low((x), SMP_CACHE_BYTES, 0)
+ __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0)
#define alloc_bootmem_pages(x) \
- __alloc_bootmem((x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+ __alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_low_pages(x) \
- __alloc_bootmem_low((x), PAGE_SIZE, 0)
+ __alloc_bootmem_low(x, PAGE_SIZE, 0)
#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
-extern unsigned long __init free_all_bootmem (void);
-extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal);
-extern unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn);
-extern void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size);
-extern void __init free_bootmem_node (pg_data_t *pgdat, unsigned long addr, unsigned long size);
-extern unsigned long __init free_all_bootmem_node (pg_data_t *pgdat);
+
+extern unsigned long free_all_bootmem(void);
+extern unsigned long free_all_bootmem_node(pg_data_t *pgdat);
+extern void *__alloc_bootmem_node(pg_data_t *pgdat,
+ unsigned long size,
+ unsigned long align,
+ unsigned long goal);
+extern unsigned long init_bootmem_node(pg_data_t *pgdat,
+ unsigned long freepfn,
+ unsigned long startpfn,
+ unsigned long endpfn);
+extern void reserve_bootmem_node(pg_data_t *pgdat,
+ unsigned long physaddr,
+ unsigned long size);
+extern void free_bootmem_node(pg_data_t *pgdat,
+ unsigned long addr,
+ unsigned long size);
+
#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
#define alloc_bootmem_node(pgdat, x) \
- __alloc_bootmem_node((pgdat), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+ __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_pages_node(pgdat, x) \
- __alloc_bootmem_node((pgdat), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+ __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_low_pages_node(pgdat, x) \
- __alloc_bootmem_low_node((pgdat), (x), PAGE_SIZE, 0)
+ __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
#ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP
@@ -89,19 +105,19 @@ static inline void *alloc_remap(int nid, unsigned long size)
{
return NULL;
}
-#endif
+#endif /* CONFIG_HAVE_ARCH_ALLOC_REMAP */
extern unsigned long __meminitdata nr_kernel_pages;
extern unsigned long nr_all_pages;
-extern void *__init alloc_large_system_hash(const char *tablename,
- unsigned long bucketsize,
- unsigned long numentries,
- int scale,
- int flags,
- unsigned int *_hash_shift,
- unsigned int *_hash_mask,
- unsigned long limit);
+extern void *alloc_large_system_hash(const char *tablename,
+ unsigned long bucketsize,
+ unsigned long numentries,
+ int scale,
+ int flags,
+ unsigned int *_hash_shift,
+ unsigned int *_hash_mask,
+ unsigned long limit);
#define HASH_HIGHMEM 0x00000001 /* Consider highmem? */
#define HASH_EARLY 0x00000002 /* Allocating during early boot? */
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 9b4f11094937..060b96112ec6 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -99,6 +99,11 @@ extern void __chk_io_ptr(void __iomem *);
#define __must_check
#endif
+#ifndef CONFIG_ENABLE_MUST_CHECK
+#undef __must_check
+#define __must_check
+#endif
+
/*
* Allow us to avoid 'defined but not used' warnings on functions and data,
* as well as force them to be emitted to the assembly file.
diff --git a/include/linux/console.h b/include/linux/console.h
index 3bdf2155e565..76a1807726eb 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -120,9 +120,14 @@ extern void console_stop(struct console *);
extern void console_start(struct console *);
extern int is_console_locked(void);
+#ifndef CONFIG_DISABLE_CONSOLE_SUSPEND
/* Suspend and resume console messages over PM events */
extern void suspend_console(void);
extern void resume_console(void);
+#else
+static inline void suspend_console(void) {}
+static inline void resume_console(void) {}
+#endif /* CONFIG_DISABLE_CONSOLE_SUSPEND */
/* Some debug stub to catch some of the obvious races in the VT code */
#if 1
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 8fb344a9abd8..3fef7d67aedc 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -89,4 +89,12 @@ int cpu_down(unsigned int cpu);
static inline int cpu_is_offline(int cpu) { return 0; }
#endif
+#ifdef CONFIG_SUSPEND_SMP
+extern int disable_nonboot_cpus(void);
+extern void enable_nonboot_cpus(void);
+#else
+static inline int disable_nonboot_cpus(void) { return 0; }
+static inline void enable_nonboot_cpus(void) {}
+#endif
+
#endif /* _LINUX_CPU_H_ */
diff --git a/include/linux/device.h b/include/linux/device.h
index 1e5f30da98bc..662e6a10144e 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -15,6 +15,7 @@
#include <linux/kobject.h>
#include <linux/klist.h>
#include <linux/list.h>
+#include <linux/compiler.h>
#include <linux/types.h>
#include <linux/module.h>
#include <linux/pm.h>
@@ -51,14 +52,17 @@ struct bus_type {
int (*probe)(struct device * dev);
int (*remove)(struct device * dev);
void (*shutdown)(struct device * dev);
- int (*suspend)(struct device * dev, pm_message_t state);
- int (*resume)(struct device * dev);
+
+ int (*suspend)(struct device * dev, pm_message_t state);
+ int (*suspend_late)(struct device * dev, pm_message_t state);
+ int (*resume_early)(struct device * dev);
+ int (*resume)(struct device * dev);
};
-extern int bus_register(struct bus_type * bus);
+extern int __must_check bus_register(struct bus_type * bus);
extern void bus_unregister(struct bus_type * bus);
-extern void bus_rescan_devices(struct bus_type * bus);
+extern int __must_check bus_rescan_devices(struct bus_type * bus);
/* iterator helpers for buses */
@@ -67,9 +71,9 @@ int bus_for_each_dev(struct bus_type * bus, struct device * start, void * data,
struct device * bus_find_device(struct bus_type *bus, struct device *start,
void *data, int (*match)(struct device *, void *));
-int bus_for_each_drv(struct bus_type * bus, struct device_driver * start,
- void * data, int (*fn)(struct device_driver *, void *));
-
+int __must_check bus_for_each_drv(struct bus_type *bus,
+ struct device_driver *start, void *data,
+ int (*fn)(struct device_driver *, void *));
/* driverfs interface for exporting bus attributes */
@@ -82,7 +86,8 @@ struct bus_attribute {
#define BUS_ATTR(_name,_mode,_show,_store) \
struct bus_attribute bus_attr_##_name = __ATTR(_name,_mode,_show,_store)
-extern int bus_create_file(struct bus_type *, struct bus_attribute *);
+extern int __must_check bus_create_file(struct bus_type *,
+ struct bus_attribute *);
extern void bus_remove_file(struct bus_type *, struct bus_attribute *);
struct device_driver {
@@ -101,16 +106,18 @@ struct device_driver {
void (*shutdown) (struct device * dev);
int (*suspend) (struct device * dev, pm_message_t state);
int (*resume) (struct device * dev);
+
+ unsigned int multithread_probe:1;
};
-extern int driver_register(struct device_driver * drv);
+extern int __must_check driver_register(struct device_driver * drv);
extern void driver_unregister(struct device_driver * drv);
extern struct device_driver * get_driver(struct device_driver * drv);
extern void put_driver(struct device_driver * drv);
extern struct device_driver *driver_find(const char *name, struct bus_type *bus);
-
+extern int driver_probe_done(void);
/* driverfs interface for exporting driver attributes */
@@ -123,16 +130,17 @@ struct driver_attribute {
#define DRIVER_ATTR(_name,_mode,_show,_store) \
struct driver_attribute driver_attr_##_name = __ATTR(_name,_mode,_show,_store)
-extern int driver_create_file(struct device_driver *, struct driver_attribute *);
+extern int __must_check driver_create_file(struct device_driver *,
+ struct driver_attribute *);
extern void driver_remove_file(struct device_driver *, struct driver_attribute *);
-extern int driver_for_each_device(struct device_driver * drv, struct device * start,
- void * data, int (*fn)(struct device *, void *));
+extern int __must_check driver_for_each_device(struct device_driver * drv,
+ struct device *start, void *data,
+ int (*fn)(struct device *, void *));
struct device * driver_find_device(struct device_driver *drv,
struct device *start, void *data,
int (*match)(struct device *, void *));
-
/*
* device classes
*/
@@ -146,17 +154,26 @@ struct class {
struct list_head interfaces;
struct semaphore sem; /* locks both the children and interfaces lists */
+ struct kobject *virtual_dir;
+
struct class_attribute * class_attrs;
struct class_device_attribute * class_dev_attrs;
+ struct device_attribute * dev_attrs;
int (*uevent)(struct class_device *dev, char **envp,
int num_envp, char *buffer, int buffer_size);
+ int (*dev_uevent)(struct device *dev, char **envp, int num_envp,
+ char *buffer, int buffer_size);
void (*release)(struct class_device *dev);
void (*class_release)(struct class *class);
+ void (*dev_release)(struct device *dev);
+
+ int (*suspend)(struct device *, pm_message_t state);
+ int (*resume)(struct device *);
};
-extern int class_register(struct class *);
+extern int __must_check class_register(struct class *);
extern void class_unregister(struct class *);
@@ -169,7 +186,8 @@ struct class_attribute {
#define CLASS_ATTR(_name,_mode,_show,_store) \
struct class_attribute class_attr_##_name = __ATTR(_name,_mode,_show,_store)
-extern int class_create_file(struct class *, const struct class_attribute *);
+extern int __must_check class_create_file(struct class *,
+ const struct class_attribute *);
extern void class_remove_file(struct class *, const struct class_attribute *);
struct class_device_attribute {
@@ -182,7 +200,7 @@ struct class_device_attribute {
struct class_device_attribute class_device_attr_##_name = \
__ATTR(_name,_mode,_show,_store)
-extern int class_device_create_file(struct class_device *,
+extern int __must_check class_device_create_file(struct class_device *,
const struct class_device_attribute *);
/**
@@ -242,10 +260,10 @@ class_set_devdata (struct class_device *dev, void *data)
}
-extern int class_device_register(struct class_device *);
+extern int __must_check class_device_register(struct class_device *);
extern void class_device_unregister(struct class_device *);
extern void class_device_initialize(struct class_device *);
-extern int class_device_add(struct class_device *);
+extern int __must_check class_device_add(struct class_device *);
extern void class_device_del(struct class_device *);
extern int class_device_rename(struct class_device *, char *);
@@ -255,7 +273,7 @@ extern void class_device_put(struct class_device *);
extern void class_device_remove_file(struct class_device *,
const struct class_device_attribute *);
-extern int class_device_create_bin_file(struct class_device *,
+extern int __must_check class_device_create_bin_file(struct class_device *,
struct bin_attribute *);
extern void class_device_remove_bin_file(struct class_device *,
struct bin_attribute *);
@@ -266,22 +284,23 @@ struct class_interface {
int (*add) (struct class_device *, struct class_interface *);
void (*remove) (struct class_device *, struct class_interface *);
+ int (*add_dev) (struct device *, struct class_interface *);
+ void (*remove_dev) (struct device *, struct class_interface *);
};
-extern int class_interface_register(struct class_interface *);
+extern int __must_check class_interface_register(struct class_interface *);
extern void class_interface_unregister(struct class_interface *);
-extern struct class *class_create(struct module *owner, char *name);
+extern struct class *class_create(struct module *owner, const char *name);
extern void class_destroy(struct class *cls);
extern struct class_device *class_device_create(struct class *cls,
struct class_device *parent,
dev_t devt,
struct device *device,
- char *fmt, ...)
+ const char *fmt, ...)
__attribute__((format(printf,5,6)));
extern void class_device_destroy(struct class *cls, dev_t devt);
-
/* interface for exporting device attributes */
struct device_attribute {
struct attribute attr;
@@ -294,8 +313,13 @@ struct device_attribute {
#define DEVICE_ATTR(_name,_mode,_show,_store) \
struct device_attribute dev_attr_##_name = __ATTR(_name,_mode,_show,_store)
-extern int device_create_file(struct device *device, struct device_attribute * entry);
+extern int __must_check device_create_file(struct device *device,
+ struct device_attribute * entry);
extern void device_remove_file(struct device * dev, struct device_attribute * attr);
+extern int __must_check device_create_bin_file(struct device *dev,
+ struct bin_attribute *attr);
+extern void device_remove_bin_file(struct device *dev,
+ struct bin_attribute *attr);
struct device {
struct klist klist_children;
struct klist_node knode_parent; /* node in sibling list */
@@ -305,6 +329,7 @@ struct device {
struct kobject kobj;
char bus_id[BUS_ID_SIZE]; /* position on parent bus */
+ unsigned is_registered:1;
struct device_attribute uevent_attr;
struct device_attribute *devt_attr;
@@ -338,6 +363,7 @@ struct device {
struct list_head node;
struct class *class; /* optional*/
dev_t devt; /* dev_t, creates the sysfs "dev" */
+ struct attribute_group **groups; /* optional groups */
void (*release)(struct device * dev);
};
@@ -356,38 +382,41 @@ dev_set_drvdata (struct device *dev, void *data)
static inline int device_is_registered(struct device *dev)
{
- return klist_node_attached(&dev->knode_bus);
+ return dev->is_registered;
}
/*
* High level routines for use by the bus drivers
*/
-extern int device_register(struct device * dev);
+extern int __must_check device_register(struct device * dev);
extern void device_unregister(struct device * dev);
extern void device_initialize(struct device * dev);
-extern int device_add(struct device * dev);
+extern int __must_check device_add(struct device * dev);
extern void device_del(struct device * dev);
-extern int device_for_each_child(struct device *, void *,
+extern int __must_check device_for_each_child(struct device *, void *,
int (*fn)(struct device *, void *));
+extern int device_rename(struct device *dev, char *new_name);
/*
* Manual binding of a device to driver. See drivers/base/bus.c
* for information on use.
*/
-extern void device_bind_driver(struct device * dev);
+extern int __must_check device_bind_driver(struct device *dev);
extern void device_release_driver(struct device * dev);
-extern int device_attach(struct device * dev);
-extern void driver_attach(struct device_driver * drv);
-extern void device_reprobe(struct device *dev);
+extern int __must_check device_attach(struct device * dev);
+extern int __must_check driver_attach(struct device_driver *drv);
+extern int __must_check device_reprobe(struct device *dev);
/*
* Easy functions for dynamically creating devices on the fly
*/
extern struct device *device_create(struct class *cls, struct device *parent,
- dev_t devt, char *fmt, ...)
+ dev_t devt, const char *fmt, ...)
__attribute__((format(printf,4,5)));
extern void device_destroy(struct class *cls, dev_t devt);
+extern int virtual_device_parent(struct device *dev);
+
/*
* Platform "fixup" functions - allow the platform to have their say
* about devices and actions that the general device layer doesn't
@@ -412,7 +441,7 @@ extern void device_shutdown(void);
/* drivers/base/firmware.c */
-extern int firmware_register(struct subsystem *);
+extern int __must_check firmware_register(struct subsystem *);
extern void firmware_unregister(struct subsystem *);
/* debugging and troubleshooting/diagnostic helpers. */
diff --git a/include/linux/elf-em.h b/include/linux/elf-em.h
index 6a5796c81c90..666e0a5f00fc 100644
--- a/include/linux/elf-em.h
+++ b/include/linux/elf-em.h
@@ -31,6 +31,7 @@
#define EM_M32R 88 /* Renesas M32R */
#define EM_H8_300 46 /* Renesas H8/300,300H,H8S */
#define EM_FRV 0x5441 /* Fujitsu FR-V */
+#define EM_AVR32 0x18ad /* Atmel AVR32 */
/*
* This is an interim value that we will use until the committee comes
diff --git a/include/linux/elfnote.h b/include/linux/elfnote.h
new file mode 100644
index 000000000000..67396db141e8
--- /dev/null
+++ b/include/linux/elfnote.h
@@ -0,0 +1,90 @@
+#ifndef _LINUX_ELFNOTE_H
+#define _LINUX_ELFNOTE_H
+/*
+ * Helper macros to generate ELF Note structures, which are put into a
+ * PT_NOTE segment of the final vmlinux image. These are useful for
+ * including name-value pairs of metadata into the kernel binary (or
+ * modules?) for use by external programs.
+ *
+ * Each note has three parts: a name, a type and a desc. The name is
+ * intended to distinguish the note's originator, so it would be a
+ * company, project, subsystem, etc; it must be in a suitable form for
+ * use in a section name. The type is an integer which is used to tag
+ * the data, and is considered to be within the "name" namespace (so
+ * "FooCo"'s type 42 is distinct from "BarProj"'s type 42). The
+ * "desc" field is the actual data. There are no constraints on the
+ * desc field's contents, though typically they're fairly small.
+ *
+ * All notes from a given NAME are put into a section named
+ * .note.NAME. When the kernel image is finally linked, all the notes
+ * are packed into a single .notes section, which is mapped into the
+ * PT_NOTE segment. Because notes for a given name are grouped into
+ * the same section, they'll all be adjacent the output file.
+ *
+ * This file defines macros for both C and assembler use. Their
+ * syntax is slightly different, but they're semantically similar.
+ *
+ * See the ELF specification for more detail about ELF notes.
+ */
+
+#ifdef __ASSEMBLER__
+/*
+ * Generate a structure with the same shape as Elf{32,64}_Nhdr (which
+ * turn out to be the same size and shape), followed by the name and
+ * desc data with appropriate padding. The 'desctype' argument is the
+ * assembler pseudo op defining the type of the data e.g. .asciz while
+ * 'descdata' is the data itself e.g. "hello, world".
+ *
+ * e.g. ELFNOTE(XYZCo, 42, .asciz, "forty-two")
+ * ELFNOTE(XYZCo, 12, .long, 0xdeadbeef)
+ */
+#define ELFNOTE(name, type, desctype, descdata) \
+.pushsection .note.name ; \
+ .align 4 ; \
+ .long 2f - 1f /* namesz */ ; \
+ .long 4f - 3f /* descsz */ ; \
+ .long type ; \
+1:.asciz "name" ; \
+2:.align 4 ; \
+3:desctype descdata ; \
+4:.align 4 ; \
+.popsection ;
+#else /* !__ASSEMBLER__ */
+#include <linux/elf.h>
+/*
+ * Use an anonymous structure which matches the shape of
+ * Elf{32,64}_Nhdr, but includes the name and desc data. The size and
+ * type of name and desc depend on the macro arguments. "name" must
+ * be a literal string, and "desc" must be passed by value. You may
+ * only define one note per line, since __LINE__ is used to generate
+ * unique symbols.
+ */
+#define _ELFNOTE_PASTE(a,b) a##b
+#define _ELFNOTE(size, name, unique, type, desc) \
+ static const struct { \
+ struct elf##size##_note _nhdr; \
+ unsigned char _name[sizeof(name)] \
+ __attribute__((aligned(sizeof(Elf##size##_Word)))); \
+ typeof(desc) _desc \
+ __attribute__((aligned(sizeof(Elf##size##_Word)))); \
+ } _ELFNOTE_PASTE(_note_, unique) \
+ __attribute_used__ \
+ __attribute__((section(".note." name), \
+ aligned(sizeof(Elf##size##_Word)), \
+ unused)) = { \
+ { \
+ sizeof(name), \
+ sizeof(desc), \
+ type, \
+ }, \
+ name, \
+ desc \
+ }
+#define ELFNOTE(size, name, type, desc) \
+ _ELFNOTE(size, name, __LINE__, type, desc)
+
+#define ELFNOTE32(name, type, desc) ELFNOTE(32, name, type, desc)
+#define ELFNOTE64(name, type, desc) ELFNOTE(64, name, type, desc)
+#endif /* __ASSEMBLER__ */
+
+#endif /* _LINUX_ELFNOTE_H */
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index cc9e60844484..8b34aabfe4c6 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -9,17 +9,16 @@ struct vm_area_struct;
/*
* GFP bitmasks..
+ *
+ * Zone modifiers (see linux/mmzone.h - low three bits)
+ *
+ * Do not put any conditional on these. If necessary modify the definitions
+ * without the underscores and use the consistently. The definitions here may
+ * be used in bit comparisons.
*/
-/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low three bits) */
#define __GFP_DMA ((__force gfp_t)0x01u)
#define __GFP_HIGHMEM ((__force gfp_t)0x02u)
-#ifdef CONFIG_DMA_IS_DMA32
-#define __GFP_DMA32 ((__force gfp_t)0x01) /* ZONE_DMA is ZONE_DMA32 */
-#elif BITS_PER_LONG < 64
-#define __GFP_DMA32 ((__force gfp_t)0x00) /* ZONE_NORMAL is ZONE_DMA32 */
-#else
-#define __GFP_DMA32 ((__force gfp_t)0x04) /* Has own ZONE_DMA32 */
-#endif
+#define __GFP_DMA32 ((__force gfp_t)0x04u)
/*
* Action modifiers - doesn't change the zoning
@@ -46,6 +45,7 @@ struct vm_area_struct;
#define __GFP_ZERO ((__force gfp_t)0x8000u)/* Return zeroed page on success */
#define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
#define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
+#define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */
#define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
@@ -54,7 +54,7 @@ struct vm_area_struct;
#define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
- __GFP_NOMEMALLOC|__GFP_HARDWALL)
+ __GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE)
/* This equals 0, but use constants in case they ever change */
#define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH)
@@ -67,6 +67,8 @@ struct vm_area_struct;
#define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
__GFP_HIGHMEM)
+#define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
+
/* Flag - indicates that the buffer will be suitable for DMA. Ignored on some
platforms, used as appropriate on others */
@@ -76,11 +78,19 @@ struct vm_area_struct;
#define GFP_DMA32 __GFP_DMA32
-static inline int gfp_zone(gfp_t gfp)
+static inline enum zone_type gfp_zone(gfp_t flags)
{
- int zone = GFP_ZONEMASK & (__force int) gfp;
- BUG_ON(zone >= GFP_ZONETYPES);
- return zone;
+ if (flags & __GFP_DMA)
+ return ZONE_DMA;
+#ifdef CONFIG_ZONE_DMA32
+ if (flags & __GFP_DMA32)
+ return ZONE_DMA32;
+#endif
+#ifdef CONFIG_HIGHMEM
+ if (flags & __GFP_HIGHMEM)
+ return ZONE_HIGHMEM;
+#endif
+ return ZONE_NORMAL;
}
/*
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 85ce7ef9a512..fd7d12daa94f 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -24,11 +24,15 @@ static inline void flush_kernel_dcache_page(struct page *page)
/* declarations for linux/mm/highmem.c */
unsigned int nr_free_highpages(void);
+extern unsigned long totalhigh_pages;
#else /* CONFIG_HIGHMEM */
static inline unsigned int nr_free_highpages(void) { return 0; }
+#define totalhigh_pages 0
+
+#ifndef ARCH_HAS_KMAP
static inline void *kmap(struct page *page)
{
might_sleep();
@@ -41,6 +45,7 @@ static inline void *kmap(struct page *page)
#define kunmap_atomic(addr, idx) do { } while (0)
#define kmap_atomic_pfn(pfn, idx) page_address(pfn_to_page(pfn))
#define kmap_atomic_to_page(ptr) virt_to_page(ptr)
+#endif
#endif /* CONFIG_HIGHMEM */
diff --git a/include/linux/irq.h b/include/linux/irq.h
index fbf6d901e9c2..48d3cb3b6a47 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -320,7 +320,9 @@ handle_irq_name(void fastcall (*handle)(unsigned int, struct irq_desc *,
* Monolithic do_IRQ implementation.
* (is an explicit fastcall, because i386 4KSTACKS calls it from assembly)
*/
+#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
extern fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs);
+#endif
/*
* Architectures call this to let the generic IRQ layer
@@ -332,10 +334,14 @@ static inline void generic_handle_irq(unsigned int irq, struct pt_regs *regs)
{
struct irq_desc *desc = irq_desc + irq;
+#ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
+ desc->handle_irq(irq, desc, regs);
+#else
if (likely(desc->handle_irq))
desc->handle_irq(irq, desc, regs);
else
__do_IRQ(irq, regs);
+#endif
}
/* Handling of unhandled and spurious interrupts: */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 1ff9609300b4..4fa373bb18ac 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -33,6 +33,7 @@ extern const char linux_banner[];
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define ALIGN(x,a) (((x)+(a)-1UL)&~((a)-1UL))
#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
+#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
#define KERN_EMERG "<0>" /* system is unusable */
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 2d229327959e..bcd9cd173c2c 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -20,6 +20,7 @@
#include <linux/types.h>
#include <linux/list.h>
#include <linux/sysfs.h>
+#include <linux/compiler.h>
#include <linux/spinlock.h>
#include <linux/rwsem.h>
#include <linux/kref.h>
@@ -71,12 +72,12 @@ static inline const char * kobject_name(const struct kobject * kobj)
extern void kobject_init(struct kobject *);
extern void kobject_cleanup(struct kobject *);
-extern int kobject_add(struct kobject *);
+extern int __must_check kobject_add(struct kobject *);
extern void kobject_del(struct kobject *);
-extern int kobject_rename(struct kobject *, const char *new_name);
+extern int __must_check kobject_rename(struct kobject *, const char *new_name);
-extern int kobject_register(struct kobject *);
+extern int __must_check kobject_register(struct kobject *);
extern void kobject_unregister(struct kobject *);
extern struct kobject * kobject_get(struct kobject *);
@@ -128,8 +129,8 @@ struct kset {
extern void kset_init(struct kset * k);
-extern int kset_add(struct kset * k);
-extern int kset_register(struct kset * k);
+extern int __must_check kset_add(struct kset * k);
+extern int __must_check kset_register(struct kset * k);
extern void kset_unregister(struct kset * k);
static inline struct kset * to_kset(struct kobject * kobj)
@@ -239,7 +240,7 @@ extern struct subsystem hypervisor_subsys;
(obj)->subsys.kset.kobj.kset = &(_subsys).kset
extern void subsystem_init(struct subsystem *);
-extern int subsystem_register(struct subsystem *);
+extern int __must_check subsystem_register(struct subsystem *);
extern void subsystem_unregister(struct subsystem *);
static inline struct subsystem * subsys_get(struct subsystem * s)
@@ -258,7 +259,8 @@ struct subsys_attribute {
ssize_t (*store)(struct subsystem *, const char *, size_t);
};
-extern int subsys_create_file(struct subsystem * , struct subsys_attribute *);
+extern int __must_check subsys_create_file(struct subsystem * ,
+ struct subsys_attribute *);
#if defined(CONFIG_HOTPLUG)
void kobject_uevent(struct kobject *kobj, enum kobject_action action);
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 72440f0a443d..09f0f575ddff 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -162,9 +162,9 @@ extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
unsigned long addr);
extern unsigned slab_node(struct mempolicy *policy);
-extern int policy_zone;
+extern enum zone_type policy_zone;
-static inline void check_highest_zone(int k)
+static inline void check_highest_zone(enum zone_type k)
{
if (k > policy_zone)
policy_zone = k;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 224178a000d2..856f0ee7e84a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -15,6 +15,7 @@
#include <linux/fs.h>
#include <linux/mutex.h>
#include <linux/debug_locks.h>
+#include <linux/backing-dev.h>
struct mempolicy;
struct anon_vma;
@@ -218,7 +219,8 @@ struct inode;
* Each physical page in the system has a struct page associated with
* it to keep track of whatever it is we are using the page for at the
* moment. Note that we have no way to track which tasks are using
- * a page.
+ * a page, though if it is a pagecache page, rmap structures can tell us
+ * who is mapping it.
*/
struct page {
unsigned long flags; /* Atomic flags, some possibly
@@ -278,6 +280,12 @@ struct page {
*/
#include <linux/page-flags.h>
+#ifdef CONFIG_DEBUG_VM
+#define VM_BUG_ON(cond) BUG_ON(cond)
+#else
+#define VM_BUG_ON(condition) do { } while(0)
+#endif
+
/*
* Methods to modify the page usage count.
*
@@ -292,12 +300,11 @@ struct page {
*/
/*
- * Drop a ref, return true if the logical refcount fell to zero (the page has
- * no users)
+ * Drop a ref, return true if the refcount fell to zero (the page has no users)
*/
static inline int put_page_testzero(struct page *page)
{
- BUG_ON(atomic_read(&page->_count) == 0);
+ VM_BUG_ON(atomic_read(&page->_count) == 0);
return atomic_dec_and_test(&page->_count);
}
@@ -307,11 +314,10 @@ static inline int put_page_testzero(struct page *page)
*/
static inline int get_page_unless_zero(struct page *page)
{
+ VM_BUG_ON(PageCompound(page));
return atomic_inc_not_zero(&page->_count);
}
-extern void FASTCALL(__page_cache_release(struct page *));
-
static inline int page_count(struct page *page)
{
if (unlikely(PageCompound(page)))
@@ -323,6 +329,7 @@ static inline void get_page(struct page *page)
{
if (unlikely(PageCompound(page)))
page = (struct page *)page_private(page);
+ VM_BUG_ON(atomic_read(&page->_count) == 0);
atomic_inc(&page->_count);
}
@@ -349,43 +356,55 @@ void split_page(struct page *page, unsigned int order);
* For the non-reserved pages, page_count(page) denotes a reference count.
* page_count() == 0 means the page is free. page->lru is then used for
* freelist management in the buddy allocator.
- * page_count() == 1 means the page is used for exactly one purpose
- * (e.g. a private data page of one process).
+ * page_count() > 0 means the page has been allocated.
+ *
+ * Pages are allocated by the slab allocator in order to provide memory
+ * to kmalloc and kmem_cache_alloc. In this case, the management of the
+ * page, and the fields in 'struct page' are the responsibility of mm/slab.c
+ * unless a particular usage is carefully commented. (the responsibility of
+ * freeing the kmalloc memory is the caller's, of course).
*
- * A page may be used for kmalloc() or anyone else who does a
- * __get_free_page(). In this case the page_count() is at least 1, and
- * all other fields are unused but should be 0 or NULL. The
- * management of this page is the responsibility of the one who uses
- * it.
+ * A page may be used by anyone else who does a __get_free_page().
+ * In this case, page_count still tracks the references, and should only
+ * be used through the normal accessor functions. The top bits of page->flags
+ * and page->virtual store page management information, but all other fields
+ * are unused and could be used privately, carefully. The management of this
+ * page is the responsibility of the one who allocated it, and those who have
+ * subsequently been given references to it.
*
- * The other pages (we may call them "process pages") are completely
+ * The other pages (we may call them "pagecache pages") are completely
* managed by the Linux memory manager: I/O, buffers, swapping etc.
* The following discussion applies only to them.
*
- * A page may belong to an inode's memory mapping. In this case,
- * page->mapping is the pointer to the inode, and page->index is the
- * file offset of the page, in units of PAGE_CACHE_SIZE.
+ * A pagecache page contains an opaque `private' member, which belongs to the
+ * page's address_space. Usually, this is the address of a circular list of
+ * the page's disk buffers. PG_private must be set to tell the VM to call
+ * into the filesystem to release these pages.
*
- * A page contains an opaque `private' member, which belongs to the
- * page's address_space. Usually, this is the address of a circular
- * list of the page's disk buffers.
+ * A page may belong to an inode's memory mapping. In this case, page->mapping
+ * is the pointer to the inode, and page->index is the file offset of the page,
+ * in units of PAGE_CACHE_SIZE.
*
- * For pages belonging to inodes, the page_count() is the number of
- * attaches, plus 1 if `private' contains something, plus one for
- * the page cache itself.
+ * If pagecache pages are not associated with an inode, they are said to be
+ * anonymous pages. These may become associated with the swapcache, and in that
+ * case PG_swapcache is set, and page->private is an offset into the swapcache.
*
- * Instead of keeping dirty/clean pages in per address-space lists, we instead
- * now tag pages as dirty/under writeback in the radix tree.
+ * In either case (swapcache or inode backed), the pagecache itself holds one
+ * reference to the page. Setting PG_private should also increment the
+ * refcount. The each user mapping also has a reference to the page.
*
- * There is also a per-mapping radix tree mapping index to the page
- * in memory if present. The tree is rooted at mapping->root.
+ * The pagecache pages are stored in a per-mapping radix tree, which is
+ * rooted at mapping->page_tree, and indexed by offset.
+ * Where 2.4 and early 2.6 kernels kept dirty/clean pages in per-address_space
+ * lists, we instead now tag pages as dirty/writeback in the radix tree.
*
- * All process pages can do I/O:
+ * All pagecache pages may be subject to I/O:
* - inode pages may need to be read from disk,
* - inode pages which have been modified and are MAP_SHARED may need
- * to be written to disk,
- * - private pages which have been modified may need to be swapped out
- * to swap space and (later) to be read back into memory.
+ * to be written back to the inode on disk,
+ * - anonymous pages (including MAP_PRIVATE file mappings) which have been
+ * modified may need to be swapped out to swap space and (later) to be read
+ * back into memory.
*/
/*
@@ -463,7 +482,7 @@ void split_page(struct page *page, unsigned int order);
#define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1)
#define ZONETABLE_MASK ((1UL << ZONETABLE_SHIFT) - 1)
-static inline unsigned long page_zonenum(struct page *page)
+static inline enum zone_type page_zonenum(struct page *page)
{
return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
}
@@ -480,23 +499,29 @@ static inline struct zone *page_zone(struct page *page)
return zone_table[page_zone_id(page)];
}
+static inline unsigned long zone_to_nid(struct zone *zone)
+{
+ return zone->zone_pgdat->node_id;
+}
+
static inline unsigned long page_to_nid(struct page *page)
{
if (FLAGS_HAS_NODE)
return (page->flags >> NODES_PGSHIFT) & NODES_MASK;
else
- return page_zone(page)->zone_pgdat->node_id;
+ return zone_to_nid(page_zone(page));
}
static inline unsigned long page_to_section(struct page *page)
{
return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK;
}
-static inline void set_page_zone(struct page *page, unsigned long zone)
+static inline void set_page_zone(struct page *page, enum zone_type zone)
{
page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT);
page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT;
}
+
static inline void set_page_node(struct page *page, unsigned long node)
{
page->flags &= ~(NODES_MASK << NODES_PGSHIFT);
@@ -508,7 +533,7 @@ static inline void set_page_section(struct page *page, unsigned long section)
page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT;
}
-static inline void set_page_links(struct page *page, unsigned long zone,
+static inline void set_page_links(struct page *page, enum zone_type zone,
unsigned long node, unsigned long pfn)
{
set_page_zone(page, zone);
@@ -802,6 +827,39 @@ struct shrinker;
extern struct shrinker *set_shrinker(int, shrinker_t);
extern void remove_shrinker(struct shrinker *shrinker);
+/*
+ * Some shared mappigns will want the pages marked read-only
+ * to track write events. If so, we'll downgrade vm_page_prot
+ * to the private version (using protection_map[] without the
+ * VM_SHARED bit).
+ */
+static inline int vma_wants_writenotify(struct vm_area_struct *vma)
+{
+ unsigned int vm_flags = vma->vm_flags;
+
+ /* If it was private or non-writable, the write bit is already clear */
+ if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
+ return 0;
+
+ /* The backer wishes to know when pages are first written to? */
+ if (vma->vm_ops && vma->vm_ops->page_mkwrite)
+ return 1;
+
+ /* The open routine did something to the protections already? */
+ if (pgprot_val(vma->vm_page_prot) !=
+ pgprot_val(protection_map[vm_flags &
+ (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]))
+ return 0;
+
+ /* Specialty mapping? */
+ if (vm_flags & (VM_PFNMAP|VM_INSERTPAGE))
+ return 0;
+
+ /* Can the mapping track the dirty pages? */
+ return vma->vm_file && vma->vm_file->f_mapping &&
+ mapping_cap_account_dirty(vma->vm_file->f_mapping);
+}
+
extern pte_t *FASTCALL(get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl));
int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index f45163c528e8..3693f1a52788 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -51,7 +51,8 @@ enum zone_stat_item {
NR_FILE_MAPPED, /* pagecache pages mapped into pagetables.
only modified from process context */
NR_FILE_PAGES,
- NR_SLAB, /* Pages used by slab allocator */
+ NR_SLAB_RECLAIMABLE,
+ NR_SLAB_UNRECLAIMABLE,
NR_PAGETABLE, /* used for pagetables */
NR_FILE_DIRTY,
NR_WRITEBACK,
@@ -88,53 +89,68 @@ struct per_cpu_pageset {
#define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)])
#endif
-#define ZONE_DMA 0
-#define ZONE_DMA32 1
-#define ZONE_NORMAL 2
-#define ZONE_HIGHMEM 3
-
-#define MAX_NR_ZONES 4 /* Sync this with ZONES_SHIFT */
-#define ZONES_SHIFT 2 /* ceil(log2(MAX_NR_ZONES)) */
-
+enum zone_type {
+ /*
+ * ZONE_DMA is used when there are devices that are not able
+ * to do DMA to all of addressable memory (ZONE_NORMAL). Then we
+ * carve out the portion of memory that is needed for these devices.
+ * The range is arch specific.
+ *
+ * Some examples
+ *
+ * Architecture Limit
+ * ---------------------------
+ * parisc, ia64, sparc <4G
+ * s390 <2G
+ * arm26 <48M
+ * arm Various
+ * alpha Unlimited or 0-16MB.
+ *
+ * i386, x86_64 and multiple other arches
+ * <16M.
+ */
+ ZONE_DMA,
+#ifdef CONFIG_ZONE_DMA32
+ /*
+ * x86_64 needs two ZONE_DMAs because it supports devices that are
+ * only able to do DMA to the lower 16M but also 32 bit devices that
+ * can only do DMA areas below 4G.
+ */
+ ZONE_DMA32,
+#endif
+ /*
+ * Normal addressable memory is in ZONE_NORMAL. DMA operations can be
+ * performed on pages in ZONE_NORMAL if the DMA devices support
+ * transfers to all addressable memory.
+ */
+ ZONE_NORMAL,
+#ifdef CONFIG_HIGHMEM
+ /*
+ * A memory area that is only addressable by the kernel through
+ * mapping portions into its own address space. This is for example
+ * used by i386 to allow the kernel to address the memory beyond
+ * 900MB. The kernel will set up special mappings (page
+ * table entries on i386) for each page that the kernel needs to
+ * access.
+ */
+ ZONE_HIGHMEM,
+#endif
+ MAX_NR_ZONES
+};
/*
* When a memory allocation must conform to specific limitations (such
* as being suitable for DMA) the caller will pass in hints to the
* allocator in the gfp_mask, in the zone modifier bits. These bits
* are used to select a priority ordered list of memory zones which
- * match the requested limits. GFP_ZONEMASK defines which bits within
- * the gfp_mask should be considered as zone modifiers. Each valid
- * combination of the zone modifier bits has a corresponding list
- * of zones (in node_zonelists). Thus for two zone modifiers there
- * will be a maximum of 4 (2 ** 2) zonelists, for 3 modifiers there will
- * be 8 (2 ** 3) zonelists. GFP_ZONETYPES defines the number of possible
- * combinations of zone modifiers in "zone modifier space".
- *
- * As an optimisation any zone modifier bits which are only valid when
- * no other zone modifier bits are set (loners) should be placed in
- * the highest order bits of this field. This allows us to reduce the
- * extent of the zonelists thus saving space. For example in the case
- * of three zone modifier bits, we could require up to eight zonelists.
- * If the left most zone modifier is a "loner" then the highest valid
- * zonelist would be four allowing us to allocate only five zonelists.
- * Use the first form for GFP_ZONETYPES when the left most bit is not
- * a "loner", otherwise use the second.
- *
- * NOTE! Make sure this matches the zones in <linux/gfp.h>
+ * match the requested limits. See gfp_zone() in include/linux/gfp.h
*/
-#define GFP_ZONEMASK 0x07
-/* #define GFP_ZONETYPES (GFP_ZONEMASK + 1) */ /* Non-loner */
-#define GFP_ZONETYPES ((GFP_ZONEMASK + 1) / 2 + 1) /* Loner */
-/*
- * On machines where it is needed (eg PCs) we divide physical memory
- * into multiple physical zones. On a 32bit PC we have 4 zones:
- *
- * ZONE_DMA < 16 MB ISA DMA capable memory
- * ZONE_DMA32 0 MB Empty
- * ZONE_NORMAL 16-896 MB direct mapped by the kernel
- * ZONE_HIGHMEM > 896 MB only page cache and user processes
- */
+#if !defined(CONFIG_ZONE_DMA32) && !defined(CONFIG_HIGHMEM)
+#define ZONES_SHIFT 1
+#else
+#define ZONES_SHIFT 2
+#endif
struct zone {
/* Fields commonly accessed by the page allocator */
@@ -154,7 +170,8 @@ struct zone {
/*
* zone reclaim becomes active if more unmapped pages exist.
*/
- unsigned long min_unmapped_ratio;
+ unsigned long min_unmapped_pages;
+ unsigned long min_slab_pages;
struct per_cpu_pageset *pageset[NR_CPUS];
#else
struct per_cpu_pageset pageset[NR_CPUS];
@@ -266,7 +283,6 @@ struct zone {
char *name;
} ____cacheline_internodealigned_in_smp;
-
/*
* The "priority" of VM scanning is how much of the queues we will scan in one
* go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the
@@ -304,7 +320,7 @@ struct zonelist {
struct bootmem_data;
typedef struct pglist_data {
struct zone node_zones[MAX_NR_ZONES];
- struct zonelist node_zonelists[GFP_ZONETYPES];
+ struct zonelist node_zonelists[MAX_NR_ZONES];
int nr_zones;
#ifdef CONFIG_FLAT_NODE_MEM_MAP
struct page *node_mem_map;
@@ -373,12 +389,16 @@ static inline int populated_zone(struct zone *zone)
return (!!zone->present_pages);
}
-static inline int is_highmem_idx(int idx)
+static inline int is_highmem_idx(enum zone_type idx)
{
+#ifdef CONFIG_HIGHMEM
return (idx == ZONE_HIGHMEM);
+#else
+ return 0;
+#endif
}
-static inline int is_normal_idx(int idx)
+static inline int is_normal_idx(enum zone_type idx)
{
return (idx == ZONE_NORMAL);
}
@@ -391,7 +411,11 @@ static inline int is_normal_idx(int idx)
*/
static inline int is_highmem(struct zone *zone)
{
+#ifdef CONFIG_HIGHMEM
return zone == zone->zone_pgdat->node_zones + ZONE_HIGHMEM;
+#else
+ return 0;
+#endif
}
static inline int is_normal(struct zone *zone)
@@ -401,7 +425,11 @@ static inline int is_normal(struct zone *zone)
static inline int is_dma32(struct zone *zone)
{
+#ifdef CONFIG_ZONE_DMA32
return zone == zone->zone_pgdat->node_zones + ZONE_DMA32;
+#else
+ return 0;
+#endif
}
static inline int is_dma(struct zone *zone)
@@ -421,6 +449,8 @@ int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file
void __user *, size_t *, loff_t *);
int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
struct file *, void __user *, size_t *, loff_t *);
+int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
+ struct file *, void __user *, size_t *, loff_t *);
#include <linux/topology.h>
/* Returns the number of the current Node. */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 5748642e9f36..9d7921dd50f0 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -13,24 +13,25 @@
* PG_reserved is set for special pages, which can never be swapped out. Some
* of them might not even exist (eg empty_bad_page)...
*
- * The PG_private bitflag is set if page->private contains a valid value.
+ * The PG_private bitflag is set on pagecache pages if they contain filesystem
+ * specific data (which is normally at page->private). It can be used by
+ * private allocations for its own usage.
*
- * During disk I/O, PG_locked is used. This bit is set before I/O and
- * reset when I/O completes. page_waitqueue(page) is a wait queue of all tasks
- * waiting for the I/O on this page to complete.
+ * During initiation of disk I/O, PG_locked is set. This bit is set before I/O
+ * and cleared when writeback _starts_ or when read _completes_. PG_writeback
+ * is set before writeback starts and cleared when it finishes.
+ *
+ * PG_locked also pins a page in pagecache, and blocks truncation of the file
+ * while it is held.
+ *
+ * page_waitqueue(page) is a wait queue of all tasks waiting for the page
+ * to become unlocked.
*
* PG_uptodate tells whether the page's contents is valid. When a read
* completes, the page becomes uptodate, unless a disk I/O error happened.
*
- * For choosing which pages to swap out, inode pages carry a PG_referenced bit,
- * which is set any time the system accesses that page through the (mapping,
- * index) hash table. This referenced bit, together with the referenced bit
- * in the page tables, is used to manipulate page->age and move the page across
- * the active, inactive_dirty and inactive_clean lists.
- *
- * Note that the referenced bit, the page->lru list_head and the active,
- * inactive_dirty and inactive_clean lists are protected by the
- * zone->lru_lock, and *NOT* by the usual PG_locked bit!
+ * PG_referenced, PG_reclaim are used for page reclaim for anonymous and
+ * file-backed pagecache (see mm/vmscan.c).
*
* PG_error is set to indicate that an I/O error occurred on this page.
*
@@ -42,6 +43,10 @@
* space, they need to be kmapped separately for doing IO on the pages. The
* struct page (these bits with information) are always mapped into kernel
* address space...
+ *
+ * PG_buddy is set to indicate that the page is free and in the buddy system
+ * (see mm/page_alloc.c).
+ *
*/
/*
@@ -74,7 +79,7 @@
#define PG_checked 8 /* kill me in 2.5.<early>. */
#define PG_arch_1 9
#define PG_reserved 10
-#define PG_private 11 /* Has something at ->private */
+#define PG_private 11 /* If pagecache, has fs-private data */
#define PG_writeback 12 /* Page is under writeback */
#define PG_nosave 13 /* Used for system suspend/resume */
@@ -83,7 +88,7 @@
#define PG_mappedtodisk 16 /* Has blocks allocated on-disk */
#define PG_reclaim 17 /* To be reclaimed asap */
-#define PG_nosave_free 18 /* Free, should not be written */
+#define PG_nosave_free 18 /* Used for system suspend/resume */
#define PG_buddy 19 /* Page is free, on buddy lists */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 0a2f5d27f60e..64f950925151 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -130,14 +130,29 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
}
extern void FASTCALL(__lock_page(struct page *page));
+extern void FASTCALL(__lock_page_nosync(struct page *page));
extern void FASTCALL(unlock_page(struct page *page));
+/*
+ * lock_page may only be called if we have the page's inode pinned.
+ */
static inline void lock_page(struct page *page)
{
might_sleep();
if (TestSetPageLocked(page))
__lock_page(page);
}
+
+/*
+ * lock_page_nosync should only be used if we can't pin the page's inode.
+ * Doesn't play quite so well with block device plugging.
+ */
+static inline void lock_page_nosync(struct page *page)
+{
+ might_sleep();
+ if (TestSetPageLocked(page))
+ __lock_page_nosync(page);
+}
/*
* This is exported only for wait_on_page_locked/wait_on_page_writeback.
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 8565b81d7fbc..3ec72551ac31 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -49,6 +49,7 @@
#include <linux/types.h>
#include <linux/ioport.h>
#include <linux/list.h>
+#include <linux/compiler.h>
#include <linux/errno.h>
#include <linux/device.h>
@@ -346,6 +347,8 @@ struct pci_driver {
int (*probe) (struct pci_dev *dev, const struct pci_device_id *id); /* New device inserted */
void (*remove) (struct pci_dev *dev); /* Device removed (NULL if not a hot-plug capable driver) */
int (*suspend) (struct pci_dev *dev, pm_message_t state); /* Device suspended */
+ int (*suspend_late) (struct pci_dev *dev, pm_message_t state);
+ int (*resume_early) (struct pci_dev *dev);
int (*resume) (struct pci_dev *dev); /* Device woken up */
int (*enable_wake) (struct pci_dev *dev, pci_power_t state, int enable); /* Enable wake event */
void (*shutdown) (struct pci_dev *dev);
@@ -401,7 +404,7 @@ extern struct list_head pci_root_buses; /* list of all known PCI buses */
extern struct list_head pci_devices; /* list of all devices */
void pcibios_fixup_bus(struct pci_bus *);
-int pcibios_enable_device(struct pci_dev *, int mask);
+int __must_check pcibios_enable_device(struct pci_dev *, int mask);
char *pcibios_setup (char *str);
/* Used only when drivers/pci/setup.c is used */
@@ -488,19 +491,19 @@ static inline int pci_write_config_dword(struct pci_dev *dev, int where, u32 val
return pci_bus_write_config_dword (dev->bus, dev->devfn, where, val);
}
-int pci_enable_device(struct pci_dev *dev);
-int pci_enable_device_bars(struct pci_dev *dev, int mask);
+int __must_check pci_enable_device(struct pci_dev *dev);
+int __must_check pci_enable_device_bars(struct pci_dev *dev, int mask);
void pci_disable_device(struct pci_dev *dev);
void pci_set_master(struct pci_dev *dev);
#define HAVE_PCI_SET_MWI
-int pci_set_mwi(struct pci_dev *dev);
+int __must_check pci_set_mwi(struct pci_dev *dev);
void pci_clear_mwi(struct pci_dev *dev);
void pci_intx(struct pci_dev *dev, int enable);
int pci_set_dma_mask(struct pci_dev *dev, u64 mask);
int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask);
void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno);
-int pci_assign_resource(struct pci_dev *dev, int i);
-int pci_assign_resource_fixed(struct pci_dev *dev, int i);
+int __must_check pci_assign_resource(struct pci_dev *dev, int i);
+int __must_check pci_assign_resource_fixed(struct pci_dev *dev, int i);
void pci_restore_bars(struct pci_dev *dev);
/* ROM control related routines */
@@ -526,23 +529,24 @@ void pdev_sort_resources(struct pci_dev *, struct resource_list *);
void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *),
int (*)(struct pci_dev *, u8, u8));
#define HAVE_PCI_REQ_REGIONS 2
-int pci_request_regions(struct pci_dev *, const char *);
+int __must_check pci_request_regions(struct pci_dev *, const char *);
void pci_release_regions(struct pci_dev *);
-int pci_request_region(struct pci_dev *, int, const char *);
+int __must_check pci_request_region(struct pci_dev *, int, const char *);
void pci_release_region(struct pci_dev *, int);
/* drivers/pci/bus.c */
-int pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res,
- resource_size_t size, resource_size_t align,
- resource_size_t min, unsigned int type_mask,
- void (*alignf)(void *, struct resource *,
- resource_size_t, resource_size_t),
- void *alignf_data);
+int __must_check pci_bus_alloc_resource(struct pci_bus *bus,
+ struct resource *res, resource_size_t size,
+ resource_size_t align, resource_size_t min,
+ unsigned int type_mask,
+ void (*alignf)(void *, struct resource *,
+ resource_size_t, resource_size_t),
+ void *alignf_data);
void pci_enable_bridges(struct pci_bus *bus);
/* Proper probing supporting hot-pluggable devices */
-int __pci_register_driver(struct pci_driver *, struct module *);
-static inline int pci_register_driver(struct pci_driver *driver)
+int __must_check __pci_register_driver(struct pci_driver *, struct module *);
+static inline int __must_check pci_register_driver(struct pci_driver *driver)
{
return __pci_register_driver(driver, THIS_MODULE);
}
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index cb9039a21f2a..3835a9642f13 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -1,9 +1,12 @@
#ifndef __LINUX_PERCPU_H
#define __LINUX_PERCPU_H
+
#include <linux/spinlock.h> /* For preempt_disable() */
#include <linux/slab.h> /* For kmalloc() */
#include <linux/smp.h>
#include <linux/string.h> /* For memset() */
+#include <linux/cpumask.h>
+
#include <asm/percpu.h>
/* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */
@@ -11,8 +14,14 @@
#define PERCPU_ENOUGH_ROOM 32768
#endif
-/* Must be an lvalue. */
-#define get_cpu_var(var) (*({ preempt_disable(); &__get_cpu_var(var); }))
+/*
+ * Must be an lvalue. Since @var must be a simple identifier,
+ * we force a syntax error here if it isn't.
+ */
+#define get_cpu_var(var) (*({ \
+ extern int simple_indentifier_##var(void); \
+ preempt_disable(); \
+ &__get_cpu_var(var); }))
#define put_cpu_var(var) preempt_enable()
#ifdef CONFIG_SMP
@@ -21,39 +30,77 @@ struct percpu_data {
void *ptrs[NR_CPUS];
};
+#define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata)
/*
- * Use this to get to a cpu's version of the per-cpu object allocated using
- * alloc_percpu. Non-atomic access to the current CPU's version should
+ * Use this to get to a cpu's version of the per-cpu object dynamically
+ * allocated. Non-atomic access to the current CPU's version should
* probably be combined with get_cpu()/put_cpu().
*/
-#define per_cpu_ptr(ptr, cpu) \
-({ \
- struct percpu_data *__p = (struct percpu_data *)~(unsigned long)(ptr); \
- (__typeof__(ptr))__p->ptrs[(cpu)]; \
+#define percpu_ptr(ptr, cpu) \
+({ \
+ struct percpu_data *__p = __percpu_disguise(ptr); \
+ (__typeof__(ptr))__p->ptrs[(cpu)]; \
})
-extern void *__alloc_percpu(size_t size);
-extern void free_percpu(const void *);
+extern void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu);
+extern void percpu_depopulate(void *__pdata, int cpu);
+extern int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
+ cpumask_t *mask);
+extern void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask);
+extern void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask);
+extern void percpu_free(void *__pdata);
#else /* CONFIG_SMP */
-#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
+#define percpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
+
+static inline void percpu_depopulate(void *__pdata, int cpu)
+{
+}
+
+static inline void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask)
+{
+}
-static inline void *__alloc_percpu(size_t size)
+static inline void *percpu_populate(void *__pdata, size_t size, gfp_t gfp,
+ int cpu)
{
- void *ret = kmalloc(size, GFP_KERNEL);
- if (ret)
- memset(ret, 0, size);
- return ret;
+ return percpu_ptr(__pdata, cpu);
}
-static inline void free_percpu(const void *ptr)
-{
- kfree(ptr);
+
+static inline int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
+ cpumask_t *mask)
+{
+ return 0;
+}
+
+static inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
+{
+ return kzalloc(size, gfp);
+}
+
+static inline void percpu_free(void *__pdata)
+{
+ kfree(__pdata);
}
#endif /* CONFIG_SMP */
-/* Simple wrapper for the common case: zeros memory. */
-#define alloc_percpu(type) ((type *)(__alloc_percpu(sizeof(type))))
+#define percpu_populate_mask(__pdata, size, gfp, mask) \
+ __percpu_populate_mask((__pdata), (size), (gfp), &(mask))
+#define percpu_depopulate_mask(__pdata, mask) \
+ __percpu_depopulate_mask((__pdata), &(mask))
+#define percpu_alloc_mask(size, gfp, mask) \
+ __percpu_alloc_mask((size), (gfp), &(mask))
+
+#define percpu_alloc(size, gfp) percpu_alloc_mask((size), (gfp), cpu_online_map)
+
+/* (legacy) interface for use without CPU hotplug handling */
+
+#define __alloc_percpu(size) percpu_alloc_mask((size), GFP_KERNEL, \
+ cpu_possible_map)
+#define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type))
+#define free_percpu(ptr) percpu_free((ptr))
+#define per_cpu_ptr(ptr, cpu) percpu_ptr((ptr), (cpu))
#endif /* __LINUX_PERCPU_H */
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 782090c68932..29cd6dee13db 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -49,6 +49,8 @@ struct platform_driver {
int (*remove)(struct platform_device *);
void (*shutdown)(struct platform_device *);
int (*suspend)(struct platform_device *, pm_message_t state);
+ int (*suspend_late)(struct platform_device *, pm_message_t state);
+ int (*resume_early)(struct platform_device *);
int (*resume)(struct platform_device *);
struct device_driver driver;
};
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 658c1b93d5bb..6b27e07aef19 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -142,29 +142,61 @@ typedef struct pm_message {
} pm_message_t;
/*
- * There are 4 important states driver can be in:
- * ON -- driver is working
- * FREEZE -- stop operations and apply whatever policy is applicable to a
- * suspended driver of that class, freeze queues for block like IDE
- * does, drop packets for ethernet, etc... stop DMA engine too etc...
- * so a consistent image can be saved; but do not power any hardware
- * down.
- * SUSPEND - like FREEZE, but hardware is doing as much powersaving as
- * possible. Roughly pci D3.
+ * Several driver power state transitions are externally visible, affecting
+ * the state of pending I/O queues and (for drivers that touch hardware)
+ * interrupts, wakeups, DMA, and other hardware state. There may also be
+ * internal transitions to various low power modes, which are transparent
+ * to the rest of the driver stack (such as a driver that's ON gating off
+ * clocks which are not in active use).
*
- * Unfortunately, current drivers only recognize numeric values 0 (ON) and 3
- * (SUSPEND). We'll need to fix the drivers. So yes, putting 3 to all different
- * defines is intentional, and will go away as soon as drivers are fixed. Also
- * note that typedef is neccessary, we'll probably want to switch to
- * typedef struct pm_message_t { int event; int flags; } pm_message_t
- * or something similar soon.
+ * One transition is triggered by resume(), after a suspend() call; the
+ * message is implicit:
+ *
+ * ON Driver starts working again, responding to hardware events
+ * and software requests. The hardware may have gone through
+ * a power-off reset, or it may have maintained state from the
+ * previous suspend() which the driver will rely on while
+ * resuming. On most platforms, there are no restrictions on
+ * availability of resources like clocks during resume().
+ *
+ * Other transitions are triggered by messages sent using suspend(). All
+ * these transitions quiesce the driver, so that I/O queues are inactive.
+ * That commonly entails turning off IRQs and DMA; there may be rules
+ * about how to quiesce that are specific to the bus or the device's type.
+ * (For example, network drivers mark the link state.) Other details may
+ * differ according to the message:
+ *
+ * SUSPEND Quiesce, enter a low power device state appropriate for
+ * the upcoming system state (such as PCI_D3hot), and enable
+ * wakeup events as appropriate.
+ *
+ * FREEZE Quiesce operations so that a consistent image can be saved;
+ * but do NOT otherwise enter a low power device state, and do
+ * NOT emit system wakeup events.
+ *
+ * PRETHAW Quiesce as if for FREEZE; additionally, prepare for restoring
+ * the system from a snapshot taken after an earlier FREEZE.
+ * Some drivers will need to reset their hardware state instead
+ * of preserving it, to ensure that it's never mistaken for the
+ * state which that earlier snapshot had set up.
+ *
+ * A minimally power-aware driver treats all messages as SUSPEND, fully
+ * reinitializes its device during resume() -- whether or not it was reset
+ * during the suspend/resume cycle -- and can't issue wakeup events.
+ *
+ * More power-aware drivers may also use low power states at runtime as
+ * well as during system sleep states like PM_SUSPEND_STANDBY. They may
+ * be able to use wakeup events to exit from runtime low-power states,
+ * or from system low-power states such as standby or suspend-to-RAM.
*/
#define PM_EVENT_ON 0
#define PM_EVENT_FREEZE 1
#define PM_EVENT_SUSPEND 2
+#define PM_EVENT_PRETHAW 3
#define PMSG_FREEZE ((struct pm_message){ .event = PM_EVENT_FREEZE, })
+#define PMSG_PRETHAW ((struct pm_message){ .event = PM_EVENT_PRETHAW, })
#define PMSG_SUSPEND ((struct pm_message){ .event = PM_EVENT_SUSPEND, })
#define PMSG_ON ((struct pm_message){ .event = PM_EVENT_ON, })
@@ -190,6 +222,7 @@ extern void device_resume(void);
extern suspend_disk_method_t pm_disk_mode;
extern int device_suspend(pm_message_t state);
+extern int device_prepare_suspend(pm_message_t state);
#define device_set_wakeup_enable(dev,val) \
((dev)->power.should_wakeup = !!(val))
diff --git a/include/linux/resume-trace.h b/include/linux/resume-trace.h
index a376bd4ade39..81e9299ca148 100644
--- a/include/linux/resume-trace.h
+++ b/include/linux/resume-trace.h
@@ -3,21 +3,25 @@
#ifdef CONFIG_PM_TRACE
+extern int pm_trace_enabled;
+
struct device;
extern void set_trace_device(struct device *);
extern void generate_resume_trace(void *tracedata, unsigned int user);
#define TRACE_DEVICE(dev) set_trace_device(dev)
-#define TRACE_RESUME(user) do { \
- void *tracedata; \
- asm volatile("movl $1f,%0\n" \
- ".section .tracedata,\"a\"\n" \
- "1:\t.word %c1\n" \
- "\t.long %c2\n" \
- ".previous" \
- :"=r" (tracedata) \
- : "i" (__LINE__), "i" (__FILE__)); \
- generate_resume_trace(tracedata, user); \
+#define TRACE_RESUME(user) do { \
+ if (pm_trace_enabled) { \
+ void *tracedata; \
+ asm volatile("movl $1f,%0\n" \
+ ".section .tracedata,\"a\"\n" \
+ "1:\t.word %c1\n" \
+ "\t.long %c2\n" \
+ ".previous" \
+ :"=r" (tracedata) \
+ : "i" (__LINE__), "i" (__FILE__)); \
+ generate_resume_trace(tracedata, user); \
+ } \
} while (0)
#else
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bf97b0900014..db2c1df4fef9 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -103,6 +103,14 @@ pte_t *page_check_address(struct page *, struct mm_struct *,
*/
unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
+/*
+ * Cleans the PTEs of shared mappings.
+ * (and since clean PTEs should also be readonly, write protects them too)
+ *
+ * returns the number of cleaned PTEs.
+ */
+int page_mkclean(struct page *);
+
#else /* !CONFIG_MMU */
#define anon_vma_init() do {} while (0)
@@ -112,6 +120,12 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
#define page_referenced(page,l) TestClearPageReferenced(page)
#define try_to_unmap(page, refs) SWAP_FAIL
+static inline int page_mkclean(struct page *page)
+{
+ return 0;
+}
+
+
#endif /* CONFIG_MMU */
/*
diff --git a/include/linux/selinux.h b/include/linux/selinux.h
index aad4e390d6a5..d1b7ca6c1c57 100644
--- a/include/linux/selinux.h
+++ b/include/linux/selinux.h
@@ -46,7 +46,7 @@ void selinux_audit_rule_free(struct selinux_audit_rule *rule);
/**
* selinux_audit_rule_match - determine if a context ID matches a rule.
- * @ctxid: the context ID to check
+ * @sid: the context ID to check
* @field: the field this rule refers to
* @op: the operater the rule uses
* @rule: pointer to the audit rule to check against
@@ -55,7 +55,7 @@ void selinux_audit_rule_free(struct selinux_audit_rule *rule);
* Returns 1 if the context id matches the rule, 0 if it does not, and
* -errno on failure.
*/
-int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op,
+int selinux_audit_rule_match(u32 sid, u32 field, u32 op,
struct selinux_audit_rule *rule,
struct audit_context *actx);
@@ -70,18 +70,8 @@ int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op,
void selinux_audit_set_callback(int (*callback)(void));
/**
- * selinux_task_ctxid - determine a context ID for a process.
- * @tsk: the task object
- * @ctxid: ID value returned via this
- *
- * On return, ctxid will contain an ID for the context. This value
- * should only be used opaquely.
- */
-void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid);
-
-/**
- * selinux_ctxid_to_string - map a security context ID to a string
- * @ctxid: security context ID to be converted.
+ * selinux_sid_to_string - map a security context ID to a string
+ * @sid: security context ID to be converted.
* @ctx: address of context string to be returned
* @ctxlen: length of returned context string.
*
@@ -89,7 +79,7 @@ void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid);
* string will be allocated internally, and the caller must call
* kfree() on it after use.
*/
-int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen);
+int selinux_sid_to_string(u32 sid, char **ctx, u32 *ctxlen);
/**
* selinux_get_inode_sid - get the inode's security context ID
@@ -154,7 +144,7 @@ static inline void selinux_audit_rule_free(struct selinux_audit_rule *rule)
return;
}
-static inline int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op,
+static inline int selinux_audit_rule_match(u32 sid, u32 field, u32 op,
struct selinux_audit_rule *rule,
struct audit_context *actx)
{
@@ -166,12 +156,7 @@ static inline void selinux_audit_set_callback(int (*callback)(void))
return;
}
-static inline void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid)
-{
- *ctxid = 0;
-}
-
-static inline int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen)
+static inline int selinux_sid_to_string(u32 sid, char **ctx, u32 *ctxlen)
{
*ctx = NULL;
*ctxlen = 0;
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 45ad55b70d1c..66d6eb78d1c6 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -67,7 +67,6 @@ extern void *kmem_cache_zalloc(struct kmem_cache *, gfp_t);
extern void kmem_cache_free(kmem_cache_t *, void *);
extern unsigned int kmem_cache_size(kmem_cache_t *);
extern const char *kmem_cache_name(kmem_cache_t *);
-extern kmem_cache_t *kmem_find_general_cachep(size_t size, gfp_t gfpflags);
/* Size description struct for general caches. */
struct cache_sizes {
@@ -203,7 +202,30 @@ extern int slab_is_available(void);
#ifdef CONFIG_NUMA
extern void *kmem_cache_alloc_node(kmem_cache_t *, gfp_t flags, int node);
-extern void *kmalloc_node(size_t size, gfp_t flags, int node);
+extern void *__kmalloc_node(size_t size, gfp_t flags, int node);
+
+static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
+{
+ if (__builtin_constant_p(size)) {
+ int i = 0;
+#define CACHE(x) \
+ if (size <= x) \
+ goto found; \
+ else \
+ i++;
+#include "kmalloc_sizes.h"
+#undef CACHE
+ {
+ extern void __you_cannot_kmalloc_that_much(void);
+ __you_cannot_kmalloc_that_much();
+ }
+found:
+ return kmem_cache_alloc_node((flags & GFP_DMA) ?
+ malloc_sizes[i].cs_dmacachep :
+ malloc_sizes[i].cs_cachep, flags, node);
+ }
+ return __kmalloc_node(size, flags, node);
+}
#else
static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int node)
{
@@ -223,7 +245,6 @@ extern int FASTCALL(kmem_ptr_validate(kmem_cache_t *cachep, void *ptr));
/* SLOB allocator routines */
void kmem_cache_init(void);
-struct kmem_cache *kmem_find_general_cachep(size_t, gfp_t gfpflags);
struct kmem_cache *kmem_cache_create(const char *c, size_t, size_t,
unsigned long,
void (*)(void *, struct kmem_cache *, unsigned long),
@@ -263,8 +284,6 @@ extern kmem_cache_t *fs_cachep;
extern kmem_cache_t *sighand_cachep;
extern kmem_cache_t *bio_cachep;
-extern atomic_t slab_reclaim_pages;
-
#endif /* __KERNEL__ */
#endif /* _LINUX_SLAB_H */
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 837e8bce1349..51649987f691 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -53,6 +53,9 @@ extern void smp_cpus_done(unsigned int max_cpus);
*/
int smp_call_function(void(*func)(void *info), void *info, int retry, int wait);
+int smp_call_function_single(int cpuid, void (*func) (void *info), void *info,
+ int retry, int wait);
+
/*
* Call a function on all processors
*/
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 96e31aa64cc7..b1237f16ecde 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -10,29 +10,11 @@
#include <linux/pm.h>
/* page backup entry */
-typedef struct pbe {
+struct pbe {
unsigned long address; /* address of the copy */
unsigned long orig_address; /* original address of page */
struct pbe *next;
-} suspend_pagedir_t;
-
-#define for_each_pbe(pbe, pblist) \
- for (pbe = pblist ; pbe ; pbe = pbe->next)
-
-#define PBES_PER_PAGE (PAGE_SIZE/sizeof(struct pbe))
-#define PB_PAGE_SKIP (PBES_PER_PAGE-1)
-
-#define for_each_pb_page(pbe, pblist) \
- for (pbe = pblist ; pbe ; pbe = (pbe+PB_PAGE_SKIP)->next)
-
-
-#define SWAP_FILENAME_MAXLENGTH 32
-
-
-extern dev_t swsusp_resume_device;
-
-/* mm/vmscan.c */
-extern int shrink_mem(void);
+};
/* mm/page_alloc.c */
extern void drain_local_pages(void);
@@ -53,18 +35,10 @@ static inline void pm_restore_console(void) {}
static inline int software_suspend(void)
{
printk("Warning: fake suspend called\n");
- return -EPERM;
+ return -ENOSYS;
}
#endif /* CONFIG_PM */
-#ifdef CONFIG_SUSPEND_SMP
-extern void disable_nonboot_cpus(void);
-extern void enable_nonboot_cpus(void);
-#else
-static inline void disable_nonboot_cpus(void) {}
-static inline void enable_nonboot_cpus(void) {}
-#endif
-
void save_processor_state(void);
void restore_processor_state(void);
struct saved_context;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 5e59184c9096..e7c36ba2a2db 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -10,6 +10,10 @@
#include <asm/atomic.h>
#include <asm/page.h>
+struct notifier_block;
+
+struct bio;
+
#define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */
#define SWAP_FLAG_PRIO_MASK 0x7fff
#define SWAP_FLAG_PRIO_SHIFT 0
@@ -156,13 +160,14 @@ struct swap_list_t {
/* linux/mm/oom_kill.c */
extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order);
+extern int register_oom_notifier(struct notifier_block *nb);
+extern int unregister_oom_notifier(struct notifier_block *nb);
/* linux/mm/memory.c */
extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *);
/* linux/mm/page_alloc.c */
extern unsigned long totalram_pages;
-extern unsigned long totalhigh_pages;
extern unsigned long totalreserve_pages;
extern long nr_swap_pages;
extern unsigned int nr_free_pages(void);
@@ -190,6 +195,7 @@ extern long vm_total_pages;
#ifdef CONFIG_NUMA
extern int zone_reclaim_mode;
extern int sysctl_min_unmapped_ratio;
+extern int sysctl_min_slab_ratio;
extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
#else
#define zone_reclaim_mode 0
@@ -212,7 +218,9 @@ extern void swap_unplug_io_fn(struct backing_dev_info *, struct page *);
/* linux/mm/page_io.c */
extern int swap_readpage(struct file *, struct page *);
extern int swap_writepage(struct page *page, struct writeback_control *wbc);
-extern int rw_swap_page_sync(int, swp_entry_t, struct page *);
+extern int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page,
+ struct bio **bio_chain);
+extern int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err);
/* linux/mm/swap_state.c */
extern struct address_space swapper_space;
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 432778446ad2..1b24bd45e080 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -193,6 +193,7 @@ enum
VM_MIN_UNMAPPED=32, /* Set min percent of unmapped pages */
VM_PANIC_ON_OOM=33, /* panic at out-of-memory */
VM_VDSO_ENABLED=34, /* map VDSO into new processes? */
+ VM_MIN_SLAB=35, /* Percent pages ignored by zone reclaim */
};
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 1ea5d3cda6ae..6d5c43d31dec 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -10,6 +10,7 @@
#ifndef _SYSFS_H_
#define _SYSFS_H_
+#include <linux/compiler.h>
#include <asm/atomic.h>
struct kobject;
@@ -86,40 +87,44 @@ struct sysfs_dirent {
#ifdef CONFIG_SYSFS
-extern int
+extern int __must_check
sysfs_create_dir(struct kobject *);
extern void
sysfs_remove_dir(struct kobject *);
-extern int
+extern int __must_check
sysfs_rename_dir(struct kobject *, const char *new_name);
-extern int
+extern int __must_check
sysfs_create_file(struct kobject *, const struct attribute *);
-extern int
+extern int __must_check
sysfs_update_file(struct kobject *, const struct attribute *);
-extern int
+extern int __must_check
sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode);
extern void
sysfs_remove_file(struct kobject *, const struct attribute *);
-extern int
+extern int __must_check
sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name);
extern void
sysfs_remove_link(struct kobject *, const char * name);
-int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr);
-int sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr);
+int __must_check sysfs_create_bin_file(struct kobject *kobj,
+ struct bin_attribute *attr);
+void sysfs_remove_bin_file(struct kobject *kobj, struct bin_attribute *attr);
-int sysfs_create_group(struct kobject *, const struct attribute_group *);
+int __must_check sysfs_create_group(struct kobject *,
+ const struct attribute_group *);
void sysfs_remove_group(struct kobject *, const struct attribute_group *);
void sysfs_notify(struct kobject * k, char *dir, char *attr);
+extern int __must_check sysfs_init(void);
+
#else /* CONFIG_SYSFS */
static inline int sysfs_create_dir(struct kobject * k)
@@ -191,6 +196,11 @@ static inline void sysfs_notify(struct kobject * k, char *dir, char *attr)
{
}
+static inline int __must_check sysfs_init(void)
+{
+ return 0;
+}
+
#endif /* CONFIG_SYSFS */
#endif /* _SYSFS_H_ */
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 71b6363caaaf..dee88c6b6fa7 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -44,8 +44,6 @@ extern void *vmalloc_32_user(unsigned long size);
extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot);
extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask,
pgprot_t prot);
-extern void *__vmalloc_node(unsigned long size, gfp_t gfp_mask,
- pgprot_t prot, int node);
extern void vfree(void *addr);
extern void *vmap(struct page **pages, unsigned int count,
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 2d9b1b60798a..176c7f797339 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -18,7 +18,19 @@
* generated will simply be the increment of a global address.
*/
-#define FOR_ALL_ZONES(x) x##_DMA, x##_DMA32, x##_NORMAL, x##_HIGH
+#ifdef CONFIG_ZONE_DMA32
+#define DMA32_ZONE(xx) xx##_DMA32,
+#else
+#define DMA32_ZONE(xx)
+#endif
+
+#ifdef CONFIG_HIGHMEM
+#define HIGHMEM_ZONE(xx) , xx##_HIGH
+#else
+#define HIGHMEM_ZONE(xx)
+#endif
+
+#define FOR_ALL_ZONES(xx) xx##_DMA, DMA32_ZONE(xx) xx##_NORMAL HIGHMEM_ZONE(xx)
enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
FOR_ALL_ZONES(PGALLOC),
@@ -124,12 +136,10 @@ static inline unsigned long node_page_state(int node,
struct zone *zones = NODE_DATA(node)->node_zones;
return
-#ifndef CONFIG_DMA_IS_NORMAL
-#if !defined(CONFIG_DMA_IS_DMA32) && BITS_PER_LONG >= 64
+#ifdef CONFIG_ZONE_DMA32
zone_page_state(&zones[ZONE_DMA32], item) +
#endif
zone_page_state(&zones[ZONE_NORMAL], item) +
-#endif
#ifdef CONFIG_HIGHMEM
zone_page_state(&zones[ZONE_HIGHMEM], item) +
#endif
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 0422036af4eb..56a23a0e7f2e 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -116,6 +116,7 @@ int sync_page_range(struct inode *inode, struct address_space *mapping,
loff_t pos, loff_t count);
int sync_page_range_nolock(struct inode *inode, struct address_space *mapping,
loff_t pos, loff_t count);
+void set_page_dirty_balance(struct page *page);
/* pdflush.c */
extern int nr_pdflush_threads; /* Global so it can be exported to sysctl