From 67c457a8c378a006a34d92f9bd3078a80a92f250 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 14 Apr 2009 07:50:56 -0400 Subject: jbd2: use SWRITE_SYNC_PLUG when writing synchronous revoke records The revoke records must be written using the same way as the rest of the blocks during the commit process; that is, either marked as synchronous writes or as asynchornous writes. Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 8815a3456b3b..cc02393bfce8 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1193,7 +1193,8 @@ extern int jbd2_journal_init_revoke_caches(void); extern void jbd2_journal_destroy_revoke(journal_t *); extern int jbd2_journal_revoke (handle_t *, unsigned long long, struct buffer_head *); extern int jbd2_journal_cancel_revoke(handle_t *, struct journal_head *); -extern void jbd2_journal_write_revoke_records(journal_t *, transaction_t *); +extern void jbd2_journal_write_revoke_records(journal_t *, + transaction_t *, int); /* Recovery revoke support */ extern int jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t); -- cgit v1.2.3-59-g8ed1b From 38d726d153cfe5efe5fe22d28d36ab382dda3a5c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 14 Apr 2009 10:10:47 -0400 Subject: jbd: use SWRITE_SYNC_PLUG when writing synchronous revoke records The revoke records must be written using the same way as the rest of the blocks during the commit process; that is, either marked as synchronous writes or as asynchornous writes. Signed-off-by: "Theodore Ts'o" --- fs/jbd/commit.c | 2 +- fs/jbd/revoke.c | 20 +++++++++++--------- include/linux/jbd.h | 3 ++- 3 files changed, 14 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index a8e8513a78a9..06560c520f49 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -502,7 +502,7 @@ void journal_commit_transaction(journal_t *journal) err = 0; } - journal_write_revoke_records(journal, commit_transaction); + journal_write_revoke_records(journal, commit_transaction, write_op); /* * If we found any dirty or locked buffers, then we should have diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index c7bd649bbbdc..1b1a06e1c836 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -67,6 +67,7 @@ #include #include #include +#include #endif #include @@ -99,8 +100,8 @@ struct jbd_revoke_table_s #ifdef __KERNEL__ static void write_one_revoke_record(journal_t *, transaction_t *, struct journal_head **, int *, - struct jbd_revoke_record_s *); -static void flush_descriptor(journal_t *, struct journal_head *, int); + struct jbd_revoke_record_s *, int); +static void flush_descriptor(journal_t *, struct journal_head *, int, int); #endif /* Utility functions to maintain the revoke table */ @@ -486,7 +487,7 @@ void journal_switch_revoke_table(journal_t *journal) */ void journal_write_revoke_records(journal_t *journal, - transaction_t *transaction) + transaction_t *transaction, int write_op) { struct journal_head *descriptor; struct jbd_revoke_record_s *record; @@ -510,14 +511,14 @@ void journal_write_revoke_records(journal_t *journal, hash_list->next; write_one_revoke_record(journal, transaction, &descriptor, &offset, - record); + record, write_op); count++; list_del(&record->hash); kmem_cache_free(revoke_record_cache, record); } } if (descriptor) - flush_descriptor(journal, descriptor, offset); + flush_descriptor(journal, descriptor, offset, write_op); jbd_debug(1, "Wrote %d revoke records\n", count); } @@ -530,7 +531,8 @@ static void write_one_revoke_record(journal_t *journal, transaction_t *transaction, struct journal_head **descriptorp, int *offsetp, - struct jbd_revoke_record_s *record) + struct jbd_revoke_record_s *record, + int write_op) { struct journal_head *descriptor; int offset; @@ -549,7 +551,7 @@ static void write_one_revoke_record(journal_t *journal, /* Make sure we have a descriptor with space left for the record */ if (descriptor) { if (offset == journal->j_blocksize) { - flush_descriptor(journal, descriptor, offset); + flush_descriptor(journal, descriptor, offset, write_op); descriptor = NULL; } } @@ -586,7 +588,7 @@ static void write_one_revoke_record(journal_t *journal, static void flush_descriptor(journal_t *journal, struct journal_head *descriptor, - int offset) + int offset, int write_op) { journal_revoke_header_t *header; struct buffer_head *bh = jh2bh(descriptor); @@ -601,7 +603,7 @@ static void flush_descriptor(journal_t *journal, set_buffer_jwrite(bh); BUFFER_TRACE(bh, "write"); set_buffer_dirty(bh); - ll_rw_block(SWRITE, 1, &bh); + ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh); } #endif diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 53ae4399da2d..c2049a04fa0b 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -978,7 +978,8 @@ extern void journal_destroy_revoke(journal_t *); extern int journal_revoke (handle_t *, unsigned long, struct buffer_head *); extern int journal_cancel_revoke(handle_t *, struct journal_head *); -extern void journal_write_revoke_records(journal_t *, transaction_t *); +extern void journal_write_revoke_records(journal_t *, + transaction_t *, int); /* Recovery revoke support */ extern int journal_set_revoke(journal_t *, unsigned long, tid_t); -- cgit v1.2.3-59-g8ed1b From 412401029259b1ad67559cec93bcc7ee4a9551aa Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 16 Apr 2009 09:58:44 -0600 Subject: powerpc/5200: Bring the legacy fsl_spi_platform_data hooks back In commit 364fdbc00fbdd409ade63500710123fe323aa164 ("spi_mpc83xx: rework chip selects handling"), I merged activate_cs and deactivate_cs hooks into cs_control, but I overlooked that mpc52xx_psc_spi driver is using these hooks too. And that resulted in the following build failure: CC drivers/spi/mpc52xx_psc_spi.o drivers/spi/mpc52xx_psc_spi.c: In function 'mpc52xx_psc_spi_do_probe': drivers/spi/mpc52xx_psc_spi.c:398: error: 'struct fsl_spi_platform_data' has no member named 'activate_cs' drivers/spi/mpc52xx_psc_spi.c:399: error: 'struct fsl_spi_platform_data' has no member named 'deactivate_cs' make[2]: *** [drivers/spi/mpc52xx_psc_spi.o] Error 1 This patch simply adds the legacy hooks back for 2.6.30, and for 2.6.31 we'll convert the driver to ->cs_control. Reported-by: Subrata Modak Signed-off-by: Anton Vorontsov Signed-off-by: Grant Likely --- include/linux/fsl_devices.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index f2a78b5e8b55..0cde1806cfab 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -83,6 +83,10 @@ struct fsl_spi_platform_data { u16 max_chipselect; void (*cs_control)(struct spi_device *spi, bool on); u32 sysclk; + + /* Legacy hooks, used by mpc52xx_psc_spi driver. */ + void (*activate_cs)(u8 cs, u8 polarity); + void (*deactivate_cs)(u8 cs, u8 polarity); }; struct mpc8xx_pcmcia_ops { -- cgit v1.2.3-59-g8ed1b From 88bea188b85f9cefefbbd56b8a48d0f798409177 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 21 Apr 2009 00:35:47 -0400 Subject: ACPI: add /sys/firmware/acpi/interrupts/sci_not counter This counter may prove useful in debugging some spurious interrupt issues seen in the field. Signed-off-by: Len Brown --- Documentation/ABI/testing/sysfs-firmware-acpi | 8 ++++++-- drivers/acpi/osl.c | 4 +++- drivers/acpi/system.c | 11 +++++++++-- include/linux/acpi.h | 1 + 4 files changed, 19 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-firmware-acpi b/Documentation/ABI/testing/sysfs-firmware-acpi index e8ffc70ffe12..4f9ba3c2fca7 100644 --- a/Documentation/ABI/testing/sysfs-firmware-acpi +++ b/Documentation/ABI/testing/sysfs-firmware-acpi @@ -69,9 +69,13 @@ Description: gpe1F: 0 invalid gpe_all: 1192 sci: 1194 + sci_not: 0 - sci - The total number of times the ACPI SCI - has claimed an interrupt. + sci - The number of times the ACPI SCI + has been called and claimed an interrupt. + + sci_not - The number of times the ACPI SCI + has been called and NOT claimed an interrupt. gpe_all - count of SCI caused by GPEs. diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index d59f08ecaf16..d916bea729f1 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -353,8 +353,10 @@ static irqreturn_t acpi_irq(int irq, void *dev_id) if (handled) { acpi_irq_handled++; return IRQ_HANDLED; - } else + } else { + acpi_irq_not_handled++; return IRQ_NONE; + } } acpi_status diff --git a/drivers/acpi/system.c b/drivers/acpi/system.c index da51f05ef8d8..0944daec064f 100644 --- a/drivers/acpi/system.c +++ b/drivers/acpi/system.c @@ -38,6 +38,7 @@ ACPI_MODULE_NAME("system"); #define ACPI_SYSTEM_DEVICE_NAME "System" u32 acpi_irq_handled; +u32 acpi_irq_not_handled; /* * Make ACPICA version work as module param @@ -214,8 +215,9 @@ err: #define COUNT_GPE 0 #define COUNT_SCI 1 /* acpi_irq_handled */ -#define COUNT_ERROR 2 /* other */ -#define NUM_COUNTERS_EXTRA 3 +#define COUNT_SCI_NOT 2 /* acpi_irq_not_handled */ +#define COUNT_ERROR 3 /* other */ +#define NUM_COUNTERS_EXTRA 4 struct event_counter { u32 count; @@ -317,6 +319,8 @@ static ssize_t counter_show(struct kobject *kobj, all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_SCI].count = acpi_irq_handled; + all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_SCI_NOT].count = + acpi_irq_not_handled; all_counters[num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_GPE].count = acpi_gpe_count; @@ -363,6 +367,7 @@ static ssize_t counter_set(struct kobject *kobj, all_counters[i].count = 0; acpi_gpe_count = 0; acpi_irq_handled = 0; + acpi_irq_not_handled = 0; goto end; } @@ -456,6 +461,8 @@ void acpi_irq_stats_init(void) sprintf(buffer, "gpe_all"); else if (i == num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_SCI) sprintf(buffer, "sci"); + else if (i == num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_SCI_NOT) + sprintf(buffer, "sci_not"); else if (i == num_gpes + ACPI_NUM_FIXED_EVENTS + COUNT_ERROR) sprintf(buffer, "error"); else diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6586cbd0d4af..88be890ee3c7 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -111,6 +111,7 @@ int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base); int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base); void acpi_irq_stats_init(void); extern u32 acpi_irq_handled; +extern u32 acpi_irq_not_handled; extern struct acpi_mcfg_allocation *pci_mmcfg_config; extern int pci_mmcfg_config_num; -- cgit v1.2.3-59-g8ed1b From 451a9ebf653d28337ba53ed5b4b70b0b9543cca1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 15 Apr 2009 19:50:51 +0200 Subject: bio: fix bio_kmalloc() Impact: fix bio_kmalloc() and its destruction path bio_kmalloc() was broken in two ways. * bvec_alloc_bs() first allocates bvec using kmalloc() and then ignores it and allocates again like non-kmalloc bvecs. * bio_kmalloc_destructor() didn't check for and free bio integrity data. This patch fixes the above problems. kmalloc patch is separated out from bio_alloc_bioset() and allocates the requested number of bvecs as inline bvecs. * bio_alloc_bioset() no longer takes NULL @bs. None other than bio_kmalloc() used it and outside users can't know how it was allocated anyway. * Define and use BIO_POOL_NONE so that pool index check in bvec_free_bs() triggers if inline or kmalloc allocated bvec gets there. * Relocate destructors on top of each allocation function so that how they're used is more clear. Jens Axboe suggested allocating bvecs inline. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- fs/bio.c | 118 ++++++++++++++++++++++++---------------------------- include/linux/bio.h | 1 + 2 files changed, 55 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/fs/bio.c b/fs/bio.c index cd42bb882f30..d35588fd6d57 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -174,14 +174,6 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, { struct bio_vec *bvl; - /* - * If 'bs' is given, lookup the pool and do the mempool alloc. - * If not, this is a bio_kmalloc() allocation and just do a - * kzalloc() for the exact number of vecs right away. - */ - if (!bs) - bvl = kmalloc(nr * sizeof(struct bio_vec), gfp_mask); - /* * see comment near bvec_array define! */ @@ -260,21 +252,6 @@ void bio_free(struct bio *bio, struct bio_set *bs) mempool_free(p, bs->bio_pool); } -/* - * default destructor for a bio allocated with bio_alloc_bioset() - */ -static void bio_fs_destructor(struct bio *bio) -{ - bio_free(bio, fs_bio_set); -} - -static void bio_kmalloc_destructor(struct bio *bio) -{ - if (bio_has_allocated_vec(bio)) - kfree(bio->bi_io_vec); - kfree(bio); -} - void bio_init(struct bio *bio) { memset(bio, 0, sizeof(*bio)); @@ -301,21 +278,15 @@ void bio_init(struct bio *bio) **/ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) { + unsigned long idx = BIO_POOL_NONE; struct bio_vec *bvl = NULL; - struct bio *bio = NULL; - unsigned long idx = 0; - void *p = NULL; - - if (bs) { - p = mempool_alloc(bs->bio_pool, gfp_mask); - if (!p) - goto err; - bio = p + bs->front_pad; - } else { - bio = kmalloc(sizeof(*bio), gfp_mask); - if (!bio) - goto err; - } + struct bio *bio; + void *p; + + p = mempool_alloc(bs->bio_pool, gfp_mask); + if (unlikely(!p)) + return NULL; + bio = p + bs->front_pad; bio_init(bio); @@ -332,22 +303,50 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) nr_iovecs = bvec_nr_vecs(idx); } +out_set: bio->bi_flags |= idx << BIO_POOL_OFFSET; bio->bi_max_vecs = nr_iovecs; -out_set: bio->bi_io_vec = bvl; - return bio; err_free: - if (bs) - mempool_free(p, bs->bio_pool); - else - kfree(bio); -err: + mempool_free(p, bs->bio_pool); return NULL; } +static void bio_fs_destructor(struct bio *bio) +{ + bio_free(bio, fs_bio_set); +} + +/** + * bio_alloc - allocate a new bio, memory pool backed + * @gfp_mask: allocation mask to use + * @nr_iovecs: number of iovecs + * + * Allocate a new bio with @nr_iovecs bvecs. If @gfp_mask + * contains __GFP_WAIT, the allocation is guaranteed to succeed. + * + * RETURNS: + * Pointer to new bio on success, NULL on failure. + */ +struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) +{ + struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); + + if (bio) + bio->bi_destructor = bio_fs_destructor; + + return bio; +} + +static void bio_kmalloc_destructor(struct bio *bio) +{ + if (bio_integrity(bio)) + bio_integrity_free(bio); + kfree(bio); +} + /** * bio_alloc - allocate a bio for I/O * @gfp_mask: the GFP_ mask given to the slab allocator @@ -366,29 +365,20 @@ err: * do so can cause livelocks under memory pressure. * **/ -struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) -{ - struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); - - if (bio) - bio->bi_destructor = bio_fs_destructor; - - return bio; -} - -/* - * Like bio_alloc(), but doesn't use a mempool backing. This means that - * it CAN fail, but while bio_alloc() can only be used for allocations - * that have a short (finite) life span, bio_kmalloc() should be used - * for more permanent bio allocations (like allocating some bio's for - * initalization or setup purposes). - */ struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs) { - struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); + struct bio *bio; - if (bio) - bio->bi_destructor = bio_kmalloc_destructor; + bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec), + gfp_mask); + if (unlikely(!bio)) + return NULL; + + bio_init(bio); + bio->bi_flags |= BIO_POOL_NONE << BIO_POOL_OFFSET; + bio->bi_max_vecs = nr_iovecs; + bio->bi_io_vec = bio->bi_inline_vecs; + bio->bi_destructor = bio_kmalloc_destructor; return bio; } diff --git a/include/linux/bio.h b/include/linux/bio.h index b89cf2d82898..7b214fd672a2 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -132,6 +132,7 @@ struct bio { * top 4 bits of bio flags indicate the pool this bio came from */ #define BIO_POOL_BITS (4) +#define BIO_POOL_NONE ((1UL << BIO_POOL_BITS) - 1) #define BIO_POOL_OFFSET (BITS_PER_LONG - BIO_POOL_BITS) #define BIO_POOL_MASK (1UL << BIO_POOL_OFFSET) #define BIO_POOL_IDX(bio) ((bio)->bi_flags >> BIO_POOL_OFFSET) -- cgit v1.2.3-59-g8ed1b From 71982a409f12c50d011325a4471aa20666bb908d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 17 Apr 2009 08:34:48 +0200 Subject: block: include empty disks in /proc/diskstats /proc/diskstats used to show stats for all disks whether they're zero-sized or not and their non-zero partitions. Commit 074a7aca7afa6f230104e8e65eba3420263714a5 accidentally changed the behavior such that it doesn't print out zero sized disks. This patch implements DISK_PITER_INCL_EMPTY_PART0 flag to partition iterator and uses it in diskstats_show() such that empty part0 is shown in /proc/diskstats. Reported and bisectd by Dianel Collins. Signed-off-by: Tejun Heo Reported-by: Daniel Collins Signed-off-by: Jens Axboe --- block/genhd.c | 12 ++++++++---- include/linux/genhd.h | 1 + 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/block/genhd.c b/block/genhd.c index a9ec910974c1..1a4916e01732 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -98,7 +98,7 @@ void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk, if (flags & DISK_PITER_REVERSE) piter->idx = ptbl->len - 1; - else if (flags & DISK_PITER_INCL_PART0) + else if (flags & (DISK_PITER_INCL_PART0 | DISK_PITER_INCL_EMPTY_PART0)) piter->idx = 0; else piter->idx = 1; @@ -134,7 +134,8 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) /* determine iteration parameters */ if (piter->flags & DISK_PITER_REVERSE) { inc = -1; - if (piter->flags & DISK_PITER_INCL_PART0) + if (piter->flags & (DISK_PITER_INCL_PART0 | + DISK_PITER_INCL_EMPTY_PART0)) end = -1; else end = 0; @@ -150,7 +151,10 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) part = rcu_dereference(ptbl->part[piter->idx]); if (!part) continue; - if (!(piter->flags & DISK_PITER_INCL_EMPTY) && !part->nr_sects) + if (!part->nr_sects && + !(piter->flags & DISK_PITER_INCL_EMPTY) && + !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 && + piter->idx == 0)) continue; get_device(part_to_dev(part)); @@ -1011,7 +1015,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) "\n\n"); */ - disk_part_iter_init(&piter, gp, DISK_PITER_INCL_PART0); + disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0); while ((hd = disk_part_iter_next(&piter))) { cpu = part_stat_lock(); part_round_stats(cpu, hd); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 634c53028fb8..a1a28caed23d 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -214,6 +214,7 @@ static inline void disk_put_part(struct hd_struct *part) #define DISK_PITER_REVERSE (1 << 0) /* iterate in the reverse direction */ #define DISK_PITER_INCL_EMPTY (1 << 1) /* include 0-sized parts */ #define DISK_PITER_INCL_PART0 (1 << 2) /* include partition 0 */ +#define DISK_PITER_INCL_EMPTY_PART0 (1 << 3) /* include empty partition 0 */ struct disk_part_iter { struct gendisk *disk; -- cgit v1.2.3-59-g8ed1b From 4cd481f68dde99ac416003b825c835f71e364393 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 13 Apr 2009 11:59:32 +0200 Subject: KVM: Fix overlapping check for memory slots When checking for overlapping slots on registration of a new one, kvm currently also considers zero-length (ie. deleted) slots and rejects requests incorrectly. This finally denies user space from joining slots. Fix the check by skipping deleted slots and advertise this via a KVM_CAP_JOIN_MEMORY_REGIONS_WORKS. Cc: stable@kernel.org Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- include/linux/kvm.h | 2 ++ virt/kvm/kvm_main.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 311a073afe8a..8cc137911b34 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -409,6 +409,8 @@ struct kvm_trace_rec { #ifdef __KVM_HAVE_DEVICE_ASSIGNMENT #define KVM_CAP_DEVICE_DEASSIGNMENT 27 #endif +/* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */ +#define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 28d693a1ee8f..1ecbe2391c8b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -961,7 +961,7 @@ int __kvm_set_memory_region(struct kvm *kvm, for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { struct kvm_memory_slot *s = &kvm->memslots[i]; - if (s == memslot) + if (s == memslot || !s->npages) continue; if (!((base_gfn + npages <= s->base_gfn) || (base_gfn >= s->base_gfn + s->npages))) @@ -1983,6 +1983,7 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) switch (arg) { case KVM_CAP_USER_MEMORY: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: + case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: return 1; #ifdef CONFIG_HAVE_KVM_IRQCHIP case KVM_CAP_IRQ_ROUTING: -- cgit v1.2.3-59-g8ed1b From 1b6b8ce2ac372ea1f2065b89228ede105eb68dc5 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Thu, 9 Apr 2009 14:57:39 +0800 Subject: PCI: only save/restore existent registers in the PCIe capability PCIe 1.1 base neither requires the endpoint to implement the entire PCIe capability structure nor specifies default values of registers that are not implemented by the device. So we only save and restore registers that must be implemented by different device types if the device PCIe capability version is 1. PCIe 1.1 Capability Structure Expansion ECN and PCIe 2.0 requires all registers in the PCIe capability to be either implemented or hardwired to 0. Their PCIe capability version is 2. Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 70 ++++++++++++++++++++++++++++++++++++++---------- include/linux/pci_regs.h | 1 + 2 files changed, 57 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 16fd0d4c3166..34bf0fdf5047 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -681,11 +681,34 @@ EXPORT_SYMBOL(pci_choose_state); #define PCI_EXP_SAVE_REGS 7 +#define pcie_cap_has_devctl(type, flags) 1 +#define pcie_cap_has_lnkctl(type, flags) \ + ((flags & PCI_EXP_FLAGS_VERS) > 1 || \ + (type == PCI_EXP_TYPE_ROOT_PORT || \ + type == PCI_EXP_TYPE_ENDPOINT || \ + type == PCI_EXP_TYPE_LEG_END)) +#define pcie_cap_has_sltctl(type, flags) \ + ((flags & PCI_EXP_FLAGS_VERS) > 1 || \ + ((type == PCI_EXP_TYPE_ROOT_PORT) || \ + (type == PCI_EXP_TYPE_DOWNSTREAM && \ + (flags & PCI_EXP_FLAGS_SLOT)))) +#define pcie_cap_has_rtctl(type, flags) \ + ((flags & PCI_EXP_FLAGS_VERS) > 1 || \ + (type == PCI_EXP_TYPE_ROOT_PORT || \ + type == PCI_EXP_TYPE_RC_EC)) +#define pcie_cap_has_devctl2(type, flags) \ + ((flags & PCI_EXP_FLAGS_VERS) > 1) +#define pcie_cap_has_lnkctl2(type, flags) \ + ((flags & PCI_EXP_FLAGS_VERS) > 1) +#define pcie_cap_has_sltctl2(type, flags) \ + ((flags & PCI_EXP_FLAGS_VERS) > 1) + static int pci_save_pcie_state(struct pci_dev *dev) { int pos, i = 0; struct pci_cap_saved_state *save_state; u16 *cap; + u16 flags; pos = pci_find_capability(dev, PCI_CAP_ID_EXP); if (pos <= 0) @@ -698,13 +721,22 @@ static int pci_save_pcie_state(struct pci_dev *dev) } cap = (u16 *)&save_state->data[0]; - pci_read_config_word(dev, pos + PCI_EXP_DEVCTL, &cap[i++]); - pci_read_config_word(dev, pos + PCI_EXP_LNKCTL, &cap[i++]); - pci_read_config_word(dev, pos + PCI_EXP_SLTCTL, &cap[i++]); - pci_read_config_word(dev, pos + PCI_EXP_RTCTL, &cap[i++]); - pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &cap[i++]); - pci_read_config_word(dev, pos + PCI_EXP_LNKCTL2, &cap[i++]); - pci_read_config_word(dev, pos + PCI_EXP_SLTCTL2, &cap[i++]); + pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &flags); + + if (pcie_cap_has_devctl(dev->pcie_type, flags)) + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL, &cap[i++]); + if (pcie_cap_has_lnkctl(dev->pcie_type, flags)) + pci_read_config_word(dev, pos + PCI_EXP_LNKCTL, &cap[i++]); + if (pcie_cap_has_sltctl(dev->pcie_type, flags)) + pci_read_config_word(dev, pos + PCI_EXP_SLTCTL, &cap[i++]); + if (pcie_cap_has_rtctl(dev->pcie_type, flags)) + pci_read_config_word(dev, pos + PCI_EXP_RTCTL, &cap[i++]); + if (pcie_cap_has_devctl2(dev->pcie_type, flags)) + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, &cap[i++]); + if (pcie_cap_has_lnkctl2(dev->pcie_type, flags)) + pci_read_config_word(dev, pos + PCI_EXP_LNKCTL2, &cap[i++]); + if (pcie_cap_has_sltctl2(dev->pcie_type, flags)) + pci_read_config_word(dev, pos + PCI_EXP_SLTCTL2, &cap[i++]); return 0; } @@ -714,6 +746,7 @@ static void pci_restore_pcie_state(struct pci_dev *dev) int i = 0, pos; struct pci_cap_saved_state *save_state; u16 *cap; + u16 flags; save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP); pos = pci_find_capability(dev, PCI_CAP_ID_EXP); @@ -721,13 +754,22 @@ static void pci_restore_pcie_state(struct pci_dev *dev) return; cap = (u16 *)&save_state->data[0]; - pci_write_config_word(dev, pos + PCI_EXP_DEVCTL, cap[i++]); - pci_write_config_word(dev, pos + PCI_EXP_LNKCTL, cap[i++]); - pci_write_config_word(dev, pos + PCI_EXP_SLTCTL, cap[i++]); - pci_write_config_word(dev, pos + PCI_EXP_RTCTL, cap[i++]); - pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, cap[i++]); - pci_write_config_word(dev, pos + PCI_EXP_LNKCTL2, cap[i++]); - pci_write_config_word(dev, pos + PCI_EXP_SLTCTL2, cap[i++]); + pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &flags); + + if (pcie_cap_has_devctl(dev->pcie_type, flags)) + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL, cap[i++]); + if (pcie_cap_has_lnkctl(dev->pcie_type, flags)) + pci_write_config_word(dev, pos + PCI_EXP_LNKCTL, cap[i++]); + if (pcie_cap_has_sltctl(dev->pcie_type, flags)) + pci_write_config_word(dev, pos + PCI_EXP_SLTCTL, cap[i++]); + if (pcie_cap_has_rtctl(dev->pcie_type, flags)) + pci_write_config_word(dev, pos + PCI_EXP_RTCTL, cap[i++]); + if (pcie_cap_has_devctl2(dev->pcie_type, flags)) + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, cap[i++]); + if (pcie_cap_has_lnkctl2(dev->pcie_type, flags)) + pci_write_config_word(dev, pos + PCI_EXP_LNKCTL2, cap[i++]); + if (pcie_cap_has_sltctl2(dev->pcie_type, flags)) + pci_write_config_word(dev, pos + PCI_EXP_SLTCTL2, cap[i++]); } diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index e4d08c1b2e0b..616bf8b3c8b5 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -376,6 +376,7 @@ #define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */ #define PCI_EXP_TYPE_PCI_BRIDGE 0x7 /* PCI/PCI-X Bridge */ #define PCI_EXP_TYPE_RC_END 0x9 /* Root Complex Integrated Endpoint */ +#define PCI_EXP_TYPE_RC_EC 0x10 /* Root Complex Event Collector */ #define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */ #define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */ #define PCI_EXP_DEVCAP 4 /* Device capabilities */ -- cgit v1.2.3-59-g8ed1b From 952043ac12a117d8e94bddd9088338d7ad20ca7d Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 23 Apr 2009 08:48:15 +0100 Subject: bitops: Add __ffs64 bitop Finds the first set bit in a 64 bit word. This is required in order to fix a bug in GFS2, but I think it should be a generic function in case of future users. Signed-off-by: Steven Whitehouse Reviewed-by: Christoph Lameter Reviewed-by: Willy Tarreau --- include/linux/bitops.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 61829139795a..c05a29cb9bb2 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -112,6 +112,25 @@ static inline unsigned fls_long(unsigned long l) return fls64(l); } +/** + * __ffs64 - find first set bit in a 64 bit word + * @word: The 64 bit word + * + * On 64 bit arches this is a synomyn for __ffs + * The result is not defined if no bits are set, so check that @word + * is non-zero before calling this. + */ +static inline unsigned long __ffs64(u64 word) +{ +#if BITS_PER_LONG == 32 + if (((u32)word) == 0UL) + return __ffs((u32)(word >> 32)) + 32; +#elif BITS_PER_LONG != 64 +#error BITS_PER_LONG not 32 or 64 +#endif + return __ffs((unsigned long)word); +} + #ifdef __KERNEL__ #ifdef CONFIG_GENERIC_FIND_FIRST_BIT -- cgit v1.2.3-59-g8ed1b From fbfc396efbc11d784b4325adfc02e82a0df01a8d Mon Sep 17 00:00:00 2001 From: "Mark A. Greer" Date: Tue, 21 Apr 2009 20:52:54 -0700 Subject: USB: musb: Prevent multiple includes of musb.h Add #ifndef to musb header file to prevent multiple inclusions. Signed-off-by: Mark A. Greer Signed-off-by: David Brownell Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/musb.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h index d6aad0ea6033..d43755669261 100644 --- a/include/linux/usb/musb.h +++ b/include/linux/usb/musb.h @@ -7,6 +7,9 @@ * key configuration differences between boards. */ +#ifndef __LINUX_USB_MUSB_H +#define __LINUX_USB_MUSB_H + /* The USB role is defined by the connector used on the board, so long as * standards are being followed. (Developer boards sometimes won't.) */ @@ -101,3 +104,5 @@ extern int __init tusb6010_setup_interface( extern int tusb6010_platform_retime(unsigned is_refclk); #endif /* OMAP2 */ + +#endif /* __LINUX_USB_MUSB_H */ -- cgit v1.2.3-59-g8ed1b From 097102c2d04974bdfcfa16a5f3062d499842139c Mon Sep 17 00:00:00 2001 From: Alexander Beregalov Date: Tue, 21 Apr 2009 09:33:14 +0200 Subject: pktcdvd.h should include mempool.h Fix this build error: In file included from fs/compat_ioctl.c:104: include/linux/pktcdvd.h:285: error: expected specifier-qualifier-list before 'mempool_t' Signed-off-by: Alexander Beregalov Signed-off-by: Jens Axboe --- include/linux/pktcdvd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h index 04b4d7330e6d..d745f5b6c7b0 100644 --- a/include/linux/pktcdvd.h +++ b/include/linux/pktcdvd.h @@ -113,6 +113,7 @@ struct pkt_ctrl_command { #include #include #include +#include /* default bio write queue congestion marks */ #define PKT_WRITE_CONGESTION_ON 10000 -- cgit v1.2.3-59-g8ed1b From 42dad7647aec49b3ad20dd0cb832b232a6ae514f Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Wed, 22 Apr 2009 14:01:49 +0200 Subject: block: simplify I/O stat accounting This simplifies I/O stat accounting switching code and separates it completely from I/O scheduler switch code. Requests are accounted according to the state of their request queue at the time of the request allocation. There is no need anymore to flush the request queue when switching I/O accounting state. Signed-off-by: Jerome Marchand Signed-off-by: Jens Axboe --- block/blk-core.c | 6 ++++-- block/blk-merge.c | 5 ++++- block/blk-sysfs.c | 4 ---- block/blk.h | 7 +------ include/linux/blkdev.h | 3 +++ 5 files changed, 12 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 07ab75403e1a..2998fe3a2377 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -643,7 +643,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq) } static struct request * -blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask) +blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask) { struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); @@ -652,7 +652,7 @@ blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask) blk_rq_init(q, rq); - rq->cmd_flags = rw | REQ_ALLOCED; + rq->cmd_flags = flags | REQ_ALLOCED; if (priv) { if (unlikely(elv_set_request(q, rq, gfp_mask))) { @@ -792,6 +792,8 @@ static struct request *get_request(struct request_queue *q, int rw_flags, if (priv) rl->elvpriv++; + if (blk_queue_io_stat(q)) + rw_flags |= REQ_IO_STAT; spin_unlock_irq(q->queue_lock); rq = blk_alloc_request(q, rw_flags, priv, gfp_mask); diff --git a/block/blk-merge.c b/block/blk-merge.c index 63760ca3da0f..23d2a6fe34a3 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -402,7 +402,10 @@ static int attempt_merge(struct request_queue *q, struct request *req, elv_merge_requests(q, req, next); - blk_account_io_merge(req); + /* + * 'next' is going away, so update stats accordingly + */ + blk_account_io_merge(next); req->ioprio = ioprio_best(req->ioprio, next->ioprio); if (blk_rq_cpu_valid(next)) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index cac4e9febe6a..3ff9bba3379a 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -209,14 +209,10 @@ static ssize_t queue_iostats_store(struct request_queue *q, const char *page, ssize_t ret = queue_var_store(&stats, page, count); spin_lock_irq(q->queue_lock); - elv_quiesce_start(q); - if (stats) queue_flag_set(QUEUE_FLAG_IO_STAT, q); else queue_flag_clear(QUEUE_FLAG_IO_STAT, q); - - elv_quiesce_end(q); spin_unlock_irq(q->queue_lock); return ret; diff --git a/block/blk.h b/block/blk.h index 5dfc41267a08..79c85f7c9ff5 100644 --- a/block/blk.h +++ b/block/blk.h @@ -114,12 +114,7 @@ static inline int blk_cpu_to_group(int cpu) static inline int blk_do_io_stat(struct request *rq) { - struct gendisk *disk = rq->rq_disk; - - if (!disk || !disk->queue) - return 0; - - return blk_queue_io_stat(disk->queue) && (rq->cmd_flags & REQ_ELVPRIV); + return rq->rq_disk && blk_rq_io_stat(rq); } #endif diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ba54c834a590..2755d5c6da22 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -118,6 +118,7 @@ enum rq_flag_bits { __REQ_COPY_USER, /* contains copies of user pages */ __REQ_INTEGRITY, /* integrity metadata has been remapped */ __REQ_NOIDLE, /* Don't anticipate more IO after this one */ + __REQ_IO_STAT, /* account I/O stat */ __REQ_NR_BITS, /* stops here */ }; @@ -145,6 +146,7 @@ enum rq_flag_bits { #define REQ_COPY_USER (1 << __REQ_COPY_USER) #define REQ_INTEGRITY (1 << __REQ_INTEGRITY) #define REQ_NOIDLE (1 << __REQ_NOIDLE) +#define REQ_IO_STAT (1 << __REQ_IO_STAT) #define BLK_MAX_CDB 16 @@ -598,6 +600,7 @@ enum { blk_failfast_transport(rq) || \ blk_failfast_driver(rq)) #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) +#define blk_rq_io_stat(rq) ((rq)->cmd_flags & REQ_IO_STAT) #define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq))) -- cgit v1.2.3-59-g8ed1b From 71951b64a5a87c09eb6fde59ce51aaab2fdaeab2 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 24 Apr 2009 16:58:41 +0200 Subject: netfilter: nf_ct_dccp: add missing role attributes for DCCP This patch adds missing role attribute to the DCCP type, otherwise the creation of entries is not of any use. The attribute added is CTA_PROTOINFO_DCCP_ROLE which contains the role of the conntrack original tuple. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- include/linux/netfilter/nfnetlink_conntrack.h | 1 + net/netfilter/nf_conntrack_proto_dccp.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index 29fe9ea1d346..1a865e48b8eb 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -100,6 +100,7 @@ enum ctattr_protoinfo_tcp { enum ctattr_protoinfo_dccp { CTA_PROTOINFO_DCCP_UNSPEC, CTA_PROTOINFO_DCCP_STATE, + CTA_PROTOINFO_DCCP_ROLE, __CTA_PROTOINFO_DCCP_MAX, }; #define CTA_PROTOINFO_DCCP_MAX (__CTA_PROTOINFO_DCCP_MAX - 1) diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 5411d63f31a5..8e757dd53396 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -633,6 +633,8 @@ static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, if (!nest_parms) goto nla_put_failure; NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state); + NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_ROLE, + ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]); nla_nest_end(skb, nest_parms); read_unlock_bh(&dccp_lock); return 0; @@ -644,6 +646,7 @@ nla_put_failure: static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = { [CTA_PROTOINFO_DCCP_STATE] = { .type = NLA_U8 }, + [CTA_PROTOINFO_DCCP_ROLE] = { .type = NLA_U8 }, }; static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) @@ -661,11 +664,21 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) return err; if (!tb[CTA_PROTOINFO_DCCP_STATE] || - nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]) >= CT_DCCP_IGNORE) + !tb[CTA_PROTOINFO_DCCP_ROLE] || + nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) > CT_DCCP_ROLE_MAX || + nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]) >= CT_DCCP_IGNORE) { return -EINVAL; + } write_lock_bh(&dccp_lock); ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]); + if (nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) == CT_DCCP_ROLE_CLIENT) { + ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; + ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER; + } else { + ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_SERVER; + ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_CLIENT; + } write_unlock_bh(&dccp_lock); return 0; } -- cgit v1.2.3-59-g8ed1b From c80d471a476b6d6fe0bc1fd25293c24c66b7aaaf Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Sat, 25 Apr 2009 22:10:56 -0400 Subject: Add new HEAD_TEXT_SECTION macro. This patch is preparation for replacing all uses of ".head.text" or ".text.head" in the kernel with macros, so that the section name can later be changed without having to touch a lot of the kernel. Since some linker scripts do more complex things than referencing HEAD_TEXT, we add a HEAD_TEXT_SECTION macro that just contains the actual name. I've defined HEAD_TEXT_SECTION in a new header, include/linux/section-names.h, so that this section name only needs to appear in one place. I anticipate creating similar macro structures for a number of other section names. The long-term goal here is to be able to change the kernel's magic section names to those that are compatible with -ffunction-sections -fdata-sections. This requires renaming all magic sections with names of the form ".text.foo". Signed-off-by: Tim Abbott Signed-off-by: Linus Torvalds --- include/asm-generic/vmlinux.lds.h | 4 +++- include/linux/init.h | 4 +++- include/linux/section-names.h | 6 ++++++ 3 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 include/linux/section-names.h (limited to 'include/linux') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 7fa660fd449c..eaa06ef6f7d9 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -1,3 +1,5 @@ +#include + #ifndef LOAD_OFFSET #define LOAD_OFFSET 0 #endif @@ -331,7 +333,7 @@ #endif /* Section used for early init (in .S files) */ -#define HEAD_TEXT *(.head.text) +#define HEAD_TEXT *(HEAD_TEXT_SECTION) /* init and exit section handling */ #define INIT_DATA \ diff --git a/include/linux/init.h b/include/linux/init.h index f121a7a10c3d..20a1334e34e9 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -2,6 +2,8 @@ #define _LINUX_INIT_H #include +#include +#include /* These macros are used to mark some functions or * initialized data (doesn't apply to uninitialized data) @@ -107,7 +109,7 @@ #define __memexitconst __section(.memexit.rodata) /* For assembly routines */ -#define __HEAD .section ".head.text","ax" +#define __HEAD .section __stringify(HEAD_TEXT_SECTION),"ax" #define __INIT .section ".init.text","ax" #define __FINIT .previous diff --git a/include/linux/section-names.h b/include/linux/section-names.h new file mode 100644 index 000000000000..c956f4eb2adf --- /dev/null +++ b/include/linux/section-names.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_SECTION_NAMES_H +#define __LINUX_SECTION_NAMES_H + +#define HEAD_TEXT_SECTION .head.text + +#endif /* !__LINUX_SECTION_NAMES_H */ -- cgit v1.2.3-59-g8ed1b From c759a6b4e1cae6aff71f58c9c85404ebcd81b6e0 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 27 Apr 2009 02:36:20 -0700 Subject: net: Fix LL_MAX_HEADER for CONFIG_TR_MODULE Unless I miss anything this should fix a bug. Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2e7783f4a755..453be9a674c0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -104,7 +104,7 @@ struct wireless_dev; # else # define LL_MAX_HEADER 96 # endif -#elif defined(CONFIG_TR) +#elif defined(CONFIG_TR) || defined(CONFIG_TR_MODULE) # define LL_MAX_HEADER 48 #else # define LL_MAX_HEADER 32 -- cgit v1.2.3-59-g8ed1b From 37b607c5ac3b7c92a6a3624bb29f1cdcdcf7044a Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 27 Apr 2009 05:45:54 -0700 Subject: net: Fix typo in net_device_ops description. Signed-off-by: Mike Rapoport Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 453be9a674c0..5a96a1a406e9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -500,7 +500,7 @@ struct netdev_queue { * * int (*ndo_set_mac_address)(struct net_device *dev, void *addr); * This function is called when the Media Access Control address - * needs to be changed. If not this interface is not defined, the + * needs to be changed. If this interface is not defined, the * mac address can not be changed. * * int (*ndo_validate_addr)(struct net_device *dev); -- cgit v1.2.3-59-g8ed1b From 27b1833279995e7c290a40cac4ef36ccea7e9283 Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Mon, 27 Apr 2009 14:02:27 -0400 Subject: Remove unused support code for refok sections. The old refok sections .text.init.refok .data.init.refok .exit.text.refok have been deprecated since commit 312b1485fb509c9bc32eda28ad29537896658cb8. After the other patches in this patch series nothing is put in these sections, so clean things up by eliminating all the remaining references to them. Signed-off-by: Tim Abbott Acked-by: Sam Ravnborg Signed-off-by: Linus Torvalds --- include/asm-generic/vmlinux.lds.h | 3 --- include/linux/init.h | 8 -------- scripts/mod/modpost.c | 18 ------------------ 3 files changed, 29 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index eaa06ef6f7d9..89853bcd27a6 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -90,7 +90,6 @@ /* .data section */ #define DATA_DATA \ *(.data) \ - *(.data.init.refok) \ *(.ref.data) \ DEV_KEEP(init.data) \ DEV_KEEP(exit.data) \ @@ -289,8 +288,6 @@ *(.text.hot) \ *(.text) \ *(.ref.text) \ - *(.text.init.refok) \ - *(.exit.text.refok) \ DEV_KEEP(init.text) \ DEV_KEEP(exit.text) \ CPU_KEEP(init.text) \ diff --git a/include/linux/init.h b/include/linux/init.h index 20a1334e34e9..0e06c176f185 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -62,14 +62,6 @@ #define __refdata __section(.ref.data) #define __refconst __section(.ref.rodata) -/* backward compatibility note - * A few places hardcode the old section names: - * .text.init.refok - * .data.init.refok - * .exit.text.refok - * They should be converted to use the defines from this file - */ - /* compatibility defines */ #define __init_refok __ref #define __initdata_refok __refdata diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index df6e6286a065..8d46ea7d6715 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -794,15 +794,6 @@ static const char *init_exit_sections[] = /* data section */ static const char *data_sections[] = { DATA_SECTIONS, NULL }; -/* sections that may refer to an init/exit section with no warning */ -static const char *initref_sections[] = -{ - ".text.init.refok*", - ".exit.text.refok*", - ".data.init.refok*", - NULL -}; - /* symbols in .data that may refer to init/exit sections */ static const char *symbol_white_list[] = @@ -915,11 +906,6 @@ static int section_mismatch(const char *fromsec, const char *tosec) /** * Whitelist to allow certain references to pass with no warning. * - * Pattern 0: - * Do not warn if funtion/data are marked with __init_refok/__initdata_refok. - * The pattern is identified by: - * fromsec = .text.init.refok* | .data.init.refok* - * * Pattern 1: * If a module parameter is declared __initdata and permissions=0 * then this is legal despite the warning generated. @@ -958,10 +944,6 @@ static int section_mismatch(const char *fromsec, const char *tosec) static int secref_whitelist(const char *fromsec, const char *fromsym, const char *tosec, const char *tosym) { - /* Check for pattern 0 */ - if (match(fromsec, initref_sections)) - return 0; - /* Check for pattern 1 */ if (match(tosec, init_data_sections) && match(fromsec, data_sections) && -- cgit v1.2.3-59-g8ed1b From bf368e4e70cd4e0f880923c44e95a4273d725ab4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Apr 2009 02:24:21 -0700 Subject: net: Avoid extra wakeups of threads blocked in wait_for_packet() In 2.6.25 we added UDP mem accounting. This unfortunatly added a penalty when a frame is transmitted, since we have at TX completion time to call sock_wfree() to perform necessary memory accounting. This calls sock_def_write_space() and utimately scheduler if any thread is waiting on the socket. Thread(s) waiting for an incoming frame was scheduled, then had to sleep again as event was meaningless. (All threads waiting on a socket are using same sk_sleep anchor) This adds lot of extra wakeups and increases latencies, as noted by Christoph Lameter, and slows down softirq handler. Reference : http://marc.info/?l=linux-netdev&m=124060437012283&w=2 Fortunatly, Davide Libenzi recently added concept of keyed wakeups into kernel, and particularly for sockets (see commit 37e5540b3c9d838eb20f2ca8ea2eb8072271e403 epoll keyed wakeups: make sockets use keyed wakeups) Davide goal was to optimize epoll, but this new wakeup infrastructure can help non epoll users as well, if they care to setup an appropriate handler. This patch introduces new DEFINE_WAIT_FUNC() helper and uses it in wait_for_packet(), so that only relevant event can wakeup a thread blocked in this function. Trace of function calls from bnx2 TX completion bnx2_poll_work() is : __kfree_skb() skb_release_head_state() sock_wfree() sock_def_write_space() __wake_up_sync_key() __wake_up_common() receiver_wake_function() : Stops here since thread is waiting for an INPUT Reported-by: Christoph Lameter Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/wait.h | 6 ++++-- net/core/datagram.c | 14 +++++++++++++- 2 files changed, 17 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 5d631c17eaee..bc024632f365 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -440,13 +440,15 @@ void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key); int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); -#define DEFINE_WAIT(name) \ +#define DEFINE_WAIT_FUNC(name, function) \ wait_queue_t name = { \ .private = current, \ - .func = autoremove_wake_function, \ + .func = function, \ .task_list = LIST_HEAD_INIT((name).task_list), \ } +#define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function) + #define DEFINE_WAIT_BIT(name, word, bit) \ struct wait_bit_queue name = { \ .key = __WAIT_BIT_KEY_INITIALIZER(word, bit), \ diff --git a/net/core/datagram.c b/net/core/datagram.c index d0de644b378d..b01a76abe1d2 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -64,13 +64,25 @@ static inline int connection_based(struct sock *sk) return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM; } +static int receiver_wake_function(wait_queue_t *wait, unsigned mode, int sync, + void *key) +{ + unsigned long bits = (unsigned long)key; + + /* + * Avoid a wakeup if event not interesting for us + */ + if (bits && !(bits & (POLLIN | POLLERR))) + return 0; + return autoremove_wake_function(wait, mode, sync, key); +} /* * Wait for a packet.. */ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p) { int error; - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, receiver_wake_function); prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); -- cgit v1.2.3-59-g8ed1b From 6916d97f6e25cc66a32d6e9a16419067d843b14f Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Mon, 27 Apr 2009 11:52:43 -0700 Subject: Input: bcm5974 - add quad-finger tapping The integrated button on the new unibody Macbooks presents a need to report explicit four-finger actions. Evidently, the finger pressing the button is also touching the trackpad, so in order to fully support three-finger actions, the driver must be able to report four-finger actions. This patch adds a new button, BTN_TOOL_QUADTAP, which achieves this. Signed-off-by: Henrik Rydberg Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/bcm5974.c | 4 +++- include/linux/input.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c index bda873393b0d..2ddf05e1d852 100644 --- a/drivers/input/mouse/bcm5974.c +++ b/drivers/input/mouse/bcm5974.c @@ -258,6 +258,7 @@ static void setup_events_to_report(struct input_dev *input_dev, __set_bit(BTN_TOOL_FINGER, input_dev->keybit); __set_bit(BTN_TOOL_DOUBLETAP, input_dev->keybit); __set_bit(BTN_TOOL_TRIPLETAP, input_dev->keybit); + __set_bit(BTN_TOOL_QUADTAP, input_dev->keybit); __set_bit(BTN_LEFT, input_dev->keybit); } @@ -329,7 +330,8 @@ static int report_tp_state(struct bcm5974 *dev, int size) input_report_key(input, BTN_TOUCH, dev->fingers > 0); input_report_key(input, BTN_TOOL_FINGER, dev->fingers == 1); input_report_key(input, BTN_TOOL_DOUBLETAP, dev->fingers == 2); - input_report_key(input, BTN_TOOL_TRIPLETAP, dev->fingers > 2); + input_report_key(input, BTN_TOOL_TRIPLETAP, dev->fingers == 3); + input_report_key(input, BTN_TOOL_QUADTAP, dev->fingers > 3); input_report_abs(input, ABS_PRESSURE, abs_p); input_report_abs(input, ABS_TOOL_WIDTH, abs_w); diff --git a/include/linux/input.h b/include/linux/input.h index 6b28048fc568..32cb825939be 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -445,6 +445,7 @@ struct input_absinfo { #define BTN_STYLUS2 0x14c #define BTN_TOOL_DOUBLETAP 0x14d #define BTN_TOOL_TRIPLETAP 0x14e +#define BTN_TOOL_QUADTAP 0x14f /* Four fingers on trackpad */ #define BTN_WHEEL 0x150 #define BTN_GEAR_DOWN 0x150 -- cgit v1.2.3-59-g8ed1b From 5e5ee686e3c0f8a3cbe9b75c2690326bf91af10d Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Tue, 28 Apr 2009 07:47:33 -0700 Subject: Input: add detailed multi-touch finger data report protocol In order to utilize the full power of the new multi-touch devices, a way to report detailed finger data to user space is needed. This patch adds a multi-touch (MT) protocol which allows drivers to report details for an arbitrary number of fingers. The driver sends a SYN_MT_REPORT event via the input_mt_sync() function when a complete finger has been reported. In order to stay compatible with existing applications, the data reported in a finger packet must not be recognized as single-touch events. In addition, all finger data must bypass input filtering, since subsequent events of the same type refer to different fingers. A set of ABS_MT events with the desired properties are defined. The events are divided into categories, to allow for partial implementation. The minimum set consists of ABS_MT_TOUCH_MAJOR, ABS_MT_POSITION_X and ABS_MT_POSITION_Y, which allows for multiple fingers to be tracked. If the device supports it, the ABS_MT_WIDTH_MAJOR may be used to provide the size of the approaching finger. Anisotropy and direction may be specified with ABS_MT_TOUCH_MINOR, ABS_MT_WIDTH_MINOR and ABS_MT_ORIENTATION. Devices with more granular information may specify general shapes as blobs, i.e., as a sequence of rectangular shapes grouped together by a ABS_MT_BLOB_ID. Finally, the ABS_MT_TOOL_TYPE may be used to specify whether the touching tool is a finger or a pen. Signed-off-by: Henrik Rydberg Signed-off-by: Dmitry Torokhov --- drivers/input/input.c | 13 +++++++++++++ include/linux/input.h | 23 +++++++++++++++++++++++ 2 files changed, 36 insertions(+) (limited to 'include/linux') diff --git a/drivers/input/input.c b/drivers/input/input.c index 8ff92aa13a0a..e54e002665b0 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -33,6 +33,15 @@ MODULE_LICENSE("GPL"); * EV_ABS events which should not be cached are listed here. */ static unsigned int input_abs_bypass_init_data[] __initdata = { + ABS_MT_TOUCH_MAJOR, + ABS_MT_TOUCH_MINOR, + ABS_MT_WIDTH_MAJOR, + ABS_MT_WIDTH_MINOR, + ABS_MT_ORIENTATION, + ABS_MT_POSITION_X, + ABS_MT_POSITION_Y, + ABS_MT_TOOL_TYPE, + ABS_MT_BLOB_ID, 0 }; static unsigned long input_abs_bypass[BITS_TO_LONGS(ABS_CNT)]; @@ -169,6 +178,10 @@ static void input_handle_event(struct input_dev *dev, disposition = INPUT_PASS_TO_HANDLERS; } break; + case SYN_MT_REPORT: + dev->sync = 0; + disposition = INPUT_PASS_TO_HANDLERS; + break; } break; diff --git a/include/linux/input.h b/include/linux/input.h index 32cb825939be..0e6ff5de3588 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -106,6 +106,7 @@ struct input_absinfo { #define SYN_REPORT 0 #define SYN_CONFIG 1 +#define SYN_MT_REPORT 2 /* * Keys and buttons @@ -645,6 +646,17 @@ struct input_absinfo { #define ABS_TOOL_WIDTH 0x1c #define ABS_VOLUME 0x20 #define ABS_MISC 0x28 + +#define ABS_MT_TOUCH_MAJOR 0x30 /* Major axis of touching ellipse */ +#define ABS_MT_TOUCH_MINOR 0x31 /* Minor axis (omit if circular) */ +#define ABS_MT_WIDTH_MAJOR 0x32 /* Major axis of approaching ellipse */ +#define ABS_MT_WIDTH_MINOR 0x33 /* Minor axis (omit if circular) */ +#define ABS_MT_ORIENTATION 0x34 /* Ellipse orientation */ +#define ABS_MT_POSITION_X 0x35 /* Center X ellipse position */ +#define ABS_MT_POSITION_Y 0x36 /* Center Y ellipse position */ +#define ABS_MT_TOOL_TYPE 0x37 /* Type of touching device */ +#define ABS_MT_BLOB_ID 0x38 /* Group a set of packets as a blob */ + #define ABS_MAX 0x3f #define ABS_CNT (ABS_MAX+1) @@ -743,6 +755,12 @@ struct input_absinfo { #define BUS_GSC 0x1A #define BUS_ATARI 0x1B +/* + * MT_TOOL types + */ +#define MT_TOOL_FINGER 0 +#define MT_TOOL_PEN 1 + /* * Values describing the status of a force-feedback effect */ @@ -1312,6 +1330,11 @@ static inline void input_sync(struct input_dev *dev) input_event(dev, EV_SYN, SYN_REPORT, 0); } +static inline void input_mt_sync(struct input_dev *dev) +{ + input_event(dev, EV_SYN, SYN_MT_REPORT, 0); +} + void input_set_capability(struct input_dev *dev, unsigned int type, unsigned int code); static inline void input_set_abs_params(struct input_dev *dev, int axis, int min, int max, int fuzz, int flat) -- cgit v1.2.3-59-g8ed1b From 9f6532519feab921856f41b30a2397ee25f4de49 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 3 Apr 2009 21:31:30 -0700 Subject: regulator: fix header file missing kernel-doc Add regulator header file missing kernel-doc: Warning(include/linux/regulator/driver.h:117): No description found for parameter 'set_mode' Signed-off-by: Randy Dunlap cc: Liam Girdwood cc: Mark Brown Signed-off-by: Liam Girdwood --- include/linux/regulator/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 4848d8dacd90..225f733e7533 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -50,6 +50,7 @@ enum regulator_status { * @set_current_limit: Configure a limit for a current-limited regulator. * @get_current_limit: Get the configured limit for a current-limited regulator. * + * @set_mode: Set the configured operating mode for the regulator. * @get_mode: Get the configured operating mode for the regulator. * @get_status: Return actual (not as-configured) status of regulator, as a * REGULATOR_STATUS value (or negative errno) -- cgit v1.2.3-59-g8ed1b From 942e4a2bd680c606af0211e64eb216be2e19bf61 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 28 Apr 2009 22:36:33 -0700 Subject: netfilter: revised locking for x_tables The x_tables are organized with a table structure and a per-cpu copies of the counters and rules. On older kernels there was a reader/writer lock per table which was a performance bottleneck. In 2.6.30-rc, this was converted to use RCU and the counters/rules which solved the performance problems for do_table but made replacing rules much slower because of the necessary RCU grace period. This version uses a per-cpu set of spinlocks and counters to allow to table processing to proceed without the cache thrashing of a global reader lock and keeps the same performance for table updates. Signed-off-by: Stephen Hemminger Acked-by: Linus Torvalds Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 73 +++++++++++++++++++-- net/ipv4/netfilter/arp_tables.c | 125 +++++++++++------------------------- net/ipv4/netfilter/ip_tables.c | 126 +++++++++++-------------------------- net/ipv6/netfilter/ip6_tables.c | 123 +++++++++++------------------------- net/netfilter/x_tables.c | 53 ++++++++-------- 5 files changed, 204 insertions(+), 296 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 7b1a652066c0..1b2e43502ef7 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -354,9 +354,6 @@ struct xt_table /* What hooks you will enter on */ unsigned int valid_hooks; - /* Lock for the curtain */ - struct mutex lock; - /* Man behind the curtain... */ struct xt_table_info *private; @@ -434,8 +431,74 @@ extern void xt_proto_fini(struct net *net, u_int8_t af); extern struct xt_table_info *xt_alloc_table_info(unsigned int size); extern void xt_free_table_info(struct xt_table_info *info); -extern void xt_table_entry_swap_rcu(struct xt_table_info *old, - struct xt_table_info *new); + +/* + * Per-CPU spinlock associated with per-cpu table entries, and + * with a counter for the "reading" side that allows a recursive + * reader to avoid taking the lock and deadlocking. + * + * "reading" is used by ip/arp/ip6 tables rule processing which runs per-cpu. + * It needs to ensure that the rules are not being changed while the packet + * is being processed. In some cases, the read lock will be acquired + * twice on the same CPU; this is okay because of the count. + * + * "writing" is used when reading counters. + * During replace any readers that are using the old tables have to complete + * before freeing the old table. This is handled by the write locking + * necessary for reading the counters. + */ +struct xt_info_lock { + spinlock_t lock; + unsigned char readers; +}; +DECLARE_PER_CPU(struct xt_info_lock, xt_info_locks); + +/* + * Note: we need to ensure that preemption is disabled before acquiring + * the per-cpu-variable, so we do it as a two step process rather than + * using "spin_lock_bh()". + * + * We _also_ need to disable bottom half processing before updating our + * nesting count, to make sure that the only kind of re-entrancy is this + * code being called by itself: since the count+lock is not an atomic + * operation, we can allow no races. + * + * _Only_ that special combination of being per-cpu and never getting + * re-entered asynchronously means that the count is safe. + */ +static inline void xt_info_rdlock_bh(void) +{ + struct xt_info_lock *lock; + + local_bh_disable(); + lock = &__get_cpu_var(xt_info_locks); + if (!lock->readers++) + spin_lock(&lock->lock); +} + +static inline void xt_info_rdunlock_bh(void) +{ + struct xt_info_lock *lock = &__get_cpu_var(xt_info_locks); + + if (!--lock->readers) + spin_unlock(&lock->lock); + local_bh_enable(); +} + +/* + * The "writer" side needs to get exclusive access to the lock, + * regardless of readers. This must be called with bottom half + * processing (and thus also preemption) disabled. + */ +static inline void xt_info_wrlock(unsigned int cpu) +{ + spin_lock(&per_cpu(xt_info_locks, cpu).lock); +} + +static inline void xt_info_wrunlock(unsigned int cpu) +{ + spin_unlock(&per_cpu(xt_info_locks, cpu).lock); +} /* * This helper is performance critical and must be inlined diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 5ba533d234db..831fe1879dc0 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -253,9 +253,9 @@ unsigned int arpt_do_table(struct sk_buff *skb, indev = in ? in->name : nulldevname; outdev = out ? out->name : nulldevname; - rcu_read_lock_bh(); - private = rcu_dereference(table->private); - table_base = rcu_dereference(private->entries[smp_processor_id()]); + xt_info_rdlock_bh(); + private = table->private; + table_base = private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); @@ -273,6 +273,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) + (2 * skb->dev->addr_len); + ADD_COUNTER(e->counters, hdr_len, 1); t = arpt_get_target(e); @@ -328,8 +329,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, e = (void *)e + e->next_offset; } } while (!hotdrop); - - rcu_read_unlock_bh(); + xt_info_rdunlock_bh(); if (hotdrop) return NF_DROP; @@ -711,9 +711,12 @@ static void get_counters(const struct xt_table_info *t, /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters * with data used by 'current' CPU - * We dont care about preemption here. + * + * Bottom half has to be disabled to prevent deadlock + * if new softirq were to run and call ipt_do_table */ - curcpu = raw_smp_processor_id(); + local_bh_disable(); + curcpu = smp_processor_id(); i = 0; ARPT_ENTRY_ITERATE(t->entries[curcpu], @@ -726,73 +729,22 @@ static void get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + xt_info_wrlock(cpu); ARPT_ENTRY_ITERATE(t->entries[cpu], t->size, add_entry_to_counter, counters, &i); + xt_info_wrunlock(cpu); } -} - - -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. */ -static int -add_counter_to_entry(struct arpt_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - -/* Take values from counters and add them back onto the current cpu */ -static void put_counters(struct xt_table_info *t, - const struct xt_counters counters[]) -{ - unsigned int i, cpu; - - local_bh_disable(); - cpu = smp_processor_id(); - i = 0; - ARPT_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_counter_to_entry, - counters, - &i); local_bh_enable(); } -static inline int -zero_entry_counter(struct arpt_entry *e, void *arg) -{ - e->counters.bcnt = 0; - e->counters.pcnt = 0; - return 0; -} - -static void -clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) -{ - unsigned int cpu; - const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; - - memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); - for_each_possible_cpu(cpu) { - memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); - ARPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, - zero_entry_counter, NULL); - } -} - static struct xt_counters *alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; struct xt_table_info *private = table->private; - struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change * (other than comefrom, which userspace doesn't care @@ -802,30 +754,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - goto nomem; - - info = xt_alloc_table_info(private->size); - if (!info) - goto free_counters; - - clone_counters(info, private); - - mutex_lock(&table->lock); - xt_table_entry_swap_rcu(private, info); - synchronize_net(); /* Wait until smoke has cleared */ + return ERR_PTR(-ENOMEM); - get_counters(info, counters); - put_counters(private, counters); - mutex_unlock(&table->lock); - - xt_free_table_info(info); + get_counters(private, counters); return counters; - - free_counters: - vfree(counters); - nomem: - return ERR_PTR(-ENOMEM); } static int copy_entries_to_user(unsigned int total_size, @@ -1094,8 +1027,9 @@ static int __do_replace(struct net *net, const char *name, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters. */ + /* Get the old counters, and synchronize with replace */ get_counters(oldinfo, counters); + /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, @@ -1165,10 +1099,23 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) return ret; } +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. */ +static int +add_counter_to_entry(struct arpt_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} + static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) { - unsigned int i; + unsigned int i, curcpu; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1224,26 +1171,26 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, goto free; } - mutex_lock(&t->lock); + local_bh_disable(); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } - preempt_disable(); i = 0; /* Choose the copy that is on our node */ - loc_cpu_entry = private->entries[smp_processor_id()]; + curcpu = smp_processor_id(); + loc_cpu_entry = private->entries[curcpu]; + xt_info_wrlock(curcpu); ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size, add_counter_to_entry, paddc, &i); - preempt_enable(); + xt_info_wrunlock(curcpu); unlock_up_free: - mutex_unlock(&t->lock); - + local_bh_enable(); xt_table_unlock(t); module_put(t->me); free: diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 810c0b62c7d4..2ec8d7290c40 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -338,10 +338,9 @@ ipt_do_table(struct sk_buff *skb, tgpar.hooknum = hook; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - - rcu_read_lock_bh(); - private = rcu_dereference(table->private); - table_base = rcu_dereference(private->entries[smp_processor_id()]); + xt_info_rdlock_bh(); + private = table->private; + table_base = private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); @@ -436,8 +435,7 @@ ipt_do_table(struct sk_buff *skb, e = (void *)e + e->next_offset; } } while (!hotdrop); - - rcu_read_unlock_bh(); + xt_info_rdunlock_bh(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; @@ -896,10 +894,13 @@ get_counters(const struct xt_table_info *t, /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters - * with data used by 'current' CPU - * We dont care about preemption here. + * with data used by 'current' CPU. + * + * Bottom half has to be disabled to prevent deadlock + * if new softirq were to run and call ipt_do_table */ - curcpu = raw_smp_processor_id(); + local_bh_disable(); + curcpu = smp_processor_id(); i = 0; IPT_ENTRY_ITERATE(t->entries[curcpu], @@ -912,74 +913,22 @@ get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + xt_info_wrlock(cpu); IPT_ENTRY_ITERATE(t->entries[cpu], t->size, add_entry_to_counter, counters, &i); + xt_info_wrunlock(cpu); } - -} - -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. */ -static int -add_counter_to_entry(struct ipt_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - -/* Take values from counters and add them back onto the current cpu */ -static void put_counters(struct xt_table_info *t, - const struct xt_counters counters[]) -{ - unsigned int i, cpu; - - local_bh_disable(); - cpu = smp_processor_id(); - i = 0; - IPT_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_counter_to_entry, - counters, - &i); local_bh_enable(); } - -static inline int -zero_entry_counter(struct ipt_entry *e, void *arg) -{ - e->counters.bcnt = 0; - e->counters.pcnt = 0; - return 0; -} - -static void -clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) -{ - unsigned int cpu; - const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; - - memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); - for_each_possible_cpu(cpu) { - memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); - IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, - zero_entry_counter, NULL); - } -} - static struct xt_counters * alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; struct xt_table_info *private = table->private; - struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -988,30 +937,11 @@ static struct xt_counters * alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - goto nomem; + return ERR_PTR(-ENOMEM); - info = xt_alloc_table_info(private->size); - if (!info) - goto free_counters; - - clone_counters(info, private); - - mutex_lock(&table->lock); - xt_table_entry_swap_rcu(private, info); - synchronize_net(); /* Wait until smoke has cleared */ - - get_counters(info, counters); - put_counters(private, counters); - mutex_unlock(&table->lock); - - xt_free_table_info(info); + get_counters(private, counters); return counters; - - free_counters: - vfree(counters); - nomem: - return ERR_PTR(-ENOMEM); } static int @@ -1306,8 +1236,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters. */ + /* Get the old counters, and synchronize with replace */ get_counters(oldinfo, counters); + /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, @@ -1377,11 +1308,23 @@ do_replace(struct net *net, void __user *user, unsigned int len) return ret; } +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. */ +static int +add_counter_to_entry(struct ipt_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) { - unsigned int i; + unsigned int i, curcpu; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1437,25 +1380,26 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat goto free; } - mutex_lock(&t->lock); + local_bh_disable(); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } - preempt_disable(); i = 0; /* Choose the copy that is on our node */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + curcpu = smp_processor_id(); + loc_cpu_entry = private->entries[curcpu]; + xt_info_wrlock(curcpu); IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, add_counter_to_entry, paddc, &i); - preempt_enable(); + xt_info_wrunlock(curcpu); unlock_up_free: - mutex_unlock(&t->lock); + local_bh_enable(); xt_table_unlock(t); module_put(t->me); free: diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 800ae8542471..219e165aea10 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -365,9 +365,9 @@ ip6t_do_table(struct sk_buff *skb, IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - rcu_read_lock_bh(); - private = rcu_dereference(table->private); - table_base = rcu_dereference(private->entries[smp_processor_id()]); + xt_info_rdlock_bh(); + private = table->private; + table_base = private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); @@ -466,7 +466,7 @@ ip6t_do_table(struct sk_buff *skb, #ifdef CONFIG_NETFILTER_DEBUG ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON; #endif - rcu_read_unlock_bh(); + xt_info_rdunlock_bh(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; @@ -926,9 +926,12 @@ get_counters(const struct xt_table_info *t, /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters * with data used by 'current' CPU - * We dont care about preemption here. + * + * Bottom half has to be disabled to prevent deadlock + * if new softirq were to run and call ipt_do_table */ - curcpu = raw_smp_processor_id(); + local_bh_disable(); + curcpu = smp_processor_id(); i = 0; IP6T_ENTRY_ITERATE(t->entries[curcpu], @@ -941,72 +944,22 @@ get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + xt_info_wrlock(cpu); IP6T_ENTRY_ITERATE(t->entries[cpu], t->size, add_entry_to_counter, counters, &i); + xt_info_wrunlock(cpu); } -} - -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. */ -static int -add_counter_to_entry(struct ip6t_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - -/* Take values from counters and add them back onto the current cpu */ -static void put_counters(struct xt_table_info *t, - const struct xt_counters counters[]) -{ - unsigned int i, cpu; - - local_bh_disable(); - cpu = smp_processor_id(); - i = 0; - IP6T_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_counter_to_entry, - counters, - &i); local_bh_enable(); } -static inline int -zero_entry_counter(struct ip6t_entry *e, void *arg) -{ - e->counters.bcnt = 0; - e->counters.pcnt = 0; - return 0; -} - -static void -clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) -{ - unsigned int cpu; - const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; - - memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); - for_each_possible_cpu(cpu) { - memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); - IP6T_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, - zero_entry_counter, NULL); - } -} - static struct xt_counters *alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; struct xt_table_info *private = table->private; - struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -1015,30 +968,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - goto nomem; + return ERR_PTR(-ENOMEM); - info = xt_alloc_table_info(private->size); - if (!info) - goto free_counters; - - clone_counters(info, private); - - mutex_lock(&table->lock); - xt_table_entry_swap_rcu(private, info); - synchronize_net(); /* Wait until smoke has cleared */ - - get_counters(info, counters); - put_counters(private, counters); - mutex_unlock(&table->lock); - - xt_free_table_info(info); + get_counters(private, counters); return counters; - - free_counters: - vfree(counters); - nomem: - return ERR_PTR(-ENOMEM); } static int @@ -1334,8 +1268,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters. */ + /* Get the old counters, and synchronize with replace */ get_counters(oldinfo, counters); + /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, @@ -1405,11 +1340,24 @@ do_replace(struct net *net, void __user *user, unsigned int len) return ret; } +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. */ +static int +add_counter_to_entry(struct ip6t_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} + static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) { - unsigned int i; + unsigned int i, curcpu; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1465,25 +1413,28 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, goto free; } - mutex_lock(&t->lock); + + local_bh_disable(); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } - preempt_disable(); i = 0; /* Choose the copy that is on our node */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + curcpu = smp_processor_id(); + xt_info_wrlock(curcpu); + loc_cpu_entry = private->entries[curcpu]; IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size, add_counter_to_entry, paddc, &i); - preempt_enable(); + xt_info_wrunlock(curcpu); + unlock_up_free: - mutex_unlock(&t->lock); + local_bh_enable(); xt_table_unlock(t); module_put(t->me); free: diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 509a95621f9f..150e5cf62f85 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -625,20 +625,6 @@ void xt_free_table_info(struct xt_table_info *info) } EXPORT_SYMBOL(xt_free_table_info); -void xt_table_entry_swap_rcu(struct xt_table_info *oldinfo, - struct xt_table_info *newinfo) -{ - unsigned int cpu; - - for_each_possible_cpu(cpu) { - void *p = oldinfo->entries[cpu]; - rcu_assign_pointer(oldinfo->entries[cpu], newinfo->entries[cpu]); - newinfo->entries[cpu] = p; - } - -} -EXPORT_SYMBOL_GPL(xt_table_entry_swap_rcu); - /* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, const char *name) @@ -676,32 +662,43 @@ void xt_compat_unlock(u_int8_t af) EXPORT_SYMBOL_GPL(xt_compat_unlock); #endif +DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks); +EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks); + + struct xt_table_info * xt_replace_table(struct xt_table *table, unsigned int num_counters, struct xt_table_info *newinfo, int *error) { - struct xt_table_info *oldinfo, *private; + struct xt_table_info *private; /* Do the substitution. */ - mutex_lock(&table->lock); + local_bh_disable(); private = table->private; + /* Check inside lock: is the old number correct? */ if (num_counters != private->number) { duprintf("num_counters != table->private->number (%u/%u)\n", num_counters, private->number); - mutex_unlock(&table->lock); + local_bh_enable(); *error = -EAGAIN; return NULL; } - oldinfo = private; - rcu_assign_pointer(table->private, newinfo); - newinfo->initial_entries = oldinfo->initial_entries; - mutex_unlock(&table->lock); - synchronize_net(); - return oldinfo; + table->private = newinfo; + newinfo->initial_entries = private->initial_entries; + + /* + * Even though table entries have now been swapped, other CPU's + * may still be using the old entries. This is okay, because + * resynchronization happens because of the locking done + * during the get_counters() routine. + */ + local_bh_enable(); + + return private; } EXPORT_SYMBOL_GPL(xt_replace_table); @@ -734,7 +731,6 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table *table, /* Simplifies replace_table code. */ table->private = bootstrap; - mutex_init(&table->lock); if (!xt_replace_table(table, 0, newinfo, &ret)) goto unlock; @@ -1147,7 +1143,14 @@ static struct pernet_operations xt_net_ops = { static int __init xt_init(void) { - int i, rv; + unsigned int i; + int rv; + + for_each_possible_cpu(i) { + struct xt_info_lock *lock = &per_cpu(xt_info_locks, i); + spin_lock_init(&lock->lock); + lock->readers = 0; + } xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL); if (!xt) -- cgit v1.2.3-59-g8ed1b From d37dc42ab6f040b8f0f2962ab219c5b2accf748d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 30 Apr 2009 06:45:08 -0400 Subject: nls: add a nls_nullsize inline It's possible for character sets to require a multi-byte null string terminator. Add a helper function that determines the size of the null terminator at runtime. Signed-off-by: Jeff Layton Acked-by: Suresh Jayaraman Signed-off-by: Steve French --- include/linux/nls.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nls.h b/include/linux/nls.h index 6a882208301a..52b1a76c1b43 100644 --- a/include/linux/nls.h +++ b/include/linux/nls.h @@ -58,6 +58,25 @@ static inline int nls_strnicmp(struct nls_table *t, const unsigned char *s1, return 0; } +/* + * nls_nullsize - return length of null character for codepage + * @codepage - codepage for which to return length of NULL terminator + * + * Since we can't guarantee that the null terminator will be a particular + * length, we have to check against the codepage. If there's a problem + * determining it, assume a single-byte NULL terminator. + */ +static inline int +nls_nullsize(const struct nls_table *codepage) +{ + int charlen; + char tmp[NLS_MAX_CHARSET_SIZE]; + + charlen = codepage->uni2char(0, tmp, NLS_MAX_CHARSET_SIZE); + + return charlen > 0 ? charlen : 1; +} + #define MODULE_ALIAS_NLS(name) MODULE_ALIAS("nls_" __stringify(name)) #endif /* _LINUX_NLS_H */ -- cgit v1.2.3-59-g8ed1b From 96c16743973e8c1a7b9c655d10b7973408d6d1dd Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 30 Apr 2009 18:24:34 +0200 Subject: ide-cd: fix REQ_QUIET tests in cdrom_decode_status Original patch (dfa4411cc3a690011cab90e9a536938795366cf9) was buggy. This is a more proper fix which introduces blk_rq_quiet() macro alleviating the need for dumb, too short caching variables. Thanks to Helge Deller and Bart for debugging this. Signed-off-by: Borislav Petkov Cc: Jens Axboe Cc: Sergei Shtylyov Reported-and-tested-by: Helge Deller Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-cd.c | 9 ++++----- include/linux/blkdev.h | 1 + 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 3d4e09969763..925eb9e245d1 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -312,7 +312,6 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) ide_hwif_t *hwif = drive->hwif; struct request *rq = hwif->rq; int err, sense_key, do_end_request = 0; - u8 quiet = rq->cmd_flags & REQ_QUIET; /* get the IDE error register */ err = ide_read_error(drive); @@ -347,7 +346,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) } else { cdrom_saw_media_change(drive); - if (blk_fs_request(rq) && !quiet) + if (blk_fs_request(rq) && !blk_rq_quiet(rq)) printk(KERN_ERR PFX "%s: tray open\n", drive->name); } @@ -382,7 +381,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) * No point in retrying after an illegal request or data * protect error. */ - if (!quiet) + if (!blk_rq_quiet(rq)) ide_dump_status(drive, "command error", stat); do_end_request = 1; break; @@ -391,14 +390,14 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) * No point in re-trying a zillion times on a bad sector. * If we got here the error is not correctable. */ - if (!quiet) + if (!blk_rq_quiet(rq)) ide_dump_status(drive, "media error " "(bad sector)", stat); do_end_request = 1; break; case BLANK_CHECK: /* disk appears blank? */ - if (!quiet) + if (!blk_rq_quiet(rq)) ide_dump_status(drive, "media error (blank)", stat); do_end_request = 1; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ba54c834a590..6f841fb1be30 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -598,6 +598,7 @@ enum { blk_failfast_transport(rq) || \ blk_failfast_driver(rq)) #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) +#define blk_rq_quiet(rq) ((rq)->cmd_flags & REQ_QUIET) #define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq))) -- cgit v1.2.3-59-g8ed1b From 0f3d042ed2f934f149ccb78300454beaf0c1134b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 1 May 2009 09:10:46 -0700 Subject: netfilter: use likely() in xt_info_rdlock_bh() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 1b2e43502ef7..c9efe039dc57 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -472,7 +472,7 @@ static inline void xt_info_rdlock_bh(void) local_bh_disable(); lock = &__get_cpu_var(xt_info_locks); - if (!lock->readers++) + if (likely(!lock->readers++)) spin_lock(&lock->lock); } @@ -480,7 +480,7 @@ static inline void xt_info_rdunlock_bh(void) { struct xt_info_lock *lock = &__get_cpu_var(xt_info_locks); - if (!--lock->readers) + if (likely(!--lock->readers)) spin_unlock(&lock->lock); local_bh_enable(); } -- cgit v1.2.3-59-g8ed1b From c047fcd245975f40312ed57bf43e7d4abd188e6b Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Fri, 1 May 2009 15:34:02 -0700 Subject: virtio: add missing include to virtio_net.h virtio_net.h uses the macro ETH_ALEN which is defined in linux/if_ether.h. Discovered when hacking on virtio-over-pci patches. Signed-off-by: Grant Likely Signed-off-by: David S. Miller --- include/linux/virtio_net.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 242348bb3766..cec79adbe3ea 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -4,6 +4,7 @@ * compatible drivers/servers. */ #include #include +#include /* The ID for virtio_net */ #define VIRTIO_ID_NET 1 -- cgit v1.2.3-59-g8ed1b From ae3abae64f177586be55b04a7fb7047a34b21a3e Mon Sep 17 00:00:00 2001 From: Daisuke Nishimura Date: Thu, 30 Apr 2009 15:08:19 -0700 Subject: memcg: fix mem_cgroup_shrink_usage() Current mem_cgroup_shrink_usage() has two problems. 1. It doesn't call mem_cgroup_out_of_memory and doesn't update last_oom_jiffies, so pagefault_out_of_memory invokes global OOM. 2. Considering hierarchy, shrinking has to be done from the mem_over_limit, not from the memcg which the page would be charged to. mem_cgroup_try_charge_swapin() does all of these things properly, so we use it and call cancel_charge_swapin when it succeeded. The name of "shrink_usage" is not appropriate for this behavior, so we change it too. Signed-off-by: Daisuke Nishimura Acked-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Paul Menage Cc: Dhaval Giani Cc: Daisuke Nishimura Cc: YAMAMOTO Takashi Cc: KOSAKI Motohiro Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 4 ++-- mm/memcontrol.c | 33 ++++++++++++--------------------- mm/shmem.c | 8 ++++++-- 3 files changed, 20 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index a9e3b76aa884..25b9ca93d232 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -56,7 +56,7 @@ extern void mem_cgroup_move_lists(struct page *page, enum lru_list from, enum lru_list to); extern void mem_cgroup_uncharge_page(struct page *page); extern void mem_cgroup_uncharge_cache_page(struct page *page); -extern int mem_cgroup_shrink_usage(struct page *page, +extern int mem_cgroup_shmem_charge_fallback(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, @@ -155,7 +155,7 @@ static inline void mem_cgroup_uncharge_cache_page(struct page *page) { } -static inline int mem_cgroup_shrink_usage(struct page *page, +static inline int mem_cgroup_shmem_charge_fallback(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { return 0; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 575203ae2109..01c2d8f14685 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1617,37 +1617,28 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem, } /* - * A call to try to shrink memory usage under specified resource controller. - * This is typically used for page reclaiming for shmem for reducing side - * effect of page allocation from shmem, which is used by some mem_cgroup. + * A call to try to shrink memory usage on charge failure at shmem's swapin. + * Calling hierarchical_reclaim is not enough because we should update + * last_oom_jiffies to prevent pagefault_out_of_memory from invoking global OOM. + * Moreover considering hierarchy, we should reclaim from the mem_over_limit, + * not from the memcg which this page would be charged to. + * try_charge_swapin does all of these works properly. */ -int mem_cgroup_shrink_usage(struct page *page, +int mem_cgroup_shmem_charge_fallback(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { struct mem_cgroup *mem = NULL; - int progress = 0; - int retry = MEM_CGROUP_RECLAIM_RETRIES; + int ret; if (mem_cgroup_disabled()) return 0; - if (page) - mem = try_get_mem_cgroup_from_swapcache(page); - if (!mem && mm) - mem = try_get_mem_cgroup_from_mm(mm); - if (unlikely(!mem)) - return 0; - do { - progress = mem_cgroup_hierarchical_reclaim(mem, - gfp_mask, true, false); - progress += mem_cgroup_check_under_limit(mem); - } while (!progress && --retry); + ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem); + if (!ret) + mem_cgroup_cancel_charge_swapin(mem); /* it does !mem check */ - css_put(&mem->css); - if (!retry) - return -ENOMEM; - return 0; + return ret; } static DEFINE_MUTEX(set_limit_mutex); diff --git a/mm/shmem.c b/mm/shmem.c index f9cb20ebb990..b25f95ce3db7 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1340,8 +1340,12 @@ repeat: shmem_swp_unmap(entry); spin_unlock(&info->lock); if (error == -ENOMEM) { - /* allow reclaim from this memory cgroup */ - error = mem_cgroup_shrink_usage(swappage, + /* + * reclaim from proper memory cgroup and + * call memcg's OOM if needed. + */ + error = mem_cgroup_shmem_charge_fallback( + swappage, current->mm, gfp); if (error) { -- cgit v1.2.3-59-g8ed1b From 74641f584da8eccf30becfbb5507ab457187db22 Mon Sep 17 00:00:00 2001 From: Ivan Kokshaysky Date: Thu, 30 Apr 2009 15:08:49 -0700 Subject: alpha: binfmt_aout fix This fixes the problem introduced by commit 3bfacef412 (get rid of special-casing the /sbin/loader on alpha): osf/1 ecoff binary segfaults when binfmt_aout built as module. That happens because aout binary handler gets on the top of the binfmt list due to late registration, and kernel attempts to execute the binary without preparatory work that must be done by binfmt_loader. Fixed by changing the registration order of the default binfmt handlers using list_add_tail() and introducing insert_binfmt() function which places new handler on the top of the binfmt list. This might be generally useful for installing arch-specific frontends for default handlers or just for overriding them. Signed-off-by: Ivan Kokshaysky Cc: Al Viro Cc: Richard Henderson Signed-off-by: Linus Torvalds --- arch/alpha/kernel/Makefile | 6 +++++- arch/alpha/kernel/binfmt_loader.c | 2 +- fs/exec.c | 7 ++++--- include/linux/binfmts.h | 14 +++++++++++++- 4 files changed, 23 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile index a427538252f8..7739a62440a7 100644 --- a/arch/alpha/kernel/Makefile +++ b/arch/alpha/kernel/Makefile @@ -8,7 +8,7 @@ EXTRA_CFLAGS := -Werror -Wno-sign-compare obj-y := entry.o traps.o process.o init_task.o osf_sys.o irq.o \ irq_alpha.o signal.o setup.o ptrace.o time.o \ - alpha_ksyms.o systbls.o err_common.o io.o binfmt_loader.o + alpha_ksyms.o systbls.o err_common.o io.o obj-$(CONFIG_VGA_HOSE) += console.o obj-$(CONFIG_SMP) += smp.o @@ -43,6 +43,10 @@ else # Misc support obj-$(CONFIG_ALPHA_SRM) += srmcons.o +ifdef CONFIG_BINFMT_AOUT +obj-y += binfmt_loader.o +endif + # Core logic support obj-$(CONFIG_ALPHA_APECS) += core_apecs.o obj-$(CONFIG_ALPHA_CIA) += core_cia.o diff --git a/arch/alpha/kernel/binfmt_loader.c b/arch/alpha/kernel/binfmt_loader.c index 4a0af906b00a..3fcfad410130 100644 --- a/arch/alpha/kernel/binfmt_loader.c +++ b/arch/alpha/kernel/binfmt_loader.c @@ -46,6 +46,6 @@ static struct linux_binfmt loader_format = { static int __init init_loader_binfmt(void) { - return register_binfmt(&loader_format); + return insert_binfmt(&loader_format); } arch_initcall(init_loader_binfmt); diff --git a/fs/exec.c b/fs/exec.c index a3a8ce83940f..639177b0eeac 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -69,17 +69,18 @@ int suid_dumpable = 0; static LIST_HEAD(formats); static DEFINE_RWLOCK(binfmt_lock); -int register_binfmt(struct linux_binfmt * fmt) +int __register_binfmt(struct linux_binfmt * fmt, int insert) { if (!fmt) return -EINVAL; write_lock(&binfmt_lock); - list_add(&fmt->lh, &formats); + insert ? list_add(&fmt->lh, &formats) : + list_add_tail(&fmt->lh, &formats); write_unlock(&binfmt_lock); return 0; } -EXPORT_SYMBOL(register_binfmt); +EXPORT_SYMBOL(__register_binfmt); void unregister_binfmt(struct linux_binfmt * fmt) { diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 6638b8148de7..61ee18c1bdb4 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -82,7 +82,19 @@ struct linux_binfmt { int hasvdso; }; -extern int register_binfmt(struct linux_binfmt *); +extern int __register_binfmt(struct linux_binfmt *fmt, int insert); + +/* Registration of default binfmt handlers */ +static inline int register_binfmt(struct linux_binfmt *fmt) +{ + return __register_binfmt(fmt, 0); +} +/* Same as above, but adds a new binfmt at the top of the list */ +static inline int insert_binfmt(struct linux_binfmt *fmt) +{ + return __register_binfmt(fmt, 1); +} + extern void unregister_binfmt(struct linux_binfmt *); extern int prepare_binprm(struct linux_binprm *); -- cgit v1.2.3-59-g8ed1b From 0763ed2355198cdef2f6a2098e9d52eb1fe4365d Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 30 Apr 2009 15:08:50 -0700 Subject: of: make of_(un)register_platform_driver common code Some drivers using of_register_platform_driver() wrapper break on sparc because the wrapper isn't in the header file. This patch moves it from Microblaze and PowerPC implementations and makes it common code. Fixes this sparc64 allmodconfig build error (at least): drivers/leds/leds-gpio.c: In function `gpio_led_init': drivers/leds/leds-gpio.c:295: error: implicit declaration of function `of_register_platform_driver' drivers/leds/leds-gpio.c: In function `gpio_led_exit': drivers/leds/leds-gpio.c:311: error: implicit declaration of function `of_unregister_platform_driver' Signed-off-by: Grant Likely Acked-by: David S. Miller Cc: Michal Simek Cc: Benjamin Herrenschmidt Cc: Stephen Rothwell Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/microblaze/include/asm/of_platform.h | 10 ---------- arch/powerpc/include/asm/of_platform.h | 10 ---------- include/linux/of_platform.h | 10 ++++++++++ 3 files changed, 10 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/arch/microblaze/include/asm/of_platform.h b/arch/microblaze/include/asm/of_platform.h index 187c0eedaece..37491276c6ca 100644 --- a/arch/microblaze/include/asm/of_platform.h +++ b/arch/microblaze/include/asm/of_platform.h @@ -36,16 +36,6 @@ static const struct of_device_id of_default_bus_ids[] = { {}, }; -/* Platform drivers register/unregister */ -static inline int of_register_platform_driver(struct of_platform_driver *drv) -{ - return of_register_driver(drv, &of_platform_bus_type); -} -static inline void of_unregister_platform_driver(struct of_platform_driver *drv) -{ - of_unregister_driver(drv); -} - /* Platform devices and busses creation */ extern struct of_device *of_platform_device_create(struct device_node *np, const char *bus_id, diff --git a/arch/powerpc/include/asm/of_platform.h b/arch/powerpc/include/asm/of_platform.h index 53b46507ffde..d4aaa3489440 100644 --- a/arch/powerpc/include/asm/of_platform.h +++ b/arch/powerpc/include/asm/of_platform.h @@ -11,16 +11,6 @@ * */ -/* Platform drivers register/unregister */ -static inline int of_register_platform_driver(struct of_platform_driver *drv) -{ - return of_register_driver(drv, &of_platform_bus_type); -} -static inline void of_unregister_platform_driver(struct of_platform_driver *drv) -{ - of_unregister_driver(drv); -} - /* Platform devices and busses creation */ extern struct of_device *of_platform_device_create(struct device_node *np, const char *bus_id, diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index 3d327b67d7e2..908406651330 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -51,6 +51,16 @@ extern int of_register_driver(struct of_platform_driver *drv, struct bus_type *bus); extern void of_unregister_driver(struct of_platform_driver *drv); +/* Platform drivers register/unregister */ +static inline int of_register_platform_driver(struct of_platform_driver *drv) +{ + return of_register_driver(drv, &of_platform_bus_type); +} +static inline void of_unregister_platform_driver(struct of_platform_driver *drv) +{ + of_unregister_driver(drv); +} + #include extern struct of_device *of_find_device_by_node(struct device_node *np); -- cgit v1.2.3-59-g8ed1b From 00a62ce91e554198ef28234c91c36f850f5a3bc9 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Thu, 30 Apr 2009 15:08:51 -0700 Subject: mm: fix Committed_AS underflow on large NR_CPUS environment The Committed_AS field can underflow in certain situations: > # while true; do cat /proc/meminfo | grep _AS; sleep 1; done | uniq -c > 1 Committed_AS: 18446744073709323392 kB > 11 Committed_AS: 18446744073709455488 kB > 6 Committed_AS: 35136 kB > 5 Committed_AS: 18446744073709454400 kB > 7 Committed_AS: 35904 kB > 3 Committed_AS: 18446744073709453248 kB > 2 Committed_AS: 34752 kB > 9 Committed_AS: 18446744073709453248 kB > 8 Committed_AS: 34752 kB > 3 Committed_AS: 18446744073709320960 kB > 7 Committed_AS: 18446744073709454080 kB > 3 Committed_AS: 18446744073709320960 kB > 5 Committed_AS: 18446744073709454080 kB > 6 Committed_AS: 18446744073709320960 kB Because NR_CPUS can be greater than 1000 and meminfo_proc_show() does not check for underflow. But NR_CPUS proportional isn't good calculation. In general, possibility of lock contention is proportional to the number of online cpus, not theorical maximum cpus (NR_CPUS). The current kernel has generic percpu-counter stuff. using it is right way. it makes code simplify and percpu_counter_read_positive() don't make underflow issue. Reported-by: Dave Hansen Signed-off-by: KOSAKI Motohiro Cc: Eric B Munson Cc: Mel Gorman Cc: Christoph Lameter Cc: [All kernel versions] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/meminfo.c | 2 +- include/linux/mman.h | 9 +++------ mm/mmap.c | 12 ++++++------ mm/nommu.c | 13 +++++++------ mm/swap.c | 46 ---------------------------------------------- 5 files changed, 17 insertions(+), 65 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 74ea974f5ca6..c6b0302af4c4 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -35,7 +35,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) #define K(x) ((x) << (PAGE_SHIFT - 10)) si_meminfo(&i); si_swapinfo(&i); - committed = atomic_long_read(&vm_committed_space); + committed = percpu_counter_read_positive(&vm_committed_as); allowed = ((totalram_pages - hugetlb_total_pages()) * sysctl_overcommit_ratio / 100) + total_swap_pages; diff --git a/include/linux/mman.h b/include/linux/mman.h index 30d1073bac3b..9872d6ca58ae 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -12,21 +12,18 @@ #ifdef __KERNEL__ #include +#include #include extern int sysctl_overcommit_memory; extern int sysctl_overcommit_ratio; -extern atomic_long_t vm_committed_space; +extern struct percpu_counter vm_committed_as; -#ifdef CONFIG_SMP -extern void vm_acct_memory(long pages); -#else static inline void vm_acct_memory(long pages) { - atomic_long_add(pages, &vm_committed_space); + percpu_counter_add(&vm_committed_as, pages); } -#endif static inline void vm_unacct_memory(long pages) { diff --git a/mm/mmap.c b/mm/mmap.c index 3303d1ba8e87..6b7b1a95944b 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -85,7 +85,7 @@ EXPORT_SYMBOL(vm_get_page_prot); int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ int sysctl_overcommit_ratio = 50; /* default is 50% */ int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; -atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0); +struct percpu_counter vm_committed_as; /* * Check that a process has enough memory to allocate a new virtual @@ -179,11 +179,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) if (mm) allowed -= mm->total_vm / 32; - /* - * cast `allowed' as a signed long because vm_committed_space - * sometimes has a negative value - */ - if (atomic_long_read(&vm_committed_space) < (long)allowed) + if (percpu_counter_read_positive(&vm_committed_as) < allowed) return 0; error: vm_unacct_memory(pages); @@ -2481,4 +2477,8 @@ void mm_drop_all_locks(struct mm_struct *mm) */ void __init mmap_init(void) { + int ret; + + ret = percpu_counter_init(&vm_committed_as, 0); + VM_BUG_ON(ret); } diff --git a/mm/nommu.c b/mm/nommu.c index 72eda4aee2cb..809998aa7b50 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -62,7 +62,7 @@ void *high_memory; struct page *mem_map; unsigned long max_mapnr; unsigned long num_physpages; -atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0); +struct percpu_counter vm_committed_as; int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ int sysctl_overcommit_ratio = 50; /* default is 50% */ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; @@ -463,6 +463,10 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) */ void __init mmap_init(void) { + int ret; + + ret = percpu_counter_init(&vm_committed_as, 0); + VM_BUG_ON(ret); vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC); } @@ -1847,12 +1851,9 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) if (mm) allowed -= mm->total_vm / 32; - /* - * cast `allowed' as a signed long because vm_committed_space - * sometimes has a negative value - */ - if (atomic_long_read(&vm_committed_space) < (long)allowed) + if (percpu_counter_read_positive(&vm_committed_as) < allowed) return 0; + error: vm_unacct_memory(pages); diff --git a/mm/swap.c b/mm/swap.c index bede23ce64ea..cb29ae5d33ab 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -491,49 +491,6 @@ unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping, EXPORT_SYMBOL(pagevec_lookup_tag); -#ifdef CONFIG_SMP -/* - * We tolerate a little inaccuracy to avoid ping-ponging the counter between - * CPUs - */ -#define ACCT_THRESHOLD max(16, NR_CPUS * 2) - -static DEFINE_PER_CPU(long, committed_space); - -void vm_acct_memory(long pages) -{ - long *local; - - preempt_disable(); - local = &__get_cpu_var(committed_space); - *local += pages; - if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) { - atomic_long_add(*local, &vm_committed_space); - *local = 0; - } - preempt_enable(); -} - -#ifdef CONFIG_HOTPLUG_CPU - -/* Drop the CPU's cached committed space back into the central pool. */ -static int cpu_swap_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) -{ - long *committed; - - committed = &per_cpu(committed_space, (long)hcpu); - if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { - atomic_long_add(*committed, &vm_committed_space); - *committed = 0; - drain_cpu_pagevecs((long)hcpu); - } - return NOTIFY_OK; -} -#endif /* CONFIG_HOTPLUG_CPU */ -#endif /* CONFIG_SMP */ - /* * Perform any setup for the swap system */ @@ -554,7 +511,4 @@ void __init swap_setup(void) * Right now other parts of the system means that we * _really_ don't want to cluster much more */ -#ifdef CONFIG_HOTPLUG_CPU - hotcpu_notifier(cpu_swap_callback, 0); -#endif } -- cgit v1.2.3-59-g8ed1b From f75e6745aa3084124ae1434fd7629853bdaf6798 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 21 Apr 2009 17:18:20 -0400 Subject: SUNRPC: Fix the problem of EADDRNOTAVAIL syslog floods on reconnect See http://bugzilla.kernel.org/show_bug.cgi?id=13034 If the port gets into a TIME_WAIT state, then we cannot reconnect without binding to a new port. Tested-by: Petr Vandrovec Tested-by: Jean Delvare Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/xprt.c | 6 ++---- net/sunrpc/xprtsock.c | 26 +++++++++++++++++++++----- 3 files changed, 24 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 1758d9f5b5c3..08afe43118f4 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -261,6 +261,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); #define XPRT_BINDING (5) #define XPRT_CLOSING (6) #define XPRT_CONNECTION_ABORT (7) +#define XPRT_CONNECTION_CLOSE (8) static inline void xprt_set_connected(struct rpc_xprt *xprt) { diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index a0bfe53f1621..06ca058572f2 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -672,10 +672,8 @@ xprt_init_autodisconnect(unsigned long data) if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) goto out_abort; spin_unlock(&xprt->transport_lock); - if (xprt_connecting(xprt)) - xprt_release_write(xprt, NULL); - else - queue_work(rpciod_workqueue, &xprt->task_cleanup); + set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); + queue_work(rpciod_workqueue, &xprt->task_cleanup); return; out_abort: spin_unlock(&xprt->transport_lock); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index d40ff50887aa..e18596146013 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -807,6 +807,9 @@ static void xs_reset_transport(struct sock_xprt *transport) * * This is used when all requests are complete; ie, no DRC state remains * on the server we want to save. + * + * The caller _must_ be holding XPRT_LOCKED in order to avoid issues with + * xs_reset_transport() zeroing the socket from underneath a writer. */ static void xs_close(struct rpc_xprt *xprt) { @@ -824,6 +827,14 @@ static void xs_close(struct rpc_xprt *xprt) xprt_disconnect_done(xprt); } +static void xs_tcp_close(struct rpc_xprt *xprt) +{ + if (test_and_clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state)) + xs_close(xprt); + else + xs_tcp_shutdown(xprt); +} + /** * xs_destroy - prepare to shutdown a transport * @xprt: doomed transport @@ -1772,6 +1783,15 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt, xprt, -status, xprt_connected(xprt), sock->sk->sk_state); switch (status) { + default: + printk("%s: connect returned unhandled error %d\n", + __func__, status); + case -EADDRNOTAVAIL: + /* We're probably in TIME_WAIT. Get rid of existing socket, + * and retry + */ + set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); + xprt_force_disconnect(xprt); case -ECONNREFUSED: case -ECONNRESET: case -ENETUNREACH: @@ -1782,10 +1802,6 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt, xprt_clear_connecting(xprt); return; } - /* get rid of existing socket, and retry */ - xs_tcp_shutdown(xprt); - printk("%s: connect returned unhandled error %d\n", - __func__, status); out_eagain: status = -EAGAIN; out: @@ -1994,7 +2010,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { .buf_free = rpc_free, .send_request = xs_tcp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, - .close = xs_tcp_shutdown, + .close = xs_tcp_close, .destroy = xs_destroy, .print_stats = xs_tcp_print_stats, }; -- cgit v1.2.3-59-g8ed1b From 9f722c0978b04acba209f8ca1896ad05814bc3a3 Mon Sep 17 00:00:00 2001 From: Omar Laazimani Date: Mon, 4 May 2009 12:01:43 -0700 Subject: usbnet: CDC EEM support (v5) This introduces a CDC Ethernet Emulation Model (EEM) host side driver to support USB EEM devices. EEM is different from the Ethernet Control Model (ECM) currently supported by the "CDC Ethernet" driver. One key difference is that it doesn't require of USB interface alternate settings to manage interface state; some maldesigned hardware can't handle that part of USB. It also avoids a separate USB interface for control and status updates. [ dbrownell@users.sourceforge.net: fix skb leaks, add rx packet checks, improve fault handling, EEM conformance updates, cleanup ] Signed-off-by: Omar Laazimani Signed-off-by: David Brownell Signed-off-by: David S. Miller --- drivers/net/usb/Kconfig | 14 ++ drivers/net/usb/Makefile | 1 + drivers/net/usb/cdc_eem.c | 381 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/usb/cdc.h | 3 + 4 files changed, 399 insertions(+) create mode 100644 drivers/net/usb/cdc_eem.c (limited to 'include/linux') diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index 8ee21030e9ac..dfc6cf765fbd 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -180,6 +180,20 @@ config USB_NET_CDCETHER IEEE 802 "local assignment" bit is set in the address, a "usbX" name is used instead. +config USB_NET_CDC_EEM + tristate "CDC EEM support" + depends on USB_USBNET && EXPERIMENTAL + help + This option supports devices conforming to the Communication Device + Class (CDC) Ethernet Emulation Model, a specification that's easy to + implement in device firmware. The CDC EEM specifications are available + from . + + This driver creates an interface named "ethX", where X depends on + what other networking devices you have in use. However, if the + IEEE 802 "local assignment" bit is set in the address, a "usbX" + name is used instead. + config USB_NET_DM9601 tristate "Davicom DM9601 based USB 1.1 10/100 ethernet devices" depends on USB_USBNET diff --git a/drivers/net/usb/Makefile b/drivers/net/usb/Makefile index 88a87eeb376a..c8aef62cf2b7 100644 --- a/drivers/net/usb/Makefile +++ b/drivers/net/usb/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_USB_RTL8150) += rtl8150.o obj-$(CONFIG_USB_HSO) += hso.o obj-$(CONFIG_USB_NET_AX8817X) += asix.o obj-$(CONFIG_USB_NET_CDCETHER) += cdc_ether.o +obj-$(CONFIG_USB_NET_CDC_EEM) += cdc_eem.o obj-$(CONFIG_USB_NET_DM9601) += dm9601.o obj-$(CONFIG_USB_NET_SMSC95XX) += smsc95xx.o obj-$(CONFIG_USB_NET_GL620A) += gl620a.o diff --git a/drivers/net/usb/cdc_eem.c b/drivers/net/usb/cdc_eem.c new file mode 100644 index 000000000000..80e01778dd3b --- /dev/null +++ b/drivers/net/usb/cdc_eem.c @@ -0,0 +1,381 @@ +/* + * USB CDC EEM network interface driver + * Copyright (C) 2009 Oberthur Technologies + * by Omar Laazimani, Olivier Condemine + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* + * This driver is an implementation of the CDC "Ethernet Emulation + * Model" (EEM) specification, which encapsulates Ethernet frames + * for transport over USB using a simpler USB device model than the + * previous CDC "Ethernet Control Model" (ECM, or "CDC Ethernet"). + * + * For details, see www.usb.org/developers/devclass_docs/CDC_EEM10.pdf + * + * This version has been tested with GIGAntIC WuaoW SIM Smart Card on 2.6.24, + * 2.6.27 and 2.6.30rc2 kernel. + * It has also been validated on Openmoko Om 2008.12 (based on 2.6.24 kernel). + * build on 23-April-2009 + */ + +#define EEM_HEAD 2 /* 2 byte header */ + +/*-------------------------------------------------------------------------*/ + +static void eem_linkcmd_complete(struct urb *urb) +{ + dev_kfree_skb(urb->context); + usb_free_urb(urb); +} + +static void eem_linkcmd(struct usbnet *dev, struct sk_buff *skb) +{ + struct urb *urb; + int status; + + urb = usb_alloc_urb(0, GFP_ATOMIC); + if (!urb) + goto fail; + + usb_fill_bulk_urb(urb, dev->udev, dev->out, + skb->data, skb->len, eem_linkcmd_complete, skb); + + status = usb_submit_urb(urb, GFP_ATOMIC); + if (status) { + usb_free_urb(urb); +fail: + dev_kfree_skb(skb); + devwarn(dev, "link cmd failure\n"); + return; + } +} + +static int eem_bind(struct usbnet *dev, struct usb_interface *intf) +{ + int status = 0; + + status = usbnet_get_endpoints(dev, intf); + if (status < 0) { + usb_set_intfdata(intf, NULL); + usb_driver_release_interface(driver_of(intf), intf); + return status; + } + + /* no jumbogram (16K) support for now */ + + dev->net->hard_header_len += EEM_HEAD + ETH_FCS_LEN; + + return 0; +} + +/* + * EEM permits packing multiple Ethernet frames into USB transfers + * (a "bundle"), but for TX we don't try to do that. + */ +static struct sk_buff *eem_tx_fixup(struct usbnet *dev, struct sk_buff *skb, + gfp_t flags) +{ + struct sk_buff *skb2 = NULL; + u16 len = skb->len; + u32 crc = 0; + int padlen = 0; + + /* When ((len + EEM_HEAD + ETH_FCS_LEN) % dev->maxpacket) is + * zero, stick two bytes of zero length EEM packet on the end. + * Else the framework would add invalid single byte padding, + * since it can't know whether ZLPs will be handled right by + * all the relevant hardware and software. + */ + if (!((len + EEM_HEAD + ETH_FCS_LEN) % dev->maxpacket)) + padlen += 2; + + if (!skb_cloned(skb)) { + int headroom = skb_headroom(skb); + int tailroom = skb_tailroom(skb); + + if ((tailroom >= ETH_FCS_LEN + padlen) + && (headroom >= EEM_HEAD)) + goto done; + + if ((headroom + tailroom) + > (EEM_HEAD + ETH_FCS_LEN + padlen)) { + skb->data = memmove(skb->head + + EEM_HEAD, + skb->data, + skb->len); + skb_set_tail_pointer(skb, len); + goto done; + } + } + + skb2 = skb_copy_expand(skb, EEM_HEAD, ETH_FCS_LEN + padlen, flags); + if (!skb2) + return NULL; + + dev_kfree_skb_any(skb); + skb = skb2; + +done: + /* we don't use the "no Ethernet CRC" option */ + crc = crc32_le(~0, skb->data, skb->len); + crc = ~crc; + + put_unaligned_le32(crc, skb_put(skb, 4)); + + /* EEM packet header format: + * b0..13: length of ethernet frame + * b14: bmCRC (1 == valid Ethernet CRC) + * b15: bmType (0 == data) + */ + len = skb->len; + put_unaligned_le16(BIT(14) | len, skb_push(skb, 2)); + + /* Bundle a zero length EEM packet if needed */ + if (padlen) + put_unaligned_le16(0, skb_put(skb, 2)); + + return skb; +} + +static int eem_rx_fixup(struct usbnet *dev, struct sk_buff *skb) +{ + /* + * Our task here is to strip off framing, leaving skb with one + * data frame for the usbnet framework code to process. But we + * may have received multiple EEM payloads, or command payloads. + * So we must process _everything_ as if it's a header, except + * maybe the last data payload + * + * REVISIT the framework needs updating so that when we consume + * all payloads (the last or only message was a command, or a + * zero length EEM packet) that is not accounted as an rx_error. + */ + do { + struct sk_buff *skb2 = NULL; + u16 header; + u16 len = 0; + + /* incomplete EEM header? */ + if (skb->len < EEM_HEAD) + return 0; + + /* + * EEM packet header format: + * b0..14: EEM type dependant (Data or Command) + * b15: bmType + */ + header = get_unaligned_le16(skb->data); + skb_pull(skb, EEM_HEAD); + + /* + * The bmType bit helps to denote when EEM + * packet is data or command : + * bmType = 0 : EEM data payload + * bmType = 1 : EEM (link) command + */ + if (header & BIT(15)) { + u16 bmEEMCmd; + + /* + * EEM (link) command packet: + * b0..10: bmEEMCmdParam + * b11..13: bmEEMCmd + * b14: bmReserved (must be 0) + * b15: 1 (EEM command) + */ + if (header & BIT(14)) { + devdbg(dev, "reserved command %04x\n", header); + continue; + } + + bmEEMCmd = (header >> 11) & 0x7; + switch (bmEEMCmd) { + + /* Responding to echo requests is mandatory. */ + case 0: /* Echo command */ + len = header & 0x7FF; + + /* bogus command? */ + if (skb->len < len) + return 0; + + skb2 = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!skb2)) + goto next; + skb_trim(skb2, len); + put_unaligned_le16(BIT(15) | (1 << 11) | len, + skb_push(skb2, 2)); + eem_linkcmd(dev, skb2); + break; + + /* + * Host may choose to ignore hints. + * - suspend: peripheral ready to suspend + * - response: suggest N millisec polling + * - response complete: suggest N sec polling + */ + case 2: /* Suspend hint */ + case 3: /* Response hint */ + case 4: /* Response complete hint */ + continue; + + /* + * Hosts should never receive host-to-peripheral + * or reserved command codes; or responses to an + * echo command we didn't send. + */ + case 1: /* Echo response */ + case 5: /* Tickle */ + default: /* reserved */ + devwarn(dev, "unexpected link command %d\n", + bmEEMCmd); + continue; + } + + } else { + u32 crc, crc2; + int is_last; + + /* zero length EEM packet? */ + if (header == 0) + continue; + + /* + * EEM data packet header : + * b0..13: length of ethernet frame + * b14: bmCRC + * b15: 0 (EEM data) + */ + len = header & 0x3FFF; + + /* bogus EEM payload? */ + if (skb->len < len) + return 0; + + /* bogus ethernet frame? */ + if (len < (ETH_HLEN + ETH_FCS_LEN)) + goto next; + + /* + * Treat the last payload differently: framework + * code expects our "fixup" to have stripped off + * headers, so "skb" is a data packet (or error). + * Else if it's not the last payload, keep "skb" + * for further processing. + */ + is_last = (len == skb->len); + if (is_last) + skb2 = skb; + else { + skb2 = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!skb2)) + return 0; + } + + crc = get_unaligned_le32(skb2->data + + len - ETH_FCS_LEN); + skb_trim(skb2, len - ETH_FCS_LEN); + + /* + * The bmCRC helps to denote when the CRC field in + * the Ethernet frame contains a calculated CRC: + * bmCRC = 1 : CRC is calculated + * bmCRC = 0 : CRC = 0xDEADBEEF + */ + if (header & BIT(14)) + crc2 = ~crc32_le(~0, skb2->data, len); + else + crc2 = 0xdeadbeef; + + if (is_last) + return crc == crc2; + + if (unlikely(crc != crc2)) { + dev->stats.rx_errors++; + dev_kfree_skb_any(skb2); + } else + usbnet_skb_return(dev, skb2); + } + +next: + skb_pull(skb, len); + } while (skb->len); + + return 1; +} + +static const struct driver_info eem_info = { + .description = "CDC EEM Device", + .flags = FLAG_ETHER, + .bind = eem_bind, + .rx_fixup = eem_rx_fixup, + .tx_fixup = eem_tx_fixup, +}; + +/*-------------------------------------------------------------------------*/ + +static const struct usb_device_id products[] = { +{ + USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_EEM, + USB_CDC_PROTO_EEM), + .driver_info = (unsigned long) &eem_info, +}, +{ + /* EMPTY == end of list */ +}, +}; +MODULE_DEVICE_TABLE(usb, products); + +static struct usb_driver eem_driver = { + .name = "cdc_eem", + .id_table = products, + .probe = usbnet_probe, + .disconnect = usbnet_disconnect, + .suspend = usbnet_suspend, + .resume = usbnet_resume, +}; + + +static int __init eem_init(void) +{ + return usb_register(&eem_driver); +} +module_init(eem_init); + +static void __exit eem_exit(void) +{ + usb_deregister(&eem_driver); +} +module_exit(eem_exit); + +MODULE_AUTHOR("Omar Laazimani "); +MODULE_DESCRIPTION("USB CDC EEM"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/usb/cdc.h b/include/linux/usb/cdc.h index 3c86ed25a04c..c24124a42ce5 100644 --- a/include/linux/usb/cdc.h +++ b/include/linux/usb/cdc.h @@ -17,6 +17,7 @@ #define USB_CDC_SUBCLASS_DMM 0x09 #define USB_CDC_SUBCLASS_MDLM 0x0a #define USB_CDC_SUBCLASS_OBEX 0x0b +#define USB_CDC_SUBCLASS_EEM 0x0c #define USB_CDC_PROTO_NONE 0 @@ -28,6 +29,8 @@ #define USB_CDC_ACM_PROTO_AT_CDMA 6 #define USB_CDC_ACM_PROTO_VENDOR 0xff +#define USB_CDC_PROTO_EEM 7 + /*-------------------------------------------------------------------------*/ /* -- cgit v1.2.3-59-g8ed1b From a7ca7fccacc029958fd09985e7f3529b90ec791d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 5 May 2009 14:31:12 +0200 Subject: netfilter: add missing linux/types.h include to xt_LED.h Pointed out by Dave Miller: CHECK include/linux/netfilter (57 files) /home/davem/src/GIT/net-2.6/usr/include/linux/netfilter/xt_LED.h:6: found __[us]{8,16,32,64} type without #include Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_LED.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_LED.h b/include/linux/netfilter/xt_LED.h index 4c91a0d770d0..f5509e7524d3 100644 --- a/include/linux/netfilter/xt_LED.h +++ b/include/linux/netfilter/xt_LED.h @@ -1,6 +1,8 @@ #ifndef _XT_LED_H #define _XT_LED_H +#include + struct xt_led_info { char id[27]; /* Unique ID for this trigger in the LED class */ __u8 always_blink; /* Blink even if the LED is already on */ -- cgit v1.2.3-59-g8ed1b From 280f37afa2c270ff029cb420b34396aa002909c3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 5 May 2009 17:46:07 +0200 Subject: netfilter: xt_cluster: fix use of cluster match with 32 nodes This patch fixes a problem when you use 32 nodes in the cluster match: % iptables -I PREROUTING -t mangle -i eth0 -m cluster \ --cluster-total-nodes 32 --cluster-local-node 32 \ --cluster-hash-seed 0xdeadbeef -j MARK --set-mark 0xffff iptables: Invalid argument. Run `dmesg' for more information. % dmesg | tail -1 xt_cluster: this node mask cannot be higher than the total number of nodes The problem is related to this checking: if (info->node_mask >= (1 << info->total_nodes)) { printk(KERN_ERR "xt_cluster: this node mask cannot be " "higher than the total number of nodes\n"); return false; } (1 << 32) is 1. Thus, the checking fails. BTW, I said this before but I insist: I have only tested the cluster match with 2 nodes getting ~45% extra performance in an active-active setup. The maximum limit of 32 nodes is still completely arbitrary. I'd really appreciate if people that have more nodes in their setups let me know. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_cluster.h | 2 ++ net/netfilter/xt_cluster.c | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_cluster.h b/include/linux/netfilter/xt_cluster.h index 5e0a0d07b526..886682656f09 100644 --- a/include/linux/netfilter/xt_cluster.h +++ b/include/linux/netfilter/xt_cluster.h @@ -12,4 +12,6 @@ struct xt_cluster_match_info { u_int32_t flags; }; +#define XT_CLUSTER_NODES_MAX 32 + #endif /* _XT_CLUSTER_MATCH_H */ diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index 6c4847662b85..69a639f35403 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -135,7 +135,13 @@ static bool xt_cluster_mt_checkentry(const struct xt_mtchk_param *par) { struct xt_cluster_match_info *info = par->matchinfo; - if (info->node_mask >= (1 << info->total_nodes)) { + if (info->total_nodes > XT_CLUSTER_NODES_MAX) { + printk(KERN_ERR "xt_cluster: you have exceeded the maximum " + "number of cluster nodes (%u > %u)\n", + info->total_nodes, XT_CLUSTER_NODES_MAX); + return false; + } + if (info->node_mask >= (1ULL << info->total_nodes)) { printk(KERN_ERR "xt_cluster: this node mask cannot be " "higher than the total number of nodes\n"); return false; -- cgit v1.2.3-59-g8ed1b From e67c85626cd02e306da1b4195bfaf68d61050796 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 8 Mar 2009 23:13:32 +0800 Subject: Revert driver core: move platform_data into platform_device This reverts commit 006f4571a15fae3a0575f2a0f9e9b63b3d1012f8: This patch moves platform_data from struct device into struct platform_device, based on the two ideas: 1. Now all platform_driver is registered by platform_driver_register, which makes probe()/release()/... of platform_driver passed parameter of platform_device *, so platform driver can get platform_data from platform_device; 2. Other kind of devices do not need to use platform_data, we can decrease size of device if moving it to platform_device. Taking into consideration of thousands of files to be fixed and they can't be finished in one night(maybe it will take a long time), so we keep platform_data in device to allow two kind of cases coexist until all platform devices pass its platfrom data from platform_device->platform_data. All patches to do this kind of conversion are welcome. As we don't really want to do it, it was a bad idea. Cc: David Brownell Cc: Ming Lei Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 3 --- include/linux/device.h | 9 ++------- include/linux/platform_device.h | 1 - 3 files changed, 2 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/platform.c b/drivers/base/platform.c index d1d0ee431926..8b4708e06244 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -217,7 +217,6 @@ int platform_device_add_data(struct platform_device *pdev, const void *data, if (d) { memcpy(d, data, size); pdev->dev.platform_data = d; - pdev->platform_data = d; } return d ? 0 : -ENOMEM; } @@ -247,8 +246,6 @@ int platform_device_add(struct platform_device *pdev) else dev_set_name(&pdev->dev, pdev->name); - pdev->platform_data = pdev->dev.platform_data; - for (i = 0; i < pdev->num_resources; i++) { struct resource *p, *r = &pdev->resource[i]; diff --git a/include/linux/device.h b/include/linux/device.h index 6a69caaac18a..5d5c197bad45 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -384,13 +384,8 @@ struct device { struct device_driver *driver; /* which driver has allocated this device */ void *driver_data; /* data private to the driver */ - - void *platform_data; /* We will remove platform_data - field if all platform devices - pass its platform specific data - from platform_device->platform_data, - other kind of devices should not - use platform_data. */ + void *platform_data; /* Platform specific data, device + core doesn't touch it */ struct dev_pm_info power; #ifdef CONFIG_NUMA diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 72736fd8223c..b67bb5d7b221 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -20,7 +20,6 @@ struct platform_device { struct device dev; u32 num_resources; struct resource * resource; - void *platform_data; struct platform_device_id *id_entry; }; -- cgit v1.2.3-59-g8ed1b From edcc37a0478836b4a51eafb1bcec6a52708f681d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 3 May 2009 06:00:05 -0400 Subject: Always lookup priv_root on reiserfs mount and keep it ... even if it's a negative dentry. That way we can set ->d_op on root before anyone could race with us. Simplify d_compare(), while we are at it. Signed-off-by: Al Viro --- fs/reiserfs/super.c | 6 ++- fs/reiserfs/xattr.c | 86 ++++++++++++++++++------------------------ include/linux/reiserfs_xattr.h | 1 + 3 files changed, 41 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 0ae6486d9046..d444fe0013a4 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1842,7 +1842,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) goto error; } - if ((errval = reiserfs_xattr_init(s, s->s_flags))) { + if ((errval = reiserfs_lookup_privroot(s)) || + (errval = reiserfs_xattr_init(s, s->s_flags))) { dput(s->s_root); s->s_root = NULL; goto error; @@ -1855,7 +1856,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) reiserfs_info(s, "using 3.5.x disk format\n"); } - if ((errval = reiserfs_xattr_init(s, s->s_flags))) { + if ((errval = reiserfs_lookup_privroot(s)) || + (errval = reiserfs_xattr_init(s, s->s_flags))) { dput(s->s_root); s->s_root = NULL; goto error; diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 31a3dbb120e1..2891f789f545 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -903,16 +903,19 @@ static int create_privroot(struct dentry *dentry) WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex)); err = xattr_mkdir(inode, dentry, 0700); - if (err) { - dput(dentry); - dentry = NULL; + if (err || !dentry->d_inode) { + reiserfs_warning(dentry->d_sb, "jdm-20006", + "xattrs/ACLs enabled and couldn't " + "find/create .reiserfs_priv. " + "Failing mount."); + return -EOPNOTSUPP; } - if (dentry && dentry->d_inode) - reiserfs_info(dentry->d_sb, "Created %s - reserved for xattr " - "storage.\n", PRIVROOT_NAME); + dentry->d_inode->i_flags |= S_PRIVATE; + reiserfs_info(dentry->d_sb, "Created %s - reserved for xattr " + "storage.\n", PRIVROOT_NAME); - return err; + return 0; } static int xattr_mount_check(struct super_block *s) @@ -944,11 +947,9 @@ static int xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name) { struct dentry *priv_root = REISERFS_SB(dentry->d_sb)->priv_root; - if (name->len == priv_root->d_name.len && - name->hash == priv_root->d_name.hash && - !memcmp(name->name, priv_root->d_name.name, name->len)) { + if (container_of(q1, struct dentry, d_name) == priv_root) return -ENOENT; - } else if (q1->len == name->len && + if (q1->len == name->len && !memcmp(q1->name, name->name, name->len)) return 0; return 1; @@ -958,6 +959,27 @@ static const struct dentry_operations xattr_lookup_poison_ops = { .d_compare = xattr_lookup_poison, }; +int reiserfs_lookup_privroot(struct super_block *s) +{ + struct dentry *dentry; + int err = 0; + + /* If we don't have the privroot located yet - go find it */ + mutex_lock(&s->s_root->d_inode->i_mutex); + dentry = lookup_one_len(PRIVROOT_NAME, s->s_root, + strlen(PRIVROOT_NAME)); + if (!IS_ERR(dentry)) { + REISERFS_SB(s)->priv_root = dentry; + s->s_root->d_op = &xattr_lookup_poison_ops; + if (dentry->d_inode) + dentry->d_inode->i_flags |= S_PRIVATE; + } else + err = PTR_ERR(dentry); + mutex_unlock(&s->s_root->d_inode->i_mutex); + + return err; +} + /* We need to take a copy of the mount flags since things like * MS_RDONLY don't get set until *after* we're called. * mount_flags != mount_options */ @@ -969,48 +991,12 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags) err = xattr_mount_check(s); if (err) goto error; -#endif - /* If we don't have the privroot located yet - go find it */ - if (!REISERFS_SB(s)->priv_root) { - struct dentry *dentry; - mutex_lock_nested(&s->s_root->d_inode->i_mutex, I_MUTEX_CHILD); - dentry = lookup_one_len(PRIVROOT_NAME, s->s_root, - strlen(PRIVROOT_NAME)); - if (!IS_ERR(dentry)) { -#ifdef CONFIG_REISERFS_FS_XATTR - if (!(mount_flags & MS_RDONLY) && !dentry->d_inode) - err = create_privroot(dentry); -#endif - if (!dentry->d_inode) { - dput(dentry); - dentry = NULL; - } - } else - err = PTR_ERR(dentry); + if (!REISERFS_SB(s)->priv_root->d_inode && !(mount_flags & MS_RDONLY)) { + mutex_lock(&s->s_root->d_inode->i_mutex); + err = create_privroot(REISERFS_SB(s)->priv_root); mutex_unlock(&s->s_root->d_inode->i_mutex); - - if (!err && dentry) { - s->s_root->d_op = &xattr_lookup_poison_ops; - dentry->d_inode->i_flags |= S_PRIVATE; - REISERFS_SB(s)->priv_root = dentry; -#ifdef CONFIG_REISERFS_FS_XATTR - /* xattrs are unavailable */ - } else if (!(mount_flags & MS_RDONLY)) { - /* If we're read-only it just means that the dir - * hasn't been created. Not an error -- just no - * xattrs on the fs. We'll check again if we - * go read-write */ - reiserfs_warning(s, "jdm-20006", - "xattrs/ACLs enabled and couldn't " - "find/create .reiserfs_priv. " - "Failing mount."); - err = -EOPNOTSUPP; -#endif - } } - -#ifdef CONFIG_REISERFS_FS_XATTR if (!err) s->s_xattr = reiserfs_xattr_handlers; diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h index dcae01e63e40..fea1a8e65bef 100644 --- a/include/linux/reiserfs_xattr.h +++ b/include/linux/reiserfs_xattr.h @@ -38,6 +38,7 @@ struct nameidata; int reiserfs_xattr_register_handlers(void) __init; void reiserfs_xattr_unregister_handlers(void); int reiserfs_xattr_init(struct super_block *sb, int mount_flags); +int reiserfs_lookup_privroot(struct super_block *sb); int reiserfs_delete_xattrs(struct inode *inode); int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs); -- cgit v1.2.3-59-g8ed1b From ab17c4f02156c4f75d7fa43a5aa2a7f942d47201 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 5 May 2009 15:30:15 -0400 Subject: reiserfs: fixup xattr_root caching The xattr_root caching was broken from my previous patch set. It wouldn't cause corruption, but could cause decreased performance due to allocating a larger chunk of the journal (~ 27 blocks) than it would actually use. This patch loads the xattr root dentry at xattr initialization and creates it on-demand. Since we're using the cached dentry, there's no point in keeping lookup_or_create_dir around, so that's removed. Signed-off-by: Jeff Mahoney Signed-off-by: Al Viro --- fs/reiserfs/xattr.c | 73 ++++++++++++++++++++++++++---------------- include/linux/reiserfs_fs_sb.h | 2 +- include/linux/reiserfs_xattr.h | 2 +- 3 files changed, 48 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 2891f789f545..c77984473db9 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -113,36 +113,28 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry) #define xattr_may_create(flags) (!flags || flags & XATTR_CREATE) -/* Returns and possibly creates the xattr dir. */ -static struct dentry *lookup_or_create_dir(struct dentry *parent, - const char *name, int flags) +static struct dentry *open_xa_root(struct super_block *sb, int flags) { - struct dentry *dentry; - BUG_ON(!parent); + struct dentry *privroot = REISERFS_SB(sb)->priv_root; + struct dentry *xaroot; + if (!privroot->d_inode) + return ERR_PTR(-ENODATA); - mutex_lock_nested(&parent->d_inode->i_mutex, I_MUTEX_XATTR); - dentry = lookup_one_len(name, parent, strlen(name)); - if (!IS_ERR(dentry) && !dentry->d_inode) { - int err = -ENODATA; + mutex_lock_nested(&privroot->d_inode->i_mutex, I_MUTEX_XATTR); + xaroot = dget(REISERFS_SB(sb)->xattr_root); + if (!xaroot->d_inode) { + int err = -ENODATA; if (xattr_may_create(flags)) - err = xattr_mkdir(parent->d_inode, dentry, 0700); - + err = xattr_mkdir(privroot->d_inode, xaroot, 0700); if (err) { - dput(dentry); - dentry = ERR_PTR(err); + dput(xaroot); + xaroot = ERR_PTR(err); } } - mutex_unlock(&parent->d_inode->i_mutex); - return dentry; -} -static struct dentry *open_xa_root(struct super_block *sb, int flags) -{ - struct dentry *privroot = REISERFS_SB(sb)->priv_root; - if (!privroot) - return ERR_PTR(-ENODATA); - return lookup_or_create_dir(privroot, XAROOT_NAME, flags); + mutex_unlock(&privroot->d_inode->i_mutex); + return xaroot; } static struct dentry *open_xa_dir(const struct inode *inode, int flags) @@ -158,10 +150,22 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags) le32_to_cpu(INODE_PKEY(inode)->k_objectid), inode->i_generation); - xadir = lookup_or_create_dir(xaroot, namebuf, flags); + mutex_lock_nested(&xaroot->d_inode->i_mutex, I_MUTEX_XATTR); + + xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf)); + if (!IS_ERR(xadir) && !xadir->d_inode) { + int err = -ENODATA; + if (xattr_may_create(flags)) + err = xattr_mkdir(xaroot->d_inode, xadir, 0700); + if (err) { + dput(xadir); + xadir = ERR_PTR(err); + } + } + + mutex_unlock(&xaroot->d_inode->i_mutex); dput(xaroot); return xadir; - } /* The following are side effects of other operations that aren't explicitly @@ -986,19 +990,33 @@ int reiserfs_lookup_privroot(struct super_block *s) int reiserfs_xattr_init(struct super_block *s, int mount_flags) { int err = 0; + struct dentry *privroot = REISERFS_SB(s)->priv_root; #ifdef CONFIG_REISERFS_FS_XATTR err = xattr_mount_check(s); if (err) goto error; - if (!REISERFS_SB(s)->priv_root->d_inode && !(mount_flags & MS_RDONLY)) { + if (!privroot->d_inode && !(mount_flags & MS_RDONLY)) { mutex_lock(&s->s_root->d_inode->i_mutex); err = create_privroot(REISERFS_SB(s)->priv_root); mutex_unlock(&s->s_root->d_inode->i_mutex); } - if (!err) + + if (privroot->d_inode) { s->s_xattr = reiserfs_xattr_handlers; + mutex_lock(&privroot->d_inode->i_mutex); + if (!REISERFS_SB(s)->xattr_root) { + struct dentry *dentry; + dentry = lookup_one_len(XAROOT_NAME, privroot, + strlen(XAROOT_NAME)); + if (!IS_ERR(dentry)) + REISERFS_SB(s)->xattr_root = dentry; + else + err = PTR_ERR(dentry); + } + mutex_unlock(&privroot->d_inode->i_mutex); + } error: if (err) { @@ -1008,11 +1026,12 @@ error: #endif /* The super_block MS_POSIXACL must mirror the (no)acl mount option. */ - s->s_flags = s->s_flags & ~MS_POSIXACL; #ifdef CONFIG_REISERFS_FS_POSIX_ACL if (reiserfs_posixacl(s)) s->s_flags |= MS_POSIXACL; + else #endif + s->s_flags &= ~MS_POSIXACL; return err; } diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h index 6b361d23a499..8651640868a1 100644 --- a/include/linux/reiserfs_fs_sb.h +++ b/include/linux/reiserfs_fs_sb.h @@ -402,7 +402,7 @@ struct reiserfs_sb_info { int reserved_blocks; /* amount of blocks reserved for further allocations */ spinlock_t bitmap_lock; /* this lock on now only used to protect reserved_blocks variable */ struct dentry *priv_root; /* root of /.reiserfs_priv */ - struct dentry *xattr_root; /* root of /.reiserfs_priv/.xa */ + struct dentry *xattr_root; /* root of /.reiserfs_priv/xattrs */ int j_errno; #ifdef CONFIG_QUOTA char *s_qf_names[MAXQUOTAS]; diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h index fea1a8e65bef..cdedc01036e4 100644 --- a/include/linux/reiserfs_xattr.h +++ b/include/linux/reiserfs_xattr.h @@ -98,7 +98,7 @@ static inline size_t reiserfs_xattr_jcreate_nblocks(struct inode *inode) if ((REISERFS_I(inode)->i_flags & i_has_xattr_dir) == 0) { nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb); - if (REISERFS_SB(inode->i_sb)->xattr_root == NULL) + if (!REISERFS_SB(inode->i_sb)->xattr_root->d_inode) nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb); } -- cgit v1.2.3-59-g8ed1b From 677c9b2e393a0cd203bd54e9c18b012b2c73305a Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 5 May 2009 15:30:17 -0400 Subject: reiserfs: remove privroot hiding in lookup With Al Viro's patch to move privroot lookup to fs mount, there's no need to have special code to hide the privroot in reiserfs_lookup. I've also cleaned up the privroot hiding in reiserfs_readdir_dentry and removed the last user of reiserfs_xattrs(). Signed-off-by: Jeff Mahoney Signed-off-by: Al Viro --- fs/reiserfs/dir.c | 24 +++++++++++++----------- fs/reiserfs/namei.c | 17 ++--------------- fs/reiserfs/xattr.c | 2 +- include/linux/reiserfs_fs_sb.h | 1 - 4 files changed, 16 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 67a80d7e59e2..45ee3d357c70 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c @@ -41,6 +41,18 @@ static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, #define store_ih(where,what) copy_item_head (where, what) +static inline bool is_privroot_deh(struct dentry *dir, + struct reiserfs_de_head *deh) +{ + int ret = 0; +#ifdef CONFIG_REISERFS_FS_XATTR + struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root; + ret = (dir == dir->d_parent && privroot->d_inode && + deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid); +#endif + return ret; +} + int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, filldir_t filldir, loff_t *pos) { @@ -138,18 +150,8 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, } /* Ignore the .reiserfs_priv entry */ - if (reiserfs_xattrs(inode->i_sb) && - !old_format_only(inode->i_sb) && - dentry == inode->i_sb->s_root && - REISERFS_SB(inode->i_sb)->priv_root && - REISERFS_SB(inode->i_sb)->priv_root->d_inode - && deh_objectid(deh) == - le32_to_cpu(INODE_PKEY - (REISERFS_SB(inode->i_sb)-> - priv_root->d_inode)-> - k_objectid)) { + if (is_privroot_deh(dentry, deh)) continue; - } d_off = deh_offset(deh); *pos = d_off; diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index efd4d720718e..271579128634 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -338,21 +338,8 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry, &path_to_entry, &de); pathrelse(&path_to_entry); if (retval == NAME_FOUND) { - /* Hide the .reiserfs_priv directory */ - if (reiserfs_xattrs(dir->i_sb) && - !old_format_only(dir->i_sb) && - REISERFS_SB(dir->i_sb)->priv_root && - REISERFS_SB(dir->i_sb)->priv_root->d_inode && - de.de_objectid == - le32_to_cpu(INODE_PKEY - (REISERFS_SB(dir->i_sb)->priv_root->d_inode)-> - k_objectid)) { - reiserfs_write_unlock(dir->i_sb); - return ERR_PTR(-EACCES); - } - - inode = - reiserfs_iget(dir->i_sb, (struct cpu_key *)&(de.de_dir_id)); + inode = reiserfs_iget(dir->i_sb, + (struct cpu_key *)&(de.de_dir_id)); if (!inode || IS_ERR(inode)) { reiserfs_write_unlock(dir->i_sb); return ERR_PTR(-EACCES); diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index c77984473db9..2237e10c7c7c 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -841,7 +841,7 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size) if (!dentry->d_inode) return -EINVAL; - if (!reiserfs_xattrs(dentry->d_sb) || + if (!dentry->d_sb->s_xattr || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) return -EOPNOTSUPP; diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h index 8651640868a1..6473650c28f1 100644 --- a/include/linux/reiserfs_fs_sb.h +++ b/include/linux/reiserfs_fs_sb.h @@ -488,7 +488,6 @@ enum reiserfs_mount_options { #define reiserfs_data_log(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_LOG)) #define reiserfs_data_ordered(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_ORDERED)) #define reiserfs_data_writeback(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_WRITEBACK)) -#define reiserfs_xattrs(s) ((s)->s_xattr != NULL) #define reiserfs_xattrs_user(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_XATTRS_USER)) #define reiserfs_posixacl(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_POSIXACL)) #define reiserfs_xattrs_optional(s) (reiserfs_xattrs_user(s) || reiserfs_posixacl(s)) -- cgit v1.2.3-59-g8ed1b From 74dbbdd7fdc11763f4698d2f3e684cf4446951e6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 6 May 2009 01:07:50 -0400 Subject: New helper: deactivate_locked_super() Does equivalent of up_write(&s->s_umount); deactivate_super(s); However, it does not does not unlock it until it's all over. As the result, it's safe to use to dispose of new superblock on ->get_sb() failure exits - nobody will see the sucker until it's all over. Equivalent using up_write/deactivate_super is safe for that purpose if superblock is either safe to use or has NULL ->s_root when we unlock. Normally filesystems take the required precautions, but a) we do have bugs in that area in some of them. b) up_write/deactivate_super sequence is extremely common, so the helper makes sense anyway. Signed-off-by: Al Viro --- fs/super.c | 46 ++++++++++++++++++++++++++++++++++------------ include/linux/fs.h | 1 + 2 files changed, 35 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/fs/super.c b/fs/super.c index 786fe7d72790..a9dc4c33ef4d 100644 --- a/fs/super.c +++ b/fs/super.c @@ -207,6 +207,34 @@ void deactivate_super(struct super_block *s) EXPORT_SYMBOL(deactivate_super); +/** + * deactivate_locked_super - drop an active reference to superblock + * @s: superblock to deactivate + * + * Equivalent of up_write(&s->s_umount); deactivate_super(s);, except that + * it does not unlock it until it's all over. As the result, it's safe to + * use to dispose of new superblock on ->get_sb() failure exits - nobody + * will see the sucker until it's all over. Equivalent using up_write + + * deactivate_super is safe for that purpose only if superblock is either + * safe to use or has NULL ->s_root when we unlock. + */ +void deactivate_locked_super(struct super_block *s) +{ + struct file_system_type *fs = s->s_type; + if (atomic_dec_and_lock(&s->s_active, &sb_lock)) { + s->s_count -= S_BIAS-1; + spin_unlock(&sb_lock); + vfs_dq_off(s, 0); + fs->kill_sb(s); + put_filesystem(fs); + put_super(s); + } else { + up_write(&s->s_umount); + } +} + +EXPORT_SYMBOL(deactivate_locked_super); + /** * grab_super - acquire an active reference * @s: reference we are trying to make active @@ -797,8 +825,7 @@ int get_sb_ns(struct file_system_type *fs_type, int flags, void *data, sb->s_flags = flags; err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0); if (err) { - up_write(&sb->s_umount); - deactivate_super(sb); + deactivate_locked_super(sb); return err; } @@ -854,8 +881,7 @@ int get_sb_bdev(struct file_system_type *fs_type, if (s->s_root) { if ((flags ^ s->s_flags) & MS_RDONLY) { - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); error = -EBUSY; goto error_bdev; } @@ -870,8 +896,7 @@ int get_sb_bdev(struct file_system_type *fs_type, sb_set_blocksize(s, block_size(bdev)); error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); goto error; } @@ -921,8 +946,7 @@ int get_sb_nodev(struct file_system_type *fs_type, error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); return error; } s->s_flags |= MS_ACTIVE; @@ -952,8 +976,7 @@ int get_sb_single(struct file_system_type *fs_type, s->s_flags = flags; error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); return error; } s->s_flags |= MS_ACTIVE; @@ -1006,8 +1029,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void return mnt; out_sb: dput(mnt->mnt_root); - up_write(&mnt->mnt_sb->s_umount); - deactivate_super(mnt->mnt_sb); + deactivate_locked_super(mnt->mnt_sb); out_free_secdata: free_secdata(secdata); out_mnt: diff --git a/include/linux/fs.h b/include/linux/fs.h index 5bed436f4353..11484d08042c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1775,6 +1775,7 @@ void kill_block_super(struct super_block *sb); void kill_anon_super(struct super_block *sb); void kill_litter_super(struct super_block *sb); void deactivate_super(struct super_block *sb); +void deactivate_locked_super(struct super_block *sb); int set_anon_super(struct super_block *s, void *data); struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), -- cgit v1.2.3-59-g8ed1b From db6c1fbb92eeb4cb52c6133e0c533602f49fc4bd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Apr 2009 18:07:08 +0200 Subject: romfs: cleanup romfs_fs.h There's no kernel-only content in it anymore, so move it to header-y and remove the superflous #ifdef __KERNEL__. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/Kbuild | 2 +- include/linux/romfs_fs.h | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index ca9b9b9bd331..3f0eaa397ef5 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -138,6 +138,7 @@ header-y += qnxtypes.h header-y += radeonfb.h header-y += raw.h header-y += resource.h +header-y += romfs_fs.h header-y += rose.h header-y += serial_reg.h header-y += smbno.h @@ -314,7 +315,6 @@ unifdef-y += irqnr.h unifdef-y += reboot.h unifdef-y += reiserfs_fs.h unifdef-y += reiserfs_xattr.h -unifdef-y += romfs_fs.h unifdef-y += route.h unifdef-y += rtc.h unifdef-y += rtnetlink.h diff --git a/include/linux/romfs_fs.h b/include/linux/romfs_fs.h index e20bbf9eb365..c490fbc43fe2 100644 --- a/include/linux/romfs_fs.h +++ b/include/linux/romfs_fs.h @@ -53,9 +53,4 @@ struct romfs_inode { #define ROMFH_PAD (ROMFH_SIZE-1) #define ROMFH_MASK (~ROMFH_PAD) -#ifdef __KERNEL__ - -/* Not much now */ - -#endif /* __KERNEL__ */ #endif -- cgit v1.2.3-59-g8ed1b From 6e8341a11eb21826b7192d0bb88cb5b44900a9af Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 6 Apr 2009 11:16:22 -0400 Subject: Switch open_exec() and sys_uselib() to do_open_filp() ... and make path_lookup_open() static Signed-off-by: Al Viro --- fs/exec.c | 72 ++++++++++++++++++--------------------------------- fs/namei.c | 13 +++++----- fs/open.c | 2 +- include/linux/fs.h | 2 +- include/linux/namei.h | 1 - 5 files changed, 34 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index 41ae8e0de72d..895823d0149d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -105,36 +105,28 @@ static inline void put_binfmt(struct linux_binfmt * fmt) SYSCALL_DEFINE1(uselib, const char __user *, library) { struct file *file; - struct nameidata nd; char *tmp = getname(library); int error = PTR_ERR(tmp); - if (!IS_ERR(tmp)) { - error = path_lookup_open(AT_FDCWD, tmp, - LOOKUP_FOLLOW, &nd, - FMODE_READ|FMODE_EXEC); - putname(tmp); - } - if (error) + if (IS_ERR(tmp)) + goto out; + + file = do_filp_open(AT_FDCWD, tmp, + O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0, + MAY_READ | MAY_EXEC | MAY_OPEN); + putname(tmp); + error = PTR_ERR(file); + if (IS_ERR(file)) goto out; error = -EINVAL; - if (!S_ISREG(nd.path.dentry->d_inode->i_mode)) + if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) goto exit; error = -EACCES; - if (nd.path.mnt->mnt_flags & MNT_NOEXEC) - goto exit; - - error = may_open(&nd.path, MAY_READ | MAY_EXEC | MAY_OPEN, 0); - if (error) + if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) goto exit; - file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE); - error = PTR_ERR(file); - if (IS_ERR(file)) - goto out; - fsnotify_open(file->f_path.dentry); error = -ENOEXEC; @@ -156,13 +148,10 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) } read_unlock(&binfmt_lock); } +exit: fput(file); out: return error; -exit: - release_open_intent(&nd); - path_put(&nd.path); - goto out; } #ifdef CONFIG_MMU @@ -657,44 +646,33 @@ EXPORT_SYMBOL(setup_arg_pages); struct file *open_exec(const char *name) { - struct nameidata nd; struct file *file; int err; - err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, - FMODE_READ|FMODE_EXEC); - if (err) + file = do_filp_open(AT_FDCWD, name, + O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0, + MAY_EXEC | MAY_OPEN); + if (IS_ERR(file)) goto out; err = -EACCES; - if (!S_ISREG(nd.path.dentry->d_inode->i_mode)) - goto out_path_put; - - if (nd.path.mnt->mnt_flags & MNT_NOEXEC) - goto out_path_put; - - err = may_open(&nd.path, MAY_EXEC | MAY_OPEN, 0); - if (err) - goto out_path_put; + if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) + goto exit; - file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE); - if (IS_ERR(file)) - return file; + if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) + goto exit; fsnotify_open(file->f_path.dentry); err = deny_write_access(file); - if (err) { - fput(file); - goto out; - } + if (err) + goto exit; +out: return file; - out_path_put: - release_open_intent(&nd); - path_put(&nd.path); - out: +exit: + fput(file); return ERR_PTR(err); } EXPORT_SYMBOL(open_exec); diff --git a/fs/namei.c b/fs/namei.c index 78f253cd2d4f..967c3db92724 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1130,8 +1130,8 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, * @nd: pointer to nameidata * @open_flags: open intent flags */ -int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags, - struct nameidata *nd, int open_flags) +static int path_lookup_open(int dfd, const char *name, + unsigned int lookup_flags, struct nameidata *nd, int open_flags) { struct file *filp = get_empty_filp(); int err; @@ -1637,18 +1637,19 @@ static int open_will_write_to_fs(int flag, struct inode *inode) * open_to_namei_flags() for more details. */ struct file *do_filp_open(int dfd, const char *pathname, - int open_flag, int mode) + int open_flag, int mode, int acc_mode) { struct file *filp; struct nameidata nd; - int acc_mode, error; + int error; struct path path; struct dentry *dir; int count = 0; int will_write; int flag = open_to_namei_flags(open_flag); - acc_mode = MAY_OPEN | ACC_MODE(flag); + if (!acc_mode) + acc_mode = MAY_OPEN | ACC_MODE(flag); /* O_TRUNC implies we need access checks for write permissions */ if (flag & O_TRUNC) @@ -1869,7 +1870,7 @@ do_link: */ struct file *filp_open(const char *filename, int flags, int mode) { - return do_filp_open(AT_FDCWD, filename, flags, mode); + return do_filp_open(AT_FDCWD, filename, flags, mode, 0); } EXPORT_SYMBOL(filp_open); diff --git a/fs/open.c b/fs/open.c index 377eb25b6abf..bdfbf03615a4 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1033,7 +1033,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode) if (!IS_ERR(tmp)) { fd = get_unused_fd_flags(flags); if (fd >= 0) { - struct file *f = do_filp_open(dfd, tmp, flags, mode); + struct file *f = do_filp_open(dfd, tmp, flags, mode, 0); if (IS_ERR(f)) { put_unused_fd(fd); fd = PTR_ERR(f); diff --git a/include/linux/fs.h b/include/linux/fs.h index 11484d08042c..ed788426f464 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2118,7 +2118,7 @@ extern struct file *create_write_pipe(int flags); extern void free_write_pipe(struct file *); extern struct file *do_filp_open(int dfd, const char *pathname, - int open_flag, int mode); + int open_flag, int mode, int acc_mode); extern int may_open(struct path *, int, int); extern int kernel_read(struct file *, unsigned long, char *, unsigned long); diff --git a/include/linux/namei.h b/include/linux/namei.h index fc2e03579877..518098fe63af 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -69,7 +69,6 @@ extern int path_lookup(const char *, unsigned, struct nameidata *); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct nameidata *); -extern int path_lookup_open(int dfd, const char *name, unsigned lookup_flags, struct nameidata *, int open_flags); extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, int (*open)(struct inode *, struct file *)); extern struct file *nameidata_to_filp(struct nameidata *nd, int flags); -- cgit v1.2.3-59-g8ed1b From 2a32cebd6cbcc43996c3e2d114fa32ba1e71192a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 8 May 2009 16:05:57 -0400 Subject: Fix races around the access to ->s_options Put generic_show_options read access to s_options under rcu_read_lock, split save_mount_options() into "we are setting it the first time" (uses in foo_fill_super()) and "we are relacing and freeing the old one", synchronize_rcu() before kfree() in the latter. Signed-off-by: Al Viro --- drivers/isdn/capi/capifs.c | 3 +-- fs/affs/super.c | 3 +-- fs/afs/super.c | 4 ++-- fs/hpfs/super.c | 3 +-- fs/namespace.c | 21 ++++++++++++++++++--- fs/reiserfs/super.c | 3 +-- include/linux/fs.h | 1 + 7 files changed, 25 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/isdn/capi/capifs.c b/drivers/isdn/capi/capifs.c index b129409925af..8f9f3b5a3e8c 100644 --- a/drivers/isdn/capi/capifs.c +++ b/drivers/isdn/capi/capifs.c @@ -75,8 +75,7 @@ static int capifs_remount(struct super_block *s, int *flags, char *data) } } - kfree(s->s_options); - s->s_options = new_opt; + replace_mount_options(s, new_opt); config.setuid = setuid; config.setgid = setgid; diff --git a/fs/affs/super.c b/fs/affs/super.c index 5ce695e707fe..63f5183f263b 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -507,8 +507,7 @@ affs_remount(struct super_block *sb, int *flags, char *data) kfree(new_opts); return -EINVAL; } - kfree(sb->s_options); - sb->s_options = new_opts; + replace_mount_options(sb, new_opts); sbi->s_flags = mount_flags; sbi->s_mode = mode; diff --git a/fs/afs/super.c b/fs/afs/super.c index 2753f16dd315..76828e5f8a39 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -408,17 +408,17 @@ static int afs_get_sb(struct file_system_type *fs_type, deactivate_locked_super(sb); goto error; } - sb->s_options = new_opts; + save_mount_options(sb, new_opts); sb->s_flags |= MS_ACTIVE; } else { _debug("reuse"); - kfree(new_opts); ASSERTCMP(sb->s_flags, &, MS_ACTIVE); } simple_set_mnt(mnt, sb); afs_put_volume(params.volume); afs_put_cell(params.cell); + kfree(new_opts); _leave(" = 0 [%p]", sb); return 0; diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index fecf402d7b8a..fc77965be841 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -423,8 +423,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) if (!(*flags & MS_RDONLY)) mark_dirty(s); - kfree(s->s_options); - s->s_options = new_opts; + replace_mount_options(s, new_opts); return 0; diff --git a/fs/namespace.c b/fs/namespace.c index 0d2003fb4377..134d494158d9 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -695,12 +695,16 @@ static inline void mangle(struct seq_file *m, const char *s) */ int generic_show_options(struct seq_file *m, struct vfsmount *mnt) { - const char *options = mnt->mnt_sb->s_options; + const char *options; + + rcu_read_lock(); + options = rcu_dereference(mnt->mnt_sb->s_options); if (options != NULL && options[0]) { seq_putc(m, ','); mangle(m, options); } + rcu_read_unlock(); return 0; } @@ -721,11 +725,22 @@ EXPORT_SYMBOL(generic_show_options); */ void save_mount_options(struct super_block *sb, char *options) { - kfree(sb->s_options); - sb->s_options = kstrdup(options, GFP_KERNEL); + BUG_ON(sb->s_options); + rcu_assign_pointer(sb->s_options, kstrdup(options, GFP_KERNEL)); } EXPORT_SYMBOL(save_mount_options); +void replace_mount_options(struct super_block *sb, char *options) +{ + char *old = sb->s_options; + rcu_assign_pointer(sb->s_options, options); + if (old) { + synchronize_rcu(); + kfree(old); + } +} +EXPORT_SYMBOL(replace_mount_options); + #ifdef CONFIG_PROC_FS /* iterator */ static void *m_start(struct seq_file *m, loff_t *pos) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index d444fe0013a4..1215a4f50cd2 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1316,8 +1316,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) } out_ok: - kfree(s->s_options); - s->s_options = new_opts; + replace_mount_options(s, new_opts); return 0; out_err: diff --git a/include/linux/fs.h b/include/linux/fs.h index ed788426f464..3b534e527e09 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2368,6 +2368,7 @@ extern void file_update_time(struct file *file); extern int generic_show_options(struct seq_file *m, struct vfsmount *mnt); extern void save_mount_options(struct super_block *sb, char *options); +extern void replace_mount_options(struct super_block *sb, char *options); static inline ino_t parent_ino(struct dentry *dentry) { -- cgit v1.2.3-59-g8ed1b From ecf4667d30dd63fa130e22f8f2da3e6ce003358b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 12 May 2009 13:19:37 -0700 Subject: syscalls.h add the missing sys_pipe2 declaration In order to build the generic syscall table, we need a declaration for every system call. sys_pipe2 was added without a proper declaration, so add this to syscalls.h now. Signed-off-by: Arnd Bergmann Cc: Ulrich Drepper Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/syscalls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 40617c1d8976..30520844b8da 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -433,6 +433,7 @@ asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg); asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg); #endif +asmlinkage long sys_pipe2(int __user *fildes, int flags); asmlinkage long sys_dup(unsigned int fildes); asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd); asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags); -- cgit v1.2.3-59-g8ed1b From 4f005dbe5584fe54c9f6d6d4f0acd3fb29be84da Mon Sep 17 00:00:00 2001 From: Maciej Sosnowski Date: Thu, 23 Apr 2009 12:31:51 +0200 Subject: ioatdma: fix "ioatdma frees DMA memory with wrong function" as reported by Alexander Beregalov ioatdma 0000:00:08.0: DMA-API: device driver frees DMA memory with wrong function [device address=0x000000007f76f800] [size=2000 bytes] [map ped as single] [unmapped as page] The ioatdma driver was unmapping all regions (either allocated as page or single) using unmap_page. This patch lets dma driver recognize if unmap_single or unmap_page should be used. It introduces two new dma control flags: DMA_COMPL_SRC_UNMAP_SINGLE and DMA_COMPL_DEST_UNMAP_SINGLE. They should be set to indicate dma driver to do dma-unmapping as single (first one for the source, tha latter for the destination). If respective flag is not set, the driver assumes dma-unmapping as page. Signed-off-by: Maciej Sosnowski Reported-by: Alexander Beregalov Tested-by: Alexander Beregalov Signed-off-by: Dan Williams --- drivers/dma/dmaengine.c | 17 +++++++++++------ drivers/dma/ioat_dma.c | 45 ++++++++++++++++++++++++++++----------------- include/linux/dmaengine.h | 6 ++++++ 3 files changed, 45 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 92438e9dacc3..5a87384ea4ff 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -804,11 +804,14 @@ dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, dma_addr_t dma_dest, dma_src; dma_cookie_t cookie; int cpu; + unsigned long flags; dma_src = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE); dma_dest = dma_map_single(dev->dev, dest, len, DMA_FROM_DEVICE); - tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, - DMA_CTRL_ACK); + flags = DMA_CTRL_ACK | + DMA_COMPL_SRC_UNMAP_SINGLE | + DMA_COMPL_DEST_UNMAP_SINGLE; + tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags); if (!tx) { dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE); @@ -850,11 +853,12 @@ dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page, dma_addr_t dma_dest, dma_src; dma_cookie_t cookie; int cpu; + unsigned long flags; dma_src = dma_map_single(dev->dev, kdata, len, DMA_TO_DEVICE); dma_dest = dma_map_page(dev->dev, page, offset, len, DMA_FROM_DEVICE); - tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, - DMA_CTRL_ACK); + flags = DMA_CTRL_ACK | DMA_COMPL_SRC_UNMAP_SINGLE; + tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags); if (!tx) { dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE); @@ -898,12 +902,13 @@ dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg, dma_addr_t dma_dest, dma_src; dma_cookie_t cookie; int cpu; + unsigned long flags; dma_src = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE); dma_dest = dma_map_page(dev->dev, dest_pg, dest_off, len, DMA_FROM_DEVICE); - tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, - DMA_CTRL_ACK); + flags = DMA_CTRL_ACK; + tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags); if (!tx) { dma_unmap_page(dev->dev, dma_src, len, DMA_TO_DEVICE); diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c index e4fc33c1c32f..1955ee8d6d20 100644 --- a/drivers/dma/ioat_dma.c +++ b/drivers/dma/ioat_dma.c @@ -1063,22 +1063,31 @@ static void ioat_dma_cleanup_tasklet(unsigned long data) static void ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc) { - /* - * yes we are unmapping both _page and _single - * alloc'd regions with unmap_page. Is this - * *really* that bad? - */ - if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) - pci_unmap_page(ioat_chan->device->pdev, - pci_unmap_addr(desc, dst), - pci_unmap_len(desc, len), - PCI_DMA_FROMDEVICE); - - if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) - pci_unmap_page(ioat_chan->device->pdev, - pci_unmap_addr(desc, src), - pci_unmap_len(desc, len), - PCI_DMA_TODEVICE); + if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + if (desc->async_tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE) + pci_unmap_single(ioat_chan->device->pdev, + pci_unmap_addr(desc, dst), + pci_unmap_len(desc, len), + PCI_DMA_FROMDEVICE); + else + pci_unmap_page(ioat_chan->device->pdev, + pci_unmap_addr(desc, dst), + pci_unmap_len(desc, len), + PCI_DMA_FROMDEVICE); + } + + if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + if (desc->async_tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE) + pci_unmap_single(ioat_chan->device->pdev, + pci_unmap_addr(desc, src), + pci_unmap_len(desc, len), + PCI_DMA_TODEVICE); + else + pci_unmap_page(ioat_chan->device->pdev, + pci_unmap_addr(desc, src), + pci_unmap_len(desc, len), + PCI_DMA_TODEVICE); + } } /** @@ -1363,6 +1372,7 @@ static int ioat_dma_self_test(struct ioatdma_device *device) int err = 0; struct completion cmp; unsigned long tmo; + unsigned long flags; src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); if (!src) @@ -1392,8 +1402,9 @@ static int ioat_dma_self_test(struct ioatdma_device *device) DMA_TO_DEVICE); dma_dest = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); + flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE; tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src, - IOAT_TEST_SIZE, 0); + IOAT_TEST_SIZE, flags); if (!tx) { dev_err(&device->pdev->dev, "Self-test prep failed, disabling\n"); diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 2e2aa3df170c..ffefba81c818 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -78,12 +78,18 @@ enum dma_transaction_type { * dependency chains * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s) * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s) + * @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single + * (if not set, do the source dma-unmapping as page) + * @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single + * (if not set, do the destination dma-unmapping as page) */ enum dma_ctrl_flags { DMA_PREP_INTERRUPT = (1 << 0), DMA_CTRL_ACK = (1 << 1), DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2), DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), + DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4), + DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5), }; /** -- cgit v1.2.3-59-g8ed1b From cd17cbfda004fe5f406c01b318c6378d9895896f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 May 2009 11:32:24 +0200 Subject: Revert "mm: add /proc controls for pdflush threads" This reverts commit fafd688e4c0c34da0f3de909881117d374e4c7af. Work is progressing to switch away from pdflush as the process backing for flushing out dirty data. So it seems pointless to add more knobs to control pdflush threads. The original author of the patch did not have any specific use cases for adding the knobs, so we can easily revert this before 2.6.30 to avoid having to maintain this API forever. Signed-off-by: Jens Axboe --- Documentation/sysctl/vm.txt | 28 ---------------------------- include/linux/writeback.h | 2 -- kernel/sysctl.c | 23 ----------------------- mm/pdflush.c | 31 ++++++++++++------------------- 4 files changed, 12 insertions(+), 72 deletions(-) (limited to 'include/linux') diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index b716d33912d8..c302ddf629a0 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -39,8 +39,6 @@ Currently, these files are in /proc/sys/vm: - nr_hugepages - nr_overcommit_hugepages - nr_pdflush_threads -- nr_pdflush_threads_min -- nr_pdflush_threads_max - nr_trim_pages (only if CONFIG_MMU=n) - numa_zonelist_order - oom_dump_tasks @@ -469,32 +467,6 @@ The default value is 0. ============================================================== -nr_pdflush_threads_min - -This value controls the minimum number of pdflush threads. - -At boot time, the kernel will create and maintain 'nr_pdflush_threads_min' -threads for the kernel's lifetime. - -The default value is 2. The minimum value you can specify is 1, and -the maximum value is the current setting of 'nr_pdflush_threads_max'. - -See 'nr_pdflush_threads_max' below for more information. - -============================================================== - -nr_pdflush_threads_max - -This value controls the maximum number of pdflush threads that can be -created. The pdflush algorithm will create a new pdflush thread (up to -this maximum) if no pdflush threads have been available for >= 1 second. - -The default value is 8. The minimum value you can specify is the -current value of 'nr_pdflush_threads_min' and the -maximum is 1000. - -============================================================== - overcommit_memory: This value contains a flag that enables memory overcommitment. diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 9c1ed1fb6ddb..93445477f86a 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -168,8 +168,6 @@ void writeback_set_ratelimit(void); /* pdflush.c */ extern int nr_pdflush_threads; /* Global so it can be exported to sysctl read-only. */ -extern int nr_pdflush_threads_max; /* Global so it can be exported to sysctl */ -extern int nr_pdflush_threads_min; /* Global so it can be exported to sysctl */ #endif /* WRITEBACK_H */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ea78fa101ad6..b2970d56fb76 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -101,7 +101,6 @@ static int __maybe_unused one = 1; static int __maybe_unused two = 2; static unsigned long one_ul = 1; static int one_hundred = 100; -static int one_thousand = 1000; /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */ static unsigned long dirty_bytes_min = 2 * PAGE_SIZE; @@ -1033,28 +1032,6 @@ static struct ctl_table vm_table[] = { .mode = 0444 /* read-only*/, .proc_handler = &proc_dointvec, }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "nr_pdflush_threads_min", - .data = &nr_pdflush_threads_min, - .maxlen = sizeof nr_pdflush_threads_min, - .mode = 0644 /* read-write */, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &one, - .extra2 = &nr_pdflush_threads_max, - }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "nr_pdflush_threads_max", - .data = &nr_pdflush_threads_max, - .maxlen = sizeof nr_pdflush_threads_max, - .mode = 0644 /* read-write */, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &nr_pdflush_threads_min, - .extra2 = &one_thousand, - }, { .ctl_name = VM_SWAPPINESS, .procname = "swappiness", diff --git a/mm/pdflush.c b/mm/pdflush.c index f2caf96993f8..235ac440c44e 100644 --- a/mm/pdflush.c +++ b/mm/pdflush.c @@ -57,14 +57,6 @@ static DEFINE_SPINLOCK(pdflush_lock); */ int nr_pdflush_threads = 0; -/* - * The max/min number of pdflush threads. R/W by sysctl at - * /proc/sys/vm/nr_pdflush_threads_max/min - */ -int nr_pdflush_threads_max __read_mostly = MAX_PDFLUSH_THREADS; -int nr_pdflush_threads_min __read_mostly = MIN_PDFLUSH_THREADS; - - /* * The time at which the pdflush thread pool last went empty */ @@ -76,7 +68,7 @@ static unsigned long last_empty_jifs; * Thread pool management algorithm: * * - The minimum and maximum number of pdflush instances are bound - * by nr_pdflush_threads_min and nr_pdflush_threads_max. + * by MIN_PDFLUSH_THREADS and MAX_PDFLUSH_THREADS. * * - If there have been no idle pdflush instances for 1 second, create * a new one. @@ -142,13 +134,14 @@ static int __pdflush(struct pdflush_work *my_work) * To throttle creation, we reset last_empty_jifs. */ if (time_after(jiffies, last_empty_jifs + 1 * HZ)) { - if (list_empty(&pdflush_list) && - nr_pdflush_threads < nr_pdflush_threads_max) { - last_empty_jifs = jiffies; - nr_pdflush_threads++; - spin_unlock_irq(&pdflush_lock); - start_one_pdflush_thread(); - spin_lock_irq(&pdflush_lock); + if (list_empty(&pdflush_list)) { + if (nr_pdflush_threads < MAX_PDFLUSH_THREADS) { + last_empty_jifs = jiffies; + nr_pdflush_threads++; + spin_unlock_irq(&pdflush_lock); + start_one_pdflush_thread(); + spin_lock_irq(&pdflush_lock); + } } } @@ -160,7 +153,7 @@ static int __pdflush(struct pdflush_work *my_work) */ if (list_empty(&pdflush_list)) continue; - if (nr_pdflush_threads <= nr_pdflush_threads_min) + if (nr_pdflush_threads <= MIN_PDFLUSH_THREADS) continue; pdf = list_entry(pdflush_list.prev, struct pdflush_work, list); if (time_after(jiffies, pdf->when_i_went_to_sleep + 1 * HZ)) { @@ -266,9 +259,9 @@ static int __init pdflush_init(void) * Pre-set nr_pdflush_threads... If we fail to create, * the count will be decremented. */ - nr_pdflush_threads = nr_pdflush_threads_min; + nr_pdflush_threads = MIN_PDFLUSH_THREADS; - for (i = 0; i < nr_pdflush_threads_min; i++) + for (i = 0; i < MIN_PDFLUSH_THREADS; i++) start_one_pdflush_thread(); return 0; } -- cgit v1.2.3-59-g8ed1b From 4bca3286433585b5f1c3e7d8ac37a2f4b3def9ca Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 15 May 2009 00:40:35 -0400 Subject: libata: Media rotation rate and form factor heuristics This patch provides new heuristics for parsing both the form factor and media rotation rate ATA IDENFITY words. The reported ATA version must be 7 or greater and the device must return values defined as valid in the standard. Only then are the characteristics reported to SCSI via the VPD B1 page. This seems like a reasonable compromise to me considering that we have been shipping several kernel releases that key off the rotation rate bit without any version checking whatsoever. With no complaints so far. Signed-off-by: Martin K. Petersen Signed-off-by: Jeff Garzik --- drivers/ata/libata-scsi.c | 11 ++++++----- include/linux/ata.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index d1718a1f278a..342316064e9f 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2142,13 +2142,14 @@ static unsigned int ata_scsiop_inq_89(struct ata_scsi_args *args, u8 *rbuf) static unsigned int ata_scsiop_inq_b1(struct ata_scsi_args *args, u8 *rbuf) { + int form_factor = ata_id_form_factor(args->id); + int media_rotation_rate = ata_id_rotation_rate(args->id); + rbuf[1] = 0xb1; rbuf[3] = 0x3c; - if (ata_id_major_version(args->id) > 7) { - rbuf[4] = args->id[217] >> 8; - rbuf[5] = args->id[217]; - rbuf[7] = args->id[168] & 0xf; - } + rbuf[4] = media_rotation_rate >> 8; + rbuf[5] = media_rotation_rate; + rbuf[7] = form_factor; return 0; } diff --git a/include/linux/ata.h b/include/linux/ata.h index cb79b7a208e1..915da43edee1 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -730,6 +730,34 @@ static inline int ata_id_has_unload(const u16 *id) return 0; } +static inline int ata_id_form_factor(const u16 *id) +{ + u16 val = id[168]; + + if (ata_id_major_version(id) < 7 || val == 0 || val == 0xffff) + return 0; + + val &= 0xf; + + if (val > 5) + return 0; + + return val; +} + +static inline int ata_id_rotation_rate(const u16 *id) +{ + u16 val = id[217]; + + if (ata_id_major_version(id) < 7 || val == 0 || val == 0xffff) + return 0; + + if (val > 1 && val < 0x401) + return 0; + + return val; +} + static inline int ata_id_has_trim(const u16 *id) { if (ata_id_major_version(id) >= 7 && -- cgit v1.2.3-59-g8ed1b From b83674c0da6558e357c6b482ccf299eeea77d8ef Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Sun, 17 May 2009 01:02:03 -0400 Subject: reiserfs: fixup perms when xattrs are disabled This adds CONFIG_REISERFS_FS_XATTR protection from reiserfs_permission. This is needed to avoid warnings during file deletions and chowns with xattrs disabled. Signed-off-by: Jeff Mahoney Signed-off-by: Linus Torvalds --- fs/reiserfs/xattr.c | 36 +++++++++++++++++++----------------- include/linux/reiserfs_xattr.h | 4 +--- 2 files changed, 20 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 628075ca82c1..8e7deb0e6964 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -871,23 +871,6 @@ static int reiserfs_check_acl(struct inode *inode, int mask) return error; } -int reiserfs_permission(struct inode *inode, int mask) -{ - /* - * We don't do permission checks on the internal objects. - * Permissions are determined by the "owning" object. - */ - if (IS_PRIVATE(inode)) - return 0; - /* - * Stat data v1 doesn't support ACLs. - */ - if (get_inode_sd_version(inode) == STAT_DATA_V1) - return generic_permission(inode, mask, NULL); - else - return generic_permission(inode, mask, reiserfs_check_acl); -} - static int create_privroot(struct dentry *dentry) { int err; @@ -951,6 +934,25 @@ static int xattr_mount_check(struct super_block *s) return 0; } +int reiserfs_permission(struct inode *inode, int mask) +{ + /* + * We don't do permission checks on the internal objects. + * Permissions are determined by the "owning" object. + */ + if (IS_PRIVATE(inode)) + return 0; + +#ifdef CONFIG_REISERFS_FS_XATTR + /* + * Stat data v1 doesn't support ACLs. + */ + if (get_inode_sd_version(inode) != STAT_DATA_V1) + return generic_permission(inode, mask, reiserfs_check_acl); +#endif + return generic_permission(inode, mask, NULL); +} + /* This will catch lookups from the fs root to .reiserfs_priv */ static int xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name) diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h index cdedc01036e4..99928dce37ea 100644 --- a/include/linux/reiserfs_xattr.h +++ b/include/linux/reiserfs_xattr.h @@ -41,6 +41,7 @@ int reiserfs_xattr_init(struct super_block *sb, int mount_flags); int reiserfs_lookup_privroot(struct super_block *sb); int reiserfs_delete_xattrs(struct inode *inode); int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs); +int reiserfs_permission(struct inode *inode, int mask); #ifdef CONFIG_REISERFS_FS_XATTR #define has_xattr_dir(inode) (REISERFS_I(inode)->i_flags & i_has_xattr_dir) @@ -50,7 +51,6 @@ int reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); ssize_t reiserfs_listxattr(struct dentry *dentry, char *buffer, size_t size); int reiserfs_removexattr(struct dentry *dentry, const char *name); -int reiserfs_permission(struct inode *inode, int mask); int reiserfs_xattr_get(struct inode *, const char *, void *, size_t); int reiserfs_xattr_set(struct inode *, const char *, const void *, size_t, int); @@ -117,8 +117,6 @@ static inline void reiserfs_init_xattr_rwsem(struct inode *inode) #define reiserfs_listxattr NULL #define reiserfs_removexattr NULL -#define reiserfs_permission NULL - static inline void reiserfs_init_xattr_rwsem(struct inode *inode) { } -- cgit v1.2.3-59-g8ed1b From eb33575cf67d3f35fa2510210ef92631266e2465 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 13 May 2009 17:34:48 +0100 Subject: [ARM] Double check memmap is actually valid with a memmap has unexpected holes V2 pfn_valid() is meant to be able to tell if a given PFN has valid memmap associated with it or not. In FLATMEM, it is expected that holes always have valid memmap as long as there is valid PFNs either side of the hole. In SPARSEMEM, it is assumed that a valid section has a memmap for the entire section. However, ARM and maybe other embedded architectures in the future free memmap backing holes to save memory on the assumption the memmap is never used. The page_zone linkages are then broken even though pfn_valid() returns true. A walker of the full memmap must then do this additional check to ensure the memmap they are looking at is sane by making sure the zone and PFN linkages are still valid. This is expensive, but walkers of the full memmap are extremely rare. This was caught before for FLATMEM and hacked around but it hits again for SPARSEMEM because the page_zone linkages can look ok where the PFN linkages are totally screwed. This looks like a hatchet job but the reality is that any clean solution would end up consumning all the memory saved by punching these unexpected holes in the memmap. For example, we tried marking the memmap within the section invalid but the section size exceeds the size of the hole in most cases so pfn_valid() starts returning false where valid memmap exists. Shrinking the size of the section would increase memory consumption offsetting the gains. This patch identifies when an architecture is punching unexpected holes in the memmap that the memory model cannot automatically detect and sets ARCH_HAS_HOLES_MEMORYMODEL. At the moment, this is restricted to EP93xx which is the model sub-architecture this has been reported on but may expand later. When set, walkers of the full memmap must call memmap_valid_within() for each PFN and passing in what it expects the page and zone to be for that PFN. If it finds the linkages to be broken, it assumes the memmap is invalid for that PFN. Signed-off-by: Mel Gorman Signed-off-by: Russell King --- arch/arm/Kconfig | 6 +++--- include/linux/mmzone.h | 26 ++++++++++++++++++++++++++ mm/mmzone.c | 15 +++++++++++++++ mm/vmstat.c | 19 ++++--------------- 4 files changed, 48 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index e60ec54df334..9d02cdb15b23 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -273,6 +273,7 @@ config ARCH_EP93XX select HAVE_CLK select COMMON_CLKDEV select ARCH_REQUIRE_GPIOLIB + select ARCH_HAS_HOLES_MEMORYMODEL help This enables support for the Cirrus EP93xx series of CPUs. @@ -976,10 +977,9 @@ config OABI_COMPAT UNPREDICTABLE (in fact it can be predicted that it won't work at all). If in doubt say Y. -config ARCH_FLATMEM_HAS_HOLES +config ARCH_HAS_HOLES_MEMORYMODEL bool - default y - depends on FLATMEM + default n # Discontigmem is deprecated config ARCH_DISCONTIGMEM_ENABLE diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 186ec6ab334d..a47c879e1304 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1097,6 +1097,32 @@ unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); #define pfn_valid_within(pfn) (1) #endif +#ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL +/* + * pfn_valid() is meant to be able to tell if a given PFN has valid memmap + * associated with it or not. In FLATMEM, it is expected that holes always + * have valid memmap as long as there is valid PFNs either side of the hole. + * In SPARSEMEM, it is assumed that a valid section has a memmap for the + * entire section. + * + * However, an ARM, and maybe other embedded architectures in the future + * free memmap backing holes to save memory on the assumption the memmap is + * never used. The page_zone linkages are then broken even though pfn_valid() + * returns true. A walker of the full memmap must then do this additional + * check to ensure the memmap they are looking at is sane by making sure + * the zone and PFN linkages are still valid. This is expensive, but walkers + * of the full memmap are extremely rare. + */ +int memmap_valid_within(unsigned long pfn, + struct page *page, struct zone *zone); +#else +static inline int memmap_valid_within(unsigned long pfn, + struct page *page, struct zone *zone) +{ + return 1; +} +#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */ + #endif /* !__GENERATING_BOUNDS.H */ #endif /* !__ASSEMBLY__ */ #endif /* _LINUX_MMZONE_H */ diff --git a/mm/mmzone.c b/mm/mmzone.c index 16ce8b955dcf..f5b7d1760213 100644 --- a/mm/mmzone.c +++ b/mm/mmzone.c @@ -6,6 +6,7 @@ #include +#include #include #include @@ -72,3 +73,17 @@ struct zoneref *next_zones_zonelist(struct zoneref *z, *zone = zonelist_zone(z); return z; } + +#ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL +int memmap_valid_within(unsigned long pfn, + struct page *page, struct zone *zone) +{ + if (page_to_pfn(page) != pfn) + return 0; + + if (page_zone(page) != zone) + return 0; + + return 1; +} +#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */ diff --git a/mm/vmstat.c b/mm/vmstat.c index 66f6130976cb..74d66dba0cbe 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -509,22 +509,11 @@ static void pagetypeinfo_showblockcount_print(struct seq_file *m, continue; page = pfn_to_page(pfn); -#ifdef CONFIG_ARCH_FLATMEM_HAS_HOLES - /* - * Ordinarily, memory holes in flatmem still have a valid - * memmap for the PFN range. However, an architecture for - * embedded systems (e.g. ARM) can free up the memmap backing - * holes to save memory on the assumption the memmap is - * never used. The page_zone linkages are then broken even - * though pfn_valid() returns true. Skip the page if the - * linkages are broken. Even if this test passed, the impact - * is that the counters for the movable type are off but - * fragmentation monitoring is likely meaningless on small - * systems. - */ - if (page_zone(page) != zone) + + /* Watch for unexpected holes punched in the memmap */ + if (!memmap_valid_within(pfn, page, zone)) continue; -#endif + mtype = get_pageblock_migratetype(page); if (mtype < MIGRATE_TYPES) -- cgit v1.2.3-59-g8ed1b From 03fbdb15c14e9746c63168e3ff2c64b9c8336d33 Mon Sep 17 00:00:00 2001 From: Alessandro Rubini Date: Wed, 20 May 2009 22:39:08 +0100 Subject: [ARM] 5519/1: amba probe: pass "struct amba_id *" instead of void * The second argument of the probe method points to the amba_id structure, so it's better passed with the correct type. None of the current in-tree drivers uses the pointer, so they have only been checked for a clean compile. Change suggested by Russell King. Signed-off-by: Alessandro Rubini Signed-off-by: Russell King --- drivers/input/serio/ambakmi.c | 2 +- drivers/mmc/host/mmci.c | 2 +- drivers/rtc/rtc-pl030.c | 2 +- drivers/rtc/rtc-pl031.c | 2 +- drivers/serial/amba-pl010.c | 2 +- drivers/serial/amba-pl011.c | 2 +- drivers/video/amba-clcd.c | 2 +- include/linux/amba/bus.h | 2 +- sound/arm/aaci.c | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/serio/ambakmi.c b/drivers/input/serio/ambakmi.c index e29cdc13a199..a28c06d686e1 100644 --- a/drivers/input/serio/ambakmi.c +++ b/drivers/input/serio/ambakmi.c @@ -107,7 +107,7 @@ static void amba_kmi_close(struct serio *io) clk_disable(kmi->clk); } -static int amba_kmi_probe(struct amba_device *dev, void *id) +static int amba_kmi_probe(struct amba_device *dev, struct amba_id *id) { struct amba_kmi_port *kmi; struct serio *io; diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index 36875dcfa492..7d4febdab286 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -490,7 +490,7 @@ static void mmci_check_status(unsigned long data) mod_timer(&host->timer, jiffies + HZ); } -static int __devinit mmci_probe(struct amba_device *dev, void *id) +static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id) { struct mmc_platform_data *plat = dev->dev.platform_data; struct mmci_host *host; diff --git a/drivers/rtc/rtc-pl030.c b/drivers/rtc/rtc-pl030.c index 826153552157..aaf1f75fa293 100644 --- a/drivers/rtc/rtc-pl030.c +++ b/drivers/rtc/rtc-pl030.c @@ -102,7 +102,7 @@ static const struct rtc_class_ops pl030_ops = { .set_alarm = pl030_set_alarm, }; -static int pl030_probe(struct amba_device *dev, void *id) +static int pl030_probe(struct amba_device *dev, struct amba_id *id) { struct pl030_rtc *rtc; int ret; diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c index 333eec689d2f..451fc13784d1 100644 --- a/drivers/rtc/rtc-pl031.c +++ b/drivers/rtc/rtc-pl031.c @@ -127,7 +127,7 @@ static int pl031_remove(struct amba_device *adev) return 0; } -static int pl031_probe(struct amba_device *adev, void *id) +static int pl031_probe(struct amba_device *adev, struct amba_id *id) { int ret; struct pl031_local *ldata; diff --git a/drivers/serial/amba-pl010.c b/drivers/serial/amba-pl010.c index e3a5ad5ef1d6..cdc049d4350f 100644 --- a/drivers/serial/amba-pl010.c +++ b/drivers/serial/amba-pl010.c @@ -665,7 +665,7 @@ static struct uart_driver amba_reg = { .cons = AMBA_CONSOLE, }; -static int pl010_probe(struct amba_device *dev, void *id) +static int pl010_probe(struct amba_device *dev, struct amba_id *id) { struct uart_amba_port *uap; void __iomem *base; diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c index 8b2b9700f3e4..88fdac51b6c5 100644 --- a/drivers/serial/amba-pl011.c +++ b/drivers/serial/amba-pl011.c @@ -729,7 +729,7 @@ static struct uart_driver amba_reg = { .cons = AMBA_CONSOLE, }; -static int pl011_probe(struct amba_device *dev, void *id) +static int pl011_probe(struct amba_device *dev, struct amba_id *id) { struct uart_amba_port *uap; void __iomem *base; diff --git a/drivers/video/amba-clcd.c b/drivers/video/amba-clcd.c index 61050ab14128..d1f80bac54f0 100644 --- a/drivers/video/amba-clcd.c +++ b/drivers/video/amba-clcd.c @@ -437,7 +437,7 @@ static int clcdfb_register(struct clcd_fb *fb) return ret; } -static int clcdfb_probe(struct amba_device *dev, void *id) +static int clcdfb_probe(struct amba_device *dev, struct amba_id *id) { struct clcd_board *board = dev->dev.platform_data; struct clcd_fb *fb; diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 51e6e54b2aa1..9b93cafa82a0 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -28,7 +28,7 @@ struct amba_id { struct amba_driver { struct device_driver drv; - int (*probe)(struct amba_device *, void *); + int (*probe)(struct amba_device *, struct amba_id *); int (*remove)(struct amba_device *); void (*shutdown)(struct amba_device *); int (*suspend)(struct amba_device *, pm_message_t); diff --git a/sound/arm/aaci.c b/sound/arm/aaci.c index 7fbd68fab944..5c48e36038f2 100644 --- a/sound/arm/aaci.c +++ b/sound/arm/aaci.c @@ -1074,7 +1074,7 @@ static unsigned int __devinit aaci_size_fifo(struct aaci *aaci) return i; } -static int __devinit aaci_probe(struct amba_device *dev, void *id) +static int __devinit aaci_probe(struct amba_device *dev, struct amba_id *id) { struct aaci *aaci; int ret, i; -- cgit v1.2.3-59-g8ed1b From 28ee9bc5cc42776e0364399b401a64906ac1ac8e Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 22 May 2009 16:23:38 +0200 Subject: ide: report timeouts in ide_busy_sleep() * change 'hwif' argument to 'drive' * report an error on timeout Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-probe.c | 9 ++++++--- include/linux/ide.h | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 7f264ed1141b..c895ed52b2e8 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -295,7 +295,7 @@ int ide_dev_read_id(ide_drive_t *drive, u8 cmd, u16 *id) timeout = ((cmd == ATA_CMD_ID_ATA) ? WAIT_WORSTCASE : WAIT_PIDENTIFY) / 2; - if (ide_busy_sleep(hwif, timeout, use_altstatus)) + if (ide_busy_sleep(drive, timeout, use_altstatus)) return 1; /* wait for IRQ and ATA_DRQ */ @@ -316,8 +316,9 @@ int ide_dev_read_id(ide_drive_t *drive, u8 cmd, u16 *id) return rc; } -int ide_busy_sleep(ide_hwif_t *hwif, unsigned long timeout, int altstatus) +int ide_busy_sleep(ide_drive_t *drive, unsigned long timeout, int altstatus) { + ide_hwif_t *hwif = drive->hwif; u8 stat; timeout += jiffies; @@ -330,6 +331,8 @@ int ide_busy_sleep(ide_hwif_t *hwif, unsigned long timeout, int altstatus) return 0; } while (time_before(jiffies, timeout)); + printk(KERN_ERR "%s: timeout in %s\n", drive->name, __func__); + return 1; /* drive timed-out */ } @@ -420,7 +423,7 @@ static int do_probe (ide_drive_t *drive, u8 cmd) tp_ops->dev_select(drive); msleep(50); tp_ops->exec_command(hwif, ATA_CMD_DEV_RESET); - (void)ide_busy_sleep(hwif, WAIT_WORSTCASE, 0); + (void)ide_busy_sleep(drive, WAIT_WORSTCASE, 0); rc = ide_dev_read_id(drive, cmd, id); } diff --git a/include/linux/ide.h b/include/linux/ide.h index ff65fffb078f..9fed365a598b 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1109,7 +1109,7 @@ void ide_fix_driveid(u16 *); extern void ide_fixstring(u8 *, const int, const int); -int ide_busy_sleep(ide_hwif_t *, unsigned long, int); +int ide_busy_sleep(ide_drive_t *, unsigned long, int); int ide_wait_stat(ide_startstop_t *, ide_drive_t *, u8, u8, unsigned long); -- cgit v1.2.3-59-g8ed1b From 5993856e53fbc4b4f28e2d481deaebeb715b1267 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Fri, 22 May 2009 16:23:39 +0200 Subject: via82cxxx: Add VIA VX855 PCI Device ID This patch adds the PCI Device ID 0xc409 to the PCI ID table of via82cxxx.c, as well as the 0x8409 south bridge ID. This is required to make the IDE driver work on the VX855/VX875 integrated chipset. Signed-off-by: Harald Welte Cc: Joseph Chan Cc: Bruce Chang Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/via82cxxx.c | 2 ++ include/linux/pci_ids.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/ide/via82cxxx.c b/drivers/ide/via82cxxx.c index 3ff7231e4858..028de26a25fe 100644 --- a/drivers/ide/via82cxxx.c +++ b/drivers/ide/via82cxxx.c @@ -67,6 +67,7 @@ static struct via_isa_bridge { u8 udma_mask; u8 flags; } via_isa_bridges[] = { + { "vx855", PCI_DEVICE_ID_VIA_VX855, 0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST }, { "vx800", PCI_DEVICE_ID_VIA_VX800, 0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST }, { "cx700", PCI_DEVICE_ID_VIA_CX700, 0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST }, { "vt8237s", PCI_DEVICE_ID_VIA_8237S, 0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST }, @@ -474,6 +475,7 @@ static const struct pci_device_id via_pci_tbl[] = { { PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_82C576_1), 0 }, { PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_82C586_1), 0 }, { PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_CX700_IDE), 0 }, + { PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_VX855_IDE), 0 }, { PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_6410), 1 }, { PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_SATA_EIDE), 1 }, { 0, }, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 06ba90c211a5..0f71812d67d3 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1406,7 +1406,7 @@ #define PCI_DEVICE_ID_VIA_82C598_1 0x8598 #define PCI_DEVICE_ID_VIA_838X_1 0xB188 #define PCI_DEVICE_ID_VIA_83_87XX_1 0xB198 -#define PCI_DEVICE_ID_VIA_C409_IDE 0XC409 +#define PCI_DEVICE_ID_VIA_VX855_IDE 0xC409 #define PCI_DEVICE_ID_VIA_ANON 0xFFFF #define PCI_VENDOR_ID_SIEMENS 0x110A -- cgit v1.2.3-59-g8ed1b From df391e0eda1e678add56a8e34226edf05d89af6a Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Sat, 23 May 2009 09:51:20 -0700 Subject: Input: multitouch - add tracking ID to the protocol MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are a few multi-touch devices that support finger tracking well in hardware, Stantum being the prime example. By exposing the tracking ID in the MT protocol, evdev bandwidth and cpu usage in user space can be reduced. This patch adds the ABS_MT_TRACKING_ID to the MT protocol. Signed-off-by: Henrik Rydberg Tested-by: Stéphane Chatty Signed-off-by: Dmitry Torokhov --- drivers/input/input.c | 1 + include/linux/input.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/drivers/input/input.c b/drivers/input/input.c index e54e002665b0..5d445f48789b 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -42,6 +42,7 @@ static unsigned int input_abs_bypass_init_data[] __initdata = { ABS_MT_POSITION_Y, ABS_MT_TOOL_TYPE, ABS_MT_BLOB_ID, + ABS_MT_TRACKING_ID, 0 }; static unsigned long input_abs_bypass[BITS_TO_LONGS(ABS_CNT)]; diff --git a/include/linux/input.h b/include/linux/input.h index 0e6ff5de3588..6fed4f6a9c9e 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -656,6 +656,7 @@ struct input_absinfo { #define ABS_MT_POSITION_Y 0x36 /* Center Y ellipse position */ #define ABS_MT_TOOL_TYPE 0x37 /* Type of touching device */ #define ABS_MT_BLOB_ID 0x38 /* Group a set of packets as a blob */ +#define ABS_MT_TRACKING_ID 0x39 /* Unique ID of initiated contact */ #define ABS_MAX 0x3f #define ABS_CNT (ABS_MAX+1) -- cgit v1.2.3-59-g8ed1b From bfcaa50270e18f35220a11d46e98fc6232c24606 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 25 May 2009 17:23:15 +0200 Subject: netfilter: nf_ct_tcp: fix accepting invalid RST segments Robert L Mathews discovered that some clients send evil TCP RST segments, which are accepted by netfilter conntrack but discarded by the destination. Thus the conntrack entry is destroyed but the destination retransmits data until timeout. The same technique, i.e. sending properly crafted RST segments, can easily be used to bypass connlimit/connbytes based restrictions (the sample script written by Robert can be found in the netfilter mailing list archives). The patch below adds a new flag and new field to struct ip_ct_tcp_state so that checking RST segments can be made more strict and thus TCP conntrack can catch the invalid ones: the RST segment is accepted only if its sequence number higher than or equal to the highest ack we seen from the other direction. (The last_ack field cannot be reused because it is used to catch resent packets.) Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_tcp.h | 4 ++++ net/netfilter/nf_conntrack_proto_tcp.c | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_tcp.h b/include/linux/netfilter/nf_conntrack_tcp.h index 3066789b972a..b2f384d42611 100644 --- a/include/linux/netfilter/nf_conntrack_tcp.h +++ b/include/linux/netfilter/nf_conntrack_tcp.h @@ -35,6 +35,9 @@ enum tcp_conntrack { /* Has unacknowledged data */ #define IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED 0x10 +/* The field td_maxack has been set */ +#define IP_CT_TCP_FLAG_MAXACK_SET 0x20 + struct nf_ct_tcp_flags { __u8 flags; __u8 mask; @@ -46,6 +49,7 @@ struct ip_ct_tcp_state { u_int32_t td_end; /* max of seq + len */ u_int32_t td_maxend; /* max of ack + max(win, 1) */ u_int32_t td_maxwin; /* max(win) */ + u_int32_t td_maxack; /* max of ack */ u_int8_t td_scale; /* window scale factor */ u_int8_t flags; /* per direction options */ }; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index b5ccf2b4b2e7..97a6e93d742e 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -634,6 +634,14 @@ static bool tcp_in_window(const struct nf_conn *ct, sender->td_end = end; sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; } + if (tcph->ack) { + if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) { + sender->td_maxack = ack; + sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET; + } else if (after(ack, sender->td_maxack)) + sender->td_maxack = ack; + } + /* * Update receiver data. */ @@ -918,6 +926,16 @@ static int tcp_packet(struct nf_conn *ct, "nf_ct_tcp: invalid state "); return -NF_ACCEPT; case TCP_CONNTRACK_CLOSE: + if (index == TCP_RST_SET + && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) + && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) { + /* Invalid RST */ + write_unlock_bh(&tcp_lock); + if (LOG_INVALID(net, IPPROTO_TCP)) + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + "nf_ct_tcp: invalid RST "); + return -NF_ACCEPT; + } if (index == TCP_RST_SET && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status) && ct->proto.tcp.last_index == TCP_SYN_SET) -- cgit v1.2.3-59-g8ed1b From 2f102607ac77354b02a76cf2748598ce9f270f08 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 27 May 2009 23:59:58 -0400 Subject: i7300_idle: allow testing on i5000-series hardware w/o re-compile Testing the i7300_idle driver on i5000-series hardware required an edit to i7300_idle.h to "#define SUPPORT_I5000 1" and a re-build of both i7300_idle and ioat_dma. Replace that build-time scheme with a load-time module parameter: "7300_idle.forceload=1" to make it easier to test the driver on hardware that while not officially validated, works fine and is much more commonly available. By default (no modparam) the driver will continue to load only on the i7300. Note that ioat_dma runs a copy of i7300_idle's probe routine to know to reserve an IOAT channel for i7300_idle. This change makes ioat_dma do that always on the i5000, just like it does on the i7300. Signed-off-by: Len Brown Acked-by: Andrew Henroid --- drivers/dma/ioat_dma.c | 2 +- drivers/idle/i7300_idle.c | 6 +++++- include/linux/i7300_idle.h | 20 ++++++++++---------- 3 files changed, 16 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c index 1955ee8d6d20..a600fc0f7962 100644 --- a/drivers/dma/ioat_dma.c +++ b/drivers/dma/ioat_dma.c @@ -173,7 +173,7 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device) xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale)); #ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL - if (i7300_idle_platform_probe(NULL, NULL) == 0) { + if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) { device->common.chancnt--; } #endif diff --git a/drivers/idle/i7300_idle.c b/drivers/idle/i7300_idle.c index bf740394d704..949c97ff57e3 100644 --- a/drivers/idle/i7300_idle.c +++ b/drivers/idle/i7300_idle.c @@ -41,6 +41,10 @@ static int debug; module_param_named(debug, debug, uint, 0644); MODULE_PARM_DESC(debug, "Enable debug printks in this driver"); +static int forceload; +module_param_named(forceload, forceload, uint, 0644); +MODULE_PARM_DESC(debug, "Enable driver testing on unvalidated i5000"); + #define dprintk(fmt, arg...) \ do { if (debug) printk(KERN_INFO I7300_PRINT fmt, ##arg); } while (0) @@ -552,7 +556,7 @@ static int __init i7300_idle_init(void) cpus_clear(idle_cpumask); total_us = 0; - if (i7300_idle_platform_probe(&fbd_dev, &ioat_dev)) + if (i7300_idle_platform_probe(&fbd_dev, &ioat_dev, forceload)) return -ENODEV; if (i7300_idle_thrt_save()) diff --git a/include/linux/i7300_idle.h b/include/linux/i7300_idle.h index 05a80c44513c..1587b7dec505 100644 --- a/include/linux/i7300_idle.h +++ b/include/linux/i7300_idle.h @@ -16,35 +16,33 @@ struct fbd_ioat { unsigned int vendor; unsigned int ioat_dev; + unsigned int enabled; }; /* * The i5000 chip-set has the same hooks as the i7300 - * but support is disabled by default because this driver - * has not been validated on that platform. + * but it is not enabled by default and must be manually + * manually enabled with "forceload=1" because it is + * only lightly validated. */ -#define SUPPORT_I5000 0 static const struct fbd_ioat fbd_ioat_list[] = { - {PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB}, -#if SUPPORT_I5000 - {PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT}, -#endif + {PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB, 1}, + {PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT, 0}, {0, 0} }; /* table of devices that work with this driver */ static const struct pci_device_id pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_FBD_CNB) }, -#if SUPPORT_I5000 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5000_ERR) }, -#endif { } /* Terminating entry */ }; /* Check for known platforms with I/O-AT */ static inline int i7300_idle_platform_probe(struct pci_dev **fbd_dev, - struct pci_dev **ioat_dev) + struct pci_dev **ioat_dev, + int enable_all) { int i; struct pci_dev *memdev, *dmadev; @@ -69,6 +67,8 @@ static inline int i7300_idle_platform_probe(struct pci_dev **fbd_dev, for (i = 0; fbd_ioat_list[i].vendor != 0; i++) { if (dmadev->vendor == fbd_ioat_list[i].vendor && dmadev->device == fbd_ioat_list[i].ioat_dev) { + if (!(fbd_ioat_list[i].enabled || enable_all)) + continue; if (fbd_dev) *fbd_dev = memdev; if (ioat_dev) -- cgit v1.2.3-59-g8ed1b From b2e1feaf0af6b8a826b86748a19ddc2013ab7dbd Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 28 May 2009 14:34:20 -0700 Subject: cred: #include init.h in cred.h linux/cred.h can't be included as first header (alphabetical order) because it uses __init which is enough to break compilation on some archs. Signed-off-by: Alexey Dobriyan Acked-by: James Morris Acked-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cred.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index 3282ee4318e7..4fa999696310 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -13,6 +13,7 @@ #define _LINUX_CRED_H #include +#include #include #include -- cgit v1.2.3-59-g8ed1b From e767e0561d7fd2333df1921f1ab4176211f9036b Mon Sep 17 00:00:00 2001 From: Daisuke Nishimura Date: Thu, 28 May 2009 14:34:28 -0700 Subject: memcg: fix deadlock between lock_page_cgroup and mapping tree_lock mapping->tree_lock can be acquired from interrupt context. Then, following dead lock can occur. Assume "A" as a page. CPU0: lock_page_cgroup(A) interrupted -> take mapping->tree_lock. CPU1: take mapping->tree_lock -> lock_page_cgroup(A) This patch tries to fix above deadlock by moving memcg's hook to out of mapping->tree_lock. charge/uncharge of pagecache/swapcache is protected by page lock, not tree_lock. After this patch, lock_page_cgroup() is not called under mapping->tree_lock. Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Daisuke Nishimura Cc: Balbir Singh Cc: Daisuke Nishimura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 5 +++++ mm/filemap.c | 6 +++--- mm/memcontrol.c | 4 +++- mm/swap_state.c | 4 +--- mm/truncate.c | 1 + mm/vmscan.c | 2 ++ 6 files changed, 15 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 62d81435347a..d476aad3ff57 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -437,6 +437,11 @@ static inline int mem_cgroup_cache_charge_swapin(struct page *page, return 0; } +static inline void +mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) +{ +} + #endif /* CONFIG_SWAP */ #endif /* __KERNEL__*/ #endif /* _LINUX_SWAP_H */ diff --git a/mm/filemap.c b/mm/filemap.c index 379ff0bcbf6e..1b60f30cebfa 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -121,7 +121,6 @@ void __remove_from_page_cache(struct page *page) mapping->nrpages--; __dec_zone_page_state(page, NR_FILE_PAGES); BUG_ON(page_mapped(page)); - mem_cgroup_uncharge_cache_page(page); /* * Some filesystems seem to re-dirty the page even after @@ -145,6 +144,7 @@ void remove_from_page_cache(struct page *page) spin_lock_irq(&mapping->tree_lock); __remove_from_page_cache(page); spin_unlock_irq(&mapping->tree_lock); + mem_cgroup_uncharge_cache_page(page); } static int sync_page(void *word) @@ -476,13 +476,13 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, if (likely(!error)) { mapping->nrpages++; __inc_zone_page_state(page, NR_FILE_PAGES); + spin_unlock_irq(&mapping->tree_lock); } else { page->mapping = NULL; + spin_unlock_irq(&mapping->tree_lock); mem_cgroup_uncharge_cache_page(page); page_cache_release(page); } - - spin_unlock_irq(&mapping->tree_lock); radix_tree_preload_end(); } else mem_cgroup_uncharge_cache_page(page); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 01c2d8f14685..4a747a27a22f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1488,8 +1488,9 @@ void mem_cgroup_uncharge_cache_page(struct page *page) __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE); } +#ifdef CONFIG_SWAP /* - * called from __delete_from_swap_cache() and drop "page" account. + * called after __delete_from_swap_cache() and drop "page" account. * memcg information is recorded to swap_cgroup of "ent" */ void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) @@ -1506,6 +1507,7 @@ void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) if (memcg) css_put(&memcg->css); } +#endif #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP /* diff --git a/mm/swap_state.c b/mm/swap_state.c index 3ecea98ecb45..1416e7e9e02d 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -109,8 +109,6 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) */ void __delete_from_swap_cache(struct page *page) { - swp_entry_t ent = {.val = page_private(page)}; - VM_BUG_ON(!PageLocked(page)); VM_BUG_ON(!PageSwapCache(page)); VM_BUG_ON(PageWriteback(page)); @@ -121,7 +119,6 @@ void __delete_from_swap_cache(struct page *page) total_swapcache_pages--; __dec_zone_page_state(page, NR_FILE_PAGES); INC_CACHE_INFO(del_total); - mem_cgroup_uncharge_swapcache(page, ent); } /** @@ -191,6 +188,7 @@ void delete_from_swap_cache(struct page *page) __delete_from_swap_cache(page); spin_unlock_irq(&swapper_space.tree_lock); + mem_cgroup_uncharge_swapcache(page, entry); swap_free(entry); page_cache_release(page); } diff --git a/mm/truncate.c b/mm/truncate.c index 55206fab7b99..12e1579f9165 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -359,6 +359,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) BUG_ON(page_has_private(page)); __remove_from_page_cache(page); spin_unlock_irq(&mapping->tree_lock); + mem_cgroup_uncharge_cache_page(page); page_cache_release(page); /* pagecache ref */ return 1; failed: diff --git a/mm/vmscan.c b/mm/vmscan.c index 5fa3eda1f03f..d254306562cd 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -470,10 +470,12 @@ static int __remove_mapping(struct address_space *mapping, struct page *page) swp_entry_t swap = { .val = page_private(page) }; __delete_from_swap_cache(page); spin_unlock_irq(&mapping->tree_lock); + mem_cgroup_uncharge_swapcache(page, swap); swap_free(swap); } else { __remove_from_page_cache(page); spin_unlock_irq(&mapping->tree_lock); + mem_cgroup_uncharge_cache_page(page); } return 1; -- cgit v1.2.3-59-g8ed1b From 52bb25a620e1925bb53d41d0ed28571b3de98a31 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 1 Jun 2009 06:21:13 +0000 Subject: headers_check fix: linux/auto_fs.h fix the following 'make headers_check' warnings: usr/include/linux/auto_fs.h:17: include of is preferred over Signed-off-by: Jaswinder Singh Rajput --- include/linux/auto_fs.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/auto_fs.h b/include/linux/auto_fs.h index 63265852b7d1..7b09c8348fd3 100644 --- a/include/linux/auto_fs.h +++ b/include/linux/auto_fs.h @@ -14,13 +14,12 @@ #ifndef _LINUX_AUTO_FS_H #define _LINUX_AUTO_FS_H +#include #ifdef __KERNEL__ #include #include -#include #include #else -#include #include #endif /* __KERNEL__ */ -- cgit v1.2.3-59-g8ed1b From d280cc989ad591607e812cd5c5dfde702b5f191a Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 1 Jun 2009 06:23:25 +0000 Subject: headers_check fix: linux/net_dropmon.h fix the following 'make headers_check' warnings: usr/include/linux/net_dropmon.h:7: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput --- include/linux/net_dropmon.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/net_dropmon.h b/include/linux/net_dropmon.h index 0217fb81a630..0e2e100c44a2 100644 --- a/include/linux/net_dropmon.h +++ b/include/linux/net_dropmon.h @@ -1,6 +1,7 @@ #ifndef __NET_DROPMON_H #define __NET_DROPMON_H +#include #include struct net_dm_drop_point { -- cgit v1.2.3-59-g8ed1b From 05ad709d04799125ed85dd816fdb558258102172 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Tue, 2 Jun 2009 16:58:10 +0100 Subject: parport: quickfix the proc registration bug Ideally we should have a directory of drivers and a link to the 'active' driver. For now just show the first device which is effectively the existing semantics without a warning. This is an update on the original buggy patch that I then forgot to resubmit. Confusingly it was proposed by Red Hat, written by Etched Pixels fixed and submitted by Intel ... Resolves-Bug: http://bugzilla.kernel.org/show_bug.cgi?id=9749 Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/parport/share.c | 13 ++++++++++--- include/linux/parport.h | 4 ++++ 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/parport/share.c b/drivers/parport/share.c index 0ebca450ed29..dffa5d4fb298 100644 --- a/drivers/parport/share.c +++ b/drivers/parport/share.c @@ -614,7 +614,10 @@ parport_register_device(struct parport *port, const char *name, * pardevice fields. -arca */ port->ops->init_state(tmp, tmp->state); - parport_device_proc_register(tmp); + if (!test_and_set_bit(PARPORT_DEVPROC_REGISTERED, &port->devflags)) { + port->proc_device = tmp; + parport_device_proc_register(tmp); + } return tmp; out_free_all: @@ -646,10 +649,14 @@ void parport_unregister_device(struct pardevice *dev) } #endif - parport_device_proc_unregister(dev); - port = dev->port->physport; + if (port->proc_device == dev) { + port->proc_device = NULL; + clear_bit(PARPORT_DEVPROC_REGISTERED, &port->devflags); + parport_device_proc_unregister(dev); + } + if (port->cad == dev) { printk(KERN_DEBUG "%s: %s forgot to release port\n", port->name, dev->name); diff --git a/include/linux/parport.h b/include/linux/parport.h index e1f83c5065c5..38a423ed3c01 100644 --- a/include/linux/parport.h +++ b/include/linux/parport.h @@ -324,6 +324,10 @@ struct parport { int spintime; atomic_t ref_count; + unsigned long devflags; +#define PARPORT_DEVPROC_REGISTERED 0 + struct pardevice *proc_device; /* Currently register proc device */ + struct list_head full_list; struct parport *slaves[3]; }; -- cgit v1.2.3-59-g8ed1b From 087eb437051b3de817720f9c80c440fc9e7dcce8 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 4 Jun 2009 16:29:07 -0700 Subject: ptrace: tracehook_report_clone: fix false positives The "trace || CLONE_PTRACE" check in tracehook_report_clone() is not right, - If the untraced task does clone(CLONE_PTRACE) the new child is not traced, we must not queue SIGSTOP. - If we forked the traced task, but the tracer exits and untraces both the forking task and the new child (after copy_process() drops tasklist_lock), we should not queue SIGSTOP too. Change the code to check task_ptrace() != 0 instead. This is still racy, but the race is harmless. We can race with another tracer attaching to this child, or the tracer can exit and detach in parallel. But giwen that we didn't do wake_up_new_task() yet, the child must have the pending SIGSTOP anyway. Signed-off-by: Oleg Nesterov Acked-by: Roland McGrath Cc: Christoph Hellwig Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/tracehook.h | 11 +++++------ kernel/fork.c | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index c7aa154f4bfc..eb96603d92db 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -259,14 +259,12 @@ static inline void tracehook_finish_clone(struct task_struct *child, /** * tracehook_report_clone - in parent, new child is about to start running - * @trace: return value from tracehook_prepare_clone() * @regs: parent's user register state * @clone_flags: flags from parent's system call * @pid: new child's PID in the parent's namespace * @child: new child task * - * Called after a child is set up, but before it has been started - * running. @trace is the value returned by tracehook_prepare_clone(). + * Called after a child is set up, but before it has been started running. * This is not a good place to block, because the child has not started * yet. Suspend the child here if desired, and then block in * tracehook_report_clone_complete(). This must prevent the child from @@ -276,13 +274,14 @@ static inline void tracehook_finish_clone(struct task_struct *child, * * Called with no locks held, but the child cannot run until this returns. */ -static inline void tracehook_report_clone(int trace, struct pt_regs *regs, +static inline void tracehook_report_clone(struct pt_regs *regs, unsigned long clone_flags, pid_t pid, struct task_struct *child) { - if (unlikely(trace) || unlikely(clone_flags & CLONE_PTRACE)) { + if (unlikely(task_ptrace(child))) { /* - * The child starts up with an immediate SIGSTOP. + * It doesn't matter who attached/attaching to this + * task, the pending SIGSTOP is right in any case. */ sigaddset(&child->pending.signal, SIGSTOP); set_tsk_thread_flag(child, TIF_SIGPENDING); diff --git a/kernel/fork.c b/kernel/fork.c index b9e2edd00726..875ffbdd96d0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1409,7 +1409,7 @@ long do_fork(unsigned long clone_flags, } audit_finish_fork(p); - tracehook_report_clone(trace, regs, clone_flags, nr, p); + tracehook_report_clone(regs, clone_flags, nr, p); /* * We set PF_STARTING at creation in case tracing wants to -- cgit v1.2.3-59-g8ed1b From aa853f85d9ed593672d0f24a98c72a2518cb63e6 Mon Sep 17 00:00:00 2001 From: Alessandro Rubini Date: Sat, 6 Jun 2009 10:17:57 +0100 Subject: [ARM] 5543/1: arm: serial amba: add missing declaration in serial.h This header is sometimes included in the uncompress stage to get register values, but no can be included there. So declare "struct amba_device" here before using it in a prototype. Signed-off-by: Alessandro Rubini Acked-by: Andrea Gallo Signed-off-by: Russell King --- include/linux/amba/serial.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h index 48ee32a18ac5..64a982ea5d5f 100644 --- a/include/linux/amba/serial.h +++ b/include/linux/amba/serial.h @@ -159,6 +159,7 @@ #define UART01x_FR_MODEM_ANY (UART01x_FR_DCD|UART01x_FR_DSR|UART01x_FR_CTS) #ifndef __ASSEMBLY__ +struct amba_device; /* in uncompress this is included but amba/bus.h is not */ struct amba_pl010_data { void (*set_mctrl)(struct amba_device *dev, void __iomem *base, unsigned int mctrl); }; -- cgit v1.2.3-59-g8ed1b From 0281b5dc0350cbf6dd21ed558a33cccce77abc02 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 6 Jun 2009 14:50:36 -0700 Subject: cpumask: introduce zalloc_cpumask_var So can get cpumask_var with cpumask_clear Signed-off-by: Yinghai Lu Signed-off-by: Rusty Russell --- include/linux/cpumask.h | 15 +++++++++++++++ lib/cpumask.c | 12 ++++++++++++ 2 files changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 9f315382610b..c5ac87ca7bc6 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -1022,6 +1022,8 @@ typedef struct cpumask *cpumask_var_t; bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); +bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); +bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); void alloc_bootmem_cpumask_var(cpumask_var_t *mask); void free_cpumask_var(cpumask_var_t mask); void free_bootmem_cpumask_var(cpumask_var_t mask); @@ -1040,6 +1042,19 @@ static inline bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, return true; } +static inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +{ + cpumask_clear(*mask); + return true; +} + +static inline bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, + int node) +{ + cpumask_clear(*mask); + return true; +} + static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask) { } diff --git a/lib/cpumask.c b/lib/cpumask.c index 1f71b97de0f9..eb23aaa0c7b8 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -119,6 +119,12 @@ bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) } EXPORT_SYMBOL(alloc_cpumask_var_node); +bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) +{ + return alloc_cpumask_var_node(mask, flags | __GFP_ZERO, node); +} +EXPORT_SYMBOL(zalloc_cpumask_var_node); + /** * alloc_cpumask_var - allocate a struct cpumask * @mask: pointer to cpumask_var_t where the cpumask is returned @@ -135,6 +141,12 @@ bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) } EXPORT_SYMBOL(alloc_cpumask_var); +bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +{ + return alloc_cpumask_var(mask, flags | __GFP_ZERO); +} +EXPORT_SYMBOL(zalloc_cpumask_var); + /** * alloc_bootmem_cpumask_var - allocate a struct cpumask from the bootmem arena. * @mask: pointer to cpumask_var_t where the cpumask is returned -- cgit v1.2.3-59-g8ed1b