From d0164adc89f6bb374d304ffcc375c6d2652fe67d Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:21 -0800 Subject: mm, page_alloc: distinguish between being unable to sleep, unwilling to sleep and avoiding waking kswapd __GFP_WAIT has been used to identify atomic context in callers that hold spinlocks or are in interrupts. They are expected to be high priority and have access one of two watermarks lower than "min" which can be referred to as the "atomic reserve". __GFP_HIGH users get access to the first lower watermark and can be called the "high priority reserve". Over time, callers had a requirement to not block when fallback options were available. Some have abused __GFP_WAIT leading to a situation where an optimisitic allocation with a fallback option can access atomic reserves. This patch uses __GFP_ATOMIC to identify callers that are truely atomic, cannot sleep and have no alternative. High priority users continue to use __GFP_HIGH. __GFP_DIRECT_RECLAIM identifies callers that can sleep and are willing to enter direct reclaim. __GFP_KSWAPD_RECLAIM to identify callers that want to wake kswapd for background reclaim. __GFP_WAIT is redefined as a caller that is willing to enter direct reclaim and wake kswapd for background reclaim. This patch then converts a number of sites o __GFP_ATOMIC is used by callers that are high priority and have memory pools for those requests. GFP_ATOMIC uses this flag. o Callers that have a limited mempool to guarantee forward progress clear __GFP_DIRECT_RECLAIM but keep __GFP_KSWAPD_RECLAIM. bio allocations fall into this category where kswapd will still be woken but atomic reserves are not used as there is a one-entry mempool to guarantee progress. o Callers that are checking if they are non-blocking should use the helper gfpflags_allow_blocking() where possible. This is because checking for __GFP_WAIT as was done historically now can trigger false positives. Some exceptions like dm-crypt.c exist where the code intent is clearer if __GFP_DIRECT_RECLAIM is used instead of the helper due to flag manipulations. o Callers that built their own GFP flags instead of starting with GFP_KERNEL and friends now also need to specify __GFP_KSWAPD_RECLAIM. The first key hazard to watch out for is callers that removed __GFP_WAIT and was depending on access to atomic reserves for inconspicuous reasons. In some cases it may be appropriate for them to use __GFP_HIGH. The second key hazard is callers that assembled their own combination of GFP flags instead of starting with something like GFP_KERNEL. They may now wish to specify __GFP_KSWAPD_RECLAIM. It's almost certainly harmless if it's missed in most cases as other activity will wake kswapd. Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: Christoph Lameter Cc: David Rientjes Cc: Vitaly Wool Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/idr.c | 4 ++-- lib/radix-tree.c | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/idr.c b/lib/idr.c index 5335c43adf46..6098336df267 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -399,7 +399,7 @@ void idr_preload(gfp_t gfp_mask) * allocation guarantee. Disallow usage from those contexts. */ WARN_ON_ONCE(in_interrupt()); - might_sleep_if(gfp_mask & __GFP_WAIT); + might_sleep_if(gfpflags_allow_blocking(gfp_mask)); preempt_disable(); @@ -453,7 +453,7 @@ int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask) struct idr_layer *pa[MAX_IDR_LEVEL + 1]; int id; - might_sleep_if(gfp_mask & __GFP_WAIT); + might_sleep_if(gfpflags_allow_blocking(gfp_mask)); /* sanity checks */ if (WARN_ON_ONCE(start < 0)) diff --git a/lib/radix-tree.c b/lib/radix-tree.c index f9ebe1c82060..fcf5d98574ce 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -188,7 +188,7 @@ radix_tree_node_alloc(struct radix_tree_root *root) * preloading in the interrupt anyway as all the allocations have to * be atomic. So just do normal allocation when in interrupt. */ - if (!(gfp_mask & __GFP_WAIT) && !in_interrupt()) { + if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) { struct radix_tree_preload *rtp; /* @@ -249,7 +249,7 @@ radix_tree_node_free(struct radix_tree_node *node) * with preemption not disabled. * * To make use of this facility, the radix tree must be initialised without - * __GFP_WAIT being passed to INIT_RADIX_TREE(). + * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). */ static int __radix_tree_preload(gfp_t gfp_mask) { @@ -286,12 +286,12 @@ out: * with preemption not disabled. * * To make use of this facility, the radix tree must be initialised without - * __GFP_WAIT being passed to INIT_RADIX_TREE(). + * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). */ int radix_tree_preload(gfp_t gfp_mask) { /* Warn on non-sensical use... */ - WARN_ON_ONCE(!(gfp_mask & __GFP_WAIT)); + WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask)); return __radix_tree_preload(gfp_mask); } EXPORT_SYMBOL(radix_tree_preload); @@ -303,7 +303,7 @@ EXPORT_SYMBOL(radix_tree_preload); */ int radix_tree_maybe_preload(gfp_t gfp_mask) { - if (gfp_mask & __GFP_WAIT) + if (gfpflags_allow_blocking(gfp_mask)) return __radix_tree_preload(gfp_mask); /* Preloading doesn't help anything with this gfp mask, skip it */ preempt_disable(); -- cgit v1.2.3-59-g8ed1b From 71baba4b92dc1fa1bc461742c6ab1942ec6034e9 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:28 -0800 Subject: mm, page_alloc: rename __GFP_WAIT to __GFP_RECLAIM __GFP_WAIT was used to signal that the caller was in atomic context and could not sleep. Now it is possible to distinguish between true atomic context and callers that are not willing to sleep. The latter should clear __GFP_DIRECT_RECLAIM so kswapd will still wake. As clearing __GFP_WAIT behaves differently, there is a risk that people will clear the wrong flags. This patch renames __GFP_WAIT to __GFP_RECLAIM to clearly indicate what it does -- setting it allows all reclaim activity, clearing them prevents it. [akpm@linux-foundation.org: fix build] [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Mel Gorman Acked-by: Michal Hocko Acked-by: Vlastimil Babka Acked-by: Johannes Weiner Cc: Christoph Lameter Acked-by: David Rientjes Cc: Vitaly Wool Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- block/blk-core.c | 4 ++-- block/blk-mq.c | 2 +- block/scsi_ioctl.c | 6 +++--- drivers/block/drbd/drbd_bitmap.c | 2 +- drivers/block/mtip32xx/mtip32xx.c | 2 +- drivers/block/paride/pd.c | 2 +- drivers/block/pktcdvd.c | 4 ++-- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/ide/ide-atapi.c | 2 +- drivers/ide/ide-cd.c | 2 +- drivers/ide/ide-cd_ioctl.c | 2 +- drivers/ide/ide-devsets.c | 2 +- drivers/ide/ide-disk.c | 2 +- drivers/ide/ide-ioctls.c | 4 ++-- drivers/ide/ide-park.c | 2 +- drivers/ide/ide-pm.c | 4 ++-- drivers/ide/ide-tape.c | 4 ++-- drivers/ide/ide-taskfile.c | 4 ++-- drivers/infiniband/hw/qib/qib_init.c | 2 +- drivers/misc/vmw_balloon.c | 2 +- drivers/nvme/host/pci.c | 6 ++++-- drivers/scsi/scsi_error.c | 2 +- drivers/scsi/scsi_lib.c | 4 ++-- drivers/staging/rdma/hfi1/init.c | 2 +- drivers/staging/rdma/ipath/ipath_file_ops.c | 2 +- fs/cachefiles/internal.h | 2 +- fs/direct-io.c | 2 +- fs/nilfs2/mdt.h | 2 +- include/linux/gfp.h | 16 ++++++++-------- kernel/power/swap.c | 16 ++++++++-------- lib/percpu_ida.c | 2 +- mm/failslab.c | 8 ++++---- mm/filemap.c | 2 +- mm/huge_memory.c | 2 +- mm/memcontrol.c | 2 +- mm/migrate.c | 2 +- mm/page_alloc.c | 9 +++++---- security/integrity/ima/ima_crypto.c | 2 +- 38 files changed, 71 insertions(+), 68 deletions(-) (limited to 'lib') diff --git a/block/blk-core.c b/block/blk-core.c index 9e32f0868e36..590cca21c24a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -638,7 +638,7 @@ int blk_queue_enter(struct request_queue *q, gfp_t gfp) if (percpu_ref_tryget_live(&q->q_usage_counter)) return 0; - if (!(gfp & __GFP_WAIT)) + if (!gfpflags_allow_blocking(gfp)) return -EBUSY; ret = wait_event_interruptible(q->mq_freeze_wq, @@ -2038,7 +2038,7 @@ void generic_make_request(struct bio *bio) do { struct request_queue *q = bdev_get_queue(bio->bi_bdev); - if (likely(blk_queue_enter(q, __GFP_WAIT) == 0)) { + if (likely(blk_queue_enter(q, __GFP_DIRECT_RECLAIM) == 0)) { q->make_request_fn(q, bio); diff --git a/block/blk-mq.c b/block/blk-mq.c index 68c0a3416b34..694f8703f83c 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1186,7 +1186,7 @@ static struct request *blk_mq_map_request(struct request_queue *q, ctx = blk_mq_get_ctx(q); hctx = q->mq_ops->map_queue(q, ctx->cpu); blk_mq_set_alloc_data(&alloc_data, q, - __GFP_WAIT|__GFP_HIGH, false, ctx, hctx); + __GFP_RECLAIM|__GFP_HIGH, false, ctx, hctx); rq = __blk_mq_alloc_request(&alloc_data, rw); ctx = alloc_data.ctx; hctx = alloc_data.hctx; diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index dda653ce7b24..0774799942e0 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -444,7 +444,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, } - rq = blk_get_request(q, in_len ? WRITE : READ, __GFP_WAIT); + rq = blk_get_request(q, in_len ? WRITE : READ, __GFP_RECLAIM); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto error_free_buffer; @@ -495,7 +495,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, break; } - if (bytes && blk_rq_map_kern(q, rq, buffer, bytes, __GFP_WAIT)) { + if (bytes && blk_rq_map_kern(q, rq, buffer, bytes, __GFP_RECLAIM)) { err = DRIVER_ERROR << 24; goto error; } @@ -536,7 +536,7 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk, struct request *rq; int err; - rq = blk_get_request(q, WRITE, __GFP_WAIT); + rq = blk_get_request(q, WRITE, __GFP_RECLAIM); if (IS_ERR(rq)) return PTR_ERR(rq); blk_rq_set_block_pc(rq); diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index e5e0f19ceda0..3dc53a16ed3a 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1007,7 +1007,7 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho bm_set_page_unchanged(b->bm_pages[page_nr]); if (ctx->flags & BM_AIO_COPY_PAGES) { - page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_WAIT); + page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_RECLAIM); copy_highpage(page, b->bm_pages[page_nr]); bm_store_page_idx(page, page_nr); } else diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index f504232c1ee7..a28a562f7b7f 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -173,7 +173,7 @@ static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd) { struct request *rq; - rq = blk_mq_alloc_request(dd->queue, 0, __GFP_WAIT, true); + rq = blk_mq_alloc_request(dd->queue, 0, __GFP_RECLAIM, true); return blk_mq_rq_to_pdu(rq); } diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index b9242d78283d..562b5a4ca7b7 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -723,7 +723,7 @@ static int pd_special_command(struct pd_unit *disk, struct request *rq; int err = 0; - rq = blk_get_request(disk->gd->queue, READ, __GFP_WAIT); + rq = blk_get_request(disk->gd->queue, READ, __GFP_RECLAIM); if (IS_ERR(rq)) return PTR_ERR(rq); diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 7be2375db7f2..5959c2981cc7 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -704,14 +704,14 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * int ret = 0; rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ? - WRITE : READ, __GFP_WAIT); + WRITE : READ, __GFP_RECLAIM); if (IS_ERR(rq)) return PTR_ERR(rq); blk_rq_set_block_pc(rq); if (cgc->buflen) { ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen, - __GFP_WAIT); + __GFP_RECLAIM); if (ret) goto out; } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d58cb9e034fe..7e505d4be7c0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2216,7 +2216,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) mapping = file_inode(obj->base.filp)->i_mapping; gfp = mapping_gfp_mask(mapping); gfp |= __GFP_NORETRY | __GFP_NOWARN; - gfp &= ~(__GFP_IO | __GFP_WAIT); + gfp &= ~(__GFP_IO | __GFP_RECLAIM); sg = st->sgl; st->nents = 0; for (i = 0; i < page_count; i++) { diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 1362ad80a76c..05352f490d60 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -92,7 +92,7 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk, struct request *rq; int error; - rq = blk_get_request(drive->queue, READ, __GFP_WAIT); + rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); rq->cmd_type = REQ_TYPE_DRV_PRIV; rq->special = (char *)pc; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 64a6b827b3dd..ef907fd5ba98 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -441,7 +441,7 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, struct request *rq; int error; - rq = blk_get_request(drive->queue, write, __GFP_WAIT); + rq = blk_get_request(drive->queue, write, __GFP_RECLAIM); memcpy(rq->cmd, cmd, BLK_MAX_CDB); rq->cmd_type = REQ_TYPE_ATA_PC; diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c index 066e39036518..474173eb31bb 100644 --- a/drivers/ide/ide-cd_ioctl.c +++ b/drivers/ide/ide-cd_ioctl.c @@ -303,7 +303,7 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi) struct request *rq; int ret; - rq = blk_get_request(drive->queue, READ, __GFP_WAIT); + rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); rq->cmd_type = REQ_TYPE_DRV_PRIV; rq->cmd_flags = REQ_QUIET; ret = blk_execute_rq(drive->queue, cd->disk, rq, 0); diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c index b05a74d78ef5..0dd43b4fcec6 100644 --- a/drivers/ide/ide-devsets.c +++ b/drivers/ide/ide-devsets.c @@ -165,7 +165,7 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting, if (!(setting->flags & DS_SYNC)) return setting->set(drive, arg); - rq = blk_get_request(q, READ, __GFP_WAIT); + rq = blk_get_request(q, READ, __GFP_RECLAIM); rq->cmd_type = REQ_TYPE_DRV_PRIV; rq->cmd_len = 5; rq->cmd[0] = REQ_DEVSET_EXEC; diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 56b9708894a5..37a8a907febe 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -477,7 +477,7 @@ static int set_multcount(ide_drive_t *drive, int arg) if (drive->special_flags & IDE_SFLAG_SET_MULTMODE) return -EBUSY; - rq = blk_get_request(drive->queue, READ, __GFP_WAIT); + rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); rq->cmd_type = REQ_TYPE_ATA_TASKFILE; drive->mult_req = arg; diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c index aa2e9b77b20d..d05db2469209 100644 --- a/drivers/ide/ide-ioctls.c +++ b/drivers/ide/ide-ioctls.c @@ -125,7 +125,7 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg) if (NULL == (void *) arg) { struct request *rq; - rq = blk_get_request(drive->queue, READ, __GFP_WAIT); + rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); rq->cmd_type = REQ_TYPE_ATA_TASKFILE; err = blk_execute_rq(drive->queue, NULL, rq, 0); blk_put_request(rq); @@ -221,7 +221,7 @@ static int generic_drive_reset(ide_drive_t *drive) struct request *rq; int ret = 0; - rq = blk_get_request(drive->queue, READ, __GFP_WAIT); + rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); rq->cmd_type = REQ_TYPE_DRV_PRIV; rq->cmd_len = 1; rq->cmd[0] = REQ_DRIVE_RESET; diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c index c80868520488..2d7dca56dd24 100644 --- a/drivers/ide/ide-park.c +++ b/drivers/ide/ide-park.c @@ -31,7 +31,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) } spin_unlock_irq(&hwif->lock); - rq = blk_get_request(q, READ, __GFP_WAIT); + rq = blk_get_request(q, READ, __GFP_RECLAIM); rq->cmd[0] = REQ_PARK_HEADS; rq->cmd_len = 1; rq->cmd_type = REQ_TYPE_DRV_PRIV; diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index 081e43458d50..e34af488693a 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -18,7 +18,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) } memset(&rqpm, 0, sizeof(rqpm)); - rq = blk_get_request(drive->queue, READ, __GFP_WAIT); + rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); rq->cmd_type = REQ_TYPE_ATA_PM_SUSPEND; rq->special = &rqpm; rqpm.pm_step = IDE_PM_START_SUSPEND; @@ -88,7 +88,7 @@ int generic_ide_resume(struct device *dev) } memset(&rqpm, 0, sizeof(rqpm)); - rq = blk_get_request(drive->queue, READ, __GFP_WAIT); + rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); rq->cmd_type = REQ_TYPE_ATA_PM_RESUME; rq->cmd_flags |= REQ_PREEMPT; rq->special = &rqpm; diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index f5d51d1d09ee..12fa04997dcc 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -852,7 +852,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size) BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE); BUG_ON(size < 0 || size % tape->blk_size); - rq = blk_get_request(drive->queue, READ, __GFP_WAIT); + rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); rq->cmd_type = REQ_TYPE_DRV_PRIV; rq->cmd[13] = cmd; rq->rq_disk = tape->disk; @@ -860,7 +860,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size) if (size) { ret = blk_rq_map_kern(drive->queue, rq, tape->buf, size, - __GFP_WAIT); + __GFP_RECLAIM); if (ret) goto out_put; } diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 0979e126fff1..a716693417a3 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -430,7 +430,7 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf, int error; int rw = !(cmd->tf_flags & IDE_TFLAG_WRITE) ? READ : WRITE; - rq = blk_get_request(drive->queue, rw, __GFP_WAIT); + rq = blk_get_request(drive->queue, rw, __GFP_RECLAIM); rq->cmd_type = REQ_TYPE_ATA_TASKFILE; /* @@ -441,7 +441,7 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf, */ if (nsect) { error = blk_rq_map_kern(drive->queue, rq, buf, - nsect * SECTOR_SIZE, __GFP_WAIT); + nsect * SECTOR_SIZE, __GFP_RECLAIM); if (error) goto put_req; } diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 7e00470adc30..4ff340fe904f 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1680,7 +1680,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd) * heavy filesystem activity makes these fail, and we can * use compound pages. */ - gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP; + gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP; egrcnt = rcd->rcvegrcnt; egroff = rcd->rcvegr_tid_base; diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index 89300870fefb..1e688bfec567 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -75,7 +75,7 @@ MODULE_LICENSE("GPL"); /* * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't - * allow wait (__GFP_WAIT) for NOSLEEP page allocations. Use + * allow wait (__GFP_RECLAIM) for NOSLEEP page allocations. Use * __GFP_NOWARN, to suppress page allocation failure warnings. */ #define VMW_PAGE_ALLOC_NOSLEEP (__GFP_HIGHMEM|__GFP_NOWARN) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e878590e71b6..6c195554d94a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1025,11 +1025,13 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, req->special = (void *)0; if (buffer && bufflen) { - ret = blk_rq_map_kern(q, req, buffer, bufflen, __GFP_WAIT); + ret = blk_rq_map_kern(q, req, buffer, bufflen, + __GFP_DIRECT_RECLAIM); if (ret) goto out; } else if (ubuffer && bufflen) { - ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, __GFP_WAIT); + ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, + __GFP_DIRECT_RECLAIM); if (ret) goto out; bio = req->bio; diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 66a96cd98b97..984ddcb4786d 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -1970,7 +1970,7 @@ static void scsi_eh_lock_door(struct scsi_device *sdev) struct request *req; /* - * blk_get_request with GFP_KERNEL (__GFP_WAIT) sleeps until a + * blk_get_request with GFP_KERNEL (__GFP_RECLAIM) sleeps until a * request becomes available */ req = blk_get_request(sdev->request_queue, READ, GFP_KERNEL); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 126a48c6431e..dd8ad2a44510 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -222,13 +222,13 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, int write = (data_direction == DMA_TO_DEVICE); int ret = DRIVER_ERROR << 24; - req = blk_get_request(sdev->request_queue, write, __GFP_WAIT); + req = blk_get_request(sdev->request_queue, write, __GFP_RECLAIM); if (IS_ERR(req)) return ret; blk_rq_set_block_pc(req); if (bufflen && blk_rq_map_kern(sdev->request_queue, req, - buffer, bufflen, __GFP_WAIT)) + buffer, bufflen, __GFP_RECLAIM)) goto out; req->cmd_len = COMMAND_SIZE(cmd[0]); diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c index 47a1202fcbdf..8666f3ad24e9 100644 --- a/drivers/staging/rdma/hfi1/init.c +++ b/drivers/staging/rdma/hfi1/init.c @@ -1560,7 +1560,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) * heavy filesystem activity makes these fail, and we can * use compound pages. */ - gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP; + gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP; /* * The minimum size of the eager buffers is a groups of MTU-sized diff --git a/drivers/staging/rdma/ipath/ipath_file_ops.c b/drivers/staging/rdma/ipath/ipath_file_ops.c index 5d9b9dbd8fc4..13c3cd11ab92 100644 --- a/drivers/staging/rdma/ipath/ipath_file_ops.c +++ b/drivers/staging/rdma/ipath/ipath_file_ops.c @@ -905,7 +905,7 @@ static int ipath_create_user_egr(struct ipath_portdata *pd) * heavy filesystem activity makes these fail, and we can * use compound pages. */ - gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP; + gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP; egrcnt = dd->ipath_rcvegrcnt; /* TID number offset for this port */ diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index aecd0859eacb..9c4b737a54df 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -30,7 +30,7 @@ extern unsigned cachefiles_debug; #define CACHEFILES_DEBUG_KLEAVE 2 #define CACHEFILES_DEBUG_KDEBUG 4 -#define cachefiles_gfp (__GFP_WAIT | __GFP_NORETRY | __GFP_NOMEMALLOC) +#define cachefiles_gfp (__GFP_RECLAIM | __GFP_NORETRY | __GFP_NOMEMALLOC) /* * node records diff --git a/fs/direct-io.c b/fs/direct-io.c index 3ae0e0427191..18e7554cf94c 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -361,7 +361,7 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, /* * bio_alloc() is guaranteed to return a bio when called with - * __GFP_WAIT and we request a valid number of vectors. + * __GFP_RECLAIM and we request a valid number of vectors. */ bio = bio_alloc(GFP_KERNEL, nr_vecs); diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h index fe529a87a208..03246cac3338 100644 --- a/fs/nilfs2/mdt.h +++ b/fs/nilfs2/mdt.h @@ -72,7 +72,7 @@ static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode) } /* Default GFP flags using highmem */ -#define NILFS_MDT_GFP (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM) +#define NILFS_MDT_GFP (__GFP_RECLAIM | __GFP_IO | __GFP_HIGHMEM) int nilfs_mdt_get_block(struct inode *, unsigned long, int, void (*init_block)(struct inode *, diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 86f9f7da86ea..369227202ac2 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -107,7 +107,7 @@ struct vm_area_struct; * can be cleared when the reclaiming of pages would cause unnecessary * disruption. */ -#define __GFP_WAIT ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) +#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) #define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */ #define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */ @@ -126,12 +126,12 @@ struct vm_area_struct; */ #define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) #define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) -#define GFP_NOIO (__GFP_WAIT) -#define GFP_NOFS (__GFP_WAIT | __GFP_IO) -#define GFP_KERNEL (__GFP_WAIT | __GFP_IO | __GFP_FS) -#define GFP_TEMPORARY (__GFP_WAIT | __GFP_IO | __GFP_FS | \ +#define GFP_NOIO (__GFP_RECLAIM) +#define GFP_NOFS (__GFP_RECLAIM | __GFP_IO) +#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) +#define GFP_TEMPORARY (__GFP_RECLAIM | __GFP_IO | __GFP_FS | \ __GFP_RECLAIMABLE) -#define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL) +#define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL) #define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) #define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE) #define GFP_TRANSHUGE ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ @@ -143,12 +143,12 @@ struct vm_area_struct; #define GFP_MOVABLE_SHIFT 3 /* Control page allocator reclaim behavior */ -#define GFP_RECLAIM_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|\ +#define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\ __GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\ __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC) /* Control slab gfp mask during early boot */ -#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_WAIT|__GFP_IO|__GFP_FS)) +#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS)) /* Control allocation constraints */ #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE) diff --git a/kernel/power/swap.c b/kernel/power/swap.c index b2066fb5b10f..12cd989dadf6 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -257,7 +257,7 @@ static int hib_submit_io(int rw, pgoff_t page_off, void *addr, struct bio *bio; int error = 0; - bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); + bio = bio_alloc(__GFP_RECLAIM | __GFP_HIGH, 1); bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9); bio->bi_bdev = hib_resume_bdev; @@ -356,7 +356,7 @@ static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb) return -ENOSPC; if (hb) { - src = (void *)__get_free_page(__GFP_WAIT | __GFP_NOWARN | + src = (void *)__get_free_page(__GFP_RECLAIM | __GFP_NOWARN | __GFP_NORETRY); if (src) { copy_page(src, buf); @@ -364,7 +364,7 @@ static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb) ret = hib_wait_io(hb); /* Free pages */ if (ret) return ret; - src = (void *)__get_free_page(__GFP_WAIT | + src = (void *)__get_free_page(__GFP_RECLAIM | __GFP_NOWARN | __GFP_NORETRY); if (src) { @@ -672,7 +672,7 @@ static int save_image_lzo(struct swap_map_handle *handle, nr_threads = num_online_cpus() - 1; nr_threads = clamp_val(nr_threads, 1, LZO_THREADS); - page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); + page = (void *)__get_free_page(__GFP_RECLAIM | __GFP_HIGH); if (!page) { printk(KERN_ERR "PM: Failed to allocate LZO page\n"); ret = -ENOMEM; @@ -975,7 +975,7 @@ static int get_swap_reader(struct swap_map_handle *handle, last = tmp; tmp->map = (struct swap_map_page *) - __get_free_page(__GFP_WAIT | __GFP_HIGH); + __get_free_page(__GFP_RECLAIM | __GFP_HIGH); if (!tmp->map) { release_swap_reader(handle); return -ENOMEM; @@ -1242,9 +1242,9 @@ static int load_image_lzo(struct swap_map_handle *handle, for (i = 0; i < read_pages; i++) { page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ? - __GFP_WAIT | __GFP_HIGH : - __GFP_WAIT | __GFP_NOWARN | - __GFP_NORETRY); + __GFP_RECLAIM | __GFP_HIGH : + __GFP_RECLAIM | __GFP_NOWARN | + __GFP_NORETRY); if (!page[i]) { if (i < LZO_CMP_PAGES) { diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c index f75715131f20..6d40944960de 100644 --- a/lib/percpu_ida.c +++ b/lib/percpu_ida.c @@ -135,7 +135,7 @@ static inline unsigned alloc_local_tag(struct percpu_ida_cpu *tags) * TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, of course). * * @gfp indicates whether or not to wait until a free id is available (it's not - * used for internal memory allocations); thus if passed __GFP_WAIT we may sleep + * used for internal memory allocations); thus if passed __GFP_RECLAIM we may sleep * however long it takes until another thread frees an id (same semantics as a * mempool). * diff --git a/mm/failslab.c b/mm/failslab.c index 98fb490311eb..79171b4a5826 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -3,11 +3,11 @@ static struct { struct fault_attr attr; - bool ignore_gfp_wait; + bool ignore_gfp_reclaim; bool cache_filter; } failslab = { .attr = FAULT_ATTR_INITIALIZER, - .ignore_gfp_wait = true, + .ignore_gfp_reclaim = true, .cache_filter = false, }; @@ -16,7 +16,7 @@ bool should_failslab(size_t size, gfp_t gfpflags, unsigned long cache_flags) if (gfpflags & __GFP_NOFAIL) return false; - if (failslab.ignore_gfp_wait && (gfpflags & __GFP_WAIT)) + if (failslab.ignore_gfp_reclaim && (gfpflags & __GFP_RECLAIM)) return false; if (failslab.cache_filter && !(cache_flags & SLAB_FAILSLAB)) @@ -42,7 +42,7 @@ static int __init failslab_debugfs_init(void) return PTR_ERR(dir); if (!debugfs_create_bool("ignore-gfp-wait", mode, dir, - &failslab.ignore_gfp_wait)) + &failslab.ignore_gfp_reclaim)) goto fail; if (!debugfs_create_bool("cache-filter", mode, dir, &failslab.cache_filter)) diff --git a/mm/filemap.c b/mm/filemap.c index 58e04e26f996..6ef3674c0763 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2713,7 +2713,7 @@ EXPORT_SYMBOL(generic_file_write_iter); * page is known to the local caching routines. * * The @gfp_mask argument specifies whether I/O may be performed to release - * this page (__GFP_IO), and whether the call may block (__GFP_WAIT & __GFP_FS). + * this page (__GFP_IO), and whether the call may block (__GFP_RECLAIM & __GFP_FS). * */ int try_to_release_page(struct page *page, gfp_t gfp_mask) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f5c08b46fef8..9812d4618651 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -786,7 +786,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp) { - return (GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT)) | extra_gfp; + return (GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_RECLAIM)) | extra_gfp; } /* Caller must hold page table lock. */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 05374f09339c..a5470674a477 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2120,7 +2120,7 @@ done_restock: /* * If the hierarchy is above the normal consumption range, schedule * reclaim on returning to userland. We can perform reclaim here - * if __GFP_WAIT but let's always punt for simplicity and so that + * if __GFP_RECLAIM but let's always punt for simplicity and so that * GFP_KERNEL can consistently be used during reclaim. @memcg is * not recorded as it most likely matches current's and won't * change in the meantime. As high limit is checked again before diff --git a/mm/migrate.c b/mm/migrate.c index e60379eb23f8..7890d0bb5e23 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1752,7 +1752,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, goto out_dropref; new_page = alloc_pages_node(node, - (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_WAIT, + (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_RECLAIM, HPAGE_PMD_ORDER); if (!new_page) goto out_fail; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 70461f3e3378..1b373096b990 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2160,11 +2160,11 @@ static struct { struct fault_attr attr; bool ignore_gfp_highmem; - bool ignore_gfp_wait; + bool ignore_gfp_reclaim; u32 min_order; } fail_page_alloc = { .attr = FAULT_ATTR_INITIALIZER, - .ignore_gfp_wait = true, + .ignore_gfp_reclaim = true, .ignore_gfp_highmem = true, .min_order = 1, }; @@ -2183,7 +2183,8 @@ static bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) return false; if (fail_page_alloc.ignore_gfp_highmem && (gfp_mask & __GFP_HIGHMEM)) return false; - if (fail_page_alloc.ignore_gfp_wait && (gfp_mask & __GFP_DIRECT_RECLAIM)) + if (fail_page_alloc.ignore_gfp_reclaim && + (gfp_mask & __GFP_DIRECT_RECLAIM)) return false; return should_fail(&fail_page_alloc.attr, 1 << order); @@ -2202,7 +2203,7 @@ static int __init fail_page_alloc_debugfs(void) return PTR_ERR(dir); if (!debugfs_create_bool("ignore-gfp-wait", mode, dir, - &fail_page_alloc.ignore_gfp_wait)) + &fail_page_alloc.ignore_gfp_reclaim)) goto fail; if (!debugfs_create_bool("ignore-gfp-highmem", mode, dir, &fail_page_alloc.ignore_gfp_highmem)) diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c index e24121afb2f2..6eb62936c672 100644 --- a/security/integrity/ima/ima_crypto.c +++ b/security/integrity/ima/ima_crypto.c @@ -126,7 +126,7 @@ static void *ima_alloc_pages(loff_t max_size, size_t *allocated_size, { void *ptr; int order = ima_maxorder; - gfp_t gfp_mask = __GFP_WAIT | __GFP_NOWARN | __GFP_NORETRY; + gfp_t gfp_mask = __GFP_RECLAIM | __GFP_NOWARN | __GFP_NORETRY; if (order) order = min(get_order(max_size), order); -- cgit v1.2.3-59-g8ed1b From 3e406b1d7c1e5c14c84a71eb4bee5f46ba690401 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 6 Nov 2015 16:30:15 -0800 Subject: lib/dynamic_debug.c: use kstrdup_const Using kstrdup_const, thus reusing .rodata when possible, saves around 2 kB of runtime memory on my laptop/.config combination. Signed-off-by: Rasmus Villemoes Cc: Jason Baron Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/dynamic_debug.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index e491e02eff54..e3952e9c8ec0 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -42,7 +42,7 @@ extern struct _ddebug __stop___verbose[]; struct ddebug_table { struct list_head link; - char *mod_name; + const char *mod_name; unsigned int num_ddebugs; struct _ddebug *ddebugs; }; @@ -841,12 +841,12 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n, const char *name) { struct ddebug_table *dt; - char *new_name; + const char *new_name; dt = kzalloc(sizeof(*dt), GFP_KERNEL); if (dt == NULL) return -ENOMEM; - new_name = kstrdup(name, GFP_KERNEL); + new_name = kstrdup_const(name, GFP_KERNEL); if (new_name == NULL) { kfree(dt); return -ENOMEM; @@ -907,7 +907,7 @@ int ddebug_dyndbg_module_param_cb(char *param, char *val, const char *module) static void ddebug_table_free(struct ddebug_table *dt) { list_del_init(&dt->link); - kfree(dt->mod_name); + kfree_const(dt->mod_name); kfree(dt); } -- cgit v1.2.3-59-g8ed1b From 5e4ee7b13b522d07196e737f399843c58569604d Mon Sep 17 00:00:00 2001 From: Martin Kletzander Date: Fri, 6 Nov 2015 16:30:17 -0800 Subject: printk: synchronize %p formatting documentation Move all pointer-formatting documentation to one place in the code and one place in the documentation instead of keeping it in three places with different level of completeness. Documentation/printk-formats.txt has detailed information about each modifier, docstring above pointer() has short descriptions of them (as that is the function dealing with %p) and docstring above vsprintf() is removed as redundant. Both docstrings in the code that were modified are updated with a reminder of updating the documentation upon any further change. [akpm@linux-foundation.org: fix comment] Signed-off-by: Martin Kletzander Reviewed-by: Andy Shevchenko Cc: Rasmus Villemoes Cc: Jonathan Corbet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/printk-formats.txt | 29 +++++++++++++++++++++++++++++ lib/vsprintf.c | 40 ++++++++-------------------------------- 2 files changed, 37 insertions(+), 32 deletions(-) (limited to 'lib') diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt index 2216eb187c21..9b8d7f746b1a 100644 --- a/Documentation/printk-formats.txt +++ b/Documentation/printk-formats.txt @@ -119,6 +119,7 @@ Raw buffer as an escaped string: If field width is omitted the 1 byte only will be escaped. Raw buffer as a hex string: + %*ph 00 01 02 ... 3f %*phC 00:01:02: ... :3f %*phD 00-01-02- ... -3f @@ -234,6 +235,7 @@ UUID/GUID addresses: Passed by reference. dentry names: + %pd{,2,3,4} %pD{,2,3,4} @@ -256,6 +258,8 @@ struct va_format: va_list *va; }; + Implements a "recursive vsnprintf". + Do not use this feature without some mechanism to verify the correctness of the format string and va_list arguments. @@ -284,6 +288,31 @@ bitmap and its derivatives such as cpumask and nodemask: Passed by reference. +Network device features: + + %pNF 0x000000000000c000 + + For printing netdev_features_t. + + Passed by reference. + +Command from struct task_struct + + %pT ls + + For printing executable name excluding path from struct + task_struct. + + Passed by reference. + +Ignored argument: + + %n %n + + The argument passed will be ignored. In other words, literal "%n" will + be in the output and the argument will be considered for next format + specifier. + Thank you for your cooperation and attention. diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 95cd63b43b99..e966a45e2f00 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1448,6 +1448,9 @@ int kptr_restrict __read_mostly; * - 'Cn' For a clock, it prints the name (Common Clock Framework) or address * (legacy clock framework) of the clock * - 'Cr' For a clock, it prints the current rate of the clock + * - 'n' For ignored argument + * + * ** Please update also Documentation/printk-formats.txt when making changes ** * * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 * function pointers are really function descriptors, which contain a @@ -1812,40 +1815,13 @@ qualifier: * @args: Arguments for the format string * * This function follows C99 vsnprintf, but has some extensions: - * %pS output the name of a text symbol with offset - * %ps output the name of a text symbol without offset - * %pF output the name of a function pointer with its offset - * %pf output the name of a function pointer without its offset - * %pB output the name of a backtrace symbol with its offset - * %pR output the address range in a struct resource with decoded flags - * %pr output the address range in a struct resource with raw flags - * %pb output the bitmap with field width as the number of bits - * %pbl output the bitmap as range list with field width as the number of bits - * %pM output a 6-byte MAC address with colons - * %pMR output a 6-byte MAC address with colons in reversed order - * %pMF output a 6-byte MAC address with dashes - * %pm output a 6-byte MAC address without colons - * %pmR output a 6-byte MAC address without colons in reversed order - * %pI4 print an IPv4 address without leading zeros - * %pi4 print an IPv4 address with leading zeros - * %pI6 print an IPv6 address with colons - * %pi6 print an IPv6 address without colons - * %pI6c print an IPv6 address as specified by RFC 5952 - * %pIS depending on sa_family of 'struct sockaddr *' print IPv4/IPv6 address - * %piS depending on sa_family of 'struct sockaddr *' print IPv4/IPv6 address - * %pU[bBlL] print a UUID/GUID in big or little endian using lower or upper - * case. - * %*pE[achnops] print an escaped buffer - * %*ph[CDN] a variable-length hex string with a separator (supports up to 64 - * bytes of the input) - * %pC output the name (Common Clock Framework) or address (legacy clock - * framework) of a clock - * %pCn output the name (Common Clock Framework) or address (legacy clock - * framework) of a clock - * %pCr output the current rate of a clock * %n is ignored + * %p* is handled by pointer() + * + * See pointer() or Documentation/printk-formats.txt for more + * extensive description. * - * ** Please update Documentation/printk-formats.txt when making changes ** + * ** Please update the documentation in both places when making changes ** * * The return value is the number of characters which would * be generated for the given input, excluding the trailing -- cgit v1.2.3-59-g8ed1b From b006f19b055f90b73e97086490f95b83095dcc91 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 6 Nov 2015 16:30:20 -0800 Subject: lib/vsprintf.c: handle invalid format specifiers more robustly If we meet any invalid or unsupported format specifier, 'handling' it by just printing it as a literal string is not safe: Presumably the format string and the arguments passed gcc's type checking, but that means something like sprintf(buf, "%n %pd", &intvar, dentry) would end up interpreting &intvar as a struct dentry*. When the offending specifier was %n it used to be at the end of the format string, but we can't rely on that always being the case. Also, gcc doesn't complain about some more or less exotic qualifiers (or 'length modifiers' in posix-speak) such as 'j' or 'q', but being unrecognized by the kernel's printf implementation, they'd be interpreted as unknown specifiers, and the rest of arguments would be interpreted wrongly. So let's complain about anything we don't understand, not just %n, and stop pretending that we'd be able to make sense of the rest of the format/arguments. If the offending specifier is in a printk() call we unfortunately only get a "BUG: recent printk recursion!", but at least direct users of the sprintf family will be caught. Signed-off-by: Rasmus Villemoes Reviewed-by: Andy Shevchenko Acked-by: Kees Cook Cc: Martin Kletzander Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index e966a45e2f00..e35724c2b2a8 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1772,14 +1772,14 @@ qualifier: case 'n': /* - * Since %n poses a greater security risk than utility, treat - * it as an invalid format specifier. Warn about its use so - * that new instances don't get added. + * Since %n poses a greater security risk than + * utility, treat it as any other invalid or + * unsupported format specifier. */ - WARN_ONCE(1, "Please remove ignored %%n in '%s'\n", fmt); /* Fall-through */ default: + WARN_ONCE(1, "Please remove unsupported %%%c in format string\n", *fmt); spec->type = FORMAT_TYPE_INVALID; return fmt - start; } @@ -1920,10 +1920,15 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) break; case FORMAT_TYPE_INVALID: - if (str < end) - *str = '%'; - ++str; - break; + /* + * Presumably the arguments passed gcc's type + * checking, but there is no safe or sane way + * for us to continue parsing the format and + * fetching from the va_list; the remaining + * specifiers and arguments would be out of + * sync. + */ + goto out; default: switch (spec.type) { @@ -1968,6 +1973,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) } } +out: if (size > 0) { if (str < end) *str = '\0'; @@ -2165,9 +2171,10 @@ do { \ switch (spec.type) { case FORMAT_TYPE_NONE: - case FORMAT_TYPE_INVALID: case FORMAT_TYPE_PERCENT_CHAR: break; + case FORMAT_TYPE_INVALID: + goto out; case FORMAT_TYPE_WIDTH: case FORMAT_TYPE_PRECISION: @@ -2229,6 +2236,7 @@ do { \ } } +out: return (u32 *)(PTR_ALIGN(str, sizeof(u32))) - bin_buf; #undef save_arg } @@ -2351,12 +2359,14 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) break; case FORMAT_TYPE_PERCENT_CHAR: - case FORMAT_TYPE_INVALID: if (str < end) *str = '%'; ++str; break; + case FORMAT_TYPE_INVALID: + goto out; + default: { unsigned long long num; @@ -2399,6 +2409,7 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) } /* switch(spec.type) */ } /* while(*fmt) */ +out: if (size > 0) { if (str < end) *str = '\0'; -- cgit v1.2.3-59-g8ed1b From 762abb515415a5a4a37423f4f4ff5770d5a14bac Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 6 Nov 2015 16:30:23 -0800 Subject: lib/vsprintf.c: also improve sanity check in bstr_printf() Quoting from 2aa2f9e21e4e ("lib/vsprintf.c: improve sanity check in vsnprintf()"): On 64 bit, size may very well be huge even if bit 31 happens to be 0. Somehow it doesn't feel right that one can pass a 5 GiB buffer but not a 3 GiB one. So cap at INT_MAX as was probably the intention all along. This is also the made-up value passed by sprintf and vsprintf. I should have seen this copy-pasted instance back then, but let's just do it now. Signed-off-by: Rasmus Villemoes Reviewed-by: Andy Shevchenko Acked-by: Kees Cook Cc: Martin Kletzander Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index e35724c2b2a8..a513469e9399 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -2270,7 +2270,7 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) char *str, *end; const char *args = (const char *)bin_buf; - if (WARN_ON_ONCE((int) size < 0)) + if (WARN_ON_ONCE(size > INT_MAX)) return 0; str = buf; -- cgit v1.2.3-59-g8ed1b From 80c9eb46fa7236c1236ec695bfa2403c10cb8645 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 6 Nov 2015 16:30:26 -0800 Subject: lib/vsprintf.c: remove SPECIAL handling in pointer() As a quick git grep -E '%[ +0#-]*#[ +0#-]*(\*|[0-9]+)?(\.(\*|[0-9]+)?)?p' shows, nobody uses the # flag with %p. Should one try to do so, one will be met with warning: `#' flag used with `%p' gnu_printf format [-Wformat] (POSIX and C99 both say "... For other conversion specifiers, the behavior is undefined.". Obviously, the kernel can choose to define the behaviour however it wants, but as long as gcc issues that warning, users are unlikely to show up.) Since default_width is effectively always 2*sizeof(void*), we can simplify the prologue of pointer() and save a few instructions. Signed-off-by: Rasmus Villemoes Reviewed-by: Andy Shevchenko Acked-by: Kees Cook Cc: Martin Kletzander Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index a513469e9399..7848d53a5134 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1460,7 +1460,7 @@ static noinline_for_stack char *pointer(const char *fmt, char *buf, char *end, void *ptr, struct printf_spec spec) { - int default_width = 2 * sizeof(void *) + (spec.flags & SPECIAL ? 2 : 0); + const int default_width = 2 * sizeof(void *); if (!ptr && *fmt != 'K') { /* -- cgit v1.2.3-59-g8ed1b From 707cc7280f452a162c52bc240eae62568b9753c2 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 6 Nov 2015 16:30:29 -0800 Subject: test_printf: test printf family at runtime This adds a simple module for testing the kernel's printf facilities. Previously, some %p extensions have caused a wrong return value in case the entire output didn't fit and/or been unusable in kasprintf(). This should help catch such issues. Also, it should help ensure that changes to the formatting algorithms don't break anything. I'm not sure if we have a struct dentry or struct file lying around at boot time or if we can fake one, but most %p extensions should be testable, as should the ordinary number and string formatting. The nature of vararg functions means we can't use a more conventional table-driven approach. For now, this is mostly a skeleton; contributions are very welcome. Some tests are/will be slightly annoying to write, since the expected output depends on stuff like CONFIG_*, sizeof(long), runtime values etc. Signed-off-by: Rasmus Villemoes Reviewed-by: Kees Cook Cc: Andy Shevchenko Cc: Martin Kletzander Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 3 + lib/Makefile | 1 + lib/test_printf.c | 362 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 366 insertions(+) create mode 100644 lib/test_printf.c (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1d1521c26302..16bf3bc25e3e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1686,6 +1686,9 @@ config TEST_STRING_HELPERS config TEST_KSTRTOX tristate "Test kstrto*() family of functions at runtime" +config TEST_PRINTF + tristate "Test printf() family of functions at runtime" + config TEST_RHASHTABLE tristate "Perform selftest on resizable hash table" default n diff --git a/lib/Makefile b/lib/Makefile index 8de3b012eac7..7f1de26613d2 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -42,6 +42,7 @@ obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o +obj-$(CONFIG_TEST_PRINTF) += test_printf.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG diff --git a/lib/test_printf.c b/lib/test_printf.c new file mode 100644 index 000000000000..c5a666af9ba5 --- /dev/null +++ b/lib/test_printf.c @@ -0,0 +1,362 @@ +/* + * Test cases for printf facility. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define BUF_SIZE 256 +#define FILL_CHAR '$' + +#define PTR1 ((void*)0x01234567) +#define PTR2 ((void*)(long)(int)0xfedcba98) + +#if BITS_PER_LONG == 64 +#define PTR1_ZEROES "000000000" +#define PTR1_SPACES " " +#define PTR1_STR "1234567" +#define PTR2_STR "fffffffffedcba98" +#define PTR_WIDTH 16 +#else +#define PTR1_ZEROES "0" +#define PTR1_SPACES " " +#define PTR1_STR "1234567" +#define PTR2_STR "fedcba98" +#define PTR_WIDTH 8 +#endif +#define PTR_WIDTH_STR stringify(PTR_WIDTH) + +static unsigned total_tests __initdata; +static unsigned failed_tests __initdata; +static char *test_buffer __initdata; + +static int __printf(4, 0) __init +do_test(int bufsize, const char *expect, int elen, + const char *fmt, va_list ap) +{ + va_list aq; + int ret, written; + + total_tests++; + + memset(test_buffer, FILL_CHAR, BUF_SIZE); + va_copy(aq, ap); + ret = vsnprintf(test_buffer, bufsize, fmt, aq); + va_end(aq); + + if (ret != elen) { + pr_warn("vsnprintf(buf, %d, \"%s\", ...) returned %d, expected %d\n", + bufsize, fmt, ret, elen); + return 1; + } + + if (!bufsize) { + if (memchr_inv(test_buffer, FILL_CHAR, BUF_SIZE)) { + pr_warn("vsnprintf(buf, 0, \"%s\", ...) wrote to buffer\n", + fmt); + return 1; + } + return 0; + } + + written = min(bufsize-1, elen); + if (test_buffer[written]) { + pr_warn("vsnprintf(buf, %d, \"%s\", ...) did not nul-terminate buffer\n", + bufsize, fmt); + return 1; + } + + if (memcmp(test_buffer, expect, written)) { + pr_warn("vsnprintf(buf, %d, \"%s\", ...) wrote '%s', expected '%.*s'\n", + bufsize, fmt, test_buffer, written, expect); + return 1; + } + return 0; +} + +static void __printf(3, 4) __init +__test(const char *expect, int elen, const char *fmt, ...) +{ + va_list ap; + int rand; + char *p; + + BUG_ON(elen >= BUF_SIZE); + + va_start(ap, fmt); + + /* + * Every fmt+args is subjected to four tests: Three where we + * tell vsnprintf varying buffer sizes (plenty, not quite + * enough and 0), and then we also test that kvasprintf would + * be able to print it as expected. + */ + failed_tests += do_test(BUF_SIZE, expect, elen, fmt, ap); + rand = 1 + prandom_u32_max(elen+1); + /* Since elen < BUF_SIZE, we have 1 <= rand <= BUF_SIZE. */ + failed_tests += do_test(rand, expect, elen, fmt, ap); + failed_tests += do_test(0, expect, elen, fmt, ap); + + p = kvasprintf(GFP_KERNEL, fmt, ap); + if (p) { + if (memcmp(p, expect, elen+1)) { + pr_warn("kvasprintf(..., \"%s\", ...) returned '%s', expected '%s'\n", + fmt, p, expect); + failed_tests++; + } + kfree(p); + } + va_end(ap); +} + +#define test(expect, fmt, ...) \ + __test(expect, strlen(expect), fmt, ##__VA_ARGS__) + +static void __init +test_basic(void) +{ + /* Work around annoying "warning: zero-length gnu_printf format string". */ + char nul = '\0'; + + test("", &nul); + test("100%", "100%%"); + test("xxx%yyy", "xxx%cyyy", '%'); + __test("xxx\0yyy", 7, "xxx%cyyy", '\0'); +} + +static void __init +test_number(void) +{ + test("0x1234abcd ", "%#-12x", 0x1234abcd); + test(" 0x1234abcd", "%#12x", 0x1234abcd); + test("0|001| 12|+123| 1234|-123|-1234", "%d|%03d|%3d|%+d|% d|%+d|% d", 0, 1, 12, 123, 1234, -123, -1234); +} + +static void __init +test_string(void) +{ + test("", "%s%.0s", "", "123"); + test("ABCD|abc|123", "%s|%.3s|%.*s", "ABCD", "abcdef", 3, "123456"); + test("1 | 2|3 | 4|5 ", "%-3s|%3s|%-*s|%*s|%*s", "1", "2", 3, "3", 3, "4", -3, "5"); + /* + * POSIX and C99 say that a missing precision should be + * treated as a precision of 0. However, the kernel's printf + * implementation treats this case as if the . wasn't + * present. Let's add a test case documenting the current + * behaviour; should anyone ever feel the need to follow the + * standards more closely, this can be revisited. + */ + test("a||", "%.s|%.0s|%.*s", "a", "b", 0, "c"); + test("a | | ", "%-3.s|%-3.0s|%-3.*s", "a", "b", 0, "c"); +} + +static void __init +plain(void) +{ + test(PTR1_ZEROES PTR1_STR " " PTR2_STR, "%p %p", PTR1, PTR2); + /* + * The field width is overloaded for some %p extensions to + * pass another piece of information. For plain pointers, the + * behaviour is slightly odd: One cannot pass either the 0 + * flag nor a precision to %p without gcc complaining, and if + * one explicitly gives a field width, the number is no longer + * zero-padded. + */ + test("|" PTR1_STR PTR1_SPACES " | " PTR1_SPACES PTR1_STR "|", + "|%-*p|%*p|", PTR_WIDTH+2, PTR1, PTR_WIDTH+2, PTR1); + test("|" PTR2_STR " | " PTR2_STR "|", + "|%-*p|%*p|", PTR_WIDTH+2, PTR2, PTR_WIDTH+2, PTR2); + + /* + * Unrecognized %p extensions are treated as plain %p, but the + * alphanumeric suffix is ignored (that is, does not occur in + * the output.) + */ + test("|"PTR1_ZEROES PTR1_STR"|", "|%p0y|", PTR1); + test("|"PTR2_STR"|", "|%p0y|", PTR2); +} + +static void __init +symbol_ptr(void) +{ +} + +static void __init +kernel_ptr(void) +{ +} + +static void __init +struct_resource(void) +{ +} + +static void __init +addr(void) +{ +} + +static void __init +escaped_str(void) +{ +} + +static void __init +hex_string(void) +{ + const char buf[3] = {0xc0, 0xff, 0xee}; + + test("c0 ff ee|c0:ff:ee|c0-ff-ee|c0ffee", + "%3ph|%3phC|%3phD|%3phN", buf, buf, buf, buf); + test("c0 ff ee|c0:ff:ee|c0-ff-ee|c0ffee", + "%*ph|%*phC|%*phD|%*phN", 3, buf, 3, buf, 3, buf, 3, buf); +} + +static void __init +mac(void) +{ + const u8 addr[6] = {0x2d, 0x48, 0xd6, 0xfc, 0x7a, 0x05}; + + test("2d:48:d6:fc:7a:05", "%pM", addr); + test("05:7a:fc:d6:48:2d", "%pMR", addr); + test("2d-48-d6-fc-7a-05", "%pMF", addr); + test("2d48d6fc7a05", "%pm", addr); + test("057afcd6482d", "%pmR", addr); +} + +static void __init +ip4(void) +{ + struct sockaddr_in sa; + + sa.sin_family = AF_INET; + sa.sin_port = cpu_to_be16(12345); + sa.sin_addr.s_addr = cpu_to_be32(0x7f000001); + + test("127.000.000.001|127.0.0.1", "%pi4|%pI4", &sa.sin_addr, &sa.sin_addr); + test("127.000.000.001|127.0.0.1", "%piS|%pIS", &sa, &sa); + sa.sin_addr.s_addr = cpu_to_be32(0x01020304); + test("001.002.003.004:12345|1.2.3.4:12345", "%piSp|%pISp", &sa, &sa); +} + +static void __init +ip6(void) +{ +} + +static void __init +ip(void) +{ + ip4(); + ip6(); +} + +static void __init +uuid(void) +{ + const char uuid[16] = {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, + 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf}; + + test("00010203-0405-0607-0809-0a0b0c0d0e0f", "%pUb", uuid); + test("00010203-0405-0607-0809-0A0B0C0D0E0F", "%pUB", uuid); + test("03020100-0504-0706-0809-0a0b0c0d0e0f", "%pUl", uuid); + test("03020100-0504-0706-0809-0A0B0C0D0E0F", "%pUL", uuid); +} + +static void __init +dentry(void) +{ +} + +static void __init +struct_va_format(void) +{ +} + +static void __init +struct_clk(void) +{ +} + +static void __init +bitmap(void) +{ + DECLARE_BITMAP(bits, 20); + const int primes[] = {2,3,5,7,11,13,17,19}; + int i; + + bitmap_zero(bits, 20); + test("00000|00000", "%20pb|%*pb", bits, 20, bits); + test("|", "%20pbl|%*pbl", bits, 20, bits); + + for (i = 0; i < ARRAY_SIZE(primes); ++i) + set_bit(primes[i], bits); + test("a28ac|a28ac", "%20pb|%*pb", bits, 20, bits); + test("2-3,5,7,11,13,17,19|2-3,5,7,11,13,17,19", "%20pbl|%*pbl", bits, 20, bits); + + bitmap_fill(bits, 20); + test("fffff|fffff", "%20pb|%*pb", bits, 20, bits); + test("0-19|0-19", "%20pbl|%*pbl", bits, 20, bits); +} + +static void __init +netdev_features(void) +{ +} + +static void __init +test_pointer(void) +{ + plain(); + symbol_ptr(); + kernel_ptr(); + struct_resource(); + addr(); + escaped_str(); + hex_string(); + mac(); + ip(); + uuid(); + dentry(); + struct_va_format(); + struct_clk(); + bitmap(); + netdev_features(); +} + +static int __init +test_printf_init(void) +{ + test_buffer = kmalloc(BUF_SIZE, GFP_KERNEL); + if (!test_buffer) + return -ENOMEM; + + test_basic(); + test_number(); + test_string(); + test_pointer(); + + kfree(test_buffer); + + if (failed_tests == 0) + pr_info("all %u tests passed\n", total_tests); + else + pr_warn("failed %u out of %u tests\n", failed_tests, total_tests); + + return failed_tests ? -EINVAL : 0; +} + +module_init(test_printf_init); + +MODULE_AUTHOR("Rasmus Villemoes "); +MODULE_LICENSE("GPL"); -- cgit v1.2.3-59-g8ed1b From d7ec9a05d6defda8432da574a2a888eed6fc29f6 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 6 Nov 2015 16:30:35 -0800 Subject: lib/vsprintf.c: update documentation %n is no longer just ignored; it results in early return from vsnprintf. Also add a request to add test cases for future %p extensions. Signed-off-by: Rasmus Villemoes Reviewed-by: Martin Kletzander Reviewed-by: Andy Shevchenko Cc: Jonathan Corbet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/printk-formats.txt | 12 ++++++------ lib/vsprintf.c | 7 ++++--- 2 files changed, 10 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt index 9b8d7f746b1a..b784c270105f 100644 --- a/Documentation/printk-formats.txt +++ b/Documentation/printk-formats.txt @@ -23,6 +23,10 @@ Example: Reminder: sizeof() result is of type size_t. +The kernel's printf does not support %n. For obvious reasons, floating +point formats (%e, %f, %g, %a) are also not recognized. Use of any +unsupported specifier or length qualifier results in a WARN and early +return from vsnprintf. Raw pointer value SHOULD be printed with %p. The kernel supports the following extended format specifiers for pointer types: @@ -305,13 +309,9 @@ Command from struct task_struct Passed by reference. -Ignored argument: +If you add other %p extensions, please extend lib/test_printf.c with +one or more test cases, if at all feasible. - %n %n - - The argument passed will be ignored. In other words, literal "%n" will - be in the output and the argument will be considered for next format - specifier. Thank you for your cooperation and attention. diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 7848d53a5134..f9cee8e1233c 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1448,7 +1448,6 @@ int kptr_restrict __read_mostly; * - 'Cn' For a clock, it prints the name (Common Clock Framework) or address * (legacy clock framework) of the clock * - 'Cr' For a clock, it prints the current rate of the clock - * - 'n' For ignored argument * * ** Please update also Documentation/printk-formats.txt when making changes ** * @@ -1814,8 +1813,10 @@ qualifier: * @fmt: The format string to use * @args: Arguments for the format string * - * This function follows C99 vsnprintf, but has some extensions: - * %n is ignored + * This function generally follows C99 vsnprintf, but has some + * extensions and a few limitations: + * + * %n is unsupported * %p* is handled by pointer() * * See pointer() or Documentation/printk-formats.txt for more -- cgit v1.2.3-59-g8ed1b From 1c78bc170f393317dfa9d57baa599a51061ea86a Mon Sep 17 00:00:00 2001 From: Alexander Kuleshov Date: Fri, 6 Nov 2015 16:31:11 -0800 Subject: lib/halfmd4.c: use rol32 inline function in the ROUND macro provides rol32() inline function, let's use already predefined function instead of direct expression. Signed-off-by: Alexander Kuleshov Cc: Herbert Xu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/halfmd4.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/halfmd4.c b/lib/halfmd4.c index a8fe6274a13c..137e861d9690 100644 --- a/lib/halfmd4.c +++ b/lib/halfmd4.c @@ -1,6 +1,7 @@ #include #include #include +#include /* F, G and H are basic MD4 functions: selection, majority, parity */ #define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) @@ -14,7 +15,7 @@ * Rotation is separate from addition to prevent recomputation */ #define ROUND(f, a, b, c, d, x, s) \ - (a += f(b, c, d) + x, a = (a << s) | (a >> (32 - s))) + (a += f(b, c, d) + x, a = rol32(a, s)) #define K1 0 #define K2 013240474631UL #define K3 015666365641UL -- cgit v1.2.3-59-g8ed1b From 943ba6503802a46318cb9f5ab45be31d42e6f884 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 6 Nov 2015 16:31:14 -0800 Subject: lib/test-string_helpers.c: add string_get_size() tests Add a couple of simple tests for string_get_size(). The last one will hang the kernel without the 'lib/string_helpers.c: fix infinite loop in string_get_size()' fix. Signed-off-by: Vitaly Kuznetsov Cc: James Bottomley Cc: Andy Shevchenko Cc: Rasmus Villemoes Cc: "K. Y. Srinivasan" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test-string_helpers.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'lib') diff --git a/lib/test-string_helpers.c b/lib/test-string_helpers.c index 8e376efd88a4..98866a770770 100644 --- a/lib/test-string_helpers.c +++ b/lib/test-string_helpers.c @@ -326,6 +326,39 @@ out: kfree(out_test); } +#define string_get_size_maxbuf 16 +#define test_string_get_size_one(size, blk_size, units, exp_result) \ + do { \ + BUILD_BUG_ON(sizeof(exp_result) >= string_get_size_maxbuf); \ + __test_string_get_size((size), (blk_size), (units), \ + (exp_result)); \ + } while (0) + + +static __init void __test_string_get_size(const u64 size, const u64 blk_size, + const enum string_size_units units, + const char *exp_result) +{ + char buf[string_get_size_maxbuf]; + + string_get_size(size, blk_size, units, buf, sizeof(buf)); + if (!memcmp(buf, exp_result, strlen(exp_result) + 1)) + return; + + buf[sizeof(buf) - 1] = '\0'; + pr_warn("Test 'test_string_get_size_one' failed!\n"); + pr_warn("string_get_size(size = %llu, blk_size = %llu, units = %d\n", + size, blk_size, units); + pr_warn("expected: '%s', got '%s'\n", exp_result, buf); +} + +static __init void test_string_get_size(void) +{ + test_string_get_size_one(16384, 512, STRING_UNITS_2, "8.00 MiB"); + test_string_get_size_one(8192, 4096, STRING_UNITS_10, "32.7 MB"); + test_string_get_size_one(1, 512, STRING_UNITS_10, "512 B"); +} + static int __init test_string_helpers_init(void) { unsigned int i; @@ -344,6 +377,9 @@ static int __init test_string_helpers_init(void) for (i = 0; i < (ESCAPE_ANY_NP | ESCAPE_HEX) + 1; i++) test_string_escape("escape 1", escape1, i, TEST_STRING_2_DICT_1); + /* Test string_get_size() */ + test_string_get_size(); + return -EINVAL; } module_init(test_string_helpers_init); -- cgit v1.2.3-59-g8ed1b From 2cf12f821cd4f996bfabeec23d8f25e7a2052a28 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Fri, 6 Nov 2015 16:31:17 -0800 Subject: lib/llist.c: fix data race in llist_del_first llist_del_first reads entry->next, but it did not acquire visibility over the entry node. As the result it can get a stale value of entry->next (e.g. NULL or whatever garbage was there before the appending thread wrote correct value). And then commit that value as llist head with cmpxchg. That will corrupt llist. Note there is a control-dependency between read of head->first and read of entry->next, but it does not make the code correct. Kernel memory model unambiguously says: "A load-load control dependency requires a full read memory barrier". Use smp_load_acquire to acquire visibility over the entry node. The data race was found with KernelThreadSanitizer (KTSAN). Here is an example of KTSAN report: ThreadSanitizer: data-race in llist_del_first Read of size 1 by thread T389 (K2630, CPU0): [] llist_del_first+0x39/0x70 lib/llist.c:74 [< inlined >] tty_buffer_alloc drivers/tty/tty_buffer.c:181 [] __tty_buffer_request_room+0xb4/0x250 drivers/tty/tty_buffer.c:292 [] tty_insert_flip_string_fixed_flag+0x6c/0x150 drivers/tty/tty_buffer.c:337 [< inlined >] tty_insert_flip_string include/linux/tty_flip.h:35 [] pty_write+0x72/0xc0 drivers/tty/pty.c:110 [< inlined >] process_output_block drivers/tty/n_tty.c:611 [] n_tty_write+0x346/0x7f0 drivers/tty/n_tty.c:2401 [< inlined >] do_tty_write drivers/tty/tty_io.c:1159 [] tty_write+0x21f/0x3f0 drivers/tty/tty_io.c:1245 [] __vfs_write+0x5f/0x1f0 fs/read_write.c:489 [] vfs_write+0xef/0x280 fs/read_write.c:538 [< inlined >] SYSC_write fs/read_write.c:585 [] SyS_write+0x70/0xe0 fs/read_write.c:577 [] entry_SYSCALL_64_fastpath+0x12/0x71 arch/x86/entry/entry_64.S:186 Previous write of size 8 by thread T226 (K761, CPU0): [] llist_add_batch+0x32/0x70 lib/llist.c:44 (discriminator 16) [< inlined >] llist_add include/linux/llist.h:180 [] tty_buffer_free+0x6c/0xb0 drivers/tty/tty_buffer.c:221 [] flush_to_ldisc+0x107/0x300 drivers/tty/tty_buffer.c:514 [] process_one_work+0x47e/0x930 kernel/workqueue.c:2036 [] worker_thread+0xb0/0x900 kernel/workqueue.c:2170 [] kthread+0x150/0x170 kernel/kthread.c:209 [] ret_from_fork+0x3f/0x70 arch/x86/entry/entry_64.S:526 Signed-off-by: Dmitry Vyukov Reviewed-by: Paul E. McKenney Cc: Rasmus Villemoes Cc: Huang Ying Cc: Konstantin Serebryany Cc: Andrey Konovalov Cc: Alexander Potapenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/llist.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/llist.c b/lib/llist.c index 0b0e9779d675..ae5872b1df0c 100644 --- a/lib/llist.c +++ b/lib/llist.c @@ -66,12 +66,12 @@ struct llist_node *llist_del_first(struct llist_head *head) { struct llist_node *entry, *old_entry, *next; - entry = head->first; + entry = smp_load_acquire(&head->first); for (;;) { if (entry == NULL) return NULL; old_entry = entry; - next = entry->next; + next = READ_ONCE(entry->next); entry = cmpxchg(&head->first, old_entry, next); if (entry == old_entry) break; -- cgit v1.2.3-59-g8ed1b From 0a9df786a6ae2f898114bdd242b64920dedf53bd Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 6 Nov 2015 16:31:20 -0800 Subject: lib/kasprintf.c: introduce kvasprintf_const This adds kvasprintf_const which tries to use kstrdup_const if possible: If the format string contains no % characters, or if the format string is exactly "%s", we delegate to kstrdup_const. Otherwise, we fall back to kvasprintf. Just as for kstrdup_const, the main motivation is to save memory by reusing .rodata when possible. The return value should be freed by kfree_const, just like for kstrdup_const. There is deliberately no kasprintf_const: In the vast majority of cases, the format string argument is a literal, so one can determine statically whether one could instead use kstrdup_const directly (which would also require one to change all corresponding kfree calls to kfree_const). Signed-off-by: Rasmus Villemoes Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 2 ++ lib/kasprintf.c | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'lib') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 5582410727cb..2c13f747ac2e 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -413,6 +413,8 @@ extern __printf(2, 3) char *kasprintf(gfp_t gfp, const char *fmt, ...); extern __printf(2, 0) char *kvasprintf(gfp_t gfp, const char *fmt, va_list args); +extern __printf(2, 0) +const char *kvasprintf_const(gfp_t gfp, const char *fmt, va_list args); extern __scanf(2, 3) int sscanf(const char *, const char *, ...); diff --git a/lib/kasprintf.c b/lib/kasprintf.c index 32f12150fc4f..f194e6e593e1 100644 --- a/lib/kasprintf.c +++ b/lib/kasprintf.c @@ -31,6 +31,22 @@ char *kvasprintf(gfp_t gfp, const char *fmt, va_list ap) } EXPORT_SYMBOL(kvasprintf); +/* + * If fmt contains no % (or is exactly %s), use kstrdup_const. If fmt + * (or the sole vararg) points to rodata, we will then save a memory + * allocation and string copy. In any case, the return value should be + * freed using kfree_const(). + */ +const char *kvasprintf_const(gfp_t gfp, const char *fmt, va_list ap) +{ + if (!strchr(fmt, '%')) + return kstrdup_const(fmt, gfp); + if (!strcmp(fmt, "%s")) + return kstrdup_const(va_arg(ap, const char*), gfp); + return kvasprintf(gfp, fmt, ap); +} +EXPORT_SYMBOL(kvasprintf_const); + char *kasprintf(gfp_t gfp, const char *fmt, ...) { va_list ap; -- cgit v1.2.3-59-g8ed1b From f773f32d71a4ed9a645634da107cd249e09e1180 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 6 Nov 2015 16:31:23 -0800 Subject: lib/kobject.c: use kvasprintf_const for formatting ->name Sometimes kobject_set_name_vargs is called with a format string conaining no %, or a format string of precisely "%s", where the single vararg happens to point to .rodata. kvasprintf_const detects these cases for us and returns a copy of that pointer instead of duplicating the string, thus saving some run-time memory. Otherwise, it falls back to kvasprintf. We just need to always deallocate ->name using kfree_const. Unfortunately, the dance we need to do to perform the '/' -> '!' sanitization makes the resulting code rather ugly. I instrumented kstrdup_const to provide some statistics on the memory saved, and for me this gave an additional ~14KB after boot (306KB was already saved; this patch bumped that to 320KB). I have KMALLOC_SHIFT_LOW==3, and since 80% of the kvasprintf_const hits were satisfied by an 8-byte allocation, the 14K would roughly be quadrupled when KMALLOC_SHIFT_LOW==5. Whether these numbers are sufficient to justify the ugliness I'll leave to others to decide. Signed-off-by: Rasmus Villemoes Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/kobject.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index 055407746266..7cbccd2b4c72 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -257,18 +257,32 @@ static int kobject_add_internal(struct kobject *kobj) int kobject_set_name_vargs(struct kobject *kobj, const char *fmt, va_list vargs) { - char *s; + const char *s; if (kobj->name && !fmt) return 0; - s = kvasprintf(GFP_KERNEL, fmt, vargs); + s = kvasprintf_const(GFP_KERNEL, fmt, vargs); if (!s) return -ENOMEM; - /* ewww... some of these buggers have '/' in the name ... */ - strreplace(s, '/', '!'); - kfree(kobj->name); + /* + * ewww... some of these buggers have '/' in the name ... If + * that's the case, we need to make sure we have an actual + * allocated copy to modify, since kvasprintf_const may have + * returned something from .rodata. + */ + if (strchr(s, '/')) { + char *t; + + t = kstrdup(s, GFP_KERNEL); + kfree_const(s); + if (!t) + return -ENOMEM; + strreplace(t, '/', '!'); + s = t; + } + kfree_const(kobj->name); kobj->name = s; return 0; @@ -466,7 +480,7 @@ int kobject_rename(struct kobject *kobj, const char *new_name) envp[0] = devpath_string; envp[1] = NULL; - name = dup_name = kstrdup(new_name, GFP_KERNEL); + name = dup_name = kstrdup_const(new_name, GFP_KERNEL); if (!name) { error = -ENOMEM; goto out; @@ -486,7 +500,7 @@ int kobject_rename(struct kobject *kobj, const char *new_name) kobject_uevent_env(kobj, KOBJ_MOVE, envp); out: - kfree(dup_name); + kfree_const(dup_name); kfree(devpath_string); kfree(devpath); kobject_put(kobj); @@ -634,7 +648,7 @@ static void kobject_cleanup(struct kobject *kobj) /* free name if we allocated it */ if (name) { pr_debug("kobject: '%s': free name\n", name); - kfree(name); + kfree_const(name); } } -- cgit v1.2.3-59-g8ed1b From 90224350eaaf8b8043b19c393048f732bc2e4120 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 6 Nov 2015 16:31:26 -0800 Subject: lib/is_single_threaded.c: change current_is_single_threaded() to use for_each_thread() Change current_is_single_threaded() to use for_each_thread() rather than deprecated while_each_thread(). Signed-off-by: Oleg Nesterov Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/is_single_threaded.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/is_single_threaded.c b/lib/is_single_threaded.c index bd2bea963364..391fd23976a2 100644 --- a/lib/is_single_threaded.c +++ b/lib/is_single_threaded.c @@ -36,8 +36,7 @@ bool current_is_single_threaded(void) if (unlikely(p == task->group_leader)) continue; - t = p; - do { + for_each_thread(p, t) { if (unlikely(t->mm == mm)) goto found; if (likely(t->mm)) @@ -48,7 +47,7 @@ bool current_is_single_threaded(void) * forked before exiting. */ smp_rmb(); - } while_each_thread(p, t); + } } ret = true; found: -- cgit v1.2.3-59-g8ed1b From 9f029f540c2f7e010e4922d44ba0dfd05da79f88 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 6 Nov 2015 16:31:31 -0800 Subject: lib/hexdump.c: truncate output in case of overflow There is a classical off-by-one error in case when we try to place, for example, 1+1 bytes as hex in the buffer of size 6. The expected result is to get an output truncated, but in the reality we get 6 bytes filed followed by terminating NUL. Change the logic how we fill the output in case of byte dumping into limited space. This will follow the snprintf() behaviour by truncating output even on half bytes. Fixes: 114fc1afb2de (hexdump: make it return number of bytes placed in buffer) Signed-off-by: Andy Shevchenko Reported-by: Aaro Koskinen Tested-by: Aaro Koskinen Cc: Al Viro Cc: Catalin Marinas Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/hexdump.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/hexdump.c b/lib/hexdump.c index 8d74c20d8595..992457b1284c 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -169,11 +169,15 @@ int hex_dump_to_buffer(const void *buf, size_t len, int rowsize, int groupsize, } } else { for (j = 0; j < len; j++) { - if (linebuflen < lx + 3) + if (linebuflen < lx + 2) goto overflow2; ch = ptr[j]; linebuf[lx++] = hex_asc_hi(ch); + if (linebuflen < lx + 2) + goto overflow2; linebuf[lx++] = hex_asc_lo(ch); + if (linebuflen < lx + 2) + goto overflow2; linebuf[lx++] = ' '; } if (j) -- cgit v1.2.3-59-g8ed1b From 7f8306429c4c75f9e2bf39fcfe990b0af2f7292d Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 6 Nov 2015 16:32:55 -0800 Subject: dma-debug: check nents in dma_sync_sg* Like dma_unmap_sg, dma_sync_sg* should be called with the original number of entries passed to dma_map_sg, so do the same check in the sync path as we do in the unmap path. Signed-off-by: Robin Murphy Cc: Arnd Bergmann Cc: Marek Szyprowski Cc: Sumit Semwal Cc: Sakari Ailus Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/dma-debug.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'lib') diff --git a/lib/dma-debug.c b/lib/dma-debug.c index fcb65d2a0b94..8855f019ebe8 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -1249,6 +1249,14 @@ static void check_sync(struct device *dev, dir2name[entry->direction], dir2name[ref->direction]); + if (ref->sg_call_ents && ref->type == dma_debug_sg && + ref->sg_call_ents != entry->sg_call_ents) { + err_printk(ref->dev, entry, "DMA-API: device driver syncs " + "DMA sg list with different entry count " + "[map count=%d] [sync count=%d]\n", + entry->sg_call_ents, ref->sg_call_ents); + } + out: put_hash_bucket(bucket, &flags); } -- cgit v1.2.3-59-g8ed1b