diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/Kconfig | 6 | ||||
-rw-r--r-- | block/Makefile | 4 | ||||
-rw-r--r-- | block/bfq-cgroup.c | 25 | ||||
-rw-r--r-- | block/bfq-iosched.c | 27 | ||||
-rw-r--r-- | block/bfq-iosched.h | 4 | ||||
-rw-r--r-- | block/bfq-wf2q.c | 28 | ||||
-rw-r--r-- | block/bio.c | 12 | ||||
-rw-r--r-- | block/blk-core.c | 6 | ||||
-rw-r--r-- | block/blk-flush.c | 2 | ||||
-rw-r--r-- | block/blk-iocost.c | 2 | ||||
-rw-r--r-- | block/blk-merge.c | 9 | ||||
-rw-r--r-- | block/blk-mq-sched.c | 44 | ||||
-rw-r--r-- | block/blk-mq-tag.c | 4 | ||||
-rw-r--r-- | block/blk-mq-tag.h | 4 | ||||
-rw-r--r-- | block/blk-mq.c | 182 | ||||
-rw-r--r-- | block/blk-mq.h | 5 | ||||
-rw-r--r-- | block/blk-settings.c | 2 | ||||
-rw-r--r-- | block/blk-zoned.c | 2 | ||||
-rw-r--r-- | block/bsg.c | 1 | ||||
-rw-r--r-- | block/compat_ioctl.c | 427 | ||||
-rw-r--r-- | block/genhd.c | 36 | ||||
-rw-r--r-- | block/ioctl.c | 319 | ||||
-rw-r--r-- | block/partition-generic.c | 26 | ||||
-rw-r--r-- | block/partitions/ldm.c | 2 | ||||
-rw-r--r-- | block/scsi_ioctl.c | 214 | ||||
-rw-r--r-- | block/t10-pi.c | 3 |
26 files changed, 713 insertions, 683 deletions
diff --git a/block/Kconfig b/block/Kconfig index c23094a14a2b..3bc76bb113a0 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -66,7 +66,6 @@ config BLK_DEV_BSGLIB config BLK_DEV_INTEGRITY bool "Block layer data integrity support" - select CRC_T10DIF if BLK_DEV_INTEGRITY ---help--- Some storage devices allow extra information to be stored/retrieved to help protect the data. The block layer @@ -77,6 +76,11 @@ config BLK_DEV_INTEGRITY T10/SCSI Data Integrity Field or the T13/ATA External Path Protection. If in doubt, say N. +config BLK_DEV_INTEGRITY_T10 + tristate + depends on BLK_DEV_INTEGRITY + select CRC_T10DIF + config BLK_DEV_ZONED bool "Zoned block device support" select MQ_IOSCHED_DEADLINE diff --git a/block/Makefile b/block/Makefile index 205a5f2fef17..1a43750f4b01 100644 --- a/block/Makefile +++ b/block/Makefile @@ -25,9 +25,9 @@ obj-$(CONFIG_MQ_IOSCHED_KYBER) += kyber-iosched.o bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o obj-$(CONFIG_IOSCHED_BFQ) += bfq.o -obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o -obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o +obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o +obj-$(CONFIG_BLK_DEV_INTEGRITY_T10) += t10-pi.o obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o obj-$(CONFIG_BLK_MQ_VIRTIO) += blk-mq-virtio.o obj-$(CONFIG_BLK_MQ_RDMA) += blk-mq-rdma.o diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index e1419edde2ec..f0ff6654af28 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -332,7 +332,7 @@ static void bfqg_put(struct bfq_group *bfqg) kfree(bfqg); } -static void bfqg_and_blkg_get(struct bfq_group *bfqg) +void bfqg_and_blkg_get(struct bfq_group *bfqg) { /* see comments in bfq_bic_update_cgroup for why refcounting bfqg */ bfqg_get(bfqg); @@ -610,12 +610,13 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, */ entity = &bfqg->entity; for_each_entity(entity) { - bfqg = container_of(entity, struct bfq_group, 
entity); - if (bfqg != bfqd->root_group) { - parent = bfqg_parent(bfqg); + struct bfq_group *curr_bfqg = container_of(entity, + struct bfq_group, entity); + if (curr_bfqg != bfqd->root_group) { + parent = bfqg_parent(curr_bfqg); if (!parent) parent = bfqd->root_group; - bfq_group_set_parent(bfqg, parent); + bfq_group_set_parent(curr_bfqg, parent); } } @@ -651,9 +652,15 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfq_bfqq_expire(bfqd, bfqd->in_service_queue, false, BFQQE_PREEMPTED); + /* + * get extra reference to prevent bfqq from being freed in + * next possible deactivate + */ + bfqq->ref++; + if (bfq_bfqq_busy(bfqq)) bfq_deactivate_bfqq(bfqd, bfqq, false, false); - else if (entity->on_st) + else if (entity->on_st_or_in_serv) bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); bfqg_and_blkg_put(bfqq_group(bfqq)); @@ -670,6 +677,8 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (!bfqd->in_service_queue && !bfqd->rq_in_driver) bfq_schedule_dispatch(bfqd); + /* release extra ref taken above */ + bfq_put_queue(bfqq); } /** @@ -1398,6 +1407,10 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq) return bfqq->bfqd->root_group; } +void bfqg_and_blkg_get(struct bfq_group *bfqg) {} + +void bfqg_and_blkg_put(struct bfq_group *bfqg) {} + struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) { struct bfq_group *bfqg; diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index ad4af4aaf2ce..8c436abfaf14 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -427,7 +427,6 @@ void bfq_schedule_dispatch(struct bfq_data *bfqd) } #define bfq_class_idle(bfqq) ((bfqq)->ioprio_class == IOPRIO_CLASS_IDLE) -#define bfq_class_rt(bfqq) ((bfqq)->ioprio_class == IOPRIO_CLASS_RT) #define bfq_sample_valid(samples) ((samples) > 80) @@ -614,6 +613,10 @@ bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfqq->pos_root = NULL; } + /* oom_bfqq does not participate in queue merging */ + 
if (bfqq == &bfqd->oom_bfqq) + return; + /* * bfqq cannot be merged any longer (see comments in * bfq_setup_cooperator): no point in adding bfqq into the @@ -1056,7 +1059,7 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd, static int bfqq_process_refs(struct bfq_queue *bfqq) { - return bfqq->ref - bfqq->allocated - bfqq->entity.on_st - + return bfqq->ref - bfqq->allocated - bfqq->entity.on_st_or_in_serv - (bfqq->weight_counter != NULL); } @@ -3444,6 +3447,10 @@ static void bfq_dispatch_remove(struct request_queue *q, struct request *rq) static bool idling_needed_for_service_guarantees(struct bfq_data *bfqd, struct bfq_queue *bfqq) { + /* No point in idling for bfqq if it won't get requests any longer */ + if (unlikely(!bfqq_process_refs(bfqq))) + return false; + return (bfqq->wr_coeff > 1 && (bfqd->wr_busy_queues < bfq_tot_busy_queues(bfqd) || @@ -4077,6 +4084,10 @@ static bool idling_boosts_thr_without_issues(struct bfq_data *bfqd, bfqq_sequential_and_IO_bound, idling_boosts_thr; + /* No point in idling for bfqq if it won't get requests any longer */ + if (unlikely(!bfqq_process_refs(bfqq))) + return false; + bfqq_sequential_and_IO_bound = !BFQQ_SEEKY(bfqq) && bfq_bfqq_IO_bound(bfqq) && bfq_bfqq_has_short_ttime(bfqq); @@ -4170,6 +4181,10 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq) struct bfq_data *bfqd = bfqq->bfqd; bool idling_boosts_thr_with_no_issue, idling_needed_for_service_guar; + /* No point in idling for bfqq if it won't get requests any longer */ + if (unlikely(!bfqq_process_refs(bfqq))) + return false; + if (unlikely(bfqd->strict_guarantees)) return true; @@ -4810,9 +4825,7 @@ void bfq_put_queue(struct bfq_queue *bfqq) { struct bfq_queue *item; struct hlist_node *n; -#ifdef CONFIG_BFQ_GROUP_IOSCHED struct bfq_group *bfqg = bfqq_group(bfqq); -#endif if (bfqq->bfqd) bfq_log_bfqq(bfqq->bfqd, bfqq, "put_queue: %p %d", @@ -4885,9 +4898,7 @@ void bfq_put_queue(struct bfq_queue *bfqq) bfqq->bfqd->last_completed_rq_bfqq = 
NULL; kmem_cache_free(bfq_pool, bfqq); -#ifdef CONFIG_BFQ_GROUP_IOSCHED bfqg_and_blkg_put(bfqg); -#endif } static void bfq_put_cooperator(struct bfq_queue *bfqq) @@ -5968,6 +5979,8 @@ static void bfq_finish_requeue_request(struct request *rq) } /* + * Removes the association between the current task and bfqq, assuming + * that bic points to the bfq iocontext of the task. * Returns NULL if a new bfqq should be allocated, or the old bfqq if this * was the last process referring to that bfqq. */ @@ -6375,10 +6388,10 @@ static void bfq_exit_queue(struct elevator_queue *e) hrtimer_cancel(&bfqd->idle_slice_timer); -#ifdef CONFIG_BFQ_GROUP_IOSCHED /* release oom-queue reference to root group */ bfqg_and_blkg_put(bfqd->root_group); +#ifdef CONFIG_BFQ_GROUP_IOSCHED blkcg_deactivate_policy(bfqd->queue, &blkcg_policy_bfq); #else spin_lock_irq(&bfqd->lock); diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 8526f20c53bc..d1233af9c684 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -150,7 +150,7 @@ struct bfq_entity { * Flag, true if the entity is on a tree (either the active or * the idle one of its service_tree) or is in service. 
*/ - bool on_st; + bool on_st_or_in_serv; /* B-WF2Q+ start and finish timestamps [sectors/weight] */ u64 start, finish; @@ -921,6 +921,7 @@ struct bfq_group { #else struct bfq_group { + struct bfq_entity entity; struct bfq_sched_data sched_data; struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; @@ -984,6 +985,7 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg); struct bfq_group *bfqq_group(struct bfq_queue *bfqq); struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node); +void bfqg_and_blkg_get(struct bfq_group *bfqg); void bfqg_and_blkg_put(struct bfq_group *bfqg); #ifdef CONFIG_BFQ_GROUP_IOSCHED diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index 05f0bf4a1144..eb0e2a6daabe 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -277,10 +277,7 @@ struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity) */ static u64 bfq_delta(unsigned long service, unsigned long weight) { - u64 d = (u64)service << WFQ_SERVICE_SHIFT; - - do_div(d, weight); - return d; + return div64_ul((u64)service << WFQ_SERVICE_SHIFT, weight); } /** @@ -536,7 +533,9 @@ static void bfq_get_entity(struct bfq_entity *entity) bfqq->ref++; bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d", bfqq, bfqq->ref); - } + } else + bfqg_and_blkg_get(container_of(entity, struct bfq_group, + entity)); } /** @@ -648,10 +647,16 @@ static void bfq_forget_entity(struct bfq_service_tree *st, { struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); - entity->on_st = false; + entity->on_st_or_in_serv = false; st->wsum -= entity->weight; - if (bfqq && !is_in_service) + if (is_in_service) + return; + + if (bfqq) bfq_put_queue(bfqq); + else + bfqg_and_blkg_put(container_of(entity, struct bfq_group, + entity)); } /** @@ -1002,7 +1007,7 @@ static void __bfq_activate_entity(struct bfq_entity *entity, */ bfq_get_entity(entity); - entity->on_st = true; + entity->on_st_or_in_serv = true; } #ifdef CONFIG_BFQ_GROUP_IOSCHED @@ -1168,7 
+1173,10 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree) struct bfq_service_tree *st; bool is_in_service; - if (!entity->on_st) /* entity never activated, or already inactive */ + if (!entity->on_st_or_in_serv) /* + * entity never activated, or + * already inactive + */ return false; /* @@ -1623,7 +1631,7 @@ bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) * service tree either, then release the service reference to * the queue it represents (taken with bfq_get_entity). */ - if (!in_serv_entity->on_st) { + if (!in_serv_entity->on_st_or_in_serv) { /* * If no process is referencing in_serv_bfqq any * longer, then the service reference may be the only diff --git a/block/bio.c b/block/bio.c index 006bcc52a77e..94d697217887 100644 --- a/block/bio.c +++ b/block/bio.c @@ -538,6 +538,16 @@ void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start) } EXPORT_SYMBOL(zero_fill_bio_iter); +/** + * bio_truncate - truncate the bio to small size of @new_size + * @bio: the bio to be truncated + * @new_size: new size for truncating the bio + * + * Description: + * Truncate the bio to new size of @new_size. If bio_op(bio) is + * REQ_OP_READ, zero the truncated part. This function should only + * be used for handling corner cases, such as bio eod. 
+ */ void bio_truncate(struct bio *bio, unsigned new_size) { struct bio_vec bv; @@ -548,7 +558,7 @@ void bio_truncate(struct bio *bio, unsigned new_size) if (new_size >= bio->bi_iter.bi_size) return; - if (bio_data_dir(bio) != READ) + if (bio_op(bio) != REQ_OP_READ) goto exit; bio_for_each_segment(bv, bio, iter) { diff --git a/block/blk-core.c b/block/blk-core.c index 089e890ab208..60dc9552ef8d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1663,12 +1663,6 @@ int kblockd_schedule_work(struct work_struct *work) } EXPORT_SYMBOL(kblockd_schedule_work); -int kblockd_schedule_work_on(int cpu, struct work_struct *work) -{ - return queue_work_on(cpu, kblockd_workqueue, work); -} -EXPORT_SYMBOL(kblockd_schedule_work_on); - int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay) { diff --git a/block/blk-flush.c b/block/blk-flush.c index 3f977c517960..5cc775bdb06a 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -412,7 +412,7 @@ void blk_insert_flush(struct request *rq) */ if ((policy & REQ_FSEQ_DATA) && !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { - blk_mq_request_bypass_insert(rq, false); + blk_mq_request_bypass_insert(rq, false, false); return; } diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 27ca68621137..9a599cc28c29 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -1318,7 +1318,7 @@ static bool iocg_is_idle(struct ioc_gq *iocg) return false; /* is something in flight? 
*/ - if (atomic64_read(&iocg->done_vtime) < atomic64_read(&iocg->vtime)) + if (atomic64_read(&iocg->done_vtime) != atomic64_read(&iocg->vtime)) return false; return true; diff --git a/block/blk-merge.c b/block/blk-merge.c index 347782a24a35..1534ed736363 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -164,8 +164,13 @@ static inline unsigned get_max_segment_size(const struct request_queue *q, unsigned long mask = queue_segment_boundary(q); offset = mask & (page_to_phys(start_page) + offset); - return min_t(unsigned long, mask - offset + 1, - queue_max_segment_size(q)); + + /* + * overflow may be triggered in case of zero page physical address + * on 32bit arch, use queue's max segment size when that happens. + */ + return min_not_zero(mask - offset + 1, + (unsigned long)queue_max_segment_size(q)); } /** diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index ca22afd47b3d..74cedea56034 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -361,13 +361,19 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, bool has_sched, struct request *rq) { - /* dispatch flush rq directly */ - if (rq->rq_flags & RQF_FLUSH_SEQ) { - spin_lock(&hctx->lock); - list_add(&rq->queuelist, &hctx->dispatch); - spin_unlock(&hctx->lock); + /* + * dispatch flush and passthrough rq directly + * + * passthrough request has to be added to hctx->dispatch directly. + * For some reason, device may be in one situation which can't + * handle FS request, so STS_RESOURCE is always returned and the + * FS request will be added to hctx->dispatch. However passthrough + * request may be required at that time for fixing the problem. If + * passthrough request is added to scheduler queue, there isn't any + * chance to dispatch it given we prioritize requests in hctx->dispatch. 
+ */ + if ((rq->rq_flags & RQF_FLUSH_SEQ) || blk_rq_is_passthrough(rq)) return true; - } if (has_sched) rq->rq_flags |= RQF_SORTED; @@ -391,8 +397,32 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head, WARN_ON(e && (rq->tag != -1)); - if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) + if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) { + /* + * Firstly normal IO request is inserted to scheduler queue or + * sw queue, meantime we add flush request to dispatch queue( + * hctx->dispatch) directly and there is at most one in-flight + * flush request for each hw queue, so it doesn't matter to add + * flush request to tail or front of the dispatch queue. + * + * Secondly in case of NCQ, flush request belongs to non-NCQ + * command, and queueing it will fail when there is any + * in-flight normal IO request(NCQ command). When adding flush + * rq to the front of hctx->dispatch, it is easier to introduce + * extra time to flush rq's latency because of S_SCHED_RESTART + * compared with adding to the tail of dispatch queue, then + * chance of flush merge is increased, and less flush requests + * will be issued to controller. It is observed that ~10% time + * is saved in blktests block/004 on disk attached to AHCI/NCQ + * drive when adding flush rq to the front of hctx->dispatch. + * + * Simply queue flush rq to the front of hctx->dispatch so that + * intensive flush workloads can benefit in case of NCQ HW. + */ + at_head = (rq->rq_flags & RQF_FLUSH_SEQ) ? 
true : at_head; + blk_mq_request_bypass_insert(rq, at_head, false); goto run; + } if (e && e->type->ops.insert_requests) { LIST_HEAD(list); diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index fbacde454718..586c9d6e904a 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -183,8 +183,8 @@ found_tag: return tag + tag_offset; } -void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags, - struct blk_mq_ctx *ctx, unsigned int tag) +void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, + unsigned int tag) { if (!blk_mq_tag_is_reserved(tags, tag)) { const int real_tag = tag - tags->nr_reserved_tags; diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 15bc74acb57e..2b8321efb682 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -26,8 +26,8 @@ extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int r extern void blk_mq_free_tags(struct blk_mq_tags *tags); extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data); -extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags, - struct blk_mq_ctx *ctx, unsigned int tag); +extern void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, + unsigned int tag); extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags **tags, unsigned int depth, bool can_grow); diff --git a/block/blk-mq.c b/block/blk-mq.c index 323c9cb28066..d92088dec6c3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -477,9 +477,9 @@ static void __blk_mq_free_request(struct request *rq) blk_pm_mark_last_busy(rq); rq->mq_hctx = NULL; if (rq->tag != -1) - blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag); + blk_mq_put_tag(hctx->tags, ctx, rq->tag); if (sched_tag != -1) - blk_mq_put_tag(hctx, hctx->sched_tags, ctx, sched_tag); + blk_mq_put_tag(hctx->sched_tags, ctx, sched_tag); blk_mq_sched_restart(hctx); blk_queue_exit(q); } @@ -641,6 +641,14 @@ bool blk_mq_complete_request(struct request *rq) } 
EXPORT_SYMBOL(blk_mq_complete_request); +/** + * blk_mq_start_request - Start processing a request + * @rq: Pointer to request to be started + * + * Function used by device drivers to notify the block layer that a request + * is going to be processed now, so blk layer can do proper initializations + * such as starting the timeout timer. + */ void blk_mq_start_request(struct request *rq) { struct request_queue *q = rq->q; @@ -727,7 +735,7 @@ static void blk_mq_requeue_work(struct work_struct *work) * merge. */ if (rq->rq_flags & RQF_DONTPREP) - blk_mq_request_bypass_insert(rq, false); + blk_mq_request_bypass_insert(rq, false, false); else blk_mq_sched_insert_request(rq, true, false, false); } @@ -1278,7 +1286,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, q->mq_ops->commit_rqs(hctx); spin_lock(&hctx->lock); - list_splice_init(list, &hctx->dispatch); + list_splice_tail_init(list, &hctx->dispatch); spin_unlock(&hctx->lock); /* @@ -1327,6 +1335,12 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, return (queued + errors) != 0; } +/** + * __blk_mq_run_hw_queue - Run a hardware queue. + * @hctx: Pointer to the hardware queue to run. + * + * Send pending requests to the hardware. + */ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) { int srcu_idx; @@ -1424,6 +1438,15 @@ select_cpu: return next_cpu; } +/** + * __blk_mq_delay_run_hw_queue - Run (or schedule to run) a hardware queue. + * @hctx: Pointer to the hardware queue to run. + * @async: If we want to run the queue asynchronously. + * @msecs: Milliseconds of delay to wait before running the queue. + * + * If !@async, try to run the queue now. Else, run the queue asynchronously and + * with a delay of @msecs. 
+ */ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async, unsigned long msecs) { @@ -1445,12 +1468,28 @@ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async, msecs_to_jiffies(msecs)); } +/** + * blk_mq_delay_run_hw_queue - Run a hardware queue asynchronously. + * @hctx: Pointer to the hardware queue to run. + * @msecs: Milliseconds of delay to wait before running the queue. + * + * Run a hardware queue asynchronously with a delay of @msecs. + */ void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) { __blk_mq_delay_run_hw_queue(hctx, true, msecs); } EXPORT_SYMBOL(blk_mq_delay_run_hw_queue); +/** + * blk_mq_run_hw_queue - Start to run a hardware queue. + * @hctx: Pointer to the hardware queue to run. + * @async: If we want to run the queue asynchronously. + * + * Check if the request queue is not in a quiesced state and if there are + * pending requests to be sent. If this is true, run the queue to send requests + * to hardware. + */ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) { int srcu_idx; @@ -1474,6 +1513,11 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) } EXPORT_SYMBOL(blk_mq_run_hw_queue); +/** + * blk_mq_run_hw_queues - Run all hardware queues in a request queue. + * @q: Pointer to the request queue to run. + * @async: If we want to run the queue asynchronously. + */ void blk_mq_run_hw_queues(struct request_queue *q, bool async) { struct blk_mq_hw_ctx *hctx; @@ -1625,16 +1669,24 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, blk_mq_hctx_mark_pending(hctx, ctx); } -/* +/** + * blk_mq_request_bypass_insert - Insert a request at dispatch list. + * @rq: Pointer to request to be inserted. + * @run_queue: If we should run the hardware queue after inserting the request. + * * Should only be used carefully, when the caller knows we want to * bypass a potential IO scheduler on the target device. 
*/ -void blk_mq_request_bypass_insert(struct request *rq, bool run_queue) +void blk_mq_request_bypass_insert(struct request *rq, bool at_head, + bool run_queue) { struct blk_mq_hw_ctx *hctx = rq->mq_hctx; spin_lock(&hctx->lock); - list_add_tail(&rq->queuelist, &hctx->dispatch); + if (at_head) + list_add(&rq->queuelist, &hctx->dispatch); + else + list_add_tail(&rq->queuelist, &hctx->dispatch); spin_unlock(&hctx->lock); if (run_queue) @@ -1668,28 +1720,20 @@ static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b) struct request *rqa = container_of(a, struct request, queuelist); struct request *rqb = container_of(b, struct request, queuelist); - if (rqa->mq_ctx < rqb->mq_ctx) - return -1; - else if (rqa->mq_ctx > rqb->mq_ctx) - return 1; - else if (rqa->mq_hctx < rqb->mq_hctx) - return -1; - else if (rqa->mq_hctx > rqb->mq_hctx) - return 1; + if (rqa->mq_ctx != rqb->mq_ctx) + return rqa->mq_ctx > rqb->mq_ctx; + if (rqa->mq_hctx != rqb->mq_hctx) + return rqa->mq_hctx > rqb->mq_hctx; return blk_rq_pos(rqa) > blk_rq_pos(rqb); } void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) { - struct blk_mq_hw_ctx *this_hctx; - struct blk_mq_ctx *this_ctx; - struct request_queue *this_q; - struct request *rq; LIST_HEAD(list); - LIST_HEAD(rq_list); - unsigned int depth; + if (list_empty(&plug->mq_list)) + return; list_splice_init(&plug->mq_list, &list); if (plug->rq_count > 2 && plug->multiple_queues) @@ -1697,42 +1741,27 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) plug->rq_count = 0; - this_q = NULL; - this_hctx = NULL; - this_ctx = NULL; - depth = 0; - - while (!list_empty(&list)) { - rq = list_entry_rq(list.next); - list_del_init(&rq->queuelist); - BUG_ON(!rq->q); - if (rq->mq_hctx != this_hctx || rq->mq_ctx != this_ctx) { - if (this_hctx) { - trace_block_unplug(this_q, depth, !from_schedule); - blk_mq_sched_insert_requests(this_hctx, this_ctx, - &rq_list, - from_schedule); - } - - this_q = rq->q; - this_ctx = 
rq->mq_ctx; - this_hctx = rq->mq_hctx; - depth = 0; + do { + struct list_head rq_list; + struct request *rq, *head_rq = list_entry_rq(list.next); + struct list_head *pos = &head_rq->queuelist; /* skip first */ + struct blk_mq_hw_ctx *this_hctx = head_rq->mq_hctx; + struct blk_mq_ctx *this_ctx = head_rq->mq_ctx; + unsigned int depth = 1; + + list_for_each_continue(pos, &list) { + rq = list_entry_rq(pos); + BUG_ON(!rq->q); + if (rq->mq_hctx != this_hctx || rq->mq_ctx != this_ctx) + break; + depth++; } - depth++; - list_add_tail(&rq->queuelist, &rq_list); - } - - /* - * If 'this_hctx' is set, we know we have entries to complete - * on 'rq_list'. Do those. - */ - if (this_hctx) { - trace_block_unplug(this_q, depth, !from_schedule); + list_cut_before(&rq_list, &list, pos); + trace_block_unplug(head_rq->q, depth, !from_schedule); blk_mq_sched_insert_requests(this_hctx, this_ctx, &rq_list, from_schedule); - } + } while(!list_empty(&list)); } static void blk_mq_bio_to_request(struct request *rq, struct bio *bio, @@ -1824,10 +1853,21 @@ insert: if (bypass_insert) return BLK_STS_RESOURCE; - blk_mq_request_bypass_insert(rq, run_queue); + blk_mq_request_bypass_insert(rq, false, run_queue); return BLK_STS_OK; } +/** + * blk_mq_try_issue_directly - Try to send a request directly to device driver. + * @hctx: Pointer of the associated hardware queue. + * @rq: Pointer to request to be sent. + * @cookie: Request queue cookie. + * + * If the device has enough resources to accept a new request now, send the + * request directly to device driver. Else, insert at hctx->dispatch queue, so + * we can try send it another time in the future. Requests inserted at this + * queue have higher priority. 
+ */ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, struct request *rq, blk_qc_t *cookie) { @@ -1840,7 +1880,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false, true); if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) - blk_mq_request_bypass_insert(rq, true); + blk_mq_request_bypass_insert(rq, false, true); else if (ret != BLK_STS_OK) blk_mq_end_request(rq, ret); @@ -1874,7 +1914,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, if (ret != BLK_STS_OK) { if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) { - blk_mq_request_bypass_insert(rq, + blk_mq_request_bypass_insert(rq, false, list_empty(list)); break; } @@ -1905,6 +1945,22 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq) } } +/** + * blk_mq_make_request - Create and send a request to block device. + * @q: Request queue pointer. + * @bio: Bio pointer. + * + * Builds up a request structure from @q and @bio and send to the device. The + * request may not be queued directly to hardware if: + * * This request can be merged with another one + * * We want to place request at plug queue for possible future merging + * * There is an IO scheduler active at this queue + * + * It will not queue the request if there is an error with the bio, or at the + * request creation. + * + * Returns: Request queue cookie. 
+ */ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) { const int is_sync = op_is_sync(bio->bi_opf); @@ -1950,7 +2006,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) plug = blk_mq_plug(q, bio); if (unlikely(is_flush_fua)) { - /* bypass scheduler for flush rq */ + /* Bypass scheduler for flush requests */ blk_insert_flush(rq); blk_mq_run_hw_queue(data.hctx, true); } else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs || @@ -1978,6 +2034,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_add_rq_to_plug(plug, rq); } else if (q->elevator) { + /* Insert the request at the IO scheduler queue */ blk_mq_sched_insert_request(rq, false, true, true); } else if (plug && !blk_queue_nomerges(q)) { /* @@ -2004,8 +2061,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) } } else if ((q->nr_hw_queues > 1 && is_sync) || !data.hctx->dispatch_busy) { + /* + * There is no scheduler and we can try to send directly + * to the hardware. + */ blk_mq_try_issue_directly(data.hctx, rq, &cookie); } else { + /* Default case. 
*/ blk_mq_sched_insert_request(rq, false, true, true); } @@ -3340,7 +3402,6 @@ static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb) } static unsigned long blk_mq_poll_nsecs(struct request_queue *q, - struct blk_mq_hw_ctx *hctx, struct request *rq) { unsigned long ret = 0; @@ -3373,7 +3434,6 @@ static unsigned long blk_mq_poll_nsecs(struct request_queue *q, } static bool blk_mq_poll_hybrid_sleep(struct request_queue *q, - struct blk_mq_hw_ctx *hctx, struct request *rq) { struct hrtimer_sleeper hs; @@ -3393,7 +3453,7 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q, if (q->poll_nsec > 0) nsecs = q->poll_nsec; else - nsecs = blk_mq_poll_nsecs(q, hctx, rq); + nsecs = blk_mq_poll_nsecs(q, rq); if (!nsecs) return false; @@ -3448,7 +3508,7 @@ static bool blk_mq_poll_hybrid(struct request_queue *q, return false; } - return blk_mq_poll_hybrid_sleep(q, hctx, rq); + return blk_mq_poll_hybrid_sleep(q, rq); } /** diff --git a/block/blk-mq.h b/block/blk-mq.h index eaaca8fc1c28..10bfdfb494fa 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -66,7 +66,8 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, */ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, bool at_head); -void blk_mq_request_bypass_insert(struct request *rq, bool run_queue); +void blk_mq_request_bypass_insert(struct request *rq, bool at_head, + bool run_queue); void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, struct list_head *list); @@ -199,7 +200,7 @@ static inline bool blk_mq_get_dispatch_budget(struct blk_mq_hw_ctx *hctx) static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, struct request *rq) { - blk_mq_put_tag(hctx, hctx->tags, rq->mq_ctx, rq->tag); + blk_mq_put_tag(hctx->tags, rq->mq_ctx, rq->tag); rq->tag = -1; if (rq->rq_flags & RQF_MQ_INFLIGHT) { diff --git a/block/blk-settings.c b/block/blk-settings.c index 5f6dcc7a47bd..c8eda2e7b91e 100644 --- a/block/blk-settings.c +++ 
b/block/blk-settings.c @@ -328,7 +328,7 @@ EXPORT_SYMBOL(blk_queue_max_segment_size); * storage device can address. The default of 512 covers most * hardware. **/ -void blk_queue_logical_block_size(struct request_queue *q, unsigned short size) +void blk_queue_logical_block_size(struct request_queue *q, unsigned int size) { q->limits.logical_block_size = size; diff --git a/block/blk-zoned.c b/block/blk-zoned.c index d00fcfd71dfe..05741c6f618b 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -198,7 +198,7 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, break; } - bio->bi_opf = op; + bio->bi_opf = op | REQ_SYNC; bio->bi_iter.bi_sector = sector; sector += zone_sectors; diff --git a/block/bsg.c b/block/bsg.c index 833c44b3d458..d7bae94b64d9 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -382,6 +382,7 @@ static const struct file_operations bsg_fops = { .open = bsg_open, .release = bsg_release, .unlocked_ioctl = bsg_ioctl, + .compat_ioctl = compat_ptr_ioctl, .owner = THIS_MODULE, .llseek = default_llseek, }; diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c deleted file mode 100644 index 3ed7a0f144a9..000000000000 --- a/block/compat_ioctl.c +++ /dev/null @@ -1,427 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/blkdev.h> -#include <linux/blkpg.h> -#include <linux/blktrace_api.h> -#include <linux/cdrom.h> -#include <linux/compat.h> -#include <linux/elevator.h> -#include <linux/hdreg.h> -#include <linux/pr.h> -#include <linux/slab.h> -#include <linux/syscalls.h> -#include <linux/types.h> -#include <linux/uaccess.h> - -static int compat_put_ushort(unsigned long arg, unsigned short val) -{ - return put_user(val, (unsigned short __user *)compat_ptr(arg)); -} - -static int compat_put_int(unsigned long arg, int val) -{ - return put_user(val, (compat_int_t __user *)compat_ptr(arg)); -} - -static int compat_put_uint(unsigned long arg, unsigned int val) -{ - return put_user(val, (compat_uint_t __user *)compat_ptr(arg)); -} - -static 
int compat_put_long(unsigned long arg, long val) -{ - return put_user(val, (compat_long_t __user *)compat_ptr(arg)); -} - -static int compat_put_ulong(unsigned long arg, compat_ulong_t val) -{ - return put_user(val, (compat_ulong_t __user *)compat_ptr(arg)); -} - -static int compat_put_u64(unsigned long arg, u64 val) -{ - return put_user(val, (compat_u64 __user *)compat_ptr(arg)); -} - -struct compat_hd_geometry { - unsigned char heads; - unsigned char sectors; - unsigned short cylinders; - u32 start; -}; - -static int compat_hdio_getgeo(struct gendisk *disk, struct block_device *bdev, - struct compat_hd_geometry __user *ugeo) -{ - struct hd_geometry geo; - int ret; - - if (!ugeo) - return -EINVAL; - if (!disk->fops->getgeo) - return -ENOTTY; - - memset(&geo, 0, sizeof(geo)); - /* - * We need to set the startsect first, the driver may - * want to override it. - */ - geo.start = get_start_sect(bdev); - ret = disk->fops->getgeo(bdev, &geo); - if (ret) - return ret; - - ret = copy_to_user(ugeo, &geo, 4); - ret |= put_user(geo.start, &ugeo->start); - if (ret) - ret = -EFAULT; - - return ret; -} - -static int compat_hdio_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) -{ - unsigned long __user *p; - int error; - - p = compat_alloc_user_space(sizeof(unsigned long)); - error = __blkdev_driver_ioctl(bdev, mode, - cmd, (unsigned long)p); - if (error == 0) { - unsigned int __user *uvp = compat_ptr(arg); - unsigned long v; - if (get_user(v, p) || put_user(v, uvp)) - error = -EFAULT; - } - return error; -} - -struct compat_cdrom_read_audio { - union cdrom_addr addr; - u8 addr_format; - compat_int_t nframes; - compat_caddr_t buf; -}; - -struct compat_cdrom_generic_command { - unsigned char cmd[CDROM_PACKET_SIZE]; - compat_caddr_t buffer; - compat_uint_t buflen; - compat_int_t stat; - compat_caddr_t sense; - unsigned char data_direction; - compat_int_t quiet; - compat_int_t timeout; - compat_caddr_t reserved[1]; -}; - -static int 
compat_cdrom_read_audio(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) -{ - struct cdrom_read_audio __user *cdread_audio; - struct compat_cdrom_read_audio __user *cdread_audio32; - __u32 data; - void __user *datap; - - cdread_audio = compat_alloc_user_space(sizeof(*cdread_audio)); - cdread_audio32 = compat_ptr(arg); - - if (copy_in_user(&cdread_audio->addr, - &cdread_audio32->addr, - (sizeof(*cdread_audio32) - - sizeof(compat_caddr_t)))) - return -EFAULT; - - if (get_user(data, &cdread_audio32->buf)) - return -EFAULT; - datap = compat_ptr(data); - if (put_user(datap, &cdread_audio->buf)) - return -EFAULT; - - return __blkdev_driver_ioctl(bdev, mode, cmd, - (unsigned long)cdread_audio); -} - -static int compat_cdrom_generic_command(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) -{ - struct cdrom_generic_command __user *cgc; - struct compat_cdrom_generic_command __user *cgc32; - u32 data; - unsigned char dir; - int itmp; - - cgc = compat_alloc_user_space(sizeof(*cgc)); - cgc32 = compat_ptr(arg); - - if (copy_in_user(&cgc->cmd, &cgc32->cmd, sizeof(cgc->cmd)) || - get_user(data, &cgc32->buffer) || - put_user(compat_ptr(data), &cgc->buffer) || - copy_in_user(&cgc->buflen, &cgc32->buflen, - (sizeof(unsigned int) + sizeof(int))) || - get_user(data, &cgc32->sense) || - put_user(compat_ptr(data), &cgc->sense) || - get_user(dir, &cgc32->data_direction) || - put_user(dir, &cgc->data_direction) || - get_user(itmp, &cgc32->quiet) || - put_user(itmp, &cgc->quiet) || - get_user(itmp, &cgc32->timeout) || - put_user(itmp, &cgc->timeout) || - get_user(data, &cgc32->reserved[0]) || - put_user(compat_ptr(data), &cgc->reserved[0])) - return -EFAULT; - - return __blkdev_driver_ioctl(bdev, mode, cmd, (unsigned long)cgc); -} - -struct compat_blkpg_ioctl_arg { - compat_int_t op; - compat_int_t flags; - compat_int_t datalen; - compat_caddr_t data; -}; - -static int compat_blkpg_ioctl(struct block_device *bdev, fmode_t mode, - 
unsigned int cmd, struct compat_blkpg_ioctl_arg __user *ua32) -{ - struct blkpg_ioctl_arg __user *a = compat_alloc_user_space(sizeof(*a)); - compat_caddr_t udata; - compat_int_t n; - int err; - - err = get_user(n, &ua32->op); - err |= put_user(n, &a->op); - err |= get_user(n, &ua32->flags); - err |= put_user(n, &a->flags); - err |= get_user(n, &ua32->datalen); - err |= put_user(n, &a->datalen); - err |= get_user(udata, &ua32->data); - err |= put_user(compat_ptr(udata), &a->data); - if (err) - return err; - - return blkdev_ioctl(bdev, mode, cmd, (unsigned long)a); -} - -#define BLKBSZGET_32 _IOR(0x12, 112, int) -#define BLKBSZSET_32 _IOW(0x12, 113, int) -#define BLKGETSIZE64_32 _IOR(0x12, 114, int) - -static int compat_blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode, - unsigned cmd, unsigned long arg) -{ - switch (cmd) { - case HDIO_GET_UNMASKINTR: - case HDIO_GET_MULTCOUNT: - case HDIO_GET_KEEPSETTINGS: - case HDIO_GET_32BIT: - case HDIO_GET_NOWERR: - case HDIO_GET_DMA: - case HDIO_GET_NICE: - case HDIO_GET_WCACHE: - case HDIO_GET_ACOUSTIC: - case HDIO_GET_ADDRESS: - case HDIO_GET_BUSSTATE: - return compat_hdio_ioctl(bdev, mode, cmd, arg); - case CDROMREADAUDIO: - return compat_cdrom_read_audio(bdev, mode, cmd, arg); - case CDROM_SEND_PACKET: - return compat_cdrom_generic_command(bdev, mode, cmd, arg); - - /* - * No handler required for the ones below, we just need to - * convert arg to a 64 bit pointer. - */ - case BLKSECTSET: - /* - * 0x03 -- HD/IDE ioctl's used by hdparm and friends. - * Some need translations, these do not. 
- */ - case HDIO_GET_IDENTITY: - case HDIO_DRIVE_TASK: - case HDIO_DRIVE_CMD: - /* 0x330 is reserved -- it used to be HDIO_GETGEO_BIG */ - case 0x330: - /* CDROM stuff */ - case CDROMPAUSE: - case CDROMRESUME: - case CDROMPLAYMSF: - case CDROMPLAYTRKIND: - case CDROMREADTOCHDR: - case CDROMREADTOCENTRY: - case CDROMSTOP: - case CDROMSTART: - case CDROMEJECT: - case CDROMVOLCTRL: - case CDROMSUBCHNL: - case CDROMMULTISESSION: - case CDROM_GET_MCN: - case CDROMRESET: - case CDROMVOLREAD: - case CDROMSEEK: - case CDROMPLAYBLK: - case CDROMCLOSETRAY: - case CDROM_DISC_STATUS: - case CDROM_CHANGER_NSLOTS: - case CDROM_GET_CAPABILITY: - /* Ignore cdrom.h about these next 5 ioctls, they absolutely do - * not take a struct cdrom_read, instead they take a struct cdrom_msf - * which is compatible. - */ - case CDROMREADMODE2: - case CDROMREADMODE1: - case CDROMREADRAW: - case CDROMREADCOOKED: - case CDROMREADALL: - /* DVD ioctls */ - case DVD_READ_STRUCT: - case DVD_WRITE_STRUCT: - case DVD_AUTH: - arg = (unsigned long)compat_ptr(arg); - /* These intepret arg as an unsigned long, not as a pointer, - * so we must not do compat_ptr() conversion. */ - case HDIO_SET_MULTCOUNT: - case HDIO_SET_UNMASKINTR: - case HDIO_SET_KEEPSETTINGS: - case HDIO_SET_32BIT: - case HDIO_SET_NOWERR: - case HDIO_SET_DMA: - case HDIO_SET_PIO_MODE: - case HDIO_SET_NICE: - case HDIO_SET_WCACHE: - case HDIO_SET_ACOUSTIC: - case HDIO_SET_BUSSTATE: - case HDIO_SET_ADDRESS: - case CDROMEJECT_SW: - case CDROM_SET_OPTIONS: - case CDROM_CLEAR_OPTIONS: - case CDROM_SELECT_SPEED: - case CDROM_SELECT_DISC: - case CDROM_MEDIA_CHANGED: - case CDROM_DRIVE_STATUS: - case CDROM_LOCKDOOR: - case CDROM_DEBUG: - break; - default: - /* unknown ioctl number */ - return -ENOIOCTLCMD; - } - - return __blkdev_driver_ioctl(bdev, mode, cmd, arg); -} - -/* Most of the generic ioctls are handled in the normal fallback path. - This assumes the blkdev's low level compat_ioctl always returns - ENOIOCTLCMD for unknown ioctls. 
*/ -long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) -{ - int ret = -ENOIOCTLCMD; - struct inode *inode = file->f_mapping->host; - struct block_device *bdev = inode->i_bdev; - struct gendisk *disk = bdev->bd_disk; - fmode_t mode = file->f_mode; - loff_t size; - unsigned int max_sectors; - - /* - * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have - * to updated it before every ioctl. - */ - if (file->f_flags & O_NDELAY) - mode |= FMODE_NDELAY; - else - mode &= ~FMODE_NDELAY; - - switch (cmd) { - case HDIO_GETGEO: - return compat_hdio_getgeo(disk, bdev, compat_ptr(arg)); - case BLKPBSZGET: - return compat_put_uint(arg, bdev_physical_block_size(bdev)); - case BLKIOMIN: - return compat_put_uint(arg, bdev_io_min(bdev)); - case BLKIOOPT: - return compat_put_uint(arg, bdev_io_opt(bdev)); - case BLKALIGNOFF: - return compat_put_int(arg, bdev_alignment_offset(bdev)); - case BLKDISCARDZEROES: - return compat_put_uint(arg, 0); - case BLKFLSBUF: - case BLKROSET: - case BLKDISCARD: - case BLKSECDISCARD: - case BLKZEROOUT: - /* - * the ones below are implemented in blkdev_locked_ioctl, - * but we call blkdev_ioctl, which gets the lock for us - */ - case BLKRRPART: - case BLKREPORTZONE: - case BLKRESETZONE: - case BLKOPENZONE: - case BLKCLOSEZONE: - case BLKFINISHZONE: - case BLKGETZONESZ: - case BLKGETNRZONES: - return blkdev_ioctl(bdev, mode, cmd, - (unsigned long)compat_ptr(arg)); - case BLKBSZSET_32: - return blkdev_ioctl(bdev, mode, BLKBSZSET, - (unsigned long)compat_ptr(arg)); - case BLKPG: - return compat_blkpg_ioctl(bdev, mode, cmd, compat_ptr(arg)); - case BLKRAGET: - case BLKFRAGET: - if (!arg) - return -EINVAL; - return compat_put_long(arg, - (bdev->bd_bdi->ra_pages * PAGE_SIZE) / 512); - case BLKROGET: /* compatible */ - return compat_put_int(arg, bdev_read_only(bdev) != 0); - case BLKBSZGET_32: /* get the logical block size (cf. 
BLKSSZGET) */ - return compat_put_int(arg, block_size(bdev)); - case BLKSSZGET: /* get block device hardware sector size */ - return compat_put_int(arg, bdev_logical_block_size(bdev)); - case BLKSECTGET: - max_sectors = min_t(unsigned int, USHRT_MAX, - queue_max_sectors(bdev_get_queue(bdev))); - return compat_put_ushort(arg, max_sectors); - case BLKROTATIONAL: - return compat_put_ushort(arg, - !blk_queue_nonrot(bdev_get_queue(bdev))); - case BLKRASET: /* compatible, but no compat_ptr (!) */ - case BLKFRASET: - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - bdev->bd_bdi->ra_pages = (arg * 512) / PAGE_SIZE; - return 0; - case BLKGETSIZE: - size = i_size_read(bdev->bd_inode); - if ((size >> 9) > ~0UL) - return -EFBIG; - return compat_put_ulong(arg, size >> 9); - - case BLKGETSIZE64_32: - return compat_put_u64(arg, i_size_read(bdev->bd_inode)); - - case BLKTRACESETUP32: - case BLKTRACESTART: /* compatible */ - case BLKTRACESTOP: /* compatible */ - case BLKTRACETEARDOWN: /* compatible */ - ret = blk_trace_ioctl(bdev, cmd, compat_ptr(arg)); - return ret; - case IOC_PR_REGISTER: - case IOC_PR_RESERVE: - case IOC_PR_RELEASE: - case IOC_PR_PREEMPT: - case IOC_PR_PREEMPT_ABORT: - case IOC_PR_CLEAR: - return blkdev_ioctl(bdev, mode, cmd, - (unsigned long)compat_ptr(arg)); - default: - if (disk->fops->compat_ioctl) - ret = disk->fops->compat_ioctl(bdev, mode, cmd, arg); - if (ret == -ENOIOCTLCMD) - ret = compat_blkdev_driver_ioctl(bdev, mode, cmd, arg); - return ret; - } -} diff --git a/block/genhd.c b/block/genhd.c index ff6268970ddc..9c2e13ce0d19 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -301,6 +301,42 @@ struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) } EXPORT_SYMBOL_GPL(disk_map_sector_rcu); +/** + * disk_has_partitions + * @disk: gendisk of interest + * + * Walk through the partition table and check if valid partition exists. + * + * CONTEXT: + * Don't care. 
+ * + * RETURNS: + * True if the gendisk has at least one valid non-zero size partition. + * Otherwise false. + */ +bool disk_has_partitions(struct gendisk *disk) +{ + struct disk_part_tbl *ptbl; + int i; + bool ret = false; + + rcu_read_lock(); + ptbl = rcu_dereference(disk->part_tbl); + + /* Iterate partitions skipping the whole device at index 0 */ + for (i = 1; i < ptbl->len; i++) { + if (rcu_dereference(ptbl->part[i])) { + ret = true; + break; + } + } + + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL_GPL(disk_has_partitions); + /* * Can be deleted altogether. Later. * diff --git a/block/ioctl.c b/block/ioctl.c index 5de98b97af2a..127194b9f9bd 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/capability.h> +#include <linux/compat.h> #include <linux/blkdev.h> #include <linux/export.h> #include <linux/gfp.h> @@ -11,12 +12,12 @@ #include <linux/pr.h> #include <linux/uaccess.h> -static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user *arg) +static int blkpg_do_ioctl(struct block_device *bdev, + struct blkpg_partition __user *upart, int op) { struct block_device *bdevp; struct gendisk *disk; struct hd_struct *part, *lpart; - struct blkpg_ioctl_arg a; struct blkpg_partition p; struct disk_part_iter piter; long long start, length; @@ -24,9 +25,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user if (!capable(CAP_SYS_ADMIN)) return -EACCES; - if (copy_from_user(&a, arg, sizeof(struct blkpg_ioctl_arg))) - return -EFAULT; - if (copy_from_user(&p, a.data, sizeof(struct blkpg_partition))) + if (copy_from_user(&p, upart, sizeof(struct blkpg_partition))) return -EFAULT; disk = bdev->bd_disk; if (bdev != bdev->bd_contains) @@ -34,7 +33,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user partno = p.pno; if (partno <= 0) return -EINVAL; - switch (a.op) { + switch (op) { case BLKPG_ADD_PARTITION: start = p.start >> 9; 
length = p.length >> 9; @@ -155,6 +154,39 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user } } +static int blkpg_ioctl(struct block_device *bdev, + struct blkpg_ioctl_arg __user *arg) +{ + struct blkpg_partition __user *udata; + int op; + + if (get_user(op, &arg->op) || get_user(udata, &arg->data)) + return -EFAULT; + + return blkpg_do_ioctl(bdev, udata, op); +} + +#ifdef CONFIG_COMPAT +struct compat_blkpg_ioctl_arg { + compat_int_t op; + compat_int_t flags; + compat_int_t datalen; + compat_caddr_t data; +}; + +static int compat_blkpg_ioctl(struct block_device *bdev, + struct compat_blkpg_ioctl_arg __user *arg) +{ + compat_caddr_t udata; + int op; + + if (get_user(op, &arg->op) || get_user(udata, &arg->data)) + return -EFAULT; + + return blkpg_do_ioctl(bdev, compat_ptr(udata), op); +} +#endif + static int blkdev_reread_part(struct block_device *bdev) { int ret; @@ -238,36 +270,48 @@ static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode, BLKDEV_ZERO_NOUNMAP); } -static int put_ushort(unsigned long arg, unsigned short val) +static int put_ushort(unsigned short __user *argp, unsigned short val) { - return put_user(val, (unsigned short __user *)arg); + return put_user(val, argp); } -static int put_int(unsigned long arg, int val) +static int put_int(int __user *argp, int val) { - return put_user(val, (int __user *)arg); + return put_user(val, argp); } -static int put_uint(unsigned long arg, unsigned int val) +static int put_uint(unsigned int __user *argp, unsigned int val) { - return put_user(val, (unsigned int __user *)arg); + return put_user(val, argp); } -static int put_long(unsigned long arg, long val) +static int put_long(long __user *argp, long val) { - return put_user(val, (long __user *)arg); + return put_user(val, argp); } -static int put_ulong(unsigned long arg, unsigned long val) +static int put_ulong(unsigned long __user *argp, unsigned long val) { - return put_user(val, (unsigned long __user *)arg); + return 
put_user(val, argp); } -static int put_u64(unsigned long arg, u64 val) +static int put_u64(u64 __user *argp, u64 val) { - return put_user(val, (u64 __user *)arg); + return put_user(val, argp); } +#ifdef CONFIG_COMPAT +static int compat_put_long(compat_long_t *argp, long val) +{ + return put_user(val, argp); +} + +static int compat_put_ulong(compat_ulong_t *argp, compat_ulong_t val) +{ + return put_user(val, argp); +} +#endif + int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, unsigned long arg) { @@ -285,6 +329,26 @@ int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode, */ EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl); +#ifdef CONFIG_COMPAT +/* + * This is the equivalent of compat_ptr_ioctl(), to be used by block + * drivers that implement only commands that are completely compatible + * between 32-bit and 64-bit user space + */ +int blkdev_compat_ptr_ioctl(struct block_device *bdev, fmode_t mode, + unsigned cmd, unsigned long arg) +{ + struct gendisk *disk = bdev->bd_disk; + + if (disk->fops->ioctl) + return disk->fops->ioctl(bdev, mode, cmd, + (unsigned long)compat_ptr(arg)); + + return -ENOIOCTLCMD; +} +EXPORT_SYMBOL(blkdev_compat_ptr_ioctl); +#endif + static int blkdev_pr_register(struct block_device *bdev, struct pr_registration __user *arg) { @@ -455,6 +519,45 @@ static int blkdev_getgeo(struct block_device *bdev, return 0; } +#ifdef CONFIG_COMPAT +struct compat_hd_geometry { + unsigned char heads; + unsigned char sectors; + unsigned short cylinders; + u32 start; +}; + +static int compat_hdio_getgeo(struct block_device *bdev, + struct compat_hd_geometry __user *ugeo) +{ + struct gendisk *disk = bdev->bd_disk; + struct hd_geometry geo; + int ret; + + if (!ugeo) + return -EINVAL; + if (!disk->fops->getgeo) + return -ENOTTY; + + memset(&geo, 0, sizeof(geo)); + /* + * We need to set the startsect first, the driver may + * want to override it. 
+ */ + geo.start = get_start_sect(bdev); + ret = disk->fops->getgeo(bdev, &geo); + if (ret) + return ret; + + ret = copy_to_user(ugeo, &geo, 4); + ret |= put_user(geo.start, &ugeo->start); + if (ret) + ret = -EFAULT; + + return ret; +} +#endif + /* set the logical block size */ static int blkdev_bszset(struct block_device *bdev, fmode_t mode, int __user *argp) @@ -481,13 +584,13 @@ static int blkdev_bszset(struct block_device *bdev, fmode_t mode, } /* - * always keep this in sync with compat_blkdev_ioctl() + * Common commands that are handled the same way on native and compat + * user space. Note the separate arg/argp parameters that are needed + * to deal with the compat_ptr() conversion. */ -int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, - unsigned long arg) +static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode, + unsigned cmd, unsigned long arg, void __user *argp) { - void __user *argp = (void __user *)arg; - loff_t size; unsigned int max_sectors; switch (cmd) { @@ -510,60 +613,39 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, case BLKFINISHZONE: return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg); case BLKGETZONESZ: - return put_uint(arg, bdev_zone_sectors(bdev)); + return put_uint(argp, bdev_zone_sectors(bdev)); case BLKGETNRZONES: - return put_uint(arg, blkdev_nr_zones(bdev->bd_disk)); - case HDIO_GETGEO: - return blkdev_getgeo(bdev, argp); - case BLKRAGET: - case BLKFRAGET: - if (!arg) - return -EINVAL; - return put_long(arg, (bdev->bd_bdi->ra_pages*PAGE_SIZE) / 512); + return put_uint(argp, blkdev_nr_zones(bdev->bd_disk)); case BLKROGET: - return put_int(arg, bdev_read_only(bdev) != 0); - case BLKBSZGET: /* get block device soft block size (cf. 
BLKSSZGET) */ - return put_int(arg, block_size(bdev)); + return put_int(argp, bdev_read_only(bdev) != 0); case BLKSSZGET: /* get block device logical block size */ - return put_int(arg, bdev_logical_block_size(bdev)); + return put_int(argp, bdev_logical_block_size(bdev)); case BLKPBSZGET: /* get block device physical block size */ - return put_uint(arg, bdev_physical_block_size(bdev)); + return put_uint(argp, bdev_physical_block_size(bdev)); case BLKIOMIN: - return put_uint(arg, bdev_io_min(bdev)); + return put_uint(argp, bdev_io_min(bdev)); case BLKIOOPT: - return put_uint(arg, bdev_io_opt(bdev)); + return put_uint(argp, bdev_io_opt(bdev)); case BLKALIGNOFF: - return put_int(arg, bdev_alignment_offset(bdev)); + return put_int(argp, bdev_alignment_offset(bdev)); case BLKDISCARDZEROES: - return put_uint(arg, 0); + return put_uint(argp, 0); case BLKSECTGET: max_sectors = min_t(unsigned int, USHRT_MAX, queue_max_sectors(bdev_get_queue(bdev))); - return put_ushort(arg, max_sectors); + return put_ushort(argp, max_sectors); case BLKROTATIONAL: - return put_ushort(arg, !blk_queue_nonrot(bdev_get_queue(bdev))); + return put_ushort(argp, !blk_queue_nonrot(bdev_get_queue(bdev))); case BLKRASET: case BLKFRASET: if(!capable(CAP_SYS_ADMIN)) return -EACCES; bdev->bd_bdi->ra_pages = (arg * 512) / PAGE_SIZE; return 0; - case BLKBSZSET: - return blkdev_bszset(bdev, mode, argp); - case BLKPG: - return blkpg_ioctl(bdev, argp); case BLKRRPART: return blkdev_reread_part(bdev); - case BLKGETSIZE: - size = i_size_read(bdev->bd_inode); - if ((size >> 9) > ~0UL) - return -EFBIG; - return put_ulong(arg, size >> 9); - case BLKGETSIZE64: - return put_u64(arg, i_size_read(bdev->bd_inode)); case BLKTRACESTART: case BLKTRACESTOP: - case BLKTRACESETUP: case BLKTRACETEARDOWN: return blk_trace_ioctl(bdev, cmd, argp); case IOC_PR_REGISTER: @@ -579,7 +661,132 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, case IOC_PR_CLEAR: return blkdev_pr_clear(bdev, argp); default: + 
return -ENOIOCTLCMD; + } +} + +/* + * Always keep this in sync with compat_blkdev_ioctl() + * to handle all incompatible commands in both functions. + * + * New commands must be compatible and go into blkdev_common_ioctl + */ +int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, + unsigned long arg) +{ + int ret; + loff_t size; + void __user *argp = (void __user *)arg; + + switch (cmd) { + /* These need separate implementations for the data structure */ + case HDIO_GETGEO: + return blkdev_getgeo(bdev, argp); + case BLKPG: + return blkpg_ioctl(bdev, argp); + + /* Compat mode returns 32-bit data instead of 'long' */ + case BLKRAGET: + case BLKFRAGET: + if (!argp) + return -EINVAL; + return put_long(argp, (bdev->bd_bdi->ra_pages*PAGE_SIZE) / 512); + case BLKGETSIZE: + size = i_size_read(bdev->bd_inode); + if ((size >> 9) > ~0UL) + return -EFBIG; + return put_ulong(argp, size >> 9); + + /* The data is compatible, but the command number is different */ + case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */ + return put_int(argp, block_size(bdev)); + case BLKBSZSET: + return blkdev_bszset(bdev, mode, argp); + case BLKGETSIZE64: + return put_u64(argp, i_size_read(bdev->bd_inode)); + + /* Incompatible alignment on i386 */ + case BLKTRACESETUP: + return blk_trace_ioctl(bdev, cmd, argp); + default: + break; + } + + ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp); + if (ret == -ENOIOCTLCMD) return __blkdev_driver_ioctl(bdev, mode, cmd, arg); + + return ret; +} +EXPORT_SYMBOL_GPL(blkdev_ioctl); /* for /dev/raw */ + +#ifdef CONFIG_COMPAT + +#define BLKBSZGET_32 _IOR(0x12, 112, int) +#define BLKBSZSET_32 _IOW(0x12, 113, int) +#define BLKGETSIZE64_32 _IOR(0x12, 114, int) + +/* Most of the generic ioctls are handled in the normal fallback path. + This assumes the blkdev's low level compat_ioctl always returns + ENOIOCTLCMD for unknown ioctls. 
*/ +long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) +{ + int ret; + void __user *argp = compat_ptr(arg); + struct inode *inode = file->f_mapping->host; + struct block_device *bdev = inode->i_bdev; + struct gendisk *disk = bdev->bd_disk; + fmode_t mode = file->f_mode; + loff_t size; + + /* + * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have + * to updated it before every ioctl. + */ + if (file->f_flags & O_NDELAY) + mode |= FMODE_NDELAY; + else + mode &= ~FMODE_NDELAY; + + switch (cmd) { + /* These need separate implementations for the data structure */ + case HDIO_GETGEO: + return compat_hdio_getgeo(bdev, argp); + case BLKPG: + return compat_blkpg_ioctl(bdev, argp); + + /* Compat mode returns 32-bit data instead of 'long' */ + case BLKRAGET: + case BLKFRAGET: + if (!argp) + return -EINVAL; + return compat_put_long(argp, + (bdev->bd_bdi->ra_pages * PAGE_SIZE) / 512); + case BLKGETSIZE: + size = i_size_read(bdev->bd_inode); + if ((size >> 9) > ~0UL) + return -EFBIG; + return compat_put_ulong(argp, size >> 9); + + /* The data is compatible, but the command number is different */ + case BLKBSZGET_32: /* get the logical block size (cf. 
BLKSSZGET) */ + return put_int(argp, bdev_logical_block_size(bdev)); + case BLKBSZSET_32: + return blkdev_bszset(bdev, mode, argp); + case BLKGETSIZE64_32: + return put_u64(argp, i_size_read(bdev->bd_inode)); + + /* Incompatible alignment on i386 */ + case BLKTRACESETUP32: + return blk_trace_ioctl(bdev, cmd, argp); + default: + break; } + + ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp); + if (ret == -ENOIOCTLCMD && disk->fops->compat_ioctl) + ret = disk->fops->compat_ioctl(bdev, mode, cmd, arg); + + return ret; } -EXPORT_SYMBOL_GPL(blkdev_ioctl); +#endif diff --git a/block/partition-generic.c b/block/partition-generic.c index 1d20c9cf213f..564fae77711d 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -321,6 +321,24 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, const char *dname; int err; + /* + * Partitions are not supported on zoned block devices that are used as + * such. + */ + switch (disk->queue->limits.zoned) { + case BLK_ZONED_HM: + pr_warn("%s: partitions not supported on host managed zoned block device\n", + disk->disk_name); + return ERR_PTR(-ENXIO); + case BLK_ZONED_HA: + pr_info("%s: disabling host aware zoned block device support due to partitions\n", + disk->disk_name); + disk->queue->limits.zoned = BLK_ZONED_NONE; + break; + case BLK_ZONED_NONE: + break; + } + err = disk_expand_part_tbl(disk, partno); if (err) return ERR_PTR(err); @@ -501,7 +519,7 @@ static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev, part = add_partition(disk, p, from, size, state->parts[p].flags, &state->parts[p].info); - if (IS_ERR(part)) { + if (IS_ERR(part) && PTR_ERR(part) != -ENXIO) { printk(KERN_ERR " %s: p%d could not be added: %ld\n", disk->disk_name, p, -PTR_ERR(part)); return true; @@ -540,10 +558,10 @@ int blk_add_partitions(struct gendisk *disk, struct block_device *bdev) } /* - * Partitions are not supported on zoned block devices. 
+ * Partitions are not supported on host managed zoned block devices. */ - if (bdev_is_zoned(bdev)) { - pr_warn("%s: ignoring partition table on zoned block device\n", + if (disk->queue->limits.zoned == BLK_ZONED_HM) { + pr_warn("%s: ignoring partition table on host managed zoned block device\n", disk->disk_name); ret = 0; goto out_free_state; diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c index fe5d970e2e60..a2d97ee1908c 100644 --- a/block/partitions/ldm.c +++ b/block/partitions/ldm.c @@ -1233,7 +1233,7 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags) BUG_ON (!data || !frags); if (size < 2 * VBLK_SIZE_HEAD) { - ldm_error("Value of size is to small."); + ldm_error("Value of size is too small."); return false; } diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 650bade5ea5a..b4e73d5dd5c2 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -20,6 +20,7 @@ #include <scsi/scsi.h> #include <scsi/scsi_ioctl.h> #include <scsi/scsi_cmnd.h> +#include <scsi/sg.h> struct blk_cmd_filter { unsigned long read_ok[BLK_SCSI_CMD_PER_LONG]; @@ -550,34 +551,6 @@ static inline int blk_send_start_stop(struct request_queue *q, return __blk_send_generic(q, bd_disk, GPCMD_START_STOP_UNIT, data); } -#ifdef CONFIG_COMPAT -struct compat_sg_io_hdr { - compat_int_t interface_id; /* [i] 'S' for SCSI generic (required) */ - compat_int_t dxfer_direction; /* [i] data transfer direction */ - unsigned char cmd_len; /* [i] SCSI command length ( <= 16 bytes) */ - unsigned char mx_sb_len; /* [i] max length to write to sbp */ - unsigned short iovec_count; /* [i] 0 implies no scatter gather */ - compat_uint_t dxfer_len; /* [i] byte count of data transfer */ - compat_uint_t dxferp; /* [i], [*io] points to data transfer memory - or scatter gather list */ - compat_uptr_t cmdp; /* [i], [*i] points to command to perform */ - compat_uptr_t sbp; /* [i], [*o] points to sense_buffer memory */ - compat_uint_t timeout; /* [i] MAX_UINT->no timeout (unit: 
millisec) */ - compat_uint_t flags; /* [i] 0 -> default, see SG_FLAG... */ - compat_int_t pack_id; /* [i->o] unused internally (normally) */ - compat_uptr_t usr_ptr; /* [i->o] unused internally */ - unsigned char status; /* [o] scsi status */ - unsigned char masked_status; /* [o] shifted, masked scsi status */ - unsigned char msg_status; /* [o] messaging level data (optional) */ - unsigned char sb_len_wr; /* [o] byte count actually written to sbp */ - unsigned short host_status; /* [o] errors from host adapter */ - unsigned short driver_status; /* [o] errors from software driver */ - compat_int_t resid; /* [o] dxfer_len - actual_transferred */ - compat_uint_t duration; /* [o] time taken by cmd (unit: millisec) */ - compat_uint_t info; /* [o] auxiliary information */ -}; -#endif - int put_sg_io_hdr(const struct sg_io_hdr *hdr, void __user *argp) { #ifdef CONFIG_COMPAT @@ -666,6 +639,136 @@ int get_sg_io_hdr(struct sg_io_hdr *hdr, const void __user *argp) } EXPORT_SYMBOL(get_sg_io_hdr); +#ifdef CONFIG_COMPAT +struct compat_cdrom_generic_command { + unsigned char cmd[CDROM_PACKET_SIZE]; + compat_caddr_t buffer; + compat_uint_t buflen; + compat_int_t stat; + compat_caddr_t sense; + unsigned char data_direction; + compat_int_t quiet; + compat_int_t timeout; + compat_caddr_t reserved[1]; +}; +#endif + +static int scsi_get_cdrom_generic_arg(struct cdrom_generic_command *cgc, + const void __user *arg) +{ +#ifdef CONFIG_COMPAT + if (in_compat_syscall()) { + struct compat_cdrom_generic_command cgc32; + + if (copy_from_user(&cgc32, arg, sizeof(cgc32))) + return -EFAULT; + + *cgc = (struct cdrom_generic_command) { + .buffer = compat_ptr(cgc32.buffer), + .buflen = cgc32.buflen, + .stat = cgc32.stat, + .sense = compat_ptr(cgc32.sense), + .data_direction = cgc32.data_direction, + .quiet = cgc32.quiet, + .timeout = cgc32.timeout, + .reserved[0] = compat_ptr(cgc32.reserved[0]), + }; + memcpy(&cgc->cmd, &cgc32.cmd, CDROM_PACKET_SIZE); + return 0; + } +#endif + if 
(copy_from_user(cgc, arg, sizeof(*cgc))) + return -EFAULT; + + return 0; +} + +static int scsi_put_cdrom_generic_arg(const struct cdrom_generic_command *cgc, + void __user *arg) +{ +#ifdef CONFIG_COMPAT + if (in_compat_syscall()) { + struct compat_cdrom_generic_command cgc32 = { + .buffer = (uintptr_t)(cgc->buffer), + .buflen = cgc->buflen, + .stat = cgc->stat, + .sense = (uintptr_t)(cgc->sense), + .data_direction = cgc->data_direction, + .quiet = cgc->quiet, + .timeout = cgc->timeout, + .reserved[0] = (uintptr_t)(cgc->reserved[0]), + }; + memcpy(&cgc32.cmd, &cgc->cmd, CDROM_PACKET_SIZE); + + if (copy_to_user(arg, &cgc32, sizeof(cgc32))) + return -EFAULT; + + return 0; + } +#endif + if (copy_to_user(arg, cgc, sizeof(*cgc))) + return -EFAULT; + + return 0; +} + +static int scsi_cdrom_send_packet(struct request_queue *q, + struct gendisk *bd_disk, + fmode_t mode, void __user *arg) +{ + struct cdrom_generic_command cgc; + struct sg_io_hdr hdr; + int err; + + err = scsi_get_cdrom_generic_arg(&cgc, arg); + if (err) + return err; + + cgc.timeout = clock_t_to_jiffies(cgc.timeout); + memset(&hdr, 0, sizeof(hdr)); + hdr.interface_id = 'S'; + hdr.cmd_len = sizeof(cgc.cmd); + hdr.dxfer_len = cgc.buflen; + switch (cgc.data_direction) { + case CGC_DATA_UNKNOWN: + hdr.dxfer_direction = SG_DXFER_UNKNOWN; + break; + case CGC_DATA_WRITE: + hdr.dxfer_direction = SG_DXFER_TO_DEV; + break; + case CGC_DATA_READ: + hdr.dxfer_direction = SG_DXFER_FROM_DEV; + break; + case CGC_DATA_NONE: + hdr.dxfer_direction = SG_DXFER_NONE; + break; + default: + return -EINVAL; + } + + hdr.dxferp = cgc.buffer; + hdr.sbp = cgc.sense; + if (hdr.sbp) + hdr.mx_sb_len = sizeof(struct request_sense); + hdr.timeout = jiffies_to_msecs(cgc.timeout); + hdr.cmdp = ((struct cdrom_generic_command __user*) arg)->cmd; + hdr.cmd_len = sizeof(cgc.cmd); + + err = sg_io(q, bd_disk, &hdr, mode); + if (err == -EFAULT) + return -EFAULT; + + if (hdr.status) + return -EIO; + + cgc.stat = err; + cgc.buflen = hdr.resid; + if 
(scsi_put_cdrom_generic_arg(&cgc, arg)) + return -EFAULT; + + return err; +} + int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mode, unsigned int cmd, void __user *arg) { @@ -716,60 +819,9 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod err = -EFAULT; break; } - case CDROM_SEND_PACKET: { - struct cdrom_generic_command cgc; - struct sg_io_hdr hdr; - - err = -EFAULT; - if (copy_from_user(&cgc, arg, sizeof(cgc))) - break; - cgc.timeout = clock_t_to_jiffies(cgc.timeout); - memset(&hdr, 0, sizeof(hdr)); - hdr.interface_id = 'S'; - hdr.cmd_len = sizeof(cgc.cmd); - hdr.dxfer_len = cgc.buflen; - err = 0; - switch (cgc.data_direction) { - case CGC_DATA_UNKNOWN: - hdr.dxfer_direction = SG_DXFER_UNKNOWN; - break; - case CGC_DATA_WRITE: - hdr.dxfer_direction = SG_DXFER_TO_DEV; - break; - case CGC_DATA_READ: - hdr.dxfer_direction = SG_DXFER_FROM_DEV; - break; - case CGC_DATA_NONE: - hdr.dxfer_direction = SG_DXFER_NONE; - break; - default: - err = -EINVAL; - } - if (err) - break; - - hdr.dxferp = cgc.buffer; - hdr.sbp = cgc.sense; - if (hdr.sbp) - hdr.mx_sb_len = sizeof(struct request_sense); - hdr.timeout = jiffies_to_msecs(cgc.timeout); - hdr.cmdp = ((struct cdrom_generic_command __user*) arg)->cmd; - hdr.cmd_len = sizeof(cgc.cmd); - - err = sg_io(q, bd_disk, &hdr, mode); - if (err == -EFAULT) - break; - - if (hdr.status) - err = -EIO; - - cgc.stat = err; - cgc.buflen = hdr.resid; - if (copy_to_user(arg, &cgc, sizeof(cgc))) - err = -EFAULT; - + case CDROM_SEND_PACKET: + err = scsi_cdrom_send_packet(q, bd_disk, mode, arg); break; - } /* * old junk scsi send command ioctl diff --git a/block/t10-pi.c b/block/t10-pi.c index f4907d941f03..d910534b3a41 100644 --- a/block/t10-pi.c +++ b/block/t10-pi.c @@ -7,6 +7,7 @@ #include <linux/t10-pi.h> #include <linux/blkdev.h> #include <linux/crc-t10dif.h> +#include <linux/module.h> #include <net/checksum.h> typedef __be16 (csum_fn) (void *, unsigned int); @@ -280,3 +281,5 @@ 
const struct blk_integrity_profile t10_pi_type3_ip = { .complete_fn = t10_pi_type3_complete, }; EXPORT_SYMBOL(t10_pi_type3_ip); + +MODULE_LICENSE("GPL"); |