diff options
97 files changed, 7534 insertions, 2284 deletions
diff --git a/Documentation/devicetree/bindings/dma/mtk-hsdma.txt b/Documentation/devicetree/bindings/dma/mtk-hsdma.txt new file mode 100644 index 000000000000..4bb317359dc6 --- /dev/null +++ b/Documentation/devicetree/bindings/dma/mtk-hsdma.txt @@ -0,0 +1,33 @@ +MediaTek High-Speed DMA Controller +================================== + +This device follows the generic DMA bindings defined in dma/dma.txt. + +Required properties: + +- compatible: Must be one of + "mediatek,mt7622-hsdma": for MT7622 SoC + "mediatek,mt7623-hsdma": for MT7623 SoC +- reg: Should contain the register's base address and length. +- interrupts: Should contain a reference to the interrupt used by this + device. +- clocks: Should be the clock specifiers corresponding to the entry in + clock-names property. +- clock-names: Should contain "hsdma" entries. +- power-domains: Phandle to the power domain that the device is part of +- #dma-cells: The length of the DMA specifier, must be <1>. This one cell + in dmas property of a client device represents the channel + number. +Example: + + hsdma: dma-controller@1b007000 { + compatible = "mediatek,mt7623-hsdma"; + reg = <0 0x1b007000 0 0x1000>; + interrupts = <GIC_SPI 98 IRQ_TYPE_LEVEL_LOW>; + clocks = <&ethsys CLK_ETHSYS_HSDMA>; + clock-names = "hsdma"; + power-domains = <&scpsys MT2701_POWER_DOMAIN_ETH>; + #dma-cells = <1>; + }; + +DMA clients must use the format described in dma/dma.txt file. diff --git a/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt b/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt index 9cbf5d9df8fd..cf5b9e44432c 100644 --- a/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt +++ b/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt @@ -15,6 +15,10 @@ Required properties: the secure world. - qcom,controlled-remotely : optional, indicates that the bam is controlled by remote processor i.e. execution environment. +- num-channels : optional, indicates supported number of DMA channels in a + remotely controlled bam. 
+- qcom,num-ees : optional, indicates supported number of Execution Environments + in a remotely controlled bam. Example: diff --git a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt index 891db41e9420..aadfb236d53a 100644 --- a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt +++ b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt @@ -18,6 +18,7 @@ Required Properties: Examples with soctypes are: - "renesas,dmac-r8a7743" (RZ/G1M) - "renesas,dmac-r8a7745" (RZ/G1E) + - "renesas,dmac-r8a77470" (RZ/G1C) - "renesas,dmac-r8a7790" (R-Car H2) - "renesas,dmac-r8a7791" (R-Car M2-W) - "renesas,dmac-r8a7792" (R-Car V2H) @@ -26,6 +27,7 @@ Required Properties: - "renesas,dmac-r8a7795" (R-Car H3) - "renesas,dmac-r8a7796" (R-Car M3-W) - "renesas,dmac-r8a77970" (R-Car V3M) + - "renesas,dmac-r8a77980" (R-Car V3H) - reg: base address and length of the registers block for the DMAC diff --git a/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt b/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt index f3d1f151ba80..9dc935e24e55 100644 --- a/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt +++ b/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt @@ -11,6 +11,7 @@ Required Properties: - "renesas,r8a7794-usb-dmac" (R-Car E2) - "renesas,r8a7795-usb-dmac" (R-Car H3) - "renesas,r8a7796-usb-dmac" (R-Car M3-W) + - "renesas,r8a77965-usb-dmac" (R-Car M3-N) - reg: base address and length of the registers block for the DMAC - interrupts: interrupt specifiers for the DMAC, one for each entry in interrupt-names. 
diff --git a/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.txt b/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.txt new file mode 100644 index 000000000000..f237b7928283 --- /dev/null +++ b/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.txt @@ -0,0 +1,41 @@ +Synopsys DesignWare AXI DMA Controller + +Required properties: +- compatible: "snps,axi-dma-1.01a" +- reg: Address range of the DMAC registers. This should include + all of the per-channel registers. +- interrupts: Should contain the DMAC interrupt number. +- interrupt-parent: Should be the phandle for the interrupt controller + that services interrupts for this device. +- dma-channels: Number of channels supported by hardware. +- snps,dma-masters: Number of AXI masters supported by the hardware. +- snps,data-width: Maximum AXI data width supported by hardware. + (0 - 8bits, 1 - 16bits, 2 - 32bits, ..., 6 - 512bits) +- snps,priority: Priority of channel. Array size is equal to the number of + dma-channels. Priority value must be programmed within [0:dma-channels-1] + range. (0 - minimum priority) +- snps,block-size: Maximum block size supported by the controller channel. + Array size is equal to the number of dma-channels. + +Optional properties: +- snps,axi-max-burst-len: Restrict master AXI burst length by value specified + in this property. If this property is missing the maximum AXI burst length + supported by DMAC is used. 
[1:256] + +Example: + +dmac: dma-controller@80000 { + compatible = "snps,axi-dma-1.01a"; + reg = <0x80000 0x400>; + clocks = <&core_clk>, <&cfgr_clk>; + clock-names = "core-clk", "cfgr-clk"; + interrupt-parent = <&intc>; + interrupts = <27>; + + dma-channels = <4>; + snps,dma-masters = <2>; + snps,data-width = <3>; + snps,block-size = <4096 4096 4096 4096>; + snps,priority = <0 1 2 3>; + snps,axi-max-burst-len = <16>; +}; diff --git a/Documentation/devicetree/bindings/dma/stm32-dma.txt b/Documentation/devicetree/bindings/dma/stm32-dma.txt index 0b55718bf889..c5f519097204 100644 --- a/Documentation/devicetree/bindings/dma/stm32-dma.txt +++ b/Documentation/devicetree/bindings/dma/stm32-dma.txt @@ -62,14 +62,14 @@ channel: a phandle to the DMA controller plus the following four integer cells: 0x1: medium 0x2: high 0x3: very high -4. A 32bit mask specifying the DMA FIFO threshold configuration which are device - dependent: - -bit 0-1: Fifo threshold +4. A 32bit bitfield value specifying DMA features which are device dependent: + -bit 0-1: DMA FIFO threshold selection 0x0: 1/4 full FIFO 0x1: 1/2 full FIFO 0x2: 3/4 full FIFO 0x3: full FIFO + Example: usart1: serial@40011000 { diff --git a/Documentation/filesystems/ceph.txt b/Documentation/filesystems/ceph.txt index 0b302a11718a..d7f011ddc150 100644 --- a/Documentation/filesystems/ceph.txt +++ b/Documentation/filesystems/ceph.txt @@ -62,6 +62,18 @@ subdirectories, and a summation of all nested file sizes. This makes the identification of large disk space consumers relatively quick, as no 'du' or similar recursive scan of the file system is required. +Finally, Ceph also allows quotas to be set on any directory in the system. +The quota can restrict the number of bytes or the number of files stored +beneath that point in the directory hierarchy. 
Quotas can be set using +extended attributes 'ceph.quota.max_files' and 'ceph.quota.max_bytes', e.g.: + + setfattr -n ceph.quota.max_bytes -v 100000000 /some/dir + getfattr -n ceph.quota.max_bytes /some/dir + +A limitation of the current quotas implementation is that it relies on the +cooperation of the client mounting the file system to stop writers when a +limit is reached. A modified or adversarial client cannot be prevented +from writing as much data as it needs. Mount Syntax ============ @@ -137,6 +149,10 @@ Mount Options noasyncreaddir Do not use the dcache as above for readdir. + noquotadf + Report overall filesystem usage in statfs instead of using the root + directory quota. + More Information ================ diff --git a/MAINTAINERS b/MAINTAINERS index 189b1bf2d7f0..c7182d2a9f5c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8860,6 +8860,15 @@ M: Sean Wang <sean.wang@mediatek.com> S: Maintained F: drivers/media/rc/mtk-cir.c +MEDIATEK DMA DRIVER +M: Sean Wang <sean.wang@mediatek.com> +L: dmaengine@vger.kernel.org +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +L: linux-mediatek@lists.infradead.org (moderated for non-subscribers) +S: Maintained +F: Documentation/devicetree/bindings/dma/mtk-* +F: drivers/dma/mediatek/ + MEDIATEK PMIC LED DRIVER M: Sean Wang <sean.wang@mediatek.com> S: Maintained @@ -13483,6 +13492,12 @@ S: Maintained F: drivers/gpio/gpio-dwapb.c F: Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt +SYNOPSYS DESIGNWARE AXI DMAC DRIVER +M: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com> +S: Maintained +F: drivers/dma/dw-axi-dmac/ +F: Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.txt + SYNOPSYS DESIGNWARE DMAC DRIVER M: Viresh Kumar <vireshk@kernel.org> R: Andy Shevchenko <andriy.shevchenko@linux.intel.com> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 1e03b04819c8..07dc5419bd63 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -32,6 +32,7 @@ #include 
<linux/ceph/osd_client.h> #include <linux/ceph/mon_client.h> #include <linux/ceph/cls_lock_client.h> +#include <linux/ceph/striper.h> #include <linux/ceph/decode.h> #include <linux/parser.h> #include <linux/bsearch.h> @@ -200,95 +201,81 @@ struct rbd_client { }; struct rbd_img_request; -typedef void (*rbd_img_callback_t)(struct rbd_img_request *); - -#define BAD_WHICH U32_MAX /* Good which or bad which, which? */ - -struct rbd_obj_request; -typedef void (*rbd_obj_callback_t)(struct rbd_obj_request *); enum obj_request_type { - OBJ_REQUEST_NODATA, OBJ_REQUEST_BIO, OBJ_REQUEST_PAGES + OBJ_REQUEST_NODATA = 1, + OBJ_REQUEST_BIO, /* pointer into provided bio (list) */ + OBJ_REQUEST_BVECS, /* pointer into provided bio_vec array */ + OBJ_REQUEST_OWN_BVECS, /* private bio_vec array, doesn't own pages */ }; enum obj_operation_type { + OBJ_OP_READ = 1, OBJ_OP_WRITE, - OBJ_OP_READ, OBJ_OP_DISCARD, }; -enum obj_req_flags { - OBJ_REQ_DONE, /* completion flag: not done = 0, done = 1 */ - OBJ_REQ_IMG_DATA, /* object usage: standalone = 0, image = 1 */ - OBJ_REQ_KNOWN, /* EXISTS flag valid: no = 0, yes = 1 */ - OBJ_REQ_EXISTS, /* target exists: no = 0, yes = 1 */ +/* + * Writes go through the following state machine to deal with + * layering: + * + * need copyup + * RBD_OBJ_WRITE_GUARD ---------------> RBD_OBJ_WRITE_COPYUP + * | ^ | + * v \------------------------------/ + * done + * ^ + * | + * RBD_OBJ_WRITE_FLAT + * + * Writes start in RBD_OBJ_WRITE_GUARD or _FLAT, depending on whether + * there is a parent or not. + */ +enum rbd_obj_write_state { + RBD_OBJ_WRITE_FLAT = 1, + RBD_OBJ_WRITE_GUARD, + RBD_OBJ_WRITE_COPYUP, }; struct rbd_obj_request { - u64 object_no; - u64 offset; /* object start byte */ - u64 length; /* bytes from offset */ - unsigned long flags; - - /* - * An object request associated with an image will have its - * img_data flag set; a standalone object request will not. 
- * - * A standalone object request will have which == BAD_WHICH - * and a null obj_request pointer. - * - * An object request initiated in support of a layered image - * object (to check for its existence before a write) will - * have which == BAD_WHICH and a non-null obj_request pointer. - * - * Finally, an object request for rbd image data will have - * which != BAD_WHICH, and will have a non-null img_request - * pointer. The value of which will be in the range - * 0..(img_request->obj_request_count-1). - */ + struct ceph_object_extent ex; union { - struct rbd_obj_request *obj_request; /* STAT op */ - struct { - struct rbd_img_request *img_request; - u64 img_offset; - /* links for img_request->obj_requests list */ - struct list_head links; - }; + bool tried_parent; /* for reads */ + enum rbd_obj_write_state write_state; /* for writes */ }; - u32 which; /* posn image request list */ - enum obj_request_type type; + struct rbd_img_request *img_request; + struct ceph_file_extent *img_extents; + u32 num_img_extents; + union { - struct bio *bio_list; + struct ceph_bio_iter bio_pos; struct { - struct page **pages; - u32 page_count; + struct ceph_bvec_iter bvec_pos; + u32 bvec_count; + u32 bvec_idx; }; }; - struct page **copyup_pages; - u32 copyup_page_count; + struct bio_vec *copyup_bvecs; + u32 copyup_bvec_count; struct ceph_osd_request *osd_req; u64 xferred; /* bytes transferred */ int result; - rbd_obj_callback_t callback; - struct kref kref; }; enum img_req_flags { - IMG_REQ_WRITE, /* I/O direction: read = 0, write = 1 */ IMG_REQ_CHILD, /* initiator: block = 0, child image = 1 */ IMG_REQ_LAYERED, /* ENOENT handling: normal = 0, layered = 1 */ - IMG_REQ_DISCARD, /* discard: normal = 0, discard request = 1 */ }; struct rbd_img_request { struct rbd_device *rbd_dev; - u64 offset; /* starting image byte offset */ - u64 length; /* byte count from offset */ + enum obj_operation_type op_type; + enum obj_request_type data_type; unsigned long flags; union { u64 snap_id; /* 
for reads */ @@ -298,26 +285,21 @@ struct rbd_img_request { struct request *rq; /* block request */ struct rbd_obj_request *obj_request; /* obj req initiator */ }; - struct page **copyup_pages; - u32 copyup_page_count; - spinlock_t completion_lock;/* protects next_completion */ - u32 next_completion; - rbd_img_callback_t callback; + spinlock_t completion_lock; u64 xferred;/* aggregate bytes transferred */ int result; /* first nonzero obj_request result */ + struct list_head object_extents; /* obj_req.ex structs */ u32 obj_request_count; - struct list_head obj_requests; /* rbd_obj_request structs */ + u32 pending_count; struct kref kref; }; #define for_each_obj_request(ireq, oreq) \ - list_for_each_entry(oreq, &(ireq)->obj_requests, links) -#define for_each_obj_request_from(ireq, oreq) \ - list_for_each_entry_from(oreq, &(ireq)->obj_requests, links) + list_for_each_entry(oreq, &(ireq)->object_extents, ex.oe_item) #define for_each_obj_request_safe(ireq, oreq, n) \ - list_for_each_entry_safe_reverse(oreq, n, &(ireq)->obj_requests, links) + list_for_each_entry_safe(oreq, n, &(ireq)->object_extents, ex.oe_item) enum rbd_watch_state { RBD_WATCH_STATE_UNREGISTERED, @@ -433,8 +415,6 @@ static DEFINE_SPINLOCK(rbd_client_list_lock); static struct kmem_cache *rbd_img_request_cache; static struct kmem_cache *rbd_obj_request_cache; -static struct bio_set *rbd_bio_clone; - static int rbd_major; static DEFINE_IDA(rbd_dev_id_ida); @@ -447,8 +427,6 @@ static bool single_major = true; module_param(single_major, bool, S_IRUGO); MODULE_PARM_DESC(single_major, "Use a single major number for all rbd devices (default: true)"); -static int rbd_img_request_submit(struct rbd_img_request *img_request); - static ssize_t rbd_add(struct bus_type *bus, const char *buf, size_t count); static ssize_t rbd_remove(struct bus_type *bus, const char *buf, @@ -458,7 +436,6 @@ static ssize_t rbd_add_single_major(struct bus_type *bus, const char *buf, static ssize_t rbd_remove_single_major(struct bus_type 
*bus, const char *buf, size_t count); static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth); -static void rbd_spec_put(struct rbd_spec *spec); static int rbd_dev_id_to_minor(int dev_id) { @@ -577,9 +554,6 @@ void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...) # define rbd_assert(expr) ((void) 0) #endif /* !RBD_DEBUG */ -static void rbd_osd_copyup_callback(struct rbd_obj_request *obj_request); -static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request); -static void rbd_img_parent_read(struct rbd_obj_request *obj_request); static void rbd_dev_remove_parent(struct rbd_device *rbd_dev); static int rbd_dev_refresh(struct rbd_device *rbd_dev); @@ -857,26 +831,6 @@ static char* obj_op_name(enum obj_operation_type op_type) } /* - * Get a ceph client with specific addr and configuration, if one does - * not exist create it. Either way, ceph_opts is consumed by this - * function. - */ -static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts) -{ - struct rbd_client *rbdc; - - mutex_lock_nested(&client_mutex, SINGLE_DEPTH_NESTING); - rbdc = rbd_client_find(ceph_opts); - if (rbdc) /* using an existing client */ - ceph_destroy_options(ceph_opts); - else - rbdc = rbd_client_create(ceph_opts); - mutex_unlock(&client_mutex); - - return rbdc; -} - -/* * Destroy ceph client * * Caller must hold rbd_client_list_lock. 
@@ -904,6 +858,56 @@ static void rbd_put_client(struct rbd_client *rbdc) kref_put(&rbdc->kref, rbd_client_release); } +static int wait_for_latest_osdmap(struct ceph_client *client) +{ + u64 newest_epoch; + int ret; + + ret = ceph_monc_get_version(&client->monc, "osdmap", &newest_epoch); + if (ret) + return ret; + + if (client->osdc.osdmap->epoch >= newest_epoch) + return 0; + + ceph_osdc_maybe_request_map(&client->osdc); + return ceph_monc_wait_osdmap(&client->monc, newest_epoch, + client->options->mount_timeout); +} + +/* + * Get a ceph client with specific addr and configuration, if one does + * not exist create it. Either way, ceph_opts is consumed by this + * function. + */ +static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts) +{ + struct rbd_client *rbdc; + int ret; + + mutex_lock_nested(&client_mutex, SINGLE_DEPTH_NESTING); + rbdc = rbd_client_find(ceph_opts); + if (rbdc) { + ceph_destroy_options(ceph_opts); + + /* + * Using an existing client. Make sure ->pg_pools is up to + * date before we look up the pool id in do_rbd_add(). 
+ */ + ret = wait_for_latest_osdmap(rbdc->client); + if (ret) { + rbd_warn(NULL, "failed to get latest osdmap: %d", ret); + rbd_put_client(rbdc); + rbdc = ERR_PTR(ret); + } + } else { + rbdc = rbd_client_create(ceph_opts); + } + mutex_unlock(&client_mutex); + + return rbdc; +} + static bool rbd_image_format_valid(u32 image_format) { return image_format == 1 || image_format == 2; @@ -1223,272 +1227,59 @@ static void rbd_dev_mapping_clear(struct rbd_device *rbd_dev) rbd_dev->mapping.features = 0; } -static u64 rbd_segment_offset(struct rbd_device *rbd_dev, u64 offset) -{ - u64 segment_size = rbd_obj_bytes(&rbd_dev->header); - - return offset & (segment_size - 1); -} - -static u64 rbd_segment_length(struct rbd_device *rbd_dev, - u64 offset, u64 length) -{ - u64 segment_size = rbd_obj_bytes(&rbd_dev->header); - - offset &= segment_size - 1; - - rbd_assert(length <= U64_MAX - offset); - if (offset + length > segment_size) - length = segment_size - offset; - - return length; -} - -/* - * bio helpers - */ - -static void bio_chain_put(struct bio *chain) -{ - struct bio *tmp; - - while (chain) { - tmp = chain; - chain = chain->bi_next; - bio_put(tmp); - } -} - -/* - * zeros a bio chain, starting at specific offset - */ -static void zero_bio_chain(struct bio *chain, int start_ofs) +static void zero_bvec(struct bio_vec *bv) { - struct bio_vec bv; - struct bvec_iter iter; - unsigned long flags; void *buf; - int pos = 0; - - while (chain) { - bio_for_each_segment(bv, chain, iter) { - if (pos + bv.bv_len > start_ofs) { - int remainder = max(start_ofs - pos, 0); - buf = bvec_kmap_irq(&bv, &flags); - memset(buf + remainder, 0, - bv.bv_len - remainder); - flush_dcache_page(bv.bv_page); - bvec_kunmap_irq(buf, &flags); - } - pos += bv.bv_len; - } + unsigned long flags; - chain = chain->bi_next; - } + buf = bvec_kmap_irq(bv, &flags); + memset(buf, 0, bv->bv_len); + flush_dcache_page(bv->bv_page); + bvec_kunmap_irq(buf, &flags); } -/* - * similar to zero_bio_chain(), zeros data defined 
by a page array, - * starting at the given byte offset from the start of the array and - * continuing up to the given end offset. The pages array is - * assumed to be big enough to hold all bytes up to the end. - */ -static void zero_pages(struct page **pages, u64 offset, u64 end) +static void zero_bios(struct ceph_bio_iter *bio_pos, u32 off, u32 bytes) { - struct page **page = &pages[offset >> PAGE_SHIFT]; + struct ceph_bio_iter it = *bio_pos; - rbd_assert(end > offset); - rbd_assert(end - offset <= (u64)SIZE_MAX); - while (offset < end) { - size_t page_offset; - size_t length; - unsigned long flags; - void *kaddr; - - page_offset = offset & ~PAGE_MASK; - length = min_t(size_t, PAGE_SIZE - page_offset, end - offset); - local_irq_save(flags); - kaddr = kmap_atomic(*page); - memset(kaddr + page_offset, 0, length); - flush_dcache_page(*page); - kunmap_atomic(kaddr); - local_irq_restore(flags); - - offset += length; - page++; - } + ceph_bio_iter_advance(&it, off); + ceph_bio_iter_advance_step(&it, bytes, ({ + zero_bvec(&bv); + })); } -/* - * Clone a portion of a bio, starting at the given byte offset - * and continuing for the number of bytes indicated. - */ -static struct bio *bio_clone_range(struct bio *bio_src, - unsigned int offset, - unsigned int len, - gfp_t gfpmask) +static void zero_bvecs(struct ceph_bvec_iter *bvec_pos, u32 off, u32 bytes) { - struct bio *bio; - - bio = bio_clone_fast(bio_src, gfpmask, rbd_bio_clone); - if (!bio) - return NULL; /* ENOMEM */ + struct ceph_bvec_iter it = *bvec_pos; - bio_advance(bio, offset); - bio->bi_iter.bi_size = len; - - return bio; + ceph_bvec_iter_advance(&it, off); + ceph_bvec_iter_advance_step(&it, bytes, ({ + zero_bvec(&bv); + })); } /* - * Clone a portion of a bio chain, starting at the given byte offset - * into the first bio in the source chain and continuing for the - * number of bytes indicated. The result is another bio chain of - * exactly the given length, or a null pointer on error. 
- * - * The bio_src and offset parameters are both in-out. On entry they - * refer to the first source bio and the offset into that bio where - * the start of data to be cloned is located. + * Zero a range in @obj_req data buffer defined by a bio (list) or + * (private) bio_vec array. * - * On return, bio_src is updated to refer to the bio in the source - * chain that contains first un-cloned byte, and *offset will - * contain the offset of that byte within that bio. + * @off is relative to the start of the data buffer. */ -static struct bio *bio_chain_clone_range(struct bio **bio_src, - unsigned int *offset, - unsigned int len, - gfp_t gfpmask) +static void rbd_obj_zero_range(struct rbd_obj_request *obj_req, u32 off, + u32 bytes) { - struct bio *bi = *bio_src; - unsigned int off = *offset; - struct bio *chain = NULL; - struct bio **end; - - /* Build up a chain of clone bios up to the limit */ - - if (!bi || off >= bi->bi_iter.bi_size || !len) - return NULL; /* Nothing to clone */ - - end = &chain; - while (len) { - unsigned int bi_size; - struct bio *bio; - - if (!bi) { - rbd_warn(NULL, "bio_chain exhausted with %u left", len); - goto out_err; /* EINVAL; ran out of bio's */ - } - bi_size = min_t(unsigned int, bi->bi_iter.bi_size - off, len); - bio = bio_clone_range(bi, off, bi_size, gfpmask); - if (!bio) - goto out_err; /* ENOMEM */ - - *end = bio; - end = &bio->bi_next; - - off += bi_size; - if (off == bi->bi_iter.bi_size) { - bi = bi->bi_next; - off = 0; - } - len -= bi_size; - } - *bio_src = bi; - *offset = off; - - return chain; -out_err: - bio_chain_put(chain); - - return NULL; -} - -/* - * The default/initial value for all object request flags is 0. For - * each flag, once its value is set to 1 it is never reset to 0 - * again. 
- */ -static void obj_request_img_data_set(struct rbd_obj_request *obj_request) -{ - if (test_and_set_bit(OBJ_REQ_IMG_DATA, &obj_request->flags)) { - struct rbd_device *rbd_dev; - - rbd_dev = obj_request->img_request->rbd_dev; - rbd_warn(rbd_dev, "obj_request %p already marked img_data", - obj_request); - } -} - -static bool obj_request_img_data_test(struct rbd_obj_request *obj_request) -{ - smp_mb(); - return test_bit(OBJ_REQ_IMG_DATA, &obj_request->flags) != 0; -} - -static void obj_request_done_set(struct rbd_obj_request *obj_request) -{ - if (test_and_set_bit(OBJ_REQ_DONE, &obj_request->flags)) { - struct rbd_device *rbd_dev = NULL; - - if (obj_request_img_data_test(obj_request)) - rbd_dev = obj_request->img_request->rbd_dev; - rbd_warn(rbd_dev, "obj_request %p already marked done", - obj_request); + switch (obj_req->img_request->data_type) { + case OBJ_REQUEST_BIO: + zero_bios(&obj_req->bio_pos, off, bytes); + break; + case OBJ_REQUEST_BVECS: + case OBJ_REQUEST_OWN_BVECS: + zero_bvecs(&obj_req->bvec_pos, off, bytes); + break; + default: + rbd_assert(0); } } -static bool obj_request_done_test(struct rbd_obj_request *obj_request) -{ - smp_mb(); - return test_bit(OBJ_REQ_DONE, &obj_request->flags) != 0; -} - -/* - * This sets the KNOWN flag after (possibly) setting the EXISTS - * flag. The latter is set based on the "exists" value provided. - * - * Note that for our purposes once an object exists it never goes - * away again. It's possible that the response from two existence - * checks are separated by the creation of the target object, and - * the first ("doesn't exist") response arrives *after* the second - * ("does exist"). In that case we ignore the second one. 
- */ -static void obj_request_existence_set(struct rbd_obj_request *obj_request, - bool exists) -{ - if (exists) - set_bit(OBJ_REQ_EXISTS, &obj_request->flags); - set_bit(OBJ_REQ_KNOWN, &obj_request->flags); - smp_mb(); -} - -static bool obj_request_known_test(struct rbd_obj_request *obj_request) -{ - smp_mb(); - return test_bit(OBJ_REQ_KNOWN, &obj_request->flags) != 0; -} - -static bool obj_request_exists_test(struct rbd_obj_request *obj_request) -{ - smp_mb(); - return test_bit(OBJ_REQ_EXISTS, &obj_request->flags) != 0; -} - -static bool obj_request_overlaps_parent(struct rbd_obj_request *obj_request) -{ - struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev; - - return obj_request->img_offset < - round_up(rbd_dev->parent_overlap, rbd_obj_bytes(&rbd_dev->header)); -} - -static void rbd_obj_request_get(struct rbd_obj_request *obj_request) -{ - dout("%s: obj %p (was %d)\n", __func__, obj_request, - kref_read(&obj_request->kref)); - kref_get(&obj_request->kref); -} - static void rbd_obj_request_destroy(struct kref *kref); static void rbd_obj_request_put(struct rbd_obj_request *obj_request) { @@ -1505,18 +1296,13 @@ static void rbd_img_request_get(struct rbd_img_request *img_request) kref_get(&img_request->kref); } -static bool img_request_child_test(struct rbd_img_request *img_request); -static void rbd_parent_request_destroy(struct kref *kref); static void rbd_img_request_destroy(struct kref *kref); static void rbd_img_request_put(struct rbd_img_request *img_request) { rbd_assert(img_request != NULL); dout("%s: img %p (was %d)\n", __func__, img_request, kref_read(&img_request->kref)); - if (img_request_child_test(img_request)) - kref_put(&img_request->kref, rbd_parent_request_destroy); - else - kref_put(&img_request->kref, rbd_img_request_destroy); + kref_put(&img_request->kref, rbd_img_request_destroy); } static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request, @@ -1526,139 +1312,37 @@ static inline void 
rbd_img_obj_request_add(struct rbd_img_request *img_request, /* Image request now owns object's original reference */ obj_request->img_request = img_request; - obj_request->which = img_request->obj_request_count; - rbd_assert(!obj_request_img_data_test(obj_request)); - obj_request_img_data_set(obj_request); - rbd_assert(obj_request->which != BAD_WHICH); img_request->obj_request_count++; - list_add_tail(&obj_request->links, &img_request->obj_requests); - dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request, - obj_request->which); + img_request->pending_count++; + dout("%s: img %p obj %p\n", __func__, img_request, obj_request); } static inline void rbd_img_obj_request_del(struct rbd_img_request *img_request, struct rbd_obj_request *obj_request) { - rbd_assert(obj_request->which != BAD_WHICH); - - dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request, - obj_request->which); - list_del(&obj_request->links); + dout("%s: img %p obj %p\n", __func__, img_request, obj_request); + list_del(&obj_request->ex.oe_item); rbd_assert(img_request->obj_request_count > 0); img_request->obj_request_count--; - rbd_assert(obj_request->which == img_request->obj_request_count); - obj_request->which = BAD_WHICH; - rbd_assert(obj_request_img_data_test(obj_request)); rbd_assert(obj_request->img_request == img_request); - obj_request->img_request = NULL; - obj_request->callback = NULL; rbd_obj_request_put(obj_request); } -static bool obj_request_type_valid(enum obj_request_type type) -{ - switch (type) { - case OBJ_REQUEST_NODATA: - case OBJ_REQUEST_BIO: - case OBJ_REQUEST_PAGES: - return true; - default: - return false; - } -} - -static void rbd_img_obj_callback(struct rbd_obj_request *obj_request); - static void rbd_obj_request_submit(struct rbd_obj_request *obj_request) { struct ceph_osd_request *osd_req = obj_request->osd_req; dout("%s %p object_no %016llx %llu~%llu osd_req %p\n", __func__, - obj_request, obj_request->object_no, obj_request->offset, - 
obj_request->length, osd_req); - if (obj_request_img_data_test(obj_request)) { - WARN_ON(obj_request->callback != rbd_img_obj_callback); - rbd_img_request_get(obj_request->img_request); - } + obj_request, obj_request->ex.oe_objno, obj_request->ex.oe_off, + obj_request->ex.oe_len, osd_req); ceph_osdc_start_request(osd_req->r_osdc, osd_req, false); } -static void rbd_img_request_complete(struct rbd_img_request *img_request) -{ - - dout("%s: img %p\n", __func__, img_request); - - /* - * If no error occurred, compute the aggregate transfer - * count for the image request. We could instead use - * atomic64_cmpxchg() to update it as each object request - * completes; not clear which way is better off hand. - */ - if (!img_request->result) { - struct rbd_obj_request *obj_request; - u64 xferred = 0; - - for_each_obj_request(img_request, obj_request) - xferred += obj_request->xferred; - img_request->xferred = xferred; - } - - if (img_request->callback) - img_request->callback(img_request); - else - rbd_img_request_put(img_request); -} - /* * The default/initial value for all image request flags is 0. Each * is conditionally set to 1 at image request initialization time * and currently never change thereafter. 
*/ -static void img_request_write_set(struct rbd_img_request *img_request) -{ - set_bit(IMG_REQ_WRITE, &img_request->flags); - smp_mb(); -} - -static bool img_request_write_test(struct rbd_img_request *img_request) -{ - smp_mb(); - return test_bit(IMG_REQ_WRITE, &img_request->flags) != 0; -} - -/* - * Set the discard flag when the img_request is an discard request - */ -static void img_request_discard_set(struct rbd_img_request *img_request) -{ - set_bit(IMG_REQ_DISCARD, &img_request->flags); - smp_mb(); -} - -static bool img_request_discard_test(struct rbd_img_request *img_request) -{ - smp_mb(); - return test_bit(IMG_REQ_DISCARD, &img_request->flags) != 0; -} - -static void img_request_child_set(struct rbd_img_request *img_request) -{ - set_bit(IMG_REQ_CHILD, &img_request->flags); - smp_mb(); -} - -static void img_request_child_clear(struct rbd_img_request *img_request) -{ - clear_bit(IMG_REQ_CHILD, &img_request->flags); - smp_mb(); -} - -static bool img_request_child_test(struct rbd_img_request *img_request) -{ - smp_mb(); - return test_bit(IMG_REQ_CHILD, &img_request->flags) != 0; -} - static void img_request_layered_set(struct rbd_img_request *img_request) { set_bit(IMG_REQ_LAYERED, &img_request->flags); @@ -1677,209 +1361,70 @@ static bool img_request_layered_test(struct rbd_img_request *img_request) return test_bit(IMG_REQ_LAYERED, &img_request->flags) != 0; } -static enum obj_operation_type -rbd_img_request_op_type(struct rbd_img_request *img_request) -{ - if (img_request_write_test(img_request)) - return OBJ_OP_WRITE; - else if (img_request_discard_test(img_request)) - return OBJ_OP_DISCARD; - else - return OBJ_OP_READ; -} - -static void -rbd_img_obj_request_read_callback(struct rbd_obj_request *obj_request) -{ - u64 xferred = obj_request->xferred; - u64 length = obj_request->length; - - dout("%s: obj %p img %p result %d %llu/%llu\n", __func__, - obj_request, obj_request->img_request, obj_request->result, - xferred, length); - /* - * ENOENT means a hole in 
the image. We zero-fill the entire - * length of the request. A short read also implies zero-fill - * to the end of the request. An error requires the whole - * length of the request to be reported finished with an error - * to the block layer. In each case we update the xferred - * count to indicate the whole request was satisfied. - */ - rbd_assert(obj_request->type != OBJ_REQUEST_NODATA); - if (obj_request->result == -ENOENT) { - if (obj_request->type == OBJ_REQUEST_BIO) - zero_bio_chain(obj_request->bio_list, 0); - else - zero_pages(obj_request->pages, 0, length); - obj_request->result = 0; - } else if (xferred < length && !obj_request->result) { - if (obj_request->type == OBJ_REQUEST_BIO) - zero_bio_chain(obj_request->bio_list, xferred); - else - zero_pages(obj_request->pages, xferred, length); - } - obj_request->xferred = length; - obj_request_done_set(obj_request); -} - -static void rbd_obj_request_complete(struct rbd_obj_request *obj_request) +static bool rbd_obj_is_entire(struct rbd_obj_request *obj_req) { - dout("%s: obj %p cb %p\n", __func__, obj_request, - obj_request->callback); - obj_request->callback(obj_request); -} + struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; -static void rbd_obj_request_error(struct rbd_obj_request *obj_request, int err) -{ - obj_request->result = err; - obj_request->xferred = 0; - /* - * kludge - mirror rbd_obj_request_submit() to match a put in - * rbd_img_obj_callback() - */ - if (obj_request_img_data_test(obj_request)) { - WARN_ON(obj_request->callback != rbd_img_obj_callback); - rbd_img_request_get(obj_request->img_request); - } - obj_request_done_set(obj_request); - rbd_obj_request_complete(obj_request); + return !obj_req->ex.oe_off && + obj_req->ex.oe_len == rbd_dev->layout.object_size; } -static void rbd_osd_read_callback(struct rbd_obj_request *obj_request) +static bool rbd_obj_is_tail(struct rbd_obj_request *obj_req) { - struct rbd_img_request *img_request = NULL; - struct rbd_device *rbd_dev = NULL; - 
bool layered = false; - - if (obj_request_img_data_test(obj_request)) { - img_request = obj_request->img_request; - layered = img_request && img_request_layered_test(img_request); - rbd_dev = img_request->rbd_dev; - } - - dout("%s: obj %p img %p result %d %llu/%llu\n", __func__, - obj_request, img_request, obj_request->result, - obj_request->xferred, obj_request->length); - if (layered && obj_request->result == -ENOENT && - obj_request->img_offset < rbd_dev->parent_overlap) - rbd_img_parent_read(obj_request); - else if (img_request) - rbd_img_obj_request_read_callback(obj_request); - else - obj_request_done_set(obj_request); -} + struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; -static void rbd_osd_write_callback(struct rbd_obj_request *obj_request) -{ - dout("%s: obj %p result %d %llu\n", __func__, obj_request, - obj_request->result, obj_request->length); - /* - * There is no such thing as a successful short write. Set - * it to our originally-requested length. - */ - obj_request->xferred = obj_request->length; - obj_request_done_set(obj_request); + return obj_req->ex.oe_off + obj_req->ex.oe_len == + rbd_dev->layout.object_size; } -static void rbd_osd_discard_callback(struct rbd_obj_request *obj_request) +static u64 rbd_obj_img_extents_bytes(struct rbd_obj_request *obj_req) { - dout("%s: obj %p result %d %llu\n", __func__, obj_request, - obj_request->result, obj_request->length); - /* - * There is no such thing as a successful short discard. Set - * it to our originally-requested length. - */ - obj_request->xferred = obj_request->length; - /* discarding a non-existent object is not a problem */ - if (obj_request->result == -ENOENT) - obj_request->result = 0; - obj_request_done_set(obj_request); + return ceph_file_extents_bytes(obj_req->img_extents, + obj_req->num_img_extents); } -/* - * For a simple stat call there's nothing to do. We'll do more if - * this is part of a write sequence for a layered image. 
- */ -static void rbd_osd_stat_callback(struct rbd_obj_request *obj_request) +static bool rbd_img_is_write(struct rbd_img_request *img_req) { - dout("%s: obj %p\n", __func__, obj_request); - obj_request_done_set(obj_request); + switch (img_req->op_type) { + case OBJ_OP_READ: + return false; + case OBJ_OP_WRITE: + case OBJ_OP_DISCARD: + return true; + default: + rbd_assert(0); + } } -static void rbd_osd_call_callback(struct rbd_obj_request *obj_request) -{ - dout("%s: obj %p\n", __func__, obj_request); - - if (obj_request_img_data_test(obj_request)) - rbd_osd_copyup_callback(obj_request); - else - obj_request_done_set(obj_request); -} +static void rbd_obj_handle_request(struct rbd_obj_request *obj_req); static void rbd_osd_req_callback(struct ceph_osd_request *osd_req) { - struct rbd_obj_request *obj_request = osd_req->r_priv; - u16 opcode; + struct rbd_obj_request *obj_req = osd_req->r_priv; - dout("%s: osd_req %p\n", __func__, osd_req); - rbd_assert(osd_req == obj_request->osd_req); - if (obj_request_img_data_test(obj_request)) { - rbd_assert(obj_request->img_request); - rbd_assert(obj_request->which != BAD_WHICH); - } else { - rbd_assert(obj_request->which == BAD_WHICH); - } - - if (osd_req->r_result < 0) - obj_request->result = osd_req->r_result; - - /* - * We support a 64-bit length, but ultimately it has to be - * passed to the block layer, which just supports a 32-bit - * length field. 
- */ - obj_request->xferred = osd_req->r_ops[0].outdata_len; - rbd_assert(obj_request->xferred < (u64)UINT_MAX); + dout("%s osd_req %p result %d for obj_req %p\n", __func__, osd_req, + osd_req->r_result, obj_req); + rbd_assert(osd_req == obj_req->osd_req); - opcode = osd_req->r_ops[0].op; - switch (opcode) { - case CEPH_OSD_OP_READ: - rbd_osd_read_callback(obj_request); - break; - case CEPH_OSD_OP_SETALLOCHINT: - rbd_assert(osd_req->r_ops[1].op == CEPH_OSD_OP_WRITE || - osd_req->r_ops[1].op == CEPH_OSD_OP_WRITEFULL); - /* fall through */ - case CEPH_OSD_OP_WRITE: - case CEPH_OSD_OP_WRITEFULL: - rbd_osd_write_callback(obj_request); - break; - case CEPH_OSD_OP_STAT: - rbd_osd_stat_callback(obj_request); - break; - case CEPH_OSD_OP_DELETE: - case CEPH_OSD_OP_TRUNCATE: - case CEPH_OSD_OP_ZERO: - rbd_osd_discard_callback(obj_request); - break; - case CEPH_OSD_OP_CALL: - rbd_osd_call_callback(obj_request); - break; - default: - rbd_warn(NULL, "unexpected OSD op: object_no %016llx opcode %d", - obj_request->object_no, opcode); - break; - } + obj_req->result = osd_req->r_result < 0 ? osd_req->r_result : 0; + if (!obj_req->result && !rbd_img_is_write(obj_req->img_request)) + obj_req->xferred = osd_req->r_result; + else + /* + * Writes aren't allowed to return a data payload. In some + * guarded write cases (e.g. stat + zero on an empty object) + * a stat response makes it through, but we don't care. 
+ */ + obj_req->xferred = 0; - if (obj_request_done_test(obj_request)) - rbd_obj_request_complete(obj_request); + rbd_obj_handle_request(obj_req); } static void rbd_osd_req_format_read(struct rbd_obj_request *obj_request) { struct ceph_osd_request *osd_req = obj_request->osd_req; - rbd_assert(obj_request_img_data_test(obj_request)); + osd_req->r_flags = CEPH_OSD_FLAG_READ; osd_req->r_snapid = obj_request->img_request->snap_id; } @@ -1887,32 +1432,33 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request) { struct ceph_osd_request *osd_req = obj_request->osd_req; + osd_req->r_flags = CEPH_OSD_FLAG_WRITE; ktime_get_real_ts(&osd_req->r_mtime); - osd_req->r_data_offset = obj_request->offset; + osd_req->r_data_offset = obj_request->ex.oe_off; } static struct ceph_osd_request * -__rbd_osd_req_create(struct rbd_device *rbd_dev, - struct ceph_snap_context *snapc, - int num_ops, unsigned int flags, - struct rbd_obj_request *obj_request) +rbd_osd_req_create(struct rbd_obj_request *obj_req, unsigned int num_ops) { + struct rbd_img_request *img_req = obj_req->img_request; + struct rbd_device *rbd_dev = img_req->rbd_dev; struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; struct ceph_osd_request *req; const char *name_format = rbd_dev->image_format == 1 ? RBD_V1_DATA_FORMAT : RBD_V2_DATA_FORMAT; - req = ceph_osdc_alloc_request(osdc, snapc, num_ops, false, GFP_NOIO); + req = ceph_osdc_alloc_request(osdc, + (rbd_img_is_write(img_req) ? 
img_req->snapc : NULL), + num_ops, false, GFP_NOIO); if (!req) return NULL; - req->r_flags = flags; req->r_callback = rbd_osd_req_callback; - req->r_priv = obj_request; + req->r_priv = obj_req; req->r_base_oloc.pool = rbd_dev->layout.pool_id; if (ceph_oid_aprintf(&req->r_base_oid, GFP_NOIO, name_format, - rbd_dev->header.object_prefix, obj_request->object_no)) + rbd_dev->header.object_prefix, obj_req->ex.oe_objno)) goto err_req; if (ceph_osdc_alloc_messages(req, GFP_NOIO)) @@ -1925,83 +1471,20 @@ err_req: return NULL; } -/* - * Create an osd request. A read request has one osd op (read). - * A write request has either one (watch) or two (hint+write) osd ops. - * (All rbd data writes are prefixed with an allocation hint op, but - * technically osd watch is a write request, hence this distinction.) - */ -static struct ceph_osd_request *rbd_osd_req_create( - struct rbd_device *rbd_dev, - enum obj_operation_type op_type, - unsigned int num_ops, - struct rbd_obj_request *obj_request) -{ - struct ceph_snap_context *snapc = NULL; - - if (obj_request_img_data_test(obj_request) && - (op_type == OBJ_OP_DISCARD || op_type == OBJ_OP_WRITE)) { - struct rbd_img_request *img_request = obj_request->img_request; - if (op_type == OBJ_OP_WRITE) { - rbd_assert(img_request_write_test(img_request)); - } else { - rbd_assert(img_request_discard_test(img_request)); - } - snapc = img_request->snapc; - } - - rbd_assert(num_ops == 1 || ((op_type == OBJ_OP_WRITE) && num_ops == 2)); - - return __rbd_osd_req_create(rbd_dev, snapc, num_ops, - (op_type == OBJ_OP_WRITE || op_type == OBJ_OP_DISCARD) ? - CEPH_OSD_FLAG_WRITE : CEPH_OSD_FLAG_READ, obj_request); -} - -/* - * Create a copyup osd request based on the information in the object - * request supplied. A copyup request has two or three osd ops, a - * copyup method call, potentially a hint op, and a write or truncate - * or zero op. 
- */ -static struct ceph_osd_request * -rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request) -{ - struct rbd_img_request *img_request; - int num_osd_ops = 3; - - rbd_assert(obj_request_img_data_test(obj_request)); - img_request = obj_request->img_request; - rbd_assert(img_request); - rbd_assert(img_request_write_test(img_request) || - img_request_discard_test(img_request)); - - if (img_request_discard_test(img_request)) - num_osd_ops = 2; - - return __rbd_osd_req_create(img_request->rbd_dev, - img_request->snapc, num_osd_ops, - CEPH_OSD_FLAG_WRITE, obj_request); -} - static void rbd_osd_req_destroy(struct ceph_osd_request *osd_req) { ceph_osdc_put_request(osd_req); } -static struct rbd_obj_request * -rbd_obj_request_create(enum obj_request_type type) +static struct rbd_obj_request *rbd_obj_request_create(void) { struct rbd_obj_request *obj_request; - rbd_assert(obj_request_type_valid(type)); - obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_NOIO); if (!obj_request) return NULL; - obj_request->which = BAD_WHICH; - obj_request->type = type; - INIT_LIST_HEAD(&obj_request->links); + ceph_object_extent_init(&obj_request->ex); kref_init(&obj_request->kref); dout("%s %p\n", __func__, obj_request); @@ -2011,32 +1494,34 @@ rbd_obj_request_create(enum obj_request_type type) static void rbd_obj_request_destroy(struct kref *kref) { struct rbd_obj_request *obj_request; + u32 i; obj_request = container_of(kref, struct rbd_obj_request, kref); dout("%s: obj %p\n", __func__, obj_request); - rbd_assert(obj_request->img_request == NULL); - rbd_assert(obj_request->which == BAD_WHICH); - if (obj_request->osd_req) rbd_osd_req_destroy(obj_request->osd_req); - rbd_assert(obj_request_type_valid(obj_request->type)); - switch (obj_request->type) { + switch (obj_request->img_request->data_type) { case OBJ_REQUEST_NODATA: - break; /* Nothing to do */ case OBJ_REQUEST_BIO: - if (obj_request->bio_list) - bio_chain_put(obj_request->bio_list); - break; - case 
OBJ_REQUEST_PAGES: - /* img_data requests don't own their page array */ - if (obj_request->pages && - !obj_request_img_data_test(obj_request)) - ceph_release_page_vector(obj_request->pages, - obj_request->page_count); + case OBJ_REQUEST_BVECS: + break; /* Nothing to do */ + case OBJ_REQUEST_OWN_BVECS: + kfree(obj_request->bvec_pos.bvecs); break; + default: + rbd_assert(0); + } + + kfree(obj_request->img_extents); + if (obj_request->copyup_bvecs) { + for (i = 0; i < obj_request->copyup_bvec_count; i++) { + if (obj_request->copyup_bvecs[i].bv_page) + __free_page(obj_request->copyup_bvecs[i].bv_page); + } + kfree(obj_request->copyup_bvecs); } kmem_cache_free(rbd_obj_request_cache, obj_request); @@ -2111,7 +1596,6 @@ static bool rbd_dev_parent_get(struct rbd_device *rbd_dev) */ static struct rbd_img_request *rbd_img_request_create( struct rbd_device *rbd_dev, - u64 offset, u64 length, enum obj_operation_type op_type, struct ceph_snap_context *snapc) { @@ -2122,27 +1606,21 @@ static struct rbd_img_request *rbd_img_request_create( return NULL; img_request->rbd_dev = rbd_dev; - img_request->offset = offset; - img_request->length = length; - if (op_type == OBJ_OP_DISCARD) { - img_request_discard_set(img_request); - img_request->snapc = snapc; - } else if (op_type == OBJ_OP_WRITE) { - img_request_write_set(img_request); - img_request->snapc = snapc; - } else { + img_request->op_type = op_type; + if (!rbd_img_is_write(img_request)) img_request->snap_id = rbd_dev->spec->snap_id; - } + else + img_request->snapc = snapc; + if (rbd_dev_parent_get(rbd_dev)) img_request_layered_set(img_request); spin_lock_init(&img_request->completion_lock); - INIT_LIST_HEAD(&img_request->obj_requests); + INIT_LIST_HEAD(&img_request->object_extents); kref_init(&img_request->kref); - dout("%s: rbd_dev %p %s %llu/%llu -> img %p\n", __func__, rbd_dev, - obj_op_name(op_type), offset, length, img_request); - + dout("%s: rbd_dev %p %s -> img %p\n", __func__, rbd_dev, + obj_op_name(op_type), 
img_request); return img_request; } @@ -2165,829 +1643,934 @@ static void rbd_img_request_destroy(struct kref *kref) rbd_dev_parent_put(img_request->rbd_dev); } - if (img_request_write_test(img_request) || - img_request_discard_test(img_request)) + if (rbd_img_is_write(img_request)) ceph_put_snap_context(img_request->snapc); kmem_cache_free(rbd_img_request_cache, img_request); } -static struct rbd_img_request *rbd_parent_request_create( - struct rbd_obj_request *obj_request, - u64 img_offset, u64 length) +static void prune_extents(struct ceph_file_extent *img_extents, + u32 *num_img_extents, u64 overlap) { - struct rbd_img_request *parent_request; - struct rbd_device *rbd_dev; + u32 cnt = *num_img_extents; - rbd_assert(obj_request->img_request); - rbd_dev = obj_request->img_request->rbd_dev; + /* drop extents completely beyond the overlap */ + while (cnt && img_extents[cnt - 1].fe_off >= overlap) + cnt--; - parent_request = rbd_img_request_create(rbd_dev->parent, img_offset, - length, OBJ_OP_READ, NULL); - if (!parent_request) - return NULL; + if (cnt) { + struct ceph_file_extent *ex = &img_extents[cnt - 1]; - img_request_child_set(parent_request); - rbd_obj_request_get(obj_request); - parent_request->obj_request = obj_request; + /* trim final overlapping extent */ + if (ex->fe_off + ex->fe_len > overlap) + ex->fe_len = overlap - ex->fe_off; + } - return parent_request; + *num_img_extents = cnt; } -static void rbd_parent_request_destroy(struct kref *kref) +/* + * Determine the byte range(s) covered by either just the object extent + * or the entire object in the parent image. 
+ */ +static int rbd_obj_calc_img_extents(struct rbd_obj_request *obj_req, + bool entire) { - struct rbd_img_request *parent_request; - struct rbd_obj_request *orig_request; + struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; + int ret; - parent_request = container_of(kref, struct rbd_img_request, kref); - orig_request = parent_request->obj_request; + if (!rbd_dev->parent_overlap) + return 0; - parent_request->obj_request = NULL; - rbd_obj_request_put(orig_request); - img_request_child_clear(parent_request); + ret = ceph_extent_to_file(&rbd_dev->layout, obj_req->ex.oe_objno, + entire ? 0 : obj_req->ex.oe_off, + entire ? rbd_dev->layout.object_size : + obj_req->ex.oe_len, + &obj_req->img_extents, + &obj_req->num_img_extents); + if (ret) + return ret; - rbd_img_request_destroy(kref); + prune_extents(obj_req->img_extents, &obj_req->num_img_extents, + rbd_dev->parent_overlap); + return 0; } -static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request) +static void rbd_osd_req_setup_data(struct rbd_obj_request *obj_req, u32 which) { - struct rbd_img_request *img_request; - unsigned int xferred; - int result; - bool more; - - rbd_assert(obj_request_img_data_test(obj_request)); - img_request = obj_request->img_request; - - rbd_assert(obj_request->xferred <= (u64)UINT_MAX); - xferred = (unsigned int)obj_request->xferred; - result = obj_request->result; - if (result) { - struct rbd_device *rbd_dev = img_request->rbd_dev; - enum obj_operation_type op_type; - - if (img_request_discard_test(img_request)) - op_type = OBJ_OP_DISCARD; - else if (img_request_write_test(img_request)) - op_type = OBJ_OP_WRITE; - else - op_type = OBJ_OP_READ; - - rbd_warn(rbd_dev, "%s %llx at %llx (%llx)", - obj_op_name(op_type), obj_request->length, - obj_request->img_offset, obj_request->offset); - rbd_warn(rbd_dev, " result %d xferred %x", - result, xferred); - if (!img_request->result) - img_request->result = result; - /* - * Need to end I/O on the entire obj_request worth 
of - * bytes in case of error. - */ - xferred = obj_request->length; + switch (obj_req->img_request->data_type) { + case OBJ_REQUEST_BIO: + osd_req_op_extent_osd_data_bio(obj_req->osd_req, which, + &obj_req->bio_pos, + obj_req->ex.oe_len); + break; + case OBJ_REQUEST_BVECS: + case OBJ_REQUEST_OWN_BVECS: + rbd_assert(obj_req->bvec_pos.iter.bi_size == + obj_req->ex.oe_len); + rbd_assert(obj_req->bvec_idx == obj_req->bvec_count); + osd_req_op_extent_osd_data_bvec_pos(obj_req->osd_req, which, + &obj_req->bvec_pos); + break; + default: + rbd_assert(0); } +} - if (img_request_child_test(img_request)) { - rbd_assert(img_request->obj_request != NULL); - more = obj_request->which < img_request->obj_request_count - 1; - } else { - blk_status_t status = errno_to_blk_status(result); +static int rbd_obj_setup_read(struct rbd_obj_request *obj_req) +{ + obj_req->osd_req = rbd_osd_req_create(obj_req, 1); + if (!obj_req->osd_req) + return -ENOMEM; - rbd_assert(img_request->rq != NULL); + osd_req_op_extent_init(obj_req->osd_req, 0, CEPH_OSD_OP_READ, + obj_req->ex.oe_off, obj_req->ex.oe_len, 0, 0); + rbd_osd_req_setup_data(obj_req, 0); - more = blk_update_request(img_request->rq, status, xferred); - if (!more) - __blk_mq_end_request(img_request->rq, status); - } + rbd_osd_req_format_read(obj_req); + return 0; +} + +static int __rbd_obj_setup_stat(struct rbd_obj_request *obj_req, + unsigned int which) +{ + struct page **pages; - return more; + /* + * The response data for a STAT call consists of: + * le64 length; + * struct { + * le32 tv_sec; + * le32 tv_nsec; + * } mtime; + */ + pages = ceph_alloc_page_vector(1, GFP_NOIO); + if (IS_ERR(pages)) + return PTR_ERR(pages); + + osd_req_op_init(obj_req->osd_req, which, CEPH_OSD_OP_STAT, 0); + osd_req_op_raw_data_in_pages(obj_req->osd_req, which, pages, + 8 + sizeof(struct ceph_timespec), + 0, false, true); + return 0; } -static void rbd_img_obj_callback(struct rbd_obj_request *obj_request) +static void __rbd_obj_setup_write(struct 
rbd_obj_request *obj_req, + unsigned int which) { - struct rbd_img_request *img_request; - u32 which = obj_request->which; - bool more = true; + struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; + u16 opcode; - rbd_assert(obj_request_img_data_test(obj_request)); - img_request = obj_request->img_request; + osd_req_op_alloc_hint_init(obj_req->osd_req, which++, + rbd_dev->layout.object_size, + rbd_dev->layout.object_size); - dout("%s: img %p obj %p\n", __func__, img_request, obj_request); - rbd_assert(img_request != NULL); - rbd_assert(img_request->obj_request_count > 0); - rbd_assert(which != BAD_WHICH); - rbd_assert(which < img_request->obj_request_count); + if (rbd_obj_is_entire(obj_req)) + opcode = CEPH_OSD_OP_WRITEFULL; + else + opcode = CEPH_OSD_OP_WRITE; - spin_lock_irq(&img_request->completion_lock); - if (which != img_request->next_completion) - goto out; + osd_req_op_extent_init(obj_req->osd_req, which, opcode, + obj_req->ex.oe_off, obj_req->ex.oe_len, 0, 0); + rbd_osd_req_setup_data(obj_req, which++); + + rbd_assert(which == obj_req->osd_req->r_num_ops); + rbd_osd_req_format_write(obj_req); +} - for_each_obj_request_from(img_request, obj_request) { - rbd_assert(more); - rbd_assert(which < img_request->obj_request_count); +static int rbd_obj_setup_write(struct rbd_obj_request *obj_req) +{ + unsigned int num_osd_ops, which = 0; + int ret; - if (!obj_request_done_test(obj_request)) - break; - more = rbd_img_obj_end_request(obj_request); - which++; + /* reverse map the entire object onto the parent */ + ret = rbd_obj_calc_img_extents(obj_req, true); + if (ret) + return ret; + + if (obj_req->num_img_extents) { + obj_req->write_state = RBD_OBJ_WRITE_GUARD; + num_osd_ops = 3; /* stat + setallochint + write/writefull */ + } else { + obj_req->write_state = RBD_OBJ_WRITE_FLAT; + num_osd_ops = 2; /* setallochint + write/writefull */ } - rbd_assert(more ^ (which == img_request->obj_request_count)); - img_request->next_completion = which; -out: - 
spin_unlock_irq(&img_request->completion_lock); - rbd_img_request_put(img_request); + obj_req->osd_req = rbd_osd_req_create(obj_req, num_osd_ops); + if (!obj_req->osd_req) + return -ENOMEM; - if (!more) - rbd_img_request_complete(img_request); + if (obj_req->num_img_extents) { + ret = __rbd_obj_setup_stat(obj_req, which++); + if (ret) + return ret; + } + + __rbd_obj_setup_write(obj_req, which); + return 0; } -/* - * Add individual osd ops to the given ceph_osd_request and prepare - * them for submission. num_ops is the current number of - * osd operations already to the object request. - */ -static void rbd_img_obj_request_fill(struct rbd_obj_request *obj_request, - struct ceph_osd_request *osd_request, - enum obj_operation_type op_type, - unsigned int num_ops) -{ - struct rbd_img_request *img_request = obj_request->img_request; - struct rbd_device *rbd_dev = img_request->rbd_dev; - u64 object_size = rbd_obj_bytes(&rbd_dev->header); - u64 offset = obj_request->offset; - u64 length = obj_request->length; - u64 img_end; +static void __rbd_obj_setup_discard(struct rbd_obj_request *obj_req, + unsigned int which) +{ u16 opcode; - if (op_type == OBJ_OP_DISCARD) { - if (!offset && length == object_size && - (!img_request_layered_test(img_request) || - !obj_request_overlaps_parent(obj_request))) { - opcode = CEPH_OSD_OP_DELETE; - } else if ((offset + length == object_size)) { + if (rbd_obj_is_entire(obj_req)) { + if (obj_req->num_img_extents) { + osd_req_op_init(obj_req->osd_req, which++, + CEPH_OSD_OP_CREATE, 0); opcode = CEPH_OSD_OP_TRUNCATE; } else { - down_read(&rbd_dev->header_rwsem); - img_end = rbd_dev->header.image_size; - up_read(&rbd_dev->header_rwsem); - - if (obj_request->img_offset + length == img_end) - opcode = CEPH_OSD_OP_TRUNCATE; - else - opcode = CEPH_OSD_OP_ZERO; + osd_req_op_init(obj_req->osd_req, which++, + CEPH_OSD_OP_DELETE, 0); + opcode = 0; } - } else if (op_type == OBJ_OP_WRITE) { - if (!offset && length == object_size) - opcode = 
CEPH_OSD_OP_WRITEFULL; - else - opcode = CEPH_OSD_OP_WRITE; - osd_req_op_alloc_hint_init(osd_request, num_ops, - object_size, object_size); - num_ops++; + } else if (rbd_obj_is_tail(obj_req)) { + opcode = CEPH_OSD_OP_TRUNCATE; } else { - opcode = CEPH_OSD_OP_READ; + opcode = CEPH_OSD_OP_ZERO; } - if (opcode == CEPH_OSD_OP_DELETE) - osd_req_op_init(osd_request, num_ops, opcode, 0); - else - osd_req_op_extent_init(osd_request, num_ops, opcode, - offset, length, 0, 0); - - if (obj_request->type == OBJ_REQUEST_BIO) - osd_req_op_extent_osd_data_bio(osd_request, num_ops, - obj_request->bio_list, length); - else if (obj_request->type == OBJ_REQUEST_PAGES) - osd_req_op_extent_osd_data_pages(osd_request, num_ops, - obj_request->pages, length, - offset & ~PAGE_MASK, false, false); - - /* Discards are also writes */ - if (op_type == OBJ_OP_WRITE || op_type == OBJ_OP_DISCARD) - rbd_osd_req_format_write(obj_request); - else - rbd_osd_req_format_read(obj_request); + if (opcode) + osd_req_op_extent_init(obj_req->osd_req, which++, opcode, + obj_req->ex.oe_off, obj_req->ex.oe_len, + 0, 0); + + rbd_assert(which == obj_req->osd_req->r_num_ops); + rbd_osd_req_format_write(obj_req); } -/* - * Split up an image request into one or more object requests, each - * to a different object. The "type" parameter indicates whether - * "data_desc" is the pointer to the head of a list of bio - * structures, or the base of a page array. In either case this - * function assumes data_desc describes memory sufficient to hold - * all data described by the image request. 
- */ -static int rbd_img_request_fill(struct rbd_img_request *img_request, - enum obj_request_type type, - void *data_desc) +static int rbd_obj_setup_discard(struct rbd_obj_request *obj_req) { - struct rbd_device *rbd_dev = img_request->rbd_dev; - struct rbd_obj_request *obj_request = NULL; - struct rbd_obj_request *next_obj_request; - struct bio *bio_list = NULL; - unsigned int bio_offset = 0; - struct page **pages = NULL; - enum obj_operation_type op_type; - u64 img_offset; - u64 resid; - - dout("%s: img %p type %d data_desc %p\n", __func__, img_request, - (int)type, data_desc); + unsigned int num_osd_ops, which = 0; + int ret; - img_offset = img_request->offset; - resid = img_request->length; - rbd_assert(resid > 0); - op_type = rbd_img_request_op_type(img_request); + /* reverse map the entire object onto the parent */ + ret = rbd_obj_calc_img_extents(obj_req, true); + if (ret) + return ret; - if (type == OBJ_REQUEST_BIO) { - bio_list = data_desc; - rbd_assert(img_offset == - bio_list->bi_iter.bi_sector << SECTOR_SHIFT); - } else if (type == OBJ_REQUEST_PAGES) { - pages = data_desc; + if (rbd_obj_is_entire(obj_req)) { + obj_req->write_state = RBD_OBJ_WRITE_FLAT; + if (obj_req->num_img_extents) + num_osd_ops = 2; /* create + truncate */ + else + num_osd_ops = 1; /* delete */ + } else { + if (obj_req->num_img_extents) { + obj_req->write_state = RBD_OBJ_WRITE_GUARD; + num_osd_ops = 2; /* stat + truncate/zero */ + } else { + obj_req->write_state = RBD_OBJ_WRITE_FLAT; + num_osd_ops = 1; /* truncate/zero */ + } } - while (resid) { - struct ceph_osd_request *osd_req; - u64 object_no = img_offset >> rbd_dev->header.obj_order; - u64 offset = rbd_segment_offset(rbd_dev, img_offset); - u64 length = rbd_segment_length(rbd_dev, img_offset, resid); - - obj_request = rbd_obj_request_create(type); - if (!obj_request) - goto out_unwind; - - obj_request->object_no = object_no; - obj_request->offset = offset; - obj_request->length = length; - - /* - * set obj_request->img_request 
before creating the - * osd_request so that it gets the right snapc - */ - rbd_img_obj_request_add(img_request, obj_request); - - if (type == OBJ_REQUEST_BIO) { - unsigned int clone_size; - - rbd_assert(length <= (u64)UINT_MAX); - clone_size = (unsigned int)length; - obj_request->bio_list = - bio_chain_clone_range(&bio_list, - &bio_offset, - clone_size, - GFP_NOIO); - if (!obj_request->bio_list) - goto out_unwind; - } else if (type == OBJ_REQUEST_PAGES) { - unsigned int page_count; - - obj_request->pages = pages; - page_count = (u32)calc_pages_for(offset, length); - obj_request->page_count = page_count; - if ((offset + length) & ~PAGE_MASK) - page_count--; /* more on last page */ - pages += page_count; - } + obj_req->osd_req = rbd_osd_req_create(obj_req, num_osd_ops); + if (!obj_req->osd_req) + return -ENOMEM; - osd_req = rbd_osd_req_create(rbd_dev, op_type, - (op_type == OBJ_OP_WRITE) ? 2 : 1, - obj_request); - if (!osd_req) - goto out_unwind; + if (!rbd_obj_is_entire(obj_req) && obj_req->num_img_extents) { + ret = __rbd_obj_setup_stat(obj_req, which++); + if (ret) + return ret; + } - obj_request->osd_req = osd_req; - obj_request->callback = rbd_img_obj_callback; - obj_request->img_offset = img_offset; + __rbd_obj_setup_discard(obj_req, which); + return 0; +} - rbd_img_obj_request_fill(obj_request, osd_req, op_type, 0); +/* + * For each object request in @img_req, allocate an OSD request, add + * individual OSD ops and prepare them for submission. The number of + * OSD ops depends on op_type and the overlap point (if any). 
+ */ +static int __rbd_img_fill_request(struct rbd_img_request *img_req) +{ + struct rbd_obj_request *obj_req; + int ret; - img_offset += length; - resid -= length; + for_each_obj_request(img_req, obj_req) { + switch (img_req->op_type) { + case OBJ_OP_READ: + ret = rbd_obj_setup_read(obj_req); + break; + case OBJ_OP_WRITE: + ret = rbd_obj_setup_write(obj_req); + break; + case OBJ_OP_DISCARD: + ret = rbd_obj_setup_discard(obj_req); + break; + default: + rbd_assert(0); + } + if (ret) + return ret; } return 0; +} -out_unwind: - for_each_obj_request_safe(img_request, obj_request, next_obj_request) - rbd_img_obj_request_del(img_request, obj_request); +union rbd_img_fill_iter { + struct ceph_bio_iter bio_iter; + struct ceph_bvec_iter bvec_iter; +}; - return -ENOMEM; -} +struct rbd_img_fill_ctx { + enum obj_request_type pos_type; + union rbd_img_fill_iter *pos; + union rbd_img_fill_iter iter; + ceph_object_extent_fn_t set_pos_fn; + ceph_object_extent_fn_t count_fn; + ceph_object_extent_fn_t copy_fn; +}; -static void -rbd_osd_copyup_callback(struct rbd_obj_request *obj_request) +static struct ceph_object_extent *alloc_object_extent(void *arg) { - struct rbd_img_request *img_request; - struct rbd_device *rbd_dev; - struct page **pages; - u32 page_count; + struct rbd_img_request *img_req = arg; + struct rbd_obj_request *obj_req; - dout("%s: obj %p\n", __func__, obj_request); + obj_req = rbd_obj_request_create(); + if (!obj_req) + return NULL; - rbd_assert(obj_request->type == OBJ_REQUEST_BIO || - obj_request->type == OBJ_REQUEST_NODATA); - rbd_assert(obj_request_img_data_test(obj_request)); - img_request = obj_request->img_request; - rbd_assert(img_request); + rbd_img_obj_request_add(img_req, obj_req); + return &obj_req->ex; +} - rbd_dev = img_request->rbd_dev; - rbd_assert(rbd_dev); +/* + * While su != os && sc == 1 is technically not fancy (it's the same + * layout as su == os && sc == 1), we can't use the nocopy path for it + * because ->set_pos_fn() should be called only 
once per object. + * ceph_file_to_extents() invokes action_fn once per stripe unit, so + * treat su != os && sc == 1 as fancy. + */ +static bool rbd_layout_is_fancy(struct ceph_file_layout *l) +{ + return l->stripe_unit != l->object_size; +} - pages = obj_request->copyup_pages; - rbd_assert(pages != NULL); - obj_request->copyup_pages = NULL; - page_count = obj_request->copyup_page_count; - rbd_assert(page_count); - obj_request->copyup_page_count = 0; - ceph_release_page_vector(pages, page_count); +static int rbd_img_fill_request_nocopy(struct rbd_img_request *img_req, + struct ceph_file_extent *img_extents, + u32 num_img_extents, + struct rbd_img_fill_ctx *fctx) +{ + u32 i; + int ret; + + img_req->data_type = fctx->pos_type; /* - * We want the transfer count to reflect the size of the - * original write request. There is no such thing as a - * successful short write, so if the request was successful - * we can just set it to the originally-requested length. + * Create object requests and set each object request's starting + * position in the provided bio (list) or bio_vec array. */ - if (!obj_request->result) - obj_request->xferred = obj_request->length; + fctx->iter = *fctx->pos; + for (i = 0; i < num_img_extents; i++) { + ret = ceph_file_to_extents(&img_req->rbd_dev->layout, + img_extents[i].fe_off, + img_extents[i].fe_len, + &img_req->object_extents, + alloc_object_extent, img_req, + fctx->set_pos_fn, &fctx->iter); + if (ret) + return ret; + } - obj_request_done_set(obj_request); + return __rbd_img_fill_request(img_req); } -static void -rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request) +/* + * Map a list of image extents to a list of object extents, create the + * corresponding object requests (normally each to a different object, + * but not always) and add them to @img_req. For each object request, + * set up its data descriptor to point to the corresponding chunk(s) of + * @fctx->pos data buffer. 
+ * + * Because ceph_file_to_extents() will merge adjacent object extents + * together, each object request's data descriptor may point to multiple + * different chunks of @fctx->pos data buffer. + * + * @fctx->pos data buffer is assumed to be large enough. + */ +static int rbd_img_fill_request(struct rbd_img_request *img_req, + struct ceph_file_extent *img_extents, + u32 num_img_extents, + struct rbd_img_fill_ctx *fctx) { - struct rbd_obj_request *orig_request; - struct ceph_osd_request *osd_req; - struct rbd_device *rbd_dev; - struct page **pages; - enum obj_operation_type op_type; - u32 page_count; - int img_result; - u64 parent_length; - - rbd_assert(img_request_child_test(img_request)); - - /* First get what we need from the image request */ - - pages = img_request->copyup_pages; - rbd_assert(pages != NULL); - img_request->copyup_pages = NULL; - page_count = img_request->copyup_page_count; - rbd_assert(page_count); - img_request->copyup_page_count = 0; - - orig_request = img_request->obj_request; - rbd_assert(orig_request != NULL); - rbd_assert(obj_request_type_valid(orig_request->type)); - img_result = img_request->result; - parent_length = img_request->length; - rbd_assert(img_result || parent_length == img_request->xferred); - rbd_img_request_put(img_request); + struct rbd_device *rbd_dev = img_req->rbd_dev; + struct rbd_obj_request *obj_req; + u32 i; + int ret; - rbd_assert(orig_request->img_request); - rbd_dev = orig_request->img_request->rbd_dev; - rbd_assert(rbd_dev); + if (fctx->pos_type == OBJ_REQUEST_NODATA || + !rbd_layout_is_fancy(&rbd_dev->layout)) + return rbd_img_fill_request_nocopy(img_req, img_extents, + num_img_extents, fctx); + + img_req->data_type = OBJ_REQUEST_OWN_BVECS; /* - * If the overlap has become 0 (most likely because the - * image has been flattened) we need to free the pages - * and re-submit the original write request. + * Create object requests and determine ->bvec_count for each object + * request. 
Note that ->bvec_count sum over all object requests may + * be greater than the number of bio_vecs in the provided bio (list) + * or bio_vec array because when mapped, those bio_vecs can straddle + * stripe unit boundaries. */ - if (!rbd_dev->parent_overlap) { - ceph_release_page_vector(pages, page_count); - rbd_obj_request_submit(orig_request); - return; + fctx->iter = *fctx->pos; + for (i = 0; i < num_img_extents; i++) { + ret = ceph_file_to_extents(&rbd_dev->layout, + img_extents[i].fe_off, + img_extents[i].fe_len, + &img_req->object_extents, + alloc_object_extent, img_req, + fctx->count_fn, &fctx->iter); + if (ret) + return ret; } - if (img_result) - goto out_err; + for_each_obj_request(img_req, obj_req) { + obj_req->bvec_pos.bvecs = kmalloc_array(obj_req->bvec_count, + sizeof(*obj_req->bvec_pos.bvecs), + GFP_NOIO); + if (!obj_req->bvec_pos.bvecs) + return -ENOMEM; + } /* - * The original osd request is of no use to use any more. - * We need a new one that can hold the three ops in a copyup - * request. Allocate the new copyup osd request for the - * original request, and release the old one. + * Fill in each object request's private bio_vec array, splitting and + * rearranging the provided bio_vecs in stripe unit chunks as needed. 
*/ - img_result = -ENOMEM; - osd_req = rbd_osd_req_create_copyup(orig_request); - if (!osd_req) - goto out_err; - rbd_osd_req_destroy(orig_request->osd_req); - orig_request->osd_req = osd_req; - orig_request->copyup_pages = pages; - orig_request->copyup_page_count = page_count; + fctx->iter = *fctx->pos; + for (i = 0; i < num_img_extents; i++) { + ret = ceph_iterate_extents(&rbd_dev->layout, + img_extents[i].fe_off, + img_extents[i].fe_len, + &img_req->object_extents, + fctx->copy_fn, &fctx->iter); + if (ret) + return ret; + } - /* Initialize the copyup op */ + return __rbd_img_fill_request(img_req); +} - osd_req_op_cls_init(osd_req, 0, CEPH_OSD_OP_CALL, "rbd", "copyup"); - osd_req_op_cls_request_data_pages(osd_req, 0, pages, parent_length, 0, - false, false); +static int rbd_img_fill_nodata(struct rbd_img_request *img_req, + u64 off, u64 len) +{ + struct ceph_file_extent ex = { off, len }; + union rbd_img_fill_iter dummy; + struct rbd_img_fill_ctx fctx = { + .pos_type = OBJ_REQUEST_NODATA, + .pos = &dummy, + }; - /* Add the other op(s) */ + return rbd_img_fill_request(img_req, &ex, 1, &fctx); +} - op_type = rbd_img_request_op_type(orig_request->img_request); - rbd_img_obj_request_fill(orig_request, osd_req, op_type, 1); +static void set_bio_pos(struct ceph_object_extent *ex, u32 bytes, void *arg) +{ + struct rbd_obj_request *obj_req = + container_of(ex, struct rbd_obj_request, ex); + struct ceph_bio_iter *it = arg; - /* All set, send it off. 
*/ + dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes); + obj_req->bio_pos = *it; + ceph_bio_iter_advance(it, bytes); +} - rbd_obj_request_submit(orig_request); - return; +static void count_bio_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg) +{ + struct rbd_obj_request *obj_req = + container_of(ex, struct rbd_obj_request, ex); + struct ceph_bio_iter *it = arg; + + dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes); + ceph_bio_iter_advance_step(it, bytes, ({ + obj_req->bvec_count++; + })); -out_err: - ceph_release_page_vector(pages, page_count); - rbd_obj_request_error(orig_request, img_result); } -/* - * Read from the parent image the range of data that covers the - * entire target of the given object request. This is used for - * satisfying a layered image write request when the target of an - * object request from the image request does not exist. - * - * A page array big enough to hold the returned data is allocated - * and supplied to rbd_img_request_fill() as the "data descriptor." - * When the read completes, this page array will be transferred to - * the original object request for the copyup operation. - * - * If an error occurs, it is recorded as the result of the original - * object request in rbd_img_obj_exists_callback(). 
- */ -static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request) -{ - struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev; - struct rbd_img_request *parent_request = NULL; - u64 img_offset; - u64 length; - struct page **pages = NULL; - u32 page_count; - int result; +static void copy_bio_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg) +{ + struct rbd_obj_request *obj_req = + container_of(ex, struct rbd_obj_request, ex); + struct ceph_bio_iter *it = arg; - rbd_assert(rbd_dev->parent != NULL); + dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes); + ceph_bio_iter_advance_step(it, bytes, ({ + obj_req->bvec_pos.bvecs[obj_req->bvec_idx++] = bv; + obj_req->bvec_pos.iter.bi_size += bv.bv_len; + })); +} - /* - * Determine the byte range covered by the object in the - * child image to which the original request was to be sent. - */ - img_offset = obj_request->img_offset - obj_request->offset; - length = rbd_obj_bytes(&rbd_dev->header); +static int __rbd_img_fill_from_bio(struct rbd_img_request *img_req, + struct ceph_file_extent *img_extents, + u32 num_img_extents, + struct ceph_bio_iter *bio_pos) +{ + struct rbd_img_fill_ctx fctx = { + .pos_type = OBJ_REQUEST_BIO, + .pos = (union rbd_img_fill_iter *)bio_pos, + .set_pos_fn = set_bio_pos, + .count_fn = count_bio_bvecs, + .copy_fn = copy_bio_bvecs, + }; - /* - * There is no defined parent data beyond the parent - * overlap, so limit what we read at that boundary if - * necessary. - */ - if (img_offset + length > rbd_dev->parent_overlap) { - rbd_assert(img_offset < rbd_dev->parent_overlap); - length = rbd_dev->parent_overlap - img_offset; - } + return rbd_img_fill_request(img_req, img_extents, num_img_extents, + &fctx); +} - /* - * Allocate a page array big enough to receive the data read - * from the parent. 
- */ - page_count = (u32)calc_pages_for(0, length); - pages = ceph_alloc_page_vector(page_count, GFP_NOIO); - if (IS_ERR(pages)) { - result = PTR_ERR(pages); - pages = NULL; - goto out_err; - } +static int rbd_img_fill_from_bio(struct rbd_img_request *img_req, + u64 off, u64 len, struct bio *bio) +{ + struct ceph_file_extent ex = { off, len }; + struct ceph_bio_iter it = { .bio = bio, .iter = bio->bi_iter }; - result = -ENOMEM; - parent_request = rbd_parent_request_create(obj_request, - img_offset, length); - if (!parent_request) - goto out_err; + return __rbd_img_fill_from_bio(img_req, &ex, 1, &it); +} - result = rbd_img_request_fill(parent_request, OBJ_REQUEST_PAGES, pages); - if (result) - goto out_err; +static void set_bvec_pos(struct ceph_object_extent *ex, u32 bytes, void *arg) +{ + struct rbd_obj_request *obj_req = + container_of(ex, struct rbd_obj_request, ex); + struct ceph_bvec_iter *it = arg; - parent_request->copyup_pages = pages; - parent_request->copyup_page_count = page_count; - parent_request->callback = rbd_img_obj_parent_read_full_callback; + obj_req->bvec_pos = *it; + ceph_bvec_iter_shorten(&obj_req->bvec_pos, bytes); + ceph_bvec_iter_advance(it, bytes); +} - result = rbd_img_request_submit(parent_request); - if (!result) - return 0; +static void count_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg) +{ + struct rbd_obj_request *obj_req = + container_of(ex, struct rbd_obj_request, ex); + struct ceph_bvec_iter *it = arg; - parent_request->copyup_pages = NULL; - parent_request->copyup_page_count = 0; -out_err: - if (pages) - ceph_release_page_vector(pages, page_count); - if (parent_request) - rbd_img_request_put(parent_request); - return result; + ceph_bvec_iter_advance_step(it, bytes, ({ + obj_req->bvec_count++; + })); } -static void rbd_img_obj_exists_callback(struct rbd_obj_request *obj_request) +static void copy_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg) { - struct rbd_obj_request *orig_request; - struct rbd_device 
*rbd_dev; - int result; + struct rbd_obj_request *obj_req = + container_of(ex, struct rbd_obj_request, ex); + struct ceph_bvec_iter *it = arg; - rbd_assert(!obj_request_img_data_test(obj_request)); + ceph_bvec_iter_advance_step(it, bytes, ({ + obj_req->bvec_pos.bvecs[obj_req->bvec_idx++] = bv; + obj_req->bvec_pos.iter.bi_size += bv.bv_len; + })); +} - /* - * All we need from the object request is the original - * request and the result of the STAT op. Grab those, then - * we're done with the request. - */ - orig_request = obj_request->obj_request; - obj_request->obj_request = NULL; - rbd_obj_request_put(orig_request); - rbd_assert(orig_request); - rbd_assert(orig_request->img_request); - - result = obj_request->result; - obj_request->result = 0; - - dout("%s: obj %p for obj %p result %d %llu/%llu\n", __func__, - obj_request, orig_request, result, - obj_request->xferred, obj_request->length); - rbd_obj_request_put(obj_request); +static int __rbd_img_fill_from_bvecs(struct rbd_img_request *img_req, + struct ceph_file_extent *img_extents, + u32 num_img_extents, + struct ceph_bvec_iter *bvec_pos) +{ + struct rbd_img_fill_ctx fctx = { + .pos_type = OBJ_REQUEST_BVECS, + .pos = (union rbd_img_fill_iter *)bvec_pos, + .set_pos_fn = set_bvec_pos, + .count_fn = count_bvecs, + .copy_fn = copy_bvecs, + }; - /* - * If the overlap has become 0 (most likely because the - * image has been flattened) we need to re-submit the - * original request. - */ - rbd_dev = orig_request->img_request->rbd_dev; - if (!rbd_dev->parent_overlap) { - rbd_obj_request_submit(orig_request); - return; - } + return rbd_img_fill_request(img_req, img_extents, num_img_extents, + &fctx); +} - /* - * Our only purpose here is to determine whether the object - * exists, and we don't want to treat the non-existence as - * an error. If something else comes back, transfer the - * error to the original request and complete it now. 
- */ - if (!result) { - obj_request_existence_set(orig_request, true); - } else if (result == -ENOENT) { - obj_request_existence_set(orig_request, false); - } else { - goto fail_orig_request; - } +static int rbd_img_fill_from_bvecs(struct rbd_img_request *img_req, + struct ceph_file_extent *img_extents, + u32 num_img_extents, + struct bio_vec *bvecs) +{ + struct ceph_bvec_iter it = { + .bvecs = bvecs, + .iter = { .bi_size = ceph_file_extents_bytes(img_extents, + num_img_extents) }, + }; - /* - * Resubmit the original request now that we have recorded - * whether the target object exists. - */ - result = rbd_img_obj_request_submit(orig_request); - if (result) - goto fail_orig_request; + return __rbd_img_fill_from_bvecs(img_req, img_extents, num_img_extents, + &it); +} - return; +static void rbd_img_request_submit(struct rbd_img_request *img_request) +{ + struct rbd_obj_request *obj_request; + + dout("%s: img %p\n", __func__, img_request); + + rbd_img_request_get(img_request); + for_each_obj_request(img_request, obj_request) + rbd_obj_request_submit(obj_request); -fail_orig_request: - rbd_obj_request_error(orig_request, result); + rbd_img_request_put(img_request); } -static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request) +static int rbd_obj_read_from_parent(struct rbd_obj_request *obj_req) { - struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev; - struct rbd_obj_request *stat_request; - struct page **pages; - u32 page_count; - size_t size; + struct rbd_img_request *img_req = obj_req->img_request; + struct rbd_img_request *child_img_req; int ret; - stat_request = rbd_obj_request_create(OBJ_REQUEST_PAGES); - if (!stat_request) + child_img_req = rbd_img_request_create(img_req->rbd_dev->parent, + OBJ_OP_READ, NULL); + if (!child_img_req) return -ENOMEM; - stat_request->object_no = obj_request->object_no; + __set_bit(IMG_REQ_CHILD, &child_img_req->flags); + child_img_req->obj_request = obj_req; - stat_request->osd_req = 
rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1, - stat_request); - if (!stat_request->osd_req) { - ret = -ENOMEM; - goto fail_stat_request; + if (!rbd_img_is_write(img_req)) { + switch (img_req->data_type) { + case OBJ_REQUEST_BIO: + ret = __rbd_img_fill_from_bio(child_img_req, + obj_req->img_extents, + obj_req->num_img_extents, + &obj_req->bio_pos); + break; + case OBJ_REQUEST_BVECS: + case OBJ_REQUEST_OWN_BVECS: + ret = __rbd_img_fill_from_bvecs(child_img_req, + obj_req->img_extents, + obj_req->num_img_extents, + &obj_req->bvec_pos); + break; + default: + rbd_assert(0); + } + } else { + ret = rbd_img_fill_from_bvecs(child_img_req, + obj_req->img_extents, + obj_req->num_img_extents, + obj_req->copyup_bvecs); + } + if (ret) { + rbd_img_request_put(child_img_req); + return ret; + } + + rbd_img_request_submit(child_img_req); + return 0; +} + +static bool rbd_obj_handle_read(struct rbd_obj_request *obj_req) +{ + struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; + int ret; + + if (obj_req->result == -ENOENT && + rbd_dev->parent_overlap && !obj_req->tried_parent) { + /* reverse map this object extent onto the parent */ + ret = rbd_obj_calc_img_extents(obj_req, false); + if (ret) { + obj_req->result = ret; + return true; + } + + if (obj_req->num_img_extents) { + obj_req->tried_parent = true; + ret = rbd_obj_read_from_parent(obj_req); + if (ret) { + obj_req->result = ret; + return true; + } + return false; + } } /* - * The response data for a STAT call consists of: - * le64 length; - * struct { - * le32 tv_sec; - * le32 tv_nsec; - * } mtime; + * -ENOENT means a hole in the image -- zero-fill the entire + * length of the request. A short read also implies zero-fill + * to the end of the request. In both cases we update xferred + * count to indicate the whole request was satisfied. 
*/ - size = sizeof (__le64) + sizeof (__le32) + sizeof (__le32); - page_count = (u32)calc_pages_for(0, size); - pages = ceph_alloc_page_vector(page_count, GFP_NOIO); - if (IS_ERR(pages)) { - ret = PTR_ERR(pages); - goto fail_stat_request; + if (obj_req->result == -ENOENT || + (!obj_req->result && obj_req->xferred < obj_req->ex.oe_len)) { + rbd_assert(!obj_req->xferred || !obj_req->result); + rbd_obj_zero_range(obj_req, obj_req->xferred, + obj_req->ex.oe_len - obj_req->xferred); + obj_req->result = 0; + obj_req->xferred = obj_req->ex.oe_len; } - osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT, 0); - osd_req_op_raw_data_in_pages(stat_request->osd_req, 0, pages, size, 0, - false, false); - - rbd_obj_request_get(obj_request); - stat_request->obj_request = obj_request; - stat_request->pages = pages; - stat_request->page_count = page_count; - stat_request->callback = rbd_img_obj_exists_callback; + return true; +} - rbd_obj_request_submit(stat_request); - return 0; +/* + * copyup_bvecs pages are never highmem pages + */ +static bool is_zero_bvecs(struct bio_vec *bvecs, u32 bytes) +{ + struct ceph_bvec_iter it = { + .bvecs = bvecs, + .iter = { .bi_size = bytes }, + }; -fail_stat_request: - rbd_obj_request_put(stat_request); - return ret; + ceph_bvec_iter_advance_step(&it, bytes, ({ + if (memchr_inv(page_address(bv.bv_page) + bv.bv_offset, 0, + bv.bv_len)) + return false; + })); + return true; } -static bool img_obj_request_simple(struct rbd_obj_request *obj_request) +static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes) { - struct rbd_img_request *img_request = obj_request->img_request; - struct rbd_device *rbd_dev = img_request->rbd_dev; + unsigned int num_osd_ops = obj_req->osd_req->r_num_ops; - /* Reads */ - if (!img_request_write_test(img_request) && - !img_request_discard_test(img_request)) - return true; - - /* Non-layered writes */ - if (!img_request_layered_test(img_request)) - return true; + dout("%s obj_req %p bytes %u\n", 
__func__, obj_req, bytes); + rbd_assert(obj_req->osd_req->r_ops[0].op == CEPH_OSD_OP_STAT); + rbd_osd_req_destroy(obj_req->osd_req); /* - * Layered writes outside of the parent overlap range don't - * share any data with the parent. + * Create a copyup request with the same number of OSD ops as + * the original request. The original request was stat + op(s), + * the new copyup request will be copyup + the same op(s). */ - if (!obj_request_overlaps_parent(obj_request)) - return true; + obj_req->osd_req = rbd_osd_req_create(obj_req, num_osd_ops); + if (!obj_req->osd_req) + return -ENOMEM; /* - * Entire-object layered writes - we will overwrite whatever - * parent data there is anyway. + * Only send non-zero copyup data to save some I/O and network + * bandwidth -- zero copyup data is equivalent to the object not + * existing. */ - if (!obj_request->offset && - obj_request->length == rbd_obj_bytes(&rbd_dev->header)) - return true; + if (is_zero_bvecs(obj_req->copyup_bvecs, bytes)) { + dout("%s obj_req %p detected zeroes\n", __func__, obj_req); + bytes = 0; + } - /* - * If the object is known to already exist, its parent data has - * already been copied. 
- */ - if (obj_request_known_test(obj_request) && - obj_request_exists_test(obj_request)) - return true; + osd_req_op_cls_init(obj_req->osd_req, 0, CEPH_OSD_OP_CALL, "rbd", + "copyup"); + osd_req_op_cls_request_data_bvecs(obj_req->osd_req, 0, + obj_req->copyup_bvecs, bytes); + + switch (obj_req->img_request->op_type) { + case OBJ_OP_WRITE: + __rbd_obj_setup_write(obj_req, 1); + break; + case OBJ_OP_DISCARD: + rbd_assert(!rbd_obj_is_entire(obj_req)); + __rbd_obj_setup_discard(obj_req, 1); + break; + default: + rbd_assert(0); + } - return false; + rbd_obj_request_submit(obj_req); + return 0; } -static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request) +static int setup_copyup_bvecs(struct rbd_obj_request *obj_req, u64 obj_overlap) { - rbd_assert(obj_request_img_data_test(obj_request)); - rbd_assert(obj_request_type_valid(obj_request->type)); - rbd_assert(obj_request->img_request); + u32 i; - if (img_obj_request_simple(obj_request)) { - rbd_obj_request_submit(obj_request); - return 0; - } + rbd_assert(!obj_req->copyup_bvecs); + obj_req->copyup_bvec_count = calc_pages_for(0, obj_overlap); + obj_req->copyup_bvecs = kcalloc(obj_req->copyup_bvec_count, + sizeof(*obj_req->copyup_bvecs), + GFP_NOIO); + if (!obj_req->copyup_bvecs) + return -ENOMEM; - /* - * It's a layered write. The target object might exist but - * we may not know that yet. If we know it doesn't exist, - * start by reading the data for the full target object from - * the parent so we can use it for a copyup to the target. - */ - if (obj_request_known_test(obj_request)) - return rbd_img_obj_parent_read_full(obj_request); + for (i = 0; i < obj_req->copyup_bvec_count; i++) { + unsigned int len = min(obj_overlap, (u64)PAGE_SIZE); - /* We don't know whether the target exists. Go find out. 
*/ + obj_req->copyup_bvecs[i].bv_page = alloc_page(GFP_NOIO); + if (!obj_req->copyup_bvecs[i].bv_page) + return -ENOMEM; + + obj_req->copyup_bvecs[i].bv_offset = 0; + obj_req->copyup_bvecs[i].bv_len = len; + obj_overlap -= len; + } - return rbd_img_obj_exists_submit(obj_request); + rbd_assert(!obj_overlap); + return 0; } -static int rbd_img_request_submit(struct rbd_img_request *img_request) +static int rbd_obj_handle_write_guard(struct rbd_obj_request *obj_req) { - struct rbd_obj_request *obj_request; - struct rbd_obj_request *next_obj_request; - int ret = 0; - - dout("%s: img %p\n", __func__, img_request); + struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; + int ret; - rbd_img_request_get(img_request); - for_each_obj_request_safe(img_request, obj_request, next_obj_request) { - ret = rbd_img_obj_request_submit(obj_request); - if (ret) - goto out_put_ireq; + rbd_assert(obj_req->num_img_extents); + prune_extents(obj_req->img_extents, &obj_req->num_img_extents, + rbd_dev->parent_overlap); + if (!obj_req->num_img_extents) { + /* + * The overlap has become 0 (most likely because the + * image has been flattened). Use rbd_obj_issue_copyup() + * to re-submit the original write request -- the copyup + * operation itself will be a no-op, since someone must + * have populated the child object while we weren't + * looking. Move to WRITE_FLAT state as we'll be done + * with the operation once the null copyup completes. 
+ */ + obj_req->write_state = RBD_OBJ_WRITE_FLAT; + return rbd_obj_issue_copyup(obj_req, 0); } -out_put_ireq: - rbd_img_request_put(img_request); - return ret; + ret = setup_copyup_bvecs(obj_req, rbd_obj_img_extents_bytes(obj_req)); + if (ret) + return ret; + + obj_req->write_state = RBD_OBJ_WRITE_COPYUP; + return rbd_obj_read_from_parent(obj_req); } -static void rbd_img_parent_read_callback(struct rbd_img_request *img_request) +static bool rbd_obj_handle_write(struct rbd_obj_request *obj_req) { - struct rbd_obj_request *obj_request; - struct rbd_device *rbd_dev; - u64 obj_end; - u64 img_xferred; - int img_result; + int ret; - rbd_assert(img_request_child_test(img_request)); +again: + switch (obj_req->write_state) { + case RBD_OBJ_WRITE_GUARD: + rbd_assert(!obj_req->xferred); + if (obj_req->result == -ENOENT) { + /* + * The target object doesn't exist. Read the data for + * the entire target object up to the overlap point (if + * any) from the parent, so we can use it for a copyup. + */ + ret = rbd_obj_handle_write_guard(obj_req); + if (ret) { + obj_req->result = ret; + return true; + } + return false; + } + /* fall through */ + case RBD_OBJ_WRITE_FLAT: + if (!obj_req->result) + /* + * There is no such thing as a successful short + * write -- indicate the whole request was satisfied. + */ + obj_req->xferred = obj_req->ex.oe_len; + return true; + case RBD_OBJ_WRITE_COPYUP: + obj_req->write_state = RBD_OBJ_WRITE_GUARD; + if (obj_req->result) + goto again; - /* First get what we need from the image request and release it */ + rbd_assert(obj_req->xferred); + ret = rbd_obj_issue_copyup(obj_req, obj_req->xferred); + if (ret) { + obj_req->result = ret; + return true; + } + return false; + default: + rbd_assert(0); + } +} - obj_request = img_request->obj_request; - img_xferred = img_request->xferred; - img_result = img_request->result; - rbd_img_request_put(img_request); +/* + * Returns true if @obj_req is completed, or false otherwise. 
+ */ +static bool __rbd_obj_handle_request(struct rbd_obj_request *obj_req) +{ + switch (obj_req->img_request->op_type) { + case OBJ_OP_READ: + return rbd_obj_handle_read(obj_req); + case OBJ_OP_WRITE: + return rbd_obj_handle_write(obj_req); + case OBJ_OP_DISCARD: + if (rbd_obj_handle_write(obj_req)) { + /* + * Hide -ENOENT from delete/truncate/zero -- discarding + * a non-existent object is not a problem. + */ + if (obj_req->result == -ENOENT) { + obj_req->result = 0; + obj_req->xferred = obj_req->ex.oe_len; + } + return true; + } + return false; + default: + rbd_assert(0); + } +} - /* - * If the overlap has become 0 (most likely because the - * image has been flattened) we need to re-submit the - * original request. - */ - rbd_assert(obj_request); - rbd_assert(obj_request->img_request); - rbd_dev = obj_request->img_request->rbd_dev; - if (!rbd_dev->parent_overlap) { - rbd_obj_request_submit(obj_request); +static void rbd_obj_end_request(struct rbd_obj_request *obj_req) +{ + struct rbd_img_request *img_req = obj_req->img_request; + + rbd_assert((!obj_req->result && + obj_req->xferred == obj_req->ex.oe_len) || + (obj_req->result < 0 && !obj_req->xferred)); + if (!obj_req->result) { + img_req->xferred += obj_req->xferred; return; } - obj_request->result = img_result; - if (obj_request->result) - goto out; + rbd_warn(img_req->rbd_dev, + "%s at objno %llu %llu~%llu result %d xferred %llu", + obj_op_name(img_req->op_type), obj_req->ex.oe_objno, + obj_req->ex.oe_off, obj_req->ex.oe_len, obj_req->result, + obj_req->xferred); + if (!img_req->result) { + img_req->result = obj_req->result; + img_req->xferred = 0; + } +} - /* - * We need to zero anything beyond the parent overlap - * boundary. Since rbd_img_obj_request_read_callback() - * will zero anything beyond the end of a short read, an - * easy way to do this is to pretend the data from the - * parent came up short--ending at the overlap boundary. 
- */ - rbd_assert(obj_request->img_offset < U64_MAX - obj_request->length); - obj_end = obj_request->img_offset + obj_request->length; - if (obj_end > rbd_dev->parent_overlap) { - u64 xferred = 0; +static void rbd_img_end_child_request(struct rbd_img_request *img_req) +{ + struct rbd_obj_request *obj_req = img_req->obj_request; - if (obj_request->img_offset < rbd_dev->parent_overlap) - xferred = rbd_dev->parent_overlap - - obj_request->img_offset; + rbd_assert(test_bit(IMG_REQ_CHILD, &img_req->flags)); + rbd_assert((!img_req->result && + img_req->xferred == rbd_obj_img_extents_bytes(obj_req)) || + (img_req->result < 0 && !img_req->xferred)); - obj_request->xferred = min(img_xferred, xferred); - } else { - obj_request->xferred = img_xferred; - } -out: - rbd_img_obj_request_read_callback(obj_request); - rbd_obj_request_complete(obj_request); + obj_req->result = img_req->result; + obj_req->xferred = img_req->xferred; + rbd_img_request_put(img_req); } -static void rbd_img_parent_read(struct rbd_obj_request *obj_request) +static void rbd_img_end_request(struct rbd_img_request *img_req) { - struct rbd_img_request *img_request; - int result; + rbd_assert(!test_bit(IMG_REQ_CHILD, &img_req->flags)); + rbd_assert((!img_req->result && + img_req->xferred == blk_rq_bytes(img_req->rq)) || + (img_req->result < 0 && !img_req->xferred)); - rbd_assert(obj_request_img_data_test(obj_request)); - rbd_assert(obj_request->img_request != NULL); - rbd_assert(obj_request->result == (s32) -ENOENT); - rbd_assert(obj_request_type_valid(obj_request->type)); + blk_mq_end_request(img_req->rq, + errno_to_blk_status(img_req->result)); + rbd_img_request_put(img_req); +} - /* rbd_read_finish(obj_request, obj_request->length); */ - img_request = rbd_parent_request_create(obj_request, - obj_request->img_offset, - obj_request->length); - result = -ENOMEM; - if (!img_request) - goto out_err; +static void rbd_obj_handle_request(struct rbd_obj_request *obj_req) +{ + struct rbd_img_request *img_req; - if 
(obj_request->type == OBJ_REQUEST_BIO) - result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO, - obj_request->bio_list); - else - result = rbd_img_request_fill(img_request, OBJ_REQUEST_PAGES, - obj_request->pages); - if (result) - goto out_err; +again: + if (!__rbd_obj_handle_request(obj_req)) + return; - img_request->callback = rbd_img_parent_read_callback; - result = rbd_img_request_submit(img_request); - if (result) - goto out_err; + img_req = obj_req->img_request; + spin_lock(&img_req->completion_lock); + rbd_obj_end_request(obj_req); + rbd_assert(img_req->pending_count); + if (--img_req->pending_count) { + spin_unlock(&img_req->completion_lock); + return; + } - return; -out_err: - if (img_request) - rbd_img_request_put(img_request); - obj_request->result = result; - obj_request->xferred = 0; - obj_request_done_set(obj_request); + spin_unlock(&img_req->completion_lock); + if (test_bit(IMG_REQ_CHILD, &img_req->flags)) { + obj_req = img_req->obj_request; + rbd_img_end_child_request(img_req); + goto again; + } + rbd_img_end_request(img_req); } static const struct rbd_client_id rbd_empty_cid; @@ -3091,8 +2674,8 @@ static int __rbd_notify_op_lock(struct rbd_device *rbd_dev, { struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; struct rbd_client_id cid = rbd_get_cid(rbd_dev); - int buf_size = 4 + 8 + 8 + CEPH_ENCODING_START_BLK_LEN; - char buf[buf_size]; + char buf[4 + 8 + 8 + CEPH_ENCODING_START_BLK_LEN]; + int buf_size = sizeof(buf); void *p = buf; dout("%s rbd_dev %p notify_op %d\n", __func__, rbd_dev, notify_op); @@ -3610,8 +3193,8 @@ static void __rbd_acknowledge_notify(struct rbd_device *rbd_dev, u64 notify_id, u64 cookie, s32 *result) { struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; - int buf_size = 4 + CEPH_ENCODING_START_BLK_LEN; - char buf[buf_size]; + char buf[4 + CEPH_ENCODING_START_BLK_LEN]; + int buf_size = sizeof(buf); int ret; if (result) { @@ -3887,7 +3470,7 @@ static void rbd_reregister_watch(struct 
work_struct *work) ret = rbd_dev_refresh(rbd_dev); if (ret) - rbd_warn(rbd_dev, "reregisteration refresh failed: %d", ret); + rbd_warn(rbd_dev, "reregistration refresh failed: %d", ret); } /* @@ -4070,8 +3653,7 @@ static void rbd_queue_workfn(struct work_struct *work) } } - img_request = rbd_img_request_create(rbd_dev, offset, length, op_type, - snapc); + img_request = rbd_img_request_create(rbd_dev, op_type, snapc); if (!img_request) { result = -ENOMEM; goto err_unlock; @@ -4080,18 +3662,14 @@ static void rbd_queue_workfn(struct work_struct *work) snapc = NULL; /* img_request consumes a ref */ if (op_type == OBJ_OP_DISCARD) - result = rbd_img_request_fill(img_request, OBJ_REQUEST_NODATA, - NULL); + result = rbd_img_fill_nodata(img_request, offset, length); else - result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO, - rq->bio); - if (result) - goto err_img_request; - - result = rbd_img_request_submit(img_request); + result = rbd_img_fill_from_bio(img_request, offset, length, + rq->bio); if (result) goto err_img_request; + rbd_img_request_submit(img_request); if (must_be_locked) up_read(&rbd_dev->lock_rwsem); return; @@ -4369,7 +3947,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE); q->limits.max_sectors = queue_max_hw_sectors(q); blk_queue_max_segments(q, USHRT_MAX); - blk_queue_max_segment_size(q, segment_size); + blk_queue_max_segment_size(q, UINT_MAX); blk_queue_io_min(q, segment_size); blk_queue_io_opt(q, segment_size); @@ -5057,9 +4635,6 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev) } __attribute__ ((packed)) striping_info_buf = { 0 }; size_t size = sizeof (striping_info_buf); void *p; - u64 obj_size; - u64 stripe_unit; - u64 stripe_count; int ret; ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, @@ -5071,31 +4646,9 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev) if (ret < size) return -ERANGE; - /* - * We don't actually support the "fancy 
striping" feature - * (STRIPINGV2) yet, but if the striping sizes are the - * defaults the behavior is the same as before. So find - * out, and only fail if the image has non-default values. - */ - ret = -EINVAL; - obj_size = rbd_obj_bytes(&rbd_dev->header); p = &striping_info_buf; - stripe_unit = ceph_decode_64(&p); - if (stripe_unit != obj_size) { - rbd_warn(rbd_dev, "unsupported stripe unit " - "(got %llu want %llu)", - stripe_unit, obj_size); - return -EINVAL; - } - stripe_count = ceph_decode_64(&p); - if (stripe_count != 1) { - rbd_warn(rbd_dev, "unsupported stripe count " - "(got %llu want 1)", stripe_count); - return -EINVAL; - } - rbd_dev->header.stripe_unit = stripe_unit; - rbd_dev->header.stripe_count = stripe_count; - + rbd_dev->header.stripe_unit = ceph_decode_64(&p); + rbd_dev->header.stripe_count = ceph_decode_64(&p); return 0; } @@ -5653,39 +5206,6 @@ out_err: return ret; } -/* - * Return pool id (>= 0) or a negative error code. - */ -static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name) -{ - struct ceph_options *opts = rbdc->client->options; - u64 newest_epoch; - int tries = 0; - int ret; - -again: - ret = ceph_pg_poolid_by_name(rbdc->client->osdc.osdmap, pool_name); - if (ret == -ENOENT && tries++ < 1) { - ret = ceph_monc_get_version(&rbdc->client->monc, "osdmap", - &newest_epoch); - if (ret < 0) - return ret; - - if (rbdc->client->osdc.osdmap->epoch < newest_epoch) { - ceph_osdc_maybe_request_map(&rbdc->client->osdc); - (void) ceph_monc_wait_osdmap(&rbdc->client->monc, - newest_epoch, - opts->mount_timeout); - goto again; - } else { - /* the osdmap we have is new enough */ - return -ENOENT; - } - } - - return ret; -} - static void rbd_dev_image_unlock(struct rbd_device *rbd_dev) { down_write(&rbd_dev->lock_rwsem); @@ -6114,7 +5634,7 @@ static ssize_t do_rbd_add(struct bus_type *bus, } /* pick the pool */ - rc = rbd_add_get_pool_id(rbdc, spec->pool_name); + rc = ceph_pg_poolid_by_name(rbdc->client->osdc.osdmap, 
spec->pool_name); if (rc < 0) { if (rc == -ENOENT) pr_info("pool %s does not exist\n", spec->pool_name); @@ -6366,16 +5886,8 @@ static int rbd_slab_init(void) if (!rbd_obj_request_cache) goto out_err; - rbd_assert(!rbd_bio_clone); - rbd_bio_clone = bioset_create(BIO_POOL_SIZE, 0, 0); - if (!rbd_bio_clone) - goto out_err_clone; - return 0; -out_err_clone: - kmem_cache_destroy(rbd_obj_request_cache); - rbd_obj_request_cache = NULL; out_err: kmem_cache_destroy(rbd_img_request_cache); rbd_img_request_cache = NULL; @@ -6391,10 +5903,6 @@ static void rbd_slab_exit(void) rbd_assert(rbd_img_request_cache); kmem_cache_destroy(rbd_img_request_cache); rbd_img_request_cache = NULL; - - rbd_assert(rbd_bio_clone); - bioset_free(rbd_bio_clone); - rbd_bio_clone = NULL; } static int __init rbd_init(void) diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 27df3e2837fd..6d61cd023633 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -187,6 +187,16 @@ config DMA_SUN6I help Support for the DMA engine first found in Allwinner A31 SoCs. +config DW_AXI_DMAC + tristate "Synopsys DesignWare AXI DMA support" + depends on OF || COMPILE_TEST + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Enable support for Synopsys DesignWare AXI DMA controller. + NOTE: This driver wasn't tested on 64 bit platform because + of lack 64 bit platform with Synopsys DW AXI DMAC. 
+ config EP93XX_DMA bool "Cirrus Logic EP93xx DMA support" depends on ARCH_EP93XX || COMPILE_TEST @@ -633,6 +643,8 @@ config ZX_DMA # driver files source "drivers/dma/bestcomm/Kconfig" +source "drivers/dma/mediatek/Kconfig" + source "drivers/dma/qcom/Kconfig" source "drivers/dma/dw/Kconfig" diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index b9dca8a0e142..0f62a4d49aab 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -28,6 +28,7 @@ obj-$(CONFIG_DMA_OMAP) += omap-dma.o obj-$(CONFIG_DMA_SA11X0) += sa11x0-dma.o obj-$(CONFIG_DMA_SUN4I) += sun4i-dma.o obj-$(CONFIG_DMA_SUN6I) += sun6i-dma.o +obj-$(CONFIG_DW_AXI_DMAC) += dw-axi-dmac/ obj-$(CONFIG_DW_DMAC_CORE) += dw/ obj-$(CONFIG_EP93XX_DMA) += ep93xx_dma.o obj-$(CONFIG_FSL_DMA) += fsldma.o @@ -75,5 +76,6 @@ obj-$(CONFIG_XGENE_DMA) += xgene-dma.o obj-$(CONFIG_ZX_DMA) += zx_dma.o obj-$(CONFIG_ST_FDMA) += st_fdma.o +obj-y += mediatek/ obj-y += qcom/ obj-y += xilinx/ diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index c00e3923d7d8..94236ec9d410 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -1471,10 +1471,10 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, for (retry = 0; retry < AT_XDMAC_RESIDUE_MAX_RETRIES; retry++) { check_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc; rmb(); - initd = !!(at_xdmac_chan_read(atchan, AT_XDMAC_CC) & AT_XDMAC_CC_INITD); - rmb(); cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC); rmb(); + initd = !!(at_xdmac_chan_read(atchan, AT_XDMAC_CC) & AT_XDMAC_CC_INITD); + rmb(); cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc; rmb(); diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index 80cc2be6483c..b9339524d5bd 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -74,7 +74,11 @@ MODULE_PARM_DESC(timeout, "Transfer Timeout in msec (default: 3000), " static bool noverify; module_param(noverify, bool, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(noverify, "Disable random data 
setup and verification"); +MODULE_PARM_DESC(noverify, "Disable data verification (default: verify)"); + +static bool norandom; +module_param(norandom, bool, 0644); +MODULE_PARM_DESC(norandom, "Disable random offset setup (default: random)"); static bool verbose; module_param(verbose, bool, S_IRUGO | S_IWUSR); @@ -103,6 +107,7 @@ struct dmatest_params { unsigned int pq_sources; int timeout; bool noverify; + bool norandom; }; /** @@ -575,7 +580,7 @@ static int dmatest_func(void *data) break; } - if (params->noverify) + if (params->norandom) len = params->buf_size; else len = dmatest_random() % params->buf_size + 1; @@ -586,17 +591,19 @@ static int dmatest_func(void *data) total_len += len; - if (params->noverify) { + if (params->norandom) { src_off = 0; dst_off = 0; } else { - start = ktime_get(); src_off = dmatest_random() % (params->buf_size - len + 1); dst_off = dmatest_random() % (params->buf_size - len + 1); src_off = (src_off >> align) << align; dst_off = (dst_off >> align) << align; + } + if (!params->noverify) { + start = ktime_get(); dmatest_init_srcs(thread->srcs, src_off, len, params->buf_size, is_memset); dmatest_init_dsts(thread->dsts, dst_off, len, @@ -975,6 +982,7 @@ static void run_threaded_test(struct dmatest_info *info) params->pq_sources = pq_sources; params->timeout = timeout; params->noverify = noverify; + params->norandom = norandom; request_channels(info, DMA_MEMCPY); request_channels(info, DMA_MEMSET); diff --git a/drivers/dma/dw-axi-dmac/Makefile b/drivers/dma/dw-axi-dmac/Makefile new file mode 100644 index 000000000000..4bfa462005be --- /dev/null +++ b/drivers/dma/dw-axi-dmac/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_DW_AXI_DMAC) += dw-axi-dmac-platform.o diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c new file mode 100644 index 000000000000..c4eb55e3011c --- /dev/null +++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c @@ -0,0 +1,1008 @@ +// SPDX-License-Identifier: GPL-2.0 +// (C) 
2017-2018 Synopsys, Inc. (www.synopsys.com) + +/* + * Synopsys DesignWare AXI DMA Controller driver. + * + * Author: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com> + */ + +#include <linux/bitops.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/dmaengine.h> +#include <linux/dmapool.h> +#include <linux/err.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include <linux/property.h> +#include <linux/types.h> + +#include "dw-axi-dmac.h" +#include "../dmaengine.h" +#include "../virt-dma.h" + +/* + * The set of bus widths supported by the DMA controller. DW AXI DMAC supports + * master data bus width up to 512 bits (for both AXI master interfaces), but + * it depends on IP block configuration. + */ +#define AXI_DMA_BUSWIDTHS \ + (DMA_SLAVE_BUSWIDTH_1_BYTE | \ + DMA_SLAVE_BUSWIDTH_2_BYTES | \ + DMA_SLAVE_BUSWIDTH_4_BYTES | \ + DMA_SLAVE_BUSWIDTH_8_BYTES | \ + DMA_SLAVE_BUSWIDTH_16_BYTES | \ + DMA_SLAVE_BUSWIDTH_32_BYTES | \ + DMA_SLAVE_BUSWIDTH_64_BYTES) + +/* Write the 32-bit value val to the controller register at offset reg. */ +static inline void +axi_dma_iowrite32(struct axi_dma_chip *chip, u32 reg, u32 val) +{ + iowrite32(val, chip->regs + reg); +} + +/* Read the 32-bit controller register at offset reg. */ +static inline u32 axi_dma_ioread32(struct axi_dma_chip *chip, u32 reg) +{ + return ioread32(chip->regs + reg); +} + +/* Write the 32-bit value val to this channel's register at offset reg. */ +static inline void +axi_chan_iowrite32(struct axi_dma_chan *chan, u32 reg, u32 val) +{ + iowrite32(val, chan->chan_regs + reg); +} + +/* Read this channel's 32-bit register at offset reg. */ +static inline u32 axi_chan_ioread32(struct axi_dma_chan *chan, u32 reg) +{ + return ioread32(chan->chan_regs + reg); +} + +/* Write a 64-bit value to a channel register using 32-bit accesses. */ +static inline void +axi_chan_iowrite64(struct axi_dma_chan *chan, u32 reg, u64 val) +{ + /* + * We split one 64 bit write into two 32 bit writes as some HW doesn't + * support 64 bit access. 
+ */ + iowrite32(lower_32_bits(val), chan->chan_regs + reg); + iowrite32(upper_32_bits(val), chan->chan_regs + reg + 4); +} + +static inline void axi_dma_disable(struct axi_dma_chip *chip) +{ + u32 val; + + val = axi_dma_ioread32(chip, DMAC_CFG); + val &= ~DMAC_EN_MASK; + axi_dma_iowrite32(chip, DMAC_CFG, val); +} + +static inline void axi_dma_enable(struct axi_dma_chip *chip) +{ + u32 val; + + val = axi_dma_ioread32(chip, DMAC_CFG); + val |= DMAC_EN_MASK; + axi_dma_iowrite32(chip, DMAC_CFG, val); +} + +static inline void axi_dma_irq_disable(struct axi_dma_chip *chip) +{ + u32 val; + + val = axi_dma_ioread32(chip, DMAC_CFG); + val &= ~INT_EN_MASK; + axi_dma_iowrite32(chip, DMAC_CFG, val); +} + +static inline void axi_dma_irq_enable(struct axi_dma_chip *chip) +{ + u32 val; + + val = axi_dma_ioread32(chip, DMAC_CFG); + val |= INT_EN_MASK; + axi_dma_iowrite32(chip, DMAC_CFG, val); +} + +static inline void axi_chan_irq_disable(struct axi_dma_chan *chan, u32 irq_mask) +{ + u32 val; + + if (likely(irq_mask == DWAXIDMAC_IRQ_ALL)) { + axi_chan_iowrite32(chan, CH_INTSTATUS_ENA, DWAXIDMAC_IRQ_NONE); + } else { + val = axi_chan_ioread32(chan, CH_INTSTATUS_ENA); + val &= ~irq_mask; + axi_chan_iowrite32(chan, CH_INTSTATUS_ENA, val); + } +} + +static inline void axi_chan_irq_set(struct axi_dma_chan *chan, u32 irq_mask) +{ + axi_chan_iowrite32(chan, CH_INTSTATUS_ENA, irq_mask); +} + +static inline void axi_chan_irq_sig_set(struct axi_dma_chan *chan, u32 irq_mask) +{ + axi_chan_iowrite32(chan, CH_INTSIGNAL_ENA, irq_mask); +} + +static inline void axi_chan_irq_clear(struct axi_dma_chan *chan, u32 irq_mask) +{ + axi_chan_iowrite32(chan, CH_INTCLEAR, irq_mask); +} + +static inline u32 axi_chan_irq_read(struct axi_dma_chan *chan) +{ + return axi_chan_ioread32(chan, CH_INTSTATUS); +} + +static inline void axi_chan_disable(struct axi_dma_chan *chan) +{ + u32 val; + + val = axi_dma_ioread32(chan->chip, DMAC_CHEN); + val &= ~(BIT(chan->id) << DMAC_CHAN_EN_SHIFT); + val |= BIT(chan->id) << 
DMAC_CHAN_EN_WE_SHIFT; + axi_dma_iowrite32(chan->chip, DMAC_CHEN, val); +} + +static inline void axi_chan_enable(struct axi_dma_chan *chan) +{ + u32 val; + + val = axi_dma_ioread32(chan->chip, DMAC_CHEN); + val |= BIT(chan->id) << DMAC_CHAN_EN_SHIFT | + BIT(chan->id) << DMAC_CHAN_EN_WE_SHIFT; + axi_dma_iowrite32(chan->chip, DMAC_CHEN, val); +} + +static inline bool axi_chan_is_hw_enable(struct axi_dma_chan *chan) +{ + u32 val; + + val = axi_dma_ioread32(chan->chip, DMAC_CHEN); + + return !!(val & (BIT(chan->id) << DMAC_CHAN_EN_SHIFT)); +} + +static void axi_dma_hw_init(struct axi_dma_chip *chip) +{ + u32 i; + + for (i = 0; i < chip->dw->hdata->nr_channels; i++) { + axi_chan_irq_disable(&chip->dw->chan[i], DWAXIDMAC_IRQ_ALL); + axi_chan_disable(&chip->dw->chan[i]); + } +} + +static u32 axi_chan_get_xfer_width(struct axi_dma_chan *chan, dma_addr_t src, + dma_addr_t dst, size_t len) +{ + u32 max_width = chan->chip->dw->hdata->m_data_width; + + return __ffs(src | dst | len | BIT(max_width)); +} + +static inline const char *axi_chan_name(struct axi_dma_chan *chan) +{ + return dma_chan_name(&chan->vc.chan); +} + +static struct axi_dma_desc *axi_desc_get(struct axi_dma_chan *chan) +{ + struct dw_axi_dma *dw = chan->chip->dw; + struct axi_dma_desc *desc; + dma_addr_t phys; + + desc = dma_pool_zalloc(dw->desc_pool, GFP_NOWAIT, &phys); + if (unlikely(!desc)) { + dev_err(chan2dev(chan), "%s: not enough descriptors available\n", + axi_chan_name(chan)); + return NULL; + } + + atomic_inc(&chan->descs_allocated); + INIT_LIST_HEAD(&desc->xfer_list); + desc->vd.tx.phys = phys; + desc->chan = chan; + + return desc; +} + +static void axi_desc_put(struct axi_dma_desc *desc) +{ + struct axi_dma_chan *chan = desc->chan; + struct dw_axi_dma *dw = chan->chip->dw; + struct axi_dma_desc *child, *_next; + unsigned int descs_put = 0; + + list_for_each_entry_safe(child, _next, &desc->xfer_list, xfer_list) { + list_del(&child->xfer_list); + dma_pool_free(dw->desc_pool, child, 
child->vd.tx.phys); + descs_put++; + } + + dma_pool_free(dw->desc_pool, desc, desc->vd.tx.phys); + descs_put++; + + atomic_sub(descs_put, &chan->descs_allocated); + dev_vdbg(chan2dev(chan), "%s: %d descs put, %d still allocated\n", + axi_chan_name(chan), descs_put, + atomic_read(&chan->descs_allocated)); +} + +static void vchan_desc_put(struct virt_dma_desc *vdesc) +{ + axi_desc_put(vd_to_axi_desc(vdesc)); +} + +static enum dma_status +dma_chan_tx_status(struct dma_chan *dchan, dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan); + enum dma_status ret; + + ret = dma_cookie_status(dchan, cookie, txstate); + + if (chan->is_paused && ret == DMA_IN_PROGRESS) + ret = DMA_PAUSED; + + return ret; +} + +static void write_desc_llp(struct axi_dma_desc *desc, dma_addr_t adr) +{ + desc->lli.llp = cpu_to_le64(adr); +} + +static void write_chan_llp(struct axi_dma_chan *chan, dma_addr_t adr) +{ + axi_chan_iowrite64(chan, CH_LLP, adr); +} + +/* Called in chan locked context */ +static void axi_chan_block_xfer_start(struct axi_dma_chan *chan, + struct axi_dma_desc *first) +{ + u32 priority = chan->chip->dw->hdata->priority[chan->id]; + u32 reg, irq_mask; + u8 lms = 0; /* Select AXI0 master for LLI fetching */ + + if (unlikely(axi_chan_is_hw_enable(chan))) { + dev_err(chan2dev(chan), "%s is non-idle!\n", + axi_chan_name(chan)); + + return; + } + + axi_dma_enable(chan->chip); + + reg = (DWAXIDMAC_MBLK_TYPE_LL << CH_CFG_L_DST_MULTBLK_TYPE_POS | + DWAXIDMAC_MBLK_TYPE_LL << CH_CFG_L_SRC_MULTBLK_TYPE_POS); + axi_chan_iowrite32(chan, CH_CFG_L, reg); + + reg = (DWAXIDMAC_TT_FC_MEM_TO_MEM_DMAC << CH_CFG_H_TT_FC_POS | + priority << CH_CFG_H_PRIORITY_POS | + DWAXIDMAC_HS_SEL_HW << CH_CFG_H_HS_SEL_DST_POS | + DWAXIDMAC_HS_SEL_HW << CH_CFG_H_HS_SEL_SRC_POS); + axi_chan_iowrite32(chan, CH_CFG_H, reg); + + write_chan_llp(chan, first->vd.tx.phys | lms); + + irq_mask = DWAXIDMAC_IRQ_DMA_TRF | DWAXIDMAC_IRQ_ALL_ERR; + 
axi_chan_irq_sig_set(chan, irq_mask); + + /* Generate 'suspend' status but don't generate interrupt */ + irq_mask |= DWAXIDMAC_IRQ_SUSPENDED; + axi_chan_irq_set(chan, irq_mask); + + axi_chan_enable(chan); +} + +static void axi_chan_start_first_queued(struct axi_dma_chan *chan) +{ + struct axi_dma_desc *desc; + struct virt_dma_desc *vd; + + vd = vchan_next_desc(&chan->vc); + if (!vd) + return; + + desc = vd_to_axi_desc(vd); + dev_vdbg(chan2dev(chan), "%s: started %u\n", axi_chan_name(chan), + vd->tx.cookie); + axi_chan_block_xfer_start(chan, desc); +} + +static void dma_chan_issue_pending(struct dma_chan *dchan) +{ + struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan); + unsigned long flags; + + spin_lock_irqsave(&chan->vc.lock, flags); + if (vchan_issue_pending(&chan->vc)) + axi_chan_start_first_queued(chan); + spin_unlock_irqrestore(&chan->vc.lock, flags); +} + +static int dma_chan_alloc_chan_resources(struct dma_chan *dchan) +{ + struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan); + + /* ASSERT: channel is idle */ + if (axi_chan_is_hw_enable(chan)) { + dev_err(chan2dev(chan), "%s is non-idle!\n", + axi_chan_name(chan)); + return -EBUSY; + } + + dev_vdbg(dchan2dev(dchan), "%s: allocating\n", axi_chan_name(chan)); + + pm_runtime_get(chan->chip->dev); + + return 0; +} + +static void dma_chan_free_chan_resources(struct dma_chan *dchan) +{ + struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan); + + /* ASSERT: channel is idle */ + if (axi_chan_is_hw_enable(chan)) + dev_err(dchan2dev(dchan), "%s is non-idle!\n", + axi_chan_name(chan)); + + axi_chan_disable(chan); + axi_chan_irq_disable(chan, DWAXIDMAC_IRQ_ALL); + + vchan_free_chan_resources(&chan->vc); + + dev_vdbg(dchan2dev(dchan), + "%s: free resources, descriptor still allocated: %u\n", + axi_chan_name(chan), atomic_read(&chan->descs_allocated)); + + pm_runtime_put(chan->chip->dev); +} + +/* + * If DW_axi_dmac sees CHx_CTL.ShadowReg_Or_LLI_Last bit of the fetched LLI + * as 1, it understands that the 
current block is the final block in the + * transfer and completes the DMA transfer operation at the end of current + * block transfer. + */ +static void set_desc_last(struct axi_dma_desc *desc) +{ + u32 val; + + val = le32_to_cpu(desc->lli.ctl_hi); + val |= CH_CTL_H_LLI_LAST; + desc->lli.ctl_hi = cpu_to_le32(val); +} + +/* Store the source address in the descriptor's LLI (little-endian). */ +static void write_desc_sar(struct axi_dma_desc *desc, dma_addr_t adr) +{ + desc->lli.sar = cpu_to_le64(adr); +} + +/* Store the destination address in the descriptor's LLI (little-endian). */ +static void write_desc_dar(struct axi_dma_desc *desc, dma_addr_t adr) +{ + desc->lli.dar = cpu_to_le64(adr); +} + +/* Clear CH_CTL_L_SRC_MAST in the descriptor's ctl_lo word. */ +static void set_desc_src_master(struct axi_dma_desc *desc) +{ + u32 val; + + /* Select AXI0 for source master */ + val = le32_to_cpu(desc->lli.ctl_lo); + val &= ~CH_CTL_L_SRC_MAST; + desc->lli.ctl_lo = cpu_to_le32(val); +} + +/* Set CH_CTL_L_DST_MAST when more than one master exists, else clear it. */ +static void set_desc_dest_master(struct axi_dma_desc *desc) +{ + u32 val; + + /* Select AXI1 for destination master if available */ + val = le32_to_cpu(desc->lli.ctl_lo); + if (desc->chan->chip->dw->hdata->nr_masters > 1) + val |= CH_CTL_L_DST_MAST; + else + val &= ~CH_CTL_L_DST_MAST; + + desc->lli.ctl_lo = cpu_to_le32(val); +} + +/* + * Build a chain of LLI descriptors, one per block of at most max_block_ts + * data items, covering len bytes from src_adr to dst_adr. + */ +static struct dma_async_tx_descriptor * +dma_chan_prep_dma_memcpy(struct dma_chan *dchan, dma_addr_t dst_adr, + dma_addr_t src_adr, size_t len, unsigned long flags) +{ + struct axi_dma_desc *first = NULL, *desc = NULL, *prev = NULL; + struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan); + size_t block_ts, max_block_ts, xfer_len; + u32 xfer_width, reg; + u8 lms = 0; /* Select AXI0 master for LLI fetching */ + + dev_dbg(chan2dev(chan), "%s: memcpy: src: %pad dst: %pad length: %zd flags: %#lx", + axi_chan_name(chan), &src_adr, &dst_adr, len, flags); + + max_block_ts = chan->chip->dw->hdata->block_size[chan->id]; + + while (len) { + xfer_len = len; + + /* + * Take care for the alignment. + * Actually source and destination widths can be different, but + * make them same to be simpler. 
+ */ + xfer_width = axi_chan_get_xfer_width(chan, src_adr, dst_adr, xfer_len); + + /* + * block_ts indicates the total number of data of width + * to be transferred in a DMA block transfer. + * BLOCK_TS register should be set to block_ts - 1 + */ + block_ts = xfer_len >> xfer_width; + if (block_ts > max_block_ts) { + block_ts = max_block_ts; + xfer_len = max_block_ts << xfer_width; + } + + desc = axi_desc_get(chan); + if (unlikely(!desc)) + goto err_desc_get; + + write_desc_sar(desc, src_adr); + write_desc_dar(desc, dst_adr); + desc->lli.block_ts_lo = cpu_to_le32(block_ts - 1); + + reg = CH_CTL_H_LLI_VALID; + if (chan->chip->dw->hdata->restrict_axi_burst_len) { + u32 burst_len = chan->chip->dw->hdata->axi_rw_burst_len; + + reg |= (CH_CTL_H_ARLEN_EN | + burst_len << CH_CTL_H_ARLEN_POS | + CH_CTL_H_AWLEN_EN | + burst_len << CH_CTL_H_AWLEN_POS); + } + desc->lli.ctl_hi = cpu_to_le32(reg); + + reg = (DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_DST_MSIZE_POS | + DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_SRC_MSIZE_POS | + xfer_width << CH_CTL_L_DST_WIDTH_POS | + xfer_width << CH_CTL_L_SRC_WIDTH_POS | + DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_DST_INC_POS | + DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_SRC_INC_POS); + desc->lli.ctl_lo = cpu_to_le32(reg); + + set_desc_src_master(desc); + set_desc_dest_master(desc); + + /* Manage transfer list (xfer_list) */ + if (!first) { + first = desc; + } else { + list_add_tail(&desc->xfer_list, &first->xfer_list); + write_desc_llp(prev, desc->vd.tx.phys | lms); + } + prev = desc; + + /* update the length and addresses for the next loop cycle */ + len -= xfer_len; + dst_adr += xfer_len; + src_adr += xfer_len; + } + + /* Total len of src/dest sg == 0, so no descriptors were allocated */ + if (unlikely(!first)) + return NULL; + + /* Set end-of-link to the last link descriptor of list */ + set_desc_last(desc); + + return vchan_tx_prep(&chan->vc, &first->vd, flags); + +err_desc_get: + /* + * first is still NULL when the very first allocation failed; + * axi_desc_put() dereferences its argument, so only release a chain + * that actually exists. + */ + if (first) + axi_desc_put(first); + return NULL; +} + +static void axi_chan_dump_lli(struct 
axi_dma_chan *chan, + struct axi_dma_desc *desc) +{ + dev_err(dchan2dev(&chan->vc.chan), + "SAR: 0x%llx DAR: 0x%llx LLP: 0x%llx BTS 0x%x CTL: 0x%x:%08x", + le64_to_cpu(desc->lli.sar), + le64_to_cpu(desc->lli.dar), + le64_to_cpu(desc->lli.llp), + le32_to_cpu(desc->lli.block_ts_lo), + le32_to_cpu(desc->lli.ctl_hi), + le32_to_cpu(desc->lli.ctl_lo)); +} + +/* Dump the LLI of desc_head and of every descriptor on its xfer_list. */ +static void axi_chan_list_dump_lli(struct axi_dma_chan *chan, + struct axi_dma_desc *desc_head) +{ + struct axi_dma_desc *desc; + + axi_chan_dump_lli(chan, desc_head); + list_for_each_entry(desc, &desc_head->xfer_list, xfer_list) + axi_chan_dump_lli(chan, desc); +} + +/* + * Handle an error interrupt on chan: disable the channel, report and complete + * the descriptor at the head of the issued list, then start the next one. + * Does nothing beyond logging when no descriptor is queued. + */ +static noinline void axi_chan_handle_err(struct axi_dma_chan *chan, u32 status) +{ + struct virt_dma_desc *vd; + unsigned long flags; + + spin_lock_irqsave(&chan->vc.lock, flags); + + axi_chan_disable(chan); + + /* The bad descriptor currently is in the head of vc list */ + vd = vchan_next_desc(&chan->vc); + /* The issued list may be empty; vchan_next_desc() then returns NULL */ + if (!vd) { + dev_err(chan2dev(chan), "BUG: %s, IRQ with no descriptors\n", + axi_chan_name(chan)); + goto out; + } + /* Remove the completed descriptor from issued list */ + list_del(&vd->node); + + /* WARN about bad descriptor */ + dev_err(chan2dev(chan), + "Bad descriptor submitted for %s, cookie: %d, irq: 0x%08x\n", + axi_chan_name(chan), vd->tx.cookie, status); + axi_chan_list_dump_lli(chan, vd_to_axi_desc(vd)); + + vchan_cookie_complete(vd); + + /* Try to restart the controller */ + axi_chan_start_first_queued(chan); + +out: + spin_unlock_irqrestore(&chan->vc.lock, flags); +} + +/* + * Handle a transfer-done interrupt on chan: complete the descriptor at the + * head of the issued list (if there is one) and start the next queued one. + */ +static void axi_chan_block_xfer_complete(struct axi_dma_chan *chan) +{ + struct virt_dma_desc *vd; + unsigned long flags; + + spin_lock_irqsave(&chan->vc.lock, flags); + if (unlikely(axi_chan_is_hw_enable(chan))) { + dev_err(chan2dev(chan), "BUG: %s caught DWAXIDMAC_IRQ_DMA_TRF, but channel not idle!\n", + axi_chan_name(chan)); + axi_chan_disable(chan); + } + + /* The completed descriptor currently is in the head of vc list */ + vd = vchan_next_desc(&chan->vc); + /* Skip completion when the issued list is empty (vd == NULL) */ + if (likely(vd)) { + /* Remove the completed descriptor from issued list before completing */ + list_del(&vd->node); + vchan_cookie_complete(vd); + } + + 
/* Submit queued descriptors after processing the completed ones */ + axi_chan_start_first_queued(chan); + + spin_unlock_irqrestore(&chan->vc.lock, flags); +} + +/* + * Controller interrupt handler: masks controller interrupts, reads and clears + * each channel's interrupt status, dispatches error or transfer-done + * handling per channel, then unmasks controller interrupts again. + */ +static irqreturn_t dw_axi_dma_interrupt(int irq, void *dev_id) +{ + struct axi_dma_chip *chip = dev_id; + struct dw_axi_dma *dw = chip->dw; + struct axi_dma_chan *chan; + + u32 status, i; + + /* Disable DMAC interrupts. We'll enable them after processing channels */ + axi_dma_irq_disable(chip); + + /* Poll, clear and process every channel interrupt status */ + for (i = 0; i < dw->hdata->nr_channels; i++) { + chan = &dw->chan[i]; + status = axi_chan_irq_read(chan); + axi_chan_irq_clear(chan, status); + + dev_vdbg(chip->dev, "%s %u IRQ status: 0x%08x\n", + axi_chan_name(chan), i, status); + + /* Errors take precedence over a transfer-done status */ + if (status & DWAXIDMAC_IRQ_ALL_ERR) + axi_chan_handle_err(chan, status); + else if (status & DWAXIDMAC_IRQ_DMA_TRF) + axi_chan_block_xfer_complete(chan); + } + + /* Re-enable interrupts */ + axi_dma_irq_enable(chip); + + return IRQ_HANDLED; +} + +/* Disable the channel and free every descriptor queued on it. */ +static int dma_chan_terminate_all(struct dma_chan *dchan) +{ + struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan); + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&chan->vc.lock, flags); + + axi_chan_disable(chan); + + vchan_get_all_descriptors(&chan->vc, &head); + + /* + * As vchan_dma_desc_free_list can access the desc_allocated list + * we need to call it in vc.lock context. 
+ */ + vchan_dma_desc_free_list(&chan->vc, &head); + + spin_unlock_irqrestore(&chan->vc.lock, flags); + + dev_vdbg(dchan2dev(dchan), "terminated: %s\n", axi_chan_name(chan)); + + return 0; +} + +static int dma_chan_pause(struct dma_chan *dchan) +{ + struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan); + unsigned long flags; + unsigned int timeout = 20; /* timeout iterations */ + u32 val; + + spin_lock_irqsave(&chan->vc.lock, flags); + + val = axi_dma_ioread32(chan->chip, DMAC_CHEN); + val |= BIT(chan->id) << DMAC_CHAN_SUSP_SHIFT | + BIT(chan->id) << DMAC_CHAN_SUSP_WE_SHIFT; + axi_dma_iowrite32(chan->chip, DMAC_CHEN, val); + + do { + if (axi_chan_irq_read(chan) & DWAXIDMAC_IRQ_SUSPENDED) + break; + + udelay(2); + } while (--timeout); + + axi_chan_irq_clear(chan, DWAXIDMAC_IRQ_SUSPENDED); + + chan->is_paused = true; + + spin_unlock_irqrestore(&chan->vc.lock, flags); + + return timeout ? 0 : -EAGAIN; +} + +/* Called in chan locked context */ +static inline void axi_chan_resume(struct axi_dma_chan *chan) +{ + u32 val; + + val = axi_dma_ioread32(chan->chip, DMAC_CHEN); + val &= ~(BIT(chan->id) << DMAC_CHAN_SUSP_SHIFT); + val |= (BIT(chan->id) << DMAC_CHAN_SUSP_WE_SHIFT); + axi_dma_iowrite32(chan->chip, DMAC_CHEN, val); + + chan->is_paused = false; +} + +static int dma_chan_resume(struct dma_chan *dchan) +{ + struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan); + unsigned long flags; + + spin_lock_irqsave(&chan->vc.lock, flags); + + if (chan->is_paused) + axi_chan_resume(chan); + + spin_unlock_irqrestore(&chan->vc.lock, flags); + + return 0; +} + +static int axi_dma_suspend(struct axi_dma_chip *chip) +{ + axi_dma_irq_disable(chip); + axi_dma_disable(chip); + + clk_disable_unprepare(chip->core_clk); + clk_disable_unprepare(chip->cfgr_clk); + + return 0; +} + +static int axi_dma_resume(struct axi_dma_chip *chip) +{ + int ret; + + ret = clk_prepare_enable(chip->cfgr_clk); + if (ret < 0) + return ret; + + ret = clk_prepare_enable(chip->core_clk); + if (ret < 0) + 
return ret; + + axi_dma_enable(chip); + axi_dma_irq_enable(chip); + + return 0; +} + +static int __maybe_unused axi_dma_runtime_suspend(struct device *dev) +{ + struct axi_dma_chip *chip = dev_get_drvdata(dev); + + return axi_dma_suspend(chip); +} + +static int __maybe_unused axi_dma_runtime_resume(struct device *dev) +{ + struct axi_dma_chip *chip = dev_get_drvdata(dev); + + return axi_dma_resume(chip); +} + +static int parse_device_properties(struct axi_dma_chip *chip) +{ + struct device *dev = chip->dev; + u32 tmp, carr[DMAC_MAX_CHANNELS]; + int ret; + + ret = device_property_read_u32(dev, "dma-channels", &tmp); + if (ret) + return ret; + if (tmp == 0 || tmp > DMAC_MAX_CHANNELS) + return -EINVAL; + + chip->dw->hdata->nr_channels = tmp; + + ret = device_property_read_u32(dev, "snps,dma-masters", &tmp); + if (ret) + return ret; + if (tmp == 0 || tmp > DMAC_MAX_MASTERS) + return -EINVAL; + + chip->dw->hdata->nr_masters = tmp; + + ret = device_property_read_u32(dev, "snps,data-width", &tmp); + if (ret) + return ret; + if (tmp > DWAXIDMAC_TRANS_WIDTH_MAX) + return -EINVAL; + + chip->dw->hdata->m_data_width = tmp; + + ret = device_property_read_u32_array(dev, "snps,block-size", carr, + chip->dw->hdata->nr_channels); + if (ret) + return ret; + for (tmp = 0; tmp < chip->dw->hdata->nr_channels; tmp++) { + if (carr[tmp] == 0 || carr[tmp] > DMAC_MAX_BLK_SIZE) + return -EINVAL; + + chip->dw->hdata->block_size[tmp] = carr[tmp]; + } + + ret = device_property_read_u32_array(dev, "snps,priority", carr, + chip->dw->hdata->nr_channels); + if (ret) + return ret; + /* Priority value must be programmed within [0:nr_channels-1] range */ + for (tmp = 0; tmp < chip->dw->hdata->nr_channels; tmp++) { + if (carr[tmp] >= chip->dw->hdata->nr_channels) + return -EINVAL; + + chip->dw->hdata->priority[tmp] = carr[tmp]; + } + + /* axi-max-burst-len is optional property */ + ret = device_property_read_u32(dev, "snps,axi-max-burst-len", &tmp); + if (!ret) { + if (tmp > DWAXIDMAC_ARWLEN_MAX + 1) + 
return -EINVAL; + if (tmp < DWAXIDMAC_ARWLEN_MIN + 1) + return -EINVAL; + + chip->dw->hdata->restrict_axi_burst_len = true; + chip->dw->hdata->axi_rw_burst_len = tmp - 1; + } + + return 0; +} + +static int dw_probe(struct platform_device *pdev) +{ + struct axi_dma_chip *chip; + struct resource *mem; + struct dw_axi_dma *dw; + struct dw_axi_dma_hcfg *hdata; + u32 i; + int ret; + + chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + dw = devm_kzalloc(&pdev->dev, sizeof(*dw), GFP_KERNEL); + if (!dw) + return -ENOMEM; + + hdata = devm_kzalloc(&pdev->dev, sizeof(*hdata), GFP_KERNEL); + if (!hdata) + return -ENOMEM; + + chip->dw = dw; + chip->dev = &pdev->dev; + chip->dw->hdata = hdata; + + chip->irq = platform_get_irq(pdev, 0); + if (chip->irq < 0) + return chip->irq; + + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + chip->regs = devm_ioremap_resource(chip->dev, mem); + if (IS_ERR(chip->regs)) + return PTR_ERR(chip->regs); + + chip->core_clk = devm_clk_get(chip->dev, "core-clk"); + if (IS_ERR(chip->core_clk)) + return PTR_ERR(chip->core_clk); + + chip->cfgr_clk = devm_clk_get(chip->dev, "cfgr-clk"); + if (IS_ERR(chip->cfgr_clk)) + return PTR_ERR(chip->cfgr_clk); + + ret = parse_device_properties(chip); + if (ret) + return ret; + + dw->chan = devm_kcalloc(chip->dev, hdata->nr_channels, + sizeof(*dw->chan), GFP_KERNEL); + if (!dw->chan) + return -ENOMEM; + + ret = devm_request_irq(chip->dev, chip->irq, dw_axi_dma_interrupt, + IRQF_SHARED, KBUILD_MODNAME, chip); + if (ret) + return ret; + + /* Lli address must be aligned to a 64-byte boundary */ + dw->desc_pool = dmam_pool_create(KBUILD_MODNAME, chip->dev, + sizeof(struct axi_dma_desc), 64, 0); + if (!dw->desc_pool) { + dev_err(chip->dev, "No memory for descriptors dma pool\n"); + return -ENOMEM; + } + + INIT_LIST_HEAD(&dw->dma.channels); + for (i = 0; i < hdata->nr_channels; i++) { + struct axi_dma_chan *chan = &dw->chan[i]; + + chan->chip = chip; + chan->id = i; + 
chan->chan_regs = chip->regs + COMMON_REG_LEN + i * CHAN_REG_LEN; + atomic_set(&chan->descs_allocated, 0); + + chan->vc.desc_free = vchan_desc_put; + vchan_init(&chan->vc, &dw->dma); + } + + /* Set capabilities */ + dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask); + + /* DMA capabilities */ + dw->dma.chancnt = hdata->nr_channels; + dw->dma.src_addr_widths = AXI_DMA_BUSWIDTHS; + dw->dma.dst_addr_widths = AXI_DMA_BUSWIDTHS; + dw->dma.directions = BIT(DMA_MEM_TO_MEM); + dw->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + + dw->dma.dev = chip->dev; + dw->dma.device_tx_status = dma_chan_tx_status; + dw->dma.device_issue_pending = dma_chan_issue_pending; + dw->dma.device_terminate_all = dma_chan_terminate_all; + dw->dma.device_pause = dma_chan_pause; + dw->dma.device_resume = dma_chan_resume; + + dw->dma.device_alloc_chan_resources = dma_chan_alloc_chan_resources; + dw->dma.device_free_chan_resources = dma_chan_free_chan_resources; + + dw->dma.device_prep_dma_memcpy = dma_chan_prep_dma_memcpy; + + platform_set_drvdata(pdev, chip); + + pm_runtime_enable(chip->dev); + + /* + * We can't just call pm_runtime_get here instead of + * pm_runtime_get_noresume + axi_dma_resume because we need + * driver to work also without Runtime PM. 
+ */ + pm_runtime_get_noresume(chip->dev); + ret = axi_dma_resume(chip); + if (ret < 0) + goto err_pm_disable; + + axi_dma_hw_init(chip); + + pm_runtime_put(chip->dev); + + ret = dma_async_device_register(&dw->dma); + if (ret) + goto err_pm_disable; + + dev_info(chip->dev, "DesignWare AXI DMA Controller, %d channels\n", + dw->hdata->nr_channels); + + return 0; + +err_pm_disable: + pm_runtime_disable(chip->dev); + + return ret; +} + +static int dw_remove(struct platform_device *pdev) +{ + struct axi_dma_chip *chip = platform_get_drvdata(pdev); + struct dw_axi_dma *dw = chip->dw; + struct axi_dma_chan *chan, *_chan; + u32 i; + + /* Enable clk before accessing to registers */ + clk_prepare_enable(chip->cfgr_clk); + clk_prepare_enable(chip->core_clk); + axi_dma_irq_disable(chip); + for (i = 0; i < dw->hdata->nr_channels; i++) { + axi_chan_disable(&chip->dw->chan[i]); + axi_chan_irq_disable(&chip->dw->chan[i], DWAXIDMAC_IRQ_ALL); + } + axi_dma_disable(chip); + + pm_runtime_disable(chip->dev); + axi_dma_suspend(chip); + + devm_free_irq(chip->dev, chip->irq, chip); + + list_for_each_entry_safe(chan, _chan, &dw->dma.channels, + vc.chan.device_node) { + list_del(&chan->vc.chan.device_node); + tasklet_kill(&chan->vc.task); + } + + dma_async_device_unregister(&dw->dma); + + return 0; +} + +static const struct dev_pm_ops dw_axi_dma_pm_ops = { + SET_RUNTIME_PM_OPS(axi_dma_runtime_suspend, axi_dma_runtime_resume, NULL) +}; + +static const struct of_device_id dw_dma_of_id_table[] = { + { .compatible = "snps,axi-dma-1.01a" }, + {} +}; +MODULE_DEVICE_TABLE(of, dw_dma_of_id_table); + +static struct platform_driver dw_driver = { + .probe = dw_probe, + .remove = dw_remove, + .driver = { + .name = KBUILD_MODNAME, + .of_match_table = of_match_ptr(dw_dma_of_id_table), + .pm = &dw_axi_dma_pm_ops, + }, +}; +module_platform_driver(dw_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Synopsys DesignWare AXI DMA Controller platform driver"); +MODULE_AUTHOR("Eugeniy Paltsev 
<Eugeniy.Paltsev@synopsys.com>"); diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac.h b/drivers/dma/dw-axi-dmac/dw-axi-dmac.h new file mode 100644 index 000000000000..f8888dc0b8dc --- /dev/null +++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac.h @@ -0,0 +1,334 @@ +// SPDX-License-Identifier: GPL-2.0 +// (C) 2017-2018 Synopsys, Inc. (www.synopsys.com) + +/* + * Synopsys DesignWare AXI DMA Controller driver. + * + * Author: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com> + */ + +#ifndef _AXI_DMA_PLATFORM_H +#define _AXI_DMA_PLATFORM_H + +#include <linux/bitops.h> +#include <linux/clk.h> +#include <linux/device.h> +#include <linux/dmaengine.h> +#include <linux/types.h> + +#include "../virt-dma.h" + +#define DMAC_MAX_CHANNELS 8 +#define DMAC_MAX_MASTERS 2 +#define DMAC_MAX_BLK_SIZE 0x200000 + +struct dw_axi_dma_hcfg { + u32 nr_channels; + u32 nr_masters; + u32 m_data_width; + u32 block_size[DMAC_MAX_CHANNELS]; + u32 priority[DMAC_MAX_CHANNELS]; + /* maximum supported axi burst length */ + u32 axi_rw_burst_len; + bool restrict_axi_burst_len; +}; + +struct axi_dma_chan { + struct axi_dma_chip *chip; + void __iomem *chan_regs; + u8 id; + atomic_t descs_allocated; + + struct virt_dma_chan vc; + + /* these other elements are all protected by vc.lock */ + bool is_paused; +}; + +struct dw_axi_dma { + struct dma_device dma; + struct dw_axi_dma_hcfg *hdata; + struct dma_pool *desc_pool; + + /* channels */ + struct axi_dma_chan *chan; +}; + +struct axi_dma_chip { + struct device *dev; + int irq; + void __iomem *regs; + struct clk *core_clk; + struct clk *cfgr_clk; + struct dw_axi_dma *dw; +}; + +/* LLI == Linked List Item */ +struct __packed axi_dma_lli { + __le64 sar; + __le64 dar; + __le32 block_ts_lo; + __le32 block_ts_hi; + __le64 llp; + __le32 ctl_lo; + __le32 ctl_hi; + __le32 sstat; + __le32 dstat; + __le32 status_lo; + __le32 ststus_hi; + __le32 reserved_lo; + __le32 reserved_hi; +}; + +struct axi_dma_desc { + struct axi_dma_lli lli; + + struct virt_dma_desc vd; + struct 
/* Return the address of the struct device reached through dchan->dev. */
static inline struct device *dchan2dev(struct dma_chan *dchan)
{
	return &dchan->dev->device;
}

/*
 * Same as dchan2dev(), starting from the driver channel: the dma_chan used
 * is the one embedded at chan->vc.chan.
 */
static inline struct device *chan2dev(struct axi_dma_chan *chan)
{
	return &chan->vc.chan.dev->device;
}

/* Map an embedded virt_dma_desc back to its enclosing axi_dma_desc. */
static inline struct axi_dma_desc *vd_to_axi_desc(struct virt_dma_desc *vd)
{
	return container_of(vd, struct axi_dma_desc, vd);
}

/* Map an embedded virt_dma_chan back to its enclosing axi_dma_chan. */
static inline struct axi_dma_chan *vc_to_axi_dma_chan(struct virt_dma_chan *vc)
{
	return container_of(vc, struct axi_dma_chan, vc);
}

/*
 * Map a generic dma_chan to the driver's axi_dma_chan by going through
 * to_virt_chan() (declared in ../virt-dma.h) and then vc_to_axi_dma_chan().
 */
static inline struct axi_dma_chan *dchan_to_axi_dma_chan(struct dma_chan *dchan)
{
	return vc_to_axi_dma_chan(to_virt_chan(dchan));
}
/*
 * AXI read/write burst length encodings.  The numeric value of every
 * DWAXIDMAC_ARWLEN_<n> enumerator is <n> - 1; _MIN and _MAX alias the
 * smallest (1) and largest (256) entries.
 * NOTE(review): presumably these are the values placed in the CH_CTL_H
 * ARLEN/AWLEN fields (CH_CTL_H_ARLEN_POS / CH_CTL_H_AWLEN_POS) - confirm
 * against the users in the .c file.
 */
enum {
	DWAXIDMAC_ARWLEN_1		= 0,
	DWAXIDMAC_ARWLEN_2		= 1,
	DWAXIDMAC_ARWLEN_4		= 3,
	DWAXIDMAC_ARWLEN_8		= 7,
	DWAXIDMAC_ARWLEN_16		= 15,
	DWAXIDMAC_ARWLEN_32		= 31,
	DWAXIDMAC_ARWLEN_64		= 63,
	DWAXIDMAC_ARWLEN_128		= 127,
	DWAXIDMAC_ARWLEN_256		= 255,
	DWAXIDMAC_ARWLEN_MIN		= DWAXIDMAC_ARWLEN_1,
	DWAXIDMAC_ARWLEN_MAX		= DWAXIDMAC_ARWLEN_256
};
DWAXIDMAC_BURST_TRANS_LEN_4, + DWAXIDMAC_BURST_TRANS_LEN_8, + DWAXIDMAC_BURST_TRANS_LEN_16, + DWAXIDMAC_BURST_TRANS_LEN_32, + DWAXIDMAC_BURST_TRANS_LEN_64, + DWAXIDMAC_BURST_TRANS_LEN_128, + DWAXIDMAC_BURST_TRANS_LEN_256, + DWAXIDMAC_BURST_TRANS_LEN_512, + DWAXIDMAC_BURST_TRANS_LEN_1024 +}; + +#define CH_CTL_L_DST_WIDTH_POS 11 +#define CH_CTL_L_SRC_WIDTH_POS 8 + +#define CH_CTL_L_DST_INC_POS 6 +#define CH_CTL_L_SRC_INC_POS 4 +enum { + DWAXIDMAC_CH_CTL_L_INC = 0, + DWAXIDMAC_CH_CTL_L_NOINC +}; + +#define CH_CTL_L_DST_MAST BIT(2) +#define CH_CTL_L_SRC_MAST BIT(0) + +/* CH_CFG_H */ +#define CH_CFG_H_PRIORITY_POS 17 +#define CH_CFG_H_HS_SEL_DST_POS 4 +#define CH_CFG_H_HS_SEL_SRC_POS 3 +enum { + DWAXIDMAC_HS_SEL_HW = 0, + DWAXIDMAC_HS_SEL_SW +}; + +#define CH_CFG_H_TT_FC_POS 0 +enum { + DWAXIDMAC_TT_FC_MEM_TO_MEM_DMAC = 0, + DWAXIDMAC_TT_FC_MEM_TO_PER_DMAC, + DWAXIDMAC_TT_FC_PER_TO_MEM_DMAC, + DWAXIDMAC_TT_FC_PER_TO_PER_DMAC, + DWAXIDMAC_TT_FC_PER_TO_MEM_SRC, + DWAXIDMAC_TT_FC_PER_TO_PER_SRC, + DWAXIDMAC_TT_FC_MEM_TO_PER_DST, + DWAXIDMAC_TT_FC_PER_TO_PER_DST +}; + +/* CH_CFG_L */ +#define CH_CFG_L_DST_MULTBLK_TYPE_POS 2 +#define CH_CFG_L_SRC_MULTBLK_TYPE_POS 0 +enum { + DWAXIDMAC_MBLK_TYPE_CONTIGUOUS = 0, + DWAXIDMAC_MBLK_TYPE_RELOAD, + DWAXIDMAC_MBLK_TYPE_SHADOW_REG, + DWAXIDMAC_MBLK_TYPE_LL +}; + +/** + * DW AXI DMA channel interrupts + * + * @DWAXIDMAC_IRQ_NONE: Bitmask of no one interrupt + * @DWAXIDMAC_IRQ_BLOCK_TRF: Block transfer complete + * @DWAXIDMAC_IRQ_DMA_TRF: Dma transfer complete + * @DWAXIDMAC_IRQ_SRC_TRAN: Source transaction complete + * @DWAXIDMAC_IRQ_DST_TRAN: Destination transaction complete + * @DWAXIDMAC_IRQ_SRC_DEC_ERR: Source decode error + * @DWAXIDMAC_IRQ_DST_DEC_ERR: Destination decode error + * @DWAXIDMAC_IRQ_SRC_SLV_ERR: Source slave error + * @DWAXIDMAC_IRQ_DST_SLV_ERR: Destination slave error + * @DWAXIDMAC_IRQ_LLI_RD_DEC_ERR: LLI read decode error + * @DWAXIDMAC_IRQ_LLI_WR_DEC_ERR: LLI write decode error + * @DWAXIDMAC_IRQ_LLI_RD_SLV_ERR: 
LLI read slave error + * @DWAXIDMAC_IRQ_LLI_WR_SLV_ERR: LLI write slave error + * @DWAXIDMAC_IRQ_INVALID_ERR: LLI invalid error or Shadow register error + * @DWAXIDMAC_IRQ_MULTIBLKTYPE_ERR: Slave Interface Multiblock type error + * @DWAXIDMAC_IRQ_DEC_ERR: Slave Interface decode error + * @DWAXIDMAC_IRQ_WR2RO_ERR: Slave Interface write to read only error + * @DWAXIDMAC_IRQ_RD2RWO_ERR: Slave Interface read to write only error + * @DWAXIDMAC_IRQ_WRONCHEN_ERR: Slave Interface write to channel error + * @DWAXIDMAC_IRQ_SHADOWREG_ERR: Slave Interface shadow reg error + * @DWAXIDMAC_IRQ_WRONHOLD_ERR: Slave Interface hold error + * @DWAXIDMAC_IRQ_LOCK_CLEARED: Lock Cleared Status + * @DWAXIDMAC_IRQ_SRC_SUSPENDED: Source Suspended Status + * @DWAXIDMAC_IRQ_SUSPENDED: Channel Suspended Status + * @DWAXIDMAC_IRQ_DISABLED: Channel Disabled Status + * @DWAXIDMAC_IRQ_ABORTED: Channel Aborted Status + * @DWAXIDMAC_IRQ_ALL_ERR: Bitmask of all error interrupts + * @DWAXIDMAC_IRQ_ALL: Bitmask of all interrupts + */ +enum { + DWAXIDMAC_IRQ_NONE = 0, + DWAXIDMAC_IRQ_BLOCK_TRF = BIT(0), + DWAXIDMAC_IRQ_DMA_TRF = BIT(1), + DWAXIDMAC_IRQ_SRC_TRAN = BIT(3), + DWAXIDMAC_IRQ_DST_TRAN = BIT(4), + DWAXIDMAC_IRQ_SRC_DEC_ERR = BIT(5), + DWAXIDMAC_IRQ_DST_DEC_ERR = BIT(6), + DWAXIDMAC_IRQ_SRC_SLV_ERR = BIT(7), + DWAXIDMAC_IRQ_DST_SLV_ERR = BIT(8), + DWAXIDMAC_IRQ_LLI_RD_DEC_ERR = BIT(9), + DWAXIDMAC_IRQ_LLI_WR_DEC_ERR = BIT(10), + DWAXIDMAC_IRQ_LLI_RD_SLV_ERR = BIT(11), + DWAXIDMAC_IRQ_LLI_WR_SLV_ERR = BIT(12), + DWAXIDMAC_IRQ_INVALID_ERR = BIT(13), + DWAXIDMAC_IRQ_MULTIBLKTYPE_ERR = BIT(14), + DWAXIDMAC_IRQ_DEC_ERR = BIT(16), + DWAXIDMAC_IRQ_WR2RO_ERR = BIT(17), + DWAXIDMAC_IRQ_RD2RWO_ERR = BIT(18), + DWAXIDMAC_IRQ_WRONCHEN_ERR = BIT(19), + DWAXIDMAC_IRQ_SHADOWREG_ERR = BIT(20), + DWAXIDMAC_IRQ_WRONHOLD_ERR = BIT(21), + DWAXIDMAC_IRQ_LOCK_CLEARED = BIT(27), + DWAXIDMAC_IRQ_SRC_SUSPENDED = BIT(28), + DWAXIDMAC_IRQ_SUSPENDED = BIT(29), + DWAXIDMAC_IRQ_DISABLED = BIT(30), + DWAXIDMAC_IRQ_ABORTED = 
BIT(31), + DWAXIDMAC_IRQ_ALL_ERR = (GENMASK(21, 16) | GENMASK(14, 5)), + DWAXIDMAC_IRQ_ALL = GENMASK(31, 0) +}; + +enum { + DWAXIDMAC_TRANS_WIDTH_8 = 0, + DWAXIDMAC_TRANS_WIDTH_16, + DWAXIDMAC_TRANS_WIDTH_32, + DWAXIDMAC_TRANS_WIDTH_64, + DWAXIDMAC_TRANS_WIDTH_128, + DWAXIDMAC_TRANS_WIDTH_256, + DWAXIDMAC_TRANS_WIDTH_512, + DWAXIDMAC_TRANS_WIDTH_MAX = DWAXIDMAC_TRANS_WIDTH_512 +}; + +#endif /* _AXI_DMA_PLATFORM_H */ diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index 948df1ab5f1a..85ea92fcea54 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -1876,6 +1876,11 @@ static void edma_dma_init(struct edma_cc *ecc, bool legacy_mode) if (memcpy_channels) { m_ddev = devm_kzalloc(ecc->dev, sizeof(*m_ddev), GFP_KERNEL); + if (!m_ddev) { + dev_warn(ecc->dev, "memcpy is disabled due to OoM\n"); + memcpy_channels = NULL; + goto ch_setup; + } ecc->dma_memcpy = m_ddev; dma_cap_zero(m_ddev->cap_mask); @@ -1903,6 +1908,7 @@ static void edma_dma_init(struct edma_cc *ecc, bool legacy_mode) dev_info(ecc->dev, "memcpy is disabled\n"); } +ch_setup: for (i = 0; i < ecc->num_channels; i++) { struct edma_chan *echan = &ecc->slave_chans[i]; echan->ch_num = EDMA_CTLR_CHAN(ecc->id, i); diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index e7db24c67030..ccd03c3cedfe 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -338,6 +338,7 @@ struct sdma_channel { unsigned int chn_real_count; struct tasklet_struct tasklet; struct imx_dma_data data; + bool enabled; }; #define IMX_DMA_SG_LOOP BIT(0) @@ -596,7 +597,14 @@ static int sdma_config_ownership(struct sdma_channel *sdmac, static void sdma_enable_channel(struct sdma_engine *sdma, int channel) { + unsigned long flags; + struct sdma_channel *sdmac = &sdma->channel[channel]; + writel(BIT(channel), sdma->regs + SDMA_H_START); + + spin_lock_irqsave(&sdmac->lock, flags); + sdmac->enabled = true; + spin_unlock_irqrestore(&sdmac->lock, flags); } /* @@ -685,6 +693,14 @@ static void 
sdma_update_channel_loop(struct sdma_channel *sdmac) struct sdma_buffer_descriptor *bd; int error = 0; enum dma_status old_status = sdmac->status; + unsigned long flags; + + spin_lock_irqsave(&sdmac->lock, flags); + if (!sdmac->enabled) { + spin_unlock_irqrestore(&sdmac->lock, flags); + return; + } + spin_unlock_irqrestore(&sdmac->lock, flags); /* * loop mode. Iterate over descriptors, re-setup them and @@ -938,10 +954,15 @@ static int sdma_disable_channel(struct dma_chan *chan) struct sdma_channel *sdmac = to_sdma_chan(chan); struct sdma_engine *sdma = sdmac->sdma; int channel = sdmac->channel; + unsigned long flags; writel_relaxed(BIT(channel), sdma->regs + SDMA_H_STATSTOP); sdmac->status = DMA_ERROR; + spin_lock_irqsave(&sdmac->lock, flags); + sdmac->enabled = false; + spin_unlock_irqrestore(&sdmac->lock, flags); + return 0; } diff --git a/drivers/dma/mediatek/Kconfig b/drivers/dma/mediatek/Kconfig new file mode 100644 index 000000000000..27bac0bba09e --- /dev/null +++ b/drivers/dma/mediatek/Kconfig @@ -0,0 +1,13 @@ + +config MTK_HSDMA + tristate "MediaTek High-Speed DMA controller support" + depends on ARCH_MEDIATEK || COMPILE_TEST + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + ---help--- + Enable support for High-Speed DMA controller on MediaTek + SoCs. + + This controller provides the channels which is dedicated to + memory-to-memory transfer to offload from CPU through ring- + based descriptor management. diff --git a/drivers/dma/mediatek/Makefile b/drivers/dma/mediatek/Makefile new file mode 100644 index 000000000000..6e778f842f01 --- /dev/null +++ b/drivers/dma/mediatek/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_MTK_HSDMA) += mtk-hsdma.o diff --git a/drivers/dma/mediatek/mtk-hsdma.c b/drivers/dma/mediatek/mtk-hsdma.c new file mode 100644 index 000000000000..b7ec56ae02a6 --- /dev/null +++ b/drivers/dma/mediatek/mtk-hsdma.c @@ -0,0 +1,1056 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017-2018 MediaTek Inc. 
/* Macros for physical descriptor (PD) manipulation */

/*
 * Number of PDs in each ring.  It must be a power of 2 because the index
 * helpers below wrap with "& (size - 1)" instead of a modulo.
 */
#define MTK_DMA_SIZE			64
/* Ring index following/preceding x in a ring of y entries (y power of 2) */
#define MTK_HSDMA_NEXT_DESP_IDX(x, y)	(((x) + 1) & ((y) - 1))
#define MTK_HSDMA_LAST_DESP_IDX(x, y)	(((x) - 1) & ((y) - 1))
/* Largest payload, in bytes, that the driver places in a single PD */
#define MTK_HSDMA_MAX_LEN		0x3f80
/* NOTE(review): presumably the required buffer alignment in bytes - confirm */
#define MTK_HSDMA_ALIGN_SIZE		4
/* The PD length field is 14 bits wide and sits at bits [29:16] of desc2 */
#define MTK_HSDMA_PLEN_MASK		0x3fff
/* Encode a byte count into / extract a byte count from the desc2 word */
#define MTK_HSDMA_DESC_PLEN(x)		(((x) & MTK_HSDMA_PLEN_MASK) << 16)
#define MTK_HSDMA_DESC_PLEN_GET(x)	(((x) >> 16) & MTK_HSDMA_PLEN_MASK)
/**
 * enum mtk_hsdma_vdesc_flag - per ring slot marker kept in struct mtk_hsdma_cb
 * @MTK_HSDMA_VDESC_FINISHED:	The slot holds the last PD of its VD.  It is set
 *				by mtk_hsdma_issue_pending_vdesc() once the
 *				whole VD has been queued and is tested by
 *				mtk_hsdma_free_rooms_in_ring() to decide when
 *				the VD can be completed.
 */
enum mtk_hsdma_vdesc_flag {
	MTK_HSDMA_VDESC_FINISHED	= 0x01,
};

/* True when the slot flag x is exactly MTK_HSDMA_VDESC_FINISHED */
#define IS_MTK_HSDMA_VDESC_FINISHED(x) ((x) == MTK_HSDMA_VDESC_FINISHED)
/**
 * struct mtk_hsdma_cb - This is the struct holding the extra info the RX
 *			 ring needs in order to know which VD a given PD is
 *			 mapped to. One entry exists per ring slot.
 * @vd:			 Pointer to the relevant VD, or NULL when the slot
 *			 is not in use.
 * @flag:		 Flag indicating what action should be taken when
 *			 the PD in this slot is completed.
 */
struct mtk_hsdma_cb {
	struct virt_dma_desc *vd;
	enum mtk_hsdma_vdesc_flag flag;
};
/**
 * struct mtk_hsdma_vchan - This is the struct holding info describing virtual
 *			    channel (VC)
 * @vc:			    An instance for struct virt_dma_chan
 * @issue_completion:	    Completion signalled by the IRQ path once every
 *			    descriptor handed to the hardware has completed
 * @issue_synchronize:	    Bool indicating that a waiter is blocked on
 *			    @issue_completion
 * @desc_hw_processing:	    List those descriptors the hardware is processing,
 *			    which is protected by vc.lock
 */
struct mtk_hsdma_vchan {
	struct virt_dma_chan vc;
	struct completion issue_completion;
	bool issue_synchronize;
	struct list_head desc_hw_processing;
};

/**
 * struct mtk_hsdma_soc - This is the struct holding differences among SoCs
 * @ddone:		  Bit mask for DDONE
 * @ls0:		  Bit mask for LS0
 */
struct mtk_hsdma_soc {
	__le32 ddone;
	__le32 ls0;
};

/**
 * struct mtk_hsdma_device - This is the struct holding info describing HSDMA
 *			     device
 * @ddev:		     An instance for struct dma_device
 * @base:		     The mapped register I/O base
 * @clk:		     The clock that the device is using
 * @irq:		     The IRQ that the device is using
 * @dma_requests:	     The number of VCs the device supports
 * @vc:			     The pointer to all available VCs
 * @pc:			     The pointer to the underlying PC
 * @pc_refcnt:		     Track how many VCs are using the PC
 * @lock:		     Lock protecting the PC against concurrent access
 *			     from multiple VCs
 * @soc:		     The pointer to the area holding differences among
 *			     various platforms
 */
struct mtk_hsdma_device {
	struct dma_device ddev;
	void __iomem *base;
	struct clk *clk;
	u32 irq;

	u32 dma_requests;
	struct mtk_hsdma_vchan *vc;
	struct mtk_hsdma_pchan *pc;
	refcount_t pc_refcnt;

	/* Lock used to protect against multiple VCs access PC */
	spinlock_t lock;

	const struct mtk_hsdma_soc *soc;
};
+static struct mtk_hsdma_device *to_hsdma_dev(struct dma_chan *chan) +{ + return container_of(chan->device, struct mtk_hsdma_device, ddev); +} + +static inline struct mtk_hsdma_vchan *to_hsdma_vchan(struct dma_chan *chan) +{ + return container_of(chan, struct mtk_hsdma_vchan, vc.chan); +} + +static struct mtk_hsdma_vdesc *to_hsdma_vdesc(struct virt_dma_desc *vd) +{ + return container_of(vd, struct mtk_hsdma_vdesc, vd); +} + +static struct device *hsdma2dev(struct mtk_hsdma_device *hsdma) +{ + return hsdma->ddev.dev; +} + +static u32 mtk_dma_read(struct mtk_hsdma_device *hsdma, u32 reg) +{ + return readl(hsdma->base + reg); +} + +static void mtk_dma_write(struct mtk_hsdma_device *hsdma, u32 reg, u32 val) +{ + writel(val, hsdma->base + reg); +} + +static void mtk_dma_rmw(struct mtk_hsdma_device *hsdma, u32 reg, + u32 mask, u32 set) +{ + u32 val; + + val = mtk_dma_read(hsdma, reg); + val &= ~mask; + val |= set; + mtk_dma_write(hsdma, reg, val); +} + +static void mtk_dma_set(struct mtk_hsdma_device *hsdma, u32 reg, u32 val) +{ + mtk_dma_rmw(hsdma, reg, 0, val); +} + +static void mtk_dma_clr(struct mtk_hsdma_device *hsdma, u32 reg, u32 val) +{ + mtk_dma_rmw(hsdma, reg, val, 0); +} + +static void mtk_hsdma_vdesc_free(struct virt_dma_desc *vd) +{ + kfree(container_of(vd, struct mtk_hsdma_vdesc, vd)); +} + +static int mtk_hsdma_busy_wait(struct mtk_hsdma_device *hsdma) +{ + u32 status = 0; + + return readl_poll_timeout(hsdma->base + MTK_HSDMA_GLO, status, + !(status & MTK_HSDMA_GLO_BUSY), + MTK_HSDMA_USEC_POLL, + MTK_HSDMA_TIMEOUT_POLL); +} + +static int mtk_hsdma_alloc_pchan(struct mtk_hsdma_device *hsdma, + struct mtk_hsdma_pchan *pc) +{ + struct mtk_hsdma_ring *ring = &pc->ring; + int err; + + memset(pc, 0, sizeof(*pc)); + + /* + * Allocate ring space where [0 ... MTK_DMA_SIZE - 1] is for TX ring + * and [MTK_DMA_SIZE ... 2 * MTK_DMA_SIZE - 1] is for RX ring. 
+ */ + pc->sz_ring = 2 * MTK_DMA_SIZE * sizeof(*ring->txd); + ring->txd = dma_zalloc_coherent(hsdma2dev(hsdma), pc->sz_ring, + &ring->tphys, GFP_NOWAIT); + if (!ring->txd) + return -ENOMEM; + + ring->rxd = &ring->txd[MTK_DMA_SIZE]; + ring->rphys = ring->tphys + MTK_DMA_SIZE * sizeof(*ring->txd); + ring->cur_tptr = 0; + ring->cur_rptr = MTK_DMA_SIZE - 1; + + ring->cb = kcalloc(MTK_DMA_SIZE, sizeof(*ring->cb), GFP_NOWAIT); + if (!ring->cb) { + err = -ENOMEM; + goto err_free_dma; + } + + atomic_set(&pc->nr_free, MTK_DMA_SIZE - 1); + + /* Disable HSDMA and wait for the completion */ + mtk_dma_clr(hsdma, MTK_HSDMA_GLO, MTK_HSDMA_GLO_DMA); + err = mtk_hsdma_busy_wait(hsdma); + if (err) + goto err_free_cb; + + /* Reset */ + mtk_dma_set(hsdma, MTK_HSDMA_RESET, + MTK_HSDMA_RST_TX | MTK_HSDMA_RST_RX); + mtk_dma_clr(hsdma, MTK_HSDMA_RESET, + MTK_HSDMA_RST_TX | MTK_HSDMA_RST_RX); + + /* Setup HSDMA initial pointer in the ring */ + mtk_dma_write(hsdma, MTK_HSDMA_TX_BASE, ring->tphys); + mtk_dma_write(hsdma, MTK_HSDMA_TX_CNT, MTK_DMA_SIZE); + mtk_dma_write(hsdma, MTK_HSDMA_TX_CPU, ring->cur_tptr); + mtk_dma_write(hsdma, MTK_HSDMA_TX_DMA, 0); + mtk_dma_write(hsdma, MTK_HSDMA_RX_BASE, ring->rphys); + mtk_dma_write(hsdma, MTK_HSDMA_RX_CNT, MTK_DMA_SIZE); + mtk_dma_write(hsdma, MTK_HSDMA_RX_CPU, ring->cur_rptr); + mtk_dma_write(hsdma, MTK_HSDMA_RX_DMA, 0); + + /* Enable HSDMA */ + mtk_dma_set(hsdma, MTK_HSDMA_GLO, MTK_HSDMA_GLO_DMA); + + /* Setup delayed interrupt */ + mtk_dma_write(hsdma, MTK_HSDMA_DLYINT, MTK_HSDMA_DLYINT_DEFAULT); + + /* Enable interrupt */ + mtk_dma_set(hsdma, MTK_HSDMA_INT_ENABLE, MTK_HSDMA_INT_RXDONE); + + return 0; + +err_free_cb: + kfree(ring->cb); + +err_free_dma: + dma_free_coherent(hsdma2dev(hsdma), + pc->sz_ring, ring->txd, ring->tphys); + return err; +} + +static void mtk_hsdma_free_pchan(struct mtk_hsdma_device *hsdma, + struct mtk_hsdma_pchan *pc) +{ + struct mtk_hsdma_ring *ring = &pc->ring; + + /* Disable HSDMA and then wait for the completion */ + 
mtk_dma_clr(hsdma, MTK_HSDMA_GLO, MTK_HSDMA_GLO_DMA); + mtk_hsdma_busy_wait(hsdma); + + /* Reset pointer in the ring */ + mtk_dma_clr(hsdma, MTK_HSDMA_INT_ENABLE, MTK_HSDMA_INT_RXDONE); + mtk_dma_write(hsdma, MTK_HSDMA_TX_BASE, 0); + mtk_dma_write(hsdma, MTK_HSDMA_TX_CNT, 0); + mtk_dma_write(hsdma, MTK_HSDMA_TX_CPU, 0); + mtk_dma_write(hsdma, MTK_HSDMA_RX_BASE, 0); + mtk_dma_write(hsdma, MTK_HSDMA_RX_CNT, 0); + mtk_dma_write(hsdma, MTK_HSDMA_RX_CPU, MTK_DMA_SIZE - 1); + + kfree(ring->cb); + + dma_free_coherent(hsdma2dev(hsdma), + pc->sz_ring, ring->txd, ring->tphys); +} + +static int mtk_hsdma_issue_pending_vdesc(struct mtk_hsdma_device *hsdma, + struct mtk_hsdma_pchan *pc, + struct mtk_hsdma_vdesc *hvd) +{ + struct mtk_hsdma_ring *ring = &pc->ring; + struct mtk_hsdma_pdesc *txd, *rxd; + u16 reserved, prev, tlen, num_sgs; + unsigned long flags; + + /* Protect against PC is accessed by multiple VCs simultaneously */ + spin_lock_irqsave(&hsdma->lock, flags); + + /* + * Reserve rooms, where pc->nr_free is used to track how many free + * rooms in the ring being updated in user and IRQ context. + */ + num_sgs = DIV_ROUND_UP(hvd->len, MTK_HSDMA_MAX_LEN); + reserved = min_t(u16, num_sgs, atomic_read(&pc->nr_free)); + + if (!reserved) { + spin_unlock_irqrestore(&hsdma->lock, flags); + return -ENOSPC; + } + + atomic_sub(reserved, &pc->nr_free); + + while (reserved--) { + /* Limit size by PD capability for valid data moving */ + tlen = (hvd->len > MTK_HSDMA_MAX_LEN) ? + MTK_HSDMA_MAX_LEN : hvd->len; + + /* + * Setup PDs using the remaining VD info mapped on those + * reserved rooms. And since RXD is shared memory between the + * host and the device allocated by dma_alloc_coherent call, + * the helper macro WRITE_ONCE can ensure the data written to + * RAM would really happens. 
+ */ + txd = &ring->txd[ring->cur_tptr]; + WRITE_ONCE(txd->desc1, hvd->src); + WRITE_ONCE(txd->desc2, + hsdma->soc->ls0 | MTK_HSDMA_DESC_PLEN(tlen)); + + rxd = &ring->rxd[ring->cur_tptr]; + WRITE_ONCE(rxd->desc1, hvd->dest); + WRITE_ONCE(rxd->desc2, MTK_HSDMA_DESC_PLEN(tlen)); + + /* Associate VD, the PD belonged to */ + ring->cb[ring->cur_tptr].vd = &hvd->vd; + + /* Move forward the pointer of TX ring */ + ring->cur_tptr = MTK_HSDMA_NEXT_DESP_IDX(ring->cur_tptr, + MTK_DMA_SIZE); + + /* Update VD with remaining data */ + hvd->src += tlen; + hvd->dest += tlen; + hvd->len -= tlen; + } + + /* + * Tagging flag for the last PD for VD will be responsible for + * completing VD. + */ + if (!hvd->len) { + prev = MTK_HSDMA_LAST_DESP_IDX(ring->cur_tptr, MTK_DMA_SIZE); + ring->cb[prev].flag = MTK_HSDMA_VDESC_FINISHED; + } + + /* Ensure all changes indeed done before we're going on */ + wmb(); + + /* + * Updating into hardware the pointer of TX ring lets HSDMA to take + * action for those pending PDs. + */ + mtk_dma_write(hsdma, MTK_HSDMA_TX_CPU, ring->cur_tptr); + + spin_unlock_irqrestore(&hsdma->lock, flags); + + return 0; +} + +static void mtk_hsdma_issue_vchan_pending(struct mtk_hsdma_device *hsdma, + struct mtk_hsdma_vchan *hvc) +{ + struct virt_dma_desc *vd, *vd2; + int err; + + lockdep_assert_held(&hvc->vc.lock); + + list_for_each_entry_safe(vd, vd2, &hvc->vc.desc_issued, node) { + struct mtk_hsdma_vdesc *hvd; + + hvd = to_hsdma_vdesc(vd); + + /* Map VD into PC and all VCs shares a single PC */ + err = mtk_hsdma_issue_pending_vdesc(hsdma, hsdma->pc, hvd); + + /* + * Move VD from desc_issued to desc_hw_processing when entire + * VD is fit into available PDs. Otherwise, the uncompleted + * VDs would stay in list desc_issued and then restart the + * processing as soon as possible once underlying ring space + * got freed. 
+ */ + if (err == -ENOSPC || hvd->len > 0) + break; + + /* + * The extra list desc_hw_processing is used because + * hardware can't provide sufficient information allowing us + * to know what VDs are still working on the underlying ring. + * Through the additional list, it can help us to implement + * terminate_all, residue calculation and such thing needed + * to know detail descriptor status on the hardware. + */ + list_move_tail(&vd->node, &hvc->desc_hw_processing); + } +} + +static void mtk_hsdma_free_rooms_in_ring(struct mtk_hsdma_device *hsdma) +{ + struct mtk_hsdma_vchan *hvc; + struct mtk_hsdma_pdesc *rxd; + struct mtk_hsdma_vdesc *hvd; + struct mtk_hsdma_pchan *pc; + struct mtk_hsdma_cb *cb; + int i = MTK_DMA_SIZE; + __le32 desc2; + u32 status; + u16 next; + + /* Read IRQ status */ + status = mtk_dma_read(hsdma, MTK_HSDMA_INT_STATUS); + if (unlikely(!(status & MTK_HSDMA_INT_RXDONE))) + goto rx_done; + + pc = hsdma->pc; + + /* + * Using a fail-safe loop with iterations of up to MTK_DMA_SIZE to + * reclaim these finished descriptors: The most number of PDs the ISR + * can handle at one time shouldn't be more than MTK_DMA_SIZE so we + * take it as limited count instead of just using a dangerous infinite + * poll. + */ + while (i--) { + next = MTK_HSDMA_NEXT_DESP_IDX(pc->ring.cur_rptr, + MTK_DMA_SIZE); + rxd = &pc->ring.rxd[next]; + + /* + * If MTK_HSDMA_DESC_DDONE is no specified, that means data + * moving for the PD is still under going. 
+ */ + desc2 = READ_ONCE(rxd->desc2); + if (!(desc2 & hsdma->soc->ddone)) + break; + + cb = &pc->ring.cb[next]; + if (unlikely(!cb->vd)) { + dev_err(hsdma2dev(hsdma), "cb->vd cannot be null\n"); + break; + } + + /* Update residue of VD the associated PD belonged to */ + hvd = to_hsdma_vdesc(cb->vd); + hvd->residue -= MTK_HSDMA_DESC_PLEN_GET(rxd->desc2); + + /* Complete VD until the relevant last PD is finished */ + if (IS_MTK_HSDMA_VDESC_FINISHED(cb->flag)) { + hvc = to_hsdma_vchan(cb->vd->tx.chan); + + spin_lock(&hvc->vc.lock); + + /* Remove VD from list desc_hw_processing */ + list_del(&cb->vd->node); + + /* Add VD into list desc_completed */ + vchan_cookie_complete(cb->vd); + + if (hvc->issue_synchronize && + list_empty(&hvc->desc_hw_processing)) { + complete(&hvc->issue_completion); + hvc->issue_synchronize = false; + } + spin_unlock(&hvc->vc.lock); + + cb->flag = 0; + } + + cb->vd = 0; + + /* + * Recycle the RXD with the helper WRITE_ONCE that can ensure + * data written into RAM would really happens. + */ + WRITE_ONCE(rxd->desc1, 0); + WRITE_ONCE(rxd->desc2, 0); + pc->ring.cur_rptr = next; + + /* Release rooms */ + atomic_inc(&pc->nr_free); + } + + /* Ensure all changes indeed done before we're going on */ + wmb(); + + /* Update CPU pointer for those completed PDs */ + mtk_dma_write(hsdma, MTK_HSDMA_RX_CPU, pc->ring.cur_rptr); + + /* + * Acking the pending IRQ allows hardware no longer to keep the used + * IRQ line in certain trigger state when software has completed all + * the finished physical descriptors. 
+ */ + if (atomic_read(&pc->nr_free) >= MTK_DMA_SIZE - 1) + mtk_dma_write(hsdma, MTK_HSDMA_INT_STATUS, status); + + /* ASAP handles pending VDs in all VCs after freeing some rooms */ + for (i = 0; i < hsdma->dma_requests; i++) { + hvc = &hsdma->vc[i]; + spin_lock(&hvc->vc.lock); + mtk_hsdma_issue_vchan_pending(hsdma, hvc); + spin_unlock(&hvc->vc.lock); + } + +rx_done: + /* All completed PDs are cleaned up, so enable interrupt again */ + mtk_dma_set(hsdma, MTK_HSDMA_INT_ENABLE, MTK_HSDMA_INT_RXDONE); +} + +static irqreturn_t mtk_hsdma_irq(int irq, void *devid) +{ + struct mtk_hsdma_device *hsdma = devid; + + /* + * Disable interrupt until all completed PDs are cleaned up in + * mtk_hsdma_free_rooms call. + */ + mtk_dma_clr(hsdma, MTK_HSDMA_INT_ENABLE, MTK_HSDMA_INT_RXDONE); + + mtk_hsdma_free_rooms_in_ring(hsdma); + + return IRQ_HANDLED; +} + +static struct virt_dma_desc *mtk_hsdma_find_active_desc(struct dma_chan *c, + dma_cookie_t cookie) +{ + struct mtk_hsdma_vchan *hvc = to_hsdma_vchan(c); + struct virt_dma_desc *vd; + + list_for_each_entry(vd, &hvc->desc_hw_processing, node) + if (vd->tx.cookie == cookie) + return vd; + + list_for_each_entry(vd, &hvc->vc.desc_issued, node) + if (vd->tx.cookie == cookie) + return vd; + + return NULL; +} + +static enum dma_status mtk_hsdma_tx_status(struct dma_chan *c, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct mtk_hsdma_vchan *hvc = to_hsdma_vchan(c); + struct mtk_hsdma_vdesc *hvd; + struct virt_dma_desc *vd; + enum dma_status ret; + unsigned long flags; + size_t bytes = 0; + + ret = dma_cookie_status(c, cookie, txstate); + if (ret == DMA_COMPLETE || !txstate) + return ret; + + spin_lock_irqsave(&hvc->vc.lock, flags); + vd = mtk_hsdma_find_active_desc(c, cookie); + spin_unlock_irqrestore(&hvc->vc.lock, flags); + + if (vd) { + hvd = to_hsdma_vdesc(vd); + bytes = hvd->residue; + } + + dma_set_residue(txstate, bytes); + + return ret; +} + +static void mtk_hsdma_issue_pending(struct dma_chan *c) +{ + struct 
mtk_hsdma_device *hsdma = to_hsdma_dev(c); + struct mtk_hsdma_vchan *hvc = to_hsdma_vchan(c); + unsigned long flags; + + spin_lock_irqsave(&hvc->vc.lock, flags); + + if (vchan_issue_pending(&hvc->vc)) + mtk_hsdma_issue_vchan_pending(hsdma, hvc); + + spin_unlock_irqrestore(&hvc->vc.lock, flags); +} + +static struct dma_async_tx_descriptor * +mtk_hsdma_prep_dma_memcpy(struct dma_chan *c, dma_addr_t dest, + dma_addr_t src, size_t len, unsigned long flags) +{ + struct mtk_hsdma_vdesc *hvd; + + hvd = kzalloc(sizeof(*hvd), GFP_NOWAIT); + if (!hvd) + return NULL; + + hvd->len = len; + hvd->residue = len; + hvd->src = src; + hvd->dest = dest; + + return vchan_tx_prep(to_virt_chan(c), &hvd->vd, flags); +} + +static int mtk_hsdma_free_inactive_desc(struct dma_chan *c) +{ + struct virt_dma_chan *vc = to_virt_chan(c); + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&vc->lock, flags); + list_splice_tail_init(&vc->desc_allocated, &head); + list_splice_tail_init(&vc->desc_submitted, &head); + list_splice_tail_init(&vc->desc_issued, &head); + spin_unlock_irqrestore(&vc->lock, flags); + + /* At the point, we don't expect users put descriptor into VC again */ + vchan_dma_desc_free_list(vc, &head); + + return 0; +} + +static void mtk_hsdma_free_active_desc(struct dma_chan *c) +{ + struct mtk_hsdma_vchan *hvc = to_hsdma_vchan(c); + bool sync_needed = false; + + /* + * Once issue_synchronize is being set, which means once the hardware + * consumes all descriptors for the channel in the ring, the + * synchronization must be be notified immediately it is completed. + */ + spin_lock(&hvc->vc.lock); + if (!list_empty(&hvc->desc_hw_processing)) { + hvc->issue_synchronize = true; + sync_needed = true; + } + spin_unlock(&hvc->vc.lock); + + if (sync_needed) + wait_for_completion(&hvc->issue_completion); + /* + * At the point, we expect that all remaining descriptors in the ring + * for the channel should be all processing done. 
+ */ + WARN_ONCE(!list_empty(&hvc->desc_hw_processing), + "Desc pending still in list desc_hw_processing\n"); + + /* Free all descriptors in list desc_completed */ + vchan_synchronize(&hvc->vc); + + WARN_ONCE(!list_empty(&hvc->vc.desc_completed), + "Desc pending still in list desc_completed\n"); +} + +static int mtk_hsdma_terminate_all(struct dma_chan *c) +{ + /* + * Free pending descriptors not processed yet by hardware that have + * previously been submitted to the channel. + */ + mtk_hsdma_free_inactive_desc(c); + + /* + * However, the DMA engine doesn't provide any way to stop these + * descriptors being processed currently by hardware. The only way is + * to just waiting until these descriptors are all processed completely + * through mtk_hsdma_free_active_desc call. + */ + mtk_hsdma_free_active_desc(c); + + return 0; +} + +static int mtk_hsdma_alloc_chan_resources(struct dma_chan *c) +{ + struct mtk_hsdma_device *hsdma = to_hsdma_dev(c); + int err; + + /* + * Since HSDMA has only one PC, the resource for PC is being allocated + * when the first VC is being created and the other VCs would run on + * the same PC. + */ + if (!refcount_read(&hsdma->pc_refcnt)) { + err = mtk_hsdma_alloc_pchan(hsdma, hsdma->pc); + if (err) + return err; + /* + * refcount_inc would complain increment on 0; use-after-free. + * Thus, we need to explicitly set it as 1 initially. 
+ */ + refcount_set(&hsdma->pc_refcnt, 1); + } else { + refcount_inc(&hsdma->pc_refcnt); + } + + return 0; +} + +static void mtk_hsdma_free_chan_resources(struct dma_chan *c) +{ + struct mtk_hsdma_device *hsdma = to_hsdma_dev(c); + + /* Free all descriptors in all lists on the VC */ + mtk_hsdma_terminate_all(c); + + /* The resource for PC is not freed until all the VCs are destroyed */ + if (!refcount_dec_and_test(&hsdma->pc_refcnt)) + return; + + mtk_hsdma_free_pchan(hsdma, hsdma->pc); +} + +static int mtk_hsdma_hw_init(struct mtk_hsdma_device *hsdma) +{ + int err; + + pm_runtime_enable(hsdma2dev(hsdma)); + pm_runtime_get_sync(hsdma2dev(hsdma)); + + err = clk_prepare_enable(hsdma->clk); + if (err) + return err; + + mtk_dma_write(hsdma, MTK_HSDMA_INT_ENABLE, 0); + mtk_dma_write(hsdma, MTK_HSDMA_GLO, MTK_HSDMA_GLO_DEFAULT); + + return 0; +} + +static int mtk_hsdma_hw_deinit(struct mtk_hsdma_device *hsdma) +{ + mtk_dma_write(hsdma, MTK_HSDMA_GLO, 0); + + clk_disable_unprepare(hsdma->clk); + + pm_runtime_put_sync(hsdma2dev(hsdma)); + pm_runtime_disable(hsdma2dev(hsdma)); + + return 0; +} + +static const struct mtk_hsdma_soc mt7623_soc = { + .ddone = BIT(31), + .ls0 = BIT(30), +}; + +static const struct mtk_hsdma_soc mt7622_soc = { + .ddone = BIT(15), + .ls0 = BIT(14), +}; + +static const struct of_device_id mtk_hsdma_match[] = { + { .compatible = "mediatek,mt7623-hsdma", .data = &mt7623_soc}, + { .compatible = "mediatek,mt7622-hsdma", .data = &mt7622_soc}, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, mtk_hsdma_match); + +static int mtk_hsdma_probe(struct platform_device *pdev) +{ + struct mtk_hsdma_device *hsdma; + struct mtk_hsdma_vchan *vc; + struct dma_device *dd; + struct resource *res; + int i, err; + + hsdma = devm_kzalloc(&pdev->dev, sizeof(*hsdma), GFP_KERNEL); + if (!hsdma) + return -ENOMEM; + + dd = &hsdma->ddev; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + hsdma->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(hsdma->base)) + 
return PTR_ERR(hsdma->base); + + hsdma->soc = of_device_get_match_data(&pdev->dev); + if (!hsdma->soc) { + dev_err(&pdev->dev, "No device match found\n"); + return -ENODEV; + } + + hsdma->clk = devm_clk_get(&pdev->dev, "hsdma"); + if (IS_ERR(hsdma->clk)) { + dev_err(&pdev->dev, "No clock for %s\n", + dev_name(&pdev->dev)); + return PTR_ERR(hsdma->clk); + } + + res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!res) { + dev_err(&pdev->dev, "No irq resource for %s\n", + dev_name(&pdev->dev)); + return -EINVAL; + } + hsdma->irq = res->start; + + refcount_set(&hsdma->pc_refcnt, 0); + spin_lock_init(&hsdma->lock); + + dma_cap_set(DMA_MEMCPY, dd->cap_mask); + + dd->copy_align = MTK_HSDMA_ALIGN_SIZE; + dd->device_alloc_chan_resources = mtk_hsdma_alloc_chan_resources; + dd->device_free_chan_resources = mtk_hsdma_free_chan_resources; + dd->device_tx_status = mtk_hsdma_tx_status; + dd->device_issue_pending = mtk_hsdma_issue_pending; + dd->device_prep_dma_memcpy = mtk_hsdma_prep_dma_memcpy; + dd->device_terminate_all = mtk_hsdma_terminate_all; + dd->src_addr_widths = MTK_HSDMA_DMA_BUSWIDTHS; + dd->dst_addr_widths = MTK_HSDMA_DMA_BUSWIDTHS; + dd->directions = BIT(DMA_MEM_TO_MEM); + dd->residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; + dd->dev = &pdev->dev; + INIT_LIST_HEAD(&dd->channels); + + hsdma->dma_requests = MTK_HSDMA_NR_VCHANS; + if (pdev->dev.of_node && of_property_read_u32(pdev->dev.of_node, + "dma-requests", + &hsdma->dma_requests)) { + dev_info(&pdev->dev, + "Using %u as missing dma-requests property\n", + MTK_HSDMA_NR_VCHANS); + } + + hsdma->pc = devm_kcalloc(&pdev->dev, MTK_HSDMA_NR_MAX_PCHANS, + sizeof(*hsdma->pc), GFP_KERNEL); + if (!hsdma->pc) + return -ENOMEM; + + hsdma->vc = devm_kcalloc(&pdev->dev, hsdma->dma_requests, + sizeof(*hsdma->vc), GFP_KERNEL); + if (!hsdma->vc) + return -ENOMEM; + + for (i = 0; i < hsdma->dma_requests; i++) { + vc = &hsdma->vc[i]; + vc->vc.desc_free = mtk_hsdma_vdesc_free; + vchan_init(&vc->vc, dd); + 
init_completion(&vc->issue_completion); + INIT_LIST_HEAD(&vc->desc_hw_processing); + } + + err = dma_async_device_register(dd); + if (err) + return err; + + err = of_dma_controller_register(pdev->dev.of_node, + of_dma_xlate_by_chan_id, hsdma); + if (err) { + dev_err(&pdev->dev, + "MediaTek HSDMA OF registration failed %d\n", err); + goto err_unregister; + } + + mtk_hsdma_hw_init(hsdma); + + err = devm_request_irq(&pdev->dev, hsdma->irq, + mtk_hsdma_irq, 0, + dev_name(&pdev->dev), hsdma); + if (err) { + dev_err(&pdev->dev, + "request_irq failed with err %d\n", err); + goto err_unregister; + } + + platform_set_drvdata(pdev, hsdma); + + dev_info(&pdev->dev, "MediaTek HSDMA driver registered\n"); + + return 0; + +err_unregister: + dma_async_device_unregister(dd); + + return err; +} + +static int mtk_hsdma_remove(struct platform_device *pdev) +{ + struct mtk_hsdma_device *hsdma = platform_get_drvdata(pdev); + struct mtk_hsdma_vchan *vc; + int i; + + /* Kill VC task */ + for (i = 0; i < hsdma->dma_requests; i++) { + vc = &hsdma->vc[i]; + + list_del(&vc->vc.chan.device_node); + tasklet_kill(&vc->vc.task); + } + + /* Disable DMA interrupt */ + mtk_dma_write(hsdma, MTK_HSDMA_INT_ENABLE, 0); + + /* Waits for any pending IRQ handlers to complete */ + synchronize_irq(hsdma->irq); + + /* Disable hardware */ + mtk_hsdma_hw_deinit(hsdma); + + dma_async_device_unregister(&hsdma->ddev); + of_dma_controller_free(pdev->dev.of_node); + + return 0; +} + +static struct platform_driver mtk_hsdma_driver = { + .probe = mtk_hsdma_probe, + .remove = mtk_hsdma_remove, + .driver = { + .name = KBUILD_MODNAME, + .of_match_table = mtk_hsdma_match, + }, +}; +module_platform_driver(mtk_hsdma_driver); + +MODULE_DESCRIPTION("MediaTek High-Speed DMA Controller Driver"); +MODULE_AUTHOR("Sean Wang <sean.wang@mediatek.com>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index d7327fd5f445..de1fd59fe136 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ 
-1510,7 +1510,7 @@ static void pl330_dotask(unsigned long data) /* Returns 1 if state was updated, 0 otherwise */ static int pl330_update(struct pl330_dmac *pl330) { - struct dma_pl330_desc *descdone, *tmp; + struct dma_pl330_desc *descdone; unsigned long flags; void __iomem *regs; u32 val; @@ -1588,7 +1588,9 @@ static int pl330_update(struct pl330_dmac *pl330) } /* Now that we are in no hurry, do the callbacks */ - list_for_each_entry_safe(descdone, tmp, &pl330->req_done, rqd) { + while (!list_empty(&pl330->req_done)) { + descdone = list_first_entry(&pl330->req_done, + struct dma_pl330_desc, rqd); list_del(&descdone->rqd); spin_unlock_irqrestore(&pl330->lock, flags); dma_pl330_rqcb(descdone, PL330_ERR_NONE); diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c index d076940e0c69..d29275b97e84 100644 --- a/drivers/dma/qcom/bam_dma.c +++ b/drivers/dma/qcom/bam_dma.c @@ -393,6 +393,7 @@ struct bam_device { struct device_dma_parameters dma_parms; struct bam_chan *channels; u32 num_channels; + u32 num_ees; /* execution environment ID, from DT */ u32 ee; @@ -934,12 +935,15 @@ static void bam_apply_new_config(struct bam_chan *bchan, struct bam_device *bdev = bchan->bdev; u32 maxburst; - if (dir == DMA_DEV_TO_MEM) - maxburst = bchan->slave.src_maxburst; - else - maxburst = bchan->slave.dst_maxburst; + if (!bdev->controlled_remotely) { + if (dir == DMA_DEV_TO_MEM) + maxburst = bchan->slave.src_maxburst; + else + maxburst = bchan->slave.dst_maxburst; - writel_relaxed(maxburst, bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD)); + writel_relaxed(maxburst, + bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD)); + } bchan->reconfigure = 0; } @@ -1128,15 +1132,19 @@ static int bam_init(struct bam_device *bdev) u32 val; /* read revision and configuration information */ - val = readl_relaxed(bam_addr(bdev, 0, BAM_REVISION)) >> NUM_EES_SHIFT; - val &= NUM_EES_MASK; + if (!bdev->num_ees) { + val = readl_relaxed(bam_addr(bdev, 0, BAM_REVISION)); + bdev->num_ees = (val >> NUM_EES_SHIFT) & 
NUM_EES_MASK; + } /* check that configured EE is within range */ - if (bdev->ee >= val) + if (bdev->ee >= bdev->num_ees) return -EINVAL; - val = readl_relaxed(bam_addr(bdev, 0, BAM_NUM_PIPES)); - bdev->num_channels = val & BAM_NUM_PIPES_MASK; + if (!bdev->num_channels) { + val = readl_relaxed(bam_addr(bdev, 0, BAM_NUM_PIPES)); + bdev->num_channels = val & BAM_NUM_PIPES_MASK; + } if (bdev->controlled_remotely) return 0; @@ -1232,9 +1240,25 @@ static int bam_dma_probe(struct platform_device *pdev) bdev->controlled_remotely = of_property_read_bool(pdev->dev.of_node, "qcom,controlled-remotely"); + if (bdev->controlled_remotely) { + ret = of_property_read_u32(pdev->dev.of_node, "num-channels", + &bdev->num_channels); + if (ret) + dev_err(bdev->dev, "num-channels unspecified in dt\n"); + + ret = of_property_read_u32(pdev->dev.of_node, "qcom,num-ees", + &bdev->num_ees); + if (ret) + dev_err(bdev->dev, "num-ees unspecified in dt\n"); + } + bdev->bamclk = devm_clk_get(bdev->dev, "bam_clk"); - if (IS_ERR(bdev->bamclk)) - return PTR_ERR(bdev->bamclk); + if (IS_ERR(bdev->bamclk)) { + if (!bdev->controlled_remotely) + return PTR_ERR(bdev->bamclk); + + bdev->bamclk = NULL; + } ret = clk_prepare_enable(bdev->bamclk); if (ret) { @@ -1309,6 +1333,11 @@ static int bam_dma_probe(struct platform_device *pdev) if (ret) goto err_unregister_dma; + if (bdev->controlled_remotely) { + pm_runtime_disable(&pdev->dev); + return 0; + } + pm_runtime_irq_safe(&pdev->dev); pm_runtime_set_autosuspend_delay(&pdev->dev, BAM_DMA_AUTOSUSPEND_DELAY); pm_runtime_use_autosuspend(&pdev->dev); @@ -1392,7 +1421,8 @@ static int __maybe_unused bam_dma_suspend(struct device *dev) { struct bam_device *bdev = dev_get_drvdata(dev); - pm_runtime_force_suspend(dev); + if (!bdev->controlled_remotely) + pm_runtime_force_suspend(dev); clk_unprepare(bdev->bamclk); @@ -1408,7 +1438,8 @@ static int __maybe_unused bam_dma_resume(struct device *dev) if (ret) return ret; - pm_runtime_force_resume(dev); + if 
(!bdev->controlled_remotely) + pm_runtime_force_resume(dev); return 0; } diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index d0cacdb0713e..2a2ccd9c78e4 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -1301,8 +1301,17 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan, * If the cookie doesn't correspond to the currently running transfer * then the descriptor hasn't been processed yet, and the residue is * equal to the full descriptor size. + * Also, a client driver is possible to call this function before + * rcar_dmac_isr_channel_thread() runs. In this case, the "desc.running" + * will be the next descriptor, and the done list will appear. So, if + * the argument cookie matches the done list's cookie, we can assume + * the residue is zero. */ if (cookie != desc->async_tx.cookie) { + list_for_each_entry(desc, &chan->desc.done, node) { + if (cookie == desc->async_tx.cookie) + return 0; + } list_for_each_entry(desc, &chan->desc.pending, node) { if (cookie == desc->async_tx.cookie) return desc->size; @@ -1677,8 +1686,8 @@ static const struct dev_pm_ops rcar_dmac_pm = { * - Wait for the current transfer to complete and stop the device, * - Resume transfers, if any. 
*/ - SET_LATE_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, - pm_runtime_force_resume) + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) SET_RUNTIME_PM_OPS(rcar_dmac_runtime_suspend, rcar_dmac_runtime_resume, NULL) }; diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c index 786fc8fcc38e..8c5807362a25 100644 --- a/drivers/dma/stm32-dma.c +++ b/drivers/dma/stm32-dma.c @@ -5,6 +5,7 @@ * * Copyright (C) M'boumba Cedric Madianga 2015 * Author: M'boumba Cedric Madianga <cedric.madianga@gmail.com> + * Pierre-Yves Mordret <pierre-yves.mordret@st.com> * * License terms: GNU General Public License (GPL), version 2 */ @@ -33,9 +34,14 @@ #define STM32_DMA_LIFCR 0x0008 /* DMA Low Int Flag Clear Reg */ #define STM32_DMA_HIFCR 0x000c /* DMA High Int Flag Clear Reg */ #define STM32_DMA_TCI BIT(5) /* Transfer Complete Interrupt */ +#define STM32_DMA_HTI BIT(4) /* Half Transfer Interrupt */ #define STM32_DMA_TEI BIT(3) /* Transfer Error Interrupt */ #define STM32_DMA_DMEI BIT(2) /* Direct Mode Error Interrupt */ #define STM32_DMA_FEI BIT(0) /* FIFO Error Interrupt */ +#define STM32_DMA_MASKI (STM32_DMA_TCI \ + | STM32_DMA_TEI \ + | STM32_DMA_DMEI \ + | STM32_DMA_FEI) /* DMA Stream x Configuration Register */ #define STM32_DMA_SCR(x) (0x0010 + 0x18 * (x)) /* x = 0..7 */ @@ -60,7 +66,8 @@ #define STM32_DMA_SCR_PINC BIT(9) /* Peripheral increment mode */ #define STM32_DMA_SCR_CIRC BIT(8) /* Circular mode */ #define STM32_DMA_SCR_PFCTRL BIT(5) /* Peripheral Flow Controller */ -#define STM32_DMA_SCR_TCIE BIT(4) /* Transfer Cplete Int Enable*/ +#define STM32_DMA_SCR_TCIE BIT(4) /* Transfer Complete Int Enable + */ #define STM32_DMA_SCR_TEIE BIT(2) /* Transfer Error Int Enable */ #define STM32_DMA_SCR_DMEIE BIT(1) /* Direct Mode Err Int Enable */ #define STM32_DMA_SCR_EN BIT(0) /* Stream Enable */ @@ -111,11 +118,24 @@ #define STM32_DMA_FIFO_THRESHOLD_FULL 0x03 #define STM32_DMA_MAX_DATA_ITEMS 0xffff +/* + * Valid transfer starts from @0 to 
@0xFFFE leading to unaligned scatter + * gather at boundary. Thus it's safer to round down this value on FIFO + * size (16 Bytes) + */ +#define STM32_DMA_ALIGNED_MAX_DATA_ITEMS \ + ALIGN_DOWN(STM32_DMA_MAX_DATA_ITEMS, 16) #define STM32_DMA_MAX_CHANNELS 0x08 #define STM32_DMA_MAX_REQUEST_ID 0x08 #define STM32_DMA_MAX_DATA_PARAM 0x03 +#define STM32_DMA_FIFO_SIZE 16 /* FIFO is 16 bytes */ +#define STM32_DMA_MIN_BURST 4 #define STM32_DMA_MAX_BURST 16 +/* DMA Features */ +#define STM32_DMA_THRESHOLD_FTR_MASK GENMASK(1, 0) +#define STM32_DMA_THRESHOLD_FTR_GET(n) ((n) & STM32_DMA_THRESHOLD_FTR_MASK) + enum stm32_dma_width { STM32_DMA_BYTE, STM32_DMA_HALF_WORD, @@ -129,11 +149,18 @@ enum stm32_dma_burst_size { STM32_DMA_BURST_INCR16, }; +/** + * struct stm32_dma_cfg - STM32 DMA custom configuration + * @channel_id: channel ID + * @request_line: DMA request + * @stream_config: 32bit mask specifying the DMA channel configuration + * @features: 32bit mask specifying the DMA Feature list + */ struct stm32_dma_cfg { u32 channel_id; u32 request_line; u32 stream_config; - u32 threshold; + u32 features; }; struct stm32_dma_chan_reg { @@ -171,6 +198,9 @@ struct stm32_dma_chan { u32 next_sg; struct dma_slave_config dma_sconfig; struct stm32_dma_chan_reg chan_reg; + u32 threshold; + u32 mem_burst; + u32 mem_width; }; struct stm32_dma_device { @@ -235,6 +265,85 @@ static int stm32_dma_get_width(struct stm32_dma_chan *chan, } } +static enum dma_slave_buswidth stm32_dma_get_max_width(u32 buf_len, + u32 threshold) +{ + enum dma_slave_buswidth max_width; + + if (threshold == STM32_DMA_FIFO_THRESHOLD_FULL) + max_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + else + max_width = DMA_SLAVE_BUSWIDTH_2_BYTES; + + while ((buf_len < max_width || buf_len % max_width) && + max_width > DMA_SLAVE_BUSWIDTH_1_BYTE) + max_width = max_width >> 1; + + return max_width; +} + +static bool stm32_dma_fifo_threshold_is_allowed(u32 burst, u32 threshold, + enum dma_slave_buswidth width) +{ + u32 remaining; + + if (width 
!= DMA_SLAVE_BUSWIDTH_UNDEFINED) { + if (burst != 0) { + /* + * If number of beats fit in several whole bursts + * this configuration is allowed. + */ + remaining = ((STM32_DMA_FIFO_SIZE / width) * + (threshold + 1) / 4) % burst; + + if (remaining == 0) + return true; + } else { + return true; + } + } + + return false; +} + +static bool stm32_dma_is_burst_possible(u32 buf_len, u32 threshold) +{ + switch (threshold) { + case STM32_DMA_FIFO_THRESHOLD_FULL: + if (buf_len >= STM32_DMA_MAX_BURST) + return true; + else + return false; + case STM32_DMA_FIFO_THRESHOLD_HALFFULL: + if (buf_len >= STM32_DMA_MAX_BURST / 2) + return true; + else + return false; + default: + return false; + } +} + +static u32 stm32_dma_get_best_burst(u32 buf_len, u32 max_burst, u32 threshold, + enum dma_slave_buswidth width) +{ + u32 best_burst = max_burst; + + if (best_burst == 1 || !stm32_dma_is_burst_possible(buf_len, threshold)) + return 0; + + while ((buf_len < best_burst * width && best_burst > 1) || + !stm32_dma_fifo_threshold_is_allowed(best_burst, threshold, + width)) { + if (best_burst > STM32_DMA_MIN_BURST) + best_burst = best_burst >> 1; + else + best_burst = 0; + } + + return best_burst; +} + static int stm32_dma_get_burst(struct stm32_dma_chan *chan, u32 maxburst) { switch (maxburst) { @@ -254,12 +363,12 @@ static int stm32_dma_get_burst(struct stm32_dma_chan *chan, u32 maxburst) } static void stm32_dma_set_fifo_config(struct stm32_dma_chan *chan, - u32 src_maxburst, u32 dst_maxburst) + u32 src_burst, u32 dst_burst) { chan->chan_reg.dma_sfcr &= ~STM32_DMA_SFCR_MASK; chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_DMEIE; - if ((!src_maxburst) && (!dst_maxburst)) { + if (!src_burst && !dst_burst) { /* Using direct mode */ chan->chan_reg.dma_scr |= STM32_DMA_SCR_DMEIE; } else { @@ -300,7 +409,7 @@ static u32 stm32_dma_irq_status(struct stm32_dma_chan *chan) flags = dma_isr >> (((chan->id & 2) << 3) | ((chan->id & 1) * 6)); - return flags; + return flags & STM32_DMA_MASKI; } static void 
stm32_dma_irq_clear(struct stm32_dma_chan *chan, u32 flags) @@ -315,6 +424,7 @@ static void stm32_dma_irq_clear(struct stm32_dma_chan *chan, u32 flags) * If (ch % 4) is 2 or 3, left shift the mask by 16 bits. * If (ch % 4) is 1 or 3, additionally left shift the mask by 6 bits. */ + flags &= STM32_DMA_MASKI; dma_ifcr = flags << (((chan->id & 2) << 3) | ((chan->id & 1) * 6)); if (chan->id & 4) @@ -429,6 +539,8 @@ static void stm32_dma_dump_reg(struct stm32_dma_chan *chan) dev_dbg(chan2dev(chan), "SFCR: 0x%08x\n", sfcr); } +static void stm32_dma_configure_next_sg(struct stm32_dma_chan *chan); + static void stm32_dma_start_transfer(struct stm32_dma_chan *chan) { struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan); @@ -471,6 +583,9 @@ static void stm32_dma_start_transfer(struct stm32_dma_chan *chan) if (status) stm32_dma_irq_clear(chan, status); + if (chan->desc->cyclic) + stm32_dma_configure_next_sg(chan); + stm32_dma_dump_reg(chan); /* Start DMA */ @@ -541,13 +656,29 @@ static irqreturn_t stm32_dma_chan_irq(int irq, void *devid) status = stm32_dma_irq_status(chan); scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id)); - if ((status & STM32_DMA_TCI) && (scr & STM32_DMA_SCR_TCIE)) { + if (status & STM32_DMA_TCI) { stm32_dma_irq_clear(chan, STM32_DMA_TCI); - stm32_dma_handle_chan_done(chan); - - } else { + if (scr & STM32_DMA_SCR_TCIE) + stm32_dma_handle_chan_done(chan); + status &= ~STM32_DMA_TCI; + } + if (status & STM32_DMA_HTI) { + stm32_dma_irq_clear(chan, STM32_DMA_HTI); + status &= ~STM32_DMA_HTI; + } + if (status & STM32_DMA_FEI) { + stm32_dma_irq_clear(chan, STM32_DMA_FEI); + status &= ~STM32_DMA_FEI; + if (!(scr & STM32_DMA_SCR_EN)) + dev_err(chan2dev(chan), "FIFO Error\n"); + else + dev_dbg(chan2dev(chan), "FIFO over/underrun\n"); + } + if (status) { stm32_dma_irq_clear(chan, status); dev_err(chan2dev(chan), "DMA error: status=0x%08x\n", status); + if (!(scr & STM32_DMA_SCR_EN)) + dev_err(chan2dev(chan), "chan disabled by HW\n"); } 
spin_unlock(&chan->vchan.lock); @@ -564,45 +695,59 @@ static void stm32_dma_issue_pending(struct dma_chan *c) if (vchan_issue_pending(&chan->vchan) && !chan->desc && !chan->busy) { dev_dbg(chan2dev(chan), "vchan %p: issued\n", &chan->vchan); stm32_dma_start_transfer(chan); - if (chan->desc->cyclic) - stm32_dma_configure_next_sg(chan); + } spin_unlock_irqrestore(&chan->vchan.lock, flags); } static int stm32_dma_set_xfer_param(struct stm32_dma_chan *chan, enum dma_transfer_direction direction, - enum dma_slave_buswidth *buswidth) + enum dma_slave_buswidth *buswidth, + u32 buf_len) { enum dma_slave_buswidth src_addr_width, dst_addr_width; int src_bus_width, dst_bus_width; int src_burst_size, dst_burst_size; - u32 src_maxburst, dst_maxburst; - u32 dma_scr = 0; + u32 src_maxburst, dst_maxburst, src_best_burst, dst_best_burst; + u32 dma_scr, threshold; src_addr_width = chan->dma_sconfig.src_addr_width; dst_addr_width = chan->dma_sconfig.dst_addr_width; src_maxburst = chan->dma_sconfig.src_maxburst; dst_maxburst = chan->dma_sconfig.dst_maxburst; + threshold = chan->threshold; switch (direction) { case DMA_MEM_TO_DEV: + /* Set device data size */ dst_bus_width = stm32_dma_get_width(chan, dst_addr_width); if (dst_bus_width < 0) return dst_bus_width; - dst_burst_size = stm32_dma_get_burst(chan, dst_maxburst); + /* Set device burst size */ + dst_best_burst = stm32_dma_get_best_burst(buf_len, + dst_maxburst, + threshold, + dst_addr_width); + + dst_burst_size = stm32_dma_get_burst(chan, dst_best_burst); if (dst_burst_size < 0) return dst_burst_size; - if (!src_addr_width) - src_addr_width = dst_addr_width; - + /* Set memory data size */ + src_addr_width = stm32_dma_get_max_width(buf_len, threshold); + chan->mem_width = src_addr_width; src_bus_width = stm32_dma_get_width(chan, src_addr_width); if (src_bus_width < 0) return src_bus_width; - src_burst_size = stm32_dma_get_burst(chan, src_maxburst); + /* Set memory burst size */ + src_maxburst = STM32_DMA_MAX_BURST; + 
src_best_burst = stm32_dma_get_best_burst(buf_len, + src_maxburst, + threshold, + src_addr_width); + src_burst_size = stm32_dma_get_burst(chan, src_best_burst); if (src_burst_size < 0) return src_burst_size; @@ -612,27 +757,46 @@ static int stm32_dma_set_xfer_param(struct stm32_dma_chan *chan, STM32_DMA_SCR_PBURST(dst_burst_size) | STM32_DMA_SCR_MBURST(src_burst_size); + /* Set FIFO threshold */ + chan->chan_reg.dma_sfcr &= ~STM32_DMA_SFCR_FTH_MASK; + chan->chan_reg.dma_sfcr |= STM32_DMA_SFCR_FTH(threshold); + + /* Set peripheral address */ chan->chan_reg.dma_spar = chan->dma_sconfig.dst_addr; *buswidth = dst_addr_width; break; case DMA_DEV_TO_MEM: + /* Set device data size */ src_bus_width = stm32_dma_get_width(chan, src_addr_width); if (src_bus_width < 0) return src_bus_width; - src_burst_size = stm32_dma_get_burst(chan, src_maxburst); + /* Set device burst size */ + src_best_burst = stm32_dma_get_best_burst(buf_len, + src_maxburst, + threshold, + src_addr_width); + chan->mem_burst = src_best_burst; + src_burst_size = stm32_dma_get_burst(chan, src_best_burst); if (src_burst_size < 0) return src_burst_size; - if (!dst_addr_width) - dst_addr_width = src_addr_width; - + /* Set memory data size */ + dst_addr_width = stm32_dma_get_max_width(buf_len, threshold); + chan->mem_width = dst_addr_width; dst_bus_width = stm32_dma_get_width(chan, dst_addr_width); if (dst_bus_width < 0) return dst_bus_width; - dst_burst_size = stm32_dma_get_burst(chan, dst_maxburst); + /* Set memory burst size */ + dst_maxburst = STM32_DMA_MAX_BURST; + dst_best_burst = stm32_dma_get_best_burst(buf_len, + dst_maxburst, + threshold, + dst_addr_width); + chan->mem_burst = dst_best_burst; + dst_burst_size = stm32_dma_get_burst(chan, dst_best_burst); if (dst_burst_size < 0) return dst_burst_size; @@ -642,6 +806,11 @@ static int stm32_dma_set_xfer_param(struct stm32_dma_chan *chan, STM32_DMA_SCR_PBURST(src_burst_size) | STM32_DMA_SCR_MBURST(dst_burst_size); + /* Set FIFO threshold */ + 
chan->chan_reg.dma_sfcr &= ~STM32_DMA_SFCR_FTH_MASK; + chan->chan_reg.dma_sfcr |= STM32_DMA_SFCR_FTH(threshold); + + /* Set peripheral address */ chan->chan_reg.dma_spar = chan->dma_sconfig.src_addr; *buswidth = chan->dma_sconfig.src_addr_width; break; @@ -651,8 +820,9 @@ static int stm32_dma_set_xfer_param(struct stm32_dma_chan *chan, return -EINVAL; } - stm32_dma_set_fifo_config(chan, src_maxburst, dst_maxburst); + stm32_dma_set_fifo_config(chan, src_best_burst, dst_best_burst); + /* Set DMA control register */ chan->chan_reg.dma_scr &= ~(STM32_DMA_SCR_DIR_MASK | STM32_DMA_SCR_PSIZE_MASK | STM32_DMA_SCR_MSIZE_MASK | STM32_DMA_SCR_PBURST_MASK | STM32_DMA_SCR_MBURST_MASK); @@ -692,10 +862,6 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_slave_sg( if (!desc) return NULL; - ret = stm32_dma_set_xfer_param(chan, direction, &buswidth); - if (ret < 0) - goto err; - /* Set peripheral flow controller */ if (chan->dma_sconfig.device_fc) chan->chan_reg.dma_scr |= STM32_DMA_SCR_PFCTRL; @@ -703,10 +869,15 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_slave_sg( chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_PFCTRL; for_each_sg(sgl, sg, sg_len, i) { + ret = stm32_dma_set_xfer_param(chan, direction, &buswidth, + sg_dma_len(sg)); + if (ret < 0) + goto err; + desc->sg_req[i].len = sg_dma_len(sg); nb_data_items = desc->sg_req[i].len / buswidth; - if (nb_data_items > STM32_DMA_MAX_DATA_ITEMS) { + if (nb_data_items > STM32_DMA_ALIGNED_MAX_DATA_ITEMS) { dev_err(chan2dev(chan), "nb items not supported\n"); goto err; } @@ -767,12 +938,12 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_dma_cyclic( return NULL; } - ret = stm32_dma_set_xfer_param(chan, direction, &buswidth); + ret = stm32_dma_set_xfer_param(chan, direction, &buswidth, period_len); if (ret < 0) return NULL; nb_data_items = period_len / buswidth; - if (nb_data_items > STM32_DMA_MAX_DATA_ITEMS) { + if (nb_data_items > STM32_DMA_ALIGNED_MAX_DATA_ITEMS) { dev_err(chan2dev(chan), "number of items not 
supported\n"); return NULL; } @@ -816,35 +987,45 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_dma_memcpy( dma_addr_t src, size_t len, unsigned long flags) { struct stm32_dma_chan *chan = to_stm32_dma_chan(c); - u32 num_sgs; + enum dma_slave_buswidth max_width; struct stm32_dma_desc *desc; size_t xfer_count, offset; + u32 num_sgs, best_burst, dma_burst, threshold; int i; - num_sgs = DIV_ROUND_UP(len, STM32_DMA_MAX_DATA_ITEMS); + num_sgs = DIV_ROUND_UP(len, STM32_DMA_ALIGNED_MAX_DATA_ITEMS); desc = stm32_dma_alloc_desc(num_sgs); if (!desc) return NULL; + threshold = chan->threshold; + for (offset = 0, i = 0; offset < len; offset += xfer_count, i++) { xfer_count = min_t(size_t, len - offset, - STM32_DMA_MAX_DATA_ITEMS); + STM32_DMA_ALIGNED_MAX_DATA_ITEMS); - desc->sg_req[i].len = xfer_count; + /* Compute best burst size */ + max_width = DMA_SLAVE_BUSWIDTH_1_BYTE; + best_burst = stm32_dma_get_best_burst(len, STM32_DMA_MAX_BURST, + threshold, max_width); + dma_burst = stm32_dma_get_burst(chan, best_burst); stm32_dma_clear_reg(&desc->sg_req[i].chan_reg); desc->sg_req[i].chan_reg.dma_scr = STM32_DMA_SCR_DIR(STM32_DMA_MEM_TO_MEM) | + STM32_DMA_SCR_PBURST(dma_burst) | + STM32_DMA_SCR_MBURST(dma_burst) | STM32_DMA_SCR_MINC | STM32_DMA_SCR_PINC | STM32_DMA_SCR_TCIE | STM32_DMA_SCR_TEIE; - desc->sg_req[i].chan_reg.dma_sfcr = STM32_DMA_SFCR_DMDIS | - STM32_DMA_SFCR_FTH(STM32_DMA_FIFO_THRESHOLD_FULL) | - STM32_DMA_SFCR_FEIE; + desc->sg_req[i].chan_reg.dma_sfcr |= STM32_DMA_SFCR_MASK; + desc->sg_req[i].chan_reg.dma_sfcr |= + STM32_DMA_SFCR_FTH(threshold); desc->sg_req[i].chan_reg.dma_spar = src + offset; desc->sg_req[i].chan_reg.dma_sm0ar = dest + offset; desc->sg_req[i].chan_reg.dma_sndtr = xfer_count; + desc->sg_req[i].len = xfer_count; } desc->num_sgs = num_sgs; @@ -869,6 +1050,7 @@ static size_t stm32_dma_desc_residue(struct stm32_dma_chan *chan, struct stm32_dma_desc *desc, u32 next_sg) { + u32 modulo, burst_size; u32 residue = 0; int i; @@ -876,8 +1058,10 @@ 
static size_t stm32_dma_desc_residue(struct stm32_dma_chan *chan, * In cyclic mode, for the last period, residue = remaining bytes from * NDTR */ - if (chan->desc->cyclic && next_sg == 0) - return stm32_dma_get_remaining_bytes(chan); + if (chan->desc->cyclic && next_sg == 0) { + residue = stm32_dma_get_remaining_bytes(chan); + goto end; + } /* * For all other periods in cyclic mode, and in sg mode, @@ -888,6 +1072,15 @@ static size_t stm32_dma_desc_residue(struct stm32_dma_chan *chan, residue += desc->sg_req[i].len; residue += stm32_dma_get_remaining_bytes(chan); +end: + if (!chan->mem_burst) + return residue; + + burst_size = chan->mem_burst * chan->mem_width; + modulo = residue % burst_size; + if (modulo) + residue = residue - modulo + burst_size; + return residue; } @@ -902,7 +1095,7 @@ static enum dma_status stm32_dma_tx_status(struct dma_chan *c, u32 residue = 0; status = dma_cookie_status(c, cookie, state); - if ((status == DMA_COMPLETE) || (!state)) + if (status == DMA_COMPLETE || !state) return status; spin_lock_irqsave(&chan->vchan.lock, flags); @@ -966,7 +1159,7 @@ static void stm32_dma_desc_free(struct virt_dma_desc *vdesc) } static void stm32_dma_set_config(struct stm32_dma_chan *chan, - struct stm32_dma_cfg *cfg) + struct stm32_dma_cfg *cfg) { stm32_dma_clear_reg(&chan->chan_reg); @@ -976,7 +1169,7 @@ static void stm32_dma_set_config(struct stm32_dma_chan *chan, /* Enable Interrupts */ chan->chan_reg.dma_scr |= STM32_DMA_SCR_TEIE | STM32_DMA_SCR_TCIE; - chan->chan_reg.dma_sfcr = cfg->threshold & STM32_DMA_SFCR_FTH_MASK; + chan->threshold = STM32_DMA_THRESHOLD_FTR_GET(cfg->features); } static struct dma_chan *stm32_dma_of_xlate(struct of_phandle_args *dma_spec, @@ -996,10 +1189,10 @@ static struct dma_chan *stm32_dma_of_xlate(struct of_phandle_args *dma_spec, cfg.channel_id = dma_spec->args[0]; cfg.request_line = dma_spec->args[1]; cfg.stream_config = dma_spec->args[2]; - cfg.threshold = dma_spec->args[3]; + cfg.features = dma_spec->args[3]; - if 
((cfg.channel_id >= STM32_DMA_MAX_CHANNELS) || - (cfg.request_line >= STM32_DMA_MAX_REQUEST_ID)) { + if (cfg.channel_id >= STM32_DMA_MAX_CHANNELS || + cfg.request_line >= STM32_DMA_MAX_REQUEST_ID) { dev_err(dev, "Bad channel and/or request id\n"); return NULL; } diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index a4f68affc13b..d39400e5bc42 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -89,14 +89,14 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname) */ if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) { ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID, - mem_region, mem_phys, mem_size); + mem_region, mem_phys, mem_size, NULL); } else { char newname[strlen("qcom/") + strlen(fwname) + 1]; sprintf(newname, "qcom/%s", fwname); ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID, - mem_region, mem_phys, mem_size); + mem_region, mem_phys, mem_size, NULL); } if (ret) goto out; diff --git a/drivers/media/platform/qcom/venus/firmware.c b/drivers/media/platform/qcom/venus/firmware.c index 521d4b36c090..c4a577848dd7 100644 --- a/drivers/media/platform/qcom/venus/firmware.c +++ b/drivers/media/platform/qcom/venus/firmware.c @@ -76,7 +76,7 @@ int venus_boot(struct device *dev, const char *fwname) } ret = qcom_mdt_load(dev, mdt, fwname, VENUS_PAS_ID, mem_va, mem_phys, - mem_size); + mem_size, NULL); release_firmware(mdt); diff --git a/drivers/platform/mellanox/mlxreg-hotplug.c b/drivers/platform/mellanox/mlxreg-hotplug.c index 313cf8ad77bf..ea9e7f4479ca 100644 --- a/drivers/platform/mellanox/mlxreg-hotplug.c +++ b/drivers/platform/mellanox/mlxreg-hotplug.c @@ -93,9 +93,11 @@ struct mlxreg_hotplug_priv_data { bool after_probe; }; -static int mlxreg_hotplug_device_create(struct device *dev, +static int mlxreg_hotplug_device_create(struct mlxreg_hotplug_priv_data *priv, struct mlxreg_core_data *data) { + struct mlxreg_core_hotplug_platform_data *pdata; + /* * 
Return if adapter number is negative. It could be in case hotplug * event is not associated with hotplug device. @@ -103,19 +105,21 @@ static int mlxreg_hotplug_device_create(struct device *dev, if (data->hpdev.nr < 0) return 0; - data->hpdev.adapter = i2c_get_adapter(data->hpdev.nr); + pdata = dev_get_platdata(&priv->pdev->dev); + data->hpdev.adapter = i2c_get_adapter(data->hpdev.nr + + pdata->shift_nr); if (!data->hpdev.adapter) { - dev_err(dev, "Failed to get adapter for bus %d\n", - data->hpdev.nr); + dev_err(priv->dev, "Failed to get adapter for bus %d\n", + data->hpdev.nr + pdata->shift_nr); return -EFAULT; } data->hpdev.client = i2c_new_device(data->hpdev.adapter, data->hpdev.brdinfo); if (!data->hpdev.client) { - dev_err(dev, "Failed to create client %s at bus %d at addr 0x%02x\n", - data->hpdev.brdinfo->type, data->hpdev.nr, - data->hpdev.brdinfo->addr); + dev_err(priv->dev, "Failed to create client %s at bus %d at addr 0x%02x\n", + data->hpdev.brdinfo->type, data->hpdev.nr + + pdata->shift_nr, data->hpdev.brdinfo->addr); i2c_put_adapter(data->hpdev.adapter); data->hpdev.adapter = NULL; @@ -270,10 +274,10 @@ mlxreg_hotplug_work_helper(struct mlxreg_hotplug_priv_data *priv, if (item->inversed) mlxreg_hotplug_device_destroy(data); else - mlxreg_hotplug_device_create(priv->dev, data); + mlxreg_hotplug_device_create(priv, data); } else { if (item->inversed) - mlxreg_hotplug_device_create(priv->dev, data); + mlxreg_hotplug_device_create(priv, data); else mlxreg_hotplug_device_destroy(data); } @@ -319,7 +323,7 @@ mlxreg_hotplug_health_work_helper(struct mlxreg_hotplug_priv_data *priv, if (regval == MLXREG_HOTPLUG_HEALTH_MASK) { if ((data->health_cntr++ == MLXREG_HOTPLUG_RST_CNTR) || !priv->after_probe) { - mlxreg_hotplug_device_create(priv->dev, data); + mlxreg_hotplug_device_create(priv, data); data->attached = true; } } else { @@ -550,6 +554,7 @@ static int mlxreg_hotplug_probe(struct platform_device *pdev) { struct mlxreg_core_hotplug_platform_data *pdata; 
struct mlxreg_hotplug_priv_data *priv; + struct i2c_adapter *deferred_adap; int err; pdata = dev_get_platdata(&pdev->dev); @@ -558,6 +563,12 @@ static int mlxreg_hotplug_probe(struct platform_device *pdev) return -EINVAL; } + /* Defer probing if the necessary adapter is not configured yet. */ + deferred_adap = i2c_get_adapter(pdata->deferred_nr); + if (!deferred_adap) + return -EPROBE_DEFER; + i2c_put_adapter(deferred_adap); + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index ef016e46544a..39d06dd1f63a 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -757,6 +757,8 @@ config TOPSTAR_LAPTOP depends on ACPI depends on INPUT select INPUT_SPARSEKMAP + select LEDS_CLASS + select NEW_LEDS ---help--- This driver adds support for hotkeys found on Topstar laptops. @@ -1174,6 +1176,7 @@ config INTEL_TELEMETRY config MLX_PLATFORM tristate "Mellanox Technologies platform support" + depends on I2C && REGMAP ---help--- This option enables system support for the Mellanox Technologies platform. 
The Mellanox systems provide data center networking diff --git a/drivers/platform/x86/dell-smbios-base.c b/drivers/platform/x86/dell-smbios-base.c index 2485c80a9fdd..33fb2a20458a 100644 --- a/drivers/platform/x86/dell-smbios-base.c +++ b/drivers/platform/x86/dell-smbios-base.c @@ -514,7 +514,7 @@ static int build_tokens_sysfs(struct platform_device *dev) continue; loop_fail_create_value: - kfree(value_name); + kfree(location_name); goto out_unwind_strings; } smbios_attribute_group.attrs = token_attrs; @@ -525,7 +525,7 @@ loop_fail_create_value: return 0; out_unwind_strings: - for (i = i-1; i > 0; i--) { + while (i--) { kfree(token_location_attrs[i].attr.name); kfree(token_value_attrs[i].attr.name); } diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index 2cfbd3fa5136..cd95b6f3a064 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -53,6 +53,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/acpi.h> +#include <linux/bitops.h> #include <linux/dmi.h> #include <linux/backlight.h> #include <linux/fb.h> @@ -61,12 +62,11 @@ #include <linux/kfifo.h> #include <linux/leds.h> #include <linux/platform_device.h> -#include <linux/slab.h> #include <acpi/video.h> -#define FUJITSU_DRIVER_VERSION "0.6.0" +#define FUJITSU_DRIVER_VERSION "0.6.0" -#define FUJITSU_LCD_N_LEVELS 8 +#define FUJITSU_LCD_N_LEVELS 8 #define ACPI_FUJITSU_CLASS "fujitsu" #define ACPI_FUJITSU_BL_HID "FUJ02B1" @@ -76,41 +76,51 @@ #define ACPI_FUJITSU_LAPTOP_DRIVER_NAME "Fujitsu laptop FUJ02E3 ACPI hotkeys driver" #define ACPI_FUJITSU_LAPTOP_DEVICE_NAME "Fujitsu FUJ02E3" -#define ACPI_FUJITSU_NOTIFY_CODE1 0x80 +#define ACPI_FUJITSU_NOTIFY_CODE 0x80 /* FUNC interface - command values */ -#define FUNC_FLAGS 0x1000 -#define FUNC_LEDS 0x1001 -#define FUNC_BUTTONS 0x1002 -#define FUNC_BACKLIGHT 0x1004 +#define FUNC_FLAGS BIT(12) +#define FUNC_LEDS (BIT(12) | BIT(0)) +#define FUNC_BUTTONS (BIT(12) | BIT(1)) 
+#define FUNC_BACKLIGHT (BIT(12) | BIT(2)) /* FUNC interface - responses */ -#define UNSUPPORTED_CMD 0x80000000 +#define UNSUPPORTED_CMD 0x80000000 /* FUNC interface - status flags */ -#define FLAG_RFKILL 0x020 -#define FLAG_LID 0x100 -#define FLAG_DOCK 0x200 +#define FLAG_RFKILL BIT(5) +#define FLAG_LID BIT(8) +#define FLAG_DOCK BIT(9) /* FUNC interface - LED control */ -#define FUNC_LED_OFF 0x1 -#define FUNC_LED_ON 0x30001 -#define KEYBOARD_LAMPS 0x100 -#define LOGOLAMP_POWERON 0x2000 -#define LOGOLAMP_ALWAYS 0x4000 -#define RADIO_LED_ON 0x20 -#define ECO_LED 0x10000 -#define ECO_LED_ON 0x80000 - -/* Hotkey details */ -#define KEY1_CODE 0x410 /* codes for the keys in the GIRB register */ -#define KEY2_CODE 0x411 -#define KEY3_CODE 0x412 -#define KEY4_CODE 0x413 -#define KEY5_CODE 0x420 - -#define MAX_HOTKEY_RINGBUFFER_SIZE 100 -#define RINGBUFFERSIZE 40 +#define FUNC_LED_OFF BIT(0) +#define FUNC_LED_ON (BIT(0) | BIT(16) | BIT(17)) +#define LOGOLAMP_POWERON BIT(13) +#define LOGOLAMP_ALWAYS BIT(14) +#define KEYBOARD_LAMPS BIT(8) +#define RADIO_LED_ON BIT(5) +#define ECO_LED BIT(16) +#define ECO_LED_ON BIT(19) + +/* FUNC interface - backlight power control */ +#define BACKLIGHT_PARAM_POWER BIT(2) +#define BACKLIGHT_OFF (BIT(0) | BIT(1)) +#define BACKLIGHT_ON 0 + +/* Scancodes read from the GIRB register */ +#define KEY1_CODE 0x410 +#define KEY2_CODE 0x411 +#define KEY3_CODE 0x412 +#define KEY4_CODE 0x413 +#define KEY5_CODE 0x420 + +/* Hotkey ringbuffer limits */ +#define MAX_HOTKEY_RINGBUFFER_SIZE 100 +#define RINGBUFFERSIZE 40 + +/* Module parameters */ +static int use_alt_lcd_levels = -1; +static bool disable_brightness_adjust; /* Device controlling the backlight and associated keys */ struct fujitsu_bl { @@ -122,8 +132,6 @@ struct fujitsu_bl { }; static struct fujitsu_bl *fujitsu_bl; -static int use_alt_lcd_levels = -1; -static bool disable_brightness_adjust; /* Device used to access hotkeys and other features on the laptop */ struct fujitsu_laptop { @@ -256,9 
+264,11 @@ static int bl_update_status(struct backlight_device *b) if (fext) { if (b->props.power == FB_BLANK_POWERDOWN) - call_fext_func(fext, FUNC_BACKLIGHT, 0x1, 0x4, 0x3); + call_fext_func(fext, FUNC_BACKLIGHT, 0x1, + BACKLIGHT_PARAM_POWER, BACKLIGHT_OFF); else - call_fext_func(fext, FUNC_BACKLIGHT, 0x1, 0x4, 0x0); + call_fext_func(fext, FUNC_BACKLIGHT, 0x1, + BACKLIGHT_PARAM_POWER, BACKLIGHT_ON); } return set_lcd_level(device, b->props.brightness); @@ -385,7 +395,7 @@ static int fujitsu_backlight_register(struct acpi_device *device) static int acpi_fujitsu_bl_add(struct acpi_device *device) { struct fujitsu_bl *priv; - int error; + int ret; if (acpi_video_get_backlight_type() != acpi_backlight_vendor) return -ENODEV; @@ -399,10 +409,6 @@ static int acpi_fujitsu_bl_add(struct acpi_device *device) strcpy(acpi_device_class(device), ACPI_FUJITSU_CLASS); device->driver_data = priv; - error = acpi_fujitsu_bl_input_setup(device); - if (error) - return error; - pr_info("ACPI: %s [%s]\n", acpi_device_name(device), acpi_device_bid(device)); @@ -410,11 +416,11 @@ static int acpi_fujitsu_bl_add(struct acpi_device *device) priv->max_brightness = FUJITSU_LCD_N_LEVELS; get_lcd_level(device); - error = fujitsu_backlight_register(device); - if (error) - return error; + ret = acpi_fujitsu_bl_input_setup(device); + if (ret) + return ret; - return 0; + return fujitsu_backlight_register(device); } /* Brightness notify */ @@ -424,7 +430,7 @@ static void acpi_fujitsu_bl_notify(struct acpi_device *device, u32 event) struct fujitsu_bl *priv = acpi_driver_data(device); int oldb, newb; - if (event != ACPI_FUJITSU_NOTIFY_CODE1) { + if (event != ACPI_FUJITSU_NOTIFY_CODE) { acpi_handle_info(device->handle, "unsupported event [0x%x]\n", event); sparse_keymap_report_event(priv->input, -1, 1, true); @@ -455,7 +461,9 @@ static const struct key_entry keymap_default[] = { { KE_KEY, KEY3_CODE, { KEY_PROG3 } }, { KE_KEY, KEY4_CODE, { KEY_PROG4 } }, { KE_KEY, KEY5_CODE, { KEY_RFKILL } }, + { 
KE_KEY, BIT(5), { KEY_RFKILL } }, { KE_KEY, BIT(26), { KEY_TOUCHPAD_TOGGLE } }, + { KE_KEY, BIT(29), { KEY_MICMUTE } }, { KE_END, 0 } }; @@ -693,7 +701,7 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device) { struct fujitsu_laptop *priv = acpi_driver_data(device); struct led_classdev *led; - int result; + int ret; if (call_fext_func(device, FUNC_LEDS, 0x0, 0x0, 0x0) & LOGOLAMP_POWERON) { @@ -704,9 +712,9 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device) led->name = "fujitsu::logolamp"; led->brightness_set_blocking = logolamp_set; led->brightness_get = logolamp_get; - result = devm_led_classdev_register(&device->dev, led); - if (result) - return result; + ret = devm_led_classdev_register(&device->dev, led); + if (ret) + return ret; } if ((call_fext_func(device, @@ -719,9 +727,9 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device) led->name = "fujitsu::kblamps"; led->brightness_set_blocking = kblamps_set; led->brightness_get = kblamps_get; - result = devm_led_classdev_register(&device->dev, led); - if (result) - return result; + ret = devm_led_classdev_register(&device->dev, led); + if (ret) + return ret; } /* @@ -742,9 +750,9 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device) led->brightness_set_blocking = radio_led_set; led->brightness_get = radio_led_get; led->default_trigger = "rfkill-any"; - result = devm_led_classdev_register(&device->dev, led); - if (result) - return result; + ret = devm_led_classdev_register(&device->dev, led); + if (ret) + return ret; } /* Support for eco led is not always signaled in bit corresponding @@ -762,9 +770,9 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device) led->name = "fujitsu::eco_led"; led->brightness_set_blocking = eco_led_set; led->brightness_get = eco_led_get; - result = devm_led_classdev_register(&device->dev, led); - if (result) - return result; + ret = devm_led_classdev_register(&device->dev, led); + 
if (ret) + return ret; } return 0; @@ -773,8 +781,7 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device) static int acpi_fujitsu_laptop_add(struct acpi_device *device) { struct fujitsu_laptop *priv; - int error; - int i; + int ret, i = 0; priv = devm_kzalloc(&device->dev, sizeof(*priv), GFP_KERNEL); if (!priv) @@ -789,23 +796,16 @@ static int acpi_fujitsu_laptop_add(struct acpi_device *device) /* kfifo */ spin_lock_init(&priv->fifo_lock); - error = kfifo_alloc(&priv->fifo, RINGBUFFERSIZE * sizeof(int), - GFP_KERNEL); - if (error) { - pr_err("kfifo_alloc failed\n"); - goto err_stop; - } - - error = acpi_fujitsu_laptop_input_setup(device); - if (error) - goto err_free_fifo; + ret = kfifo_alloc(&priv->fifo, RINGBUFFERSIZE * sizeof(int), + GFP_KERNEL); + if (ret) + return ret; pr_info("ACPI: %s [%s]\n", acpi_device_name(device), acpi_device_bid(device)); - i = 0; - while (call_fext_func(device, FUNC_BUTTONS, 0x1, 0x0, 0x0) != 0 - && (i++) < MAX_HOTKEY_RINGBUFFER_SIZE) + while (call_fext_func(device, FUNC_BUTTONS, 0x1, 0x0, 0x0) != 0 && + i++ < MAX_HOTKEY_RINGBUFFER_SIZE) ; /* No action, result is discarded */ acpi_handle_debug(device->handle, "Discarded %i ringbuffer entries\n", i); @@ -829,26 +829,31 @@ static int acpi_fujitsu_laptop_add(struct acpi_device *device) /* Sync backlight power status */ if (fujitsu_bl && fujitsu_bl->bl_device && acpi_video_get_backlight_type() == acpi_backlight_vendor) { - if (call_fext_func(fext, FUNC_BACKLIGHT, 0x2, 0x4, 0x0) == 3) + if (call_fext_func(fext, FUNC_BACKLIGHT, 0x2, + BACKLIGHT_PARAM_POWER, 0x0) == BACKLIGHT_OFF) fujitsu_bl->bl_device->props.power = FB_BLANK_POWERDOWN; else fujitsu_bl->bl_device->props.power = FB_BLANK_UNBLANK; } - error = acpi_fujitsu_laptop_leds_register(device); - if (error) + ret = acpi_fujitsu_laptop_input_setup(device); + if (ret) + goto err_free_fifo; + + ret = acpi_fujitsu_laptop_leds_register(device); + if (ret) goto err_free_fifo; - error = 
fujitsu_laptop_platform_add(device); - if (error) + ret = fujitsu_laptop_platform_add(device); + if (ret) goto err_free_fifo; return 0; err_free_fifo: kfifo_free(&priv->fifo); -err_stop: - return error; + + return ret; } static int acpi_fujitsu_laptop_remove(struct acpi_device *device) @@ -865,11 +870,11 @@ static int acpi_fujitsu_laptop_remove(struct acpi_device *device) static void acpi_fujitsu_laptop_press(struct acpi_device *device, int scancode) { struct fujitsu_laptop *priv = acpi_driver_data(device); - int status; + int ret; - status = kfifo_in_locked(&priv->fifo, (unsigned char *)&scancode, - sizeof(scancode), &priv->fifo_lock); - if (status != sizeof(scancode)) { + ret = kfifo_in_locked(&priv->fifo, (unsigned char *)&scancode, + sizeof(scancode), &priv->fifo_lock); + if (ret != sizeof(scancode)) { dev_info(&priv->input->dev, "Could not push scancode [0x%x]\n", scancode); return; @@ -882,13 +887,12 @@ static void acpi_fujitsu_laptop_press(struct acpi_device *device, int scancode) static void acpi_fujitsu_laptop_release(struct acpi_device *device) { struct fujitsu_laptop *priv = acpi_driver_data(device); - int scancode, status; + int scancode, ret; while (true) { - status = kfifo_out_locked(&priv->fifo, - (unsigned char *)&scancode, - sizeof(scancode), &priv->fifo_lock); - if (status != sizeof(scancode)) + ret = kfifo_out_locked(&priv->fifo, (unsigned char *)&scancode, + sizeof(scancode), &priv->fifo_lock); + if (ret != sizeof(scancode)) return; sparse_keymap_report_event(priv->input, scancode, 0, false); dev_dbg(&priv->input->dev, @@ -899,10 +903,10 @@ static void acpi_fujitsu_laptop_release(struct acpi_device *device) static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event) { struct fujitsu_laptop *priv = acpi_driver_data(device); - int scancode, i = 0; + int scancode, i = 0, ret; unsigned int irb; - if (event != ACPI_FUJITSU_NOTIFY_CODE1) { + if (event != ACPI_FUJITSU_NOTIFY_CODE) { acpi_handle_info(device->handle, "Unsupported event 
[0x%x]\n", event); sparse_keymap_report_event(priv->input, -1, 1, true); @@ -930,9 +934,18 @@ static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event) * E736/E746/E756), the touchpad toggle hotkey (Fn+F4) is * handled in software; its state is queried using FUNC_FLAGS */ - if ((priv->flags_supported & BIT(26)) && - (call_fext_func(device, FUNC_FLAGS, 0x1, 0x0, 0x0) & BIT(26))) - sparse_keymap_report_event(priv->input, BIT(26), 1, true); + if (priv->flags_supported & (BIT(5) | BIT(26) | BIT(29))) { + ret = call_fext_func(device, FUNC_FLAGS, 0x1, 0x0, 0x0); + if (ret & BIT(5)) + sparse_keymap_report_event(priv->input, + BIT(5), 1, true); + if (ret & BIT(26)) + sparse_keymap_report_event(priv->input, + BIT(26), 1, true); + if (ret & BIT(29)) + sparse_keymap_report_event(priv->input, + BIT(29), 1, true); + } } /* Initialization */ diff --git a/drivers/platform/x86/gpd-pocket-fan.c b/drivers/platform/x86/gpd-pocket-fan.c index 2d645c505f81..be85ed966bf3 100644 --- a/drivers/platform/x86/gpd-pocket-fan.c +++ b/drivers/platform/x86/gpd-pocket-fan.c @@ -19,12 +19,12 @@ static int temp_limits[3] = { 55000, 60000, 65000 }; module_param_array(temp_limits, int, NULL, 0444); MODULE_PARM_DESC(temp_limits, - "Milli-celcius values above which the fan speed increases"); + "Millicelsius values above which the fan speed increases"); static int hysteresis = 3000; module_param(hysteresis, int, 0444); MODULE_PARM_DESC(hysteresis, - "Hysteresis in milli-celcius before lowering the fan speed"); + "Hysteresis in millicelsius before lowering the fan speed"); static int speed_on_ac = 2; module_param(speed_on_ac, int, 0444); diff --git a/drivers/platform/x86/intel-hid.c b/drivers/platform/x86/intel-hid.c index 5e3df194723e..b5adba227783 100644 --- a/drivers/platform/x86/intel-hid.c +++ b/drivers/platform/x86/intel-hid.c @@ -16,16 +16,14 @@ * */ +#include <linux/acpi.h> +#include <linux/dmi.h> +#include <linux/input.h> +#include <linux/input/sparse-keymap.h> #include 
<linux/kernel.h> #include <linux/module.h> -#include <linux/init.h> -#include <linux/input.h> #include <linux/platform_device.h> -#include <linux/input/sparse-keymap.h> -#include <linux/acpi.h> #include <linux/suspend.h> -#include <acpi/acpi_bus.h> -#include <linux/dmi.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Alex Hung"); @@ -67,8 +65,8 @@ static const struct key_entry intel_array_keymap[] = { { KE_IGNORE, 0xC5, { KEY_VOLUMEUP } }, /* Release */ { KE_KEY, 0xC6, { KEY_VOLUMEDOWN } }, /* Press */ { KE_IGNORE, 0xC7, { KEY_VOLUMEDOWN } }, /* Release */ - { KE_SW, 0xC8, { .sw = { SW_ROTATE_LOCK, 1 } } }, /* Press */ - { KE_SW, 0xC9, { .sw = { SW_ROTATE_LOCK, 0 } } }, /* Release */ + { KE_KEY, 0xC8, { KEY_ROTATE_LOCK_TOGGLE } }, /* Press */ + { KE_IGNORE, 0xC9, { KEY_ROTATE_LOCK_TOGGLE } }, /* Release */ { KE_KEY, 0xCE, { KEY_POWER } }, /* Press */ { KE_IGNORE, 0xCF, { KEY_POWER } }, /* Release */ { KE_END }, diff --git a/drivers/platform/x86/intel_turbo_max_3.c b/drivers/platform/x86/intel_turbo_max_3.c index d4ea01805879..a6d5aa0c3c47 100644 --- a/drivers/platform/x86/intel_turbo_max_3.c +++ b/drivers/platform/x86/intel_turbo_max_3.c @@ -138,9 +138,6 @@ static int __init itmt_legacy_init(void) if (!id) return -ENODEV; - if (boot_cpu_has(X86_FEATURE_HWP)) - return -ENODEV; - ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "platform/x86/turbo_max_3:online", itmt_legacy_cpu_online, NULL); diff --git a/drivers/platform/x86/mlx-platform.c b/drivers/platform/x86/mlx-platform.c index 454e14f02285..7a0bd24c1ae2 100644 --- a/drivers/platform/x86/mlx-platform.c +++ b/drivers/platform/x86/mlx-platform.c @@ -85,6 +85,15 @@ #define MLXPLAT_CPLD_FAN_MASK GENMASK(3, 0) #define MLXPLAT_CPLD_FAN_NG_MASK GENMASK(5, 0) +/* Default I2C parent bus number */ +#define MLXPLAT_CPLD_PHYS_ADAPTER_DEF_NR 1 + +/* Maximum number of possible physical buses equipped on system */ +#define MLXPLAT_CPLD_MAX_PHYS_ADAPTER_NUM 16 + +/* Number of channels in group */ +#define MLXPLAT_CPLD_GRP_CHNL_NUM 8 + /* 
Start channel numbers */ #define MLXPLAT_CPLD_CH1 2 #define MLXPLAT_CPLD_CH2 10 @@ -124,7 +133,7 @@ static const struct resource mlxplat_lpc_resources[] = { }; /* Platform default channels */ -static const int mlxplat_default_channels[][8] = { +static const int mlxplat_default_channels[][MLXPLAT_CPLD_GRP_CHNL_NUM] = { { MLXPLAT_CPLD_CH1, MLXPLAT_CPLD_CH1 + 1, MLXPLAT_CPLD_CH1 + 2, MLXPLAT_CPLD_CH1 + 3, MLXPLAT_CPLD_CH1 + 4, MLXPLAT_CPLD_CH1 + @@ -694,6 +703,8 @@ static int __init mlxplat_dmi_default_matched(const struct dmi_system_id *dmi) ARRAY_SIZE(mlxplat_default_channels[i]); } mlxplat_hotplug = &mlxplat_mlxcpld_default_data; + mlxplat_hotplug->deferred_nr = + mlxplat_default_channels[i - 1][MLXPLAT_CPLD_GRP_CHNL_NUM - 1]; return 1; }; @@ -708,6 +719,8 @@ static int __init mlxplat_dmi_msn21xx_matched(const struct dmi_system_id *dmi) ARRAY_SIZE(mlxplat_msn21xx_channels); } mlxplat_hotplug = &mlxplat_mlxcpld_msn21xx_data; + mlxplat_hotplug->deferred_nr = + mlxplat_msn21xx_channels[MLXPLAT_CPLD_GRP_CHNL_NUM - 1]; return 1; }; @@ -722,6 +735,8 @@ static int __init mlxplat_dmi_msn274x_matched(const struct dmi_system_id *dmi) ARRAY_SIZE(mlxplat_msn21xx_channels); } mlxplat_hotplug = &mlxplat_mlxcpld_msn274x_data; + mlxplat_hotplug->deferred_nr = + mlxplat_msn21xx_channels[MLXPLAT_CPLD_GRP_CHNL_NUM - 1]; return 1; }; @@ -736,6 +751,8 @@ static int __init mlxplat_dmi_msn201x_matched(const struct dmi_system_id *dmi) ARRAY_SIZE(mlxplat_msn21xx_channels); } mlxplat_hotplug = &mlxplat_mlxcpld_msn201x_data; + mlxplat_hotplug->deferred_nr = + mlxplat_default_channels[i - 1][MLXPLAT_CPLD_GRP_CHNL_NUM - 1]; return 1; }; @@ -750,6 +767,8 @@ static int __init mlxplat_dmi_qmb7xx_matched(const struct dmi_system_id *dmi) ARRAY_SIZE(mlxplat_msn21xx_channels); } mlxplat_hotplug = &mlxplat_mlxcpld_default_ng_data; + mlxplat_hotplug->deferred_nr = + mlxplat_msn21xx_channels[MLXPLAT_CPLD_GRP_CHNL_NUM - 1]; return 1; }; @@ -830,10 +849,48 @@ static const struct dmi_system_id 
mlxplat_dmi_table[] __initconst = { MODULE_DEVICE_TABLE(dmi, mlxplat_dmi_table); +static int mlxplat_mlxcpld_verify_bus_topology(int *nr) +{ + struct i2c_adapter *search_adap; + int shift, i; + + /* Scan adapters from expected id to verify it is free. */ + *nr = MLXPLAT_CPLD_PHYS_ADAPTER_DEF_NR; + for (i = MLXPLAT_CPLD_PHYS_ADAPTER_DEF_NR; i < + MLXPLAT_CPLD_MAX_PHYS_ADAPTER_NUM; i++) { + search_adap = i2c_get_adapter(i); + if (search_adap) { + i2c_put_adapter(search_adap); + continue; + } + + /* Return if expected parent adapter is free. */ + if (i == MLXPLAT_CPLD_PHYS_ADAPTER_DEF_NR) + return 0; + break; + } + + /* Return with error if free id for adapter is not found. */ + if (i == MLXPLAT_CPLD_MAX_PHYS_ADAPTER_NUM) + return -ENODEV; + + /* Shift adapter ids, since expected parent adapter is not free. */ + *nr = i; + for (i = 0; i < ARRAY_SIZE(mlxplat_mux_data); i++) { + shift = *nr - mlxplat_mux_data[i].parent; + mlxplat_mux_data[i].parent = *nr; + mlxplat_mux_data[i].base_nr += shift; + if (shift > 0) + mlxplat_hotplug->shift_nr = shift; + } + + return 0; +} + static int __init mlxplat_init(void) { struct mlxplat_priv *priv; - int i, err; + int i, nr, err; if (!dmi_check_system(mlxplat_dmi_table)) return -ENODEV; @@ -853,7 +910,12 @@ static int __init mlxplat_init(void) } platform_set_drvdata(mlxplat_dev, priv); - priv->pdev_i2c = platform_device_register_simple("i2c_mlxcpld", -1, + err = mlxplat_mlxcpld_verify_bus_topology(&nr); + if (nr < 0) + goto fail_alloc; + + nr = (nr == MLXPLAT_CPLD_MAX_PHYS_ADAPTER_NUM) ? 
-1 : nr; + priv->pdev_i2c = platform_device_register_simple("i2c_mlxcpld", nr, NULL, 0); if (IS_ERR(priv->pdev_i2c)) { err = PTR_ERR(priv->pdev_i2c); diff --git a/drivers/platform/x86/silead_dmi.c b/drivers/platform/x86/silead_dmi.c index 3a624090191d..452aacabaa8e 100644 --- a/drivers/platform/x86/silead_dmi.c +++ b/drivers/platform/x86/silead_dmi.c @@ -446,6 +446,23 @@ static const struct dmi_system_id silead_ts_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "X3 Plus"), }, }, + { + /* I.T.Works TW701 */ + .driver_data = (void *)&surftab_wintron70_st70416_6_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Insyde"), + DMI_MATCH(DMI_PRODUCT_NAME, "i71c"), + DMI_MATCH(DMI_BIOS_VERSION, "itWORKS.G.WI71C.JGBMRB"), + }, + }, + { + /* Yours Y8W81, same case and touchscreen as Chuwi Vi8 */ + .driver_data = (void *)&chuwi_vi8_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "YOURS"), + DMI_MATCH(DMI_PRODUCT_NAME, "Y8W81"), + }, + }, { }, }; diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 1c57ee2b6d19..da1ca4856ea1 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -8703,16 +8703,24 @@ static const struct attribute_group fan_attr_group = { .ec = TPID(__id1, __id2), \ .quirks = __quirks } +#define TPACPI_FAN_QB(__id1, __id2, __quirks) \ + { .vendor = PCI_VENDOR_ID_LENOVO, \ + .bios = TPID(__id1, __id2), \ + .ec = TPACPI_MATCH_ANY, \ + .quirks = __quirks } + static const struct tpacpi_quirk fan_quirk_table[] __initconst = { TPACPI_FAN_QI('1', 'Y', TPACPI_FAN_Q1), TPACPI_FAN_QI('7', '8', TPACPI_FAN_Q1), TPACPI_FAN_QI('7', '6', TPACPI_FAN_Q1), TPACPI_FAN_QI('7', '0', TPACPI_FAN_Q1), TPACPI_FAN_QL('7', 'M', TPACPI_FAN_2FAN), + TPACPI_FAN_QB('N', '1', TPACPI_FAN_2FAN), }; #undef TPACPI_FAN_QL #undef TPACPI_FAN_QI +#undef TPACPI_FAN_QB static int __init fan_init(struct ibm_init_struct *iibm) { diff --git a/drivers/platform/x86/topstar-laptop.c b/drivers/platform/x86/topstar-laptop.c index 
1032c00b907b..f7761d98c0fd 100644 --- a/drivers/platform/x86/topstar-laptop.c +++ b/drivers/platform/x86/topstar-laptop.c @@ -1,14 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * ACPI driver for Topstar notebooks (hotkeys support only) + * Topstar Laptop ACPI Extras driver * * Copyright (c) 2009 Herton Ronaldo Krzesinski <herton@mandriva.com.br> + * Copyright (c) 2018 Guillaume Douézan-Grard * * Implementation inspired by existing x86 platform drivers, in special - * asus/eepc/fujitsu-laptop, thanks to their authors - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. + * asus/eepc/fujitsu-laptop, thanks to their authors. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -18,15 +16,93 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/acpi.h> +#include <linux/dmi.h> #include <linux/input.h> #include <linux/input/sparse-keymap.h> +#include <linux/leds.h> +#include <linux/platform_device.h> -#define ACPI_TOPSTAR_CLASS "topstar" +#define TOPSTAR_LAPTOP_CLASS "topstar" -struct topstar_hkey { - struct input_dev *inputdev; +struct topstar_laptop { + struct acpi_device *device; + struct platform_device *platform; + struct input_dev *input; + struct led_classdev led; }; +/* + * LED + */ + +static enum led_brightness topstar_led_get(struct led_classdev *led) +{ + return led->brightness; +} + +static int topstar_led_set(struct led_classdev *led, + enum led_brightness state) +{ + struct topstar_laptop *topstar = container_of(led, + struct topstar_laptop, led); + + struct acpi_object_list params; + union acpi_object in_obj; + unsigned long long int ret; + acpi_status status; + + params.count = 1; + params.pointer = &in_obj; + in_obj.type = ACPI_TYPE_INTEGER; + in_obj.integer.value = 0x83; + + /* + * Topstar ACPI returns 0x30001 when the LED is ON and 0x30000 when it + * is OFF. 
+ */ + status = acpi_evaluate_integer(topstar->device->handle, + "GETX", &params, &ret); + if (ACPI_FAILURE(status)) + return -1; + + /* + * FNCX(0x83) toggles the LED (more precisely, it is supposed to + * act as an hardware switch and disconnect the WLAN adapter but + * it seems to be faulty on some models like the Topstar U931 + * Notebook). + */ + if ((ret == 0x30001 && state == LED_OFF) + || (ret == 0x30000 && state != LED_OFF)) { + status = acpi_execute_simple_method(topstar->device->handle, + "FNCX", 0x83); + if (ACPI_FAILURE(status)) + return -1; + } + + return 0; +} + +static int topstar_led_init(struct topstar_laptop *topstar) +{ + topstar->led = (struct led_classdev) { + .default_trigger = "rfkill0", + .brightness_get = topstar_led_get, + .brightness_set_blocking = topstar_led_set, + .name = TOPSTAR_LAPTOP_CLASS "::wlan", + }; + + return led_classdev_register(&topstar->platform->dev, &topstar->led); +} + +static void topstar_led_exit(struct topstar_laptop *topstar) +{ + led_classdev_unregister(&topstar->led); +} + +/* + * Input + */ + static const struct key_entry topstar_keymap[] = { { KE_KEY, 0x80, { KEY_BRIGHTNESSUP } }, { KE_KEY, 0x81, { KEY_BRIGHTNESSDOWN } }, @@ -57,107 +133,217 @@ static const struct key_entry topstar_keymap[] = { { KE_END, 0 } }; -static void acpi_topstar_notify(struct acpi_device *device, u32 event) +static void topstar_input_notify(struct topstar_laptop *topstar, int event) { - static bool dup_evnt[2]; - bool *dup; - struct topstar_hkey *hkey = acpi_driver_data(device); - - /* 0x83 and 0x84 key events comes duplicated... 
*/ - if (event == 0x83 || event == 0x84) { - dup = &dup_evnt[event - 0x83]; - if (*dup) { - *dup = false; - return; - } - *dup = true; - } - - if (!sparse_keymap_report_event(hkey->inputdev, event, 1, true)) + if (!sparse_keymap_report_event(topstar->input, event, 1, true)) pr_info("unknown event = 0x%02x\n", event); } -static int acpi_topstar_fncx_switch(struct acpi_device *device, bool state) -{ - acpi_status status; - - status = acpi_execute_simple_method(device->handle, "FNCX", - state ? 0x86 : 0x87); - if (ACPI_FAILURE(status)) { - pr_err("Unable to switch FNCX notifications\n"); - return -ENODEV; - } - - return 0; -} - -static int acpi_topstar_init_hkey(struct topstar_hkey *hkey) +static int topstar_input_init(struct topstar_laptop *topstar) { struct input_dev *input; - int error; + int err; input = input_allocate_device(); if (!input) return -ENOMEM; input->name = "Topstar Laptop extra buttons"; - input->phys = "topstar/input0"; + input->phys = TOPSTAR_LAPTOP_CLASS "/input0"; input->id.bustype = BUS_HOST; + input->dev.parent = &topstar->platform->dev; - error = sparse_keymap_setup(input, topstar_keymap, NULL); - if (error) { + err = sparse_keymap_setup(input, topstar_keymap, NULL); + if (err) { pr_err("Unable to setup input device keymap\n"); goto err_free_dev; } - error = input_register_device(input); - if (error) { + err = input_register_device(input); + if (err) { pr_err("Unable to register input device\n"); goto err_free_dev; } - hkey->inputdev = input; + topstar->input = input; return 0; - err_free_dev: +err_free_dev: input_free_device(input); - return error; + return err; } -static int acpi_topstar_add(struct acpi_device *device) +static void topstar_input_exit(struct topstar_laptop *topstar) { - struct topstar_hkey *tps_hkey; + input_unregister_device(topstar->input); +} - tps_hkey = kzalloc(sizeof(struct topstar_hkey), GFP_KERNEL); - if (!tps_hkey) +/* + * Platform + */ + +static struct platform_driver topstar_platform_driver = { + .driver = { + 
.name = TOPSTAR_LAPTOP_CLASS, + }, +}; + +static int topstar_platform_init(struct topstar_laptop *topstar) +{ + int err; + + topstar->platform = platform_device_alloc(TOPSTAR_LAPTOP_CLASS, -1); + if (!topstar->platform) return -ENOMEM; - strcpy(acpi_device_name(device), "Topstar TPSACPI"); - strcpy(acpi_device_class(device), ACPI_TOPSTAR_CLASS); + platform_set_drvdata(topstar->platform, topstar); + + err = platform_device_add(topstar->platform); + if (err) + goto err_device_put; + + return 0; - if (acpi_topstar_fncx_switch(device, true)) - goto add_err; +err_device_put: + platform_device_put(topstar->platform); + return err; +} + +static void topstar_platform_exit(struct topstar_laptop *topstar) +{ + platform_device_unregister(topstar->platform); +} + +/* + * ACPI + */ + +static int topstar_acpi_fncx_switch(struct acpi_device *device, bool state) +{ + acpi_status status; + u64 arg = state ? 0x86 : 0x87; - if (acpi_topstar_init_hkey(tps_hkey)) - goto add_err; + status = acpi_execute_simple_method(device->handle, "FNCX", arg); + if (ACPI_FAILURE(status)) { + pr_err("Unable to switch FNCX notifications\n"); + return -ENODEV; + } - device->driver_data = tps_hkey; return 0; +} -add_err: - kfree(tps_hkey); - return -ENODEV; +static void topstar_acpi_notify(struct acpi_device *device, u32 event) +{ + struct topstar_laptop *topstar = acpi_driver_data(device); + static bool dup_evnt[2]; + bool *dup; + + /* 0x83 and 0x84 key events comes duplicated... 
*/ + if (event == 0x83 || event == 0x84) { + dup = &dup_evnt[event - 0x83]; + if (*dup) { + *dup = false; + return; + } + *dup = true; + } + + topstar_input_notify(topstar, event); } -static int acpi_topstar_remove(struct acpi_device *device) +static int topstar_acpi_init(struct topstar_laptop *topstar) { - struct topstar_hkey *tps_hkey = acpi_driver_data(device); + return topstar_acpi_fncx_switch(topstar->device, true); +} - acpi_topstar_fncx_switch(device, false); +static void topstar_acpi_exit(struct topstar_laptop *topstar) +{ + topstar_acpi_fncx_switch(topstar->device, false); +} - input_unregister_device(tps_hkey->inputdev); - kfree(tps_hkey); +/* + * Enable software-based WLAN LED control on systems with defective + * hardware switch. + */ +static bool led_workaround; +static int dmi_led_workaround(const struct dmi_system_id *id) +{ + led_workaround = true; + return 0; +} + +static const struct dmi_system_id topstar_dmi_ids[] = { + { + .callback = dmi_led_workaround, + .ident = "Topstar U931/RVP7", + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "U931"), + DMI_MATCH(DMI_BOARD_VERSION, "RVP7"), + }, + }, + {} +}; + +static int topstar_acpi_add(struct acpi_device *device) +{ + struct topstar_laptop *topstar; + int err; + + dmi_check_system(topstar_dmi_ids); + + topstar = kzalloc(sizeof(struct topstar_laptop), GFP_KERNEL); + if (!topstar) + return -ENOMEM; + + strcpy(acpi_device_name(device), "Topstar TPSACPI"); + strcpy(acpi_device_class(device), TOPSTAR_LAPTOP_CLASS); + device->driver_data = topstar; + topstar->device = device; + + err = topstar_acpi_init(topstar); + if (err) + goto err_free; + + err = topstar_platform_init(topstar); + if (err) + goto err_acpi_exit; + + err = topstar_input_init(topstar); + if (err) + goto err_platform_exit; + + if (led_workaround) { + err = topstar_led_init(topstar); + if (err) + goto err_input_exit; + } + + return 0; + +err_input_exit: + topstar_input_exit(topstar); +err_platform_exit: + topstar_platform_exit(topstar); 
+err_acpi_exit: + topstar_acpi_exit(topstar); +err_free: + kfree(topstar); + return err; +} + +static int topstar_acpi_remove(struct acpi_device *device) +{ + struct topstar_laptop *topstar = acpi_driver_data(device); + + if (led_workaround) + topstar_led_exit(topstar); + + topstar_input_exit(topstar); + topstar_platform_exit(topstar); + topstar_acpi_exit(topstar); + + kfree(topstar); return 0; } @@ -168,18 +354,47 @@ static const struct acpi_device_id topstar_device_ids[] = { }; MODULE_DEVICE_TABLE(acpi, topstar_device_ids); -static struct acpi_driver acpi_topstar_driver = { +static struct acpi_driver topstar_acpi_driver = { .name = "Topstar laptop ACPI driver", - .class = ACPI_TOPSTAR_CLASS, + .class = TOPSTAR_LAPTOP_CLASS, .ids = topstar_device_ids, .ops = { - .add = acpi_topstar_add, - .remove = acpi_topstar_remove, - .notify = acpi_topstar_notify, + .add = topstar_acpi_add, + .remove = topstar_acpi_remove, + .notify = topstar_acpi_notify, }, }; -module_acpi_driver(acpi_topstar_driver); + +static int __init topstar_laptop_init(void) +{ + int ret; + + ret = platform_driver_register(&topstar_platform_driver); + if (ret < 0) + return ret; + + ret = acpi_bus_register_driver(&topstar_acpi_driver); + if (ret < 0) + goto err_driver_unreg; + + pr_info("ACPI extras driver loaded\n"); + return 0; + +err_driver_unreg: + platform_driver_unregister(&topstar_platform_driver); + return ret; +} + +static void __exit topstar_laptop_exit(void) +{ + acpi_bus_unregister_driver(&topstar_acpi_driver); + platform_driver_unregister(&topstar_platform_driver); +} + +module_init(topstar_laptop_init); +module_exit(topstar_laptop_exit); MODULE_AUTHOR("Herton Ronaldo Krzesinski"); +MODULE_AUTHOR("Guillaume Douézan-Grard"); MODULE_DESCRIPTION("Topstar Laptop ACPI Extras driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 8796211ef24a..8e3d0146ff8c 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -130,13 
+130,11 @@ static bool find_guid(const char *guid_string, struct wmi_block **out) uuid_le guid_input; struct wmi_block *wblock; struct guid_block *block; - struct list_head *p; if (uuid_le_to_bin(guid_string, &guid_input)) return false; - list_for_each(p, &wmi_block_list) { - wblock = list_entry(p, struct wmi_block, list); + list_for_each_entry(wblock, &wmi_block_list, list) { block = &wblock->gblock; if (memcmp(block->guid, &guid_input, 16) == 0) { @@ -519,7 +517,6 @@ wmi_notify_handler handler, void *data) struct wmi_block *block; acpi_status status = AE_NOT_EXIST; uuid_le guid_input; - struct list_head *p; if (!guid || !handler) return AE_BAD_PARAMETER; @@ -527,9 +524,8 @@ wmi_notify_handler handler, void *data) if (uuid_le_to_bin(guid, &guid_input)) return AE_BAD_PARAMETER; - list_for_each(p, &wmi_block_list) { + list_for_each_entry(block, &wmi_block_list, list) { acpi_status wmi_status; - block = list_entry(p, struct wmi_block, list); if (memcmp(block->gblock.guid, &guid_input, 16) == 0) { if (block->handler && @@ -560,7 +556,6 @@ acpi_status wmi_remove_notify_handler(const char *guid) struct wmi_block *block; acpi_status status = AE_NOT_EXIST; uuid_le guid_input; - struct list_head *p; if (!guid) return AE_BAD_PARAMETER; @@ -568,9 +563,8 @@ acpi_status wmi_remove_notify_handler(const char *guid) if (uuid_le_to_bin(guid, &guid_input)) return AE_BAD_PARAMETER; - list_for_each(p, &wmi_block_list) { + list_for_each_entry(block, &wmi_block_list, list) { acpi_status wmi_status; - block = list_entry(p, struct wmi_block, list); if (memcmp(block->gblock.guid, &guid_input, 16) == 0) { if (!block->handler || @@ -610,15 +604,13 @@ acpi_status wmi_get_event_data(u32 event, struct acpi_buffer *out) union acpi_object params[1]; struct guid_block *gblock; struct wmi_block *wblock; - struct list_head *p; input.count = 1; input.pointer = params; params[0].type = ACPI_TYPE_INTEGER; params[0].integer.value = event; - list_for_each(p, &wmi_block_list) { - wblock = list_entry(p, 
struct wmi_block, list); + list_for_each_entry(wblock, &wmi_block_list, list) { gblock = &wblock->gblock; if ((gblock->flags & ACPI_WMI_EVENT) && @@ -933,12 +925,11 @@ static int wmi_dev_probe(struct device *dev) goto probe_failure; } - buf = kmalloc(strlen(wdriver->driver.name) + 5, GFP_KERNEL); + buf = kasprintf(GFP_KERNEL, "wmi/%s", wdriver->driver.name); if (!buf) { ret = -ENOMEM; goto probe_string_failure; } - sprintf(buf, "wmi/%s", wdriver->driver.name); wblock->char_dev.minor = MISC_DYNAMIC_MINOR; wblock->char_dev.name = buf; wblock->char_dev.fops = &wmi_fops; @@ -1261,11 +1252,9 @@ static void acpi_wmi_notify_handler(acpi_handle handle, u32 event, { struct guid_block *block; struct wmi_block *wblock; - struct list_head *p; bool found_it = false; - list_for_each(p, &wmi_block_list) { - wblock = list_entry(p, struct wmi_block, list); + list_for_each_entry(wblock, &wmi_block_list, list) { block = &wblock->gblock; if (wblock->acpi_device->handle == handle && diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig index b609e1d3654b..027274008b08 100644 --- a/drivers/remoteproc/Kconfig +++ b/drivers/remoteproc/Kconfig @@ -6,6 +6,7 @@ config REMOTEPROC select CRC32 select FW_LOADER select VIRTIO + select WANT_DEV_COREDUMP help Support for remote processors (such as DSP coprocessors). These are mainly used on embedded systems. 
@@ -90,6 +91,7 @@ config QCOM_ADSP_PIL depends on QCOM_SMEM depends on RPMSG_QCOM_SMD || (COMPILE_TEST && RPMSG_QCOM_SMD=n) depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n + depends on QCOM_SYSMON || QCOM_SYSMON=n select MFD_SYSCON select QCOM_MDT_LOADER select QCOM_RPROC_COMMON @@ -107,6 +109,7 @@ config QCOM_Q6V5_PIL depends on QCOM_SMEM depends on RPMSG_QCOM_SMD || (COMPILE_TEST && RPMSG_QCOM_SMD=n) depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n + depends on QCOM_SYSMON || QCOM_SYSMON=n select MFD_SYSCON select QCOM_RPROC_COMMON select QCOM_SCM @@ -114,12 +117,28 @@ config QCOM_Q6V5_PIL Say y here to support the Qualcomm Peripherial Image Loader for the Hexagon V5 based remote processors. +config QCOM_SYSMON + tristate "Qualcomm sysmon driver" + depends on RPMSG + depends on ARCH_QCOM + depends on NET + select QCOM_QMI_HELPERS + help + The sysmon driver implements a sysmon QMI client and a handler for + the sys_mon SMD and GLINK channel, which are used for graceful + shutdown, retrieving failure information and propagating information + about other subsystems being shut down. + + Say y here if your system runs firmware on any other subsystems, e.g. + modem or DSP. 
+ config QCOM_WCNSS_PIL tristate "Qualcomm WCNSS Peripheral Image Loader" depends on OF && ARCH_QCOM depends on RPMSG_QCOM_SMD || (COMPILE_TEST && RPMSG_QCOM_SMD=n) depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n depends on QCOM_SMEM + depends on QCOM_SYSMON || QCOM_SYSMON=n select QCOM_MDT_LOADER select QCOM_RPROC_COMMON select QCOM_SCM diff --git a/drivers/remoteproc/Makefile b/drivers/remoteproc/Makefile index 6e16450ce11f..02627ede8d4a 100644 --- a/drivers/remoteproc/Makefile +++ b/drivers/remoteproc/Makefile @@ -17,6 +17,7 @@ obj-$(CONFIG_KEYSTONE_REMOTEPROC) += keystone_remoteproc.o obj-$(CONFIG_QCOM_ADSP_PIL) += qcom_adsp_pil.o obj-$(CONFIG_QCOM_RPROC_COMMON) += qcom_common.o obj-$(CONFIG_QCOM_Q6V5_PIL) += qcom_q6v5_pil.o +obj-$(CONFIG_QCOM_SYSMON) += qcom_sysmon.o obj-$(CONFIG_QCOM_WCNSS_PIL) += qcom_wcnss_pil.o qcom_wcnss_pil-y += qcom_wcnss.o qcom_wcnss_pil-y += qcom_wcnss_iris.o diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c index 633268e9d550..54c07fd3f204 100644 --- a/drivers/remoteproc/imx_rproc.c +++ b/drivers/remoteproc/imx_rproc.c @@ -333,14 +333,14 @@ static int imx_rproc_probe(struct platform_device *pdev) /* set some other name then imx */ rproc = rproc_alloc(dev, "imx-rproc", &imx_rproc_ops, NULL, sizeof(*priv)); - if (!rproc) { - ret = -ENOMEM; - goto err; - } + if (!rproc) + return -ENOMEM; dcfg = of_device_get_match_data(dev); - if (!dcfg) - return -EINVAL; + if (!dcfg) { + ret = -EINVAL; + goto err_put_rproc; + } priv = rproc->priv; priv->rproc = rproc; @@ -359,8 +359,8 @@ static int imx_rproc_probe(struct platform_device *pdev) priv->clk = devm_clk_get(dev, NULL); if (IS_ERR(priv->clk)) { dev_err(dev, "Failed to get clock\n"); - rproc_free(rproc); - return PTR_ERR(priv->clk); + ret = PTR_ERR(priv->clk); + goto err_put_rproc; } /* @@ -370,8 +370,7 @@ static int imx_rproc_probe(struct platform_device *pdev) ret = clk_prepare_enable(priv->clk); if (ret) { dev_err(&rproc->dev, "Failed to enable 
clock\n"); - rproc_free(rproc); - return ret; + goto err_put_rproc; } ret = rproc_add(rproc); @@ -380,13 +379,13 @@ static int imx_rproc_probe(struct platform_device *pdev) goto err_put_clk; } - return ret; + return 0; err_put_clk: clk_disable_unprepare(priv->clk); err_put_rproc: rproc_free(rproc); -err: + return ret; } diff --git a/drivers/remoteproc/qcom_adsp_pil.c b/drivers/remoteproc/qcom_adsp_pil.c index 373c167892d7..89a86ce07f99 100644 --- a/drivers/remoteproc/qcom_adsp_pil.c +++ b/drivers/remoteproc/qcom_adsp_pil.c @@ -38,7 +38,10 @@ struct adsp_data { const char *firmware_name; int pas_id; bool has_aggre2_clk; + const char *ssr_name; + const char *sysmon_name; + int ssctl_id; }; struct qcom_adsp { @@ -75,6 +78,7 @@ struct qcom_adsp { struct qcom_rproc_glink glink_subdev; struct qcom_rproc_subdev smd_subdev; struct qcom_rproc_ssr ssr_subdev; + struct qcom_sysmon *sysmon; }; static int adsp_load(struct rproc *rproc, const struct firmware *fw) @@ -82,7 +86,9 @@ static int adsp_load(struct rproc *rproc, const struct firmware *fw) struct qcom_adsp *adsp = (struct qcom_adsp *)rproc->priv; return qcom_mdt_load(adsp->dev, fw, rproc->firmware, adsp->pas_id, - adsp->mem_region, adsp->mem_phys, adsp->mem_size); + adsp->mem_region, adsp->mem_phys, adsp->mem_size, + &adsp->mem_reloc); + } static int adsp_start(struct rproc *rproc) @@ -177,6 +183,7 @@ static const struct rproc_ops adsp_ops = { .start = adsp_start, .stop = adsp_stop, .da_to_va = adsp_da_to_va, + .parse_fw = qcom_register_dump_segments, .load = adsp_load, }; @@ -201,9 +208,6 @@ static irqreturn_t adsp_fatal_interrupt(int irq, void *dev) rproc_report_crash(adsp->rproc, RPROC_FATAL_ERROR); - if (!IS_ERR(msg)) - msg[0] = '\0'; - return IRQ_HANDLED; } @@ -398,6 +402,9 @@ static int adsp_probe(struct platform_device *pdev) qcom_add_glink_subdev(rproc, &adsp->glink_subdev); qcom_add_smd_subdev(rproc, &adsp->smd_subdev); qcom_add_ssr_subdev(rproc, &adsp->ssr_subdev, desc->ssr_name); + adsp->sysmon = 
qcom_add_sysmon_subdev(rproc, + desc->sysmon_name, + desc->ssctl_id); ret = rproc_add(rproc); if (ret) @@ -419,6 +426,7 @@ static int adsp_remove(struct platform_device *pdev) rproc_del(adsp->rproc); qcom_remove_glink_subdev(adsp->rproc, &adsp->glink_subdev); + qcom_remove_sysmon_subdev(adsp->sysmon); qcom_remove_smd_subdev(adsp->rproc, &adsp->smd_subdev); qcom_remove_ssr_subdev(adsp->rproc, &adsp->ssr_subdev); rproc_free(adsp->rproc); @@ -432,6 +440,8 @@ static const struct adsp_data adsp_resource_init = { .pas_id = 1, .has_aggre2_clk = false, .ssr_name = "lpass", + .sysmon_name = "adsp", + .ssctl_id = 0x14, }; static const struct adsp_data slpi_resource_init = { @@ -440,6 +450,8 @@ static const struct adsp_data slpi_resource_init = { .pas_id = 12, .has_aggre2_clk = true, .ssr_name = "dsps", + .sysmon_name = "slpi", + .ssctl_id = 0x16, }; static const struct of_device_id adsp_of_match[] = { diff --git a/drivers/remoteproc/qcom_common.c b/drivers/remoteproc/qcom_common.c index 00602499713f..acfc99f82fb8 100644 --- a/drivers/remoteproc/qcom_common.c +++ b/drivers/remoteproc/qcom_common.c @@ -22,6 +22,7 @@ #include <linux/remoteproc.h> #include <linux/rpmsg/qcom_glink.h> #include <linux/rpmsg/qcom_smd.h> +#include <linux/soc/qcom/mdt_loader.h> #include "remoteproc_internal.h" #include "qcom_common.h" @@ -41,7 +42,7 @@ static int glink_subdev_probe(struct rproc_subdev *subdev) return PTR_ERR_OR_ZERO(glink->edge); } -static void glink_subdev_remove(struct rproc_subdev *subdev) +static void glink_subdev_remove(struct rproc_subdev *subdev, bool crashed) { struct qcom_rproc_glink *glink = to_glink_subdev(subdev); @@ -74,11 +75,57 @@ EXPORT_SYMBOL_GPL(qcom_add_glink_subdev); */ void qcom_remove_glink_subdev(struct rproc *rproc, struct qcom_rproc_glink *glink) { + if (!glink->node) + return; + rproc_remove_subdev(rproc, &glink->subdev); of_node_put(glink->node); } EXPORT_SYMBOL_GPL(qcom_remove_glink_subdev); +/** + * qcom_register_dump_segments() - register segments for 
coredump + * @rproc: remoteproc handle + * @fw: firmware header + * + * Register all segments of the ELF in the remoteproc coredump segment list + * + * Return: 0 on success, negative errno on failure. + */ +int qcom_register_dump_segments(struct rproc *rproc, + const struct firmware *fw) +{ + const struct elf32_phdr *phdrs; + const struct elf32_phdr *phdr; + const struct elf32_hdr *ehdr; + int ret; + int i; + + ehdr = (struct elf32_hdr *)fw->data; + phdrs = (struct elf32_phdr *)(ehdr + 1); + + for (i = 0; i < ehdr->e_phnum; i++) { + phdr = &phdrs[i]; + + if (phdr->p_type != PT_LOAD) + continue; + + if ((phdr->p_flags & QCOM_MDT_TYPE_MASK) == QCOM_MDT_TYPE_HASH) + continue; + + if (!phdr->p_memsz) + continue; + + ret = rproc_coredump_add_segment(rproc, phdr->p_paddr, + phdr->p_memsz); + if (ret) + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(qcom_register_dump_segments); + static int smd_subdev_probe(struct rproc_subdev *subdev) { struct qcom_rproc_subdev *smd = to_smd_subdev(subdev); @@ -88,7 +135,7 @@ static int smd_subdev_probe(struct rproc_subdev *subdev) return PTR_ERR_OR_ZERO(smd->edge); } -static void smd_subdev_remove(struct rproc_subdev *subdev) +static void smd_subdev_remove(struct rproc_subdev *subdev, bool crashed) { struct qcom_rproc_subdev *smd = to_smd_subdev(subdev); @@ -121,6 +168,9 @@ EXPORT_SYMBOL_GPL(qcom_add_smd_subdev); */ void qcom_remove_smd_subdev(struct rproc *rproc, struct qcom_rproc_subdev *smd) { + if (!smd->node) + return; + rproc_remove_subdev(rproc, &smd->subdev); of_node_put(smd->node); } @@ -157,7 +207,7 @@ static int ssr_notify_start(struct rproc_subdev *subdev) return 0; } -static void ssr_notify_stop(struct rproc_subdev *subdev) +static void ssr_notify_stop(struct rproc_subdev *subdev, bool crashed) { struct qcom_rproc_ssr *ssr = to_ssr_subdev(subdev); diff --git a/drivers/remoteproc/qcom_common.h b/drivers/remoteproc/qcom_common.h index 728be9834d8b..58de71e4781c 100644 --- a/drivers/remoteproc/qcom_common.h +++ 
b/drivers/remoteproc/qcom_common.h @@ -4,6 +4,9 @@ #include <linux/remoteproc.h> #include "remoteproc_internal.h" +#include <linux/soc/qcom/qmi.h> + +struct qcom_sysmon; struct qcom_rproc_glink { struct rproc_subdev subdev; @@ -30,6 +33,8 @@ struct qcom_rproc_ssr { void qcom_add_glink_subdev(struct rproc *rproc, struct qcom_rproc_glink *glink); void qcom_remove_glink_subdev(struct rproc *rproc, struct qcom_rproc_glink *glink); +int qcom_register_dump_segments(struct rproc *rproc, const struct firmware *fw); + void qcom_add_smd_subdev(struct rproc *rproc, struct qcom_rproc_subdev *smd); void qcom_remove_smd_subdev(struct rproc *rproc, struct qcom_rproc_subdev *smd); @@ -37,4 +42,22 @@ void qcom_add_ssr_subdev(struct rproc *rproc, struct qcom_rproc_ssr *ssr, const char *ssr_name); void qcom_remove_ssr_subdev(struct rproc *rproc, struct qcom_rproc_ssr *ssr); +#if IS_ENABLED(CONFIG_QCOM_SYSMON) +struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc, + const char *name, + int ssctl_instance); +void qcom_remove_sysmon_subdev(struct qcom_sysmon *sysmon); +#else +static inline struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc, + const char *name, + int ssctl_instance) +{ + return NULL; +} + +static inline void qcom_remove_sysmon_subdev(struct qcom_sysmon *sysmon) +{ +} +#endif + #endif diff --git a/drivers/remoteproc/qcom_q6v5_pil.c b/drivers/remoteproc/qcom_q6v5_pil.c index b4e5e725848d..8e70a627e0bb 100644 --- a/drivers/remoteproc/qcom_q6v5_pil.c +++ b/drivers/remoteproc/qcom_q6v5_pil.c @@ -168,6 +168,7 @@ struct q6v5 { struct qcom_rproc_subdev smd_subdev; struct qcom_rproc_ssr ssr_subdev; + struct qcom_sysmon *sysmon; bool need_mem_protection; int mpss_perm; int mba_perm; @@ -939,9 +940,6 @@ static irqreturn_t q6v5_wdog_interrupt(int irq, void *dev) rproc_report_crash(qproc->rproc, RPROC_WATCHDOG); - if (!IS_ERR(msg)) - msg[0] = '\0'; - return IRQ_HANDLED; } @@ -959,9 +957,6 @@ static irqreturn_t q6v5_fatal_interrupt(int irq, void *dev) 
rproc_report_crash(qproc->rproc, RPROC_FATAL_ERROR); - if (!IS_ERR(msg)) - msg[0] = '\0'; - return IRQ_HANDLED; } @@ -1215,6 +1210,7 @@ static int q6v5_probe(struct platform_device *pdev) qproc->mba_perm = BIT(QCOM_SCM_VMID_HLOS); qcom_add_smd_subdev(rproc, &qproc->smd_subdev); qcom_add_ssr_subdev(rproc, &qproc->ssr_subdev, "mpss"); + qproc->sysmon = qcom_add_sysmon_subdev(rproc, "modem", 0x12); ret = rproc_add(rproc); if (ret) @@ -1234,6 +1230,7 @@ static int q6v5_remove(struct platform_device *pdev) rproc_del(qproc->rproc); + qcom_remove_sysmon_subdev(qproc->sysmon); qcom_remove_smd_subdev(qproc->rproc, &qproc->smd_subdev); qcom_remove_ssr_subdev(qproc->rproc, &qproc->ssr_subdev); rproc_free(qproc->rproc); diff --git a/drivers/remoteproc/qcom_sysmon.c b/drivers/remoteproc/qcom_sysmon.c new file mode 100644 index 000000000000..f085545d7da5 --- /dev/null +++ b/drivers/remoteproc/qcom_sysmon.c @@ -0,0 +1,579 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2017, Linaro Ltd. + */ +#include <linux/firmware.h> +#include <linux/module.h> +#include <linux/notifier.h> +#include <linux/slab.h> +#include <linux/io.h> +#include <linux/notifier.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> +#include <linux/remoteproc/qcom_rproc.h> +#include <linux/rpmsg.h> + +#include "qcom_common.h" + +static BLOCKING_NOTIFIER_HEAD(sysmon_notifiers); + +struct qcom_sysmon { + struct rproc_subdev subdev; + struct rproc *rproc; + + struct list_head node; + + const char *name; + + int ssctl_version; + int ssctl_instance; + + struct notifier_block nb; + + struct device *dev; + + struct rpmsg_endpoint *ept; + struct completion comp; + struct mutex lock; + + bool ssr_ack; + + struct qmi_handle qmi; + struct sockaddr_qrtr ssctl; +}; + +static DEFINE_MUTEX(sysmon_lock); +static LIST_HEAD(sysmon_list); + +/** + * sysmon_send_event() - send notification of other remote's SSR event + * @sysmon: sysmon context + * @name: other remote's name + */ +static void 
sysmon_send_event(struct qcom_sysmon *sysmon, const char *name) +{ + char req[50]; + int len; + int ret; + + len = snprintf(req, sizeof(req), "ssr:%s:before_shutdown", name); + if (len >= sizeof(req)) + return; + + mutex_lock(&sysmon->lock); + reinit_completion(&sysmon->comp); + sysmon->ssr_ack = false; + + ret = rpmsg_send(sysmon->ept, req, len); + if (ret < 0) { + dev_err(sysmon->dev, "failed to send sysmon event\n"); + goto out_unlock; + } + + ret = wait_for_completion_timeout(&sysmon->comp, + msecs_to_jiffies(5000)); + if (!ret) { + dev_err(sysmon->dev, "timeout waiting for sysmon ack\n"); + goto out_unlock; + } + + if (!sysmon->ssr_ack) + dev_err(sysmon->dev, "unexpected response to sysmon event\n"); + +out_unlock: + mutex_unlock(&sysmon->lock); +} + +/** + * sysmon_request_shutdown() - request graceful shutdown of remote + * @sysmon: sysmon context + */ +static void sysmon_request_shutdown(struct qcom_sysmon *sysmon) +{ + char *req = "ssr:shutdown"; + int ret; + + mutex_lock(&sysmon->lock); + reinit_completion(&sysmon->comp); + sysmon->ssr_ack = false; + + ret = rpmsg_send(sysmon->ept, req, strlen(req) + 1); + if (ret < 0) { + dev_err(sysmon->dev, "send sysmon shutdown request failed\n"); + goto out_unlock; + } + + ret = wait_for_completion_timeout(&sysmon->comp, + msecs_to_jiffies(5000)); + if (!ret) { + dev_err(sysmon->dev, "timeout waiting for sysmon ack\n"); + goto out_unlock; + } + + if (!sysmon->ssr_ack) + dev_err(sysmon->dev, + "unexpected response to sysmon shutdown request\n"); + +out_unlock: + mutex_unlock(&sysmon->lock); +} + +static int sysmon_callback(struct rpmsg_device *rpdev, void *data, int count, + void *priv, u32 addr) +{ + struct qcom_sysmon *sysmon = priv; + const char *ssr_ack = "ssr:ack"; + const int ssr_ack_len = strlen(ssr_ack) + 1; + + if (!sysmon) + return -EINVAL; + + if (count >= ssr_ack_len && !memcmp(data, ssr_ack, ssr_ack_len)) + sysmon->ssr_ack = true; + + complete(&sysmon->comp); + + return 0; +} + +#define SSCTL_SHUTDOWN_REQ 
0x21 +#define SSCTL_SUBSYS_EVENT_REQ 0x23 + +#define SSCTL_MAX_MSG_LEN 7 + +#define SSCTL_SUBSYS_NAME_LENGTH 15 + +enum { + SSCTL_SSR_EVENT_BEFORE_POWERUP, + SSCTL_SSR_EVENT_AFTER_POWERUP, + SSCTL_SSR_EVENT_BEFORE_SHUTDOWN, + SSCTL_SSR_EVENT_AFTER_SHUTDOWN, +}; + +enum { + SSCTL_SSR_EVENT_FORCED, + SSCTL_SSR_EVENT_GRACEFUL, +}; + +struct ssctl_shutdown_resp { + struct qmi_response_type_v01 resp; +}; + +static struct qmi_elem_info ssctl_shutdown_resp_ei[] = { + { + .data_type = QMI_STRUCT, + .elem_len = 1, + .elem_size = sizeof(struct qmi_response_type_v01), + .array_type = NO_ARRAY, + .tlv_type = 0x02, + .offset = offsetof(struct ssctl_shutdown_resp, resp), + .ei_array = qmi_response_type_v01_ei, + }, + {} +}; + +struct ssctl_subsys_event_req { + u8 subsys_name_len; + char subsys_name[SSCTL_SUBSYS_NAME_LENGTH]; + u32 event; + u8 evt_driven_valid; + u32 evt_driven; +}; + +static struct qmi_elem_info ssctl_subsys_event_req_ei[] = { + { + .data_type = QMI_DATA_LEN, + .elem_len = 1, + .elem_size = sizeof(uint8_t), + .array_type = NO_ARRAY, + .tlv_type = 0x01, + .offset = offsetof(struct ssctl_subsys_event_req, + subsys_name_len), + .ei_array = NULL, + }, + { + .data_type = QMI_UNSIGNED_1_BYTE, + .elem_len = SSCTL_SUBSYS_NAME_LENGTH, + .elem_size = sizeof(char), + .array_type = VAR_LEN_ARRAY, + .tlv_type = 0x01, + .offset = offsetof(struct ssctl_subsys_event_req, + subsys_name), + .ei_array = NULL, + }, + { + .data_type = QMI_SIGNED_4_BYTE_ENUM, + .elem_len = 1, + .elem_size = sizeof(uint32_t), + .array_type = NO_ARRAY, + .tlv_type = 0x02, + .offset = offsetof(struct ssctl_subsys_event_req, + event), + .ei_array = NULL, + }, + { + .data_type = QMI_OPT_FLAG, + .elem_len = 1, + .elem_size = sizeof(uint8_t), + .array_type = NO_ARRAY, + .tlv_type = 0x10, + .offset = offsetof(struct ssctl_subsys_event_req, + evt_driven_valid), + .ei_array = NULL, + }, + { + .data_type = QMI_SIGNED_4_BYTE_ENUM, + .elem_len = 1, + .elem_size = sizeof(uint32_t), + .array_type = NO_ARRAY, + 
.tlv_type = 0x10, + .offset = offsetof(struct ssctl_subsys_event_req, + evt_driven), + .ei_array = NULL, + }, + {} +}; + +struct ssctl_subsys_event_resp { + struct qmi_response_type_v01 resp; +}; + +static struct qmi_elem_info ssctl_subsys_event_resp_ei[] = { + { + .data_type = QMI_STRUCT, + .elem_len = 1, + .elem_size = sizeof(struct qmi_response_type_v01), + .array_type = NO_ARRAY, + .tlv_type = 0x02, + .offset = offsetof(struct ssctl_subsys_event_resp, + resp), + .ei_array = qmi_response_type_v01_ei, + }, + {} +}; + +/** + * ssctl_request_shutdown() - request shutdown via SSCTL QMI service + * @sysmon: sysmon context + */ +static void ssctl_request_shutdown(struct qcom_sysmon *sysmon) +{ + struct ssctl_shutdown_resp resp; + struct qmi_txn txn; + int ret; + + ret = qmi_txn_init(&sysmon->qmi, &txn, ssctl_shutdown_resp_ei, &resp); + if (ret < 0) { + dev_err(sysmon->dev, "failed to allocate QMI txn\n"); + return; + } + + ret = qmi_send_request(&sysmon->qmi, &sysmon->ssctl, &txn, + SSCTL_SHUTDOWN_REQ, 0, NULL, NULL); + if (ret < 0) { + dev_err(sysmon->dev, "failed to send shutdown request\n"); + qmi_txn_cancel(&txn); + return; + } + + ret = qmi_txn_wait(&txn, 5 * HZ); + if (ret < 0) + dev_err(sysmon->dev, "failed receiving QMI response\n"); + else if (resp.resp.result) + dev_err(sysmon->dev, "shutdown request failed\n"); + else + dev_dbg(sysmon->dev, "shutdown request completed\n"); +} + +/** + * ssctl_send_event() - send notification of other remote's SSR event + * @sysmon: sysmon context + * @name: other remote's name + */ +static void ssctl_send_event(struct qcom_sysmon *sysmon, const char *name) +{ + struct ssctl_subsys_event_resp resp; + struct ssctl_subsys_event_req req; + struct qmi_txn txn; + int ret; + + memset(&resp, 0, sizeof(resp)); + ret = qmi_txn_init(&sysmon->qmi, &txn, ssctl_subsys_event_resp_ei, &resp); + if (ret < 0) { + dev_err(sysmon->dev, "failed to allocate QMI txn\n"); + return; + } + + memset(&req, 0, sizeof(req)); + strlcpy(req.subsys_name, 
name, sizeof(req.subsys_name)); + req.subsys_name_len = strlen(req.subsys_name); + req.event = SSCTL_SSR_EVENT_BEFORE_SHUTDOWN; + req.evt_driven_valid = true; + req.evt_driven = SSCTL_SSR_EVENT_FORCED; + + ret = qmi_send_request(&sysmon->qmi, &sysmon->ssctl, &txn, + SSCTL_SUBSYS_EVENT_REQ, 40, + ssctl_subsys_event_req_ei, &req); + if (ret < 0) { + dev_err(sysmon->dev, "failed to send shutdown request\n"); + qmi_txn_cancel(&txn); + return; + } + + ret = qmi_txn_wait(&txn, 5 * HZ); + if (ret < 0) + dev_err(sysmon->dev, "failed receiving QMI response\n"); + else if (resp.resp.result) + dev_err(sysmon->dev, "ssr event send failed\n"); + else + dev_dbg(sysmon->dev, "ssr event send completed\n"); +} + +/** + * ssctl_new_server() - QMI callback indicating a new service + * @qmi: QMI handle + * @svc: service information + * + * Return: 0 if we're interested in this service, -EINVAL otherwise. + */ +static int ssctl_new_server(struct qmi_handle *qmi, struct qmi_service *svc) +{ + struct qcom_sysmon *sysmon = container_of(qmi, struct qcom_sysmon, qmi); + + switch (svc->version) { + case 1: + if (svc->instance != 0) + return -EINVAL; + if (strcmp(sysmon->name, "modem")) + return -EINVAL; + break; + case 2: + if (svc->instance != sysmon->ssctl_instance) + return -EINVAL; + break; + default: + return -EINVAL; + }; + + sysmon->ssctl_version = svc->version; + + sysmon->ssctl.sq_family = AF_QIPCRTR; + sysmon->ssctl.sq_node = svc->node; + sysmon->ssctl.sq_port = svc->port; + + svc->priv = sysmon; + + return 0; +} + +/** + * ssctl_del_server() - QMI callback indicating that @svc is removed + * @qmi: QMI handle + * @svc: service information + */ +static void ssctl_del_server(struct qmi_handle *qmi, struct qmi_service *svc) +{ + struct qcom_sysmon *sysmon = svc->priv; + + sysmon->ssctl_version = 0; +} + +static const struct qmi_ops ssctl_ops = { + .new_server = ssctl_new_server, + .del_server = ssctl_del_server, +}; + +static int sysmon_start(struct rproc_subdev *subdev) +{ + return 
0; +} + +static void sysmon_stop(struct rproc_subdev *subdev, bool crashed) +{ + struct qcom_sysmon *sysmon = container_of(subdev, struct qcom_sysmon, subdev); + + blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)sysmon->name); + + /* Don't request graceful shutdown if we've crashed */ + if (crashed) + return; + + if (sysmon->ssctl_version) + ssctl_request_shutdown(sysmon); + else if (sysmon->ept) + sysmon_request_shutdown(sysmon); +} + +/** + * sysmon_notify() - notify sysmon target of another's SSR + * @nb: notifier_block associated with sysmon instance + * @event: unused + * @data: SSR identifier of the remote that is going down + */ +static int sysmon_notify(struct notifier_block *nb, unsigned long event, + void *data) +{ + struct qcom_sysmon *sysmon = container_of(nb, struct qcom_sysmon, nb); + struct rproc *rproc = sysmon->rproc; + const char *ssr_name = data; + + /* Skip non-running rprocs and the originating instance */ + if (rproc->state != RPROC_RUNNING || !strcmp(data, sysmon->name)) { + dev_dbg(sysmon->dev, "not notifying %s\n", sysmon->name); + return NOTIFY_DONE; + } + + /* Only SSCTL version 2 supports SSR events */ + if (sysmon->ssctl_version == 2) + ssctl_send_event(sysmon, ssr_name); + else if (sysmon->ept) + sysmon_send_event(sysmon, ssr_name); + + return NOTIFY_DONE; +} + +/** + * qcom_add_sysmon_subdev() - create a sysmon subdev for the given remoteproc + * @rproc: rproc context to associate the subdev with + * @name: name of this subdev, to use in SSR + * @ssctl_instance: instance id of the ssctl QMI service + * + * Return: A new qcom_sysmon object, or NULL on failure + */ +struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc, + const char *name, + int ssctl_instance) +{ + struct qcom_sysmon *sysmon; + int ret; + + sysmon = kzalloc(sizeof(*sysmon), GFP_KERNEL); + if (!sysmon) + return NULL; + + sysmon->dev = rproc->dev.parent; + sysmon->rproc = rproc; + + sysmon->name = name; + sysmon->ssctl_instance = ssctl_instance; + + 
init_completion(&sysmon->comp); + mutex_init(&sysmon->lock); + + ret = qmi_handle_init(&sysmon->qmi, SSCTL_MAX_MSG_LEN, &ssctl_ops, NULL); + if (ret < 0) { + dev_err(sysmon->dev, "failed to initialize qmi handle\n"); + kfree(sysmon); + return NULL; + } + + qmi_add_lookup(&sysmon->qmi, 43, 0, 0); + + rproc_add_subdev(rproc, &sysmon->subdev, sysmon_start, sysmon_stop); + + sysmon->nb.notifier_call = sysmon_notify; + blocking_notifier_chain_register(&sysmon_notifiers, &sysmon->nb); + + mutex_lock(&sysmon_lock); + list_add(&sysmon->node, &sysmon_list); + mutex_unlock(&sysmon_lock); + + return sysmon; +} +EXPORT_SYMBOL_GPL(qcom_add_sysmon_subdev); + +/** + * qcom_remove_sysmon_subdev() - release a qcom_sysmon + * @sysmon: sysmon context, as retrieved by qcom_add_sysmon_subdev() + */ +void qcom_remove_sysmon_subdev(struct qcom_sysmon *sysmon) +{ + if (!sysmon) + return; + + mutex_lock(&sysmon_lock); + list_del(&sysmon->node); + mutex_unlock(&sysmon_lock); + + blocking_notifier_chain_unregister(&sysmon_notifiers, &sysmon->nb); + + rproc_remove_subdev(sysmon->rproc, &sysmon->subdev); + + qmi_handle_release(&sysmon->qmi); + + kfree(sysmon); +} +EXPORT_SYMBOL_GPL(qcom_remove_sysmon_subdev); + +/** + * sysmon_probe() - probe sys_mon channel + * @rpdev: rpmsg device handle + * + * Find the sysmon context associated with the ancestor remoteproc and assign + * this rpmsg device with said sysmon context. + * + * Return: 0 on success, negative errno on failure. 
+ */ +static int sysmon_probe(struct rpmsg_device *rpdev) +{ + struct qcom_sysmon *sysmon; + struct rproc *rproc; + + rproc = rproc_get_by_child(&rpdev->dev); + if (!rproc) { + dev_err(&rpdev->dev, "sysmon device not child of rproc\n"); + return -EINVAL; + } + + mutex_lock(&sysmon_lock); + list_for_each_entry(sysmon, &sysmon_list, node) { + if (sysmon->rproc == rproc) + goto found; + } + mutex_unlock(&sysmon_lock); + + dev_err(&rpdev->dev, "no sysmon associated with parent rproc\n"); + + return -EINVAL; + +found: + mutex_unlock(&sysmon_lock); + + rpdev->ept->priv = sysmon; + sysmon->ept = rpdev->ept; + + return 0; +} + +/** + * sysmon_remove() - sys_mon channel remove handler + * @rpdev: rpmsg device handle + * + * Disassociate the rpmsg device with the sysmon instance. + */ +static void sysmon_remove(struct rpmsg_device *rpdev) +{ + struct qcom_sysmon *sysmon = rpdev->ept->priv; + + sysmon->ept = NULL; +} + +static const struct rpmsg_device_id sysmon_match[] = { + { "sys_mon" }, + {} +}; + +static struct rpmsg_driver sysmon_driver = { + .probe = sysmon_probe, + .remove = sysmon_remove, + .callback = sysmon_callback, + .id_table = sysmon_match, + .drv = { + .name = "qcom_sysmon", + }, +}; + +module_rpmsg_driver(sysmon_driver); + +MODULE_DESCRIPTION("Qualcomm sysmon driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/remoteproc/qcom_wcnss.c b/drivers/remoteproc/qcom_wcnss.c index 3f0609236a76..b0e07e9f42d5 100644 --- a/drivers/remoteproc/qcom_wcnss.c +++ b/drivers/remoteproc/qcom_wcnss.c @@ -40,6 +40,7 @@ #define WCNSS_CRASH_REASON_SMEM 422 #define WCNSS_FIRMWARE_NAME "wcnss.mdt" #define WCNSS_PAS_ID 6 +#define WCNSS_SSCTL_ID 0x13 #define WCNSS_SPARE_NVBIN_DLND BIT(25) @@ -98,6 +99,7 @@ struct qcom_wcnss { size_t mem_size; struct qcom_rproc_subdev smd_subdev; + struct qcom_sysmon *sysmon; }; static const struct wcnss_data riva_data = { @@ -153,7 +155,8 @@ static int wcnss_load(struct rproc *rproc, const struct firmware *fw) struct qcom_wcnss *wcnss = (struct 
qcom_wcnss *)rproc->priv; return qcom_mdt_load(wcnss->dev, fw, rproc->firmware, WCNSS_PAS_ID, - wcnss->mem_region, wcnss->mem_phys, wcnss->mem_size); + wcnss->mem_region, wcnss->mem_phys, + wcnss->mem_size, &wcnss->mem_reloc); } static void wcnss_indicate_nv_download(struct qcom_wcnss *wcnss) @@ -308,6 +311,7 @@ static const struct rproc_ops wcnss_ops = { .start = wcnss_start, .stop = wcnss_stop, .da_to_va = wcnss_da_to_va, + .parse_fw = qcom_register_dump_segments, .load = wcnss_load, }; @@ -332,9 +336,6 @@ static irqreturn_t wcnss_fatal_interrupt(int irq, void *dev) rproc_report_crash(wcnss->rproc, RPROC_FATAL_ERROR); - if (!IS_ERR(msg)) - msg[0] = '\0'; - return IRQ_HANDLED; } @@ -551,6 +552,7 @@ static int wcnss_probe(struct platform_device *pdev) } qcom_add_smd_subdev(rproc, &wcnss->smd_subdev); + wcnss->sysmon = qcom_add_sysmon_subdev(rproc, "wcnss", WCNSS_SSCTL_ID); ret = rproc_add(rproc); if (ret) @@ -573,6 +575,7 @@ static int wcnss_remove(struct platform_device *pdev) qcom_smem_state_put(wcnss->state); rproc_del(wcnss->rproc); + qcom_remove_sysmon_subdev(wcnss->sysmon); qcom_remove_smd_subdev(wcnss->rproc, &wcnss->smd_subdev); rproc_free(wcnss->rproc); diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index 4170dfbd93bd..6d9c5832ce47 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -33,6 +33,7 @@ #include <linux/firmware.h> #include <linux/string.h> #include <linux/debugfs.h> +#include <linux/devcoredump.h> #include <linux/remoteproc.h> #include <linux/iommu.h> #include <linux/idr.h> @@ -307,7 +308,7 @@ static int rproc_vdev_do_probe(struct rproc_subdev *subdev) return rproc_add_virtio_dev(rvdev, rvdev->id); } -static void rproc_vdev_do_remove(struct rproc_subdev *subdev) +static void rproc_vdev_do_remove(struct rproc_subdev *subdev, bool crashed) { struct rproc_vdev *rvdev = container_of(subdev, struct rproc_vdev, subdev); @@ -788,17 +789,31 @@ static int 
rproc_probe_subdevices(struct rproc *rproc) unroll_registration: list_for_each_entry_continue_reverse(subdev, &rproc->subdevs, node) - subdev->remove(subdev); + subdev->remove(subdev, true); return ret; } -static void rproc_remove_subdevices(struct rproc *rproc) +static void rproc_remove_subdevices(struct rproc *rproc, bool crashed) { struct rproc_subdev *subdev; list_for_each_entry_reverse(subdev, &rproc->subdevs, node) - subdev->remove(subdev); + subdev->remove(subdev, crashed); +} + +/** + * rproc_coredump_cleanup() - clean up dump_segments list + * @rproc: the remote processor handle + */ +static void rproc_coredump_cleanup(struct rproc *rproc) +{ + struct rproc_dump_segment *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &rproc->dump_segments, node) { + list_del(&entry->node); + kfree(entry); + } } /** @@ -848,6 +863,8 @@ static void rproc_resource_cleanup(struct rproc *rproc) /* clean up remote vdev entries */ list_for_each_entry_safe(rvdev, rvtmp, &rproc->rvdevs, node) kref_put(&rvdev->refcount, rproc_vdev_release); + + rproc_coredump_cleanup(rproc); } static int rproc_start(struct rproc *rproc, const struct firmware *fw) @@ -927,8 +944,8 @@ static int rproc_fw_boot(struct rproc *rproc, const struct firmware *fw) rproc->bootaddr = rproc_get_boot_addr(rproc, fw); - /* load resource table */ - ret = rproc_load_rsc_table(rproc, fw); + /* Load resource table, core dump segment list etc from the firmware */ + ret = rproc_parse_fw(rproc, fw); if (ret) goto disable_iommu; @@ -992,13 +1009,13 @@ static int rproc_trigger_auto_boot(struct rproc *rproc) return ret; } -static int rproc_stop(struct rproc *rproc) +static int rproc_stop(struct rproc *rproc, bool crashed) { struct device *dev = &rproc->dev; int ret; /* remove any subdevices for the remote processor */ - rproc_remove_subdevices(rproc); + rproc_remove_subdevices(rproc, crashed); /* the installed resource table is no longer accessible */ rproc->table_ptr = rproc->cached_table; @@ -1018,6 +1035,113 @@ 
static int rproc_stop(struct rproc *rproc) } /** + * rproc_coredump_add_segment() - add segment of device memory to coredump + * @rproc: handle of a remote processor + * @da: device address + * @size: size of segment + * + * Add device memory to the list of segments to be included in a coredump for + * the remoteproc. + * + * Return: 0 on success, negative errno on error. + */ +int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size) +{ + struct rproc_dump_segment *segment; + + segment = kzalloc(sizeof(*segment), GFP_KERNEL); + if (!segment) + return -ENOMEM; + + segment->da = da; + segment->size = size; + + list_add_tail(&segment->node, &rproc->dump_segments); + + return 0; +} +EXPORT_SYMBOL(rproc_coredump_add_segment); + +/** + * rproc_coredump() - perform coredump + * @rproc: rproc handle + * + * This function will generate an ELF header for the registered segments + * and create a devcoredump device associated with rproc. + */ +static void rproc_coredump(struct rproc *rproc) +{ + struct rproc_dump_segment *segment; + struct elf32_phdr *phdr; + struct elf32_hdr *ehdr; + size_t data_size; + size_t offset; + void *data; + void *ptr; + int phnum = 0; + + if (list_empty(&rproc->dump_segments)) + return; + + data_size = sizeof(*ehdr); + list_for_each_entry(segment, &rproc->dump_segments, node) { + data_size += sizeof(*phdr) + segment->size; + + phnum++; + } + + data = vmalloc(data_size); + if (!data) + return; + + ehdr = data; + + memset(ehdr, 0, sizeof(*ehdr)); + memcpy(ehdr->e_ident, ELFMAG, SELFMAG); + ehdr->e_ident[EI_CLASS] = ELFCLASS32; + ehdr->e_ident[EI_DATA] = ELFDATA2LSB; + ehdr->e_ident[EI_VERSION] = EV_CURRENT; + ehdr->e_ident[EI_OSABI] = ELFOSABI_NONE; + ehdr->e_type = ET_CORE; + ehdr->e_machine = EM_NONE; + ehdr->e_version = EV_CURRENT; + ehdr->e_entry = rproc->bootaddr; + ehdr->e_phoff = sizeof(*ehdr); + ehdr->e_ehsize = sizeof(*ehdr); + ehdr->e_phentsize = sizeof(*phdr); + ehdr->e_phnum = phnum; + + phdr = data + ehdr->e_phoff; 
+ offset = ehdr->e_phoff + sizeof(*phdr) * ehdr->e_phnum; + list_for_each_entry(segment, &rproc->dump_segments, node) { + memset(phdr, 0, sizeof(*phdr)); + phdr->p_type = PT_LOAD; + phdr->p_offset = offset; + phdr->p_vaddr = segment->da; + phdr->p_paddr = segment->da; + phdr->p_filesz = segment->size; + phdr->p_memsz = segment->size; + phdr->p_flags = PF_R | PF_W | PF_X; + phdr->p_align = 0; + + ptr = rproc_da_to_va(rproc, segment->da, segment->size); + if (!ptr) { + dev_err(&rproc->dev, + "invalid coredump segment (%pad, %zu)\n", + &segment->da, segment->size); + memset(data + offset, 0xff, segment->size); + } else { + memcpy(data + offset, ptr, segment->size); + } + + offset += phdr->p_filesz; + phdr++; + } + + dev_coredumpv(&rproc->dev, data, data_size, GFP_KERNEL); +} + +/** * rproc_trigger_recovery() - recover a remoteproc * @rproc: the remote processor * @@ -1039,10 +1163,13 @@ int rproc_trigger_recovery(struct rproc *rproc) if (ret) return ret; - ret = rproc_stop(rproc); + ret = rproc_stop(rproc, false); if (ret) goto unlock_mutex; + /* generate coredump */ + rproc_coredump(rproc); + /* load firmware */ ret = request_firmware(&firmware_p, rproc->firmware, dev); if (ret < 0) { @@ -1189,7 +1316,7 @@ void rproc_shutdown(struct rproc *rproc) if (!atomic_dec_and_test(&rproc->power)) goto out; - ret = rproc_stop(rproc); + ret = rproc_stop(rproc, true); if (ret) { atomic_inc(&rproc->power); goto out; @@ -1428,7 +1555,7 @@ struct rproc *rproc_alloc(struct device *dev, const char *name, /* Default to ELF loader if no load function is specified */ if (!rproc->ops->load) { rproc->ops->load = rproc_elf_load_segments; - rproc->ops->load_rsc_table = rproc_elf_load_rsc_table; + rproc->ops->parse_fw = rproc_elf_load_rsc_table; rproc->ops->find_loaded_rsc_table = rproc_elf_find_loaded_rsc_table; rproc->ops->sanity_check = rproc_elf_sanity_check; rproc->ops->get_boot_addr = rproc_elf_get_boot_addr; @@ -1443,6 +1570,7 @@ struct rproc *rproc_alloc(struct device *dev, const char 
*name, INIT_LIST_HEAD(&rproc->traces); INIT_LIST_HEAD(&rproc->rvdevs); INIT_LIST_HEAD(&rproc->subdevs); + INIT_LIST_HEAD(&rproc->dump_segments); INIT_WORK(&rproc->crash_handler, rproc_crash_handler_work); @@ -1535,7 +1663,7 @@ EXPORT_SYMBOL(rproc_del); void rproc_add_subdev(struct rproc *rproc, struct rproc_subdev *subdev, int (*probe)(struct rproc_subdev *subdev), - void (*remove)(struct rproc_subdev *subdev)) + void (*remove)(struct rproc_subdev *subdev, bool crashed)) { subdev->probe = probe; subdev->remove = remove; diff --git a/drivers/remoteproc/remoteproc_internal.h b/drivers/remoteproc/remoteproc_internal.h index 55a2950c5cb7..7570beb035b5 100644 --- a/drivers/remoteproc/remoteproc_internal.h +++ b/drivers/remoteproc/remoteproc_internal.h @@ -88,11 +88,10 @@ int rproc_load_segments(struct rproc *rproc, const struct firmware *fw) return -EINVAL; } -static inline int rproc_load_rsc_table(struct rproc *rproc, - const struct firmware *fw) +static inline int rproc_parse_fw(struct rproc *rproc, const struct firmware *fw) { - if (rproc->ops->load_rsc_table) - return rproc->ops->load_rsc_table(rproc, fw); + if (rproc->ops->parse_fw) + return rproc->ops->parse_fw(rproc, fw); return 0; } diff --git a/drivers/rpmsg/qcom_glink_native.c b/drivers/rpmsg/qcom_glink_native.c index e0f31ed096a5..768ef542a841 100644 --- a/drivers/rpmsg/qcom_glink_native.c +++ b/drivers/rpmsg/qcom_glink_native.c @@ -113,7 +113,7 @@ struct qcom_glink { spinlock_t rx_lock; struct list_head rx_queue; - struct mutex tx_lock; + spinlock_t tx_lock; spinlock_t idr_lock; struct idr lcids; @@ -288,15 +288,14 @@ static int qcom_glink_tx(struct qcom_glink *glink, const void *data, size_t dlen, bool wait) { unsigned int tlen = hlen + dlen; - int ret; + unsigned long flags; + int ret = 0; /* Reject packets that are too big */ if (tlen >= glink->tx_pipe->length) return -EINVAL; - ret = mutex_lock_interruptible(&glink->tx_lock); - if (ret) - return ret; + spin_lock_irqsave(&glink->tx_lock, flags); while 
(qcom_glink_tx_avail(glink) < tlen) { if (!wait) { @@ -304,7 +303,12 @@ static int qcom_glink_tx(struct qcom_glink *glink, goto out; } + /* Wait without holding the tx_lock */ + spin_unlock_irqrestore(&glink->tx_lock, flags); + usleep_range(10000, 15000); + + spin_lock_irqsave(&glink->tx_lock, flags); } qcom_glink_tx_write(glink, hdr, hlen, data, dlen); @@ -313,7 +317,7 @@ static int qcom_glink_tx(struct qcom_glink *glink, mbox_client_txdone(glink->mbox_chan, 0); out: - mutex_unlock(&glink->tx_lock); + spin_unlock_irqrestore(&glink->tx_lock, flags); return ret; } @@ -1567,7 +1571,7 @@ struct qcom_glink *qcom_glink_native_probe(struct device *dev, glink->features = features; glink->intentless = intentless; - mutex_init(&glink->tx_lock); + spin_lock_init(&glink->tx_lock); spin_lock_init(&glink->rx_lock); INIT_LIST_HEAD(&glink->rx_queue); INIT_WORK(&glink->rx_work, qcom_glink_work); diff --git a/drivers/rpmsg/qcom_glink_smem.c b/drivers/rpmsg/qcom_glink_smem.c index 892f2b92a4d8..3fa9d43e2c87 100644 --- a/drivers/rpmsg/qcom_glink_smem.c +++ b/drivers/rpmsg/qcom_glink_smem.c @@ -217,6 +217,7 @@ struct qcom_glink *qcom_glink_smem_register(struct device *parent, ret = device_register(dev); if (ret) { pr_err("failed to register glink edge\n"); + put_device(dev); return ERR_PTR(ret); } @@ -299,7 +300,7 @@ struct qcom_glink *qcom_glink_smem_register(struct device *parent, return glink; err_put_dev: - put_device(dev); + device_unregister(dev); return ERR_PTR(ret); } diff --git a/drivers/rpmsg/qcom_smd.c b/drivers/rpmsg/qcom_smd.c index 92d0c6a7a837..5ce9bf7b897d 100644 --- a/drivers/rpmsg/qcom_smd.c +++ b/drivers/rpmsg/qcom_smd.c @@ -167,9 +167,9 @@ struct qcom_smd_endpoint { struct qcom_smd_channel *qsch; }; -#define to_smd_device(_rpdev) container_of(_rpdev, struct qcom_smd_device, rpdev) +#define to_smd_device(r) container_of(r, struct qcom_smd_device, rpdev) #define to_smd_edge(d) container_of(d, struct qcom_smd_edge, dev) -#define to_smd_endpoint(ept) container_of(ept, 
struct qcom_smd_endpoint, ept) +#define to_smd_endpoint(e) container_of(e, struct qcom_smd_endpoint, ept) /** * struct qcom_smd_channel - smd channel struct @@ -205,7 +205,7 @@ struct qcom_smd_channel { struct smd_channel_info_pair *info; struct smd_channel_info_word_pair *info_word; - struct mutex tx_lock; + spinlock_t tx_lock; wait_queue_head_t fblockread_event; void *tx_fifo; @@ -729,6 +729,7 @@ static int __qcom_smd_send(struct qcom_smd_channel *channel, const void *data, { __le32 hdr[5] = { cpu_to_le32(len), }; int tlen = sizeof(hdr) + len; + unsigned long flags; int ret; /* Word aligned channels only accept word size aligned data */ @@ -739,9 +740,11 @@ static int __qcom_smd_send(struct qcom_smd_channel *channel, const void *data, if (tlen >= channel->fifo_size) return -EINVAL; - ret = mutex_lock_interruptible(&channel->tx_lock); - if (ret) - return ret; + /* Highlight the fact that if we enter the loop below we might sleep */ + if (wait) + might_sleep(); + + spin_lock_irqsave(&channel->tx_lock, flags); while (qcom_smd_get_tx_avail(channel) < tlen && channel->state == SMD_CHANNEL_OPENED) { @@ -753,7 +756,7 @@ static int __qcom_smd_send(struct qcom_smd_channel *channel, const void *data, SET_TX_CHANNEL_FLAG(channel, fBLOCKREADINTR, 0); /* Wait without holding the tx_lock */ - mutex_unlock(&channel->tx_lock); + spin_unlock_irqrestore(&channel->tx_lock, flags); ret = wait_event_interruptible(channel->fblockread_event, qcom_smd_get_tx_avail(channel) >= tlen || @@ -761,9 +764,7 @@ static int __qcom_smd_send(struct qcom_smd_channel *channel, const void *data, if (ret) return ret; - ret = mutex_lock_interruptible(&channel->tx_lock); - if (ret) - return ret; + spin_lock_irqsave(&channel->tx_lock, flags); SET_TX_CHANNEL_FLAG(channel, fBLOCKREADINTR, 1); } @@ -787,7 +788,7 @@ static int __qcom_smd_send(struct qcom_smd_channel *channel, const void *data, qcom_smd_signal_channel(channel); out_unlock: - mutex_unlock(&channel->tx_lock); + 
spin_unlock_irqrestore(&channel->tx_lock, flags); return ret; } @@ -996,8 +997,26 @@ static struct device_node *qcom_smd_match_channel(struct device_node *edge_node, return NULL; } +static int qcom_smd_announce_create(struct rpmsg_device *rpdev) +{ + struct qcom_smd_endpoint *qept = to_smd_endpoint(rpdev->ept); + struct qcom_smd_channel *channel = qept->qsch; + unsigned long flags; + bool kick_state; + + spin_lock_irqsave(&channel->recv_lock, flags); + kick_state = qcom_smd_channel_intr(channel); + spin_unlock_irqrestore(&channel->recv_lock, flags); + + if (kick_state) + schedule_work(&channel->edge->state_work); + + return 0; +} + static const struct rpmsg_device_ops qcom_smd_device_ops = { .create_ept = qcom_smd_create_ept, + .announce_create = qcom_smd_announce_create, }; static const struct rpmsg_endpoint_ops qcom_smd_endpoint_ops = { @@ -1090,7 +1109,7 @@ static struct qcom_smd_channel *qcom_smd_create_channel(struct qcom_smd_edge *ed if (!channel->name) return ERR_PTR(-ENOMEM); - mutex_init(&channel->tx_lock); + spin_lock_init(&channel->tx_lock); spin_lock_init(&channel->recv_lock); init_waitqueue_head(&channel->fblockread_event); init_waitqueue_head(&channel->state_change_event); @@ -1234,6 +1253,11 @@ static void qcom_channel_state_worker(struct work_struct *work) if (channel->state != SMD_CHANNEL_CLOSED) continue; + remote_state = GET_RX_CHANNEL_INFO(channel, state); + if (remote_state != SMD_CHANNEL_OPENING && + remote_state != SMD_CHANNEL_OPENED) + continue; + if (channel->registered) continue; @@ -1408,6 +1432,7 @@ struct qcom_smd_edge *qcom_smd_register_edge(struct device *parent, ret = device_register(&edge->dev); if (ret) { pr_err("failed to register smd edge\n"); + put_device(&edge->dev); return ERR_PTR(ret); } @@ -1428,7 +1453,7 @@ struct qcom_smd_edge *qcom_smd_register_edge(struct device *parent, return edge; unregister_dev: - put_device(&edge->dev); + device_unregister(&edge->dev); return ERR_PTR(ret); } EXPORT_SYMBOL(qcom_smd_register_edge); 
diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c index 5a081762afcc..920a02f0462c 100644 --- a/drivers/rpmsg/rpmsg_core.c +++ b/drivers/rpmsg/rpmsg_core.c @@ -442,7 +442,7 @@ static int rpmsg_dev_probe(struct device *dev) goto out; } - if (rpdev->ops->announce_create) + if (ept && rpdev->ops->announce_create) err = rpdev->ops->announce_create(rpdev); out: return err; diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig index a993d19fa562..5c4535b545cc 100644 --- a/drivers/soc/qcom/Kconfig +++ b/drivers/soc/qcom/Kconfig @@ -37,7 +37,7 @@ config QCOM_PM config QCOM_QMI_HELPERS tristate - depends on ARCH_QCOM + depends on ARCH_QCOM && NET help Helper library for handling QMI encoded messages. QMI encoded messages are used in communication between the majority of QRTR diff --git a/drivers/soc/qcom/mdt_loader.c b/drivers/soc/qcom/mdt_loader.c index 08bd8549242a..17b314d9a148 100644 --- a/drivers/soc/qcom/mdt_loader.c +++ b/drivers/soc/qcom/mdt_loader.c @@ -83,12 +83,14 @@ EXPORT_SYMBOL_GPL(qcom_mdt_get_size); * @mem_region: allocated memory region to load firmware into * @mem_phys: physical address of allocated memory region * @mem_size: size of the allocated memory region + * @reloc_base: adjusted physical address after relocation * * Returns 0 on success, negative errno otherwise. 
*/ int qcom_mdt_load(struct device *dev, const struct firmware *fw, const char *firmware, int pas_id, void *mem_region, - phys_addr_t mem_phys, size_t mem_size) + phys_addr_t mem_phys, size_t mem_size, + phys_addr_t *reloc_base) { const struct elf32_phdr *phdrs; const struct elf32_phdr *phdr; @@ -192,6 +194,9 @@ int qcom_mdt_load(struct device *dev, const struct firmware *fw, memset(ptr + phdr->p_filesz, 0, phdr->p_memsz - phdr->p_filesz); } + if (reloc_base) + *reloc_base = mem_reloc; + out: kfree(fw_name); diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile index 174f5709e508..a699e320393f 100644 --- a/fs/ceph/Makefile +++ b/fs/ceph/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_CEPH_FS) += ceph.o ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \ - export.o caps.o snap.o xattr.o \ + export.o caps.o snap.o xattr.o quota.o \ mds_client.o mdsmap.o strings.o ceph_frag.o \ debugfs.o diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index b4336b42ce3b..5f7ad3d0df2e 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -15,6 +15,7 @@ #include "mds_client.h" #include "cache.h" #include <linux/ceph/osd_client.h> +#include <linux/ceph/striper.h> /* * Ceph address space ops. 
@@ -438,7 +439,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, { struct inode *inode = file_inode(file); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); - struct ceph_file_info *ci = file->private_data; + struct ceph_file_info *fi = file->private_data; struct ceph_rw_context *rw_ctx; int rc = 0; int max = 0; @@ -452,7 +453,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, if (rc == 0) goto out; - rw_ctx = ceph_find_rw_context(ci); + rw_ctx = ceph_find_rw_context(fi); max = fsc->mount_options->rsize >> PAGE_SHIFT; dout("readpages %p file %p ctx %p nr_pages %d max %d\n", inode, file, rw_ctx, nr_pages, max); @@ -800,7 +801,7 @@ static int ceph_writepages_start(struct address_space *mapping, struct ceph_osd_request *req = NULL; struct ceph_writeback_ctl ceph_wbc; bool should_loop, range_whole = false; - bool stop, done = false; + bool done = false; dout("writepages_start %p (mode=%s)\n", inode, wbc->sync_mode == WB_SYNC_NONE ? "NONE" : @@ -856,7 +857,7 @@ retry: * in that range can be associated with newer snapc. 
* They are not writeable until we write all dirty pages * associated with 'snapc' get written */ - if (index > 0 || wbc->sync_mode != WB_SYNC_NONE) + if (index > 0) should_loop = true; dout(" non-head snapc, range whole\n"); } @@ -864,8 +865,7 @@ retry: ceph_put_snap_context(last_snapc); last_snapc = snapc; - stop = false; - while (!stop && index <= end) { + while (!done && index <= end) { int num_ops = 0, op_idx; unsigned i, pvec_pages, max_pages, locked_pages = 0; struct page **pages = NULL, **data_pages; @@ -898,16 +898,30 @@ get_more_pages: unlock_page(page); continue; } - if (strip_unit_end && (page->index > strip_unit_end)) { - dout("end of strip unit %p\n", page); + /* only if matching snap context */ + pgsnapc = page_snap_context(page); + if (pgsnapc != snapc) { + dout("page snapc %p %lld != oldest %p %lld\n", + pgsnapc, pgsnapc->seq, snapc, snapc->seq); + if (!should_loop && + !ceph_wbc.head_snapc && + wbc->sync_mode != WB_SYNC_NONE) + should_loop = true; unlock_page(page); - break; + continue; } if (page_offset(page) >= ceph_wbc.i_size) { dout("%p page eof %llu\n", page, ceph_wbc.i_size); - /* not done if range_cyclic */ - stop = true; + if (ceph_wbc.size_stable || + page_offset(page) >= i_size_read(inode)) + mapping->a_ops->invalidatepage(page, + 0, PAGE_SIZE); + unlock_page(page); + continue; + } + if (strip_unit_end && (page->index > strip_unit_end)) { + dout("end of strip unit %p\n", page); unlock_page(page); break; } @@ -921,15 +935,6 @@ get_more_pages: wait_on_page_writeback(page); } - /* only if matching snap context */ - pgsnapc = page_snap_context(page); - if (pgsnapc != snapc) { - dout("page snapc %p %lld != oldest %p %lld\n", - pgsnapc, pgsnapc->seq, snapc, snapc->seq); - unlock_page(page); - continue; - } - if (!clear_page_dirty_for_io(page)) { dout("%p !clear_page_dirty_for_io\n", page); unlock_page(page); @@ -945,19 +950,15 @@ get_more_pages: if (locked_pages == 0) { u64 objnum; u64 objoff; + u32 xlen; /* prepare async write request */ 
offset = (u64)page_offset(page); - len = wsize; - - rc = ceph_calc_file_object_mapping(&ci->i_layout, - offset, len, - &objnum, &objoff, - &len); - if (rc < 0) { - unlock_page(page); - break; - } + ceph_calc_file_object_mapping(&ci->i_layout, + offset, wsize, + &objnum, &objoff, + &xlen); + len = xlen; num_ops = 1; strip_unit_end = page->index + @@ -1146,7 +1147,7 @@ new_request: * we tagged for writeback prior to entering this loop. */ if (wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE) - done = stop = true; + done = true; release_pvec_pages: dout("pagevec_release on %d pages (%p)\n", (int)pvec.nr, diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index 33a211b364ed..bb524c880b1e 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c @@ -51,7 +51,7 @@ static const struct fscache_cookie_def ceph_fscache_fsid_object_def = { .type = FSCACHE_COOKIE_TYPE_INDEX, }; -int ceph_fscache_register(void) +int __init ceph_fscache_register(void) { return fscache_register_netfs(&ceph_cache_netfs); } @@ -135,7 +135,7 @@ static enum fscache_checkaux ceph_fscache_inode_check_aux( if (memcmp(data, &aux, sizeof(aux)) != 0) return FSCACHE_CHECKAUX_OBSOLETE; - dout("ceph inode 0x%p cached okay", ci); + dout("ceph inode 0x%p cached okay\n", ci); return FSCACHE_CHECKAUX_OKAY; } diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 0e5bd3e3344e..23dbfae16156 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -184,36 +184,54 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc, mdsc->caps_avail_count); spin_unlock(&mdsc->caps_list_lock); - for (i = have; i < need; i++) { -retry: + for (i = have; i < need; ) { cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); - if (!cap) { - if (!trimmed) { - for (j = 0; j < mdsc->max_sessions; j++) { - s = __ceph_lookup_mds_session(mdsc, j); - if (!s) - continue; - mutex_unlock(&mdsc->mutex); + if (cap) { + list_add(&cap->caps_item, &newcaps); + alloc++; + i++; + continue; + } - mutex_lock(&s->s_mutex); - max_caps = s->s_nr_caps - (need - i); - 
ceph_trim_caps(mdsc, s, max_caps); - mutex_unlock(&s->s_mutex); + if (!trimmed) { + for (j = 0; j < mdsc->max_sessions; j++) { + s = __ceph_lookup_mds_session(mdsc, j); + if (!s) + continue; + mutex_unlock(&mdsc->mutex); - ceph_put_mds_session(s); - mutex_lock(&mdsc->mutex); - } - trimmed = true; - goto retry; - } else { - pr_warn("reserve caps ctx=%p ENOMEM " - "need=%d got=%d\n", - ctx, need, have + alloc); - goto out_nomem; + mutex_lock(&s->s_mutex); + max_caps = s->s_nr_caps - (need - i); + ceph_trim_caps(mdsc, s, max_caps); + mutex_unlock(&s->s_mutex); + + ceph_put_mds_session(s); + mutex_lock(&mdsc->mutex); } + trimmed = true; + + spin_lock(&mdsc->caps_list_lock); + if (mdsc->caps_avail_count) { + int more_have; + if (mdsc->caps_avail_count >= need - i) + more_have = need - i; + else + more_have = mdsc->caps_avail_count; + + i += more_have; + have += more_have; + mdsc->caps_avail_count -= more_have; + mdsc->caps_reserve_count += more_have; + + } + spin_unlock(&mdsc->caps_list_lock); + + continue; } - list_add(&cap->caps_item, &newcaps); - alloc++; + + pr_warn("reserve caps ctx=%p ENOMEM need=%d got=%d\n", + ctx, need, have + alloc); + goto out_nomem; } BUG_ON(have + alloc != need); @@ -234,16 +252,28 @@ retry: return 0; out_nomem: + + spin_lock(&mdsc->caps_list_lock); + mdsc->caps_avail_count += have; + mdsc->caps_reserve_count -= have; + while (!list_empty(&newcaps)) { cap = list_first_entry(&newcaps, struct ceph_cap, caps_item); list_del(&cap->caps_item); - kmem_cache_free(ceph_cap_cachep, cap); + + /* Keep some preallocated caps around (ceph_min_count), to + * avoid lots of free/alloc churn. 
*/ + if (mdsc->caps_avail_count >= + mdsc->caps_reserve_count + mdsc->caps_min_count) { + kmem_cache_free(ceph_cap_cachep, cap); + } else { + mdsc->caps_avail_count++; + mdsc->caps_total_count++; + list_add(&cap->caps_item, &mdsc->caps_list); + } } - spin_lock(&mdsc->caps_list_lock); - mdsc->caps_avail_count += have; - mdsc->caps_reserve_count -= have; BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + mdsc->caps_reserve_count + mdsc->caps_avail_count); @@ -254,12 +284,26 @@ out_nomem: int ceph_unreserve_caps(struct ceph_mds_client *mdsc, struct ceph_cap_reservation *ctx) { + int i; + struct ceph_cap *cap; + dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count); if (ctx->count) { spin_lock(&mdsc->caps_list_lock); BUG_ON(mdsc->caps_reserve_count < ctx->count); mdsc->caps_reserve_count -= ctx->count; - mdsc->caps_avail_count += ctx->count; + if (mdsc->caps_avail_count >= + mdsc->caps_reserve_count + mdsc->caps_min_count) { + mdsc->caps_total_count -= ctx->count; + for (i = 0; i < ctx->count; i++) { + cap = list_first_entry(&mdsc->caps_list, + struct ceph_cap, caps_item); + list_del(&cap->caps_item); + kmem_cache_free(ceph_cap_cachep, cap); + } + } else { + mdsc->caps_avail_count += ctx->count; + } ctx->count = 0; dout("unreserve caps %d = %d used + %d resv + %d avail\n", mdsc->caps_total_count, mdsc->caps_use_count, @@ -285,7 +329,23 @@ struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, mdsc->caps_use_count++; mdsc->caps_total_count++; spin_unlock(&mdsc->caps_list_lock); + } else { + spin_lock(&mdsc->caps_list_lock); + if (mdsc->caps_avail_count) { + BUG_ON(list_empty(&mdsc->caps_list)); + + mdsc->caps_avail_count--; + mdsc->caps_use_count++; + cap = list_first_entry(&mdsc->caps_list, + struct ceph_cap, caps_item); + list_del(&cap->caps_item); + + BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + + mdsc->caps_reserve_count + mdsc->caps_avail_count); + } + spin_unlock(&mdsc->caps_list_lock); } + return cap; } @@ -341,6 +401,8 @@ void 
ceph_reservation_status(struct ceph_fs_client *fsc, { struct ceph_mds_client *mdsc = fsc->mdsc; + spin_lock(&mdsc->caps_list_lock); + if (total) *total = mdsc->caps_total_count; if (avail) @@ -351,6 +413,8 @@ void ceph_reservation_status(struct ceph_fs_client *fsc, *reserved = mdsc->caps_reserve_count; if (min) *min = mdsc->caps_min_count; + + spin_unlock(&mdsc->caps_list_lock); } /* @@ -639,9 +703,11 @@ void ceph_add_cap(struct inode *inode, } spin_lock(&realm->inodes_with_caps_lock); - ci->i_snap_realm = realm; list_add(&ci->i_snap_realm_item, &realm->inodes_with_caps); + ci->i_snap_realm = realm; + if (realm->ino == ci->i_vino.ino) + realm->inode = inode; spin_unlock(&realm->inodes_with_caps_lock); if (oldrealm) diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 644def813754..abdf98deeec4 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -260,7 +260,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) goto out; fsc->debugfs_mdsmap = debugfs_create_file("mdsmap", - 0600, + 0400, fsc->client->debugfs_dir, fsc, &mdsmap_show_fops); @@ -268,7 +268,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) goto out; fsc->debugfs_mds_sessions = debugfs_create_file("mds_sessions", - 0600, + 0400, fsc->client->debugfs_dir, fsc, &mds_sessions_show_fops); @@ -276,7 +276,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) goto out; fsc->debugfs_mdsc = debugfs_create_file("mdsc", - 0600, + 0400, fsc->client->debugfs_dir, fsc, &mdsc_show_fops); @@ -292,7 +292,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) goto out; fsc->debugfs_dentry_lru = debugfs_create_file("dentry_lru", - 0600, + 0400, fsc->client->debugfs_dir, fsc, &dentry_lru_show_fops); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 2bdd561c4c68..1a78dd6f8bf2 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -101,18 +101,18 @@ static int fpos_cmp(loff_t l, loff_t r) * regardless of what dir changes take place on the * server. 
*/ -static int note_last_dentry(struct ceph_file_info *fi, const char *name, +static int note_last_dentry(struct ceph_dir_file_info *dfi, const char *name, int len, unsigned next_offset) { char *buf = kmalloc(len+1, GFP_KERNEL); if (!buf) return -ENOMEM; - kfree(fi->last_name); - fi->last_name = buf; - memcpy(fi->last_name, name, len); - fi->last_name[len] = 0; - fi->next_offset = next_offset; - dout("note_last_dentry '%s'\n", fi->last_name); + kfree(dfi->last_name); + dfi->last_name = buf; + memcpy(dfi->last_name, name, len); + dfi->last_name[len] = 0; + dfi->next_offset = next_offset; + dout("note_last_dentry '%s'\n", dfi->last_name); return 0; } @@ -174,7 +174,7 @@ __dcache_find_get_entry(struct dentry *parent, u64 idx, static int __dcache_readdir(struct file *file, struct dir_context *ctx, int shared_gen) { - struct ceph_file_info *fi = file->private_data; + struct ceph_dir_file_info *dfi = file->private_data; struct dentry *parent = file->f_path.dentry; struct inode *dir = d_inode(parent); struct dentry *dentry, *last = NULL; @@ -221,7 +221,7 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx, bool emit_dentry = false; dentry = __dcache_find_get_entry(parent, idx++, &cache_ctl); if (!dentry) { - fi->flags |= CEPH_F_ATEND; + dfi->file_info.flags |= CEPH_F_ATEND; err = 0; break; } @@ -272,33 +272,33 @@ out: if (last) { int ret; di = ceph_dentry(last); - ret = note_last_dentry(fi, last->d_name.name, last->d_name.len, + ret = note_last_dentry(dfi, last->d_name.name, last->d_name.len, fpos_off(di->offset) + 1); if (ret < 0) err = ret; dput(last); /* last_name no longer match cache index */ - if (fi->readdir_cache_idx >= 0) { - fi->readdir_cache_idx = -1; - fi->dir_release_count = 0; + if (dfi->readdir_cache_idx >= 0) { + dfi->readdir_cache_idx = -1; + dfi->dir_release_count = 0; } } return err; } -static bool need_send_readdir(struct ceph_file_info *fi, loff_t pos) +static bool need_send_readdir(struct ceph_dir_file_info *dfi, loff_t pos) { - 
if (!fi->last_readdir) + if (!dfi->last_readdir) return true; if (is_hash_order(pos)) - return !ceph_frag_contains_value(fi->frag, fpos_hash(pos)); + return !ceph_frag_contains_value(dfi->frag, fpos_hash(pos)); else - return fi->frag != fpos_frag(pos); + return dfi->frag != fpos_frag(pos); } static int ceph_readdir(struct file *file, struct dir_context *ctx) { - struct ceph_file_info *fi = file->private_data; + struct ceph_dir_file_info *dfi = file->private_data; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); @@ -309,7 +309,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) struct ceph_mds_reply_info_parsed *rinfo; dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos); - if (fi->flags & CEPH_F_ATEND) + if (dfi->file_info.flags & CEPH_F_ATEND) return 0; /* always start with . and .. */ @@ -350,15 +350,15 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) /* proceed with a normal readdir */ more: /* do we have the correct frag content buffered? */ - if (need_send_readdir(fi, ctx->pos)) { + if (need_send_readdir(dfi, ctx->pos)) { struct ceph_mds_request *req; int op = ceph_snap(inode) == CEPH_SNAPDIR ? 
CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR; /* discard old result, if any */ - if (fi->last_readdir) { - ceph_mdsc_put_request(fi->last_readdir); - fi->last_readdir = NULL; + if (dfi->last_readdir) { + ceph_mdsc_put_request(dfi->last_readdir); + dfi->last_readdir = NULL; } if (is_hash_order(ctx->pos)) { @@ -372,7 +372,7 @@ more: } dout("readdir fetching %llx.%llx frag %x offset '%s'\n", - ceph_vinop(inode), frag, fi->last_name); + ceph_vinop(inode), frag, dfi->last_name); req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); if (IS_ERR(req)) return PTR_ERR(req); @@ -388,8 +388,8 @@ more: __set_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags); req->r_inode_drop = CEPH_CAP_FILE_EXCL; } - if (fi->last_name) { - req->r_path2 = kstrdup(fi->last_name, GFP_KERNEL); + if (dfi->last_name) { + req->r_path2 = kstrdup(dfi->last_name, GFP_KERNEL); if (!req->r_path2) { ceph_mdsc_put_request(req); return -ENOMEM; @@ -399,10 +399,10 @@ more: cpu_to_le32(fpos_hash(ctx->pos)); } - req->r_dir_release_cnt = fi->dir_release_count; - req->r_dir_ordered_cnt = fi->dir_ordered_count; - req->r_readdir_cache_idx = fi->readdir_cache_idx; - req->r_readdir_offset = fi->next_offset; + req->r_dir_release_cnt = dfi->dir_release_count; + req->r_dir_ordered_cnt = dfi->dir_ordered_count; + req->r_readdir_cache_idx = dfi->readdir_cache_idx; + req->r_readdir_offset = dfi->next_offset; req->r_args.readdir.frag = cpu_to_le32(frag); req->r_args.readdir.flags = cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS); @@ -426,35 +426,35 @@ more: if (le32_to_cpu(rinfo->dir_dir->frag) != frag) { frag = le32_to_cpu(rinfo->dir_dir->frag); if (!rinfo->hash_order) { - fi->next_offset = req->r_readdir_offset; + dfi->next_offset = req->r_readdir_offset; /* adjust ctx->pos to beginning of frag */ ctx->pos = ceph_make_fpos(frag, - fi->next_offset, + dfi->next_offset, false); } } - fi->frag = frag; - fi->last_readdir = req; + dfi->frag = frag; + dfi->last_readdir = req; if (test_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags)) { 
- fi->readdir_cache_idx = req->r_readdir_cache_idx; - if (fi->readdir_cache_idx < 0) { + dfi->readdir_cache_idx = req->r_readdir_cache_idx; + if (dfi->readdir_cache_idx < 0) { /* preclude from marking dir ordered */ - fi->dir_ordered_count = 0; + dfi->dir_ordered_count = 0; } else if (ceph_frag_is_leftmost(frag) && - fi->next_offset == 2) { + dfi->next_offset == 2) { /* note dir version at start of readdir so * we can tell if any dentries get dropped */ - fi->dir_release_count = req->r_dir_release_cnt; - fi->dir_ordered_count = req->r_dir_ordered_cnt; + dfi->dir_release_count = req->r_dir_release_cnt; + dfi->dir_ordered_count = req->r_dir_ordered_cnt; } } else { - dout("readdir !did_prepopulate"); + dout("readdir !did_prepopulate\n"); /* disable readdir cache */ - fi->readdir_cache_idx = -1; + dfi->readdir_cache_idx = -1; /* preclude from marking dir complete */ - fi->dir_release_count = 0; + dfi->dir_release_count = 0; } /* note next offset and last dentry name */ @@ -463,19 +463,19 @@ more: rinfo->dir_entries + (rinfo->dir_nr-1); unsigned next_offset = req->r_reply_info.dir_end ? 2 : (fpos_off(rde->offset) + 1); - err = note_last_dentry(fi, rde->name, rde->name_len, + err = note_last_dentry(dfi, rde->name, rde->name_len, next_offset); if (err) return err; } else if (req->r_reply_info.dir_end) { - fi->next_offset = 2; + dfi->next_offset = 2; /* keep last name */ } } - rinfo = &fi->last_readdir->r_reply_info; + rinfo = &dfi->last_readdir->r_reply_info; dout("readdir frag %x num %d pos %llx chunk first %llx\n", - fi->frag, rinfo->dir_nr, ctx->pos, + dfi->frag, rinfo->dir_nr, ctx->pos, rinfo->dir_nr ? rinfo->dir_entries[0].offset : 0LL); i = 0; @@ -519,52 +519,55 @@ more: ctx->pos++; } - ceph_mdsc_put_request(fi->last_readdir); - fi->last_readdir = NULL; + ceph_mdsc_put_request(dfi->last_readdir); + dfi->last_readdir = NULL; - if (fi->next_offset > 2) { - frag = fi->frag; + if (dfi->next_offset > 2) { + frag = dfi->frag; goto more; } /* more frags? 
*/ - if (!ceph_frag_is_rightmost(fi->frag)) { - frag = ceph_frag_next(fi->frag); + if (!ceph_frag_is_rightmost(dfi->frag)) { + frag = ceph_frag_next(dfi->frag); if (is_hash_order(ctx->pos)) { loff_t new_pos = ceph_make_fpos(ceph_frag_value(frag), - fi->next_offset, true); + dfi->next_offset, true); if (new_pos > ctx->pos) ctx->pos = new_pos; /* keep last_name */ } else { - ctx->pos = ceph_make_fpos(frag, fi->next_offset, false); - kfree(fi->last_name); - fi->last_name = NULL; + ctx->pos = ceph_make_fpos(frag, dfi->next_offset, + false); + kfree(dfi->last_name); + dfi->last_name = NULL; } dout("readdir next frag is %x\n", frag); goto more; } - fi->flags |= CEPH_F_ATEND; + dfi->file_info.flags |= CEPH_F_ATEND; /* * if dir_release_count still matches the dir, no dentries * were released during the whole readdir, and we should have * the complete dir contents in our cache. */ - if (atomic64_read(&ci->i_release_count) == fi->dir_release_count) { + if (atomic64_read(&ci->i_release_count) == + dfi->dir_release_count) { spin_lock(&ci->i_ceph_lock); - if (fi->dir_ordered_count == atomic64_read(&ci->i_ordered_count)) { + if (dfi->dir_ordered_count == + atomic64_read(&ci->i_ordered_count)) { dout(" marking %p complete and ordered\n", inode); /* use i_size to track number of entries in * readdir cache */ - BUG_ON(fi->readdir_cache_idx < 0); - i_size_write(inode, fi->readdir_cache_idx * + BUG_ON(dfi->readdir_cache_idx < 0); + i_size_write(inode, dfi->readdir_cache_idx * sizeof(struct dentry*)); } else { dout(" marking %p complete\n", inode); } - __ceph_dir_set_complete(ci, fi->dir_release_count, - fi->dir_ordered_count); + __ceph_dir_set_complete(ci, dfi->dir_release_count, + dfi->dir_ordered_count); spin_unlock(&ci->i_ceph_lock); } @@ -572,25 +575,25 @@ more: return 0; } -static void reset_readdir(struct ceph_file_info *fi) +static void reset_readdir(struct ceph_dir_file_info *dfi) { - if (fi->last_readdir) { - ceph_mdsc_put_request(fi->last_readdir); - fi->last_readdir = 
NULL; + if (dfi->last_readdir) { + ceph_mdsc_put_request(dfi->last_readdir); + dfi->last_readdir = NULL; } - kfree(fi->last_name); - fi->last_name = NULL; - fi->dir_release_count = 0; - fi->readdir_cache_idx = -1; - fi->next_offset = 2; /* compensate for . and .. */ - fi->flags &= ~CEPH_F_ATEND; + kfree(dfi->last_name); + dfi->last_name = NULL; + dfi->dir_release_count = 0; + dfi->readdir_cache_idx = -1; + dfi->next_offset = 2; /* compensate for . and .. */ + dfi->file_info.flags &= ~CEPH_F_ATEND; } /* * discard buffered readdir content on seekdir(0), or seek to new frag, * or seek prior to current chunk */ -static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos) +static bool need_reset_readdir(struct ceph_dir_file_info *dfi, loff_t new_pos) { struct ceph_mds_reply_info_parsed *rinfo; loff_t chunk_offset; @@ -599,10 +602,10 @@ static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos) if (is_hash_order(new_pos)) { /* no need to reset last_name for a forward seek when * dentries are sotred in hash order */ - } else if (fi->frag != fpos_frag(new_pos)) { + } else if (dfi->frag != fpos_frag(new_pos)) { return true; } - rinfo = fi->last_readdir ? &fi->last_readdir->r_reply_info : NULL; + rinfo = dfi->last_readdir ? 
&dfi->last_readdir->r_reply_info : NULL; if (!rinfo || !rinfo->dir_nr) return true; chunk_offset = rinfo->dir_entries[0].offset; @@ -612,7 +615,7 @@ static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos) static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) { - struct ceph_file_info *fi = file->private_data; + struct ceph_dir_file_info *dfi = file->private_data; struct inode *inode = file->f_mapping->host; loff_t retval; @@ -630,20 +633,20 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) } if (offset >= 0) { - if (need_reset_readdir(fi, offset)) { + if (need_reset_readdir(dfi, offset)) { dout("dir_llseek dropping %p content\n", file); - reset_readdir(fi); + reset_readdir(dfi); } else if (is_hash_order(offset) && offset > file->f_pos) { /* for hash offset, we don't know if a forward seek * is within same frag */ - fi->dir_release_count = 0; - fi->readdir_cache_idx = -1; + dfi->dir_release_count = 0; + dfi->readdir_cache_idx = -1; } if (offset != file->f_pos) { file->f_pos = offset; file->f_version = 0; - fi->flags &= ~CEPH_F_ATEND; + dfi->file_info.flags &= ~CEPH_F_ATEND; } retval = offset; } @@ -824,6 +827,9 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry, if (ceph_snap(dir) != CEPH_NOSNAP) return -EROFS; + if (ceph_quota_is_max_files_exceeded(dir)) + return -EDQUOT; + err = ceph_pre_init_acls(dir, &mode, &acls); if (err < 0) return err; @@ -877,6 +883,9 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, if (ceph_snap(dir) != CEPH_NOSNAP) return -EROFS; + if (ceph_quota_is_max_files_exceeded(dir)) + return -EDQUOT; + dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS); if (IS_ERR(req)) { @@ -926,6 +935,12 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto out; } + if (op == CEPH_MDS_OP_MKDIR && + ceph_quota_is_max_files_exceeded(dir)) { + 
err = -EDQUOT; + goto out; + } + mode |= S_IFDIR; err = ceph_pre_init_acls(dir, &mode, &acls); if (err < 0) @@ -1065,6 +1080,11 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, else return -EROFS; } + /* don't allow cross-quota renames */ + if ((old_dir != new_dir) && + (!ceph_quota_is_same_realm(old_dir, new_dir))) + return -EXDEV; + dout("rename dir %p dentry %p to dir %p dentry %p\n", old_dir, old_dentry, new_dir, new_dentry); req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); @@ -1351,7 +1371,7 @@ static void ceph_d_prune(struct dentry *dentry) static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, loff_t *ppos) { - struct ceph_file_info *cf = file->private_data; + struct ceph_dir_file_info *dfi = file->private_data; struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); int left; @@ -1360,12 +1380,12 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) return -EISDIR; - if (!cf->dir_info) { - cf->dir_info = kmalloc(bufsize, GFP_KERNEL); - if (!cf->dir_info) + if (!dfi->dir_info) { + dfi->dir_info = kmalloc(bufsize, GFP_KERNEL); + if (!dfi->dir_info) return -ENOMEM; - cf->dir_info_len = - snprintf(cf->dir_info, bufsize, + dfi->dir_info_len = + snprintf(dfi->dir_info, bufsize, "entries: %20lld\n" " files: %20lld\n" " subdirs: %20lld\n" @@ -1385,10 +1405,10 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, (long)ci->i_rctime.tv_nsec); } - if (*ppos >= cf->dir_info_len) + if (*ppos >= dfi->dir_info_len) return 0; - size = min_t(unsigned, size, cf->dir_info_len-*ppos); - left = copy_to_user(buf, cf->dir_info + *ppos, size); + size = min_t(unsigned, size, dfi->dir_info_len-*ppos); + left = copy_to_user(buf, dfi->dir_info + *ppos, size); if (left == size) return -EFAULT; *ppos += (size - left); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 
b67eec3532a1..f85040d73e3d 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -30,6 +30,8 @@ static __le32 ceph_flags_sys2wire(u32 flags) break; } + flags &= ~O_ACCMODE; + #define ceph_sys2wire(a) if (flags & a) { wire_flags |= CEPH_##a; flags &= ~a; } ceph_sys2wire(O_CREAT); @@ -41,7 +43,7 @@ static __le32 ceph_flags_sys2wire(u32 flags) #undef ceph_sys2wire if (flags) - dout("unused open flags: %x", flags); + dout("unused open flags: %x\n", flags); return cpu_to_le32(wire_flags); } @@ -159,13 +161,50 @@ out: return req; } +static int ceph_init_file_info(struct inode *inode, struct file *file, + int fmode, bool isdir) +{ + struct ceph_file_info *fi; + + dout("%s %p %p 0%o (%s)\n", __func__, inode, file, + inode->i_mode, isdir ? "dir" : "regular"); + BUG_ON(inode->i_fop->release != ceph_release); + + if (isdir) { + struct ceph_dir_file_info *dfi = + kmem_cache_zalloc(ceph_dir_file_cachep, GFP_KERNEL); + if (!dfi) { + ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */ + return -ENOMEM; + } + + file->private_data = dfi; + fi = &dfi->file_info; + dfi->next_offset = 2; + dfi->readdir_cache_idx = -1; + } else { + fi = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL); + if (!fi) { + ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */ + return -ENOMEM; + } + + file->private_data = fi; + } + + fi->fmode = fmode; + spin_lock_init(&fi->rw_contexts_lock); + INIT_LIST_HEAD(&fi->rw_contexts); + + return 0; +} + /* * initialize private struct file data. 
* if we fail, clean up by dropping fmode reference on the ceph_inode */ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) { - struct ceph_file_info *cf; int ret = 0; switch (inode->i_mode & S_IFMT) { @@ -173,22 +212,10 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) ceph_fscache_register_inode_cookie(inode); ceph_fscache_file_set_cookie(inode, file); case S_IFDIR: - dout("init_file %p %p 0%o (regular)\n", inode, file, - inode->i_mode); - cf = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL); - if (!cf) { - ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */ - return -ENOMEM; - } - cf->fmode = fmode; - - spin_lock_init(&cf->rw_contexts_lock); - INIT_LIST_HEAD(&cf->rw_contexts); - - cf->next_offset = 2; - cf->readdir_cache_idx = -1; - file->private_data = cf; - BUG_ON(inode->i_fop->release != ceph_release); + ret = ceph_init_file_info(inode, file, fmode, + S_ISDIR(inode->i_mode)); + if (ret) + return ret; break; case S_IFLNK: @@ -278,11 +305,11 @@ int ceph_open(struct inode *inode, struct file *file) struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; - struct ceph_file_info *cf = file->private_data; + struct ceph_file_info *fi = file->private_data; int err; int flags, fmode, wanted; - if (cf) { + if (fi) { dout("open file %p is already opened\n", file); return 0; } @@ -375,7 +402,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, struct ceph_mds_request *req; struct dentry *dn; struct ceph_acls_info acls = {}; - int mask; + int mask; int err; dout("atomic_open %p dentry %p '%pd' %s flags %d mode 0%o\n", @@ -386,6 +413,8 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, return -ENAMETOOLONG; if (flags & O_CREAT) { + if (ceph_quota_is_max_files_exceeded(dir)) + return -EDQUOT; err = ceph_pre_init_acls(dir, &mode, &acls); if (err < 0) return err; @@ -460,16 +489,27 @@ out_acl: int 
ceph_release(struct inode *inode, struct file *file) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_file_info *cf = file->private_data; - dout("release inode %p file %p\n", inode, file); - ceph_put_fmode(ci, cf->fmode); - if (cf->last_readdir) - ceph_mdsc_put_request(cf->last_readdir); - kfree(cf->last_name); - kfree(cf->dir_info); - WARN_ON(!list_empty(&cf->rw_contexts)); - kmem_cache_free(ceph_file_cachep, cf); + if (S_ISDIR(inode->i_mode)) { + struct ceph_dir_file_info *dfi = file->private_data; + dout("release inode %p dir file %p\n", inode, file); + WARN_ON(!list_empty(&dfi->file_info.rw_contexts)); + + ceph_put_fmode(ci, dfi->file_info.fmode); + + if (dfi->last_readdir) + ceph_mdsc_put_request(dfi->last_readdir); + kfree(dfi->last_name); + kfree(dfi->dir_info); + kmem_cache_free(ceph_dir_file_cachep, dfi); + } else { + struct ceph_file_info *fi = file->private_data; + dout("release inode %p regular file %p\n", inode, file); + WARN_ON(!list_empty(&fi->rw_contexts)); + + ceph_put_fmode(ci, fi->fmode); + kmem_cache_free(ceph_file_cachep, fi); + } /* wake up anyone waiting for caps on this inode */ wake_up_all(&ci->i_cap_wq); @@ -1338,6 +1378,11 @@ retry_snap: pos = iocb->ki_pos; count = iov_iter_count(from); + if (ceph_quota_is_max_bytes_exceeded(inode, pos + count)) { + err = -EDQUOT; + goto out; + } + err = file_remove_privs(file); if (err) goto out; @@ -1419,6 +1464,7 @@ retry_snap: if (written >= 0) { int dirty; + spin_lock(&ci->i_ceph_lock); ci->i_inline_version = CEPH_INLINE_NONE; dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, @@ -1426,6 +1472,8 @@ retry_snap: spin_unlock(&ci->i_ceph_lock); if (dirty) __mark_inode_dirty(inode, dirty); + if (ceph_quota_is_max_bytes_approaching(inode, iocb->ki_pos)) + ceph_check_caps(ci, CHECK_CAPS_NODELAY, NULL); } dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", @@ -1668,6 +1716,12 @@ static long ceph_fallocate(struct file *file, int mode, goto unlock; } + if (!(mode & 
(FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE)) && + ceph_quota_is_max_bytes_exceeded(inode, offset + length)) { + ret = -EDQUOT; + goto unlock; + } + if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) && !(mode & FALLOC_FL_PUNCH_HOLE)) { ret = -ENOSPC; @@ -1716,6 +1770,9 @@ static long ceph_fallocate(struct file *file, int mode, spin_unlock(&ci->i_ceph_lock); if (dirty) __mark_inode_dirty(inode, dirty); + if ((endoff > size) && + ceph_quota_is_max_bytes_approaching(inode, endoff)) + ceph_check_caps(ci, CHECK_CAPS_NODELAY, NULL); } ceph_put_cap_refs(ci, got); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index c6ec5aa46100..8bf60250309e 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -441,6 +441,9 @@ struct inode *ceph_alloc_inode(struct super_block *sb) atomic64_set(&ci->i_complete_seq[1], 0); ci->i_symlink = NULL; + ci->i_max_bytes = 0; + ci->i_max_files = 0; + memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout)); RCU_INIT_POINTER(ci->i_layout.pool_ns, NULL); @@ -536,6 +539,9 @@ void ceph_destroy_inode(struct inode *inode) ceph_queue_caps_release(inode); + if (__ceph_has_any_quota(ci)) + ceph_adjust_quota_realms_count(inode, false); + /* * we may still have a snap_realm reference if there are stray * caps in i_snap_caps. 
@@ -548,6 +554,9 @@ void ceph_destroy_inode(struct inode *inode) dout(" dropping residual ref to snap realm %p\n", realm); spin_lock(&realm->inodes_with_caps_lock); list_del_init(&ci->i_snap_realm_item); + ci->i_snap_realm = NULL; + if (realm->ino == ci->i_vino.ino) + realm->inode = NULL; spin_unlock(&realm->inodes_with_caps_lock); ceph_put_snap_realm(mdsc, realm); } @@ -790,6 +799,8 @@ static int fill_inode(struct inode *inode, struct page *locked_page, inode->i_rdev = le32_to_cpu(info->rdev); inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; + __ceph_update_quota(ci, iinfo->max_bytes, iinfo->max_files); + if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) && (issued & CEPH_CAP_AUTH_EXCL) == 0) { inode->i_mode = le32_to_cpu(info->mode); @@ -1867,20 +1878,9 @@ retry: * possibly truncate them.. so write AND block! */ if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) { - struct ceph_cap_snap *capsnap; - to = ci->i_truncate_size; - list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { - // MDS should have revoked Frw caps - WARN_ON_ONCE(capsnap->writing); - if (capsnap->dirty_pages && capsnap->size > to) - to = capsnap->size; - } spin_unlock(&ci->i_ceph_lock); dout("__do_pending_vmtruncate %p flushing snaps first\n", inode); - - truncate_pagecache(inode, to); - filemap_write_and_wait_range(&inode->i_data, 0, inode->i_sb->s_maxbytes); goto retry; @@ -2152,6 +2152,10 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) if (err != 0) return err; + if ((attr->ia_valid & ATTR_SIZE) && + ceph_quota_is_max_bytes_exceeded(inode, attr->ia_size)) + return -EDQUOT; + err = __ceph_setattr(inode, attr); if (err >= 0 && (attr->ia_valid & ATTR_MODE)) diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 851aa69ec8f0..c90f03beb15d 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -5,7 +5,7 @@ #include "super.h" #include "mds_client.h" #include "ioctl.h" - +#include <linux/ceph/striper.h> /* * ioctls @@ -185,7 +185,7 @@ static long 
ceph_ioctl_get_dataloc(struct file *file, void __user *arg) &ceph_sb_to_client(inode->i_sb)->client->osdc; struct ceph_object_locator oloc; CEPH_DEFINE_OID_ONSTACK(oid); - u64 len = 1, olen; + u32 xlen; u64 tmp; struct ceph_pg pgid; int r; @@ -195,13 +195,8 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) return -EFAULT; down_read(&osdc->lock); - r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len, - &dl.object_no, &dl.object_offset, - &olen); - if (r < 0) { - up_read(&osdc->lock); - return -EIO; - } + ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, 1, + &dl.object_no, &dl.object_offset, &xlen); dl.file_offset -= dl.object_offset; dl.object_size = ci->i_layout.object_size; dl.block_size = ci->i_layout.stripe_unit; diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index 9e66f69ee8a5..9dae2ec7e1fa 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -95,7 +95,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode, owner = secure_addr(fl->fl_owner); dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, " - "start: %llu, length: %llu, wait: %d, type: %d", (int)lock_type, + "start: %llu, length: %llu, wait: %d, type: %d\n", (int)lock_type, (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length, wait, fl->fl_type); @@ -132,7 +132,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode, } ceph_mdsc_put_request(req); dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " - "length: %llu, wait: %d, type: %d, err code %d", (int)lock_type, + "length: %llu, wait: %d, type: %d, err code %d\n", (int)lock_type, (int)operation, (u64)fl->fl_pid, fl->fl_start, length, wait, fl->fl_type, err); return err; @@ -226,7 +226,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK) return -ENOLCK; - dout("ceph_lock, fl_owner: %p", fl->fl_owner); + dout("ceph_lock, 
fl_owner: %p\n", fl->fl_owner); /* set wait bit as appropriate, then make command as Ceph expects it*/ if (IS_GETLK(cmd)) @@ -264,7 +264,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl); if (!err) { if (op == CEPH_MDS_OP_SETFILELOCK) { - dout("mds locked, locking locally"); + dout("mds locked, locking locally\n"); err = posix_lock_file(file, fl, NULL); if (err) { /* undo! This should only happen if @@ -272,7 +272,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) * deadlock. */ ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, CEPH_LOCK_UNLOCK, 0, fl); - dout("got %d on posix_lock_file, undid lock", + dout("got %d on posix_lock_file, undid lock\n", err); } } @@ -294,7 +294,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) if (fl->fl_type & LOCK_MAND) return -EOPNOTSUPP; - dout("ceph_flock, fl_file: %p", fl->fl_file); + dout("ceph_flock, fl_file: %p\n", fl->fl_file); spin_lock(&ci->i_ceph_lock); if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) { @@ -329,7 +329,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, inode, CEPH_LOCK_UNLOCK, 0, fl); - dout("got %d on locks_lock_file_wait, undid lock", err); + dout("got %d on locks_lock_file_wait, undid lock\n", err); } } return err; @@ -356,7 +356,7 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) ++(*flock_count); spin_unlock(&ctx->flc_lock); } - dout("counted %d flock locks and %d fcntl locks", + dout("counted %d flock locks and %d fcntl locks\n", *flock_count, *fcntl_count); } @@ -384,7 +384,7 @@ static int lock_to_ceph_filelock(struct file_lock *lock, cephlock->type = CEPH_LOCK_UNLOCK; break; default: - dout("Have unknown lock type %d", lock->fl_type); + dout("Have unknown lock type %d\n", lock->fl_type); err = -EINVAL; } @@ -407,7 +407,7 @@ int ceph_encode_locks_to_buffer(struct inode 
*inode, int seen_flock = 0; int l = 0; - dout("encoding %d flock and %d fcntl locks", num_flock_locks, + dout("encoding %d flock and %d fcntl locks\n", num_flock_locks, num_fcntl_locks); if (!ctx) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 2e8f90f96540..5ece2e6ad154 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -100,6 +100,26 @@ static int parse_reply_info_in(void **p, void *end, } else info->inline_version = CEPH_INLINE_NONE; + if (features & CEPH_FEATURE_MDS_QUOTA) { + u8 struct_v, struct_compat; + u32 struct_len; + + /* + * both struct_v and struct_compat are expected to be >= 1 + */ + ceph_decode_8_safe(p, end, struct_v, bad); + ceph_decode_8_safe(p, end, struct_compat, bad); + if (!struct_v || !struct_compat) + goto bad; + ceph_decode_32_safe(p, end, struct_len, bad); + ceph_decode_need(p, end, struct_len, bad); + ceph_decode_64_safe(p, end, info->max_bytes, bad); + ceph_decode_64_safe(p, end, info->max_files, bad); + } else { + info->max_bytes = 0; + info->max_files = 0; + } + info->pool_ns_len = 0; info->pool_ns_data = NULL; if (features & CEPH_FEATURE_FS_FILE_LAYOUT_V2) { @@ -384,7 +404,7 @@ static struct ceph_mds_session *get_session(struct ceph_mds_session *s) refcount_read(&s->s_ref)-1, refcount_read(&s->s_ref)); return s; } else { - dout("mdsc get_session %p 0 -- FAIL", s); + dout("mdsc get_session %p 0 -- FAIL\n", s); return NULL; } } @@ -419,9 +439,10 @@ struct ceph_mds_session *__ceph_lookup_mds_session(struct ceph_mds_client *mdsc, static bool __have_session(struct ceph_mds_client *mdsc, int mds) { - if (mds >= mdsc->max_sessions) + if (mds >= mdsc->max_sessions || !mdsc->sessions[mds]) return false; - return mdsc->sessions[mds]; + else + return true; } static int __verify_registered_session(struct ceph_mds_client *mdsc, @@ -448,6 +469,25 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, s = kzalloc(sizeof(*s), GFP_NOFS); if (!s) return ERR_PTR(-ENOMEM); + + if (mds >= 
mdsc->max_sessions) { + int newmax = 1 << get_count_order(mds + 1); + struct ceph_mds_session **sa; + + dout("%s: realloc to %d\n", __func__, newmax); + sa = kcalloc(newmax, sizeof(void *), GFP_NOFS); + if (!sa) + goto fail_realloc; + if (mdsc->sessions) { + memcpy(sa, mdsc->sessions, + mdsc->max_sessions * sizeof(void *)); + kfree(mdsc->sessions); + } + mdsc->sessions = sa; + mdsc->max_sessions = newmax; + } + + dout("%s: mds%d\n", __func__, mds); s->s_mdsc = mdsc; s->s_mds = mds; s->s_state = CEPH_MDS_SESSION_NEW; @@ -476,23 +516,6 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, INIT_LIST_HEAD(&s->s_cap_releases); INIT_LIST_HEAD(&s->s_cap_flushing); - dout("register_session mds%d\n", mds); - if (mds >= mdsc->max_sessions) { - int newmax = 1 << get_count_order(mds+1); - struct ceph_mds_session **sa; - - dout("register_session realloc to %d\n", newmax); - sa = kcalloc(newmax, sizeof(void *), GFP_NOFS); - if (!sa) - goto fail_realloc; - if (mdsc->sessions) { - memcpy(sa, mdsc->sessions, - mdsc->max_sessions * sizeof(void *)); - kfree(mdsc->sessions); - } - mdsc->sessions = sa; - mdsc->max_sessions = newmax; - } mdsc->sessions[mds] = s; atomic_inc(&mdsc->num_sessions); refcount_inc(&s->s_ref); /* one ref to sessions[], one to caller */ @@ -2531,10 +2554,10 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) * Otherwise we just have to return an ESTALE */ if (result == -ESTALE) { - dout("got ESTALE on request %llu", req->r_tid); + dout("got ESTALE on request %llu\n", req->r_tid); req->r_resend_mds = -1; if (req->r_direct_mode != USE_AUTH_MDS) { - dout("not using auth, setting for that now"); + dout("not using auth, setting for that now\n"); req->r_direct_mode = USE_AUTH_MDS; __do_request(mdsc, req); mutex_unlock(&mdsc->mutex); @@ -2542,13 +2565,13 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) } else { int mds = __choose_mds(mdsc, req); if (mds >= 0 && mds != 
req->r_session->s_mds) { - dout("but auth changed, so resending"); + dout("but auth changed, so resending\n"); __do_request(mdsc, req); mutex_unlock(&mdsc->mutex); goto out; } } - dout("have to return ESTALE on request %llu", req->r_tid); + dout("have to return ESTALE on request %llu\n", req->r_tid); } @@ -3470,13 +3493,12 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, } /* - * drop all leases (and dentry refs) in preparation for umount + * lock unlock sessions, to wait ongoing session activities */ -static void drop_leases(struct ceph_mds_client *mdsc) +static void lock_unlock_sessions(struct ceph_mds_client *mdsc) { int i; - dout("drop_leases\n"); mutex_lock(&mdsc->mutex); for (i = 0; i < mdsc->max_sessions; i++) { struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i); @@ -3572,7 +3594,6 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) if (!mdsc) return -ENOMEM; mdsc->fsc = fsc; - fsc->mdsc = mdsc; mutex_init(&mdsc->mutex); mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); if (!mdsc->mdsmap) { @@ -3580,6 +3601,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) return -ENOMEM; } + fsc->mdsc = mdsc; init_completion(&mdsc->safe_umount_waiters); init_waitqueue_head(&mdsc->session_close_wq); INIT_LIST_HEAD(&mdsc->waiting_for_map); @@ -3587,6 +3609,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) atomic_set(&mdsc->num_sessions, 0); mdsc->max_sessions = 0; mdsc->stopping = 0; + atomic64_set(&mdsc->quotarealms_count, 0); mdsc->last_snap_seq = 0; init_rwsem(&mdsc->snap_rwsem); mdsc->snap_realms = RB_ROOT; @@ -3660,7 +3683,7 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) dout("pre_umount\n"); mdsc->stopping = 1; - drop_leases(mdsc); + lock_unlock_sessions(mdsc); ceph_flush_dirty_caps(mdsc); wait_requests(mdsc); @@ -3858,6 +3881,9 @@ void ceph_mdsc_destroy(struct ceph_fs_client *fsc) struct ceph_mds_client *mdsc = fsc->mdsc; dout("mdsc_destroy %p\n", mdsc); + if (!mdsc) + return; + /* flush out any connection work with 
references to us */ ceph_msgr_flush(); @@ -4077,6 +4103,9 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) case CEPH_MSG_CLIENT_LEASE: handle_lease(mdsc, s, msg); break; + case CEPH_MSG_CLIENT_QUOTA: + ceph_handle_quota(mdsc, s, msg); + break; default: pr_err("received unknown message type %d %s\n", type, diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 71e3b783ee6f..2ec3b5b35067 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -49,6 +49,8 @@ struct ceph_mds_reply_info_in { char *inline_data; u32 pool_ns_len; char *pool_ns_data; + u64 max_bytes; + u64 max_files; }; struct ceph_mds_reply_dir_entry { @@ -312,6 +314,8 @@ struct ceph_mds_client { int max_sessions; /* len of s_mds_sessions */ int stopping; /* true if shutting down */ + atomic64_t quotarealms_count; /* # realms with quota */ + /* * snap_rwsem will cover cap linkage into snaprealms, and * realm snap contexts. (later, we can do per-realm snap diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c new file mode 100644 index 000000000000..242bfa5c0539 --- /dev/null +++ b/fs/ceph/quota.c @@ -0,0 +1,361 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * quota.c - CephFS quota + * + * Copyright (C) 2017-2018 SUSE + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/statfs.h> + +#include "super.h" +#include "mds_client.h" + +void ceph_adjust_quota_realms_count(struct inode *inode, bool inc) +{ + struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + if (inc) + atomic64_inc(&mdsc->quotarealms_count); + else + atomic64_dec(&mdsc->quotarealms_count); +} + +static inline bool ceph_has_realms_with_quotas(struct inode *inode) +{ + struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + return atomic64_read(&mdsc->quotarealms_count) > 0; +} + +void ceph_handle_quota(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session, + struct ceph_msg *msg) +{ + struct super_block *sb = mdsc->fsc->sb; + struct ceph_mds_quota *h = msg->front.iov_base; + struct ceph_vino vino; + struct inode *inode; + struct ceph_inode_info *ci; + + if (msg->front.iov_len != sizeof(*h)) { + pr_err("%s corrupt message mds%d len %d\n", __func__, + session->s_mds, (int)msg->front.iov_len); + ceph_msg_dump(msg); + return; + } + + /* increment msg sequence number */ + mutex_lock(&session->s_mutex); + session->s_seq++; + mutex_unlock(&session->s_mutex); + + /* lookup inode */ + vino.ino = le64_to_cpu(h->ino); + vino.snap = CEPH_NOSNAP; + inode = ceph_find_inode(sb, vino); + if (!inode) { + pr_warn("Failed to find inode %llu\n", vino.ino); + return; + } + ci = ceph_inode(inode); + + spin_lock(&ci->i_ceph_lock); + ci->i_rbytes = le64_to_cpu(h->rbytes); + ci->i_rfiles = le64_to_cpu(h->rfiles); + ci->i_rsubdirs = le64_to_cpu(h->rsubdirs); + __ceph_update_quota(ci, le64_to_cpu(h->max_bytes), + le64_to_cpu(h->max_files)); + spin_unlock(&ci->i_ceph_lock); + + iput(inode); +} + +/* + * This function walks through the snaprealm for an inode and returns the + * ceph_snap_realm for the first snaprealm that has quotas set (either max_files + * or max_bytes). If the root is reached, return the root ceph_snap_realm + * instead. 
+ * + * Note that the caller is responsible for calling ceph_put_snap_realm() on the + * returned realm. + */ +static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, + struct inode *inode) +{ + struct ceph_inode_info *ci = NULL; + struct ceph_snap_realm *realm, *next; + struct inode *in; + bool has_quota; + + if (ceph_snap(inode) != CEPH_NOSNAP) + return NULL; + + realm = ceph_inode(inode)->i_snap_realm; + if (realm) + ceph_get_snap_realm(mdsc, realm); + else + pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) " + "null i_snap_realm\n", ceph_vinop(inode)); + while (realm) { + spin_lock(&realm->inodes_with_caps_lock); + in = realm->inode ? igrab(realm->inode) : NULL; + spin_unlock(&realm->inodes_with_caps_lock); + if (!in) + break; + + ci = ceph_inode(in); + has_quota = __ceph_has_any_quota(ci); + iput(in); + + next = realm->parent; + if (has_quota || !next) + return realm; + + ceph_get_snap_realm(mdsc, next); + ceph_put_snap_realm(mdsc, realm); + realm = next; + } + if (realm) + ceph_put_snap_realm(mdsc, realm); + + return NULL; +} + +bool ceph_quota_is_same_realm(struct inode *old, struct inode *new) +{ + struct ceph_mds_client *mdsc = ceph_inode_to_client(old)->mdsc; + struct ceph_snap_realm *old_realm, *new_realm; + bool is_same; + + down_read(&mdsc->snap_rwsem); + old_realm = get_quota_realm(mdsc, old); + new_realm = get_quota_realm(mdsc, new); + is_same = (old_realm == new_realm); + up_read(&mdsc->snap_rwsem); + + if (old_realm) + ceph_put_snap_realm(mdsc, old_realm); + if (new_realm) + ceph_put_snap_realm(mdsc, new_realm); + + return is_same; +} + +enum quota_check_op { + QUOTA_CHECK_MAX_FILES_OP, /* check quota max_files limit */ + QUOTA_CHECK_MAX_BYTES_OP, /* check quota max_files limit */ + QUOTA_CHECK_MAX_BYTES_APPROACHING_OP /* check if quota max_files + limit is approaching */ +}; + +/* + * check_quota_exceeded() will walk up the snaprealm hierarchy and, for each + * realm, it will execute quota check operation defined by the 
'op' parameter. + * The snaprealm walk is interrupted if the quota check detects that the quota + * is exceeded or if the root inode is reached. + */ +static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, + loff_t delta) +{ + struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; + struct ceph_inode_info *ci; + struct ceph_snap_realm *realm, *next; + struct inode *in; + u64 max, rvalue; + bool exceeded = false; + + if (ceph_snap(inode) != CEPH_NOSNAP) + return false; + + down_read(&mdsc->snap_rwsem); + realm = ceph_inode(inode)->i_snap_realm; + if (realm) + ceph_get_snap_realm(mdsc, realm); + else + pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) " + "null i_snap_realm\n", ceph_vinop(inode)); + while (realm) { + spin_lock(&realm->inodes_with_caps_lock); + in = realm->inode ? igrab(realm->inode) : NULL; + spin_unlock(&realm->inodes_with_caps_lock); + if (!in) + break; + + ci = ceph_inode(in); + spin_lock(&ci->i_ceph_lock); + if (op == QUOTA_CHECK_MAX_FILES_OP) { + max = ci->i_max_files; + rvalue = ci->i_rfiles + ci->i_rsubdirs; + } else { + max = ci->i_max_bytes; + rvalue = ci->i_rbytes; + } + spin_unlock(&ci->i_ceph_lock); + switch (op) { + case QUOTA_CHECK_MAX_FILES_OP: + exceeded = (max && (rvalue >= max)); + break; + case QUOTA_CHECK_MAX_BYTES_OP: + exceeded = (max && (rvalue + delta > max)); + break; + case QUOTA_CHECK_MAX_BYTES_APPROACHING_OP: + if (max) { + if (rvalue >= max) + exceeded = true; + else { + /* + * when we're writing more that 1/16th + * of the available space + */ + exceeded = + (((max - rvalue) >> 4) < delta); + } + } + break; + default: + /* Shouldn't happen */ + pr_warn("Invalid quota check op (%d)\n", op); + exceeded = true; /* Just break the loop */ + } + iput(in); + + next = realm->parent; + if (exceeded || !next) + break; + ceph_get_snap_realm(mdsc, next); + ceph_put_snap_realm(mdsc, realm); + realm = next; + } + ceph_put_snap_realm(mdsc, realm); + up_read(&mdsc->snap_rwsem); + + return 
exceeded; +} + +/* + * ceph_quota_is_max_files_exceeded - check if we can create a new file + * @inode: directory where a new file is being created + * + * This function returns true if the max_files quota is exceeded, i.e. no new + * file can be created. It is necessary to walk through the snaprealm hierarchy + * (until the FS root) to check all realms with quotas set. + */ +bool ceph_quota_is_max_files_exceeded(struct inode *inode) +{ + if (!ceph_has_realms_with_quotas(inode)) + return false; + + WARN_ON(!S_ISDIR(inode->i_mode)); + + return check_quota_exceeded(inode, QUOTA_CHECK_MAX_FILES_OP, 0); +} + +/* + * ceph_quota_is_max_bytes_exceeded - check if we can write to a file + * @inode: inode being written + * @newsize: new size if write succeeds + * + * This function returns true if growing the file to @newsize would exceed the + * max_bytes quota; it returns false otherwise. + */ +bool ceph_quota_is_max_bytes_exceeded(struct inode *inode, loff_t newsize) +{ + loff_t size = i_size_read(inode); + + if (!ceph_has_realms_with_quotas(inode)) + return false; + + /* return immediately if we're decreasing file size */ + if (newsize <= size) + return false; + + return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_OP, (newsize - size)); +} + +/* + * ceph_quota_is_max_bytes_approaching - check if we're reaching max_bytes + * @inode: inode being written + * @newsize: new size if write succeeds + * + * This function returns true if the new file size @newsize will be consuming + * more than 1/16th of the available quota space; it returns false otherwise. 
+ */ +bool ceph_quota_is_max_bytes_approaching(struct inode *inode, loff_t newsize) +{ + loff_t size = ceph_inode(inode)->i_reported_size; + + if (!ceph_has_realms_with_quotas(inode)) + return false; + + /* return immediately if we're decreasing file size */ + if (newsize <= size) + return false; + + return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_APPROACHING_OP, + (newsize - size)); +} + +/* + * ceph_quota_update_statfs - if root has quota update statfs with quota status + * @fsc: filesystem client instance + * @buf: statfs to update + * + * If the mounted filesystem root has max_bytes quota set, update the filesystem + * statistics with the quota status. + * + * This function returns true if the stats have been updated, false otherwise. + */ +bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf) +{ + struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_inode_info *ci; + struct ceph_snap_realm *realm; + struct inode *in; + u64 total = 0, used, free; + bool is_updated = false; + + down_read(&mdsc->snap_rwsem); + realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root)); + up_read(&mdsc->snap_rwsem); + if (!realm) + return false; + + spin_lock(&realm->inodes_with_caps_lock); + in = realm->inode ? igrab(realm->inode) : NULL; + spin_unlock(&realm->inodes_with_caps_lock); + if (in) { + ci = ceph_inode(in); + spin_lock(&ci->i_ceph_lock); + if (ci->i_max_bytes) { + total = ci->i_max_bytes >> CEPH_BLOCK_SHIFT; + used = ci->i_rbytes >> CEPH_BLOCK_SHIFT; + /* It is possible for a quota to be exceeded. + * Report 'zero' in that case + */ + free = total > used ? 
total - used : 0; + } + spin_unlock(&ci->i_ceph_lock); + if (total) { + buf->f_blocks = total; + buf->f_bfree = free; + buf->f_bavail = free; + is_updated = true; + } + iput(in); + } + ceph_put_snap_realm(mdsc, realm); + + return is_updated; +} + diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 07cf95e6413d..041c27ea8de1 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -931,6 +931,8 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, list_add(&ci->i_snap_realm_item, &realm->inodes_with_caps); ci->i_snap_realm = realm; + if (realm->ino == ci->i_vino.ino) + realm->inode = inode; spin_unlock(&realm->inodes_with_caps_lock); spin_unlock(&ci->i_ceph_lock); diff --git a/fs/ceph/super.c b/fs/ceph/super.c index fb2bc9c15a23..b33082e6878f 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -76,9 +76,18 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) */ buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; - buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); - buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); - buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); + + /* + * By default use root quota for stats; fallback to overall filesystem + * usage if using 'noquotadf' mount option or if the root dir doesn't + * have max_bytes quota set. 
+ */ + if (ceph_test_mount_opt(fsc, NOQUOTADF) || + !ceph_quota_update_statfs(fsc, buf)) { + buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); + buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); + buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); + } buf->f_files = le64_to_cpu(st.num_objects); buf->f_ffree = -1; @@ -151,6 +160,8 @@ enum { Opt_acl, #endif Opt_noacl, + Opt_quotadf, + Opt_noquotadf, }; static match_table_t fsopt_tokens = { @@ -187,6 +198,8 @@ static match_table_t fsopt_tokens = { {Opt_acl, "acl"}, #endif {Opt_noacl, "noacl"}, + {Opt_quotadf, "quotadf"}, + {Opt_noquotadf, "noquotadf"}, {-1, NULL} }; @@ -314,13 +327,16 @@ static int parse_fsopt_token(char *c, void *private) break; case Opt_fscache: fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; + kfree(fsopt->fscache_uniq); + fsopt->fscache_uniq = NULL; break; case Opt_nofscache: fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; + kfree(fsopt->fscache_uniq); + fsopt->fscache_uniq = NULL; break; case Opt_poolperm: fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM; - printk ("pool perm"); break; case Opt_nopoolperm: fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; @@ -331,6 +347,12 @@ static int parse_fsopt_token(char *c, void *private) case Opt_norequire_active_mds: fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT; break; + case Opt_quotadf: + fsopt->flags &= ~CEPH_MOUNT_OPT_NOQUOTADF; + break; + case Opt_noquotadf: + fsopt->flags |= CEPH_MOUNT_OPT_NOQUOTADF; + break; #ifdef CONFIG_CEPH_FS_POSIX_ACL case Opt_acl: fsopt->sb_flags |= SB_POSIXACL; @@ -513,13 +535,12 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0) seq_puts(m, ",nodcache"); if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) { - if (fsopt->fscache_uniq) - seq_printf(m, ",fsc=%s", fsopt->fscache_uniq); - else - seq_puts(m, ",fsc"); + seq_show_option(m, "fsc", fsopt->fscache_uniq); } if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM) seq_puts(m, ",nopoolperm"); + if 
(fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF) + seq_puts(m, ",noquotadf"); #ifdef CONFIG_CEPH_FS_POSIX_ACL if (fsopt->sb_flags & SB_POSIXACL) @@ -529,7 +550,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) #endif if (fsopt->mds_namespace) - seq_printf(m, ",mds_namespace=%s", fsopt->mds_namespace); + seq_show_option(m, "mds_namespace", fsopt->mds_namespace); if (fsopt->wsize) seq_printf(m, ",wsize=%d", fsopt->wsize); if (fsopt->rsize != CEPH_MAX_READ_SIZE) @@ -679,6 +700,7 @@ struct kmem_cache *ceph_cap_cachep; struct kmem_cache *ceph_cap_flush_cachep; struct kmem_cache *ceph_dentry_cachep; struct kmem_cache *ceph_file_cachep; +struct kmem_cache *ceph_dir_file_cachep; static void ceph_inode_init_once(void *foo) { @@ -698,8 +720,7 @@ static int __init init_caches(void) if (!ceph_inode_cachep) return -ENOMEM; - ceph_cap_cachep = KMEM_CACHE(ceph_cap, - SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); + ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD); if (!ceph_cap_cachep) goto bad_cap; ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush, @@ -716,6 +737,10 @@ static int __init init_caches(void) if (!ceph_file_cachep) goto bad_file; + ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD); + if (!ceph_dir_file_cachep) + goto bad_dir_file; + error = ceph_fscache_register(); if (error) goto bad_fscache; @@ -723,6 +748,8 @@ static int __init init_caches(void) return 0; bad_fscache: + kmem_cache_destroy(ceph_dir_file_cachep); +bad_dir_file: kmem_cache_destroy(ceph_file_cachep); bad_file: kmem_cache_destroy(ceph_dentry_cachep); @@ -748,6 +775,7 @@ static void destroy_caches(void) kmem_cache_destroy(ceph_cap_flush_cachep); kmem_cache_destroy(ceph_dentry_cachep); kmem_cache_destroy(ceph_file_cachep); + kmem_cache_destroy(ceph_dir_file_cachep); ceph_fscache_unregister(); } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 1c2086e0fec2..a7077a0c989f 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -39,6 +39,7 @@ #define 
CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */ #define CEPH_MOUNT_OPT_NOPOOLPERM (1<<11) /* no pool permission check */ #define CEPH_MOUNT_OPT_MOUNTWAIT (1<<12) /* mount waits if no mds is up */ +#define CEPH_MOUNT_OPT_NOQUOTADF (1<<13) /* no root dir quota in statfs */ #define CEPH_MOUNT_OPT_DEFAULT CEPH_MOUNT_OPT_DCACHE @@ -310,6 +311,9 @@ struct ceph_inode_info { u64 i_rbytes, i_rfiles, i_rsubdirs; u64 i_files, i_subdirs; + /* quotas */ + u64 i_max_bytes, i_max_files; + struct rb_root i_fragtree; int i_fragtree_nsplits; struct mutex i_fragtree_mutex; @@ -671,6 +675,10 @@ struct ceph_file_info { spinlock_t rw_contexts_lock; struct list_head rw_contexts; +}; + +struct ceph_dir_file_info { + struct ceph_file_info file_info; /* readdir: position within the dir */ u32 frag; @@ -748,6 +756,7 @@ struct ceph_readdir_cache_control { */ struct ceph_snap_realm { u64 ino; + struct inode *inode; atomic_t nref; struct rb_node node; @@ -1066,4 +1075,37 @@ extern int ceph_locks_to_pagelist(struct ceph_filelock *flocks, extern int ceph_fs_debugfs_init(struct ceph_fs_client *client); extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client); +/* quota.c */ +static inline bool __ceph_has_any_quota(struct ceph_inode_info *ci) +{ + return ci->i_max_files || ci->i_max_bytes; +} + +extern void ceph_adjust_quota_realms_count(struct inode *inode, bool inc); + +static inline void __ceph_update_quota(struct ceph_inode_info *ci, + u64 max_bytes, u64 max_files) +{ + bool had_quota, has_quota; + had_quota = __ceph_has_any_quota(ci); + ci->i_max_bytes = max_bytes; + ci->i_max_files = max_files; + has_quota = __ceph_has_any_quota(ci); + + if (had_quota != has_quota) + ceph_adjust_quota_realms_count(&ci->vfs_inode, has_quota); +} + +extern void ceph_handle_quota(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session, + struct ceph_msg *msg); +extern bool ceph_quota_is_max_files_exceeded(struct inode *inode); +extern bool ceph_quota_is_same_realm(struct inode *old, struct 
inode *new); +extern bool ceph_quota_is_max_bytes_exceeded(struct inode *inode, + loff_t newlen); +extern bool ceph_quota_is_max_bytes_approaching(struct inode *inode, + loff_t newlen); +extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, + struct kstatfs *buf); + #endif /* _FS_CEPH_SUPER_H */ diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index e1c4e0b12b4c..7e72348639e4 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -224,6 +224,31 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val, (long)ci->i_rctime.tv_nsec); } +/* quotas */ + +static bool ceph_vxattrcb_quota_exists(struct ceph_inode_info *ci) +{ + return (ci->i_max_files || ci->i_max_bytes); +} + +static size_t ceph_vxattrcb_quota(struct ceph_inode_info *ci, char *val, + size_t size) +{ + return snprintf(val, size, "max_bytes=%llu max_files=%llu", + ci->i_max_bytes, ci->i_max_files); +} + +static size_t ceph_vxattrcb_quota_max_bytes(struct ceph_inode_info *ci, + char *val, size_t size) +{ + return snprintf(val, size, "%llu", ci->i_max_bytes); +} + +static size_t ceph_vxattrcb_quota_max_files(struct ceph_inode_info *ci, + char *val, size_t size) +{ + return snprintf(val, size, "%llu", ci->i_max_files); +} #define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." 
#_name #define CEPH_XATTR_NAME2(_type, _name, _name2) \ @@ -247,6 +272,15 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val, .hidden = true, \ .exists_cb = ceph_vxattrcb_layout_exists, \ } +#define XATTR_QUOTA_FIELD(_type, _name) \ + { \ + .name = CEPH_XATTR_NAME(_type, _name), \ + .name_size = sizeof(CEPH_XATTR_NAME(_type, _name)), \ + .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \ + .readonly = false, \ + .hidden = true, \ + .exists_cb = ceph_vxattrcb_quota_exists, \ + } static struct ceph_vxattr ceph_dir_vxattrs[] = { { @@ -270,6 +304,16 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = { XATTR_NAME_CEPH(dir, rsubdirs), XATTR_NAME_CEPH(dir, rbytes), XATTR_NAME_CEPH(dir, rctime), + { + .name = "ceph.quota", + .name_size = sizeof("ceph.quota"), + .getxattr_cb = ceph_vxattrcb_quota, + .readonly = false, + .hidden = true, + .exists_cb = ceph_vxattrcb_quota_exists, + }, + XATTR_QUOTA_FIELD(quota, max_bytes), + XATTR_QUOTA_FIELD(quota, max_files), { .name = NULL, 0 } /* Required table terminator */ }; static size_t ceph_dir_vxattrs_name_size; /* total size of all names */ diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 59042d5ac520..3901927cf6a0 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -204,6 +204,7 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin CEPH_FEATURE_OSD_PRIMARY_AFFINITY | \ CEPH_FEATURE_MSGR_KEEPALIVE2 | \ CEPH_FEATURE_OSD_POOLRESEND | \ + CEPH_FEATURE_MDS_QUOTA | \ CEPH_FEATURE_CRUSH_V4 | \ CEPH_FEATURE_NEW_OSDOP_ENCODING | \ CEPH_FEATURE_SERVER_JEWEL | \ diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 88dd51381aaf..7ecfc88314d8 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -134,6 +134,7 @@ struct ceph_dir_layout { #define CEPH_MSG_CLIENT_LEASE 0x311 #define CEPH_MSG_CLIENT_SNAP 0x312 #define CEPH_MSG_CLIENT_CAPRELEASE 0x313 
+#define CEPH_MSG_CLIENT_QUOTA 0x314 /* pool ops */ #define CEPH_MSG_POOLOP_REPLY 48 @@ -807,4 +808,20 @@ struct ceph_mds_snap_realm { } __attribute__ ((packed)); /* followed by my snap list, then prior parent snap list */ +/* + * quotas + */ +struct ceph_mds_quota { + __le64 ino; /* ino */ + struct ceph_timespec rctime; + __le64 rbytes; /* dir stats */ + __le64 rfiles; + __le64 rsubdirs; + __u8 struct_v; /* compat */ + __u8 struct_compat; + __le32 struct_len; + __le64 max_bytes; /* quota max. bytes */ + __le64 max_files; /* quota max. files */ +} __attribute__ ((packed)); + #endif diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index c2ec44cf5098..49c93b9308d7 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -262,6 +262,7 @@ extern struct kmem_cache *ceph_cap_cachep; extern struct kmem_cache *ceph_cap_flush_cachep; extern struct kmem_cache *ceph_dentry_cachep; extern struct kmem_cache *ceph_file_cachep; +extern struct kmem_cache *ceph_dir_file_cachep; /* ceph_common.c */ extern bool libceph_compatible(void *data); diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index ead9d85f1c11..c7dfcb8a1fb2 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -76,6 +76,7 @@ enum ceph_msg_data_type { #ifdef CONFIG_BLOCK CEPH_MSG_DATA_BIO, /* data source/destination is a bio list */ #endif /* CONFIG_BLOCK */ + CEPH_MSG_DATA_BVECS, /* data source/destination is a bio_vec array */ }; static __inline__ bool ceph_msg_data_type_valid(enum ceph_msg_data_type type) @@ -87,22 +88,106 @@ static __inline__ bool ceph_msg_data_type_valid(enum ceph_msg_data_type type) #ifdef CONFIG_BLOCK case CEPH_MSG_DATA_BIO: #endif /* CONFIG_BLOCK */ + case CEPH_MSG_DATA_BVECS: return true; default: return false; } } +#ifdef CONFIG_BLOCK + +struct ceph_bio_iter { + struct bio *bio; + struct bvec_iter iter; +}; + +#define __ceph_bio_iter_advance_step(it, n, STEP) do { \ + unsigned int __n = 
(n), __cur_n; \ + \ + while (__n) { \ + BUG_ON(!(it)->iter.bi_size); \ + __cur_n = min((it)->iter.bi_size, __n); \ + (void)(STEP); \ + bio_advance_iter((it)->bio, &(it)->iter, __cur_n); \ + if (!(it)->iter.bi_size && (it)->bio->bi_next) { \ + dout("__ceph_bio_iter_advance_step next bio\n"); \ + (it)->bio = (it)->bio->bi_next; \ + (it)->iter = (it)->bio->bi_iter; \ + } \ + __n -= __cur_n; \ + } \ +} while (0) + +/* + * Advance @it by @n bytes. + */ +#define ceph_bio_iter_advance(it, n) \ + __ceph_bio_iter_advance_step(it, n, 0) + +/* + * Advance @it by @n bytes, executing BVEC_STEP for each bio_vec. + */ +#define ceph_bio_iter_advance_step(it, n, BVEC_STEP) \ + __ceph_bio_iter_advance_step(it, n, ({ \ + struct bio_vec bv; \ + struct bvec_iter __cur_iter; \ + \ + __cur_iter = (it)->iter; \ + __cur_iter.bi_size = __cur_n; \ + __bio_for_each_segment(bv, (it)->bio, __cur_iter, __cur_iter) \ + (void)(BVEC_STEP); \ + })) + +#endif /* CONFIG_BLOCK */ + +struct ceph_bvec_iter { + struct bio_vec *bvecs; + struct bvec_iter iter; +}; + +#define __ceph_bvec_iter_advance_step(it, n, STEP) do { \ + BUG_ON((n) > (it)->iter.bi_size); \ + (void)(STEP); \ + bvec_iter_advance((it)->bvecs, &(it)->iter, (n)); \ +} while (0) + +/* + * Advance @it by @n bytes. + */ +#define ceph_bvec_iter_advance(it, n) \ + __ceph_bvec_iter_advance_step(it, n, 0) + +/* + * Advance @it by @n bytes, executing BVEC_STEP for each bio_vec. 
+ */ +#define ceph_bvec_iter_advance_step(it, n, BVEC_STEP) \ + __ceph_bvec_iter_advance_step(it, n, ({ \ + struct bio_vec bv; \ + struct bvec_iter __cur_iter; \ + \ + __cur_iter = (it)->iter; \ + __cur_iter.bi_size = (n); \ + for_each_bvec(bv, (it)->bvecs, __cur_iter, __cur_iter) \ + (void)(BVEC_STEP); \ + })) + +#define ceph_bvec_iter_shorten(it, n) do { \ + BUG_ON((n) > (it)->iter.bi_size); \ + (it)->iter.bi_size = (n); \ +} while (0) + struct ceph_msg_data { struct list_head links; /* ceph_msg->data */ enum ceph_msg_data_type type; union { #ifdef CONFIG_BLOCK struct { - struct bio *bio; - size_t bio_length; + struct ceph_bio_iter bio_pos; + u32 bio_length; }; #endif /* CONFIG_BLOCK */ + struct ceph_bvec_iter bvec_pos; struct { struct page **pages; /* NOT OWNER. */ size_t length; /* total # bytes */ @@ -122,11 +207,9 @@ struct ceph_msg_data_cursor { bool need_crc; /* crc update needed */ union { #ifdef CONFIG_BLOCK - struct { /* bio */ - struct bio *bio; /* bio from list */ - struct bvec_iter bvec_iter; - }; + struct ceph_bio_iter bio_iter; #endif /* CONFIG_BLOCK */ + struct bvec_iter bvec_iter; struct { /* pages */ unsigned int page_offset; /* offset in page */ unsigned short page_index; /* index in array */ @@ -290,9 +373,11 @@ extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, extern void ceph_msg_data_add_pagelist(struct ceph_msg *msg, struct ceph_pagelist *pagelist); #ifdef CONFIG_BLOCK -extern void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio, - size_t length); +void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos, + u32 length); #endif /* CONFIG_BLOCK */ +void ceph_msg_data_add_bvecs(struct ceph_msg *msg, + struct ceph_bvec_iter *bvec_pos); extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, bool can_fail); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 52fb37d1c2a5..528ccc943cee 100644 --- a/include/linux/ceph/osd_client.h +++ 
b/include/linux/ceph/osd_client.h @@ -57,6 +57,7 @@ enum ceph_osd_data_type { #ifdef CONFIG_BLOCK CEPH_OSD_DATA_TYPE_BIO, #endif /* CONFIG_BLOCK */ + CEPH_OSD_DATA_TYPE_BVECS, }; struct ceph_osd_data { @@ -72,10 +73,11 @@ struct ceph_osd_data { struct ceph_pagelist *pagelist; #ifdef CONFIG_BLOCK struct { - struct bio *bio; /* list of bios */ - size_t bio_length; /* total in list */ + struct ceph_bio_iter bio_pos; + u32 bio_length; }; #endif /* CONFIG_BLOCK */ + struct ceph_bvec_iter bvec_pos; }; }; @@ -405,10 +407,14 @@ extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *, unsigned int which, struct ceph_pagelist *pagelist); #ifdef CONFIG_BLOCK -extern void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *, - unsigned int which, - struct bio *bio, size_t bio_length); +void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req, + unsigned int which, + struct ceph_bio_iter *bio_pos, + u32 bio_length); #endif /* CONFIG_BLOCK */ +void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req, + unsigned int which, + struct ceph_bvec_iter *bvec_pos); extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *, unsigned int which, @@ -418,6 +424,9 @@ extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *, struct page **pages, u64 length, u32 alignment, bool pages_from_pool, bool own_pages); +void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req, + unsigned int which, + struct bio_vec *bvecs, u32 bytes); extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *, unsigned int which, struct page **pages, u64 length, diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index d41fad99c0fa..e71fb222c7c3 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -5,7 +5,6 @@ #include <linux/rbtree.h> #include <linux/ceph/types.h> #include <linux/ceph/decode.h> -#include <linux/ceph/ceph_fs.h> #include <linux/crush/crush.h> /* @@ 
-280,11 +279,6 @@ bool ceph_osds_changed(const struct ceph_osds *old_acting, const struct ceph_osds *new_acting, bool any_change); -/* calculate mapping of a file extent to an object */ -extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, - u64 off, u64 len, - u64 *bno, u64 *oxoff, u64 *oxlen); - int __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi, const struct ceph_object_id *oid, const struct ceph_object_locator *oloc, diff --git a/include/linux/ceph/striper.h b/include/linux/ceph/striper.h new file mode 100644 index 000000000000..cbd0d24b7148 --- /dev/null +++ b/include/linux/ceph/striper.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_CEPH_STRIPER_H +#define _LINUX_CEPH_STRIPER_H + +#include <linux/list.h> +#include <linux/types.h> + +struct ceph_file_layout; + +void ceph_calc_file_object_mapping(struct ceph_file_layout *l, + u64 off, u64 len, + u64 *objno, u64 *objoff, u32 *xlen); + +struct ceph_object_extent { + struct list_head oe_item; + u64 oe_objno; + u64 oe_off; + u64 oe_len; +}; + +static inline void ceph_object_extent_init(struct ceph_object_extent *ex) +{ + INIT_LIST_HEAD(&ex->oe_item); +} + +/* + * Called for each mapped stripe unit. + * + * @bytes: number of bytes mapped, i.e. 
the minimum of the full length + * requested (file extent length) or the remainder of the stripe + * unit within an object + */ +typedef void (*ceph_object_extent_fn_t)(struct ceph_object_extent *ex, + u32 bytes, void *arg); + +int ceph_file_to_extents(struct ceph_file_layout *l, u64 off, u64 len, + struct list_head *object_extents, + struct ceph_object_extent *alloc_fn(void *arg), + void *alloc_arg, + ceph_object_extent_fn_t action_fn, + void *action_arg); +int ceph_iterate_extents(struct ceph_file_layout *l, u64 off, u64 len, + struct list_head *object_extents, + ceph_object_extent_fn_t action_fn, + void *action_arg); + +struct ceph_file_extent { + u64 fe_off; + u64 fe_len; +}; + +static inline u64 ceph_file_extents_bytes(struct ceph_file_extent *file_extents, + u32 num_file_extents) +{ + u64 bytes = 0; + u32 i; + + for (i = 0; i < num_file_extents; i++) + bytes += file_extents[i].fe_len; + + return bytes; +} + +int ceph_extent_to_file(struct ceph_file_layout *l, + u64 objno, u64 objoff, u64 objlen, + struct ceph_file_extent **file_extents, + u32 *num_file_extents); + +#endif diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index f838764993eb..861be5cab1df 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -470,7 +470,11 @@ typedef void (*dma_async_tx_callback_result)(void *dma_async_param, const struct dmaengine_result *result); struct dmaengine_unmap_data { +#if IS_ENABLED(CONFIG_DMA_ENGINE_RAID) + u16 map_cnt; +#else u8 map_cnt; +#endif u8 to_cnt; u8 from_cnt; u8 bidi_cnt; diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h index fcdc707eab99..2744cff1b297 100644 --- a/include/linux/platform_data/mlxreg.h +++ b/include/linux/platform_data/mlxreg.h @@ -129,6 +129,8 @@ struct mlxreg_core_platform_data { * @mask: top aggregation interrupt common mask; * @cell_low: location of low aggregation interrupt register; * @mask_low: low aggregation interrupt common mask; + * @deferred_nr: I2C 
adapter number must exist prior to probing execution; + * @shift_nr: I2C adapter numbers must be incremented by this value; */ struct mlxreg_core_hotplug_platform_data { struct mlxreg_core_item *items; @@ -139,6 +141,8 @@ struct mlxreg_core_hotplug_platform_data { u32 mask; u32 cell_low; u32 mask_low; + int deferred_nr; + int shift_nr; }; #endif /* __LINUX_PLATFORM_DATA_MLXREG_H */ diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 728d421fffe9..d09a9c7af109 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -344,7 +344,7 @@ struct rproc_ops { int (*stop)(struct rproc *rproc); void (*kick)(struct rproc *rproc, int vqid); void * (*da_to_va)(struct rproc *rproc, u64 da, int len); - int (*load_rsc_table)(struct rproc *rproc, const struct firmware *fw); + int (*parse_fw)(struct rproc *rproc, const struct firmware *fw); struct resource_table *(*find_loaded_rsc_table)( struct rproc *rproc, const struct firmware *fw); int (*load)(struct rproc *rproc, const struct firmware *fw); @@ -395,6 +395,21 @@ enum rproc_crash_type { }; /** + * struct rproc_dump_segment - segment info from ELF header + * @node: list node related to the rproc segment list + * @da: device address of the segment + * @size: size of the segment + */ +struct rproc_dump_segment { + struct list_head node; + + dma_addr_t da; + size_t size; + + loff_t offset; +}; + +/** * struct rproc - represents a physical remote processor device * @node: list node of this rproc object * @domain: iommu domain @@ -424,6 +439,7 @@ enum rproc_crash_type { * @cached_table: copy of the resource table * @table_sz: size of @cached_table * @has_iommu: flag to indicate if remote processor is behind an MMU + * @dump_segments: list of segments in the firmware */ struct rproc { struct list_head node; @@ -455,19 +471,21 @@ struct rproc { size_t table_sz; bool has_iommu; bool auto_boot; + struct list_head dump_segments; }; /** * struct rproc_subdev - subdevice tied to a remoteproc * @node: 
list node related to the rproc subdevs list + * @probe: probe function, called as the rproc is started - * @remove: remove function, called as the rproc is stopped + * @remove: remove function, called as the rproc is being stopped, the @crashed + * parameter indicates if this originates from a recovery */ struct rproc_subdev { struct list_head node; int (*probe)(struct rproc_subdev *subdev); - void (*remove)(struct rproc_subdev *subdev); + void (*remove)(struct rproc_subdev *subdev, bool crashed); }; /* we currently support only two vrings per rvdev */ @@ -534,6 +552,7 @@ void rproc_free(struct rproc *rproc); int rproc_boot(struct rproc *rproc); void rproc_shutdown(struct rproc *rproc); void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type); +int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size); static inline struct rproc_vdev *vdev_to_rvdev(struct virtio_device *vdev) { @@ -550,7 +569,7 @@ static inline struct rproc *vdev_to_rproc(struct virtio_device *vdev) void rproc_add_subdev(struct rproc *rproc, struct rproc_subdev *subdev, int (*probe)(struct rproc_subdev *subdev), - void (*remove)(struct rproc_subdev *subdev)); + void (*remove)(struct rproc_subdev *subdev, bool graceful)); void rproc_remove_subdev(struct rproc *rproc, struct rproc_subdev *subdev); diff --git a/include/linux/soc/qcom/mdt_loader.h b/include/linux/soc/qcom/mdt_loader.h index bd8e0864b059..5b98bbdabc25 100644 --- a/include/linux/soc/qcom/mdt_loader.h +++ b/include/linux/soc/qcom/mdt_loader.h @@ -14,6 +14,7 @@ struct firmware; ssize_t qcom_mdt_get_size(const struct firmware *fw); int qcom_mdt_load(struct device *dev, const struct firmware *fw, const char *fw_name, int pas_id, void *mem_region, - phys_addr_t mem_phys, size_t mem_size); + phys_addr_t mem_phys, size_t mem_size, + phys_addr_t *reloc_base); #endif diff --git a/net/ceph/Makefile b/net/ceph/Makefile index b4bded4b5396..12bf49772d24 100644 --- a/net/ceph/Makefile +++ b/net/ceph/Makefile @@ 
-8,6 +8,7 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \ mon_client.o \ cls_lock_client.o \ osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \ + striper.o \ debugfs.o \ auth.o auth_none.o \ crypto.o armor.o \ diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 4adf07826f4a..584fdbef2088 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -72,6 +72,7 @@ const char *ceph_msg_type_name(int type) case CEPH_MSG_MON_GET_VERSION: return "mon_get_version"; case CEPH_MSG_MON_GET_VERSION_REPLY: return "mon_get_version_reply"; case CEPH_MSG_MDS_MAP: return "mds_map"; + case CEPH_MSG_FS_MAP_USER: return "fs_map_user"; case CEPH_MSG_CLIENT_SESSION: return "client_session"; case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect"; case CEPH_MSG_CLIENT_REQUEST: return "client_request"; @@ -79,8 +80,13 @@ const char *ceph_msg_type_name(int type) case CEPH_MSG_CLIENT_REPLY: return "client_reply"; case CEPH_MSG_CLIENT_CAPS: return "client_caps"; case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release"; + case CEPH_MSG_CLIENT_QUOTA: return "client_quota"; case CEPH_MSG_CLIENT_SNAP: return "client_snap"; case CEPH_MSG_CLIENT_LEASE: return "client_lease"; + case CEPH_MSG_POOLOP_REPLY: return "poolop_reply"; + case CEPH_MSG_POOLOP: return "poolop"; + case CEPH_MSG_MON_COMMAND: return "mon_command"; + case CEPH_MSG_MON_COMMAND_ACK: return "mon_command_ack"; case CEPH_MSG_OSD_MAP: return "osd_map"; case CEPH_MSG_OSD_OP: return "osd_op"; case CEPH_MSG_OSD_OPREPLY: return "osd_opreply"; @@ -217,7 +223,7 @@ static int parse_fsid(const char *str, struct ceph_fsid *fsid) if (i == 16) err = 0; - dout("parse_fsid ret %d got fsid %pU", err, fsid); + dout("parse_fsid ret %d got fsid %pU\n", err, fsid); return err; } diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index bf9d079cbafd..02172c408ff2 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -347,10 +347,12 @@ struct key_type key_type_ceph = { .destroy = 
ceph_key_destroy, }; -int ceph_crypto_init(void) { +int __init ceph_crypto_init(void) +{ return register_key_type(&key_type_ceph); } -void ceph_crypto_shutdown(void) { +void ceph_crypto_shutdown(void) +{ unregister_key_type(&key_type_ceph); } diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 1eef6806aa1a..02952605d121 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -389,7 +389,7 @@ CEPH_DEFINE_SHOW_FUNC(monc_show) CEPH_DEFINE_SHOW_FUNC(osdc_show) CEPH_DEFINE_SHOW_FUNC(client_options_show) -int ceph_debugfs_init(void) +int __init ceph_debugfs_init(void) { ceph_debugfs_dir = debugfs_create_dir("ceph", NULL); if (!ceph_debugfs_dir) @@ -418,7 +418,7 @@ int ceph_debugfs_client_init(struct ceph_client *client) goto out; client->monc.debugfs_file = debugfs_create_file("monc", - 0600, + 0400, client->debugfs_dir, client, &monc_show_fops); @@ -426,7 +426,7 @@ int ceph_debugfs_client_init(struct ceph_client *client) goto out; client->osdc.debugfs_file = debugfs_create_file("osdc", - 0600, + 0400, client->debugfs_dir, client, &osdc_show_fops); @@ -434,7 +434,7 @@ int ceph_debugfs_client_init(struct ceph_client *client) goto out; client->debugfs_monmap = debugfs_create_file("monmap", - 0600, + 0400, client->debugfs_dir, client, &monmap_show_fops); @@ -442,7 +442,7 @@ int ceph_debugfs_client_init(struct ceph_client *client) goto out; client->debugfs_osdmap = debugfs_create_file("osdmap", - 0600, + 0400, client->debugfs_dir, client, &osdmap_show_fops); @@ -450,7 +450,7 @@ int ceph_debugfs_client_init(struct ceph_client *client) goto out; client->debugfs_options = debugfs_create_file("client_options", - 0600, + 0400, client->debugfs_dir, client, &client_options_show_fops); @@ -477,7 +477,7 @@ void ceph_debugfs_client_cleanup(struct ceph_client *client) #else /* CONFIG_DEBUG_FS */ -int ceph_debugfs_init(void) +int __init ceph_debugfs_init(void) { return 0; } @@ -496,6 +496,3 @@ void ceph_debugfs_client_cleanup(struct ceph_client *client) } #endif /* 
CONFIG_DEBUG_FS */ - -EXPORT_SYMBOL(ceph_debugfs_init); -EXPORT_SYMBOL(ceph_debugfs_cleanup); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 8a4d3758030b..fcb40c12b1f8 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -277,7 +277,7 @@ static void _ceph_msgr_exit(void) ceph_msgr_slab_exit(); } -int ceph_msgr_init(void) +int __init ceph_msgr_init(void) { if (ceph_msgr_slab_init()) return -ENOMEM; @@ -299,7 +299,6 @@ int ceph_msgr_init(void) return -ENOMEM; } -EXPORT_SYMBOL(ceph_msgr_init); void ceph_msgr_exit(void) { @@ -307,7 +306,6 @@ void ceph_msgr_exit(void) _ceph_msgr_exit(); } -EXPORT_SYMBOL(ceph_msgr_exit); void ceph_msgr_flush(void) { @@ -839,93 +837,112 @@ static void ceph_msg_data_bio_cursor_init(struct ceph_msg_data_cursor *cursor, size_t length) { struct ceph_msg_data *data = cursor->data; - struct bio *bio; + struct ceph_bio_iter *it = &cursor->bio_iter; - BUG_ON(data->type != CEPH_MSG_DATA_BIO); + cursor->resid = min_t(size_t, length, data->bio_length); + *it = data->bio_pos; + if (cursor->resid < it->iter.bi_size) + it->iter.bi_size = cursor->resid; - bio = data->bio; - BUG_ON(!bio); - - cursor->resid = min(length, data->bio_length); - cursor->bio = bio; - cursor->bvec_iter = bio->bi_iter; - cursor->last_piece = - cursor->resid <= bio_iter_len(bio, cursor->bvec_iter); + BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter)); + cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter); } static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor, size_t *page_offset, size_t *length) { - struct ceph_msg_data *data = cursor->data; - struct bio *bio; - struct bio_vec bio_vec; - - BUG_ON(data->type != CEPH_MSG_DATA_BIO); - - bio = cursor->bio; - BUG_ON(!bio); - - bio_vec = bio_iter_iovec(bio, cursor->bvec_iter); - - *page_offset = (size_t) bio_vec.bv_offset; - BUG_ON(*page_offset >= PAGE_SIZE); - if (cursor->last_piece) /* pagelist offset is always 0 */ - *length = cursor->resid; - else - 
*length = (size_t) bio_vec.bv_len; - BUG_ON(*length > cursor->resid); - BUG_ON(*page_offset + *length > PAGE_SIZE); + struct bio_vec bv = bio_iter_iovec(cursor->bio_iter.bio, + cursor->bio_iter.iter); - return bio_vec.bv_page; + *page_offset = bv.bv_offset; + *length = bv.bv_len; + return bv.bv_page; } static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor, size_t bytes) { - struct bio *bio; - struct bio_vec bio_vec; + struct ceph_bio_iter *it = &cursor->bio_iter; - BUG_ON(cursor->data->type != CEPH_MSG_DATA_BIO); + BUG_ON(bytes > cursor->resid); + BUG_ON(bytes > bio_iter_len(it->bio, it->iter)); + cursor->resid -= bytes; + bio_advance_iter(it->bio, &it->iter, bytes); - bio = cursor->bio; - BUG_ON(!bio); + if (!cursor->resid) { + BUG_ON(!cursor->last_piece); + return false; /* no more data */ + } - bio_vec = bio_iter_iovec(bio, cursor->bvec_iter); + if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done)) + return false; /* more bytes to process in this segment */ - /* Advance the cursor offset */ + if (!it->iter.bi_size) { + it->bio = it->bio->bi_next; + it->iter = it->bio->bi_iter; + if (cursor->resid < it->iter.bi_size) + it->iter.bi_size = cursor->resid; + } - BUG_ON(cursor->resid < bytes); - cursor->resid -= bytes; + BUG_ON(cursor->last_piece); + BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter)); + cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter); + return true; +} +#endif /* CONFIG_BLOCK */ - bio_advance_iter(bio, &cursor->bvec_iter, bytes); +static void ceph_msg_data_bvecs_cursor_init(struct ceph_msg_data_cursor *cursor, + size_t length) +{ + struct ceph_msg_data *data = cursor->data; + struct bio_vec *bvecs = data->bvec_pos.bvecs; - if (bytes < bio_vec.bv_len) - return false; /* more bytes to process in this segment */ + cursor->resid = min_t(size_t, length, data->bvec_pos.iter.bi_size); + cursor->bvec_iter = data->bvec_pos.iter; + cursor->bvec_iter.bi_size = cursor->resid; - /* Move on to the next segment, 
and possibly the next bio */ + BUG_ON(cursor->resid < bvec_iter_len(bvecs, cursor->bvec_iter)); + cursor->last_piece = + cursor->resid == bvec_iter_len(bvecs, cursor->bvec_iter); +} - if (!cursor->bvec_iter.bi_size) { - bio = bio->bi_next; - cursor->bio = bio; - if (bio) - cursor->bvec_iter = bio->bi_iter; - else - memset(&cursor->bvec_iter, 0, - sizeof(cursor->bvec_iter)); - } +static struct page *ceph_msg_data_bvecs_next(struct ceph_msg_data_cursor *cursor, + size_t *page_offset, + size_t *length) +{ + struct bio_vec bv = bvec_iter_bvec(cursor->data->bvec_pos.bvecs, + cursor->bvec_iter); + + *page_offset = bv.bv_offset; + *length = bv.bv_len; + return bv.bv_page; +} + +static bool ceph_msg_data_bvecs_advance(struct ceph_msg_data_cursor *cursor, + size_t bytes) +{ + struct bio_vec *bvecs = cursor->data->bvec_pos.bvecs; + + BUG_ON(bytes > cursor->resid); + BUG_ON(bytes > bvec_iter_len(bvecs, cursor->bvec_iter)); + cursor->resid -= bytes; + bvec_iter_advance(bvecs, &cursor->bvec_iter, bytes); - if (!cursor->last_piece) { - BUG_ON(!cursor->resid); - BUG_ON(!bio); - /* A short read is OK, so use <= rather than == */ - if (cursor->resid <= bio_iter_len(bio, cursor->bvec_iter)) - cursor->last_piece = true; + if (!cursor->resid) { + BUG_ON(!cursor->last_piece); + return false; /* no more data */ } + if (!bytes || cursor->bvec_iter.bi_bvec_done) + return false; /* more bytes to process in this segment */ + + BUG_ON(cursor->last_piece); + BUG_ON(cursor->resid < bvec_iter_len(bvecs, cursor->bvec_iter)); + cursor->last_piece = + cursor->resid == bvec_iter_len(bvecs, cursor->bvec_iter); return true; } -#endif /* CONFIG_BLOCK */ /* * For a page array, a piece comes from the first page in the array @@ -1110,6 +1127,9 @@ static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor) ceph_msg_data_bio_cursor_init(cursor, length); break; #endif /* CONFIG_BLOCK */ + case CEPH_MSG_DATA_BVECS: + ceph_msg_data_bvecs_cursor_init(cursor, length); + break; case 
CEPH_MSG_DATA_NONE: default: /* BUG(); */ @@ -1158,14 +1178,19 @@ static struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor, page = ceph_msg_data_bio_next(cursor, page_offset, length); break; #endif /* CONFIG_BLOCK */ + case CEPH_MSG_DATA_BVECS: + page = ceph_msg_data_bvecs_next(cursor, page_offset, length); + break; case CEPH_MSG_DATA_NONE: default: page = NULL; break; } + BUG_ON(!page); BUG_ON(*page_offset + *length > PAGE_SIZE); BUG_ON(!*length); + BUG_ON(*length > cursor->resid); if (last_piece) *last_piece = cursor->last_piece; @@ -1194,6 +1219,9 @@ static void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, new_piece = ceph_msg_data_bio_advance(cursor, bytes); break; #endif /* CONFIG_BLOCK */ + case CEPH_MSG_DATA_BVECS: + new_piece = ceph_msg_data_bvecs_advance(cursor, bytes); + break; case CEPH_MSG_DATA_NONE: default: BUG(); @@ -1575,13 +1603,18 @@ static int write_partial_message_data(struct ceph_connection *con) * been revoked, so use the zero page. */ crc = do_datacrc ? 
le32_to_cpu(msg->footer.data_crc) : 0; - while (cursor->resid) { + while (cursor->total_resid) { struct page *page; size_t page_offset; size_t length; bool last_piece; int ret; + if (!cursor->resid) { + ceph_msg_data_advance(cursor, 0); + continue; + } + page = ceph_msg_data_next(cursor, &page_offset, &length, &last_piece); ret = ceph_tcp_sendpage(con->sock, page, page_offset, @@ -2297,7 +2330,12 @@ static int read_partial_msg_data(struct ceph_connection *con) if (do_datacrc) crc = con->in_data_crc; - while (cursor->resid) { + while (cursor->total_resid) { + if (!cursor->resid) { + ceph_msg_data_advance(cursor, 0); + continue; + } + page = ceph_msg_data_next(cursor, &page_offset, &length, NULL); ret = ceph_tcp_recvpage(con->sock, page, page_offset, length); if (ret <= 0) { @@ -3262,16 +3300,14 @@ void ceph_msg_data_add_pagelist(struct ceph_msg *msg, EXPORT_SYMBOL(ceph_msg_data_add_pagelist); #ifdef CONFIG_BLOCK -void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio, - size_t length) +void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos, + u32 length) { struct ceph_msg_data *data; - BUG_ON(!bio); - data = ceph_msg_data_create(CEPH_MSG_DATA_BIO); BUG_ON(!data); - data->bio = bio; + data->bio_pos = *bio_pos; data->bio_length = length; list_add_tail(&data->links, &msg->data); @@ -3280,6 +3316,20 @@ void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio, EXPORT_SYMBOL(ceph_msg_data_add_bio); #endif /* CONFIG_BLOCK */ +void ceph_msg_data_add_bvecs(struct ceph_msg *msg, + struct ceph_bvec_iter *bvec_pos) +{ + struct ceph_msg_data *data; + + data = ceph_msg_data_create(CEPH_MSG_DATA_BVECS); + BUG_ON(!data); + data->bvec_pos = *bvec_pos; + + list_add_tail(&data->links, &msg->data); + msg->data_length += bvec_pos->iter.bi_size; +} +EXPORT_SYMBOL(ceph_msg_data_add_bvecs); + /* * construct a new message with given type, size * the new msg has a ref count of 1. 
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 1547107f4854..b3dac24412d3 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -60,7 +60,7 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end) num_mon = ceph_decode_32(&p); ceph_decode_need(&p, end, num_mon*sizeof(m->mon_inst[0]), bad); - if (num_mon >= CEPH_MAX_MON) + if (num_mon > CEPH_MAX_MON) goto bad; m = kmalloc(sizeof(*m) + sizeof(m->mon_inst[0])*num_mon, GFP_NOFS); if (m == NULL) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 2814dba5902d..ea2a6c9fb7ce 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -20,6 +20,7 @@ #include <linux/ceph/decode.h> #include <linux/ceph/auth.h> #include <linux/ceph/pagelist.h> +#include <linux/ceph/striper.h> #define OSD_OPREPLY_FRONT_LEN 512 @@ -103,13 +104,12 @@ static int calc_layout(struct ceph_file_layout *layout, u64 off, u64 *plen, u64 *objnum, u64 *objoff, u64 *objlen) { u64 orig_len = *plen; - int r; + u32 xlen; /* object extent? 
*/ - r = ceph_calc_file_object_mapping(layout, off, orig_len, objnum, - objoff, objlen); - if (r < 0) - return r; + ceph_calc_file_object_mapping(layout, off, orig_len, objnum, + objoff, &xlen); + *objlen = xlen; if (*objlen < orig_len) { *plen = *objlen; dout(" skipping last %llu, final file extent %llu~%llu\n", @@ -117,7 +117,6 @@ static int calc_layout(struct ceph_file_layout *layout, u64 off, u64 *plen, } dout("calc_layout objnum=%llx %llu~%llu\n", *objnum, *objoff, *objlen); - return 0; } @@ -148,14 +147,22 @@ static void ceph_osd_data_pagelist_init(struct ceph_osd_data *osd_data, #ifdef CONFIG_BLOCK static void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data, - struct bio *bio, size_t bio_length) + struct ceph_bio_iter *bio_pos, + u32 bio_length) { osd_data->type = CEPH_OSD_DATA_TYPE_BIO; - osd_data->bio = bio; + osd_data->bio_pos = *bio_pos; osd_data->bio_length = bio_length; } #endif /* CONFIG_BLOCK */ +static void ceph_osd_data_bvecs_init(struct ceph_osd_data *osd_data, + struct ceph_bvec_iter *bvec_pos) +{ + osd_data->type = CEPH_OSD_DATA_TYPE_BVECS; + osd_data->bvec_pos = *bvec_pos; +} + #define osd_req_op_data(oreq, whch, typ, fld) \ ({ \ struct ceph_osd_request *__oreq = (oreq); \ @@ -218,16 +225,29 @@ EXPORT_SYMBOL(osd_req_op_extent_osd_data_pagelist); #ifdef CONFIG_BLOCK void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req, - unsigned int which, struct bio *bio, size_t bio_length) + unsigned int which, + struct ceph_bio_iter *bio_pos, + u32 bio_length) { struct ceph_osd_data *osd_data; osd_data = osd_req_op_data(osd_req, which, extent, osd_data); - ceph_osd_data_bio_init(osd_data, bio, bio_length); + ceph_osd_data_bio_init(osd_data, bio_pos, bio_length); } EXPORT_SYMBOL(osd_req_op_extent_osd_data_bio); #endif /* CONFIG_BLOCK */ +void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req, + unsigned int which, + struct ceph_bvec_iter *bvec_pos) +{ + struct ceph_osd_data *osd_data; + + osd_data = 
osd_req_op_data(osd_req, which, extent, osd_data); + ceph_osd_data_bvecs_init(osd_data, bvec_pos); +} +EXPORT_SYMBOL(osd_req_op_extent_osd_data_bvec_pos); + static void osd_req_op_cls_request_info_pagelist( struct ceph_osd_request *osd_req, unsigned int which, struct ceph_pagelist *pagelist) @@ -265,6 +285,23 @@ void osd_req_op_cls_request_data_pages(struct ceph_osd_request *osd_req, } EXPORT_SYMBOL(osd_req_op_cls_request_data_pages); +void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req, + unsigned int which, + struct bio_vec *bvecs, u32 bytes) +{ + struct ceph_osd_data *osd_data; + struct ceph_bvec_iter it = { + .bvecs = bvecs, + .iter = { .bi_size = bytes }, + }; + + osd_data = osd_req_op_data(osd_req, which, cls, request_data); + ceph_osd_data_bvecs_init(osd_data, &it); + osd_req->r_ops[which].cls.indata_len += bytes; + osd_req->r_ops[which].indata_len += bytes; +} +EXPORT_SYMBOL(osd_req_op_cls_request_data_bvecs); + void osd_req_op_cls_response_data_pages(struct ceph_osd_request *osd_req, unsigned int which, struct page **pages, u64 length, u32 alignment, bool pages_from_pool, bool own_pages) @@ -290,6 +327,8 @@ static u64 ceph_osd_data_length(struct ceph_osd_data *osd_data) case CEPH_OSD_DATA_TYPE_BIO: return (u64)osd_data->bio_length; #endif /* CONFIG_BLOCK */ + case CEPH_OSD_DATA_TYPE_BVECS: + return osd_data->bvec_pos.iter.bi_size; default: WARN(true, "unrecognized data type %d\n", (int)osd_data->type); return 0; @@ -828,8 +867,10 @@ static void ceph_osdc_msg_data_add(struct ceph_msg *msg, ceph_msg_data_add_pagelist(msg, osd_data->pagelist); #ifdef CONFIG_BLOCK } else if (osd_data->type == CEPH_OSD_DATA_TYPE_BIO) { - ceph_msg_data_add_bio(msg, osd_data->bio, length); + ceph_msg_data_add_bio(msg, &osd_data->bio_pos, length); #endif + } else if (osd_data->type == CEPH_OSD_DATA_TYPE_BVECS) { + ceph_msg_data_add_bvecs(msg, &osd_data->bvec_pos); } else { BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE); } @@ -5065,7 +5106,7 @@ int 
ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, } EXPORT_SYMBOL(ceph_osdc_writepages); -int ceph_osdc_setup(void) +int __init ceph_osdc_setup(void) { size_t size = sizeof(struct ceph_osd_request) + CEPH_OSD_SLAB_OPS * sizeof(struct ceph_osd_req_op); @@ -5076,7 +5117,6 @@ int ceph_osdc_setup(void) return ceph_osd_request_cache ? 0 : -ENOMEM; } -EXPORT_SYMBOL(ceph_osdc_setup); void ceph_osdc_cleanup(void) { @@ -5084,7 +5124,6 @@ void ceph_osdc_cleanup(void) kmem_cache_destroy(ceph_osd_request_cache); ceph_osd_request_cache = NULL; } -EXPORT_SYMBOL(ceph_osdc_cleanup); /* * handle incoming message diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 0da27c66349a..9645ffd6acfb 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -4,7 +4,6 @@ #include <linux/module.h> #include <linux/slab.h> -#include <asm/div64.h> #include <linux/ceph/libceph.h> #include <linux/ceph/osdmap.h> @@ -2141,76 +2140,6 @@ bool ceph_osds_changed(const struct ceph_osds *old_acting, } /* - * calculate file layout from given offset, length. - * fill in correct oid, logical length, and object extent - * offset, length. - * - * for now, we write only a single su, until we can - * pass a stride back to the caller. 
- */ -int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, - u64 off, u64 len, - u64 *ono, - u64 *oxoff, u64 *oxlen) -{ - u32 osize = layout->object_size; - u32 su = layout->stripe_unit; - u32 sc = layout->stripe_count; - u32 bl, stripeno, stripepos, objsetno; - u32 su_per_object; - u64 t, su_offset; - - dout("mapping %llu~%llu osize %u fl_su %u\n", off, len, - osize, su); - if (su == 0 || sc == 0) - goto invalid; - su_per_object = osize / su; - if (su_per_object == 0) - goto invalid; - dout("osize %u / su %u = su_per_object %u\n", osize, su, - su_per_object); - - if ((su & ~PAGE_MASK) != 0) - goto invalid; - - /* bl = *off / su; */ - t = off; - do_div(t, su); - bl = t; - dout("off %llu / su %u = bl %u\n", off, su, bl); - - stripeno = bl / sc; - stripepos = bl % sc; - objsetno = stripeno / su_per_object; - - *ono = objsetno * sc + stripepos; - dout("objset %u * sc %u = ono %u\n", objsetno, sc, (unsigned int)*ono); - - /* *oxoff = *off % layout->fl_stripe_unit; # offset in su */ - t = off; - su_offset = do_div(t, su); - *oxoff = su_offset + (stripeno % su_per_object) * su; - - /* - * Calculate the length of the extent being written to the selected - * object. This is the minimum of the full length requested (len) or - * the remainder of the current stripe being written to. - */ - *oxlen = min_t(u64, len, su - su_offset); - - dout(" obj extent %llu~%llu\n", *oxoff, *oxlen); - return 0; - -invalid: - dout(" invalid layout\n"); - *ono = 0; - *oxoff = 0; - *oxlen = 0; - return -EINVAL; -} -EXPORT_SYMBOL(ceph_calc_file_object_mapping); - -/* * Map an object into a PG. 
* * Should only be called with target_oid and target_oloc (as opposed to diff --git a/net/ceph/striper.c b/net/ceph/striper.c new file mode 100644 index 000000000000..c36462dc86b7 --- /dev/null +++ b/net/ceph/striper.c @@ -0,0 +1,261 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <linux/ceph/ceph_debug.h> + +#include <linux/math64.h> +#include <linux/slab.h> + +#include <linux/ceph/striper.h> +#include <linux/ceph/types.h> + +/* + * Map a file extent to a stripe unit within an object. + * Fill in objno, offset into object, and object extent length (i.e. the + * number of bytes mapped, less than or equal to @l->stripe_unit). + * + * Example for stripe_count = 3, stripes_per_object = 4: + * + * blockno | 0 3 6 9 | 1 4 7 10 | 2 5 8 11 | 12 15 18 21 | 13 16 19 + * stripeno | 0 1 2 3 | 0 1 2 3 | 0 1 2 3 | 4 5 6 7 | 4 5 6 + * stripepos | 0 | 1 | 2 | 0 | 1 + * objno | 0 | 1 | 2 | 3 | 4 + * objsetno | 0 | 1 + */ +void ceph_calc_file_object_mapping(struct ceph_file_layout *l, + u64 off, u64 len, + u64 *objno, u64 *objoff, u32 *xlen) +{ + u32 stripes_per_object = l->object_size / l->stripe_unit; + u64 blockno; /* which su in the file (i.e. globally) */ + u32 blockoff; /* offset into su */ + u64 stripeno; /* which stripe */ + u32 stripepos; /* which su in the stripe, + which object in the object set */ + u64 objsetno; /* which object set */ + u32 objsetpos; /* which stripe in the object set */ + + blockno = div_u64_rem(off, l->stripe_unit, &blockoff); + stripeno = div_u64_rem(blockno, l->stripe_count, &stripepos); + objsetno = div_u64_rem(stripeno, stripes_per_object, &objsetpos); + + *objno = objsetno * l->stripe_count + stripepos; + *objoff = objsetpos * l->stripe_unit + blockoff; + *xlen = min_t(u64, len, l->stripe_unit - blockoff); +} +EXPORT_SYMBOL(ceph_calc_file_object_mapping); + +/* + * Return the last extent with given objno (@object_extents is sorted + * by objno). 
If not found, return NULL and set @add_pos so that the + * new extent can be added with list_add(add_pos, new_ex). + */ +static struct ceph_object_extent * +lookup_last(struct list_head *object_extents, u64 objno, + struct list_head **add_pos) +{ + struct list_head *pos; + + list_for_each_prev(pos, object_extents) { + struct ceph_object_extent *ex = + list_entry(pos, typeof(*ex), oe_item); + + if (ex->oe_objno == objno) + return ex; + + if (ex->oe_objno < objno) + break; + } + + *add_pos = pos; + return NULL; +} + +static struct ceph_object_extent * +lookup_containing(struct list_head *object_extents, u64 objno, + u64 objoff, u32 xlen) +{ + struct ceph_object_extent *ex; + + list_for_each_entry(ex, object_extents, oe_item) { + if (ex->oe_objno == objno && + ex->oe_off <= objoff && + ex->oe_off + ex->oe_len >= objoff + xlen) /* paranoia */ + return ex; + + if (ex->oe_objno > objno) + break; + } + + return NULL; +} + +/* + * Map a file extent to a sorted list of object extents. + * + * We want only one (or as few as possible) object extents per object. + * Adjacent object extents will be merged together, each returned object + * extent may reverse map to multiple different file extents. + * + * Call @alloc_fn for each new object extent and @action_fn for each + * mapped stripe unit, whether it was merged into an already allocated + * object extent or started a new object extent. + * + * Newly allocated object extents are added to @object_extents. + * To keep @object_extents sorted, successive calls to this function + * must map successive file extents (i.e. the list of file extents that + * are mapped using the same @object_extents must be sorted). + * + * The caller is responsible for @object_extents. 
+ */ +int ceph_file_to_extents(struct ceph_file_layout *l, u64 off, u64 len, + struct list_head *object_extents, + struct ceph_object_extent *alloc_fn(void *arg), + void *alloc_arg, + ceph_object_extent_fn_t action_fn, + void *action_arg) +{ + struct ceph_object_extent *last_ex, *ex; + + while (len) { + struct list_head *add_pos = NULL; + u64 objno, objoff; + u32 xlen; + + ceph_calc_file_object_mapping(l, off, len, &objno, &objoff, + &xlen); + + last_ex = lookup_last(object_extents, objno, &add_pos); + if (!last_ex || last_ex->oe_off + last_ex->oe_len != objoff) { + ex = alloc_fn(alloc_arg); + if (!ex) + return -ENOMEM; + + ex->oe_objno = objno; + ex->oe_off = objoff; + ex->oe_len = xlen; + if (action_fn) + action_fn(ex, xlen, action_arg); + + if (!last_ex) + list_add(&ex->oe_item, add_pos); + else + list_add(&ex->oe_item, &last_ex->oe_item); + } else { + last_ex->oe_len += xlen; + if (action_fn) + action_fn(last_ex, xlen, action_arg); + } + + off += xlen; + len -= xlen; + } + + for (last_ex = list_first_entry(object_extents, typeof(*ex), oe_item), + ex = list_next_entry(last_ex, oe_item); + &ex->oe_item != object_extents; + last_ex = ex, ex = list_next_entry(ex, oe_item)) { + if (last_ex->oe_objno > ex->oe_objno || + (last_ex->oe_objno == ex->oe_objno && + last_ex->oe_off + last_ex->oe_len >= ex->oe_off)) { + WARN(1, "%s: object_extents list not sorted!\n", + __func__); + return -EINVAL; + } + } + + return 0; +} +EXPORT_SYMBOL(ceph_file_to_extents); + +/* + * A stripped down, non-allocating version of ceph_file_to_extents(), + * for when @object_extents is already populated. 
+ */ +int ceph_iterate_extents(struct ceph_file_layout *l, u64 off, u64 len, + struct list_head *object_extents, + ceph_object_extent_fn_t action_fn, + void *action_arg) +{ + while (len) { + struct ceph_object_extent *ex; + u64 objno, objoff; + u32 xlen; + + ceph_calc_file_object_mapping(l, off, len, &objno, &objoff, + &xlen); + + ex = lookup_containing(object_extents, objno, objoff, xlen); + if (!ex) { + WARN(1, "%s: objno %llu %llu~%u not found!\n", + __func__, objno, objoff, xlen); + return -EINVAL; + } + + action_fn(ex, xlen, action_arg); + + off += xlen; + len -= xlen; + } + + return 0; +} +EXPORT_SYMBOL(ceph_iterate_extents); + +/* + * Reverse map an object extent to a sorted list of file extents. + * + * On success, the caller is responsible for: + * + * kfree(file_extents) + */ +int ceph_extent_to_file(struct ceph_file_layout *l, + u64 objno, u64 objoff, u64 objlen, + struct ceph_file_extent **file_extents, + u32 *num_file_extents) +{ + u32 stripes_per_object = l->object_size / l->stripe_unit; + u64 blockno; /* which su */ + u32 blockoff; /* offset into su */ + u64 stripeno; /* which stripe */ + u32 stripepos; /* which su in the stripe, + which object in the object set */ + u64 objsetno; /* which object set */ + u32 i = 0; + + if (!objlen) { + *file_extents = NULL; + *num_file_extents = 0; + return 0; + } + + *num_file_extents = DIV_ROUND_UP_ULL(objoff + objlen, l->stripe_unit) - + DIV_ROUND_DOWN_ULL(objoff, l->stripe_unit); + *file_extents = kmalloc_array(*num_file_extents, sizeof(**file_extents), + GFP_NOIO); + if (!*file_extents) + return -ENOMEM; + + div_u64_rem(objoff, l->stripe_unit, &blockoff); + while (objlen) { + u64 off, len; + + objsetno = div_u64_rem(objno, l->stripe_count, &stripepos); + stripeno = div_u64(objoff, l->stripe_unit) + + objsetno * stripes_per_object; + blockno = stripeno * l->stripe_count + stripepos; + off = blockno * l->stripe_unit + blockoff; + len = min_t(u64, objlen, l->stripe_unit - blockoff); + + (*file_extents)[i].fe_off = 
off; + (*file_extents)[i].fe_len = len; + + blockoff = 0; + objoff += len; + objlen -= len; + i++; + } + + BUG_ON(i != *num_file_extents); + return 0; +} +EXPORT_SYMBOL(ceph_extent_to_file); diff --git a/samples/Kconfig b/samples/Kconfig index f524f551718e..3db002b9e1d3 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -62,6 +62,16 @@ config SAMPLE_KDB Build an example of how to dynamically add the hello command to the kdb shell. +config SAMPLE_QMI_CLIENT + tristate "Build qmi client sample -- loadable modules only" + depends on m + depends on ARCH_QCOM + depends on NET + select QCOM_QMI_HELPERS + help + Build an QMI client sample driver, which demonstrates how to + communicate with a remote QRTR service, using QMI encoded messages. + config SAMPLE_RPMSG_CLIENT tristate "Build rpmsg client sample -- loadable modules only" depends on RPMSG && m diff --git a/samples/Makefile b/samples/Makefile index 70cf3758dcf2..bd601c038b86 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -3,4 +3,4 @@ obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ trace_events/ livepatch/ \ hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \ configfs/ connector/ v4l/ trace_printk/ \ - vfio-mdev/ statx/ + vfio-mdev/ statx/ qmi/ diff --git a/samples/qmi/Makefile b/samples/qmi/Makefile new file mode 100644 index 000000000000..2b111d2769df --- /dev/null +++ b/samples/qmi/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_SAMPLE_QMI_CLIENT) += qmi_sample_client.o diff --git a/samples/qmi/qmi_sample_client.c b/samples/qmi/qmi_sample_client.c new file mode 100644 index 000000000000..c9e7276c3d83 --- /dev/null +++ b/samples/qmi/qmi_sample_client.c @@ -0,0 +1,622 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Sample in-kernel QMI client driver + * + * Copyright (c) 2013-2014, The Linux Foundation. All rights reserved. + * Copyright (C) 2017 Linaro Ltd. 
+ */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/debugfs.h> +#include <linux/device.h> +#include <linux/platform_device.h> +#include <linux/qrtr.h> +#include <linux/net.h> +#include <linux/completion.h> +#include <linux/idr.h> +#include <linux/string.h> +#include <net/sock.h> +#include <linux/soc/qcom/qmi.h> + +#define PING_REQ1_TLV_TYPE 0x1 +#define PING_RESP1_TLV_TYPE 0x2 +#define PING_OPT1_TLV_TYPE 0x10 +#define PING_OPT2_TLV_TYPE 0x11 + +#define DATA_REQ1_TLV_TYPE 0x1 +#define DATA_RESP1_TLV_TYPE 0x2 +#define DATA_OPT1_TLV_TYPE 0x10 +#define DATA_OPT2_TLV_TYPE 0x11 + +#define TEST_MED_DATA_SIZE_V01 8192 +#define TEST_MAX_NAME_SIZE_V01 255 + +#define TEST_PING_REQ_MSG_ID_V01 0x20 +#define TEST_DATA_REQ_MSG_ID_V01 0x21 + +#define TEST_PING_REQ_MAX_MSG_LEN_V01 266 +#define TEST_DATA_REQ_MAX_MSG_LEN_V01 8456 + +struct test_name_type_v01 { + u32 name_len; + char name[TEST_MAX_NAME_SIZE_V01]; +}; + +static struct qmi_elem_info test_name_type_v01_ei[] = { + { + .data_type = QMI_DATA_LEN, + .elem_len = 1, + .elem_size = sizeof(u8), + .array_type = NO_ARRAY, + .tlv_type = QMI_COMMON_TLV_TYPE, + .offset = offsetof(struct test_name_type_v01, + name_len), + }, + { + .data_type = QMI_UNSIGNED_1_BYTE, + .elem_len = TEST_MAX_NAME_SIZE_V01, + .elem_size = sizeof(char), + .array_type = VAR_LEN_ARRAY, + .tlv_type = QMI_COMMON_TLV_TYPE, + .offset = offsetof(struct test_name_type_v01, + name), + }, + {} +}; + +struct test_ping_req_msg_v01 { + char ping[4]; + + u8 client_name_valid; + struct test_name_type_v01 client_name; +}; + +static struct qmi_elem_info test_ping_req_msg_v01_ei[] = { + { + .data_type = QMI_UNSIGNED_1_BYTE, + .elem_len = 4, + .elem_size = sizeof(char), + .array_type = STATIC_ARRAY, + .tlv_type = PING_REQ1_TLV_TYPE, + .offset = offsetof(struct test_ping_req_msg_v01, + ping), + }, + { + .data_type = QMI_OPT_FLAG, + .elem_len = 1, + .elem_size = sizeof(u8), + .array_type = NO_ARRAY, + .tlv_type = PING_OPT1_TLV_TYPE, + .offset = 
offsetof(struct test_ping_req_msg_v01, + client_name_valid), + }, + { + .data_type = QMI_STRUCT, + .elem_len = 1, + .elem_size = sizeof(struct test_name_type_v01), + .array_type = NO_ARRAY, + .tlv_type = PING_OPT1_TLV_TYPE, + .offset = offsetof(struct test_ping_req_msg_v01, + client_name), + .ei_array = test_name_type_v01_ei, + }, + {} +}; + +struct test_ping_resp_msg_v01 { + struct qmi_response_type_v01 resp; + + u8 pong_valid; + char pong[4]; + + u8 service_name_valid; + struct test_name_type_v01 service_name; +}; + +static struct qmi_elem_info test_ping_resp_msg_v01_ei[] = { + { + .data_type = QMI_STRUCT, + .elem_len = 1, + .elem_size = sizeof(struct qmi_response_type_v01), + .array_type = NO_ARRAY, + .tlv_type = PING_RESP1_TLV_TYPE, + .offset = offsetof(struct test_ping_resp_msg_v01, + resp), + .ei_array = qmi_response_type_v01_ei, + }, + { + .data_type = QMI_OPT_FLAG, + .elem_len = 1, + .elem_size = sizeof(u8), + .array_type = NO_ARRAY, + .tlv_type = PING_OPT1_TLV_TYPE, + .offset = offsetof(struct test_ping_resp_msg_v01, + pong_valid), + }, + { + .data_type = QMI_UNSIGNED_1_BYTE, + .elem_len = 4, + .elem_size = sizeof(char), + .array_type = STATIC_ARRAY, + .tlv_type = PING_OPT1_TLV_TYPE, + .offset = offsetof(struct test_ping_resp_msg_v01, + pong), + }, + { + .data_type = QMI_OPT_FLAG, + .elem_len = 1, + .elem_size = sizeof(u8), + .array_type = NO_ARRAY, + .tlv_type = PING_OPT2_TLV_TYPE, + .offset = offsetof(struct test_ping_resp_msg_v01, + service_name_valid), + }, + { + .data_type = QMI_STRUCT, + .elem_len = 1, + .elem_size = sizeof(struct test_name_type_v01), + .array_type = NO_ARRAY, + .tlv_type = PING_OPT2_TLV_TYPE, + .offset = offsetof(struct test_ping_resp_msg_v01, + service_name), + .ei_array = test_name_type_v01_ei, + }, + {} +}; + +struct test_data_req_msg_v01 { + u32 data_len; + u8 data[TEST_MED_DATA_SIZE_V01]; + + u8 client_name_valid; + struct test_name_type_v01 client_name; +}; + +static struct qmi_elem_info test_data_req_msg_v01_ei[] = { + { + 
.data_type = QMI_DATA_LEN, + .elem_len = 1, + .elem_size = sizeof(u32), + .array_type = NO_ARRAY, + .tlv_type = DATA_REQ1_TLV_TYPE, + .offset = offsetof(struct test_data_req_msg_v01, + data_len), + }, + { + .data_type = QMI_UNSIGNED_1_BYTE, + .elem_len = TEST_MED_DATA_SIZE_V01, + .elem_size = sizeof(u8), + .array_type = VAR_LEN_ARRAY, + .tlv_type = DATA_REQ1_TLV_TYPE, + .offset = offsetof(struct test_data_req_msg_v01, + data), + }, + { + .data_type = QMI_OPT_FLAG, + .elem_len = 1, + .elem_size = sizeof(u8), + .array_type = NO_ARRAY, + .tlv_type = DATA_OPT1_TLV_TYPE, + .offset = offsetof(struct test_data_req_msg_v01, + client_name_valid), + }, + { + .data_type = QMI_STRUCT, + .elem_len = 1, + .elem_size = sizeof(struct test_name_type_v01), + .array_type = NO_ARRAY, + .tlv_type = DATA_OPT1_TLV_TYPE, + .offset = offsetof(struct test_data_req_msg_v01, + client_name), + .ei_array = test_name_type_v01_ei, + }, + {} +}; + +struct test_data_resp_msg_v01 { + struct qmi_response_type_v01 resp; + + u8 data_valid; + u32 data_len; + u8 data[TEST_MED_DATA_SIZE_V01]; + + u8 service_name_valid; + struct test_name_type_v01 service_name; +}; + +static struct qmi_elem_info test_data_resp_msg_v01_ei[] = { + { + .data_type = QMI_STRUCT, + .elem_len = 1, + .elem_size = sizeof(struct qmi_response_type_v01), + .array_type = NO_ARRAY, + .tlv_type = DATA_RESP1_TLV_TYPE, + .offset = offsetof(struct test_data_resp_msg_v01, + resp), + .ei_array = qmi_response_type_v01_ei, + }, + { + .data_type = QMI_OPT_FLAG, + .elem_len = 1, + .elem_size = sizeof(u8), + .array_type = NO_ARRAY, + .tlv_type = DATA_OPT1_TLV_TYPE, + .offset = offsetof(struct test_data_resp_msg_v01, + data_valid), + }, + { + .data_type = QMI_DATA_LEN, + .elem_len = 1, + .elem_size = sizeof(u32), + .array_type = NO_ARRAY, + .tlv_type = DATA_OPT1_TLV_TYPE, + .offset = offsetof(struct test_data_resp_msg_v01, + data_len), + }, + { + .data_type = QMI_UNSIGNED_1_BYTE, + .elem_len = TEST_MED_DATA_SIZE_V01, + .elem_size = sizeof(u8), + 
.array_type = VAR_LEN_ARRAY, + .tlv_type = DATA_OPT1_TLV_TYPE, + .offset = offsetof(struct test_data_resp_msg_v01, + data), + }, + { + .data_type = QMI_OPT_FLAG, + .elem_len = 1, + .elem_size = sizeof(u8), + .array_type = NO_ARRAY, + .tlv_type = DATA_OPT2_TLV_TYPE, + .offset = offsetof(struct test_data_resp_msg_v01, + service_name_valid), + }, + { + .data_type = QMI_STRUCT, + .elem_len = 1, + .elem_size = sizeof(struct test_name_type_v01), + .array_type = NO_ARRAY, + .tlv_type = DATA_OPT2_TLV_TYPE, + .offset = offsetof(struct test_data_resp_msg_v01, + service_name), + .ei_array = test_name_type_v01_ei, + }, + {} +}; + +/* + * ping_write() - ping_pong debugfs file write handler + * @file: debugfs file context + * @user_buf: reference to the user data (ignored) + * @count: number of bytes in @user_buf + * @ppos: offset in @file to write + * + * This function allows user space to send out a ping_pong QMI encoded message + * to the associated remote test service and will return with the result of the + * transaction. It serves as an example of how to provide a custom response + * handler. + * + * Return: @count, or negative errno on failure. 
+ */ +static ssize_t ping_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct qmi_handle *qmi = file->private_data; + struct test_ping_req_msg_v01 req = {}; + struct qmi_txn txn; + int ret; + + memcpy(req.ping, "ping", sizeof(req.ping)); + + ret = qmi_txn_init(qmi, &txn, NULL, NULL); + if (ret < 0) + return ret; + + ret = qmi_send_request(qmi, NULL, &txn, + TEST_PING_REQ_MSG_ID_V01, + TEST_PING_REQ_MAX_MSG_LEN_V01, + test_ping_req_msg_v01_ei, &req); + if (ret < 0) { + qmi_txn_cancel(&txn); + return ret; + } + + ret = qmi_txn_wait(&txn, 5 * HZ); + if (ret < 0) + count = ret; + + return count; +} + +static const struct file_operations ping_fops = { + .open = simple_open, + .write = ping_write, +}; + +static void ping_pong_cb(struct qmi_handle *qmi, struct sockaddr_qrtr *sq, + struct qmi_txn *txn, const void *data) +{ + const struct test_ping_resp_msg_v01 *resp = data; + + if (!txn) { + pr_err("spurious ping response\n"); + return; + } + + if (resp->resp.result == QMI_RESULT_FAILURE_V01) + txn->result = -ENXIO; + else if (!resp->pong_valid || memcmp(resp->pong, "pong", 4)) + txn->result = -EINVAL; + + complete(&txn->completion); +} + +/* + * data_write() - data debugfs file write handler + * @file: debugfs file context + * @user_buf: reference to the user data + * @count: number of bytes in @user_buf + * @ppos: offset in @file to write + * + * This function allows user space to send out a data QMI encoded message to + * the associated remote test service and will return with the result of the + * transaction. It serves as an example of how to have the QMI helpers decode a + * transaction response into a provided object automatically. + * + * Return: @count, or negative errno on failure. 
+ */ +static ssize_t data_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) + +{ + struct qmi_handle *qmi = file->private_data; + struct test_data_resp_msg_v01 *resp; + struct test_data_req_msg_v01 *req; + struct qmi_txn txn; + int ret; + + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) + return -ENOMEM; + + resp = kzalloc(sizeof(*resp), GFP_KERNEL); + if (!resp) { + kfree(req); + return -ENOMEM; + } + + req->data_len = min_t(size_t, sizeof(req->data), count); + if (copy_from_user(req->data, user_buf, req->data_len)) { + ret = -EFAULT; + goto out; + } + + ret = qmi_txn_init(qmi, &txn, test_data_resp_msg_v01_ei, resp); + if (ret < 0) + goto out; + + ret = qmi_send_request(qmi, NULL, &txn, + TEST_DATA_REQ_MSG_ID_V01, + TEST_DATA_REQ_MAX_MSG_LEN_V01, + test_data_req_msg_v01_ei, req); + if (ret < 0) { + qmi_txn_cancel(&txn); + goto out; + } + + ret = qmi_txn_wait(&txn, 5 * HZ); + if (ret < 0) { + goto out; + } else if (!resp->data_valid || + resp->data_len != req->data_len || + memcmp(resp->data, req->data, req->data_len)) { + pr_err("response data doesn't match expectation\n"); + ret = -EINVAL; + goto out; + } + + ret = count; + +out: + kfree(resp); + kfree(req); + + return ret; +} + +static const struct file_operations data_fops = { + .open = simple_open, + .write = data_write, +}; + +static struct qmi_msg_handler qmi_sample_handlers[] = { + { + .type = QMI_RESPONSE, + .msg_id = TEST_PING_REQ_MSG_ID_V01, + .ei = test_ping_resp_msg_v01_ei, + .decoded_size = sizeof(struct test_ping_req_msg_v01), + .fn = ping_pong_cb + }, + {} +}; + +struct qmi_sample { + struct qmi_handle qmi; + + struct dentry *de_dir; + struct dentry *de_data; + struct dentry *de_ping; +}; + +static struct dentry *qmi_debug_dir; + +static int qmi_sample_probe(struct platform_device *pdev) +{ + struct sockaddr_qrtr *sq; + struct qmi_sample *sample; + char path[20]; + int ret; + + sample = devm_kzalloc(&pdev->dev, sizeof(*sample), GFP_KERNEL); + if (!sample) + 
return -ENOMEM; + + ret = qmi_handle_init(&sample->qmi, TEST_DATA_REQ_MAX_MSG_LEN_V01, + NULL, + qmi_sample_handlers); + if (ret < 0) + return ret; + + sq = dev_get_platdata(&pdev->dev); + ret = kernel_connect(sample->qmi.sock, (struct sockaddr *)sq, + sizeof(*sq), 0); + if (ret < 0) { + pr_err("failed to connect to remote service port\n"); + goto err_release_qmi_handle; + } + + snprintf(path, sizeof(path), "%d:%d", sq->sq_node, sq->sq_port); + + sample->de_dir = debugfs_create_dir(path, qmi_debug_dir); + if (IS_ERR(sample->de_dir)) { + ret = PTR_ERR(sample->de_dir); + goto err_release_qmi_handle; + } + + sample->de_data = debugfs_create_file("data", 0600, sample->de_dir, + sample, &data_fops); + if (IS_ERR(sample->de_data)) { + ret = PTR_ERR(sample->de_data); + goto err_remove_de_dir; + } + + sample->de_ping = debugfs_create_file("ping", 0600, sample->de_dir, + sample, &ping_fops); + if (IS_ERR(sample->de_ping)) { + ret = PTR_ERR(sample->de_ping); + goto err_remove_de_data; + } + + platform_set_drvdata(pdev, sample); + + return 0; + +err_remove_de_data: + debugfs_remove(sample->de_data); +err_remove_de_dir: + debugfs_remove(sample->de_dir); +err_release_qmi_handle: + qmi_handle_release(&sample->qmi); + + return ret; +} + +static int qmi_sample_remove(struct platform_device *pdev) +{ + struct qmi_sample *sample = platform_get_drvdata(pdev); + + debugfs_remove(sample->de_ping); + debugfs_remove(sample->de_data); + debugfs_remove(sample->de_dir); + + qmi_handle_release(&sample->qmi); + + return 0; +} + +static struct platform_driver qmi_sample_driver = { + .probe = qmi_sample_probe, + .remove = qmi_sample_remove, + .driver = { + .name = "qmi_sample_client", + }, +}; + +static int qmi_sample_new_server(struct qmi_handle *qmi, + struct qmi_service *service) +{ + struct platform_device *pdev; + struct sockaddr_qrtr sq = { AF_QIPCRTR, service->node, service->port }; + int ret; + + pdev = platform_device_alloc("qmi_sample_client", PLATFORM_DEVID_AUTO); + if (!pdev) + 
return -ENOMEM; + + ret = platform_device_add_data(pdev, &sq, sizeof(sq)); + if (ret) + goto err_put_device; + + ret = platform_device_add(pdev); + if (ret) + goto err_put_device; + + service->priv = pdev; + + return 0; + +err_put_device: + platform_device_put(pdev); + + return ret; +} + +static void qmi_sample_del_server(struct qmi_handle *qmi, + struct qmi_service *service) +{ + struct platform_device *pdev = service->priv; + + platform_device_unregister(pdev); +} + +static struct qmi_handle lookup_client; + +static struct qmi_ops lookup_ops = { + .new_server = qmi_sample_new_server, + .del_server = qmi_sample_del_server, +}; + +static int qmi_sample_init(void) +{ + int ret; + + qmi_debug_dir = debugfs_create_dir("qmi_sample", NULL); + if (IS_ERR(qmi_debug_dir)) { + pr_err("failed to create qmi_sample dir\n"); + return PTR_ERR(qmi_debug_dir); + } + + ret = platform_driver_register(&qmi_sample_driver); + if (ret) + goto err_remove_debug_dir; + + ret = qmi_handle_init(&lookup_client, 0, &lookup_ops, NULL); + if (ret < 0) + goto err_unregister_driver; + + qmi_add_lookup(&lookup_client, 15, 0, 0); + + return 0; + +err_unregister_driver: + platform_driver_unregister(&qmi_sample_driver); +err_remove_debug_dir: + debugfs_remove(qmi_debug_dir); + + return ret; +} + +static void qmi_sample_exit(void) +{ + qmi_handle_release(&lookup_client); + + platform_driver_unregister(&qmi_sample_driver); + + debugfs_remove(qmi_debug_dir); +} + +module_init(qmi_sample_init); +module_exit(qmi_sample_exit); + +MODULE_DESCRIPTION("Sample QMI client driver"); +MODULE_LICENSE("GPL v2"); |