From f315e3fa1cf5b3317fc948708645fff889ce1e63 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Mon, 2 Dec 2013 17:49:41 +0900 Subject: slab: restrict the number of objects in a slab To prepare to implement byte sized index for managing the freelist of a slab, we should restrict the number of objects in a slab to be less or equal to 256, since byte only represent 256 different values. Setting the size of object to value equal or more than newly introduced SLAB_OBJ_MIN_SIZE ensures that the number of objects in a slab is less or equal to 256 for a slab with 1 page. If page size is rather larger than 4096, above assumption would be wrong. In this case, we would fall back on 2 bytes sized index. If minimum size of kmalloc is less than 16, we use it as minimum object size and give up this optimization. Signed-off-by: Joonsoo Kim Signed-off-by: Pekka Enberg --- include/linux/slab.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 9260abdd67df..d015dec02bf3 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -201,6 +201,17 @@ struct kmem_cache { #ifndef KMALLOC_SHIFT_LOW #define KMALLOC_SHIFT_LOW 5 #endif + +/* + * This restriction comes from byte sized index implementation. + * Page size is normally 2^12 bytes and, in this case, if we want to use + * byte sized index which can represent 2^8 entries, the size of the object + * should be equal or greater to 2^12 / 2^8 = 2^4 = 16. + * If minimum size of kmalloc is less than 16, we use it as minimum object + * size and give up to use byte sized index. + */ +#define SLAB_OBJ_MIN_SIZE (KMALLOC_SHIFT_LOW < 4 ? \ + (1 << KMALLOC_SHIFT_LOW) : 16) #endif #ifdef CONFIG_SLUB -- cgit v1.2.3-59-g8ed1b From 0f6a928d035b82c0b3aa387d510a73f3e6dbf8e3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 6 Feb 2014 13:25:40 +0200 Subject: acpi-dma: convert to return error code when asked for channel Currently acpi_dma_request_slave_chan_by_index() and acpi_dma_request_slave_chan_by_name() return only requested channel or NULL. This patch converts them to return appropriate error code instead of NULL in case of unsuccessfull request. Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Signed-off-by: Vinod Koul --- drivers/dma/acpi-dma.c | 15 ++++++++------- drivers/dma/dmaengine.c | 9 ++------- include/linux/acpi_dma.h | 5 +++-- 3 files changed, 13 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma/acpi-dma.c b/drivers/dma/acpi-dma.c index 1fda371af4b7..de361a156b34 100644 --- a/drivers/dma/acpi-dma.c +++ b/drivers/dma/acpi-dma.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include @@ -343,7 +344,7 @@ static int acpi_dma_parse_fixed_dma(struct acpi_resource *res, void *data) * @index: index of FixedDMA descriptor for @dev * * Return: - * Pointer to appropriate dma channel on success or NULL on error. + * Pointer to appropriate dma channel on success or an error pointer. */ struct dma_chan *acpi_dma_request_slave_chan_by_index(struct device *dev, size_t index) @@ -358,10 +359,10 @@ struct dma_chan *acpi_dma_request_slave_chan_by_index(struct device *dev, /* Check if the device was enumerated by ACPI */ if (!dev || !ACPI_HANDLE(dev)) - return NULL; + return ERR_PTR(-ENODEV); if (acpi_bus_get_device(ACPI_HANDLE(dev), &adev)) - return NULL; + return ERR_PTR(-ENODEV); memset(&pdata, 0, sizeof(pdata)); pdata.index = index; @@ -376,7 +377,7 @@ struct dma_chan *acpi_dma_request_slave_chan_by_index(struct device *dev, acpi_dev_free_resource_list(&resource_list); if (dma_spec->slave_id < 0 || dma_spec->chan_id < 0) - return NULL; + return ERR_PTR(-ENODEV); mutex_lock(&acpi_dma_lock); @@ -399,7 +400,7 @@ struct dma_chan *acpi_dma_request_slave_chan_by_index(struct device *dev, } mutex_unlock(&acpi_dma_lock); - return chan; + return chan ? chan : ERR_PTR(-EPROBE_DEFER); } EXPORT_SYMBOL_GPL(acpi_dma_request_slave_chan_by_index); @@ -413,7 +414,7 @@ EXPORT_SYMBOL_GPL(acpi_dma_request_slave_chan_by_index); * the first FixedDMA descriptor is TX and second is RX. * * Return: - * Pointer to appropriate dma channel on success or NULL on error. + * Pointer to appropriate dma channel on success or an error pointer. */ struct dma_chan *acpi_dma_request_slave_chan_by_name(struct device *dev, const char *name) @@ -425,7 +426,7 @@ struct dma_chan *acpi_dma_request_slave_chan_by_name(struct device *dev, else if (!strcmp(name, "rx")) index = 1; else - return NULL; + return ERR_PTR(-ENODEV); return acpi_dma_request_slave_chan_by_index(dev, index); } diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index ed610b497518..a886713937fd 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -627,18 +627,13 @@ EXPORT_SYMBOL_GPL(__dma_request_channel); struct dma_chan *dma_request_slave_channel_reason(struct device *dev, const char *name) { - struct dma_chan *chan; - /* If device-tree is present get slave info from here */ if (dev->of_node) return of_dma_request_slave_channel(dev->of_node, name); /* If device was enumerated by ACPI get slave info from here */ - if (ACPI_HANDLE(dev)) { - chan = acpi_dma_request_slave_chan_by_name(dev, name); - if (chan) - return chan; - } + if (ACPI_HANDLE(dev)) + return acpi_dma_request_slave_chan_by_name(dev, name); return ERR_PTR(-ENODEV); } diff --git a/include/linux/acpi_dma.h b/include/linux/acpi_dma.h index fb0298082916..329436d38e66 100644 --- a/include/linux/acpi_dma.h +++ b/include/linux/acpi_dma.h @@ -16,6 +16,7 @@ #include #include +#include #include /** @@ -103,12 +104,12 @@ static inline void devm_acpi_dma_controller_free(struct device *dev) static inline struct dma_chan *acpi_dma_request_slave_chan_by_index( struct device *dev, size_t index) { - return NULL; + return ERR_PTR(-ENODEV); } static inline struct dma_chan *acpi_dma_request_slave_chan_by_name( struct device *dev, const char *name) { - return NULL; + return ERR_PTR(-ENODEV); } #define acpi_dma_simple_xlate NULL -- cgit v1.2.3-59-g8ed1b From 2ea24497a1b30dd03dd42b873fa5097913587f4d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 10 Feb 2014 11:18:39 -0500 Subject: SUNRPC: RPC callbacks may be split across several TCP segments Since TCP is a stream protocol, our callback read code needs to take into account the fact that RPC callbacks are not always confined to a single TCP segment. This patch adds support for multiple TCP segments by ensuring that we only remove the rpc_rqst structure from the 'free backchannel requests' list once the data has been completely received. We rely on the fact that TCP data is ordered for the duration of the connection. Reported-by: shaobingqing Signed-off-by: Trond Myklebust --- include/linux/sunrpc/bc_xprt.h | 3 +- net/sunrpc/backchannel_rqst.c | 93 +++++++++++++++++++++++++++++------------- net/sunrpc/xprtsock.c | 28 ++++--------- 3 files changed, 74 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index 969c0a671dbf..2ca67b55e0fe 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -32,7 +32,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #ifdef CONFIG_SUNRPC_BACKCHANNEL -struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt); +struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid); +void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied); void xprt_free_bc_request(struct rpc_rqst *req); int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index e860d4f7ed2a..3513d559bc45 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -212,39 +212,23 @@ out: } EXPORT_SYMBOL_GPL(xprt_destroy_backchannel); -/* - * One or more rpc_rqst structure have been preallocated during the - * backchannel setup. Buffer space for the send and private XDR buffers - * has been preallocated as well. Use xprt_alloc_bc_request to allocate - * to this request. Use xprt_free_bc_request to return it. - * - * We know that we're called in soft interrupt context, grab the spin_lock - * since there is no need to grab the bottom half spin_lock. - * - * Return an available rpc_rqst, otherwise NULL if non are available. - */ -struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt) +static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid) { - struct rpc_rqst *req; + struct rpc_rqst *req = NULL; dprintk("RPC: allocate a backchannel request\n"); - spin_lock(&xprt->bc_pa_lock); - if (!list_empty(&xprt->bc_pa_list)) { - req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst, - rq_bc_pa_list); - list_del(&req->rq_bc_pa_list); - } else { - req = NULL; - } - spin_unlock(&xprt->bc_pa_lock); + if (list_empty(&xprt->bc_pa_list)) + goto not_found; - if (req != NULL) { - set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); - req->rq_reply_bytes_recvd = 0; - req->rq_bytes_sent = 0; - memcpy(&req->rq_private_buf, &req->rq_rcv_buf, + req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst, + rq_bc_pa_list); + req->rq_reply_bytes_recvd = 0; + req->rq_bytes_sent = 0; + memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(req->rq_private_buf)); - } + req->rq_xid = xid; + req->rq_connect_cookie = xprt->connect_cookie; +not_found: dprintk("RPC: backchannel req=%p\n", req); return req; } @@ -259,6 +243,7 @@ void xprt_free_bc_request(struct rpc_rqst *req) dprintk("RPC: free backchannel req=%p\n", req); + req->rq_connect_cookie = xprt->connect_cookie - 1; smp_mb__before_clear_bit(); WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); @@ -281,7 +266,57 @@ void xprt_free_bc_request(struct rpc_rqst *req) * may be reused by a new callback request. */ spin_lock_bh(&xprt->bc_pa_lock); - list_add(&req->rq_bc_pa_list, &xprt->bc_pa_list); + list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list); spin_unlock_bh(&xprt->bc_pa_lock); } +/* + * One or more rpc_rqst structure have been preallocated during the + * backchannel setup. Buffer space for the send and private XDR buffers + * has been preallocated as well. Use xprt_alloc_bc_request to allocate + * to this request. Use xprt_free_bc_request to return it. + * + * We know that we're called in soft interrupt context, grab the spin_lock + * since there is no need to grab the bottom half spin_lock. + * + * Return an available rpc_rqst, otherwise NULL if non are available. + */ +struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid) +{ + struct rpc_rqst *req; + + spin_lock(&xprt->bc_pa_lock); + list_for_each_entry(req, &xprt->bc_pa_list, rq_bc_pa_list) { + if (req->rq_connect_cookie != xprt->connect_cookie) + continue; + if (req->rq_xid == xid) + goto found; + } + req = xprt_alloc_bc_request(xprt, xid); +found: + spin_unlock(&xprt->bc_pa_lock); + return req; +} + +/* + * Add callback request to callback list. The callback + * service sleeps on the sv_cb_waitq waiting for new + * requests. Wake it up after adding enqueing the + * request. + */ +void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied) +{ + struct rpc_xprt *xprt = req->rq_xprt; + struct svc_serv *bc_serv = xprt->bc_serv; + + req->rq_private_buf.len = copied; + set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); + + dprintk("RPC: add callback request to list\n"); + spin_lock(&bc_serv->sv_cb_lock); + list_del(&req->rq_bc_pa_list); + list_add(&req->rq_bc_list, &bc_serv->sv_cb_list); + wake_up(&bc_serv->sv_cb_waitq); + spin_unlock(&bc_serv->sv_cb_lock); +} + diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 0addefca8e77..966763d735e9 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1306,41 +1306,29 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt, * If we're unable to obtain the rpc_rqst we schedule the closing of the * connection and return -1. */ -static inline int xs_tcp_read_callback(struct rpc_xprt *xprt, +static int xs_tcp_read_callback(struct rpc_xprt *xprt, struct xdr_skb_reader *desc) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct rpc_rqst *req; - req = xprt_alloc_bc_request(xprt); + /* Look up and lock the request corresponding to the given XID */ + spin_lock(&xprt->transport_lock); + req = xprt_lookup_bc_request(xprt, transport->tcp_xid); if (req == NULL) { + spin_unlock(&xprt->transport_lock); printk(KERN_WARNING "Callback slot table overflowed\n"); xprt_force_disconnect(xprt); return -1; } - req->rq_xid = transport->tcp_xid; dprintk("RPC: read callback XID %08x\n", ntohl(req->rq_xid)); xs_tcp_read_common(xprt, desc, req); - if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) { - struct svc_serv *bc_serv = xprt->bc_serv; - - /* - * Add callback request to callback list. The callback - * service sleeps on the sv_cb_waitq waiting for new - * requests. Wake it up after adding enqueing the - * request. - */ - dprintk("RPC: add callback request to list\n"); - spin_lock(&bc_serv->sv_cb_lock); - list_add(&req->rq_bc_list, &bc_serv->sv_cb_list); - spin_unlock(&bc_serv->sv_cb_lock); - wake_up(&bc_serv->sv_cb_waitq); - } - - req->rq_private_buf.len = transport->tcp_copied; + if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) + xprt_complete_bc_request(req, transport->tcp_copied); + spin_unlock(&xprt->transport_lock); return 0; } -- cgit v1.2.3-59-g8ed1b From 311324ad1713666a6e803aecf0d4e1a136a5b34a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 7 Feb 2014 17:02:08 -0500 Subject: NFS: Be more aggressive in using readdirplus for 'ls -l' situations Try to detect 'ls -l' by having nfs_getattr() look at whether or not there is an opendir() file descriptor for the parent directory. If so, then assume that we want to force use of readdirplus in order to avoid the multiple GETATTR calls over the wire. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 42 ++++++++++++++++++++++++++++++++++++++---- fs/nfs/inode.c | 34 +++++++++++++++++++++++++++------- fs/nfs/internal.h | 1 + include/linux/nfs_fs.h | 1 + 4 files changed, 67 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index be38b573495a..c8e48c26418b 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -69,21 +69,28 @@ const struct address_space_operations nfs_dir_aops = { static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, struct rpc_cred *cred) { + struct nfs_inode *nfsi = NFS_I(dir); struct nfs_open_dir_context *ctx; ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (ctx != NULL) { ctx->duped = 0; - ctx->attr_gencount = NFS_I(dir)->attr_gencount; + ctx->attr_gencount = nfsi->attr_gencount; ctx->dir_cookie = 0; ctx->dup_cookie = 0; ctx->cred = get_rpccred(cred); + spin_lock(&dir->i_lock); + list_add(&ctx->list, &nfsi->open_files); + spin_unlock(&dir->i_lock); return ctx; } return ERR_PTR(-ENOMEM); } -static void put_nfs_open_dir_context(struct nfs_open_dir_context *ctx) +static void put_nfs_open_dir_context(struct inode *dir, struct nfs_open_dir_context *ctx) { + spin_lock(&dir->i_lock); + list_del(&ctx->list); + spin_unlock(&dir->i_lock); put_rpccred(ctx->cred); kfree(ctx); } @@ -126,7 +133,7 @@ out: static int nfs_closedir(struct inode *inode, struct file *filp) { - put_nfs_open_dir_context(filp->private_data); + put_nfs_open_dir_context(filp->f_path.dentry->d_inode, filp->private_data); return 0; } @@ -437,6 +444,22 @@ void nfs_advise_use_readdirplus(struct inode *dir) set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags); } +/* + * This function is mainly for use by nfs_getattr(). + * + * If this is an 'ls -l', we want to force use of readdirplus. + * Do this by checking if there is an active file descriptor + * and calling nfs_advise_use_readdirplus, then forcing a + * cache flush. + */ +void nfs_force_use_readdirplus(struct inode *dir) +{ + if (!list_empty(&NFS_I(dir)->open_files)) { + nfs_advise_use_readdirplus(dir); + nfs_zap_mapping(dir, dir->i_mapping); + } +} + static void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) { @@ -815,6 +838,17 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc) goto out; } +static bool nfs_dir_mapping_need_revalidate(struct inode *dir) +{ + struct nfs_inode *nfsi = NFS_I(dir); + + if (nfs_attribute_cache_expired(dir)) + return true; + if (nfsi->cache_validity & NFS_INO_INVALID_DATA) + return true; + return false; +} + /* The file offset position represents the dirent entry number. A last cookie cache takes care of the common case of reading the whole directory. @@ -847,7 +881,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0; nfs_block_sillyrename(dentry); - if (ctx->pos == 0 || nfs_attribute_cache_expired(inode)) + if (ctx->pos == 0 || nfs_dir_mapping_need_revalidate(inode)) res = nfs_revalidate_mapping(inode, file->f_mapping); if (res < 0) goto out; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 360114ae8b82..9dbef878a2b2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -588,6 +588,25 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) } EXPORT_SYMBOL_GPL(nfs_setattr_update_inode); +static void nfs_request_parent_use_readdirplus(struct dentry *dentry) +{ + struct dentry *parent; + + parent = dget_parent(dentry); + nfs_force_use_readdirplus(parent->d_inode); + dput(parent); +} + +static bool nfs_need_revalidate_inode(struct inode *inode) +{ + if (NFS_I(inode)->cache_validity & + (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL)) + return true; + if (nfs_attribute_cache_expired(inode)) + return true; + return false; +} + int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; @@ -616,10 +635,13 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))) need_atime = 0; - if (need_atime) - err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); - else - err = nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (need_atime || nfs_need_revalidate_inode(inode)) { + struct nfs_server *server = NFS_SERVER(inode); + + if (server->caps & NFS_CAP_READDIRPLUS) + nfs_request_parent_use_readdirplus(dentry); + err = __nfs_revalidate_inode(server, inode); + } if (!err) { generic_fillattr(inode, stat); stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); @@ -961,9 +983,7 @@ int nfs_attribute_cache_expired(struct inode *inode) */ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) { - if (!(NFS_I(inode)->cache_validity & - (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL)) - && !nfs_attribute_cache_expired(inode)) + if (!nfs_need_revalidate_inode(inode)) return NFS_STALE(inode) ? -ESTALE : 0; return __nfs_revalidate_inode(server, inode); } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index fafdddac8271..7f7c476d0c2c 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -300,6 +300,7 @@ extern struct nfs_client *nfs_init_client(struct nfs_client *clp, const char *ip_addr); /* dir.c */ +extern void nfs_force_use_readdirplus(struct inode *dir); extern unsigned long nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc); extern unsigned long nfs_access_cache_scan(struct shrinker *shrink, diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 0ae5807480f4..f55a90bed0b4 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -92,6 +92,7 @@ struct nfs_open_context { }; struct nfs_open_dir_context { + struct list_head list; struct rpc_cred *cred; unsigned long attr_gencount; __u64 dir_cookie; -- cgit v1.2.3-59-g8ed1b From 2501c9179dff2add6aadd3898cd729e94e777d3a Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 30 Oct 2013 01:00:18 +0100 Subject: mmc: core: Use MMC_UNSAFE_RESUME as default behavior Invoking system suspend or shutdown without using the Kconfig option MMC_UNSAFE_RESUME, did trigger an ungraceful power cut of the card. To improve the situation, change the behavior to always make use of the available bus_ops callbacks that handles system suspend and shutdown properly. By changing the behavior MMC_UNSAFE_RESUME becomes redundant, so lets's remove it. Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- drivers/mmc/core/Kconfig | 15 --------------- drivers/mmc/core/core.c | 17 ----------------- drivers/mmc/core/mmc.c | 23 +---------------------- drivers/mmc/core/sd.c | 23 +---------------------- include/linux/mmc/host.h | 5 +---- 5 files changed, 3 insertions(+), 80 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/Kconfig b/drivers/mmc/core/Kconfig index 269d072ef55e..9ebee72d9c3f 100644 --- a/drivers/mmc/core/Kconfig +++ b/drivers/mmc/core/Kconfig @@ -2,21 +2,6 @@ # MMC core configuration # -config MMC_UNSAFE_RESUME - bool "Assume MMC/SD cards are non-removable (DANGEROUS)" - help - If you say Y here, the MMC layer will assume that all cards - stayed in their respective slots during the suspend. The - normal behaviour is to remove them at suspend and - redetecting them at resume. Breaking this assumption will - in most cases result in data corruption. - - This option is usually just for embedded systems which use - a MMC/SD card for rootfs. Most people should say N here. - - This option sets a default which can be overridden by the - module parameter "removable=0" or "removable=1". - config MMC_CLKGATE bool "MMC host clock gating" help diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 098374b1ab2b..10856ec64412 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -64,23 +64,6 @@ static const unsigned freqs[] = { 400000, 300000, 200000, 100000 }; bool use_spi_crc = 1; module_param(use_spi_crc, bool, 0); -/* - * We normally treat cards as removed during suspend if they are not - * known to be on a non-removable bus, to avoid the risk of writing - * back data to a different card after resume. Allow this to be - * overridden if necessary. - */ -#ifdef CONFIG_MMC_UNSAFE_RESUME -bool mmc_assume_removable; -#else -bool mmc_assume_removable = 1; -#endif -EXPORT_SYMBOL(mmc_assume_removable); -module_param_named(removable, mmc_assume_removable, bool, 0644); -MODULE_PARM_DESC( - removable, - "MMC/SD cards are removable and may be removed during suspend"); - /* * Internal function. Schedule delayed work in the MMC work queue. */ diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 98e9eb0f6643..20f046895228 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1634,16 +1634,6 @@ static int mmc_power_restore(struct mmc_host *host) } static const struct mmc_bus_ops mmc_ops = { - .remove = mmc_remove, - .detect = mmc_detect, - .suspend = NULL, - .resume = NULL, - .power_restore = mmc_power_restore, - .alive = mmc_alive, - .shutdown = mmc_shutdown, -}; - -static const struct mmc_bus_ops mmc_ops_unsafe = { .remove = mmc_remove, .detect = mmc_detect, .suspend = mmc_suspend, @@ -1655,17 +1645,6 @@ static const struct mmc_bus_ops mmc_ops_unsafe = { .shutdown = mmc_shutdown, }; -static void mmc_attach_bus_ops(struct mmc_host *host) -{ - const struct mmc_bus_ops *bus_ops; - - if (!mmc_card_is_removable(host)) - bus_ops = &mmc_ops_unsafe; - else - bus_ops = &mmc_ops; - mmc_attach_bus(host, bus_ops); -} - /* * Starting point for MMC card init. */ @@ -1685,7 +1664,7 @@ int mmc_attach_mmc(struct mmc_host *host) if (err) return err; - mmc_attach_bus_ops(host); + mmc_attach_bus(host, &mmc_ops); if (host->ocr_avail_mmc) host->ocr_avail = host->ocr_avail_mmc; diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 692fdb177294..2dd359d2242f 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -1207,16 +1207,6 @@ static int mmc_sd_power_restore(struct mmc_host *host) } static const struct mmc_bus_ops mmc_sd_ops = { - .remove = mmc_sd_remove, - .detect = mmc_sd_detect, - .suspend = NULL, - .resume = NULL, - .power_restore = mmc_sd_power_restore, - .alive = mmc_sd_alive, - .shutdown = mmc_sd_suspend, -}; - -static const struct mmc_bus_ops mmc_sd_ops_unsafe = { .remove = mmc_sd_remove, .detect = mmc_sd_detect, .runtime_suspend = mmc_sd_runtime_suspend, @@ -1228,17 +1218,6 @@ static const struct mmc_bus_ops mmc_sd_ops_unsafe = { .shutdown = mmc_sd_suspend, }; -static void mmc_sd_attach_bus_ops(struct mmc_host *host) -{ - const struct mmc_bus_ops *bus_ops; - - if (!mmc_card_is_removable(host)) - bus_ops = &mmc_sd_ops_unsafe; - else - bus_ops = &mmc_sd_ops; - mmc_attach_bus(host, bus_ops); -} - /* * Starting point for SD card init. */ @@ -1254,7 +1233,7 @@ int mmc_attach_sd(struct mmc_host *host) if (err) return err; - mmc_sd_attach_bus_ops(host); + mmc_attach_bus(host, &mmc_sd_ops); if (host->ocr_avail_sd) host->ocr_avail = host->ocr_avail_sd; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 99f5709ac343..2a139b2bdb9e 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -424,12 +424,9 @@ static inline int mmc_regulator_get_supply(struct mmc_host *mmc) int mmc_pm_notify(struct notifier_block *notify_block, unsigned long, void *); -/* Module parameter */ -extern bool mmc_assume_removable; - static inline int mmc_card_is_removable(struct mmc_host *host) { - return !(host->caps & MMC_CAP_NONREMOVABLE) && mmc_assume_removable; + return !(host->caps & MMC_CAP_NONREMOVABLE); } static inline int mmc_card_keep_power(struct mmc_host *host) -- cgit v1.2.3-59-g8ed1b From a2d1086de6cc3ae2378d9db8b92712911c9e5fef Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 16 Dec 2013 14:37:26 +0100 Subject: mmc: card: Remove host cap MMC_CAP2_SANITIZE There is no need for keeping a host cap for MMC_CAP2_SANITIZE, instead we just make the feature default available. Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- drivers/mmc/card/block.c | 3 +-- include/linux/mmc/host.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 7b5424f398ac..c2187d5e9070 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -415,8 +415,7 @@ static int ioctl_do_sanitize(struct mmc_card *card) { int err; - if (!(mmc_can_sanitize(card) && - (card->host->caps2 & MMC_CAP2_SANITIZE))) { + if (!mmc_can_sanitize(card)) { pr_warn("%s: %s - SANITIZE is not supported\n", mmc_hostname(card->host), __func__); err = -EOPNOTSUPP; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 2a139b2bdb9e..40f832e5db40 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -281,7 +281,6 @@ struct mmc_host { #define MMC_CAP2_PACKED_CMD (MMC_CAP2_PACKED_RD | \ MMC_CAP2_PACKED_WR) #define MMC_CAP2_NO_PRESCAN_POWERUP (1 << 14) /* Don't power up before scan */ -#define MMC_CAP2_SANITIZE (1 << 15) /* Support Sanitize */ mmc_pm_flag_t pm_caps; /* supported pm features */ -- cgit v1.2.3-59-g8ed1b From 325e2f96b926ae852fa2aa5e64d1040ed9518ae1 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 16 Dec 2013 14:43:51 +0100 Subject: mmc: core: Remove unused host cap MMC_CAP2_BROKEN_VOLTAGE Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- include/linux/mmc/host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 40f832e5db40..461c69f19425 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -272,7 +272,6 @@ struct mmc_host { #define MMC_CAP2_HS200_1_2V_SDR (1 << 6) /* can support */ #define MMC_CAP2_HS200 (MMC_CAP2_HS200_1_8V_SDR | \ MMC_CAP2_HS200_1_2V_SDR) -#define MMC_CAP2_BROKEN_VOLTAGE (1 << 7) /* Use the broken voltage */ #define MMC_CAP2_HC_ERASE_SZ (1 << 9) /* High-capacity erase size */ #define MMC_CAP2_CD_ACTIVE_HIGH (1 << 10) /* Card-detect signal active high */ #define MMC_CAP2_RO_ACTIVE_HIGH (1 << 11) /* Write-protect signal active high */ -- cgit v1.2.3-59-g8ed1b From 469a00b017a9b2c630bff962ffd64ba626977830 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 16 Dec 2013 14:46:00 +0100 Subject: mmc: core: Remove support for MMC_CAP2_NO_SLEEP_CMD There are no active users of this host capability. The primary reason for adding this cap was due to a bug in ux500 boot loader code, which is not a relevant issue any more. So, let's remove it. Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- drivers/mmc/core/mmc.c | 3 --- include/linux/mmc/host.h | 1 - 2 files changed, 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 20f046895228..5c4ee6ab2125 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1358,9 +1358,6 @@ static int mmc_sleep(struct mmc_host *host) struct mmc_card *card = host->card; int err; - if (host->caps2 & MMC_CAP2_NO_SLEEP_CMD) - return 0; - err = mmc_deselect_cards(host); if (err) return err; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 461c69f19425..4c0176a8e474 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -267,7 +267,6 @@ struct mmc_host { #define MMC_CAP2_CACHE_CTRL (1 << 1) /* Allow cache control */ #define MMC_CAP2_FULL_PWR_CYCLE (1 << 2) /* Can do full power cycle */ #define MMC_CAP2_NO_MULTI_READ (1 << 3) /* Multiblock reads don't work */ -#define MMC_CAP2_NO_SLEEP_CMD (1 << 4) /* Don't allow sleep command */ #define MMC_CAP2_HS200_1_8V_SDR (1 << 5) /* can support */ #define MMC_CAP2_HS200_1_2V_SDR (1 << 6) /* can support */ #define MMC_CAP2_HS200 (MMC_CAP2_HS200_1_8V_SDR | \ -- cgit v1.2.3-59-g8ed1b From 10e5d9652499a8bc0a99ffc2a96a3030fee576cb Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 16 Dec 2013 16:23:22 +0100 Subject: mmc: core: Use mmc_flush_cache() during mmc suspend Earlier we disabled the cache during suspend, which meant a flush was internally at the eMMC performed as well. To simplify code we can make use of the mmc_flush_cache(), during mmc suspend, which makes the mmc_cache_ctrl() redundant so then we can remove it. Signed-off-by: Ulf Hansson Acked-by: Seungwon Jeon Signed-off-by: Chris Ball --- drivers/mmc/core/core.c | 38 -------------------------------------- drivers/mmc/core/mmc.c | 2 +- include/linux/mmc/host.h | 2 -- 3 files changed, 1 insertion(+), 41 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 22427c684b36..8928f9f4cfe1 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -2582,44 +2582,6 @@ int mmc_flush_cache(struct mmc_card *card) } EXPORT_SYMBOL(mmc_flush_cache); -/* - * Turn the cache ON/OFF. - * Turning the cache OFF shall trigger flushing of the data - * to the non-volatile storage. - * This function should be called with host claimed - */ -int mmc_cache_ctrl(struct mmc_host *host, u8 enable) -{ - struct mmc_card *card = host->card; - unsigned int timeout; - int err = 0; - - if (!(host->caps2 & MMC_CAP2_CACHE_CTRL) || - mmc_card_is_removable(host)) - return err; - - if (card && mmc_card_mmc(card) && - (card->ext_csd.cache_size > 0)) { - enable = !!enable; - - if (card->ext_csd.cache_ctrl ^ enable) { - timeout = enable ? card->ext_csd.generic_cmd6_time : 0; - err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, - EXT_CSD_CACHE_CTRL, enable, timeout); - if (err) - pr_err("%s: cache %s error %d\n", - mmc_hostname(card->host), - enable ? "on" : "off", - err); - else - card->ext_csd.cache_ctrl = enable; - } - } - - return err; -} -EXPORT_SYMBOL(mmc_cache_ctrl); - #ifdef CONFIG_PM /* Do the card removal on suspend if card is assumed removeable diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 5c4ee6ab2125..6d446e217f36 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1481,7 +1481,7 @@ static int _mmc_suspend(struct mmc_host *host, bool is_suspend) goto out; } - err = mmc_cache_ctrl(host, 0); + err = mmc_flush_cache(host->card); if (err) goto out; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 4c0176a8e474..f69bd70b1046 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -385,8 +385,6 @@ int mmc_power_restore_host(struct mmc_host *host); void mmc_detect_change(struct mmc_host *, unsigned long delay); void mmc_request_done(struct mmc_host *, struct mmc_request *); -int mmc_cache_ctrl(struct mmc_host *, u8); - static inline void mmc_signal_sdio_irq(struct mmc_host *host) { host->ops->enable_sdio_irq(host, 0); -- cgit v1.2.3-59-g8ed1b From 7536d3f83aa42ba1a3b1c6b30c2b6d94a820cbb2 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 18 Dec 2013 11:59:17 +0100 Subject: mmc: core: Enable MMC_CAP2_CACHE_CTRL as default There are no reason to why the use of a non-volatile internal eMMC cache should be controlled by a host cap. Instead let's just enable it if the eMMC card supports it. Signed-off-by: Ulf Hansson Acked-by: Seungwon Jeon Signed-off-by: Chris Ball --- drivers/mmc/core/core.c | 4 ---- drivers/mmc/core/mmc.c | 3 +-- include/linux/mmc/host.h | 1 - 3 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 8928f9f4cfe1..f5a068d55c36 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -2562,12 +2562,8 @@ EXPORT_SYMBOL(mmc_power_restore_host); */ int mmc_flush_cache(struct mmc_card *card) { - struct mmc_host *host = card->host; int err = 0; - if (!(host->caps2 & MMC_CAP2_CACHE_CTRL)) - return err; - if (mmc_card_mmc(card) && (card->ext_csd.cache_size > 0) && (card->ext_csd.cache_ctrl & 1)) { diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 6d446e217f36..072171183d5b 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1287,8 +1287,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, * If cache size is higher than 0, this indicates * the existence of cache and it can be turned on. */ - if ((host->caps2 & MMC_CAP2_CACHE_CTRL) && - card->ext_csd.cache_size > 0) { + if (card->ext_csd.cache_size > 0) { err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_CACHE_CTRL, 1, card->ext_csd.generic_cmd6_time); diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index f69bd70b1046..719db89ef134 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -264,7 +264,6 @@ struct mmc_host { u32 caps2; /* More host capabilities */ #define MMC_CAP2_BOOTPART_NOACC (1 << 0) /* Boot partition no access */ -#define MMC_CAP2_CACHE_CTRL (1 << 1) /* Allow cache control */ #define MMC_CAP2_FULL_PWR_CYCLE (1 << 2) /* Can do full power cycle */ #define MMC_CAP2_NO_MULTI_READ (1 << 3) /* Multiblock reads don't work */ #define MMC_CAP2_HS200_1_8V_SDR (1 << 5) /* can support */ -- cgit v1.2.3-59-g8ed1b From dd5720b3006210ecdf4e3c8889e6051f432c4ba3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 12 Feb 2014 11:16:17 +0200 Subject: dma: dw: remove leftovers in the comment blocks There is couple of leftovers in the comment blocks. This patch modifies the comments accordingly. There is no functional change. Signed-off-by: Andy Shevchenko Signed-off-by: Vinod Koul --- drivers/dma/dw/core.c | 4 ++-- include/linux/dw_dmac.h | 5 +---- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 13ac3f240e79..1b4509712847 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -33,8 +33,8 @@ * of which use ARM any more). See the "Databook" from Synopsys for * information beyond what licensees probably provide. * - * The driver has currently been tested only with the Atmel AT32AP7000, - * which does not support descriptor writeback. + * The driver has been tested with the Atmel AT32AP7000, which does not + * support descriptor writeback. */ static inline bool is_request_line_unset(struct dw_dma_chan *dwc) diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h index 481ab2345d6b..68b4024184de 100644 --- a/include/linux/dw_dmac.h +++ b/include/linux/dw_dmac.h @@ -1,6 +1,5 @@ /* - * Driver for the Synopsys DesignWare DMA Controller (aka DMACA on - * AVR32 systems.) + * Driver for the Synopsys DesignWare DMA Controller * * Copyright (C) 2007 Atmel Corporation * Copyright (C) 2010-2011 ST Microelectronics @@ -44,8 +43,6 @@ struct dw_dma_slave { * @nr_masters: Number of AHB masters supported by the controller * @data_width: Maximum data width supported by hardware per AHB master * (0 - 8bits, 1 - 16bits, ..., 5 - 256bits) - * @sd: slave specific data. Used for configuring channels - * @sd_count: count of slave data structures passed. */ struct dw_dma_platform_data { unsigned int nr_channels; -- cgit v1.2.3-59-g8ed1b From 9107ebbf9652c033eb5dd10a6ea34a132db3cde1 Mon Sep 17 00:00:00 2001 From: Micky Ching Date: Fri, 21 Feb 2014 18:40:35 +0800 Subject: mmc: sdhci: add support for realtek rts5250 Add support for realtek rts5250 pci card reader. The card reader has some problems with DDR50 mode, so add a new quirks2 for broken ddr50. Signed-off-by: Micky Ching Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci-pci.c | 20 ++++++++++++++++++++ drivers/mmc/host/sdhci.c | 3 ++- include/linux/mmc/sdhci.h | 2 ++ 3 files changed, 24 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c index 0955777b6c7e..fdc612120362 100644 --- a/drivers/mmc/host/sdhci-pci.c +++ b/drivers/mmc/host/sdhci-pci.c @@ -610,6 +610,18 @@ static const struct sdhci_pci_fixes sdhci_via = { .probe = via_probe, }; +static int rtsx_probe_slot(struct sdhci_pci_slot *slot) +{ + slot->host->mmc->caps2 |= MMC_CAP2_HS200; + return 0; +} + +static const struct sdhci_pci_fixes sdhci_rtsx = { + .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN | + SDHCI_QUIRK2_BROKEN_DDR50, + .probe_slot = rtsx_probe_slot, +}; + static const struct pci_device_id pci_ids[] = { { .vendor = PCI_VENDOR_ID_RICOH, @@ -731,6 +743,14 @@ static const struct pci_device_id pci_ids[] = { .driver_data = (kernel_ulong_t)&sdhci_via, }, + { + .vendor = PCI_VENDOR_ID_REALTEK, + .device = 0x5250, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_rtsx, + }, + { .vendor = PCI_VENDOR_ID_INTEL, .device = PCI_DEVICE_ID_INTEL_MRST_SD0, diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 9ddef4763541..8958edc03e04 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -3020,7 +3020,8 @@ int sdhci_add_host(struct sdhci_host *host) } else if (caps[1] & SDHCI_SUPPORT_SDR50) mmc->caps |= MMC_CAP_UHS_SDR50; - if (caps[1] & SDHCI_SUPPORT_DDR50) + if ((caps[1] & SDHCI_SUPPORT_DDR50) && + !(host->quirks2 & SDHCI_QUIRK2_BROKEN_DDR50)) mmc->caps |= MMC_CAP_UHS_DDR50; /* Does the host need tuning for SDR50? */ diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index 362927c48f97..7be12b883485 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -100,6 +100,8 @@ struct sdhci_host { #define SDHCI_QUIRK2_BROKEN_HOST_CONTROL (1<<5) /* Controller does not support HS200 */ #define SDHCI_QUIRK2_BROKEN_HS200 (1<<6) +/* Controller does not support DDR50 */ +#define SDHCI_QUIRK2_BROKEN_DDR50 (1<<7) int irq; /* Device IRQ */ void __iomem *ioaddr; /* Mapped address */ -- cgit v1.2.3-59-g8ed1b From abcc6b2943145ae2e17a52632ccab50cd612914c Mon Sep 17 00:00:00 2001 From: Micky Ching Date: Mon, 17 Feb 2014 16:45:47 +0800 Subject: mmc: rtsx: modify phase searching method for tuning The new phase searching method is more concise and easier to understand. Signed-off-by: Micky Ching Signed-off-by: Chris Ball --- drivers/mmc/host/rtsx_pci_sdmmc.c | 112 +++++++++++--------------------------- include/linux/mfd/rtsx_pci.h | 2 +- 2 files changed, 33 insertions(+), 81 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c index cc80e3119d1d..0b9ded13a3ae 100644 --- a/drivers/mmc/host/rtsx_pci_sdmmc.c +++ b/drivers/mmc/host/rtsx_pci_sdmmc.c @@ -31,16 +31,6 @@ #include #include -/* SD Tuning Data Structure - * Record continuous timing phase path - */ -struct timing_phase_path { - int start; - int end; - int mid; - int len; -}; - struct realtek_pci_sdmmc { struct platform_device *pdev; struct rtsx_pcr *pcr; @@ -511,85 +501,47 @@ static int sd_change_phase(struct realtek_pci_sdmmc *host, return 0; } -static u8 sd_search_final_phase(struct realtek_pci_sdmmc *host, u32 phase_map) +static inline u32 test_phase_bit(u32 phase_map, unsigned int bit) { - struct timing_phase_path path[MAX_PHASE + 1]; - int i, j, cont_path_cnt; - int new_block, max_len, final_path_idx; - u8 final_phase = 0xFF; + bit %= RTSX_PHASE_MAX; + return phase_map & (1 << bit); +} - /* Parse phase_map, take it as a bit-ring */ - cont_path_cnt = 0; - new_block = 1; - j = 0; - for (i = 0; i < MAX_PHASE + 1; i++) { - if (phase_map & (1 << i)) { - if (new_block) { - new_block = 0; - j = cont_path_cnt++; - path[j].start = i; - path[j].end = i; - } else { - path[j].end = i; - } - } else { - new_block = 1; - if (cont_path_cnt) { - /* Calculate path length and middle point */ - int idx = cont_path_cnt - 1; - path[idx].len = - path[idx].end - path[idx].start + 1; - path[idx].mid = - path[idx].start + path[idx].len / 2; - } - } - } +static int sd_get_phase_len(u32 phase_map, unsigned int start_bit) +{ + int i; - if (cont_path_cnt == 0) { - dev_dbg(sdmmc_dev(host), "No continuous phase path\n"); - goto finish; - } else { - /* Calculate last continuous path length and middle point */ - int idx = cont_path_cnt - 1; - path[idx].len = path[idx].end - path[idx].start + 1; - path[idx].mid = path[idx].start + path[idx].len / 2; + for (i = 0; i < RTSX_PHASE_MAX; i++) { + if (test_phase_bit(phase_map, start_bit + i) == 0) + return i; } + return RTSX_PHASE_MAX; +} + +static u8 sd_search_final_phase(struct realtek_pci_sdmmc *host, u32 phase_map) +{ + int start = 0, len = 0; + int start_final = 0, len_final = 0; + u8 final_phase = 0xFF; - /* Connect the first and last continuous paths if they are adjacent */ - if (!path[0].start && (path[cont_path_cnt - 1].end == MAX_PHASE)) { - /* Using negative index */ - path[0].start = path[cont_path_cnt - 1].start - MAX_PHASE - 1; - path[0].len += path[cont_path_cnt - 1].len; - path[0].mid = path[0].start + path[0].len / 2; - /* Convert negative middle point index to positive one */ - if (path[0].mid < 0) - path[0].mid += MAX_PHASE + 1; - cont_path_cnt--; + if (phase_map == 0) { + dev_err(sdmmc_dev(host), "phase error: [map:%x]\n", phase_map); + return final_phase; } - /* Choose the longest continuous phase path */ - max_len = 0; - final_phase = 0; - final_path_idx = 0; - for (i = 0; i < cont_path_cnt; i++) { - if (path[i].len > max_len) { - max_len = path[i].len; - final_phase = (u8)path[i].mid; - final_path_idx = i; + while (start < RTSX_PHASE_MAX) { + len = sd_get_phase_len(phase_map, start); + if (len_final < len) { + start_final = start; + len_final = len; } - - dev_dbg(sdmmc_dev(host), "path[%d].start = %d\n", - i, path[i].start); - dev_dbg(sdmmc_dev(host), "path[%d].end = %d\n", - i, path[i].end); - dev_dbg(sdmmc_dev(host), "path[%d].len = %d\n", - i, path[i].len); - dev_dbg(sdmmc_dev(host), "path[%d].mid = %d\n", - i, path[i].mid); + start += len ? len : 1; } -finish: - dev_dbg(sdmmc_dev(host), "Final chosen phase: %d\n", final_phase); + final_phase = (start_final + len_final / 2) % RTSX_PHASE_MAX; + dev_dbg(sdmmc_dev(host), "phase: [map:%x] [maxlen:%d] [final:%d]\n", + phase_map, len_final, final_phase); + return final_phase; } @@ -635,7 +587,7 @@ static int sd_tuning_phase(struct realtek_pci_sdmmc *host, int err, i; u32 raw_phase_map = 0; - for (i = MAX_PHASE; i >= 0; i--) { + for (i = 0; i < RTSX_PHASE_MAX; i++) { err = sd_tuning_rx_cmd(host, opcode, (u8)i); if (err == 0) raw_phase_map |= 1 << i; diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h index 0ce772105508..a3835976f7c6 100644 --- a/include/linux/mfd/rtsx_pci.h +++ b/include/linux/mfd/rtsx_pci.h @@ -144,7 +144,7 @@ #define HOST_TO_DEVICE 0 #define DEVICE_TO_HOST 1 -#define MAX_PHASE 31 +#define RTSX_PHASE_MAX 32 #define RX_TUNING_CNT 3 /* SG descriptor */ -- cgit v1.2.3-59-g8ed1b From c42deffd5b53c9e583d83c7964854ede2f12410d Mon Sep 17 00:00:00 2001 From: Micky Ching Date: Mon, 17 Feb 2014 16:45:48 +0800 Subject: mmc: rtsx: add support for pre_req and post_req Add support for non-blocking request, pre_req() runs dma_map_sg() and post_req() runs dma_unmap_sg(). This patch can increase card read/write speed, especially for high speed card and slow CPU(for some embedded platform). Users can get a great benefit from this patch. if CPU frequency is 800MHz, SDR104 or DDR50 card read/write speed may increase more than 15%. test results: intel i3(800MHz - 2.3GHz), SD card clock 208MHz performance mode(2.3GHz): Before: dd if=/dev/mmcblk0p1 of=/dev/null bs=64k count=1024 67108864 bytes (67 MB) copied, 1.18191 s, 56.8 MB/s After: dd if=/dev/mmcblk0p1 of=/dev/null bs=64k count=1024 67108864 bytes (67 MB) copied, 1.09276 s, 61.4 MB/s powersave mode(800MHz): Before: dd if=/dev/mmcblk0p1 of=/dev/null bs=64k count=1024 67108864 bytes (67 MB) copied, 1.29569 s, 51.8 MB/s After: dd if=/dev/mmcblk0p1 of=/dev/null bs=64k count=1024 67108864 bytes (67 MB) copied, 1.11218 s, 60.3 MB/s Signed-off-by: Micky Ching Signed-off-by: Chris Ball --- drivers/mfd/rtsx_pcr.c | 132 ++++++++---- drivers/mmc/host/rtsx_pci_sdmmc.c | 418 +++++++++++++++++++++++++++++++------- include/linux/mfd/rtsx_common.h | 1 + include/linux/mfd/rtsx_pci.h | 6 + 4 files changed, 448 insertions(+), 109 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c index 1d15735f9ef9..c9de3d598ea5 100644 --- a/drivers/mfd/rtsx_pcr.c +++ b/drivers/mfd/rtsx_pcr.c @@ -338,58 +338,28 @@ int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist, int num_sg, bool read, int timeout) { struct completion trans_done; - u8 dir; - int err = 0, i, count; + int err = 0, count; long timeleft; unsigned long flags; - struct scatterlist *sg; - enum dma_data_direction dma_dir; - u32 val; - dma_addr_t addr; - unsigned int len; - - dev_dbg(&(pcr->pci->dev), "--> %s: num_sg = %d\n", __func__, num_sg); - - /* don't transfer data during abort processing */ - if (pcr->remove_pci) - return -EINVAL; - - if ((sglist == NULL) || (num_sg <= 0)) - return -EINVAL; - if (read) { - dir = DEVICE_TO_HOST; - dma_dir = DMA_FROM_DEVICE; - } else { - dir = HOST_TO_DEVICE; - dma_dir = DMA_TO_DEVICE; - } - - count = dma_map_sg(&(pcr->pci->dev), sglist, num_sg, dma_dir); + count = rtsx_pci_dma_map_sg(pcr, sglist, num_sg, read); if (count < 1) { dev_err(&(pcr->pci->dev), "scatterlist map failed\n"); return -EINVAL; } dev_dbg(&(pcr->pci->dev), "DMA mapping count: %d\n", count); - val = ((u32)(dir & 0x01) << 29) | TRIG_DMA | ADMA_MODE; - pcr->sgi = 0; - for_each_sg(sglist, sg, count, i) { - addr = sg_dma_address(sg); - len = sg_dma_len(sg); - rtsx_pci_add_sg_tbl(pcr, addr, len, i == count - 1); - } spin_lock_irqsave(&pcr->lock, flags); pcr->done = &trans_done; pcr->trans_result = TRANS_NOT_READY; init_completion(&trans_done); - rtsx_pci_writel(pcr, RTSX_HDBAR, pcr->host_sg_tbl_addr); - rtsx_pci_writel(pcr, RTSX_HDBCTLR, val); spin_unlock_irqrestore(&pcr->lock, flags); + rtsx_pci_dma_transfer(pcr, sglist, count, read); + timeleft = wait_for_completion_interruptible_timeout( &trans_done, msecs_to_jiffies(timeout)); if (timeleft <= 0) { @@ -413,7 +383,7 @@ out: pcr->done = NULL; spin_unlock_irqrestore(&pcr->lock, flags); - dma_unmap_sg(&(pcr->pci->dev), sglist, num_sg, dma_dir); + rtsx_pci_dma_unmap_sg(pcr, sglist, num_sg, read); if ((err < 0) && (err != -ENODEV)) rtsx_pci_stop_cmd(pcr); @@ -425,6 +395,73 @@ out: } EXPORT_SYMBOL_GPL(rtsx_pci_transfer_data); +int rtsx_pci_dma_map_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int num_sg, bool read) +{ + enum dma_data_direction dir = read ? DMA_FROM_DEVICE : DMA_TO_DEVICE; + + if (pcr->remove_pci) + return -EINVAL; + + if ((sglist == NULL) || num_sg < 1) + return -EINVAL; + + return dma_map_sg(&(pcr->pci->dev), sglist, num_sg, dir); +} +EXPORT_SYMBOL_GPL(rtsx_pci_dma_map_sg); + +int rtsx_pci_dma_unmap_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int num_sg, bool read) +{ + enum dma_data_direction dir = read ? DMA_FROM_DEVICE : DMA_TO_DEVICE; + + if (pcr->remove_pci) + return -EINVAL; + + if (sglist == NULL || num_sg < 1) + return -EINVAL; + + dma_unmap_sg(&(pcr->pci->dev), sglist, num_sg, dir); + return num_sg; +} +EXPORT_SYMBOL_GPL(rtsx_pci_dma_unmap_sg); + +int rtsx_pci_dma_transfer(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int sg_count, bool read) +{ + struct scatterlist *sg; + dma_addr_t addr; + unsigned int len; + int i; + u32 val; + u8 dir = read ? DEVICE_TO_HOST : HOST_TO_DEVICE; + unsigned long flags; + + if (pcr->remove_pci) + return -EINVAL; + + if ((sglist == NULL) || (sg_count < 1)) + return -EINVAL; + + val = ((u32)(dir & 0x01) << 29) | TRIG_DMA | ADMA_MODE; + pcr->sgi = 0; + for_each_sg(sglist, sg, sg_count, i) { + addr = sg_dma_address(sg); + len = sg_dma_len(sg); + rtsx_pci_add_sg_tbl(pcr, addr, len, i == sg_count - 1); + } + + spin_lock_irqsave(&pcr->lock, flags); + + rtsx_pci_writel(pcr, RTSX_HDBAR, pcr->host_sg_tbl_addr); + rtsx_pci_writel(pcr, RTSX_HDBCTLR, val); + + spin_unlock_irqrestore(&pcr->lock, flags); + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_dma_transfer); + int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len) { int err; @@ -836,6 +873,8 @@ static irqreturn_t rtsx_pci_isr(int irq, void *dev_id) int_reg = rtsx_pci_readl(pcr, RTSX_BIPR); /* Clear interrupt flag */ rtsx_pci_writel(pcr, RTSX_BIPR, int_reg); + dev_dbg(&pcr->pci->dev, "=========== BIPR 0x%8x ==========\n", int_reg); + if ((int_reg & pcr->bier) == 0) { spin_unlock(&pcr->lock); return IRQ_NONE; @@ -866,17 +905,28 @@ static irqreturn_t rtsx_pci_isr(int irq, void *dev_id) } if (int_reg & (NEED_COMPLETE_INT | DELINK_INT)) { - if (int_reg & (TRANS_FAIL_INT | DELINK_INT)) { + if (int_reg & (TRANS_FAIL_INT | DELINK_INT)) pcr->trans_result = TRANS_RESULT_FAIL; - if (pcr->done) - complete(pcr->done); - } else if (int_reg & TRANS_OK_INT) { + else if (int_reg & TRANS_OK_INT) pcr->trans_result = TRANS_RESULT_OK; - if (pcr->done) - complete(pcr->done); + + if (pcr->done) + complete(pcr->done); + + if (int_reg & SD_EXIST) { + struct rtsx_slot *slot = &pcr->slots[RTSX_SD_CARD]; + if (slot && slot->done_transfer) + slot->done_transfer(slot->p_dev); + } + + if (int_reg & MS_EXIST) { + struct rtsx_slot *slot = &pcr->slots[RTSX_SD_CARD]; + if (slot && slot->done_transfer) + slot->done_transfer(slot->p_dev); } } + if (pcr->card_inserted || pcr->card_removed) schedule_delayed_work(&pcr->carddet_work, msecs_to_jiffies(200)); diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c index 0b9ded13a3ae..5fb994f9a653 100644 --- a/drivers/mmc/host/rtsx_pci_sdmmc.c +++ b/drivers/mmc/host/rtsx_pci_sdmmc.c @@ -31,14 +31,28 @@ #include #include +struct realtek_next { + unsigned int sg_count; + s32 cookie; +}; + struct realtek_pci_sdmmc { struct platform_device *pdev; struct rtsx_pcr *pcr; struct mmc_host *mmc; struct mmc_request *mrq; - - struct mutex host_mutex; - + struct mmc_command *cmd; + struct mmc_data *data; + + spinlock_t lock; + struct timer_list timer; + struct tasklet_struct cmd_tasklet; + struct tasklet_struct data_tasklet; + struct tasklet_struct finish_tasklet; + + u8 rsp_type; + u8 rsp_len; + int sg_count; u8 ssc_depth; unsigned int clock; bool vpclk; @@ -48,8 +62,13 @@ struct realtek_pci_sdmmc { int power_state; #define SDMMC_POWER_ON 1 #define SDMMC_POWER_OFF 0 + + struct realtek_next next_data; }; +static int sd_start_multi_rw(struct realtek_pci_sdmmc *host, + struct mmc_request *mrq); + static inline struct device *sdmmc_dev(struct realtek_pci_sdmmc *host) { return &(host->pdev->dev); @@ -86,6 +105,95 @@ static void sd_print_debug_regs(struct realtek_pci_sdmmc *host) #define sd_print_debug_regs(host) #endif /* DEBUG */ +static void sd_isr_done_transfer(struct platform_device *pdev) +{ + struct realtek_pci_sdmmc *host = platform_get_drvdata(pdev); + + spin_lock(&host->lock); + if (host->cmd) + tasklet_schedule(&host->cmd_tasklet); + if (host->data) + tasklet_schedule(&host->data_tasklet); + spin_unlock(&host->lock); +} + +static void sd_request_timeout(unsigned long host_addr) +{ + struct realtek_pci_sdmmc *host = (struct realtek_pci_sdmmc *)host_addr; + unsigned long flags; + + spin_lock_irqsave(&host->lock, flags); + + if (!host->mrq) { + dev_err(sdmmc_dev(host), "error: no request exist\n"); + goto out; + } + + if (host->cmd) + host->cmd->error = -ETIMEDOUT; + if (host->data) + host->data->error = -ETIMEDOUT; + + dev_dbg(sdmmc_dev(host), "timeout for request\n"); + +out: + tasklet_schedule(&host->finish_tasklet); + spin_unlock_irqrestore(&host->lock, flags); +} + +static void sd_finish_request(unsigned long host_addr) +{ + struct realtek_pci_sdmmc *host = (struct realtek_pci_sdmmc *)host_addr; + struct rtsx_pcr *pcr = host->pcr; + struct mmc_request *mrq; + struct mmc_command *cmd; + struct mmc_data *data; + unsigned long flags; + bool any_error; + + spin_lock_irqsave(&host->lock, flags); + + del_timer(&host->timer); + mrq = host->mrq; + if (!mrq) { + dev_err(sdmmc_dev(host), "error: no request need finish\n"); + goto out; + } + + cmd = mrq->cmd; + data = mrq->data; + + any_error = (mrq->sbc && mrq->sbc->error) || + (mrq->stop && mrq->stop->error) || + (cmd && cmd->error) || (data && data->error); + + if (any_error) { + rtsx_pci_stop_cmd(pcr); + sd_clear_error(host); + } + + if (data) { + if (any_error) + data->bytes_xfered = 0; + else + data->bytes_xfered = data->blocks * data->blksz; + + if (!data->host_cookie) + rtsx_pci_dma_unmap_sg(pcr, data->sg, data->sg_len, + data->flags & MMC_DATA_READ); + + } + + host->mrq = NULL; + host->cmd = NULL; + host->data = NULL; + +out: + spin_unlock_irqrestore(&host->lock, flags); + mutex_unlock(&pcr->pcr_mutex); + mmc_request_done(host->mmc, mrq); +} + static int sd_read_data(struct realtek_pci_sdmmc *host, u8 *cmd, u16 byte_cnt, u8 *buf, int buf_len, int timeout) { @@ -203,8 +311,7 @@ static int sd_write_data(struct realtek_pci_sdmmc *host, u8 *cmd, u16 byte_cnt, return 0; } -static void sd_send_cmd_get_rsp(struct realtek_pci_sdmmc *host, - struct mmc_command *cmd) +static void sd_send_cmd(struct realtek_pci_sdmmc *host, struct mmc_command *cmd) { struct rtsx_pcr *pcr = host->pcr; u8 cmd_idx = (u8)cmd->opcode; @@ -212,11 +319,14 @@ static void sd_send_cmd_get_rsp(struct realtek_pci_sdmmc *host, int err = 0; int timeout = 100; int i; - u8 *ptr; - int stat_idx = 0; u8 rsp_type; int rsp_len = 5; - bool clock_toggled = false; + unsigned long flags; + + if (host->cmd) + dev_err(sdmmc_dev(host), "error: cmd already exist\n"); + + host->cmd = cmd; dev_dbg(sdmmc_dev(host), "%s: SD/MMC CMD %d, arg = 0x%08x\n", __func__, cmd_idx, arg); @@ -251,6 +361,8 @@ static void sd_send_cmd_get_rsp(struct realtek_pci_sdmmc *host, err = -EINVAL; goto out; } + host->rsp_type = rsp_type; + host->rsp_len = rsp_len; if (rsp_type == SD_RSP_TYPE_R1b) timeout = 3000; @@ -260,8 +372,6 @@ static void sd_send_cmd_get_rsp(struct realtek_pci_sdmmc *host, 0xFF, SD_CLK_TOGGLE_EN); if (err < 0) goto out; - - clock_toggled = true; } rtsx_pci_init_cmd(pcr); @@ -285,25 +395,60 @@ static void sd_send_cmd_get_rsp(struct realtek_pci_sdmmc *host, /* Read data from ping-pong buffer */ for (i = PPBUF_BASE2; i < PPBUF_BASE2 + 16; i++) rtsx_pci_add_cmd(pcr, READ_REG_CMD, (u16)i, 0, 0); - stat_idx = 16; } else if (rsp_type != SD_RSP_TYPE_R0) { /* Read data from SD_CMDx registers */ for (i = SD_CMD0; i <= SD_CMD4; i++) rtsx_pci_add_cmd(pcr, READ_REG_CMD, (u16)i, 0, 0); - stat_idx = 5; } rtsx_pci_add_cmd(pcr, READ_REG_CMD, SD_STAT1, 0, 0); - err = rtsx_pci_send_cmd(pcr, timeout); - if (err < 0) { - sd_print_debug_regs(host); - sd_clear_error(host); - dev_dbg(sdmmc_dev(host), - "rtsx_pci_send_cmd error (err = %d)\n", err); + mod_timer(&host->timer, jiffies + msecs_to_jiffies(timeout)); + + spin_lock_irqsave(&pcr->lock, flags); + pcr->trans_result = TRANS_NOT_READY; + rtsx_pci_send_cmd_no_wait(pcr); + spin_unlock_irqrestore(&pcr->lock, flags); + + return; + +out: + cmd->error = err; + tasklet_schedule(&host->finish_tasklet); +} + +static void sd_get_rsp(unsigned long host_addr) +{ + struct realtek_pci_sdmmc *host = (struct realtek_pci_sdmmc *)host_addr; + struct rtsx_pcr *pcr = host->pcr; + struct mmc_command *cmd; + int i, err = 0, stat_idx; + u8 *ptr, rsp_type; + unsigned long flags; + + spin_lock_irqsave(&host->lock, flags); + + cmd = host->cmd; + host->cmd = NULL; + + if (!cmd) { + dev_err(sdmmc_dev(host), "error: cmd not exist\n"); goto out; } + spin_lock(&pcr->lock); + if (pcr->trans_result == TRANS_NO_DEVICE) + err = -ENODEV; + else if (pcr->trans_result != TRANS_RESULT_OK) + err = -EINVAL; + spin_unlock(&pcr->lock); + + if (err < 0) + goto out; + + rsp_type = host->rsp_type; + stat_idx = host->rsp_len; + if (rsp_type == SD_RSP_TYPE_R0) { err = 0; goto out; @@ -340,26 +485,106 @@ static void sd_send_cmd_get_rsp(struct realtek_pci_sdmmc *host, cmd->resp[0]); } + if (cmd == host->mrq->sbc) { + sd_send_cmd(host, host->mrq->cmd); + spin_unlock_irqrestore(&host->lock, flags); + return; + } + + if (cmd == host->mrq->stop) + goto out; + + if (cmd->data) { + sd_start_multi_rw(host, host->mrq); + spin_unlock_irqrestore(&host->lock, flags); + return; + } + out: cmd->error = err; - if (err && clock_toggled) - rtsx_pci_write_register(pcr, SD_BUS_STAT, - SD_CLK_TOGGLE_EN | SD_CLK_FORCE_STOP, 0); + tasklet_schedule(&host->finish_tasklet); + spin_unlock_irqrestore(&host->lock, flags); +} + +static int sd_pre_dma_transfer(struct realtek_pci_sdmmc *host, + struct mmc_data *data, struct realtek_next *next) +{ + struct rtsx_pcr *pcr = host->pcr; + int read = data->flags & MMC_DATA_READ; + int sg_count = 0; + + if (!next && data->host_cookie && + data->host_cookie != host->next_data.cookie) { + dev_err(sdmmc_dev(host), + "error: invalid cookie data[%d] host[%d]\n", + data->host_cookie, host->next_data.cookie); + data->host_cookie = 0; + } + + if (next || (!next && data->host_cookie != host->next_data.cookie)) + sg_count = rtsx_pci_dma_map_sg(pcr, + data->sg, data->sg_len, read); + else + sg_count = host->next_data.sg_count; + + if (next) { + next->sg_count = sg_count; + if (++next->cookie < 0) + next->cookie = 1; + data->host_cookie = next->cookie; + } + + return sg_count; +} + +static void sdmmc_pre_req(struct mmc_host *mmc, struct mmc_request *mrq, + bool is_first_req) +{ + struct realtek_pci_sdmmc *host = mmc_priv(mmc); + struct mmc_data *data = mrq->data; + + if (data->host_cookie) { + dev_err(sdmmc_dev(host), + "error: descard already cookie data[%d]\n", + data->host_cookie); + data->host_cookie = 0; + } + + dev_dbg(sdmmc_dev(host), "dma sg prepared: %d\n", + sd_pre_dma_transfer(host, data, &host->next_data)); +} + +static void sdmmc_post_req(struct mmc_host *mmc, struct mmc_request *mrq, + int err) +{ + struct realtek_pci_sdmmc *host = mmc_priv(mmc); + struct rtsx_pcr *pcr = host->pcr; + struct mmc_data *data = mrq->data; + int read = data->flags & MMC_DATA_READ; + + rtsx_pci_dma_unmap_sg(pcr, data->sg, data->sg_len, read); + data->host_cookie = 0; } -static int sd_rw_multi(struct realtek_pci_sdmmc *host, struct mmc_request *mrq) +static int sd_start_multi_rw(struct realtek_pci_sdmmc *host, + struct mmc_request *mrq) { struct rtsx_pcr *pcr = host->pcr; struct mmc_host *mmc = host->mmc; struct mmc_card *card = mmc->card; struct mmc_data *data = mrq->data; int uhs = mmc_card_uhs(card); - int read = (data->flags & MMC_DATA_READ) ? 1 : 0; + int read = data->flags & MMC_DATA_READ; u8 cfg2, trans_mode; int err; size_t data_len = data->blksz * data->blocks; + if (host->data) + dev_err(sdmmc_dev(host), "error: data already exist\n"); + + host->data = data; + if (read) { cfg2 = SD_CALCULATE_CRC7 | SD_CHECK_CRC16 | SD_NO_WAIT_BUSY_END | SD_CHECK_CRC7 | SD_RSP_LEN_0; @@ -410,17 +635,56 @@ static int sd_rw_multi(struct realtek_pci_sdmmc *host, struct mmc_request *mrq) rtsx_pci_add_cmd(pcr, CHECK_REG_CMD, SD_TRANSFER, SD_TRANSFER_END, SD_TRANSFER_END); + mod_timer(&host->timer, jiffies + 10 * HZ); rtsx_pci_send_cmd_no_wait(pcr); - err = rtsx_pci_transfer_data(pcr, data->sg, data->sg_len, read, 10000); + err = rtsx_pci_dma_transfer(pcr, data->sg, host->sg_count, read); if (err < 0) { - sd_clear_error(host); - return err; + data->error = err; + tasklet_schedule(&host->finish_tasklet); } - return 0; } +static void sd_finish_multi_rw(unsigned long host_addr) +{ + struct realtek_pci_sdmmc *host = (struct realtek_pci_sdmmc *)host_addr; + struct rtsx_pcr *pcr = host->pcr; + struct mmc_data *data; + int err = 0; + unsigned long flags; + + spin_lock_irqsave(&host->lock, flags); + + if (!host->data) { + dev_err(sdmmc_dev(host), "error: no data exist\n"); + goto out; + } + + data = host->data; + host->data = NULL; + + if (pcr->trans_result == TRANS_NO_DEVICE) + err = -ENODEV; + else if (pcr->trans_result != TRANS_RESULT_OK) + err = -EINVAL; + + if (err < 0) { + data->error = err; + goto out; + } + + if (!host->mrq->sbc && data->stop) { + sd_send_cmd(host, data->stop); + spin_unlock_irqrestore(&host->lock, flags); + return; + } + +out: + tasklet_schedule(&host->finish_tasklet); + spin_unlock_irqrestore(&host->lock, flags); +} + static inline void sd_enable_initial_mode(struct realtek_pci_sdmmc *host) { rtsx_pci_write_register(host->pcr, SD_CFG1, @@ -637,6 +901,13 @@ static int sd_tuning_rx(struct realtek_pci_sdmmc *host, u8 opcode) return 0; } +static inline bool sd_use_muti_rw(struct mmc_command *cmd) +{ + return mmc_op_multi(cmd->opcode) || + (cmd->opcode == MMC_READ_SINGLE_BLOCK) || + (cmd->opcode == MMC_WRITE_BLOCK); +} + static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq) { struct realtek_pci_sdmmc *host = mmc_priv(mmc); @@ -645,6 +916,14 @@ static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq) struct mmc_data *data = mrq->data; unsigned int data_size = 0; int err; + unsigned long flags; + + mutex_lock(&pcr->pcr_mutex); + spin_lock_irqsave(&host->lock, flags); + + if (host->mrq) + dev_err(sdmmc_dev(host), "error: request already exist\n"); + host->mrq = mrq; if (host->eject) { cmd->error = -ENOMEDIUM; @@ -657,8 +936,6 @@ static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq) goto finish; } - mutex_lock(&pcr->pcr_mutex); - rtsx_pci_start_run(pcr); rtsx_pci_switch_clock(pcr, host->clock, host->ssc_depth, @@ -667,46 +944,28 @@ static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq) rtsx_pci_write_register(pcr, CARD_SHARE_MODE, CARD_SHARE_MASK, CARD_SHARE_48_SD); - mutex_lock(&host->host_mutex); - host->mrq = mrq; - mutex_unlock(&host->host_mutex); - if (mrq->data) data_size = data->blocks * data->blksz; - if (!data_size || mmc_op_multi(cmd->opcode) || - (cmd->opcode == MMC_READ_SINGLE_BLOCK) || - (cmd->opcode == MMC_WRITE_BLOCK)) { - sd_send_cmd_get_rsp(host, cmd); - - if (!cmd->error && data_size) { - sd_rw_multi(host, mrq); + if (sd_use_muti_rw(cmd)) + host->sg_count = sd_pre_dma_transfer(host, data, NULL); - if (mmc_op_multi(cmd->opcode) && mrq->stop) - sd_send_cmd_get_rsp(host, mrq->stop); - } + if (!data_size || sd_use_muti_rw(cmd)) { + if (mrq->sbc) + sd_send_cmd(host, mrq->sbc); + else + sd_send_cmd(host, cmd); + spin_unlock_irqrestore(&host->lock, flags); } else { + spin_unlock_irqrestore(&host->lock, flags); sd_normal_rw(host, mrq); + tasklet_schedule(&host->finish_tasklet); } - - if (mrq->data) { - if (cmd->error || data->error) - data->bytes_xfered = 0; - else - data->bytes_xfered = data->blocks * data->blksz; - } - - mutex_unlock(&pcr->pcr_mutex); + return; finish: - if (cmd->error) - dev_dbg(sdmmc_dev(host), "cmd->error = %d\n", cmd->error); - - mutex_lock(&host->host_mutex); - host->mrq = NULL; - mutex_unlock(&host->host_mutex); - - mmc_request_done(mmc, mrq); + tasklet_schedule(&host->finish_tasklet); + spin_unlock_irqrestore(&host->lock, flags); } static int sd_set_bus_width(struct realtek_pci_sdmmc *host, @@ -1141,6 +1400,8 @@ out: } static const struct mmc_host_ops realtek_pci_sdmmc_ops = { + .pre_req = sdmmc_pre_req, + .post_req = sdmmc_post_req, .request = sdmmc_request, .set_ios = sdmmc_set_ios, .get_ro = sdmmc_get_ro, @@ -1204,6 +1465,7 @@ static int rtsx_pci_sdmmc_drv_probe(struct platform_device *pdev) struct realtek_pci_sdmmc *host; struct rtsx_pcr *pcr; struct pcr_handle *handle = pdev->dev.platform_data; + unsigned long host_addr; if (!handle) return -ENXIO; @@ -1227,8 +1489,15 @@ static int rtsx_pci_sdmmc_drv_probe(struct platform_device *pdev) pcr->slots[RTSX_SD_CARD].p_dev = pdev; pcr->slots[RTSX_SD_CARD].card_event = rtsx_pci_sdmmc_card_event; - mutex_init(&host->host_mutex); + host_addr = (unsigned long)host; + host->next_data.cookie = 1; + setup_timer(&host->timer, sd_request_timeout, host_addr); + tasklet_init(&host->cmd_tasklet, sd_get_rsp, host_addr); + tasklet_init(&host->data_tasklet, sd_finish_multi_rw, host_addr); + tasklet_init(&host->finish_tasklet, sd_finish_request, host_addr); + spin_lock_init(&host->lock); + pcr->slots[RTSX_SD_CARD].done_transfer = sd_isr_done_transfer; realtek_init_host(host); mmc_add_host(mmc); @@ -1241,6 +1510,8 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev) struct realtek_pci_sdmmc *host = platform_get_drvdata(pdev); struct rtsx_pcr *pcr; struct mmc_host *mmc; + struct mmc_request *mrq; + unsigned long flags; if (!host) return 0; @@ -1248,22 +1519,33 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev) pcr = host->pcr; pcr->slots[RTSX_SD_CARD].p_dev = NULL; pcr->slots[RTSX_SD_CARD].card_event = NULL; + pcr->slots[RTSX_SD_CARD].done_transfer = NULL; mmc = host->mmc; + mrq = host->mrq; - mutex_lock(&host->host_mutex); + spin_lock_irqsave(&host->lock, flags); if (host->mrq) { dev_dbg(&(pdev->dev), "%s: Controller removed during transfer\n", mmc_hostname(mmc)); - rtsx_pci_complete_unfinished_transfer(pcr); + if (mrq->sbc) + mrq->sbc->error = -ENOMEDIUM; + if (mrq->cmd) + mrq->cmd->error = -ENOMEDIUM; + if (mrq->stop) + mrq->stop->error = -ENOMEDIUM; + if (mrq->data) + mrq->data->error = -ENOMEDIUM; - host->mrq->cmd->error = -ENOMEDIUM; - if (host->mrq->stop) - host->mrq->stop->error = -ENOMEDIUM; - mmc_request_done(mmc, host->mrq); + tasklet_schedule(&host->finish_tasklet); } - mutex_unlock(&host->host_mutex); + spin_unlock_irqrestore(&host->lock, flags); + + del_timer_sync(&host->timer); + tasklet_kill(&host->cmd_tasklet); + tasklet_kill(&host->data_tasklet); + tasklet_kill(&host->finish_tasklet); mmc_remove_host(mmc); host->eject = true; diff --git a/include/linux/mfd/rtsx_common.h b/include/linux/mfd/rtsx_common.h index 443176ee1ab0..7c36cc55d2c7 100644 --- a/include/linux/mfd/rtsx_common.h +++ b/include/linux/mfd/rtsx_common.h @@ -45,6 +45,7 @@ struct platform_device; struct rtsx_slot { struct platform_device *p_dev; void (*card_event)(struct platform_device *p_dev); + void (*done_transfer)(struct platform_device *p_dev); }; #endif diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h index a3835976f7c6..8d6bbd609ad9 100644 --- a/include/linux/mfd/rtsx_pci.h +++ b/include/linux/mfd/rtsx_pci.h @@ -943,6 +943,12 @@ void rtsx_pci_send_cmd_no_wait(struct rtsx_pcr *pcr); int rtsx_pci_send_cmd(struct rtsx_pcr *pcr, int timeout); int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist, int num_sg, bool read, int timeout); +int rtsx_pci_dma_map_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int num_sg, bool read); +int rtsx_pci_dma_unmap_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int num_sg, bool read); +int rtsx_pci_dma_transfer(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int sg_count, bool read); int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len); int rtsx_pci_write_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len); int rtsx_pci_card_pull_ctl_enable(struct rtsx_pcr *pcr, int card); -- cgit v1.2.3-59-g8ed1b From 68eb80e06bfa06035d0304686124974780308fae Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 18 Dec 2013 09:57:38 +0100 Subject: mmc: core: Rename max_discard_to to max_busy_timeout Rename host->max_discard_to to host->max_busy_timeout, to reflect that it tells the mmc core layer about the maximum supported busy detection timeout by the host. This timeout is at the moment only applicable to erase/trim/discard commands. By the renaming we provide the option of make use of it for other commands that cares about busy detection. In other words, those commands that wants an R1B response, like for example the mmc switch command. Do note that the max_busy_timeout is supposed to be specified only by hosts supporting MMC_CAP_WAIT_WHILE_BUSY. Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- drivers/mmc/core/core.c | 6 +++--- drivers/mmc/host/sdhci.c | 2 +- include/linux/mmc/host.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index f5a068d55c36..d9c1efa2ce15 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -2120,7 +2120,7 @@ static unsigned int mmc_do_calc_max_discard(struct mmc_card *card, y = 0; for (x = 1; x && x <= max_qty && max_qty - x >= qty; x <<= 1) { timeout = mmc_erase_timeout(card, arg, qty + x); - if (timeout > host->max_discard_to) + if (timeout > host->max_busy_timeout) break; if (timeout < last_timeout) break; @@ -2152,7 +2152,7 @@ unsigned int mmc_calc_max_discard(struct mmc_card *card) struct mmc_host *host = card->host; unsigned int max_discard, max_trim; - if (!host->max_discard_to) + if (!host->max_busy_timeout) return UINT_MAX; /* @@ -2172,7 +2172,7 @@ unsigned int mmc_calc_max_discard(struct mmc_card *card) max_discard = 0; } pr_debug("%s: calculated max. discard sectors %u for timeout %u ms\n", - mmc_hostname(host), max_discard, host->max_discard_to); + mmc_hostname(host), max_discard, host->max_busy_timeout); return max_discard; } EXPORT_SYMBOL(mmc_calc_max_discard); diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 8958edc03e04..d1e1bf52fe52 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2941,7 +2941,7 @@ int sdhci_add_host(struct sdhci_host *host) if (host->quirks & SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK) host->timeout_clk = mmc->f_max / 1000; - mmc->max_discard_to = (1 << 27) / host->timeout_clk; + mmc->max_busy_timeout = (1 << 27) / host->timeout_clk; mmc->caps |= MMC_CAP_SDIO_IRQ | MMC_CAP_ERASE | MMC_CAP_CMD23; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 719db89ef134..cb61ea4d6945 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -300,7 +300,7 @@ struct mmc_host { unsigned int max_req_size; /* maximum number of bytes in one req */ unsigned int max_blk_size; /* maximum size of one mmc block */ unsigned int max_blk_count; /* maximum number of blocks in one req */ - unsigned int max_discard_to; /* max. discard timeout in ms */ + unsigned int max_busy_timeout; /* max busy timeout in ms */ /* private data */ spinlock_t lock; /* lock for claim and bus ops */ -- cgit v1.2.3-59-g8ed1b From 1d4d77444bf4212c44585146a2b353ca24c815f9 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 8 Jan 2014 15:06:08 +0100 Subject: mmc: core: Rename cmd_timeout_ms to busy_timeout To better reflect that the cmd_timeout_ms is directly related to the busy detection timeout, let's rename it. Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- drivers/mmc/core/core.c | 2 +- drivers/mmc/core/mmc_ops.c | 2 +- drivers/mmc/host/sdhci.c | 8 ++++---- include/linux/mmc/core.h | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index d9c1efa2ce15..1935812e4215 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1933,7 +1933,7 @@ static int mmc_do_erase(struct mmc_card *card, unsigned int from, cmd.opcode = MMC_ERASE; cmd.arg = arg; cmd.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC; - cmd.cmd_timeout_ms = mmc_erase_timeout(card, arg, qty); + cmd.busy_timeout = mmc_erase_timeout(card, arg, qty); err = mmc_wait_for_cmd(card->host, &cmd, 0); if (err) { pr_err("mmc_erase: erase error %d, status %#x\n", diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c index e5b5eeb548d1..3377bbfc3585 100644 --- a/drivers/mmc/core/mmc_ops.c +++ b/drivers/mmc/core/mmc_ops.c @@ -432,7 +432,7 @@ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value, cmd.flags |= MMC_RSP_SPI_R1 | MMC_RSP_R1; - cmd.cmd_timeout_ms = timeout_ms; + cmd.busy_timeout = timeout_ms; if (index == EXT_CSD_SANITIZE_START) cmd.sanitize_busy = true; diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index d1e1bf52fe52..7f95211e9449 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -675,12 +675,12 @@ static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_command *cmd) return 0xE; /* Unspecified timeout, assume max */ - if (!data && !cmd->cmd_timeout_ms) + if (!data && !cmd->busy_timeout) return 0xE; /* timeout in us */ if (!data) - target_timeout = cmd->cmd_timeout_ms * 1000; + target_timeout = cmd->busy_timeout * 1000; else { target_timeout = data->timeout_ns / 1000; if (host->clock) @@ -1019,8 +1019,8 @@ void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd) } timeout = jiffies; - if (!cmd->data && cmd->cmd_timeout_ms > 9000) - timeout += DIV_ROUND_UP(cmd->cmd_timeout_ms, 1000) * HZ + HZ; + if (!cmd->data && cmd->busy_timeout > 9000) + timeout += DIV_ROUND_UP(cmd->busy_timeout, 1000) * HZ + HZ; else timeout += 10 * HZ; mod_timer(&host->timer, timeout); diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 87079fc38011..b27699612888 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -95,7 +95,7 @@ struct mmc_command { * actively failing requests */ - unsigned int cmd_timeout_ms; /* in milliseconds */ + unsigned int busy_timeout; /* busy detect timeout in ms */ /* Set this flag only for blocking sanitize request */ bool sanitize_busy; -- cgit v1.2.3-59-g8ed1b From 4509f847751c9d2a724f37fe831393fbac34b80f Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 8 Jan 2014 16:09:33 +0100 Subject: mmc: core: Add ignore_crc flag to __mmc_switch Instead of handle specific adaptations, releated to certain switch operations, inside __mmc_switch, push this to be handled by the caller instead. Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- drivers/mmc/core/core.c | 3 ++- drivers/mmc/core/mmc.c | 13 +++++++------ drivers/mmc/core/mmc_ops.c | 19 ++++++++++--------- include/linux/mmc/core.h | 2 +- 4 files changed, 20 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 1935812e4215..dc7a5fb81a5c 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -285,7 +285,8 @@ void mmc_start_bkops(struct mmc_card *card, bool from_exception) } err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, - EXT_CSD_BKOPS_START, 1, timeout, use_busy_signal, true); + EXT_CSD_BKOPS_START, 1, timeout, + use_busy_signal, true, false); if (err) { pr_warn("%s: Error %d starting bkops\n", mmc_hostname(card->host), err); diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 072171183d5b..c944692f3c4e 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -856,8 +856,8 @@ static int mmc_select_hs200(struct mmc_card *card) /* switch to HS200 mode if bus width set successfully */ if (!err) - err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, - EXT_CSD_HS_TIMING, 2, 0); + err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, + EXT_CSD_HS_TIMING, 2, 0, true, true, true); err: return err; } @@ -1074,9 +1074,10 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, host->caps2 & MMC_CAP2_HS200) err = mmc_select_hs200(card); else if (host->caps & MMC_CAP_MMC_HIGHSPEED) - err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, - EXT_CSD_HS_TIMING, 1, - card->ext_csd.generic_cmd6_time); + err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, + EXT_CSD_HS_TIMING, 1, + card->ext_csd.generic_cmd6_time, + true, true, true); if (err && err != -EBADMSG) goto free_card; @@ -1400,7 +1401,7 @@ static int mmc_poweroff_notify(struct mmc_card *card, unsigned int notify_type) err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_POWER_OFF_NOTIFICATION, - notify_type, timeout, true, false); + notify_type, timeout, true, false, false); if (err) pr_err("%s: Power Off Notification timed out, %u\n", mmc_hostname(card->host), timeout); diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c index 3377bbfc3585..5e1a2cbdc229 100644 --- a/drivers/mmc/core/mmc_ops.c +++ b/drivers/mmc/core/mmc_ops.c @@ -405,17 +405,18 @@ int mmc_spi_set_crc(struct mmc_host *host, int use_crc) * timeout of zero implies maximum possible timeout * @use_busy_signal: use the busy signal as response type * @send_status: send status cmd to poll for busy + * @ignore_crc: ignore CRC errors when sending status cmd to poll for busy * * Modifies the EXT_CSD register for selected card. */ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value, - unsigned int timeout_ms, bool use_busy_signal, bool send_status) + unsigned int timeout_ms, bool use_busy_signal, bool send_status, + bool ignore_crc) { int err; struct mmc_command cmd = {0}; unsigned long timeout; u32 status = 0; - bool ignore_crc = false; BUG_ON(!card); BUG_ON(!card->host); @@ -445,14 +446,13 @@ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value, return 0; /* - * Must check status to be sure of no errors - * If CMD13 is to check the busy completion of the timing change, - * disable the check of CRC error. + * CRC errors shall only be ignored in cases were CMD13 is used to poll + * to detect busy completion. */ - if (index == EXT_CSD_HS_TIMING && - !(card->host->caps & MMC_CAP_WAIT_WHILE_BUSY)) - ignore_crc = true; + if (card->host->caps & MMC_CAP_WAIT_WHILE_BUSY) + ignore_crc = false; + /* Must check status to be sure of no errors. */ timeout = jiffies + msecs_to_jiffies(MMC_OPS_TIMEOUT_MS); do { if (send_status) { @@ -501,7 +501,8 @@ EXPORT_SYMBOL_GPL(__mmc_switch); int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value, unsigned int timeout_ms) { - return __mmc_switch(card, set, index, value, timeout_ms, true, true); + return __mmc_switch(card, set, index, value, timeout_ms, true, true, + false); } EXPORT_SYMBOL_GPL(mmc_switch); diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index b27699612888..f206e29f94d7 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -152,7 +152,7 @@ extern int mmc_wait_for_app_cmd(struct mmc_host *, struct mmc_card *, struct mmc_command *, int); extern void mmc_start_bkops(struct mmc_card *card, bool from_exception); extern int __mmc_switch(struct mmc_card *, u8, u8, u8, unsigned int, bool, - bool); + bool, bool); extern int mmc_switch(struct mmc_card *, u8, u8, u8, unsigned int); extern int mmc_send_ext_csd(struct mmc_card *card, u8 *ext_csd); -- cgit v1.2.3-59-g8ed1b From 2a5153aaae498f645573a13f60fed8226cc4e865 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Tue, 25 Feb 2014 15:18:26 +0530 Subject: mmc: msm: Cleanup mmc-msm_sdcc.h header Commit 1ef21f6343ff ("ARM: msm: move platform_data definitions") moved the file to the current location but forgot to remove the pointer to its previous location. Clean it up. While at it also change the header file protection macros appropriately. Signed-off-by: Sachin Kamat Signed-off-by: Chris Ball --- include/linux/platform_data/mmc-msm_sdcc.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/mmc-msm_sdcc.h b/include/linux/platform_data/mmc-msm_sdcc.h index ffcd9e3a6a7e..55aa873c9396 100644 --- a/include/linux/platform_data/mmc-msm_sdcc.h +++ b/include/linux/platform_data/mmc-msm_sdcc.h @@ -1,8 +1,5 @@ -/* - * arch/arm/include/asm/mach/mmc.h - */ -#ifndef ASMARM_MACH_MMC_H -#define ASMARM_MACH_MMC_H +#ifndef __MMC_MSM_SDCC_H +#define __MMC_MSM_SDCC_H #include #include -- cgit v1.2.3-59-g8ed1b From 550459eeb21b8f3553283d506fdcb92c9147c1eb Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Tue, 25 Feb 2014 15:18:27 +0530 Subject: mmc: mvsdio: Cleanup mmc-mvsdio.h header Commit c02cecb92ed4 ("ARM: orion: move platform_data definitions") moved the file to the current location but forgot to remove the pointer to its previous location. Clean it up. While at it also change the header file protection macros appropriately. Signed-off-by: Sachin Kamat Signed-off-by: Chris Ball --- include/linux/platform_data/mmc-mvsdio.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/mmc-mvsdio.h b/include/linux/platform_data/mmc-mvsdio.h index 1190efedcb94..d02704cd3695 100644 --- a/include/linux/platform_data/mmc-mvsdio.h +++ b/include/linux/platform_data/mmc-mvsdio.h @@ -1,13 +1,11 @@ /* - * arch/arm/plat-orion/include/plat/mvsdio.h - * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any * warranty of any kind, whether express or implied. */ -#ifndef __MACH_MVSDIO_H -#define __MACH_MVSDIO_H +#ifndef __MMC_MVSDIO_H +#define __MMC_MVSDIO_H #include -- cgit v1.2.3-59-g8ed1b From 3843154598a00408f4214a68bd536fdf27b1df10 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 27 Feb 2014 18:20:00 +0900 Subject: f2fs: introduce large directory support This patch introduces an i_dir_level field to support large directory. Previously, f2fs maintains multi-level hash tables to find a dentry quickly from a bunch of chiild dentries in a directory, and the hash tables consist of the following tree structure as below. In Documentation/filesystems/f2fs.txt, ---------------------- A : bucket B : block N : MAX_DIR_HASH_DEPTH ---------------------- level #0 | A(2B) | level #1 | A(2B) - A(2B) | level #2 | A(2B) - A(2B) - A(2B) - A(2B) . | . . . . level #N/2 | A(2B) - A(2B) - A(2B) - A(2B) - A(2B) - ... - A(2B) . | . . . . level #N | A(4B) - A(4B) - A(4B) - A(4B) - A(4B) - ... - A(4B) But, if we can guess that a directory will handle a number of child files, we don't need to traverse the tree from level #0 to #N all the time. Since the lower level tables contain relatively small number of dentries, the miss ratio of the target dentry is likely to be high. In order to avoid that, we can configure the hash tables sparsely from level #0 like this. level #0 | A(2B) - A(2B) - A(2B) - A(2B) level #1 | A(2B) - A(2B) - A(2B) - A(2B) - A(2B) - ... - A(2B) . | . . . . level #N/2 | A(2B) - A(2B) - A(2B) - A(2B) - A(2B) - ... - A(2B) . | . . . . level #N | A(4B) - A(4B) - A(4B) - A(4B) - A(4B) - ... - A(4B) With this structure, we can skip the ineffective tree searches in lower level hash tables. This patch adds just a facility for this by introducing i_dir_level in f2fs_inode. Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 6 ++++-- fs/f2fs/dir.c | 21 ++++++++++++--------- fs/f2fs/f2fs.h | 1 + fs/f2fs/inode.c | 2 ++ include/linux/f2fs_fs.h | 2 +- 5 files changed, 20 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index b8d284975f0f..8eb06b0a7d2b 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -444,9 +444,11 @@ The number of blocks and buckets are determined by, # of blocks in level #n = | `- 4, Otherwise - ,- 2^n, if n < MAX_DIR_HASH_DEPTH / 2, + ,- 2^ (n + dir_level), + | if n < MAX_DIR_HASH_DEPTH / 2, # of buckets in level #n = | - `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1), Otherwise + `- 2^((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1), + Otherwise When F2FS finds a file name in a directory, at first a hash value of the file name is calculated. Then, F2FS scans the hash table in level #0 to find the diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index c3ea8f8cc80a..582fa00f3597 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -21,12 +21,12 @@ static unsigned long dir_blocks(struct inode *inode) >> PAGE_CACHE_SHIFT; } -static unsigned int dir_buckets(unsigned int level) +static unsigned int dir_buckets(unsigned int level, int dir_level) { if (level < MAX_DIR_HASH_DEPTH / 2) - return 1 << level; + return 1 << (level + dir_level); else - return 1 << ((MAX_DIR_HASH_DEPTH / 2) - 1); + return 1 << ((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1); } static unsigned int bucket_blocks(unsigned int level) @@ -65,13 +65,14 @@ static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; } -static unsigned long dir_block_index(unsigned int level, unsigned int idx) +static unsigned long dir_block_index(unsigned int level, + int dir_level, unsigned int idx) { unsigned long i; unsigned long bidx = 0; for (i = 0; i < level; i++) - bidx += dir_buckets(i) * bucket_blocks(i); + bidx += dir_buckets(i, dir_level) * bucket_blocks(i); bidx += idx * bucket_blocks(level); return bidx; } @@ -143,10 +144,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, f2fs_bug_on(level > MAX_DIR_HASH_DEPTH); - nbucket = dir_buckets(level); + nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); nblock = bucket_blocks(level); - bidx = dir_block_index(level, le32_to_cpu(namehash) % nbucket); + bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level, + le32_to_cpu(namehash) % nbucket); end_block = bidx + nblock; for (; bidx < end_block; bidx++) { @@ -467,10 +469,11 @@ start: if (level == current_depth) ++current_depth; - nbucket = dir_buckets(level); + nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); nblock = bucket_blocks(level); - bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket)); + bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level, + (le32_to_cpu(dentry_hash) % nbucket)); for (block = bidx; block <= (bidx + nblock - 1); block++) { dentry_page = get_new_data_page(dir, NULL, block, true); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4beedccc28a0..a82691674918 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -200,6 +200,7 @@ struct f2fs_inode_info { struct inode vfs_inode; /* serve a vfs inode */ unsigned long i_flags; /* keep an inode flags for ioctl */ unsigned char i_advise; /* use to give file attribute hints */ + unsigned char i_dir_level; /* use for dentry level for large dir */ unsigned int i_current_depth; /* use only in directory structure */ unsigned int i_pino; /* parent inode number */ umode_t i_acl_mode; /* keep file acl mode temporarily */ diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 08d69c94ab8b..d518e37df3a7 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -107,6 +107,7 @@ static int do_read_inode(struct inode *inode) fi->flags = 0; fi->i_advise = ri->i_advise; fi->i_pino = le32_to_cpu(ri->i_pino); + fi->i_dir_level = ri->i_dir_level; get_extent_info(&fi->ext, ri->i_ext); get_inline_info(fi, ri); @@ -204,6 +205,7 @@ void update_inode(struct inode *inode, struct page *node_page) ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags); ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino); ri->i_generation = cpu_to_le32(inode->i_generation); + ri->i_dir_level = F2FS_I(inode)->i_dir_level; __set_inode_rdev(inode, ri); set_cold_node(inode, node_page); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index da74d878dc4f..df53e1753a76 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -183,7 +183,7 @@ struct f2fs_inode { __le32 i_pino; /* parent inode number */ __le32 i_namelen; /* file name length */ __u8 i_name[F2FS_NAME_LEN]; /* file name for SPOR */ - __u8 i_reserved2; /* for backward compatibility */ + __u8 i_dir_level; /* dentry_level for large dir */ struct f2fs_extent i_ext; /* caching a largest extent */ -- cgit v1.2.3-59-g8ed1b From a59ce6584d566847980f9dcad5343cd9856145c8 Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Fri, 31 Jan 2014 22:36:28 -0800 Subject: leds: leds-mc13783: Add MC34708 LED support This patch adds support for two LEDs on MC34708 PMIC. Signed-off-by: Alexander Shiyan Signed-off-by: Bryan Wu --- drivers/leds/Kconfig | 2 +- drivers/leds/leds-mc13783.c | 98 ++++++++++++++++++++++----------------------- include/linux/mfd/mc13xxx.h | 6 +++ 3 files changed, 55 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index 72156c123033..93466d215706 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -416,7 +416,7 @@ config LEDS_MC13783 depends on MFD_MC13XXX help This option enable support for on-chip LED drivers found - on Freescale Semiconductor MC13783/MC13892 PMIC. + on Freescale Semiconductor MC13783/MC13892/MC34708 PMIC. config LEDS_NS2 tristate "LED support for Network Space v2 GPIO LEDs" diff --git a/drivers/leds/leds-mc13783.c b/drivers/leds/leds-mc13783.c index ca87a1b4a0db..68f2455c672f 100644 --- a/drivers/leds/leds-mc13783.c +++ b/drivers/leds/leds-mc13783.c @@ -1,5 +1,5 @@ /* - * LEDs driver for Freescale MC13783/MC13892 + * LEDs driver for Freescale MC13783/MC13892/MC34708 * * Copyright (C) 2010 Philippe Rétornaz * @@ -23,23 +23,23 @@ #include #include -#define MC13XXX_REG_LED_CONTROL(x) (51 + (x)) - struct mc13xxx_led_devtype { int led_min; int led_max; int num_regs; + u32 ledctrl_base; }; struct mc13xxx_led { struct led_classdev cdev; struct work_struct work; - struct mc13xxx *master; enum led_brightness new_brightness; int id; + struct mc13xxx_leds *leds; }; struct mc13xxx_leds { + struct mc13xxx *master; struct mc13xxx_led_devtype *devtype; int num_leds; struct mc13xxx_led led[0]; @@ -48,24 +48,15 @@ struct mc13xxx_leds { static void mc13xxx_led_work(struct work_struct *work) { struct mc13xxx_led *led = container_of(work, struct mc13xxx_led, work); - int reg, mask, value, bank, off, shift; + struct mc13xxx_leds *leds = led->leds; + unsigned int reg, mask, value, bank, off, shift; switch (led->id) { case MC13783_LED_MD: - reg = MC13XXX_REG_LED_CONTROL(2); - shift = 9; - mask = 0x0f; - value = led->new_brightness >> 4; - break; case MC13783_LED_AD: - reg = MC13XXX_REG_LED_CONTROL(2); - shift = 13; - mask = 0x0f; - value = led->new_brightness >> 4; - break; case MC13783_LED_KP: - reg = MC13XXX_REG_LED_CONTROL(2); - shift = 17; + reg = 2; + shift = 9 + (led->id - MC13783_LED_MD) * 4; mask = 0x0f; value = led->new_brightness >> 4; break; @@ -80,26 +71,16 @@ static void mc13xxx_led_work(struct work_struct *work) case MC13783_LED_B3: off = led->id - MC13783_LED_R1; bank = off / 3; - reg = MC13XXX_REG_LED_CONTROL(3) + bank; + reg = 3 + bank; shift = (off - bank * 3) * 5 + 6; value = led->new_brightness >> 3; mask = 0x1f; break; case MC13892_LED_MD: - reg = MC13XXX_REG_LED_CONTROL(0); - shift = 3; - mask = 0x3f; - value = led->new_brightness >> 2; - break; case MC13892_LED_AD: - reg = MC13XXX_REG_LED_CONTROL(0); - shift = 15; - mask = 0x3f; - value = led->new_brightness >> 2; - break; case MC13892_LED_KP: - reg = MC13XXX_REG_LED_CONTROL(1); - shift = 3; + reg = (led->id - MC13892_LED_MD) / 2; + shift = 3 + (led->id - MC13892_LED_MD) * 12; mask = 0x3f; value = led->new_brightness >> 2; break; @@ -108,16 +89,24 @@ static void mc13xxx_led_work(struct work_struct *work) case MC13892_LED_B: off = led->id - MC13892_LED_R; bank = off / 2; - reg = MC13XXX_REG_LED_CONTROL(2) + bank; + reg = 2 + bank; shift = (off - bank * 2) * 12 + 3; value = led->new_brightness >> 2; mask = 0x3f; break; + case MC34708_LED_R: + case MC34708_LED_G: + reg = 0; + shift = 3 + (led->id - MC34708_LED_R) * 12; + value = led->new_brightness >> 2; + mask = 0x3f; + break; default: BUG(); } - mc13xxx_reg_rmw(led->master, reg, mask << shift, value << shift); + mc13xxx_reg_rmw(leds->master, leds->devtype->ledctrl_base + reg, + mask << shift, value << shift); } static void mc13xxx_led_set(struct led_classdev *led_cdev, @@ -132,16 +121,17 @@ static void mc13xxx_led_set(struct led_classdev *led_cdev, static int __init mc13xxx_led_probe(struct platform_device *pdev) { - struct mc13xxx_leds_platform_data *pdata = dev_get_platdata(&pdev->dev); - struct mc13xxx *mcdev = dev_get_drvdata(pdev->dev.parent); + struct device *dev = &pdev->dev; + struct mc13xxx_leds_platform_data *pdata = dev_get_platdata(dev); + struct mc13xxx *mcdev = dev_get_drvdata(dev->parent); struct mc13xxx_led_devtype *devtype = (struct mc13xxx_led_devtype *)pdev->id_entry->driver_data; struct mc13xxx_leds *leds; int i, id, num_leds, ret = -ENODATA; - u32 reg, init_led = 0; + u32 init_led = 0; if (!pdata) { - dev_err(&pdev->dev, "Missing platform data\n"); + dev_err(dev, "Missing platform data\n"); return -ENODEV; } @@ -149,23 +139,23 @@ static int __init mc13xxx_led_probe(struct platform_device *pdev) if ((num_leds < 1) || (num_leds > (devtype->led_max - devtype->led_min + 1))) { - dev_err(&pdev->dev, "Invalid LED count %d\n", num_leds); + dev_err(dev, "Invalid LED count %d\n", num_leds); return -EINVAL; } - leds = devm_kzalloc(&pdev->dev, num_leds * sizeof(struct mc13xxx_led) + + leds = devm_kzalloc(dev, num_leds * sizeof(struct mc13xxx_led) + sizeof(struct mc13xxx_leds), GFP_KERNEL); if (!leds) return -ENOMEM; leds->devtype = devtype; leds->num_leds = num_leds; + leds->master = mcdev; platform_set_drvdata(pdev, leds); for (i = 0; i < devtype->num_regs; i++) { - reg = pdata->led_control[i]; - WARN_ON(reg >= (1 << 24)); - ret = mc13xxx_reg_write(mcdev, MC13XXX_REG_LED_CONTROL(i), reg); + ret = mc13xxx_reg_write(mcdev, leds->devtype->ledctrl_base + i, + pdata->led_control[i]); if (ret) return ret; } @@ -180,19 +170,18 @@ static int __init mc13xxx_led_probe(struct platform_device *pdev) trig = pdata->led[i].default_trigger; if ((id > devtype->led_max) || (id < devtype->led_min)) { - dev_err(&pdev->dev, "Invalid ID %i\n", id); + dev_err(dev, "Invalid ID %i\n", id); break; } if (init_led & (1 << id)) { - dev_warn(&pdev->dev, - "LED %i already initialized\n", id); + dev_warn(dev, "LED %i already initialized\n", id); break; } init_led |= 1 << id; leds->led[i].id = id; - leds->led[i].master = mcdev; + leds->led[i].leds = leds; leds->led[i].cdev.name = name; leds->led[i].cdev.default_trigger = trig; leds->led[i].cdev.brightness_set = mc13xxx_led_set; @@ -200,10 +189,9 @@ static int __init mc13xxx_led_probe(struct platform_device *pdev) INIT_WORK(&leds->led[i].work, mc13xxx_led_work); - ret = led_classdev_register(pdev->dev.parent, - &leds->led[i].cdev); + ret = led_classdev_register(dev->parent, &leds->led[i].cdev); if (ret) { - dev_err(&pdev->dev, "Failed to register LED %i\n", id); + dev_err(dev, "Failed to register LED %i\n", id); break; } } @@ -219,8 +207,8 @@ static int __init mc13xxx_led_probe(struct platform_device *pdev) static int mc13xxx_led_remove(struct platform_device *pdev) { - struct mc13xxx *mcdev = dev_get_drvdata(pdev->dev.parent); struct mc13xxx_leds *leds = platform_get_drvdata(pdev); + struct mc13xxx *mcdev = leds->master; int i; for (i = 0; i < leds->num_leds; i++) { @@ -229,7 +217,7 @@ static int mc13xxx_led_remove(struct platform_device *pdev) } for (i = 0; i < leds->devtype->num_regs; i++) - mc13xxx_reg_write(mcdev, MC13XXX_REG_LED_CONTROL(i), 0); + mc13xxx_reg_write(mcdev, leds->devtype->ledctrl_base + i, 0); return 0; } @@ -238,17 +226,27 @@ static const struct mc13xxx_led_devtype mc13783_led_devtype = { .led_min = MC13783_LED_MD, .led_max = MC13783_LED_B3, .num_regs = 6, + .ledctrl_base = 51, }; static const struct mc13xxx_led_devtype mc13892_led_devtype = { .led_min = MC13892_LED_MD, .led_max = MC13892_LED_B, .num_regs = 4, + .ledctrl_base = 51, +}; + +static const struct mc13xxx_led_devtype mc34708_led_devtype = { + .led_min = MC34708_LED_R, + .led_max = MC34708_LED_G, + .num_regs = 1, + .ledctrl_base = 54, }; static const struct platform_device_id mc13xxx_led_id_table[] = { { "mc13783-led", (kernel_ulong_t)&mc13783_led_devtype, }, { "mc13892-led", (kernel_ulong_t)&mc13892_led_devtype, }, + { "mc34708-led", (kernel_ulong_t)&mc34708_led_devtype, }, { } }; MODULE_DEVICE_TABLE(platform, mc13xxx_led_id_table); diff --git a/include/linux/mfd/mc13xxx.h b/include/linux/mfd/mc13xxx.h index ac39d910e70b..a326c850f046 100644 --- a/include/linux/mfd/mc13xxx.h +++ b/include/linux/mfd/mc13xxx.h @@ -104,6 +104,9 @@ enum { MC13892_LED_R, MC13892_LED_G, MC13892_LED_B, + /* MC34708 LED IDs */ + MC34708_LED_R, + MC34708_LED_G, }; struct mc13xxx_led_platform_data { @@ -163,6 +166,9 @@ struct mc13xxx_leds_platform_data { #define MC13892_LED_C2_CURRENT_G(x) (((x) & 0x7) << 21) /* MC13892 LED Control 3 */ #define MC13892_LED_C3_CURRENT_B(x) (((x) & 0x7) << 9) +/* MC34708 LED Control 0 */ +#define MC34708_LED_C0_CURRENT_R(x) (((x) & 0x3) << 9) +#define MC34708_LED_C0_CURRENT_G(x) (((x) & 0x3) << 21) u32 led_control[MAX_LED_CONTROL_REGS]; }; -- cgit v1.2.3-59-g8ed1b From fc87eb0b588394d8304d942bcd5c71d6bcc595c7 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Thu, 13 Feb 2014 22:37:09 -0800 Subject: leds: leds-s3c24xx: Trivial cleanup in header file Commit 436d42c61c3e ("ARM: samsung: move platform_data definitions") moved the files to the current location but forgot to remove the pointer to its previous location. Clean it up. While at it also change the header file protection macros appropriately. Signed-off-by: Sachin Kamat Signed-off-by: Bryan Wu --- include/linux/platform_data/leds-s3c24xx.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/leds-s3c24xx.h b/include/linux/platform_data/leds-s3c24xx.h index d8a7672519b6..441a6f290649 100644 --- a/include/linux/platform_data/leds-s3c24xx.h +++ b/include/linux/platform_data/leds-s3c24xx.h @@ -1,5 +1,4 @@ -/* arch/arm/mach-s3c2410/include/mach/leds-gpio.h - * +/* * Copyright (c) 2006 Simtec Electronics * http://armlinux.simtec.co.uk/ * Ben Dooks @@ -11,8 +10,8 @@ * published by the Free Software Foundation. */ -#ifndef __ASM_ARCH_LEDSGPIO_H -#define __ASM_ARCH_LEDSGPIO_H "leds-gpio.h" +#ifndef __LEDS_S3C24XX_H +#define __LEDS_S3C24XX_H #define S3C24XX_LEDF_ACTLOW (1<<0) /* LED is on when GPIO low */ #define S3C24XX_LEDF_TRISTATE (1<<1) /* tristate to turn off */ @@ -25,4 +24,4 @@ struct s3c24xx_led_platdata { char *def_trigger; }; -#endif /* __ASM_ARCH_LEDSGPIO_H */ +#endif /* __LEDS_S3C24XX_H */ -- cgit v1.2.3-59-g8ed1b From 42c1add97073b5a701b8aee0fdb69ef0345d4c50 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 28 Feb 2014 21:32:44 +0000 Subject: mmc: sdhci-spear: remove support for power gpio None of this code is currently used: there are no definitions of struct sdhci_plat_data in arch/arm, neither are there any DT properties which use card_power_gpio/power_active_high/power_always_enb. In any case, slot power control should be rigged up via vmmc and the regulator subsystem in the DT case. Signed-off-by: Russell King Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci-spear.c | 32 -------------------------------- include/linux/mmc/sdhci-spear.h | 8 -------- 2 files changed, 40 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sdhci-spear.c b/drivers/mmc/host/sdhci-spear.c index fc6eac5b1064..676df4623057 100644 --- a/drivers/mmc/host/sdhci-spear.c +++ b/drivers/mmc/host/sdhci-spear.c @@ -56,14 +56,6 @@ static irqreturn_t sdhci_gpio_irq(int irq, void *dev_id) gpio_irq_type = val ? IRQF_TRIGGER_LOW : IRQF_TRIGGER_HIGH; irq_set_irq_type(irq, gpio_irq_type); - if (sdhci->data->card_power_gpio >= 0) { - if (!sdhci->data->power_always_enb) { - /* if card inserted, give power, otherwise remove it */ - val = sdhci->data->power_active_high ? !val : val ; - gpio_set_value(sdhci->data->card_power_gpio, val); - } - } - /* inform sdhci driver about card insertion/removal */ tasklet_schedule(&host->card_tasklet); @@ -179,30 +171,6 @@ static int sdhci_probe(struct platform_device *pdev) if (!sdhci->data) return 0; - if (sdhci->data->card_power_gpio >= 0) { - int val = 0; - - ret = devm_gpio_request(&pdev->dev, - sdhci->data->card_power_gpio, "sdhci"); - if (ret < 0) { - dev_dbg(&pdev->dev, "gpio request fail: %d\n", - sdhci->data->card_power_gpio); - goto set_drvdata; - } - - if (sdhci->data->power_always_enb) - val = sdhci->data->power_active_high; - else - val = !sdhci->data->power_active_high; - - ret = gpio_direction_output(sdhci->data->card_power_gpio, val); - if (ret) { - dev_dbg(&pdev->dev, "gpio set direction fail: %d\n", - sdhci->data->card_power_gpio); - goto set_drvdata; - } - } - if (sdhci->data->card_int_gpio >= 0) { ret = devm_gpio_request(&pdev->dev, sdhci->data->card_int_gpio, "sdhci"); diff --git a/include/linux/mmc/sdhci-spear.h b/include/linux/mmc/sdhci-spear.h index e78c0e236e9d..8cc095a76cf8 100644 --- a/include/linux/mmc/sdhci-spear.h +++ b/include/linux/mmc/sdhci-spear.h @@ -18,17 +18,9 @@ /* * struct sdhci_plat_data: spear sdhci platform data structure * - * @card_power_gpio: gpio pin for enabling/disabling power to sdhci socket - * @power_active_high: if set, enable power to sdhci socket by setting - * card_power_gpio - * @power_always_enb: If set, then enable power on probe, otherwise enable only - * on card insertion and disable on card removal. * card_int_gpio: gpio pin used for card detection */ struct sdhci_plat_data { - int card_power_gpio; - int power_active_high; - int power_always_enb; int card_int_gpio; }; -- cgit v1.2.3-59-g8ed1b From 0c176170089c3a7f2a891f9860f5cdc5f481ff78 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 10 Feb 2014 11:03:56 +0100 Subject: i2c: add deprecation warning for class based instantiation Class based instantiation can cause noticeable delays when booting. This mechanism is used when it is not possible to describe slaves on I2C busses. As we do have other mechanisms, most embedded I2C will not need classes and for embedded it is explicitly not recommended to use them. Add a deprecation warning for drivers which want to disable class based instantiation in the near future to gain boot-up time, so users relying on this technique can switch to something better. They really should. Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core.c | 7 +++++++ include/linux/i2c.h | 1 + 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 5fb80b8962a2..98a5fd950f16 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -1941,6 +1941,13 @@ static int i2c_detect_address(struct i2c_client *temp_client, struct i2c_client *client; /* Detection succeeded, instantiate the device */ + if (adapter->class & I2C_CLASS_DEPRECATED) + dev_warn(&adapter->dev, + "This adapter will soon drop class based instantiation of devices. " + "Please make sure client 0x%02x gets instantiated by other means. " + "Check 'Documentation/i2c/instantiating-devices' for details.\n", + info.addr); + dev_dbg(&adapter->dev, "Creating %s at 0x%02x\n", info.type, info.addr); client = i2c_new_device(adapter, &info); diff --git a/include/linux/i2c.h b/include/linux/i2c.h index deddeb8c337c..b556e0ab946f 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -487,6 +487,7 @@ void i2c_unlock_adapter(struct i2c_adapter *); #define I2C_CLASS_HWMON (1<<0) /* lm_sensors, ... */ #define I2C_CLASS_DDC (1<<3) /* DDC bus on graphics adapters */ #define I2C_CLASS_SPD (1<<7) /* Memory modules */ +#define I2C_CLASS_DEPRECATED (1<<8) /* Warn users that adapter will stop using classes */ /* Internal numbers to terminate lists */ #define I2C_CLIENT_END 0xfffeU -- cgit v1.2.3-59-g8ed1b From 7cbccb55f04bef306bc2840185ec8f986bd0df3c Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Sun, 16 Feb 2014 14:21:22 +0100 Subject: dma: Remove comment about embedding dma_slave_config into custom structs The documentation for the dma_slave_config struct recommends that if a DMA controller has special configuration options, which can not be configured through the dma_slave_config struct, the driver should create its own custom config struct and embed the dma_slave_config struct in it and pass the custom config struct to dmaengine_slave_config(). This overloads the generic dmaengine_slave_config() API with custom semantics and any caller of the dmaengine_slave_config() that is not aware of these special semantics will cause undefined behavior. This means that it is impossible for generic code to make use of dmaengine_slave_config(). Such a restriction contradicts the very idea of having a generic API. E.g. consider the following case of a DMA controller that has an option to reverse the field polarity of the DMA transfer with the following implementation for setting the configuration: struct my_slave_config { struct dma_slave_config config; unsigned int field_polarity; }; static int my_dma_controller_slave_config(struct dma_chan *chan, struct dma_slave_config *config) { struct my_slave_config *my_cfg = container_of(config, struct my_slave_config, config); ... my_dma_set_field_polarity(chan, my_cfg->field_polarity); ... } Now a generic user of the dmaengine API might want to configure a DMA channel for this DMA controller that it obtained using the following code: struct dma_slave_config config; config.src_addr = ...; ... dmaengine_slave_config(chan, &config); The call to dmaengine_slave_config() will eventually call into my_dma_controller_slave_config() which will cast from dma_slave_config to my_slave_config and then tries to access the field_polarity member. Since the dma_slave_config struct that was passed in was never embedded into a my_slave_config struct this attempt will just read random stack garbage and use that to configure the DMA controller. This is bad. Instead, if a DMA controller really needs to have custom configuration options, the driver should create a custom API for it. This makes it very clear that there is a direct dependency of a user of such an API and the implementer. E.g.: int my_dma_set_field_polarity(struct dma_chan *chan, unsigned int field_polarity) { if (chan->device->dev->driver != &my_dma_controller_driver.driver) return -EINVAL; ... } EXPORT_SYMBOL_GPL(my_dma_set_field_polarity); Signed-off-by: Lars-Peter Clausen Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index c5c92d59e531..8300fb87b84a 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -341,15 +341,11 @@ enum dma_slave_buswidth { * and this struct will then be passed in as an argument to the * DMA engine device_control() function. * - * The rationale for adding configuration information to this struct - * is as follows: if it is likely that most DMA slave controllers in - * the world will support the configuration option, then make it - * generic. If not: if it is fixed so that it be sent in static from - * the platform data, then prefer to do that. Else, if it is neither - * fixed at runtime, nor generic enough (such as bus mastership on - * some CPU family and whatnot) then create a custom slave config - * struct and pass that, then make this config a member of that - * struct, if applicable. + * The rationale for adding configuration information to this struct is as + * follows: if it is likely that more than one DMA slave controllers in + * the world will support the configuration option, then make it generic. + * If not: if it is fixed so that it be sent in static from the platform + * data, then prefer to do that. */ struct dma_slave_config { enum dma_transfer_direction direction; -- cgit v1.2.3-59-g8ed1b From a90902531a06a030a252a07fbff7f45a189a64fe Mon Sep 17 00:00:00 2001 From: William Roberts Date: Tue, 11 Feb 2014 10:11:59 -0800 Subject: mm: Create utility function for accessing a tasks commandline value introduce get_cmdline() for retreiving the value of a processes proc/self/cmdline value. Acked-by: David Rientjes Acked-by: Stephen Smalley Acked-by: Richard Guy Briggs Signed-off-by: William Roberts Signed-off-by: Eric Paris --- include/linux/mm.h | 1 + mm/util.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 35527173cf50..01e797031993 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1134,6 +1134,7 @@ void account_page_writeback(struct page *page); int set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); int clear_page_dirty_for_io(struct page *page); +int get_cmdline(struct task_struct *task, char *buffer, int buflen); /* Is the vma a continuation of the stack vma above it? */ static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr) diff --git a/mm/util.c b/mm/util.c index 808f375648e7..43b44199c5e3 100644 --- a/mm/util.c +++ b/mm/util.c @@ -413,6 +413,54 @@ unsigned long vm_commit_limit(void) * sysctl_overcommit_ratio / 100) + total_swap_pages; } +/** + * get_cmdline() - copy the cmdline value to a buffer. + * @task: the task whose cmdline value to copy. + * @buffer: the buffer to copy to. + * @buflen: the length of the buffer. Larger cmdline values are truncated + * to this length. + * Returns the size of the cmdline field copied. Note that the copy does + * not guarantee an ending NULL byte. + */ +int get_cmdline(struct task_struct *task, char *buffer, int buflen) +{ + int res = 0; + unsigned int len; + struct mm_struct *mm = get_task_mm(task); + if (!mm) + goto out; + if (!mm->arg_end) + goto out_mm; /* Shh! No looking before we're done */ + + len = mm->arg_end - mm->arg_start; + + if (len > buflen) + len = buflen; + + res = access_process_vm(task, mm->arg_start, buffer, len, 0); + + /* + * If the nul at the end of args has been overwritten, then + * assume application is using setproctitle(3). + */ + if (res > 0 && buffer[res-1] != '\0' && len < buflen) { + len = strnlen(buffer, res); + if (len < res) { + res = len; + } else { + len = mm->env_end - mm->env_start; + if (len > buflen - res) + len = buflen - res; + res += access_process_vm(task, mm->env_start, + buffer+res, len, 0); + res = strnlen(buffer, res); + } + } +out_mm: + mmput(mm); +out: + return res; +} /* Tracepoints definitions. */ EXPORT_TRACEPOINT_SYMBOL(kmalloc); -- cgit v1.2.3-59-g8ed1b From 45126da22452ac3d4685401a1e921a39ac0ff2f6 Mon Sep 17 00:00:00 2001 From: Sonic Zhang Date: Tue, 28 Jan 2014 16:55:21 +0800 Subject: i2c: bfin-twi: move bits macros and structs in header from arch include to generic include The ADI TWI peripheral is not binding to the Blackfin processor only. The bits macros and structs should be put in the generic include header. And update head file path in drivers accordingly. Signed-off-by: Sonic Zhang Signed-off-by: Wolfram Sang --- arch/blackfin/include/asm/bfin_twi.h | 126 +----------------------------- arch/blackfin/kernel/debug-mmrs.c | 1 + drivers/i2c/busses/i2c-bfin-twi.c | 4 +- include/linux/i2c/bfin_twi.h | 145 +++++++++++++++++++++++++++++++++++ 4 files changed, 149 insertions(+), 127 deletions(-) create mode 100644 include/linux/i2c/bfin_twi.h (limited to 'include/linux') diff --git a/arch/blackfin/include/asm/bfin_twi.h b/arch/blackfin/include/asm/bfin_twi.h index 90c3c006557d..34cc395f8b77 100644 --- a/arch/blackfin/include/asm/bfin_twi.h +++ b/arch/blackfin/include/asm/bfin_twi.h @@ -9,60 +9,7 @@ #ifndef __ASM_BFIN_TWI_H__ #define __ASM_BFIN_TWI_H__ -#include -#include - -/* - * All Blackfin system MMRs are padded to 32bits even if the register - * itself is only 16bits. So use a helper macro to streamline this. - */ -#define __BFP(m) u16 m; u16 __pad_##m - -/* - * bfin twi registers layout - */ -struct bfin_twi_regs { - __BFP(clkdiv); - __BFP(control); - __BFP(slave_ctl); - __BFP(slave_stat); - __BFP(slave_addr); - __BFP(master_ctl); - __BFP(master_stat); - __BFP(master_addr); - __BFP(int_stat); - __BFP(int_mask); - __BFP(fifo_ctl); - __BFP(fifo_stat); - u32 __pad[20]; - __BFP(xmt_data8); - __BFP(xmt_data16); - __BFP(rcv_data8); - __BFP(rcv_data16); -}; - -#undef __BFP - -struct bfin_twi_iface { - int irq; - spinlock_t lock; - char read_write; - u8 command; - u8 *transPtr; - int readNum; - int writeNum; - int cur_mode; - int manual_stop; - int result; - struct i2c_adapter adap; - struct completion complete; - struct i2c_msg *pmsg; - int msg_num; - int cur_msg; - u16 saved_clkdiv; - u16 saved_control; - struct bfin_twi_regs __iomem *regs_base; -}; +#include #define DEFINE_TWI_REG(reg_name, reg) \ static inline u16 read_##reg_name(struct bfin_twi_iface *iface) \ @@ -113,75 +60,4 @@ static inline u16 read_RCV_DATA16(struct bfin_twi_iface *iface) } #endif - -/* ******************** TWO-WIRE INTERFACE (TWI) MASKS ***********************/ -/* TWI_CLKDIV Macros (Use: *pTWI_CLKDIV = CLKLOW(x)|CLKHI(y); ) */ -#define CLKLOW(x) ((x) & 0xFF) /* Periods Clock Is Held Low */ -#define CLKHI(y) (((y)&0xFF)<<0x8) /* Periods Before New Clock Low */ - -/* TWI_PRESCALE Masks */ -#define PRESCALE 0x007F /* SCLKs Per Internal Time Reference (10MHz) */ -#define TWI_ENA 0x0080 /* TWI Enable */ -#define SCCB 0x0200 /* SCCB Compatibility Enable */ - -/* TWI_SLAVE_CTL Masks */ -#define SEN 0x0001 /* Slave Enable */ -#define SADD_LEN 0x0002 /* Slave Address Length */ -#define STDVAL 0x0004 /* Slave Transmit Data Valid */ -#define NAK 0x0008 /* NAK/ACK* Generated At Conclusion Of Transfer */ -#define GEN 0x0010 /* General Call Address Matching Enabled */ - -/* TWI_SLAVE_STAT Masks */ -#define SDIR 0x0001 /* Slave Transfer Direction (Transmit/Receive*) */ -#define GCALL 0x0002 /* General Call Indicator */ - -/* TWI_MASTER_CTL Masks */ -#define MEN 0x0001 /* Master Mode Enable */ -#define MADD_LEN 0x0002 /* Master Address Length */ -#define MDIR 0x0004 /* Master Transmit Direction (RX/TX*) */ -#define FAST 0x0008 /* Use Fast Mode Timing Specs */ -#define STOP 0x0010 /* Issue Stop Condition */ -#define RSTART 0x0020 /* Repeat Start or Stop* At End Of Transfer */ -#define DCNT 0x3FC0 /* Data Bytes To Transfer */ -#define SDAOVR 0x4000 /* Serial Data Override */ -#define SCLOVR 0x8000 /* Serial Clock Override */ - -/* TWI_MASTER_STAT Masks */ -#define MPROG 0x0001 /* Master Transfer In Progress */ -#define LOSTARB 0x0002 /* Lost Arbitration Indicator (Xfer Aborted) */ -#define ANAK 0x0004 /* Address Not Acknowledged */ -#define DNAK 0x0008 /* Data Not Acknowledged */ -#define BUFRDERR 0x0010 /* Buffer Read Error */ -#define BUFWRERR 0x0020 /* Buffer Write Error */ -#define SDASEN 0x0040 /* Serial Data Sense */ -#define SCLSEN 0x0080 /* Serial Clock Sense */ -#define BUSBUSY 0x0100 /* Bus Busy Indicator */ - -/* TWI_INT_SRC and TWI_INT_ENABLE Masks */ -#define SINIT 0x0001 /* Slave Transfer Initiated */ -#define SCOMP 0x0002 /* Slave Transfer Complete */ -#define SERR 0x0004 /* Slave Transfer Error */ -#define SOVF 0x0008 /* Slave Overflow */ -#define MCOMP 0x0010 /* Master Transfer Complete */ -#define MERR 0x0020 /* Master Transfer Error */ -#define XMTSERV 0x0040 /* Transmit FIFO Service */ -#define RCVSERV 0x0080 /* Receive FIFO Service */ - -/* TWI_FIFO_CTRL Masks */ -#define XMTFLUSH 0x0001 /* Transmit Buffer Flush */ -#define RCVFLUSH 0x0002 /* Receive Buffer Flush */ -#define XMTINTLEN 0x0004 /* Transmit Buffer Interrupt Length */ -#define RCVINTLEN 0x0008 /* Receive Buffer Interrupt Length */ - -/* TWI_FIFO_STAT Masks */ -#define XMTSTAT 0x0003 /* Transmit FIFO Status */ -#define XMT_EMPTY 0x0000 /* Transmit FIFO Empty */ -#define XMT_HALF 0x0001 /* Transmit FIFO Has 1 Byte To Write */ -#define XMT_FULL 0x0003 /* Transmit FIFO Full (2 Bytes To Write) */ - -#define RCVSTAT 0x000C /* Receive FIFO Status */ -#define RCV_EMPTY 0x0000 /* Receive FIFO Empty */ -#define RCV_HALF 0x0004 /* Receive FIFO Has 1 Byte To Read */ -#define RCV_FULL 0x000C /* Receive FIFO Full (2 Bytes To Read) */ - #endif diff --git a/arch/blackfin/kernel/debug-mmrs.c b/arch/blackfin/kernel/debug-mmrs.c index 01232a13470d..947ad0832338 100644 --- a/arch/blackfin/kernel/debug-mmrs.c +++ b/arch/blackfin/kernel/debug-mmrs.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/drivers/i2c/busses/i2c-bfin-twi.c b/drivers/i2c/busses/i2c-bfin-twi.c index c75f0e943b32..c8976a3e03df 100644 --- a/drivers/i2c/busses/i2c-bfin-twi.c +++ b/drivers/i2c/busses/i2c-bfin-twi.c @@ -21,10 +21,10 @@ #include #include #include +#include -#include -#include #include +#include #include /* SMBus mode*/ diff --git a/include/linux/i2c/bfin_twi.h b/include/linux/i2c/bfin_twi.h new file mode 100644 index 000000000000..135a4e0876ae --- /dev/null +++ b/include/linux/i2c/bfin_twi.h @@ -0,0 +1,145 @@ +/* + * i2c-bfin-twi.h - interface to ADI TWI controller + * + * Copyright 2005-2014 Analog Devices Inc. + * + * Licensed under the GPL-2 or later. + */ + +#ifndef __I2C_BFIN_TWI_H__ +#define __I2C_BFIN_TWI_H__ + +#include +#include + +/* + * ADI twi registers layout + */ +struct bfin_twi_regs { + u16 clkdiv; + u16 dummy1; + u16 control; + u16 dummy2; + u16 slave_ctl; + u16 dummy3; + u16 slave_stat; + u16 dummy4; + u16 slave_addr; + u16 dummy5; + u16 master_ctl; + u16 dummy6; + u16 master_stat; + u16 dummy7; + u16 master_addr; + u16 dummy8; + u16 int_stat; + u16 dummy9; + u16 int_mask; + u16 dummy10; + u16 fifo_ctl; + u16 dummy11; + u16 fifo_stat; + u16 dummy12; + u32 __pad[20]; + u16 xmt_data8; + u16 dummy13; + u16 xmt_data16; + u16 dummy14; + u16 rcv_data8; + u16 dummy15; + u16 rcv_data16; + u16 dummy16; +}; + +struct bfin_twi_iface { + int irq; + spinlock_t lock; + char read_write; + u8 command; + u8 *transPtr; + int readNum; + int writeNum; + int cur_mode; + int manual_stop; + int result; + struct i2c_adapter adap; + struct completion complete; + struct i2c_msg *pmsg; + int msg_num; + int cur_msg; + u16 saved_clkdiv; + u16 saved_control; + struct bfin_twi_regs __iomem *regs_base; +}; + +/* ******************** TWO-WIRE INTERFACE (TWI) MASKS ********************/ +/* TWI_CLKDIV Macros (Use: *pTWI_CLKDIV = CLKLOW(x)|CLKHI(y); ) */ +#define CLKLOW(x) ((x) & 0xFF) /* Periods Clock Is Held Low */ +#define CLKHI(y) (((y)&0xFF)<<0x8) /* Periods Before New Clock Low */ + +/* TWI_PRESCALE Masks */ +#define PRESCALE 0x007F /* SCLKs Per Internal Time Reference (10MHz) */ +#define TWI_ENA 0x0080 /* TWI Enable */ +#define SCCB 0x0200 /* SCCB Compatibility Enable */ + +/* TWI_SLAVE_CTL Masks */ +#define SEN 0x0001 /* Slave Enable */ +#define SADD_LEN 0x0002 /* Slave Address Length */ +#define STDVAL 0x0004 /* Slave Transmit Data Valid */ +#define NAK 0x0008 /* NAK Generated At Conclusion Of Transfer */ +#define GEN 0x0010 /* General Call Address Matching Enabled */ + +/* TWI_SLAVE_STAT Masks */ +#define SDIR 0x0001 /* Slave Transfer Direction (RX/TX*) */ +#define GCALL 0x0002 /* General Call Indicator */ + +/* TWI_MASTER_CTL Masks */ +#define MEN 0x0001 /* Master Mode Enable */ +#define MADD_LEN 0x0002 /* Master Address Length */ +#define MDIR 0x0004 /* Master Transmit Direction (RX/TX*) */ +#define FAST 0x0008 /* Use Fast Mode Timing Specs */ +#define STOP 0x0010 /* Issue Stop Condition */ +#define RSTART 0x0020 /* Repeat Start or Stop* At End Of Transfer */ +#define DCNT 0x3FC0 /* Data Bytes To Transfer */ +#define SDAOVR 0x4000 /* Serial Data Override */ +#define SCLOVR 0x8000 /* Serial Clock Override */ + +/* TWI_MASTER_STAT Masks */ +#define MPROG 0x0001 /* Master Transfer In Progress */ +#define LOSTARB 0x0002 /* Lost Arbitration Indicator (Xfer Aborted) */ +#define ANAK 0x0004 /* Address Not Acknowledged */ +#define DNAK 0x0008 /* Data Not Acknowledged */ +#define BUFRDERR 0x0010 /* Buffer Read Error */ +#define BUFWRERR 0x0020 /* Buffer Write Error */ +#define SDASEN 0x0040 /* Serial Data Sense */ +#define SCLSEN 0x0080 /* Serial Clock Sense */ +#define BUSBUSY 0x0100 /* Bus Busy Indicator */ + +/* TWI_INT_SRC and TWI_INT_ENABLE Masks */ +#define SINIT 0x0001 /* Slave Transfer Initiated */ +#define SCOMP 0x0002 /* Slave Transfer Complete */ +#define SERR 0x0004 /* Slave Transfer Error */ +#define SOVF 0x0008 /* Slave Overflow */ +#define MCOMP 0x0010 /* Master Transfer Complete */ +#define MERR 0x0020 /* Master Transfer Error */ +#define XMTSERV 0x0040 /* Transmit FIFO Service */ +#define RCVSERV 0x0080 /* Receive FIFO Service */ + +/* TWI_FIFO_CTRL Masks */ +#define XMTFLUSH 0x0001 /* Transmit Buffer Flush */ +#define RCVFLUSH 0x0002 /* Receive Buffer Flush */ +#define XMTINTLEN 0x0004 /* Transmit Buffer Interrupt Length */ +#define RCVINTLEN 0x0008 /* Receive Buffer Interrupt Length */ + +/* TWI_FIFO_STAT Masks */ +#define XMTSTAT 0x0003 /* Transmit FIFO Status */ +#define XMT_EMPTY 0x0000 /* Transmit FIFO Empty */ +#define XMT_HALF 0x0001 /* Transmit FIFO Has 1 Byte To Write */ +#define XMT_FULL 0x0003 /* Transmit FIFO Full (2 Bytes To Write) */ + +#define RCVSTAT 0x000C /* Receive FIFO Status */ +#define RCV_EMPTY 0x0000 /* Receive FIFO Empty */ +#define RCV_HALF 0x0004 /* Receive FIFO Has 1 Byte To Read */ +#define RCV_FULL 0x000C /* Receive FIFO Full (2 Bytes To Read) */ + +#endif -- cgit v1.2.3-59-g8ed1b From cd5006db1b6b9128d1f5b0f1ad17cbced9d3841c Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Fri, 14 Feb 2014 12:01:30 +0530 Subject: i2c: s3c2410: Trivial cleanup in header file Commit 436d42c61c3e ("ARM: samsung: move platform_data definitions") moved the files to the current location but forgot to remove the pointer to its previous location. Clean it up. While at it also change the header file protection macros appropriately. Signed-off-by: Sachin Kamat Signed-off-by: Wolfram Sang --- include/linux/platform_data/i2c-s3c2410.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/i2c-s3c2410.h b/include/linux/platform_data/i2c-s3c2410.h index 2a50048c1c44..05af66b840b9 100644 --- a/include/linux/platform_data/i2c-s3c2410.h +++ b/include/linux/platform_data/i2c-s3c2410.h @@ -1,5 +1,4 @@ -/* arch/arm/plat-s3c/include/plat/iic.h - * +/* * Copyright 2004-2009 Simtec Electronics * Ben Dooks * @@ -10,8 +9,8 @@ * published by the Free Software Foundation. */ -#ifndef __ASM_ARCH_IIC_H -#define __ASM_ARCH_IIC_H __FILE__ +#ifndef __I2C_S3C2410_H +#define __I2C_S3C2410_H __FILE__ #define S3C_IICFLG_FILTER (1<<0) /* enable s3c2440 filter */ @@ -76,4 +75,4 @@ extern void s3c_i2c7_cfg_gpio(struct platform_device *dev); extern struct s3c2410_platform_i2c default_i2c_data; -#endif /* __ASM_ARCH_IIC_H */ +#endif /* __I2C_S3C2410_H */ -- cgit v1.2.3-59-g8ed1b From f02ea4e6a47d50a38f5baadbe87f5087dd337db0 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Mon, 13 Jan 2014 14:27:12 +0800 Subject: mtd: nand: kill the the NAND_MAX_PAGESIZE/NAND_MAX_OOBSIZE for nand_buffers{} The patch converts the arrays to buffer pointers for nand_buffers{}. The cafe_nand.c is the only NAND_OWN_BUFFERS user which allocates nand_buffers{} itself. This patch disables the DMA for nand_scan_ident, and restores the DMA status after we finish the nand_scan_ident. This way, we can get page size and OOB size and use them to allocate cafe->dmabuf. Since the cafe_nand.c uses the NAND_ECC_HW_SYNDROME ECC mode, we do not allocate the buffers for @ecccalc and @ecccode. Signed-off-by: Huang Shijie Signed-off-by: Brian Norris --- drivers/mtd/nand/cafe_nand.c | 68 ++++++++++++++++++++++++++++++-------------- drivers/mtd/nand/nand_base.c | 19 ++++++++++--- include/linux/mtd/nand.h | 12 ++++---- 3 files changed, 67 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c index f2f64addb5e8..4e66726da9aa 100644 --- a/drivers/mtd/nand/cafe_nand.c +++ b/drivers/mtd/nand/cafe_nand.c @@ -627,6 +627,8 @@ static int cafe_nand_probe(struct pci_dev *pdev, struct cafe_priv *cafe; uint32_t ctrl; int err = 0; + int old_dma; + struct nand_buffers *nbuf; /* Very old versions shared the same PCI ident for all three functions on the chip. Verify the class too... */ @@ -655,13 +657,6 @@ static int cafe_nand_probe(struct pci_dev *pdev, err = -ENOMEM; goto out_free_mtd; } - cafe->dmabuf = dma_alloc_coherent(&cafe->pdev->dev, 2112 + sizeof(struct nand_buffers), - &cafe->dmaaddr, GFP_KERNEL); - if (!cafe->dmabuf) { - err = -ENOMEM; - goto out_ior; - } - cafe->nand.buffers = (void *)cafe->dmabuf + 2112; cafe->rs = init_rs_non_canonical(12, &cafe_mul, 0, 1, 8); if (!cafe->rs) { @@ -721,7 +716,7 @@ static int cafe_nand_probe(struct pci_dev *pdev, "CAFE NAND", mtd); if (err) { dev_warn(&pdev->dev, "Could not register IRQ %d\n", pdev->irq); - goto out_free_dma; + goto out_ior; } /* Disable master reset, enable NAND clock */ @@ -735,6 +730,32 @@ static int cafe_nand_probe(struct pci_dev *pdev, cafe_writel(cafe, 0x7006, GLOBAL_CTRL); cafe_writel(cafe, 0x700a, GLOBAL_CTRL); + /* Enable NAND IRQ in global IRQ mask register */ + cafe_writel(cafe, 0x80000007, GLOBAL_IRQ_MASK); + cafe_dev_dbg(&cafe->pdev->dev, "Control %x, IRQ mask %x\n", + cafe_readl(cafe, GLOBAL_CTRL), + cafe_readl(cafe, GLOBAL_IRQ_MASK)); + + /* Do not use the DMA for the nand_scan_ident() */ + old_dma = usedma; + usedma = 0; + + /* Scan to find existence of the device */ + if (nand_scan_ident(mtd, 2, NULL)) { + err = -ENXIO; + goto out_irq; + } + + cafe->dmabuf = dma_alloc_coherent(&cafe->pdev->dev, + 2112 + sizeof(struct nand_buffers) + + mtd->writesize + mtd->oobsize, + &cafe->dmaaddr, GFP_KERNEL); + if (!cafe->dmabuf) { + err = -ENOMEM; + goto out_irq; + } + cafe->nand.buffers = nbuf = (void *)cafe->dmabuf + 2112; + /* Set up DMA address */ cafe_writel(cafe, cafe->dmaaddr & 0xffffffff, NAND_DMA_ADDR0); if (sizeof(cafe->dmaaddr) > 4) @@ -746,16 +767,13 @@ static int cafe_nand_probe(struct pci_dev *pdev, cafe_dev_dbg(&cafe->pdev->dev, "Set DMA address to %x (virt %p)\n", cafe_readl(cafe, NAND_DMA_ADDR0), cafe->dmabuf); - /* Enable NAND IRQ in global IRQ mask register */ - cafe_writel(cafe, 0x80000007, GLOBAL_IRQ_MASK); - cafe_dev_dbg(&cafe->pdev->dev, "Control %x, IRQ mask %x\n", - cafe_readl(cafe, GLOBAL_CTRL), cafe_readl(cafe, GLOBAL_IRQ_MASK)); + /* this driver does not need the @ecccalc and @ecccode */ + nbuf->ecccalc = NULL; + nbuf->ecccode = NULL; + nbuf->databuf = (uint8_t *)(nbuf + 1); - /* Scan to find existence of the device */ - if (nand_scan_ident(mtd, 2, NULL)) { - err = -ENXIO; - goto out_irq; - } + /* Restore the DMA flag */ + usedma = old_dma; cafe->ctl2 = 1<<27; /* Reed-Solomon ECC */ if (mtd->writesize == 2048) @@ -773,7 +791,7 @@ static int cafe_nand_probe(struct pci_dev *pdev, } else { printk(KERN_WARNING "Unexpected NAND flash writesize %d. Aborting\n", mtd->writesize); - goto out_irq; + goto out_free_dma; } cafe->nand.ecc.mode = NAND_ECC_HW_SYNDROME; cafe->nand.ecc.size = mtd->writesize; @@ -790,7 +808,7 @@ static int cafe_nand_probe(struct pci_dev *pdev, err = nand_scan_tail(mtd); if (err) - goto out_irq; + goto out_free_dma; pci_set_drvdata(pdev, mtd); @@ -799,12 +817,15 @@ static int cafe_nand_probe(struct pci_dev *pdev, goto out; + out_free_dma: + dma_free_coherent(&cafe->pdev->dev, + 2112 + sizeof(struct nand_buffers) + + mtd->writesize + mtd->oobsize, + cafe->dmabuf, cafe->dmaaddr); out_irq: /* Disable NAND IRQ in global IRQ mask register */ cafe_writel(cafe, ~1 & cafe_readl(cafe, GLOBAL_IRQ_MASK), GLOBAL_IRQ_MASK); free_irq(pdev->irq, mtd); - out_free_dma: - dma_free_coherent(&cafe->pdev->dev, 2112, cafe->dmabuf, cafe->dmaaddr); out_ior: pci_iounmap(pdev, cafe->mmio); out_free_mtd: @@ -824,7 +845,10 @@ static void cafe_nand_remove(struct pci_dev *pdev) nand_release(mtd); free_rs(cafe->rs); pci_iounmap(pdev, cafe->mmio); - dma_free_coherent(&cafe->pdev->dev, 2112, cafe->dmabuf, cafe->dmaaddr); + dma_free_coherent(&cafe->pdev->dev, + 2112 + sizeof(struct nand_buffers) + + mtd->writesize + mtd->oobsize, + cafe->dmabuf, cafe->dmaaddr); kfree(mtd); } diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 9715a7ba164a..deeaa1cc4a85 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -3696,15 +3696,26 @@ int nand_scan_tail(struct mtd_info *mtd) int i; struct nand_chip *chip = mtd->priv; struct nand_ecc_ctrl *ecc = &chip->ecc; + struct nand_buffers *nbuf; /* New bad blocks should be marked in OOB, flash-based BBT, or both */ BUG_ON((chip->bbt_options & NAND_BBT_NO_OOB_BBM) && !(chip->bbt_options & NAND_BBT_USE_FLASH)); - if (!(chip->options & NAND_OWN_BUFFERS)) - chip->buffers = kmalloc(sizeof(*chip->buffers), GFP_KERNEL); - if (!chip->buffers) - return -ENOMEM; + if (!(chip->options & NAND_OWN_BUFFERS)) { + nbuf = kzalloc(sizeof(*nbuf) + mtd->writesize + + mtd->oobsize * 3, GFP_KERNEL); + if (!nbuf) + return -ENOMEM; + nbuf->ecccalc = (uint8_t *)(nbuf + 1); + nbuf->ecccode = nbuf->ecccalc + mtd->oobsize; + nbuf->databuf = nbuf->ecccode + mtd->oobsize; + + chip->buffers = nbuf; + } else { + if (!chip->buffers) + return -ENOMEM; + } /* Set the internal oob buffer location, just after the page data */ chip->oob_poi = chip->buffers->databuf + mtd->writesize; diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 32f8612469d8..520ebca11f5d 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -435,17 +435,17 @@ struct nand_ecc_ctrl { /** * struct nand_buffers - buffer structure for read/write - * @ecccalc: buffer for calculated ECC - * @ecccode: buffer for ECC read from flash - * @databuf: buffer for data - dynamically sized + * @ecccalc: buffer pointer for calculated ECC, size is oobsize. + * @ecccode: buffer pointer for ECC read from flash, size is oobsize. + * @databuf: buffer pointer for data, size is (page size + oobsize). * * Do not change the order of buffers. databuf and oobrbuf must be in * consecutive order. */ struct nand_buffers { - uint8_t ecccalc[NAND_MAX_OOBSIZE]; - uint8_t ecccode[NAND_MAX_OOBSIZE]; - uint8_t databuf[NAND_MAX_PAGESIZE + NAND_MAX_OOBSIZE]; + uint8_t *ecccalc; + uint8_t *ecccode; + uint8_t *databuf; }; /** -- cgit v1.2.3-59-g8ed1b From 3f172cbdfbb799e35cc972fc9f7e43d0e971577b Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Sat, 21 Dec 2013 00:02:30 +0800 Subject: mtd: nand: remove the NAND_MAX_PAGESIZE/NAND_MAX_OOBSIZE There is no reference to these two macros now. Just remove them. Signed-off-by: Huang Shijie Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 520ebca11f5d..a719686c9cce 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -51,14 +51,6 @@ extern int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len); /* The maximum number of NAND chips in an array */ #define NAND_MAX_CHIPS 8 -/* - * This constant declares the max. oobsize / page, which - * is supported now. If you add a chip with bigger oobsize/page - * adjust this accordingly. - */ -#define NAND_MAX_OOBSIZE 744 -#define NAND_MAX_PAGESIZE 8192 - /* * Constants for hardware specific CLE/ALE/NCE function * -- cgit v1.2.3-59-g8ed1b From 3dad2344e92c6e1aeae42df1c4824f307c51bcc7 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 29 Jan 2014 14:08:12 -0800 Subject: mtd: nand: force NAND_CMD_READID onto 8-bit bus The NAND command helpers tend to automatically shift the column address for x16 bus devices, since most commands expect a word address, not a byte address. The Read ID command, however, expects an 8-bit address (i.e., 0x00, 0x20, or 0x40 should not be translated to 0x00, 0x10, or 0x20). This fixes the column address for a few drivers which imitate the nand_base defaults. Note that I don't touch sh_flctl.c, since it already handles this problem slightly differently (note its comment "READID is always performed using an 8-bit bus"). I have not tested this patch, as I only have x8 parts up for testing at this point. Hopefully that can change soon... Signed-off-by: Brian Norris Tested-by: Ezequiel Garcia Tested-By: Pekon Gupta --- drivers/mtd/nand/atmel_nand.c | 11 ++++++----- drivers/mtd/nand/au1550nd.c | 3 ++- drivers/mtd/nand/diskonchip.c | 3 ++- drivers/mtd/nand/nand_base.c | 6 ++++-- drivers/mtd/nand/nuc900_nand.c | 3 ++- include/linux/mtd/nand.h | 10 ++++++++++ 6 files changed, 26 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c index c36e9b84487c..1955389c8fa6 100644 --- a/drivers/mtd/nand/atmel_nand.c +++ b/drivers/mtd/nand/atmel_nand.c @@ -1659,8 +1659,8 @@ static void nfc_select_chip(struct mtd_info *mtd, int chip) nfc_writel(host->nfc->hsmc_regs, CTRL, NFC_CTRL_ENABLE); } -static int nfc_make_addr(struct mtd_info *mtd, int column, int page_addr, - unsigned int *addr1234, unsigned int *cycle0) +static int nfc_make_addr(struct mtd_info *mtd, int command, int column, + int page_addr, unsigned int *addr1234, unsigned int *cycle0) { struct nand_chip *chip = mtd->priv; @@ -1674,7 +1674,8 @@ static int nfc_make_addr(struct mtd_info *mtd, int column, int page_addr, *addr1234 = 0; if (column != -1) { - if (chip->options & NAND_BUSWIDTH_16) + if (chip->options & NAND_BUSWIDTH_16 && + !nand_opcode_8bits(command)) column >>= 1; addr_bytes[acycle++] = column & 0xff; if (mtd->writesize > 512) @@ -1787,8 +1788,8 @@ static void nfc_nand_command(struct mtd_info *mtd, unsigned int command, } if (do_addr) - acycle = nfc_make_addr(mtd, column, page_addr, &addr1234, - &cycle0); + acycle = nfc_make_addr(mtd, command, column, page_addr, + &addr1234, &cycle0); nfc_addr_cmd = cmd1 | cmd2 | vcmd2 | acycle | csid | dataen | nfcwr; nfc_send_command(host, nfc_addr_cmd, addr1234, cycle0); diff --git a/drivers/mtd/nand/au1550nd.c b/drivers/mtd/nand/au1550nd.c index 7d84c4e4bf43..bc5c518828d2 100644 --- a/drivers/mtd/nand/au1550nd.c +++ b/drivers/mtd/nand/au1550nd.c @@ -307,7 +307,8 @@ static void au1550_command(struct mtd_info *mtd, unsigned command, int column, i /* Serially input address */ if (column != -1) { /* Adjust columns for 16 bit buswidth */ - if (this->options & NAND_BUSWIDTH_16) + if (this->options & NAND_BUSWIDTH_16 && + !nand_opcode_8bits(command)) column >>= 1; ctx->write_byte(mtd, column); } diff --git a/drivers/mtd/nand/diskonchip.c b/drivers/mtd/nand/diskonchip.c index fec31d71b84e..b9b4db607850 100644 --- a/drivers/mtd/nand/diskonchip.c +++ b/drivers/mtd/nand/diskonchip.c @@ -698,7 +698,8 @@ static void doc2001plus_command(struct mtd_info *mtd, unsigned command, int colu /* Serially input address */ if (column != -1) { /* Adjust columns for 16 bit buswidth */ - if (this->options & NAND_BUSWIDTH_16) + if (this->options & NAND_BUSWIDTH_16 && + !nand_opcode_8bits(command)) column >>= 1; WriteDOC(column, docptr, Mplus_FlashAddress); } diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index deeaa1cc4a85..6281151e4cb7 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -589,7 +589,8 @@ static void nand_command(struct mtd_info *mtd, unsigned int command, /* Serially input address */ if (column != -1) { /* Adjust columns for 16 bit buswidth */ - if (chip->options & NAND_BUSWIDTH_16) + if (chip->options & NAND_BUSWIDTH_16 && + !nand_opcode_8bits(command)) column >>= 1; chip->cmd_ctrl(mtd, column, ctrl); ctrl &= ~NAND_CTRL_CHANGE; @@ -680,7 +681,8 @@ static void nand_command_lp(struct mtd_info *mtd, unsigned int command, /* Serially input address */ if (column != -1) { /* Adjust columns for 16 bit buswidth */ - if (chip->options & NAND_BUSWIDTH_16) + if (chip->options & NAND_BUSWIDTH_16 && + !nand_opcode_8bits(command)) column >>= 1; chip->cmd_ctrl(mtd, column, ctrl); ctrl &= ~NAND_CTRL_CHANGE; diff --git a/drivers/mtd/nand/nuc900_nand.c b/drivers/mtd/nand/nuc900_nand.c index 90c99ea5184a..331fccbdde61 100644 --- a/drivers/mtd/nand/nuc900_nand.c +++ b/drivers/mtd/nand/nuc900_nand.c @@ -151,7 +151,8 @@ static void nuc900_nand_command_lp(struct mtd_info *mtd, unsigned int command, if (column != -1 || page_addr != -1) { if (column != -1) { - if (chip->options & NAND_BUSWIDTH_16) + if (chip->options & NAND_BUSWIDTH_16 && + !nand_opcode_8bits(command)) column >>= 1; write_addr_reg(nand, column); write_addr_reg(nand, column >> 8 | ENDADDR); diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index a719686c9cce..c034dc4224cb 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -832,4 +832,14 @@ static inline bool nand_is_slc(struct nand_chip *chip) { return chip->bits_per_cell == 1; } + +/** + * Check if the opcode's address should be sent only on the lower 8 bits + * @command: opcode to check + */ +static inline int nand_opcode_8bits(unsigned int command) +{ + return command == NAND_CMD_READID; +} + #endif /* __LINUX_MTD_NAND_H */ -- cgit v1.2.3-59-g8ed1b From 91e165030deedc612c153dfeaba294d49632ade3 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Fri, 14 Feb 2014 12:16:38 +0530 Subject: mtd: nand: s3c2410: Trivial cleanup in header file Commit 436d42c61c3e ("ARM: samsung: move platform_data definitions") moved the files to the current location but forgot to remove the pointer to its previous location. Clean it up. While at it also add the header file protection macros appropriately. Signed-off-by: Sachin Kamat Signed-off-by: Brian Norris --- include/linux/platform_data/mtd-nand-s3c2410.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/mtd-nand-s3c2410.h b/include/linux/platform_data/mtd-nand-s3c2410.h index b64115fa93a4..36bb92172f47 100644 --- a/include/linux/platform_data/mtd-nand-s3c2410.h +++ b/include/linux/platform_data/mtd-nand-s3c2410.h @@ -1,5 +1,4 @@ -/* arch/arm/mach-s3c2410/include/mach/nand.h - * +/* * Copyright (c) 2004 Simtec Electronics * Ben Dooks * @@ -10,6 +9,9 @@ * published by the Free Software Foundation. */ +#ifndef __MTD_NAND_S3C2410_H +#define __MTD_NAND_S3C2410_H + /** * struct s3c2410_nand_set - define a set of one or more nand chips * @disable_ecc: Entirely disable ECC - Dangerous @@ -65,3 +67,5 @@ struct s3c2410_platform_nand { * it with the s3c_device_nand. This allows @nand to be __initdata. */ extern void s3c_nand_set_platdata(struct s3c2410_platform_nand *nand); + +#endif /*__MTD_NAND_S3C2410_H */ -- cgit v1.2.3-59-g8ed1b From afbfff03d611de22b1ec7127ad56920e02936d5e Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Fri, 21 Feb 2014 13:39:37 +0800 Subject: mtd: nand: add the data structures for JEDEC parameter page Create the nand_jedec_params{} and jedec_ecc_info{} according to the JESD230A (Revision of JESD230, October 2012). Signed-off-by: Huang Shijie Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 75 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index c034dc4224cb..588f8a4a27af 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -342,6 +342,81 @@ struct nand_onfi_vendor_micron { u8 param_revision; } __packed; +struct jedec_ecc_info { + u8 ecc_bits; + u8 codeword_size; + __le16 bb_per_lun; + __le16 block_endurance; + u8 reserved[2]; +} __packed; + +struct nand_jedec_params { + /* rev info and features block */ + /* 'J' 'E' 'S' 'D' */ + u8 sig[4]; + __le16 revision; + __le16 features; + u8 opt_cmd[3]; + __le16 sec_cmd; + u8 num_of_param_pages; + u8 reserved0[18]; + + /* manufacturer information block */ + char manufacturer[12]; + char model[20]; + u8 jedec_id[6]; + u8 reserved1[10]; + + /* memory organization block */ + __le32 byte_per_page; + __le16 spare_bytes_per_page; + u8 reserved2[6]; + __le32 pages_per_block; + __le32 blocks_per_lun; + u8 lun_count; + u8 addr_cycles; + u8 bits_per_cell; + u8 programs_per_page; + u8 multi_plane_addr; + u8 multi_plane_op_attr; + u8 reserved3[38]; + + /* electrical parameter block */ + __le16 async_sdr_speed_grade; + __le16 toggle_ddr_speed_grade; + __le16 sync_ddr_speed_grade; + u8 async_sdr_features; + u8 toggle_ddr_features; + u8 sync_ddr_features; + __le16 t_prog; + __le16 t_bers; + __le16 t_r; + __le16 t_r_multi_plane; + __le16 t_ccs; + __le16 io_pin_capacitance_typ; + __le16 input_pin_capacitance_typ; + __le16 clk_pin_capacitance_typ; + u8 driver_strength_support; + __le16 t_ald; + u8 reserved4[36]; + + /* ECC and endurance block */ + u8 guaranteed_good_blocks; + __le16 guaranteed_block_endurance; + struct jedec_ecc_info ecc_info[4]; + u8 reserved5[29]; + + /* reserved */ + u8 reserved6[148]; + + /* vendor */ + __le16 vendor_rev_num; + u8 reserved7[88]; + + /* CRC for Parameter Page */ + __le16 crc; +} __packed; + /** * struct nand_hw_control - Control structure for hardware controller (e.g ECC generator) shared among independent devices * @lock: protection lock -- cgit v1.2.3-59-g8ed1b From d94abba7605d3c15123eb3b331a1872ef17d29e0 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Fri, 21 Feb 2014 13:39:38 +0800 Subject: mtd: nand: add fields for JEDEC in nand_chip Add the jedec_version field, and add an anonymous union which contains the nand_onfi_params and nand_jedec_params. Signed-off-by: Huang Shijie Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 588f8a4a27af..f9af7564118a 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -590,8 +590,12 @@ struct nand_buffers { * @subpagesize: [INTERN] holds the subpagesize * @onfi_version: [INTERN] holds the chip ONFI version (BCD encoded), * non 0 if ONFI supported. + * @jedec_version: [INTERN] holds the chip JEDEC version (BCD encoded), + * non 0 if JEDEC supported. * @onfi_params: [INTERN] holds the ONFI page parameter when ONFI is * supported, 0 otherwise. + * @jedec_params: [INTERN] holds the JEDEC parameter page when JEDEC is + * supported, 0 otherwise. * @read_retries: [INTERN] the number of read retry modes supported * @onfi_set_features: [REPLACEABLE] set the features for ONFI nand * @onfi_get_features: [REPLACEABLE] get the features for ONFI nand @@ -664,7 +668,11 @@ struct nand_chip { int badblockbits; int onfi_version; - struct nand_onfi_params onfi_params; + int jedec_version; + union { + struct nand_onfi_params onfi_params; + struct nand_jedec_params jedec_params; + }; int read_retries; -- cgit v1.2.3-59-g8ed1b From 7852f8962f0f022b11fc56d63de06226a9f70d88 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Fri, 21 Feb 2014 13:39:39 +0800 Subject: mtd: nand: add a helper to get the supported features for JEDEC Add a helper to get the supported features for JEDEC compliant NAND. Also add a macro JEDEC_FEATURE_16_BIT_BUS. Signed-off-by: Huang Shijie Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index f9af7564118a..afd1cf9b5eaf 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -350,6 +350,9 @@ struct jedec_ecc_info { u8 reserved[2]; } __packed; +/* JEDEC features */ +#define JEDEC_FEATURE_16_BIT_BUS (1 << 0) + struct nand_jedec_params { /* rev info and features block */ /* 'J' 'E' 'S' 'D' */ @@ -925,4 +928,10 @@ static inline int nand_opcode_8bits(unsigned int command) return command == NAND_CMD_READID; } +/* return the supported JEDEC features. */ +static inline int jedec_feature(struct nand_chip *chip) +{ + return chip->jedec_version ? le16_to_cpu(chip->jedec_params.features) + : 0; +} #endif /* __LINUX_MTD_NAND_H */ -- cgit v1.2.3-59-g8ed1b From 913618185e51ee1fcbc193b2f121a9d072405619 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Fri, 21 Feb 2014 13:39:40 +0800 Subject: mtd: nand: parse out the JEDEC compliant NAND This patch adds the parsing code for the JEDEC compliant NAND. Since we need the 0x40 as the column address, this patch also makes the NAND_CMD_PARAM to use the 8-bit address only. Signed-off-by: Huang Shijie Signed-off-by: Brian Norris --- drivers/mtd/nand/nand_base.c | 85 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/mtd/nand.h | 2 +- 2 files changed, 86 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 3cb1cefcd926..30416ecf44ef 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -3162,6 +3162,87 @@ static int nand_flash_detect_onfi(struct mtd_info *mtd, struct nand_chip *chip, return 1; } +/* + * Check if the NAND chip is JEDEC compliant, returns 1 if it is, 0 otherwise. + */ +static int nand_flash_detect_jedec(struct mtd_info *mtd, struct nand_chip *chip, + int *busw) +{ + struct nand_jedec_params *p = &chip->jedec_params; + struct jedec_ecc_info *ecc; + int val; + int i, j; + + /* Try JEDEC for unknown chip or LP */ + chip->cmdfunc(mtd, NAND_CMD_READID, 0x40, -1); + if (chip->read_byte(mtd) != 'J' || chip->read_byte(mtd) != 'E' || + chip->read_byte(mtd) != 'D' || chip->read_byte(mtd) != 'E' || + chip->read_byte(mtd) != 'C') + return 0; + + chip->cmdfunc(mtd, NAND_CMD_PARAM, 0x40, -1); + for (i = 0; i < 3; i++) { + for (j = 0; j < sizeof(*p); j++) + ((uint8_t *)p)[j] = chip->read_byte(mtd); + + if (onfi_crc16(ONFI_CRC_BASE, (uint8_t *)p, 510) == + le16_to_cpu(p->crc)) + break; + } + + if (i == 3) { + pr_err("Could not find valid JEDEC parameter page; aborting\n"); + return 0; + } + + /* Check version */ + val = le16_to_cpu(p->revision); + if (val & (1 << 2)) + chip->jedec_version = 10; + else if (val & (1 << 1)) + chip->jedec_version = 1; /* vendor specific version */ + + if (!chip->jedec_version) { + pr_info("unsupported JEDEC version: %d\n", val); + return 0; + } + + sanitize_string(p->manufacturer, sizeof(p->manufacturer)); + sanitize_string(p->model, sizeof(p->model)); + if (!mtd->name) + mtd->name = p->model; + + mtd->writesize = le32_to_cpu(p->byte_per_page); + + /* Please reference to the comment for nand_flash_detect_onfi. */ + mtd->erasesize = 1 << (fls(le32_to_cpu(p->pages_per_block)) - 1); + mtd->erasesize *= mtd->writesize; + + mtd->oobsize = le16_to_cpu(p->spare_bytes_per_page); + + /* Please reference to the comment for nand_flash_detect_onfi. */ + chip->chipsize = 1 << (fls(le32_to_cpu(p->blocks_per_lun)) - 1); + chip->chipsize *= (uint64_t)mtd->erasesize * p->lun_count; + chip->bits_per_cell = p->bits_per_cell; + + if (jedec_feature(chip) & JEDEC_FEATURE_16_BIT_BUS) + *busw = NAND_BUSWIDTH_16; + else + *busw = 0; + + /* ECC info */ + ecc = &p->ecc_info[0]; + + if (ecc->codeword_size >= 9) { + chip->ecc_strength_ds = ecc->ecc_bits; + chip->ecc_step_ds = 1 << ecc->codeword_size; + } else { + pr_warn("Invalid codeword size\n"); + } + + return 1; +} + /* * nand_id_has_period - Check if an ID string has a given wraparound period * @id_data: the ID string @@ -3527,6 +3608,10 @@ static struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd, /* Check is chip is ONFI compliant */ if (nand_flash_detect_onfi(mtd, chip, &busw)) goto ident_done; + + /* Check if the chip is JEDEC compliant */ + if (nand_flash_detect_jedec(mtd, chip, &busw)) + goto ident_done; } if (!type->name) diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index afd1cf9b5eaf..aa005e87d161 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -925,7 +925,7 @@ static inline bool nand_is_slc(struct nand_chip *chip) */ static inline int nand_opcode_8bits(unsigned int command) { - return command == NAND_CMD_READID; + return command == NAND_CMD_READID || command == NAND_CMD_PARAM; } /* return the supported JEDEC features. */ -- cgit v1.2.3-59-g8ed1b From 4b78fc42f3e3f07687dc27efc1153d29e360afa1 Mon Sep 17 00:00:00 2001 From: Christian Riesch Date: Tue, 28 Jan 2014 09:29:44 +0100 Subject: mtd: Add a retlen parameter to _get_{fact,user}_prot_info Signed-off-by: Christian Riesch Cc: Artem Bityutskiy Signed-off-by: Brian Norris --- drivers/mtd/chips/cfi_cmdset_0001.c | 31 +++++++++++++------------------ drivers/mtd/devices/mtd_dataflash.c | 7 ++++--- drivers/mtd/mtdchar.c | 11 ++++++----- drivers/mtd/mtdcore.c | 12 ++++++------ drivers/mtd/mtdpart.c | 14 ++++++++------ drivers/mtd/onenand/onenand_base.c | 30 ++++++++++++------------------ include/linux/mtd/mtd.h | 16 ++++++++-------- 7 files changed, 57 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c index 5e74c860e532..e4ec355704a6 100644 --- a/drivers/mtd/chips/cfi_cmdset_0001.c +++ b/drivers/mtd/chips/cfi_cmdset_0001.c @@ -68,10 +68,10 @@ static int cfi_intelext_read_fact_prot_reg (struct mtd_info *, loff_t, size_t, s static int cfi_intelext_read_user_prot_reg (struct mtd_info *, loff_t, size_t, size_t *, u_char *); static int cfi_intelext_write_user_prot_reg (struct mtd_info *, loff_t, size_t, size_t *, u_char *); static int cfi_intelext_lock_user_prot_reg (struct mtd_info *, loff_t, size_t); -static int cfi_intelext_get_fact_prot_info (struct mtd_info *, - struct otp_info *, size_t); -static int cfi_intelext_get_user_prot_info (struct mtd_info *, - struct otp_info *, size_t); +static int cfi_intelext_get_fact_prot_info(struct mtd_info *, size_t, + size_t *, struct otp_info *); +static int cfi_intelext_get_user_prot_info(struct mtd_info *, size_t, + size_t *, struct otp_info *); #endif static int cfi_intelext_suspend (struct mtd_info *); static void cfi_intelext_resume (struct mtd_info *); @@ -2394,24 +2394,19 @@ static int cfi_intelext_lock_user_prot_reg(struct mtd_info *mtd, NULL, do_otp_lock, 1); } -static int cfi_intelext_get_fact_prot_info(struct mtd_info *mtd, - struct otp_info *buf, size_t len) -{ - size_t retlen; - int ret; +static int cfi_intelext_get_fact_prot_info(struct mtd_info *mtd, size_t len, + size_t *retlen, struct otp_info *buf) - ret = cfi_intelext_otp_walk(mtd, 0, len, &retlen, (u_char *)buf, NULL, 0); - return ret ? : retlen; +{ + return cfi_intelext_otp_walk(mtd, 0, len, retlen, (u_char *)buf, + NULL, 0); } -static int cfi_intelext_get_user_prot_info(struct mtd_info *mtd, - struct otp_info *buf, size_t len) +static int cfi_intelext_get_user_prot_info(struct mtd_info *mtd, size_t len, + size_t *retlen, struct otp_info *buf) { - size_t retlen; - int ret; - - ret = cfi_intelext_otp_walk(mtd, 0, len, &retlen, (u_char *)buf, NULL, 1); - return ret ? : retlen; + return cfi_intelext_otp_walk(mtd, 0, len, retlen, (u_char *)buf, + NULL, 1); } #endif diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c index 8b278d2b15bb..a6fdbe83bad6 100644 --- a/drivers/mtd/devices/mtd_dataflash.c +++ b/drivers/mtd/devices/mtd_dataflash.c @@ -439,8 +439,8 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len, #ifdef CONFIG_MTD_DATAFLASH_OTP -static int dataflash_get_otp_info(struct mtd_info *mtd, - struct otp_info *info, size_t len) +static int dataflash_get_otp_info(struct mtd_info *mtd, size_t len, + size_t *retlen, struct otp_info *info) { /* Report both blocks as identical: bytes 0..64, locked. * Unless the user block changed from all-ones, we can't @@ -449,7 +449,8 @@ static int dataflash_get_otp_info(struct mtd_info *mtd, info->start = 0; info->length = 64; info->locked = 1; - return sizeof(*info); + *retlen = sizeof(*info); + return 0; } static ssize_t otp_read(struct spi_device *spi, unsigned base, diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 2147e733533b..250798cf76aa 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -889,25 +889,26 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg) case OTPGETREGIONINFO: { struct otp_info *buf = kmalloc(4096, GFP_KERNEL); + size_t retlen; if (!buf) return -ENOMEM; switch (mfi->mode) { case MTD_FILE_MODE_OTP_FACTORY: - ret = mtd_get_fact_prot_info(mtd, buf, 4096); + ret = mtd_get_fact_prot_info(mtd, 4096, &retlen, buf); break; case MTD_FILE_MODE_OTP_USER: - ret = mtd_get_user_prot_info(mtd, buf, 4096); + ret = mtd_get_user_prot_info(mtd, 4096, &retlen, buf); break; default: ret = -EINVAL; break; } - if (ret >= 0) { + if (!ret) { if (cmd == OTPGETREGIONCOUNT) { - int nbr = ret / sizeof(struct otp_info); + int nbr = retlen / sizeof(struct otp_info); ret = copy_to_user(argp, &nbr, sizeof(int)); } else - ret = copy_to_user(argp, buf, ret); + ret = copy_to_user(argp, buf, retlen); if (ret) ret = -EFAULT; } diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 34c0b16aed5c..0a7d77e65335 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -883,14 +883,14 @@ EXPORT_SYMBOL_GPL(mtd_read_oob); * devices. The user data is one time programmable but the factory data is read * only. */ -int mtd_get_fact_prot_info(struct mtd_info *mtd, struct otp_info *buf, - size_t len) +int mtd_get_fact_prot_info(struct mtd_info *mtd, size_t len, size_t *retlen, + struct otp_info *buf) { if (!mtd->_get_fact_prot_info) return -EOPNOTSUPP; if (!len) return 0; - return mtd->_get_fact_prot_info(mtd, buf, len); + return mtd->_get_fact_prot_info(mtd, len, retlen, buf); } EXPORT_SYMBOL_GPL(mtd_get_fact_prot_info); @@ -906,14 +906,14 @@ int mtd_read_fact_prot_reg(struct mtd_info *mtd, loff_t from, size_t len, } EXPORT_SYMBOL_GPL(mtd_read_fact_prot_reg); -int mtd_get_user_prot_info(struct mtd_info *mtd, struct otp_info *buf, - size_t len) +int mtd_get_user_prot_info(struct mtd_info *mtd, size_t len, size_t *retlen, + struct otp_info *buf) { if (!mtd->_get_user_prot_info) return -EOPNOTSUPP; if (!len) return 0; - return mtd->_get_user_prot_info(mtd, buf, len); + return mtd->_get_user_prot_info(mtd, len, retlen, buf); } EXPORT_SYMBOL_GPL(mtd_get_user_prot_info); diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index 3c7d6d7623c1..1ca9aec141ff 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -150,11 +150,12 @@ static int part_read_user_prot_reg(struct mtd_info *mtd, loff_t from, retlen, buf); } -static int part_get_user_prot_info(struct mtd_info *mtd, - struct otp_info *buf, size_t len) +static int part_get_user_prot_info(struct mtd_info *mtd, size_t len, + size_t *retlen, struct otp_info *buf) { struct mtd_part *part = PART(mtd); - return part->master->_get_user_prot_info(part->master, buf, len); + return part->master->_get_user_prot_info(part->master, len, retlen, + buf); } static int part_read_fact_prot_reg(struct mtd_info *mtd, loff_t from, @@ -165,11 +166,12 @@ static int part_read_fact_prot_reg(struct mtd_info *mtd, loff_t from, retlen, buf); } -static int part_get_fact_prot_info(struct mtd_info *mtd, struct otp_info *buf, - size_t len) +static int part_get_fact_prot_info(struct mtd_info *mtd, size_t len, + size_t *retlen, struct otp_info *buf) { struct mtd_part *part = PART(mtd); - return part->master->_get_fact_prot_info(part->master, buf, len); + return part->master->_get_fact_prot_info(part->master, len, retlen, + buf); } static int part_write(struct mtd_info *mtd, loff_t to, size_t len, diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c index e886d7af868f..635ee0027691 100644 --- a/drivers/mtd/onenand/onenand_base.c +++ b/drivers/mtd/onenand/onenand_base.c @@ -3237,20 +3237,17 @@ static int onenand_otp_walk(struct mtd_info *mtd, loff_t from, size_t len, /** * onenand_get_fact_prot_info - [MTD Interface] Read factory OTP info * @param mtd MTD device structure - * @param buf the databuffer to put/get data * @param len number of bytes to read + * @param retlen pointer to variable to store the number of read bytes + * @param buf the databuffer to put/get data * * Read factory OTP info. */ -static int onenand_get_fact_prot_info(struct mtd_info *mtd, - struct otp_info *buf, size_t len) +static int onenand_get_fact_prot_info(struct mtd_info *mtd, size_t len, + size_t *retlen, struct otp_info *buf) { - size_t retlen; - int ret; - - ret = onenand_otp_walk(mtd, 0, len, &retlen, (u_char *) buf, NULL, MTD_OTP_FACTORY); - - return ret ? : retlen; + return onenand_otp_walk(mtd, 0, len, retlen, (u_char *) buf, NULL, + MTD_OTP_FACTORY); } /** @@ -3272,20 +3269,17 @@ static int onenand_read_fact_prot_reg(struct mtd_info *mtd, loff_t from, /** * onenand_get_user_prot_info - [MTD Interface] Read user OTP info * @param mtd MTD device structure - * @param buf the databuffer to put/get data + * @param retlen pointer to variable to store the number of read bytes * @param len number of bytes to read + * @param buf the databuffer to put/get data * * Read user OTP info. */ -static int onenand_get_user_prot_info(struct mtd_info *mtd, - struct otp_info *buf, size_t len) +static int onenand_get_user_prot_info(struct mtd_info *mtd, size_t len, + size_t *retlen, struct otp_info *buf) { - size_t retlen; - int ret; - - ret = onenand_otp_walk(mtd, 0, len, &retlen, (u_char *) buf, NULL, MTD_OTP_USER); - - return ret ? : retlen; + return onenand_otp_walk(mtd, 0, len, retlen, (u_char *) buf, NULL, + MTD_OTP_USER); } /** diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 8cc0e2fb6894..a1b0b4c8fd79 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -204,12 +204,12 @@ struct mtd_info { struct mtd_oob_ops *ops); int (*_write_oob) (struct mtd_info *mtd, loff_t to, struct mtd_oob_ops *ops); - int (*_get_fact_prot_info) (struct mtd_info *mtd, struct otp_info *buf, - size_t len); + int (*_get_fact_prot_info) (struct mtd_info *mtd, size_t len, + size_t *retlen, struct otp_info *buf); int (*_read_fact_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf); - int (*_get_user_prot_info) (struct mtd_info *mtd, struct otp_info *buf, - size_t len); + int (*_get_user_prot_info) (struct mtd_info *mtd, size_t len, + size_t *retlen, struct otp_info *buf); int (*_read_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf); int (*_write_user_prot_reg) (struct mtd_info *mtd, loff_t to, @@ -278,12 +278,12 @@ static inline int mtd_write_oob(struct mtd_info *mtd, loff_t to, return mtd->_write_oob(mtd, to, ops); } -int mtd_get_fact_prot_info(struct mtd_info *mtd, struct otp_info *buf, - size_t len); +int mtd_get_fact_prot_info(struct mtd_info *mtd, size_t len, size_t *retlen, + struct otp_info *buf); int mtd_read_fact_prot_reg(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf); -int mtd_get_user_prot_info(struct mtd_info *mtd, struct otp_info *buf, - size_t len); +int mtd_get_user_prot_info(struct mtd_info *mtd, size_t len, size_t *retlen, + struct otp_info *buf); int mtd_read_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf); int mtd_write_user_prot_reg(struct mtd_info *mtd, loff_t to, size_t len, -- cgit v1.2.3-59-g8ed1b From 6d9434ebb76157071164b32ad03fbed165c74382 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Mon, 24 Feb 2014 19:24:48 -0300 Subject: of_mtd: Add helpers to get ECC strength and ECC step size This commit adds simple helpers to obtain the devicetree properties that specify the ECC strength and ECC step size to use on a given NAND controller. Acked-by: Boris BREZILLON Signed-off-by: Ezequiel Garcia Signed-off-by: Brian Norris --- drivers/of/of_mtd.c | 34 ++++++++++++++++++++++++++++++++++ include/linux/of_mtd.h | 12 ++++++++++++ 2 files changed, 46 insertions(+) (limited to 'include/linux') diff --git a/drivers/of/of_mtd.c b/drivers/of/of_mtd.c index a27ec94877e4..b7361ed70537 100644 --- a/drivers/of/of_mtd.c +++ b/drivers/of/of_mtd.c @@ -49,6 +49,40 @@ int of_get_nand_ecc_mode(struct device_node *np) } EXPORT_SYMBOL_GPL(of_get_nand_ecc_mode); +/** + * of_get_nand_ecc_step_size - Get ECC step size associated to + * the required ECC strength (see below). + * @np: Pointer to the given device_node + * + * return the ECC step size, or errno in error case. + */ +int of_get_nand_ecc_step_size(struct device_node *np) +{ + int ret; + u32 val; + + ret = of_property_read_u32(np, "nand-ecc-step-size", &val); + return ret ? ret : val; +} +EXPORT_SYMBOL_GPL(of_get_nand_ecc_step_size); + +/** + * of_get_nand_ecc_strength - Get required ECC strength over the + * correspnding step size as defined by 'nand-ecc-size' + * @np: Pointer to the given device_node + * + * return the ECC strength, or errno in error case. + */ +int of_get_nand_ecc_strength(struct device_node *np) +{ + int ret; + u32 val; + + ret = of_property_read_u32(np, "nand-ecc-strength", &val); + return ret ? ret : val; +} +EXPORT_SYMBOL_GPL(of_get_nand_ecc_strength); + /** * of_get_nand_bus_width - Get nand bus witdh for given device_node * @np: Pointer to the given device_node diff --git a/include/linux/of_mtd.h b/include/linux/of_mtd.h index cb32d9c1e8dc..e266caa36402 100644 --- a/include/linux/of_mtd.h +++ b/include/linux/of_mtd.h @@ -13,6 +13,8 @@ #include int of_get_nand_ecc_mode(struct device_node *np); +int of_get_nand_ecc_step_size(struct device_node *np); +int of_get_nand_ecc_strength(struct device_node *np); int of_get_nand_bus_width(struct device_node *np); bool of_get_nand_on_flash_bbt(struct device_node *np); @@ -23,6 +25,16 @@ static inline int of_get_nand_ecc_mode(struct device_node *np) return -ENOSYS; } +static inline int of_get_nand_ecc_step_size(struct device_node *np) +{ + return -ENOSYS; +} + +static inline int of_get_nand_ecc_strength(struct device_node *np) +{ + return -ENOSYS; +} + static inline int of_get_nand_bus_width(struct device_node *np) { return -ENOSYS; -- cgit v1.2.3-59-g8ed1b From e004debdadf1a661dcd24e2a654e56b0a113e29e Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Fri, 3 Jan 2014 11:01:40 +0800 Subject: mtd: nand: add "page" argument for read_subpage hook Add the "page" argument for the read_subpage hook. With this argument, the implementation of this hook could prints out more accurate information for debugging. Signed-off-by: Huang Shijie Signed-off-by: Brian Norris --- drivers/mtd/nand/nand_base.c | 7 +++++-- include/linux/mtd/nand.h | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 71c5c7a77053..5826da39216e 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -1162,9 +1162,11 @@ static int nand_read_page_swecc(struct mtd_info *mtd, struct nand_chip *chip, * @data_offs: offset of requested data within the page * @readlen: data length * @bufpoi: buffer to store read data + * @page: page number to read */ static int nand_read_subpage(struct mtd_info *mtd, struct nand_chip *chip, - uint32_t data_offs, uint32_t readlen, uint8_t *bufpoi) + uint32_t data_offs, uint32_t readlen, uint8_t *bufpoi, + int page) { int start_step, end_step, num_steps; uint32_t *eccpos = chip->ecc.layout->eccpos; @@ -1540,7 +1542,8 @@ read_retry: else if (!aligned && NAND_HAS_SUBPAGE_READ(chip) && !oob) ret = chip->ecc.read_subpage(mtd, chip, - col, bytes, bufpoi); + col, bytes, bufpoi, + page); else ret = chip->ecc.read_page(mtd, chip, bufpoi, oob_required, page); diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index aa005e87d161..0747fef2bfc6 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -488,7 +488,7 @@ struct nand_ecc_ctrl { int (*read_page)(struct mtd_info *mtd, struct nand_chip *chip, uint8_t *buf, int oob_required, int page); int (*read_subpage)(struct mtd_info *mtd, struct nand_chip *chip, - uint32_t offs, uint32_t len, uint8_t *buf); + uint32_t offs, uint32_t len, uint8_t *buf, int page); int (*write_subpage)(struct mtd_info *mtd, struct nand_chip *chip, uint32_t offset, uint32_t data_len, const uint8_t *data_buf, int oob_required); -- cgit v1.2.3-59-g8ed1b From 21bdd17b21b45ea48e06e23918d681afbe0622e9 Mon Sep 17 00:00:00 2001 From: Tom Gundersen Date: Mon, 3 Feb 2014 11:14:13 +1030 Subject: module: allow multiple calls to MODULE_DEVICE_TABLE() per module Commit 78551277e4df5: "Input: i8042 - add PNP modaliases" had a bug, where the second call to MODULE_DEVICE_TABLE() overrode the first resulting in not all the modaliases being exposed. This fixes the problem by including the name of the device_id table in the __mod_*_device_table alias, allowing us to export several device_id tables per module. Suggested-by: Kay Sievers Acked-by: Greg Kroah-Hartman Cc: Dmitry Torokhov Signed-off-by: Tom Gundersen Signed-off-by: Rusty Russell --- include/linux/module.h | 2 +- scripts/mod/file2alias.c | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index eaf60ff9ba94..ad18f6006f86 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -142,7 +142,7 @@ extern const struct gtype##_id __mod_##gtype##_table \ #define MODULE_DESCRIPTION(_description) MODULE_INFO(description, _description) #define MODULE_DEVICE_TABLE(type, name) \ - MODULE_GENERIC_TABLE(type##_device, name) + MODULE_GENERIC_TABLE(type##__##name##_device, name) /* Version of form [:][-]. * Or for CVS/RCS ID version, everything but the number is stripped. diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 25e5cb0aaef6..ce164044f0cc 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -42,7 +42,7 @@ typedef unsigned char __u8; /* This array collects all instances that use the generic do_table */ struct devtable { - const char *device_id; /* name of table, __mod__device_table. */ + const char *device_id; /* name of table, __mod___*_device_table. */ unsigned long id_size; void *function; }; @@ -146,7 +146,8 @@ static void device_id_check(const char *modname, const char *device_id, if (size % id_size || size < id_size) { fatal("%s: sizeof(struct %s_device_id)=%lu is not a modulo " - "of the size of section __mod_%s_device_table=%lu.\n" + "of the size of " + "section __mod_%s___device_table=%lu.\n" "Fix definition of struct %s_device_id " "in mod_devicetable.h\n", modname, device_id, id_size, device_id, size, device_id); @@ -1206,7 +1207,7 @@ void handle_moddevtable(struct module *mod, struct elf_info *info, { void *symval; char *zeros = NULL; - const char *name; + const char *name, *identifier; unsigned int namelen; /* We're looking for a section relative symbol */ @@ -1217,7 +1218,7 @@ void handle_moddevtable(struct module *mod, struct elf_info *info, if (ELF_ST_TYPE(sym->st_info) != STT_OBJECT) return; - /* All our symbols are of form __mod_XXX_device_table. */ + /* All our symbols are of form __mod____device_table. */ name = strstr(symname, "__mod_"); if (!name) return; @@ -1227,7 +1228,10 @@ void handle_moddevtable(struct module *mod, struct elf_info *info, return; if (strcmp(name + namelen - strlen("_device_table"), "_device_table")) return; - namelen -= strlen("_device_table"); + identifier = strstr(name, "__"); + if (!identifier) + return; + namelen = identifier - name; /* Handle all-NULL symbols allocated into .bss */ if (info->sechdrs[get_secindex(info, sym)].sh_type & SHT_NOBITS) { -- cgit v1.2.3-59-g8ed1b From cff26a51da5d206d3baf871e75778da44710219d Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 3 Feb 2014 11:15:13 +1030 Subject: module: remove MODULE_GENERIC_TABLE MODULE_DEVICE_TABLE() calles MODULE_GENERIC_TABLE(); make it do the work directly. This also removes a wart introduced in the last patch, where the alias is defined to be an unknown struct type "struct type##__##name##_device_id" instead of "struct type##_device_id" (it's an extern so GCC doesn't care, but it's wrong). The other user of MODULE_GENERIC_TABLE (ISAPNP_CARD_TABLE) is unused, so delete it. Signed-off-by: Rusty Russell --- include/linux/isapnp.h | 4 ---- include/linux/module.h | 19 ++++++++----------- 2 files changed, 8 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/isapnp.h b/include/linux/isapnp.h index e2d28b026a8c..3c77bf9b1efd 100644 --- a/include/linux/isapnp.h +++ b/include/linux/isapnp.h @@ -56,10 +56,6 @@ #define ISAPNP_DEVICE_ID(_va, _vb, _vc, _function) \ { .vendor = ISAPNP_VENDOR(_va, _vb, _vc), .function = ISAPNP_FUNCTION(_function) } -/* export used IDs outside module */ -#define ISAPNP_CARD_TABLE(name) \ - MODULE_GENERIC_TABLE(isapnp_card, name) - struct isapnp_card_id { unsigned long driver_data; /* data private to the driver */ unsigned short card_vendor, card_device; diff --git a/include/linux/module.h b/include/linux/module.h index ad18f6006f86..5686b37e11d6 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -82,15 +82,6 @@ void sort_extable(struct exception_table_entry *start, void sort_main_extable(void); void trim_init_extable(struct module *m); -#ifdef MODULE -#define MODULE_GENERIC_TABLE(gtype, name) \ -extern const struct gtype##_id __mod_##gtype##_table \ - __attribute__ ((unused, alias(__stringify(name)))) - -#else /* !MODULE */ -#define MODULE_GENERIC_TABLE(gtype, name) -#endif - /* Generic info of form tag = "info" */ #define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info) @@ -141,8 +132,14 @@ extern const struct gtype##_id __mod_##gtype##_table \ /* What your module does. */ #define MODULE_DESCRIPTION(_description) MODULE_INFO(description, _description) -#define MODULE_DEVICE_TABLE(type, name) \ - MODULE_GENERIC_TABLE(type##__##name##_device, name) +#ifdef MODULE +/* Creates an alias so file2alias.c can find device table. */ +#define MODULE_DEVICE_TABLE(type, name) \ + extern const struct type##_device_id __mod_##type##__##name##_device_table \ + __attribute__ ((unused, alias(__stringify(name)))) +#else /* !MODULE */ +#define MODULE_DEVICE_TABLE(type, name) +#endif /* Version of form [:][-]. * Or for CVS/RCS ID version, everything but the number is stripped. -- cgit v1.2.3-59-g8ed1b From 66cc69e34e86a231fbe68d8918c6119e3b7549a3 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 13 Mar 2014 12:11:30 +1030 Subject: Fix: module signature vs tracepoints: add new TAINT_UNSIGNED_MODULE Users have reported being unable to trace non-signed modules loaded within a kernel supporting module signature. This is caused by tracepoint.c:tracepoint_module_coming() refusing to take into account tracepoints sitting within force-loaded modules (TAINT_FORCED_MODULE). The reason for this check, in the first place, is that a force-loaded module may have a struct module incompatible with the layout expected by the kernel, and can thus cause a kernel crash upon forced load of that module on a kernel with CONFIG_TRACEPOINTS=y. Tracepoints, however, specifically accept TAINT_OOT_MODULE and TAINT_CRAP, since those modules do not lead to the "very likely system crash" issue cited above for force-loaded modules. With kernels having CONFIG_MODULE_SIG=y (signed modules), a non-signed module is tainted re-using the TAINT_FORCED_MODULE taint flag. Unfortunately, this means that Tracepoints treat that module as a force-loaded module, and thus silently refuse to consider any tracepoint within this module. Since an unsigned module does not fit within the "very likely system crash" category of tainting, add a new TAINT_UNSIGNED_MODULE taint flag to specifically address this taint behavior, and accept those modules within Tracepoints. We use the letter 'X' as a taint flag character for a module being loaded that doesn't know how to sign its name (proposed by Steven Rostedt). Also add the missing 'O' entry to trace event show_module_flags() list for the sake of completeness. Signed-off-by: Mathieu Desnoyers Acked-by: Steven Rostedt NAKed-by: Ingo Molnar CC: Thomas Gleixner CC: David Howells CC: Greg Kroah-Hartman Signed-off-by: Rusty Russell --- Documentation/ABI/testing/sysfs-module | 1 + Documentation/module-signing.txt | 3 ++- Documentation/oops-tracing.txt | 3 +++ Documentation/sysctl/kernel.txt | 2 ++ include/linux/kernel.h | 1 + include/trace/events/module.h | 4 +++- kernel/module.c | 4 +++- kernel/panic.c | 2 ++ kernel/tracepoint.c | 5 +++-- 9 files changed, 20 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-module b/Documentation/ABI/testing/sysfs-module index 47064c2b1f79..b9a29cdbaccb 100644 --- a/Documentation/ABI/testing/sysfs-module +++ b/Documentation/ABI/testing/sysfs-module @@ -49,3 +49,4 @@ Description: Module taint flags: O - out-of-tree module F - force-loaded module C - staging driver module + X - unsigned module diff --git a/Documentation/module-signing.txt b/Documentation/module-signing.txt index 2b40e04d3c49..b6af42e4d790 100644 --- a/Documentation/module-signing.txt +++ b/Documentation/module-signing.txt @@ -53,7 +53,8 @@ This has a number of options available: If this is off (ie. "permissive"), then modules for which the key is not available and modules that are unsigned are permitted, but the kernel will - be marked as being tainted. + be marked as being tainted, and the concerned modules will be marked as + tainted, shown with the character 'X'. If this is on (ie. "restrictive"), only modules that have a valid signature that can be verified by a public key in the kernel's possession diff --git a/Documentation/oops-tracing.txt b/Documentation/oops-tracing.txt index 13032c0140d4..879abe289523 100644 --- a/Documentation/oops-tracing.txt +++ b/Documentation/oops-tracing.txt @@ -265,6 +265,9 @@ characters, each representing a particular tainted value. 13: 'O' if an externally-built ("out-of-tree") module has been loaded. + 14: 'X' if an unsigned module has been loaded in a kernel supporting + module signature. + The primary reason for the 'Tainted: ' string is to tell kernel debuggers if this is a clean kernel or if anything unusual has occurred. Tainting is permanent: even if an offending module is diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index e55124e7c40c..8ebe1c047004 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -792,6 +792,8 @@ can be ORed together: 1024 - A module from drivers/staging was loaded. 2048 - The system is working around a severe firmware bug. 4096 - An out-of-tree module has been loaded. +8192 - An unsigned module has been loaded in a kernel supporting module + signature. ============================================================== diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 196d1ea86df0..471090093c67 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -469,6 +469,7 @@ extern enum system_states { #define TAINT_CRAP 10 #define TAINT_FIRMWARE_WORKAROUND 11 #define TAINT_OOT_MODULE 12 +#define TAINT_UNSIGNED_MODULE 13 extern const char hex_asc[]; #define hex_asc_lo(x) hex_asc[((x) & 0x0f)] diff --git a/include/trace/events/module.h b/include/trace/events/module.h index 161932737416..11fd51b413de 100644 --- a/include/trace/events/module.h +++ b/include/trace/events/module.h @@ -22,8 +22,10 @@ struct module; #define show_module_flags(flags) __print_flags(flags, "", \ { (1UL << TAINT_PROPRIETARY_MODULE), "P" }, \ + { (1UL << TAINT_OOT_MODULE), "O" }, \ { (1UL << TAINT_FORCED_MODULE), "F" }, \ - { (1UL << TAINT_CRAP), "C" }) + { (1UL << TAINT_CRAP), "C" }, \ + { (1UL << TAINT_UNSIGNED_MODULE), "X" }) TRACE_EVENT(module_load, diff --git a/kernel/module.c b/kernel/module.c index efa1e6031950..c1acb0c5b637 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1013,6 +1013,8 @@ static size_t module_flags_taint(struct module *mod, char *buf) buf[l++] = 'F'; if (mod->taints & (1 << TAINT_CRAP)) buf[l++] = 'C'; + if (mod->taints & (1 << TAINT_UNSIGNED_MODULE)) + buf[l++] = 'X'; /* * TAINT_FORCED_RMMOD: could be added. * TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't @@ -3214,7 +3216,7 @@ static int load_module(struct load_info *info, const char __user *uargs, pr_notice_once("%s: module verification failed: signature " "and/or required key missing - tainting " "kernel\n", mod->name); - add_taint_module(mod, TAINT_FORCED_MODULE, LOCKDEP_STILL_OK); + add_taint_module(mod, TAINT_UNSIGNED_MODULE, LOCKDEP_STILL_OK); } #endif diff --git a/kernel/panic.c b/kernel/panic.c index 6d6300375090..0e25fe10871e 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -210,6 +210,7 @@ static const struct tnt tnts[] = { { TAINT_CRAP, 'C', ' ' }, { TAINT_FIRMWARE_WORKAROUND, 'I', ' ' }, { TAINT_OOT_MODULE, 'O', ' ' }, + { TAINT_UNSIGNED_MODULE, 'X', ' ' }, }; /** @@ -228,6 +229,7 @@ static const struct tnt tnts[] = { * 'C' - modules from drivers/staging are loaded. * 'I' - Working around severe firmware bug. * 'O' - Out-of-tree module has been loaded. + * 'X' - Unsigned module has been loaded. * * The string is overwritten by the next call to print_tainted(). */ diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 031cc5655a51..3cdbed1fbdc7 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -633,7 +633,8 @@ EXPORT_SYMBOL_GPL(tracepoint_iter_reset); #ifdef CONFIG_MODULES bool trace_module_has_bad_taint(struct module *mod) { - return mod->taints & ~((1 << TAINT_OOT_MODULE) | (1 << TAINT_CRAP)); + return mod->taints & ~((1 << TAINT_OOT_MODULE) | (1 << TAINT_CRAP) | + (1 << TAINT_UNSIGNED_MODULE)); } static int tracepoint_module_coming(struct module *mod) @@ -644,7 +645,7 @@ static int tracepoint_module_coming(struct module *mod) /* * We skip modules that taint the kernel, especially those with different * module headers (for forced load), to make sure we don't cause a crash. - * Staging and out-of-tree GPL modules are fine. + * Staging, out-of-tree, and unsigned GPL modules are fine. */ if (trace_module_has_bad_taint(mod)) return 0; -- cgit v1.2.3-59-g8ed1b From 2d123f463669cb7b84b56aa00e073ce07fe7aff2 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 22 Jan 2014 18:26:16 +0100 Subject: drm/docs: Include hdmi infoframe helper reference Thierry created such nice kerneldocs, it's a shame we've left them lingering! For the fun of it also add a bit of kerneldoc to the header so that we can also include that. Just in case someone adds kerneldoc in there. Cc: Thierry Reding Reviewed-by: Alex Deucher Signed-off-by: Daniel Vetter --- Documentation/DocBook/drm.tmpl | 11 +++++++++++ include/linux/hdmi.h | 12 ++++++++++++ 2 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl index 0a9407ae4025..0e0390ece122 100644 --- a/Documentation/DocBook/drm.tmpl +++ b/Documentation/DocBook/drm.tmpl @@ -2219,6 +2219,17 @@ void intel_crt_init(struct drm_device *dev) !Iinclude/drm/drm_flip_work.h !Edrivers/gpu/drm/drm_flip_work.c + + HDMI Infoframes Helper Reference + + Strictly speaking this is not a DRM helper library but generally useable + by any driver interfacing with HDMI outputs like v4l or alsa drivers. + But it nicely fits into the overall topic of mode setting helper + libraries and hence is also included here. + +!Iinclude/linux/hdmi.h +!Edrivers/video/hdmi.c + diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h index 9231be9e90a2..11c0182a153b 100644 --- a/include/linux/hdmi.h +++ b/include/linux/hdmi.h @@ -262,6 +262,18 @@ union hdmi_vendor_any_infoframe { struct hdmi_vendor_infoframe hdmi; }; +/** + * union hdmi_infoframe - overall union of all abstract infoframe representations + * @any: generic infoframe + * @avi: avi infoframe + * @spd: spd infoframe + * @vendor: union of all vendor infoframes + * @audio: audio infoframe + * + * This is used by the generic pack function. This works since all infoframes + * have the same header which also indicates which type of infoframe should be + * packed. + */ union hdmi_infoframe { struct hdmi_any_infoframe any; struct hdmi_avi_infoframe avi; -- cgit v1.2.3-59-g8ed1b From 0283f9a529c81e64bafea80d6d3e056d1c3f656d Mon Sep 17 00:00:00 2001 From: Mark Charlebois Date: Mon, 17 Mar 2014 13:18:26 +1030 Subject: module: LLVMLinux: Remove unused function warning from __param_check macro This code makes a compile time type check that is optimized away. Clang complains that it generates an unused function: linux/kernel/panic.c:471:1: warning: unused function '__check_panic' [-Wunused-function] core_param(panic, panic_timeout, int, 0644); ^ linux/moduleparam.h:283:2: note: expanded from macro 'core_param' param_check_##type(name, &(var)); \ ^ :87:1: note: expanded from here param_check_int ^ linux/moduleparam.h:369:34: note: expanded from macro 'param_check_int' #define param_check_int(name, p) __param_check(name, p, int) ^ linux/moduleparam.h:349:22: note: expanded from macro '__param_check' static inline type *__check_##name(void) { return(p); } ^ :88:1: note: expanded from here __check_panic GCC won't complain for a static inline function but would if it was just a static function. Adding the unused attribute to the function declaration removes the warning. Per request from Rusty Russell it is marked as __always_unused as the code is meant to be optimized away. This code works for both GCC and clang. Signed-off-by: Mark Charlebois Signed-off-by: Behan Webster Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index c3eb102a9cc8..175f6995d1af 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -346,7 +346,7 @@ static inline void destroy_params(const struct kernel_param *params, /* The macros to do compile-time type checking stolen from Jakub Jelinek, who IIRC came up with this idea for the 2.4 module init code. */ #define __param_check(name, p, type) \ - static inline type *__check_##name(void) { return(p); } + static inline type __always_unused *__check_##name(void) { return(p); } extern struct kernel_param_ops param_ops_byte; extern int param_set_byte(const char *val, const struct kernel_param *kp); -- cgit v1.2.3-59-g8ed1b From 740a221ef0e579dc7c675cf6b90f5313509788f7 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 10 Mar 2014 15:02:41 +0200 Subject: mmc: slot-gpio: Add GPIO descriptor based CD GPIO API Add functions to request a CD GPIO using the GPIO descriptor API. Note that the new request function is paired with mmc_gpiod_free_cd() not mmc_gpio_free_cd(). Note also that it must be called prior to mmc_add_host() otherwise the caller must also call mmc_gpiod_request_cd_irq(). Signed-off-by: Adrian Hunter Reviewed-by: Linus Walleij Signed-off-by: Chris Ball --- drivers/mmc/core/core.c | 4 +++ drivers/mmc/core/slot-gpio.c | 81 ++++++++++++++++++++++++++++++++++++++++++- include/linux/mmc/slot-gpio.h | 6 ++++ 3 files changed, 90 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index dc7a5fb81a5c..acbc3f2aaaf9 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "core.h" #include "bus.h" @@ -2471,6 +2472,7 @@ void mmc_start_host(struct mmc_host *host) mmc_power_off(host); else mmc_power_up(host, host->ocr_avail); + mmc_gpiod_request_cd_irq(host); _mmc_detect_change(host, 0, false); } @@ -2482,6 +2484,8 @@ void mmc_stop_host(struct mmc_host *host) host->removed = 1; spin_unlock_irqrestore(&host->lock, flags); #endif + if (host->slot.cd_irq >= 0) + disable_irq(host->slot.cd_irq); host->rescan_disable = 1; cancel_delayed_work_sync(&host->detect); diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c index 47fa07e3604d..f7650b899e3d 100644 --- a/drivers/mmc/core/slot-gpio.c +++ b/drivers/mmc/core/slot-gpio.c @@ -139,7 +139,7 @@ int mmc_gpio_request_ro(struct mmc_host *host, unsigned int gpio) } EXPORT_SYMBOL(mmc_gpio_request_ro); -static void mmc_gpiod_request_cd_irq(struct mmc_host *host) +void mmc_gpiod_request_cd_irq(struct mmc_host *host) { struct mmc_gpio *ctx = host->slot.handler_priv; int ret, irq; @@ -171,6 +171,7 @@ static void mmc_gpiod_request_cd_irq(struct mmc_host *host) if (irq < 0) host->caps |= MMC_CAP_NEEDS_POLL; } +EXPORT_SYMBOL(mmc_gpiod_request_cd_irq); /** * mmc_gpio_request_cd - request a gpio for card-detection @@ -276,3 +277,81 @@ void mmc_gpio_free_cd(struct mmc_host *host) devm_gpio_free(&host->class_dev, gpio); } EXPORT_SYMBOL(mmc_gpio_free_cd); + +/** + * mmc_gpiod_request_cd - request a gpio descriptor for card-detection + * @host: mmc host + * @con_id: function within the GPIO consumer + * @idx: index of the GPIO to obtain in the consumer + * @override_active_level: ignore %GPIO_ACTIVE_LOW flag + * @debounce: debounce time in microseconds + * + * Use this function in place of mmc_gpio_request_cd() to use the GPIO + * descriptor API. Note that it is paired with mmc_gpiod_free_cd() not + * mmc_gpio_free_cd(). Note also that it must be called prior to mmc_add_host() + * otherwise the caller must also call mmc_gpiod_request_cd_irq(). + * + * Returns zero on success, else an error. + */ +int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id, + unsigned int idx, bool override_active_level, + unsigned int debounce) +{ + struct mmc_gpio *ctx; + struct gpio_desc *desc; + int ret; + + ret = mmc_gpio_alloc(host); + if (ret < 0) + return ret; + + ctx = host->slot.handler_priv; + + if (!con_id) + con_id = ctx->cd_label; + + desc = devm_gpiod_get_index(host->parent, con_id, idx); + if (IS_ERR(desc)) + return PTR_ERR(desc); + + ret = gpiod_direction_input(desc); + if (ret < 0) + return ret; + + if (debounce) { + ret = gpiod_set_debounce(desc, debounce); + if (ret < 0) + return ret; + } + + ctx->override_cd_active_level = override_active_level; + ctx->cd_gpio = desc; + + return 0; +} +EXPORT_SYMBOL(mmc_gpiod_request_cd); + +/** + * mmc_gpiod_free_cd - free the card-detection gpio descriptor + * @host: mmc host + * + * It's provided only for cases that client drivers need to manually free + * up the card-detection gpio requested by mmc_gpiod_request_cd(). + */ +void mmc_gpiod_free_cd(struct mmc_host *host) +{ + struct mmc_gpio *ctx = host->slot.handler_priv; + + if (!ctx || !ctx->cd_gpio) + return; + + if (host->slot.cd_irq >= 0) { + devm_free_irq(&host->class_dev, host->slot.cd_irq, host); + host->slot.cd_irq = -EINVAL; + } + + devm_gpiod_put(&host->class_dev, ctx->cd_gpio); + + ctx->cd_gpio = NULL; +} +EXPORT_SYMBOL(mmc_gpiod_free_cd); diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h index b0c73e4cacea..d2433381e828 100644 --- a/include/linux/mmc/slot-gpio.h +++ b/include/linux/mmc/slot-gpio.h @@ -22,4 +22,10 @@ int mmc_gpio_request_cd(struct mmc_host *host, unsigned int gpio, unsigned int debounce); void mmc_gpio_free_cd(struct mmc_host *host); +int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id, + unsigned int idx, bool override_active_level, + unsigned int debounce); +void mmc_gpiod_free_cd(struct mmc_host *host); +void mmc_gpiod_request_cd_irq(struct mmc_host *host); + #endif -- cgit v1.2.3-59-g8ed1b From 96f9d8c0740264c5e2975361389ff2c21f2c5a4d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 17 Mar 2014 07:06:54 -0400 Subject: nfs: abstract out code needed to complete a sillyrename The async rename code is currently "polluted" with some parts that are really just for sillyrenames. Add a new "complete" operation vector to the nfs_renamedata to separate out the stuff that just needs to be done for a sillyrename. Signed-off-by: Jeff Layton Tested-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/unlink.c | 22 ++++++++++++++++++---- include/linux/nfs_xdr.h | 1 + 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 11d78944de79..3e6798c9ba1f 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -353,8 +353,8 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata) return; } - if (task->tk_status != 0) - nfs_cancel_async_unlink(old_dentry); + if (data->complete) + data->complete(task, data); } /** @@ -401,7 +401,8 @@ static const struct rpc_call_ops nfs_rename_ops = { */ static struct rpc_task * nfs_async_rename(struct inode *old_dir, struct inode *new_dir, - struct dentry *old_dentry, struct dentry *new_dentry) + struct dentry *old_dentry, struct dentry *new_dentry, + void (*complete)(struct rpc_task *, struct nfs_renamedata *)) { struct nfs_renamedata *data; struct rpc_message msg = { }; @@ -438,6 +439,7 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir, data->new_dentry = dget(new_dentry); nfs_fattr_init(&data->old_fattr); nfs_fattr_init(&data->new_fattr); + data->complete = complete; /* set up nfs_renameargs */ data->args.old_dir = NFS_FH(old_dir); @@ -456,6 +458,17 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir, return rpc_run_task(&task_setup_data); } +/* + * Perform tasks needed when a sillyrename is done such as cancelling the + * queued async unlink if it failed. + */ +static void +nfs_complete_sillyrename(struct rpc_task *task, struct nfs_renamedata *data) +{ + if (task->tk_status != 0) + nfs_cancel_async_unlink(data->old_dentry); +} + #define SILLYNAME_PREFIX ".nfs" #define SILLYNAME_PREFIX_LEN ((unsigned)sizeof(SILLYNAME_PREFIX) - 1) #define SILLYNAME_FILEID_LEN ((unsigned)sizeof(u64) << 1) @@ -548,7 +561,8 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) } /* run the rename task, undo unlink if it fails */ - task = nfs_async_rename(dir, dir, dentry, sdentry); + task = nfs_async_rename(dir, dir, dentry, sdentry, + nfs_complete_sillyrename); if (IS_ERR(task)) { error = -EBUSY; nfs_cancel_async_unlink(dentry); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index b2fb167b2e6d..0534184b65ce 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1397,6 +1397,7 @@ struct nfs_renamedata { struct inode *new_dir; struct dentry *new_dentry; struct nfs_fattr new_fattr; + void (*complete)(struct rpc_task *, struct nfs_renamedata *); }; struct nfs_access_entry; -- cgit v1.2.3-59-g8ed1b From 0e862a405185b28e775eeeae6b04bfa39724b1ad Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 17 Mar 2014 07:06:55 -0400 Subject: nfs: make nfs_async_rename non-static ...and move the prototype for nfs_sillyrename to internal.h. Signed-off-by: Jeff Layton Tested-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 7 +++++++ fs/nfs/unlink.c | 2 +- include/linux/nfs_fs.h | 1 - 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 7f7c476d0c2c..2a81cdb93ec0 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -474,6 +474,13 @@ extern int nfs_migrate_page(struct address_space *, #define nfs_migrate_page NULL #endif +/* unlink.c */ +extern struct rpc_task * +nfs_async_rename(struct inode *old_dir, struct inode *new_dir, + struct dentry *old_dentry, struct dentry *new_dentry, + void (*complete)(struct rpc_task *, struct nfs_renamedata *)); +extern int nfs_sillyrename(struct inode *dir, struct dentry *dentry); + /* direct.c */ void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, struct nfs_direct_req *dreq); diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 3e6798c9ba1f..818ded7b7b74 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -399,7 +399,7 @@ static const struct rpc_call_ops nfs_rename_ops = { * * It's expected that valid references to the dentries and inodes are held */ -static struct rpc_task * +struct rpc_task * nfs_async_rename(struct inode *old_dir, struct inode *new_dir, struct dentry *old_dentry, struct dentry *new_dentry, void (*complete)(struct rpc_task *, struct nfs_renamedata *)) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f55a90bed0b4..fa6918b0f829 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -511,7 +511,6 @@ extern void nfs_complete_unlink(struct dentry *dentry, struct inode *); extern void nfs_wait_on_sillyrename(struct dentry *dentry); extern void nfs_block_sillyrename(struct dentry *dentry); extern void nfs_unblock_sillyrename(struct dentry *dentry); -extern int nfs_sillyrename(struct inode *dir, struct dentry *dentry); /* * linux/fs/nfs/write.c -- cgit v1.2.3-59-g8ed1b From 33912be816d96e204ed7a93690552daa39c08ea9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 17 Mar 2014 07:06:57 -0400 Subject: nfs: remove synchronous rename code Now that nfs_rename uses the async infrastructure, we can remove this. Signed-off-by: Jeff Layton Tested-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs3proc.c | 36 ------------------------------------ fs/nfs/nfs4proc.c | 44 -------------------------------------------- fs/nfs/proc.c | 25 ------------------------- include/linux/nfs_xdr.h | 2 -- 4 files changed, 107 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index aa9bc973f36a..251e6253fc36 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -477,41 +477,6 @@ nfs3_proc_rename_done(struct rpc_task *task, struct inode *old_dir, return 1; } -static int -nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name, - struct inode *new_dir, struct qstr *new_name) -{ - struct nfs_renameargs arg = { - .old_dir = NFS_FH(old_dir), - .old_name = old_name, - .new_dir = NFS_FH(new_dir), - .new_name = new_name, - }; - struct nfs_renameres res; - struct rpc_message msg = { - .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME], - .rpc_argp = &arg, - .rpc_resp = &res, - }; - int status = -ENOMEM; - - dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); - - res.old_fattr = nfs_alloc_fattr(); - res.new_fattr = nfs_alloc_fattr(); - if (res.old_fattr == NULL || res.new_fattr == NULL) - goto out; - - status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); - nfs_post_op_update_inode(old_dir, res.old_fattr); - nfs_post_op_update_inode(new_dir, res.new_fattr); -out: - nfs_free_fattr(res.old_fattr); - nfs_free_fattr(res.new_fattr); - dprintk("NFS reply rename: %d\n", status); - return status; -} - static int nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) { @@ -967,7 +932,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .unlink_setup = nfs3_proc_unlink_setup, .unlink_rpc_prepare = nfs3_proc_unlink_rpc_prepare, .unlink_done = nfs3_proc_unlink_done, - .rename = nfs3_proc_rename, .rename_setup = nfs3_proc_rename_setup, .rename_rpc_prepare = nfs3_proc_rename_rpc_prepare, .rename_done = nfs3_proc_rename_done, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2427ef4c4d63..013b97afb371 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3544,49 +3544,6 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, return 1; } -static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, - struct inode *new_dir, struct qstr *new_name) -{ - struct nfs_server *server = NFS_SERVER(old_dir); - struct nfs_renameargs arg = { - .old_dir = NFS_FH(old_dir), - .new_dir = NFS_FH(new_dir), - .old_name = old_name, - .new_name = new_name, - }; - struct nfs_renameres res = { - .server = server, - }; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME], - .rpc_argp = &arg, - .rpc_resp = &res, - }; - int status = -ENOMEM; - - status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); - if (!status) { - update_changeattr(old_dir, &res.old_cinfo); - update_changeattr(new_dir, &res.new_cinfo); - } - return status; -} - -static int nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, - struct inode *new_dir, struct qstr *new_name) -{ - struct nfs4_exception exception = { }; - int err; - do { - err = _nfs4_proc_rename(old_dir, old_name, - new_dir, new_name); - trace_nfs4_rename(old_dir, old_name, new_dir, new_name, err); - err = nfs4_handle_exception(NFS_SERVER(old_dir), err, - &exception); - } while (exception.retry); - return err; -} - static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) { struct nfs_server *server = NFS_SERVER(inode); @@ -8444,7 +8401,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .unlink_setup = nfs4_proc_unlink_setup, .unlink_rpc_prepare = nfs4_proc_unlink_rpc_prepare, .unlink_done = nfs4_proc_unlink_done, - .rename = nfs4_proc_rename, .rename_setup = nfs4_proc_rename_setup, .rename_rpc_prepare = nfs4_proc_rename_rpc_prepare, .rename_done = nfs4_proc_rename_done, diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index fddbba2d9eff..e55ce9e8b034 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -356,30 +356,6 @@ nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir, return 1; } -static int -nfs_proc_rename(struct inode *old_dir, struct qstr *old_name, - struct inode *new_dir, struct qstr *new_name) -{ - struct nfs_renameargs arg = { - .old_dir = NFS_FH(old_dir), - .old_name = old_name, - .new_dir = NFS_FH(new_dir), - .new_name = new_name, - }; - struct rpc_message msg = { - .rpc_proc = &nfs_procedures[NFSPROC_RENAME], - .rpc_argp = &arg, - }; - int status; - - dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); - status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); - nfs_mark_for_revalidate(old_dir); - nfs_mark_for_revalidate(new_dir); - dprintk("NFS reply rename: %d\n", status); - return status; -} - static int nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) { @@ -745,7 +721,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .unlink_setup = nfs_proc_unlink_setup, .unlink_rpc_prepare = nfs_proc_unlink_rpc_prepare, .unlink_done = nfs_proc_unlink_done, - .rename = nfs_proc_rename, .rename_setup = nfs_proc_rename_setup, .rename_rpc_prepare = nfs_proc_rename_rpc_prepare, .rename_done = nfs_proc_rename_done, diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 0534184b65ce..ad88a0a30a18 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1440,8 +1440,6 @@ struct nfs_rpc_ops { void (*unlink_setup) (struct rpc_message *, struct inode *dir); void (*unlink_rpc_prepare) (struct rpc_task *, struct nfs_unlinkdata *); int (*unlink_done) (struct rpc_task *, struct inode *); - int (*rename) (struct inode *, struct qstr *, - struct inode *, struct qstr *); void (*rename_setup) (struct rpc_message *msg, struct inode *dir); void (*rename_rpc_prepare)(struct rpc_task *task, struct nfs_renamedata *); int (*rename_done) (struct rpc_task *task, struct inode *old_dir, struct inode *new_dir); -- cgit v1.2.3-59-g8ed1b From 99be0245c8869cbd7b9c0ab3f093f41c60362f91 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Sun, 16 Mar 2014 02:43:27 +0100 Subject: mfd: twl4030-madc: Cleanup driver Some style fixes in twl4030-madc driver. Reported-by: Jonathan Cameron Reported-by: Lee Jones Signed-off-by: Sebastian Reichel Acked-by: Jonathan Cameron Tested-by: Marek Belisko Signed-off-by: Lee Jones --- drivers/mfd/twl4030-madc.c | 145 +++++++++++++++++++-------------------- include/linux/i2c/twl4030-madc.h | 2 +- 2 files changed, 73 insertions(+), 74 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/twl4030-madc.c b/drivers/mfd/twl4030-madc.c index 93ef91229cd3..98b9c9874d26 100644 --- a/drivers/mfd/twl4030-madc.c +++ b/drivers/mfd/twl4030-madc.c @@ -49,22 +49,22 @@ #include -/* +/** * struct twl4030_madc_data - a container for madc info - * @dev - pointer to device structure for madc - * @lock - mutex protecting this data structure - * @requests - Array of request struct corresponding to SW1, SW2 and RT - * @use_second_irq - IRQ selection (main or co-processor) - * @imr - Interrupt mask register of MADC - * @isr - Interrupt status register of MADC + * @dev: Pointer to device structure for madc + * @lock: Mutex protecting this data structure + * @requests: Array of request struct corresponding to SW1, SW2 and RT + * @use_second_irq: IRQ selection (main or co-processor) + * @imr: Interrupt mask register of MADC + * @isr: Interrupt status register of MADC */ struct twl4030_madc_data { struct device *dev; struct mutex lock; /* mutex protecting this data structure */ struct twl4030_madc_request requests[TWL4030_MADC_NUM_METHODS]; bool use_second_irq; - int imr; - int isr; + u8 imr; + u8 isr; }; static int twl4030_madc_read(struct iio_dev *iio_dev, @@ -155,17 +155,16 @@ twl4030_divider_ratios[16] = { }; -/* - * Conversion table from -3 to 55 degree Celcius - */ -static int therm_tbl[] = { -30800, 29500, 28300, 27100, -26000, 24900, 23900, 22900, 22000, 21100, 20300, 19400, 18700, 17900, -17200, 16500, 15900, 15300, 14700, 14100, 13600, 13100, 12600, 12100, -11600, 11200, 10800, 10400, 10000, 9630, 9280, 8950, 8620, 8310, -8020, 7730, 7460, 7200, 6950, 6710, 6470, 6250, 6040, 5830, -5640, 5450, 5260, 5090, 4920, 4760, 4600, 4450, 4310, 4170, -4040, 3910, 3790, 3670, 3550 +/* Conversion table from -3 to 55 degrees Celcius */ +static int twl4030_therm_tbl[] = { + 30800, 29500, 28300, 27100, + 26000, 24900, 23900, 22900, 22000, 21100, 20300, 19400, 18700, + 17900, 17200, 16500, 15900, 15300, 14700, 14100, 13600, 13100, + 12600, 12100, 11600, 11200, 10800, 10400, 10000, 9630, 9280, + 8950, 8620, 8310, 8020, 7730, 7460, 7200, 6950, 6710, + 6470, 6250, 6040, 5830, 5640, 5450, 5260, 5090, 4920, + 4760, 4600, 4450, 4310, 4170, 4040, 3910, 3790, 3670, + 3550 }; /* @@ -197,11 +196,12 @@ const struct twl4030_madc_conversion_method twl4030_conversion_methods[] = { }, }; -/* - * Function to read a particular channel value. - * @madc - pointer to struct twl4030_madc_data - * @reg - lsb of ADC Channel - * If the i2c read fails it returns an error else returns 0. +/** + * twl4030_madc_channel_raw_read() - Function to read a particular channel value + * @madc: pointer to struct twl4030_madc_data + * @reg: lsb of ADC Channel + * + * Return: 0 on success, an error code otherwise. */ static int twl4030_madc_channel_raw_read(struct twl4030_madc_data *madc, u8 reg) { @@ -227,7 +227,7 @@ static int twl4030_madc_channel_raw_read(struct twl4030_madc_data *madc, u8 reg) } /* - * Return battery temperature + * Return battery temperature in degrees Celsius * Or < 0 on failure. */ static int twl4030battery_temperature(int raw_volt) @@ -236,18 +236,18 @@ static int twl4030battery_temperature(int raw_volt) int temp, curr, volt, res, ret; volt = (raw_volt * TEMP_STEP_SIZE) / TEMP_PSR_R; - /* Getting and calculating the supply current in micro ampers */ + /* Getting and calculating the supply current in micro amperes */ ret = twl_i2c_read_u8(TWL_MODULE_MAIN_CHARGE, &val, REG_BCICTL2); if (ret < 0) return ret; + curr = ((val & TWL4030_BCI_ITHEN) + 1) * 10; /* Getting and calculating the thermistor resistance in ohms */ res = volt * 1000 / curr; /* calculating temperature */ for (temp = 58; temp >= 0; temp--) { - int actual = therm_tbl[temp]; - + int actual = twl4030_therm_tbl[temp]; if ((actual - res) >= 0) break; } @@ -269,11 +269,12 @@ static int twl4030battery_current(int raw_volt) else /* slope of 0.88 mV/mA */ return (raw_volt * CURR_STEP_SIZE) / CURR_PSR_R2; } + /* * Function to read channel values * @madc - pointer to twl4030_madc_data struct * @reg_base - Base address of the first channel - * @Channels - 16 bit bitmap. If the bit is set, channel value is read + * @Channels - 16 bit bitmap. If the bit is set, channel's value is read * @buf - The channel values are stored here. if read fails error * @raw - Return raw values without conversion * value is stored @@ -284,17 +285,17 @@ static int twl4030_madc_read_channels(struct twl4030_madc_data *madc, long channels, int *buf, bool raw) { - int count = 0, count_req = 0, i; + int count = 0; + int i; u8 reg; for_each_set_bit(i, &channels, TWL4030_MADC_MAX_CHANNELS) { - reg = reg_base + 2 * i; + reg = reg_base + (2 * i); buf[i] = twl4030_madc_channel_raw_read(madc, reg); if (buf[i] < 0) { - dev_err(madc->dev, - "Unable to read register 0x%X\n", reg); - count_req++; - continue; + dev_err(madc->dev, "Unable to read register 0x%X\n", + reg); + return buf[i]; } if (raw) { count++; @@ -305,7 +306,7 @@ static int twl4030_madc_read_channels(struct twl4030_madc_data *madc, buf[i] = twl4030battery_current(buf[i]); if (buf[i] < 0) { dev_err(madc->dev, "err reading current\n"); - count_req++; + return buf[i]; } else { count++; buf[i] = buf[i] - 750; @@ -315,7 +316,7 @@ static int twl4030_madc_read_channels(struct twl4030_madc_data *madc, buf[i] = twl4030battery_temperature(buf[i]); if (buf[i] < 0) { dev_err(madc->dev, "err reading temperature\n"); - count_req++; + return buf[i]; } else { buf[i] -= 3; count++; @@ -336,8 +337,6 @@ static int twl4030_madc_read_channels(struct twl4030_madc_data *madc, twl4030_divider_ratios[i].numerator); } } - if (count_req) - dev_err(madc->dev, "%d channel conversion failed\n", count_req); return count; } @@ -361,13 +360,13 @@ static int twl4030_madc_enable_irq(struct twl4030_madc_data *madc, u8 id) madc->imr); return ret; } + val &= ~(1 << id); ret = twl_i2c_write_u8(TWL4030_MODULE_MADC, val, madc->imr); if (ret) { dev_err(madc->dev, "unable to write imr register 0x%X\n", madc->imr); return ret; - } return 0; @@ -430,7 +429,7 @@ static irqreturn_t twl4030_madc_threaded_irq_handler(int irq, void *_madc) continue; ret = twl4030_madc_disable_irq(madc, i); if (ret < 0) - dev_dbg(madc->dev, "Disable interrupt failed%d\n", i); + dev_dbg(madc->dev, "Disable interrupt failed %d\n", i); madc->requests[i].result_pending = 1; } for (i = 0; i < TWL4030_MADC_NUM_METHODS; i++) { @@ -512,21 +511,17 @@ static int twl4030_madc_start_conversion(struct twl4030_madc_data *madc, { const struct twl4030_madc_conversion_method *method; int ret = 0; + + if (conv_method != TWL4030_MADC_SW1 && conv_method != TWL4030_MADC_SW2) + return -ENOTSUPP; + method = &twl4030_conversion_methods[conv_method]; - switch (conv_method) { - case TWL4030_MADC_SW1: - case TWL4030_MADC_SW2: - ret = twl_i2c_write_u8(TWL4030_MODULE_MADC, - TWL4030_MADC_SW_START, method->ctrl); - if (ret) { - dev_err(madc->dev, - "unable to write ctrl register 0x%X\n", - method->ctrl); - return ret; - } - break; - default: - break; + ret = twl_i2c_write_u8(TWL4030_MODULE_MADC, TWL4030_MADC_SW_START, + method->ctrl); + if (ret) { + dev_err(madc->dev, "unable to write ctrl register 0x%X\n", + method->ctrl); + return ret; } return 0; @@ -623,8 +618,8 @@ int twl4030_madc_conversion(struct twl4030_madc_request *req) ch_lsb, method->avg); if (ret) { dev_err(twl4030_madc->dev, - "unable to write sel reg 0x%X\n", - method->sel + 1); + "unable to write avg reg 0x%X\n", + method->avg); goto out; } } @@ -665,10 +660,6 @@ out: } EXPORT_SYMBOL_GPL(twl4030_madc_conversion); -/* - * Return channel value - * Or < 0 on failure. - */ int twl4030_get_madc_conversion(int channel_no) { struct twl4030_madc_request req; @@ -689,20 +680,25 @@ int twl4030_get_madc_conversion(int channel_no) } EXPORT_SYMBOL_GPL(twl4030_get_madc_conversion); -/* +/** + * twl4030_madc_set_current_generator() - setup bias current + * + * @madc: pointer to twl4030_madc_data struct + * @chan: can be one of the two values: + * TWL4030_BCI_ITHEN + * Enables bias current for main battery type reading + * TWL4030_BCI_TYPEN + * Enables bias current for main battery temperature sensing + * @on: enable or disable chan. + * * Function to enable or disable bias current for * main battery type reading or temperature sensing - * @madc - pointer to twl4030_madc_data struct - * @chan - can be one of the two values - * TWL4030_BCI_ITHEN - Enables bias current for main battery type reading - * TWL4030_BCI_TYPEN - Enables bias current for main battery temperature - * sensing - * @on - enable or disable chan. */ static int twl4030_madc_set_current_generator(struct twl4030_madc_data *madc, int chan, int on) { int ret; + int regmask; u8 regval; ret = twl_i2c_read_u8(TWL_MODULE_MAIN_CHARGE, @@ -712,10 +708,13 @@ static int twl4030_madc_set_current_generator(struct twl4030_madc_data *madc, TWL4030_BCI_BCICTL1); return ret; } + + regmask = chan ? TWL4030_BCI_ITHEN : TWL4030_BCI_TYPEN; if (on) - regval |= chan ? TWL4030_BCI_ITHEN : TWL4030_BCI_TYPEN; + regval |= regmask; else - regval &= chan ? ~TWL4030_BCI_ITHEN : ~TWL4030_BCI_TYPEN; + regval &= ~regmask; + ret = twl_i2c_write_u8(TWL_MODULE_MAIN_CHARGE, regval, TWL4030_BCI_BCICTL1); if (ret) { @@ -730,7 +729,7 @@ static int twl4030_madc_set_current_generator(struct twl4030_madc_data *madc, /* * Function that sets MADC software power on bit to enable MADC * @madc - pointer to twl4030_madc_data struct - * @on - Enable or disable MADC software powen on bit. + * @on - Enable or disable MADC software power on bit. * returns error if i2c read/write fails else 0 */ static int twl4030_madc_set_power(struct twl4030_madc_data *madc, int on) @@ -909,7 +908,7 @@ static struct platform_driver twl4030_madc_driver = { .name = "twl4030_madc", .owner = THIS_MODULE, .of_match_table = of_match_ptr(twl_madc_of_match), - }, + }, }; module_platform_driver(twl4030_madc_driver); diff --git a/include/linux/i2c/twl4030-madc.h b/include/linux/i2c/twl4030-madc.h index 01f595107048..1c0134dd3271 100644 --- a/include/linux/i2c/twl4030-madc.h +++ b/include/linux/i2c/twl4030-madc.h @@ -44,7 +44,7 @@ struct twl4030_madc_conversion_method { struct twl4030_madc_request { unsigned long channels; - u16 do_avg; + bool do_avg; u16 method; u16 type; bool active; -- cgit v1.2.3-59-g8ed1b From 204dfd63a07ec4785fc786ff3511d85eb1346b7b Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Sun, 16 Mar 2014 02:43:28 +0100 Subject: mfd: twl-core: Add twl_i2c_read/write_u16 Add a simple twl_i2c_read/write_u16 wrapper over the twl_i2c_read/write, which is similar to the twl_i2c_read/write_u8 wrapper. Signed-off-by: Sebastian Reichel Acked-by: Jonathan Cameron Tested-by: Marek Belisko Signed-off-by: Lee Jones --- include/linux/i2c/twl.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h index ade1c06d4ceb..d2b16704624c 100644 --- a/include/linux/i2c/twl.h +++ b/include/linux/i2c/twl.h @@ -195,6 +195,18 @@ static inline int twl_i2c_read_u8(u8 mod_no, u8 *val, u8 reg) { return twl_i2c_read(mod_no, val, reg, 1); } +static inline int twl_i2c_write_u16(u8 mod_no, u16 val, u8 reg) { + val = cpu_to_le16(val); + return twl_i2c_write(mod_no, (u8*) &val, reg, 2); +} + +static inline int twl_i2c_read_u16(u8 mod_no, u16 *val, u8 reg) { + int ret; + ret = twl_i2c_read(mod_no, (u8*) val, reg, 2); + *val = le16_to_cpu(*val); + return ret; +} + int twl_get_type(void); int twl_get_version(void); int twl_get_hfclk_rate(void); -- cgit v1.2.3-59-g8ed1b From 5e98fc8feaa869aa6ca5c73830f0855133eb461e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 28 Jan 2014 13:18:26 +0100 Subject: mfd: max14577: Remove unused enum max14577_irq_source Remove unused symbol: enum max14577_irq_source. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Lee Jones --- include/linux/mfd/max14577-private.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/max14577-private.h b/include/linux/mfd/max14577-private.h index a3d0185196d3..c9b332fb0d5d 100644 --- a/include/linux/mfd/max14577-private.h +++ b/include/linux/mfd/max14577-private.h @@ -248,14 +248,6 @@ enum max14577_charger_reg { /* MAX14577 regulator SFOUT LDO voltage, fixed, uV */ #define MAX14577_REGULATOR_SAFEOUT_VOLTAGE 4900000 -enum max14577_irq_source { - MAX14577_IRQ_INT1 = 0, - MAX14577_IRQ_INT2, - MAX14577_IRQ_INT3, - - MAX14577_IRQ_REGS_NUM, -}; - enum max14577_irq { /* INT1 */ MAX14577_IRQ_INT1_ADC, -- cgit v1.2.3-59-g8ed1b From ec2b26724f81d58008626228686f47ca3337e7ed Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 28 Jan 2014 13:18:27 +0100 Subject: mfd: max14577: Remove not needed header inclusion Remove not needed max14577-private.h header inclusion in the main driver header. Remove obvious comment. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Lee Jones --- include/linux/mfd/max14577.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/max14577.h b/include/linux/mfd/max14577.h index 247b021dfaaf..736d39c3ec0d 100644 --- a/include/linux/mfd/max14577.h +++ b/include/linux/mfd/max14577.h @@ -25,13 +25,8 @@ #ifndef __MAX14577_H__ #define __MAX14577_H__ -#include #include -/* - * MAX14577 Regulator - */ - /* MAX14577 regulator IDs */ enum max14577_regulators { MAX14577_SAFEOUT = 0, -- cgit v1.2.3-59-g8ed1b From 8dbb947af37159d3df202d3cdbbae5fb896c462a Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 22 Jan 2014 13:04:23 +0000 Subject: mfd: wm5102: Make additional DSP registers available to the user Expose some DSP registers which are useful for DSP users to be able to access whilst debugging their firmware. Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- drivers/mfd/wm5102-tables.c | 34 ++++++++++++++++++++++++++++++++++ include/linux/mfd/arizona/registers.h | 17 +++++++++++++++++ 2 files changed, 51 insertions(+) (limited to 'include/linux') diff --git a/drivers/mfd/wm5102-tables.c b/drivers/mfd/wm5102-tables.c index f9b1e9666013..187ee86cbbe6 100644 --- a/drivers/mfd/wm5102-tables.c +++ b/drivers/mfd/wm5102-tables.c @@ -1853,6 +1853,23 @@ static bool wm5102_readable_register(struct device *dev, unsigned int reg) case ARIZONA_DSP1_STATUS_1: case ARIZONA_DSP1_STATUS_2: case ARIZONA_DSP1_STATUS_3: + case ARIZONA_DSP1_WDMA_BUFFER_1: + case ARIZONA_DSP1_WDMA_BUFFER_2: + case ARIZONA_DSP1_WDMA_BUFFER_3: + case ARIZONA_DSP1_WDMA_BUFFER_4: + case ARIZONA_DSP1_WDMA_BUFFER_5: + case ARIZONA_DSP1_WDMA_BUFFER_6: + case ARIZONA_DSP1_WDMA_BUFFER_7: + case ARIZONA_DSP1_WDMA_BUFFER_8: + case ARIZONA_DSP1_RDMA_BUFFER_1: + case ARIZONA_DSP1_RDMA_BUFFER_2: + case ARIZONA_DSP1_RDMA_BUFFER_3: + case ARIZONA_DSP1_RDMA_BUFFER_4: + case ARIZONA_DSP1_RDMA_BUFFER_5: + case ARIZONA_DSP1_RDMA_BUFFER_6: + case ARIZONA_DSP1_WDMA_CONFIG_1: + case ARIZONA_DSP1_WDMA_CONFIG_2: + case ARIZONA_DSP1_RDMA_CONFIG_1: case ARIZONA_DSP1_SCRATCH_0: case ARIZONA_DSP1_SCRATCH_1: case ARIZONA_DSP1_SCRATCH_2: @@ -1911,6 +1928,23 @@ static bool wm5102_volatile_register(struct device *dev, unsigned int reg) case ARIZONA_DSP1_STATUS_1: case ARIZONA_DSP1_STATUS_2: case ARIZONA_DSP1_STATUS_3: + case ARIZONA_DSP1_WDMA_BUFFER_1: + case ARIZONA_DSP1_WDMA_BUFFER_2: + case ARIZONA_DSP1_WDMA_BUFFER_3: + case ARIZONA_DSP1_WDMA_BUFFER_4: + case ARIZONA_DSP1_WDMA_BUFFER_5: + case ARIZONA_DSP1_WDMA_BUFFER_6: + case ARIZONA_DSP1_WDMA_BUFFER_7: + case ARIZONA_DSP1_WDMA_BUFFER_8: + case ARIZONA_DSP1_RDMA_BUFFER_1: + case ARIZONA_DSP1_RDMA_BUFFER_2: + case ARIZONA_DSP1_RDMA_BUFFER_3: + case ARIZONA_DSP1_RDMA_BUFFER_4: + case ARIZONA_DSP1_RDMA_BUFFER_5: + case ARIZONA_DSP1_RDMA_BUFFER_6: + case ARIZONA_DSP1_WDMA_CONFIG_1: + case ARIZONA_DSP1_WDMA_CONFIG_2: + case ARIZONA_DSP1_RDMA_CONFIG_1: case ARIZONA_DSP1_SCRATCH_0: case ARIZONA_DSP1_SCRATCH_1: case ARIZONA_DSP1_SCRATCH_2: diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h index fdf3aa376eb2..6a3b16993c1a 100644 --- a/include/linux/mfd/arizona/registers.h +++ b/include/linux/mfd/arizona/registers.h @@ -1034,6 +1034,23 @@ #define ARIZONA_DSP1_STATUS_1 0x1104 #define ARIZONA_DSP1_STATUS_2 0x1105 #define ARIZONA_DSP1_STATUS_3 0x1106 +#define ARIZONA_DSP1_WDMA_BUFFER_1 0x1110 +#define ARIZONA_DSP1_WDMA_BUFFER_2 0x1111 +#define ARIZONA_DSP1_WDMA_BUFFER_3 0x1112 +#define ARIZONA_DSP1_WDMA_BUFFER_4 0x1113 +#define ARIZONA_DSP1_WDMA_BUFFER_5 0x1114 +#define ARIZONA_DSP1_WDMA_BUFFER_6 0x1115 +#define ARIZONA_DSP1_WDMA_BUFFER_7 0x1116 +#define ARIZONA_DSP1_WDMA_BUFFER_8 0x1117 +#define ARIZONA_DSP1_RDMA_BUFFER_1 0x1120 +#define ARIZONA_DSP1_RDMA_BUFFER_2 0x1121 +#define ARIZONA_DSP1_RDMA_BUFFER_3 0x1122 +#define ARIZONA_DSP1_RDMA_BUFFER_4 0x1123 +#define ARIZONA_DSP1_RDMA_BUFFER_5 0x1124 +#define ARIZONA_DSP1_RDMA_BUFFER_6 0x1125 +#define ARIZONA_DSP1_WDMA_CONFIG_1 0x1130 +#define ARIZONA_DSP1_WDMA_CONFIG_2 0x1131 +#define ARIZONA_DSP1_RDMA_CONFIG_1 0x1134 #define ARIZONA_DSP1_SCRATCH_0 0x1140 #define ARIZONA_DSP1_SCRATCH_1 0x1141 #define ARIZONA_DSP1_SCRATCH_2 0x1142 -- cgit v1.2.3-59-g8ed1b From 47ec66a17cca571eb7038b7835dff7cbd5851b90 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 22 Jan 2014 13:04:24 +0000 Subject: mfd: wm5110: Make additional DSP registers available to the user Expose some DSP registers which are useful for DSP users to be able to access whilst debugging their firmware. Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- drivers/mfd/wm5110-tables.c | 168 ++++++++++++++++++++++++++++++++++ include/linux/mfd/arizona/registers.h | 67 ++++++++++++++ 2 files changed, 235 insertions(+) (limited to 'include/linux') diff --git a/drivers/mfd/wm5110-tables.c b/drivers/mfd/wm5110-tables.c index 11632f135e8c..ca9968525017 100644 --- a/drivers/mfd/wm5110-tables.c +++ b/drivers/mfd/wm5110-tables.c @@ -2461,6 +2461,27 @@ static bool wm5110_readable_register(struct device *dev, unsigned int reg) case ARIZONA_DSP1_STATUS_1: case ARIZONA_DSP1_STATUS_2: case ARIZONA_DSP1_STATUS_3: + case ARIZONA_DSP1_STATUS_4: + case ARIZONA_DSP1_WDMA_BUFFER_1: + case ARIZONA_DSP1_WDMA_BUFFER_2: + case ARIZONA_DSP1_WDMA_BUFFER_3: + case ARIZONA_DSP1_WDMA_BUFFER_4: + case ARIZONA_DSP1_WDMA_BUFFER_5: + case ARIZONA_DSP1_WDMA_BUFFER_6: + case ARIZONA_DSP1_WDMA_BUFFER_7: + case ARIZONA_DSP1_WDMA_BUFFER_8: + case ARIZONA_DSP1_RDMA_BUFFER_1: + case ARIZONA_DSP1_RDMA_BUFFER_2: + case ARIZONA_DSP1_RDMA_BUFFER_3: + case ARIZONA_DSP1_RDMA_BUFFER_4: + case ARIZONA_DSP1_RDMA_BUFFER_5: + case ARIZONA_DSP1_RDMA_BUFFER_6: + case ARIZONA_DSP1_WDMA_CONFIG_1: + case ARIZONA_DSP1_WDMA_CONFIG_2: + case ARIZONA_DSP1_WDMA_OFFSET_1: + case ARIZONA_DSP1_RDMA_CONFIG_1: + case ARIZONA_DSP1_RDMA_OFFSET_1: + case ARIZONA_DSP1_EXTERNAL_START_SELECT_1: case ARIZONA_DSP1_SCRATCH_0: case ARIZONA_DSP1_SCRATCH_1: case ARIZONA_DSP1_SCRATCH_2: @@ -2470,6 +2491,27 @@ static bool wm5110_readable_register(struct device *dev, unsigned int reg) case ARIZONA_DSP2_STATUS_1: case ARIZONA_DSP2_STATUS_2: case ARIZONA_DSP2_STATUS_3: + case ARIZONA_DSP2_STATUS_4: + case ARIZONA_DSP2_WDMA_BUFFER_1: + case ARIZONA_DSP2_WDMA_BUFFER_2: + case ARIZONA_DSP2_WDMA_BUFFER_3: + case ARIZONA_DSP2_WDMA_BUFFER_4: + case ARIZONA_DSP2_WDMA_BUFFER_5: + case ARIZONA_DSP2_WDMA_BUFFER_6: + case ARIZONA_DSP2_WDMA_BUFFER_7: + case ARIZONA_DSP2_WDMA_BUFFER_8: + case ARIZONA_DSP2_RDMA_BUFFER_1: + case ARIZONA_DSP2_RDMA_BUFFER_2: + case ARIZONA_DSP2_RDMA_BUFFER_3: + case ARIZONA_DSP2_RDMA_BUFFER_4: + case ARIZONA_DSP2_RDMA_BUFFER_5: + case ARIZONA_DSP2_RDMA_BUFFER_6: + case ARIZONA_DSP2_WDMA_CONFIG_1: + case ARIZONA_DSP2_WDMA_CONFIG_2: + case ARIZONA_DSP2_WDMA_OFFSET_1: + case ARIZONA_DSP2_RDMA_CONFIG_1: + case ARIZONA_DSP2_RDMA_OFFSET_1: + case ARIZONA_DSP2_EXTERNAL_START_SELECT_1: case ARIZONA_DSP2_SCRATCH_0: case ARIZONA_DSP2_SCRATCH_1: case ARIZONA_DSP2_SCRATCH_2: @@ -2479,6 +2521,27 @@ static bool wm5110_readable_register(struct device *dev, unsigned int reg) case ARIZONA_DSP3_STATUS_1: case ARIZONA_DSP3_STATUS_2: case ARIZONA_DSP3_STATUS_3: + case ARIZONA_DSP3_STATUS_4: + case ARIZONA_DSP3_WDMA_BUFFER_1: + case ARIZONA_DSP3_WDMA_BUFFER_2: + case ARIZONA_DSP3_WDMA_BUFFER_3: + case ARIZONA_DSP3_WDMA_BUFFER_4: + case ARIZONA_DSP3_WDMA_BUFFER_5: + case ARIZONA_DSP3_WDMA_BUFFER_6: + case ARIZONA_DSP3_WDMA_BUFFER_7: + case ARIZONA_DSP3_WDMA_BUFFER_8: + case ARIZONA_DSP3_RDMA_BUFFER_1: + case ARIZONA_DSP3_RDMA_BUFFER_2: + case ARIZONA_DSP3_RDMA_BUFFER_3: + case ARIZONA_DSP3_RDMA_BUFFER_4: + case ARIZONA_DSP3_RDMA_BUFFER_5: + case ARIZONA_DSP3_RDMA_BUFFER_6: + case ARIZONA_DSP3_WDMA_CONFIG_1: + case ARIZONA_DSP3_WDMA_CONFIG_2: + case ARIZONA_DSP3_WDMA_OFFSET_1: + case ARIZONA_DSP3_RDMA_CONFIG_1: + case ARIZONA_DSP3_RDMA_OFFSET_1: + case ARIZONA_DSP3_EXTERNAL_START_SELECT_1: case ARIZONA_DSP3_SCRATCH_0: case ARIZONA_DSP3_SCRATCH_1: case ARIZONA_DSP3_SCRATCH_2: @@ -2488,6 +2551,27 @@ static bool wm5110_readable_register(struct device *dev, unsigned int reg) case ARIZONA_DSP4_STATUS_1: case ARIZONA_DSP4_STATUS_2: case ARIZONA_DSP4_STATUS_3: + case ARIZONA_DSP4_STATUS_4: + case ARIZONA_DSP4_WDMA_BUFFER_1: + case ARIZONA_DSP4_WDMA_BUFFER_2: + case ARIZONA_DSP4_WDMA_BUFFER_3: + case ARIZONA_DSP4_WDMA_BUFFER_4: + case ARIZONA_DSP4_WDMA_BUFFER_5: + case ARIZONA_DSP4_WDMA_BUFFER_6: + case ARIZONA_DSP4_WDMA_BUFFER_7: + case ARIZONA_DSP4_WDMA_BUFFER_8: + case ARIZONA_DSP4_RDMA_BUFFER_1: + case ARIZONA_DSP4_RDMA_BUFFER_2: + case ARIZONA_DSP4_RDMA_BUFFER_3: + case ARIZONA_DSP4_RDMA_BUFFER_4: + case ARIZONA_DSP4_RDMA_BUFFER_5: + case ARIZONA_DSP4_RDMA_BUFFER_6: + case ARIZONA_DSP4_WDMA_CONFIG_1: + case ARIZONA_DSP4_WDMA_CONFIG_2: + case ARIZONA_DSP4_WDMA_OFFSET_1: + case ARIZONA_DSP4_RDMA_CONFIG_1: + case ARIZONA_DSP4_RDMA_OFFSET_1: + case ARIZONA_DSP4_EXTERNAL_START_SELECT_1: case ARIZONA_DSP4_SCRATCH_0: case ARIZONA_DSP4_SCRATCH_1: case ARIZONA_DSP4_SCRATCH_2: @@ -2543,6 +2627,27 @@ static bool wm5110_volatile_register(struct device *dev, unsigned int reg) case ARIZONA_DSP1_STATUS_1: case ARIZONA_DSP1_STATUS_2: case ARIZONA_DSP1_STATUS_3: + case ARIZONA_DSP1_STATUS_4: + case ARIZONA_DSP1_WDMA_BUFFER_1: + case ARIZONA_DSP1_WDMA_BUFFER_2: + case ARIZONA_DSP1_WDMA_BUFFER_3: + case ARIZONA_DSP1_WDMA_BUFFER_4: + case ARIZONA_DSP1_WDMA_BUFFER_5: + case ARIZONA_DSP1_WDMA_BUFFER_6: + case ARIZONA_DSP1_WDMA_BUFFER_7: + case ARIZONA_DSP1_WDMA_BUFFER_8: + case ARIZONA_DSP1_RDMA_BUFFER_1: + case ARIZONA_DSP1_RDMA_BUFFER_2: + case ARIZONA_DSP1_RDMA_BUFFER_3: + case ARIZONA_DSP1_RDMA_BUFFER_4: + case ARIZONA_DSP1_RDMA_BUFFER_5: + case ARIZONA_DSP1_RDMA_BUFFER_6: + case ARIZONA_DSP1_WDMA_CONFIG_1: + case ARIZONA_DSP1_WDMA_CONFIG_2: + case ARIZONA_DSP1_WDMA_OFFSET_1: + case ARIZONA_DSP1_RDMA_CONFIG_1: + case ARIZONA_DSP1_RDMA_OFFSET_1: + case ARIZONA_DSP1_EXTERNAL_START_SELECT_1: case ARIZONA_DSP1_SCRATCH_0: case ARIZONA_DSP1_SCRATCH_1: case ARIZONA_DSP1_SCRATCH_2: @@ -2550,6 +2655,27 @@ static bool wm5110_volatile_register(struct device *dev, unsigned int reg) case ARIZONA_DSP2_STATUS_1: case ARIZONA_DSP2_STATUS_2: case ARIZONA_DSP2_STATUS_3: + case ARIZONA_DSP2_STATUS_4: + case ARIZONA_DSP2_WDMA_BUFFER_1: + case ARIZONA_DSP2_WDMA_BUFFER_2: + case ARIZONA_DSP2_WDMA_BUFFER_3: + case ARIZONA_DSP2_WDMA_BUFFER_4: + case ARIZONA_DSP2_WDMA_BUFFER_5: + case ARIZONA_DSP2_WDMA_BUFFER_6: + case ARIZONA_DSP2_WDMA_BUFFER_7: + case ARIZONA_DSP2_WDMA_BUFFER_8: + case ARIZONA_DSP2_RDMA_BUFFER_1: + case ARIZONA_DSP2_RDMA_BUFFER_2: + case ARIZONA_DSP2_RDMA_BUFFER_3: + case ARIZONA_DSP2_RDMA_BUFFER_4: + case ARIZONA_DSP2_RDMA_BUFFER_5: + case ARIZONA_DSP2_RDMA_BUFFER_6: + case ARIZONA_DSP2_WDMA_CONFIG_1: + case ARIZONA_DSP2_WDMA_CONFIG_2: + case ARIZONA_DSP2_WDMA_OFFSET_1: + case ARIZONA_DSP2_RDMA_CONFIG_1: + case ARIZONA_DSP2_RDMA_OFFSET_1: + case ARIZONA_DSP2_EXTERNAL_START_SELECT_1: case ARIZONA_DSP2_SCRATCH_0: case ARIZONA_DSP2_SCRATCH_1: case ARIZONA_DSP2_SCRATCH_2: @@ -2557,6 +2683,27 @@ static bool wm5110_volatile_register(struct device *dev, unsigned int reg) case ARIZONA_DSP3_STATUS_1: case ARIZONA_DSP3_STATUS_2: case ARIZONA_DSP3_STATUS_3: + case ARIZONA_DSP3_STATUS_4: + case ARIZONA_DSP3_WDMA_BUFFER_1: + case ARIZONA_DSP3_WDMA_BUFFER_2: + case ARIZONA_DSP3_WDMA_BUFFER_3: + case ARIZONA_DSP3_WDMA_BUFFER_4: + case ARIZONA_DSP3_WDMA_BUFFER_5: + case ARIZONA_DSP3_WDMA_BUFFER_6: + case ARIZONA_DSP3_WDMA_BUFFER_7: + case ARIZONA_DSP3_WDMA_BUFFER_8: + case ARIZONA_DSP3_RDMA_BUFFER_1: + case ARIZONA_DSP3_RDMA_BUFFER_2: + case ARIZONA_DSP3_RDMA_BUFFER_3: + case ARIZONA_DSP3_RDMA_BUFFER_4: + case ARIZONA_DSP3_RDMA_BUFFER_5: + case ARIZONA_DSP3_RDMA_BUFFER_6: + case ARIZONA_DSP3_WDMA_CONFIG_1: + case ARIZONA_DSP3_WDMA_CONFIG_2: + case ARIZONA_DSP3_WDMA_OFFSET_1: + case ARIZONA_DSP3_RDMA_CONFIG_1: + case ARIZONA_DSP3_RDMA_OFFSET_1: + case ARIZONA_DSP3_EXTERNAL_START_SELECT_1: case ARIZONA_DSP3_SCRATCH_0: case ARIZONA_DSP3_SCRATCH_1: case ARIZONA_DSP3_SCRATCH_2: @@ -2564,6 +2711,27 @@ static bool wm5110_volatile_register(struct device *dev, unsigned int reg) case ARIZONA_DSP4_STATUS_1: case ARIZONA_DSP4_STATUS_2: case ARIZONA_DSP4_STATUS_3: + case ARIZONA_DSP4_STATUS_4: + case ARIZONA_DSP4_WDMA_BUFFER_1: + case ARIZONA_DSP4_WDMA_BUFFER_2: + case ARIZONA_DSP4_WDMA_BUFFER_3: + case ARIZONA_DSP4_WDMA_BUFFER_4: + case ARIZONA_DSP4_WDMA_BUFFER_5: + case ARIZONA_DSP4_WDMA_BUFFER_6: + case ARIZONA_DSP4_WDMA_BUFFER_7: + case ARIZONA_DSP4_WDMA_BUFFER_8: + case ARIZONA_DSP4_RDMA_BUFFER_1: + case ARIZONA_DSP4_RDMA_BUFFER_2: + case ARIZONA_DSP4_RDMA_BUFFER_3: + case ARIZONA_DSP4_RDMA_BUFFER_4: + case ARIZONA_DSP4_RDMA_BUFFER_5: + case ARIZONA_DSP4_RDMA_BUFFER_6: + case ARIZONA_DSP4_WDMA_CONFIG_1: + case ARIZONA_DSP4_WDMA_CONFIG_2: + case ARIZONA_DSP4_WDMA_OFFSET_1: + case ARIZONA_DSP4_RDMA_CONFIG_1: + case ARIZONA_DSP4_RDMA_OFFSET_1: + case ARIZONA_DSP4_EXTERNAL_START_SELECT_1: case ARIZONA_DSP4_SCRATCH_0: case ARIZONA_DSP4_SCRATCH_1: case ARIZONA_DSP4_SCRATCH_2: diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h index 6a3b16993c1a..90d57aec34e7 100644 --- a/include/linux/mfd/arizona/registers.h +++ b/include/linux/mfd/arizona/registers.h @@ -1034,6 +1034,7 @@ #define ARIZONA_DSP1_STATUS_1 0x1104 #define ARIZONA_DSP1_STATUS_2 0x1105 #define ARIZONA_DSP1_STATUS_3 0x1106 +#define ARIZONA_DSP1_STATUS_4 0x1107 #define ARIZONA_DSP1_WDMA_BUFFER_1 0x1110 #define ARIZONA_DSP1_WDMA_BUFFER_2 0x1111 #define ARIZONA_DSP1_WDMA_BUFFER_3 0x1112 @@ -1050,7 +1051,10 @@ #define ARIZONA_DSP1_RDMA_BUFFER_6 0x1125 #define ARIZONA_DSP1_WDMA_CONFIG_1 0x1130 #define ARIZONA_DSP1_WDMA_CONFIG_2 0x1131 +#define ARIZONA_DSP1_WDMA_OFFSET_1 0x1132 #define ARIZONA_DSP1_RDMA_CONFIG_1 0x1134 +#define ARIZONA_DSP1_RDMA_OFFSET_1 0x1135 +#define ARIZONA_DSP1_EXTERNAL_START_SELECT_1 0x1138 #define ARIZONA_DSP1_SCRATCH_0 0x1140 #define ARIZONA_DSP1_SCRATCH_1 0x1141 #define ARIZONA_DSP1_SCRATCH_2 0x1142 @@ -1060,6 +1064,27 @@ #define ARIZONA_DSP2_STATUS_1 0x1204 #define ARIZONA_DSP2_STATUS_2 0x1205 #define ARIZONA_DSP2_STATUS_3 0x1206 +#define ARIZONA_DSP2_STATUS_4 0x1207 +#define ARIZONA_DSP2_WDMA_BUFFER_1 0x1210 +#define ARIZONA_DSP2_WDMA_BUFFER_2 0x1211 +#define ARIZONA_DSP2_WDMA_BUFFER_3 0x1212 +#define ARIZONA_DSP2_WDMA_BUFFER_4 0x1213 +#define ARIZONA_DSP2_WDMA_BUFFER_5 0x1214 +#define ARIZONA_DSP2_WDMA_BUFFER_6 0x1215 +#define ARIZONA_DSP2_WDMA_BUFFER_7 0x1216 +#define ARIZONA_DSP2_WDMA_BUFFER_8 0x1217 +#define ARIZONA_DSP2_RDMA_BUFFER_1 0x1220 +#define ARIZONA_DSP2_RDMA_BUFFER_2 0x1221 +#define ARIZONA_DSP2_RDMA_BUFFER_3 0x1222 +#define ARIZONA_DSP2_RDMA_BUFFER_4 0x1223 +#define ARIZONA_DSP2_RDMA_BUFFER_5 0x1224 +#define ARIZONA_DSP2_RDMA_BUFFER_6 0x1225 +#define ARIZONA_DSP2_WDMA_CONFIG_1 0x1230 +#define ARIZONA_DSP2_WDMA_CONFIG_2 0x1231 +#define ARIZONA_DSP2_WDMA_OFFSET_1 0x1232 +#define ARIZONA_DSP2_RDMA_CONFIG_1 0x1234 +#define ARIZONA_DSP2_RDMA_OFFSET_1 0x1235 +#define ARIZONA_DSP2_EXTERNAL_START_SELECT_1 0x1238 #define ARIZONA_DSP2_SCRATCH_0 0x1240 #define ARIZONA_DSP2_SCRATCH_1 0x1241 #define ARIZONA_DSP2_SCRATCH_2 0x1242 @@ -1069,6 +1094,27 @@ #define ARIZONA_DSP3_STATUS_1 0x1304 #define ARIZONA_DSP3_STATUS_2 0x1305 #define ARIZONA_DSP3_STATUS_3 0x1306 +#define ARIZONA_DSP3_STATUS_4 0x1307 +#define ARIZONA_DSP3_WDMA_BUFFER_1 0x1310 +#define ARIZONA_DSP3_WDMA_BUFFER_2 0x1311 +#define ARIZONA_DSP3_WDMA_BUFFER_3 0x1312 +#define ARIZONA_DSP3_WDMA_BUFFER_4 0x1313 +#define ARIZONA_DSP3_WDMA_BUFFER_5 0x1314 +#define ARIZONA_DSP3_WDMA_BUFFER_6 0x1315 +#define ARIZONA_DSP3_WDMA_BUFFER_7 0x1316 +#define ARIZONA_DSP3_WDMA_BUFFER_8 0x1317 +#define ARIZONA_DSP3_RDMA_BUFFER_1 0x1320 +#define ARIZONA_DSP3_RDMA_BUFFER_2 0x1321 +#define ARIZONA_DSP3_RDMA_BUFFER_3 0x1322 +#define ARIZONA_DSP3_RDMA_BUFFER_4 0x1323 +#define ARIZONA_DSP3_RDMA_BUFFER_5 0x1324 +#define ARIZONA_DSP3_RDMA_BUFFER_6 0x1325 +#define ARIZONA_DSP3_WDMA_CONFIG_1 0x1330 +#define ARIZONA_DSP3_WDMA_CONFIG_2 0x1331 +#define ARIZONA_DSP3_WDMA_OFFSET_1 0x1332 +#define ARIZONA_DSP3_RDMA_CONFIG_1 0x1334 +#define ARIZONA_DSP3_RDMA_OFFSET_1 0x1335 +#define ARIZONA_DSP3_EXTERNAL_START_SELECT_1 0x1338 #define ARIZONA_DSP3_SCRATCH_0 0x1340 #define ARIZONA_DSP3_SCRATCH_1 0x1341 #define ARIZONA_DSP3_SCRATCH_2 0x1342 @@ -1078,6 +1124,27 @@ #define ARIZONA_DSP4_STATUS_1 0x1404 #define ARIZONA_DSP4_STATUS_2 0x1405 #define ARIZONA_DSP4_STATUS_3 0x1406 +#define ARIZONA_DSP4_STATUS_4 0x1407 +#define ARIZONA_DSP4_WDMA_BUFFER_1 0x1410 +#define ARIZONA_DSP4_WDMA_BUFFER_2 0x1411 +#define ARIZONA_DSP4_WDMA_BUFFER_3 0x1412 +#define ARIZONA_DSP4_WDMA_BUFFER_4 0x1413 +#define ARIZONA_DSP4_WDMA_BUFFER_5 0x1414 +#define ARIZONA_DSP4_WDMA_BUFFER_6 0x1415 +#define ARIZONA_DSP4_WDMA_BUFFER_7 0x1416 +#define ARIZONA_DSP4_WDMA_BUFFER_8 0x1417 +#define ARIZONA_DSP4_RDMA_BUFFER_1 0x1420 +#define ARIZONA_DSP4_RDMA_BUFFER_2 0x1421 +#define ARIZONA_DSP4_RDMA_BUFFER_3 0x1422 +#define ARIZONA_DSP4_RDMA_BUFFER_4 0x1423 +#define ARIZONA_DSP4_RDMA_BUFFER_5 0x1424 +#define ARIZONA_DSP4_RDMA_BUFFER_6 0x1425 +#define ARIZONA_DSP4_WDMA_CONFIG_1 0x1430 +#define ARIZONA_DSP4_WDMA_CONFIG_2 0x1431 +#define ARIZONA_DSP4_WDMA_OFFSET_1 0x1432 +#define ARIZONA_DSP4_RDMA_CONFIG_1 0x1434 +#define ARIZONA_DSP4_RDMA_OFFSET_1 0x1435 +#define ARIZONA_DSP4_EXTERNAL_START_SELECT_1 0x1438 #define ARIZONA_DSP4_SCRATCH_0 0x1440 #define ARIZONA_DSP4_SCRATCH_1 0x1441 #define ARIZONA_DSP4_SCRATCH_2 0x1442 -- cgit v1.2.3-59-g8ed1b From 44b4dc616365d7897808555d415099330e3af9df Mon Sep 17 00:00:00 2001 From: Keerthy Date: Thu, 6 Feb 2014 11:20:12 +0530 Subject: mfd: tps65218: Add driver for the TPS65218 PMIC The TPS65218 chip is a power management IC for Portable Navigation Systems and Tablet Computing devices. It contains the following components: - Regulators. - Over Temperature warning and Shut down. This patch adds support for tps65218 mfd device. At this time only the regulator functionality is made available. Signed-off-by: Keerthy Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 15 +++ drivers/mfd/Makefile | 1 + drivers/mfd/tps65218.c | 282 ++++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/tps65218.h | 284 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 582 insertions(+) create mode 100644 drivers/mfd/tps65218.c create mode 100644 include/linux/mfd/tps65218.h (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 30fbacfe8b3b..3e2d698ac432 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -853,6 +853,21 @@ config MFD_TPS65217 This driver can also be built as a module. If so, the module will be called tps65217. +config MFD_TPS65218 + tristate "TI TPS65218 Power Management chips" + depends on I2C + select MFD_CORE + select REGMAP_I2C + help + If you say yes here you get support for the TPS65218 series of + Power Management chips. + These include voltage regulators, gpio and other features + that are often used in portable devices. Only regulator + component is currently supported. + + This driver can also be built as a module. If so, the module + will be called tps65218. + config MFD_TPS6586X bool "TI TPS6586x Power Management chips" depends on I2C=y diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index c8eb0bcf4da0..5e5cfbd1b8da 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -62,6 +62,7 @@ obj-$(CONFIG_TPS6105X) += tps6105x.o obj-$(CONFIG_TPS65010) += tps65010.o obj-$(CONFIG_TPS6507X) += tps6507x.o obj-$(CONFIG_MFD_TPS65217) += tps65217.o +obj-$(CONFIG_MFD_TPS65218) += tps65218.o obj-$(CONFIG_MFD_TPS65910) += tps65910.o tps65912-objs := tps65912-core.o tps65912-irq.o obj-$(CONFIG_MFD_TPS65912) += tps65912.o diff --git a/drivers/mfd/tps65218.c b/drivers/mfd/tps65218.c new file mode 100644 index 000000000000..a74bfb59f18f --- /dev/null +++ b/drivers/mfd/tps65218.c @@ -0,0 +1,282 @@ +/* + * Driver for TPS65218 Integrated power management chipsets + * + * Copyright (C) 2014 Texas Instruments Incorporated - http://www.ti.com/ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether expressed or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License version 2 for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define TPS65218_PASSWORD_REGS_UNLOCK 0x7D + +/** + * tps65218_reg_read: Read a single tps65218 register. + * + * @tps: Device to read from. + * @reg: Register to read. + * @val: Contians the value + */ +int tps65218_reg_read(struct tps65218 *tps, unsigned int reg, + unsigned int *val) +{ + return regmap_read(tps->regmap, reg, val); +} +EXPORT_SYMBOL_GPL(tps65218_reg_read); + +/** + * tps65218_reg_write: Write a single tps65218 register. + * + * @tps65218: Device to write to. + * @reg: Register to write to. + * @val: Value to write. + * @level: Password protected level + */ +int tps65218_reg_write(struct tps65218 *tps, unsigned int reg, + unsigned int val, unsigned int level) +{ + int ret; + unsigned int xor_reg_val; + + switch (level) { + case TPS65218_PROTECT_NONE: + return regmap_write(tps->regmap, reg, val); + case TPS65218_PROTECT_L1: + xor_reg_val = reg ^ TPS65218_PASSWORD_REGS_UNLOCK; + ret = regmap_write(tps->regmap, TPS65218_REG_PASSWORD, + xor_reg_val); + if (ret < 0) + return ret; + + return regmap_write(tps->regmap, reg, val); + default: + return -EINVAL; + } +} +EXPORT_SYMBOL_GPL(tps65218_reg_write); + +/** + * tps65218_update_bits: Modify bits w.r.t mask, val and level. + * + * @tps65218: Device to write to. + * @reg: Register to read-write to. + * @mask: Mask. + * @val: Value to write. + * @level: Password protected level + */ +static int tps65218_update_bits(struct tps65218 *tps, unsigned int reg, + unsigned int mask, unsigned int val, unsigned int level) +{ + int ret; + unsigned int data; + + ret = tps65218_reg_read(tps, reg, &data); + if (ret) { + dev_err(tps->dev, "Read from reg 0x%x failed\n", reg); + return ret; + } + + data &= ~mask; + data |= val & mask; + + mutex_lock(&tps->tps_lock); + ret = tps65218_reg_write(tps, reg, data, level); + if (ret) + dev_err(tps->dev, "Write for reg 0x%x failed\n", reg); + mutex_unlock(&tps->tps_lock); + + return ret; +} + +int tps65218_set_bits(struct tps65218 *tps, unsigned int reg, + unsigned int mask, unsigned int val, unsigned int level) +{ + return tps65218_update_bits(tps, reg, mask, val, level); +} +EXPORT_SYMBOL_GPL(tps65218_set_bits); + +int tps65218_clear_bits(struct tps65218 *tps, unsigned int reg, + unsigned int mask, unsigned int level) +{ + return tps65218_update_bits(tps, reg, mask, 0, level); +} +EXPORT_SYMBOL_GPL(tps65218_clear_bits); + +static struct regmap_config tps65218_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .cache_type = REGCACHE_RBTREE, +}; + +static const struct regmap_irq tps65218_irqs[] = { + /* INT1 IRQs */ + [TPS65218_PRGC_IRQ] = { + .mask = TPS65218_INT1_PRGC, + }, + [TPS65218_CC_AQC_IRQ] = { + .mask = TPS65218_INT1_CC_AQC, + }, + [TPS65218_HOT_IRQ] = { + .mask = TPS65218_INT1_HOT, + }, + [TPS65218_PB_IRQ] = { + .mask = TPS65218_INT1_PB, + }, + [TPS65218_AC_IRQ] = { + .mask = TPS65218_INT1_AC, + }, + [TPS65218_VPRG_IRQ] = { + .mask = TPS65218_INT1_VPRG, + }, + [TPS65218_INVALID1_IRQ] = { + }, + [TPS65218_INVALID2_IRQ] = { + }, + /* INT2 IRQs*/ + [TPS65218_LS1_I_IRQ] = { + .mask = TPS65218_INT2_LS1_I, + .reg_offset = 1, + }, + [TPS65218_LS2_I_IRQ] = { + .mask = TPS65218_INT2_LS2_I, + .reg_offset = 1, + }, + [TPS65218_LS3_I_IRQ] = { + .mask = TPS65218_INT2_LS3_I, + .reg_offset = 1, + }, + [TPS65218_LS1_F_IRQ] = { + .mask = TPS65218_INT2_LS1_F, + .reg_offset = 1, + }, + [TPS65218_LS2_F_IRQ] = { + .mask = TPS65218_INT2_LS2_F, + .reg_offset = 1, + }, + [TPS65218_LS3_F_IRQ] = { + .mask = TPS65218_INT2_LS3_F, + .reg_offset = 1, + }, + [TPS65218_INVALID3_IRQ] = { + }, + [TPS65218_INVALID4_IRQ] = { + }, +}; + +static struct regmap_irq_chip tps65218_irq_chip = { + .name = "tps65218", + .irqs = tps65218_irqs, + .num_irqs = ARRAY_SIZE(tps65218_irqs), + + .num_regs = 2, + .mask_base = TPS65218_REG_INT_MASK1, +}; + +static const struct of_device_id of_tps65218_match_table[] = { + { .compatible = "ti,tps65218", }, +}; + +static int tps65218_probe(struct i2c_client *client, + const struct i2c_device_id *ids) +{ + struct tps65218 *tps; + const struct of_device_id *match; + int ret; + + match = of_match_device(of_tps65218_match_table, &client->dev); + if (!match) { + dev_err(&client->dev, + "Failed to find matching dt id\n"); + return -EINVAL; + } + + tps = devm_kzalloc(&client->dev, sizeof(*tps), GFP_KERNEL); + if (!tps) + return -ENOMEM; + + i2c_set_clientdata(client, tps); + tps->dev = &client->dev; + tps->irq = client->irq; + tps->regmap = devm_regmap_init_i2c(client, &tps65218_regmap_config); + if (IS_ERR(tps->regmap)) { + ret = PTR_ERR(tps->regmap); + dev_err(tps->dev, "Failed to allocate register map: %d\n", + ret); + return ret; + } + + mutex_init(&tps->tps_lock); + + ret = regmap_add_irq_chip(tps->regmap, tps->irq, + IRQF_ONESHOT, 0, &tps65218_irq_chip, + &tps->irq_data); + if (ret < 0) + return ret; + + ret = of_platform_populate(client->dev.of_node, NULL, NULL, + &client->dev); + if (ret < 0) + goto err_irq; + + return 0; + +err_irq: + regmap_del_irq_chip(tps->irq, tps->irq_data); + + return ret; +} + +static int tps65218_remove(struct i2c_client *client) +{ + struct tps65218 *tps = i2c_get_clientdata(client); + + regmap_del_irq_chip(tps->irq, tps->irq_data); + + return 0; +} + +static const struct i2c_device_id tps65218_id_table[] = { + { "tps65218", TPS65218 }, + { }, +}; +MODULE_DEVICE_TABLE(i2c, tps65218_id_table); + +static struct i2c_driver tps65218_driver = { + .driver = { + .name = "tps65218", + .owner = THIS_MODULE, + .of_match_table = of_tps65218_match_table, + }, + .probe = tps65218_probe, + .remove = tps65218_remove, + .id_table = tps65218_id_table, +}; + +module_i2c_driver(tps65218_driver); + +MODULE_AUTHOR("J Keerthy "); +MODULE_DESCRIPTION("TPS65218 chip family multi-function driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/mfd/tps65218.h b/include/linux/mfd/tps65218.h new file mode 100644 index 000000000000..d2e357df5a0e --- /dev/null +++ b/include/linux/mfd/tps65218.h @@ -0,0 +1,284 @@ +/* + * linux/mfd/tps65218.h + * + * Functions to access TPS65219 power management chip. + * + * Copyright (C) 2014 Texas Instruments Incorporated - http://www.ti.com/ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether expressed or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License version 2 for more details. + */ + +#ifndef __LINUX_MFD_TPS65218_H +#define __LINUX_MFD_TPS65218_H + +#include +#include +#include +#include + +/* TPS chip id list */ +#define TPS65218 0xF0 + +/* I2C ID for TPS65218 part */ +#define TPS65218_I2C_ID 0x24 + +/* All register addresses */ +#define TPS65218_REG_CHIPID 0x00 +#define TPS65218_REG_INT1 0x01 +#define TPS65218_REG_INT2 0x02 +#define TPS65218_REG_INT_MASK1 0x03 +#define TPS65218_REG_INT_MASK2 0x04 +#define TPS65218_REG_STATUS 0x05 +#define TPS65218_REG_CONTROL 0x06 +#define TPS65218_REG_FLAG 0x07 + +#define TPS65218_REG_PASSWORD 0x10 +#define TPS65218_REG_ENABLE1 0x11 +#define TPS65218_REG_ENABLE2 0x12 +#define TPS65218_REG_CONFIG1 0x13 +#define TPS65218_REG_CONFIG2 0x14 +#define TPS65218_REG_CONFIG3 0x15 +#define TPS65218_REG_CONTROL_DCDC1 0x16 +#define TPS65218_REG_CONTROL_DCDC2 0x17 +#define TPS65218_REG_CONTROL_DCDC3 0x18 +#define TPS65218_REG_CONTROL_DCDC4 0x19 +#define TPS65218_REG_CONTRL_SLEW_RATE 0x1A +#define TPS65218_REG_CONTROL_LDO1 0x1B +#define TPS65218_REG_SEQ1 0x20 +#define TPS65218_REG_SEQ2 0x21 +#define TPS65218_REG_SEQ3 0x22 +#define TPS65218_REG_SEQ4 0x23 +#define TPS65218_REG_SEQ5 0x24 +#define TPS65218_REG_SEQ6 0x25 +#define TPS65218_REG_SEQ7 0x26 + +/* Register field definitions */ +#define TPS65218_CHIPID_CHIP_MASK 0xF8 +#define TPS65218_CHIPID_REV_MASK 0x07 + +#define TPS65218_INT1_VPRG BIT(5) +#define TPS65218_INT1_AC BIT(4) +#define TPS65218_INT1_PB BIT(3) +#define TPS65218_INT1_HOT BIT(2) +#define TPS65218_INT1_CC_AQC BIT(1) +#define TPS65218_INT1_PRGC BIT(0) + +#define TPS65218_INT2_LS3_F BIT(5) +#define TPS65218_INT2_LS2_F BIT(4) +#define TPS65218_INT2_LS1_F BIT(3) +#define TPS65218_INT2_LS3_I BIT(2) +#define TPS65218_INT2_LS2_I BIT(1) +#define TPS65218_INT2_LS1_I BIT(0) + +#define TPS65218_INT_MASK1_VPRG BIT(5) +#define TPS65218_INT_MASK1_AC BIT(4) +#define TPS65218_INT_MASK1_PB BIT(3) +#define TPS65218_INT_MASK1_HOT BIT(2) +#define TPS65218_INT_MASK1_CC_AQC BIT(1) +#define TPS65218_INT_MASK1_PRGC BIT(0) + +#define TPS65218_INT_MASK2_LS3_F BIT(5) +#define TPS65218_INT_MASK2_LS2_F BIT(4) +#define TPS65218_INT_MASK2_LS1_F BIT(3) +#define TPS65218_INT_MASK2_LS3_I BIT(2) +#define TPS65218_INT_MASK2_LS2_I BIT(1) +#define TPS65218_INT_MASK2_LS1_I BIT(0) + +#define TPS65218_STATUS_FSEAL BIT(7) +#define TPS65218_STATUS_EE BIT(6) +#define TPS65218_STATUS_AC_STATE BIT(5) +#define TPS65218_STATUS_PB_STATE BIT(4) +#define TPS65218_STATUS_STATE_MASK 0xC +#define TPS65218_STATUS_CC_STAT 0x3 + +#define TPS65218_CONTROL_OFFNPFO BIT(1) +#define TPS65218_CONTROL_CC_AQ BIT(0) + +#define TPS65218_FLAG_GPO3_FLG BIT(7) +#define TPS65218_FLAG_GPO2_FLG BIT(6) +#define TPS65218_FLAG_GPO1_FLG BIT(5) +#define TPS65218_FLAG_LDO1_FLG BIT(4) +#define TPS65218_FLAG_DC4_FLG BIT(3) +#define TPS65218_FLAG_DC3_FLG BIT(2) +#define TPS65218_FLAG_DC2_FLG BIT(1) +#define TPS65218_FLAG_DC1_FLG BIT(0) + +#define TPS65218_ENABLE1_DC6_EN BIT(5) +#define TPS65218_ENABLE1_DC5_EN BIT(4) +#define TPS65218_ENABLE1_DC4_EN BIT(3) +#define TPS65218_ENABLE1_DC3_EN BIT(2) +#define TPS65218_ENABLE1_DC2_EN BIT(1) +#define TPS65218_ENABLE1_DC1_EN BIT(0) + +#define TPS65218_ENABLE2_GPIO3 BIT(6) +#define TPS65218_ENABLE2_GPIO2 BIT(5) +#define TPS65218_ENABLE2_GPIO1 BIT(4) +#define TPS65218_ENABLE2_LS3_EN BIT(3) +#define TPS65218_ENABLE2_LS2_EN BIT(2) +#define TPS65218_ENABLE2_LS1_EN BIT(1) +#define TPS65218_ENABLE2_LDO1_EN BIT(0) + + +#define TPS65218_CONFIG1_TRST BIT(7) +#define TPS65218_CONFIG1_GPO2_BUF BIT(6) +#define TPS65218_CONFIG1_IO1_SEL BIT(5) +#define TPS65218_CONFIG1_PGDLY_MASK 0x18 +#define TPS65218_CONFIG1_STRICT BIT(2) +#define TPS65218_CONFIG1_UVLO_MASK 0x3 + +#define TPS65218_CONFIG2_DC12_RST BIT(7) +#define TPS65218_CONFIG2_UVLOHYS BIT(6) +#define TPS65218_CONFIG2_LS3ILIM_MASK 0xC +#define TPS65218_CONFIG2_LS2ILIM_MASK 0x3 + +#define TPS65218_CONFIG3_LS3NPFO BIT(5) +#define TPS65218_CONFIG3_LS2NPFO BIT(4) +#define TPS65218_CONFIG3_LS1NPFO BIT(3) +#define TPS65218_CONFIG3_LS3DCHRG BIT(2) +#define TPS65218_CONFIG3_LS2DCHRG BIT(1) +#define TPS65218_CONFIG3_LS1DCHRG BIT(0) + +#define TPS65218_CONTROL_DCDC1_PFM BIT(7) +#define TPS65218_CONTROL_DCDC1_MASK 0x7F + +#define TPS65218_CONTROL_DCDC2_PFM BIT(7) +#define TPS65218_CONTROL_DCDC2_MASK 0x3F + +#define TPS65218_CONTROL_DCDC3_PFM BIT(7) +#define TPS65218_CONTROL_DCDC3_MASK 0x3F + +#define TPS65218_CONTROL_DCDC4_PFM BIT(7) +#define TPS65218_CONTROL_DCDC4_MASK 0x3F + +#define TPS65218_SLEW_RATE_GO BIT(7) +#define TPS65218_SLEW_RATE_GODSBL BIT(6) +#define TPS65218_SLEW_RATE_SLEW_MASK 0x7 + +#define TPS65218_CONTROL_LDO1_MASK 0x3F + +#define TPS65218_SEQ1_DLY8 BIT(7) +#define TPS65218_SEQ1_DLY7 BIT(6) +#define TPS65218_SEQ1_DLY6 BIT(5) +#define TPS65218_SEQ1_DLY5 BIT(4) +#define TPS65218_SEQ1_DLY4 BIT(3) +#define TPS65218_SEQ1_DLY3 BIT(2) +#define TPS65218_SEQ1_DLY2 BIT(1) +#define TPS65218_SEQ1_DLY1 BIT(0) + +#define TPS65218_SEQ2_DLYFCTR BIT(7) +#define TPS65218_SEQ2_DLY9 BIT(0) + +#define TPS65218_SEQ3_DC2_SEQ_MASK 0xF0 +#define TPS65218_SEQ3_DC1_SEQ_MASK 0xF + +#define TPS65218_SEQ4_DC4_SEQ_MASK 0xF0 +#define TPS65218_SEQ4_DC3_SEQ_MASK 0xF + +#define TPS65218_SEQ5_DC6_SEQ_MASK 0xF0 +#define TPS65218_SEQ5_DC5_SEQ_MASK 0xF + +#define TPS65218_SEQ6_LS1_SEQ_MASK 0xF0 +#define TPS65218_SEQ6_LDO1_SEQ_MASK 0xF + +#define TPS65218_SEQ7_GPO3_SEQ_MASK 0xF0 +#define TPS65218_SEQ7_GPO1_SEQ_MASK 0xF +#define TPS65218_PROTECT_NONE 0 +#define TPS65218_PROTECT_L1 1 + +enum tps65218_regulator_id { + /* DCDC's */ + TPS65218_DCDC_1, + TPS65218_DCDC_2, + TPS65218_DCDC_3, + TPS65218_DCDC_4, + TPS65218_DCDC_5, + TPS65218_DCDC_6, + /* LDOs */ + TPS65218_LDO_1, +}; + +#define TPS65218_MAX_REG_ID TPS65218_LDO_1 + +/* Number of step-down converters available */ +#define TPS65218_NUM_DCDC 6 +/* Number of LDO voltage regulators available */ +#define TPS65218_NUM_LDO 1 +/* Number of total regulators available */ +#define TPS65218_NUM_REGULATOR (TPS65218_NUM_DCDC + TPS65218_NUM_LDO) + +/* Define the TPS65218 IRQ numbers */ +enum tps65218_irqs { + /* INT1 registers */ + TPS65218_PRGC_IRQ, + TPS65218_CC_AQC_IRQ, + TPS65218_HOT_IRQ, + TPS65218_PB_IRQ, + TPS65218_AC_IRQ, + TPS65218_VPRG_IRQ, + TPS65218_INVALID1_IRQ, + TPS65218_INVALID2_IRQ, + /* INT2 registers */ + TPS65218_LS1_I_IRQ, + TPS65218_LS2_I_IRQ, + TPS65218_LS3_I_IRQ, + TPS65218_LS1_F_IRQ, + TPS65218_LS2_F_IRQ, + TPS65218_LS3_F_IRQ, + TPS65218_INVALID3_IRQ, + TPS65218_INVALID4_IRQ, +}; + +/** + * struct tps_info - packages regulator constraints + * @id: Id of the regulator + * @name: Voltage regulator name + * @min_uV: minimum micro volts + * @max_uV: minimum micro volts + * + * This data is used to check the regualtor voltage limits while setting. + */ +struct tps_info { + int id; + const char *name; + int min_uV; + int max_uV; +}; + +/** + * struct tps65218 - tps65218 sub-driver chip access routines + * + * Device data may be used to access the TPS65218 chip + */ + +struct tps65218 { + struct device *dev; + unsigned int id; + + struct mutex tps_lock; /* lock guarding the data structure */ + /* IRQ Data */ + int irq; + u32 irq_mask; + struct regmap_irq_chip_data *irq_data; + struct regulator_desc desc[TPS65218_NUM_REGULATOR]; + struct regulator_dev *rdev[TPS65218_NUM_REGULATOR]; + struct tps_info *info[TPS65218_NUM_REGULATOR]; + struct regmap *regmap; +}; + +int tps65218_reg_read(struct tps65218 *tps, unsigned int reg, + unsigned int *val); +int tps65218_reg_write(struct tps65218 *tps, unsigned int reg, + unsigned int val, unsigned int level); +int tps65218_set_bits(struct tps65218 *tps, unsigned int reg, + unsigned int mask, unsigned int val, unsigned int level); +int tps65218_clear_bits(struct tps65218 *tps, unsigned int reg, + unsigned int mask, unsigned int level); + +#endif /* __LINUX_MFD_TPS65218_H */ -- cgit v1.2.3-59-g8ed1b From 730876be256603b4ee7225a125467d97a7ce9060 Mon Sep 17 00:00:00 2001 From: Roger Tseng Date: Wed, 12 Feb 2014 18:00:36 +0800 Subject: mfd: Add realtek USB card reader driver Realtek USB card reader provides a channel to transfer command or data to flash memory cards. This driver exports host instances for mmc and memstick subsystems and handles basic works. Acked-by: Greg Kroah-Hartman Signed-off-by: Roger Tseng Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 10 + drivers/mfd/Makefile | 1 + drivers/mfd/rtsx_usb.c | 760 +++++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/rtsx_usb.h | 628 +++++++++++++++++++++++++++++++++++ 4 files changed, 1399 insertions(+) create mode 100644 drivers/mfd/rtsx_usb.c create mode 100644 include/linux/mfd/rtsx_usb.h (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 91fea6164bce..09fb5706208f 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -516,6 +516,16 @@ config MFD_RTSX_PCI types of memory cards, such as Memory Stick, Memory Stick Pro, Secure Digital and MultiMediaCard. +config MFD_RTSX_USB + tristate "Realtek USB card reader" + depends on USB + select MFD_CORE + help + Select this option to get support for Realtek USB 2.0 card readers + including RTS5129, RTS5139, RTS5179 and RTS5170. + Realtek card reader supports access to many types of memory cards, + such as Memory Stick Pro, Secure Digital and MultiMediaCard. + config MFD_RC5T583 bool "Ricoh RC5T583 Power Management system device" depends on I2C=y diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 5e5cfbd1b8da..377bd519884c 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_MFD_CROS_EC_SPI) += cros_ec_spi.o rtsx_pci-objs := rtsx_pcr.o rts5209.o rts5229.o rtl8411.o rts5227.o rts5249.o obj-$(CONFIG_MFD_RTSX_PCI) += rtsx_pci.o +obj-$(CONFIG_MFD_RTSX_USB) += rtsx_usb.o obj-$(CONFIG_HTC_EGPIO) += htc-egpio.o obj-$(CONFIG_HTC_PASIC3) += htc-pasic3.o diff --git a/drivers/mfd/rtsx_usb.c b/drivers/mfd/rtsx_usb.c new file mode 100644 index 000000000000..b53b9d46cc45 --- /dev/null +++ b/drivers/mfd/rtsx_usb.c @@ -0,0 +1,760 @@ +/* Driver for Realtek USB card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + * Author: + * Roger Tseng + */ +#include +#include +#include +#include +#include +#include +#include + +static int polling_pipe = 1; +module_param(polling_pipe, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(polling_pipe, "polling pipe (0: ctl, 1: bulk)"); + +static struct mfd_cell rtsx_usb_cells[] = { + [RTSX_USB_SD_CARD] = { + .name = "rtsx_usb_sdmmc", + .pdata_size = 0, + }, + [RTSX_USB_MS_CARD] = { + .name = "rtsx_usb_ms", + .pdata_size = 0, + }, +}; + +static void rtsx_usb_sg_timed_out(unsigned long data) +{ + struct rtsx_ucr *ucr = (struct rtsx_ucr *)data; + + dev_dbg(&ucr->pusb_intf->dev, "%s: sg transfer timed out", __func__); + usb_sg_cancel(&ucr->current_sg); + + /* we know the cancellation is caused by time-out */ + ucr->current_sg.status = -ETIMEDOUT; +} + +static int rtsx_usb_bulk_transfer_sglist(struct rtsx_ucr *ucr, + unsigned int pipe, struct scatterlist *sg, int num_sg, + unsigned int length, unsigned int *act_len, int timeout) +{ + int ret; + + dev_dbg(&ucr->pusb_intf->dev, "%s: xfer %u bytes, %d entries\n", + __func__, length, num_sg); + ret = usb_sg_init(&ucr->current_sg, ucr->pusb_dev, pipe, 0, + sg, num_sg, length, GFP_NOIO); + if (ret) + return ret; + + ucr->sg_timer.expires = jiffies + msecs_to_jiffies(timeout); + add_timer(&ucr->sg_timer); + usb_sg_wait(&ucr->current_sg); + del_timer(&ucr->sg_timer); + + if (act_len) + *act_len = ucr->current_sg.bytes; + + return ucr->current_sg.status; +} + +int rtsx_usb_transfer_data(struct rtsx_ucr *ucr, unsigned int pipe, + void *buf, unsigned int len, int num_sg, + unsigned int *act_len, int timeout) +{ + if (timeout < 600) + timeout = 600; + + if (num_sg) + return rtsx_usb_bulk_transfer_sglist(ucr, pipe, + (struct scatterlist *)buf, num_sg, len, act_len, + timeout); + else + return usb_bulk_msg(ucr->pusb_dev, pipe, buf, len, act_len, + timeout); +} +EXPORT_SYMBOL_GPL(rtsx_usb_transfer_data); + +static inline void rtsx_usb_seq_cmd_hdr(struct rtsx_ucr *ucr, + u16 addr, u16 len, u8 seq_type) +{ + rtsx_usb_cmd_hdr_tag(ucr); + + ucr->cmd_buf[PACKET_TYPE] = seq_type; + ucr->cmd_buf[5] = (u8)(len >> 8); + ucr->cmd_buf[6] = (u8)len; + ucr->cmd_buf[8] = (u8)(addr >> 8); + ucr->cmd_buf[9] = (u8)addr; + + if (seq_type == SEQ_WRITE) + ucr->cmd_buf[STAGE_FLAG] = 0; + else + ucr->cmd_buf[STAGE_FLAG] = STAGE_R; +} + +static int rtsx_usb_seq_write_register(struct rtsx_ucr *ucr, + u16 addr, u16 len, u8 *data) +{ + u16 cmd_len = ALIGN(SEQ_WRITE_DATA_OFFSET + len, 4); + + if (!data) + return -EINVAL; + + if (cmd_len > IOBUF_SIZE) + return -EINVAL; + + rtsx_usb_seq_cmd_hdr(ucr, addr, len, SEQ_WRITE); + memcpy(ucr->cmd_buf + SEQ_WRITE_DATA_OFFSET, data, len); + + return rtsx_usb_transfer_data(ucr, + usb_sndbulkpipe(ucr->pusb_dev, EP_BULK_OUT), + ucr->cmd_buf, cmd_len, 0, NULL, 100); +} + +static int rtsx_usb_seq_read_register(struct rtsx_ucr *ucr, + u16 addr, u16 len, u8 *data) +{ + int i, ret; + u16 rsp_len = round_down(len, 4); + u16 res_len = len - rsp_len; + + if (!data) + return -EINVAL; + + /* 4-byte aligned part */ + if (rsp_len) { + rtsx_usb_seq_cmd_hdr(ucr, addr, len, SEQ_READ); + ret = rtsx_usb_transfer_data(ucr, + usb_sndbulkpipe(ucr->pusb_dev, EP_BULK_OUT), + ucr->cmd_buf, 12, 0, NULL, 100); + if (ret) + return ret; + + ret = rtsx_usb_transfer_data(ucr, + usb_rcvbulkpipe(ucr->pusb_dev, EP_BULK_IN), + data, rsp_len, 0, NULL, 100); + if (ret) + return ret; + } + + /* unaligned part */ + for (i = 0; i < res_len; i++) { + ret = rtsx_usb_read_register(ucr, addr + rsp_len + i, + data + rsp_len + i); + if (ret) + return ret; + } + + return 0; +} + +int rtsx_usb_read_ppbuf(struct rtsx_ucr *ucr, u8 *buf, int buf_len) +{ + return rtsx_usb_seq_read_register(ucr, PPBUF_BASE2, (u16)buf_len, buf); +} +EXPORT_SYMBOL_GPL(rtsx_usb_read_ppbuf); + +int rtsx_usb_write_ppbuf(struct rtsx_ucr *ucr, u8 *buf, int buf_len) +{ + return rtsx_usb_seq_write_register(ucr, PPBUF_BASE2, (u16)buf_len, buf); +} +EXPORT_SYMBOL_GPL(rtsx_usb_write_ppbuf); + +int rtsx_usb_ep0_write_register(struct rtsx_ucr *ucr, u16 addr, + u8 mask, u8 data) +{ + u16 value, index; + + addr |= EP0_WRITE_REG_CMD << EP0_OP_SHIFT; + value = swab16(addr); + index = mask | data << 8; + + return usb_control_msg(ucr->pusb_dev, + usb_sndctrlpipe(ucr->pusb_dev, 0), RTSX_USB_REQ_REG_OP, + USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + value, index, NULL, 0, 100); +} +EXPORT_SYMBOL_GPL(rtsx_usb_ep0_write_register); + +int rtsx_usb_ep0_read_register(struct rtsx_ucr *ucr, u16 addr, u8 *data) +{ + u16 value; + + if (!data) + return -EINVAL; + *data = 0; + + addr |= EP0_READ_REG_CMD << EP0_OP_SHIFT; + value = swab16(addr); + + return usb_control_msg(ucr->pusb_dev, + usb_rcvctrlpipe(ucr->pusb_dev, 0), RTSX_USB_REQ_REG_OP, + USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + value, 0, data, 1, 100); +} +EXPORT_SYMBOL_GPL(rtsx_usb_ep0_read_register); + +void rtsx_usb_add_cmd(struct rtsx_ucr *ucr, u8 cmd_type, u16 reg_addr, + u8 mask, u8 data) +{ + int i; + + if (ucr->cmd_idx < (IOBUF_SIZE - CMD_OFFSET) / 4) { + i = CMD_OFFSET + ucr->cmd_idx * 4; + + ucr->cmd_buf[i++] = ((cmd_type & 0x03) << 6) | + (u8)((reg_addr >> 8) & 0x3F); + ucr->cmd_buf[i++] = (u8)reg_addr; + ucr->cmd_buf[i++] = mask; + ucr->cmd_buf[i++] = data; + + ucr->cmd_idx++; + } +} +EXPORT_SYMBOL_GPL(rtsx_usb_add_cmd); + +int rtsx_usb_send_cmd(struct rtsx_ucr *ucr, u8 flag, int timeout) +{ + int ret; + + ucr->cmd_buf[CNT_H] = (u8)(ucr->cmd_idx >> 8); + ucr->cmd_buf[CNT_L] = (u8)(ucr->cmd_idx); + ucr->cmd_buf[STAGE_FLAG] = flag; + + ret = rtsx_usb_transfer_data(ucr, + usb_sndbulkpipe(ucr->pusb_dev, EP_BULK_OUT), + ucr->cmd_buf, ucr->cmd_idx * 4 + CMD_OFFSET, + 0, NULL, timeout); + if (ret) { + rtsx_usb_clear_fsm_err(ucr); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_usb_send_cmd); + +int rtsx_usb_get_rsp(struct rtsx_ucr *ucr, int rsp_len, int timeout) +{ + if (rsp_len <= 0) + return -EINVAL; + + rsp_len = ALIGN(rsp_len, 4); + + return rtsx_usb_transfer_data(ucr, + usb_rcvbulkpipe(ucr->pusb_dev, EP_BULK_IN), + ucr->rsp_buf, rsp_len, 0, NULL, timeout); +} +EXPORT_SYMBOL_GPL(rtsx_usb_get_rsp); + +static int rtsx_usb_get_status_with_bulk(struct rtsx_ucr *ucr, u16 *status) +{ + int ret; + + rtsx_usb_init_cmd(ucr); + rtsx_usb_add_cmd(ucr, READ_REG_CMD, CARD_EXIST, 0x00, 0x00); + rtsx_usb_add_cmd(ucr, READ_REG_CMD, OCPSTAT, 0x00, 0x00); + ret = rtsx_usb_send_cmd(ucr, MODE_CR, 100); + if (ret) + return ret; + + ret = rtsx_usb_get_rsp(ucr, 2, 100); + if (ret) + return ret; + + *status = ((ucr->rsp_buf[0] >> 2) & 0x0f) | + ((ucr->rsp_buf[1] & 0x03) << 4); + + return 0; +} + +int rtsx_usb_get_card_status(struct rtsx_ucr *ucr, u16 *status) +{ + int ret; + + if (!status) + return -EINVAL; + + if (polling_pipe == 0) + ret = usb_control_msg(ucr->pusb_dev, + usb_rcvctrlpipe(ucr->pusb_dev, 0), + RTSX_USB_REQ_POLL, + USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + 0, 0, status, 2, 100); + else + ret = rtsx_usb_get_status_with_bulk(ucr, status); + + /* usb_control_msg may return positive when success */ + if (ret < 0) + return ret; + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_usb_get_card_status); + +static int rtsx_usb_write_phy_register(struct rtsx_ucr *ucr, u8 addr, u8 val) +{ + dev_dbg(&ucr->pusb_intf->dev, "Write 0x%x to phy register 0x%x\n", + val, addr); + + rtsx_usb_init_cmd(ucr); + + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VSTAIN, 0xFF, val); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VCONTROL, 0xFF, addr & 0x0F); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x00); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x00); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x01); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VCONTROL, + 0xFF, (addr >> 4) & 0x0F); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x00); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x00); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x01); + + return rtsx_usb_send_cmd(ucr, MODE_C, 100); +} + +int rtsx_usb_write_register(struct rtsx_ucr *ucr, u16 addr, u8 mask, u8 data) +{ + rtsx_usb_init_cmd(ucr); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, addr, mask, data); + return rtsx_usb_send_cmd(ucr, MODE_C, 100); +} +EXPORT_SYMBOL_GPL(rtsx_usb_write_register); + +int rtsx_usb_read_register(struct rtsx_ucr *ucr, u16 addr, u8 *data) +{ + int ret; + + if (data != NULL) + *data = 0; + + rtsx_usb_init_cmd(ucr); + rtsx_usb_add_cmd(ucr, READ_REG_CMD, addr, 0, 0); + ret = rtsx_usb_send_cmd(ucr, MODE_CR, 100); + if (ret) + return ret; + + ret = rtsx_usb_get_rsp(ucr, 1, 100); + if (ret) + return ret; + + if (data != NULL) + *data = ucr->rsp_buf[0]; + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_usb_read_register); + +static inline u8 double_ssc_depth(u8 depth) +{ + return (depth > 1) ? (depth - 1) : depth; +} + +static u8 revise_ssc_depth(u8 ssc_depth, u8 div) +{ + if (div > CLK_DIV_1) { + if (ssc_depth > div - 1) + ssc_depth -= (div - 1); + else + ssc_depth = SSC_DEPTH_2M; + } + + return ssc_depth; +} + +int rtsx_usb_switch_clock(struct rtsx_ucr *ucr, unsigned int card_clock, + u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk) +{ + int ret; + u8 n, clk_divider, mcu_cnt, div; + + if (!card_clock) { + ucr->cur_clk = 0; + return 0; + } + + if (initial_mode) { + /* We use 250k(around) here, in initial stage */ + clk_divider = SD_CLK_DIVIDE_128; + card_clock = 30000000; + } else { + clk_divider = SD_CLK_DIVIDE_0; + } + + ret = rtsx_usb_write_register(ucr, SD_CFG1, + SD_CLK_DIVIDE_MASK, clk_divider); + if (ret < 0) + return ret; + + card_clock /= 1000000; + dev_dbg(&ucr->pusb_intf->dev, + "Switch card clock to %dMHz\n", card_clock); + + if (!initial_mode && double_clk) + card_clock *= 2; + dev_dbg(&ucr->pusb_intf->dev, + "Internal SSC clock: %dMHz (cur_clk = %d)\n", + card_clock, ucr->cur_clk); + + if (card_clock == ucr->cur_clk) + return 0; + + /* Converting clock value into internal settings: n and div */ + n = card_clock - 2; + if ((card_clock <= 2) || (n > MAX_DIV_N)) + return -EINVAL; + + mcu_cnt = 60/card_clock + 3; + if (mcu_cnt > 15) + mcu_cnt = 15; + + /* Make sure that the SSC clock div_n is not less than MIN_DIV_N */ + + div = CLK_DIV_1; + while (n < MIN_DIV_N && div < CLK_DIV_4) { + n = (n + 2) * 2 - 2; + div++; + } + dev_dbg(&ucr->pusb_intf->dev, "n = %d, div = %d\n", n, div); + + if (double_clk) + ssc_depth = double_ssc_depth(ssc_depth); + + ssc_depth = revise_ssc_depth(ssc_depth, div); + dev_dbg(&ucr->pusb_intf->dev, "ssc_depth = %d\n", ssc_depth); + + rtsx_usb_init_cmd(ucr); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CLK_DIV, CLK_CHANGE, CLK_CHANGE); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CLK_DIV, + 0x3F, (div << 4) | mcu_cnt); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, 0); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SSC_CTL2, + SSC_DEPTH_MASK, ssc_depth); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SSC_DIV_N_0, 0xFF, n); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, SSC_RSTB); + if (vpclk) { + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SD_VPCLK0_CTL, + PHASE_NOT_RESET, 0); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SD_VPCLK0_CTL, + PHASE_NOT_RESET, PHASE_NOT_RESET); + } + + ret = rtsx_usb_send_cmd(ucr, MODE_C, 2000); + if (ret < 0) + return ret; + + ret = rtsx_usb_write_register(ucr, SSC_CTL1, 0xff, + SSC_RSTB | SSC_8X_EN | SSC_SEL_4M); + if (ret < 0) + return ret; + + /* Wait SSC clock stable */ + usleep_range(100, 1000); + + ret = rtsx_usb_write_register(ucr, CLK_DIV, CLK_CHANGE, 0); + if (ret < 0) + return ret; + + ucr->cur_clk = card_clock; + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_usb_switch_clock); + +int rtsx_usb_card_exclusive_check(struct rtsx_ucr *ucr, int card) +{ + int ret; + u16 val; + u16 cd_mask[] = { + [RTSX_USB_SD_CARD] = (CD_MASK & ~SD_CD), + [RTSX_USB_MS_CARD] = (CD_MASK & ~MS_CD) + }; + + ret = rtsx_usb_get_card_status(ucr, &val); + /* + * If get status fails, return 0 (ok) for the exclusive check + * and let the flow fail at somewhere else. + */ + if (ret) + return 0; + + if (val & cd_mask[card]) + return -EIO; + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_usb_card_exclusive_check); + +static int rtsx_usb_reset_chip(struct rtsx_ucr *ucr) +{ + int ret; + u8 val; + + rtsx_usb_init_cmd(ucr); + + if (CHECK_PKG(ucr, LQFP48)) { + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PWR_CTL, + LDO3318_PWR_MASK, LDO_SUSPEND); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PWR_CTL, + FORCE_LDO_POWERB, FORCE_LDO_POWERB); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PULL_CTL1, + 0x30, 0x10); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PULL_CTL5, + 0x03, 0x01); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PULL_CTL6, + 0x0C, 0x04); + } + + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SYS_DUMMY0, NYET_MSAK, NYET_EN); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CD_DEGLITCH_WIDTH, 0xFF, 0x08); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, + CD_DEGLITCH_EN, XD_CD_DEGLITCH_EN, 0x0); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SD30_DRIVE_SEL, + SD30_DRIVE_MASK, DRIVER_TYPE_D); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, + CARD_DRIVE_SEL, SD20_DRIVE_MASK, 0x0); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, LDO_POWER_CFG, 0xE0, 0x0); + + if (ucr->is_rts5179) + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, + CARD_PULL_CTL5, 0x03, 0x01); + + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_DMA1_CTL, + EXTEND_DMA1_ASYNC_SIGNAL, EXTEND_DMA1_ASYNC_SIGNAL); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_INT_PEND, + XD_INT | MS_INT | SD_INT, + XD_INT | MS_INT | SD_INT); + + ret = rtsx_usb_send_cmd(ucr, MODE_C, 100); + if (ret) + return ret; + + /* config non-crystal mode */ + rtsx_usb_read_register(ucr, CFG_MODE, &val); + if ((val & XTAL_FREE) || ((val & CLK_MODE_MASK) == CLK_MODE_NON_XTAL)) { + ret = rtsx_usb_write_phy_register(ucr, 0xC2, 0x7C); + if (ret) + return ret; + } + + return 0; +} + +static int rtsx_usb_init_chip(struct rtsx_ucr *ucr) +{ + int ret; + u8 val; + + rtsx_usb_clear_fsm_err(ucr); + + /* power on SSC */ + ret = rtsx_usb_write_register(ucr, + FPDCTL, SSC_POWER_MASK, SSC_POWER_ON); + if (ret) + return ret; + + usleep_range(100, 1000); + ret = rtsx_usb_write_register(ucr, CLK_DIV, CLK_CHANGE, 0x00); + if (ret) + return ret; + + /* determine IC version */ + ret = rtsx_usb_read_register(ucr, HW_VERSION, &val); + if (ret) + return ret; + + ucr->ic_version = val & HW_VER_MASK; + + /* determine package */ + ret = rtsx_usb_read_register(ucr, CARD_SHARE_MODE, &val); + if (ret) + return ret; + + if (val & CARD_SHARE_LQFP_SEL) { + ucr->package = LQFP48; + dev_dbg(&ucr->pusb_intf->dev, "Package: LQFP48\n"); + } else { + ucr->package = QFN24; + dev_dbg(&ucr->pusb_intf->dev, "Package: QFN24\n"); + } + + /* determine IC variations */ + rtsx_usb_read_register(ucr, CFG_MODE_1, &val); + if (val & RTS5179) { + ucr->is_rts5179 = true; + dev_dbg(&ucr->pusb_intf->dev, "Device is rts5179\n"); + } else { + ucr->is_rts5179 = false; + } + + return rtsx_usb_reset_chip(ucr); +} + +static int rtsx_usb_probe(struct usb_interface *intf, + const struct usb_device_id *id) +{ + struct usb_device *usb_dev = interface_to_usbdev(intf); + struct rtsx_ucr *ucr; + int ret; + + dev_dbg(&intf->dev, + ": Realtek USB Card Reader found at bus %03d address %03d\n", + usb_dev->bus->busnum, usb_dev->devnum); + + ucr = devm_kzalloc(&intf->dev, sizeof(*ucr), GFP_KERNEL); + if (!ucr) + return -ENOMEM; + + ucr->pusb_dev = usb_dev; + + ucr->iobuf = usb_alloc_coherent(ucr->pusb_dev, IOBUF_SIZE, + GFP_KERNEL, &ucr->iobuf_dma); + if (!ucr->iobuf) + return -ENOMEM; + + usb_set_intfdata(intf, ucr); + + ucr->vendor_id = id->idVendor; + ucr->product_id = id->idProduct; + ucr->cmd_buf = ucr->rsp_buf = ucr->iobuf; + + mutex_init(&ucr->dev_mutex); + + ucr->pusb_intf = intf; + + /* initialize */ + ret = rtsx_usb_init_chip(ucr); + if (ret) + goto out_init_fail; + + ret = mfd_add_devices(&intf->dev, usb_dev->devnum, rtsx_usb_cells, + ARRAY_SIZE(rtsx_usb_cells), NULL, 0, NULL); + if (ret) + goto out_init_fail; + + /* initialize USB SG transfer timer */ + init_timer(&ucr->sg_timer); + setup_timer(&ucr->sg_timer, rtsx_usb_sg_timed_out, (unsigned long) ucr); +#ifdef CONFIG_PM + intf->needs_remote_wakeup = 1; + usb_enable_autosuspend(usb_dev); +#endif + + return 0; + +out_init_fail: + usb_free_coherent(ucr->pusb_dev, IOBUF_SIZE, ucr->iobuf, + ucr->iobuf_dma); + return ret; +} + +static void rtsx_usb_disconnect(struct usb_interface *intf) +{ + struct rtsx_ucr *ucr = (struct rtsx_ucr *)usb_get_intfdata(intf); + + dev_dbg(&intf->dev, "%s called\n", __func__); + + mfd_remove_devices(&intf->dev); + + usb_set_intfdata(ucr->pusb_intf, NULL); + usb_free_coherent(ucr->pusb_dev, IOBUF_SIZE, ucr->iobuf, + ucr->iobuf_dma); +} + +#ifdef CONFIG_PM +static int rtsx_usb_suspend(struct usb_interface *intf, pm_message_t message) +{ + struct rtsx_ucr *ucr = + (struct rtsx_ucr *)usb_get_intfdata(intf); + + dev_dbg(&intf->dev, "%s called with pm message 0x%04u\n", + __func__, message.event); + + mutex_lock(&ucr->dev_mutex); + rtsx_usb_turn_off_led(ucr); + mutex_unlock(&ucr->dev_mutex); + return 0; +} + +static int rtsx_usb_resume(struct usb_interface *intf) +{ + return 0; +} + +static int rtsx_usb_reset_resume(struct usb_interface *intf) +{ + struct rtsx_ucr *ucr = + (struct rtsx_ucr *)usb_get_intfdata(intf); + + rtsx_usb_reset_chip(ucr); + return 0; +} + +#else /* CONFIG_PM */ + +#define rtsx_usb_suspend NULL +#define rtsx_usb_resume NULL +#define rtsx_usb_reset_resume NULL + +#endif /* CONFIG_PM */ + + +static int rtsx_usb_pre_reset(struct usb_interface *intf) +{ + struct rtsx_ucr *ucr = (struct rtsx_ucr *)usb_get_intfdata(intf); + + mutex_lock(&ucr->dev_mutex); + return 0; +} + +static int rtsx_usb_post_reset(struct usb_interface *intf) +{ + struct rtsx_ucr *ucr = (struct rtsx_ucr *)usb_get_intfdata(intf); + + mutex_unlock(&ucr->dev_mutex); + return 0; +} + +static struct usb_device_id rtsx_usb_usb_ids[] = { + { USB_DEVICE(0x0BDA, 0x0129) }, + { USB_DEVICE(0x0BDA, 0x0139) }, + { USB_DEVICE(0x0BDA, 0x0140) }, + { } +}; + +static struct usb_driver rtsx_usb_driver = { + .name = "rtsx_usb", + .probe = rtsx_usb_probe, + .disconnect = rtsx_usb_disconnect, + .suspend = rtsx_usb_suspend, + .resume = rtsx_usb_resume, + .reset_resume = rtsx_usb_reset_resume, + .pre_reset = rtsx_usb_pre_reset, + .post_reset = rtsx_usb_post_reset, + .id_table = rtsx_usb_usb_ids, + .supports_autosuspend = 1, + .soft_unbind = 1, +}; + +module_usb_driver(rtsx_usb_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Roger Tseng "); +MODULE_DESCRIPTION("Realtek USB Card Reader Driver"); diff --git a/include/linux/mfd/rtsx_usb.h b/include/linux/mfd/rtsx_usb.h new file mode 100644 index 000000000000..c446e4fd6b5c --- /dev/null +++ b/include/linux/mfd/rtsx_usb.h @@ -0,0 +1,628 @@ +/* Driver for Realtek RTS5139 USB card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + * Author: + * Roger Tseng + */ + +#ifndef __RTSX_USB_H +#define __RTSX_USB_H + +#include + +/* related module names */ +#define RTSX_USB_SD_CARD 0 +#define RTSX_USB_MS_CARD 1 + +/* endpoint numbers */ +#define EP_BULK_OUT 1 +#define EP_BULK_IN 2 +#define EP_INTR_IN 3 + +/* USB vendor requests */ +#define RTSX_USB_REQ_REG_OP 0x00 +#define RTSX_USB_REQ_POLL 0x02 + +/* miscellaneous parameters */ +#define MIN_DIV_N 60 +#define MAX_DIV_N 120 + +#define MAX_PHASE 15 +#define RX_TUNING_CNT 3 + +#define QFN24 0 +#define LQFP48 1 +#define CHECK_PKG(ucr, pkg) ((ucr)->package == (pkg)) + +/* data structures */ +struct rtsx_ucr { + u16 vendor_id; + u16 product_id; + + int package; + u8 ic_version; + bool is_rts5179; + + unsigned int cur_clk; + + u8 *cmd_buf; + unsigned int cmd_idx; + u8 *rsp_buf; + + struct usb_device *pusb_dev; + struct usb_interface *pusb_intf; + struct usb_sg_request current_sg; + unsigned char *iobuf; + dma_addr_t iobuf_dma; + + struct timer_list sg_timer; + struct mutex dev_mutex; +}; + +/* buffer size */ +#define IOBUF_SIZE 1024 + +/* prototypes of exported functions */ +extern int rtsx_usb_get_card_status(struct rtsx_ucr *ucr, u16 *status); + +extern int rtsx_usb_read_register(struct rtsx_ucr *ucr, u16 addr, u8 *data); +extern int rtsx_usb_write_register(struct rtsx_ucr *ucr, u16 addr, u8 mask, + u8 data); + +extern int rtsx_usb_ep0_write_register(struct rtsx_ucr *ucr, u16 addr, u8 mask, + u8 data); +extern int rtsx_usb_ep0_read_register(struct rtsx_ucr *ucr, u16 addr, + u8 *data); + +extern void rtsx_usb_add_cmd(struct rtsx_ucr *ucr, u8 cmd_type, + u16 reg_addr, u8 mask, u8 data); +extern int rtsx_usb_send_cmd(struct rtsx_ucr *ucr, u8 flag, int timeout); +extern int rtsx_usb_get_rsp(struct rtsx_ucr *ucr, int rsp_len, int timeout); +extern int rtsx_usb_transfer_data(struct rtsx_ucr *ucr, unsigned int pipe, + void *buf, unsigned int len, int use_sg, + unsigned int *act_len, int timeout); + +extern int rtsx_usb_read_ppbuf(struct rtsx_ucr *ucr, u8 *buf, int buf_len); +extern int rtsx_usb_write_ppbuf(struct rtsx_ucr *ucr, u8 *buf, int buf_len); +extern int rtsx_usb_switch_clock(struct rtsx_ucr *ucr, unsigned int card_clock, + u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk); +extern int rtsx_usb_card_exclusive_check(struct rtsx_ucr *ucr, int card); + +/* card status */ +#define SD_CD 0x01 +#define MS_CD 0x02 +#define XD_CD 0x04 +#define CD_MASK (SD_CD | MS_CD | XD_CD) +#define SD_WP 0x08 + +/* reader command field offset & parameters */ +#define READ_REG_CMD 0 +#define WRITE_REG_CMD 1 +#define CHECK_REG_CMD 2 + +#define PACKET_TYPE 4 +#define CNT_H 5 +#define CNT_L 6 +#define STAGE_FLAG 7 +#define CMD_OFFSET 8 +#define SEQ_WRITE_DATA_OFFSET 12 + +#define BATCH_CMD 0 +#define SEQ_READ 1 +#define SEQ_WRITE 2 + +#define STAGE_R 0x01 +#define STAGE_DI 0x02 +#define STAGE_DO 0x04 +#define STAGE_MS_STATUS 0x08 +#define STAGE_XD_STATUS 0x10 +#define MODE_C 0x00 +#define MODE_CR (STAGE_R) +#define MODE_CDIR (STAGE_R | STAGE_DI) +#define MODE_CDOR (STAGE_R | STAGE_DO) + +#define EP0_OP_SHIFT 14 +#define EP0_READ_REG_CMD 2 +#define EP0_WRITE_REG_CMD 3 + +#define rtsx_usb_cmd_hdr_tag(ucr) \ + do { \ + ucr->cmd_buf[0] = 'R'; \ + ucr->cmd_buf[1] = 'T'; \ + ucr->cmd_buf[2] = 'C'; \ + ucr->cmd_buf[3] = 'R'; \ + } while (0) + +static inline void rtsx_usb_init_cmd(struct rtsx_ucr *ucr) +{ + rtsx_usb_cmd_hdr_tag(ucr); + ucr->cmd_idx = 0; + ucr->cmd_buf[PACKET_TYPE] = BATCH_CMD; +} + +/* internal register address */ +#define FPDCTL 0xFC00 +#define SSC_DIV_N_0 0xFC07 +#define SSC_CTL1 0xFC09 +#define SSC_CTL2 0xFC0A +#define CFG_MODE 0xFC0E +#define CFG_MODE_1 0xFC0F +#define RCCTL 0xFC14 +#define SOF_WDOG 0xFC28 +#define SYS_DUMMY0 0xFC30 + +#define MS_BLKEND 0xFD30 +#define MS_READ_START 0xFD31 +#define MS_READ_COUNT 0xFD32 +#define MS_WRITE_START 0xFD33 +#define MS_WRITE_COUNT 0xFD34 +#define MS_COMMAND 0xFD35 +#define MS_OLD_BLOCK_0 0xFD36 +#define MS_OLD_BLOCK_1 0xFD37 +#define MS_NEW_BLOCK_0 0xFD38 +#define MS_NEW_BLOCK_1 0xFD39 +#define MS_LOG_BLOCK_0 0xFD3A +#define MS_LOG_BLOCK_1 0xFD3B +#define MS_BUS_WIDTH 0xFD3C +#define MS_PAGE_START 0xFD3D +#define MS_PAGE_LENGTH 0xFD3E +#define MS_CFG 0xFD40 +#define MS_TPC 0xFD41 +#define MS_TRANS_CFG 0xFD42 +#define MS_TRANSFER 0xFD43 +#define MS_INT_REG 0xFD44 +#define MS_BYTE_CNT 0xFD45 +#define MS_SECTOR_CNT_L 0xFD46 +#define MS_SECTOR_CNT_H 0xFD47 +#define MS_DBUS_H 0xFD48 + +#define CARD_DMA1_CTL 0xFD5C +#define CARD_PULL_CTL1 0xFD60 +#define CARD_PULL_CTL2 0xFD61 +#define CARD_PULL_CTL3 0xFD62 +#define CARD_PULL_CTL4 0xFD63 +#define CARD_PULL_CTL5 0xFD64 +#define CARD_PULL_CTL6 0xFD65 +#define CARD_EXIST 0xFD6F +#define CARD_INT_PEND 0xFD71 + +#define LDO_POWER_CFG 0xFD7B + +#define SD_CFG1 0xFDA0 +#define SD_CFG2 0xFDA1 +#define SD_CFG3 0xFDA2 +#define SD_STAT1 0xFDA3 +#define SD_STAT2 0xFDA4 +#define SD_BUS_STAT 0xFDA5 +#define SD_PAD_CTL 0xFDA6 +#define SD_SAMPLE_POINT_CTL 0xFDA7 +#define SD_PUSH_POINT_CTL 0xFDA8 +#define SD_CMD0 0xFDA9 +#define SD_CMD1 0xFDAA +#define SD_CMD2 0xFDAB +#define SD_CMD3 0xFDAC +#define SD_CMD4 0xFDAD +#define SD_CMD5 0xFDAE +#define SD_BYTE_CNT_L 0xFDAF +#define SD_BYTE_CNT_H 0xFDB0 +#define SD_BLOCK_CNT_L 0xFDB1 +#define SD_BLOCK_CNT_H 0xFDB2 +#define SD_TRANSFER 0xFDB3 +#define SD_CMD_STATE 0xFDB5 +#define SD_DATA_STATE 0xFDB6 +#define SD_VPCLK0_CTL 0xFC2A +#define SD_VPCLK1_CTL 0xFC2B +#define SD_DCMPS0_CTL 0xFC2C +#define SD_DCMPS1_CTL 0xFC2D + +#define CARD_DMA1_CTL 0xFD5C + +#define HW_VERSION 0xFC01 + +#define SSC_CLK_FPGA_SEL 0xFC02 +#define CLK_DIV 0xFC03 +#define SFSM_ED 0xFC04 + +#define CD_DEGLITCH_WIDTH 0xFC20 +#define CD_DEGLITCH_EN 0xFC21 +#define AUTO_DELINK_EN 0xFC23 + +#define FPGA_PULL_CTL 0xFC1D +#define CARD_CLK_SOURCE 0xFC2E + +#define CARD_SHARE_MODE 0xFD51 +#define CARD_DRIVE_SEL 0xFD52 +#define CARD_STOP 0xFD53 +#define CARD_OE 0xFD54 +#define CARD_AUTO_BLINK 0xFD55 +#define CARD_GPIO 0xFD56 +#define SD30_DRIVE_SEL 0xFD57 + +#define CARD_DATA_SOURCE 0xFD5D +#define CARD_SELECT 0xFD5E + +#define CARD_CLK_EN 0xFD79 +#define CARD_PWR_CTL 0xFD7A + +#define OCPCTL 0xFD80 +#define OCPPARA1 0xFD81 +#define OCPPARA2 0xFD82 +#define OCPSTAT 0xFD83 + +#define HS_USB_STAT 0xFE01 +#define HS_VCONTROL 0xFE26 +#define HS_VSTAIN 0xFE27 +#define HS_VLOADM 0xFE28 +#define HS_VSTAOUT 0xFE29 + +#define MC_IRQ 0xFF00 +#define MC_IRQEN 0xFF01 +#define MC_FIFO_CTL 0xFF02 +#define MC_FIFO_BC0 0xFF03 +#define MC_FIFO_BC1 0xFF04 +#define MC_FIFO_STAT 0xFF05 +#define MC_FIFO_MODE 0xFF06 +#define MC_FIFO_RD_PTR0 0xFF07 +#define MC_FIFO_RD_PTR1 0xFF08 +#define MC_DMA_CTL 0xFF10 +#define MC_DMA_TC0 0xFF11 +#define MC_DMA_TC1 0xFF12 +#define MC_DMA_TC2 0xFF13 +#define MC_DMA_TC3 0xFF14 +#define MC_DMA_RST 0xFF15 + +#define RBUF_SIZE_MASK 0xFBFF +#define RBUF_BASE 0xF000 +#define PPBUF_BASE1 0xF800 +#define PPBUF_BASE2 0xFA00 + +/* internal register value macros */ +#define POWER_OFF 0x03 +#define PARTIAL_POWER_ON 0x02 +#define POWER_ON 0x00 +#define POWER_MASK 0x03 +#define LDO3318_PWR_MASK 0x0C +#define LDO_ON 0x00 +#define LDO_SUSPEND 0x08 +#define LDO_OFF 0x0C +#define DV3318_AUTO_PWR_OFF 0x10 +#define FORCE_LDO_POWERB 0x60 + +/* LDO_POWER_CFG */ +#define TUNE_SD18_MASK 0x1C +#define TUNE_SD18_1V7 0x00 +#define TUNE_SD18_1V8 (0x01 << 2) +#define TUNE_SD18_1V9 (0x02 << 2) +#define TUNE_SD18_2V0 (0x03 << 2) +#define TUNE_SD18_2V7 (0x04 << 2) +#define TUNE_SD18_2V8 (0x05 << 2) +#define TUNE_SD18_2V9 (0x06 << 2) +#define TUNE_SD18_3V3 (0x07 << 2) + +/* CLK_DIV */ +#define CLK_CHANGE 0x80 +#define CLK_DIV_1 0x00 +#define CLK_DIV_2 0x01 +#define CLK_DIV_4 0x02 +#define CLK_DIV_8 0x03 + +#define SSC_POWER_MASK 0x01 +#define SSC_POWER_DOWN 0x01 +#define SSC_POWER_ON 0x00 + +#define FPGA_VER 0x80 +#define HW_VER_MASK 0x0F + +#define EXTEND_DMA1_ASYNC_SIGNAL 0x02 + +/* CFG_MODE*/ +#define XTAL_FREE 0x80 +#define CLK_MODE_MASK 0x03 +#define CLK_MODE_12M_XTAL 0x00 +#define CLK_MODE_NON_XTAL 0x01 +#define CLK_MODE_24M_OSC 0x02 +#define CLK_MODE_48M_OSC 0x03 + +/* CFG_MODE_1*/ +#define RTS5179 0x02 + +#define NYET_EN 0x01 +#define NYET_MSAK 0x01 + +#define SD30_DRIVE_MASK 0x07 +#define SD20_DRIVE_MASK 0x03 + +#define DISABLE_SD_CD 0x08 +#define DISABLE_MS_CD 0x10 +#define DISABLE_XD_CD 0x20 +#define SD_CD_DEGLITCH_EN 0x01 +#define MS_CD_DEGLITCH_EN 0x02 +#define XD_CD_DEGLITCH_EN 0x04 + +#define CARD_SHARE_LQFP48 0x04 +#define CARD_SHARE_QFN24 0x00 +#define CARD_SHARE_LQFP_SEL 0x04 +#define CARD_SHARE_XD 0x00 +#define CARD_SHARE_SD 0x01 +#define CARD_SHARE_MS 0x02 +#define CARD_SHARE_MASK 0x03 + + +/* SD30_DRIVE_SEL */ +#define DRIVER_TYPE_A 0x05 +#define DRIVER_TYPE_B 0x03 +#define DRIVER_TYPE_C 0x02 +#define DRIVER_TYPE_D 0x01 + +/* SD_BUS_STAT */ +#define SD_CLK_TOGGLE_EN 0x80 +#define SD_CLK_FORCE_STOP 0x40 +#define SD_DAT3_STATUS 0x10 +#define SD_DAT2_STATUS 0x08 +#define SD_DAT1_STATUS 0x04 +#define SD_DAT0_STATUS 0x02 +#define SD_CMD_STATUS 0x01 + +/* SD_PAD_CTL */ +#define SD_IO_USING_1V8 0x80 +#define SD_IO_USING_3V3 0x7F +#define TYPE_A_DRIVING 0x00 +#define TYPE_B_DRIVING 0x01 +#define TYPE_C_DRIVING 0x02 +#define TYPE_D_DRIVING 0x03 + +/* CARD_CLK_EN */ +#define SD_CLK_EN 0x04 +#define MS_CLK_EN 0x08 + +/* CARD_SELECT */ +#define SD_MOD_SEL 2 +#define MS_MOD_SEL 3 + +/* CARD_SHARE_MODE */ +#define CARD_SHARE_LQFP48 0x04 +#define CARD_SHARE_QFN24 0x00 +#define CARD_SHARE_LQFP_SEL 0x04 +#define CARD_SHARE_XD 0x00 +#define CARD_SHARE_SD 0x01 +#define CARD_SHARE_MS 0x02 +#define CARD_SHARE_MASK 0x03 + +/* SSC_CTL1 */ +#define SSC_RSTB 0x80 +#define SSC_8X_EN 0x40 +#define SSC_FIX_FRAC 0x20 +#define SSC_SEL_1M 0x00 +#define SSC_SEL_2M 0x08 +#define SSC_SEL_4M 0x10 +#define SSC_SEL_8M 0x18 + +/* SSC_CTL2 */ +#define SSC_DEPTH_MASK 0x03 +#define SSC_DEPTH_DISALBE 0x00 +#define SSC_DEPTH_2M 0x01 +#define SSC_DEPTH_1M 0x02 +#define SSC_DEPTH_512K 0x03 + +/* SD_VPCLK0_CTL */ +#define PHASE_CHANGE 0x80 +#define PHASE_NOT_RESET 0x40 + +/* SD_TRANSFER */ +#define SD_TRANSFER_START 0x80 +#define SD_TRANSFER_END 0x40 +#define SD_STAT_IDLE 0x20 +#define SD_TRANSFER_ERR 0x10 +#define SD_TM_NORMAL_WRITE 0x00 +#define SD_TM_AUTO_WRITE_3 0x01 +#define SD_TM_AUTO_WRITE_4 0x02 +#define SD_TM_AUTO_READ_3 0x05 +#define SD_TM_AUTO_READ_4 0x06 +#define SD_TM_CMD_RSP 0x08 +#define SD_TM_AUTO_WRITE_1 0x09 +#define SD_TM_AUTO_WRITE_2 0x0A +#define SD_TM_NORMAL_READ 0x0C +#define SD_TM_AUTO_READ_1 0x0D +#define SD_TM_AUTO_READ_2 0x0E +#define SD_TM_AUTO_TUNING 0x0F + +/* SD_CFG1 */ +#define SD_CLK_DIVIDE_0 0x00 +#define SD_CLK_DIVIDE_256 0xC0 +#define SD_CLK_DIVIDE_128 0x80 +#define SD_CLK_DIVIDE_MASK 0xC0 +#define SD_BUS_WIDTH_1BIT 0x00 +#define SD_BUS_WIDTH_4BIT 0x01 +#define SD_BUS_WIDTH_8BIT 0x02 +#define SD_ASYNC_FIFO_RST 0x10 +#define SD_20_MODE 0x00 +#define SD_DDR_MODE 0x04 +#define SD_30_MODE 0x08 + +/* SD_CFG2 */ +#define SD_CALCULATE_CRC7 0x00 +#define SD_NO_CALCULATE_CRC7 0x80 +#define SD_CHECK_CRC16 0x00 +#define SD_NO_CHECK_CRC16 0x40 +#define SD_WAIT_CRC_TO_EN 0x20 +#define SD_WAIT_BUSY_END 0x08 +#define SD_NO_WAIT_BUSY_END 0x00 +#define SD_CHECK_CRC7 0x00 +#define SD_NO_CHECK_CRC7 0x04 +#define SD_RSP_LEN_0 0x00 +#define SD_RSP_LEN_6 0x01 +#define SD_RSP_LEN_17 0x02 +#define SD_RSP_TYPE_R0 0x04 +#define SD_RSP_TYPE_R1 0x01 +#define SD_RSP_TYPE_R1b 0x09 +#define SD_RSP_TYPE_R2 0x02 +#define SD_RSP_TYPE_R3 0x05 +#define SD_RSP_TYPE_R4 0x05 +#define SD_RSP_TYPE_R5 0x01 +#define SD_RSP_TYPE_R6 0x01 +#define SD_RSP_TYPE_R7 0x01 + +/* SD_STAT1 */ +#define SD_CRC7_ERR 0x80 +#define SD_CRC16_ERR 0x40 +#define SD_CRC_WRITE_ERR 0x20 +#define SD_CRC_WRITE_ERR_MASK 0x1C +#define GET_CRC_TIME_OUT 0x02 +#define SD_TUNING_COMPARE_ERR 0x01 + +/* SD_DATA_STATE */ +#define SD_DATA_IDLE 0x80 + +/* CARD_DATA_SOURCE */ +#define PINGPONG_BUFFER 0x01 +#define RING_BUFFER 0x00 + +/* CARD_OE */ +#define SD_OUTPUT_EN 0x04 +#define MS_OUTPUT_EN 0x08 + +/* CARD_STOP */ +#define SD_STOP 0x04 +#define MS_STOP 0x08 +#define SD_CLR_ERR 0x40 +#define MS_CLR_ERR 0x80 + +/* CARD_CLK_SOURCE */ +#define CRC_FIX_CLK (0x00 << 0) +#define CRC_VAR_CLK0 (0x01 << 0) +#define CRC_VAR_CLK1 (0x02 << 0) +#define SD30_FIX_CLK (0x00 << 2) +#define SD30_VAR_CLK0 (0x01 << 2) +#define SD30_VAR_CLK1 (0x02 << 2) +#define SAMPLE_FIX_CLK (0x00 << 4) +#define SAMPLE_VAR_CLK0 (0x01 << 4) +#define SAMPLE_VAR_CLK1 (0x02 << 4) + +/* SD_SAMPLE_POINT_CTL */ +#define DDR_FIX_RX_DAT 0x00 +#define DDR_VAR_RX_DAT 0x80 +#define DDR_FIX_RX_DAT_EDGE 0x00 +#define DDR_FIX_RX_DAT_14_DELAY 0x40 +#define DDR_FIX_RX_CMD 0x00 +#define DDR_VAR_RX_CMD 0x20 +#define DDR_FIX_RX_CMD_POS_EDGE 0x00 +#define DDR_FIX_RX_CMD_14_DELAY 0x10 +#define SD20_RX_POS_EDGE 0x00 +#define SD20_RX_14_DELAY 0x08 +#define SD20_RX_SEL_MASK 0x08 + +/* SD_PUSH_POINT_CTL */ +#define DDR_FIX_TX_CMD_DAT 0x00 +#define DDR_VAR_TX_CMD_DAT 0x80 +#define DDR_FIX_TX_DAT_14_TSU 0x00 +#define DDR_FIX_TX_DAT_12_TSU 0x40 +#define DDR_FIX_TX_CMD_NEG_EDGE 0x00 +#define DDR_FIX_TX_CMD_14_AHEAD 0x20 +#define SD20_TX_NEG_EDGE 0x00 +#define SD20_TX_14_AHEAD 0x10 +#define SD20_TX_SEL_MASK 0x10 +#define DDR_VAR_SDCLK_POL_SWAP 0x01 + +/* MS_CFG */ +#define SAMPLE_TIME_RISING 0x00 +#define SAMPLE_TIME_FALLING 0x80 +#define PUSH_TIME_DEFAULT 0x00 +#define PUSH_TIME_ODD 0x40 +#define NO_EXTEND_TOGGLE 0x00 +#define EXTEND_TOGGLE_CHK 0x20 +#define MS_BUS_WIDTH_1 0x00 +#define MS_BUS_WIDTH_4 0x10 +#define MS_BUS_WIDTH_8 0x18 +#define MS_2K_SECTOR_MODE 0x04 +#define MS_512_SECTOR_MODE 0x00 +#define MS_TOGGLE_TIMEOUT_EN 0x00 +#define MS_TOGGLE_TIMEOUT_DISEN 0x01 +#define MS_NO_CHECK_INT 0x02 + +/* MS_TRANS_CFG */ +#define WAIT_INT 0x80 +#define NO_WAIT_INT 0x00 +#define NO_AUTO_READ_INT_REG 0x00 +#define AUTO_READ_INT_REG 0x40 +#define MS_CRC16_ERR 0x20 +#define MS_RDY_TIMEOUT 0x10 +#define MS_INT_CMDNK 0x08 +#define MS_INT_BREQ 0x04 +#define MS_INT_ERR 0x02 +#define MS_INT_CED 0x01 + +/* MS_TRANSFER */ +#define MS_TRANSFER_START 0x80 +#define MS_TRANSFER_END 0x40 +#define MS_TRANSFER_ERR 0x20 +#define MS_BS_STATE 0x10 +#define MS_TM_READ_BYTES 0x00 +#define MS_TM_NORMAL_READ 0x01 +#define MS_TM_WRITE_BYTES 0x04 +#define MS_TM_NORMAL_WRITE 0x05 +#define MS_TM_AUTO_READ 0x08 +#define MS_TM_AUTO_WRITE 0x0C +#define MS_TM_SET_CMD 0x06 +#define MS_TM_COPY_PAGE 0x07 +#define MS_TM_MULTI_READ 0x02 +#define MS_TM_MULTI_WRITE 0x03 + +/* MC_FIFO_CTL */ +#define FIFO_FLUSH 0x01 + +/* MC_DMA_RST */ +#define DMA_RESET 0x01 + +/* MC_DMA_CTL */ +#define DMA_TC_EQ_0 0x80 +#define DMA_DIR_TO_CARD 0x00 +#define DMA_DIR_FROM_CARD 0x02 +#define DMA_EN 0x01 +#define DMA_128 (0 << 2) +#define DMA_256 (1 << 2) +#define DMA_512 (2 << 2) +#define DMA_1024 (3 << 2) +#define DMA_PACK_SIZE_MASK 0x0C + +/* CARD_INT_PEND */ +#define XD_INT 0x10 +#define MS_INT 0x08 +#define SD_INT 0x04 + +/* LED operations*/ +static inline int rtsx_usb_turn_on_led(struct rtsx_ucr *ucr) +{ + return rtsx_usb_ep0_write_register(ucr, CARD_GPIO, 0x03, 0x02); +} + +static inline int rtsx_usb_turn_off_led(struct rtsx_ucr *ucr) +{ + return rtsx_usb_ep0_write_register(ucr, CARD_GPIO, 0x03, 0x03); +} + +/* HW error clearing */ +static inline void rtsx_usb_clear_fsm_err(struct rtsx_ucr *ucr) +{ + rtsx_usb_ep0_write_register(ucr, SFSM_ED, 0xf8, 0xf8); +} + +static inline void rtsx_usb_clear_dma_err(struct rtsx_ucr *ucr) +{ + rtsx_usb_ep0_write_register(ucr, MC_FIFO_CTL, + FIFO_FLUSH, FIFO_FLUSH); + rtsx_usb_ep0_write_register(ucr, MC_DMA_RST, DMA_RESET, DMA_RESET); +} +#endif /* __RTS51139_H */ -- cgit v1.2.3-59-g8ed1b From c1d12c784c49980e4cbe57b7e6cc14b406449099 Mon Sep 17 00:00:00 2001 From: "Opensource [Steve Twiss]" Date: Fri, 14 Feb 2014 14:08:11 +0000 Subject: mfd: da9063: Add support for production silicon variant code Add the correct silicon variant code ID (0x5) to the driver. This new code is the 'production' variant code ID for DA9063. This patch will remove the older variant code ID which matches the pre-production silicon ID (0x3) for the DA9063 chip. There is also some small amount of correction done in this patch: it splits the revision code and correctly names it according to the hardware specification and moves the dev_info() call before the variant ID test. Signed-off-by: Opensource [Steve Twiss] Signed-off-by: Lee Jones --- drivers/mfd/da9063-core.c | 25 ++++++++++++++----------- include/linux/mfd/da9063/core.h | 6 +++++- 2 files changed, 19 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/da9063-core.c b/drivers/mfd/da9063-core.c index 26937cd01071..e70ae315abc7 100644 --- a/drivers/mfd/da9063-core.c +++ b/drivers/mfd/da9063-core.c @@ -110,7 +110,7 @@ static const struct mfd_cell da9063_devs[] = { int da9063_device_init(struct da9063 *da9063, unsigned int irq) { struct da9063_pdata *pdata = da9063->dev->platform_data; - int model, revision; + int model, variant_id, variant_code; int ret; if (pdata) { @@ -141,23 +141,26 @@ int da9063_device_init(struct da9063 *da9063, unsigned int irq) return -ENODEV; } - ret = regmap_read(da9063->regmap, DA9063_REG_CHIP_VARIANT, &revision); + ret = regmap_read(da9063->regmap, DA9063_REG_CHIP_VARIANT, &variant_id); if (ret < 0) { - dev_err(da9063->dev, "Cannot read chip revision id.\n"); + dev_err(da9063->dev, "Cannot read chip variant id.\n"); return -EIO; } - revision >>= DA9063_CHIP_VARIANT_SHIFT; - if (revision != 3) { - dev_err(da9063->dev, "Unknown chip revision: %d\n", revision); + + variant_code = variant_id >> DA9063_CHIP_VARIANT_SHIFT; + + dev_info(da9063->dev, + "Device detected (chip-ID: 0x%02X, var-ID: 0x%02X)\n", + model, variant_id); + + if (variant_code != PMIC_DA9063_BB) { + dev_err(da9063->dev, "Unknown chip variant code: 0x%02X\n", + variant_code); return -ENODEV; } da9063->model = model; - da9063->revision = revision; - - dev_info(da9063->dev, - "Device detected (model-ID: 0x%02X rev-ID: 0x%02X)\n", - model, revision); + da9063->variant_code = variant_code; ret = da9063_irq_init(da9063); if (ret) { diff --git a/include/linux/mfd/da9063/core.h b/include/linux/mfd/da9063/core.h index 2d2a0af675fd..00a9aac5d1e8 100644 --- a/include/linux/mfd/da9063/core.h +++ b/include/linux/mfd/da9063/core.h @@ -33,6 +33,10 @@ enum da9063_models { PMIC_DA9063 = 0x61, }; +enum da9063_variant_codes { + PMIC_DA9063_BB = 0x5 +}; + /* Interrupts */ enum da9063_irqs { DA9063_IRQ_ONKEY = 0, @@ -72,7 +76,7 @@ struct da9063 { /* Device */ struct device *dev; unsigned short model; - unsigned short revision; + unsigned char variant_code; unsigned int flags; /* Control interface */ -- cgit v1.2.3-59-g8ed1b From 6cec365e3eba3dd8c864056d8d3fd9e73ab8dd7a Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Fri, 14 Feb 2014 15:01:53 +0100 Subject: mfd: lpc_ich: Convert ICH GPIOs IDs to enum All those IDs are arbitrary and so can be encapsulated into an enumeration. Signed-off-by: Vincent Donnefort Signed-off-by: Lee Jones --- include/linux/mfd/lpc_ich.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/lpc_ich.h b/include/linux/mfd/lpc_ich.h index 3e1df644c407..293b06209b9c 100644 --- a/include/linux/mfd/lpc_ich.h +++ b/include/linux/mfd/lpc_ich.h @@ -31,13 +31,15 @@ #define ICH_RES_GPE0 1 /* GPIO compatibility */ -#define ICH_I3100_GPIO 0x401 -#define ICH_V5_GPIO 0x501 -#define ICH_V6_GPIO 0x601 -#define ICH_V7_GPIO 0x701 -#define ICH_V9_GPIO 0x801 -#define ICH_V10CORP_GPIO 0xa01 -#define ICH_V10CONS_GPIO 0xa11 +enum { + ICH_I3100_GPIO, + ICH_V5_GPIO, + ICH_V6_GPIO, + ICH_V7_GPIO, + ICH_V9_GPIO, + ICH_V10CORP_GPIO, + ICH_V10CONS_GPIO, +}; struct lpc_ich_info { char name[32]; -- cgit v1.2.3-59-g8ed1b From facd9939403cb5769190054a600474399e776e3a Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Fri, 14 Feb 2014 15:01:54 +0100 Subject: mfd: lpc_ich: Add support for Intel Avoton GPIOs Signed-off-by: Vincent Donnefort Signed-off-by: Lee Jones --- drivers/mfd/lpc_ich.c | 1 + include/linux/mfd/lpc_ich.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/drivers/mfd/lpc_ich.c b/drivers/mfd/lpc_ich.c index 010c1b490478..17eff0925ac1 100644 --- a/drivers/mfd/lpc_ich.c +++ b/drivers/mfd/lpc_ich.c @@ -499,6 +499,7 @@ static struct lpc_ich_info lpc_chipset_info[] = { [LPC_AVN] = { .name = "Avoton SoC", .iTCO_version = 1, + .gpio_version = AVOTON_GPIO, }, [LPC_COLETO] = { .name = "Coleto Creek", diff --git a/include/linux/mfd/lpc_ich.h b/include/linux/mfd/lpc_ich.h index 293b06209b9c..b2364dd026a6 100644 --- a/include/linux/mfd/lpc_ich.h +++ b/include/linux/mfd/lpc_ich.h @@ -39,6 +39,7 @@ enum { ICH_V9_GPIO, ICH_V10CORP_GPIO, ICH_V10CONS_GPIO, + AVOTON_GPIO, }; struct lpc_ich_info { -- cgit v1.2.3-59-g8ed1b From 16263f2879afecb7772f4bae238bca66352ecffe Mon Sep 17 00:00:00 2001 From: "Opensource [Anthony Olech]" Date: Wed, 19 Feb 2014 16:32:46 +0000 Subject: mfd: da9052: Extend support to a new chip Add the hash define for the new variant of the DA9053 PMIC called BC. Signed-off-by: Anthony Olech Signed-off-by: Lee Jones --- include/linux/mfd/da9052/da9052.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/da9052/da9052.h b/include/linux/mfd/da9052/da9052.h index 21e21b81cc75..bba65f51a0b5 100644 --- a/include/linux/mfd/da9052/da9052.h +++ b/include/linux/mfd/da9052/da9052.h @@ -83,6 +83,7 @@ enum da9052_chip_id { DA9053_AA, DA9053_BA, DA9053_BB, + DA9053_BC, }; struct da9052_pdata; -- cgit v1.2.3-59-g8ed1b From bc866fc7a8c4322de40b694ffcfcdda50ab82f35 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 26 Feb 2014 10:59:19 -0800 Subject: mfd: pm8xxx: Move pm8xxx-irq.c contents into only driver that uses it The pm8xxx-irq.c code is practically mandatory given that the pm8921-core driver will WARN about it missing and the Kconfig marks it as default y when a PM8xxx chips is enabled. The only reason the file was split out was because we planned to support other pm8xxx chips with different pm8xxx-core.c files. Now that we have DT on ARM this isn't necessary because we should be able to support all the ssbi based PM8xxx chips in one driver and one file with no data bloat. Let's move this code into the only driver that uses it right now (pm8921) so that it's always compiled when needed. In the future we can rename pm8921-core.c to something more generic. Signed-off-by: Stephen Boyd Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 10 -- drivers/mfd/Makefile | 1 - drivers/mfd/pm8921-core.c | 348 ++++++++++++++++++++++++++++++++++++++ drivers/mfd/pm8xxx-irq.c | 371 ----------------------------------------- include/linux/mfd/pm8xxx/irq.h | 23 --- 5 files changed, 348 insertions(+), 405 deletions(-) delete mode 100644 drivers/mfd/pm8xxx-irq.c (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index d93d1183991b..001668d8895f 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -492,16 +492,6 @@ config MFD_PM8921_CORE Say M here if you want to include support for PM8921 chip as a module. This will build a module called "pm8921-core". -config MFD_PM8XXX_IRQ - bool "Qualcomm PM8xxx IRQ features" - depends on MFD_PM8XXX - default y if MFD_PM8XXX - help - This is the IRQ driver for Qualcomm PM 8xxx PMIC chips. - - This is required to use certain other PM 8xxx features, such as GPIO - and MPP. - config MFD_RDC321X tristate "RDC R-321x southbridge" select MFD_CORE diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 377bd519884c..89a7951503ca 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -151,7 +151,6 @@ obj-$(CONFIG_MFD_SI476X_CORE) += si476x-core.o obj-$(CONFIG_MFD_CS5535) += cs5535-mfd.o obj-$(CONFIG_MFD_OMAP_USB_HOST) += omap-usb-host.o omap-usb-tll.o obj-$(CONFIG_MFD_PM8921_CORE) += pm8921-core.o ssbi.o -obj-$(CONFIG_MFD_PM8XXX_IRQ) += pm8xxx-irq.o obj-$(CONFIG_TPS65911_COMPARATOR) += tps65911-comparator.o obj-$(CONFIG_MFD_TPS65090) += tps65090.o obj-$(CONFIG_MFD_AAT2870_CORE) += aat2870-core.o diff --git a/drivers/mfd/pm8921-core.c b/drivers/mfd/pm8921-core.c index 484fe66e6c88..50e0a9b69b9d 100644 --- a/drivers/mfd/pm8921-core.c +++ b/drivers/mfd/pm8921-core.c @@ -14,6 +14,8 @@ #define pr_fmt(fmt) "%s: " fmt, __func__ #include +#include +#include #include #include #include @@ -22,15 +24,361 @@ #include #include #include +#include + +#define SSBI_REG_ADDR_IRQ_BASE 0x1BB + +#define SSBI_REG_ADDR_IRQ_ROOT (SSBI_REG_ADDR_IRQ_BASE + 0) +#define SSBI_REG_ADDR_IRQ_M_STATUS1 (SSBI_REG_ADDR_IRQ_BASE + 1) +#define SSBI_REG_ADDR_IRQ_M_STATUS2 (SSBI_REG_ADDR_IRQ_BASE + 2) +#define SSBI_REG_ADDR_IRQ_M_STATUS3 (SSBI_REG_ADDR_IRQ_BASE + 3) +#define SSBI_REG_ADDR_IRQ_M_STATUS4 (SSBI_REG_ADDR_IRQ_BASE + 4) +#define SSBI_REG_ADDR_IRQ_BLK_SEL (SSBI_REG_ADDR_IRQ_BASE + 5) +#define SSBI_REG_ADDR_IRQ_IT_STATUS (SSBI_REG_ADDR_IRQ_BASE + 6) +#define SSBI_REG_ADDR_IRQ_CONFIG (SSBI_REG_ADDR_IRQ_BASE + 7) +#define SSBI_REG_ADDR_IRQ_RT_STATUS (SSBI_REG_ADDR_IRQ_BASE + 8) + +#define PM_IRQF_LVL_SEL 0x01 /* level select */ +#define PM_IRQF_MASK_FE 0x02 /* mask falling edge */ +#define PM_IRQF_MASK_RE 0x04 /* mask rising edge */ +#define PM_IRQF_CLR 0x08 /* clear interrupt */ +#define PM_IRQF_BITS_MASK 0x70 +#define PM_IRQF_BITS_SHIFT 4 +#define PM_IRQF_WRITE 0x80 + +#define PM_IRQF_MASK_ALL (PM_IRQF_MASK_FE | \ + PM_IRQF_MASK_RE) #define REG_HWREV 0x002 /* PMIC4 revision */ #define REG_HWREV_2 0x0E8 /* PMIC4 revision 2 */ +struct pm_irq_chip { + struct device *dev; + spinlock_t pm_irq_lock; + unsigned int devirq; + unsigned int irq_base; + unsigned int num_irqs; + unsigned int num_blocks; + unsigned int num_masters; + u8 config[0]; +}; + struct pm8921 { struct device *dev; struct pm_irq_chip *irq_chip; }; +static int pm8xxx_read_root_irq(const struct pm_irq_chip *chip, u8 *rp) +{ + return pm8xxx_readb(chip->dev, SSBI_REG_ADDR_IRQ_ROOT, rp); +} + +static int pm8xxx_read_master_irq(const struct pm_irq_chip *chip, u8 m, u8 *bp) +{ + return pm8xxx_readb(chip->dev, + SSBI_REG_ADDR_IRQ_M_STATUS1 + m, bp); +} + +static int pm8xxx_read_block_irq(struct pm_irq_chip *chip, u8 bp, u8 *ip) +{ + int rc; + + spin_lock(&chip->pm_irq_lock); + rc = pm8xxx_writeb(chip->dev, SSBI_REG_ADDR_IRQ_BLK_SEL, bp); + if (rc) { + pr_err("Failed Selecting Block %d rc=%d\n", bp, rc); + goto bail; + } + + rc = pm8xxx_readb(chip->dev, SSBI_REG_ADDR_IRQ_IT_STATUS, ip); + if (rc) + pr_err("Failed Reading Status rc=%d\n", rc); +bail: + spin_unlock(&chip->pm_irq_lock); + return rc; +} + +static int pm8xxx_config_irq(struct pm_irq_chip *chip, u8 bp, u8 cp) +{ + int rc; + + spin_lock(&chip->pm_irq_lock); + rc = pm8xxx_writeb(chip->dev, SSBI_REG_ADDR_IRQ_BLK_SEL, bp); + if (rc) { + pr_err("Failed Selecting Block %d rc=%d\n", bp, rc); + goto bail; + } + + cp |= PM_IRQF_WRITE; + rc = pm8xxx_writeb(chip->dev, SSBI_REG_ADDR_IRQ_CONFIG, cp); + if (rc) + pr_err("Failed Configuring IRQ rc=%d\n", rc); +bail: + spin_unlock(&chip->pm_irq_lock); + return rc; +} + +static int pm8xxx_irq_block_handler(struct pm_irq_chip *chip, int block) +{ + int pmirq, irq, i, ret = 0; + u8 bits; + + ret = pm8xxx_read_block_irq(chip, block, &bits); + if (ret) { + pr_err("Failed reading %d block ret=%d", block, ret); + return ret; + } + if (!bits) { + pr_err("block bit set in master but no irqs: %d", block); + return 0; + } + + /* Check IRQ bits */ + for (i = 0; i < 8; i++) { + if (bits & (1 << i)) { + pmirq = block * 8 + i; + irq = pmirq + chip->irq_base; + generic_handle_irq(irq); + } + } + return 0; +} + +static int pm8xxx_irq_master_handler(struct pm_irq_chip *chip, int master) +{ + u8 blockbits; + int block_number, i, ret = 0; + + ret = pm8xxx_read_master_irq(chip, master, &blockbits); + if (ret) { + pr_err("Failed to read master %d ret=%d\n", master, ret); + return ret; + } + if (!blockbits) { + pr_err("master bit set in root but no blocks: %d", master); + return 0; + } + + for (i = 0; i < 8; i++) + if (blockbits & (1 << i)) { + block_number = master * 8 + i; /* block # */ + ret |= pm8xxx_irq_block_handler(chip, block_number); + } + return ret; +} + +static void pm8xxx_irq_handler(unsigned int irq, struct irq_desc *desc) +{ + struct pm_irq_chip *chip = irq_desc_get_handler_data(desc); + struct irq_chip *irq_chip = irq_desc_get_chip(desc); + u8 root; + int i, ret, masters = 0; + + ret = pm8xxx_read_root_irq(chip, &root); + if (ret) { + pr_err("Can't read root status ret=%d\n", ret); + return; + } + + /* on pm8xxx series masters start from bit 1 of the root */ + masters = root >> 1; + + /* Read allowed masters for blocks. */ + for (i = 0; i < chip->num_masters; i++) + if (masters & (1 << i)) + pm8xxx_irq_master_handler(chip, i); + + irq_chip->irq_ack(&desc->irq_data); +} + +static void pm8xxx_irq_mask_ack(struct irq_data *d) +{ + struct pm_irq_chip *chip = irq_data_get_irq_chip_data(d); + unsigned int pmirq = d->irq - chip->irq_base; + int master, irq_bit; + u8 block, config; + + block = pmirq / 8; + master = block / 8; + irq_bit = pmirq % 8; + + config = chip->config[pmirq] | PM_IRQF_MASK_ALL | PM_IRQF_CLR; + pm8xxx_config_irq(chip, block, config); +} + +static void pm8xxx_irq_unmask(struct irq_data *d) +{ + struct pm_irq_chip *chip = irq_data_get_irq_chip_data(d); + unsigned int pmirq = d->irq - chip->irq_base; + int master, irq_bit; + u8 block, config; + + block = pmirq / 8; + master = block / 8; + irq_bit = pmirq % 8; + + config = chip->config[pmirq]; + pm8xxx_config_irq(chip, block, config); +} + +static int pm8xxx_irq_set_type(struct irq_data *d, unsigned int flow_type) +{ + struct pm_irq_chip *chip = irq_data_get_irq_chip_data(d); + unsigned int pmirq = d->irq - chip->irq_base; + int master, irq_bit; + u8 block, config; + + block = pmirq / 8; + master = block / 8; + irq_bit = pmirq % 8; + + chip->config[pmirq] = (irq_bit << PM_IRQF_BITS_SHIFT) + | PM_IRQF_MASK_ALL; + if (flow_type & (IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING)) { + if (flow_type & IRQF_TRIGGER_RISING) + chip->config[pmirq] &= ~PM_IRQF_MASK_RE; + if (flow_type & IRQF_TRIGGER_FALLING) + chip->config[pmirq] &= ~PM_IRQF_MASK_FE; + } else { + chip->config[pmirq] |= PM_IRQF_LVL_SEL; + + if (flow_type & IRQF_TRIGGER_HIGH) + chip->config[pmirq] &= ~PM_IRQF_MASK_RE; + else + chip->config[pmirq] &= ~PM_IRQF_MASK_FE; + } + + config = chip->config[pmirq] | PM_IRQF_CLR; + return pm8xxx_config_irq(chip, block, config); +} + +static int pm8xxx_irq_set_wake(struct irq_data *d, unsigned int on) +{ + return 0; +} + +static struct irq_chip pm8xxx_irq_chip = { + .name = "pm8xxx", + .irq_mask_ack = pm8xxx_irq_mask_ack, + .irq_unmask = pm8xxx_irq_unmask, + .irq_set_type = pm8xxx_irq_set_type, + .irq_set_wake = pm8xxx_irq_set_wake, + .flags = IRQCHIP_MASK_ON_SUSPEND, +}; + +/** + * pm8xxx_get_irq_stat - get the status of the irq line + * @chip: pointer to identify a pmic irq controller + * @irq: the irq number + * + * The pm8xxx gpio and mpp rely on the interrupt block to read + * the values on their pins. This function is to facilitate reading + * the status of a gpio or an mpp line. The caller has to convert the + * gpio number to irq number. + * + * RETURNS: + * an int indicating the value read on that line + */ +static int pm8xxx_get_irq_stat(struct pm_irq_chip *chip, int irq) +{ + int pmirq, rc; + u8 block, bits, bit; + unsigned long flags; + + if (chip == NULL || irq < chip->irq_base || + irq >= chip->irq_base + chip->num_irqs) + return -EINVAL; + + pmirq = irq - chip->irq_base; + + block = pmirq / 8; + bit = pmirq % 8; + + spin_lock_irqsave(&chip->pm_irq_lock, flags); + + rc = pm8xxx_writeb(chip->dev, SSBI_REG_ADDR_IRQ_BLK_SEL, block); + if (rc) { + pr_err("Failed Selecting block irq=%d pmirq=%d blk=%d rc=%d\n", + irq, pmirq, block, rc); + goto bail_out; + } + + rc = pm8xxx_readb(chip->dev, SSBI_REG_ADDR_IRQ_RT_STATUS, &bits); + if (rc) { + pr_err("Failed Configuring irq=%d pmirq=%d blk=%d rc=%d\n", + irq, pmirq, block, rc); + goto bail_out; + } + + rc = (bits & (1 << bit)) ? 1 : 0; + +bail_out: + spin_unlock_irqrestore(&chip->pm_irq_lock, flags); + + return rc; +} + +static struct pm_irq_chip *pm8xxx_irq_init(struct device *dev, + const struct pm8xxx_irq_platform_data *pdata) +{ + struct pm_irq_chip *chip; + int devirq, rc; + unsigned int pmirq; + + if (!pdata) { + pr_err("No platform data\n"); + return ERR_PTR(-EINVAL); + } + + devirq = pdata->devirq; + if (devirq < 0) { + pr_err("missing devirq\n"); + rc = devirq; + return ERR_PTR(-EINVAL); + } + + chip = kzalloc(sizeof(struct pm_irq_chip) + + sizeof(u8) * pdata->irq_cdata.nirqs, GFP_KERNEL); + if (!chip) { + pr_err("Cannot alloc pm_irq_chip struct\n"); + return ERR_PTR(-EINVAL); + } + + chip->dev = dev; + chip->devirq = devirq; + chip->irq_base = pdata->irq_base; + chip->num_irqs = pdata->irq_cdata.nirqs; + chip->num_blocks = DIV_ROUND_UP(chip->num_irqs, 8); + chip->num_masters = DIV_ROUND_UP(chip->num_blocks, 8); + spin_lock_init(&chip->pm_irq_lock); + + for (pmirq = 0; pmirq < chip->num_irqs; pmirq++) { + irq_set_chip_and_handler(chip->irq_base + pmirq, + &pm8xxx_irq_chip, + handle_level_irq); + irq_set_chip_data(chip->irq_base + pmirq, chip); +#ifdef CONFIG_ARM + set_irq_flags(chip->irq_base + pmirq, IRQF_VALID); +#else + irq_set_noprobe(chip->irq_base + pmirq); +#endif + } + + irq_set_irq_type(devirq, pdata->irq_trigger_flag); + irq_set_handler_data(devirq, chip); + irq_set_chained_handler(devirq, pm8xxx_irq_handler); + set_irq_wake(devirq, 1); + + return chip; +} + +static int pm8xxx_irq_exit(struct pm_irq_chip *chip) +{ + irq_set_chained_handler(chip->devirq, NULL); + kfree(chip); + return 0; +} + static int pm8921_readb(const struct device *dev, u16 addr, u8 *val) { const struct pm8xxx_drvdata *pm8921_drvdata = dev_get_drvdata(dev); diff --git a/drivers/mfd/pm8xxx-irq.c b/drivers/mfd/pm8xxx-irq.c deleted file mode 100644 index 1360e20adf11..000000000000 --- a/drivers/mfd/pm8xxx-irq.c +++ /dev/null @@ -1,371 +0,0 @@ -/* - * Copyright (c) 2011, Code Aurora Forum. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 and - * only version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#define pr_fmt(fmt) "%s: " fmt, __func__ - -#include -#include -#include -#include -#include -#include -#include -#include - -/* PMIC8xxx IRQ */ - -#define SSBI_REG_ADDR_IRQ_BASE 0x1BB - -#define SSBI_REG_ADDR_IRQ_ROOT (SSBI_REG_ADDR_IRQ_BASE + 0) -#define SSBI_REG_ADDR_IRQ_M_STATUS1 (SSBI_REG_ADDR_IRQ_BASE + 1) -#define SSBI_REG_ADDR_IRQ_M_STATUS2 (SSBI_REG_ADDR_IRQ_BASE + 2) -#define SSBI_REG_ADDR_IRQ_M_STATUS3 (SSBI_REG_ADDR_IRQ_BASE + 3) -#define SSBI_REG_ADDR_IRQ_M_STATUS4 (SSBI_REG_ADDR_IRQ_BASE + 4) -#define SSBI_REG_ADDR_IRQ_BLK_SEL (SSBI_REG_ADDR_IRQ_BASE + 5) -#define SSBI_REG_ADDR_IRQ_IT_STATUS (SSBI_REG_ADDR_IRQ_BASE + 6) -#define SSBI_REG_ADDR_IRQ_CONFIG (SSBI_REG_ADDR_IRQ_BASE + 7) -#define SSBI_REG_ADDR_IRQ_RT_STATUS (SSBI_REG_ADDR_IRQ_BASE + 8) - -#define PM_IRQF_LVL_SEL 0x01 /* level select */ -#define PM_IRQF_MASK_FE 0x02 /* mask falling edge */ -#define PM_IRQF_MASK_RE 0x04 /* mask rising edge */ -#define PM_IRQF_CLR 0x08 /* clear interrupt */ -#define PM_IRQF_BITS_MASK 0x70 -#define PM_IRQF_BITS_SHIFT 4 -#define PM_IRQF_WRITE 0x80 - -#define PM_IRQF_MASK_ALL (PM_IRQF_MASK_FE | \ - PM_IRQF_MASK_RE) - -struct pm_irq_chip { - struct device *dev; - spinlock_t pm_irq_lock; - unsigned int devirq; - unsigned int irq_base; - unsigned int num_irqs; - unsigned int num_blocks; - unsigned int num_masters; - u8 config[0]; -}; - -static int pm8xxx_read_root_irq(const struct pm_irq_chip *chip, u8 *rp) -{ - return pm8xxx_readb(chip->dev, SSBI_REG_ADDR_IRQ_ROOT, rp); -} - -static int pm8xxx_read_master_irq(const struct pm_irq_chip *chip, u8 m, u8 *bp) -{ - return pm8xxx_readb(chip->dev, - SSBI_REG_ADDR_IRQ_M_STATUS1 + m, bp); -} - -static int pm8xxx_read_block_irq(struct pm_irq_chip *chip, u8 bp, u8 *ip) -{ - int rc; - - spin_lock(&chip->pm_irq_lock); - rc = pm8xxx_writeb(chip->dev, SSBI_REG_ADDR_IRQ_BLK_SEL, bp); - if (rc) { - pr_err("Failed Selecting Block %d rc=%d\n", bp, rc); - goto bail; - } - - rc = pm8xxx_readb(chip->dev, SSBI_REG_ADDR_IRQ_IT_STATUS, ip); - if (rc) - pr_err("Failed Reading Status rc=%d\n", rc); -bail: - spin_unlock(&chip->pm_irq_lock); - return rc; -} - -static int pm8xxx_config_irq(struct pm_irq_chip *chip, u8 bp, u8 cp) -{ - int rc; - - spin_lock(&chip->pm_irq_lock); - rc = pm8xxx_writeb(chip->dev, SSBI_REG_ADDR_IRQ_BLK_SEL, bp); - if (rc) { - pr_err("Failed Selecting Block %d rc=%d\n", bp, rc); - goto bail; - } - - cp |= PM_IRQF_WRITE; - rc = pm8xxx_writeb(chip->dev, SSBI_REG_ADDR_IRQ_CONFIG, cp); - if (rc) - pr_err("Failed Configuring IRQ rc=%d\n", rc); -bail: - spin_unlock(&chip->pm_irq_lock); - return rc; -} - -static int pm8xxx_irq_block_handler(struct pm_irq_chip *chip, int block) -{ - int pmirq, irq, i, ret = 0; - u8 bits; - - ret = pm8xxx_read_block_irq(chip, block, &bits); - if (ret) { - pr_err("Failed reading %d block ret=%d", block, ret); - return ret; - } - if (!bits) { - pr_err("block bit set in master but no irqs: %d", block); - return 0; - } - - /* Check IRQ bits */ - for (i = 0; i < 8; i++) { - if (bits & (1 << i)) { - pmirq = block * 8 + i; - irq = pmirq + chip->irq_base; - generic_handle_irq(irq); - } - } - return 0; -} - -static int pm8xxx_irq_master_handler(struct pm_irq_chip *chip, int master) -{ - u8 blockbits; - int block_number, i, ret = 0; - - ret = pm8xxx_read_master_irq(chip, master, &blockbits); - if (ret) { - pr_err("Failed to read master %d ret=%d\n", master, ret); - return ret; - } - if (!blockbits) { - pr_err("master bit set in root but no blocks: %d", master); - return 0; - } - - for (i = 0; i < 8; i++) - if (blockbits & (1 << i)) { - block_number = master * 8 + i; /* block # */ - ret |= pm8xxx_irq_block_handler(chip, block_number); - } - return ret; -} - -static void pm8xxx_irq_handler(unsigned int irq, struct irq_desc *desc) -{ - struct pm_irq_chip *chip = irq_desc_get_handler_data(desc); - struct irq_chip *irq_chip = irq_desc_get_chip(desc); - u8 root; - int i, ret, masters = 0; - - ret = pm8xxx_read_root_irq(chip, &root); - if (ret) { - pr_err("Can't read root status ret=%d\n", ret); - return; - } - - /* on pm8xxx series masters start from bit 1 of the root */ - masters = root >> 1; - - /* Read allowed masters for blocks. */ - for (i = 0; i < chip->num_masters; i++) - if (masters & (1 << i)) - pm8xxx_irq_master_handler(chip, i); - - irq_chip->irq_ack(&desc->irq_data); -} - -static void pm8xxx_irq_mask_ack(struct irq_data *d) -{ - struct pm_irq_chip *chip = irq_data_get_irq_chip_data(d); - unsigned int pmirq = d->irq - chip->irq_base; - int master, irq_bit; - u8 block, config; - - block = pmirq / 8; - master = block / 8; - irq_bit = pmirq % 8; - - config = chip->config[pmirq] | PM_IRQF_MASK_ALL | PM_IRQF_CLR; - pm8xxx_config_irq(chip, block, config); -} - -static void pm8xxx_irq_unmask(struct irq_data *d) -{ - struct pm_irq_chip *chip = irq_data_get_irq_chip_data(d); - unsigned int pmirq = d->irq - chip->irq_base; - int master, irq_bit; - u8 block, config; - - block = pmirq / 8; - master = block / 8; - irq_bit = pmirq % 8; - - config = chip->config[pmirq]; - pm8xxx_config_irq(chip, block, config); -} - -static int pm8xxx_irq_set_type(struct irq_data *d, unsigned int flow_type) -{ - struct pm_irq_chip *chip = irq_data_get_irq_chip_data(d); - unsigned int pmirq = d->irq - chip->irq_base; - int master, irq_bit; - u8 block, config; - - block = pmirq / 8; - master = block / 8; - irq_bit = pmirq % 8; - - chip->config[pmirq] = (irq_bit << PM_IRQF_BITS_SHIFT) - | PM_IRQF_MASK_ALL; - if (flow_type & (IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING)) { - if (flow_type & IRQF_TRIGGER_RISING) - chip->config[pmirq] &= ~PM_IRQF_MASK_RE; - if (flow_type & IRQF_TRIGGER_FALLING) - chip->config[pmirq] &= ~PM_IRQF_MASK_FE; - } else { - chip->config[pmirq] |= PM_IRQF_LVL_SEL; - - if (flow_type & IRQF_TRIGGER_HIGH) - chip->config[pmirq] &= ~PM_IRQF_MASK_RE; - else - chip->config[pmirq] &= ~PM_IRQF_MASK_FE; - } - - config = chip->config[pmirq] | PM_IRQF_CLR; - return pm8xxx_config_irq(chip, block, config); -} - -static int pm8xxx_irq_set_wake(struct irq_data *d, unsigned int on) -{ - return 0; -} - -static struct irq_chip pm8xxx_irq_chip = { - .name = "pm8xxx", - .irq_mask_ack = pm8xxx_irq_mask_ack, - .irq_unmask = pm8xxx_irq_unmask, - .irq_set_type = pm8xxx_irq_set_type, - .irq_set_wake = pm8xxx_irq_set_wake, - .flags = IRQCHIP_MASK_ON_SUSPEND, -}; - -/** - * pm8xxx_get_irq_stat - get the status of the irq line - * @chip: pointer to identify a pmic irq controller - * @irq: the irq number - * - * The pm8xxx gpio and mpp rely on the interrupt block to read - * the values on their pins. This function is to facilitate reading - * the status of a gpio or an mpp line. The caller has to convert the - * gpio number to irq number. - * - * RETURNS: - * an int indicating the value read on that line - */ -int pm8xxx_get_irq_stat(struct pm_irq_chip *chip, int irq) -{ - int pmirq, rc; - u8 block, bits, bit; - unsigned long flags; - - if (chip == NULL || irq < chip->irq_base || - irq >= chip->irq_base + chip->num_irqs) - return -EINVAL; - - pmirq = irq - chip->irq_base; - - block = pmirq / 8; - bit = pmirq % 8; - - spin_lock_irqsave(&chip->pm_irq_lock, flags); - - rc = pm8xxx_writeb(chip->dev, SSBI_REG_ADDR_IRQ_BLK_SEL, block); - if (rc) { - pr_err("Failed Selecting block irq=%d pmirq=%d blk=%d rc=%d\n", - irq, pmirq, block, rc); - goto bail_out; - } - - rc = pm8xxx_readb(chip->dev, SSBI_REG_ADDR_IRQ_RT_STATUS, &bits); - if (rc) { - pr_err("Failed Configuring irq=%d pmirq=%d blk=%d rc=%d\n", - irq, pmirq, block, rc); - goto bail_out; - } - - rc = (bits & (1 << bit)) ? 1 : 0; - -bail_out: - spin_unlock_irqrestore(&chip->pm_irq_lock, flags); - - return rc; -} -EXPORT_SYMBOL_GPL(pm8xxx_get_irq_stat); - -struct pm_irq_chip * pm8xxx_irq_init(struct device *dev, - const struct pm8xxx_irq_platform_data *pdata) -{ - struct pm_irq_chip *chip; - int devirq, rc; - unsigned int pmirq; - - if (!pdata) { - pr_err("No platform data\n"); - return ERR_PTR(-EINVAL); - } - - devirq = pdata->devirq; - if (devirq < 0) { - pr_err("missing devirq\n"); - rc = devirq; - return ERR_PTR(-EINVAL); - } - - chip = kzalloc(sizeof(struct pm_irq_chip) - + sizeof(u8) * pdata->irq_cdata.nirqs, GFP_KERNEL); - if (!chip) { - pr_err("Cannot alloc pm_irq_chip struct\n"); - return ERR_PTR(-EINVAL); - } - - chip->dev = dev; - chip->devirq = devirq; - chip->irq_base = pdata->irq_base; - chip->num_irqs = pdata->irq_cdata.nirqs; - chip->num_blocks = DIV_ROUND_UP(chip->num_irqs, 8); - chip->num_masters = DIV_ROUND_UP(chip->num_blocks, 8); - spin_lock_init(&chip->pm_irq_lock); - - for (pmirq = 0; pmirq < chip->num_irqs; pmirq++) { - irq_set_chip_and_handler(chip->irq_base + pmirq, - &pm8xxx_irq_chip, - handle_level_irq); - irq_set_chip_data(chip->irq_base + pmirq, chip); -#ifdef CONFIG_ARM - set_irq_flags(chip->irq_base + pmirq, IRQF_VALID); -#else - irq_set_noprobe(chip->irq_base + pmirq); -#endif - } - - irq_set_irq_type(devirq, pdata->irq_trigger_flag); - irq_set_handler_data(devirq, chip); - irq_set_chained_handler(devirq, pm8xxx_irq_handler); - set_irq_wake(devirq, 1); - - return chip; -} - -int pm8xxx_irq_exit(struct pm_irq_chip *chip) -{ - irq_set_chained_handler(chip->devirq, NULL); - kfree(chip); - return 0; -} diff --git a/include/linux/mfd/pm8xxx/irq.h b/include/linux/mfd/pm8xxx/irq.h index f83d6b43ecbb..1a11b02383f0 100644 --- a/include/linux/mfd/pm8xxx/irq.h +++ b/include/linux/mfd/pm8xxx/irq.h @@ -33,27 +33,4 @@ struct pm8xxx_irq_platform_data { int irq_trigger_flag; }; -struct pm_irq_chip; - -#ifdef CONFIG_MFD_PM8XXX_IRQ -int pm8xxx_get_irq_stat(struct pm_irq_chip *chip, int irq); -struct pm_irq_chip *pm8xxx_irq_init(struct device *dev, - const struct pm8xxx_irq_platform_data *pdata); -int pm8xxx_irq_exit(struct pm_irq_chip *chip); -#else -static inline int pm8xxx_get_irq_stat(struct pm_irq_chip *chip, int irq) -{ - return -ENXIO; -} -static inline struct pm_irq_chip *pm8xxx_irq_init( - const struct device *dev, - const struct pm8xxx_irq_platform_data *pdata) -{ - return ERR_PTR(-ENXIO); -} -static inline int pm8xxx_irq_exit(struct pm_irq_chip *chip) -{ - return -ENXIO; -} -#endif /* CONFIG_MFD_PM8XXX_IRQ */ #endif /* __MFD_PM8XXX_IRQ_H */ -- cgit v1.2.3-59-g8ed1b From dc1a95ccaa1158948bbc6648d6dadc534a30ed92 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 26 Feb 2014 10:59:21 -0800 Subject: mfd: pm8921: Migrate to irqdomains Convert this driver to use irqdomains so that the PMIC's child devices can be converted to devicetree. Acked-by: Lee Jones Signed-off-by: Stephen Boyd --- drivers/mfd/Kconfig | 1 + drivers/mfd/pm8921-core.c | 198 +++++++++++++++----------------------- include/linux/mfd/pm8xxx/irq.h | 36 ------- include/linux/mfd/pm8xxx/pm8921.h | 30 ------ 4 files changed, 76 insertions(+), 189 deletions(-) delete mode 100644 include/linux/mfd/pm8xxx/irq.h delete mode 100644 include/linux/mfd/pm8xxx/pm8921.h (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 650c90f814ff..833d2c884437 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -479,6 +479,7 @@ config MFD_PM8XXX config MFD_PM8921_CORE tristate "Qualcomm PM8921 PMIC chip" depends on (ARCH_MSM || HEXAGON) + select IRQ_DOMAIN select MFD_CORE select MFD_PM8XXX help diff --git a/drivers/mfd/pm8921-core.c b/drivers/mfd/pm8921-core.c index 9ddc31f7a71d..c25e7dae150b 100644 --- a/drivers/mfd/pm8921-core.c +++ b/drivers/mfd/pm8921-core.c @@ -17,15 +17,15 @@ #include #include #include +#include #include #include #include #include #include +#include #include -#include #include -#include #define SSBI_REG_ADDR_IRQ_BASE 0x1BB @@ -53,11 +53,12 @@ #define REG_HWREV 0x002 /* PMIC4 revision */ #define REG_HWREV_2 0x0E8 /* PMIC4 revision 2 */ +#define PM8921_NR_IRQS 256 + struct pm_irq_chip { struct device *dev; spinlock_t pm_irq_lock; - unsigned int devirq; - unsigned int irq_base; + struct irq_domain *irqdomain; unsigned int num_irqs; unsigned int num_blocks; unsigned int num_masters; @@ -138,7 +139,7 @@ static int pm8xxx_irq_block_handler(struct pm_irq_chip *chip, int block) for (i = 0; i < 8; i++) { if (bits & (1 << i)) { pmirq = block * 8 + i; - irq = pmirq + chip->irq_base; + irq = irq_find_mapping(chip->irqdomain, pmirq); generic_handle_irq(irq); } } @@ -197,12 +198,11 @@ static void pm8xxx_irq_handler(unsigned int irq, struct irq_desc *desc) static void pm8xxx_irq_mask_ack(struct irq_data *d) { struct pm_irq_chip *chip = irq_data_get_irq_chip_data(d); - unsigned int pmirq = d->irq - chip->irq_base; - int master, irq_bit; + unsigned int pmirq = irqd_to_hwirq(d); + int irq_bit; u8 block, config; block = pmirq / 8; - master = block / 8; irq_bit = pmirq % 8; config = chip->config[pmirq] | PM_IRQF_MASK_ALL | PM_IRQF_CLR; @@ -212,12 +212,11 @@ static void pm8xxx_irq_mask_ack(struct irq_data *d) static void pm8xxx_irq_unmask(struct irq_data *d) { struct pm_irq_chip *chip = irq_data_get_irq_chip_data(d); - unsigned int pmirq = d->irq - chip->irq_base; - int master, irq_bit; + unsigned int pmirq = irqd_to_hwirq(d); + int irq_bit; u8 block, config; block = pmirq / 8; - master = block / 8; irq_bit = pmirq % 8; config = chip->config[pmirq]; @@ -227,12 +226,11 @@ static void pm8xxx_irq_unmask(struct irq_data *d) static int pm8xxx_irq_set_type(struct irq_data *d, unsigned int flow_type) { struct pm_irq_chip *chip = irq_data_get_irq_chip_data(d); - unsigned int pmirq = d->irq - chip->irq_base; - int master, irq_bit; + unsigned int pmirq = irqd_to_hwirq(d); + int irq_bit; u8 block, config; block = pmirq / 8; - master = block / 8; irq_bit = pmirq % 8; chip->config[pmirq] = (irq_bit << PM_IRQF_BITS_SHIFT) @@ -287,12 +285,9 @@ static int pm8xxx_get_irq_stat(struct pm_irq_chip *chip, int irq) int pmirq, rc; u8 block, bits, bit; unsigned long flags; + struct irq_data *irq_data = irq_get_irq_data(irq); - if (chip == NULL || irq < chip->irq_base || - irq >= chip->irq_base + chip->num_irqs) - return -EINVAL; - - pmirq = irq - chip->irq_base; + pmirq = irq_data->hwirq; block = pmirq / 8; bit = pmirq % 8; @@ -321,67 +316,29 @@ bail_out: return rc; } -static struct pm_irq_chip *pm8xxx_irq_init(struct device *dev, - const struct pm8xxx_irq_platform_data *pdata) -{ - struct pm_irq_chip *chip; - int devirq, rc; - unsigned int pmirq; - - if (!pdata) { - pr_err("No platform data\n"); - return ERR_PTR(-EINVAL); - } +static struct lock_class_key pm8xxx_irq_lock_class; - devirq = pdata->devirq; - if (devirq < 0) { - pr_err("missing devirq\n"); - rc = devirq; - return ERR_PTR(-EINVAL); - } - - chip = kzalloc(sizeof(struct pm_irq_chip) - + sizeof(u8) * pdata->irq_cdata.nirqs, GFP_KERNEL); - if (!chip) { - pr_err("Cannot alloc pm_irq_chip struct\n"); - return ERR_PTR(-EINVAL); - } - - chip->dev = dev; - chip->devirq = devirq; - chip->irq_base = pdata->irq_base; - chip->num_irqs = pdata->irq_cdata.nirqs; - chip->num_blocks = DIV_ROUND_UP(chip->num_irqs, 8); - chip->num_masters = DIV_ROUND_UP(chip->num_blocks, 8); - spin_lock_init(&chip->pm_irq_lock); +static int pm8xxx_irq_domain_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hwirq) +{ + struct pm_irq_chip *chip = d->host_data; - for (pmirq = 0; pmirq < chip->num_irqs; pmirq++) { - irq_set_chip_and_handler(chip->irq_base + pmirq, - &pm8xxx_irq_chip, - handle_level_irq); - irq_set_chip_data(chip->irq_base + pmirq, chip); + irq_set_lockdep_class(irq, &pm8xxx_irq_lock_class); + irq_set_chip_and_handler(irq, &pm8xxx_irq_chip, handle_level_irq); + irq_set_chip_data(irq, chip); #ifdef CONFIG_ARM - set_irq_flags(chip->irq_base + pmirq, IRQF_VALID); + set_irq_flags(irq, IRQF_VALID); #else - irq_set_noprobe(chip->irq_base + pmirq); + irq_set_noprobe(irq); #endif - } - - irq_set_irq_type(devirq, pdata->irq_trigger_flag); - irq_set_handler_data(devirq, chip); - irq_set_chained_handler(devirq, pm8xxx_irq_handler); - irq_set_irq_wake(devirq, 1); - - return chip; -} - -static int pm8xxx_irq_exit(struct pm_irq_chip *chip) -{ - irq_set_chained_handler(chip->devirq, NULL); - kfree(chip); return 0; } +static const struct irq_domain_ops pm8xxx_irq_domain_ops = { + .xlate = irq_domain_xlate_twocell, + .map = pm8xxx_irq_domain_map, +}; + static int pm8921_readb(const struct device *dev, u16 addr, u8 *val) { const struct pm8xxx_drvdata *pm8921_drvdata = dev_get_drvdata(dev); @@ -432,42 +389,19 @@ static struct pm8xxx_drvdata pm8921_drvdata = { .pmic_read_irq_stat = pm8921_read_irq_stat, }; -static int pm8921_add_subdevices(const struct pm8921_platform_data - *pdata, - struct pm8921 *pmic, - u32 rev) -{ - int ret = 0, irq_base = 0; - struct pm_irq_chip *irq_chip; - - if (pdata->irq_pdata) { - pdata->irq_pdata->irq_cdata.nirqs = PM8921_NR_IRQS; - pdata->irq_pdata->irq_cdata.rev = rev; - irq_base = pdata->irq_pdata->irq_base; - irq_chip = pm8xxx_irq_init(pmic->dev, pdata->irq_pdata); - - if (IS_ERR(irq_chip)) { - pr_err("Failed to init interrupts ret=%ld\n", - PTR_ERR(irq_chip)); - return PTR_ERR(irq_chip); - } - pmic->irq_chip = irq_chip; - } - return ret; -} - static int pm8921_probe(struct platform_device *pdev) { - const struct pm8921_platform_data *pdata = dev_get_platdata(&pdev->dev); struct pm8921 *pmic; int rc; u8 val; + unsigned int irq; u32 rev; + struct pm_irq_chip *chip; + unsigned int nirqs = PM8921_NR_IRQS; - if (!pdata) { - pr_err("missing platform data\n"); - return -EINVAL; - } + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; pmic = devm_kzalloc(&pdev->dev, sizeof(struct pm8921), GFP_KERNEL); if (!pmic) { @@ -498,37 +432,55 @@ static int pm8921_probe(struct platform_device *pdev) pm8921_drvdata.pm_chip_data = pmic; platform_set_drvdata(pdev, &pm8921_drvdata); - rc = pm8921_add_subdevices(pdata, pmic, rev); + chip = devm_kzalloc(&pdev->dev, sizeof(*chip) + + sizeof(chip->config[0]) * nirqs, + GFP_KERNEL); + if (!chip) + return -ENOMEM; + + pmic->irq_chip = chip; + chip->dev = &pdev->dev; + chip->num_irqs = nirqs; + chip->num_blocks = DIV_ROUND_UP(chip->num_irqs, 8); + chip->num_masters = DIV_ROUND_UP(chip->num_blocks, 8); + spin_lock_init(&chip->pm_irq_lock); + + chip->irqdomain = irq_domain_add_linear(pdev->dev.of_node, nirqs, + &pm8xxx_irq_domain_ops, + chip); + if (!chip->irqdomain) + return -ENODEV; + + irq_set_handler_data(irq, chip); + irq_set_chained_handler(irq, pm8xxx_irq_handler); + irq_set_irq_wake(irq, 1); + + rc = of_platform_populate(pdev->dev.of_node, NULL, NULL, &pdev->dev); if (rc) { - pr_err("Cannot add subdevices rc=%d\n", rc); - goto err; + irq_set_chained_handler(irq, NULL); + irq_set_handler_data(irq, NULL); + irq_domain_remove(chip->irqdomain); } - /* gpio might not work if no irq device is found */ - WARN_ON(pmic->irq_chip == NULL); + return rc; +} +static int pm8921_remove_child(struct device *dev, void *unused) +{ + platform_device_unregister(to_platform_device(dev)); return 0; - -err: - mfd_remove_devices(pmic->dev); - return rc; } static int pm8921_remove(struct platform_device *pdev) { - struct pm8xxx_drvdata *drvdata; - struct pm8921 *pmic = NULL; - - drvdata = platform_get_drvdata(pdev); - if (drvdata) - pmic = drvdata->pm_chip_data; - if (pmic) { - mfd_remove_devices(pmic->dev); - if (pmic->irq_chip) { - pm8xxx_irq_exit(pmic->irq_chip); - pmic->irq_chip = NULL; - } - } + int irq = platform_get_irq(pdev, 0); + struct pm8921 *pmic = pm8921_drvdata.pm_chip_data; + struct pm_irq_chip *chip = pmic->irq_chip; + + device_for_each_child(&pdev->dev, NULL, pm8921_remove_child); + irq_set_chained_handler(irq, NULL); + irq_set_handler_data(irq, NULL); + irq_domain_remove(chip->irqdomain); return 0; } diff --git a/include/linux/mfd/pm8xxx/irq.h b/include/linux/mfd/pm8xxx/irq.h deleted file mode 100644 index 1a11b02383f0..000000000000 --- a/include/linux/mfd/pm8xxx/irq.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2011, Code Aurora Forum. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 and - * only version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ -/* - * Qualcomm PMIC irq 8xxx driver header file - * - */ - -#ifndef __MFD_PM8XXX_IRQ_H -#define __MFD_PM8XXX_IRQ_H - -#include -#include - -struct pm8xxx_irq_core_data { - u32 rev; - int nirqs; -}; - -struct pm8xxx_irq_platform_data { - int irq_base; - struct pm8xxx_irq_core_data irq_cdata; - int devirq; - int irq_trigger_flag; -}; - -#endif /* __MFD_PM8XXX_IRQ_H */ diff --git a/include/linux/mfd/pm8xxx/pm8921.h b/include/linux/mfd/pm8xxx/pm8921.h deleted file mode 100644 index 00fa3de7659d..000000000000 --- a/include/linux/mfd/pm8xxx/pm8921.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2011, Code Aurora Forum. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 and - * only version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ -/* - * Qualcomm PMIC 8921 driver header file - * - */ - -#ifndef __MFD_PM8921_H -#define __MFD_PM8921_H - -#include - -#define PM8921_NR_IRQS 256 - -struct pm8921_platform_data { - int irq_base; - struct pm8xxx_irq_platform_data *irq_pdata; -}; - -#endif -- cgit v1.2.3-59-g8ed1b From 559c04f6f116075a1ebead411666c7ab47dcc7fe Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 26 Feb 2014 10:59:22 -0800 Subject: mfd: ssbi: Add regmap read/write helpers Add read and write helper functions that the pm8921-core driver can use to read and write ssbi registers via a "no-bus" regmap. Signed-off-by: Stephen Boyd Signed-off-by: Lee Jones --- include/linux/ssbi.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ssbi.h b/include/linux/ssbi.h index bcbb642a7641..087b08a4d333 100644 --- a/include/linux/ssbi.h +++ b/include/linux/ssbi.h @@ -20,4 +20,24 @@ int ssbi_write(struct device *dev, u16 addr, const u8 *buf, int len); int ssbi_read(struct device *dev, u16 addr, u8 *buf, int len); +static inline int +ssbi_reg_read(void *context, unsigned int reg, unsigned int *val) +{ + int ret; + u8 v; + + ret = ssbi_read(context, reg, &v, 1); + if (!ret) + *val = v; + + return ret; +} + +static inline int +ssbi_reg_write(void *context, unsigned int reg, unsigned int val) +{ + u8 v = val; + return ssbi_write(context, reg, &v, 1); +} + #endif -- cgit v1.2.3-59-g8ed1b From 9fc2b9ca99428b54e6bbfd2d7bf0ccc1c594f1f5 Mon Sep 17 00:00:00 2001 From: "Opensource [Steve Twiss]" Date: Thu, 6 Mar 2014 16:40:02 +0000 Subject: mfd: da9063: Upgrade of register definitions to support production silicon This patch updates the register definitions for DA9063 to support the production silicon variant code ID (0x5). These changes are not backwards compatible with the previous register definitions and can only be used with the production variant of DA9063. Signed-off-by: Opensource [Steve Twiss] Signed-off-by: Lee Jones --- include/linux/mfd/da9063/registers.h | 120 ++++++++++++++++++----------------- 1 file changed, 62 insertions(+), 58 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/da9063/registers.h b/include/linux/mfd/da9063/registers.h index 5834813fb5f3..09a85c699da1 100644 --- a/include/linux/mfd/da9063/registers.h +++ b/include/linux/mfd/da9063/registers.h @@ -17,11 +17,7 @@ #define _DA9063_REG_H #define DA9063_I2C_PAGE_SEL_SHIFT 1 - #define DA9063_EVENT_REG_NUM 4 -#define DA9210_EVENT_REG_NUM 2 -#define DA9063_EXT_EVENT_REG_NUM (DA9063_EVENT_REG_NUM + \ - DA9210_EVENT_REG_NUM) /* Page selection I2C or SPI always in the begining of any page. */ /* Page 0 : I2C access 0x000 - 0x0FF SPI access 0x000 - 0x07F */ @@ -61,9 +57,9 @@ #define DA9063_REG_GPIO_10_11 0x1A #define DA9063_REG_GPIO_12_13 0x1B #define DA9063_REG_GPIO_14_15 0x1C -#define DA9063_REG_GPIO_MODE_0_7 0x1D -#define DA9063_REG_GPIO_MODE_8_15 0x1E -#define DA9063_REG_GPIO_SWITCH_CONT 0x1F +#define DA9063_REG_GPIO_MODE0_7 0x1D +#define DA9063_REG_GPIO_MODE8_15 0x1E +#define DA9063_REG_SWITCH_CONT 0x1F /* Regulator Control Registers */ #define DA9063_REG_BCORE2_CONT 0x20 @@ -83,7 +79,7 @@ #define DA9063_REG_LDO9_CONT 0x2E #define DA9063_REG_LDO10_CONT 0x2F #define DA9063_REG_LDO11_CONT 0x30 -#define DA9063_REG_VIB 0x31 +#define DA9063_REG_SUPPLIES 0x31 #define DA9063_REG_DVC_1 0x32 #define DA9063_REG_DVC_2 0x33 @@ -97,9 +93,9 @@ #define DA9063_REG_ADCIN1_RES 0x3A #define DA9063_REG_ADCIN2_RES 0x3B #define DA9063_REG_ADCIN3_RES 0x3C -#define DA9063_REG_MON1_RES 0x3D -#define DA9063_REG_MON2_RES 0x3E -#define DA9063_REG_MON3_RES 0x3F +#define DA9063_REG_MON_A8_RES 0x3D +#define DA9063_REG_MON_A9_RES 0x3E +#define DA9063_REG_MON_A10_RES 0x3F /* RTC Calendar and Alarm Registers */ #define DA9063_REG_COUNT_S 0x40 @@ -108,15 +104,16 @@ #define DA9063_REG_COUNT_D 0x43 #define DA9063_REG_COUNT_MO 0x44 #define DA9063_REG_COUNT_Y 0x45 -#define DA9063_REG_ALARM_MI 0x46 -#define DA9063_REG_ALARM_H 0x47 -#define DA9063_REG_ALARM_D 0x48 -#define DA9063_REG_ALARM_MO 0x49 -#define DA9063_REG_ALARM_Y 0x4A -#define DA9063_REG_SECOND_A 0x4B -#define DA9063_REG_SECOND_B 0x4C -#define DA9063_REG_SECOND_C 0x4D -#define DA9063_REG_SECOND_D 0x4E +#define DA9063_REG_ALARM_S 0x46 +#define DA9063_REG_ALARM_MI 0x47 +#define DA9063_REG_ALARM_H 0x48 +#define DA9063_REG_ALARM_D 0x49 +#define DA9063_REG_ALARM_MO 0x4A +#define DA9063_REG_ALARM_Y 0x4B +#define DA9063_REG_SECOND_A 0x4C +#define DA9063_REG_SECOND_B 0x4D +#define DA9063_REG_SECOND_C 0x4E +#define DA9063_REG_SECOND_D 0x4F /* Sequencer Control Registers */ #define DA9063_REG_SEQ 0x81 @@ -226,35 +223,37 @@ #define DA9063_REG_CONFIG_J 0x10F #define DA9063_REG_CONFIG_K 0x110 #define DA9063_REG_CONFIG_L 0x111 -#define DA9063_REG_MON_REG_1 0x112 -#define DA9063_REG_MON_REG_2 0x113 -#define DA9063_REG_MON_REG_3 0x114 -#define DA9063_REG_MON_REG_4 0x115 -#define DA9063_REG_MON_REG_5 0x116 -#define DA9063_REG_MON_REG_6 0x117 -#define DA9063_REG_TRIM_CLDR 0x118 - +#define DA9063_REG_CONFIG_M 0x112 +#define DA9063_REG_CONFIG_N 0x113 + +#define DA9063_REG_MON_REG_1 0x114 +#define DA9063_REG_MON_REG_2 0x115 +#define DA9063_REG_MON_REG_3 0x116 +#define DA9063_REG_MON_REG_4 0x117 +#define DA9063_REG_MON_REG_5 0x11E +#define DA9063_REG_MON_REG_6 0x11F +#define DA9063_REG_TRIM_CLDR 0x120 /* General Purpose Registers */ -#define DA9063_REG_GP_ID_0 0x119 -#define DA9063_REG_GP_ID_1 0x11A -#define DA9063_REG_GP_ID_2 0x11B -#define DA9063_REG_GP_ID_3 0x11C -#define DA9063_REG_GP_ID_4 0x11D -#define DA9063_REG_GP_ID_5 0x11E -#define DA9063_REG_GP_ID_6 0x11F -#define DA9063_REG_GP_ID_7 0x120 -#define DA9063_REG_GP_ID_8 0x121 -#define DA9063_REG_GP_ID_9 0x122 -#define DA9063_REG_GP_ID_10 0x123 -#define DA9063_REG_GP_ID_11 0x124 -#define DA9063_REG_GP_ID_12 0x125 -#define DA9063_REG_GP_ID_13 0x126 -#define DA9063_REG_GP_ID_14 0x127 -#define DA9063_REG_GP_ID_15 0x128 -#define DA9063_REG_GP_ID_16 0x129 -#define DA9063_REG_GP_ID_17 0x12A -#define DA9063_REG_GP_ID_18 0x12B -#define DA9063_REG_GP_ID_19 0x12C +#define DA9063_REG_GP_ID_0 0x121 +#define DA9063_REG_GP_ID_1 0x122 +#define DA9063_REG_GP_ID_2 0x123 +#define DA9063_REG_GP_ID_3 0x124 +#define DA9063_REG_GP_ID_4 0x125 +#define DA9063_REG_GP_ID_5 0x126 +#define DA9063_REG_GP_ID_6 0x127 +#define DA9063_REG_GP_ID_7 0x128 +#define DA9063_REG_GP_ID_8 0x129 +#define DA9063_REG_GP_ID_9 0x12A +#define DA9063_REG_GP_ID_10 0x12B +#define DA9063_REG_GP_ID_11 0x12C +#define DA9063_REG_GP_ID_12 0x12D +#define DA9063_REG_GP_ID_13 0x12E +#define DA9063_REG_GP_ID_14 0x12F +#define DA9063_REG_GP_ID_15 0x130 +#define DA9063_REG_GP_ID_16 0x131 +#define DA9063_REG_GP_ID_17 0x132 +#define DA9063_REG_GP_ID_18 0x133 +#define DA9063_REG_GP_ID_19 0x134 /* Chip ID and variant */ #define DA9063_REG_CHIP_ID 0x181 @@ -405,8 +404,10 @@ /* DA9063_REG_CONTROL_B (addr=0x0F) */ #define DA9063_CHG_SEL 0x01 #define DA9063_WATCHDOG_PD 0x02 +#define DA9063_RESET_BLINKING 0x04 #define DA9063_NRES_MODE 0x08 #define DA9063_NONKEY_LOCK 0x10 +#define DA9063_BUCK_SLOWSTART 0x80 /* DA9063_REG_CONTROL_C (addr=0x10) */ #define DA9063_DEBOUNCING_MASK 0x07 @@ -466,6 +467,7 @@ #define DA9063_GPADC_PAUSE 0x02 #define DA9063_PMIF_DIS 0x04 #define DA9063_HS2WIRE_DIS 0x08 +#define DA9063_CLDR_PAUSE 0x10 #define DA9063_BBAT_DIS 0x20 #define DA9063_OUT_32K_PAUSE 0x40 #define DA9063_PMCONT_DIS 0x80 @@ -660,7 +662,7 @@ #define DA9063_GPIO15_TYPE_GPO 0x04 #define DA9063_GPIO15_NO_WAKEUP 0x80 -/* DA9063_REG_GPIO_MODE_0_7 (addr=0x1D) */ +/* DA9063_REG_GPIO_MODE0_7 (addr=0x1D) */ #define DA9063_GPIO0_MODE 0x01 #define DA9063_GPIO1_MODE 0x02 #define DA9063_GPIO2_MODE 0x04 @@ -670,7 +672,7 @@ #define DA9063_GPIO6_MODE 0x40 #define DA9063_GPIO7_MODE 0x80 -/* DA9063_REG_GPIO_MODE_8_15 (addr=0x1E) */ +/* DA9063_REG_GPIO_MODE8_15 (addr=0x1E) */ #define DA9063_GPIO8_MODE 0x01 #define DA9063_GPIO9_MODE 0x02 #define DA9063_GPIO10_MODE 0x04 @@ -702,12 +704,12 @@ #define DA9063_SWITCH_SR_5MV 0x10 #define DA9063_SWITCH_SR_10MV 0x20 #define DA9063_SWITCH_SR_50MV 0x30 -#define DA9063_SWITCH_SR_DIS 0x40 +#define DA9063_CORE_SW_INTERNAL 0x40 #define DA9063_CP_EN_MODE 0x80 /* DA9063_REGL_Bxxxx_CONT common bits (addr=0x20-0x25) */ #define DA9063_BUCK_EN 0x01 -#define DA9063_BUCK_GPI_MASK 0x06 +#define DA9063_BUCK_GPI_MASK 0x06 #define DA9063_BUCK_GPI_OFF 0x00 #define DA9063_BUCK_GPI_GPIO1 0x02 #define DA9063_BUCK_GPI_GPIO2 0x04 @@ -841,25 +843,27 @@ #define DA9063_COUNT_YEAR_MASK 0x3F #define DA9063_MONITOR 0x40 -/* DA9063_REG_ALARM_MI (addr=0x46) */ +/* DA9063_REG_ALARM_S (addr=0x46) */ +#define DA9063_ALARM_S_MASK 0x3F #define DA9063_ALARM_STATUS_ALARM 0x80 #define DA9063_ALARM_STATUS_TICK 0x40 +/* DA9063_REG_ALARM_MI (addr=0x47) */ #define DA9063_ALARM_MIN_MASK 0x3F -/* DA9063_REG_ALARM_H (addr=0x47) */ +/* DA9063_REG_ALARM_H (addr=0x48) */ #define DA9063_ALARM_HOUR_MASK 0x1F -/* DA9063_REG_ALARM_D (addr=0x48) */ +/* DA9063_REG_ALARM_D (addr=0x49) */ #define DA9063_ALARM_DAY_MASK 0x1F -/* DA9063_REG_ALARM_MO (addr=0x49) */ +/* DA9063_REG_ALARM_MO (addr=0x4A) */ #define DA9063_TICK_WAKE 0x20 #define DA9063_TICK_TYPE 0x10 #define DA9063_TICK_TYPE_SEC 0x00 #define DA9063_TICK_TYPE_MIN 0x10 #define DA9063_ALARM_MONTH_MASK 0x0F -/* DA9063_REG_ALARM_Y (addr=0x4A) */ +/* DA9063_REG_ALARM_Y (addr=0x4B) */ #define DA9063_TICK_ON 0x80 #define DA9063_ALARM_ON 0x40 #define DA9063_ALARM_YEAR_MASK 0x3F @@ -906,7 +910,7 @@ /* DA9063_REG_Bxxxx_CFG common bits (addr=0x9D-0xA2) */ #define DA9063_BUCK_FB_MASK 0x07 -#define DA9063_BUCK_PD_DIS_SHIFT 5 +#define DA9063_BUCK_PD_DIS_MASK 0x20 #define DA9063_BUCK_MODE_MASK 0xC0 #define DA9063_BUCK_MODE_MANUAL 0x00 #define DA9063_BUCK_MODE_SLEEP 0x40 -- cgit v1.2.3-59-g8ed1b From 037b60f2ca9403d02cbee60ad0a10f3806f9dc4b Mon Sep 17 00:00:00 2001 From: Matt Porter Date: Tue, 11 Mar 2014 19:33:54 -0400 Subject: mfd: Add bcm590xx pmu driver Add a driver for the BCM590xx PMU multi-function devices. The driver initially supports regmap initialization and instantiation of the voltage regulator device function of the PMU. Signed-off-by: Matt Porter Reviewed-by: Tim Kryger Reviewed-by: Markus Mayer Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 8 ++++ drivers/mfd/Makefile | 1 + drivers/mfd/bcm590xx.c | 93 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/bcm590xx.h | 31 +++++++++++++++ 4 files changed, 133 insertions(+) create mode 100644 drivers/mfd/bcm590xx.c create mode 100644 include/linux/mfd/bcm590xx.h (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index d22ca0fc873f..7587c9e1a519 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -59,6 +59,14 @@ config MFD_AAT2870_CORE additional drivers must be enabled in order to use the functionality of the device. +config MFD_BCM590XX + tristate "Broadcom BCM590xx PMUs" + select MFD_CORE + select REGMAP_I2C + depends on I2C + help + Support for the BCM590xx PMUs from Broadcom + config MFD_CROS_EC tristate "ChromeOS Embedded Controller" select MFD_CORE diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 89a7951503ca..23835dfa615c 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_MFD_88PM800) += 88pm800.o 88pm80x.o obj-$(CONFIG_MFD_88PM805) += 88pm805.o 88pm80x.o obj-$(CONFIG_MFD_SM501) += sm501.o obj-$(CONFIG_MFD_ASIC3) += asic3.o tmio_core.o +obj-$(CONFIG_MFD_BCM590XX) += bcm590xx.o obj-$(CONFIG_MFD_CROS_EC) += cros_ec.o obj-$(CONFIG_MFD_CROS_EC_I2C) += cros_ec_i2c.o obj-$(CONFIG_MFD_CROS_EC_SPI) += cros_ec_spi.o diff --git a/drivers/mfd/bcm590xx.c b/drivers/mfd/bcm590xx.c new file mode 100644 index 000000000000..926a57e4d533 --- /dev/null +++ b/drivers/mfd/bcm590xx.c @@ -0,0 +1,93 @@ +/* + * Broadcom BCM590xx PMU + * + * Copyright 2014 Linaro Limited + * Author: Matt Porter + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static const struct mfd_cell bcm590xx_devs[] = { + { + .name = "bcm590xx-vregs", + }, +}; + +static const struct regmap_config bcm590xx_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .max_register = BCM590XX_MAX_REGISTER, + .cache_type = REGCACHE_RBTREE, +}; + +static int bcm590xx_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + struct bcm590xx *bcm590xx; + int ret; + + bcm590xx = devm_kzalloc(&i2c->dev, sizeof(*bcm590xx), GFP_KERNEL); + if (!bcm590xx) + return -ENOMEM; + + i2c_set_clientdata(i2c, bcm590xx); + bcm590xx->dev = &i2c->dev; + bcm590xx->i2c_client = i2c; + + bcm590xx->regmap = devm_regmap_init_i2c(i2c, &bcm590xx_regmap_config); + if (IS_ERR(bcm590xx->regmap)) { + ret = PTR_ERR(bcm590xx->regmap); + dev_err(&i2c->dev, "regmap initialization failed: %d\n", ret); + return ret; + } + + ret = mfd_add_devices(&i2c->dev, -1, bcm590xx_devs, + ARRAY_SIZE(bcm590xx_devs), NULL, 0, NULL); + if (ret < 0) + dev_err(&i2c->dev, "failed to add sub-devices: %d\n", ret); + + return ret; +} + +static const struct of_device_id bcm590xx_of_match[] = { + { .compatible = "brcm,bcm59056" }, + { } +}; +MODULE_DEVICE_TABLE(i2c, bcm590xx_of_match); + +static const struct i2c_device_id bcm590xx_i2c_id[] = { + { "bcm59056" }, + { } +}; +MODULE_DEVICE_TABLE(i2c, bcm590xx_i2c_id); + +static struct i2c_driver bcm590xx_i2c_driver = { + .driver = { + .name = "bcm590xx", + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(bcm590xx_of_match), + }, + .probe = bcm590xx_i2c_probe, + .id_table = bcm590xx_i2c_id, +}; +module_i2c_driver(bcm590xx_i2c_driver); + +MODULE_AUTHOR("Matt Porter "); +MODULE_DESCRIPTION("BCM590xx multi-function driver"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:bcm590xx"); diff --git a/include/linux/mfd/bcm590xx.h b/include/linux/mfd/bcm590xx.h new file mode 100644 index 000000000000..434df2d4e587 --- /dev/null +++ b/include/linux/mfd/bcm590xx.h @@ -0,0 +1,31 @@ +/* + * Broadcom BCM590xx PMU + * + * Copyright 2014 Linaro Limited + * Author: Matt Porter + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#ifndef __LINUX_MFD_BCM590XX_H +#define __LINUX_MFD_BCM590XX_H + +#include +#include +#include + +/* max register address */ +#define BCM590XX_MAX_REGISTER 0xe7 + +struct bcm590xx { + struct device *dev; + struct i2c_client *i2c_client; + struct regmap *regmap; + unsigned int id; +}; + +#endif /* __LINUX_MFD_BCM590XX_H */ -- cgit v1.2.3-59-g8ed1b From eb71d4dec4a5e010e34b9d7afdb5af41884c388e Mon Sep 17 00:00:00 2001 From: Peter Tyser Date: Mon, 10 Mar 2014 16:34:54 -0500 Subject: mfd: lpc_ich: Add support for iTCO v3 Some newer Atom CPUs, eg Avoton and Bay Trail, use slightly different register layouts for the iTCO than the current v1 and v2 iTCO. Differences from previous iTCO versions include: - The ACPI space is enabled in the "ACPI base address" register instead of the "ACPI control register" - The "no reboot" functionality is set in the "Power Management Configuration" register instead of the "General Control and Status" (GCS) register or PCI configuration space. - The "ACPI Control Register" is not present on v3. The "Power Management Configuration Base Address" register resides at the same address is Avoton/Bay Trail. To differentiate these newer chipsets create a new v3 iTCO version and update the MFD driver to support them. Signed-off-by: Peter Tyser Tested-by: Rajat Jain Reviewed-by: Guenter Roeck Signed-off-by: Lee Jones --- drivers/mfd/lpc_ich.c | 81 +++++++++++++++++++++++++++++++++++++-------- include/linux/mfd/lpc_ich.h | 8 ++--- 2 files changed, 71 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/lpc_ich.c b/drivers/mfd/lpc_ich.c index b24bae2bcdea..c0683370abbf 100644 --- a/drivers/mfd/lpc_ich.c +++ b/drivers/mfd/lpc_ich.c @@ -71,9 +71,11 @@ #define ACPIBASE_GPE_END 0x2f #define ACPIBASE_SMI_OFF 0x30 #define ACPIBASE_SMI_END 0x33 +#define ACPIBASE_PMC_OFF 0x08 +#define ACPIBASE_PMC_END 0x0c #define ACPIBASE_TCO_OFF 0x60 #define ACPIBASE_TCO_END 0x7f -#define ACPICTRL 0x44 +#define ACPICTRL_PMCBASE 0x44 #define ACPIBASE_GCS_OFF 0x3410 #define ACPIBASE_GCS_END 0x3414 @@ -93,11 +95,12 @@ struct lpc_ich_priv { int chipset; int abase; /* ACPI base */ - int actrl; /* ACPI control or PMC base */ + int actrl_pbase; /* ACPI control or PMC base */ int gbase; /* GPIO base */ int gctrl; /* GPIO control */ - int actrl_save; /* Cached ACPI control base value */ + int abase_save; /* Cached ACPI base value */ + int actrl_pbase_save; /* Cached ACPI control or PMC base value */ int gctrl_save; /* Cached GPIO control value */ }; @@ -110,7 +113,7 @@ static struct resource wdt_ich_res[] = { { .flags = IORESOURCE_IO, }, - /* GCS */ + /* GCS or PMC */ { .flags = IORESOURCE_MEM, }, @@ -742,9 +745,15 @@ static void lpc_ich_restore_config_space(struct pci_dev *dev) { struct lpc_ich_priv *priv = pci_get_drvdata(dev); - if (priv->actrl_save >= 0) { - pci_write_config_byte(dev, priv->actrl, priv->actrl_save); - priv->actrl_save = -1; + if (priv->abase_save >= 0) { + pci_write_config_byte(dev, priv->abase, priv->abase_save); + priv->abase_save = -1; + } + + if (priv->actrl_pbase_save >= 0) { + pci_write_config_byte(dev, priv->actrl_pbase, + priv->actrl_pbase_save); + priv->actrl_pbase_save = -1; } if (priv->gctrl_save >= 0) { @@ -758,9 +767,26 @@ static void lpc_ich_enable_acpi_space(struct pci_dev *dev) struct lpc_ich_priv *priv = pci_get_drvdata(dev); u8 reg_save; - pci_read_config_byte(dev, priv->actrl, ®_save); - pci_write_config_byte(dev, priv->actrl, reg_save | 0x80); - priv->actrl_save = reg_save; + switch (lpc_chipset_info[priv->chipset].iTCO_version) { + case 3: + /* + * Some chipsets (eg Avoton) enable the ACPI space in the + * ACPI BASE register. + */ + pci_read_config_byte(dev, priv->abase, ®_save); + pci_write_config_byte(dev, priv->abase, reg_save | 0x2); + priv->abase_save = reg_save; + break; + default: + /* + * Most chipsets enable the ACPI space in the ACPI control + * register. + */ + pci_read_config_byte(dev, priv->actrl_pbase, ®_save); + pci_write_config_byte(dev, priv->actrl_pbase, reg_save | 0x80); + priv->actrl_pbase_save = reg_save; + break; + } } static void lpc_ich_enable_gpio_space(struct pci_dev *dev) @@ -773,6 +799,17 @@ static void lpc_ich_enable_gpio_space(struct pci_dev *dev) priv->gctrl_save = reg_save; } +static void lpc_ich_enable_pmc_space(struct pci_dev *dev) +{ + struct lpc_ich_priv *priv = pci_get_drvdata(dev); + u8 reg_save; + + pci_read_config_byte(dev, priv->actrl_pbase, ®_save); + pci_write_config_byte(dev, priv->actrl_pbase, reg_save | 0x2); + + priv->actrl_pbase_save = reg_save; +} + static void lpc_ich_finalize_cell(struct pci_dev *dev, struct mfd_cell *cell) { struct lpc_ich_priv *priv = pci_get_drvdata(dev); @@ -910,14 +947,20 @@ static int lpc_ich_init_wdt(struct pci_dev *dev) lpc_ich_enable_acpi_space(dev); /* + * iTCO v2: * Get the Memory-Mapped GCS register. To get access to it * we have to read RCBA from PCI Config space 0xf0 and use * it as base. GCS = RCBA + ICH6_GCS(0x3410). + * + * iTCO v3: + * Get the Power Management Configuration register. To get access + * to it we have to read the PMC BASE from config space and address + * the register at offset 0x8. */ if (lpc_chipset_info[priv->chipset].iTCO_version == 1) { /* Don't register iomem for TCO ver 1 */ lpc_ich_cells[LPC_WDT].num_resources--; - } else { + } else if (lpc_chipset_info[priv->chipset].iTCO_version == 2) { pci_read_config_dword(dev, RCBABASE, &base_addr_cfg); base_addr = base_addr_cfg & 0xffffc000; if (!(base_addr_cfg & 1)) { @@ -926,9 +969,17 @@ static int lpc_ich_init_wdt(struct pci_dev *dev) ret = -ENODEV; goto wdt_done; } - res = wdt_mem_res(ICH_RES_MEM_GCS); + res = wdt_mem_res(ICH_RES_MEM_GCS_PMC); res->start = base_addr + ACPIBASE_GCS_OFF; res->end = base_addr + ACPIBASE_GCS_END; + } else if (lpc_chipset_info[priv->chipset].iTCO_version == 3) { + lpc_ich_enable_pmc_space(dev); + pci_read_config_dword(dev, ACPICTRL_PMCBASE, &base_addr_cfg); + base_addr = base_addr_cfg & 0xfffffe00; + + res = wdt_mem_res(ICH_RES_MEM_GCS_PMC); + res->start = base_addr + ACPIBASE_PMC_OFF; + res->end = base_addr + ACPIBASE_PMC_END; } lpc_ich_finalize_cell(dev, &lpc_ich_cells[LPC_WDT]); @@ -953,9 +1004,11 @@ static int lpc_ich_probe(struct pci_dev *dev, priv->chipset = id->driver_data; - priv->actrl_save = -1; + priv->actrl_pbase_save = -1; + priv->abase_save = -1; + priv->abase = ACPIBASE; - priv->actrl = ACPICTRL; + priv->actrl_pbase = ACPICTRL_PMCBASE; priv->gctrl_save = -1; if (priv->chipset <= LPC_ICH5) { diff --git a/include/linux/mfd/lpc_ich.h b/include/linux/mfd/lpc_ich.h index b2364dd026a6..8feac782fa83 100644 --- a/include/linux/mfd/lpc_ich.h +++ b/include/linux/mfd/lpc_ich.h @@ -21,10 +21,10 @@ #define LPC_ICH_H /* Watchdog resources */ -#define ICH_RES_IO_TCO 0 -#define ICH_RES_IO_SMI 1 -#define ICH_RES_MEM_OFF 2 -#define ICH_RES_MEM_GCS 0 +#define ICH_RES_IO_TCO 0 +#define ICH_RES_IO_SMI 1 +#define ICH_RES_MEM_OFF 2 +#define ICH_RES_MEM_GCS_PMC 0 /* GPIO resources */ #define ICH_RES_GPIO 0 -- cgit v1.2.3-59-g8ed1b From 93ae4f978ca7f26d17df915ac7afc919c1dd0353 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Tue, 11 Mar 2014 02:04:14 +0530 Subject: CPU hotplug: Provide lockless versions of callback registration functions The following method of CPU hotplug callback registration is not safe due to the possibility of an ABBA deadlock involving the cpu_add_remove_lock and the cpu_hotplug.lock. get_online_cpus(); for_each_online_cpu(cpu) init_cpu(cpu); register_cpu_notifier(&foobar_cpu_notifier); put_online_cpus(); The deadlock is shown below: CPU 0 CPU 1 ----- ----- Acquire cpu_hotplug.lock [via get_online_cpus()] CPU online/offline operation takes cpu_add_remove_lock [via cpu_maps_update_begin()] Try to acquire cpu_add_remove_lock [via register_cpu_notifier()] CPU online/offline operation tries to acquire cpu_hotplug.lock [via cpu_hotplug_begin()] *** DEADLOCK! *** The problem here is that callback registration takes the locks in one order whereas the CPU hotplug operations take the same locks in the opposite order. To avoid this issue and to provide a race-free method to register CPU hotplug callbacks (along with initialization of already online CPUs), introduce new variants of the callback registration APIs that simply register the callbacks without holding the cpu_add_remove_lock during the registration. That way, we can avoid the ABBA scenario. However, we will need to hold the cpu_add_remove_lock throughout the entire critical section, to protect updates to the callback/notifier chain. This can be achieved by writing the callback registration code as follows: cpu_maps_update_begin(); [ or cpu_notifier_register_begin(); see below ] for_each_online_cpu(cpu) init_cpu(cpu); /* This doesn't take the cpu_add_remove_lock */ __register_cpu_notifier(&foobar_cpu_notifier); cpu_maps_update_done(); [ or cpu_notifier_register_done(); see below ] Note that we can't use get_online_cpus() here instead of cpu_maps_update_begin() because the cpu_hotplug.lock is dropped during the invocation of CPU_POST_DEAD notifiers, and hence get_online_cpus() cannot provide the necessary synchronization to protect the callback/notifier chains against concurrent reads and writes. On the other hand, since the cpu_add_remove_lock protects the entire hotplug operation (including CPU_POST_DEAD), we can use cpu_maps_update_begin/done() to guarantee proper synchronization. Also, since cpu_maps_update_begin/done() is like a super-set of get/put_online_cpus(), the former naturally protects the critical sections from concurrent hotplug operations. Since the names cpu_maps_update_begin/done() don't make much sense in CPU hotplug callback registration scenarios, we'll introduce new APIs named cpu_notifier_register_begin/done() and map them to cpu_maps_update_begin/done(). In summary, introduce the lockless variants of un/register_cpu_notifier() and also export the cpu_notifier_register_begin/done() APIs for use by modules. This way, we provide a race-free way to register hotplug callbacks as well as perform initialization for the CPUs that are already online. Cc: Thomas Gleixner Cc: Andrew Morton Cc: Peter Zijlstra Cc: Ingo Molnar Acked-by: Oleg Nesterov Acked-by: Toshi Kani Reviewed-by: Gautham R. Shenoy Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki --- include/linux/cpu.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++ kernel/cpu.c | 21 +++++++++++++++++++-- 2 files changed, 66 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 03e235ad1bba..488d6ebcf6a1 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -122,26 +122,46 @@ enum { { .notifier_call = fn, .priority = pri }; \ register_cpu_notifier(&fn##_nb); \ } + +#define __cpu_notifier(fn, pri) { \ + static struct notifier_block fn##_nb = \ + { .notifier_call = fn, .priority = pri }; \ + __register_cpu_notifier(&fn##_nb); \ +} #else /* #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ #define cpu_notifier(fn, pri) do { (void)(fn); } while (0) +#define __cpu_notifier(fn, pri) do { (void)(fn); } while (0) #endif /* #else #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ + #ifdef CONFIG_HOTPLUG_CPU extern int register_cpu_notifier(struct notifier_block *nb); +extern int __register_cpu_notifier(struct notifier_block *nb); extern void unregister_cpu_notifier(struct notifier_block *nb); +extern void __unregister_cpu_notifier(struct notifier_block *nb); #else #ifndef MODULE extern int register_cpu_notifier(struct notifier_block *nb); +extern int __register_cpu_notifier(struct notifier_block *nb); #else static inline int register_cpu_notifier(struct notifier_block *nb) { return 0; } + +static inline int __register_cpu_notifier(struct notifier_block *nb) +{ + return 0; +} #endif static inline void unregister_cpu_notifier(struct notifier_block *nb) { } + +static inline void __unregister_cpu_notifier(struct notifier_block *nb) +{ +} #endif int cpu_up(unsigned int cpu); @@ -149,19 +169,32 @@ void notify_cpu_starting(unsigned int cpu); extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); +#define cpu_notifier_register_begin cpu_maps_update_begin +#define cpu_notifier_register_done cpu_maps_update_done + #else /* CONFIG_SMP */ #define cpu_notifier(fn, pri) do { (void)(fn); } while (0) +#define __cpu_notifier(fn, pri) do { (void)(fn); } while (0) static inline int register_cpu_notifier(struct notifier_block *nb) { return 0; } +static inline int __register_cpu_notifier(struct notifier_block *nb) +{ + return 0; +} + static inline void unregister_cpu_notifier(struct notifier_block *nb) { } +static inline void __unregister_cpu_notifier(struct notifier_block *nb) +{ +} + static inline void cpu_maps_update_begin(void) { } @@ -170,6 +203,14 @@ static inline void cpu_maps_update_done(void) { } +static inline void cpu_notifier_register_begin(void) +{ +} + +static inline void cpu_notifier_register_done(void) +{ +} + #endif /* CONFIG_SMP */ extern struct bus_type cpu_subsys; @@ -183,8 +224,11 @@ extern void put_online_cpus(void); extern void cpu_hotplug_disable(void); extern void cpu_hotplug_enable(void); #define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri) +#define __hotcpu_notifier(fn, pri) __cpu_notifier(fn, pri) #define register_hotcpu_notifier(nb) register_cpu_notifier(nb) +#define __register_hotcpu_notifier(nb) __register_cpu_notifier(nb) #define unregister_hotcpu_notifier(nb) unregister_cpu_notifier(nb) +#define __unregister_hotcpu_notifier(nb) __unregister_cpu_notifier(nb) void clear_tasks_mm_cpumask(int cpu); int cpu_down(unsigned int cpu); @@ -197,9 +241,12 @@ static inline void cpu_hotplug_done(void) {} #define cpu_hotplug_disable() do { } while (0) #define cpu_hotplug_enable() do { } while (0) #define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) +#define __hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) /* These aren't inline functions due to a GCC bug. */ #define register_hotcpu_notifier(nb) ({ (void)(nb); 0; }) +#define __register_hotcpu_notifier(nb) ({ (void)(nb); 0; }) #define unregister_hotcpu_notifier(nb) ({ (void)(nb); }) +#define __unregister_hotcpu_notifier(nb) ({ (void)(nb); }) #endif /* CONFIG_HOTPLUG_CPU */ #ifdef CONFIG_PM_SLEEP_SMP diff --git a/kernel/cpu.c b/kernel/cpu.c index 33caf5e97701..a9e710eef0e2 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -28,18 +28,23 @@ static DEFINE_MUTEX(cpu_add_remove_lock); /* - * The following two API's must be used when attempting - * to serialize the updates to cpu_online_mask, cpu_present_mask. + * The following two APIs (cpu_maps_update_begin/done) must be used when + * attempting to serialize the updates to cpu_online_mask & cpu_present_mask. + * The APIs cpu_notifier_register_begin/done() must be used to protect CPU + * hotplug callback (un)registration performed using __register_cpu_notifier() + * or __unregister_cpu_notifier(). */ void cpu_maps_update_begin(void) { mutex_lock(&cpu_add_remove_lock); } +EXPORT_SYMBOL(cpu_notifier_register_begin); void cpu_maps_update_done(void) { mutex_unlock(&cpu_add_remove_lock); } +EXPORT_SYMBOL(cpu_notifier_register_done); static RAW_NOTIFIER_HEAD(cpu_chain); @@ -183,6 +188,11 @@ int __ref register_cpu_notifier(struct notifier_block *nb) return ret; } +int __ref __register_cpu_notifier(struct notifier_block *nb) +{ + return raw_notifier_chain_register(&cpu_chain, nb); +} + static int __cpu_notify(unsigned long val, void *v, int nr_to_call, int *nr_calls) { @@ -206,6 +216,7 @@ static void cpu_notify_nofail(unsigned long val, void *v) BUG_ON(cpu_notify(val, v)); } EXPORT_SYMBOL(register_cpu_notifier); +EXPORT_SYMBOL(__register_cpu_notifier); void __ref unregister_cpu_notifier(struct notifier_block *nb) { @@ -215,6 +226,12 @@ void __ref unregister_cpu_notifier(struct notifier_block *nb) } EXPORT_SYMBOL(unregister_cpu_notifier); +void __ref __unregister_cpu_notifier(struct notifier_block *nb) +{ + raw_notifier_chain_unregister(&cpu_chain, nb); +} +EXPORT_SYMBOL(__unregister_cpu_notifier); + /** * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU * @cpu: a CPU id -- cgit v1.2.3-59-g8ed1b From f0bdb5e0c72b7347c867da539367138ad95c6b24 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Tue, 11 Mar 2014 02:04:39 +0530 Subject: CPU hotplug, perf: Fix CPU hotplug callback registration Subsystems that want to register CPU hotplug callbacks, as well as perform initialization for the CPUs that are already online, often do it as shown below: get_online_cpus(); for_each_online_cpu(cpu) init_cpu(cpu); register_cpu_notifier(&foobar_cpu_notifier); put_online_cpus(); This is wrong, since it is prone to ABBA deadlocks involving the cpu_add_remove_lock and the cpu_hotplug.lock (when running concurrently with CPU hotplug operations). Instead, the correct and race-free way of performing the callback registration is: cpu_notifier_register_begin(); for_each_online_cpu(cpu) init_cpu(cpu); /* Note the use of the double underscored version of the API */ __register_cpu_notifier(&foobar_cpu_notifier); cpu_notifier_register_done(); Fix the perf subsystem's hotplug notifier by using this latter form of callback registration. Also provide a bare-bones version of perf_cpu_notifier() that doesn't invoke the notifiers for the already online CPUs. This would be useful for subsystems that need to perform a different set of initialization for the already online CPUs, or don't need the initialization altogether. Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki --- include/linux/perf_event.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index e56b07f5c9b6..3356abcfff18 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -835,6 +835,8 @@ do { \ { .notifier_call = fn, .priority = CPU_PRI_PERF }; \ unsigned long cpu = smp_processor_id(); \ unsigned long flags; \ + \ + cpu_notifier_register_begin(); \ fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE, \ (void *)(unsigned long)cpu); \ local_irq_save(flags); \ @@ -843,9 +845,21 @@ do { \ local_irq_restore(flags); \ fn(&fn##_nb, (unsigned long)CPU_ONLINE, \ (void *)(unsigned long)cpu); \ - register_cpu_notifier(&fn##_nb); \ + __register_cpu_notifier(&fn##_nb); \ + cpu_notifier_register_done(); \ } while (0) +/* + * Bare-bones version of perf_cpu_notifier(), which doesn't invoke the + * callback for already online CPUs. + */ +#define __perf_cpu_notifier(fn) \ +do { \ + static struct notifier_block fn##_nb = \ + { .notifier_call = fn, .priority = CPU_PRI_PERF }; \ + \ + __register_cpu_notifier(&fn##_nb); \ +} while (0) struct perf_pmu_events_attr { struct device_attribute attr; -- cgit v1.2.3-59-g8ed1b From 099dd235113700bbb476e572cd191ddb77b9af46 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 28 Feb 2014 20:36:55 -0800 Subject: audit: Send replies in the proper network namespace. In perverse cases of file descriptor passing the current network namespace of a process and the network namespace of a socket used by that socket may differ. Therefore use the network namespace of the appropiate socket to ensure replies always go to the appropiate socket. Signed-off-by: "Eric W. Biederman" Acked-by: Richard Guy Briggs Signed-off-by: Eric Paris --- include/linux/audit.h | 3 ++- kernel/audit.c | 21 ++++++++++----------- kernel/auditfilter.c | 7 +++++-- 3 files changed, 17 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index aa865a9a4c4f..ec1464df4c60 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -43,6 +43,7 @@ struct mq_attr; struct mqstat; struct audit_watch; struct audit_tree; +struct sk_buff; struct audit_krule { int vers_ops; @@ -463,7 +464,7 @@ extern int audit_filter_user(int type); extern int audit_filter_type(int type); extern int audit_rule_change(int type, __u32 portid, int seq, void *data, size_t datasz); -extern int audit_list_rules_send(__u32 portid, int seq); +extern int audit_list_rules_send(struct sk_buff *request_skb, int seq); extern u32 audit_enabled; #else /* CONFIG_AUDIT */ diff --git a/kernel/audit.c b/kernel/audit.c index 71fb41f393d7..7b44bd47759c 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -570,9 +570,11 @@ static int audit_send_reply_thread(void *arg) * Allocates an skb, builds the netlink message, and sends it to the port id. * No failure notifications. */ -static void audit_send_reply(__u32 portid, int seq, int type, int done, +static void audit_send_reply(struct sk_buff *request_skb, int seq, int type, int done, int multi, const void *payload, int size) { + u32 portid = NETLINK_CB(request_skb).portid; + struct net *net = sock_net(NETLINK_CB(request_skb).sk); struct sk_buff *skb; struct task_struct *tsk; struct audit_reply *reply = kmalloc(sizeof(struct audit_reply), @@ -585,7 +587,7 @@ static void audit_send_reply(__u32 portid, int seq, int type, int done, if (!skb) goto out; - reply->net = get_net(current->nsproxy->net_ns); + reply->net = get_net(net); reply->portid = portid; reply->skb = skb; @@ -675,8 +677,7 @@ static int audit_get_feature(struct sk_buff *skb) seq = nlmsg_hdr(skb)->nlmsg_seq; - audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_GET, 0, 0, - &af, sizeof(af)); + audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &af, sizeof(af)); return 0; } @@ -796,8 +797,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) s.backlog = skb_queue_len(&audit_skb_queue); s.version = AUDIT_VERSION_LATEST; s.backlog_wait_time = audit_backlog_wait_time; - audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_GET, 0, 0, - &s, sizeof(s)); + audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s)); break; } case AUDIT_SET: { @@ -907,7 +907,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) seq, data, nlmsg_len(nlh)); break; case AUDIT_LIST_RULES: - err = audit_list_rules_send(NETLINK_CB(skb).portid, seq); + err = audit_list_rules_send(skb, seq); break; case AUDIT_TRIM: audit_trim_trees(); @@ -972,8 +972,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) memcpy(sig_data->ctx, ctx, len); security_release_secctx(ctx, len); } - audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_SIGNAL_INFO, - 0, 0, sig_data, sizeof(*sig_data) + len); + audit_send_reply(skb, seq, AUDIT_SIGNAL_INFO, 0, 0, + sig_data, sizeof(*sig_data) + len); kfree(sig_data); break; case AUDIT_TTY_GET: { @@ -985,8 +985,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) s.log_passwd = tsk->signal->audit_tty_log_passwd; spin_unlock(&tsk->sighand->siglock); - audit_send_reply(NETLINK_CB(skb).portid, seq, - AUDIT_TTY_GET, 0, 0, &s, sizeof(s)); + audit_send_reply(skb, seq, AUDIT_TTY_GET, 0, 0, &s, sizeof(s)); break; } case AUDIT_TTY_SET: { diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index a0d470131fd0..549bbb6e6597 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "audit.h" /* @@ -1071,8 +1072,10 @@ int audit_rule_change(int type, __u32 portid, int seq, void *data, * @portid: target portid for netlink audit messages * @seq: netlink audit message sequence (serial) number */ -int audit_list_rules_send(__u32 portid, int seq) +int audit_list_rules_send(struct sk_buff *request_skb, int seq) { + u32 portid = NETLINK_CB(request_skb).portid; + struct net *net = sock_net(NETLINK_CB(request_skb).sk); struct task_struct *tsk; struct audit_netlink_list *dest; int err = 0; @@ -1086,7 +1089,7 @@ int audit_list_rules_send(__u32 portid, int seq) dest = kmalloc(sizeof(struct audit_netlink_list), GFP_KERNEL); if (!dest) return -ENOMEM; - dest->net = get_net(current->nsproxy->net_ns); + dest->net = get_net(net); dest->portid = portid; skb_queue_head_init(&dest->q); -- cgit v1.2.3-59-g8ed1b From 4b58841149dcaa500ceba1d5378ae70622fe4899 Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Sat, 15 Mar 2014 14:48:00 +0900 Subject: audit: Add generic compat syscall support lib/audit.c provides a generic function for auditing system calls. This patch extends it for compat syscall support on bi-architectures (32/64-bit) by adding lib/compat_audit.c. What is required to support this feature are: * add asm/unistd32.h for compat system call names * select CONFIG_AUDIT_ARCH_COMPAT_GENERIC Signed-off-by: AKASHI Takahiro Acked-by: Richard Guy Briggs Signed-off-by: Eric Paris --- include/linux/audit.h | 8 ++++++++ include/uapi/linux/audit.h | 6 ++++++ lib/Kconfig | 9 +++++++++ lib/Makefile | 1 + lib/audit.c | 15 +++++++++++++- lib/compat_audit.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 lib/compat_audit.c (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index ec1464df4c60..4b2983e25ce0 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -79,6 +79,14 @@ extern int is_audit_feature_set(int which); extern int __init audit_register_class(int class, unsigned *list); extern int audit_classify_syscall(int abi, unsigned syscall); extern int audit_classify_arch(int arch); +/* only for compat system calls */ +extern unsigned compat_write_class[]; +extern unsigned compat_read_class[]; +extern unsigned compat_dir_class[]; +extern unsigned compat_chattr_class[]; +extern unsigned compat_signal_class[]; + +extern int __weak audit_classify_compat_syscall(int abi, unsigned syscall); /* audit_names->type values */ #define AUDIT_TYPE_UNKNOWN 0 /* we don't know yet */ diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 4315ee99b967..9af01d77dc44 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -362,6 +362,12 @@ enum { #define AUDIT_ARCH_SPARC64 (EM_SPARCV9|__AUDIT_ARCH_64BIT) #define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) +#ifdef CONFIG_COMPAT +#define audit_is_compat(arch) (!((arch) & __AUDIT_ARCH_64BIT)) +#else +#define audit_is_compat(arch) false +#endif + #define AUDIT_PERM_EXEC 1 #define AUDIT_PERM_WRITE 2 #define AUDIT_PERM_READ 4 diff --git a/lib/Kconfig b/lib/Kconfig index 991c98bc4a3f..1e80cb3c77a9 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -182,6 +182,15 @@ config AUDIT_GENERIC depends on AUDIT && !AUDIT_ARCH default y +config AUDIT_ARCH_COMPAT_GENERIC + bool + default n + +config AUDIT_COMPAT_GENERIC + bool + depends on AUDIT_GENERIC && AUDIT_ARCH_COMPAT_GENERIC && COMPAT + default y + config RANDOM32_SELFTEST bool "PRNG perform self test on init" default n diff --git a/lib/Makefile b/lib/Makefile index a459c31e8c6b..972552b39cf5 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -93,6 +93,7 @@ obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o obj-$(CONFIG_SMP) += percpu_counter.o obj-$(CONFIG_AUDIT_GENERIC) += audit.o +obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o diff --git a/lib/audit.c b/lib/audit.c index 76bbed4a20e5..1d726a22565b 100644 --- a/lib/audit.c +++ b/lib/audit.c @@ -30,11 +30,17 @@ static unsigned signal_class[] = { int audit_classify_arch(int arch) { - return 0; + if (audit_is_compat(arch)) + return 1; + else + return 0; } int audit_classify_syscall(int abi, unsigned syscall) { + if (audit_is_compat(abi)) + return audit_classify_compat_syscall(abi, syscall); + switch(syscall) { #ifdef __NR_open case __NR_open: @@ -57,6 +63,13 @@ int audit_classify_syscall(int abi, unsigned syscall) static int __init audit_classes_init(void) { +#ifdef CONFIG_AUDIT_COMPAT_GENERIC + audit_register_class(AUDIT_CLASS_WRITE_32, compat_write_class); + audit_register_class(AUDIT_CLASS_READ_32, compat_read_class); + audit_register_class(AUDIT_CLASS_DIR_WRITE_32, compat_dir_class); + audit_register_class(AUDIT_CLASS_CHATTR_32, compat_chattr_class); + audit_register_class(AUDIT_CLASS_SIGNAL_32, compat_signal_class); +#endif audit_register_class(AUDIT_CLASS_WRITE, write_class); audit_register_class(AUDIT_CLASS_READ, read_class); audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class); diff --git a/lib/compat_audit.c b/lib/compat_audit.c new file mode 100644 index 000000000000..873f75b640ab --- /dev/null +++ b/lib/compat_audit.c @@ -0,0 +1,50 @@ +#include +#include +#include + +unsigned compat_dir_class[] = { +#include +~0U +}; + +unsigned compat_read_class[] = { +#include +~0U +}; + +unsigned compat_write_class[] = { +#include +~0U +}; + +unsigned compat_chattr_class[] = { +#include +~0U +}; + +unsigned compat_signal_class[] = { +#include +~0U +}; + +int audit_classify_compat_syscall(int abi, unsigned syscall) +{ + switch (syscall) { +#ifdef __NR_open + case __NR_open: + return 2; +#endif +#ifdef __NR_openat + case __NR_openat: + return 3; +#endif +#ifdef __NR_socketcall + case __NR_socketcall: + return 4; +#endif + case __NR_execve: + return 5; + default: + return 1; + } +} -- cgit v1.2.3-59-g8ed1b From ad36d28293936b03d6b7996e9d6aadfd73c0eb08 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Thu, 15 Aug 2013 18:05:12 -0400 Subject: pid: get pid_t ppid of task in init_pid_ns Added the functions task_ppid_nr_ns() and task_ppid_nr() to abstract the lookup of the PPID (real_parent's pid_t) of a process, including rcu locking, in the arbitrary and init_pid_ns. This provides an alternative to sys_getppid(), which is relative to the child process' pid namespace. (informed by ebiederman's 6c621b7e) Cc: stable@vger.kernel.org Cc: Eric W. Biederman Signed-off-by: Richard Guy Briggs --- include/linux/sched.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 53f97eb8dbc7..116e301fb13f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1561,6 +1561,24 @@ static inline pid_t task_tgid_vnr(struct task_struct *tsk) } +static int pid_alive(const struct task_struct *p); +static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) +{ + pid_t pid = 0; + + rcu_read_lock(); + if (pid_alive(tsk)) + pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns); + rcu_read_unlock(); + + return pid; +} + +static inline pid_t task_ppid_nr(const struct task_struct *tsk) +{ + return task_ppid_nr_ns(tsk, &init_pid_ns); +} + static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { @@ -1600,7 +1618,7 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk) * * Return: 1 if the process is alive. 0 otherwise. */ -static inline int pid_alive(struct task_struct *p) +static inline int pid_alive(const struct task_struct *p) { return p->pids[PIDTYPE_PID].pid != NULL; } -- cgit v1.2.3-59-g8ed1b From 80e0b6e8a001361316a2d62b748fe677ec46b860 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Sun, 16 Mar 2014 14:00:19 -0400 Subject: sched: declare pid_alive as inline We accidentally declared pid_alive without any extern/inline connotation. Some platforms were fine with this, some like ia64 and mips were very angry. If the function is inline, the prototype should be inline! on ia64: include/linux/sched.h:1718: warning: 'pid_alive' declared inline after being called Signed-off-by: Richard Guy Briggs Signed-off-by: Eric Paris --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 116e301fb13f..0f72548732f1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1561,7 +1561,7 @@ static inline pid_t task_tgid_vnr(struct task_struct *tsk) } -static int pid_alive(const struct task_struct *p); +static inline int pid_alive(const struct task_struct *p); static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) { pid_t pid = 0; -- cgit v1.2.3-59-g8ed1b From 58f86cc89c3372d3e61d5b71e5513ec5a0b02848 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 24 Mar 2014 12:00:34 +1030 Subject: VERIFY_OCTAL_PERMISSIONS: stricter checking for sysfs perms. Summary of http://lkml.org/lkml/2014/3/14/363 : Ted: module_param(queue_depth, int, 444) Joe: 0444! Rusty: User perms >= group perms >= other perms? Joe: CLASS_ATTR, DEVICE_ATTR, SENSOR_ATTR and SENSOR_ATTR_2? Side effect of stricter permissions means removing the unnecessary S_IFREG from several callers. Note that the BUILD_BUG_ON_ZERO((perm) & 2) test was removed: a fair number of drivers fail this test, so that will be the debate for a future patch. Suggested-by: Joe Perches Acked-by: Bjorn Helgaas for drivers/pci/slot.c Acked-by: Greg Kroah-Hartman Cc: Miklos Szeredi Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Rusty Russell --- drivers/pci/slot.c | 6 +++--- fs/fuse/cuse.c | 4 ++-- fs/ocfs2/cluster/sys.c | 2 +- fs/ocfs2/stackglue.c | 8 ++++---- include/linux/kernel.h | 8 ++++++++ include/linux/moduleparam.h | 8 +++----- include/linux/sysfs.h | 3 ++- 7 files changed, 23 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c index 7dd62fa9d0bd..396c200b9ddb 100644 --- a/drivers/pci/slot.c +++ b/drivers/pci/slot.c @@ -116,11 +116,11 @@ static void pci_slot_release(struct kobject *kobj) } static struct pci_slot_attribute pci_slot_attr_address = - __ATTR(address, (S_IFREG | S_IRUGO), address_read_file, NULL); + __ATTR(address, S_IRUGO, address_read_file, NULL); static struct pci_slot_attribute pci_slot_attr_max_speed = - __ATTR(max_bus_speed, (S_IFREG | S_IRUGO), max_speed_read_file, NULL); + __ATTR(max_bus_speed, S_IRUGO, max_speed_read_file, NULL); static struct pci_slot_attribute pci_slot_attr_cur_speed = - __ATTR(cur_bus_speed, (S_IFREG | S_IRUGO), cur_speed_read_file, NULL); + __ATTR(cur_bus_speed, S_IRUGO, cur_speed_read_file, NULL); static struct attribute *pci_slot_default_attrs[] = { &pci_slot_attr_address.attr, diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index b96a49b37d66..77cf5eeeabd1 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -568,7 +568,7 @@ static ssize_t cuse_class_waiting_show(struct device *dev, return sprintf(buf, "%d\n", atomic_read(&cc->fc.num_waiting)); } -static DEVICE_ATTR(waiting, S_IFREG | 0400, cuse_class_waiting_show, NULL); +static DEVICE_ATTR(waiting, 0400, cuse_class_waiting_show, NULL); static ssize_t cuse_class_abort_store(struct device *dev, struct device_attribute *attr, @@ -579,7 +579,7 @@ static ssize_t cuse_class_abort_store(struct device *dev, fuse_abort_conn(&cc->fc); return count; } -static DEVICE_ATTR(abort, S_IFREG | 0200, NULL, cuse_class_abort_store); +static DEVICE_ATTR(abort, 0200, NULL, cuse_class_abort_store); static struct attribute *cuse_class_dev_attrs[] = { &dev_attr_waiting.attr, diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c index a4b07730b2e1..b7f57271d49c 100644 --- a/fs/ocfs2/cluster/sys.c +++ b/fs/ocfs2/cluster/sys.c @@ -41,7 +41,7 @@ static ssize_t version_show(struct kobject *kobj, struct kobj_attribute *attr, return snprintf(buf, PAGE_SIZE, "%u\n", O2NM_API_VERSION); } static struct kobj_attribute attr_version = - __ATTR(interface_revision, S_IFREG | S_IRUGO, version_show, NULL); + __ATTR(interface_revision, S_IRUGO, version_show, NULL); static struct attribute *o2cb_attrs[] = { &attr_version.attr, diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 1324e6600e57..25e9f7b5bad3 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -494,7 +494,7 @@ static ssize_t ocfs2_max_locking_protocol_show(struct kobject *kobj, } static struct kobj_attribute ocfs2_attr_max_locking_protocol = - __ATTR(max_locking_protocol, S_IFREG | S_IRUGO, + __ATTR(max_locking_protocol, S_IRUGO, ocfs2_max_locking_protocol_show, NULL); static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj, @@ -526,7 +526,7 @@ static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj, } static struct kobj_attribute ocfs2_attr_loaded_cluster_plugins = - __ATTR(loaded_cluster_plugins, S_IFREG | S_IRUGO, + __ATTR(loaded_cluster_plugins, S_IRUGO, ocfs2_loaded_cluster_plugins_show, NULL); static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj, @@ -548,7 +548,7 @@ static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj, } static struct kobj_attribute ocfs2_attr_active_cluster_plugin = - __ATTR(active_cluster_plugin, S_IFREG | S_IRUGO, + __ATTR(active_cluster_plugin, S_IRUGO, ocfs2_active_cluster_plugin_show, NULL); static ssize_t ocfs2_cluster_stack_show(struct kobject *kobj, @@ -597,7 +597,7 @@ static ssize_t ocfs2_cluster_stack_store(struct kobject *kobj, static struct kobj_attribute ocfs2_attr_cluster_stack = - __ATTR(cluster_stack, S_IFREG | S_IRUGO | S_IWUSR, + __ATTR(cluster_stack, S_IRUGO | S_IWUSR, ocfs2_cluster_stack_show, ocfs2_cluster_stack_store); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 471090093c67..4679eddc110a 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -842,4 +842,12 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } # define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD #endif +/* Permissions on a sysfs file: you didn't miss the 0 prefix did you? */ +#define VERIFY_OCTAL_PERMISSIONS(perms) \ + (BUILD_BUG_ON_ZERO((perms) < 0) + \ + BUILD_BUG_ON_ZERO((perms) > 0777) + \ + /* User perms >= group perms >= other perms */ \ + BUILD_BUG_ON_ZERO(((perms) >> 6) < (((perms) >> 3) & 7)) + \ + BUILD_BUG_ON_ZERO((((perms) >> 3) & 7) < ((perms) & 7)) + \ + (perms)) #endif diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 175f6995d1af..204a67743804 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -186,14 +186,12 @@ struct kparam_array parameters. */ #define __module_param_call(prefix, name, ops, arg, perm, level) \ /* Default value instead of permissions? */ \ - static int __param_perm_check_##name __attribute__((unused)) = \ - BUILD_BUG_ON_ZERO((perm) < 0 || (perm) > 0777 || ((perm) & 2)) \ - + BUILD_BUG_ON_ZERO(sizeof(""prefix) > MAX_PARAM_PREFIX_LEN); \ - static const char __param_str_##name[] = prefix #name; \ + static const char __param_str_##name[] = prefix #name; \ static struct kernel_param __moduleparam_const __param_##name \ __used \ __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \ - = { __param_str_##name, ops, perm, level, { arg } } + = { __param_str_##name, ops, VERIFY_OCTAL_PERMISSIONS(perm), \ + level, { arg } } /* Obsolete - use module_param_cb() */ #define module_param_call(name, set, get, arg, perm) \ diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 30b2ebee6439..f517e6e488c8 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -71,7 +71,8 @@ struct attribute_group { */ #define __ATTR(_name, _mode, _show, _store) { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ + .attr = {.name = __stringify(_name), \ + .mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \ .show = _show, \ .store = _store, \ } -- cgit v1.2.3-59-g8ed1b From 5a92e700af2e5e0e6404988d6a7f2ed3dad3f46f Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 21 Feb 2014 14:13:44 -0700 Subject: NVMe: RCU protected access to io queues This adds rcu protected access to nvme_queue to fix a race between a surprise removal freeing the queue and a thread with open reference on a NVMe block device using that queue. The queues do not need to be rcu protected during the initialization or shutdown parts, so I've added a helper function for raw deferencing to get around the sparse errors. There is still a hole in the IOCTL path for the same problem, which is fixed in a subsequent patch. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 91 +++++++++++++++++++++++------------------------ include/linux/nvme.h | 2 +- 2 files changed, 46 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index f565212a9e32..b66ab1db4629 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -74,6 +74,7 @@ struct async_cmd_info { * commands and one for I/O commands). */ struct nvme_queue { + struct rcu_head r_head; struct device *q_dmadev; struct nvme_dev *dev; char irqname[24]; /* nvme4294967295-65535\0 */ @@ -262,14 +263,21 @@ static void *cancel_cmdid(struct nvme_queue *nvmeq, int cmdid, return ctx; } -struct nvme_queue *get_nvmeq(struct nvme_dev *dev) +static struct nvme_queue *raw_nvmeq(struct nvme_dev *dev, int qid) { - return dev->queues[get_cpu() + 1]; + return rcu_dereference_raw(dev->queues[qid]); } -void put_nvmeq(struct nvme_queue *nvmeq) +struct nvme_queue *get_nvmeq(struct nvme_dev *dev) __acquires(RCU) +{ + rcu_read_lock(); + return rcu_dereference(dev->queues[get_cpu() + 1]); +} + +void put_nvmeq(struct nvme_queue *nvmeq) __releases(RCU) { put_cpu(); + rcu_read_unlock(); } /** @@ -852,13 +860,14 @@ static int nvme_submit_async_cmd(struct nvme_queue *nvmeq, int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd, u32 *result) { - return nvme_submit_sync_cmd(dev->queues[0], cmd, result, ADMIN_TIMEOUT); + return nvme_submit_sync_cmd(raw_nvmeq(dev, 0), cmd, result, + ADMIN_TIMEOUT); } static int nvme_submit_admin_cmd_async(struct nvme_dev *dev, struct nvme_command *cmd, struct async_cmd_info *cmdinfo) { - return nvme_submit_async_cmd(dev->queues[0], cmd, cmdinfo, + return nvme_submit_async_cmd(raw_nvmeq(dev, 0), cmd, cmdinfo, ADMIN_TIMEOUT); } @@ -985,6 +994,7 @@ static void nvme_abort_cmd(int cmdid, struct nvme_queue *nvmeq) struct nvme_command cmd; struct nvme_dev *dev = nvmeq->dev; struct nvme_cmd_info *info = nvme_cmd_info(nvmeq); + struct nvme_queue *adminq; if (!nvmeq->qid || info[cmdid].aborted) { if (work_busy(&dev->reset_work)) @@ -1001,7 +1011,8 @@ static void nvme_abort_cmd(int cmdid, struct nvme_queue *nvmeq) if (!dev->abort_limit) return; - a_cmdid = alloc_cmdid(dev->queues[0], CMD_CTX_ABORT, special_completion, + adminq = rcu_dereference(dev->queues[0]); + a_cmdid = alloc_cmdid(adminq, CMD_CTX_ABORT, special_completion, ADMIN_TIMEOUT); if (a_cmdid < 0) return; @@ -1018,7 +1029,7 @@ static void nvme_abort_cmd(int cmdid, struct nvme_queue *nvmeq) dev_warn(nvmeq->q_dmadev, "Aborting I/O %d QID %d\n", cmdid, nvmeq->qid); - nvme_submit_cmd(dev->queues[0], &cmd); + nvme_submit_cmd(adminq, &cmd); } /** @@ -1055,8 +1066,10 @@ static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout) } } -static void nvme_free_queue(struct nvme_queue *nvmeq) +static void nvme_free_queue(struct rcu_head *r) { + struct nvme_queue *nvmeq = container_of(r, struct nvme_queue, r_head); + spin_lock_irq(&nvmeq->q_lock); while (bio_list_peek(&nvmeq->sq_cong)) { struct bio *bio = bio_list_pop(&nvmeq->sq_cong); @@ -1075,10 +1088,13 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest) { int i; + for (i = num_possible_cpus(); i > dev->queue_count - 1; i--) + rcu_assign_pointer(dev->queues[i], NULL); for (i = dev->queue_count - 1; i >= lowest; i--) { - nvme_free_queue(dev->queues[i]); + struct nvme_queue *nvmeq = raw_nvmeq(dev, i); + rcu_assign_pointer(dev->queues[i], NULL); + call_rcu(&nvmeq->r_head, nvme_free_queue); dev->queue_count--; - dev->queues[i] = NULL; } } @@ -1116,7 +1132,7 @@ static void nvme_clear_queue(struct nvme_queue *nvmeq) static void nvme_disable_queue(struct nvme_dev *dev, int qid) { - struct nvme_queue *nvmeq = dev->queues[qid]; + struct nvme_queue *nvmeq = raw_nvmeq(dev, qid); if (!nvmeq) return; @@ -1168,6 +1184,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, nvmeq->qid = qid; nvmeq->q_suspended = 1; dev->queue_count++; + rcu_assign_pointer(dev->queues[qid], nvmeq); return nvmeq; @@ -1311,12 +1328,11 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) if (result < 0) return result; - nvmeq = dev->queues[0]; + nvmeq = raw_nvmeq(dev, 0); if (!nvmeq) { nvmeq = nvme_alloc_queue(dev, 0, 64, 0); if (!nvmeq) return -ENOMEM; - dev->queues[0] = nvmeq; } aqa = nvmeq->q_depth - 1; @@ -1581,8 +1597,8 @@ static int nvme_user_admin_cmd(struct nvme_dev *dev, if (length != cmd.data_len) status = -ENOMEM; else - status = nvme_submit_sync_cmd(dev->queues[0], &c, &cmd.result, - timeout); + status = nvme_submit_sync_cmd(raw_nvmeq(dev, 0), &c, + &cmd.result, timeout); if (cmd.data_len) { nvme_unmap_user_pages(dev, cmd.opcode & 1, iod); @@ -1701,8 +1717,10 @@ static int nvme_kthread(void *data) queue_work(nvme_workq, &dev->reset_work); continue; } + rcu_read_lock(); for (i = 0; i < dev->queue_count; i++) { - struct nvme_queue *nvmeq = dev->queues[i]; + struct nvme_queue *nvmeq = + rcu_dereference(dev->queues[i]); if (!nvmeq) continue; spin_lock_irq(&nvmeq->q_lock); @@ -1714,6 +1732,7 @@ static int nvme_kthread(void *data) unlock: spin_unlock_irq(&nvmeq->q_lock); } + rcu_read_unlock(); } spin_unlock(&dev_list_lock); schedule_timeout(round_jiffies_relative(HZ)); @@ -1808,7 +1827,7 @@ static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues) static int nvme_setup_io_queues(struct nvme_dev *dev) { - struct nvme_queue *adminq = dev->queues[0]; + struct nvme_queue *adminq = raw_nvmeq(dev, 0); struct pci_dev *pdev = dev->pci_dev; int result, cpu, i, vecs, nr_io_queues, size, q_depth; @@ -1831,7 +1850,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) size = db_bar_size(dev, nr_io_queues); } while (1); dev->dbs = ((void __iomem *)dev->bar) + 4096; - dev->queues[0]->q_db = dev->dbs; + adminq->q_db = dev->dbs; } /* Deregister the admin queue's interrupt */ @@ -1880,19 +1899,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) } /* Free previously allocated queues that are no longer usable */ - spin_lock(&dev_list_lock); - for (i = dev->queue_count - 1; i > nr_io_queues; i--) { - struct nvme_queue *nvmeq = dev->queues[i]; - - spin_lock_irq(&nvmeq->q_lock); - nvme_cancel_ios(nvmeq, false); - spin_unlock_irq(&nvmeq->q_lock); - - nvme_free_queue(nvmeq); - dev->queue_count--; - dev->queues[i] = NULL; - } - spin_unlock(&dev_list_lock); + nvme_free_queues(dev, nr_io_queues); cpu = cpumask_first(cpu_online_mask); for (i = 0; i < nr_io_queues; i++) { @@ -1903,8 +1910,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) q_depth = min_t(int, NVME_CAP_MQES(readq(&dev->bar->cap)) + 1, NVME_Q_DEPTH); for (i = dev->queue_count - 1; i < nr_io_queues; i++) { - dev->queues[i + 1] = nvme_alloc_queue(dev, i + 1, q_depth, i); - if (!dev->queues[i + 1]) { + if (!nvme_alloc_queue(dev, i + 1, q_depth, i)) { result = -ENOMEM; goto free_queues; } @@ -1912,11 +1918,11 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) for (; i < num_possible_cpus(); i++) { int target = i % rounddown_pow_of_two(dev->queue_count - 1); - dev->queues[i + 1] = dev->queues[target + 1]; + rcu_assign_pointer(dev->queues[i + 1], dev->queues[target + 1]); } for (i = 1; i < dev->queue_count; i++) { - result = nvme_create_queue(dev->queues[i], i); + result = nvme_create_queue(raw_nvmeq(dev, i), i); if (result) { for (--i; i > 0; i--) nvme_disable_queue(dev, i); @@ -2180,7 +2186,7 @@ static void nvme_disable_io_queues(struct nvme_dev *dev) atomic_set(&dq.refcount, 0); dq.worker = &worker; for (i = dev->queue_count - 1; i > 0; i--) { - struct nvme_queue *nvmeq = dev->queues[i]; + struct nvme_queue *nvmeq = raw_nvmeq(dev, i); if (nvme_suspend_queue(nvmeq)) continue; @@ -2205,7 +2211,7 @@ static void nvme_dev_shutdown(struct nvme_dev *dev) if (!dev->bar || (dev->bar && readl(&dev->bar->csts) == -1)) { for (i = dev->queue_count - 1; i >= 0; i--) { - struct nvme_queue *nvmeq = dev->queues[i]; + struct nvme_queue *nvmeq = raw_nvmeq(dev, i); nvme_suspend_queue(nvmeq); nvme_clear_queue(nvmeq); } @@ -2383,18 +2389,10 @@ static int nvme_remove_dead_ctrl(void *arg) static void nvme_remove_disks(struct work_struct *ws) { - int i; struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work); nvme_dev_remove(dev); - spin_lock(&dev_list_lock); - for (i = dev->queue_count - 1; i > 0; i--) { - BUG_ON(!dev->queues[i] || !dev->queues[i]->q_suspended); - nvme_free_queue(dev->queues[i]); - dev->queue_count--; - dev->queues[i] = NULL; - } - spin_unlock(&dev_list_lock); + nvme_free_queues(dev, 1); } static int nvme_dev_resume(struct nvme_dev *dev) @@ -2526,6 +2524,7 @@ static void nvme_remove(struct pci_dev *pdev) nvme_dev_remove(dev); nvme_dev_shutdown(dev); nvme_free_queues(dev, 0); + rcu_barrier(); nvme_release_instance(dev); nvme_release_prp_pools(dev); kref_put(&dev->kref, nvme_free_dev); diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 69ae03f6eb15..98d367b06f9c 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -73,7 +73,7 @@ enum { */ struct nvme_dev { struct list_head node; - struct nvme_queue **queues; + struct nvme_queue __rcu **queues; u32 __iomem *dbs; struct pci_dev *pci_dev; struct dma_pool *prp_page_pool; -- cgit v1.2.3-59-g8ed1b From 4f5099af4f3d5f999d8ab7784472d93e810e3912 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 3 Mar 2014 16:39:13 -0700 Subject: NVMe: IOCTL path RCU protect queue access This adds rcu protected access to a queue in the nvme IOCTL path to fix potential races between a surprise removal and queue usage in nvme_submit_sync_cmd. The fix holds the rcu_read_lock() here to prevent the nvme_queue from freeing while this path is executing so it can't sleep, and so this path will no longer wait for a available command id should they all be in use at the time a passthrough IOCTL request is received. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme-core.c | 82 +++++++++++++++++++++++++++++++---------------- drivers/block/nvme-scsi.c | 31 +++--------------- include/linux/nvme.h | 5 +-- 3 files changed, 60 insertions(+), 58 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index b66ab1db4629..04664cadadfa 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -268,18 +268,30 @@ static struct nvme_queue *raw_nvmeq(struct nvme_dev *dev, int qid) return rcu_dereference_raw(dev->queues[qid]); } -struct nvme_queue *get_nvmeq(struct nvme_dev *dev) __acquires(RCU) +static struct nvme_queue *get_nvmeq(struct nvme_dev *dev) __acquires(RCU) { rcu_read_lock(); return rcu_dereference(dev->queues[get_cpu() + 1]); } -void put_nvmeq(struct nvme_queue *nvmeq) __releases(RCU) +static void put_nvmeq(struct nvme_queue *nvmeq) __releases(RCU) { put_cpu(); rcu_read_unlock(); } +static struct nvme_queue *lock_nvmeq(struct nvme_dev *dev, int q_idx) + __acquires(RCU) +{ + rcu_read_lock(); + return rcu_dereference(dev->queues[q_idx]); +} + +static void unlock_nvmeq(struct nvme_queue *nvmeq) __releases(RCU) +{ + rcu_read_unlock(); +} + /** * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell * @nvmeq: The queue to use @@ -292,6 +304,10 @@ static int nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd) unsigned long flags; u16 tail; spin_lock_irqsave(&nvmeq->q_lock, flags); + if (nvmeq->q_suspended) { + spin_unlock_irqrestore(&nvmeq->q_lock, flags); + return -EBUSY; + } tail = nvmeq->sq_tail; memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd)); if (++tail == nvmeq->q_depth) @@ -812,27 +828,46 @@ static void sync_completion(struct nvme_dev *dev, void *ctx, * Returns 0 on success. If the result is negative, it's a Linux error code; * if the result is positive, it's an NVM Express status code */ -int nvme_submit_sync_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd, +static int nvme_submit_sync_cmd(struct nvme_dev *dev, int q_idx, + struct nvme_command *cmd, u32 *result, unsigned timeout) { - int cmdid; + int cmdid, ret; struct sync_cmd_info cmdinfo; + struct nvme_queue *nvmeq; + + nvmeq = lock_nvmeq(dev, q_idx); + if (!nvmeq) { + unlock_nvmeq(nvmeq); + return -ENODEV; + } cmdinfo.task = current; cmdinfo.status = -EINTR; - cmdid = alloc_cmdid_killable(nvmeq, &cmdinfo, sync_completion, - timeout); - if (cmdid < 0) + cmdid = alloc_cmdid(nvmeq, &cmdinfo, sync_completion, timeout); + if (cmdid < 0) { + unlock_nvmeq(nvmeq); return cmdid; + } cmd->common.command_id = cmdid; set_current_state(TASK_KILLABLE); - nvme_submit_cmd(nvmeq, cmd); + ret = nvme_submit_cmd(nvmeq, cmd); + if (ret) { + free_cmdid(nvmeq, cmdid, NULL); + unlock_nvmeq(nvmeq); + set_current_state(TASK_RUNNING); + return ret; + } + unlock_nvmeq(nvmeq); schedule_timeout(timeout); if (cmdinfo.status == -EINTR) { - nvme_abort_command(nvmeq, cmdid); + nvmeq = lock_nvmeq(dev, q_idx); + if (nvmeq) + nvme_abort_command(nvmeq, cmdid); + unlock_nvmeq(nvmeq); return -EINTR; } @@ -853,15 +888,20 @@ static int nvme_submit_async_cmd(struct nvme_queue *nvmeq, return cmdid; cmdinfo->status = -EINTR; cmd->common.command_id = cmdid; - nvme_submit_cmd(nvmeq, cmd); - return 0; + return nvme_submit_cmd(nvmeq, cmd); } int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd, u32 *result) { - return nvme_submit_sync_cmd(raw_nvmeq(dev, 0), cmd, result, - ADMIN_TIMEOUT); + return nvme_submit_sync_cmd(dev, 0, cmd, result, ADMIN_TIMEOUT); +} + +int nvme_submit_io_cmd(struct nvme_dev *dev, struct nvme_command *cmd, + u32 *result) +{ + return nvme_submit_sync_cmd(dev, smp_processor_id() + 1, cmd, result, + NVME_IO_TIMEOUT); } static int nvme_submit_admin_cmd_async(struct nvme_dev *dev, @@ -1434,7 +1474,6 @@ void nvme_unmap_user_pages(struct nvme_dev *dev, int write, static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) { struct nvme_dev *dev = ns->dev; - struct nvme_queue *nvmeq; struct nvme_user_io io; struct nvme_command c; unsigned length, meta_len; @@ -1510,20 +1549,10 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) length = nvme_setup_prps(dev, &c.common, iod, length, GFP_KERNEL); - nvmeq = get_nvmeq(dev); - /* - * Since nvme_submit_sync_cmd sleeps, we can't keep preemption - * disabled. We may be preempted at any point, and be rescheduled - * to a different CPU. That will cause cacheline bouncing, but no - * additional races since q_lock already protects against other CPUs. - */ - put_nvmeq(nvmeq); if (length != (io.nblocks + 1) << ns->lba_shift) status = -ENOMEM; - else if (!nvmeq || nvmeq->q_suspended) - status = -EBUSY; else - status = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT); + status = nvme_submit_io_cmd(dev, &c, NULL); if (meta_len) { if (status == NVME_SC_SUCCESS && !(io.opcode & 1)) { @@ -1597,8 +1626,7 @@ static int nvme_user_admin_cmd(struct nvme_dev *dev, if (length != cmd.data_len) status = -ENOMEM; else - status = nvme_submit_sync_cmd(raw_nvmeq(dev, 0), &c, - &cmd.result, timeout); + status = nvme_submit_sync_cmd(dev, 0, &c, &cmd.result, timeout); if (cmd.data_len) { nvme_unmap_user_pages(dev, cmd.opcode & 1, iod); diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index 4a0ceb64e269..e157e85bb5d7 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -2033,7 +2033,6 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, int res = SNTI_TRANSLATION_SUCCESS; int nvme_sc; struct nvme_dev *dev = ns->dev; - struct nvme_queue *nvmeq; u32 num_cmds; struct nvme_iod *iod; u64 unit_len; @@ -2106,18 +2105,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, nvme_offset += unit_num_blocks; - nvmeq = get_nvmeq(dev); - /* - * Since nvme_submit_sync_cmd sleeps, we can't keep - * preemption disabled. We may be preempted at any - * point, and be rescheduled to a different CPU. That - * will cause cacheline bouncing, but no additional - * races since q_lock already protects against other - * CPUs. - */ - put_nvmeq(nvmeq); - nvme_sc = nvme_submit_sync_cmd(nvmeq, &c, NULL, - NVME_IO_TIMEOUT); + nvme_sc = nvme_submit_io_cmd(dev, &c, NULL); if (nvme_sc != NVME_SC_SUCCESS) { nvme_unmap_user_pages(dev, (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE, @@ -2644,7 +2632,6 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, { int res = SNTI_TRANSLATION_SUCCESS; int nvme_sc; - struct nvme_queue *nvmeq; struct nvme_command c; u8 immed, pcmod, pc, no_flush, start; @@ -2671,10 +2658,7 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, c.common.opcode = nvme_cmd_flush; c.common.nsid = cpu_to_le32(ns->ns_id); - nvmeq = get_nvmeq(ns->dev); - put_nvmeq(nvmeq); - nvme_sc = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT); - + nvme_sc = nvme_submit_io_cmd(ns->dev, &c, NULL); res = nvme_trans_status_code(hdr, nvme_sc); if (res) goto out; @@ -2697,15 +2681,12 @@ static int nvme_trans_synchronize_cache(struct nvme_ns *ns, int res = SNTI_TRANSLATION_SUCCESS; int nvme_sc; struct nvme_command c; - struct nvme_queue *nvmeq; memset(&c, 0, sizeof(c)); c.common.opcode = nvme_cmd_flush; c.common.nsid = cpu_to_le32(ns->ns_id); - nvmeq = get_nvmeq(ns->dev); - put_nvmeq(nvmeq); - nvme_sc = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT); + nvme_sc = nvme_submit_io_cmd(ns->dev, &c, NULL); res = nvme_trans_status_code(hdr, nvme_sc); if (res) @@ -2872,7 +2853,6 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr, struct nvme_dev *dev = ns->dev; struct scsi_unmap_parm_list *plist; struct nvme_dsm_range *range; - struct nvme_queue *nvmeq; struct nvme_command c; int i, nvme_sc, res = -ENOMEM; u16 ndesc, list_len; @@ -2914,10 +2894,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr, c.dsm.nr = cpu_to_le32(ndesc - 1); c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD); - nvmeq = get_nvmeq(dev); - put_nvmeq(nvmeq); - - nvme_sc = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT); + nvme_sc = nvme_submit_io_cmd(dev, &c, NULL); res = nvme_trans_status_code(hdr, nvme_sc); dma_free_coherent(&dev->pci_dev->dev, ndesc * sizeof(*range), diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 98d367b06f9c..7c3f85bc10f1 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -151,10 +151,7 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, unsigned long addr, unsigned length); void nvme_unmap_user_pages(struct nvme_dev *dev, int write, struct nvme_iod *iod); -struct nvme_queue *get_nvmeq(struct nvme_dev *dev); -void put_nvmeq(struct nvme_queue *nvmeq); -int nvme_submit_sync_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd, - u32 *result, unsigned timeout); +int nvme_submit_io_cmd(struct nvme_dev *, struct nvme_command *, u32 *); int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns); int nvme_submit_admin_cmd(struct nvme_dev *, struct nvme_command *, u32 *result); -- cgit v1.2.3-59-g8ed1b From 356750e35e86485c464704c0a32c1d8dc77590d7 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Mon, 24 Mar 2014 12:13:48 -0400 Subject: audit: define audit_is_compat in kernel internal header We were exposing a function based on kernel config options to userspace. This is wrong. Move it to the audit internal header. Suggested-by: Chris Metcalf Signed-off-by: Eric Paris --- include/linux/audit.h | 6 ++++++ include/uapi/linux/audit.h | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 4b2983e25ce0..611a59a56f1a 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -102,6 +102,12 @@ struct filename; extern void audit_log_session_info(struct audit_buffer *ab); +#ifdef CONFIG_COMPAT +#define audit_is_compat(arch) (!((arch) & __AUDIT_ARCH_64BIT)) +#else +#define audit_is_compat(arch) false +#endif + #ifdef CONFIG_AUDITSYSCALL /* These are defined in auditsc.c */ /* Public API */ diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 9af01d77dc44..4315ee99b967 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -362,12 +362,6 @@ enum { #define AUDIT_ARCH_SPARC64 (EM_SPARCV9|__AUDIT_ARCH_64BIT) #define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) -#ifdef CONFIG_COMPAT -#define audit_is_compat(arch) (!((arch) & __AUDIT_ARCH_64BIT)) -#else -#define audit_is_compat(arch) false -#endif - #define AUDIT_PERM_EXEC 1 #define AUDIT_PERM_WRITE 2 #define AUDIT_PERM_READ 4 -- cgit v1.2.3-59-g8ed1b From 3f4eb14bdbe148fcc3a8e02f506ccc9b8c955ad4 Mon Sep 17 00:00:00 2001 From: Pekon Gupta Date: Thu, 20 Mar 2014 18:48:34 +0530 Subject: mtd: devices: elm: check for hardware engine's design constraints ELM hardware engine is used by BCH ecc-schemes for detecting and locating ECC errors. This patch adds the following checks for ELM hardware engine: - ELM internal buffers are of 1K, so it cannot process data with ecc-step-size > 1K. - ELM engine can execute upto maximum of 8 threads in parallel, so in *page-mode* (when complete page is processed in single iteration), ELM cannot support ecc-steps > 8. Signed-off-by: Pekon Gupta Reviewed-by: Ezequiel Garcia Signed-off-by: Brian Norris --- drivers/mtd/devices/elm.c | 13 ++++++++++++- drivers/mtd/nand/omap2.c | 9 ++++++--- include/linux/platform_data/elm.h | 3 ++- 3 files changed, 20 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/devices/elm.c b/drivers/mtd/devices/elm.c index f160d2c6cd59..c51752ad072e 100644 --- a/drivers/mtd/devices/elm.c +++ b/drivers/mtd/devices/elm.c @@ -103,7 +103,8 @@ static u32 elm_read_reg(struct elm_info *info, int offset) * @dev: ELM device * @bch_type: Type of BCH ecc */ -int elm_config(struct device *dev, enum bch_ecc bch_type) +int elm_config(struct device *dev, enum bch_ecc bch_type, + int ecc_steps, int ecc_step_size, int ecc_syndrome_size) { u32 reg_val; struct elm_info *info = dev_get_drvdata(dev); @@ -112,6 +113,16 @@ int elm_config(struct device *dev, enum bch_ecc bch_type) dev_err(dev, "Unable to configure elm - device not probed?\n"); return -ENODEV; } + /* ELM cannot detect ECC errors for chunks > 1KB */ + if (ecc_step_size > ((ELM_ECC_SIZE + 1) / 2)) { + dev_err(dev, "unsupported config ecc-size=%d\n", ecc_step_size); + return -EINVAL; + } + /* ELM support 8 error syndrome process */ + if (ecc_steps > ERROR_VECTOR_MAX) { + dev_err(dev, "unsupported config ecc-step=%d\n", ecc_steps); + return -EINVAL; + } reg_val = (bch_type & ECC_BCH_LEVEL_MASK) | (ELM_ECC_SIZE << 16); elm_write_reg(info, ELM_LOCATION_CONFIG, reg_val); diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c index ab9c472456d9..6f9b339aed74 100644 --- a/drivers/mtd/nand/omap2.c +++ b/drivers/mtd/nand/omap2.c @@ -1540,6 +1540,8 @@ static int is_elm_present(struct omap_nand_info *info, struct device_node *elm_node, enum bch_ecc bch_type) { struct platform_device *pdev; + struct nand_ecc_ctrl *ecc = &info->nand.ecc; + int err; /* check whether elm-id is passed via DT */ if (!elm_node) { pr_err("nand: error: ELM DT node not found\n"); @@ -1553,9 +1555,10 @@ static int is_elm_present(struct omap_nand_info *info, } /* ELM module available, now configure it */ info->elm_dev = &pdev->dev; - if (elm_config(info->elm_dev, bch_type)) - return -ENODEV; - return 0; + err = elm_config(info->elm_dev, bch_type, + (info->mtd.writesize / ecc->size), ecc->size, ecc->bytes); + + return err; } #endif /* CONFIG_MTD_NAND_ECC_BCH */ diff --git a/include/linux/platform_data/elm.h b/include/linux/platform_data/elm.h index bf0a83b7ed9d..6e37156b0902 100644 --- a/include/linux/platform_data/elm.h +++ b/include/linux/platform_data/elm.h @@ -50,5 +50,6 @@ struct elm_errorvec { void elm_decode_bch_error_page(struct device *dev, u8 *ecc_calc, struct elm_errorvec *err_vec); -int elm_config(struct device *dev, enum bch_ecc bch_type); +int elm_config(struct device *dev, enum bch_ecc bch_type, + int ecc_steps, int ecc_step_size, int ecc_syndrome_size); #endif /* __ELM_H */ -- cgit v1.2.3-59-g8ed1b From ea0760244d235688b5fae4e5cdd9412c1fb1c2fe Mon Sep 17 00:00:00 2001 From: Pekon Gupta Date: Thu, 20 Mar 2014 18:48:35 +0530 Subject: mtd: devices: elm: clean elm_load_syndrome This patch refactors elm_load_syndrome() to make it scalable for newer ECC schemes by removing scheme specific macros (like ECC_BYTES*xx), and instead using ECC control information passed during elm_config. Signed-off-by: Pekon Gupta Reviewed-by: Ezequiel Garcia Signed-off-by: Brian Norris --- drivers/mtd/devices/elm.c | 18 +++++++++++------- include/linux/platform_data/elm.h | 7 ------- 2 files changed, 11 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/devices/elm.c b/drivers/mtd/devices/elm.c index c51752ad072e..4fbfaf65fabd 100644 --- a/drivers/mtd/devices/elm.c +++ b/drivers/mtd/devices/elm.c @@ -84,6 +84,7 @@ struct elm_info { struct list_head list; enum bch_ecc bch_type; struct elm_registers elm_regs; + int ecc_syndrome_size; }; static LIST_HEAD(elm_devices); @@ -126,7 +127,8 @@ int elm_config(struct device *dev, enum bch_ecc bch_type, reg_val = (bch_type & ECC_BCH_LEVEL_MASK) | (ELM_ECC_SIZE << 16); elm_write_reg(info, ELM_LOCATION_CONFIG, reg_val); - info->bch_type = bch_type; + info->bch_type = bch_type; + info->ecc_syndrome_size = ecc_syndrome_size; return 0; } @@ -175,10 +177,8 @@ static void elm_load_syndrome(struct elm_info *info, elm_configure_page_mode(info, i, true); offset = ELM_SYNDROME_FRAGMENT_0 + SYNDROME_FRAGMENT_REG_SIZE * i; - - /* BCH8 */ - if (info->bch_type) { - + switch (info->bch_type) { + case BCH8_ECC: /* syndrome fragment 0 = ecc[9-12B] */ val = cpu_to_be32(*(u32 *) &ecc[9]); elm_write_reg(info, offset, val); @@ -197,7 +197,8 @@ static void elm_load_syndrome(struct elm_info *info, offset += 4; val = ecc[0]; elm_write_reg(info, offset, val); - } else { + break; + case BCH4_ECC: /* syndrome fragment 0 = ecc[20-52b] bits */ val = (cpu_to_be32(*(u32 *) &ecc[3]) >> 4) | ((ecc[2] & 0xf) << 28); @@ -207,11 +208,14 @@ static void elm_load_syndrome(struct elm_info *info, offset += 4; val = cpu_to_be32(*(u32 *) &ecc[0]) >> 12; elm_write_reg(info, offset, val); + break; + default: + pr_err("invalid config bch_type\n"); } } /* Update ecc pointer with ecc byte size */ - ecc += info->bch_type ? BCH8_SIZE : BCH4_SIZE; + ecc += info->ecc_syndrome_size; } } diff --git a/include/linux/platform_data/elm.h b/include/linux/platform_data/elm.h index 6e37156b0902..4edb40676b3f 100644 --- a/include/linux/platform_data/elm.h +++ b/include/linux/platform_data/elm.h @@ -26,13 +26,6 @@ enum bch_ecc { /* ELM support 8 error syndrome process */ #define ERROR_VECTOR_MAX 8 -#define BCH8_ECC_OOB_BYTES 13 -#define BCH4_ECC_OOB_BYTES 7 -/* RBL requires 14 byte even though BCH8 uses only 13 byte */ -#define BCH8_SIZE (BCH8_ECC_OOB_BYTES + 1) -/* Uses 1 extra byte to handle erased pages */ -#define BCH4_SIZE (BCH4_ECC_OOB_BYTES + 1) - /** * struct elm_errorvec - error vector for elm * @error_reported: set true for vectors error is reported -- cgit v1.2.3-59-g8ed1b From e34fcb07a6d57411de6e15a47724fbe92c5caa42 Mon Sep 17 00:00:00 2001 From: David Mosberger Date: Fri, 21 Mar 2014 16:05:10 -0600 Subject: mtd: nand: fix GET/SET_FEATURES address on 16-bit devices GET_FEATURES and SET_FEATURES also need byte-addressing on 16-bit devices. Signed-off-by: David Mosberger Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 0747fef2bfc6..450d61ec7f06 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -925,7 +925,16 @@ static inline bool nand_is_slc(struct nand_chip *chip) */ static inline int nand_opcode_8bits(unsigned int command) { - return command == NAND_CMD_READID || command == NAND_CMD_PARAM; + switch (command) { + case NAND_CMD_READID: + case NAND_CMD_PARAM: + case NAND_CMD_GET_FEATURES: + case NAND_CMD_SET_FEATURES: + return 1; + default: + break; + } + return 0; } /* return the supported JEDEC features. */ -- cgit v1.2.3-59-g8ed1b From e43a34e3ec5d1b14a11c3220f5a12aa797d73cd1 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 10 Mar 2014 18:11:50 -0700 Subject: shdma: add R-Car Audio DMAC peri peri driver Add support Audio DMAC peri peri driver for Renesas R-Car Gen2 SoC, using 'shdma-base' DMA driver framework. Signed-off-by: Kuninori Morimoto [fixed checkpatch error] Signed-off-by: Vinod Koul --- drivers/dma/sh/Kconfig | 6 + drivers/dma/sh/Makefile | 1 + drivers/dma/sh/rcar-audmapp.c | 320 +++++++++++++++++++++++++ include/linux/platform_data/dma-rcar-audmapp.h | 34 +++ 4 files changed, 361 insertions(+) create mode 100644 drivers/dma/sh/rcar-audmapp.c create mode 100644 include/linux/platform_data/dma-rcar-audmapp.h (limited to 'include/linux') diff --git a/drivers/dma/sh/Kconfig b/drivers/dma/sh/Kconfig index dadd9e010c0b..b4c813831006 100644 --- a/drivers/dma/sh/Kconfig +++ b/drivers/dma/sh/Kconfig @@ -29,6 +29,12 @@ config RCAR_HPB_DMAE help Enable support for the Renesas R-Car series DMA controllers. +config RCAR_AUDMAC_PP + tristate "Renesas R-Car Audio DMAC Peripheral Peripheral support" + depends on SH_DMAE_BASE + help + Enable support for the Renesas R-Car Audio DMAC Peripheral Peripheral controllers. + config SHDMA_R8A73A4 def_bool y depends on ARCH_R8A73A4 && SH_DMAE != n diff --git a/drivers/dma/sh/Makefile b/drivers/dma/sh/Makefile index e856af23b789..1ce88b28cfc6 100644 --- a/drivers/dma/sh/Makefile +++ b/drivers/dma/sh/Makefile @@ -7,3 +7,4 @@ endif shdma-objs := $(shdma-y) obj-$(CONFIG_SUDMAC) += sudmac.o obj-$(CONFIG_RCAR_HPB_DMAE) += rcar-hpbdma.o +obj-$(CONFIG_RCAR_AUDMAC_PP) += rcar-audmapp.o diff --git a/drivers/dma/sh/rcar-audmapp.c b/drivers/dma/sh/rcar-audmapp.c new file mode 100644 index 000000000000..2de77289a2e9 --- /dev/null +++ b/drivers/dma/sh/rcar-audmapp.c @@ -0,0 +1,320 @@ +/* + * This is for Renesas R-Car Audio-DMAC-peri-peri. + * + * Copyright (C) 2014 Renesas Electronics Corporation + * Copyright (C) 2014 Kuninori Morimoto + * + * based on the drivers/dma/sh/shdma.c + * + * Copyright (C) 2011-2012 Guennadi Liakhovetski + * Copyright (C) 2009 Nobuhiro Iwamatsu + * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved. + * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved. + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * DMA register + */ +#define PDMASAR 0x00 +#define PDMADAR 0x04 +#define PDMACHCR 0x0c + +/* PDMACHCR */ +#define PDMACHCR_DE (1 << 0) + +#define AUDMAPP_MAX_CHANNELS 29 + +/* Default MEMCPY transfer size = 2^2 = 4 bytes */ +#define LOG2_DEFAULT_XFER_SIZE 2 +#define AUDMAPP_SLAVE_NUMBER 256 +#define AUDMAPP_LEN_MAX (16 * 1024 * 1024) + +struct audmapp_chan { + struct shdma_chan shdma_chan; + struct audmapp_slave_config *config; + void __iomem *base; +}; + +struct audmapp_device { + struct shdma_dev shdma_dev; + struct audmapp_pdata *pdata; + struct device *dev; + void __iomem *chan_reg; +}; + +#define to_chan(chan) container_of(chan, struct audmapp_chan, shdma_chan) +#define to_dev(chan) container_of(chan->shdma_chan.dma_chan.device, \ + struct audmapp_device, shdma_dev.dma_dev) + +static void audmapp_write(struct audmapp_chan *auchan, u32 data, u32 reg) +{ + struct audmapp_device *audev = to_dev(auchan); + struct device *dev = audev->dev; + + dev_dbg(dev, "w %p : %08x\n", auchan->base + reg, data); + + iowrite32(data, auchan->base + reg); +} + +static u32 audmapp_read(struct audmapp_chan *auchan, u32 reg) +{ + return ioread32(auchan->base + reg); +} + +static void audmapp_halt(struct shdma_chan *schan) +{ + struct audmapp_chan *auchan = to_chan(schan); + int i; + + audmapp_write(auchan, 0, PDMACHCR); + + for (i = 0; i < 1024; i++) { + if (0 == audmapp_read(auchan, PDMACHCR)) + return; + udelay(1); + } +} + +static void audmapp_start_xfer(struct shdma_chan *schan, + struct shdma_desc *sdecs) +{ + struct audmapp_chan *auchan = to_chan(schan); + struct audmapp_device *audev = to_dev(auchan); + struct audmapp_slave_config *cfg = auchan->config; + struct device *dev = audev->dev; + u32 chcr = cfg->chcr | PDMACHCR_DE; + + dev_dbg(dev, "src/dst/chcr = %pad/%pad/%x\n", + &cfg->src, &cfg->dst, cfg->chcr); + + audmapp_write(auchan, cfg->src, PDMASAR); + audmapp_write(auchan, cfg->dst, PDMADAR); + audmapp_write(auchan, chcr, PDMACHCR); +} + +static struct audmapp_slave_config * +audmapp_find_slave(struct audmapp_chan *auchan, int slave_id) +{ + struct audmapp_device *audev = to_dev(auchan); + struct audmapp_pdata *pdata = audev->pdata; + struct audmapp_slave_config *cfg; + int i; + + if (slave_id >= AUDMAPP_SLAVE_NUMBER) + return NULL; + + for (i = 0, cfg = pdata->slave; i < pdata->slave_num; i++, cfg++) + if (cfg->slave_id == slave_id) + return cfg; + + return NULL; +} + +static int audmapp_set_slave(struct shdma_chan *schan, int slave_id, + dma_addr_t slave_addr, bool try) +{ + struct audmapp_chan *auchan = to_chan(schan); + struct audmapp_slave_config *cfg = + audmapp_find_slave(auchan, slave_id); + + if (!cfg) + return -ENODEV; + if (try) + return 0; + + auchan->config = cfg; + + return 0; +} + +static int audmapp_desc_setup(struct shdma_chan *schan, + struct shdma_desc *sdecs, + dma_addr_t src, dma_addr_t dst, size_t *len) +{ + struct audmapp_chan *auchan = to_chan(schan); + struct audmapp_slave_config *cfg = auchan->config; + + if (!cfg) + return -ENODEV; + + if (*len > (size_t)AUDMAPP_LEN_MAX) + *len = (size_t)AUDMAPP_LEN_MAX; + + return 0; +} + +static void audmapp_setup_xfer(struct shdma_chan *schan, + int slave_id) +{ +} + +static dma_addr_t audmapp_slave_addr(struct shdma_chan *schan) +{ + return 0; /* always fixed address */ +} + +static bool audmapp_channel_busy(struct shdma_chan *schan) +{ + struct audmapp_chan *auchan = to_chan(schan); + u32 chcr = audmapp_read(auchan, PDMACHCR); + + return chcr & ~PDMACHCR_DE; +} + +static bool audmapp_desc_completed(struct shdma_chan *schan, + struct shdma_desc *sdesc) +{ + return true; +} + +static struct shdma_desc *audmapp_embedded_desc(void *buf, int i) +{ + return &((struct shdma_desc *)buf)[i]; +} + +static const struct shdma_ops audmapp_shdma_ops = { + .halt_channel = audmapp_halt, + .desc_setup = audmapp_desc_setup, + .set_slave = audmapp_set_slave, + .start_xfer = audmapp_start_xfer, + .embedded_desc = audmapp_embedded_desc, + .setup_xfer = audmapp_setup_xfer, + .slave_addr = audmapp_slave_addr, + .channel_busy = audmapp_channel_busy, + .desc_completed = audmapp_desc_completed, +}; + +static int audmapp_chan_probe(struct platform_device *pdev, + struct audmapp_device *audev, int id) +{ + struct shdma_dev *sdev = &audev->shdma_dev; + struct audmapp_chan *auchan; + struct shdma_chan *schan; + struct device *dev = audev->dev; + + auchan = devm_kzalloc(dev, sizeof(*auchan), GFP_KERNEL); + if (!auchan) + return -ENOMEM; + + schan = &auchan->shdma_chan; + schan->max_xfer_len = AUDMAPP_LEN_MAX; + + shdma_chan_probe(sdev, schan, id); + + auchan->base = audev->chan_reg + 0x20 + (0x10 * id); + dev_dbg(dev, "%02d : %p / %p", id, auchan->base, audev->chan_reg); + + return 0; +} + +static void audmapp_chan_remove(struct audmapp_device *audev) +{ + struct dma_device *dma_dev = &audev->shdma_dev.dma_dev; + struct shdma_chan *schan; + int i; + + shdma_for_each_chan(schan, &audev->shdma_dev, i) { + BUG_ON(!schan); + shdma_chan_remove(schan); + } + dma_dev->chancnt = 0; +} + +static int audmapp_probe(struct platform_device *pdev) +{ + struct audmapp_pdata *pdata = pdev->dev.platform_data; + struct audmapp_device *audev; + struct shdma_dev *sdev; + struct dma_device *dma_dev; + struct resource *res; + int err, i; + + if (!pdata) + return -ENODEV; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + + audev = devm_kzalloc(&pdev->dev, sizeof(*audev), GFP_KERNEL); + if (!audev) + return -ENOMEM; + + audev->dev = &pdev->dev; + audev->pdata = pdata; + audev->chan_reg = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(audev->chan_reg)) + return PTR_ERR(audev->chan_reg); + + sdev = &audev->shdma_dev; + sdev->ops = &audmapp_shdma_ops; + sdev->desc_size = sizeof(struct shdma_desc); + + dma_dev = &sdev->dma_dev; + dma_dev->copy_align = LOG2_DEFAULT_XFER_SIZE; + dma_cap_set(DMA_SLAVE, dma_dev->cap_mask); + + err = shdma_init(&pdev->dev, sdev, AUDMAPP_MAX_CHANNELS); + if (err < 0) + return err; + + platform_set_drvdata(pdev, audev); + + /* Create DMA Channel */ + for (i = 0; i < AUDMAPP_MAX_CHANNELS; i++) { + err = audmapp_chan_probe(pdev, audev, i); + if (err) + goto chan_probe_err; + } + + err = dma_async_device_register(dma_dev); + if (err < 0) + goto chan_probe_err; + + return err; + +chan_probe_err: + audmapp_chan_remove(audev); + shdma_cleanup(sdev); + + return err; +} + +static int audmapp_remove(struct platform_device *pdev) +{ + struct audmapp_device *audev = platform_get_drvdata(pdev); + struct dma_device *dma_dev = &audev->shdma_dev.dma_dev; + + dma_async_device_unregister(dma_dev); + + audmapp_chan_remove(audev); + shdma_cleanup(&audev->shdma_dev); + + return 0; +} + +static struct platform_driver audmapp_driver = { + .probe = audmapp_probe, + .remove = audmapp_remove, + .driver = { + .owner = THIS_MODULE, + .name = "rcar-audmapp-engine", + }, +}; +module_platform_driver(audmapp_driver); + +MODULE_AUTHOR("Kuninori Morimoto "); +MODULE_DESCRIPTION("Renesas R-Car Audio DMAC peri-peri driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/platform_data/dma-rcar-audmapp.h b/include/linux/platform_data/dma-rcar-audmapp.h new file mode 100644 index 000000000000..471fffebbeb4 --- /dev/null +++ b/include/linux/platform_data/dma-rcar-audmapp.h @@ -0,0 +1,34 @@ +/* + * This is for Renesas R-Car Audio-DMAC-peri-peri. + * + * Copyright (C) 2014 Renesas Electronics Corporation + * Copyright (C) 2014 Kuninori Morimoto + * + * This file is based on the include/linux/sh_dma.h + * + * Header for the new SH dmaengine driver + * + * Copyright (C) 2010 Guennadi Liakhovetski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef SH_AUDMAPP_H +#define SH_AUDMAPP_H + +#include + +struct audmapp_slave_config { + int slave_id; + dma_addr_t src; + dma_addr_t dst; + u32 chcr; +}; + +struct audmapp_pdata { + struct audmapp_slave_config *slave; + int slave_num; +}; + +#endif /* SH_AUDMAPP_H */ -- cgit v1.2.3-59-g8ed1b From 83ddfebdd21da669918d7f9854fd592858625f4b Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Mon, 24 Mar 2014 11:58:59 +0800 Subject: SUNRPC: New helper for creating client with rpc_xprt Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/clnt.h | 2 ++ net/sunrpc/clnt.c | 58 ++++++++++++++++++++++++++------------------- 2 files changed, 35 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 8af2804bab16..70736b98c721 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -130,6 +130,8 @@ struct rpc_create_args { #define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9) struct rpc_clnt *rpc_create(struct rpc_create_args *args); +struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, + struct rpc_xprt *xprt); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, const struct rpc_program *, u32); void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 0edada973434..3c9a0f02374d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -438,6 +438,38 @@ out_no_rpciod: return ERR_PTR(err); } +struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, + struct rpc_xprt *xprt) +{ + struct rpc_clnt *clnt = NULL; + + clnt = rpc_new_client(args, xprt, NULL); + if (IS_ERR(clnt)) + return clnt; + + if (!(args->flags & RPC_CLNT_CREATE_NOPING)) { + int err = rpc_ping(clnt); + if (err != 0) { + rpc_shutdown_client(clnt); + return ERR_PTR(err); + } + } + + clnt->cl_softrtry = 1; + if (args->flags & RPC_CLNT_CREATE_HARDRTRY) + clnt->cl_softrtry = 0; + + if (args->flags & RPC_CLNT_CREATE_AUTOBIND) + clnt->cl_autobind = 1; + if (args->flags & RPC_CLNT_CREATE_DISCRTRY) + clnt->cl_discrtry = 1; + if (!(args->flags & RPC_CLNT_CREATE_QUIET)) + clnt->cl_chatty = 1; + + return clnt; +} +EXPORT_SYMBOL_GPL(rpc_create_xprt); + /** * rpc_create - create an RPC client and transport with one call * @args: rpc_clnt create argument structure @@ -451,7 +483,6 @@ out_no_rpciod: struct rpc_clnt *rpc_create(struct rpc_create_args *args) { struct rpc_xprt *xprt; - struct rpc_clnt *clnt; struct xprt_create xprtargs = { .net = args->net, .ident = args->protocol, @@ -515,30 +546,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT) xprt->resvport = 0; - clnt = rpc_new_client(args, xprt, NULL); - if (IS_ERR(clnt)) - return clnt; - - if (!(args->flags & RPC_CLNT_CREATE_NOPING)) { - int err = rpc_ping(clnt); - if (err != 0) { - rpc_shutdown_client(clnt); - return ERR_PTR(err); - } - } - - clnt->cl_softrtry = 1; - if (args->flags & RPC_CLNT_CREATE_HARDRTRY) - clnt->cl_softrtry = 0; - - if (args->flags & RPC_CLNT_CREATE_AUTOBIND) - clnt->cl_autobind = 1; - if (args->flags & RPC_CLNT_CREATE_DISCRTRY) - clnt->cl_discrtry = 1; - if (!(args->flags & RPC_CLNT_CREATE_QUIET)) - clnt->cl_chatty = 1; - - return clnt; + return rpc_create_xprt(args, xprt); } EXPORT_SYMBOL_GPL(rpc_create); -- cgit v1.2.3-59-g8ed1b From d531c008d7d9713456abe3d265fc577bba2e1cef Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Mon, 24 Mar 2014 11:59:46 +0800 Subject: NFSD/SUNRPC: Check rpc_xprt out of xs_setup_bc_tcp Besides checking rpc_xprt out of xs_setup_bc_tcp, increase it's reference (it's important). Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 19 ++++++++++++++++++- include/linux/sunrpc/xprt.h | 13 ++++++++++++- net/sunrpc/xprt.c | 12 ------------ net/sunrpc/xprtsock.c | 9 --------- 4 files changed, 30 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 7f05cd140de3..39c8ef875f91 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -32,6 +32,7 @@ */ #include +#include #include #include #include "nfsd.h" @@ -635,6 +636,22 @@ static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc } } +static struct rpc_clnt *create_backchannel_client(struct rpc_create_args *args) +{ + struct rpc_xprt *xprt; + + if (args->protocol != XPRT_TRANSPORT_BC_TCP) + return rpc_create(args); + + xprt = args->bc_xprt->xpt_bc_xprt; + if (xprt) { + xprt_get(xprt); + return rpc_create_xprt(args, xprt); + } + + return rpc_create(args); +} + static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses) { struct rpc_timeout timeparms = { @@ -674,7 +691,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c args.authflavor = ses->se_cb_sec.flavor; } /* Create RPC client */ - client = rpc_create(&args); + client = create_backchannel_client(&args); if (IS_ERR(client)) { dprintk("NFSD: couldn't create callback client: %ld\n", PTR_ERR(client)); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 8097b9df6773..3e5efb2b236e 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -295,13 +295,24 @@ int xprt_adjust_timeout(struct rpc_rqst *req); void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_release(struct rpc_task *task); -struct rpc_xprt * xprt_get(struct rpc_xprt *xprt); void xprt_put(struct rpc_xprt *xprt); struct rpc_xprt * xprt_alloc(struct net *net, size_t size, unsigned int num_prealloc, unsigned int max_req); void xprt_free(struct rpc_xprt *); +/** + * xprt_get - return a reference to an RPC transport. + * @xprt: pointer to the transport + * + */ +static inline struct rpc_xprt *xprt_get(struct rpc_xprt *xprt) +{ + if (atomic_inc_not_zero(&xprt->count)) + return xprt; + return NULL; +} + static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p) { return p + xprt->tsh_size; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 7d4df99f761f..d173f79947c6 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1383,15 +1383,3 @@ void xprt_put(struct rpc_xprt *xprt) if (atomic_dec_and_test(&xprt->count)) xprt_destroy(xprt); } - -/** - * xprt_get - return a reference to an RPC transport. - * @xprt: pointer to the transport - * - */ -struct rpc_xprt *xprt_get(struct rpc_xprt *xprt) -{ - if (atomic_inc_not_zero(&xprt->count)) - return xprt; - return NULL; -} diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 63ae657f255b..1335239217cd 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2923,15 +2923,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) struct svc_sock *bc_sock; struct rpc_xprt *ret; - if (args->bc_xprt->xpt_bc_xprt) { - /* - * This server connection already has a backchannel - * transport; we can't create a new one, as we wouldn't - * be able to match replies based on xid any more. So, - * reuse the already-existing one: - */ - return args->bc_xprt->xpt_bc_xprt; - } xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries, xprt_tcp_slot_table_entries); if (IS_ERR(xprt)) -- cgit v1.2.3-59-g8ed1b From 3064639423c48d6e0eb9ecc27c512a58e38c6c57 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 26 Feb 2014 16:50:01 +0300 Subject: nfsd: check passed socket's net matches NFSd superblock's one There could be a case, when NFSd file system is mounted in network, different to socket's one, like below: "ip netns exec" creates new network and mount namespace, which duplicates NFSd mount point, created in init_net context. And thus NFS server stop in nested network context leads to RPCBIND client destruction in init_net. Then, on NFSd start in nested network context, rpc.nfsd process creates socket in nested net and passes it into "write_ports", which leads to RPCBIND sockets creation in init_net context because of the same reason (NFSd monut point was created in init_net context). An attempt to register passed socket in nested net leads to panic, because no RPCBIND client present in nexted network namespace. This patch add check that passed socket's net matches NFSd superblock's one. And returns -EINVAL error to user psace otherwise. v2: Put socket on exit. Reported-by: Weng Meiling Signed-off-by: Stanislav Kinsbursky Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsctl.c | 5 +++++ include/linux/sunrpc/svcsock.h | 1 + net/sunrpc/svcsock.c | 16 ++++++++++++++++ 3 files changed, 22 insertions(+) (limited to 'include/linux') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 7f555179bf81..f34d9de802ab 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -699,6 +699,11 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net) if (err != 0 || fd < 0) return -EINVAL; + if (svc_alien_sock(net, fd)) { + printk(KERN_ERR "%s: socket net is different to NFSd's one\n", __func__); + return -EINVAL; + } + err = nfsd_create_serv(net); if (err != 0) return err; diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 62fd1b756e99..947009ed5996 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -56,6 +56,7 @@ int svc_recv(struct svc_rqst *, long); int svc_send(struct svc_rqst *); void svc_drop(struct svc_rqst *); void svc_sock_update_bufs(struct svc_serv *serv); +bool svc_alien_sock(struct net *net, int fd); int svc_addsock(struct svc_serv *serv, const int fd, char *name_return, const size_t len); void svc_init_xprt_sock(void); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index b6e59f0a9475..d06cb8752dcd 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1397,6 +1397,22 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, return svsk; } +bool svc_alien_sock(struct net *net, int fd) +{ + int err; + struct socket *sock = sockfd_lookup(fd, &err); + bool ret = false; + + if (!sock) + goto out; + if (sock_net(sock->sk) != net) + ret = true; + sockfd_put(sock); +out: + return ret; +} +EXPORT_SYMBOL_GPL(svc_alien_sock); + /** * svc_addsock - add a listener socket to an RPC service * @serv: pointer to RPC service to which to add a new listener -- cgit v1.2.3-59-g8ed1b From 24f870d8f0adcd38639f2f66e37aa7591a3fc408 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 12 Mar 2014 17:06:19 +0900 Subject: slab: fix wrongly used macro commit 'slab: restrict the number of objects in a slab' uses __builtin_constant_p() on #if macro. It is wrong usage of builtin function, but it is compiled on x86 without any problem, so I can't find it before 0 day build system find it. This commit fixes the situation by using KMALLOC_MIN_SIZE, instead of KMALLOC_SHIFT_LOW. KMALLOC_SHIFT_LOW is parsed to ilog2() on some architecture and this ilog2() uses __builtin_constant_p() and results in the problem. This problem would disappear by using KMALLOC_MIN_SIZE, since it is just constant. Tested-by: David Rientjes Signed-off-by: Joonsoo Kim Signed-off-by: Pekka Enberg --- include/linux/slab.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index d015dec02bf3..5df89f777a54 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -201,17 +201,6 @@ struct kmem_cache { #ifndef KMALLOC_SHIFT_LOW #define KMALLOC_SHIFT_LOW 5 #endif - -/* - * This restriction comes from byte sized index implementation. - * Page size is normally 2^12 bytes and, in this case, if we want to use - * byte sized index which can represent 2^8 entries, the size of the object - * should be equal or greater to 2^12 / 2^8 = 2^4 = 16. - * If minimum size of kmalloc is less than 16, we use it as minimum object - * size and give up to use byte sized index. - */ -#define SLAB_OBJ_MIN_SIZE (KMALLOC_SHIFT_LOW < 4 ? \ - (1 << KMALLOC_SHIFT_LOW) : 16) #endif #ifdef CONFIG_SLUB @@ -253,6 +242,17 @@ struct kmem_cache { #define KMALLOC_MIN_SIZE (1 << KMALLOC_SHIFT_LOW) #endif +/* + * This restriction comes from byte sized index implementation. + * Page size is normally 2^12 bytes and, in this case, if we want to use + * byte sized index which can represent 2^8 entries, the size of the object + * should be equal or greater to 2^12 / 2^8 = 2^4 = 16. + * If minimum size of kmalloc is less than 16, we use it as minimum object + * size and give up to use byte sized index. + */ +#define SLAB_OBJ_MIN_SIZE (KMALLOC_MIN_SIZE < 16 ? \ + (KMALLOC_MIN_SIZE) : 16) + #ifndef CONFIG_SLOB extern struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; #ifdef CONFIG_ZONE_DMA -- cgit v1.2.3-59-g8ed1b From f2ebb3a921c1ca1e2ddd9242e95a1989a50c4c68 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 27 Feb 2014 09:35:45 -0500 Subject: smarter propagate_mnt() The current mainline has copies propagated to *all* nodes, then tears down the copies we made for nodes that do not contain counterparts of the desired mountpoint. That sets the right propagation graph for the copies (at teardown time we move the slaves of removed node to a surviving peer or directly to master), but we end up paying a fairly steep price in useless allocations. It's fairly easy to create a situation where N calls of mount(2) create exactly N bindings, with O(N^2) vfsmounts allocated and freed in process. Fortunately, it is possible to avoid those allocations/freeings. The trick is to create copies in the right order and find which one would've eventually become a master with the current algorithm. It turns out to be possible in O(nodes getting propagation) time and with no extra allocations at all. One part is that we need to make sure that eventual master will be created before its slaves, so we need to walk the propagation tree in a different order - by peer groups. And iterate through the peers before dealing with the next group. Another thing is finding the (earlier) copy that will be a master of one we are about to create; to do that we are (temporary) marking the masters of mountpoints we are attaching the copies to. Either we are in a peer of the last mountpoint we'd dealt with, or we have the following situation: we are attaching to mountpoint M, the last copy S_0 had been attached to M_0 and there are sequences S_0...S_n, M_0...M_n such that S_{i+1} is a master of S_{i}, S_{i} mounted on M{i} and we need to create a slave of the first S_{k} such that M is getting propagation from M_{k}. It means that the master of M_{k} will be among the sequence of masters of M. On the other hand, the nearest marked node in that sequence will either be the master of M_{k} or the master of M_{k-1} (the latter - in the case if M_{k-1} is a slave of something M gets propagation from, but in a wrong peer group). So we go through the sequence of masters of M until we find a marked one (P). Let N be the one before it. Then we go through the sequence of masters of S_0 until we find one (say, S) mounted on a node D that has P as master and check if D is a peer of N. If it is, S will be the master of new copy, if not - the master of S will be. That's it for the hard part; the rest is fairly simple. Iterator is in next_group(), handling of one prospective mountpoint is propagate_one(). It seems to survive all tests and gives a noticably better performance than the current mainline for setups that are seriously using shared subtrees. Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- fs/namespace.c | 11 ++- fs/pnode.c | 198 ++++++++++++++++++++++++++++++-------------------- fs/pnode.h | 3 + include/linux/mount.h | 3 + 4 files changed, 133 insertions(+), 82 deletions(-) (limited to 'include/linux') diff --git a/fs/namespace.c b/fs/namespace.c index 2ffc5a2905d4..65233a5f390a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -885,7 +885,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, goto out_free; } - mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD; + mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED); /* Don't allow unprivileged users to change mount flags */ if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY)) mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; @@ -1661,9 +1661,9 @@ static int attach_recursive_mnt(struct mount *source_mnt, if (err) goto out; err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list); + lock_mount_hash(); if (err) goto out_cleanup_ids; - lock_mount_hash(); for (p = source_mnt; p; p = next_mnt(p, source_mnt)) set_mnt_shared(p); } else { @@ -1690,6 +1690,11 @@ static int attach_recursive_mnt(struct mount *source_mnt, return 0; out_cleanup_ids: + while (!hlist_empty(&tree_list)) { + child = hlist_entry(tree_list.first, struct mount, mnt_hash); + umount_tree(child, 0); + } + unlock_mount_hash(); cleanup_group_ids(source_mnt, NULL); out: return err; @@ -2044,7 +2049,7 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) struct mount *parent; int err; - mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | MNT_SYNC_UMOUNT); + mnt_flags &= ~MNT_INTERNAL_FLAGS; mp = lock_mount(path); if (IS_ERR(mp)) diff --git a/fs/pnode.c b/fs/pnode.c index 88396df725b4..302bf22c4a30 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -164,46 +164,94 @@ static struct mount *propagation_next(struct mount *m, } } -/* - * return the source mount to be used for cloning - * - * @dest the current destination mount - * @last_dest the last seen destination mount - * @last_src the last seen source mount - * @type return CL_SLAVE if the new mount has to be - * cloned as a slave. - */ -static struct mount *get_source(struct mount *dest, - struct mount *last_dest, - struct mount *last_src, - int *type) +static struct mount *next_group(struct mount *m, struct mount *origin) { - struct mount *p_last_src = NULL; - struct mount *p_last_dest = NULL; - - while (last_dest != dest->mnt_master) { - p_last_dest = last_dest; - p_last_src = last_src; - last_dest = last_dest->mnt_master; - last_src = last_src->mnt_master; + while (1) { + while (1) { + struct mount *next; + if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list)) + return first_slave(m); + next = next_peer(m); + if (m->mnt_group_id == origin->mnt_group_id) { + if (next == origin) + return NULL; + } else if (m->mnt_slave.next != &next->mnt_slave) + break; + m = next; + } + /* m is the last peer */ + while (1) { + struct mount *master = m->mnt_master; + if (m->mnt_slave.next != &master->mnt_slave_list) + return next_slave(m); + m = next_peer(master); + if (master->mnt_group_id == origin->mnt_group_id) + break; + if (master->mnt_slave.next == &m->mnt_slave) + break; + m = master; + } + if (m == origin) + return NULL; } +} - if (p_last_dest) { - do { - p_last_dest = next_peer(p_last_dest); - } while (IS_MNT_NEW(p_last_dest)); - /* is that a peer of the earlier? */ - if (dest == p_last_dest) { - *type = CL_MAKE_SHARED; - return p_last_src; +/* all accesses are serialized by namespace_sem */ +static struct user_namespace *user_ns; +static struct mount *last_dest, *last_source, *dest_master; +static struct mountpoint *mp; +static struct hlist_head *list; + +static int propagate_one(struct mount *m) +{ + struct mount *child; + int type; + /* skip ones added by this propagate_mnt() */ + if (IS_MNT_NEW(m)) + return 0; + /* skip if mountpoint isn't covered by it */ + if (!is_subdir(mp->m_dentry, m->mnt.mnt_root)) + return 0; + if (m->mnt_group_id == last_dest->mnt_group_id) { + type = CL_MAKE_SHARED; + } else { + struct mount *n, *p; + for (n = m; ; n = p) { + p = n->mnt_master; + if (p == dest_master || IS_MNT_MARKED(p)) { + while (last_dest->mnt_master != p) { + last_source = last_source->mnt_master; + last_dest = last_source->mnt_parent; + } + if (n->mnt_group_id != last_dest->mnt_group_id) { + last_source = last_source->mnt_master; + last_dest = last_source->mnt_parent; + } + break; + } } + type = CL_SLAVE; + /* beginning of peer group among the slaves? */ + if (IS_MNT_SHARED(m)) + type |= CL_MAKE_SHARED; } - /* slave of the earlier, then */ - *type = CL_SLAVE; - /* beginning of peer group among the slaves? */ - if (IS_MNT_SHARED(dest)) - *type |= CL_MAKE_SHARED; - return last_src; + + /* Notice when we are propagating across user namespaces */ + if (m->mnt_ns->user_ns != user_ns) + type |= CL_UNPRIVILEGED; + child = copy_tree(last_source, last_source->mnt.mnt_root, type); + if (IS_ERR(child)) + return PTR_ERR(child); + mnt_set_mountpoint(m, mp, child); + last_dest = m; + last_source = child; + if (m->mnt_master != dest_master) { + read_seqlock_excl(&mount_lock); + SET_MNT_MARK(m->mnt_master); + read_sequnlock_excl(&mount_lock); + } + hlist_add_head(&child->mnt_hash, list); + return 0; } /* @@ -222,56 +270,48 @@ static struct mount *get_source(struct mount *dest, int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp, struct mount *source_mnt, struct hlist_head *tree_list) { - struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; - struct mount *m, *child; + struct mount *m, *n; int ret = 0; - struct mount *prev_dest_mnt = dest_mnt; - struct mount *prev_src_mnt = source_mnt; - HLIST_HEAD(tmp_list); - - for (m = propagation_next(dest_mnt, dest_mnt); m; - m = propagation_next(m, dest_mnt)) { - int type; - struct mount *source; - - if (IS_MNT_NEW(m)) - continue; - - source = get_source(m, prev_dest_mnt, prev_src_mnt, &type); - - /* Notice when we are propagating across user namespaces */ - if (m->mnt_ns->user_ns != user_ns) - type |= CL_UNPRIVILEGED; - - child = copy_tree(source, source->mnt.mnt_root, type); - if (IS_ERR(child)) { - ret = PTR_ERR(child); - tmp_list = *tree_list; - tmp_list.first->pprev = &tmp_list.first; - INIT_HLIST_HEAD(tree_list); + + /* + * we don't want to bother passing tons of arguments to + * propagate_one(); everything is serialized by namespace_sem, + * so globals will do just fine. + */ + user_ns = current->nsproxy->mnt_ns->user_ns; + last_dest = dest_mnt; + last_source = source_mnt; + mp = dest_mp; + list = tree_list; + dest_master = dest_mnt->mnt_master; + + /* all peers of dest_mnt, except dest_mnt itself */ + for (n = next_peer(dest_mnt); n != dest_mnt; n = next_peer(n)) { + ret = propagate_one(n); + if (ret) goto out; - } + } - if (is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) { - mnt_set_mountpoint(m, dest_mp, child); - hlist_add_head(&child->mnt_hash, tree_list); - } else { - /* - * This can happen if the parent mount was bind mounted - * on some subdirectory of a shared/slave mount. - */ - hlist_add_head(&child->mnt_hash, &tmp_list); - } - prev_dest_mnt = m; - prev_src_mnt = child; + /* all slave groups */ + for (m = next_group(dest_mnt, dest_mnt); m; + m = next_group(m, dest_mnt)) { + /* everything in that slave group */ + n = m; + do { + ret = propagate_one(n); + if (ret) + goto out; + n = next_peer(n); + } while (n != m); } out: - lock_mount_hash(); - while (!hlist_empty(&tmp_list)) { - child = hlist_entry(tmp_list.first, struct mount, mnt_hash); - umount_tree(child, 0); + read_seqlock_excl(&mount_lock); + hlist_for_each_entry(n, tree_list, mnt_hash) { + m = n->mnt_parent; + if (m->mnt_master != dest_mnt->mnt_master) + CLEAR_MNT_MARK(m->mnt_master); } - unlock_mount_hash(); + read_sequnlock_excl(&mount_lock); return ret; } diff --git a/fs/pnode.h b/fs/pnode.h index fc28a27fa892..4a246358b031 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -16,6 +16,9 @@ #define IS_MNT_NEW(m) (!(m)->mnt_ns) #define CLEAR_MNT_SHARED(m) ((m)->mnt.mnt_flags &= ~MNT_SHARED) #define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE) +#define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED) +#define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED) +#define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED) #define CL_EXPIRE 0x01 #define CL_SLAVE 0x02 diff --git a/include/linux/mount.h b/include/linux/mount.h index 371d346fa270..839bac270904 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -44,6 +44,8 @@ struct mnt_namespace; #define MNT_SHARED_MASK (MNT_UNBINDABLE) #define MNT_PROPAGATION_MASK (MNT_SHARED | MNT_UNBINDABLE) +#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \ + MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED) #define MNT_INTERNAL 0x4000 @@ -51,6 +53,7 @@ struct mnt_namespace; #define MNT_LOCKED 0x800000 #define MNT_DOOMED 0x1000000 #define MNT_SYNC_UMOUNT 0x2000000 +#define MNT_MARKED 0x4000000 struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ -- cgit v1.2.3-59-g8ed1b From e25115786ee540fc428a14872ebd4f56252aba32 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 5 Mar 2014 20:41:36 -0500 Subject: switch nbd to sockfd_lookup/sockfd_put Signed-off-by: Al Viro --- drivers/block/nbd.c | 48 +++++++++++++++++++----------------------------- include/linux/nbd.h | 3 +-- 2 files changed, 20 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 55298db36b2d..3a70ea2f7cd6 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -630,37 +630,29 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, } case NBD_CLEAR_SOCK: { - struct file *file; - + struct socket *sock = nbd->sock; nbd->sock = NULL; - file = nbd->file; - nbd->file = NULL; nbd_clear_que(nbd); BUG_ON(!list_empty(&nbd->queue_head)); BUG_ON(!list_empty(&nbd->waiting_queue)); kill_bdev(bdev); - if (file) - fput(file); + if (sock) + sockfd_put(sock); return 0; } case NBD_SET_SOCK: { - struct file *file; - if (nbd->file) + struct socket *sock; + int err; + if (nbd->sock) return -EBUSY; - file = fget(arg); - if (file) { - struct inode *inode = file_inode(file); - if (S_ISSOCK(inode->i_mode)) { - nbd->file = file; - nbd->sock = SOCKET_I(inode); - if (max_part > 0) - bdev->bd_invalidated = 1; - nbd->disconnect = 0; /* we're connected now */ - return 0; - } else { - fput(file); - } + sock = sockfd_lookup(arg, &err); + if (sock) { + nbd->sock = sock; + if (max_part > 0) + bdev->bd_invalidated = 1; + nbd->disconnect = 0; /* we're connected now */ + return 0; } return -EINVAL; } @@ -697,12 +689,12 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, case NBD_DO_IT: { struct task_struct *thread; - struct file *file; + struct socket *sock; int error; if (nbd->pid) return -EBUSY; - if (!nbd->file) + if (!nbd->sock) return -EINVAL; mutex_unlock(&nbd->tx_lock); @@ -731,15 +723,15 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, if (error) return error; sock_shutdown(nbd, 0); - file = nbd->file; - nbd->file = NULL; + sock = nbd->sock; + nbd->sock = NULL; nbd_clear_que(nbd); dev_warn(disk_to_dev(nbd->disk), "queue cleared\n"); kill_bdev(bdev); queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); set_device_ro(bdev, false); - if (file) - fput(file); + if (sock) + sockfd_put(sock); nbd->flags = 0; nbd->bytesize = 0; bdev->bd_inode->i_size = 0; @@ -875,9 +867,7 @@ static int __init nbd_init(void) for (i = 0; i < nbds_max; i++) { struct gendisk *disk = nbd_dev[i].disk; - nbd_dev[i].file = NULL; nbd_dev[i].magic = NBD_MAGIC; - nbd_dev[i].flags = 0; INIT_LIST_HEAD(&nbd_dev[i].waiting_queue); spin_lock_init(&nbd_dev[i].queue_lock); INIT_LIST_HEAD(&nbd_dev[i].queue_head); diff --git a/include/linux/nbd.h b/include/linux/nbd.h index ae4981ebd18e..f62f78aef4ac 100644 --- a/include/linux/nbd.h +++ b/include/linux/nbd.h @@ -24,8 +24,7 @@ struct request; struct nbd_device { int flags; int harderror; /* Code of hard error */ - struct socket * sock; - struct file * file; /* If == NULL, device is not ready, yet */ + struct socket * sock; /* If == NULL, device is not ready, yet */ int magic; spinlock_t queue_lock; -- cgit v1.2.3-59-g8ed1b From 4597e695b8baa3e2620da89c7593be70cf20566b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 14 Mar 2014 10:06:32 -0400 Subject: get rid of DEBUG_WRITECOUNT it only makes control flow in __fput() and friends more convoluted. Signed-off-by: Al Viro --- arch/powerpc/configs/ppc6xx_defconfig | 1 - arch/powerpc/configs/ps3_defconfig | 1 - arch/s390/configs/default_defconfig | 1 - arch/sh/configs/rsk7203_defconfig | 1 - arch/xtensa/configs/iss_defconfig | 1 - arch/xtensa/configs/s6105_defconfig | 1 - fs/file_table.c | 5 ---- fs/open.c | 8 ------ include/linux/fs.h | 49 ----------------------------------- lib/Kconfig.debug | 10 ------- 10 files changed, 78 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index c2353bf059fd..175a8b99c196 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -1244,7 +1244,6 @@ CONFIG_DEBUG_SPINLOCK_SLEEP=y CONFIG_DEBUG_HIGHMEM=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_VM=y -CONFIG_DEBUG_WRITECOUNT=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_SG=y # CONFIG_RCU_CPU_STALL_DETECTOR is not set diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 139a8308070c..fdee37fab81c 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -174,7 +174,6 @@ CONFIG_DETECT_HUNG_TASK=y CONFIG_PROVE_LOCKING=y CONFIG_DEBUG_LOCKDEP=y CONFIG_DEBUG_INFO=y -CONFIG_DEBUG_WRITECOUNT=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DEBUG_LIST=y CONFIG_RCU_CPU_STALL_TIMEOUT=60 diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index e0af2ee58751..3f538468a86e 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -550,7 +550,6 @@ CONFIG_LOCK_STAT=y CONFIG_DEBUG_LOCKDEP=y CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_DEBUG_LOCKING_API_SELFTESTS=y -CONFIG_DEBUG_WRITECOUNT=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y diff --git a/arch/sh/configs/rsk7203_defconfig b/arch/sh/configs/rsk7203_defconfig index 4e5229b0c5bb..47236573db83 100644 --- a/arch/sh/configs/rsk7203_defconfig +++ b/arch/sh/configs/rsk7203_defconfig @@ -128,7 +128,6 @@ CONFIG_DEBUG_MUTEXES=y CONFIG_DEBUG_SPINLOCK_SLEEP=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_VM=y -CONFIG_DEBUG_WRITECOUNT=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_SG=y CONFIG_FRAME_POINTER=y diff --git a/arch/xtensa/configs/iss_defconfig b/arch/xtensa/configs/iss_defconfig index 4f233204faf9..711f8aa14743 100644 --- a/arch/xtensa/configs/iss_defconfig +++ b/arch/xtensa/configs/iss_defconfig @@ -627,7 +627,6 @@ CONFIG_SCHED_DEBUG=y # CONFIG_DEBUG_KOBJECT is not set # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_VM is not set -# CONFIG_DEBUG_WRITECOUNT is not set # CONFIG_DEBUG_MEMORY_INIT is not set # CONFIG_DEBUG_LIST is not set # CONFIG_DEBUG_SG is not set diff --git a/arch/xtensa/configs/s6105_defconfig b/arch/xtensa/configs/s6105_defconfig index d929f77a0360..78318a76fa16 100644 --- a/arch/xtensa/configs/s6105_defconfig +++ b/arch/xtensa/configs/s6105_defconfig @@ -569,7 +569,6 @@ CONFIG_DEBUG_SPINLOCK_SLEEP=y # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_VM is not set CONFIG_DEBUG_NOMMU_REGIONS=y -# CONFIG_DEBUG_WRITECOUNT is not set # CONFIG_DEBUG_MEMORY_INIT is not set # CONFIG_DEBUG_LIST is not set # CONFIG_DEBUG_SG is not set diff --git a/fs/file_table.c b/fs/file_table.c index 79ecae62209a..ee20658a0647 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -52,7 +52,6 @@ static void file_free_rcu(struct rcu_head *head) static inline void file_free(struct file *f) { percpu_counter_dec(&nr_files); - file_check_state(f); call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); } @@ -186,7 +185,6 @@ struct file *alloc_file(struct path *path, fmode_t mode, * that we can do debugging checks at __fput() */ if ((mode & FMODE_WRITE) && !special_file(path->dentry->d_inode->i_mode)) { - file_take_write(file); WARN_ON(mnt_clone_write(path->mnt)); } if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) @@ -213,10 +211,7 @@ static void drop_file_write_access(struct file *file) return; put_write_access(inode); - if (file_check_writeable(file) != 0) - return; __mnt_drop_write(mnt); - file_release_write(file); } /* the real guts of fput() - releasing the last reference to file diff --git a/fs/open.c b/fs/open.c index 2ed7325f713e..8d0b6adfe7b8 100644 --- a/fs/open.c +++ b/fs/open.c @@ -683,7 +683,6 @@ static int do_dentry_open(struct file *f, error = __get_file_write_access(inode, f->f_path.mnt); if (error) goto cleanup_file; - file_take_write(f); } f->f_mapping = inode->i_mapping; @@ -731,14 +730,7 @@ cleanup_all: fops_put(f->f_op); if (f->f_mode & FMODE_WRITE) { if (!special_file(inode->i_mode)) { - /* - * We don't consider this a real - * mnt_want/drop_write() pair - * because it all happenend right - * here, so just reset the state. - */ put_write_access(inode); - file_reset_write(f); __mnt_drop_write(f->f_path.mnt); } } diff --git a/include/linux/fs.h b/include/linux/fs.h index 23b2a35d712e..e80659ed78fc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -769,9 +769,6 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) index < ra->start + ra->size); } -#define FILE_MNT_WRITE_TAKEN 1 -#define FILE_MNT_WRITE_RELEASED 2 - struct file { union { struct llist_node fu_llist; @@ -809,9 +806,6 @@ struct file { struct list_head f_tfile_llink; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; -#ifdef CONFIG_DEBUG_WRITECOUNT - unsigned long f_mnt_write_state; -#endif } __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ struct file_handle { @@ -829,49 +823,6 @@ static inline struct file *get_file(struct file *f) #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) #define file_count(x) atomic_long_read(&(x)->f_count) -#ifdef CONFIG_DEBUG_WRITECOUNT -static inline void file_take_write(struct file *f) -{ - WARN_ON(f->f_mnt_write_state != 0); - f->f_mnt_write_state = FILE_MNT_WRITE_TAKEN; -} -static inline void file_release_write(struct file *f) -{ - f->f_mnt_write_state |= FILE_MNT_WRITE_RELEASED; -} -static inline void file_reset_write(struct file *f) -{ - f->f_mnt_write_state = 0; -} -static inline void file_check_state(struct file *f) -{ - /* - * At this point, either both or neither of these bits - * should be set. - */ - WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN); - WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_RELEASED); -} -static inline int file_check_writeable(struct file *f) -{ - if (f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN) - return 0; - printk(KERN_WARNING "writeable file with no " - "mnt_want_write()\n"); - WARN_ON(1); - return -EINVAL; -} -#else /* !CONFIG_DEBUG_WRITECOUNT */ -static inline void file_take_write(struct file *filp) {} -static inline void file_release_write(struct file *filp) {} -static inline void file_reset_write(struct file *filp) {} -static inline void file_check_state(struct file *filp) {} -static inline int file_check_writeable(struct file *filp) -{ - return 0; -} -#endif /* CONFIG_DEBUG_WRITECOUNT */ - #define MAX_NON_LFS ((1UL<<31) - 1) /* Page cache limit. The filesystems should put that into their s_maxbytes diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a48abeac753f..7a0859314bdf 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1030,16 +1030,6 @@ config DEBUG_BUGVERBOSE of the BUG call as well as the EIP and oops trace. This aids debugging but costs about 70-100K of memory. -config DEBUG_WRITECOUNT - bool "Debug filesystem writers count" - depends on DEBUG_KERNEL - help - Enable this to catch wrong use of the writers count in struct - vfsmount. This will increase the size of each file struct by - 32 bits. - - If unsure, say N. - config DEBUG_LIST bool "Debug linked list manipulation" depends on DEBUG_KERNEL -- cgit v1.2.3-59-g8ed1b From 83f936c75e3689a63253d89c47a4d239c56d7410 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 14 Mar 2014 12:02:47 -0400 Subject: mark struct file that had write access grabbed by open() new flag in ->f_mode - FMODE_WRITER. Set by do_dentry_open() in case when it has grabbed write access, checked by __fput() to decide whether it wants to drop the sucker. Allows to stop bothering with mnt_clone_write() in alloc_file(), along with fewer special_file() checks. Signed-off-by: Al Viro --- fs/file_table.c | 37 ++++--------------------------------- fs/namespace.c | 4 +--- fs/open.c | 9 ++++----- include/linux/fs.h | 2 ++ 4 files changed, 11 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/fs/file_table.c b/fs/file_table.c index ee20658a0647..ce1504fec5a1 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -177,43 +177,12 @@ struct file *alloc_file(struct path *path, fmode_t mode, file->f_mapping = path->dentry->d_inode->i_mapping; file->f_mode = mode; file->f_op = fop; - - /* - * These mounts don't really matter in practice - * for r/o bind mounts. They aren't userspace- - * visible. We do this for consistency, and so - * that we can do debugging checks at __fput() - */ - if ((mode & FMODE_WRITE) && !special_file(path->dentry->d_inode->i_mode)) { - WARN_ON(mnt_clone_write(path->mnt)); - } if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_inc(path->dentry->d_inode); return file; } EXPORT_SYMBOL(alloc_file); -/** - * drop_file_write_access - give up ability to write to a file - * @file: the file to which we will stop writing - * - * This is a central place which will give up the ability - * to write to @file, along with access to write through - * its vfsmount. - */ -static void drop_file_write_access(struct file *file) -{ - struct vfsmount *mnt = file->f_path.mnt; - struct dentry *dentry = file->f_path.dentry; - struct inode *inode = dentry->d_inode; - - if (special_file(inode->i_mode)) - return; - - put_write_access(inode); - __mnt_drop_write(mnt); -} - /* the real guts of fput() - releasing the last reference to file */ static void __fput(struct file *file) @@ -248,8 +217,10 @@ static void __fput(struct file *file) put_pid(file->f_owner.pid); if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_dec(inode); - if (file->f_mode & FMODE_WRITE) - drop_file_write_access(file); + if (file->f_mode & FMODE_WRITER) { + put_write_access(inode); + __mnt_drop_write(mnt); + } file->f_path.dentry = NULL; file->f_path.mnt = NULL; file->f_inode = NULL; diff --git a/fs/namespace.c b/fs/namespace.c index a66aff5bd3fe..20e8696c31a7 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -414,9 +414,7 @@ EXPORT_SYMBOL_GPL(mnt_clone_write); */ int __mnt_want_write_file(struct file *file) { - struct inode *inode = file_inode(file); - - if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode)) + if (!(file->f_mode & FMODE_WRITER)) return __mnt_want_write(file->f_path.mnt); else return mnt_clone_write(file->f_path.mnt); diff --git a/fs/open.c b/fs/open.c index ebef0c5fa10c..dcefb2f02d10 100644 --- a/fs/open.c +++ b/fs/open.c @@ -670,6 +670,7 @@ static int do_dentry_open(struct file *f, put_write_access(inode); goto cleanup_file; } + f->f_mode |= FMODE_WRITER; } f->f_mapping = inode->i_mapping; @@ -715,11 +716,9 @@ static int do_dentry_open(struct file *f, cleanup_all: fops_put(f->f_op); - if (f->f_mode & FMODE_WRITE) { - if (!special_file(inode->i_mode)) { - put_write_access(inode); - __mnt_drop_write(f->f_path.mnt); - } + if (f->f_mode & FMODE_WRITER) { + put_write_access(inode); + __mnt_drop_write(f->f_path.mnt); } cleanup_file: path_put(&f->f_path); diff --git a/include/linux/fs.h b/include/linux/fs.h index e80659ed78fc..d9d88a0b456e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -125,6 +125,8 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File needs atomic accesses to f_pos */ #define FMODE_ATOMIC_POS ((__force fmode_t)0x8000) +/* Write access to underlying fs */ +#define FMODE_WRITER ((__force fmode_t)0x10000) /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x1000000) -- cgit v1.2.3-59-g8ed1b From 7f4b36f9bb930b3b2105a9a2cb0121fa7028c432 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 14 Mar 2014 12:45:29 -0400 Subject: get rid of files_defer_init() the only thing it's doing these days is calculation of upper limit for fs.nr_open sysctl and that can be done statically Signed-off-by: Al Viro --- fs/file.c | 11 ++++------- fs/file_table.c | 1 - include/linux/fdtable.h | 2 -- 3 files changed, 4 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/file.c b/fs/file.c index eb56a13dab3e..682103b95f8f 100644 --- a/fs/file.c +++ b/fs/file.c @@ -25,7 +25,10 @@ int sysctl_nr_open __read_mostly = 1024*1024; int sysctl_nr_open_min = BITS_PER_LONG; -int sysctl_nr_open_max = 1024 * 1024; /* raised later */ +/* our max() is unusable in constant expressions ;-/ */ +#define __const_max(x, y) ((x) < (y) ? (x) : (y)) +int sysctl_nr_open_max = __const_max(INT_MAX, ~(size_t)0/sizeof(void *)) & + -BITS_PER_LONG; static void *alloc_fdmem(size_t size) { @@ -429,12 +432,6 @@ void exit_files(struct task_struct *tsk) } } -void __init files_defer_init(void) -{ - sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) & - -BITS_PER_LONG; -} - struct files_struct init_files = { .count = ATOMIC_INIT(1), .fdt = &init_files.fdtab, diff --git a/fs/file_table.c b/fs/file_table.c index ce1504fec5a1..718e8e5224f8 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -325,6 +325,5 @@ void __init files_init(unsigned long mempages) n = (mempages * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = max_t(unsigned long, n, NR_FILE); - files_defer_init(); percpu_counter_init(&nr_files, 0); } diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 70e8e21c0a30..230f87bdf5ad 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -63,8 +63,6 @@ struct file_operations; struct vfsmount; struct dentry; -extern void __init files_defer_init(void); - #define rcu_dereference_check_fdtable(files, fdtfd) \ rcu_dereference_check((fdtfd), lockdep_is_held(&(files)->file_lock)) -- cgit v1.2.3-59-g8ed1b From 5d826c847b34de6415b4f1becd88a57ff619af50 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 14 Mar 2014 13:42:45 -0400 Subject: new helper: readlink_copy() Signed-off-by: Al Viro --- fs/namei.c | 13 +++++-------- fs/proc/namespaces.c | 14 ++++---------- fs/proc/self.c | 2 +- fs/xfs/xfs_ioctl.c | 28 +--------------------------- include/linux/fs.h | 2 +- 5 files changed, 12 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index 617de9e9967b..4fb52f0ca5cb 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4297,11 +4297,9 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna return sys_renameat(AT_FDCWD, oldname, AT_FDCWD, newname); } -int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link) +int readlink_copy(char __user *buffer, int buflen, const char *link) { - int len; - - len = PTR_ERR(link); + int len = PTR_ERR(link); if (IS_ERR(link)) goto out; @@ -4313,7 +4311,7 @@ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const c out: return len; } -EXPORT_SYMBOL(vfs_readlink); +EXPORT_SYMBOL(readlink_copy); /* * A helper for ->readlink(). This should be used *ONLY* for symlinks that @@ -4331,7 +4329,7 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) if (IS_ERR(cookie)) return PTR_ERR(cookie); - res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); + res = readlink_copy(buffer, buflen, nd_get_link(&nd)); if (dentry->d_inode->i_op->put_link) dentry->d_inode->i_op->put_link(dentry, &nd, cookie); return res; @@ -4356,8 +4354,7 @@ static char *page_getlink(struct dentry * dentry, struct page **ppage) int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) { struct page *page = NULL; - char *s = page_getlink(dentry, &page); - int res = vfs_readlink(dentry,buffer,buflen,s); + int res = readlink_copy(buffer, buflen, page_getlink(dentry, &page)); if (page) { kunmap(page); page_cache_release(page); diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 9ae46b87470d..89026095f2b5 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -146,7 +146,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl struct task_struct *task; void *ns; char name[50]; - int len = -EACCES; + int res = -EACCES; task = get_proc_task(inode); if (!task) @@ -155,24 +155,18 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl if (!ptrace_may_access(task, PTRACE_MODE_READ)) goto out_put_task; - len = -ENOENT; + res = -ENOENT; ns = ns_ops->get(task); if (!ns) goto out_put_task; snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns)); - len = strlen(name); - - if (len > buflen) - len = buflen; - if (copy_to_user(buffer, name, len)) - len = -EFAULT; - + res = readlink_copy(buffer, buflen, name); ns_ops->put(ns); out_put_task: put_task_struct(task); out: - return len; + return res; } static const struct inode_operations proc_ns_link_inode_operations = { diff --git a/fs/proc/self.c b/fs/proc/self.c index ffeb202ec942..4348bb8907c2 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -16,7 +16,7 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer, if (!tgid) return -ENOENT; sprintf(tmp, "%d", tgid); - return vfs_readlink(dentry,buffer,buflen,tmp); + return readlink_copy(buffer, buflen, tmp); } static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index bcfe61202115..0b18776b075e 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -271,32 +271,6 @@ xfs_open_by_handle( return error; } -/* - * This is a copy from fs/namei.c:vfs_readlink(), except for removing it's - * unused first argument. - */ -STATIC int -do_readlink( - char __user *buffer, - int buflen, - const char *link) -{ - int len; - - len = PTR_ERR(link); - if (IS_ERR(link)) - goto out; - - len = strlen(link); - if (len > (unsigned) buflen) - len = buflen; - if (copy_to_user(buffer, link, len)) - len = -EFAULT; - out: - return len; -} - - int xfs_readlink_by_handle( struct file *parfilp, @@ -334,7 +308,7 @@ xfs_readlink_by_handle( error = -xfs_readlink(XFS_I(dentry->d_inode), link); if (error) goto out_kfree; - error = do_readlink(hreq->ohandle, olen, link); + error = readlink_copy(hreq->ohandle, olen, link); if (error) goto out_kfree; diff --git a/include/linux/fs.h b/include/linux/fs.h index d9d88a0b456e..db181b542db1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2517,7 +2517,7 @@ extern const struct file_operations generic_ro_fops; #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) -extern int vfs_readlink(struct dentry *, char __user *, int, const char *); +extern int readlink_copy(char __user *, int, const char *); extern int page_readlink(struct dentry *, char __user *, int); extern void *page_follow_link_light(struct dentry *, struct nameidata *); extern void page_put_link(struct dentry *, struct nameidata *, void *); -- cgit v1.2.3-59-g8ed1b From fbb32750a62df75d1ffea547f3908b21c5496d9f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 2 Feb 2014 21:09:54 -0500 Subject: pipe: kill ->map() and ->unmap() all pipe_buffer_operations have the same instances of those... Signed-off-by: Al Viro --- drivers/char/virtio_console.c | 4 +-- fs/fuse/dev.c | 6 ++-- fs/pipe.c | 70 ++++++++++--------------------------------- fs/splice.c | 24 +++++---------- include/linux/pipe_fs_i.h | 19 ------------ kernel/relay.c | 2 -- kernel/trace/trace.c | 4 --- 7 files changed, 29 insertions(+), 100 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 6928d094451d..60aafb8a1f2e 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -901,9 +901,9 @@ static int pipe_to_sg(struct pipe_inode_info *pipe, struct pipe_buffer *buf, if (len + offset > PAGE_SIZE) len = PAGE_SIZE - offset; - src = buf->ops->map(pipe, buf, 1); + src = kmap_atomic(buf->page); memcpy(page_address(page) + offset, src + buf->offset, len); - buf->ops->unmap(pipe, buf, src); + kunmap_atomic(src); sg_set_page(&(sgl->sg[sgl->n]), page, len, offset); } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index f90af6fe6884..aac71ce373e4 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -667,7 +667,7 @@ static void fuse_copy_finish(struct fuse_copy_state *cs) struct pipe_buffer *buf = cs->currbuf; if (!cs->write) { - buf->ops->unmap(cs->pipe, buf, cs->mapaddr); + kunmap_atomic(cs->mapaddr); } else { kunmap_atomic(cs->mapaddr); buf->len = PAGE_SIZE - cs->len; @@ -706,7 +706,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs) BUG_ON(!cs->nr_segs); cs->currbuf = buf; - cs->mapaddr = buf->ops->map(cs->pipe, buf, 1); + cs->mapaddr = kmap_atomic(buf->page); cs->len = buf->len; cs->buf = cs->mapaddr + buf->offset; cs->pipebufs++; @@ -874,7 +874,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) out_fallback_unlock: unlock_page(newpage); out_fallback: - cs->mapaddr = buf->ops->map(cs->pipe, buf, 1); + cs->mapaddr = kmap_atomic(buf->page); cs->buf = cs->mapaddr + buf->offset; err = lock_request(cs->fc, cs->req); diff --git a/fs/pipe.c b/fs/pipe.c index 78fd0d0788db..6679c95eb4c3 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -225,52 +225,6 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe, page_cache_release(page); } -/** - * generic_pipe_buf_map - virtually map a pipe buffer - * @pipe: the pipe that the buffer belongs to - * @buf: the buffer that should be mapped - * @atomic: whether to use an atomic map - * - * Description: - * This function returns a kernel virtual address mapping for the - * pipe_buffer passed in @buf. If @atomic is set, an atomic map is provided - * and the caller has to be careful not to fault before calling - * the unmap function. - * - * Note that this function calls kmap_atomic() if @atomic != 0. - */ -void *generic_pipe_buf_map(struct pipe_inode_info *pipe, - struct pipe_buffer *buf, int atomic) -{ - if (atomic) { - buf->flags |= PIPE_BUF_FLAG_ATOMIC; - return kmap_atomic(buf->page); - } - - return kmap(buf->page); -} -EXPORT_SYMBOL(generic_pipe_buf_map); - -/** - * generic_pipe_buf_unmap - unmap a previously mapped pipe buffer - * @pipe: the pipe that the buffer belongs to - * @buf: the buffer that should be unmapped - * @map_data: the data that the mapping function returned - * - * Description: - * This function undoes the mapping that ->map() provided. - */ -void generic_pipe_buf_unmap(struct pipe_inode_info *pipe, - struct pipe_buffer *buf, void *map_data) -{ - if (buf->flags & PIPE_BUF_FLAG_ATOMIC) { - buf->flags &= ~PIPE_BUF_FLAG_ATOMIC; - kunmap_atomic(map_data); - } else - kunmap(buf->page); -} -EXPORT_SYMBOL(generic_pipe_buf_unmap); - /** * generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer * @pipe: the pipe that the buffer belongs to @@ -351,8 +305,6 @@ EXPORT_SYMBOL(generic_pipe_buf_release); static const struct pipe_buf_operations anon_pipe_buf_ops = { .can_merge = 1, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, .confirm = generic_pipe_buf_confirm, .release = anon_pipe_buf_release, .steal = generic_pipe_buf_steal, @@ -361,8 +313,6 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = { static const struct pipe_buf_operations packet_pipe_buf_ops = { .can_merge = 0, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, .confirm = generic_pipe_buf_confirm, .release = anon_pipe_buf_release, .steal = generic_pipe_buf_steal, @@ -410,9 +360,15 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov, atomic = !iov_fault_in_pages_write(iov, chars); redo: - addr = ops->map(pipe, buf, atomic); + if (atomic) + addr = kmap_atomic(buf->page); + else + addr = kmap(buf->page); error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic); - ops->unmap(pipe, buf, addr); + if (atomic) + kunmap_atomic(addr); + else + kunmap(buf->page); if (unlikely(error)) { /* * Just retry with the slow path if we failed. @@ -538,10 +494,16 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov, iov_fault_in_pages_read(iov, chars); redo1: - addr = ops->map(pipe, buf, atomic); + if (atomic) + addr = kmap_atomic(buf->page); + else + addr = kmap(buf->page); error = pipe_iov_copy_from_user(offset + addr, iov, chars, atomic); - ops->unmap(pipe, buf, addr); + if (atomic) + kunmap_atomic(addr); + else + kunmap(buf->page); ret = error; do_wakeup = 1; if (error) { diff --git a/fs/splice.c b/fs/splice.c index 12028fa41def..ca3bfbd970f3 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -136,8 +136,6 @@ error: const struct pipe_buf_operations page_cache_pipe_buf_ops = { .can_merge = 0, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, .confirm = page_cache_pipe_buf_confirm, .release = page_cache_pipe_buf_release, .steal = page_cache_pipe_buf_steal, @@ -156,8 +154,6 @@ static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe, static const struct pipe_buf_operations user_page_pipe_buf_ops = { .can_merge = 0, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, .confirm = generic_pipe_buf_confirm, .release = page_cache_pipe_buf_release, .steal = user_page_pipe_buf_steal, @@ -547,8 +543,6 @@ EXPORT_SYMBOL(generic_file_splice_read); static const struct pipe_buf_operations default_pipe_buf_ops = { .can_merge = 0, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, .confirm = generic_pipe_buf_confirm, .release = generic_pipe_buf_release, .steal = generic_pipe_buf_steal, @@ -564,8 +558,6 @@ static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe, /* Pipe buffer operations for a socket and similar. */ const struct pipe_buf_operations nosteal_pipe_buf_ops = { .can_merge = 0, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, .confirm = generic_pipe_buf_confirm, .release = generic_pipe_buf_release, .steal = generic_pipe_buf_nosteal, @@ -767,13 +759,13 @@ int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, goto out; if (buf->page != page) { - char *src = buf->ops->map(pipe, buf, 1); + char *src = kmap_atomic(buf->page); char *dst = kmap_atomic(page); memcpy(dst + offset, src + buf->offset, this_len); flush_dcache_page(page); kunmap_atomic(dst); - buf->ops->unmap(pipe, buf, src); + kunmap_atomic(src); } ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len, page, fsdata); @@ -1067,9 +1059,9 @@ static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf, void *data; loff_t tmp = sd->pos; - data = buf->ops->map(pipe, buf, 0); + data = kmap(buf->page); ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp); - buf->ops->unmap(pipe, buf, data); + kunmap(buf->page); return ret; } @@ -1536,10 +1528,10 @@ static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf, * pages and doing an atomic copy */ if (!fault_in_pages_writeable(sd->u.userptr, sd->len)) { - src = buf->ops->map(pipe, buf, 1); + src = kmap_atomic(buf->page); ret = __copy_to_user_inatomic(sd->u.userptr, src + buf->offset, sd->len); - buf->ops->unmap(pipe, buf, src); + kunmap_atomic(src); if (!ret) { ret = sd->len; goto out; @@ -1549,13 +1541,13 @@ static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf, /* * No dice, use slow non-atomic map and copy */ - src = buf->ops->map(pipe, buf, 0); + src = kmap(buf->page); ret = sd->len; if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len)) ret = -EFAULT; - buf->ops->unmap(pipe, buf, src); + kunmap(buf->page); out: if (ret > 0) sd->u.userptr += ret; diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index ab5752692113..6dffcebe6105 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -82,23 +82,6 @@ struct pipe_buf_operations { */ int can_merge; - /* - * ->map() returns a virtual address mapping of the pipe buffer. - * The last integer flag reflects whether this should be an atomic - * mapping or not. The atomic map is faster, however you can't take - * page faults before calling ->unmap() again. So if you need to eg - * access user data through copy_to/from_user(), then you must get - * a non-atomic map. ->map() uses the kmap_atomic slot for - * atomic maps, you have to be careful if mapping another page as - * source or destination for a copy. - */ - void * (*map)(struct pipe_inode_info *, struct pipe_buffer *, int); - - /* - * Undoes ->map(), finishes the virtual mapping of the pipe buffer. - */ - void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *, void *); - /* * ->confirm() verifies that the data in the pipe buffer is there * and that the contents are good. If the pages in the pipe belong @@ -150,8 +133,6 @@ struct pipe_inode_info *alloc_pipe_info(void); void free_pipe_info(struct pipe_inode_info *); /* Generic pipe buffer ops functions */ -void *generic_pipe_buf_map(struct pipe_inode_info *, struct pipe_buffer *, int); -void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *, void *); void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); diff --git a/kernel/relay.c b/kernel/relay.c index 5001c9887db1..98833f664fb6 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -1195,8 +1195,6 @@ static void relay_pipe_buf_release(struct pipe_inode_info *pipe, static const struct pipe_buf_operations relay_pipe_buf_ops = { .can_merge = 0, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, .confirm = generic_pipe_buf_confirm, .release = relay_pipe_buf_release, .steal = generic_pipe_buf_steal, diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 24c1f2382557..7511de35257f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4316,8 +4316,6 @@ static void tracing_spd_release_pipe(struct splice_pipe_desc *spd, static const struct pipe_buf_operations tracing_pipe_buf_ops = { .can_merge = 0, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, .confirm = generic_pipe_buf_confirm, .release = generic_pipe_buf_release, .steal = generic_pipe_buf_steal, @@ -5194,8 +5192,6 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe, /* Pipe buffer operations for a buffer. */ static const struct pipe_buf_operations buffer_pipe_buf_ops = { .can_merge = 0, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, .confirm = generic_pipe_buf_confirm, .release = buffer_pipe_buf_release, .steal = generic_pipe_buf_steal, -- cgit v1.2.3-59-g8ed1b From c186afb4dbd0050a537b96c7fbee2dba3b57fc38 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 2 Feb 2014 21:16:54 -0500 Subject: switch ->is_partially_uptodate() to saner arguments Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 2 +- Documentation/filesystems/vfs.txt | 2 +- fs/buffer.c | 6 +++--- include/linux/buffer_head.h | 4 ++-- include/linux/fs.h | 2 +- mm/filemap.c | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 5b0c083d7c0e..bb2534bc0b03 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -198,7 +198,7 @@ prototypes: unsigned long *); int (*migratepage)(struct address_space *, struct page *, struct page *); int (*launder_page)(struct page *); - int (*is_partially_uptodate)(struct page *, read_descriptor_t *, unsigned long); + int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long); int (*error_remove_page)(struct address_space *, struct page *); int (*swap_activate)(struct file *); int (*swap_deactivate)(struct file *); diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index c53784c119c8..419e7348c481 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -580,7 +580,7 @@ struct address_space_operations { /* migrate the contents of a page to the specified target */ int (*migratepage) (struct page *, struct page *); int (*launder_page) (struct page *); - int (*is_partially_uptodate) (struct page *, read_descriptor_t *, + int (*is_partially_uptodate) (struct page *, unsigned long, unsigned long); void (*is_dirty_writeback) (struct page *, bool *, bool *); int (*error_remove_page) (struct mapping *mapping, struct page *page); diff --git a/fs/buffer.c b/fs/buffer.c index 27265a8b43c1..027ae3bdfbbd 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2114,8 +2114,8 @@ EXPORT_SYMBOL(generic_write_end); * Returns true if all buffers which correspond to a file portion * we want to read are uptodate. */ -int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, - unsigned long from) +int block_is_partially_uptodate(struct page *page, unsigned long from, + unsigned long count) { unsigned block_start, block_end, blocksize; unsigned to; @@ -2127,7 +2127,7 @@ int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, head = page_buffers(page); blocksize = head->b_size; - to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count); + to = min_t(unsigned, PAGE_CACHE_SIZE - from, count); to = from + to; if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize) return 0; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index d77797a52b7b..c40302f909ce 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -210,8 +210,8 @@ int block_write_full_page(struct page *page, get_block_t *get_block, int block_write_full_page_endio(struct page *page, get_block_t *get_block, struct writeback_control *wbc, bh_end_io_t *handler); int block_read_full_page(struct page*, get_block_t*); -int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, - unsigned long from); +int block_is_partially_uptodate(struct page *page, unsigned long from, + unsigned long count); int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, get_block_t *get_block); int __block_write_begin(struct page *page, loff_t pos, unsigned len, diff --git a/include/linux/fs.h b/include/linux/fs.h index db181b542db1..ddfff2ecef0b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -385,7 +385,7 @@ struct address_space_operations { int (*migratepage) (struct address_space *, struct page *, struct page *, enum migrate_mode); int (*launder_page) (struct page *); - int (*is_partially_uptodate) (struct page *, read_descriptor_t *, + int (*is_partially_uptodate) (struct page *, unsigned long, unsigned long); void (*is_dirty_writeback) (struct page *, bool *, bool *); int (*error_remove_page)(struct address_space *, struct page *); diff --git a/mm/filemap.c b/mm/filemap.c index 7a13f6ac5421..46e98019af6c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1148,7 +1148,7 @@ find_page: if (!page->mapping) goto page_not_up_to_date_locked; if (!mapping->a_ops->is_partially_uptodate(page, - desc, offset)) + offset, desc->count)) goto page_not_up_to_date_locked; unlock_page(page); } -- cgit v1.2.3-59-g8ed1b From 9223687863ffa63fa655f52ef64148ee08dee4d1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 27 Nov 2013 16:29:46 -0800 Subject: iov_iter: Move iov_iter to uio.h Signed-off-by: Kent Overstreet --- include/linux/fs.h | 32 -------------------------------- include/linux/uio.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index ddfff2ecef0b..2261ac8f0534 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -295,38 +295,6 @@ struct page; struct address_space; struct writeback_control; -struct iov_iter { - const struct iovec *iov; - unsigned long nr_segs; - size_t iov_offset; - size_t count; -}; - -size_t iov_iter_copy_from_user_atomic(struct page *page, - struct iov_iter *i, unsigned long offset, size_t bytes); -size_t iov_iter_copy_from_user(struct page *page, - struct iov_iter *i, unsigned long offset, size_t bytes); -void iov_iter_advance(struct iov_iter *i, size_t bytes); -int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); -size_t iov_iter_single_seg_count(const struct iov_iter *i); - -static inline void iov_iter_init(struct iov_iter *i, - const struct iovec *iov, unsigned long nr_segs, - size_t count, size_t written) -{ - i->iov = iov; - i->nr_segs = nr_segs; - i->iov_offset = 0; - i->count = count + written; - - iov_iter_advance(i, written); -} - -static inline size_t iov_iter_count(struct iov_iter *i) -{ - return i->count; -} - /* * "descriptor" for what we're up to with a read. * This allows us to use the same read code yet diff --git a/include/linux/uio.h b/include/linux/uio.h index c55ce243cc09..347d70ce098e 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -9,14 +9,23 @@ #ifndef __LINUX_UIO_H #define __LINUX_UIO_H +#include #include +struct page; struct kvec { void *iov_base; /* and that should *never* hold a userland pointer */ size_t iov_len; }; +struct iov_iter { + const struct iovec *iov; + unsigned long nr_segs; + size_t iov_offset; + size_t count; +}; + /* * Total number of bytes covered by an iovec. * @@ -34,8 +43,49 @@ static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs) return ret; } +static inline struct iovec iov_iter_iovec(const struct iov_iter *iter) +{ + return (struct iovec) { + .iov_base = iter->iov->iov_base + iter->iov_offset, + .iov_len = min(iter->count, + iter->iov->iov_len - iter->iov_offset), + }; +} + +#define iov_for_each(iov, iter, start) \ + for (iter = (start); \ + (iter).count && \ + ((iov = iov_iter_iovec(&(iter))), 1); \ + iov_iter_advance(&(iter), (iov).iov_len)) + unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to); +size_t iov_iter_copy_from_user_atomic(struct page *page, + struct iov_iter *i, unsigned long offset, size_t bytes); +size_t iov_iter_copy_from_user(struct page *page, + struct iov_iter *i, unsigned long offset, size_t bytes); +void iov_iter_advance(struct iov_iter *i, size_t bytes); +int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); +size_t iov_iter_single_seg_count(const struct iov_iter *i); + +static inline void iov_iter_init(struct iov_iter *i, + const struct iovec *iov, unsigned long nr_segs, + size_t count, size_t written) +{ + i->iov = iov; + i->nr_segs = nr_segs; + i->iov_offset = 0; + i->count = count + written; + + iov_iter_advance(i, written); +} + +static inline size_t iov_iter_count(struct iov_iter *i) +{ + return i->count; +} + int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len); int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len); + #endif -- cgit v1.2.3-59-g8ed1b From 6e58e79db8a16222b31fc8da1ca2ac2dccfc4237 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 3 Feb 2014 17:07:03 -0500 Subject: introduce copy_page_to_iter, kill loop over iovec in generic_file_aio_read() generic_file_aio_read() was looping over the target iovec, with loop over (source) pages nested inside that. Just set an iov_iter up and pass *that* to do_generic_file_aio_read(). With copy_page_to_iter() doing all work of mapping and copying a page to iovec and advancing iov_iter. Switch shmem_file_aio_read() to the same and kill file_read_actor(), while we are at it. Signed-off-by: Al Viro --- include/linux/fs.h | 1 - include/linux/uio.h | 2 + mm/filemap.c | 202 ++++++++++++++++++++++++++++------------------------ mm/shmem.c | 77 +++++++------------- 4 files changed, 138 insertions(+), 144 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2261ac8f0534..2a5b1744f80a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2390,7 +2390,6 @@ extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr, unsigned long size, pgoff_t pgoff); -extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, diff --git a/include/linux/uio.h b/include/linux/uio.h index 347d70ce098e..199bcc34241b 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -67,6 +67,8 @@ size_t iov_iter_copy_from_user(struct page *page, void iov_iter_advance(struct iov_iter *i, size_t bytes); int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); size_t iov_iter_single_seg_count(const struct iov_iter *i); +size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i); static inline void iov_iter_init(struct iov_iter *i, const struct iovec *iov, unsigned long nr_segs, diff --git a/mm/filemap.c b/mm/filemap.c index bfb7a97d6d0f..a16eb2c4f316 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1085,11 +1085,90 @@ static void shrink_readahead_size_eio(struct file *filp, ra->ra_pages /= 4; } +size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i) +{ + size_t skip, copy, left, wanted; + const struct iovec *iov; + char __user *buf; + void *kaddr, *from; + + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + wanted = bytes; + iov = i->iov; + skip = i->iov_offset; + buf = iov->iov_base + skip; + copy = min(bytes, iov->iov_len - skip); + + if (!fault_in_pages_writeable(buf, copy)) { + kaddr = kmap_atomic(page); + from = kaddr + offset; + + /* first chunk, usually the only one */ + left = __copy_to_user_inatomic(buf, from, copy); + copy -= left; + skip += copy; + from += copy; + bytes -= copy; + + while (unlikely(!left && bytes)) { + iov++; + buf = iov->iov_base; + copy = min(bytes, iov->iov_len); + left = __copy_to_user_inatomic(buf, from, copy); + copy -= left; + skip = copy; + from += copy; + bytes -= copy; + } + if (likely(!bytes)) { + kunmap_atomic(kaddr); + goto done; + } + offset = from - kaddr; + buf += copy; + kunmap_atomic(kaddr); + copy = min(bytes, iov->iov_len - skip); + } + /* Too bad - revert to non-atomic kmap */ + kaddr = kmap(page); + from = kaddr + offset; + left = __copy_to_user(buf, from, copy); + copy -= left; + skip += copy; + from += copy; + bytes -= copy; + while (unlikely(!left && bytes)) { + iov++; + buf = iov->iov_base; + copy = min(bytes, iov->iov_len); + left = __copy_to_user(buf, from, copy); + copy -= left; + skip = copy; + from += copy; + bytes -= copy; + } + kunmap(page); +done: + i->count -= wanted - bytes; + i->nr_segs -= iov - i->iov; + i->iov = iov; + i->iov_offset = skip; + return wanted - bytes; +} +EXPORT_SYMBOL(copy_page_to_iter); + /** * do_generic_file_read - generic file read routine * @filp: the file to read * @ppos: current file position - * @desc: read_descriptor + * @iter: data destination + * @written: already copied * * This is a generic file read routine, and uses the * mapping->a_ops->readpage() function for the actual low-level stuff. @@ -1097,8 +1176,8 @@ static void shrink_readahead_size_eio(struct file *filp, * This is really ugly. But the goto's actually try to clarify some * of the logic when it comes to error handling etc. */ -static void do_generic_file_read(struct file *filp, loff_t *ppos, - read_descriptor_t *desc) +static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos, + struct iov_iter *iter, ssize_t written) { struct address_space *mapping = filp->f_mapping; struct inode *inode = mapping->host; @@ -1108,12 +1187,12 @@ static void do_generic_file_read(struct file *filp, loff_t *ppos, pgoff_t prev_index; unsigned long offset; /* offset into pagecache page */ unsigned int prev_offset; - int error; + int error = 0; index = *ppos >> PAGE_CACHE_SHIFT; prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT; prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1); - last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; + last_index = (*ppos + iter->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; offset = *ppos & ~PAGE_CACHE_MASK; for (;;) { @@ -1148,7 +1227,7 @@ find_page: if (!page->mapping) goto page_not_up_to_date_locked; if (!mapping->a_ops->is_partially_uptodate(page, - offset, desc->count)) + offset, iter->count)) goto page_not_up_to_date_locked; unlock_page(page); } @@ -1198,24 +1277,23 @@ page_ok: /* * Ok, we have the page, and it's up-to-date, so * now we can copy it to user space... - * - * The file_read_actor routine returns how many bytes were - * actually used.. - * NOTE! This may not be the same as how much of a user buffer - * we filled up (we may be padding etc), so we can only update - * "pos" here (the actor routine has to update the user buffer - * pointers and the remaining count). */ - ret = file_read_actor(desc, page, offset, nr); + + ret = copy_page_to_iter(page, offset, nr, iter); offset += ret; index += offset >> PAGE_CACHE_SHIFT; offset &= ~PAGE_CACHE_MASK; prev_offset = offset; page_cache_release(page); - if (ret == nr && desc->count) - continue; - goto out; + written += ret; + if (!iov_iter_count(iter)) + goto out; + if (ret < nr) { + error = -EFAULT; + goto out; + } + continue; page_not_up_to_date: /* Get exclusive access to the page ... */ @@ -1250,6 +1328,7 @@ readpage: if (unlikely(error)) { if (error == AOP_TRUNCATED_PAGE) { page_cache_release(page); + error = 0; goto find_page; } goto readpage_error; @@ -1280,7 +1359,6 @@ readpage: readpage_error: /* UHHUH! A synchronous read error occurred. Report it */ - desc->error = error; page_cache_release(page); goto out; @@ -1291,16 +1369,17 @@ no_cached_page: */ page = page_cache_alloc_cold(mapping); if (!page) { - desc->error = -ENOMEM; + error = -ENOMEM; goto out; } error = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL); if (error) { page_cache_release(page); - if (error == -EEXIST) + if (error == -EEXIST) { + error = 0; goto find_page; - desc->error = error; + } goto out; } goto readpage; @@ -1313,44 +1392,7 @@ out: *ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset; file_accessed(filp); -} - -int file_read_actor(read_descriptor_t *desc, struct page *page, - unsigned long offset, unsigned long size) -{ - char *kaddr; - unsigned long left, count = desc->count; - - if (size > count) - size = count; - - /* - * Faults on the destination of a read are common, so do it before - * taking the kmap. - */ - if (!fault_in_pages_writeable(desc->arg.buf, size)) { - kaddr = kmap_atomic(page); - left = __copy_to_user_inatomic(desc->arg.buf, - kaddr + offset, size); - kunmap_atomic(kaddr); - if (left == 0) - goto success; - } - - /* Do it the slow way */ - kaddr = kmap(page); - left = __copy_to_user(desc->arg.buf, kaddr + offset, size); - kunmap(page); - - if (left) { - size -= left; - desc->error = -EFAULT; - } -success: - desc->count = count - size; - desc->written += size; - desc->arg.buf += size; - return size; + return written ? written : error; } /* @@ -1408,14 +1450,15 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, { struct file *filp = iocb->ki_filp; ssize_t retval; - unsigned long seg = 0; size_t count; loff_t *ppos = &iocb->ki_pos; + struct iov_iter i; count = 0; retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); if (retval) return retval; + iov_iter_init(&i, iov, nr_segs, count, 0); /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ if (filp->f_flags & O_DIRECT) { @@ -1437,6 +1480,11 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, if (retval > 0) { *ppos = pos + retval; count -= retval; + /* + * If we did a short DIO read we need to skip the + * section of the iov that we've already read data into. + */ + iov_iter_advance(&i, retval); } /* @@ -1453,39 +1501,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, } } - count = retval; - for (seg = 0; seg < nr_segs; seg++) { - read_descriptor_t desc; - loff_t offset = 0; - - /* - * If we did a short DIO read we need to skip the section of the - * iov that we've already read data into. - */ - if (count) { - if (count > iov[seg].iov_len) { - count -= iov[seg].iov_len; - continue; - } - offset = count; - count = 0; - } - - desc.written = 0; - desc.arg.buf = iov[seg].iov_base + offset; - desc.count = iov[seg].iov_len - offset; - if (desc.count == 0) - continue; - desc.error = 0; - do_generic_file_read(filp, ppos, &desc); - retval += desc.written; - if (desc.error) { - retval = retval ?: desc.error; - break; - } - if (desc.count > 0) - break; - } + retval = do_generic_file_read(filp, ppos, &i, retval); out: return retval; } diff --git a/mm/shmem.c b/mm/shmem.c index 9398e6cd48cb..17d3799d04bd 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1462,13 +1462,25 @@ shmem_write_end(struct file *file, struct address_space *mapping, return copied; } -static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc) +static ssize_t shmem_file_aio_read(struct kiocb *iocb, + const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - struct inode *inode = file_inode(filp); + struct file *file = iocb->ki_filp; + struct inode *inode = file_inode(file); struct address_space *mapping = inode->i_mapping; pgoff_t index; unsigned long offset; enum sgp_type sgp = SGP_READ; + int error; + ssize_t retval; + size_t count; + loff_t *ppos = &iocb->ki_pos; + struct iov_iter iter; + + retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); + if (retval) + return retval; + iov_iter_init(&iter, iov, nr_segs, count, 0); /* * Might this read be for a stacking filesystem? Then when reading @@ -1496,10 +1508,10 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_ break; } - desc->error = shmem_getpage(inode, index, &page, sgp, NULL); - if (desc->error) { - if (desc->error == -EINVAL) - desc->error = 0; + error = shmem_getpage(inode, index, &page, sgp, NULL); + if (error) { + if (error == -EINVAL) + error = 0; break; } if (page) @@ -1543,61 +1555,26 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_ /* * Ok, we have the page, and it's up-to-date, so * now we can copy it to user space... - * - * The actor routine returns how many bytes were actually used.. - * NOTE! This may not be the same as how much of a user buffer - * we filled up (we may be padding etc), so we can only update - * "pos" here (the actor routine has to update the user buffer - * pointers and the remaining count). */ - ret = file_read_actor(desc, page, offset, nr); + ret = copy_page_to_iter(page, offset, nr, &iter); + retval += ret; offset += ret; index += offset >> PAGE_CACHE_SHIFT; offset &= ~PAGE_CACHE_MASK; page_cache_release(page); - if (ret != nr || !desc->count) + if (!iov_iter_count(&iter)) break; - + if (ret < nr) { + error = -EFAULT; + break; + } cond_resched(); } *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset; - file_accessed(filp); -} - -static ssize_t shmem_file_aio_read(struct kiocb *iocb, - const struct iovec *iov, unsigned long nr_segs, loff_t pos) -{ - struct file *filp = iocb->ki_filp; - ssize_t retval; - unsigned long seg; - size_t count; - loff_t *ppos = &iocb->ki_pos; - - retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); - if (retval) - return retval; - - for (seg = 0; seg < nr_segs; seg++) { - read_descriptor_t desc; - - desc.written = 0; - desc.arg.buf = iov[seg].iov_base; - desc.count = iov[seg].iov_len; - if (desc.count == 0) - continue; - desc.error = 0; - do_shmem_file_read(filp, ppos, &desc); - retval += desc.written; - if (desc.error) { - retval = retval ?: desc.error; - break; - } - if (desc.count > 0) - break; - } - return retval; + file_accessed(file); + return retval ? retval : error; } static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, -- cgit v1.2.3-59-g8ed1b From 86d564c84c38b1ec06d9f2120d6a7373dcaeff0c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 8 Feb 2014 20:42:52 -0500 Subject: constify blk_rq_map_user_iov() and friends sg_iovec array passed to it can be const Signed-off-by: Al Viro --- block/blk-map.c | 2 +- fs/bio.c | 10 +++++----- include/linux/bio.h | 5 +++-- include/linux/blkdev.h | 4 ++-- 4 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/block/blk-map.c b/block/blk-map.c index ae4ae1047fd9..86d93779c066 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -188,7 +188,7 @@ EXPORT_SYMBOL(blk_rq_map_user); * unmapping. */ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, - struct rq_map_data *map_data, struct sg_iovec *iov, + struct rq_map_data *map_data, const struct sg_iovec *iov, int iov_count, unsigned int len, gfp_t gfp_mask) { struct bio *bio; diff --git a/fs/bio.c b/fs/bio.c index 8754e7b6eb49..7065837ae9f3 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -1003,7 +1003,7 @@ struct bio_map_data { }; static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio, - struct sg_iovec *iov, int iov_count, + const struct sg_iovec *iov, int iov_count, int is_our_pages) { memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count); @@ -1023,7 +1023,7 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, sizeof(struct sg_iovec) * iov_count, gfp_mask); } -static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count, +static int __bio_copy_iov(struct bio *bio, const struct sg_iovec *iov, int iov_count, int to_user, int from_user, int do_free_page) { int ret = 0, i; @@ -1121,7 +1121,7 @@ EXPORT_SYMBOL(bio_uncopy_user); */ struct bio *bio_copy_user_iov(struct request_queue *q, struct rq_map_data *map_data, - struct sg_iovec *iov, int iov_count, + const struct sg_iovec *iov, int iov_count, int write_to_vm, gfp_t gfp_mask) { struct bio_map_data *bmd; @@ -1260,7 +1260,7 @@ EXPORT_SYMBOL(bio_copy_user); static struct bio *__bio_map_user_iov(struct request_queue *q, struct block_device *bdev, - struct sg_iovec *iov, int iov_count, + const struct sg_iovec *iov, int iov_count, int write_to_vm, gfp_t gfp_mask) { int i, j; @@ -1408,7 +1408,7 @@ EXPORT_SYMBOL(bio_map_user); * device. Returns an error pointer in case of error. */ struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev, - struct sg_iovec *iov, int iov_count, + const struct sg_iovec *iov, int iov_count, int write_to_vm, gfp_t gfp_mask) { struct bio *bio; diff --git a/include/linux/bio.h b/include/linux/bio.h index 5a4d39b4686b..21e27208316c 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -388,7 +388,7 @@ struct sg_iovec; struct rq_map_data; extern struct bio *bio_map_user_iov(struct request_queue *, struct block_device *, - struct sg_iovec *, int, int, gfp_t); + const struct sg_iovec *, int, int, gfp_t); extern void bio_unmap_user(struct bio *); extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, gfp_t); @@ -414,7 +414,8 @@ extern int bio_alloc_pages(struct bio *bio, gfp_t gfp); extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *, unsigned long, unsigned int, int, gfp_t); extern struct bio *bio_copy_user_iov(struct request_queue *, - struct rq_map_data *, struct sg_iovec *, + struct rq_map_data *, + const struct sg_iovec *, int, int, gfp_t); extern int bio_uncopy_user(struct bio *); void zero_fill_bio(struct bio *bio); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4afa4f8f6090..a639fd8a6d7b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -823,8 +823,8 @@ extern int blk_rq_map_user(struct request_queue *, struct request *, extern int blk_rq_unmap_user(struct bio *); extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); extern int blk_rq_map_user_iov(struct request_queue *, struct request *, - struct rq_map_data *, struct sg_iovec *, int, - unsigned int, gfp_t); + struct rq_map_data *, const struct sg_iovec *, + int, unsigned int, gfp_t); extern int blk_execute_rq(struct request_queue *, struct gendisk *, struct request *, int); extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, -- cgit v1.2.3-59-g8ed1b From 41fc56d573c35a212688b12b48af8c303f9bb6d2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 9 Feb 2014 12:58:52 -0500 Subject: kill the 4th argument of __generic_file_aio_write() It's always equal to &iocb->ki_pos, where iocb is the value of the 1st argument. Signed-off-by: Al Viro --- fs/block_dev.c | 2 +- fs/ext4/file.c | 2 +- fs/udf/file.c | 2 +- include/linux/fs.h | 3 +-- mm/filemap.c | 13 ++++++------- 5 files changed, 10 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/fs/block_dev.c b/fs/block_dev.c index 1e86823a9cbd..764bd3b8d2fa 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1518,7 +1518,7 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, BUG_ON(iocb->ki_pos != pos); blk_start_plug(&plug); - ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); + ret = __generic_file_aio_write(iocb, iov, nr_segs); if (ret > 0) { ssize_t err; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 1a5073959f32..d564bcfb23c5 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -146,7 +146,7 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, overwrite = 1; } - ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); + ret = __generic_file_aio_write(iocb, iov, nr_segs); mutex_unlock(&inode->i_mutex); if (ret > 0) { diff --git a/fs/udf/file.c b/fs/udf/file.c index 1037637957c7..d2c170f8b035 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -171,7 +171,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, } else up_write(&iinfo->i_data_sem); - retval = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); + retval = __generic_file_aio_write(iocb, iov, nr_segs); mutex_unlock(&inode->i_mutex); if (retval > 0) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 2a5b1744f80a..e677d1e1189f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2392,8 +2392,7 @@ extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr, unsigned long size, pgoff_t pgoff); int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); -extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, - loff_t *); +extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long); extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, unsigned long *, loff_t, loff_t *, size_t, size_t); diff --git a/mm/filemap.c b/mm/filemap.c index c4730efa5d9e..ce2246dd90de 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2222,14 +2222,14 @@ EXPORT_SYMBOL(generic_file_buffered_write); * avoid syncing under i_mutex. */ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos) + unsigned long nr_segs) { struct file *file = iocb->ki_filp; struct address_space * mapping = file->f_mapping; size_t ocount; /* original count */ size_t count; /* after file limit checks */ struct inode *inode = mapping->host; - loff_t pos; + loff_t pos = iocb->ki_pos; ssize_t written; ssize_t err; @@ -2239,7 +2239,6 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, return err; count = ocount; - pos = *ppos; /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; @@ -2266,7 +2265,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, ssize_t written_buffered; written = generic_file_direct_write(iocb, iov, &nr_segs, pos, - ppos, count, ocount); + &iocb->ki_pos, count, ocount); if (written < 0 || written == count) goto out; /* @@ -2276,7 +2275,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, pos += written; count -= written; written_buffered = generic_file_buffered_write(iocb, iov, - nr_segs, pos, ppos, count, + nr_segs, pos, &iocb->ki_pos, count, written); /* * If generic_file_buffered_write() retuned a synchronous error @@ -2310,7 +2309,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, } } else { written = generic_file_buffered_write(iocb, iov, nr_segs, - pos, ppos, count, written); + pos, &iocb->ki_pos, count, written); } out: current->backing_dev_info = NULL; @@ -2339,7 +2338,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, BUG_ON(iocb->ki_pos != pos); mutex_lock(&inode->i_mutex); - ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); + ret = __generic_file_aio_write(iocb, iov, nr_segs); mutex_unlock(&inode->i_mutex); if (ret > 0) { -- cgit v1.2.3-59-g8ed1b From fcacafd269adc88f41b68cb77a3f957a66563428 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 9 Feb 2014 13:37:49 -0500 Subject: kill the 5th argument of generic_file_buffered_write() same story - it's &iocb->ki_pos in all cases Signed-off-by: Al Viro --- fs/ceph/file.c | 3 +-- fs/ocfs2/file.c | 2 +- fs/xfs/xfs_file.c | 2 +- include/linux/fs.h | 2 +- mm/filemap.c | 9 ++++----- 5 files changed, 8 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 09c7afe32e49..a798db5e5e39 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -978,8 +978,7 @@ retry_snap: * can not run at the same time */ written = generic_file_buffered_write(iocb, iov, nr_segs, - pos, &iocb->ki_pos, - count, 0); + pos, count, 0); mutex_unlock(&inode->i_mutex); } diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 51632c40e896..89099cce14fe 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2383,7 +2383,7 @@ relock: } else { current->backing_dev_info = file->f_mapping->backing_dev_info; written = generic_file_buffered_write(iocb, iov, nr_segs, *ppos, - ppos, count, 0); + count, 0); current->backing_dev_info = NULL; } diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 64b48eade91d..175ce58fbfa3 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -738,7 +738,7 @@ xfs_file_buffered_aio_write( write_retry: trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); ret = generic_file_buffered_write(iocb, iovp, nr_segs, - pos, &iocb->ki_pos, count, 0); + pos, count, 0); /* * If we just got an ENOSPC, try to write back all dirty inodes to diff --git a/include/linux/fs.h b/include/linux/fs.h index e677d1e1189f..830e37420f5e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2397,7 +2397,7 @@ extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsi extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, unsigned long *, loff_t, loff_t *, size_t, size_t); extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, - unsigned long, loff_t, loff_t *, size_t, ssize_t); + unsigned long, loff_t, size_t, ssize_t); extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); extern int generic_segment_checks(const struct iovec *iov, diff --git a/mm/filemap.c b/mm/filemap.c index ce2246dd90de..9d515a1a2372 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2183,7 +2183,7 @@ again: ssize_t generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos, loff_t *ppos, + unsigned long nr_segs, loff_t pos, size_t count, ssize_t written) { struct file *file = iocb->ki_filp; @@ -2195,7 +2195,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, if (likely(status >= 0)) { written += status; - *ppos = pos + status; + iocb->ki_pos = pos + status; } return written ? written : status; @@ -2275,8 +2275,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, pos += written; count -= written; written_buffered = generic_file_buffered_write(iocb, iov, - nr_segs, pos, &iocb->ki_pos, count, - written); + nr_segs, pos, count, written); /* * If generic_file_buffered_write() retuned a synchronous error * then we want to return the number of bytes which were @@ -2309,7 +2308,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, } } else { written = generic_file_buffered_write(iocb, iov, nr_segs, - pos, &iocb->ki_pos, count, written); + pos, count, written); } out: current->backing_dev_info = NULL; -- cgit v1.2.3-59-g8ed1b From 5cb6c6c7eb1ed24744b41fad47d9a25b72207098 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 11 Feb 2014 20:58:20 -0500 Subject: generic_file_direct_write(): get rid of ppos argument always equal to &iocb->ki_pos. Signed-off-by: Al Viro --- fs/btrfs/file.c | 2 +- fs/fuse/file.c | 3 +-- fs/ocfs2/file.c | 2 +- fs/xfs/xfs_file.c | 2 +- include/linux/fs.h | 2 +- mm/filemap.c | 6 +++--- 6 files changed, 8 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index f6032a2bfab9..8ed4b165abbd 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1640,7 +1640,7 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb, loff_t endbyte; int err; - written = generic_file_direct_write(iocb, iov, &nr_segs, pos, &iocb->ki_pos, + written = generic_file_direct_write(iocb, iov, &nr_segs, pos, count, ocount); if (written < 0 || written == count) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index a91d3b4d32f3..fd06d1ebc2eb 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1143,8 +1143,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, goto out; if (file->f_flags & O_DIRECT) { - written = generic_file_direct_write(iocb, iov, &nr_segs, - pos, &iocb->ki_pos, + written = generic_file_direct_write(iocb, iov, &nr_segs, pos, count, ocount); if (written < 0 || written == count) goto out; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 89099cce14fe..77b8a742866f 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2375,7 +2375,7 @@ relock: if (direct_io) { written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, - ppos, count, ocount); + count, ocount); if (written < 0) { ret = written; goto out_dio; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 175ce58fbfa3..e593554ce65e 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -699,7 +699,7 @@ xfs_file_dio_aio_write( trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); ret = generic_file_direct_write(iocb, iovp, - &nr_segs, pos, &iocb->ki_pos, count, ocount); + &nr_segs, pos, count, ocount); out: xfs_rw_iunlock(ip, iolock); diff --git a/include/linux/fs.h b/include/linux/fs.h index 830e37420f5e..9dfd7c7ff8e3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2395,7 +2395,7 @@ extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsig extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long); extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, - unsigned long *, loff_t, loff_t *, size_t, size_t); + unsigned long *, loff_t, size_t, size_t); extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, unsigned long, loff_t, size_t, ssize_t); extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); diff --git a/mm/filemap.c b/mm/filemap.c index 9d515a1a2372..93e9cf576452 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1985,7 +1985,7 @@ EXPORT_SYMBOL(pagecache_write_end); ssize_t generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long *nr_segs, loff_t pos, loff_t *ppos, + unsigned long *nr_segs, loff_t pos, size_t count, size_t ocount) { struct file *file = iocb->ki_filp; @@ -2046,7 +2046,7 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, i_size_write(inode, pos); mark_inode_dirty(inode); } - *ppos = pos; + iocb->ki_pos = pos; } out: return written; @@ -2265,7 +2265,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, ssize_t written_buffered; written = generic_file_direct_write(iocb, iov, &nr_segs, pos, - &iocb->ki_pos, count, ocount); + count, ocount); if (written < 0 || written == count) goto out; /* -- cgit v1.2.3-59-g8ed1b From 3b93f911d55cf8b3540f82fac4eeddfee7b5de0c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 11 Feb 2014 21:34:08 -0500 Subject: export generic_perform_write(), start getting rid of generic_file_buffer_write() Signed-off-by: Al Viro --- include/linux/fs.h | 2 ++ mm/filemap.c | 37 +++++++++++++++++++++---------------- 2 files changed, 23 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9dfd7c7ff8e3..87be51aff9d7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -48,6 +48,7 @@ struct cred; struct swap_info_struct; struct seq_file; struct workqueue_struct; +struct iov_iter; extern void __init inode_init(void); extern void __init inode_init_early(void); @@ -2396,6 +2397,7 @@ extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, un extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, unsigned long *, loff_t, size_t, size_t); +extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, unsigned long, loff_t, size_t, ssize_t); extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); diff --git a/mm/filemap.c b/mm/filemap.c index 93e9cf576452..09bfc9b3bb51 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2092,7 +2092,7 @@ found: } EXPORT_SYMBOL(grab_cache_page_write_begin); -static ssize_t generic_perform_write(struct file *file, +ssize_t generic_perform_write(struct file *file, struct iov_iter *i, loff_t pos) { struct address_space *mapping = file->f_mapping; @@ -2180,6 +2180,7 @@ again: return written ? written : status; } +EXPORT_SYMBOL(generic_perform_write); ssize_t generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, @@ -2230,8 +2231,10 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, size_t count; /* after file limit checks */ struct inode *inode = mapping->host; loff_t pos = iocb->ki_pos; - ssize_t written; + ssize_t written = 0; ssize_t err; + ssize_t status; + struct iov_iter from; ocount = 0; err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); @@ -2242,8 +2245,6 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; - written = 0; - err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); if (err) goto out; @@ -2259,44 +2260,47 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err) goto out; + iov_iter_init(&from, iov, nr_segs, count, 0); + /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ if (unlikely(file->f_flags & O_DIRECT)) { loff_t endbyte; - ssize_t written_buffered; - written = generic_file_direct_write(iocb, iov, &nr_segs, pos, + written = generic_file_direct_write(iocb, iov, &from.nr_segs, pos, count, ocount); if (written < 0 || written == count) goto out; + iov_iter_advance(&from, written); + /* * direct-io write to a hole: fall through to buffered I/O * for completing the rest of the request. */ pos += written; count -= written; - written_buffered = generic_file_buffered_write(iocb, iov, - nr_segs, pos, count, written); + + status = generic_perform_write(file, &from, pos); /* - * If generic_file_buffered_write() retuned a synchronous error + * If generic_perform_write() returned a synchronous error * then we want to return the number of bytes which were * direct-written, or the error code if that was zero. Note * that this differs from normal direct-io semantics, which * will return -EFOO even if some bytes were written. */ - if (written_buffered < 0) { - err = written_buffered; + if (unlikely(status < 0) && !written) { + err = status; goto out; } - + iocb->ki_pos = pos + status; /* * We need to ensure that the page cache pages are written to * disk and invalidated to preserve the expected O_DIRECT * semantics. */ - endbyte = pos + written_buffered - written - 1; + endbyte = pos + status - 1; err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte); if (err == 0) { - written = written_buffered; + written += status; invalidate_mapping_pages(mapping, pos >> PAGE_CACHE_SHIFT, endbyte >> PAGE_CACHE_SHIFT); @@ -2307,8 +2311,9 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, */ } } else { - written = generic_file_buffered_write(iocb, iov, nr_segs, - pos, count, written); + written = generic_perform_write(file, &from, pos); + if (likely(written >= 0)) + iocb->ki_pos = pos + written; } out: current->backing_dev_info = NULL; -- cgit v1.2.3-59-g8ed1b From ccad2365668f12336dd497d9039e8584af836070 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 11 Feb 2014 22:36:48 -0500 Subject: kill generic_file_buffered_write() Signed-off-by: Al Viro --- include/linux/fs.h | 2 -- mm/filemap.c | 23 +---------------------- 2 files changed, 1 insertion(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 87be51aff9d7..c309b7a0da2d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2398,8 +2398,6 @@ extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsi extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, unsigned long *, loff_t, size_t, size_t); extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); -extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, - unsigned long, loff_t, size_t, ssize_t); extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); extern int generic_segment_checks(const struct iovec *iov, diff --git a/mm/filemap.c b/mm/filemap.c index 09bfc9b3bb51..1a455142784d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -76,7 +76,7 @@ * ->mmap_sem * ->lock_page (access_process_vm) * - * ->i_mutex (generic_file_buffered_write) + * ->i_mutex (generic_perform_write) * ->mmap_sem (fault_in_pages_readable->do_page_fault) * * bdi->wb.list_lock @@ -2182,27 +2182,6 @@ again: } EXPORT_SYMBOL(generic_perform_write); -ssize_t -generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos, - size_t count, ssize_t written) -{ - struct file *file = iocb->ki_filp; - ssize_t status; - struct iov_iter i; - - iov_iter_init(&i, iov, nr_segs, count, written); - status = generic_perform_write(file, &i, pos); - - if (likely(status >= 0)) { - written += status; - iocb->ki_pos = pos + status; - } - - return written ? written : status; -} -EXPORT_SYMBOL(generic_file_buffered_write); - /** * __generic_file_aio_write - write data to a file * @iocb: IO state structure (file, offset, etc.) -- cgit v1.2.3-59-g8ed1b From 9d521470a40f16110bd31018034155c60c1a1275 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 31 Jan 2014 17:54:26 +0200 Subject: libceph: a per-osdc crush scratch buffer With the addition of erasure coding support in the future, scratch variable-length array in crush_do_rule_ary() is going to grow to at least 200 bytes on average, on top of another 128 bytes consumed by rawosd/osd arrays in the call chain. Replace it with a buffer inside struct osdmap and a mutex. This shouldn't result in any contention, because all osd requests were already serialized by request_mutex at that point; the only unlocked caller was ceph_ioctl_get_dataloc(). Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/ceph/osdmap.h | 3 +++ net/ceph/osdmap.c | 25 ++++++++++++++++--------- 2 files changed, 19 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 49ff69f0746b..8c8b3cefc28b 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -84,6 +84,9 @@ struct ceph_osdmap { /* the CRUSH map specifies the mapping of placement groups to * the list of osds that store+replicate them. */ struct crush_map *crush; + + struct mutex crush_scratch_mutex; + int crush_scratch_ary[CEPH_PG_MAX_SIZE * 3]; }; static inline void ceph_oid_set_name(struct ceph_object_id *oid, diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index aade4a5c1c07..9d1aaa24def6 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -698,7 +698,9 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) map = kzalloc(sizeof(*map), GFP_NOFS); if (map == NULL) return ERR_PTR(-ENOMEM); + map->pg_temp = RB_ROOT; + mutex_init(&map->crush_scratch_mutex); ceph_decode_16_safe(p, end, version, bad); if (version > 6) { @@ -1142,14 +1144,20 @@ int ceph_oloc_oid_to_pg(struct ceph_osdmap *osdmap, } EXPORT_SYMBOL(ceph_oloc_oid_to_pg); -static int crush_do_rule_ary(const struct crush_map *map, int ruleno, int x, - int *result, int result_max, - const __u32 *weight, int weight_max) +static int do_crush(struct ceph_osdmap *map, int ruleno, int x, + int *result, int result_max, + const __u32 *weight, int weight_max) { - int scratch[result_max * 3]; + int r; + + BUG_ON(result_max > CEPH_PG_MAX_SIZE); + + mutex_lock(&map->crush_scratch_mutex); + r = crush_do_rule(map->crush, ruleno, x, result, result_max, + weight, weight_max, map->crush_scratch_ary); + mutex_unlock(&map->crush_scratch_mutex); - return crush_do_rule(map, ruleno, x, result, result_max, - weight, weight_max, scratch); + return r; } /* @@ -1205,9 +1213,8 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, pool->pgp_num_mask) + (unsigned)pgid.pool; } - r = crush_do_rule_ary(osdmap->crush, ruleno, pps, - osds, min_t(int, pool->size, *num), - osdmap->osd_weight, osdmap->max_osd); + r = do_crush(osdmap, ruleno, pps, osds, min_t(int, pool->size, *num), + osdmap->osd_weight, osdmap->max_osd); if (r < 0) { pr_err("error %d from crush rule: pool %lld ruleset %d type %d" " size %d\n", r, pgid.pool, pool->crush_ruleset, -- cgit v1.2.3-59-g8ed1b From 7b25bf5f02c5c80adf96120e031dc3a1756ce54d Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 25 Feb 2014 16:22:26 +0200 Subject: libceph: encode CEPH_OSD_OP_FLAG_* op flags Encode ceph_osd_op::flags field so that it gets sent over the wire. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Reviewed-by: Alex Elder --- include/linux/ceph/osd_client.h | 1 + include/linux/ceph/rados.h | 2 +- net/ceph/osd_client.c | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index fd47e872ebcc..e94f5da251d6 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -76,6 +76,7 @@ struct ceph_osd_data { struct ceph_osd_req_op { u16 op; /* CEPH_OSD_OP_* */ + u32 flags; /* CEPH_OSD_OP_FLAG_* */ u32 payload_len; union { struct ceph_osd_data raw_data_in; diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 96292df4041b..8f9bf4570215 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -382,7 +382,7 @@ enum { */ struct ceph_osd_op { __le16 op; /* CEPH_OSD_OP_* */ - __le32 flags; /* CEPH_OSD_FLAG_* */ + __le32 flags; /* CEPH_OSD_OP_FLAG_* */ union { struct { __le64 offset, length; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 0676f2b199d6..5d7fd0b8c1c8 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -688,7 +688,9 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req, return 0; } + dst->op = cpu_to_le16(src->op); + dst->flags = cpu_to_le32(src->flags); dst->payload_len = cpu_to_le32(src->payload_len); return request_data_len; -- cgit v1.2.3-59-g8ed1b From c647b8a8c6366f849c2a237bfe525cb1d316d5f4 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 25 Feb 2014 16:22:27 +0200 Subject: libceph: add support for CEPH_OSD_OP_SETALLOCHINT osd op This is primarily for rbd's benefit and is supposed to combat fragmentation: "... knowing that rbd images have a 4m size, librbd can pass a hint that will let the osd do the xfs allocation size ioctl on new files so that they are allocated in 1m or 4m chunks. We've seen cases where users with rbd workloads have very high levels of fragmentation in xfs and this would mitigate that and probably have a pretty nice performance benefit." SETALLOCHINT is considered advisory, so our backwards compatibility mechanism here is to set FAILOK flag for all SETALLOCHINT ops. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Reviewed-by: Alex Elder --- include/linux/ceph/osd_client.h | 8 ++++++++ include/linux/ceph/rados.h | 7 +++++++ net/ceph/osd_client.c | 27 +++++++++++++++++++++++++++ 3 files changed, 42 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index e94f5da251d6..c42d1ada685f 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -103,6 +103,10 @@ struct ceph_osd_req_op { u32 timeout; __u8 flag; } watch; + struct { + u64 expected_object_size; + u64 expected_write_size; + } alloc_hint; }; }; @@ -294,6 +298,10 @@ extern void osd_req_op_cls_init(struct ceph_osd_request *osd_req, extern void osd_req_op_watch_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, u64 cookie, u64 version, int flag); +extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req, + unsigned int which, + u64 expected_object_size, + u64 expected_write_size); extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, struct ceph_snap_context *snapc, diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 8f9bf4570215..2caabef8d369 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -227,6 +227,9 @@ enum { CEPH_OSD_OP_OMAPRMKEYS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 24, CEPH_OSD_OP_OMAP_CMP = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 25, + /* hints */ + CEPH_OSD_OP_SETALLOCHINT = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 35, + /** multi **/ CEPH_OSD_OP_CLONERANGE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_MULTI | 1, CEPH_OSD_OP_ASSERT_SRC_VERSION = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 2, @@ -416,6 +419,10 @@ struct ceph_osd_op { __le64 offset, length; __le64 src_offset; } __attribute__ ((packed)) clonerange; + struct { + __le64 expected_object_size; + __le64 expected_write_size; + } __attribute__ ((packed)) alloc_hint; }; __le32 payload_len; } __attribute__ ((packed)); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 5d7fd0b8c1c8..71830d79b0f4 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -436,6 +436,7 @@ static bool osd_req_opcode_valid(u16 opcode) case CEPH_OSD_OP_OMAPCLEAR: case CEPH_OSD_OP_OMAPRMKEYS: case CEPH_OSD_OP_OMAP_CMP: + case CEPH_OSD_OP_SETALLOCHINT: case CEPH_OSD_OP_CLONERANGE: case CEPH_OSD_OP_ASSERT_SRC_VERSION: case CEPH_OSD_OP_SRC_CMPXATTR: @@ -591,6 +592,26 @@ void osd_req_op_watch_init(struct ceph_osd_request *osd_req, } EXPORT_SYMBOL(osd_req_op_watch_init); +void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req, + unsigned int which, + u64 expected_object_size, + u64 expected_write_size) +{ + struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, + CEPH_OSD_OP_SETALLOCHINT); + + op->alloc_hint.expected_object_size = expected_object_size; + op->alloc_hint.expected_write_size = expected_write_size; + + /* + * CEPH_OSD_OP_SETALLOCHINT op is advisory and therefore deemed + * not worth a feature bit. Set FAILOK per-op flag to make + * sure older osds don't trip over an unsupported opcode. + */ + op->flags |= CEPH_OSD_OP_FLAG_FAILOK; +} +EXPORT_SYMBOL(osd_req_op_alloc_hint_init); + static void ceph_osdc_msg_data_add(struct ceph_msg *msg, struct ceph_osd_data *osd_data) { @@ -681,6 +702,12 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req, dst->watch.ver = cpu_to_le64(src->watch.ver); dst->watch.flag = src->watch.flag; break; + case CEPH_OSD_OP_SETALLOCHINT: + dst->alloc_hint.expected_object_size = + cpu_to_le64(src->alloc_hint.expected_object_size); + dst->alloc_hint.expected_write_size = + cpu_to_le64(src->alloc_hint.expected_write_size); + break; default: pr_err("unsupported osd opcode %s\n", ceph_osd_op_name(src->op)); -- cgit v1.2.3-59-g8ed1b From 7cc69d42e6950404587bef9489a5ed6f9f6bab4e Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 25 Feb 2014 16:22:27 +0200 Subject: libceph: bump CEPH_OSD_MAX_OP to 3 Our longest osd request now contains 3 ops: copyup+hint+write. Also, CEPH_OSD_MAX_OP value in a BUG_ON in rbd_osd_req_callback() was hard-coded to 2. Fix it, and switch to rbd_assert while at it. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Reviewed-by: Alex Elder --- drivers/block/rbd.c | 2 +- include/linux/ceph/osd_client.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index b55b7812cf93..4c612c4041b6 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1654,7 +1654,7 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, if (osd_req->r_result < 0) obj_request->result = osd_req->r_result; - BUG_ON(osd_req->r_num_ops > 2); + rbd_assert(osd_req->r_num_ops <= CEPH_OSD_MAX_OP); /* * We support a 64-bit length, but ultimately it has to be diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index c42d1ada685f..94ec69672164 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -43,7 +43,7 @@ struct ceph_osd { }; -#define CEPH_OSD_MAX_OP 2 +#define CEPH_OSD_MAX_OP 3 enum ceph_osd_data_type { CEPH_OSD_DATA_TYPE_NONE = 0, -- cgit v1.2.3-59-g8ed1b From 19913b4eac4a230dccb548931358398f45dabe4c Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 6 Mar 2014 16:40:32 +0800 Subject: ceph: add get_name() NFS export callback Use the newly introduced LOOKUPNAME MDS request to connect child inode to its parent directory. Signed-off-by: Yan, Zheng Reviewed-by: Sage Weil --- fs/ceph/export.c | 40 ++++++++++++++++++++++++++++++++++ fs/ceph/inode.c | 51 +++++++++++++++++++++++++++++++++++++++++++- fs/ceph/strings.c | 1 + include/linux/ceph/ceph_fs.h | 1 + 4 files changed, 92 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/ceph/export.c b/fs/ceph/export.c index eb66408ff236..00d6af6a32ec 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -202,9 +202,49 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb, return dentry; } +static int ceph_get_name(struct dentry *parent, char *name, + struct dentry *child) +{ + struct ceph_mds_client *mdsc; + struct ceph_mds_request *req; + int err; + + mdsc = ceph_inode_to_client(child->d_inode)->mdsc; + req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME, + USE_ANY_MDS); + if (IS_ERR(req)) + return PTR_ERR(req); + + mutex_lock(&parent->d_inode->i_mutex); + + req->r_inode = child->d_inode; + ihold(child->d_inode); + req->r_ino2 = ceph_vino(parent->d_inode); + req->r_locked_dir = parent->d_inode; + req->r_num_caps = 2; + err = ceph_mdsc_do_request(mdsc, NULL, req); + + mutex_unlock(&parent->d_inode->i_mutex); + + if (!err) { + struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; + memcpy(name, rinfo->dname, rinfo->dname_len); + name[rinfo->dname_len] = 0; + dout("get_name %p ino %llx.%llx name %s\n", + child, ceph_vinop(child->d_inode), name); + } else { + dout("get_name %p ino %llx.%llx err %d\n", + child, ceph_vinop(child->d_inode), err); + } + + ceph_mdsc_put_request(req); + return err; +} + const struct export_operations ceph_export_ops = { .encode_fh = ceph_encode_fh, .fh_to_dentry = ceph_fh_to_dentry, .fh_to_parent = ceph_fh_to_parent, .get_parent = ceph_get_parent, + .get_name = ceph_get_name, }; diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 8bf2384bd423..91d6c9d49e3e 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1044,10 +1044,59 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, session, req->r_request_started, -1, &req->r_caps_reservation); if (err < 0) - return err; + goto done; } else { WARN_ON_ONCE(1); } + + if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME) { + struct qstr dname; + struct dentry *dn, *parent; + + BUG_ON(!rinfo->head->is_target); + BUG_ON(req->r_dentry); + + parent = d_find_any_alias(dir); + BUG_ON(!parent); + + dname.name = rinfo->dname; + dname.len = rinfo->dname_len; + dname.hash = full_name_hash(dname.name, dname.len); + vino.ino = le64_to_cpu(rinfo->targeti.in->ino); + vino.snap = le64_to_cpu(rinfo->targeti.in->snapid); +retry_lookup: + dn = d_lookup(parent, &dname); + dout("d_lookup on parent=%p name=%.*s got %p\n", + parent, dname.len, dname.name, dn); + + if (!dn) { + dn = d_alloc(parent, &dname); + dout("d_alloc %p '%.*s' = %p\n", parent, + dname.len, dname.name, dn); + if (dn == NULL) { + dput(parent); + err = -ENOMEM; + goto done; + } + err = ceph_init_dentry(dn); + if (err < 0) { + dput(dn); + dput(parent); + goto done; + } + } else if (dn->d_inode && + (ceph_ino(dn->d_inode) != vino.ino || + ceph_snap(dn->d_inode) != vino.snap)) { + dout(" dn %p points to wrong inode %p\n", + dn, dn->d_inode); + d_delete(dn); + dput(dn); + goto retry_lookup; + } + + req->r_dentry = dn; + dput(parent); + } } if (rinfo->head->is_target) { diff --git a/fs/ceph/strings.c b/fs/ceph/strings.c index 4440f447fd3f..51cc23e48111 100644 --- a/fs/ceph/strings.c +++ b/fs/ceph/strings.c @@ -54,6 +54,7 @@ const char *ceph_mds_op_name(int op) case CEPH_MDS_OP_LOOKUPHASH: return "lookuphash"; case CEPH_MDS_OP_LOOKUPPARENT: return "lookupparent"; case CEPH_MDS_OP_LOOKUPINO: return "lookupino"; + case CEPH_MDS_OP_LOOKUPNAME: return "lookupname"; case CEPH_MDS_OP_GETATTR: return "getattr"; case CEPH_MDS_OP_SETXATTR: return "setxattr"; case CEPH_MDS_OP_SETATTR: return "setattr"; diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 25bfb0eff772..35f345f7b3a3 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -332,6 +332,7 @@ enum { CEPH_MDS_OP_LOOKUPHASH = 0x00102, CEPH_MDS_OP_LOOKUPPARENT = 0x00103, CEPH_MDS_OP_LOOKUPINO = 0x00104, + CEPH_MDS_OP_LOOKUPNAME = 0x00105, CEPH_MDS_OP_SETXATTR = 0x01105, CEPH_MDS_OP_RMXATTR = 0x01106, -- cgit v1.2.3-59-g8ed1b From 8e2f1a63f2217365223026422a2f8ba5967051d6 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 2 Apr 2014 20:52:57 +0200 Subject: packet: fix packet_direct_xmit for BQL enabled drivers Currently, in packet_direct_xmit() we test the assigned netdevice queue for netif_xmit_frozen_or_stopped() before doing an ndo_start_xmit(). This can have the side-effect that BQL enabled drivers which make use of netdev_tx_sent_queue() internally, set __QUEUE_STATE_STACK_XOFF from within the stack and would not fully fill the device's TX ring from packet sockets with PACKET_QDISC_BYPASS enabled. Instead, use a test without BQL bit so that bursts can be absorbed into the NICs TX ring. Fix and code suggested by Eric Dumazet, thanks! Signed-off-by: Eric Dumazet Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/netdevice.h | 24 +++++++++++++++++++----- net/packet/af_packet.c | 2 +- 2 files changed, 20 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 775cc956ff78..7ed3a3aa6604 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -519,11 +519,18 @@ enum netdev_queue_state_t { __QUEUE_STATE_DRV_XOFF, __QUEUE_STATE_STACK_XOFF, __QUEUE_STATE_FROZEN, -#define QUEUE_STATE_ANY_XOFF ((1 << __QUEUE_STATE_DRV_XOFF) | \ - (1 << __QUEUE_STATE_STACK_XOFF)) -#define QUEUE_STATE_ANY_XOFF_OR_FROZEN (QUEUE_STATE_ANY_XOFF | \ - (1 << __QUEUE_STATE_FROZEN)) }; + +#define QUEUE_STATE_DRV_XOFF (1 << __QUEUE_STATE_DRV_XOFF) +#define QUEUE_STATE_STACK_XOFF (1 << __QUEUE_STATE_STACK_XOFF) +#define QUEUE_STATE_FROZEN (1 << __QUEUE_STATE_FROZEN) + +#define QUEUE_STATE_ANY_XOFF (QUEUE_STATE_DRV_XOFF | QUEUE_STATE_STACK_XOFF) +#define QUEUE_STATE_ANY_XOFF_OR_FROZEN (QUEUE_STATE_ANY_XOFF | \ + QUEUE_STATE_FROZEN) +#define QUEUE_STATE_DRV_XOFF_OR_FROZEN (QUEUE_STATE_DRV_XOFF | \ + QUEUE_STATE_FROZEN) + /* * __QUEUE_STATE_DRV_XOFF is used by drivers to stop the transmit queue. The * netif_tx_* functions below are used to manipulate this flag. The @@ -2252,11 +2259,18 @@ static inline bool netif_xmit_stopped(const struct netdev_queue *dev_queue) return dev_queue->state & QUEUE_STATE_ANY_XOFF; } -static inline bool netif_xmit_frozen_or_stopped(const struct netdev_queue *dev_queue) +static inline bool +netif_xmit_frozen_or_stopped(const struct netdev_queue *dev_queue) { return dev_queue->state & QUEUE_STATE_ANY_XOFF_OR_FROZEN; } +static inline bool +netif_xmit_frozen_or_drv_stopped(const struct netdev_queue *dev_queue) +{ + return dev_queue->state & QUEUE_STATE_DRV_XOFF_OR_FROZEN; +} + static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue, unsigned int bytes) { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c81a9719b5b2..72e0c71fb01d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -261,7 +261,7 @@ static int packet_direct_xmit(struct sk_buff *skb) local_bh_disable(); HARD_TX_LOCK(dev, txq, smp_processor_id()); - if (!netif_xmit_frozen_or_stopped(txq)) { + if (!netif_xmit_frozen_or_drv_stopped(txq)) { ret = ops->ndo_start_xmit(skb, dev); if (ret == NETDEV_TX_OK) txq_trans_update(txq); -- cgit v1.2.3-59-g8ed1b From d0290214de712150b118a532ded378a29255893b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 2 Apr 2014 23:09:31 +0200 Subject: net: add busy_poll device feature Currently there is no way how to find out if a device supports busy polling. So add a feature and make it dependent on ndo_busy_poll existence. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 2 ++ net/core/dev.c | 7 +++++++ net/core/ethtool.c | 1 + 3 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 5a09a48f2658..c26d0ec2ef3a 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -63,6 +63,7 @@ enum { NETIF_F_HW_VLAN_STAG_RX_BIT, /* Receive VLAN STAG HW acceleration */ NETIF_F_HW_VLAN_STAG_FILTER_BIT,/* Receive filtering on VLAN STAGs */ NETIF_F_HW_L2FW_DOFFLOAD_BIT, /* Allow L2 Forwarding in Hardware */ + NETIF_F_BUSY_POLL_BIT, /* Busy poll */ /* * Add your fresh new feature above and remember to update @@ -118,6 +119,7 @@ enum { #define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX) #define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX) #define NETIF_F_HW_L2FW_DOFFLOAD __NETIF_F(HW_L2FW_DOFFLOAD) +#define NETIF_F_BUSY_POLL __NETIF_F(BUSY_POLL) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ diff --git a/net/core/dev.c b/net/core/dev.c index 757063420ce0..75e88e0372cb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5696,6 +5696,13 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, } } +#ifdef CONFIG_NET_RX_BUSY_POLL + if (dev->netdev_ops->ndo_busy_poll) + features |= NETIF_F_BUSY_POLL; + else +#endif + features &= ~NETIF_F_BUSY_POLL; + return features; } diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 30071dec287a..640ba0e5831c 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -97,6 +97,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_RXFCS_BIT] = "rx-fcs", [NETIF_F_RXALL_BIT] = "rx-all", [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload", + [NETIF_F_BUSY_POLL_BIT] = "busy-poll", }; static int ethtool_get_features(struct net_device *dev, void __user *useraddr) -- cgit v1.2.3-59-g8ed1b From 64400c3791d9fcebf23318a289f9da964547a6f3 Mon Sep 17 00:00:00 2001 From: Bryan Wu Date: Wed, 19 Feb 2014 14:48:36 -0800 Subject: gpu: host1x: export host1x_syncpt_incr_max() function Tegra V4L2 camera driver needs this function to do frame capture. Signed-off-by: Bryan Wu Signed-off-by: Thierry Reding --- drivers/gpu/host1x/syncpt.c | 1 + include/linux/host1x.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c index bfb09d802abd..b10550ee1d89 100644 --- a/drivers/gpu/host1x/syncpt.c +++ b/drivers/gpu/host1x/syncpt.c @@ -102,6 +102,7 @@ u32 host1x_syncpt_incr_max(struct host1x_syncpt *sp, u32 incrs) { return (u32)atomic_add_return(incrs, &sp->max_val); } +EXPORT_SYMBOL(host1x_syncpt_incr_max); /* * Write cached syncpoint and waitbase values to hardware. diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 3af847273277..d2b52999e771 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -136,6 +136,7 @@ u32 host1x_syncpt_id(struct host1x_syncpt *sp); u32 host1x_syncpt_read_min(struct host1x_syncpt *sp); u32 host1x_syncpt_read_max(struct host1x_syncpt *sp); int host1x_syncpt_incr(struct host1x_syncpt *sp); +u32 host1x_syncpt_incr_max(struct host1x_syncpt *sp, u32 incrs); int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout, u32 *value); struct host1x_syncpt *host1x_syncpt_request(struct device *dev, -- cgit v1.2.3-59-g8ed1b From eb13e832f823f6c110ea53e3067bafe22b87de63 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Sun, 9 Mar 2014 23:16:40 +0800 Subject: ceph: use fl->fl_file as owner identifier of flock and posix lock flock and posix lock should use fl->fl_file instead of process ID as owner identifier. (posix lock uses fl->fl_owner. fl->fl_owner is usually equal to fl->fl_file, but it also can be a customized value). The process ID of who holds the lock is just for F_GETLK fcntl(2). The fix is rename the 'pid' fields of struct ceph_mds_request_args and struct ceph_filelock to 'owner', rename 'pid_namespace' fields to 'pid'. Assign fl->fl_file to the 'owner' field of lock messages. We also set the most significant bit of the 'owner' field. MDS can use that bit to distinguish between old and new clients. The MDS counterpart of this patch modifies the flock code to not take the 'pid_namespace' into consideration when checking conflict locks. Signed-off-by: Yan, Zheng Reviewed-by: Sage Weil --- fs/ceph/locks.c | 61 +++++++++++++++++++++++++++++--------------- fs/ceph/super.c | 1 + fs/ceph/super.h | 1 + include/linux/ceph/ceph_fs.h | 4 +-- 4 files changed, 45 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index f91a569a20fb..d94ba0df9f4d 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -2,11 +2,31 @@ #include #include +#include #include "super.h" #include "mds_client.h" #include +static u64 lock_secret; + +static inline u64 secure_addr(void *addr) +{ + u64 v = lock_secret ^ (u64)(unsigned long)addr; + /* + * Set the most significant bit, so that MDS knows the 'owner' + * is sufficient to identify the owner of lock. (old code uses + * both 'owner' and 'pid') + */ + v |= (1ULL << 63); + return v; +} + +void __init ceph_flock_init(void) +{ + get_random_bytes(&lock_secret, sizeof(lock_secret)); +} + /** * Implement fcntl and flock locking functions. */ @@ -14,11 +34,11 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, int cmd, u8 wait, struct file_lock *fl) { struct inode *inode = file_inode(file); - struct ceph_mds_client *mdsc = - ceph_sb_to_client(inode->i_sb)->mdsc; + struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_mds_request *req; int err; u64 length = 0; + u64 owner; req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); if (IS_ERR(req)) @@ -32,25 +52,27 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, else length = fl->fl_end - fl->fl_start + 1; - dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " - "length: %llu, wait: %d, type: %d", (int)lock_type, - (int)operation, (u64)fl->fl_pid, fl->fl_start, - length, wait, fl->fl_type); + if (lock_type == CEPH_LOCK_FCNTL) + owner = secure_addr(fl->fl_owner); + else + owner = secure_addr(fl->fl_file); + + dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, " + "start: %llu, length: %llu, wait: %d, type: %d", (int)lock_type, + (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length, + wait, fl->fl_type); req->r_args.filelock_change.rule = lock_type; req->r_args.filelock_change.type = cmd; + req->r_args.filelock_change.owner = cpu_to_le64(owner); req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid); - /* This should be adjusted, but I'm not sure if - namespaces actually get id numbers*/ - req->r_args.filelock_change.pid_namespace = - cpu_to_le64((u64)(unsigned long)fl->fl_nspid); req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start); req->r_args.filelock_change.length = cpu_to_le64(length); req->r_args.filelock_change.wait = wait; err = ceph_mdsc_do_request(mdsc, inode, req); - if ( operation == CEPH_MDS_OP_GETFILELOCK){ + if (operation == CEPH_MDS_OP_GETFILELOCK) { fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid); if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type) fl->fl_type = F_RDLCK; @@ -93,8 +115,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK) return -ENOLCK; - fl->fl_nspid = get_pid(task_tgid(current)); - dout("ceph_lock, fl_pid:%d", fl->fl_pid); + dout("ceph_lock, fl_owner: %p", fl->fl_owner); /* set wait bit as appropriate, then make command as Ceph expects it*/ if (IS_GETLK(cmd)) @@ -111,7 +132,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl); if (!err) { - if ( op != CEPH_MDS_OP_GETFILELOCK ){ + if (op != CEPH_MDS_OP_GETFILELOCK) { dout("mds locked, locking locally"); err = posix_lock_file(file, fl, NULL); if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { @@ -145,8 +166,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK) return -ENOLCK; - fl->fl_nspid = get_pid(task_tgid(current)); - dout("ceph_flock, fl_pid:%d", fl->fl_pid); + dout("ceph_flock, fl_file: %p", fl->fl_file); if (IS_SETLKW(cmd)) wait = 1; @@ -289,13 +309,14 @@ int lock_to_ceph_filelock(struct file_lock *lock, struct ceph_filelock *cephlock) { int err = 0; - cephlock->start = cpu_to_le64(lock->fl_start); cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); cephlock->client = cpu_to_le64(0); - cephlock->pid = cpu_to_le64(lock->fl_pid); - cephlock->pid_namespace = - cpu_to_le64((u64)(unsigned long)lock->fl_nspid); + cephlock->pid = cpu_to_le64((u64)lock->fl_pid); + if (lock->fl_flags & FL_POSIX) + cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner)); + else + cephlock->owner = cpu_to_le64(secure_addr(lock->fl_file)); switch (lock->fl_type) { case F_RDLCK: diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 10a4ccbf38da..06150fd745ac 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -1026,6 +1026,7 @@ static int __init init_ceph(void) if (ret) goto out; + ceph_flock_init(); ceph_xattr_init(); ret = register_filesystem(&ceph_fs_type); if (ret) diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 70bb183385b7..7866cd05a6bb 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -871,6 +871,7 @@ extern long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg); extern const struct export_operations ceph_export_ops; /* locks.c */ +extern __init void ceph_flock_init(void); extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl); extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl); extern void ceph_count_locks(struct inode *inode, int *p_num, int *f_num); diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 35f345f7b3a3..5f6db18d72e8 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -421,8 +421,8 @@ union ceph_mds_request_args { struct { __u8 rule; /* currently fcntl or flock */ __u8 type; /* shared, exclusive, remove*/ + __le64 owner; /* owner of the lock */ __le64 pid; /* process id requesting the lock */ - __le64 pid_namespace; __le64 start; /* initial location to lock */ __le64 length; /* num bytes to lock from start */ __u8 wait; /* will caller wait for lock to become available? */ @@ -533,8 +533,8 @@ struct ceph_filelock { __le64 start;/* file offset to start lock at */ __le64 length; /* num bytes to lock; 0 for all following start */ __le64 client; /* which client holds the lock */ + __le64 owner; /* owner the lock */ __le64 pid; /* process id holding the lock on the client */ - __le64 pid_namespace; __u8 type; /* shared lock, exclusive lock, or unlock */ } __attribute__ ((packed)); -- cgit v1.2.3-59-g8ed1b From e2b149cc4ba00766aceb87950c6de72ea7fc8b2e Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 19 Mar 2014 16:58:37 +0200 Subject: crush: add chooseleaf_vary_r tunable The current crush_choose_firstn code will re-use the same 'r' value for the recursive call. That means that if we are hitting a collision or rejection for some reason (say, an OSD that is marked out) and need to retry, we will keep making the same (bad) choice in that recursive selection. Introduce a tunable that fixes that behavior by incorporating the parent 'r' value into the recursive starting point, so that a different path will be taken in subsequent placement attempts. Note that this was done from the get-go for the new crush_choose_indep algorithm. This was exposed by a user who was seeing PGs stuck in active+remapped after reweight-by-utilization because the up set mapped to a single OSD. Reflects ceph.git commit a8e6c9fbf88bad056dd05d3eb790e98a5e43451a. Signed-off-by: Ilya Dryomov Reviewed-by: Josh Durgin --- include/linux/crush/crush.h | 6 ++++++ net/ceph/crush/mapper.c | 30 ++++++++++++++++++++++++------ 2 files changed, 30 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index acaa5615d634..75f36a6c7f67 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -173,6 +173,12 @@ struct crush_map { * apply to a collision: in that case we will retry as we used * to. */ __u32 chooseleaf_descend_once; + + /* if non-zero, feed r into chooseleaf, bit-shifted right by (r-1) + * bits. a value of 1 is best for new clusters. for legacy clusters + * that want to limit reshuffling, a value of 3 or 4 will make the + * mappings line up a bit better with previous mappings. */ + __u8 chooseleaf_vary_r; }; diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index b3fb84903b30..947150cde297 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -295,7 +295,9 @@ static int is_out(const struct crush_map *map, * @local_retries: localized retries * @local_fallback_retries: localized fallback retries * @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose) + * @vary_r: pass r to recursive calls * @out2: second output vector for leaf items (if @recurse_to_leaf) + * @parent_r: r value passed from the parent */ static int crush_choose_firstn(const struct crush_map *map, struct crush_bucket *bucket, @@ -307,7 +309,9 @@ static int crush_choose_firstn(const struct crush_map *map, unsigned int local_retries, unsigned int local_fallback_retries, int recurse_to_leaf, - int *out2) + unsigned int vary_r, + int *out2, + int parent_r) { int rep; unsigned int ftotal, flocal; @@ -319,8 +323,11 @@ static int crush_choose_firstn(const struct crush_map *map, int itemtype; int collide, reject; - dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "", - bucket->id, x, outpos, numrep); + dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n", + recurse_to_leaf ? "_LEAF" : "", + bucket->id, x, outpos, numrep, + tries, recurse_tries, local_retries, local_fallback_retries, + parent_r); for (rep = outpos; rep < numrep; rep++) { /* keep trying until we get a non-out, non-colliding item */ @@ -335,7 +342,7 @@ static int crush_choose_firstn(const struct crush_map *map, do { collide = 0; retry_bucket = 0; - r = rep; + r = rep + parent_r; /* r' = r + f_total */ r += ftotal; @@ -387,6 +394,11 @@ static int crush_choose_firstn(const struct crush_map *map, reject = 0; if (!collide && recurse_to_leaf) { if (item < 0) { + int sub_r; + if (vary_r) + sub_r = r >> (vary_r-1); + else + sub_r = 0; if (crush_choose_firstn(map, map->buckets[-1-item], weight, weight_max, @@ -396,7 +408,9 @@ static int crush_choose_firstn(const struct crush_map *map, local_retries, local_fallback_retries, 0, - NULL) <= outpos) + vary_r, + NULL, + sub_r) <= outpos) /* didn't get leaf */ reject = 1; } else { @@ -653,6 +667,8 @@ int crush_do_rule(const struct crush_map *map, int choose_local_retries = map->choose_local_tries; int choose_local_fallback_retries = map->choose_local_fallback_tries; + int vary_r = map->chooseleaf_vary_r; + if ((__u32)ruleno >= map->max_rules) { dprintk(" bad ruleno %d\n", ruleno); return 0; @@ -745,7 +761,9 @@ int crush_do_rule(const struct crush_map *map, choose_local_retries, choose_local_fallback_retries, recurse_to_leaf, - c+osize); + vary_r, + c+osize, + 0); } else { crush_choose_indep( map, -- cgit v1.2.3-59-g8ed1b From d83ed858f144e3cfbef883d4bc499113cdddabeb Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 19 Mar 2014 16:58:37 +0200 Subject: crush: add SET_CHOOSELEAF_VARY_R step This lets you adjust the vary_r tunable on a per-rule basis. Reflects ceph.git commit f944ccc20aee60a7d8da7e405ec75ad1cd449fac. Signed-off-by: Ilya Dryomov Reviewed-by: Josh Durgin --- include/linux/crush/crush.h | 1 + net/ceph/crush/mapper.c | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 75f36a6c7f67..4fad5f8ee01d 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -51,6 +51,7 @@ enum { CRUSH_RULE_SET_CHOOSELEAF_TRIES = 9, /* override chooseleaf_descend_once */ CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES = 10, CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES = 11, + CRUSH_RULE_SET_CHOOSELEAF_VARY_R = 12 }; /* diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 947150cde297..a1ef53c04415 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -709,6 +709,11 @@ int crush_do_rule(const struct crush_map *map, choose_local_fallback_retries = curstep->arg1; break; + case CRUSH_RULE_SET_CHOOSELEAF_VARY_R: + if (curstep->arg1 >= 0) + vary_r = curstep->arg1; + break; + case CRUSH_RULE_CHOOSELEAF_FIRSTN: case CRUSH_RULE_CHOOSE_FIRSTN: firstn = 1; -- cgit v1.2.3-59-g8ed1b From 07bd7de47a65767432ceb66d4ab30cdc05ed2b35 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 19 Mar 2014 16:58:37 +0200 Subject: crush: support chooseleaf_vary_r tunable (tunables3) by default Add TUNABLES3 feature (chooseleaf_vary_r tunable) to a set of features supported by default. Signed-off-by: Ilya Dryomov Reviewed-by: Josh Durgin --- include/linux/ceph/ceph_features.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 138448f766b4..77c097fe9ea9 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -43,6 +43,13 @@ #define CEPH_FEATURE_CRUSH_V2 (1ULL<<36) /* new indep; SET_* steps */ #define CEPH_FEATURE_EXPORT_PEER (1ULL<<37) #define CEPH_FEATURE_OSD_ERASURE_CODES (1ULL<<38) +#define CEPH_FEATURE_OSD_TMAP2OMAP (1ULL<<38) /* overlap with EC */ +/* The process supports new-style OSDMap encoding. Monitors also use + this bit to determine if peers support NAK messages. */ +#define CEPH_FEATURE_OSDMAP_ENC (1ULL<<39) +#define CEPH_FEATURE_MDS_INLINE_DATA (1ULL<<40) +#define CEPH_FEATURE_CRUSH_TUNABLES3 (1ULL<<41) +#define CEPH_FEATURE_OSD_PRIMARY_AFFINITY (1ULL<<41) /* overlap w/ tunables3 */ /* * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature @@ -82,7 +89,8 @@ static inline u64 ceph_sanitize_features(u64 features) CEPH_FEATURE_OSDHASHPSPOOL | \ CEPH_FEATURE_OSD_CACHEPOOL | \ CEPH_FEATURE_CRUSH_V2 | \ - CEPH_FEATURE_EXPORT_PEER) + CEPH_FEATURE_EXPORT_PEER | \ + CEPH_FEATURE_CRUSH_TUNABLES3) #define CEPH_FEATURES_REQUIRED_DEFAULT \ (CEPH_FEATURE_NOSRCADDR | \ -- cgit v1.2.3-59-g8ed1b From a2505d63ee0541d9b4685250b033192e68222e97 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 13 Mar 2014 16:36:13 +0200 Subject: libceph: split osdmap allocation and decode steps Split osdmap allocation and initialization into a separate function, ceph_osdmap_decode(). Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osdmap.h | 2 +- net/ceph/osd_client.c | 2 +- net/ceph/osdmap.c | 44 +++++++++++++++++++++++++++++--------------- 3 files changed, 31 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 8c8b3cefc28b..46c3e304c3d8 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -156,7 +156,7 @@ static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid) return 0; } -extern struct ceph_osdmap *osdmap_decode(void **p, void *end); +extern struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end); extern struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, struct ceph_osdmap *map, struct ceph_messenger *msgr); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 71830d79b0f4..6f64eec18851 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -2062,7 +2062,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) int skipped_map = 0; dout("taking full map %u len %d\n", epoch, maplen); - newmap = osdmap_decode(&p, p+maplen); + newmap = ceph_osdmap_decode(&p, p+maplen); if (IS_ERR(newmap)) { err = PTR_ERR(newmap); goto bad; diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 4dd000d128fd..a82df6ea0749 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -684,9 +684,8 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, int max) /* * decode a full map. */ -struct ceph_osdmap *osdmap_decode(void **p, void *end) +static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map) { - struct ceph_osdmap *map; u16 version; u32 len, max, i; int err = -EINVAL; @@ -694,14 +693,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) void *start = *p; struct ceph_pg_pool_info *pi; - dout("osdmap_decode %p to %p len %d\n", *p, end, (int)(end - *p)); - - map = kzalloc(sizeof(*map), GFP_NOFS); - if (map == NULL) - return ERR_PTR(-ENOMEM); - - map->pg_temp = RB_ROOT; - mutex_init(&map->crush_scratch_mutex); + dout("%s %p to %p len %d\n", __func__, *p, end, (int)(end - *p)); ceph_decode_16_safe(p, end, version, bad); if (version > 6) { @@ -751,7 +743,6 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) err = osdmap_set_max_osd(map, max); if (err < 0) goto bad; - dout("osdmap_decode max_osd = %d\n", map->max_osd); /* osds */ err = -EINVAL; @@ -819,7 +810,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) *p = end; dout("full osdmap epoch %d max_osd %d\n", map->epoch, map->max_osd); - return map; + return 0; bad: pr_err("corrupt full osdmap (%d) epoch %d off %d (%p of %p-%p)\n", @@ -827,8 +818,31 @@ bad: print_hex_dump(KERN_DEBUG, "osdmap: ", DUMP_PREFIX_OFFSET, 16, 1, start, end - start, true); - ceph_osdmap_destroy(map); - return ERR_PTR(err); + return err; +} + +/* + * Allocate and decode a full map. + */ +struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end) +{ + struct ceph_osdmap *map; + int ret; + + map = kzalloc(sizeof(*map), GFP_NOFS); + if (!map) + return ERR_PTR(-ENOMEM); + + map->pg_temp = RB_ROOT; + mutex_init(&map->crush_scratch_mutex); + + ret = osdmap_decode(p, end, map); + if (ret) { + ceph_osdmap_destroy(map); + return ERR_PTR(ret); + } + + return map; } /* @@ -872,7 +886,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, if (len > 0) { dout("apply_incremental full map len %d, %p to %p\n", len, *p, end); - return osdmap_decode(p, min(*p+len, end)); + return ceph_osdmap_decode(p, min(*p+len, end)); } /* new crush? */ -- cgit v1.2.3-59-g8ed1b From 35a935d75d51abe58d3427a8b4ae3745a5a14e1c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 21 Mar 2014 19:05:29 +0200 Subject: libceph: generalize ceph_pg_mapping In preparation for adding support for primary_temp mappings, generalize struct ceph_pg_mapping so it can hold mappings other than pg_temp. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osdmap.h | 9 +++++++-- net/ceph/debugfs.c | 4 ++-- net/ceph/osdmap.c | 8 ++++---- 3 files changed, 13 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 46c3e304c3d8..4837e58e3203 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -60,8 +60,13 @@ struct ceph_object_id { struct ceph_pg_mapping { struct rb_node node; struct ceph_pg pgid; - int len; - int osds[]; + + union { + struct { + int len; + int osds[]; + } pg_temp; + }; }; struct ceph_osdmap { diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index c45d235e774e..5865f2c9580a 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -88,9 +88,9 @@ static int osdmap_show(struct seq_file *s, void *p) seq_printf(s, "pg_temp %llu.%x [", pg->pgid.pool, pg->pgid.seed); - for (i = 0; i < pg->len; i++) + for (i = 0; i < pg->pg_temp.len; i++) seq_printf(s, "%s%d", (i == 0 ? "" : ","), - pg->osds[i]); + pg->pg_temp.osds[i]); seq_printf(s, "]\n"); } diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index be2a65fbd902..c67a309fdfc2 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -822,9 +822,9 @@ static int __decode_pg_temp(void **p, void *end, struct ceph_osdmap *map, return -ENOMEM; pg->pgid = pgid; - pg->len = len; + pg->pg_temp.len = len; for (i = 0; i < len; i++) - pg->osds[i] = ceph_decode_32(p); + pg->pg_temp.osds[i] = ceph_decode_32(p); ret = __insert_pg_mapping(pg, &map->pg_temp); if (ret) { @@ -1281,8 +1281,8 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, pool->pg_num_mask); pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); if (pg) { - *num = pg->len; - return pg->osds; + *num = pg->pg_temp.len; + return pg->pg_temp.osds; } /* crush */ -- cgit v1.2.3-59-g8ed1b From 9686f94c8cfc06e8afb7b2233ab8f1f6ac01957f Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 21 Mar 2014 19:05:29 +0200 Subject: libceph: primary_temp infrastructure Add primary_temp mappings infrastructure. struct ceph_pg_mapping is overloaded, primary_temp mappings are stored in an rb-tree, rooted at ceph_osdmap, in a manner similar to pg_temp mappings. Dump primary_temp mappings to /sys/kernel/debug/ceph//osdmap, one 'primary_temp ' per line, e.g: primary_temp 2.6 4 Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osdmap.h | 5 +++++ net/ceph/debugfs.c | 7 +++++++ net/ceph/osdmap.c | 10 +++++++++- 3 files changed, 21 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 4837e58e3203..db4fb6322aae 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -66,6 +66,9 @@ struct ceph_pg_mapping { int len; int osds[]; } pg_temp; + struct { + int osd; + } primary_temp; }; }; @@ -83,6 +86,8 @@ struct ceph_osdmap { struct ceph_entity_addr *osd_addr; struct rb_root pg_temp; + struct rb_root primary_temp; + struct rb_root pg_pools; u32 pool_max; diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 5865f2c9580a..612bf55e6a8b 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -93,6 +93,13 @@ static int osdmap_show(struct seq_file *s, void *p) pg->pg_temp.osds[i]); seq_printf(s, "]\n"); } + for (n = rb_first(&map->primary_temp); n; n = rb_next(n)) { + struct ceph_pg_mapping *pg = + rb_entry(n, struct ceph_pg_mapping, node); + + seq_printf(s, "primary_temp %llu.%x %d\n", pg->pgid.pool, + pg->pgid.seed, pg->primary_temp.osd); + } return 0; } diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index c67a309fdfc2..c0fc517ab321 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -343,7 +343,7 @@ bad: /* * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid - * to a set of osds) + * to a set of osds) and primary_temp (explicit primary setting) */ static int pgid_cmp(struct ceph_pg l, struct ceph_pg r) { @@ -633,6 +633,13 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map) rb_erase(&pg->node, &map->pg_temp); kfree(pg); } + while (!RB_EMPTY_ROOT(&map->primary_temp)) { + struct ceph_pg_mapping *pg = + rb_entry(rb_first(&map->primary_temp), + struct ceph_pg_mapping, node); + rb_erase(&pg->node, &map->primary_temp); + kfree(pg); + } while (!RB_EMPTY_ROOT(&map->pg_pools)) { struct ceph_pg_pool_info *pi = rb_entry(rb_first(&map->pg_pools), @@ -966,6 +973,7 @@ struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end) return ERR_PTR(-ENOMEM); map->pg_temp = RB_ROOT; + map->primary_temp = RB_ROOT; mutex_init(&map->crush_scratch_mutex); ret = osdmap_decode(p, end, map); -- cgit v1.2.3-59-g8ed1b From 2cfa34f2d67a36e292cbe6e4c1e60d212b7ba4d1 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 21 Mar 2014 19:05:30 +0200 Subject: libceph: primary_affinity infrastructure Add primary_affinity infrastructure. primary_affinity values are stored in an max_osd-sized array, hanging off ceph_osdmap, similar to a osd_weight array. Introduce {get,set}_primary_affinity() helpers, primarily to return CEPH_OSD_DEFAULT_PRIMARY_AFFINITY when no affinity has been set and to abstract out osd_primary_affinity array allocation and initialization. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osdmap.h | 3 +++ include/linux/ceph/rados.h | 4 ++++ net/ceph/debugfs.c | 5 +++-- net/ceph/osdmap.c | 47 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 57 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index db4fb6322aae..6e030cb3c9ca 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -88,6 +88,8 @@ struct ceph_osdmap { struct rb_root pg_temp; struct rb_root primary_temp; + u32 *osd_primary_affinity; + struct rb_root pg_pools; u32 pool_max; @@ -134,6 +136,7 @@ static inline bool ceph_osdmap_flag(struct ceph_osdmap *map, int flag) } extern char *ceph_osdmap_state_str(char *str, int len, int state); +extern u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd); static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map, int osd) diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 2caabef8d369..bb6f40c9cb0f 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -133,6 +133,10 @@ extern const char *ceph_osd_state_name(int s); #define CEPH_OSD_IN 0x10000 #define CEPH_OSD_OUT 0 +/* osd primary-affinity. fixed point value: 0x10000 == baseline */ +#define CEPH_OSD_MAX_PRIMARY_AFFINITY 0x10000 +#define CEPH_OSD_DEFAULT_PRIMARY_AFFINITY 0x10000 + /* * osd map flag bits diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 612bf55e6a8b..34453a2b4b4d 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -77,10 +77,11 @@ static int osdmap_show(struct seq_file *s, void *p) int state = map->osd_state[i]; char sb[64]; - seq_printf(s, "osd%d\t%s\t%3d%%\t(%s)\n", + seq_printf(s, "osd%d\t%s\t%3d%%\t(%s)\t%3d%%\n", i, ceph_pr_addr(&addr->in_addr), ((map->osd_weight[i]*100) >> 16), - ceph_osdmap_state_str(sb, sizeof(sb), state)); + ceph_osdmap_state_str(sb, sizeof(sb), state), + ((ceph_get_primary_affinity(map, i)*100) >> 16)); } for (n = rb_first(&map->pg_temp); n; n = rb_next(n)) { struct ceph_pg_mapping *pg = diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index c2d793d3d98d..0ac129388e07 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -649,6 +649,7 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map) kfree(map->osd_state); kfree(map->osd_weight); kfree(map->osd_addr); + kfree(map->osd_primary_affinity); kfree(map); } @@ -685,6 +686,20 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, int max) map->osd_weight = weight; map->osd_addr = addr; + if (map->osd_primary_affinity) { + u32 *affinity; + + affinity = krealloc(map->osd_primary_affinity, + max*sizeof(*affinity), GFP_NOFS); + if (!affinity) + return -ENOMEM; + + for (i = map->max_osd; i < max; i++) + affinity[i] = CEPH_OSD_DEFAULT_PRIMARY_AFFINITY; + + map->osd_primary_affinity = affinity; + } + map->max_osd = max; return 0; @@ -912,6 +927,38 @@ static int decode_new_primary_temp(void **p, void *end, return __decode_primary_temp(p, end, map, true); } +u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd) +{ + BUG_ON(osd >= map->max_osd); + + if (!map->osd_primary_affinity) + return CEPH_OSD_DEFAULT_PRIMARY_AFFINITY; + + return map->osd_primary_affinity[osd]; +} + +static int set_primary_affinity(struct ceph_osdmap *map, int osd, u32 aff) +{ + BUG_ON(osd >= map->max_osd); + + if (!map->osd_primary_affinity) { + int i; + + map->osd_primary_affinity = kmalloc(map->max_osd*sizeof(u32), + GFP_NOFS); + if (!map->osd_primary_affinity) + return -ENOMEM; + + for (i = 0; i < map->max_osd; i++) + map->osd_primary_affinity[i] = + CEPH_OSD_DEFAULT_PRIMARY_AFFINITY; + } + + map->osd_primary_affinity[osd] = aff; + + return 0; +} + /* * decode a full map. */ -- cgit v1.2.3-59-g8ed1b From ddf3a21a03d0f01c5ba83deaecd2d0c381d5ef42 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 21 Mar 2014 19:05:31 +0200 Subject: libceph: enable OSDMAP_ENC feature bit Announce our support for "new" (v7 - split and separately versioned client and osd sections) osdmap enconding. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/ceph_features.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 77c097fe9ea9..7a4cab50b2cd 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -90,6 +90,7 @@ static inline u64 ceph_sanitize_features(u64 features) CEPH_FEATURE_OSD_CACHEPOOL | \ CEPH_FEATURE_CRUSH_V2 | \ CEPH_FEATURE_EXPORT_PEER | \ + CEPH_FEATURE_OSDMAP_ENC | \ CEPH_FEATURE_CRUSH_TUNABLES3) #define CEPH_FEATURES_REQUIRED_DEFAULT \ -- cgit v1.2.3-59-g8ed1b From 246138fa6787db6f4016f26604fdc05dc9f95627 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 24 Mar 2014 17:12:46 +0200 Subject: libceph: ceph_osd_{exists,is_up,is_down}(osd) definitions Sync up with ceph.git definitions. Bring in ceph_osd_is_down(). Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osdmap.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 6e030cb3c9ca..0895797b9e28 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -125,9 +125,21 @@ static inline void ceph_oid_copy(struct ceph_object_id *dest, dest->name_len = src->name_len; } +static inline int ceph_osd_exists(struct ceph_osdmap *map, int osd) +{ + return osd >= 0 && osd < map->max_osd && + (map->osd_state[osd] & CEPH_OSD_EXISTS); +} + static inline int ceph_osd_is_up(struct ceph_osdmap *map, int osd) { - return (osd < map->max_osd) && (map->osd_state[osd] & CEPH_OSD_UP); + return ceph_osd_exists(map, osd) && + (map->osd_state[osd] & CEPH_OSD_UP); +} + +static inline int ceph_osd_is_down(struct ceph_osdmap *map, int osd) +{ + return !ceph_osd_is_up(map, osd); } static inline bool ceph_osdmap_flag(struct ceph_osdmap *map, int flag) -- cgit v1.2.3-59-g8ed1b From 2abebdbca7997422bfab6bf8b6559384a6b95294 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 24 Mar 2014 17:12:47 +0200 Subject: libceph: ceph_can_shift_osds(pool) and pool type defines Bring in pg_pool_t::can_shift_osds() counterpart along with pool type defines. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osdmap.h | 12 ++++++++++++ include/linux/ceph/rados.h | 5 +++-- 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 0895797b9e28..4e28c1e5d62f 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -41,6 +41,18 @@ struct ceph_pg_pool_info { char *name; }; +static inline bool ceph_can_shift_osds(struct ceph_pg_pool_info *pool) +{ + switch (pool->type) { + case CEPH_POOL_TYPE_REP: + return true; + case CEPH_POOL_TYPE_EC: + return false; + default: + BUG_ON(1); + } +} + struct ceph_object_locator { s64 pool; }; diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index bb6f40c9cb0f..f20e0d8a2155 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -81,8 +81,9 @@ struct ceph_pg_v1 { */ #define CEPH_NOPOOL ((__u64) (-1)) /* pool id not defined */ -#define CEPH_PG_TYPE_REP 1 -#define CEPH_PG_TYPE_RAID4 2 +#define CEPH_POOL_TYPE_REP 1 +#define CEPH_POOL_TYPE_RAID4 2 /* never implemented */ +#define CEPH_POOL_TYPE_EC 3 /* * stable_mod func is used to control number of placement groups. -- cgit v1.2.3-59-g8ed1b From ac972230e20581b044f5ce66dcaf3c5af8d57444 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 24 Mar 2014 17:12:48 +0200 Subject: libceph: switch ceph_calc_pg_acting() to new helpers Switch ceph_calc_pg_acting() to new helpers: pg_to_raw_osds(), raw_to_up_osds() and apply_temps(). Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osdmap.h | 2 +- net/ceph/osdmap.c | 51 +++++++++++++++++++++++++++++++++------------ 2 files changed, 39 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 4e28c1e5d62f..b0c8f8490663 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -212,7 +212,7 @@ extern int ceph_oloc_oid_to_pg(struct ceph_osdmap *osdmap, extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, - int *acting); + int *osds); extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid); diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index bd40f56b53ed..f1cad21d1533 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1649,24 +1649,49 @@ static int apply_temps(struct ceph_osdmap *osdmap, } /* - * Return acting set for given pgid. + * Calculate acting set for given pgid. + * + * Return acting set length, or error. */ int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, - int *acting) + int *osds) { - int rawosds[CEPH_PG_MAX_SIZE], *osds; - int i, o, num = CEPH_PG_MAX_SIZE; + struct ceph_pg_pool_info *pool; + u32 pps; + int len; + int primary; - osds = calc_pg_raw(osdmap, pgid, rawosds, &num); - if (!osds) - return -1; + pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool); + if (!pool) + return 0; - /* primary is first up osd */ - o = 0; - for (i = 0; i < num; i++) - if (ceph_osd_is_up(osdmap, osds[i])) - acting[o++] = osds[i]; - return o; + if (pool->flags & CEPH_POOL_FLAG_HASHPSPOOL) { + /* hash pool id and seed so that pool PGs do not overlap */ + pps = crush_hash32_2(CRUSH_HASH_RJENKINS1, + ceph_stable_mod(pgid.seed, pool->pgp_num, + pool->pgp_num_mask), + pgid.pool); + } else { + /* + * legacy behavior: add ps and pool together. this is + * not a great approach because the PGs from each pool + * will overlap on top of each other: 0.5 == 1.4 == + * 2.3 == ... + */ + pps = ceph_stable_mod(pgid.seed, pool->pgp_num, + pool->pgp_num_mask) + + (unsigned)pgid.pool; + } + + len = pg_to_raw_osds(osdmap, pool, pgid, pps, osds); + if (len < 0) + return len; + + len = raw_to_up_osds(osdmap, pool, osds, len, &primary); + + len = apply_temps(osdmap, pool, pgid, osds, len, &primary); + + return len; } /* -- cgit v1.2.3-59-g8ed1b From 8008ab1080c1768b02d232dcfd9e161cd47cc9f7 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 24 Mar 2014 17:12:48 +0200 Subject: libceph: return primary from ceph_calc_pg_acting() In preparation for adding support for primary_temp, stop assuming primaryness: add a primary out parameter to ceph_calc_pg_acting() and change call sites accordingly. Primary is now specified separately from the order of osds in the set. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osdmap.h | 2 +- net/ceph/osd_client.c | 10 ++++------ net/ceph/osdmap.c | 20 ++++++++++++-------- 3 files changed, 17 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index b0c8f8490663..561ea896c657 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -212,7 +212,7 @@ extern int ceph_oloc_oid_to_pg(struct ceph_osdmap *osdmap, extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, - int *osds); + int *osds, int *primary); extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 6f64eec18851..b4157dc22199 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1333,7 +1333,7 @@ static int __map_request(struct ceph_osd_client *osdc, { struct ceph_pg pgid; int acting[CEPH_PG_MAX_SIZE]; - int o = -1, num = 0; + int num, o; int err; bool was_paused; @@ -1346,11 +1346,9 @@ static int __map_request(struct ceph_osd_client *osdc, } req->r_pgid = pgid; - err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting); - if (err > 0) { - o = acting[0]; - num = err; - } + num = ceph_calc_pg_acting(osdc->osdmap, pgid, acting, &o); + if (num < 0) + num = 0; was_paused = req->r_paused; req->r_paused = __req_should_be_paused(osdc, req); diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index f1cad21d1533..df9389ddd56c 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1651,19 +1651,21 @@ static int apply_temps(struct ceph_osdmap *osdmap, /* * Calculate acting set for given pgid. * - * Return acting set length, or error. + * Return acting set length, or error. *primary is set to acting + * primary osd id, or -1 if acting set is empty or on error. */ int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, - int *osds) + int *osds, int *primary) { struct ceph_pg_pool_info *pool; u32 pps; int len; - int primary; pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool); - if (!pool) - return 0; + if (!pool) { + *primary = -1; + return -ENOENT; + } if (pool->flags & CEPH_POOL_FLAG_HASHPSPOOL) { /* hash pool id and seed so that pool PGs do not overlap */ @@ -1684,12 +1686,14 @@ int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, } len = pg_to_raw_osds(osdmap, pool, pgid, pps, osds); - if (len < 0) + if (len < 0) { + *primary = -1; return len; + } - len = raw_to_up_osds(osdmap, pool, osds, len, &primary); + len = raw_to_up_osds(osdmap, pool, osds, len, primary); - len = apply_temps(osdmap, pool, pgid, osds, len, &primary); + len = apply_temps(osdmap, pool, pgid, osds, len, primary); return len; } -- cgit v1.2.3-59-g8ed1b From 18cb95af2d7c69aa136ab13f02dd55188c120e75 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 24 Mar 2014 17:12:50 +0200 Subject: libceph: enable PRIMARY_AFFINITY feature bit Announce our support for osdmaps with non-default primary affinity values. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/ceph_features.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 7a4cab50b2cd..d12659ce550d 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -91,7 +91,8 @@ static inline u64 ceph_sanitize_features(u64 features) CEPH_FEATURE_CRUSH_V2 | \ CEPH_FEATURE_EXPORT_PEER | \ CEPH_FEATURE_OSDMAP_ENC | \ - CEPH_FEATURE_CRUSH_TUNABLES3) + CEPH_FEATURE_CRUSH_TUNABLES3 | \ + CEPH_FEATURE_OSD_PRIMARY_AFFINITY) #define CEPH_FEATURES_REQUIRED_DEFAULT \ (CEPH_FEATURE_NOSRCADDR | \ -- cgit v1.2.3-59-g8ed1b From 7f4b04614a273089ad65654f53771c033fadc65e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 28 Mar 2014 19:11:47 +0530 Subject: cpufreq: create another field .flags in cpufreq_frequency_table Currently cpufreq frequency table has two fields: frequency and driver_data. driver_data is only for drivers' internal use and cpufreq core shouldn't use it at all. But with the introduction of BOOST frequencies, this assumption was broken and we started using it as a flag instead. There are two problems due to this: - It is against the description of this field, as driver's data is used by the core now. - if drivers fill it with -3 for any frequency, then those frequencies are never considered by cpufreq core as it is exactly same as value of CPUFREQ_BOOST_FREQ, i.e. ~2. The best way to get this fixed is by creating another field flags which will be used for such flags. This patch does that. Along with that various drivers need modifications due to the change of struct cpufreq_frequency_table. Reviewed-by: Gautham R Shenoy Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- arch/mips/loongson/lemote-2f/clock.c | 20 ++++++++++---------- drivers/cpufreq/cris-artpec3-cpufreq.c | 6 +++--- drivers/cpufreq/cris-etraxfs-cpufreq.c | 6 +++--- drivers/cpufreq/elanfreq.c | 18 +++++++++--------- drivers/cpufreq/exynos4210-cpufreq.c | 12 ++++++------ drivers/cpufreq/exynos4x12-cpufreq.c | 30 +++++++++++++++--------------- drivers/cpufreq/exynos5250-cpufreq.c | 34 +++++++++++++++++----------------- drivers/cpufreq/freq_table.c | 4 ++-- drivers/cpufreq/kirkwood-cpufreq.c | 6 +++--- drivers/cpufreq/maple-cpufreq.c | 6 +++--- drivers/cpufreq/p4-clockmod.c | 20 ++++++++++---------- drivers/cpufreq/pasemi-cpufreq.c | 12 ++++++------ drivers/cpufreq/pmac32-cpufreq.c | 6 +++--- drivers/cpufreq/pmac64-cpufreq.c | 6 +++--- drivers/cpufreq/powernow-k6.c | 18 +++++++++--------- drivers/cpufreq/ppc_cbe_cpufreq.c | 18 +++++++++--------- drivers/cpufreq/s3c2416-cpufreq.c | 20 ++++++++++---------- drivers/cpufreq/s3c64xx-cpufreq.c | 26 +++++++++++++------------- drivers/cpufreq/s5pv210-cpufreq.c | 12 ++++++------ drivers/cpufreq/sc520_freq.c | 6 +++--- drivers/cpufreq/speedstep-ich.c | 6 +++--- drivers/cpufreq/speedstep-smi.c | 6 +++--- include/linux/cpufreq.h | 9 ++++++--- 23 files changed, 155 insertions(+), 152 deletions(-) (limited to 'include/linux') diff --git a/arch/mips/loongson/lemote-2f/clock.c b/arch/mips/loongson/lemote-2f/clock.c index aed32b88576c..e1f427f4f5f3 100644 --- a/arch/mips/loongson/lemote-2f/clock.c +++ b/arch/mips/loongson/lemote-2f/clock.c @@ -28,16 +28,16 @@ enum { }; struct cpufreq_frequency_table loongson2_clockmod_table[] = { - {DC_RESV, CPUFREQ_ENTRY_INVALID}, - {DC_ZERO, CPUFREQ_ENTRY_INVALID}, - {DC_25PT, 0}, - {DC_37PT, 0}, - {DC_50PT, 0}, - {DC_62PT, 0}, - {DC_75PT, 0}, - {DC_87PT, 0}, - {DC_DISABLE, 0}, - {DC_RESV, CPUFREQ_TABLE_END}, + {0, DC_RESV, CPUFREQ_ENTRY_INVALID}, + {0, DC_ZERO, CPUFREQ_ENTRY_INVALID}, + {0, DC_25PT, 0}, + {0, DC_37PT, 0}, + {0, DC_50PT, 0}, + {0, DC_62PT, 0}, + {0, DC_75PT, 0}, + {0, DC_87PT, 0}, + {0, DC_DISABLE, 0}, + {0, DC_RESV, CPUFREQ_TABLE_END}, }; EXPORT_SYMBOL_GPL(loongson2_clockmod_table); diff --git a/drivers/cpufreq/cris-artpec3-cpufreq.c b/drivers/cpufreq/cris-artpec3-cpufreq.c index d4573032cbbc..601b88c490cf 100644 --- a/drivers/cpufreq/cris-artpec3-cpufreq.c +++ b/drivers/cpufreq/cris-artpec3-cpufreq.c @@ -15,9 +15,9 @@ static struct notifier_block cris_sdram_freq_notifier_block = { }; static struct cpufreq_frequency_table cris_freq_table[] = { - {0x01, 6000}, - {0x02, 200000}, - {0, CPUFREQ_TABLE_END}, + {0, 0x01, 6000}, + {0, 0x02, 200000}, + {0, 0, CPUFREQ_TABLE_END}, }; static unsigned int cris_freq_get_cpu_frequency(unsigned int cpu) diff --git a/drivers/cpufreq/cris-etraxfs-cpufreq.c b/drivers/cpufreq/cris-etraxfs-cpufreq.c index 13c3361437f7..22b2cdde74d9 100644 --- a/drivers/cpufreq/cris-etraxfs-cpufreq.c +++ b/drivers/cpufreq/cris-etraxfs-cpufreq.c @@ -15,9 +15,9 @@ static struct notifier_block cris_sdram_freq_notifier_block = { }; static struct cpufreq_frequency_table cris_freq_table[] = { - {0x01, 6000}, - {0x02, 200000}, - {0, CPUFREQ_TABLE_END}, + {0, 0x01, 6000}, + {0, 0x02, 200000}, + {0, 0, CPUFREQ_TABLE_END}, }; static unsigned int cris_freq_get_cpu_frequency(unsigned int cpu) diff --git a/drivers/cpufreq/elanfreq.c b/drivers/cpufreq/elanfreq.c index c987e94708f5..7f5d2a68c353 100644 --- a/drivers/cpufreq/elanfreq.c +++ b/drivers/cpufreq/elanfreq.c @@ -56,15 +56,15 @@ static struct s_elan_multiplier elan_multiplier[] = { }; static struct cpufreq_frequency_table elanfreq_table[] = { - {0, 1000}, - {1, 2000}, - {2, 4000}, - {3, 8000}, - {4, 16000}, - {5, 33000}, - {6, 66000}, - {7, 99000}, - {0, CPUFREQ_TABLE_END}, + {0, 0, 1000}, + {0, 1, 2000}, + {0, 2, 4000}, + {0, 3, 8000}, + {0, 4, 16000}, + {0, 5, 33000}, + {0, 6, 66000}, + {0, 7, 99000}, + {0, 0, CPUFREQ_TABLE_END}, }; diff --git a/drivers/cpufreq/exynos4210-cpufreq.c b/drivers/cpufreq/exynos4210-cpufreq.c index 40d84c43d8f4..6384e5b9a347 100644 --- a/drivers/cpufreq/exynos4210-cpufreq.c +++ b/drivers/cpufreq/exynos4210-cpufreq.c @@ -29,12 +29,12 @@ static unsigned int exynos4210_volt_table[] = { }; static struct cpufreq_frequency_table exynos4210_freq_table[] = { - {L0, 1200 * 1000}, - {L1, 1000 * 1000}, - {L2, 800 * 1000}, - {L3, 500 * 1000}, - {L4, 200 * 1000}, - {0, CPUFREQ_TABLE_END}, + {0, L0, 1200 * 1000}, + {0, L1, 1000 * 1000}, + {0, L2, 800 * 1000}, + {0, L3, 500 * 1000}, + {0, L4, 200 * 1000}, + {0, 0, CPUFREQ_TABLE_END}, }; static struct apll_freq apll_freq_4210[] = { diff --git a/drivers/cpufreq/exynos4x12-cpufreq.c b/drivers/cpufreq/exynos4x12-cpufreq.c index 7c11ace3b3fc..466c76ad335b 100644 --- a/drivers/cpufreq/exynos4x12-cpufreq.c +++ b/drivers/cpufreq/exynos4x12-cpufreq.c @@ -30,21 +30,21 @@ static unsigned int exynos4x12_volt_table[] = { }; static struct cpufreq_frequency_table exynos4x12_freq_table[] = { - {CPUFREQ_BOOST_FREQ, 1500 * 1000}, - {L1, 1400 * 1000}, - {L2, 1300 * 1000}, - {L3, 1200 * 1000}, - {L4, 1100 * 1000}, - {L5, 1000 * 1000}, - {L6, 900 * 1000}, - {L7, 800 * 1000}, - {L8, 700 * 1000}, - {L9, 600 * 1000}, - {L10, 500 * 1000}, - {L11, 400 * 1000}, - {L12, 300 * 1000}, - {L13, 200 * 1000}, - {0, CPUFREQ_TABLE_END}, + {CPUFREQ_BOOST_FREQ, L0, 1500 * 1000}, + {0, L1, 1400 * 1000}, + {0, L2, 1300 * 1000}, + {0, L3, 1200 * 1000}, + {0, L4, 1100 * 1000}, + {0, L5, 1000 * 1000}, + {0, L6, 900 * 1000}, + {0, L7, 800 * 1000}, + {0, L8, 700 * 1000}, + {0, L9, 600 * 1000}, + {0, L10, 500 * 1000}, + {0, L11, 400 * 1000}, + {0, L12, 300 * 1000}, + {0, L13, 200 * 1000}, + {0, 0, CPUFREQ_TABLE_END}, }; static struct apll_freq *apll_freq_4x12; diff --git a/drivers/cpufreq/exynos5250-cpufreq.c b/drivers/cpufreq/exynos5250-cpufreq.c index 5f90b82a4082..363a0b3fe1b1 100644 --- a/drivers/cpufreq/exynos5250-cpufreq.c +++ b/drivers/cpufreq/exynos5250-cpufreq.c @@ -34,23 +34,23 @@ static unsigned int exynos5250_volt_table[] = { }; static struct cpufreq_frequency_table exynos5250_freq_table[] = { - {L0, 1700 * 1000}, - {L1, 1600 * 1000}, - {L2, 1500 * 1000}, - {L3, 1400 * 1000}, - {L4, 1300 * 1000}, - {L5, 1200 * 1000}, - {L6, 1100 * 1000}, - {L7, 1000 * 1000}, - {L8, 900 * 1000}, - {L9, 800 * 1000}, - {L10, 700 * 1000}, - {L11, 600 * 1000}, - {L12, 500 * 1000}, - {L13, 400 * 1000}, - {L14, 300 * 1000}, - {L15, 200 * 1000}, - {0, CPUFREQ_TABLE_END}, + {0, L0, 1700 * 1000}, + {0, L1, 1600 * 1000}, + {0, L2, 1500 * 1000}, + {0, L3, 1400 * 1000}, + {0, L4, 1300 * 1000}, + {0, L5, 1200 * 1000}, + {0, L6, 1100 * 1000}, + {0, L7, 1000 * 1000}, + {0, L8, 900 * 1000}, + {0, L9, 800 * 1000}, + {0, L10, 700 * 1000}, + {0, L11, 600 * 1000}, + {0, L12, 500 * 1000}, + {0, L13, 400 * 1000}, + {0, L14, 300 * 1000}, + {0, L15, 200 * 1000}, + {0, 0, CPUFREQ_TABLE_END}, }; static struct apll_freq apll_freq_5250[] = { diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index 53ff3c209606..08e7bbcf6d73 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -33,7 +33,7 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, continue; } if (!cpufreq_boost_enabled() - && table[i].driver_data == CPUFREQ_BOOST_FREQ) + && (table[i].flags & CPUFREQ_BOOST_FREQ)) continue; pr_debug("table entry %u: %u kHz\n", i, freq); @@ -229,7 +229,7 @@ static ssize_t show_available_freqs(struct cpufreq_policy *policy, char *buf, * show_boost = false and driver_data != BOOST freq * display NON BOOST freqs */ - if (show_boost ^ (table[i].driver_data == CPUFREQ_BOOST_FREQ)) + if (show_boost ^ (table[i].flags & CPUFREQ_BOOST_FREQ)) continue; count += sprintf(&buf[count], "%d ", table[i].frequency); diff --git a/drivers/cpufreq/kirkwood-cpufreq.c b/drivers/cpufreq/kirkwood-cpufreq.c index 3d114bc5a97a..37a480680cd0 100644 --- a/drivers/cpufreq/kirkwood-cpufreq.c +++ b/drivers/cpufreq/kirkwood-cpufreq.c @@ -43,9 +43,9 @@ static struct priv * table. */ static struct cpufreq_frequency_table kirkwood_freq_table[] = { - {STATE_CPU_FREQ, 0}, /* CPU uses cpuclk */ - {STATE_DDR_FREQ, 0}, /* CPU uses ddrclk */ - {0, CPUFREQ_TABLE_END}, + {0, STATE_CPU_FREQ, 0}, /* CPU uses cpuclk */ + {0, STATE_DDR_FREQ, 0}, /* CPU uses ddrclk */ + {0, 0, CPUFREQ_TABLE_END}, }; static unsigned int kirkwood_cpufreq_get_cpu_frequency(unsigned int cpu) diff --git a/drivers/cpufreq/maple-cpufreq.c b/drivers/cpufreq/maple-cpufreq.c index c4dfa42a75ac..cc3408fc073f 100644 --- a/drivers/cpufreq/maple-cpufreq.c +++ b/drivers/cpufreq/maple-cpufreq.c @@ -59,9 +59,9 @@ #define CPUFREQ_LOW 1 static struct cpufreq_frequency_table maple_cpu_freqs[] = { - {CPUFREQ_HIGH, 0}, - {CPUFREQ_LOW, 0}, - {0, CPUFREQ_TABLE_END}, + {0, CPUFREQ_HIGH, 0}, + {0, CPUFREQ_LOW, 0}, + {0, 0, CPUFREQ_TABLE_END}, }; /* Power mode data is an array of the 32 bits PCR values to use for diff --git a/drivers/cpufreq/p4-clockmod.c b/drivers/cpufreq/p4-clockmod.c index 74f593e70e19..529cfd92158f 100644 --- a/drivers/cpufreq/p4-clockmod.c +++ b/drivers/cpufreq/p4-clockmod.c @@ -92,16 +92,16 @@ static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate) static struct cpufreq_frequency_table p4clockmod_table[] = { - {DC_RESV, CPUFREQ_ENTRY_INVALID}, - {DC_DFLT, 0}, - {DC_25PT, 0}, - {DC_38PT, 0}, - {DC_50PT, 0}, - {DC_64PT, 0}, - {DC_75PT, 0}, - {DC_88PT, 0}, - {DC_DISABLE, 0}, - {DC_RESV, CPUFREQ_TABLE_END}, + {0, DC_RESV, CPUFREQ_ENTRY_INVALID}, + {0, DC_DFLT, 0}, + {0, DC_25PT, 0}, + {0, DC_38PT, 0}, + {0, DC_50PT, 0}, + {0, DC_64PT, 0}, + {0, DC_75PT, 0}, + {0, DC_88PT, 0}, + {0, DC_DISABLE, 0}, + {0, DC_RESV, CPUFREQ_TABLE_END}, }; diff --git a/drivers/cpufreq/pasemi-cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c index 6a2b7d3e85a7..84c84b5f0f3a 100644 --- a/drivers/cpufreq/pasemi-cpufreq.c +++ b/drivers/cpufreq/pasemi-cpufreq.c @@ -60,12 +60,12 @@ static int current_astate; /* We support 5(A0-A4) power states excluding turbo(A5-A6) modes */ static struct cpufreq_frequency_table pas_freqs[] = { - {0, 0}, - {1, 0}, - {2, 0}, - {3, 0}, - {4, 0}, - {0, CPUFREQ_TABLE_END}, + {0, 0, 0}, + {0, 1, 0}, + {0, 2, 0}, + {0, 3, 0}, + {0, 4, 0}, + {0, 0, CPUFREQ_TABLE_END}, }; /* diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c index cf55d202f332..7615180d7ee3 100644 --- a/drivers/cpufreq/pmac32-cpufreq.c +++ b/drivers/cpufreq/pmac32-cpufreq.c @@ -81,9 +81,9 @@ static int is_pmu_based; #define CPUFREQ_LOW 1 static struct cpufreq_frequency_table pmac_cpu_freqs[] = { - {CPUFREQ_HIGH, 0}, - {CPUFREQ_LOW, 0}, - {0, CPUFREQ_TABLE_END}, + {0, CPUFREQ_HIGH, 0}, + {0, CPUFREQ_LOW, 0}, + {0, 0, CPUFREQ_TABLE_END}, }; static inline void local_delay(unsigned long ms) diff --git a/drivers/cpufreq/pmac64-cpufreq.c b/drivers/cpufreq/pmac64-cpufreq.c index 6a338f8c3860..8bc422977b5b 100644 --- a/drivers/cpufreq/pmac64-cpufreq.c +++ b/drivers/cpufreq/pmac64-cpufreq.c @@ -65,9 +65,9 @@ #define CPUFREQ_LOW 1 static struct cpufreq_frequency_table g5_cpu_freqs[] = { - {CPUFREQ_HIGH, 0}, - {CPUFREQ_LOW, 0}, - {0, CPUFREQ_TABLE_END}, + {0, CPUFREQ_HIGH, 0}, + {0, CPUFREQ_LOW, 0}, + {0, 0, CPUFREQ_TABLE_END}, }; /* Power mode data is an array of the 32 bits PCR values to use for diff --git a/drivers/cpufreq/powernow-k6.c b/drivers/cpufreq/powernow-k6.c index 62c6f2e5afce..49f120e1bc7b 100644 --- a/drivers/cpufreq/powernow-k6.c +++ b/drivers/cpufreq/powernow-k6.c @@ -37,15 +37,15 @@ MODULE_PARM_DESC(bus_frequency, "Bus frequency in kHz"); /* Clock ratio multiplied by 10 - see table 27 in AMD#23446 */ static struct cpufreq_frequency_table clock_ratio[] = { - {60, /* 110 -> 6.0x */ 0}, - {55, /* 011 -> 5.5x */ 0}, - {50, /* 001 -> 5.0x */ 0}, - {45, /* 000 -> 4.5x */ 0}, - {40, /* 010 -> 4.0x */ 0}, - {35, /* 111 -> 3.5x */ 0}, - {30, /* 101 -> 3.0x */ 0}, - {20, /* 100 -> 2.0x */ 0}, - {0, CPUFREQ_TABLE_END} + {0, 60, /* 110 -> 6.0x */ 0}, + {0, 55, /* 011 -> 5.5x */ 0}, + {0, 50, /* 001 -> 5.0x */ 0}, + {0, 45, /* 000 -> 4.5x */ 0}, + {0, 40, /* 010 -> 4.0x */ 0}, + {0, 35, /* 111 -> 3.5x */ 0}, + {0, 30, /* 101 -> 3.0x */ 0}, + {0, 20, /* 100 -> 2.0x */ 0}, + {0, 0, CPUFREQ_TABLE_END} }; static const u8 index_to_register[8] = { 6, 3, 1, 0, 2, 7, 5, 4 }; diff --git a/drivers/cpufreq/ppc_cbe_cpufreq.c b/drivers/cpufreq/ppc_cbe_cpufreq.c index af7b1cabd1e7..5be8a48dba74 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq.c +++ b/drivers/cpufreq/ppc_cbe_cpufreq.c @@ -32,15 +32,15 @@ /* the CBE supports an 8 step frequency scaling */ static struct cpufreq_frequency_table cbe_freqs[] = { - {1, 0}, - {2, 0}, - {3, 0}, - {4, 0}, - {5, 0}, - {6, 0}, - {8, 0}, - {10, 0}, - {0, CPUFREQ_TABLE_END}, + {0, 1, 0}, + {0, 2, 0}, + {0, 3, 0}, + {0, 4, 0}, + {0, 5, 0}, + {0, 6, 0}, + {0, 8, 0}, + {0, 10, 0}, + {0, 0, CPUFREQ_TABLE_END}, }; /* diff --git a/drivers/cpufreq/s3c2416-cpufreq.c b/drivers/cpufreq/s3c2416-cpufreq.c index 826b8be23099..4626f90559b5 100644 --- a/drivers/cpufreq/s3c2416-cpufreq.c +++ b/drivers/cpufreq/s3c2416-cpufreq.c @@ -72,19 +72,19 @@ static struct s3c2416_dvfs s3c2416_dvfs_table[] = { #endif static struct cpufreq_frequency_table s3c2416_freq_table[] = { - { SOURCE_HCLK, FREQ_DVS }, - { SOURCE_ARMDIV, 133333 }, - { SOURCE_ARMDIV, 266666 }, - { SOURCE_ARMDIV, 400000 }, - { 0, CPUFREQ_TABLE_END }, + { 0, SOURCE_HCLK, FREQ_DVS }, + { 0, SOURCE_ARMDIV, 133333 }, + { 0, SOURCE_ARMDIV, 266666 }, + { 0, SOURCE_ARMDIV, 400000 }, + { 0, 0, CPUFREQ_TABLE_END }, }; static struct cpufreq_frequency_table s3c2450_freq_table[] = { - { SOURCE_HCLK, FREQ_DVS }, - { SOURCE_ARMDIV, 133500 }, - { SOURCE_ARMDIV, 267000 }, - { SOURCE_ARMDIV, 534000 }, - { 0, CPUFREQ_TABLE_END }, + { 0, SOURCE_HCLK, FREQ_DVS }, + { 0, SOURCE_ARMDIV, 133500 }, + { 0, SOURCE_ARMDIV, 267000 }, + { 0, SOURCE_ARMDIV, 534000 }, + { 0, 0, CPUFREQ_TABLE_END }, }; static unsigned int s3c2416_cpufreq_get_speed(unsigned int cpu) diff --git a/drivers/cpufreq/s3c64xx-cpufreq.c b/drivers/cpufreq/s3c64xx-cpufreq.c index c4226de079ab..ff7d3ecb85f0 100644 --- a/drivers/cpufreq/s3c64xx-cpufreq.c +++ b/drivers/cpufreq/s3c64xx-cpufreq.c @@ -37,19 +37,19 @@ static struct s3c64xx_dvfs s3c64xx_dvfs_table[] = { }; static struct cpufreq_frequency_table s3c64xx_freq_table[] = { - { 0, 66000 }, - { 0, 100000 }, - { 0, 133000 }, - { 1, 200000 }, - { 1, 222000 }, - { 1, 266000 }, - { 2, 333000 }, - { 2, 400000 }, - { 2, 532000 }, - { 2, 533000 }, - { 3, 667000 }, - { 4, 800000 }, - { 0, CPUFREQ_TABLE_END }, + { 0, 0, 66000 }, + { 0, 0, 100000 }, + { 0, 0, 133000 }, + { 0, 1, 200000 }, + { 0, 1, 222000 }, + { 0, 1, 266000 }, + { 0, 2, 333000 }, + { 0, 2, 400000 }, + { 0, 2, 532000 }, + { 0, 2, 533000 }, + { 0, 3, 667000 }, + { 0, 4, 800000 }, + { 0, 0, CPUFREQ_TABLE_END }, }; #endif diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c index 72421534fff5..ab2c1a40d437 100644 --- a/drivers/cpufreq/s5pv210-cpufreq.c +++ b/drivers/cpufreq/s5pv210-cpufreq.c @@ -64,12 +64,12 @@ enum s5pv210_dmc_port { }; static struct cpufreq_frequency_table s5pv210_freq_table[] = { - {L0, 1000*1000}, - {L1, 800*1000}, - {L2, 400*1000}, - {L3, 200*1000}, - {L4, 100*1000}, - {0, CPUFREQ_TABLE_END}, + {0, L0, 1000*1000}, + {0, L1, 800*1000}, + {0, L2, 400*1000}, + {0, L3, 200*1000}, + {0, L4, 100*1000}, + {0, 0, CPUFREQ_TABLE_END}, }; static struct regulator *arm_regulator; diff --git a/drivers/cpufreq/sc520_freq.c b/drivers/cpufreq/sc520_freq.c index 69371bf0886d..ac84e4818014 100644 --- a/drivers/cpufreq/sc520_freq.c +++ b/drivers/cpufreq/sc520_freq.c @@ -33,9 +33,9 @@ static __u8 __iomem *cpuctl; #define PFX "sc520_freq: " static struct cpufreq_frequency_table sc520_freq_table[] = { - {0x01, 100000}, - {0x02, 133000}, - {0, CPUFREQ_TABLE_END}, + {0, 0x01, 100000}, + {0, 0x02, 133000}, + {0, 0, CPUFREQ_TABLE_END}, }; static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu) diff --git a/drivers/cpufreq/speedstep-ich.c b/drivers/cpufreq/speedstep-ich.c index 394ac159312a..1a07b5904ed5 100644 --- a/drivers/cpufreq/speedstep-ich.c +++ b/drivers/cpufreq/speedstep-ich.c @@ -49,9 +49,9 @@ static u32 pmbase; * are in kHz for the time being. */ static struct cpufreq_frequency_table speedstep_freqs[] = { - {SPEEDSTEP_HIGH, 0}, - {SPEEDSTEP_LOW, 0}, - {0, CPUFREQ_TABLE_END}, + {0, SPEEDSTEP_HIGH, 0}, + {0, SPEEDSTEP_LOW, 0}, + {0, 0, CPUFREQ_TABLE_END}, }; diff --git a/drivers/cpufreq/speedstep-smi.c b/drivers/cpufreq/speedstep-smi.c index db5d274dc13a..8635eec96da5 100644 --- a/drivers/cpufreq/speedstep-smi.c +++ b/drivers/cpufreq/speedstep-smi.c @@ -42,9 +42,9 @@ static enum speedstep_processor speedstep_processor; * are in kHz for the time being. */ static struct cpufreq_frequency_table speedstep_freqs[] = { - {SPEEDSTEP_HIGH, 0}, - {SPEEDSTEP_LOW, 0}, - {0, CPUFREQ_TABLE_END}, + {0, SPEEDSTEP_HIGH, 0}, + {0, SPEEDSTEP_LOW, 0}, + {0, 0, CPUFREQ_TABLE_END}, }; #define GET_SPEEDSTEP_OWNER 0 diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index c48e595f623e..5ae5100c1f24 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -455,11 +455,14 @@ extern struct cpufreq_governor cpufreq_gov_conservative; * FREQUENCY TABLE HELPERS * *********************************************************************/ -#define CPUFREQ_ENTRY_INVALID ~0 -#define CPUFREQ_TABLE_END ~1 -#define CPUFREQ_BOOST_FREQ ~2 +/* Special Values of .frequency field */ +#define CPUFREQ_ENTRY_INVALID ~0 +#define CPUFREQ_TABLE_END ~1 +/* Special Values of .flags field */ +#define CPUFREQ_BOOST_FREQ (1 << 0) struct cpufreq_frequency_table { + unsigned int flags; unsigned int driver_data; /* driver specific data, not used by core */ unsigned int frequency; /* kHz - doesn't need to be in ascending * order */ -- cgit v1.2.3-59-g8ed1b From 403c63cb6d7ec2caca2f9222ff843ac89d7d700a Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Mon, 29 Jul 2013 16:31:18 -0700 Subject: NTB: client event cleanup Provide a better event interface between the client and transport Signed-off-by: Jon Mason --- drivers/net/ntb_netdev.c | 16 ++++++++++++---- drivers/ntb/ntb_hw.h | 4 +--- drivers/ntb/ntb_transport.c | 1 - include/linux/ntb.h | 5 +++++ 4 files changed, 18 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index 8e6752fd89ac..14570b28d528 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -78,11 +78,19 @@ static void ntb_netdev_event_handler(void *data, int status) netdev_dbg(ndev, "Event %x, Link %x\n", status, ntb_transport_link_query(dev->qp)); - /* Currently, only link status event is supported */ - if (status) - netif_carrier_on(ndev); - else + switch (status) { + case NTB_LINK_DOWN: netif_carrier_off(ndev); + break; + case NTB_LINK_UP: + if (!ntb_transport_link_query(dev->qp)) + return; + + netif_carrier_on(ndev); + break; + default: + netdev_warn(ndev, "Unsupported event type %d\n", status); + } } static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp, void *qp_data, diff --git a/drivers/ntb/ntb_hw.h b/drivers/ntb/ntb_hw.h index bbdb7edca10c..d1960ffb4bbc 100644 --- a/drivers/ntb/ntb_hw.h +++ b/drivers/ntb/ntb_hw.h @@ -45,6 +45,7 @@ * Contact Information: * Jon Mason */ +#include #define PCI_DEVICE_ID_INTEL_NTB_B2B_JSF 0x3725 #define PCI_DEVICE_ID_INTEL_NTB_PS_JSF 0x3726 @@ -83,9 +84,6 @@ static inline void writeq(u64 val, void __iomem *addr) #define NTB_BAR_MASK ((1 << NTB_BAR_MMIO) | (1 << NTB_BAR_23) |\ (1 << NTB_BAR_45)) -#define NTB_LINK_DOWN 0 -#define NTB_LINK_UP 1 - #define NTB_HB_TIMEOUT msecs_to_jiffies(1000) #define NTB_MAX_NUM_MW 2 diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 3217f394d45b..042fb3dd354f 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -56,7 +56,6 @@ #include #include #include -#include #include "ntb_hw.h" #define NTB_TRANSPORT_VERSION 3 diff --git a/include/linux/ntb.h b/include/linux/ntb.h index f6a15205853b..cbc792cd745e 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -54,6 +54,11 @@ struct ntb_client { void (*remove) (struct pci_dev *pdev); }; +enum { + NTB_LINK_DOWN = 0, + NTB_LINK_UP, +}; + int ntb_register_client(struct ntb_client *drvr); void ntb_unregister_client(struct ntb_client *drvr); int ntb_register_client_dev(char *device_name); -- cgit v1.2.3-59-g8ed1b From 53ca4fea0bbe966b3123509125898b286a136f47 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Tue, 26 Nov 2013 11:21:50 -0700 Subject: NTB: Code Style Clean-up Some white space and 80 char overruns corrected. Signed-off-by: Jon Mason --- drivers/net/ntb_netdev.c | 2 +- drivers/ntb/ntb_hw.c | 19 ++++++++++--------- drivers/ntb/ntb_hw.h | 2 +- drivers/ntb/ntb_transport.c | 19 +++++++++---------- include/linux/ntb.h | 14 +++++++------- 5 files changed, 28 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index 14570b28d528..63aa9d9e34c5 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -377,7 +377,7 @@ static void ntb_netdev_remove(struct pci_dev *pdev) { struct net_device *ndev; struct ntb_netdev *dev; - bool found = false; + bool found = false; list_for_each_entry(dev, &dev_list, list) { if (dev->pdev == pdev) { diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/ntb_hw.c index 0e8ae70fd918..eba1ed58471d 100644 --- a/drivers/ntb/ntb_hw.c +++ b/drivers/ntb/ntb_hw.c @@ -91,7 +91,7 @@ static struct dentry *debugfs_dir; /* Translate memory window 0,1 to BAR 2,4 */ #define MW_TO_BAR(mw) (mw * NTB_MAX_NUM_MW + 2) -static DEFINE_PCI_DEVICE_TABLE(ntb_pci_tbl) = { +static const struct pci_device_id ntb_pci_tbl[] = { {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_BWD)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_JSF)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_SNB)}, @@ -120,7 +120,8 @@ MODULE_DEVICE_TABLE(pci, ntb_pci_tbl); * RETURNS: An appropriate -ERRNO error value on error, or zero for success. */ int ntb_register_event_callback(struct ntb_device *ndev, - void (*func)(void *handle, enum ntb_hw_event event)) + void (*func)(void *handle, + enum ntb_hw_event event)) { if (ndev->event_cb) return -EINVAL; @@ -715,9 +716,9 @@ static int ntb_xeon_setup(struct ntb_device *ndev) SNB_PBAR4LMT_OFFSET); /* HW errata on the Limit registers. They can only be * written when the base register is 4GB aligned and - * < 32bit. This should already be the case based on the - * driver defaults, but write the Limit registers first - * just in case. + * < 32bit. This should already be the case based on + * the driver defaults, but write the Limit registers + * first just in case. */ } else { ndev->limits.max_mw = SNB_MAX_MW; @@ -739,9 +740,9 @@ static int ntb_xeon_setup(struct ntb_device *ndev) writeq(0, ndev->reg_base + SNB_PBAR4LMT_OFFSET); /* HW errata on the Limit registers. They can only be * written when the base register is 4GB aligned and - * < 32bit. This should already be the case based on the - * driver defaults, but write the Limit registers first - * just in case. + * < 32bit. This should already be the case based on + * the driver defaults, but write the Limit registers + * first just in case. */ } @@ -803,7 +804,7 @@ static int ntb_xeon_setup(struct ntb_device *ndev) ndev->conn_type = NTB_CONN_RP; if (xeon_errata_workaround) { - dev_err(&ndev->pdev->dev, + dev_err(&ndev->pdev->dev, "NTB-RP disabled due to hardware errata. To disregard this warning and potentially lock-up the system, add the parameter 'xeon_errata_workaround=0'.\n"); return -EINVAL; } diff --git a/drivers/ntb/ntb_hw.h b/drivers/ntb/ntb_hw.h index d1960ffb4bbc..923a0fbb0eb8 100644 --- a/drivers/ntb/ntb_hw.h +++ b/drivers/ntb/ntb_hw.h @@ -231,7 +231,7 @@ int ntb_register_db_callback(struct ntb_device *ndev, unsigned int idx, int db_num)); void ntb_unregister_db_callback(struct ntb_device *ndev, unsigned int idx); int ntb_register_event_callback(struct ntb_device *ndev, - void (*event_cb_func) (void *handle, + void (*event_cb_func)(void *handle, enum ntb_hw_event event)); void ntb_unregister_event_callback(struct ntb_device *ndev); int ntb_get_max_spads(struct ntb_device *ndev); diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 042fb3dd354f..9dd63b822025 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -106,8 +106,8 @@ struct ntb_transport_qp { struct ntb_rx_info __iomem *rx_info; struct ntb_rx_info *remote_rx_info; - void (*tx_handler) (struct ntb_transport_qp *qp, void *qp_data, - void *data, int len); + void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data, + void *data, int len); struct list_head tx_free_q; spinlock_t ntb_tx_free_q_lock; void __iomem *tx_mw; @@ -116,8 +116,8 @@ struct ntb_transport_qp { unsigned int tx_max_entry; unsigned int tx_max_frame; - void (*rx_handler) (struct ntb_transport_qp *qp, void *qp_data, - void *data, int len); + void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data, + void *data, int len); struct list_head rx_pend_q; struct list_head rx_free_q; spinlock_t ntb_rx_pend_q_lock; @@ -128,7 +128,7 @@ struct ntb_transport_qp { unsigned int rx_max_frame; dma_cookie_t last_cookie; - void (*event_handler) (void *data, int status); + void (*event_handler)(void *data, int status); struct delayed_work link_work; struct work_struct link_cleanup; @@ -479,7 +479,7 @@ static void ntb_list_add(spinlock_t *lock, struct list_head *entry, } static struct ntb_queue_entry *ntb_list_rm(spinlock_t *lock, - struct list_head *list) + struct list_head *list) { struct ntb_queue_entry *entry; unsigned long flags; @@ -838,7 +838,7 @@ static void ntb_qp_link_work(struct work_struct *work) } static int ntb_transport_init_queue(struct ntb_transport *nt, - unsigned int qp_num) + unsigned int qp_num) { struct ntb_transport_qp *qp; unsigned int num_qps_mw, tx_size; @@ -1054,7 +1054,7 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset, if (!chan) goto err; - if (len < copy_bytes) + if (len < copy_bytes) goto err_wait; device = chan->device; @@ -1189,8 +1189,7 @@ out: return 0; err: - ntb_list_add(&qp->ntb_rx_pend_q_lock, &entry->entry, - &qp->rx_pend_q); + ntb_list_add(&qp->ntb_rx_pend_q_lock, &entry->entry, &qp->rx_pend_q); /* Ensure that the data is fully copied out before clearing the flag */ wmb(); hdr->flags = 0; diff --git a/include/linux/ntb.h b/include/linux/ntb.h index cbc792cd745e..9ac1a62fc6f5 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -50,8 +50,8 @@ struct ntb_transport_qp; struct ntb_client { struct device_driver driver; - int (*probe) (struct pci_dev *pdev); - void (*remove) (struct pci_dev *pdev); + int (*probe)(struct pci_dev *pdev); + void (*remove)(struct pci_dev *pdev); }; enum { @@ -65,11 +65,11 @@ int ntb_register_client_dev(char *device_name); void ntb_unregister_client_dev(char *device_name); struct ntb_queue_handlers { - void (*rx_handler) (struct ntb_transport_qp *qp, void *qp_data, - void *data, int len); - void (*tx_handler) (struct ntb_transport_qp *qp, void *qp_data, - void *data, int len); - void (*event_handler) (void *data, int status); + void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data, + void *data, int len); + void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data, + void *data, int len); + void (*event_handler)(void *data, int status); }; unsigned char ntb_transport_qp_num(struct ntb_transport_qp *qp); -- cgit v1.2.3-59-g8ed1b From b8780c363d808a726a34793caa900923d32b6b80 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 7 Apr 2014 17:33:06 +0200 Subject: sched: remove sleep_on() and friends This is the final piece in the puzzle, as all patches to remove the last users of \(interruptible_\|\)sleep_on\(_timeout\|\) have made it into the 3.15 merge window. The work was long overdue, and this interface in particular should not have survived the BKL removal that was done a couple of years ago. Citing Jon Corbet from http://lwn.net/2001/0201/kernel.php3": "[...] it was suggested that the janitors look for and fix all code that calls sleep_on() [...] since (1) almost all such code is incorrect, and (2) Linus has agreed that those functions should be removed in the 2.5 development series". We haven't quite made it for 2.5, but maybe we can merge this for 3.15. Signed-off-by: Arnd Bergmann Cc: Peter Zijlstra Cc: Ingo Molnar Signed-off-by: Linus Torvalds --- Documentation/DocBook/kernel-hacking.tmpl | 10 ------- include/linux/wait.h | 11 -------- kernel/sched/core.c | 46 ------------------------------- 3 files changed, 67 deletions(-) (limited to 'include/linux') diff --git a/Documentation/DocBook/kernel-hacking.tmpl b/Documentation/DocBook/kernel-hacking.tmpl index d0758b241b23..bd9015d10cff 100644 --- a/Documentation/DocBook/kernel-hacking.tmpl +++ b/Documentation/DocBook/kernel-hacking.tmpl @@ -850,16 +850,6 @@ printk(KERN_INFO "my ip: %pI4\n", &ipaddress); -ERESTARTSYS if a signal is received. The wait_event() version ignores signals. - - Do not use the sleep_on() function family - - it is very easy to accidentally introduce races; almost certainly - one of the wait_event() family will do, or a - loop around schedule_timeout(). If you choose - to loop around schedule_timeout() remember - you must set the task state (with - set_current_state()) on each iteration to avoid - busy-looping. - diff --git a/include/linux/wait.h b/include/linux/wait.h index 559044c79232..e7d9d9ed14f5 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -803,17 +803,6 @@ do { \ __ret; \ }) - -/* - * These are the old interfaces to sleep waiting for an event. - * They are racy. DO NOT use them, use the wait_event* interfaces above. - * We plan to remove these interfaces. - */ -extern void sleep_on(wait_queue_head_t *q); -extern long sleep_on_timeout(wait_queue_head_t *q, signed long timeout); -extern void interruptible_sleep_on(wait_queue_head_t *q); -extern long interruptible_sleep_on_timeout(wait_queue_head_t *q, signed long timeout); - /* * Waitqueues which are removed from the waitqueue_head at wakeup time */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1d1b87b36778..0ff3f34bc7e3 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2845,52 +2845,6 @@ int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags, } EXPORT_SYMBOL(default_wake_function); -static long __sched -sleep_on_common(wait_queue_head_t *q, int state, long timeout) -{ - unsigned long flags; - wait_queue_t wait; - - init_waitqueue_entry(&wait, current); - - __set_current_state(state); - - spin_lock_irqsave(&q->lock, flags); - __add_wait_queue(q, &wait); - spin_unlock(&q->lock); - timeout = schedule_timeout(timeout); - spin_lock_irq(&q->lock); - __remove_wait_queue(q, &wait); - spin_unlock_irqrestore(&q->lock, flags); - - return timeout; -} - -void __sched interruptible_sleep_on(wait_queue_head_t *q) -{ - sleep_on_common(q, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); -} -EXPORT_SYMBOL(interruptible_sleep_on); - -long __sched -interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout) -{ - return sleep_on_common(q, TASK_INTERRUPTIBLE, timeout); -} -EXPORT_SYMBOL(interruptible_sleep_on_timeout); - -void __sched sleep_on(wait_queue_head_t *q) -{ - sleep_on_common(q, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); -} -EXPORT_SYMBOL(sleep_on); - -long __sched sleep_on_timeout(wait_queue_head_t *q, long timeout) -{ - return sleep_on_common(q, TASK_UNINTERRUPTIBLE, timeout); -} -EXPORT_SYMBOL(sleep_on_timeout); - #ifdef CONFIG_RT_MUTEXES /* -- cgit v1.2.3-59-g8ed1b From a0715cc22601e8830ace98366c0c2bd8da52af52 Mon Sep 17 00:00:00 2001 From: Alex Thorlton Date: Mon, 7 Apr 2014 15:37:10 -0700 Subject: mm, thp: add VM_INIT_DEF_MASK and PRCTL_THP_DISABLE Add VM_INIT_DEF_MASK, to allow us to set the default flags for VMs. It also adds a prctl control which allows us to set the THP disable bit in mm->def_flags so that VMs will pick up the setting as they are created. Signed-off-by: Alex Thorlton Suggested-by: Oleg Nesterov Cc: Gerald Schaefer Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Christian Borntraeger Cc: Paolo Bonzini Cc: "Kirill A. Shutemov" Cc: Mel Gorman Acked-by: Rik van Riel Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Andrea Arcangeli Cc: Oleg Nesterov Cc: "Eric W. Biederman" Cc: Alexander Viro Cc: Johannes Weiner Cc: David Rientjes Cc: Paolo Bonzini Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 +++ include/uapi/linux/prctl.h | 3 +++ kernel/fork.c | 11 ++++++++--- kernel/sys.c | 15 +++++++++++++++ 4 files changed, 29 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 35300f390eb6..c270fa68a32b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -177,6 +177,9 @@ extern unsigned int kobjsize(const void *objp); */ #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP) +/* This mask defines which mm->def_flags a process can inherit its parent */ +#define VM_INIT_DEF_MASK VM_NOHUGEPAGE + /* * mapping from the currently active vm_flags protection bits (the * low four bits) to a page protection mask.. diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 289760f424aa..58afc04c107e 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -149,4 +149,7 @@ #define PR_GET_TID_ADDRESS 40 +#define PR_SET_THP_DISABLE 41 +#define PR_GET_THP_DISABLE 42 + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/fork.c b/kernel/fork.c index abc45890f0a5..e40c0a01d5a6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -530,8 +530,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) atomic_set(&mm->mm_count, 1); init_rwsem(&mm->mmap_sem); INIT_LIST_HEAD(&mm->mmlist); - mm->flags = (current->mm) ? - (current->mm->flags & MMF_INIT_MASK) : default_dump_filter; mm->core_state = NULL; atomic_long_set(&mm->nr_ptes, 0); memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); @@ -540,8 +538,15 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) mm_init_owner(mm, p); clear_tlb_flush_pending(mm); - if (likely(!mm_alloc_pgd(mm))) { + if (current->mm) { + mm->flags = current->mm->flags & MMF_INIT_MASK; + mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK; + } else { + mm->flags = default_dump_filter; mm->def_flags = 0; + } + + if (likely(!mm_alloc_pgd(mm))) { mmu_notifier_mm_init(mm); return mm; } diff --git a/kernel/sys.c b/kernel/sys.c index adaeab6f7a87..fba0f29401ea 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1996,6 +1996,21 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, if (arg2 || arg3 || arg4 || arg5) return -EINVAL; return current->no_new_privs ? 1 : 0; + case PR_GET_THP_DISABLE: + if (arg2 || arg3 || arg4 || arg5) + return -EINVAL; + error = !!(me->mm->def_flags & VM_NOHUGEPAGE); + break; + case PR_SET_THP_DISABLE: + if (arg3 || arg4 || arg5) + return -EINVAL; + down_write(&me->mm->mmap_sem); + if (arg2) + me->mm->def_flags |= VM_NOHUGEPAGE; + else + me->mm->def_flags &= ~VM_NOHUGEPAGE; + up_write(&me->mm->mmap_sem); + break; default: error = -EINVAL; break; -- cgit v1.2.3-59-g8ed1b From 8c6e50b0290c4c708a3e6462729e1e9151a9a7df Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 7 Apr 2014 15:37:18 -0700 Subject: mm: introduce vm_ops->map_pages() Here's new version of faultaround patchset. It took a while to tune it and collect performance data. First patch adds new callback ->map_pages to vm_operations_struct. ->map_pages() is called when VM asks to map easy accessible pages. Filesystem should find and map pages associated with offsets from "pgoff" till "max_pgoff". ->map_pages() is called with page table locked and must not block. If it's not possible to reach a page without blocking, filesystem should skip it. Filesystem should use do_set_pte() to setup page table entry. Pointer to entry associated with offset "pgoff" is passed in "pte" field in vm_fault structure. Pointers to entries for other offsets should be calculated relative to "pte". Currently VM use ->map_pages only on read page fault path. We try to map FAULT_AROUND_PAGES a time. FAULT_AROUND_PAGES is 16 for now. Performance data for different FAULT_AROUND_ORDER is below. TODO: - implement ->map_pages() for shmem/tmpfs; - modify get_user_pages() to be able to use ->map_pages() and implement mmap(MAP_POPULATE|MAP_NONBLOCK) on top. ========================================================================= Tested on 4-socket machine (120 threads) with 128GiB of RAM. Few real-world workloads. The sweet spot for FAULT_AROUND_ORDER here is somewhere between 3 and 5. Let's say 4 :) Linux build (make -j60) FAULT_AROUND_ORDER Baseline 1 3 4 5 7 9 minor-faults 283,301,572 247,151,987 212,215,789 204,772,882 199,568,944 194,703,779 193,381,485 time, seconds 151.227629483 153.920996480 151.356125472 150.863792049 150.879207877 151.150764954 151.450962358 Linux rebuild (make -j60) FAULT_AROUND_ORDER Baseline 1 3 4 5 7 9 minor-faults 5,396,854 4,148,444 2,855,286 2,577,282 2,361,957 2,169,573 2,112,643 time, seconds 27.404543757 27.559725591 27.030057426 26.855045126 26.678618635 26.974523490 26.761320095 Git test suite (make -j60 test) FAULT_AROUND_ORDER Baseline 1 3 4 5 7 9 minor-faults 129,591,823 99,200,751 66,106,718 57,606,410 51,510,808 45,776,813 44,085,515 time, seconds 66.087215026 64.784546905 64.401156567 65.282708668 66.034016829 66.793780811 67.237810413 Two synthetic tests: access every word in file in sequential/random order. It doesn't improve much after FAULT_AROUND_ORDER == 4. Sequential access 16GiB file FAULT_AROUND_ORDER Baseline 1 3 4 5 7 9 1 thread minor-faults 4,195,437 2,098,275 525,068 262,251 131,170 32,856 8,282 time, seconds 7.250461742 6.461711074 5.493859139 5.488488147 5.707213983 5.898510832 5.109232856 8 threads minor-faults 33,557,540 16,892,728 4,515,848 2,366,999 1,423,382 442,732 142,339 time, seconds 16.649304881 9.312555263 6.612490639 6.394316732 6.669827501 6.75078944 6.371900528 32 threads minor-faults 134,228,222 67,526,810 17,725,386 9,716,537 4,763,731 1,668,921 537,200 time, seconds 49.164430543 29.712060103 12.938649729 10.175151004 11.840094583 9.594081325 9.928461797 60 threads minor-faults 251,687,988 126,146,952 32,919,406 18,208,804 10,458,947 2,733,907 928,217 time, seconds 86.260656897 49.626551828 22.335007632 17.608243696 16.523119035 16.339489186 16.326390902 120 threads minor-faults 503,352,863 252,939,677 67,039,168 35,191,827 19,170,091 4,688,357 1,471,862 time, seconds 124.589206333 79.757867787 39.508707872 32.167281632 29.972989292 28.729834575 28.042251622 Random access 1GiB file 1 thread minor-faults 262,636 132,743 34,369 17,299 8,527 3,451 1,222 time, seconds 15.351890914 16.613802482 16.569227308 15.179220992 16.557356122 16.578247824 15.365266994 8 threads minor-faults 2,098,948 1,061,871 273,690 154,501 87,110 25,663 7,384 time, seconds 15.040026343 15.096933500 14.474757288 14.289129964 14.411537468 14.296316837 14.395635804 32 threads minor-faults 8,390,734 4,231,023 1,054,432 528,847 269,242 97,746 26,881 time, seconds 20.430433109 21.585235358 22.115062928 14.872878951 14.880856305 14.883370649 14.821261690 60 threads minor-faults 15,733,258 7,892,809 1,973,393 988,266 594,789 164,994 51,691 time, seconds 26.577302548 25.692397770 18.728863715 20.153026398 21.619101933 17.745086260 17.613215273 120 threads minor-faults 31,471,111 15,816,616 3,959,209 1,978,685 1,008,299 264,635 96,010 time, seconds 41.835322703 40.459786095 36.085306105 35.313894834 35.814445675 36.552633793 34.289210594 Touch only one page in page table in 16GiB file FAULT_AROUND_ORDER Baseline 1 3 4 5 7 9 1 thread minor-faults 8,372 8,324 8,270 8,260 8,249 8,239 8,237 time, seconds 0.039892712 0.045369149 0.051846126 0.063681685 0.079095975 0.17652406 0.541213386 8 threads minor-faults 65,731 65,681 65,628 65,620 65,608 65,599 65,596 time, seconds 0.124159196 0.488600638 0.156854426 0.191901957 0.242631486 0.543569456 1.677303984 32 threads minor-faults 262,388 262,341 262,285 262,276 262,266 262,257 263,183 time, seconds 0.452421421 0.488600638 0.565020946 0.648229739 0.789850823 1.651584361 5.000361559 60 threads minor-faults 491,822 491,792 491,723 491,711 491,701 491,691 491,825 time, seconds 0.763288616 0.869620515 0.980727360 1.161732354 1.466915814 3.04041448 9.308612938 120 threads minor-faults 983,466 983,655 983,366 983,372 983,363 984,083 984,164 time, seconds 1.595846553 1.667902182 2.008959376 2.425380942 2.941368804 5.977807890 18.401846125 This patch (of 2): Introduce new vm_ops callback ->map_pages() and uses it for mapping easy accessible pages around fault address. On read page fault, if filesystem provides ->map_pages(), we try to map up to FAULT_AROUND_PAGES pages around page fault address in hope to reduce number of minor page faults. We call ->map_pages first and use ->fault() as fallback if page by the offset is not ready to be mapped (cold page cache or something). Signed-off-by: Kirill A. Shutemov Acked-by: Linus Torvalds Cc: Mel Gorman Cc: Rik van Riel Cc: Andi Kleen Cc: Matthew Wilcox Cc: Dave Hansen Cc: Alexander Viro Cc: Dave Chinner Cc: Ning Qu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/Locking | 10 +++++ include/linux/mm.h | 8 ++++ mm/memory.c | 81 +++++++++++++++++++++++++++++++++++++-- 3 files changed, 96 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index f424e0e5b46b..efca5c1bbb10 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -529,6 +529,7 @@ locking rules: open: yes close: yes fault: yes can return with page locked +map_pages: yes page_mkwrite: yes can return with page locked access: yes @@ -540,6 +541,15 @@ the page, then ensure it is not already truncated (the page lock will block subsequent truncate), and then return with VM_FAULT_LOCKED, and the page locked. The VM will unlock the page. + ->map_pages() is called when VM asks to map easy accessible pages. +Filesystem should find and map pages associated with offsets from "pgoff" +till "max_pgoff". ->map_pages() is called with page table locked and must +not block. If it's not possible to reach a page without blocking, +filesystem should skip it. Filesystem should use do_set_pte() to setup +page table entry. Pointer to entry associated with offset "pgoff" is +passed in "pte" field in vm_fault structure. Pointers to entries for other +offsets should be calculated relative to "pte". + ->page_mkwrite() is called when a previously read-only pte is about to become writeable. The filesystem again must ensure that there are no truncate/invalidate races, and then return with the page locked. If diff --git a/include/linux/mm.h b/include/linux/mm.h index c270fa68a32b..f710d32291e8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -213,6 +213,10 @@ struct vm_fault { * is set (which is also implied by * VM_FAULT_ERROR). */ + /* for ->map_pages() only */ + pgoff_t max_pgoff; /* map pages for offset from pgoff till + * max_pgoff inclusive */ + pte_t *pte; /* pte entry associated with ->pgoff */ }; /* @@ -224,6 +228,7 @@ struct vm_operations_struct { void (*open)(struct vm_area_struct * area); void (*close)(struct vm_area_struct * area); int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf); + void (*map_pages)(struct vm_area_struct *vma, struct vm_fault *vmf); /* notification that a previously read-only page is about to become * writable, if an error is returned it will cause a SIGBUS */ @@ -584,6 +589,9 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) pte = pte_mkwrite(pte); return pte; } + +void do_set_pte(struct vm_area_struct *vma, unsigned long address, + struct page *page, pte_t *pte, bool write, bool anon); #endif /* diff --git a/mm/memory.c b/mm/memory.c index c6ee34d10fcc..4eefb7e31521 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3342,7 +3342,22 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address, return ret; } -static void do_set_pte(struct vm_area_struct *vma, unsigned long address, +/** + * do_set_pte - setup new PTE entry for given page and add reverse page mapping. + * + * @vma: virtual memory area + * @address: user virtual address + * @page: page to map + * @pte: pointer to target page table entry + * @write: true, if new entry is writable + * @anon: true, if it's anonymous page + * + * Caller must hold page table lock relevant for @pte. + * + * Target users are page handler itself and implementations of + * vm_ops->map_pages. + */ +void do_set_pte(struct vm_area_struct *vma, unsigned long address, struct page *page, pte_t *pte, bool write, bool anon) { pte_t entry; @@ -3366,6 +3381,52 @@ static void do_set_pte(struct vm_area_struct *vma, unsigned long address, update_mmu_cache(vma, address, pte); } +#define FAULT_AROUND_ORDER 4 +#define FAULT_AROUND_PAGES (1UL << FAULT_AROUND_ORDER) +#define FAULT_AROUND_MASK ~((1UL << (PAGE_SHIFT + FAULT_AROUND_ORDER)) - 1) + +static void do_fault_around(struct vm_area_struct *vma, unsigned long address, + pte_t *pte, pgoff_t pgoff, unsigned int flags) +{ + unsigned long start_addr; + pgoff_t max_pgoff; + struct vm_fault vmf; + int off; + + BUILD_BUG_ON(FAULT_AROUND_PAGES > PTRS_PER_PTE); + + start_addr = max(address & FAULT_AROUND_MASK, vma->vm_start); + off = ((address - start_addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); + pte -= off; + pgoff -= off; + + /* + * max_pgoff is either end of page table or end of vma + * or FAULT_AROUND_PAGES from pgoff, depending what is neast. + */ + max_pgoff = pgoff - ((start_addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + + PTRS_PER_PTE - 1; + max_pgoff = min3(max_pgoff, vma_pages(vma) + vma->vm_pgoff - 1, + pgoff + FAULT_AROUND_PAGES - 1); + + /* Check if it makes any sense to call ->map_pages */ + while (!pte_none(*pte)) { + if (++pgoff > max_pgoff) + return; + start_addr += PAGE_SIZE; + if (start_addr >= vma->vm_end) + return; + pte++; + } + + vmf.virtual_address = (void __user *) start_addr; + vmf.pte = pte; + vmf.pgoff = pgoff; + vmf.max_pgoff = max_pgoff; + vmf.flags = flags; + vma->vm_ops->map_pages(vma, &vmf); +} + static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pmd_t *pmd, pgoff_t pgoff, unsigned int flags, pte_t orig_pte) @@ -3373,7 +3434,20 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, struct page *fault_page; spinlock_t *ptl; pte_t *pte; - int ret; + int ret = 0; + + /* + * Let's call ->map_pages() first and use ->fault() as fallback + * if page by the offset is not ready to be mapped (cold cache or + * something). + */ + if (vma->vm_ops->map_pages) { + pte = pte_offset_map_lock(mm, pmd, address, &ptl); + do_fault_around(vma, address, pte, pgoff, flags); + if (!pte_same(*pte, orig_pte)) + goto unlock_out; + pte_unmap_unlock(pte, ptl); + } ret = __do_fault(vma, address, pgoff, flags, &fault_page); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) @@ -3387,8 +3461,9 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, return ret; } do_set_pte(vma, address, fault_page, pte, false, false); - pte_unmap_unlock(pte, ptl); unlock_page(fault_page); +unlock_out: + pte_unmap_unlock(pte, ptl); return ret; } -- cgit v1.2.3-59-g8ed1b From f1820361f83d556a7f0a9f629100f3825e594328 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 7 Apr 2014 15:37:19 -0700 Subject: mm: implement ->map_pages for page cache filemap_map_pages() is generic implementation of ->map_pages() for filesystems who uses page cache. It should be safe to use filemap_map_pages() for ->map_pages() if filesystem use filemap_fault() for ->fault(). Signed-off-by: Kirill A. Shutemov Acked-by: Linus Torvalds Cc: Mel Gorman Cc: Rik van Riel Cc: Andi Kleen Cc: Matthew Wilcox Cc: Dave Hansen Cc: Alexander Viro Cc: Dave Chinner Cc: Ning Qu Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/9p/vfs_file.c | 2 ++ fs/btrfs/file.c | 1 + fs/cifs/file.c | 1 + fs/ext4/file.c | 1 + fs/f2fs/file.c | 1 + fs/fuse/file.c | 1 + fs/gfs2/file.c | 1 + fs/nfs/file.c | 1 + fs/nilfs2/file.c | 1 + fs/ubifs/file.c | 1 + fs/xfs/xfs_file.c | 1 + include/linux/mm.h | 1 + mm/filemap.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ mm/nommu.c | 6 +++++ 14 files changed, 93 insertions(+) (limited to 'include/linux') diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index a16b0ff497ca..d8223209d4b1 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -832,6 +832,7 @@ static void v9fs_mmap_vm_close(struct vm_area_struct *vma) static const struct vm_operations_struct v9fs_file_vm_ops = { .fault = filemap_fault, + .map_pages = filemap_map_pages, .page_mkwrite = v9fs_vm_page_mkwrite, .remap_pages = generic_file_remap_pages, }; @@ -839,6 +840,7 @@ static const struct vm_operations_struct v9fs_file_vm_ops = { static const struct vm_operations_struct v9fs_mmap_file_vm_ops = { .close = v9fs_mmap_vm_close, .fault = filemap_fault, + .map_pages = filemap_map_pages, .page_mkwrite = v9fs_vm_page_mkwrite, .remap_pages = generic_file_remap_pages, }; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e1ffb1e22898..c660527af838 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2025,6 +2025,7 @@ out: static const struct vm_operations_struct btrfs_file_vm_ops = { .fault = filemap_fault, + .map_pages = filemap_map_pages, .page_mkwrite = btrfs_page_mkwrite, .remap_pages = generic_file_remap_pages, }; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 834fce759d80..216d7e99f921 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3113,6 +3113,7 @@ cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) static struct vm_operations_struct cifs_file_vm_ops = { .fault = filemap_fault, + .map_pages = filemap_map_pages, .page_mkwrite = cifs_page_mkwrite, .remap_pages = generic_file_remap_pages, }; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 6db7f7db7777..4e508fc83dcf 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -200,6 +200,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, static const struct vm_operations_struct ext4_file_vm_ops = { .fault = filemap_fault, + .map_pages = filemap_map_pages, .page_mkwrite = ext4_page_mkwrite, .remap_pages = generic_file_remap_pages, }; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 0dfcef53a6ed..129a3bdb05ca 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -84,6 +84,7 @@ out: static const struct vm_operations_struct f2fs_file_vm_ops = { .fault = filemap_fault, + .map_pages = filemap_map_pages, .page_mkwrite = f2fs_vm_page_mkwrite, .remap_pages = generic_file_remap_pages, }; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 65df7d8be4f5..48992cac714b 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2117,6 +2117,7 @@ static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) static const struct vm_operations_struct fuse_file_vm_ops = { .close = fuse_vma_close, .fault = filemap_fault, + .map_pages = filemap_map_pages, .page_mkwrite = fuse_page_mkwrite, .remap_pages = generic_file_remap_pages, }; diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 6c794085abac..80d67253623c 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -494,6 +494,7 @@ out: static const struct vm_operations_struct gfs2_vm_ops = { .fault = filemap_fault, + .map_pages = filemap_map_pages, .page_mkwrite = gfs2_page_mkwrite, .remap_pages = generic_file_remap_pages, }; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 5bb790a69c71..284ca901fe16 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -617,6 +617,7 @@ out: static const struct vm_operations_struct nfs_file_vm_ops = { .fault = filemap_fault, + .map_pages = filemap_map_pages, .page_mkwrite = nfs_vm_page_mkwrite, .remap_pages = generic_file_remap_pages, }; diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 08fdb77852ac..f3a82fbcae02 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -134,6 +134,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) static const struct vm_operations_struct nilfs_file_vm_ops = { .fault = filemap_fault, + .map_pages = filemap_map_pages, .page_mkwrite = nilfs_page_mkwrite, .remap_pages = generic_file_remap_pages, }; diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 123c79b7261e..4f34dbae823d 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1538,6 +1538,7 @@ out_unlock: static const struct vm_operations_struct ubifs_file_vm_ops = { .fault = filemap_fault, + .map_pages = filemap_map_pages, .page_mkwrite = ubifs_vm_page_mkwrite, .remap_pages = generic_file_remap_pages, }; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index f7abff8c16ca..003c0051b62f 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1483,6 +1483,7 @@ const struct file_operations xfs_dir_file_operations = { static const struct vm_operations_struct xfs_file_vm_ops = { .fault = filemap_fault, + .map_pages = filemap_map_pages, .page_mkwrite = xfs_vm_page_mkwrite, .remap_pages = generic_file_remap_pages, }; diff --git a/include/linux/mm.h b/include/linux/mm.h index f710d32291e8..9132faed1a41 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1847,6 +1847,7 @@ extern void truncate_inode_pages_final(struct address_space *); /* generic vm_area_ops exported for stackable file systems */ extern int filemap_fault(struct vm_area_struct *, struct vm_fault *); +extern void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf); extern int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); /* mm/page-writeback.c */ diff --git a/mm/filemap.c b/mm/filemap.c index 21781f1fe52b..3f9b5fbb623f 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -33,6 +33,7 @@ #include /* for BUG_ON(!in_atomic()) only */ #include #include +#include #include "internal.h" #define CREATE_TRACE_POINTS @@ -2064,6 +2065,78 @@ page_not_uptodate: } EXPORT_SYMBOL(filemap_fault); +void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct radix_tree_iter iter; + void **slot; + struct file *file = vma->vm_file; + struct address_space *mapping = file->f_mapping; + loff_t size; + struct page *page; + unsigned long address = (unsigned long) vmf->virtual_address; + unsigned long addr; + pte_t *pte; + + rcu_read_lock(); + radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, vmf->pgoff) { + if (iter.index > vmf->max_pgoff) + break; +repeat: + page = radix_tree_deref_slot(slot); + if (unlikely(!page)) + goto next; + if (radix_tree_exception(page)) { + if (radix_tree_deref_retry(page)) + break; + else + goto next; + } + + if (!page_cache_get_speculative(page)) + goto repeat; + + /* Has the page moved? */ + if (unlikely(page != *slot)) { + page_cache_release(page); + goto repeat; + } + + if (!PageUptodate(page) || + PageReadahead(page) || + PageHWPoison(page)) + goto skip; + if (!trylock_page(page)) + goto skip; + + if (page->mapping != mapping || !PageUptodate(page)) + goto unlock; + + size = i_size_read(mapping->host) + PAGE_CACHE_SIZE - 1; + if (page->index >= size >> PAGE_CACHE_SHIFT) + goto unlock; + + pte = vmf->pte + page->index - vmf->pgoff; + if (!pte_none(*pte)) + goto unlock; + + if (file->f_ra.mmap_miss > 0) + file->f_ra.mmap_miss--; + addr = address + (page->index - vmf->pgoff) * PAGE_SIZE; + do_set_pte(vma, addr, page, pte, false, false); + unlock_page(page); + goto next; +unlock: + unlock_page(page); +skip: + page_cache_release(page); +next: + if (iter.index == vmf->max_pgoff) + break; + } + rcu_read_unlock(); +} +EXPORT_SYMBOL(filemap_map_pages); + int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *page = vmf->page; @@ -2093,6 +2166,7 @@ EXPORT_SYMBOL(filemap_page_mkwrite); const struct vm_operations_struct generic_file_vm_ops = { .fault = filemap_fault, + .map_pages = filemap_map_pages, .page_mkwrite = filemap_page_mkwrite, .remap_pages = generic_file_remap_pages, }; diff --git a/mm/nommu.c b/mm/nommu.c index a554e5a451cd..e19482533ce3 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1985,6 +1985,12 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } EXPORT_SYMBOL(filemap_fault); +void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + BUG(); +} +EXPORT_SYMBOL(filemap_map_pages); + int generic_file_remap_pages(struct vm_area_struct *vma, unsigned long addr, unsigned long size, pgoff_t pgoff) { -- cgit v1.2.3-59-g8ed1b From 615d6e8756c87149f2d4c1b93d471bca002bd849 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 7 Apr 2014 15:37:25 -0700 Subject: mm: per-thread vma caching This patch is a continuation of efforts trying to optimize find_vma(), avoiding potentially expensive rbtree walks to locate a vma upon faults. The original approach (https://lkml.org/lkml/2013/11/1/410), where the largest vma was also cached, ended up being too specific and random, thus further comparison with other approaches were needed. There are two things to consider when dealing with this, the cache hit rate and the latency of find_vma(). Improving the hit-rate does not necessarily translate in finding the vma any faster, as the overhead of any fancy caching schemes can be too high to consider. We currently cache the last used vma for the whole address space, which provides a nice optimization, reducing the total cycles in find_vma() by up to 250%, for workloads with good locality. On the other hand, this simple scheme is pretty much useless for workloads with poor locality. Analyzing ebizzy runs shows that, no matter how many threads are running, the mmap_cache hit rate is less than 2%, and in many situations below 1%. The proposed approach is to replace this scheme with a small per-thread cache, maximizing hit rates at a very low maintenance cost. Invalidations are performed by simply bumping up a 32-bit sequence number. The only expensive operation is in the rare case of a seq number overflow, where all caches that share the same address space are flushed. Upon a miss, the proposed replacement policy is based on the page number that contains the virtual address in question. Concretely, the following results are seen on an 80 core, 8 socket x86-64 box: 1) System bootup: Most programs are single threaded, so the per-thread scheme does improve ~50% hit rate by just adding a few more slots to the cache. +----------------+----------+------------------+ | caching scheme | hit-rate | cycles (billion) | +----------------+----------+------------------+ | baseline | 50.61% | 19.90 | | patched | 73.45% | 13.58 | +----------------+----------+------------------+ 2) Kernel build: This one is already pretty good with the current approach as we're dealing with good locality. +----------------+----------+------------------+ | caching scheme | hit-rate | cycles (billion) | +----------------+----------+------------------+ | baseline | 75.28% | 11.03 | | patched | 88.09% | 9.31 | +----------------+----------+------------------+ 3) Oracle 11g Data Mining (4k pages): Similar to the kernel build workload. +----------------+----------+------------------+ | caching scheme | hit-rate | cycles (billion) | +----------------+----------+------------------+ | baseline | 70.66% | 17.14 | | patched | 91.15% | 12.57 | +----------------+----------+------------------+ 4) Ebizzy: There's a fair amount of variation from run to run, but this approach always shows nearly perfect hit rates, while baseline is just about non-existent. The amounts of cycles can fluctuate between anywhere from ~60 to ~116 for the baseline scheme, but this approach reduces it considerably. For instance, with 80 threads: +----------------+----------+------------------+ | caching scheme | hit-rate | cycles (billion) | +----------------+----------+------------------+ | baseline | 1.06% | 91.54 | | patched | 99.97% | 14.18 | +----------------+----------+------------------+ [akpm@linux-foundation.org: fix nommu build, per Davidlohr] [akpm@linux-foundation.org: document vmacache_valid() logic] [akpm@linux-foundation.org: attempt to untangle header files] [akpm@linux-foundation.org: add vmacache_find() BUG_ON] [hughd@google.com: add vmacache_valid_mm() (from Oleg)] [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: adjust and enhance comments] Signed-off-by: Davidlohr Bueso Reviewed-by: Rik van Riel Acked-by: Linus Torvalds Reviewed-by: Michel Lespinasse Cc: Oleg Nesterov Tested-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/unicore32/include/asm/mmu_context.h | 4 +- fs/exec.c | 5 +- fs/proc/task_mmu.c | 3 +- include/linux/mm_types.h | 4 +- include/linux/sched.h | 7 ++ include/linux/vmacache.h | 38 +++++++++++ kernel/debug/debug_core.c | 14 +++- kernel/fork.c | 7 +- mm/Makefile | 2 +- mm/mmap.c | 55 ++++++++------- mm/nommu.c | 24 ++++--- mm/vmacache.c | 112 +++++++++++++++++++++++++++++++ 12 files changed, 231 insertions(+), 44 deletions(-) create mode 100644 include/linux/vmacache.h create mode 100644 mm/vmacache.c (limited to 'include/linux') diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h index fb5e4c658f7a..ef470a7a3d0f 100644 --- a/arch/unicore32/include/asm/mmu_context.h +++ b/arch/unicore32/include/asm/mmu_context.h @@ -14,6 +14,8 @@ #include #include +#include +#include #include #include @@ -73,7 +75,7 @@ do { \ else \ mm->mmap = NULL; \ rb_erase(&high_vma->vm_rb, &mm->mm_rb); \ - mm->mmap_cache = NULL; \ + vmacache_invalidate(mm); \ mm->map_count--; \ remove_vma(high_vma); \ } \ diff --git a/fs/exec.c b/fs/exec.c index 25dfeba6d55f..b60ccf969a8b 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -822,7 +823,7 @@ EXPORT_SYMBOL(read_code); static int exec_mmap(struct mm_struct *mm) { struct task_struct *tsk; - struct mm_struct * old_mm, *active_mm; + struct mm_struct *old_mm, *active_mm; /* Notify parent that we're no longer interested in the old VM */ tsk = current; @@ -848,6 +849,8 @@ static int exec_mmap(struct mm_struct *mm) tsk->mm = mm; tsk->active_mm = mm; activate_mm(active_mm, mm); + tsk->mm->vmacache_seqnum = 0; + vmacache_flush(tsk); task_unlock(tsk); if (old_mm) { up_read(&old_mm->mmap_sem); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index fb52b548080d..442177b1119a 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -152,7 +153,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) /* * We remember last_addr rather than next_addr to hit with - * mmap_cache most of the time. We have zero last_addr at + * vmacache most of the time. We have zero last_addr at * the beginning and also after lseek. We will have -1 last_addr * after the end of the vmas. */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 290901a8c1de..2b58d192ea24 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -342,9 +342,9 @@ struct mm_rss_stat { struct kioctx_table; struct mm_struct { - struct vm_area_struct * mmap; /* list of VMAs */ + struct vm_area_struct *mmap; /* list of VMAs */ struct rb_root mm_rb; - struct vm_area_struct * mmap_cache; /* last find_vma result */ + u32 vmacache_seqnum; /* per-thread vmacache */ #ifdef CONFIG_MMU unsigned long (*get_unmapped_area) (struct file *filp, unsigned long addr, unsigned long len, diff --git a/include/linux/sched.h b/include/linux/sched.h index 7cb07fd26680..642477dd814a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -132,6 +132,10 @@ struct perf_event_context; struct blk_plug; struct filename; +#define VMACACHE_BITS 2 +#define VMACACHE_SIZE (1U << VMACACHE_BITS) +#define VMACACHE_MASK (VMACACHE_SIZE - 1) + /* * List of flags we want to share for kernel threads, * if only because they are not used by them anyway. @@ -1235,6 +1239,9 @@ struct task_struct { #ifdef CONFIG_COMPAT_BRK unsigned brk_randomized:1; #endif + /* per-thread vma caching */ + u32 vmacache_seqnum; + struct vm_area_struct *vmacache[VMACACHE_SIZE]; #if defined(SPLIT_RSS_COUNTING) struct task_rss_stat rss_stat; #endif diff --git a/include/linux/vmacache.h b/include/linux/vmacache.h new file mode 100644 index 000000000000..c3fa0fd43949 --- /dev/null +++ b/include/linux/vmacache.h @@ -0,0 +1,38 @@ +#ifndef __LINUX_VMACACHE_H +#define __LINUX_VMACACHE_H + +#include +#include + +/* + * Hash based on the page number. Provides a good hit rate for + * workloads with good locality and those with random accesses as well. + */ +#define VMACACHE_HASH(addr) ((addr >> PAGE_SHIFT) & VMACACHE_MASK) + +static inline void vmacache_flush(struct task_struct *tsk) +{ + memset(tsk->vmacache, 0, sizeof(tsk->vmacache)); +} + +extern void vmacache_flush_all(struct mm_struct *mm); +extern void vmacache_update(unsigned long addr, struct vm_area_struct *newvma); +extern struct vm_area_struct *vmacache_find(struct mm_struct *mm, + unsigned long addr); + +#ifndef CONFIG_MMU +extern struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm, + unsigned long start, + unsigned long end); +#endif + +static inline void vmacache_invalidate(struct mm_struct *mm) +{ + mm->vmacache_seqnum++; + + /* deal with overflows */ + if (unlikely(mm->vmacache_seqnum == 0)) + vmacache_flush_all(mm); +} + +#endif /* __LINUX_VMACACHE_H */ diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 99982a70ddad..2956c8da1605 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -224,10 +225,17 @@ static void kgdb_flush_swbreak_addr(unsigned long addr) if (!CACHE_FLUSH_IS_SAFE) return; - if (current->mm && current->mm->mmap_cache) { - flush_cache_range(current->mm->mmap_cache, - addr, addr + BREAK_INSTR_SIZE); + if (current->mm) { + int i; + + for (i = 0; i < VMACACHE_SIZE; i++) { + if (!current->vmacache[i]) + continue; + flush_cache_range(current->vmacache[i], + addr, addr + BREAK_INSTR_SIZE); + } } + /* Force flush instruction cache if it was outside the mm */ flush_icache_range(addr, addr + BREAK_INSTR_SIZE); } diff --git a/kernel/fork.c b/kernel/fork.c index e40c0a01d5a6..bc0e96b78dfd 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include #include #include @@ -364,7 +366,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) mm->locked_vm = 0; mm->mmap = NULL; - mm->mmap_cache = NULL; + mm->vmacache_seqnum = 0; mm->map_count = 0; cpumask_clear(mm_cpumask(mm)); mm->mm_rb = RB_ROOT; @@ -882,6 +884,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) if (!oldmm) return 0; + /* initialize the new vmacache entries */ + vmacache_flush(tsk); + if (clone_flags & CLONE_VM) { atomic_inc(&oldmm->mm_users); mm = oldmm; diff --git a/mm/Makefile b/mm/Makefile index cdd741519ee0..23a6f7e23019 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -16,7 +16,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ util.o mmzone.o vmstat.o backing-dev.o \ mm_init.o mmu_context.o percpu.o slab_common.o \ - compaction.o balloon_compaction.o \ + compaction.o balloon_compaction.o vmacache.o \ interval_tree.o list_lru.o workingset.o $(mmu-y) obj-y += init-mm.o diff --git a/mm/mmap.c b/mm/mmap.c index 46433e137abc..b1202cf81f4b 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -681,8 +682,9 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma, prev->vm_next = next = vma->vm_next; if (next) next->vm_prev = prev; - if (mm->mmap_cache == vma) - mm->mmap_cache = prev; + + /* Kill the cache */ + vmacache_invalidate(mm); } /* @@ -1989,34 +1991,33 @@ EXPORT_SYMBOL(get_unmapped_area); /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) { - struct vm_area_struct *vma = NULL; + struct rb_node *rb_node; + struct vm_area_struct *vma; /* Check the cache first. */ - /* (Cache hit rate is typically around 35%.) */ - vma = ACCESS_ONCE(mm->mmap_cache); - if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) { - struct rb_node *rb_node; + vma = vmacache_find(mm, addr); + if (likely(vma)) + return vma; - rb_node = mm->mm_rb.rb_node; - vma = NULL; + rb_node = mm->mm_rb.rb_node; + vma = NULL; - while (rb_node) { - struct vm_area_struct *vma_tmp; - - vma_tmp = rb_entry(rb_node, - struct vm_area_struct, vm_rb); - - if (vma_tmp->vm_end > addr) { - vma = vma_tmp; - if (vma_tmp->vm_start <= addr) - break; - rb_node = rb_node->rb_left; - } else - rb_node = rb_node->rb_right; - } - if (vma) - mm->mmap_cache = vma; + while (rb_node) { + struct vm_area_struct *tmp; + + tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb); + + if (tmp->vm_end > addr) { + vma = tmp; + if (tmp->vm_start <= addr) + break; + rb_node = rb_node->rb_left; + } else + rb_node = rb_node->rb_right; } + + if (vma) + vmacache_update(addr, vma); return vma; } @@ -2388,7 +2389,9 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma, } else mm->highest_vm_end = prev ? prev->vm_end : 0; tail_vma->vm_next = NULL; - mm->mmap_cache = NULL; /* Kill the cache. */ + + /* Kill the cache */ + vmacache_invalidate(mm); } /* diff --git a/mm/nommu.c b/mm/nommu.c index e19482533ce3..5d3f3524bbdc 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -768,16 +769,23 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) */ static void delete_vma_from_mm(struct vm_area_struct *vma) { + int i; struct address_space *mapping; struct mm_struct *mm = vma->vm_mm; + struct task_struct *curr = current; kenter("%p", vma); protect_vma(vma, 0); mm->map_count--; - if (mm->mmap_cache == vma) - mm->mmap_cache = NULL; + for (i = 0; i < VMACACHE_SIZE; i++) { + /* if the vma is cached, invalidate the entire cache */ + if (curr->vmacache[i] == vma) { + vmacache_invalidate(curr->mm); + break; + } + } /* remove the VMA from the mapping */ if (vma->vm_file) { @@ -825,8 +833,8 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) struct vm_area_struct *vma; /* check the cache first */ - vma = ACCESS_ONCE(mm->mmap_cache); - if (vma && vma->vm_start <= addr && vma->vm_end > addr) + vma = vmacache_find(mm, addr); + if (likely(vma)) return vma; /* trawl the list (there may be multiple mappings in which addr @@ -835,7 +843,7 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) if (vma->vm_start > addr) return NULL; if (vma->vm_end > addr) { - mm->mmap_cache = vma; + vmacache_update(addr, vma); return vma; } } @@ -874,8 +882,8 @@ static struct vm_area_struct *find_vma_exact(struct mm_struct *mm, unsigned long end = addr + len; /* check the cache first */ - vma = mm->mmap_cache; - if (vma && vma->vm_start == addr && vma->vm_end == end) + vma = vmacache_find_exact(mm, addr, end); + if (vma) return vma; /* trawl the list (there may be multiple mappings in which addr @@ -886,7 +894,7 @@ static struct vm_area_struct *find_vma_exact(struct mm_struct *mm, if (vma->vm_start > addr) return NULL; if (vma->vm_end == end) { - mm->mmap_cache = vma; + vmacache_update(addr, vma); return vma; } } diff --git a/mm/vmacache.c b/mm/vmacache.c new file mode 100644 index 000000000000..d4224b397c0e --- /dev/null +++ b/mm/vmacache.c @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2014 Davidlohr Bueso. + */ +#include +#include +#include + +/* + * Flush vma caches for threads that share a given mm. + * + * The operation is safe because the caller holds the mmap_sem + * exclusively and other threads accessing the vma cache will + * have mmap_sem held at least for read, so no extra locking + * is required to maintain the vma cache. + */ +void vmacache_flush_all(struct mm_struct *mm) +{ + struct task_struct *g, *p; + + rcu_read_lock(); + for_each_process_thread(g, p) { + /* + * Only flush the vmacache pointers as the + * mm seqnum is already set and curr's will + * be set upon invalidation when the next + * lookup is done. + */ + if (mm == p->mm) + vmacache_flush(p); + } + rcu_read_unlock(); +} + +/* + * This task may be accessing a foreign mm via (for example) + * get_user_pages()->find_vma(). The vmacache is task-local and this + * task's vmacache pertains to a different mm (ie, its own). There is + * nothing we can do here. + * + * Also handle the case where a kernel thread has adopted this mm via use_mm(). + * That kernel thread's vmacache is not applicable to this mm. + */ +static bool vmacache_valid_mm(struct mm_struct *mm) +{ + return current->mm == mm && !(current->flags & PF_KTHREAD); +} + +void vmacache_update(unsigned long addr, struct vm_area_struct *newvma) +{ + if (vmacache_valid_mm(newvma->vm_mm)) + current->vmacache[VMACACHE_HASH(addr)] = newvma; +} + +static bool vmacache_valid(struct mm_struct *mm) +{ + struct task_struct *curr; + + if (!vmacache_valid_mm(mm)) + return false; + + curr = current; + if (mm->vmacache_seqnum != curr->vmacache_seqnum) { + /* + * First attempt will always be invalid, initialize + * the new cache for this task here. + */ + curr->vmacache_seqnum = mm->vmacache_seqnum; + vmacache_flush(curr); + return false; + } + return true; +} + +struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr) +{ + int i; + + if (!vmacache_valid(mm)) + return NULL; + + for (i = 0; i < VMACACHE_SIZE; i++) { + struct vm_area_struct *vma = current->vmacache[i]; + + if (vma && vma->vm_start <= addr && vma->vm_end > addr) { + BUG_ON(vma->vm_mm != mm); + return vma; + } + } + + return NULL; +} + +#ifndef CONFIG_MMU +struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + int i; + + if (!vmacache_valid(mm)) + return NULL; + + for (i = 0; i < VMACACHE_SIZE; i++) { + struct vm_area_struct *vma = current->vmacache[i]; + + if (vma && vma->vm_start == start && vma->vm_end == end) + return vma; + } + + return NULL; +} +#endif -- cgit v1.2.3-59-g8ed1b From 2a389610a7331d22344698f23ef2e8c55b2cde7b Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 7 Apr 2014 15:37:29 -0700 Subject: mm, mempolicy: rename slab_node for clarity slab_node() is actually a mempolicy function, so rename it to mempolicy_slab_node() to make it clearer that it used for processes with mempolicies. At the same time, cleanup its code by saving numa_mem_id() in a local variable (since we require a node with memory, not just any node) and remove an obsolete comment that assumes the mempolicy is actually passed into the function. Signed-off-by: David Rientjes Acked-by: Christoph Lameter Cc: Johannes Weiner Cc: Michal Hocko Cc: KAMEZAWA Hiroyuki Cc: Christoph Lameter Cc: Pekka Enberg Cc: Tejun Heo Cc: Mel Gorman Cc: Oleg Nesterov Cc: Rik van Riel Cc: Jianguo Wu Cc: Tim Hockin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 2 +- mm/mempolicy.c | 15 ++++++--------- mm/slab.c | 4 ++-- mm/slub.c | 2 +- 4 files changed, 10 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 5f1ea756aace..cfe55dfca015 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -151,7 +151,7 @@ extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, extern bool init_nodemask_of_mempolicy(nodemask_t *mask); extern bool mempolicy_nodemask_intersects(struct task_struct *tsk, const nodemask_t *mask); -extern unsigned slab_node(void); +extern unsigned int mempolicy_slab_node(void); extern enum zone_type policy_zone; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index e3ab02822799..0ad0ba31979f 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1782,21 +1782,18 @@ static unsigned interleave_nodes(struct mempolicy *policy) /* * Depending on the memory policy provide a node from which to allocate the * next slab entry. - * @policy must be protected by freeing by the caller. If @policy is - * the current task's mempolicy, this protection is implicit, as only the - * task can change it's policy. The system default policy requires no - * such protection. */ -unsigned slab_node(void) +unsigned int mempolicy_slab_node(void) { struct mempolicy *policy; + int node = numa_mem_id(); if (in_interrupt()) - return numa_node_id(); + return node; policy = current->mempolicy; if (!policy || policy->flags & MPOL_F_LOCAL) - return numa_node_id(); + return node; switch (policy->mode) { case MPOL_PREFERRED: @@ -1816,11 +1813,11 @@ unsigned slab_node(void) struct zonelist *zonelist; struct zone *zone; enum zone_type highest_zoneidx = gfp_zone(GFP_KERNEL); - zonelist = &NODE_DATA(numa_node_id())->node_zonelists[0]; + zonelist = &NODE_DATA(node)->node_zonelists[0]; (void)first_zones_zonelist(zonelist, highest_zoneidx, &policy->v.nodes, &zone); - return zone ? zone->node : numa_node_id(); + return zone ? zone->node : node; } default: diff --git a/mm/slab.c b/mm/slab.c index 9153c802e2fe..4b17f4c2e92d 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3042,7 +3042,7 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD)) nid_alloc = cpuset_slab_spread_node(); else if (current->mempolicy) - nid_alloc = slab_node(); + nid_alloc = mempolicy_slab_node(); if (nid_alloc != nid_here) return ____cache_alloc_node(cachep, flags, nid_alloc); return NULL; @@ -3074,7 +3074,7 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) retry_cpuset: cpuset_mems_cookie = read_mems_allowed_begin(); - zonelist = node_zonelist(slab_node(), flags); + zonelist = node_zonelist(mempolicy_slab_node(), flags); retry: /* diff --git a/mm/slub.c b/mm/slub.c index fe6d7be22ef0..5b05e4fe9a1a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1685,7 +1685,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags, do { cpuset_mems_cookie = read_mems_allowed_begin(); - zonelist = node_zonelist(slab_node(), flags); + zonelist = node_zonelist(mempolicy_slab_node(), flags); for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { struct kmem_cache_node *n; -- cgit v1.2.3-59-g8ed1b From f0432d159601f96839f514f286eaa5b75c4112dc Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 7 Apr 2014 15:37:30 -0700 Subject: mm, mempolicy: remove per-process flag PF_MEMPOLICY is an unnecessary optimization for CONFIG_SLAB users. There's no significant performance degradation to checking current->mempolicy rather than current->flags & PF_MEMPOLICY in the allocation path, especially since this is considered unlikely(). Running TCP_RR with netperf-2.4.5 through localhost on 16 cpu machine with 64GB of memory and without a mempolicy: threads before after 16 1249409 1244487 32 1281786 1246783 48 1239175 1239138 64 1244642 1241841 80 1244346 1248918 96 1266436 1254316 112 1307398 1312135 128 1327607 1326502 Per-process flags are a scarce resource so we should free them up whenever possible and make them available. We'll be using it shortly for memcg oom reserves. Signed-off-by: David Rientjes Cc: Johannes Weiner Cc: Michal Hocko Cc: KAMEZAWA Hiroyuki Cc: Christoph Lameter Cc: Pekka Enberg Cc: Tejun Heo Cc: Mel Gorman Cc: Oleg Nesterov Cc: Rik van Riel Cc: Jianguo Wu Cc: Tim Hockin Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 1 - include/linux/sched.h | 1 - kernel/fork.c | 1 - mm/mempolicy.c | 31 ------------------------------- mm/slab.c | 4 ++-- 5 files changed, 2 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index cfe55dfca015..3c1b968da0ca 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -143,7 +143,6 @@ extern void numa_policy_init(void); extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new, enum mpol_rebind_step step); extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new); -extern void mpol_fix_fork_child_flag(struct task_struct *p); extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags, diff --git a/include/linux/sched.h b/include/linux/sched.h index 642477dd814a..6c70645eb3b6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1851,7 +1851,6 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ -#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ #define PF_SUSPEND_TASK 0x80000000 /* this thread called freeze_processes and should not be frozen */ diff --git a/kernel/fork.c b/kernel/fork.c index c777964c0662..e905e9c6b224 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1276,7 +1276,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->mempolicy = NULL; goto bad_fork_cleanup_threadgroup_lock; } - mpol_fix_fork_child_flag(p); #endif #ifdef CONFIG_CPUSETS p->cpuset_mem_spread_rotor = NUMA_NO_NODE; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 0ad0ba31979f..78e1472933ea 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -795,36 +795,6 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, return err; } -/* - * Update task->flags PF_MEMPOLICY bit: set iff non-default - * mempolicy. Allows more rapid checking of this (combined perhaps - * with other PF_* flag bits) on memory allocation hot code paths. - * - * If called from outside this file, the task 'p' should -only- be - * a newly forked child not yet visible on the task list, because - * manipulating the task flags of a visible task is not safe. - * - * The above limitation is why this routine has the funny name - * mpol_fix_fork_child_flag(). - * - * It is also safe to call this with a task pointer of current, - * which the static wrapper mpol_set_task_struct_flag() does, - * for use within this file. - */ - -void mpol_fix_fork_child_flag(struct task_struct *p) -{ - if (p->mempolicy) - p->flags |= PF_MEMPOLICY; - else - p->flags &= ~PF_MEMPOLICY; -} - -static void mpol_set_task_struct_flag(void) -{ - mpol_fix_fork_child_flag(current); -} - /* Set the process memory policy */ static long do_set_mempolicy(unsigned short mode, unsigned short flags, nodemask_t *nodes) @@ -861,7 +831,6 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags, } old = current->mempolicy; current->mempolicy = new; - mpol_set_task_struct_flag(); if (new && new->mode == MPOL_INTERLEAVE && nodes_weight(new->v.nodes)) current->il_next = first_node(new->v.nodes); diff --git a/mm/slab.c b/mm/slab.c index 4b17f4c2e92d..3db4cb06e32e 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3027,7 +3027,7 @@ out: #ifdef CONFIG_NUMA /* - * Try allocating on another node if PF_SPREAD_SLAB|PF_MEMPOLICY. + * Try allocating on another node if PF_SPREAD_SLAB is a mempolicy is set. * * If we are in_interrupt, then process context, including cpusets and * mempolicy, may not apply and should not be used for allocation policy. @@ -3259,7 +3259,7 @@ __do_cache_alloc(struct kmem_cache *cache, gfp_t flags) { void *objp; - if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) { + if (current->mempolicy || unlikely(current->flags & PF_SPREAD_SLAB)) { objp = alternate_node_alloc(cache, flags); if (objp) goto out; -- cgit v1.2.3-59-g8ed1b From 539a13b47e462d28c48f076c63871580f694a366 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 7 Apr 2014 15:37:32 -0700 Subject: res_counter: remove interface for locked charging and uncharging The res_counter_{charge,uncharge}_locked() variants are not used in the kernel outside of the resource counter code itself, so remove the interface. Signed-off-by: David Rientjes Acked-by: Michal Hocko Cc: Johannes Weiner Cc: KAMEZAWA Hiroyuki Cc: Christoph Lameter Cc: Pekka Enberg Cc: Tejun Heo Cc: Mel Gorman Cc: Oleg Nesterov Cc: Rik van Riel Cc: Jianguo Wu Cc: Tim Hockin Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroups/resource_counter.txt | 12 ++---------- include/linux/res_counter.h | 6 +----- kernel/res_counter.c | 23 ++++++++++++----------- 3 files changed, 15 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/Documentation/cgroups/resource_counter.txt b/Documentation/cgroups/resource_counter.txt index 5108afb3645c..762ca54eb929 100644 --- a/Documentation/cgroups/resource_counter.txt +++ b/Documentation/cgroups/resource_counter.txt @@ -76,15 +76,7 @@ to work with it. limit_fail_at parameter is set to the particular res_counter element where the charging failed. - d. int res_counter_charge_locked - (struct res_counter *rc, unsigned long val, bool force) - - The same as res_counter_charge(), but it must not acquire/release the - res_counter->lock internally (it must be called with res_counter->lock - held). The force parameter indicates whether we can bypass the limit. - - e. u64 res_counter_uncharge[_locked] - (struct res_counter *rc, unsigned long val) + d. u64 res_counter_uncharge(struct res_counter *rc, unsigned long val) When a resource is released (freed) it should be de-accounted from the resource counter it was accounted to. This is called @@ -93,7 +85,7 @@ to work with it. The _locked routines imply that the res_counter->lock is taken. - f. u64 res_counter_uncharge_until + e. u64 res_counter_uncharge_until (struct res_counter *rc, struct res_counter *top, unsigned long val) diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index 201a69749659..56b7bc32db4f 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -104,15 +104,13 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent); * units, e.g. numbers, bytes, Kbytes, etc * * returns 0 on success and <0 if the counter->usage will exceed the - * counter->limit _locked call expects the counter->lock to be taken + * counter->limit * * charge_nofail works the same, except that it charges the resource * counter unconditionally, and returns < 0 if the after the current * charge we are over limit. */ -int __must_check res_counter_charge_locked(struct res_counter *counter, - unsigned long val, bool force); int __must_check res_counter_charge(struct res_counter *counter, unsigned long val, struct res_counter **limit_fail_at); int res_counter_charge_nofail(struct res_counter *counter, @@ -125,12 +123,10 @@ int res_counter_charge_nofail(struct res_counter *counter, * @val: the amount of the resource * * these calls check for usage underflow and show a warning on the console - * _locked call expects the counter->lock to be taken * * returns the total charges still present in @counter. */ -u64 res_counter_uncharge_locked(struct res_counter *counter, unsigned long val); u64 res_counter_uncharge(struct res_counter *counter, unsigned long val); u64 res_counter_uncharge_until(struct res_counter *counter, diff --git a/kernel/res_counter.c b/kernel/res_counter.c index 4aa8a305aede..51dbac6a3633 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c @@ -22,8 +22,18 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent) counter->parent = parent; } -int res_counter_charge_locked(struct res_counter *counter, unsigned long val, - bool force) +static u64 res_counter_uncharge_locked(struct res_counter *counter, + unsigned long val) +{ + if (WARN_ON(counter->usage < val)) + val = counter->usage; + + counter->usage -= val; + return counter->usage; +} + +static int res_counter_charge_locked(struct res_counter *counter, + unsigned long val, bool force) { int ret = 0; @@ -86,15 +96,6 @@ int res_counter_charge_nofail(struct res_counter *counter, unsigned long val, return __res_counter_charge(counter, val, limit_fail_at, true); } -u64 res_counter_uncharge_locked(struct res_counter *counter, unsigned long val) -{ - if (WARN_ON(counter->usage < val)) - val = counter->usage; - - counter->usage -= val; - return counter->usage; -} - u64 res_counter_uncharge_until(struct res_counter *counter, struct res_counter *top, unsigned long val) -- cgit v1.2.3-59-g8ed1b From d230dec18dc9a581f153c14cb5371640cd62543b Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 7 Apr 2014 15:37:38 -0700 Subject: mm: use 'const char *' insted of 'char *' for reason in dump_page() I tried to use 'dump_page(page, __func__)' for debugging, but it triggers warning: warning: passing argument 2 of `dump_page' discards `const' qualifier from pointer target type [enabled by default] Let's convert 'reason' to 'const char *' in dump_page() and friends: we shouldn't modify it anyway. Signed-off-by: Kirill A. Shutemov Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmdebug.h | 4 ++-- mm/page_alloc.c | 12 +++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 5042c036dda9..2d57efa64cc1 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -3,8 +3,8 @@ struct page; -extern void dump_page(struct page *page, char *reason); -extern void dump_page_badflags(struct page *page, char *reason, +extern void dump_page(struct page *page, const char *reason); +extern void dump_page_badflags(struct page *page, const char *reason, unsigned long badflags); #ifdef CONFIG_DEBUG_VM diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 336ee925f756..73c25912c7c4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -295,7 +295,8 @@ static inline int bad_range(struct zone *zone, struct page *page) } #endif -static void bad_page(struct page *page, char *reason, unsigned long bad_flags) +static void bad_page(struct page *page, const char *reason, + unsigned long bad_flags) { static unsigned long resume; static unsigned long nr_shown; @@ -623,7 +624,7 @@ out: static inline int free_pages_check(struct page *page) { - char *bad_reason = NULL; + const char *bad_reason = NULL; unsigned long bad_flags = 0; if (unlikely(page_mapcount(page))) @@ -859,7 +860,7 @@ static inline void expand(struct zone *zone, struct page *page, */ static inline int check_new_page(struct page *page) { - char *bad_reason = NULL; + const char *bad_reason = NULL; unsigned long bad_flags = 0; if (unlikely(page_mapcount(page))) @@ -6545,7 +6546,8 @@ static void dump_page_flags(unsigned long flags) printk(")\n"); } -void dump_page_badflags(struct page *page, char *reason, unsigned long badflags) +void dump_page_badflags(struct page *page, const char *reason, + unsigned long badflags) { printk(KERN_ALERT "page:%p count:%d mapcount:%d mapping:%p index:%#lx\n", @@ -6561,7 +6563,7 @@ void dump_page_badflags(struct page *page, char *reason, unsigned long badflags) mem_cgroup_print_bad_page(page); } -void dump_page(struct page *page, char *reason) +void dump_page(struct page *page, const char *reason) { dump_page_badflags(page, reason, 0); } -- cgit v1.2.3-59-g8ed1b From df381975463996178d685f6ef7d3555c5f887201 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 7 Apr 2014 15:37:43 -0700 Subject: memcg: get_mem_cgroup_from_mm() Instead of returning NULL from try_get_mem_cgroup_from_mm() when the mm owner is exiting, just return root_mem_cgroup. This makes sense for all callsites and gets rid of some of them having to fallback manually. [fengguang.wu@intel.com: fix warnings] Signed-off-by: Johannes Weiner Signed-off-by: Fengguang Wu Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ------ mm/memcontrol.c | 18 ++++-------------- 2 files changed, 4 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index eccfb4a4b379..134636f835f7 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -94,7 +94,6 @@ bool task_in_mem_cgroup(struct task_struct *task, extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page); extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); -extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm); extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg); extern struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css); @@ -294,11 +293,6 @@ static inline struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) return NULL; } -static inline struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) -{ - return NULL; -} - static inline bool mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *memcg) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c3b674f9774f..87c3ec37dd26 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1071,7 +1071,7 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) return mem_cgroup_from_css(task_css(p, memory_cgrp_id)); } -struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) +static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm) { struct mem_cgroup *memcg = NULL; @@ -1079,7 +1079,7 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) do { memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); if (unlikely(!memcg)) - break; + memcg = root_mem_cgroup; } while (!css_tryget(&memcg->css)); rcu_read_unlock(); return memcg; @@ -1475,7 +1475,7 @@ bool task_in_mem_cgroup(struct task_struct *task, p = find_lock_task_mm(task); if (p) { - curr = try_get_mem_cgroup_from_mm(p->mm); + curr = get_mem_cgroup_from_mm(p->mm); task_unlock(p); } else { /* @@ -1489,8 +1489,6 @@ bool task_in_mem_cgroup(struct task_struct *task, css_get(&curr->css); rcu_read_unlock(); } - if (!curr) - return false; /* * We should check use_hierarchy of "memcg" not "curr". Because checking * use_hierarchy of "curr" here make this function true if hierarchy is @@ -3617,15 +3615,7 @@ __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order) if (!current->mm || current->memcg_kmem_skip_account) return true; - memcg = try_get_mem_cgroup_from_mm(current->mm); - - /* - * very rare case described in mem_cgroup_from_task. Unfortunately there - * isn't much we can do without complicating this too much, and it would - * be gfp-dependent anyway. Just let it go - */ - if (unlikely(!memcg)) - return true; + memcg = get_mem_cgroup_from_mm(current->mm); if (!memcg_can_account_kmem(memcg)) { css_put(&memcg->css); -- cgit v1.2.3-59-g8ed1b From d715ae08f2ff87508a081c4df78061bf4f7211d6 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 7 Apr 2014 15:37:46 -0700 Subject: memcg: rename high level charging functions mem_cgroup_newpage_charge is used only for charging anonymous memory so it is better to rename it to mem_cgroup_charge_anon. mem_cgroup_cache_charge is used for file backed memory so rename it to mem_cgroup_charge_file. Signed-off-by: Michal Hocko Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroups/memcg_test.txt | 4 ++-- include/linux/memcontrol.h | 8 ++++---- mm/filemap.c | 2 +- mm/huge_memory.c | 8 ++++---- mm/memcontrol.c | 4 ++-- mm/memory.c | 6 +++--- mm/shmem.c | 6 +++--- 7 files changed, 19 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/Documentation/cgroups/memcg_test.txt b/Documentation/cgroups/memcg_test.txt index ce94a83a7d9a..80ac454704b8 100644 --- a/Documentation/cgroups/memcg_test.txt +++ b/Documentation/cgroups/memcg_test.txt @@ -24,7 +24,7 @@ Please note that implementation details can be changed. a page/swp_entry may be charged (usage += PAGE_SIZE) at - mem_cgroup_newpage_charge() + mem_cgroup_charge_anon() Called at new page fault and Copy-On-Write. mem_cgroup_try_charge_swapin() @@ -32,7 +32,7 @@ Please note that implementation details can be changed. Followed by charge-commit-cancel protocol. (With swap accounting) At commit, a charge recorded in swap_cgroup is removed. - mem_cgroup_cache_charge() + mem_cgroup_charge_file() Called at add_to_page_cache() mem_cgroup_cache_charge_swapin() diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 134636f835f7..96f3fc87ab96 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -65,7 +65,7 @@ struct mem_cgroup_reclaim_cookie { * (Of course, if memcg does memory allocation in future, GFP_KERNEL is sane.) */ -extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm, +extern int mem_cgroup_charge_anon(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); /* for swap handling */ extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm, @@ -74,7 +74,7 @@ extern void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg); extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg); -extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, +extern int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); @@ -233,13 +233,13 @@ void mem_cgroup_print_bad_page(struct page *page); #else /* CONFIG_MEMCG */ struct mem_cgroup; -static inline int mem_cgroup_newpage_charge(struct page *page, +static inline int mem_cgroup_charge_anon(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { return 0; } -static inline int mem_cgroup_cache_charge(struct page *page, +static inline int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { return 0; diff --git a/mm/filemap.c b/mm/filemap.c index b952d99c827c..27ebc0c9571b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -563,7 +563,7 @@ static int __add_to_page_cache_locked(struct page *page, VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageSwapBacked(page), page); - error = mem_cgroup_cache_charge(page, current->mm, + error = mem_cgroup_charge_file(page, current->mm, gfp_mask & GFP_RECLAIM_MASK); if (error) return error; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index a2f4981418fc..64635f5278ff 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -827,7 +827,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, count_vm_event(THP_FAULT_FALLBACK); return VM_FAULT_FALLBACK; } - if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) { + if (unlikely(mem_cgroup_charge_anon(page, mm, GFP_KERNEL))) { put_page(page); count_vm_event(THP_FAULT_FALLBACK); return VM_FAULT_FALLBACK; @@ -968,7 +968,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, __GFP_OTHER_NODE, vma, address, page_to_nid(page)); if (unlikely(!pages[i] || - mem_cgroup_newpage_charge(pages[i], mm, + mem_cgroup_charge_anon(pages[i], mm, GFP_KERNEL))) { if (pages[i]) put_page(pages[i]); @@ -1101,7 +1101,7 @@ alloc: goto out; } - if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { + if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))) { put_page(new_page); if (page) { split_huge_page(page); @@ -2359,7 +2359,7 @@ static void collapse_huge_page(struct mm_struct *mm, if (!new_page) return; - if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) + if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))) return; /* diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 038b037f8d67..e33b1d09eb1f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3818,7 +3818,7 @@ out: return ret; } -int mem_cgroup_newpage_charge(struct page *page, +int mem_cgroup_charge_anon(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { unsigned int nr_pages = 1; @@ -3954,7 +3954,7 @@ void mem_cgroup_commit_charge_swapin(struct page *page, MEM_CGROUP_CHARGE_TYPE_ANON); } -int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, +int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE; diff --git a/mm/memory.c b/mm/memory.c index 1b88da5c08b3..854e4027719f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2828,7 +2828,7 @@ gotten: } __SetPageUptodate(new_page); - if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)) + if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)) goto oom_free_new; mmun_start = address & PAGE_MASK; @@ -3281,7 +3281,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, */ __SetPageUptodate(page); - if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL)) + if (mem_cgroup_charge_anon(page, mm, GFP_KERNEL)) goto oom_free_page; entry = mk_pte(page, vma->vm_page_prot); @@ -3537,7 +3537,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (!new_page) return VM_FAULT_OOM; - if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)) { + if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)) { page_cache_release(new_page); return VM_FAULT_OOM; } diff --git a/mm/shmem.c b/mm/shmem.c index 70709347a1e2..70273f8df586 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -683,7 +683,7 @@ int shmem_unuse(swp_entry_t swap, struct page *page) * the shmem_swaplist_mutex which might hold up shmem_writepage(). * Charged back to the user (not to caller) when swap account is used. */ - error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); + error = mem_cgroup_charge_file(page, current->mm, GFP_KERNEL); if (error) goto out; /* No radix_tree_preload: swap entry keeps a place for page in tree */ @@ -1080,7 +1080,7 @@ repeat: goto failed; } - error = mem_cgroup_cache_charge(page, current->mm, + error = mem_cgroup_charge_file(page, current->mm, gfp & GFP_RECLAIM_MASK); if (!error) { error = shmem_add_to_page_cache(page, mapping, index, @@ -1134,7 +1134,7 @@ repeat: SetPageSwapBacked(page); __set_page_locked(page); - error = mem_cgroup_cache_charge(page, current->mm, + error = mem_cgroup_charge_file(page, current->mm, gfp & GFP_RECLAIM_MASK); if (error) goto decused; -- cgit v1.2.3-59-g8ed1b From ed6d7c8e578331cad594ee70d60e2e146b5dce7b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 7 Apr 2014 15:37:51 -0700 Subject: mm: remove unused arg of set_page_dirty_balance() There's only one caller of set_page_dirty_balance() and that will call it with page_mkwrite == 0. The page_mkwrite argument was unused since commit b827e496c893 "mm: close page_mkwrite races". Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/writeback.h | 2 +- mm/memory.c | 2 +- mm/page-writeback.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 021b8a319b9e..5777c13849ba 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -178,7 +178,7 @@ int write_cache_pages(struct address_space *mapping, struct writeback_control *wbc, writepage_t writepage, void *data); int do_writepages(struct address_space *mapping, struct writeback_control *wbc); -void set_page_dirty_balance(struct page *page, int page_mkwrite); +void set_page_dirty_balance(struct page *page); void writeback_set_ratelimit(void); void tag_pages_for_writeback(struct address_space *mapping, pgoff_t start, pgoff_t end); diff --git a/mm/memory.c b/mm/memory.c index 854e4027719f..d0f0bef3be48 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2782,7 +2782,7 @@ reuse: */ if (!page_mkwrite) { wait_on_page_locked(dirty_page); - set_page_dirty_balance(dirty_page, page_mkwrite); + set_page_dirty_balance(dirty_page); /* file_update_time outside page_lock */ if (vma->vm_file) file_update_time(vma->vm_file); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 7106cb1aca8e..ef413492a149 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1562,9 +1562,9 @@ pause: bdi_start_background_writeback(bdi); } -void set_page_dirty_balance(struct page *page, int page_mkwrite) +void set_page_dirty_balance(struct page *page) { - if (set_page_dirty(page) || page_mkwrite) { + if (set_page_dirty(page)) { struct address_space *mapping = page_mapping(page); if (mapping) -- cgit v1.2.3-59-g8ed1b From 29f175d125f0f3a9503af8a5596f93d714cceb08 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Mon, 7 Apr 2014 15:37:55 -0700 Subject: mm/readahead.c: inline ra_submit Commit f9acc8c7b35a ("readahead: sanify file_ra_state names") left ra_submit with a single function call. Move ra_submit to internal.h and inline it to save some stack. Thanks to Andrew Morton for commenting different versions. Signed-off-by: Fabian Frederick Suggested-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 --- mm/internal.h | 15 +++++++++++++++ mm/readahead.c | 21 +++------------------ 3 files changed, 18 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9132faed1a41..6edcea720ddd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1875,9 +1875,6 @@ void page_cache_async_readahead(struct address_space *mapping, unsigned long size); unsigned long max_sane_readahead(unsigned long nr); -unsigned long ra_submit(struct file_ra_state *ra, - struct address_space *mapping, - struct file *filp); /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */ extern int expand_stack(struct vm_area_struct *vma, unsigned long address); diff --git a/mm/internal.h b/mm/internal.h index 3e910000fda4..07b67361a40a 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -11,6 +11,7 @@ #ifndef __MM_INTERNAL_H #define __MM_INTERNAL_H +#include #include void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, @@ -21,6 +22,20 @@ static inline void set_page_count(struct page *page, int v) atomic_set(&page->_count, v); } +extern int __do_page_cache_readahead(struct address_space *mapping, + struct file *filp, pgoff_t offset, unsigned long nr_to_read, + unsigned long lookahead_size); + +/* + * Submit IO for the read-ahead request in file_ra_state. + */ +static inline unsigned long ra_submit(struct file_ra_state *ra, + struct address_space *mapping, struct file *filp) +{ + return __do_page_cache_readahead(mapping, filp, + ra->start, ra->size, ra->async_size); +} + /* * Turn a non-refcounted page (->_count == 0) into refcounted with * a count of one. diff --git a/mm/readahead.c b/mm/readahead.c index 29c5e1af5a0c..0ca36a7770b1 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -8,9 +8,7 @@ */ #include -#include #include -#include #include #include #include @@ -20,6 +18,8 @@ #include #include +#include "internal.h" + /* * Initialise a struct file's readahead state. Assumes that the caller has * memset *ra to zero. @@ -149,8 +149,7 @@ out: * * Returns the number of pages requested, or the maximum amount of I/O allowed. */ -static int -__do_page_cache_readahead(struct address_space *mapping, struct file *filp, +int __do_page_cache_readahead(struct address_space *mapping, struct file *filp, pgoff_t offset, unsigned long nr_to_read, unsigned long lookahead_size) { @@ -243,20 +242,6 @@ unsigned long max_sane_readahead(unsigned long nr) return min(nr, MAX_READAHEAD); } -/* - * Submit IO for the read-ahead request in file_ra_state. - */ -unsigned long ra_submit(struct file_ra_state *ra, - struct address_space *mapping, struct file *filp) -{ - int actual; - - actual = __do_page_cache_readahead(mapping, filp, - ra->start, ra->size, ra->async_size); - - return actual; -} - /* * Set the initial window size, round to next power of 2 and square * for small size, x 4 for medium, and x 2 for large -- cgit v1.2.3-59-g8ed1b From 834a964a098e7726fc296d7cd8f65ed3eeedd412 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 7 Apr 2014 15:37:57 -0700 Subject: numa: use LAST_CPUPID_SHIFT to calculate LAST_CPUPID_MASK LAST_CPUPID_MASK is calculated using LAST_CPUPID_WIDTH. However LAST_CPUPID_WIDTH itself can be 0. (when LAST_CPUPID_NOT_IN_PAGE_FLAGS is set). In such a case LAST_CPUPID_MASK turns out to be 0. But with recent commit 1ae71d0319: (mm: numa: bugfix for LAST_CPUPID_NOT_IN_PAGE_FLAGS) if LAST_CPUPID_MASK is 0, page_cpupid_xchg_last() and page_cpupid_reset_last() causes page->_last_cpupid to be set to 0. This causes performance regression. Its almost as if numa_balancing is off. Fix LAST_CPUPID_MASK by using LAST_CPUPID_SHIFT instead of LAST_CPUPID_WIDTH. Some performance numbers and perf stats with and without the fix. (3.14-rc6) ---------- numa01 Performance counter stats for '/usr/bin/time -f %e %S %U %c %w -o start_bench.out -a ./numa01': 12,27,462 cs [100.00%] 2,41,957 migrations [100.00%] 1,68,01,713 faults [100.00%] 7,99,35,29,041 cache-misses 98,808 migrate:mm_migrate_pages [100.00%] 1407.690148814 seconds time elapsed numa02 Performance counter stats for '/usr/bin/time -f %e %S %U %c %w -o start_bench.out -a ./numa02': 63,065 cs [100.00%] 14,364 migrations [100.00%] 2,08,118 faults [100.00%] 25,32,59,404 cache-misses 12 migrate:mm_migrate_pages [100.00%] 63.840827219 seconds time elapsed (3.14-rc6 with fix) ------------------- numa01 Performance counter stats for '/usr/bin/time -f %e %S %U %c %w -o start_bench.out -a ./numa01': 9,68,911 cs [100.00%] 1,01,414 migrations [100.00%] 88,38,697 faults [100.00%] 4,42,92,51,042 cache-misses 4,25,060 migrate:mm_migrate_pages [100.00%] 685.965331189 seconds time elapsed numa02 Performance counter stats for '/usr/bin/time -f %e %S %U %c %w -o start_bench.out -a ./numa02': 17,543 cs [100.00%] 2,962 migrations [100.00%] 1,17,843 faults [100.00%] 11,80,61,644 cache-misses 12,358 migrate:mm_migrate_pages [100.00%] 20.380132343 seconds time elapsed Signed-off-by: Srikar Dronamraju Cc: Liu Ping Fan Reviewed-by: Aneesh Kumar K.V Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 6edcea720ddd..abc848412e3c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -695,7 +695,7 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, #define ZONES_MASK ((1UL << ZONES_WIDTH) - 1) #define NODES_MASK ((1UL << NODES_WIDTH) - 1) #define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) -#define LAST_CPUPID_MASK ((1UL << LAST_CPUPID_WIDTH) - 1) +#define LAST_CPUPID_MASK ((1UL << LAST_CPUPID_SHIFT) - 1) #define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1) static inline enum zone_type page_zonenum(const struct page *page) -- cgit v1.2.3-59-g8ed1b From 23aebe1691a3d98a79676db6c0fd813e16478804 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 7 Apr 2014 15:38:39 -0700 Subject: exec: kill bprm->tcomm[], simplify the "basename" logic Starting from commit c4ad8f98bef7 ("execve: use 'struct filename *' for executable name passing") bprm->filename can not go away after flush_old_exec(), so we do not need to save the binary name in bprm->tcomm[] added by 96e02d158678 ("exec: fix use-after-free bug in setup_new_exec()"). And there was never need for filename_to_taskname-like code, we can simply do set_task_comm(kbasename(filename). This patch has to change set_task_comm() and trace_task_rename() to accept "const char *", but I think this change is also good. Signed-off-by: Oleg Nesterov Cc: Heiko Carstens Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 21 ++------------------- include/linux/binfmts.h | 1 - include/linux/sched.h | 2 +- include/trace/events/task.h | 2 +- 4 files changed, 4 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index b60ccf969a8b..9e81c630dfa7 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1046,7 +1046,7 @@ EXPORT_SYMBOL_GPL(get_task_comm); * so that a new one can be started */ -void set_task_comm(struct task_struct *tsk, char *buf) +void set_task_comm(struct task_struct *tsk, const char *buf) { task_lock(tsk); trace_task_rename(tsk, buf); @@ -1055,21 +1055,6 @@ void set_task_comm(struct task_struct *tsk, char *buf) perf_event_comm(tsk); } -static void filename_to_taskname(char *tcomm, const char *fn, unsigned int len) -{ - int i, ch; - - /* Copies the binary name from after last slash */ - for (i = 0; (ch = *(fn++)) != '\0';) { - if (ch == '/') - i = 0; /* overwrite what we wrote */ - else - if (i < len - 1) - tcomm[i++] = ch; - } - tcomm[i] = '\0'; -} - int flush_old_exec(struct linux_binprm * bprm) { int retval; @@ -1083,8 +1068,6 @@ int flush_old_exec(struct linux_binprm * bprm) goto out; set_mm_exe_file(bprm->mm, bprm->file); - - filename_to_taskname(bprm->tcomm, bprm->filename, sizeof(bprm->tcomm)); /* * Release all of the old mmap stuff */ @@ -1127,7 +1110,7 @@ void setup_new_exec(struct linux_binprm * bprm) else set_dumpable(current->mm, suid_dumpable); - set_task_comm(current, bprm->tcomm); + set_task_comm(current, kbasename(bprm->filename)); /* Set the new mm task size. We have to do that late because it may * depend on TIF_32BIT which is only updated in flush_thread() on diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index b4a745d7d9a9..61f29e5ea840 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -44,7 +44,6 @@ struct linux_binprm { unsigned interp_flags; unsigned interp_data; unsigned long loader, exec; - char tcomm[TASK_COMM_LEN]; }; #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0 diff --git a/include/linux/sched.h b/include/linux/sched.h index 6c70645eb3b6..f8497059f88c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2357,7 +2357,7 @@ extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, i struct task_struct *fork_idle(int); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); -extern void set_task_comm(struct task_struct *tsk, char *from); +extern void set_task_comm(struct task_struct *tsk, const char *from); extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP diff --git a/include/trace/events/task.h b/include/trace/events/task.h index 102a646e1996..dee3bb1d5a6b 100644 --- a/include/trace/events/task.h +++ b/include/trace/events/task.h @@ -32,7 +32,7 @@ TRACE_EVENT(task_newtask, TRACE_EVENT(task_rename, - TP_PROTO(struct task_struct *task, char *comm), + TP_PROTO(struct task_struct *task, const char *comm), TP_ARGS(task, comm), -- cgit v1.2.3-59-g8ed1b From abd50b39e783e1b6c75c7534c37f1eb2d94a89cd Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 7 Apr 2014 15:38:42 -0700 Subject: wait: introduce EXIT_TRACE to avoid the racy EXIT_DEAD->EXIT_ZOMBIE transition wait_task_zombie() first does EXIT_ZOMBIE->EXIT_DEAD transition and drops tasklist_lock. If this task is not the natural child and it is traced, we change its state back to EXIT_ZOMBIE for ->real_parent. The last transition is racy, this is even documented in 50b8d257486a "ptrace: partially fix the do_wait(WEXITED) vs EXIT_DEAD->EXIT_ZOMBIE race". wait_consider_task() tries to detect this transition and clear ->notask_error but we can't rely on ptrace_reparented(), debugger can exit and do ptrace_unlink() before its sub-thread sets EXIT_ZOMBIE. And there is another problem which were missed before: this transition can also race with reparent_leader() which doesn't reset >exit_signal if EXIT_DEAD, assuming that this task must be reaped by someone else. So the tracee can be re-parented with ->exit_signal != SIGCHLD, and if /sbin/init doesn't use __WALL it becomes unreapable. This was fixed by the previous commit, but it was the temporary hack. 1. Add the new exit_state, EXIT_TRACE. It means that the task is the traced zombie, debugger is going to detach and notify its natural parent. This new state is actually EXIT_ZOMBIE | EXIT_DEAD. This way we can avoid the changes in proc/kgdb code, get_task_state() still reports "X (dead)" in this case. Note: with or without this change userspace can see Z -> X -> Z transition. Not really bad, but probably makes sense to fix. 2. Change wait_task_zombie() to use EXIT_TRACE instead of EXIT_DEAD if we need to notify the ->real_parent. 3. Revert the previous hack in reparent_leader(), now that EXIT_DEAD is always the final state we can safely ignore such a task. 4. Change wait_consider_task() to check EXIT_TRACE separately and kill the racy and no longer needed ptrace_reparented() case. If ptrace == T an EXIT_TRACE thread should be simply ignored, the owner of this state is going to ptrace_unlink() this task. We can pretend that it was already removed from ->ptraced list. Otherwise we should skip this thread too but clear ->notask_error, we must be the natural parent and debugger is going to untrace and notify us. IOW, this doesn't differ from "EXIT_ZOMBIE && p->ptrace" even if the task was already untraced. Signed-off-by: Oleg Nesterov Reported-by: Jan Kratochvil Reported-by: Michal Schmidt Tested-by: Michal Schmidt Cc: Al Viro Cc: Lennart Poettering Cc: Roland McGrath Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + kernel/exit.c | 50 +++++++++++++++++++++----------------------------- 2 files changed, 22 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index f8497059f88c..7781de5e5e7b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -212,6 +212,7 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); /* in tsk->exit_state */ #define EXIT_ZOMBIE 16 #define EXIT_DEAD 32 +#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) /* in tsk->state again */ #define TASK_DEAD 64 #define TASK_WAKEKILL 128 diff --git a/kernel/exit.c b/kernel/exit.c index e354cbb13a9b..022a0ff17318 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -560,6 +560,9 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p, struct list_head *dead) { list_move_tail(&p->sibling, &p->real_parent->children); + + if (p->exit_state == EXIT_DEAD) + return; /* * If this is a threaded reparent there is no need to * notify anyone anything has happened. @@ -567,19 +570,9 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p, if (same_thread_group(p->real_parent, father)) return; - /* - * We don't want people slaying init. - * - * Note: we do this even if it is EXIT_DEAD, wait_task_zombie() - * can change ->exit_state to EXIT_ZOMBIE. If this is the final - * state, do_notify_parent() was already called and ->exit_signal - * doesn't matter. - */ + /* We don't want people slaying init. */ p->exit_signal = SIGCHLD; - if (p->exit_state == EXIT_DEAD) - return; - /* If it has exited notify the new parent about this child's death. */ if (!p->ptrace && p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { @@ -1043,17 +1036,13 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) return wait_noreap_copyout(wo, p, pid, uid, why, status); } + traced = ptrace_reparented(p); /* - * Try to move the task's state to DEAD - * only one thread is allowed to do this: + * Move the task's state to DEAD/TRACE, only one thread can do this. */ - state = xchg(&p->exit_state, EXIT_DEAD); - if (state != EXIT_ZOMBIE) { - BUG_ON(state != EXIT_DEAD); + state = traced ? EXIT_TRACE : EXIT_DEAD; + if (cmpxchg(&p->exit_state, EXIT_ZOMBIE, state) != EXIT_ZOMBIE) return 0; - } - - traced = ptrace_reparented(p); /* * It can be ptraced but not reparented, check * thread_group_leader() to filter out sub-threads. @@ -1114,7 +1103,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) /* * Now we are sure this task is interesting, and no other - * thread can reap it because we set its state to EXIT_DEAD. + * thread can reap it because we its state == DEAD/TRACE. */ read_unlock(&tasklist_lock); @@ -1159,14 +1148,14 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) * If this is not a sub-thread, notify the parent. * If parent wants a zombie, don't release it now. */ + state = EXIT_DEAD; if (thread_group_leader(p) && - !do_notify_parent(p, p->exit_signal)) { - p->exit_state = EXIT_ZOMBIE; - p = NULL; - } + !do_notify_parent(p, p->exit_signal)) + state = EXIT_ZOMBIE; + p->exit_state = state; write_unlock_irq(&tasklist_lock); } - if (p != NULL) + if (state == EXIT_DEAD) release_task(p); return retval; @@ -1362,12 +1351,15 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace, } /* dead body doesn't have much to contribute */ - if (unlikely(p->exit_state == EXIT_DEAD)) { + if (unlikely(p->exit_state == EXIT_DEAD)) + return 0; + + if (unlikely(p->exit_state == EXIT_TRACE)) { /* - * But do not ignore this task until the tracer does - * wait_task_zombie()->do_notify_parent(). + * ptrace == 0 means we are the natural parent. In this case + * we should clear notask_error, debugger will notify us. */ - if (likely(!ptrace) && unlikely(ptrace_reparented(p))) + if (likely(!ptrace)) wo->notask_error = 0; return 0; } -- cgit v1.2.3-59-g8ed1b From ad86622b478eaafdc25b74237df82b10fce6326d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 7 Apr 2014 15:38:46 -0700 Subject: wait: swap EXIT_ZOMBIE and EXIT_DEAD to hide EXIT_TRACE from user-space get_task_state() uses the most significant bit to report the state to user-space, this means that EXIT_ZOMBIE->EXIT_TRACE->EXIT_DEAD transition can be noticed via /proc as Z -> X -> Z change. Note that this was possible even before EXIT_TRACE was introduced. This is not really bad but imho it make sense to hide EXIT_TRACE from user-space completely. So the patch simply swaps EXIT_ZOMBIE and EXIT_DEAD, this way EXIT_TRACE will be seen as EXIT_ZOMBIE by user-space. Signed-off-by: Oleg Nesterov Cc: Jan Kratochvil Cc: Michal Schmidt Cc: Al Viro Cc: Lennart Poettering Cc: Roland McGrath Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/array.c | 4 ++-- include/linux/sched.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/array.c b/fs/proc/array.c index 656e401794de..64db2bceac59 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -138,8 +138,8 @@ static const char * const task_state_array[] = { "D (disk sleep)", /* 2 */ "T (stopped)", /* 4 */ "t (tracing stop)", /* 8 */ - "Z (zombie)", /* 16 */ - "X (dead)", /* 32 */ + "X (dead)", /* 16 */ + "Z (zombie)", /* 32 */ }; static inline const char *get_task_state(struct task_struct *tsk) diff --git a/include/linux/sched.h b/include/linux/sched.h index 7781de5e5e7b..075b3056c0c0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -210,8 +210,8 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); #define __TASK_STOPPED 4 #define __TASK_TRACED 8 /* in tsk->exit_state */ -#define EXIT_ZOMBIE 16 -#define EXIT_DEAD 32 +#define EXIT_DEAD 16 +#define EXIT_ZOMBIE 32 #define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) /* in tsk->state again */ #define TASK_DEAD 64 -- cgit v1.2.3-59-g8ed1b From 82e0703b6ca8b549952c1e4f04746f27eaec012d Mon Sep 17 00:00:00 2001 From: Rashika Kheria Date: Mon, 7 Apr 2014 15:38:50 -0700 Subject: include/linux/crash_dump.h: add vmcore_cleanup() prototype Eliminate the following warning in proc/vmcore.c: fs/proc/vmcore.c:1088:6: warning: no previous prototype for `vmcore_cleanup' [-Wmissing-prototypes] [akpm@linux-foundation.org: clean up powerpc, remove unneeded EXPORT_SYMBOL] Signed-off-by: Rashika Kheria Reviewed-by: Josh Triplett Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/fadump.h | 1 - fs/proc/vmcore.c | 1 - include/linux/crash_dump.h | 1 + 3 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h index 88dbf9659185..a6774560afe3 100644 --- a/arch/powerpc/include/asm/fadump.h +++ b/arch/powerpc/include/asm/fadump.h @@ -210,7 +210,6 @@ extern int is_fadump_active(void); extern void crash_fadump(struct pt_regs *, const char *); extern void fadump_cleanup(void); -extern void vmcore_cleanup(void); #else /* CONFIG_FA_DUMP */ static inline int is_fadump_active(void) { return 0; } static inline void crash_fadump(struct pt_regs *regs, const char *str) { } diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 88d4585b30f1..ab852715916d 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -1118,4 +1118,3 @@ void vmcore_cleanup(void) } free_elfcorebuf(); } -EXPORT_SYMBOL_GPL(vmcore_cleanup); diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 7032518f8542..72ab536ad3de 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -25,6 +25,7 @@ extern int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma, extern ssize_t copy_oldmem_page(unsigned long, char *, size_t, unsigned long, int); +void vmcore_cleanup(void); /* Architecture code defines this if there are other possible ELF * machine types, e.g. on bi-arch capable hardware. */ -- cgit v1.2.3-59-g8ed1b From 90ae3ae539246984d36e43b0e23554bca941476f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 7 Apr 2014 15:38:52 -0700 Subject: idr: remove dead code Remove no longer used deprecated code, and make local functions static. Signed-off-by: Stephen Hemminger Acked-by: Jean Delvare Acked-by: Tejun Heo Cc: Jeff Layton Cc: Philipp Reisner Cc: Jens Axboe Cc: George Spelvin Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 63 ----------------------------------------------------- lib/idr.c | 20 ++--------------- 2 files changed, 2 insertions(+), 81 deletions(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index f669585c4fc5..6af3400b9b2f 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -132,69 +132,6 @@ static inline void *idr_find(struct idr *idr, int id) #define idr_for_each_entry(idp, entry, id) \ for (id = 0; ((entry) = idr_get_next(idp, &(id))) != NULL; ++id) -/* - * Don't use the following functions. These exist only to suppress - * deprecated warnings on EXPORT_SYMBOL()s. - */ -int __idr_pre_get(struct idr *idp, gfp_t gfp_mask); -int __idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id); -void __idr_remove_all(struct idr *idp); - -/** - * idr_pre_get - reserve resources for idr allocation - * @idp: idr handle - * @gfp_mask: memory allocation flags - * - * Part of old alloc interface. This is going away. Use - * idr_preload[_end]() and idr_alloc() instead. - */ -static inline int __deprecated idr_pre_get(struct idr *idp, gfp_t gfp_mask) -{ - return __idr_pre_get(idp, gfp_mask); -} - -/** - * idr_get_new_above - allocate new idr entry above or equal to a start id - * @idp: idr handle - * @ptr: pointer you want associated with the id - * @starting_id: id to start search at - * @id: pointer to the allocated handle - * - * Part of old alloc interface. This is going away. Use - * idr_preload[_end]() and idr_alloc() instead. - */ -static inline int __deprecated idr_get_new_above(struct idr *idp, void *ptr, - int starting_id, int *id) -{ - return __idr_get_new_above(idp, ptr, starting_id, id); -} - -/** - * idr_get_new - allocate new idr entry - * @idp: idr handle - * @ptr: pointer you want associated with the id - * @id: pointer to the allocated handle - * - * Part of old alloc interface. This is going away. Use - * idr_preload[_end]() and idr_alloc() instead. - */ -static inline int __deprecated idr_get_new(struct idr *idp, void *ptr, int *id) -{ - return __idr_get_new_above(idp, ptr, 0, id); -} - -/** - * idr_remove_all - remove all ids from the given idr tree - * @idp: idr handle - * - * If you're trying to destroy @idp, calling idr_destroy() is enough. - * This is going away. Don't use. - */ -static inline void __deprecated idr_remove_all(struct idr *idp) -{ - __idr_remove_all(idp); -} - /* * IDA - IDR based id allocator, use when translation from id to * pointer isn't necessary. diff --git a/lib/idr.c b/lib/idr.c index 1ba4956bfbff..ba2df393c3b0 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -196,7 +196,7 @@ static void idr_mark_full(struct idr_layer **pa, int id) } } -int __idr_pre_get(struct idr *idp, gfp_t gfp_mask) +static int __idr_pre_get(struct idr *idp, gfp_t gfp_mask) { while (idp->id_free_cnt < MAX_IDR_FREE) { struct idr_layer *new; @@ -207,7 +207,6 @@ int __idr_pre_get(struct idr *idp, gfp_t gfp_mask) } return 1; } -EXPORT_SYMBOL(__idr_pre_get); /** * sub_alloc - try to allocate an id without growing the tree depth @@ -374,20 +373,6 @@ static void idr_fill_slot(struct idr *idr, void *ptr, int id, idr_mark_full(pa, id); } -int __idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) -{ - struct idr_layer *pa[MAX_IDR_LEVEL + 1]; - int rv; - - rv = idr_get_empty_slot(idp, starting_id, pa, 0, idp); - if (rv < 0) - return rv == -ENOMEM ? -EAGAIN : rv; - - idr_fill_slot(idp, ptr, rv, pa); - *id = rv; - return 0; -} -EXPORT_SYMBOL(__idr_get_new_above); /** * idr_preload - preload for idr_alloc() @@ -607,7 +592,7 @@ void idr_remove(struct idr *idp, int id) } EXPORT_SYMBOL(idr_remove); -void __idr_remove_all(struct idr *idp) +static void __idr_remove_all(struct idr *idp) { int n, id, max; int bt_mask; @@ -640,7 +625,6 @@ void __idr_remove_all(struct idr *idp) } idp->layers = 0; } -EXPORT_SYMBOL(__idr_remove_all); /** * idr_destroy - release all cached layers within an idr tree -- cgit v1.2.3-59-g8ed1b From 2aaf308b95b24649a6dcfed89cd956e972089b2a Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Mon, 7 Apr 2014 15:38:56 -0700 Subject: rapidio: rework device hierarchy and introduce mport class of devices This patch removes an artificial RapidIO bus root device and establishes actual device hierarchy by providing reference to real parent devices. It also introduces device class for RapidIO controller devices (on-chip or an eternal bridge, known as "mport"). Existing implementation was sufficient for SoC-based platforms that have a single RapidIO controller. With introduction of devices using multiple RapidIO controllers and PCIe-to-RapidIO bridges the old scheme is very limiting or does not work at all. The implemented changes allow to properly reference platform's local RapidIO mport devices and provide device details needed for upper layers. This change to RapidIO device hierarchy does not break any known existing kernel or user space interfaces. Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Andre van Herk Cc: Stef van Os Cc: Jerry Jacobs Cc: Arno Tiemersma Cc: Rob Landley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/rapidio/sysfs.txt | 66 +++++++++++++++++++++++++++++++++++----- arch/powerpc/sysdev/fsl_rio.c | 1 + drivers/net/rionet.c | 1 + drivers/rapidio/devices/tsi721.c | 1 + drivers/rapidio/rio-driver.c | 22 +++++++++----- drivers/rapidio/rio-scan.c | 1 + drivers/rapidio/rio-sysfs.c | 40 ++++++++++++++++++++++++ drivers/rapidio/rio.c | 11 +++++++ drivers/rapidio/rio.h | 1 + include/linux/rio.h | 5 ++- 10 files changed, 133 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/Documentation/rapidio/sysfs.txt b/Documentation/rapidio/sysfs.txt index 271438c0617f..47ce9a5336e1 100644 --- a/Documentation/rapidio/sysfs.txt +++ b/Documentation/rapidio/sysfs.txt @@ -2,8 +2,8 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -1. Device Subdirectories ------------------------- +1. RapidIO Device Subdirectories +-------------------------------- For each RapidIO device, the RapidIO subsystem creates files in an individual subdirectory with the following name, /sys/bus/rapidio/devices/. @@ -25,8 +25,8 @@ seen by the enumerating host (destID = 1): NOTE: An enumerating or discovering endpoint does not create a sysfs entry for itself, this is why an endpoint with destID=1 is not shown in the list. -2. Attributes Common for All Devices ------------------------------------- +2. Attributes Common for All RapidIO Devices +-------------------------------------------- Each device subdirectory contains the following informational read-only files: @@ -52,16 +52,16 @@ This attribute is similar in behavior to the "config" attribute of PCI devices and provides an access to the RapidIO device registers using standard file read and write operations. -3. Endpoint Device Attributes ------------------------------ +3. RapidIO Endpoint Device Attributes +------------------------------------- Currently Linux RapidIO subsystem does not create any endpoint specific sysfs attributes. It is possible that RapidIO master port drivers and endpoint device drivers will add their device-specific sysfs attributes but such attributes are outside the scope of this document. -4. Switch Device Attributes ---------------------------- +4. RapidIO Switch Device Attributes +----------------------------------- RapidIO switches have additional attributes in sysfs. RapidIO subsystem supports common and device-specific sysfs attributes for switches. Because switches are @@ -106,3 +106,53 @@ attribute: for that controller always will be 0. To initiate RapidIO enumeration/discovery on all available mports a user must write '-1' (or RIO_MPORT_ANY) into this attribute file. + + +6. RapidIO Bus Controllers/Ports +-------------------------------- + +On-chip RapidIO controllers and PCIe-to-RapidIO bridges (referenced as +"Master Port" or "mport") are presented in sysfs as the special class of +devices: "rapidio_port". + +The /sys/class/rapidio_port subdirectory contains individual subdirectories +named as "rapidioN" where N = mport ID registered with RapidIO subsystem. + +NOTE: An mport ID is not a RapidIO destination ID assigned to a given local +mport device. + +Each mport device subdirectory in addition to standard entries contains the +following device-specific attributes: + + port_destid - reports RapidIO destination ID assigned to the given RapidIO + mport device. If value 0xFFFFFFFF is returned this means that + no valid destination ID have been assigned to the mport (yet). + Normally, before enumeration/discovery have been executed only + fabric enumerating mports have a valid destination ID assigned + to them using "hdid=..." rapidio module parameter. + sys_size - reports RapidIO common transport system size: + 0 = small (8-bit destination ID, max. 256 devices), + 1 = large (16-bit destination ID, max. 65536 devices). + +After enumeration or discovery was performed for a given mport device, +the corresponding subdirectory will also contain subdirectories for each +child RapidIO device connected to the mport. Naming conventions for RapidIO +devices are described in Section 1 above. + +The example below shows mport device subdirectory with several child RapidIO +devices attached to it. + +[rio@rapidio ~]$ ls /sys/class/rapidio_port/rapidio0/ -l +total 0 +drwxr-xr-x 3 root root 0 Feb 11 15:10 00:e:0001 +drwxr-xr-x 3 root root 0 Feb 11 15:10 00:e:0004 +drwxr-xr-x 3 root root 0 Feb 11 15:10 00:e:0007 +drwxr-xr-x 3 root root 0 Feb 11 15:10 00:s:0002 +drwxr-xr-x 3 root root 0 Feb 11 15:10 00:s:0003 +drwxr-xr-x 3 root root 0 Feb 11 15:10 00:s:0005 +lrwxrwxrwx 1 root root 0 Feb 11 15:11 device -> ../../../0000:01:00.0 +-r--r--r-- 1 root root 4096 Feb 11 15:11 port_destid +drwxr-xr-x 2 root root 0 Feb 11 15:11 power +lrwxrwxrwx 1 root root 0 Feb 11 15:04 subsystem -> ../../../../../../class/rapidio_port +-r--r--r-- 1 root root 4096 Feb 11 15:11 sys_size +-rw-r--r-- 1 root root 4096 Feb 11 15:04 uevent diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 95dd892e9904..cf2b0840a672 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -531,6 +531,7 @@ int fsl_rio_setup(struct platform_device *dev) sprintf(port->name, "RIO mport %d", i); priv->dev = &dev->dev; + port->dev.parent = &dev->dev; port->ops = ops; port->priv = priv; port->phys_efptr = 0x100; diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c index 6d1f6ed3113f..a8497183ff8b 100644 --- a/drivers/net/rionet.c +++ b/drivers/net/rionet.c @@ -493,6 +493,7 @@ static int rionet_setup_netdev(struct rio_mport *mport, struct net_device *ndev) ndev->netdev_ops = &rionet_netdev_ops; ndev->mtu = RIO_MAX_MSG_SIZE - 14; ndev->features = NETIF_F_LLTX; + SET_NETDEV_DEV(ndev, &mport->dev); SET_ETHTOOL_OPS(ndev, &rionet_ethtool_ops); spin_lock_init(&rnet->lock); diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c index ff7cbf2d28e3..1753dc693c15 100644 --- a/drivers/rapidio/devices/tsi721.c +++ b/drivers/rapidio/devices/tsi721.c @@ -2256,6 +2256,7 @@ static int tsi721_setup_mport(struct tsi721_device *priv) mport->phy_type = RIO_PHY_SERIAL; mport->priv = (void *)priv; mport->phys_efptr = 0x100; + mport->dev.parent = &pdev->dev; priv->mport = mport; INIT_LIST_HEAD(&mport->dbells); diff --git a/drivers/rapidio/rio-driver.c b/drivers/rapidio/rio-driver.c index c9ae692d3451..f301f059bb85 100644 --- a/drivers/rapidio/rio-driver.c +++ b/drivers/rapidio/rio-driver.c @@ -167,7 +167,6 @@ void rio_unregister_driver(struct rio_driver *rdrv) void rio_attach_device(struct rio_dev *rdev) { rdev->dev.bus = &rio_bus_type; - rdev->dev.parent = &rio_bus; } EXPORT_SYMBOL_GPL(rio_attach_device); @@ -216,9 +215,12 @@ static int rio_uevent(struct device *dev, struct kobj_uevent_env *env) return 0; } -struct device rio_bus = { - .init_name = "rapidio", +struct class rio_mport_class = { + .name = "rapidio_port", + .owner = THIS_MODULE, + .dev_groups = rio_mport_groups, }; +EXPORT_SYMBOL_GPL(rio_mport_class); struct bus_type rio_bus_type = { .name = "rapidio", @@ -233,14 +235,20 @@ struct bus_type rio_bus_type = { /** * rio_bus_init - Register the RapidIO bus with the device model * - * Registers the RIO bus device and RIO bus type with the Linux + * Registers the RIO mport device class and RIO bus type with the Linux * device model. */ static int __init rio_bus_init(void) { - if (device_register(&rio_bus) < 0) - printk("RIO: failed to register RIO bus device\n"); - return bus_register(&rio_bus_type); + int ret; + + ret = class_register(&rio_mport_class); + if (!ret) { + ret = bus_register(&rio_bus_type); + if (ret) + class_unregister(&rio_mport_class); + } + return ret; } postcore_initcall(rio_bus_init); diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index d3a6539a77cc..47a1b2ea76c4 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -461,6 +461,7 @@ static struct rio_dev *rio_setup_device(struct rio_net *net, rdev->comp_tag & RIO_CTAG_UDEVID); } + rdev->dev.parent = &port->dev; rio_attach_device(rdev); device_initialize(&rdev->dev); diff --git a/drivers/rapidio/rio-sysfs.c b/drivers/rapidio/rio-sysfs.c index e0221c6d0cc2..cdb005c0094d 100644 --- a/drivers/rapidio/rio-sysfs.c +++ b/drivers/rapidio/rio-sysfs.c @@ -341,3 +341,43 @@ const struct attribute_group *rio_bus_groups[] = { &rio_bus_group, NULL, }; + +static ssize_t +port_destid_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct rio_mport *mport = to_rio_mport(dev); + + if (mport) + return sprintf(buf, "0x%04x\n", mport->host_deviceid); + else + return -ENODEV; +} +static DEVICE_ATTR_RO(port_destid); + +static ssize_t sys_size_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct rio_mport *mport = to_rio_mport(dev); + + if (mport) + return sprintf(buf, "%u\n", mport->sys_size); + else + return -ENODEV; +} +static DEVICE_ATTR_RO(sys_size); + +static struct attribute *rio_mport_attrs[] = { + &dev_attr_port_destid.attr, + &dev_attr_sys_size.attr, + NULL, +}; + +static const struct attribute_group rio_mport_group = { + .attrs = rio_mport_attrs, +}; + +const struct attribute_group *rio_mport_groups[] = { + &rio_mport_group, + NULL, +}; diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index 2e8a20cac588..a54ba0494dd3 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -1884,6 +1884,7 @@ static int rio_get_hdid(int index) int rio_register_mport(struct rio_mport *port) { struct rio_scan_node *scan = NULL; + int res = 0; if (next_portid >= RIO_MAX_MPORTS) { pr_err("RIO: reached specified max number of mports\n"); @@ -1894,6 +1895,16 @@ int rio_register_mport(struct rio_mport *port) port->host_deviceid = rio_get_hdid(port->id); port->nscan = NULL; + dev_set_name(&port->dev, "rapidio%d", port->id); + port->dev.class = &rio_mport_class; + + res = device_register(&port->dev); + if (res) + dev_err(&port->dev, "RIO: mport%d registration failed ERR=%d\n", + port->id, res); + else + dev_dbg(&port->dev, "RIO: mport%d registered\n", port->id); + mutex_lock(&rio_mport_list_lock); list_add_tail(&port->node, &rio_mports); diff --git a/drivers/rapidio/rio.h b/drivers/rapidio/rio.h index 5f99d22ad0b0..2d0550e08ea2 100644 --- a/drivers/rapidio/rio.h +++ b/drivers/rapidio/rio.h @@ -50,6 +50,7 @@ extern int rio_mport_scan(int mport_id); /* Structures internal to the RIO core code */ extern const struct attribute_group *rio_dev_groups[]; extern const struct attribute_group *rio_bus_groups[]; +extern const struct attribute_group *rio_mport_groups[]; #define RIO_GET_DID(size, x) (size ? (x & 0xffff) : ((x & 0x00ff0000) >> 16)) #define RIO_SET_DID(size, x) (size ? (x & 0xffff) : ((x & 0x000000ff) << 16)) diff --git a/include/linux/rio.h b/include/linux/rio.h index b71d5738e683..6bda06f21930 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -83,7 +83,7 @@ #define RIO_CTAG_UDEVID 0x0001ffff /* Unique device identifier */ extern struct bus_type rio_bus_type; -extern struct device rio_bus; +extern struct class rio_mport_class; struct rio_mport; struct rio_dev; @@ -201,6 +201,7 @@ struct rio_dev { #define rio_dev_f(n) list_entry(n, struct rio_dev, net_list) #define to_rio_dev(n) container_of(n, struct rio_dev, dev) #define sw_to_rio_dev(n) container_of(n, struct rio_dev, rswitch[0]) +#define to_rio_mport(n) container_of(n, struct rio_mport, dev) /** * struct rio_msg - RIO message event @@ -248,6 +249,7 @@ enum rio_phy_type { * @phy_type: RapidIO phy type * @phys_efptr: RIO port extended features pointer * @name: Port name string + * @dev: device structure associated with an mport * @priv: Master port private data * @dma: DMA device associated with mport * @nscan: RapidIO network enumeration/discovery operations @@ -272,6 +274,7 @@ struct rio_mport { enum rio_phy_type phy_type; /* RapidIO phy type */ u32 phys_efptr; unsigned char name[RIO_MAX_MPORT_NAME]; + struct device dev; void *priv; /* Master port private data */ #ifdef CONFIG_RAPIDIO_DMA_ENGINE struct dma_device dma; -- cgit v1.2.3-59-g8ed1b From ce816fa88cca083c47ab9000b2138a83043a78be Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Mon, 7 Apr 2014 15:39:19 -0700 Subject: Kconfig: rename HAS_IOPORT to HAS_IOPORT_MAP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the renamed symbol is defined lib/iomap.c implements ioport_map and ioport_unmap and currently (nearly) all platforms define the port accessor functions outb/inb and friend unconditionally. So HAS_IOPORT_MAP is the better name for this. Consequently NO_IOPORT is renamed to NO_IOPORT_MAP. The motivation for this change is to reintroduce a symbol HAS_IOPORT that signals if outb/int et al are available. I will address that at least one merge window later though to keep surprises to a minimum and catch new introductions of (HAS|NO)_IOPORT. The changes in this commit were done using: $ git grep -l -E '(NO|HAS)_IOPORT' | xargs perl -p -i -e 's/\b((?:CONFIG_)?(?:NO|HAS)_IOPORT)\b/$1_MAP/' Signed-off-by: Uwe Kleine-König Acked-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arc/Kconfig | 2 +- arch/arm/Kconfig | 12 ++++++------ arch/arm/mach-picoxcell/Kconfig | 2 +- arch/arm/mach-prima2/Kconfig | 2 +- arch/arm/mach-s3c24xx/Kconfig | 2 +- arch/arm/mach-shmobile/Kconfig | 2 +- arch/arm/mach-vexpress/Kconfig | 2 +- arch/arm/plat-samsung/Kconfig | 4 ++-- arch/arm64/Kconfig | 2 +- arch/cris/Kconfig | 2 +- arch/hexagon/Kconfig | 2 +- arch/m32r/Kconfig | 2 +- arch/m68k/Kconfig | 2 +- arch/metag/Kconfig | 2 +- arch/mips/Kconfig | 4 ++-- arch/openrisc/Kconfig | 2 +- arch/s390/Kconfig | 2 +- arch/sh/Kconfig | 4 ++-- arch/sh/boards/Kconfig | 8 ++++---- arch/sh/include/asm/io.h | 4 ++-- arch/sh/include/asm/io_trapped.h | 2 +- arch/sh/include/asm/machvec.h | 2 +- arch/sh/kernel/Makefile | 2 +- arch/sh/kernel/io_trapped.c | 4 ++-- arch/tile/Kconfig | 2 +- arch/unicore32/Kconfig | 2 +- arch/xtensa/Kconfig | 4 ++-- arch/xtensa/configs/iss_defconfig | 2 +- arch/xtensa/configs/s6105_defconfig | 2 +- drivers/char/tpm/Kconfig | 2 +- drivers/i2c/busses/Kconfig | 2 +- drivers/net/can/sja1000/Kconfig | 2 +- drivers/net/ethernet/3com/Kconfig | 2 +- include/asm-generic/io.h | 4 ++-- include/asm-generic/iomap.h | 2 +- include/linux/io.h | 2 +- lib/Kconfig | 4 ++-- lib/devres.c | 4 ++-- lib/iomap.c | 4 ++-- sound/isa/Kconfig | 2 +- sound/pci/Kconfig | 2 +- 41 files changed, 59 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 75de197a2fef..9596b0ab108d 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -57,7 +57,7 @@ config ARCH_FLATMEM_ENABLE config MMU def_bool y -config NO_IOPORT +config NO_IOPORT_MAP def_bool y config GENERIC_CALIBRATE_DELAY diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index d7a71e3ef55f..5db05f6a0412 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -126,7 +126,7 @@ config HAVE_TCM config HAVE_PROC_CPU bool -config NO_IOPORT +config NO_IOPORT_MAP bool config EISA @@ -410,7 +410,7 @@ config ARCH_EBSA110 select ISA select NEED_MACH_IO_H select NEED_MACH_MEMORY_H - select NO_IOPORT + select NO_IOPORT_MAP help This is an evaluation board for the StrongARM processor available from Digital. It has limited hardware on-board, including an @@ -428,7 +428,7 @@ config ARCH_EFM32 select CPU_V7M select GENERIC_CLOCKEVENTS select NO_DMA - select NO_IOPORT + select NO_IOPORT_MAP select SPARSE_IRQ select USE_OF help @@ -677,7 +677,7 @@ config ARCH_SHMOBILE_LEGACY select HAVE_SMP select MIGHT_HAVE_CACHE_L2X0 select MULTI_IRQ_HANDLER - select NO_IOPORT + select NO_IOPORT_MAP select PINCTRL select PM_GENERIC_DOMAINS if PM select SPARSE_IRQ @@ -699,7 +699,7 @@ config ARCH_RPC select ISA_DMA_API select NEED_MACH_IO_H select NEED_MACH_MEMORY_H - select NO_IOPORT + select NO_IOPORT_MAP select VIRT_TO_BUS help On the Acorn Risc-PC, Linux can support the internal IDE disk and @@ -760,7 +760,7 @@ config ARCH_S3C64XX select HAVE_S3C2410_I2C if I2C select HAVE_S3C2410_WATCHDOG if WATCHDOG select HAVE_TCM - select NO_IOPORT + select NO_IOPORT_MAP select PLAT_SAMSUNG select PM_GENERIC_DOMAINS if PM select S3C_DEV_NAND diff --git a/arch/arm/mach-picoxcell/Kconfig b/arch/arm/mach-picoxcell/Kconfig index eca9eb1c5931..62240f69b4ee 100644 --- a/arch/arm/mach-picoxcell/Kconfig +++ b/arch/arm/mach-picoxcell/Kconfig @@ -4,4 +4,4 @@ config ARCH_PICOXCELL select ARM_VIC select DW_APB_TIMER_OF select HAVE_TCM - select NO_IOPORT + select NO_IOPORT_MAP diff --git a/arch/arm/mach-prima2/Kconfig b/arch/arm/mach-prima2/Kconfig index 3e8189186a5b..e4e505f52ba0 100644 --- a/arch/arm/mach-prima2/Kconfig +++ b/arch/arm/mach-prima2/Kconfig @@ -3,7 +3,7 @@ config ARCH_SIRF select ARCH_HAS_RESET_CONTROLLER select ARCH_REQUIRE_GPIOLIB select GENERIC_IRQ_CHIP - select NO_IOPORT + select NO_IOPORT_MAP select PINCTRL select PINCTRL_SIRF help diff --git a/arch/arm/mach-s3c24xx/Kconfig b/arch/arm/mach-s3c24xx/Kconfig index ba1cc6246778..40cf50b9940c 100644 --- a/arch/arm/mach-s3c24xx/Kconfig +++ b/arch/arm/mach-s3c24xx/Kconfig @@ -12,7 +12,7 @@ if ARCH_S3C24XX config PLAT_S3C24XX def_bool y select ARCH_REQUIRE_GPIOLIB - select NO_IOPORT + select NO_IOPORT_MAP select S3C_DEV_NAND select IRQ_DOMAIN help diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig index a182008e3aeb..0f92ba8e7884 100644 --- a/arch/arm/mach-shmobile/Kconfig +++ b/arch/arm/mach-shmobile/Kconfig @@ -10,7 +10,7 @@ config ARCH_SHMOBILE_MULTI select ARM_GIC select MIGHT_HAVE_PCI select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE - select NO_IOPORT + select NO_IOPORT_MAP select PINCTRL select ARCH_REQUIRE_GPIOLIB diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig index 80b4be36f10a..657d52d0391f 100644 --- a/arch/arm/mach-vexpress/Kconfig +++ b/arch/arm/mach-vexpress/Kconfig @@ -10,7 +10,7 @@ config ARCH_VEXPRESS select HAVE_ARM_TWD if SMP select HAVE_PATA_PLATFORM select ICST - select NO_IOPORT + select NO_IOPORT_MAP select PLAT_VERSATILE select PLAT_VERSATILE_CLCD select POWER_RESET diff --git a/arch/arm/plat-samsung/Kconfig b/arch/arm/plat-samsung/Kconfig index b57e922f1614..243dfcb2ca0e 100644 --- a/arch/arm/plat-samsung/Kconfig +++ b/arch/arm/plat-samsung/Kconfig @@ -9,7 +9,7 @@ config PLAT_SAMSUNG depends on PLAT_S3C24XX || ARCH_S3C64XX || PLAT_S5P || ARCH_EXYNOS default y select GENERIC_IRQ_CHIP - select NO_IOPORT + select NO_IOPORT_MAP help Base platform code for all Samsung SoC based systems @@ -19,7 +19,7 @@ config PLAT_S5P default y select ARCH_REQUIRE_GPIOLIB select ARM_VIC - select NO_IOPORT + select NO_IOPORT_MAP select PLAT_SAMSUNG select S3C_GPIO_TRACK select S5P_GPIO_DRVSTR diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 9711a5fd948d..8079a23e2701 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -66,7 +66,7 @@ config ARCH_PHYS_ADDR_T_64BIT config MMU def_bool y -config NO_IOPORT +config NO_IOPORT_MAP def_bool y config STACKTRACE_SUPPORT diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index 7cb90a54b598..52731e221851 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -29,7 +29,7 @@ config GENERIC_CALIBRATE_DELAY bool default y -config NO_IOPORT +config NO_IOPORT_MAP def_bool y config FORCE_MAX_ZONEORDER diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index fbc5c78c9ac7..0fd6138f6203 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -19,7 +19,7 @@ config HEXAGON select GENERIC_IRQ_SHOW select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK - select NO_IOPORT + select NO_IOPORT_MAP select GENERIC_IOMAP select GENERIC_SMP_IDLE_THREAD select STACKTRACE_SUPPORT diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index ca4504424dae..9e44bbd8051e 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -28,7 +28,7 @@ config ZONE_DMA bool default y -config NO_IOPORT +config NO_IOPORT_MAP def_bool y config NO_DMA diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index b2e322939256..87b7c7581b1d 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -52,7 +52,7 @@ config TIME_LOW_RES bool default y -config NO_IOPORT +config NO_IOPORT_MAP def_bool y config NO_DMA diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig index b1d3c9c0eff8..499b7610eaaf 100644 --- a/arch/metag/Kconfig +++ b/arch/metag/Kconfig @@ -52,7 +52,7 @@ config GENERIC_HWEIGHT config GENERIC_CALIBRATE_DELAY def_bool y -config NO_IOPORT +config NO_IOPORT_MAP def_bool y source "init/Kconfig" diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 16d5ab1615b1..5cd695f905a1 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -175,7 +175,7 @@ config MACH_DECSTATION select CPU_R4000_WORKAROUNDS if 64BIT select CPU_R4400_WORKAROUNDS if 64BIT select DMA_NONCOHERENT - select NO_IOPORT + select NO_IOPORT_MAP select IRQ_CPU select SYS_HAS_CPU_R3000 select SYS_HAS_CPU_R4X00 @@ -947,7 +947,7 @@ config SYNC_R4K config MIPS_MACHINE def_bool n -config NO_IOPORT +config NO_IOPORT_MAP def_bool n config GENERIC_ISA_DMA diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index 9488209a5253..e71d712afb79 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -41,7 +41,7 @@ config RWSEM_XCHGADD_ALGORITHM config GENERIC_HWEIGHT def_bool y -config NO_IOPORT +config NO_IOPORT_MAP def_bool y config TRACE_IRQFLAGS_SUPPORT diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 953f17c8d17c..346d21678ffd 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -52,7 +52,7 @@ config KEXEC config AUDIT_ARCH def_bool y -config NO_IOPORT +config NO_IOPORT_MAP def_bool y config PCI_QUIRKS diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 1399383315a3..ba55e939a820 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -3,7 +3,7 @@ config SUPERH select ARCH_MIGHT_HAVE_PC_PARPORT select EXPERT select CLKDEV_LOOKUP - select HAVE_IDE if HAS_IOPORT + select HAVE_IDE if HAS_IOPORT_MAP select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select ARCH_DISCARD_MEMBLOCK @@ -138,7 +138,7 @@ config ARCH_HAS_ILOG2_U32 config ARCH_HAS_ILOG2_U64 def_bool n -config NO_IOPORT +config NO_IOPORT_MAP def_bool !PCI depends on !SH_CAYMAN && !SH_SH4202_MICRODEV && !SH_SHMIN && \ !SH_HP6XX && !SH_SOLUTION_ENGINE diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig index eb1cf84231a2..e331e5373b8e 100644 --- a/arch/sh/boards/Kconfig +++ b/arch/sh/boards/Kconfig @@ -158,7 +158,7 @@ config SH_SDK7786 bool "SDK7786" depends on CPU_SUBTYPE_SH7786 select SYS_SUPPORTS_PCI - select NO_IOPORT if !PCI + select NO_IOPORT_MAP if !PCI select ARCH_WANT_OPTIONAL_GPIOLIB select HAVE_SRAM_POOL select REGULATOR_FIXED_VOLTAGE if REGULATOR @@ -204,7 +204,7 @@ config SH_URQUELL depends on CPU_SUBTYPE_SH7786 select ARCH_REQUIRE_GPIOLIB select SYS_SUPPORTS_PCI - select NO_IOPORT if !PCI + select NO_IOPORT_MAP if !PCI config SH_MIGOR bool "Migo-R" @@ -306,7 +306,7 @@ config SH_LBOX_RE2 config SH_X3PROTO bool "SH-X3 Prototype board" depends on CPU_SUBTYPE_SHX3 - select NO_IOPORT if !PCI + select NO_IOPORT_MAP if !PCI select IRQ_DOMAIN config SH_MAGIC_PANEL_R2 @@ -333,7 +333,7 @@ config SH_POLARIS config SH_SH2007 bool "SH-2007 board" - select NO_IOPORT + select NO_IOPORT_MAP select REGULATOR_FIXED_VOLTAGE if REGULATOR depends on CPU_SUBTYPE_SH7780 help diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h index 629db2ad7916..728c4c571f40 100644 --- a/arch/sh/include/asm/io.h +++ b/arch/sh/include/asm/io.h @@ -122,7 +122,7 @@ __BUILD_MEMORY_STRING(__raw_, l, u32) __BUILD_MEMORY_STRING(__raw_, q, u64) -#ifdef CONFIG_HAS_IOPORT +#ifdef CONFIG_HAS_IOPORT_MAP /* * Slowdown I/O port space accesses for antique hardware. @@ -218,7 +218,7 @@ __BUILD_IOPORT_STRING(w, u16) __BUILD_IOPORT_STRING(l, u32) __BUILD_IOPORT_STRING(q, u64) -#else /* !CONFIG_HAS_IOPORT */ +#else /* !CONFIG_HAS_IOPORT_MAP */ #include diff --git a/arch/sh/include/asm/io_trapped.h b/arch/sh/include/asm/io_trapped.h index f1251d4f0ba9..4ab94ef51071 100644 --- a/arch/sh/include/asm/io_trapped.h +++ b/arch/sh/include/asm/io_trapped.h @@ -36,7 +36,7 @@ __ioremap_trapped(unsigned long offset, unsigned long size) #define __ioremap_trapped(offset, size) NULL #endif -#ifdef CONFIG_HAS_IOPORT +#ifdef CONFIG_HAS_IOPORT_MAP extern struct list_head trapped_io; static inline void __iomem * diff --git a/arch/sh/include/asm/machvec.h b/arch/sh/include/asm/machvec.h index eb9c20d971dd..d3324e4f372e 100644 --- a/arch/sh/include/asm/machvec.h +++ b/arch/sh/include/asm/machvec.h @@ -21,7 +21,7 @@ struct sh_machine_vector { int (*mv_irq_demux)(int irq); void (*mv_init_irq)(void); -#ifdef CONFIG_HAS_IOPORT +#ifdef CONFIG_HAS_IOPORT_MAP void __iomem *(*mv_ioport_map)(unsigned long port, unsigned int size); void (*mv_ioport_unmap)(void __iomem *); #endif diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile index 261c8bfd75ce..2ccf36c824c6 100644 --- a/arch/sh/kernel/Makefile +++ b/arch/sh/kernel/Makefile @@ -22,7 +22,7 @@ obj-y := debugtraps.o dma-nommu.o dumpstack.o \ ifndef CONFIG_GENERIC_IOMAP obj-y += iomap.o -obj-$(CONFIG_HAS_IOPORT) += ioport.o +obj-$(CONFIG_HAS_IOPORT_MAP) += ioport.o endif obj-$(CONFIG_SUPERH32) += sys_sh32.o diff --git a/arch/sh/kernel/io_trapped.c b/arch/sh/kernel/io_trapped.c index c0a9761f2f8a..f8ce36286cea 100644 --- a/arch/sh/kernel/io_trapped.c +++ b/arch/sh/kernel/io_trapped.c @@ -22,7 +22,7 @@ #define TRAPPED_PAGES_MAX 16 -#ifdef CONFIG_HAS_IOPORT +#ifdef CONFIG_HAS_IOPORT_MAP LIST_HEAD(trapped_io); EXPORT_SYMBOL_GPL(trapped_io); #endif @@ -90,7 +90,7 @@ int register_trapped_io(struct trapped_io *tiop) tiop->magic = IO_TRAPPED_MAGIC; INIT_LIST_HEAD(&tiop->list); spin_lock_irq(&trapped_lock); -#ifdef CONFIG_HAS_IOPORT +#ifdef CONFIG_HAS_IOPORT_MAP if (flags & IORESOURCE_IO) list_add(&tiop->list, &trapped_io); #endif diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 31c8c6223995..85258ca43ff5 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -411,7 +411,7 @@ config PCI_DOMAINS config NO_IOMEM def_bool !PCI -config NO_IOPORT +config NO_IOPORT_MAP def_bool !PCI config TILE_PCI_IO diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig index 25c0dba508cc..aafad6fa1667 100644 --- a/arch/unicore32/Kconfig +++ b/arch/unicore32/Kconfig @@ -27,7 +27,7 @@ config UNICORE32 config GENERIC_CSUM def_bool y -config NO_IOPORT +config NO_IOPORT_MAP bool config STACKTRACE_SUPPORT diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index c87ae7c6e5f9..02d6d29a63c1 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -41,7 +41,7 @@ config ARCH_HAS_ILOG2_U32 config ARCH_HAS_ILOG2_U64 def_bool n -config NO_IOPORT +config NO_IOPORT_MAP def_bool n config HZ @@ -239,7 +239,7 @@ config XTENSA_PLATFORM_XT2000 config XTENSA_PLATFORM_S6105 bool "S6105" select SERIAL_CONSOLE - select NO_IOPORT + select NO_IOPORT_MAP config XTENSA_PLATFORM_XTFPGA bool "XTFPGA" diff --git a/arch/xtensa/configs/iss_defconfig b/arch/xtensa/configs/iss_defconfig index 4f233204faf9..d57d917ff240 100644 --- a/arch/xtensa/configs/iss_defconfig +++ b/arch/xtensa/configs/iss_defconfig @@ -11,7 +11,7 @@ CONFIG_GENERIC_FIND_NEXT_BIT=y CONFIG_GENERIC_HWEIGHT=y # CONFIG_ARCH_HAS_ILOG2_U32 is not set # CONFIG_ARCH_HAS_ILOG2_U64 is not set -CONFIG_NO_IOPORT=y +CONFIG_NO_IOPORT_MAP=y CONFIG_HZ=100 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" CONFIG_CONSTRUCTORS=y diff --git a/arch/xtensa/configs/s6105_defconfig b/arch/xtensa/configs/s6105_defconfig index d929f77a0360..583c2b0974ca 100644 --- a/arch/xtensa/configs/s6105_defconfig +++ b/arch/xtensa/configs/s6105_defconfig @@ -11,7 +11,7 @@ CONFIG_GENERIC_FIND_NEXT_BIT=y CONFIG_GENERIC_HWEIGHT=y # CONFIG_ARCH_HAS_ILOG2_U32 is not set # CONFIG_ARCH_HAS_ILOG2_U64 is not set -CONFIG_NO_IOPORT=y +CONFIG_NO_IOPORT_MAP=y CONFIG_HZ=100 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig index 1a65838888cd..c54cac3f8bc8 100644 --- a/drivers/char/tpm/Kconfig +++ b/drivers/char/tpm/Kconfig @@ -74,7 +74,7 @@ config TCG_NSC config TCG_ATMEL tristate "Atmel TPM Interface" - depends on PPC64 || HAS_IOPORT + depends on PPC64 || HAS_IOPORT_MAP ---help--- If you have a TPM security chip from Atmel say Yes and it will be accessible from within Linux. To compile this driver diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index de17c5593d97..014afab1d551 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -936,7 +936,7 @@ config I2C_ACORN config I2C_ELEKTOR tristate "Elektor ISA card" - depends on ISA && HAS_IOPORT && BROKEN_ON_SMP + depends on ISA && HAS_IOPORT_MAP && BROKEN_ON_SMP select I2C_ALGOPCF help This supports the PCF8584 ISA bus I2C adapter. Say Y if you own diff --git a/drivers/net/can/sja1000/Kconfig b/drivers/net/can/sja1000/Kconfig index 4b18b8765523..1e65cb6c2591 100644 --- a/drivers/net/can/sja1000/Kconfig +++ b/drivers/net/can/sja1000/Kconfig @@ -39,7 +39,7 @@ config CAN_EMS_PCI config CAN_PEAK_PCMCIA tristate "PEAK PCAN-PC Card" depends on PCMCIA - depends on HAS_IOPORT + depends on HAS_IOPORT_MAP ---help--- This driver is for the PCAN-PC Card PCMCIA adapter (1 or 2 channels) from PEAK-System (http://www.peak-system.com). To compile this diff --git a/drivers/net/ethernet/3com/Kconfig b/drivers/net/ethernet/3com/Kconfig index 65b735d4a6ad..afaab4b2333f 100644 --- a/drivers/net/ethernet/3com/Kconfig +++ b/drivers/net/ethernet/3com/Kconfig @@ -66,7 +66,7 @@ config PCMCIA_3C589 config VORTEX tristate "3c590/3c900 series (592/595/597) \"Vortex/Boomerang\" support" - depends on (PCI || EISA) && HAS_IOPORT + depends on (PCI || EISA) && HAS_IOPORT_MAP select MII ---help--- This option enables driver support for a large number of 10Mbps and diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index d5afe96adba6..975e1cc75edb 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -327,7 +327,7 @@ static inline void iounmap(void __iomem *addr) } #endif /* CONFIG_MMU */ -#ifdef CONFIG_HAS_IOPORT +#ifdef CONFIG_HAS_IOPORT_MAP #ifndef CONFIG_GENERIC_IOMAP static inline void __iomem *ioport_map(unsigned long port, unsigned int nr) { @@ -341,7 +341,7 @@ static inline void ioport_unmap(void __iomem *p) extern void __iomem *ioport_map(unsigned long port, unsigned int nr); extern void ioport_unmap(void __iomem *p); #endif /* CONFIG_GENERIC_IOMAP */ -#endif /* CONFIG_HAS_IOPORT */ +#endif /* CONFIG_HAS_IOPORT_MAP */ #ifndef xlate_dev_kmem_ptr #define xlate_dev_kmem_ptr(p) p diff --git a/include/asm-generic/iomap.h b/include/asm-generic/iomap.h index 6afd7d6a9899..1b41011643a5 100644 --- a/include/asm-generic/iomap.h +++ b/include/asm-generic/iomap.h @@ -56,7 +56,7 @@ extern void iowrite8_rep(void __iomem *port, const void *buf, unsigned long coun extern void iowrite16_rep(void __iomem *port, const void *buf, unsigned long count); extern void iowrite32_rep(void __iomem *port, const void *buf, unsigned long count); -#ifdef CONFIG_HAS_IOPORT +#ifdef CONFIG_HAS_IOPORT_MAP /* Create a virtual mapping cookie for an IO port range */ extern void __iomem *ioport_map(unsigned long port, unsigned int nr); extern void ioport_unmap(void __iomem *); diff --git a/include/linux/io.h b/include/linux/io.h index 8a18e75600cc..b76e6e545806 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -41,7 +41,7 @@ static inline int ioremap_page_range(unsigned long addr, unsigned long end, /* * Managed iomap interface */ -#ifdef CONFIG_HAS_IOPORT +#ifdef CONFIG_HAS_IOPORT_MAP void __iomem * devm_ioport_map(struct device *dev, unsigned long port, unsigned int nr); void devm_ioport_unmap(struct device *dev, void __iomem *addr); diff --git a/lib/Kconfig b/lib/Kconfig index 991c98bc4a3f..5d4984c505f8 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -342,9 +342,9 @@ config HAS_IOMEM select GENERIC_IO default y -config HAS_IOPORT +config HAS_IOPORT_MAP boolean - depends on HAS_IOMEM && !NO_IOPORT + depends on HAS_IOMEM && !NO_IOPORT_MAP default y config HAS_DMA diff --git a/lib/devres.c b/lib/devres.c index 48cb3c7bd7de..2f16c133fd36 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -170,7 +170,7 @@ void __iomem *devm_request_and_ioremap(struct device *device, } EXPORT_SYMBOL(devm_request_and_ioremap); -#ifdef CONFIG_HAS_IOPORT +#ifdef CONFIG_HAS_IOPORT_MAP /* * Generic iomap devres */ @@ -229,7 +229,7 @@ void devm_ioport_unmap(struct device *dev, void __iomem *addr) devm_ioport_map_match, (__force void *)addr)); } EXPORT_SYMBOL(devm_ioport_unmap); -#endif /* CONFIG_HAS_IOPORT */ +#endif /* CONFIG_HAS_IOPORT_MAP */ #ifdef CONFIG_PCI /* diff --git a/lib/iomap.c b/lib/iomap.c index 2c08f36862eb..fc3dcb4b238e 100644 --- a/lib/iomap.c +++ b/lib/iomap.c @@ -224,7 +224,7 @@ EXPORT_SYMBOL(iowrite8_rep); EXPORT_SYMBOL(iowrite16_rep); EXPORT_SYMBOL(iowrite32_rep); -#ifdef CONFIG_HAS_IOPORT +#ifdef CONFIG_HAS_IOPORT_MAP /* Create a virtual mapping cookie for an IO port range */ void __iomem *ioport_map(unsigned long port, unsigned int nr) { @@ -239,7 +239,7 @@ void ioport_unmap(void __iomem *addr) } EXPORT_SYMBOL(ioport_map); EXPORT_SYMBOL(ioport_unmap); -#endif /* CONFIG_HAS_IOPORT */ +#endif /* CONFIG_HAS_IOPORT_MAP */ #ifdef CONFIG_PCI /* Hide the details if this is a MMIO or PIO address space and just do what diff --git a/sound/isa/Kconfig b/sound/isa/Kconfig index affa13480659..0216475fc759 100644 --- a/sound/isa/Kconfig +++ b/sound/isa/Kconfig @@ -191,7 +191,7 @@ config SND_ES18XX config SND_SC6000 tristate "Gallant SC-6000/6600/7000 and Audio Excel DSP 16" - depends on HAS_IOPORT + depends on HAS_IOPORT_MAP select SND_WSS_LIB select SND_OPL3_LIB select SND_MPU401_UART diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig index 0b0c0cf13f74..3a3a3a71088b 100644 --- a/sound/pci/Kconfig +++ b/sound/pci/Kconfig @@ -688,7 +688,7 @@ config SND_LOLA config SND_LX6464ES tristate "Digigram LX6464ES" - depends on HAS_IOPORT + depends on HAS_IOPORT_MAP select SND_PCM help Say Y here to include support for Digigram LX6464ES boards. -- cgit v1.2.3-59-g8ed1b From 5722d094ad2b56fa2c1cb3adaf40071a55bbf242 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Mon, 7 Apr 2014 15:39:24 -0700 Subject: memcg, slab: cleanup memcg cache creation This patch cleans up the memcg cache creation path as follows: - Move memcg cache name creation to a separate function to be called from kmem_cache_create_memcg(). This allows us to get rid of the mutex protecting the temporary buffer used for the name formatting, because the whole cache creation path is protected by the slab_mutex. - Get rid of memcg_create_kmem_cache(). This function serves as a proxy to kmem_cache_create_memcg(). After separating the cache name creation path, it would be reduced to a function call, so let's inline it. Signed-off-by: Vladimir Davydov Cc: Michal Hocko Cc: Johannes Weiner Cc: David Rientjes Cc: Pekka Enberg Cc: Glauber Costa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 9 +++++ mm/memcontrol.c | 89 ++++++++++++++++++++-------------------------- mm/slab_common.c | 5 ++- 3 files changed, 52 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 96f3fc87ab96..ab7f02884983 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -491,6 +491,9 @@ void __memcg_kmem_commit_charge(struct page *page, void __memcg_kmem_uncharge_pages(struct page *page, int order); int memcg_cache_id(struct mem_cgroup *memcg); + +char *memcg_create_cache_name(struct mem_cgroup *memcg, + struct kmem_cache *root_cache); int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s, struct kmem_cache *root_cache); void memcg_free_cache_params(struct kmem_cache *s); @@ -635,6 +638,12 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg) return -1; } +static inline char *memcg_create_cache_name(struct mem_cgroup *memcg, + struct kmem_cache *root_cache) +{ + return NULL; +} + static inline int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s, struct kmem_cache *root_cache) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e33b1d09eb1f..32c7342df4bf 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3094,6 +3094,29 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups) return 0; } +char *memcg_create_cache_name(struct mem_cgroup *memcg, + struct kmem_cache *root_cache) +{ + static char *buf = NULL; + + /* + * We need a mutex here to protect the shared buffer. Since this is + * expected to be called only on cache creation, we can employ the + * slab_mutex for that purpose. + */ + lockdep_assert_held(&slab_mutex); + + if (!buf) { + buf = kmalloc(NAME_MAX + 1, GFP_KERNEL); + if (!buf) + return NULL; + } + + cgroup_name(memcg->css.cgroup, buf, NAME_MAX + 1); + return kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name, + memcg_cache_id(memcg), buf); +} + int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s, struct kmem_cache *root_cache) { @@ -3298,46 +3321,6 @@ void mem_cgroup_destroy_cache(struct kmem_cache *cachep) schedule_work(&cachep->memcg_params->destroy); } -static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg, - struct kmem_cache *s) -{ - struct kmem_cache *new = NULL; - static char *tmp_path = NULL, *tmp_name = NULL; - static DEFINE_MUTEX(mutex); /* protects tmp_name */ - - BUG_ON(!memcg_can_account_kmem(memcg)); - - mutex_lock(&mutex); - /* - * kmem_cache_create_memcg duplicates the given name and - * cgroup_name for this name requires RCU context. - * This static temporary buffer is used to prevent from - * pointless shortliving allocation. - */ - if (!tmp_path || !tmp_name) { - if (!tmp_path) - tmp_path = kmalloc(PATH_MAX, GFP_KERNEL); - if (!tmp_name) - tmp_name = kmalloc(NAME_MAX + 1, GFP_KERNEL); - if (!tmp_path || !tmp_name) - goto out; - } - - cgroup_name(memcg->css.cgroup, tmp_name, NAME_MAX + 1); - snprintf(tmp_path, PATH_MAX, "%s(%d:%s)", s->name, - memcg_cache_id(memcg), tmp_name); - - new = kmem_cache_create_memcg(memcg, tmp_path, s->object_size, s->align, - (s->flags & ~SLAB_PANIC), s->ctor, s); - if (new) - new->allocflags |= __GFP_KMEMCG; - else - new = s; -out: - mutex_unlock(&mutex); - return new; -} - void kmem_cache_destroy_memcg_children(struct kmem_cache *s) { struct kmem_cache *c; @@ -3384,12 +3367,6 @@ void kmem_cache_destroy_memcg_children(struct kmem_cache *s) mutex_unlock(&activate_kmem_mutex); } -struct create_work { - struct mem_cgroup *memcg; - struct kmem_cache *cachep; - struct work_struct work; -}; - static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg) { struct kmem_cache *cachep; @@ -3407,13 +3384,25 @@ static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg) mutex_unlock(&memcg->slab_caches_mutex); } +struct create_work { + struct mem_cgroup *memcg; + struct kmem_cache *cachep; + struct work_struct work; +}; + static void memcg_create_cache_work_func(struct work_struct *w) { - struct create_work *cw; + struct create_work *cw = container_of(w, struct create_work, work); + struct mem_cgroup *memcg = cw->memcg; + struct kmem_cache *cachep = cw->cachep; + struct kmem_cache *new; - cw = container_of(w, struct create_work, work); - memcg_create_kmem_cache(cw->memcg, cw->cachep); - css_put(&cw->memcg->css); + new = kmem_cache_create_memcg(memcg, cachep->name, + cachep->object_size, cachep->align, + cachep->flags & ~SLAB_PANIC, cachep->ctor, cachep); + if (new) + new->allocflags |= __GFP_KMEMCG; + css_put(&memcg->css); kfree(cw); } diff --git a/mm/slab_common.c b/mm/slab_common.c index e77b51eb7347..11857abf7057 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -215,7 +215,10 @@ kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size, s->align = calculate_alignment(flags, align, size); s->ctor = ctor; - s->name = kstrdup(name, GFP_KERNEL); + if (memcg) + s->name = memcg_create_cache_name(memcg, parent_cache); + else + s->name = kstrdup(name, GFP_KERNEL); if (!s->name) goto out_free_cache; -- cgit v1.2.3-59-g8ed1b From 794b1248be4e7e157f5535c3ee49168aa4643349 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Mon, 7 Apr 2014 15:39:26 -0700 Subject: memcg, slab: separate memcg vs root cache creation paths Memcg-awareness turned kmem_cache_create() into a dirty interweaving of memcg-only and except-for-memcg calls. To clean this up, let's move the code responsible for memcg cache creation to a separate function. Signed-off-by: Vladimir Davydov Cc: Michal Hocko Cc: Johannes Weiner Cc: David Rientjes Cc: Pekka Enberg Cc: Glauber Costa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 -- include/linux/slab.h | 6 +- mm/memcontrol.c | 7 +- mm/slab_common.c | 187 ++++++++++++++++++++++++++------------------- 4 files changed, 111 insertions(+), 95 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ab7f02884983..02d3072841e9 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -638,12 +638,6 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg) return -1; } -static inline char *memcg_create_cache_name(struct mem_cgroup *memcg, - struct kmem_cache *root_cache) -{ - return NULL; -} - static inline int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s, struct kmem_cache *root_cache) { diff --git a/include/linux/slab.h b/include/linux/slab.h index b5b2df60299e..3dd389aa91c7 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -115,9 +115,9 @@ int slab_is_available(void); struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, unsigned long, void (*)(void *)); -struct kmem_cache * -kmem_cache_create_memcg(struct mem_cgroup *, const char *, size_t, size_t, - unsigned long, void (*)(void *), struct kmem_cache *); +#ifdef CONFIG_MEMCG_KMEM +void kmem_cache_create_memcg(struct mem_cgroup *, struct kmem_cache *); +#endif void kmem_cache_destroy(struct kmem_cache *); int kmem_cache_shrink(struct kmem_cache *); void kmem_cache_free(struct kmem_cache *, void *); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 32c7342df4bf..451523c3bd4e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3395,13 +3395,8 @@ static void memcg_create_cache_work_func(struct work_struct *w) struct create_work *cw = container_of(w, struct create_work, work); struct mem_cgroup *memcg = cw->memcg; struct kmem_cache *cachep = cw->cachep; - struct kmem_cache *new; - new = kmem_cache_create_memcg(memcg, cachep->name, - cachep->object_size, cachep->align, - cachep->flags & ~SLAB_PANIC, cachep->ctor, cachep); - if (new) - new->allocflags |= __GFP_KMEMCG; + kmem_cache_create_memcg(memcg, cachep); css_put(&memcg->css); kfree(cw); } diff --git a/mm/slab_common.c b/mm/slab_common.c index 11857abf7057..ccc012f00126 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -29,8 +29,7 @@ DEFINE_MUTEX(slab_mutex); struct kmem_cache *kmem_cache; #ifdef CONFIG_DEBUG_VM -static int kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name, - size_t size) +static int kmem_cache_sanity_check(const char *name, size_t size) { struct kmem_cache *s = NULL; @@ -57,13 +56,7 @@ static int kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name, } #if !defined(CONFIG_SLUB) || !defined(CONFIG_SLUB_DEBUG_ON) - /* - * For simplicity, we won't check this in the list of memcg - * caches. We have control over memcg naming, and if there - * aren't duplicates in the global list, there won't be any - * duplicates in the memcg lists as well. - */ - if (!memcg && !strcmp(s->name, name)) { + if (!strcmp(s->name, name)) { pr_err("%s (%s): Cache name already exists.\n", __func__, name); dump_stack(); @@ -77,8 +70,7 @@ static int kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name, return 0; } #else -static inline int kmem_cache_sanity_check(struct mem_cgroup *memcg, - const char *name, size_t size) +static inline int kmem_cache_sanity_check(const char *name, size_t size) { return 0; } @@ -139,6 +131,46 @@ unsigned long calculate_alignment(unsigned long flags, return ALIGN(align, sizeof(void *)); } +static struct kmem_cache * +do_kmem_cache_create(char *name, size_t object_size, size_t size, size_t align, + unsigned long flags, void (*ctor)(void *), + struct mem_cgroup *memcg, struct kmem_cache *root_cache) +{ + struct kmem_cache *s; + int err; + + err = -ENOMEM; + s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL); + if (!s) + goto out; + + s->name = name; + s->object_size = object_size; + s->size = size; + s->align = align; + s->ctor = ctor; + + err = memcg_alloc_cache_params(memcg, s, root_cache); + if (err) + goto out_free_cache; + + err = __kmem_cache_create(s, flags); + if (err) + goto out_free_cache; + + s->refcount = 1; + list_add(&s->list, &slab_caches); + memcg_register_cache(s); +out: + if (err) + return ERR_PTR(err); + return s; + +out_free_cache: + memcg_free_cache_params(s); + kfree(s); + goto out; +} /* * kmem_cache_create - Create a cache. @@ -164,34 +196,21 @@ unsigned long calculate_alignment(unsigned long flags, * cacheline. This can be beneficial if you're counting cycles as closely * as davem. */ - struct kmem_cache * -kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size, - size_t align, unsigned long flags, void (*ctor)(void *), - struct kmem_cache *parent_cache) +kmem_cache_create(const char *name, size_t size, size_t align, + unsigned long flags, void (*ctor)(void *)) { - struct kmem_cache *s = NULL; + struct kmem_cache *s; + char *cache_name; int err; get_online_cpus(); mutex_lock(&slab_mutex); - err = kmem_cache_sanity_check(memcg, name, size); + err = kmem_cache_sanity_check(name, size); if (err) goto out_unlock; - if (memcg) { - /* - * Since per-memcg caches are created asynchronously on first - * allocation (see memcg_kmem_get_cache()), several threads can - * try to create the same cache, but only one of them may - * succeed. Therefore if we get here and see the cache has - * already been created, we silently return NULL. - */ - if (cache_from_memcg_idx(parent_cache, memcg_cache_id(memcg))) - goto out_unlock; - } - /* * Some allocators will constraint the set of valid flags to a subset * of all flags. We expect them to define CACHE_CREATE_MASK in this @@ -200,55 +219,29 @@ kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size, */ flags &= CACHE_CREATE_MASK; - if (!memcg) { - s = __kmem_cache_alias(name, size, align, flags, ctor); - if (s) - goto out_unlock; - } - - err = -ENOMEM; - s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL); - if (!s) + s = __kmem_cache_alias(name, size, align, flags, ctor); + if (s) goto out_unlock; - s->object_size = s->size = size; - s->align = calculate_alignment(flags, align, size); - s->ctor = ctor; - - if (memcg) - s->name = memcg_create_cache_name(memcg, parent_cache); - else - s->name = kstrdup(name, GFP_KERNEL); - if (!s->name) - goto out_free_cache; - - err = memcg_alloc_cache_params(memcg, s, parent_cache); - if (err) - goto out_free_cache; - - err = __kmem_cache_create(s, flags); - if (err) - goto out_free_cache; + cache_name = kstrdup(name, GFP_KERNEL); + if (!cache_name) { + err = -ENOMEM; + goto out_unlock; + } - s->refcount = 1; - list_add(&s->list, &slab_caches); - memcg_register_cache(s); + s = do_kmem_cache_create(cache_name, size, size, + calculate_alignment(flags, align, size), + flags, ctor, NULL, NULL); + if (IS_ERR(s)) { + err = PTR_ERR(s); + kfree(cache_name); + } out_unlock: mutex_unlock(&slab_mutex); put_online_cpus(); if (err) { - /* - * There is no point in flooding logs with warnings or - * especially crashing the system if we fail to create a cache - * for a memcg. In this case we will be accounting the memcg - * allocation to the root cgroup until we succeed to create its - * own cache, but it isn't that critical. - */ - if (!memcg) - return NULL; - if (flags & SLAB_PANIC) panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n", name, err); @@ -260,21 +253,55 @@ out_unlock: return NULL; } return s; - -out_free_cache: - memcg_free_cache_params(s); - kfree(s->name); - kmem_cache_free(kmem_cache, s); - goto out_unlock; } +EXPORT_SYMBOL(kmem_cache_create); -struct kmem_cache * -kmem_cache_create(const char *name, size_t size, size_t align, - unsigned long flags, void (*ctor)(void *)) +#ifdef CONFIG_MEMCG_KMEM +/* + * kmem_cache_create_memcg - Create a cache for a memory cgroup. + * @memcg: The memory cgroup the new cache is for. + * @root_cache: The parent of the new cache. + * + * This function attempts to create a kmem cache that will serve allocation + * requests going from @memcg to @root_cache. The new cache inherits properties + * from its parent. + */ +void kmem_cache_create_memcg(struct mem_cgroup *memcg, struct kmem_cache *root_cache) { - return kmem_cache_create_memcg(NULL, name, size, align, flags, ctor, NULL); + struct kmem_cache *s; + char *cache_name; + + get_online_cpus(); + mutex_lock(&slab_mutex); + + /* + * Since per-memcg caches are created asynchronously on first + * allocation (see memcg_kmem_get_cache()), several threads can try to + * create the same cache, but only one of them may succeed. + */ + if (cache_from_memcg_idx(root_cache, memcg_cache_id(memcg))) + goto out_unlock; + + cache_name = memcg_create_cache_name(memcg, root_cache); + if (!cache_name) + goto out_unlock; + + s = do_kmem_cache_create(cache_name, root_cache->object_size, + root_cache->size, root_cache->align, + root_cache->flags, root_cache->ctor, + memcg, root_cache); + if (IS_ERR(s)) { + kfree(cache_name); + goto out_unlock; + } + + s->allocflags |= __GFP_KMEMCG; + +out_unlock: + mutex_unlock(&slab_mutex); + put_online_cpus(); } -EXPORT_SYMBOL(kmem_cache_create); +#endif /* CONFIG_MEMCG_KMEM */ void kmem_cache_destroy(struct kmem_cache *s) { -- cgit v1.2.3-59-g8ed1b From b8529907ba35d625fa4b85d3e4dc8021be97c1f3 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Mon, 7 Apr 2014 15:39:28 -0700 Subject: memcg, slab: do not destroy children caches if parent has aliases Currently we destroy children caches at the very beginning of kmem_cache_destroy(). This is wrong, because the root cache will not necessarily be destroyed in the end - if it has aliases (refcount > 0), kmem_cache_destroy() will simply decrement its refcount and return. In this case, at best we will get a bunch of warnings in dmesg, like this one: kmem_cache_destroy kmalloc-32:0: Slab cache still has objects CPU: 1 PID: 7139 Comm: modprobe Tainted: G B W 3.13.0+ #117 Call Trace: dump_stack+0x49/0x5b kmem_cache_destroy+0xdf/0xf0 kmem_cache_destroy_memcg_children+0x97/0xc0 kmem_cache_destroy+0xf/0xf0 xfs_mru_cache_uninit+0x21/0x30 [xfs] exit_xfs_fs+0x2e/0xc44 [xfs] SyS_delete_module+0x198/0x1f0 system_call_fastpath+0x16/0x1b At worst - if kmem_cache_destroy() will race with an allocation from a memcg cache - the kernel will panic. This patch fixes this by moving children caches destruction after the check if the cache has aliases. Plus, it forbids destroying a root cache if it still has children caches, because each children cache keeps a reference to its parent. Signed-off-by: Vladimir Davydov Cc: Michal Hocko Cc: Johannes Weiner Cc: David Rientjes Cc: Pekka Enberg Cc: Glauber Costa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 +--- mm/memcontrol.c | 13 ++++---- mm/slab_common.c | 75 ++++++++++++++++++++++++++++++---------------- 3 files changed, 57 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 02d3072841e9..b569b8be5c5a 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -507,7 +507,7 @@ struct kmem_cache * __memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp); void mem_cgroup_destroy_cache(struct kmem_cache *cachep); -void kmem_cache_destroy_memcg_children(struct kmem_cache *s); +int __kmem_cache_destroy_memcg_children(struct kmem_cache *s); /** * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed. @@ -661,10 +661,6 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp) { return cachep; } - -static inline void kmem_cache_destroy_memcg_children(struct kmem_cache *s) -{ -} #endif /* CONFIG_MEMCG_KMEM */ #endif /* _LINUX_MEMCONTROL_H */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c22d8bf42d9a..29501f040568 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3321,15 +3321,10 @@ void mem_cgroup_destroy_cache(struct kmem_cache *cachep) schedule_work(&cachep->memcg_params->destroy); } -void kmem_cache_destroy_memcg_children(struct kmem_cache *s) +int __kmem_cache_destroy_memcg_children(struct kmem_cache *s) { struct kmem_cache *c; - int i; - - if (!s->memcg_params) - return; - if (!s->memcg_params->is_root_cache) - return; + int i, failed = 0; /* * If the cache is being destroyed, we trust that there is no one else @@ -3363,8 +3358,12 @@ void kmem_cache_destroy_memcg_children(struct kmem_cache *s) c->memcg_params->dead = false; cancel_work_sync(&c->memcg_params->destroy); kmem_cache_destroy(c); + + if (cache_from_memcg_idx(s, i)) + failed++; } mutex_unlock(&activate_kmem_mutex); + return failed; } static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg) diff --git a/mm/slab_common.c b/mm/slab_common.c index 0c2879ff414c..f3cfccf76dda 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -301,39 +301,64 @@ out_unlock: mutex_unlock(&slab_mutex); put_online_cpus(); } + +static int kmem_cache_destroy_memcg_children(struct kmem_cache *s) +{ + int rc; + + if (!s->memcg_params || + !s->memcg_params->is_root_cache) + return 0; + + mutex_unlock(&slab_mutex); + rc = __kmem_cache_destroy_memcg_children(s); + mutex_lock(&slab_mutex); + + return rc; +} +#else +static int kmem_cache_destroy_memcg_children(struct kmem_cache *s) +{ + return 0; +} #endif /* CONFIG_MEMCG_KMEM */ void kmem_cache_destroy(struct kmem_cache *s) { - /* Destroy all the children caches if we aren't a memcg cache */ - kmem_cache_destroy_memcg_children(s); - get_online_cpus(); mutex_lock(&slab_mutex); + s->refcount--; - if (!s->refcount) { - list_del(&s->list); - memcg_unregister_cache(s); - - if (!__kmem_cache_shutdown(s)) { - mutex_unlock(&slab_mutex); - if (s->flags & SLAB_DESTROY_BY_RCU) - rcu_barrier(); - - memcg_free_cache_params(s); - kfree(s->name); - kmem_cache_free(kmem_cache, s); - } else { - list_add(&s->list, &slab_caches); - memcg_register_cache(s); - mutex_unlock(&slab_mutex); - printk(KERN_ERR "kmem_cache_destroy %s: Slab cache still has objects\n", - s->name); - dump_stack(); - } - } else { - mutex_unlock(&slab_mutex); + if (s->refcount) + goto out_unlock; + + if (kmem_cache_destroy_memcg_children(s) != 0) + goto out_unlock; + + list_del(&s->list); + memcg_unregister_cache(s); + + if (__kmem_cache_shutdown(s) != 0) { + list_add(&s->list, &slab_caches); + memcg_register_cache(s); + printk(KERN_ERR "kmem_cache_destroy %s: " + "Slab cache still has objects\n", s->name); + dump_stack(); + goto out_unlock; } + + mutex_unlock(&slab_mutex); + if (s->flags & SLAB_DESTROY_BY_RCU) + rcu_barrier(); + + memcg_free_cache_params(s); + kfree(s->name); + kmem_cache_free(kmem_cache, s); + goto out_put_cpus; + +out_unlock: + mutex_unlock(&slab_mutex); +out_put_cpus: put_online_cpus(); } EXPORT_SYMBOL(kmem_cache_destroy); -- cgit v1.2.3-59-g8ed1b From 9a41707bd3a0811919000daf094e9d50ea65f7da Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Mon, 7 Apr 2014 15:39:31 -0700 Subject: slub: rework sysfs layout for memcg caches Currently, we try to arrange sysfs entries for memcg caches in the same manner as for global caches. Apart from turning /sys/kernel/slab into a mess when there are a lot of kmem-active memcgs created, it actually does not work properly - we won't create more than one link to a memcg cache in case its parent is merged with another cache. For instance, if A is a root cache merged with another root cache B, we will have the following sysfs setup: X A -> X B -> X where X is some unique id (see create_unique_id()). Now if memcgs M and N start to allocate from cache A (or B, which is the same), we will get: X X:M X:N A -> X B -> X A:M -> X:M A:N -> X:N Since B is an alias for A, we won't get entries B:M and B:N, which is confusing. It is more logical to have entries for memcg caches under the corresponding root cache's sysfs directory. This would allow us to keep sysfs layout clean, and avoid such inconsistencies like one described above. This patch does the trick. It creates a "cgroup" kset in each root cache kobject to keep its children caches there. Signed-off-by: Vladimir Davydov Cc: Michal Hocko Cc: Johannes Weiner Cc: David Rientjes Cc: Pekka Enberg Cc: Glauber Costa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 3 +++ mm/slub.c | 26 +++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index f56bfa9e4526..f2f7398848cf 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -87,6 +87,9 @@ struct kmem_cache { #ifdef CONFIG_MEMCG_KMEM struct memcg_cache_params *memcg_params; int max_attr_size; /* for propagation, maximum size of a stored attr */ +#ifdef CONFIG_SYSFS + struct kset *memcg_kset; +#endif #endif #ifdef CONFIG_NUMA diff --git a/mm/slub.c b/mm/slub.c index 33939e72bc37..3508edec19f9 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -5138,6 +5138,15 @@ static const struct kset_uevent_ops slab_uevent_ops = { static struct kset *slab_kset; +static inline struct kset *cache_kset(struct kmem_cache *s) +{ +#ifdef CONFIG_MEMCG_KMEM + if (!is_root_cache(s)) + return s->memcg_params->root_cache->memcg_kset; +#endif + return slab_kset; +} + #define ID_STR_LENGTH 64 /* Create a unique string id for a slab cache: @@ -5203,7 +5212,7 @@ static int sysfs_slab_add(struct kmem_cache *s) name = create_unique_id(s); } - s->kobj.kset = slab_kset; + s->kobj.kset = cache_kset(s); err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name); if (err) { kobject_put(&s->kobj); @@ -5216,6 +5225,18 @@ static int sysfs_slab_add(struct kmem_cache *s) kobject_put(&s->kobj); return err; } + +#ifdef CONFIG_MEMCG_KMEM + if (is_root_cache(s)) { + s->memcg_kset = kset_create_and_add("cgroup", NULL, &s->kobj); + if (!s->memcg_kset) { + kobject_del(&s->kobj); + kobject_put(&s->kobj); + return -ENOMEM; + } + } +#endif + kobject_uevent(&s->kobj, KOBJ_ADD); if (!unmergeable) { /* Setup first alias */ @@ -5234,6 +5255,9 @@ static void sysfs_slab_remove(struct kmem_cache *s) */ return; +#ifdef CONFIG_MEMCG_KMEM + kset_unregister(s->memcg_kset); +#endif kobject_uevent(&s->kobj, KOBJ_REMOVE); kobject_del(&s->kobj); kobject_put(&s->kobj); -- cgit v1.2.3-59-g8ed1b From b3ca1c10d7b32fdfdfaf5484eda486323f52d9be Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 7 Apr 2014 15:39:34 -0700 Subject: percpu: add raw_cpu_ops The kernel has never been audited to ensure that this_cpu operations are consistently used throughout the kernel. The code generated in many places can be improved through the use of this_cpu operations (which uses a segment register for relocation of per cpu offsets instead of performing address calculations). The patch set also addresses various consistency issues in general with the per cpu macros. A. The semantics of __this_cpu_ptr() differs from this_cpu_ptr only because checks are skipped. This is typically shown through a raw_ prefix. So this patch set changes the places where __this_cpu_ptr() is used to raw_cpu_ptr(). B. There has been the long term wish by some that __this_cpu operations would check for preemption. However, there are cases where preemption checks need to be skipped. This patch set adds raw_cpu operations that do not check for preemption and then adds preemption checks to the __this_cpu operations. C. The use of __get_cpu_var is always a reference to a percpu variable that can also be handled via a this_cpu operation. This patch set replaces all uses of __get_cpu_var with this_cpu operations. D. We can then use this_cpu RMW operations in various places replacing sequences of instructions by a single one. E. The use of this_cpu operations throughout will allow other arches than x86 to implement optimized references and RMV operations to work with per cpu local data. F. The use of this_cpu operations opens up the possibility to further optimize code that relies on synchronization through per cpu data. The patch set works in a couple of stages: I. Patch 1 adds the additional raw_cpu operations and raw_cpu_ptr(). Also converts the existing __this_cpu_xx_# primitive in the x86 code to raw_cpu_xx_#. II. Patch 2-4 use the raw_cpu operations in places that would give us false positives once they are enabled. III. Patch 5 adds preemption checks to __this_cpu operations to allow checking if preemption is properly disabled when these functions are used. IV. Patches 6-20 are patches that simply replace uses of __get_cpu_var with this_cpu_ptr. They do not depend on any changes to the percpu code. No preemption tests are skipped if they are applied. V. Patches 21-46 are conversion patches that use this_cpu operations in various kernel subsystems/drivers or arch code. VI. Patches 47/48 (not included in this series) remove no longer used functions (__this_cpu_ptr and __get_cpu_var). These should only be applied after all the conversion patches have made it and after we have done additional passes through the kernel to ensure that none of the uses of these functions remain. This patch (of 46): The patches following this one will add preemption checks to __this_cpu ops so we need to have an alternative way to use this_cpu operations without preemption checks. raw_cpu_ops will be the basis for all other ops since these will be the operations that do not implement any checks. Primitive operations are renamed by this patch from __this_cpu_xxx to raw_cpu_xxxx. Also change the uses of the x86 percpu primitives in preempt.h. These depend directly on asm/percpu.h (header #include nesting issue). Signed-off-by: Peter Zijlstra Signed-off-by: Christoph Lameter Acked-by: Ingo Molnar Cc: Tejun Heo Cc: "James E.J. Bottomley" Cc: "Paul E. McKenney" Cc: Alex Shi Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Bryan Wu Cc: Catalin Marinas Cc: Chris Metcalf Cc: Daniel Lezcano Cc: David Daney Cc: David Miller Cc: David S. Miller Cc: Dimitri Sivanich Cc: Dipankar Sarma Cc: Eric Dumazet Cc: Fenghua Yu Cc: Frederic Weisbecker Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Cc: Hedi Berriche Cc: Heiko Carstens Cc: Helge Deller Cc: Ivan Kokshaysky Cc: James Hogan Cc: Jens Axboe Cc: John Stultz Cc: Martin Schwidefsky Cc: Masami Hiramatsu Cc: Matt Turner Cc: Mike Frysinger Cc: Mike Travis Cc: Neil Brown Cc: Nicolas Pitre Cc: Paul Mackerras Cc: Paul Mundt Cc: Rafael J. Wysocki Cc: Ralf Baechle Cc: Richard Henderson Cc: Robert Richter Cc: Russell King Cc: Russell King Cc: Rusty Russell Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tony Luck Cc: Will Deacon Cc: Wim Van Sebroeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/percpu.h | 98 ++++++------ arch/x86/include/asm/preempt.h | 16 +- include/asm-generic/percpu.h | 13 +- include/linux/percpu.h | 331 ++++++++++++++++++++++++----------------- 4 files changed, 260 insertions(+), 198 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 94220d14d5cc..851bcdc5db04 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -52,7 +52,7 @@ * Compared to the generic __my_cpu_offset version, the following * saves one instruction and avoids clobbering a temp register. */ -#define __this_cpu_ptr(ptr) \ +#define raw_cpu_ptr(ptr) \ ({ \ unsigned long tcp_ptr__; \ __verify_pcpu_ptr(ptr); \ @@ -362,25 +362,25 @@ do { \ */ #define this_cpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var))) -#define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) -#define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) -#define __this_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) - -#define __this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) -#define __this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) -#define __this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) -#define __this_cpu_add_1(pcp, val) percpu_add_op((pcp), val) -#define __this_cpu_add_2(pcp, val) percpu_add_op((pcp), val) -#define __this_cpu_add_4(pcp, val) percpu_add_op((pcp), val) -#define __this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) -#define __this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) -#define __this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) -#define __this_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val) -#define __this_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val) -#define __this_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val) -#define __this_cpu_xchg_1(pcp, val) percpu_xchg_op(pcp, val) -#define __this_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val) -#define __this_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val) +#define raw_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define raw_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define raw_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) + +#define raw_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) +#define raw_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) +#define raw_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) +#define raw_cpu_add_1(pcp, val) percpu_add_op((pcp), val) +#define raw_cpu_add_2(pcp, val) percpu_add_op((pcp), val) +#define raw_cpu_add_4(pcp, val) percpu_add_op((pcp), val) +#define raw_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) +#define raw_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) +#define raw_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) +#define raw_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val) +#define raw_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val) +#define raw_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val) +#define raw_cpu_xchg_1(pcp, val) percpu_xchg_op(pcp, val) +#define raw_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val) +#define raw_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val) #define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) #define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) @@ -401,16 +401,16 @@ do { \ #define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval) #define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval) -#define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) -#define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) -#define __this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) -#define __this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) -#define __this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) -#define __this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) +#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) +#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) +#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) +#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) +#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) +#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) -#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) -#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) -#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) +#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) +#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) +#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) #define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) #define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) #define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) @@ -427,7 +427,7 @@ do { \ __ret; \ }) -#define __this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double +#define raw_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double #define this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double #endif /* CONFIG_X86_CMPXCHG64 */ @@ -436,22 +436,22 @@ do { \ * 32 bit must fall back to generic operations. */ #ifdef CONFIG_X86_64 -#define __this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) -#define __this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) -#define __this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) -#define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) -#define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) -#define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) -#define __this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) -#define __this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) - -#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) -#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) -#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) -#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) -#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) -#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) -#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) +#define raw_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define raw_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) +#define raw_cpu_add_8(pcp, val) percpu_add_op((pcp), val) +#define raw_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) +#define raw_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) +#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) +#define raw_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) +#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) + +#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) +#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) +#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) +#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) +#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) +#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) #define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) /* @@ -474,7 +474,7 @@ do { \ __ret; \ }) -#define __this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double +#define raw_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double #define this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double #endif @@ -495,9 +495,9 @@ static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr, unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG; #ifdef CONFIG_X86_64 - return ((1UL << (nr % BITS_PER_LONG)) & __this_cpu_read_8(*a)) != 0; + return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_8(*a)) != 0; #else - return ((1UL << (nr % BITS_PER_LONG)) & __this_cpu_read_4(*a)) != 0; + return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_4(*a)) != 0; #endif } diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index c8b051933b1b..7024c12f7bfe 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -19,12 +19,12 @@ DECLARE_PER_CPU(int, __preempt_count); */ static __always_inline int preempt_count(void) { - return __this_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED; + return raw_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED; } static __always_inline void preempt_count_set(int pc) { - __this_cpu_write_4(__preempt_count, pc); + raw_cpu_write_4(__preempt_count, pc); } /* @@ -53,17 +53,17 @@ static __always_inline void preempt_count_set(int pc) static __always_inline void set_preempt_need_resched(void) { - __this_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED); + raw_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED); } static __always_inline void clear_preempt_need_resched(void) { - __this_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED); + raw_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED); } static __always_inline bool test_preempt_need_resched(void) { - return !(__this_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED); + return !(raw_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED); } /* @@ -72,12 +72,12 @@ static __always_inline bool test_preempt_need_resched(void) static __always_inline void __preempt_count_add(int val) { - __this_cpu_add_4(__preempt_count, val); + raw_cpu_add_4(__preempt_count, val); } static __always_inline void __preempt_count_sub(int val) { - __this_cpu_add_4(__preempt_count, -val); + raw_cpu_add_4(__preempt_count, -val); } /* @@ -95,7 +95,7 @@ static __always_inline bool __preempt_count_dec_and_test(void) */ static __always_inline bool should_resched(void) { - return unlikely(!__this_cpu_read_4(__preempt_count)); + return unlikely(!raw_cpu_read_4(__preempt_count)); } #ifdef CONFIG_PREEMPT diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index d17784ea37ff..0703aa75b5e8 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -56,17 +56,17 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #define per_cpu(var, cpu) \ (*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu))) -#ifndef __this_cpu_ptr -#define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) +#ifndef raw_cpu_ptr +#define raw_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) #endif #ifdef CONFIG_DEBUG_PREEMPT #define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) #else -#define this_cpu_ptr(ptr) __this_cpu_ptr(ptr) +#define this_cpu_ptr(ptr) raw_cpu_ptr(ptr) #endif #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) -#define __raw_get_cpu_var(var) (*__this_cpu_ptr(&(var))) +#define __raw_get_cpu_var(var) (*raw_cpu_ptr(&(var))) #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA extern void setup_per_cpu_areas(void); @@ -83,7 +83,7 @@ extern void setup_per_cpu_areas(void); #define __get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) #define __raw_get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) #define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) -#define __this_cpu_ptr(ptr) this_cpu_ptr(ptr) +#define raw_cpu_ptr(ptr) this_cpu_ptr(ptr) #endif /* SMP */ @@ -122,4 +122,7 @@ extern void setup_per_cpu_areas(void); #define PER_CPU_DEF_ATTRIBUTES #endif +/* Keep until we have removed all uses of __this_cpu_ptr */ +#define __this_cpu_ptr raw_cpu_ptr + #endif /* _ASM_GENERIC_PERCPU_H_ */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index e3817d2441b6..4e4d2afcc0c7 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -243,6 +243,8 @@ do { \ } while (0) /* + * this_cpu operations (C) 2008-2013 Christoph Lameter + * * Optimized manipulation for memory allocated through the per cpu * allocator or for addresses of per cpu variables. * @@ -296,7 +298,7 @@ do { \ do { \ unsigned long flags; \ raw_local_irq_save(flags); \ - *__this_cpu_ptr(&(pcp)) op val; \ + *raw_cpu_ptr(&(pcp)) op val; \ raw_local_irq_restore(flags); \ } while (0) @@ -381,8 +383,8 @@ do { \ typeof(pcp) ret__; \ unsigned long flags; \ raw_local_irq_save(flags); \ - __this_cpu_add(pcp, val); \ - ret__ = __this_cpu_read(pcp); \ + raw_cpu_add(pcp, val); \ + ret__ = raw_cpu_read(pcp); \ raw_local_irq_restore(flags); \ ret__; \ }) @@ -411,8 +413,8 @@ do { \ ({ typeof(pcp) ret__; \ unsigned long flags; \ raw_local_irq_save(flags); \ - ret__ = __this_cpu_read(pcp); \ - __this_cpu_write(pcp, nval); \ + ret__ = raw_cpu_read(pcp); \ + raw_cpu_write(pcp, nval); \ raw_local_irq_restore(flags); \ ret__; \ }) @@ -439,9 +441,9 @@ do { \ typeof(pcp) ret__; \ unsigned long flags; \ raw_local_irq_save(flags); \ - ret__ = __this_cpu_read(pcp); \ + ret__ = raw_cpu_read(pcp); \ if (ret__ == (oval)) \ - __this_cpu_write(pcp, nval); \ + raw_cpu_write(pcp, nval); \ raw_local_irq_restore(flags); \ ret__; \ }) @@ -476,7 +478,7 @@ do { \ int ret__; \ unsigned long flags; \ raw_local_irq_save(flags); \ - ret__ = __this_cpu_generic_cmpxchg_double(pcp1, pcp2, \ + ret__ = raw_cpu_generic_cmpxchg_double(pcp1, pcp2, \ oval1, oval2, nval1, nval2); \ raw_local_irq_restore(flags); \ ret__; \ @@ -504,12 +506,8 @@ do { \ #endif /* - * Generic percpu operations for context that are safe from preemption/interrupts. - * Either we do not care about races or the caller has the - * responsibility of handling preemption/interrupt issues. Arch code can still - * override these instructions since the arch per cpu code may be more - * efficient and may actually get race freeness for free (that is the - * case for x86 for example). + * Generic percpu operations for contexts where we do not want to do + * any checks for preemptiosn. * * If there is no other protection through preempt disable and/or * disabling interupts then one of these RMW operations can show unexpected @@ -517,211 +515,272 @@ do { \ * or an interrupt occurred and the same percpu variable was modified from * the interrupt context. */ -#ifndef __this_cpu_read -# ifndef __this_cpu_read_1 -# define __this_cpu_read_1(pcp) (*__this_cpu_ptr(&(pcp))) +#ifndef raw_cpu_read +# ifndef raw_cpu_read_1 +# define raw_cpu_read_1(pcp) (*raw_cpu_ptr(&(pcp))) # endif -# ifndef __this_cpu_read_2 -# define __this_cpu_read_2(pcp) (*__this_cpu_ptr(&(pcp))) +# ifndef raw_cpu_read_2 +# define raw_cpu_read_2(pcp) (*raw_cpu_ptr(&(pcp))) # endif -# ifndef __this_cpu_read_4 -# define __this_cpu_read_4(pcp) (*__this_cpu_ptr(&(pcp))) +# ifndef raw_cpu_read_4 +# define raw_cpu_read_4(pcp) (*raw_cpu_ptr(&(pcp))) # endif -# ifndef __this_cpu_read_8 -# define __this_cpu_read_8(pcp) (*__this_cpu_ptr(&(pcp))) +# ifndef raw_cpu_read_8 +# define raw_cpu_read_8(pcp) (*raw_cpu_ptr(&(pcp))) # endif -# define __this_cpu_read(pcp) __pcpu_size_call_return(__this_cpu_read_, (pcp)) +# define raw_cpu_read(pcp) __pcpu_size_call_return(raw_cpu_read_, (pcp)) #endif -#define __this_cpu_generic_to_op(pcp, val, op) \ +#define raw_cpu_generic_to_op(pcp, val, op) \ do { \ - *__this_cpu_ptr(&(pcp)) op val; \ + *raw_cpu_ptr(&(pcp)) op val; \ } while (0) -#ifndef __this_cpu_write -# ifndef __this_cpu_write_1 -# define __this_cpu_write_1(pcp, val) __this_cpu_generic_to_op((pcp), (val), =) + +#ifndef raw_cpu_write +# ifndef raw_cpu_write_1 +# define raw_cpu_write_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) # endif -# ifndef __this_cpu_write_2 -# define __this_cpu_write_2(pcp, val) __this_cpu_generic_to_op((pcp), (val), =) +# ifndef raw_cpu_write_2 +# define raw_cpu_write_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) # endif -# ifndef __this_cpu_write_4 -# define __this_cpu_write_4(pcp, val) __this_cpu_generic_to_op((pcp), (val), =) +# ifndef raw_cpu_write_4 +# define raw_cpu_write_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) # endif -# ifndef __this_cpu_write_8 -# define __this_cpu_write_8(pcp, val) __this_cpu_generic_to_op((pcp), (val), =) +# ifndef raw_cpu_write_8 +# define raw_cpu_write_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) # endif -# define __this_cpu_write(pcp, val) __pcpu_size_call(__this_cpu_write_, (pcp), (val)) +# define raw_cpu_write(pcp, val) __pcpu_size_call(raw_cpu_write_, (pcp), (val)) #endif -#ifndef __this_cpu_add -# ifndef __this_cpu_add_1 -# define __this_cpu_add_1(pcp, val) __this_cpu_generic_to_op((pcp), (val), +=) +#ifndef raw_cpu_add +# ifndef raw_cpu_add_1 +# define raw_cpu_add_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) # endif -# ifndef __this_cpu_add_2 -# define __this_cpu_add_2(pcp, val) __this_cpu_generic_to_op((pcp), (val), +=) +# ifndef raw_cpu_add_2 +# define raw_cpu_add_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) # endif -# ifndef __this_cpu_add_4 -# define __this_cpu_add_4(pcp, val) __this_cpu_generic_to_op((pcp), (val), +=) +# ifndef raw_cpu_add_4 +# define raw_cpu_add_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) # endif -# ifndef __this_cpu_add_8 -# define __this_cpu_add_8(pcp, val) __this_cpu_generic_to_op((pcp), (val), +=) +# ifndef raw_cpu_add_8 +# define raw_cpu_add_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) # endif -# define __this_cpu_add(pcp, val) __pcpu_size_call(__this_cpu_add_, (pcp), (val)) +# define raw_cpu_add(pcp, val) __pcpu_size_call(raw_cpu_add_, (pcp), (val)) #endif -#ifndef __this_cpu_sub -# define __this_cpu_sub(pcp, val) __this_cpu_add((pcp), -(typeof(pcp))(val)) +#ifndef raw_cpu_sub +# define raw_cpu_sub(pcp, val) raw_cpu_add((pcp), -(val)) #endif -#ifndef __this_cpu_inc -# define __this_cpu_inc(pcp) __this_cpu_add((pcp), 1) +#ifndef raw_cpu_inc +# define raw_cpu_inc(pcp) raw_cpu_add((pcp), 1) #endif -#ifndef __this_cpu_dec -# define __this_cpu_dec(pcp) __this_cpu_sub((pcp), 1) +#ifndef raw_cpu_dec +# define raw_cpu_dec(pcp) raw_cpu_sub((pcp), 1) #endif -#ifndef __this_cpu_and -# ifndef __this_cpu_and_1 -# define __this_cpu_and_1(pcp, val) __this_cpu_generic_to_op((pcp), (val), &=) +#ifndef raw_cpu_and +# ifndef raw_cpu_and_1 +# define raw_cpu_and_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) # endif -# ifndef __this_cpu_and_2 -# define __this_cpu_and_2(pcp, val) __this_cpu_generic_to_op((pcp), (val), &=) +# ifndef raw_cpu_and_2 +# define raw_cpu_and_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) # endif -# ifndef __this_cpu_and_4 -# define __this_cpu_and_4(pcp, val) __this_cpu_generic_to_op((pcp), (val), &=) +# ifndef raw_cpu_and_4 +# define raw_cpu_and_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) # endif -# ifndef __this_cpu_and_8 -# define __this_cpu_and_8(pcp, val) __this_cpu_generic_to_op((pcp), (val), &=) +# ifndef raw_cpu_and_8 +# define raw_cpu_and_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) # endif -# define __this_cpu_and(pcp, val) __pcpu_size_call(__this_cpu_and_, (pcp), (val)) +# define raw_cpu_and(pcp, val) __pcpu_size_call(raw_cpu_and_, (pcp), (val)) #endif -#ifndef __this_cpu_or -# ifndef __this_cpu_or_1 -# define __this_cpu_or_1(pcp, val) __this_cpu_generic_to_op((pcp), (val), |=) +#ifndef raw_cpu_or +# ifndef raw_cpu_or_1 +# define raw_cpu_or_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) # endif -# ifndef __this_cpu_or_2 -# define __this_cpu_or_2(pcp, val) __this_cpu_generic_to_op((pcp), (val), |=) +# ifndef raw_cpu_or_2 +# define raw_cpu_or_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) # endif -# ifndef __this_cpu_or_4 -# define __this_cpu_or_4(pcp, val) __this_cpu_generic_to_op((pcp), (val), |=) +# ifndef raw_cpu_or_4 +# define raw_cpu_or_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) # endif -# ifndef __this_cpu_or_8 -# define __this_cpu_or_8(pcp, val) __this_cpu_generic_to_op((pcp), (val), |=) +# ifndef raw_cpu_or_8 +# define raw_cpu_or_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) # endif -# define __this_cpu_or(pcp, val) __pcpu_size_call(__this_cpu_or_, (pcp), (val)) +# define raw_cpu_or(pcp, val) __pcpu_size_call(raw_cpu_or_, (pcp), (val)) #endif -#define __this_cpu_generic_add_return(pcp, val) \ +#define raw_cpu_generic_add_return(pcp, val) \ ({ \ - __this_cpu_add(pcp, val); \ - __this_cpu_read(pcp); \ + raw_cpu_add(pcp, val); \ + raw_cpu_read(pcp); \ }) -#ifndef __this_cpu_add_return -# ifndef __this_cpu_add_return_1 -# define __this_cpu_add_return_1(pcp, val) __this_cpu_generic_add_return(pcp, val) +#ifndef raw_cpu_add_return +# ifndef raw_cpu_add_return_1 +# define raw_cpu_add_return_1(pcp, val) raw_cpu_generic_add_return(pcp, val) # endif -# ifndef __this_cpu_add_return_2 -# define __this_cpu_add_return_2(pcp, val) __this_cpu_generic_add_return(pcp, val) +# ifndef raw_cpu_add_return_2 +# define raw_cpu_add_return_2(pcp, val) raw_cpu_generic_add_return(pcp, val) # endif -# ifndef __this_cpu_add_return_4 -# define __this_cpu_add_return_4(pcp, val) __this_cpu_generic_add_return(pcp, val) +# ifndef raw_cpu_add_return_4 +# define raw_cpu_add_return_4(pcp, val) raw_cpu_generic_add_return(pcp, val) # endif -# ifndef __this_cpu_add_return_8 -# define __this_cpu_add_return_8(pcp, val) __this_cpu_generic_add_return(pcp, val) +# ifndef raw_cpu_add_return_8 +# define raw_cpu_add_return_8(pcp, val) raw_cpu_generic_add_return(pcp, val) # endif -# define __this_cpu_add_return(pcp, val) \ - __pcpu_size_call_return2(__this_cpu_add_return_, pcp, val) +# define raw_cpu_add_return(pcp, val) \ + __pcpu_size_call_return2(raw_add_return_, pcp, val) #endif -#define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(typeof(pcp))(val)) -#define __this_cpu_inc_return(pcp) __this_cpu_add_return(pcp, 1) -#define __this_cpu_dec_return(pcp) __this_cpu_add_return(pcp, -1) +#define raw_cpu_sub_return(pcp, val) raw_cpu_add_return(pcp, -(typeof(pcp))(val)) +#define raw_cpu_inc_return(pcp) raw_cpu_add_return(pcp, 1) +#define raw_cpu_dec_return(pcp) raw_cpu_add_return(pcp, -1) -#define __this_cpu_generic_xchg(pcp, nval) \ +#define raw_cpu_generic_xchg(pcp, nval) \ ({ typeof(pcp) ret__; \ - ret__ = __this_cpu_read(pcp); \ - __this_cpu_write(pcp, nval); \ + ret__ = raw_cpu_read(pcp); \ + raw_cpu_write(pcp, nval); \ ret__; \ }) -#ifndef __this_cpu_xchg -# ifndef __this_cpu_xchg_1 -# define __this_cpu_xchg_1(pcp, nval) __this_cpu_generic_xchg(pcp, nval) +#ifndef raw_cpu_xchg +# ifndef raw_cpu_xchg_1 +# define raw_cpu_xchg_1(pcp, nval) raw_cpu_generic_xchg(pcp, nval) # endif -# ifndef __this_cpu_xchg_2 -# define __this_cpu_xchg_2(pcp, nval) __this_cpu_generic_xchg(pcp, nval) +# ifndef raw_cpu_xchg_2 +# define raw_cpu_xchg_2(pcp, nval) raw_cpu_generic_xchg(pcp, nval) # endif -# ifndef __this_cpu_xchg_4 -# define __this_cpu_xchg_4(pcp, nval) __this_cpu_generic_xchg(pcp, nval) +# ifndef raw_cpu_xchg_4 +# define raw_cpu_xchg_4(pcp, nval) raw_cpu_generic_xchg(pcp, nval) # endif -# ifndef __this_cpu_xchg_8 -# define __this_cpu_xchg_8(pcp, nval) __this_cpu_generic_xchg(pcp, nval) +# ifndef raw_cpu_xchg_8 +# define raw_cpu_xchg_8(pcp, nval) raw_cpu_generic_xchg(pcp, nval) # endif -# define __this_cpu_xchg(pcp, nval) \ - __pcpu_size_call_return2(__this_cpu_xchg_, (pcp), nval) +# define raw_cpu_xchg(pcp, nval) \ + __pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval) #endif -#define __this_cpu_generic_cmpxchg(pcp, oval, nval) \ +#define raw_cpu_generic_cmpxchg(pcp, oval, nval) \ ({ \ typeof(pcp) ret__; \ - ret__ = __this_cpu_read(pcp); \ + ret__ = raw_cpu_read(pcp); \ if (ret__ == (oval)) \ - __this_cpu_write(pcp, nval); \ + raw_cpu_write(pcp, nval); \ ret__; \ }) -#ifndef __this_cpu_cmpxchg -# ifndef __this_cpu_cmpxchg_1 -# define __this_cpu_cmpxchg_1(pcp, oval, nval) __this_cpu_generic_cmpxchg(pcp, oval, nval) +#ifndef raw_cpu_cmpxchg +# ifndef raw_cpu_cmpxchg_1 +# define raw_cpu_cmpxchg_1(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) # endif -# ifndef __this_cpu_cmpxchg_2 -# define __this_cpu_cmpxchg_2(pcp, oval, nval) __this_cpu_generic_cmpxchg(pcp, oval, nval) +# ifndef raw_cpu_cmpxchg_2 +# define raw_cpu_cmpxchg_2(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) # endif -# ifndef __this_cpu_cmpxchg_4 -# define __this_cpu_cmpxchg_4(pcp, oval, nval) __this_cpu_generic_cmpxchg(pcp, oval, nval) +# ifndef raw_cpu_cmpxchg_4 +# define raw_cpu_cmpxchg_4(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) # endif -# ifndef __this_cpu_cmpxchg_8 -# define __this_cpu_cmpxchg_8(pcp, oval, nval) __this_cpu_generic_cmpxchg(pcp, oval, nval) +# ifndef raw_cpu_cmpxchg_8 +# define raw_cpu_cmpxchg_8(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) # endif -# define __this_cpu_cmpxchg(pcp, oval, nval) \ - __pcpu_size_call_return2(__this_cpu_cmpxchg_, pcp, oval, nval) +# define raw_cpu_cmpxchg(pcp, oval, nval) \ + __pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval) #endif -#define __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ +#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ ({ \ int __ret = 0; \ - if (__this_cpu_read(pcp1) == (oval1) && \ - __this_cpu_read(pcp2) == (oval2)) { \ - __this_cpu_write(pcp1, (nval1)); \ - __this_cpu_write(pcp2, (nval2)); \ + if (raw_cpu_read(pcp1) == (oval1) && \ + raw_cpu_read(pcp2) == (oval2)) { \ + raw_cpu_write(pcp1, (nval1)); \ + raw_cpu_write(pcp2, (nval2)); \ __ret = 1; \ } \ (__ret); \ }) -#ifndef __this_cpu_cmpxchg_double -# ifndef __this_cpu_cmpxchg_double_1 -# define __this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +#ifndef raw_cpu_cmpxchg_double +# ifndef raw_cpu_cmpxchg_double_1 +# define raw_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) # endif -# ifndef __this_cpu_cmpxchg_double_2 -# define __this_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +# ifndef raw_cpu_cmpxchg_double_2 +# define raw_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) # endif -# ifndef __this_cpu_cmpxchg_double_4 -# define __this_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +# ifndef raw_cpu_cmpxchg_double_4 +# define raw_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) # endif -# ifndef __this_cpu_cmpxchg_double_8 -# define __this_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +# ifndef raw_cpu_cmpxchg_double_8 +# define raw_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) # endif +# define raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + __pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) +#endif + +/* + * Generic percpu operations for context that are safe from preemption/interrupts. + * Checks will be added here soon. + */ +#ifndef __this_cpu_read +# define __this_cpu_read(pcp) __pcpu_size_call_return(raw_cpu_read_, (pcp)) +#endif + +#ifndef __this_cpu_write +# define __this_cpu_write(pcp, val) __pcpu_size_call(raw_cpu_write_, (pcp), (val)) +#endif + +#ifndef __this_cpu_add +# define __this_cpu_add(pcp, val) __pcpu_size_call(raw_cpu_add_, (pcp), (val)) +#endif + +#ifndef __this_cpu_sub +# define __this_cpu_sub(pcp, val) __this_cpu_add((pcp), -(typeof(pcp))(val)) +#endif + +#ifndef __this_cpu_inc +# define __this_cpu_inc(pcp) __this_cpu_add((pcp), 1) +#endif + +#ifndef __this_cpu_dec +# define __this_cpu_dec(pcp) __this_cpu_sub((pcp), 1) +#endif + +#ifndef __this_cpu_and +# define __this_cpu_and(pcp, val) __pcpu_size_call(raw_cpu_and_, (pcp), (val)) +#endif + +#ifndef __this_cpu_or +# define __this_cpu_or(pcp, val) __pcpu_size_call(raw_cpu_or_, (pcp), (val)) +#endif + +#ifndef __this_cpu_add_return +# define __this_cpu_add_return(pcp, val) \ + __pcpu_size_call_return2(raw_cpu_add_return_, pcp, val) +#endif + +#define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(typeof(pcp))(val)) +#define __this_cpu_inc_return(pcp) __this_cpu_add_return(pcp, 1) +#define __this_cpu_dec_return(pcp) __this_cpu_add_return(pcp, -1) + +#ifndef __this_cpu_xchg +# define __this_cpu_xchg(pcp, nval) \ + __pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval) +#endif + +#ifndef __this_cpu_cmpxchg +# define __this_cpu_cmpxchg(pcp, oval, nval) \ + __pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval) +#endif + +#ifndef __this_cpu_cmpxchg_double # define __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - __pcpu_double_call_return_bool(__this_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) + __pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) #endif #endif /* __LINUX_PERCPU_H */ -- cgit v1.2.3-59-g8ed1b From dc322a99d31fff5d3f8acfa061ad033953efdebe Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 7 Apr 2014 15:39:38 -0700 Subject: mm: use raw_cpu ops for determining current NUMA node With the preempt checking logic for __this_cpu_ops we will get false positives from locations in the code that use numa_node_id. Before the __this_cpu ops where introduced there were no checks for preemption present either. smp_raw_processor_id() was used. See http://www.spinics.net/lists/linux-numa/msg00641.html Therefore we need to use raw_cpu_read here to avoid false postives. Note that this issue has been discussed in prior years. If the process changes nodes after retrieving the current numa node then that is acceptable since most uses of numa_node etc are for optimization and not for correctness. There were suggestions to implement a raw_numa_node_id in order to do preempt checks for numa_node_id as well. But I think we better defer that to another patch since that would mean investigating how numa_node_id() is used throughout the kernel which would increase the scope of this patchset significantly. After all preemption was never checked before when numa_node_id() was used. Some sample traces: __this_cpu_read operation in preemptible [00000000] code: login/1456 caller is __this_cpu_preempt_check+0x2b/0x2d CPU: 0 PID: 1456 Comm: login Not tainted 3.12.0-rc4-cl-00062-g2fe80d3-dirty #185 Call Trace: dump_stack+0x4e/0x82 check_preemption_disabled+0xc5/0xe0 __this_cpu_preempt_check+0x2b/0x2d get_task_policy+0x1d/0x49 get_vma_policy+0x14/0x76 alloc_pages_vma+0x35/0xff handle_mm_fault+0x290/0x73b __do_page_fault+0x3fe/0x44d do_page_fault+0x9/0xc page_fault+0x22/0x30 generic_file_aio_read+0x38e/0x624 do_sync_read+0x54/0x73 vfs_read+0x9d/0x12a SyS_read+0x47/0x7e cstar_dispatch+0x7/0x23 caller is __this_cpu_preempt_check+0x2b/0x2d CPU: 0 PID: 1456 Comm: login Not tainted 3.12.0-rc4-cl-00062-g2fe80d3-dirty #185 Call Trace: dump_stack+0x4e/0x82 check_preemption_disabled+0xc5/0xe0 __this_cpu_preempt_check+0x2b/0x2d alloc_pages_current+0x8f/0xbc __page_cache_alloc+0xb/0xd __do_page_cache_readahead+0xf4/0x219 ra_submit+0x1c/0x20 ondemand_readahead+0x28c/0x2b4 page_cache_sync_readahead+0x38/0x3a generic_file_aio_read+0x261/0x624 do_sync_read+0x54/0x73 vfs_read+0x9d/0x12a SyS_read+0x47/0x7e cstar_dispatch+0x7/0x23 Signed-off-by: Christoph Lameter Acked-by: Ingo Molnar Cc: Alex Shi Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/topology.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/topology.h b/include/linux/topology.h index 12ae6ce997d6..7062330a1329 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -188,7 +188,7 @@ DECLARE_PER_CPU(int, numa_node); /* Returns the number of the current Node. */ static inline int numa_node_id(void) { - return __this_cpu_read(numa_node); + return raw_cpu_read(numa_node); } #endif @@ -245,7 +245,7 @@ static inline void set_numa_mem(int node) /* Returns the number of the nearest Node with memory */ static inline int numa_mem_id(void) { - return __this_cpu_read(_numa_mem_); + return raw_cpu_read(_numa_mem_); } #endif -- cgit v1.2.3-59-g8ed1b From 293b6a4c875c3b49853bff7de99954f49f59aa75 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 7 Apr 2014 15:39:43 -0700 Subject: vmstat: use raw_cpu_ops to avoid false positives on preemption checks vm counters are allowed to be racy. Use raw_cpu_ops to avoid the local_irq_disable overhead and to avoid preemption checks which will be added to the __this_cpu operations. [akpm@linux-foundation.org: Add comment. Again.] Signed-off-by: Christoph Lameter Reported-by: Sergey Senozhatsky Cc: Dave Chinner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmstat.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index ea4476157e00..45c9cd1daf7a 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -27,9 +27,13 @@ struct vm_event_state { DECLARE_PER_CPU(struct vm_event_state, vm_event_states); +/* + * vm counters are allowed to be racy. Use raw_cpu_ops to avoid the + * local_irq_disable overhead. + */ static inline void __count_vm_event(enum vm_event_item item) { - __this_cpu_inc(vm_event_states.event[item]); + raw_cpu_inc(vm_event_states.event[item]); } static inline void count_vm_event(enum vm_event_item item) @@ -39,7 +43,7 @@ static inline void count_vm_event(enum vm_event_item item) static inline void __count_vm_events(enum vm_event_item item, long delta) { - __this_cpu_add(vm_event_states.event[item], delta); + raw_cpu_add(vm_event_states.event[item], delta); } static inline void count_vm_events(enum vm_event_item item, long delta) -- cgit v1.2.3-59-g8ed1b From 188a81409ff7de1c5aae947a96356ddd8ff4aaa3 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 7 Apr 2014 15:39:44 -0700 Subject: percpu: add preemption checks to __this_cpu ops We define a check function in order to avoid trouble with the include files. Then the higher level __this_cpu macros are modified to invoke the preemption check. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Christoph Lameter Acked-by: Ingo Molnar Cc: Tejun Heo Tested-by: Grygorii Strashko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/percpu.h | 39 +++++++++++++++++++++++++++++---------- lib/smp_processor_id.c | 18 ++++++++++++++---- 2 files changed, 43 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 4e4d2afcc0c7..e7a0b95ed527 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -173,6 +173,12 @@ extern phys_addr_t per_cpu_ptr_to_phys(void *addr); extern void __bad_size_call_parameter(void); +#ifdef CONFIG_DEBUG_PREEMPT +extern void __this_cpu_preempt_check(const char *op); +#else +static inline void __this_cpu_preempt_check(const char *op) { } +#endif + #define __pcpu_size_call_return(stem, variable) \ ({ typeof(variable) pscr_ret__; \ __verify_pcpu_ptr(&(variable)); \ @@ -725,18 +731,24 @@ do { \ /* * Generic percpu operations for context that are safe from preemption/interrupts. - * Checks will be added here soon. */ #ifndef __this_cpu_read -# define __this_cpu_read(pcp) __pcpu_size_call_return(raw_cpu_read_, (pcp)) +# define __this_cpu_read(pcp) \ + (__this_cpu_preempt_check("read"),__pcpu_size_call_return(raw_cpu_read_, (pcp))) #endif #ifndef __this_cpu_write -# define __this_cpu_write(pcp, val) __pcpu_size_call(raw_cpu_write_, (pcp), (val)) +# define __this_cpu_write(pcp, val) \ +do { __this_cpu_preempt_check("write"); \ + __pcpu_size_call(raw_cpu_write_, (pcp), (val)); \ +} while (0) #endif #ifndef __this_cpu_add -# define __this_cpu_add(pcp, val) __pcpu_size_call(raw_cpu_add_, (pcp), (val)) +# define __this_cpu_add(pcp, val) \ +do { __this_cpu_preempt_check("add"); \ + __pcpu_size_call(raw_cpu_add_, (pcp), (val)); \ +} while (0) #endif #ifndef __this_cpu_sub @@ -752,16 +764,23 @@ do { \ #endif #ifndef __this_cpu_and -# define __this_cpu_and(pcp, val) __pcpu_size_call(raw_cpu_and_, (pcp), (val)) +# define __this_cpu_and(pcp, val) \ +do { __this_cpu_preempt_check("and"); \ + __pcpu_size_call(raw_cpu_and_, (pcp), (val)); \ +} while (0) + #endif #ifndef __this_cpu_or -# define __this_cpu_or(pcp, val) __pcpu_size_call(raw_cpu_or_, (pcp), (val)) +# define __this_cpu_or(pcp, val) \ +do { __this_cpu_preempt_check("or"); \ + __pcpu_size_call(raw_cpu_or_, (pcp), (val)); \ +} while (0) #endif #ifndef __this_cpu_add_return # define __this_cpu_add_return(pcp, val) \ - __pcpu_size_call_return2(raw_cpu_add_return_, pcp, val) + (__this_cpu_preempt_check("add_return"),__pcpu_size_call_return2(raw_cpu_add_return_, pcp, val)) #endif #define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(typeof(pcp))(val)) @@ -770,17 +789,17 @@ do { \ #ifndef __this_cpu_xchg # define __this_cpu_xchg(pcp, nval) \ - __pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval) + (__this_cpu_preempt_check("xchg"),__pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval)) #endif #ifndef __this_cpu_cmpxchg # define __this_cpu_cmpxchg(pcp, oval, nval) \ - __pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval) + (__this_cpu_preempt_check("cmpxchg"),__pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval)) #endif #ifndef __this_cpu_cmpxchg_double # define __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - __pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) + (__this_cpu_preempt_check("cmpxchg_double"),__pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2))) #endif #endif /* __LINUX_PERCPU_H */ diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 04abe53f12a1..1afec32de6f2 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -7,7 +7,8 @@ #include #include -notrace unsigned int debug_smp_processor_id(void) +notrace static unsigned int check_preemption_disabled(const char *what1, + const char *what2) { int this_cpu = raw_smp_processor_id(); @@ -38,9 +39,9 @@ notrace unsigned int debug_smp_processor_id(void) if (!printk_ratelimit()) goto out_enable; - printk(KERN_ERR "BUG: using smp_processor_id() in preemptible [%08x] " - "code: %s/%d\n", - preempt_count() - 1, current->comm, current->pid); + printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x] code: %s/%d\n", + what1, what2, preempt_count() - 1, current->comm, current->pid); + print_symbol("caller is %s\n", (long)__builtin_return_address(0)); dump_stack(); @@ -50,5 +51,14 @@ out: return this_cpu; } +notrace unsigned int debug_smp_processor_id(void) +{ + return check_preemption_disabled("smp_processor_id", ""); +} EXPORT_SYMBOL(debug_smp_processor_id); +notrace void __this_cpu_preempt_check(const char *op) +{ + check_preemption_disabled("__this_cpu_", op); +} +EXPORT_SYMBOL(__this_cpu_preempt_check); -- cgit v1.2.3-59-g8ed1b From 64b47e8fdb40a0d46e8cf458dd3e24f8afa073f6 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Mon, 7 Apr 2014 15:39:45 -0700 Subject: lglock: map to spinlock when !CONFIG_SMP When the system has only one CPU, lglock is effectively a spinlock; map it directly to spinlock to eliminate the indirection and duplicate code. In addition to removing overhead, this drops 1.6k of code with a defconfig modified to have !CONFIG_SMP, and 1.1k with a minimal config. Signed-off-by: Josh Triplett Cc: Rusty Russell Cc: Michal Marek Cc: Thomas Gleixner Cc: David Howells Cc: "H. Peter Anvin" Cc: Nick Piggin Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lglock.h | 16 ++++++++++++++++ kernel/locking/Makefile | 3 ++- 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lglock.h b/include/linux/lglock.h index 96549abe8842..0081f000e34b 100644 --- a/include/linux/lglock.h +++ b/include/linux/lglock.h @@ -25,6 +25,8 @@ #include #include +#ifdef CONFIG_SMP + #ifdef CONFIG_DEBUG_LOCK_ALLOC #define LOCKDEP_INIT_MAP lockdep_init_map #else @@ -57,4 +59,18 @@ void lg_local_unlock_cpu(struct lglock *lg, int cpu); void lg_global_lock(struct lglock *lg); void lg_global_unlock(struct lglock *lg); +#else +/* When !CONFIG_SMP, map lglock to spinlock */ +#define lglock spinlock +#define DEFINE_LGLOCK(name) DEFINE_SPINLOCK(name) +#define DEFINE_STATIC_LGLOCK(name) static DEFINE_SPINLOCK(name) +#define lg_lock_init(lg, name) spin_lock_init(lg) +#define lg_local_lock spin_lock +#define lg_local_unlock spin_unlock +#define lg_local_lock_cpu(lg, cpu) spin_lock(lg) +#define lg_local_unlock_cpu(lg, cpu) spin_unlock(lg) +#define lg_global_lock spin_lock +#define lg_global_unlock spin_unlock +#endif + #endif diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index 306a76b51e0f..b8bdcd4785b7 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -1,5 +1,5 @@ -obj-y += mutex.o semaphore.o rwsem.o lglock.o mcs_spinlock.o +obj-y += mutex.o semaphore.o rwsem.o mcs_spinlock.o ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_lockdep.o = -pg @@ -14,6 +14,7 @@ ifeq ($(CONFIG_PROC_FS),y) obj-$(CONFIG_LOCKDEP) += lockdep_proc.o endif obj-$(CONFIG_SMP) += spinlock.o +obj-$(CONFIG_SMP) += lglock.o obj-$(CONFIG_PROVE_LOCKING) += spinlock.o obj-$(CONFIG_RT_MUTEXES) += rtmutex.o obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o -- cgit v1.2.3-59-g8ed1b From 5fb6b953bb7aa86a9c8ea760934982cedc45c52b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 8 Apr 2014 12:55:46 +0200 Subject: include/linux/syscalls.h: add sys_renameat2() prototype Signed-off-by: Heiko Carstens Signed-off-by: Linus Torvalds --- include/linux/syscalls.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 2aa8b749f13d..697ceb70a9a9 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -748,6 +748,9 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname, int newdfd, const char __user *newname, int flags); asmlinkage long sys_renameat(int olddfd, const char __user * oldname, int newdfd, const char __user * newname); +asmlinkage long sys_renameat2(int olddfd, const char __user *oldname, + int newdfd, const char __user *newname, + unsigned int flags); asmlinkage long sys_futimesat(int dfd, const char __user *filename, struct timeval __user *utimes); asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode); -- cgit v1.2.3-59-g8ed1b From de7b2973903c6cc50b31ee5682a69b2219b9919d Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 8 Apr 2014 17:26:21 -0400 Subject: tracepoint: Use struct pointer instead of name hash for reg/unreg tracepoints Register/unregister tracepoint probes with struct tracepoint pointer rather than tracepoint name. This change, which vastly simplifies tracepoint.c, has been proposed by Steven Rostedt. It also removes 8.8kB (mostly of text) to the vmlinux size. From this point on, the tracers need to pass a struct tracepoint pointer to probe register/unregister. A probe can now only be connected to a tracepoint that exists. Moreover, tracers are responsible for unregistering the probe before the module containing its associated tracepoint is unloaded. text data bss dec hex filename 10443444 4282528 10391552 25117524 17f4354 vmlinux.orig 10434930 4282848 10391552 25109330 17f2352 vmlinux Link: http://lkml.kernel.org/r/1396992381-23785-2-git-send-email-mathieu.desnoyers@efficios.com CC: Ingo Molnar CC: Frederic Weisbecker CC: Andrew Morton CC: Frank Ch. Eigler CC: Johannes Berg Signed-off-by: Mathieu Desnoyers [ SDR - fixed return val in void func in tracepoint_module_going() ] Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 22 +- include/linux/tracepoint.h | 41 +-- include/trace/ftrace.h | 9 +- kernel/trace/trace_events.c | 55 ++-- kernel/trace/trace_events_trigger.c | 2 +- kernel/trace/trace_kprobe.c | 21 +- kernel/trace/trace_output.c | 2 +- kernel/trace/trace_uprobe.c | 20 +- kernel/tracepoint.c | 511 ++++++++++++++++-------------------- 9 files changed, 331 insertions(+), 352 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index cdc30111d2f8..d16da3e53bc7 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -7,6 +7,7 @@ #include #include #include +#include struct trace_array; struct trace_buffer; @@ -232,6 +233,7 @@ enum { TRACE_EVENT_FL_IGNORE_ENABLE_BIT, TRACE_EVENT_FL_WAS_ENABLED_BIT, TRACE_EVENT_FL_USE_CALL_FILTER_BIT, + TRACE_EVENT_FL_TRACEPOINT_BIT, }; /* @@ -244,6 +246,7 @@ enum { * (used for module unloading, if a module event is enabled, * it is best to clear the buffers that used it). * USE_CALL_FILTER - For ftrace internal events, don't use file filter + * TRACEPOINT - Event is a tracepoint */ enum { TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), @@ -252,12 +255,17 @@ enum { TRACE_EVENT_FL_IGNORE_ENABLE = (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT), TRACE_EVENT_FL_WAS_ENABLED = (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT), TRACE_EVENT_FL_USE_CALL_FILTER = (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT), + TRACE_EVENT_FL_TRACEPOINT = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT), }; struct ftrace_event_call { struct list_head list; struct ftrace_event_class *class; - char *name; + union { + char *name; + /* Set TRACE_EVENT_FL_TRACEPOINT flag when using "tp" */ + struct tracepoint *tp; + }; struct trace_event event; const char *print_fmt; struct event_filter *filter; @@ -271,6 +279,7 @@ struct ftrace_event_call { * bit 3: ftrace internal event (do not enable) * bit 4: Event was enabled by module * bit 5: use call filter rather than file filter + * bit 6: Event is a tracepoint */ int flags; /* static flags of different events */ @@ -283,6 +292,15 @@ struct ftrace_event_call { #endif }; +static inline const char * +ftrace_event_name(struct ftrace_event_call *call) +{ + if (call->flags & TRACE_EVENT_FL_TRACEPOINT) + return call->tp ? call->tp->name : NULL; + else + return call->name; +} + struct trace_array; struct ftrace_subsystem_dir; @@ -353,7 +371,7 @@ struct ftrace_event_file { #define __TRACE_EVENT_FLAGS(name, value) \ static int __init trace_init_flags_##name(void) \ { \ - event_##name.flags = value; \ + event_##name.flags |= value; \ return 0; \ } \ early_initcall(trace_init_flags_##name); diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 812b2553dfd8..08150e265761 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -6,7 +6,7 @@ * * See Documentation/trace/tracepoints.txt. * - * (C) Copyright 2008 Mathieu Desnoyers + * Copyright (C) 2008-2014 Mathieu Desnoyers * * Heavily inspired from the Linux Kernel Markers. * @@ -21,6 +21,7 @@ struct module; struct tracepoint; +struct notifier_block; struct tracepoint_func { void *func; @@ -35,18 +36,13 @@ struct tracepoint { struct tracepoint_func __rcu *funcs; }; -/* - * Connect a probe to a tracepoint. - * Internal API, should not be used directly. - */ -extern int tracepoint_probe_register(const char *name, void *probe, void *data); - -/* - * Disconnect a probe from a tracepoint. - * Internal API, should not be used directly. - */ extern int -tracepoint_probe_unregister(const char *name, void *probe, void *data); +tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data); +extern int +tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data); +extern void +for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv), + void *priv); #ifdef CONFIG_MODULES struct tp_module { @@ -54,12 +50,25 @@ struct tp_module { unsigned int num_tracepoints; struct tracepoint * const *tracepoints_ptrs; }; + bool trace_module_has_bad_taint(struct module *mod); +extern int register_tracepoint_module_notifier(struct notifier_block *nb); +extern int unregister_tracepoint_module_notifier(struct notifier_block *nb); #else static inline bool trace_module_has_bad_taint(struct module *mod) { return false; } +static inline +int register_tracepoint_module_notifier(struct notifier_block *nb) +{ + return 0; +} +static inline +int unregister_tracepoint_module_notifier(struct notifier_block *nb) +{ + return 0; +} #endif /* CONFIG_MODULES */ /* @@ -160,14 +169,14 @@ static inline void tracepoint_synchronize_unregister(void) static inline int \ register_trace_##name(void (*probe)(data_proto), void *data) \ { \ - return tracepoint_probe_register(#name, (void *)probe, \ - data); \ + return tracepoint_probe_register(&__tracepoint_##name, \ + (void *)probe, data); \ } \ static inline int \ unregister_trace_##name(void (*probe)(data_proto), void *data) \ { \ - return tracepoint_probe_unregister(#name, (void *)probe, \ - data); \ + return tracepoint_probe_unregister(&__tracepoint_##name,\ + (void *)probe, data); \ } \ static inline void \ check_trace_callback_type_##name(void (*cb)(data_proto)) \ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 8765126b328c..9c44c11cd9bb 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -470,10 +470,11 @@ static inline notrace int ftrace_get_offsets_##call( \ * }; * * static struct ftrace_event_call event_ = { - * .name = "", + * .tp = &__tracepoint_, * .class = event_class_