aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/aio.c42
-rw-r--r--fs/bio-integrity.c86
-rw-r--r--fs/bio.c91
-rw-r--r--fs/btrfs/ctree.c10
-rw-r--r--fs/btrfs/ctree.h9
-rw-r--r--fs/btrfs/disk-io.c6
-rw-r--r--fs/btrfs/extent-tree.c49
-rw-r--r--fs/btrfs/locking.c6
-rw-r--r--fs/btrfs/locking.h2
-rw-r--r--fs/btrfs/volumes.c8
-rw-r--r--fs/buffer.c23
-rw-r--r--fs/cifs/CHANGES11
-rw-r--r--fs/cifs/Kconfig21
-rw-r--r--fs/cifs/README22
-rw-r--r--fs/cifs/cifs_debug.c2
-rw-r--r--fs/cifs/cifs_dfs_ref.c36
-rw-r--r--fs/cifs/cifs_fs_sb.h1
-rw-r--r--fs/cifs/cifsglob.h2
-rw-r--r--fs/cifs/cifspdu.h76
-rw-r--r--fs/cifs/cifsproto.h9
-rw-r--r--fs/cifs/cifssmb.c27
-rw-r--r--fs/cifs/connect.c9
-rw-r--r--fs/cifs/dir.c6
-rw-r--r--fs/cifs/file.c199
-rw-r--r--fs/cifs/inode.c3
-rw-r--r--fs/cifs/smbfsctl.h84
-rw-r--r--fs/compat.c3
-rw-r--r--fs/compat_ioctl.c8
-rw-r--r--fs/devpts/inode.c5
-rw-r--r--fs/dquot.c5
-rw-r--r--fs/ecryptfs/crypto.c51
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h4
-rw-r--r--fs/ecryptfs/inode.c32
-rw-r--r--fs/ecryptfs/keystore.c3
-rw-r--r--fs/ecryptfs/main.c5
-rw-r--r--fs/eventpoll.c12
-rw-r--r--fs/exec.c13
-rw-r--r--fs/ext3/inode.c18
-rw-r--r--fs/ext4/extents.c6
-rw-r--r--fs/ext4/ialloc.c12
-rw-r--r--fs/ext4/mballoc.c13
-rw-r--r--fs/fat/inode.c4
-rw-r--r--fs/fcntl.c33
-rw-r--r--fs/file_table.c3
-rw-r--r--fs/fs-writeback.c9
-rw-r--r--fs/gfs2/Kconfig17
-rw-r--r--fs/gfs2/Makefile4
-rw-r--r--fs/gfs2/acl.c1
-rw-r--r--fs/gfs2/bmap.c1
-rw-r--r--fs/gfs2/dir.c1
-rw-r--r--fs/gfs2/eaops.c1
-rw-r--r--fs/gfs2/eattr.c1
-rw-r--r--fs/gfs2/glock.c268
-rw-r--r--fs/gfs2/glock.h127
-rw-r--r--fs/gfs2/glops.c160
-rw-r--r--fs/gfs2/glops.h1
-rw-r--r--fs/gfs2/incore.h71
-rw-r--r--fs/gfs2/inode.c13
-rw-r--r--fs/gfs2/inode.h22
-rw-r--r--fs/gfs2/lock_dlm.c241
-rw-r--r--fs/gfs2/locking.c232
-rw-r--r--fs/gfs2/locking/dlm/Makefile3
-rw-r--r--fs/gfs2/locking/dlm/lock.c708
-rw-r--r--fs/gfs2/locking/dlm/lock_dlm.h166
-rw-r--r--fs/gfs2/locking/dlm/main.c48
-rw-r--r--fs/gfs2/locking/dlm/mount.c276
-rw-r--r--fs/gfs2/locking/dlm/sysfs.c226
-rw-r--r--fs/gfs2/locking/dlm/thread.c68
-rw-r--r--fs/gfs2/log.c1
-rw-r--r--fs/gfs2/lops.c1
-rw-r--r--fs/gfs2/main.c13
-rw-r--r--fs/gfs2/meta_io.c22
-rw-r--r--fs/gfs2/meta_io.h1
-rw-r--r--fs/gfs2/mount.c128
-rw-r--r--fs/gfs2/mount.h17
-rw-r--r--fs/gfs2/ops_address.c5
-rw-r--r--fs/gfs2/ops_dentry.c1
-rw-r--r--fs/gfs2/ops_export.c1
-rw-r--r--fs/gfs2/ops_file.c76
-rw-r--r--fs/gfs2/ops_fstype.c156
-rw-r--r--fs/gfs2/ops_inode.c1
-rw-r--r--fs/gfs2/ops_super.c44
-rw-r--r--fs/gfs2/quota.c203
-rw-r--r--fs/gfs2/quota.h2
-rw-r--r--fs/gfs2/recovery.c28
-rw-r--r--fs/gfs2/rgrp.c189
-rw-r--r--fs/gfs2/super.c3
-rw-r--r--fs/gfs2/super.h26
-rw-r--r--fs/gfs2/sys.c236
-rw-r--r--fs/gfs2/trans.c19
-rw-r--r--fs/gfs2/util.c11
-rw-r--r--fs/inode.c78
-rw-r--r--fs/ioctl.c18
-rw-r--r--fs/lockd/clntlock.c51
-rw-r--r--fs/minix/inode.c2
-rw-r--r--fs/namei.c8
-rw-r--r--fs/namespace.c11
-rw-r--r--fs/nfs/client.c73
-rw-r--r--fs/nfs/dir.c8
-rw-r--r--fs/nfs/nfs3acl.c27
-rw-r--r--fs/nfs/nfs3xdr.c34
-rw-r--r--fs/nfs/nfs4namespace.c15
-rw-r--r--fs/nfsd/nfs4xdr.c1
-rw-r--r--fs/nfsd/vfs.c5
-rw-r--r--fs/ocfs2/alloc.c3
-rw-r--r--fs/ocfs2/aops.c7
-rw-r--r--fs/ocfs2/namei.c3
-rw-r--r--fs/ocfs2/ocfs2_fs.h6
-rw-r--r--fs/ocfs2/xattr.c30
-rw-r--r--fs/partitions/check.c10
-rw-r--r--fs/partitions/ibm.c101
-rw-r--r--fs/pipe.c24
-rw-r--r--fs/proc/base.c16
-rw-r--r--fs/proc/page.c2
-rw-r--r--fs/ramfs/file-nommu.c4
-rw-r--r--fs/squashfs/block.c18
-rw-r--r--fs/squashfs/cache.c4
-rw-r--r--fs/squashfs/inode.c6
-rw-r--r--fs/squashfs/squashfs.h2
-rw-r--r--fs/squashfs/super.c2
-rw-r--r--fs/super.c16
-rw-r--r--fs/sync.c14
-rw-r--r--fs/sysfs/bin.c253
-rw-r--r--fs/sysfs/dir.c33
-rw-r--r--fs/sysfs/file.c26
-rw-r--r--fs/sysfs/inode.c17
-rw-r--r--fs/sysfs/mount.c6
-rw-r--r--fs/sysfs/sysfs.h3
-rw-r--r--fs/ufs/super.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c12
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c10
-rw-r--r--fs/xfs/xfs_iget.c15
-rw-r--r--fs/xfs/xfs_log_recover.c17
134 files changed, 2872 insertions, 3042 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 8fa77e233944..76da12537956 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -443,7 +443,7 @@ static struct kiocb *__aio_get_req(struct kioctx *ctx)
req->private = NULL;
req->ki_iovec = NULL;
INIT_LIST_HEAD(&req->ki_run_list);
- req->ki_eventfd = ERR_PTR(-EINVAL);
+ req->ki_eventfd = NULL;
/* Check if the completion queue has enough free space to
* accept an event from this io.
@@ -485,8 +485,6 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
{
assert_spin_locked(&ctx->ctx_lock);
- if (!IS_ERR(req->ki_eventfd))
- fput(req->ki_eventfd);
if (req->ki_dtor)
req->ki_dtor(req);
if (req->ki_iovec != &req->ki_inline_vec)
@@ -508,8 +506,11 @@ static void aio_fput_routine(struct work_struct *data)
list_del(&req->ki_list);
spin_unlock_irq(&fput_lock);
- /* Complete the fput */
- __fput(req->ki_filp);
+ /* Complete the fput(s) */
+ if (req->ki_filp != NULL)
+ __fput(req->ki_filp);
+ if (req->ki_eventfd != NULL)
+ __fput(req->ki_eventfd);
/* Link the iocb into the context's free list */
spin_lock_irq(&ctx->ctx_lock);
@@ -527,12 +528,14 @@ static void aio_fput_routine(struct work_struct *data)
*/
static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
{
+ int schedule_putreq = 0;
+
dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n",
req, atomic_long_read(&req->ki_filp->f_count));
assert_spin_locked(&ctx->ctx_lock);
- req->ki_users --;
+ req->ki_users--;
BUG_ON(req->ki_users < 0);
if (likely(req->ki_users))
return 0;
@@ -540,10 +543,23 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
req->ki_cancel = NULL;
req->ki_retry = NULL;
- /* Must be done under the lock to serialise against cancellation.
- * Call this aio_fput as it duplicates fput via the fput_work.
+ /*
+ * Try to optimize the aio and eventfd file* puts, by avoiding to
+ * schedule work in case it is not __fput() time. In normal cases,
+ * we would not be holding the last reference to the file*, so
+ * this function will be executed w/out any aio kthread wakeup.
*/
- if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) {
+ if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count)))
+ schedule_putreq++;
+ else
+ req->ki_filp = NULL;
+ if (req->ki_eventfd != NULL) {
+ if (unlikely(atomic_long_dec_and_test(&req->ki_eventfd->f_count)))
+ schedule_putreq++;
+ else
+ req->ki_eventfd = NULL;
+ }
+ if (unlikely(schedule_putreq)) {
get_ioctx(ctx);
spin_lock(&fput_lock);
list_add(&req->ki_list, &fput_head);
@@ -571,7 +587,7 @@ int aio_put_req(struct kiocb *req)
static struct kioctx *lookup_ioctx(unsigned long ctx_id)
{
struct mm_struct *mm = current->mm;
- struct kioctx *ctx = NULL;
+ struct kioctx *ctx, *ret = NULL;
struct hlist_node *n;
rcu_read_lock();
@@ -579,12 +595,13 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) {
if (ctx->user_id == ctx_id && !ctx->dead) {
get_ioctx(ctx);
+ ret = ctx;
break;
}
}
rcu_read_unlock();
- return ctx;
+ return ret;
}
/*
@@ -1009,7 +1026,7 @@ int aio_complete(struct kiocb *iocb, long res, long res2)
* eventfd. The eventfd_signal() function is safe to be called
* from IRQ context.
*/
- if (!IS_ERR(iocb->ki_eventfd))
+ if (iocb->ki_eventfd != NULL)
eventfd_signal(iocb->ki_eventfd, 1);
put_rq:
@@ -1608,6 +1625,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd);
if (IS_ERR(req->ki_eventfd)) {
ret = PTR_ERR(req->ki_eventfd);
+ req->ki_eventfd = NULL;
goto out_put_req;
}
}
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 549b0144da11..31c46a241bac 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -26,23 +26,23 @@
#include <linux/workqueue.h>
static struct kmem_cache *bio_integrity_slab __read_mostly;
+static mempool_t *bio_integrity_pool;
+static struct bio_set *integrity_bio_set;
static struct workqueue_struct *kintegrityd_wq;
/**
- * bio_integrity_alloc_bioset - Allocate integrity payload and attach it to bio
+ * bio_integrity_alloc - Allocate integrity payload and attach it to bio
* @bio: bio to attach integrity metadata to
* @gfp_mask: Memory allocation mask
* @nr_vecs: Number of integrity metadata scatter-gather elements
- * @bs: bio_set to allocate from
*
* Description: This function prepares a bio for attaching integrity
* metadata. nr_vecs specifies the maximum number of pages containing
* integrity metadata that can be attached.
*/
-struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio,
- gfp_t gfp_mask,
- unsigned int nr_vecs,
- struct bio_set *bs)
+struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
+ gfp_t gfp_mask,
+ unsigned int nr_vecs)
{
struct bio_integrity_payload *bip;
struct bio_vec *iv;
@@ -50,7 +50,7 @@ struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio,
BUG_ON(bio == NULL);
- bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask);
+ bip = mempool_alloc(bio_integrity_pool, gfp_mask);
if (unlikely(bip == NULL)) {
printk(KERN_ERR "%s: could not alloc bip\n", __func__);
return NULL;
@@ -58,10 +58,10 @@ struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio,
memset(bip, 0, sizeof(*bip));
- iv = bvec_alloc_bs(gfp_mask, nr_vecs, &idx, bs);
+ iv = bvec_alloc_bs(gfp_mask, nr_vecs, &idx, integrity_bio_set);
if (unlikely(iv == NULL)) {
printk(KERN_ERR "%s: could not alloc bip_vec\n", __func__);
- mempool_free(bip, bs->bio_integrity_pool);
+ mempool_free(bip, bio_integrity_pool);
return NULL;
}
@@ -72,35 +72,16 @@ struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio,
return bip;
}
-EXPORT_SYMBOL(bio_integrity_alloc_bioset);
-
-/**
- * bio_integrity_alloc - Allocate integrity payload and attach it to bio
- * @bio: bio to attach integrity metadata to
- * @gfp_mask: Memory allocation mask
- * @nr_vecs: Number of integrity metadata scatter-gather elements
- *
- * Description: This function prepares a bio for attaching integrity
- * metadata. nr_vecs specifies the maximum number of pages containing
- * integrity metadata that can be attached.
- */
-struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
- gfp_t gfp_mask,
- unsigned int nr_vecs)
-{
- return bio_integrity_alloc_bioset(bio, gfp_mask, nr_vecs, fs_bio_set);
-}
EXPORT_SYMBOL(bio_integrity_alloc);
/**
* bio_integrity_free - Free bio integrity payload
* @bio: bio containing bip to be freed
- * @bs: bio_set this bio was allocated from
*
* Description: Used to free the integrity portion of a bio. Usually
* called from bio_free().
*/
-void bio_integrity_free(struct bio *bio, struct bio_set *bs)
+void bio_integrity_free(struct bio *bio)
{
struct bio_integrity_payload *bip = bio->bi_integrity;
@@ -111,8 +92,8 @@ void bio_integrity_free(struct bio *bio, struct bio_set *bs)
&& bip->bip_buf != NULL)
kfree(bip->bip_buf);
- bvec_free_bs(bs, bip->bip_vec, bip->bip_pool);
- mempool_free(bip, bs->bio_integrity_pool);
+ bvec_free_bs(integrity_bio_set, bip->bip_vec, bip->bip_pool);
+ mempool_free(bip, bio_integrity_pool);
bio->bi_integrity = NULL;
}
@@ -685,19 +666,18 @@ EXPORT_SYMBOL(bio_integrity_split);
* bio_integrity_clone - Callback for cloning bios with integrity metadata
* @bio: New bio
* @bio_src: Original bio
- * @bs: bio_set to allocate bip from
+ * @gfp_mask: Memory allocation mask
*
* Description: Called to allocate a bip when cloning a bio
*/
-int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
- struct bio_set *bs)
+int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask)
{
struct bio_integrity_payload *bip_src = bio_src->bi_integrity;
struct bio_integrity_payload *bip;
BUG_ON(bip_src == NULL);
- bip = bio_integrity_alloc_bioset(bio, GFP_NOIO, bip_src->bip_vcnt, bs);
+ bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt);
if (bip == NULL)
return -EIO;
@@ -713,37 +693,25 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
}
EXPORT_SYMBOL(bio_integrity_clone);
-int bioset_integrity_create(struct bio_set *bs, int pool_size)
+static int __init bio_integrity_init(void)
{
- bs->bio_integrity_pool = mempool_create_slab_pool(pool_size,
- bio_integrity_slab);
- if (!bs->bio_integrity_pool)
- return -1;
-
- return 0;
-}
-EXPORT_SYMBOL(bioset_integrity_create);
+ kintegrityd_wq = create_workqueue("kintegrityd");
-void bioset_integrity_free(struct bio_set *bs)
-{
- if (bs->bio_integrity_pool)
- mempool_destroy(bs->bio_integrity_pool);
-}
-EXPORT_SYMBOL(bioset_integrity_free);
+ if (!kintegrityd_wq)
+ panic("Failed to create kintegrityd\n");
-void __init bio_integrity_init_slab(void)
-{
bio_integrity_slab = KMEM_CACHE(bio_integrity_payload,
SLAB_HWCACHE_ALIGN|SLAB_PANIC);
-}
-static int __init integrity_init(void)
-{
- kintegrityd_wq = create_workqueue("kintegrityd");
+ bio_integrity_pool = mempool_create_slab_pool(BIO_POOL_SIZE,
+ bio_integrity_slab);
+ if (!bio_integrity_pool)
+ panic("bio_integrity: can't allocate bip pool\n");
- if (!kintegrityd_wq)
- panic("Failed to create kintegrityd\n");
+ integrity_bio_set = bioset_create(BIO_POOL_SIZE, 0);
+ if (!integrity_bio_set)
+ panic("bio_integrity: can't allocate bio_set\n");
return 0;
}
-subsys_initcall(integrity_init);
+subsys_initcall(bio_integrity_init);
diff --git a/fs/bio.c b/fs/bio.c
index 124b95c4d582..a040cde7f6fd 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -248,7 +248,7 @@ void bio_free(struct bio *bio, struct bio_set *bs)
bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));
if (bio_integrity(bio))
- bio_integrity_free(bio, bs);
+ bio_integrity_free(bio);
/*
* If we have front padding, adjust the bio pointer before freeing
@@ -301,48 +301,51 @@ void bio_init(struct bio *bio)
**/
struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
{
+ struct bio_vec *bvl = NULL;
struct bio *bio = NULL;
- void *uninitialized_var(p);
+ unsigned long idx = 0;
+ void *p = NULL;
if (bs) {
p = mempool_alloc(bs->bio_pool, gfp_mask);
-
- if (p)
- bio = p + bs->front_pad;
- } else
+ if (!p)
+ goto err;
+ bio = p + bs->front_pad;
+ } else {
bio = kmalloc(sizeof(*bio), gfp_mask);
+ if (!bio)
+ goto err;
+ }
- if (likely(bio)) {
- struct bio_vec *bvl = NULL;
-
- bio_init(bio);
- if (likely(nr_iovecs)) {
- unsigned long uninitialized_var(idx);
-
- if (nr_iovecs <= BIO_INLINE_VECS) {
- idx = 0;
- bvl = bio->bi_inline_vecs;
- nr_iovecs = BIO_INLINE_VECS;
- } else {
- bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx,
- bs);
- nr_iovecs = bvec_nr_vecs(idx);
- }
- if (unlikely(!bvl)) {
- if (bs)
- mempool_free(p, bs->bio_pool);
- else
- kfree(bio);
- bio = NULL;
- goto out;
- }
- bio->bi_flags |= idx << BIO_POOL_OFFSET;
- bio->bi_max_vecs = nr_iovecs;
- }
- bio->bi_io_vec = bvl;
+ bio_init(bio);
+
+ if (unlikely(!nr_iovecs))
+ goto out_set;
+
+ if (nr_iovecs <= BIO_INLINE_VECS) {
+ bvl = bio->bi_inline_vecs;
+ nr_iovecs = BIO_INLINE_VECS;
+ } else {
+ bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
+ if (unlikely(!bvl))
+ goto err_free;
+
+ nr_iovecs = bvec_nr_vecs(idx);
}
-out:
+ bio->bi_flags |= idx << BIO_POOL_OFFSET;
+ bio->bi_max_vecs = nr_iovecs;
+out_set:
+ bio->bi_io_vec = bvl;
+
return bio;
+
+err_free:
+ if (bs)
+ mempool_free(p, bs->bio_pool);
+ else
+ kfree(bio);
+err:
+ return NULL;
}
struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
@@ -463,10 +466,12 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
if (bio_integrity(bio)) {
int ret;
- ret = bio_integrity_clone(b, bio, fs_bio_set);
+ ret = bio_integrity_clone(b, bio, gfp_mask);
- if (ret < 0)
+ if (ret < 0) {
+ bio_put(b);
return NULL;
+ }
}
return b;
@@ -1524,7 +1529,6 @@ void bioset_free(struct bio_set *bs)
if (bs->bio_pool)
mempool_destroy(bs->bio_pool);
- bioset_integrity_free(bs);
biovec_free_pools(bs);
bio_put_slab(bs);
@@ -1565,9 +1569,6 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
if (!bs->bio_pool)
goto bad;
- if (bioset_integrity_create(bs, pool_size))
- goto bad;
-
if (!biovec_create_pools(bs, pool_size))
return bs;
@@ -1584,6 +1585,13 @@ static void __init biovec_init_slabs(void)
int size;
struct biovec_slab *bvs = bvec_slabs + i;
+#ifndef CONFIG_BLK_DEV_INTEGRITY
+ if (bvs->nr_vecs <= BIO_INLINE_VECS) {
+ bvs->slab = NULL;
+ continue;
+ }
+#endif
+
size = bvs->nr_vecs * sizeof(struct bio_vec);
bvs->slab = kmem_cache_create(bvs->name, size, 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
@@ -1598,7 +1606,6 @@ static int __init init_bio(void)
if (!bio_slabs)
panic("bio: can't allocate bios\n");
- bio_integrity_init_slab();
biovec_init_slabs();
fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 42491d728e99..37f31b5529aa 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -277,7 +277,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
if (*cow_ret == buf)
unlock_orig = 1;
- WARN_ON(!btrfs_tree_locked(buf));
+ btrfs_assert_tree_locked(buf);
if (parent)
parent_start = parent->start;
@@ -2365,7 +2365,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
if (slot >= btrfs_header_nritems(upper) - 1)
return 1;
- WARN_ON(!btrfs_tree_locked(path->nodes[1]));
+ btrfs_assert_tree_locked(path->nodes[1]);
right = read_node_slot(root, upper, slot + 1);
btrfs_tree_lock(right);
@@ -2562,7 +2562,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
if (right_nritems == 0)
return 1;
- WARN_ON(!btrfs_tree_locked(path->nodes[1]));
+ btrfs_assert_tree_locked(path->nodes[1]);
left = read_node_slot(root, path->nodes[1], slot - 1);
btrfs_tree_lock(left);
@@ -4101,7 +4101,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
next = read_node_slot(root, c, slot);
if (!path->skip_locking) {
- WARN_ON(!btrfs_tree_locked(c));
+ btrfs_assert_tree_locked(c);
btrfs_tree_lock(next);
btrfs_set_lock_blocking(next);
}
@@ -4126,7 +4126,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
reada_for_search(root, path, level, slot, 0);
next = read_node_slot(root, next, 0);
if (!path->skip_locking) {
- WARN_ON(!btrfs_tree_locked(path->nodes[level]));
+ btrfs_assert_tree_locked(path->nodes[level]);
btrfs_tree_lock(next);
btrfs_set_lock_blocking(next);
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 82491ba8fa40..5e1d4e30e9d8 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -784,7 +784,14 @@ struct btrfs_fs_info {
struct list_head dirty_cowonly_roots;
struct btrfs_fs_devices *fs_devices;
+
+ /*
+ * the space_info list is almost entirely read only. It only changes
+ * when we add a new raid type to the FS, and that happens
+ * very rarely. RCU is used to protect it.
+ */
struct list_head space_info;
+
spinlock_t delalloc_lock;
spinlock_t new_trans_lock;
u64 delalloc_bytes;
@@ -1797,6 +1804,8 @@ int btrfs_cleanup_reloc_trees(struct btrfs_root *root);
int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
+void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
+
int btrfs_check_metadata_free_space(struct btrfs_root *root);
int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index adda739a0215..6ec80c0fc869 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -857,7 +857,7 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct inode *btree_inode = root->fs_info->btree_inode;
if (btrfs_header_generation(buf) ==
root->fs_info->running_transaction->transid) {
- WARN_ON(!btrfs_tree_locked(buf));
+ btrfs_assert_tree_locked(buf);
/* ugh, clear_extent_buffer_dirty can be expensive */
btrfs_set_lock_blocking(buf);
@@ -2361,7 +2361,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
btrfs_set_lock_blocking(buf);
- WARN_ON(!btrfs_tree_locked(buf));
+ btrfs_assert_tree_locked(buf);
if (transid != root->fs_info->generation) {
printk(KERN_CRIT "btrfs transid mismatch buffer %llu, "
"found %llu running %llu\n",
@@ -2385,7 +2385,7 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
unsigned long thresh = 32 * 1024 * 1024;
tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
- if (current_is_pdflush() || current->flags & PF_MEMALLOC)
+ if (current->flags & PF_MEMALLOC)
return;
num_dirty = count_range_bits(tree, &start, (u64)-1,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 6b5966aacf44..fefe83ad2059 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -20,6 +20,7 @@
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/sort.h>
+#include <linux/rcupdate.h>
#include "compat.h"
#include "hash.h"
#include "crc32c.h"
@@ -330,13 +331,33 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
{
struct list_head *head = &info->space_info;
struct btrfs_space_info *found;
- list_for_each_entry(found, head, list) {
- if (found->flags == flags)
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(found, head, list) {
+ if (found->flags == flags) {
+ rcu_read_unlock();
return found;
+ }
}
+ rcu_read_unlock();
return NULL;
}
+/*
+ * after adding space to the filesystem, we need to clear the full flags
+ * on all the space infos.
+ */
+void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
+{
+ struct list_head *head = &info->space_info;
+ struct btrfs_space_info *found;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(found, head, list)
+ found->full = 0;
+ rcu_read_unlock();
+}
+
static u64 div_factor(u64 num, int factor)
{
if (factor == 10)
@@ -1903,7 +1924,6 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
if (!found)
return -ENOMEM;
- list_add(&found->list, &info->space_info);
INIT_LIST_HEAD(&found->block_groups);
init_rwsem(&found->groups_sem);
spin_lock_init(&found->lock);
@@ -1917,6 +1937,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->full = 0;
found->force_alloc = 0;
*space_info = found;
+ list_add_rcu(&found->list, &info->space_info);
return 0;
}
@@ -4418,13 +4439,13 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
BUG_ON(!path);
- BUG_ON(!btrfs_tree_locked(parent));
+ btrfs_assert_tree_locked(parent);
parent_level = btrfs_header_level(parent);
extent_buffer_get(parent);
path->nodes[parent_level] = parent;
path->slots[parent_level] = btrfs_header_nritems(parent);
- BUG_ON(!btrfs_tree_locked(node));
+ btrfs_assert_tree_locked(node);
level = btrfs_header_level(node);
extent_buffer_get(node);
path->nodes[level] = node;
@@ -6320,6 +6341,7 @@ out:
int btrfs_free_block_groups(struct btrfs_fs_info *info)
{
struct btrfs_block_group_cache *block_group;
+ struct btrfs_space_info *space_info;
struct rb_node *n;
spin_lock(&info->block_group_cache_lock);
@@ -6341,6 +6363,23 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
spin_lock(&info->block_group_cache_lock);
}
spin_unlock(&info->block_group_cache_lock);
+
+ /* now that all the block groups are freed, go through and
+ * free all the space_info structs. This is only called during
+ * the final stages of unmount, and so we know nobody is
+ * using them. We call synchronize_rcu() once before we start,
+ * just to be on the safe side.
+ */
+ synchronize_rcu();
+
+ while(!list_empty(&info->space_info)) {
+ space_info = list_entry(info->space_info.next,
+ struct btrfs_space_info,
+ list);
+
+ list_del(&space_info->list);
+ kfree(space_info);
+ }
return 0;
}
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 85506c4a3af7..47b0a88c12a2 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -220,8 +220,8 @@ int btrfs_tree_unlock(struct extent_buffer *eb)
return 0;
}
-int btrfs_tree_locked(struct extent_buffer *eb)
+void btrfs_assert_tree_locked(struct extent_buffer *eb)
{
- return test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags) ||
- spin_is_locked(&eb->lock);
+ if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
+ assert_spin_locked(&eb->lock);
}
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index 6bb0afbff928..6c4ce457168c 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -21,11 +21,11 @@
int btrfs_tree_lock(struct extent_buffer *eb);
int btrfs_tree_unlock(struct extent_buffer *eb);
-int btrfs_tree_locked(struct extent_buffer *eb);
int btrfs_try_tree_lock(struct extent_buffer *eb);
int btrfs_try_spin_lock(struct extent_buffer *eb);
void btrfs_set_lock_blocking(struct extent_buffer *eb);
void btrfs_clear_lock_blocking(struct extent_buffer *eb);
+void btrfs_assert_tree_locked(struct extent_buffer *eb);
#endif
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1316139bf9e8..dd06e18e5aac 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1374,6 +1374,12 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
ret = btrfs_add_device(trans, root, device);
}
+ /*
+ * we've got more storage, clear any full flags on the space
+ * infos
+ */
+ btrfs_clear_space_info_full(root->fs_info);
+
unlock_chunks(root);
btrfs_commit_transaction(trans, root);
@@ -1459,6 +1465,8 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans,
device->fs_devices->total_rw_bytes += diff;
device->total_bytes = new_size;
+ btrfs_clear_space_info_full(device->dev_root->fs_info);
+
return btrfs_update_device(trans, device);
}
diff --git a/fs/buffer.c b/fs/buffer.c
index 9f697419ed8e..891e1c78e4f1 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -760,15 +760,9 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
* If warn is true, then emit a warning if the page is not uptodate and has
* not been truncated.
*/
-static int __set_page_dirty(struct page *page,
+static void __set_page_dirty(struct page *page,
struct address_space *mapping, int warn)
{
- if (unlikely(!mapping))
- return !TestSetPageDirty(page);
-
- if (TestSetPageDirty(page))
- return 0;
-
spin_lock_irq(&mapping->tree_lock);
if (page->mapping) { /* Race with truncate? */
WARN_ON_ONCE(warn && !PageUptodate(page));
@@ -785,8 +779,6 @@ static int __set_page_dirty(struct page *page,
}
spin_unlock_irq(&mapping->tree_lock);
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-
- return 1;
}
/*
@@ -816,6 +808,7 @@ static int __set_page_dirty(struct page *page,
*/
int __set_page_dirty_buffers(struct page *page)
{
+ int newly_dirty;
struct address_space *mapping = page_mapping(page);
if (unlikely(!mapping))
@@ -831,9 +824,12 @@ int __set_page_dirty_buffers(struct page *page)
bh = bh->b_this_page;
} while (bh != head);
}
+ newly_dirty = !TestSetPageDirty(page);
spin_unlock(&mapping->private_lock);
- return __set_page_dirty(page, mapping, 1);
+ if (newly_dirty)
+ __set_page_dirty(page, mapping, 1);
+ return newly_dirty;
}
EXPORT_SYMBOL(__set_page_dirty_buffers);
@@ -1262,8 +1258,11 @@ void mark_buffer_dirty(struct buffer_head *bh)
return;
}
- if (!test_set_buffer_dirty(bh))
- __set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0);
+ if (!test_set_buffer_dirty(bh)) {
+ struct page *page = bh->b_page;
+ if (!TestSetPageDirty(page))
+ __set_page_dirty(page, page_mapping(page), 0);
+ }
}
/*
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 851388fafc73..65984006192c 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -6,7 +6,16 @@ the server to treat subsequent connections, especially those that
are authenticated as guest, as reconnections, invalidating the earlier
user's smb session. This fix allows cifs to mount multiple times to the
same server with different userids without risking invalidating earlier
-established security contexts.
+established security contexts. fsync now sends SMB Flush operation
+to better ensure that we wait for server to write all of the data to
+server disk (not just write it over the network). Add new mount
+parameter to allow user to disable sending the (slow) SMB flush on
+fsync if desired (fsync still flushes all cached write data to the server).
+Posix file open support added (turned off after one attempt if server
+fails to support it properly, as with Samba server versions prior to 3.3.2)
+Fix "redzone overwritten" bug in cifs_put_tcon (CIFSTcon may allocate too
+little memory for the "nativeFileSystem" field returned by the server
+during mount).
Version 1.56
------------
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 341a98965bd0..6994a0f54f02 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -118,6 +118,18 @@ config CIFS_DEBUG2
option can be turned off unless you are debugging
cifs problems. If unsure, say N.
+config CIFS_DFS_UPCALL
+ bool "DFS feature support"
+ depends on CIFS && KEYS
+ help
+ Distributed File System (DFS) support is used to access shares
+ transparently in an enterprise name space, even if the share
+ moves to a different server. This feature also enables
+ an upcall mechanism for CIFS which contacts userspace helper
+ utilities to provide server name resolution (host names to
+ IP addresses) which is needed for implicit mounts of DFS junction
+ points. If unsure, say N.
+
config CIFS_EXPERIMENTAL
bool "CIFS Experimental Features (EXPERIMENTAL)"
depends on CIFS && EXPERIMENTAL
@@ -131,12 +143,3 @@ config CIFS_EXPERIMENTAL
(which is disabled by default). See the file fs/cifs/README
for more details. If unsure, say N.
-config CIFS_DFS_UPCALL
- bool "DFS feature support (EXPERIMENTAL)"
- depends on CIFS_EXPERIMENTAL
- depends on KEYS
- help
- Enables an upcall mechanism for CIFS which contacts userspace
- helper utilities to provide server name resolution (host names to
- IP addresses) which is needed for implicit mounts of DFS junction
- points. If unsure, say N.
diff --git a/fs/cifs/README b/fs/cifs/README
index da4515e3be20..07434181623b 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -472,6 +472,19 @@ A partial list of the supported mount options follows:
even if the cifs server would support posix advisory locks.
"forcemand" is accepted as a shorter form of this mount
option.
+ nostrictsync If this mount option is set, when an application does an
+ fsync call then the cifs client does not send an SMB Flush
+ to the server (to force the server to write all dirty data
+ for this file immediately to disk), although cifs still sends
+ all dirty (cached) file data to the server and waits for the
+ server to respond to the write. Since SMB Flush can be
+ very slow, and some servers may be reliable enough (to risk
+ delaying slightly flushing the data to disk on the server),
+ turning on this option may be useful to improve performance for
+ applications that fsync too much, at a small risk of server
+ crash. If this mount option is not set, by default cifs will
+ send an SMB flush request (and wait for a response) on every
+ fsync call.
nodfs Disable DFS (global name space support) even if the
server claims to support it. This can help work around
a problem with parsing of DFS paths with Samba server
@@ -692,13 +705,14 @@ require this helper. Note that NTLMv2 security (which does not require the
cifs.upcall helper program), instead of using Kerberos, is sufficient for
some use cases.
-Enabling DFS support (used to access shares transparently in an MS-DFS
-global name space) requires that CONFIG_CIFS_EXPERIMENTAL be enabled. In
-addition, DFS support for target shares which are specified as UNC
+DFS support allows transparent redirection to shares in an MS-DFS name space.
+In addition, DFS support for target shares which are specified as UNC
names which begin with host names (rather than IP addresses) requires
a user space helper (such as cifs.upcall) to be present in order to
translate host names to ip address, and the user space helper must also
-be configured in the file /etc/request-key.conf
+be configured in the file /etc/request-key.conf. Samba, Windows servers and
+many NAS appliances support DFS as a way of constructing a global name
+space to ease network configuration and improve reliability.
To use cifs Kerberos and DFS support, the Linux keyutils package should be
installed and something like the following lines should be added to the
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 490e34bbf27a..877e4d9a1159 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -340,6 +340,8 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v)
seq_printf(m, "\nWrites: %d Bytes: %lld",
atomic_read(&tcon->num_writes),
(long long)(tcon->bytes_written));
+ seq_printf(m, "\nFlushes: %d",
+ atomic_read(&tcon->num_flushes));
seq_printf(m, "\nLocks: %d HardLinks: %d "
"Symlinks: %d",
atomic_read(&tcon->num_locks),
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 85c0a74d034d..5fdbf8a14472 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -104,9 +104,9 @@ static char *cifs_get_share_name(const char *node_name)
/**
- * compose_mount_options - creates mount options for refferral
+ * cifs_compose_mount_options - creates mount options for refferral
* @sb_mountdata: parent/root DFS mount options (template)
- * @dentry: point where we are going to mount
+ * @fullpath: full path in UNC format
* @ref: server's referral
* @devname: pointer for saving device name
*
@@ -116,8 +116,8 @@ static char *cifs_get_share_name(const char *node_name)
* Returns: pointer to new mount options or ERR_PTR.
* Caller is responcible for freeing retunrned value if it is not error.
*/
-static char *compose_mount_options(const char *sb_mountdata,
- struct dentry *dentry,
+char *cifs_compose_mount_options(const char *sb_mountdata,
+ const char *fullpath,
const struct dfs_info3_param *ref,
char **devname)
{
@@ -128,7 +128,6 @@ static char *compose_mount_options(const char *sb_mountdata,
char *srvIP = NULL;
char sep = ',';
int off, noff;
- char *fullpath;
if (sb_mountdata == NULL)
return ERR_PTR(-EINVAL);
@@ -202,17 +201,6 @@ static char *compose_mount_options(const char *sb_mountdata,
goto compose_mount_options_err;
}
- /*
- * this function gives us a path with a double backslash prefix. We
- * require a single backslash for DFS. Temporarily increment fullpath
- * to put it in the proper form and decrement before freeing it.
- */
- fullpath = build_path_from_dentry(dentry);
- if (!fullpath) {
- rc = -ENOMEM;
- goto compose_mount_options_err;
- }
- ++fullpath;
tkn_e = strchr(tkn_e + 1, '\\');
if (tkn_e || (strlen(fullpath) - ref->path_consumed)) {
strncat(mountdata, &sep, 1);
@@ -221,8 +209,6 @@ static char *compose_mount_options(const char *sb_mountdata,
strcat(mountdata, tkn_e + 1);
strcat(mountdata, fullpath + ref->path_consumed);
}
- --fullpath;
- kfree(fullpath);
/*cFYI(1,("%s: parent mountdata: %s", __func__,sb_mountdata));*/
/*cFYI(1, ("%s: submount mountdata: %s", __func__, mountdata ));*/
@@ -245,10 +231,20 @@ static struct vfsmount *cifs_dfs_do_refmount(const struct vfsmount *mnt_parent,
struct vfsmount *mnt;
char *mountdata;
char *devname = NULL;
+ char *fullpath;
cifs_sb = CIFS_SB(dentry->d_inode->i_sb);
- mountdata = compose_mount_options(cifs_sb->mountdata,
- dentry, ref, &devname);
+ /*
+ * this function gives us a path with a double backslash prefix. We
+ * require a single backslash for DFS.
+ */
+ fullpath = build_path_from_dentry(dentry);
+ if (!fullpath)
+ return ERR_PTR(-ENOMEM);
+
+ mountdata = cifs_compose_mount_options(cifs_sb->mountdata,
+ fullpath + 1, ref, &devname);
+ kfree(fullpath);
if (IS_ERR(mountdata))
return (struct vfsmount *)mountdata;
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index c4c306f7b06f..4797787c6a44 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -32,6 +32,7 @@
#define CIFS_MOUNT_OVERR_GID 0x800 /* override gid returned from server */
#define CIFS_MOUNT_DYNPERM 0x1000 /* allow in-memory only mode setting */
#define CIFS_MOUNT_NOPOSIXBRL 0x2000 /* mandatory not posix byte range lock */
+#define CIFS_MOUNT_NOSSYNC 0x4000 /* don't do slow SMBflush on every sync*/
struct cifs_sb_info {
struct cifsTconInfo *tcon; /* primary mount */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index e004f6db5fc8..9fbf4dff5da6 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -254,6 +254,7 @@ struct cifsTconInfo {
atomic_t num_smbs_sent;
atomic_t num_writes;
atomic_t num_reads;
+ atomic_t num_flushes;
atomic_t num_oplock_brks;
atomic_t num_opens;
atomic_t num_closes;
@@ -298,6 +299,7 @@ struct cifsTconInfo {
bool unix_ext:1; /* if false disable Linux extensions to CIFS protocol
for this mount even if server would support */
bool local_lease:1; /* check leases (only) on local system not remote */
+ bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */
bool need_reconnect:1; /* connection reset, tid now invalid */
/* BB add field for back pointer to sb struct(s)? */
};
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index b4e2e9f0ee3d..b370489c8da5 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -1,7 +1,7 @@
/*
* fs/cifs/cifspdu.h
*
- * Copyright (c) International Business Machines Corp., 2002,2008
+ * Copyright (c) International Business Machines Corp., 2002,2009
* Author(s): Steve French (sfrench@us.ibm.com)
*
* This library is free software; you can redistribute it and/or modify
@@ -23,6 +23,7 @@
#define _CIFSPDU_H
#include <net/sock.h>
+#include "smbfsctl.h"
#ifdef CONFIG_CIFS_WEAK_PW_HASH
#define LANMAN_PROT 0
@@ -34,15 +35,15 @@
#define POSIX_PROT (CIFS_PROT+1)
#define BAD_PROT 0xFFFF
-/* SMB command codes */
-/*
- * Some commands have minimal (wct=0,bcc=0), or uninteresting, responses
+/* SMB command codes:
+ * Note some commands have minimal (wct=0,bcc=0), or uninteresting, responses
* (ie which include no useful data other than the SMB error code itself).
- * Knowing this helps avoid response buffer allocations and copy in some cases
+ * This can allow us to avoid response buffer allocations and copy in some cases
*/
#define SMB_COM_CREATE_DIRECTORY 0x00 /* trivial response */
#define SMB_COM_DELETE_DIRECTORY 0x01 /* trivial response */
#define SMB_COM_CLOSE 0x04 /* triv req/rsp, timestamp ignored */
+#define SMB_COM_FLUSH 0x05 /* triv req/rsp */
#define SMB_COM_DELETE 0x06 /* trivial response */
#define SMB_COM_RENAME 0x07 /* trivial response */
#define SMB_COM_QUERY_INFORMATION 0x08 /* aka getattr */
@@ -790,6 +791,12 @@ typedef struct smb_com_close_rsp {
__u16 ByteCount; /* bct = 0 */
} __attribute__((packed)) CLOSE_RSP;
+typedef struct smb_com_flush_req {
+ struct smb_hdr hdr; /* wct = 1 */
+ __u16 FileID;
+ __u16 ByteCount; /* 0 */
+} __attribute__((packed)) FLUSH_REQ;
+
typedef struct smb_com_findclose_req {
struct smb_hdr hdr; /* wct = 1 */
__u16 FileID;
@@ -1924,19 +1931,19 @@ typedef struct smb_com_transaction2_get_dfs_refer_req {
#define DFS_TYPE_ROOT 0x0001
/* Referral Entry Flags */
-#define DFS_NAME_LIST_REF 0x0200
+#define DFS_NAME_LIST_REF 0x0200 /* set for domain or DC referral responses */
+#define DFS_TARGET_SET_BOUNDARY 0x0400 /* only valid with version 4 dfs req */
-typedef struct dfs_referral_level_3 {
- __le16 VersionNumber;
+typedef struct dfs_referral_level_3 { /* version 4 is same, + one flag bit */
+ __le16 VersionNumber; /* must be 3 or 4 */
__le16 Size;
__le16 ServerType; /* 0x0001 = root targets; 0x0000 = link targets */
- __le16 ReferralEntryFlags; /* 0x0200 bit set only for domain
- or DC referral responce */
+ __le16 ReferralEntryFlags;
__le32 TimeToLive;
__le16 DfsPathOffset;
__le16 DfsAlternatePathOffset;
__le16 NetworkAddressOffset; /* offset of the link target */
- __le16 ServiceSiteGuid;
+ __u8 ServiceSiteGuid[16]; /* MBZ, ignored */
} __attribute__((packed)) REFERRAL3;
typedef struct smb_com_transaction_get_dfs_refer_rsp {
@@ -1946,48 +1953,15 @@ typedef struct smb_com_transaction_get_dfs_refer_rsp {
__u8 Pad;
__le16 PathConsumed;
__le16 NumberOfReferrals;
- __le16 DFSFlags;
- __u16 Pad2;
+ __le32 DFSFlags;
REFERRAL3 referrals[1]; /* array of level 3 dfs_referral structures */
/* followed by the strings pointed to by the referral structures */
} __attribute__((packed)) TRANSACTION2_GET_DFS_REFER_RSP;
/* DFS Flags */
-#define DFSREF_REFERRAL_SERVER 0x0001
-#define DFSREF_STORAGE_SERVER 0x0002
-
-/* IOCTL information */
-/*
- * List of ioctl function codes that look to be of interest to remote clients
- * like this one. Need to do some experimentation to make sure they all work
- * remotely. Some of the following, such as the encryption/compression ones
- * would be invoked from tools via a specialized hook into the VFS rather
- * than via the standard vfs entry points
- */
-#define FSCTL_REQUEST_OPLOCK_LEVEL_1 0x00090000
-#define FSCTL_REQUEST_OPLOCK_LEVEL_2 0x00090004
-#define FSCTL_REQUEST_BATCH_OPLOCK 0x00090008
-#define FSCTL_LOCK_VOLUME 0x00090018
-#define FSCTL_UNLOCK_VOLUME 0x0009001C
-#define FSCTL_GET_COMPRESSION 0x0009003C
-#define FSCTL_SET_COMPRESSION 0x0009C040
-#define FSCTL_REQUEST_FILTER_OPLOCK 0x0009008C
-#define FSCTL_FILESYS_GET_STATISTICS 0x00090090
-#define FSCTL_SET_REPARSE_POINT 0x000900A4
-#define FSCTL_GET_REPARSE_POINT 0x000900A8
-#define FSCTL_DELETE_REPARSE_POINT 0x000900AC
-#define FSCTL_SET_SPARSE 0x000900C4
-#define FSCTL_SET_ZERO_DATA 0x000900C8
-#define FSCTL_SET_ENCRYPTION 0x000900D7
-#define FSCTL_ENCRYPTION_FSCTL_IO 0x000900DB
-#define FSCTL_WRITE_RAW_ENCRYPTED 0x000900DF
-#define FSCTL_READ_RAW_ENCRYPTED 0x000900E3
-#define FSCTL_SIS_COPYFILE 0x00090100
-#define FSCTL_SIS_LINK_FILES 0x0009C104
-
-#define IO_REPARSE_TAG_MOUNT_POINT 0xA0000003
-#define IO_REPARSE_TAG_HSM 0xC0000004
-#define IO_REPARSE_TAG_SIS 0x80000007
+#define DFSREF_REFERRAL_SERVER 0x00000001 /* all targets are DFS roots */
+#define DFSREF_STORAGE_SERVER 0x00000002 /* no further ref requests needed */
+#define DFSREF_TARGET_FAILBACK 0x00000004 /* only for DFS referral version 4 */
/*
************************************************************************
@@ -2508,8 +2482,6 @@ struct data_blob {
6) Use nanosecond timestamps throughout all time fields if
corresponding attribute flag is set
7) sendfile - handle based copy
- 8) Direct i/o
- 9) Misc fcntls?
what about fixing 64 bit alignment
@@ -2628,7 +2600,5 @@ typedef struct file_chattr_info {
__le64 mode; /* list of actual attribute bits on this inode */
} __attribute__((packed)) FILE_CHATTR_INFO; /* ext attributes
(chattr, chflags) level 0x206 */
-
-#endif
-
+#endif /* POSIX */
#endif /* _CIFSPDU_H */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 083dfc57c7a3..4167716d32f2 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -44,6 +44,9 @@ extern void _FreeXid(unsigned int);
extern char *build_path_from_dentry(struct dentry *);
extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb);
extern char *build_wildcard_path_from_dentry(struct dentry *direntry);
+extern char *cifs_compose_mount_options(const char *sb_mountdata,
+ const char *fullpath, const struct dfs_info3_param *ref,
+ char **devname);
/* extern void renew_parental_timestamps(struct dentry *direntry);*/
extern int SendReceive(const unsigned int /* xid */ , struct cifsSesInfo *,
struct smb_hdr * /* input */ ,
@@ -92,6 +95,9 @@ extern u64 cifs_UnixTimeToNT(struct timespec);
extern __le64 cnvrtDosCifsTm(__u16 date, __u16 time);
extern struct timespec cnvrtDosUnixTm(__u16 date, __u16 time);
+extern int cifs_posix_open(char *full_path, struct inode **pinode,
+ struct super_block *sb, int mode, int oflags,
+ int *poplock, __u16 *pnetfid, int xid);
extern void posix_fill_in_inode(struct inode *tmp_inode,
FILE_UNIX_BASIC_INFO *pData, int isNewInode);
extern struct inode *cifs_new_inode(struct super_block *sb, __u64 *inum);
@@ -281,6 +287,9 @@ extern int CIFSPOSIXCreate(const int xid, struct cifsTconInfo *tcon,
extern int CIFSSMBClose(const int xid, struct cifsTconInfo *tcon,
const int smb_file_id);
+extern int CIFSSMBFlush(const int xid, struct cifsTconInfo *tcon,
+ const int smb_file_id);
+
extern int CIFSSMBRead(const int xid, struct cifsTconInfo *tcon,
const int netfid, unsigned int count,
const __u64 lseek, unsigned int *nbytes, char **buf,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 939e2f76b959..bc09c998631f 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1934,6 +1934,27 @@ CIFSSMBClose(const int xid, struct cifsTconInfo *tcon, int smb_file_id)
}
int
+CIFSSMBFlush(const int xid, struct cifsTconInfo *tcon, int smb_file_id)
+{
+ int rc = 0;
+ FLUSH_REQ *pSMB = NULL;
+ cFYI(1, ("In CIFSSMBFlush"));
+
+ rc = small_smb_init(SMB_COM_FLUSH, 1, tcon, (void **) &pSMB);
+ if (rc)
+ return rc;
+
+ pSMB->FileID = (__u16) smb_file_id;
+ pSMB->ByteCount = 0;
+ rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
+ cifs_stats_inc(&tcon->num_flushes);
+ if (rc)
+ cERROR(1, ("Send error in Flush = %d", rc));
+
+ return rc;
+}
+
+int
CIFSSMBRename(const int xid, struct cifsTconInfo *tcon,
const char *fromName, const char *toName,
const struct nls_table *nls_codepage, int remap)
@@ -2356,8 +2377,10 @@ winCreateHardLinkRetry:
PATH_MAX, nls_codepage, remap);
name_len++; /* trailing null */
name_len *= 2;
- pSMB->OldFileName[name_len] = 0; /* pad */
- pSMB->OldFileName[name_len + 1] = 0x04;
+
+ /* protocol specifies ASCII buffer format (0x04) for unicode */
+ pSMB->OldFileName[name_len] = 0x04;
+ pSMB->OldFileName[name_len + 1] = 0x00; /* pad */
name_len2 =
cifsConvertToUCS((__le16 *)&pSMB->OldFileName[name_len + 2],
toName, PATH_MAX, nls_codepage, remap);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index da0f4ffa0613..0de3b5615a22 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -95,6 +95,7 @@ struct smb_vol {
bool local_lease:1; /* check leases only on local system, not remote */
bool noblocksnd:1;
bool noautotune:1;
+ bool nostrictsync:1; /* do not force expensive SMBflush on every sync */
unsigned int rsize;
unsigned int wsize;
unsigned int sockopt;
@@ -1274,6 +1275,10 @@ cifs_parse_mount_options(char *options, const char *devname,
vol->intr = 0;
} else if (strnicmp(data, "intr", 4) == 0) {
vol->intr = 1;
+ } else if (strnicmp(data, "nostrictsync", 12) == 0) {
+ vol->nostrictsync = 1;
+ } else if (strnicmp(data, "strictsync", 10) == 0) {
+ vol->nostrictsync = 0;
} else if (strnicmp(data, "serverino", 7) == 0) {
vol->server_ino = 1;
} else if (strnicmp(data, "noserverino", 9) == 0) {
@@ -2160,6 +2165,8 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UNX_EMUL;
if (pvolume_info->nobrl)
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_BRL;
+ if (pvolume_info->nostrictsync)
+ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOSSYNC;
if (pvolume_info->mand_lock)
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOPOSIXBRL;
if (pvolume_info->cifs_acl)
@@ -3667,7 +3674,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
BCC(smb_buffer_response)) {
kfree(tcon->nativeFileSystem);
tcon->nativeFileSystem =
- kzalloc(length + 2, GFP_KERNEL);
+ kzalloc(2*(length + 1), GFP_KERNEL);
if (tcon->nativeFileSystem)
cifs_strfromUCS_le(
tcon->nativeFileSystem,
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 89fb72832652..f9b6f68be976 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -129,7 +129,7 @@ cifs_bp_rename_retry:
return full_path;
}
-static int cifs_posix_open(char *full_path, struct inode **pinode,
+int cifs_posix_open(char *full_path, struct inode **pinode,
struct super_block *sb, int mode, int oflags,
int *poplock, __u16 *pnetfid, int xid)
{
@@ -187,7 +187,9 @@ static int cifs_posix_open(char *full_path, struct inode **pinode,
if (!pinode)
goto posix_open_ret; /* caller does not need info */
- *pinode = cifs_new_inode(sb, &presp_data->UniqueId);
+ if (*pinode == NULL)
+ *pinode = cifs_new_inode(sb, &presp_data->UniqueId);
+ /* else an inode was passed in. Update its info, don't create one */
/* We do not need to close the file if new_inode fails since
the caller will retry qpathinfo as long as inode is null */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 12bb656fbe75..81747acca4c4 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -78,8 +78,36 @@ static inline int cifs_convert_flags(unsigned int flags)
return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
FILE_READ_DATA);
+}
+static inline fmode_t cifs_posix_convert_flags(unsigned int flags)
+{
+ fmode_t posix_flags = 0;
+ if ((flags & O_ACCMODE) == O_RDONLY)
+ posix_flags = FMODE_READ;
+ else if ((flags & O_ACCMODE) == O_WRONLY)
+ posix_flags = FMODE_WRITE;
+ else if ((flags & O_ACCMODE) == O_RDWR) {
+ /* GENERIC_ALL is too much permission to request
+ can cause unnecessary access denied on create */
+ /* return GENERIC_ALL; */
+ posix_flags = FMODE_READ | FMODE_WRITE;
+ }
+ /* can not map O_CREAT or O_EXCL or O_TRUNC flags when
+ reopening a file. They had their effect on the original open */
+ if (flags & O_APPEND)
+ posix_flags |= (fmode_t)O_APPEND;
+ if (flags & O_SYNC)
+ posix_flags |= (fmode_t)O_SYNC;
+ if (flags & O_DIRECTORY)
+ posix_flags |= (fmode_t)O_DIRECTORY;
+ if (flags & O_NOFOLLOW)
+ posix_flags |= (fmode_t)O_NOFOLLOW;
+ if (flags & O_DIRECT)
+ posix_flags |= (fmode_t)O_DIRECT;
+
+ return posix_flags;
}
static inline int cifs_get_disposition(unsigned int flags)
@@ -97,6 +125,80 @@ static inline int cifs_get_disposition(unsigned int flags)
}
/* all arguments to this function must be checked for validity in caller */
+static inline int cifs_posix_open_inode_helper(struct inode *inode,
+ struct file *file, struct cifsInodeInfo *pCifsInode,
+ struct cifsFileInfo *pCifsFile, int oplock, u16 netfid)
+{
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+/* struct timespec temp; */ /* BB REMOVEME BB */
+
+ file->private_data = kmalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
+ if (file->private_data == NULL)
+ return -ENOMEM;
+ pCifsFile = cifs_init_private(file->private_data, inode, file, netfid);
+ write_lock(&GlobalSMBSeslock);
+ list_add(&pCifsFile->tlist, &cifs_sb->tcon->openFileList);
+
+ pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
+ if (pCifsInode == NULL) {
+ write_unlock(&GlobalSMBSeslock);
+ return -EINVAL;
+ }
+
+ /* want handles we can use to read with first
+ in the list so we do not have to walk the
+ list to search for one in write_begin */
+ if ((file->f_flags & O_ACCMODE) == O_WRONLY) {
+ list_add_tail(&pCifsFile->flist,
+ &pCifsInode->openFileList);
+ } else {
+ list_add(&pCifsFile->flist,
+ &pCifsInode->openFileList);
+ }
+
+ if (pCifsInode->clientCanCacheRead) {
+ /* we have the inode open somewhere else
+ no need to discard cache data */
+ goto psx_client_can_cache;
+ }
+
+ /* BB FIXME need to fix this check to move it earlier into posix_open
+ BB fIX following section BB FIXME */
+
+ /* if not oplocked, invalidate inode pages if mtime or file
+ size changed */
+/* temp = cifs_NTtimeToUnix(le64_to_cpu(buf->LastWriteTime));
+ if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
+ (file->f_path.dentry->d_inode->i_size ==
+ (loff_t)le64_to_cpu(buf->EndOfFile))) {
+ cFYI(1, ("inode unchanged on server"));
+ } else {
+ if (file->f_path.dentry->d_inode->i_mapping) {
+ rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
+ if (rc != 0)
+ CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
+ }
+ cFYI(1, ("invalidating remote inode since open detected it "
+ "changed"));
+ invalidate_remote_inode(file->f_path.dentry->d_inode);
+ } */
+
+psx_client_can_cache:
+ if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
+ pCifsInode->clientCanCacheAll = true;
+ pCifsInode->clientCanCacheRead = true;
+ cFYI(1, ("Exclusive Oplock granted on inode %p",
+ file->f_path.dentry->d_inode));
+ } else if ((oplock & 0xF) == OPLOCK_READ)
+ pCifsInode->clientCanCacheRead = true;
+
+ /* will have to change the unlock if we reenable the
+ filemap_fdatawrite (which does not seem necessary */
+ write_unlock(&GlobalSMBSeslock);
+ return 0;
+}
+
+/* all arguments to this function must be checked for validity in caller */
static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile,
struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf,
@@ -167,7 +269,7 @@ int cifs_open(struct inode *inode, struct file *file)
int rc = -EACCES;
int xid, oplock;
struct cifs_sb_info *cifs_sb;
- struct cifsTconInfo *pTcon;
+ struct cifsTconInfo *tcon;
struct cifsFileInfo *pCifsFile;
struct cifsInodeInfo *pCifsInode;
struct list_head *tmp;
@@ -180,7 +282,7 @@ int cifs_open(struct inode *inode, struct file *file)
xid = GetXid();
cifs_sb = CIFS_SB(inode->i_sb);
- pTcon = cifs_sb->tcon;
+ tcon = cifs_sb->tcon;
if (file->f_flags & O_CREAT) {
/* search inode for this file and fill in file->private_data */
@@ -220,6 +322,45 @@ int cifs_open(struct inode *inode, struct file *file)
cFYI(1, ("inode = 0x%p file flags are 0x%x for %s",
inode, file->f_flags, full_path));
+
+ if (oplockEnabled)
+ oplock = REQ_OPLOCK;
+ else
+ oplock = 0;
+
+ if (!tcon->broken_posix_open && tcon->unix_ext &&
+ (tcon->ses->capabilities & CAP_UNIX) &&
+ (CIFS_UNIX_POSIX_PATH_OPS_CAP &
+ le64_to_cpu(tcon->fsUnixInfo.Capability))) {
+ int oflags = (int) cifs_posix_convert_flags(file->f_flags);
+ /* can not refresh inode info since size could be stale */
+ rc = cifs_posix_open(full_path, &inode, inode->i_sb,
+ cifs_sb->mnt_file_mode /* ignored */,
+ oflags, &oplock, &netfid, xid);
+ if (rc == 0) {
+ cFYI(1, ("posix open succeeded"));
+ /* no need for special case handling of setting mode
+ on read only files needed here */
+
+ cifs_posix_open_inode_helper(inode, file, pCifsInode,
+ pCifsFile, oplock, netfid);
+ goto out;
+ } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
+ if (tcon->ses->serverNOS)
+ cERROR(1, ("server %s of type %s returned"
+ " unexpected error on SMB posix open"
+ ", disabling posix open support."
+ " Check if server update available.",
+ tcon->ses->serverName,
+ tcon->ses->serverNOS));
+ tcon->broken_posix_open = true;
+ } else if ((rc != -EIO) && (rc != -EREMOTE) &&
+ (rc != -EOPNOTSUPP)) /* path not found or net err */
+ goto out;
+ /* else fallthrough to retry open the old way on network i/o
+ or DFS errors */
+ }
+
desiredAccess = cifs_convert_flags(file->f_flags);
/*********************************************************************
@@ -248,11 +389,6 @@ int cifs_open(struct inode *inode, struct file *file)
disposition = cifs_get_disposition(file->f_flags);
- if (oplockEnabled)
- oplock = REQ_OPLOCK;
- else
- oplock = 0;
-
/* BB pass O_SYNC flag through on file attributes .. BB */
/* Also refresh inode by passing in file_info buf returned by SMBOpen
@@ -269,7 +405,7 @@ int cifs_open(struct inode *inode, struct file *file)
}
if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS)
- rc = CIFSSMBOpen(xid, pTcon, full_path, disposition,
+ rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
& CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -278,7 +414,7 @@ int cifs_open(struct inode *inode, struct file *file)
if (rc == -EIO) {
/* Old server, try legacy style OpenX */
- rc = SMBLegacyOpen(xid, pTcon, full_path, disposition,
+ rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
& CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -295,12 +431,12 @@ int cifs_open(struct inode *inode, struct file *file)
}
pCifsFile = cifs_init_private(file->private_data, inode, file, netfid);
write_lock(&GlobalSMBSeslock);
- list_add(&pCifsFile->tlist, &pTcon->openFileList);
+ list_add(&pCifsFile->tlist, &tcon->openFileList);
pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
if (pCifsInode) {
rc = cifs_open_inode_helper(inode, file, pCifsInode,
- pCifsFile, pTcon,
+ pCifsFile, tcon,
&oplock, buf, full_path, xid);
} else {
write_unlock(&GlobalSMBSeslock);
@@ -309,7 +445,7 @@ int cifs_open(struct inode *inode, struct file *file)
if (oplock & CIFS_CREATE_ACTION) {
/* time to set mode which we can not set earlier due to
problems creating new read-only files */
- if (pTcon->unix_ext) {
+ if (tcon->unix_ext) {
struct cifs_unix_set_info_args args = {
.mode = inode->i_mode,
.uid = NO_CHANGE_64,
@@ -319,7 +455,7 @@ int cifs_open(struct inode *inode, struct file *file)
.mtime = NO_CHANGE_64,
.device = 0,
};
- CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
+ CIFSSMBUnixSetInfo(xid, tcon, full_path, &args,
cifs_sb->local_nls,
cifs_sb->mnt_cifs_flags &
CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -349,7 +485,7 @@ static int cifs_reopen_file(struct file *file, bool can_flush)
int rc = -EACCES;
int xid, oplock;
struct cifs_sb_info *cifs_sb;
- struct cifsTconInfo *pTcon;
+ struct cifsTconInfo *tcon;
struct cifsFileInfo *pCifsFile;
struct cifsInodeInfo *pCifsInode;
struct inode *inode;
@@ -387,7 +523,7 @@ static int cifs_reopen_file(struct file *file, bool can_flush)
}
cifs_sb = CIFS_SB(inode->i_sb);
- pTcon = cifs_sb->tcon;
+ tcon = cifs_sb->tcon;
/* can not grab rename sem here because various ops, including
those that already have the rename sem can end up causing writepage
@@ -404,20 +540,37 @@ reopen_error_exit:
cFYI(1, ("inode = 0x%p file flags 0x%x for %s",
inode, file->f_flags, full_path));
- desiredAccess = cifs_convert_flags(file->f_flags);
if (oplockEnabled)
oplock = REQ_OPLOCK;
else
oplock = 0;
+ if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
+ (CIFS_UNIX_POSIX_PATH_OPS_CAP &
+ le64_to_cpu(tcon->fsUnixInfo.Capability))) {
+ int oflags = (int) cifs_posix_convert_flags(file->f_flags);
+ /* can not refresh inode info since size could be stale */
+ rc = cifs_posix_open(full_path, NULL, inode->i_sb,
+ cifs_sb->mnt_file_mode /* ignored */,
+ oflags, &oplock, &netfid, xid);
+ if (rc == 0) {
+ cFYI(1, ("posix reopen succeeded"));
+ goto reopen_success;
+ }
+ /* fallthrough to retry open the old way on errors, especially
+ in the reconnect path it is important to retry hard */
+ }
+
+ desiredAccess = cifs_convert_flags(file->f_flags);
+
/* Can not refresh inode by passing in file_info buf to be returned
by SMBOpen and then calling get_inode_info with returned buf
since file might have write behind data that needs to be flushed
and server version of file size can be stale. If we knew for sure
that inode was not dirty locally we could do this */
- rc = CIFSSMBOpen(xid, pTcon, full_path, disposition, desiredAccess,
+ rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess,
CREATE_NOT_DIR, &netfid, &oplock, NULL,
cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -426,6 +579,7 @@ reopen_error_exit:
cFYI(1, ("cifs_open returned 0x%x", rc));
cFYI(1, ("oplock: %d", oplock));
} else {
+reopen_success:
pCifsFile->netfid = netfid;
pCifsFile->invalidHandle = false;
up(&pCifsFile->fh_sem);
@@ -439,7 +593,7 @@ reopen_error_exit:
go to server to get inode info */
pCifsInode->clientCanCacheAll = false;
pCifsInode->clientCanCacheRead = false;
- if (pTcon->unix_ext)
+ if (tcon->unix_ext)
rc = cifs_get_inode_info_unix(&inode,
full_path, inode->i_sb, xid);
else
@@ -467,7 +621,6 @@ reopen_error_exit:
cifs_relock_file(pCifsFile);
}
}
-
kfree(full_path);
FreeXid(xid);
return rc;
@@ -1523,6 +1676,9 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
{
int xid;
int rc = 0;
+ struct cifsTconInfo *tcon;
+ struct cifsFileInfo *smbfile =
+ (struct cifsFileInfo *)file->private_data;
struct inode *inode = file->f_path.dentry->d_inode;
xid = GetXid();
@@ -1534,7 +1690,12 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
if (rc == 0) {
rc = CIFS_I(inode)->write_behind_rc;
CIFS_I(inode)->write_behind_rc = 0;
+ tcon = CIFS_SB(inode->i_sb)->tcon;
+ if (!rc && tcon && smbfile &&
+ !(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
+ rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
}
+
FreeXid(xid);
return rc;
}
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 4690a360c855..a8797cc60805 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -763,6 +763,9 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid,
struct cifsTconInfo *pTcon = cifs_sb->tcon;
FILE_BASIC_INFO info_buf;
+ if (attrs == NULL)
+ return -EINVAL;
+
if (attrs->ia_valid & ATTR_ATIME) {
set_time = true;
info_buf.LastAccessTime =
diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h
new file mode 100644
index 000000000000..7056b891e087
--- /dev/null
+++ b/fs/cifs/smbfsctl.h
@@ -0,0 +1,84 @@
+/*
+ * fs/cifs/smbfsctl.h: SMB, CIFS, SMB2 FSCTL definitions
+ *
+ * Copyright (c) International Business Machines Corp., 2002,2009
+ * Author(s): Steve French (sfrench@us.ibm.com)
+ *
+ * This library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* IOCTL information */
+/*
+ * List of ioctl/fsctl function codes that are or could be useful in the
+ * future to remote clients like cifs or SMB2 client. There is probably
+ * a slightly larger set of fsctls that NTFS local filesystem could handle,
+ * including the seven below that we do not have struct definitions for.
+ * Even with protocol definitions for most of these now available, we still
+ * need to do some experimentation to identify which are practical to do
+ * remotely. Some of the following, such as the encryption/compression ones
+ * could be invoked from tools via a specialized hook into the VFS rather
+ * than via the standard vfs entry points
+ */
+#define FSCTL_REQUEST_OPLOCK_LEVEL_1 0x00090000
+#define FSCTL_REQUEST_OPLOCK_LEVEL_2 0x00090004
+#define FSCTL_REQUEST_BATCH_OPLOCK 0x00090008
+#define FSCTL_LOCK_VOLUME 0x00090018
+#define FSCTL_UNLOCK_VOLUME 0x0009001C
+#define FSCTL_IS_PATHNAME_VALID 0x0009002C /* BB add struct */
+#define FSCTL_GET_COMPRESSION 0x0009003C /* BB add struct */
+#define FSCTL_SET_COMPRESSION 0x0009C040 /* BB add struct */
+#define FSCTL_QUERY_FAT_BPB 0x00090058 /* BB add struct */
+/* Verify the next FSCTL number, we had it as 0x00090090 before */
+#define FSCTL_FILESYSTEM_GET_STATS 0x00090060 /* BB add struct */
+#define FSCTL_GET_NTFS_VOLUME_DATA 0x00090064 /* BB add struct */
+#define FSCTL_GET_RETRIEVAL_POINTERS 0x00090073 /* BB add struct */
+#define FSCTL_IS_VOLUME_DIRTY 0x00090078 /* BB add struct */
+#define FSCTL_ALLOW_EXTENDED_DASD_IO 0x00090083 /* BB add struct */
+#define FSCTL_REQUEST_FILTER_OPLOCK 0x0009008C
+#define FSCTL_FIND_FILES_BY_SID 0x0009008F /* BB add struct */
+#define FSCTL_SET_OBJECT_ID 0x00090098 /* BB add struct */
+#define FSCTL_GET_OBJECT_ID 0x0009009C /* BB add struct */
+#define FSCTL_DELETE_OBJECT_ID 0x000900A0 /* BB add struct */
+#define FSCTL_SET_REPARSE_POINT 0x000900A4 /* BB add struct */
+#define FSCTL_GET_REPARSE_POINT 0x000900A8 /* BB add struct */
+#define FSCTL_DELETE_REPARSE_POINT 0x000900AC /* BB add struct */
+#define FSCTL_SET_OBJECT_ID_EXTENDED 0x000900BC /* BB add struct */
+#define FSCTL_CREATE_OR_GET_OBJECT_ID 0x000900C0 /* BB add struct */
+#define FSCTL_SET_SPARSE 0x000900C4 /* BB add struct */
+#define FSCTL_SET_ZERO_DATA 0x000900C8 /* BB add struct */
+#define FSCTL_SET_ENCRYPTION 0x000900D7 /* BB add struct */
+#define FSCTL_ENCRYPTION_FSCTL_IO 0x000900DB /* BB add struct */
+#define FSCTL_WRITE_RAW_ENCRYPTED 0x000900DF /* BB add struct */
+#define FSCTL_READ_RAW_ENCRYPTED 0x000900E3 /* BB add struct */
+#define FSCTL_READ_FILE_USN_DATA 0x000900EB /* BB add struct */
+#define FSCTL_WRITE_USN_CLOSE_RECORD 0x000900EF /* BB add struct */
+#define FSCTL_SIS_COPYFILE 0x00090100 /* BB add struct */
+#define FSCTL_RECALL_FILE 0x00090117 /* BB add struct */
+#define FSCTL_QUERY_SPARING_INFO 0x00090138 /* BB add struct */
+#define FSCTL_SET_ZERO_ON_DEALLOC 0x00090194 /* BB add struct */
+#define FSCTL_SET_SHORT_NAME_BEHAVIOR 0x000901B4 /* BB add struct */
+#define FSCTL_QUERY_ALLOCATED_RANGES 0x000940CF /* BB add struct */
+#define FSCTL_SET_DEFECT_MANAGEMENT 0x00098134 /* BB add struct */
+#define FSCTL_SIS_LINK_FILES 0x0009C104
+#define FSCTL_PIPE_PEEK 0x0011400C /* BB add struct */
+#define FSCTL_PIPE_TRANSCEIVE 0x0011C017 /* BB add struct */
+/* strange that the number for this op is not sequential with previous op */
+#define FSCTL_PIPE_WAIT 0x00110018 /* BB add struct */
+#define FSCTL_LMR_GET_LINK_TRACK_INF 0x001400E8 /* BB add struct */
+#define FSCTL_LMR_SET_LINK_TRACK_INF 0x001400EC /* BB add struct */
+
+#define IO_REPARSE_TAG_MOUNT_POINT 0xA0000003
+#define IO_REPARSE_TAG_HSM 0xC0000004
+#define IO_REPARSE_TAG_SIS 0x80000007
diff --git a/fs/compat.c b/fs/compat.c
index d0145ca27572..0949b43794a4 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1402,6 +1402,7 @@ int compat_do_execve(char * filename,
retval = mutex_lock_interruptible(&current->cred_exec_mutex);
if (retval < 0)
goto out_free;
+ current->in_execve = 1;
retval = -ENOMEM;
bprm->cred = prepare_exec_creds();
@@ -1454,6 +1455,7 @@ int compat_do_execve(char * filename,
goto out;
/* execve succeeded */
+ current->in_execve = 0;
mutex_unlock(&current->cred_exec_mutex);
acct_update_integrals(current);
free_bprm(bprm);
@@ -1470,6 +1472,7 @@ out_file:
}
out_unlock:
+ current->in_execve = 0;
mutex_unlock(&current->cred_exec_mutex);
out_free:
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 45e59d3c7f1f..ff786687e93b 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -522,6 +522,11 @@ static int dev_ifsioc(unsigned int fd, unsigned int cmd, unsigned long arg)
if (err)
return -EFAULT;
break;
+ case SIOCSHWTSTAMP:
+ if (copy_from_user(&ifr, uifr32, sizeof(*uifr32)))
+ return -EFAULT;
+ ifr.ifr_data = compat_ptr(uifr32->ifr_ifru.ifru_data);
+ break;
default:
if (copy_from_user(&ifr, uifr32, sizeof(*uifr32)))
return -EFAULT;
@@ -1993,6 +1998,8 @@ COMPATIBLE_IOCTL(TUNSETGROUP)
COMPATIBLE_IOCTL(TUNGETFEATURES)
COMPATIBLE_IOCTL(TUNSETOFFLOAD)
COMPATIBLE_IOCTL(TUNSETTXFILTER)
+COMPATIBLE_IOCTL(TUNGETSNDBUF)
+COMPATIBLE_IOCTL(TUNSETSNDBUF)
/* Big V */
COMPATIBLE_IOCTL(VT_SETMODE)
COMPATIBLE_IOCTL(VT_GETMODE)
@@ -2566,6 +2573,7 @@ HANDLE_IOCTL(SIOCSIFMAP, dev_ifsioc)
HANDLE_IOCTL(SIOCGIFADDR, dev_ifsioc)
HANDLE_IOCTL(SIOCSIFADDR, dev_ifsioc)
HANDLE_IOCTL(SIOCSIFHWBROADCAST, dev_ifsioc)
+HANDLE_IOCTL(SIOCSHWTSTAMP, dev_ifsioc)
/* ioctls used by appletalk ddp.c */
HANDLE_IOCTL(SIOCATALKDIFADDR, dev_ifsioc)
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 5f3231b9633f..bff4052b05e7 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -198,9 +198,6 @@ static int mknod_ptmx(struct super_block *sb)
fsi->ptmx_dentry = dentry;
rc = 0;
-
- printk(KERN_DEBUG "Created ptmx node in devpts ino %lu\n",
- inode->i_ino);
out:
mutex_unlock(&root->d_inode->i_mutex);
return rc;
@@ -369,8 +366,6 @@ static int new_pts_mount(struct file_system_type *fs_type, int flags,
struct pts_fs_info *fsi;
struct pts_mount_opts *opts;
- printk(KERN_NOTICE "devpts: newinstance mount\n");
-
err = get_sb_nodev(fs_type, flags, data, devpts_fill_super, mnt);
if (err)
return err;
diff --git a/fs/dquot.c b/fs/dquot.c
index bca3cac4bee7..d6add0bf5ad3 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1057,10 +1057,7 @@ static void send_warning(const struct dquot *dquot, const char warntype)
goto attr_err_out;
genlmsg_end(skb, msg_head);
- ret = genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS);
- if (ret < 0 && ret != -ESRCH)
- printk(KERN_ERR
- "VFS: Failed to send notification message: %d\n", ret);
+ genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS);
return;
attr_err_out:
printk(KERN_ERR "VFS: Not enough space to compose quota message!\n");
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index f6caeb1d1106..8b65f289ee00 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -946,6 +946,8 @@ static int ecryptfs_copy_mount_wide_sigs_to_inode_sigs(
list_for_each_entry(global_auth_tok,
&mount_crypt_stat->global_auth_tok_list,
mount_crypt_stat_list) {
+ if (global_auth_tok->flags & ECRYPTFS_AUTH_TOK_FNEK)
+ continue;
rc = ecryptfs_add_keysig(crypt_stat, global_auth_tok->sig);
if (rc) {
printk(KERN_ERR "Error adding keysig; rc = [%d]\n", rc);
@@ -1322,14 +1324,13 @@ static int ecryptfs_write_headers_virt(char *page_virt, size_t max,
}
static int
-ecryptfs_write_metadata_to_contents(struct ecryptfs_crypt_stat *crypt_stat,
- struct dentry *ecryptfs_dentry,
- char *virt)
+ecryptfs_write_metadata_to_contents(struct dentry *ecryptfs_dentry,
+ char *virt, size_t virt_len)
{
int rc;
rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, virt,
- 0, crypt_stat->num_header_bytes_at_front);
+ 0, virt_len);
if (rc)
printk(KERN_ERR "%s: Error attempting to write header "
"information to lower file; rc = [%d]\n", __func__,
@@ -1339,7 +1340,6 @@ ecryptfs_write_metadata_to_contents(struct ecryptfs_crypt_stat *crypt_stat,
static int
ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry,
- struct ecryptfs_crypt_stat *crypt_stat,
char *page_virt, size_t size)
{
int rc;
@@ -1349,6 +1349,17 @@ ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry,
return rc;
}
+static unsigned long ecryptfs_get_zeroed_pages(gfp_t gfp_mask,
+ unsigned int order)
+{
+ struct page *page;
+
+ page = alloc_pages(gfp_mask | __GFP_ZERO, order);
+ if (page)
+ return (unsigned long) page_address(page);
+ return 0;
+}
+
/**
* ecryptfs_write_metadata
* @ecryptfs_dentry: The eCryptfs dentry
@@ -1365,7 +1376,9 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
{
struct ecryptfs_crypt_stat *crypt_stat =
&ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
+ unsigned int order;
char *virt;
+ size_t virt_len;
size_t size = 0;
int rc = 0;
@@ -1381,33 +1394,35 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
rc = -EINVAL;
goto out;
}
+ virt_len = crypt_stat->num_header_bytes_at_front;
+ order = get_order(virt_len);
/* Released in this function */
- virt = (char *)get_zeroed_page(GFP_KERNEL);
+ virt = (char *)ecryptfs_get_zeroed_pages(GFP_KERNEL, order);
if (!virt) {
printk(KERN_ERR "%s: Out of memory\n", __func__);
rc = -ENOMEM;
goto out;
}
- rc = ecryptfs_write_headers_virt(virt, PAGE_CACHE_SIZE, &size,
- crypt_stat, ecryptfs_dentry);
+ rc = ecryptfs_write_headers_virt(virt, virt_len, &size, crypt_stat,
+ ecryptfs_dentry);
if (unlikely(rc)) {
printk(KERN_ERR "%s: Error whilst writing headers; rc = [%d]\n",
__func__, rc);
goto out_free;
}
if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
- rc = ecryptfs_write_metadata_to_xattr(ecryptfs_dentry,
- crypt_stat, virt, size);
+ rc = ecryptfs_write_metadata_to_xattr(ecryptfs_dentry, virt,
+ size);
else
- rc = ecryptfs_write_metadata_to_contents(crypt_stat,
- ecryptfs_dentry, virt);
+ rc = ecryptfs_write_metadata_to_contents(ecryptfs_dentry, virt,
+ virt_len);
if (rc) {
printk(KERN_ERR "%s: Error writing metadata out to lower file; "
"rc = [%d]\n", __func__, rc);
goto out_free;
}
out_free:
- free_page((unsigned long)virt);
+ free_pages((unsigned long)virt, order);
out:
return rc;
}
@@ -2206,17 +2221,19 @@ int ecryptfs_decode_and_decrypt_filename(char **plaintext_name,
struct dentry *ecryptfs_dir_dentry,
const char *name, size_t name_size)
{
+ struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
+ &ecryptfs_superblock_to_private(
+ ecryptfs_dir_dentry->d_sb)->mount_crypt_stat;
char *decoded_name;
size_t decoded_name_size;
size_t packet_size;
int rc = 0;
- if ((name_size > ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE)
+ if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)
+ && !(mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
+ && (name_size > ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE)
&& (strncmp(name, ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX,
ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE) == 0)) {
- struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
- &ecryptfs_superblock_to_private(
- ecryptfs_dir_dentry->d_sb)->mount_crypt_stat;
const char *orig_name = name;
size_t orig_name_size = name_size;
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index c11fc95714ab..ac749d4d644f 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -328,6 +328,7 @@ struct ecryptfs_dentry_info {
*/
struct ecryptfs_global_auth_tok {
#define ECRYPTFS_AUTH_TOK_INVALID 0x00000001
+#define ECRYPTFS_AUTH_TOK_FNEK 0x00000002
u32 flags;
struct list_head mount_crypt_stat_list;
struct key *global_auth_tok_key;
@@ -619,7 +620,6 @@ int ecryptfs_interpose(struct dentry *hidden_dentry,
u32 flags);
int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
struct dentry *lower_dentry,
- struct ecryptfs_crypt_stat *crypt_stat,
struct inode *ecryptfs_dir_inode,
struct nameidata *ecryptfs_nd);
int ecryptfs_decode_and_decrypt_filename(char **decrypted_name,
@@ -696,7 +696,7 @@ ecryptfs_write_header_metadata(char *virt,
int ecryptfs_add_keysig(struct ecryptfs_crypt_stat *crypt_stat, char *sig);
int
ecryptfs_add_global_auth_tok(struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
- char *sig);
+ char *sig, u32 global_auth_tok_flags);
int ecryptfs_get_global_auth_tok_for_sig(
struct ecryptfs_global_auth_tok **global_auth_tok,
struct ecryptfs_mount_crypt_stat *mount_crypt_stat, char *sig);
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 5697899a168d..55b3145b8072 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -246,7 +246,6 @@ out:
*/
int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
struct dentry *lower_dentry,
- struct ecryptfs_crypt_stat *crypt_stat,
struct inode *ecryptfs_dir_inode,
struct nameidata *ecryptfs_nd)
{
@@ -254,6 +253,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
struct vfsmount *lower_mnt;
struct inode *lower_inode;
struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
+ struct ecryptfs_crypt_stat *crypt_stat;
char *page_virt = NULL;
u64 file_size;
int rc = 0;
@@ -314,6 +314,11 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
goto out_free_kmem;
}
}
+ crypt_stat = &ecryptfs_inode_to_private(
+ ecryptfs_dentry->d_inode)->crypt_stat;
+ /* TODO: lock for crypt_stat comparison */
+ if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED))
+ ecryptfs_set_default_sizes(crypt_stat);
rc = ecryptfs_read_and_validate_header_region(page_virt,
ecryptfs_dentry->d_inode);
if (rc) {
@@ -362,9 +367,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
{
char *encrypted_and_encoded_name = NULL;
size_t encrypted_and_encoded_name_size;
- struct ecryptfs_crypt_stat *crypt_stat = NULL;
struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL;
- struct ecryptfs_inode_info *inode_info;
struct dentry *lower_dir_dentry, *lower_dentry;
int rc = 0;
@@ -388,26 +391,15 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
}
if (lower_dentry->d_inode)
goto lookup_and_interpose;
- inode_info = ecryptfs_inode_to_private(ecryptfs_dentry->d_inode);
- if (inode_info) {
- crypt_stat = &inode_info->crypt_stat;
- /* TODO: lock for crypt_stat comparison */
- if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED))
- ecryptfs_set_default_sizes(crypt_stat);
- }
- if (crypt_stat)
- mount_crypt_stat = crypt_stat->mount_crypt_stat;
- else
- mount_crypt_stat = &ecryptfs_superblock_to_private(
- ecryptfs_dentry->d_sb)->mount_crypt_stat;
- if (!(crypt_stat && (crypt_stat->flags & ECRYPTFS_ENCRYPT_FILENAMES))
- && !(mount_crypt_stat && (mount_crypt_stat->flags
- & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)))
+ mount_crypt_stat = &ecryptfs_superblock_to_private(
+ ecryptfs_dentry->d_sb)->mount_crypt_stat;
+ if (!(mount_crypt_stat
+ && (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)))
goto lookup_and_interpose;
dput(lower_dentry);
rc = ecryptfs_encrypt_and_encode_filename(
&encrypted_and_encoded_name, &encrypted_and_encoded_name_size,
- crypt_stat, mount_crypt_stat, ecryptfs_dentry->d_name.name,
+ NULL, mount_crypt_stat, ecryptfs_dentry->d_name.name,
ecryptfs_dentry->d_name.len);
if (rc) {
printk(KERN_ERR "%s: Error attempting to encrypt and encode "
@@ -426,7 +418,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
}
lookup_and_interpose:
rc = ecryptfs_lookup_and_interpose_lower(ecryptfs_dentry, lower_dentry,
- crypt_stat, ecryptfs_dir_inode,
+ ecryptfs_dir_inode,
ecryptfs_nd);
goto out;
out_d_drop:
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index ff539420cc6f..e4a6223c3145 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -2375,7 +2375,7 @@ struct kmem_cache *ecryptfs_global_auth_tok_cache;
int
ecryptfs_add_global_auth_tok(struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
- char *sig)
+ char *sig, u32 global_auth_tok_flags)
{
struct ecryptfs_global_auth_tok *new_auth_tok;
int rc = 0;
@@ -2389,6 +2389,7 @@ ecryptfs_add_global_auth_tok(struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
goto out;
}
memcpy(new_auth_tok->sig, sig, ECRYPTFS_SIG_SIZE_HEX);
+ new_auth_tok->flags = global_auth_tok_flags;
new_auth_tok->sig[ECRYPTFS_SIG_SIZE_HEX] = '\0';
mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex);
list_add(&new_auth_tok->mount_crypt_stat_list,
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 789cf2e1be1e..aed56c25539b 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -319,7 +319,7 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
case ecryptfs_opt_ecryptfs_sig:
sig_src = args[0].from;
rc = ecryptfs_add_global_auth_tok(mount_crypt_stat,
- sig_src);
+ sig_src, 0);
if (rc) {
printk(KERN_ERR "Error attempting to register "
"global sig; rc = [%d]\n", rc);
@@ -370,7 +370,8 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
ECRYPTFS_SIG_SIZE_HEX] = '\0';
rc = ecryptfs_add_global_auth_tok(
mount_crypt_stat,
- mount_crypt_stat->global_default_fnek_sig);
+ mount_crypt_stat->global_default_fnek_sig,
+ ECRYPTFS_AUTH_TOK_FNEK);
if (rc) {
printk(KERN_ERR "Error attempting to register "
"global fnek sig [%s]; rc = [%d]\n",
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 011b9b8c90c6..c5c424f23fd5 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -417,10 +417,10 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
ep_unregister_pollwait(ep, epi);
/* Remove the current item from the list of epoll hooks */
- spin_lock(&file->f_ep_lock);
+ spin_lock(&file->f_lock);
if (ep_is_linked(&epi->fllink))
list_del_init(&epi->fllink);
- spin_unlock(&file->f_ep_lock);
+ spin_unlock(&file->f_lock);
rb_erase(&epi->rbn, &ep->rbr);
@@ -538,7 +538,7 @@ void eventpoll_release_file(struct file *file)
struct epitem *epi;
/*
- * We don't want to get "file->f_ep_lock" because it is not
+ * We don't want to get "file->f_lock" because it is not
* necessary. It is not necessary because we're in the "struct file"
* cleanup path, and this means that noone is using this file anymore.
* So, for example, epoll_ctl() cannot hit here sicne if we reach this
@@ -547,6 +547,8 @@ void eventpoll_release_file(struct file *file)
* will correctly serialize the operation. We do need to acquire
* "ep->mtx" after "epmutex" because ep_remove() requires it when called
* from anywhere but ep_free().
+ *
+ * Besides, ep_remove() acquires the lock, so we can't hold it here.
*/
mutex_lock(&epmutex);
@@ -785,9 +787,9 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
goto error_unregister;
/* Add the current item to the list of active epoll hook for this file */
- spin_lock(&tfile->f_ep_lock);
+ spin_lock(&tfile->f_lock);
list_add_tail(&epi->fllink, &tfile->f_ep_links);
- spin_unlock(&tfile->f_ep_lock);
+ spin_unlock(&tfile->f_lock);
/*
* Add the current item to the RB tree. All RB tree operations are
diff --git a/fs/exec.c b/fs/exec.c
index 929b58004b7e..b9f1c144b7a1 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -45,6 +45,7 @@
#include <linux/proc_fs.h>
#include <linux/mount.h>
#include <linux/security.h>
+#include <linux/ima.h>
#include <linux/syscalls.h>
#include <linux/tsacct_kern.h>
#include <linux/cn_proc.h>
@@ -127,6 +128,9 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
MAY_READ | MAY_EXEC | MAY_OPEN);
if (error)
goto exit;
+ error = ima_path_check(&nd.path, MAY_READ | MAY_EXEC | MAY_OPEN);
+ if (error)
+ goto exit;
file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE);
error = PTR_ERR(file);
@@ -674,6 +678,9 @@ struct file *open_exec(const char *name)
err = inode_permission(nd.path.dentry->d_inode, MAY_EXEC | MAY_OPEN);
if (err)
goto out_path_put;
+ err = ima_path_check(&nd.path, MAY_EXEC | MAY_OPEN);
+ if (err)
+ goto out_path_put;
file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE);
if (IS_ERR(file))
@@ -1184,6 +1191,9 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
retval = security_bprm_check(bprm);
if (retval)
return retval;
+ retval = ima_bprm_check(bprm);
+ if (retval)
+ return retval;
/* kernel module loader fixup */
/* so we don't try to load run modprobe in kernel space. */
@@ -1284,6 +1294,7 @@ int do_execve(char * filename,
retval = mutex_lock_interruptible(&current->cred_exec_mutex);
if (retval < 0)
goto out_free;
+ current->in_execve = 1;
retval = -ENOMEM;
bprm->cred = prepare_exec_creds();
@@ -1337,6 +1348,7 @@ int do_execve(char * filename,
goto out;
/* execve succeeded */
+ current->in_execve = 0;
mutex_unlock(&current->cred_exec_mutex);
acct_update_integrals(current);
free_bprm(bprm);
@@ -1355,6 +1367,7 @@ out_file:
}
out_unlock:
+ current->in_execve = 0;
mutex_unlock(&current->cred_exec_mutex);
out_free:
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 5fa453b49a64..05e5c2e5c0d7 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1435,6 +1435,10 @@ static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh)
return 0;
}
+static int buffer_unmapped(handle_t *handle, struct buffer_head *bh)
+{
+ return !buffer_mapped(bh);
+}
/*
* Note that we always start a transaction even if we're not journalling
* data. This is to preserve ordering: any hole instantiation within
@@ -1505,6 +1509,15 @@ static int ext3_ordered_writepage(struct page *page,
if (ext3_journal_current_handle())
goto out_fail;
+ if (!page_has_buffers(page)) {
+ create_empty_buffers(page, inode->i_sb->s_blocksize,
+ (1 << BH_Dirty)|(1 << BH_Uptodate));
+ } else if (!walk_page_buffers(NULL, page_buffers(page), 0, PAGE_CACHE_SIZE, NULL, buffer_unmapped)) {
+ /* Provide NULL instead of get_block so that we catch bugs if buffers weren't really mapped */
+ return block_write_full_page(page, NULL, wbc);
+ }
+ page_bufs = page_buffers(page);
+
handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
if (IS_ERR(handle)) {
@@ -1512,11 +1525,6 @@ static int ext3_ordered_writepage(struct page *page,
goto out_fail;
}
- if (!page_has_buffers(page)) {
- create_empty_buffers(page, inode->i_sb->s_blocksize,
- (1 << BH_Dirty)|(1 << BH_Uptodate));
- }
- page_bufs = page_buffers(page);
walk_page_buffers(handle, page_bufs, 0,
PAGE_CACHE_SIZE, NULL, bget_one);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e2eab196875f..e0aa4fe4f596 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1122,7 +1122,8 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
struct ext4_extent_idx *ix;
struct ext4_extent *ex;
ext4_fsblk_t block;
- int depth, ee_len;
+ int depth; /* Note, NOT eh_depth; depth from top of tree */
+ int ee_len;
BUG_ON(path == NULL);
depth = path->p_depth;
@@ -1179,7 +1180,8 @@ got_index:
if (bh == NULL)
return -EIO;
eh = ext_block_hdr(bh);
- if (ext4_ext_check_header(inode, eh, depth)) {
+ /* subtract from p_depth to get proper eh_depth */
+ if (ext4_ext_check_header(inode, eh, path->p_depth - depth)) {
put_bh(bh);
return -EIO;
}
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index f18a919be70b..2d2b3585ee91 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -188,7 +188,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
struct ext4_group_desc *gdp;
struct ext4_super_block *es;
struct ext4_sb_info *sbi;
- int fatal = 0, err, count;
+ int fatal = 0, err, count, cleared;
ext4_group_t flex_group;
if (atomic_read(&inode->i_count) > 1) {
@@ -248,8 +248,10 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
goto error_return;
/* Ok, now we can actually update the inode bitmaps.. */
- if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
- bit, bitmap_bh->b_data))
+ spin_lock(sb_bgl_lock(sbi, block_group));
+ cleared = ext4_clear_bit(bit, bitmap_bh->b_data);
+ spin_unlock(sb_bgl_lock(sbi, block_group));
+ if (!cleared)
ext4_error(sb, "ext4_free_inode",
"bit already cleared for inode %lu", ino);
else {
@@ -696,6 +698,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
struct inode *ret;
ext4_group_t i;
int free = 0;
+ static int once = 1;
ext4_group_t flex_group;
/* Cannot create files in a deleted directory */
@@ -717,7 +720,8 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
ret2 = find_group_flex(sb, dir, &group);
if (ret2 == -1) {
ret2 = find_group_other(sb, dir, &group);
- if (ret2 == 0 && printk_ratelimit())
+ if (ret2 == 0 && once)
+ once = 0;
printk(KERN_NOTICE "ext4: find_group_flex "
"failed, fallback succeeded dir %lu\n",
dir->i_ino);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 4415beeb0b62..9f61e62f435f 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1447,7 +1447,7 @@ static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
struct ext4_free_extent *gex = &ac->ac_g_ex;
BUG_ON(ex->fe_len <= 0);
- BUG_ON(ex->fe_len >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
+ BUG_ON(ex->fe_len > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
@@ -3292,7 +3292,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
}
BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
start > ac->ac_o_ex.fe_logical);
- BUG_ON(size <= 0 || size >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
+ BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
/* now prepare goal request */
@@ -3589,6 +3589,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
struct super_block *sb, struct ext4_prealloc_space *pa)
{
ext4_group_t grp;
+ ext4_fsblk_t grp_blk;
if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
return;
@@ -3603,8 +3604,12 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
pa->pa_deleted = 1;
spin_unlock(&pa->pa_lock);
- /* -1 is to protect from crossing allocation group */
- ext4_get_group_no_and_offset(sb, pa->pa_pstart - 1, &grp, NULL);
+ grp_blk = pa->pa_pstart;
+ /* If linear, pa_pstart may be in the next group when pa is used up */
+ if (pa->pa_linear)
+ grp_blk--;
+
+ ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL);
/*
* possible race:
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 6b74d09adbe5..de0004fe6e00 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -202,9 +202,9 @@ static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
sector_t blocknr;
/* fat_get_cluster() assumes the requested blocknr isn't truncated. */
- mutex_lock(&mapping->host->i_mutex);
+ down_read(&mapping->host->i_alloc_sem);
blocknr = generic_block_bmap(mapping, block, fat_get_block);
- mutex_unlock(&mapping->host->i_mutex);
+ up_read(&mapping->host->i_alloc_sem);
return blocknr;
}
diff --git a/fs/fcntl.c b/fs/fcntl.c
index bd215cc791da..d865ca66ccba 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -141,7 +141,7 @@ SYSCALL_DEFINE1(dup, unsigned int, fildes)
return ret;
}
-#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT | O_NOATIME)
+#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
static int setfl(int fd, struct file * filp, unsigned long arg)
{
@@ -177,21 +177,21 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
return error;
/*
- * We still need a lock here for now to keep multiple FASYNC calls
- * from racing with each other.
+ * ->fasync() is responsible for setting the FASYNC bit.
*/
- lock_kernel();
- if ((arg ^ filp->f_flags) & FASYNC) {
- if (filp->f_op && filp->f_op->fasync) {
- error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
- if (error < 0)
- goto out;
- }
+ if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op &&
+ filp->f_op->fasync) {
+ error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
+ if (error < 0)
+ goto out;
+ if (error > 0)
+ error = 0;
}
-
+ spin_lock(&filp->f_lock);
filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
+ spin_unlock(&filp->f_lock);
+
out:
- unlock_kernel();
return error;
}
@@ -516,7 +516,7 @@ static DEFINE_RWLOCK(fasync_lock);
static struct kmem_cache *fasync_cache __read_mostly;
/*
- * fasync_helper() is used by some character device drivers (mainly mice)
+ * fasync_helper() is used by almost all character device drivers
* to set up the fasync queue. It returns negative on error, 0 if it did
* no changes and positive if it added/deleted the entry.
*/
@@ -555,6 +555,13 @@ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fap
result = 1;
}
out:
+ /* Fix up FASYNC bit while still holding fasync_lock */
+ spin_lock(&filp->f_lock);
+ if (on)
+ filp->f_flags |= FASYNC;
+ else
+ filp->f_flags &= ~FASYNC;
+ spin_unlock(&filp->f_lock);
write_unlock_irq(&fasync_lock);
return result;
}
diff --git a/fs/file_table.c b/fs/file_table.c
index bbeeac6efa1a..b74a8e1da913 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -13,6 +13,7 @@
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/security.h>
+#include <linux/ima.h>
#include <linux/eventpoll.h>
#include <linux/rcupdate.h>
#include <linux/mount.h>
@@ -127,6 +128,7 @@ struct file *get_empty_filp(void)
atomic_long_set(&f->f_count, 1);
rwlock_init(&f->f_owner.lock);
f->f_cred = get_cred(cred);
+ spin_lock_init(&f->f_lock);
eventpoll_init_file(f);
/* f->f_version: 0 */
return f;
@@ -279,6 +281,7 @@ void __fput(struct file *file)
if (file->f_op && file->f_op->release)
file->f_op->release(inode, file);
security_file_free(file);
+ ima_file_free(file);
if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL))
cdev_put(inode->i_cdev);
fops_put(file->f_op);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e5eaa62fd17f..e3fe9918faaf 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -274,6 +274,7 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
int ret;
BUG_ON(inode->i_state & I_SYNC);
+ WARN_ON(inode->i_state & I_NEW);
/* Set I_SYNC, reset I_DIRTY */
dirty = inode->i_state & I_DIRTY;
@@ -298,6 +299,7 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
}
spin_lock(&inode_lock);
+ WARN_ON(inode->i_state & I_NEW);
inode->i_state &= ~I_SYNC;
if (!(inode->i_state & I_FREEING)) {
if (!(inode->i_state & I_DIRTY) &&
@@ -470,6 +472,11 @@ void generic_sync_sb_inodes(struct super_block *sb,
break;
}
+ if (inode->i_state & I_NEW) {
+ requeue_io(inode);
+ continue;
+ }
+
if (wbc->nonblocking && bdi_write_congested(bdi)) {
wbc->encountered_congestion = 1;
if (!sb_is_blkdev_sb(sb))
@@ -531,7 +538,7 @@ void generic_sync_sb_inodes(struct super_block *sb,
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
struct address_space *mapping;
- if (inode->i_state & (I_FREEING|I_WILL_FREE))
+ if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
continue;
mapping = inode->i_mapping;
if (mapping->nrpages == 0)
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index e563a6449811..3a981b7f64ca 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -1,6 +1,10 @@
config GFS2_FS
tristate "GFS2 file system support"
depends on EXPERIMENTAL && (64BIT || LBD)
+ select DLM if GFS2_FS_LOCKING_DLM
+ select CONFIGFS_FS if GFS2_FS_LOCKING_DLM
+ select SYSFS if GFS2_FS_LOCKING_DLM
+ select IP_SCTP if DLM_SCTP
select FS_POSIX_ACL
select CRC32
help
@@ -18,17 +22,16 @@ config GFS2_FS
the locking module below. Documentation and utilities for GFS2 can
be found here: http://sources.redhat.com/cluster
- The "nolock" lock module is now built in to GFS2 by default.
+ The "nolock" lock module is now built in to GFS2 by default. If
+ you want to use the DLM, be sure to enable HOTPLUG and IPv4/6
+ networking.
config GFS2_FS_LOCKING_DLM
- tristate "GFS2 DLM locking module"
- depends on GFS2_FS && SYSFS && NET && INET && (IPV6 || IPV6=n)
- select IP_SCTP if DLM_SCTP
- select CONFIGFS_FS
- select DLM
+ bool "GFS2 DLM locking"
+ depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && HOTPLUG
help
Multiple node locking module for GFS2
- Most users of GFS2 will require this module. It provides the locking
+ Most users of GFS2 will require this. It provides the locking
interface between GFS2 and the DLM, which is required to use GFS2
in a cluster environment.
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index c1b4ec6a9650..a851ea4bdf70 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,9 +1,9 @@
obj-$(CONFIG_GFS2_FS) += gfs2.o
gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \
- glops.o inode.o log.o lops.o locking.o main.o meta_io.o \
+ glops.o inode.o log.o lops.o main.o meta_io.o \
mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
ops_fstype.o ops_inode.o ops_super.o quota.o \
recovery.o rgrp.o super.o sys.o trans.o util.o
-obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += locking/dlm/
+gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index e335dceb6a4f..43764f4fa763 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -15,7 +15,6 @@
#include <linux/posix_acl.h>
#include <linux/posix_acl_xattr.h>
#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 11ffc56f1f81..3a5d3f883e10 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -13,7 +13,6 @@
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
-#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index b7c8e5c70791..aef4d0c06748 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -60,7 +60,6 @@
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/vmalloc.h>
-#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c
index f114ba2b3557..dee9b03e5b37 100644
--- a/fs/gfs2/eaops.c
+++ b/fs/gfs2/eaops.c
@@ -14,7 +14,6 @@
#include <linux/capability.h>
#include <linux/xattr.h>
#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
#include <asm/uaccess.h>
#include "gfs2.h"
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index 0d1c76d906ae..899763aed217 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -13,7 +13,6 @@
#include <linux/buffer_head.h>
#include <linux/xattr.h>
#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
#include <asm/uaccess.h>
#include "gfs2.h"
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 6b983aef785d..3984e47d1d33 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -10,7 +10,6 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
-#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/delay.h>
#include <linux/sort.h>
@@ -18,7 +17,6 @@
#include <linux/kallsyms.h>
#include <linux/gfs2_ondisk.h>
#include <linux/list.h>
-#include <linux/lm_interface.h>
#include <linux/wait.h>
#include <linux/module.h>
#include <linux/rwsem.h>
@@ -155,13 +153,10 @@ static void glock_free(struct gfs2_glock *gl)
struct gfs2_sbd *sdp = gl->gl_sbd;
struct inode *aspace = gl->gl_aspace;
- if (sdp->sd_lockstruct.ls_ops->lm_put_lock)
- sdp->sd_lockstruct.ls_ops->lm_put_lock(gl->gl_lock);
-
if (aspace)
gfs2_aspace_put(aspace);
- kmem_cache_free(gfs2_glock_cachep, gl);
+ sdp->sd_lockstruct.ls_ops->lm_put_lock(gfs2_glock_cachep, gl);
}
/**
@@ -172,6 +167,7 @@ static void glock_free(struct gfs2_glock *gl)
static void gfs2_glock_hold(struct gfs2_glock *gl)
{
+ GLOCK_BUG_ON(gl, atomic_read(&gl->gl_ref) == 0);
atomic_inc(&gl->gl_ref);
}
@@ -211,17 +207,15 @@ int gfs2_glock_put(struct gfs2_glock *gl)
atomic_dec(&lru_count);
}
spin_unlock(&lru_lock);
- GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_UNLOCKED);
- GLOCK_BUG_ON(gl, !list_empty(&gl->gl_lru));
GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
glock_free(gl);
rv = 1;
goto out;
}
- write_unlock(gl_lock_addr(gl->gl_hash));
/* 1 for being hashed, 1 for having state != LM_ST_UNLOCKED */
if (atomic_read(&gl->gl_ref) == 2)
gfs2_glock_schedule_for_reclaim(gl);
+ write_unlock(gl_lock_addr(gl->gl_hash));
out:
return rv;
}
@@ -256,27 +250,6 @@ static struct gfs2_glock *search_bucket(unsigned int hash,
}
/**
- * gfs2_glock_find() - Find glock by lock number
- * @sdp: The GFS2 superblock
- * @name: The lock name
- *
- * Returns: NULL, or the struct gfs2_glock with the requested number
- */
-
-static struct gfs2_glock *gfs2_glock_find(const struct gfs2_sbd *sdp,
- const struct lm_lockname *name)
-{
- unsigned int hash = gl_hash(sdp, name);
- struct gfs2_glock *gl;
-
- read_lock(gl_lock_addr(hash));
- gl = search_bucket(hash, sdp, name);
- read_unlock(gl_lock_addr(hash));
-
- return gl;
-}
-
-/**
* may_grant - check if its ok to grant a new lock
* @gl: The glock
* @gh: The lock request which we wish to grant
@@ -523,7 +496,7 @@ out_locked:
}
static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
- unsigned int cur_state, unsigned int req_state,
+ unsigned int req_state,
unsigned int flags)
{
int ret = LM_OUT_ERROR;
@@ -532,7 +505,7 @@ static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
return req_state == LM_ST_UNLOCKED ? 0 : req_state;
if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
- ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock, cur_state,
+ ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock,
req_state, flags);
return ret;
}
@@ -575,7 +548,7 @@ __acquires(&gl->gl_spin)
gl->gl_state == LM_ST_DEFERRED) &&
!(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
lck_flags |= LM_FLAG_TRY_1CB;
- ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, target, lck_flags);
+ ret = gfs2_lm_lock(sdp, gl, target, lck_flags);
if (!(ret & LM_OUT_ASYNC)) {
finish_xmote(gl, ret);
@@ -624,10 +597,11 @@ __acquires(&gl->gl_spin)
GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags));
+ down_read(&gfs2_umount_flush_sem);
if (test_bit(GLF_DEMOTE, &gl->gl_flags) &&
gl->gl_demote_state != gl->gl_state) {
if (find_first_holder(gl))
- goto out;
+ goto out_unlock;
if (nonblock)
goto out_sched;
set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
@@ -638,23 +612,26 @@ __acquires(&gl->gl_spin)
gfs2_demote_wake(gl);
ret = do_promote(gl);
if (ret == 0)
- goto out;
+ goto out_unlock;
if (ret == 2)
- return;
+ goto out_sem;
gh = find_first_waiter(gl);
gl->gl_target = gh->gh_state;
if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
do_error(gl, 0); /* Fail queued try locks */
}
do_xmote(gl, gh, gl->gl_target);
+out_sem:
+ up_read(&gfs2_umount_flush_sem);
return;
out_sched:
gfs2_glock_hold(gl);
if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
gfs2_glock_put(gl);
-out:
+out_unlock:
clear_bit(GLF_LOCK, &gl->gl_flags);
+ goto out_sem;
}
static void glock_work_func(struct work_struct *work)
@@ -681,18 +658,6 @@ static void glock_work_func(struct work_struct *work)
gfs2_glock_put(gl);
}
-static int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name,
- void **lockp)
-{
- int error = -EIO;
- if (!sdp->sd_lockstruct.ls_ops->lm_get_lock)
- return 0;
- if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
- error = sdp->sd_lockstruct.ls_ops->lm_get_lock(
- sdp->sd_lockstruct.ls_lockspace, name, lockp);
- return error;
-}
-
/**
* gfs2_glock_get() - Get a glock, or create one if one doesn't exist
* @sdp: The GFS2 superblock
@@ -719,10 +684,11 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
gl = search_bucket(hash, sdp, &name);
read_unlock(gl_lock_addr(hash));
- if (gl || !create) {
- *glp = gl;
+ *glp = gl;
+ if (gl)
return 0;
- }
+ if (!create)
+ return -ENOENT;
gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL);
if (!gl)
@@ -736,7 +702,9 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
gl->gl_demote_state = LM_ST_EXCLUSIVE;
gl->gl_hash = hash;
gl->gl_ops = glops;
- gl->gl_stamp = jiffies;
+ snprintf(gl->gl_strname, GDLM_STRNAME_BYTES, "%8x%16llx", name.ln_type, (unsigned long long)number);
+ memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
+ gl->gl_lksb.sb_lvbptr = gl->gl_lvb;
gl->gl_tchange = jiffies;
gl->gl_object = NULL;
gl->gl_sbd = sdp;
@@ -753,10 +721,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
}
}
- error = gfs2_lm_get_lock(sdp, &name, &gl->gl_lock);
- if (error)
- goto fail_aspace;
-
write_lock(gl_lock_addr(hash));
tmp = search_bucket(hash, sdp, &name);
if (tmp) {
@@ -772,9 +736,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
return 0;
-fail_aspace:
- if (gl->gl_aspace)
- gfs2_aspace_put(gl->gl_aspace);
fail:
kmem_cache_free(gfs2_glock_cachep, gl);
return error;
@@ -966,7 +927,7 @@ do_cancel:
if (!(gh->gh_flags & LM_FLAG_PRIORITY)) {
spin_unlock(&gl->gl_spin);
if (sdp->sd_lockstruct.ls_ops->lm_cancel)
- sdp->sd_lockstruct.ls_ops->lm_cancel(gl->gl_lock);
+ sdp->sd_lockstruct.ls_ops->lm_cancel(gl);
spin_lock(&gl->gl_spin);
}
return;
@@ -1051,7 +1012,6 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
spin_lock(&gl->gl_spin);
clear_bit(GLF_LOCK, &gl->gl_flags);
}
- gl->gl_stamp = jiffies;
if (list_empty(&gl->gl_holders) &&
!test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
!test_bit(GLF_DEMOTE, &gl->gl_flags))
@@ -1240,70 +1200,13 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs)
gfs2_glock_dq_uninit(&ghs[x]);
}
-static int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp)
-{
- int error = -EIO;
- if (!sdp->sd_lockstruct.ls_ops->lm_hold_lvb)
- return 0;
- if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
- error = sdp->sd_lockstruct.ls_ops->lm_hold_lvb(lock, lvbp);
- return error;
-}
-
-/**
- * gfs2_lvb_hold - attach a LVB from a glock
- * @gl: The glock in question
- *
- */
-
-int gfs2_lvb_hold(struct gfs2_glock *gl)
-{
- int error;
-
- if (!atomic_read(&gl->gl_lvb_count)) {
- error = gfs2_lm_hold_lvb(gl->gl_sbd, gl->gl_lock, &gl->gl_lvb);
- if (error)
- return error;
- gfs2_glock_hold(gl);
- }
- atomic_inc(&gl->gl_lvb_count);
-
- return 0;
-}
-
-/**
- * gfs2_lvb_unhold - detach a LVB from a glock
- * @gl: The glock in question
- *
- */
-
-void gfs2_lvb_unhold(struct gfs2_glock *gl)
-{
- struct gfs2_sbd *sdp = gl->gl_sbd;
-
- gfs2_glock_hold(gl);
- gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count) > 0);
- if (atomic_dec_and_test(&gl->gl_lvb_count)) {
- if (sdp->sd_lockstruct.ls_ops->lm_unhold_lvb)
- sdp->sd_lockstruct.ls_ops->lm_unhold_lvb(gl->gl_lock, gl->gl_lvb);
- gl->gl_lvb = NULL;
- gfs2_glock_put(gl);
- }
- gfs2_glock_put(gl);
-}
-
-static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
- unsigned int state)
+void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
{
- struct gfs2_glock *gl;
unsigned long delay = 0;
unsigned long holdtime;
unsigned long now = jiffies;
- gl = gfs2_glock_find(sdp, name);
- if (!gl)
- return;
-
+ gfs2_glock_hold(gl);
holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
if (time_before(now, holdtime))
delay = holdtime - now;
@@ -1317,74 +1220,33 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
gfs2_glock_put(gl);
}
-static void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid)
-{
- struct gfs2_jdesc *jd;
-
- spin_lock(&sdp->sd_jindex_spin);
- list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
- if (jd->jd_jid != jid)
- continue;
- jd->jd_dirty = 1;
- break;
- }
- spin_unlock(&sdp->sd_jindex_spin);
-}
-
/**
- * gfs2_glock_cb - Callback used by locking module
- * @sdp: Pointer to the superblock
- * @type: Type of callback
- * @data: Type dependent data pointer
+ * gfs2_glock_complete - Callback used by locking
+ * @gl: Pointer to the glock
+ * @ret: The return value from the dlm
*
- * Called by the locking module when it wants to tell us something.
- * Either we need to drop a lock, one of our ASYNC requests completed, or
- * a journal from another client needs to be recovered.
*/
-void gfs2_glock_cb(void *cb_data, unsigned int type, void *data)
+void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
{
- struct gfs2_sbd *sdp = cb_data;
-
- switch (type) {
- case LM_CB_NEED_E:
- blocking_cb(sdp, data, LM_ST_UNLOCKED);
- return;
-
- case LM_CB_NEED_D:
- blocking_cb(sdp, data, LM_ST_DEFERRED);
- return;
-
- case LM_CB_NEED_S:
- blocking_cb(sdp, data, LM_ST_SHARED);
- return;
-
- case LM_CB_ASYNC: {
- struct lm_async_cb *async = data;
- struct gfs2_glock *gl;
-
- down_read(&gfs2_umount_flush_sem);
- gl = gfs2_glock_find(sdp, &async->lc_name);
- if (gfs2_assert_warn(sdp, gl))
+ struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
+ gl->gl_reply = ret;
+ if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) {
+ struct gfs2_holder *gh;
+ spin_lock(&gl->gl_spin);
+ gh = find_first_waiter(gl);
+ if ((!(gh && (gh->gh_flags & LM_FLAG_NOEXP)) &&
+ (gl->gl_target != LM_ST_UNLOCKED)) ||
+ ((ret & ~LM_OUT_ST_MASK) != 0))
+ set_bit(GLF_FROZEN, &gl->gl_flags);
+ spin_unlock(&gl->gl_spin);
+ if (test_bit(GLF_FROZEN, &gl->gl_flags))
return;
- gl->gl_reply = async->lc_ret;
- set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
- if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
- gfs2_glock_put(gl);
- up_read(&gfs2_umount_flush_sem);
- return;
- }
-
- case LM_CB_NEED_RECOVERY:
- gfs2_jdesc_make_dirty(sdp, *(unsigned int *)data);
- if (sdp->sd_recoverd_process)
- wake_up_process(sdp->sd_recoverd_process);
- return;
-
- default:
- gfs2_assert_warn(sdp, 0);
- return;
}
+ set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
+ gfs2_glock_hold(gl);
+ if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
+ gfs2_glock_put(gl);
}
/**
@@ -1515,6 +1377,25 @@ out:
return has_entries;
}
+
+/**
+ * thaw_glock - thaw out a glock which has an unprocessed reply waiting
+ * @gl: The glock to thaw
+ *
+ * N.B. When we freeze a glock, we leave a ref to the glock outstanding,
+ * so this has to result in the ref count being dropped by one.
+ */
+
+static void thaw_glock(struct gfs2_glock *gl)
+{
+ if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))
+ return;
+ set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
+ gfs2_glock_hold(gl);
+ if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
+ gfs2_glock_put(gl);
+}
+
/**
* clear_glock - look at a glock and see if we can free it from glock cache
* @gl: the glock to look at
@@ -1540,6 +1421,20 @@ static void clear_glock(struct gfs2_glock *gl)
}
/**
+ * gfs2_glock_thaw - Thaw any frozen glocks
+ * @sdp: The super block
+ *
+ */
+
+void gfs2_glock_thaw(struct gfs2_sbd *sdp)
+{
+ unsigned x;
+
+ for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
+ examine_bucket(thaw_glock, sdp, x);
+}
+
+/**
* gfs2_gl_hash_clear - Empty out the glock hash table
* @sdp: the filesystem
* @wait: wait until it's all gone
@@ -1619,7 +1514,7 @@ static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags)
if (flags & LM_FLAG_NOEXP)
*p++ = 'e';
if (flags & LM_FLAG_ANY)
- *p++ = 'a';
+ *p++ = 'A';
if (flags & LM_FLAG_PRIORITY)
*p++ = 'p';
if (flags & GL_ASYNC)
@@ -1683,6 +1578,10 @@ static const char *gflags2str(char *buf, const unsigned long *gflags)
*p++ = 'i';
if (test_bit(GLF_REPLY_PENDING, gflags))
*p++ = 'r';
+ if (test_bit(GLF_INITIAL, gflags))
+ *p++ = 'I';
+ if (test_bit(GLF_FROZEN, gflags))
+ *p++ = 'F';
*p = 0;
return buf;
}
@@ -1717,14 +1616,13 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
dtime *= 1000000/HZ; /* demote time in uSec */
if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
dtime = 0;
- gfs2_print_dbg(seq, "G: s:%s n:%u/%llu f:%s t:%s d:%s/%llu l:%d a:%d r:%d\n",
+ gfs2_print_dbg(seq, "G: s:%s n:%u/%llu f:%s t:%s d:%s/%llu a:%d r:%d\n",
state2str(gl->gl_state),
gl->gl_name.ln_type,
(unsigned long long)gl->gl_name.ln_number,
gflags2str(gflags_buf, &gl->gl_flags),
state2str(gl->gl_target),
state2str(gl->gl_demote_state), dtime,
- atomic_read(&gl->gl_lvb_count),
atomic_read(&gl->gl_ail_count),
atomic_read(&gl->gl_ref));
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 543ec7ecfbda..a602a28f6f08 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -11,15 +11,130 @@
#define __GLOCK_DOT_H__
#include <linux/sched.h>
+#include <linux/parser.h>
#include "incore.h"
-/* Flags for lock requests; used in gfs2_holder gh_flag field.
- From lm_interface.h:
+/* Options for hostdata parser */
+
+enum {
+ Opt_jid,
+ Opt_id,
+ Opt_first,
+ Opt_nodir,
+ Opt_err,
+};
+
+/*
+ * lm_lockname types
+ */
+
+#define LM_TYPE_RESERVED 0x00
+#define LM_TYPE_NONDISK 0x01
+#define LM_TYPE_INODE 0x02
+#define LM_TYPE_RGRP 0x03
+#define LM_TYPE_META 0x04
+#define LM_TYPE_IOPEN 0x05
+#define LM_TYPE_FLOCK 0x06
+#define LM_TYPE_PLOCK 0x07
+#define LM_TYPE_QUOTA 0x08
+#define LM_TYPE_JOURNAL 0x09
+
+/*
+ * lm_lock() states
+ *
+ * SHARED is compatible with SHARED, not with DEFERRED or EX.
+ * DEFERRED is compatible with DEFERRED, not with SHARED or EX.
+ */
+
+#define LM_ST_UNLOCKED 0
+#define LM_ST_EXCLUSIVE 1
+#define LM_ST_DEFERRED 2
+#define LM_ST_SHARED 3
+
+/*
+ * lm_lock() flags
+ *
+ * LM_FLAG_TRY
+ * Don't wait to acquire the lock if it can't be granted immediately.
+ *
+ * LM_FLAG_TRY_1CB
+ * Send one blocking callback if TRY is set and the lock is not granted.
+ *
+ * LM_FLAG_NOEXP
+ * GFS sets this flag on lock requests it makes while doing journal recovery.
+ * These special requests should not be blocked due to the recovery like
+ * ordinary locks would be.
+ *
+ * LM_FLAG_ANY
+ * A SHARED request may also be granted in DEFERRED, or a DEFERRED request may
+ * also be granted in SHARED. The preferred state is whichever is compatible
+ * with other granted locks, or the specified state if no other locks exist.
+ *
+ * LM_FLAG_PRIORITY
+ * Override fairness considerations. Suppose a lock is held in a shared state
+ * and there is a pending request for the deferred state. A shared lock
+ * request with the priority flag would be allowed to bypass the deferred
+ * request and directly join the other shared lock. A shared lock request
+ * without the priority flag might be forced to wait until the deferred
+ * requested had acquired and released the lock.
+ */
+
#define LM_FLAG_TRY 0x00000001
#define LM_FLAG_TRY_1CB 0x00000002
#define LM_FLAG_NOEXP 0x00000004
#define LM_FLAG_ANY 0x00000008
-#define LM_FLAG_PRIORITY 0x00000010 */
+#define LM_FLAG_PRIORITY 0x00000010
+#define GL_ASYNC 0x00000040
+#define GL_EXACT 0x00000080
+#define GL_SKIP 0x00000100
+#define GL_ATIME 0x00000200
+#define GL_NOCACHE 0x00000400
+
+/*
+ * lm_lock() and lm_async_cb return flags
+ *
+ * LM_OUT_ST_MASK
+ * Masks the lower two bits of lock state in the returned value.
+ *
+ * LM_OUT_CANCELED
+ * The lock request was canceled.
+ *
+ * LM_OUT_ASYNC
+ * The result of the request will be returned in an LM_CB_ASYNC callback.
+ *
+ */
+
+#define LM_OUT_ST_MASK 0x00000003
+#define LM_OUT_CANCELED 0x00000008
+#define LM_OUT_ASYNC 0x00000080
+#define LM_OUT_ERROR 0x00000100
+
+/*
+ * lm_recovery_done() messages
+ */
+
+#define LM_RD_GAVEUP 308
+#define LM_RD_SUCCESS 309
+
+#define GLR_TRYFAILED 13
+
+struct lm_lockops {
+ const char *lm_proto_name;
+ int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
+ void (*lm_unmount) (struct gfs2_sbd *sdp);
+ void (*lm_withdraw) (struct gfs2_sbd *sdp);
+ void (*lm_put_lock) (struct kmem_cache *cachep, void *gl);
+ unsigned int (*lm_lock) (struct gfs2_glock *gl,
+ unsigned int req_state, unsigned int flags);
+ void (*lm_cancel) (struct gfs2_glock *gl);
+ const match_table_t *lm_tokens;
+};
+
+#define LM_FLAG_TRY 0x00000001
+#define LM_FLAG_TRY_1CB 0x00000002
+#define LM_FLAG_NOEXP 0x00000004
+#define LM_FLAG_ANY 0x00000008
+#define LM_FLAG_PRIORITY 0x00000010
#define GL_ASYNC 0x00000040
#define GL_EXACT 0x00000080
@@ -128,10 +243,12 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
int gfs2_lvb_hold(struct gfs2_glock *gl);
void gfs2_lvb_unhold(struct gfs2_glock *gl);
-void gfs2_glock_cb(void *cb_data, unsigned int type, void *data);
+void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
+void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
+void gfs2_glock_thaw(struct gfs2_sbd *sdp);
int __init gfs2_glock_init(void);
void gfs2_glock_exit(void);
@@ -141,4 +258,6 @@ void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
int gfs2_register_debugfs(void);
void gfs2_unregister_debugfs(void);
+extern const struct lm_lockops gfs2_dlm_ops;
+
#endif /* __GLOCK_DOT_H__ */
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 8522d3aa64fc..bf23a62aa925 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -12,7 +12,6 @@
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
#include <linux/bio.h>
#include "gfs2.h"
@@ -38,20 +37,25 @@
static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
- unsigned int blocks;
struct list_head *head = &gl->gl_ail_list;
struct gfs2_bufdata *bd;
struct buffer_head *bh;
- int error;
+ struct gfs2_trans tr;
- blocks = atomic_read(&gl->gl_ail_count);
- if (!blocks)
- return;
+ memset(&tr, 0, sizeof(tr));
+ tr.tr_revokes = atomic_read(&gl->gl_ail_count);
- error = gfs2_trans_begin(sdp, 0, blocks);
- if (gfs2_assert_withdraw(sdp, !error))
+ if (!tr.tr_revokes)
return;
+ /* A shortened, inline version of gfs2_trans_begin() */
+ tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
+ tr.tr_ip = (unsigned long)__builtin_return_address(0);
+ INIT_LIST_HEAD(&tr.tr_list_buf);
+ gfs2_log_reserve(sdp, tr.tr_reserved);
+ BUG_ON(current->journal_info);
+ current->journal_info = &tr;
+
gfs2_log_lock(sdp);
while (!list_empty(head)) {
bd = list_entry(head->next, struct gfs2_bufdata,
@@ -72,29 +76,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
}
/**
- * gfs2_pte_inval - Sync and invalidate all PTEs associated with a glock
- * @gl: the glock
- *
- */
-
-static void gfs2_pte_inval(struct gfs2_glock *gl)
-{
- struct gfs2_inode *ip;
- struct inode *inode;
-
- ip = gl->gl_object;
- inode = &ip->i_inode;
- if (!ip || !S_ISREG(inode->i_mode))
- return;
-
- unmap_shared_mapping_range(inode->i_mapping, 0, 0);
- if (test_bit(GIF_SW_PAGED, &ip->i_flags))
- set_bit(GLF_DIRTY, &gl->gl_flags);
-
-}
-
-/**
- * meta_go_sync - sync out the metadata for this glock
+ * rgrp_go_sync - sync out the metadata for this glock
* @gl: the glock
*
* Called when demoting or unlocking an EX glock. We must flush
@@ -102,36 +84,42 @@ static void gfs2_pte_inval(struct gfs2_glock *gl)
* not return to caller to demote/unlock the glock until I/O is complete.
*/
-static void meta_go_sync(struct gfs2_glock *gl)
+static void rgrp_go_sync(struct gfs2_glock *gl)
{
- if (gl->gl_state != LM_ST_EXCLUSIVE)
+ struct address_space *metamapping = gl->gl_aspace->i_mapping;
+ int error;
+
+ if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
return;
+ BUG_ON(gl->gl_state != LM_ST_EXCLUSIVE);
- if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) {
- gfs2_log_flush(gl->gl_sbd, gl);
- gfs2_meta_sync(gl);
- gfs2_ail_empty_gl(gl);
- }
+ gfs2_log_flush(gl->gl_sbd, gl);
+ filemap_fdatawrite(metamapping);
+ error = filemap_fdatawait(metamapping);
+ mapping_set_error(metamapping, error);
+ gfs2_ail_empty_gl(gl);
}
/**
- * meta_go_inval - invalidate the metadata for this glock
+ * rgrp_go_inval - invalidate the metadata for this glock
* @gl: the glock
* @flags:
*
+ * We never used LM_ST_DEFERRED with resource groups, so that we
+ * should always see the metadata flag set here.
+ *
*/
-static void meta_go_inval(struct gfs2_glock *gl, int flags)
+static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
{
- if (!(flags & DIO_METADATA))
- return;
+ struct address_space *mapping = gl->gl_aspace->i_mapping;
- gfs2_meta_inval(gl);
- if (gl->gl_object == GFS2_I(gl->gl_sbd->sd_rindex))
- gl->gl_sbd->sd_rindex_uptodate = 0;
- else if (gl->gl_ops == &gfs2_rgrp_glops && gl->gl_object) {
- struct gfs2_rgrpd *rgd = (struct gfs2_rgrpd *)gl->gl_object;
+ BUG_ON(!(flags & DIO_METADATA));
+ gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count));
+ truncate_inode_pages(mapping, 0);
+ if (gl->gl_object) {
+ struct gfs2_rgrpd *rgd = (struct gfs2_rgrpd *)gl->gl_object;
rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
}
}
@@ -148,48 +136,54 @@ static void inode_go_sync(struct gfs2_glock *gl)
struct address_space *metamapping = gl->gl_aspace->i_mapping;
int error;
- if (gl->gl_state != LM_ST_UNLOCKED)
- gfs2_pte_inval(gl);
- if (gl->gl_state != LM_ST_EXCLUSIVE)
- return;
-
if (ip && !S_ISREG(ip->i_inode.i_mode))
ip = NULL;
+ if (ip && test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
+ unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0);
+ if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
+ return;
- if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
- gfs2_log_flush(gl->gl_sbd, gl);
- filemap_fdatawrite(metamapping);
- if (ip) {
- struct address_space *mapping = ip->i_inode.i_mapping;
- filemap_fdatawrite(mapping);
- error = filemap_fdatawait(mapping);
- mapping_set_error(mapping, error);
- }
- error = filemap_fdatawait(metamapping);
- mapping_set_error(metamapping, error);
- clear_bit(GLF_DIRTY, &gl->gl_flags);
- gfs2_ail_empty_gl(gl);
+ BUG_ON(gl->gl_state != LM_ST_EXCLUSIVE);
+
+ gfs2_log_flush(gl->gl_sbd, gl);
+ filemap_fdatawrite(metamapping);
+ if (ip) {
+ struct address_space *mapping = ip->i_inode.i_mapping;
+ filemap_fdatawrite(mapping);
+ error = filemap_fdatawait(mapping);
+ mapping_set_error(mapping, error);
}
+ error = filemap_fdatawait(metamapping);
+ mapping_set_error(metamapping, error);
+ gfs2_ail_empty_gl(gl);
}
/**
* inode_go_inval - prepare a inode glock to be released
* @gl: the glock
* @flags:
+ *
+ * Normally we invlidate everything, but if we are moving into
+ * LM_ST_DEFERRED from LM_ST_SHARED or LM_ST_EXCLUSIVE then we
+ * can keep hold of the metadata, since it won't have changed.
*
*/
static void inode_go_inval(struct gfs2_glock *gl, int flags)
{
struct gfs2_inode *ip = gl->gl_object;
- int meta = (flags & DIO_METADATA);
- if (meta) {
- gfs2_meta_inval(gl);
+ gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count));
+
+ if (flags & DIO_METADATA) {
+ struct address_space *mapping = gl->gl_aspace->i_mapping;
+ truncate_inode_pages(mapping, 0);
if (ip)
set_bit(GIF_INVALID, &ip->i_flags);
}
+ if (ip == GFS2_I(gl->gl_sbd->sd_rindex))
+ gl->gl_sbd->sd_rindex_uptodate = 0;
if (ip && S_ISREG(ip->i_inode.i_mode))
truncate_inode_pages(ip->i_inode.i_mapping, 0);
}
@@ -390,20 +384,7 @@ static int trans_go_demote_ok(const struct gfs2_glock *gl)
return 0;
}
-/**
- * quota_go_demote_ok - Check to see if it's ok to unlock a quota glock
- * @gl: the glock
- *
- * Returns: 1 if it's ok
- */
-
-static int quota_go_demote_ok(const struct gfs2_glock *gl)
-{
- return !atomic_read(&gl->gl_lvb_count);
-}
-
const struct gfs2_glock_operations gfs2_meta_glops = {
- .go_xmote_th = meta_go_sync,
.go_type = LM_TYPE_META,
};
@@ -418,8 +399,8 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
};
const struct gfs2_glock_operations gfs2_rgrp_glops = {
- .go_xmote_th = meta_go_sync,
- .go_inval = meta_go_inval,
+ .go_xmote_th = rgrp_go_sync,
+ .go_inval = rgrp_go_inval,
.go_demote_ok = rgrp_go_demote_ok,
.go_lock = rgrp_go_lock,
.go_unlock = rgrp_go_unlock,
@@ -448,7 +429,6 @@ const struct gfs2_glock_operations gfs2_nondisk_glops = {
};
const struct gfs2_glock_operations gfs2_quota_glops = {
- .go_demote_ok = quota_go_demote_ok,
.go_type = LM_TYPE_QUOTA,
};
@@ -456,3 +436,15 @@ const struct gfs2_glock_operations gfs2_journal_glops = {
.go_type = LM_TYPE_JOURNAL,
};
+const struct gfs2_glock_operations *gfs2_glops_list[] = {
+ [LM_TYPE_META] = &gfs2_meta_glops,
+ [LM_TYPE_INODE] = &gfs2_inode_glops,
+ [LM_TYPE_RGRP] = &gfs2_rgrp_glops,
+ [LM_TYPE_NONDISK] = &gfs2_trans_glops,
+ [LM_TYPE_IOPEN] = &gfs2_iopen_glops,
+ [LM_TYPE_FLOCK] = &gfs2_flock_glops,
+ [LM_TYPE_NONDISK] = &gfs2_nondisk_glops,
+ [LM_TYPE_QUOTA] = &gfs2_quota_glops,
+ [LM_TYPE_JOURNAL] = &gfs2_journal_glops,
+};
+
diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h
index a1d9b5b024e6..b3aa2e3210fd 100644
--- a/fs/gfs2/glops.h
+++ b/fs/gfs2/glops.h
@@ -21,5 +21,6 @@ extern const struct gfs2_glock_operations gfs2_flock_glops;
extern const struct gfs2_glock_operations gfs2_nondisk_glops;
extern const struct gfs2_glock_operations gfs2_quota_glops;
extern const struct gfs2_glock_operations gfs2_journal_glops;
+extern const struct gfs2_glock_operations *gfs2_glops_list[];
#endif /* __GLOPS_DOT_H__ */
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 608849d00021..399d1b978049 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -12,6 +12,8 @@
#include <linux/fs.h>
#include <linux/workqueue.h>
+#include <linux/dlm.h>
+#include <linux/buffer_head.h>
#define DIO_WAIT 0x00000010
#define DIO_METADATA 0x00000020
@@ -26,6 +28,7 @@ struct gfs2_trans;
struct gfs2_ail;
struct gfs2_jdesc;
struct gfs2_sbd;
+struct lm_lockops;
typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret);
@@ -121,6 +124,28 @@ struct gfs2_bufdata {
struct list_head bd_ail_gl_list;
};
+/*
+ * Internally, we prefix things with gdlm_ and GDLM_ (for gfs-dlm) since a
+ * prefix of lock_dlm_ gets awkward.
+ */
+
+#define GDLM_STRNAME_BYTES 25
+#define GDLM_LVB_SIZE 32
+
+enum {
+ DFL_BLOCK_LOCKS = 0,
+};
+
+struct lm_lockname {
+ u64 ln_number;
+ unsigned int ln_type;
+};
+
+#define lm_name_equal(name1, name2) \
+ (((name1)->ln_number == (name2)->ln_number) && \
+ ((name1)->ln_type == (name2)->ln_type))
+
+
struct gfs2_glock_operations {
void (*go_xmote_th) (struct gfs2_glock *gl);
int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh);
@@ -162,6 +187,8 @@ enum {
GLF_LFLUSH = 7,
GLF_INVALIDATE_IN_PROGRESS = 8,
GLF_REPLY_PENDING = 9,
+ GLF_INITIAL = 10,
+ GLF_FROZEN = 11,
};
struct gfs2_glock {
@@ -176,16 +203,15 @@ struct gfs2_glock {
unsigned int gl_target;
unsigned int gl_reply;
unsigned int gl_hash;
+ unsigned int gl_req;
unsigned int gl_demote_state; /* state requested by remote node */
unsigned long gl_demote_time; /* time of first demote request */
struct list_head gl_holders;
const struct gfs2_glock_operations *gl_ops;
- void *gl_lock;
- char *gl_lvb;
- atomic_t gl_lvb_count;
-
- unsigned long gl_stamp;
+ char gl_strname[GDLM_STRNAME_BYTES];
+ struct dlm_lksb gl_lksb;
+ char gl_lvb[32];
unsigned long gl_tchange;
void *gl_object;
@@ -283,7 +309,9 @@ enum {
struct gfs2_quota_data {
struct list_head qd_list;
- unsigned int qd_count;
+ struct list_head qd_reclaim;
+
+ atomic_t qd_count;
u32 qd_id;
unsigned long qd_flags; /* QDF_... */
@@ -303,7 +331,6 @@ struct gfs2_quota_data {
u64 qd_sync_gen;
unsigned long qd_last_warn;
- unsigned long qd_last_touched;
};
struct gfs2_trans {
@@ -390,7 +417,7 @@ struct gfs2_args {
unsigned int ar_suiddir:1; /* suiddir support */
unsigned int ar_data:2; /* ordered/writeback */
unsigned int ar_meta:1; /* mount metafs */
- unsigned int ar_num_glockd; /* Number of glockd threads */
+ unsigned int ar_discard:1; /* discard requests */
};
struct gfs2_tune {
@@ -406,7 +433,6 @@ struct gfs2_tune {
unsigned int gt_quota_warn_period; /* Secs between quota warn msgs */
unsigned int gt_quota_scale_num; /* Numerator */
unsigned int gt_quota_scale_den; /* Denominator */
- unsigned int gt_quota_cache_secs;
unsigned int gt_quota_quantum; /* Secs between syncs to quota file */
unsigned int gt_new_files_jdata;
unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */
@@ -445,6 +471,31 @@ struct gfs2_sb_host {
char sb_lockproto[GFS2_LOCKNAME_LEN];
char sb_locktable[GFS2_LOCKNAME_LEN];
+ u8 sb_uuid[16];
+};
+
+/*
+ * lm_mount() return values
+ *
+ * ls_jid - the journal ID this node should use
+ * ls_first - this node is the first to mount the file system
+ * ls_lockspace - lock module's context for this file system
+ * ls_ops - lock module's functions
+ */
+
+struct lm_lockstruct {
+ u32 ls_id;
+ unsigned int ls_jid;
+ unsigned int ls_first;
+ unsigned int ls_first_done;
+ unsigned int ls_nodir;
+ const struct lm_lockops *ls_ops;
+ unsigned long ls_flags;
+ dlm_lockspace_t *ls_dlm;
+
+ int ls_recover_jid;
+ int ls_recover_jid_done;
+ int ls_recover_jid_status;
};
struct gfs2_sbd {
@@ -520,7 +571,6 @@ struct gfs2_sbd {
spinlock_t sd_jindex_spin;
struct mutex sd_jindex_mutex;
unsigned int sd_journals;
- unsigned long sd_jindex_refresh_time;
struct gfs2_jdesc *sd_jdesc;
struct gfs2_holder sd_journal_gh;
@@ -540,7 +590,6 @@ struct gfs2_sbd {
struct list_head sd_quota_list;
atomic_t sd_quota_count;
- spinlock_t sd_quota_spin;
struct mutex sd_quota_mutex;
wait_queue_head_t sd_quota_wait;
struct list_head sd_trunc_list;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 3b87c188da41..7b277d449155 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -16,7 +16,6 @@
#include <linux/sort.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
-#include <linux/lm_interface.h>
#include <linux/security.h>
#include <linux/time.h>
@@ -137,16 +136,16 @@ void gfs2_set_iop(struct inode *inode)
if (S_ISREG(mode)) {
inode->i_op = &gfs2_file_iops;
- if (sdp->sd_args.ar_localflocks)
- inode->i_fop = &gfs2_file_fops_nolock;
+ if (gfs2_localflocks(sdp))
+ inode->i_fop = gfs2_file_fops_nolock;
else
- inode->i_fop = &gfs2_file_fops;
+ inode->i_fop = gfs2_file_fops;
} else if (S_ISDIR(mode)) {
inode->i_op = &gfs2_dir_iops;
- if (sdp->sd_args.ar_localflocks)
- inode->i_fop = &gfs2_dir_fops_nolock;
+ if (gfs2_localflocks(sdp))
+ inode->i_fop = gfs2_dir_fops_nolock;
else
- inode->i_fop = &gfs2_dir_fops;
+ inode->i_fop = gfs2_dir_fops;
} else if (S_ISLNK(mode)) {
inode->i_op = &gfs2_symlink_iops;
} else {
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index d5329364cdff..dca4fee3078b 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -101,12 +101,26 @@ void gfs2_dinode_print(const struct gfs2_inode *ip);
extern const struct inode_operations gfs2_file_iops;
extern const struct inode_operations gfs2_dir_iops;
extern const struct inode_operations gfs2_symlink_iops;
-extern const struct file_operations gfs2_file_fops;
-extern const struct file_operations gfs2_dir_fops;
-extern const struct file_operations gfs2_file_fops_nolock;
-extern const struct file_operations gfs2_dir_fops_nolock;
+extern const struct file_operations *gfs2_file_fops_nolock;
+extern const struct file_operations *gfs2_dir_fops_nolock;
extern void gfs2_set_inode_flags(struct inode *inode);
+
+#ifdef CONFIG_GFS2_FS_LOCKING_DLM
+extern const struct file_operations *gfs2_file_fops;
+extern const struct file_operations *gfs2_dir_fops;
+static inline int gfs2_localflocks(const struct gfs2_sbd *sdp)
+{
+ return sdp->sd_args.ar_localflocks;
+}
+#else /* Single node only */
+#define gfs2_file_fops NULL
+#define gfs2_dir_fops NULL
+static inline int gfs2_localflocks(const struct gfs2_sbd *sdp)
+{
+ return 1;
+}
+#endif /* CONFIG_GFS2_FS_LOCKING_DLM */
#endif /* __INODE_DOT_H__ */
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
new file mode 100644
index 000000000000..46df988323bc
--- /dev/null
+++ b/fs/gfs2/lock_dlm.c
@@ -0,0 +1,241 @@
+/*
+ * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
+ * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License version 2.
+ */
+
+#include <linux/fs.h>
+#include <linux/dlm.h>
+#include <linux/types.h>
+#include <linux/gfs2_ondisk.h>
+
+#include "incore.h"
+#include "glock.h"
+#include "util.h"
+
+
+static void gdlm_ast(void *arg)
+{
+ struct gfs2_glock *gl = arg;
+ unsigned ret = gl->gl_state;
+
+ BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
+
+ if (gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID)
+ memset(gl->gl_lvb, 0, GDLM_LVB_SIZE);
+
+ switch (gl->gl_lksb.sb_status) {
+ case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */
+ kmem_cache_free(gfs2_glock_cachep, gl);
+ return;
+ case -DLM_ECANCEL: /* Cancel while getting lock */
+ ret |= LM_OUT_CANCELED;
+ goto out;
+ case -EAGAIN: /* Try lock fails */
+ goto out;
+ case -EINVAL: /* Invalid */
+ case -ENOMEM: /* Out of memory */
+ ret |= LM_OUT_ERROR;
+ goto out;
+ case 0: /* Success */
+ break;
+ default: /* Something unexpected */
+ BUG();
+ }
+
+ ret = gl->gl_req;
+ if (gl->gl_lksb.sb_flags & DLM_SBF_ALTMODE) {
+ if (gl->gl_req == LM_ST_SHARED)
+ ret = LM_ST_DEFERRED;
+ else if (gl->gl_req == LM_ST_DEFERRED)
+ ret = LM_ST_SHARED;
+ else
+ BUG();
+ }
+
+ set_bit(GLF_INITIAL, &gl->gl_flags);
+ gfs2_glock_complete(gl, ret);
+ return;
+out:
+ if (!test_bit(GLF_INITIAL, &gl->gl_flags))
+ gl->gl_lksb.sb_lkid = 0;
+ gfs2_glock_complete(gl, ret);
+}
+
+static void gdlm_bast(void *arg, int mode)
+{
+ struct gfs2_glock *gl = arg;
+
+ switch (mode) {
+ case DLM_LOCK_EX:
+ gfs2_glock_cb(gl, LM_ST_UNLOCKED);
+ break;
+ case DLM_LOCK_CW:
+ gfs2_glock_cb(gl, LM_ST_DEFERRED);
+ break;
+ case DLM_LOCK_PR:
+ gfs2_glock_cb(gl, LM_ST_SHARED);
+ break;
+ default:
+ printk(KERN_ERR "unknown bast mode %d", mode);
+ BUG();
+ }
+}
+
+/* convert gfs lock-state to dlm lock-mode */
+
+static int make_mode(const unsigned int lmstate)
+{
+ switch (lmstate) {
+ case LM_ST_UNLOCKED:
+ return DLM_LOCK_NL;
+ case LM_ST_EXCLUSIVE:
+ return DLM_LOCK_EX;
+ case LM_ST_DEFERRED:
+ return DLM_LOCK_CW;
+ case LM_ST_SHARED:
+ return DLM_LOCK_PR;
+ }
+ printk(KERN_ERR "unknown LM state %d", lmstate);
+ BUG();
+ return -1;
+}
+
+static u32 make_flags(const u32 lkid, const unsigned int gfs_flags,
+ const int req)
+{
+ u32 lkf = 0;
+
+ if (gfs_flags & LM_FLAG_TRY)
+ lkf |= DLM_LKF_NOQUEUE;
+
+ if (gfs_flags & LM_FLAG_TRY_1CB) {
+ lkf |= DLM_LKF_NOQUEUE;
+ lkf |= DLM_LKF_NOQUEUEBAST;
+ }
+
+ if (gfs_flags & LM_FLAG_PRIORITY) {
+ lkf |= DLM_LKF_NOORDER;
+ lkf |= DLM_LKF_HEADQUE;
+ }
+
+ if (gfs_flags & LM_FLAG_ANY) {
+ if (req == DLM_LOCK_PR)
+ lkf |= DLM_LKF_ALTCW;
+ else if (req == DLM_LOCK_CW)
+ lkf |= DLM_LKF_ALTPR;
+ else
+ BUG();
+ }
+
+ if (lkid != 0)
+ lkf |= DLM_LKF_CONVERT;
+
+ lkf |= DLM_LKF_VALBLK;
+
+ return lkf;
+}
+
+static unsigned int gdlm_lock(struct gfs2_glock *gl,
+ unsigned int req_state, unsigned int flags)
+{
+ struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
+ int error;
+ int req;
+ u32 lkf;
+
+ gl->gl_req = req_state;
+ req = make_mode(req_state);
+ lkf = make_flags(gl->gl_lksb.sb_lkid, flags, req);
+
+ /*
+ * Submit the actual lock request.
+ */
+
+ error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname,
+ GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
+ if (error == -EAGAIN)
+ return 0;
+ if (error)
+ return LM_OUT_ERROR;
+ return LM_OUT_ASYNC;
+}
+
+static void gdlm_put_lock(struct kmem_cache *cachep, void *ptr)
+{
+ struct gfs2_glock *gl = ptr;
+ struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
+ int error;
+
+ if (gl->gl_lksb.sb_lkid == 0) {
+ kmem_cache_free(cachep, gl);
+ return;
+ }
+
+ error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK,
+ NULL, gl);
+ if (error) {
+ printk(KERN_ERR "gdlm_unlock %x,%llx err=%d\n",
+ gl->gl_name.ln_type,
+ (unsigned long long)gl->gl_name.ln_number, error);
+ return;
+ }
+}
+
+static void gdlm_cancel(struct gfs2_glock *gl)
+{
+ struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
+ dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl);
+}
+
+static int gdlm_mount(struct gfs2_sbd *sdp, const char *fsname)
+{
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ int error;
+
+ if (fsname == NULL) {
+ fs_info(sdp, "no fsname found\n");
+ return -EINVAL;
+ }
+
+ error = dlm_new_lockspace(fsname, strlen(fsname), &ls->ls_dlm,
+ DLM_LSFL_FS | DLM_LSFL_NEWEXCL |
+ (ls->ls_nodir ? DLM_LSFL_NODIR : 0),
+ GDLM_LVB_SIZE);
+ if (error)
+ printk(KERN_ERR "dlm_new_lockspace error %d", error);
+
+ return error;
+}
+
+static void gdlm_unmount(struct gfs2_sbd *sdp)
+{
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+
+ if (ls->ls_dlm) {
+ dlm_release_lockspace(ls->ls_dlm, 2);
+ ls->ls_dlm = NULL;
+ }
+}
+
+static const match_table_t dlm_tokens = {
+ { Opt_jid, "jid=%d"},
+ { Opt_id, "id=%d"},
+ { Opt_first, "first=%d"},
+ { Opt_nodir, "nodir=%d"},
+ { Opt_err, NULL },
+};
+
+const struct lm_lockops gfs2_dlm_ops = {
+ .lm_proto_name = "lock_dlm",
+ .lm_mount = gdlm_mount,
+ .lm_unmount = gdlm_unmount,
+ .lm_put_lock = gdlm_put_lock,
+ .lm_lock = gdlm_lock,
+ .lm_cancel = gdlm_cancel,
+ .lm_tokens = &dlm_tokens,
+};
+
diff --git a/fs/gfs2/locking.c b/fs/gfs2/locking.c
deleted file mode 100644
index 523243a13a21..000000000000
--- a/fs/gfs2/locking.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/slab.h>
-#include <linux/wait.h>
-#include <linux/sched.h>
-#include <linux/kmod.h>
-#include <linux/fs.h>
-#include <linux/delay.h>
-#include <linux/lm_interface.h>
-
-struct lmh_wrapper {
- struct list_head lw_list;
- const struct lm_lockops *lw_ops;
-};
-
-static int nolock_mount(char *table_name, char *host_data,
- lm_callback_t cb, void *cb_data,
- unsigned int min_lvb_size, int flags,
- struct lm_lockstruct *lockstruct,
- struct kobject *fskobj);
-
-/* List of registered low-level locking protocols. A file system selects one
- of them by name at mount time, e.g. lock_nolock, lock_dlm. */
-
-static const struct lm_lockops nolock_ops = {
- .lm_proto_name = "lock_nolock",
- .lm_mount = nolock_mount,
-};
-
-static struct lmh_wrapper nolock_proto = {
- .lw_list = LIST_HEAD_INIT(nolock_proto.lw_list),
- .lw_ops = &nolock_ops,
-};
-
-static LIST_HEAD(lmh_list);
-static DEFINE_MUTEX(lmh_lock);
-
-static int nolock_mount(char *table_name, char *host_data,
- lm_callback_t cb, void *cb_data,
- unsigned int min_lvb_size, int flags,
- struct lm_lockstruct *lockstruct,
- struct kobject *fskobj)
-{
- char *c;
- unsigned int jid;
-
- c = strstr(host_data, "jid=");
- if (!c)
- jid = 0;
- else {
- c += 4;
- sscanf(c, "%u", &jid);
- }
-
- lockstruct->ls_jid = jid;
- lockstruct->ls_first = 1;
- lockstruct->ls_lvb_size = min_lvb_size;
- lockstruct->ls_ops = &nolock_ops;
- lockstruct->ls_flags = LM_LSFLAG_LOCAL;
-
- return 0;
-}
-
-/**
- * gfs2_register_lockproto - Register a low-level locking protocol
- * @proto: the protocol definition
- *
- * Returns: 0 on success, -EXXX on failure
- */
-
-int gfs2_register_lockproto(const struct lm_lockops *proto)
-{
- struct lmh_wrapper *lw;
-
- mutex_lock(&lmh_lock);
-
- list_for_each_entry(lw, &lmh_list, lw_list) {
- if (!strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name)) {
- mutex_unlock(&lmh_lock);
- printk(KERN_INFO "GFS2: protocol %s already exists\n",
- proto->lm_proto_name);
- return -EEXIST;
- }
- }
-
- lw = kzalloc(sizeof(struct lmh_wrapper), GFP_KERNEL);
- if (!lw) {
- mutex_unlock(&lmh_lock);
- return -ENOMEM;
- }
-
- lw->lw_ops = proto;
- list_add(&lw->lw_list, &lmh_list);
-
- mutex_unlock(&lmh_lock);
-
- return 0;
-}
-
-/**
- * gfs2_unregister_lockproto - Unregister a low-level locking protocol
- * @proto: the protocol definition
- *
- */
-
-void gfs2_unregister_lockproto(const struct lm_lockops *proto)
-{
- struct lmh_wrapper *lw;
-
- mutex_lock(&lmh_lock);
-
- list_for_each_entry(lw, &lmh_list, lw_list) {
- if (!strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name)) {
- list_del(&lw->lw_list);
- mutex_unlock(&lmh_lock);
- kfree(lw);
- return;
- }
- }
-
- mutex_unlock(&lmh_lock);
-
- printk(KERN_WARNING "GFS2: can't unregister lock protocol %s\n",
- proto->lm_proto_name);
-}
-
-/**
- * gfs2_mount_lockproto - Mount a lock protocol
- * @proto_name - the name of the protocol
- * @table_name - the name of the lock space
- * @host_data - data specific to this host
- * @cb - the callback to the code using the lock module
- * @sdp - The GFS2 superblock
- * @min_lvb_size - the mininum LVB size that the caller can deal with
- * @flags - LM_MFLAG_*
- * @lockstruct - a structure returned describing the mount
- *
- * Returns: 0 on success, -EXXX on failure
- */
-
-int gfs2_mount_lockproto(char *proto_name, char *table_name, char *host_data,
- lm_callback_t cb, void *cb_data,
- unsigned int min_lvb_size, int flags,
- struct lm_lockstruct *lockstruct,
- struct kobject *fskobj)
-{
- struct lmh_wrapper *lw = NULL;
- int try = 0;
- int error, found;
-
-
-retry:
- mutex_lock(&lmh_lock);
-
- if (list_empty(&nolock_proto.lw_list))
- list_add(&nolock_proto.lw_list, &lmh_list);
-
- found = 0;
- list_for_each_entry(lw, &lmh_list, lw_list) {
- if (!strcmp(lw->lw_ops->lm_proto_name, proto_name)) {
- found = 1;
- break;
- }
- }
-
- if (!found) {
- if (!try && capable(CAP_SYS_MODULE)) {
- try = 1;
- mutex_unlock(&lmh_lock);
- request_module(proto_name);
- goto retry;
- }
- printk(KERN_INFO "GFS2: can't find protocol %s\n", proto_name);
- error = -ENOENT;
- goto out;
- }
-
- if (lw->lw_ops->lm_owner &&
- !try_module_get(lw->lw_ops->lm_owner)) {
- try = 0;
- mutex_unlock(&lmh_lock);
- msleep(1000);
- goto retry;
- }
-
- error = lw->lw_ops->lm_mount(table_name, host_data, cb, cb_data,
- min_lvb_size, flags, lockstruct, fskobj);
- if (error)
- module_put(lw->lw_ops->lm_owner);
-out:
- mutex_unlock(&lmh_lock);
- return error;
-}
-
-void gfs2_unmount_lockproto(struct lm_lockstruct *lockstruct)
-{
- mutex_lock(&lmh_lock);
- if (lockstruct->ls_ops->lm_unmount)
- lockstruct->ls_ops->lm_unmount(lockstruct->ls_lockspace);
- if (lockstruct->ls_ops->lm_owner)
- module_put(lockstruct->ls_ops->lm_owner);
- mutex_unlock(&lmh_lock);
-}
-
-/**
- * gfs2_withdraw_lockproto - abnormally unmount a lock module
- * @lockstruct: the lockstruct passed into mount
- *
- */
-
-void gfs2_withdraw_lockproto(struct lm_lockstruct *lockstruct)
-{
- mutex_lock(&lmh_lock);
- lockstruct->ls_ops->lm_withdraw(lockstruct->ls_lockspace);
- if (lockstruct->ls_ops->lm_owner)
- module_put(lockstruct->ls_ops->lm_owner);
- mutex_unlock(&lmh_lock);
-}
-
-EXPORT_SYMBOL_GPL(gfs2_register_lockproto);
-EXPORT_SYMBOL_GPL(gfs2_unregister_lockproto);
-
diff --git a/fs/gfs2/locking/dlm/Makefile b/fs/gfs2/locking/dlm/Makefile
deleted file mode 100644
index 2609bb6cd013..000000000000
--- a/fs/gfs2/locking/dlm/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o
-lock_dlm-y := lock.o main.o mount.o sysfs.o thread.o
-
diff --git a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c
deleted file mode 100644
index 2482c9047505..000000000000
--- a/fs/gfs2/locking/dlm/lock.c
+++ /dev/null
@@ -1,708 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#include "lock_dlm.h"
-
-static char junk_lvb[GDLM_LVB_SIZE];
-
-
-/* convert dlm lock-mode to gfs lock-state */
-
-static s16 gdlm_make_lmstate(s16 dlmmode)
-{
- switch (dlmmode) {
- case DLM_LOCK_IV:
- case DLM_LOCK_NL:
- return LM_ST_UNLOCKED;
- case DLM_LOCK_EX:
- return LM_ST_EXCLUSIVE;
- case DLM_LOCK_CW:
- return LM_ST_DEFERRED;
- case DLM_LOCK_PR:
- return LM_ST_SHARED;
- }
- gdlm_assert(0, "unknown DLM mode %d", dlmmode);
- return -1;
-}
-
-/* A lock placed on this queue is re-submitted to DLM as soon as the lock_dlm
- thread gets to it. */
-
-static void queue_submit(struct gdlm_lock *lp)
-{
- struct gdlm_ls *ls = lp->ls;
-
- spin_lock(&ls->async_lock);
- list_add_tail(&lp->delay_list, &ls->submit);
- spin_unlock(&ls->async_lock);
- wake_up(&ls->thread_wait);
-}
-
-static void wake_up_ast(struct gdlm_lock *lp)
-{
- clear_bit(LFL_AST_WAIT, &lp->flags);
- smp_mb__after_clear_bit();
- wake_up_bit(&lp->flags, LFL_AST_WAIT);
-}
-
-static void gdlm_delete_lp(struct gdlm_lock *lp)
-{
- struct gdlm_ls *ls = lp->ls;
-
- spin_lock(&ls->async_lock);
- if (!list_empty(&lp->delay_list))
- list_del_init(&lp->delay_list);
- ls->all_locks_count--;
- spin_unlock(&ls->async_lock);
-
- kfree(lp);
-}
-
-static void gdlm_queue_delayed(struct gdlm_lock *lp)
-{
- struct gdlm_ls *ls = lp->ls;
-
- spin_lock(&ls->async_lock);
- list_add_tail(&lp->delay_list, &ls->delayed);
- spin_unlock(&ls->async_lock);
-}
-
-static void process_complete(struct gdlm_lock *lp)
-{
- struct gdlm_ls *ls = lp->ls;
- struct lm_async_cb acb;
-
- memset(&acb, 0, sizeof(acb));
-
- if (lp->lksb.sb_status == -DLM_ECANCEL) {
- log_info("complete dlm cancel %x,%llx flags %lx",
- lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number,
- lp->flags);
-
- lp->req = lp->cur;
- acb.lc_ret |= LM_OUT_CANCELED;
- if (lp->cur == DLM_LOCK_IV)
- lp->lksb.sb_lkid = 0;
- goto out;
- }
-
- if (test_and_clear_bit(LFL_DLM_UNLOCK, &lp->flags)) {
- if (lp->lksb.sb_status != -DLM_EUNLOCK) {
- log_info("unlock sb_status %d %x,%llx flags %lx",
- lp->lksb.sb_status, lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number,
- lp->flags);
- return;
- }
-
- lp->cur = DLM_LOCK_IV;
- lp->req = DLM_LOCK_IV;
- lp->lksb.sb_lkid = 0;
-
- if (test_and_clear_bit(LFL_UNLOCK_DELETE, &lp->flags)) {
- gdlm_delete_lp(lp);
- return;
- }
- goto out;
- }
-
- if (lp->lksb.sb_flags & DLM_SBF_VALNOTVALID)
- memset(lp->lksb.sb_lvbptr, 0, GDLM_LVB_SIZE);
-
- if (lp->lksb.sb_flags & DLM_SBF_ALTMODE) {
- if (lp->req == DLM_LOCK_PR)
- lp->req = DLM_LOCK_CW;
- else if (lp->req == DLM_LOCK_CW)
- lp->req = DLM_LOCK_PR;
- }
-
- /*
- * A canceled lock request. The lock was just taken off the delayed
- * list and was never even submitted to dlm.
- */
-
- if (test_and_clear_bit(LFL_CANCEL, &lp->flags)) {
- log_info("complete internal cancel %x,%llx",
- lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number);
- lp->req = lp->cur;
- acb.lc_ret |= LM_OUT_CANCELED;
- goto out;
- }
-
- /*
- * An error occured.
- */
-
- if (lp->lksb.sb_status) {
- /* a "normal" error */
- if ((lp->lksb.sb_status == -EAGAIN) &&
- (lp->lkf & DLM_LKF_NOQUEUE)) {
- lp->req = lp->cur;
- if (lp->cur == DLM_LOCK_IV)
- lp->lksb.sb_lkid = 0;
- goto out;
- }
-
- /* this could only happen with cancels I think */
- log_info("ast sb_status %d %x,%llx flags %lx",
- lp->lksb.sb_status, lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number,
- lp->flags);
- return;
- }
-
- /*
- * This is an AST for an EX->EX conversion for sync_lvb from GFS.
- */
-
- if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) {
- wake_up_ast(lp);
- return;
- }
-
- /*
- * A lock has been demoted to NL because it initially completed during
- * BLOCK_LOCKS. Now it must be requested in the originally requested
- * mode.
- */
-
- if (test_and_clear_bit(LFL_REREQUEST, &lp->flags)) {
- gdlm_assert(lp->req == DLM_LOCK_NL, "%x,%llx",
- lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number);
- gdlm_assert(lp->prev_req > DLM_LOCK_NL, "%x,%llx",
- lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number);
-
- lp->cur = DLM_LOCK_NL;
- lp->req = lp->prev_req;
- lp->prev_req = DLM_LOCK_IV;
- lp->lkf &= ~DLM_LKF_CONVDEADLK;
-
- set_bit(LFL_NOCACHE, &lp->flags);
-
- if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
- !test_bit(LFL_NOBLOCK, &lp->flags))
- gdlm_queue_delayed(lp);
- else
- queue_submit(lp);
- return;
- }
-
- /*
- * A request is granted during dlm recovery. It may be granted
- * because the locks of a failed node were cleared. In that case,
- * there may be inconsistent data beneath this lock and we must wait
- * for recovery to complete to use it. When gfs recovery is done this
- * granted lock will be converted to NL and then reacquired in this
- * granted state.
- */
-
- if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
- !test_bit(LFL_NOBLOCK, &lp->flags) &&
- lp->req != DLM_LOCK_NL) {
-
- lp->cur = lp->req;
- lp->prev_req = lp->req;
- lp->req = DLM_LOCK_NL;
- lp->lkf |= DLM_LKF_CONVERT;
- lp->lkf &= ~DLM_LKF_CONVDEADLK;
-
- log_debug("rereq %x,%llx id %x %d,%d",
- lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number,
- lp->lksb.sb_lkid, lp->cur, lp->req);
-
- set_bit(LFL_REREQUEST, &lp->flags);
- queue_submit(lp);
- return;
- }
-
- /*
- * DLM demoted the lock to NL before it was granted so GFS must be
- * told it cannot cache data for this lock.
- */
-
- if (lp->lksb.sb_flags & DLM_SBF_DEMOTED)
- set_bit(LFL_NOCACHE, &lp->flags);
-
-out:
- /*
- * This is an internal lock_dlm lock
- */
-
- if (test_bit(LFL_INLOCK, &lp->flags)) {
- clear_bit(LFL_NOBLOCK, &lp->flags);
- lp->cur = lp->req;
- wake_up_ast(lp);
- return;
- }
-
- /*
- * Normal completion of a lock request. Tell GFS it now has the lock.
- */
-
- clear_bit(LFL_NOBLOCK, &lp->flags);
- lp->cur = lp->req;
-
- acb.lc_name = lp->lockname;
- acb.lc_ret |= gdlm_make_lmstate(lp->cur);
-
- ls->fscb(ls->sdp, LM_CB_ASYNC, &acb);
-}
-
-static void gdlm_ast(void *astarg)
-{
- struct gdlm_lock *lp = astarg;
- clear_bit(LFL_ACTIVE, &lp->flags);
- process_complete(lp);
-}
-
-static void process_blocking(struct gdlm_lock *lp, int bast_mode)
-{
- struct gdlm_ls *ls = lp->ls;
- unsigned int cb = 0;
-
- switch (gdlm_make_lmstate(bast_mode)) {
- case LM_ST_EXCLUSIVE:
- cb = LM_CB_NEED_E;
- break;
- case LM_ST_DEFERRED:
- cb = LM_CB_NEED_D;
- break;
- case LM_ST_SHARED:
- cb = LM_CB_NEED_S;
- break;
- default:
- gdlm_assert(0, "unknown bast mode %u", bast_mode);
- }
-
- ls->fscb(ls->sdp, cb, &lp->lockname);
-}
-
-
-static void gdlm_bast(void *astarg, int mode)
-{
- struct gdlm_lock *lp = astarg;
-
- if (!mode) {
- printk(KERN_INFO "lock_dlm: bast mode zero %x,%llx\n",
- lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number);
- return;
- }
-
- process_blocking(lp, mode);
-}
-
-/* convert gfs lock-state to dlm lock-mode */
-
-static s16 make_mode(s16 lmstate)
-{
- switch (lmstate) {
- case LM_ST_UNLOCKED:
- return DLM_LOCK_NL;
- case LM_ST_EXCLUSIVE:
- return DLM_LOCK_EX;
- case LM_ST_DEFERRED:
- return DLM_LOCK_CW;
- case LM_ST_SHARED:
- return DLM_LOCK_PR;
- }
- gdlm_assert(0, "unknown LM state %d", lmstate);
- return -1;
-}
-
-
-/* verify agreement with GFS on the current lock state, NB: DLM_LOCK_NL and
- DLM_LOCK_IV are both considered LM_ST_UNLOCKED by GFS. */
-
-static void check_cur_state(struct gdlm_lock *lp, unsigned int cur_state)
-{
- s16 cur = make_mode(cur_state);
- if (lp->cur != DLM_LOCK_IV)
- gdlm_assert(lp->cur == cur, "%d, %d", lp->cur, cur);
-}
-
-static inline unsigned int make_flags(struct gdlm_lock *lp,
- unsigned int gfs_flags,
- s16 cur, s16 req)
-{
- unsigned int lkf = 0;
-
- if (gfs_flags & LM_FLAG_TRY)
- lkf |= DLM_LKF_NOQUEUE;
-
- if (gfs_flags & LM_FLAG_TRY_1CB) {
- lkf |= DLM_LKF_NOQUEUE;
- lkf |= DLM_LKF_NOQUEUEBAST;
- }
-
- if (gfs_flags & LM_FLAG_PRIORITY) {
- lkf |= DLM_LKF_NOORDER;
- lkf |= DLM_LKF_HEADQUE;
- }
-
- if (gfs_flags & LM_FLAG_ANY) {
- if (req == DLM_LOCK_PR)
- lkf |= DLM_LKF_ALTCW;
- else if (req == DLM_LOCK_CW)
- lkf |= DLM_LKF_ALTPR;
- }
-
- if (lp->lksb.sb_lkid != 0) {
- lkf |= DLM_LKF_CONVERT;
- }
-
- if (lp->lvb)
- lkf |= DLM_LKF_VALBLK;
-
- return lkf;
-}
-
-/* make_strname - convert GFS lock numbers to a string */
-
-static inline void make_strname(const struct lm_lockname *lockname,
- struct gdlm_strname *str)
-{
- sprintf(str->name, "%8x%16llx", lockname->ln_type,
- (unsigned long long)lockname->ln_number);
- str->namelen = GDLM_STRNAME_BYTES;
-}
-
-static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name,
- struct gdlm_lock **lpp)
-{
- struct gdlm_lock *lp;
-
- lp = kzalloc(sizeof(struct gdlm_lock), GFP_NOFS);
- if (!lp)
- return -ENOMEM;
-
- lp->lockname = *name;
- make_strname(name, &lp->strname);
- lp->ls = ls;
- lp->cur = DLM_LOCK_IV;
- INIT_LIST_HEAD(&lp->delay_list);
-
- spin_lock(&ls->async_lock);
- ls->all_locks_count++;
- spin_unlock(&ls->async_lock);
-
- *lpp = lp;
- return 0;
-}
-
-int gdlm_get_lock(void *lockspace, struct lm_lockname *name,
- void **lockp)
-{
- struct gdlm_lock *lp;
- int error;
-
- error = gdlm_create_lp(lockspace, name, &lp);
-
- *lockp = lp;
- return error;
-}
-
-void gdlm_put_lock(void *lock)
-{
- gdlm_delete_lp(lock);
-}
-
-unsigned int gdlm_do_lock(struct gdlm_lock *lp)
-{
- struct gdlm_ls *ls = lp->ls;
- int error, bast = 1;
-
- /*
- * When recovery is in progress, delay lock requests for submission
- * once recovery is done. Requests for recovery (NOEXP) and unlocks
- * can pass.
- */
-
- if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
- !test_bit(LFL_NOBLOCK, &lp->flags) && lp->req != DLM_LOCK_NL) {
- gdlm_queue_delayed(lp);
- return LM_OUT_ASYNC;
- }
-
- /*
- * Submit the actual lock request.
- */
-
- if (test_bit(LFL_NOBAST, &lp->flags))
- bast = 0;
-
- set_bit(LFL_ACTIVE, &lp->flags);
-
- log_debug("lk %x,%llx id %x %d,%d %x", lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number, lp->lksb.sb_lkid,
- lp->cur, lp->req, lp->lkf);
-
- error = dlm_lock(ls->dlm_lockspace, lp->req, &lp->lksb, lp->lkf,
- lp->strname.name, lp->strname.namelen, 0, gdlm_ast,
- lp, bast ? gdlm_bast : NULL);
-
- if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) {
- lp->lksb.sb_status = -EAGAIN;
- gdlm_ast(lp);
- error = 0;
- }
-
- if (error) {
- log_error("%s: gdlm_lock %x,%llx err=%d cur=%d req=%d lkf=%x "
- "flags=%lx", ls->fsname, lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number, error,
- lp->cur, lp->req, lp->lkf, lp->flags);
- return LM_OUT_ERROR;
- }
- return LM_OUT_ASYNC;
-}
-
-static unsigned int gdlm_do_unlock(struct gdlm_lock *lp)
-{
- struct gdlm_ls *ls = lp->ls;
- unsigned int lkf = 0;
- int error;
-
- set_bit(LFL_DLM_UNLOCK, &lp->flags);
- set_bit(LFL_ACTIVE, &lp->flags);
-
- if (lp->lvb)
- lkf = DLM_LKF_VALBLK;
-
- log_debug("un %x,%llx %x %d %x", lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number,
- lp->lksb.sb_lkid, lp->cur, lkf);
-
- error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, lkf, NULL, lp);
-
- if (error) {
- log_error("%s: gdlm_unlock %x,%llx err=%d cur=%d req=%d lkf=%x "
- "flags=%lx", ls->fsname, lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number, error,
- lp->cur, lp->req, lp->lkf, lp->flags);
- return LM_OUT_ERROR;
- }
- return LM_OUT_ASYNC;
-}
-
-unsigned int gdlm_lock(void *lock, unsigned int cur_state,
- unsigned int req_state, unsigned int flags)
-{
- struct gdlm_lock *lp = lock;
-
- if (req_state == LM_ST_UNLOCKED)
- return gdlm_unlock(lock, cur_state);
-
- if (req_state == LM_ST_UNLOCKED)
- return gdlm_unlock(lock, cur_state);
-
- clear_bit(LFL_DLM_CANCEL, &lp->flags);
- if (flags & LM_FLAG_NOEXP)
- set_bit(LFL_NOBLOCK, &lp->flags);
-
- check_cur_state(lp, cur_state);
- lp->req = make_mode(req_state);
- lp->lkf = make_flags(lp, flags, lp->cur, lp->req);
-
- return gdlm_do_lock(lp);
-}
-
-unsigned int gdlm_unlock(void *lock, unsigned int cur_state)
-{
- struct gdlm_lock *lp = lock;
-
- clear_bit(LFL_DLM_CANCEL, &lp->flags);
- if (lp->cur == DLM_LOCK_IV)
- return 0;
- return gdlm_do_unlock(lp);
-}
-
-void gdlm_cancel(void *lock)
-{
- struct gdlm_lock *lp = lock;
- struct gdlm_ls *ls = lp->ls;
- int error, delay_list = 0;
-
- if (test_bit(LFL_DLM_CANCEL, &lp->flags))
- return;
-
- log_info("gdlm_cancel %x,%llx flags %lx", lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number, lp->flags);
-
- spin_lock(&ls->async_lock);
- if (!list_empty(&lp->delay_list)) {
- list_del_init(&lp->delay_list);
- delay_list = 1;
- }
- spin_unlock(&ls->async_lock);
-
- if (delay_list) {
- set_bit(LFL_CANCEL, &lp->flags);
- set_bit(LFL_ACTIVE, &lp->flags);
- gdlm_ast(lp);
- return;
- }
-
- if (!test_bit(LFL_ACTIVE, &lp->flags) ||
- test_bit(LFL_DLM_UNLOCK, &lp->flags)) {
- log_info("gdlm_cancel skip %x,%llx flags %lx",
- lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number, lp->flags);
- return;
- }
-
- /* the lock is blocked in the dlm */
-
- set_bit(LFL_DLM_CANCEL, &lp->flags);
- set_bit(LFL_ACTIVE, &lp->flags);
-
- error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, DLM_LKF_CANCEL,
- NULL, lp);
-
- log_info("gdlm_cancel rv %d %x,%llx flags %lx", error,
- lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number, lp->flags);
-
- if (error == -EBUSY)
- clear_bit(LFL_DLM_CANCEL, &lp->flags);
-}
-
-static int gdlm_add_lvb(struct gdlm_lock *lp)
-{
- char *lvb;
-
- lvb = kzalloc(GDLM_LVB_SIZE, GFP_NOFS);
- if (!lvb)
- return -ENOMEM;
-
- lp->lksb.sb_lvbptr = lvb;
- lp->lvb = lvb;
- return 0;
-}
-
-static void gdlm_del_lvb(struct gdlm_lock *lp)
-{
- kfree(lp->lvb);
- lp->lvb = NULL;
- lp->lksb.sb_lvbptr = NULL;
-}
-
-static int gdlm_ast_wait(void *word)
-{
- schedule();
- return 0;
-}
-
-/* This can do a synchronous dlm request (requiring a lock_dlm thread to get
- the completion) because gfs won't call hold_lvb() during a callback (from
- the context of a lock_dlm thread). */
-
-static int hold_null_lock(struct gdlm_lock *lp)
-{
- struct gdlm_lock *lpn = NULL;
- int error;
-
- if (lp->hold_null) {
- printk(KERN_INFO "lock_dlm: lvb already held\n");
- return 0;
- }
-
- error = gdlm_create_lp(lp->ls, &lp->lockname, &lpn);
- if (error)
- goto out;
-
- lpn->lksb.sb_lvbptr = junk_lvb;
- lpn->lvb = junk_lvb;
-
- lpn->req = DLM_LOCK_NL;
- lpn->lkf = DLM_LKF_VALBLK | DLM_LKF_EXPEDITE;
- set_bit(LFL_NOBAST, &lpn->flags);
- set_bit(LFL_INLOCK, &lpn->flags);
- set_bit(LFL_AST_WAIT, &lpn->flags);
-
- gdlm_do_lock(lpn);
- wait_on_bit(&lpn->flags, LFL_AST_WAIT, gdlm_ast_wait, TASK_UNINTERRUPTIBLE);
- error = lpn->lksb.sb_status;
- if (error) {
- printk(KERN_INFO "lock_dlm: hold_null_lock dlm error %d\n",
- error);
- gdlm_delete_lp(lpn);
- lpn = NULL;
- }
-out:
- lp->hold_null = lpn;
- return error;
-}
-
-/* This cannot do a synchronous dlm request (requiring a lock_dlm thread to get
- the completion) because gfs may call unhold_lvb() during a callback (from
- the context of a lock_dlm thread) which could cause a deadlock since the
- other lock_dlm thread could be engaged in recovery. */
-
-static void unhold_null_lock(struct gdlm_lock *lp)
-{
- struct gdlm_lock *lpn = lp->hold_null;
-
- gdlm_assert(lpn, "%x,%llx", lp->lockname.ln_type,
- (unsigned long long)lp->lockname.ln_number);
- lpn->lksb.sb_lvbptr = NULL;
- lpn->lvb = NULL;
- set_bit(LFL_UNLOCK_DELETE, &lpn->flags);
- gdlm_do_unlock(lpn);
- lp->hold_null = NULL;
-}
-
-/* Acquire a NL lock because gfs requires the value block to remain
- intact on the resource while the lvb is "held" even if it's holding no locks
- on the resource. */
-
-int gdlm_hold_lvb(void *lock, char **lvbp)
-{
- struct gdlm_lock *lp = lock;
- int error;
-
- error = gdlm_add_lvb(lp);
- if (error)
- return error;
-
- *lvbp = lp->lvb;
-
- error = hold_null_lock(lp);
- if (error)
- gdlm_del_lvb(lp);
-
- return error;
-}
-
-void gdlm_unhold_lvb(void *lock, char *lvb)
-{
- struct gdlm_lock *lp = lock;
-
- unhold_null_lock(lp);
- gdlm_del_lvb(lp);
-}
-
-void gdlm_submit_delayed(struct gdlm_ls *ls)
-{
- struct gdlm_lock *lp, *safe;
-
- spin_lock(&ls->async_lock);
- list_for_each_entry_safe(lp, safe, &ls->delayed, delay_list) {
- list_del_init(&lp->delay_list);
- list_add_tail(&lp->delay_list, &ls->submit);
- }
- spin_unlock(&ls->async_lock);
- wake_up(&ls->thread_wait);
-}
-
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h
deleted file mode 100644
index 3c98e7c6f93b..000000000000
--- a/fs/gfs2/locking/dlm/lock_dlm.h
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#ifndef LOCK_DLM_DOT_H
-#define LOCK_DLM_DOT_H
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/list.h>
-#include <linux/socket.h>
-#include <linux/delay.h>
-#include <linux/kthread.h>
-#include <linux/kobject.h>
-#include <linux/fcntl.h>
-#include <linux/wait.h>
-#include <net/sock.h>
-
-#include <linux/dlm.h>
-#include <linux/dlm_plock.h>
-#include <linux/lm_interface.h>
-
-/*
- * Internally, we prefix things with gdlm_ and GDLM_ (for gfs-dlm) since a
- * prefix of lock_dlm_ gets awkward. Externally, GFS refers to this module
- * as "lock_dlm".
- */
-
-#define GDLM_STRNAME_BYTES 24
-#define GDLM_LVB_SIZE 32
-#define GDLM_DROP_COUNT 0
-#define GDLM_DROP_PERIOD 60
-#define GDLM_NAME_LEN 128
-
-/* GFS uses 12 bytes to identify a resource (32 bit type + 64 bit number).
- We sprintf these numbers into a 24 byte string of hex values to make them
- human-readable (to make debugging simpler.) */
-
-struct gdlm_strname {
- unsigned char name[GDLM_STRNAME_BYTES];
- unsigned short namelen;
-};
-
-enum {
- DFL_BLOCK_LOCKS = 0,
- DFL_SPECTATOR = 1,
- DFL_WITHDRAW = 2,
-};
-
-struct gdlm_ls {
- u32 id;
- int jid;
- int first;
- int first_done;
- unsigned long flags;
- struct kobject kobj;
- char clustername[GDLM_NAME_LEN];
- char fsname[GDLM_NAME_LEN];
- int fsflags;
- dlm_lockspace_t *dlm_lockspace;
- lm_callback_t fscb;
- struct gfs2_sbd *sdp;
- int recover_jid;
- int recover_jid_done;
- int recover_jid_status;
- spinlock_t async_lock;
- struct list_head delayed;
- struct list_head submit;
- u32 all_locks_count;
- wait_queue_head_t wait_control;
- struct task_struct *thread;
- wait_queue_head_t thread_wait;
-};
-
-enum {
- LFL_NOBLOCK = 0,
- LFL_NOCACHE = 1,
- LFL_DLM_UNLOCK = 2,
- LFL_DLM_CANCEL = 3,
- LFL_SYNC_LVB = 4,
- LFL_FORCE_PROMOTE = 5,
- LFL_REREQUEST = 6,
- LFL_ACTIVE = 7,
- LFL_INLOCK = 8,
- LFL_CANCEL = 9,
- LFL_NOBAST = 10,
- LFL_HEADQUE = 11,
- LFL_UNLOCK_DELETE = 12,
- LFL_AST_WAIT = 13,
-};
-
-struct gdlm_lock {
- struct gdlm_ls *ls;
- struct lm_lockname lockname;
- struct gdlm_strname strname;
- char *lvb;
- struct dlm_lksb lksb;
-
- s16 cur;
- s16 req;
- s16 prev_req;
- u32 lkf; /* dlm flags DLM_LKF_ */
- unsigned long flags; /* lock_dlm flags LFL_ */
-
- struct list_head delay_list; /* delayed */
- struct gdlm_lock *hold_null; /* NL lock for hold_lvb */
-};
-
-#define gdlm_assert(assertion, fmt, args...) \
-do { \
- if (unlikely(!(assertion))) { \
- printk(KERN_EMERG "lock_dlm: fatal assertion failed \"%s\"\n" \
- "lock_dlm: " fmt "\n", \
- #assertion, ##args); \
- BUG(); \
- } \
-} while (0)
-
-#define log_print(lev, fmt, arg...) printk(lev "lock_dlm: " fmt "\n" , ## arg)
-#define log_info(fmt, arg...) log_print(KERN_INFO , fmt , ## arg)
-#define log_error(fmt, arg...) log_print(KERN_ERR , fmt , ## arg)
-#ifdef LOCK_DLM_LOG_DEBUG
-#define log_debug(fmt, arg...) log_print(KERN_DEBUG , fmt , ## arg)
-#else
-#define log_debug(fmt, arg...)
-#endif
-
-/* sysfs.c */
-
-int gdlm_sysfs_init(void);
-void gdlm_sysfs_exit(void);
-int gdlm_kobject_setup(struct gdlm_ls *, struct kobject *);
-void gdlm_kobject_release(struct gdlm_ls *);
-
-/* thread.c */
-
-int gdlm_init_threads(struct gdlm_ls *);
-void gdlm_release_threads(struct gdlm_ls *);
-
-/* lock.c */
-
-void gdlm_submit_delayed(struct gdlm_ls *);
-unsigned int gdlm_do_lock(struct gdlm_lock *);
-
-int gdlm_get_lock(void *, struct lm_lockname *, void **);
-void gdlm_put_lock(void *);
-unsigned int gdlm_lock(void *, unsigned int, unsigned int, unsigned int);
-unsigned int gdlm_unlock(void *, unsigned int);
-void gdlm_cancel(void *);
-int gdlm_hold_lvb(void *, char **);
-void gdlm_unhold_lvb(void *, char *);
-
-/* mount.c */
-
-extern const struct lm_lockops gdlm_ops;
-
-#endif
-
diff --git a/fs/gfs2/locking/dlm/main.c b/fs/gfs2/locking/dlm/main.c
deleted file mode 100644
index b9a03a7ff801..000000000000
--- a/fs/gfs2/locking/dlm/main.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#include <linux/init.h>
-
-#include "lock_dlm.h"
-
-static int __init init_lock_dlm(void)
-{
- int error;
-
- error = gfs2_register_lockproto(&gdlm_ops);
- if (error) {
- printk(KERN_WARNING "lock_dlm: can't register protocol: %d\n",
- error);
- return error;
- }
-
- error = gdlm_sysfs_init();
- if (error) {
- gfs2_unregister_lockproto(&gdlm_ops);
- return error;
- }
-
- printk(KERN_INFO
- "Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__);
- return 0;
-}
-
-static void __exit exit_lock_dlm(void)
-{
- gdlm_sysfs_exit();
- gfs2_unregister_lockproto(&gdlm_ops);
-}
-
-module_init(init_lock_dlm);
-module_exit(exit_lock_dlm);
-
-MODULE_DESCRIPTION("GFS DLM Locking Module");
-MODULE_AUTHOR("Red Hat, Inc.");
-MODULE_LICENSE("GPL");
-
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c
deleted file mode 100644
index 1aa7eb6a0226..000000000000
--- a/fs/gfs2/locking/dlm/mount.c
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#include "lock_dlm.h"
-
-const struct lm_lockops gdlm_ops;
-
-
-static struct gdlm_ls *init_gdlm(lm_callback_t cb, struct gfs2_sbd *sdp,
- int flags, char *table_name)
-{
- struct gdlm_ls *ls;
- char buf[256], *p;
-
- ls = kzalloc(sizeof(struct gdlm_ls), GFP_KERNEL);
- if (!ls)
- return NULL;
-
- ls->fscb = cb;
- ls->sdp = sdp;
- ls->fsflags = flags;
- spin_lock_init(&ls->async_lock);
- INIT_LIST_HEAD(&ls->delayed);
- INIT_LIST_HEAD(&ls->submit);
- init_waitqueue_head(&ls->thread_wait);
- init_waitqueue_head(&ls->wait_control);
- ls->jid = -1;
-
- strncpy(buf, table_name, 256);
- buf[255] = '\0';
-
- p = strchr(buf, ':');
- if (!p) {
- log_info("invalid table_name \"%s\"", table_name);
- kfree(ls);
- return NULL;
- }
- *p = '\0';
- p++;
-
- strncpy(ls->clustername, buf, GDLM_NAME_LEN);
- strncpy(ls->fsname, p, GDLM_NAME_LEN);
-
- return ls;
-}
-
-static int make_args(struct gdlm_ls *ls, char *data_arg, int *nodir)
-{
- char data[256];
- char *options, *x, *y;
- int error = 0;
-
- memset(data, 0, 256);
- strncpy(data, data_arg, 255);
-
- if (!strlen(data)) {
- log_error("no mount options, (u)mount helpers not installed");
- return -EINVAL;
- }
-
- for (options = data; (x = strsep(&options, ":")); ) {
- if (!*x)
- continue;
-
- y = strchr(x, '=');
- if (y)
- *y++ = 0;
-
- if (!strcmp(x, "jid")) {
- if (!y) {
- log_error("need argument to jid");
- error = -EINVAL;
- break;
- }
- sscanf(y, "%u", &ls->jid);
-
- } else if (!strcmp(x, "first")) {
- if (!y) {
- log_error("need argument to first");
- error = -EINVAL;
- break;
- }
- sscanf(y, "%u", &ls->first);
-
- } else if (!strcmp(x, "id")) {
- if (!y) {
- log_error("need argument to id");
- error = -EINVAL;
- break;
- }
- sscanf(y, "%u", &ls->id);
-
- } else if (!strcmp(x, "nodir")) {
- if (!y) {
- log_error("need argument to nodir");
- error = -EINVAL;
- break;
- }
- sscanf(y, "%u", nodir);
-
- } else {
- log_error("unkonwn option: %s", x);
- error = -EINVAL;
- break;
- }
- }
-
- return error;
-}
-
-static int gdlm_mount(char *table_name, char *host_data,
- lm_callback_t cb, void *cb_data,
- unsigned int min_lvb_size, int flags,
- struct lm_lockstruct *lockstruct,
- struct kobject *fskobj)
-{
- struct gdlm_ls *ls;
- int error = -ENOMEM, nodir = 0;
-
- if (min_lvb_size > GDLM_LVB_SIZE)
- goto out;
-
- ls = init_gdlm(cb, cb_data, flags, table_name);
- if (!ls)
- goto out;
-
- error = make_args(ls, host_data, &nodir);
- if (error)
- goto out;
-
- error = gdlm_init_threads(ls);
- if (error)
- goto out_free;
-
- error = gdlm_kobject_setup(ls, fskobj);
- if (error)
- goto out_thread;
-
- error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname),
- &ls->dlm_lockspace,
- DLM_LSFL_FS | DLM_LSFL_NEWEXCL |
- (nodir ? DLM_LSFL_NODIR : 0),
- GDLM_LVB_SIZE);
- if (error) {
- log_error("dlm_new_lockspace error %d", error);
- goto out_kobj;
- }
-
- lockstruct->ls_jid = ls->jid;
- lockstruct->ls_first = ls->first;
- lockstruct->ls_lockspace = ls;
- lockstruct->ls_ops = &gdlm_ops;
- lockstruct->ls_flags = 0;
- lockstruct->ls_lvb_size = GDLM_LVB_SIZE;
- return 0;
-
-out_kobj:
- gdlm_kobject_release(ls);
-out_thread:
- gdlm_release_threads(ls);
-out_free:
- kfree(ls);
-out:
- return error;
-}
-
-static void gdlm_unmount(void *lockspace)
-{
- struct gdlm_ls *ls = lockspace;
-
- log_debug("unmount flags %lx", ls->flags);
-
- /* FIXME: serialize unmount and withdraw in case they
- happen at once. Also, if unmount follows withdraw,
- wait for withdraw to finish. */
-
- if (test_bit(DFL_WITHDRAW, &ls->flags))
- goto out;
-
- gdlm_kobject_release(ls);
- dlm_release_lockspace(ls->dlm_lockspace, 2);
- gdlm_release_threads(ls);
- BUG_ON(ls->all_locks_count);
-out:
- kfree(ls);
-}
-
-static void gdlm_recovery_done(void *lockspace, unsigned int jid,
- unsigned int message)
-{
- char env_jid[20];
- char env_status[20];
- char *envp[] = { env_jid, env_status, NULL };
- struct gdlm_ls *ls = lockspace;
- ls->recover_jid_done = jid;
- ls->recover_jid_status = message;
- sprintf(env_jid, "JID=%d", jid);
- sprintf(env_status, "RECOVERY=%s",
- message == LM_RD_SUCCESS ? "Done" : "Failed");
- kobject_uevent_env(&ls->kobj, KOBJ_CHANGE, envp);
-}
-
-static void gdlm_others_may_mount(void *lockspace)
-{
- char *message = "FIRSTMOUNT=Done";
- char *envp[] = { message, NULL };
- struct gdlm_ls *ls = lockspace;
- ls->first_done = 1;
- kobject_uevent_env(&ls->kobj, KOBJ_CHANGE, envp);
-}
-
-/* Userspace gets the offline uevent, blocks new gfs locks on
- other mounters, and lets us know (sets WITHDRAW flag). Then,
- userspace leaves the mount group while we leave the lockspace. */
-
-static void gdlm_withdraw(void *lockspace)
-{
- struct gdlm_ls *ls = lockspace;
-
- kobject_uevent(&ls->kobj, KOBJ_OFFLINE);
-
- wait_event_interruptible(ls->wait_control,
- test_bit(DFL_WITHDRAW, &ls->flags));
-
- dlm_release_lockspace(ls->dlm_lockspace, 2);
- gdlm_release_threads(ls);
- gdlm_kobject_release(ls);
-}
-
-static int gdlm_plock(void *lockspace, struct lm_lockname *name,
- struct file *file, int cmd, struct file_lock *fl)
-{
- struct gdlm_ls *ls = lockspace;
- return dlm_posix_lock(ls->dlm_lockspace, name->ln_number, file, cmd, fl);
-}
-
-static int gdlm_punlock(void *lockspace, struct lm_lockname *name,
- struct file *file, struct file_lock *fl)
-{
- struct gdlm_ls *ls = lockspace;
- return dlm_posix_unlock(ls->dlm_lockspace, name->ln_number, file, fl);
-}
-
-static int gdlm_plock_get(void *lockspace, struct lm_lockname *name,
- struct file *file, struct file_lock *fl)
-{
- struct gdlm_ls *ls = lockspace;
- return dlm_posix_get(ls->dlm_lockspace, name->ln_number, file, fl);
-}
-
-const struct lm_lockops gdlm_ops = {
- .lm_proto_name = "lock_dlm",
- .lm_mount = gdlm_mount,
- .lm_others_may_mount = gdlm_others_may_mount,
- .lm_unmount = gdlm_unmount,
- .lm_withdraw = gdlm_withdraw,
- .lm_get_lock = gdlm_get_lock,
- .lm_put_lock = gdlm_put_lock,
- .lm_lock = gdlm_lock,
- .lm_unlock = gdlm_unlock,
- .lm_plock = gdlm_plock,
- .lm_punlock = gdlm_punlock,
- .lm_plock_get = gdlm_plock_get,
- .lm_cancel = gdlm_cancel,
- .lm_hold_lvb = gdlm_hold_lvb,
- .lm_unhold_lvb = gdlm_unhold_lvb,
- .lm_recovery_done = gdlm_recovery_done,
- .lm_owner = THIS_MODULE,
-};
-
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c
deleted file mode 100644
index 9b7edcf7bd49..000000000000
--- a/fs/gfs2/locking/dlm/sysfs.c
+++ /dev/null
@@ -1,226 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#include <linux/ctype.h>
-#include <linux/stat.h>
-
-#include "lock_dlm.h"
-
-static ssize_t proto_name_show(struct gdlm_ls *ls, char *buf)
-{
- return sprintf(buf, "%s\n", gdlm_ops.lm_proto_name);
-}
-
-static ssize_t block_show(struct gdlm_ls *ls, char *buf)
-{
- ssize_t ret;
- int val = 0;
-
- if (test_bit(DFL_BLOCK_LOCKS, &ls->flags))
- val = 1;
- ret = sprintf(buf, "%d\n", val);
- return ret;
-}
-
-static ssize_t block_store(struct gdlm_ls *ls, const char *buf, size_t len)
-{
- ssize_t ret = len;
- int val;
-
- val = simple_strtol(buf, NULL, 0);
-
- if (val == 1)
- set_bit(DFL_BLOCK_LOCKS, &ls->flags);
- else if (val == 0) {
- clear_bit(DFL_BLOCK_LOCKS, &ls->flags);
- gdlm_submit_delayed(ls);
- } else {
- ret = -EINVAL;
- }
- return ret;
-}
-
-static ssize_t withdraw_show(struct gdlm_ls *ls, char *buf)
-{
- ssize_t ret;
- int val = 0;
-
- if (test_bit(DFL_WITHDRAW, &ls->flags))
- val = 1;
- ret = sprintf(buf, "%d\n", val);
- return ret;
-}
-
-static ssize_t withdraw_store(struct gdlm_ls *ls, const char *buf, size_t len)
-{
- ssize_t ret = len;
- int val;
-
- val = simple_strtol(buf, NULL, 0);
-
- if (val == 1)
- set_bit(DFL_WITHDRAW, &ls->flags);
- else
- ret = -EINVAL;
- wake_up(&ls->wait_control);
- return ret;
-}
-
-static ssize_t id_show(struct gdlm_ls *ls, char *buf)
-{
- return sprintf(buf, "%u\n", ls->id);
-}
-
-static ssize_t jid_show(struct gdlm_ls *ls, char *buf)
-{
- return sprintf(buf, "%d\n", ls->jid);
-}
-
-static ssize_t first_show(struct gdlm_ls *ls, char *buf)
-{
- return sprintf(buf, "%d\n", ls->first);
-}
-
-static ssize_t first_done_show(struct gdlm_ls *ls, char *buf)
-{
- return sprintf(buf, "%d\n", ls->first_done);
-}
-
-static ssize_t recover_show(struct gdlm_ls *ls, char *buf)
-{
- return sprintf(buf, "%d\n", ls->recover_jid);
-}
-
-static ssize_t recover_store(struct gdlm_ls *ls, const char *buf, size_t len)
-{
- ls->recover_jid = simple_strtol(buf, NULL, 0);
- ls->fscb(ls->sdp, LM_CB_NEED_RECOVERY, &ls->recover_jid);
- return len;
-}
-
-static ssize_t recover_done_show(struct gdlm_ls *ls, char *buf)
-{
- return sprintf(buf, "%d\n", ls->recover_jid_done);
-}
-
-static ssize_t recover_status_show(struct gdlm_ls *ls, char *buf)
-{
- return sprintf(buf, "%d\n", ls->recover_jid_status);
-}
-
-struct gdlm_attr {
- struct attribute attr;
- ssize_t (*show)(struct gdlm_ls *, char *);
- ssize_t (*store)(struct gdlm_ls *, const char *, size_t);
-};
-
-#define GDLM_ATTR(_name,_mode,_show,_store) \
-static struct gdlm_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
-
-GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
-GDLM_ATTR(block, 0644, block_show, block_store);
-GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
-GDLM_ATTR(id, 0444, id_show, NULL);
-GDLM_ATTR(jid, 0444, jid_show, NULL);
-GDLM_ATTR(first, 0444, first_show, NULL);
-GDLM_ATTR(first_done, 0444, first_done_show, NULL);
-GDLM_ATTR(recover, 0644, recover_show, recover_store);
-GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
-GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);
-
-static struct attribute *gdlm_attrs[] = {
- &gdlm_attr_proto_name.attr,
- &gdlm_attr_block.attr,
- &gdlm_attr_withdraw.attr,
- &gdlm_attr_id.attr,
- &gdlm_attr_jid.attr,
- &gdlm_attr_first.attr,
- &gdlm_attr_first_done.attr,
- &gdlm_attr_recover.attr,
- &gdlm_attr_recover_done.attr,
- &gdlm_attr_recover_status.attr,
- NULL,
-};
-
-static ssize_t gdlm_attr_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
- struct gdlm_attr *a = container_of(attr, struct gdlm_attr, attr);
- return a->show ? a->show(ls, buf) : 0;
-}
-
-static ssize_t gdlm_attr_store(struct kobject *kobj, struct attribute *attr,
- const char *buf, size_t len)
-{
- struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
- struct gdlm_attr *a = container_of(attr, struct gdlm_attr, attr);
- return a->store ? a->store(ls, buf, len) : len;
-}
-
-static struct sysfs_ops gdlm_attr_ops = {
- .show = gdlm_attr_show,
- .store = gdlm_attr_store,
-};
-
-static struct kobj_type gdlm_ktype = {
- .default_attrs = gdlm_attrs,
- .sysfs_ops = &gdlm_attr_ops,
-};
-
-static struct kset *gdlm_kset;
-
-int gdlm_kobject_setup(struct gdlm_ls *ls, struct kobject *fskobj)
-{
- int error;
-
- ls->kobj.kset = gdlm_kset;
- error = kobject_init_and_add(&ls->kobj, &gdlm_ktype, fskobj,
- "lock_module");
- if (error)
- log_error("can't register kobj %d", error);
- kobject_uevent(&ls->kobj, KOBJ_ADD);
-
- return error;
-}
-
-void gdlm_kobject_release(struct gdlm_ls *ls)
-{
- kobject_put(&ls->kobj);
-}
-
-static int gdlm_uevent(struct kset *kset, struct kobject *kobj,
- struct kobj_uevent_env *env)
-{
- struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
- add_uevent_var(env, "LOCKTABLE=%s:%s", ls->clustername, ls->fsname);
- add_uevent_var(env, "LOCKPROTO=lock_dlm");
- return 0;
-}
-
-static struct kset_uevent_ops gdlm_uevent_ops = {
- .uevent = gdlm_uevent,
-};
-
-
-int gdlm_sysfs_init(void)
-{
- gdlm_kset = kset_create_and_add("lock_dlm", &gdlm_uevent_ops, kernel_kobj);
- if (!gdlm_kset) {
- printk(KERN_WARNING "%s: can not create kset\n", __func__);
- return -ENOMEM;
- }
- return 0;
-}
-
-void gdlm_sysfs_exit(void)
-{
- kset_unregister(gdlm_kset);
-}
-
diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c
deleted file mode 100644
index 38823efd698c..000000000000
--- a/fs/gfs2/locking/dlm/thread.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#include "lock_dlm.h"
-
-static inline int no_work(struct gdlm_ls *ls)
-{
- int ret;
-
- spin_lock(&ls->async_lock);
- ret = list_empty(&ls->submit);
- spin_unlock(&ls->async_lock);
-
- return ret;
-}
-
-static int gdlm_thread(void *data)
-{
- struct gdlm_ls *ls = (struct gdlm_ls *) data;
- struct gdlm_lock *lp = NULL;
-
- while (!kthread_should_stop()) {
- wait_event_interruptible(ls->thread_wait,
- !no_work(ls) || kthread_should_stop());
-
- spin_lock(&ls->async_lock);
-
- if (!list_empty(&ls->submit)) {
- lp = list_entry(ls->submit.next, struct gdlm_lock,
- delay_list);
- list_del_init(&lp->delay_list);
- spin_unlock(&ls->async_lock);
- gdlm_do_lock(lp);
- spin_lock(&ls->async_lock);
- }
- spin_unlock(&ls->async_lock);
- }
-
- return 0;
-}
-
-int gdlm_init_threads(struct gdlm_ls *ls)
-{
- struct task_struct *p;
- int error;
-
- p = kthread_run(gdlm_thread, ls, "lock_dlm");
- error = IS_ERR(p);
- if (error) {
- log_error("can't start lock_dlm thread %d", error);
- return error;
- }
- ls->thread = p;
-
- return 0;
-}
-
-void gdlm_release_threads(struct gdlm_ls *ls)
-{
- kthread_stop(ls->thread);
-}
-
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index ad305854bdc6..98918a756410 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -14,7 +14,6 @@
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
-#include <linux/lm_interface.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 4390f6f4047d..80e4f5f898bb 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -13,7 +13,6 @@
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 7cacfde32194..a6892ed0840a 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -14,7 +14,6 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
#include <asm/atomic.h>
#include "gfs2.h"
@@ -23,6 +22,12 @@
#include "sys.h"
#include "util.h"
#include "glock.h"
+#include "quota.h"
+
+static struct shrinker qd_shrinker = {
+ .shrink = gfs2_shrink_qd_memory,
+ .seeks = DEFAULT_SEEKS,
+};
static void gfs2_init_inode_once(void *foo)
{
@@ -41,8 +46,6 @@ static void gfs2_init_glock_once(void *foo)
INIT_HLIST_NODE(&gl->gl_list);
spin_lock_init(&gl->gl_spin);
INIT_LIST_HEAD(&gl->gl_holders);
- gl->gl_lvb = NULL;
- atomic_set(&gl->gl_lvb_count, 0);
INIT_LIST_HEAD(&gl->gl_lru);
INIT_LIST_HEAD(&gl->gl_ail_list);
atomic_set(&gl->gl_ail_count, 0);
@@ -100,6 +103,8 @@ static int __init init_gfs2_fs(void)
if (!gfs2_quotad_cachep)
goto fail;
+ register_shrinker(&qd_shrinker);
+
error = register_filesystem(&gfs2_fs_type);
if (error)
goto fail;
@@ -117,6 +122,7 @@ static int __init init_gfs2_fs(void)
fail_unregister:
unregister_filesystem(&gfs2_fs_type);
fail:
+ unregister_shrinker(&qd_shrinker);
gfs2_glock_exit();
if (gfs2_quotad_cachep)
@@ -145,6 +151,7 @@ fail:
static void __exit exit_gfs2_fs(void)
{
+ unregister_shrinker(&qd_shrinker);
gfs2_glock_exit();
gfs2_unregister_debugfs();
unregister_filesystem(&gfs2_fs_type);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 09853620c951..8d6f13256b26 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -19,7 +19,6 @@
#include <linux/delay.h>
#include <linux/bio.h>
#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
@@ -90,27 +89,6 @@ void gfs2_aspace_put(struct inode *aspace)
}
/**
- * gfs2_meta_inval - Invalidate all buffers associated with a glock
- * @gl: the glock
- *
- */
-
-void gfs2_meta_inval(struct gfs2_glock *gl)
-{
- struct gfs2_sbd *sdp = gl->gl_sbd;
- struct inode *aspace = gl->gl_aspace;
- struct address_space *mapping = gl->gl_aspace->i_mapping;
-
- gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
-
- atomic_inc(&aspace->i_writecount);
- truncate_inode_pages(mapping, 0);
- atomic_dec(&aspace->i_writecount);
-
- gfs2_assert_withdraw(sdp, !mapping->nrpages);
-}
-
-/**
* gfs2_meta_sync - Sync all buffers associated with a glock
* @gl: The glock
*
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index b1a5f3674d43..de270c2f9b63 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -40,7 +40,6 @@ static inline void gfs2_buffer_copy_tail(struct buffer_head *to_bh,
struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp);
void gfs2_aspace_put(struct inode *aspace);
-void gfs2_meta_inval(struct gfs2_glock *gl);
void gfs2_meta_sync(struct gfs2_glock *gl);
struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno);
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
index 3cb0a44ba023..f7e8527a21e0 100644
--- a/fs/gfs2/mount.c
+++ b/fs/gfs2/mount.c
@@ -12,12 +12,11 @@
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
#include <linux/parser.h>
#include "gfs2.h"
#include "incore.h"
-#include "mount.h"
+#include "super.h"
#include "sys.h"
#include "util.h"
@@ -37,11 +36,15 @@ enum {
Opt_quota_off,
Opt_quota_account,
Opt_quota_on,
+ Opt_quota,
+ Opt_noquota,
Opt_suiddir,
Opt_nosuiddir,
Opt_data_writeback,
Opt_data_ordered,
Opt_meta,
+ Opt_discard,
+ Opt_nodiscard,
Opt_err,
};
@@ -61,11 +64,15 @@ static const match_table_t tokens = {
{Opt_quota_off, "quota=off"},
{Opt_quota_account, "quota=account"},
{Opt_quota_on, "quota=on"},
+ {Opt_quota, "quota"},
+ {Opt_noquota, "noquota"},
{Opt_suiddir, "suiddir"},
{Opt_nosuiddir, "nosuiddir"},
{Opt_data_writeback, "data=writeback"},
{Opt_data_ordered, "data=ordered"},
{Opt_meta, "meta"},
+ {Opt_discard, "discard"},
+ {Opt_nodiscard, "nodiscard"},
{Opt_err, NULL}
};
@@ -77,101 +84,46 @@ static const match_table_t tokens = {
* Return: errno
*/
-int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
+int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options)
{
- struct gfs2_args *args = &sdp->sd_args;
- char *data = data_arg;
- char *options, *o, *v;
- int error = 0;
-
- if (!remount) {
- /* Set some defaults */
- args->ar_quota = GFS2_QUOTA_DEFAULT;
- args->ar_data = GFS2_DATA_DEFAULT;
- }
+ char *o;
+ int token;
+ substring_t tmp[MAX_OPT_ARGS];
/* Split the options into tokens with the "," character and
process them */
- for (options = data; (o = strsep(&options, ",")); ) {
- int token;
- substring_t tmp[MAX_OPT_ARGS];
-
- if (!*o)
+ while (1) {
+ o = strsep(&options, ",");
+ if (o == NULL)
+ break;
+ if (*o == '\0')
continue;
token = match_token(o, tokens, tmp);
switch (token) {
case Opt_lockproto:
- v = match_strdup(&tmp[0]);
- if (!v) {
- fs_info(sdp, "no memory for lockproto\n");
- error = -ENOMEM;
- goto out_error;
- }
-
- if (remount && strcmp(v, args->ar_lockproto)) {
- kfree(v);
- goto cant_remount;
- }
-
- strncpy(args->ar_lockproto, v, GFS2_LOCKNAME_LEN);
- args->ar_lockproto[GFS2_LOCKNAME_LEN - 1] = 0;
- kfree(v);
+ match_strlcpy(args->ar_lockproto, &tmp[0],
+ GFS2_LOCKNAME_LEN);
break;
case Opt_locktable:
- v = match_strdup(&tmp[0]);
- if (!v) {
- fs_info(sdp, "no memory for locktable\n");
- error = -ENOMEM;
- goto out_error;
- }
-
- if (remount && strcmp(v, args->ar_locktable)) {
- kfree(v);
- goto cant_remount;
- }
-
- strncpy(args->ar_locktable, v, GFS2_LOCKNAME_LEN);
- args->ar_locktable[GFS2_LOCKNAME_LEN - 1] = 0;
- kfree(v);
+ match_strlcpy(args->ar_locktable, &tmp[0],
+ GFS2_LOCKNAME_LEN);
break;
case Opt_hostdata:
- v = match_strdup(&tmp[0]);
- if (!v) {
- fs_info(sdp, "no memory for hostdata\n");
- error = -ENOMEM;
- goto out_error;
- }
-
- if (remount && strcmp(v, args->ar_hostdata)) {
- kfree(v);
- goto cant_remount;
- }
-
- strncpy(args->ar_hostdata, v, GFS2_LOCKNAME_LEN);
- args->ar_hostdata[GFS2_LOCKNAME_LEN - 1] = 0;
- kfree(v);
+ match_strlcpy(args->ar_hostdata, &tmp[0],
+ GFS2_LOCKNAME_LEN);
break;
case Opt_spectator:
- if (remount && !args->ar_spectator)
- goto cant_remount;
args->ar_spectator = 1;
- sdp->sd_vfs->s_flags |= MS_RDONLY;
break;
case Opt_ignore_local_fs:
- if (remount && !args->ar_ignore_local_fs)
- goto cant_remount;
args->ar_ignore_local_fs = 1;
break;
case Opt_localflocks:
- if (remount && !args->ar_localflocks)
- goto cant_remount;
args->ar_localflocks = 1;
break;
case Opt_localcaching:
- if (remount && !args->ar_localcaching)
- goto cant_remount;
args->ar_localcaching = 1;
break;
case Opt_debug:
@@ -181,25 +133,23 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
args->ar_debug = 0;
break;
case Opt_upgrade:
- if (remount && !args->ar_upgrade)
- goto cant_remount;
args->ar_upgrade = 1;
break;
case Opt_acl:
args->ar_posix_acl = 1;
- sdp->sd_vfs->s_flags |= MS_POSIXACL;
break;
case Opt_noacl:
args->ar_posix_acl = 0;
- sdp->sd_vfs->s_flags &= ~MS_POSIXACL;
break;
case Opt_quota_off:
+ case Opt_noquota:
args->ar_quota = GFS2_QUOTA_OFF;
break;
case Opt_quota_account:
args->ar_quota = GFS2_QUOTA_ACCOUNT;
break;
case Opt_quota_on:
+ case Opt_quota:
args->ar_quota = GFS2_QUOTA_ON;
break;
case Opt_suiddir:
@@ -215,29 +165,21 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
args->ar_data = GFS2_DATA_ORDERED;
break;
case Opt_meta:
- if (remount && args->ar_meta != 1)
- goto cant_remount;
args->ar_meta = 1;
break;
+ case Opt_discard:
+ args->ar_discard = 1;
+ break;
+ case Opt_nodiscard:
+ args->ar_discard = 0;
+ break;
case Opt_err:
default:
- fs_info(sdp, "unknown option: %s\n", o);
- error = -EINVAL;
- goto out_error;
+ fs_info(sdp, "invalid mount option: %s\n", o);
+ return -EINVAL;
}
}
-out_error:
- if (error)
- fs_info(sdp, "invalid mount option(s)\n");
-
- if (data != data_arg)
- kfree(data);
-
- return error;
-
-cant_remount:
- fs_info(sdp, "can't remount with option %s\n", o);
- return -EINVAL;
+ return 0;
}
diff --git a/fs/gfs2/mount.h b/fs/gfs2/mount.h
deleted file mode 100644
index 401288acfdf3..000000000000
--- a/fs/gfs2/mount.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#ifndef __MOUNT_DOT_H__
-#define __MOUNT_DOT_H__
-
-struct gfs2_sbd;
-
-int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount);
-
-#endif /* __MOUNT_DOT_H__ */
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 4ddab67867eb..a6dde1751e17 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -19,7 +19,6 @@
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
#include <linux/backing-dev.h>
#include "gfs2.h"
@@ -442,6 +441,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
*/
if (unlikely(page->index)) {
zero_user(page, 0, PAGE_CACHE_SIZE);
+ SetPageUptodate(page);
return 0;
}
@@ -1096,6 +1096,7 @@ static const struct address_space_operations gfs2_writeback_aops = {
.releasepage = gfs2_releasepage,
.direct_IO = gfs2_direct_IO,
.migratepage = buffer_migrate_page,
+ .is_partially_uptodate = block_is_partially_uptodate,
};
static const struct address_space_operations gfs2_ordered_aops = {
@@ -1111,6 +1112,7 @@ static const struct address_space_operations gfs2_ordered_aops = {
.releasepage = gfs2_releasepage,
.direct_IO = gfs2_direct_IO,
.migratepage = buffer_migrate_page,
+ .is_partially_uptodate = block_is_partially_uptodate,
};
static const struct address_space_operations gfs2_jdata_aops = {
@@ -1125,6 +1127,7 @@ static const struct address_space_operations gfs2_jdata_aops = {
.bmap = gfs2_bmap,
.invalidatepage = gfs2_invalidatepage,
.releasepage = gfs2_releasepage,
+ .is_partially_uptodate = block_is_partially_uptodate,
};
void gfs2_set_aops(struct inode *inode)
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
index c2ad36330ca3..5eb57b044382 100644
--- a/fs/gfs2/ops_dentry.c
+++ b/fs/gfs2/ops_dentry.c
@@ -13,7 +13,6 @@
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
-#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index 7fdeb14ddd1a..9200ef221716 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -14,7 +14,6 @@
#include <linux/exportfs.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
-#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 93fe41b67f97..3b9e8de3500b 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -20,9 +20,10 @@
#include <linux/gfs2_ondisk.h>
#include <linux/ext2_fs.h>
#include <linux/crc32.h>
-#include <linux/lm_interface.h>
#include <linux/writeback.h>
#include <asm/uaccess.h>
+#include <linux/dlm.h>
+#include <linux/dlm_plock.h>
#include "gfs2.h"
#include "incore.h"
@@ -354,7 +355,9 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
if (ret)
goto out;
+ set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
set_bit(GIF_SW_PAGED, &ip->i_flags);
+
ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required);
if (ret || !alloc_required)
goto out_unlock;
@@ -560,57 +563,24 @@ static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
return ret;
}
+#ifdef CONFIG_GFS2_FS_LOCKING_DLM
+
/**
* gfs2_setlease - acquire/release a file lease
* @file: the file pointer
* @arg: lease type
* @fl: file lock
*
+ * We don't currently have a way to enforce a lease across the whole
+ * cluster; until we do, disable leases (by just returning -EINVAL),
+ * unless the administrator has requested purely local locking.
+ *
* Returns: errno
*/
static int gfs2_setlease(struct file *file, long arg, struct file_lock **fl)
{
- struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
-
- /*
- * We don't currently have a way to enforce a lease across the whole
- * cluster; until we do, disable leases (by just returning -EINVAL),
- * unless the administrator has requested purely local locking.
- */
- if (!sdp->sd_args.ar_localflocks)
- return -EINVAL;
- return generic_setlease(file, arg, fl);
-}
-
-static int gfs2_lm_plock_get(struct gfs2_sbd *sdp, struct lm_lockname *name,
- struct file *file, struct file_lock *fl)
-{
- int error = -EIO;
- if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
- error = sdp->sd_lockstruct.ls_ops->lm_plock_get(
- sdp->sd_lockstruct.ls_lockspace, name, file, fl);
- return error;
-}
-
-static int gfs2_lm_plock(struct gfs2_sbd *sdp, struct lm_lockname *name,
- struct file *file, int cmd, struct file_lock *fl)
-{
- int error = -EIO;
- if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
- error = sdp->sd_lockstruct.ls_ops->lm_plock(
- sdp->sd_lockstruct.ls_lockspace, name, file, cmd, fl);
- return error;
-}
-
-static int gfs2_lm_punlock(struct gfs2_sbd *sdp, struct lm_lockname *name,
- struct file *file, struct file_lock *fl)
-{
- int error = -EIO;
- if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
- error = sdp->sd_lockstruct.ls_ops->lm_punlock(
- sdp->sd_lockstruct.ls_lockspace, name, file, fl);
- return error;
+ return -EINVAL;
}
/**
@@ -626,9 +596,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
{
struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
- struct lm_lockname name =
- { .ln_number = ip->i_no_addr,
- .ln_type = LM_TYPE_PLOCK };
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
if (!(fl->fl_flags & FL_POSIX))
return -ENOLCK;
@@ -640,12 +608,14 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
cmd = F_SETLK;
fl->fl_type = F_UNLCK;
}
+ if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+ return -EIO;
if (IS_GETLK(cmd))
- return gfs2_lm_plock_get(sdp, &name, file, fl);
+ return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl);
else if (fl->fl_type == F_UNLCK)
- return gfs2_lm_punlock(sdp, &name, file, fl);
+ return dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl);
else
- return gfs2_lm_plock(sdp, &name, file, cmd, fl);
+ return dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl);
}
static int do_flock(struct file *file, int cmd, struct file_lock *fl)
@@ -732,7 +702,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
}
}
-const struct file_operations gfs2_file_fops = {
+const struct file_operations *gfs2_file_fops = &(const struct file_operations){
.llseek = gfs2_llseek,
.read = do_sync_read,
.aio_read = generic_file_aio_read,
@@ -750,7 +720,7 @@ const struct file_operations gfs2_file_fops = {
.setlease = gfs2_setlease,
};
-const struct file_operations gfs2_dir_fops = {
+const struct file_operations *gfs2_dir_fops = &(const struct file_operations){
.readdir = gfs2_readdir,
.unlocked_ioctl = gfs2_ioctl,
.open = gfs2_open,
@@ -760,7 +730,9 @@ const struct file_operations gfs2_dir_fops = {
.flock = gfs2_flock,
};
-const struct file_operations gfs2_file_fops_nolock = {
+#endif /* CONFIG_GFS2_FS_LOCKING_DLM */
+
+const struct file_operations *gfs2_file_fops_nolock = &(const struct file_operations){
.llseek = gfs2_llseek,
.read = do_sync_read,
.aio_read = generic_file_aio_read,
@@ -773,10 +745,10 @@ const struct file_operations gfs2_file_fops_nolock = {
.fsync = gfs2_fsync,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
- .setlease = gfs2_setlease,
+ .setlease = generic_setlease,
};
-const struct file_operations gfs2_dir_fops_nolock = {
+const struct file_operations *gfs2_dir_fops_nolock = &(const struct file_operations){
.readdir = gfs2_readdir,
.unlocked_ioctl = gfs2_ioctl,
.open = gfs2_open,
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index f91eebdde581..51883b3ad89c 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -17,7 +17,6 @@
#include <linux/namei.h>
#include <linux/mount.h>
#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
@@ -25,7 +24,6 @@
#include "glock.h"
#include "glops.h"
#include "inode.h"
-#include "mount.h"
#include "recovery.h"
#include "rgrp.h"
#include "super.h"
@@ -64,7 +62,6 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
gt->gt_quota_warn_period = 10;
gt->gt_quota_scale_num = 1;
gt->gt_quota_scale_den = 1;
- gt->gt_quota_cache_secs = 300;
gt->gt_quota_quantum = 60;
gt->gt_new_files_jdata = 0;
gt->gt_max_readahead = 1 << 18;
@@ -100,7 +97,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
mutex_init(&sdp->sd_jindex_mutex);
INIT_LIST_HEAD(&sdp->sd_quota_list);
- spin_lock_init(&sdp->sd_quota_spin);
mutex_init(&sdp->sd_quota_mutex);
init_waitqueue_head(&sdp->sd_quota_wait);
INIT_LIST_HEAD(&sdp->sd_trunc_list);
@@ -238,6 +234,7 @@ static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
+ memcpy(sb->sb_uuid, str->sb_uuid, 16);
}
/**
@@ -299,15 +296,15 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
__free_page(page);
return 0;
}
+
/**
* gfs2_read_sb - Read super block
* @sdp: The GFS2 superblock
- * @gl: the glock for the superblock (assumed to be held)
* @silent: Don't print message if mount fails
*
*/
-static int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent)
+static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent)
{
u32 hash_blocks, ind_blocks, leaf_blocks;
u32 tmp_blocks;
@@ -527,7 +524,7 @@ static int init_sb(struct gfs2_sbd *sdp, int silent)
return ret;
}
- ret = gfs2_read_sb(sdp, sb_gh.gh_gl, silent);
+ ret = gfs2_read_sb(sdp, silent);
if (ret) {
fs_err(sdp, "can't read superblock: %d\n", ret);
goto out;
@@ -630,13 +627,13 @@ static int map_journal_extents(struct gfs2_sbd *sdp)
return rc;
}
-static void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp)
+static void gfs2_others_may_mount(struct gfs2_sbd *sdp)
{
- if (!sdp->sd_lockstruct.ls_ops->lm_others_may_mount)
- return;
- if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
- sdp->sd_lockstruct.ls_ops->lm_others_may_mount(
- sdp->sd_lockstruct.ls_lockspace);
+ char *message = "FIRSTMOUNT=Done";
+ char *envp[] = { message, NULL };
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ ls->ls_first_done = 1;
+ kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
}
/**
@@ -796,7 +793,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
}
}
- gfs2_lm_others_may_mount(sdp);
+ gfs2_others_may_mount(sdp);
} else if (!sdp->sd_args.ar_spectator) {
error = gfs2_recover_journal(sdp->sd_jdesc);
if (error) {
@@ -1005,7 +1002,6 @@ static int init_threads(struct gfs2_sbd *sdp, int undo)
goto fail_quotad;
sdp->sd_log_flush_time = jiffies;
- sdp->sd_jindex_refresh_time = jiffies;
p = kthread_run(gfs2_logd, sdp, "gfs2_logd");
error = IS_ERR(p);
@@ -1033,6 +1029,17 @@ fail:
return error;
}
+static const match_table_t nolock_tokens = {
+ { Opt_jid, "jid=%d\n", },
+ { Opt_err, NULL },
+};
+
+static const struct lm_lockops nolock_ops = {
+ .lm_proto_name = "lock_nolock",
+ .lm_put_lock = kmem_cache_free,
+ .lm_tokens = &nolock_tokens,
+};
+
/**
* gfs2_lm_mount - mount a locking protocol
* @sdp: the filesystem
@@ -1044,31 +1051,73 @@ fail:
static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
{
- char *proto = sdp->sd_proto_name;
- char *table = sdp->sd_table_name;
- int flags = LM_MFLAG_CONV_NODROP;
- int error;
+ const struct lm_lockops *lm;
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ struct gfs2_args *args = &sdp->sd_args;
+ const char *proto = sdp->sd_proto_name;
+ const char *table = sdp->sd_table_name;
+ const char *fsname;
+ char *o, *options;
+ int ret;
- if (sdp->sd_args.ar_spectator)
- flags |= LM_MFLAG_SPECTATOR;
+ if (!strcmp("lock_nolock", proto)) {
+ lm = &nolock_ops;
+ sdp->sd_args.ar_localflocks = 1;
+ sdp->sd_args.ar_localcaching = 1;
+#ifdef CONFIG_GFS2_FS_LOCKING_DLM
+ } else if (!strcmp("lock_dlm", proto)) {
+ lm = &gfs2_dlm_ops;
+#endif
+ } else {
+ printk(KERN_INFO "GFS2: can't find protocol %s\n", proto);
+ return -ENOENT;
+ }
fs_info(sdp, "Trying to join cluster \"%s\", \"%s\"\n", proto, table);
- error = gfs2_mount_lockproto(proto, table, sdp->sd_args.ar_hostdata,
- gfs2_glock_cb, sdp,
- GFS2_MIN_LVB_SIZE, flags,
- &sdp->sd_lockstruct, &sdp->sd_kobj);
- if (error) {
- fs_info(sdp, "can't mount proto=%s, table=%s, hostdata=%s\n",
- proto, table, sdp->sd_args.ar_hostdata);
- goto out;
- }
+ ls->ls_ops = lm;
+ ls->ls_first = 1;
+ ls->ls_id = 0;
- if (gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_ops) ||
- gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lvb_size >=
- GFS2_MIN_LVB_SIZE)) {
- gfs2_unmount_lockproto(&sdp->sd_lockstruct);
- goto out;
+ for (options = args->ar_hostdata; (o = strsep(&options, ":")); ) {
+ substring_t tmp[MAX_OPT_ARGS];
+ int token, option;
+
+ if (!o || !*o)
+ continue;
+
+ token = match_token(o, *lm->lm_tokens, tmp);
+ switch (token) {
+ case Opt_jid:
+ ret = match_int(&tmp[0], &option);
+ if (ret || option < 0)
+ goto hostdata_error;
+ ls->ls_jid = option;
+ break;
+ case Opt_id:
+ ret = match_int(&tmp[0], &option);
+ if (ret)
+ goto hostdata_error;
+ ls->ls_id = option;
+ break;
+ case Opt_first:
+ ret = match_int(&tmp[0], &option);
+ if (ret || (option != 0 && option != 1))
+ goto hostdata_error;
+ ls->ls_first = option;
+ break;
+ case Opt_nodir:
+ ret = match_int(&tmp[0], &option);
+ if (ret || (option != 0 && option != 1))
+ goto hostdata_error;
+ ls->ls_nodir = option;
+ break;
+ case Opt_err:
+ default:
+hostdata_error:
+ fs_info(sdp, "unknown hostdata (%s)\n", o);
+ return -EINVAL;
+ }
}
if (sdp->sd_args.ar_spectator)
@@ -1077,22 +1126,25 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table,
sdp->sd_lockstruct.ls_jid);
- fs_info(sdp, "Joined cluster. Now mounting FS...\n");
-
- if ((sdp->sd_lockstruct.ls_flags & LM_LSFLAG_LOCAL) &&
- !sdp->sd_args.ar_ignore_local_fs) {
- sdp->sd_args.ar_localflocks = 1;
- sdp->sd_args.ar_localcaching = 1;
+ fsname = strchr(table, ':');
+ if (fsname)
+ fsname++;
+ if (lm->lm_mount == NULL) {
+ fs_info(sdp, "Now mounting FS...\n");
+ return 0;
}
-
-out:
- return error;
+ ret = lm->lm_mount(sdp, fsname);
+ if (ret == 0)
+ fs_info(sdp, "Joined cluster. Now mounting FS...\n");
+ return ret;
}
void gfs2_lm_unmount(struct gfs2_sbd *sdp)
{
- if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
- gfs2_unmount_lockproto(&sdp->sd_lockstruct);
+ const struct lm_lockops *lm = sdp->sd_lockstruct.ls_ops;
+ if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) &&
+ lm->lm_unmount)
+ lm->lm_unmount(sdp);
}
/**
@@ -1116,12 +1168,20 @@ static int fill_super(struct super_block *sb, void *data, int silent)
return -ENOMEM;
}
- error = gfs2_mount_args(sdp, (char *)data, 0);
+ sdp->sd_args.ar_quota = GFS2_QUOTA_DEFAULT;
+ sdp->sd_args.ar_data = GFS2_DATA_DEFAULT;
+
+ error = gfs2_mount_args(sdp, &sdp->sd_args, data);
if (error) {
printk(KERN_WARNING "GFS2: can't parse mount arguments\n");
goto fail;
}
+ if (sdp->sd_args.ar_spectator)
+ sb->s_flags |= MS_RDONLY;
+ if (sdp->sd_args.ar_posix_acl)
+ sb->s_flags |= MS_POSIXACL;
+
sb->s_magic = GFS2_MAGIC;
sb->s_op = &gfs2_super_ops;
sb->s_export_op = &gfs2_export_ops;
@@ -1199,6 +1259,8 @@ fail_sb:
dput(sdp->sd_root_dir);
if (sdp->sd_master_dir)
dput(sdp->sd_master_dir);
+ if (sb->s_root)
+ dput(sb->s_root);
sb->s_root = NULL;
fail_locking:
init_locking(sdp, &mount_gh, UNDO);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 49877546beb9..abd5429ae285 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -18,7 +18,6 @@
#include <linux/posix_acl.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
-#include <linux/lm_interface.h>
#include <linux/fiemap.h>
#include <asm/uaccess.h>
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 320323d03479..458019569dcb 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -19,7 +19,6 @@
#include <linux/delay.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
-#include <linux/lm_interface.h>
#include <linux/time.h>
#include "gfs2.h"
@@ -27,7 +26,6 @@
#include "glock.h"
#include "inode.h"
#include "log.h"
-#include "mount.h"
#include "quota.h"
#include "recovery.h"
#include "rgrp.h"
@@ -40,6 +38,8 @@
#include "bmap.h"
#include "meta_io.h"
+#define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x)
+
/**
* gfs2_write_inode - Make sure the inode is stable on the disk
* @inode: The inode
@@ -435,25 +435,45 @@ static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
+ struct gfs2_args args = sdp->sd_args; /* Default to current settings */
int error;
- error = gfs2_mount_args(sdp, data, 1);
+ error = gfs2_mount_args(sdp, &args, data);
if (error)
return error;
+ /* Not allowed to change locking details */
+ if (strcmp(args.ar_lockproto, sdp->sd_args.ar_lockproto) ||
+ strcmp(args.ar_locktable, sdp->sd_args.ar_locktable) ||
+ strcmp(args.ar_hostdata, sdp->sd_args.ar_hostdata))
+ return -EINVAL;
+
+ /* Some flags must not be changed */
+ if (args_neq(&args, &sdp->sd_args, spectator) ||
+ args_neq(&args, &sdp->sd_args, ignore_local_fs) ||
+ args_neq(&args, &sdp->sd_args, localflocks) ||
+ args_neq(&args, &sdp->sd_args, localcaching) ||
+ args_neq(&args, &sdp->sd_args, meta))
+ return -EINVAL;
+
if (sdp->sd_args.ar_spectator)
*flags |= MS_RDONLY;
- else {
- if (*flags & MS_RDONLY) {
- if (!(sb->s_flags & MS_RDONLY))
- error = gfs2_make_fs_ro(sdp);
- } else if (!(*flags & MS_RDONLY) &&
- (sb->s_flags & MS_RDONLY)) {
+
+ if ((sb->s_flags ^ *flags) & MS_RDONLY) {
+ if (*flags & MS_RDONLY)
+ error = gfs2_make_fs_ro(sdp);
+ else
error = gfs2_make_fs_rw(sdp);
- }
+ if (error)
+ return error;
}
- return error;
+ sdp->sd_args = args;
+ if (sdp->sd_args.ar_posix_acl)
+ sb->s_flags |= MS_POSIXACL;
+ else
+ sb->s_flags &= ~MS_POSIXACL;
+ return 0;
}
/**
@@ -588,6 +608,8 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
}
seq_printf(s, ",data=%s", state);
}
+ if (args->ar_discard)
+ seq_printf(s, ",discard");
return 0;
}
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index b08d09696b3e..8d53f66b5bcc 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -45,7 +45,6 @@
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
@@ -80,6 +79,51 @@ struct gfs2_quota_change_host {
u32 qc_id;
};
+static LIST_HEAD(qd_lru_list);
+static atomic_t qd_lru_count = ATOMIC_INIT(0);
+static spinlock_t qd_lru_lock = SPIN_LOCK_UNLOCKED;
+
+int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask)
+{
+ struct gfs2_quota_data *qd;
+ struct gfs2_sbd *sdp;
+
+ if (nr == 0)
+ goto out;
+
+ if (!(gfp_mask & __GFP_FS))
+ return -1;
+
+ spin_lock(&qd_lru_lock);
+ while (nr && !list_empty(&qd_lru_list)) {
+ qd = list_entry(qd_lru_list.next,
+ struct gfs2_quota_data, qd_reclaim);
+ sdp = qd->qd_gl->gl_sbd;
+
+ /* Free from the filesystem-specific list */
+ list_del(&qd->qd_list);
+
+ gfs2_assert_warn(sdp, !qd->qd_change);
+ gfs2_assert_warn(sdp, !qd->qd_slot_count);
+ gfs2_assert_warn(sdp, !qd->qd_bh_count);
+
+ gfs2_glock_put(qd->qd_gl);
+ atomic_dec(&sdp->sd_quota_count);
+
+ /* Delete it from the common reclaim list */
+ list_del_init(&qd->qd_reclaim);
+ atomic_dec(&qd_lru_count);
+ spin_unlock(&qd_lru_lock);
+ kmem_cache_free(gfs2_quotad_cachep, qd);
+ spin_lock(&qd_lru_lock);
+ nr--;
+ }
+ spin_unlock(&qd_lru_lock);
+
+out:
+ return (atomic_read(&qd_lru_count) * sysctl_vfs_cache_pressure) / 100;
+}
+
static u64 qd2offset(struct gfs2_quota_data *qd)
{
u64 offset;
@@ -100,22 +144,18 @@ static int qd_alloc(struct gfs2_sbd *sdp, int user, u32 id,
if (!qd)
return -ENOMEM;
- qd->qd_count = 1;
+ atomic_set(&qd->qd_count, 1);
qd->qd_id = id;
if (user)
set_bit(QDF_USER, &qd->qd_flags);
qd->qd_slot = -1;
+ INIT_LIST_HEAD(&qd->qd_reclaim);
error = gfs2_glock_get(sdp, 2 * (u64)id + !user,
&gfs2_quota_glops, CREATE, &qd->qd_gl);
if (error)
goto fail;
- error = gfs2_lvb_hold(qd->qd_gl);
- gfs2_glock_put(qd->qd_gl);
- if (error)
- goto fail;
-
*qdp = qd;
return 0;
@@ -135,11 +175,17 @@ static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, int create,
for (;;) {
found = 0;
- spin_lock(&sdp->sd_quota_spin);
+ spin_lock(&qd_lru_lock);
list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
if (qd->qd_id == id &&
!test_bit(QDF_USER, &qd->qd_flags) == !user) {
- qd->qd_count++;
+ if (!atomic_read(&qd->qd_count) &&
+ !list_empty(&qd->qd_reclaim)) {
+ /* Remove it from reclaim list */
+ list_del_init(&qd->qd_reclaim);
+ atomic_dec(&qd_lru_count);
+ }
+ atomic_inc(&qd->qd_count);
found = 1;
break;
}
@@ -155,11 +201,11 @@ static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, int create,
new_qd = NULL;
}
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
if (qd || !create) {
if (new_qd) {
- gfs2_lvb_unhold(new_qd->qd_gl);
+ gfs2_glock_put(new_qd->qd_gl);
kmem_cache_free(gfs2_quotad_cachep, new_qd);
}
*qdp = qd;
@@ -175,21 +221,18 @@ static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, int create,
static void qd_hold(struct gfs2_quota_data *qd)
{
struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
-
- spin_lock(&sdp->sd_quota_spin);
- gfs2_assert(sdp, qd->qd_count);
- qd->qd_count++;
- spin_unlock(&sdp->sd_quota_spin);
+ gfs2_assert(sdp, atomic_read(&qd->qd_count));
+ atomic_inc(&qd->qd_count);
}
static void qd_put(struct gfs2_quota_data *qd)
{
- struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
- spin_lock(&sdp->sd_quota_spin);
- gfs2_assert(sdp, qd->qd_count);
- if (!--qd->qd_count)
- qd->qd_last_touched = jiffies;
- spin_unlock(&sdp->sd_quota_spin);
+ if (atomic_dec_and_lock(&qd->qd_count, &qd_lru_lock)) {
+ /* Add to the reclaim list */
+ list_add_tail(&qd->qd_reclaim, &qd_lru_list);
+ atomic_inc(&qd_lru_count);
+ spin_unlock(&qd_lru_lock);
+ }
}
static int slot_get(struct gfs2_quota_data *qd)
@@ -198,10 +241,10 @@ static int slot_get(struct gfs2_quota_data *qd)
unsigned int c, o = 0, b;
unsigned char byte = 0;
- spin_lock(&sdp->sd_quota_spin);
+ spin_lock(&qd_lru_lock);
if (qd->qd_slot_count++) {
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
return 0;
}
@@ -225,13 +268,13 @@ found:
sdp->sd_quota_bitmap[c][o] |= 1 << b;
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
return 0;
fail:
qd->qd_slot_count--;
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
return -ENOSPC;
}
@@ -239,23 +282,23 @@ static void slot_hold(struct gfs2_quota_data *qd)
{
struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
- spin_lock(&sdp->sd_quota_spin);
+ spin_lock(&qd_lru_lock);
gfs2_assert(sdp, qd->qd_slot_count);
qd->qd_slot_count++;
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
}
static void slot_put(struct gfs2_quota_data *qd)
{
struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
- spin_lock(&sdp->sd_quota_spin);
+ spin_lock(&qd_lru_lock);
gfs2_assert(sdp, qd->qd_slot_count);
if (!--qd->qd_slot_count) {
gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, qd->qd_slot, 0);
qd->qd_slot = -1;
}
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
}
static int bh_get(struct gfs2_quota_data *qd)
@@ -330,7 +373,7 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
if (sdp->sd_vfs->s_flags & MS_RDONLY)
return 0;
- spin_lock(&sdp->sd_quota_spin);
+ spin_lock(&qd_lru_lock);
list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
@@ -341,8 +384,8 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
set_bit(QDF_LOCKED, &qd->qd_flags);
- gfs2_assert_warn(sdp, qd->qd_count);
- qd->qd_count++;
+ gfs2_assert_warn(sdp, atomic_read(&qd->qd_count));
+ atomic_inc(&qd->qd_count);
qd->qd_change_sync = qd->qd_change;
gfs2_assert_warn(sdp, qd->qd_slot_count);
qd->qd_slot_count++;
@@ -354,7 +397,7 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
if (!found)
qd = NULL;
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
if (qd) {
gfs2_assert_warn(sdp, qd->qd_change_sync);
@@ -379,24 +422,24 @@ static int qd_trylock(struct gfs2_quota_data *qd)
if (sdp->sd_vfs->s_flags & MS_RDONLY)
return 0;
- spin_lock(&sdp->sd_quota_spin);
+ spin_lock(&qd_lru_lock);
if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
!test_bit(QDF_CHANGE, &qd->qd_flags)) {
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
return 0;
}
list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
set_bit(QDF_LOCKED, &qd->qd_flags);
- gfs2_assert_warn(sdp, qd->qd_count);
- qd->qd_count++;
+ gfs2_assert_warn(sdp, atomic_read(&qd->qd_count));
+ atomic_inc(&qd->qd_count);
qd->qd_change_sync = qd->qd_change;
gfs2_assert_warn(sdp, qd->qd_slot_count);
qd->qd_slot_count++;
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
gfs2_assert_warn(sdp, qd->qd_change_sync);
if (bh_get(qd)) {
@@ -556,9 +599,9 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change)
x = be64_to_cpu(qc->qc_change) + change;
qc->qc_change = cpu_to_be64(x);
- spin_lock(&sdp->sd_quota_spin);
+ spin_lock(&qd_lru_lock);
qd->qd_change = x;
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
if (!x) {
gfs2_assert_warn(sdp, test_bit(QDF_CHANGE, &qd->qd_flags));
@@ -802,8 +845,8 @@ restart:
loff_t pos;
gfs2_glock_dq_uninit(q_gh);
error = gfs2_glock_nq_init(qd->qd_gl,
- LM_ST_EXCLUSIVE, GL_NOCACHE,
- q_gh);
+ LM_ST_EXCLUSIVE, GL_NOCACHE,
+ q_gh);
if (error)
return error;
@@ -820,7 +863,6 @@ restart:
gfs2_glock_dq_uninit(&i_gh);
-
gfs2_quota_in(&q, buf);
qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb;
qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC);
@@ -890,9 +932,9 @@ static int need_sync(struct gfs2_quota_data *qd)
if (!qd->qd_qb.qb_limit)
return 0;
- spin_lock(&sdp->sd_quota_spin);
+ spin_lock(&qd_lru_lock);
value = qd->qd_change;
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
spin_lock(&gt->gt_spin);
num = gt->gt_quota_scale_num;
@@ -985,9 +1027,9 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
continue;
value = (s64)be64_to_cpu(qd->qd_qb.qb_value);
- spin_lock(&sdp->sd_quota_spin);
+ spin_lock(&qd_lru_lock);
value += qd->qd_change;
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) {
print_message(qd, "exceeded");
@@ -1171,13 +1213,12 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
qd->qd_change = qc.qc_change;
qd->qd_slot = slot;
qd->qd_slot_count = 1;
- qd->qd_last_touched = jiffies;
- spin_lock(&sdp->sd_quota_spin);
+ spin_lock(&qd_lru_lock);
gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, slot, 1);
list_add(&qd->qd_list, &sdp->sd_quota_list);
atomic_inc(&sdp->sd_quota_count);
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
found++;
}
@@ -1197,73 +1238,48 @@ fail:
return error;
}
-static void gfs2_quota_scan(struct gfs2_sbd *sdp)
-{
- struct gfs2_quota_data *qd, *safe;
- LIST_HEAD(dead);
-
- spin_lock(&sdp->sd_quota_spin);
- list_for_each_entry_safe(qd, safe, &sdp->sd_quota_list, qd_list) {
- if (!qd->qd_count &&
- time_after_eq(jiffies, qd->qd_last_touched +
- gfs2_tune_get(sdp, gt_quota_cache_secs) * HZ)) {
- list_move(&qd->qd_list, &dead);
- gfs2_assert_warn(sdp,
- atomic_read(&sdp->sd_quota_count) > 0);
- atomic_dec(&sdp->sd_quota_count);
- }
- }
- spin_unlock(&sdp->sd_quota_spin);
-
- while (!list_empty(&dead)) {
- qd = list_entry(dead.next, struct gfs2_quota_data, qd_list);
- list_del(&qd->qd_list);
-
- gfs2_assert_warn(sdp, !qd->qd_change);
- gfs2_assert_warn(sdp, !qd->qd_slot_count);
- gfs2_assert_warn(sdp, !qd->qd_bh_count);
-
- gfs2_lvb_unhold(qd->qd_gl);
- kmem_cache_free(gfs2_quotad_cachep, qd);
- }
-}
-
void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
{
struct list_head *head = &sdp->sd_quota_list;
struct gfs2_quota_data *qd;
unsigned int x;
- spin_lock(&sdp->sd_quota_spin);
+ spin_lock(&qd_lru_lock);
while (!list_empty(head)) {
qd = list_entry(head->prev, struct gfs2_quota_data, qd_list);
- if (qd->qd_count > 1 ||
- (qd->qd_count && !test_bit(QDF_CHANGE, &qd->qd_flags))) {
+ if (atomic_read(&qd->qd_count) > 1 ||
+ (atomic_read(&qd->qd_count) &&
+ !test_bit(QDF_CHANGE, &qd->qd_flags))) {
list_move(&qd->qd_list, head);
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
schedule();
- spin_lock(&sdp->sd_quota_spin);
+ spin_lock(&qd_lru_lock);
continue;
}
list_del(&qd->qd_list);
+ /* Also remove if this qd exists in the reclaim list */
+ if (!list_empty(&qd->qd_reclaim)) {
+ list_del_init(&qd->qd_reclaim);
+ atomic_dec(&qd_lru_count);
+ }
atomic_dec(&sdp->sd_quota_count);
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
- if (!qd->qd_count) {
+ if (!atomic_read(&qd->qd_count)) {
gfs2_assert_warn(sdp, !qd->qd_change);
gfs2_assert_warn(sdp, !qd->qd_slot_count);
} else
gfs2_assert_warn(sdp, qd->qd_slot_count == 1);
gfs2_assert_warn(sdp, !qd->qd_bh_count);
- gfs2_lvb_unhold(qd->qd_gl);
+ gfs2_glock_put(qd->qd_gl);
kmem_cache_free(gfs2_quotad_cachep, qd);
- spin_lock(&sdp->sd_quota_spin);
+ spin_lock(&qd_lru_lock);
}
- spin_unlock(&sdp->sd_quota_spin);
+ spin_unlock(&qd_lru_lock);
gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count));
@@ -1341,9 +1357,6 @@ int gfs2_quotad(void *data)
quotad_check_timeo(sdp, "sync", gfs2_quota_sync, t,
&quotad_timeo, &tune->gt_quota_quantum);
- /* FIXME: This should be turned into a shrinker */
- gfs2_quota_scan(sdp);
-
/* Check for & recover partially truncated inodes */
quotad_check_trunc_list(sdp);
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index cec9032be97d..0fa5fa63d0e8 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -49,4 +49,6 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
return ret;
}
+extern int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask);
+
#endif /* __QUOTA_DOT_H__ */
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index efd09c3d2b26..247e8f7d6b3d 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -13,7 +13,6 @@
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
-#include <linux/lm_interface.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
@@ -427,20 +426,23 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea
}
-static void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
- unsigned int message)
+static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
+ unsigned int message)
{
- if (!sdp->sd_lockstruct.ls_ops->lm_recovery_done)
- return;
-
- if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
- sdp->sd_lockstruct.ls_ops->lm_recovery_done(
- sdp->sd_lockstruct.ls_lockspace, jid, message);
+ char env_jid[20];
+ char env_status[20];
+ char *envp[] = { env_jid, env_status, NULL };
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ ls->ls_recover_jid_done = jid;
+ ls->ls_recover_jid_status = message;
+ sprintf(env_jid, "JID=%d", jid);
+ sprintf(env_status, "RECOVERY=%s",
+ message == LM_RD_SUCCESS ? "Done" : "Failed");
+ kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
}
-
/**
- * gfs2_recover_journal - recovery a given journal
+ * gfs2_recover_journal - recover a given journal
* @jd: the struct gfs2_jdesc describing the journal
*
* Acquire the journal's lock, check to see if the journal is clean, and
@@ -561,7 +563,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd)
if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
gfs2_glock_dq_uninit(&ji_gh);
- gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
+ gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
gfs2_glock_dq_uninit(&j_gh);
@@ -581,7 +583,7 @@ fail_gunlock_j:
fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");
fail:
- gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
+ gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
return error;
}
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 8b01c635d925..f03d024038ea 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -13,8 +13,8 @@
#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
#include <linux/prefetch.h>
+#include <linux/blkdev.h>
#include "gfs2.h"
#include "incore.h"
@@ -132,81 +132,90 @@ static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd,
}
/**
+ * gfs2_bit_search
+ * @ptr: Pointer to bitmap data
+ * @mask: Mask to use (normally 0x55555.... but adjusted for search start)
+ * @state: The state we are searching for
+ *
+ * We xor the bitmap data with a patter which is the bitwise opposite
+ * of what we are looking for, this gives rise to a pattern of ones
+ * wherever there is a match. Since we have two bits per entry, we
+ * take this pattern, shift it down by one place and then and it with
+ * the original. All the even bit positions (0,2,4, etc) then represent
+ * successful matches, so we mask with 0x55555..... to remove the unwanted
+ * odd bit positions.
+ *
+ * This allows searching of a whole u64 at once (32 blocks) with a
+ * single test (on 64 bit arches).
+ */
+
+static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
+{
+ u64 tmp;
+ static const u64 search[] = {
+ [0] = 0xffffffffffffffffULL,
+ [1] = 0xaaaaaaaaaaaaaaaaULL,
+ [2] = 0x5555555555555555ULL,
+ [3] = 0x0000000000000000ULL,
+ };
+ tmp = le64_to_cpu(*ptr) ^ search[state];
+ tmp &= (tmp >> 1);
+ tmp &= mask;
+ return tmp;
+}
+
+/**
* gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
* a block in a given allocation state.
* @buffer: the buffer that holds the bitmaps
- * @buflen: the length (in bytes) of the buffer
+ * @len: the length (in bytes) of the buffer
* @goal: start search at this block's bit-pair (within @buffer)
- * @old_state: GFS2_BLKST_XXX the state of the block we're looking for.
+ * @state: GFS2_BLKST_XXX the state of the block we're looking for.
*
* Scope of @goal and returned block number is only within this bitmap buffer,
* not entire rgrp or filesystem. @buffer will be offset from the actual
- * beginning of a bitmap block buffer, skipping any header structures.
+ * beginning of a bitmap block buffer, skipping any header structures, but
+ * headers are always a multiple of 64 bits long so that the buffer is
+ * always aligned to a 64 bit boundary.
+ *
+ * The size of the buffer is in bytes, but is it assumed that it is
+ * always ok to to read a complete multiple of 64 bits at the end
+ * of the block in case the end is no aligned to a natural boundary.
*
* Return: the block number (bitmap buffer scope) that was found
*/
-static u32 gfs2_bitfit(const u8 *buffer, unsigned int buflen, u32 goal,
- u8 old_state)
+static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
+ u32 goal, u8 state)
{
- const u8 *byte, *start, *end;
- int bit, startbit;
- u32 g1, g2, misaligned;
- unsigned long *plong;
- unsigned long lskipval;
-
- lskipval = (old_state & GFS2_BLKST_USED) ? LBITSKIP00 : LBITSKIP55;
- g1 = (goal / GFS2_NBBY);
- start = buffer + g1;
- byte = start;
- end = buffer + buflen;
- g2 = ALIGN(g1, sizeof(unsigned long));
- plong = (unsigned long *)(buffer + g2);
- startbit = bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
- misaligned = g2 - g1;
- if (!misaligned)
- goto ulong_aligned;
-/* parse the bitmap a byte at a time */
-misaligned:
- while (byte < end) {
- if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) {
- return goal +
- (((byte - start) * GFS2_NBBY) +
- ((bit - startbit) >> 1));
- }
- bit += GFS2_BIT_SIZE;
- if (bit >= GFS2_NBBY * GFS2_BIT_SIZE) {
- bit = 0;
- byte++;
- misaligned--;
- if (!misaligned) {
- plong = (unsigned long *)byte;
- goto ulong_aligned;
- }
- }
- }
- return BFITNOENT;
-
-/* parse the bitmap a unsigned long at a time */
-ulong_aligned:
- /* Stop at "end - 1" or else prefetch can go past the end and segfault.
- We could "if" it but we'd lose some of the performance gained.
- This way will only slow down searching the very last 4/8 bytes
- depending on architecture. I've experimented with several ways
- of writing this section such as using an else before the goto
- but this one seems to be the fastest. */
- while ((unsigned char *)plong < end - sizeof(unsigned long)) {
- prefetch(plong + 1);
- if (((*plong) & LBITMASK) != lskipval)
- break;
- plong++;
- }
- if ((unsigned char *)plong < end) {
- byte = (const u8 *)plong;
- misaligned += sizeof(unsigned long) - 1;
- goto misaligned;
+ u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1);
+ const __le64 *ptr = ((__le64 *)buf) + (goal >> 5);
+ const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64)));
+ u64 tmp;
+ u64 mask = 0x5555555555555555ULL;
+ u32 bit;
+
+ BUG_ON(state > 3);
+
+ /* Mask off bits we don't care about at the start of the search */
+ mask <<= spoint;
+ tmp = gfs2_bit_search(ptr, mask, state);
+ ptr++;
+ while(tmp == 0 && ptr < end) {
+ tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state);
+ ptr++;
}
- return BFITNOENT;
+ /* Mask off any bits which are more than len bytes from the start */
+ if (ptr == end && (len & (sizeof(u64) - 1)))
+ tmp &= (((u64)~0) >> (64 - 8*(len & (sizeof(u64) - 1))));
+ /* Didn't find anything, so return */
+ if (tmp == 0)
+ return BFITNOENT;
+ ptr--;
+ bit = fls64(tmp);
+ bit--; /* fls64 always adds one to the bit count */
+ bit /= 2; /* two bits per entry in the bitmap */
+ return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
}
/**
@@ -831,6 +840,58 @@ void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
spin_unlock(&sdp->sd_rindex_spin);
}
+static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
+ const struct gfs2_bitmap *bi)
+{
+ struct super_block *sb = sdp->sd_vfs;
+ struct block_device *bdev = sb->s_bdev;
+ const unsigned int sects_per_blk = sdp->sd_sb.sb_bsize /
+ bdev_hardsect_size(sb->s_bdev);
+ u64 blk;
+ sector_t start = 0;
+ sector_t nr_sects = 0;
+ int rv;
+ unsigned int x;
+
+ for (x = 0; x < bi->bi_len; x++) {
+ const u8 *orig = bi->bi_bh->b_data + bi->bi_offset + x;
+ const u8 *clone = bi->bi_clone + bi->bi_offset + x;
+ u8 diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
+ diff &= 0x55;
+ if (diff == 0)
+ continue;
+ blk = offset + ((bi->bi_start + x) * GFS2_NBBY);
+ blk *= sects_per_blk; /* convert to sectors */
+ while(diff) {
+ if (diff & 1) {
+ if (nr_sects == 0)
+ goto start_new_extent;
+ if ((start + nr_sects) != blk) {
+ rv = blkdev_issue_discard(bdev, start,
+ nr_sects, GFP_NOFS);
+ if (rv)
+ goto fail;
+ nr_sects = 0;
+start_new_extent:
+ start = blk;
+ }
+ nr_sects += sects_per_blk;
+ }
+ diff >>= 2;
+ blk += sects_per_blk;
+ }
+ }
+ if (nr_sects) {
+ rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS);
+ if (rv)
+ goto fail;
+ }
+ return;
+fail:
+ fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv);
+ sdp->sd_args.ar_discard = 0;
+}
+
void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
{
struct gfs2_sbd *sdp = rgd->rd_sbd;
@@ -841,6 +902,8 @@ void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
struct gfs2_bitmap *bi = rgd->rd_bits + x;
if (!bi->bi_clone)
continue;
+ if (sdp->sd_args.ar_discard)
+ gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bi);
memcpy(bi->bi_clone + bi->bi_offset,
bi->bi_bh->b_data + bi->bi_offset, bi->bi_len);
}
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 141b781f2fcc..601913e0a482 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -15,7 +15,6 @@
#include <linux/crc32.h>
#include <linux/gfs2_ondisk.h>
#include <linux/bio.h>
-#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
@@ -339,7 +338,6 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp,
struct gfs2_holder *t_gh)
{
struct gfs2_inode *ip;
- struct gfs2_holder ji_gh;
struct gfs2_jdesc *jd;
struct lfcc *lfcc;
LIST_HEAD(list);
@@ -387,7 +385,6 @@ out:
gfs2_glock_dq_uninit(&lfcc->gh);
kfree(lfcc);
}
- gfs2_glock_dq_uninit(&ji_gh);
return error;
}
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index f6b8b00ad881..91abdbedcc86 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -14,7 +14,7 @@
#include <linux/dcache.h>
#include "incore.h"
-void gfs2_lm_unmount(struct gfs2_sbd *sdp);
+extern void gfs2_lm_unmount(struct gfs2_sbd *sdp);
static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
{
@@ -27,21 +27,23 @@ static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
void gfs2_jindex_free(struct gfs2_sbd *sdp);
-struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
-int gfs2_jdesc_check(struct gfs2_jdesc *jd);
+extern int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *data);
-int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
- struct gfs2_inode **ipp);
+extern struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
+extern int gfs2_jdesc_check(struct gfs2_jdesc *jd);
-int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
+extern int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
+ struct gfs2_inode **ipp);
-int gfs2_statfs_init(struct gfs2_sbd *sdp);
-void gfs2_statfs_change(struct gfs2_sbd *sdp,
- s64 total, s64 free, s64 dinodes);
-int gfs2_statfs_sync(struct gfs2_sbd *sdp);
+extern int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
-int gfs2_freeze_fs(struct gfs2_sbd *sdp);
-void gfs2_unfreeze_fs(struct gfs2_sbd *sdp);
+extern int gfs2_statfs_init(struct gfs2_sbd *sdp);
+extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
+ s64 dinodes);
+extern int gfs2_statfs_sync(struct gfs2_sbd *sdp);
+
+extern int gfs2_freeze_fs(struct gfs2_sbd *sdp);
+extern void gfs2_unfreeze_fs(struct gfs2_sbd *sdp);
extern struct file_system_type gfs2_fs_type;
extern struct file_system_type gfs2meta_fs_type;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 26c1fa777a95..7655f5025fec 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -14,9 +14,8 @@
#include <linux/buffer_head.h>
#include <linux/module.h>
#include <linux/kobject.h>
-#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
#include <asm/uaccess.h>
+#include <linux/gfs2_ondisk.h>
#include "gfs2.h"
#include "incore.h"
@@ -25,6 +24,7 @@
#include "glock.h"
#include "quota.h"
#include "util.h"
+#include "glops.h"
static ssize_t id_show(struct gfs2_sbd *sdp, char *buf)
{
@@ -37,6 +37,30 @@ static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf)
return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_fsname);
}
+static int gfs2_uuid_valid(const u8 *uuid)
+{
+ int i;
+
+ for (i = 0; i < 16; i++) {
+ if (uuid[i])
+ return 1;
+ }
+ return 0;
+}
+
+static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
+{
+ const u8 *uuid = sdp->sd_sb.sb_uuid;
+ buf[0] = '\0';
+ if (!gfs2_uuid_valid(uuid))
+ return 0;
+ return snprintf(buf, PAGE_SIZE, "%02X%02X%02X%02X-%02X%02X-"
+ "%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X\n",
+ uuid[0], uuid[1], uuid[2], uuid[3], uuid[4], uuid[5],
+ uuid[6], uuid[7], uuid[8], uuid[9], uuid[10], uuid[11],
+ uuid[12], uuid[13], uuid[14], uuid[15]);
+}
+
static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
{
unsigned int count;
@@ -148,6 +172,46 @@ static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf,
return len;
}
+static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
+{
+ struct gfs2_glock *gl;
+ const struct gfs2_glock_operations *glops;
+ unsigned int glmode;
+ unsigned int gltype;
+ unsigned long long glnum;
+ char mode[16];
+ int rv;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+
+ rv = sscanf(buf, "%u:%llu %15s", &gltype, &glnum,
+ mode);
+ if (rv != 3)
+ return -EINVAL;
+
+ if (strcmp(mode, "EX") == 0)
+ glmode = LM_ST_UNLOCKED;
+ else if ((strcmp(mode, "CW") == 0) || (strcmp(mode, "DF") == 0))
+ glmode = LM_ST_DEFERRED;
+ else if ((strcmp(mode, "PR") == 0) || (strcmp(mode, "SH") == 0))
+ glmode = LM_ST_SHARED;
+ else
+ return -EINVAL;
+
+ if (gltype > LM_TYPE_JOURNAL)
+ return -EINVAL;
+ glops = gfs2_glops_list[gltype];
+ if (glops == NULL)
+ return -EINVAL;
+ rv = gfs2_glock_get(sdp, glnum, glops, 0, &gl);
+ if (rv)
+ return rv;
+ gfs2_glock_cb(gl, glmode);
+ gfs2_glock_put(gl);
+ return len;
+}
+
struct gfs2_attr {
struct attribute attr;
ssize_t (*show)(struct gfs2_sbd *, char *);
@@ -159,22 +223,26 @@ static struct gfs2_attr gfs2_attr_##name = __ATTR(name, mode, show, store)
GFS2_ATTR(id, 0444, id_show, NULL);
GFS2_ATTR(fsname, 0444, fsname_show, NULL);
+GFS2_ATTR(uuid, 0444, uuid_show, NULL);
GFS2_ATTR(freeze, 0644, freeze_show, freeze_store);
GFS2_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
GFS2_ATTR(statfs_sync, 0200, NULL, statfs_sync_store);
GFS2_ATTR(quota_sync, 0200, NULL, quota_sync_store);
GFS2_ATTR(quota_refresh_user, 0200, NULL, quota_refresh_user_store);
GFS2_ATTR(quota_refresh_group, 0200, NULL, quota_refresh_group_store);
+GFS2_ATTR(demote_rq, 0200, NULL, demote_rq_store);
static struct attribute *gfs2_attrs[] = {
&gfs2_attr_id.attr,
&gfs2_attr_fsname.attr,
+ &gfs2_attr_uuid.attr,
&gfs2_attr_freeze.attr,
&gfs2_attr_withdraw.attr,
&gfs2_attr_statfs_sync.attr,
&gfs2_attr_quota_sync.attr,
&gfs2_attr_quota_refresh_user.attr,
&gfs2_attr_quota_refresh_group.attr,
+ &gfs2_attr_demote_rq.attr,
NULL,
};
@@ -224,14 +292,145 @@ static struct lockstruct_attr lockstruct_attr_##name = __ATTR_RO(name)
LOCKSTRUCT_ATTR(jid, "%u\n");
LOCKSTRUCT_ATTR(first, "%u\n");
-LOCKSTRUCT_ATTR(lvb_size, "%u\n");
-LOCKSTRUCT_ATTR(flags, "%d\n");
static struct attribute *lockstruct_attrs[] = {
&lockstruct_attr_jid.attr,
&lockstruct_attr_first.attr,
- &lockstruct_attr_lvb_size.attr,
- &lockstruct_attr_flags.attr,
+ NULL,
+};
+
+/*
+ * lock_module. Originally from lock_dlm
+ */
+
+static ssize_t proto_name_show(struct gfs2_sbd *sdp, char *buf)
+{
+ const struct lm_lockops *ops = sdp->sd_lockstruct.ls_ops;
+ return sprintf(buf, "%s\n", ops->lm_proto_name);
+}
+
+static ssize_t block_show(struct gfs2_sbd *sdp, char *buf)
+{
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ ssize_t ret;
+ int val = 0;
+
+ if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))
+ val = 1;
+ ret = sprintf(buf, "%d\n", val);
+ return ret;
+}
+
+static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
+{
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ ssize_t ret = len;
+ int val;
+
+ val = simple_strtol(buf, NULL, 0);
+
+ if (val == 1)
+ set_bit(DFL_BLOCK_LOCKS, &ls->ls_flags);
+ else if (val == 0) {
+ clear_bit(DFL_BLOCK_LOCKS, &ls->ls_flags);
+ smp_mb__after_clear_bit();
+ gfs2_glock_thaw(sdp);
+ } else {
+ ret = -EINVAL;
+ }
+ return ret;
+}
+
+static ssize_t lkid_show(struct gfs2_sbd *sdp, char *buf)
+{
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ return sprintf(buf, "%u\n", ls->ls_id);
+}
+
+static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf)
+{
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ return sprintf(buf, "%d\n", ls->ls_first);
+}
+
+static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf)
+{
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ return sprintf(buf, "%d\n", ls->ls_first_done);
+}
+
+static ssize_t recover_show(struct gfs2_sbd *sdp, char *buf)
+{
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ return sprintf(buf, "%d\n", ls->ls_recover_jid);
+}
+
+static void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid)
+{
+ struct gfs2_jdesc *jd;
+
+ spin_lock(&sdp->sd_jindex_spin);
+ list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
+ if (jd->jd_jid != jid)
+ continue;
+ jd->jd_dirty = 1;
+ break;
+ }
+ spin_unlock(&sdp->sd_jindex_spin);
+}
+
+static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
+{
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ ls->ls_recover_jid = simple_strtol(buf, NULL, 0);
+ gfs2_jdesc_make_dirty(sdp, ls->ls_recover_jid);
+ if (sdp->sd_recoverd_process)
+ wake_up_process(sdp->sd_recoverd_process);
+ return len;
+}
+
+static ssize_t recover_done_show(struct gfs2_sbd *sdp, char *buf)
+{
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ return sprintf(buf, "%d\n", ls->ls_recover_jid_done);
+}
+
+static ssize_t recover_status_show(struct gfs2_sbd *sdp, char *buf)
+{
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ return sprintf(buf, "%d\n", ls->ls_recover_jid_status);
+}
+
+struct gdlm_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct gfs2_sbd *sdp, char *);
+ ssize_t (*store)(struct gfs2_sbd *sdp, const char *, size_t);
+};
+
+#define GDLM_ATTR(_name,_mode,_show,_store) \
+static struct gdlm_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
+
+GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
+GDLM_ATTR(block, 0644, block_show, block_store);
+GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
+GDLM_ATTR(id, 0444, lkid_show, NULL);
+GDLM_ATTR(first, 0444, lkfirst_show, NULL);
+GDLM_ATTR(first_done, 0444, first_done_show, NULL);
+GDLM_ATTR(recover, 0644, recover_show, recover_store);
+GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
+GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);
+
+static struct attribute *lock_module_attrs[] = {
+ &gdlm_attr_proto_name.attr,
+ &gdlm_attr_block.attr,
+ &gdlm_attr_withdraw.attr,
+ &gdlm_attr_id.attr,
+ &lockstruct_attr_jid.attr,
+ &gdlm_attr_first.attr,
+ &gdlm_attr_first_done.attr,
+ &gdlm_attr_recover.attr,
+ &gdlm_attr_recover_done.attr,
+ &gdlm_attr_recover_status.attr,
NULL,
};
@@ -373,7 +572,6 @@ TUNE_ATTR(complain_secs, 0);
TUNE_ATTR(statfs_slow, 0);
TUNE_ATTR(new_files_jdata, 0);
TUNE_ATTR(quota_simul_sync, 1);
-TUNE_ATTR(quota_cache_secs, 1);
TUNE_ATTR(stall_secs, 1);
TUNE_ATTR(statfs_quantum, 1);
TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process);
@@ -389,7 +587,6 @@ static struct attribute *tune_attrs[] = {
&tune_attr_complain_secs.attr,
&tune_attr_statfs_slow.attr,
&tune_attr_quota_simul_sync.attr,
- &tune_attr_quota_cache_secs.attr,
&tune_attr_stall_secs.attr,
&tune_attr_statfs_quantum.attr,
&tune_attr_recoverd_secs.attr,
@@ -414,6 +611,11 @@ static struct attribute_group tune_group = {
.attrs = tune_attrs,
};
+static struct attribute_group lock_module_group = {
+ .name = "lock_module",
+ .attrs = lock_module_attrs,
+};
+
int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
{
int error;
@@ -436,9 +638,15 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
if (error)
goto fail_args;
+ error = sysfs_create_group(&sdp->sd_kobj, &lock_module_group);
+ if (error)
+ goto fail_tune;
+
kobject_uevent(&sdp->sd_kobj, KOBJ_ADD);
return 0;
+fail_tune:
+ sysfs_remove_group(&sdp->sd_kobj, &tune_group);
fail_args:
sysfs_remove_group(&sdp->sd_kobj, &args_group);
fail_lockstruct:
@@ -455,15 +663,27 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp)
sysfs_remove_group(&sdp->sd_kobj, &tune_group);
sysfs_remove_group(&sdp->sd_kobj, &args_group);
sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group);
+ sysfs_remove_group(&sdp->sd_kobj, &lock_module_group);
kobject_put(&sdp->sd_kobj);
}
+
static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
struct kobj_uevent_env *env)
{
struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
+ const u8 *uuid = sdp->sd_sb.sb_uuid;
+
add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
+ if (gfs2_uuid_valid(uuid)) {
+ add_uevent_var(env, "UUID=%02X%02X%02X%02X-%02X%02X-%02X%02X-"
+ "%02X%02X-%02X%02X%02X%02X%02X%02X",
+ uuid[0], uuid[1], uuid[2], uuid[3], uuid[4],
+ uuid[5], uuid[6], uuid[7], uuid[8], uuid[9],
+ uuid[10], uuid[11], uuid[12], uuid[13],
+ uuid[14], uuid[15]);
+ }
return 0;
}
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index f677b8a83f0c..053752d4b27f 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -12,9 +12,8 @@
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
-#include <linux/gfs2_ondisk.h>
#include <linux/kallsyms.h>
-#include <linux/lm_interface.h>
+#include <linux/gfs2_ondisk.h>
#include "gfs2.h"
#include "incore.h"
@@ -88,9 +87,11 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
if (!tr->tr_touched) {
gfs2_log_release(sdp, tr->tr_reserved);
- gfs2_glock_dq(&tr->tr_t_gh);
- gfs2_holder_uninit(&tr->tr_t_gh);
- kfree(tr);
+ if (tr->tr_t_gh.gh_gl) {
+ gfs2_glock_dq(&tr->tr_t_gh);
+ gfs2_holder_uninit(&tr->tr_t_gh);
+ kfree(tr);
+ }
return;
}
@@ -106,9 +107,11 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
}
gfs2_log_commit(sdp, tr);
- gfs2_glock_dq(&tr->tr_t_gh);
- gfs2_holder_uninit(&tr->tr_t_gh);
- kfree(tr);
+ if (tr->tr_t_gh.gh_gl) {
+ gfs2_glock_dq(&tr->tr_t_gh);
+ gfs2_holder_uninit(&tr->tr_t_gh);
+ kfree(tr);
+ }
if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS)
gfs2_log_flush(sdp, NULL);
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 374f50e95496..9d12b1118ba0 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -13,7 +13,6 @@
#include <linux/buffer_head.h>
#include <linux/crc32.h>
#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
#include <asm/uaccess.h>
#include "gfs2.h"
@@ -35,6 +34,8 @@ void gfs2_assert_i(struct gfs2_sbd *sdp)
int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
{
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ const struct lm_lockops *lm = ls->ls_ops;
va_list args;
if (test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags))
@@ -47,8 +48,12 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
fs_err(sdp, "about to withdraw this file system\n");
BUG_ON(sdp->sd_args.ar_debug);
- fs_err(sdp, "telling LM to withdraw\n");
- gfs2_withdraw_lockproto(&sdp->sd_lockstruct);
+ kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);
+
+ if (lm->lm_unmount) {
+ fs_err(sdp, "telling LM to unmount\n");
+ lm->lm_unmount(sdp);
+ }
fs_err(sdp, "withdrawn\n");
dump_stack();
diff --git a/fs/inode.c b/fs/inode.c
index 913ab2d9a5d1..643ac43e5a5c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -17,6 +17,7 @@
#include <linux/hash.h>
#include <linux/swap.h>
#include <linux/security.h>
+#include <linux/ima.h>
#include <linux/pagemap.h>
#include <linux/cdev.h>
#include <linux/bootmem.h>
@@ -147,13 +148,13 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_cdev = NULL;
inode->i_rdev = 0;
inode->dirtied_when = 0;
- if (security_inode_alloc(inode)) {
- if (inode->i_sb->s_op->destroy_inode)
- inode->i_sb->s_op->destroy_inode(inode);
- else
- kmem_cache_free(inode_cachep, (inode));
- return NULL;
- }
+
+ if (security_inode_alloc(inode))
+ goto out_free_inode;
+
+ /* allocate and initialize an i_integrity */
+ if (ima_inode_alloc(inode))
+ goto out_free_security;
spin_lock_init(&inode->i_lock);
lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
@@ -189,6 +190,15 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_mapping = mapping;
return inode;
+
+out_free_security:
+ security_inode_free(inode);
+out_free_inode:
+ if (inode->i_sb->s_op->destroy_inode)
+ inode->i_sb->s_op->destroy_inode(inode);
+ else
+ kmem_cache_free(inode_cachep, (inode));
+ return NULL;
}
EXPORT_SYMBOL(inode_init_always);
@@ -359,6 +369,7 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
invalidate_inode_buffers(inode);
if (!atomic_read(&inode->i_count)) {
list_move(&inode->i_list, dispose);
+ WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
count++;
continue;
@@ -460,6 +471,7 @@ static void prune_icache(int nr_to_scan)
continue;
}
list_move(&inode->i_list, &freeable);
+ WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
nr_pruned++;
}
@@ -656,6 +668,7 @@ void unlock_new_inode(struct inode *inode)
* just created it (so there can be no old holders
* that haven't tested I_LOCK).
*/
+ WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW));
inode->i_state &= ~(I_LOCK|I_NEW);
wake_up_inode(inode);
}
@@ -1145,6 +1158,7 @@ void generic_delete_inode(struct inode *inode)
list_del_init(&inode->i_list);
list_del_init(&inode->i_sb_list);
+ WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
inodes_stat.nr_inodes--;
spin_unlock(&inode_lock);
@@ -1186,16 +1200,19 @@ static void generic_forget_inode(struct inode *inode)
spin_unlock(&inode_lock);
return;
}
+ WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_WILL_FREE;
spin_unlock(&inode_lock);
write_inode_now(inode, 1);
spin_lock(&inode_lock);
+ WARN_ON(inode->i_state & I_NEW);
inode->i_state &= ~I_WILL_FREE;
inodes_stat.nr_unused--;
hlist_del_init(&inode->i_hash);
}
list_del_init(&inode->i_list);
list_del_init(&inode->i_sb_list);
+ WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
inodes_stat.nr_inodes--;
spin_unlock(&inode_lock);
@@ -1283,6 +1300,40 @@ sector_t bmap(struct inode * inode, sector_t block)
}
EXPORT_SYMBOL(bmap);
+/*
+ * With relative atime, only update atime if the previous atime is
+ * earlier than either the ctime or mtime or if at least a day has
+ * passed since the last atime update.
+ */
+static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
+ struct timespec now)
+{
+
+ if (!(mnt->mnt_flags & MNT_RELATIME))
+ return 1;
+ /*
+ * Is mtime younger than atime? If yes, update atime:
+ */
+ if (timespec_compare(&inode->i_mtime, &inode->i_atime) >= 0)
+ return 1;
+ /*
+ * Is ctime younger than atime? If yes, update atime:
+ */
+ if (timespec_compare(&inode->i_ctime, &inode->i_atime) >= 0)
+ return 1;
+
+ /*
+ * Is the previous atime value older than a day? If yes,
+ * update atime:
+ */
+ if ((long)(now.tv_sec - inode->i_atime.tv_sec) >= 24*60*60)
+ return 1;
+ /*
+ * Good, we can skip the atime update:
+ */
+ return 0;
+}
+
/**
* touch_atime - update the access time
* @mnt: mount the inode is accessed on
@@ -1310,17 +1361,12 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
goto out;
if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
goto out;
- if (mnt->mnt_flags & MNT_RELATIME) {
- /*
- * With relative atime, only update atime if the previous
- * atime is earlier than either the ctime or mtime.
- */
- if (timespec_compare(&inode->i_mtime, &inode->i_atime) < 0 &&
- timespec_compare(&inode->i_ctime, &inode->i_atime) < 0)
- goto out;
- }
now = current_fs_time(inode->i_sb);
+
+ if (!relatime_need_update(mnt, inode, now))
+ goto out;
+
if (timespec_equal(&inode->i_atime, &now))
goto out;
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 240ec63984cb..ac2d47e43926 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -404,10 +404,12 @@ static int ioctl_fionbio(struct file *filp, int __user *argp)
if (O_NONBLOCK != O_NDELAY)
flag |= O_NDELAY;
#endif
+ spin_lock(&filp->f_lock);
if (on)
filp->f_flags |= flag;
else
filp->f_flags &= ~flag;
+ spin_unlock(&filp->f_lock);
return error;
}
@@ -425,18 +427,12 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp,
/* Did FASYNC state change ? */
if ((flag ^ filp->f_flags) & FASYNC) {
if (filp->f_op && filp->f_op->fasync)
+ /* fasync() adjusts filp->f_flags */
error = filp->f_op->fasync(fd, filp, on);
else
error = -ENOTTY;
}
- if (error)
- return error;
-
- if (on)
- filp->f_flags |= FASYNC;
- else
- filp->f_flags &= ~FASYNC;
- return error;
+ return error < 0 ? error : 0;
}
static int ioctl_fsfreeze(struct file *filp)
@@ -499,17 +495,11 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
break;
case FIONBIO:
- /* BKL needed to avoid races tweaking f_flags */
- lock_kernel();
error = ioctl_fionbio(filp, argp);
- unlock_kernel();
break;
case FIOASYNC:
- /* BKL needed to avoid races tweaking f_flags */
- lock_kernel();
error = ioctl_fioasync(fd, filp, argp);
- unlock_kernel();
break;
case FIOQSIZE:
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 1f3b0fc0d351..aedc47a264c1 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -139,6 +139,55 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
return 0;
}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+static const struct in6_addr *nlmclnt_map_v4addr(const struct sockaddr *sap,
+ struct in6_addr *addr_mapped)
+{
+ const struct sockaddr_in *sin = (const struct sockaddr_in *)sap;
+
+ switch (sap->sa_family) {
+ case AF_INET6:
+ return &((const struct sockaddr_in6 *)sap)->sin6_addr;
+ case AF_INET:
+ ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, addr_mapped);
+ return addr_mapped;
+ }
+
+ return NULL;
+}
+
+/*
+ * If lockd is using a PF_INET6 listener, all incoming requests appear
+ * to come from AF_INET6 remotes. The address of AF_INET remotes are
+ * mapped to AF_INET6 automatically by the network layer. In case the
+ * user passed an AF_INET server address at mount time, ensure both
+ * addresses are AF_INET6 before comparing them.
+ */
+static int nlmclnt_cmp_addr(const struct nlm_host *host,
+ const struct sockaddr *sap)
+{
+ const struct in6_addr *addr1;
+ const struct in6_addr *addr2;
+ struct in6_addr addr1_mapped;
+ struct in6_addr addr2_mapped;
+
+ addr1 = nlmclnt_map_v4addr(nlm_addr(host), &addr1_mapped);
+ if (likely(addr1 != NULL)) {
+ addr2 = nlmclnt_map_v4addr(sap, &addr2_mapped);
+ if (likely(addr2 != NULL))
+ return ipv6_addr_equal(addr1, addr2);
+ }
+
+ return 0;
+}
+#else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
+static int nlmclnt_cmp_addr(const struct nlm_host *host,
+ const struct sockaddr *sap)
+{
+ return nlm_cmp_addr(nlm_addr(host), sap);
+}
+#endif /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
+
/*
* The server lockd has called us back to tell us the lock was granted
*/
@@ -166,7 +215,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
*/
if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid)
continue;
- if (!nlm_cmp_addr(nlm_addr(block->b_host), addr))
+ if (!nlmclnt_cmp_addr(block->b_host, addr))
continue;
if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0)
continue;
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index d1d1eb84679d..618865b3128b 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -3,7 +3,7 @@
*
* Copyright (C) 1991, 1992 Linus Torvalds
*
- * Copyright (C) 1996 Gertjan van Wingerde (gertjan@cs.vu.nl)
+ * Copyright (C) 1996 Gertjan van Wingerde
* Minix V2 fs support.
*
* Modified for 680x0 by Andreas Schwab
diff --git a/fs/namei.c b/fs/namei.c
index bbc15c237558..199317642ad6 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -24,6 +24,7 @@
#include <linux/fsnotify.h>
#include <linux/personality.h>
#include <linux/security.h>
+#include <linux/ima.h>
#include <linux/syscalls.h>
#include <linux/mount.h>
#include <linux/audit.h>
@@ -850,6 +851,8 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
if (err == -EAGAIN)
err = inode_permission(nd->path.dentry->d_inode,
MAY_EXEC);
+ if (!err)
+ err = ima_path_check(&nd->path, MAY_EXEC);
if (err)
break;
@@ -1509,6 +1512,11 @@ int may_open(struct path *path, int acc_mode, int flag)
error = inode_permission(inode, acc_mode);
if (error)
return error;
+
+ error = ima_path_check(path,
+ acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC));
+ if (error)
+ return error;
/*
* An append-only file must be opened in append mode for writing.
*/
diff --git a/fs/namespace.c b/fs/namespace.c
index 06f8e63f6cb1..f0e753097353 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -780,6 +780,7 @@ static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
{ MNT_NOATIME, ",noatime" },
{ MNT_NODIRATIME, ",nodiratime" },
{ MNT_RELATIME, ",relatime" },
+ { MNT_STRICTATIME, ",strictatime" },
{ 0, NULL }
};
const struct proc_fs_info *fs_infop;
@@ -1919,6 +1920,9 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
if (data_page)
((char *)data_page)[PAGE_SIZE - 1] = 0;
+ /* Default to relatime */
+ mnt_flags |= MNT_RELATIME;
+
/* Separate the per-mountpoint flags */
if (flags & MS_NOSUID)
mnt_flags |= MNT_NOSUID;
@@ -1930,13 +1934,14 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
mnt_flags |= MNT_NOATIME;
if (flags & MS_NODIRATIME)
mnt_flags |= MNT_NODIRATIME;
- if (flags & MS_RELATIME)
- mnt_flags |= MNT_RELATIME;
+ if (flags & MS_STRICTATIME)
+ mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
if (flags & MS_RDONLY)
mnt_flags |= MNT_READONLY;
flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
- MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT);
+ MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
+ MS_STRICTATIME);
/* ... and get the mountpoint */
retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 9b728f3565a1..574158ae2398 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -255,6 +255,32 @@ static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
}
return 0;
}
+
+/*
+ * Test if two ip6 socket addresses refer to the same socket by
+ * comparing relevant fields. The padding bytes specifically, are not
+ * compared. sin6_flowinfo is not compared because it only affects QoS
+ * and sin6_scope_id is only compared if the address is "link local"
+ * because "link local" addresses need only be unique to a specific
+ * link. Conversely, ordinary unicast addresses might have different
+ * sin6_scope_id.
+ *
+ * The caller should ensure both socket addresses are AF_INET6.
+ */
+static int nfs_sockaddr_cmp_ip6(const struct sockaddr *sa1,
+ const struct sockaddr *sa2)
+{
+ const struct sockaddr_in6 *saddr1 = (const struct sockaddr_in6 *)sa1;
+ const struct sockaddr_in6 *saddr2 = (const struct sockaddr_in6 *)sa2;
+
+ if (!ipv6_addr_equal(&saddr1->sin6_addr,
+ &saddr1->sin6_addr))
+ return 0;
+ if (ipv6_addr_scope(&saddr1->sin6_addr) == IPV6_ADDR_SCOPE_LINKLOCAL &&
+ saddr1->sin6_scope_id != saddr2->sin6_scope_id)
+ return 0;
+ return saddr1->sin6_port == saddr2->sin6_port;
+}
#else
static int nfs_sockaddr_match_ipaddr4(const struct sockaddr_in *sa1,
const struct sockaddr_in *sa2)
@@ -270,9 +296,52 @@ static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
return nfs_sockaddr_match_ipaddr4((const struct sockaddr_in *)sa1,
(const struct sockaddr_in *)sa2);
}
+
+static int nfs_sockaddr_cmp_ip6(const struct sockaddr * sa1,
+ const struct sockaddr * sa2)
+{
+ return 0;
+}
#endif
/*
+ * Test if two ip4 socket addresses refer to the same socket, by
+ * comparing relevant fields. The padding bytes specifically, are
+ * not compared.
+ *
+ * The caller should ensure both socket addresses are AF_INET.
+ */
+static int nfs_sockaddr_cmp_ip4(const struct sockaddr *sa1,
+ const struct sockaddr *sa2)
+{
+ const struct sockaddr_in *saddr1 = (const struct sockaddr_in *)sa1;
+ const struct sockaddr_in *saddr2 = (const struct sockaddr_in *)sa2;
+
+ if (saddr1->sin_addr.s_addr != saddr2->sin_addr.s_addr)
+ return 0;
+ return saddr1->sin_port == saddr2->sin_port;
+}
+
+/*
+ * Test if two socket addresses represent the same actual socket,
+ * by comparing (only) relevant fields.
+ */
+static int nfs_sockaddr_cmp(const struct sockaddr *sa1,
+ const struct sockaddr *sa2)
+{
+ if (sa1->sa_family != sa2->sa_family)
+ return 0;
+
+ switch (sa1->sa_family) {
+ case AF_INET:
+ return nfs_sockaddr_cmp_ip4(sa1, sa2);
+ case AF_INET6:
+ return nfs_sockaddr_cmp_ip6(sa1, sa2);
+ }
+ return 0;
+}
+
+/*
* Find a client by IP address and protocol version
* - returns NULL if no such client
*/
@@ -344,8 +413,10 @@ struct nfs_client *nfs_find_client_next(struct nfs_client *clp)
static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *data)
{
struct nfs_client *clp;
+ const struct sockaddr *sap = data->addr;
list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
+ const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
/* Don't match clients that failed to initialise properly */
if (clp->cl_cons_state < 0)
continue;
@@ -358,7 +429,7 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
continue;
/* Match the full socket address */
- if (memcmp(&clp->cl_addr, data->addr, sizeof(clp->cl_addr)) != 0)
+ if (!nfs_sockaddr_cmp(sap, clap))
continue;
atomic_inc(&clp->cl_count);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e35c8199f82f..672368f865ca 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1892,8 +1892,14 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
cache.cred = cred;
cache.jiffies = jiffies;
status = NFS_PROTO(inode)->access(inode, &cache);
- if (status != 0)
+ if (status != 0) {
+ if (status == -ESTALE) {
+ nfs_zap_caches(inode);
+ if (!S_ISDIR(inode->i_mode))
+ set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
+ }
return status;
+ }
nfs_access_add_cache(inode, &cache);
out:
if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index cef62557c87d..6bbf0e6daad2 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -292,7 +292,7 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs_fattr fattr;
- struct page *pages[NFSACL_MAXPAGES] = { };
+ struct page *pages[NFSACL_MAXPAGES];
struct nfs3_setaclargs args = {
.inode = inode,
.mask = NFS_ACL,
@@ -303,7 +303,7 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
.rpc_argp = &args,
.rpc_resp = &fattr,
};
- int status, count;
+ int status;
status = -EOPNOTSUPP;
if (!nfs_server_capable(inode, NFS_CAP_ACLS))
@@ -319,6 +319,20 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
if (S_ISDIR(inode->i_mode)) {
args.mask |= NFS_DFACL;
args.acl_default = dfacl;
+ args.len = nfsacl_size(acl, dfacl);
+ } else
+ args.len = nfsacl_size(acl, NULL);
+
+ if (args.len > NFS_ACL_INLINE_BUFSIZE) {
+ unsigned int npages = 1 + ((args.len - 1) >> PAGE_SHIFT);
+
+ status = -ENOMEM;
+ do {
+ args.pages[args.npages] = alloc_page(GFP_KERNEL);
+ if (args.pages[args.npages] == NULL)
+ goto out_freepages;
+ args.npages++;
+ } while (args.npages < npages);
}
dprintk("NFS call setacl\n");
@@ -329,10 +343,6 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
nfs_zap_acl_cache(inode);
dprintk("NFS reply setacl: %d\n", status);
- /* pages may have been allocated at the xdr layer. */
- for (count = 0; count < NFSACL_MAXPAGES && args.pages[count]; count++)
- __free_page(args.pages[count]);
-
switch (status) {
case 0:
status = nfs_refresh_inode(inode, &fattr);
@@ -346,6 +356,11 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
case -ENOTSUPP:
status = -EOPNOTSUPP;
}
+out_freepages:
+ while (args.npages != 0) {
+ args.npages--;
+ __free_page(args.pages[args.npages]);
+ }
out:
return status;
}
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 11cdddec1432..6cdeacffde46 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -82,8 +82,10 @@
#define NFS3_commitres_sz (1+NFS3_wcc_data_sz+2)
#define ACL3_getaclargs_sz (NFS3_fh_sz+1)
-#define ACL3_setaclargs_sz (NFS3_fh_sz+1+2*(2+5*3))
-#define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+2*(2+5*3))
+#define ACL3_setaclargs_sz (NFS3_fh_sz+1+ \
+ XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
+#define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+ \
+ XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
#define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz)
/*
@@ -703,28 +705,18 @@ nfs3_xdr_setaclargs(struct rpc_rqst *req, __be32 *p,
struct nfs3_setaclargs *args)
{
struct xdr_buf *buf = &req->rq_snd_buf;
- unsigned int base, len_in_head, len = nfsacl_size(
- (args->mask & NFS_ACL) ? args->acl_access : NULL,
- (args->mask & NFS_DFACL) ? args->acl_default : NULL);
- int count, err;
+ unsigned int base;
+ int err;
p = xdr_encode_fhandle(p, NFS_FH(args->inode));
*p++ = htonl(args->mask);
- base = (char *)p - (char *)buf->head->iov_base;
- /* put as much of the acls into head as possible. */
- len_in_head = min_t(unsigned int, buf->head->iov_len - base, len);
- len -= len_in_head;
- req->rq_slen = xdr_adjust_iovec(req->rq_svec, p + (len_in_head >> 2));
-
- for (count = 0; (count << PAGE_SHIFT) < len; count++) {
- args->pages[count] = alloc_page(GFP_KERNEL);
- if (!args->pages[count]) {
- while (count)
- __free_page(args->pages[--count]);
- return -ENOMEM;
- }
- }
- xdr_encode_pages(buf, args->pages, 0, len);
+ req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+ base = req->rq_slen;
+
+ if (args->npages != 0)
+ xdr_encode_pages(buf, args->pages, 0, args->len);
+ else
+ req->rq_slen += args->len;
err = nfsacl_encode(buf, base, args->inode,
(args->mask & NFS_ACL) ?
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 30befc39b3c6..2a2a0a7143ad 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -21,7 +21,9 @@
#define NFSDBG_FACILITY NFSDBG_VFS
/*
- * Check if fs_root is valid
+ * Convert the NFSv4 pathname components into a standard posix path.
+ *
+ * Note that the resulting string will be placed at the end of the buffer
*/
static inline char *nfs4_pathname_string(const struct nfs4_pathname *pathname,
char *buffer, ssize_t buflen)
@@ -99,21 +101,20 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
{
struct vfsmount *mnt = ERR_PTR(-ENOENT);
char *mnt_path;
- int page2len;
+ unsigned int maxbuflen;
unsigned int s;
mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE);
if (IS_ERR(mnt_path))
return mnt;
mountdata->mnt_path = mnt_path;
- page2 += strlen(mnt_path) + 1;
- page2len = PAGE_SIZE - strlen(mnt_path) - 1;
+ maxbuflen = mnt_path - 1 - page2;
for (s = 0; s < location->nservers; s++) {
const struct nfs4_string *buf = &location->servers[s];
struct sockaddr_storage addr;
- if (buf->len <= 0 || buf->len >= PAGE_SIZE)
+ if (buf->len <= 0 || buf->len >= maxbuflen)
continue;
mountdata->addr = (struct sockaddr *)&addr;
@@ -126,8 +127,8 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
continue;
nfs_set_port(mountdata->addr, NFS_PORT);
- strncpy(page2, buf->data, page2len);
- page2[page2len] = '\0';
+ memcpy(page2, buf->data, buf->len);
+ page2[buf->len] = '\0';
mountdata->hostname = page2;
snprintf(page, PAGE_SIZE, "%s:%s",
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index f65953be39c0..9250067943d8 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2596,6 +2596,7 @@ static nfsd4_enc nfsd4_enc_ops[] = {
[OP_LOOKUPP] = (nfsd4_enc)nfsd4_encode_noop,
[OP_NVERIFY] = (nfsd4_enc)nfsd4_encode_noop,
[OP_OPEN] = (nfsd4_enc)nfsd4_encode_open,
+ [OP_OPENATTR] = (nfsd4_enc)nfsd4_encode_noop,
[OP_OPEN_CONFIRM] = (nfsd4_enc)nfsd4_encode_open_confirm,
[OP_OPEN_DOWNGRADE] = (nfsd4_enc)nfsd4_encode_open_downgrade,
[OP_PUTFH] = (nfsd4_enc)nfsd4_encode_noop,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 6e50aaa56ca2..c165a6403df0 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -998,8 +998,11 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
if (!EX_ISSYNC(exp))
stable = 0;
- if (stable && !EX_WGATHER(exp))
+ if (stable && !EX_WGATHER(exp)) {
+ spin_lock(&file->f_lock);
file->f_flags |= O_SYNC;
+ spin_unlock(&file->f_lock);
+ }
/* Write the data. */
oldfs = get_fs(); set_fs(KERNEL_DS);
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 3a9e5deed74d..19e3a96aa02c 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -176,7 +176,8 @@ static int ocfs2_dinode_insert_check(struct inode *inode,
BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL);
mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) &&
- (OCFS2_I(inode)->ip_clusters != rec->e_cpos),
+ (OCFS2_I(inode)->ip_clusters !=
+ le32_to_cpu(rec->e_cpos)),
"Device %s, asking for sparse allocation: inode %llu, "
"cpos %u, clusters %u\n",
osb->dev_str,
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index a067a6cffb01..8e1709a679b7 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -227,7 +227,7 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
size = i_size_read(inode);
if (size > PAGE_CACHE_SIZE ||
- size > ocfs2_max_inline_data(inode->i_sb)) {
+ size > ocfs2_max_inline_data_with_xattr(inode->i_sb, di)) {
ocfs2_error(inode->i_sb,
"Inode %llu has with inline data has bad size: %Lu",
(unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -1555,6 +1555,7 @@ static int ocfs2_try_to_write_inline_data(struct address_space *mapping,
int ret, written = 0;
loff_t end = pos + len;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
+ struct ocfs2_dinode *di = NULL;
mlog(0, "Inode %llu, write of %u bytes at off %llu. features: 0x%x\n",
(unsigned long long)oi->ip_blkno, len, (unsigned long long)pos,
@@ -1587,7 +1588,9 @@ static int ocfs2_try_to_write_inline_data(struct address_space *mapping,
/*
* Check whether the write can fit.
*/
- if (mmap_page || end > ocfs2_max_inline_data(inode->i_sb))
+ di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
+ if (mmap_page ||
+ end > ocfs2_max_inline_data_with_xattr(inode->i_sb, di))
return 0;
do_inline_write:
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 084aba86c3b2..4b11762f249e 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -532,7 +532,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
fe->i_dyn_features = cpu_to_le16(feat | OCFS2_INLINE_DATA_FL);
- fe->id2.i_data.id_count = cpu_to_le16(ocfs2_max_inline_data(osb->sb));
+ fe->id2.i_data.id_count = cpu_to_le16(
+ ocfs2_max_inline_data_with_xattr(osb->sb, fe));
} else {
fel = &fe->id2.i_list;
fel->l_tree_depth = 0;
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index c7ae45aaa36c..2332ef740f4f 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -1070,12 +1070,6 @@ static inline int ocfs2_fast_symlink_chars(struct super_block *sb)
offsetof(struct ocfs2_dinode, id2.i_symlink);
}
-static inline int ocfs2_max_inline_data(struct super_block *sb)
-{
- return sb->s_blocksize -
- offsetof(struct ocfs2_dinode, id2.i_data.id_data);
-}
-
static inline int ocfs2_max_inline_data_with_xattr(struct super_block *sb,
struct ocfs2_dinode *di)
{
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 4ddd788add67..2563df89fc2a 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -547,8 +547,12 @@ int ocfs2_calc_xattr_init(struct inode *dir,
* when blocksize = 512, may reserve one more cluser for
* xattr bucket, otherwise reserve one metadata block
* for them is ok.
+ * If this is a new directory with inline data,
+ * we choose to reserve the entire inline area for
+ * directory contents and force an external xattr block.
*/
if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
+ (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
(s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
if (ret) {
@@ -4791,19 +4795,33 @@ static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
char *val,
int value_len)
{
- int offset;
+ int ret, offset, block_off;
struct ocfs2_xattr_value_root *xv;
struct ocfs2_xattr_entry *xe = xs->here;
+ struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
+ void *base;
BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
- offset = le16_to_cpu(xe->xe_name_offset) +
- OCFS2_XATTR_SIZE(xe->xe_name_len);
+ ret = ocfs2_xattr_bucket_get_name_value(inode, xh,
+ xe - xh->xh_entries,
+ &block_off,
+ &offset);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
- xv = (struct ocfs2_xattr_value_root *)(xs->base + offset);
+ base = bucket_block(xs->bucket, block_off);
+ xv = (struct ocfs2_xattr_value_root *)(base + offset +
+ OCFS2_XATTR_SIZE(xe->xe_name_len));
- return __ocfs2_xattr_set_value_outside(inode, handle,
- xv, val, value_len);
+ ret = __ocfs2_xattr_set_value_outside(inode, handle,
+ xv, val, value_len);
+ if (ret)
+ mlog_errno(ret);
+out:
+ return ret;
}
static int ocfs2_rm_xattr_cluster(struct inode *inode,
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 6d720243f5f4..38e337d51ced 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -400,7 +400,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
pdev->devt = devt;
/* delay uevent until 'holders' subdir is created */
- pdev->uevent_suppress = 1;
+ dev_set_uevent_suppress(pdev, 1);
err = device_add(pdev);
if (err)
goto out_put;
@@ -410,7 +410,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
if (!p->holder_dir)
goto out_del;
- pdev->uevent_suppress = 0;
+ dev_set_uevent_suppress(pdev, 0);
if (flags & ADDPART_FLAG_WHOLEDISK) {
err = device_create_file(pdev, &dev_attr_whole_disk);
if (err)
@@ -422,7 +422,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
rcu_assign_pointer(ptbl->part[partno], p);
/* suppress uevent if the disk supresses it */
- if (!ddev->uevent_suppress)
+ if (!dev_get_uevent_suppress(pdev))
kobject_uevent(&pdev->kobj, KOBJ_ADD);
return p;
@@ -455,7 +455,7 @@ void register_disk(struct gendisk *disk)
dev_set_name(ddev, disk->disk_name);
/* delay uevents, until we scanned partition table */
- ddev->uevent_suppress = 1;
+ dev_set_uevent_suppress(ddev, 1);
if (device_add(ddev))
return;
@@ -490,7 +490,7 @@ void register_disk(struct gendisk *disk)
exit:
/* announce disk after possible partitions are created */
- ddev->uevent_suppress = 0;
+ dev_set_uevent_suppress(ddev, 0);
kobject_uevent(&ddev->kobj, KOBJ_ADD);
/* announce possible partitions */
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c
index 1e064c4a4f86..46297683cd34 100644
--- a/fs/partitions/ibm.c
+++ b/fs/partitions/ibm.c
@@ -21,20 +21,38 @@
* compute the block number from a
* cyl-cyl-head-head structure
*/
-static inline int
+static sector_t
cchh2blk (struct vtoc_cchh *ptr, struct hd_geometry *geo) {
- return ptr->cc * geo->heads * geo->sectors +
- ptr->hh * geo->sectors;
+
+ sector_t cyl;
+ __u16 head;
+
+ /*decode cylinder and heads for large volumes */
+ cyl = ptr->hh & 0xFFF0;
+ cyl <<= 12;
+ cyl |= ptr->cc;
+ head = ptr->hh & 0x000F;
+ return cyl * geo->heads * geo->sectors +
+ head * geo->sectors;
}
/*
* compute the block number from a
* cyl-cyl-head-head-block structure
*/
-static inline int
+static sector_t
cchhb2blk (struct vtoc_cchhb *ptr, struct hd_geometry *geo) {
- return ptr->cc * geo->heads * geo->sectors +
- ptr->hh * geo->sectors +
+
+ sector_t cyl;
+ __u16 head;
+
+ /*decode cylinder and heads for large volumes */
+ cyl = ptr->hh & 0xFFF0;
+ cyl <<= 12;
+ cyl |= ptr->cc;
+ head = ptr->hh & 0x000F;
+ return cyl * geo->heads * geo->sectors +
+ head * geo->sectors +
ptr->b;
}
@@ -43,14 +61,15 @@ cchhb2blk (struct vtoc_cchhb *ptr, struct hd_geometry *geo) {
int
ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
{
- int blocksize, offset, size,res;
- loff_t i_size;
+ int blocksize, res;
+ loff_t i_size, offset, size, fmt_size;
dasd_information2_t *info;
struct hd_geometry *geo;
char type[5] = {0,};
char name[7] = {0,};
union label_t {
- struct vtoc_volume_label vol;
+ struct vtoc_volume_label_cdl vol;
+ struct vtoc_volume_label_ldl lnx;
struct vtoc_cms_label cms;
} *label;
unsigned char *data;
@@ -85,14 +104,16 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
if (data == NULL)
goto out_readerr;
- strncpy (type, data, 4);
- if ((!info->FBA_layout) && (!strcmp(info->type, "ECKD")))
- strncpy(name, data + 8, 6);
- else
- strncpy(name, data + 4, 6);
memcpy(label, data, sizeof(union label_t));
put_dev_sector(sect);
+ if ((!info->FBA_layout) && (!strcmp(info->type, "ECKD"))) {
+ strncpy(type, label->vol.vollbl, 4);
+ strncpy(name, label->vol.volid, 6);
+ } else {
+ strncpy(type, label->lnx.vollbl, 4);
+ strncpy(name, label->lnx.volid, 6);
+ }
EBCASC(type, 4);
EBCASC(name, 6);
@@ -110,36 +131,54 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
/*
* VM style CMS1 labeled disk
*/
+ blocksize = label->cms.block_size;
if (label->cms.disk_offset != 0) {
printk("CMS1/%8s(MDSK):", name);
/* disk is reserved minidisk */
- blocksize = label->cms.block_size;
offset = label->cms.disk_offset;
size = (label->cms.block_count - 1)
* (blocksize >> 9);
} else {
printk("CMS1/%8s:", name);
offset = (info->label_block + 1);
- size = i_size >> 9;
+ size = label->cms.block_count
+ * (blocksize >> 9);
}
+ put_partition(state, 1, offset*(blocksize >> 9),
+ size-offset*(blocksize >> 9));
} else {
- /*
- * Old style LNX1 or unlabeled disk
- */
- if (strncmp(type, "LNX1", 4) == 0)
- printk ("LNX1/%8s:", name);
- else
+ if (strncmp(type, "LNX1", 4) == 0) {
+ printk("LNX1/%8s:", name);
+ if (label->lnx.ldl_version == 0xf2) {
+ fmt_size = label->lnx.formatted_blocks
+ * (blocksize >> 9);
+ } else if (!strcmp(info->type, "ECKD")) {
+ /* formated w/o large volume support */
+ fmt_size = geo->cylinders * geo->heads
+ * geo->sectors * (blocksize >> 9);
+ } else {
+ /* old label and no usable disk geometry
+ * (e.g. DIAG) */
+ fmt_size = i_size >> 9;
+ }
+ size = i_size >> 9;
+ if (fmt_size < size)
+ size = fmt_size;
+ offset = (info->label_block + 1);
+ } else {
+ /* unlabeled disk */
printk("(nonl)");
- offset = (info->label_block + 1);
- size = i_size >> 9;
- }
- put_partition(state, 1, offset*(blocksize >> 9),
+ size = i_size >> 9;
+ offset = (info->label_block + 1);
+ }
+ put_partition(state, 1, offset*(blocksize >> 9),
size-offset*(blocksize >> 9));
+ }
} else if (info->format == DASD_FORMAT_CDL) {
/*
* New style CDL formatted disk
*/
- unsigned int blk;
+ sector_t blk;
int counter;
/*
@@ -166,7 +205,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
/* skip FMT4 / FMT5 / FMT7 labels */
if (f1.DS1FMTID == _ascebc['4']
|| f1.DS1FMTID == _ascebc['5']
- || f1.DS1FMTID == _ascebc['7']) {
+ || f1.DS1FMTID == _ascebc['7']
+ || f1.DS1FMTID == _ascebc['9']) {
blk++;
data = read_dev_sector(bdev, blk *
(blocksize/512),
@@ -174,8 +214,9 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
continue;
}
- /* only FMT1 valid at this point */
- if (f1.DS1FMTID != _ascebc['1'])
+ /* only FMT1 and 8 labels valid at this point */
+ if (f1.DS1FMTID != _ascebc['1'] &&
+ f1.DS1FMTID != _ascebc['8'])
break;
/* OK, we got valid partition data */
diff --git a/fs/pipe.c b/fs/pipe.c
index 3a48ba5179d5..94ad15967cf9 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -667,10 +667,7 @@ pipe_read_fasync(int fd, struct file *filp, int on)
retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers);
mutex_unlock(&inode->i_mutex);
- if (retval < 0)
- return retval;
-
- return 0;
+ return retval;
}
@@ -684,10 +681,7 @@ pipe_write_fasync(int fd, struct file *filp, int on)
retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers);
mutex_unlock(&inode->i_mutex);
- if (retval < 0)
- return retval;
-
- return 0;
+ return retval;
}
@@ -699,18 +693,14 @@ pipe_rdwr_fasync(int fd, struct file *filp, int on)
int retval;
mutex_lock(&inode->i_mutex);
-
retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
-
- if (retval >= 0)
+ if (retval >= 0) {
retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
-
+ if (retval < 0) /* this can happen only if on == T */
+ fasync_helper(-1, filp, 0, &pipe->fasync_readers);
+ }
mutex_unlock(&inode->i_mutex);
-
- if (retval < 0)
- return retval;
-
- return 0;
+ return retval;
}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 0c9de19a1633..beaa0ce3b82e 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3066,7 +3066,6 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
int retval = -ENOENT;
ino_t ino;
int tid;
- unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */
struct pid_namespace *ns;
task = get_proc_task(inode);
@@ -3083,18 +3082,18 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
goto out_no_task;
retval = 0;
- switch (pos) {
+ switch ((unsigned long)filp->f_pos) {
case 0:
ino = inode->i_ino;
- if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0)
+ if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) < 0)
goto out;
- pos++;
+ filp->f_pos++;
/* fall through */
case 1:
ino = parent_ino(dentry);
- if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0)
+ if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) < 0)
goto out;
- pos++;
+ filp->f_pos++;
/* fall through */
}
@@ -3104,9 +3103,9 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
ns = filp->f_dentry->d_sb->s_fs_info;
tid = (int)filp->f_version;
filp->f_version = 0;
- for (task = first_tid(leader, tid, pos - 2, ns);
+ for (task = first_tid(leader, tid, filp->f_pos - 2, ns);
task;
- task = next_tid(task), pos++) {
+ task = next_tid(task), filp->f_pos++) {
tid = task_pid_nr_ns(task, ns);
if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) {
/* returning this tgid failed, save it as the first
@@ -3117,7 +3116,6 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
}
}
out:
- filp->f_pos = pos;
put_task_struct(leader);
out_no_task:
return retval;
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 2d1345112a42..e9983837d08d 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -80,7 +80,7 @@ static const struct file_operations proc_kpagecount_operations = {
#define KPF_RECLAIM 9
#define KPF_BUDDY 10
-#define kpf_copy_bit(flags, srcpos, dstpos) (((flags >> srcpos) & 1) << dstpos)
+#define kpf_copy_bit(flags, dstpos, srcpos) (((flags >> srcpos) & 1) << dstpos)
static ssize_t kpageflags_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index b9b567a28376..5d7c7ececa64 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -114,6 +114,9 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
if (!pagevec_add(&lru_pvec, page))
__pagevec_lru_add_file(&lru_pvec);
+ /* prevent the page from being discarded on memory pressure */
+ SetPageDirty(page);
+
unlock_page(page);
}
@@ -126,6 +129,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
return -EFBIG;
add_error:
+ pagevec_lru_add_file(&lru_pvec);
page_cache_release(pages + loop);
for (loop++; loop < npages; loop++)
__free_page(pages + loop);
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index c837dfc2b3c6..2a7960310349 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -80,7 +80,7 @@ static struct buffer_head *get_block_length(struct super_block *sb,
* generated a larger block - this does occasionally happen with zlib).
*/
int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
- int length, u64 *next_index, int srclength)
+ int length, u64 *next_index, int srclength, int pages)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
struct buffer_head **bh;
@@ -184,7 +184,7 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
offset = 0;
}
- if (msblk->stream.avail_out == 0) {
+ if (msblk->stream.avail_out == 0 && page < pages) {
msblk->stream.next_out = buffer[page++];
msblk->stream.avail_out = PAGE_CACHE_SIZE;
}
@@ -201,25 +201,20 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
zlib_init = 1;
}
- zlib_err = zlib_inflate(&msblk->stream, Z_NO_FLUSH);
+ zlib_err = zlib_inflate(&msblk->stream, Z_SYNC_FLUSH);
if (msblk->stream.avail_in == 0 && k < b)
put_bh(bh[k++]);
} while (zlib_err == Z_OK);
if (zlib_err != Z_STREAM_END) {
- ERROR("zlib_inflate returned unexpected result"
- " 0x%x, srclength %d, avail_in %d,"
- " avail_out %d\n", zlib_err, srclength,
- msblk->stream.avail_in,
- msblk->stream.avail_out);
+ ERROR("zlib_inflate error, data probably corrupt\n");
goto release_mutex;
}
zlib_err = zlib_inflateEnd(&msblk->stream);
if (zlib_err != Z_OK) {
- ERROR("zlib_inflateEnd returned unexpected result 0x%x,"
- " srclength %d\n", zlib_err, srclength);
+ ERROR("zlib_inflate error, data probably corrupt\n");
goto release_mutex;
}
length = msblk->stream.total_out;
@@ -268,7 +263,8 @@ block_release:
put_bh(bh[k]);
read_failure:
- ERROR("sb_bread failed reading block 0x%llx\n", cur_index);
+ ERROR("squashfs_read_data failed to read block 0x%llx\n",
+ (unsigned long long) index);
kfree(bh);
return -EIO;
}
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index f29eda16d25e..1c4739e33af6 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -119,7 +119,7 @@ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb,
entry->length = squashfs_read_data(sb, entry->data,
block, length, &entry->next_index,
- cache->block_size);
+ cache->block_size, cache->pages);
spin_lock(&cache->lock);
@@ -406,7 +406,7 @@ int squashfs_read_table(struct super_block *sb, void *buffer, u64 block,
for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE)
data[i] = buffer;
res = squashfs_read_data(sb, data, block, length |
- SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length);
+ SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length, pages);
kfree(data);
return res;
}
diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index 7a63398bb855..9101dbde39ec 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -133,7 +133,8 @@ int squashfs_read_inode(struct inode *inode, long long ino)
type = le16_to_cpu(sqshb_ino->inode_type);
switch (type) {
case SQUASHFS_REG_TYPE: {
- unsigned int frag_offset, frag_size, frag;
+ unsigned int frag_offset, frag;
+ int frag_size;
u64 frag_blk;
struct squashfs_reg_inode *sqsh_ino = &squashfs_ino.reg;
@@ -175,7 +176,8 @@ int squashfs_read_inode(struct inode *inode, long long ino)
break;
}
case SQUASHFS_LREG_TYPE: {
- unsigned int frag_offset, frag_size, frag;
+ unsigned int frag_offset, frag;
+ int frag_size;
u64 frag_blk;
struct squashfs_lreg_inode *sqsh_ino = &squashfs_ino.lreg;
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index 6b2515d027d5..0e9feb6adf7e 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -34,7 +34,7 @@ static inline struct squashfs_inode_info *squashfs_i(struct inode *inode)
/* block.c */
extern int squashfs_read_data(struct super_block *, void **, u64, int, u64 *,
- int);
+ int, int);
/* cache.c */
extern struct squashfs_cache *squashfs_cache_init(char *, int, int);
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 071df5b5b491..681ec0d83799 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -389,7 +389,7 @@ static int __init init_squashfs_fs(void)
return err;
}
- printk(KERN_INFO "squashfs: version 4.0 (2009/01/03) "
+ printk(KERN_INFO "squashfs: version 4.0 (2009/01/31) "
"Phillip Lougher\n");
return 0;
diff --git a/fs/super.c b/fs/super.c
index 8349ed6b1412..dd4acb158b5e 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -371,8 +371,10 @@ retry:
continue;
if (!grab_super(old))
goto retry;
- if (s)
+ if (s) {
+ up_write(&s->s_umount);
destroy_super(s);
+ }
return old;
}
}
@@ -387,6 +389,7 @@ retry:
err = set(s, data);
if (err) {
spin_unlock(&sb_lock);
+ up_write(&s->s_umount);
destroy_super(s);
return ERR_PTR(err);
}
@@ -671,7 +674,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
return 0;
}
-static void do_emergency_remount(unsigned long foo)
+static void do_emergency_remount(struct work_struct *work)
{
struct super_block *sb;
@@ -694,12 +697,19 @@ static void do_emergency_remount(unsigned long foo)
spin_lock(&sb_lock);
}
spin_unlock(&sb_lock);
+ kfree(work);
printk("Emergency Remount complete\n");
}
void emergency_remount(void)
{
- pdflush_operation(do_emergency_remount, 0);
+ struct work_struct *work;
+
+ work = kmalloc(sizeof(*work), GFP_ATOMIC);
+ if (work) {
+ INIT_WORK(work, do_emergency_remount);
+ schedule_work(work);
+ }
}
/*
diff --git a/fs/sync.c b/fs/sync.c
index a16d53e5fe9d..ec95a69d17aa 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -42,9 +42,21 @@ SYSCALL_DEFINE0(sync)
return 0;
}
+static void do_sync_work(struct work_struct *work)
+{
+ do_sync(0);
+ kfree(work);
+}
+
void emergency_sync(void)
{
- pdflush_operation(do_sync, 0);
+ struct work_struct *work;
+
+ work = kmalloc(sizeof(*work), GFP_ATOMIC);
+ if (work) {
+ INIT_WORK(work, do_sync_work);
+ schedule_work(work);
+ }
}
/*
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index f2c478c3424e..07703d3ff4a1 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -21,15 +21,28 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/mutex.h>
+#include <linux/mm.h>
#include <asm/uaccess.h>
#include "sysfs.h"
+/*
+ * There's one bin_buffer for each open file.
+ *
+ * filp->private_data points to bin_buffer and
+ * sysfs_dirent->s_bin_attr.buffers points to a the bin_buffer s
+ * sysfs_dirent->s_bin_attr.buffers is protected by sysfs_bin_lock
+ */
+static DEFINE_MUTEX(sysfs_bin_lock);
+
struct bin_buffer {
- struct mutex mutex;
- void *buffer;
- int mmapped;
+ struct mutex mutex;
+ void *buffer;
+ int mmapped;
+ struct vm_operations_struct *vm_ops;
+ struct file *file;
+ struct hlist_node list;
};
static int
@@ -168,6 +181,175 @@ out_free:
return count;
}
+static void bin_vma_open(struct vm_area_struct *vma)
+{
+ struct file *file = vma->vm_file;
+ struct bin_buffer *bb = file->private_data;
+ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+
+ if (!bb->vm_ops || !bb->vm_ops->open)
+ return;
+
+ if (!sysfs_get_active_two(attr_sd))
+ return;
+
+ bb->vm_ops->open(vma);
+
+ sysfs_put_active_two(attr_sd);
+}
+
+static void bin_vma_close(struct vm_area_struct *vma)
+{
+ struct file *file = vma->vm_file;
+ struct bin_buffer *bb = file->private_data;
+ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+
+ if (!bb->vm_ops || !bb->vm_ops->close)
+ return;
+
+ if (!sysfs_get_active_two(attr_sd))
+ return;
+
+ bb->vm_ops->close(vma);
+
+ sysfs_put_active_two(attr_sd);
+}
+
+static int bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct file *file = vma->vm_file;
+ struct bin_buffer *bb = file->private_data;
+ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+ int ret;
+
+ if (!bb->vm_ops || !bb->vm_ops->fault)
+ return VM_FAULT_SIGBUS;
+
+ if (!sysfs_get_active_two(attr_sd))
+ return VM_FAULT_SIGBUS;
+
+ ret = bb->vm_ops->fault(vma, vmf);
+
+ sysfs_put_active_two(attr_sd);
+ return ret;
+}
+
+static int bin_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+{
+ struct file *file = vma->vm_file;
+ struct bin_buffer *bb = file->private_data;
+ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+ int ret;
+
+ if (!bb->vm_ops)
+ return -EINVAL;
+
+ if (!bb->vm_ops->page_mkwrite)
+ return 0;
+
+ if (!sysfs_get_active_two(attr_sd))
+ return -EINVAL;
+
+ ret = bb->vm_ops->page_mkwrite(vma, page);
+
+ sysfs_put_active_two(attr_sd);
+ return ret;
+}
+
+static int bin_access(struct vm_area_struct *vma, unsigned long addr,
+ void *buf, int len, int write)
+{
+ struct file *file = vma->vm_file;
+ struct bin_buffer *bb = file->private_data;
+ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+ int ret;
+
+ if (!bb->vm_ops || !bb->vm_ops->access)
+ return -EINVAL;
+
+ if (!sysfs_get_active_two(attr_sd))
+ return -EINVAL;
+
+ ret = bb->vm_ops->access(vma, addr, buf, len, write);
+
+ sysfs_put_active_two(attr_sd);
+ return ret;
+}
+
+#ifdef CONFIG_NUMA
+static int bin_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
+{
+ struct file *file = vma->vm_file;
+ struct bin_buffer *bb = file->private_data;
+ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+ int ret;
+
+ if (!bb->vm_ops || !bb->vm_ops->set_policy)
+ return 0;
+
+ if (!sysfs_get_active_two(attr_sd))
+ return -EINVAL;
+
+ ret = bb->vm_ops->set_policy(vma, new);
+
+ sysfs_put_active_two(attr_sd);
+ return ret;
+}
+
+static struct mempolicy *bin_get_policy(struct vm_area_struct *vma,
+ unsigned long addr)
+{
+ struct file *file = vma->vm_file;
+ struct bin_buffer *bb = file->private_data;
+ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+ struct mempolicy *pol;
+
+ if (!bb->vm_ops || !bb->vm_ops->get_policy)
+ return vma->vm_policy;
+
+ if (!sysfs_get_active_two(attr_sd))
+ return vma->vm_policy;
+
+ pol = bb->vm_ops->get_policy(vma, addr);
+
+ sysfs_put_active_two(attr_sd);
+ return pol;
+}
+
+static int bin_migrate(struct vm_area_struct *vma, const nodemask_t *from,
+ const nodemask_t *to, unsigned long flags)
+{
+ struct file *file = vma->vm_file;
+ struct bin_buffer *bb = file->private_data;
+ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+ int ret;
+
+ if (!bb->vm_ops || !bb->vm_ops->migrate)
+ return 0;
+
+ if (!sysfs_get_active_two(attr_sd))
+ return 0;
+
+ ret = bb->vm_ops->migrate(vma, from, to, flags);
+
+ sysfs_put_active_two(attr_sd);
+ return ret;
+}
+#endif
+
+static struct vm_operations_struct bin_vm_ops = {
+ .open = bin_vma_open,
+ .close = bin_vma_close,
+ .fault = bin_fault,
+ .page_mkwrite = bin_page_mkwrite,
+ .access = bin_access,
+#ifdef CONFIG_NUMA
+ .set_policy = bin_set_policy,
+ .get_policy = bin_get_policy,
+ .migrate = bin_migrate,
+#endif
+};
+
static int mmap(struct file *file, struct vm_area_struct *vma)
{
struct bin_buffer *bb = file->private_data;
@@ -179,18 +361,37 @@ static int mmap(struct file *file, struct vm_area_struct *vma)
mutex_lock(&bb->mutex);
/* need attr_sd for attr, its parent for kobj */
+ rc = -ENODEV;
if (!sysfs_get_active_two(attr_sd))
- return -ENODEV;
+ goto out_unlock;
rc = -EINVAL;
- if (attr->mmap)
- rc = attr->mmap(kobj, attr, vma);
+ if (!attr->mmap)
+ goto out_put;
+
+ rc = attr->mmap(kobj, attr, vma);
+ if (rc)
+ goto out_put;
- if (rc == 0 && !bb->mmapped)
- bb->mmapped = 1;
- else
- sysfs_put_active_two(attr_sd);
+ /*
+ * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
+ * to satisfy versions of X which crash if the mmap fails: that
+ * substitutes a new vm_file, and we don't then want bin_vm_ops.
+ */
+ if (vma->vm_file != file)
+ goto out_put;
+ rc = -EINVAL;
+ if (bb->mmapped && bb->vm_ops != vma->vm_ops)
+ goto out_put;
+
+ rc = 0;
+ bb->mmapped = 1;
+ bb->vm_ops = vma->vm_ops;
+ vma->vm_ops = &bin_vm_ops;
+out_put:
+ sysfs_put_active_two(attr_sd);
+out_unlock:
mutex_unlock(&bb->mutex);
return rc;
@@ -223,8 +424,13 @@ static int open(struct inode * inode, struct file * file)
goto err_out;
mutex_init(&bb->mutex);
+ bb->file = file;
file->private_data = bb;
+ mutex_lock(&sysfs_bin_lock);
+ hlist_add_head(&bb->list, &attr_sd->s_bin_attr.buffers);
+ mutex_unlock(&sysfs_bin_lock);
+
/* open succeeded, put active references */
sysfs_put_active_two(attr_sd);
return 0;
@@ -237,11 +443,12 @@ static int open(struct inode * inode, struct file * file)
static int release(struct inode * inode, struct file * file)
{
- struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
struct bin_buffer *bb = file->private_data;
- if (bb->mmapped)
- sysfs_put_active_two(attr_sd);
+ mutex_lock(&sysfs_bin_lock);
+ hlist_del(&bb->list);
+ mutex_unlock(&sysfs_bin_lock);
+
kfree(bb->buffer);
kfree(bb);
return 0;
@@ -256,6 +463,26 @@ const struct file_operations bin_fops = {
.release = release,
};
+
+void unmap_bin_file(struct sysfs_dirent *attr_sd)
+{
+ struct bin_buffer *bb;
+ struct hlist_node *tmp;
+
+ if (sysfs_type(attr_sd) != SYSFS_KOBJ_BIN_ATTR)
+ return;
+
+ mutex_lock(&sysfs_bin_lock);
+
+ hlist_for_each_entry(bb, tmp, &attr_sd->s_bin_attr.buffers, list) {
+ struct inode *inode = bb->file->f_path.dentry->d_inode;
+
+ unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+ }
+
+ mutex_unlock(&sysfs_bin_lock);
+}
+
/**
* sysfs_create_bin_file - create binary file for object.
* @kobj: object.
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 82d3b79d0e08..66aeb4fff0c3 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -434,6 +434,26 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
}
/**
+ * sysfs_pathname - return full path to sysfs dirent
+ * @sd: sysfs_dirent whose path we want
+ * @path: caller allocated buffer
+ *
+ * Gives the name "/" to the sysfs_root entry; any path returned
+ * is relative to wherever sysfs is mounted.
+ *
+ * XXX: does no error checking on @path size
+ */
+static char *sysfs_pathname(struct sysfs_dirent *sd, char *path)
+{
+ if (sd->s_parent) {
+ sysfs_pathname(sd->s_parent, path);
+ strcat(path, "/");
+ }
+ strcat(path, sd->s_name);
+ return path;
+}
+
+/**
* sysfs_add_one - add sysfs_dirent to parent
* @acxt: addrm context to use
* @sd: sysfs_dirent to be added
@@ -458,8 +478,16 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
int ret;
ret = __sysfs_add_one(acxt, sd);
- WARN(ret == -EEXIST, KERN_WARNING "sysfs: duplicate filename '%s' "
- "can not be created\n", sd->s_name);
+ if (ret == -EEXIST) {
+ char *path = kzalloc(PATH_MAX, GFP_KERNEL);
+ WARN(1, KERN_WARNING
+ "sysfs: cannot create duplicate filename '%s'\n",
+ (path == NULL) ? sd->s_name :
+ strcat(strcat(sysfs_pathname(acxt->parent_sd, path), "/"),
+ sd->s_name));
+ kfree(path);
+ }
+
return ret;
}
@@ -581,6 +609,7 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
sysfs_drop_dentry(sd);
sysfs_deactivate(sd);
+ unmap_bin_file(sd);
sysfs_put(sd);
}
}
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 1f4a3f877262..289c43a47263 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -659,13 +659,16 @@ void sysfs_remove_file_from_group(struct kobject *kobj,
EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group);
struct sysfs_schedule_callback_struct {
- struct kobject *kobj;
+ struct list_head workq_list;
+ struct kobject *kobj;
void (*func)(void *);
void *data;
struct module *owner;
struct work_struct work;
};
+static DEFINE_MUTEX(sysfs_workq_mutex);
+static LIST_HEAD(sysfs_workq);
static void sysfs_schedule_callback_work(struct work_struct *work)
{
struct sysfs_schedule_callback_struct *ss = container_of(work,
@@ -674,6 +677,9 @@ static void sysfs_schedule_callback_work(struct work_struct *work)
(ss->func)(ss->data);
kobject_put(ss->kobj);
module_put(ss->owner);
+ mutex_lock(&sysfs_workq_mutex);
+ list_del(&ss->workq_list);
+ mutex_unlock(&sysfs_workq_mutex);
kfree(ss);
}
@@ -695,15 +701,25 @@ static void sysfs_schedule_callback_work(struct work_struct *work)
* until @func returns.
*
* Returns 0 if the request was submitted, -ENOMEM if storage could not
- * be allocated, -ENODEV if a reference to @owner isn't available.
+ * be allocated, -ENODEV if a reference to @owner isn't available,
+ * -EAGAIN if a callback has already been scheduled for @kobj.
*/
int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
void *data, struct module *owner)
{
- struct sysfs_schedule_callback_struct *ss;
+ struct sysfs_schedule_callback_struct *ss, *tmp;
if (!try_module_get(owner))
return -ENODEV;
+
+ mutex_lock(&sysfs_workq_mutex);
+ list_for_each_entry_safe(ss, tmp, &sysfs_workq, workq_list)
+ if (ss->kobj == kobj) {
+ mutex_unlock(&sysfs_workq_mutex);
+ return -EAGAIN;
+ }
+ mutex_unlock(&sysfs_workq_mutex);
+
ss = kmalloc(sizeof(*ss), GFP_KERNEL);
if (!ss) {
module_put(owner);
@@ -715,6 +731,10 @@ int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
ss->data = data;
ss->owner = owner;
INIT_WORK(&ss->work, sysfs_schedule_callback_work);
+ INIT_LIST_HEAD(&ss->workq_list);
+ mutex_lock(&sysfs_workq_mutex);
+ list_add_tail(&ss->workq_list, &sysfs_workq);
+ mutex_unlock(&sysfs_workq_mutex);
schedule_work(&ss->work);
return 0;
}
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index dfa3d94cfc74..555f0ff988df 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -147,6 +147,7 @@ static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
{
struct bin_attribute *bin_attr;
+ inode->i_private = sysfs_get(sd);
inode->i_mapping->a_ops = &sysfs_aops;
inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
inode->i_op = &sysfs_inode_operations;
@@ -214,6 +215,22 @@ struct inode * sysfs_get_inode(struct sysfs_dirent *sd)
return inode;
}
+/*
+ * The sysfs_dirent serves as both an inode and a directory entry for sysfs.
+ * To prevent the sysfs inode numbers from being freed prematurely we take a
+ * reference to sysfs_dirent from the sysfs inode. A
+ * super_operations.delete_inode() implementation is needed to drop that
+ * reference upon inode destruction.
+ */
+void sysfs_delete_inode(struct inode *inode)
+{
+ struct sysfs_dirent *sd = inode->i_private;
+
+ truncate_inode_pages(&inode->i_data, 0);
+ clear_inode(inode);
+ sysfs_put(sd);
+}
+
int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name)
{
struct sysfs_addrm_cxt acxt;
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index ab343e371d64..49749955ccaf 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -17,11 +17,10 @@
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/module.h>
+#include <linux/magic.h>
#include "sysfs.h"
-/* Random magic number */
-#define SYSFS_MAGIC 0x62656572
static struct vfsmount *sysfs_mount;
struct super_block * sysfs_sb = NULL;
@@ -30,6 +29,7 @@ struct kmem_cache *sysfs_dir_cachep;
static const struct super_operations sysfs_ops = {
.statfs = simple_statfs,
.drop_inode = generic_delete_inode,
+ .delete_inode = sysfs_delete_inode,
};
struct sysfs_dirent sysfs_root = {
@@ -53,7 +53,9 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
sysfs_sb = sb;
/* get root inode, initialize and unlock it */
+ mutex_lock(&sysfs_mutex);
inode = sysfs_get_inode(&sysfs_root);
+ mutex_unlock(&sysfs_mutex);
if (!inode) {
pr_debug("sysfs: could not get root inode\n");
return -ENOMEM;
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 93c6d6b27c4d..3fa0d98481e2 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -28,6 +28,7 @@ struct sysfs_elem_attr {
struct sysfs_elem_bin_attr {
struct bin_attribute *bin_attr;
+ struct hlist_head buffers;
};
/*
@@ -145,6 +146,7 @@ static inline void __sysfs_put(struct sysfs_dirent *sd)
* inode.c
*/
struct inode *sysfs_get_inode(struct sysfs_dirent *sd);
+void sysfs_delete_inode(struct inode *inode);
int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
int sysfs_inode_init(void);
@@ -163,6 +165,7 @@ int sysfs_add_file_mode(struct sysfs_dirent *dir_sd,
* bin.c
*/
extern const struct file_operations bin_fops;
+void unmap_bin_file(struct sysfs_dirent *attr_sd);
/*
* symlink.c
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index e65212dfb60e..261a1c2f22dd 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -41,7 +41,7 @@
* Stefan Reinauer <stepan@home.culture.mipt.ru>
*
* Module usage counts added on 96/04/29 by
- * Gertjan van Wingerde <gertjan@cs.vu.nl>
+ * Gertjan van Wingerde <gwingerde@gmail.com>
*
* Clean swab support on 19970406 by
* Francois-Rene Rideau <fare@tunes.org>
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index cb329edc925b..aa1016bb9134 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -34,6 +34,12 @@
#include <linux/backing-dev.h>
#include <linux/freezer.h>
+#include "xfs_sb.h"
+#include "xfs_inum.h"
+#include "xfs_ag.h"
+#include "xfs_dmapi.h"
+#include "xfs_mount.h"
+
static kmem_zone_t *xfs_buf_zone;
STATIC int xfsbufd(void *);
STATIC int xfsbufd_wakeup(int, gfp_t);
@@ -1435,10 +1441,12 @@ xfs_unregister_buftarg(
void
xfs_free_buftarg(
- xfs_buftarg_t *btp)
+ struct xfs_mount *mp,
+ struct xfs_buftarg *btp)
{
xfs_flush_buftarg(btp, 1);
- xfs_blkdev_issue_flush(btp);
+ if (mp->m_flags & XFS_MOUNT_BARRIER)
+ xfs_blkdev_issue_flush(btp);
xfs_free_bufhash(btp);
iput(btp->bt_mapping->host);
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 288ae7c4c800..9b4d666ad31f 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -413,7 +413,7 @@ static inline int XFS_bwrite(xfs_buf_t *bp)
* Handling of buftargs.
*/
extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);
-extern void xfs_free_buftarg(xfs_buftarg_t *);
+extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
extern void xfs_wait_buftarg(xfs_buftarg_t *);
extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index c71e226da7f5..32ae5028e96b 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -734,15 +734,15 @@ xfs_close_devices(
{
if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
- xfs_free_buftarg(mp->m_logdev_targp);
+ xfs_free_buftarg(mp, mp->m_logdev_targp);
xfs_blkdev_put(logdev);
}
if (mp->m_rtdev_targp) {
struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
- xfs_free_buftarg(mp->m_rtdev_targp);
+ xfs_free_buftarg(mp, mp->m_rtdev_targp);
xfs_blkdev_put(rtdev);
}
- xfs_free_buftarg(mp->m_ddev_targp);
+ xfs_free_buftarg(mp, mp->m_ddev_targp);
}
/*
@@ -811,9 +811,9 @@ xfs_open_devices(
out_free_rtdev_targ:
if (mp->m_rtdev_targp)
- xfs_free_buftarg(mp->m_rtdev_targp);
+ xfs_free_buftarg(mp, mp->m_rtdev_targp);
out_free_ddev_targ:
- xfs_free_buftarg(mp->m_ddev_targp);
+ xfs_free_buftarg(mp, mp->m_ddev_targp);
out_close_rtdev:
if (rtdev)
xfs_blkdev_put(rtdev);
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index e2fb6210d4c5..478e587087fe 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -246,9 +246,6 @@ xfs_iget_cache_miss(
goto out_destroy;
}
- if (lock_flags)
- xfs_ilock(ip, lock_flags);
-
/*
* Preload the radix tree so we can insert safely under the
* write spinlock. Note that we cannot sleep inside the preload
@@ -256,7 +253,16 @@ xfs_iget_cache_miss(
*/
if (radix_tree_preload(GFP_KERNEL)) {
error = EAGAIN;
- goto out_unlock;
+ goto out_destroy;
+ }
+
+ /*
+ * Because the inode hasn't been added to the radix-tree yet it can't
+ * be found by another thread, so we can do the non-sleeping lock here.
+ */
+ if (lock_flags) {
+ if (!xfs_ilock_nowait(ip, lock_flags))
+ BUG();
}
mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
@@ -284,7 +290,6 @@ xfs_iget_cache_miss(
out_preload_end:
write_unlock(&pag->pag_ici_lock);
radix_tree_preload_end();
-out_unlock:
if (lock_flags)
xfs_iunlock(ip, lock_flags);
out_destroy:
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index b1047de2fffd..61af610d79b3 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1455,10 +1455,19 @@ xlog_recover_add_to_trans(
item = item->ri_prev;
if (item->ri_total == 0) { /* first region to be added */
- item->ri_total = in_f->ilf_size;
- ASSERT(item->ri_total <= XLOG_MAX_REGIONS_IN_ITEM);
- item->ri_buf = kmem_zalloc((item->ri_total *
- sizeof(xfs_log_iovec_t)), KM_SLEEP);
+ if (in_f->ilf_size == 0 ||
+ in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) {
+ xlog_warn(
+ "XFS: bad number of regions (%d) in inode log format",
+ in_f->ilf_size);
+ ASSERT(0);
+ return XFS_ERROR(EIO);
+ }
+
+ item->ri_total = in_f->ilf_size;
+ item->ri_buf =
+ kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t),
+ KM_SLEEP);
}
ASSERT(item->ri_total > item->ri_cnt);
/* Description region is ri_buf[0] */