aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/cmservice.c16
-rw-r--r--fs/afs/internal.h11
-rw-r--r--fs/afs/main.c2
-rw-r--r--fs/afs/rxrpc.c18
-rw-r--r--fs/aio.c15
-rw-r--r--fs/autofs4/autofs_i.h2
-rw-r--r--fs/autofs4/dev-ioctl.c2
-rw-r--r--fs/autofs4/waitq.c18
-rw-r--r--fs/block_dev.c28
-rw-r--r--fs/btrfs/btrfs_inode.h3
-rw-r--r--fs/btrfs/check-integrity.c4
-rw-r--r--fs/btrfs/compression.c46
-rw-r--r--fs/btrfs/compression.h4
-rw-r--r--fs/btrfs/ctree.h10
-rw-r--r--fs/btrfs/dir-item.c2
-rw-r--r--fs/btrfs/disk-io.c85
-rw-r--r--fs/btrfs/disk-io.h12
-rw-r--r--fs/btrfs/extent-tree.c7
-rw-r--r--fs/btrfs/extent_io.c153
-rw-r--r--fs/btrfs/extent_io.h6
-rw-r--r--fs/btrfs/file-item.c14
-rw-r--r--fs/btrfs/file.c33
-rw-r--r--fs/btrfs/hash.c5
-rw-r--r--fs/btrfs/inode.c88
-rw-r--r--fs/btrfs/raid56.c16
-rw-r--r--fs/btrfs/scrub.c26
-rw-r--r--fs/btrfs/volumes.c11
-rw-r--r--fs/buffer.c15
-rw-r--r--fs/cachefiles/internal.h4
-rw-r--r--fs/cachefiles/namei.c2
-rw-r--r--fs/cachefiles/rdwr.c4
-rw-r--r--fs/ceph/acl.c1
-rw-r--r--fs/ceph/export.c4
-rw-r--r--fs/ceph/inode.c5
-rw-r--r--fs/ceph/mds_client.c4
-rw-r--r--fs/cifs/file.c2
-rw-r--r--fs/cifs/inode.c1
-rw-r--r--fs/cifs/misc.c2
-rw-r--r--fs/cifs/smb1ops.c9
-rw-r--r--fs/cifs/smb2ops.c8
-rw-r--r--fs/cifs/xattr.c2
-rw-r--r--fs/compat_ioctl.c6
-rw-r--r--fs/configfs/item.c8
-rw-r--r--fs/configfs/symlink.c3
-rw-r--r--fs/crypto/bio.c2
-rw-r--r--fs/dax.c28
-rw-r--r--fs/dcache.c10
-rw-r--r--fs/debugfs/file.c2
-rw-r--r--fs/debugfs/inode.c2
-rw-r--r--fs/direct-io.c25
-rw-r--r--fs/eventfd.c6
-rw-r--r--fs/eventpoll.c12
-rw-r--r--fs/exec.c28
-rw-r--r--fs/ext4/acl.c4
-rw-r--r--fs/ext4/ext4.h2
-rw-r--r--fs/ext4/extents.c85
-rw-r--r--fs/ext4/file.c89
-rw-r--r--fs/ext4/inline.c5
-rw-r--r--fs/ext4/inode.c30
-rw-r--r--fs/ext4/mballoc.c23
-rw-r--r--fs/ext4/namei.c13
-rw-r--r--fs/ext4/page-io.c15
-rw-r--r--fs/ext4/readpage.c4
-rw-r--r--fs/ext4/super.c19
-rw-r--r--fs/ext4/xattr.c8
-rw-r--r--fs/f2fs/data.c10
-rw-r--r--fs/f2fs/f2fs.h5
-rw-r--r--fs/f2fs/segment.c2
-rw-r--r--fs/f2fs/super.c2
-rw-r--r--fs/fcntl.c67
-rw-r--r--fs/fs-writeback.c12
-rw-r--r--fs/fs_pin.c4
-rw-r--r--fs/gfs2/incore.h1
-rw-r--r--fs/gfs2/log.c2
-rw-r--r--fs/gfs2/lops.c10
-rw-r--r--fs/gfs2/meta_io.c2
-rw-r--r--fs/gfs2/ops_fstype.c6
-rw-r--r--fs/gfs2/sys.c22
-rw-r--r--fs/hugetlbfs/inode.c2
-rw-r--r--fs/inode.c9
-rw-r--r--fs/iomap.c13
-rw-r--r--fs/jbd2/journal.c4
-rw-r--r--fs/jbd2/transaction.c48
-rw-r--r--fs/jfs/jfs_logmgr.c2
-rw-r--r--fs/jfs/jfs_metapage.c4
-rw-r--r--fs/mpage.c5
-rw-r--r--fs/namei.c1
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/nfs/blocklayout/blocklayout.c4
-rw-r--r--fs/nfs/callback_xdr.c1
-rw-r--r--fs/nfs/dir.c51
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c1
-rw-r--r--fs/nfs/internal.h3
-rw-r--r--fs/nfs/namespace.c2
-rw-r--r--fs/nfs/nfs42proc.c2
-rw-r--r--fs/nfs/nfs4client.c1
-rw-r--r--fs/nfs/nfs4proc.c9
-rw-r--r--fs/nfs/nfs4state.c2
-rw-r--r--fs/nfs/pnfs.c25
-rw-r--r--fs/nfs/pnfs.h10
-rw-r--r--fs/nfs/super.c22
-rw-r--r--fs/nfsd/blocklayout.c4
-rw-r--r--fs/nfsd/export.c4
-rw-r--r--fs/nfsd/nfs3xdr.c23
-rw-r--r--fs/nfsd/nfs4proc.c13
-rw-r--r--fs/nfsd/nfsxdr.c13
-rw-r--r--fs/nilfs2/segbuf.c2
-rw-r--r--fs/nilfs2/segment.c5
-rw-r--r--fs/ntfs/namei.c2
-rw-r--r--fs/ocfs2/cluster/heartbeat.c6
-rw-r--r--fs/ocfs2/dlmglue.c4
-rw-r--r--fs/ocfs2/export.c2
-rw-r--r--fs/ocfs2/super.c2
-rw-r--r--fs/ocfs2/xattr.c23
-rw-r--r--fs/open.c1
-rw-r--r--fs/orangefs/orangefs-bufmap.c12
-rw-r--r--fs/overlayfs/Kconfig1
-rw-r--r--fs/overlayfs/copy_up.c64
-rw-r--r--fs/overlayfs/dir.c61
-rw-r--r--fs/overlayfs/inode.c12
-rw-r--r--fs/overlayfs/namei.c18
-rw-r--r--fs/overlayfs/overlayfs.h18
-rw-r--r--fs/overlayfs/ovl_entry.h2
-rw-r--r--fs/overlayfs/super.c18
-rw-r--r--fs/overlayfs/util.c72
-rw-r--r--fs/proc/base.c2
-rw-r--r--fs/proc/task_mmu.c4
-rw-r--r--fs/pstore/inode.c22
-rw-r--r--fs/pstore/internal.h2
-rw-r--r--fs/pstore/platform.c69
-rw-r--r--fs/pstore/pmsg.c10
-rw-r--r--fs/pstore/ram.c16
-rw-r--r--fs/quota/dquot.c16
-rw-r--r--fs/read_write.c14
-rw-r--r--fs/reiserfs/journal.c6
-rw-r--r--fs/select.c4
-rw-r--r--fs/signalfd.c2
-rw-r--r--fs/stat.c1
-rw-r--r--fs/ufs/balloc.c70
-rw-r--r--fs/ufs/inode.c96
-rw-r--r--fs/ufs/super.c96
-rw-r--r--fs/ufs/ufs_fs.h9
-rw-r--r--fs/ufs/util.c17
-rw-r--r--fs/ufs/util.h19
-rw-r--r--fs/userfaultfd.c59
-rw-r--r--fs/xfs/Makefile3
-rw-r--r--fs/xfs/uuid.c63
-rw-r--r--fs/xfs/uuid.h35
-rw-r--r--fs/xfs/xfs_aops.c16
-rw-r--r--fs/xfs/xfs_buf.c45
-rw-r--r--fs/xfs/xfs_buf.h5
-rw-r--r--fs/xfs/xfs_file.c32
-rw-r--r--fs/xfs/xfs_icache.c9
-rw-r--r--fs/xfs/xfs_inode.c8
-rw-r--r--fs/xfs/xfs_inode_item.c8
-rw-r--r--fs/xfs/xfs_iomap.c22
-rw-r--r--fs/xfs/xfs_linux.h2
-rw-r--r--fs/xfs/xfs_log_recover.c6
-rw-r--r--fs/xfs/xfs_mount.c16
-rw-r--r--fs/xfs/xfs_super.c3
160 files changed, 1639 insertions, 1047 deletions
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 3062cceb5c2a..782d4d05a53b 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -350,7 +350,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
{
struct sockaddr_rxrpc srx;
struct afs_server *server;
- struct uuid_v1 *r;
+ struct afs_uuid *r;
unsigned loop;
__be32 *b;
int ret;
@@ -380,7 +380,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
}
_debug("unmarshall UUID");
- call->request = kmalloc(sizeof(struct uuid_v1), GFP_KERNEL);
+ call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL);
if (!call->request)
return -ENOMEM;
@@ -453,7 +453,7 @@ static int afs_deliver_cb_probe(struct afs_call *call)
static void SRXAFSCB_ProbeUuid(struct work_struct *work)
{
struct afs_call *call = container_of(work, struct afs_call, work);
- struct uuid_v1 *r = call->request;
+ struct afs_uuid *r = call->request;
struct {
__be32 match;
@@ -476,7 +476,7 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work)
*/
static int afs_deliver_cb_probe_uuid(struct afs_call *call)
{
- struct uuid_v1 *r;
+ struct afs_uuid *r;
unsigned loop;
__be32 *b;
int ret;
@@ -502,15 +502,15 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
}
_debug("unmarshall UUID");
- call->request = kmalloc(sizeof(struct uuid_v1), GFP_KERNEL);
+ call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL);
if (!call->request)
return -ENOMEM;
b = call->buffer;
r = call->request;
- r->time_low = b[0];
- r->time_mid = htons(ntohl(b[1]));
- r->time_hi_and_version = htons(ntohl(b[2]));
+ r->time_low = ntohl(b[0]);
+ r->time_mid = ntohl(b[1]);
+ r->time_hi_and_version = ntohl(b[2]);
r->clock_seq_hi_and_reserved = ntohl(b[3]);
r->clock_seq_low = ntohl(b[4]);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 393672997cc2..4e2556606623 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -410,6 +410,15 @@ struct afs_interface {
unsigned mtu; /* MTU of interface */
};
+struct afs_uuid {
+ __be32 time_low; /* low part of timestamp */
+ __be16 time_mid; /* mid part of timestamp */
+ __be16 time_hi_and_version; /* high part of timestamp and version */
+ __u8 clock_seq_hi_and_reserved; /* clock seq hi and variant */
+ __u8 clock_seq_low; /* clock seq low */
+ __u8 node[6]; /* spatially unique node ID (MAC addr) */
+};
+
/*****************************************************************************/
/*
* cache.c
@@ -544,7 +553,7 @@ extern int afs_drop_inode(struct inode *);
* main.c
*/
extern struct workqueue_struct *afs_wq;
-extern struct uuid_v1 afs_uuid;
+extern struct afs_uuid afs_uuid;
/*
* misc.c
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 51d7d17bca57..9944770849da 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -31,7 +31,7 @@ static char *rootcell;
module_param(rootcell, charp, 0);
MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list");
-struct uuid_v1 afs_uuid;
+struct afs_uuid afs_uuid;
struct workqueue_struct *afs_wq;
/*
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index d5990eb160bd..02781e78ffb6 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -341,6 +341,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
struct msghdr msg;
struct kvec iov[1];
size_t offset;
+ s64 tx_total_len;
u32 abort_code;
int ret;
@@ -364,9 +365,20 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
srx.transport.sin.sin_port = call->port;
memcpy(&srx.transport.sin.sin_addr, addr, 4);
+ /* Work out the length we're going to transmit. This is awkward for
+ * calls such as FS.StoreData where there's an extra injection of data
+ * after the initial fixed part.
+ */
+ tx_total_len = call->request_size;
+ if (call->send_pages) {
+ tx_total_len += call->last_to - call->first_offset;
+ tx_total_len += (call->last - call->first) * PAGE_SIZE;
+ }
+
/* create a call */
rxcall = rxrpc_kernel_begin_call(afs_socket, &srx, call->key,
- (unsigned long) call, gfp,
+ (unsigned long)call,
+ tx_total_len, gfp,
(async ?
afs_wake_up_async_call :
afs_wake_up_call_waiter));
@@ -738,6 +750,8 @@ void afs_send_empty_reply(struct afs_call *call)
_enter("");
+ rxrpc_kernel_set_tx_length(afs_socket, call->rxcall, 0);
+
msg.msg_name = NULL;
msg.msg_namelen = 0;
iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, NULL, 0, 0);
@@ -772,6 +786,8 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
_enter("");
+ rxrpc_kernel_set_tx_length(afs_socket, call->rxcall, len);
+
iov[0].iov_base = (void *) buf;
iov[0].iov_len = len;
msg.msg_name = NULL;
diff --git a/fs/aio.c b/fs/aio.c
index f52d925ee259..dcad3a66748c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1541,7 +1541,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
ssize_t ret;
/* enforce forwards compatibility on users */
- if (unlikely(iocb->aio_reserved1 || iocb->aio_reserved2)) {
+ if (unlikely(iocb->aio_reserved2)) {
pr_debug("EINVAL: reserve field set\n");
return -EINVAL;
}
@@ -1568,6 +1568,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
req->common.ki_pos = iocb->aio_offset;
req->common.ki_complete = aio_complete;
req->common.ki_flags = iocb_flags(req->common.ki_filp);
+ req->common.ki_hint = file_write_hint(file);
if (iocb->aio_flags & IOCB_FLAG_RESFD) {
/*
@@ -1586,6 +1587,18 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
req->common.ki_flags |= IOCB_EVENTFD;
}
+ ret = kiocb_set_rw_flags(&req->common, iocb->aio_rw_flags);
+ if (unlikely(ret)) {
+ pr_debug("EINVAL: aio_rw_flags\n");
+ goto out_put_req;
+ }
+
+ if ((req->common.ki_flags & IOCB_NOWAIT) &&
+ !(req->common.ki_flags & IOCB_DIRECT)) {
+ ret = -EOPNOTSUPP;
+ goto out_put_req;
+ }
+
ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
if (unlikely(ret)) {
pr_debug("EFAULT: aio_key\n");
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index beef981aa54f..974f5346458a 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -83,7 +83,7 @@ struct autofs_info {
struct autofs_wait_queue {
wait_queue_head_t queue;
struct autofs_wait_queue *next;
- autofs_wqt_t wait_queue_token;
+ autofs_wqt_t wait_queue_entry_token;
/* We use the following to see what we are waiting for */
struct qstr name;
u32 dev;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 734cbf8d9676..dd9f1bebb5a3 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -344,7 +344,7 @@ static int autofs_dev_ioctl_fail(struct file *fp,
int status;
token = (autofs_wqt_t) param->fail.token;
- status = param->fail.status ? param->fail.status : -ENOENT;
+ status = param->fail.status < 0 ? param->fail.status : -ENOENT;
return autofs4_wait_release(sbi, token, status);
}
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 24a58bf9ca72..7071895b0678 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -104,7 +104,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
size_t pktsz;
pr_debug("wait id = 0x%08lx, name = %.*s, type=%d\n",
- (unsigned long) wq->wait_queue_token,
+ (unsigned long) wq->wait_queue_entry_token,
wq->name.len, wq->name.name, type);
memset(&pkt, 0, sizeof(pkt)); /* For security reasons */
@@ -120,7 +120,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
pktsz = sizeof(*mp);
- mp->wait_queue_token = wq->wait_queue_token;
+ mp->wait_queue_entry_token = wq->wait_queue_entry_token;
mp->len = wq->name.len;
memcpy(mp->name, wq->name.name, wq->name.len);
mp->name[wq->name.len] = '\0';
@@ -133,7 +133,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
pktsz = sizeof(*ep);
- ep->wait_queue_token = wq->wait_queue_token;
+ ep->wait_queue_entry_token = wq->wait_queue_entry_token;
ep->len = wq->name.len;
memcpy(ep->name, wq->name.name, wq->name.len);
ep->name[wq->name.len] = '\0';
@@ -153,7 +153,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
pktsz = sizeof(*packet);
- packet->wait_queue_token = wq->wait_queue_token;
+ packet->wait_queue_entry_token = wq->wait_queue_entry_token;
packet->len = wq->name.len;
memcpy(packet->name, wq->name.name, wq->name.len);
packet->name[wq->name.len] = '\0';
@@ -428,7 +428,7 @@ int autofs4_wait(struct autofs_sb_info *sbi,
return -ENOMEM;
}
- wq->wait_queue_token = autofs4_next_wait_queue;
+ wq->wait_queue_entry_token = autofs4_next_wait_queue;
if (++autofs4_next_wait_queue == 0)
autofs4_next_wait_queue = 1;
wq->next = sbi->queues;
@@ -461,7 +461,7 @@ int autofs4_wait(struct autofs_sb_info *sbi,
}
pr_debug("new wait id = 0x%08lx, name = %.*s, nfy=%d\n",
- (unsigned long) wq->wait_queue_token, wq->name.len,
+ (unsigned long) wq->wait_queue_entry_token, wq->name.len,
wq->name.name, notify);
/*
@@ -471,7 +471,7 @@ int autofs4_wait(struct autofs_sb_info *sbi,
} else {
wq->wait_ctr++;
pr_debug("existing wait id = 0x%08lx, name = %.*s, nfy=%d\n",
- (unsigned long) wq->wait_queue_token, wq->name.len,
+ (unsigned long) wq->wait_queue_entry_token, wq->name.len,
wq->name.name, notify);
mutex_unlock(&sbi->wq_mutex);
kfree(qstr.name);
@@ -550,13 +550,13 @@ int autofs4_wait(struct autofs_sb_info *sbi,
}
-int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_token, int status)
+int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_entry_token, int status)
{
struct autofs_wait_queue *wq, **wql;
mutex_lock(&sbi->wq_mutex);
for (wql = &sbi->queues; (wq = *wql) != NULL; wql = &wq->next) {
- if (wq->wait_queue_token == wait_queue_token)
+ if (wq->wait_queue_entry_token == wait_queue_entry_token)
break;
}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 519599dddd36..a7df151f8aba 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -225,6 +225,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
bio_init(&bio, vecs, nr_pages);
bio.bi_bdev = bdev;
bio.bi_iter.bi_sector = pos >> 9;
+ bio.bi_write_hint = iocb->ki_hint;
bio.bi_private = current;
bio.bi_end_io = blkdev_bio_end_io_simple;
@@ -262,8 +263,11 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
if (vecs != inline_vecs)
kfree(vecs);
- if (unlikely(bio.bi_error))
- return bio.bi_error;
+ if (unlikely(bio.bi_status))
+ ret = blk_status_to_errno(bio.bi_status);
+
+ bio_uninit(&bio);
+
return ret;
}
@@ -288,16 +292,18 @@ static void blkdev_bio_end_io(struct bio *bio)
bool should_dirty = dio->should_dirty;
if (dio->multi_bio && !atomic_dec_and_test(&dio->ref)) {
- if (bio->bi_error && !dio->bio.bi_error)
- dio->bio.bi_error = bio->bi_error;
+ if (bio->bi_status && !dio->bio.bi_status)
+ dio->bio.bi_status = bio->bi_status;
} else {
if (!dio->is_sync) {
struct kiocb *iocb = dio->iocb;
- ssize_t ret = dio->bio.bi_error;
+ ssize_t ret;
- if (likely(!ret)) {
+ if (likely(!dio->bio.bi_status)) {
ret = dio->size;
iocb->ki_pos += ret;
+ } else {
+ ret = blk_status_to_errno(dio->bio.bi_status);
}
dio->iocb->ki_complete(iocb, ret, 0);
@@ -334,7 +340,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
bool is_read = (iov_iter_rw(iter) == READ), is_sync;
loff_t pos = iocb->ki_pos;
blk_qc_t qc = BLK_QC_T_NONE;
- int ret;
+ int ret = 0;
if ((pos | iov_iter_alignment(iter)) &
(bdev_logical_block_size(bdev) - 1))
@@ -358,12 +364,13 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
for (;;) {
bio->bi_bdev = bdev;
bio->bi_iter.bi_sector = pos >> 9;
+ bio->bi_write_hint = iocb->ki_hint;
bio->bi_private = dio;
bio->bi_end_io = blkdev_bio_end_io;
ret = bio_iov_iter_get_pages(bio, iter);
if (unlikely(ret)) {
- bio->bi_error = ret;
+ bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
break;
}
@@ -412,7 +419,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
}
__set_current_state(TASK_RUNNING);
- ret = dio->bio.bi_error;
+ if (!ret)
+ ret = blk_status_to_errno(dio->bio.bi_status);
if (likely(!ret))
ret = dio->size;
@@ -436,7 +444,7 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
static __init int blkdev_init(void)
{
- blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio));
+ blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS);
if (!blkdev_dio_pool)
return -ENOMEM;
return 0;
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index b8622e4d1744..d87ac27a5f2b 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -310,7 +310,8 @@ struct btrfs_dio_private {
* The original bio may be split to several sub-bios, this is
* done during endio of sub-bios
*/
- int (*subio_endio)(struct inode *, struct btrfs_io_bio *, int);
+ blk_status_t (*subio_endio)(struct inode *, struct btrfs_io_bio *,
+ blk_status_t);
};
/*
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index ab14c2e635ca..4ded1c3f92b8 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -2129,7 +2129,7 @@ static void btrfsic_bio_end_io(struct bio *bp)
/* mutex is not held! This is not save if IO is not yet completed
* on umount */
iodone_w_error = 0;
- if (bp->bi_error)
+ if (bp->bi_status)
iodone_w_error = 1;
BUG_ON(NULL == block);
@@ -2143,7 +2143,7 @@ static void btrfsic_bio_end_io(struct bio *bp)
if ((dev_state->state->print_mask &
BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
pr_info("bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
- bp->bi_error,
+ bp->bi_status,
btrfsic_get_block_type(dev_state->state, block),
block->logical_bytenr, dev_state->name,
block->dev_bytenr, block->mirror_num);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 10e6b282d09d..a2fad39f79ba 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -155,7 +155,7 @@ static void end_compressed_bio_read(struct bio *bio)
unsigned long index;
int ret;
- if (bio->bi_error)
+ if (bio->bi_status)
cb->errors = 1;
/* if there are more bios still pending for this compressed
@@ -268,7 +268,7 @@ static void end_compressed_bio_write(struct bio *bio)
struct page *page;
unsigned long index;
- if (bio->bi_error)
+ if (bio->bi_status)
cb->errors = 1;
/* if there are more bios still pending for this compressed
@@ -287,7 +287,7 @@ static void end_compressed_bio_write(struct bio *bio)
cb->start,
cb->start + cb->len - 1,
NULL,
- bio->bi_error ? 0 : 1);
+ bio->bi_status ? 0 : 1);
cb->compressed_pages[0]->mapping = NULL;
end_compressed_writeback(inode, cb);
@@ -320,7 +320,7 @@ out:
* This also checksums the file bytes and gets things ready for
* the end io hooks.
*/
-int btrfs_submit_compressed_write(struct inode *inode, u64 start,
+blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
unsigned long len, u64 disk_start,
unsigned long compressed_len,
struct page **compressed_pages,
@@ -335,13 +335,13 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
struct page *page;
u64 first_byte = disk_start;
struct block_device *bdev;
- int ret;
+ blk_status_t ret;
int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
WARN_ON(start & ((u64)PAGE_SIZE - 1));
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
if (!cb)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
refcount_set(&cb->pending_bios, 0);
cb->errors = 0;
cb->inode = inode;
@@ -358,7 +358,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
if (!bio) {
kfree(cb);
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
}
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
bio->bi_private = cb;
@@ -368,17 +368,17 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
/* create and submit bios for the compressed pages */
bytes_left = compressed_len;
for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
+ int submit = 0;
+
page = compressed_pages[pg_index];
page->mapping = inode->i_mapping;
if (bio->bi_iter.bi_size)
- ret = io_tree->ops->merge_bio_hook(page, 0,
+ submit = io_tree->ops->merge_bio_hook(page, 0,
PAGE_SIZE,
bio, 0);
- else
- ret = 0;
page->mapping = NULL;
- if (ret || bio_add_page(bio, page, PAGE_SIZE, 0) <
+ if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) <
PAGE_SIZE) {
bio_get(bio);
@@ -400,7 +400,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
ret = btrfs_map_bio(fs_info, bio, 0, 1);
if (ret) {
- bio->bi_error = ret;
+ bio->bi_status = ret;
bio_endio(bio);
}
@@ -434,7 +434,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
ret = btrfs_map_bio(fs_info, bio, 0, 1);
if (ret) {
- bio->bi_error = ret;
+ bio->bi_status = ret;
bio_endio(bio);
}
@@ -569,7 +569,7 @@ next:
* After the compressed pages are read, we copy the bytes into the
* bio we were passed and then call the bio end_io calls
*/
-int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -586,7 +586,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
u64 em_len;
u64 em_start;
struct extent_map *em;
- int ret = -ENOMEM;
+ blk_status_t ret = BLK_STS_RESOURCE;
int faili = 0;
u32 *sums;
@@ -600,7 +600,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
PAGE_SIZE);
read_unlock(&em_tree->lock);
if (!em)
- return -EIO;
+ return BLK_STS_IOERR;
compressed_len = em->block_len;
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
@@ -638,7 +638,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
__GFP_HIGHMEM);
if (!cb->compressed_pages[pg_index]) {
faili = pg_index - 1;
- ret = -ENOMEM;
+ ret = BLK_STS_RESOURCE;
goto fail2;
}
}
@@ -659,19 +659,19 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
refcount_set(&cb->pending_bios, 1);
for (pg_index = 0; pg_index < nr_pages; pg_index++) {
+ int submit = 0;
+
page = cb->compressed_pages[pg_index];
page->mapping = inode->i_mapping;
page->index = em_start >> PAGE_SHIFT;
if (comp_bio->bi_iter.bi_size)
- ret = tree->ops->merge_bio_hook(page, 0,
+ submit = tree->ops->merge_bio_hook(page, 0,
PAGE_SIZE,
comp_bio, 0);
- else
- ret = 0;
page->mapping = NULL;
- if (ret || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
+ if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
PAGE_SIZE) {
bio_get(comp_bio);
@@ -697,7 +697,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0);
if (ret) {
- comp_bio->bi_error = ret;
+ comp_bio->bi_status = ret;
bio_endio(comp_bio);
}
@@ -726,7 +726,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0);
if (ret) {
- comp_bio->bi_error = ret;
+ comp_bio->bi_status = ret;
bio_endio(comp_bio);
}
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 39ec43ab8df1..680d4265d601 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -48,12 +48,12 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
unsigned long total_out, u64 disk_start,
struct bio *bio);
-int btrfs_submit_compressed_write(struct inode *inode, u64 start,
+blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
unsigned long len, u64 disk_start,
unsigned long compressed_len,
struct page **compressed_pages,
unsigned long nr_pages);
-int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags);
enum btrfs_compression_type {
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 643c70d2b2e6..a0d0c79d95ed 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2563,7 +2563,7 @@ u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes);
static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_fs_info *fs_info,
unsigned num_items)
{
- return fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items;
+ return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items;
}
/*
@@ -2573,7 +2573,7 @@ static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_fs_info *fs_info,
static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_fs_info *fs_info,
unsigned num_items)
{
- return fs_info->nodesize * BTRFS_MAX_LEVEL * num_items;
+ return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * num_items;
}
int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
@@ -3078,8 +3078,8 @@ int btrfs_find_name_in_ext_backref(struct btrfs_path *path,
struct btrfs_dio_private;
int btrfs_del_csums(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr, u64 len);
-int btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst);
-int btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst);
+blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio,
u64 logical_offset);
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -3094,7 +3094,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_ordered_sum *sums);
-int btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
u64 file_start, int contig);
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit);
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 60a750678a82..c24d615e3d7f 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -468,7 +468,7 @@ int verify_dir_item(struct btrfs_fs_info *fs_info,
if (btrfs_dir_name_len(leaf, dir_item) > namelen) {
btrfs_crit(fs_info, "invalid dir item name len: %u",
- (unsigned)btrfs_dir_data_len(leaf, dir_item));
+ (unsigned)btrfs_dir_name_len(leaf, dir_item));
return 1;
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8685d67185d0..6036d15b47b8 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -87,7 +87,7 @@ struct btrfs_end_io_wq {
bio_end_io_t *end_io;
void *private;
struct btrfs_fs_info *info;
- int error;
+ blk_status_t status;
enum btrfs_wq_endio_type metadata;
struct list_head list;
struct btrfs_work work;
@@ -131,7 +131,7 @@ struct async_submit_bio {
*/
u64 bio_offset;
struct btrfs_work work;
- int error;
+ blk_status_t status;
};
/*
@@ -799,7 +799,7 @@ static void end_workqueue_bio(struct bio *bio)
btrfs_work_func_t func;
fs_info = end_io_wq->info;
- end_io_wq->error = bio->bi_error;
+ end_io_wq->status = bio->bi_status;
if (bio_op(bio) == REQ_OP_WRITE) {
if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
@@ -836,19 +836,19 @@ static void end_workqueue_bio(struct bio *bio)
btrfs_queue_work(wq, &end_io_wq->work);
}
-int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
+blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
enum btrfs_wq_endio_type metadata)
{
struct btrfs_end_io_wq *end_io_wq;
end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS);
if (!end_io_wq)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
end_io_wq->private = bio->bi_private;
end_io_wq->end_io = bio->bi_end_io;
end_io_wq->info = info;
- end_io_wq->error = 0;
+ end_io_wq->status = 0;
end_io_wq->bio = bio;
end_io_wq->metadata = metadata;
@@ -868,14 +868,14 @@ unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
static void run_one_async_start(struct btrfs_work *work)
{
struct async_submit_bio *async;
- int ret;
+ blk_status_t ret;
async = container_of(work, struct async_submit_bio, work);
ret = async->submit_bio_start(async->inode, async->bio,
async->mirror_num, async->bio_flags,
async->bio_offset);
if (ret)
- async->error = ret;
+ async->status = ret;
}
static void run_one_async_done(struct btrfs_work *work)
@@ -898,8 +898,8 @@ static void run_one_async_done(struct btrfs_work *work)
wake_up(&fs_info->async_submit_wait);
/* If an error occurred we just want to clean up the bio and move on */
- if (async->error) {
- async->bio->bi_error = async->error;
+ if (async->status) {
+ async->bio->bi_status = async->status;
bio_endio(async->bio);
return;
}
@@ -916,18 +916,17 @@ static void run_one_async_free(struct btrfs_work *work)
kfree(async);
}
-int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
- struct bio *bio, int mirror_num,
- unsigned long bio_flags,
- u64 bio_offset,
- extent_submit_bio_hook_t *submit_bio_start,
- extent_submit_bio_hook_t *submit_bio_done)
+blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info,
+ struct inode *inode, struct bio *bio, int mirror_num,
+ unsigned long bio_flags, u64 bio_offset,
+ extent_submit_bio_hook_t *submit_bio_start,
+ extent_submit_bio_hook_t *submit_bio_done)
{
struct async_submit_bio *async;
async = kmalloc(sizeof(*async), GFP_NOFS);
if (!async)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
async->inode = inode;
async->bio = bio;
@@ -941,7 +940,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
async->bio_flags = bio_flags;
async->bio_offset = bio_offset;
- async->error = 0;
+ async->status = 0;
atomic_inc(&fs_info->nr_async_submits);
@@ -959,7 +958,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
return 0;
}
-static int btree_csum_one_bio(struct bio *bio)
+static blk_status_t btree_csum_one_bio(struct bio *bio)
{
struct bio_vec *bvec;
struct btrfs_root *root;
@@ -972,12 +971,12 @@ static int btree_csum_one_bio(struct bio *bio)
break;
}
- return ret;
+ return errno_to_blk_status(ret);
}
-static int __btree_submit_bio_start(struct inode *inode, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
- u64 bio_offset)
+static blk_status_t __btree_submit_bio_start(struct inode *inode,
+ struct bio *bio, int mirror_num, unsigned long bio_flags,
+ u64 bio_offset)
{
/*
* when we're called for a write, we're already in the async
@@ -986,11 +985,11 @@ static int __btree_submit_bio_start(struct inode *inode, struct bio *bio,
return btree_csum_one_bio(bio);
}
-static int __btree_submit_bio_done(struct inode *inode, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
- u64 bio_offset)
+static blk_status_t __btree_submit_bio_done(struct inode *inode,
+ struct bio *bio, int mirror_num, unsigned long bio_flags,
+ u64 bio_offset)
{
- int ret;
+ blk_status_t ret;
/*
* when we're called for a write, we're already in the async
@@ -998,7 +997,7 @@ static int __btree_submit_bio_done(struct inode *inode, struct bio *bio,
*/
ret = btrfs_map_bio(btrfs_sb(inode->i_sb), bio, mirror_num, 1);
if (ret) {
- bio->bi_error = ret;
+ bio->bi_status = ret;
bio_endio(bio);
}
return ret;
@@ -1015,13 +1014,13 @@ static int check_async_write(unsigned long bio_flags)
return 1;
}
-static int btree_submit_bio_hook(struct inode *inode, struct bio *bio,
+static blk_status_t btree_submit_bio_hook(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
int async = check_async_write(bio_flags);
- int ret;
+ blk_status_t ret;
if (bio_op(bio) != REQ_OP_WRITE) {
/*
@@ -1054,7 +1053,7 @@ static int btree_submit_bio_hook(struct inode *inode, struct bio *bio,
return 0;
out_w_error:
- bio->bi_error = ret;
+ bio->bi_status = ret;
bio_endio(bio);
return ret;
}
@@ -1820,7 +1819,7 @@ static void end_workqueue_fn(struct btrfs_work *work)
end_io_wq = container_of(work, struct btrfs_end_io_wq, work);
bio = end_io_wq->bio;
- bio->bi_error = end_io_wq->error;
+ bio->bi_status = end_io_wq->status;
bio->bi_private = end_io_wq->private;
bio->bi_end_io = end_io_wq->end_io;
kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
@@ -3467,10 +3466,12 @@ static int write_dev_supers(struct btrfs_device *device,
* we fua the first super. The others we allow
* to go down lazy.
*/
- if (i == 0)
- ret = btrfsic_submit_bh(REQ_OP_WRITE, REQ_FUA, bh);
- else
+ if (i == 0) {
+ ret = btrfsic_submit_bh(REQ_OP_WRITE,
+ REQ_SYNC | REQ_FUA, bh);
+ } else {
ret = btrfsic_submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
+ }
if (ret)
errors++;
}
@@ -3495,11 +3496,11 @@ static void btrfs_end_empty_barrier(struct bio *bio)
* any device where the flush fails with eopnotsupp are flagged as not-barrier
* capable
*/
-static int write_dev_flush(struct btrfs_device *device, int wait)
+static blk_status_t write_dev_flush(struct btrfs_device *device, int wait)
{
struct request_queue *q = bdev_get_queue(device->bdev);
struct bio *bio;
- int ret = 0;
+ blk_status_t ret = 0;
if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags))
return 0;
@@ -3511,8 +3512,8 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
wait_for_completion(&device->flush_wait);
- if (bio->bi_error) {
- ret = bio->bi_error;
+ if (bio->bi_status) {
+ ret = bio->bi_status;
btrfs_dev_stat_inc_and_print(device,
BTRFS_DEV_STAT_FLUSH_ERRS);
}
@@ -3531,11 +3532,11 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
device->flush_bio = NULL;
bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
if (!bio)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
bio->bi_end_io = btrfs_end_empty_barrier;
bio->bi_bdev = device->bdev;
- bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
+ bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
init_completion(&device->flush_wait);
bio->bi_private = &device->flush_wait;
device->flush_bio = bio;
@@ -3556,7 +3557,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
struct btrfs_device *dev;
int errors_send = 0;
int errors_wait = 0;
- int ret;
+ blk_status_t ret;
/* send down all the barriers */
head = &info->fs_devices->devices;
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 21f1ceb85b76..c581927555f3 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -118,13 +118,13 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
u32 btrfs_csum_data(const char *data, u32 seed, size_t len);
void btrfs_csum_final(u32 crc, u8 *result);
-int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
+blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
enum btrfs_wq_endio_type metadata);
-int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
- struct bio *bio, int mirror_num,
- unsigned long bio_flags, u64 bio_offset,
- extent_submit_bio_hook_t *submit_bio_start,
- extent_submit_bio_hook_t *submit_bio_done);
+blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info,
+ struct inode *inode, struct bio *bio, int mirror_num,
+ unsigned long bio_flags, u64 bio_offset,
+ extent_submit_bio_hook_t *submit_bio_start,
+ extent_submit_bio_hook_t *submit_bio_done);
unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info);
int btrfs_write_tree_block(struct extent_buffer *buf);
int btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index e390451c72e6..33d979e9ea2a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3993,6 +3993,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
info->space_info_kobj, "%s",
alloc_name(found->flags));
if (ret) {
+ percpu_counter_destroy(&found->total_bytes_pinned);
kfree(found);
return ret;
}
@@ -4844,7 +4845,7 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
spin_unlock(&delayed_rsv->lock);
commit:
- trans = btrfs_join_transaction(fs_info->fs_root);
+ trans = btrfs_join_transaction(fs_info->extent_root);
if (IS_ERR(trans))
return -ENOSPC;
@@ -4862,7 +4863,7 @@ static int flush_space(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info, u64 num_bytes,
u64 orig_bytes, int state)
{
- struct btrfs_root *root = fs_info->fs_root;
+ struct btrfs_root *root = fs_info->extent_root;
struct btrfs_trans_handle *trans;
int nr;
int ret = 0;
@@ -5062,7 +5063,7 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
int flush_state = FLUSH_DELAYED_ITEMS_NR;
spin_lock(&space_info->lock);
- to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
+ to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->extent_root,
space_info);
if (!to_reclaim) {
spin_unlock(&space_info->lock);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d8da3edf2ac3..d1cd60140817 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -174,7 +174,8 @@ int __init extent_io_init(void)
goto free_state_cache;
btrfs_bioset = bioset_create(BIO_POOL_SIZE,
- offsetof(struct btrfs_io_bio, bio));
+ offsetof(struct btrfs_io_bio, bio),
+ BIOSET_NEED_BVECS);
if (!btrfs_bioset)
goto free_buffer_cache;
@@ -2399,6 +2400,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
struct bio *bio;
int read_mode = 0;
+ blk_status_t status;
int ret;
BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
@@ -2431,11 +2433,12 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
"Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
read_mode, failrec->this_mirror, failrec->in_validation);
- ret = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror,
+ status = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror,
failrec->bio_flags, 0);
- if (ret) {
+ if (status) {
free_io_failure(BTRFS_I(inode), failrec);
bio_put(bio);
+ ret = blk_status_to_errno(status);
}
return ret;
@@ -2458,7 +2461,7 @@ void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
if (!uptodate) {
ClearPageUptodate(page);
SetPageError(page);
- ret = ret < 0 ? ret : -EIO;
+ ret = err < 0 ? err : -EIO;
mapping_set_error(page->mapping, ret);
}
}
@@ -2474,6 +2477,7 @@ void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
*/
static void end_bio_extent_writepage(struct bio *bio)
{
+ int error = blk_status_to_errno(bio->bi_status);
struct bio_vec *bvec;
u64 start;
u64 end;
@@ -2503,7 +2507,7 @@ static void end_bio_extent_writepage(struct bio *bio)
start = page_offset(page);
end = start + bvec->bv_offset + bvec->bv_len - 1;
- end_extent_writepage(page, bio->bi_error, start, end);
+ end_extent_writepage(page, error, start, end);
end_page_writeback(page);
}
@@ -2536,7 +2540,7 @@ endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
static void end_bio_extent_readpage(struct bio *bio)
{
struct bio_vec *bvec;
- int uptodate = !bio->bi_error;
+ int uptodate = !bio->bi_status;
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
struct extent_io_tree *tree;
u64 offset = 0;
@@ -2556,7 +2560,7 @@ static void end_bio_extent_readpage(struct bio *bio)
btrfs_debug(fs_info,
"end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
- (u64)bio->bi_iter.bi_sector, bio->bi_error,
+ (u64)bio->bi_iter.bi_sector, bio->bi_status,
io_bio->mirror_num);
tree = &BTRFS_I(inode)->io_tree;
@@ -2615,7 +2619,7 @@ static void end_bio_extent_readpage(struct bio *bio)
ret = bio_readpage_error(bio, offset, page,
start, end, mirror);
if (ret == 0) {
- uptodate = !bio->bi_error;
+ uptodate = !bio->bi_status;
offset += len;
continue;
}
@@ -2673,7 +2677,7 @@ readpage_ok:
endio_readpage_release_extent(tree, extent_start, extent_len,
uptodate);
if (io_bio->end_io)
- io_bio->end_io(io_bio, bio->bi_error);
+ io_bio->end_io(io_bio, blk_status_to_errno(bio->bi_status));
bio_put(bio);
}
@@ -2743,7 +2747,7 @@ struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
unsigned long bio_flags)
{
- int ret = 0;
+ blk_status_t ret = 0;
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
struct page *page = bvec->bv_page;
struct extent_io_tree *tree = bio->bi_private;
@@ -2761,7 +2765,7 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
btrfsic_submit_bio(bio);
bio_put(bio);
- return ret;
+ return blk_status_to_errno(ret);
}
static int merge_bio(struct extent_io_tree *tree, struct page *page,
@@ -2826,6 +2830,7 @@ static int submit_extent_page(int op, int op_flags, struct extent_io_tree *tree,
bio_add_page(bio, page, page_size, offset);
bio->bi_end_io = end_io_func;
bio->bi_private = tree;
+ bio->bi_write_hint = page->mapping->host->i_write_hint;
bio_set_op_attrs(bio, op, op_flags);
if (wbc) {
wbc_init_bio(wbc, bio);
@@ -3707,7 +3712,7 @@ static void end_bio_extent_buffer_writepage(struct bio *bio)
BUG_ON(!eb);
done = atomic_dec_and_test(&eb->io_pages);
- if (bio->bi_error ||
+ if (bio->bi_status ||
test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
ClearPageUptodate(page);
set_btree_ioerr(page);
@@ -4377,6 +4382,123 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
return NULL;
}
+/*
+ * To cache previous fiemap extent
+ *
+ * Will be used for merging fiemap extent
+ */
+struct fiemap_cache {
+ u64 offset;
+ u64 phys;
+ u64 len;
+ u32 flags;
+ bool cached;
+};
+
+/*
+ * Helper to submit fiemap extent.
+ *
+ * Will try to merge current fiemap extent specified by @offset, @phys,
+ * @len and @flags with cached one.
+ * And only when we fails to merge, cached one will be submitted as
+ * fiemap extent.
+ *
+ * Return value is the same as fiemap_fill_next_extent().
+ */
+static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
+ struct fiemap_cache *cache,
+ u64 offset, u64 phys, u64 len, u32 flags)
+{
+ int ret = 0;
+
+ if (!cache->cached)
+ goto assign;
+
+ /*
+ * Sanity check, extent_fiemap() should have ensured that new
+ * fiemap extent won't overlap with cahced one.
+ * Not recoverable.
+ *
+ * NOTE: Physical address can overlap, due to compression
+ */
+ if (cache->offset + cache->len > offset) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ /*
+ * Only merges fiemap extents if
+ * 1) Their logical addresses are continuous
+ *
+ * 2) Their physical addresses are continuous
+ * So truly compressed (physical size smaller than logical size)
+ * extents won't get merged with each other
+ *
+ * 3) Share same flags except FIEMAP_EXTENT_LAST
+ * So regular extent won't get merged with prealloc extent
+ */
+ if (cache->offset + cache->len == offset &&
+ cache->phys + cache->len == phys &&
+ (cache->flags & ~FIEMAP_EXTENT_LAST) ==
+ (flags & ~FIEMAP_EXTENT_LAST)) {
+ cache->len += len;
+ cache->flags |= flags;
+ goto try_submit_last;
+ }
+
+ /* Not mergeable, need to submit cached one */
+ ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
+ cache->len, cache->flags);
+ cache->cached = false;
+ if (ret)
+ return ret;
+assign:
+ cache->cached = true;
+ cache->offset = offset;
+ cache->phys = phys;
+ cache->len = len;
+ cache->flags = flags;
+try_submit_last:
+ if (cache->flags & FIEMAP_EXTENT_LAST) {
+ ret = fiemap_fill_next_extent(fieinfo, cache->offset,
+ cache->phys, cache->len, cache->flags);
+ cache->cached = false;
+ }
+ return ret;
+}
+
+/*
+ * Sanity check for fiemap cache
+ *
+ * All fiemap cache should be submitted by emit_fiemap_extent()
+ * Iteration should be terminated either by last fiemap extent or
+ * fieinfo->fi_extents_max.
+ * So no cached fiemap should exist.
+ */
+static int check_fiemap_cache(struct btrfs_fs_info *fs_info,
+ struct fiemap_extent_info *fieinfo,
+ struct fiemap_cache *cache)
+{
+ int ret;
+
+ if (!cache->cached)
+ return 0;
+
+ /* Small and recoverbale problem, only to info developer */
+#ifdef CONFIG_BTRFS_DEBUG
+ WARN_ON(1);
+#endif
+ btrfs_warn(fs_info,
+ "unhandled fiemap cache detected: offset=%llu phys=%llu len=%llu flags=0x%x",
+ cache->offset, cache->phys, cache->len, cache->flags);
+ ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
+ cache->len, cache->flags);
+ cache->cached = false;
+ if (ret > 0)
+ ret = 0;
+ return ret;
+}
+
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len, get_extent_t *get_extent)
{
@@ -4394,6 +4516,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
struct extent_state *cached_state = NULL;
struct btrfs_path *path;
struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct fiemap_cache cache = { 0 };
int end = 0;
u64 em_start = 0;
u64 em_len = 0;
@@ -4573,8 +4696,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
flags |= FIEMAP_EXTENT_LAST;
end = 1;
}
- ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
- em_len, flags);
+ ret = emit_fiemap_extent(fieinfo, &cache, em_start, disko,
+ em_len, flags);
if (ret) {
if (ret == 1)
ret = 0;
@@ -4582,6 +4705,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
}
}
out_free:
+ if (!ret)
+ ret = check_fiemap_cache(root->fs_info, fieinfo, &cache);
free_extent_map(em);
out:
btrfs_free_path(path);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 1eafa2f0ede3..487ca0207cb6 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -92,9 +92,9 @@ struct btrfs_inode;
struct btrfs_io_bio;
struct io_failure_record;
-typedef int (extent_submit_bio_hook_t)(struct inode *inode, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
- u64 bio_offset);
+typedef blk_status_t (extent_submit_bio_hook_t)(struct inode *inode,
+ struct bio *bio, int mirror_num, unsigned long bio_flags,
+ u64 bio_offset);
struct extent_io_ops {
/*
* The following callbacks must be allways defined, the function
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 64fcb31d7163..5b1c7090e546 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -160,7 +160,7 @@ static void btrfs_io_bio_endio_readpage(struct btrfs_io_bio *bio, int err)
kfree(bio->csum_allocated);
}
-static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
+static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
u64 logical_offset, u32 *dst, int dio)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -182,7 +182,7 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
path = btrfs_alloc_path();
if (!path)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
if (!dst) {
@@ -191,7 +191,7 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
csum_size, GFP_NOFS);
if (!btrfs_bio->csum_allocated) {
btrfs_free_path(path);
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
}
btrfs_bio->csum = btrfs_bio->csum_allocated;
btrfs_bio->end_io = btrfs_io_bio_endio_readpage;
@@ -303,12 +303,12 @@ next:
return 0;
}
-int btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst)
+blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst)
{
return __btrfs_lookup_bio_sums(inode, bio, 0, dst, 0);
}
-int btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 offset)
+blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 offset)
{
return __btrfs_lookup_bio_sums(inode, bio, offset, NULL, 1);
}
@@ -433,7 +433,7 @@ fail:
return ret;
}
-int btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
+blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
u64 file_start, int contig)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -452,7 +452,7 @@ int btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
sums = kzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
GFP_NOFS);
if (!sums)
- return -ENOMEM;
+ return BLK_STS_RESOURCE;
sums->len = bio->bi_iter.bi_size;
INIT_LIST_HEAD(&sums->list);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index da1096eb1a40..59e2dccdf75b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1875,12 +1875,29 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
ssize_t num_written = 0;
bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
ssize_t err;
- loff_t pos;
- size_t count;
+ loff_t pos = iocb->ki_pos;
+ size_t count = iov_iter_count(from);
loff_t oldsize;
int clean_page = 0;
- inode_lock(inode);
+ if ((iocb->ki_flags & IOCB_NOWAIT) &&
+ (iocb->ki_flags & IOCB_DIRECT)) {
+ /* Don't sleep on inode rwsem */
+ if (!inode_trylock(inode))
+ return -EAGAIN;
+ /*
+ * We will allocate space in case nodatacow is not set,
+ * so bail
+ */
+ if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
+ BTRFS_INODE_PREALLOC)) ||
+ check_can_nocow(BTRFS_I(inode), pos, &count) <= 0) {
+ inode_unlock(inode);
+ return -EAGAIN;
+ }
+ } else
+ inode_lock(inode);
+
err = generic_write_checks(iocb, from);
if (err <= 0) {
inode_unlock(inode);
@@ -1914,8 +1931,6 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
*/
update_time_for_write(inode);
- pos = iocb->ki_pos;
- count = iov_iter_count(from);
start_pos = round_down(pos, fs_info->sectorsize);
oldsize = i_size_read(inode);
if (start_pos > oldsize) {
@@ -3071,13 +3086,19 @@ out:
return offset;
}
+static int btrfs_file_open(struct inode *inode, struct file *filp)
+{
+ filp->f_mode |= FMODE_AIO_NOWAIT;
+ return generic_file_open(inode, filp);
+}
+
const struct file_operations btrfs_file_operations = {
.llseek = btrfs_file_llseek,
.read_iter = generic_file_read_iter,
.splice_read = generic_file_splice_read,
.write_iter = btrfs_file_write_iter,
.mmap = btrfs_file_mmap,
- .open = generic_file_open,
+ .open = btrfs_file_open,
.release = btrfs_release_file,
.fsync = btrfs_sync_file,
.fallocate = btrfs_fallocate,
diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c
index a97fdc156a03..baacc1866861 100644
--- a/fs/btrfs/hash.c
+++ b/fs/btrfs/hash.c
@@ -38,6 +38,7 @@ u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length)
{
SHASH_DESC_ON_STACK(shash, tfm);
u32 *ctx = (u32 *)shash_desc_ctx(shash);
+ u32 retval;
int err;
shash->tfm = tfm;
@@ -47,5 +48,7 @@ u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length)
err = crypto_shash_update(shash, address, length);
BUG_ON(err);
- return *ctx;
+ retval = *ctx;
+ barrier_data(ctx);
+ return retval;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 17cbe9306faf..556c93060606 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -842,13 +842,12 @@ retry:
NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
PAGE_SET_WRITEBACK);
- ret = btrfs_submit_compressed_write(inode,
+ if (btrfs_submit_compressed_write(inode,
async_extent->start,
async_extent->ram_size,
ins.objectid,
ins.offset, async_extent->pages,
- async_extent->nr_pages);
- if (ret) {
+ async_extent->nr_pages)) {
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
struct page *p = async_extent->pages[0];
const u64 start = async_extent->start;
@@ -1901,11 +1900,11 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
* At IO completion time the cums attached on the ordered extent record
* are inserted into the btree
*/
-static int __btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
- u64 bio_offset)
+static blk_status_t __btrfs_submit_bio_start(struct inode *inode,
+ struct bio *bio, int mirror_num, unsigned long bio_flags,
+ u64 bio_offset)
{
- int ret = 0;
+ blk_status_t ret = 0;
ret = btrfs_csum_one_bio(inode, bio, 0, 0);
BUG_ON(ret); /* -ENOMEM */
@@ -1920,16 +1919,16 @@ static int __btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
* At IO completion time the cums attached on the ordered extent record
* are inserted into the btree
*/
-static int __btrfs_submit_bio_done(struct inode *inode, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
- u64 bio_offset)
+static blk_status_t __btrfs_submit_bio_done(struct inode *inode,
+ struct bio *bio, int mirror_num, unsigned long bio_flags,
+ u64 bio_offset)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- int ret;
+ blk_status_t ret;
ret = btrfs_map_bio(fs_info, bio, mirror_num, 1);
if (ret) {
- bio->bi_error = ret;
+ bio->bi_status = ret;
bio_endio(bio);
}
return ret;
@@ -1939,14 +1938,14 @@ static int __btrfs_submit_bio_done(struct inode *inode, struct bio *bio,
* extent_io.c submission hook. This does the right thing for csum calculation
* on write, or reading the csums from the tree before a read
*/
-static int btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
+static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
- int ret = 0;
+ blk_status_t ret = 0;
int skip_sum;
int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
@@ -1991,8 +1990,8 @@ mapit:
ret = btrfs_map_bio(fs_info, bio, mirror_num, 0);
out:
- if (ret < 0) {
- bio->bi_error = ret;
+ if (ret) {
+ bio->bi_status = ret;
bio_endio(bio);
}
return ret;
@@ -2952,7 +2951,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
ret = test_range_bit(io_tree, ordered_extent->file_offset,
ordered_extent->file_offset + ordered_extent->len - 1,
- EXTENT_DEFRAG, 1, cached_state);
+ EXTENT_DEFRAG, 0, cached_state);
if (ret) {
u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
if (0 && last_snapshot >= BTRFS_I(inode)->generation)
@@ -7483,8 +7482,8 @@ bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end)
int found = false;
void **pagep = NULL;
struct page *page = NULL;
- int start_idx;
- int end_idx;
+ unsigned long start_idx;
+ unsigned long end_idx;
start_idx = start >> PAGE_SHIFT;
@@ -8037,7 +8036,7 @@ static void btrfs_retry_endio_nocsum(struct bio *bio)
struct bio_vec *bvec;
int i;
- if (bio->bi_error)
+ if (bio->bi_status)
goto end;
ASSERT(bio->bi_vcnt == 1);
@@ -8116,7 +8115,7 @@ static void btrfs_retry_endio(struct bio *bio)
int ret;
int i;
- if (bio->bi_error)
+ if (bio->bi_status)
goto end;
uptodate = 1;
@@ -8141,8 +8140,8 @@ end:
bio_put(bio);
}
-static int __btrfs_subio_endio_read(struct inode *inode,
- struct btrfs_io_bio *io_bio, int err)
+static blk_status_t __btrfs_subio_endio_read(struct inode *inode,
+ struct btrfs_io_bio *io_bio, blk_status_t err)
{
struct btrfs_fs_info *fs_info;
struct bio_vec *bvec;
@@ -8184,7 +8183,7 @@ try_again:
io_bio->mirror_num,
btrfs_retry_endio, &done);
if (ret) {
- err = ret;
+ err = errno_to_blk_status(ret);
goto next;
}
@@ -8211,8 +8210,8 @@ next:
return err;
}
-static int btrfs_subio_endio_read(struct inode *inode,
- struct btrfs_io_bio *io_bio, int err)
+static blk_status_t btrfs_subio_endio_read(struct inode *inode,
+ struct btrfs_io_bio *io_bio, blk_status_t err)
{
bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
@@ -8232,7 +8231,7 @@ static void btrfs_endio_direct_read(struct bio *bio)
struct inode *inode = dip->inode;
struct bio *dio_bio;
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
- int err = bio->bi_error;
+ blk_status_t err = bio->bi_status;
if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
err = btrfs_subio_endio_read(inode, io_bio, err);
@@ -8243,11 +8242,11 @@ static void btrfs_endio_direct_read(struct bio *bio)
kfree(dip);
- dio_bio->bi_error = bio->bi_error;
- dio_end_io(dio_bio, bio->bi_error);
+ dio_bio->bi_status = bio->bi_status;
+ dio_end_io(dio_bio);
if (io_bio->end_io)
- io_bio->end_io(io_bio, err);
+ io_bio->end_io(io_bio, blk_status_to_errno(err));
bio_put(bio);
}
@@ -8299,20 +8298,20 @@ static void btrfs_endio_direct_write(struct bio *bio)
struct bio *dio_bio = dip->dio_bio;
__endio_write_update_ordered(dip->inode, dip->logical_offset,
- dip->bytes, !bio->bi_error);
+ dip->bytes, !bio->bi_status);
kfree(dip);
- dio_bio->bi_error = bio->bi_error;
- dio_end_io(dio_bio, bio->bi_error);
+ dio_bio->bi_status = bio->bi_status;
+ dio_end_io(dio_bio);
bio_put(bio);
}
-static int __btrfs_submit_bio_start_direct_io(struct inode *inode,
+static blk_status_t __btrfs_submit_bio_start_direct_io(struct inode *inode,
struct bio *bio, int mirror_num,
unsigned long bio_flags, u64 offset)
{
- int ret;
+ blk_status_t ret;
ret = btrfs_csum_one_bio(inode, bio, offset, 1);
BUG_ON(ret); /* -ENOMEM */
return 0;
@@ -8321,7 +8320,7 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode,
static void btrfs_end_dio_bio(struct bio *bio)
{
struct btrfs_dio_private *dip = bio->bi_private;
- int err = bio->bi_error;
+ blk_status_t err = bio->bi_status;
if (err)
btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
@@ -8351,7 +8350,7 @@ static void btrfs_end_dio_bio(struct bio *bio)
if (dip->errors) {
bio_io_error(dip->orig_bio);
} else {
- dip->dio_bio->bi_error = 0;
+ dip->dio_bio->bi_status = 0;
bio_endio(dip->orig_bio);
}
out:
@@ -8368,14 +8367,14 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
return bio;
}
-static inline int btrfs_lookup_and_bind_dio_csum(struct inode *inode,
+static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode,
struct btrfs_dio_private *dip,
struct bio *bio,
u64 file_offset)
{
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio);
- int ret;
+ blk_status_t ret;
/*
* We load all the csum data we need when we submit
@@ -8406,7 +8405,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_dio_private *dip = bio->bi_private;
bool write = bio_op(bio) == REQ_OP_WRITE;
- int ret;
+ blk_status_t ret;
if (async_submit)
async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
@@ -8649,7 +8648,7 @@ free_ordered:
* callbacks - they require an allocated dip and a clone of dio_bio.
*/
if (io_bio && dip) {
- io_bio->bi_error = -EIO;
+ io_bio->bi_status = BLK_STS_IOERR;
bio_endio(io_bio);
/*
* The end io callbacks free our dip, do the final put on io_bio
@@ -8668,12 +8667,12 @@ free_ordered:
unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
file_offset + dio_bio->bi_iter.bi_size - 1);
- dio_bio->bi_error = -EIO;
+ dio_bio->bi_status = BLK_STS_IOERR;
/*
* Releases and cleans up our dio_bio, no need to bio_put()
* nor bio_endio()/bio_io_error() against dio_bio.
*/
- dio_end_io(dio_bio, ret);
+ dio_end_io(dio_bio);
}
if (io_bio)
bio_put(io_bio);
@@ -8755,6 +8754,9 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
dio_data.overwrite = 1;
inode_unlock(inode);
relock = true;
+ } else if (iocb->ki_flags & IOCB_NOWAIT) {
+ ret = -EAGAIN;
+ goto out;
}
ret = btrfs_delalloc_reserve_space(inode, offset, count);
if (ret)
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index d8ea0eb76325..f3d30d9ea8f9 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -871,7 +871,7 @@ static void free_raid_bio(struct btrfs_raid_bio *rbio)
* this frees the rbio and runs through all the bios in the
* bio_list and calls end_io on them
*/
-static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
+static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
{
struct bio *cur = bio_list_get(&rbio->bio_list);
struct bio *next;
@@ -884,7 +884,7 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
while (cur) {
next = cur->bi_next;
cur->bi_next = NULL;
- cur->bi_error = err;
+ cur->bi_status = err;
bio_endio(cur);
cur = next;
}
@@ -897,7 +897,7 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
static void raid_write_end_io(struct bio *bio)
{
struct btrfs_raid_bio *rbio = bio->bi_private;
- int err = bio->bi_error;
+ blk_status_t err = bio->bi_status;
int max_errors;
if (err)
@@ -914,7 +914,7 @@ static void raid_write_end_io(struct bio *bio)
max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
0 : rbio->bbio->max_errors;
if (atomic_read(&rbio->error) > max_errors)
- err = -EIO;
+ err = BLK_STS_IOERR;
rbio_orig_end_io(rbio, err);
}
@@ -1092,7 +1092,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
* devices or if they are not contiguous
*/
if (last_end == disk_start && stripe->dev->bdev &&
- !last->bi_error &&
+ !last->bi_status &&
last->bi_bdev == stripe->dev->bdev) {
ret = bio_add_page(last, page, PAGE_SIZE, 0);
if (ret == PAGE_SIZE)
@@ -1448,7 +1448,7 @@ static void raid_rmw_end_io(struct bio *bio)
{
struct btrfs_raid_bio *rbio = bio->bi_private;
- if (bio->bi_error)
+ if (bio->bi_status)
fail_bio_stripe(rbio, bio);
else
set_bio_pages_uptodate(bio);
@@ -1991,7 +1991,7 @@ static void raid_recover_end_io(struct bio *bio)
* we only read stripe pages off the disk, set them
* up to date if there were no errors
*/
- if (bio->bi_error)
+ if (bio->bi_status)
fail_bio_stripe(rbio, bio);
else
set_bio_pages_uptodate(bio);
@@ -2530,7 +2530,7 @@ static void raid56_parity_scrub_end_io(struct bio *bio)
{
struct btrfs_raid_bio *rbio = bio->bi_private;
- if (bio->bi_error)
+ if (bio->bi_status)
fail_bio_stripe(rbio, bio);
else
set_bio_pages_uptodate(bio);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index c7b45eb2403d..ba5595d19de1 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -95,7 +95,7 @@ struct scrub_bio {
struct scrub_ctx *sctx;
struct btrfs_device *dev;
struct bio *bio;
- int err;
+ blk_status_t status;
u64 logical;
u64 physical;
#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
@@ -1668,14 +1668,14 @@ leave_nomem:
struct scrub_bio_ret {
struct completion event;
- int error;
+ blk_status_t status;
};
static void scrub_bio_wait_endio(struct bio *bio)
{
struct scrub_bio_ret *ret = bio->bi_private;
- ret->error = bio->bi_error;
+ ret->status = bio->bi_status;
complete(&ret->event);
}
@@ -1693,7 +1693,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
int ret;
init_completion(&done.event);
- done.error = 0;
+ done.status = 0;
bio->bi_iter.bi_sector = page->logical >> 9;
bio->bi_private = &done;
bio->bi_end_io = scrub_bio_wait_endio;
@@ -1705,7 +1705,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
return ret;
wait_for_completion(&done.event);
- if (done.error)
+ if (done.status)
return -EIO;
return 0;
@@ -1937,7 +1937,7 @@ again:
bio->bi_bdev = sbio->dev->bdev;
bio->bi_iter.bi_sector = sbio->physical >> 9;
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
- sbio->err = 0;
+ sbio->status = 0;
} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
spage->physical_for_dev_replace ||
sbio->logical + sbio->page_count * PAGE_SIZE !=
@@ -1992,7 +1992,7 @@ static void scrub_wr_bio_end_io(struct bio *bio)
struct scrub_bio *sbio = bio->bi_private;
struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
- sbio->err = bio->bi_error;
+ sbio->status = bio->bi_status;
sbio->bio = bio;
btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper,
@@ -2007,7 +2007,7 @@ static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
int i;
WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
- if (sbio->err) {
+ if (sbio->status) {
struct btrfs_dev_replace *dev_replace =
&sbio->sctx->fs_info->dev_replace;
@@ -2341,7 +2341,7 @@ again:
bio->bi_bdev = sbio->dev->bdev;
bio->bi_iter.bi_sector = sbio->physical >> 9;
bio_set_op_attrs(bio, REQ_OP_READ, 0);
- sbio->err = 0;
+ sbio->status = 0;
} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
spage->physical ||
sbio->logical + sbio->page_count * PAGE_SIZE !=
@@ -2377,7 +2377,7 @@ static void scrub_missing_raid56_end_io(struct bio *bio)
struct scrub_block *sblock = bio->bi_private;
struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;
- if (bio->bi_error)
+ if (bio->bi_status)
sblock->no_io_error_seen = 0;
bio_put(bio);
@@ -2588,7 +2588,7 @@ static void scrub_bio_end_io(struct bio *bio)
struct scrub_bio *sbio = bio->bi_private;
struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
- sbio->err = bio->bi_error;
+ sbio->status = bio->bi_status;
sbio->bio = bio;
btrfs_queue_work(fs_info->scrub_workers, &sbio->work);
@@ -2601,7 +2601,7 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work)
int i;
BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
- if (sbio->err) {
+ if (sbio->status) {
for (i = 0; i < sbio->page_count; i++) {
struct scrub_page *spage = sbio->pagev[i];
@@ -3004,7 +3004,7 @@ static void scrub_parity_bio_endio(struct bio *bio)
struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
struct btrfs_fs_info *fs_info = sparity->sctx->fs_info;
- if (bio->bi_error)
+ if (bio->bi_status)
bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
sparity->nsectors);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 017b67daa3bb..84a495967e0a 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6042,9 +6042,10 @@ static void btrfs_end_bio(struct bio *bio)
struct btrfs_bio *bbio = bio->bi_private;
int is_orig_bio = 0;
- if (bio->bi_error) {
+ if (bio->bi_status) {
atomic_inc(&bbio->error);
- if (bio->bi_error == -EIO || bio->bi_error == -EREMOTEIO) {
+ if (bio->bi_status == BLK_STS_IOERR ||
+ bio->bi_status == BLK_STS_TARGET) {
unsigned int stripe_index =
btrfs_io_bio(bio)->stripe_index;
struct btrfs_device *dev;
@@ -6082,13 +6083,13 @@ static void btrfs_end_bio(struct bio *bio)
* beyond the tolerance of the btrfs bio
*/
if (atomic_read(&bbio->error) > bbio->max_errors) {
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
} else {
/*
* this bio is actually up to date, we didn't
* go over the max number of errors
*/
- bio->bi_error = 0;
+ bio->bi_status = 0;
}
btrfs_end_bbio(bbio, bio);
@@ -6199,7 +6200,7 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
bio->bi_iter.bi_sector = logical >> 9;
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
btrfs_end_bbio(bbio, bio);
}
}
diff --git a/fs/buffer.c b/fs/buffer.c
index 161be58c5cb0..5c2cba8d2387 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -49,7 +49,7 @@
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
- struct writeback_control *wbc);
+ enum rw_hint hint, struct writeback_control *wbc);
#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
@@ -1829,7 +1829,8 @@ int __block_write_full_page(struct inode *inode, struct page *page,
do {
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
- submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc);
+ submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
+ inode->i_write_hint, wbc);
nr_underway++;
}
bh = next;
@@ -1883,7 +1884,8 @@ recover:
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
clear_buffer_dirty(bh);
- submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc);
+ submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
+ inode->i_write_hint, wbc);
nr_underway++;
}
bh = next;
@@ -3038,7 +3040,7 @@ static void end_bio_bh_io_sync(struct bio *bio)
if (unlikely(bio_flagged(bio, BIO_QUIET)))
set_bit(BH_Quiet, &bh->b_state);
- bh->b_end_io(bh, !bio->bi_error);
+ bh->b_end_io(bh, !bio->bi_status);
bio_put(bio);
}
@@ -3091,7 +3093,7 @@ void guard_bio_eod(int op, struct bio *bio)
}
static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
- struct writeback_control *wbc)
+ enum rw_hint write_hint, struct writeback_control *wbc)
{
struct bio *bio;
@@ -3120,6 +3122,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio->bi_bdev = bh->b_bdev;
+ bio->bi_write_hint = write_hint;
bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
BUG_ON(bio->bi_iter.bi_size != bh->b_size);
@@ -3142,7 +3145,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
int submit_bh(int op, int op_flags, struct buffer_head *bh)
{
- return submit_bh_wbc(op, op_flags, bh, NULL);
+ return submit_bh_wbc(op, op_flags, bh, 0, NULL);
}
EXPORT_SYMBOL(submit_bh);
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 9bf90bcc56ac..bb3a02ca9da4 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -18,7 +18,7 @@
#include <linux/fscache-cache.h>
#include <linux/timer.h>
-#include <linux/wait.h>
+#include <linux/wait_bit.h>
#include <linux/cred.h>
#include <linux/workqueue.h>
#include <linux/security.h>
@@ -97,7 +97,7 @@ struct cachefiles_cache {
* backing file read tracking
*/
struct cachefiles_one_read {
- wait_queue_t monitor; /* link into monitored waitqueue */
+ wait_queue_entry_t monitor; /* link into monitored waitqueue */
struct page *back_page; /* backing file page we're waiting for */
struct page *netfs_page; /* netfs page we're going to fill */
struct fscache_retrieval *op; /* retrieval op covering this */
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 41df8a27d7eb..3978b324cbca 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -204,7 +204,7 @@ wait_for_old_object:
wait_queue_head_t *wq;
signed long timeout = 60 * HZ;
- wait_queue_t wait;
+ wait_queue_entry_t wait;
bool requeue;
/* if the object we're waiting for is queued for processing,
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index afbdc418966d..18d7aa61ef0f 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -21,7 +21,7 @@
* - we use this to detect read completion of backing pages
* - the caller holds the waitqueue lock
*/
-static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,
+static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode,
int sync, void *_key)
{
struct cachefiles_one_read *monitor =
@@ -48,7 +48,7 @@ static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,
}
/* remove from the waitqueue */
- list_del(&wait->task_list);
+ list_del(&wait->entry);
/* move onto the action list and queue for FS-Cache thread pool */
ASSERT(monitor->op);
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index 987044bca1c2..59cb307b15fb 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -131,6 +131,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
}
if (new_mode != old_mode) {
+ newattrs.ia_ctime = current_time(inode);
newattrs.ia_mode = new_mode;
newattrs.ia_valid = ATTR_MODE;
ret = __ceph_setattr(inode, &newattrs);
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index e8f11fa565c5..7df550c13d7f 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -91,6 +91,10 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
ceph_mdsc_put_request(req);
if (!inode)
return ERR_PTR(-ESTALE);
+ if (inode->i_nlink == 0) {
+ iput(inode);
+ return ERR_PTR(-ESTALE);
+ }
}
return d_obtain_alias(inode);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index dcce79b84406..4de6cdddf059 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -2022,7 +2022,6 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
attr->ia_size > inode->i_size) {
i_size_write(inode, attr->ia_size);
inode->i_blocks = calc_inode_blocks(attr->ia_size);
- inode->i_ctime = attr->ia_ctime;
ci->i_reported_size = attr->ia_size;
dirtied |= CEPH_CAP_FILE_EXCL;
} else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
@@ -2044,7 +2043,6 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec,
only ? "ctime only" : "ignored");
- inode->i_ctime = attr->ia_ctime;
if (only) {
/*
* if kernel wants to dirty ctime but nothing else,
@@ -2067,7 +2065,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
if (dirtied) {
inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied,
&prealloc_cf);
- inode->i_ctime = current_time(inode);
+ inode->i_ctime = attr->ia_ctime;
}
release &= issued;
@@ -2085,6 +2083,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
req->r_inode_drop = release;
req->r_args.setattr.mask = cpu_to_le32(mask);
req->r_num_caps = 1;
+ req->r_stamp = attr->ia_ctime;
err = ceph_mdsc_do_request(mdsc, NULL, req);
}
dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f38e56fa9712..0c05df44cc6c 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1687,7 +1687,6 @@ struct ceph_mds_request *
ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
{
struct ceph_mds_request *req = kzalloc(sizeof(*req), GFP_NOFS);
- struct timespec ts;
if (!req)
return ERR_PTR(-ENOMEM);
@@ -1706,8 +1705,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
init_completion(&req->r_safe_completion);
INIT_LIST_HEAD(&req->r_unsafe_item);
- ktime_get_real_ts(&ts);
- req->r_stamp = timespec_trunc(ts, mdsc->fsc->sb->s_time_gran);
+ req->r_stamp = timespec_trunc(current_kernel_time(), mdsc->fsc->sb->s_time_gran);
req->r_op = op;
req->r_direct_mode = mode;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0fd081bd2a2f..fcef70602b27 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3271,7 +3271,7 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
if (!is_sync_kiocb(iocb))
ctx->iocb = iocb;
- if (to->type & ITER_IOVEC)
+ if (to->type == ITER_IOVEC)
ctx->should_dirty = true;
rc = setup_aio_ctx_iter(ctx, to, READ);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 4d1fcd76d022..a8693632235f 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -24,6 +24,7 @@
#include <linux/pagemap.h>
#include <linux/freezer.h>
#include <linux/sched/signal.h>
+#include <linux/wait_bit.h>
#include <asm/div64.h>
#include "cifsfs.h"
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index b08531977daa..3b147dc6af63 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -810,7 +810,7 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw)
if (!pages) {
pages = vmalloc(max_pages * sizeof(struct page *));
- if (!bv) {
+ if (!pages) {
kvfree(bv);
return -ENOMEM;
}
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 27bc360c7ffd..a723df3e0197 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -849,8 +849,13 @@ cifs_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_fid *fid, __u16 search_flags,
struct cifs_search_info *srch_inf)
{
- return CIFSFindFirst(xid, tcon, path, cifs_sb,
- &fid->netfid, search_flags, srch_inf, true);
+ int rc;
+
+ rc = CIFSFindFirst(xid, tcon, path, cifs_sb,
+ &fid->netfid, search_flags, srch_inf, true);
+ if (rc)
+ cifs_dbg(FYI, "find first failed=%d\n", rc);
+ return rc;
}
static int
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index c58691834eb2..7e48561abd29 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -982,7 +982,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL);
kfree(utf16_path);
if (rc) {
- cifs_dbg(VFS, "open dir failed\n");
+ cifs_dbg(FYI, "open dir failed rc=%d\n", rc);
return rc;
}
@@ -992,7 +992,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_query_directory(xid, tcon, fid->persistent_fid,
fid->volatile_fid, 0, srch_inf);
if (rc) {
- cifs_dbg(VFS, "query directory failed\n");
+ cifs_dbg(FYI, "query directory failed rc=%d\n", rc);
SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
}
return rc;
@@ -1809,7 +1809,8 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
sg = init_sg(rqst, sign);
if (!sg) {
- cifs_dbg(VFS, "%s: Failed to init sg %d", __func__, rc);
+ cifs_dbg(VFS, "%s: Failed to init sg", __func__);
+ rc = -ENOMEM;
goto free_req;
}
@@ -1817,6 +1818,7 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
iv = kzalloc(iv_len, GFP_KERNEL);
if (!iv) {
cifs_dbg(VFS, "%s: Failed to alloc IV", __func__);
+ rc = -ENOMEM;
goto free_sg;
}
iv[0] = 3;
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 3cb5c9e2d4e7..de50e749ff05 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -188,8 +188,6 @@ static int cifs_creation_time_get(struct dentry *dentry, struct inode *inode,
pcreatetime = (__u64 *)value;
*pcreatetime = CIFS_I(inode)->createtime;
return sizeof(__u64);
-
- return rc;
}
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 6116d5275a3e..112b3e1e20e3 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -866,8 +866,6 @@ COMPATIBLE_IOCTL(TIOCGDEV)
COMPATIBLE_IOCTL(TIOCCBRK)
COMPATIBLE_IOCTL(TIOCGSID)
COMPATIBLE_IOCTL(TIOCGICOUNT)
-COMPATIBLE_IOCTL(TIOCGPKT)
-COMPATIBLE_IOCTL(TIOCGPTLCK)
COMPATIBLE_IOCTL(TIOCGEXCL)
/* Little t */
COMPATIBLE_IOCTL(TIOCGETD)
@@ -883,16 +881,12 @@ COMPATIBLE_IOCTL(TIOCMGET)
COMPATIBLE_IOCTL(TIOCMBIC)
COMPATIBLE_IOCTL(TIOCMBIS)
COMPATIBLE_IOCTL(TIOCMSET)
-COMPATIBLE_IOCTL(TIOCPKT)
COMPATIBLE_IOCTL(TIOCNOTTY)
COMPATIBLE_IOCTL(TIOCSTI)
COMPATIBLE_IOCTL(TIOCOUTQ)
COMPATIBLE_IOCTL(TIOCSPGRP)
COMPATIBLE_IOCTL(TIOCGPGRP)
-COMPATIBLE_IOCTL(TIOCGPTN)
-COMPATIBLE_IOCTL(TIOCSPTLCK)
COMPATIBLE_IOCTL(TIOCSERGETLSR)
-COMPATIBLE_IOCTL(TIOCSIG)
#ifdef TIOCSRS485
COMPATIBLE_IOCTL(TIOCSRS485)
#endif
diff --git a/fs/configfs/item.c b/fs/configfs/item.c
index 8b2a994042dd..a66f6624d899 100644
--- a/fs/configfs/item.c
+++ b/fs/configfs/item.c
@@ -138,6 +138,14 @@ struct config_item *config_item_get(struct config_item *item)
}
EXPORT_SYMBOL(config_item_get);
+struct config_item *config_item_get_unless_zero(struct config_item *item)
+{
+ if (item && kref_get_unless_zero(&item->ci_kref))
+ return item;
+ return NULL;
+}
+EXPORT_SYMBOL(config_item_get_unless_zero);
+
static void config_item_cleanup(struct config_item *item)
{
struct config_item_type *t = item->ci_type;
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index a6ab012a2c6a..c8aabba502f6 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -83,14 +83,13 @@ static int create_link(struct config_item *parent_item,
ret = -ENOMEM;
sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL);
if (sl) {
- sl->sl_target = config_item_get(item);
spin_lock(&configfs_dirent_lock);
if (target_sd->s_type & CONFIGFS_USET_DROPPING) {
spin_unlock(&configfs_dirent_lock);
- config_item_put(item);
kfree(sl);
return -ENOENT;
}
+ sl->sl_target = config_item_get(item);
list_add(&sl->sl_list, &target_sd->s_links);
spin_unlock(&configfs_dirent_lock);
ret = configfs_create_link(sl, parent_item->ci_dentry,
diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
index a409a84f1bca..6181e9526860 100644
--- a/fs/crypto/bio.c
+++ b/fs/crypto/bio.c
@@ -129,7 +129,7 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
goto errout;
}
err = submit_bio_wait(bio);
- if ((err == 0) && bio->bi_error)
+ if (err == 0 && bio->bi_status)
err = -EIO;
bio_put(bio);
if (err)
diff --git a/fs/dax.c b/fs/dax.c
index c22eaf162f95..33d05aa02aad 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -84,7 +84,7 @@ struct exceptional_entry_key {
};
struct wait_exceptional_entry_queue {
- wait_queue_t wait;
+ wait_queue_entry_t wait;
struct exceptional_entry_key key;
};
@@ -108,7 +108,7 @@ static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping,
return wait_table + hash;
}
-static int wake_exceptional_entry_func(wait_queue_t *wait, unsigned int mode,
+static int wake_exceptional_entry_func(wait_queue_entry_t *wait, unsigned int mode,
int sync, void *keyp)
{
struct exceptional_entry_key *key = keyp;
@@ -859,6 +859,7 @@ int dax_writeback_mapping_range(struct address_space *mapping,
if (ret < 0)
goto out;
}
+ start_index = indices[pvec.nr - 1] + 1;
}
out:
put_dax(dax_dev);
@@ -1155,6 +1156,17 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
}
/*
+ * It is possible, particularly with mixed reads & writes to private
+ * mappings, that we have raced with a PMD fault that overlaps with
+ * the PTE we need to set up. If so just return and the fault will be
+ * retried.
+ */
+ if (pmd_trans_huge(*vmf->pmd) || pmd_devmap(*vmf->pmd)) {
+ vmf_ret = VM_FAULT_NOPAGE;
+ goto unlock_entry;
+ }
+
+ /*
* Note that we don't bother to use iomap_apply here: DAX required
* the file system block size to be equal the page size, which means
* that we never have to deal with more than a single extent here.
@@ -1398,6 +1410,18 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
goto fallback;
/*
+ * It is possible, particularly with mixed reads & writes to private
+ * mappings, that we have raced with a PTE fault that overlaps with
+ * the PMD we need to set up. If so just return and the fault will be
+ * retried.
+ */
+ if (!pmd_none(*vmf->pmd) && !pmd_trans_huge(*vmf->pmd) &&
+ !pmd_devmap(*vmf->pmd)) {
+ result = 0;
+ goto unlock_entry;
+ }
+
+ /*
* Note that we don't use iomap_apply here. We aren't doing I/O, only
* setting up a mapping, so really we're using iomap_begin() as a way
* to look up our filesystem block.
diff --git a/fs/dcache.c b/fs/dcache.c
index cddf39777835..a9f995f6859e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1494,7 +1494,7 @@ static void check_and_drop(void *_data)
{
struct detach_data *data = _data;
- if (!data->mountpoint && !data->select.found)
+ if (!data->mountpoint && list_empty(&data->select.dispose))
__d_drop(data->select.start);
}
@@ -1536,17 +1536,15 @@ void d_invalidate(struct dentry *dentry)
d_walk(dentry, &data, detach_and_collect, check_and_drop);
- if (data.select.found)
+ if (!list_empty(&data.select.dispose))
shrink_dentry_list(&data.select.dispose);
+ else if (!data.mountpoint)
+ return;
if (data.mountpoint) {
detach_mounts(data.mountpoint);
dput(data.mountpoint);
}
-
- if (!data.mountpoint && !data.select.found)
- break;
-
cond_resched();
}
}
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 354e2ab62031..6dabc4a10396 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -9,7 +9,7 @@
* 2 as published by the Free Software Foundation.
*
* debugfs is for people to use instead of /proc or /sys.
- * See Documentation/DocBook/filesystems for more details.
+ * See Documentation/filesystems/ for more details.
*
*/
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index e892ae7d89f8..77440e4aa9d4 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -9,7 +9,7 @@
* 2 as published by the Free Software Foundation.
*
* debugfs is for people to use instead of /proc or /sys.
- * See Documentation/DocBook/kernel-api for more details.
+ * See ./Documentation/core-api/kernel-api.rst for more details.
*
*/
diff --git a/fs/direct-io.c b/fs/direct-io.c
index a04ebea77de8..08cf27811e5a 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -294,7 +294,7 @@ static void dio_aio_complete_work(struct work_struct *work)
dio_complete(dio, 0, true);
}
-static int dio_bio_complete(struct dio *dio, struct bio *bio);
+static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio);
/*
* Asynchronous IO callback.
@@ -348,13 +348,12 @@ static void dio_bio_end_io(struct bio *bio)
/**
* dio_end_io - handle the end io action for the given bio
* @bio: The direct io bio thats being completed
- * @error: Error if there was one
*
* This is meant to be called by any filesystem that uses their own dio_submit_t
* so that the DIO specific endio actions are dealt with after the filesystem
* has done it's completion work.
*/
-void dio_end_io(struct bio *bio, int error)
+void dio_end_io(struct bio *bio)
{
struct dio *dio = bio->bi_private;
@@ -386,6 +385,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
else
bio->bi_end_io = dio_bio_end_io;
+ bio->bi_write_hint = dio->iocb->ki_hint;
+
sdio->bio = bio;
sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
}
@@ -474,17 +475,20 @@ static struct bio *dio_await_one(struct dio *dio)
/*
* Process one completed BIO. No locks are held.
*/
-static int dio_bio_complete(struct dio *dio, struct bio *bio)
+static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio)
{
struct bio_vec *bvec;
unsigned i;
- int err;
+ blk_status_t err = bio->bi_status;
- if (bio->bi_error)
- dio->io_error = -EIO;
+ if (err) {
+ if (err == BLK_STS_AGAIN && (bio->bi_opf & REQ_NOWAIT))
+ dio->io_error = -EAGAIN;
+ else
+ dio->io_error = -EIO;
+ }
if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) {
- err = bio->bi_error;
bio_check_pages_dirty(bio); /* transfers ownership */
} else {
bio_for_each_segment_all(bvec, bio, i) {
@@ -495,7 +499,6 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
set_page_dirty_lock(page);
put_page(page);
}
- err = bio->bi_error;
bio_put(bio);
}
return err;
@@ -539,7 +542,7 @@ static inline int dio_bio_reap(struct dio *dio, struct dio_submit *sdio)
bio = dio->bio_list;
dio->bio_list = bio->bi_private;
spin_unlock_irqrestore(&dio->bio_lock, flags);
- ret2 = dio_bio_complete(dio, bio);
+ ret2 = blk_status_to_errno(dio_bio_complete(dio, bio));
if (ret == 0)
ret = ret2;
}
@@ -1197,6 +1200,8 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
if (iov_iter_rw(iter) == WRITE) {
dio->op = REQ_OP_WRITE;
dio->op_flags = REQ_SYNC | REQ_IDLE;
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ dio->op_flags |= REQ_NOWAIT;
} else {
dio->op = REQ_OP_READ;
}
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 68b9fffcb2c8..2fb4eadaa118 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -191,7 +191,7 @@ static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
* This is used to atomically remove a wait queue entry from the eventfd wait
* queue head, and read/reset the counter value.
*/
-int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
+int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
__u64 *cnt)
{
unsigned long flags;
@@ -215,8 +215,8 @@ EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
*
* Returns %0 if successful, or the following error codes:
*
- * -EAGAIN : The operation would have blocked but @no_wait was non-zero.
- * -ERESTARTSYS : A signal interrupted the wait operation.
+ * - -EAGAIN : The operation would have blocked but @no_wait was non-zero.
+ * - -ERESTARTSYS : A signal interrupted the wait operation.
*
* If @no_wait is zero, the function might sleep until the eventfd internal
* counter becomes greater than zero.
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 5420767c9b68..b1c8e23ddf65 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -244,7 +244,7 @@ struct eppoll_entry {
* Wait queue item that will be linked to the target file wait
* queue head.
*/
- wait_queue_t wait;
+ wait_queue_entry_t wait;
/* The wait queue head that linked the "wait" wait queue item */
wait_queue_head_t *whead;
@@ -347,13 +347,13 @@ static inline int ep_is_linked(struct list_head *p)
return !list_empty(p);
}
-static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_t *p)
+static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_entry_t *p)
{
return container_of(p, struct eppoll_entry, wait);
}
/* Get the "struct epitem" from a wait queue pointer */
-static inline struct epitem *ep_item_from_wait(wait_queue_t *p)
+static inline struct epitem *ep_item_from_wait(wait_queue_entry_t *p)
{
return container_of(p, struct eppoll_entry, wait)->base;
}
@@ -1078,7 +1078,7 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
* mechanism. It is called by the stored file descriptors when they
* have events to report.
*/
-static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
int pwake = 0;
unsigned long flags;
@@ -1094,7 +1094,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
* can't use __remove_wait_queue(). whead->lock is held by
* the caller.
*/
- list_del_init(&wait->task_list);
+ list_del_init(&wait->entry);
}
spin_lock_irqsave(&ep->lock, flags);
@@ -1699,7 +1699,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
int res = 0, eavail, timed_out = 0;
unsigned long flags;
u64 slack = 0;
- wait_queue_t wait;
+ wait_queue_entry_t wait;
ktime_t expires, *to = NULL;
if (timeout > 0) {
diff --git a/fs/exec.c b/fs/exec.c
index 72934df68471..904199086490 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -220,8 +220,26 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
if (write) {
unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
+ unsigned long ptr_size;
struct rlimit *rlim;
+ /*
+ * Since the stack will hold pointers to the strings, we
+ * must account for them as well.
+ *
+ * The size calculation is the entire vma while each arg page is
+ * built, so each time we get here it's calculating how far it
+ * is currently (rather than each call being just the newly
+ * added size from the arg page). As a result, we need to
+ * always add the entire size of the pointers, so that on the
+ * last call to get_arg_page() we'll actually have the entire
+ * correct size.
+ */
+ ptr_size = (bprm->argc + bprm->envc) * sizeof(void *);
+ if (ptr_size > ULONG_MAX - size)
+ goto fail;
+ size += ptr_size;
+
acct_arg_size(bprm, size / PAGE_SIZE);
/*
@@ -239,13 +257,15 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
* to work from.
*/
rlim = current->signal->rlim;
- if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) {
- put_page(page);
- return NULL;
- }
+ if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4)
+ goto fail;
}
return page;
+
+fail:
+ put_page(page);
+ return NULL;
}
static void put_arg_page(struct page *page)
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index fd389935ecd1..3ec0e46de95f 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -4,6 +4,7 @@
* Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
*/
+#include <linux/quotaops.h>
#include "ext4_jbd2.h"
#include "ext4.h"
#include "xattr.h"
@@ -232,6 +233,9 @@ ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type)
handle_t *handle;
int error, retries = 0;
+ error = dquot_initialize(inode);
+ if (error)
+ return error;
retry:
handle = ext4_journal_start(inode, EXT4_HT_XATTR,
ext4_jbd2_credits_xattr(inode));
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8e8046104f4d..32191548abed 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2523,7 +2523,6 @@ extern int ext4_search_dir(struct buffer_head *bh,
int buf_size,
struct inode *dir,
struct ext4_filename *fname,
- const struct qstr *d_name,
unsigned int offset,
struct ext4_dir_entry_2 **res_dir);
extern int ext4_generic_delete_entry(handle_t *handle,
@@ -3007,7 +3006,6 @@ extern int htree_inlinedir_to_tree(struct file *dir_file,
int *has_inline_data);
extern struct buffer_head *ext4_find_inline_entry(struct inode *dir,
struct ext4_filename *fname,
- const struct qstr *d_name,
struct ext4_dir_entry_2 **res_dir,
int *has_inline_data);
extern int ext4_delete_inline_entry(handle_t *handle,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 2a97dff87b96..3e36508610b7 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3413,13 +3413,13 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
struct ext4_sb_info *sbi;
struct ext4_extent_header *eh;
struct ext4_map_blocks split_map;
- struct ext4_extent zero_ex;
+ struct ext4_extent zero_ex1, zero_ex2;
struct ext4_extent *ex, *abut_ex;
ext4_lblk_t ee_block, eof_block;
unsigned int ee_len, depth, map_len = map->m_len;
int allocated = 0, max_zeroout = 0;
int err = 0;
- int split_flag = 0;
+ int split_flag = EXT4_EXT_DATA_VALID2;
ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
"block %llu, max_blocks %u\n", inode->i_ino,
@@ -3436,7 +3436,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ex = path[depth].p_ext;
ee_block = le32_to_cpu(ex->ee_block);
ee_len = ext4_ext_get_actual_len(ex);
- zero_ex.ee_len = 0;
+ zero_ex1.ee_len = 0;
+ zero_ex2.ee_len = 0;
trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
@@ -3576,62 +3577,52 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
if (ext4_encrypted_inode(inode))
max_zeroout = 0;
- /* If extent is less than s_max_zeroout_kb, zeroout directly */
- if (max_zeroout && (ee_len <= max_zeroout)) {
- err = ext4_ext_zeroout(inode, ex);
- if (err)
- goto out;
- zero_ex.ee_block = ex->ee_block;
- zero_ex.ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex));
- ext4_ext_store_pblock(&zero_ex, ext4_ext_pblock(ex));
-
- err = ext4_ext_get_access(handle, inode, path + depth);
- if (err)
- goto out;
- ext4_ext_mark_initialized(ex);
- ext4_ext_try_to_merge(handle, inode, path, ex);
- err = ext4_ext_dirty(handle, inode, path + path->p_depth);
- goto out;
- }
-
/*
- * four cases:
+ * five cases:
* 1. split the extent into three extents.
- * 2. split the extent into two extents, zeroout the first half.
- * 3. split the extent into two extents, zeroout the second half.
+ * 2. split the extent into two extents, zeroout the head of the first
+ * extent.
+ * 3. split the extent into two extents, zeroout the tail of the second
+ * extent.
* 4. split the extent into two extents with out zeroout.
+ * 5. no splitting needed, just possibly zeroout the head and / or the
+ * tail of the extent.
*/
split_map.m_lblk = map->m_lblk;
split_map.m_len = map->m_len;
- if (max_zeroout && (allocated > map->m_len)) {
+ if (max_zeroout && (allocated > split_map.m_len)) {
if (allocated <= max_zeroout) {
- /* case 3 */
- zero_ex.ee_block =
- cpu_to_le32(map->m_lblk);
- zero_ex.ee_len = cpu_to_le16(allocated);
- ext4_ext_store_pblock(&zero_ex,
- ext4_ext_pblock(ex) + map->m_lblk - ee_block);
- err = ext4_ext_zeroout(inode, &zero_ex);
+ /* case 3 or 5 */
+ zero_ex1.ee_block =
+ cpu_to_le32(split_map.m_lblk +
+ split_map.m_len);
+ zero_ex1.ee_len =
+ cpu_to_le16(allocated - split_map.m_len);
+ ext4_ext_store_pblock(&zero_ex1,
+ ext4_ext_pblock(ex) + split_map.m_lblk +
+ split_map.m_len - ee_block);
+ err = ext4_ext_zeroout(inode, &zero_ex1);
if (err)
goto out;
- split_map.m_lblk = map->m_lblk;
split_map.m_len = allocated;
- } else if (map->m_lblk - ee_block + map->m_len < max_zeroout) {
- /* case 2 */
- if (map->m_lblk != ee_block) {
- zero_ex.ee_block = ex->ee_block;
- zero_ex.ee_len = cpu_to_le16(map->m_lblk -
+ }
+ if (split_map.m_lblk - ee_block + split_map.m_len <
+ max_zeroout) {
+ /* case 2 or 5 */
+ if (split_map.m_lblk != ee_block) {
+ zero_ex2.ee_block = ex->ee_block;
+ zero_ex2.ee_len = cpu_to_le16(split_map.m_lblk -
ee_block);
- ext4_ext_store_pblock(&zero_ex,
+ ext4_ext_store_pblock(&zero_ex2,
ext4_ext_pblock(ex));
- err = ext4_ext_zeroout(inode, &zero_ex);
+ err = ext4_ext_zeroout(inode, &zero_ex2);
if (err)
goto out;
}
+ split_map.m_len += split_map.m_lblk - ee_block;
split_map.m_lblk = ee_block;
- split_map.m_len = map->m_lblk - ee_block + map->m_len;
allocated = map->m_len;
}
}
@@ -3642,8 +3633,11 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
err = 0;
out:
/* If we have gotten a failure, don't zero out status tree */
- if (!err)
- err = ext4_zeroout_es(inode, &zero_ex);
+ if (!err) {
+ err = ext4_zeroout_es(inode, &zero_ex1);
+ if (!err)
+ err = ext4_zeroout_es(inode, &zero_ex2);
+ }
return err ? err : allocated;
}
@@ -4883,6 +4877,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
/* Zero out partial block at the edges of the range */
ret = ext4_zero_partial_blocks(handle, inode, offset, len);
+ if (ret >= 0)
+ ext4_update_inode_fsync_trans(handle, inode, 1);
if (file->f_flags & O_SYNC)
ext4_handle_sync(handle);
@@ -5569,6 +5565,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
ext4_handle_sync(handle);
inode->i_mtime = inode->i_ctime = current_time(inode);
ext4_mark_inode_dirty(handle, inode);
+ ext4_update_inode_fsync_trans(handle, inode, 1);
out_stop:
ext4_journal_stop(handle);
@@ -5742,6 +5739,8 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
up_write(&EXT4_I(inode)->i_data_sem);
if (IS_SYNC(inode))
ext4_handle_sync(handle);
+ if (ret >= 0)
+ ext4_update_inode_fsync_trans(handle, inode, 1);
out_stop:
ext4_journal_stop(handle);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 831fd6beebf0..58e2eeaa0bc4 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -37,7 +37,11 @@ static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
struct inode *inode = file_inode(iocb->ki_filp);
ssize_t ret;
- inode_lock_shared(inode);
+ if (!inode_trylock_shared(inode)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ inode_lock_shared(inode);
+ }
/*
* Recheck under inode lock - at this point we are sure it cannot
* change anymore
@@ -179,7 +183,11 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct inode *inode = file_inode(iocb->ki_filp);
ssize_t ret;
- inode_lock(inode);
+ if (!inode_trylock(inode)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ inode_lock(inode);
+ }
ret = ext4_write_checks(iocb, from);
if (ret <= 0)
goto out;
@@ -216,7 +224,12 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
return ext4_dax_write_iter(iocb, from);
#endif
- inode_lock(inode);
+ if (!inode_trylock(inode)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ inode_lock(inode);
+ }
+
ret = ext4_write_checks(iocb, from);
if (ret <= 0)
goto out;
@@ -235,9 +248,15 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
iocb->private = &overwrite;
/* Check whether we do a DIO overwrite or not */
- if (o_direct && ext4_should_dioread_nolock(inode) && !unaligned_aio &&
- ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from)))
- overwrite = 1;
+ if (o_direct && !unaligned_aio) {
+ if (ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) {
+ if (ext4_should_dioread_nolock(inode))
+ overwrite = 1;
+ } else if (iocb->ki_flags & IOCB_NOWAIT) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ }
ret = __generic_file_write_iter(iocb, from);
inode_unlock(inode);
@@ -435,6 +454,10 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
if (ret < 0)
return ret;
}
+
+ /* Set the flags to support nowait AIO */
+ filp->f_mode |= FMODE_AIO_NOWAIT;
+
return dquot_file_open(inode, filp);
}
@@ -474,57 +497,37 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
endoff = (loff_t)end_blk << blkbits;
index = startoff >> PAGE_SHIFT;
- end = endoff >> PAGE_SHIFT;
+ end = (endoff - 1) >> PAGE_SHIFT;
pagevec_init(&pvec, 0);
do {
int i, num;
unsigned long nr_pages;
- num = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
+ num = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1;
nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
(pgoff_t)num);
- if (nr_pages == 0) {
- if (whence == SEEK_DATA)
- break;
-
- BUG_ON(whence != SEEK_HOLE);
- /*
- * If this is the first time to go into the loop and
- * offset is not beyond the end offset, it will be a
- * hole at this offset
- */
- if (lastoff == startoff || lastoff < endoff)
- found = 1;
+ if (nr_pages == 0)
break;
- }
-
- /*
- * If this is the first time to go into the loop and
- * offset is smaller than the first page offset, it will be a
- * hole at this offset.
- */
- if (lastoff == startoff && whence == SEEK_HOLE &&
- lastoff < page_offset(pvec.pages[0])) {
- found = 1;
- break;
- }
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
struct buffer_head *bh, *head;
/*
- * If the current offset is not beyond the end of given
- * range, it will be a hole.
+ * If current offset is smaller than the page offset,
+ * there is a hole at this offset.
*/
- if (lastoff < endoff && whence == SEEK_HOLE &&
- page->index > end) {
+ if (whence == SEEK_HOLE && lastoff < endoff &&
+ lastoff < page_offset(pvec.pages[i])) {
found = 1;
*offset = lastoff;
goto out;
}
+ if (page->index > end)
+ goto out;
+
lock_page(page);
if (unlikely(page->mapping != inode->i_mapping)) {
@@ -564,20 +567,18 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
unlock_page(page);
}
- /*
- * The no. of pages is less than our desired, that would be a
- * hole in there.
- */
- if (nr_pages < num && whence == SEEK_HOLE) {
- found = 1;
- *offset = lastoff;
+ /* The no. of pages is less than our desired, we are done. */
+ if (nr_pages < num)
break;
- }
index = pvec.pages[i - 1]->index + 1;
pagevec_release(&pvec);
} while (index <= end);
+ if (whence == SEEK_HOLE && lastoff < endoff) {
+ found = 1;
+ *offset = lastoff;
+ }
out:
pagevec_release(&pvec);
return found;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index d5dea4c293ef..8d141c0c8ff9 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1627,7 +1627,6 @@ out:
struct buffer_head *ext4_find_inline_entry(struct inode *dir,
struct ext4_filename *fname,
- const struct qstr *d_name,
struct ext4_dir_entry_2 **res_dir,
int *has_inline_data)
{
@@ -1649,7 +1648,7 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir,
EXT4_INLINE_DOTDOT_SIZE;
inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
ret = ext4_search_dir(iloc.bh, inline_start, inline_size,
- dir, fname, d_name, 0, res_dir);
+ dir, fname, 0, res_dir);
if (ret == 1)
goto out_find;
if (ret < 0)
@@ -1662,7 +1661,7 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir,
inline_size = ext4_get_inline_size(dir) - EXT4_MIN_INLINE_DATA_SIZE;
ret = ext4_search_dir(iloc.bh, inline_start, inline_size,
- dir, fname, d_name, 0, res_dir);
+ dir, fname, 0, res_dir);
if (ret == 1)
goto out_find;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 1bd0bfa547f6..5cf82d03968c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2124,15 +2124,29 @@ static int ext4_writepage(struct page *page,
static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
{
int len;
- loff_t size = i_size_read(mpd->inode);
+ loff_t size;
int err;
BUG_ON(page->index != mpd->first_page);
+ clear_page_dirty_for_io(page);
+ /*
+ * We have to be very careful here! Nothing protects writeback path
+ * against i_size changes and the page can be writeably mapped into
+ * page tables. So an application can be growing i_size and writing
+ * data through mmap while writeback runs. clear_page_dirty_for_io()
+ * write-protects our page in page tables and the page cannot get
+ * written to again until we release page lock. So only after
+ * clear_page_dirty_for_io() we are safe to sample i_size for
+ * ext4_bio_write_page() to zero-out tail of the written page. We rely
+ * on the barrier provided by TestClearPageDirty in
+ * clear_page_dirty_for_io() to make sure i_size is really sampled only
+ * after page tables are updated.
+ */
+ size = i_size_read(mpd->inode);
if (page->index == size >> PAGE_SHIFT)
len = size & ~PAGE_MASK;
else
len = PAGE_SIZE;
- clear_page_dirty_for_io(page);
err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false);
if (!err)
mpd->wbc->nr_to_write--;
@@ -3629,9 +3643,6 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
get_block_func = ext4_dio_get_block_unwritten_async;
dio_flags = DIO_LOCKING;
}
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
- BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));
-#endif
ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
get_block_func, ext4_end_io_dio, NULL,
dio_flags);
@@ -3713,7 +3724,7 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
*/
inode_lock_shared(inode);
ret = filemap_write_and_wait_range(mapping, iocb->ki_pos,
- iocb->ki_pos + count);
+ iocb->ki_pos + count - 1);
if (ret)
goto out_unlock;
ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
@@ -4207,6 +4218,8 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
inode->i_mtime = inode->i_ctime = current_time(inode);
ext4_mark_inode_dirty(handle, inode);
+ if (ret >= 0)
+ ext4_update_inode_fsync_trans(handle, inode, 1);
out_stop:
ext4_journal_stop(handle);
out_dio:
@@ -5637,8 +5650,9 @@ static int ext4_expand_extra_isize(struct inode *inode,
/* No extended attributes present */
if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
- memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0,
- new_extra_isize);
+ memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE +
+ EXT4_I(inode)->i_extra_isize, 0,
+ new_extra_isize - EXT4_I(inode)->i_extra_isize);
EXT4_I(inode)->i_extra_isize = new_extra_isize;
return 0;
}
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 5083bce20ac4..b7928cddd539 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3887,7 +3887,8 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
err = ext4_mb_load_buddy(sb, group, &e4b);
if (err) {
- ext4_error(sb, "Error loading buddy information for %u", group);
+ ext4_warning(sb, "Error %d loading buddy information for %u",
+ err, group);
put_bh(bitmap_bh);
return 0;
}
@@ -4044,10 +4045,11 @@ repeat:
BUG_ON(pa->pa_type != MB_INODE_PA);
group = ext4_get_group_number(sb, pa->pa_pstart);
- err = ext4_mb_load_buddy(sb, group, &e4b);
+ err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
+ GFP_NOFS|__GFP_NOFAIL);
if (err) {
- ext4_error(sb, "Error loading buddy information for %u",
- group);
+ ext4_error(sb, "Error %d loading buddy information for %u",
+ err, group);
continue;
}
@@ -4303,11 +4305,14 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
spin_unlock(&lg->lg_prealloc_lock);
list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
+ int err;
group = ext4_get_group_number(sb, pa->pa_pstart);
- if (ext4_mb_load_buddy(sb, group, &e4b)) {
- ext4_error(sb, "Error loading buddy information for %u",
- group);
+ err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
+ GFP_NOFS|__GFP_NOFAIL);
+ if (err) {
+ ext4_error(sb, "Error %d loading buddy information for %u",
+ err, group);
continue;
}
ext4_lock_group(sb, group);
@@ -5127,8 +5132,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
ret = ext4_mb_load_buddy(sb, group, &e4b);
if (ret) {
- ext4_error(sb, "Error in loading buddy "
- "information for %u", group);
+ ext4_warning(sb, "Error %d loading buddy information for %u",
+ ret, group);
return ret;
}
bitmap = e4b.bd_bitmap;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index b81f7d46f344..404256caf9cf 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1155,12 +1155,11 @@ errout:
static inline int search_dirblock(struct buffer_head *bh,
struct inode *dir,
struct ext4_filename *fname,
- const struct qstr *d_name,
unsigned int offset,
struct ext4_dir_entry_2 **res_dir)
{
return ext4_search_dir(bh, bh->b_data, dir->i_sb->s_blocksize, dir,
- fname, d_name, offset, res_dir);
+ fname, offset, res_dir);
}
/*
@@ -1262,7 +1261,6 @@ static inline bool ext4_match(const struct ext4_filename *fname,
*/
int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
struct inode *dir, struct ext4_filename *fname,
- const struct qstr *d_name,
unsigned int offset, struct ext4_dir_entry_2 **res_dir)
{
struct ext4_dir_entry_2 * de;
@@ -1355,7 +1353,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
if (ext4_has_inline_data(dir)) {
int has_inline_data = 1;
- ret = ext4_find_inline_entry(dir, &fname, d_name, res_dir,
+ ret = ext4_find_inline_entry(dir, &fname, res_dir,
&has_inline_data);
if (has_inline_data) {
if (inlined)
@@ -1447,7 +1445,7 @@ restart:
goto next;
}
set_buffer_verified(bh);
- i = search_dirblock(bh, dir, &fname, d_name,
+ i = search_dirblock(bh, dir, &fname,
block << EXT4_BLOCK_SIZE_BITS(sb), res_dir);
if (i == 1) {
EXT4_I(dir)->i_dir_start_lookup = block;
@@ -1488,7 +1486,6 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
{
struct super_block * sb = dir->i_sb;
struct dx_frame frames[2], *frame;
- const struct qstr *d_name = fname->usr_fname;
struct buffer_head *bh;
ext4_lblk_t block;
int retval;
@@ -1505,7 +1502,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
if (IS_ERR(bh))
goto errout;
- retval = search_dirblock(bh, dir, fname, d_name,
+ retval = search_dirblock(bh, dir, fname,
block << EXT4_BLOCK_SIZE_BITS(sb),
res_dir);
if (retval == 1)
@@ -1530,7 +1527,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
bh = NULL;
errout:
- dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name));
+ dxtrace(printk(KERN_DEBUG "%s not found\n", fname->usr_fname->name));
success:
dx_release(frames);
return bh;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 1a82138ba739..c2fce4478cca 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -85,7 +85,7 @@ static void ext4_finish_bio(struct bio *bio)
}
#endif
- if (bio->bi_error) {
+ if (bio->bi_status) {
SetPageError(page);
mapping_set_error(page->mapping, -EIO);
}
@@ -104,7 +104,7 @@ static void ext4_finish_bio(struct bio *bio)
continue;
}
clear_buffer_async_write(bh);
- if (bio->bi_error)
+ if (bio->bi_status)
buffer_io_error(bh);
} while ((bh = bh->b_this_page) != head);
bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
@@ -303,24 +303,25 @@ static void ext4_end_bio(struct bio *bio)
bdevname(bio->bi_bdev, b),
(long long) bio->bi_iter.bi_sector,
(unsigned) bio_sectors(bio),
- bio->bi_error)) {
+ bio->bi_status)) {
ext4_finish_bio(bio);
bio_put(bio);
return;
}
bio->bi_end_io = NULL;
- if (bio->bi_error) {
+ if (bio->bi_status) {
struct inode *inode = io_end->inode;
ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
"(offset %llu size %ld starting block %llu)",
- bio->bi_error, inode->i_ino,
+ bio->bi_status, inode->i_ino,
(unsigned long long) io_end->offset,
(long) io_end->size,
(unsigned long long)
bi_sector >> (inode->i_blkbits - 9));
- mapping_set_error(inode->i_mapping, bio->bi_error);
+ mapping_set_error(inode->i_mapping,
+ blk_status_to_errno(bio->bi_status));
}
if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
@@ -349,6 +350,7 @@ void ext4_io_submit(struct ext4_io_submit *io)
if (bio) {
int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ?
REQ_SYNC : 0;
+ io->io_bio->bi_write_hint = io->io_end->inode->i_write_hint;
bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags);
submit_bio(io->io_bio);
}
@@ -396,6 +398,7 @@ submit_and_retry:
ret = io_submit_init_bio(io, bh);
if (ret)
return ret;
+ io->io_bio->bi_write_hint = inode->i_write_hint;
}
ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
if (ret != bh->b_size)
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index a81b829d56de..40a5497b0f60 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -73,7 +73,7 @@ static void mpage_end_io(struct bio *bio)
int i;
if (ext4_bio_encrypted(bio)) {
- if (bio->bi_error) {
+ if (bio->bi_status) {
fscrypt_release_ctx(bio->bi_private);
} else {
fscrypt_decrypt_bio_pages(bio->bi_private, bio);
@@ -83,7 +83,7 @@ static void mpage_end_io(struct bio *bio)
bio_for_each_segment_all(bv, bio, i) {
struct page *page = bv->bv_page;
- if (!bio->bi_error) {
+ if (!bio->bi_status) {
SetPageUptodate(page);
} else {
ClearPageUptodate(page);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 0b177da9ea82..9006cb5857b8 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -848,14 +848,9 @@ static inline void ext4_quota_off_umount(struct super_block *sb)
{
int type;
- if (ext4_has_feature_quota(sb)) {
- dquot_disable(sb, -1,
- DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
- } else {
- /* Use our quota_off function to clear inode flags etc. */
- for (type = 0; type < EXT4_MAXQUOTAS; type++)
- ext4_quota_off(sb, type);
- }
+ /* Use our quota_off function to clear inode flags etc. */
+ for (type = 0; type < EXT4_MAXQUOTAS; type++)
+ ext4_quota_off(sb, type);
}
#else
static inline void ext4_quota_off_umount(struct super_block *sb)
@@ -1179,6 +1174,9 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
return res;
}
+ res = dquot_initialize(inode);
+ if (res)
+ return res;
retry:
handle = ext4_journal_start(inode, EXT4_HT_MISC,
ext4_jbd2_credits_xattr(inode));
@@ -3952,7 +3950,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sb->s_qcop = &ext4_qctl_operations;
sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
- memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
+ memcpy(&sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
mutex_init(&sbi->s_orphan_lock);
@@ -5485,7 +5483,7 @@ static int ext4_quota_off(struct super_block *sb, int type)
goto out;
err = dquot_quota_off(sb, type);
- if (err)
+ if (err || ext4_has_feature_quota(sb))
goto out_put;
inode_lock(inode);
@@ -5505,6 +5503,7 @@ static int ext4_quota_off(struct super_block *sb, int type)
out_unlock:
inode_unlock(inode);
out_put:
+ lockdep_set_quota_inode(inode, I_DATA_SEM_NORMAL);
iput(inode);
return err;
out:
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 8fb7ce14e6eb..5d3c2536641c 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -888,6 +888,8 @@ inserted:
else {
u32 ref;
+ WARN_ON_ONCE(dquot_initialize_needed(inode));
+
/* The old block is released after updating
the inode. */
error = dquot_alloc_block(inode,
@@ -954,6 +956,8 @@ inserted:
/* We need to allocate a new block */
ext4_fsblk_t goal, block;
+ WARN_ON_ONCE(dquot_initialize_needed(inode));
+
goal = ext4_group_first_block_no(sb,
EXT4_I(inode)->i_block_group);
@@ -1166,6 +1170,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
return -EINVAL;
if (strlen(name) > 255)
return -ERANGE;
+
ext4_write_lock_xattr(inode, &no_expand);
error = ext4_reserve_inode_write(handle, inode, &is.iloc);
@@ -1267,6 +1272,9 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name,
int error, retries = 0;
int credits = ext4_jbd2_credits_xattr(inode);
+ error = dquot_initialize(inode);
+ if (error)
+ return error;
retry:
handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
if (IS_ERR(handle)) {
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7c0f6bdf817d..36fe82012a33 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -58,12 +58,12 @@ static void f2fs_read_end_io(struct bio *bio)
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) {
f2fs_show_injection_info(FAULT_IO);
- bio->bi_error = -EIO;
+ bio->bi_status = BLK_STS_IOERR;
}
#endif
if (f2fs_bio_encrypted(bio)) {
- if (bio->bi_error) {
+ if (bio->bi_status) {
fscrypt_release_ctx(bio->bi_private);
} else {
fscrypt_decrypt_bio_pages(bio->bi_private, bio);
@@ -74,7 +74,7 @@ static void f2fs_read_end_io(struct bio *bio)
bio_for_each_segment_all(bvec, bio, i) {
struct page *page = bvec->bv_page;
- if (!bio->bi_error) {
+ if (!bio->bi_status) {
if (!PageUptodate(page))
SetPageUptodate(page);
} else {
@@ -102,14 +102,14 @@ static void f2fs_write_end_io(struct bio *bio)
unlock_page(page);
mempool_free(page, sbi->write_io_dummy);
- if (unlikely(bio->bi_error))
+ if (unlikely(bio->bi_status))
f2fs_stop_checkpoint(sbi, true);
continue;
}
fscrypt_pullback_bio_page(&page, true);
- if (unlikely(bio->bi_error)) {
+ if (unlikely(bio->bi_status)) {
mapping_set_error(page->mapping, -EIO);
f2fs_stop_checkpoint(sbi, true);
}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 2185c7a040a1..fd2e651bad6d 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1078,6 +1078,7 @@ static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address,
{
SHASH_DESC_ON_STACK(shash, sbi->s_chksum_driver);
u32 *ctx = (u32 *)shash_desc_ctx(shash);
+ u32 retval;
int err;
shash->tfm = sbi->s_chksum_driver;
@@ -1087,7 +1088,9 @@ static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address,
err = crypto_shash_update(shash, address, length);
BUG_ON(err);
- return *ctx;
+ retval = *ctx;
+ barrier_data(ctx);
+ return retval;
}
static inline bool f2fs_crc_valid(struct f2fs_sb_info *sbi, __u32 blk_crc,
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 96845854e7ee..ea9f455d94ba 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -749,7 +749,7 @@ static void f2fs_submit_discard_endio(struct bio *bio)
{
struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
- dc->error = bio->bi_error;
+ dc->error = blk_status_to_errno(bio->bi_status);
dc->state = D_DONE;
complete(&dc->wait);
bio_put(bio);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 83355ec4a92c..0b89b0b7b9f7 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1937,7 +1937,7 @@ try_onemore:
sb->s_time_gran = 1;
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
(test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
- memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
+ memcpy(&sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
/* init f2fs-specific super block info */
sbi->valid_super_block = valid_super_block;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index f525bc75c07d..b6bd89628025 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -257,6 +257,67 @@ static int f_getowner_uids(struct file *filp, unsigned long arg)
}
#endif
+static bool rw_hint_valid(enum rw_hint hint)
+{
+ switch (hint) {
+ case RWF_WRITE_LIFE_NOT_SET:
+ case RWH_WRITE_LIFE_NONE:
+ case RWH_WRITE_LIFE_SHORT:
+ case RWH_WRITE_LIFE_MEDIUM:
+ case RWH_WRITE_LIFE_LONG:
+ case RWH_WRITE_LIFE_EXTREME:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static long fcntl_rw_hint(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct inode *inode = file_inode(file);
+ u64 *argp = (u64 __user *)arg;
+ enum rw_hint hint;
+ u64 h;
+
+ switch (cmd) {
+ case F_GET_FILE_RW_HINT:
+ h = file_write_hint(file);
+ if (copy_to_user(argp, &h, sizeof(*argp)))
+ return -EFAULT;
+ return 0;
+ case F_SET_FILE_RW_HINT:
+ if (copy_from_user(&h, argp, sizeof(h)))
+ return -EFAULT;
+ hint = (enum rw_hint) h;
+ if (!rw_hint_valid(hint))
+ return -EINVAL;
+
+ spin_lock(&file->f_lock);
+ file->f_write_hint = hint;
+ spin_unlock(&file->f_lock);
+ return 0;
+ case F_GET_RW_HINT:
+ h = inode->i_write_hint;
+ if (copy_to_user(argp, &h, sizeof(*argp)))
+ return -EFAULT;
+ return 0;
+ case F_SET_RW_HINT:
+ if (copy_from_user(&h, argp, sizeof(h)))
+ return -EFAULT;
+ hint = (enum rw_hint) h;
+ if (!rw_hint_valid(hint))
+ return -EINVAL;
+
+ inode_lock(inode);
+ inode->i_write_hint = hint;
+ inode_unlock(inode);
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
struct file *filp)
{
@@ -358,6 +419,12 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
case F_GET_SEALS:
err = shmem_fcntl(filp, cmd, arg);
break;
+ case F_GET_RW_HINT:
+ case F_SET_RW_HINT:
+ case F_GET_FILE_RW_HINT:
+ case F_SET_FILE_RW_HINT:
+ err = fcntl_rw_hint(filp, cmd, arg);
+ break;
default:
break;
}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 63ee2940775c..8b426f83909f 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -2052,11 +2052,13 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode)
}
/**
- * __mark_inode_dirty - internal function
- * @inode: inode to mark
- * @flags: what kind of dirty (i.e. I_DIRTY_SYNC)
- * Mark an inode as dirty. Callers should use mark_inode_dirty or
- * mark_inode_dirty_sync.
+ * __mark_inode_dirty - internal function
+ *
+ * @inode: inode to mark
+ * @flags: what kind of dirty (i.e. I_DIRTY_SYNC)
+ *
+ * Mark an inode as dirty. Callers should use mark_inode_dirty or
+ * mark_inode_dirty_sync.
*
* Put the inode on the super block's dirty list.
*
diff --git a/fs/fs_pin.c b/fs/fs_pin.c
index 611b5408f6ec..e747b3d720ee 100644
--- a/fs/fs_pin.c
+++ b/fs/fs_pin.c
@@ -34,7 +34,7 @@ void pin_insert(struct fs_pin *pin, struct vfsmount *m)
void pin_kill(struct fs_pin *p)
{
- wait_queue_t wait;
+ wait_queue_entry_t wait;
if (!p) {
rcu_read_unlock();
@@ -61,7 +61,7 @@ void pin_kill(struct fs_pin *p)
rcu_read_unlock();
schedule();
rcu_read_lock();
- if (likely(list_empty(&wait.task_list)))
+ if (likely(list_empty(&wait.entry)))
break;
/* OK, we know p couldn't have been freed yet */
spin_lock_irq(&p->wait.lock);
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index b7cf65d13561..aa3d44527fa2 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -815,7 +815,6 @@ struct gfs2_sbd {
atomic_t sd_log_in_flight;
struct bio *sd_log_bio;
wait_queue_head_t sd_log_flush_wait;
- int sd_log_error;
atomic_t sd_reserving_log;
wait_queue_head_t sd_reserving_log_wait;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index f865b96374df..d2955daf17a4 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -659,7 +659,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
struct gfs2_log_header *lh;
unsigned int tail;
u32 hash;
- int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META;
+ int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC;
struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
lh = page_address(page);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index b1f9144b42c7..885d36e7a29f 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -170,7 +170,7 @@ static u64 gfs2_log_bmap(struct gfs2_sbd *sdp)
*/
static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec,
- int error)
+ blk_status_t error)
{
struct buffer_head *bh, *next;
struct page *page = bvec->bv_page;
@@ -209,15 +209,13 @@ static void gfs2_end_log_write(struct bio *bio)
struct page *page;
int i;
- if (bio->bi_error) {
- sdp->sd_log_error = bio->bi_error;
- fs_err(sdp, "Error %d writing to log\n", bio->bi_error);
- }
+ if (bio->bi_status)
+ fs_err(sdp, "Error %d writing to log\n", bio->bi_status);
bio_for_each_segment_all(bvec, bio, i) {
page = bvec->bv_page;
if (page_has_buffers(page))
- gfs2_end_log_write_bh(sdp, bvec, bio->bi_error);
+ gfs2_end_log_write_bh(sdp, bvec, bio->bi_status);
else
mempool_free(page, gfs2_page_pool);
}
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 663ffc135ef3..fabe1614f879 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -201,7 +201,7 @@ static void gfs2_meta_read_endio(struct bio *bio)
do {
struct buffer_head *next = bh->b_this_page;
len -= bh->b_size;
- bh->b_end_io(bh, !bio->bi_error);
+ bh->b_end_io(bh, !bio->bi_status);
bh = next;
} while (bh && len);
}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index ed67548b286c..e76058d34b74 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -176,10 +176,10 @@ static void end_bio_io_page(struct bio *bio)
{
struct page *page = bio->bi_private;
- if (!bio->bi_error)
+ if (!bio->bi_status)
SetPageUptodate(page);
else
- pr_warn("error %d reading superblock\n", bio->bi_error);
+ pr_warn("error %d reading superblock\n", bio->bi_status);
unlock_page(page);
}
@@ -203,7 +203,7 @@ static void gfs2_sb_in(struct gfs2_sbd *sdp, const void *buf)
memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
- memcpy(s->s_uuid, str->sb_uuid, 16);
+ memcpy(&s->s_uuid, str->sb_uuid, 16);
}
/**
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 7a515345610c..e77bc52b468f 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -71,25 +71,14 @@ static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf)
return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_fsname);
}
-static int gfs2_uuid_valid(const u8 *uuid)
-{
- int i;
-
- for (i = 0; i < 16; i++) {
- if (uuid[i])
- return 1;
- }
- return 0;
-}
-
static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
{
struct super_block *s = sdp->sd_vfs;
- const u8 *uuid = s->s_uuid;
+
buf[0] = '\0';
- if (!gfs2_uuid_valid(uuid))
+ if (uuid_is_null(&s->s_uuid))
return 0;
- return snprintf(buf, PAGE_SIZE, "%pUB\n", uuid);
+ return snprintf(buf, PAGE_SIZE, "%pUB\n", &s->s_uuid);
}
static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
@@ -712,14 +701,13 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
{
struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
struct super_block *s = sdp->sd_vfs;
- const u8 *uuid = s->s_uuid;
add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags))
add_uevent_var(env, "JOURNALID=%d", sdp->sd_lockstruct.ls_jid);
- if (gfs2_uuid_valid(uuid))
- add_uevent_var(env, "UUID=%pUB", uuid);
+ if (!uuid_is_null(&s->s_uuid))
+ add_uevent_var(env, "UUID=%pUB", &s->s_uuid);
return 0;
}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index dde861387a40..d44f5456eb9b 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -200,7 +200,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
addr = ALIGN(addr, huge_page_size(h));
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
diff --git a/fs/inode.c b/fs/inode.c
index db5914783a71..ab3b9a795c0b 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -146,6 +146,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
i_gid_write(inode, 0);
atomic_set(&inode->i_writecount, 0);
inode->i_size = 0;
+ inode->i_write_hint = WRITE_LIFE_NOT_SET;
inode->i_blocks = 0;
inode->i_bytes = 0;
inode->i_generation = 0;
@@ -1891,11 +1892,11 @@ static void __wait_on_freeing_inode(struct inode *inode)
wait_queue_head_t *wq;
DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
wq = bit_waitqueue(&inode->i_state, __I_NEW);
- prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+ prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_hash_lock);
schedule();
- finish_wait(wq, &wait.wait);
+ finish_wait(wq, &wait.wq_entry);
spin_lock(&inode_hash_lock);
}
@@ -2038,11 +2039,11 @@ static void __inode_dio_wait(struct inode *inode)
DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
do {
- prepare_to_wait(wq, &q.wait, TASK_UNINTERRUPTIBLE);
+ prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE);
if (atomic_read(&inode->i_dio_count))
schedule();
} while (atomic_read(&inode->i_dio_count));
- finish_wait(wq, &q.wait);
+ finish_wait(wq, &q.wq_entry);
}
/**
diff --git a/fs/iomap.c b/fs/iomap.c
index 4b10892967a5..fa6cd5b3f578 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -672,8 +672,8 @@ static void iomap_dio_bio_end_io(struct bio *bio)
struct iomap_dio *dio = bio->bi_private;
bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);
- if (bio->bi_error)
- iomap_dio_set_error(dio, bio->bi_error);
+ if (bio->bi_status)
+ iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status));
if (atomic_dec_and_test(&dio->ref)) {
if (is_sync_kiocb(dio->iocb)) {
@@ -793,6 +793,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
bio->bi_bdev = iomap->bdev;
bio->bi_iter.bi_sector =
iomap->blkno + ((pos - iomap->offset) >> 9);
+ bio->bi_write_hint = dio->iocb->ki_hint;
bio->bi_private = dio;
bio->bi_end_io = iomap_dio_bio_end_io;
@@ -881,6 +882,14 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
flags |= IOMAP_WRITE;
}
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (filemap_range_has_page(mapping, start, end)) {
+ ret = -EAGAIN;
+ goto out_free_dio;
+ }
+ flags |= IOMAP_NOWAIT;
+ }
+
ret = filemap_write_and_wait_range(mapping, start, end);
if (ret)
goto out_free_dio;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index ebad34266bcf..7d5ef3bf3f3e 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2579,10 +2579,10 @@ restart:
wait_queue_head_t *wq;
DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
- prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+ prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
spin_unlock(&journal->j_list_lock);
schedule();
- finish_wait(wq, &wait.wait);
+ finish_wait(wq, &wait.wq_entry);
goto restart;
}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 9ee4832b6f8b..8b08044b3120 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -409,25 +409,6 @@ static handle_t *new_handle(int nblocks)
return handle;
}
-/**
- * handle_t *jbd2_journal_start() - Obtain a new handle.
- * @journal: Journal to start transaction on.
- * @nblocks: number of block buffer we might modify
- *
- * We make sure that the transaction can guarantee at least nblocks of
- * modified buffers in the log. We block until the log can guarantee
- * that much space. Additionally, if rsv_blocks > 0, we also create another
- * handle with rsv_blocks reserved blocks in the journal. This handle is
- * is stored in h_rsv_handle. It is not attached to any particular transaction
- * and thus doesn't block transaction commit. If the caller uses this reserved
- * handle, it has to set h_rsv_handle to NULL as otherwise jbd2_journal_stop()
- * on the parent handle will dispose the reserved one. Reserved handle has to
- * be converted to a normal handle using jbd2_journal_start_reserved() before
- * it can be used.
- *
- * Return a pointer to a newly allocated handle, or an ERR_PTR() value
- * on failure.
- */
handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks,
gfp_t gfp_mask, unsigned int type,
unsigned int line_no)
@@ -478,6 +459,25 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks,
EXPORT_SYMBOL(jbd2__journal_start);
+/**
+ * handle_t *jbd2_journal_start() - Obtain a new handle.
+ * @journal: Journal to start transaction on.
+ * @nblocks: number of block buffer we might modify
+ *
+ * We make sure that the transaction can guarantee at least nblocks of
+ * modified buffers in the log. We block until the log can guarantee
+ * that much space. Additionally, if rsv_blocks > 0, we also create another
+ * handle with rsv_blocks reserved blocks in the journal. This handle is
+ * is stored in h_rsv_handle. It is not attached to any particular transaction
+ * and thus doesn't block transaction commit. If the caller uses this reserved
+ * handle, it has to set h_rsv_handle to NULL as otherwise jbd2_journal_stop()
+ * on the parent handle will dispose the reserved one. Reserved handle has to
+ * be converted to a normal handle using jbd2_journal_start_reserved() before
+ * it can be used.
+ *
+ * Return a pointer to a newly allocated handle, or an ERR_PTR() value
+ * on failure.
+ */
handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
{
return jbd2__journal_start(journal, nblocks, 0, GFP_NOFS, 0, 0);
@@ -680,6 +680,12 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask)
rwsem_release(&journal->j_trans_commit_map, 1, _THIS_IP_);
handle->h_buffer_credits = nblocks;
+ /*
+ * Restore the original nofs context because the journal restart
+ * is basically the same thing as journal stop and start.
+ * start_this_handle will start a new nofs context.
+ */
+ memalloc_nofs_restore(handle->saved_alloc_context);
ret = start_this_handle(journal, handle, gfp_mask);
return ret;
}
@@ -1066,10 +1072,10 @@ out:
* @handle: transaction to add buffer modifications to
* @bh: bh to be used for metadata writes
*
- * Returns an error code or 0 on success.
+ * Returns: error code or 0 on success.
*
* In full data journalling mode the buffer may be of type BJ_AsyncData,
- * because we're write()ing a buffer which is also part of a shared mapping.
+ * because we're ``write()ing`` a buffer which is also part of a shared mapping.
*/
int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index bb1da1feafeb..a21f0e9eecd4 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -2205,7 +2205,7 @@ static void lbmIODone(struct bio *bio)
bp->l_flag |= lbmDONE;
- if (bio->bi_error) {
+ if (bio->bi_status) {
bp->l_flag |= lbmERROR;
jfs_err("lbmIODone: I/O error in JFS log");
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 489aaa1403e5..ce93db3aef3c 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -280,7 +280,7 @@ static void metapage_read_end_io(struct bio *bio)
{
struct page *page = bio->bi_private;
- if (bio->bi_error) {
+ if (bio->bi_status) {
printk(KERN_ERR "metapage_read_end_io: I/O error\n");
SetPageError(page);
}
@@ -337,7 +337,7 @@ static void metapage_write_end_io(struct bio *bio)
BUG_ON(!PagePrivate(page));
- if (bio->bi_error) {
+ if (bio->bi_status) {
printk(KERN_ERR "metapage_write_end_io: I/O error\n");
SetPageError(page);
}
diff --git a/fs/mpage.c b/fs/mpage.c
index baff8f820c29..2e4c41ccb5c9 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -50,7 +50,8 @@ static void mpage_end_io(struct bio *bio)
bio_for_each_segment_all(bv, bio, i) {
struct page *page = bv->bv_page;
- page_endio(page, op_is_write(bio_op(bio)), bio->bi_error);
+ page_endio(page, op_is_write(bio_op(bio)),
+ blk_status_to_errno(bio->bi_status));
}
bio_put(bio);
@@ -344,6 +345,7 @@ confused:
*
* So an mpage read of the first 16 blocks of an ext2 file will cause I/O to be
* submitted in the following order:
+ *
* 12 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15 16
*
* because the indirect block has to be read to get the mappings of blocks
@@ -614,6 +616,7 @@ alloc_new:
goto confused;
wbc_init_bio(wbc, bio);
+ bio->bi_write_hint = inode->i_write_hint;
}
/*
diff --git a/fs/namei.c b/fs/namei.c
index 6571a5f5112e..8bacc390c51e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4332,6 +4332,7 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
* The worst of all namespace operations - renaming directory. "Perverted"
* doesn't even start to describe it. Somebody in UCB had a heck of a trip...
* Problems:
+ *
* a) we can get into loop creation.
* b) race potential - two innocent renames can create a loop together.
* That's where 4.4 screws up. Current fix: serialization on
diff --git a/fs/namespace.c b/fs/namespace.c
index 8bd3e4d448b9..5a4438445bf7 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3488,6 +3488,8 @@ static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
return err;
}
+ put_mnt_ns(old_mnt_ns);
+
/* Update the pwd and root */
set_fs_pwd(fs, &root);
set_fs_root(fs, &root);
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 0ca370d23ddb..d8863a804b15 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -188,7 +188,7 @@ static void bl_end_io_read(struct bio *bio)
{
struct parallel_io *par = bio->bi_private;
- if (bio->bi_error) {
+ if (bio->bi_status) {
struct nfs_pgio_header *header = par->data;
if (!header->pnfs_error)
@@ -319,7 +319,7 @@ static void bl_end_io_write(struct bio *bio)
struct parallel_io *par = bio->bi_private;
struct nfs_pgio_header *header = par->data;
- if (bio->bi_error) {
+ if (bio->bi_status) {
if (!header->pnfs_error)
header->pnfs_error = -EIO;
pnfs_set_lo_fail(header->lseg);
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index c14758e08d73..390ac9c39c59 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -753,7 +753,6 @@ static void nfs4_callback_free_slot(struct nfs4_session *session,
* A single slot, so highest used slotid is either 0 or -1
*/
nfs4_free_slot(tbl, slot);
- nfs4_slot_tbl_drain_complete(tbl);
spin_unlock(&tbl->slot_tbl_lock);
}
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 32ccd7754f8a..2ac00bf4ecf1 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1946,29 +1946,6 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
}
EXPORT_SYMBOL_GPL(nfs_link);
-static void
-nfs_complete_rename(struct rpc_task *task, struct nfs_renamedata *data)
-{
- struct dentry *old_dentry = data->old_dentry;
- struct dentry *new_dentry = data->new_dentry;
- struct inode *old_inode = d_inode(old_dentry);
- struct inode *new_inode = d_inode(new_dentry);
-
- nfs_mark_for_revalidate(old_inode);
-
- switch (task->tk_status) {
- case 0:
- if (new_inode != NULL)
- nfs_drop_nlink(new_inode);
- d_move(old_dentry, new_dentry);
- nfs_set_verifier(new_dentry,
- nfs_save_change_attribute(data->new_dir));
- break;
- case -ENOENT:
- nfs_dentry_handle_enoent(old_dentry);
- }
-}
-
/*
* RENAME
* FIXME: Some nfsds, like the Linux user space nfsd, may generate a
@@ -1999,7 +1976,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
{
struct inode *old_inode = d_inode(old_dentry);
struct inode *new_inode = d_inode(new_dentry);
- struct dentry *dentry = NULL;
+ struct dentry *dentry = NULL, *rehash = NULL;
struct rpc_task *task;
int error = -EBUSY;
@@ -2022,8 +1999,10 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
* To prevent any new references to the target during the
* rename, we unhash the dentry in advance.
*/
- if (!d_unhashed(new_dentry))
+ if (!d_unhashed(new_dentry)) {
d_drop(new_dentry);
+ rehash = new_dentry;
+ }
if (d_count(new_dentry) > 2) {
int err;
@@ -2040,6 +2019,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out;
new_dentry = dentry;
+ rehash = NULL;
new_inode = NULL;
}
}
@@ -2048,8 +2028,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (new_inode != NULL)
NFS_PROTO(new_inode)->return_delegation(new_inode);
- task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry,
- nfs_complete_rename);
+ task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
if (IS_ERR(task)) {
error = PTR_ERR(task);
goto out;
@@ -2059,9 +2038,27 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (error == 0)
error = task->tk_status;
rpc_put_task(task);
+ nfs_mark_for_revalidate(old_inode);
out:
+ if (rehash)
+ d_rehash(rehash);
trace_nfs_rename_exit(old_dir, old_dentry,
new_dir, new_dentry, error);
+ if (!error) {
+ if (new_inode != NULL)
+ nfs_drop_nlink(new_inode);
+ /*
+ * The d_move() should be here instead of in an async RPC completion
+ * handler because we need the proper locks to move the dentry. If
+ * we're interrupted by a signal, the async RPC completion handler
+ * should mark the directories for revalidation.
+ */
+ d_move(old_dentry, new_dentry);
+ nfs_set_verifier(new_dentry,
+ nfs_save_change_attribute(new_dir));
+ } else if (error == -ENOENT)
+ nfs_dentry_handle_enoent(old_dentry);
+
/* new dentry created? */
if (dentry)
dput(dentry);
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index f5714ee01000..23542dc44a25 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -454,6 +454,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
goto out_err_free;
/* fh */
+ rc = -EIO;
p = xdr_inline_decode(&stream, 4);
if (!p)
goto out_err_free;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e9b4c3320e37..8701d7617964 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -7,6 +7,7 @@
#include <linux/security.h>
#include <linux/crc32.h>
#include <linux/nfs_page.h>
+#include <linux/wait_bit.h>
#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS)
@@ -398,7 +399,6 @@ extern struct file_system_type nfs4_referral_fs_type;
bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t);
struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *,
struct nfs_subversion *);
-void nfs_initialise_sb(struct super_block *);
int nfs_set_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *);
int nfs_clone_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *);
struct dentry *nfs_fs_mount_common(struct nfs_server *, int, const char *,
@@ -458,7 +458,6 @@ extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
/* super.c */
-void nfs_clone_super(struct super_block *, struct nfs_mount_info *);
void nfs_umount_begin(struct super_block *);
int nfs_statfs(struct dentry *, struct kstatfs *);
int nfs_show_options(struct seq_file *, struct dentry *);
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 1a224a33a6c2..e5686be67be8 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -246,7 +246,7 @@ struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh,
devname = nfs_devname(dentry, page, PAGE_SIZE);
if (IS_ERR(devname))
- mnt = (struct vfsmount *)devname;
+ mnt = ERR_CAST(devname);
else
mnt = nfs_do_clone_mount(NFS_SB(dentry->d_sb), devname, &mountdata);
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 929d09a5310a..319a47db218d 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -177,7 +177,7 @@ static ssize_t _nfs42_proc_copy(struct file *src,
if (status)
goto out;
- if (!nfs_write_verifier_cmp(&res->write_res.verifier.verifier,
+ if (nfs_write_verifier_cmp(&res->write_res.verifier.verifier,
&res->commit_res.verf->verifier)) {
status = -EAGAIN;
goto out;
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 692a7a8bfc7a..66776f022111 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -582,7 +582,6 @@ int nfs40_walk_client_list(struct nfs_client *new,
*/
nfs4_schedule_path_down_recovery(pos);
default:
- spin_lock(&nn->nfs_client_lock);
goto out;
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c08c46a3b8cd..98b0b662af09 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2589,7 +2589,8 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata,
/* Except MODE, it seems harmless of setting twice. */
if (opendata->o_arg.createmode != NFS4_CREATE_EXCLUSIVE &&
- attrset[1] & FATTR4_WORD1_MODE)
+ (attrset[1] & FATTR4_WORD1_MODE ||
+ attrset[2] & FATTR4_WORD2_MODE_UMASK))
sattr->ia_valid &= ~ATTR_MODE;
if (attrset[2] & FATTR4_WORD2_SECURITY_LABEL)
@@ -6372,7 +6373,7 @@ struct nfs4_lock_waiter {
};
static int
-nfs4_wake_lock_waiter(wait_queue_t *wait, unsigned int mode, int flags, void *key)
+nfs4_wake_lock_waiter(wait_queue_entry_t *wait, unsigned int mode, int flags, void *key)
{
int ret;
struct cb_notify_lock_args *cbnl = key;
@@ -6415,7 +6416,7 @@ nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
.inode = state->inode,
.owner = &owner,
.notified = false };
- wait_queue_t wait;
+ wait_queue_entry_t wait;
/* Don't bother with waitqueue if we don't expect a callback */
if (!test_bit(NFS_STATE_MAY_NOTIFY_LOCK, &state->flags))
@@ -8416,6 +8417,7 @@ static void nfs4_layoutget_release(void *calldata)
size_t max_pages = max_response_pages(server);
dprintk("--> %s\n", __func__);
+ nfs4_sequence_free_slot(&lgp->res.seq_res);
nfs4_free_pages(lgp->args.layout.pages, max_pages);
pnfs_put_layout_hdr(NFS_I(inode)->layout);
put_nfs_open_context(lgp->args.ctx);
@@ -8490,7 +8492,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags)
/* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
if (status == 0 && lgp->res.layoutp->len)
lseg = pnfs_layout_process(lgp);
- nfs4_sequence_free_slot(&lgp->res.seq_res);
rpc_put_task(task);
dprintk("<-- %s status=%d\n", __func__, status);
if (status)
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index b34de036501b..cbf82b0d4467 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -2134,6 +2134,8 @@ again:
put_rpccred(cred);
switch (status) {
case 0:
+ case -EINTR:
+ case -ERESTARTSYS:
break;
case -ETIMEDOUT:
if (clnt->cl_softrtry)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index adc6ec28d4b5..c383d0913b54 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2094,12 +2094,26 @@ pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio)
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_layout);
+/*
+ * Check for any intersection between the request and the pgio->pg_lseg,
+ * and if none, put this pgio->pg_lseg away.
+ */
+static void
+pnfs_generic_pg_check_range(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+ if (pgio->pg_lseg && !pnfs_lseg_request_intersecting(pgio->pg_lseg, req)) {
+ pnfs_put_lseg(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ }
+}
+
void
pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
u64 rd_size = req->wb_bytes;
pnfs_generic_pg_check_layout(pgio);
+ pnfs_generic_pg_check_range(pgio, req);
if (pgio->pg_lseg == NULL) {
if (pgio->pg_dreq == NULL)
rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
@@ -2131,6 +2145,7 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req, u64 wb_size)
{
pnfs_generic_pg_check_layout(pgio);
+ pnfs_generic_pg_check_range(pgio, req);
if (pgio->pg_lseg == NULL) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
@@ -2191,16 +2206,10 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio,
seg_end = pnfs_end_offset(pgio->pg_lseg->pls_range.offset,
pgio->pg_lseg->pls_range.length);
req_start = req_offset(req);
- WARN_ON_ONCE(req_start >= seg_end);
+
/* start of request is past the last byte of this segment */
- if (req_start >= seg_end) {
- /* reference the new lseg */
- if (pgio->pg_ops->pg_cleanup)
- pgio->pg_ops->pg_cleanup(pgio);
- if (pgio->pg_ops->pg_init)
- pgio->pg_ops->pg_init(pgio, req);
+ if (req_start >= seg_end)
return 0;
- }
/* adjust 'size' iff there are fewer bytes left in the
* segment than what nfs_generic_pg_test returned */
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 2d05b756a8d6..99731e3e332f 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -593,6 +593,16 @@ pnfs_lseg_range_intersecting(const struct pnfs_layout_range *l1,
return pnfs_is_range_intersecting(l1->offset, end1, l2->offset, end2);
}
+static inline bool
+pnfs_lseg_request_intersecting(struct pnfs_layout_segment *lseg, struct nfs_page *req)
+{
+ u64 seg_last = pnfs_end_offset(lseg->pls_range.offset, lseg->pls_range.length);
+ u64 req_last = req_offset(req) + req->wb_bytes;
+
+ return pnfs_is_range_intersecting(lseg->pls_range.offset, seg_last,
+ req_offset(req), req_last);
+}
+
extern unsigned int layoutstats_timer;
#ifdef NFS_DEBUG
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 2f3822a4a7d5..c5334c0e23a1 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2301,7 +2301,7 @@ EXPORT_SYMBOL_GPL(nfs_remount);
/*
* Initialise the common bits of the superblock
*/
-inline void nfs_initialise_sb(struct super_block *sb)
+static void nfs_initialise_sb(struct super_block *sb)
{
struct nfs_server *server = NFS_SB(sb);
@@ -2348,7 +2348,8 @@ EXPORT_SYMBOL_GPL(nfs_fill_super);
/*
* Finish setting up a cloned NFS2/3/4 superblock
*/
-void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info)
+static void nfs_clone_super(struct super_block *sb,
+ struct nfs_mount_info *mount_info)
{
const struct super_block *old_sb = mount_info->cloned->sb;
struct nfs_server *server = NFS_SB(sb);
@@ -2544,10 +2545,25 @@ EXPORT_SYMBOL_GPL(nfs_set_sb_security);
int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot,
struct nfs_mount_info *mount_info)
{
+ int error;
+ unsigned long kflags = 0, kflags_out = 0;
+
/* clone any lsm security options from the parent to the new sb */
if (d_inode(mntroot)->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops)
return -ESTALE;
- return security_sb_clone_mnt_opts(mount_info->cloned->sb, s);
+
+ if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL)
+ kflags |= SECURITY_LSM_NATIVE_LABELS;
+
+ error = security_sb_clone_mnt_opts(mount_info->cloned->sb, s, kflags,
+ &kflags_out);
+ if (error)
+ return error;
+
+ if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL &&
+ !(kflags_out & SECURITY_LSM_NATIVE_LABELS))
+ NFS_SB(s)->caps &= ~NFS_CAP_SECURITY_LABEL;
+ return 0;
}
EXPORT_SYMBOL_GPL(nfs_clone_sb_security);
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index fb5213afc854..c862c2489df0 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -219,6 +219,9 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev,
u8 *buf, *d, type, assoc;
int error;
+ if (WARN_ON_ONCE(!blk_queue_scsi_passthrough(q)))
+ return -EINVAL;
+
buf = kzalloc(bufflen, GFP_KERNEL);
if (!buf)
return -ENOMEM;
@@ -229,7 +232,6 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev,
goto out_free_buf;
}
req = scsi_req(rq);
- scsi_req_init(rq);
error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL);
if (error)
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index e71f11b1a180..3bc08c394a3f 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -486,7 +486,7 @@ secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; }
#endif
static inline int
-uuid_parse(char **mesg, char *buf, unsigned char **puuid)
+nfsd_uuid_parse(char **mesg, char *buf, unsigned char **puuid)
{
int len;
@@ -586,7 +586,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
if (strcmp(buf, "fsloc") == 0)
err = fsloc_parse(&mesg, buf, &exp.ex_fslocs);
else if (strcmp(buf, "uuid") == 0)
- err = uuid_parse(&mesg, buf, &exp.ex_uuid);
+ err = nfsd_uuid_parse(&mesg, buf, &exp.ex_uuid);
else if (strcmp(buf, "secinfo") == 0)
err = secinfo_parse(&mesg, buf, &exp);
else
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 12feac6ee2fd..452334694a5d 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -334,11 +334,8 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
if (!p)
return 0;
p = xdr_decode_hyper(p, &args->offset);
- args->count = ntohl(*p++);
-
- if (!xdr_argsize_check(rqstp, p))
- return 0;
+ args->count = ntohl(*p++);
len = min(args->count, max_blocksize);
/* set up the kvec */
@@ -352,7 +349,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
v++;
}
args->vlen = v;
- return 1;
+ return xdr_argsize_check(rqstp, p);
}
int
@@ -544,11 +541,9 @@ nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p,
p = decode_fh(p, &args->fh);
if (!p)
return 0;
- if (!xdr_argsize_check(rqstp, p))
- return 0;
args->buffer = page_address(*(rqstp->rq_next_page++));
- return 1;
+ return xdr_argsize_check(rqstp, p);
}
int
@@ -574,14 +569,10 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
args->verf = p; p += 2;
args->dircount = ~0;
args->count = ntohl(*p++);
-
- if (!xdr_argsize_check(rqstp, p))
- return 0;
-
args->count = min_t(u32, args->count, PAGE_SIZE);
args->buffer = page_address(*(rqstp->rq_next_page++));
- return 1;
+ return xdr_argsize_check(rqstp, p);
}
int
@@ -599,9 +590,6 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
args->dircount = ntohl(*p++);
args->count = ntohl(*p++);
- if (!xdr_argsize_check(rqstp, p))
- return 0;
-
len = args->count = min(args->count, max_blocksize);
while (len > 0) {
struct page *p = *(rqstp->rq_next_page++);
@@ -609,7 +597,8 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
args->buffer = page_address(p);
len -= PAGE_SIZE;
}
- return 1;
+
+ return xdr_argsize_check(rqstp, p);
}
int
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index c453a1998e00..dadb3bf305b2 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1769,6 +1769,12 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
opdesc->op_get_currentstateid(cstate, &op->u);
op->status = opdesc->op_func(rqstp, cstate, &op->u);
+ /* Only from SEQUENCE */
+ if (cstate->status == nfserr_replay_cache) {
+ dprintk("%s NFS4.1 replay from cache\n", __func__);
+ status = op->status;
+ goto out;
+ }
if (!op->status) {
if (opdesc->op_set_currentstateid)
opdesc->op_set_currentstateid(cstate, &op->u);
@@ -1779,14 +1785,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
if (need_wrongsec_check(rqstp))
op->status = check_nfsd_access(current_fh->fh_export, rqstp);
}
-
encode_op:
- /* Only from SEQUENCE */
- if (cstate->status == nfserr_replay_cache) {
- dprintk("%s NFS4.1 replay from cache\n", __func__);
- status = op->status;
- goto out;
- }
if (op->status == nfserr_replay_me) {
op->replay = &cstate->replay_owner->so_replay;
nfsd4_encode_replay(&resp->xdr, op);
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 6a4947a3f4fa..de07ff625777 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -257,9 +257,6 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
len = args->count = ntohl(*p++);
p++; /* totalcount - unused */
- if (!xdr_argsize_check(rqstp, p))
- return 0;
-
len = min_t(unsigned int, len, NFSSVC_MAXBLKSIZE_V2);
/* set up somewhere to store response.
@@ -275,7 +272,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
v++;
}
args->vlen = v;
- return 1;
+ return xdr_argsize_check(rqstp, p);
}
int
@@ -365,11 +362,9 @@ nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readli
p = decode_fh(p, &args->fh);
if (!p)
return 0;
- if (!xdr_argsize_check(rqstp, p))
- return 0;
args->buffer = page_address(*(rqstp->rq_next_page++));
- return 1;
+ return xdr_argsize_check(rqstp, p);
}
int
@@ -407,11 +402,9 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
args->cookie = ntohl(*p++);
args->count = ntohl(*p++);
args->count = min_t(u32, args->count, PAGE_SIZE);
- if (!xdr_argsize_check(rqstp, p))
- return 0;
args->buffer = page_address(*(rqstp->rq_next_page++));
- return 1;
+ return xdr_argsize_check(rqstp, p);
}
/*
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 6f87b2ac1aeb..e73c86d9855c 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -338,7 +338,7 @@ static void nilfs_end_bio_write(struct bio *bio)
{
struct nilfs_segment_buffer *segbuf = bio->bi_private;
- if (bio->bi_error)
+ if (bio->bi_status)
atomic_inc(&segbuf->sb_err);
bio_put(bio);
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index febed1217b3f..70ded52dc1dd 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2161,7 +2161,7 @@ void nilfs_flush_segment(struct super_block *sb, ino_t ino)
}
struct nilfs_segctor_wait_request {
- wait_queue_t wq;
+ wait_queue_entry_t wq;
__u32 seq;
int err;
atomic_t done;
@@ -2206,8 +2206,7 @@ static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err)
unsigned long flags;
spin_lock_irqsave(&sci->sc_wait_request.lock, flags);
- list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.task_list,
- wq.task_list) {
+ list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.head, wq.entry) {
if (!atomic_read(&wrq->done) &&
nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) {
wrq->err = err;
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index 358258364616..4690cd75d8d7 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -159,7 +159,7 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent,
PTR_ERR(dent_inode));
kfree(name);
/* Return the error code. */
- return (struct dentry *)dent_inode;
+ return ERR_CAST(dent_inode);
}
/* It is guaranteed that @name is no longer allocated at this point. */
if (MREF_ERR(mref) == -ENOENT) {
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 0da0332725aa..ffe003982d95 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -516,9 +516,9 @@ static void o2hb_bio_end_io(struct bio *bio)
{
struct o2hb_bio_wait_ctxt *wc = bio->bi_private;
- if (bio->bi_error) {
- mlog(ML_ERROR, "IO Error %d\n", bio->bi_error);
- wc->wc_error = bio->bi_error;
+ if (bio->bi_status) {
+ mlog(ML_ERROR, "IO Error %d\n", bio->bi_status);
+ wc->wc_error = blk_status_to_errno(bio->bi_status);
}
o2hb_bio_wait_dec(wc, 1);
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 3b7c937a36b5..4689940a953c 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2591,6 +2591,10 @@ void ocfs2_inode_unlock_tracker(struct inode *inode,
struct ocfs2_lock_res *lockres;
lockres = &OCFS2_I(inode)->ip_inode_lockres;
+ /* had_lock means that the currect process already takes the cluster
+ * lock previously. If had_lock is 1, we have nothing to do here, and
+ * it will get unlocked where we got the lock.
+ */
if (!had_lock) {
ocfs2_remove_holder(lockres, oh);
ocfs2_inode_unlock(inode, ex);
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 827fc9809bc2..9f88188060db 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -119,7 +119,7 @@ check_err:
if (IS_ERR(inode)) {
mlog_errno(PTR_ERR(inode));
- result = (void *)inode;
+ result = ERR_CAST(inode);
goto bail;
}
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index ca1646fbcaef..83005f486451 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -2062,7 +2062,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits);
bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits);
sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits);
- memcpy(sb->s_uuid, di->id2.i_super.s_uuid,
+ memcpy(&sb->s_uuid, di->id2.i_super.s_uuid,
sizeof(di->id2.i_super.s_uuid));
osb->osb_dx_mask = (1 << (cbits - bbits)) - 1;
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 3c5384d9b3a5..f70c3778d600 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1328,20 +1328,21 @@ static int ocfs2_xattr_get(struct inode *inode,
void *buffer,
size_t buffer_size)
{
- int ret;
+ int ret, had_lock;
struct buffer_head *di_bh = NULL;
+ struct ocfs2_lock_holder oh;
- ret = ocfs2_inode_lock(inode, &di_bh, 0);
- if (ret < 0) {
- mlog_errno(ret);
- return ret;
+ had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh);
+ if (had_lock < 0) {
+ mlog_errno(had_lock);
+ return had_lock;
}
down_read(&OCFS2_I(inode)->ip_xattr_sem);
ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
name, buffer, buffer_size);
up_read(&OCFS2_I(inode)->ip_xattr_sem);
- ocfs2_inode_unlock(inode, 0);
+ ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock);
brelse(di_bh);
@@ -3537,11 +3538,12 @@ int ocfs2_xattr_set(struct inode *inode,
{
struct buffer_head *di_bh = NULL;
struct ocfs2_dinode *di;
- int ret, credits, ref_meta = 0, ref_credits = 0;
+ int ret, credits, had_lock, ref_meta = 0, ref_credits = 0;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct inode *tl_inode = osb->osb_tl_inode;
struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, };
struct ocfs2_refcount_tree *ref_tree = NULL;
+ struct ocfs2_lock_holder oh;
struct ocfs2_xattr_info xi = {
.xi_name_index = name_index,
@@ -3572,8 +3574,9 @@ int ocfs2_xattr_set(struct inode *inode,
return -ENOMEM;
}
- ret = ocfs2_inode_lock(inode, &di_bh, 1);
- if (ret < 0) {
+ had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 1, &oh);
+ if (had_lock < 0) {
+ ret = had_lock;
mlog_errno(ret);
goto cleanup_nolock;
}
@@ -3670,7 +3673,7 @@ cleanup:
if (ret)
mlog_errno(ret);
}
- ocfs2_inode_unlock(inode, 1);
+ ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
cleanup_nolock:
brelse(di_bh);
brelse(xbs.xattr_bh);
diff --git a/fs/open.c b/fs/open.c
index cd0c5be8d012..3fe0c4aa7d27 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -759,6 +759,7 @@ static int do_dentry_open(struct file *f,
likely(f->f_op->write || f->f_op->write_iter))
f->f_mode |= FMODE_CAN_WRITE;
+ f->f_write_hint = WRITE_LIFE_NOT_SET;
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c
index 83b506020718..038d67545d9f 100644
--- a/fs/orangefs/orangefs-bufmap.c
+++ b/fs/orangefs/orangefs-bufmap.c
@@ -46,8 +46,8 @@ static void run_down(struct slot_map *m)
spin_lock(&m->q.lock);
if (m->c != -1) {
for (;;) {
- if (likely(list_empty(&wait.task_list)))
- __add_wait_queue_tail(&m->q, &wait);
+ if (likely(list_empty(&wait.entry)))
+ __add_wait_queue_entry_tail(&m->q, &wait);
set_current_state(TASK_UNINTERRUPTIBLE);
if (m->c == -1)
@@ -84,8 +84,8 @@ static int wait_for_free(struct slot_map *m)
do {
long n = left, t;
- if (likely(list_empty(&wait.task_list)))
- __add_wait_queue_tail_exclusive(&m->q, &wait);
+ if (likely(list_empty(&wait.entry)))
+ __add_wait_queue_entry_tail_exclusive(&m->q, &wait);
set_current_state(TASK_INTERRUPTIBLE);
if (m->c > 0)
@@ -108,8 +108,8 @@ static int wait_for_free(struct slot_map *m)
left = -EINTR;
} while (left > 0);
- if (!list_empty(&wait.task_list))
- list_del(&wait.task_list);
+ if (!list_empty(&wait.entry))
+ list_del(&wait.entry);
else if (left <= 0 && waitqueue_active(&m->q))
__wake_up_locked_key(&m->q, TASK_INTERRUPTIBLE, NULL);
__set_current_state(TASK_RUNNING);
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index 0daac5112f7a..c0c9683934b7 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -1,5 +1,6 @@
config OVERLAY_FS
tristate "Overlay filesystem support"
+ select EXPORTFS
help
An overlay filesystem combines two filesystems - an 'upper' filesystem
and a 'lower' filesystem. When a name exists in both filesystems, the
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 9008ab9fbd2e..e5869f91b3ab 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -233,7 +233,7 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
return err;
}
-static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_be *uuid)
+static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_t *uuid)
{
struct ovl_fh *fh;
int fh_type, fh_len, dwords;
@@ -284,7 +284,6 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
struct dentry *upper)
{
struct super_block *sb = lower->d_sb;
- uuid_be *uuid = (uuid_be *) &sb->s_uuid;
const struct ovl_fh *fh = NULL;
int err;
@@ -294,13 +293,17 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
* up and a pure upper inode.
*/
if (sb->s_export_op && sb->s_export_op->fh_to_dentry &&
- uuid_be_cmp(*uuid, NULL_UUID_BE)) {
- fh = ovl_encode_fh(lower, uuid);
+ !uuid_is_null(&sb->s_uuid)) {
+ fh = ovl_encode_fh(lower, &sb->s_uuid);
if (IS_ERR(fh))
return PTR_ERR(fh);
}
- err = ovl_do_setxattr(upper, OVL_XATTR_ORIGIN, fh, fh ? fh->len : 0, 0);
+ /*
+ * Do not fail when upper doesn't support xattrs.
+ */
+ err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh,
+ fh ? fh->len : 0, 0);
kfree(fh);
return err;
@@ -326,15 +329,9 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
.link = link
};
- upper = lookup_one_len(dentry->d_name.name, upperdir,
- dentry->d_name.len);
- err = PTR_ERR(upper);
- if (IS_ERR(upper))
- goto out;
-
err = security_inode_copy_up(dentry, &new_creds);
if (err < 0)
- goto out1;
+ goto out;
if (new_creds)
old_creds = override_creds(new_creds);
@@ -342,13 +339,14 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
if (tmpfile)
temp = ovl_do_tmpfile(upperdir, stat->mode);
else
- temp = ovl_lookup_temp(workdir, dentry);
- err = PTR_ERR(temp);
- if (IS_ERR(temp))
- goto out1;
-
+ temp = ovl_lookup_temp(workdir);
err = 0;
- if (!tmpfile)
+ if (IS_ERR(temp)) {
+ err = PTR_ERR(temp);
+ temp = NULL;
+ }
+
+ if (!err && !tmpfile)
err = ovl_create_real(wdir, temp, &cattr, NULL, true);
if (new_creds) {
@@ -357,7 +355,7 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
}
if (err)
- goto out2;
+ goto out;
if (S_ISREG(stat->mode)) {
struct path upperpath;
@@ -393,10 +391,23 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
/*
* Store identifier of lower inode in upper inode xattr to
* allow lookup of the copy up origin inode.
+ *
+ * Don't set origin when we are breaking the association with a lower
+ * hard link.
*/
- err = ovl_set_origin(dentry, lowerpath->dentry, temp);
- if (err)
+ if (S_ISDIR(stat->mode) || stat->nlink == 1) {
+ err = ovl_set_origin(dentry, lowerpath->dentry, temp);
+ if (err)
+ goto out_cleanup;
+ }
+
+ upper = lookup_one_len(dentry->d_name.name, upperdir,
+ dentry->d_name.len);
+ if (IS_ERR(upper)) {
+ err = PTR_ERR(upper);
+ upper = NULL;
goto out_cleanup;
+ }
if (tmpfile)
err = ovl_do_link(temp, udir, upper, true);
@@ -411,17 +422,15 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
/* Restore timestamps on parent (best effort) */
ovl_set_timestamps(upperdir, pstat);
-out2:
+out:
dput(temp);
-out1:
dput(upper);
-out:
return err;
out_cleanup:
if (!tmpfile)
ovl_cleanup(wdir, temp);
- goto out2;
+ goto out;
}
/*
@@ -454,6 +463,11 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
ovl_path_upper(parent, &parentpath);
upperdir = parentpath.dentry;
+ /* Mark parent "impure" because it may now contain non-pure upper */
+ err = ovl_set_impure(parent, upperdir);
+ if (err)
+ return err;
+
err = vfs_getattr(&parentpath, &pstat,
STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT);
if (err)
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 723b98b90698..a63a71656e9b 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -41,7 +41,7 @@ void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
}
}
-struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry)
+struct dentry *ovl_lookup_temp(struct dentry *workdir)
{
struct dentry *temp;
char name[20];
@@ -68,7 +68,7 @@ static struct dentry *ovl_whiteout(struct dentry *workdir,
struct dentry *whiteout;
struct inode *wdir = workdir->d_inode;
- whiteout = ovl_lookup_temp(workdir, dentry);
+ whiteout = ovl_lookup_temp(workdir);
if (IS_ERR(whiteout))
return whiteout;
@@ -127,17 +127,28 @@ int ovl_create_real(struct inode *dir, struct dentry *newdentry,
return err;
}
-static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry)
+static int ovl_set_opaque_xerr(struct dentry *dentry, struct dentry *upper,
+ int xerr)
{
int err;
- err = ovl_do_setxattr(upperdentry, OVL_XATTR_OPAQUE, "y", 1, 0);
+ err = ovl_check_setxattr(dentry, upper, OVL_XATTR_OPAQUE, "y", 1, xerr);
if (!err)
ovl_dentry_set_opaque(dentry);
return err;
}
+static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry)
+{
+ /*
+ * Fail with -EIO when trying to create opaque dir and upper doesn't
+ * support xattrs. ovl_rename() calls ovl_set_opaque_xerr(-EXDEV) to
+ * return a specific error for noxattr case.
+ */
+ return ovl_set_opaque_xerr(dentry, upperdentry, -EIO);
+}
+
/* Common operations required to be done after creation of file on upper */
static void ovl_instantiate(struct dentry *dentry, struct inode *inode,
struct dentry *newdentry, bool hardlink)
@@ -162,6 +173,11 @@ static bool ovl_type_merge(struct dentry *dentry)
return OVL_TYPE_MERGE(ovl_path_type(dentry));
}
+static bool ovl_type_origin(struct dentry *dentry)
+{
+ return OVL_TYPE_ORIGIN(ovl_path_type(dentry));
+}
+
static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
struct cattr *attr, struct dentry *hardlink)
{
@@ -250,7 +266,7 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
if (upper->d_parent->d_inode != udir)
goto out_unlock;
- opaquedir = ovl_lookup_temp(workdir, dentry);
+ opaquedir = ovl_lookup_temp(workdir);
err = PTR_ERR(opaquedir);
if (IS_ERR(opaquedir))
goto out_unlock;
@@ -382,7 +398,7 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
if (err)
goto out;
- newdentry = ovl_lookup_temp(workdir, dentry);
+ newdentry = ovl_lookup_temp(workdir);
err = PTR_ERR(newdentry);
if (IS_ERR(newdentry))
goto out_unlock;
@@ -846,18 +862,16 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir)
if (IS_ERR(redirect))
return PTR_ERR(redirect);
- err = ovl_do_setxattr(ovl_dentry_upper(dentry), OVL_XATTR_REDIRECT,
- redirect, strlen(redirect), 0);
+ err = ovl_check_setxattr(dentry, ovl_dentry_upper(dentry),
+ OVL_XATTR_REDIRECT,
+ redirect, strlen(redirect), -EXDEV);
if (!err) {
spin_lock(&dentry->d_lock);
ovl_dentry_set_redirect(dentry, redirect);
spin_unlock(&dentry->d_lock);
} else {
kfree(redirect);
- if (err == -EOPNOTSUPP)
- ovl_clear_redirect_dir(dentry->d_sb);
- else
- pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err);
+ pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err);
/* Fall back to userspace copy-up */
err = -EXDEV;
}
@@ -943,6 +957,25 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
old_upperdir = ovl_dentry_upper(old->d_parent);
new_upperdir = ovl_dentry_upper(new->d_parent);
+ if (!samedir) {
+ /*
+ * When moving a merge dir or non-dir with copy up origin into
+ * a new parent, we are marking the new parent dir "impure".
+ * When ovl_iterate() iterates an "impure" upper dir, it will
+ * lookup the origin inodes of the entries to fill d_ino.
+ */
+ if (ovl_type_origin(old)) {
+ err = ovl_set_impure(new->d_parent, new_upperdir);
+ if (err)
+ goto out_revert_creds;
+ }
+ if (!overwrite && ovl_type_origin(new)) {
+ err = ovl_set_impure(old->d_parent, old_upperdir);
+ if (err)
+ goto out_revert_creds;
+ }
+ }
+
trap = lock_rename(new_upperdir, old_upperdir);
olddentry = lookup_one_len(old->d_name.name, old_upperdir,
@@ -992,7 +1025,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
if (ovl_type_merge_or_lower(old))
err = ovl_set_redirect(old, samedir);
else if (!old_opaque && ovl_type_merge(new->d_parent))
- err = ovl_set_opaque(old, olddentry);
+ err = ovl_set_opaque_xerr(old, olddentry, -EXDEV);
if (err)
goto out_dput;
}
@@ -1000,7 +1033,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
if (ovl_type_merge_or_lower(new))
err = ovl_set_redirect(new, samedir);
else if (!new_opaque && ovl_type_merge(old->d_parent))
- err = ovl_set_opaque(new, newdentry);
+ err = ovl_set_opaque_xerr(new, newdentry, -EXDEV);
if (err)
goto out_dput;
}
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index ad9547f82da5..d613e2c41242 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -240,6 +240,16 @@ int ovl_xattr_get(struct dentry *dentry, const char *name,
return res;
}
+static bool ovl_can_list(const char *s)
+{
+ /* List all non-trusted xatts */
+ if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0)
+ return true;
+
+ /* Never list trusted.overlay, list other trusted for superuser only */
+ return !ovl_is_private_xattr(s) && capable(CAP_SYS_ADMIN);
+}
+
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
{
struct dentry *realdentry = ovl_dentry_real(dentry);
@@ -263,7 +273,7 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
return -EIO;
len -= slen;
- if (ovl_is_private_xattr(s)) {
+ if (!ovl_can_list(s)) {
res -= slen;
memmove(s, s + slen, len);
} else {
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index bad0f665a635..de0d4f742f36 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -135,7 +135,7 @@ static struct dentry *ovl_get_origin(struct dentry *dentry,
* Make sure that the stored uuid matches the uuid of the lower
* layer where file handle will be decoded.
*/
- if (uuid_be_cmp(fh->uuid, *(uuid_be *) &mnt->mnt_sb->s_uuid))
+ if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid))
goto out;
origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
@@ -169,17 +169,7 @@ invalid:
static bool ovl_is_opaquedir(struct dentry *dentry)
{
- int res;
- char val;
-
- if (!d_is_dir(dentry))
- return false;
-
- res = vfs_getxattr(dentry, OVL_XATTR_OPAQUE, &val, 1);
- if (res == 1 && val == 'y')
- return true;
-
- return false;
+ return ovl_check_dir_xattr(dentry, OVL_XATTR_OPAQUE);
}
static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
@@ -351,6 +341,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
unsigned int ctr = 0;
struct inode *inode = NULL;
bool upperopaque = false;
+ bool upperimpure = false;
char *upperredirect = NULL;
struct dentry *this;
unsigned int i;
@@ -395,6 +386,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
poe = roe;
}
upperopaque = d.opaque;
+ if (upperdentry && d.is_dir)
+ upperimpure = ovl_is_impuredir(upperdentry);
}
if (!d.stop && poe->numlower) {
@@ -463,6 +456,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
revert_creds(old_cred);
oe->opaque = upperopaque;
+ oe->impure = upperimpure;
oe->redirect = upperredirect;
oe->__upperdentry = upperdentry;
memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index caa36cb9c46d..10863b4105fa 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -24,6 +24,7 @@ enum ovl_path_type {
#define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX "opaque"
#define OVL_XATTR_REDIRECT OVL_XATTR_PREFIX "redirect"
#define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin"
+#define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure"
/*
* The tuple (fh,uuid) is a universal unique identifier for a copy up origin,
@@ -55,7 +56,7 @@ struct ovl_fh {
u8 len; /* size of this header + size of fid */
u8 flags; /* OVL_FH_FLAG_* */
u8 type; /* fid_type of fid */
- uuid_be uuid; /* uuid of filesystem */
+ uuid_t uuid; /* uuid of filesystem */
u8 fid[0]; /* file identifier */
} __packed;
@@ -203,10 +204,10 @@ struct dentry *ovl_dentry_real(struct dentry *dentry);
struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry);
void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache);
bool ovl_dentry_is_opaque(struct dentry *dentry);
+bool ovl_dentry_is_impure(struct dentry *dentry);
bool ovl_dentry_is_whiteout(struct dentry *dentry);
void ovl_dentry_set_opaque(struct dentry *dentry);
bool ovl_redirect_dir(struct super_block *sb);
-void ovl_clear_redirect_dir(struct super_block *sb);
const char *ovl_dentry_get_redirect(struct dentry *dentry);
void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect);
void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry);
@@ -219,6 +220,17 @@ bool ovl_is_whiteout(struct dentry *dentry);
struct file *ovl_path_open(struct path *path, int flags);
int ovl_copy_up_start(struct dentry *dentry);
void ovl_copy_up_end(struct dentry *dentry);
+bool ovl_check_dir_xattr(struct dentry *dentry, const char *name);
+int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
+ const char *name, const void *value, size_t size,
+ int xerr);
+int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry);
+
+static inline bool ovl_is_impuredir(struct dentry *dentry)
+{
+ return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE);
+}
+
/* namei.c */
int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
@@ -263,7 +275,7 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to)
/* dir.c */
extern const struct inode_operations ovl_dir_inode_operations;
-struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry);
+struct dentry *ovl_lookup_temp(struct dentry *workdir);
struct cattr {
dev_t rdev;
umode_t mode;
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index b2023ddb8532..34bc4a9f5c61 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -28,6 +28,7 @@ struct ovl_fs {
/* creds of process who forced instantiation of super block */
const struct cred *creator_cred;
bool tmpfile;
+ bool noxattr;
wait_queue_head_t copyup_wq;
/* sb common to all layers */
struct super_block *same_sb;
@@ -42,6 +43,7 @@ struct ovl_entry {
u64 version;
const char *redirect;
bool opaque;
+ bool impure;
bool copying;
};
struct rcu_head rcu;
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 9828b7de8999..4882ffb37bae 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -891,6 +891,19 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
dput(temp);
else
pr_warn("overlayfs: upper fs does not support tmpfile.\n");
+
+ /*
+ * Check if upper/work fs supports trusted.overlay.*
+ * xattr
+ */
+ err = ovl_do_setxattr(ufs->workdir, OVL_XATTR_OPAQUE,
+ "0", 1, 0);
+ if (err) {
+ ufs->noxattr = true;
+ pr_warn("overlayfs: upper fs does not support xattr.\n");
+ } else {
+ vfs_removexattr(ufs->workdir, OVL_XATTR_OPAQUE);
+ }
}
}
@@ -961,7 +974,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
path_put(&workpath);
kfree(lowertmp);
- oe->__upperdentry = upperpath.dentry;
+ if (upperpath.dentry) {
+ oe->__upperdentry = upperpath.dentry;
+ oe->impure = ovl_is_impuredir(upperpath.dentry);
+ }
for (i = 0; i < numlower; i++) {
oe->lowerstack[i].dentry = stack[i].dentry;
oe->lowerstack[i].mnt = ufs->lower_mnt[i];
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index cfdea47313a1..809048913889 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -175,6 +175,13 @@ bool ovl_dentry_is_opaque(struct dentry *dentry)
return oe->opaque;
}
+bool ovl_dentry_is_impure(struct dentry *dentry)
+{
+ struct ovl_entry *oe = dentry->d_fsdata;
+
+ return oe->impure;
+}
+
bool ovl_dentry_is_whiteout(struct dentry *dentry)
{
return !dentry->d_inode && ovl_dentry_is_opaque(dentry);
@@ -191,14 +198,7 @@ bool ovl_redirect_dir(struct super_block *sb)
{
struct ovl_fs *ofs = sb->s_fs_info;
- return ofs->config.redirect_dir;
-}
-
-void ovl_clear_redirect_dir(struct super_block *sb)
-{
- struct ovl_fs *ofs = sb->s_fs_info;
-
- ofs->config.redirect_dir = false;
+ return ofs->config.redirect_dir && !ofs->noxattr;
}
const char *ovl_dentry_get_redirect(struct dentry *dentry)
@@ -303,3 +303,59 @@ void ovl_copy_up_end(struct dentry *dentry)
wake_up_locked(&ofs->copyup_wq);
spin_unlock(&ofs->copyup_wq.lock);
}
+
+bool ovl_check_dir_xattr(struct dentry *dentry, const char *name)
+{
+ int res;
+ char val;
+
+ if (!d_is_dir(dentry))
+ return false;
+
+ res = vfs_getxattr(dentry, name, &val, 1);
+ if (res == 1 && val == 'y')
+ return true;
+
+ return false;
+}
+
+int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
+ const char *name, const void *value, size_t size,
+ int xerr)
+{
+ int err;
+ struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+
+ if (ofs->noxattr)
+ return xerr;
+
+ err = ovl_do_setxattr(upperdentry, name, value, size, 0);
+
+ if (err == -EOPNOTSUPP) {
+ pr_warn("overlayfs: cannot set %s xattr on upper\n", name);
+ ofs->noxattr = true;
+ return xerr;
+ }
+
+ return err;
+}
+
+int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry)
+{
+ int err;
+ struct ovl_entry *oe = dentry->d_fsdata;
+
+ if (oe->impure)
+ return 0;
+
+ /*
+ * Do not fail when upper doesn't support xattrs.
+ * Upper inodes won't have origin nor redirect xattr anyway.
+ */
+ err = ovl_check_setxattr(dentry, upperdentry, OVL_XATTR_IMPURE,
+ "y", 1, 0);
+ if (!err)
+ oe->impure = true;
+
+ return err;
+}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 45f6bf68fff3..f1e1927ccd48 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -821,7 +821,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
if (!mmget_not_zero(mm))
goto free;
- flags = write ? FOLL_WRITE : 0;
+ flags = FOLL_FORCE | (write ? FOLL_WRITE : 0);
while (count > 0) {
int this_len = min_t(int, count, PAGE_SIZE);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index f0c8b33d99b1..520802da059c 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -300,11 +300,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
/* We don't show the stack guard page in /proc/maps */
start = vma->vm_start;
- if (stack_guard_page_start(vma, start))
- start += PAGE_SIZE;
end = vma->vm_end;
- if (stack_guard_page_end(vma, end))
- end -= PAGE_SIZE;
seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 792a4e5f9226..4d02c3b65061 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -349,48 +349,48 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record)
switch (record->type) {
case PSTORE_TYPE_DMESG:
- scnprintf(name, sizeof(name), "dmesg-%s-%lld%s",
+ scnprintf(name, sizeof(name), "dmesg-%s-%llu%s",
record->psi->name, record->id,
record->compressed ? ".enc.z" : "");
break;
case PSTORE_TYPE_CONSOLE:
- scnprintf(name, sizeof(name), "console-%s-%lld",
+ scnprintf(name, sizeof(name), "console-%s-%llu",
record->psi->name, record->id);
break;
case PSTORE_TYPE_FTRACE:
- scnprintf(name, sizeof(name), "ftrace-%s-%lld",
+ scnprintf(name, sizeof(name), "ftrace-%s-%llu",
record->psi->name, record->id);
break;
case PSTORE_TYPE_MCE:
- scnprintf(name, sizeof(name), "mce-%s-%lld",
+ scnprintf(name, sizeof(name), "mce-%s-%llu",
record->psi->name, record->id);
break;
case PSTORE_TYPE_PPC_RTAS:
- scnprintf(name, sizeof(name), "rtas-%s-%lld",
+ scnprintf(name, sizeof(name), "rtas-%s-%llu",
record->psi->name, record->id);
break;
case PSTORE_TYPE_PPC_OF:
- scnprintf(name, sizeof(name), "powerpc-ofw-%s-%lld",
+ scnprintf(name, sizeof(name), "powerpc-ofw-%s-%llu",
record->psi->name, record->id);
break;
case PSTORE_TYPE_PPC_COMMON:
- scnprintf(name, sizeof(name), "powerpc-common-%s-%lld",
+ scnprintf(name, sizeof(name), "powerpc-common-%s-%llu",
record->psi->name, record->id);
break;
case PSTORE_TYPE_PMSG:
- scnprintf(name, sizeof(name), "pmsg-%s-%lld",
+ scnprintf(name, sizeof(name), "pmsg-%s-%llu",
record->psi->name, record->id);
break;
case PSTORE_TYPE_PPC_OPAL:
- scnprintf(name, sizeof(name), "powerpc-opal-%s-%lld",
+ scnprintf(name, sizeof(name), "powerpc-opal-%s-%llu",
record->psi->name, record->id);
break;
case PSTORE_TYPE_UNKNOWN:
- scnprintf(name, sizeof(name), "unknown-%s-%lld",
+ scnprintf(name, sizeof(name), "unknown-%s-%llu",
record->psi->name, record->id);
break;
default:
- scnprintf(name, sizeof(name), "type%d-%s-%lld",
+ scnprintf(name, sizeof(name), "type%d-%s-%llu",
record->type, record->psi->name, record->id);
break;
}
diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h
index c416e653dc4f..58051265626f 100644
--- a/fs/pstore/internal.h
+++ b/fs/pstore/internal.h
@@ -30,5 +30,7 @@ extern void pstore_get_backend_records(struct pstore_info *psi,
extern int pstore_mkfile(struct dentry *root,
struct pstore_record *record);
extern bool pstore_is_mounted(void);
+extern void pstore_record_init(struct pstore_record *record,
+ struct pstore_info *psi);
#endif
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index d468eec9b8a6..1b6e0ff6bff5 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -474,6 +474,20 @@ static size_t copy_kmsg_to_buffer(int hsize, size_t len)
return total_len;
}
+void pstore_record_init(struct pstore_record *record,
+ struct pstore_info *psinfo)
+{
+ memset(record, 0, sizeof(*record));
+
+ record->psi = psinfo;
+
+ /* Report zeroed timestamp if called before timekeeping has resumed. */
+ if (__getnstimeofday(&record->time)) {
+ record->time.tv_sec = 0;
+ record->time.tv_nsec = 0;
+ }
+}
+
/*
* callback from kmsg_dump. (s2,l2) has the most recently
* written bytes, older bytes are in (s1,l1). Save as much
@@ -509,15 +523,14 @@ static void pstore_dump(struct kmsg_dumper *dumper,
int header_size;
int zipped_len = -1;
size_t dump_size;
- struct pstore_record record = {
- .type = PSTORE_TYPE_DMESG,
- .count = oopscount,
- .reason = reason,
- .part = part,
- .compressed = false,
- .buf = psinfo->buf,
- .psi = psinfo,
- };
+ struct pstore_record record;
+
+ pstore_record_init(&record, psinfo);
+ record.type = PSTORE_TYPE_DMESG;
+ record.count = oopscount;
+ record.reason = reason;
+ record.part = part;
+ record.buf = psinfo->buf;
if (big_oops_buf && is_locked) {
dst = big_oops_buf;
@@ -587,12 +600,12 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c)
const char *e = s + c;
while (s < e) {
- struct pstore_record record = {
- .type = PSTORE_TYPE_CONSOLE,
- .psi = psinfo,
- };
+ struct pstore_record record;
unsigned long flags;
+ pstore_record_init(&record, psinfo);
+ record.type = PSTORE_TYPE_CONSOLE;
+
if (c > psinfo->bufsize)
c = psinfo->bufsize;
@@ -640,19 +653,16 @@ static int pstore_write_user_compat(struct pstore_record *record,
if (record->buf)
return -EINVAL;
- record->buf = kmalloc(record->size, GFP_KERNEL);
- if (!record->buf)
- return -ENOMEM;
-
- if (unlikely(copy_from_user(record->buf, buf, record->size))) {
- ret = -EFAULT;
+ record->buf = memdup_user(buf, record->size);
+ if (unlikely(IS_ERR(record->buf))) {
+ ret = PTR_ERR(record->buf);
goto out;
}
ret = record->psi->write(record);
-out:
kfree(record->buf);
+out:
record->buf = NULL;
return unlikely(ret < 0) ? ret : record->size;
@@ -770,8 +780,11 @@ static void decompress_record(struct pstore_record *record)
int unzipped_len;
char *decompressed;
+ if (!record->compressed)
+ return;
+
/* Only PSTORE_TYPE_DMESG support compression. */
- if (!record->compressed || record->type != PSTORE_TYPE_DMESG) {
+ if (record->type != PSTORE_TYPE_DMESG) {
pr_warn("ignored compressed record type %d\n", record->type);
return;
}
@@ -819,6 +832,7 @@ void pstore_get_backend_records(struct pstore_info *psi,
struct dentry *root, int quiet)
{
int failed = 0;
+ unsigned int stop_loop = 65536;
if (!psi || !root)
return;
@@ -832,7 +846,7 @@ void pstore_get_backend_records(struct pstore_info *psi,
* may reallocate record.buf. On success, pstore_mkfile() will keep
* the record.buf, so free it only on failure.
*/
- for (;;) {
+ for (; stop_loop; stop_loop--) {
struct pstore_record *record;
int rc;
@@ -841,13 +855,15 @@ void pstore_get_backend_records(struct pstore_info *psi,
pr_err("out of memory creating record\n");
break;
}
- record->psi = psi;
+ pstore_record_init(record, psi);
record->size = psi->read(record);
/* No more records left in backend? */
- if (record->size <= 0)
+ if (record->size <= 0) {
+ kfree(record);
break;
+ }
decompress_record(record);
rc = pstore_mkfile(root, record);
@@ -865,8 +881,11 @@ out:
mutex_unlock(&psi->read_mutex);
if (failed)
- pr_warn("failed to load %d record(s) from '%s'\n",
+ pr_warn("failed to create %d record(s) from '%s'\n",
failed, psi->name);
+ if (!stop_loop)
+ pr_err("looping? Too many records seen from '%s'\n",
+ psi->name);
}
static void pstore_dowork(struct work_struct *work)
diff --git a/fs/pstore/pmsg.c b/fs/pstore/pmsg.c
index 209755e0d7c8..24db02de1787 100644
--- a/fs/pstore/pmsg.c
+++ b/fs/pstore/pmsg.c
@@ -22,16 +22,16 @@ static DEFINE_MUTEX(pmsg_lock);
static ssize_t write_pmsg(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
- struct pstore_record record = {
- .type = PSTORE_TYPE_PMSG,
- .size = count,
- .psi = psinfo,
- };
+ struct pstore_record record;
int ret;
if (!count)
return 0;
+ pstore_record_init(&record, psinfo);
+ record.type = PSTORE_TYPE_PMSG;
+ record.size = count;
+
/* check outside lock, page in any data. write_user also checks */
if (!access_ok(VERIFY_READ, buf, count))
return -EFAULT;
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 5cb022c8cd33..7125b398d312 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -27,7 +27,6 @@
#include <linux/module.h>
#include <linux/version.h>
#include <linux/pstore.h>
-#include <linux/time.h>
#include <linux/io.h>
#include <linux/ioport.h>
#include <linux/platform_device.h>
@@ -356,20 +355,15 @@ out:
}
static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz,
- bool compressed)
+ struct pstore_record *record)
{
char *hdr;
- struct timespec timestamp;
size_t len;
- /* Report zeroed timestamp if called before timekeeping has resumed. */
- if (__getnstimeofday(&timestamp)) {
- timestamp.tv_sec = 0;
- timestamp.tv_nsec = 0;
- }
hdr = kasprintf(GFP_ATOMIC, RAMOOPS_KERNMSG_HDR "%lu.%lu-%c\n",
- (long)timestamp.tv_sec, (long)(timestamp.tv_nsec / 1000),
- compressed ? 'C' : 'D');
+ record->time.tv_sec,
+ record->time.tv_nsec / 1000,
+ record->compressed ? 'C' : 'D');
WARN_ON_ONCE(!hdr);
len = hdr ? strlen(hdr) : 0;
persistent_ram_write(prz, hdr, len);
@@ -440,7 +434,7 @@ static int notrace ramoops_pstore_write(struct pstore_record *record)
prz = cxt->dprzs[cxt->dump_write_cnt];
/* Build header and append record contents. */
- hlen = ramoops_write_kmsg_hdr(prz, record->compressed);
+ hlen = ramoops_write_kmsg_hdr(prz, record);
size = record->size;
if (size + hlen > prz->buffer_size)
size = prz->buffer_size - hlen;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index ebf80c7739e1..48813aeaab80 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1512,6 +1512,22 @@ int dquot_initialize(struct inode *inode)
}
EXPORT_SYMBOL(dquot_initialize);
+bool dquot_initialize_needed(struct inode *inode)
+{
+ struct dquot **dquots;
+ int i;
+
+ if (!dquot_active(inode))
+ return false;
+
+ dquots = i_dquot(inode);
+ for (i = 0; i < MAXQUOTAS; i++)
+ if (!dquots[i] && sb_has_quota_active(inode->i_sb, i))
+ return true;
+ return false;
+}
+EXPORT_SYMBOL(dquot_initialize_needed);
+
/*
* Release all quotas referenced by inode.
*
diff --git a/fs/read_write.c b/fs/read_write.c
index 47c1d4484df9..d591eeed061f 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -678,16 +678,10 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
struct kiocb kiocb;
ssize_t ret;
- if (flags & ~(RWF_HIPRI | RWF_DSYNC | RWF_SYNC))
- return -EOPNOTSUPP;
-
init_sync_kiocb(&kiocb, filp);
- if (flags & RWF_HIPRI)
- kiocb.ki_flags |= IOCB_HIPRI;
- if (flags & RWF_DSYNC)
- kiocb.ki_flags |= IOCB_DSYNC;
- if (flags & RWF_SYNC)
- kiocb.ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
+ ret = kiocb_set_rw_flags(&kiocb, flags);
+ if (ret)
+ return ret;
kiocb.ki_pos = *ppos;
if (type == READ)
@@ -1285,7 +1279,7 @@ static size_t compat_writev(struct file *file,
if (!(file->f_mode & FMODE_CAN_WRITE))
goto out;
- ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos, 0);
+ ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos, flags);
out:
if (ret > 0)
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index da01f497180a..a11d773e5ff3 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1112,7 +1112,7 @@ static int flush_commit_list(struct super_block *s,
depth = reiserfs_write_unlock_nested(s);
if (reiserfs_barrier_flush(s))
__sync_dirty_buffer(jl->j_commit_bh,
- REQ_PREFLUSH | REQ_FUA);
+ REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
else
sync_dirty_buffer(jl->j_commit_bh);
reiserfs_write_lock_nested(s, depth);
@@ -1271,7 +1271,7 @@ static int _update_journal_header_block(struct super_block *sb,
if (reiserfs_barrier_flush(sb))
__sync_dirty_buffer(journal->j_header_bh,
- REQ_PREFLUSH | REQ_FUA);
+ REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
else
sync_dirty_buffer(journal->j_header_bh);
@@ -2956,7 +2956,7 @@ void reiserfs_wait_on_write_block(struct super_block *s)
static void queue_log_writer(struct super_block *s)
{
- wait_queue_t wait;
+ wait_queue_entry_t wait;
struct reiserfs_journal *journal = SB_JOURNAL(s);
set_bit(J_WRITERS_QUEUED, &journal->j_state);
diff --git a/fs/select.c b/fs/select.c
index d6c652a31e99..5b524a977d91 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -180,7 +180,7 @@ static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p)
return table->entry++;
}
-static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int __pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
struct poll_wqueues *pwq = wait->private;
DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task);
@@ -206,7 +206,7 @@ static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
return default_wake_function(&dummy_wait, mode, sync, key);
}
-static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int pollwake(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
struct poll_table_entry *entry;
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 7e3d71109f51..593b022ac11b 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -43,7 +43,7 @@ void signalfd_cleanup(struct sighand_struct *sighand)
if (likely(!waitqueue_active(wqh)))
return;
- /* wait_queue_t->func(POLLFREE) should do remove_wait_queue() */
+ /* wait_queue_entry_t->func(POLLFREE) should do remove_wait_queue() */
wake_up_poll(wqh, POLLHUP | POLLFREE);
}
diff --git a/fs/stat.c b/fs/stat.c
index f494b182c7c7..c35610845ab1 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -672,6 +672,7 @@ void __inode_add_bytes(struct inode *inode, loff_t bytes)
inode->i_bytes -= 512;
}
}
+EXPORT_SYMBOL(__inode_add_bytes);
void inode_add_bytes(struct inode *inode, loff_t bytes)
{
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index a0376a2c1c29..f80be4c5df9d 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -82,7 +82,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
ufs_error (sb, "ufs_free_fragments",
"bit already cleared for fragment %u", i);
}
-
+
+ inode_sub_bytes(inode, count << uspi->s_fshift);
fs32_add(sb, &ucg->cg_cs.cs_nffree, count);
uspi->cs_total.cs_nffree += count;
fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count);
@@ -184,6 +185,7 @@ do_more:
ufs_error(sb, "ufs_free_blocks", "freeing free fragment");
}
ubh_setblock(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno);
+ inode_sub_bytes(inode, uspi->s_fpb << uspi->s_fshift);
if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD)
ufs_clusteracct (sb, ucpi, blkno, 1);
@@ -398,10 +400,12 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
/*
* There is not enough space for user on the device
*/
- if (!capable(CAP_SYS_RESOURCE) && ufs_freespace(uspi, UFS_MINFREE) <= 0) {
- mutex_unlock(&UFS_SB(sb)->s_lock);
- UFSD("EXIT (FAILED)\n");
- return 0;
+ if (unlikely(ufs_freefrags(uspi) <= uspi->s_root_blocks)) {
+ if (!capable(CAP_SYS_RESOURCE)) {
+ mutex_unlock(&UFS_SB(sb)->s_lock);
+ UFSD("EXIT (FAILED)\n");
+ return 0;
+ }
}
if (goal >= uspi->s_size)
@@ -419,12 +423,12 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
if (result) {
ufs_clear_frags(inode, result + oldcount,
newcount - oldcount, locked_page != NULL);
+ *err = 0;
write_seqlock(&UFS_I(inode)->meta_lock);
ufs_cpu_to_data_ptr(sb, p, result);
- write_sequnlock(&UFS_I(inode)->meta_lock);
- *err = 0;
UFS_I(inode)->i_lastfrag =
max(UFS_I(inode)->i_lastfrag, fragment + count);
+ write_sequnlock(&UFS_I(inode)->meta_lock);
}
mutex_unlock(&UFS_SB(sb)->s_lock);
UFSD("EXIT, result %llu\n", (unsigned long long)result);
@@ -437,8 +441,10 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
result = ufs_add_fragments(inode, tmp, oldcount, newcount);
if (result) {
*err = 0;
+ read_seqlock_excl(&UFS_I(inode)->meta_lock);
UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag,
fragment + count);
+ read_sequnlock_excl(&UFS_I(inode)->meta_lock);
ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
locked_page != NULL);
mutex_unlock(&UFS_SB(sb)->s_lock);
@@ -449,39 +455,29 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
/*
* allocate new block and move data
*/
- switch (fs32_to_cpu(sb, usb1->fs_optim)) {
- case UFS_OPTSPACE:
+ if (fs32_to_cpu(sb, usb1->fs_optim) == UFS_OPTSPACE) {
request = newcount;
- if (uspi->s_minfree < 5 || uspi->cs_total.cs_nffree
- > uspi->s_dsize * uspi->s_minfree / (2 * 100))
- break;
- usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME);
- break;
- default:
- usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME);
-
- case UFS_OPTTIME:
+ if (uspi->cs_total.cs_nffree < uspi->s_space_to_time)
+ usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME);
+ } else {
request = uspi->s_fpb;
- if (uspi->cs_total.cs_nffree < uspi->s_dsize *
- (uspi->s_minfree - 2) / 100)
- break;
- usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTTIME);
- break;
+ if (uspi->cs_total.cs_nffree > uspi->s_time_to_space)
+ usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTSPACE);
}
result = ufs_alloc_fragments (inode, cgno, goal, request, err);
if (result) {
ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
locked_page != NULL);
+ mutex_unlock(&UFS_SB(sb)->s_lock);
ufs_change_blocknr(inode, fragment - oldcount, oldcount,
uspi->s_sbbase + tmp,
uspi->s_sbbase + result, locked_page);
+ *err = 0;
write_seqlock(&UFS_I(inode)->meta_lock);
ufs_cpu_to_data_ptr(sb, p, result);
- write_sequnlock(&UFS_I(inode)->meta_lock);
- *err = 0;
UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag,
fragment + count);
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ write_sequnlock(&UFS_I(inode)->meta_lock);
if (newcount < request)
ufs_free_fragments (inode, result + newcount, request - newcount);
ufs_free_fragments (inode, tmp, oldcount);
@@ -494,6 +490,20 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
return 0;
}
+static bool try_add_frags(struct inode *inode, unsigned frags)
+{
+ unsigned size = frags * i_blocksize(inode);
+ spin_lock(&inode->i_lock);
+ __inode_add_bytes(inode, size);
+ if (unlikely((u32)inode->i_blocks != inode->i_blocks)) {
+ __inode_sub_bytes(inode, size);
+ spin_unlock(&inode->i_lock);
+ return false;
+ }
+ spin_unlock(&inode->i_lock);
+ return true;
+}
+
static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
unsigned oldcount, unsigned newcount)
{
@@ -530,6 +540,9 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
for (i = oldcount; i < newcount; i++)
if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i))
return 0;
+
+ if (!try_add_frags(inode, count))
+ return 0;
/*
* Block can be extended
*/
@@ -647,6 +660,7 @@ cg_found:
ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, goal + i);
i = uspi->s_fpb - count;
+ inode_sub_bytes(inode, i << uspi->s_fshift);
fs32_add(sb, &ucg->cg_cs.cs_nffree, i);
uspi->cs_total.cs_nffree += i;
fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, i);
@@ -657,6 +671,8 @@ cg_found:
result = ufs_bitmap_search (sb, ucpi, goal, allocsize);
if (result == INVBLOCK)
return 0;
+ if (!try_add_frags(inode, count))
+ return 0;
for (i = 0; i < count; i++)
ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, result + i);
@@ -716,6 +732,8 @@ norot:
return INVBLOCK;
ucpi->c_rotor = result;
gotit:
+ if (!try_add_frags(inode, uspi->s_fpb))
+ return 0;
blkno = ufs_fragstoblks(result);
ubh_clrblock (UCPI_UBH(ucpi), ucpi->c_freeoff, blkno);
if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD)
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 7e41aee7b69a..f36d6a53687d 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -235,7 +235,8 @@ ufs_extend_tail(struct inode *inode, u64 writes_to,
p = ufs_get_direct_data_ptr(uspi, ufsi, block);
tmp = ufs_new_fragments(inode, p, lastfrag, ufs_data_ptr_to_cpu(sb, p),
- new_size, err, locked_page);
+ new_size - (lastfrag & uspi->s_fpbmask), err,
+ locked_page);
return tmp != 0;
}
@@ -284,7 +285,7 @@ ufs_inode_getfrag(struct inode *inode, unsigned index,
goal += uspi->s_fpb;
}
tmp = ufs_new_fragments(inode, p, ufs_blknum(new_fragment),
- goal, uspi->s_fpb, err, locked_page);
+ goal, nfrags, err, locked_page);
if (!tmp) {
*err = -ENOSPC;
@@ -400,11 +401,20 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff
u64 phys64 = 0;
unsigned frag = fragment & uspi->s_fpbmask;
- if (!create) {
- phys64 = ufs_frag_map(inode, offsets, depth);
- goto out;
- }
+ phys64 = ufs_frag_map(inode, offsets, depth);
+ if (!create)
+ goto done;
+ if (phys64) {
+ if (fragment >= UFS_NDIR_FRAGMENT)
+ goto done;
+ read_seqlock_excl(&UFS_I(inode)->meta_lock);
+ if (fragment < UFS_I(inode)->i_lastfrag) {
+ read_sequnlock_excl(&UFS_I(inode)->meta_lock);
+ goto done;
+ }
+ read_sequnlock_excl(&UFS_I(inode)->meta_lock);
+ }
/* This code entered only while writing ....? */
mutex_lock(&UFS_I(inode)->truncate_mutex);
@@ -448,6 +458,11 @@ out:
}
mutex_unlock(&UFS_I(inode)->truncate_mutex);
return err;
+
+done:
+ if (phys64)
+ map_bh(bh_result, sb, phys64 + frag);
+ return 0;
}
static int ufs_writepage(struct page *page, struct writeback_control *wbc)
@@ -551,10 +566,8 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
*/
inode->i_mode = mode = fs16_to_cpu(sb, ufs_inode->ui_mode);
set_nlink(inode, fs16_to_cpu(sb, ufs_inode->ui_nlink));
- if (inode->i_nlink == 0) {
- ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino);
- return -1;
- }
+ if (inode->i_nlink == 0)
+ return -ESTALE;
/*
* Linux now has 32-bit uid and gid, so we can support EFT.
@@ -563,9 +576,9 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
i_gid_write(inode, ufs_get_inode_gid(sb, ufs_inode));
inode->i_size = fs64_to_cpu(sb, ufs_inode->ui_size);
- inode->i_atime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_atime.tv_sec);
- inode->i_ctime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_ctime.tv_sec);
- inode->i_mtime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_mtime.tv_sec);
+ inode->i_atime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_atime.tv_sec);
+ inode->i_ctime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_ctime.tv_sec);
+ inode->i_mtime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_mtime.tv_sec);
inode->i_mtime.tv_nsec = 0;
inode->i_atime.tv_nsec = 0;
inode->i_ctime.tv_nsec = 0;
@@ -599,10 +612,8 @@ static int ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode)
*/
inode->i_mode = mode = fs16_to_cpu(sb, ufs2_inode->ui_mode);
set_nlink(inode, fs16_to_cpu(sb, ufs2_inode->ui_nlink));
- if (inode->i_nlink == 0) {
- ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino);
- return -1;
- }
+ if (inode->i_nlink == 0)
+ return -ESTALE;
/*
* Linux now has 32-bit uid and gid, so we can support EFT.
@@ -642,7 +653,7 @@ struct inode *ufs_iget(struct super_block *sb, unsigned long ino)
struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
struct buffer_head * bh;
struct inode *inode;
- int err;
+ int err = -EIO;
UFSD("ENTER, ino %lu\n", ino);
@@ -677,9 +688,10 @@ struct inode *ufs_iget(struct super_block *sb, unsigned long ino)
err = ufs1_read_inode(inode,
ufs_inode + ufs_inotofsbo(inode->i_ino));
}
-
+ brelse(bh);
if (err)
goto bad_inode;
+
inode->i_version++;
ufsi->i_lastfrag =
(inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift;
@@ -688,15 +700,13 @@ struct inode *ufs_iget(struct super_block *sb, unsigned long ino)
ufs_set_inode_ops(inode);
- brelse(bh);
-
UFSD("EXIT\n");
unlock_new_inode(inode);
return inode;
bad_inode:
iget_failed(inode);
- return ERR_PTR(-EIO);
+ return ERR_PTR(err);
}
static void ufs1_update_inode(struct inode *inode, struct ufs_inode *ufs_inode)
@@ -841,8 +851,11 @@ void ufs_evict_inode(struct inode * inode)
truncate_inode_pages_final(&inode->i_data);
if (want_delete) {
inode->i_size = 0;
- if (inode->i_blocks)
+ if (inode->i_blocks &&
+ (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+ S_ISLNK(inode->i_mode)))
ufs_truncate_blocks(inode);
+ ufs_update_inode(inode, inode_needs_sync(inode));
}
invalidate_inode_buffers(inode);
@@ -868,7 +881,6 @@ static inline void free_data(struct to_free *ctx, u64 from, unsigned count)
ctx->to = from + count;
}
-#define DIRECT_BLOCK ((inode->i_size + uspi->s_bsize - 1) >> uspi->s_bshift)
#define DIRECT_FRAGMENT ((inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift)
static void ufs_trunc_direct(struct inode *inode)
@@ -1100,25 +1112,30 @@ out:
return err;
}
-static void __ufs_truncate_blocks(struct inode *inode)
+static void ufs_truncate_blocks(struct inode *inode)
{
struct ufs_inode_info *ufsi = UFS_I(inode);
struct super_block *sb = inode->i_sb;
struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
unsigned offsets[4];
- int depth = ufs_block_to_path(inode, DIRECT_BLOCK, offsets);
+ int depth;
int depth2;
unsigned i;
struct ufs_buffer_head *ubh[3];
void *p;
u64 block;
- if (!depth)
- return;
+ if (inode->i_size) {
+ sector_t last = (inode->i_size - 1) >> uspi->s_bshift;
+ depth = ufs_block_to_path(inode, last, offsets);
+ if (!depth)
+ return;
+ } else {
+ depth = 1;
+ }
- /* find the last non-zero in offsets[] */
for (depth2 = depth - 1; depth2; depth2--)
- if (offsets[depth2])
+ if (offsets[depth2] != uspi->s_apb - 1)
break;
mutex_lock(&ufsi->truncate_mutex);
@@ -1127,9 +1144,8 @@ static void __ufs_truncate_blocks(struct inode *inode)
offsets[0] = UFS_IND_BLOCK;
} else {
/* get the blocks that should be partially emptied */
- p = ufs_get_direct_data_ptr(uspi, ufsi, offsets[0]);
+ p = ufs_get_direct_data_ptr(uspi, ufsi, offsets[0]++);
for (i = 0; i < depth2; i++) {
- offsets[i]++; /* next branch is fully freed */
block = ufs_data_ptr_to_cpu(sb, p);
if (!block)
break;
@@ -1140,7 +1156,7 @@ static void __ufs_truncate_blocks(struct inode *inode)
write_sequnlock(&ufsi->meta_lock);
break;
}
- p = ubh_get_data_ptr(uspi, ubh[i], offsets[i + 1]);
+ p = ubh_get_data_ptr(uspi, ubh[i], offsets[i + 1]++);
}
while (i--)
free_branch_tail(inode, offsets[i + 1], ubh[i], depth - i - 1);
@@ -1155,7 +1171,9 @@ static void __ufs_truncate_blocks(struct inode *inode)
free_full_branch(inode, block, i - UFS_IND_BLOCK + 1);
}
}
+ read_seqlock_excl(&ufsi->meta_lock);
ufsi->i_lastfrag = DIRECT_FRAGMENT;
+ read_sequnlock_excl(&ufsi->meta_lock);
mark_inode_dirty(inode);
mutex_unlock(&ufsi->truncate_mutex);
}
@@ -1183,7 +1201,7 @@ static int ufs_truncate(struct inode *inode, loff_t size)
truncate_setsize(inode, size);
- __ufs_truncate_blocks(inode);
+ ufs_truncate_blocks(inode);
inode->i_mtime = inode->i_ctime = current_time(inode);
mark_inode_dirty(inode);
out:
@@ -1191,16 +1209,6 @@ out:
return err;
}
-static void ufs_truncate_blocks(struct inode *inode)
-{
- if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
- S_ISLNK(inode->i_mode)))
- return;
- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
- return;
- __ufs_truncate_blocks(inode);
-}
-
int ufs_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 131b2b77c818..0a4f58a5073c 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -480,7 +480,7 @@ static void ufs_setup_cstotal(struct super_block *sb)
usb3 = ubh_get_usb_third(uspi);
if ((mtype == UFS_MOUNT_UFSTYPE_44BSD &&
- (usb1->fs_flags & UFS_FLAGS_UPDATED)) ||
+ (usb2->fs_un.fs_u2.fs_maxbsize == usb1->fs_bsize)) ||
mtype == UFS_MOUNT_UFSTYPE_UFS2) {
/*we have statistic in different place, then usual*/
uspi->cs_total.cs_ndir = fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_ndir);
@@ -596,9 +596,7 @@ static void ufs_put_cstotal(struct super_block *sb)
usb2 = ubh_get_usb_second(uspi);
usb3 = ubh_get_usb_third(uspi);
- if ((mtype == UFS_MOUNT_UFSTYPE_44BSD &&
- (usb1->fs_flags & UFS_FLAGS_UPDATED)) ||
- mtype == UFS_MOUNT_UFSTYPE_UFS2) {
+ if (mtype == UFS_MOUNT_UFSTYPE_UFS2) {
/*we have statistic in different place, then usual*/
usb2->fs_un.fs_u2.cs_ndir =
cpu_to_fs64(sb, uspi->cs_total.cs_ndir);
@@ -608,16 +606,26 @@ static void ufs_put_cstotal(struct super_block *sb)
cpu_to_fs64(sb, uspi->cs_total.cs_nifree);
usb3->fs_un1.fs_u2.cs_nffree =
cpu_to_fs64(sb, uspi->cs_total.cs_nffree);
- } else {
- usb1->fs_cstotal.cs_ndir =
- cpu_to_fs32(sb, uspi->cs_total.cs_ndir);
- usb1->fs_cstotal.cs_nbfree =
- cpu_to_fs32(sb, uspi->cs_total.cs_nbfree);
- usb1->fs_cstotal.cs_nifree =
- cpu_to_fs32(sb, uspi->cs_total.cs_nifree);
- usb1->fs_cstotal.cs_nffree =
- cpu_to_fs32(sb, uspi->cs_total.cs_nffree);
+ goto out;
+ }
+
+ if (mtype == UFS_MOUNT_UFSTYPE_44BSD &&
+ (usb2->fs_un.fs_u2.fs_maxbsize == usb1->fs_bsize)) {
+ /* store stats in both old and new places */
+ usb2->fs_un.fs_u2.cs_ndir =
+ cpu_to_fs64(sb, uspi->cs_total.cs_ndir);
+ usb2->fs_un.fs_u2.cs_nbfree =
+ cpu_to_fs64(sb, uspi->cs_total.cs_nbfree);
+ usb3->fs_un1.fs_u2.cs_nifree =
+ cpu_to_fs64(sb, uspi->cs_total.cs_nifree);
+ usb3->fs_un1.fs_u2.cs_nffree =
+ cpu_to_fs64(sb, uspi->cs_total.cs_nffree);
}
+ usb1->fs_cstotal.cs_ndir = cpu_to_fs32(sb, uspi->cs_total.cs_ndir);
+ usb1->fs_cstotal.cs_nbfree = cpu_to_fs32(sb, uspi->cs_total.cs_nbfree);
+ usb1->fs_cstotal.cs_nifree = cpu_to_fs32(sb, uspi->cs_total.cs_nifree);
+ usb1->fs_cstotal.cs_nffree = cpu_to_fs32(sb, uspi->cs_total.cs_nffree);
+out:
ubh_mark_buffer_dirty(USPI_UBH(uspi));
ufs_print_super_stuff(sb, usb1, usb2, usb3);
UFSD("EXIT\n");
@@ -746,6 +754,23 @@ static void ufs_put_super(struct super_block *sb)
return;
}
+static u64 ufs_max_bytes(struct super_block *sb)
+{
+ struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
+ int bits = uspi->s_apbshift;
+ u64 res;
+
+ if (bits > 21)
+ res = ~0ULL;
+ else
+ res = UFS_NDADDR + (1LL << bits) + (1LL << (2*bits)) +
+ (1LL << (3*bits));
+
+ if (res >= (MAX_LFS_FILESIZE >> uspi->s_bshift))
+ return MAX_LFS_FILESIZE;
+ return res << uspi->s_bshift;
+}
+
static int ufs_fill_super(struct super_block *sb, void *data, int silent)
{
struct ufs_sb_info * sbi;
@@ -812,9 +837,8 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
uspi->s_dirblksize = UFS_SECTOR_SIZE;
super_block_offset=UFS_SBLOCK;
- /* Keep 2Gig file limit. Some UFS variants need to override
- this but as I don't know which I'll let those in the know loosen
- the rules */
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
+
switch (sbi->s_mount_opt & UFS_MOUNT_UFSTYPE) {
case UFS_MOUNT_UFSTYPE_44BSD:
UFSD("ufstype=44bsd\n");
@@ -980,6 +1004,13 @@ again:
flags |= UFS_ST_SUN;
}
+ if ((flags & UFS_ST_MASK) == UFS_ST_44BSD &&
+ uspi->s_postblformat == UFS_42POSTBLFMT) {
+ if (!silent)
+ pr_err("this is not a 44bsd filesystem");
+ goto failed;
+ }
+
/*
* Check ufs magic number
*/
@@ -1127,8 +1158,8 @@ magic_found:
uspi->s_cgmask = fs32_to_cpu(sb, usb1->fs_cgmask);
if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) {
- uspi->s_u2_size = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size);
- uspi->s_u2_dsize = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize);
+ uspi->s_size = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size);
+ uspi->s_dsize = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize);
} else {
uspi->s_size = fs32_to_cpu(sb, usb1->fs_size);
uspi->s_dsize = fs32_to_cpu(sb, usb1->fs_dsize);
@@ -1177,6 +1208,18 @@ magic_found:
uspi->s_postbloff = fs32_to_cpu(sb, usb3->fs_postbloff);
uspi->s_rotbloff = fs32_to_cpu(sb, usb3->fs_rotbloff);
+ uspi->s_root_blocks = mul_u64_u32_div(uspi->s_dsize,
+ uspi->s_minfree, 100);
+ if (uspi->s_minfree <= 5) {
+ uspi->s_time_to_space = ~0ULL;
+ uspi->s_space_to_time = 0;
+ usb1->fs_optim = cpu_to_fs32(sb, UFS_OPTSPACE);
+ } else {
+ uspi->s_time_to_space = (uspi->s_root_blocks / 2) + 1;
+ uspi->s_space_to_time = mul_u64_u32_div(uspi->s_dsize,
+ uspi->s_minfree - 2, 100) - 1;
+ }
+
/*
* Compute another frequently used values
*/
@@ -1212,6 +1255,7 @@ magic_found:
"fast symlink size (%u)\n", uspi->s_maxsymlinklen);
uspi->s_maxsymlinklen = maxsymlen;
}
+ sb->s_maxbytes = ufs_max_bytes(sb);
sb->s_max_links = UFS_LINK_MAX;
inode = ufs_iget(sb, UFS_ROOTINO);
@@ -1365,19 +1409,17 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf)
mutex_lock(&UFS_SB(sb)->s_lock);
usb3 = ubh_get_usb_third(uspi);
- if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) {
+ if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2)
buf->f_type = UFS2_MAGIC;
- buf->f_blocks = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize);
- } else {
+ else
buf->f_type = UFS_MAGIC;
- buf->f_blocks = uspi->s_dsize;
- }
- buf->f_bfree = ufs_blkstofrags(uspi->cs_total.cs_nbfree) +
- uspi->cs_total.cs_nffree;
+
+ buf->f_blocks = uspi->s_dsize;
+ buf->f_bfree = ufs_freefrags(uspi);
buf->f_ffree = uspi->cs_total.cs_nifree;
buf->f_bsize = sb->s_blocksize;
- buf->f_bavail = (buf->f_bfree > (((long)buf->f_blocks / 100) * uspi->s_minfree))
- ? (buf->f_bfree - (((long)buf->f_blocks / 100) * uspi->s_minfree)) : 0;
+ buf->f_bavail = (buf->f_bfree > uspi->s_root_blocks)
+ ? (buf->f_bfree - uspi->s_root_blocks) : 0;
buf->f_files = uspi->s_ncg * uspi->s_ipg;
buf->f_namelen = UFS_MAXNAMLEN;
buf->f_fsid.val[0] = (u32)id;
diff --git a/fs/ufs/ufs_fs.h b/fs/ufs/ufs_fs.h
index 0cbd5d340b67..150eef6f1233 100644
--- a/fs/ufs/ufs_fs.h
+++ b/fs/ufs/ufs_fs.h
@@ -733,10 +733,8 @@ struct ufs_sb_private_info {
__u32 s_dblkno; /* offset of first data after cg */
__u32 s_cgoffset; /* cylinder group offset in cylinder */
__u32 s_cgmask; /* used to calc mod fs_ntrak */
- __u32 s_size; /* number of blocks (fragments) in fs */
- __u32 s_dsize; /* number of data blocks in fs */
- __u64 s_u2_size; /* ufs2: number of blocks (fragments) in fs */
- __u64 s_u2_dsize; /*ufs2: number of data blocks in fs */
+ __u64 s_size; /* number of blocks (fragments) in fs */
+ __u64 s_dsize; /* number of data blocks in fs */
__u32 s_ncg; /* number of cylinder groups */
__u32 s_bsize; /* size of basic blocks */
__u32 s_fsize; /* size of fragments */
@@ -793,6 +791,9 @@ struct ufs_sb_private_info {
__u32 s_maxsymlinklen;/* upper limit on fast symlinks' size */
__s32 fs_magic; /* filesystem magic */
unsigned int s_dirblksize;
+ __u64 s_root_blocks;
+ __u64 s_time_to_space;
+ __u64 s_space_to_time;
};
/*
diff --git a/fs/ufs/util.c b/fs/ufs/util.c
index f41ad0a6106f..02497a492eb2 100644
--- a/fs/ufs/util.c
+++ b/fs/ufs/util.c
@@ -243,9 +243,8 @@ ufs_set_inode_dev(struct super_block *sb, struct ufs_inode_info *ufsi, dev_t dev
struct page *ufs_get_locked_page(struct address_space *mapping,
pgoff_t index)
{
- struct page *page;
-
- page = find_lock_page(mapping, index);
+ struct inode *inode = mapping->host;
+ struct page *page = find_lock_page(mapping, index);
if (!page) {
page = read_mapping_page(mapping, index, NULL);
@@ -253,7 +252,7 @@ struct page *ufs_get_locked_page(struct address_space *mapping,
printk(KERN_ERR "ufs_change_blocknr: "
"read_mapping_page error: ino %lu, index: %lu\n",
mapping->host->i_ino, index);
- goto out;
+ return page;
}
lock_page(page);
@@ -262,8 +261,7 @@ struct page *ufs_get_locked_page(struct address_space *mapping,
/* Truncate got there first */
unlock_page(page);
put_page(page);
- page = NULL;
- goto out;
+ return NULL;
}
if (!PageUptodate(page) || PageError(page)) {
@@ -272,11 +270,12 @@ struct page *ufs_get_locked_page(struct address_space *mapping,
printk(KERN_ERR "ufs_change_blocknr: "
"can not read page: ino %lu, index: %lu\n",
- mapping->host->i_ino, index);
+ inode->i_ino, index);
- page = ERR_PTR(-EIO);
+ return ERR_PTR(-EIO);
}
}
-out:
+ if (!page_has_buffers(page))
+ create_empty_buffers(page, 1 << inode->i_blkbits, 0);
return page;
}
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index b7fbf53dbc81..9fc7119a1551 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -350,16 +350,11 @@ static inline void *ubh_get_data_ptr(struct ufs_sb_private_info *uspi,
#define ubh_blkmap(ubh,begin,bit) \
((*ubh_get_addr(ubh, (begin) + ((bit) >> 3)) >> ((bit) & 7)) & (0xff >> (UFS_MAXFRAG - uspi->s_fpb)))
-/*
- * Determine the number of available frags given a
- * percentage to hold in reserve.
- */
static inline u64
-ufs_freespace(struct ufs_sb_private_info *uspi, int percentreserved)
+ufs_freefrags(struct ufs_sb_private_info *uspi)
{
return ufs_blkstofrags(uspi->cs_total.cs_nbfree) +
- uspi->cs_total.cs_nffree -
- (uspi->s_dsize * (percentreserved) / 100);
+ uspi->cs_total.cs_nffree;
}
/*
@@ -473,15 +468,19 @@ static inline unsigned _ubh_find_last_zero_bit_(
static inline int _ubh_isblockset_(struct ufs_sb_private_info * uspi,
struct ufs_buffer_head * ubh, unsigned begin, unsigned block)
{
+ u8 mask;
switch (uspi->s_fpb) {
case 8:
return (*ubh_get_addr (ubh, begin + block) == 0xff);
case 4:
- return (*ubh_get_addr (ubh, begin + (block >> 1)) == (0x0f << ((block & 0x01) << 2)));
+ mask = 0x0f << ((block & 0x01) << 2);
+ return (*ubh_get_addr (ubh, begin + (block >> 1)) & mask) == mask;
case 2:
- return (*ubh_get_addr (ubh, begin + (block >> 2)) == (0x03 << ((block & 0x03) << 1)));
+ mask = 0x03 << ((block & 0x03) << 1);
+ return (*ubh_get_addr (ubh, begin + (block >> 2)) & mask) == mask;
case 1:
- return (*ubh_get_addr (ubh, begin + (block >> 3)) == (0x01 << (block & 0x07)));
+ mask = 0x01 << (block & 0x07);
+ return (*ubh_get_addr (ubh, begin + (block >> 3)) & mask) == mask;
}
return 0;
}
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index f7555fc25877..6148ccd6cccf 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -81,7 +81,7 @@ struct userfaultfd_unmap_ctx {
struct userfaultfd_wait_queue {
struct uffd_msg msg;
- wait_queue_t wq;
+ wait_queue_entry_t wq;
struct userfaultfd_ctx *ctx;
bool waken;
};
@@ -91,7 +91,7 @@ struct userfaultfd_wake_range {
unsigned long len;
};
-static int userfaultfd_wake_function(wait_queue_t *wq, unsigned mode,
+static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode,
int wake_flags, void *key)
{
struct userfaultfd_wake_range *range = key;
@@ -129,7 +129,7 @@ static int userfaultfd_wake_function(wait_queue_t *wq, unsigned mode,
* wouldn't be enough, the smp_mb__before_spinlock is
* enough to avoid an explicit smp_mb() here.
*/
- list_del_init(&wq->task_list);
+ list_del_init(&wq->entry);
out:
return ret;
}
@@ -340,9 +340,28 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
bool must_wait, return_to_userland;
long blocking_state;
- BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
-
ret = VM_FAULT_SIGBUS;
+
+ /*
+ * We don't do userfault handling for the final child pid update.
+ *
+ * We also don't do userfault handling during
+ * coredumping. hugetlbfs has the special
+ * follow_hugetlb_page() to skip missing pages in the
+ * FOLL_DUMP case, anon memory also checks for FOLL_DUMP with
+ * the no_page_table() helper in follow_page_mask(), but the
+ * shmem_vm_ops->fault method is invoked even during
+ * coredumping without mmap_sem and it ends up here.
+ */
+ if (current->flags & (PF_EXITING|PF_DUMPCORE))
+ goto out;
+
+ /*
+ * Coredumping runs without mmap_sem so we can only check that
+ * the mmap_sem is held, if PF_DUMPCORE was not set.
+ */
+ WARN_ON_ONCE(!rwsem_is_locked(&mm->mmap_sem));
+
ctx = vmf->vma->vm_userfaultfd_ctx.ctx;
if (!ctx)
goto out;
@@ -361,12 +380,6 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
goto out;
/*
- * We don't do userfault handling for the final child pid update.
- */
- if (current->flags & PF_EXITING)
- goto out;
-
- /*
* Check that we can return VM_FAULT_RETRY.
*
* NOTE: it should become possible to return VM_FAULT_RETRY
@@ -509,13 +522,13 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
* and it's fine not to block on the spinlock. The uwq on this
* kernel stack can be released after the list_del_init.
*/
- if (!list_empty_careful(&uwq.wq.task_list)) {
+ if (!list_empty_careful(&uwq.wq.entry)) {
spin_lock(&ctx->fault_pending_wqh.lock);
/*
* No need of list_del_init(), the uwq on the stack
* will be freed shortly anyway.
*/
- list_del(&uwq.wq.task_list);
+ list_del(&uwq.wq.entry);
spin_unlock(&ctx->fault_pending_wqh.lock);
}
@@ -847,7 +860,7 @@ wakeup:
static inline struct userfaultfd_wait_queue *find_userfault_in(
wait_queue_head_t *wqh)
{
- wait_queue_t *wq;
+ wait_queue_entry_t *wq;
struct userfaultfd_wait_queue *uwq;
VM_BUG_ON(!spin_is_locked(&wqh->lock));
@@ -856,7 +869,7 @@ static inline struct userfaultfd_wait_queue *find_userfault_in(
if (!waitqueue_active(wqh))
goto out;
/* walk in reverse to provide FIFO behavior to read userfaults */
- wq = list_last_entry(&wqh->task_list, typeof(*wq), task_list);
+ wq = list_last_entry(&wqh->head, typeof(*wq), entry);
uwq = container_of(wq, struct userfaultfd_wait_queue, wq);
out:
return uwq;
@@ -990,14 +1003,14 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
* changes __remove_wait_queue() to use
* list_del_init() in turn breaking the
* !list_empty_careful() check in
- * handle_userfault(). The uwq->wq.task_list
+ * handle_userfault(). The uwq->wq.head list
* must never be empty at any time during the
* refile, or the waitqueue could disappear
* from under us. The "wait_queue_head_t"
* parameter of __remove_wait_queue() is unused
* anyway.
*/
- list_del(&uwq->wq.task_list);
+ list_del(&uwq->wq.entry);
__add_wait_queue(&ctx->fault_wqh, &uwq->wq);
write_seqcount_end(&ctx->refile_seq);
@@ -1019,7 +1032,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
fork_nctx = (struct userfaultfd_ctx *)
(unsigned long)
uwq->msg.arg.reserved.reserved1;
- list_move(&uwq->wq.task_list, &fork_event);
+ list_move(&uwq->wq.entry, &fork_event);
spin_unlock(&ctx->event_wqh.lock);
ret = 0;
break;
@@ -1056,8 +1069,8 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
if (!list_empty(&fork_event)) {
uwq = list_first_entry(&fork_event,
typeof(*uwq),
- wq.task_list);
- list_del(&uwq->wq.task_list);
+ wq.entry);
+ list_del(&uwq->wq.entry);
__add_wait_queue(&ctx->event_wqh, &uwq->wq);
userfaultfd_event_complete(ctx, uwq);
}
@@ -1734,17 +1747,17 @@ static long userfaultfd_ioctl(struct file *file, unsigned cmd,
static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
{
struct userfaultfd_ctx *ctx = f->private_data;
- wait_queue_t *wq;
+ wait_queue_entry_t *wq;
struct userfaultfd_wait_queue *uwq;
unsigned long pending = 0, total = 0;
spin_lock(&ctx->fault_pending_wqh.lock);
- list_for_each_entry(wq, &ctx->fault_pending_wqh.task_list, task_list) {
+ list_for_each_entry(wq, &ctx->fault_pending_wqh.head, entry) {
uwq = container_of(wq, struct userfaultfd_wait_queue, wq);
pending++;
total++;
}
- list_for_each_entry(wq, &ctx->fault_wqh.task_list, task_list) {
+ list_for_each_entry(wq, &ctx->fault_wqh.head, entry) {
uwq = container_of(wq, struct userfaultfd_wait_queue, wq);
total++;
}
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 5c90f82b8f6b..a6e955bfead8 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -98,8 +98,7 @@ xfs-y += xfs_aops.o \
xfs_sysfs.o \
xfs_trans.o \
xfs_xattr.o \
- kmem.o \
- uuid.o
+ kmem.o
# low-level transaction/log code
xfs-y += xfs_log.o \
diff --git a/fs/xfs/uuid.c b/fs/xfs/uuid.c
deleted file mode 100644
index b83f76b6d410..000000000000
--- a/fs/xfs/uuid.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include <xfs.h>
-
-/* IRIX interpretation of an uuid_t */
-typedef struct {
- __be32 uu_timelow;
- __be16 uu_timemid;
- __be16 uu_timehi;
- __be16 uu_clockseq;
- __be16 uu_node[3];
-} xfs_uu_t;
-
-/*
- * uuid_getnodeuniq - obtain the node unique fields of a UUID.
- *
- * This is not in any way a standard or condoned UUID function;
- * it just something that's needed for user-level file handles.
- */
-void
-uuid_getnodeuniq(uuid_t *uuid, int fsid [2])
-{
- xfs_uu_t *uup = (xfs_uu_t *)uuid;
-
- fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) |
- be16_to_cpu(uup->uu_timemid);
- fsid[1] = be32_to_cpu(uup->uu_timelow);
-}
-
-int
-uuid_is_nil(uuid_t *uuid)
-{
- int i;
- char *cp = (char *)uuid;
-
- if (uuid == NULL)
- return 0;
- /* implied check of version number here... */
- for (i = 0; i < sizeof *uuid; i++)
- if (*cp++) return 0; /* not nil */
- return 1; /* is nil */
-}
-
-int
-uuid_equal(uuid_t *uuid1, uuid_t *uuid2)
-{
- return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1;
-}
diff --git a/fs/xfs/uuid.h b/fs/xfs/uuid.h
deleted file mode 100644
index 104db0f3bed6..000000000000
--- a/fs/xfs/uuid.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_SUPPORT_UUID_H__
-#define __XFS_SUPPORT_UUID_H__
-
-typedef struct {
- unsigned char __u_bits[16];
-} uuid_t;
-
-extern int uuid_is_nil(uuid_t *uuid);
-extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
-extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]);
-
-static inline void
-uuid_copy(uuid_t *dst, uuid_t *src)
-{
- memcpy(dst, src, sizeof(uuid_t));
-}
-
-#endif /* __XFS_SUPPORT_UUID_H__ */
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 09af0f7cd55e..d20c29b9c95b 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -276,7 +276,7 @@ xfs_end_io(
struct xfs_inode *ip = XFS_I(ioend->io_inode);
xfs_off_t offset = ioend->io_offset;
size_t size = ioend->io_size;
- int error = ioend->io_bio->bi_error;
+ int error;
/*
* Just clean up the in-memory strutures if the fs has been shut down.
@@ -289,6 +289,7 @@ xfs_end_io(
/*
* Clean up any COW blocks on an I/O error.
*/
+ error = blk_status_to_errno(ioend->io_bio->bi_status);
if (unlikely(error)) {
switch (ioend->io_type) {
case XFS_IO_COW:
@@ -332,7 +333,7 @@ xfs_end_bio(
else if (ioend->io_append_trans)
queue_work(mp->m_data_workqueue, &ioend->io_work);
else
- xfs_destroy_ioend(ioend, bio->bi_error);
+ xfs_destroy_ioend(ioend, blk_status_to_errno(bio->bi_status));
}
STATIC int
@@ -500,11 +501,12 @@ xfs_submit_ioend(
* time.
*/
if (status) {
- ioend->io_bio->bi_error = status;
+ ioend->io_bio->bi_status = errno_to_blk_status(status);
bio_endio(ioend->io_bio);
return status;
}
+ ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
submit_bio(ioend->io_bio);
return 0;
}
@@ -564,6 +566,7 @@ xfs_chain_bio(
bio_chain(ioend->io_bio, new);
bio_get(ioend->io_bio); /* for xfs_destroy_ioend */
ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
+ ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
submit_bio(ioend->io_bio);
ioend->io_bio = new;
}
@@ -1316,9 +1319,12 @@ xfs_vm_bmap(
* The swap code (ab-)uses ->bmap to get a block mapping and then
* bypasseѕ the file system for actual I/O. We really can't allow
* that on reflinks inodes, so we have to skip out here. And yes,
- * 0 is the magic code for a bmap error..
+ * 0 is the magic code for a bmap error.
+ *
+ * Since we don't pass back blockdev info, we can't return bmap
+ * information for rt files either.
*/
- if (xfs_is_reflink_inode(ip))
+ if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip))
return 0;
filemap_write_and_wait(mapping);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 62fa39276a24..438505f395e7 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -97,12 +97,16 @@ static inline void
xfs_buf_ioacct_inc(
struct xfs_buf *bp)
{
- if (bp->b_flags & (XBF_NO_IOACCT|_XBF_IN_FLIGHT))
+ if (bp->b_flags & XBF_NO_IOACCT)
return;
ASSERT(bp->b_flags & XBF_ASYNC);
- bp->b_flags |= _XBF_IN_FLIGHT;
- percpu_counter_inc(&bp->b_target->bt_io_count);
+ spin_lock(&bp->b_lock);
+ if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) {
+ bp->b_state |= XFS_BSTATE_IN_FLIGHT;
+ percpu_counter_inc(&bp->b_target->bt_io_count);
+ }
+ spin_unlock(&bp->b_lock);
}
/*
@@ -110,14 +114,24 @@ xfs_buf_ioacct_inc(
* freed and unaccount from the buftarg.
*/
static inline void
-xfs_buf_ioacct_dec(
+__xfs_buf_ioacct_dec(
struct xfs_buf *bp)
{
- if (!(bp->b_flags & _XBF_IN_FLIGHT))
- return;
+ lockdep_assert_held(&bp->b_lock);
- bp->b_flags &= ~_XBF_IN_FLIGHT;
- percpu_counter_dec(&bp->b_target->bt_io_count);
+ if (bp->b_state & XFS_BSTATE_IN_FLIGHT) {
+ bp->b_state &= ~XFS_BSTATE_IN_FLIGHT;
+ percpu_counter_dec(&bp->b_target->bt_io_count);
+ }
+}
+
+static inline void
+xfs_buf_ioacct_dec(
+ struct xfs_buf *bp)
+{
+ spin_lock(&bp->b_lock);
+ __xfs_buf_ioacct_dec(bp);
+ spin_unlock(&bp->b_lock);
}
/*
@@ -149,9 +163,9 @@ xfs_buf_stale(
* unaccounted (released to LRU) before that occurs. Drop in-flight
* status now to preserve accounting consistency.
*/
- xfs_buf_ioacct_dec(bp);
-
spin_lock(&bp->b_lock);
+ __xfs_buf_ioacct_dec(bp);
+
atomic_set(&bp->b_lru_ref, 0);
if (!(bp->b_state & XFS_BSTATE_DISPOSE) &&
(list_lru_del(&bp->b_target->bt_lru, &bp->b_lru)))
@@ -979,12 +993,12 @@ xfs_buf_rele(
* ensures the decrement occurs only once per-buf.
*/
if ((atomic_read(&bp->b_hold) == 1) && !list_empty(&bp->b_lru))
- xfs_buf_ioacct_dec(bp);
+ __xfs_buf_ioacct_dec(bp);
goto out_unlock;
}
/* the last reference has been dropped ... */
- xfs_buf_ioacct_dec(bp);
+ __xfs_buf_ioacct_dec(bp);
if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
/*
* If the buffer is added to the LRU take a new reference to the
@@ -1213,8 +1227,11 @@ xfs_buf_bio_end_io(
* don't overwrite existing errors - otherwise we can lose errors on
* buffers that require multiple bios to complete.
*/
- if (bio->bi_error)
- cmpxchg(&bp->b_io_error, 0, bio->bi_error);
+ if (bio->bi_status) {
+ int error = blk_status_to_errno(bio->bi_status);
+
+ cmpxchg(&bp->b_io_error, 0, error);
+ }
if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 8d1d44f87ce9..1508121f29f2 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -63,7 +63,6 @@ typedef enum {
#define _XBF_KMEM (1 << 21)/* backed by heap memory */
#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */
#define _XBF_COMPOUND (1 << 23)/* compound buffer */
-#define _XBF_IN_FLIGHT (1 << 25) /* I/O in flight, for accounting purposes */
typedef unsigned int xfs_buf_flags_t;
@@ -84,14 +83,14 @@ typedef unsigned int xfs_buf_flags_t;
{ _XBF_PAGES, "PAGES" }, \
{ _XBF_KMEM, "KMEM" }, \
{ _XBF_DELWRI_Q, "DELWRI_Q" }, \
- { _XBF_COMPOUND, "COMPOUND" }, \
- { _XBF_IN_FLIGHT, "IN_FLIGHT" }
+ { _XBF_COMPOUND, "COMPOUND" }
/*
* Internal state flags.
*/
#define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */
+#define XFS_BSTATE_IN_FLIGHT (1 << 1) /* I/O in flight */
/*
* The xfs_buftarg contains 2 notions of "sector size" -
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 5fb5a0958a14..17f27a2fb5e2 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -237,7 +237,11 @@ xfs_file_dax_read(
if (!count)
return 0; /* skip atime */
- xfs_ilock(ip, XFS_IOLOCK_SHARED);
+ if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ xfs_ilock(ip, XFS_IOLOCK_SHARED);
+ }
ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
@@ -541,7 +545,11 @@ xfs_file_dio_aio_write(
iolock = XFS_IOLOCK_SHARED;
}
- xfs_ilock(ip, iolock);
+ if (!xfs_ilock_nowait(ip, iolock)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ xfs_ilock(ip, iolock);
+ }
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
@@ -553,9 +561,15 @@ xfs_file_dio_aio_write(
* otherwise demote the lock if we had to take the exclusive lock
* for other reasons in xfs_file_aio_write_checks.
*/
- if (unaligned_io)
- inode_dio_wait(inode);
- else if (iolock == XFS_IOLOCK_EXCL) {
+ if (unaligned_io) {
+ /* If we are going to wait for other DIO to finish, bail */
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (atomic_read(&inode->i_dio_count))
+ return -EAGAIN;
+ } else {
+ inode_dio_wait(inode);
+ }
+ } else if (iolock == XFS_IOLOCK_EXCL) {
xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
iolock = XFS_IOLOCK_SHARED;
}
@@ -585,7 +599,12 @@ xfs_file_dax_write(
size_t count;
loff_t pos;
- xfs_ilock(ip, iolock);
+ if (!xfs_ilock_nowait(ip, iolock)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ xfs_ilock(ip, iolock);
+ }
+
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
goto out;
@@ -892,6 +911,7 @@ xfs_file_open(
return -EFBIG;
if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
return -EIO;
+ file->f_mode |= FMODE_AIO_NOWAIT;
return 0;
}
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index f61c84f8e31a..b9c12e1cc23a 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -66,7 +66,6 @@ xfs_inode_alloc(
XFS_STATS_INC(mp, vn_active);
ASSERT(atomic_read(&ip->i_pincount) == 0);
- ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(!xfs_isiflocked(ip));
ASSERT(ip->i_ino == 0);
@@ -190,7 +189,7 @@ xfs_perag_set_reclaim_tag(
{
struct xfs_mount *mp = pag->pag_mount;
- ASSERT(spin_is_locked(&pag->pag_ici_lock));
+ lockdep_assert_held(&pag->pag_ici_lock);
if (pag->pag_ici_reclaimable++)
return;
@@ -212,7 +211,7 @@ xfs_perag_clear_reclaim_tag(
{
struct xfs_mount *mp = pag->pag_mount;
- ASSERT(spin_is_locked(&pag->pag_ici_lock));
+ lockdep_assert_held(&pag->pag_ici_lock);
if (--pag->pag_ici_reclaimable)
return;
@@ -270,12 +269,12 @@ xfs_inew_wait(
DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_INEW_BIT);
do {
- prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+ prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
if (!xfs_iflags_test(ip, XFS_INEW))
break;
schedule();
} while (true);
- finish_wait(wq, &wait.wait);
+ finish_wait(wq, &wait.wq_entry);
}
/*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ec9826c56500..c0a1e840a588 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -622,12 +622,12 @@ __xfs_iflock(
DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);
do {
- prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+ prepare_to_wait_exclusive(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
if (xfs_isiflocked(ip))
io_schedule();
} while (!xfs_iflock_nowait(ip));
- finish_wait(wq, &wait.wait);
+ finish_wait(wq, &wait.wq_entry);
}
STATIC uint
@@ -2486,11 +2486,11 @@ __xfs_iunpin_wait(
xfs_iunpin(ip);
do {
- prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+ prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
if (xfs_ipincount(ip))
io_schedule();
} while (xfs_ipincount(ip));
- finish_wait(wq, &wait.wait);
+ finish_wait(wq, &wait.wq_entry);
}
void
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 08cb7d1a4a3a..013cc78d7daf 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -834,9 +834,7 @@ xfs_inode_item_format_convert(
in_f->ilf_dsize = in_f32->ilf_dsize;
in_f->ilf_ino = in_f32->ilf_ino;
/* copy biggest field of ilf_u */
- memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
- in_f32->ilf_u.ilfu_uuid.__u_bits,
- sizeof(uuid_t));
+ uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f32->ilf_u.ilfu_uuid);
in_f->ilf_blkno = in_f32->ilf_blkno;
in_f->ilf_len = in_f32->ilf_len;
in_f->ilf_boffset = in_f32->ilf_boffset;
@@ -851,9 +849,7 @@ xfs_inode_item_format_convert(
in_f->ilf_dsize = in_f64->ilf_dsize;
in_f->ilf_ino = in_f64->ilf_ino;
/* copy biggest field of ilf_u */
- memcpy(in_f->ilf_u.ilfu_uuid.__u_bits,
- in_f64->ilf_u.ilfu_uuid.__u_bits,
- sizeof(uuid_t));
+ uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f64->ilf_u.ilfu_uuid);
in_f->ilf_blkno = in_f64->ilf_blkno;
in_f->ilf_len = in_f64->ilf_len;
in_f->ilf_boffset = in_f64->ilf_boffset;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 94e5bdf7304c..05dc87e8c1f5 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -995,6 +995,11 @@ xfs_file_iomap_begin(
lockmode = xfs_ilock_data_map_shared(ip);
}
+ if ((flags & IOMAP_NOWAIT) && !(ip->i_df.if_flags & XFS_IFEXTENTS)) {
+ error = -EAGAIN;
+ goto out_unlock;
+ }
+
ASSERT(offset <= mp->m_super->s_maxbytes);
if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
length = mp->m_super->s_maxbytes - offset;
@@ -1016,6 +1021,15 @@ xfs_file_iomap_begin(
if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
if (flags & IOMAP_DIRECT) {
+ /*
+ * A reflinked inode will result in CoW alloc.
+ * FIXME: It could still overwrite on unshared extents
+ * and not need allocation.
+ */
+ if (flags & IOMAP_NOWAIT) {
+ error = -EAGAIN;
+ goto out_unlock;
+ }
/* may drop and re-acquire the ilock */
error = xfs_reflink_allocate_cow(ip, &imap, &shared,
&lockmode);
@@ -1033,6 +1047,14 @@ xfs_file_iomap_begin(
if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) {
/*
+ * If nowait is set bail since we are going to make
+ * allocations.
+ */
+ if (flags & IOMAP_NOWAIT) {
+ error = -EAGAIN;
+ goto out_unlock;
+ }
+ /*
* We cap the maximum length we map here to MAX_WRITEBACK_PAGES
* pages to keep the chunks of work done where somewhat symmetric
* with the work writeback does. This is a completely arbitrary
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 044fb0e15390..2d167fe643ec 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -19,6 +19,7 @@
#define __XFS_LINUX__
#include <linux/types.h>
+#include <linux/uuid.h>
/*
* Kernel specific type declarations for XFS
@@ -42,7 +43,6 @@ typedef __u32 xfs_nlink_t;
#include "kmem.h"
#include "mrlock.h"
-#include "uuid.h"
#include <linux/semaphore.h>
#include <linux/mm.h>
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index cd0b077deb35..8cec1e5505a4 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -352,13 +352,13 @@ xlog_header_check_mount(
{
ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM));
- if (uuid_is_nil(&head->h_fs_uuid)) {
+ if (uuid_is_null(&head->h_fs_uuid)) {
/*
* IRIX doesn't write the h_fs_uuid or h_fmt fields. If
- * h_fs_uuid is nil, we assume this log was last mounted
+ * h_fs_uuid is null, we assume this log was last mounted
* by IRIX and continue.
*/
- xfs_warn(mp, "nil uuid in log - IRIX style log");
+ xfs_warn(mp, "null uuid in log - IRIX style log");
} else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
xfs_warn(mp, "log has mismatched uuid - can't recover");
xlog_header_check_dump(mp, head);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 2eaf81859166..d249546da15e 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -74,20 +74,19 @@ xfs_uuid_mount(
int hole, i;
/* Publish UUID in struct super_block */
- BUILD_BUG_ON(sizeof(mp->m_super->s_uuid) != sizeof(uuid_t));
- memcpy(&mp->m_super->s_uuid, uuid, sizeof(uuid_t));
+ uuid_copy(&mp->m_super->s_uuid, uuid);
if (mp->m_flags & XFS_MOUNT_NOUUID)
return 0;
- if (uuid_is_nil(uuid)) {
- xfs_warn(mp, "Filesystem has nil UUID - can't mount");
+ if (uuid_is_null(uuid)) {
+ xfs_warn(mp, "Filesystem has null UUID - can't mount");
return -EINVAL;
}
mutex_lock(&xfs_uuid_table_mutex);
for (i = 0, hole = -1; i < xfs_uuid_table_size; i++) {
- if (uuid_is_nil(&xfs_uuid_table[i])) {
+ if (uuid_is_null(&xfs_uuid_table[i])) {
hole = i;
continue;
}
@@ -124,7 +123,7 @@ xfs_uuid_unmount(
mutex_lock(&xfs_uuid_table_mutex);
for (i = 0; i < xfs_uuid_table_size; i++) {
- if (uuid_is_nil(&xfs_uuid_table[i]))
+ if (uuid_is_null(&xfs_uuid_table[i]))
continue;
if (!uuid_equal(uuid, &xfs_uuid_table[i]))
continue;
@@ -793,7 +792,10 @@ xfs_mountfs(
* Copies the low order bits of the timestamp and the randomly
* set "sequence" number out of a UUID.
*/
- uuid_getnodeuniq(&sbp->sb_uuid, mp->m_fixedfsid);
+ mp->m_fixedfsid[0] =
+ (get_unaligned_be16(&sbp->sb_uuid.b[8]) << 16) |
+ get_unaligned_be16(&sbp->sb_uuid.b[4]);
+ mp->m_fixedfsid[1] = get_unaligned_be32(&sbp->sb_uuid.b[0]);
mp->m_dmevmask = 0; /* not persistent; set after each mount */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 455a575f101d..97df4db13b2e 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1766,7 +1766,8 @@ STATIC int __init
xfs_init_zones(void)
{
xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE,
- offsetof(struct xfs_ioend, io_inline_bio));
+ offsetof(struct xfs_ioend, io_inline_bio),
+ BIOSET_NEED_BVECS);
if (!xfs_ioend_bioset)
goto out;