aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--fs/netfs/buffered_read.c19
-rw-r--r--fs/netfs/internal.h4
-rw-r--r--fs/netfs/read_collect.c6
-rw-r--r--fs/netfs/read_retry.c43
-rw-r--r--fs/netfs/stats.c9
-rw-r--r--fs/netfs/write_issue.c1
-rw-r--r--fs/netfs/write_retry.c2
-rw-r--r--include/linux/netfs.h2
-rw-r--r--include/trace/events/netfs.h4
-rw-r--r--kernel/acct.c134
10 files changed, 154 insertions, 70 deletions
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index f761d44b3436..0d1b6d35ff3b 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -155,8 +155,9 @@ static void netfs_read_cache_to_pagecache(struct netfs_io_request *rreq,
netfs_cache_read_terminated, subreq);
}
-static void netfs_issue_read(struct netfs_io_request *rreq,
- struct netfs_io_subrequest *subreq)
+static void netfs_queue_read(struct netfs_io_request *rreq,
+ struct netfs_io_subrequest *subreq,
+ bool last_subreq)
{
struct netfs_io_stream *stream = &rreq->io_streams[0];
@@ -177,8 +178,17 @@ static void netfs_issue_read(struct netfs_io_request *rreq,
}
}
+ if (last_subreq) {
+ smp_wmb(); /* Write lists before ALL_QUEUED. */
+ set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
+ }
+
spin_unlock(&rreq->lock);
+}
+static void netfs_issue_read(struct netfs_io_request *rreq,
+ struct netfs_io_subrequest *subreq)
+{
switch (subreq->source) {
case NETFS_DOWNLOAD_FROM_SERVER:
rreq->netfs_ops->issue_read(subreq);
@@ -293,11 +303,8 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
}
size -= slice;
start += slice;
- if (size <= 0) {
- smp_wmb(); /* Write lists before ALL_QUEUED. */
- set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
- }
+ netfs_queue_read(rreq, subreq, size <= 0);
netfs_issue_read(rreq, subreq);
cond_resched();
} while (size > 0);
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index eb76f98c894b..1c4f953c3d68 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -135,6 +135,8 @@ extern atomic_t netfs_n_rh_write_begin;
extern atomic_t netfs_n_rh_write_done;
extern atomic_t netfs_n_rh_write_failed;
extern atomic_t netfs_n_rh_write_zskip;
+extern atomic_t netfs_n_rh_retry_read_req;
+extern atomic_t netfs_n_rh_retry_read_subreq;
extern atomic_t netfs_n_wh_buffered_write;
extern atomic_t netfs_n_wh_writethrough;
extern atomic_t netfs_n_wh_dio_write;
@@ -147,6 +149,8 @@ extern atomic_t netfs_n_wh_upload_failed;
extern atomic_t netfs_n_wh_write;
extern atomic_t netfs_n_wh_write_done;
extern atomic_t netfs_n_wh_write_failed;
+extern atomic_t netfs_n_wh_retry_write_req;
+extern atomic_t netfs_n_wh_retry_write_subreq;
extern atomic_t netfs_n_wb_lock_skip;
extern atomic_t netfs_n_wb_lock_wait;
extern atomic_t netfs_n_folioq;
diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c
index f65affa5a9e4..636cc5a98ef5 100644
--- a/fs/netfs/read_collect.c
+++ b/fs/netfs/read_collect.c
@@ -470,7 +470,8 @@ void netfs_read_collection_worker(struct work_struct *work)
*/
void netfs_wake_read_collector(struct netfs_io_request *rreq)
{
- if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) {
+ if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags) &&
+ !test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) {
if (!work_pending(&rreq->work)) {
netfs_get_request(rreq, netfs_rreq_trace_get_work);
if (!queue_work(system_unbound_wq, &rreq->work))
@@ -586,7 +587,8 @@ void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq)
smp_mb__after_atomic(); /* Clear IN_PROGRESS before task state */
/* If we are at the head of the queue, wake up the collector. */
- if (list_is_first(&subreq->rreq_link, &stream->subrequests))
+ if (list_is_first(&subreq->rreq_link, &stream->subrequests) ||
+ test_bit(NETFS_RREQ_RETRYING, &rreq->flags))
netfs_wake_read_collector(rreq);
netfs_put_subrequest(subreq, true, netfs_sreq_trace_put_terminated);
diff --git a/fs/netfs/read_retry.c b/fs/netfs/read_retry.c
index 2290af0d51ac..0f294b26e08c 100644
--- a/fs/netfs/read_retry.c
+++ b/fs/netfs/read_retry.c
@@ -14,7 +14,7 @@ static void netfs_reissue_read(struct netfs_io_request *rreq,
{
__clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
- netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
+ netfs_stat(&netfs_n_rh_retry_read_subreq);
subreq->rreq->netfs_ops->issue_read(subreq);
}
@@ -48,6 +48,7 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
__clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
subreq->retry_count++;
netfs_reset_iter(subreq);
+ netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
netfs_reissue_read(rreq, subreq);
}
}
@@ -75,7 +76,7 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
struct iov_iter source;
unsigned long long start, len;
size_t part;
- bool boundary = false;
+ bool boundary = false, subreq_superfluous = false;
/* Go through the subreqs and find the next span of contiguous
* buffer that we then rejig (cifs, for example, needs the
@@ -116,8 +117,10 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
/* Work through the sublist. */
subreq = from;
list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) {
- if (!len)
+ if (!len) {
+ subreq_superfluous = true;
break;
+ }
subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
subreq->start = start - subreq->transferred;
subreq->len = len + subreq->transferred;
@@ -154,19 +157,21 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
netfs_reissue_read(rreq, subreq);
- if (subreq == to)
+ if (subreq == to) {
+ subreq_superfluous = false;
break;
+ }
}
/* If we managed to use fewer subreqs, we can discard the
* excess; if we used the same number, then we're done.
*/
if (!len) {
- if (subreq == to)
+ if (!subreq_superfluous)
continue;
list_for_each_entry_safe_from(subreq, tmp,
&stream->subrequests, rreq_link) {
- trace_netfs_sreq(subreq, netfs_sreq_trace_discard);
+ trace_netfs_sreq(subreq, netfs_sreq_trace_superfluous);
list_del(&subreq->rreq_link);
netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done);
if (subreq == to)
@@ -187,14 +192,12 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
subreq->start = start;
subreq->len = len;
- subreq->debug_index = atomic_inc_return(&rreq->subreq_counter);
subreq->stream_nr = stream->stream_nr;
subreq->retry_count = 1;
trace_netfs_sreq_ref(rreq->debug_id, subreq->debug_index,
refcount_read(&subreq->ref),
netfs_sreq_trace_new);
- netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
list_add(&subreq->rreq_link, &to->rreq_link);
to = list_next_entry(to, rreq_link);
@@ -256,14 +259,34 @@ void netfs_retry_reads(struct netfs_io_request *rreq)
{
struct netfs_io_subrequest *subreq;
struct netfs_io_stream *stream = &rreq->io_streams[0];
+ DEFINE_WAIT(myself);
+
+ netfs_stat(&netfs_n_rh_retry_read_req);
+
+ set_bit(NETFS_RREQ_RETRYING, &rreq->flags);
/* Wait for all outstanding I/O to quiesce before performing retries as
* we may need to renegotiate the I/O sizes.
*/
list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
- wait_on_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS,
- TASK_UNINTERRUPTIBLE);
+ if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
+ continue;
+
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
+ for (;;) {
+ prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
+
+ if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
+ break;
+
+ trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for);
+ schedule();
+ trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
+ }
+
+ finish_wait(&rreq->waitq, &myself);
}
+ clear_bit(NETFS_RREQ_RETRYING, &rreq->flags);
trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit);
netfs_retry_read_subrequests(rreq);
diff --git a/fs/netfs/stats.c b/fs/netfs/stats.c
index f1af344266cc..ab6b916addc4 100644
--- a/fs/netfs/stats.c
+++ b/fs/netfs/stats.c
@@ -29,6 +29,8 @@ atomic_t netfs_n_rh_write_begin;
atomic_t netfs_n_rh_write_done;
atomic_t netfs_n_rh_write_failed;
atomic_t netfs_n_rh_write_zskip;
+atomic_t netfs_n_rh_retry_read_req;
+atomic_t netfs_n_rh_retry_read_subreq;
atomic_t netfs_n_wh_buffered_write;
atomic_t netfs_n_wh_writethrough;
atomic_t netfs_n_wh_dio_write;
@@ -41,6 +43,8 @@ atomic_t netfs_n_wh_upload_failed;
atomic_t netfs_n_wh_write;
atomic_t netfs_n_wh_write_done;
atomic_t netfs_n_wh_write_failed;
+atomic_t netfs_n_wh_retry_write_req;
+atomic_t netfs_n_wh_retry_write_subreq;
atomic_t netfs_n_wb_lock_skip;
atomic_t netfs_n_wb_lock_wait;
atomic_t netfs_n_folioq;
@@ -81,6 +85,11 @@ int netfs_stats_show(struct seq_file *m, void *v)
atomic_read(&netfs_n_wh_write),
atomic_read(&netfs_n_wh_write_done),
atomic_read(&netfs_n_wh_write_failed));
+ seq_printf(m, "Retries: rq=%u rs=%u wq=%u ws=%u\n",
+ atomic_read(&netfs_n_rh_retry_read_req),
+ atomic_read(&netfs_n_rh_retry_read_subreq),
+ atomic_read(&netfs_n_wh_retry_write_req),
+ atomic_read(&netfs_n_wh_retry_write_subreq));
seq_printf(m, "Objs : rr=%u sr=%u foq=%u wsc=%u\n",
atomic_read(&netfs_n_rh_rreq),
atomic_read(&netfs_n_rh_sreq),
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index 69727411683e..77279fc5b5a7 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -253,6 +253,7 @@ void netfs_reissue_write(struct netfs_io_stream *stream,
subreq->retry_count++;
__clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
+ netfs_stat(&netfs_n_wh_retry_write_subreq);
netfs_do_issue_write(stream, subreq);
}
diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c
index c841a851dd73..545d33079a77 100644
--- a/fs/netfs/write_retry.c
+++ b/fs/netfs/write_retry.c
@@ -203,6 +203,8 @@ void netfs_retry_writes(struct netfs_io_request *wreq)
struct netfs_io_stream *stream;
int s;
+ netfs_stat(&netfs_n_wh_retry_write_req);
+
/* Wait for all outstanding I/O to quiesce before performing retries as
* we may need to renegotiate the I/O sizes.
*/
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 071d05d81d38..c86a11cfc4a3 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -278,7 +278,7 @@ struct netfs_io_request {
#define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */
#define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */
#define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */
-#define NETFS_RREQ_NEED_RETRY 14 /* Need to try retrying */
+#define NETFS_RREQ_RETRYING 14 /* Set if we're in the retry path */
#define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark
* write to cache on read */
const struct netfs_request_ops *netfs_ops;
diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index 6e699cadcb29..f880835f7695 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -99,7 +99,7 @@
EM(netfs_sreq_trace_limited, "LIMIT") \
EM(netfs_sreq_trace_need_clear, "N-CLR") \
EM(netfs_sreq_trace_partial_read, "PARTR") \
- EM(netfs_sreq_trace_need_retry, "NRTRY") \
+ EM(netfs_sreq_trace_need_retry, "ND-RT") \
EM(netfs_sreq_trace_prepare, "PREP ") \
EM(netfs_sreq_trace_prep_failed, "PRPFL") \
EM(netfs_sreq_trace_progress, "PRGRS") \
@@ -108,7 +108,9 @@
EM(netfs_sreq_trace_short, "SHORT") \
EM(netfs_sreq_trace_split, "SPLIT") \
EM(netfs_sreq_trace_submit, "SUBMT") \
+ EM(netfs_sreq_trace_superfluous, "SPRFL") \
EM(netfs_sreq_trace_terminated, "TERM ") \
+ EM(netfs_sreq_trace_wait_for, "_WAIT") \
EM(netfs_sreq_trace_write, "WRITE") \
EM(netfs_sreq_trace_write_skip, "SKIP ") \
E_(netfs_sreq_trace_write_term, "WTERM")
diff --git a/kernel/acct.c b/kernel/acct.c
index 31222e8cd534..6520baa13669 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -103,48 +103,50 @@ struct bsd_acct_struct {
atomic_long_t count;
struct rcu_head rcu;
struct mutex lock;
- int active;
+ bool active;
+ bool check_space;
unsigned long needcheck;
struct file *file;
struct pid_namespace *ns;
struct work_struct work;
struct completion done;
+ acct_t ac;
};
-static void do_acct_process(struct bsd_acct_struct *acct);
+static void fill_ac(struct bsd_acct_struct *acct);
+static void acct_write_process(struct bsd_acct_struct *acct);
/*
* Check the amount of free space and suspend/resume accordingly.
*/
-static int check_free_space(struct bsd_acct_struct *acct)
+static bool check_free_space(struct bsd_acct_struct *acct)
{
struct kstatfs sbuf;
- if (time_is_after_jiffies(acct->needcheck))
- goto out;
+ if (!acct->check_space)
+ return acct->active;
/* May block */
if (vfs_statfs(&acct->file->f_path, &sbuf))
- goto out;
+ return acct->active;
if (acct->active) {
u64 suspend = sbuf.f_blocks * SUSPEND;
do_div(suspend, 100);
if (sbuf.f_bavail <= suspend) {
- acct->active = 0;
+ acct->active = false;
pr_info("Process accounting paused\n");
}
} else {
u64 resume = sbuf.f_blocks * RESUME;
do_div(resume, 100);
if (sbuf.f_bavail >= resume) {
- acct->active = 1;
+ acct->active = true;
pr_info("Process accounting resumed\n");
}
}
acct->needcheck = jiffies + ACCT_TIMEOUT*HZ;
-out:
return acct->active;
}
@@ -189,7 +191,11 @@ static void acct_pin_kill(struct fs_pin *pin)
{
struct bsd_acct_struct *acct = to_acct(pin);
mutex_lock(&acct->lock);
- do_acct_process(acct);
+ /*
+ * Fill the accounting struct with the exiting task's info
+ * before punting to the workqueue.
+ */
+ fill_ac(acct);
schedule_work(&acct->work);
wait_for_completion(&acct->done);
cmpxchg(&acct->ns->bacct, pin, NULL);
@@ -202,6 +208,9 @@ static void close_work(struct work_struct *work)
{
struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work);
struct file *file = acct->file;
+
+ /* We were fired by acct_pin_kill() which holds acct->lock. */
+ acct_write_process(acct);
if (file->f_op->flush)
file->f_op->flush(file, NULL);
__fput_sync(file);
@@ -234,6 +243,20 @@ static int acct_on(struct filename *pathname)
return -EACCES;
}
+ /* Exclude kernel kernel internal filesystems. */
+ if (file_inode(file)->i_sb->s_flags & (SB_NOUSER | SB_KERNMOUNT)) {
+ kfree(acct);
+ filp_close(file, NULL);
+ return -EINVAL;
+ }
+
+ /* Exclude procfs and sysfs. */
+ if (file_inode(file)->i_sb->s_iflags & SB_I_USERNS_VISIBLE) {
+ kfree(acct);
+ filp_close(file, NULL);
+ return -EINVAL;
+ }
+
if (!(file->f_mode & FMODE_CAN_WRITE)) {
kfree(acct);
filp_close(file, NULL);
@@ -430,13 +453,27 @@ static u32 encode_float(u64 value)
* do_exit() or when switching to a different output file.
*/
-static void fill_ac(acct_t *ac)
+static void fill_ac(struct bsd_acct_struct *acct)
{
struct pacct_struct *pacct = &current->signal->pacct;
+ struct file *file = acct->file;
+ acct_t *ac = &acct->ac;
u64 elapsed, run_time;
time64_t btime;
struct tty_struct *tty;
+ lockdep_assert_held(&acct->lock);
+
+ if (time_is_after_jiffies(acct->needcheck)) {
+ acct->check_space = false;
+
+ /* Don't fill in @ac if nothing will be written. */
+ if (!acct->active)
+ return;
+ } else {
+ acct->check_space = true;
+ }
+
/*
* Fill the accounting struct with the needed info as recorded
* by the different kernel functions.
@@ -484,64 +521,61 @@ static void fill_ac(acct_t *ac)
ac->ac_majflt = encode_comp_t(pacct->ac_majflt);
ac->ac_exitcode = pacct->ac_exitcode;
spin_unlock_irq(&current->sighand->siglock);
-}
-/*
- * do_acct_process does all actual work. Caller holds the reference to file.
- */
-static void do_acct_process(struct bsd_acct_struct *acct)
-{
- acct_t ac;
- unsigned long flim;
- const struct cred *orig_cred;
- struct file *file = acct->file;
- /*
- * Accounting records are not subject to resource limits.
- */
- flim = rlimit(RLIMIT_FSIZE);
- current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
- /* Perform file operations on behalf of whoever enabled accounting */
- orig_cred = override_creds(file->f_cred);
-
- /*
- * First check to see if there is enough free_space to continue
- * the process accounting system.
- */
- if (!check_free_space(acct))
- goto out;
-
- fill_ac(&ac);
/* we really need to bite the bullet and change layout */
- ac.ac_uid = from_kuid_munged(file->f_cred->user_ns, orig_cred->uid);
- ac.ac_gid = from_kgid_munged(file->f_cred->user_ns, orig_cred->gid);
+ ac->ac_uid = from_kuid_munged(file->f_cred->user_ns, current_uid());
+ ac->ac_gid = from_kgid_munged(file->f_cred->user_ns, current_gid());
#if ACCT_VERSION == 1 || ACCT_VERSION == 2
/* backward-compatible 16 bit fields */
- ac.ac_uid16 = ac.ac_uid;
- ac.ac_gid16 = ac.ac_gid;
+ ac->ac_uid16 = ac->ac_uid;
+ ac->ac_gid16 = ac->ac_gid;
#elif ACCT_VERSION == 3
{
struct pid_namespace *ns = acct->ns;
- ac.ac_pid = task_tgid_nr_ns(current, ns);
+ ac->ac_pid = task_tgid_nr_ns(current, ns);
rcu_read_lock();
- ac.ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent),
- ns);
+ ac->ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent), ns);
rcu_read_unlock();
}
#endif
+}
+
+static void acct_write_process(struct bsd_acct_struct *acct)
+{
+ struct file *file = acct->file;
+ const struct cred *cred;
+ acct_t *ac = &acct->ac;
+
+ /* Perform file operations on behalf of whoever enabled accounting */
+ cred = override_creds(file->f_cred);
+
/*
- * Get freeze protection. If the fs is frozen, just skip the write
- * as we could deadlock the system otherwise.
+ * First check to see if there is enough free_space to continue
+ * the process accounting system. Then get freeze protection. If
+ * the fs is frozen, just skip the write as we could deadlock
+ * the system otherwise.
*/
- if (file_start_write_trylock(file)) {
+ if (check_free_space(acct) && file_start_write_trylock(file)) {
/* it's been opened O_APPEND, so position is irrelevant */
loff_t pos = 0;
- __kernel_write(file, &ac, sizeof(acct_t), &pos);
+ __kernel_write(file, ac, sizeof(acct_t), &pos);
file_end_write(file);
}
-out:
+
+ revert_creds(cred);
+}
+
+static void do_acct_process(struct bsd_acct_struct *acct)
+{
+ unsigned long flim;
+
+ /* Accounting records are not subject to resource limits. */
+ flim = rlimit(RLIMIT_FSIZE);
+ current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
+ fill_ac(acct);
+ acct_write_process(acct);
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
- revert_creds(orig_cred);
}
/**