aboutsummaryrefslogtreecommitdiffstats
path: root/fs/dcache.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/dcache.c')
-rw-r--r--fs/dcache.c206
1 files changed, 147 insertions, 59 deletions
diff --git a/fs/dcache.c b/fs/dcache.c
index b60ddc41d783..40469044088d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -141,18 +141,25 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
* Compare 2 name strings, return 0 if they match, otherwise non-zero.
* The strings are both count bytes long, and count is non-zero.
*/
-static inline int dentry_cmp(const unsigned char *cs, size_t scount,
- const unsigned char *ct, size_t tcount)
-{
#ifdef CONFIG_DCACHE_WORD_ACCESS
- unsigned long a,b,mask;
- if (unlikely(scount != tcount))
- return 1;
+#include <asm/word-at-a-time.h>
+/*
+ * NOTE! 'cs' and 'scount' come from a dentry, so it has a
+ * aligned allocation for this particular component. We don't
+ * strictly need the load_unaligned_zeropad() safety, but it
+ * doesn't hurt either.
+ *
+ * In contrast, 'ct' and 'tcount' can be from a pathname, and do
+ * need the careful unaligned handling.
+ */
+static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
+{
+ unsigned long a,b,mask;
for (;;) {
a = *(unsigned long *)cs;
- b = *(unsigned long *)ct;
+ b = load_unaligned_zeropad(ct);
if (tcount < sizeof(unsigned long))
break;
if (unlikely(a != b))
@@ -165,10 +172,12 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount,
}
mask = ~(~0ul << tcount*8);
return unlikely(!!((a ^ b) & mask));
+}
+
#else
- if (scount != tcount)
- return 1;
+static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
+{
do {
if (*cs != *ct)
return 1;
@@ -177,7 +186,32 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount,
tcount--;
} while (tcount);
return 0;
+}
+
#endif
+
+static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
+{
+ const unsigned char *cs;
+ /*
+ * Be careful about RCU walk racing with rename:
+ * use ACCESS_ONCE to fetch the name pointer.
+ *
+ * NOTE! Even if a rename will mean that the length
+ * was not loaded atomically, we don't care. The
+ * RCU walk will check the sequence count eventually,
+ * and catch it. And we won't overrun the buffer,
+ * because we're reading the name pointer atomically,
+ * and a dentry name is guaranteed to be properly
+ * terminated with a NUL byte.
+ *
+ * End result: even if 'len' is wrong, we'll exit
+ * early because the data cannot match (there can
+ * be no NUL in the ct/tcount data)
+ */
+ cs = ACCESS_ONCE(dentry->d_name.name);
+ smp_read_barrier_depends();
+ return dentry_string_cmp(cs, ct, tcount);
}
static void __d_free(struct rcu_head *head)
@@ -1240,6 +1274,13 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
if (!dentry)
return NULL;
+ /*
+ * We guarantee that the inline name is always NUL-terminated.
+ * This way the memcpy() done by the name switching in rename
+ * will still always have a NUL at the end, even if we might
+ * be overwriting an internal NUL character
+ */
+ dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
if (name->len > DNAME_INLINE_LEN-1) {
dname = kmalloc(name->len + 1, GFP_KERNEL);
if (!dname) {
@@ -1249,13 +1290,16 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
} else {
dname = dentry->d_iname;
}
- dentry->d_name.name = dname;
dentry->d_name.len = name->len;
dentry->d_name.hash = name->hash;
memcpy(dname, name->name, name->len);
dname[name->len] = 0;
+ /* Make sure we always see the terminating NUL character */
+ smp_wmb();
+ dentry->d_name.name = dname;
+
dentry->d_count = 1;
dentry->d_flags = 0;
spin_lock_init(&dentry->d_lock);
@@ -1421,18 +1465,18 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
}
list_for_each_entry(alias, &inode->i_dentry, d_alias) {
- struct qstr *qstr = &alias->d_name;
-
/*
* Don't need alias->d_lock here, because aliases with
* d_parent == entry->d_parent are not subject to name or
* parent changes, because the parent inode i_mutex is held.
*/
- if (qstr->hash != hash)
+ if (alias->d_name.hash != hash)
continue;
if (alias->d_parent != entry->d_parent)
continue;
- if (dentry_cmp(qstr->name, qstr->len, name, len))
+ if (alias->d_name.len != len)
+ continue;
+ if (dentry_cmp(alias, name, len))
continue;
__dget(alias);
return alias;
@@ -1471,7 +1515,7 @@ struct dentry *d_make_root(struct inode *root_inode)
struct dentry *res = NULL;
if (root_inode) {
- static const struct qstr name = { .name = "/", .len = 1 };
+ static const struct qstr name = QSTR_INIT("/", 1);
res = __d_alloc(root_inode->i_sb, &name);
if (res)
@@ -1709,6 +1753,48 @@ err_out:
}
EXPORT_SYMBOL(d_add_ci);
+/*
+ * Do the slow-case of the dentry name compare.
+ *
+ * Unlike the dentry_cmp() function, we need to atomically
+ * load the name, length and inode information, so that the
+ * filesystem can rely on them, and can use the 'name' and
+ * 'len' information without worrying about walking off the
+ * end of memory etc.
+ *
+ * Thus the read_seqcount_retry() and the "duplicate" info
+ * in arguments (the low-level filesystem should not look
+ * at the dentry inode or name contents directly, since
+ * rename can change them while we're in RCU mode).
+ */
+enum slow_d_compare {
+ D_COMP_OK,
+ D_COMP_NOMATCH,
+ D_COMP_SEQRETRY,
+};
+
+static noinline enum slow_d_compare slow_dentry_cmp(
+ const struct dentry *parent,
+ struct inode *inode,
+ struct dentry *dentry,
+ unsigned int seq,
+ const struct qstr *name)
+{
+ int tlen = dentry->d_name.len;
+ const char *tname = dentry->d_name.name;
+ struct inode *i = dentry->d_inode;
+
+ if (read_seqcount_retry(&dentry->d_seq, seq)) {
+ cpu_relax();
+ return D_COMP_SEQRETRY;
+ }
+ if (parent->d_op->d_compare(parent, inode,
+ dentry, i,
+ tlen, tname, name))
+ return D_COMP_NOMATCH;
+ return D_COMP_OK;
+}
+
/**
* __d_lookup_rcu - search for a dentry (racy, store-free)
* @parent: parent dentry
@@ -1735,15 +1821,17 @@ EXPORT_SYMBOL(d_add_ci);
* the returned dentry, so long as its parent's seqlock is checked after the
* child is looked up. Thus, an interlocking stepping of sequence lock checks
* is formed, giving integrity down the path walk.
+ *
+ * NOTE! The caller *has* to check the resulting dentry against the sequence
+ * number we've returned before using any of the resulting dentry state!
*/
struct dentry *__d_lookup_rcu(const struct dentry *parent,
const struct qstr *name,
- unsigned *seqp, struct inode **inode)
+ unsigned *seqp, struct inode *inode)
{
- unsigned int len = name->len;
- unsigned int hash = name->hash;
+ u64 hashlen = name->hash_len;
const unsigned char *str = name->name;
- struct hlist_bl_head *b = d_hash(parent, hash);
+ struct hlist_bl_head *b = d_hash(parent, hashlen_hash(hashlen));
struct hlist_bl_node *node;
struct dentry *dentry;
@@ -1769,49 +1857,47 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent,
*/
hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
unsigned seq;
- struct inode *i;
- const char *tname;
- int tlen;
-
- if (dentry->d_name.hash != hash)
- continue;
seqretry:
- seq = read_seqcount_begin(&dentry->d_seq);
+ /*
+ * The dentry sequence count protects us from concurrent
+ * renames, and thus protects inode, parent and name fields.
+ *
+ * The caller must perform a seqcount check in order
+ * to do anything useful with the returned dentry,
+ * including using the 'd_inode' pointer.
+ *
+ * NOTE! We do a "raw" seqcount_begin here. That means that
+ * we don't wait for the sequence count to stabilize if it
+ * is in the middle of a sequence change. If we do the slow
+ * dentry compare, we will do seqretries until it is stable,
+ * and if we end up with a successful lookup, we actually
+ * want to exit RCU lookup anyway.
+ */
+ seq = raw_seqcount_begin(&dentry->d_seq);
if (dentry->d_parent != parent)
continue;
if (d_unhashed(dentry))
continue;
- tlen = dentry->d_name.len;
- tname = dentry->d_name.name;
- i = dentry->d_inode;
- prefetch(tname);
- /*
- * This seqcount check is required to ensure name and
- * len are loaded atomically, so as not to walk off the
- * edge of memory when walking. If we could load this
- * atomically some other way, we could drop this check.
- */
- if (read_seqcount_retry(&dentry->d_seq, seq))
- goto seqretry;
+ *seqp = seq;
+
if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
- if (parent->d_op->d_compare(parent, *inode,
- dentry, i,
- tlen, tname, name))
+ if (dentry->d_name.hash != hashlen_hash(hashlen))
continue;
- } else {
- if (dentry_cmp(tname, tlen, str, len))
+ switch (slow_dentry_cmp(parent, inode, dentry, seq, name)) {
+ case D_COMP_OK:
+ return dentry;
+ case D_COMP_NOMATCH:
continue;
+ default:
+ goto seqretry;
+ }
}
- /*
- * No extra seqcount check is required after the name
- * compare. The caller must perform a seqcount check in
- * order to do anything useful with the returned dentry
- * anyway.
- */
- *seqp = seq;
- *inode = i;
- return dentry;
+
+ if (dentry->d_name.hash_len != hashlen)
+ continue;
+ if (!dentry_cmp(dentry, str, hashlen_len(hashlen)))
+ return dentry;
}
return NULL;
}
@@ -1890,8 +1976,6 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
rcu_read_lock();
hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
- const char *tname;
- int tlen;
if (dentry->d_name.hash != hash)
continue;
@@ -1906,15 +1990,17 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
* It is safe to compare names since d_move() cannot
* change the qstr (protected by d_lock).
*/
- tlen = dentry->d_name.len;
- tname = dentry->d_name.name;
if (parent->d_flags & DCACHE_OP_COMPARE) {
+ int tlen = dentry->d_name.len;
+ const char *tname = dentry->d_name.name;
if (parent->d_op->d_compare(parent, parent->d_inode,
dentry, dentry->d_inode,
tlen, tname, name))
goto next;
} else {
- if (dentry_cmp(tname, tlen, str, len))
+ if (dentry->d_name.len != len)
+ goto next;
+ if (dentry_cmp(dentry, str, len))
goto next;
}
@@ -2489,7 +2575,7 @@ static int prepend_path(const struct path *path,
bool slash = false;
int error = 0;
- br_read_lock(vfsmount_lock);
+ br_read_lock(&vfsmount_lock);
while (dentry != root->dentry || vfsmnt != root->mnt) {
struct dentry * parent;
@@ -2520,7 +2606,7 @@ static int prepend_path(const struct path *path,
error = prepend(buffer, buflen, "/", 1);
out:
- br_read_unlock(vfsmount_lock);
+ br_read_unlock(&vfsmount_lock);
return error;
global_root:
@@ -3007,6 +3093,7 @@ static void __init dcache_init_early(void)
HASH_EARLY,
&d_hash_shift,
&d_hash_mask,
+ 0,
0);
for (loop = 0; loop < (1U << d_hash_shift); loop++)
@@ -3037,6 +3124,7 @@ static void __init dcache_init(void)
0,
&d_hash_shift,
&d_hash_mask,
+ 0,
0);
for (loop = 0; loop < (1U << d_hash_shift); loop++)