about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/fs/inode.c
diff options
context:
space:
mode:
author: Christian Brauner <brauner@kernel.org> 2024-08-26 14:19:53 +0200
committer: Christian Brauner <brauner@kernel.org> 2024-08-30 08:22:40 +0200
commit: 41b734352c1314807e2eea610023a9d9a340070f (patch)
tree: 97146b25c5e92523ca723bf81de3dd5a8a2d91df /fs/inode.c
parent: vfs: fix race between evice_inodes() and find_inode()&iput() (diff)
parent: inode: make i_state a u32 (diff)
download: wireguard-linux-41b734352c1314807e2eea610023a9d9a340070f.tar.xz
download: wireguard-linux-41b734352c1314807e2eea610023a9d9a340070f.zip
Merge patch series "fs: add i_state helpers"
Christian Brauner <brauner@kernel.org> says:

I've recently looked for some free space in struct inode again because of some exec kerfuffle we had, and while my idea didn't turn into anything I noticed that we often waste bytes when using wait bit operations. So I set out to switch that to another mechanism that would allow us to free up bytes. So this is an attempt to turn i_state from an unsigned long into a u32, using the individual bytes of i_state as addresses for the wait var event mechanism (thanks to Linus for that idea).

This survives LTP, xfstests on various filesystems, and will-it-scale.

* patches from https://lore.kernel.org/r/20240823-work-i_state-v3-1-5cd5fd207a57@kernel.org:
  inode: make i_state a u32
  inode: port __I_LRU_ISOLATING to var event
  inode: port __I_NEW to var event
  inode: port __I_SYNC to var event
  fs: reorder i_state bits
  fs: add i_state helpers

Link: https://lore.kernel.org/r/20240823-work-i_state-v3-1-5cd5fd207a57@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
Diffstat (limited to 'fs/inode.c')
-rw-r--r-- fs/inode.c 70
1 files changed, 54 insertions, 16 deletions
diff --git a/fs/inode.c b/fs/inode.c
index ba1645a09603..aacd05749c1f 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -472,6 +472,17 @@ static void __inode_add_lru(struct inode *inode, bool rotate)
inode->i_state |= I_REFERENCED;
}
+struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe,
+ struct inode *inode, u32 bit)
+{
+ void *bit_address;
+
+ bit_address = inode_state_wait_address(inode, bit);
+ init_wait_var_entry(wqe, bit_address, 0);
+ return __var_waitqueue(bit_address);
+}
+EXPORT_SYMBOL(inode_bit_waitqueue);
+
/*
* Add inode to LRU if needed (inode is unused and clean).
*
@@ -500,24 +511,35 @@ static void inode_unpin_lru_isolating(struct inode *inode)
spin_lock(&inode->i_lock);
WARN_ON(!(inode->i_state & I_LRU_ISOLATING));
inode->i_state &= ~I_LRU_ISOLATING;
- smp_mb();
- wake_up_bit(&inode->i_state, __I_LRU_ISOLATING);
+ /* Called with inode->i_lock which ensures memory ordering. */
+ inode_wake_up_bit(inode, __I_LRU_ISOLATING);
spin_unlock(&inode->i_lock);
}
static void inode_wait_for_lru_isolating(struct inode *inode)
{
+ struct wait_bit_queue_entry wqe;
+ struct wait_queue_head *wq_head;
+
lockdep_assert_held(&inode->i_lock);
- if (inode->i_state & I_LRU_ISOLATING) {
- DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LRU_ISOLATING);
- wait_queue_head_t *wqh;
+ if (!(inode->i_state & I_LRU_ISOLATING))
+ return;
- wqh = bit_waitqueue(&inode->i_state, __I_LRU_ISOLATING);
+ wq_head = inode_bit_waitqueue(&wqe, inode, __I_LRU_ISOLATING);
+ for (;;) {
+ prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
+ /*
+ * Checking I_LRU_ISOLATING with inode->i_lock guarantees
+ * memory ordering.
+ */
+ if (!(inode->i_state & I_LRU_ISOLATING))
+ break;
spin_unlock(&inode->i_lock);
- __wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE);
+ schedule();
spin_lock(&inode->i_lock);
- WARN_ON(inode->i_state & I_LRU_ISOLATING);
}
+ finish_wait(wq_head, &wqe.wq_entry);
+ WARN_ON(inode->i_state & I_LRU_ISOLATING);
}
/**
@@ -723,7 +745,13 @@ static void evict(struct inode *inode)
* used as an indicator whether blocking on it is safe.
*/
spin_lock(&inode->i_lock);
- wake_up_bit(&inode->i_state, __I_NEW);
+ /*
+ * Pairs with the barrier in prepare_to_wait_event() to make sure
+ * ___wait_var_event() either sees the bit cleared or
+ * waitqueue_active() check in wake_up_var() sees the waiter.
+ */
+ smp_mb();
+ inode_wake_up_bit(inode, __I_NEW);
BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
spin_unlock(&inode->i_lock);
@@ -1135,8 +1163,13 @@ void unlock_new_inode(struct inode *inode)
spin_lock(&inode->i_lock);
WARN_ON(!(inode->i_state & I_NEW));
inode->i_state &= ~I_NEW & ~I_CREATING;
+ /*
+ * Pairs with the barrier in prepare_to_wait_event() to make sure
+ * ___wait_var_event() either sees the bit cleared or
+ * waitqueue_active() check in wake_up_var() sees the waiter.
+ */
smp_mb();
- wake_up_bit(&inode->i_state, __I_NEW);
+ inode_wake_up_bit(inode, __I_NEW);
spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(unlock_new_inode);
@@ -1147,8 +1180,13 @@ void discard_new_inode(struct inode *inode)
spin_lock(&inode->i_lock);
WARN_ON(!(inode->i_state & I_NEW));
inode->i_state &= ~I_NEW;
+ /*
+ * Pairs with the barrier in prepare_to_wait_event() to make sure
+ * ___wait_var_event() either sees the bit cleared or
+ * waitqueue_active() check in wake_up_var() sees the waiter.
+ */
smp_mb();
- wake_up_bit(&inode->i_state, __I_NEW);
+ inode_wake_up_bit(inode, __I_NEW);
spin_unlock(&inode->i_lock);
iput(inode);
}
@@ -2337,8 +2375,8 @@ EXPORT_SYMBOL(inode_needs_sync);
*/
static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked)
{
- wait_queue_head_t *wq;
- DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
+ struct wait_bit_queue_entry wqe;
+ struct wait_queue_head *wq_head;
/*
* Handle racing against evict(), see that routine for more details.
@@ -2349,14 +2387,14 @@ static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_lock
return;
}
- wq = bit_waitqueue(&inode->i_state, __I_NEW);
- prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
+ wq_head = inode_bit_waitqueue(&wqe, inode, __I_NEW);
+ prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
spin_unlock(&inode->i_lock);
rcu_read_unlock();
if (is_inode_hash_locked)
spin_unlock(&inode_hash_lock);
schedule();
- finish_wait(wq, &wait.wq_entry);
+ finish_wait(wq_head, &wqe.wq_entry);
if (is_inode_hash_locked)
spin_lock(&inode_hash_lock);
rcu_read_lock();