/*
 * Copyright (C) 2008 Oracle. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
#include <linux/sched.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include "ctree.h"
#include "extent_io.h"
#include "locking.h"

static inline void spin_nested(struct extent_buffer *eb)
{
	spin_lock(&eb->lock);
}

/*
 * Setting a lock to blocking will drop the spinlock and set the
 * flag that forces other procs who want the lock to wait. After
 * this you can safely schedule with the lock held.
 */
void btrfs_set_lock_blocking(struct extent_buffer *eb)
{
	if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
		set_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags);
		spin_unlock(&eb->lock);
	}
	/* exit with the spin lock released and the bit set */
}

/*
 * Clearing the blocking flag will take the spinlock again.
 * After this you can't safely schedule.
 */
void btrfs_clear_lock_blocking(struct extent_buffer *eb)
{
	if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
		spin_nested(eb);
		clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags);
		smp_mb__after_clear_bit();
	}
	/* exit with the spin lock held */
}

/*
 * Unfortunately, many of the places that currently set a lock to blocking
 * don't end up blocking for very long, and often they don't block
 * at all. For a dbench 50 run, if we don't spin on the blocking bit
 * at all, the context switch rate can jump up to 400,000/sec or more.
 *
 * So, we're still stuck with this crummy spin on the blocking bit,
 * at least until the most common causes of the short blocks
 * can be dealt with.
 */
static int btrfs_spin_on_block(struct extent_buffer *eb)
{
	int i;

	for (i = 0; i < 512; i++) {
		if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
			return 1;
		if (need_resched())
			break;
		cpu_relax();
	}
	return 0;
}

/*
 * This is somewhat different from trylock. It will take the
 * spinlock but if it finds the lock is set to blocking, it will
 * return without the lock held.
 *
 * returns 1 if it was able to take the lock and zero otherwise
 *
 * After this call, scheduling is not safe without first calling
 * btrfs_set_lock_blocking()
 */
int btrfs_try_spin_lock(struct extent_buffer *eb)
{
	int i;

	if (btrfs_spin_on_block(eb)) {
		spin_nested(eb);
		if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
			return 1;
		spin_unlock(&eb->lock);
	}
	/* spin for a bit on the BLOCKING flag */
	for (i = 0; i < 2; i++) {
		cpu_relax();
		if (!btrfs_spin_on_block(eb))
			break;
		spin_nested(eb);
		if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
			return 1;
		spin_unlock(&eb->lock);
	}
	return 0;
}
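/*
 * An illustrative sketch of the intended usage pattern, assuming the
 * caller already holds a reference to the extent buffer (the real
 * callers live elsewhere in btrfs, e.g. ctree.c):
 *
 *	btrfs_tree_lock(eb);             take the spinning lock
 *	... short work that must not schedule ...
 *	btrfs_set_lock_blocking(eb);     drop the spinlock, set the bit
 *	... work that may schedule, e.g. reading a block from disk ...
 *	btrfs_clear_lock_blocking(eb);   retake the spinlock, clear the bit
 *	btrfs_tree_unlock(eb);           works from either state
 */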
/*
 * The autoremove wake function will return 0 if it tried to wake up
 * a process that was already awake, which means that process won't
 * count as an exclusive wakeup. The waitq code will continue waking
 * procs until it finds one that was actually sleeping.
 *
 * For btrfs, this isn't quite what we want. We want a single proc
 * to be notified that the lock is ready for taking. If that proc
 * already happens to be awake, great, it will loop around and try for
 * the lock.
 *
 * So, btrfs_wake_function always returns 1, even when the proc that we
 * tried to wake up was already awake.
 */
static int btrfs_wake_function(wait_queue_t *wait, unsigned mode,
			       int sync, void *key)
{
	autoremove_wake_function(wait, mode, sync, key);
	return 1;
}

/*
 * returns with the extent buffer spinlocked.
 *
 * This will spin and/or wait as required to take the lock, and then
 * return with the spinlock held.
 *
 * After this call, scheduling is not safe without first calling
 * btrfs_set_lock_blocking()
 */
int btrfs_tree_lock(struct extent_buffer *eb)
{
	DEFINE_WAIT(wait);

	wait.func = btrfs_wake_function;

	if (!btrfs_spin_on_block(eb))
		goto sleep;

	while (1) {
		spin_nested(eb);

		/* nobody is blocking, exit with the spinlock held */
		if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
			return 0;

		/*
		 * we have the spinlock, but the real owner is blocking.
		 * wait for them
		 */
		spin_unlock(&eb->lock);

		/*
		 * spin for a bit, and if the blocking flag goes away,
		 * loop around
		 */
		cpu_relax();
		if (btrfs_spin_on_block(eb))
			continue;
sleep:
		prepare_to_wait_exclusive(&eb->lock_wq, &wait,
					  TASK_UNINTERRUPTIBLE);

		if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
			schedule();

		finish_wait(&eb->lock_wq, &wait);
	}
	return 0;
}

/*
 * Very quick trylock, this does not spin or schedule. It returns
 * 1 with the spinlock held if it was able to take the lock, or it
 * returns zero if it was unable to take the lock.
 *
 * After this call, scheduling is not safe without first calling
 * btrfs_set_lock_blocking()
 */
int btrfs_try_tree_lock(struct extent_buffer *eb)
{
	if (spin_trylock(&eb->lock)) {
		if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) {
			/*
			 * we've got the spinlock, but the real owner is
			 * blocking. Drop the spinlock and return failure
			 */
			spin_unlock(&eb->lock);
			return 0;
		}
		return 1;
	}
	/* someone else has the spinlock, give up */
	return 0;
}

int btrfs_tree_unlock(struct extent_buffer *eb)
{
	/*
	 * if we were a blocking owner, we don't have the spinlock held.
	 * just clear the bit and look for waiters
	 */
	if (test_and_clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
		smp_mb__after_clear_bit();
	else
		spin_unlock(&eb->lock);

	if (waitqueue_active(&eb->lock_wq))
		wake_up(&eb->lock_wq);
	return 0;
}

void btrfs_assert_tree_locked(struct extent_buffer *eb)
{
	if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags))
		assert_spin_locked(&eb->lock);
}
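/*
 * An illustrative sketch, not taken from btrfs itself: a caller that
 * wants to avoid spinning or sleeping can try the quick trylock first
 * and fall back to the full lock only when it fails:
 *
 *	if (!btrfs_try_tree_lock(eb))
 *		btrfs_tree_lock(eb);
 *	... critical section; no scheduling without set_lock_blocking ...
 *	btrfs_tree_unlock(eb);
 */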