From 1252c434e39dc60ca9e8ed682f3e04930e2c08de Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 30 Oct 2007 12:09:03 -0700 Subject: ocfs2: Documentation update Remove 'readpages' from the list in ocfs2.txt. Instead of having two identical lists, I just removed the list in the OCFS2 section of fs/Kconfig and added a pointer to Documentation/filesystems/ocfs2.txt. Signed-off-by: Mark Fasheh --- Documentation/filesystems/ocfs2.txt | 1 - 1 file changed, 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt index ed55238023a9..e78abdcc59ee 100644 --- a/Documentation/filesystems/ocfs2.txt +++ b/Documentation/filesystems/ocfs2.txt @@ -35,7 +35,6 @@ Features which OCFS2 does not support yet: - Directory change notification (F_NOTIFY) - Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease) - POSIX ACLs - - readpages / writepages (not user visible) Mount options ============= -- cgit v1.2.3-59-g8ed1b From 7f68fc28219be3b44ef4132f95c6506ff3e806b5 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Tue, 18 Dec 2007 15:46:37 +0800 Subject: ocfs2: Reserve ioctl range We need to reserve at least two ioctls for online-resize. Reserve a small range of ioctls for Ocfs2 use in Documentation/ioctl-number.txt. This should give us enough room for future growth. Signed-off-by: Tao Ma Signed-off-by: Mark Fasheh --- Documentation/ioctl-number.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/ioctl-number.txt b/Documentation/ioctl-number.txt index 5c7fbf9d96b4..c18363bd8d11 100644 --- a/Documentation/ioctl-number.txt +++ b/Documentation/ioctl-number.txt @@ -138,6 +138,7 @@ Code Seq# Include File Comments 'm' 00-1F net/irda/irmod.h conflict! 'n' 00-7F linux/ncp_fs.h 'n' E0-FF video/matrox.h matroxfb +'o' 00-1F fs/ocfs2/ocfs2_fs.h OCFS2 'p' 00-0F linux/phantom.h conflict! (OpenHaptics needs this) 'p' 00-3F linux/mc146818rtc.h conflict! 
'p' 40-7F linux/nvram.h -- cgit v1.2.3-59-g8ed1b From d147b3d630edef1d34de6ea819787a1ac1b8603b Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Wed, 7 Nov 2007 14:40:36 -0800 Subject: ocfs2: Support commit= mount option Mostly taken from ext3. This allows the user to set the jbd commit interval, in seconds. The default of 5 seconds stays the same, but now users can easily increase the commit interval. Typically, this would be increased in order to benefit performance at the expense of data-safety. Signed-off-by: Mark Fasheh --- Documentation/filesystems/ocfs2.txt | 11 +++++++++++ fs/ocfs2/journal.c | 8 ++++++-- fs/ocfs2/ocfs2.h | 1 + fs/ocfs2/super.c | 23 +++++++++++++++++++++++ 4 files changed, 41 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt index e78abdcc59ee..b63bd2d7fcd3 100644 --- a/Documentation/filesystems/ocfs2.txt +++ b/Documentation/filesystems/ocfs2.txt @@ -61,3 +61,14 @@ data=writeback Data ordering is not preserved, data may be written preferred_slot=0(*) During mount, try to use this filesystem slot first. If it is in use by another node, the first empty one found will be chosen. Invalid values will be ignored. +commit=nrsec (*) Ocfs2 can be told to sync all its data and metadata + every 'nrsec' seconds. The default value is 5 seconds. + This means that if you lose your power, you will lose + as much as the latest 5 seconds of work (your + filesystem will not be damaged though, thanks to the + journaling). This default value (or any low value) + will hurt performance, but it's good for data-safety. + Setting it to 0 will have the same effect as leaving + it at the default (5 seconds). + Setting it to very large values will improve + performance. 
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 4f440a88bf53..8b9ce2a729ab 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -313,14 +313,18 @@ int ocfs2_journal_dirty_data(handle_t *handle, return err; } -#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * 5) +#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD_DEFAULT_MAX_COMMIT_AGE) void ocfs2_set_journal_params(struct ocfs2_super *osb) { journal_t *journal = osb->journal->j_journal; + unsigned long commit_interval = OCFS2_DEFAULT_COMMIT_INTERVAL; + + if (osb->osb_commit_interval) + commit_interval = osb->osb_commit_interval; spin_lock(&journal->j_state_lock); - journal->j_commit_interval = OCFS2_DEFAULT_COMMIT_INTERVAL; + journal->j_commit_interval = commit_interval; if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) journal->j_flags |= JFS_BARRIER; else diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index f8f866144c6a..82802f5672a1 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -229,6 +229,7 @@ struct ocfs2_super wait_queue_head_t checkpoint_event; atomic_t needs_checkpoint; struct ocfs2_journal *journal; + unsigned long osb_commit_interval; enum ocfs2_local_alloc_state local_alloc_state; struct buffer_head *local_alloc_bh; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 479ac50c86d9..8044ed97d362 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -83,6 +83,7 @@ MODULE_LICENSE("GPL"); struct mount_options { + unsigned long commit_interval; unsigned long mount_opt; unsigned int atime_quantum; signed short slot; @@ -149,6 +150,7 @@ enum { Opt_data_writeback, Opt_atime_quantum, Opt_slot, + Opt_commit, Opt_err, }; @@ -164,6 +166,7 @@ static match_table_t tokens = { {Opt_data_writeback, "data=writeback"}, {Opt_atime_quantum, "atime_quantum=%u"}, {Opt_slot, "preferred_slot=%u"}, + {Opt_commit, "commit=%u"}, {Opt_err, NULL} }; @@ -442,6 +445,8 @@ unlock_osb: osb->s_mount_opt = parsed_options.mount_opt; osb->s_atime_quantum = parsed_options.atime_quantum; osb->preferred_slot = 
parsed_options.slot; + if (parsed_options.commit_interval) + osb->osb_commit_interval = parsed_options.commit_interval; if (!ocfs2_is_hard_readonly(osb)) ocfs2_set_journal_params(osb); @@ -596,6 +601,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) osb->s_mount_opt = parsed_options.mount_opt; osb->s_atime_quantum = parsed_options.atime_quantum; osb->preferred_slot = parsed_options.slot; + osb->osb_commit_interval = parsed_options.commit_interval; sb->s_magic = OCFS2_SUPER_MAGIC; @@ -746,6 +752,7 @@ static int ocfs2_parse_options(struct super_block *sb, mlog_entry("remount: %d, options: \"%s\"\n", is_remount, options ? options : "(none)"); + mopt->commit_interval = 0; mopt->mount_opt = 0; mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; mopt->slot = OCFS2_INVALID_SLOT; @@ -815,6 +822,18 @@ static int ocfs2_parse_options(struct super_block *sb, if (option) mopt->slot = (s16)option; break; + case Opt_commit: + option = 0; + if (match_int(&args[0], &option)) { + status = 0; + goto bail; + } + if (option < 0) + return 0; + if (option == 0) + option = JBD_DEFAULT_MAX_COMMIT_AGE; + mopt->commit_interval = HZ * option; + break; default: mlog(ML_ERROR, "Unrecognized mount option \"%s\" " @@ -863,6 +882,10 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) if (osb->s_atime_quantum != OCFS2_DEFAULT_ATIME_QUANTUM) seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum); + if (osb->osb_commit_interval) + seq_printf(s, ",commit=%u", + (unsigned) (osb->osb_commit_interval / HZ)); + return 0; } -- cgit v1.2.3-59-g8ed1b From 2fbe8d1ebe004425b4f7b8bba345623d2280be82 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Thu, 20 Dec 2007 14:58:11 -0800 Subject: ocfs2: Local alloc window size changeable via mount option Local alloc is a performance optimization in ocfs2 in which a node takes a window of bits from the global bitmap and then uses that for all small local allocations. This window size is fixed to 8MB currently. 
This patch allows users to specify the window size in MB including disabling it by passing in 0. If the number specified is too large, the fs will use the default value of 8MB. mount -o localalloc=X /dev/sdX /mntpoint Signed-off-by: Sunil Mushran Signed-off-by: Mark Fasheh --- Documentation/filesystems/ocfs2.txt | 3 +++ fs/ocfs2/localalloc.c | 42 ++++++++++++++++++++++++++----------- fs/ocfs2/ocfs2.h | 1 + fs/ocfs2/ocfs2_fs.h | 8 +++++++ fs/ocfs2/suballoc.c | 5 +++-- fs/ocfs2/super.c | 17 +++++++++++++++ 6 files changed, 62 insertions(+), 14 deletions(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt index b63bd2d7fcd3..071fad137eb5 100644 --- a/Documentation/filesystems/ocfs2.txt +++ b/Documentation/filesystems/ocfs2.txt @@ -72,3 +72,6 @@ commit=nrsec (*) Ocfs2 can be told to sync all its data and metadata it at the default (5 seconds). Setting it to very large values will improve performance. +localalloc=8(*) Allows custom localalloc size in MB. If the value is too + large, the fs will silently revert it to the default. + Localalloc is not enabled for local mounts. diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 0de0792fce7f..add1ffdc5c6c 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -75,18 +75,12 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, struct inode *local_alloc_inode); -/* - * Determine how large our local alloc window should be, in bits. - * - * These values (and the behavior in ocfs2_alloc_should_use_local) have - * been chosen so that most allocations, including new block groups go - * through local alloc. 
- */ static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb) { - BUG_ON(osb->s_clustersize_bits < 12); + BUG_ON(osb->s_clustersize_bits > 20); - return 2048 >> (osb->s_clustersize_bits - 12); + /* Size local alloc windows by the megabyte */ + return osb->local_alloc_size << (20 - osb->s_clustersize_bits); } /* @@ -96,18 +90,23 @@ static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb) int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) { int la_bits = ocfs2_local_alloc_window_bits(osb); + int ret = 0; if (osb->local_alloc_state != OCFS2_LA_ENABLED) - return 0; + goto bail; /* la_bits should be at least twice the size (in clusters) of * a new block group. We want to be sure block group * allocations go through the local alloc, so allow an * allocation to take up to half the bitmap. */ if (bits > (la_bits / 2)) - return 0; + goto bail; - return 1; + ret = 1; +bail: + mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n", + osb->local_alloc_state, (unsigned long long)bits, la_bits, ret); + return ret; } int ocfs2_load_local_alloc(struct ocfs2_super *osb) @@ -121,6 +120,19 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) mlog_entry_void(); + if (ocfs2_mount_local(osb)) + goto bail; + + if (osb->local_alloc_size == 0) + goto bail; + + if (ocfs2_local_alloc_window_bits(osb) >= osb->bitmap_cpg) { + mlog(ML_NOTICE, "Requested local alloc window %d is larger " + "than max possible %u. 
Using defaults.\n", + ocfs2_local_alloc_window_bits(osb), (osb->bitmap_cpg - 1)); + osb->local_alloc_size = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; + } + /* read the alloc off disk */ inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, osb->slot_num); @@ -181,6 +193,9 @@ bail: if (inode) iput(inode); + mlog(0, "Local alloc window bits = %d\n", + ocfs2_local_alloc_window_bits(osb)); + mlog_exit(status); return status; } @@ -521,6 +536,9 @@ bail: iput(local_alloc_inode); } + mlog(0, "bits=%d, slot=%d, ret=%d\n", bits_wanted, osb->slot_num, + status); + mlog_exit(status); return status; } diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 82802f5672a1..d12bd7036da7 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -231,6 +231,7 @@ struct ocfs2_super struct ocfs2_journal *journal; unsigned long osb_commit_interval; + int local_alloc_size; enum ocfs2_local_alloc_state local_alloc_state; struct buffer_head *local_alloc_bh; u64 la_last_gd; diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 425551737f1f..3633edd3982f 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -270,6 +270,14 @@ struct ocfs2_new_group_input { /* Journal limits (in bytes) */ #define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) +/* + * Default local alloc size (in megabytes) + * + * The value chosen should be such that most allocations, including new + * block groups, use local alloc. + */ +#define OCFS2_DEFAULT_LOCAL_ALLOC_SIZE 8 + struct ocfs2_system_inode_info { char *si_name; int si_iflags; diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 4391744e80f8..7e397e2c25dd 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -1516,8 +1516,9 @@ int __ocfs2_claim_clusters(struct ocfs2_super *osb, if (min_clusters > (osb->bitmap_cpg - 1)) { /* The only paths asking for contiguousness * should know about this already. 
*/ - mlog(ML_ERROR, "minimum allocation requested exceeds " - "group bitmap size!"); + mlog(ML_ERROR, "minimum allocation requested %u exceeds " + "group bitmap size %u!\n", min_clusters, + osb->bitmap_cpg); status = -ENOSPC; goto bail; } diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 8044ed97d362..1104f14c3183 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -87,6 +87,7 @@ struct mount_options unsigned long mount_opt; unsigned int atime_quantum; signed short slot; + unsigned int localalloc_opt; }; static int ocfs2_parse_options(struct super_block *sb, char *options, @@ -151,6 +152,7 @@ enum { Opt_atime_quantum, Opt_slot, Opt_commit, + Opt_localalloc, Opt_err, }; @@ -167,6 +169,7 @@ static match_table_t tokens = { {Opt_atime_quantum, "atime_quantum=%u"}, {Opt_slot, "preferred_slot=%u"}, {Opt_commit, "commit=%u"}, + {Opt_localalloc, "localalloc=%d"}, {Opt_err, NULL} }; @@ -602,6 +605,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) osb->s_atime_quantum = parsed_options.atime_quantum; osb->preferred_slot = parsed_options.slot; osb->osb_commit_interval = parsed_options.commit_interval; + osb->local_alloc_size = parsed_options.localalloc_opt; sb->s_magic = OCFS2_SUPER_MAGIC; @@ -756,6 +760,7 @@ static int ocfs2_parse_options(struct super_block *sb, mopt->mount_opt = 0; mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; mopt->slot = OCFS2_INVALID_SLOT; + mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; if (!options) { status = 1; @@ -834,6 +839,15 @@ static int ocfs2_parse_options(struct super_block *sb, option = JBD_DEFAULT_MAX_COMMIT_AGE; mopt->commit_interval = HZ * option; break; + case Opt_localalloc: + option = 0; + if (match_int(&args[0], &option)) { + status = 0; + goto bail; + } + if (option >= 0 && (option <= ocfs2_local_alloc_size(sb) * 8)) + mopt->localalloc_opt = option; + break; default: mlog(ML_ERROR, "Unrecognized mount option \"%s\" " @@ -886,6 +900,9 @@ static int ocfs2_show_options(struct seq_file 
*s, struct vfsmount *mnt) seq_printf(s, ",commit=%u", (unsigned) (osb->osb_commit_interval / HZ)); + if (osb->local_alloc_size != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE) + seq_printf(s, ",localalloc=%d", osb->local_alloc_size); + return 0; } -- cgit v1.2.3-59-g8ed1b From 53fc622b9e829c8e632e45ef8c14f054388759c1 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Thu, 20 Dec 2007 16:49:04 -0800 Subject: [PATCH 2/2] ocfs2: cluster aware flock() Hook up ocfs2_flock(), using the new flock lock type in dlmglue.c. A new mount option, "localflocks" is added so that users can revert to old functionality as need be. Signed-off-by: Mark Fasheh --- Documentation/filesystems/ocfs2.txt | 1 + fs/ocfs2/Makefile | 1 + fs/ocfs2/file.c | 60 ++++++++++++++++- fs/ocfs2/locks.c | 125 ++++++++++++++++++++++++++++++++++++ fs/ocfs2/locks.h | 31 +++++++++ fs/ocfs2/ocfs2.h | 1 + fs/ocfs2/super.c | 19 ++++++ 7 files changed, 237 insertions(+), 1 deletion(-) create mode 100644 fs/ocfs2/locks.c create mode 100644 fs/ocfs2/locks.h (limited to 'Documentation') diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt index 071fad137eb5..c318a8bbb1ef 100644 --- a/Documentation/filesystems/ocfs2.txt +++ b/Documentation/filesystems/ocfs2.txt @@ -75,3 +75,4 @@ commit=nrsec (*) Ocfs2 can be told to sync all its data and metadata localalloc=8(*) Allows custom localalloc size in MB. If the value is too large, the fs will silently revert it to the default. Localalloc is not enabled for local mounts. +localflocks This disables cluster aware flock. 
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index 3591890b32c6..4d4ce48bb42c 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile @@ -19,6 +19,7 @@ ocfs2-objs := \ ioctl.o \ journal.o \ localalloc.o \ + locks.o \ mmap.o \ namei.o \ resize.o \ diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 432e5f3c4784..caefd571782e 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -51,6 +51,7 @@ #include "inode.h" #include "ioctl.h" #include "journal.h" +#include "locks.h" #include "mmap.h" #include "suballoc.h" #include "super.h" @@ -63,6 +64,35 @@ static int ocfs2_sync_inode(struct inode *inode) return sync_mapping_buffers(inode->i_mapping); } +static int ocfs2_init_file_private(struct inode *inode, struct file *file) +{ + struct ocfs2_file_private *fp; + + fp = kzalloc(sizeof(struct ocfs2_file_private), GFP_KERNEL); + if (!fp) + return -ENOMEM; + + fp->fp_file = file; + mutex_init(&fp->fp_mutex); + ocfs2_file_lock_res_init(&fp->fp_flock, fp); + file->private_data = fp; + + return 0; +} + +static void ocfs2_free_file_private(struct inode *inode, struct file *file) +{ + struct ocfs2_file_private *fp = file->private_data; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + + if (fp) { + ocfs2_simple_drop_lockres(osb, &fp->fp_flock); + ocfs2_lock_res_free(&fp->fp_flock); + kfree(fp); + file->private_data = NULL; + } +} + static int ocfs2_file_open(struct inode *inode, struct file *file) { int status; @@ -89,7 +119,18 @@ static int ocfs2_file_open(struct inode *inode, struct file *file) oi->ip_open_count++; spin_unlock(&oi->ip_lock); - status = 0; + + status = ocfs2_init_file_private(inode, file); + if (status) { + /* + * We want to set open count back if we're failing the + * open. 
+ */ + spin_lock(&oi->ip_lock); + oi->ip_open_count--; + spin_unlock(&oi->ip_lock); + } + leave: mlog_exit(status); return status; @@ -108,11 +149,24 @@ static int ocfs2_file_release(struct inode *inode, struct file *file) oi->ip_flags &= ~OCFS2_INODE_OPEN_DIRECT; spin_unlock(&oi->ip_lock); + ocfs2_free_file_private(inode, file); + mlog_exit(0); return 0; } +static int ocfs2_dir_open(struct inode *inode, struct file *file) +{ + return ocfs2_init_file_private(inode, file); +} + +static int ocfs2_dir_release(struct inode *inode, struct file *file) +{ + ocfs2_free_file_private(inode, file); + return 0; +} + static int ocfs2_sync_file(struct file *file, struct dentry *dentry, int datasync) @@ -2191,6 +2245,7 @@ const struct file_operations ocfs2_fops = { #ifdef CONFIG_COMPAT .compat_ioctl = ocfs2_compat_ioctl, #endif + .flock = ocfs2_flock, .splice_read = ocfs2_file_splice_read, .splice_write = ocfs2_file_splice_write, }; @@ -2199,8 +2254,11 @@ const struct file_operations ocfs2_dops = { .read = generic_read_dir, .readdir = ocfs2_readdir, .fsync = ocfs2_sync_file, + .release = ocfs2_dir_release, + .open = ocfs2_dir_open, .ioctl = ocfs2_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ocfs2_compat_ioctl, #endif + .flock = ocfs2_flock, }; diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c new file mode 100644 index 000000000000..203f87143877 --- /dev/null +++ b/fs/ocfs2/locks.c @@ -0,0 +1,125 @@ +/* -*- mode: c; c-basic-offset: 8; -*- + * vim: noexpandtab sw=8 ts=8 sts=0: + * + * locks.c + * + * Userspace file locking support + * + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#include <linux/fs.h> + +#define MLOG_MASK_PREFIX ML_INODE +#include <cluster/masklog.h> + +#include "ocfs2.h" + +#include "dlmglue.h" +#include "file.h" +#include "locks.h" + +static int ocfs2_do_flock(struct file *file, struct inode *inode, + int cmd, struct file_lock *fl) +{ + int ret = 0, level = 0, trylock = 0; + struct ocfs2_file_private *fp = file->private_data; + struct ocfs2_lock_res *lockres = &fp->fp_flock; + + if (fl->fl_type == F_WRLCK) + level = 1; + if (!IS_SETLKW(cmd)) + trylock = 1; + + mutex_lock(&fp->fp_mutex); + + if (lockres->l_flags & OCFS2_LOCK_ATTACHED && + lockres->l_level > LKM_NLMODE) { + int old_level = 0; + + if (lockres->l_level == LKM_EXMODE) + old_level = 1; + + if (level == old_level) + goto out; + + /* + * Converting an existing lock is not guaranteed to be + * atomic, so we can get away with simply unlocking + * here and allowing the lock code to try at the new + * level. 
+ */ + + flock_lock_file_wait(file, + &(struct file_lock){.fl_type = F_UNLCK}); + + ocfs2_file_unlock(file); + } + + ret = ocfs2_file_lock(file, level, trylock); + if (ret) { + if (ret == -EAGAIN && trylock) + ret = -EWOULDBLOCK; + else + mlog_errno(ret); + goto out; + } + + ret = flock_lock_file_wait(file, fl); + +out: + mutex_unlock(&fp->fp_mutex); + + return ret; +} + +static int ocfs2_do_funlock(struct file *file, int cmd, struct file_lock *fl) +{ + int ret; + struct ocfs2_file_private *fp = file->private_data; + + mutex_lock(&fp->fp_mutex); + ocfs2_file_unlock(file); + ret = flock_lock_file_wait(file, fl); + mutex_unlock(&fp->fp_mutex); + + return ret; +} + +/* + * Overall flow of ocfs2_flock() was influenced by gfs2_flock(). + */ +int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl) +{ + struct inode *inode = file->f_mapping->host; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + + if (!(fl->fl_flags & FL_FLOCK)) + return -ENOLCK; + if (__mandatory_lock(inode)) + return -ENOLCK; + + if ((osb->s_mount_opt & OCFS2_MOUNT_LOCALFLOCKS) || + ocfs2_mount_local(osb)) + return flock_lock_file_wait(file, fl); + + if (fl->fl_type == F_UNLCK) + return ocfs2_do_funlock(file, cmd, fl); + else + return ocfs2_do_flock(file, inode, cmd, fl); +} diff --git a/fs/ocfs2/locks.h b/fs/ocfs2/locks.h new file mode 100644 index 000000000000..9743ef2324ec --- /dev/null +++ b/fs/ocfs2/locks.h @@ -0,0 +1,31 @@ +/* -*- mode: c; c-basic-offset: 8; -*- + * vim: noexpandtab sw=8 ts=8 sts=0: + * + * locks.h + * + * Function prototypes for Userspace file locking support + * + * Copyright (C) 2002, 2004 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#ifndef OCFS2_LOCKS_H +#define OCFS2_LOCKS_H + +int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl); + +#endif /* OCFS2_LOCKS_H */ diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 63c131e1cc77..22e334d125d0 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -171,6 +171,7 @@ enum ocfs2_mount_options OCFS2_MOUNT_NOINTR = 1 << 2, /* Don't catch signals */ OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */ + OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */ }; #define OCFS2_OSB_SOFT_RO 0x0001 diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 1104f14c3183..4a091f586646 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -153,6 +153,7 @@ enum { Opt_slot, Opt_commit, Opt_localalloc, + Opt_localflocks, Opt_err, }; @@ -170,6 +171,7 @@ static match_table_t tokens = { {Opt_slot, "preferred_slot=%u"}, {Opt_commit, "commit=%u"}, {Opt_localalloc, "localalloc=%d"}, + {Opt_localflocks, "localflocks"}, {Opt_err, NULL} }; @@ -848,6 +850,20 @@ static int ocfs2_parse_options(struct super_block *sb, if (option >= 0 && (option <= ocfs2_local_alloc_size(sb) * 8)) mopt->localalloc_opt = option; break; + case Opt_localflocks: + /* + * Changing this during remount could race + * flock() requests, or "unbalance" existing + * ones (e.g., a lock is taken in one mode but + * dropped in the other). 
If users care enough + * to flip locking modes during remount, we + * could add a "local" flag to individual + * flock structures for proper tracking of + * state. + */ + if (!is_remount) + mopt->mount_opt |= OCFS2_MOUNT_LOCALFLOCKS; + break; default: mlog(ML_ERROR, "Unrecognized mount option \"%s\" " @@ -903,6 +919,9 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) if (osb->local_alloc_size != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE) seq_printf(s, ",localalloc=%d", osb->local_alloc_size); + if (opts & OCFS2_MOUNT_LOCALFLOCKS) + seq_printf(s, ",localflocks"); + return 0; } -- cgit v1.2.3-59-g8ed1b