aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/exec.c3
-rw-r--r--fs/ext3/namei.c73
-rw-r--r--fs/ext4/namei.c73
-rw-r--r--fs/nfs/super.c2
-rw-r--r--fs/ocfs2/aops.c33
-rw-r--r--fs/ocfs2/file.c4
-rw-r--r--fs/ocfs2/localalloc.c4
-rw-r--r--fs/ocfs2/localalloc.h2
-rw-r--r--fs/ocfs2/suballoc.c29
-rw-r--r--fs/ocfs2/suballoc.h11
-rw-r--r--fs/ocfs2/vote.c4
-rw-r--r--fs/signalfd.c190
12 files changed, 217 insertions, 211 deletions
diff --git a/fs/exec.c b/fs/exec.c
index c21a8cc06277..073b0b8c6d05 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -50,7 +50,6 @@
#include <linux/tsacct_kern.h>
#include <linux/cn_proc.h>
#include <linux/audit.h>
-#include <linux/signalfd.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
@@ -784,7 +783,6 @@ static int de_thread(struct task_struct *tsk)
* and we can just re-use it all.
*/
if (atomic_read(&oldsighand->count) <= 1) {
- signalfd_detach(tsk);
exit_itimers(sig);
return 0;
}
@@ -923,7 +921,6 @@ static int de_thread(struct task_struct *tsk)
sig->flags = 0;
no_thread_group:
- signalfd_detach(tsk);
exit_itimers(sig);
if (leader)
release_task(leader);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 1586807b8177..c1fa1908dba0 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -140,7 +140,8 @@ struct dx_frame
struct dx_map_entry
{
u32 hash;
- u32 offs;
+ u16 offs;
+ u16 size;
};
#ifdef CONFIG_EXT3_INDEX
@@ -379,13 +380,28 @@ dx_probe(struct dentry *dentry, struct inode *dir,
entries = (struct dx_entry *) (((char *)&root->info) +
root->info.info_length);
- assert(dx_get_limit(entries) == dx_root_limit(dir,
- root->info.info_length));
+
+ if (dx_get_limit(entries) != dx_root_limit(dir,
+ root->info.info_length)) {
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: limit != root limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail;
+ }
+
dxtrace (printk("Look up %x", hash));
while (1)
{
count = dx_get_count(entries);
- assert (count && count <= dx_get_limit(entries));
+ if (!count || count > dx_get_limit(entries)) {
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: no count or count > limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
+
p = entries + 1;
q = entries + count - 1;
while (p <= q)
@@ -423,8 +439,15 @@ dx_probe(struct dentry *dentry, struct inode *dir,
if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
goto fail2;
at = entries = ((struct dx_node *) bh->b_data)->entries;
- assert (dx_get_limit(entries) == dx_node_limit (dir));
+ if (dx_get_limit(entries) != dx_node_limit (dir)) {
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: limit != node limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
frame++;
+ frame->bh = NULL;
}
fail2:
while (frame >= frame_in) {
@@ -432,6 +455,10 @@ fail2:
frame--;
}
fail:
+ if (*err == ERR_BAD_DX_DIR)
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "Corrupt dir inode %ld, running e2fsck is "
+ "recommended.", dir->i_ino);
return NULL;
}
@@ -671,6 +698,10 @@ errout:
* Directory block splitting, compacting
*/
+/*
+ * Create map of hash values, offsets, and sizes, stored at end of block.
+ * Returns number of entries mapped.
+ */
static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
{
@@ -684,7 +715,8 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
ext3fs_dirhash(de->name, de->name_len, &h);
map_tail--;
map_tail->hash = h.hash;
- map_tail->offs = (u32) ((char *) de - base);
+ map_tail->offs = (u16) ((char *) de - base);
+ map_tail->size = le16_to_cpu(de->rec_len);
count++;
cond_resched();
}
@@ -694,6 +726,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
return count;
}
+/* Sort map by hash value */
static void dx_sort_map (struct dx_map_entry *map, unsigned count)
{
struct dx_map_entry *p, *q, *top = map + count - 1;
@@ -1091,6 +1124,10 @@ static inline void ext3_set_de_type(struct super_block *sb,
}
#ifdef CONFIG_EXT3_INDEX
+/*
+ * Move count entries from end of map between two memory locations.
+ * Returns pointer to last entry moved.
+ */
static struct ext3_dir_entry_2 *
dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
{
@@ -1109,6 +1146,10 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
return (struct ext3_dir_entry_2 *) (to - rec_len);
}
+/*
+ * Compact each dir entry in the range to the minimal rec_len.
+ * Returns pointer to last entry in range.
+ */
static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
{
struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base;
@@ -1131,6 +1172,11 @@ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
return prev;
}
+/*
+ * Split a full leaf block to make room for a new dir entry.
+ * Allocate a new block, and move entries so that they are approx. equally full.
+ * Returns pointer to de in block into which the new entry will be inserted.
+ */
static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
struct buffer_head **bh,struct dx_frame *frame,
struct dx_hash_info *hinfo, int *error)
@@ -1142,7 +1188,7 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
u32 hash2;
struct dx_map_entry *map;
char *data1 = (*bh)->b_data, *data2;
- unsigned split;
+ unsigned split, move, size, i;
struct ext3_dir_entry_2 *de = NULL, *de2;
int err = 0;
@@ -1170,8 +1216,19 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
count = dx_make_map ((struct ext3_dir_entry_2 *) data1,
blocksize, hinfo, map);
map -= count;
- split = count/2; // need to adjust to actual middle
dx_sort_map (map, count);
+ /* Split the existing block in the middle, size-wise */
+ size = 0;
+ move = 0;
+ for (i = count-1; i >= 0; i--) {
+ /* is more than half of this entry in 2nd half of the block? */
+ if (size + map[i].size/2 > blocksize/2)
+ break;
+ size += map[i].size;
+ move++;
+ }
+ /* map index at which we will split */
+ split = count - move;
hash2 = map[split].hash;
continued = hash2 == map[split - 1].hash;
dxtrace(printk("Split block %i at %x, %i/%i\n",
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index da224974af78..5fdb862e71c4 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -140,7 +140,8 @@ struct dx_frame
struct dx_map_entry
{
u32 hash;
- u32 offs;
+ u16 offs;
+ u16 size;
};
#ifdef CONFIG_EXT4_INDEX
@@ -379,13 +380,28 @@ dx_probe(struct dentry *dentry, struct inode *dir,
entries = (struct dx_entry *) (((char *)&root->info) +
root->info.info_length);
- assert(dx_get_limit(entries) == dx_root_limit(dir,
- root->info.info_length));
+
+ if (dx_get_limit(entries) != dx_root_limit(dir,
+ root->info.info_length)) {
+ ext4_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: limit != root limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail;
+ }
+
dxtrace (printk("Look up %x", hash));
while (1)
{
count = dx_get_count(entries);
- assert (count && count <= dx_get_limit(entries));
+ if (!count || count > dx_get_limit(entries)) {
+ ext4_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: no count or count > limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
+
p = entries + 1;
q = entries + count - 1;
while (p <= q)
@@ -423,8 +439,15 @@ dx_probe(struct dentry *dentry, struct inode *dir,
if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err)))
goto fail2;
at = entries = ((struct dx_node *) bh->b_data)->entries;
- assert (dx_get_limit(entries) == dx_node_limit (dir));
+ if (dx_get_limit(entries) != dx_node_limit (dir)) {
+ ext4_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: limit != node limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
frame++;
+ frame->bh = NULL;
}
fail2:
while (frame >= frame_in) {
@@ -432,6 +455,10 @@ fail2:
frame--;
}
fail:
+ if (*err == ERR_BAD_DX_DIR)
+ ext4_warning(dir->i_sb, __FUNCTION__,
+ "Corrupt dir inode %ld, running e2fsck is "
+ "recommended.", dir->i_ino);
return NULL;
}
@@ -671,6 +698,10 @@ errout:
* Directory block splitting, compacting
*/
+/*
+ * Create map of hash values, offsets, and sizes, stored at end of block.
+ * Returns number of entries mapped.
+ */
static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
{
@@ -684,7 +715,8 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
ext4fs_dirhash(de->name, de->name_len, &h);
map_tail--;
map_tail->hash = h.hash;
- map_tail->offs = (u32) ((char *) de - base);
+ map_tail->offs = (u16) ((char *) de - base);
+ map_tail->size = le16_to_cpu(de->rec_len);
count++;
cond_resched();
}
@@ -694,6 +726,7 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
return count;
}
+/* Sort map by hash value */
static void dx_sort_map (struct dx_map_entry *map, unsigned count)
{
struct dx_map_entry *p, *q, *top = map + count - 1;
@@ -1089,6 +1122,10 @@ static inline void ext4_set_de_type(struct super_block *sb,
}
#ifdef CONFIG_EXT4_INDEX
+/*
+ * Move count entries from end of map between two memory locations.
+ * Returns pointer to last entry moved.
+ */
static struct ext4_dir_entry_2 *
dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
{
@@ -1107,6 +1144,10 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
return (struct ext4_dir_entry_2 *) (to - rec_len);
}
+/*
+ * Compact each dir entry in the range to the minimal rec_len.
+ * Returns pointer to last entry in range.
+ */
static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size)
{
struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base;
@@ -1129,6 +1170,11 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size)
return prev;
}
+/*
+ * Split a full leaf block to make room for a new dir entry.
+ * Allocate a new block, and move entries so that they are approx. equally full.
+ * Returns pointer to de in block into which the new entry will be inserted.
+ */
static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
struct buffer_head **bh,struct dx_frame *frame,
struct dx_hash_info *hinfo, int *error)
@@ -1140,7 +1186,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
u32 hash2;
struct dx_map_entry *map;
char *data1 = (*bh)->b_data, *data2;
- unsigned split;
+ unsigned split, move, size, i;
struct ext4_dir_entry_2 *de = NULL, *de2;
int err = 0;
@@ -1168,8 +1214,19 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
count = dx_make_map ((struct ext4_dir_entry_2 *) data1,
blocksize, hinfo, map);
map -= count;
- split = count/2; // need to adjust to actual middle
dx_sort_map (map, count);
+ /* Split the existing block in the middle, size-wise */
+ size = 0;
+ move = 0;
+ for (i = count-1; i >= 0; i--) {
+ /* is more than half of this entry in 2nd half of the block? */
+ if (size + map[i].size/2 > blocksize/2)
+ break;
+ size += map[i].size;
+ move++;
+ }
+ /* map index at which we will split */
+ split = count - move;
hash2 = map[split].hash;
continued = hash2 == map[split - 1].hash;
dxtrace(printk("Split block %i at %x, %i/%i\n",
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 8ed593766f16..b878528b64c1 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -345,8 +345,8 @@ void __exit unregister_nfs_fs(void)
unregister_shrinker(&acl_shrinker);
#ifdef CONFIG_NFS_V4
unregister_filesystem(&nfs4_fs_type);
- nfs_unregister_sysctl();
#endif
+ nfs_unregister_sysctl();
unregister_filesystem(&nfs_fs_type);
}
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 50cd8a209012..f37f25c931f5 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -930,18 +930,11 @@ static void ocfs2_write_failure(struct inode *inode,
loff_t user_pos, unsigned user_len)
{
int i;
- unsigned from, to;
+ unsigned from = user_pos & (PAGE_CACHE_SIZE - 1),
+ to = user_pos + user_len;
struct page *tmppage;
- ocfs2_zero_new_buffers(wc->w_target_page, user_pos, user_len);
-
- if (wc->w_large_pages) {
- from = wc->w_target_from;
- to = wc->w_target_to;
- } else {
- from = 0;
- to = PAGE_CACHE_SIZE;
- }
+ ocfs2_zero_new_buffers(wc->w_target_page, from, to);
for(i = 0; i < wc->w_num_pages; i++) {
tmppage = wc->w_pages[i];
@@ -991,9 +984,6 @@ static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno,
map_from = cluster_start;
map_to = cluster_end;
}
-
- wc->w_target_from = map_from;
- wc->w_target_to = map_to;
} else {
/*
* If we haven't allocated the new page yet, we
@@ -1211,18 +1201,33 @@ static int ocfs2_write_cluster_by_desc(struct address_space *mapping,
loff_t pos, unsigned len)
{
int ret, i;
+ loff_t cluster_off;
+ unsigned int local_len = len;
struct ocfs2_write_cluster_desc *desc;
+ struct ocfs2_super *osb = OCFS2_SB(mapping->host->i_sb);
for (i = 0; i < wc->w_clen; i++) {
desc = &wc->w_desc[i];
+ /*
+ * We have to make sure that the total write passed in
+ * doesn't extend past a single cluster.
+ */
+ local_len = len;
+ cluster_off = pos & (osb->s_clustersize - 1);
+ if ((cluster_off + local_len) > osb->s_clustersize)
+ local_len = osb->s_clustersize - cluster_off;
+
ret = ocfs2_write_cluster(mapping, desc->c_phys,
desc->c_unwritten, data_ac, meta_ac,
- wc, desc->c_cpos, pos, len);
+ wc, desc->c_cpos, pos, local_len);
if (ret) {
mlog_errno(ret);
goto out;
}
+
+ len -= local_len;
+ pos += local_len;
}
ret = 0;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 7e34e66159c6..f3bc3658e7a5 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -491,8 +491,8 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
goto leave;
}
- status = ocfs2_claim_clusters(osb, handle, data_ac, 1,
- &bit_off, &num_bits);
+ status = __ocfs2_claim_clusters(osb, handle, data_ac, 1,
+ clusters_to_add, &bit_off, &num_bits);
if (status < 0) {
if (status != -ENOSPC)
mlog_errno(status);
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 545f7892cdf3..de984d272576 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -524,13 +524,12 @@ bail:
int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
handle_t *handle,
struct ocfs2_alloc_context *ac,
- u32 min_bits,
+ u32 bits_wanted,
u32 *bit_off,
u32 *num_bits)
{
int status, start;
struct inode *local_alloc_inode;
- u32 bits_wanted;
void *bitmap;
struct ocfs2_dinode *alloc;
struct ocfs2_local_alloc *la;
@@ -538,7 +537,6 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
mlog_entry_void();
BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);
- bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
local_alloc_inode = ac->ac_inode;
alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
la = OCFS2_LOCAL_ALLOC(alloc);
diff --git a/fs/ocfs2/localalloc.h b/fs/ocfs2/localalloc.h
index 385a10152f9c..3f76631e110c 100644
--- a/fs/ocfs2/localalloc.h
+++ b/fs/ocfs2/localalloc.h
@@ -48,7 +48,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
handle_t *handle,
struct ocfs2_alloc_context *ac,
- u32 min_bits,
+ u32 bits_wanted,
u32 *bit_off,
u32 *num_bits);
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index d9c5c9fcb30f..8f09f5235e3a 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -1486,21 +1486,21 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode,
* contig. allocation, set to '1' to indicate we can deal with extents
* of any size.
*/
-int ocfs2_claim_clusters(struct ocfs2_super *osb,
- handle_t *handle,
- struct ocfs2_alloc_context *ac,
- u32 min_clusters,
- u32 *cluster_start,
- u32 *num_clusters)
+int __ocfs2_claim_clusters(struct ocfs2_super *osb,
+ handle_t *handle,
+ struct ocfs2_alloc_context *ac,
+ u32 min_clusters,
+ u32 max_clusters,
+ u32 *cluster_start,
+ u32 *num_clusters)
{
int status;
- unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
+ unsigned int bits_wanted = max_clusters;
u64 bg_blkno = 0;
u16 bg_bit_off;
mlog_entry_void();
- BUG_ON(!ac);
BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
@@ -1557,6 +1557,19 @@ bail:
return status;
}
+int ocfs2_claim_clusters(struct ocfs2_super *osb,
+ handle_t *handle,
+ struct ocfs2_alloc_context *ac,
+ u32 min_clusters,
+ u32 *cluster_start,
+ u32 *num_clusters)
+{
+ unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
+
+ return __ocfs2_claim_clusters(osb, handle, ac, min_clusters,
+ bits_wanted, cluster_start, num_clusters);
+}
+
static inline int ocfs2_block_group_clear_bits(handle_t *handle,
struct inode *alloc_inode,
struct ocfs2_group_desc *bg,
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index f212dc01a84b..cafe93703095 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -85,6 +85,17 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
u32 min_clusters,
u32 *cluster_start,
u32 *num_clusters);
+/*
+ * Use this variant of ocfs2_claim_clusters to specify a maxiumum
+ * number of clusters smaller than the allocation reserved.
+ */
+int __ocfs2_claim_clusters(struct ocfs2_super *osb,
+ handle_t *handle,
+ struct ocfs2_alloc_context *ac,
+ u32 min_clusters,
+ u32 max_clusters,
+ u32 *cluster_start,
+ u32 *num_clusters);
int ocfs2_free_suballoc_bits(handle_t *handle,
struct inode *alloc_inode,
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c
index 66a13ee63d4c..c05358538f2b 100644
--- a/fs/ocfs2/vote.c
+++ b/fs/ocfs2/vote.c
@@ -66,7 +66,7 @@ struct ocfs2_vote_msg
{
struct ocfs2_msg_hdr v_hdr;
__be32 v_reserved1;
-};
+} __attribute__ ((packed));
/* Responses are given these values to maintain backwards
* compatibility with older ocfs2 versions */
@@ -78,7 +78,7 @@ struct ocfs2_response_msg
{
struct ocfs2_msg_hdr r_hdr;
__be32 r_response;
-};
+} __attribute__ ((packed));
struct ocfs2_vote_work {
struct list_head w_list;
diff --git a/fs/signalfd.c b/fs/signalfd.c
index a8e293d30034..aefb0be07942 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -11,8 +11,10 @@
* Now using anonymous inode source.
* Thanks to Oleg Nesterov for useful code review and suggestions.
* More comments and suggestions from Arnd Bergmann.
- * Sat May 19, 2007: Davi E. M. Arnaut <davi@haxent.com.br>
+ * Sat May 19, 2007: Davi E. M. Arnaut <davi@haxent.com.br>
* Retrieve multiple signals with one read() call
+ * Sun Jul 15, 2007: Davide Libenzi <davidel@xmailserver.org>
+ * Attach to the sighand only during read() and poll().
*/
#include <linux/file.h>
@@ -27,102 +29,12 @@
#include <linux/signalfd.h>
struct signalfd_ctx {
- struct list_head lnk;
- wait_queue_head_t wqh;
sigset_t sigmask;
- struct task_struct *tsk;
};
-struct signalfd_lockctx {
- struct task_struct *tsk;
- unsigned long flags;
-};
-
-/*
- * Tries to acquire the sighand lock. We do not increment the sighand
- * use count, and we do not even pin the task struct, so we need to
- * do it inside an RCU read lock, and we must be prepared for the
- * ctx->tsk going to NULL (in signalfd_deliver()), and for the sighand
- * being detached. We return 0 if the sighand has been detached, or
- * 1 if we were able to pin the sighand lock.
- */
-static int signalfd_lock(struct signalfd_ctx *ctx, struct signalfd_lockctx *lk)
-{
- struct sighand_struct *sighand = NULL;
-
- rcu_read_lock();
- lk->tsk = rcu_dereference(ctx->tsk);
- if (likely(lk->tsk != NULL))
- sighand = lock_task_sighand(lk->tsk, &lk->flags);
- rcu_read_unlock();
-
- if (!sighand)
- return 0;
-
- if (!ctx->tsk) {
- unlock_task_sighand(lk->tsk, &lk->flags);
- return 0;
- }
-
- if (lk->tsk->tgid == current->tgid)
- lk->tsk = current;
-
- return 1;
-}
-
-static void signalfd_unlock(struct signalfd_lockctx *lk)
-{
- unlock_task_sighand(lk->tsk, &lk->flags);
-}
-
-/*
- * This must be called with the sighand lock held.
- */
-void signalfd_deliver(struct task_struct *tsk, int sig)
-{
- struct sighand_struct *sighand = tsk->sighand;
- struct signalfd_ctx *ctx, *tmp;
-
- BUG_ON(!sig);
- list_for_each_entry_safe(ctx, tmp, &sighand->signalfd_list, lnk) {
- /*
- * We use a negative signal value as a way to broadcast that the
- * sighand has been orphaned, so that we can notify all the
- * listeners about this. Remember the ctx->sigmask is inverted,
- * so if the user is interested in a signal, that corresponding
- * bit will be zero.
- */
- if (sig < 0) {
- if (ctx->tsk == tsk) {
- ctx->tsk = NULL;
- list_del_init(&ctx->lnk);
- wake_up(&ctx->wqh);
- }
- } else {
- if (!sigismember(&ctx->sigmask, sig))
- wake_up(&ctx->wqh);
- }
- }
-}
-
-static void signalfd_cleanup(struct signalfd_ctx *ctx)
-{
- struct signalfd_lockctx lk;
-
- /*
- * This is tricky. If the sighand is gone, we do not need to remove
- * context from the list, the list itself won't be there anymore.
- */
- if (signalfd_lock(ctx, &lk)) {
- list_del(&ctx->lnk);
- signalfd_unlock(&lk);
- }
- kfree(ctx);
-}
-
static int signalfd_release(struct inode *inode, struct file *file)
{
- signalfd_cleanup(file->private_data);
+ kfree(file->private_data);
return 0;
}
@@ -130,23 +42,15 @@ static unsigned int signalfd_poll(struct file *file, poll_table *wait)
{
struct signalfd_ctx *ctx = file->private_data;
unsigned int events = 0;
- struct signalfd_lockctx lk;
- poll_wait(file, &ctx->wqh, wait);
+ poll_wait(file, &current->sighand->signalfd_wqh, wait);
- /*
- * Let the caller get a POLLIN in this case, ala socket recv() when
- * the peer disconnects.
- */
- if (signalfd_lock(ctx, &lk)) {
- if ((lk.tsk == current &&
- next_signal(&lk.tsk->pending, &ctx->sigmask) > 0) ||
- next_signal(&lk.tsk->signal->shared_pending,
- &ctx->sigmask) > 0)
- events |= POLLIN;
- signalfd_unlock(&lk);
- } else
+ spin_lock_irq(&current->sighand->siglock);
+ if (next_signal(&current->pending, &ctx->sigmask) ||
+ next_signal(&current->signal->shared_pending,
+ &ctx->sigmask))
events |= POLLIN;
+ spin_unlock_irq(&current->sighand->siglock);
return events;
}
@@ -219,59 +123,46 @@ static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, siginfo_t *info,
int nonblock)
{
ssize_t ret;
- struct signalfd_lockctx lk;
DECLARE_WAITQUEUE(wait, current);
- if (!signalfd_lock(ctx, &lk))
- return 0;
-
- ret = dequeue_signal(lk.tsk, &ctx->sigmask, info);
+ spin_lock_irq(&current->sighand->siglock);
+ ret = dequeue_signal(current, &ctx->sigmask, info);
switch (ret) {
case 0:
if (!nonblock)
break;
ret = -EAGAIN;
default:
- signalfd_unlock(&lk);
+ spin_unlock_irq(&current->sighand->siglock);
return ret;
}
- add_wait_queue(&ctx->wqh, &wait);
+ add_wait_queue(&current->sighand->signalfd_wqh, &wait);
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
- ret = dequeue_signal(lk.tsk, &ctx->sigmask, info);
- signalfd_unlock(&lk);
+ ret = dequeue_signal(current, &ctx->sigmask, info);
if (ret != 0)
break;
if (signal_pending(current)) {
ret = -ERESTARTSYS;
break;
}
+ spin_unlock_irq(&current->sighand->siglock);
schedule();
- ret = signalfd_lock(ctx, &lk);
- if (unlikely(!ret)) {
- /*
- * Let the caller read zero byte, ala socket
- * recv() when the peer disconnect. This test
- * must be done before doing a dequeue_signal(),
- * because if the sighand has been orphaned,
- * the dequeue_signal() call is going to crash
- * because ->sighand will be long gone.
- */
- break;
- }
+ spin_lock_irq(&current->sighand->siglock);
}
+ spin_unlock_irq(&current->sighand->siglock);
- remove_wait_queue(&ctx->wqh, &wait);
+ remove_wait_queue(&current->sighand->signalfd_wqh, &wait);
__set_current_state(TASK_RUNNING);
return ret;
}
/*
- * Returns either the size of a "struct signalfd_siginfo", or zero if the
- * sighand we are attached to, has been orphaned. The "count" parameter
- * must be at least the size of a "struct signalfd_siginfo".
+ * Returns a multiple of the size of a "struct signalfd_siginfo", or a negative
+ * error code. The "count" parameter must be at least the size of a
+ * "struct signalfd_siginfo".
*/
static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count,
loff_t *ppos)
@@ -287,7 +178,6 @@ static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count,
return -EINVAL;
siginfo = (struct signalfd_siginfo __user *) buf;
-
do {
ret = signalfd_dequeue(ctx, &info, nonblock);
if (unlikely(ret <= 0))
@@ -300,7 +190,7 @@ static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count,
nonblock = 1;
} while (--count);
- return total ? total : ret;
+ return total ? total: ret;
}
static const struct file_operations signalfd_fops = {
@@ -309,20 +199,13 @@ static const struct file_operations signalfd_fops = {
.read = signalfd_read,
};
-/*
- * Create a file descriptor that is associated with our signal
- * state. We can pass it around to others if we want to, but
- * it will always be _our_ signal state.
- */
asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask)
{
int error;
sigset_t sigmask;
struct signalfd_ctx *ctx;
- struct sighand_struct *sighand;
struct file *file;
struct inode *inode;
- struct signalfd_lockctx lk;
if (sizemask != sizeof(sigset_t) ||
copy_from_user(&sigmask, user_mask, sizeof(sigmask)))
@@ -335,17 +218,7 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
if (!ctx)
return -ENOMEM;
- init_waitqueue_head(&ctx->wqh);
ctx->sigmask = sigmask;
- ctx->tsk = current->group_leader;
-
- sighand = current->sighand;
- /*
- * Add this fd to the list of signal listeners.
- */
- spin_lock_irq(&sighand->siglock);
- list_add_tail(&ctx->lnk, &sighand->signalfd_list);
- spin_unlock_irq(&sighand->siglock);
/*
* When we call this, the initialization must be complete, since
@@ -364,23 +237,18 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
fput(file);
return -EINVAL;
}
- /*
- * We need to be prepared of the fact that the sighand this fd
- * is attached to, has been detched. In that case signalfd_lock()
- * will return 0, and we'll just skip setting the new mask.
- */
- if (signalfd_lock(ctx, &lk)) {
- ctx->sigmask = sigmask;
- signalfd_unlock(&lk);
- }
- wake_up(&ctx->wqh);
+ spin_lock_irq(&current->sighand->siglock);
+ ctx->sigmask = sigmask;
+ spin_unlock_irq(&current->sighand->siglock);
+
+ wake_up(&current->sighand->signalfd_wqh);
fput(file);
}
return ufd;
err_fdalloc:
- signalfd_cleanup(ctx);
+ kfree(ctx);
return error;
}