diff options
Diffstat (limited to 'fs/gfs2')
-rw-r--r-- | fs/gfs2/aops.c | 152 | ||||
-rw-r--r-- | fs/gfs2/bmap.c | 25 | ||||
-rw-r--r-- | fs/gfs2/dir.c | 7 | ||||
-rw-r--r-- | fs/gfs2/export.c | 6 | ||||
-rw-r--r-- | fs/gfs2/file.c | 218 | ||||
-rw-r--r-- | fs/gfs2/glock.c | 515 | ||||
-rw-r--r-- | fs/gfs2/glock.h | 16 | ||||
-rw-r--r-- | fs/gfs2/glops.c | 33 | ||||
-rw-r--r-- | fs/gfs2/incore.h | 6 | ||||
-rw-r--r-- | fs/gfs2/inode.c | 58 | ||||
-rw-r--r-- | fs/gfs2/inode.h | 2 | ||||
-rw-r--r-- | fs/gfs2/lock_dlm.c | 19 | ||||
-rw-r--r-- | fs/gfs2/log.c | 9 | ||||
-rw-r--r-- | fs/gfs2/log.h | 2 | ||||
-rw-r--r-- | fs/gfs2/lops.c | 41 | ||||
-rw-r--r-- | fs/gfs2/lops.h | 2 | ||||
-rw-r--r-- | fs/gfs2/main.c | 37 | ||||
-rw-r--r-- | fs/gfs2/meta_io.c | 31 | ||||
-rw-r--r-- | fs/gfs2/meta_io.h | 8 | ||||
-rw-r--r-- | fs/gfs2/ops_fstype.c | 37 | ||||
-rw-r--r-- | fs/gfs2/quota.c | 98 | ||||
-rw-r--r-- | fs/gfs2/recovery.c | 22 | ||||
-rw-r--r-- | fs/gfs2/rgrp.c | 40 | ||||
-rw-r--r-- | fs/gfs2/rgrp.h | 7 | ||||
-rw-r--r-- | fs/gfs2/super.c | 17 | ||||
-rw-r--r-- | fs/gfs2/sys.c | 5 | ||||
-rw-r--r-- | fs/gfs2/util.c | 12 | ||||
-rw-r--r-- | fs/gfs2/xattr.c | 2 |
28 files changed, 800 insertions, 627 deletions
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 005e920f5d4a..05bee80ac7de 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -82,31 +82,6 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock, } /** - * gfs2_writepage - Write page for writeback mappings - * @page: The page - * @wbc: The writeback control - */ -static int gfs2_writepage(struct page *page, struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_sbd *sdp = GFS2_SB(inode); - struct iomap_writepage_ctx wpc = { }; - - if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) - goto out; - if (current->journal_info) - goto redirty; - return iomap_writepage(page, wbc, &wpc, &gfs2_writeback_ops); - -redirty: - redirty_page_for_writepage(wbc, page); -out: - unlock_page(page); - return 0; -} - -/** * gfs2_write_jdata_page - gfs2 jdata-specific version of block_write_full_page * @page: The page to write * @wbc: The writeback control @@ -464,22 +439,26 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) return 0; } - -static int __gfs2_readpage(void *file, struct page *page) +/** + * gfs2_read_folio - read a folio from a file + * @file: The file to read + * @folio: The folio in the file + */ +static int gfs2_read_folio(struct file *file, struct folio *folio) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); int error; if (!gfs2_is_jdata(ip) || - (i_blocksize(inode) == PAGE_SIZE && !page_has_buffers(page))) { - error = iomap_readpage(page, &gfs2_iomap_ops); + (i_blocksize(inode) == PAGE_SIZE && !folio_buffers(folio))) { + error = iomap_read_folio(folio, &gfs2_iomap_ops); } else if (gfs2_is_stuffed(ip)) { - error = stuffed_readpage(ip, page); - unlock_page(page); + error = stuffed_readpage(ip, &folio->page); + folio_unlock(folio); } else { - error = mpage_readpage(page, gfs2_block_map); + error = mpage_read_folio(folio, gfs2_block_map); } if (unlikely(gfs2_withdrawn(sdp))) @@ -489,17 +468,6 @@ static int __gfs2_readpage(void *file, struct page *page) } /** - * gfs2_readpage - read a page of a file - * @file: The file to read - * @page: The page of the file - */ - -static int gfs2_readpage(struct file *file, struct page *page) -{ - return __gfs2_readpage(file, page); -} - -/** * gfs2_internal_read - read an internal file * @ip: The gfs2 inode * @buf: The buffer to fill @@ -523,7 +491,7 @@ int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos, amt = size - copied; if (offset + size > PAGE_SIZE) amt = PAGE_SIZE - offset; - page = read_cache_page(mapping, index, __gfs2_readpage, NULL); + page = read_cache_page(mapping, index, gfs2_read_folio, NULL); if (IS_ERR(page)) return PTR_ERR(page); p = kmap_atomic(page); @@ -606,18 +574,12 @@ out: gfs2_trans_end(sdp); } -/** - * jdata_set_page_dirty - Page dirtying function - * @page: The page to dirty - * - * Returns: 1 if it dirtyed the page, or 0 otherwise - */ - -static int jdata_set_page_dirty(struct page *page) +static bool jdata_dirty_folio(struct address_space *mapping, + struct folio *folio) { if (current->journal_info) - SetPageChecked(page); - return __set_page_dirty_buffers(page); + folio_set_checked(folio); + return block_dirty_folio(mapping, folio); } /** @@ -672,22 +634,23 @@ static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh) unlock_buffer(bh); } -static void gfs2_invalidatepage(struct page *page, unsigned int offset, - unsigned int length) +static void gfs2_invalidate_folio(struct folio *folio, size_t offset, + size_t length) { - struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); - unsigned int stop = offset + length; - int partial_page = (offset || length < PAGE_SIZE); + struct gfs2_sbd *sdp = GFS2_SB(folio->mapping->host); + size_t stop = offset + length; + int partial_page = (offset || length < folio_size(folio)); struct buffer_head *bh, *head; unsigned long pos = 0; - BUG_ON(!PageLocked(page)); + BUG_ON(!folio_test_locked(folio)); if (!partial_page) - ClearPageChecked(page); - if (!page_has_buffers(page)) + folio_clear_checked(folio); + head = folio_buffers(folio); + if (!head) goto out; - bh = head = page_buffers(page); + bh = head; do { if (pos + bh->b_size > stop) return; @@ -699,42 +662,44 @@ static void gfs2_invalidatepage(struct page *page, unsigned int offset, } while (bh != head); out: if (!partial_page) - try_to_release_page(page, 0); + filemap_release_folio(folio, 0); } /** - * gfs2_releasepage - free the metadata associated with a page - * @page: the page that's being released + * gfs2_release_folio - free the metadata associated with a folio + * @folio: the folio that's being released * @gfp_mask: passed from Linux VFS, ignored by us * - * Calls try_to_free_buffers() to free the buffers and put the page if the + * Calls try_to_free_buffers() to free the buffers and put the folio if the * buffers can be released. * - * Returns: 1 if the page was put or else 0 + * Returns: true if the folio was put or else false */ -int gfs2_releasepage(struct page *page, gfp_t gfp_mask) +bool gfs2_release_folio(struct folio *folio, gfp_t gfp_mask) { - struct address_space *mapping = page->mapping; + struct address_space *mapping = folio->mapping; struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping); struct buffer_head *bh, *head; struct gfs2_bufdata *bd; - if (!page_has_buffers(page)) - return 0; + head = folio_buffers(folio); + if (!head) + return false; /* - * From xfs_vm_releasepage: mm accommodates an old ext3 case where - * clean pages might not have had the dirty bit cleared. Thus, it can - * send actual dirty pages to ->releasepage() via shrink_active_list(). + * mm accommodates an old ext3 case where clean folios might + * not have had the dirty bit cleared. Thus, it can send actual + * dirty folios to ->release_folio() via shrink_active_list(). * - * As a workaround, we skip pages that contain dirty buffers below. - * Once ->releasepage isn't called on dirty pages anymore, we can warn - * on dirty buffers like we used to here again. + * As a workaround, we skip folios that contain dirty buffers + * below. Once ->release_folio isn't called on dirty folios + * anymore, we can warn on dirty buffers like we used to here + * again. */ gfs2_log_lock(sdp); - head = bh = page_buffers(page); + bh = head; do { if (atomic_read(&bh->b_count)) goto cannot_release; @@ -744,9 +709,9 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask) if (buffer_dirty(bh) || WARN_ON(buffer_pinned(bh))) goto cannot_release; bh = bh->b_this_page; - } while(bh != head); + } while (bh != head); - head = bh = page_buffers(page); + bh = head; do { bd = bh->b_private; if (bd) { @@ -767,24 +732,23 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask) } while (bh != head); gfs2_log_unlock(sdp); - return try_to_free_buffers(page); + return try_to_free_buffers(folio); cannot_release: gfs2_log_unlock(sdp); - return 0; + return false; } static const struct address_space_operations gfs2_aops = { - .writepage = gfs2_writepage, .writepages = gfs2_writepages, - .readpage = gfs2_readpage, + .read_folio = gfs2_read_folio, .readahead = gfs2_readahead, - .set_page_dirty = __set_page_dirty_nobuffers, - .releasepage = iomap_releasepage, - .invalidatepage = iomap_invalidatepage, + .dirty_folio = filemap_dirty_folio, + .release_folio = iomap_release_folio, + .invalidate_folio = iomap_invalidate_folio, .bmap = gfs2_bmap, .direct_IO = noop_direct_IO, - .migratepage = iomap_migrate_page, + .migrate_folio = filemap_migrate_folio, .is_partially_uptodate = iomap_is_partially_uptodate, .error_remove_page = generic_error_remove_page, }; @@ -792,12 +756,12 @@ static const struct address_space_operations gfs2_aops = { static const struct address_space_operations gfs2_jdata_aops = { .writepage = gfs2_jdata_writepage, .writepages = gfs2_jdata_writepages, - .readpage = gfs2_readpage, + .read_folio = gfs2_read_folio, .readahead = gfs2_readahead, - .set_page_dirty = jdata_set_page_dirty, + .dirty_folio = jdata_dirty_folio, .bmap = gfs2_bmap, - .invalidatepage = gfs2_invalidatepage, - .releasepage = gfs2_releasepage, + .invalidate_folio = gfs2_invalidate_folio, + .release_folio = gfs2_release_folio, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, }; diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index d67108489148..3bdb2c668a71 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -310,9 +310,8 @@ static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end) if (trylock_buffer(rabh)) { if (!buffer_uptodate(rabh)) { rabh->b_end_io = end_buffer_read_sync; - submit_bh(REQ_OP_READ, - REQ_RAHEAD | REQ_META | REQ_PRIO, - rabh); + submit_bh(REQ_OP_READ | REQ_RAHEAD | REQ_META | + REQ_PRIO, rabh); continue; } unlock_buffer(rabh); @@ -606,9 +605,9 @@ out: return ret; } -static inline __be64 *gfs2_indirect_init(struct metapath *mp, - struct gfs2_glock *gl, unsigned int i, - unsigned offset, u64 bn) +static inline void gfs2_indirect_init(struct metapath *mp, + struct gfs2_glock *gl, unsigned int i, + unsigned offset, u64 bn) { __be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data + ((i > 1) ? sizeof(struct gfs2_meta_header) : @@ -621,7 +620,6 @@ static inline __be64 *gfs2_indirect_init(struct metapath *mp, gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header)); ptr += offset; *ptr = cpu_to_be64(bn); - return ptr; } enum alloc_state { @@ -1154,13 +1152,12 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length, if (length != written && (iomap->flags & IOMAP_F_NEW)) { /* Deallocate blocks that were just allocated. */ - loff_t blockmask = i_blocksize(inode) - 1; - loff_t end = (pos + length) & ~blockmask; + loff_t hstart = round_up(pos + written, i_blocksize(inode)); + loff_t hend = iomap->offset + iomap->length; - pos = (pos + written + blockmask) & ~blockmask; - if (pos < end) { - truncate_pagecache_range(inode, pos, end - 1); - punch_hole(ip, pos, end - pos); + if (hstart < hend) { + truncate_pagecache_range(inode, hstart, hend - 1); + punch_hole(ip, hstart, hend - hstart); } } @@ -2146,7 +2143,7 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize) ret = do_shrink(inode, newsize); out: - gfs2_rs_delete(ip, NULL); + gfs2_rs_delete(ip); gfs2_qa_put(ip); return ret; } diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 42b7dfffb5e7..54a6d17b8c25 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -1508,9 +1508,8 @@ static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index, continue; } bh->b_end_io = end_buffer_read_sync; - submit_bh(REQ_OP_READ, - REQ_RAHEAD | REQ_META | REQ_PRIO, - bh); + submit_bh(REQ_OP_READ | REQ_RAHEAD | REQ_META | + REQ_PRIO, bh); continue; } brelse(bh); @@ -2017,7 +2016,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, l_blocks++; } - gfs2_rlist_alloc(&rlist); + gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, LM_FLAG_NODE_SCOPE); for (x = 0; x < rlist.rl_rgrps; x++) { struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(rlist.rl_ghs[x].gh_gl); diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index 756d05779200..cf40895233f5 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c @@ -66,7 +66,7 @@ struct get_name_filldir { char *name; }; -static int get_name_filldir(struct dir_context *ctx, const char *name, +static bool get_name_filldir(struct dir_context *ctx, const char *name, int length, loff_t offset, u64 inum, unsigned int type) { @@ -74,12 +74,12 @@ static int get_name_filldir(struct dir_context *ctx, const char *name, container_of(ctx, struct get_name_filldir, ctx); if (inum != gnfd->inum.no_addr) - return 0; + return true; memcpy(gnfd->name, name, length); gnfd->name[length] = 0; - return 1; + return false; } static int gfs2_get_name(struct dentry *parent, char *name, diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 3e718cfc19a7..60c6fb91fb58 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -704,10 +704,11 @@ static int gfs2_release(struct inode *inode, struct file *file) kfree(file->private_data); file->private_data = NULL; - if (gfs2_rs_active(&ip->i_res)) - gfs2_rs_delete(ip, &inode->i_writecount); - if (file->f_mode & FMODE_WRITE) + if (file->f_mode & FMODE_WRITE) { + if (gfs2_rs_active(&ip->i_res)) + gfs2_rs_delete(ip); gfs2_qa_put(ip); + } return 0; } @@ -769,33 +770,31 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, return ret ? ret : ret1; } -static inline bool should_fault_in_pages(ssize_t ret, struct iov_iter *i, +static inline bool should_fault_in_pages(struct iov_iter *i, + struct kiocb *iocb, size_t *prev_count, size_t *window_size) { size_t count = iov_iter_count(i); - char __user *p; - int pages = 1; + size_t size, offs; - if (likely(!count)) + if (!count) return false; - if (ret <= 0 && ret != -EFAULT) - return false; - if (!iter_is_iovec(i)) + if (!user_backed_iter(i)) return false; + size = PAGE_SIZE; + offs = offset_in_page(iocb->ki_pos); if (*prev_count != count || !*window_size) { - int pages, nr_dirtied; + size_t nr_dirtied; - pages = min_t(int, BIO_MAX_VECS, DIV_ROUND_UP(count, PAGE_SIZE)); nr_dirtied = max(current->nr_dirtied_pause - - current->nr_dirtied, 1); - pages = min(pages, nr_dirtied); + current->nr_dirtied, 8); + size = min_t(size_t, SZ_1M, nr_dirtied << PAGE_SHIFT); } *prev_count = count; - p = i->iov[0].iov_base + i->iov_offset; - *window_size = (size_t)PAGE_SIZE * pages - offset_in_page(p); + *window_size = size - offs; return true; } @@ -805,7 +804,7 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to, struct file *file = iocb->ki_filp; struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); size_t prev_count = 0, window_size = 0; - size_t written = 0; + size_t read = 0; ssize_t ret; /* @@ -833,35 +832,33 @@ retry: ret = gfs2_glock_nq(gh); if (ret) goto out_uninit; -retry_under_glock: pagefault_disable(); to->nofault = true; ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL, - IOMAP_DIO_PARTIAL, written); + IOMAP_DIO_PARTIAL, NULL, read); to->nofault = false; pagefault_enable(); + if (ret <= 0 && ret != -EFAULT) + goto out_unlock; + /* No increment (+=) because iomap_dio_rw returns a cumulative value. */ if (ret > 0) - written = ret; - - if (should_fault_in_pages(ret, to, &prev_count, &window_size)) { - size_t leftover; + read = ret; - gfs2_holder_allow_demote(gh); - leftover = fault_in_iov_iter_writeable(to, window_size); - gfs2_holder_disallow_demote(gh); - if (leftover != window_size) { - if (!gfs2_holder_queued(gh)) - goto retry; - goto retry_under_glock; - } + if (should_fault_in_pages(to, iocb, &prev_count, &window_size)) { + gfs2_glock_dq(gh); + window_size -= fault_in_iov_iter_writeable(to, window_size); + if (window_size) + goto retry; } +out_unlock: if (gfs2_holder_queued(gh)) gfs2_glock_dq(gh); out_uninit: gfs2_holder_uninit(gh); + /* User space doesn't expect partial success. */ if (ret < 0) return ret; - return written; + return read; } static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, @@ -871,7 +868,7 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, struct inode *inode = file->f_mapping->host; struct gfs2_inode *ip = GFS2_I(inode); size_t prev_count = 0, window_size = 0; - size_t read = 0; + size_t written = 0; ssize_t ret; /* @@ -897,41 +894,39 @@ retry: ret = gfs2_glock_nq(gh); if (ret) goto out_uninit; -retry_under_glock: /* Silently fall back to buffered I/O when writing beyond EOF */ if (iocb->ki_pos + iov_iter_count(from) > i_size_read(&ip->i_inode)) - goto out; + goto out_unlock; from->nofault = true; ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL, - IOMAP_DIO_PARTIAL, read); + IOMAP_DIO_PARTIAL, NULL, written); from->nofault = false; - - if (ret == -ENOTBLK) - ret = 0; + if (ret <= 0) { + if (ret == -ENOTBLK) + ret = 0; + if (ret != -EFAULT) + goto out_unlock; + } + /* No increment (+=) because iomap_dio_rw returns a cumulative value. */ if (ret > 0) - read = ret; - - if (should_fault_in_pages(ret, from, &prev_count, &window_size)) { - size_t leftover; + written = ret; - gfs2_holder_allow_demote(gh); - leftover = fault_in_iov_iter_readable(from, window_size); - gfs2_holder_disallow_demote(gh); - if (leftover != window_size) { - if (!gfs2_holder_queued(gh)) - goto retry; - goto retry_under_glock; - } + if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) { + gfs2_glock_dq(gh); + window_size -= fault_in_iov_iter_readable(from, window_size); + if (window_size) + goto retry; } -out: +out_unlock: if (gfs2_holder_queued(gh)) gfs2_glock_dq(gh); out_uninit: gfs2_holder_uninit(gh); + /* User space doesn't expect partial success. */ if (ret < 0) return ret; - return read; + return written; } static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) @@ -939,7 +934,7 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) struct gfs2_inode *ip; struct gfs2_holder gh; size_t prev_count = 0, window_size = 0; - size_t written = 0; + size_t read = 0; ssize_t ret; /* @@ -949,20 +944,19 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) * and retry. */ - if (iocb->ki_flags & IOCB_DIRECT) { - ret = gfs2_file_direct_read(iocb, to, &gh); - if (likely(ret != -ENOTBLK)) - return ret; - iocb->ki_flags &= ~IOCB_DIRECT; - } + if (iocb->ki_flags & IOCB_DIRECT) + return gfs2_file_direct_read(iocb, to, &gh); + + pagefault_disable(); iocb->ki_flags |= IOCB_NOIO; ret = generic_file_read_iter(iocb, to); iocb->ki_flags &= ~IOCB_NOIO; + pagefault_enable(); if (ret >= 0) { if (!iov_iter_count(to)) return ret; - written = ret; - } else { + read = ret; + } else if (ret != -EFAULT) { if (ret != -EAGAIN) return ret; if (iocb->ki_flags & IOCB_NOWAIT) @@ -974,33 +968,26 @@ retry: ret = gfs2_glock_nq(&gh); if (ret) goto out_uninit; -retry_under_glock: pagefault_disable(); ret = generic_file_read_iter(iocb, to); pagefault_enable(); + if (ret <= 0 && ret != -EFAULT) + goto out_unlock; if (ret > 0) - written += ret; + read += ret; - if (should_fault_in_pages(ret, to, &prev_count, &window_size)) { - size_t leftover; - - gfs2_holder_allow_demote(&gh); - leftover = fault_in_iov_iter_writeable(to, window_size); - gfs2_holder_disallow_demote(&gh); - if (leftover != window_size) { - if (!gfs2_holder_queued(&gh)) { - if (written) - goto out_uninit; - goto retry; - } - goto retry_under_glock; - } + if (should_fault_in_pages(to, iocb, &prev_count, &window_size)) { + gfs2_glock_dq(&gh); + window_size -= fault_in_iov_iter_writeable(to, window_size); + if (window_size) + goto retry; } +out_unlock: if (gfs2_holder_queued(&gh)) gfs2_glock_dq(&gh); out_uninit: gfs2_holder_uninit(&gh); - return written ? written : ret; + return read ? read : ret; } static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, @@ -1014,7 +1001,7 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, struct gfs2_holder *statfs_gh = NULL; size_t prev_count = 0, window_size = 0; size_t orig_count = iov_iter_count(from); - size_t read = 0; + size_t written = 0; ssize_t ret; /* @@ -1032,10 +1019,18 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, gh); retry: + if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) { + window_size -= fault_in_iov_iter_readable(from, window_size); + if (!window_size) { + ret = -EFAULT; + goto out_uninit; + } + from->count = min(from->count, window_size); + } ret = gfs2_glock_nq(gh); if (ret) goto out_uninit; -retry_under_glock: + if (inode == sdp->sd_rindex) { struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); @@ -1052,37 +1047,28 @@ retry_under_glock: current->backing_dev_info = NULL; if (ret > 0) { iocb->ki_pos += ret; - read += ret; + written += ret; } if (inode == sdp->sd_rindex) gfs2_glock_dq_uninit(statfs_gh); - from->count = orig_count - read; - if (should_fault_in_pages(ret, from, &prev_count, &window_size)) { - size_t leftover; - - gfs2_holder_allow_demote(gh); - leftover = fault_in_iov_iter_readable(from, window_size); - gfs2_holder_disallow_demote(gh); - if (leftover != window_size) { - from->count = min(from->count, window_size - leftover); - if (!gfs2_holder_queued(gh)) { - if (read) - goto out_uninit; - goto retry; - } - goto retry_under_glock; - } + if (ret <= 0 && ret != -EFAULT) + goto out_unlock; + + from->count = orig_count - written; + if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) { + gfs2_glock_dq(gh); + goto retry; } out_unlock: if (gfs2_holder_queued(gh)) gfs2_glock_dq(gh); out_uninit: gfs2_holder_uninit(gh); - if (statfs_gh) - kfree(statfs_gh); - return read ? read : ret; + kfree(statfs_gh); + from->count = orig_count - written; + return written ? written : ret; } /** @@ -1457,6 +1443,22 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) return dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl); } +static void __flock_holder_uninit(struct file *file, struct gfs2_holder *fl_gh) +{ + struct gfs2_glock *gl = fl_gh->gh_gl; + + /* + * Make sure gfs2_glock_put() won't sleep under the file->f_lock + * spinlock. + */ + + gfs2_glock_hold(gl); + spin_lock(&file->f_lock); + gfs2_holder_uninit(fl_gh); + spin_unlock(&file->f_lock); + gfs2_glock_put(gl); +} + static int do_flock(struct file *file, int cmd, struct file_lock *fl) { struct gfs2_file *fp = file->private_data; @@ -1469,7 +1471,9 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) int sleeptime; state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; - flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY_1CB) | GL_EXACT; + flags = GL_EXACT | GL_NOPID; + if (!IS_SETLKW(cmd)) + flags |= LM_FLAG_TRY_1CB; mutex_lock(&fp->f_fl_mutex); @@ -1488,19 +1492,21 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) &gfs2_flock_glops, CREATE, &gl); if (error) goto out; + spin_lock(&file->f_lock); gfs2_holder_init(gl, state, flags, fl_gh); + spin_unlock(&file->f_lock); gfs2_glock_put(gl); } for (sleeptime = 1; sleeptime <= 4; sleeptime <<= 1) { error = gfs2_glock_nq(fl_gh); if (error != GLR_TRYFAILED) break; - fl_gh->gh_flags = LM_FLAG_TRY | GL_EXACT; - fl_gh->gh_error = 0; + fl_gh->gh_flags &= ~LM_FLAG_TRY_1CB; + fl_gh->gh_flags |= LM_FLAG_TRY; msleep(sleeptime); } if (error) { - gfs2_holder_uninit(fl_gh); + __flock_holder_uninit(file, fl_gh); if (error == GLR_TRYFAILED) error = -EAGAIN; } else { @@ -1522,7 +1528,7 @@ static void do_unflock(struct file *file, struct file_lock *fl) locks_lock_file_wait(file, fl); if (gfs2_holder_initialized(fl_gh)) { gfs2_glock_dq(fl_gh); - gfs2_holder_uninit(fl_gh); + __flock_holder_uninit(file, fl_gh); } mutex_unlock(&fp->f_fl_mutex); } diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 44a7a4288956..df335c258eb0 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -33,6 +33,9 @@ #include <linux/list_sort.h> #include <linux/lockref.h> #include <linux/rhashtable.h> +#include <linux/pid_namespace.h> +#include <linux/fdtable.h> +#include <linux/file.h> #include "gfs2.h" #include "incore.h" @@ -59,6 +62,8 @@ typedef void (*glock_examiner) (struct gfs2_glock * gl); static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); static void __gfs2_glock_dq(struct gfs2_holder *gh); +static void handle_callback(struct gfs2_glock *gl, unsigned int state, + unsigned long delay, bool remote); static struct dentry *gfs2_root; static struct workqueue_struct *glock_workqueue; @@ -127,9 +132,11 @@ static void gfs2_glock_dealloc(struct rcu_head *rcu) struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu); kfree(gl->gl_lksb.sb_lvbptr); - if (gl->gl_ops->go_flags & GLOF_ASPACE) - kmem_cache_free(gfs2_glock_aspace_cachep, gl); - else + if (gl->gl_ops->go_flags & GLOF_ASPACE) { + struct gfs2_glock_aspace *gla = + container_of(gl, struct gfs2_glock_aspace, glock); + kmem_cache_free(gfs2_glock_aspace_cachep, gla); + } else kmem_cache_free(gfs2_glock_cachep, gl); } @@ -301,9 +308,6 @@ void gfs2_glock_queue_put(struct gfs2_glock *gl) void gfs2_glock_put(struct gfs2_glock *gl) { - /* last put could call sleepable dlm api */ - might_sleep(); - if (lockref_put_or_lock(&gl->gl_lockref)) return; @@ -406,10 +410,13 @@ static void do_error(struct gfs2_glock *gl, const int ret) /** * demote_incompat_holders - demote incompatible demoteable holders * @gl: the glock we want to promote - * @new_gh: the new holder to be promoted + * @current_gh: the newly promoted holder + * + * We're passing the newly promoted holder in @current_gh, but actually, any of + * the strong holders would do. */ static void demote_incompat_holders(struct gfs2_glock *gl, - struct gfs2_holder *new_gh) + struct gfs2_holder *current_gh) { struct gfs2_holder *gh, *tmp; @@ -425,8 +432,10 @@ static void demote_incompat_holders(struct gfs2_glock *gl, */ if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) return; + if (gh == current_gh) + continue; if (test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags) && - !may_grant(gl, new_gh, gh)) { + !may_grant(gl, current_gh, gh)) { /* * We should not recurse into do_promote because * __gfs2_glock_dq only calls handle_callback, @@ -477,10 +486,9 @@ find_first_strong_holder(struct gfs2_glock *gl) /* * gfs2_instantiate - Call the glops instantiate function - * @gl: The glock + * @gh: The glock holder * - * Returns: 0 if instantiate was successful, 2 if type specific operation is - * underway, or error. + * Returns: 0 if instantiate was successful, or error. */ int gfs2_instantiate(struct gfs2_holder *gh) { @@ -490,7 +498,7 @@ int gfs2_instantiate(struct gfs2_holder *gh) again: if (!test_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags)) - return 0; + goto done; /* * Since we unlock the lockref lock, we set a flag to indicate @@ -509,78 +517,55 @@ again: goto again; } - ret = glops->go_instantiate(gh); + ret = glops->go_instantiate(gl); if (!ret) clear_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags); clear_and_wake_up_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags); - return ret; + if (ret) + return ret; + +done: + if (glops->go_held) + return glops->go_held(gh); + return 0; } /** * do_promote - promote as many requests as possible on the current queue * @gl: The glock * - * Returns: 1 if there is a blocked holder at the head of the list, or 2 - * if a type specific operation is underway. + * Returns: 1 if there is a blocked holder at the head of the list */ static int do_promote(struct gfs2_glock *gl) -__releases(&gl->gl_lockref.lock) -__acquires(&gl->gl_lockref.lock) { - struct gfs2_holder *gh, *tmp, *first_gh; + struct gfs2_holder *gh, *current_gh; bool incompat_holders_demoted = false; - bool lock_released; - int ret; -restart: - first_gh = find_first_strong_holder(gl); - list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { - lock_released = false; + current_gh = find_first_strong_holder(gl); + list_for_each_entry(gh, &gl->gl_holders, gh_list) { if (test_bit(HIF_HOLDER, &gh->gh_iflags)) continue; - if (!may_grant(gl, first_gh, gh)) { + if (!may_grant(gl, current_gh, gh)) { /* - * If we get here, it means we may not grant this holder for - * some reason. If this holder is the head of the list, it - * means we have a blocked holder at the head, so return 1. + * If we get here, it means we may not grant this + * holder for some reason. If this holder is at the + * head of the list, it means we have a blocked holder + * at the head, so return 1. */ - if (gh->gh_list.prev == &gl->gl_holders) + if (list_is_first(&gh->gh_list, &gl->gl_holders)) return 1; do_error(gl, 0); break; } - if (!incompat_holders_demoted) { - demote_incompat_holders(gl, first_gh); - incompat_holders_demoted = true; - first_gh = gh; - } - if (test_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags) && - !(gh->gh_flags & GL_SKIP) && gl->gl_ops->go_instantiate) { - lock_released = true; - spin_unlock(&gl->gl_lockref.lock); - ret = gfs2_instantiate(gh); - spin_lock(&gl->gl_lockref.lock); - if (ret) { - if (ret == 1) - return 2; - gh->gh_error = ret; - list_del_init(&gh->gh_list); - trace_gfs2_glock_queue(gh, 0); - gfs2_holder_wake(gh); - goto restart; - } - } set_bit(HIF_HOLDER, &gh->gh_iflags); trace_gfs2_promote(gh); gfs2_holder_wake(gh); - /* - * If we released the gl_lockref.lock the holders list may have - * changed. For that reason, we start again at the start of - * the holders queue. - */ - if (lock_released) - goto restart; + if (!incompat_holders_demoted) { + current_gh = gh; + demote_incompat_holders(gl, current_gh); + incompat_holders_demoted = true; + } } return 0; } @@ -658,7 +643,6 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) const struct gfs2_glock_operations *glops = gl->gl_ops; struct gfs2_holder *gh; unsigned state = ret & LM_OUT_ST_MASK; - int rv; spin_lock(&gl->gl_lockref.lock); trace_gfs2_glock_state_change(gl, state); @@ -672,6 +656,8 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) /* Check for state != intended state */ if (unlikely(state != gl->gl_target)) { + if (gh && (ret & LM_OUT_CANCELED)) + gfs2_holder_wake(gh); if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) { /* move to back of queue and try next entry */ if (ret & LM_OUT_CANCELED) { @@ -714,6 +700,8 @@ retry: gfs2_demote_wake(gl); if (state != LM_ST_UNLOCKED) { if (glops->go_xmote_bh) { + int rv; + spin_unlock(&gl->gl_lockref.lock); rv = glops->go_xmote_bh(gl); spin_lock(&gl->gl_lockref.lock); @@ -722,13 +710,10 @@ retry: goto out; } } - rv = do_promote(gl); - if (rv == 2) - goto out_locked; + do_promote(gl); } out: clear_bit(GLF_LOCK, &gl->gl_flags); -out_locked: spin_unlock(&gl->gl_lockref.lock); } @@ -750,7 +735,8 @@ static bool is_system_glock(struct gfs2_glock *gl) * */ -static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target) +static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, + unsigned int target) __releases(&gl->gl_lockref.lock) __acquires(&gl->gl_lockref.lock) { @@ -761,7 +747,8 @@ __acquires(&gl->gl_lockref.lock) if (target != LM_ST_UNLOCKED && glock_blocked_by_withdraw(gl) && gh && !(gh->gh_flags & LM_FLAG_NOEXP)) - return; + goto skip_inval; + lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | LM_FLAG_PRIORITY); GLOCK_BUG_ON(gl, gl->gl_state == target); @@ -846,6 +833,20 @@ skip_inval: (target != LM_ST_UNLOCKED || test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags))) { if (!is_system_glock(gl)) { + handle_callback(gl, LM_ST_UNLOCKED, 0, false); /* sets demote */ + /* + * Ordinarily, we would call dlm and its callback would call + * finish_xmote, which would call state_change() to the new state. + * Since we withdrew, we won't call dlm, so call state_change + * manually, but to the UNLOCKED state we desire. + */ + state_change(gl, LM_ST_UNLOCKED); + /* + * We skip telling dlm to do the locking, so we won't get a + * reply that would otherwise clear GLF_LOCK. So we clear it here. + */ + clear_bit(GLF_LOCK, &gl->gl_flags); + clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD); goto out; } else { @@ -885,7 +886,6 @@ __releases(&gl->gl_lockref.lock) __acquires(&gl->gl_lockref.lock) { struct gfs2_holder *gh = NULL; - int ret; if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) return; @@ -904,18 +904,14 @@ __acquires(&gl->gl_lockref.lock) } else { if (test_bit(GLF_DEMOTE, &gl->gl_flags)) gfs2_demote_wake(gl); - ret = do_promote(gl); - if (ret == 0) + if (do_promote(gl) == 0) goto out_unlock; - if (ret == 2) - goto out; gh = find_first_waiter(gl); gl->gl_target = gh->gh_state; if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) do_error(gl, 0); /* Fail queued try locks */ } do_xmote(gl, gh, gl->gl_target); -out: return; out_sched: @@ -1043,16 +1039,18 @@ static void delete_work_func(struct work_struct *work) if (gfs2_queue_delete_work(gl, 5 * HZ)) return; } - goto out; } inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino, GFS2_BLKST_UNLINKED); - if (!IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { + if (PTR_ERR(inode) == -EAGAIN && + (gfs2_queue_delete_work(gl, 5 * HZ))) + return; + } else { d_prune_aliases(inode); iput(inode); } -out: gfs2_glock_put(gl); } @@ -1160,7 +1158,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, .ln_sbd = sdp }; struct gfs2_glock *gl, *tmp; struct address_space *mapping; - struct kmem_cache *cachep; int ret = 0; gl = find_insert_glock(&name, NULL); @@ -1171,20 +1168,24 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, if (!create) return -ENOENT; - if (glops->go_flags & GLOF_ASPACE) - cachep = gfs2_glock_aspace_cachep; - else - cachep = gfs2_glock_cachep; - gl = kmem_cache_alloc(cachep, GFP_NOFS); - if (!gl) - return -ENOMEM; - + if (glops->go_flags & GLOF_ASPACE) { + struct gfs2_glock_aspace *gla = + kmem_cache_alloc(gfs2_glock_aspace_cachep, GFP_NOFS); + if (!gla) + return -ENOMEM; + gl = &gla->glock; + } else { + gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_NOFS); + if (!gl) + return -ENOMEM; + } memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb)); + gl->gl_ops = glops; if (glops->go_flags & GLOF_LVB) { gl->gl_lksb.sb_lvbptr = kzalloc(GDLM_LVB_SIZE, GFP_NOFS); if (!gl->gl_lksb.sb_lvbptr) { - kmem_cache_free(cachep, gl); + gfs2_glock_dealloc(&gl->gl_rcu); return -ENOMEM; } } @@ -1198,7 +1199,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, gl->gl_state = LM_ST_UNLOCKED; gl->gl_target = LM_ST_UNLOCKED; gl->gl_demote_state = LM_ST_EXCLUSIVE; - gl->gl_ops = glops; gl->gl_dstamp = 0; preempt_disable(); /* We use the global stats to estimate the initial per-glock stats */ @@ -1235,8 +1235,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, *glp = tmp; out_free: - kfree(gl->gl_lksb.sb_lvbptr); - kmem_cache_free(cachep, gl); + gfs2_glock_dealloc(&gl->gl_rcu); if (atomic_dec_and_test(&sdp->sd_glock_disposal)) wake_up(&sdp->sd_glock_wait); @@ -1245,7 +1244,7 @@ out: } /** - * gfs2_holder_init - initialize a struct gfs2_holder in the default way + * __gfs2_holder_init - initialize a struct gfs2_holder in the default way * @gl: the glock * @state: the state we're requesting * @flags: the modifier flags @@ -1262,7 +1261,6 @@ void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags, gh->gh_owner_pid = get_pid(task_pid(current)); gh->gh_state = state; gh->gh_flags = flags; - gh->gh_error = 0; gh->gh_iflags = 0; gfs2_glock_hold(gl); } @@ -1313,6 +1311,25 @@ static void gfs2_glock_update_hold_time(struct gfs2_glock *gl, } /** + * gfs2_glock_holder_ready - holder is ready and its error code can be collected + * @gh: the glock holder + * + * Called when a glock holder no longer needs to be waited for because it is + * now either held (HIF_HOLDER set; gh_error == 0), or acquiring the lock has + * failed (gh_error != 0). + */ + +int gfs2_glock_holder_ready(struct gfs2_holder *gh) +{ + if (gh->gh_error || (gh->gh_flags & GL_SKIP)) + return gh->gh_error; + gh->gh_error = gfs2_instantiate(gh); + if (gh->gh_error) + gfs2_glock_dq(gh); + return gh->gh_error; +} + +/** * gfs2_glock_wait - wait on a glock acquisition * @gh: the glock holder * @@ -1326,7 +1343,7 @@ int gfs2_glock_wait(struct gfs2_holder *gh) might_sleep(); wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE); gfs2_glock_update_hold_time(gh->gh_gl, start_time); - return gh->gh_error; + return gfs2_glock_holder_ready(gh); } static int glocks_pending(unsigned int num_gh, struct gfs2_holder *ghs) @@ -1354,7 +1371,6 @@ int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs) struct gfs2_sbd *sdp = ghs[0].gh_gl->gl_name.ln_sbd; int i, ret = 0, timeout = 0; unsigned long start_time = jiffies; - bool keep_waiting; might_sleep(); /* @@ -1364,53 +1380,33 @@ int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs) for (i = 0; i < num_gh; i++) timeout += ghs[i].gh_gl->gl_hold_time << 1; -wait_for_dlm: if (!wait_event_timeout(sdp->sd_async_glock_wait, - !glocks_pending(num_gh, ghs), timeout)) + !glocks_pending(num_gh, ghs), timeout)) { ret = -ESTALE; /* request timed out. */ + goto out; + } - /* - * If dlm granted all our requests, we need to adjust the glock - * minimum hold time values according to how long we waited. - * - * If our request timed out, we need to repeatedly release any held - * glocks we acquired thus far to allow dlm to acquire the remaining - * glocks without deadlocking. We cannot currently cancel outstanding - * glock acquisitions. - * - * The HIF_WAIT bit tells us which requests still need a response from - * dlm. - * - * If dlm sent us any errors, we return the first error we find. - */ - keep_waiting = false; for (i = 0; i < num_gh; i++) { - /* Skip holders we have already dequeued below. */ - if (!gfs2_holder_queued(&ghs[i])) - continue; - /* Skip holders with a pending DLM response. */ - if (test_bit(HIF_WAIT, &ghs[i].gh_iflags)) { - keep_waiting = true; - continue; - } + struct gfs2_holder *gh = &ghs[i]; + int ret2; - if (test_bit(HIF_HOLDER, &ghs[i].gh_iflags)) { - if (ret == -ESTALE) - gfs2_glock_dq(&ghs[i]); - else - gfs2_glock_update_hold_time(ghs[i].gh_gl, - start_time); + if (test_bit(HIF_HOLDER, &gh->gh_iflags)) { + gfs2_glock_update_hold_time(gh->gh_gl, + start_time); } + ret2 = gfs2_glock_holder_ready(gh); if (!ret) - ret = ghs[i].gh_error; + ret = ret2; } - if (keep_waiting) - goto wait_for_dlm; +out: + if (ret) { + for (i = 0; i < num_gh; i++) { + struct gfs2_holder *gh = &ghs[i]; - /* - * At this point, we've either acquired all locks or released them all. - */ + gfs2_glock_dq(gh); + } + } return ret; } @@ -1463,6 +1459,15 @@ void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) va_end(args); } +static inline bool pid_is_meaningful(const struct gfs2_holder *gh) +{ + if (!(gh->gh_flags & GL_NOPID)) + return true; + if (gh->gh_state == LM_ST_UNLOCKED) + return true; + return false; +} + /** * add_to_queue - Add a holder to the wait queue (but look for recursion) * @gh: the holder structure to add @@ -1489,20 +1494,27 @@ __acquires(&gl->gl_lockref.lock) if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) { if (test_bit(GLF_LOCK, &gl->gl_flags)) { - struct gfs2_holder *first_gh; + struct gfs2_holder *current_gh; - first_gh = find_first_strong_holder(gl); - try_futile = !may_grant(gl, first_gh, gh); + current_gh = find_first_strong_holder(gl); + try_futile = !may_grant(gl, current_gh, gh); } if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) goto fail; } list_for_each_entry(gh2, &gl->gl_holders, gh_list) { - if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid && - (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK) && - !test_bit(HIF_MAY_DEMOTE, &gh2->gh_iflags))) - goto trap_recursive; + if (likely(gh2->gh_owner_pid != gh->gh_owner_pid)) + continue; + if (gh->gh_gl->gl_ops->go_type == LM_TYPE_FLOCK) + continue; + if (test_bit(HIF_MAY_DEMOTE, &gh2->gh_iflags)) + continue; + if (!pid_is_meaningful(gh2)) + continue; + goto trap_recursive; + } + list_for_each_entry(gh2, &gl->gl_holders, gh_list) { if (try_futile && !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) { fail: @@ -1568,6 +1580,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh) if (test_bit(GLF_LRU, &gl->gl_flags)) gfs2_glock_remove_from_lru(gl); + gh->gh_error = 0; spin_lock(&gl->gl_lockref.lock); add_to_queue(gh); if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) && @@ -1694,6 +1707,14 @@ void gfs2_glock_dq(struct gfs2_holder *gh) struct gfs2_glock *gl = gh->gh_gl; spin_lock(&gl->gl_lockref.lock); + if (list_is_first(&gh->gh_list, &gl->gl_holders) && + !test_bit(HIF_HOLDER, &gh->gh_iflags)) { + spin_unlock(&gl->gl_lockref.lock); + gl->gl_name.ln_sbd->sd_lockstruct.ls_ops->lm_cancel(gl); + wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE); + spin_lock(&gl->gl_lockref.lock); + } + __gfs2_glock_dq(gh); spin_unlock(&gl->gl_lockref.lock); } @@ -1769,7 +1790,7 @@ static int glock_compare(const void *arg_a, const void *arg_b) } /** - * nq_m_sync - synchonously acquire more than one glock in deadlock free order + * nq_m_sync - synchronously acquire more than one glock in deadlock free order * @num_gh: the number of structures * @ghs: an array of struct gfs2_holder structures * @p: placeholder for the holder structure to pass back @@ -1790,8 +1811,6 @@ static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs, sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare, NULL); for (x = 0; x < num_gh; x++) { - p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC); - error = gfs2_glock_nq(p[x]); if (error) { while (x--) @@ -1808,7 +1827,6 @@ static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs, * @num_gh: the number of structures * @ghs: an array of struct gfs2_holder structures * - * * Returns: 0 on success (all glocks acquired), * errno on failure (no glocks acquired) */ @@ -1823,7 +1841,6 @@ int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs) case 0: return 0; case 1: - ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC); return gfs2_glock_nq(ghs); default: if (num_gh <= 4) @@ -2216,6 +2233,20 @@ static void dump_glock_func(struct gfs2_glock *gl) dump_glock(NULL, gl, true); } +static void withdraw_dq(struct gfs2_glock *gl) +{ + spin_lock(&gl->gl_lockref.lock); + if (!__lockref_is_dead(&gl->gl_lockref) && + glock_blocked_by_withdraw(gl)) + do_error(gl, LM_OUT_ERROR); /* remove pending waiters */ + spin_unlock(&gl->gl_lockref.lock); +} + +void gfs2_gl_dq_holders(struct gfs2_sbd *sdp) +{ + glock_hash_walk(withdraw_dq, sdp); +} + /** * gfs2_gl_hash_clear - Empty out the glock hash table * @sdp: the filesystem @@ -2235,20 +2266,6 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) glock_hash_walk(dump_glock_func, sdp); } -void gfs2_glock_finish_truncate(struct gfs2_inode *ip) -{ - struct gfs2_glock *gl = ip->i_gl; - int ret; - - ret = gfs2_truncatei_resume(ip); - gfs2_glock_assert_withdraw(gl, ret == 0); - - spin_lock(&gl->gl_lockref.lock); - clear_bit(GLF_LOCK, &gl->gl_flags); - run_queue(gl, 1); - spin_unlock(&gl->gl_lockref.lock); -} - static const char *state2str(unsigned state) { switch(state) { @@ -2308,19 +2325,24 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags) static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh, const char *fs_id_buf) { - struct task_struct *gh_owner = NULL; + const char *comm = "(none)"; + pid_t owner_pid = 0; char flags_buf[32]; rcu_read_lock(); - if (gh->gh_owner_pid) + if (pid_is_meaningful(gh)) { + struct task_struct *gh_owner; + + comm = "(ended)"; + owner_pid = pid_nr(gh->gh_owner_pid); gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); + if (gh_owner) + comm = gh_owner->comm; + } gfs2_print_dbg(seq, "%s H: s:%s f:%s e:%d p:%ld [%s] %pS\n", fs_id_buf, state2str(gh->gh_state), hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags), - gh->gh_error, - gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1, - gh_owner ? gh_owner->comm : "(ended)", - (void *)gh->gh_ip); + gh->gh_error, (long)owner_pid, comm, (void *)gh->gh_ip); rcu_read_unlock(); } @@ -2523,7 +2545,7 @@ int __init gfs2_glock_init(void) return -ENOMEM; } - ret = register_shrinker(&glock_shrinker); + ret = register_shrinker(&glock_shrinker, "gfs2-glock"); if (ret) { destroy_workqueue(gfs2_delete_workqueue); destroy_workqueue(glock_workqueue); @@ -2735,6 +2757,172 @@ static const struct file_operations gfs2_glstats_fops = { .release = gfs2_glocks_release, }; +struct gfs2_glockfd_iter { + struct super_block *sb; + unsigned int tgid; + struct task_struct *task; + unsigned int fd; + struct file *file; +}; + +static struct task_struct *gfs2_glockfd_next_task(struct gfs2_glockfd_iter *i) +{ + struct pid_namespace *ns = task_active_pid_ns(current); + struct pid *pid; + + if (i->task) + put_task_struct(i->task); + + rcu_read_lock(); +retry: + i->task = NULL; + pid = find_ge_pid(i->tgid, ns); + if (pid) { + i->tgid = pid_nr_ns(pid, ns); + i->task = pid_task(pid, PIDTYPE_TGID); + if (!i->task) { + i->tgid++; + goto retry; + } + get_task_struct(i->task); + } + rcu_read_unlock(); + return i->task; +} + +static struct file *gfs2_glockfd_next_file(struct gfs2_glockfd_iter *i) +{ + if (i->file) { + fput(i->file); + i->file = NULL; + } + + rcu_read_lock(); + for(;; i->fd++) { + struct inode *inode; + + i->file = task_lookup_next_fd_rcu(i->task, &i->fd); + if (!i->file) { + i->fd = 0; + break; + } + inode = file_inode(i->file); + if (inode->i_sb != i->sb) + continue; + if (get_file_rcu(i->file)) + break; + } + rcu_read_unlock(); + return i->file; +} + +static void *gfs2_glockfd_seq_start(struct seq_file *seq, loff_t *pos) +{ + struct gfs2_glockfd_iter *i = seq->private; + + if (*pos) + return NULL; + while (gfs2_glockfd_next_task(i)) { + if (gfs2_glockfd_next_file(i)) + return i; + i->tgid++; + } + return NULL; +} + +static void *gfs2_glockfd_seq_next(struct seq_file *seq, void *iter_ptr, + loff_t *pos) +{ + struct gfs2_glockfd_iter *i = seq->private; + + (*pos)++; + i->fd++; + do { + if (gfs2_glockfd_next_file(i)) + return i; + i->tgid++; + } while (gfs2_glockfd_next_task(i)); + return NULL; +} + +static void gfs2_glockfd_seq_stop(struct seq_file *seq, void *iter_ptr) +{ + struct gfs2_glockfd_iter *i = seq->private; + + if (i->file) + fput(i->file); + if (i->task) + put_task_struct(i->task); +} + +static void gfs2_glockfd_seq_show_flock(struct seq_file *seq, + struct gfs2_glockfd_iter *i) +{ + struct gfs2_file *fp = i->file->private_data; + struct gfs2_holder *fl_gh = &fp->f_fl_gh; + struct lm_lockname gl_name = { .ln_type = LM_TYPE_RESERVED }; + + if (!READ_ONCE(fl_gh->gh_gl)) + return; + + spin_lock(&i->file->f_lock); + if (gfs2_holder_initialized(fl_gh)) + gl_name = fl_gh->gh_gl->gl_name; + spin_unlock(&i->file->f_lock); + + if (gl_name.ln_type != LM_TYPE_RESERVED) { + seq_printf(seq, "%d %u %u/%llx\n", + i->tgid, i->fd, gl_name.ln_type, + (unsigned long long)gl_name.ln_number); + } +} + +static int gfs2_glockfd_seq_show(struct seq_file *seq, void *iter_ptr) +{ + struct gfs2_glockfd_iter *i = seq->private; + struct inode *inode = file_inode(i->file); + struct gfs2_glock *gl; + + inode_lock_shared(inode); + gl = GFS2_I(inode)->i_iopen_gh.gh_gl; + if (gl) { + seq_printf(seq, "%d %u %u/%llx\n", + i->tgid, i->fd, gl->gl_name.ln_type, + (unsigned long long)gl->gl_name.ln_number); + } + gfs2_glockfd_seq_show_flock(seq, i); + inode_unlock_shared(inode); + return 0; +} + +static const struct seq_operations gfs2_glockfd_seq_ops = { + .start = gfs2_glockfd_seq_start, + .next = gfs2_glockfd_seq_next, + .stop = gfs2_glockfd_seq_stop, + .show = gfs2_glockfd_seq_show, +}; + +static int gfs2_glockfd_open(struct inode *inode, struct file *file) +{ + struct gfs2_glockfd_iter *i; + struct gfs2_sbd *sdp = inode->i_private; + + i = __seq_open_private(file, &gfs2_glockfd_seq_ops, + sizeof(struct gfs2_glockfd_iter)); + if (!i) + return -ENOMEM; + i->sb = sdp->sd_vfs; + return 0; +} + +static const struct file_operations gfs2_glockfd_fops = { + .owner = THIS_MODULE, + .open = gfs2_glockfd_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + DEFINE_SEQ_ATTRIBUTE(gfs2_sbstats); void gfs2_create_debugfs_file(struct gfs2_sbd *sdp) @@ -2744,6 +2932,9 @@ void gfs2_create_debugfs_file(struct gfs2_sbd *sdp) debugfs_create_file("glocks", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp, &gfs2_glocks_fops); + debugfs_create_file("glockfd", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp, + &gfs2_glockfd_fops); + debugfs_create_file("glstats", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp, &gfs2_glstats_fops); diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 4f8642301801..0d068f4fd7d6 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -91,6 +91,7 @@ enum { #define GL_ASYNC 0x0040 #define GL_EXACT 0x0080 #define GL_SKIP 0x0100 +#define GL_NOPID 0x0200 #define GL_NOCACHE 0x0400 /* @@ -138,6 +139,11 @@ struct lm_lockops { const match_table_t *lm_tokens; }; +struct gfs2_glock_aspace { + struct gfs2_glock glock; + struct address_space mapping; +}; + extern struct workqueue_struct *gfs2_delete_workqueue; static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) { @@ -179,8 +185,11 @@ static inline int gfs2_glock_is_held_shrd(struct gfs2_glock *gl) static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl) { - if (gl->gl_ops->go_flags & GLOF_ASPACE) - return (struct address_space *)(gl + 1); + if (gl->gl_ops->go_flags & GLOF_ASPACE) { + struct gfs2_glock_aspace *gla = + container_of(gl, struct gfs2_glock_aspace, glock); + return &gla->mapping; + } return NULL; } @@ -205,6 +214,7 @@ extern void gfs2_holder_uninit(struct gfs2_holder *gh); extern int gfs2_glock_nq(struct gfs2_holder *gh); extern int gfs2_glock_poll(struct gfs2_holder *gh); extern int gfs2_instantiate(struct gfs2_holder *gh); +extern int gfs2_glock_holder_ready(struct gfs2_holder *gh); extern int gfs2_glock_wait(struct gfs2_holder *gh); extern int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs); extern void gfs2_glock_dq(struct gfs2_holder *gh); @@ -265,7 +275,7 @@ extern void gfs2_cancel_delete_work(struct gfs2_glock *gl); extern bool gfs2_delete_work_queued(const struct gfs2_glock *gl); extern void gfs2_flush_delete_work(struct gfs2_sbd *sdp); extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); -extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip); +extern void gfs2_gl_dq_holders(struct gfs2_sbd *sdp); extern void gfs2_glock_thaw(struct gfs2_sbd *sdp); extern void gfs2_glock_add_to_lru(struct gfs2_glock *gl); extern void gfs2_glock_free(struct gfs2_glock *gl); diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 650ad77c4d0b..49210a2e7ce7 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -228,7 +228,6 @@ static void rgrp_go_inval(struct gfs2_glock *gl, int flags) gfs2_rgrp_brelse(rgd); WARN_ON_ONCE(!(flags & DIO_METADATA)); truncate_inode_pages_range(mapping, start, end); - set_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags); } static void gfs2_rgrp_go_dump(struct seq_file *seq, struct gfs2_glock *gl, @@ -486,35 +485,33 @@ int gfs2_inode_refresh(struct gfs2_inode *ip) * Returns: errno */ -static int inode_go_instantiate(struct gfs2_holder *gh) +static int inode_go_instantiate(struct gfs2_glock *gl) +{ + struct gfs2_inode *ip = gl->gl_object; + + if (!ip) /* no inode to populate - read it in later */ + return 0; + + return gfs2_inode_refresh(ip); +} + +static int inode_go_held(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; - struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct gfs2_inode *ip = gl->gl_object; int error = 0; if (!ip) /* no inode to populate - read it in later */ - goto out; - - error = gfs2_inode_refresh(ip); - if (error) - goto out; + return 0; if (gh->gh_state != LM_ST_DEFERRED) inode_dio_wait(&ip->i_inode); if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) && (gl->gl_state == LM_ST_EXCLUSIVE) && - (gh->gh_state == LM_ST_EXCLUSIVE)) { - spin_lock(&sdp->sd_trunc_lock); - if (list_empty(&ip->i_trunc_list)) - list_add(&ip->i_trunc_list, &sdp->sd_trunc_list); - spin_unlock(&sdp->sd_trunc_lock); - wake_up(&sdp->sd_quota_wait); - error = 1; - } + (gh->gh_state == LM_ST_EXCLUSIVE)) + error = gfs2_truncatei_resume(ip); -out: return error; } @@ -738,6 +735,7 @@ const struct gfs2_glock_operations gfs2_inode_glops = { .go_inval = inode_go_inval, .go_demote_ok = inode_go_demote_ok, .go_instantiate = inode_go_instantiate, + .go_held = inode_go_held, .go_dump = inode_go_dump, .go_type = LM_TYPE_INODE, .go_flags = GLOF_ASPACE | GLOF_LRU | GLOF_LVB, @@ -764,6 +762,7 @@ const struct gfs2_glock_operations gfs2_freeze_glops = { const struct gfs2_glock_operations gfs2_iopen_glops = { .go_type = LM_TYPE_IOPEN, .go_callback = iopen_go_callback, + .go_dump = inode_go_dump, .go_demote_ok = iopen_go_demote_ok, .go_flags = GLOF_LRU | GLOF_NONDISK, .go_subclass = 1, diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 8c00fb389ae5..d09d9892cd05 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -219,7 +219,8 @@ struct gfs2_glock_operations { int (*go_xmote_bh)(struct gfs2_glock *gl); void (*go_inval) (struct gfs2_glock *gl, int flags); int (*go_demote_ok) (const struct gfs2_glock *gl); - int (*go_instantiate) (struct gfs2_holder *gh); + int (*go_instantiate) (struct gfs2_glock *gl); + int (*go_held)(struct gfs2_holder *gh); void (*go_dump)(struct seq_file *seq, struct gfs2_glock *gl, const char *fs_id_buf); void (*go_callback)(struct gfs2_glock *gl, bool remote); @@ -396,7 +397,6 @@ struct gfs2_inode { atomic_t i_sizehint; /* hint of the write size */ struct rw_semaphore i_rw_mutex; struct list_head i_ordered; - struct list_head i_trunc_list; __be64 *i_hash_cache; u32 i_entries; u32 i_diskflags; @@ -784,8 +784,6 @@ struct gfs2_sbd { struct mutex sd_quota_mutex; struct mutex sd_quota_sync_mutex; wait_queue_head_t sd_quota_wait; - struct list_head sd_trunc_list; - spinlock_t sd_trunc_lock; unsigned int sd_quota_slots; unsigned long *sd_quota_bitmap; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 89905f4f29bb..04a201584fa7 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -130,8 +130,26 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, if (inode->i_state & I_NEW) { struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_glock *io_gl; + int extra_flags = 0; - error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); + error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, + &ip->i_gl); + if (unlikely(error)) + goto fail; + + error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, + &io_gl); + if (unlikely(error)) + goto fail; + + if (blktype == GFS2_BLKST_UNLINKED) + extra_flags |= LM_FLAG_TRY; + else + gfs2_cancel_delete_work(io_gl); + error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, + GL_EXACT | GL_NOPID | extra_flags, + &ip->i_iopen_gh); + gfs2_glock_put(io_gl); if (unlikely(error)) goto fail; @@ -161,16 +179,6 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, set_bit(GLF_INSTANTIATE_NEEDED, &ip->i_gl->gl_flags); - error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl); - if (unlikely(error)) - goto fail; - if (blktype != GFS2_BLKST_UNLINKED) - gfs2_cancel_delete_work(io_gl); - error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); - gfs2_glock_put(io_gl); - if (unlikely(error)) - goto fail; - /* Lowest possible timestamp; will be overwritten in gfs2_dinode_in. */ inode->i_atime.tv_sec = 1LL << (8 * sizeof(inode->i_atime.tv_sec) - 1); inode->i_atime.tv_nsec = 0; @@ -206,6 +214,8 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, return inode; fail: + if (error == GLR_TRYFAILED) + error = -EAGAIN; if (gfs2_holder_initialized(&ip->i_iopen_gh)) gfs2_glock_dq_uninit(&ip->i_iopen_gh); if (gfs2_holder_initialized(&i_gh)) @@ -716,13 +726,18 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, error = insert_inode_locked4(inode, ip->i_no_addr, iget_test, &ip->i_no_addr); BUG_ON(error); - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); + error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT | GL_NOPID, + &ip->i_iopen_gh); if (error) goto fail_gunlock2; + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); + if (error) + goto fail_gunlock3; + error = gfs2_trans_begin(sdp, blocks, 0); if (error) - goto fail_gunlock2; + goto fail_gunlock3; if (blocks > 1) { ip->i_eattr = ip->i_no_addr + 1; @@ -731,10 +746,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, init_dinode(dip, ip, symname); gfs2_trans_end(sdp); - error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); - if (error) - goto fail_gunlock2; - glock_set_object(ip->i_gl, ip); glock_set_object(io_gl, ip); gfs2_set_iop(inode); @@ -745,14 +756,14 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (default_acl) { error = __gfs2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); if (error) - goto fail_gunlock3; + goto fail_gunlock4; posix_acl_release(default_acl); default_acl = NULL; } if (acl) { error = __gfs2_set_acl(inode, acl, ACL_TYPE_ACCESS); if (error) - goto fail_gunlock3; + goto fail_gunlock4; posix_acl_release(acl); acl = NULL; } @@ -760,11 +771,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, error = security_inode_init_security(&ip->i_inode, &dip->i_inode, name, &gfs2_initxattrs, NULL); if (error) - goto fail_gunlock3; + goto fail_gunlock4; error = link_dinode(dip, name, ip, &da); if (error) - goto fail_gunlock3; + goto fail_gunlock4; mark_inode_dirty(inode); d_instantiate(dentry, inode); @@ -782,9 +793,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, unlock_new_inode(inode); return error; -fail_gunlock3: +fail_gunlock4: glock_clear_object(ip->i_gl, ip); glock_clear_object(io_gl, ip); +fail_gunlock3: gfs2_glock_dq_uninit(&ip->i_iopen_gh); fail_gunlock2: gfs2_glock_put(io_gl); @@ -793,7 +805,7 @@ fail_free_inode: if (free_vfs_inode) /* else evict will do the put for us */ gfs2_glock_put(ip->i_gl); } - gfs2_rs_delete(ip, NULL); + gfs2_rs_deltree(&ip->i_res); gfs2_qa_put(ip); fail_free_acls: posix_acl_release(default_acl); diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 7b2c1f390db7..0264d514dda7 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -12,7 +12,7 @@ #include <linux/mm.h> #include "util.h" -extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask); +bool gfs2_release_folio(struct folio *folio, gfp_t gfp_mask); extern int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos, unsigned size); extern void gfs2_set_aops(struct inode *inode); diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 50578f881e6d..71911bf9ab34 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -261,6 +261,7 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, int req; u32 lkf; char strname[GDLM_STRNAME_BYTES] = ""; + int error; req = make_mode(gl->gl_name.ln_sbd, req_state); lkf = make_flags(gl, flags, req); @@ -279,8 +280,14 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, * Submit the actual lock request. */ - return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname, +again: + error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname, GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); + if (error == -EBUSY) { + msleep(20); + goto again; + } + return error; } static void gdlm_put_lock(struct gfs2_glock *gl) @@ -312,8 +319,14 @@ static void gdlm_put_lock(struct gfs2_glock *gl) return; } +again: error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK, NULL, gl); + if (error == -EBUSY) { + msleep(20); + goto again; + } + if (error) { fs_err(sdp, "gdlm_unlock %x,%llx err=%d\n", gl->gl_name.ln_type, @@ -1045,7 +1058,7 @@ restart: /* * Expand static jid arrays if necessary (by increments of RECOVER_SIZE_INC) - * to accomodate the largest slot number. (NB dlm slot numbers start at 1, + * to accommodate the largest slot number. (NB dlm slot numbers start at 1, * gfs2 jids start at 0, so jid = slot - 1) */ @@ -1289,7 +1302,7 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table) memcpy(cluster, table, strlen(table) - strlen(fsname)); fsname++; - flags = DLM_LSFL_FS | DLM_LSFL_NEWEXCL; + flags = DLM_LSFL_NEWEXCL; /* * create/join lockspace diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index f0ee3ff6f9a8..723639376ae2 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -131,7 +131,7 @@ __acquires(&sdp->sd_ail_lock) if (!mapping) continue; spin_unlock(&sdp->sd_ail_lock); - ret = generic_writepages(mapping, wbc); + ret = filemap_fdatawrite_wbc(mapping, wbc); if (need_resched()) { blk_finish_plug(plug); cond_resched(); @@ -222,8 +222,7 @@ out: spin_unlock(&sdp->sd_ail_lock); blk_finish_plug(&plug); if (ret) { - gfs2_lm(sdp, "gfs2_ail1_start_one (generic_writepages) " - "returned: %d\n", ret); + gfs2_lm(sdp, "gfs2_ail1_start_one returned: %d\n", ret); gfs2_withdraw(sdp); } trace_gfs2_ail_flush(sdp, wbc, 0); @@ -823,7 +822,7 @@ void gfs2_flush_revokes(struct gfs2_sbd *sdp) void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, u64 seq, u32 tail, u32 lblock, u32 flags, - int op_flags) + blk_opf_t op_flags) { struct gfs2_log_header *lh; u32 hash, crc; @@ -905,7 +904,7 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, static void log_write_header(struct gfs2_sbd *sdp, u32 flags) { - int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC; + blk_opf_t op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC; enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); gfs2_assert_withdraw(sdp, (state != SFS_FROZEN)); diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index fc905c2af53c..653cffcbf869 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -82,7 +82,7 @@ extern void gfs2_log_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr, unsigned int *extra_revokes); extern void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, u64 seq, u32 tail, u32 lblock, u32 flags, - int op_flags); + blk_opf_t op_flags); extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 type); extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index ca0bb3a73912..1902413d5d12 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -238,7 +238,7 @@ static void gfs2_end_log_write(struct bio *bio) * there is no pending bio, then this is a no-op. */ -void gfs2_log_submit_bio(struct bio **biop, int opf) +void gfs2_log_submit_bio(struct bio **biop, blk_opf_t opf) { struct bio *bio = *biop; if (bio) { @@ -265,10 +265,9 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno, bio_end_io_t *end_io) { struct super_block *sb = sdp->sd_vfs; - struct bio *bio = bio_alloc(GFP_NOIO, BIO_MAX_VECS); + struct bio *bio = bio_alloc(sb->s_bdev, BIO_MAX_VECS, 0, GFP_NOIO); bio->bi_iter.bi_sector = blkno << sdp->sd_fsb2bb_shift; - bio_set_dev(bio, sb->s_bdev); bio->bi_end_io = end_io; bio->bi_private = sdp; @@ -293,7 +292,7 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno, */ static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno, - struct bio **biop, int op, + struct bio **biop, enum req_op op, bio_end_io_t *end_io, bool flush) { struct bio *bio = *biop; @@ -453,47 +452,45 @@ static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd, * @head: The journal head to start from * @done: If set, perform only cleanup, else search and set if found. * - * Find the page with 'index' in the journal's mapping. Search the page for + * Find the folio with 'index' in the journal's mapping. Search the folio for * the journal head if requested (cleanup == false). Release refs on the - * page so the page cache can reclaim it (put_page() twice). We grabbed a - * reference on this page two times, first when we did a find_or_create_page() - * to obtain the page to add it to the bio and second when we do a - * find_get_page() here to get the page to wait on while I/O on it is being + * folio so the page cache can reclaim it. We grabbed a + * reference on this folio twice, first when we did a find_or_create_page() + * to obtain the folio to add it to the bio and second when we do a + * filemap_get_folio() here to get the folio to wait on while I/O on it is being * completed. - * This function is also used to free up a page we might've grabbed but not + * This function is also used to free up a folio we might've grabbed but not * used. Maybe we added it to a bio, but not submitted it for I/O. Or we * submitted the I/O, but we already found the jhead so we only need to drop - * our references to the page. + * our references to the folio. */ static void gfs2_jhead_process_page(struct gfs2_jdesc *jd, unsigned long index, struct gfs2_log_header_host *head, bool *done) { - struct page *page; + struct folio *folio; - page = find_get_page(jd->jd_inode->i_mapping, index); - wait_on_page_locked(page); + folio = filemap_get_folio(jd->jd_inode->i_mapping, index); - if (PageError(page)) + folio_wait_locked(folio); + if (folio_test_error(folio)) *done = true; if (!*done) - *done = gfs2_jhead_pg_srch(jd, head, page); + *done = gfs2_jhead_pg_srch(jd, head, &folio->page); - put_page(page); /* Once for find_get_page */ - put_page(page); /* Once more for find_or_create_page */ + /* filemap_get_folio() and the earlier find_or_create_page() */ + folio_put_refs(folio, 2); } static struct bio *gfs2_chain_bio(struct bio *prev, unsigned int nr_iovecs) { struct bio *new; - new = bio_alloc(GFP_NOIO, nr_iovecs); - bio_copy_dev(new, prev); + new = bio_alloc(prev->bi_bdev, nr_iovecs, prev->bi_opf, GFP_NOIO); + bio_clone_blkg_association(new, prev); new->bi_iter.bi_sector = bio_end_sector(prev); - new->bi_opf = prev->bi_opf; - new->bi_write_hint = prev->bi_write_hint; bio_chain(new, prev); submit_bio(prev); return new; diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h index f707601597dc..1412ffba1d44 100644 --- a/fs/gfs2/lops.h +++ b/fs/gfs2/lops.h @@ -16,7 +16,7 @@ extern u64 gfs2_log_bmap(struct gfs2_jdesc *jd, unsigned int lbn); extern void gfs2_log_write(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, struct page *page, unsigned size, unsigned offset, u64 blkno); -extern void gfs2_log_submit_bio(struct bio **biop, int opf); +extern void gfs2_log_submit_bio(struct bio **biop, blk_opf_t opf); extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh); extern int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, bool keep_cache); diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 28d0eb23e18e..afcb32854f14 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -38,7 +38,6 @@ static void gfs2_init_inode_once(void *foo) inode_init_once(&ip->i_inode); atomic_set(&ip->i_sizehint, 0); init_rwsem(&ip->i_rw_mutex); - INIT_LIST_HEAD(&ip->i_trunc_list); INIT_LIST_HEAD(&ip->i_ordered); ip->i_qadata = NULL; gfs2_holder_mark_uninitialized(&ip->i_rgd_gh); @@ -62,11 +61,10 @@ static void gfs2_init_glock_once(void *foo) static void gfs2_init_gl_aspace_once(void *foo) { - struct gfs2_glock *gl = foo; - struct address_space *mapping = (struct address_space *)(gl + 1); + struct gfs2_glock_aspace *gla = foo; - gfs2_init_glock_once(gl); - address_space_init_once(mapping); + gfs2_init_glock_once(&gla->glock); + address_space_init_once(&gla->mapping); } /** @@ -104,8 +102,7 @@ static int __init init_gfs2_fs(void) goto fail_cachep1; gfs2_glock_aspace_cachep = kmem_cache_create("gfs2_glock(aspace)", - sizeof(struct gfs2_glock) + - sizeof(struct address_space), + sizeof(struct gfs2_glock_aspace), 0, 0, gfs2_init_gl_aspace_once); if (!gfs2_glock_aspace_cachep) @@ -150,18 +147,10 @@ static int __init init_gfs2_fs(void) if (!gfs2_trans_cachep) goto fail_cachep8; - error = register_shrinker(&gfs2_qd_shrinker); + error = register_shrinker(&gfs2_qd_shrinker, "gfs2-qd"); if (error) goto fail_shrinker; - error = register_filesystem(&gfs2_fs_type); - if (error) - goto fail_fs1; - - error = register_filesystem(&gfs2meta_fs_type); - if (error) - goto fail_fs2; - error = -ENOMEM; gfs_recovery_wq = alloc_workqueue("gfs_recovery", WQ_MEM_RECLAIM | WQ_FREEZABLE, 0); @@ -183,11 +172,23 @@ static int __init init_gfs2_fs(void) goto fail_mempool; gfs2_register_debugfs(); + error = register_filesystem(&gfs2_fs_type); + if (error) + goto fail_fs1; + + error = register_filesystem(&gfs2meta_fs_type); + if (error) + goto fail_fs2; + pr_info("GFS2 installed\n"); return 0; +fail_fs2: + unregister_filesystem(&gfs2_fs_type); +fail_fs1: + mempool_destroy(gfs2_page_pool); fail_mempool: destroy_workqueue(gfs2_freeze_wq); fail_wq3: @@ -195,10 +196,6 @@ fail_wq3: fail_wq2: destroy_workqueue(gfs_recovery_wq); fail_wq1: - unregister_filesystem(&gfs2meta_fs_type); -fail_fs2: - unregister_filesystem(&gfs2_fs_type); -fail_fs1: unregister_shrinker(&gfs2_qd_shrinker); fail_shrinker: kmem_cache_destroy(gfs2_trans_cachep); diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 72d30a682ece..6ed728aae9a5 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -34,7 +34,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb { struct buffer_head *bh, *head; int nr_underway = 0; - int write_flags = REQ_META | REQ_PRIO | wbc_to_write_flags(wbc); + blk_opf_t write_flags = REQ_META | REQ_PRIO | wbc_to_write_flags(wbc); BUG_ON(!PageLocked(page)); BUG_ON(!page_has_buffers(page)); @@ -75,7 +75,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb do { struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { - submit_bh(REQ_OP_WRITE, write_flags, bh); + submit_bh(REQ_OP_WRITE | write_flags, bh); nr_underway++; } bh = next; @@ -89,15 +89,17 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb } const struct address_space_operations gfs2_meta_aops = { - .set_page_dirty = __set_page_dirty_buffers, + .dirty_folio = block_dirty_folio, + .invalidate_folio = block_invalidate_folio, .writepage = gfs2_aspace_writepage, - .releasepage = gfs2_releasepage, + .release_folio = gfs2_release_folio, }; const struct address_space_operations gfs2_rgrp_aops = { - .set_page_dirty = __set_page_dirty_buffers, + .dirty_folio = block_dirty_folio, + .invalidate_folio = block_invalidate_folio, .writepage = gfs2_aspace_writepage, - .releasepage = gfs2_releasepage, + .release_folio = gfs2_release_folio, }; /** @@ -215,16 +217,14 @@ static void gfs2_meta_read_endio(struct bio *bio) * Submit several consecutive buffer head I/O requests as a single bio I/O * request. (See submit_bh_wbc.) */ -static void gfs2_submit_bhs(int op, int op_flags, struct buffer_head *bhs[], - int num) +static void gfs2_submit_bhs(blk_opf_t opf, struct buffer_head *bhs[], int num) { while (num > 0) { struct buffer_head *bh = *bhs; struct bio *bio; - bio = bio_alloc(GFP_NOIO, num); + bio = bio_alloc(bh->b_bdev, num, opf, GFP_NOIO); bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); - bio_set_dev(bio, bh->b_bdev); while (num > 0) { bh = *bhs; if (!bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh))) { @@ -235,7 +235,6 @@ static void gfs2_submit_bhs(int op, int op_flags, struct buffer_head *bhs[], num--; } bio->bi_end_io = gfs2_meta_read_endio; - bio_set_op_attrs(bio, op, op_flags); submit_bio(bio); } } @@ -288,7 +287,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, } } - gfs2_submit_bhs(REQ_OP_READ, REQ_META | REQ_PRIO, bhs, num); + gfs2_submit_bhs(REQ_OP_READ | REQ_META | REQ_PRIO, bhs, num); if (!(flags & DIO_WAIT)) return 0; @@ -526,8 +525,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) if (buffer_uptodate(first_bh)) goto out; - if (!buffer_locked(first_bh)) - ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &first_bh); + bh_read_nowait(first_bh, REQ_META | REQ_PRIO); dblock++; extlen--; @@ -535,10 +533,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) while (extlen) { bh = gfs2_getbuf(gl, dblock, CREATE); - if (!buffer_uptodate(bh) && !buffer_locked(bh)) - ll_rw_block(REQ_OP_READ, - REQ_RAHEAD | REQ_META | REQ_PRIO, - 1, &bh); + bh_readahead(bh, REQ_RAHEAD | REQ_META | REQ_PRIO); brelse(bh); dblock++; extlen--; diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index 21880d72081a..d0a58cdd433a 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h @@ -40,9 +40,11 @@ extern const struct address_space_operations gfs2_rgrp_aops; static inline struct gfs2_sbd *gfs2_mapping2sbd(struct address_space *mapping) { struct inode *inode = mapping->host; - if (mapping->a_ops == &gfs2_meta_aops) - return (((struct gfs2_glock *)mapping) - 1)->gl_name.ln_sbd; - else if (mapping->a_ops == &gfs2_rgrp_aops) + if (mapping->a_ops == &gfs2_meta_aops) { + struct gfs2_glock_aspace *gla = + container_of(mapping, struct gfs2_glock_aspace, mapping); + return gla->glock.gl_name.ln_sbd; + } else if (mapping->a_ops == &gfs2_rgrp_aops) return container_of(mapping, struct gfs2_sbd, sd_aspace); else return inode->i_sb->s_fs_info; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 7f8410d8fdc1..c0cf1d2d0ef5 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -106,8 +106,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) mutex_init(&sdp->sd_quota_mutex); mutex_init(&sdp->sd_quota_sync_mutex); init_waitqueue_head(&sdp->sd_quota_wait); - INIT_LIST_HEAD(&sdp->sd_trunc_list); - spin_lock_init(&sdp->sd_trunc_lock); spin_lock_init(&sdp->sd_bitmap_lock); INIT_LIST_HEAD(&sdp->sd_sc_inodes_list); @@ -180,7 +178,10 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent) pr_warn("Invalid block size\n"); return -EINVAL; } - + if (sb->sb_bsize_shift != ffs(sb->sb_bsize) - 1) { + pr_warn("Invalid block size shift\n"); + return -EINVAL; + } return 0; } @@ -251,14 +252,12 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent) ClearPageDirty(page); lock_page(page); - bio = bio_alloc(GFP_NOFS, 1); + bio = bio_alloc(sb->s_bdev, 1, REQ_OP_READ | REQ_META, GFP_NOFS); bio->bi_iter.bi_sector = sector * (sb->s_blocksize >> 9); - bio_set_dev(bio, sb->s_bdev); bio_add_page(bio, page, PAGE_SIZE, 0); bio->bi_end_io = end_bio_io_page; bio->bi_private = page; - bio_set_op_attrs(bio, REQ_OP_READ, REQ_META); submit_bio(bio); wait_on_page_locked(page); bio_put(bio); @@ -385,8 +384,10 @@ static int init_names(struct gfs2_sbd *sdp, int silent) if (!table[0]) table = sdp->sd_vfs->s_id; - strlcpy(sdp->sd_proto_name, proto, GFS2_FSNAME_LEN); - strlcpy(sdp->sd_table_name, table, GFS2_FSNAME_LEN); + BUILD_BUG_ON(GFS2_LOCKNAME_LEN > GFS2_FSNAME_LEN); + + strscpy(sdp->sd_proto_name, proto, GFS2_LOCKNAME_LEN); + strscpy(sdp->sd_table_name, table, GFS2_LOCKNAME_LEN); table = sdp->sd_table_name; while ((table = strchr(table, '/'))) @@ -405,7 +406,8 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh, error = gfs2_glock_nq_num(sdp, GFS2_MOUNT_LOCK, &gfs2_nondisk_glops, - LM_ST_EXCLUSIVE, LM_FLAG_NOEXP | GL_NOCACHE, + LM_ST_EXCLUSIVE, + LM_FLAG_NOEXP | GL_NOCACHE | GL_NOPID, mount_gh); if (error) { fs_err(sdp, "can't acquire mount glock: %d\n", error); @@ -415,7 +417,7 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh, error = gfs2_glock_nq_num(sdp, GFS2_LIVE_LOCK, &gfs2_nondisk_glops, LM_ST_SHARED, - LM_FLAG_NOEXP | GL_EXACT, + LM_FLAG_NOEXP | GL_EXACT | GL_NOPID, &sdp->sd_live_gh); if (error) { fs_err(sdp, "can't acquire live glock: %d\n", error); @@ -691,7 +693,7 @@ static int init_statfs(struct gfs2_sbd *sdp) iput(pn); pn = NULL; ip = GFS2_I(sdp->sd_sc_inode); - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_NOPID, &sdp->sd_sc_gh); if (error) { fs_err(sdp, "can't lock local \"sc\" file: %d\n", error); @@ -780,7 +782,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) error = gfs2_glock_nq_num(sdp, sdp->sd_lockstruct.ls_jid, &gfs2_journal_glops, LM_ST_EXCLUSIVE, - LM_FLAG_NOEXP | GL_NOCACHE, + LM_FLAG_NOEXP | GL_NOCACHE | GL_NOPID, &sdp->sd_journal_gh); if (error) { fs_err(sdp, "can't acquire journal glock: %d\n", error); @@ -790,7 +792,8 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) ip = GFS2_I(sdp->sd_jdesc->jd_inode); sdp->sd_jinode_gl = ip->i_gl; error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, - LM_FLAG_NOEXP | GL_EXACT | GL_NOCACHE, + LM_FLAG_NOEXP | GL_EXACT | + GL_NOCACHE | GL_NOPID, &sdp->sd_jinode_gh); if (error) { fs_err(sdp, "can't acquire journal inode glock: %d\n", @@ -961,7 +964,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo) pn = NULL; ip = GFS2_I(sdp->sd_qc_inode); - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_NOPID, &sdp->sd_qc_gh); if (error) { fs_err(sdp, "can't lock local \"qc\" file: %d\n", error); @@ -1443,13 +1446,13 @@ static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param) switch (o) { case Opt_lockproto: - strlcpy(args->ar_lockproto, param->string, GFS2_LOCKNAME_LEN); + strscpy(args->ar_lockproto, param->string, GFS2_LOCKNAME_LEN); break; case Opt_locktable: - strlcpy(args->ar_locktable, param->string, GFS2_LOCKNAME_LEN); + strscpy(args->ar_locktable, param->string, GFS2_LOCKNAME_LEN); break; case Opt_hostdata: - strlcpy(args->ar_hostdata, param->string, GFS2_LOCKNAME_LEN); + strscpy(args->ar_hostdata, param->string, GFS2_LOCKNAME_LEN); break; case Opt_spectator: args->ar_spectator = 1; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index be0997e24d60..1ed17226d9ed 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -365,11 +365,12 @@ static void slot_put(struct gfs2_quota_data *qd) static int bh_get(struct gfs2_quota_data *qd) { struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd; - struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode); + struct inode *inode = sdp->sd_qc_inode; + struct gfs2_inode *ip = GFS2_I(inode); unsigned int block, offset; struct buffer_head *bh; + struct iomap iomap = { }; int error; - struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 }; mutex_lock(&sdp->sd_quota_mutex); @@ -381,11 +382,17 @@ static int bh_get(struct gfs2_quota_data *qd) block = qd->qd_slot / sdp->sd_qc_per_block; offset = qd->qd_slot % sdp->sd_qc_per_block; - bh_map.b_size = BIT(ip->i_inode.i_blkbits); - error = gfs2_block_map(&ip->i_inode, block, &bh_map, 0); + error = gfs2_iomap_get(inode, + (loff_t)block << inode->i_blkbits, + i_blocksize(inode), &iomap); if (error) goto fail; - error = gfs2_meta_read(ip->i_gl, bh_map.b_blocknr, DIO_WAIT, 0, &bh); + error = -ENOENT; + if (iomap.type != IOMAP_MAPPED) + goto fail; + + error = gfs2_meta_read(ip->i_gl, iomap.addr >> inode->i_blkbits, + DIO_WAIT, 0, &bh); if (error) goto fail; error = -EIO; @@ -443,9 +450,8 @@ static int qd_check_sync(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd, static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp) { - struct gfs2_quota_data *qd = NULL; + struct gfs2_quota_data *qd = NULL, *iter; int error; - int found = 0; *qdp = NULL; @@ -454,15 +460,13 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp) spin_lock(&qd_lock); - list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) { - found = qd_check_sync(sdp, qd, &sdp->sd_quota_sync_gen); - if (found) + list_for_each_entry(iter, &sdp->sd_quota_list, qd_list) { + if (qd_check_sync(sdp, iter, &sdp->sd_quota_sync_gen)) { + qd = iter; break; + } } - if (!found) - qd = NULL; - spin_unlock(&qd_lock); if (qd) { @@ -531,34 +535,42 @@ static void qdsb_put(struct gfs2_quota_data *qd) */ int gfs2_qa_get(struct gfs2_inode *ip) { - int error = 0; struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + struct inode *inode = &ip->i_inode; if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) return 0; - down_write(&ip->i_rw_mutex); + spin_lock(&inode->i_lock); if (ip->i_qadata == NULL) { - ip->i_qadata = kmem_cache_zalloc(gfs2_qadata_cachep, GFP_NOFS); - if (!ip->i_qadata) { - error = -ENOMEM; - goto out; - } + struct gfs2_qadata *tmp; + + spin_unlock(&inode->i_lock); + tmp = kmem_cache_zalloc(gfs2_qadata_cachep, GFP_NOFS); + if (!tmp) + return -ENOMEM; + + spin_lock(&inode->i_lock); + if (ip->i_qadata == NULL) + ip->i_qadata = tmp; + else + kmem_cache_free(gfs2_qadata_cachep, tmp); } ip->i_qadata->qa_ref++; -out: - up_write(&ip->i_rw_mutex); - return error; + spin_unlock(&inode->i_lock); + return 0; } void gfs2_qa_put(struct gfs2_inode *ip) { - down_write(&ip->i_rw_mutex); + struct inode *inode = &ip->i_inode; + + spin_lock(&inode->i_lock); if (ip->i_qadata && --ip->i_qadata->qa_ref == 0) { kmem_cache_free(gfs2_qadata_cachep, ip->i_qadata); ip->i_qadata = NULL; } - up_write(&ip->i_rw_mutex); + spin_unlock(&inode->i_lock); } int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) @@ -733,12 +745,8 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index, } if (PageUptodate(page)) set_buffer_uptodate(bh); - if (!buffer_uptodate(bh)) { - ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &bh); - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) - goto unlock_out; - } + if (bh_read(bh, REQ_META | REQ_PRIO) < 0) + goto unlock_out; if (gfs2_is_jdata(ip)) gfs2_trans_add_data(ip->i_gl, bh); else @@ -1505,25 +1513,6 @@ static void quotad_check_timeo(struct gfs2_sbd *sdp, const char *msg, } } -static void quotad_check_trunc_list(struct gfs2_sbd *sdp) -{ - struct gfs2_inode *ip; - - while(1) { - ip = NULL; - spin_lock(&sdp->sd_trunc_lock); - if (!list_empty(&sdp->sd_trunc_list)) { - ip = list_first_entry(&sdp->sd_trunc_list, - struct gfs2_inode, i_trunc_list); - list_del_init(&ip->i_trunc_list); - } - spin_unlock(&sdp->sd_trunc_lock); - if (ip == NULL) - return; - gfs2_glock_finish_truncate(ip); - } -} - void gfs2_wake_up_statfs(struct gfs2_sbd *sdp) { if (!sdp->sd_statfs_force_sync) { sdp->sd_statfs_force_sync = 1; @@ -1546,7 +1535,6 @@ int gfs2_quotad(void *data) unsigned long quotad_timeo = 0; unsigned long t = 0; DEFINE_WAIT(wait); - int empty; while (!kthread_should_stop()) { @@ -1567,19 +1555,13 @@ int gfs2_quotad(void *data) quotad_check_timeo(sdp, "sync", gfs2_quota_sync, t, "ad_timeo, &tune->gt_quota_quantum); - /* Check for & recover partially truncated inodes */ - quotad_check_trunc_list(sdp); - try_to_freeze(); bypass: t = min(quotad_timeo, statfs_timeo); prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_INTERRUPTIBLE); - spin_lock(&sdp->sd_trunc_lock); - empty = list_empty(&sdp->sd_trunc_list); - spin_unlock(&sdp->sd_trunc_lock); - if (empty && !sdp->sd_statfs_force_sync) + if (!sdp->sd_statfs_force_sync) t -= schedule_timeout(t); else t = 0; diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 016ed1b2ca1d..2bb085a72e8e 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -55,17 +55,16 @@ int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where) { struct list_head *head = &jd->jd_revoke_list; - struct gfs2_revoke_replay *rr; - int found = 0; + struct gfs2_revoke_replay *rr = NULL, *iter; - list_for_each_entry(rr, head, rr_list) { - if (rr->rr_blkno == blkno) { - found = 1; + list_for_each_entry(iter, head, rr_list) { + if (iter->rr_blkno == blkno) { + rr = iter; break; } } - if (found) { + if (rr) { rr->rr_where = where; return 0; } @@ -83,18 +82,17 @@ int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where) int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where) { - struct gfs2_revoke_replay *rr; + struct gfs2_revoke_replay *rr = NULL, *iter; int wrap, a, b, revoke; - int found = 0; - list_for_each_entry(rr, &jd->jd_revoke_list, rr_list) { - if (rr->rr_blkno == blkno) { - found = 1; + list_for_each_entry(iter, &jd->jd_revoke_list, rr_list) { + if (iter->rr_blkno == blkno) { + rr = iter; break; } } - if (!found) + if (!rr) return 0; wrap = (rr->rr_where < jd->jd_replay_tail); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 0fb3c01bc557..f602fb844951 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -680,13 +680,14 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs) /** * gfs2_rs_delete - delete a multi-block reservation * @ip: The inode for this reservation - * @wcount: The inode's write count, or NULL * */ -void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount) +void gfs2_rs_delete(struct gfs2_inode *ip) { + struct inode *inode = &ip->i_inode; + down_write(&ip->i_rw_mutex); - if ((wcount == NULL) || (atomic_read(wcount) <= 1)) + if (atomic_read(&inode->i_writecount) <= 1) gfs2_rs_deltree(&ip->i_res); up_write(&ip->i_rw_mutex); } @@ -922,15 +923,15 @@ static int read_rindex_entry(struct gfs2_inode *ip) spin_lock_init(&rgd->rd_rsspin); mutex_init(&rgd->rd_mutex); - error = compute_bitstructs(rgd); - if (error) - goto fail; - error = gfs2_glock_get(sdp, rgd->rd_addr, &gfs2_rgrp_glops, CREATE, &rgd->rd_gl); if (error) goto fail; + error = compute_bitstructs(rgd); + if (error) + goto fail_glock; + rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; rgd->rd_flags &= ~GFS2_RDF_PREFERRED; if (rgd->rd_data > sdp->sd_max_rg_data) @@ -944,6 +945,7 @@ static int read_rindex_entry(struct gfs2_inode *ip) } error = 0; /* someone else read in the rgrp; free it and ignore it */ +fail_glock: gfs2_glock_put(rgd->rd_gl); fail: @@ -1194,9 +1196,8 @@ static void rgrp_set_bitmap_flags(struct gfs2_rgrpd *rgd) * Returns: errno */ -int gfs2_rgrp_go_instantiate(struct gfs2_holder *gh) +int gfs2_rgrp_go_instantiate(struct gfs2_glock *gl) { - struct gfs2_glock *gl = gh->gh_gl; struct gfs2_rgrpd *rgd = gl->gl_object; struct gfs2_sbd *sdp = rgd->rd_sbd; unsigned int length = rgd->rd_length; @@ -1313,7 +1314,7 @@ int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, u64 blk; sector_t start = 0; sector_t nr_blks = 0; - int rv; + int rv = -EIO; unsigned int x; u32 trimmed = 0; u8 diff; @@ -1369,7 +1370,7 @@ fail: if (sdp->sd_args.ar_discard) fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem\n", rv); sdp->sd_args.ar_discard = 0; - return -EIO; + return rv; } /** @@ -1384,7 +1385,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp) { struct inode *inode = file_inode(filp); struct gfs2_sbd *sdp = GFS2_SB(inode); - struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev); + struct block_device *bdev = sdp->sd_vfs->s_bdev; struct buffer_head *bh; struct gfs2_rgrpd *rgd; struct gfs2_rgrpd *rgd_end; @@ -1403,7 +1404,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp) if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) return -EROFS; - if (!blk_queue_discard(q)) + if (!bdev_max_discard_sectors(bdev)) return -EOPNOTSUPP; if (copy_from_user(&r, argp, sizeof(r))) @@ -1415,8 +1416,8 @@ int gfs2_fitrim(struct file *filp, void __user *argp) start = r.start >> bs_shift; end = start + (r.len >> bs_shift); - minlen = max_t(u64, r.minlen, - q->limits.discard_granularity) >> bs_shift; + minlen = max_t(u64, r.minlen, sdp->sd_sb.sb_bsize); + minlen = max_t(u64, minlen, bdev_discard_granularity(bdev)) >> bs_shift; if (end <= start || minlen > sdp->sd_max_rg_data) return -EINVAL; @@ -2718,12 +2719,15 @@ void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate * and initialize an array of glock holders for them * @rlist: the list of resource groups + * @state: the state we're requesting + * @flags: the modifier flags * * FIXME: Don't use NOFAIL * */ -void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist) +void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, + unsigned int state, u16 flags) { unsigned int x; @@ -2731,8 +2735,8 @@ void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist) sizeof(struct gfs2_holder), GFP_NOFS | __GFP_NOFAIL); for (x = 0; x < rlist->rl_rgrps; x++) - gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, LM_ST_EXCLUSIVE, - LM_FLAG_NODE_SCOPE, &rlist->rl_ghs[x]); + gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, state, flags, + &rlist->rl_ghs[x]); } /** diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 3e2ca1fb4305..00b30cf893af 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -31,7 +31,7 @@ extern struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd); extern void gfs2_clear_rgrpd(struct gfs2_sbd *sdp); extern int gfs2_rindex_update(struct gfs2_sbd *sdp); extern void gfs2_free_clones(struct gfs2_rgrpd *rgd); -extern int gfs2_rgrp_go_instantiate(struct gfs2_holder *gh); +extern int gfs2_rgrp_go_instantiate(struct gfs2_glock *gl); extern void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd); extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); @@ -45,7 +45,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, bool dinode, u64 *generation); extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs); -extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount); +extern void gfs2_rs_delete(struct gfs2_inode *ip); extern void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, u64 bstart, u32 blen, int meta); extern void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, @@ -64,7 +64,8 @@ struct gfs2_rgrp_list { extern void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, u64 block); -extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist); +extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, + unsigned int state, u16 flags); extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); extern u64 gfs2_ri_total(struct gfs2_sbd *sdp); extern void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd, diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 0f93e8beca4d..b018957a1bb2 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -346,7 +346,8 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp) } error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_EXCLUSIVE, - LM_FLAG_NOEXP, &sdp->sd_freeze_gh); + LM_FLAG_NOEXP | GL_NOPID, + &sdp->sd_freeze_gh); if (error) goto out; @@ -1196,7 +1197,7 @@ static bool gfs2_upgrade_iopen_glock(struct inode *inode) gfs2_glock_dq(gh); return false; } - return true; + return gfs2_glock_holder_ready(gh) == 0; } /** @@ -1244,11 +1245,9 @@ static enum dinode_demise evict_should_delete(struct inode *inode, if (ret) return SHOULD_NOT_DELETE_DINODE; - if (test_bit(GLF_INSTANTIATE_NEEDED, &ip->i_gl->gl_flags)) { - ret = gfs2_instantiate(gh); - if (ret) - return SHOULD_NOT_DELETE_DINODE; - } + ret = gfs2_instantiate(gh); + if (ret) + return SHOULD_NOT_DELETE_DINODE; /* * The inode may have been recreated in the meantime. @@ -1398,7 +1397,7 @@ out: truncate_inode_pages_final(&inode->i_data); if (ip->i_qadata) gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0); - gfs2_rs_delete(ip, NULL); + gfs2_rs_deltree(&ip->i_res); gfs2_ordered_del_inode(ip); clear_inode(inode); gfs2_dir_hash_inval(ip); @@ -1427,7 +1426,7 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb) { struct gfs2_inode *ip; - ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL); + ip = alloc_inode_sb(sb, gfs2_inode_cachep, GFP_KERNEL); if (!ip) return NULL; ip->i_flags = 0; diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index c0a34d9ddee4..d87ea98cf535 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -15,7 +15,7 @@ #include <linux/kobject.h> #include <linux/uaccess.h> #include <linux/gfs2_ondisk.h> -#include <linux/genhd.h> +#include <linux/blkdev.h> #include "gfs2.h" #include "incore.h" @@ -767,8 +767,7 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp) wait_for_completion(&sdp->sd_kobj_unregister); } -static int gfs2_uevent(struct kset *kset, struct kobject *kobj, - struct kobj_uevent_env *env) +static int gfs2_uevent(struct kobject *kobj, struct kobj_uevent_env *env) { struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); struct super_block *s = sdp->sd_vfs; diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 8241029a2a5d..7a6aeffcdf5c 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -164,6 +164,11 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) } if (!ret) gfs2_make_fs_ro(sdp); + /* + * Dequeue any pending non-system glock holders that can no + * longer be granted because the file system is withdrawn. + */ + gfs2_gl_dq_holders(sdp); gfs2_freeze_unlock(&freeze_gh); } @@ -204,6 +209,7 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) * exception code in glock_dq. */ iput(inode); + sdp->sd_jdesc->jd_inode = NULL; /* * Wait until the journal inode's glock is freed. This allows try locks * on other nodes to be successful, otherwise we remain the owner of @@ -226,7 +232,8 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) */ fs_warn(sdp, "Requesting recovery of jid %d.\n", sdp->sd_lockstruct.ls_jid); - gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | LM_FLAG_NOEXP, + gfs2_holder_reinit(LM_ST_EXCLUSIVE, + LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | GL_NOPID, &sdp->sd_live_gh); msleep(GL_GLOCK_MAX_HOLD); /* @@ -251,7 +258,8 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) fs_warn(sdp, "Unable to recover our journal jid %d.\n", sdp->sd_lockstruct.ls_jid); gfs2_glock_dq_wait(&sdp->sd_live_gh); - gfs2_holder_reinit(LM_ST_SHARED, LM_FLAG_NOEXP | GL_EXACT, + gfs2_holder_reinit(LM_ST_SHARED, + LM_FLAG_NOEXP | GL_EXACT | GL_NOPID, &sdp->sd_live_gh); gfs2_glock_nq(&sdp->sd_live_gh); } diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 0c5650fe1fd1..f6a66050380e 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -1313,7 +1313,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip) else goto out; - gfs2_rlist_alloc(&rlist); + gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, LM_FLAG_NODE_SCOPE); for (x = 0; x < rlist.rl_rgrps; x++) { rgd = gfs2_glock2rgrp(rlist.rl_ghs[x].gh_gl); |