diff options
Diffstat (limited to 'fs/gfs2/rgrp.c')
-rw-r--r-- | fs/gfs2/rgrp.c | 654 |
1 files changed, 368 insertions, 286 deletions
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index e7bf91ec231c..f602fb844951 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -36,6 +36,24 @@ #define BFITNOENT ((u32)~0) #define NO_BLOCK ((u64)~0) +struct gfs2_rbm { + struct gfs2_rgrpd *rgd; + u32 offset; /* The offset is bitmap relative */ + int bii; /* Bitmap index */ +}; + +static inline struct gfs2_bitmap *rbm_bi(const struct gfs2_rbm *rbm) +{ + return rbm->rgd->rd_bits + rbm->bii; +} + +static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm) +{ + BUG_ON(rbm->offset >= rbm->rgd->rd_data); + return rbm->rgd->rd_data0 + (rbm_bi(rbm)->bi_start * GFS2_NBBY) + + rbm->offset; +} + /* * These routines are used by the resource group routines (rgrp.c) * to keep track of block allocation. Each block is represented by two @@ -61,7 +79,7 @@ static const char valid_change[16] = { }; static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, - const struct gfs2_inode *ip, bool nowrap); + struct gfs2_blkreserv *rs, bool nowrap); /** @@ -175,7 +193,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) /** * rs_cmp - multi-block reservation range compare - * @blk: absolute file system block number of the new reservation + * @start: start of the new reservation * @len: number of blocks in the new reservation * @rs: existing reservation to compare against * @@ -183,13 +201,11 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) * -1 if the block range is before the start of the reservation * 0 if the block range overlaps with the reservation */ -static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) +static inline int rs_cmp(u64 start, u32 len, struct gfs2_blkreserv *rs) { - u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm); - - if (blk >= startblk + rs->rs_free) + if (start >= rs->rs_start + rs->rs_requested) return 1; - if (blk + len - 1 < startblk) + if (rs->rs_start >= start + len) return -1; return 0; } @@ -277,29 +293,38 @@ static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) } /** - * gfs2_rbm_incr - increment an rbm structure + * gfs2_rbm_add - add a number of blocks to an rbm * @rbm: The rbm with rgd already set correctly + * @blocks: The number of blocks to add to rpm * - * This function takes an existing rbm structure and increments it to the next - * viable block offset. - * - * Returns: If incrementing the offset would cause the rbm to go past the - * end of the rgrp, true is returned, otherwise false. + * This function takes an existing rbm structure and adds a number of blocks to + * it. * + * Returns: True if the new rbm would point past the end of the rgrp. */ -static bool gfs2_rbm_incr(struct gfs2_rbm *rbm) +static bool gfs2_rbm_add(struct gfs2_rbm *rbm, u32 blocks) { - if (rbm->offset + 1 < rbm_bi(rbm)->bi_blocks) { /* in the same bitmap */ - rbm->offset++; + struct gfs2_rgrpd *rgd = rbm->rgd; + struct gfs2_bitmap *bi = rgd->rd_bits + rbm->bii; + + if (rbm->offset + blocks < bi->bi_blocks) { + rbm->offset += blocks; return false; } - if (rbm->bii == rbm->rgd->rd_length - 1) /* at the last bitmap */ - return true; + blocks -= bi->bi_blocks - rbm->offset; - rbm->offset = 0; - rbm->bii++; - return false; + for(;;) { + bi++; + if (bi == rgd->rd_bits + rgd->rd_length) + return true; + if (blocks < bi->bi_blocks) { + rbm->offset = blocks; + rbm->bii = bi - rgd->rd_bits; + return false; + } + blocks -= bi->bi_blocks; + } } /** @@ -308,7 +333,8 @@ static bool gfs2_rbm_incr(struct gfs2_rbm *rbm) * @n_unaligned: Number of unaligned blocks to check * @len: Decremented for each block found (terminate on zero) * - * Returns: true if a non-free block is encountered + * Returns: true if a non-free block is encountered or the end of the resource + * group is reached. */ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len) @@ -323,7 +349,7 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le (*len)--; if (*len == 0) return true; - if (gfs2_rbm_incr(rbm)) + if (gfs2_rbm_add(rbm, 1)) return true; } @@ -457,24 +483,24 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd) } if (count[0] != rgd->rd_free) { - if (gfs2_consist_rgrpd(rgd)) - fs_err(sdp, "free data mismatch: %u != %u\n", - count[0], rgd->rd_free); + gfs2_lm(sdp, "free data mismatch: %u != %u\n", + count[0], rgd->rd_free); + gfs2_consist_rgrpd(rgd); return; } tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes; if (count[1] != tmp) { - if (gfs2_consist_rgrpd(rgd)) - fs_err(sdp, "used data mismatch: %u != %u\n", - count[1], tmp); + gfs2_lm(sdp, "used data mismatch: %u != %u\n", + count[1], tmp); + gfs2_consist_rgrpd(rgd); return; } if (count[2] + count[3] != rgd->rd_dinodes) { - if (gfs2_consist_rgrpd(rgd)) - fs_err(sdp, "used metadata mismatch: %u != %u\n", - count[2] + count[3], rgd->rd_dinodes); + gfs2_lm(sdp, "used metadata mismatch: %u != %u\n", + count[2] + count[3], rgd->rd_dinodes); + gfs2_consist_rgrpd(rgd); return; } } @@ -590,25 +616,16 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd) } } -/** - * gfs2_rsqa_alloc - make sure we have a reservation assigned to the inode - * plus a quota allocations data structure, if necessary - * @ip: the inode for this reservation - */ -int gfs2_rsqa_alloc(struct gfs2_inode *ip) -{ - return gfs2_qa_alloc(ip); -} - static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs, const char *fs_id_buf) { struct gfs2_inode *ip = container_of(rs, struct gfs2_inode, i_res); - gfs2_print_dbg(seq, "%s B: n:%llu s:%llu b:%u f:%u\n", fs_id_buf, + gfs2_print_dbg(seq, "%s B: n:%llu s:%llu f:%u\n", + fs_id_buf, (unsigned long long)ip->i_no_addr, - (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm), - rs->rs_rbm.offset, rs->rs_free); + (unsigned long long)rs->rs_start, + rs->rs_requested); } /** @@ -623,33 +640,22 @@ static void __rs_deltree(struct gfs2_blkreserv *rs) if (!gfs2_rs_active(rs)) return; - rgd = rs->rs_rbm.rgd; + rgd = rs->rs_rgd; trace_gfs2_rs(rs, TRACE_RS_TREEDEL); rb_erase(&rs->rs_node, &rgd->rd_rstree); RB_CLEAR_NODE(&rs->rs_node); - if (rs->rs_free) { - u64 last_block = gfs2_rbm_to_block(&rs->rs_rbm) + - rs->rs_free - 1; - struct gfs2_rbm last_rbm = { .rgd = rs->rs_rbm.rgd, }; - struct gfs2_bitmap *start, *last; + if (rs->rs_requested) { + /* return requested blocks to the rgrp */ + BUG_ON(rs->rs_rgd->rd_requested < rs->rs_requested); + rs->rs_rgd->rd_requested -= rs->rs_requested; - /* return reserved blocks to the rgrp */ - BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free); - rs->rs_rbm.rgd->rd_reserved -= rs->rs_free; /* The rgrp extent failure point is likely not to increase; it will only do so if the freed blocks are somehow contiguous with a span of free blocks that follows. Still, it will force the number to be recalculated later. */ - rgd->rd_extfail_pt += rs->rs_free; - rs->rs_free = 0; - if (gfs2_rbm_from_block(&last_rbm, last_block)) - return; - start = rbm_bi(&rs->rs_rbm); - last = rbm_bi(&last_rbm); - do - clear_bit(GBF_FULL, &start->bi_flags); - while (start++ != last); + rgd->rd_extfail_pt += rs->rs_requested; + rs->rs_requested = 0; } } @@ -662,28 +668,28 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs) { struct gfs2_rgrpd *rgd; - rgd = rs->rs_rbm.rgd; + rgd = rs->rs_rgd; if (rgd) { spin_lock(&rgd->rd_rsspin); __rs_deltree(rs); - BUG_ON(rs->rs_free); + BUG_ON(rs->rs_requested); spin_unlock(&rgd->rd_rsspin); } } /** - * gfs2_rsqa_delete - delete a multi-block reservation and quota allocation + * gfs2_rs_delete - delete a multi-block reservation * @ip: The inode for this reservation - * @wcount: The inode's write count, or NULL * */ -void gfs2_rsqa_delete(struct gfs2_inode *ip, atomic_t *wcount) +void gfs2_rs_delete(struct gfs2_inode *ip) { + struct inode *inode = &ip->i_inode; + down_write(&ip->i_rw_mutex); - if ((wcount == NULL) || (atomic_read(wcount) <= 1)) + if (atomic_read(&inode->i_writecount) <= 1) gfs2_rs_deltree(&ip->i_res); up_write(&ip->i_rw_mutex); - gfs2_qa_delete(ip, wcount); } /** @@ -720,32 +726,25 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) rb_erase(n, &sdp->sd_rindex_tree); if (gl) { - glock_clear_object(gl, rgd); + if (gl->gl_state != LM_ST_UNLOCKED) { + gfs2_glock_cb(gl, LM_ST_UNLOCKED); + flush_delayed_work(&gl->gl_work); + } gfs2_rgrp_brelse(rgd); + glock_clear_object(gl, rgd); gfs2_glock_put(gl); } gfs2_free_clones(rgd); + return_all_reservations(rgd); kfree(rgd->rd_bits); rgd->rd_bits = NULL; - return_all_reservations(rgd); kmem_cache_free(gfs2_rgrpd_cachep, rgd); } } -static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd) -{ - struct gfs2_sbd *sdp = rgd->rd_sbd; - - fs_info(sdp, "ri_addr = %llu\n", (unsigned long long)rgd->rd_addr); - fs_info(sdp, "ri_length = %u\n", rgd->rd_length); - fs_info(sdp, "ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0); - fs_info(sdp, "ri_data = %u\n", rgd->rd_data); - fs_info(sdp, "ri_bitbytes = %u\n", rgd->rd_bitbytes); -} - /** - * gfs2_compute_bitstructs - Compute the bitmap sizes + * compute_bitstructs - Compute the bitmap sizes * @rgd: The resource group descriptor * * Calculates bitmap descriptors, one for each block that contains bitmap data @@ -814,11 +813,20 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd) } bi = rgd->rd_bits + (length - 1); if ((bi->bi_start + bi->bi_bytes) * GFS2_NBBY != rgd->rd_data) { - if (gfs2_consist_rgrpd(rgd)) { - gfs2_rindex_print(rgd); - fs_err(sdp, "start=%u len=%u offset=%u\n", - bi->bi_start, bi->bi_bytes, bi->bi_offset); - } + gfs2_lm(sdp, + "ri_addr = %llu\n" + "ri_length = %u\n" + "ri_data0 = %llu\n" + "ri_data = %u\n" + "ri_bitbytes = %u\n" + "start=%u len=%u offset=%u\n", + (unsigned long long)rgd->rd_addr, + rgd->rd_length, + (unsigned long long)rgd->rd_data0, + rgd->rd_data, + rgd->rd_bitbytes, + bi->bi_start, bi->bi_bytes, bi->bi_offset); + gfs2_consist_rgrpd(rgd); return -EIO; } @@ -887,7 +895,6 @@ static int rgd_insert(struct gfs2_rgrpd *rgd) static int read_rindex_entry(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - const unsigned bsize = sdp->sd_sb.sb_bsize; loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex); struct gfs2_rindex buf; int error; @@ -914,18 +921,19 @@ static int read_rindex_entry(struct gfs2_inode *ip) rgd->rd_data = be32_to_cpu(buf.ri_data); rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes); spin_lock_init(&rgd->rd_rsspin); - - error = compute_bitstructs(rgd); - if (error) - goto fail; + mutex_init(&rgd->rd_mutex); error = gfs2_glock_get(sdp, rgd->rd_addr, &gfs2_rgrp_glops, CREATE, &rgd->rd_gl); if (error) goto fail; + error = compute_bitstructs(rgd); + if (error) + goto fail_glock; + rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; - rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED); + rgd->rd_flags &= ~GFS2_RDF_PREFERRED; if (rgd->rd_data > sdp->sd_max_rg_data) sdp->sd_max_rg_data = rgd->rd_data; spin_lock(&sdp->sd_rindex_spin); @@ -933,13 +941,11 @@ static int read_rindex_entry(struct gfs2_inode *ip) spin_unlock(&sdp->sd_rindex_spin); if (!error) { glock_set_object(rgd->rd_gl, rgd); - rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_MASK; - rgd->rd_gl->gl_vm.end = PAGE_ALIGN((rgd->rd_addr + - rgd->rd_length) * bsize) - 1; return 0; } error = 0; /* someone else read in the rgrp; free it and ignore it */ +fail_glock: gfs2_glock_put(rgd->rd_gl); fail: @@ -998,6 +1004,10 @@ static int gfs2_ri_update(struct gfs2_inode *ip) if (error < 0) return error; + if (RB_EMPTY_ROOT(&sdp->sd_rindex_tree)) { + fs_err(sdp, "no resource groups found in the file system.\n"); + return -ENOENT; + } set_rgrp_preferences(sdp); sdp->sd_rindex_uptodate = 1; @@ -1158,10 +1168,27 @@ static u32 count_unlinked(struct gfs2_rgrpd *rgd) return count; } +static void rgrp_set_bitmap_flags(struct gfs2_rgrpd *rgd) +{ + struct gfs2_bitmap *bi; + int x; + + if (rgd->rd_free) { + for (x = 0; x < rgd->rd_length; x++) { + bi = rgd->rd_bits + x; + clear_bit(GBF_FULL, &bi->bi_flags); + } + } else { + for (x = 0; x < rgd->rd_length; x++) { + bi = rgd->rd_bits + x; + set_bit(GBF_FULL, &bi->bi_flags); + } + } +} /** - * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps - * @rgd: the struct gfs2_rgrpd describing the RG to read in + * gfs2_rgrp_go_instantiate - Read in a RG's header and bitmaps + * @gh: the glock holder representing the rgrpd to read in * * Read in all of a Resource Group's header and bitmap blocks. * Caller must eventually call gfs2_rgrp_brelse() to free the bitmaps. @@ -1169,10 +1196,10 @@ static u32 count_unlinked(struct gfs2_rgrpd *rgd) * Returns: errno */ -static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) +int gfs2_rgrp_go_instantiate(struct gfs2_glock *gl) { + struct gfs2_rgrpd *rgd = gl->gl_object; struct gfs2_sbd *sdp = rgd->rd_sbd; - struct gfs2_glock *gl = rgd->rd_gl; unsigned int length = rgd->rd_length; struct gfs2_bitmap *bi; unsigned int x, y; @@ -1200,21 +1227,18 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) } } - if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) { - for (x = 0; x < length; x++) - clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags); - gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); - rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); - rgd->rd_free_clone = rgd->rd_free; - /* max out the rgrp allocation failure point */ - rgd->rd_extfail_pt = rgd->rd_free; - } + gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); + rgrp_set_bitmap_flags(rgd); + rgd->rd_flags |= GFS2_RDF_CHECK; + rgd->rd_free_clone = rgd->rd_free; + GLOCK_BUG_ON(rgd->rd_gl, rgd->rd_reserved); + /* max out the rgrp allocation failure point */ + rgd->rd_extfail_pt = rgd->rd_free; if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); - } - else if (sdp->sd_args.ar_rgrplvb) { + } else if (sdp->sd_args.ar_rgrplvb) { if (!gfs2_rgrp_lvb_valid(rgd)){ gfs2_consist_rgrpd(rgd); error = -EIO; @@ -1232,19 +1256,18 @@ fail: bi->bi_bh = NULL; gfs2_assert_warn(sdp, !bi->bi_clone); } - return error; } -static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) +static int update_rgrp_lvb(struct gfs2_rgrpd *rgd, struct gfs2_holder *gh) { u32 rl_flags; - if (rgd->rd_flags & GFS2_RDF_UPTODATE) + if (!test_bit(GLF_INSTANTIATE_NEEDED, &gh->gh_gl->gl_flags)) return 0; if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) - return gfs2_rgrp_bh_get(rgd); + return gfs2_instantiate(gh); rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags); rl_flags &= ~GFS2_RDF_MASK; @@ -1253,22 +1276,16 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) if (rgd->rd_rgl->rl_unlinked == 0) rgd->rd_flags &= ~GFS2_RDF_CHECK; rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free); + rgrp_set_bitmap_flags(rgd); rgd->rd_free_clone = rgd->rd_free; + GLOCK_BUG_ON(rgd->rd_gl, rgd->rd_reserved); + /* max out the rgrp allocation failure point */ + rgd->rd_extfail_pt = rgd->rd_free; rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes); rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration); return 0; } -int gfs2_rgrp_go_lock(struct gfs2_holder *gh) -{ - struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; - struct gfs2_sbd *sdp = rgd->rd_sbd; - - if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb) - return 0; - return gfs2_rgrp_bh_get(rgd); -} - /** * gfs2_rgrp_brelse - Release RG bitmaps read in with gfs2_rgrp_bh_get() * @rgd: The resource group @@ -1286,23 +1303,7 @@ void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd) bi->bi_bh = NULL; } } - -} - -/** - * gfs2_rgrp_go_unlock - Unlock a rgrp glock - * @gh: The glock holder for the resource group - * - */ - -void gfs2_rgrp_go_unlock(struct gfs2_holder *gh) -{ - struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; - int demote_requested = test_bit(GLF_DEMOTE, &gh->gh_gl->gl_flags) | - test_bit(GLF_PENDING_DEMOTE, &gh->gh_gl->gl_flags); - - if (rgd && demote_requested) - gfs2_rgrp_brelse(rgd); + set_bit(GLF_INSTANTIATE_NEEDED, &rgd->rd_gl->gl_flags); } int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, @@ -1313,7 +1314,7 @@ int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, u64 blk; sector_t start = 0; sector_t nr_blks = 0; - int rv; + int rv = -EIO; unsigned int x; u32 trimmed = 0; u8 diff; @@ -1369,7 +1370,7 @@ fail: if (sdp->sd_args.ar_discard) fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem\n", rv); sdp->sd_args.ar_discard = 0; - return -EIO; + return rv; } /** @@ -1384,7 +1385,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp) { struct inode *inode = file_inode(filp); struct gfs2_sbd *sdp = GFS2_SB(inode); - struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev); + struct block_device *bdev = sdp->sd_vfs->s_bdev; struct buffer_head *bh; struct gfs2_rgrpd *rgd; struct gfs2_rgrpd *rgd_end; @@ -1400,7 +1401,10 @@ int gfs2_fitrim(struct file *filp, void __user *argp) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!blk_queue_discard(q)) + if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) + return -EROFS; + + if (!bdev_max_discard_sectors(bdev)) return -EOPNOTSUPP; if (copy_from_user(&r, argp, sizeof(r))) @@ -1412,8 +1416,8 @@ int gfs2_fitrim(struct file *filp, void __user *argp) start = r.start >> bs_shift; end = start + (r.len >> bs_shift); - minlen = max_t(u64, r.minlen, - q->limits.discard_granularity) >> bs_shift; + minlen = max_t(u64, r.minlen, sdp->sd_sb.sb_bsize); + minlen = max_t(u64, minlen, bdev_discard_granularity(bdev)) >> bs_shift; if (end <= start || minlen > sdp->sd_max_rg_data) return -EINVAL; @@ -1427,7 +1431,8 @@ int gfs2_fitrim(struct file *filp, void __user *argp) while (1) { - ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh); + ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, + LM_FLAG_NODE_SCOPE, &gh); if (ret) goto out; @@ -1435,9 +1440,11 @@ int gfs2_fitrim(struct file *filp, void __user *argp) /* Trim each bitmap in the rgrp */ for (x = 0; x < rgd->rd_length; x++) { struct gfs2_bitmap *bi = rgd->rd_bits + x; + rgrp_lock_local(rgd); ret = gfs2_rgrp_send_discards(sdp, rgd->rd_data0, NULL, bi, minlen, &amt); + rgrp_unlock_local(rgd); if (ret) { gfs2_glock_dq_uninit(&gh); goto out; @@ -1449,9 +1456,11 @@ int gfs2_fitrim(struct file *filp, void __user *argp) ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0); if (ret == 0) { bh = rgd->rd_bits[0].bi_bh; + rgrp_lock_local(rgd); rgd->rd_flags |= GFS2_RGF_TRIMMED; gfs2_trans_add_meta(rgd->rd_gl, bh); gfs2_rgrp_out(rgd, bh->b_data); + rgrp_unlock_local(rgd); gfs2_trans_end(sdp); } } @@ -1481,8 +1490,7 @@ static void rs_insert(struct gfs2_inode *ip) struct rb_node **newn, *parent = NULL; int rc; struct gfs2_blkreserv *rs = &ip->i_res; - struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd; - u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm); + struct gfs2_rgrpd *rgd = rs->rs_rgd; BUG_ON(gfs2_rs_active(rs)); @@ -1493,7 +1501,7 @@ static void rs_insert(struct gfs2_inode *ip) rb_entry(*newn, struct gfs2_blkreserv, rs_node); parent = *newn; - rc = rs_cmp(fsblock, rs->rs_free, cur); + rc = rs_cmp(rs->rs_start, rs->rs_requested, cur); if (rc > 0) newn = &((*newn)->rb_right); else if (rc < 0) @@ -1509,14 +1517,15 @@ static void rs_insert(struct gfs2_inode *ip) rb_insert_color(&rs->rs_node, &rgd->rd_rstree); /* Do our rgrp accounting for the reservation */ - rgd->rd_reserved += rs->rs_free; /* blocks reserved */ + rgd->rd_requested += rs->rs_requested; /* blocks requested */ spin_unlock(&rgd->rd_rsspin); trace_gfs2_rs(rs, TRACE_RS_INSERT); } /** - * rgd_free - return the number of free blocks we can allocate. + * rgd_free - return the number of free blocks we can allocate * @rgd: the resource group + * @rs: The reservation to free * * This function returns the number of free blocks for an rgrp. * That's the clone-free blocks (blocks that are free, not including those @@ -1530,9 +1539,9 @@ static inline u32 rgd_free(struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *rs) { u32 tot_reserved, tot_free; - if (WARN_ON_ONCE(rgd->rd_reserved < rs->rs_free)) + if (WARN_ON_ONCE(rgd->rd_requested < rs->rs_requested)) return 0; - tot_reserved = rgd->rd_reserved - rs->rs_free; + tot_reserved = rgd->rd_requested - rs->rs_requested; if (rgd->rd_free_clone < tot_reserved) tot_reserved = 0; @@ -1557,17 +1566,26 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, u64 goal; struct gfs2_blkreserv *rs = &ip->i_res; u32 extlen; - u32 free_blocks = rgd_free(rgd, rs); + u32 free_blocks, blocks_available; int ret; struct inode *inode = &ip->i_inode; + spin_lock(&rgd->rd_rsspin); + free_blocks = rgd_free(rgd, rs); + if (rgd->rd_free_clone < rgd->rd_requested) + free_blocks = 0; + blocks_available = rgd->rd_free_clone - rgd->rd_reserved; + if (rgd == rs->rs_rgd) + blocks_available += rs->rs_reserved; + spin_unlock(&rgd->rd_rsspin); + if (S_ISDIR(inode->i_mode)) extlen = 1; else { extlen = max_t(u32, atomic_read(&ip->i_sizehint), ap->target); extlen = clamp(extlen, (u32)RGRP_RSRV_MINBLKS, free_blocks); } - if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen)) + if (free_blocks < extlen || blocks_available < extlen) return; /* Find bitmap block that contains bits for goal block */ @@ -1579,10 +1597,10 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, if (WARN_ON(gfs2_rbm_from_block(&rbm, goal))) return; - ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true); + ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, &ip->i_res, true); if (ret == 0) { - rs->rs_rbm = rbm; - rs->rs_free = extlen; + rs->rs_start = gfs2_rbm_to_block(&rbm); + rs->rs_requested = extlen; rs_insert(ip); } else { if (goal == rgd->rd_last_alloc + rgd->rd_data0) @@ -1595,7 +1613,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, * @rgd: The resource group * @block: The starting block * @length: The required length - * @ip: Ignore any reservations for this inode + * @ignore_rs: Reservation to ignore * * If the block does not appear in any reservation, then return the * block number unchanged. If it does appear in the reservation, then @@ -1605,7 +1623,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, u32 length, - const struct gfs2_inode *ip) + struct gfs2_blkreserv *ignore_rs) { struct gfs2_blkreserv *rs; struct rb_node *n; @@ -1625,8 +1643,8 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, } if (n) { - while ((rs_cmp(block, length, rs) == 0) && (&ip->i_res != rs)) { - block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free; + while (rs_cmp(block, length, rs) == 0 && rs != ignore_rs) { + block = rs->rs_start + rs->rs_requested; n = n->rb_right; if (n == NULL) break; @@ -1641,7 +1659,7 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, /** * gfs2_reservation_check_and_update - Check for reservations during block alloc * @rbm: The current position in the resource group - * @ip: The inode for which we are searching for blocks + * @rs: Our own reservation * @minext: The minimum extent length * @maxext: A pointer to the maximum extent structure * @@ -1655,20 +1673,19 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, */ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, - const struct gfs2_inode *ip, + struct gfs2_blkreserv *rs, u32 minext, struct gfs2_extent *maxext) { u64 block = gfs2_rbm_to_block(rbm); u32 extlen = 1; u64 nblock; - int ret; /* * If we have a minimum extent length, then skip over any extent * which is less than the min extent length in size. */ - if (minext) { + if (minext > 1) { extlen = gfs2_free_extlen(rbm, minext); if (extlen <= maxext->len) goto fail; @@ -1678,7 +1695,7 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, * Check the extent which has been found against the reservations * and skip if parts of it are already reserved */ - nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip); + nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, rs); if (nblock == block) { if (!minext || extlen >= minext) return 0; @@ -1687,12 +1704,15 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, maxext->len = extlen; maxext->rbm = *rbm; } -fail: - nblock = block + extlen; + } else { + u64 len = nblock - block; + if (len >= (u64)1 << 32) + return -E2BIG; + extlen = len; } - ret = gfs2_rbm_from_block(rbm, nblock); - if (ret < 0) - return ret; +fail: + if (gfs2_rbm_add(rbm, extlen)) + return -E2BIG; return 1; } @@ -1700,9 +1720,9 @@ fail: * gfs2_rbm_find - Look for blocks of a particular state * @rbm: Value/result starting position and final position * @state: The state which we want to find - * @minext: Pointer to the requested extent length (NULL for a single block) + * @minext: Pointer to the requested extent length * This is updated to be the actual reservation size. - * @ip: If set, check for reservations + * @rs: Our own reservation (NULL to skip checking for reservations) * @nowrap: Stop looking at the end of the rgrp, rather than wrapping * around until we've reached the starting point. * @@ -1716,7 +1736,7 @@ fail: */ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, - const struct gfs2_inode *ip, bool nowrap) + struct gfs2_blkreserv *rs, bool nowrap) { bool scan_from_start = rbm->bii == 0 && rbm->offset == 0; struct buffer_head *bh; @@ -1737,8 +1757,7 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, while(1) { bi = rbm_bi(rbm); - if ((ip == NULL || !gfs2_rs_active(&ip->i_res)) && - test_bit(GBF_FULL, &bi->bi_flags) && + if (test_bit(GBF_FULL, &bi->bi_flags) && (state == GFS2_BLKST_FREE)) goto next_bitmap; @@ -1754,11 +1773,10 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, goto next_bitmap; } rbm->offset = offset; - if (ip == NULL) + if (!rs || !minext) return 0; - ret = gfs2_reservation_check_and_update(rbm, ip, - minext ? *minext : 0, + ret = gfs2_reservation_check_and_update(rbm, rs, *minext, &maxext); if (ret == 0) return 0; @@ -1790,7 +1808,7 @@ next_iter: break; } - if (minext == NULL || state != GFS2_BLKST_FREE) + if (state != GFS2_BLKST_FREE) return -ENOSPC; /* If the extent was too small, and it's smaller than the smallest @@ -1798,7 +1816,7 @@ next_iter: useless to search this rgrp again for this amount or more. */ if (wrapped && (scan_from_start || rbm->bii > last_bii) && *minext < rbm->rgd->rd_extfail_pt) - rbm->rgd->rd_extfail_pt = *minext; + rbm->rgd->rd_extfail_pt = *minext - 1; /* If the maximum extent we found is big enough to fulfill the minimum requirements, use it anyway. */ @@ -1832,10 +1850,8 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 }; while (1) { - down_write(&sdp->sd_log_flush_lock); error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL, true); - up_write(&sdp->sd_log_flush_lock); if (error == -ENOSPC) break; if (WARN_ON_ONCE(error)) @@ -1863,7 +1879,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip */ ip = gl->gl_object; - if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0) + if (ip || !gfs2_queue_delete_work(gl, 0)) gfs2_glock_put(gl); else found++; @@ -1963,7 +1979,7 @@ static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs, u64 tdiff; tdiff = ktime_to_ns(ktime_sub(ktime_get_real(), - rs->rs_rbm.rgd->rd_gl->gl_dstamp)); + rs->rs_rgd->rd_gl->gl_dstamp)); return tdiff > (msecs * 1000 * 1000); } @@ -1993,6 +2009,7 @@ static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *b /** * fast_to_acquire - determine if a resource group will be fast to acquire + * @rgd: The rgrp * * If this is one of our preferred rgrps, it should be quicker to acquire, * because we tried to set ourselves up as dlm lock master. @@ -2018,8 +2035,7 @@ static inline int fast_to_acquire(struct gfs2_rgrpd *rgd) * We try our best to find an rgrp that has at least ap->target blocks * available. After a couple of passes (loops == 2), the prospects of finding * such an rgrp diminish. At this stage, we return the first rgrp that has - * at least ap->min_target blocks available. Either way, we set ap->allowed to - * the number of blocks available in the chosen rgrp. + * at least ap->min_target blocks available. * * Returns: 0 on success, * -ENOMEM if a suitable rgrp can't be found @@ -2031,56 +2047,65 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *begin = NULL; struct gfs2_blkreserv *rs = &ip->i_res; - int error = 0, rg_locked, flags = 0; + int error = 0, flags = LM_FLAG_NODE_SCOPE; + bool rg_locked; u64 last_unlinked = NO_BLOCK; + u32 target = ap->target; int loops = 0; - u32 free_blocks, skip = 0; + u32 free_blocks, blocks_available, skip = 0; + + BUG_ON(rs->rs_reserved); if (sdp->sd_args.ar_rgrplvb) flags |= GL_SKIP; - if (gfs2_assert_warn(sdp, ap->target)) + if (gfs2_assert_warn(sdp, target)) return -EINVAL; if (gfs2_rs_active(rs)) { - begin = rs->rs_rbm.rgd; - } else if (rs->rs_rbm.rgd && - rgrp_contains_block(rs->rs_rbm.rgd, ip->i_goal)) { - begin = rs->rs_rbm.rgd; + begin = rs->rs_rgd; + } else if (rs->rs_rgd && + rgrp_contains_block(rs->rs_rgd, ip->i_goal)) { + begin = rs->rs_rgd; } else { check_and_update_goal(ip); - rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); + rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); } if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV)) skip = gfs2_orlov_skip(ip); - if (rs->rs_rbm.rgd == NULL) + if (rs->rs_rgd == NULL) return -EBADSLT; while (loops < 3) { - rg_locked = 1; + struct gfs2_rgrpd *rgd; - if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { - rg_locked = 0; + rg_locked = gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl); + if (rg_locked) { + rgrp_lock_local(rs->rs_rgd); + } else { if (skip && skip--) goto next_rgrp; if (!gfs2_rs_active(rs)) { if (loops == 0 && - !fast_to_acquire(rs->rs_rbm.rgd)) + !fast_to_acquire(rs->rs_rgd)) goto next_rgrp; if ((loops < 2) && gfs2_rgrp_used_recently(rs, 1000) && - gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) + gfs2_rgrp_congested(rs->rs_rgd, loops)) goto next_rgrp; } - error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, + error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl, LM_ST_EXCLUSIVE, flags, &ip->i_rgd_gh); if (unlikely(error)) return error; + rgrp_lock_local(rs->rs_rgd); if (!gfs2_rs_active(rs) && (loops < 2) && - gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) + gfs2_rgrp_congested(rs->rs_rgd, loops)) goto skip_rgrp; if (sdp->sd_args.ar_rgrplvb) { - error = update_rgrp_lvb(rs->rs_rbm.rgd); + error = update_rgrp_lvb(rs->rs_rgd, + &ip->i_rgd_gh); if (unlikely(error)) { + rgrp_unlock_local(rs->rs_rgd); gfs2_glock_dq_uninit(&ip->i_rgd_gh); return error; } @@ -2088,36 +2113,49 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) } /* Skip unusable resource groups */ - if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | + if ((rs->rs_rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) || - (loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt)) + (loops == 0 && target > rs->rs_rgd->rd_extfail_pt)) goto skip_rgrp; - if (sdp->sd_args.ar_rgrplvb) - gfs2_rgrp_bh_get(rs->rs_rbm.rgd); + if (sdp->sd_args.ar_rgrplvb) { + error = gfs2_instantiate(&ip->i_rgd_gh); + if (error) + goto skip_rgrp; + } /* Get a reservation if we don't already have one */ if (!gfs2_rs_active(rs)) - rg_mblk_search(rs->rs_rbm.rgd, ip, ap); + rg_mblk_search(rs->rs_rgd, ip, ap); /* Skip rgrps when we can't get a reservation on first pass */ if (!gfs2_rs_active(rs) && (loops < 1)) goto check_rgrp; /* If rgrp has enough free space, use it */ - free_blocks = rgd_free(rs->rs_rbm.rgd, rs); - if (free_blocks >= ap->target || - (loops == 2 && ap->min_target && - free_blocks >= ap->min_target)) { - ap->allowed = free_blocks; - return 0; + rgd = rs->rs_rgd; + spin_lock(&rgd->rd_rsspin); + free_blocks = rgd_free(rgd, rs); + blocks_available = rgd->rd_free_clone - rgd->rd_reserved; + if (free_blocks < target || blocks_available < target) { + spin_unlock(&rgd->rd_rsspin); + goto check_rgrp; } + rs->rs_reserved = ap->target; + if (rs->rs_reserved > blocks_available) + rs->rs_reserved = blocks_available; + rgd->rd_reserved += rs->rs_reserved; + spin_unlock(&rgd->rd_rsspin); + rgrp_unlock_local(rs->rs_rgd); + return 0; check_rgrp: /* Check for unlinked inodes which can be reclaimed */ - if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) - try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked, + if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) + try_rgrp_unlink(rs->rs_rgd, &last_unlinked, ip->i_no_addr); skip_rgrp: + rgrp_unlock_local(rs->rs_rgd); + /* Drop reservation, if we couldn't use reserved rgrp */ if (gfs2_rs_active(rs)) gfs2_rs_deltree(rs); @@ -2127,7 +2165,7 @@ skip_rgrp: gfs2_glock_dq_uninit(&ip->i_rgd_gh); next_rgrp: /* Find the next rgrp, and continue looking */ - if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin)) + if (gfs2_select_rgrp(&rs->rs_rgd, begin)) continue; if (skip) continue; @@ -2144,9 +2182,12 @@ next_rgrp: return error; } /* Flushing the log may release space */ - if (loops == 2) + if (loops == 2) { + if (ap->min_target) + target = ap->min_target; gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_INPLACE_RESERVE); + } } return -ENOSPC; @@ -2161,6 +2202,17 @@ next_rgrp: void gfs2_inplace_release(struct gfs2_inode *ip) { + struct gfs2_blkreserv *rs = &ip->i_res; + + if (rs->rs_reserved) { + struct gfs2_rgrpd *rgd = rs->rs_rgd; + + spin_lock(&rgd->rd_rsspin); + GLOCK_BUG_ON(rgd->rd_gl, rgd->rd_reserved < rs->rs_reserved); + rgd->rd_reserved -= rs->rs_reserved; + spin_unlock(&rgd->rd_rsspin); + rs->rs_reserved = 0; + } if (gfs2_holder_initialized(&ip->i_rgd_gh)) gfs2_glock_dq_uninit(&ip->i_rgd_gh); } @@ -2230,32 +2282,30 @@ static void rgblk_free(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd, bi_prev = bi; } gfs2_setbit(&rbm, false, new_state); - gfs2_rbm_incr(&rbm); + gfs2_rbm_add(&rbm, 1); } } /** * gfs2_rgrp_dump - print out an rgrp * @seq: The iterator - * @gl: The glock in question + * @rgd: The rgrp in question * @fs_id_buf: pointer to file system id (if requested) * */ -void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl, +void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd, const char *fs_id_buf) { - struct gfs2_rgrpd *rgd = gl->gl_object; struct gfs2_blkreserv *trs; const struct rb_node *n; - if (rgd == NULL) - return; - gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n", + spin_lock(&rgd->rd_rsspin); + gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u q:%u r:%u e:%u\n", fs_id_buf, (unsigned long long)rgd->rd_addr, rgd->rd_flags, rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes, - rgd->rd_reserved, rgd->rd_extfail_pt); + rgd->rd_requested, rgd->rd_reserved, rgd->rd_extfail_pt); if (rgd->rd_sbd->sd_args.ar_rgrplvb) { struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; @@ -2264,7 +2314,6 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl, be32_to_cpu(rgl->rl_free), be32_to_cpu(rgl->rl_dinodes)); } - spin_lock(&rgd->rd_rsspin); for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) { trs = rb_entry(n, struct gfs2_blkreserv, rs_node); dump_rs(seq, trs, fs_id_buf); @@ -2281,7 +2330,7 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) (unsigned long long)rgd->rd_addr); fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n"); sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname); - gfs2_rgrp_dump(NULL, rgd->rd_gl, fs_id_buf); + gfs2_rgrp_dump(NULL, rgd, fs_id_buf); rgd->rd_flags |= GFS2_RDF_ERROR; } @@ -2301,29 +2350,29 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip, { struct gfs2_blkreserv *rs = &ip->i_res; struct gfs2_rgrpd *rgd = rbm->rgd; - unsigned rlen; - u64 block; - int ret; - spin_lock(&rgd->rd_rsspin); + BUG_ON(rs->rs_reserved < len); + rs->rs_reserved -= len; if (gfs2_rs_active(rs)) { - if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) { - block = gfs2_rbm_to_block(rbm); - ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len); - rlen = min(rs->rs_free, len); - rs->rs_free -= rlen; - rgd->rd_reserved -= rlen; + u64 start = gfs2_rbm_to_block(rbm); + + if (rs->rs_start == start) { + unsigned int rlen; + + rs->rs_start += len; + rlen = min(rs->rs_requested, len); + rs->rs_requested -= rlen; + rgd->rd_requested -= rlen; trace_gfs2_rs(rs, TRACE_RS_CLAIM); - if (rs->rs_free && !ret) - goto out; + if (rs->rs_start < rgd->rd_data0 + rgd->rd_data && + rs->rs_requested) + return; /* We used up our block reservation, so we should reserve more blocks next time. */ atomic_add(RGRP_RSRV_ADDBLKS, &ip->i_sizehint); } __rs_deltree(rs); } -out: - spin_unlock(&rgd->rd_rsspin); } /** @@ -2343,15 +2392,13 @@ static void gfs2_set_alloc_start(struct gfs2_rbm *rbm, u64 goal; if (gfs2_rs_active(&ip->i_res)) { - *rbm = ip->i_res.rs_rbm; - return; + goal = ip->i_res.rs_start; + } else { + if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal)) + goal = ip->i_goal; + else + goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0; } - - if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal)) - goal = ip->i_goal; - else - goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0; - if (WARN_ON_ONCE(gfs2_rbm_from_block(rbm, goal))) { rbm->bii = 0; rbm->offset = 0; @@ -2374,17 +2421,21 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *dibh; - struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rbm.rgd, }; - unsigned int ndata; + struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rgd, }; u64 block; /* block, within the file system scope */ - int error; + u32 minext = 1; + int error = -ENOSPC; - gfs2_set_alloc_start(&rbm, ip, dinode); - error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false); + BUG_ON(ip->i_res.rs_reserved < *nblocks); + rgrp_lock_local(rbm.rgd); + if (gfs2_rs_active(&ip->i_res)) { + gfs2_set_alloc_start(&rbm, ip, dinode); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, &ip->i_res, false); + } if (error == -ENOSPC) { gfs2_set_alloc_start(&rbm, ip, dinode); - error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, NULL, false); } /* Since all blocks are reserved in advance, this shouldn't happen */ @@ -2399,14 +2450,8 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_alloc_extent(&rbm, dinode, nblocks); block = gfs2_rbm_to_block(&rbm); rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0; - if (gfs2_rs_active(&ip->i_res)) - gfs2_adjust_reservation(ip, &rbm, *nblocks); - ndata = *nblocks; - if (dinode) - ndata--; - if (!dinode) { - ip->i_goal = block + ndata - 1; + ip->i_goal = block + *nblocks - 1; error = gfs2_meta_inode_buffer(ip, &dibh); if (error == 0) { struct gfs2_dinode *di = @@ -2417,12 +2462,20 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, brelse(dibh); } } - if (rbm.rgd->rd_free < *nblocks) { + spin_lock(&rbm.rgd->rd_rsspin); + gfs2_adjust_reservation(ip, &rbm, *nblocks); + if (rbm.rgd->rd_free < *nblocks || rbm.rgd->rd_reserved < *nblocks) { fs_warn(sdp, "nblocks=%u\n", *nblocks); + spin_unlock(&rbm.rgd->rd_rsspin); goto rgrp_error; } - + GLOCK_BUG_ON(rbm.rgd->rd_gl, rbm.rgd->rd_reserved < *nblocks); + GLOCK_BUG_ON(rbm.rgd->rd_gl, rbm.rgd->rd_free_clone < *nblocks); + GLOCK_BUG_ON(rbm.rgd->rd_gl, rbm.rgd->rd_free < *nblocks); + rbm.rgd->rd_reserved -= *nblocks; + rbm.rgd->rd_free_clone -= *nblocks; rbm.rgd->rd_free -= *nblocks; + spin_unlock(&rbm.rgd->rd_rsspin); if (dinode) { rbm.rgd->rd_dinodes++; *generation = rbm.rgd->rd_igeneration++; @@ -2432,6 +2485,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data); + rgrp_unlock_local(rbm.rgd); gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); if (dinode) @@ -2439,13 +2493,13 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid); - rbm.rgd->rd_free_clone -= *nblocks; trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); *bn = block; return 0; rgrp_error: + rgrp_unlock_local(rbm.rgd); gfs2_rgrp_error(rbm.rgd); return -EIO; } @@ -2465,16 +2519,18 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + rgrp_lock_local(rgd); rgblk_free(sdp, rgd, bstart, blen, GFS2_BLKST_FREE); trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE); rgd->rd_free += blen; rgd->rd_flags &= ~GFS2_RGF_TRIMMED; gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); + rgrp_unlock_local(rgd); /* Directories keep their data in the metadata address space */ - if (meta || ip->i_depth) - gfs2_meta_wipe(ip, bstart, blen); + if (meta || ip->i_depth || gfs2_is_jdata(ip)) + gfs2_journal_wipe(ip, bstart, blen); } /** @@ -2506,17 +2562,20 @@ void gfs2_unlink_di(struct inode *inode) rgd = gfs2_blk2rgrpd(sdp, blkno, true); if (!rgd) return; + rgrp_lock_local(rgd); rgblk_free(sdp, rgd, blkno, 1, GFS2_BLKST_UNLINKED); trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED); gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); be32_add_cpu(&rgd->rd_rgl->rl_unlinked, 1); + rgrp_unlock_local(rgd); } void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) { struct gfs2_sbd *sdp = rgd->rd_sbd; + rgrp_lock_local(rgd); rgblk_free(sdp, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE); if (!rgd->rd_dinodes) gfs2_consist_rgrpd(rgd); @@ -2525,12 +2584,13 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); + rgrp_unlock_local(rgd); be32_add_cpu(&rgd->rd_rgl->rl_unlinked, -1); gfs2_statfs_change(sdp, 0, +1, -1); trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE); gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid); - gfs2_meta_wipe(ip, ip->i_no_addr, 1); + gfs2_journal_wipe(ip, ip->i_no_addr, 1); } /** @@ -2539,6 +2599,10 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) * @no_addr: The block number to check * @type: The block type we are looking for * + * The inode glock of @no_addr must be held. The @type to check for is either + * GFS2_BLKST_DINODE or GFS2_BLKST_UNLINKED; checking for type GFS2_BLKST_FREE + * or GFS2_BLKST_USED would make no sense. + * * Returns: 0 if the block type matches the expected type * -ESTALE if it doesn't match * or -ve errno if something went wrong while checking @@ -2561,13 +2625,20 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) rbm.rgd = rgd; error = gfs2_rbm_from_block(&rbm, no_addr); - if (WARN_ON_ONCE(error)) - goto fail; - - if (gfs2_testbit(&rbm, false) != type) - error = -ESTALE; + if (!WARN_ON_ONCE(error)) { + /* + * No need to take the local resource group lock here; the + * inode glock of @no_addr provides the necessary + * synchronization in case the block is an inode. (In case + * the block is not an inode, the block type will not match + * the @type we are looking for.) + */ + if (gfs2_testbit(&rbm, false) != type) + error = -ESTALE; + } gfs2_glock_dq_uninit(&rgd_gh); + fail: return error; } @@ -2606,7 +2677,7 @@ void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, return; rgd = gfs2_blk2rgrpd(sdp, block, 1); } else { - rgd = ip->i_res.rs_rbm.rgd; + rgd = ip->i_res.rs_rgd; if (!rgd || !rgrp_contains_block(rgd, block)) rgd = gfs2_blk2rgrpd(sdp, block, 1); } @@ -2648,12 +2719,15 @@ void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate * and initialize an array of glock holders for them * @rlist: the list of resource groups + * @state: the state we're requesting + * @flags: the modifier flags * * FIXME: Don't use NOFAIL * */ -void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist) +void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, + unsigned int state, u16 flags) { unsigned int x; @@ -2661,9 +2735,8 @@ void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist) sizeof(struct gfs2_holder), GFP_NOFS | __GFP_NOFAIL); for (x = 0; x < rlist->rl_rgrps; x++) - gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, - LM_ST_EXCLUSIVE, 0, - &rlist->rl_ghs[x]); + gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, state, flags, + &rlist->rl_ghs[x]); } /** @@ -2686,3 +2759,12 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist) } } +void rgrp_lock_local(struct gfs2_rgrpd *rgd) +{ + mutex_lock(&rgd->rd_mutex); +} + +void rgrp_unlock_local(struct gfs2_rgrpd *rgd) +{ + mutex_unlock(&rgd->rd_mutex); +} |