diff options
Diffstat (limited to 'fs/xfs/xfs_log.c')
| -rw-r--r-- | fs/xfs/xfs_log.c | 796 | 
1 files changed, 520 insertions, 276 deletions
| diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 2be019136287..5215abc8023a 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -44,13 +44,8 @@  kmem_zone_t	*xfs_log_ticket_zone; -#define xlog_write_adv_cnt(ptr, len, off, bytes) \ -	{ (ptr) += (bytes); \ -	  (len) -= (bytes); \ -	  (off) += (bytes);} -  /* Local miscellaneous function prototypes */ -STATIC int	 xlog_commit_record(xfs_mount_t *mp, xlog_ticket_t *ticket, +STATIC int	 xlog_commit_record(struct log *log, struct xlog_ticket *ticket,  				    xlog_in_core_t **, xfs_lsn_t *);  STATIC xlog_t *  xlog_alloc_log(xfs_mount_t	*mp,  				xfs_buftarg_t	*log_target, @@ -59,11 +54,6 @@ STATIC xlog_t *  xlog_alloc_log(xfs_mount_t	*mp,  STATIC int	 xlog_space_left(xlog_t *log, int cycle, int bytes);  STATIC int	 xlog_sync(xlog_t *log, xlog_in_core_t *iclog);  STATIC void	 xlog_dealloc_log(xlog_t *log); -STATIC int	 xlog_write(xfs_mount_t *mp, xfs_log_iovec_t region[], -			    int nentries, struct xlog_ticket *tic, -			    xfs_lsn_t *start_lsn, -			    xlog_in_core_t **commit_iclog, -			    uint flags);  /* local state machine functions */  STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int); @@ -93,16 +83,8 @@ STATIC int xlog_regrant_write_log_space(xlog_t		*log,  STATIC void xlog_ungrant_log_space(xlog_t	 *log,  				   xlog_ticket_t *ticket); - -/* local ticket functions */ -STATIC xlog_ticket_t	*xlog_ticket_alloc(xlog_t *log, -					 int	unit_bytes, -					 int	count, -					 char	clientid, -					 uint	flags); -  #if defined(DEBUG) -STATIC void	xlog_verify_dest_ptr(xlog_t *log, __psint_t ptr); +STATIC void	xlog_verify_dest_ptr(xlog_t *log, char *ptr);  STATIC void	xlog_verify_grant_head(xlog_t *log, int equals);  STATIC void	xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog,  				  int count, boolean_t syncing); @@ -258,7 +240,7 @@ xfs_log_done(  	     * If we get an error, just continue and give back the log ticket.  	     
*/  	    (((ticket->t_flags & XLOG_TIC_INITED) == 0) && -	     (xlog_commit_record(mp, ticket, iclog, &lsn)))) { +	     (xlog_commit_record(log, ticket, iclog, &lsn)))) {  		lsn = (xfs_lsn_t) -1;  		if (ticket->t_flags & XLOG_TIC_PERM_RESERV) {  			flags |= XFS_LOG_REL_PERM_RESERV; @@ -367,6 +349,15 @@ xfs_log_reserve(  		ASSERT(flags & XFS_LOG_PERM_RESERV);  		internal_ticket = *ticket; +		/* +		 * this is a new transaction on the ticket, so we need to +		 * change the transaction ID so that the next transaction has a +		 * different TID in the log. Just add one to the existing tid +		 * so that we can see chains of rolling transactions in the log +		 * easily. +		 */ +		internal_ticket->t_tid++; +  		trace_xfs_log_reserve(log, internal_ticket);  		xlog_grant_push_ail(mp, internal_ticket->t_unit_res); @@ -374,7 +365,8 @@ xfs_log_reserve(  	} else {  		/* may sleep if need to allocate more tickets */  		internal_ticket = xlog_ticket_alloc(log, unit_bytes, cnt, -						  client, flags); +						  client, flags, +						  KM_SLEEP|KM_MAYFAIL);  		if (!internal_ticket)  			return XFS_ERROR(ENOMEM);  		internal_ticket->t_trans_type = t_type; @@ -459,6 +451,13 @@ xfs_log_mount(  	/* Normal transactions can now occur */  	mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY; +	/* +	 * Now the log has been fully initialised and we know were our +	 * space grant counters are, we can initialise the permanent ticket +	 * needed for delayed logging to work. 
+	 */ +	xlog_cil_init_post_recovery(mp->m_log); +  	return 0;  out_destroy_ail: @@ -516,18 +515,10 @@ xfs_log_unmount_write(xfs_mount_t *mp)  #ifdef DEBUG  	xlog_in_core_t	 *first_iclog;  #endif -	xfs_log_iovec_t  reg[1];  	xlog_ticket_t	*tic = NULL;  	xfs_lsn_t	 lsn;  	int		 error; -	/* the data section must be 32 bit size aligned */ -	struct { -	    __uint16_t magic; -	    __uint16_t pad1; -	    __uint32_t pad2; /* may as well make it 64 bits */ -	} magic = { XLOG_UNMOUNT_TYPE, 0, 0 }; -  	/*  	 * Don't write out unmount record on read-only mounts.  	 * Or, if we are doing a forced umount (typically because of IO errors). @@ -549,16 +540,30 @@ xfs_log_unmount_write(xfs_mount_t *mp)  	} while (iclog != first_iclog);  #endif  	if (! (XLOG_FORCED_SHUTDOWN(log))) { -		reg[0].i_addr = (void*)&magic; -		reg[0].i_len  = sizeof(magic); -		reg[0].i_type = XLOG_REG_TYPE_UNMOUNT; -  		error = xfs_log_reserve(mp, 600, 1, &tic,  					XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE);  		if (!error) { +			/* the data section must be 32 bit size aligned */ +			struct { +			    __uint16_t magic; +			    __uint16_t pad1; +			    __uint32_t pad2; /* may as well make it 64 bits */ +			} magic = { +				.magic = XLOG_UNMOUNT_TYPE, +			}; +			struct xfs_log_iovec reg = { +				.i_addr = (void *)&magic, +				.i_len = sizeof(magic), +				.i_type = XLOG_REG_TYPE_UNMOUNT, +			}; +			struct xfs_log_vec vec = { +				.lv_niovecs = 1, +				.lv_iovecp = &reg, +			}; +  			/* remove inited flag */ -			((xlog_ticket_t *)tic)->t_flags = 0; -			error = xlog_write(mp, reg, 1, tic, &lsn, +			tic->t_flags = 0; +			error = xlog_write(log, &vec, tic, &lsn,  					   NULL, XLOG_UNMOUNT_TRANS);  			/*  			 * At this point, we're umounting anyway, @@ -648,10 +653,30 @@ xfs_log_unmount(xfs_mount_t *mp)  	xlog_dealloc_log(mp->m_log);  } +void +xfs_log_item_init( +	struct xfs_mount	*mp, +	struct xfs_log_item	*item, +	int			type, +	struct xfs_item_ops	*ops) +{ +	item->li_mountp = mp; +	item->li_ailp = mp->m_ail; +	item->li_type 
= type; +	item->li_ops = ops; +	item->li_lv = NULL; + +	INIT_LIST_HEAD(&item->li_ail); +	INIT_LIST_HEAD(&item->li_cil); +} +  /*   * Write region vectors to log.  The write happens using the space reservation   * of the ticket (tic).  It is not a requirement that all writes for a given - * transaction occur with one call to xfs_log_write(). + * transaction occur with one call to xfs_log_write(). However, it is important + * to note that the transaction reservation code makes an assumption about the + * number of log headers a transaction requires that may be violated if you + * don't pass all the transaction vectors in one call....   */  int  xfs_log_write( @@ -663,11 +688,15 @@ xfs_log_write(  {  	struct log		*log = mp->m_log;  	int			error; +	struct xfs_log_vec	vec = { +		.lv_niovecs = nentries, +		.lv_iovecp = reg, +	};  	if (XLOG_FORCED_SHUTDOWN(log))  		return XFS_ERROR(EIO); -	error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0); +	error = xlog_write(log, &vec, tic, start_lsn, NULL, 0);  	if (error)  		xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);  	return error; @@ -1020,6 +1049,7 @@ xlog_alloc_log(xfs_mount_t	*mp,  	int			i;  	int			iclogsize;  	int			error = ENOMEM; +	uint			log2_size = 0;  	log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL);  	if (!log) { @@ -1045,29 +1075,30 @@ xlog_alloc_log(xfs_mount_t	*mp,  	error = EFSCORRUPTED;  	if (xfs_sb_version_hassector(&mp->m_sb)) { -		log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT; -		if (log->l_sectbb_log < 0 || -		    log->l_sectbb_log > mp->m_sectbb_log) { -			xlog_warn("XFS: Log sector size (0x%x) out of range.", -						log->l_sectbb_log); +	        log2_size = mp->m_sb.sb_logsectlog; +		if (log2_size < BBSHIFT) { +			xlog_warn("XFS: Log sector size too small " +				"(0x%x < 0x%x)", log2_size, BBSHIFT);  			goto out_free_log;  		} -		/* for larger sector sizes, must have v2 or external log */ -		if (log->l_sectbb_log != 0 && -		    (log->l_logBBstart != 0 && -		     
!xfs_sb_version_haslogv2(&mp->m_sb))) { -			xlog_warn("XFS: log sector size (0x%x) invalid " -				  "for configuration.", log->l_sectbb_log); +	        log2_size -= BBSHIFT; +		if (log2_size > mp->m_sectbb_log) { +			xlog_warn("XFS: Log sector size too large " +				"(0x%x > 0x%x)", log2_size, mp->m_sectbb_log);  			goto out_free_log;  		} -		if (mp->m_sb.sb_logsectlog < BBSHIFT) { -			xlog_warn("XFS: Log sector log (0x%x) too small.", -						mp->m_sb.sb_logsectlog); + +		/* for larger sector sizes, must have v2 or external log */ +		if (log2_size && log->l_logBBstart > 0 && +			    !xfs_sb_version_haslogv2(&mp->m_sb)) { + +			xlog_warn("XFS: log sector size (0x%x) invalid " +				  "for configuration.", log2_size);  			goto out_free_log;  		}  	} -	log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1; +	log->l_sectBBsize = 1 << log2_size;  	xlog_get_iclog_buffer_size(mp, log); @@ -1147,6 +1178,9 @@ xlog_alloc_log(xfs_mount_t	*mp,  	*iclogp = log->l_iclog;			/* complete ring */  	log->l_iclog->ic_prev = prev_iclog;	/* re-write 1st prev ptr */ +	error = xlog_cil_init(log); +	if (error) +		goto out_free_iclog;  	return log;  out_free_iclog: @@ -1174,26 +1208,31 @@ out:   * ticket.  Return the lsn of the commit record.   
*/  STATIC int -xlog_commit_record(xfs_mount_t  *mp, -		   xlog_ticket_t *ticket, -		   xlog_in_core_t **iclog, -		   xfs_lsn_t	*commitlsnp) +xlog_commit_record( +	struct log		*log, +	struct xlog_ticket	*ticket, +	struct xlog_in_core	**iclog, +	xfs_lsn_t		*commitlsnp)  { -	int		error; -	xfs_log_iovec_t	reg[1]; - -	reg[0].i_addr = NULL; -	reg[0].i_len = 0; -	reg[0].i_type = XLOG_REG_TYPE_COMMIT; +	struct xfs_mount *mp = log->l_mp; +	int	error; +	struct xfs_log_iovec reg = { +		.i_addr = NULL, +		.i_len = 0, +		.i_type = XLOG_REG_TYPE_COMMIT, +	}; +	struct xfs_log_vec vec = { +		.lv_niovecs = 1, +		.lv_iovecp = &reg, +	};  	ASSERT_ALWAYS(iclog); -	if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp, -			       iclog, XLOG_COMMIT_TRANS))) { +	error = xlog_write(log, &vec, ticket, commitlsnp, iclog, +					XLOG_COMMIT_TRANS); +	if (error)  		xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); -	}  	return error; -}	/* xlog_commit_record */ - +}  /*   * Push on the buffer cache code if we ever use more than 75% of the on-disk @@ -1468,6 +1507,8 @@ xlog_dealloc_log(xlog_t *log)  	xlog_in_core_t	*iclog, *next_iclog;  	int		i; +	xlog_cil_destroy(log); +  	iclog = log->l_iclog;  	for (i=0; i<log->l_iclog_bufs; i++) {  		sv_destroy(&iclog->ic_force_wait); @@ -1510,8 +1551,10 @@ xlog_state_finish_copy(xlog_t		*log,   * print out info relating to regions written which consume   * the reservation   */ -STATIC void -xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket) +void +xlog_print_tic_res( +	struct xfs_mount	*mp, +	struct xlog_ticket	*ticket)  {  	uint i;  	uint ophdr_spc = ticket->t_res_num_ophdrs * (uint)sizeof(xlog_op_header_t); @@ -1611,6 +1654,196 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)  			    "bad-rtype" : res_type_str[r_type-1]),  			    ticket->t_res_arr[i].r_len);  	} + +	xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, mp, +		"xfs_log_write: reservation ran out. 
Need to up reservation"); +	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); +} + +/* + * Calculate the potential space needed by the log vector.  Each region gets + * its own xlog_op_header_t and may need to be double word aligned. + */ +static int +xlog_write_calc_vec_length( +	struct xlog_ticket	*ticket, +	struct xfs_log_vec	*log_vector) +{ +	struct xfs_log_vec	*lv; +	int			headers = 0; +	int			len = 0; +	int			i; + +	/* acct for start rec of xact */ +	if (ticket->t_flags & XLOG_TIC_INITED) +		headers++; + +	for (lv = log_vector; lv; lv = lv->lv_next) { +		headers += lv->lv_niovecs; + +		for (i = 0; i < lv->lv_niovecs; i++) { +			struct xfs_log_iovec	*vecp = &lv->lv_iovecp[i]; + +			len += vecp->i_len; +			xlog_tic_add_region(ticket, vecp->i_len, vecp->i_type); +		} +	} + +	ticket->t_res_num_ophdrs += headers; +	len += headers * sizeof(struct xlog_op_header); + +	return len; +} + +/* + * If first write for transaction, insert start record  We can't be trying to + * commit if we are inited.  We can't have any "partial_copy" if we are inited. + */ +static int +xlog_write_start_rec( +	struct xlog_op_header	*ophdr, +	struct xlog_ticket	*ticket) +{ +	if (!(ticket->t_flags & XLOG_TIC_INITED)) +		return 0; + +	ophdr->oh_tid	= cpu_to_be32(ticket->t_tid); +	ophdr->oh_clientid = ticket->t_clientid; +	ophdr->oh_len = 0; +	ophdr->oh_flags = XLOG_START_TRANS; +	ophdr->oh_res2 = 0; + +	ticket->t_flags &= ~XLOG_TIC_INITED; + +	return sizeof(struct xlog_op_header); +} + +static xlog_op_header_t * +xlog_write_setup_ophdr( +	struct log		*log, +	struct xlog_op_header	*ophdr, +	struct xlog_ticket	*ticket, +	uint			flags) +{ +	ophdr->oh_tid = cpu_to_be32(ticket->t_tid); +	ophdr->oh_clientid = ticket->t_clientid; +	ophdr->oh_res2 = 0; + +	/* are we copying a commit or unmount record? */ +	ophdr->oh_flags = flags; + +	/* +	 * We've seen logs corrupted with bad transaction client ids.  This +	 * makes sure that XFS doesn't generate them on.  
Turn this into an EIO +	 * and shut down the filesystem. +	 */ +	switch (ophdr->oh_clientid)  { +	case XFS_TRANSACTION: +	case XFS_VOLUME: +	case XFS_LOG: +		break; +	default: +		xfs_fs_cmn_err(CE_WARN, log->l_mp, +			"Bad XFS transaction clientid 0x%x in ticket 0x%p", +			ophdr->oh_clientid, ticket); +		return NULL; +	} + +	return ophdr; +} + +/* + * Set up the parameters of the region copy into the log. This has + * to handle region write split across multiple log buffers - this + * state is kept external to this function so that this code can + * can be written in an obvious, self documenting manner. + */ +static int +xlog_write_setup_copy( +	struct xlog_ticket	*ticket, +	struct xlog_op_header	*ophdr, +	int			space_available, +	int			space_required, +	int			*copy_off, +	int			*copy_len, +	int			*last_was_partial_copy, +	int			*bytes_consumed) +{ +	int			still_to_copy; + +	still_to_copy = space_required - *bytes_consumed; +	*copy_off = *bytes_consumed; + +	if (still_to_copy <= space_available) { +		/* write of region completes here */ +		*copy_len = still_to_copy; +		ophdr->oh_len = cpu_to_be32(*copy_len); +		if (*last_was_partial_copy) +			ophdr->oh_flags |= (XLOG_END_TRANS|XLOG_WAS_CONT_TRANS); +		*last_was_partial_copy = 0; +		*bytes_consumed = 0; +		return 0; +	} + +	/* partial write of region, needs extra log op header reservation */ +	*copy_len = space_available; +	ophdr->oh_len = cpu_to_be32(*copy_len); +	ophdr->oh_flags |= XLOG_CONTINUE_TRANS; +	if (*last_was_partial_copy) +		ophdr->oh_flags |= XLOG_WAS_CONT_TRANS; +	*bytes_consumed += *copy_len; +	(*last_was_partial_copy)++; + +	/* account for new log op header */ +	ticket->t_curr_res -= sizeof(struct xlog_op_header); +	ticket->t_res_num_ophdrs++; + +	return sizeof(struct xlog_op_header); +} + +static int +xlog_write_copy_finish( +	struct log		*log, +	struct xlog_in_core	*iclog, +	uint			flags, +	int			*record_cnt, +	int			*data_cnt, +	int			*partial_copy, +	int			*partial_copy_len, +	int			log_offset, +	
struct xlog_in_core	**commit_iclog) +{ +	if (*partial_copy) { +		/* +		 * This iclog has already been marked WANT_SYNC by +		 * xlog_state_get_iclog_space. +		 */ +		xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt); +		*record_cnt = 0; +		*data_cnt = 0; +		return xlog_state_release_iclog(log, iclog); +	} + +	*partial_copy = 0; +	*partial_copy_len = 0; + +	if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) { +		/* no more space in this iclog - push it. */ +		xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt); +		*record_cnt = 0; +		*data_cnt = 0; + +		spin_lock(&log->l_icloglock); +		xlog_state_want_sync(log, iclog); +		spin_unlock(&log->l_icloglock); + +		if (!commit_iclog) +			return xlog_state_release_iclog(log, iclog); +		ASSERT(flags & XLOG_COMMIT_TRANS); +		*commit_iclog = iclog; +	} + +	return 0;  }  /* @@ -1653,211 +1886,163 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)   *	we don't update ic_offset until the end when we know exactly how many   *	bytes have been written out.   */ -STATIC int +int  xlog_write( -	struct xfs_mount	*mp, -	struct xfs_log_iovec	reg[], -	int			nentries, +	struct log		*log, +	struct xfs_log_vec	*log_vector,  	struct xlog_ticket	*ticket,  	xfs_lsn_t		*start_lsn,  	struct xlog_in_core	**commit_iclog,  	uint			flags)  { -    xlog_t	     *log = mp->m_log; -    xlog_in_core_t   *iclog = NULL;  /* ptr to current in-core log */ -    xlog_op_header_t *logop_head;    /* ptr to log operation header */ -    __psint_t	     ptr;	     /* copy address into data region */ -    int		     len;	     /* # xlog_write() bytes 2 still copy */ -    int		     index;	     /* region index currently copying */ -    int		     log_offset;     /* offset (from 0) into data region */ -    int		     start_rec_copy; /* # bytes to copy for start record */ -    int		     partial_copy;   /* did we split a region? 
*/ -    int		     partial_copy_len;/* # bytes copied if split region */ -    int		     need_copy;	     /* # bytes need to memcpy this region */ -    int		     copy_len;	     /* # bytes actually memcpy'ing */ -    int		     copy_off;	     /* # bytes from entry start */ -    int		     contwr;	     /* continued write of in-core log? */ -    int		     error; -    int		     record_cnt = 0, data_cnt = 0; - -    partial_copy_len = partial_copy = 0; - -    /* Calculate potential maximum space.  Each region gets its own -     * xlog_op_header_t and may need to be double word aligned. -     */ -    len = 0; -    if (ticket->t_flags & XLOG_TIC_INITED) {    /* acct for start rec of xact */ -	len += sizeof(xlog_op_header_t); -	ticket->t_res_num_ophdrs++; -    } - -    for (index = 0; index < nentries; index++) { -	len += sizeof(xlog_op_header_t);	    /* each region gets >= 1 */ -	ticket->t_res_num_ophdrs++; -	len += reg[index].i_len; -	xlog_tic_add_region(ticket, reg[index].i_len, reg[index].i_type); -    } -    contwr = *start_lsn = 0; +	struct xlog_in_core	*iclog = NULL; +	struct xfs_log_iovec	*vecp; +	struct xfs_log_vec	*lv; +	int			len; +	int			index; +	int			partial_copy = 0; +	int			partial_copy_len = 0; +	int			contwr = 0; +	int			record_cnt = 0; +	int			data_cnt = 0; +	int			error; -    if (ticket->t_curr_res < len) { -	xlog_print_tic_res(mp, ticket); -#ifdef DEBUG -	xlog_panic( -		"xfs_log_write: reservation ran out. Need to up reservation"); -#else -	/* Customer configurable panic */ -	xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, mp, -		"xfs_log_write: reservation ran out. 
Need to up reservation"); -	/* If we did not panic, shutdown the filesystem */ -	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); -#endif -    } else -	ticket->t_curr_res -= len; +	*start_lsn = 0; -    for (index = 0; index < nentries; ) { -	if ((error = xlog_state_get_iclog_space(log, len, &iclog, ticket, -					       &contwr, &log_offset))) -		return error; +	len = xlog_write_calc_vec_length(ticket, log_vector); +	if (log->l_cilp) { +		/* +		 * Region headers and bytes are already accounted for. +		 * We only need to take into account start records and +		 * split regions in this function. +		 */ +		if (ticket->t_flags & XLOG_TIC_INITED) +			ticket->t_curr_res -= sizeof(xlog_op_header_t); -	ASSERT(log_offset <= iclog->ic_size - 1); -	ptr = (__psint_t) ((char *)iclog->ic_datap+log_offset); +		/* +		 * Commit record headers need to be accounted for. These +		 * come in as separate writes so are easy to detect. +		 */ +		if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS)) +			ticket->t_curr_res -= sizeof(xlog_op_header_t); +	} else +		ticket->t_curr_res -= len; + +	if (ticket->t_curr_res < 0) +		xlog_print_tic_res(log->l_mp, ticket); + +	index = 0; +	lv = log_vector; +	vecp = lv->lv_iovecp; +	while (lv && index < lv->lv_niovecs) { +		void		*ptr; +		int		log_offset; + +		error = xlog_state_get_iclog_space(log, len, &iclog, ticket, +						   &contwr, &log_offset); +		if (error) +			return error; -	/* start_lsn is the first lsn written to. That's all we need. */ -	if (! *start_lsn) -	    *start_lsn = be64_to_cpu(iclog->ic_header.h_lsn); +		ASSERT(log_offset <= iclog->ic_size - 1); +		ptr = iclog->ic_datap + log_offset; -	/* This loop writes out as many regions as can fit in the amount -	 * of space which was allocated by xlog_state_get_iclog_space(). 
-	 */ -	while (index < nentries) { -	    ASSERT(reg[index].i_len % sizeof(__int32_t) == 0); -	    ASSERT((__psint_t)ptr % sizeof(__int32_t) == 0); -	    start_rec_copy = 0; - -	    /* If first write for transaction, insert start record. -	     * We can't be trying to commit if we are inited.  We can't -	     * have any "partial_copy" if we are inited. -	     */ -	    if (ticket->t_flags & XLOG_TIC_INITED) { -		logop_head		= (xlog_op_header_t *)ptr; -		logop_head->oh_tid	= cpu_to_be32(ticket->t_tid); -		logop_head->oh_clientid = ticket->t_clientid; -		logop_head->oh_len	= 0; -		logop_head->oh_flags    = XLOG_START_TRANS; -		logop_head->oh_res2	= 0; -		ticket->t_flags		&= ~XLOG_TIC_INITED;	/* clear bit */ -		record_cnt++; - -		start_rec_copy = sizeof(xlog_op_header_t); -		xlog_write_adv_cnt(ptr, len, log_offset, start_rec_copy); -	    } +		/* start_lsn is the first lsn written to. That's all we need. */ +		if (!*start_lsn) +			*start_lsn = be64_to_cpu(iclog->ic_header.h_lsn); -	    /* Copy log operation header directly into data section */ -	    logop_head			= (xlog_op_header_t *)ptr; -	    logop_head->oh_tid		= cpu_to_be32(ticket->t_tid); -	    logop_head->oh_clientid	= ticket->t_clientid; -	    logop_head->oh_res2		= 0; +		/* +		 * This loop writes out as many regions as can fit in the amount +		 * of space which was allocated by xlog_state_get_iclog_space(). 
+		 */ +		while (lv && index < lv->lv_niovecs) { +			struct xfs_log_iovec	*reg = &vecp[index]; +			struct xlog_op_header	*ophdr; +			int			start_rec_copy; +			int			copy_len; +			int			copy_off; + +			ASSERT(reg->i_len % sizeof(__int32_t) == 0); +			ASSERT((unsigned long)ptr % sizeof(__int32_t) == 0); + +			start_rec_copy = xlog_write_start_rec(ptr, ticket); +			if (start_rec_copy) { +				record_cnt++; +				xlog_write_adv_cnt(&ptr, &len, &log_offset, +						   start_rec_copy); +			} -	    /* header copied directly */ -	    xlog_write_adv_cnt(ptr, len, log_offset, sizeof(xlog_op_header_t)); +			ophdr = xlog_write_setup_ophdr(log, ptr, ticket, flags); +			if (!ophdr) +				return XFS_ERROR(EIO); -	    /* are we copying a commit or unmount record? */ -	    logop_head->oh_flags = flags; +			xlog_write_adv_cnt(&ptr, &len, &log_offset, +					   sizeof(struct xlog_op_header)); + +			len += xlog_write_setup_copy(ticket, ophdr, +						     iclog->ic_size-log_offset, +						     reg->i_len, +						     &copy_off, &copy_len, +						     &partial_copy, +						     &partial_copy_len); +			xlog_verify_dest_ptr(log, ptr); + +			/* copy region */ +			ASSERT(copy_len >= 0); +			memcpy(ptr, reg->i_addr + copy_off, copy_len); +			xlog_write_adv_cnt(&ptr, &len, &log_offset, copy_len); + +			copy_len += start_rec_copy + sizeof(xlog_op_header_t); +			record_cnt++; +			data_cnt += contwr ? copy_len : 0; + +			error = xlog_write_copy_finish(log, iclog, flags, +						       &record_cnt, &data_cnt, +						       &partial_copy, +						       &partial_copy_len, +						       log_offset, +						       commit_iclog); +			if (error) +				return error; -	    /* -	     * We've seen logs corrupted with bad transaction client -	     * ids.  This makes sure that XFS doesn't generate them on. -	     * Turn this into an EIO and shut down the filesystem. 
-	     */ -	    switch (logop_head->oh_clientid)  { -	    case XFS_TRANSACTION: -	    case XFS_VOLUME: -	    case XFS_LOG: -		break; -	    default: -		xfs_fs_cmn_err(CE_WARN, mp, -		    "Bad XFS transaction clientid 0x%x in ticket 0x%p", -		    logop_head->oh_clientid, ticket); -		return XFS_ERROR(EIO); -	    } +			/* +			 * if we had a partial copy, we need to get more iclog +			 * space but we don't want to increment the region +			 * index because there is still more is this region to +			 * write. +			 * +			 * If we completed writing this region, and we flushed +			 * the iclog (indicated by resetting of the record +			 * count), then we also need to get more log space. If +			 * this was the last record, though, we are done and +			 * can just return. +			 */ +			if (partial_copy) +				break; -	    /* Partial write last time? => (partial_copy != 0) -	     * need_copy is the amount we'd like to copy if everything could -	     * fit in the current memcpy. -	     */ -	    need_copy =	reg[index].i_len - partial_copy_len; - -	    copy_off = partial_copy_len; -	    if (need_copy <= iclog->ic_size - log_offset) { /*complete write */ -	        copy_len = need_copy; -		logop_head->oh_len = cpu_to_be32(copy_len); -		if (partial_copy) -		    logop_head->oh_flags|= (XLOG_END_TRANS|XLOG_WAS_CONT_TRANS); -		partial_copy_len = partial_copy = 0; -	    } else {					    /* partial write */ -		copy_len = iclog->ic_size - log_offset; -		logop_head->oh_len = cpu_to_be32(copy_len); -		logop_head->oh_flags |= XLOG_CONTINUE_TRANS; -		if (partial_copy) -			logop_head->oh_flags |= XLOG_WAS_CONT_TRANS; -		partial_copy_len += copy_len; -		partial_copy++; -		len += sizeof(xlog_op_header_t); /* from splitting of region */ -		/* account for new log op header */ -		ticket->t_curr_res -= sizeof(xlog_op_header_t); -		ticket->t_res_num_ophdrs++; -	    } -	    xlog_verify_dest_ptr(log, ptr); - -	    /* copy region */ -	    ASSERT(copy_len >= 0); -	    memcpy((xfs_caddr_t)ptr, reg[index].i_addr 
+ copy_off, copy_len); -	    xlog_write_adv_cnt(ptr, len, log_offset, copy_len); - -	    /* make copy_len total bytes copied, including headers */ -	    copy_len += start_rec_copy + sizeof(xlog_op_header_t); -	    record_cnt++; -	    data_cnt += contwr ? copy_len : 0; -	    if (partial_copy) {			/* copied partial region */ -		    /* already marked WANT_SYNC by xlog_state_get_iclog_space */ -		    xlog_state_finish_copy(log, iclog, record_cnt, data_cnt); -		    record_cnt = data_cnt = 0; -		    if ((error = xlog_state_release_iclog(log, iclog))) -			    return error; -		    break;			/* don't increment index */ -	    } else {				/* copied entire region */ -		index++; -		partial_copy_len = partial_copy = 0; - -		if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) { -		    xlog_state_finish_copy(log, iclog, record_cnt, data_cnt); -		    record_cnt = data_cnt = 0; -		    spin_lock(&log->l_icloglock); -		    xlog_state_want_sync(log, iclog); -		    spin_unlock(&log->l_icloglock); -		    if (commit_iclog) { -			ASSERT(flags & XLOG_COMMIT_TRANS); -			*commit_iclog = iclog; -		    } else if ((error = xlog_state_release_iclog(log, iclog))) -			   return error; -		    if (index == nentries) -			    return 0;		/* we are done */ -		    else -			    break; +			if (++index == lv->lv_niovecs) { +				lv = lv->lv_next; +				index = 0; +				if (lv) +					vecp = lv->lv_iovecp; +			} +			if (record_cnt == 0) { +				if (!lv) +					return 0; +				break; +			}  		} -	    } /* if (partial_copy) */ -	} /* while (index < nentries) */ -    } /* for (index = 0; index < nentries; ) */ -    ASSERT(len == 0); +	} + +	ASSERT(len == 0); + +	xlog_state_finish_copy(log, iclog, record_cnt, data_cnt); +	if (!commit_iclog) +		return xlog_state_release_iclog(log, iclog); -    xlog_state_finish_copy(log, iclog, record_cnt, data_cnt); -    if (commit_iclog) {  	ASSERT(flags & XLOG_COMMIT_TRANS);  	*commit_iclog = iclog;  	return 0; -    } -    return xlog_state_release_iclog(log, iclog); -}	/* 
xlog_write */ +}  /***************************************************************************** @@ -2840,6 +3025,8 @@ _xfs_log_force(  	XFS_STATS_INC(xs_log_force); +	xlog_cil_push(log, 1); +  	spin_lock(&log->l_icloglock);  	iclog = log->l_iclog; @@ -2989,6 +3176,12 @@ _xfs_log_force_lsn(  	XFS_STATS_INC(xs_log_force); +	if (log->l_cilp) { +		lsn = xlog_cil_push_lsn(log, lsn); +		if (lsn == NULLCOMMITLSN) +			return 0; +	} +  try_again:  	spin_lock(&log->l_icloglock);  	iclog = log->l_iclog; @@ -3153,20 +3346,30 @@ xfs_log_ticket_get(  	return ticket;  } +xlog_tid_t +xfs_log_get_trans_ident( +	struct xfs_trans	*tp) +{ +	return tp->t_ticket->t_tid; +} +  /*   * Allocate and initialise a new log ticket.   */ -STATIC xlog_ticket_t * -xlog_ticket_alloc(xlog_t		*log, -		int		unit_bytes, -		int		cnt, -		char		client, -		uint		xflags) +xlog_ticket_t * +xlog_ticket_alloc( +	struct log	*log, +	int		unit_bytes, +	int		cnt, +	char		client, +	uint		xflags, +	int		alloc_flags)  { -	xlog_ticket_t	*tic; +	struct xlog_ticket *tic;  	uint		num_headers; +	int		iclog_space; -	tic = kmem_zone_zalloc(xfs_log_ticket_zone, KM_SLEEP|KM_MAYFAIL); +	tic = kmem_zone_zalloc(xfs_log_ticket_zone, alloc_flags);  	if (!tic)  		return NULL; @@ -3208,16 +3411,40 @@ xlog_ticket_alloc(xlog_t		*log,  	/* for start-rec */  	unit_bytes += sizeof(xlog_op_header_t); -	/* for LR headers */ -	num_headers = ((unit_bytes + log->l_iclog_size-1) >> log->l_iclog_size_log); +	/* +	 * for LR headers - the space for data in an iclog is the size minus +	 * the space used for the headers. If we use the iclog size, then we +	 * undercalculate the number of headers required. +	 * +	 * Furthermore - the addition of op headers for split-recs might +	 * increase the space required enough to require more log and op +	 * headers, so take that into account too. 
+	 * +	 * IMPORTANT: This reservation makes the assumption that if this +	 * transaction is the first in an iclog and hence has the LR headers +	 * accounted to it, then the remaining space in the iclog is +	 * exclusively for this transaction.  i.e. if the transaction is larger +	 * than the iclog, it will be the only thing in that iclog. +	 * Fundamentally, this means we must pass the entire log vector to +	 * xlog_write to guarantee this. +	 */ +	iclog_space = log->l_iclog_size - log->l_iclog_hsize; +	num_headers = howmany(unit_bytes, iclog_space); + +	/* for split-recs - ophdrs added when data split over LRs */ +	unit_bytes += sizeof(xlog_op_header_t) * num_headers; + +	/* add extra header reservations if we overrun */ +	while (!num_headers || +	       howmany(unit_bytes, iclog_space) > num_headers) { +		unit_bytes += sizeof(xlog_op_header_t); +		num_headers++; +	}  	unit_bytes += log->l_iclog_hsize * num_headers;  	/* for commit-rec LR header - note: padding will subsume the ophdr */  	unit_bytes += log->l_iclog_hsize; -	/* for split-recs - ophdrs added when data split over LRs */ -	unit_bytes += sizeof(xlog_op_header_t) * num_headers; -  	/* for roundoff padding for transaction data and one for commit record */  	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&  	    log->l_mp->m_sb.sb_logsunit > 1) { @@ -3233,13 +3460,13 @@ xlog_ticket_alloc(xlog_t		*log,  	tic->t_curr_res		= unit_bytes;  	tic->t_cnt		= cnt;  	tic->t_ocnt		= cnt; -	tic->t_tid		= (xlog_tid_t)((__psint_t)tic & 0xffffffff); +	tic->t_tid		= random32();  	tic->t_clientid		= client;  	tic->t_flags		= XLOG_TIC_INITED;  	tic->t_trans_type	= 0;  	if (xflags & XFS_LOG_PERM_RESERV)  		tic->t_flags |= XLOG_TIC_PERM_RESERV; -	sv_init(&(tic->t_wait), SV_DEFAULT, "logtick"); +	sv_init(&tic->t_wait, SV_DEFAULT, "logtick");  	xlog_tic_reset_res(tic); @@ -3260,20 +3487,22 @@ xlog_ticket_alloc(xlog_t		*log,   * part of the log in case we trash the log structure.   
*/  void -xlog_verify_dest_ptr(xlog_t     *log, -		     __psint_t  ptr) +xlog_verify_dest_ptr( +	struct log	*log, +	char		*ptr)  {  	int i;  	int good_ptr = 0; -	for (i=0; i < log->l_iclog_bufs; i++) { -		if (ptr >= (__psint_t)log->l_iclog_bak[i] && -		    ptr <= (__psint_t)log->l_iclog_bak[i]+log->l_iclog_size) +	for (i = 0; i < log->l_iclog_bufs; i++) { +		if (ptr >= log->l_iclog_bak[i] && +		    ptr <= log->l_iclog_bak[i] + log->l_iclog_size)  			good_ptr++;  	} -	if (! good_ptr) + +	if (!good_ptr)  		xlog_panic("xlog_verify_dest_ptr: invalid ptr"); -}	/* xlog_verify_dest_ptr */ +}  STATIC void  xlog_verify_grant_head(xlog_t *log, int equals) @@ -3459,6 +3688,11 @@ xlog_state_ioerror(   *	c. nothing new gets queued up after (a) and (b) are done.   *	d. if !logerror, flush the iclogs to disk, then seal them off   *	   for business. + * + * Note: for delayed logging the !logerror case needs to flush the regions + * held in memory out to the iclogs before flushing them to disk. This needs + * to be done before the log is marked as shutdown, otherwise the flush to the + * iclogs will fail.   */  int  xfs_log_force_umount( @@ -3492,6 +3726,16 @@ xfs_log_force_umount(  		return 1;  	}  	retval = 0; + +	/* +	 * Flush the in memory commit item list before marking the log as +	 * being shut down. We need to do it in this order to ensure all the +	 * completed transactions are flushed to disk with the xfs_log_force() +	 * call below. +	 */ +	if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG)) +		xlog_cil_push(log, 1); +  	/*  	 * We must hold both the GRANT lock and the LOG lock,  	 * before we mark the filesystem SHUTDOWN and wake | 
