/*
 * btree.c - NILFS B-tree operations.
 *
 * Copyright (C) 2005 Nippon Telegraph and Telephone Corporation.
 *
 * This file is part of NILFS.
 *
 * NILFS is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * NILFS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with NILFS; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * btree.c,v 1.12 2006/06/27 05:48:55 koji Exp
 *
 * Written by Koji Sato <koji@osrg.net>
 */

#include <linux/slab.h>
#include <linux/string.h>
#include "nilfs.h"
#include "nilfs_btree.h"
#include "bbt.h"
#include "ibt.h"

/*
 * Slab cache from which &struct nilfs_btree_path objects are allocated.
 * It is created by nilfs_btree_path_cache_init() and destroyed by
 * nilfs_btree_path_cache_destroy().
 */
static kmem_cache_t *nilfs_btree_path_cache;

/*
 * B-tree-type-specific initialization functions. The table is indexed by
 * the B-tree type passed to nilfs_btree_init().
 */
static int (*nilfs_btree_init_tab[])(struct nilfs_btree *, dbn_t, void *) = {
	nilfs_bbt_init,	/* block mapping B-tree */
	nilfs_ibt_init,	/* inode B-tree */
};

/* the number of B-tree types, i.e. the number of entries in the table above */
#define NILFS_BTREE_NTYPES	(sizeof(nilfs_btree_init_tab) / sizeof(nilfs_btree_init_tab[0]))

/**
 * nilfs_btree_get_node - resolve a B-tree pointer to a buffer head
 * @btree: B-tree
 * @ptr: B-tree pointer
 *
 * Description: nilfs_btree_get_node() returns NULL when @ptr is
 * NILFS_BTREE_INVALID_PTR. When @ptr encodes a buffer head, that buffer head
 * is returned as is. Otherwise @ptr is converted to a disk block number and
 * the node is obtained through NILFS_BTREE_BOP_GET_NODE().
 */
static inline struct buffer_head *
nilfs_btree_get_node(struct nilfs_btree *btree, nilfs_btree_ptr_t ptr)
{
	if (ptr == NILFS_BTREE_INVALID_PTR)
		return NULL;

	if (NILFS_BTREE_PTR_IS_BH(ptr))
		return NILFS_BTREE_PTR_TO_BH(ptr);

	return NILFS_BTREE_BOP_GET_NODE(btree, NILFS_BTREE_PTR_TO_DBN(ptr));
}

/**
 * nilfs_btree_put_node - hand a node buffer back to the B-tree operations
 * @btree: B-tree
 * @bh: buffer head of the node
 *
 * Description: nilfs_btree_put_node() passes @bh to
 * NILFS_BTREE_BOP_PUT_NODE() unless buffer_nilfs_new_node() reports that
 * @bh is a new node, in which case nothing is done.
 */
static inline void
nilfs_btree_put_node(struct nilfs_btree *btree, struct buffer_head *bh)
{
	if (buffer_nilfs_new_node(bh))
		return;

	NILFS_BTREE_BOP_PUT_NODE(btree, bh);
}

/**
 * nilfs_btree_path_cache_init - create the &struct nilfs_btree_path cache
 * @void: no arguments
 *
 * Description: nilfs_btree_path_cache_init() creates the slab cache from
 * which objects of type &struct nilfs_btree_path are allocated. This
 * function is invoked when the NILFS module is loaded.
 *
 * Return Value: On success, 0 is returned. On error, the following negative
 * error code is returned.
 *
 * %-NILFS_BTREE_ENOMEM - Insufficient amount of memory available.
 */
int __init nilfs_btree_path_cache_init(void)
{
	nilfs_btree_path_cache = kmem_cache_create("nilfs_btree_path_cache",
					sizeof(struct nilfs_btree_path),
					0, 0, NULL, NULL);
	if (nilfs_btree_path_cache == NULL)
		return -NILFS_BTREE_ENOMEM;

	return 0;
}

/**
 * nilfs_btree_path_cache_destroy - destroy the &struct nilfs_btree_path cache
 * @void: no arguments
 *
 * Description: nilfs_btree_path_cache_destroy() releases the slab cache that
 * nilfs_btree_path_cache_init() created. This function is invoked when the
 * NILFS module is unloaded.
 */
void nilfs_btree_path_cache_destroy(void)
{
	kmem_cache_destroy(nilfs_btree_path_cache);
}

/**
 * nilfs_btree_init_path - reset a path to the empty state
 * @btree: B-tree (unused)
 * @path: path
 *
 * Description: nilfs_btree_init_path() sets the number of levels of @path to
 * zero and, for every level up to NILFS_BTREE_MAX_LEVELS, clears the buffer
 * head slot and the index. Buffer heads already stored in @path are simply
 * overwritten, not released.
 *
 * Return Value: @path is returned.
 */
static struct nilfs_btree_path *
nilfs_btree_init_path(struct nilfs_btree *btree,
		      struct nilfs_btree_path *path)
{
	int level = NILFS_BTREE_MAX_LEVELS;

	while (level-- > 0) {
		path->bp_index[level] = 0;
		path->bp_bh[level] = NULL;
	}
	path->bp_nlevels = 0;

	return path;
}

/**
 * nilfs_btree_clear_path - release the nodes held by a path
 * @btree: B-tree
 * @path: path
 *
 * Description: nilfs_btree_clear_path() passes every buffer head stored in
 * @path to nilfs_btree_put_node() and resets @path to the empty state.
 */
static void nilfs_btree_clear_path(struct nilfs_btree *btree,
				   struct nilfs_btree_path *path)
{
	int level;

	for (level = 0; level < NILFS_BTREE_MAX_LEVELS; level++) {
		struct buffer_head *held = path->bp_bh[level];

		if (held != NULL)
			nilfs_btree_put_node(btree, held);
		path->bp_index[level] = 0;
		path->bp_bh[level] = NULL;
	}
	path->bp_nlevels = 0;
}

/**
 * nilfs_btree_alloc_path - allocate an empty path
 * @btree: B-tree
 *
 * Description: nilfs_btree_alloc_path() allocates a &struct nilfs_btree_path
 * from the path cache with GFP_NOFS and initializes it with
 * nilfs_btree_init_path().
 *
 * Return Value: The new path is returned, or NULL if the allocation failed.
 */
static struct nilfs_btree_path *
nilfs_btree_alloc_path(struct nilfs_btree *btree)
{
	struct nilfs_btree_path *path;

	path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
	if (path != NULL)
		nilfs_btree_init_path(btree, path);

	return path;
}

/**
 * nilfs_btree_free_path - release a path
 * @btree: B-tree
 * @path: path
 *
 * Description: nilfs_btree_free_path() puts the nodes held by @path via
 * nilfs_btree_clear_path() and then returns @path to the path cache.
 */
static void nilfs_btree_free_path(struct nilfs_btree *btree,
				  struct nilfs_btree_path *path)
{
	nilfs_btree_clear_path(btree, path);
	kmem_cache_free(nilfs_btree_path_cache, path);
}

/**
 * nilfs_btree_path_next - advance a path to the next entry
 * @btree: B-tree
 * @path: path
 * @level: level
 *
 * Description: nilfs_btree_path_next() increments the index of @path at the
 * level @level. If the index runs past the last child of the node at that
 * level, the nearest ancestor that still has a following child is looked for,
 * its index is advanced, and the path below it is replaced by the leftmost
 * nodes of the subtree under that child.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-NILFS_BTREE_EIO - I/O error.
 *
 * %-NILFS_BTREE_ENOSIB - No ancestor on the path has a following child.
 */
static int nilfs_btree_path_next(struct nilfs_btree *btree,
				 struct nilfs_btree_path *path,
				 int level)
{
	struct buffer_head *bh;
	struct nilfs_btree_node *node;
	nilfs_btree_dptr_t *dptrs;
	nilfs_btree_ptr_t ptr;
	int nchildren;
	int up, down;

	/* common case: the next entry lives in the same node */
	bh = path->bp_bh[level];
	node = NILFS_BTREE_BH_TO_NODE(bh);
	nchildren = le16_to_cpu(node->bn_nchildren);
	path->bp_index[level]++;
	if (path->bp_index[level] < nchildren)
		return 0;

	/* climb until an ancestor with a following child is found */
	up = level + 1;
	while (up < path->bp_nlevels) {
		bh = path->bp_bh[up];
		node = NILFS_BTREE_BH_TO_NODE(bh);
		nchildren = le16_to_cpu(node->bn_nchildren);
		if (path->bp_index[up] + 1 < nchildren)
			break;
		up++;
	}
	if (up == path->bp_nlevels)
		return -NILFS_BTREE_ENOSIB;

	/* step to the following child of that ancestor */
	dptrs = NILFS_BTREE_NODE_DPTRS(node, btree->bt_blksize);
	path->bp_index[up]++;
	ptr = le64_to_cpu(dptrs[path->bp_index[up]]);

	/* descend along the first pointers down to the requested level */
	for (down = up - 1; down >= level; down--) {
		bh = nilfs_btree_get_node(btree, ptr);
		if (bh == NULL)
			return -NILFS_BTREE_EIO;
		if (path->bp_bh[down] != NULL)
			nilfs_btree_put_node(btree, path->bp_bh[down]);
		path->bp_bh[down] = bh;
		path->bp_index[down] = 0;
		node = NILFS_BTREE_BH_TO_NODE(bh);
		dptrs = NILFS_BTREE_NODE_DPTRS(node, btree->bt_blksize);
		ptr = le64_to_cpu(dptrs[0]);
	}

	return 0;
}

/* Return 1 if the NILFS_BTREE_DIRTY bit is set in @btree's state, else 0. */
static inline int nilfs_btree_dirty(struct nilfs_btree *btree)
{
	return (btree->bt_state & (1 << NILFS_BTREE_DIRTY)) != 0;
}

/* Set the NILFS_BTREE_DIRTY bit in @btree's state. */
static inline void nilfs_btree_mark_dirty(struct nilfs_btree *btree)
{
	btree->bt_state = btree->bt_state | (1 << NILFS_BTREE_DIRTY);
}

/* Clear the NILFS_BTREE_DIRTY bit in @btree's state. */
static inline void nilfs_btree_clear_dirty(struct nilfs_btree *btree)
{
	btree->bt_state = btree->bt_state & ~(1 << NILFS_BTREE_DIRTY);
}

/**
 * nilfs_btree_test_and_clear_dirty - test and clear the dirty state
 * @btree: B-tree
 *
 * Description: nilfs_btree_test_and_clear_dirty() reads the dirty bit of
 * @btree and clears it while holding the B-tree semaphore for writing.
 *
 * Return Value: 1 is returned if the B-tree was dirty, 0 otherwise.
 */
int nilfs_btree_test_and_clear_dirty(struct nilfs_btree *btree)
{
	int was_dirty;

	down_write(&btree->bt_sem);
	was_dirty = nilfs_btree_dirty(btree);
	if (was_dirty)
		nilfs_btree_clear_dirty(btree);
	up_write(&btree->bt_sem);

	return was_dirty;
}

/**
 * nilfs_btree_init - initialize a B-tree
 * @btree: B-tree
 * @type: B-tree type
 * @dbn: Disk block number of B-tree root
 * @data: B-tree-type-specific data
 *
 * Description: nilfs_btree_init() initializes the common part of the B-tree
 * data structure specified by @btree and then calls the initialization
 * function registered for @type.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-NILFS_BTREE_EINVAL - Unknown B-tree type.
 *
 * %-NILFS_BTREE_ENOMEM - Insufficient amount of memory available.
 */
int nilfs_btree_init(struct nilfs_btree *btree,
		     int type,
		     dbn_t dbn,
		     void *data)
{
	if (type < 0)
		return -NILFS_BTREE_EINVAL;
	if (type >= NILFS_BTREE_NTYPES)
		return -NILFS_BTREE_EINVAL;

	/* state shared by all B-tree types */
	init_rwsem(&btree->bt_sem);
	btree->bt_state = 0;
	btree->bt_dirty_buffers = NULL;
	btree->bt_owner = data;
	btree->bt_root = NILFS_BTREE_DBN_TO_PTR(dbn);

	/* the rest is up to the type-specific initializer */
	return nilfs_btree_init_tab[type](btree, dbn, data);
}

/**
 * nilfs_btree_path_get_left_sibling - find a left sibling
 * @btree: B-tree
 * @path: path
 * @level: level
 *
 * Description: nilfs_btree_path_get_left_sibling() looks up, in the parent
 * node recorded in @path, the pointer that precedes the one leading to the
 * node at the level @level.
 *
 * Return Value: The pointer to the left sibling is returned if it exists.
 * NILFS_BTREE_INVALID_PTR is returned if @level is the top level of @path or
 * the node is the first child of its parent.
 */
static nilfs_btree_ptr_t
nilfs_btree_path_get_left_sibling(struct nilfs_btree *btree,
				  struct nilfs_btree_path *path,
				  int level)
{
	struct nilfs_btree_node *parent;
	struct buffer_head *pbh;
	nilfs_btree_dptr_t *pdptrs;
	int plevel, pindex;

	/* the top node has no parent and therefore no sibling */
	if (level == path->bp_nlevels - 1)
		return NILFS_BTREE_INVALID_PTR;

	plevel = level + 1;
	pindex = path->bp_index[plevel];
	pindex--;
	if (pindex < 0)
		return NILFS_BTREE_INVALID_PTR;

	pbh = path->bp_bh[plevel];
	parent = NILFS_BTREE_BH_TO_NODE(pbh);
	pdptrs = NILFS_BTREE_NODE_DPTRS(parent, btree->bt_blksize);
	return le64_to_cpu(pdptrs[pindex]);
}

/**
 * nilfs_btree_path_get_right_sibling - find a right sibling
 * @btree: B-tree
 * @path: path
 * @level: level
 *
 * Description: nilfs_btree_path_get_right_sibling() looks up, in the parent
 * node recorded in @path, the pointer that follows the one leading to the
 * node at the level @level.
 *
 * Return Value: The pointer to the right sibling is returned if it exists.
 * NILFS_BTREE_INVALID_PTR is returned if @level is the top level of @path or
 * the node is the last child of its parent.
 */
static nilfs_btree_ptr_t
nilfs_btree_path_get_right_sibling(struct nilfs_btree *btree,
				   struct nilfs_btree_path *path,
				   int level)
{
	struct nilfs_btree_node *parent;
	struct buffer_head *pbh;
	nilfs_btree_dptr_t *pdptrs;
	int plevel, pindex;

	/* the top node has no parent and therefore no sibling */
	if (level == path->bp_nlevels - 1)
		return NILFS_BTREE_INVALID_PTR;

	plevel = level + 1;
	pbh = path->bp_bh[plevel];
	parent = NILFS_BTREE_BH_TO_NODE(pbh);
	pindex = path->bp_index[plevel];
	pindex++;
	if (pindex >= le16_to_cpu(parent->bn_nchildren))
		return NILFS_BTREE_INVALID_PTR;

	pdptrs = NILFS_BTREE_NODE_DPTRS(parent, btree->bt_blksize);
	return le64_to_cpu(pdptrs[pindex]);
}

/**
 * nilfs_btree_lookup_path - find a path
 * @btree: B-tree
 * @path: path
 * @key: key
 * @ptrp: a pointer to a pointer (may be NULL)
 * @minlevel: level to which the lookup operation is performed
 *
 * Description: nilfs_btree_lookup_path() descends from the root to the level
 * @minlevel along the nodes that cover the key @key. The nodes visited and
 * the index chosen in each of them are stored in @path, also when the key is
 * not found, so that @path then describes the position of @key in the tree.
 *
 * Return Value: On success, 0 is returned. If in addition @minlevel is 0 and
 * @ptrp is not NULL, the pointer associated with @key is stored in the place
 * pointed to by @ptrp. On error, one of the following negative error codes is
 * returned.
 *
 * %-NILFS_BTREE_EIO - I/O error.
 *
 * %-NILFS_BTREE_ENOKEY - @key does not exist.
 */
static int nilfs_btree_lookup_path(struct nilfs_btree *btree,
				   struct nilfs_btree_path *path,
				   nilfs_btree_key_t key,
				   nilfs_btree_ptr_t *ptrp,
				   int minlevel)
{
	struct buffer_head *bh;
	struct nilfs_btree_node *node;
	nilfs_btree_dkey_t *dkeys;
	nilfs_btree_dptr_t *dptrs;
	nilfs_btree_ptr_t ptr;
	nilfs_btree_keydiff_t diff;
	int level, index;
	int low, high;

	/* a B-tree without a root contains no key at all */
	if ((ptr = btree->bt_root) == NILFS_BTREE_INVALID_PTR)
		return -NILFS_BTREE_ENOKEY;

	if ((bh = nilfs_btree_get_node(btree, ptr)) == NULL)
		return -NILFS_BTREE_EIO;

	/*
	 * Peek at the root only to learn the height of the tree; the root is
	 * obtained again by the first iteration of the loop below.
	 */
	node = NILFS_BTREE_BH_TO_NODE(bh);
	level = le16_to_cpu(node->bn_level);
	path->bp_nlevels = level + 1;
	nilfs_btree_put_node(btree, bh);

	index = 0;
	/* non-zero diff means "no exact match so far" */
	diff = 1;
	for (; level >= minlevel; level--) {
		if ((bh = nilfs_btree_get_node(btree, ptr)) == NULL)
			return -NILFS_BTREE_EIO;
		node = NILFS_BTREE_BH_TO_NODE(bh);
		BUG_ON(level != le16_to_cpu(node->bn_level));
		/* replace whatever node the path held at this level */
		if (path->bp_bh[level] != NULL)
			nilfs_btree_put_node(btree, path->bp_bh[level]);
		path->bp_bh[level] = bh;

		if (diff != 0) {
			/* binary search */
			low = 0;
			high = le16_to_cpu(node->bn_nchildren) - 1;
			while (low <= high) {
				index = (low + high) / 2;
				dkeys = NILFS_BTREE_NODE_DKEYS(node, btree->bt_blksize);
				diff = le64_to_cpu(dkeys[index]) - key;
				if (diff == 0)
					break;
				else if (diff < 0)
					low = index + 1;
				else
					high = index - 1;
			}
		} else
			/*
			 * An upper level already matched @key exactly, so no
			 * search is done here and the first entry is taken.
			 */
			index = 0;

		/* adjust index */
		if (level > 0) {
			/*
			 * Internal node: if the last key probed is larger
			 * than @key, follow the entry before it (when there
			 * is one).
			 */
			if ((diff > 0) && (index > 0))
				index--;
			dptrs = NILFS_BTREE_NODE_DPTRS(node, btree->bt_blksize);
			ptr = le64_to_cpu(dptrs[index]);
		} else if (diff < 0)
			/*
			 * Level 0: the last key probed is smaller than @key,
			 * so the position of @key is the slot after it.
			 */
			index++;
		path->bp_index[level] = index;
	}

	/* hand out the pointer only for an exact match at level 0 */
	if ((diff == 0) && (minlevel == 0) && (ptrp != NULL)) {
		dptrs = NILFS_BTREE_NODE_DPTRS(node, btree->bt_blksize);
		*ptrp = le64_to_cpu(dptrs[index]);
	}

	return (diff == 0) ? 0 : -NILFS_BTREE_ENOKEY;
}

/**
 * nilfs_btree_lookup_last_path - find a path to the largest key
 * @btree: B-tree
 * @path: path
 * @keyp: a pointer to a key (may be NULL)
 * @ptrp: a pointer to a pointer (may be NULL)
 * @minlevel: level to which the lookup operation is performed
 *
 * Description: nilfs_btree_lookup_last_path() descends from the root to the
 * level @minlevel, always following the last entry of each node. Nodes which
 * the lookup operation visited are stored in @path.
 *
 * Return Value: On success, 0 is returned. If in addition @minlevel is 0, the
 * largest key and the pointer associated with it are stored in the places
 * pointed to by @keyp and @ptrp, respectively (each only if not NULL). On
 * error, one of the following negative error codes is returned.
 *
 * %-NILFS_BTREE_EIO - I/O error.
 *
 * %-NILFS_BTREE_ENOKEY - The B-tree has no root, or a visited node has no
 * entries, so there is no largest key.
 */
static int nilfs_btree_lookup_last_path(struct nilfs_btree *btree,
					struct nilfs_btree_path *path,
					nilfs_btree_key_t *keyp,
					nilfs_btree_ptr_t *ptrp,
					int minlevel)
{
	struct buffer_head *bh;
	struct nilfs_btree_node *node;
	nilfs_btree_dkey_t *dkeys;
	nilfs_btree_dptr_t *dptrs;
	nilfs_btree_ptr_t ptr;
	int level, index;

	if ((ptr = btree->bt_root) == NILFS_BTREE_INVALID_PTR)
		return -NILFS_BTREE_ENOKEY;
	if ((bh = nilfs_btree_get_node(btree, ptr)) == NULL)
		return -NILFS_BTREE_EIO;

	/*
	 * Peek at the root only to learn the height of the tree; the root is
	 * obtained again by the first iteration of the loop below.
	 */
	node = NILFS_BTREE_BH_TO_NODE(bh);
	level = le16_to_cpu(node->bn_level);
	path->bp_nlevels = level + 1;
	nilfs_btree_put_node(btree, bh);
	for (; level >= minlevel; level--) {
		if ((bh = nilfs_btree_get_node(btree, ptr)) == NULL)
			return -NILFS_BTREE_EIO;
		if (path->bp_bh[level] != NULL)
			nilfs_btree_put_node(btree, path->bp_bh[level]);
		path->bp_bh[level] = bh;
		node = NILFS_BTREE_BH_TO_NODE(bh);
		index = le16_to_cpu(node->bn_nchildren) - 1;
		/*
		 * A node without entries has no last entry. Bail out instead
		 * of reading slot -1 of the key/pointer arrays. The node is
		 * already recorded in @path, so the caller releases it
		 * together with the rest of the path.
		 */
		if (index < 0)
			return -NILFS_BTREE_ENOKEY;
		dptrs = NILFS_BTREE_NODE_DPTRS(node, btree->bt_blksize);
		path->bp_index[level] = index;
		ptr = le64_to_cpu(dptrs[index]);
	}
	if (minlevel == 0) {
		/* node is the level-0 node reached by the loop above */
		if (keyp != NULL) {
			dkeys = NILFS_BTREE_NODE_DKEYS(node, btree->bt_blksize);
			index = le16_to_cpu(node->bn_nchildren) - 1;
			*keyp = le64_to_cpu(dkeys[index]);
		}
		if (ptrp != NULL)
			*ptrp = ptr;
	}
	return 0;
}

/**
 * nilfs_btree_lookup - find a record
 * @btree: B-tree
 * @key: key
 * @rec: record (may be NULL)
 *
 * Description: nilfs_btree_lookup() finds a record whose key matches @key in
 * the B-tree @btree. The lookup is performed with the B-tree semaphore held
 * for reading.
 *
 * Return Value: If @key is found, 0 or 1 is returned. When @rec is not NULL,
 * 0 means that a disk block number was stored in the place pointed to by
 * @rec, and 1 means that a buffer head pointer (cast to unsigned long) was
 * stored there. When @rec is NULL, 0 is returned. On error, one of the
 * following negative error codes is returned.
 *
 * %-NILFS_BTREE_EIO - I/O error.
 *
 * %-NILFS_BTREE_ENOKEY - @key does not exist.
 *
 * %-NILFS_BTREE_ENOMEM - Insufficient amount of memory available.
 */
int nilfs_btree_lookup(struct nilfs_btree *btree,
		       unsigned long key,
		       unsigned long *rec)
{
	struct nilfs_btree_path *path;
	nilfs_btree_ptr_t ptr;
	int ret;

	down_read(&btree->bt_sem);

	path = nilfs_btree_alloc_path(btree);
	if (path == NULL) {
		up_read(&btree->bt_sem);
		return -NILFS_BTREE_ENOMEM;
	}

	ret = nilfs_btree_lookup_path(btree, path, key, &ptr, 0);
	if (ret == 0 && rec != NULL) {
		if (!NILFS_BTREE_PTR_IS_DBN(ptr)) {
			/* the record is an in-memory buffer */
			*rec = (unsigned long)NILFS_BTREE_PTR_TO_BH(ptr);
			ret = 1;
		} else {
			*rec = (unsigned long)NILFS_BTREE_PTR_TO_DBN(ptr);
		}
	}

	nilfs_btree_free_path(btree, path);
	up_read(&btree->bt_sem);
	return ret;
}

/**
 * nilfs_btree_promote_key - propagate a new key toward the root
 * @btree: B-tree
 * @path: path
 * @level: level at which the propagation starts
 * @key: key
 *
 * Description: nilfs_btree_promote_key() overwrites the key at the path
 * position of the node at the level @level with @key. As long as the position
 * just updated is the first slot of its node, the same is done one level up,
 * until the top level of @path has been processed. Every node touched is
 * locked while it is modified and marked dirty if it was not dirty already.
 */
static void nilfs_btree_promote_key(struct nilfs_btree *btree,
				    struct nilfs_btree_path *path,
				    int level,
				    nilfs_btree_key_t key)
{
	struct buffer_head *bh;
	struct nilfs_btree_node *node;
	nilfs_btree_dkey_t *dkeys;
	int slot;

	for (;;) {
		bh = path->bp_bh[level];
		lock_buffer(bh);
		node = NILFS_BTREE_BH_TO_NODE(bh);
		slot = path->bp_index[level];
		dkeys = NILFS_BTREE_NODE_DKEYS(node, btree->bt_blksize);
		dkeys[slot] = cpu_to_le64(key);
		if (!buffer_dirty(bh)) {
			NILFS_BTREE_BOP_MARK_NODE_DIRTY(btree, bh);
			nilfs_btree_mark_dirty(btree);
		}
		unlock_buffer(bh);

		/* only a change of the first key concerns the parent */
		if (slot != 0)
			break;
		level++;
		if (level >= path->bp_nlevels)
			break;
	}
}

/**
 * nilfs_btree_redistribute_to_left - rebalance a node with the left sibling
 * @btree: B-tree
 * @path: path
 * @level: level
 *
 * Description: nilfs_btree_redistribute_to_left() moves leading key-pointer
 * pairs of the node on the path @path at the level @level to the end of its
 * left sibling node, so that both nodes hold about the same number of pairs.
 * The number of pairs moved is limited so that the position recorded in @path
 * stays inside the node on the path. The key of the node in its parent is
 * updated, and the index of @path at @level is adjusted.
 *
 * Return value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-NILFS_BTREE_EIO - I/O error.
 *
 * %-NILFS_BTREE_ENOSIB - No left sibling node.
 *
 * %-NILFS_BTREE_ENOSPC - No pair can be moved: the two nodes are already
 * balanced (e.g. the left sibling is full, too), or the position recorded in
 * @path is the first slot of the node.
 */
static int nilfs_btree_redistribute_to_left(struct nilfs_btree *btree,
					    struct nilfs_btree_path *path,
					    int level)
{
	struct buffer_head *lbh, *rbh;
	struct nilfs_btree_node *left, *right;
	nilfs_btree_dkey_t *ldkeys, *rdkeys;
	nilfs_btree_dptr_t *ldptrs, *rdptrs;
	nilfs_btree_ptr_t lsib;
	int nlchildren, nrchildren, n;
	int ret;

	if ((lsib = nilfs_btree_path_get_left_sibling(btree, path, level)) == NILFS_BTREE_INVALID_PTR)
		return -NILFS_BTREE_ENOSIB;
	if ((lbh = nilfs_btree_get_node(btree, lsib)) == NULL)
		return -NILFS_BTREE_EIO;
	/* the sibling is locked first, then the node on the path */
	lock_buffer(lbh);
	left = NILFS_BTREE_BH_TO_NODE(lbh);
	nlchildren = le16_to_cpu(left->bn_nchildren);

	/* the node on the path is the right-hand one of the pair */
	rbh = path->bp_bh[level];
	lock_buffer(rbh);
	right = NILFS_BTREE_BH_TO_NODE(rbh);
	nrchildren = le16_to_cpu(right->bn_nchildren);

	/* n: number of pairs the right node holds beyond half of the total */
	if ((n = nrchildren - (nlchildren + nrchildren) / 2) == 0) {
		ret = -NILFS_BTREE_ENOSPC;
		goto out;
	}

	/* insert position does not move. */
	if (n > path->bp_index[level]) {
		/* move only the pairs in front of the position in @path */
		if ((n = path->bp_index[level]) == 0) {
			ret = -NILFS_BTREE_ENOSPC;
			goto out;
		}
	}

	/* move n key-pointer pairs from right to left. */
	ret = 0;
	ldkeys = NILFS_BTREE_NODE_DKEYS(left, btree->bt_blksize);
	ldptrs = NILFS_BTREE_NODE_DPTRS(left, btree->bt_blksize);
	rdkeys = NILFS_BTREE_NODE_DKEYS(right, btree->bt_blksize);
	rdptrs = NILFS_BTREE_NODE_DPTRS(right, btree->bt_blksize);
	/* append the first n pairs of right to left ... */
	memcpy(ldkeys + nlchildren, rdkeys, n * sizeof(nilfs_btree_dkey_t));
	memcpy(ldptrs + nlchildren, rdptrs, n * sizeof(nilfs_btree_dptr_t));
	/* ... and close the gap at the front of right */
	memmove(rdkeys, rdkeys + n,
		(nrchildren - n) * sizeof(nilfs_btree_dkey_t));
	memmove(rdptrs, rdptrs + n,
		(nrchildren - n) * sizeof(nilfs_btree_dptr_t));
	nlchildren += n;
	nrchildren -= n;
	left->bn_nchildren = cpu_to_le16(nlchildren);
	right->bn_nchildren = cpu_to_le16(nrchildren);
	if (!buffer_dirty(lbh)) {
		NILFS_BTREE_BOP_MARK_NODE_DIRTY(btree, lbh);
		nilfs_btree_mark_dirty(btree);
	}
	if (!buffer_dirty(rbh)) {
		NILFS_BTREE_BOP_MARK_NODE_DIRTY(btree, rbh);
		nilfs_btree_mark_dirty(btree);
	}
	/* the first key of right changed; update it in the ancestors */
	nilfs_btree_promote_key(btree, path, level + 1, le64_to_cpu(rdkeys[0]));

	/* adjust path */
	path->bp_index[level] -= n;

 out:
	unlock_buffer(lbh);
	unlock_buffer(rbh);
	/* only the sibling is put; rbh still belongs to @path */
	nilfs_btree_put_node(btree, lbh);

	return ret;
}

/**
 * nilfs_btree_redistribute_to_right - rebalance a node with the right sibling
 * @btree: B-tree
 * @path: path
 * @level: level
 *
 * Description: nilfs_btree_redistribute_to_right() moves trailing key-pointer
 * pairs of the node on the path @path at the level @level to the front of its
 * right sibling node, so that both nodes hold about the same number of pairs.
 * The number of pairs moved is limited so that the position recorded in @path
 * stays inside the node on the path. The key of the right sibling in the
 * parent is updated.
 *
 * Return value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-NILFS_BTREE_EIO - I/O error.
 *
 * %-NILFS_BTREE_ENOSIB - No right sibling node.
 *
 * %-NILFS_BTREE_ENOSPC - No pair can be moved: the two nodes are already
 * balanced (e.g. the right sibling is full, too), or the position recorded in
 * @path is past the last pair of the node.
 */
static int nilfs_btree_redistribute_to_right(struct nilfs_btree *btree,
					     struct nilfs_btree_path *path,
					     int level)
{
	struct buffer_head *lbh, *rbh;
	struct nilfs_btree_node *left, *right;
	nilfs_btree_dkey_t *ldkeys, *rdkeys;
	nilfs_btree_dptr_t *ldptrs, *rdptrs;
	nilfs_btree_ptr_t rsib;
	int nlchildren, nrchildren, n;
	int ret;

	if ((rsib = nilfs_btree_path_get_right_sibling(btree, path, level)) == NILFS_BTREE_INVALID_PTR)
		return -NILFS_BTREE_ENOSIB;
	if ((rbh = nilfs_btree_get_node(btree, rsib)) == NULL)
		return -NILFS_BTREE_EIO;
	/* the sibling is locked first, then the node on the path */
	lock_buffer(rbh);
	right = NILFS_BTREE_BH_TO_NODE(rbh);
	nrchildren = le16_to_cpu(right->bn_nchildren);

	/* the node on the path is the left-hand one of the pair */
	lbh = path->bp_bh[level];
	lock_buffer(lbh);
	left = NILFS_BTREE_BH_TO_NODE(lbh);
	nlchildren = le16_to_cpu(left->bn_nchildren);

	/* n: number of pairs the left node holds beyond half of the total */
	if ((n = nlchildren - (nlchildren + nrchildren) / 2) == 0) {
		ret = -NILFS_BTREE_ENOSPC;
		goto out;
	}

	/* insert position does not move. */
	if (n > nlchildren - path->bp_index[level]) {
		/* move only the pairs at or behind the position in @path */
		if ((n = nlchildren - path->bp_index[level]) == 0) {
			ret = -NILFS_BTREE_ENOSPC;
			goto out;
		}
	}

	/* move n key-pointer pairs from left to right. */
	ret = 0;
	ldkeys = NILFS_BTREE_NODE_DKEYS(left, btree->bt_blksize);
	ldptrs = NILFS_BTREE_NODE_DPTRS(left, btree->bt_blksize);
	rdkeys = NILFS_BTREE_NODE_DKEYS(right, btree->bt_blksize);
	rdptrs = NILFS_BTREE_NODE_DPTRS(right, btree->bt_blksize);
	/* open a gap of n slots at the front of right ... */
	memmove(rdkeys + n, rdkeys, nrchildren * sizeof(nilfs_btree_dkey_t));
	memmove(rdptrs + n, rdptrs, nrchildren * sizeof(nilfs_btree_dptr_t));
	/* ... and fill it with the last n pairs of left */
	memcpy(rdkeys, ldkeys + nlchildren - n, n * sizeof(nilfs_btree_dkey_t));
	memcpy(rdptrs, ldptrs + nlchildren - n, n * sizeof(nilfs_btree_dptr_t));
	nlchildren -= n;
	nrchildren += n;
	left->bn_nchildren = cpu_to_le16(nlchildren);
	right->bn_nchildren = cpu_to_le16(nrchildren);
	if (!buffer_dirty(lbh)) {
		NILFS_BTREE_BOP_MARK_NODE_DIRTY(btree, lbh);
		nilfs_btree_mark_dirty(btree);
	}
	if (!buffer_dirty(rbh)) {
		NILFS_BTREE_BOP_MARK_NODE_DIRTY(btree, rbh);
		nilfs_btree_mark_dirty(btree);
	}
	/*
	 * The first key of the right sibling changed. Point the parent index
	 * at the sibling's slot just for the key update, then restore it.
	 */
	path->bp_index[level + 1]++;
	nilfs_btree_promote_key(btree, path, level + 1, le64_to_cpu(rdkeys[0]));
	path->bp_index[level + 1]--;

 out:
	unlock_buffer(lbh);
	unlock_buffer(rbh);
	/* only the sibling is put; lbh still belongs to @path */
	nilfs_btree_put_node(btree, rbh);

	return ret;
}

/**
 * nilfs_btree_split - divide a full node
 * @btree: B-tree
 * @path: path
 * @level: level
 * @key: key
 * @ptr: ptr
 *
 * Description: nilfs_btree_split() divides a node on the path @path at the
 * level @level into two. A new node is obtained and the upper part of the
 * key-pointer pairs of the node is moved into it, so that the pairs are
 * evenly distributed between the two nodes. If the position recorded in @path
 * falls into the new node, @path is switched over to the new node.
 *
 * Return Value: On success, 0 is returned and a key-pointer pair to be
 * promoted to the parent node is stored in the place pointed to by @key and
 * @ptr, respectively. On error, the following negative error code is returned.
 *
 * %-NILFS_BTREE_ENOMEM - Insufficient amount of memory available.
 */
static int nilfs_btree_split(struct nilfs_btree *btree,
			     struct nilfs_btree_path *path,
			     int level,
			     nilfs_btree_key_t *key,
			     nilfs_btree_ptr_t *ptr)
{
	struct buffer_head *lbh, *rbh;
	struct nilfs_btree_node *left, *right;
	nilfs_btree_dkey_t *ldkeys, *rdkeys;
	nilfs_btree_dptr_t *ldptrs, *rdptrs;
	int nlchildren, nrchildren;

	/* the node being split keeps the lower part and becomes "left" */
	lbh = path->bp_bh[level];
	lock_buffer(lbh);
	left = NILFS_BTREE_BH_TO_NODE(lbh);
	nlchildren = le16_to_cpu(left->bn_nchildren);

	if ((rbh = NILFS_BTREE_BOP_GET_NEW_NODE(btree)) == NULL) {
		unlock_buffer(lbh);
		return -NILFS_BTREE_ENOMEM;
	}
	lock_buffer(rbh);
	right = NILFS_BTREE_BH_TO_NODE(rbh);
	nrchildren = nlchildren / 2;
	/*
	 * With an odd number of pairs, the extra pair goes to the right node
	 * when the position in @path is not beyond nrchildren + 1.
	 */
	if ((nlchildren & 0x1) && (path->bp_index[level] <= nrchildren + 1))
		nrchildren++;
	nlchildren -= nrchildren;

	ldkeys = NILFS_BTREE_NODE_DKEYS(left, btree->bt_blksize);
	ldptrs = NILFS_BTREE_NODE_DPTRS(left, btree->bt_blksize);
	rdkeys = NILFS_BTREE_NODE_DKEYS(right, btree->bt_blksize);
	rdptrs = NILFS_BTREE_NODE_DPTRS(right, btree->bt_blksize);
	/* copy the upper nrchildren pairs of left into the new node */
	memcpy(rdkeys, ldkeys + nlchildren,
	       nrchildren * sizeof(nilfs_btree_dkey_t));
	memcpy(rdptrs, ldptrs + nlchildren,
	       nrchildren * sizeof(nilfs_btree_dptr_t));
	left->bn_nchildren = cpu_to_le16(nlchildren);
	right->bn_nchildren = cpu_to_le16(nrchildren);
	right->bn_level = left->bn_level;	/* copy in disk format */

	if (!buffer_dirty(lbh)) {
		NILFS_BTREE_BOP_MARK_NODE_DIRTY(btree, lbh);
		nilfs_btree_mark_dirty(btree);
	}
	if (!buffer_dirty(rbh)) {
		NILFS_BTREE_BOP_MARK_NODE_DIRTY(btree, rbh);
		nilfs_btree_mark_dirty(btree);
	}

	/*
	 * A position beyond the end of left now lies in the new node: make
	 * the path refer to the new node and rebase the index. A position
	 * equal to nlchildren stays in left (append at its end).
	 */
	if (path->bp_index[level] > nlchildren) {
		if (path->bp_bh[level] != NULL)
			nilfs_btree_put_node(btree, path->bp_bh[level]);
		path->bp_bh[level] = rbh;
		path->bp_index[level] -= nlchildren;
	}

	unlock_buffer(lbh);
	unlock_buffer(rbh);

	/* the pair to be inserted in the parent: first key of the new node */
	*key = le64_to_cpu(rdkeys[0]);
	*ptr = NILFS_BTREE_BH_TO_PTR(rbh);
	return 0;
}

/**
 * nilfs_btree_increase_height - increase tree height by one level
 * @btree: B-tree
 * @path: path
 *
 * Description: nilfs_btree_increase_height() obtains a new node, makes the
 * current root its only child, installs the new node as the root of @btree
 * and appends it to @path as the new top level.
 *
 * Return Value: On success, 0 is returned. On error, the following negative
 * error code is returned.
 *
 * %-NILFS_BTREE_ENOMEM - Insufficient amount of memory available.
 */
static int nilfs_btree_increase_height(struct nilfs_btree *btree,
				       struct nilfs_btree_path *path)
{
	struct buffer_head *oldbh, *newbh;
	struct nilfs_btree_node *oldroot, *newroot;
	nilfs_btree_dkey_t *olddkeys, *newdkeys;
	nilfs_btree_dptr_t *newdptrs;
	int toplevel, newlevel;

	toplevel = path->bp_nlevels - 1;
	oldbh = path->bp_bh[toplevel];
	oldroot = NILFS_BTREE_BH_TO_NODE(oldbh);

	newbh = NILFS_BTREE_BOP_GET_NEW_NODE(btree);
	if (newbh == NULL)
		return -NILFS_BTREE_ENOMEM;

	/* build the new root: one level higher, a single child */
	lock_buffer(newbh);
	newroot = NILFS_BTREE_BH_TO_NODE(newbh);
	newlevel = toplevel + 1;
	newroot->bn_level = cpu_to_le16(newlevel);
	newroot->bn_nchildren = cpu_to_le16(1);
	newdkeys = NILFS_BTREE_NODE_DKEYS(newroot, btree->bt_blksize);
	newdptrs = NILFS_BTREE_NODE_DPTRS(newroot, btree->bt_blksize);
	olddkeys = NILFS_BTREE_NODE_DKEYS(oldroot, btree->bt_blksize);
	newdkeys[0] = olddkeys[0];	/* already in disk format */
	newdptrs[0] = cpu_to_le64(btree->bt_root);
	NILFS_BTREE_BOP_MARK_NODE_DIRTY(btree, newbh);
	nilfs_btree_mark_dirty(btree);
	unlock_buffer(newbh);

	/* switch the tree and the path over to the new root */
	btree->bt_root = NILFS_BTREE_BH_TO_PTR(newbh);
	if (path->bp_bh[newlevel] != NULL)
		nilfs_btree_put_node(btree, path->bp_bh[newlevel]);
	path->bp_bh[newlevel] = newbh;
	path->bp_index[newlevel] = 0;
	path->bp_nlevels++;

	return 0;
}

/*
 * Returned by nilfs_btree_insert_in_node() when the node was split and a
 * key-pointer pair has to be inserted in the parent node.
 */
#define NILFS_BTREE_INSERT_PROMOTION	1

/**
 * nilfs_btree_insert_in_node - insert a new key-pointer pair in a node
 * @btree: B-tree
 * @path: path
 * @level: level
 * @key: key
 * @ptr: ptr
 *
 * Description: nilfs_btree_insert_in_node() inserts a new pair of @key and
 * @ptr in a node on the path @path at the level @level. If the node can
 * accommodate the new pair, the pair is simply inserted. If the node is
 * already full, the balance of the B-tree must be restored. A redistribution
 * of key-pointer pairs between the node and its sibling suffices if the
 * sibling node is not full. When the sibling nodes are already full, a split
 * must occur. The node is divided into two and a newly created node and a key
 * separating these two nodes are promoted to the parent node. This may force
 * splitting at the parent level. If the node to be split is the root, a new
 * root is created first and the tree increases in height.
 *
 * If the B-tree has no root yet, an empty root node is created before the
 * insertion.
 *
 * Return Value: On success, 0 is returned if a split does not occur, or
 * %NILFS_BTREE_INSERT_PROMOTION is returned and a key-pointer pair to be
 * promoted to the parent node is stored in the place pointed to by @key and
 * @ptr, respectively, if a split occurs. On error, one of the following
 * negative error codes is returned.
 *
 * %-NILFS_BTREE_EIO - I/O error.
 *
 * %-NILFS_BTREE_ENOMEM - Insufficient amount of memory available.
 */
static int nilfs_btree_insert_in_node(struct nilfs_btree *btree,
				      struct nilfs_btree_path *path,
				      int level,
				      nilfs_btree_key_t *key,
				      nilfs_btree_ptr_t *ptr)
{
	struct buffer_head *bh;
	struct nilfs_btree_node *node;
	nilfs_btree_dkey_t *dkeys;
	nilfs_btree_dptr_t *dptrs;
	nilfs_btree_key_t newkey;
	nilfs_btree_ptr_t newptr;
	int nchildren, index;
	int ret;

	if (btree->bt_root == NILFS_BTREE_INVALID_PTR) {
		/* create root */
		if ((bh = NILFS_BTREE_BOP_GET_NEW_NODE(btree)) == NULL)
			return -NILFS_BTREE_ENOMEM;
		btree->bt_root = NILFS_BTREE_BH_TO_PTR(bh);
		if (path->bp_bh[0] != NULL)
			nilfs_btree_put_node(btree, path->bp_bh[0]);
		/* the path now consists of the empty level-0 root only */
		path->bp_bh[0] = bh;
		path->bp_index[0] = 0;
		path->bp_nlevels = 1;
		lock_buffer(bh);
		node = NILFS_BTREE_BH_TO_NODE(bh);
		node->bn_level = cpu_to_le16(0);
		node->bn_nchildren = cpu_to_le16(0);
		unlock_buffer(bh);
	}

	index = path->bp_index[level];
	BUG_ON((index < 0) ||
	       (index > NILFS_BTREE_NODE_MAX_NCHILDREN(btree->bt_blksize)));
	newptr = NILFS_BTREE_INVALID_PTR;
	bh = path->bp_bh[level];
	node = NILFS_BTREE_BH_TO_NODE(bh);
	nchildren = le16_to_cpu(node->bn_nchildren);
	ret = 0;
	if (nchildren == NILFS_BTREE_NODE_MAX_NCHILDREN(btree->bt_blksize)) {
		/* balance the tree */
		if (level == path->bp_nlevels - 1) {
			/*
			 * increase the height of the tree because the root is
			 * full.
			 */
			if ((ret = nilfs_btree_increase_height(btree, path)) < 0)
				return ret;
			if ((ret = nilfs_btree_split(btree, path, level, &newkey, &newptr)) < 0)
				return ret;
			ret = NILFS_BTREE_INSERT_PROMOTION;
		} else {
			/*
			 * Try the left sibling first, then the right one, and
			 * split only if neither can take over any pair. Any
			 * error other than "no sibling" or "no space" aborts
			 * the insertion.
			 */
			if ((ret = nilfs_btree_redistribute_to_left(btree, path, level)) < 0) {
				if ((ret != -NILFS_BTREE_ENOSIB) &&
				    (ret != -NILFS_BTREE_ENOSPC))
					return ret;
				if ((ret = nilfs_btree_redistribute_to_right(btree, path, level)) < 0) {
					if ((ret != -NILFS_BTREE_ENOSIB) &&
					    (ret != -NILFS_BTREE_ENOSPC))
						return ret;
					if ((ret = nilfs_btree_split(btree, path, level, &newkey, &newptr)) < 0)
						return ret;
					ret = NILFS_BTREE_INSERT_PROMOTION;
				}
			}
		}
	}

	/*
	 * Rebalancing may have changed both the node and the index recorded
	 * in the path, so read them again before inserting.
	 */
	bh = path->bp_bh[level];
	lock_buffer(bh);
	node = NILFS_BTREE_BH_TO_NODE(bh);
	index = path->bp_index[level];
	nchildren = le16_to_cpu(node->bn_nchildren);
	dkeys = NILFS_BTREE_NODE_DKEYS(node, btree->bt_blksize);
	dptrs = NILFS_BTREE_NODE_DPTRS(node, btree->bt_blksize);
	if (index < nchildren) {
		/* shift the pairs at and behind index up by one slot */
		memmove(dkeys + index + 1, dkeys + index,
			(nchildren - index) * sizeof(nilfs_btree_dkey_t));
		memmove(dptrs + index + 1, dptrs + index,
			(nchildren - index) * sizeof(nilfs_btree_dptr_t));
	}
	dkeys[index] = cpu_to_le64(*key);
	dptrs[index] = cpu_to_le64(*ptr);
	/* The reference count of the data block is incremented. */
	if (level == 0)
		get_bh(NILFS_BTREE_PTR_TO_BH(*ptr));
	nchildren++;
	node->bn_nchildren = cpu_to_le16(nchildren);
	if (!buffer_dirty(bh)) {
		NILFS_BTREE_BOP_MARK_NODE_DIRTY(btree, bh);
		nilfs_btree_mark_dirty(btree);
	}

	/* a new first key must be reflected in the ancestors */
	if ((level < path->bp_nlevels - 1) && (index == 0))
		nilfs_btree_promote_key(btree, path, level + 1,
					le64_to_cpu(dkeys[0]));
	unlock_buffer(bh);
	if (ret == NILFS_BTREE_INSERT_PROMOTION) {
		/*
		 * Hand the pair produced by the split to the caller and
		 * advance the parent index by one slot for its insertion.
		 */
		path->bp_index[level + 1]++;
		*key = newkey;
		*ptr = newptr;
	}
	return ret;
}

/**
 * nilfs_btree_insert - insert a new key-record pair
 * @btree: B-tree
 * @key: key
 * @bh: buffer head
 *
 * Description: nilfs_btree_insert() inserts a new key-record pair specified
 * by @key and @bh in the B-tree @btree. An insert operation first finds the
 * leaf node in which the new pair should be inserted. Then, it adds the pair
 * to the leaf node and, as long as a node split is reported, inserts the
 * promoted pair one level further up. The whole operation is performed with
 * the B-tree semaphore held for writing.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-NILFS_BTREE_EEXIST - key specified by @key is not unique.
 *
 * %-NILFS_BTREE_EIO - I/O error.
 *
 * %-NILFS_BTREE_ENOMEM - Insufficient amount of memory available.
 */
int nilfs_btree_insert(struct nilfs_btree *btree,
		       unsigned long key,
		       struct buffer_head *bh)
{
	struct nilfs_btree_path *path;
	nilfs_btree_key_t k;
	nilfs_btree_ptr_t ptr;
	int level, ret;

	down_write(&btree->bt_sem);

	path = nilfs_btree_alloc_path(btree);
	if (path == NULL) {
		ret = -NILFS_BTREE_ENOMEM;
		goto out_up_write;
	}

	/* the key must not exist yet; the lookup also records where it goes */
	k = (nilfs_btree_key_t)key;
	ret = nilfs_btree_lookup_path(btree, path, k, NULL, 0);
	if (ret != -NILFS_BTREE_ENOKEY) {
		if (ret == 0)
			ret = -NILFS_BTREE_EEXIST;
		goto out_free_path;
	}

	/* insert at level 0 and climb for as long as splits are promoted */
	ptr = NILFS_BTREE_BH_TO_PTR(bh);
	level = 0;
	do {
		ret = nilfs_btree_insert_in_node(btree, path, level, &k, &ptr);
		level++;
	} while (ret == NILFS_BTREE_INSERT_PROMOTION);

 out_free_path:
	nilfs_btree_free_path(btree, path);
 out_up_write:
	up_write(&btree->bt_sem);
	return ret;
}

/**
 * nilfs_btree_decrease_height - decrease tree height by one level
 * @btree: B-tree
 * @path: path
 *
 * Description: nilfs_btree_decrease_height() deletes the root node recorded
 * at the top level of @path, makes the node referred to by the first pointer
 * of the old root the new root of @btree, and removes the top level from
 * @path.
 */
static void nilfs_btree_decrease_height(struct nilfs_btree *btree,
					struct nilfs_btree_path *path)
{
	struct buffer_head *rootbh;
	struct nilfs_btree_node *root;
	nilfs_btree_dptr_t *dptrs;
	nilfs_btree_ptr_t child;
	int top;

	top = path->bp_nlevels - 1;
	rootbh = path->bp_bh[top];
	root = NILFS_BTREE_BH_TO_NODE(rootbh);
	dptrs = NILFS_BTREE_NODE_DPTRS(root, btree->bt_blksize);

	/* fetch the child pointer before the old root is deleted */
	child = le64_to_cpu(dptrs[0]);
	NILFS_BTREE_BOP_DELETE_NODE(btree, rootbh);

	path->bp_bh[top] = NULL;
	btree->bt_root = child;
	path->bp_nlevels--;
}

/*
 * Status returned by the rebalance/delete helpers when two nodes have been
 * concatenated; the callers then repeat the deletion one level higher.
 */
#define NILFS_BTREE_DELETE_DEMOTION	1

/**
 * nilfs_btree_rebalance_with_left - rebalance a node with left sibling
 * @btree: B-tree
 * @path: path
 * @level: level
 *
 * Description: nilfs_btree_rebalance_with_left() restores the balance of the
 * B-tree @btree due to a distribution of key-pointer pairs between a node on
 * the path @path at the level @level and its left sibling, or a concatenation
 * of these two nodes.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-NILFS_BTREE_EIO - I/O error.
 *
 * %-NILFS_BTREE_ENOSIB - No left sibling node.
 */
static int nilfs_btree_rebalance_with_left(struct nilfs_btree *btree,
					   struct nilfs_btree_path *path,
					   int level)
{
	struct buffer_head *lbh, *rbh;
	struct nilfs_btree_node *left, *right;
	nilfs_btree_dkey_t *ldkeys, *rdkeys;
	nilfs_btree_dptr_t *ldptrs, *rdptrs;
	nilfs_btree_ptr_t lsib;
	int nlchildren, nrchildren, n;
	int ret;

	if ((lsib = nilfs_btree_path_get_left_sibling(btree, path, level)) == NILFS_BTREE_INVALID_PTR)
		return -NILFS_BTREE_ENOSIB;
	if ((lbh = nilfs_btree_get_node(btree, lsib)) == NULL)
		return -NILFS_BTREE_EIO;
	lock_buffer(lbh);
	left = NILFS_BTREE_BH_TO_NODE(lbh);
	nlchildren = le16_to_cpu(left->bn_nchildren);
	ldkeys = NILFS_BTREE_NODE_DKEYS(left, btree->bt_blksize);
	ldptrs = NILFS_BTREE_NODE_DPTRS(left, btree->bt_blksize);

	rbh = path->bp_bh[level];
	lock_buffer(rbh);
	right = NILFS_BTREE_BH_TO_NODE(rbh);
	nrchildren = le16_to_cpu(right->bn_nchildren);
	rdkeys = NILFS_BTREE_NODE_DKEYS(right, btree->bt_blksize);
	rdptrs = NILFS_BTREE_NODE_DPTRS(right, btree->bt_blksize);

	if ((n = (nlchildren + nrchildren) / 2 - nrchildren) == 0) {
		/* concatenate left and right */
		memcpy(ldkeys + nlchildren, rdkeys,
		       nrchildren * sizeof(nilfs_btree_dkey_t));
		memcpy(ldptrs + nlchildren, rdptrs,
		       nrchildren * sizeof(nilfs_btree_dptr_t));
		nlchildren += nrchildren;
		left->bn_nchildren = cpu_to_le16(nlchildren);
		if (!buffer_dirty(lbh)) {
			NILFS_BTREE_BOP_MARK_NODE_DIRTY(btree, lbh);
			nilfs_btree_mark_dirty(btree);
		}
		path->bp_bh[level] = lbh;
		path->bp_index[level] += nlchildren;
		unlock_buffer(lbh);
		unlock_buffer(rbh);
		NILFS_BTREE_BOP_DELETE_NODE(btree, rbh);
		ret = NILFS_BTREE_DELETE_DEMOTION;
	} else {
		/* move n key-pointer pairs from left to right */
		memmove(rdkeys + n, rdkeys,
			nrchildren * sizeof(nilfs_btree_dkey_t));
		memmove(rdptrs + n, rdptrs,
			nrchildren * sizeof(nilfs_btree_dptr_t));
		memcpy(rdkeys, ldkeys + nlchildren - n,
		       n * sizeof(nilfs_btree_dkey_t));
		memcpy(rdptrs, ldptrs + nlchildren - n,
		       n * sizeof(nilfs_btree_dptr_t));
		nlchildren -= n;
		nrchildren += n;
		left->bn_nchildren = cpu_to_le16(nlchildren);
		right->bn_nchildren = cpu_to_le16(nrchildren);
		if (!buffer_dirty(lbh)) {
			NILFS_BTREE_BOP_MARK_NODE_DIRTY(btree, lbh);
			nilfs_btree_mark_dirty(btree);
		}
		if (!buffer_dirty(rbh)) {
			NILFS_BTREE_BOP_MARK_NODE_DIRTY(btree, rbh);
			nilfs_btree_mark_dirty(btree);
		}
		nilfs_btree_promote_key(btree, path,
					level + 1, le64_to_cpu(rdkeys[0]));
		path->bp_index[level] += n;
		unlock_buffer(lbh);
		unlock_buffer(rbh);
		nilfs_btree_put_node(btree, lbh);
		ret = 0;
	}

	return ret;
}

/**
 * nilfs_btree_rebalance_with_right - rebalance a node with right sibling
 * @btree: B-tree
 * @path: path
 * @level: level
 *
 * Description: nilfs_btree_rebalance_with_right() restores the balance of
 * the B-tree @btree either by moving key-pointer pairs from the right
 * sibling into the node on the path @path at the level @level, or by
 * concatenating the two nodes into the node on @path and deleting the right
 * sibling.
 *
 * Return Value: On success, 0 is returned if pairs were redistributed, or
 * %NILFS_BTREE_DELETE_DEMOTION is returned if the nodes were concatenated.
 * On error, one of the following negative error codes is returned.
 *
 * %-NILFS_BTREE_EIO - I/O error.
 *
 * %-NILFS_BTREE_ENOSIB - No right sibling node.
 */
static int nilfs_btree_rebalance_with_right(struct nilfs_btree *btree,
					    struct nilfs_btree_path *path,
					    int level)
{
	struct buffer_head *cur_bh, *sib_bh;
	struct nilfs_btree_node *cur, *sib;
	nilfs_btree_dkey_t *cur_keys, *sib_keys;
	nilfs_btree_dptr_t *cur_ptrs, *sib_ptrs;
	nilfs_btree_ptr_t sib_ptr;
	int ncur, nsib, nmove;

	sib_ptr = nilfs_btree_path_get_right_sibling(btree, path, level);
	if (sib_ptr == NILFS_BTREE_INVALID_PTR)
		return -NILFS_BTREE_ENOSIB;
	sib_bh = nilfs_btree_get_node(btree, sib_ptr);
	if (sib_bh == NULL)
		return -NILFS_BTREE_EIO;

	/* right sibling */
	lock_buffer(sib_bh);
	sib = NILFS_BTREE_BH_TO_NODE(sib_bh);
	nsib = le16_to_cpu(sib->bn_nchildren);
	sib_keys = NILFS_BTREE_NODE_DKEYS(sib, btree->bt_blksize);
	sib_ptrs = NILFS_BTREE_NODE_DPTRS(sib, btree->bt_blksize);

	/* node on the path (the left one of the pair) */
	cur_bh = path->bp_bh[level];
	lock_buffer(cur_bh);
	cur = NILFS_BTREE_BH_TO_NODE(cur_bh);
	ncur = le16_to_cpu(cur->bn_nchildren);
	cur_keys = NILFS_BTREE_NODE_DKEYS(cur, btree->bt_blksize);
	cur_ptrs = NILFS_BTREE_NODE_DPTRS(cur, btree->bt_blksize);

	/* number of pairs the left node needs to reach an even split */
	nmove = (ncur + nsib) / 2 - ncur;

	if (nmove != 0) {
		/* move nmove key-pointer pairs from right to left */
		memcpy(cur_keys + ncur, sib_keys,
		       nmove * sizeof(nilfs_btree_dkey_t));
		memcpy(cur_ptrs + ncur, sib_ptrs,
		       nmove * sizeof(nilfs_btree_dptr_t));
		memmove(sib_keys, sib_keys + nmove,
			(nsib - nmove) * sizeof(nilfs_btree_dkey_t));
		memmove(sib_ptrs, sib_ptrs + nmove,
			(nsib - nmove) * sizeof(nilfs_btree_dptr_t));
		ncur += nmove;
		nsib -= nmove;
		cur->bn_nchildren = cpu_to_le16(ncur);
		sib->bn_nchildren = cpu_to_le16(nsib);
		if (!buffer_dirty(cur_bh)) {
			NILFS_BTREE_BOP_MARK_NODE_DIRTY(btree, cur_bh);
			nilfs_btree_mark_dirty(btree);
		}
		if (!buffer_dirty(sib_bh)) {
			NILFS_BTREE_BOP_MARK_NODE_DIRTY(btree, sib_bh);
			nilfs_btree_mark_dirty(btree);
		}
		/*
		 * The right sibling has a new first key. The parent index is
		 * bumped only for the duration of the key promotion,
		 * presumably so that the sibling's entry is the one updated.
		 */
		path->bp_index[level + 1]++;
		nilfs_btree_promote_key(btree, path,
					level + 1, le64_to_cpu(sib_keys[0]));
		path->bp_index[level + 1]--;
		unlock_buffer(cur_bh);
		unlock_buffer(sib_bh);
		nilfs_btree_put_node(btree, sib_bh);
		return 0;
	}

	/* concatenate left and right */
	memcpy(cur_keys + ncur, sib_keys,
	       nsib * sizeof(nilfs_btree_dkey_t));
	memcpy(cur_ptrs + ncur, sib_ptrs,
	       nsib * sizeof(nilfs_btree_dptr_t));
	ncur += nsib;
	cur->bn_nchildren = cpu_to_le16(ncur);
	if (!buffer_dirty(cur_bh)) {
		NILFS_BTREE_BOP_MARK_NODE_DIRTY(btree, cur_bh);
		nilfs_btree_mark_dirty(btree);
	}
	/* leave the parent index on the sibling's entry, which goes away */
	path->bp_index[level + 1]++;
	unlock_buffer(cur_bh);
	unlock_buffer(sib_bh);
	NILFS_BTREE_BOP_DELETE_NODE(btree, sib_bh);
	return NILFS_BTREE_DELETE_DEMOTION;
}

/**
 * nilfs_btree_delete_in_node - delete a key-pointer pair from a node
 * @btree: B-tree
 * @path: path
 * @level: level
 *
 * Description: nilfs_btree_delete_in_node() deletes a key-pointer pair in a
 * node on the path @path at the level @level. If the node becomes less than
 * half full, the balance of the B-tree must be restored. A distribution of
 * key-pointer pairs between the node and its sibling suffices if the sibling
 * node is more than half full. When the sibling node is only half full, a
 * concatenation must occur: the pairs are combined into one of the nodes and
 * the other is removed from the parent. This may force concatenating at the
 * parent level. If the concatenation propagates to the children of the root
 * and causes the root to have only one child, this child becomes the new
 * root and the tree decreases in height.
 *
 * Return Value: On success, 0 is returned if a concatenation does not occur,
 * or %NILFS_BTREE_DELETE_DEMOTION is returned if a concatenation occurs. On
 * error, the following negative error code is returned.
 *
 * %-NILFS_BTREE_EIO - I/O error.
 */
static int nilfs_btree_delete_in_node(struct nilfs_btree *btree,
				      struct nilfs_btree_path *path,
				      int level)
{
	struct buffer_head *node_bh;
	struct nilfs_btree_node *node;
	nilfs_btree_dkey_t *keys;
	nilfs_btree_dptr_t *ptrs;
	nilfs_btree_ptr_t victim;
	int remaining, slot, tail, ret;

	slot = path->bp_index[level];
	/* NOTE(review): the bound is the node capacity, not the live count. */
	BUG_ON((slot < 0) ||
	       (slot > NILFS_BTREE_NODE_MAX_NCHILDREN(btree->bt_blksize)));

	node_bh = path->bp_bh[level];
	lock_buffer(node_bh);
	node = NILFS_BTREE_BH_TO_NODE(node_bh);
	remaining = le16_to_cpu(node->bn_nchildren);
	keys = NILFS_BTREE_NODE_DKEYS(node, btree->bt_blksize);
	ptrs = NILFS_BTREE_NODE_DPTRS(node, btree->bt_blksize);

	/* A leaf slot that still refers to a buffer head gives it up here. */
	victim = le64_to_cpu(ptrs[slot]);
	if ((level == 0) && NILFS_BTREE_PTR_IS_BH(victim))
		brelse(NILFS_BTREE_PTR_TO_BH(victim));

	/* Close the gap left by the removed pair. */
	tail = remaining - slot - 1;
	if (tail > 0) {
		memmove(keys + slot, keys + slot + 1,
			tail * sizeof(nilfs_btree_dkey_t));
		memmove(ptrs + slot, ptrs + slot + 1,
			tail * sizeof(nilfs_btree_dptr_t));
	}
	remaining--;
	node->bn_nchildren = cpu_to_le16(remaining);
	if (!buffer_dirty(node_bh)) {
		NILFS_BTREE_BOP_MARK_NODE_DIRTY(btree, node_bh);
		nilfs_btree_mark_dirty(btree);
	}

	/* Removing the first pair changes the key the parent refers to. */
	if ((slot == 0) && (level < path->bp_nlevels - 1))
		nilfs_btree_promote_key(btree, path, level + 1,
					le64_to_cpu(keys[0]));
	unlock_buffer(node_bh);

	if (level != path->bp_nlevels - 1) {
		/* non-root node */
		if (remaining >= NILFS_BTREE_NODE_MIN_NCHILDREN(btree->bt_blksize))
			return 0;

		/* Try the left sibling first, then fall back to the right. */
		ret = nilfs_btree_rebalance_with_left(btree, path, level);
		if (ret != -NILFS_BTREE_ENOSIB)
			return ret;
		ret = nilfs_btree_rebalance_with_right(btree, path, level);
		if (ret != -NILFS_BTREE_ENOSIB)
			return ret;

		nilfs_debug(0, "Non-root node has no siblings\n");
		return 0;
	}

	/* root node */
	if (level > 0) {
		if (remaining <= 1)
			nilfs_btree_decrease_height(btree, path);
		return 0;
	}

	/* The root is a leaf; an empty one means the tree is now empty. */
	if (remaining == 0) {
		NILFS_BTREE_BOP_DELETE_NODE(btree, path->bp_bh[level]);
		path->bp_bh[level] = NULL;
		btree->bt_root = NILFS_BTREE_INVALID_PTR;
	}
	return 0;
}

/**
 * nilfs_btree_delete - delete a key-record pair from a B-tree
 * @btree: B-tree
 * @key: key
 *
 * Description: nilfs_btree_delete() deletes a key-record pair specified by
 * @key in the B-tree @btree. A delete operation first finds a leaf node from
 * which the pair should be removed. Then, it deletes the pair in the leaf
 * node, and balances the tree from the leaf back to the root.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative codes is returned.
 *
 * %-NILFS_BTREE_EIO - I/O error.
 *
 * %-NILFS_BTREE_ENOKEY - @key does not exist.
 *
 * %-NILFS_BTREE_ENOMEM - Insufficient amount of memory available.
 */
int nilfs_btree_delete(struct nilfs_btree *btree, unsigned long key)
{
	struct nilfs_btree_path *path;
	nilfs_btree_key_t k;
	int level;
	int ret;

	down_write(&btree->bt_sem);
	if ((path = nilfs_btree_alloc_path(btree)) == NULL) {
		ret = -NILFS_BTREE_ENOMEM;
		goto out_up_write;
	}
	k = (nilfs_btree_key_t)key;
	if ((ret = nilfs_btree_lookup_path(btree, path, k, NULL, 0)) < 0)
		goto out_free_path;
	level = 0;
	while ((ret = nilfs_btree_delete_in_node(btree, path, level++)) == NILFS_BTREE_DELETE_DEMOTION);

 out_free_path:
	nilfs_btree_free_path(btree, path);
 out_up_write:
	up_write(&btree->bt_sem);
	return ret;
}


/*
 * nilfs_btree_put_data_in_page - release data buffers referred to from a page
 *
 * Walks every buffer head of @page. For a dirty buffer holding a level-0
 * node, brelse() is called on each child pointer that is still a buffer
 * head. A buffer that is not dirty only produces a warning.
 */
static void nilfs_btree_put_data_in_page(struct nilfs_btree *btree,
					 struct page *page)
{
	struct buffer_head *bh;
	struct nilfs_btree_node *leaf;
	nilfs_btree_dptr_t *slot, *end;
	nilfs_btree_ptr_t ptr;

	bh = page_buffers(page);
	do {
		get_bh(bh);
		if (!buffer_dirty(bh)) {
			page_warn("buffer head %p in page %p is not dirty\n",
				  bh, page);
		} else {
			leaf = NILFS_BTREE_BH_TO_NODE(bh);
			if (le16_to_cpu(leaf->bn_level) == 0) {
				slot = NILFS_BTREE_NODE_DPTRS(leaf, btree->bt_blksize);
				end = slot + le16_to_cpu(leaf->bn_nchildren);
				for (; slot < end; slot++) {
					ptr = le64_to_cpu(*slot);
					if (NILFS_BTREE_PTR_IS_BH(ptr))
						brelse(NILFS_BTREE_PTR_TO_BH(ptr));
				}
			}
		}
		brelse(bh);
		bh = bh->b_this_page;
	} while (bh != page_buffers(page));
}

/* Number of pages requested from the radix tree per gang lookup call. */
#define NILFS_BTREE_GANG_LOOKUP_SIZE	16

/*
 * nilfs_btree_delete_all_nodes - drop every node of a B-tree
 *
 * With the inactive-list lock and the radix-tree lock held, looks up the
 * pages tagged dirty in the B-tree's radix tree in batches and hands each
 * one to nilfs_btree_put_data_in_page(). After the locks are released the
 * type-specific "delete all nodes" operation is invoked.
 */
static void nilfs_btree_delete_all_nodes(struct nilfs_btree *btree)
{
	struct page *batch[NILFS_BTREE_GANG_LOOKUP_SIZE];
	nilfs_btree_key_t next = 0;
	int found, i;

	spin_lock_irq(btree->bt_inactive_list_lock);
	spin_lock(btree->bt_rtree_lock);

	for (;;) {
		found = radix_tree_64_gang_lookup_tag(btree->bt_rtree,
						      (void **)batch, next,
						      NILFS_BTREE_GANG_LOOKUP_SIZE,
						      PAGECACHE64_TAG_DIRTY);
		if (found <= 0)
			break;
		for (i = 0; i < found; i++)
			nilfs_btree_put_data_in_page(btree, batch[i]);
		/* resume right after the last page of this batch */
		next = nilfs_node_page_index(batch[found - 1]) + 1;
	}

	spin_unlock(btree->bt_rtree_lock);
	spin_unlock_irq(btree->bt_inactive_list_lock);

	NILFS_BTREE_BOP_DELETE_ALL_NODE(btree);
}

/**
 * nilfs_btree_clear - clear a B-tree
 * @btree: B-tree
 *
 * Description: nilfs_btree_clear() deletes all the nodes in the B-tree
 * @btree, invalidates its root pointer, and frees the B-tree-type-specific
 * data if any exists. The whole operation runs with the B-tree semaphore
 * held for writing.
 */
void nilfs_btree_clear(struct nilfs_btree *btree)
{
	down_write(&btree->bt_sem);

	nilfs_btree_delete_all_nodes(btree);
	btree->bt_root = NILFS_BTREE_INVALID_PTR;

	/* kfree() ignores a NULL pointer, so no separate check is needed */
	kfree(btree->bt_info);
	btree->bt_info = NULL;

	up_write(&btree->bt_sem);
}

/**
 * nilfs_btree_truncate - truncate a B-tree to a specified key
 * @btree: B-tree
 * @key: key
 *
 * Description: nilfs_btree_truncate() removes key-record pairs whose key is
 * greater than or equal to the key @key. When @key is 0 all nodes are
 * deleted at once; otherwise the pair with the largest key is looked up and
 * deleted repeatedly until the largest remaining key is below @key or the
 * tree has no key left.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-NILFS_BTREE_EIO - I/O error.
 *
 * %-NILFS_BTREE_ENOMEM - Insufficient amount of memory available.
 */
int nilfs_btree_truncate(struct nilfs_btree *btree, nilfs_btree_key_t key)
{
	struct nilfs_btree_path *path;
	nilfs_btree_key_t last;
	int lv, err;

	down_write(&btree->bt_sem);

	if (key == 0) {
		/* nothing is to be kept */
		nilfs_btree_delete_all_nodes(btree);
		btree->bt_root = NILFS_BTREE_DBN_TO_PTR(0);
		err = 0;
		goto unlock;
	}

	path = nilfs_btree_alloc_path(btree);
	if (path == NULL) {
		err = -NILFS_BTREE_ENOMEM;
		goto unlock;
	}

	err = nilfs_btree_lookup_last_path(btree, path, &last, NULL, 0);
	while ((err >= 0) && (key <= last)) {
		lv = 0;
		do {
			err = nilfs_btree_delete_in_node(btree, path, lv);
			lv++;
		} while (err == NILFS_BTREE_DELETE_DEMOTION);
		if (err < 0)
			goto free_path;	/* deletion errors are reported as is */

		/* the path is stale now; release it and find the new last key */
		nilfs_btree_clear_path(btree, path);
		err = nilfs_btree_lookup_last_path(btree, path, &last, NULL, 0);
	}
	/* a lookup that finds no key at all just means there is nothing left */
	if (err == -NILFS_BTREE_ENOKEY)
		err = 0;

 free_path:
	nilfs_btree_free_path(btree, path);
 unlock:
	up_write(&btree->bt_sem);
	return err;
}

/**
 * nilfs_btree_find_unused_key_in_range - find an unused key
 * @btree: B-tree
 * @minkey: minimum key
 * @maxkey: maximum key
 * @startkey: start key
 * @keyp: place to store the unused key that was found (may be %NULL)
 *
 * Description: nilfs_btree_find_unused_key_in_range() searches an unused key
 * in the B-tree @btree sequentially from @minkey to @maxkey, starting at
 * @startkey and wrapping around to @minkey once @maxkey has been passed.
 *
 * Return Value: On success, 0 is returned and an unused key is stored in the
 * place pointed by @keyp. On error, one of the following negative error codes
 * is returned.
 *
 * %-NILFS_BTREE_EINVAL - @startkey is not within the range from @minkey to
 * @maxkey.
 *
 * %-NILFS_BTREE_ENOKEY - Every key from @minkey to @maxkey is used.
 *
 * %-NILFS_BTREE_ENOMEM - Insufficient amount of memory available.
 */
int nilfs_btree_find_unused_key_in_range(struct nilfs_btree *btree,
					 nilfs_btree_key_t minkey,
					 nilfs_btree_key_t maxkey,
					 nilfs_btree_key_t startkey,
					 nilfs_btree_key_t *keyp)
{
	struct buffer_head *bh;
	struct nilfs_btree_path *path;
	struct nilfs_btree_node *node;
	nilfs_btree_dkey_t *dkeys;
	nilfs_btree_key_t currkey, nextkey;
	int index, ret;

	if ((startkey < minkey) || (startkey > maxkey))
		return -NILFS_BTREE_EINVAL;
	if ((path = nilfs_btree_alloc_path(btree)) == NULL)
		return -NILFS_BTREE_ENOMEM;

	/* The start key itself is the answer if it is not in the tree. */
	currkey = startkey;
	if ((ret = nilfs_btree_lookup_path(btree, path, currkey, NULL, 0)) < 0) {
		if (ret != -NILFS_BTREE_ENOKEY)
			goto out;
		if (keyp != NULL)
			*keyp = currkey;
		ret = 0;
		goto out;
	}

	/*
	 * Walk the leaf entries in key order. currkey is the candidate and
	 * nextkey the key actually stored at the current position; the walk
	 * stops at the first gap or when it comes back to the start key.
	 */
	nextkey = currkey;
	do {
		if (++currkey <= maxkey) {
			if ((ret = nilfs_btree_path_next(btree, path, 0)) == 0) {
				bh = path->bp_bh[0];
				node = NILFS_BTREE_BH_TO_NODE(bh);
				dkeys = NILFS_BTREE_NODE_DKEYS(node, btree->bt_blksize);
				index = path->bp_index[0];
				nextkey = le64_to_cpu(dkeys[index]);
			} else {
				if (ret != -NILFS_BTREE_ENOSIB)
					goto out;
				/* no further entry: the candidate is unused */
				ret = 0;
				break;
			}
		} else {
			/* wrap */
			currkey = minkey;
			/*
			 * Release what the path still holds before it is
			 * used for a fresh lookup, as nilfs_btree_truncate()
			 * does before looking up again.
			 */
			nilfs_btree_clear_path(btree, path);
			if ((ret = nilfs_btree_lookup_path(btree, path, currkey, NULL, 0)) < 0) {
				if (ret != -NILFS_BTREE_ENOKEY)
					goto out;
				ret = 0;
				break;
			}
			nextkey = currkey;
		}
	} while ((currkey == nextkey) && (currkey != startkey));

	/* Coming full circle means every key in the range is taken. */
	if (currkey == startkey) {
		ret = -NILFS_BTREE_ENOKEY;
		goto out;
	}
	if (keyp != NULL)
		*keyp = currkey;

 out:
	nilfs_btree_free_path(btree, path);
	return ret;
}

/**
 * nilfs_btree_find_unused_key - find an unused key
 * @btree: B-tree
 * @key: place to store the unused key (may be %NULL)
 *
 * Description: nilfs_btree_find_unused_key() invokes the B-tree-type-specific
 * "find unused key" operation with the B-tree semaphore held for reading.
 *
 * Return Value: The value returned by the type-specific operation. When it
 * is 0 and @key is not %NULL, the key found is stored in the place pointed
 * by @key.
 */
int nilfs_btree_find_unused_key(struct nilfs_btree *btree, unsigned long *key)
{
	nilfs_btree_key_t found;
	int err;

	down_read(&btree->bt_sem);
	err = NILFS_BTREE_BOP_FIND_UNUSED_KEY(btree, &found);
	if (err == 0) {
		if (key != NULL)
			*key = (unsigned long)found;
	}
	up_read(&btree->bt_sem);

	return err;
}

/**
 * nilfs_btree_mark - mark the nodes on a path
 * @btree: B-tree
 * @key: key
 * @minlevel: level to which the marking operation is performed
 *
 * Description: nilfs_btree_mark() looks up the path for @key down to the
 * level @minlevel and applies the type-specific "prepare dirty" marking to
 * every node on that path, from @minlevel up to the top, that is not dirty
 * yet.
 *
 * Return Value: The result of the path lookup, or %-NILFS_BTREE_ENOMEM if no
 * path could be allocated.
 */
static int nilfs_btree_mark(struct nilfs_btree *btree,
			    nilfs_btree_key_t key,
			    int minlevel)
{
	struct nilfs_btree_path *path;
	int lv, err;

	path = nilfs_btree_alloc_path(btree);
	if (path == NULL)
		return -NILFS_BTREE_ENOMEM;

	err = nilfs_btree_lookup_path(btree, path, key, NULL, minlevel);
	if (err >= 0) {
		lv = minlevel;
		while (lv < path->bp_nlevels) {
			if (!buffer_dirty(path->bp_bh[lv]))
				NILFS_BTREE_BOP_MARK_NODE_PREPARE_DIRTY(btree, path->bp_bh[lv]);
			lv++;
		}
	}

	nilfs_btree_free_path(btree, path);
	return err;
}

/**
 * nilfs_btree_mark_from_data - mark the nodes above a data block
 * @btree: B-tree
 * @bh: buffer head
 *
 * Description: nilfs_btree_mark_from_data() marks nodes on the path from the
 * root to the data specified by @bh. These node blocks on the path must be
 * written back to disk. The key of the data block is derived from the index
 * of its page and the position of @bh among the buffers of that page.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-NILFS_BTREE_EIO - I/O error.
 *
 * %-NILFS_BTREE_ENOMEM - Insufficient amount of memory available.
 */
int nilfs_btree_mark_from_data(struct nilfs_btree *btree,
			       struct buffer_head *bh)
{
	struct buffer_head *cursor;
	nilfs_btree_key_t key;

	/* key of the first block on the page ... */
	cursor = page_buffers(bh->b_page);
	key = page_index(bh->b_page) << (PAGE_CACHE_SHIFT - btree->bt_blkbits);

	/* ... plus the number of buffers in front of @bh */
	while (cursor != bh) {
		cursor = cursor->b_this_page;
		key++;
	}

	return nilfs_btree_mark(btree, key, 0);
}

/**
 * nilfs_btree_mark_from_node
 * @btree: B-tree
 * @bh: buffer head
 *
 * Descripton: nilfs_btree_mark_from_data() marks nodes on the path from the
 * root to the node specified by @bh. These node blocks on the path must be
 * written back to disk.
 * 
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-NILFS_BTREE_EIO - I/O error.
 *
 * %-NILFS_BTREE_ENOMEM - Insufficient amount of memory available.
 */
int nilfs_btree_mark_from_node(struct nilfs_btree *btree,
			       struct buffer_head *bh)
{
	struct nilfs_btree_node *node;
	nilfs_btree_dkey_t *dkeys;
	int level;

	node = NILFS_BTREE_BH_TO_NODE(bh);
	level = le16_to_cpu(node->bn_level);
	dkeys = NILFS_BTREE_NODE_DKEYS(node, btree->bt_blksize);
	return nilfs_btree_mark(btree, le64_to_cpu(dkeys[0]), level);
}

/*
 * nilfs_btree_update - replace a pointer in a node with a disk block number
 *
 * Looks up the path for @key down to @level and overwrites the pointer at
 * the path's index in the node at that level with @dbn. If the old pointer
 * was a buffer head, it is released first: with brelse() at level 0 and
 * with the type-specific "put node" operation at higher levels.
 *
 * Returns the result of the path lookup, or -NILFS_BTREE_ENOMEM if no path
 * could be allocated.
 */
static int nilfs_btree_update(struct nilfs_btree *btree,
			      nilfs_btree_key_t key,
			      int level,
			      dbn_t dbn)
{
	struct buffer_head *node_bh, *old_bh;
	struct nilfs_btree_path *path;
	nilfs_btree_dptr_t *slot;
	nilfs_btree_ptr_t old;
	int err;

	path = nilfs_btree_alloc_path(btree);
	if (path == NULL)
		return -NILFS_BTREE_ENOMEM;

	err = nilfs_btree_lookup_path(btree, path, key, NULL, level);
	if (err < 0)
		goto free_path;

	node_bh = path->bp_bh[level];
	lock_buffer(node_bh);
	slot = NILFS_BTREE_NODE_DPTRS(NILFS_BTREE_BH_TO_NODE(node_bh),
				      btree->bt_blksize);
	slot += path->bp_index[level];

	old = le64_to_cpu(*slot);
	if (NILFS_BTREE_PTR_IS_BH(old)) {
		old_bh = NILFS_BTREE_PTR_TO_BH(old);
		if (level != 0)
			NILFS_BTREE_BOP_PUT_NODE(btree, old_bh);
		else
			brelse(old_bh);
	}
	*slot = cpu_to_le64((__u64)dbn);
	unlock_buffer(node_bh);

 free_path:
	nilfs_btree_free_path(btree, path);
	return err;
}

/**
 * nilfs_btree_update_data_dbn - record the disk block number of a data block
 * @btree: B-tree
 * @bh: buffer head
 * @dbn: disk block number
 *
 * Description: nilfs_btree_update_data_dbn() derives the key of the data
 * block @bh from the index of its page and the position of @bh among the
 * buffers of that page, and stores @dbn as the pointer for that key in the
 * level-0 node.
 *
 * Return Value: The value returned by nilfs_btree_update().
 */
int nilfs_btree_update_data_dbn(struct nilfs_btree *btree,
				struct buffer_head *bh,
				dbn_t dbn)
{
	struct buffer_head *cursor;
	nilfs_btree_key_t key;

	/* key of the first block on the page ... */
	cursor = page_buffers(bh->b_page);
	key = page_index(bh->b_page) << (PAGE_CACHE_SHIFT - btree->bt_blkbits);

	/* ... plus the number of buffers in front of @bh */
	while (cursor != bh) {
		cursor = cursor->b_this_page;
		key++;
	}

	return nilfs_btree_update(btree, key, 0, dbn);
}

/**
 * nilfs_btree_update_node_dbn - record the disk block number of a node block
 * @btree: B-tree
 * @bh: buffer head
 * @dbn: disk block number
 *
 * Description: nilfs_btree_update_node_dbn() stores @dbn as the pointer that
 * refers to the node @bh. If @bh is the root, the root pointer of @btree is
 * replaced; otherwise the pointer in the node one level above is updated,
 * which is found through the first key of @bh.
 *
 * Return Value: On success, 0 is returned. If the root node cannot be
 * obtained, %-NILFS_BTREE_EIO is returned; otherwise the value returned by
 * nilfs_btree_update() is passed through.
 */
int nilfs_btree_update_node_dbn(struct nilfs_btree *btree,
				struct buffer_head *bh,
				dbn_t dbn)
{
	struct buffer_head *root_bh;
	struct nilfs_btree_node *node;
	nilfs_btree_dkey_t *keys;
	int parent_level;

	root_bh = nilfs_btree_get_node(btree, btree->bt_root);
	if (root_bh == NULL)
		return -NILFS_BTREE_EIO;

	if (root_bh != bh) {
		nilfs_btree_put_node(btree, root_bh);

		/* the pointer to update lives one level above the node */
		node = NILFS_BTREE_BH_TO_NODE(bh);
		parent_level = le16_to_cpu(node->bn_level) + 1;
		keys = NILFS_BTREE_NODE_DKEYS(node, btree->bt_blksize);
		return nilfs_btree_update(btree, le64_to_cpu(keys[0]),
					  parent_level, dbn);
	}

	/* @bh is the root itself: only the root pointer has to change */
	NILFS_BTREE_BOP_PUT_NODE(btree, root_bh);
	btree->bt_root = NILFS_BTREE_DBN_TO_PTR(dbn);
	return 0;
}

/*
 * Singly linked list of buffer heads, used to collect the gathered buffers
 * of one B-tree level. New buffers are appended through @last.
 */
struct nilfs_btree_bhlist {
	struct buffer_head *first;	/* head of the list, NULL when empty */
	struct buffer_head **last;	/* link to be filled by the next append */
};

/* Nonzero if @bh is dirty and does not carry the "prepare dirty" state. */
inline static int nilfs_btree_buffer_dirty(struct buffer_head *bh)
{
	if (!buffer_dirty(bh))
		return 0;
	return !buffer_prepare_dirty(bh);
}

inline static int nilfs_btree_buffer_prepare_dirty(struct buffer_head *bh)
{
	return buffer_prepare_dirty(bh);
}

/*
 * Decide whether @bh belongs to the set of buffers gathered for the radix
 * tree tag @tag. An unknown tag is reported and never matches.
 */
inline static int nilfs_btree_buffer_need_gather(struct buffer_head *bh,
						 int tag)
{
	if (tag == PAGECACHE64_TAG_DIRTY)
		return nilfs_btree_buffer_dirty(bh);
	if (tag == PAGECACHE64_TAG_PREPARE_DIRTY)
		return nilfs_btree_buffer_prepare_dirty(bh);

	nilfs_debug(0, "Invalid PAGECACHE64_TAG: %d\n", tag);
	return 0;
}

/*
 * nilfs_btree_gather_page_buffer - collect the matching buffers of a page
 *
 * Clears the inactive state of @page if it is set, then appends every
 * buffer of the page that matches @tag to the list of its node level in
 * @lists. A reference is taken on each appended buffer; the buffers are
 * chained through the next-buffer field of their nodes.
 */
static void nilfs_btree_gather_page_buffer(struct nilfs_btree *btree,
					   struct nilfs_btree_bhlist *lists,
					   struct page *page,
					   int tag)
{
	struct buffer_head *bh;
	struct buffer_head **link;
	struct nilfs_btree_node *node;
	struct nilfs_btree_bhlist *list;

	if (nilfs_inactive_node_page(page))
		nilfs_clear_inactive_node_page(page);

	bh = page_buffers(page);
	do {
		if (nilfs_btree_buffer_need_gather(bh, tag)) {
			get_bh(bh);
			/* the buffer itself is not locked while it is linked */
			node = NILFS_BTREE_BH_TO_NODE(bh);
			/*
			 * NOTE(review): the level stored in the node indexes
			 * @lists without a bound check - confirm it cannot
			 * exceed the array the caller provides.
			 */
			list = &lists[le16_to_cpu(node->bn_level)];
			link = &NILFS_BTREE_NODE_NEXT_BH(node);
			*link = NULL;
			*list->last = bh;
			list->last = link;
		}
		bh = bh->b_this_page;
	} while (bh != page_buffers(page));
}

/*
 * nilfs_btree_gather_page_tag - collect the buffers of all pages with a tag
 *
 * Looks up the pages carrying @tag in the B-tree's radix tree in batches
 * and gathers the matching buffers of each page into @lists.
 */
static void nilfs_btree_gather_page_tag(struct nilfs_btree *btree,
					struct nilfs_btree_bhlist *lists,
					int tag)
{
	struct page *batch[NILFS_BTREE_GANG_LOOKUP_SIZE];
	nilfs_btree_key_t next = 0;
	int found, i;

	for (;;) {
		found = radix_tree_64_gang_lookup_tag(btree->bt_rtree,
						      (void **)batch, next,
						      NILFS_BTREE_GANG_LOOKUP_SIZE,
						      tag);
		if (found <= 0)
			break;
		for (i = 0; i < found; i++)
			nilfs_btree_gather_page_buffer(btree, lists,
						       batch[i], tag);
		/* resume right after the last page of this batch */
		next = nilfs_node_page_index(batch[found - 1]) + 1;
	}
}

/**
 * nilfs_btree_lookup_dirty_buffers_begin
 * @btree: B-tree
 *
 * Description:
 */
void nilfs_btree_lookup_dirty_buffers_begin(struct nilfs_btree *btree)
{
	struct buffer_head **last;
	struct nilfs_btree_bhlist lists[NILFS_BTREE_MAX_LEVELS];
	int i;

	for (i = 0; i < NILFS_BTREE_MAX_LEVELS; i++) {
		lists[i].first = NULL;
		lists[i].last = &lists[i].first;
	}

	spin_lock_irq(btree->bt_inactive_list_lock);
	spin_lock(btree->bt_rtree_lock);

	nilfs_btree_gather_page_tag(btree, lists, PAGECACHE64_TAG_DIRTY);
	nilfs_btree_gather_page_tag(btree, lists, PAGECACHE64_TAG_PREPARE_DIRTY);

	last = &btree->bt_dirty_buffers;
	for (i = 0; i < NILFS_BTREE_MAX_LEVELS; i++) {
		if (lists[i].first != NULL) {
			*last = lists[i].first;
			last = lists[i].last;
		}
	}

	spin_unlock(btree->bt_rtree_lock);
	spin_unlock_irq(btree->bt_inactive_list_lock);
}

/**
 * nilfs_btree_lookup_dirty_buffers - gather dirty buffers
 * @btree: B-tree
 * @bhs: array of pointers to buffer heads
 * @nitems: number of items
 *
 * Description: nilfs_btree_lookup_dirty_buffers() removes up to @nitems
 * buffers from the front of the dirty-buffer list of @btree and stores them
 * in @bhs. No buffer reference is dropped here.
 *
 * Return Value: The number of buffer heads stored in @bhs.
 */
unsigned int nilfs_btree_lookup_dirty_buffers(struct nilfs_btree *btree,
					      struct buffer_head **bhs,
					      unsigned int nitems)
{
	struct buffer_head *bh;
	struct nilfs_btree_node *node;
	unsigned int count;

	for (count = 0; count < nitems; count++) {
		bh = btree->bt_dirty_buffers;
		if (bh == NULL)
			break;

		/* unlink the head of the list under the buffer lock */
		lock_buffer(bh);
		node = NILFS_BTREE_BH_TO_NODE(bh);
		btree->bt_dirty_buffers = NILFS_BTREE_NODE_NEXT_BH(node);
		NILFS_BTREE_NODE_NEXT_BH(node) = NULL;
		bhs[count] = bh;
		unlock_buffer(bh);
	}

	return count;
}

/**
 * nilfs_btree_lookup_dirty_buffers_end - discard the list of dirty buffers
 * @btree: B-tree
 *
 * Description: nilfs_btree_lookup_dirty_buffers_end() unlinks every buffer
 * that is still on the dirty-buffer list of @btree and releases it with
 * brelse(), leaving the list empty.
 */
void nilfs_btree_lookup_dirty_buffers_end(struct nilfs_btree *btree)
{
	struct buffer_head *bh;
	struct nilfs_btree_node *node;

	for (;;) {
		bh = btree->bt_dirty_buffers;
		if (bh == NULL)
			break;

		/* unlink the head of the list under the buffer lock */
		lock_buffer(bh);
		node = NILFS_BTREE_BH_TO_NODE(bh);
		btree->bt_dirty_buffers = NILFS_BTREE_NODE_NEXT_BH(node);
		NILFS_BTREE_NODE_NEXT_BH(node) = NULL;
		unlock_buffer(bh);

		brelse(bh);
	}
}

/**
 * nilfs_btree_part_of - examine whether a node block is a part of a B-tree
 * @btree: B-tree
 * @bh: buffer head
 *
 * Description: nilfs_btree_part_of() examines whether @bh is one of the
 * nodes in @btree. @bh must be a pointer to a buffer head whose data is a
 * B-tree node block. The path for the first key of that node is looked up
 * down to the node's level, and the block number of the node found there is
 * compared with the block number of @bh.
 *
 * Return Value: On success, 1 is returned if @bh is a node of @btree, or 0 if
 * not. On error, one of the following negative error codes is returned; any
 * other negative result of the path lookup is passed through as well.
 *
 * %-NILFS_BTREE_EIO - I/O error.
 *
 * %-NILFS_BTREE_ENOMEM - Insufficient amount of memory available.
 */
int nilfs_btree_part_of(struct nilfs_btree *btree, struct buffer_head *bh)
{
	struct nilfs_btree_path *path;
	struct nilfs_btree_node *node;
	nilfs_btree_dkey_t *keys;
	nilfs_btree_key_t firstkey;
	int node_level, result;

	down_read(&btree->bt_sem);

	node = NILFS_BTREE_BH_TO_NODE(bh);
	node_level = le16_to_cpu(node->bn_level);
	keys = NILFS_BTREE_NODE_DKEYS(node, btree->bt_blksize);
	firstkey = le64_to_cpu(keys[0]);

	path = nilfs_btree_alloc_path(btree);
	if (path != NULL) {
		result = nilfs_btree_lookup_path(btree, path, firstkey,
						 NULL, node_level);
		if (result == 0)
			result = (path->bp_bh[node_level]->b_blocknr == bh->b_blocknr);
		nilfs_btree_free_path(btree, path);
	} else {
		result = -NILFS_BTREE_ENOMEM;
	}

	up_read(&btree->bt_sem);
	return result;
}

/* Local Variables:	*/
/* eval: (c-set-style "linux")	*/
/* End:			*/
