/*
 * nasd_free.c
 *
 * Management of tracking what blocks are free.
 *
 * Author: Jim Zelenka
 */
/*
 * Copyright (c) of Carnegie Mellon University, 1997,1998,1999.
 *
 * Permission to reproduce, use, and prepare derivative works of
 * this software for internal use is granted provided the copyright
 * and "No Warranty" statements are included with all reproductions
 * and derivative works. This software may also be redistributed
 * without charge provided that the copyright and "No Warranty"
 * statements are included in all redistributions.
 *
 * NO WARRANTY. THIS SOFTWARE IS FURNISHED ON AN "AS IS" BASIS.
 * CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER
 * EXPRESSED OR IMPLIED AS TO THE MATTER INCLUDING, BUT NOT LIMITED
 * TO: WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY
 * OF RESULTS OR RESULTS OBTAINED FROM USE OF THIS SOFTWARE. CARNEGIE
 * MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT
 * TO FREEDOM FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
 */


#include <nasd/nasd_options.h>
#include <nasd/nasd_drive_options.h>
#include <nasd/nasd_types.h>
#include <nasd/nasd_freelist.h>
#include <nasd/nasd_threadstuff.h>
#include <nasd/nasd_itypes.h>
#include <nasd/nasd_mem.h>
#include <nasd/nasd_cache.h>
#include <nasd/nasd_common.h>

/*
 * Global extent list object.
 * NOTE(review): by its name this is the drive-wide list of unallocated
 * extents; its users are not visible in this part of the file.
 */
nasd_odc_exlist_t nasd_odc_unallocated_extents;

/*
 * Freelist from which nasd_odc_exlist_get() obtains extent-list headers.
 * The three constants below are presumably its sizing parameters
 * (TODO confirm against the freelist creation call).
 */
nasd_freelist_t *nasd_free_exlist;
#define NASD_MAX_FREE_EXLIST  32
#define NASD_EXLIST_INC        8
#define NASD_EXLIST_INITIAL   12

/*
 * Freelist of extent-list entries (nasd_odc_exlist_ent_t).
 * NASD_MAX_FREE_EXENT and NASD_EXENT_INC are handed to
 * NASD_FREELIST_CREATE() in nasd_odc_freeblock_init().
 */
nasd_freelist_t *nasd_free_exle;
#define NASD_MAX_FREE_EXENT 2048
#define NASD_EXENT_INC        64
#define NASD_EXENT_INITIAL  1856

/* Get an extent-list entry from nasd_free_exle into _ee_ (NULL-checked by callers). */
#define NASD_EXLE_FREEGET(_ee_) \
  NASD_FREELIST_GET(nasd_free_exle,_ee_,next,(nasd_odc_exlist_ent_t *))

/* Give an extent-list entry back to nasd_free_exle. */
#define NASD_EXLE_FREEPUT(_ee_) \
  NASD_FREELIST_FREE(nasd_free_exle,_ee_,next)

/*
 * Unlink _ent_ from the doubly linked list it is on and NULL its
 * next/prev pointers. Both neighbours are dereferenced, so _ent_ must
 * currently be linked. Anything that still needs _ent_'s neighbours
 * (e.g. FIXUP_HASH_DEL) has to run BEFORE this macro.
 */
#define NASD_DEQ_EXL_ENT(_ent_) { \
  (_ent_)->next->prev = (_ent_)->prev; \
  (_ent_)->prev->next = (_ent_)->next; \
  (_ent_)->next = (_ent_)->prev = NULL; \
}

/* see nasd_drive_tunable.c */
extern int nasd_drive_unallocated_hash_chunk;

/*
 * Update the hash hints of _exl_ after _ent_ has been linked into the
 * list (or after its range has been changed). No-op when the list is
 * not hashed (hashlen == 0).
 *
 * Buckets are visited from the one containing the predecessor's last
 * block (block 0 when _ent_ is the first entry) through the one
 * containing _ent_'s last block. A bucket is re-pointed at _ent_ when
 * its hint is the head sentinel or an entry whose range.first is
 * greater than _ent_'s. The walk stops at the first bucket that
 * already hints at _ent_. Any other untouched bucket must not lie
 * beyond the bucket of _ent_'s first block (asserted).
 *
 * _ent_ must already have valid prev/next links: prev is dereferenced.
 */
#define FIXUP_HASH_INS(_exl_,_ent_) { \
  nasd_blkno_t _hcf, _hcl, _prevno, _hcp; \
  int _i; \
  NASD_ASSERT((_ent_) != NULL); \
  if ((_exl_)->hashlen) { \
    if ((_ent_)->prev != &(_exl_)->head) \
      _prevno = (_ent_)->prev->range.last; \
    else \
      _prevno = 0; \
    _hcp = _prevno / (_exl_)->hashdiv; \
    _hcf = (_ent_)->range.first / (_exl_)->hashdiv; \
    _hcl = (_ent_)->range.last / (_exl_)->hashdiv; \
    for(_i=_hcp;_i<=_hcl;_i++) { \
      if (((_exl_)->hashhints[_i] == &(_exl_)->head) || \
        (_exl_)->hashhints[_i]->range.first > (_ent_)->range.first) \
      { \
        (_exl_)->hashhints[_i] = (_ent_); \
      } \
      else { \
        if ((_exl_)->hashhints[_i] == (_ent_)) \
          break; \
        NASD_ASSERT(_i <= _hcf); \
      } \
    } \
  } \
}

/*
 * Update the hash hints of _exl_ just before _ent_ is unlinked from
 * the list. No-op when the list is not hashed (hashlen == 0).
 *
 * Buckets are visited from the one containing the predecessor's last
 * block (block 0 when _ent_ is the first entry) through the one
 * containing _ent_'s last block. Every bucket hinting at _ent_ is
 * re-pointed at _ent_'s successor. The walk stops at the first bucket
 * that already hints at the successor.
 *
 * Must be invoked while _ent_ is still linked: both _ent_->prev and
 * _ent_->next are read, so this has to precede NASD_DEQ_EXL_ENT().
 */
#define FIXUP_HASH_DEL(_exl_,_ent_) { \
  nasd_blkno_t _hcf, _hcl, _prevno, _hcp; \
  nasd_odc_exlist_ent_t *_nextent; \
  int _i; \
  if ((_exl_)->hashlen) { \
    if ((_ent_)->prev != &(_exl_)->head) \
      _prevno = (_ent_)->prev->range.last; \
    else \
      _prevno = 0; \
    _hcp = _prevno / (_exl_)->hashdiv; \
    _hcf = (_ent_)->range.first / (_exl_)->hashdiv; \
    _hcl = (_ent_)->range.last / (_exl_)->hashdiv; \
    _nextent = (_ent_)->next; \
    NASD_ASSERT(_nextent != NULL); \
    for(_i=_hcp;_i<=_hcl;_i++) { \
      if ((_exl_)->hashhints[_i] == (_ent_)) { \
        (_exl_)->hashhints[_i] = _nextent; \
      } \
      else { \
        if ((_exl_)->hashhints[_i] == _nextent) \
          break; \
        NASD_ASSERT(_i <= _hcf); \
      } \
    } \
  } \
}

/*
 * Debugging aid: check the internal consistency of a hashed extent
 * list. Nothing is modified; every violated invariant trips a
 * NASD_ASSERT. Lists without hash hints (hashlen == 0) are not
 * checked at all.
 *
 * Per list entry: range.first <= range.last and the next/prev links
 * agree with the neighbours.
 * Per hash bucket i: the hint is reachable from the head, and it is
 * either the head sentinel (no entry ends at or after block
 * i*hashdiv) or the first entry whose range.last >= i*hashdiv.
 *
 * The caller is expected to keep the list stable while this runs;
 * no lock is taken here.
 */
void
nasd_odc_free_verify_hashing(
  nasd_odc_exlist_t  *exl)
{
  nasd_odc_exlist_ent_t *hint, *hintprev, *ee, *e;
  int i;

  if (exl->hashlen == 0)
    return;

  /*
   * Run through the list checking every entry's range and links.
   */
  for(e=exl->head.next;e!=&exl->head;e=e->next) {
    NASD_ASSERT(e->range.last >= e->range.first);
    NASD_ASSERT(e->next->prev == e);
    NASD_ASSERT(e->prev->next == e);
  }
  /* e is the head sentinel here; check its links as well */
  NASD_ASSERT(e->next->prev == e);
  NASD_ASSERT(e->prev->next == e);

  if (exl->head.next == &exl->head) {
    /* empty list: every hint must be the sentinel */
    for(i=0;i<exl->hashlen;i++) {
      NASD_ASSERT(exl->hashhints[i] == &exl->head);
    }
  }
  else {
    for(i=0;i<exl->hashlen;i++) {
      hint = exl->hashhints[i];
      hintprev = hint->prev;
      /* the hint must be on the list (or be the sentinel itself) */
      for(ee=exl->head.next;ee!=&exl->head;ee=ee->next) {
        if (ee == hint)
          break;
      }
      NASD_ASSERT(ee == hint);
      NASD_ASSERT(hint->next->prev == hint);
      NASD_ASSERT(hint->prev->next == hint);
      if (hint == &exl->head) {
        /*
         * Sentinel hint: nothing ends in or after this bucket.
         * The sentinel's own range is never initialised, so it is
         * deliberately not range-checked.
         */
        NASD_ASSERT(exl->head.prev->range.last < (i*exl->hashdiv));
      }
      else {
        NASD_ASSERT(hint->range.last >= hint->range.first);
        NASD_ASSERT(hintprev->range.last < hint->range.first);
        if (hintprev == &exl->head) {
          NASD_ASSERT(exl->head.next == hint);
        }
        else {
          /* predecessor must end before this bucket begins */
          NASD_ASSERT(hintprev->range.last < (i*exl->hashdiv));
        }
        NASD_ASSERT(hint->range.last >= (i*exl->hashdiv));
      }
    }
  }

}

/*
 * Return a list of blocks to a freelist. Deallocate
 * (or reuse) the list elements, coalesce with elements
 * already in list.
 *
 * This nolock version assumes that the list lock
 * is held by the caller.
 *
 * exl            extent list receiving the blocks
 * release        NULL-terminated chain (linked through ->next only)
 *                of entries to give back; may be NULL, in which
 *                case nothing happens. Ownership of every entry
 *                passes to this function: each is either linked
 *                into exl or handed to NASD_EXLE_FREEPUT.
 * release_cnt_p  out: total number of blocks covered by release
 *
 * Always returns NASD_SUCCESS.
 *
 * When exl is empty the chain is insertion-sorted and becomes the
 * list as is (its entries are not coalesced with each other).
 * Otherwise each entry is merged into an entry it directly abuts,
 * or inserted in sorted position.
 */
nasd_status_t
nasd_odc_free_release_blocks_nolock(
  nasd_odc_exlist_t      *exl,
  nasd_odc_exlist_ent_t  *release,
  nasd_blkcnt_t          *release_cnt_p)
{
  nasd_odc_exlist_ent_t *ent, *next, *e, *p, *lst, *fe;
  nasd_blkcnt_t cnt, delt;
  nasd_blkno_t n, hb;
  int insert;

  cnt = 0;
  if (release == NULL) {
    /*
     * Nothing to release. Bail out now: the empty-list path
     * below would otherwise dereference a NULL chain head.
     */
    *release_cnt_p = 0;
    return(NASD_SUCCESS);
  }
  if (exl->head.next == &exl->head) {
    /*
     * List is empty. Make our list be the list.
     * (Must fixup next/prev pointers and sort
     * the list.)
     */
    /* cheezy merge sort */
    for(lst=NULL,ent=release;ent;ent=next) {
      NASD_ASSERT(ent->range.first <= ent->range.last);
      next = ent->next;
      if (lst == NULL) {
        lst = ent;
        ent->next = NULL;
        continue;
      }
      NASD_ASSERT(ent->range.last != lst->range.first);
      if (ent->range.last < lst->range.first) {
        /* new lowest range, becomes head of sorted chain */
        ent->next = lst;
        lst = ent;
        continue;
      }
      for(p=lst,e=lst->next;e;p=e,e=e->next) {
        NASD_ASSERT(ent->range.first != e->range.last);
        NASD_ASSERT(ent->range.last != e->range.first);
        NASD_ASSERT(ent->range.first != p->range.last);
        NASD_ASSERT(ent->range.last != p->range.first);
        if ((ent->range.first > p->range.last)
          && (ent->range.last < e->range.first))
        {
          /* insert after p and before e */
          NASD_ASSERT(p->next == e);
          p->next = ent;
          ent->next = e;
          ent = NULL;
          break;
        }
      }
      if (ent) {
        /* could not insert anywhere in list, append */
        NASD_ASSERT(e == NULL);
        NASD_ASSERT(p->next == NULL);
        NASD_ASSERT(ent->range.first > p->range.last);
        p->next = ent;
        ent->next = NULL;
      }
    }
    /* fixup next/prev */
    for(p=NULL,ent=lst;ent;p=ent,ent=ent->next) {
      ent->prev = p;
      delt = ent->range.last - ent->range.first + 1;
      cnt += delt;
    }
    /* p now is last element */
    lst->prev = &exl->head;
    p->next = &exl->head;
    exl->head.next = lst;
    exl->head.prev = p;
    exl->num = cnt;
    if (exl->hashlen) {
      /*
       * The chain is now circular through the head sentinel
       * (no longer NULL-terminated), so stop at the sentinel.
       */
      for(ent=lst;ent!=&exl->head;ent=ent->next) {
        FIXUP_HASH_INS(exl, ent);
      }
    }
  }
  else {
    for(ent=release;ent;ent=next) {
      next = ent->next;
      NASD_ASSERT(ent->range.first <= ent->range.last);
      /*
       * Search list for a chunk contig to ent.
       * If we get to an element whose first block
       * is greater than our last, then we insert
       * before it.
       *
       * !!! this may be optimizable by assuming that the
       * incoming list is mostly ordered.
       */
      insert = 1;
      if (exl->hashlen) {
        NASD_ASSERT(ent->range.last <= exl->maxval);
        /*
         * Start from the bucket of the block just before ent so
         * an entry ending at first-1 (append candidate) is seen.
         * Guard block 0 so first-1 cannot wrap around.
         */
        hb = (ent->range.first ? (ent->range.first-1) : 0) / exl->hashdiv;
        fe = exl->hashhints[hb];
      }
      else {
        fe = exl->head.next;
      }
      for(e=fe;e!=&exl->head;e=e->next) {
        n = ent->range.last + 1;
        if (n == e->range.first) {
          /* prepend to e */
          e->range.first = ent->range.first;
          insert = 0;
          FIXUP_HASH_INS(exl,e);
          break;
        }
        n = e->range.last + 1;
        if (n == ent->range.first) {
          /* append to e */
          e->range.last = ent->range.last;
          insert = 0;
          FIXUP_HASH_INS(exl,e);
          break;
        }
        if (e->range.last > ent->range.last) {
          /* e lies entirely after ent: insertion point found */
          NASD_ASSERT(e->range.first > ent->range.last);
          break;
        }
      }
      delt = ent->range.last - ent->range.first + 1;
      cnt += delt;
      exl->num += delt;
      if (insert) {
        /* insert ent before e (e is the head when appending at tail) */
        ent->next = e;
        ent->prev = e->prev;
        ent->next->prev = ent;
        ent->prev->next = ent;
        FIXUP_HASH_INS(exl, ent);
      }
      else {
        /* merged range with an existing chunk, free ent */
        NASD_EXLE_FREEPUT(ent);
      }
    }
  }

  *release_cnt_p = cnt;
  return(NASD_SUCCESS);
}

/*
 * Locking front end for nasd_odc_free_release_blocks_nolock():
 * takes exl->lock, gives the chain "release" back to exl
 * (coalescing with entries already there), drops the lock and
 * passes the status through. *release_cnt_p receives the number
 * of blocks released.
 */
nasd_status_t
nasd_odc_free_release_blocks_to_list(
  nasd_odc_exlist_t      *exl,
  nasd_odc_exlist_ent_t  *release,
  nasd_blkcnt_t          *release_cnt_p)
{
  nasd_status_t status;

  NASD_LOCK_MUTEX(exl->lock);
  status = nasd_odc_free_release_blocks_nolock(exl, release, release_cnt_p);
  NASD_UNLOCK_MUTEX(exl->lock);
  return(status);
}

/*
 * Take up to nblocks blocks out of entry ent of list exl.
 * Split up, truncate, or remove ent as necessary.
 *
 * exl         list that ent is linked on
 * ent         entry to carve blocks out of
 * in_first    lowest block number wanted; taking starts at
 *             max(ent->range.first, in_first)
 * bound_last  if nonzero, highest block number that may be taken
 * nblocks     maximum number of blocks to take
 * ep          out: entry describing the blocks taken, with
 *             next == prev == NULL (NULL on failure)
 * gp          out: number of blocks taken (0 on failure)
 *
 * Returns NASD_SUCCESS, NASD_FAIL when the wanted window does not
 * overlap ent, or NASD_NO_MEM when a list element could not be
 * obtained (the list is left untouched in both failure cases).
 *
 * No locking is done here; nasd_odc_exlist_get_blocks() calls this
 * with exl->lock held. Hash hints and exl->num are kept up to date.
 */
nasd_status_t
nasd_odc_exlist_take_chunk(
  nasd_odc_exlist_t       *exl,
  nasd_odc_exlist_ent_t   *ent,
  nasd_blkno_t             in_first,
  nasd_blkno_t             bound_last,
  nasd_blkcnt_t            nblocks,
  nasd_odc_exlist_ent_t  **ep,
  nasd_blkcnt_t           *gp)
{
  nasd_blkcnt_t len, maxtake, cblocks;
  nasd_odc_exlist_ent_t *e, *newent;
  nasd_blkno_t first, last, newlast;
  int nfr, fr, olr, nlr, i;

  *ep = NULL;
  *gp = 0;

  /* clip the wanted window [first,last] to ent and to bound_last */
  first = NASD_MAX(ent->range.first, in_first);
  last = NASD_MIN((first + nblocks) - 1, ent->range.last);
  if (bound_last && (last > bound_last))
    last = bound_last;
  cblocks = ent->range.last - ent->range.first + 1;
  if ((first > ent->range.last) || (last < ent->range.first)) {
    /* no possible allocation */
    return(NASD_FAIL);
  }
  len = (last - first) + 1;
  maxtake = NASD_MIN(len, cblocks);
  NASD_ASSERT(maxtake <= nblocks);

  /*
   * Check no allocation case first: we are allocating
   * exactly ent
   */
  NASD_ASSERT(maxtake <= cblocks);
  if ((ent->range.first == first) && (maxtake == cblocks)) {
    /* use whole chunk */
    /* hints must be fixed while ent is still linked */
    FIXUP_HASH_DEL(exl, ent);
    NASD_DEQ_EXL_ENT(ent);
    *ep = ent;
    *gp = maxtake;
    exl->num -= maxtake;
    return(NASD_SUCCESS);
  }
  /* every remaining case needs a fresh element for the taken blocks */
  NASD_EXLE_FREEGET(e);
  if (e == NULL)
    return(NASD_NO_MEM);
  e->next = e->prev = NULL;
  if (ent->range.first == first) {
    /*
     * Swipe from beginning
     * No need to update hash hints here- they're
     * still valid, since nothing has appeared in
     * the list.
     */
    e->range.first = ent->range.first;
    e->range.last = e->range.first + (maxtake - 1);
    ent->range.first = e->range.last + 1;
    *ep = e;
    *gp = maxtake;
    exl->num -= maxtake;
    return(NASD_SUCCESS);
  }
  if (ent->range.last == last) {
    /*
     * Swipe from end.
     * If we're hashing, and the ent spans multiple regions,
     * rehash the truncated regions to ent's successor.
     */
    newlast = first - 1;
    if (exl->hashlen) {
      fr = ent->range.first / exl->hashdiv;
      olr = ent->range.last / exl->hashdiv;
      if (fr != olr) {
        nlr = newlast / exl->hashdiv;
        /* buckets past the one holding ent's new last block */
        for(i=nlr+1;i<=olr;i++) {
          NASD_ASSERT(exl->hashhints[i] == ent);
          NASD_ASSERT(ent->next != NULL);
          exl->hashhints[i] = ent->next;
        }
      }
    }
    e->range.first = first;
    e->range.last = last;
    ent->range.last = newlast;
    NASD_ASSERT(e->range.first <= e->range.last);
    NASD_ASSERT(ent->range.first <= ent->range.last);
    *ep = e;
    *gp = maxtake;
    exl->num -= maxtake;
    return(NASD_SUCCESS);
  }
  /*
   * Split ent- ent will be fragmented into three parts, the
   * first ent (with a new range.last), the second e, the
   * third newent (with range.last == ent's old range.last)
   * If we're hashing, and ent covers multiple regions, then
   * rehash middle regions to point to newent.
   */
  NASD_EXLE_FREEGET(newent);
  if (newent == NULL) {
    /* can't split old element, give up our new one and fail */
    NASD_EXLE_FREEPUT(e);
    return(NASD_NO_MEM);
  }
  newlast = first - 1;
  if (exl->hashlen) {
      nlr = newlast / exl->hashdiv;
      olr = ent->range.last / exl->hashdiv;
      if (nlr != olr) {
        nfr = (first + 1) / exl->hashdiv;
        NASD_ASSERT(newent != NULL);
        /* the bucket still holding ent's new last block keeps its hint */
        if (nfr != nlr) {
          NASD_ASSERT(exl->hashhints[nfr] == ent);
          exl->hashhints[nfr] = newent;
        }
        for(i=nfr+1;i<=olr;i++) {
          NASD_ASSERT(exl->hashhints[i] == ent);
          exl->hashhints[i] = newent;
        }
      }
  }
  e->range.first = first;
  e->range.last = last;
  /* link newent directly after ent; it keeps the tail of the old range */
  newent->next = ent->next;
  newent->prev = ent;
  newent->next->prev = newent;
  newent->prev->next = newent;
  newent->range.first = e->range.last + 1;
  newent->range.last = ent->range.last;
  ent->range.last = newlast;

  NASD_ASSERT(ent->range.first <= ent->range.last);
  NASD_ASSERT(e->range.first <= e->range.last);
  NASD_ASSERT(newent->range.first <= newent->range.last);

  *ep = e;
  *gp = maxtake;
  exl->num -= maxtake;

  return(NASD_SUCCESS);
}

/*
 * Get blocks in a freelist. Start looking at
 * block "first", wrap around as necessary.
 * Note that the returned list will not be sorted
 * absolutely, but rather sorted such that the first
 * block is the first one occurring after "first"
 * (if any), then sweeping through the end of the
 * partition, then starting again at the beginning.
 * It is this allocation mechanism which really causes
 * sequential chunks of NASD objects to be laid out
 * in this manner. If partial_single_range is set,
 * stop allocating blocks after we have a single range
 * of blocks, even if the number of blocks in that range
 * is less than nblocks (we still ensure that the partition
 * can hold nblocks). This latter mechanism is
 * mostly used for preallocation.
 *
 * exl                  list to allocate from (locked here)
 * nblocks              number of blocks wanted
 * first                block number to start searching at
 * last                 if nonzero, highest block that may be
 *                      taken; there is then no wrap-around pass
 *                      and fewer than nblocks may be returned
 * partial_single_range see above
 * exlp                 out: NULL-terminated chain (via ->next) of
 *                      the extents taken, or NULL
 * blocks_allocated_p   out: number of blocks gathered
 *
 * Returns NASD_SUCCESS, NASD_NO_SPACE when the list cannot supply
 * nblocks, or the failure status of nasd_odc_exlist_take_chunk().
 * When the wrap-around pass fails, everything gathered so far is
 * given back to exl and *exlp is NULL.
 */
nasd_status_t
nasd_odc_exlist_get_blocks(
  nasd_odc_exlist_t       *exl,
  nasd_blkcnt_t            nblocks,
  nasd_blkno_t             first,
  nasd_blkno_t             last,
  int                      partial_single_range,
  nasd_odc_exlist_ent_t  **exlp,
  nasd_blkcnt_t           *blocks_allocated_p)
{
  nasd_odc_exlist_ent_t *newfirst, *newlast, *ent, *next, *e, *fe;
  nasd_blkcnt_t got, got_this, got_first, got_second, rel_cnt;
  nasd_status_t rc;
  nasd_blkno_t hb;

  if (last) {
    NASD_ASSERT(first <= last);
  }

  newfirst = newlast = NULL;
  rc = NASD_SUCCESS;
  got = got_first = got_second = 0;


  NASD_LOCK_MUTEX(exl->lock);
  if (exl->num < nblocks) {
    rc = NASD_NO_SPACE;
  }
  else {
    /*
     * First pass: start looking for first block,
     * begin allocating blocks there (or thereafter)
     */
    if (exl->hashlen) {
      NASD_ASSERT(first <= exl->maxval);
      hb = first / exl->hashdiv;
      fe = exl->hashhints[hb];
    }
    else {
      fe = exl->head.next;
    }
    for(ent=fe;((ent!=&exl->head)&&(got<nblocks));ent=next) {
      /* take_chunk may unlink ent, so remember the successor now */
      next = ent->next;
      /*
       * Exclude ranges which are entirely before the range we
       * care about.
       */
      if (ent->range.last < first)
        continue;
      /*
       * This range is entirely after the range we care
       * about (if we get to one that's entirely after,
       * since we're a sorted list, all the remaining
       * ranges will be as well, so we bail from this loop).
       */
      if (last && (ent->range.first > last))
        break;
      /*
       * Range has blocks valid for consideration, whether it
       * straddles "first" or starts after it. Each entry is
       * visited exactly once: a second take on the same entry
       * would operate on an element that has already been
       * truncated or unlinked.
       */
      rc = nasd_odc_exlist_take_chunk(exl, ent, first, last, nblocks - got,
        &e, &got_this);
      if (rc)
        break;
      got += got_this;
      got_first += got_this;
      if (newlast) {
        NASD_ASSERT(partial_single_range == 0);
        newlast->next = e;
        newlast = e;
      }
      else {
        newfirst = newlast = e;
        e->next = NULL;
      }
      if (partial_single_range || (got == nblocks)) {
        goto done_alloc;
      }
    }
    if (last)
      goto done_alloc;
    /*
     * Second pass: come back around, taking what we can get from
     * beginning. We don't worry about covering old ground, since
     * if there were blocks there, we would have already taken
     * them, or we don't need them, in which case we never really
     * enter this loop.
     */
    for(ent=exl->head.next;((ent!=&exl->head)&&(got<nblocks));ent=next) {
      NASD_ASSERT(ent->range.first <= ent->range.last);
      next = ent->next;
      rc = nasd_odc_exlist_take_chunk(exl, ent, ent->range.first, 0,
        nblocks-got, &e, &got_this);
      if (rc) {
        /* e is NULL here; do not splice it into the result chain */
        break;
      }
      got += got_this;
      got_second += got_this;
      if (newlast) {
        NASD_ASSERT(partial_single_range == 0);
        newlast->next = e;
        newlast = e;
      }
      else {
        newfirst = newlast = e;
        e->next = NULL;
      }
      if (partial_single_range || (got == nblocks))
        goto done_alloc;
    }
    if (rc) {
      /*
       * A take failed (e.g. no list element available). Put back
       * whatever was gathered and report the failure as is.
       */
      if (newfirst)
        nasd_odc_free_release_blocks_nolock(exl, newfirst, &rel_cnt);
      newfirst = newlast = NULL;
    }
    else if (got < nblocks) {
      /*
       * ??? How could this happen? It suggests that exl->num is incorrect.
       */
      nasd_printf("WARNING: reached %s:%d, exl->num=%d nblocks=%d got=%d\n",
        __FILE__, __LINE__, exl->num, nblocks, got);
      nasd_printf("got_first=%d got_second=%d\n", got_first, got_second);
      nasd_printf("BEFORE release\n");
      nasd_odc_free_dump();
      rel_cnt = 0;
      if (newfirst)
        nasd_odc_free_release_blocks_nolock(exl, newfirst, &rel_cnt);
      nasd_printf("AFTER release (rel_cnt=%lu)\n", (unsigned long)rel_cnt);
      nasd_odc_free_dump();
      newfirst = newlast = NULL;
      rc = NASD_NO_SPACE;
    }
  }
done_alloc:
  NASD_UNLOCK_MUTEX(exl->lock);

  *exlp = newfirst;
  *blocks_allocated_p = got;

  return(rc);
}

/*
 * Get a single block from an extent list. Start looking
 * at the requested block number, roll forward, then
 * restart from the beginning.
 *
 * exl    list to allocate from (locked here)
 * first  preferred block number
 * blkp   out: block number handed out (written only on success)
 *
 * Returns NASD_SUCCESS, NASD_NO_SPACE when the list holds no
 * blocks, or NASD_NO_MEM when an entry had to be split and no
 * list element was available.
 */
nasd_status_t
nasd_odc_exlist_get_oneblock(
  nasd_odc_exlist_t  *exl,
  nasd_blkno_t        first,
  nasd_blkno_t       *blkp)
{
  nasd_odc_exlist_ent_t *ent, *next, *e, *fe;
  int i, olr, nlr;
  nasd_status_t rc;
  nasd_blkno_t hb;

  rc = NASD_SUCCESS;

  NASD_LOCK_MUTEX(exl->lock);

  if (exl->num == 0) {
    rc = NASD_NO_SPACE;
    goto done;
  }

  /*
   * First pass: start looking for first block, take
   * the next one we find.
   */
  if (exl->hashlen) {
    NASD_ASSERT(first <= exl->maxval);
    hb = first / exl->hashdiv;
    fe = exl->hashhints[hb];
  }
  else {
    fe = exl->head.next;
  }
  for(ent=fe;ent!=&exl->head;ent=next) {
    next = ent->next;
    if (ent->range.last < first)
      continue;
    if (ent->range.first >= first) {
      /*
       * This component of the extent list begins
       * with the block we want (or is the first one
       * after it). If possible, just trim it from the
       * extent component. If this is the entire extent
       * component, delete it.
       */
      *blkp = ent->range.first;
      if (ent->range.last > ent->range.first) {
        /* simple trim, no need to fixup hash */
        ent->range.first++;
      }
      else {
        /* snarf entire component */
        NASD_ASSERT(ent->range.first == ent->range.last);
        FIXUP_HASH_DEL(exl,ent);
        NASD_DEQ_EXL_ENT(ent);
        NASD_EXLE_FREEPUT(ent);
      }
      exl->num--;
      goto done;
    }
    if (ent->range.last == first) {
      /*
       * We want the last block in this chunk, and the
       * first block of the chunk is before here (thus,
       * chunk has more than one block). Grab it and run.
       */
      NASD_ASSERT(ent->range.first < first);
      if (exl->hashlen) {
        if ((ent->range.last % exl->hashdiv) == 0) {
          /* falling out of hash region, update */
          NASD_ASSERT(ent->next != NULL);
          exl->hashhints[ent->range.last / exl->hashdiv] = ent->next;
        }
      }
      *blkp = ent->range.last;
      ent->range.last--;
      exl->num--;
      goto done;
    }
    /*
     * The first block we want is in the middle of this
     * range. Break it up: ent keeps [ent.first, first-1],
     * the new element e gets [first+1, ent.last].
     */
    NASD_EXLE_FREEGET(e);
    if (e == NULL) {
      rc = NASD_NO_MEM;
      goto done;
    }
    NASD_ASSERT(ent->range.first < first);
    NASD_ASSERT(ent->range.last > first);
    if (exl->hashlen) {
      NASD_ASSERT(first <= exl->maxval);
      olr = (first - 1) / exl->hashdiv;
      nlr = ent->range.last / exl->hashdiv;
      /*
       * Bucket olr still contains ent's new last block, so its
       * hint must stay where it is. Only the buckets after it,
       * up to the one holding the old last block, move to e.
       */
      for(i=olr+1;i<=nlr;i++) {
        exl->hashhints[i] = e;
      }
    }
    e->next = ent->next;
    e->prev = ent;
    e->prev->next = e;
    e->next->prev = e;
    e->range.last = ent->range.last;
    ent->range.last = first - 1;
    e->range.first = first + 1;
    exl->num--;
    *blkp = first;
    goto done;
  }

  /*
   * Second pass: grab first thing we can find.
   */
  ent = exl->head.next;
  NASD_ASSERT(ent != &exl->head);
  *blkp = ent->range.first;
  exl->num--;
  if (ent->range.last > ent->range.first) {
    /* no need to update hash */
    ent->range.first++;
  }
  else {
    NASD_ASSERT(ent->range.first == ent->range.last);
    FIXUP_HASH_DEL(exl, ent);
    NASD_DEQ_EXL_ENT(ent);
    NASD_EXLE_FREEPUT(ent);
  }

done:
  NASD_UNLOCK_MUTEX(exl->lock);
  return(rc);
}

/*
 * Get contiguous range of blocks in freelist.
 *
 * Begin with block first, finding the first range
 * that can contiguously grant our request. If we get
 * to the end without finding one, start at the beginning,
 * looking for *smallest* contiguous range that can hold
 * our desired number of blocks. Give up if there is
 * no contiguous range.
 *
 * exl      list to allocate from (locked here)
 * first    block number to start searching at
 * nblocks  number of contiguous blocks wanted (must be >= 1)
 * exlp     out: single entry covering the blocks, or NULL
 *
 * Returns NASD_SUCCESS, NASD_BAD_LEN for nblocks < 1, or
 * NASD_NO_SPACE when no range is large enough. NASD_NO_SPACE is
 * also what the caller sees when a list element could not be
 * obtained.
 */
nasd_status_t
nasd_odc_exlist_get_contig(
  nasd_odc_exlist_t       *exl,
  nasd_blkno_t             first,
  nasd_blkcnt_t            nblocks,
  nasd_odc_exlist_ent_t  **exlp)
{
  nasd_odc_exlist_ent_t *ent, *min_ent, *tmp, *fe;
  nasd_blkcnt_t cnt, min_cnt;
  nasd_status_t rc;
  nasd_blkno_t hb;

  *exlp = NULL;
  min_ent = NULL;
  min_cnt = 0;

  if (nblocks < 1) {
    return(NASD_BAD_LEN);
  }

  NASD_LOCK_MUTEX(exl->lock);

  /*
   * Scan forward until we find block first. Go with first
   * range after that.
   */
  if (exl->hashlen) {
    NASD_ASSERT(first <= exl->maxval);
    hb = first / exl->hashdiv;
    fe = exl->hashhints[hb];
  }
  else {
    fe = exl->head.next;
  }
  for(ent=fe;ent!=&exl->head;ent=ent->next) {
    if (ent->range.last < first)
      continue;
    cnt = (ent->range.last - ent->range.first) + 1;
    if (cnt < nblocks)
      continue;
    /*
     * The extent we're looking at is big enough, and has
     * blocks that are after the first block.
     */
    if (ent->range.first < first) {
      /*
       * The extent straddles our first block. Can we go
       * from first to last and satisfy our needs?
       */
      cnt = (ent->range.last - first) + 1;
      if (cnt < nblocks)
        continue;
      /*
       * We can satisfy our needs by grabbing blocks starting
       * with the first one. Fragment this ent in the existing
       * list so we start on the correct boundary, the code
       * below will perform the correct extraction.
       */
      NASD_EXLE_FREEGET(tmp);
      if (tmp == NULL)
        break;
      tmp->range.first = first;
      tmp->range.last = ent->range.last;
      ent->range.last = first - 1;
      tmp->prev = ent;
      tmp->next = ent->next;
      tmp->prev->next = tmp;
      tmp->next->prev = tmp;
      min_ent = tmp;
      min_cnt = cnt;
      FIXUP_HASH_INS(exl, tmp);
      break;
    }

    /*
     * This is the first region which is big enough to hold
     * us, and it starts after our first desired block.
     */
    min_ent = ent;
    min_cnt = cnt;
    break;

  }

  if (min_ent == NULL) {
    /*
     * Didn't find anything. Restart at the beginning, taking the
     * _smallest_ range which satisfies our needs.
     */
    for(ent=exl->head.next;ent!=&exl->head;ent=ent->next) {
      cnt = (ent->range.last - ent->range.first) + 1;
      if ((cnt >= nblocks) && ((min_ent == NULL) || (cnt < min_cnt))) {
        min_ent = ent;
        min_cnt = cnt;
      }
    }
  }

  if (min_ent) {
    cnt = min_cnt;
    ent = min_ent;
    /*
     * ent, cnt represent smallest range containing nblocks,
     * and its size
     */
    if (cnt == nblocks) {
      /*
       * Just pull out this entry and return it. The hash hints
       * must be redirected while ent is still linked:
       * FIXUP_HASH_DEL reads ent->prev and ent->next, which
       * NASD_DEQ_EXL_ENT sets to NULL.
       */
      FIXUP_HASH_DEL(exl, ent);
      NASD_DEQ_EXL_ENT(ent);
      *exlp = ent;
    }
    else {
      NASD_EXLE_FREEGET(*exlp);
      if (*exlp) {
        /* trim from the front of ent; no need to modify hash hints here */
        (*exlp)->next = (*exlp)->prev = NULL;
        (*exlp)->range.first = ent->range.first;
        ent->range.first = (*exlp)->range.first + nblocks;
        (*exlp)->range.last = ent->range.first - 1;
      }
    }
  }

  if (*exlp) {
    rc = NASD_SUCCESS;
    exl->num -= nblocks;
  }
  else {
    rc = NASD_NO_SPACE;
  }

  NASD_UNLOCK_MUTEX(exl->lock);

  return(rc);
}

/*
 * Release a single block in a list
 *
 * exl     list receiving the block (locked here)
 * blknum  block number being given back
 *
 * The block is appended or prepended to an entry it directly
 * abuts, otherwise a new one-block entry is inserted in sorted
 * position. Panics if blknum is already covered by an entry.
 *
 * Returns NASD_SUCCESS, or NASD_NO_MEM when a new entry was needed
 * and none could be obtained (the block is then NOT recorded).
 */
nasd_status_t
nasd_odc_exlist_release_oneblock(
  nasd_odc_exlist_t  *exl,
  nasd_blkno_t        blknum)
{
  nasd_odc_exlist_ent_t *e, *n, *fe;
  nasd_status_t rc;
  nasd_blkno_t hb;
  int nfr;

  rc = NASD_SUCCESS;

  NASD_LOCK_MUTEX(exl->lock);
  /* search free ranges, see if we can append */
  if (exl->hashlen) {
    NASD_ASSERT(blknum <= exl->maxval);
    /*
     * Start from the bucket of the preceding block so that an
     * entry ending at blknum-1 is seen. Guard block 0 so that
     * blknum-1 cannot wrap around and index past the hint table.
     */
    hb = (blknum ? (blknum-1) : 0) / exl->hashdiv;
    fe = exl->hashhints[hb];
  }
  else {
    fe = exl->head.next;
  }
  for(e=fe;e!=&exl->head;e=e->next) {
    if ((e->range.first <= blknum) && (e->range.last >= blknum)) {
      /* uh-oh, already in list */
      NASD_PANIC();
    }
    if ((e->range.last+1) == blknum) {
      /*
       * Found! We can just append to an existing chunk.
       * Update the hash table if we break into a new hash
       * chunk.
       */
      e->range.last = blknum;
      exl->num++;
      if (exl->hashdiv) {
        if ((blknum % exl->hashdiv) == 0) {
          nfr = blknum / exl->hashdiv;
          NASD_ASSERT(e != NULL);
          exl->hashhints[nfr] = e;
        }
      }
      /* NULL e tells the code below that no new entry is needed */
      e = NULL;
      break;
    }
    if ((e->range.first) == (blknum+1)) {
      /*
       * Found! We can just prepend to an existing chunk.
       * No need to update hash table here.
       */
      e->range.first = blknum;
      e = NULL;
      exl->num++;
      break;
    }
    if (e->range.first > blknum) {
      /* need to insert a new entry before e */
      break;
    }
  }
  if (e) {
    /*
     * must cons a new entry, insert before e
     * (e is the head sentinel when the block goes at the tail)
     */
    NASD_EXLE_FREEGET(n);
    if (n) {
      n->range.first = n->range.last = blknum;
      n->next = e;
      n->prev = e->prev;
      n->prev->next = n;
      n->next->prev = n;
      FIXUP_HASH_INS(exl, n);
      exl->num++;
    }
    else {
      /* bad day! the ref engine will almost certainly panic here */
      rc = NASD_NO_MEM;
    }
  }
  NASD_UNLOCK_MUTEX(exl->lock);

  return(rc);
}

/*
 * Shutdown hook for one extent list; arg is the
 * nasd_odc_exlist_t to tear down. Any entries still on the list
 * are cut loose from the head sentinel and handed to
 * nasd_odc_release_extent_list(); then the list itself is
 * destroyed. A destroy failure is only logged.
 */
void
nasd_odc_shutdown_exlist(
  void  *arg)
{
  nasd_odc_exlist_t *list = (nasd_odc_exlist_t *)arg;
  nasd_odc_exlist_ent_t *chain;
  nasd_status_t status;

  if (list->head.next != &list->head) {
    /* detach the entries as a NULL-terminated chain, then free it */
    chain = list->head.next;
    list->head.prev->next = NULL;
    list->head.next->prev = NULL;
    nasd_odc_release_extent_list(chain);
  }

  status = nasd_odc_destroy_exlist(list);
  if (status) {
    nasd_printf("ERROR: %s:%d got 0x%x (%s) from nasd_odc_destroy_exlist()\n",
      __FILE__, __LINE__, status, nasd_error_string(status));
  }
}

/*
 * Init single list of free extents: create its mutex, make the
 * head sentinel point at itself (empty list), zero the block
 * count and leave hashing switched off.
 *
 * Returns NASD_SUCCESS, or NASD_FAIL if the mutex could not be
 * initialised.
 */
nasd_status_t
nasd_odc_init_exlist(
  nasd_odc_exlist_t  *exl)
{
  if (nasd_mutex_init(&exl->lock))
    return(NASD_FAIL);

  /* empty circular list */
  exl->head.prev = &exl->head;
  exl->head.next = &exl->head;
  exl->num = 0;

  /* no hash hints until nasd_odc_exlist_hashify() is called */
  exl->hashhints = NULL;
  exl->hashlen = 0;
  exl->hashdiv = 0;
  exl->maxval = 0;

  return(NASD_SUCCESS);
}

/*
 * Undo nasd_odc_init_exlist()/nasd_odc_exlist_hashify(): free the
 * hash hint table (if any), reset the hashing fields and destroy
 * the mutex. Entries still linked on the list are not touched.
 * Always returns NASD_SUCCESS.
 */
nasd_status_t
nasd_odc_destroy_exlist(
  nasd_odc_exlist_t  *exl)
{
  if (exl->hashlen != 0) {
    /* size must be computed before hashlen is cleared */
    NASD_Free(exl->hashhints, sizeof(nasd_odc_exlist_ent_t *)*exl->hashlen);
    exl->hashhints = NULL;
    exl->hashlen = 0;
    exl->hashdiv = 0;
    exl->maxval = 0;
  }

  nasd_mutex_destroy(&exl->lock);

  return(NASD_SUCCESS);
}

/*
 * Cleanup callback handed to the nasd_free_exlist freelist
 * macros: destroys the list and panics if that fails.
 */
static void
clean_exlist(
  nasd_odc_exlist_t  *exl)
{
  nasd_status_t status;

  status = nasd_odc_destroy_exlist(exl);
  if (status) {
    NASD_PANIC();
  }
}

/*
 * External stuff below
 */

/*
 * Create a hash table of hint pointers to
 * accelerate operations on large lists.
 *
 * exl        list to attach the hint table to
 * maxval     recorded as exl->maxval; together with hashbound
 *            it determines the bucket count
 * hashbound  divisor recorded as exl->hashdiv; must be positive
 *
 * Allocates (maxval / hashbound) + 1 buckets, points every
 * bucket at the list head, records the table geometry in exl,
 * and then runs FIXUP_HASH_INS on every entry already on the
 * list.
 *
 * Returns NASD_FAIL if hashbound is not positive, NASD_NO_MEM
 * if the table cannot be allocated, NASD_SUCCESS otherwise.
 */
nasd_status_t
nasd_odc_exlist_hashify(
  nasd_odc_exlist_t  *exl,
  nasd_blkno_t        maxval,
  int                 hashbound)
{
  int maxbuckets, nbuckets, i;
  nasd_odc_exlist_ent_t *e;

  /*
   * hashbound is used as a divisor just below; reject zero
   * (division by zero) and negative values (meaningless
   * bucket count) before touching the list.
   */
  if (hashbound <= 0)
    return(NASD_FAIL);

  maxbuckets = maxval / hashbound;
  nbuckets = maxbuckets + 1;

  NASD_Malloc(exl->hashhints, nbuckets*sizeof(nasd_odc_exlist_ent_t *),
    (nasd_odc_exlist_ent_t **));
  if (exl->hashhints == NULL)
    return(NASD_NO_MEM);

  /*
   * First, init our hash buckets.
   */
  for(i=0;i<nbuckets;i++) {
    exl->hashhints[i] = &exl->head;
  }

  exl->hashlen = nbuckets;
  exl->hashdiv = hashbound;
  exl->maxval = maxval;

  /*
   * Now, run through the list updating hash buckets.
   */
  for(e=exl->head.next;e!=&exl->head;e=e->next) {
    FIXUP_HASH_INS(exl, e);
  }

  return(NASD_SUCCESS);
}

/*
 * Obtain an extent list header from the nasd_free_exlist
 * freelist and hand it back through exlp as an empty list.
 *
 * On failure *exlp is set to NULL and NASD_NO_MEM is
 * returned; otherwise NASD_SUCCESS.
 */
nasd_status_t
nasd_odc_exlist_get(
  nasd_odc_exlist_t  **exlp)
{
  nasd_odc_exlist_t *fresh;

  NASD_FREELIST_GET_INIT(nasd_free_exlist,fresh,next,
    (nasd_odc_exlist_t *),nasd_odc_init_exlist);

  /* publish the result first so the caller sees NULL on failure */
  *exlp = fresh;
  if (fresh == NULL) {
    return(NASD_NO_MEM);
  }

  /* reset to an empty circular list */
  fresh->num = 0;
  fresh->head.prev = &fresh->head;
  fresh->head.next = &fresh->head;

  return(NASD_SUCCESS);
}

/*
 * Hand an extent list header back to the nasd_free_exlist
 * freelist (the pool nasd_odc_exlist_get() draws from).
 * clean_exlist is supplied to the freelist macro as the
 * cleanup routine.
 */
void
nasd_odc_exlist_free(
  nasd_odc_exlist_t  *exl)
{
  NASD_FREELIST_FREE_CLEAN(nasd_free_exlist,exl,next,clean_exlist);
}

/*
 * Shutdown hook for the nasd_free_exlist freelist (registered
 * by nasd_odc_freeblock_init()). Destroys the freelist, with
 * clean_exlist supplied as the per-item cleanup routine.
 * The argument is unused.
 */
void
nasd_odc_shutdown_free_exlist(
  void  *ignored)
{
  NASD_FREELIST_DESTROY_CLEAN(nasd_free_exlist,next,(nasd_odc_exlist_t *),
    clean_exlist);
}

/*
 * Shutdown hook for the nasd_free_exle freelist of extent
 * list entries (registered by nasd_odc_freeblock_init()).
 * The argument is unused.
 */
void
nasd_odc_shutdown_free_exle(
  void  *ignored)
{
  NASD_FREELIST_DESTROY(nasd_free_exle,next,(nasd_odc_exlist_ent_t *));
}

/*
 * Init free block subsystem.
 *
 * Creates and primes the two freelists used by this file
 * (extent list entries, then extent list headers) and
 * registers a shutdown hook for each on nasd_odc_shutdown.
 * If a hook cannot be registered, the matching freelist is
 * torn down immediately and the registration error returned.
 *
 * Returns NASD_NO_MEM if a freelist cannot be created,
 * NASD_SUCCESS when both are ready.
 */
nasd_status_t
nasd_odc_freeblock_init()
{
  nasd_status_t status;

  /* freelist of extent list entries */
  NASD_FREELIST_CREATE(nasd_free_exle, NASD_MAX_FREE_EXENT,
    NASD_EXENT_INC, sizeof(nasd_odc_exlist_ent_t));
  if (nasd_free_exle == NULL) {
    return(NASD_NO_MEM);
  }
  NASD_FREELIST_PRIME(nasd_free_exle, NASD_EXENT_INITIAL,next,
    (nasd_odc_exlist_ent_t *));
  status = nasd_shutdown_proc(nasd_odc_shutdown,
    nasd_odc_shutdown_free_exle, NULL);
  if (status) {
    nasd_odc_shutdown_free_exle(NULL);
    return(status);
  }

  /* freelist of extent list headers */
  NASD_FREELIST_CREATE(nasd_free_exlist, NASD_MAX_FREE_EXLIST,
    NASD_EXLIST_INC, sizeof(nasd_odc_exlist_t));
  if (nasd_free_exlist == NULL) {
    return(NASD_NO_MEM);
  }
  NASD_FREELIST_PRIME_INIT(nasd_free_exlist, NASD_EXLIST_INITIAL,next,
    (nasd_odc_exlist_t *),nasd_odc_init_exlist);
  status = nasd_shutdown_proc(nasd_odc_shutdown,
    nasd_odc_shutdown_free_exlist, NULL);
  if (status) {
    nasd_odc_shutdown_free_exlist(NULL);
    return(status);
  }

  return(NASD_SUCCESS);
}

/*
 * Construct our notion of what blocks are free
 * (blocks with 0 refcnt are free)
 */
nasd_status_t
nasd_odc_freeblock_build_lists()
{
  nasd_blkno_t bn, zhibn, zbn;
  nasd_odc_exlist_ent_t *exe;
  nasd_odc_exlist_t *exl;
  nasd_odc_ent_t *ent;
  nasd_status_t rc;
  int i, j;

  rc = nasd_odc_init_exlist(&nasd_odc_unallocated_extents);
  if (rc) {
    nasd_printf("ERROR (%s:%d): unable to create unallocated extent list rc=%ld\n",
      __FILE__, __LINE__, (long)rc);
    return(rc);
  }
  rc = nasd_shutdown_proc(nasd_odc_shutdown, nasd_odc_shutdown_exlist,
    &nasd_odc_unallocated_extents);
  if (rc) {
    nasd_odc_shutdown_exlist(&nasd_odc_unallocated_extents);
    return(rc);
  }

  nasd_printf("DRIVE: Building free chunks model\n");

  /*
   * Search the entire block space. Instead of starting
   * at block 1 and always subtracting 1, we run from 0.
   * We're still starting, so we don't bother locking
   * refents. When entering stuff in the list, we use
   * bn, which is zbn+1. zbn is our zero-based block
   * number, bn is the "real" (1-based) block number.
   *
   * Someone should probably do a more efficient version
   * of this, someday.
   */
  zbn = 0;
  exl = &nasd_odc_unallocated_extents;
  for(i=0;i<nasd_odc_refblocks;i++) {
    rc = nasd_odc_block_get(NULL, (nasd_blkno_t)i,
        NASD_ODC_L_FORCE|NASD_ODC_L_BLOCK|NASD_ODC_L_LOAD,
        &ent, NASD_ID_NULL, 0, NASD_ODC_T_REFCNT, NULL);
    NASD_ODC_LOCK_BLOCK(ent);
    nasd_odc_wait_not_busy_invalid(ent);
    NASD_ODC_UNLOCK_BLOCK(ent);
    zhibn = zbn + NASD_OD_REFS_PER_BLOCK;
    if (zhibn > nasd_od_blocks)
      zhibn = nasd_od_blocks;
    for(j=0;zbn<zhibn;j++,zbn++) {
      bn = zbn + 1;
      if (ent->data.cnt[j] == 0) {
        /*
         * Block #bn has refcnt 0, and is therefore free.
         * First, find what list it should end up in
         * (by partition, or unallocated to a partition).
         *
         * Add the block to the unallocated extent list.
         */
        if (exl->head.prev == &exl->head) {
          /* list is empty, put us in as the sole ent */
          NASD_EXLE_FREEGET(exe);
          if (exe == NULL)
            return(NASD_NO_MEM);
          exe->range.first = exe->range.last = bn;
          exe->prev = exl->head.prev;
          exe->next = &exl->head;
          exe->prev->next = exe;
          exe->next->prev = exe;
        }
        else {
          exe = exl->head.prev;
          if ((exe->range.last + 1) == bn) {
            /* block is contiguous with previous, add it to range */
            exe->range.last = bn;
          }
          else {
            /* not contiguous with previous, add a new chunk at end-of-list */
            NASD_EXLE_FREEGET(exe);
            if (exe == NULL)
              return(NASD_NO_MEM);
            exe->range.first = exe->range.last = bn;
            exe->prev = exl->head.prev;
            exe->next = &exl->head;
            exe->prev->next = exe;
            exe->next->prev = exe;
          }
        }
        exl->num++;
      }
    }
    nasd_odc_block_release(ent);
  }

  rc = nasd_odc_exlist_hashify(&nasd_odc_unallocated_extents,
    nasd_od_blocks, nasd_drive_unallocated_hash_chunk);
  if (rc) {
    return(rc);
  }

  return(NASD_SUCCESS);
}

/*
 * Free memory only of list (used after a caller of
 * a range alloc function is done with exlist ents).
 *
 * ent heads a NULL-terminated chain linked through next;
 * every entry on it goes back to the entry freelist.
 * A NULL ent is a no-op.
 */
void
nasd_odc_release_extent_list(
  nasd_odc_exlist_ent_t  *ent)
{
  nasd_odc_exlist_ent_t *cur, *following;

  cur = ent;
  while (cur != NULL) {
    /* grab the successor before the entry is given back */
    following = cur->next;
    NASD_EXLE_FREEPUT(cur);
    cur = following;
  }
}

/*
 * Allocate a single list entry.
 *
 * On success *exlp points at the entry, with its next
 * pointer cleared, and NASD_SUCCESS is returned. If no
 * entry is available *exlp is NULL and NASD_NO_MEM is
 * returned.
 */
nasd_status_t
nasd_odc_get_extent_list(
  nasd_odc_exlist_ent_t  **exlp)
{
  nasd_odc_exlist_ent_t *fresh;

  NASD_EXLE_FREEGET(fresh);
  *exlp = fresh;

  if (fresh == NULL) {
    return(NASD_NO_MEM);
  }

  fresh->next = NULL;
  return(NASD_SUCCESS);
}

/*
 * Release blocks for general use.
 *
 * Forwards to nasd_odc_free_release_blocks_to_list() with
 * the global unallocated extent list as the target and
 * returns its status unchanged.
 */
nasd_status_t
nasd_odc_free_release_blocks(
  nasd_odc_exlist_ent_t  *release,
  nasd_blkcnt_t          *release_cnt_p)
{
  return(nasd_odc_free_release_blocks_to_list(
    &nasd_odc_unallocated_extents, release, release_cnt_p));
}

/*
 * Allocate blocks from general-use unallocated pool.
 *
 * Forwards to nasd_odc_exlist_get_blocks() on the global
 * unallocated extent list, passing 0 for both arguments
 * that follow first, and returns its status unchanged.
 */
nasd_status_t
nasd_odc_free_get_range(
  nasd_blkcnt_t            nblocks,
  nasd_blkno_t             first,
  nasd_odc_exlist_ent_t  **exlp,
  nasd_blkcnt_t           *blocks_allocated_p)
{
  return(nasd_odc_exlist_get_blocks(&nasd_odc_unallocated_extents,
    nblocks, first, 0, 0, exlp, blocks_allocated_p));
}

/*
 * Allocate blocks from general-use unallocated pool,
 * but only within a restricted range. May return
 * partial range.
 *
 * Forwards to nasd_odc_exlist_get_blocks() on the global
 * unallocated extent list with last as the bound and the
 * following flag argument set to 1; returns its status
 * unchanged.
 */
nasd_status_t
nasd_odc_free_get_range_bounded_partial(
  nasd_blkcnt_t            nblocks,
  nasd_blkno_t             first,
  nasd_blkno_t             last,
  nasd_odc_exlist_ent_t  **exlp,
  nasd_blkcnt_t           *blocks_allocated_p)
{
  return(nasd_odc_exlist_get_blocks(&nasd_odc_unallocated_extents,
    nblocks, first, last, 1, exlp, blocks_allocated_p));
}

/*
 * Allocate single blocks. Range returned may be smaller than
 * requested. This is used by the preallocation system
 * to actually hold a range of blocks in reserve.
 *
 * Forwards to nasd_odc_exlist_get_blocks() on the global
 * unallocated extent list and returns its status unchanged.
 * The number of blocks obtained is only sanity-checked
 * against nblocks; it is not reported to the caller.
 */
nasd_status_t
nasd_odc_free_get_partial_range(
  nasd_blkcnt_t            nblocks,
  nasd_blkno_t             first,
  nasd_odc_exlist_ent_t  **exlp)
{
  nasd_odc_exlist_t *exl;
  nasd_blkcnt_t cnt;
  nasd_status_t rc;

  exl = &nasd_odc_unallocated_extents;

  /*
   * Start from a defined value: the assertion below reads cnt
   * even when the call fails, and a failing call may not have
   * written it.
   */
  cnt = 0;

  rc = nasd_odc_exlist_get_blocks(exl, nblocks, first, 0, 1, exlp, &cnt);
  NASD_ASSERT(cnt <= nblocks);
  return(rc);
}

/*
 * Release a single block to general-use freelist.
 *
 * Forwards to nasd_odc_exlist_release_oneblock() on the
 * global unallocated extent list and returns its status
 * unchanged.
 */
nasd_status_t
nasd_odc_free_release_oneblock(
  nasd_blkno_t  blknum)
{
  return(nasd_odc_exlist_release_oneblock(&nasd_odc_unallocated_extents,
    blknum));
}

/*
 * Print every extent on exl as "first..last", one per
 * line, or "none" if the list is empty. The list lock is
 * not taken here.
 */
void
nasd_odc_exlist_dump(
  nasd_odc_exlist_t *exl)
{
  nasd_odc_exlist_ent_t *cur;

  cur = exl->head.next;
  if (cur == &exl->head) {
    /* nothing on the list */
    nasd_printf("  none\n");
    return;
  }

  while (cur != &exl->head) {
    nasd_printf("  %u..%u\n", cur->range.first, cur->range.last);
    cur = cur->next;
  }
}

/*
 * Print the contents of the global unallocated extent
 * list under a "free blocks:" heading.
 */
void
nasd_odc_free_dump()
{
  nasd_printf("free blocks:\n");
  nasd_odc_exlist_dump(&nasd_odc_unallocated_extents);
}

/* Local Variables:  */
/* indent-tabs-mode: nil */
/* tab-width: 2 */
/* End: */
