// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
//
//
// A sparsetable is a random container that implements a sparse array,
// that is, an array that uses very little memory to store unassigned
// indices (in this case, between 1-2 bits per unassigned index).  For
// instance, if you allocate an array of size 5 and assign a[2] = <big
// struct>, then a[2] will take up a lot of memory but a[0], a[1],
// a[3], and a[4] will not.  Array elements that have a value are
// called "assigned".  Array elements that have no value yet, or have
// had their value cleared using erase() or clear(), are called
// "unassigned".
//
// Unassigned values seem to have the default value of T (see below).
// Nevertheless, there is a difference between an unassigned index and
// one explicitly assigned the value of T().  The latter is considered
// assigned.
//
// Access to an array element is constant time, as is insertion and
// deletion.  Insertion and deletion may be fairly slow, however:
// because of this container's memory economy, each insert and delete
// causes a memory reallocation.
//
// NOTE: You should not test(), get(), or set() any index that is
// greater than sparsetable.size().  If you need to do that, call
// resize() first.
//
// --- Template parameters
// PARAMETER   DESCRIPTION                           DEFAULT
// T           The value of the array: the type of   --
//             object that is stored in the array.
//
// GROUP_SIZE  How large each "group" in the table   48
//             is (see below).  Larger values use
//             a little less memory but cause most
//             operations to be a little slower
//
// Alloc:      Allocator to use to allocate memory.  libc_allocator_with_realloc
//
// --- Model of
// Random Access Container
//
// --- Type requirements
// T must be Copy Constructible. It need not be Assignable.
//
// --- Public base classes
// None.
//
// --- Members
// Type members
//
// MEMBER           WHERE DEFINED DESCRIPTION
// value_type       container     The type of object, T, stored in the array
// allocator_type   container     Allocator to use
// pointer          container     Pointer to p
// const_pointer    container     Const pointer to p
// reference        container     Reference to t
// const_reference  container     Const reference to t
// size_type        container     An unsigned integral type
// difference_type  container     A signed integral type
// iterator [*]     container     Iterator used to iterate over a sparsetable
// const_iterator   container     Const iterator used to iterate over a table
// reverse_iterator reversible    Iterator used to iterate backwards over
//                  container     a sparsetable
// const_reverse_iterator   reversible container   Guess
// nonempty_iterator [+]           sparsetable     Iterates over assigned
//                                                 array elements only
// const_nonempty_iterator         sparsetable     Iterates over assigned
//                                                 array elements only
// reverse_nonempty_iterator       sparsetable     Iterates backwards over
//                                                 assigned array elements only
// const_reverse_nonempty_iterator sparsetable     Iterates backwards over
//                                                 assigned array elements only
//
// [*] All iterators are const in a sparsetable (though nonempty_iterators
//     may not be).  Use get() and set() to assign values, not iterators.
//
// [+] iterators are random-access iterators.  nonempty_iterators are
//     bidirectional iterators.

// Iterator members
// MEMBER              WHERE DEFINED  DESCRIPTION
//
// iterator begin()    container      An iterator to the beginning of the table
// iterator end()      container      An iterator to the end of the table
// const_iterator      container      A const_iterator pointing to the
//   begin() const                    beginning of a sparsetable
// const_iterator      container      A const_iterator pointing to the
//   end() const                      end of a sparsetable
//
// reverse_iterator          reversable     Points to beginning of a reversed
//   rbegin()                container      sparsetable
// reverse_iterator          reversable     Points to end of a reversed table
//   rend()                  container
// const_reverse_iterator    reversable     Points to beginning of a
//   rbegin() const          container      reversed sparsetable
// const_reverse_iterator    reversable     Points to end of a reversed table
//   rend() const            container
//
// nonempty_iterator         sparsetable    Points to first assigned element
//    begin()                               of a sparsetable
// nonempty_iterator         sparsetable    Points past last assigned element
//    end()                                 of a sparsetable
// const_nonempty_iterator   sparsetable    Points to first assigned element
//    begin() const                         of a sparsetable
// const_nonempty_iterator   sparsetable    Points past last assigned element
//    end() const                           of a sparsetable
//
// reverse_nonempty_iterator sparsetable    Points to first assigned element
//    begin()                               of a reversed sparsetable
// reverse_nonempty_iterator sparsetable    Points past last assigned element
//    end()                                 of a reversed sparsetable
// const_reverse_nonempty_iterator sparsetable    Points to first assigned
//    begin() const                               elt of a reversed sparsetable
// const_reverse_nonempty_iterator sparsetable    Points past last assigned
//    end() const                                 elt of a reversed sparsetable
//
//
// Other members
// MEMBER                      WHERE DEFINED  DESCRIPTION
// sparsetable()               sparsetable    A table of size 0; must resize()
//                                            before using.
// sparsetable(size_type size) sparsetable    A table of size size.  All
//                                            indices are unassigned.
// sparsetable(
//    const sparsetable &tbl)  sparsetable    Copy constructor
// ~sparsetable()              sparsetable    The destructor
// sparsetable &operator=(     sparsetable    The assignment operator
//    const sparsetable &tbl)
//
// void resize(size_type size) sparsetable    Grow or shrink a table to
//                                            have size indices [*]
//
// void swap(sparsetable &x)   sparsetable    Swap two sparsetables
// void swap(sparsetable &x,   sparsetable    Swap two sparsetables
//           sparsetable &y)                  (global, not member, function)
//
// size_type size() const      sparsetable    Number of "buckets" in the table
// size_type max_size() const  sparsetable    Max allowed size of a sparsetable
// bool empty() const          sparsetable    true if size() == 0
// size_type num_nonempty() const  sparsetable  Number of assigned "buckets"
//
// const_reference get(        sparsetable    Value at index i, or default
//    size_type i) const                      value if i is unassigned
// const_reference operator[]( sparsetable    Identical to get(i) [+]
//    difference_type i) const
// reference set(size_type i,  sparsetable    Set element at index i to
//    const_reference val)                    be a copy of val
// bool test(size_type i)      sparsetable    True if element at index i
//    const                                   has been assigned to
// bool test(iterator pos)     sparsetable    True if element pointed to
//    const                                   by pos has been assigned to
// void erase(iterator pos)    sparsetable    Set element pointed to by
//                                            pos to be unassigned [!]
// void erase(size_type i)     sparsetable    Set element i to be unassigned
// void erase(iterator start,  sparsetable    Erases all elements between
//            iterator end)                   start and end
// void clear()                sparsetable    Erases all elements in the table
//
// I/O versions exist for both FILE* and for File* (Google2-style files):
// bool write_metadata(FILE *fp) sparsetable  Writes a sparsetable to the
// bool write_metadata(File *fp)              given file.  true if write
//                                            completes successfully
// bool read_metadata(FILE *fp) sparsetable   Replaces sparsetable with
// bool read_metadata(File *fp)               version read from fp.  true
//                                            if read completes sucessfully
// bool write_nopointer_data(FILE *fp)        Read/write the data stored in
// bool read_nopointer_data(FILE*fp)          the table, if it's simple
//
// bool operator==(            forward        Tests two tables for equality.
//    const sparsetable &t1,   container      This is a global function,
//    const sparsetable &t2)                  not a member function.
// bool operator<(             forward        Lexicographical comparison.
//    const sparsetable &t1,   container      This is a global function,
//    const sparsetable &t2)                  not a member function.
//
// [*] If you shrink a sparsetable using resize(), assigned elements
// past the end of the table are removed using erase().  If you grow
// a sparsetable, new unassigned indices are created.
//
// [+] Note that operator[] returns a const reference.  You must use
// set() to change the value of a table element.
//
// [!] Unassignment also calls the destructor.
//
// Iterators are invalidated whenever an item is inserted or
// deleted (ie set() or erase() is used) or when the size of
// the table changes (ie resize() or clear() is used).
//
// See doc/sparsetable.html for more information about how to use this class.

// Note: this uses STL style for naming, rather than Google naming.
// That's because this is an STL-y container

#ifndef UTIL_GTL_SPARSETABLE_H_
#define UTIL_GTL_SPARSETABLE_H_

#include <sparseconfig.h>
#include <stdlib.h>             // for malloc/free
#include <stdio.h>              // to read/write tables
#include <string.h>             // for memcpy
#ifdef HAVE_STDINT_H
#include <stdint.h>         // the normal place uint16_t is defined
#endif
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>      // the normal place u_int16_t is defined
#endif
#ifdef HAVE_INTTYPES_H
#include <inttypes.h>       // a third place for uint16_t or u_int16_t
#endif
#include <assert.h>             // for bounds checking
#include <iterator>             // to define reverse_iterator for me
#include <algorithm>            // equal, lexicographical_compare, swap,...
#include <memory>               // uninitialized_copy, uninitialized_fill
#include <vector>               // a sparsetable is a vector of groups
#include <type_traits.h>
#include <hashtable-common.h>
#include <libc_allocator_with_realloc.h>

// A lot of work to get a type that's guaranteed to be 16 bits...
#ifndef HAVE_U_INT16_T
# if defined HAVE_UINT16_T
    typedef uint16_t u_int16_t;    // true on solaris, possibly other C99 libc's
# elif defined HAVE___UINT16
    typedef __int16 int16_t;       // true on vc++7
    typedef unsigned __int16 u_int16_t;
# else
    // Cannot find a 16-bit integer type.  Hoping for the best with "short"...
    typedef short int int16_t;
    typedef unsigned short int u_int16_t;
# endif
#endif

_START_GOOGLE_NAMESPACE_

namespace base {   // just to make google->opensource transition easier
using GOOGLE_NAMESPACE::true_type;
using GOOGLE_NAMESPACE::false_type;
using GOOGLE_NAMESPACE::integral_constant;
using GOOGLE_NAMESPACE::has_trivial_copy;
using GOOGLE_NAMESPACE::has_trivial_destructor;
using GOOGLE_NAMESPACE::is_same;
}


// The smaller this is, the faster lookup is (because the group bitmap is
// smaller) and the faster insert is, because there's less to move.
// On the other hand, there are more groups.  Since group::size_type is
// a short, this number should be of the form 32*x + 16 to avoid waste.
static const u_int16_t DEFAULT_SPARSEGROUP_SIZE = 48;   // fits in 1.5 words


// Our iterator as simple as iterators can be: basically it's just
// the index into our table.  Dereference, the only complicated
// thing, we punt to the table class.  This just goes to show how
// much machinery STL requires to do even the most trivial tasks.
//
// A NOTE ON ASSIGNING:
// A sparse table does not actually allocate memory for entries
// that are not filled.  Because of this, it becomes complicated
// to have a non-const iterator: we don't know, if the iterator points
// to a not-filled bucket, whether you plan to fill it with something
// or whether you plan to read its value (in which case you'll get
// the default bucket value).  Therefore, while we can define const
// operations in a pretty 'normal' way, for non-const operations, we
// define something that returns a helper object with operator= and
// operator& that allocate a bucket lazily.  We use this for table[]
// and also for regular table iterators.

template <class tabletype>
class table_element_adaptor {
 public:
  typedef typename tabletype::value_type value_type;
  typedef typename tabletype::size_type size_type;
  typedef typename tabletype::reference reference;
  typedef typename tabletype::pointer pointer;

  table_element_adaptor(tabletype *tbl, size_type p)
    : table(tbl), pos(p) { }
  table_element_adaptor& operator= (const value_type &val) {
    table->set(pos, val);
    return *this;
  }
  operator value_type() { return table->get(pos); }   // we look like a value
  pointer operator& () { return &table->mutating_get(pos); }

 private:
  tabletype* table;
  size_type pos;
};

// Our iterator as simple as iterators can be: basically it's just
// the index into our table.  Dereference, the only complicated
// thing, we punt to the table class.  This just goes to show how
// much machinery STL requires to do even the most trivial tasks.
//
// By templatizing over tabletype, we have one iterator type which
// we can use for both sparsetables and sparsebins.  In fact it
// works on any class that allows size() and operator[] (eg vector),
// as long as it does the standard STL typedefs too (eg value_type).

template <class tabletype>
class table_iterator {
 public:
  typedef table_iterator iterator;

  typedef std::random_access_iterator_tag iterator_category;
  typedef typename tabletype::value_type value_type;
  typedef typename tabletype::difference_type difference_type;
  typedef typename tabletype::size_type size_type;
  typedef table_element_adaptor<tabletype> reference;
  typedef table_element_adaptor<tabletype>* pointer;

  // The "real" constructor
  table_iterator(tabletype *tbl, size_type p)
    : table(tbl), pos(p) { }
  // The default constructor, used when I define vars of type table::iterator
  table_iterator() : table(NULL), pos(0) { }
  // The copy constructor, for when I say table::iterator foo = tbl.begin()
  // The default destructor is fine; we don't define one
  // The default operator= is fine; we don't define one

  // The main thing our iterator does is dereference.  If the table entry
  // we point to is empty, we return the default value type.
  // This is the big different function from the const iterator.
  reference operator*()              {
    return table_element_adaptor<tabletype>(table, pos);
  }
  pointer operator->()               { return &(operator*()); }

  // Helper function to assert things are ok; eg pos is still in range
  void check() const {
    assert(table);
    assert(pos <= table->size());
  }

  // Arithmetic: we just do arithmetic on pos.  We don't even need to
  // do bounds checking, since STL doesn't consider that its job.  :-)
  iterator& operator+=(size_type t) { pos += t; check(); return *this; }
  iterator& operator-=(size_type t) { pos -= t; check(); return *this; }
  iterator& operator++()            { ++pos; check(); return *this; }
  iterator& operator--()            { --pos; check(); return *this; }
  iterator operator++(int)          { iterator tmp(*this);     // for x++
                                      ++pos; check(); return tmp; }
  iterator operator--(int)          { iterator tmp(*this);     // for x--
                                      --pos; check(); return tmp; }
  iterator operator+(difference_type i) const  { iterator tmp(*this);
                                                 tmp += i; return tmp; }
  iterator operator-(difference_type i) const  { iterator tmp(*this);
                                                 tmp -= i; return tmp; }
  difference_type operator-(iterator it) const {      // for "x = it2 - it"
    assert(table == it.table);
    return pos - it.pos;
  }
  reference operator[](difference_type n) const {
    return *(*this + n);            // simple though not totally efficient
  }

  // Comparisons.
  bool operator==(const iterator& it) const {
    return table == it.table && pos == it.pos;
  }
  bool operator<(const iterator& it) const {
    assert(table == it.table);              // life is bad bad bad otherwise
    return pos < it.pos;
  }
  bool operator!=(const iterator& it) const { return !(*this == it); }
  bool operator<=(const iterator& it) const { return !(it < *this); }
  bool operator>(const iterator& it) const { return it < *this; }
  bool operator>=(const iterator& it) const { return !(*this < it); }

  // Here's the info we actually need to be an iterator
  tabletype *table;              // so we can dereference and bounds-check
  size_type pos;                 // index into the table
};

// support for "3 + iterator" has to be defined outside the class, alas
template<class T>
table_iterator<T> operator+(typename table_iterator<T>::difference_type i,
                            table_iterator<T> it) {
  return it + i;               // so people can say it2 = 3 + it
}

template <class tabletype>
class const_table_iterator {
 public:
  typedef table_iterator<tabletype> iterator;
  typedef const_table_iterator const_iterator;

  typedef std::random_access_iterator_tag iterator_category;
  typedef typename tabletype::value_type value_type;
  typedef typename tabletype::difference_type difference_type;
  typedef typename tabletype::size_type size_type;
  typedef typename tabletype::const_reference reference;  // we're const-only
  typedef typename tabletype::const_pointer pointer;

  // The "real" constructor
  const_table_iterator(const tabletype *tbl, size_type p)
    : table(tbl), pos(p) { }
  // The default constructor, used when I define vars of type table::iterator
  const_table_iterator() : table(NULL), pos(0) { }
  // The copy constructor, for when I say table::iterator foo = tbl.begin()
  // Also converts normal iterators to const iterators
  const_table_iterator(const iterator &from)
    : table(from.table), pos(from.pos) { }
  // The default destructor is fine; we don't define one
  // The default operator= is fine; we don't define one

  // The main thing our iterator does is dereference.  If the table entry
  // we point to is empty, we return the default value type.
  reference operator*() const       { return (*table)[pos]; }
  pointer operator->() const        { return &(operator*()); }

  // Helper function to assert things are ok; eg pos is still in range
  void check() const {
    assert(table);
    assert(pos <= table->size());
  }

  // Arithmetic: we just do arithmetic on pos.  We don't even need to
  // do bounds checking, since STL doesn't consider that its job.  :-)
  const_iterator& operator+=(size_type t) { pos += t; check(); return *this; }
  const_iterator& operator-=(size_type t) { pos -= t; check(); return *this; }
  const_iterator& operator++()            { ++pos; check(); return *this; }
  const_iterator& operator--()            { --pos; check(); return *this; }
  const_iterator operator++(int)          { const_iterator tmp(*this); // for x++
                                            ++pos; check(); return tmp; }
  const_iterator operator--(int)          { const_iterator tmp(*this); // for x--
                                            --pos; check(); return tmp; }
  const_iterator operator+(difference_type i) const  { const_iterator tmp(*this);
                                                       tmp += i; return tmp; }
  const_iterator operator-(difference_type i) const  { const_iterator tmp(*this);
                                                       tmp -= i; return tmp; }
  difference_type operator-(const_iterator it) const {   // for "x = it2 - it"
    assert(table == it.table);
    return pos - it.pos;
  }
  reference operator[](difference_type n) const {
    return *(*this + n);            // simple though not totally efficient
  }

  // Comparisons.
  bool operator==(const const_iterator& it) const {
    return table == it.table && pos == it.pos;
  }
  bool operator<(const const_iterator& it) const {
    assert(table == it.table);              // life is bad bad bad otherwise
    return pos < it.pos;
  }
  bool operator!=(const const_iterator& it) const { return !(*this == it); }
  bool operator<=(const const_iterator& it) const { return !(it < *this); }
  bool operator>(const const_iterator& it) const { return it < *this; }
  bool operator>=(const const_iterator& it) const { return !(*this < it); }

  // Here's the info we actually need to be an iterator
  const tabletype *table;        // so we can dereference and bounds-check
  size_type pos;                 // index into the table
};

// support for "3 + iterator" has to be defined outside the class, alas
template<class T>
const_table_iterator<T> operator+(typename
                                  const_table_iterator<T>::difference_type i,
                                  const_table_iterator<T> it) {
  return it + i;               // so people can say it2 = 3 + it
}


// ---------------------------------------------------------------------------


/*
// This is a 2-D iterator.  You specify a begin and end over a list
// of *containers*.  We iterate over each container by iterating over
// it.  It's actually simple:
// VECTOR.begin() VECTOR[0].begin()  --------> VECTOR[0].end() ---,
//     |          ________________________________________________/
//     |          \_> VECTOR[1].begin()  -------->  VECTOR[1].end() -,
//     |          ___________________________________________________/
//     v          \_> ......
// VECTOR.end()
//
// It's impossible to do random access on one of these things in constant
// time, so it's just a bidirectional iterator.
//
// Unfortunately, because we need to use this for a non-empty iterator,
// we use nonempty_begin() and nonempty_end() instead of begin() and end()
// (though only going across, not down).
*/

#define TWOD_BEGIN_      nonempty_begin
#define TWOD_END_        nonempty_end
#define TWOD_ITER_       nonempty_iterator
#define TWOD_CONST_ITER_ const_nonempty_iterator

template <class containertype>
class two_d_iterator {
 public:
  typedef two_d_iterator iterator;

  typedef std::bidirectional_iterator_tag iterator_category;
  // apparently some versions of VC++ have trouble with two ::'s in a typename
  typedef typename containertype::value_type _tmp_vt;
  typedef typename _tmp_vt::value_type value_type;
  typedef typename _tmp_vt::difference_type difference_type;
  typedef typename _tmp_vt::reference reference;
  typedef typename _tmp_vt::pointer pointer;

  // The "real" constructor.  begin and end specify how many rows we have
  // (in the diagram above); we always iterate over each row completely.
  two_d_iterator(typename containertype::iterator begin,
                 typename containertype::iterator end,
                 typename containertype::iterator curr)
    : row_begin(begin), row_end(end), row_current(curr), col_current() {
    if ( row_current != row_end ) {
      col_current = row_current->TWOD_BEGIN_();
      advance_past_end();                 // in case cur->begin() == cur->end()
    }
  }
  // If you want to start at an arbitrary place, you can, I guess
  two_d_iterator(typename containertype::iterator begin,
                 typename containertype::iterator end,
                 typename containertype::iterator curr,
                 typename containertype::value_type::TWOD_ITER_ col)
    : row_begin(begin), row_end(end), row_current(curr), col_current(col) {
    advance_past_end();                 // in case cur->begin() == cur->end()
  }
  // The default constructor, used when I define vars of type table::iterator
  two_d_iterator() : row_begin(), row_end(), row_current(), col_current() { }
  // The default destructor is fine; we don't define one
  // The default operator= is fine; we don't define one

  // Happy dereferencer
  reference operator*() const    { return *col_current; }
  pointer operator->() const     { return &(operator*()); }

  // Arithmetic: we just do arithmetic on pos.  We don't even need to
  // do bounds checking, since STL doesn't consider that its job.  :-)
  // NOTE: this is not amortized constant time!  What do we do about it?
  void advance_past_end() {          // used when col_current points to end()
    while ( col_current == row_current->TWOD_END_() ) {  // end of current row
      ++row_current;                                // go to beginning of next
      if ( row_current != row_end )                 // col is irrelevant at end
        col_current = row_current->TWOD_BEGIN_();
      else
        break;                                      // don't go past row_end
    }
  }

  iterator& operator++() {
    assert(row_current != row_end);                 // how to ++ from there?
    ++col_current;
    advance_past_end();                 // in case col_current is at end()
    return *this;
  }
  iterator& operator--() {
    while ( row_current == row_end ||
            col_current == row_current->TWOD_BEGIN_() ) {
      assert(row_current != row_begin);
      --row_current;
      col_current = row_current->TWOD_END_();             // this is 1 too far
    }
    --col_current;
    return *this;
  }
  iterator operator++(int)       { iterator tmp(*this); ++*this; return tmp; }
  iterator operator--(int)       { iterator tmp(*this); --*this; return tmp; }


  // Comparisons.
  bool operator==(const iterator& it) const {
    return ( row_begin == it.row_begin &&
             row_end == it.row_end &&
             row_current == it.row_current &&
             (row_current == row_end || col_current == it.col_current) );
  }
  bool operator!=(const iterator& it) const { return !(*this == it); }


  // Here's the info we actually need to be an iterator
  // These need to be public so we convert from iterator to const_iterator
  typename containertype::iterator row_begin, row_end, row_current;
  typename containertype::value_type::TWOD_ITER_ col_current;
};

// The same thing again, but this time const.  :-(
template <class containertype>
class const_two_d_iterator {
 public:
  typedef const_two_d_iterator iterator;

  typedef std::bidirectional_iterator_tag iterator_category;
  // apparently some versions of VC++ have trouble with two ::'s in a typename
  typedef typename containertype::value_type _tmp_vt;
  typedef typename _tmp_vt::value_type value_type;
  typedef typename _tmp_vt::difference_type difference_type;
  typedef typename _tmp_vt::const_reference reference;
  typedef typename _tmp_vt::const_pointer pointer;

  const_two_d_iterator(typename containertype::const_iterator begin,
                       typename containertype::const_iterator end,
                       typename containertype::const_iterator curr)
    : row_begin(begin), row_end(end), row_current(curr), col_current() {
    if ( curr != end ) {
      col_current = curr->TWOD_BEGIN_();
      advance_past_end();                 // in case cur->begin() == cur->end()
    }
  }
  const_two_d_iterator(typename containertype::const_iterator begin,
                       typename containertype::const_iterator end,
                       typename containertype::const_iterator curr,
                       typename containertype::value_type::TWOD_CONST_ITER_ col)
    : row_begin(begin), row_end(end), row_current(curr), col_current(col) {
    advance_past_end();                 // in case cur->begin() == cur->end()
  }
  const_two_d_iterator()
    : row_begin(), row_end(), row_current(), col_current() {
  }
  // Need this explicitly so we can convert normal iterators to const iterators
  const_two_d_iterator(const two_d_iterator<containertype>& it) :
    row_begin(it.row_begin), row_end(it.row_end), row_current(it.row_current),
    col_current(it.col_current) { }

  typename containertype::const_iterator row_begin, row_end, row_current;
  typename containertype::value_type::TWOD_CONST_ITER_ col_current;


  // EVERYTHING FROM HERE DOWN IS THE SAME AS THE NON-CONST ITERATOR
  reference operator*() const    { return *col_current; }
  pointer operator->() const     { return &(operator*()); }

  void advance_past_end() {          // used when col_current points to end()
    while ( col_current == row_current->TWOD_END_() ) {  // end of current row
      ++row_current;                                // go to beginning of next
      if ( row_current != row_end )                 // col is irrelevant at end
        col_current = row_current->TWOD_BEGIN_();
      else
        break;                                      // don't go past row_end
    }
  }
  iterator& operator++() {
    assert(row_current != row_end);                 // how to ++ from there?
    ++col_current;
    advance_past_end();                 // in case col_current is at end()
    return *this;
  }
  iterator& operator--() {
    while ( row_current == row_end ||
            col_current == row_current->TWOD_BEGIN_() ) {
      assert(row_current != row_begin);
      --row_current;
      col_current = row_current->TWOD_END_();             // this is 1 too far
    }
    --col_current;
    return *this;
  }
  iterator operator++(int)       { iterator tmp(*this); ++*this; return tmp; }
  iterator operator--(int)       { iterator tmp(*this); --*this; return tmp; }

  bool operator==(const iterator& it) const {
    return ( row_begin == it.row_begin &&
             row_end == it.row_end &&
             row_current == it.row_current &&
             (row_current == row_end || col_current == it.col_current) );
  }
  bool operator!=(const iterator& it) const { return !(*this == it); }
};

// We provide yet another version, to be as frugal with memory as
// possible.  This one frees each block of memory as it finishes
// iterating over it.  By the end, the entire table is freed.
// For understandable reasons, you can only iterate over it once,
// which is why it's an input iterator
template <class containertype>
class destructive_two_d_iterator {
 public:
  typedef destructive_two_d_iterator iterator;

  typedef std::input_iterator_tag iterator_category;
  // apparently some versions of VC++ have trouble with two ::'s in a typename
  typedef typename containertype::value_type _tmp_vt;
  typedef typename _tmp_vt::value_type value_type;
  typedef typename _tmp_vt::difference_type difference_type;
  typedef typename _tmp_vt::reference reference;
  typedef typename _tmp_vt::pointer pointer;

  destructive_two_d_iterator(typename containertype::iterator begin,
                             typename containertype::iterator end,
                             typename containertype::iterator curr)
    : row_begin(begin), row_end(end), row_current(curr), col_current() {
    if ( curr != end ) {
      col_current = curr->TWOD_BEGIN_();
      advance_past_end();                 // in case cur->begin() == cur->end()
    }
  }
  destructive_two_d_iterator(typename containertype::iterator begin,
                             typename containertype::iterator end,
                             typename containertype::iterator curr,
                             typename containertype::value_type::TWOD_ITER_ col)
    : row_begin(begin), row_end(end), row_current(curr), col_current(col) {
    advance_past_end();                 // in case cur->begin() == cur->end()
  }
  destructive_two_d_iterator()
    : row_begin(), row_end(), row_current(), col_current() {
  }

  typename containertype::iterator row_begin, row_end, row_current;
  typename containertype::value_type::TWOD_ITER_ col_current;

  // This is the part that destroys
  void advance_past_end() {          // used when col_current points to end()
    while ( col_current == row_current->TWOD_END_() ) {  // end of current row
      row_current->clear();                         // the destructive part
      // It would be nice if we could decrement sparsetable->num_buckets here
      ++row_current;                                // go to beginning of next
      if ( row_current != row_end )                 // col is irrelevant at end
        col_current = row_current->TWOD_BEGIN_();
      else
        break;                                      // don't go past row_end
    }
  }

  // EVERYTHING FROM HERE DOWN IS THE SAME AS THE REGULAR ITERATOR
  reference operator*() const    { return *col_current; }
  pointer operator->() const     { return &(operator*()); }

  iterator& operator++() {
    assert(row_current != row_end);                 // how to ++ from there?
    ++col_current;
    advance_past_end();                 // in case col_current is at end()
    return *this;
  }
  iterator operator++(int)       { iterator tmp(*this); ++*this; return tmp; }

  bool operator==(const iterator& it) const {
    return ( row_begin == it.row_begin &&
             row_end == it.row_end &&
             row_current == it.row_current &&
             (row_current == row_end || col_current == it.col_current) );
  }
  bool operator!=(const iterator& it) const { return !(*this == it); }
};

#undef TWOD_BEGIN_
#undef TWOD_END_
#undef TWOD_ITER_
#undef TWOD_CONST_ITER_




// SPARSE-TABLE
// ------------
// The idea is that a table with (logically) t buckets is divided
// into t/M *groups* of M buckets each.  (M is a constant set in
// GROUP_SIZE for efficiency.)  Each group is stored sparsely.
// Thus, inserting into the table causes some array to grow, which is
// slow but still constant time.  Lookup involves doing a
// logical-position-to-sparse-position lookup, which is also slow but
// constant time.  The larger M is, the slower these operations are
// but the less overhead (slightly).
//
// To store the sparse array, we store a bitmap B, where B[i] = 1 iff
// bucket i is non-empty.  Then to look up bucket i we really look up
// array[# of 1s before i in B].  This is constant time for fixed M.
//
// Terminology: the position of an item in the overall table (from
// 1 .. t) is called its "location."  The logical position in a group
// (from 1 .. M ) is called its "position."  The actual location in
// the array (from 1 .. # of non-empty buckets in the group) is
// called its "offset."

template <class T, u_int16_t GROUP_SIZE, class Alloc>
class sparsegroup {
 private:
  typedef typename Alloc::template rebind<T>::other value_alloc_type;

 public:
  // Basic types
  typedef T value_type;
  typedef Alloc allocator_type;
  typedef typename value_alloc_type::reference reference;
  typedef typename value_alloc_type::const_reference const_reference;
  typedef typename value_alloc_type::pointer pointer;
  typedef typename value_alloc_type::const_pointer const_pointer;

  typedef table_iterator<sparsegroup<T, GROUP_SIZE, Alloc> > iterator;
  typedef const_table_iterator<sparsegroup<T, GROUP_SIZE, Alloc> >
      const_iterator;
  typedef table_element_adaptor<sparsegroup<T, GROUP_SIZE, Alloc> >
      element_adaptor;
  typedef u_int16_t size_type;                  // max # of buckets
  typedef int16_t difference_type;
  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
  typedef std::reverse_iterator<iterator> reverse_iterator;   // from iterator.h

  // These are our special iterators, that go over non-empty buckets in a
  // group.  These aren't const-only because you can change non-empty bcks.
  typedef pointer nonempty_iterator;
  typedef const_pointer const_nonempty_iterator;
  typedef std::reverse_iterator<nonempty_iterator> reverse_nonempty_iterator;
  typedef std::reverse_iterator<const_nonempty_iterator> const_reverse_nonempty_iterator;

  // Iterator functions
  iterator begin()                      { return iterator(this, 0); }
  const_iterator begin() const          { return const_iterator(this, 0); }
  iterator end()                        { return iterator(this, size()); }
  const_iterator end() const            { return const_iterator(this, size()); }
  reverse_iterator rbegin()             { return reverse_iterator(end()); }
  const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
  reverse_iterator rend()               { return reverse_iterator(begin()); }
  const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }

  // We'll have versions for our special non-empty iterator too
  nonempty_iterator nonempty_begin()             { return group; }
  const_nonempty_iterator nonempty_begin() const { return group; }
  nonempty_iterator nonempty_end() {
    return group + settings.num_buckets;
  }
  const_nonempty_iterator nonempty_end() const {
    return group + settings.num_buckets;
  }
  reverse_nonempty_iterator nonempty_rbegin() {
    return reverse_nonempty_iterator(nonempty_end());
  }
  const_reverse_nonempty_iterator nonempty_rbegin() const {
    return const_reverse_nonempty_iterator(nonempty_end());
  }
  reverse_nonempty_iterator nonempty_rend() {
    return reverse_nonempty_iterator(nonempty_begin());
  }
  const_reverse_nonempty_iterator nonempty_rend() const {
    return const_reverse_nonempty_iterator(nonempty_begin());
  }


  // This gives us the "default" value to return for an empty bucket.
  // We just use the default constructor on T, the template type
  const_reference default_value() const {
    static value_type defaultval = value_type();
    return defaultval;
  }


 private:
  // We need to do all this bit manipulation, of course.  ick
  static size_type charbit(size_type i)  { return i >> 3; }
  static size_type modbit(size_type i)   { return 1 << (i&7); }
  int bmtest(size_type i) const    { return bitmap[charbit(i)] & modbit(i); }
  void bmset(size_type i)          { bitmap[charbit(i)] |= modbit(i); }
  void bmclear(size_type i)        { bitmap[charbit(i)] &= ~modbit(i); }

  pointer allocate_group(size_type n) {
    pointer retval = settings.allocate(n);
    if (retval == NULL) {
      // We really should use PRIuS here, but I don't want to have to add
      // a whole new configure option, with concomitant macro namespace
      // pollution, just to print this (unlikely) error message.  So I cast.
      fprintf(stderr, "sparsehash FATAL ERROR: failed to allocate %lu groups\n",
              static_cast<unsigned long>(n));
      exit(1);
    }
    return retval;
  }

  void free_group() {
    if (!group)  return;
    pointer end_it = group + settings.num_buckets;
    for (pointer p = group; p != end_it; ++p)
      p->~value_type();
    settings.deallocate(group, settings.num_buckets);
    group = NULL;
  }

  static size_type bits_in_char(unsigned char c) {
    // We could make these ints.  The tradeoff is size (eg does it overwhelm
    // the cache?) vs efficiency in referencing sub-word-sized array elements.
    static const char bits_in[256] = {
      0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
      1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
      1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
      1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
      3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
      4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
    };
    return bits_in[c];
  }

 public:                         // get_iter() in sparsetable needs it
  // We need a small function that tells us how many set bits there are
  // in positions 0..i-1 of the bitmap.  It uses a big table.
  // We make it static so templates don't allocate lots of these tables.
  // There are lots of ways to do this calculation (called 'popcount').
  // The 8-bit table lookup is one of the fastest, though this
  // implementation suffers from not doing any loop unrolling.  See, eg,
  //   http://www.dalkescientific.com/writings/diary/archive/2008/07/03/hakmem_and_other_popcounts.html
  //   http://gurmeetsingh.wordpress.com/2008/08/05/fast-bit-counting-routines/
  static size_type pos_to_offset(const unsigned char *bm, size_type pos) {
    size_type retval = 0;

    // [Note: condition pos > 8 is an optimization; convince yourself we
    // give exactly the same result as if we had pos >= 8 here instead.]
    for ( ; pos > 8; pos -= 8 )                   // bm[0..pos/8-1]
      retval += bits_in_char(*bm++);              // chars we want *all* bits in
    return retval + bits_in_char(*bm & ((1 << pos)-1));    // char including pos
  }

  size_type pos_to_offset(size_type pos) const {  // not static but still const
    return pos_to_offset(bitmap, pos);
  }

  // Returns the (logical) position in the bm[] array, i, such that
  // bm[i] is the offset-th set bit in the array.  It is the inverse
  // of pos_to_offset.  get_pos() uses this function to find the index
  // of an nonempty_iterator in the table.  Bit-twiddling from
  // http://hackersdelight.org/basics.pdf
  static size_type offset_to_pos(const unsigned char *bm, size_type offset) {
    size_type retval = 0;
    // This is sizeof(this->bitmap).
    const size_type group_size = (GROUP_SIZE-1) / 8 + 1;
    for (size_type i = 0; i < group_size; i++) {   // forward scan
      const size_type pop_count = bits_in_char(*bm);
      if (pop_count > offset) {
        unsigned char last_bm = *bm;
        for (; offset > 0; offset--) {
          last_bm &= (last_bm-1);  // remove right-most set bit
        }
        // Clear all bits to the left of the rightmost bit (the &),
        // and then clear the rightmost bit but set all bits to the
        // right of it (the -1).
        last_bm = (last_bm & -last_bm) - 1;
        retval += bits_in_char(last_bm);
        return retval;
      }
      offset -= pop_count;
      retval += 8;
      bm++;
    }
    return retval;
  }

  size_type offset_to_pos(size_type offset) const {
    return offset_to_pos(bitmap, offset);
  }


 public:
  // Constructors -- default and copy -- and destructor
  explicit sparsegroup(allocator_type& a) :
      group(0), settings(alloc_impl<value_alloc_type>(a)) {
    memset(bitmap, 0, sizeof(bitmap));
  }
  sparsegroup(const sparsegroup& x) : group(0), settings(x.settings) {
    if ( settings.num_buckets ) {
      group = allocate_group(x.settings.num_buckets);
      std::uninitialized_copy(x.group, x.group + x.settings.num_buckets, group);
    }
    memcpy(bitmap, x.bitmap, sizeof(bitmap));
  }
  ~sparsegroup() { free_group(); }

  // Operator= is just like the copy constructor, I guess
  // TODO(austern): Make this exception safe. Handle exceptions in value_type's
  // copy constructor.
  sparsegroup &operator=(const sparsegroup& x) {
    if ( &x == this ) return *this;                    // x = x
    if ( x.settings.num_buckets == 0 ) {
      free_group();
    } else {
      pointer p = allocate_group(x.settings.num_buckets);
      std::uninitialized_copy(x.group, x.group + x.settings.num_buckets, p);
      free_group();
      group = p;
    }
    memcpy(bitmap, x.bitmap, sizeof(bitmap));
    settings.num_buckets = x.settings.num_buckets;
    return *this;
  }

  // Many STL algorithms use swap instead of copy constructors
  void swap(sparsegroup& x) {
    std::swap(group, x.group);                // defined in <algorithm>
    for ( int i = 0; i < sizeof(bitmap) / sizeof(*bitmap); ++i )
      std::swap(bitmap[i], x.bitmap[i]);      // swap not defined on arrays
    std::swap(settings.num_buckets, x.settings.num_buckets);
    // we purposefully don't swap the allocator, which may not be swap-able
  }

  // It's always nice to be able to clear a table without deallocating it
  void clear() {
    free_group();
    memset(bitmap, 0, sizeof(bitmap));
    settings.num_buckets = 0;
  }

  // Functions that tell you about size.  Alas, these aren't so useful
  // because our table is always fixed size.
  size_type size() const           { return GROUP_SIZE; }
  size_type max_size() const       { return GROUP_SIZE; }
  bool empty() const               { return false; }
  // We also may want to know how many *used* buckets there are
  size_type num_nonempty() const   { return settings.num_buckets; }


  // get()/set() are explicitly const/non-const.  You can use [] if
  // you want something that can be either (potentially more expensive).
  const_reference get(size_type i) const {
    if ( bmtest(i) )           // bucket i is occupied
      return group[pos_to_offset(bitmap, i)];
    else
      return default_value();  // return the default reference
  }

  // TODO(csilvers): make protected + friend
  // This is used by sparse_hashtable to get an element from the table
  // when we know it exists.
  const_reference unsafe_get(size_type i) const {
    assert(bmtest(i));
    return group[pos_to_offset(bitmap, i)];
  }

  // TODO(csilvers): make protected + friend
  reference mutating_get(size_type i) {    // fills bucket i before getting
    if ( !bmtest(i) )
      set(i, default_value());
    return group[pos_to_offset(bitmap, i)];
  }

  // Syntactic sugar.  It's easy to return a const reference.  To
  // return a non-const reference, we need to use the assigner adaptor.
  const_reference operator[](size_type i) const {
    return get(i);
  }

  element_adaptor operator[](size_type i) {
    return element_adaptor(this, i);
  }

 private:
  // Create space at group[offset], assuming value_type has trivial
  // copy constructor and destructor, and the allocator_type is
  // the default libc_allocator_with_alloc.  (Really, we want it to have
  // "trivial move", because that's what realloc and memmove both do.
  // But there's no way to capture that using type_traits, so we
  // pretend that move(x, y) is equivalent to "x.~T(); new(x) T(y);"
  // which is pretty much correct, if a bit conservative.)
  void set_aux(size_type offset, base::true_type) {
    group = settings.realloc_or_die(group, settings.num_buckets+1);
    // This is equivalent to memmove(), but faster on my Intel P4,
    // at least with gcc4.1 -O2 / glibc 2.3.6.
    for (size_type i = settings.num_buckets; i > offset; --i)
      memcpy(group + i, group + i-1, sizeof(*group));
  }

  // Create space at group[offset], without special assumptions about value_type
  // and allocator_type.
  void set_aux(size_type offset, base::false_type) {
    // This is valid because 0 <= offset <= num_buckets
    pointer p = allocate_group(settings.num_buckets + 1);
    std::uninitialized_copy(group, group + offset, p);
    std::uninitialized_copy(group + offset, group + settings.num_buckets,
                            p + offset + 1);
    free_group();
    group = p;
  }

 public:
  // This returns a reference to the inserted item (which is a copy of val).
  // TODO(austern): Make this exception safe: handle exceptions from
  // value_type's copy constructor.
  reference set(size_type i, const_reference val) {
    size_type offset = pos_to_offset(bitmap, i);  // where we'll find (or insert)
    if ( bmtest(i) ) {
      // Delete the old value, which we're replacing with the new one
      group[offset].~value_type();
    } else {
      typedef base::integral_constant<bool,
          (base::has_trivial_copy<value_type>::value &&
           base::has_trivial_destructor<value_type>::value &&
           base::is_same<
               allocator_type,
               libc_allocator_with_realloc<value_type> >::value)>
          realloc_and_memmove_ok; // we pretend mv(x,y) == "x.~T(); new(x) T(y)"
      set_aux(offset, realloc_and_memmove_ok());
      ++settings.num_buckets;
      bmset(i);
    }
    // This does the actual inserting.  Since we made the array using
    // malloc, we use "placement new" to just call the constructor.
    new(&group[offset]) value_type(val);
    return group[offset];
  }

  // We let you see if a bucket is non-empty without retrieving it
  bool test(size_type i) const {
    return bmtest(i) != 0;
  }
  bool test(iterator pos) const {
    return bmtest(pos.pos) != 0;
  }

 private:
  // Shrink the array, assuming value_type has trivial copy
  // constructor and destructor, and the allocator_type is the default
  // libc_allocator_with_alloc.  (Really, we want it to have "trivial
  // move", because that's what realloc and memmove both do.  But
  // there's no way to capture that using type_traits, so we pretend
  // that move(x, y) is equivalent to ""x.~T(); new(x) T(y);"
  // which is pretty much correct, if a bit conservative.)
  void erase_aux(size_type offset, base::true_type) {
    // This isn't technically necessary, since we know we have a
    // trivial destructor, but is a cheap way to get a bit more safety.
    group[offset].~value_type();
    // This is equivalent to memmove(), but faster on my Intel P4,
    // at lesat with gcc4.1 -O2 / glibc 2.3.6.
    assert(settings.num_buckets > 0);
    for (size_type i = offset; i < settings.num_buckets-1; ++i)
      memcpy(group + i, group + i+1, sizeof(*group));  // hopefully inlined!
    group = settings.realloc_or_die(group, settings.num_buckets-1);
  }

  // Shrink the array, without any special assumptions about value_type and
  // allocator_type.
  void erase_aux(size_type offset, base::false_type) {
    // This is valid because 0 <= offset < num_buckets. Note the inequality.
    pointer p = allocate_group(settings.num_buckets - 1);
    std::uninitialized_copy(group, group + offset, p);
    std::uninitialized_copy(group + offset + 1, group + settings.num_buckets,
                            p + offset);
    free_group();
    group = p;
  }

 public:
  // This takes the specified elements out of the group.  This is
  // "undefining", rather than "clearing".
  // TODO(austern): Make this exception safe: handle exceptions from
  // value_type's copy constructor.
  void erase(size_type i) {
    if ( bmtest(i) ) {                         // trivial to erase empty bucket
      size_type offset = pos_to_offset(bitmap,i); // where we'll find (or insert)
      if ( settings.num_buckets == 1 ) {
        free_group();
        group = NULL;
      } else {
        typedef base::integral_constant<bool,
            (base::has_trivial_copy<value_type>::value &&
             base::has_trivial_destructor<value_type>::value &&
             base::is_same<
                 allocator_type,
                 libc_allocator_with_realloc<value_type> >::value)>
            realloc_and_memmove_ok; // pretend mv(x,y) == "x.~T(); new(x) T(y)"
        erase_aux(offset, realloc_and_memmove_ok());
      }
      --settings.num_buckets;
      bmclear(i);
    }
  }

  void erase(iterator pos) {
    erase(pos.pos);
  }

  void erase(iterator start_it, iterator end_it) {
    // This could be more efficient, but to do so we'd need to make
    // bmclear() clear a range of indices.  Doesn't seem worth it.
    for ( ; start_it != end_it; ++start_it )
      erase(start_it);
  }


  // I/O
  // We support reading and writing groups to disk.  We don't store
  // the actual array contents (which we don't know how to store),
  // just the bitmap and size.  Meant to be used with table I/O.

  template <typename OUTPUT> bool write_metadata(OUTPUT *fp) const {
    // we explicitly set to u_int16_t
    assert(sizeof(settings.num_buckets) == 2);
    if ( !sparsehash_internal::write_bigendian_number(fp, settings.num_buckets,
                                                      2) )
      return false;
    if ( !sparsehash_internal::write_data(fp, bitmap, sizeof(bitmap)) )
      return false;
    return true;
  }

  // Reading destroys the old group contents!  Returns true if all was ok.
  template <typename INPUT> bool read_metadata(INPUT *fp) {
    clear();
    if ( !sparsehash_internal::read_bigendian_number(fp, &settings.num_buckets,
                                                     2) )
      return false;
    if ( !sparsehash_internal::read_data(fp, bitmap, sizeof(bitmap)) )
      return false;
    // We'll allocate the space, but we won't fill it: it will be
    // left as uninitialized raw memory.
    group = allocate_group(settings.num_buckets);
    return true;
  }

  // Again, only meaningful if value_type is a POD.
  template <typename INPUT> bool read_nopointer_data(INPUT *fp) {
     for ( nonempty_iterator it = nonempty_begin();
           it != nonempty_end(); ++it ) {
       if ( !sparsehash_internal::read_data(fp, &(*it), sizeof(*it)) )
         return false;
     }
     return true;
  }

  // If your keys and values are simple enough, we can write them
  // to disk for you.  "simple enough" means POD and no pointers.
  // However, we don't try to normalize endianness.
  template <typename OUTPUT> bool write_nopointer_data(OUTPUT *fp) const {
    for ( const_nonempty_iterator it = nonempty_begin();
          it != nonempty_end(); ++it ) {
      if ( !sparsehash_internal::write_data(fp, &(*it), sizeof(*it)) )
        return false;
    }
    return true;
  }


  // Comparisons.  We only need to define == and < -- we get
  // != > <= >= via relops.h (which we happily included above).
  // Note the comparisons are pretty arbitrary: we compare
  // values of the first index that isn't equal (using default
  // value for empty buckets).
  bool operator==(const sparsegroup& x) const {
    return ( settings.num_buckets == x.settings.num_buckets &&
             memcmp(bitmap, x.bitmap, sizeof(bitmap)) == 0 &&
             std::equal(begin(), end(), x.begin()) );    // from <algorithm>
  }

  bool operator<(const sparsegroup& x) const {      // also from <algorithm>
    return std::lexicographical_compare(begin(), end(), x.begin(), x.end());
  }
  bool operator!=(const sparsegroup& x) const { return !(*this == x); }
  bool operator<=(const sparsegroup& x) const { return !(x < *this); }
  bool operator>(const sparsegroup& x) const { return x < *this; }
  bool operator>=(const sparsegroup& x) const { return !(*this < x); }

 private:
  template <class A>
  class alloc_impl : public A {
   public:
    typedef typename A::pointer pointer;
    typedef typename A::size_type size_type;

    // Convert a normal allocator to one that has realloc_or_die()
    alloc_impl(const A& a) : A(a) { }

    // realloc_or_die should only be used when using the default
    // allocator (libc_allocator_with_realloc).
    pointer realloc_or_die(pointer /*ptr*/, size_type /*n*/) {
      fprintf(stderr, "realloc_or_die is only supported for "
                      "libc_allocator_with_realloc\n");
      exit(1);
      return NULL;
    }
  };

  // A template specialization of alloc_impl for
  // libc_allocator_with_realloc that can handle realloc_or_die.
  template <class A>
  class alloc_impl<libc_allocator_with_realloc<A> >
      : public libc_allocator_with_realloc<A> {
   public:
    typedef typename libc_allocator_with_realloc<A>::pointer pointer;
    typedef typename libc_allocator_with_realloc<A>::size_type size_type;

    alloc_impl(const libc_allocator_with_realloc<A>& a)
        : libc_allocator_with_realloc<A>(a) { }

    pointer realloc_or_die(pointer ptr, size_type n) {
      pointer retval = this->reallocate(ptr, n);
      if (retval == NULL) {
        fprintf(stderr, "sparsehash: FATAL ERROR: failed to reallocate "
                "%lu elements for ptr %p", static_cast<unsigned long>(n), ptr);
        exit(1);
      }
      return retval;
    }
  };

  // Package allocator with num_buckets to eliminate memory needed for the
  // zero-size allocator.
  // If new fields are added to this class, we should add them to
  // operator= and swap.
  class Settings : public alloc_impl<value_alloc_type> {
   public:
    Settings(const alloc_impl<value_alloc_type>& a, u_int16_t n = 0)
        : alloc_impl<value_alloc_type>(a), num_buckets(n) { }
    Settings(const Settings& s)
        : alloc_impl<value_alloc_type>(s), num_buckets(s.num_buckets) { }

    u_int16_t num_buckets;                    // limits GROUP_SIZE to 64K
  };

  // The actual data
  pointer group;                              // (small) array of T's
  Settings settings;                          // allocator and num_buckets
  unsigned char bitmap[(GROUP_SIZE-1)/8 + 1]; // fancy math is so we round up
};

// We need a global swap as well
template <class T, u_int16_t GROUP_SIZE, class Alloc>
inline void swap(sparsegroup<T,GROUP_SIZE,Alloc> &x,
                 sparsegroup<T,GROUP_SIZE,Alloc> &y) {
  x.swap(y);
}

// ---------------------------------------------------------------------------


template <class T, u_int16_t GROUP_SIZE = DEFAULT_SPARSEGROUP_SIZE,
          class Alloc = libc_allocator_with_realloc<T> >
class sparsetable {
 private:
  typedef typename Alloc::template rebind<T>::other value_alloc_type;
  typedef typename Alloc::template rebind<
      sparsegroup<T, GROUP_SIZE, value_alloc_type> >::other vector_alloc;

 public:
  // Basic types
  typedef T value_type;                        // stolen from stl_vector.h
  typedef Alloc allocator_type;
  typedef typename value_alloc_type::size_type size_type;
  typedef typename value_alloc_type::difference_type difference_type;
  typedef typename value_alloc_type::reference reference;
  typedef typename value_alloc_type::const_reference const_reference;
  typedef typename value_alloc_type::pointer pointer;
  typedef typename value_alloc_type::const_pointer const_pointer;
  typedef table_iterator<sparsetable<T, GROUP_SIZE, Alloc> > iterator;
  typedef const_table_iterator<sparsetable<T, GROUP_SIZE, Alloc> >
      const_iterator;
  typedef table_element_adaptor<sparsetable<T, GROUP_SIZE, Alloc> >
      element_adaptor;
  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
  typedef std::reverse_iterator<iterator> reverse_iterator;   // from iterator.h

  // These are our special iterators, that go over non-empty buckets in a
  // table.  These aren't const only because you can change non-empty bcks.
  typedef two_d_iterator< std::vector< sparsegroup<value_type, GROUP_SIZE,
                                                   value_alloc_type>,
                                  vector_alloc> >
     nonempty_iterator;
  typedef const_two_d_iterator< std::vector< sparsegroup<value_type,
                                                         GROUP_SIZE,
                                                         value_alloc_type>,
                                        vector_alloc> >
     const_nonempty_iterator;
  typedef std::reverse_iterator<nonempty_iterator> reverse_nonempty_iterator;
  typedef std::reverse_iterator<const_nonempty_iterator> const_reverse_nonempty_iterator;
  // Another special iterator: it frees memory as it iterates (used to resize)
  typedef destructive_two_d_iterator< std::vector< sparsegroup<value_type,
                                                               GROUP_SIZE,
                                                               value_alloc_type>,
                                              vector_alloc> >
     destructive_iterator;

  // Iterator functions
  iterator begin()                      { return iterator(this, 0); }
  const_iterator begin() const          { return const_iterator(this, 0); }
  iterator end()                        { return iterator(this, size()); }
  const_iterator end() const            { return const_iterator(this, size()); }
  reverse_iterator rbegin()             { return reverse_iterator(end()); }
  const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
  reverse_iterator rend()               { return reverse_iterator(begin()); }
  const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }

  // Versions for our special non-empty iterator
  nonempty_iterator nonempty_begin()             {
    return nonempty_iterator(groups.begin(), groups.end(), groups.begin());
  }
  const_nonempty_iterator nonempty_begin() const {
    return const_nonempty_iterator(groups.begin(),groups.end(), groups.begin());
  }
  nonempty_iterator nonempty_end() {
    return nonempty_iterator(groups.begin(), groups.end(), groups.end());
  }
  const_nonempty_iterator nonempty_end() const {
    return const_nonempty_iterator(groups.begin(), groups.end(), groups.end());
  }
  reverse_nonempty_iterator nonempty_rbegin() {
    return reverse_nonempty_iterator(nonempty_end());
  }
  const_reverse_nonempty_iterator nonempty_rbegin() const {
    return const_reverse_nonempty_iterator(nonempty_end());
  }
  reverse_nonempty_iterator nonempty_rend() {
    return reverse_nonempty_iterator(nonempty_begin());
  }
  const_reverse_nonempty_iterator nonempty_rend() const {
    return const_reverse_nonempty_iterator(nonempty_begin());
  }
  destructive_iterator destructive_begin() {
    return destructive_iterator(groups.begin(), groups.end(), groups.begin());
  }
  destructive_iterator destructive_end() {
    return destructive_iterator(groups.begin(), groups.end(), groups.end());
  }

  typedef sparsegroup<value_type, GROUP_SIZE, allocator_type> group_type;
  typedef std::vector<group_type, vector_alloc > group_vector_type;

  typedef typename group_vector_type::reference GroupsReference;
  typedef typename group_vector_type::const_reference GroupsConstReference;
  typedef typename group_vector_type::iterator GroupsIterator;
  typedef typename group_vector_type::const_iterator GroupsConstIterator;

  // How to deal with the proper group
  static size_type num_groups(size_type num) {   // how many to hold num buckets
    return num == 0 ? 0 : ((num-1) / GROUP_SIZE) + 1;
  }

  u_int16_t pos_in_group(size_type i) const {
    return static_cast<u_int16_t>(i % GROUP_SIZE);
  }
  size_type group_num(size_type i) const {
    return i / GROUP_SIZE;
  }
  GroupsReference which_group(size_type i) {
    return groups[group_num(i)];
  }
  GroupsConstReference which_group(size_type i) const {
    return groups[group_num(i)];
  }

 public:
  // Constructors -- default, normal (when you specify size), and copy
  explicit sparsetable(size_type sz = 0, Alloc alloc = Alloc())
      : groups(vector_alloc(alloc)), settings(alloc, sz) {
    groups.resize(num_groups(sz), group_type(settings));
  }
  // We can get away with using the default copy constructor,
  // and default destructor, and hence the default operator=.  Huzzah!

  // Many STL algorithms use swap instead of copy constructors
  void swap(sparsetable& x) {
    std::swap(groups, x.groups);              // defined in stl_algobase.h
    std::swap(settings.table_size, x.settings.table_size);
    std::swap(settings.num_buckets, x.settings.num_buckets);
  }

  // It's always nice to be able to clear a table without deallocating it
  void clear() {
    GroupsIterator group;
    for ( group = groups.begin(); group != groups.end(); ++group ) {
      group->clear();
    }
    settings.num_buckets = 0;
  }

  // ACCESSOR FUNCTIONS for the things we templatize on, basically
  allocator_type get_allocator() const {
    return allocator_type(settings);
  }


  // Functions that tell you about size.
  // NOTE: empty() is non-intuitive!  It does not tell you the number
  // of not-empty buckets (use num_nonempty() for that).  Instead
  // it says whether you've allocated any buckets or not.
  size_type size() const           { return settings.table_size; }
  size_type max_size() const       { return settings.max_size(); }
  bool empty() const               { return settings.table_size == 0; }
  // We also may want to know how many *used* buckets there are
  size_type num_nonempty() const   { return settings.num_buckets; }

  // OK, we'll let you resize one of these puppies
  void resize(size_type new_size) {
    groups.resize(num_groups(new_size), group_type(settings));
    if ( new_size < settings.table_size) {
      // lower num_buckets, clear last group
      if ( pos_in_group(new_size) > 0 )     // need to clear inside last group
        groups.back().erase(groups.back().begin() + pos_in_group(new_size),
                            groups.back().end());
      settings.num_buckets = 0;                   // refigure # of used buckets
      GroupsConstIterator group;
      for ( group = groups.begin(); group != groups.end(); ++group )
        settings.num_buckets += group->num_nonempty();
    }
    settings.table_size = new_size;
  }


  // We let you see if a bucket is non-empty without retrieving it
  bool test(size_type i) const {
    assert(i < settings.table_size);
    return which_group(i).test(pos_in_group(i));
  }
  bool test(iterator pos) const {
    return which_group(pos.pos).test(pos_in_group(pos.pos));
  }
  bool test(const_iterator pos) const {
    return which_group(pos.pos).test(pos_in_group(pos.pos));
  }

  // We only return const_references because it's really hard to
  // return something settable for empty buckets.  Use set() instead.
  const_reference get(size_type i) const {
    assert(i < settings.table_size);
    return which_group(i).get(pos_in_group(i));
  }

  // TODO(csilvers): make protected + friend
  // This is used by sparse_hashtable to get an element from the table
  // when we know it exists (because the caller has called test(i)).
  const_reference unsafe_get(size_type i) const {
    assert(i < settings.table_size);
    assert(test(i));
    return which_group(i).unsafe_get(pos_in_group(i));
  }

  // TODO(csilvers): make protected + friend element_adaptor
  reference mutating_get(size_type i) {    // fills bucket i before getting
    assert(i < settings.table_size);
    typename group_type::size_type old_numbuckets = which_group(i).num_nonempty();
    reference retval = which_group(i).mutating_get(pos_in_group(i));
    settings.num_buckets += which_group(i).num_nonempty() - old_numbuckets;
    return retval;
  }

  // Syntactic sugar.  As in sparsegroup, the non-const version is harder
  const_reference operator[](size_type i) const {
    return get(i);
  }

  element_adaptor operator[](size_type i) {
    return element_adaptor(this, i);
  }

  // Needed for hashtables, gets as a nonempty_iterator.  Crashes for empty bcks
  const_nonempty_iterator get_iter(size_type i) const {
    assert(test(i));    // how can a nonempty_iterator point to an empty bucket?
    return const_nonempty_iterator(
      groups.begin(), groups.end(),
      groups.begin() + group_num(i),
      (groups[group_num(i)].nonempty_begin() +
       groups[group_num(i)].pos_to_offset(pos_in_group(i))));
  }
  // For nonempty we can return a non-const version
  nonempty_iterator get_iter(size_type i) {
    assert(test(i));    // how can a nonempty_iterator point to an empty bucket?
    return nonempty_iterator(
      groups.begin(), groups.end(),
      groups.begin() + group_num(i),
      (groups[group_num(i)].nonempty_begin() +
       groups[group_num(i)].pos_to_offset(pos_in_group(i))));
  }

  // And the reverse transformation.
  size_type get_pos(const const_nonempty_iterator it) const {
    difference_type current_row = it.row_current - it.row_begin;
    difference_type current_col = (it.col_current -
                                   groups[current_row].nonempty_begin());
    return ((current_row * GROUP_SIZE) +
            groups[current_row].offset_to_pos(current_col));
  }


  // This returns a reference to the inserted item (which is a copy of val)
  // The trick is to figure out whether we're replacing or inserting anew
  reference set(size_type i, const_reference val) {
    assert(i < settings.table_size);
    typename group_type::size_type old_numbuckets = which_group(i).num_nonempty();
    reference retval = which_group(i).set(pos_in_group(i), val);
    settings.num_buckets += which_group(i).num_nonempty() - old_numbuckets;
    return retval;
  }

  // This takes the specified elements out of the table.  This is
  // "undefining", rather than "clearing".
  void erase(size_type i) {
    assert(i < settings.table_size);
    typename group_type::size_type old_numbuckets = which_group(i).num_nonempty();
    which_group(i).erase(pos_in_group(i));
    settings.num_buckets += which_group(i).num_nonempty() - old_numbuckets;
  }

  void erase(iterator pos) {
    erase(pos.pos);
  }

  void erase(iterator start_it, iterator end_it) {
    // This could be more efficient, but then we'd need to figure
    // out if we spanned groups or not.  Doesn't seem worth it.
    for ( ; start_it != end_it; ++start_it )
      erase(start_it);
  }


  // We support reading and writing tables to disk.  We don't store
  // the actual array contents (which we don't know how to store),
  // just the groups and sizes.  Returns true if all went ok.

 private:
  // Every time the disk format changes, this should probably change too
  typedef unsigned long MagicNumberType;
  static const MagicNumberType MAGIC_NUMBER = 0x24687531;

  // Old versions of this code write all data in 32 bits.  We need to
  // support these files as well as having support for 64-bit systems.
  // So we use the following encoding scheme: for values < 2^32-1, we
  // store in 4 bytes in big-endian order.  For values > 2^32, we
  // store 0xFFFFFFF followed by 8 bytes in big-endian order.  This
  // causes us to mis-read old-version code that stores exactly
  // 0xFFFFFFF, but I don't think that is likely to have happened for
  // these particular values.
  template <typename OUTPUT, typename IntType>
  static bool write_32_or_64(OUTPUT* fp, IntType value) {
    if ( value < 0xFFFFFFFFULL ) {        // fits in 4 bytes
      if ( !sparsehash_internal::write_bigendian_number(fp, value, 4) )
        return false;
    } else {
      if ( !sparsehash_internal::write_bigendian_number(fp, 0xFFFFFFFFUL, 4) )
        return false;
      if ( !sparsehash_internal::write_bigendian_number(fp, value, 8) )
        return false;
    }
    return true;
  }

  template <typename INPUT, typename IntType>
  static bool read_32_or_64(INPUT* fp, IntType *value) {  // reads into value
    MagicNumberType first4 = 0;   // a convenient 32-bit unsigned type
    if ( !sparsehash_internal::read_bigendian_number(fp, &first4, 4) )
      return false;
    if ( first4 < 0xFFFFFFFFULL ) {
      *value = first4;
    } else {
      if ( !sparsehash_internal::read_bigendian_number(fp, value, 8) )
        return false;
    }
    return true;
  }

 public:
  // read/write_metadata() and read_write/nopointer_data() are DEPRECATED.
  // Use serialize() and unserialize(), below, for new code.

  template <typename OUTPUT> bool write_metadata(OUTPUT *fp) const {
    if ( !write_32_or_64(fp, MAGIC_NUMBER) )  return false;
    if ( !write_32_or_64(fp, settings.table_size) )  return false;
    if ( !write_32_or_64(fp, settings.num_buckets) )  return false;

    GroupsConstIterator group;
    for ( group = groups.begin(); group != groups.end(); ++group )
      if ( group->write_metadata(fp) == false )  return false;
    return true;
  }

  // Reading destroys the old table contents!  Returns true if read ok.
  template <typename INPUT> bool read_metadata(INPUT *fp) {
    size_type magic_read = 0;
    if ( !read_32_or_64(fp, &magic_read) )  return false;
    if ( magic_read != MAGIC_NUMBER ) {
      clear();                        // just to be consistent
      return false;
    }

    if ( !read_32_or_64(fp, &settings.table_size) )  return false;
    if ( !read_32_or_64(fp, &settings.num_buckets) )  return false;

    resize(settings.table_size);                    // so the vector's sized ok
    GroupsIterator group;
    for ( group = groups.begin(); group != groups.end(); ++group )
      if ( group->read_metadata(fp) == false )  return false;
    return true;
  }

  // This code is identical to that for SparseGroup
  // If your keys and values are simple enough, we can write them
  // to disk for you.  "simple enough" means no pointers.
  // However, we don't try to normalize endianness
  bool write_nopointer_data(FILE *fp) const {
    for ( const_nonempty_iterator it = nonempty_begin();
          it != nonempty_end(); ++it ) {
      if ( !fwrite(&*it, sizeof(*it), 1, fp) )  return false;
    }
    return true;
  }

  // When reading, we have to override the potential const-ness of *it
  bool read_nopointer_data(FILE *fp) {
    for ( nonempty_iterator it = nonempty_begin();
          it != nonempty_end(); ++it ) {
      if ( !fread(reinterpret_cast<void*>(&(*it)), sizeof(*it), 1, fp) )
        return false;
    }
    return true;
  }

  // INPUT and OUTPUT must be either a FILE, *or* a C++ stream
  //    (istream, ostream, etc) *or* a class providing
  //    Read(void*, size_t) and Write(const void*, size_t)
  //    (respectively), which writes a buffer into a stream
  //    (which the INPUT/OUTPUT instance presumably owns).

  typedef sparsehash_internal::pod_serializer<value_type> NopointerSerializer;

  // ValueSerializer: a functor.  operator()(OUTPUT*, const value_type&)
  template <typename ValueSerializer, typename OUTPUT>
  bool serialize(ValueSerializer serializer, OUTPUT *fp) {
    if ( !write_metadata(fp) )
      return false;
    for ( const_nonempty_iterator it = nonempty_begin();
          it != nonempty_end(); ++it ) {
      if ( !serializer(fp, *it) )  return false;
    }
    return true;
  }

  // ValueSerializer: a functor.  operator()(INPUT*, value_type*)
  template <typename ValueSerializer, typename INPUT>
  bool unserialize(ValueSerializer serializer, INPUT *fp) {
    clear();
    if ( !read_metadata(fp) )
      return false;
    for ( nonempty_iterator it = nonempty_begin();
          it != nonempty_end(); ++it ) {
      if ( !serializer(fp, &*it) )  return false;
    }
    return true;
  }

  // Comparisons.  Note the comparisons are pretty arbitrary: we
  // compare values of the first index that isn't equal (using default
  // value for empty buckets).
  bool operator==(const sparsetable& x) const {
    return ( settings.table_size == x.settings.table_size &&
             settings.num_buckets == x.settings.num_buckets &&
             groups == x.groups );
  }

  bool operator<(const sparsetable& x) const {
    return std::lexicographical_compare(begin(), end(), x.begin(), x.end());
  }
  bool operator!=(const sparsetable& x) const { return !(*this == x); }
  bool operator<=(const sparsetable& x) const { return !(x < *this); }
  bool operator>(const sparsetable& x) const { return x < *this; }
  bool operator>=(const sparsetable& x) const { return !(*this < x); }


 private:
  // Package allocator with table_size and num_buckets to eliminate memory
  // needed for the zero-size allocator.
  // If new fields are added to this class, we should add them to
  // operator= and swap.
  class Settings : public allocator_type {
   public:
    typedef typename allocator_type::size_type size_type;

    Settings(const allocator_type& a, size_type sz = 0, size_type n = 0)
        : allocator_type(a), table_size(sz), num_buckets(n) { }

    Settings(const Settings& s)
        : allocator_type(s),
          table_size(s.table_size), num_buckets(s.num_buckets) { }

    size_type table_size;          // how many buckets they want
    size_type num_buckets;         // number of non-empty buckets
  };

  // The actual data
  group_vector_type groups;        // our list of groups
  Settings settings;               // allocator, table size, buckets
};

// We need a global swap as well
template <class T, u_int16_t GROUP_SIZE, class Alloc>
inline void swap(sparsetable<T,GROUP_SIZE,Alloc> &x,
                 sparsetable<T,GROUP_SIZE,Alloc> &y) {
  x.swap(y);
}

_END_GOOGLE_NAMESPACE_

#endif  // UTIL_GTL_SPARSETABLE_H_