// Matt Wells,  Copyright, Dec. 2002

// . generic hash table class

#ifndef GB_HASHTABLEX_H
#define GB_HASHTABLEX_H

#include "SafeBuf.h"
#include "Sanity.h"


class HashTableX {

 public:

	bool set ( int32_t  keySize         ,
		   int32_t  dataSize        ,
		   int32_t  initialNumSlots , // = 0    ,
		   char *buf             , // = NULL ,
		   int32_t  bufSize         , // = 0    ,
		   bool  allowDups       , // = false ,
		   const char *allocName  ,
		   bool  useKeyMagic = false,
		   int32_t maskKeyOffset = 0);

	// key size is 0 if UNinitialized
	bool isInitialized ( ) const { return (m_ks != 0); }

	 HashTableX       ( );
	~HashTableX       ( );

	// . add key/value entry to hash table
	// . will grow hash table if it needs to
	// . returns false and sets g_errno on error, returns true otherwise
	bool addKey ( const void *key , const void *value , int32_t *slot = NULL );

	// for value-less hashtables
	bool addKey ( const void *key );

	// . remove key/value entry to hash table. 
	// . returns false and sets g_errno on error.
	bool removeKey  ( const void *key );

	// same as remove
	bool deleteSlot ( int32_t n ) { return removeSlot(n); }

	// like removeKey. returns false and sets g_errno on error.
	bool removeSlot ( int32_t n );


	// a replacement for TermTable.cpp
	bool addTerm(int64_t wid, int32_t score = 1) {
		int32_t slot = getSlot(&wid);
                if ( slot<0 ) return addKey(&wid ,&score,&slot);
                uint32_t *val = (uint32_t *)getValueFromSlot ( slot );
		// overflow check
		if ( *val + (uint32_t)score < *val ) *val = 0xffffffff;
		else                                 *val = *val + score;
		return true;
	}
	// a replacement for TermTable.cpp
	uint32_t getScore(int64_t wid) const {
		int32_t slot = getSlot(&wid);
		if ( slot < 0 ) return 0;
		return *(const uint32_t *)getValueFromSlot ( slot );
	}
	// a replacement for TermTable.cpp
	uint64_t getScore64FromSlot ( int32_t slot ) const {
		return *(const uint64_t *)getValueFromSlot ( slot ); }


	bool addTerm32 (int32_t wid, int32_t score = 1) {
		int32_t slot = getSlot ( &wid );
                if ( slot<0 ) return addKey( &wid ,&score,&slot);
                uint32_t *val = (uint32_t *)getValueFromSlot ( slot );
		// overflow check
		if ( *val + (uint32_t)score < *val ) *val = 0xffffffff;
		else                                 *val = *val + score;
		return true;
	}
	bool addTerm32(uint32_t wid, int32_t score = 1) {
		int32_t slot = getSlot ( &wid );
                if ( slot<0 ) return addKey( &wid ,&score,&slot);
                uint32_t *val = (uint32_t *)getValueFromSlot ( slot );
		// overflow check
		if ( *val + (uint32_t)score < *val ) *val = 0xffffffff;
		else                                 *val = *val + score;
		return true;
	}
	bool addScore(int32_t key, int32_t score = 1) {
		return addTerm32(key, score);
	}
	uint32_t getScore32(int32_t wid) const {
		int32_t slot = getSlot(&wid);
		if ( slot < 0 ) return 0;
		return *(const uint32_t *)getValueFromSlot ( slot );
	}


	bool addTerm144 ( const key144_t *kp , int32_t score = 1 ) {
		// grow it!
		if ( (m_numSlots < 20 || 4 * m_numSlotsUsed >= m_numSlots) &&
		     m_numSlots < m_maxSlots ) {
			int64_t growTo ;
			growTo = ((int64_t)m_numSlots * 150LL )/100LL+20LL;
			if ( growTo > m_maxSlots ) growTo = m_maxSlots;
			if ( ! setTableSize ( (int32_t)growTo , NULL , 0 ) ) 
				return false;
		}
		// hash it up
		int32_t n = hash32 ( (const char *)kp, 18 );
		// then mask it
		n &= m_mask;
		int32_t count = 0;
		while ( count++ < m_numSlots ) {
			// this is set to 0x01 if non-empty
			if ( m_flags [ n ] == 0 ) {
				gbmemcpy( &((key144_t *)m_keys)[n] ,kp,18);
				m_vals[n*m_ds] = score;
				m_flags[n] = 1;
				m_numSlotsUsed++;
				return true;
			}
			// get the key there
			if (((key144_t *)m_keys)[n] == *kp) {
				uint32_t *val = (uint32_t *)&m_vals[n*m_ds];
				// overflow check
				if ( *val + (uint32_t)score < *val ) 
					*val = 0xffffffff;
				else 
					*val = *val + score;
				return true;
			}
			// advance otherwise
			if ( ++n == m_numSlots ) n = 0;
		}
		// crazy!
		log("hash: table is full!");
		gbshutdownAbort(true);
		/*NOTREACHED*/
		return true;
	}

	// return 32-bit checksum of keys in table
	int32_t getKeyChecksum32 () const;

	// . used by ../english/Bits.h to store stop words, abbr's, ...
	// . returns the score for this termId (0 means empty usually)
	// . return 0 if key not in hash table
	void *getValue ( const void *key ) {
		// make it fast
		if ( m_ks == 4 ) return getValue32 ( *(const int32_t *)key );
		if ( m_ks == 8 ) return getValue64 ( *(const int64_t *)key );
		// returns -1 if key not in hash table
		int32_t n = getOccupiedSlotNum ( key );
		if ( n < 0 ) return NULL;
		return &m_vals[n*m_ds];
	}

	// . specialized for 32-bit keys for speed
	// . returns NULL if not in table
	void *getValue32 ( int32_t key ) {
		// return NULL if completely empty
		if ( m_numSlots <= 0 ) return NULL;
		// sanity check
		if ( m_ks != 4 ) { gbshutdownAbort(true); }
		int32_t n;
		if ( ! m_useKeyMagic ) {
			// mask on the lower 32 bits i guess
			n = key & m_mask;
		}
		else {
			// get lower 32 bits of key
			n =*(uint32_t *)(((char *)&key) +m_maskKeyOffset);
			// use magic to "randomize" key a little
			n^=g_hashtab[(unsigned char)((char *)&key)[m_maskKeyOffset]][0];
			// mask on the lower 32 bits i guess
			n &= m_mask;
		}
		int32_t count = 0;
		while ( count++ < m_numSlots ) {
			// this is set to 0x01 if non-empty
			if ( m_flags [ n ] == 0 ) return NULL;
			// get the key there
			if (((int32_t *)m_keys)[n] == key) 
				return &m_vals[n*m_ds]; 
			// advance otherwise
			if ( ++n == m_numSlots ) n = 0;
		}
		return NULL;
	}

	// . specialized for 64-bit keys for speed
	// . returns NULL if not in table
	void *getValue64 ( int64_t key ) {
		// return NULL if completely empty
		if ( m_numSlots <= 0 ) return NULL;
		// sanity check
		if ( m_ks != 8 ) { gbshutdownAbort(true); }
		int32_t n;
		if ( ! m_useKeyMagic ) {
			// mask on the lower 32 bits i guess
			// get lower 32 bits of key
			n = key & m_mask;
		}
		else {
			// use magic to "randomize" key a little
			n =*(uint32_t *)(((char *)&key) +m_maskKeyOffset);
			n ^= g_hashtab[(unsigned char)((char *)&key)[m_maskKeyOffset]][0];
			// mask on the lower 32 bits i guess
			n &= m_mask;
		}
		int32_t count = 0;
		while ( count++ < m_numSlots ) {
			// this is set to 0x01 if non-empty
			if ( m_flags [ n ] == 0 ) return NULL;
			// get the key there
			if (((int64_t *)m_keys)[n] == key) 
				return &m_vals[n*m_ds]; 
			// advance otherwise
			if ( ++n == m_numSlots ) n = 0;
		}
		return NULL;
	}

	// value of 0 means empty
	bool isEmpty ( const void *key ) const { return (getSlot(key) < 0); }

	bool isInTable ( const void *key ) const { return (getSlot(key) >= 0); }

	bool isEmpty ( int32_t n ) const { return (m_flags[n] == 0); }

	bool isTableEmpty ( ) const { return (m_numSlotsUsed == 0); }

	void *      getKeyFromSlot ( int32_t n )       { return m_keys + n * m_ks; }
	const void *getKeyFromSlot ( int32_t n ) const { return m_keys + n * m_ks; }

	int64_t getKey64FromSlot ( int32_t n ) const {
		return *(int64_t *)(m_keys+n*m_ks); }

	int32_t getSlot ( const void *key ) const { return getOccupiedSlotNum ( key ); }

	int32_t getNextSlot ( int32_t slot, const void *key ) const;

	// count how many slots have this key
	int32_t getCount ( const void *key ) const;

	void setValue ( int32_t n , const void *val ) { 
		if      (m_ds == 4) ((int32_t *)m_vals)[n] = *(const int32_t *)val;
		else if (m_ds == 8) ((int64_t *)m_vals)[n] = *(const int64_t *)val;
		else                gbmemcpy(m_vals+n*m_ds,val,m_ds);
	}

	void *      getValueFromSlot ( int32_t n )       { return m_vals + n * m_ds; }
	const void *getValueFromSlot ( int32_t n ) const { return m_vals + n * m_ds; }

	// frees the used memory, etc.
	void  reset  ( );

	// removes all key/value pairs from hash table, vacates all slots
	void  clear  ( );

	// how many are occupied?
	int32_t getNumUsedSlots ( ) const { return m_numSlotsUsed; }

	bool isEmpty() const { return (m_numSlotsUsed == 0); }

	// how many are there total? used and unused.
	int32_t getNumSlots() const { return m_numSlots; }

	// both return false and set g_errno on error, true otherwise
	bool load ( const char *dir, const char *filename , 
		    char **tbuf = NULL , int32_t *tsize = NULL );

	bool save ( const char *dir, const char *filename , 
		    const char  *tbuf = NULL , int32_t  tsize = 0);

	bool setTableSize ( int32_t numSlots , char *buf , int32_t bufSize );

	// for debugging
	void print();

	bool isWritable() const { return m_isWritable; }
	void disableWrites() { m_isWritable = false; }
	void enableWrites() { m_isWritable = true; }

	int32_t getKeySize() const { return m_ks; }
	int32_t getDataSize() const { return m_ds; }

	bool isAllowDups() const { return m_allowDups; }

	// . the array of buckets in which we store the terms
	// . scores are allowed to exceed 8 bits for weighting purposes
	char  *m_keys;
	char  *m_vals;
	char  *m_flags;

private:
	int32_t getOccupiedSlotNum ( const void *key ) const;

	bool m_isWritable;

	int32_t     m_numSlots;
	int32_t     m_numSlotsUsed;
	uint32_t m_mask;

	bool  m_doFree;
	char *m_buf;
	int32_t  m_bufSize;

	bool m_useKeyMagic;

	int32_t m_ks;
	int32_t m_ds;
	bool m_allowDups;

	bool m_isSaving;
	bool m_needsSave;

	// limits growing to this # of slots total
	int64_t  m_maxSlots;

	const char *m_allocName;

	int32_t m_maskKeyOffset;

	// the addon buf used by SOME hashtables. data that the ptrs
	// in the table itself reference.
	char *m_txtBuf;
	int32_t  m_txtBufSize;
};

#endif // GB_HASHTABLEX_H