Files
antiword-dir
doxygen
html
misc
script
test
third-party
tools
ucdata
.clang-format
.gitignore
.gitmodules
.valgrindrc
Abbreviations.cpp
Abbreviations.h
AdultCheck.cpp
AdultCheck.h
BigFile.cpp
BigFile.h
BitOperations.h
Bits.cpp
Bits.h
Blaster.cpp
Blaster.h
Clusterdb.cpp
Clusterdb.h
Collectiondb.cpp
Collectiondb.h
Conf.cpp
Conf.h
CountryCode.cpp
CountryCode.h
DailyMerge.cpp
DailyMerge.h
Dir.cpp
Dir.h
Dns.cpp
Dns.h
DnsProtocol.h
Doledb.cpp
Doledb.h
Domains.cpp
Domains.h
Entities.cpp
Entities.h
Errno.cpp
Errno.h
File.cpp
File.h
GbFormat.h
GbMutex.cpp
GbMutex.h
GigablastRequest.h
HashTable.cpp
HashTable.h
HashTableT.cpp
HashTableT.h
HashTableX.cpp
HashTableX.h
HighFrequencyTermShortcuts.cpp
HighFrequencyTermShortcuts.h
Highlight.cpp
Highlight.h
Hostdb.cpp
Hostdb.h
HttpMime.cpp
HttpMime.h
HttpRequest.cpp
HttpRequest.h
HttpServer.cpp
HttpServer.h
IPAddressChecks.cpp
IPAddressChecks.h
Images.cpp
Images.h
IndexList.h
JobScheduler.cpp
JobScheduler.h
Json.cpp
Json.h
LICENSE
Lang.cpp
Lang.h
LanguageIdentifier.cpp
LanguageIdentifier.h
Linkdb.cpp
Linkdb.h
Log.cpp
Log.h
Loop.cpp
Loop.h
Makefile
Matches.cpp
Matches.h
Mem.cpp
Mem.h
Msg0.cpp
Msg0.h
Msg1.cpp
Msg1.h
Msg13.cpp
Msg13.h
Msg1f.cpp
Msg1f.h
Msg2.cpp
Msg2.h
Msg20.cpp
Msg20.h
Msg22.cpp
Msg22.h
Msg3.cpp
Msg3.h
Msg39.cpp
Msg39.h
Msg3a.cpp
Msg3a.h
Msg4.cpp
Msg4.h
Msg40.cpp
Msg40.h
Msg5.cpp
Msg5.h
Msg51.cpp
Msg51.h
MsgC.cpp
MsgC.h
MsgType.h
Msge0.cpp
Msge0.h
Msge1.cpp
Msge1.h
Multicast.cpp
Multicast.h
PageAddColl.cpp
PageAddUrl.cpp
PageBasic.cpp
PageCrawlBot.cpp
PageCrawlBot.h
PageGet.cpp
PageHealthCheck.cpp
PageHosts.cpp
PageInject.cpp
PageInject.h
PageLogView.cpp
PageParser.cpp
PageParser.h
PagePerf.cpp
PageReindex.cpp
PageReindex.h
PageResults.cpp
PageResults.h
PageRoot.cpp
PageRoot.h
PageSockets.cpp
PageStats.cpp
PageStatsdb.cpp
PageThreads.cpp
PageTitledb.cpp
Pages.cpp
Pages.h
Parms.cpp
Parms.h
Phrases.cpp
Phrases.h
PingServer.cpp
PingServer.h
Pops.cpp
Pops.h
Pos.cpp
Pos.h
Posdb.cpp
Posdb.h
PosdbTable.cpp
PosdbTable.h
Process.cpp
Process.h
Profiler.cpp
Profiler.h
Proxy.cpp
Proxy.h
Punycode.cpp
Punycode.h
Query.cpp
Query.h
README.md
Rdb.cpp
Rdb.h
RdbBase.cpp
RdbBase.h
RdbBuckets.cpp
RdbBuckets.h
RdbCache.cpp
RdbCache.h
RdbDump.cpp
RdbDump.h
RdbIndex.cpp
RdbIndex.h
RdbIndexQuery.cpp
RdbIndexQuery.h
RdbList.cpp
RdbList.h
RdbMap.cpp
RdbMap.h
RdbMem.cpp
RdbMem.h
RdbMerge.cpp
RdbMerge.h
RdbScan.cpp
RdbScan.h
RdbTree.cpp
RdbTree.h
Rebalance.cpp
Rebalance.h
Repair.cpp
Repair.h
RobotRule.cpp
RobotRule.h
Robots.cpp
Robots.h
S99gb
SafeBuf.cpp
SafeBuf.h
Sanity.cpp
Sanity.h
ScalingFunctions.cpp
ScalingFunctions.h
ScopedLock.h
SearchInput.cpp
SearchInput.h
Sections.cpp
Sections.h
SiteGetter.cpp
SiteGetter.h
Speller.cpp
Speller.h
Spider.cpp
Spider.h
SpiderColl.cpp
SpiderColl.h
SpiderLoop.cpp
SpiderLoop.h
SpiderProxy.cpp
SpiderProxy.h
Statistics.cpp
Statistics.h
Stats.cpp
Stats.h
Statsdb.cpp
Statsdb.h
StopWords.cpp
StopWords.h
Summary.cpp
Summary.h
SummaryCache.cpp
SummaryCache.h
Synonyms.cpp
Synonyms.h
Tagdb.cpp
Tagdb.h
TcpServer.cpp
TcpServer.h
TcpSocket.h
Timezone.cpp
Timezone.h
Title.cpp
Title.h
TitleRecVersion.h
Titledb.cpp
Titledb.h
TopTree.cpp
TopTree.h
UCPropTable.cpp
UCPropTable.h
UdpProtocol.h
UdpServer.cpp
UdpServer.h
UdpSlot.cpp
UdpSlot.h
UdpStatistic.cpp
UdpStatistic.h
Unicode.cpp
Unicode.h
UnicodeProperties.cpp
UnicodeProperties.h
Url.cpp
Url.h
UrlComponent.cpp
UrlComponent.h
UrlParser.cpp
UrlParser.h
Version.cpp
Version.h
Wiki.cpp
Wiki.h
Wiktionary.cpp
Wiktionary.h
Words.cpp
Words.h
Xml.cpp
Xml.h
XmlDoc.cpp
XmlDoc.h
XmlDoc_Indexing.cpp
XmlNode.cpp
XmlNode.h
XmlXPath.cpp
XmlXPath.h
antiword
bmptopnm
control.deb
copyright.head
copyright.tail
entities.json
fctypes.cpp
fctypes.h
gb-1.0.spec
gb-include.h
gb.deb.rules
gb.pem
gbconvert.sh
gbstart.sh
generate_entities.py
giftopnm
hash.cpp
hash.h
iana_charset.cpp
iana_charset.h
init.gb.conf
injectme3
injectmedemo
ip.cpp
ip.h
jpegtopnm
libiconv.a
libiconv.la
libiconv64.a
libjpeg.so.62
libnetpbm.so.10
libpng12.so.0
libtiff.so.4
linkspam.cpp
linkspam.h
main.cpp
matches2.cpp
matches2.h
max_hosts.h
max_niceness.h
max_words.h
mysynonyms.txt
pngtopnm
pnmscale
ppmtojpeg
pstotext
rdbid_t.h
sitelinks.txt
sort.cpp
sort.h
tifftopnm
types.h
unifiedDict.txt
valgrind.cfg
wikititles.txt.part1
wikititles.txt.part2
wiktionary-buf.txt
wiktionary-lang.txt
wiktionary-syns.dat
zconf.h
privacore-open-source-searc…/HashTable.h

86 lines
2.1 KiB
C
Raw Normal View History

2013-08-02 13:12:24 -07:00
// Matt Wells, Copyright, Dec. 2002
// . generic hash table class
2016-03-08 22:14:30 +01:00
#ifndef GB_HASHTABLE_H
#define GB_HASHTABLE_H
2013-08-02 13:12:24 -07:00
#include "Mem.h" // for mcalloc and mmalloc
// . hash table mapping int32_t keys to int32_t values
// . keys live in m_keys[] and values in m_vals[], both indexed by slot number
// . a value of 0 is treated as "empty" by isEmpty(), so callers should
//   avoid storing 0 as a meaningful value
class HashTable {
public:
// . (re)initialize the table; all arguments are optional
// . NOTE(review): presumably initialNumSlots sizes the table and buf/bufSize
//   supply caller-owned storage to use instead of allocating -- confirm
//   against HashTable.cpp
// . label is a descriptive tag for this table (see setLabel)
2014-11-10 14:45:11 -08:00
bool set ( int32_t initialNumSlots = 0 ,
2013-08-02 13:12:24 -07:00
char *buf = NULL ,
2014-11-10 14:45:11 -08:00
int32_t bufSize = 0 ,
2016-03-14 23:10:06 +01:00
const char *label = NULL );
2013-08-02 13:12:24 -07:00
HashTable ( );
~HashTable ( );
2016-03-14 23:10:06 +01:00
// stores the pointer only; the label string itself is not copied here
void setLabel ( const char *label ) { m_label = label; }
2013-08-02 13:12:24 -07:00
// . add key/value entry to hash table
// . will grow hash table if it needs to
// . "slot" is an optional out-parameter (may be NULL)
// . NOTE(review): presumably *slot receives the slot number the key was
//   stored in -- confirm against HashTable.cpp
2014-11-10 14:45:11 -08:00
bool addKey ( int32_t key , int32_t value , int32_t *slot = NULL );
2013-08-02 13:12:24 -07:00
// remove key/value entry from hash table
2014-11-10 14:45:11 -08:00
bool removeKey ( int32_t key );
2013-08-02 13:12:24 -07:00
// like removeKey, but takes a slot number "n" rather than a key
// NOTE(review): inferred from the name and signature -- confirm
2014-11-10 14:45:11 -08:00
void removeSlot ( int32_t n );
2013-08-02 13:12:24 -07:00
// . used by ../english/Bits.h to store stop words, abbr's, ...
// . returns the score for this termId (0 means empty usually)
2014-11-10 14:45:11 -08:00
int32_t getValue ( int32_t key );
2013-08-02 13:12:24 -07:00
// value of 0 means empty
// returns true when getValue(key) yields 0
2016-05-19 18:37:26 +02:00
bool isEmpty ( int32_t key ) { return (getValue(key) == 0); }
2013-08-02 13:12:24 -07:00
// returns the key stored in slot "n"; no bounds check is done here
2016-05-19 18:37:26 +02:00
int32_t getKey ( int32_t n ) { return m_keys[n]; }
2013-08-02 13:12:24 -07:00
// returns whatever getOccupiedSlotNum() returns for "key"
2016-05-19 18:37:26 +02:00
int32_t getSlot ( int32_t key ) { return getOccupiedSlotNum ( key ); }
2013-08-02 13:12:24 -07:00
// overwrites the value stored in slot "n"; no bounds check is done here
2016-05-19 18:37:26 +02:00
void setValue ( int32_t n , int32_t val ) { m_vals[n] = val; }
2013-08-02 13:12:24 -07:00
// returns the value stored in slot "n"; no bounds check is done here
2016-05-19 18:37:26 +02:00
int32_t getValueFromSlot ( int32_t n ) { return m_vals[n]; }
2013-08-02 13:12:24 -07:00
// frees the used memory, etc.
void reset ( );
// removes all key/value pairs from hash table, vacates all slots
void clear ( );
// how many are occupied?
2016-03-14 23:10:06 +01:00
int32_t getNumSlotsUsed ( ) const { return m_numSlotsUsed; }
2013-08-02 13:12:24 -07:00
// how many are there total? used and unused.
2016-03-14 23:10:06 +01:00
int32_t getNumSlots ( ) const { return m_numSlots; }
2013-08-02 13:12:24 -07:00
// both return false and set g_errno on error, true otherwise
2016-03-14 23:10:06 +01:00
bool load ( const char *dir, const char *filename );
bool save ( const char *dir, const char *filename );
2013-08-02 13:12:24 -07:00
private:
// NOTE(review): presumably resizes the table to numSlots, optionally using
// the caller-supplied buf of bufSize bytes -- confirm against HashTable.cpp
2014-11-10 14:45:11 -08:00
bool setTableSize ( int32_t numSlots , char *buf , int32_t bufSize );
2013-08-02 13:12:24 -07:00
// slot lookup used by getSlot()
// NOTE(review): the return value for a key that is not present is not
// visible from this header -- confirm before relying on it
2014-11-10 14:45:11 -08:00
int32_t getOccupiedSlotNum ( int32_t key ) ;
2013-08-02 13:12:24 -07:00
// . the array of buckets in which we store the terms
// . scores are allowed to exceed 8 bits for weighting purposes
// . m_keys[n] and m_vals[n] together describe slot n
2014-11-10 14:45:11 -08:00
int32_t *m_keys;
int32_t *m_vals;
2013-08-02 13:12:24 -07:00
// total slot count, occupied slot count (see getNumSlots/getNumSlotsUsed)
// NOTE(review): m_mask is presumably used to map a key to a slot index
// -- confirm against HashTable.cpp
2014-11-10 14:45:11 -08:00
int32_t m_numSlots;
int32_t m_numSlotsUsed;
uint32_t m_mask;
2013-08-02 13:12:24 -07:00
// NOTE(review): m_doFree presumably records whether this table owns (and
// must free) its slot memory -- confirm against HashTable.cpp
2016-03-14 23:10:06 +01:00
bool m_doFree;
// descriptive tag assigned by setLabel(); pointer is not owned here
2016-03-14 23:10:06 +01:00
const char *m_label;
2013-08-02 13:12:24 -07:00
};
2016-03-08 22:14:30 +01:00
#endif // GB_HASHTABLE_H