Files

57 lines
1.3 KiB
C
Raw Permalink Normal View History

2013-08-02 13:12:24 -07:00
// Matt Wells, copyright Sep 2003
// Speller is a class for doing spell checking on user queries.
// . TODO: we might be able to use this as a related searches feature too, but
// we might have to use a different distance metric (getSimilarity())
// that is more word based and less letter based.
2016-03-08 22:14:30 +01:00
#ifndef GB_SPELLER_H
#define GB_SPELLER_H
2013-08-02 13:12:24 -07:00
// Size limit (1024) for a "fragment".
// NOTE(review): the unit (presumably bytes/chars of query text) and the
// places that enforce this limit are not visible in this header -- confirm
// against the code that uses this macro.
#define MAX_FRAG_SIZE 1024
2014-11-10 14:45:11 -08:00
// Maximum value (an int32_t) returned by Speller::getPhrasePopularity().
#define MAX_PHRASE_POP 16800
#include "HashTableX.h"
2013-08-02 13:12:24 -07:00
class Speller {
public:
Speller();
~Speller();
void reset();
bool init();
2016-11-29 14:29:32 +01:00
void dictLookupTest ( char *ff );
int64_t getLangBits64 ( int64_t wid ) ;
2013-08-02 13:12:24 -07:00
2016-05-23 15:21:15 +02:00
int32_t getPhrasePopularity( const char *s, uint64_t h, unsigned char langId );
2013-08-02 13:12:24 -07:00
2016-11-12 21:23:01 +01:00
bool canSplitWords(const char *s, int32_t slen, bool *isPorn, char *splitWords, unsigned char langId);
2016-11-29 14:29:32 +01:00
private:
2013-08-02 13:12:24 -07:00
2016-11-12 21:20:38 +01:00
bool findNext(const char *s, const char *send, const char **nextWord, bool *isPorn, unsigned char langId);
2013-08-02 13:12:24 -07:00
bool loadUnifiedDict();
2016-11-12 21:20:38 +01:00
const char *getPhraseRecord(const char *phrase, int len);
2016-03-03 15:56:36 +01:00
2016-11-12 21:20:38 +01:00
bool getPhraseLanguages(const char *phrase, int len, int64_t *array);
bool getPhraseLanguages2(const char *phraseRec, int64_t *array) ;
2013-08-02 13:12:24 -07:00
2016-03-03 15:56:36 +01:00
HashTableX m_unifiedDict;
2013-08-02 13:12:24 -07:00
SafeBuf m_unifiedBuf;
};
// Declaration of the process-wide Speller object; the definition lives in
// another translation unit (NOTE(review): presumably Speller.cpp).
extern class Speller g_speller;
2016-03-08 22:14:30 +01:00
#endif // GB_SPELLER_H