Files
2016-10-21 22:40:52 +02:00

59 lines
1.3 KiB
C++

#include "gb-include.h"
#include "Pops.h"
#include "Words.h"
#include "Speller.h"
#include "Mem.h"
#include "Sanity.h"
// Starts out empty: no popularity array is attached until set() runs.
Pops::Pops () {
	// clear the inline buffer so it holds zeroes before set() uses it
	memset ( m_localBuf , 0 , sizeof m_localBuf );
	// nothing allocated yet
	m_popsSize = 0;
	m_pops     = NULL;
}
// Releases the popularity array, but only when it was obtained from the
// allocator; the inline buffer m_localBuf is part of the object itself.
Pops::~Pops() {
	const bool heapOwned =
		( m_pops != NULL && m_pops != (int32_t *)m_localBuf );
	if ( ! heapOwned ) return;
	mfree ( m_pops , m_popsSize , "Pops" );
}
bool Pops::set ( const Words *words , int32_t a , int32_t b ) {
int32_t nw = words->getNumWords();
int32_t need = nw * 4;
if ( need > POPS_BUF_SIZE ) m_pops = (int32_t *)mmalloc(need,"Pops");
else m_pops = (int32_t *)m_localBuf;
if ( ! m_pops ) return false;
m_popsSize = need;
for ( int32_t i = a ; i < b && i < nw ; i++ ) {
// skip if not indexable
int64_t wid = words->getWordId(i);
if ( !wid ) {
m_pops[i] = 0;
continue;
}
// once again for the 50th time partap's utf16 crap gets in
// the way... we have to have all kinds of different hashing
// methods because of it...
uint64_t key;
const char *wp = words->getWord(i);
int32_t wlen = words->getWordLen(i);
key = hash64d( wp, wlen );
m_pops[i] = g_speller.getPhrasePopularity( wp, key, 0 );
// sanity check
if ( m_pops[i] < 0 ) gbshutdownLogicError();
if ( m_pops[i] == 0 ) {
m_pops[i] = 1;
}
}
return true;
}