Files

59 lines
1.3 KiB
C++
Raw Permalink Normal View History

2013-08-02 13:12:24 -07:00
#include "gb-include.h"
#include "Pops.h"
#include "Words.h"
#include "Speller.h"
2016-09-07 18:46:28 +02:00
#include "Mem.h"
2016-08-05 14:21:01 +02:00
#include "Sanity.h"
2013-08-02 13:12:24 -07:00
// Construct an empty Pops: the inline buffer is zeroed and no popularity
// array is attached until set() is called.
Pops::Pops () {
	// clear the inline buffer
	memset ( m_localBuf , 0 , sizeof(m_localBuf) );
	// nothing allocated yet
	m_popsSize = 0;
	m_pops     = NULL;
}
// Release the popularity array if, and only if, it was obtained from
// mmalloc() in set().
Pops::~Pops() {
	// set() was never called, or its allocation failed
	if ( ! m_pops ) return;
	// set() pointed us at m_localBuf instead of calling mmalloc();
	// there is nothing to hand back to the allocator
	if ( m_pops == (int32_t *)m_localBuf ) return;
	mfree ( m_pops , m_popsSize , "Pops" );
}
2016-05-23 16:53:54 +02:00
bool Pops::set ( const Words *words , int32_t a , int32_t b ) {
2016-02-17 14:43:47 +01:00
int32_t nw = words->getNumWords();
2013-08-02 13:12:24 -07:00
2014-11-10 14:45:11 -08:00
int32_t need = nw * 4;
if ( need > POPS_BUF_SIZE ) m_pops = (int32_t *)mmalloc(need,"Pops");
else m_pops = (int32_t *)m_localBuf;
2013-08-02 13:12:24 -07:00
if ( ! m_pops ) return false;
m_popsSize = need;
2014-11-10 14:45:11 -08:00
for ( int32_t i = a ; i < b && i < nw ; i++ ) {
2013-08-02 13:12:24 -07:00
// skip if not indexable
int64_t wid = words->getWordId(i);
if ( !wid ) {
2016-02-17 14:43:47 +01:00
m_pops[i] = 0;
continue;
2013-08-02 13:12:24 -07:00
}
// once again for the 50th time partap's utf16 crap gets in
// the way... we have to have all kinds of different hashing
// methods because of it...
2016-02-17 14:43:47 +01:00
uint64_t key;
const char *wp = words->getWord(i);
int32_t wlen = words->getWordLen(i);
key = hash64d( wp, wlen );
m_pops[i] = g_speller.getPhrasePopularity( wp, key, 0 );
2016-02-17 14:43:47 +01:00
2013-08-02 13:12:24 -07:00
// sanity check
2016-08-05 14:21:01 +02:00
if ( m_pops[i] < 0 ) gbshutdownLogicError();
2016-02-17 14:43:47 +01:00
if ( m_pops[i] == 0 ) {
m_pops[i] = 1;
}
2013-08-02 13:12:24 -07:00
}
2016-02-17 14:43:47 +01:00
2013-08-02 13:12:24 -07:00
return true;
}