#include "Wiki.h"
|
|
|
|
#include "Query.h"
|
|
#include "Words.h"
|
|
#include "Titledb.h"
|
|
#include "Threads.h"
|
|
|
|
static void doneReadingWikiWrapper ( void *state ) ;
|
|
|
|
// the global instance
|
|
Wiki g_wiki;
|
|
|
|
Wiki::Wiki () {
|
|
m_callback = NULL;
|
|
m_state = NULL;
|
|
m_opened = false;
|
|
// use a 4 byte key size and 1 byte data size
|
|
m_ht.set ( 4 , 1,0,NULL,0,false,0 ,"tbl-wiki");
|
|
}
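
// m_ht maps a 32-bit key, built by hashing the low 32 bits of each alnum
// word id of a wiki title with hash32Fast() (see loadText() below), to a
// 1-byte value holding the number of alnum words in that title.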

void Wiki::reset() {
	m_ht.reset();
}

Wiki::~Wiki () {
	if ( m_opened ) m_f.close();
}

// . load from disk
// . wikititles.txt (loads wikititles2.dat instead if it exists and is newer)
bool Wiki::load() {

	// load it from the .dat file if it exists and is newer
	char ff1[277];
	char ff2[272];
	sprintf(ff1, "%swikititles.txt.part2", g_hostdb.m_dir);
	sprintf(ff2, "%swikititles2.dat", g_hostdb.m_dir);
	int fd1 = open ( ff1 , O_RDONLY );
	if ( fd1 < 0 ) log(LOG_INFO,"wiki: open %s: %s",ff1,mstrerror(errno));
	int fd2 = open ( ff2 , O_RDONLY );
	if ( fd2 < 0 ) log(LOG_INFO,"wiki: open %s: %s",ff2,mstrerror(errno));
	struct stat stats1;
	struct stat stats2;
	int32_t errno1 = 0;
	int32_t errno2 = 0;
	if ( fstat ( fd1 , &stats1 ) == -1 ) errno1 = errno;
	if ( fstat ( fd2 , &stats2 ) == -1 ) errno2 = errno;
	// close all
	close ( fd1 );
	close ( fd2 );
	// save text size for getRandomPhrase() function below
	m_txtSize = stats1.st_size;
	// just use the .dat if we got it
	if ( ! errno2 ) {
		log(LOG_INFO,"wiki: Loading %s",ff2);
		// "dir" is NULL since already included in ff2
		return m_ht.load ( NULL , ff2 );
	}
	// if we got a newer binary version, use that
	// add in 10 seconds i guess
	if ( ! errno2 && ! errno1 && stats2.st_mtime + 10 > stats1.st_mtime ) {
		log(LOG_INFO,"wiki: Loading %s",ff2);
		// "dir" is NULL since already included in ff2
		return m_ht.load ( NULL , ff2 );
	}
	// if no text file that is bad
	if ( errno1 ) {
		g_errno = errno1;
		return log ("gb: could not open %s for reading: %s",ff1,
			    mstrerror(g_errno));
	}
	// get the size of it
	int32_t size = stats1.st_size;
	// now we have to load the text file
	return loadText( size );
}
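
// Hypothetical caller sketch (not part of this file): the table would
// typically be loaded once at startup, something like
//
//   if ( ! g_wiki.load() )
//           log("init: wiki load failed: %s",mstrerror(g_errno));
//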

bool Wiki::loadText ( int32_t fileSize ) {

	log(LOG_INFO,"wiki: generating wikititles2.dat file");

	SafeBuf sb;
	char ff1[277];
	sprintf(ff1, "%swikititles.txt.part1", g_hostdb.m_dir);
	log(LOG_INFO,"wiki: Loading %s",ff1);
	if ( ! sb.fillFromFile(ff1) ) return false;

	char ff2[277];
	sprintf(ff2, "%swikititles.txt.part2", g_hostdb.m_dir);
	log(LOG_INFO,"wiki: Loading %s",ff2);
	if ( ! sb.catFile(ff2) ) return false;

	//int fd1 = open ( ff1 , O_RDONLY );
	// read in whole thing
	//char *buf = (char *)mmalloc ( size + 1 , "wiki" );
	//if ( ! buf ) return false;
	//int32_t n = read ( fd1 , buf , size );
	//close ( fd1 );
	//if ( n != size ) { g_errno = EBADENGINEER; return false; }
	// null terminate
	//buf[size] = '\0';
	sb.pushChar('\0');
	// should not have reallocated too much
	if ( sb.length() + 100 < sb.m_capacity ) { char *xx=NULL;*xx=0; }

	char *buf    = sb.getBufStart();
	int32_t size = sb.length() - 1;

	// scan each line
	char *p    = buf;
	char *pend = buf + size;
	char *eol  = NULL;
	for ( ; p < pend ; p = eol + 1 ) {
		// skip spaces
		while ( p < pend && is_wspace_a ( *p ) ) p++;
		// find end of line, "eol" (also treat '(' as \n now)
		//for(eol = p; eol < pend && *eol !='\n' && *eol!='('; eol++) ;
		// do not use '(' since too many non-phrases in ()'s (for love)
		for (eol = p; eol < pend && *eol !='\n' ; eol++) ;
		// parse into words
		Words w;
		if ( ! w.set ( p , // s
			       eol - p , // slen
			       TITLEREC_CURRENT_VERSION ,
			       true , // computeIds?
			       MAX_NICENESS ) )
			return false;
		int32_t nw = w.getNumWords();

		// skip if it begins with 'the', like 'the uk' because it
		// is causing uk to get a low score in 'boots in the uk'.
		// same for all stop words i guess...
		int32_t start = 0;

		//if ( nw >= 2 && w.m_wordIds[0] == 3522767639246570644LL &&
		//     w.m_wordIds[1] == -943426581783550057LL )
		//	log("poo"); // hashfast32 = 2117103295

		// this was letting "To_bell_the_cat" through as
		// "bell_the_cat" and messing up "To_be_or_not_to_be"
		/*
		for ( ; start < nw ; start++ ) {
			// skip punct stuff
			if ( w.getWordId(start) == 0LL ) continue;
			// skip query stop words
			// why? messes up "to be or not to be"
			if ( w.isQueryStopWord(start) ) continue;
			// ok, we got a good starter word
			break;
		}
		*/

		// if no words, bail
		if ( start >= nw ) continue;
		// remove last word if not alnum
		if ( nw > 0 && !w.isAlnum(nw-1) ) nw--;
		// if no words, bail
		if ( start >= nw ) continue;
		// skip this line if no words
		if ( nw <= 0 ) continue;
		// if begins with upper/lower and ends with lower/upper, skip
		// don't skip because i still want it for gigabits for
		// "point-in-time" "real-time" etc. common phrases.
		//char flag = 0;
		//if ( w.isCapitalized(start) && ! w.isCapitalized(nw-1) &&
		//     // fix "To_be_or_not_to_be"
		//     ! w.isStopWord(nw-1) )
		//	flag = 1;
		//if ( ! w.isCapitalized(start) && w.isCapitalized(nw-1) )
		//	flag = 1;
		// skip titles with '[' or ',' in them, or that start
		// with "List of "
		char c = *eol;
		*eol = '\0';
		char *pp = NULL;
		if ( !pp ) pp = strstr ( p,"[" );
		if ( !pp && strncasecmp( p,"List of ",8)==0) pp = p;
		if ( !pp ) pp = strstr ( p,"," );
		// show it for debug
		//if ( ! pp ) printf("%s\n",p);
		*eol = c;
		if ( pp ) continue;
		// get these
		int64_t *wids = w.getWordIds();
		// reset hash
		uint32_t h = 0;
		// count the words in the phrase
		int32_t count = 0;
		// hash the word ids together
		for ( int32_t i = start ; i < nw ; i++ ) {
			// skip if not a proper word
			if ( ! w.isAlnum(i) ) continue;
			// add into hash quickly
			h = hash32Fast ( wids[i] & 0xffffffff , h );
			// count them
			count++;
		}
		// skip if too big
		if ( count > 250 ) continue;
		// store into hash table
		// make negative i guess to indicate it is not
		// the best title form
		//if ( flag ) count = count * -1;
		if ( ! m_ht.addKey ( &h , &count ) ) return false;
	}

	/*
	// reset for 2nd scan
	p = buf;
	//
	//
	// scan a second time and allow the mixed case guys like "Lock_pick"
	// if Lockpick exists and is a word
	//
	//
	for ( ; p < pend ; p = eol + 1 ) {
		// skip spaces
		while ( p < pend && is_wspace_a ( *p ) ) p++;
		// find end of line, "eol" (also treat '(' as \n now)
		//for(eol = p; eol < pend && *eol !='\n' && *eol!='('; eol++) ;
		// do not use '(' since too many non-phraes in ()'s (for love)
		for (eol = p; eol < pend && *eol !='\n' ; eol++) ;
		// parse into words
		Words w;
		if ( ! w.set ( p , // s
			       eol - p , // slen
			       TITLEREC_CURRENT_VERSION ,
			       true , // computeIds?
			       MAX_NICENESS ) )
			return false;
		int32_t nw = w.getNumWords();

		// skip if it begins with 'the', like 'the uk' because it
		// is causing uk to get a low score in 'boots in the uk'.
		// same for all stop words i guess...
		int32_t start = 0;

		//if ( nw >= 2 && w.m_wordIds[0] == 3522767639246570644LL &&
		//     w.m_wordIds[1] == -943426581783550057LL )
		//	log("poo"); // hashfast32 = 2117103295

		for ( ; start < nw ; start++ ) {
			// skip punct stuff
			if ( w.getWordId(start) == 0LL ) continue;
			// skip query stop words
			if ( w.isQueryStopWord(start) ) continue;
			// ok, we got a good starter word
			break;
		}
		// if no words, bail
		if ( start >= nw ) continue;
		// need something like "Lock_pick"
		if ( nw != 3 ) continue;
		// remove last words if not alnum
		if ( nw > 0 && !w.isAlnum(nw-1) ) nw--;
		// if no words, bail
		if ( start >= nw ) continue;
		// skip this line if no words
		if ( nw <= 0 ) continue;
		// if not mixed, skip for 2nd scan
		if ( w.isCapitalized(start) && w.isCapitalized(nw-1) ) continue;
		if ( ! w.isCapitalized(start) && ! w.isCapitalized(nw-1)) continue;

		char **wptrs   = w.getWords();
		int32_t *wlens = w.getWordLens();
		uint64_t h64  = 0;
		int32_t conti = 0;
		int32_t count = 0;
		// hash the word ids together
		for ( int32_t i = start ; i < nw ; i++ ) {
			// skip if not a proper word
			if ( ! w.isAlnum(i) ) continue;
			// no digits starting "08-Hillary" "08Hillary"
			if ( i == start && !w.isAlpha(i) ) continue;
			// add into hash quickly
			h64 = hash64Lower_utf8_cont(wptrs[i],
						    wlens[i],
						    h64,
						    &conti );
			count++;
		}
		// 2 words
		if ( count != 2 ) continue;
		// skip if too big
		if ( count > 250 ) continue;
		// convert
		uint32_t h32 = h64 & 0xffffffff;
		// the compound form must be in the table from the first scan,
		// if not, skip it
		if ( ! m_ht.isInTable ( &h32 ) ) continue;

		uint32_t hf32 = 0;
		count = 0;
		int64_t *wids = w.getWordIds();
		// hash the word ids together to make a new hash that takes
		// the space into account.
		for ( int32_t i = start ; i < nw ; i++ ) {
			// skip if not a proper word
			if ( ! w.isAlnum(i) ) continue;
			// add into hash quickly
			hf32 = hash32Fast ( wids[i] & 0xffffffff , hf32 );
			// count them
			count++;
		}

		// ok, store it
		if ( ! m_ht.addKey ( &hf32 , &count ) ) return false;
		// for debug
		//char c = *eol;
		// *eol = '\0';
		//log("wiki: %s",wptrs[0]);
		// *eol = c;
	}
	*/

	// do not save if we can't
	if ( g_conf.m_readOnlyMode ) return true;
	// now save this hash table for quicker loading next time
	//char ff2[256];
	//sprintf(ff2, "%s/wikititles2.dat", g_hostdb.m_dir);
	if ( ! m_ht.save ( g_hostdb.m_dir , "wikititles2.dat" ) ) return false;

	log(LOG_INFO,"wiki: done generating wikititles2.dat file");

	// success
	return true;
}
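
// Worked example of the keying scheme above (illustration only; iLock/iPick
// are made-up word indices): the title line "Lock_pick" parses into the
// alnum words "Lock" and "pick", so its key is built as
//
//   uint32_t h = 0;
//   h = hash32Fast ( wids[iLock] & 0xffffffff , h ); // "Lock"
//   h = hash32Fast ( wids[iPick] & 0xffffffff , h ); // "pick"
//
// and the stored 1-byte value is 2, the number of alnum words.
// getNumWordsInWikiPhrase() below rebuilds the same running hash over a
// query's words to detect how far a wiki-title match extends.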

// if a phrase in a query is in a wikipedia title, then increase
// its affWeights beyond the normal 1.0
int32_t Wiki::getNumWordsInWikiPhrase ( int32_t i , Words *w ) {
	int64_t *wids = w->m_wordIds;
	if ( ! wids[i] ) return 0;
	int32_t nw     = w->m_numWords;
	char **wptrs   = w->getWords();
	int32_t *wlens = w->getWordLens();
	// how many in the phrase
	int32_t max      = -1;
	int32_t maxCount = 0;
	// accumulate a hash of the word ids
	//int64_t h = 0LL;
	uint32_t h = 0;
	int32_t wcount = 0;
	// otherwise, increase affinity high for included words
	for ( int32_t j = i ; j < nw && j < i + 12 ; j++ ) {
		// count all words
		wcount++;
		// skip if not alnum
		if ( ! wids[j] ) continue;
		// add to hash
		//h = hash64 ( wids[j] , h );
		// add into hash quickly
		h = hash32Fast ( wids[j] & 0xffffffff , h );
		// skip single words, we only want to check phrases
		if ( j == i ) continue;
		// look in table
		char *vp = (char *)m_ht.getValue ( &h );
		// skip if nothing
		if ( ! vp ) {
			// try combining. FIX FOR "Lock_pick". we want that to
			// be a wikipedia phrase, but it's not recorded because
			// its case is mixed.
			if ( j != i + 2 ) continue;
			// fix for "Make a" being a phrase because "Makea"
			// is in the wikipedia. fix for
			// 'how to make a lock pick set'
			if ( wlens[i+2] <= 2 ) continue;
			// special hash
			uint64_t h64  = 0;
			int32_t conti = 0;
			// add into hash quickly
			h64 = hash64Lower_utf8_cont(wptrs[i],
						    wlens[i],
						    h64,
						    &conti );
			h64 = hash64Lower_utf8_cont(wptrs[i+2],
						    wlens[i+2],
						    h64,
						    &conti );
			// try looking that up
			uint32_t hf32 = h64 & 0xffffffff;
			vp = (char *)m_ht.getValue(&hf32);
		}
		if ( ! vp ) continue;
		// we got a match
		max      = j;
		maxCount = wcount;
	}
	// return now if we got one
	if ( maxCount > 0 ) return maxCount;
	// otherwise, try combining so "lock pick" is a wikipedia phrase
	// because "lockpick" is a wikipedia title.

	return maxCount;
}
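
// Hypothetical caller sketch (not from this file): a query scorer could walk
// the words of a query and boost runs that form a wikipedia title, e.g.
//
//   Words w;
//   w.set ( qstr , qlen , TITLEREC_CURRENT_VERSION , true , MAX_NICENESS );
//   for ( int32_t i = 0 ; i < w.getNumWords() ; i++ ) {
//           int32_t n = g_wiki.getNumWordsInWikiPhrase ( i , &w );
//           if ( n > 1 ) { /* words i..i+n-1 look like a wiki title */ }
//   }
//
// "qstr"/"qlen" are illustrative names for the caller's query buffer.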

/*
// if a phrase in a query is in a wikipedia title, then increase
// its affWeights beyond the normal 1.0
bool Wiki::setPhraseAffinityWeights ( Query *q , float *affWeights ,
				      bool *oneTitle ) {

	// reset all terms wiki affinities to 1.0
	for ( int32_t i = 0 ; i < q->m_numTerms ; i++ )
		q->m_qterms[i].m_wikiAff = 1.0;

	int32_t loopCount = 0;
	int32_t lasti;
	// loop over query words
	QueryWord *qw = q->m_qwords;
	int32_t nw = q->m_numWords;
	for ( int32_t i = 0 ; i < nw ; i++ ) {
		// skip if not alnum
		if ( qw[i].m_rawWordId == 0LL ) continue;
		// loop count
		loopCount++;
		// no longer one title?
		if ( loopCount > 1 && oneTitle && i > lasti ) *oneTitle = false;
		// count it
		int32_t count = 0;
		// how many in the phrase
		int32_t max = -1;
		// accumulate a hash of the word ids
		int64_t h = 0LL;
		// otherwise, increase affinity high for included words
		for ( int32_t j = i ; j < nw && count < 12 ; j++ ) {
			// skip if not alnum
			if ( qw[j].m_rawWordId == 0LL ) continue;
			// add to hash
			h = hash64 ( qw[j].m_rawWordId , h );
			// count it
			count++;
			// skip single words, we only want to check phrases
			if ( count == 1 ) continue;
			// look in table
			char *vp = (char *)m_ht.getValue ( &h );
			// skip if nothing
			if ( ! vp ) continue;
			// we got a match
			max = j;
		}
		// set all up to max to affinity of 10.0
		for ( int32_t j = i ; j <= max ; j++ ) {
			// skip if not alnum
			if ( qw[j].m_rawWordId == 0LL ) continue;
			// get term
			QueryTerm *qtp = qw[j].m_queryPhraseTerm;
			QueryTerm *qtw = qw[j].m_queryWordTerm;
			// store affinity, say 10x normal and 1/10th normal
			if ( qtp ) qtp->m_wikiAff = 1.00;
			if ( qtw ) qtw->m_wikiAff = 0.05;
		}
		// if that was our firstTime and max is maxed, we got a
		// single wikipedia title
		if ( loopCount == 1 && oneTitle ) {
			*oneTitle = true;
			lasti = i + max;
		}
	}

	// store into array
	for ( int32_t i = 0 ; i < q->m_numTerms ; i++ )
		affWeights[i] = q->m_qterms[i].m_wikiAff;

	return true;
}
*/

// . we hit google with random queries to see what blog sites and
//   news sites they have
// . returns false if it blocks, true otherwise
// . returns true and sets g_errno on error
bool Wiki::getRandomPhrase ( void *state , void (*callback)(void *state)) {
	// sanity check
	if ( this != &g_wiki ) { char *xx=NULL;*xx=0; }
	// this will be set on error
	m_errno = 0;
	// just get it from the file...
	int32_t r = rand() % m_txtSize;
	// read in 5k
	r -= 5000;
	// prevent negatives
	if ( r < 0 ) r = 0;
	// open it
	if ( ! m_opened ) {
		// return true on error, g_errno should be set
		if ( ! m_f.set ( g_hostdb.m_dir , "wikititles.txt" ) )
			return true;
		if ( ! m_f.open ( O_NONBLOCK , // flags
				  NULL ,       // pc
				  -1 ,         // maxFileSize
				  0 ))         // permissions
			return true;
		m_opened = true;
	}
	// someone is already outstanding!
	if ( m_callback ) { char *xx=NULL;*xx=0; }
	// save it
	m_state    = state;
	m_callback = callback;
	// . read it from there
	// . returns false if blocked
	if ( ! m_f.read (m_buf,5000,r,&m_fs,NULL,doneReadingWikiWrapper ) )
		return false;
	// . ok it did not block
	// . this will set Wiki::m_randPhrase[]
	doneReadingWiki ( );
	// clear these since we never blocked, so a later call can proceed
	m_state    = NULL;
	m_callback = NULL;
	// no need to call callback, we did not block
	return true;
}
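
// Hypothetical caller sketch (not from this file), following the usual
// convention here that the callback fires only when the call blocked
// (returned false):
//
//   static void gotPhrase ( void *state ) {
//           if ( g_wiki.m_errno ) return;
//           log("test: random wiki phrase: %s", g_wiki.m_randPhrase );
//   }
//   ...
//   if ( g_wiki.getRandomPhrase ( NULL , gotPhrase ) )
//           gotPhrase ( NULL ); // did not block; result is ready now
//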

void doneReadingWikiWrapper ( void *state ) {
	// set m_randPhrase[] buf
	g_wiki.doneReadingWiki ( );
	// save these, then NULLify them to "free it up" BEFORE calling the
	// callback, so the callback can safely request another phrase
	void (*callback)(void *) = g_wiki.m_callback;
	void *cbstate            = g_wiki.m_state;
	g_wiki.m_state    = NULL;
	g_wiki.m_callback = NULL;
	// call callback
	callback ( cbstate );
}

void Wiki::doneReadingWiki ( ) {
	// error?
	if ( g_errno ) {
		m_errno = g_errno;
		log("wiki: had error getting rand phrase: %s",
		    mstrerror(g_errno));
		return;
	}
	// find first \n in buf
	char *p    = m_buf;
	char *pend = m_buf + 5000;
	while ( p < pend && *p != '\n' ) p++;
	// skip that '\n'
	p++;
	// strange!!
	if ( p >= pend ) {
		m_errno = EBADENGINEER;
		log("wiki: bad random read.");
		return;
	}

	//
	// parse like in Wiki::load() now
	//
	char *eol = NULL;
	for ( ; p < pend ; p = eol + 1 ) {
		// skip spaces
		while ( p < pend && is_wspace_a ( *p ) ) p++;
		// find end of line, "eol" (also treat '(' as \n now)
		//for(eol = p; eol < pend && *eol !='\n' && *eol!='('; eol++) ;
		for ( eol = p; eol < pend && *eol !='\n' ; eol++) ;
		// parse into words
		Words w;
		if ( ! w.set ( p , // s
			       eol - p , // slen
			       TITLEREC_CURRENT_VERSION ,
			       true , // computeIds?
			       MAX_NICENESS ) ) {
			m_errno = g_errno;
			return;
		}
		int32_t nw = w.getNumWords();
		// remove last word if not alnum
		if ( nw > 0 && !w.isAlnum(nw-1) ) nw--;
		// skip this line if no words
		if ( nw <= 0 ) continue;
		// if begins with upper/lower and ends with lower/upper, skip
		if ( w.isCapitalized(0) && ! w.isCapitalized(nw-1) )
			continue;
		if ( ! w.isCapitalized(0) && w.isCapitalized(nw-1) )
			continue;
		// skip if it has ('s in it
		char c = *eol;
		*eol = '\0';
		char *pp = NULL;
		if ( !pp ) pp = strstr ( p,"(" );
		if ( !pp ) pp = strstr ( p,"[" );
		if ( !pp && strncasecmp( p,"List of ",8)==0) pp = p;
		if ( !pp ) pp = strstr ( p,"," );
		*eol = c;
		if ( pp ) continue;
		//
		// BUT here we skip if too long!
		// we don't want to send insanely long queries to goog/yhoo
		//
		int32_t size = w.getWord(nw-1) - w.getWord(0);
		size += w.getWordLen(nw-1);
		if ( size >= 128 ) continue;
		// get ptr to it
		char *wptr = w.getWord ( 0 );
		// copy that
		gbmemcpy ( m_randPhrase , wptr , size );
		// NULL terminate
		m_randPhrase[size] = '\0';
		// all done
		return;
	}
}