Licence to kill (gotos)

This commit is contained in:
Brian Rasmusson
2016-09-09 12:57:46 +02:00
parent cf38a5530c
commit 1ca5ae698b

@ -3218,7 +3218,7 @@ void PosdbTable::intersectLists10_r ( ) {
if ( ! qti->m_matchingSubListSavedCursor[k] ) {
continue;
}
// getMaxPossibleScore() incremented m_matchingSubListCursor to
// the next docid so use m_matchingSubListSavedCursor.
nwp [nsub] = qti->m_matchingSubListSavedCursor [k];
@ -3251,130 +3251,134 @@ void PosdbTable::intersectLists10_r ( ) {
}
// . ok, merge the lists into a list in mbuf
// . get the min of each list
mergeMore:
int32_t mink = -1;
for ( int32_t k = 0 ; k < nsub ; k++ ) {
// skip if list is exhausted
if ( ! nwp[k] ) {
continue;
bool currTermDone = false;
char ks;
do {
int32_t mink = -1;
ks = 0;
for ( int32_t k = 0 ; k < nsub ; k++ ) {
// skip if list is exhausted
if ( ! nwp[k] ) {
continue;
}
// auto winner?
if ( mink == -1 ) {
mink = k;
continue;
}
if ( KEYCMP(nwp[k],nwp[mink],6) < 0 ) {
mink = k; // a new min...
}
}
// auto winner?
// all exhausted? merge next set of sublists then for term #j
if ( mink == -1 ) {
mink = k;
continue;
}
if ( KEYCMP(nwp[k],nwp[mink],6) < 0 ) {
mink = k; // a new min...
}
}
// all exhausted? merge next set of sublists then for term #j
if ( mink == -1 ) {
miniMergedEnd[j] = mptr;
continue;
}
// get keysize
char ks = Posdb::getKeySize(nwp[mink]);
// HACK OF CONFUSION:
//
// skip it if its a query phrase term, like
// "searchengine" is for the 'search engine' query
// AND it has the synbit which means it was a bigram
// in the doc (i.e. occurred as two separate terms)
//
// second check means it occurred as two separate terms
// or could be like bob and occurred as "bob's".
// see XmlDoc::hashWords3().
if ( ! ((nwpFlags[mink] & BF_BIGRAM) && (nwp[mink][2] & 0x03)) ) {
// if the first key in our merged list store the docid crap
if ( isFirstKey ) {
// store a 12 byte key in the merged list buffer
memcpy ( mptr, nwp[mink], 12 );
// wipe out its syn bits and re-use our way
mptr[2] &= 0xfc;
// set the synbit so we know if its a synonym of term
if ( nwpFlags[mink] & (BF_BIGRAM|BF_SYNONYM)) {
mptr[2] |= 0x02;
}
// wiki half stop bigram? so for the query
// 'time enough for love' the phrase term "enough for"
// is a half stopword wiki bigram, because it is in
// a phrase in wikipedia ("time enough for love") and
// one of the two words in the phrase term is a
// stop word. therefore we give it more weight than
// just 'enough' by itself.
if ( nwpFlags[mink] & BF_HALFSTOPWIKIBIGRAM ) {
mptr[2] |= 0x01;
}
// make sure its 12 bytes! it might have been
// the first key for the termid, and 18 bytes.
mptr[0] &= 0xf9;
mptr[0] |= 0x02;
// save it
lastMptr = mptr;
mptr += 12;
isFirstKey = false;
// continue outer "j < m_numQueryTermInfos" loop.
currTermDone = true;
}
else {
// if matches last key word position, do not add!
// we should add the bigram first if more important
// since it should be added before the actual term
// above in the sublist array. so if they are
// wikihalfstop bigrams they will be added first,
// otherwise, they are added after the regular term.
// should fix double scoring bug for 'cheat codes'
// query!
if ( Posdb::getWordPos(lastMptr) != Posdb::getWordPos(nwp[mink]) ) {
memcpy ( mptr, nwp[mink], 6 );
// wipe out its syn bits and re-use our way
mptr[2] &= 0xfc;
// set the synbit so we know if its a synonym of term
if ( nwpFlags[mink] & (BF_BIGRAM|BF_SYNONYM)) {
mptr[2] |= 0x02;
}
// get keysize
ks = Posdb::getKeySize(nwp[mink]);
if ( nwpFlags[mink] & BF_HALFSTOPWIKIBIGRAM ) {
mptr[2] |= 0x01;
}
// HACK OF CONFUSION:
//
// skip it if its a query phrase term, like
// "searchengine" is for the 'search engine' query
// AND it has the synbit which means it was a bigram
// in the doc (i.e. occurred as two separate terms)
//
// second check means it occurred as two separate terms
// or could be like bob and occurred as "bob's".
// see XmlDoc::hashWords3().
if ( ! ((nwpFlags[mink] & BF_BIGRAM) && (nwp[mink][2] & 0x03)) ) {
// if it was the first key of its list it may not
// have its bit set for being 6 bytes now! so turn
// on the 2 compression bits
mptr[0] &= 0xf9;
mptr[0] |= 0x06;
// save it
lastMptr = mptr;
mptr += 6;
// if the first key in our merged list store the docid crap
if ( isFirstKey ) {
// store a 12 byte key in the merged list buffer
memcpy ( mptr, nwp[mink], 12 );
// wipe out its syn bits and re-use our way
mptr[2] &= 0xfc;
// set the synbit so we know if its a synonym of term
if ( nwpFlags[mink] & (BF_BIGRAM|BF_SYNONYM)) {
mptr[2] |= 0x02;
}
// wiki half stop bigram? so for the query
// 'time enough for love' the phrase term "enough for"
// is a half stopword wiki bigram, because it is in
// a phrase in wikipedia ("time enough for love") and
// one of the two words in the phrase term is a
// stop word. therefore we give it more weight than
// just 'enough' by itself.
if ( nwpFlags[mink] & BF_HALFSTOPWIKIBIGRAM ) {
mptr[2] |= 0x01;
}
// make sure its 12 bytes! it might have been
// the first key for the termid, and 18 bytes.
mptr[0] &= 0xf9;
mptr[0] |= 0x02;
// save it
lastMptr = mptr;
mptr += 12;
isFirstKey = false;
}
else {
// if matches last key word position, do not add!
// we should add the bigram first if more important
// since it should be added before the actual term
// above in the sublist array. so if they are
// wikihalfstop bigrams they will be added first,
// otherwise, they are added after the regular term.
// should fix double scoring bug for 'cheat codes'
// query!
if ( Posdb::getWordPos(lastMptr) != Posdb::getWordPos(nwp[mink]) ) {
memcpy ( mptr, nwp[mink], 6 );
// wipe out its syn bits and re-use our way
mptr[2] &= 0xfc;
// set the synbit so we know if its a synonym of term
if ( nwpFlags[mink] & (BF_BIGRAM|BF_SYNONYM)) {
mptr[2] |= 0x02;
}
if ( nwpFlags[mink] & BF_HALFSTOPWIKIBIGRAM ) {
mptr[2] |= 0x01;
}
// if it was the first key of its list it may not
// have its bit set for being 6 bytes now! so turn
// on the 2 compression bits
mptr[0] &= 0xf9;
mptr[0] |= 0x06;
// save it
lastMptr = mptr;
mptr += 6;
}
}
}
}
}
//log("skipping ks=%" PRId32,(int32_t)ks);
// advance the cursor over the key we used.
nwp[mink] += ks; // Posdb::getKeySize(nwp[mink]);
// exhausted?
if ( nwp[mink] >= nwpEnd[mink] ) {
nwp[mink] = NULL;
}
// or hit a different docid
else if ( Posdb::getKeySize(nwp[mink]) != 6 ) {
nwp[mink] = NULL;
}
// avoid breach of core below now
if ( mptr < mptrEnd ) {
goto mergeMore;
}
// advance the cursor over the key we used.
nwp[mink] += ks; // Posdb::getKeySize(nwp[mink]);
// exhausted?
if ( nwp[mink] >= nwpEnd[mink] ) {
nwp[mink] = NULL;
}
else
if ( Posdb::getKeySize(nwp[mink]) != 6 ) {
// or hit a different docid
nwp[mink] = NULL;
}
} // mink != -1
//log("skipping ks=%" PRId32,(int32_t)ks);
} while( !currTermDone && mptr < mptrEnd ); // merge more ...
// wrap it up here since done merging
miniMergedEnd[j] = mptr;
}