#include "PosdbTable.h"
#include "Posdb.h"

#include "PageTemperatureRegistry.h"
#include "Docid2Siteflags.h"
#include "SiteMedianPageTemperatureRegistry.h"
#include "ScalingFunctions.h"
#include "ScoringWeights.h"
#include "BitOperations.h"
#include "Msg2.h"
#include "Msg39.h"
#include "Sanity.h"
#include "Stats.h"
#include "Conf.h"
#include "TopTree.h"
#include "DocumentIndexChecker.h"
#include "Lang.h"
#include "GbMutex.h"
#include "ScopedLock.h"
#include "Errno.h"
#include <math.h>
#include <valarray>

#ifdef _VALGRIND_
#include <valgrind/memcheck.h>
#include <valgrind/helgrind.h>
#endif

#define BF_HALFSTOPWIKIBIGRAM 0x01 // "to be" in "to be or not to be"
#define BF_PIPED 0x02 // before a query pipe operator
#define BF_SYNONYM 0x04
#define BF_NEGATIVE 0x08 // query word has a negative sign before it
#define BF_BIGRAM 0x10

static const int INTERSECT_SCORING = 0;
static const int INTERSECT_DEBUG_INFO = 1;


static bool s_init = false;
static GbMutex s_mtx_weights;
static DerivedScoringWeights s_scoringWeights;
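// Hash-group lookup tables (as used by the scoring code below):
// s_inBody[hg] is true for hash groups that belong to the document body,
// and s_isCompatible[hg1][hg2] is true when a non-body term pair spanning
// those two hash groups may be scored together.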
static bool s_isCompatible [HASHGROUP_END][HASHGROUP_END];
static bool s_inBody [HASHGROUP_END];


#define gbmin(a,b) ((a)<(b) ? (a) : (b))
#define gbmax(a,b) ((a)>(b) ? (a) : (b))

static inline const char *getWordPosList(uint64_t docId, const char *list, int32_t listSize);
static int docIdVoteBufKeyCompare_desc ( const void *h1, const void *h2 );
static void initWeights();


//struct used for the mini-merges (see mergeTermSubListsForDocId() etc)
struct MiniMergeBuffer {
	//the simple merge buffer
	char buffer[300000];
	//the bufptr-to-which-term-did-it-come-from mapping. posdb keys in 'buffer' above are always a multiple of 6 bytes, so we only need a sixth
	int16_t termIndex[300000/6];
	std::vector<const char *> mergedListStart;
	std::vector<const char *> mergedListEnd;
	MiniMergeBuffer(int numQueryTerms)
		: mergedListStart(numQueryTerms),
		  mergedListEnd(numQueryTerms)
	{
#ifdef _VALGRIND_
		VALGRIND_MAKE_MEM_UNDEFINED(termIndex,sizeof(termIndex));
#endif
	}
	int16_t *getTermIndexPtrForBufferPos(const char *ptr) {
		size_t bufferOffset = (size_t)(ptr-buffer)/6;
		return termIndex+bufferOffset;
	}
	const int16_t *getTermIndexPtrForBufferPos(const char *ptr) const {
		size_t bufferOffset = (size_t)(ptr-buffer)/6;
		return termIndex+bufferOffset;
	}
	int16_t getTermIndexForBufferPos(const char *ptr) const {
		return *getTermIndexPtrForBufferPos(ptr);
	}
};
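// Usage note: for a posdb key pointer p inside 'buffer' during a mini-merge,
// getTermIndexForBufferPos(p) returns the index into Query::m_qterms of the
// query term whose sublist contributed that key, so the scoring code can look
// up that term's user/term-frequency/term weights.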


// A 2D matrix used by createNonBodyTermPairScoreMatrix/findMinTermPairScoreInWindow/getMinTermPairScoreSlidingWindow
class PairScoreMatrix {
	std::vector<float> m;
	const int columns;
public:
	PairScoreMatrix(int size)
		: m(size*size),
		  columns(size)
	{}
	float get(int x, int y) const {
		return m[y*columns+x];
	}
	void set(int x, int y, float value) {
		m[y*columns+x] = value;
	}
};


//////////////////
//
// THE NEW INTERSECTION LOGIC
//
//////////////////


PosdbTable::PosdbTable() {
	// top docid info
	m_q = NULL;
	m_msg39req = NULL;
	reset();
}


PosdbTable::~PosdbTable() {
	reset();
}


void PosdbTable::reset() {
	// has init() been called?
	m_initialized = false;
	//freeMem(); // not implemented
	// does not free the mem of this safebuf, only resets length
	m_docIdVoteBuf.reset();
	m_filtered = 0;
	m_queryTermInfos.clear();
	// assume no-op
	m_t1 = 0LL;
	m_whiteListTable.reset();
	m_addedSites = false;

	// Coverity
	m_docId = 0;
	m_hasMaxSerpScore = false;
	m_addListsTime = 0;
	m_t2 = 0;
	m_qpos.clear();
	m_wikiPhraseIds.clear();
	m_quotedStartIds.clear();
	m_bflags.clear();
	m_qtermNums.clear();
	m_msg2 = NULL;
	m_topTree = NULL;
	m_nqt = 0;
	m_debug = false;
	m_useWhiteTable = false;
	m_numQueryTermInfos = 0;
	m_minTermListSize = 0;
	m_minTermListIdx = 0;
	m_vecSize = 0;
	m_allInSameWikiPhrase = 0;
	m_realMaxTop = 0;
}


// . returns false on error and sets g_errno
// . NOTE: termFreqs is just referenced by us, not copied
// . sets m_startKeys, m_endKeys and m_minNumRecs for each termId
// . TODO: ensure that m_termFreqs[] are all UPPER BOUNDS on the actual #!!
//   we should be able to get an upper bound estimate from the b-tree
//   quickly using Msg36!
// . we now support multiple plus signs before the query term
// . lists[] and termFreqs[] must be 1-1 with q->m_qterms[]
void PosdbTable::init(Query *q, bool debug, TopTree *topTree, const DocumentIndexChecker &documentIndexChecker, Msg2 *msg2, Msg39Request *r) {
	// sanity check -- watch out for double calls
	if ( m_initialized )
		gbshutdownAbort(true);
	// clear everything
	reset();
	// we are now initialized
	m_initialized = true;
	// set debug flag
	m_debug = (debug || g_conf.m_logDebugQuery);
	// we should save the lists!
	//m_lists = msg2->m_lists;//lists;
	//m_numLists = q->m_numTerms;

	// seo.cpp supplies a NULL msg2 because it already sets
	// QueryTerm::m_posdbListPtrs
	if ( ! msg2 ) return;

	m_msg2 = msg2;
	// save the request
	m_msg39req = r;

	m_baseScoringParameters = r->m_baseScoringParameters;
	m_derivedScoringWeights.init(m_baseScoringParameters);
	if(g_conf.m_logTracePosdb || m_msg39req->m_debug)
		m_baseScoringParameters.traceToLog("posdbtable");

	// save this
	//m_coll = coll;
	// get the rec for it
	// CollectionRec *cr = g_collectiondb.getRec ( m_collnum );
	// if ( ! cr )
	// 	gbshutdownAbort(true);
	// set this now
	//m_collnum = cr->m_collnum;

	m_documentIndexChecker = &documentIndexChecker;
	m_topTree = topTree;

	// remember the query class, it has all the info about the termIds
	m_q = q;
	m_nqt = q->getNumTerms();

	m_realMaxTop = r->m_realMaxTop;
	if ( m_realMaxTop > MAX_TOP ) m_realMaxTop = MAX_TOP;

	if ( m_q->m_isBoolean ) m_baseScoringParameters.m_siteRankMultiplier = 0.0;

	// sanity
	if ( msg2->getNumLists() != m_q->getNumTerms() )
		gbshutdownAbort(true);
	// copy the list ptrs to the QueryTerm::m_posdbListPtr
	for ( int32_t i = 0 ; i < m_q->m_numTerms ; i++ )
		m_q->m_qterms[i].m_posdbListPtr = msg2->getList(i);
	// we always use it now
	if ( ! topTree )
		gbshutdownAbort(true);
}


namespace {
//getMaxScoreForNonBodyTermPair(), getScoreForTermPair() and getTermPairScoreForAny() need to keep several items for each of the
//two lists and set them based on the posdb pointer(s). This helper struct handles the common things
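// Fields decoded from a posdb key: p = word position, hg = hash group
// (clamped below HASHGROUP_END), mhg = hash group collapsed to HASHGROUP_BODY
// for in-body groups, wsr = word spam rank, spamw = linker/word-spam weight
// for that rank, denw = density weight for the key's density rank.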
struct PosdbDecodeHelper {
	int32_t p;
	unsigned char hg;
	unsigned char mhg;
	unsigned char wsr;
	float spamw;
	float denw;
	//float diversityWeight; //todo?
	//unsigned char syn; //not supported anymore
	void set(const char *wp, const DerivedScoringWeights &derivedScoringWeights) {
		p = Posdb::getWordPos(wp);
		hg = Posdb::getHashGroup(wp);
		//temporary fix: posdb can have junk in it so clamp the hashgroup to the limit
		if(hg>=HASHGROUP_END)
			hg = HASHGROUP_END-1;
		mhg = hg;
		// reduce to either HASHGROUP_BODY/TITLE/INLINK/META
		if(s_inBody[mhg])
			mhg = HASHGROUP_BODY;
		wsr = Posdb::getWordSpamRank(wp);
		if(hg == HASHGROUP_INLINKTEXT)
			spamw = derivedScoringWeights.m_linkerWeights[wsr];
		else
			spamw = derivedScoringWeights.m_wordSpamWeights[wsr];
		denw = derivedScoringWeights.m_densityWeights[Posdb::getDensityRank(wp)];
		//syn = Posdb::getIsSynonym(wp);
	}
};
} //anonymous namespace


//
|
|
// Find the top score for the term for each hash group.
|
|
// INLINK_TEXT may have more than one entry in the top-scorer list, other hash groups only 1.
|
|
// INLINK_TEXT may override other lower scoring hashgroups' scores
|
|
// Sum up the best scores, and return that result.
|
|
// Sets highestScoringNonBodyPos to the highest scoring position.
|
|
//
|
|
float PosdbTable::getBestScoreSumForSingleTerm(const MiniMergeBuffer *miniMergeBuffer, int32_t i, DocIdScore *pdcs, const char **highestScoringNonBodyPos) {
|
|
const char *wpi = miniMergeBuffer->mergedListStart[i];
|
|
const char *endi = miniMergeBuffer->mergedListEnd[i];
|
|
float nonBodyMax = -1.0;
|
|
int32_t lowestScoreTermIdx = 0;
|
|
float bestScores[MAX_TOP] = {0}; // make Coverity happy
|
|
const char *bestwpi[MAX_TOP];
|
|
int32_t numTop = 0;
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "BEGIN.");
|
|
|
|
// assume no terms!
|
|
*highestScoringNonBodyPos = NULL;
|
|
|
|
if ( wpi ) {
|
|
// Sanity check
|
|
if( wpi >= endi ) {
|
|
logTrace(g_conf.m_logTracePosdb, "END, wpi %p >= %p", wpi, endi);
|
|
return -1.0;
|
|
}
|
|
#ifdef _VALGRIND_
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(wpi,endi-wpi);
|
|
#endif
|
|
//stuff for detecting when a position has bigram matches on both sides
|
|
const bool leftTermIsIgnored = m_queryTermInfos[i].m_leftTermIsIgnored;
|
|
int prevBigramMatchPos = -1;
|
|
int prevBigramMatchQtermIdx = -1;
|
|
float prevBigramMatchTermWeight = 1.0;
|
|
|
|
bool first = true;
|
|
char bestmhg[MAX_TOP];
|
|
while(wpi < endi) {
|
|
float score = 100.0;
|
|
PosdbDecodeHelper helper;
|
|
helper.set(wpi, m_derivedScoringWeights);
|
|
// good diversity?
|
|
//unsigned char div = Posdb::getDiversityRank ( wpi );
|
|
//score *= m_derivedScoringWeights.m_diversityWeights[div];
|
|
//score *= m_derivedScoringWeights.m_diversityWeights[div];
|
|
// hash group? title? body? heading? etc.
|
|
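// Each weight below is applied twice, presumably so the squared weights keep
// single-term scores on the same scale as term-pair scores, which multiply
// the weights of both terms of the pair.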
score *= m_derivedScoringWeights.m_hashGroupWeights[helper.mhg];
|
|
score *= m_derivedScoringWeights.m_hashGroupWeights[helper.mhg];
|
|
// good density?
|
|
score *= helper.denw;
|
|
score *= helper.denw;
|
|
// to make more compatible with pair scores divide by distance of 2
|
|
//score /= 2.0;
|
|
|
|
// word spam?
|
|
score *= helper.spamw;
|
|
score *= helper.spamw;
|
|
|
|
int queryTermIndex = miniMergeBuffer->getTermIndexForBufferPos(wpi);
|
|
const QueryTerm &qterm = m_q->m_qterms[queryTermIndex];
|
|
|
|
score *= qterm.m_userWeight;
|
|
|
|
score *= m_q->m_qterms[queryTermIndex].m_termFreqWeight;
|
|
score *= m_q->m_qterms[queryTermIndex].m_termFreqWeight;
|
|
|
|
score *= qterm.m_termWeight; //regular/bigram/synonym
|
|
score *= qterm.m_termWeight; //regular/bigram/synonym
|
|
|
|
if(leftTermIsIgnored && m_q->m_qterms[queryTermIndex].m_isPhrase) {
|
|
//if the term to the left of us is ignored (stopword/qstopword/highfreqterm/...) then that bigram boost may be lost. Worst case:
|
|
// <ignored> term <ignored>
|
|
// ==>
|
|
// <bigram-hit-left> <term-hit> <bigram-hit-2>
|
|
//and then the boost from the left bigram hit would be lost. So detect this case and give it the extra boost:
|
|
if(prevBigramMatchPos+2>=helper.p &&
|
|
prevBigramMatchQtermIdx!=queryTermIndex) {
|
|
score *= prevBigramMatchTermWeight;
|
|
score *= prevBigramMatchTermWeight;
|
|
}
|
|
//record where we got a bigram hit
|
|
prevBigramMatchPos = helper.p;
|
|
prevBigramMatchQtermIdx = queryTermIndex;
|
|
prevBigramMatchTermWeight = qterm.m_termWeight;
|
|
}
|
|
|
|
// do not allow duplicate hashgroups!
|
|
int32_t bro = -1;
|
|
for( int32_t k=0; k < numTop; k++) {
|
|
if( bestmhg[k] == helper.mhg && helper.hg != HASHGROUP_INLINKTEXT) {
|
|
bro = k;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if ( bro >= 0 ) {
|
|
//
|
|
// We already have a score for this hash group, update if
|
|
// current score is higher
|
|
//
|
|
if ( score > bestScores[bro] ) {
|
|
bestScores[bro] = score;
|
|
bestwpi [bro] = wpi;
|
|
bestmhg [bro] = helper.mhg;
|
|
}
|
|
}
|
|
else
|
|
if ( numTop < m_realMaxTop ) { // MAX_TOP ) {
|
|
//
|
|
// New hash group (or INLINKTEXT).
|
|
// We have free slots in the top-list.
|
|
// Store found score.
|
|
//
|
|
bestScores[numTop] = score;
|
|
bestwpi [numTop] = wpi;
|
|
bestmhg [numTop] = helper.mhg;
|
|
numTop++;
|
|
}
|
|
else
|
|
if ( score > bestScores[lowestScoreTermIdx] ) {
|
|
//
|
|
// New hash group (or INLINKTEXT).
|
|
// We have NO free slots in the top-list.
|
|
// Replace lowest score in top-list with current higher score.
|
|
//
|
|
bestScores[lowestScoreTermIdx] = score;
|
|
bestwpi [lowestScoreTermIdx] = wpi;
|
|
bestmhg [lowestScoreTermIdx] = helper.mhg;
|
|
}
|
|
|
|
// If top-list is full, make lowestScoreTermIdx point to the lowest score
|
|
// in the top-list.
|
|
if ( numTop >= m_realMaxTop ) { // MAX_TOP ) {
|
|
lowestScoreTermIdx = 0;
|
|
for ( int32_t k = 1 ; k < m_realMaxTop; k++ ) {//MAX_TOP ; k++ ) {
|
|
if ( bestScores[k] > bestScores[lowestScoreTermIdx] ) {
|
|
continue;
|
|
}
|
|
lowestScoreTermIdx = k;
|
|
}
|
|
}
|
|
|
|
// For findMinTermPairScoreInWindow() sub-out algo.
|
|
// If the term is not in the body, and the score is the
|
|
// highest non-body term score, return the position index.
|
|
if ( score > nonBodyMax && ! s_inBody[helper.hg] ) {
|
|
nonBodyMax = score;
|
|
*highestScoringNonBodyPos = wpi;
|
|
}
|
|
|
|
// advance posdb pointer
|
|
if(!first)
|
|
wpi += 6;
|
|
else {
|
|
wpi += 12;
|
|
first = false;
|
|
}
|
|
|
|
};
|
|
}
|
|
|
|
// add up the top scores
|
|
float sum = 0.0;
|
|
for ( int32_t k = 0 ; k < numTop ; k++ ) {
|
|
// if it is something like "enough for" in a wikipedia
|
|
// phrase like "time enough for love" give it a boost!
|
|
// now we set a special bit in the keys since we do a mini
|
|
// merge, we do the same thing for the syn bits
|
|
if ( Posdb::getIsHalfStopWikiBigram(bestwpi[k]) ) {
|
|
sum += (bestScores[k] * WIKI_BIGRAM_WEIGHT * WIKI_BIGRAM_WEIGHT);
|
|
}
|
|
else {
|
|
// otherwise just add it up
|
|
sum += bestScores[k];
|
|
}
|
|
}
|
|
|
|
// wiki weight
|
|
//sum *= ts;
|
|
|
|
// shortcut
|
|
//char *maxp = bestwpi[k];
|
|
|
|
// if term is a special wiki half stop bigram
|
|
//if ( m_bflags[i] & BF_HALFSTOPWIKIBIGRAM ) {
|
|
// sum *= WIKI_BIGRAM_WEIGHT;
|
|
// sum *= WIKI_BIGRAM_WEIGHT;
|
|
//}
|
|
|
|
// empty list?
|
|
//if ( ! wpi ) sum = -2.0;
|
|
|
|
//
|
|
// end the loop. return now if not collecting scoring info.
|
|
//
|
|
if ( ! pdcs ) {
|
|
logTrace(g_conf.m_logTracePosdb, "END.");
|
|
return sum;
|
|
}
|
|
|
|
|
|
//#
|
|
//# The below is for visual presentation of the scoring ONLY
|
|
//#
|
|
|
|
// none? wtf?
|
|
if ( numTop <= 0 ) {
|
|
logTrace(g_conf.m_logTracePosdb, "END.");
|
|
return sum;
|
|
}
|
|
|
|
// point into buf
|
|
SingleScore *sx = (SingleScore *)m_singleScoreBuf.getBufPtr();
|
|
int32_t need = sizeof(SingleScore) * numTop;
|
|
|
|
// point to that
|
|
if ( pdcs->m_singlesOffset < 0 ) {
|
|
pdcs->m_singlesOffset = m_singleScoreBuf.length();
|
|
}
|
|
|
|
// reset this i guess
|
|
pdcs->m_singleScores = NULL;
|
|
|
|
// sanity
|
|
if ( m_singleScoreBuf.getAvail() < need ) {
|
|
static bool s_first = true;
|
|
if ( s_first ) {
|
|
log("posdb: CRITICAL single buf overflow");
|
|
}
|
|
s_first = false;
|
|
logTrace(g_conf.m_logTracePosdb, "END.");
|
|
return sum;
|
|
//gbshutdownAbort(true); }
|
|
}
|
|
|
|
// increase buf ptr over this then
|
|
m_singleScoreBuf.incrementLength(need);
|
|
|
|
// set each of the top scoring terms individually
|
|
for ( int32_t k = 0 ; k < numTop ; k++, sx++ ) {
|
|
// update count
|
|
pdcs->m_numSingles++;
|
|
const char *maxp = bestwpi[k];
|
|
memset(sx,0,sizeof(*sx));
|
|
sx->m_isSynonym = Posdb::getIsSynonym(maxp);
|
|
sx->m_isHalfStopWikiBigram = Posdb::getIsHalfStopWikiBigram(maxp);
|
|
//sx->m_isSynonym = (m_bflags[i] & BF_SYNONYM) ;
|
|
sx->m_diversityRank = Posdb::getDiversityRank(maxp);
|
|
sx->m_wordSpamRank = Posdb::getWordSpamRank(maxp);
|
|
sx->m_hashGroup = Posdb::getHashGroup(maxp);
|
|
sx->m_wordPos = Posdb::getWordPos(maxp);
|
|
sx->m_densityRank = Posdb::getDensityRank(maxp);
|
|
|
|
float score = bestScores[k];
|
|
|
|
int queryTermIndex = miniMergeBuffer->getTermIndexForBufferPos(maxp);
|
|
//TODO: shouldn't we multiply with userweight here too?
|
|
//float userWeight = m_q->m_qterms[queryTermIndex].m_userWeight;
|
|
//score *= userWeight;
|
|
|
|
score *= m_q->m_qterms[queryTermIndex].m_termFreqWeight;
|
|
score *= m_q->m_qterms[queryTermIndex].m_termFreqWeight;
|
|
|
|
// if term is a special wiki half stop bigram
|
|
if ( sx->m_isHalfStopWikiBigram ) {
|
|
score *= WIKI_BIGRAM_WEIGHT;
|
|
score *= WIKI_BIGRAM_WEIGHT;
|
|
}
|
|
sx->m_finalScore = score;
|
|
sx->m_tfWeight = m_q->m_qterms[queryTermIndex].m_termFreqWeight;
|
|
sx->m_qtermNum = m_qtermNums[i];
|
|
//int64_t *termFreqs = (int64_t *)m_msg39req->ptr_termFreqs;
|
|
//sx->m_termFreq = termFreqs[sx->m_qtermNum];
|
|
sx->m_bflags = m_bflags[i];
|
|
}
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "END. sum=%f", sum);
|
|
return sum;
|
|
}
|
|
|
|
|
|
|
|
// . advance two ptrs at the same time so it's just a linear scan
|
|
// . TODO: add all up, then basically taking a weight of the top 6 or so...
|
|
float PosdbTable::getMaxScoreForNonBodyTermPair(const MiniMergeBuffer *miniMergeBuffer, int i, int j, int32_t qdist) {
|
|
const char *wpi = miniMergeBuffer->mergedListStart[i];
|
|
const char *wpj = miniMergeBuffer->mergedListStart[j];
|
|
const char *endi = miniMergeBuffer->mergedListEnd[i];
|
|
const char *endj = miniMergeBuffer->mergedListEnd[j];
|
|
|
|
// Sanity check
|
|
if( wpi >= endi || wpj >= endj ) {
|
|
return -1.0;
|
|
}
|
|
|
|
#ifdef _VALGRIND_
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(wpi,endi-wpi);
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(wpj,endj-wpj);
|
|
#endif
|
|
logTrace(g_conf.m_logTracePosdb, "BEGIN.");
|
|
|
|
PosdbDecodeHelper helper1, helper2;
|
|
helper1.set(wpi, m_derivedScoringWeights);
|
|
helper2.set(wpj, m_derivedScoringWeights);
|
|
|
|
bool firsti = true;
|
|
bool firstj = true;
|
|
|
|
float max = -1.0;
|
|
|
|
for(;;) {
|
|
|
|
if ( helper1.p <= helper2.p ) {
|
|
// . skip the pair if they are in different hashgroups
|
|
// . we no longer allow either to be in the body in this
|
|
// algo because we handle those cases in the sliding window
|
|
// algo!
|
|
if( s_isCompatible[helper1.hg][helper2.hg] ) {
|
|
// get distance
|
|
int32_t dist = helper2.p - helper1.p;
|
|
|
|
// if zero, make sure it's 2. this happens when the same bigram
|
|
// is used by both terms. i.e. street uses the bigram
|
|
// 'street light' and so does 'light'. so the wordpositions
|
|
// are exactly the same!
|
|
if ( dist < 2 ) {
|
|
dist = 2;
|
|
}
|
|
|
|
// fix distance if in different non-body hashgroups
|
|
if ( dist > 50 ) {
|
|
dist = FIXED_DISTANCE;
|
|
}
|
|
|
|
// subtract from the dist the terms are apart in the query
|
|
if ( dist >= qdist ) {
|
|
dist = dist - qdist;
|
|
}
|
|
|
|
// good density?
|
|
float score = 100 * helper1.denw * helper2.denw;
|
|
|
|
// hashgroup modifier
|
|
score *= m_derivedScoringWeights.m_hashGroupWeights[helper1.hg];
|
|
score *= m_derivedScoringWeights.m_hashGroupWeights[helper2.hg];
|
|
|
|
const int queryTermIndex1 = miniMergeBuffer->getTermIndexForBufferPos(wpi);
|
|
const QueryTerm &qterm1 = m_q->m_qterms[queryTermIndex1];
|
|
const int queryTermIndex2 = miniMergeBuffer->getTermIndexForBufferPos(wpj);
|
|
const QueryTerm &qterm2 = m_q->m_qterms[queryTermIndex2];
|
|
|
|
score *= qterm1.m_userWeight;
|
|
score *= qterm2.m_userWeight;
|
|
|
|
score *= qterm1.m_termWeight; //regular/bigram/synonym
|
|
score *= qterm2.m_termWeight; //regular/bigram/synonym
|
|
|
|
// word spam weights
|
|
score *= helper1.spamw * helper2.spamw;
|
|
// huge title? do not allow 11th+ word to be weighted high
|
|
//if ( helper1.hg == HASHGROUP_TITLE && dist > 20 )
|
|
// score /= m_baseScoringParameters.m_derivedScoringWeights.m_hashGroupWeights[helper1.hg];
|
|
// mod by distance
|
|
score /= (dist + 1.0);
|
|
// best?
|
|
max = gbmax(max,score);
|
|
}
|
|
|
|
// advance posdb pointer
|
|
if(!firsti)
|
|
wpi += 6;
|
|
else {
|
|
wpi += 12;
|
|
firsti = false;
|
|
}
|
|
|
|
// end of list?
|
|
if ( wpi >= endi ) {
|
|
break; // exit for(;;) loop
|
|
}
|
|
|
|
helper1.set(wpi, m_derivedScoringWeights);
|
|
}
|
|
else {
|
|
// . skip the pair if they are in different hashgroups
|
|
// . we no longer allow either to be in the body in this
|
|
// algo because we handle those cases in the sliding window
|
|
// algo!
|
|
if ( s_isCompatible[helper1.hg][helper2.hg] ) {
|
|
// get distance
|
|
int32_t dist = helper1.p - helper2.p;
|
|
// if zero, make sure it's 2. this happens when the same bigram
|
|
// is used by both terms. i.e. street uses the bigram
|
|
// 'street light' and so does 'light'. so the wordpositions
|
|
// are exactly the same!
|
|
if ( dist < 2 ) dist = 2;
|
|
// fix distance if in different non-body hashgroups
|
|
if ( dist > 50 ) {
|
|
dist = FIXED_DISTANCE;
|
|
}
|
|
|
|
// subtract from the dist the terms are apart in the query
|
|
if ( dist >= qdist ) {
|
|
dist = dist - qdist;
|
|
// add 1 for being out of order
|
|
dist += qdist - 1;
|
|
}
|
|
else {
|
|
//dist = dist - qdist;
|
|
// add 1 for being out of order
|
|
dist += 1; // qdist - 1;
|
|
}
|
|
|
|
// compute score based on that junk
|
|
//score = (MAXWORDPOS+1) - dist;
|
|
// good diversity? unneeded for pair algo
|
|
//score *= m_derivedScoringWeights.m_diversityWeights[div1];
|
|
//score *= m_derivedScoringWeights.m_diversityWeights[div2];
|
|
// good density?
|
|
float score = 100 * helper1.denw * helper2.denw;
|
|
// hashgroup modifier
|
|
score *= m_derivedScoringWeights.m_hashGroupWeights[helper1.hg];
|
|
score *= m_derivedScoringWeights.m_hashGroupWeights[helper2.hg];
|
|
|
|
const int queryTermIndex1 = miniMergeBuffer->getTermIndexForBufferPos(wpi);
|
|
const QueryTerm &qterm1 = m_q->m_qterms[queryTermIndex1];
|
|
const int queryTermIndex2 = miniMergeBuffer->getTermIndexForBufferPos(wpj);
|
|
const QueryTerm &qterm2 = m_q->m_qterms[queryTermIndex2];
|
|
|
|
score *= qterm1.m_userWeight;
|
|
score *= qterm2.m_userWeight;
|
|
|
|
score *= qterm1.m_termWeight; //regular/bigram/synonym
|
|
score *= qterm2.m_termWeight; //regular/bigram/synonym
|
|
|
|
// word spam weights
|
|
score *= helper1.spamw * helper2.spamw;
|
|
// huge title? do not allow 11th+ word to be weighted high
|
|
//if ( helper1.hg == HASHGROUP_TITLE && dist > 20 )
|
|
// score /= m_derivedScoringWeights.m_hashGroupWeights[helper1.hg];
|
|
// mod by distance
|
|
score /= (dist + 1.0);
|
|
// best?
|
|
max = gbmax(max,score);
|
|
}
|
|
|
|
// advance posdb pointer
|
|
if(!firstj)
|
|
wpj += 6;
|
|
else {
|
|
wpj += 12;
|
|
firstj = false;
|
|
}
|
|
|
|
// end of list?
|
|
if ( wpj >= endj ) {
|
|
break; // exit for(;;) loop
|
|
}
|
|
|
|
helper2.set(wpj, m_derivedScoringWeights);
|
|
}
|
|
}
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "END.");
|
|
return max;
|
|
}
|
|
|
|
|
|
|
|
float PosdbTable::getScoreForTermPair(const MiniMergeBuffer *miniMergeBuffer, const char *wpi, const char *wpj, int32_t fixedDistance, int32_t qdist) {
|
|
logTrace(g_conf.m_logTracePosdb, "BEGIN.");
|
|
|
|
if ( ! wpi ) {
|
|
logTrace(g_conf.m_logTracePosdb, "END.");
|
|
return -1.00;
|
|
}
|
|
if ( ! wpj ) {
|
|
logTrace(g_conf.m_logTracePosdb, "END.");
|
|
return -1.00;
|
|
}
|
|
|
|
#ifdef _VALGRIND_
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(wpi,6);
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(wpj,6);
|
|
#endif
|
|
PosdbDecodeHelper helper1, helper2;
|
|
helper1.set(wpi, m_derivedScoringWeights);
|
|
helper2.set(wpj, m_derivedScoringWeights);
|
|
|
|
float dist;
|
|
// set this
|
|
if ( fixedDistance != 0 ) {
|
|
dist = fixedDistance;
|
|
}
|
|
else {
|
|
dist = abs(helper1.p - helper2.p);
|
|
// if zero, make sure it's 2. this happens when the same bigram
|
|
// is used by both terms. i.e. street uses the bigram
|
|
// 'street light' and so does 'light'. so the wordpositions
|
|
// are exactly the same!
|
|
if ( dist < 2 ) dist = 2;
|
|
// subtract from the dist the terms are apart in the query
|
|
if ( dist >= qdist ) dist = dist - qdist;
|
|
// out of order? penalize by 1 unit
|
|
if ( helper2.p < helper1.p ) dist += 1;
|
|
}
|
|
// TODO: use left and right diversity if no matching query term
|
|
// is on the left or right
|
|
//score *= m_derivedScoringWeights.m_diversityWeights[div1];
|
|
//score *= m_derivedScoringWeights.m_diversityWeights[div2];
|
|
// good density?
|
|
float score = 100 * helper1.denw * helper2.denw;
|
|
// wikipedia phrase weight
|
|
//score *= ts;
|
|
// hashgroup modifier
|
|
score *= m_derivedScoringWeights.m_hashGroupWeights[helper1.hg];
|
|
score *= m_derivedScoringWeights.m_hashGroupWeights[helper2.hg];
|
|
|
|
const int queryTermIndex1 = miniMergeBuffer->getTermIndexForBufferPos(wpi);
|
|
const QueryTerm &qterm1 = m_q->m_qterms[queryTermIndex1];
|
|
const int queryTermIndex2 = miniMergeBuffer->getTermIndexForBufferPos(wpj);
|
|
const QueryTerm &qterm2 = m_q->m_qterms[queryTermIndex2];
|
|
|
|
score *= qterm1.m_userWeight;
|
|
score *= qterm2.m_userWeight;
|
|
|
|
score *= qterm1.m_termWeight; //regular/bigram/synonym
|
|
score *= qterm2.m_termWeight; //regular/bigram/synonym
|
|
|
|
// word spam weights
|
|
score *= helper1.spamw * helper2.spamw;
|
|
// mod by distance
|
|
score /= (dist + 1.0);
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "END. score=%f", score);
|
|
return score;
|
|
}
|
|
|
|
|
|
|
|
// . advance two ptrs at the same time so it's just a linear scan
|
|
// . TODO: add all up, then basically taking a weight of the top 6 or so...
|
|
// . skip body terms not in the sliding window as defined by bestMinTermPairWindowPtrs[]
|
|
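// bestMinTermPairWindowPtrs holds, per query term, the posdb key pointer of
// that term's position inside the winning sliding window; any body position
// other than that one is skipped below.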
float PosdbTable::getTermPairScoreForAny(const MiniMergeBuffer *miniMergeBuffer, int i, int j, const std::vector<const char *> &bestMinTermPairWindowPtrs, DocIdScore *pdcs) {
|
|
const char *wpi = miniMergeBuffer->mergedListStart[i];
|
|
const char *wpj = miniMergeBuffer->mergedListStart[j];
|
|
const char *endi = miniMergeBuffer->mergedListEnd[i];
|
|
const char *endj = miniMergeBuffer->mergedListEnd[j];
|
|
|
|
// wiki phrase weight?
|
|
float wikiPhraseWeight;
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "BEGIN. i=%d, j=%d", i, j);
|
|
|
|
int32_t qdist;
|
|
|
|
// but if they are in the same wikipedia phrase
|
|
// then try to keep their positions as in the query.
|
|
// so for 'time enough for love' ideally we want
|
|
// 'time' to be 6 units apart from 'love'
|
|
if ( m_wikiPhraseIds[j] == m_wikiPhraseIds[i] && m_wikiPhraseIds[j] ) { // zero means not in a phrase
|
|
qdist = m_qpos[j] - m_qpos[i];
|
|
// wiki weight
|
|
wikiPhraseWeight = (float)WIKI_WEIGHT;
|
|
}
|
|
else {
|
|
// basically try to get query words as close
|
|
// together as possible
|
|
qdist = 2;
|
|
// this should help fix
|
|
// 'what is an unsecured loan' so we are more likely
|
|
// to get the page that has that exact phrase in it.
|
|
// yes, but hurts how to make a lock pick set.
|
|
//qdist = qpos[j] - qpos[i];
|
|
// wiki weight
|
|
wikiPhraseWeight = 1.0;
|
|
}
|
|
|
|
bool inSameQuotedPhrase = false;
|
|
if ( m_quotedStartIds[i] == m_quotedStartIds[j] && m_quotedStartIds[i] >= 0 ) {
|
|
inSameQuotedPhrase = true;
|
|
}
|
|
|
|
// oops.. this was not counting non-space punct for 2 units
|
|
// instead of 1
|
|
if ( inSameQuotedPhrase ) {
|
|
qdist = m_qpos[j] - m_qpos[i];
|
|
}
|
|
|
|
PosdbDecodeHelper helper1, helper2;
|
|
helper1.set(wpi, m_derivedScoringWeights);
|
|
helper2.set(wpj, m_derivedScoringWeights);
|
|
|
|
bool firsti = true;
|
|
bool firstj = true;
|
|
|
|
int32_t lowestScoreTermIdx = -1;
|
|
float bestScores[MAX_TOP] = {0}; // make Coverity happy
|
|
const char *bestwpi [MAX_TOP];
|
|
const char *bestwpj [MAX_TOP];
|
|
char bestmhg1 [MAX_TOP];
|
|
char bestmhg2 [MAX_TOP];
|
|
char bestFixed [MAX_TOP];
|
|
int32_t numTop = 0;
|
|
bool fixedDistance;
|
|
int32_t bro;
|
|
|
|
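// Advance a posdb pointer within a merged sublist: the first key of a docid's
// run is 12 bytes, later compressed keys are 6 bytes. Note that the macro
// breaks out of the enclosing for(;;) loop when the end of the sublist is hit.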
#define advance(first,wp,end,helper) \
	if(!first) \
		wp += 6; \
	else { \
		wp += 12; \
		first = false; \
	} \
	if(wp >= end) \
		break; \
	helper.set(wp, m_derivedScoringWeights);
|
|
|
|
for(;;) {
|
|
// . if p1/p2 is in body and not in window, skip
|
|
// . this is how we restrict all body terms to the winning
|
|
// sliding window
|
|
if ( s_inBody[helper1.hg] && wpi != bestMinTermPairWindowPtrs[i] ) {
|
|
advance(firsti,wpi,endi,helper1);
|
|
}
|
|
|
|
if ( s_inBody[helper2.hg] && wpj != bestMinTermPairWindowPtrs[j] ) {
|
|
advance(firstj,wpj,endj,helper2);
|
|
}
|
|
|
|
// make this strictly < now and not <= because in the event
|
|
// of bigram terms, where p1==p2 we'd like to advance p2/wj to
|
|
// point to the non-syn single term in order to get a better score
|
|
// to fix the 'search engine' query on gigablast.com
|
|
if ( helper1.p <= helper2.p ) {
|
|
// get distance
|
|
int32_t dist = helper2.p - helper1.p;
|
|
|
|
// if in the same quoted phrase, order is bad!
|
|
if ( inSameQuotedPhrase ) {
|
|
// TODO: allow for off by 1
|
|
// if it has punct in it then dist will be 3,
|
|
// just a space or similar then dist should be 2.
|
|
if ( dist > qdist && dist - qdist >= 2 ) {
|
|
advance(firsti,wpi,endi,helper1);
|
|
}
|
|
|
|
if ( dist < qdist && qdist - dist >= 2 ) {
|
|
advance(firsti,wpi,endi,helper1);
|
|
}
|
|
}
|
|
|
|
// if zero, make sure it's 2. this happens when the same bigram
|
|
// is used by both terms. i.e. street uses the bigram
|
|
// 'street light' and so does 'light'. so the wordpositions
|
|
// are exactly the same!
|
|
dist = gbmax(dist,2);
|
|
// fix distance if in different non-body hashgroups
|
|
if ( dist < 50 ) {
|
|
fixedDistance = false;
|
|
}
|
|
// body vs title, linktext vs title, linktext vs body
|
|
else if ( helper1.mhg != helper2.mhg ) {
|
|
dist = FIXED_DISTANCE;
|
|
fixedDistance = true;
|
|
}
|
|
// link text to other link text
|
|
else if ( helper1.mhg == HASHGROUP_INLINKTEXT ) {
|
|
dist = FIXED_DISTANCE;
|
|
fixedDistance = true;
|
|
}
|
|
else {
|
|
fixedDistance = false;
|
|
}
|
|
|
|
// subtract from the dist the terms are apart in the query
|
|
if ( dist >= qdist ) {
|
|
dist = dist - qdist;
|
|
}
|
|
|
|
// good density?
|
|
float score;
|
|
score = 100 * helper1.denw * helper2.denw;
|
|
// hashgroup modifier
|
|
score *= m_derivedScoringWeights.m_hashGroupWeights[helper1.hg];
|
|
score *= m_derivedScoringWeights.m_hashGroupWeights[helper2.hg];
|
|
|
|
{
|
|
const int queryTermIndex1 = miniMergeBuffer->getTermIndexForBufferPos(wpi);
|
|
const QueryTerm &qterm1 = m_q->m_qterms[queryTermIndex1];
|
|
const int queryTermIndex2 = miniMergeBuffer->getTermIndexForBufferPos(wpj);
|
|
const QueryTerm &qterm2 = m_q->m_qterms[queryTermIndex2];
|
|
|
|
score *= qterm1.m_termFreqWeight;
|
|
score *= qterm2.m_termFreqWeight;
|
|
|
|
score *= qterm1.m_userWeight;
|
|
score *= qterm2.m_userWeight;
|
|
|
|
score *= qterm1.m_termWeight; //regular/bigram/synonym
|
|
score *= qterm2.m_termWeight; //regular/bigram/synonym
|
|
}
|
|
|
|
// the new logic
|
|
if ( Posdb::getIsHalfStopWikiBigram(wpi) ) {
|
|
score *= WIKI_BIGRAM_WEIGHT;
|
|
}
|
|
|
|
if ( Posdb::getIsHalfStopWikiBigram(wpj) ) {
|
|
score *= WIKI_BIGRAM_WEIGHT;
|
|
}
|
|
|
|
// word spam weights
|
|
score *= helper1.spamw * helper2.spamw;
|
|
// huge title? do not allow 11th+ word to be weighted high
|
|
//if ( helper1.hg == HASHGROUP_TITLE && dist > 20 )
|
|
// score /= m_derivedScoringWeights.m_hashGroupWeights[helper1.hg];
|
|
// mod by distance
|
|
score /= (dist + 1.0);
|
|
|
|
// if our hg1/hg2 hashgroup pairing already exists
|
|
// in the bestScores array we have to beat it and then
|
|
// we have to replace that. we can only have one per,
|
|
// except for linktext!
|
|
|
|
bro = -1;
|
|
for ( int32_t k = 0 ; k < numTop ; k++ ) {
|
|
if ( bestmhg1[k]==helper1.mhg && helper1.hg != HASHGROUP_INLINKTEXT ){
|
|
bro = k;
|
|
break;
|
|
}
|
|
if ( bestmhg2[k]==helper2.mhg && helper2.hg != HASHGROUP_INLINKTEXT ){
|
|
bro = k;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if ( bro >= 0 ) {
|
|
if ( score > bestScores[bro] ) {
|
|
bestScores[bro] = score;
|
|
bestwpi [bro] = wpi;
|
|
bestwpj [bro] = wpj;
|
|
bestmhg1 [bro] = helper1.mhg;
|
|
bestmhg2 [bro] = helper2.mhg;
|
|
bestFixed [bro] = fixedDistance;
|
|
}
|
|
}
|
|
else
|
|
if ( numTop < m_realMaxTop ) { // MAX_TOP ) {
|
|
bestScores[numTop] = score;
|
|
bestwpi [numTop] = wpi;
|
|
bestwpj [numTop] = wpj;
|
|
bestmhg1 [numTop] = helper1.mhg;
|
|
bestmhg2 [numTop] = helper2.mhg;
|
|
bestFixed [numTop] = fixedDistance;
|
|
numTop++;
|
|
}
|
|
else
|
|
if ( lowestScoreTermIdx >= 0 && score > bestScores[lowestScoreTermIdx] ) {
|
|
bestScores[lowestScoreTermIdx] = score;
|
|
bestwpi [lowestScoreTermIdx] = wpi;
|
|
bestwpj [lowestScoreTermIdx] = wpj;
|
|
bestmhg1 [lowestScoreTermIdx] = helper1.mhg;
|
|
bestmhg2 [lowestScoreTermIdx] = helper2.mhg;
|
|
bestFixed [lowestScoreTermIdx] = fixedDistance;
|
|
}
|
|
|
|
// set "lowestScoreTermIdx" to the lowest score out of the top scores
|
|
if ( numTop >= m_realMaxTop ) { // MAX_TOP ) {
|
|
lowestScoreTermIdx = 0;
|
|
for ( int32_t k = 1 ; k < m_realMaxTop;k++){//MAX_TOP;k++
|
|
if (bestScores[k] > bestScores[lowestScoreTermIdx] ) {
|
|
continue;
|
|
}
|
|
|
|
lowestScoreTermIdx = k;
|
|
}
|
|
}
|
|
|
|
advance(firsti,wpi,endi,helper1);
|
|
}
|
|
else {
|
|
// get distance
|
|
int32_t dist = helper1.p - helper2.p;
|
|
|
|
// if in the same quoted phrase, order is bad!
|
|
if ( inSameQuotedPhrase ) {
|
|
advance(firstj,wpj,endj,helper2);
|
|
}
|
|
|
|
// if zero, make sure it's 2. this happens when the same bigram
|
|
// is used by both terms. i.e. street uses the bigram
|
|
// 'street light' and so does 'light'. so the wordpositions
|
|
// are exactly the same!
|
|
dist = gbmax(dist,2);
|
|
|
|
// fix distance if in different non-body hashgroups
|
|
if ( dist < 50 ) {
|
|
fixedDistance = false;
|
|
}
|
|
// body vs title, linktext vs title, linktext vs body
|
|
else if ( helper1.mhg != helper2.mhg ) {
|
|
dist = FIXED_DISTANCE;
|
|
fixedDistance = true;
|
|
}
|
|
// link text to other link text
|
|
else if ( helper1.mhg == HASHGROUP_INLINKTEXT ) {
|
|
dist = FIXED_DISTANCE;
|
|
fixedDistance = true;
|
|
}
|
|
else
|
|
fixedDistance = false;
|
|
// subtract from the dist the terms are apart in the query
|
|
if ( dist >= qdist ) {
|
|
dist = dist - qdist;
|
|
// add 1 for being out of order
|
|
dist += qdist - 1;
|
|
}
|
|
else {
|
|
//dist = dist - qdist;
|
|
// add 1 for being out of order
|
|
dist += 1; // qdist - 1;
|
|
}
|
|
|
|
// good diversity? unneeded for pair algo
|
|
// good density?
|
|
float score;
|
|
score = 100 * helper1.denw * helper2.denw;
|
|
// hashgroup modifier
|
|
score *= m_derivedScoringWeights.m_hashGroupWeights[helper1.hg];
|
|
score *= m_derivedScoringWeights.m_hashGroupWeights[helper2.hg];
|
|
|
|
{
|
|
const int queryTermIndex1 = miniMergeBuffer->getTermIndexForBufferPos(wpi);
|
|
const QueryTerm &qterm1 = m_q->m_qterms[queryTermIndex1];
|
|
const int queryTermIndex2 = miniMergeBuffer->getTermIndexForBufferPos(wpj);
|
|
const QueryTerm &qterm2 = m_q->m_qterms[queryTermIndex2];
|
|
|
|
score *= qterm1.m_termFreqWeight;
|
|
score *= qterm2.m_termFreqWeight;
|
|
|
|
score *= qterm1.m_userWeight;
|
|
score *= qterm2.m_userWeight;
|
|
|
|
score *= qterm1.m_termWeight; //regular/bigram/synonym
|
|
score *= qterm2.m_termWeight; //regular/bigram/synonym
|
|
}
|
|
|
|
// word spam weights
|
|
score *= helper1.spamw * helper2.spamw;
|
|
// huge title? do not allow 11th+ word to be weighted high
|
|
//if ( hg1 == HASHGROUP_TITLE && dist > 20 )
|
|
// score /= m_derivedScoringWeights.m_hashGroupWeights[hg1];
|
|
// mod by distance
|
|
score /= (dist + 1.0);
|
|
|
|
// if our hg1/hg2 hashgroup pairing already exists
|
|
// in the bestScores array we have to beat it and then
|
|
// we have to replace that. we can only have one per,
|
|
// except for linktext!
|
|
|
|
bro = -1;
|
|
for ( int32_t k = 0 ; k < numTop ; k++ ) {
|
|
if ( bestmhg1[k]==helper1.mhg && helper1.hg !=HASHGROUP_INLINKTEXT ){
|
|
bro = k;
|
|
break;
|
|
}
|
|
if ( bestmhg2[k]==helper2.mhg && helper2.hg !=HASHGROUP_INLINKTEXT ){
|
|
bro = k;
|
|
break;
|
|
}
|
|
}
|
|
if ( bro >= 0 ) {
|
|
if ( score > bestScores[bro] ) {
|
|
bestScores[bro] = score;
|
|
bestwpi [bro] = wpi;
|
|
bestwpj [bro] = wpj;
|
|
bestmhg1 [bro] = helper1.mhg;
|
|
bestmhg2 [bro] = helper2.mhg;
|
|
bestFixed [bro] = fixedDistance;
|
|
}
|
|
}
|
|
// best?
|
|
else if ( numTop < m_realMaxTop ) { // MAX_TOP ) {
|
|
bestScores[numTop] = score;
|
|
bestwpi [numTop] = wpi;
|
|
bestwpj [numTop] = wpj;
|
|
bestmhg1 [numTop] = helper1.mhg;
|
|
bestmhg2 [numTop] = helper2.mhg;
|
|
bestFixed [numTop] = fixedDistance;
|
|
numTop++;
|
|
}
|
|
else if ( lowestScoreTermIdx >= 0 && score > bestScores[lowestScoreTermIdx] ) {
|
|
bestScores[lowestScoreTermIdx] = score;
|
|
bestwpi [lowestScoreTermIdx] = wpi;
|
|
bestwpj [lowestScoreTermIdx] = wpj;
|
|
bestmhg1 [lowestScoreTermIdx] = helper1.mhg;
|
|
bestmhg2 [lowestScoreTermIdx] = helper2.mhg;
|
|
bestFixed [lowestScoreTermIdx] = fixedDistance;
|
|
}
|
|
|
|
// set "lowestScoreTermIdx" to the lowest score out of the top scores
|
|
if ( numTop >= m_realMaxTop ) { // MAX_TOP ) {
|
|
lowestScoreTermIdx = 0;
|
|
for ( int32_t k = 1 ; k < m_realMaxTop;k++){//MAX_TOP;k++
|
|
if( bestScores[k] > bestScores[lowestScoreTermIdx]) {
|
|
continue;
|
|
}
|
|
lowestScoreTermIdx = k;
|
|
}
|
|
}
|
|
advance(firstj,wpj,endj,helper2);
|
|
}
|
|
} // for(;;)
|
|
|
|
if(g_conf.m_logTracePosdb) {
|
|
logTrace(g_conf.m_logTracePosdb, "numTop=%d", numTop);
|
|
for(int i=0; i<numTop; i++)
|
|
logTrace(g_conf.m_logTracePosdb, " bestScores[%d]=%f", i, bestScores[i]);
|
|
}
|
|
|
|
|
|
// add up the top scores
|
|
float sum = 0.0;
|
|
for ( int32_t k = 0 ; k < numTop ; k++ ) {
|
|
sum += bestScores[k];
|
|
}
|
|
|
|
if (m_debug) {
|
|
for ( int32_t k = 0 ; k < numTop ; k++ )
|
|
log(LOG_INFO, "posdb: best score #%" PRId32" = %f",k,bestScores[k]);
|
|
log(LOG_INFO, "posdb: best score sum = %f",sum);
|
|
}
|
|
|
|
// wiki phrase weight
|
|
sum *= wikiPhraseWeight;
|
|
|
|
if (m_debug) {
|
|
log(LOG_INFO, "posdb: best score final = %f",sum);
|
|
}
|
|
|
|
//
|
|
// end the loop. return now if not collecting scoring info.
|
|
//
|
|
if ( ! pdcs ) {
|
|
logTrace(g_conf.m_logTracePosdb, "END. sum=%.3f", sum);
|
|
return sum;
|
|
}
|
|
|
|
// none? wtf?
|
|
if ( numTop <= 0 ) {
|
|
logTrace(g_conf.m_logTracePosdb, "END. sum=%.3f", sum);
|
|
return sum;
|
|
}
|
|
|
|
//
|
|
// now store the PairScores into the m_pairScoreBuf for this
|
|
// top docid.
|
|
//
|
|
|
|
// point into buf
|
|
PairScore *px = (PairScore *)m_pairScoreBuf.getBufPtr();
|
|
int32_t need = sizeof(PairScore) * numTop;
|
|
|
|
// point to that
|
|
if ( pdcs->m_pairsOffset < 0 ) {
|
|
pdcs->m_pairsOffset = m_pairScoreBuf.length();
|
|
}
|
|
|
|
// reset this i guess
|
|
pdcs->m_pairScores = NULL;
|
|
|
|
// sanity
|
|
if ( m_pairScoreBuf.getAvail() < need ) {
|
|
// m_pairScores will be NULL
|
|
static bool s_first = true;
|
|
if ( s_first ) {
|
|
log("posdb: CRITICAL pair buf overflow");
|
|
}
|
|
|
|
s_first = false;
|
|
logTrace(g_conf.m_logTracePosdb, "END.");
|
|
return sum;
|
|
}
|
|
// increase buf ptr over this then
|
|
m_pairScoreBuf.incrementLength(need);
|
|
|
|
// set each of the top scoring terms individually
|
|
for ( int32_t k = 0 ; k < numTop ; k++, px++ ) {
|
|
pdcs->m_numPairs++;
|
|
memset(px,0,sizeof(*px));
|
|
const char *maxp1 = bestwpi[k];
|
|
const char *maxp2 = bestwpj[k];
|
|
float score = bestScores[k];
|
|
bool fixedDist = bestFixed[k];
|
|
score *= wikiPhraseWeight;
|
|
|
|
const int queryTermIndex1 = miniMergeBuffer->getTermIndexForBufferPos(maxp1);
|
|
const float termFreqWeight1 = m_q->m_qterms[queryTermIndex1].m_termFreqWeight;
|
|
const int queryTermIndex2 = miniMergeBuffer->getTermIndexForBufferPos(maxp2);
|
|
const float termFreqWeight2 = m_q->m_qterms[queryTermIndex2].m_termFreqWeight;
|
|
|
|
// we have to encode these bits into the mini merge now
|
|
if ( Posdb::getIsHalfStopWikiBigram(maxp1) ) {
|
|
score *= WIKI_BIGRAM_WEIGHT;
|
|
}
|
|
|
|
if ( Posdb::getIsHalfStopWikiBigram(maxp2) ) {
|
|
score *= WIKI_BIGRAM_WEIGHT;
|
|
}
|
|
|
|
//if ( m_bflags[i] & BF_HALFSTOPWIKIBIGRAM )
|
|
//if ( m_bflags[j] & BF_HALFSTOPWIKIBIGRAM )
|
|
// wiki phrase weight
|
|
px->m_finalScore = score;
|
|
px->m_wordPos1 = Posdb::getWordPos(maxp1);
|
|
px->m_wordPos2 = Posdb::getWordPos(maxp2);
|
|
px->m_isSynonym1 = Posdb::getIsSynonym(maxp1);
|
|
px->m_isSynonym2 = Posdb::getIsSynonym(maxp2);
|
|
px->m_isHalfStopWikiBigram1 = Posdb::getIsHalfStopWikiBigram(maxp1);
|
|
px->m_isHalfStopWikiBigram2 = Posdb::getIsHalfStopWikiBigram(maxp2);
|
|
px->m_diversityRank1 = Posdb::getDiversityRank(maxp1);
|
|
px->m_diversityRank2 = Posdb::getDiversityRank(maxp2);
|
|
px->m_wordSpamRank1 = Posdb::getWordSpamRank(maxp1);
|
|
px->m_wordSpamRank2 = Posdb::getWordSpamRank(maxp2);
|
|
px->m_hashGroup1 = Posdb::getHashGroup(maxp1);
|
|
px->m_hashGroup2 = Posdb::getHashGroup(maxp2);
|
|
px->m_qdist = qdist;
|
|
// bigram algorithm fix
|
|
//if ( px->m_wordPos1 == px->m_wordPos2 )
|
|
// px->m_wordPos2 += 2;
|
|
px->m_densityRank1 = Posdb::getDensityRank(maxp1);
|
|
px->m_densityRank2 = Posdb::getDensityRank(maxp2);
|
|
px->m_fixedDistance = fixedDist;
|
|
px->m_qtermNum1 = m_qtermNums[i];
|
|
px->m_qtermNum2 = m_qtermNums[j];
|
|
px->m_tfWeight1 = termFreqWeight1;
|
|
px->m_tfWeight2 = termFreqWeight2;
|
|
px->m_bflags1 = m_bflags[i];
|
|
px->m_bflags2 = m_bflags[j];
|
|
|
|
// flag it as in same wiki phrase
|
|
if ( almostEqualFloat(wikiPhraseWeight, (float)WIKI_WEIGHT) ) {
|
|
px->m_inSameWikiPhrase = 1;
|
|
}
|
|
else {
|
|
px->m_inSameWikiPhrase = 0;
|
|
}
|
|
|
|
#ifdef _VALGRIND_
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(px,sizeof(*px));
|
|
#endif
|
|
// only log for debug if it is one result
|
|
if ( m_debug ) {
|
|
// log each one for debug
|
|
log(LOG_INFO, "posdb: result #%" PRId32" "
|
|
"i=%" PRId32" "
|
|
"j=%" PRId32" "
|
|
"termNum0=%" PRId32" "
|
|
"termNum1=%" PRId32" "
|
|
"finalscore=%f "
|
|
"tfw0=%f "
|
|
"tfw1=%f "
|
|
"fixeddist=%" PRId32" " // bool
|
|
"wikiPhraseWeight=%f "
|
|
"bflags0=%" PRId32" "
|
|
"bflags1=%" PRId32" "
|
|
"syn0=%" PRId32" "
|
|
"syn1=%" PRId32" "
|
|
"div0=%" PRId32" "
|
|
"div1=%" PRId32" "
|
|
"wspam0=%" PRId32" "
|
|
"wspam1=%" PRId32" "
|
|
"hgrp0=%s "
|
|
"hgrp1=%s "
|
|
"qdist=%" PRId32" "
|
|
"wpos0=%" PRId32" "
|
|
"wpos1=%" PRId32" "
|
|
"dens0=%" PRId32" "
|
|
"dens1=%" PRId32" ", k, i, j, px->m_qtermNum1, px->m_qtermNum2, score, termFreqWeight1,
|
|
termFreqWeight2, (int32_t) bestFixed[k], wikiPhraseWeight, (int32_t) m_bflags[i], (int32_t) m_bflags[j],
|
|
(int32_t) px->m_isSynonym1, (int32_t) px->m_isSynonym2, (int32_t) px->m_diversityRank1,
|
|
(int32_t) px->m_diversityRank2, (int32_t) px->m_wordSpamRank1, (int32_t) px->m_wordSpamRank2,
|
|
getHashGroupString(px->m_hashGroup1), getHashGroupString(px->m_hashGroup2), (int32_t) px->m_qdist,
|
|
(int32_t) px->m_wordPos1, (int32_t) px->m_wordPos2, (int32_t) px->m_densityRank1,
|
|
(int32_t) px->m_densityRank2
|
|
);
|
|
}
|
|
}
|
|
|
|
// do the same but for second bests! so seo.cpp's top term pairs
|
|
// algo can do a term insertion, and if that hurts the best pair
|
|
// the 2nd best might cover for it. ideally, we'd have all the term
|
|
// pairs for this algo, but i think we'll have to get by on just this.
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "END. sum=%f", sum);
|
|
return sum;
|
|
}
|
|
|
|
//
|
|
//
|
|
// TRY TO SPEED IT UP!!!
|
|
//
|
|
//
|
|
|
|
|
|
// returns false and sets g_errno on error
|
|
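// For each required query term this builds a QueryTermInfo that gathers the
// term's own posdb list plus the lists of its left/right bigrams and synonyms
// as sublists; QueryTerm::m_bitNum records which QueryTermInfo (bit position)
// each term was assigned to. It also finds the smallest termlist and
// pre-allocates the docid vote buffer used for the intersection.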
bool PosdbTable::setQueryTermInfo ( ) {
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "BEGIN.");
|
|
|
|
// alloc space. assume max
|
|
m_queryTermInfos.resize(m_q->m_numTerms);
|
|
#ifdef _VALGRIND_
|
|
VALGRIND_MAKE_MEM_UNDEFINED(&(m_queryTermInfos[0]), sizeof(m_queryTermInfos[0])*m_queryTermInfos.size());
|
|
#endif
|
|
|
|
int32_t nrg = 0;
|
|
|
|
m_hasMaxSerpScore = false;
|
|
if ( m_msg39req->m_minSerpDocId ) {
|
|
m_hasMaxSerpScore = true;
|
|
}
|
|
|
|
for ( int32_t i = 0 ; i < m_q->m_numTerms ; i++ ) {
|
|
QueryTerm *qt = &m_q->m_qterms[i];
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "i=%" PRId32 ", term=[%.*s], required=%s", i, qt->m_termLen, qt->m_term, qt->m_isRequired?"true":"false");
|
|
|
|
if ( ! qt->m_isRequired ) {
|
|
continue;
|
|
}
|
|
|
|
// set this stuff
|
|
const QueryWord *qw = qt->m_qword;
|
|
// get one
|
|
QueryTermInfo *qti = &(m_queryTermInfos[nrg]);
|
|
// and set it
|
|
qti->m_qtermNum = i;
|
|
qti->m_qterm = qt;
|
|
|
|
// this is not good enough, we need to count
|
|
// non-whitespace punct as 2 units not 1 unit
|
|
// otherwise qdist gets thrown off and our phrasing fails.
|
|
// so use QueryTerm::m_qpos just for this.
|
|
qti->m_qpos = qw->m_posNum;
|
|
qti->m_wikiPhraseId = qw->m_wikiPhraseId;
|
|
qti->m_quotedStartId = qw->m_quoteStart;
|
|
// count
|
|
int32_t nn = 0;
|
|
// also add in bigram lists
|
|
int32_t left = qt->m_leftPhraseTermNum;
|
|
int32_t right = qt->m_rightPhraseTermNum;
|
|
// terms
|
|
const QueryTerm *leftTerm = qt->m_leftPhraseTerm;
|
|
const QueryTerm *rightTerm = qt->m_rightPhraseTerm;
|
|
bool leftAlreadyAdded = false;
|
|
bool rightAlreadyAdded = false;
|
|
//
|
|
// add the non-bigram list AFTER the
|
|
// bigrams, which we like to do when we PREFER the bigram
|
|
// terms because they are scored higher, specifically, as
|
|
// in the case of being half stop wikipedia phrases like
|
|
// "the tigers" for the query 'the tigers' we want to give
|
|
// a slight bonus, 1.20x, for having that bigram since its
|
|
// in wikipedia
|
|
//
|
|
|
|
//
|
|
// add left bigram lists. BACKWARDS.
|
|
//
|
|
if ( left>=0 && leftTerm && leftTerm->m_isWikiHalfStopBigram ){
|
|
// assume added
|
|
leftAlreadyAdded = true;
|
|
// get list
|
|
RdbList *list = m_q->m_qterms[left].m_posdbListPtr;
|
|
// add list ptr into our required group
|
|
qti->m_subList[nn].m_qt = &m_q->m_qterms[left];
|
|
qti->m_subList[nn].m_list = list;
|
|
// special flags
|
|
qti->m_subList[nn].m_bigramFlag = BF_HALFSTOPWIKIBIGRAM;
|
|
// before a pipe operator?
|
|
if ( qt->m_piped ) qti->m_subList[nn].m_bigramFlag |= BF_PIPED;
|
|
// add list of member terms as well
|
|
m_q->m_qterms[left].m_bitNum = nrg;
|
|
// only really add if useful
|
|
if ( list && !list->isEmpty() ) {
|
|
nn++;
|
|
}
|
|
|
|
// add bigram synonyms! like "new jersey" bigram
|
|
// has the synonym "nj"
|
|
for ( int32_t k = 0 ; k < m_q->m_numTerms ; k++ ) {
|
|
QueryTerm *bt = &m_q->m_qterms[k];
|
|
if ( bt->m_synonymOf != leftTerm ) {
|
|
continue;
|
|
}
|
|
|
|
list = m_q->m_qterms[k].m_posdbListPtr;
|
|
qti->m_subList[nn].m_qt = bt;
|
|
qti->m_subList[nn].m_list = list;
|
|
qti->m_subList[nn].m_bigramFlag = 0;
|
|
qti->m_subList[nn].m_bigramFlag |= BF_HALFSTOPWIKIBIGRAM;
|
|
qti->m_subList[nn].m_bigramFlag |= BF_SYNONYM;
|
|
if (qt->m_piped) {
|
|
qti->m_subList[nn].m_bigramFlag |= BF_PIPED;
|
|
}
|
|
// add list of member terms as well
|
|
bt->m_bitNum = nrg;
|
|
if ( list && !list->isEmpty() ) {
|
|
nn++;
|
|
}
|
|
}
|
|
}
|
|
//
|
|
// then the right bigram if also in a wiki half stop bigram
|
|
//
|
|
if ( right >=0 && rightTerm && rightTerm->m_isWikiHalfStopBigram ){
|
|
// assume added
|
|
rightAlreadyAdded = true;
|
|
// get list
|
|
RdbList *list = m_q->m_qterms[right].m_posdbListPtr;
|
|
// add list ptr into our required group
|
|
qti->m_subList[nn].m_qt = &m_q->m_qterms[right];
|
|
qti->m_subList[nn].m_list = list;
|
|
// special flags
|
|
qti->m_subList[nn].m_bigramFlag = BF_HALFSTOPWIKIBIGRAM;
|
|
// before a pipe operator?
|
|
if ( qt->m_piped ) qti->m_subList[nn].m_bigramFlag |= BF_PIPED;
|
|
// add list of member terms as well
|
|
m_q->m_qterms[right].m_bitNum = nrg;
|
|
// only really add if useful
|
|
if ( list && !list->isEmpty() ) {
|
|
nn++;
|
|
}
|
|
|
|
// add bigram synonyms! like "new jersey" bigram
|
|
// has the synonym "nj"
|
|
for ( int32_t k = 0 ; k < m_q->m_numTerms ; k++ ) {
|
|
QueryTerm *bt = &m_q->m_qterms[k];
|
|
if ( bt->m_synonymOf != rightTerm ) {
|
|
continue;
|
|
}
|
|
|
|
list = m_q->m_qterms[k].m_posdbListPtr;
|
|
qti->m_subList[nn].m_qt = bt;
|
|
qti->m_subList[nn].m_list = list;
|
|
qti->m_subList[nn].m_bigramFlag = 0;
|
|
qti->m_subList[nn].m_bigramFlag |= BF_HALFSTOPWIKIBIGRAM;
|
|
qti->m_subList[nn].m_bigramFlag |= BF_SYNONYM;
|
|
if (qt->m_piped) {
|
|
qti->m_subList[nn].m_bigramFlag |= BF_PIPED;
|
|
}
|
|
// add list of member terms as well
|
|
bt->m_bitNum = nrg;
|
|
if ( list && !list->isEmpty() ) {
|
|
nn++;
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
// then the non-bigram termlist
|
|
//
|
|
// add to it. add backwards since we give precedence to
|
|
// the first list and we want that to be the NEWEST list!
|
|
RdbList *list = m_q->m_qterms[i].m_posdbListPtr;
|
|
// add list ptr into our required group
|
|
qti->m_subList[nn].m_qt = qt;
|
|
qti->m_subList[nn].m_list = list;
|
|
// special flags
|
|
qti->m_subList[nn].m_bigramFlag = 0;
|
|
// before a pipe operator?
|
|
if ( qt->m_piped )
|
|
qti->m_subList[nn].m_bigramFlag |= BF_PIPED;
|
|
// is it a negative term?
|
|
if ( qt->m_termSign=='-')
|
|
qti->m_subList[nn].m_bigramFlag |= BF_NEGATIVE;
|
|
|
|
// add list of member terms
|
|
qt->m_bitNum = nrg;
|
|
|
|
// only really add if useful
|
|
// no, because when inserting NEW (related) terms that are
|
|
// not currently in the document, this list may initially
|
|
// be empty.
|
|
if ( list && !list->isEmpty() ) {
|
|
nn++;
|
|
}
|
|
|
|
//
|
|
// add left bigram now if not added above
|
|
//
|
|
if ( left >= 0 && ! leftAlreadyAdded ) {
|
|
// get list
|
|
list = m_q->m_qterms[left].m_posdbListPtr;
|
|
// add list ptr into our required group
|
|
qti->m_subList[nn].m_qt = &m_q->m_qterms[left];
|
|
qti->m_subList[nn].m_list = list;
|
|
// special flags
|
|
qti->m_subList[nn].m_bigramFlag = BF_BIGRAM;
|
|
// before a pipe operator?
|
|
if ( qt->m_piped ) qti->m_subList[nn].m_bigramFlag |= BF_PIPED;
|
|
// add list of member terms as well
|
|
m_q->m_qterms[left].m_bitNum = nrg;
|
|
// only really add if useful
|
|
if ( list && !list->isEmpty() ) {
|
|
nn++;
|
|
}
|
|
|
|
// add bigram synonyms! like "new jersey" bigram
|
|
// has the synonym "nj"
|
|
for ( int32_t k = 0 ; k < m_q->m_numTerms ; k++ ) {
|
|
QueryTerm *bt = &m_q->m_qterms[k];
|
|
if ( bt->m_synonymOf != leftTerm ) {
|
|
continue;
|
|
}
|
|
|
|
list = m_q->m_qterms[k].m_posdbListPtr;
|
|
qti->m_subList[nn].m_qt = bt;
|
|
qti->m_subList[nn].m_list = list;
|
|
qti->m_subList[nn].m_bigramFlag = 0;
|
|
qti->m_subList[nn].m_bigramFlag |= BF_SYNONYM;
|
|
if (qt->m_piped) {
|
|
qti->m_subList[nn].m_bigramFlag |= BF_PIPED;
|
|
}
|
|
// add list of member terms as well
|
|
bt->m_bitNum = nrg;
|
|
if ( list && !list->isEmpty() ) {
|
|
nn++;
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
// add right bigram now if not added above
|
|
//
|
|
if ( right >= 0 && ! rightAlreadyAdded ) {
|
|
// get list
|
|
list = m_q->m_qterms[right].m_posdbListPtr;
|
|
// add list ptr into our required group
|
|
qti->m_subList[nn].m_qt = &m_q->m_qterms[right];
|
|
qti->m_subList[nn].m_list = list;
|
|
// special flags
|
|
qti->m_subList[nn].m_bigramFlag = BF_BIGRAM;
|
|
// before a pipe operator?
|
|
if ( qt->m_piped ) qti->m_subList[nn].m_bigramFlag |= BF_PIPED;
|
|
// add list of member terms as well
|
|
m_q->m_qterms[right].m_bitNum = nrg;
|
|
// only really add if useful
|
|
if ( list && !list->isEmpty() ) {
|
|
nn++;
|
|
}
|
|
|
|
// add bigram synonyms! like "new jersey" bigram
|
|
// has the synonym "nj"
|
|
for ( int32_t k = 0 ; k < m_q->m_numTerms ; k++ ) {
|
|
QueryTerm *bt = &m_q->m_qterms[k];
|
|
if ( bt->m_synonymOf != rightTerm ) {
|
|
continue;
|
|
}
|
|
|
|
list = m_q->m_qterms[k].m_posdbListPtr;
|
|
qti->m_subList[nn].m_qt = bt;
|
|
qti->m_subList[nn].m_list = list;
|
|
qti->m_subList[nn].m_bigramFlag = 0;
|
|
qti->m_subList[nn].m_bigramFlag |= BF_SYNONYM;
|
|
if (qt->m_piped) {
|
|
qti->m_subList[nn].m_bigramFlag |= BF_PIPED;
|
|
}
|
|
// add list of member terms as well
|
|
//qti->m_qtermList[nn] = bt;
|
|
bt->m_bitNum = nrg;
|
|
if ( list && !list->isEmpty() ) {
|
|
nn++;
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
// ADD SYNONYM TERMS
|
|
//
|
|
for ( int32_t k = 0 ; k < m_q->m_numTerms ; k++ ) {
|
|
QueryTerm *qt2 = &m_q->m_qterms[k];
|
|
const QueryTerm *st = qt2->m_synonymOf;
|
|
// skip if not a synonym of this term
|
|
if ( st != qt ) {
|
|
continue;
|
|
}
|
|
|
|
// its a synonym, add it!
|
|
list = m_q->m_qterms[k].m_posdbListPtr;
|
|
// add list ptr into our required group
|
|
qti->m_subList[nn].m_qt = qt2;
|
|
qti->m_subList[nn].m_list = list;
|
|
// special flags
|
|
qti->m_subList[nn].m_bigramFlag = BF_SYNONYM;
|
|
// before a pipe operator?
|
|
if ( qt->m_piped ) qti->m_subList[nn].m_bigramFlag |= BF_PIPED;
|
|
// set bitnum here i guess
|
|
qt2->m_bitNum = nrg;
|
|
// only really add if useful
|
|
if ( list && !list->isEmpty() ) {
|
|
nn++;
|
|
}
|
|
}
|
|
|
|
|
|
// store # lists in required group. nn might be zero!
|
|
qti->m_numSubLists = nn;
|
|
// crazy?
|
|
if ( nn >= MAX_SUBLISTS ) {
|
|
log("query: too many sublists. %" PRId32" >= %" PRId32,
|
|
nn,(int32_t)MAX_SUBLISTS);
|
|
logTrace(g_conf.m_logTracePosdb, "END.");
|
|
g_errno = EQUERYTOOBIG;
|
|
return false;
|
|
}
|
|
|
|
// count # required groups
|
|
nrg++;
|
|
}
|
|
|
|
//Detect the cases where the base term to the left of the qti-baseterm is ignored. This is used by getBestScoreSumForSingleTerm()
|
|
//to ensure that bigram match boosts are not lost.
|
|
for(int i=0; i<nrg; i++) {
|
|
QueryTermInfo &qti = m_queryTermInfos[i];
|
|
qti.m_leftTermIsIgnored = false;
|
|
if(m_queryTermInfos[i].m_qterm->m_leftPhraseTerm) {
|
|
for(int j=0; j<m_q->m_numTerms; j++) {
|
|
QueryTerm *qt = &m_q->m_qterms[j];
|
|
if(qt==m_queryTermInfos[i].m_qterm->m_leftPhraseTerm) {
|
|
if(qt->m_qword->m_ignoreWord!=IGNORE_NO_IGNORE)
|
|
qti.m_leftTermIsIgnored = true;
|
|
break; //assumption: only one bigram per term. May not be true when bigram-synonyms are added by word variations
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
// get the query term with the least data in posdb including syns
|
|
//
|
|
m_minTermListSize = 0;
|
|
m_minTermListIdx = -1;
|
|
int64_t grand = 0LL;
|
|
|
|
for ( int32_t i = 0 ; i < nrg ; i++ ) {
|
|
// compute total sizes
|
|
QueryTermInfo *qti = &(m_queryTermInfos[i]);
|
|
// do not consider for first termlist if negative
|
|
if ( qti->m_subList[0].m_bigramFlag & BF_NEGATIVE ) {
|
|
continue;
|
|
}
|
|
|
|
int64_t totalSubListsSize = 0;
|
|
for(int32_t q = 0; q < qti->m_numSubLists; q++) {
|
|
const RdbList *l = qti->m_subList[q].m_list;
|
|
totalSubListsSize += l->getListSize();
|
|
}
|
|
|
|
// add up this now
|
|
grand += totalSubListsSize;
|
|
|
|
// get min
|
|
if ( totalSubListsSize < m_minTermListSize || m_minTermListIdx == -1 ) {
|
|
m_minTermListSize = totalSubListsSize;
|
|
m_minTermListIdx = i;
|
|
}
|
|
}
|
|
|
|
// bad! ringbuf[] was not designed for this nonsense!
|
|
if ( m_minTermListIdx >= 255 ) {
|
|
gbshutdownAbort(true);
|
|
}
|
|
|
|
// set this for caller to use to loop over the queryterminfos
|
|
m_numQueryTermInfos = nrg;
|
|
|
|
if(g_conf.m_logTracePosdb) {
|
|
logTrace(g_conf.m_logTracePosdb, "m_numQueryTermInfos=%d", m_numQueryTermInfos);
|
|
for(int i=0; i<m_numQueryTermInfos; i++) {
|
|
const QueryTermInfo *qti = &(m_queryTermInfos[i]);
|
|
logTrace(g_conf.m_logTracePosdb, " qti[%d]: m_numSubLists=%d m_qtermNum=%d m_qpos=%d m_leftTermIsIgnored=%s", i, qti->m_numSubLists, qti->m_qtermNum, qti->m_qpos, qti->m_leftTermIsIgnored?"true":"false");
|
|
for(int j=0; j<qti->m_numSubLists; j++)
|
|
logTrace(g_conf.m_logTracePosdb, " sublist %d = %p", j, qti->m_subList[j].m_list);
|
|
}
|
|
}
|
|
|
|
//check if any of the terms are not being used. This can happen if query was truncated and some bigrams or synonyms were included while the base word was not.
|
|
std::vector<bool> termUsed(m_q->m_numTerms);
|
|
for(int i=0; i<m_numQueryTermInfos; i++) {
|
|
const QueryTermInfo *qti = &(m_queryTermInfos[i]);
|
|
for(int j=0; j<qti->m_numSubLists; j++) {
|
|
const QueryTerm *qt = qti->m_subList[j].m_qt;
|
|
int qtermNum = qt - m_q->m_qterms;
|
|
termUsed[qtermNum] = true;
|
|
}
|
|
}
|
|
for(int i=0; i<m_q->m_numTerms; i++) {
|
|
const QueryTerm *qt = &m_q->m_qterms[i];
|
|
logTrace(g_conf.m_logTracePosdb,"termUsed[%d]=%s (%.*s)", i, termUsed[i]?"true":"false", qt->m_termLen,qt->m_term);
|
|
if(!termUsed[i] && !qt->m_ignored && !qt->m_isQueryStopWord && qt->m_posdbListPtr && !qt->m_posdbListPtr->isEmpty())
|
|
log(LOG_DEBUG, "posdb: unused term found #%d '%.*s' in query '%.*s'. Was query truncated?", i, qt->m_termLen, qt->m_term, m_q->getQueryLen(), m_q->getQuery());
|
|
}
|
|
|
|
// . m_minTermListSize is set in setQueryTermInfo()
|
|
// . how many docids do we have at most in the intersection?
|
|
// . all keys are of same termid, so they are 12 or 6 bytes compressed
|
|
// . assume 12 if each is a different docid
|
|
int32_t maxDocIds = m_minTermListSize / 12;
|
|
// store all intersected docids in here for new algo plus 1 byte vote
|
|
int32_t need = maxDocIds * 6;
|
|
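// e.g. a smallest termlist of 1.2 MB -> at most 100,000 candidate docids
// -> a 600,000-byte vote buffer (6 bytes per docid entry).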
|
|
// they could all be OR'd together!
|
|
if ( m_q->m_isBoolean ) need = grand;
|
|
|
|
// so we can always cast an int64_t from a ptr in there
|
|
// for setting m_docId when m_booleanQuery is true below
|
|
need += 8;
|
|
|
|
// get max # of docids we got in an intersection from all the lists
|
|
if ( ! m_docIdVoteBuf.reserve ( need,"divbuf" ) ) {
|
|
logTrace(g_conf.m_logTracePosdb, "END.");
|
|
return false;
|
|
}
|
|
|
|
// if this is a boolean query, also allocate the
// hashtable that maps each docid to its boolean bit vector,
// where each bit stands for an operand so we can quickly evaluate
// the bit vector in a truth table.
// We can't size it from the min list size because that list might be
// behind a NOT operator! Then we'd core trying to realloc m_bt in a
// thread below when trying to grow it. They could all be OR'd together,
// so alloc for the most!
|
|
int32_t maxSlots = (grand/12) * 2;
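// grand/12 is the worst case where every key in every termlist is a full
// 12-byte key for a distinct docid; the extra *2 presumably just keeps the
// hash tables well under full load so lookups stay fast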
|
|
// try to speed up. this doesn't *seem* to matter, so i took out:
|
|
//maxSlots *= 2;
|
|
// get total operands we used
|
|
//int32_t numOperands = m_q->m_numWords;//Operands;
|
|
// a quoted phrase counts as a single operand
|
|
// . QueryTerm::m_bitNum <== m_numQueryTermInfos
|
|
// . each queryTermInfo class corresponds to one bit in our bit vec
|
|
// . essentially each queryTermInfo is a query term, but it has
|
|
// all the synonym and word forms for that query, etc.
|
|
m_vecSize = m_numQueryTermInfos / 8;//numOperands / 8 ;
|
|
// allow an extra byte for remainders
|
|
if ( m_numQueryTermInfos % 8 ) m_vecSize++;
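// e.g. 9 query term infos -> 9/8 = 1 byte plus 1 remainder byte = 2-byte bit vectors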
|
|
// now preallocate the hashtable. 0 niceness.
|
|
if ( m_q->m_isBoolean && // true = useKeyMagic
|
|
! m_bt.set (8,m_vecSize,maxSlots,NULL,0,false,"booltbl",true)) {
|
|
logTrace(g_conf.m_logTracePosdb, "END.");
|
|
return false;
|
|
}
|
|
|
|
// . m_ct maps a boolean "bit vector" to a true/false value
|
|
// . each "bit" in the "bit vector" indicates if docid has that
|
|
// particular query term
|
|
if ( m_q->m_isBoolean && // true = useKeyMagic
|
|
! m_ct.set (8,1,maxSlots,NULL,0,false,"booltbl",true)) {
|
|
logTrace(g_conf.m_logTracePosdb, "END.");
|
|
return false;
|
|
}
|
|
|
|
#ifdef _VALGRIND_
|
|
//various arrays are over-allocated. tell valgrind what is not defined content
|
|
for(int i=0; i<m_numQueryTermInfos; i++) {
|
|
QueryTermInfo &qti = m_queryTermInfos[i];
|
|
VALGRIND_MAKE_MEM_UNDEFINED(qti.m_subList+qti.m_numSubLists, sizeof(qti.m_subList[0])*(MAX_SUBLISTS-qti.m_numSubLists));
|
|
}
|
|
for(int i=m_numQueryTermInfos; i<m_q->m_numTerms; i++)
|
|
VALGRIND_MAKE_MEM_UNDEFINED(&(m_queryTermInfos[i]), sizeof(QueryTermInfo));
|
|
#endif
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "END.");
|
|
return true;
|
|
}
|
|
|
|
|
|
|
|
bool PosdbTable::findCandidateDocIds() {
|
|
int64_t lastTime = gettimeofdayInMilliseconds();
|
|
|
|
|
|
//
|
|
// Swap the top 6 bytes of each list.
|
|
//
|
|
// This gives a contiguous list of 12-byte docid/score entries
|
|
// because the first record is always 18 bytes (termid, docid, score)
|
|
// and the following keys are 6 or 12 bytes due to our termid compression.
|
|
//
|
|
// This makes the lists much much easier to work with, but we have
|
|
// to remember to swap back when done! (but we don't...)
|
|
//
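// Posdb key sizes, as used throughout this file (see the compression-bit
// tests below): an 18-byte key carries termid+docid+position, a 12-byte key
// drops the termid, and a 6-byte key drops termid+docid. The two low bits of
// byte 0 appear to encode this: 0x02 set = 12 bytes, 0x02|0x04 set = 6 bytes,
// neither set = 18 bytes. The swap below rewrites each list so it starts
// with a 12-byte key instead of the 18-byte one.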
|
|
for ( int32_t k = 0 ; k < m_q->m_numTerms ; k++ ) {
|
|
// count
|
|
int64_t total = 0LL;
|
|
// get the list
|
|
RdbList *list = m_q->m_qterms[k].m_posdbListPtr;
|
|
// skip if null or empty
|
|
if(list && !list->isEmpty()) {
|
|
// tally
|
|
total += list->getListSize();
|
|
// point to start
|
|
char *p = list->getList();
|
|
|
|
// remember to swap back when done!!
|
|
char ttt[12];
|
|
memcpy ( ttt , p , 12 );
|
|
memcpy ( p , p + 12 , 6 );
|
|
memcpy ( p + 6 , ttt , 12 );
|
|
|
|
// point to the low "hks" bytes now, skipping the termid
|
|
p += 6;
|
|
|
|
// turn half bit on. first key is now 12 bytes!!
|
|
*p |= 0x02;
|
|
// MANGLE the list
|
|
list->setListSize(list->getListSize() - 6);
|
|
list->setList(p);
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "termList #%" PRId32" totalSize=%" PRId64, k, total);
|
|
|
|
// print total list sizes
|
|
if ( m_debug ) {
|
|
log(LOG_INFO, "query: termlist #%" PRId32" totalSize=%" PRId64, k, total);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
// setQueryTermInfos() should have set how many we have
|
|
if ( m_numQueryTermInfos == 0 ) {
|
|
log(LOG_DEBUG, "query: NO REQUIRED TERMS IN QUERY!");
|
|
logTrace(g_conf.m_logTracePosdb, "END, m_numQueryTermInfos = 0");
|
|
return false;
|
|
}
|
|
|
|
// . if smallest required list is empty, 0 results
|
|
// . also set in setQueryTermInfo
|
|
if ( m_minTermListSize == 0 && ! m_q->m_isBoolean ) {
|
|
logTrace(g_conf.m_logTracePosdb, "END, m_minTermListSize = 0 and not boolean");
|
|
return false;
|
|
}
|
|
|
|
|
|
int32_t listGroupNum = 0;
|
|
|
|
|
|
// if all non-negative query terms are in the same wikiphrase then
|
|
// we can apply the WIKI_WEIGHT in getMaxPossibleScore() which
|
|
// should help us speed things up!
|
|
// @@@ BR: Why should this give a boost at all.. ?
|
|
m_allInSameWikiPhrase = true;
|
|
for ( int32_t i = 0 ; i < m_numQueryTermInfos ; i++ ) {
|
|
// get it
|
|
const QueryTermInfo *qti = &(m_queryTermInfos[i]);
|
|
|
|
if(qti->m_numSubLists==0)
|
|
continue; //ignored word or stopword.
|
|
|
|
// skip if negative query term
|
|
if ( qti->m_subList[0].m_bigramFlag & BF_NEGATIVE ) {
|
|
continue;
|
|
}
|
|
|
|
// set it
|
|
if ( qti->m_wikiPhraseId == 1 ) {
|
|
continue; //@@@ BR: Why only check for id = 1 ??
|
|
}
|
|
|
|
// stop
|
|
m_allInSameWikiPhrase = false;
|
|
break;
|
|
}
|
|
logTrace(g_conf.m_logTracePosdb, "m_allInSameWikiPhrase: %s", (m_allInSameWikiPhrase?"true":"false") );
|
|
|
|
|
|
// for boolean queries we scan every docid in all termlists,
|
|
// then we see what query terms it has, and make a bit vector for it.
|
|
// then use a hashtable to map that bit vector to a true or false
|
|
// as to whether we should include it in the results or not.
|
|
// we use Query::getBitScore(qvec_t ebits) to evaluate a docid's
|
|
// query term explicit term bit vector.
|
|
if ( m_q->m_isBoolean ) {
|
|
logTrace(g_conf.m_logTracePosdb, "makeDocIdVoteBufForBoolQuery");
|
|
// keeping the docids sorted is the challenge here...
|
|
makeDocIdVoteBufForBoolQuery();
|
|
}
|
|
else {
|
|
// create "m_docIdVoteBuf" filled with just the docids from the
|
|
// smallest group of sublists (one term, all variations).
|
|
//
|
|
// m_minTermListIdx is the queryterminfo that had the smallest total
|
|
// sublist sizes of m_minTermListSize. this was set in
|
|
// setQueryTermInfos()
|
|
//
|
|
// if all these sublist termlists were 50MB I'd say 10-25ms to
|
|
// add their docid votes.
|
|
logTrace(g_conf.m_logTracePosdb, "addDocIdVotes");
|
|
addDocIdVotes ( &m_queryTermInfos[m_minTermListIdx], listGroupNum );
|
|
|
|
|
|
// now repeat the docid scan for successive lists but only
|
|
// inc the docid count for docids we match. docids that are
|
|
// in m_docIdVoteBuf but not in sublist group #i will be removed
|
|
// from m_docIdVoteBuf.
|
|
//
|
|
// worst case scenario with termlists limited
|
|
// to 30MB will be about 10MB of docid vote buf, but it should
|
|
// shrink quite a bit every time we call addDocIdVotes() with a
|
|
// new group of sublists for a query term. but scanning 10MB should
|
|
// be pretty fast since gk0 does like 4GB/s of main memory reads.
|
|
// i would think scanning and docid voting for 200MB of termlists
|
|
// should take like 50-100ms
|
|
for ( int32_t i = 0 ; i < m_numQueryTermInfos ; i++ ) {
|
|
// skip if we did it above when allocating the vote buffer
|
|
if ( i == m_minTermListIdx ) {
|
|
continue;
|
|
}
|
|
|
|
// get it
|
|
const QueryTermInfo *qti = &(m_queryTermInfos[i]);
|
|
// do not consider for adding if negative ('my house -home')
|
|
if ( qti->m_subList[0].m_bigramFlag & BF_NEGATIVE ) {
|
|
continue;
|
|
}
|
|
|
|
// inc the group number ("score"), which will be set on the docids
|
|
// in addDocIdVotes if they match.
|
|
listGroupNum++;
|
|
|
|
// if it hits 256 then wrap back down to 1
|
|
if ( listGroupNum >= 256 ) {
|
|
listGroupNum = 1;
|
|
}
|
|
|
|
// add it
|
|
addDocIdVotes ( qti, listGroupNum );
|
|
}
|
|
logTrace(g_conf.m_logTracePosdb, "Added DocIdVotes");
|
|
|
|
|
|
//
|
|
// remove the negative query term docids from our docid vote buf
|
|
//
|
|
for ( int32_t i = 0 ; i < m_numQueryTermInfos ; i++ ) {
|
|
// skip if we did it above
|
|
if ( i == m_minTermListIdx ) {
|
|
continue;
|
|
}
|
|
|
|
// get it
|
|
const QueryTermInfo *qti = &(m_queryTermInfos[i]);
|
|
|
|
// do not consider for adding if negative ('my house -home')
|
|
if ( ! (qti->m_subList[0].m_bigramFlag & BF_NEGATIVE) ) {
|
|
continue;
|
|
}
|
|
|
|
// delete the docid from the vote buffer
|
|
delDocIdVotes ( qti );
|
|
}
|
|
logTrace(g_conf.m_logTracePosdb, "Removed DocIdVotes for negative query terms");
|
|
}
|
|
|
|
if ( m_debug ) {
|
|
int64_t now = gettimeofdayInMilliseconds();
|
|
int64_t took = now - lastTime;
|
|
log(LOG_INFO, "posdb: new algo phase (find matching docids) took %" PRId64" ms", took);
|
|
lastTime = now;
|
|
}
|
|
|
|
|
|
//
|
|
// NOW FILTER EVERY SUBLIST to just the docids in m_docIdVoteBuf.
|
|
// Filter in place so it is destructive. i would think
|
|
// that doing a filter on 200MB of termlists wouldn't be more than
|
|
// 50-100ms since we can read 4GB/s from main memory.
|
|
//
|
|
delNonMatchingDocIdsFromSubLists();
|
|
logTrace(g_conf.m_logTracePosdb, "Shrunk SubLists");
|
|
if(g_conf.m_logTracePosdb) {
|
|
log(LOG_TRACE,"Shrunk sublists, m_numQueryTermInfos=%d", m_numQueryTermInfos);
|
|
for(int i=0; i<m_numQueryTermInfos; i++) {
|
|
log(LOG_TRACE," qti #%d: m_numSubLists=%d", i, m_queryTermInfos[i].m_numSubLists);
|
|
for(int j=0; j<m_queryTermInfos[i].m_numSubLists; j++)
|
|
log(LOG_TRACE," list #%d: m_qt=%p (%*.*s) m_list=%p m_bigramFlag=%02x", j, m_queryTermInfos[i].m_subList[j].m_qt, m_queryTermInfos[i].m_subList[j].m_qt->m_termLen, m_queryTermInfos[i].m_subList[j].m_qt->m_termLen, m_queryTermInfos[i].m_subList[j].m_qt->m_term, m_queryTermInfos[i].m_subList[j].m_list, m_queryTermInfos[i].m_subList[j].m_bigramFlag);
|
|
log(LOG_TRACE," m_numMatchingSubLists=%d", m_queryTermInfos[i].m_numMatchingSubLists);
|
|
for(int j=0; j<m_queryTermInfos[i].m_numMatchingSubLists; j++)
|
|
log(LOG_TRACE," matchlist #%d: %d bytes %p - %p baseidx=%d", j, m_queryTermInfos[i].m_matchingSublist[j].m_size, m_queryTermInfos[i].m_matchingSublist[j].m_start, m_queryTermInfos[i].m_matchingSublist[j].m_end, m_queryTermInfos[i].m_matchingSublist[j].m_baseSubListIndex);
|
|
}
|
|
}
|
|
|
|
if ( m_debug ) {
|
|
int64_t now = gettimeofdayInMilliseconds();
|
|
int64_t took = now - lastTime;
|
|
log(LOG_INFO, "posdb: new algo phase (shrink sublists) took %" PRId64" ms", took);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
bool PosdbTable::genDebugScoreInfo1(int32_t *numProcessed, int32_t *topCursor, bool *docInThisFile) {
|
|
*docInThisFile = false;
|
|
|
|
// did we get enough score info?
|
|
if ( *numProcessed >= m_msg39req->m_docsToGet ) {
|
|
logTrace(g_conf.m_logTracePosdb, "Too many docs processed. m_docId=%" PRId64 ". Reached msg39 m_docsToGet: %" PRId32 ", SKIPPED", m_docId, *numProcessed);
|
|
return true;
|
|
}
|
|
|
|
// loop back up here if the docid is from a previous range
|
|
nextNode:
|
|
// this means the top tree is basically empty
|
|
if ( *topCursor == -1 ) {
|
|
logTrace(g_conf.m_logTracePosdb, "topCursor is -1, SKIPPED");
|
|
return true;
|
|
}
|
|
|
|
// get the #1 docid/score node #
|
|
if ( *topCursor == -9 ) {
|
|
// if our query had a quoted phrase, might have had no
|
|
// docids in the top tree! getHighNode() can't handle
|
|
// that so handle it here
|
|
if ( m_topTree->getNumUsedNodes() == 0 ) {
|
|
logTrace(g_conf.m_logTracePosdb, "Num used nodes is 0, SKIPPED");
|
|
return true;
|
|
}
|
|
|
|
// otherwise, initialize topCursor
|
|
*topCursor = m_topTree->getHighNode();
|
|
}
|
|
|
|
// get current node
|
|
TopNode *tn = m_topTree->getNode ( *topCursor );
|
|
// advance
|
|
*topCursor = m_topTree->getPrev ( *topCursor );
|
|
|
|
// shortcut
|
|
m_docId = tn->m_docId;
|
|
|
|
// skip if not in our range! the top tree now holds
|
|
// all the winners from previous docid ranges. msg39
|
|
// now processes the search results in docid-range chunks to avoid
|
|
// OOM conditions.
|
|
if ( m_msg39req->m_minDocId != -1 &&
|
|
m_msg39req->m_maxDocId != -1 &&
|
|
( m_docId < (uint64_t)m_msg39req->m_minDocId ||
|
|
m_docId >= (uint64_t)m_msg39req->m_maxDocId ) ) {
|
|
logTrace(g_conf.m_logTracePosdb, "DocId %" PRIu64 " does not match docId range %" PRIu64 " - %" PRIu64 ", SKIPPED", m_docId, (uint64_t)m_msg39req->m_minDocId, (uint64_t)m_msg39req->m_maxDocId);
|
|
goto nextNode;
|
|
}
|
|
|
|
*docInThisFile = m_documentIndexChecker->exists(m_docId);
|
|
if( !(*docInThisFile) ) {
|
|
logTrace(g_conf.m_logTracePosdb, "DocId %" PRId64 " is not in this file, SKIPPED", m_docId);
|
|
return false;
|
|
}
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "DocId %" PRId64 " - setting up score buffer", m_docId);
|
|
|
|
(*numProcessed)++;
|
|
|
|
// set query termlists in all sublists
|
|
for ( int32_t i = 0 ; i < m_numQueryTermInfos ; i++ ) {
|
|
// get it
|
|
QueryTermInfo *qti = &(m_queryTermInfos[i]);
|
|
// do not advance negative termlist cursor
|
|
if ( qti->m_numSubLists>0 && qti->m_subList[0].m_bigramFlag & BF_NEGATIVE ) {
|
|
continue;
|
|
}
|
|
|
|
// do each sublist
|
|
for ( int32_t j = 0 ; j < qti->m_numMatchingSubLists ; j++ ) {
|
|
// get termlist for that docid
|
|
const char *xlist = qti->m_matchingSublist[j].m_start;
|
|
const char *xlistEnd = qti->m_matchingSublist[j].m_end;
|
|
const char *xp = getWordPosList ( m_docId, xlist, xlistEnd - xlist);
|
|
|
|
// not there? xp will be NULL
|
|
qti->m_matchingSublist[j].m_savedCursor = xp;
|
|
|
|
// if not there make cursor NULL as well
|
|
if ( ! xp ) {
|
|
qti->m_matchingSublist[j].m_cursor = NULL;
|
|
continue;
|
|
}
|
|
|
|
// skip over docid list
|
|
xp += 12;
|
|
|
|
for ( ; ; ) {
|
|
// do not breach sublist
|
|
if ( xp >= xlistEnd ) {
|
|
break;
|
|
}
|
|
|
|
// break if 12 byte key: another docid!
|
|
if ( !(xp[0] & 0x04) ) {
|
|
break;
|
|
}
|
|
|
|
// skip over 6 byte key
|
|
xp += 6;
|
|
}
|
|
|
|
// point to docid sublist end
|
|
qti->m_matchingSublist[j].m_cursor = xp;
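// (m_savedCursor now points at this docid's 12-byte key and m_cursor
// points just past its last 6-byte position key, i.e. at the next docid)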
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
|
|
bool PosdbTable::genDebugScoreInfo2(DocIdScore *dcs, int32_t *lastLen, uint64_t *lastDocId, char siteRank, float score, int32_t intScore, lang_t docLang) {
|
|
|
|
dcs->m_siteRank = siteRank;
|
|
dcs->m_finalScore = score;
|
|
|
|
dcs->m_docId = m_docId;
|
|
dcs->m_numRequiredTerms = m_numQueryTermInfos;
|
|
dcs->m_docLang = docLang;
|
|
logTrace(g_conf.m_logTracePosdb, "m_docId=%" PRId64 ", *lastDocId=%" PRId64 "", m_docId, *lastDocId);
|
|
|
|
// ensure enough room, since we can't allocate in a thread!
|
|
if ( m_scoreInfoBuf.getAvail()<(int32_t)sizeof(DocIdScore)+1) {
|
|
logTrace(g_conf.m_logTracePosdb, "END, NO ROOM");
|
|
return true;
|
|
}
|
|
|
|
// if same as last docid, overwrite it since we have a higher
|
|
// siterank or langid i guess
|
|
if ( m_docId == *lastDocId ) {
|
|
m_scoreInfoBuf.m_length = *lastLen;
|
|
}
|
|
|
|
// save that
|
|
int32_t len = m_scoreInfoBuf.m_length;
|
|
|
|
// copy into the safebuf for holding the scoring info
|
|
#ifdef _VALGRIND_
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(dcs,sizeof(*dcs)); //check the pointed-to DocIdScore, not the pointer itself
|
|
#endif
|
|
m_scoreInfoBuf.safeMemcpy ( (char *)dcs, sizeof(DocIdScore) );
|
|
// save that
|
|
*lastLen = len;
|
|
// save it
|
|
*lastDocId = m_docId;
|
|
// try to fix dup docid problem! it was hitting the
|
|
// overflow check right above here... strange!!!
|
|
//m_docIdTable.removeKey ( &docId );
|
|
|
|
/////////////////////////////
|
|
//
|
|
// . docid range split HACK...
|
|
// . if we are doing docid range splitting, we process
|
|
// the search results separately in disjoint docid ranges.
|
|
// . so because we still use the same m_scoreInfoBuf etc.
|
|
// for each split we process, we must remove info from
|
|
// a top docid of a previous split that got supplanted by
|
|
// a docid of this docid-range split, so we do not breach
|
|
// the allocated buffer size.
|
|
// . so find out which docid we replaced
|
|
// in the top tree, and replace his entry in scoreinfobuf
|
|
// as well!
|
|
// . his info was already added to m_pairScoreBuf in the
|
|
// getTermPairScoreForAny() function
|
|
//
|
|
//////////////////////////////
|
|
|
|
// the top tree remains persistent between docid ranges.
|
|
// and so does the score buf. so we must replace scores
|
|
// if the docid gets replaced by a better scoring docid
|
|
// from a following docid range of different docids.
|
|
// However, scanning the docid score buffer each time is
|
|
// really slow, so we need to get the kicked out docid
|
|
// from the top tree and use that to store its offset
|
|
// into m_scoreInfoBuf so we can remove it.
|
|
|
|
DocIdScore *si;
|
|
|
|
// only kick out docids from the score buffer when there
|
|
// is no room left...
|
|
if ( m_scoreInfoBuf.getAvail() >= (int)sizeof(DocIdScore ) ) {
|
|
logTrace(g_conf.m_logTracePosdb, "END, OK. m_docId=%" PRId64 ", *lastDocId=%" PRId64 "", m_docId, *lastDocId);
|
|
return true;
|
|
}
|
|
|
|
char *sx = m_scoreInfoBuf.getBufStart();
|
|
char *sxEnd = sx + m_scoreInfoBuf.length();
|
|
|
|
// if we have not supplanted anyone yet, be on our way
|
|
for ( ; sx < sxEnd ; sx += sizeof(DocIdScore) ) {
|
|
si = (DocIdScore *)sx;
|
|
// if top tree no longer has this docid, we must
|
|
// remove its associated scoring info so we do not
|
|
// breach our scoring info bufs
|
|
if ( ! m_topTree->hasDocId( si->m_docId ) ) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
// might not be full yet
|
|
if ( sx >= sxEnd ) {
|
|
logTrace(g_conf.m_logTracePosdb, "END, OK 2");
|
|
return true;
|
|
}
|
|
|
|
// must be there!
|
|
if ( ! si ) {
|
|
gbshutdownAbort(true);
|
|
}
|
|
|
|
// note it because it is slow
|
|
// this is only used if getting score info, which is
|
|
// not default when getting an xml or json feed
|
|
logTrace(g_conf.m_logTracePosdb, "Kicking out docid %" PRId64" from score buf", si->m_docId);
|
|
|
|
// get his single and pair offsets
|
|
int32_t pairOffset = si->m_pairsOffset;
|
|
int32_t pairSize = si->m_numPairs * sizeof(PairScore);
|
|
int32_t singleOffset = si->m_singlesOffset;
|
|
int32_t singleSize = si->m_numSingles * sizeof(SingleScore);
|
|
// nuke him
|
|
m_scoreInfoBuf .removeChunk1 ( sx, sizeof(DocIdScore) );
|
|
// and his related info
|
|
m_pairScoreBuf .removeChunk2 ( pairOffset , pairSize );
|
|
m_singleScoreBuf.removeChunk2 ( singleOffset , singleSize );
|
|
|
|
// adjust offsets of remaining single scores
|
|
sx = m_scoreInfoBuf.getBufStart();
|
|
for ( ; sx < sxEnd ; sx += sizeof(DocIdScore) ) {
|
|
si = (DocIdScore *)sx;
|
|
if ( si->m_pairsOffset > pairOffset ) {
|
|
si->m_pairsOffset -= pairSize;
|
|
}
|
|
|
|
if ( si->m_singlesOffset > singleOffset ) {
|
|
si->m_singlesOffset -= singleSize;
|
|
}
|
|
}
|
|
|
|
// adjust this too!
|
|
*lastLen -= sizeof(DocIdScore);
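// (the removed record sat before the one we just appended, so the saved
// buffer length from before that append shrinks by one record as well)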
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "Returning false");
|
|
return false;
|
|
}
|
|
|
|
|
|
void PosdbTable::logDebugScoreInfo(int32_t loglevel) {
|
|
logTrace(g_conf.m_logTracePosdb, "BEGIN");
|
|
|
|
const char *sx = m_scoreInfoBuf.getBufStart();
|
|
const char *sxEnd = sx + m_scoreInfoBuf.length();
|
|
|
|
log(loglevel, "DocId scores in m_scoreInfoBuf:");
|
|
for ( ; sx < sxEnd ; sx += sizeof(DocIdScore) ) {
|
|
const DocIdScore *si = (const DocIdScore *)sx;
|
|
|
|
log(loglevel, " docId: %14" PRIu64 ", score: %f", si->m_docId, si->m_finalScore);
|
|
|
|
// if top tree no longer has this docid, we must
|
|
// remove its associated scoring info so we do not
|
|
// breach our scoring info bufs
|
|
if ( ! m_topTree->hasDocId( si->m_docId ) ) {
|
|
log(loglevel, " ^ NOT in topTree anymore!");
|
|
}
|
|
}
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "END");
|
|
}
|
|
|
|
|
|
void PosdbTable::removeScoreInfoForDeletedDocIds() {
|
|
logTrace(g_conf.m_logTracePosdb, "BEGIN");
|
|
|
|
char *sx = m_scoreInfoBuf.getBufStart();
|
|
char *sxEnd = sx + m_scoreInfoBuf.length();
|
|
|
|
for ( ; sx < sxEnd ; sx += sizeof(DocIdScore) ) {
|
|
DocIdScore *si = (DocIdScore *)sx;
|
|
|
|
// if top tree no longer has this docid, we must
|
|
// remove its associated scoring info so we do not
|
|
// breach our scoring info bufs
|
|
if ( m_topTree->hasDocId( si->m_docId ) ) {
|
|
continue;
|
|
}
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "Removing old score info for docId %" PRId64 "", si->m_docId);
|
|
// get his single and pair offsets
|
|
int32_t pairOffset = si->m_pairsOffset;
|
|
int32_t pairSize = si->m_numPairs * sizeof(PairScore);
|
|
int32_t singleOffset = si->m_singlesOffset;
|
|
int32_t singleSize = si->m_numSingles * sizeof(SingleScore);
|
|
// nuke him
|
|
m_scoreInfoBuf .removeChunk1 ( sx, sizeof(DocIdScore) );
|
|
// and his related info
|
|
m_pairScoreBuf .removeChunk2 ( pairOffset , pairSize );
|
|
m_singleScoreBuf.removeChunk2 ( singleOffset , singleSize );
|
|
|
|
// the score-info buffer just shrank by one record
sxEnd -= sizeof(DocIdScore);

// adjust offsets of the remaining scores. Use a separate cursor so we
// do not clobber 'sx', which the outer scan still needs.
for ( char *px = m_scoreInfoBuf.getBufStart() ; px < sxEnd ; px += sizeof(DocIdScore) ) {
	DocIdScore *psi = (DocIdScore *)px;
	if ( psi->m_pairsOffset > pairOffset ) {
		psi->m_pairsOffset -= pairSize;
	}

	if ( psi->m_singlesOffset > singleOffset ) {
		psi->m_singlesOffset -= singleSize;
	}
}

// the record that followed the removed one has shifted into our slot,
// so back up one record and let the outer loop's increment re-examine it
sx -= sizeof(DocIdScore);
|
|
}
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "END");
|
|
}
|
|
|
|
|
|
|
|
// Pre-advance each termlist's cursor to skip to next docid.
|
|
//
|
|
// Set QueryTermInfo::m_matchingSublist[].m_cursor to NEXT docid
|
|
// Set QueryTermInfo::m_matchingSublist[].m_savedCursor to CURRENT docid
|
|
// of each termlist so we are ready for a quick skip over this docid.
|
|
//
|
|
// TODO: use just a single array of termlist ptrs perhaps,
|
|
// then we can remove them when they go NULL. and we'd save a little
|
|
// time not having a nested loop.
|
|
bool PosdbTable::advanceTermListCursors(const char *docIdPtr) {
|
|
logTrace(g_conf.m_logTracePosdb, "BEGIN");
|
|
|
|
for ( int32_t i = 0 ; i < m_numQueryTermInfos ; i++ ) {
|
|
// get it
|
|
QueryTermInfo *qti = &(m_queryTermInfos[i]);
|
|
// do not advance negative termlist cursor
|
|
if ( qti->m_numSubLists>0 && qti->m_subList[0].m_bigramFlag & BF_NEGATIVE ) {
|
|
continue;
|
|
}
|
|
|
|
//
|
|
// In first pass, sublists data is initialized by delNonMatchingDocIdsFromSubLists.
|
|
// In the second pass (to get detailed scoring info for UI output), they are initialized in genDebugScoreInfo1()
|
|
//
|
|
for ( int32_t j = 0 ; j < qti->m_numMatchingSubLists ; j++ ) {
|
|
// shortcuts
|
|
const char *xc = qti->m_matchingSublist[j].m_cursor;
|
|
const char *xcEnd = qti->m_matchingSublist[j].m_end;
|
|
|
|
// exhausted? (we can't make cursor NULL because
|
|
// getMaxPossibleScore() needs the last ptr)
|
|
// must match docid
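// (a 12-byte posdb key keeps the docid in the top 6 bits of byte 7 plus
// bytes 8-11, while docIdPtr points at a 6-byte vote-buffer entry whose
// docid sits in the top bits of byte 0 plus bytes 1-4; that is how the
// two comparisons below appear to line up)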
|
|
if ( xc >= xcEnd ||
|
|
*(int32_t *)(xc+8) != *(int32_t *)(docIdPtr+1) ||
|
|
(*(char *)(xc+7)&0xfc) != (*(char *)(docIdPtr)&0xfc) ) {
|
|
// flag it as not having the docid
|
|
qti->m_matchingSublist[j].m_savedCursor = NULL;
|
|
// skip this sublist if does not have our docid
|
|
continue;
|
|
}
|
|
|
|
// save it
|
|
qti->m_matchingSublist[j].m_savedCursor = xc;
|
|
// get new docid
|
|
//log("new docid %" PRId64,Posdb::getDocId(xc) );
|
|
// advance the cursors. skip our 12
|
|
xc += 12;
|
|
// then skip any following 6 byte keys because they
|
|
// share the same docid
|
|
for ( ; ; xc += 6 ) {
|
|
// end of whole termlist?
|
|
if ( xc >= xcEnd ) {
|
|
break;
|
|
}
|
|
|
|
// sanity. no 18 byte keys allowed
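// (both compression bits clear would mean a full 18-byte termid key,
// which findCandidateDocIds() already stripped from every list)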
|
|
if ( (*xc & 0x06) == 0x00 ) {
|
|
// i've seen this triggered on gk28.
|
|
// a dump of posdb for the termlist
|
|
// for 'post' had corruption in it,
|
|
// yet its twin, gk92 did not. the
|
|
// corruption could have occurred
|
|
// anywhere from nov 2012 to may 2013,
|
|
// and the posdb file was never
|
|
// re-merged! must have been blatant
|
|
// disk malfunction?
|
|
log("posdb: encountered corrupt posdb list. bailing.");
|
|
logTrace(g_conf.m_logTracePosdb, "END.");
|
|
return false;
|
|
//gbshutdownAbort(true);
|
|
}
|
|
// the next docid? it will be a 12 byte key.
|
|
if ( ! (*xc & 0x04) ) {
|
|
break;
|
|
}
|
|
}
|
|
// assign to next docid word position list
|
|
qti->m_matchingSublist[j].m_cursor = xc;
|
|
}
|
|
}
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "END");
|
|
return true;
|
|
}
|
|
|
|
|
|
|
|
#define RINGBUFSIZE 4096
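// must stay a power of two: word positions are folded into the buffer
// below with "pos & (RINGBUFSIZE-1)", so distances measured through the
// ring buffer are only approximate modulo-4096 distances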
|
|
|
|
//Go through the word positions of the queryterminfo posdb lists and set the corresponding ringBuf[] elements to the qti-index.
|
|
//Also find the first position and return that.
|
|
static int32_t setRingbufFromQTI(const QueryTermInfo *qti, int32_t listIdx, unsigned char ringBuf[]) {
|
|
int32_t firstPos = -1;
|
|
for(int32_t i = 0 ; i < qti->m_numMatchingSubLists; i++) {
|
|
// scan that sublist and add word positions
|
|
const char *sub = qti->m_matchingSublist[i].m_savedCursor;
|
|
// skip sublist if its cursor is exhausted
|
|
if(!sub)
|
|
continue;
|
|
|
|
const char *end = qti->m_matchingSublist[i].m_cursor;
|
|
// add first key
|
|
int32_t wx = Posdb::getWordPos(sub);
|
|
wx &= (RINGBUFSIZE-1);
|
|
// store it. 0 is legit.
|
|
ringBuf[wx] = listIdx;
|
|
if(firstPos<0 || wx<firstPos)
|
|
firstPos = wx;
|
|
// skip first key
|
|
sub += 12;
|
|
// then 6 byte keys
|
|
for( ; sub < end; sub += 6) {
|
|
// get word position
|
|
wx = Posdb::getWordPos(sub);
|
|
wx &= (RINGBUFSIZE-1);
|
|
// store it. 0 is legit.
|
|
ringBuf[wx] = listIdx;
|
|
}
|
|
}
|
|
return firstPos;
|
|
}
|
|
|
|
//
|
|
// TODO: consider skipping this pre-filter if it sucks, as it does
|
|
// for 'search engine'. it might save time!
|
|
//
|
|
// Returns:
|
|
// false - docid does not meet minimum score requirement
|
|
// true - docid can potentially be a top scoring docid
|
|
//
|
|
bool PosdbTable::prefilterMaxPossibleScoreByDistance(float minWinningScore) {
|
|
unsigned char ringBuf[RINGBUFSIZE+10];
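// (the +10 slack presumably covers the 4-byte "all empty?" probe below,
// which may read a few bytes past slot RINGBUFSIZE-1)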
|
|
|
|
// reset ring buf. make all slots 0xff. should be 1000 cycles or so.
|
|
memset ( ringBuf, 0xff, sizeof(ringBuf) );
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "BEGIN");
|
|
|
|
|
|
|
|
// now to speed up 'time enough for love' query which does not
|
|
// have many super high scoring guys on top we need a more restrictive
|
|
// filter than getMaxPossibleScore() so let's pick one query term,
|
|
// the one with the shortest termlist, and see how close it gets to
|
|
// each of the other query terms. then score each of those pairs.
|
|
// so quickly record the word positions of each query term into
|
|
// a ring buffer of 4096 slots where each slot contains the
|
|
// query term info #.
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "Ring buffer generation");
|
|
|
|
int32_t ourFirstPos = setRingbufFromQTI(&(m_queryTermInfos[m_minTermListIdx]), m_minTermListIdx, ringBuf);
|
|
|
|
// now get query term closest to query term # m_minTermListIdx which
|
|
// is the query term # with the shortest termlist
|
|
// get closest term to m_minTermListIdx and the distance
|
|
logTrace(g_conf.m_logTracePosdb, "Ring buffer generation 2");
|
|
for ( int32_t i = 0 ; i < m_numQueryTermInfos ; i++ ) {
|
|
if ( i == m_minTermListIdx ) {
|
|
continue;
|
|
}
|
|
|
|
// get the query term info
|
|
const QueryTermInfo *qti = &(m_queryTermInfos[i]);
|
|
|
|
// if we have a negative term, skip it
|
|
if ( qti->m_subList[0].m_bigramFlag & (BF_NEGATIVE) ) {
|
|
continue;
|
|
}
|
|
|
|
// store all his word positions into ring buffer AS WELL
|
|
setRingbufFromQTI(qti, i, ringBuf);
|
|
|
|
// reset
|
|
int32_t ourLastPos = -1;
|
|
int32_t hisLastPos = -1;
|
|
int32_t bestDist = 0x7fffffff;
|
|
// how far is this guy from the man?
|
|
for ( int32_t x = 0 ; x < (int32_t)RINGBUFSIZE ; ) {
|
|
// skip next 4 slots if all empty. fast?
|
|
if (*(uint32_t *)(ringBuf+x) == 0xffffffff) {
|
|
x+=4;
|
|
continue;
|
|
}
|
|
|
|
// skip if nobody
|
|
if ( ringBuf[x] == 0xff ) {
|
|
x++;
|
|
continue;
|
|
}
|
|
|
|
// get query term #
|
|
unsigned char qt = ringBuf[x];
|
|
|
|
// if it's the man
|
|
if ( qt == m_minTermListIdx ) {
|
|
// record
|
|
hisLastPos = x;
|
|
// skip if we are not there yet
|
|
if ( ourLastPos == -1 ) {
|
|
x++;
|
|
continue;
|
|
}
|
|
|
|
// try distance fix
|
|
if ( x - ourLastPos < bestDist ) {
|
|
bestDist = x - ourLastPos;
|
|
}
|
|
}
|
|
// if us
|
|
else
|
|
if ( qt == i ) {
|
|
// record
|
|
ourLastPos = x;
|
|
// skip if he's not recorded yet
|
|
if ( hisLastPos == -1 ) {
|
|
x++;
|
|
continue;
|
|
}
|
|
|
|
// check dist
|
|
if ( x - hisLastPos < bestDist ) {
|
|
bestDist = x - hisLastPos;
|
|
}
|
|
}
|
|
|
|
x++;
|
|
}
|
|
|
|
// compare last occurrence of query term #x with our first occ.
|
|
// since this is a RING buffer
|
|
int32_t wrapDist = ourFirstPos + ((int32_t)RINGBUFSIZE-hisLastPos);
|
|
if ( wrapDist < bestDist ) {
|
|
bestDist = wrapDist;
|
|
}
|
|
|
|
// query distance
|
|
int32_t qdist = m_qpos[m_minTermListIdx] - m_qpos[i];
|
|
// compute it
|
|
float maxScore2 = getMaxPossibleScore(&(m_queryTermInfos[i]));
|
|
maxScore2 = modifyMaxScoreByDistance(maxScore2,
|
|
bestDist,
|
|
qdist,
|
|
&(m_queryTermInfos[m_minTermListIdx]));
|
|
// -1 means it has inlink text so do not apply this constraint
|
|
// to this docid because it is too difficult because we
|
|
// sum up the inlink text
|
|
if ( maxScore2 < 0.0 ) {
|
|
continue;
|
|
}
|
|
|
|
// if any one of these terms have a max score below the
|
|
// worst score of the 10th result, then it can not win.
|
|
// @todo: BR. Really? ANY of them?
|
|
if ( maxScore2 <= minWinningScore ) {
|
|
logTrace(g_conf.m_logTracePosdb, "END - docid score too low");
|
|
return false;
|
|
}
|
|
}
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "END - docid score high enough");
|
|
return true;
|
|
}
|
|
|
|
|
|
|
|
//
|
|
// Data for the current DocID found in sublists of each query term
|
|
// is merged into a single list, so we end up with one list per query
|
|
// term.
|
|
//
|
|
void PosdbTable::mergeTermSubListsForDocId(MiniMergeBuffer *miniMergeBuffer, int *highestInlinkSiteRank) {
|
|
logTrace(g_conf.m_logTracePosdb, "BEGIN.");
|
|
char *miniMergeBuf = miniMergeBuffer->buffer;
|
|
char *miniMergeBufEnd = miniMergeBuffer->buffer+sizeof(miniMergeBuffer->buffer);
|
|
|
|
// we got a docid that has all the query terms, so merge
|
|
// each term's sublists into a single list just for this docid.
|
|
//
|
|
// all posdb keys for this docid should fit in here, the
|
|
// mini merge buf:
|
|
char *mptr = miniMergeBuf;
|
|
char *miniMergeBufSafeEnd = miniMergeBufEnd - 1000; //fragile hack but no worse than the original code
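// the merge loop below re-checks mptr against this bound before every key
// and writes at most 12 bytes per iteration, so 1000 bytes of head-room is ample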
|
|
char *lastMptr = NULL;
|
|
|
|
// Merge each set of sublists, like we merge a term's list with
|
|
// its two associated bigram lists, if there, the left bigram and
|
|
// right bigram list. Merge all the synonym lists for that term
|
|
// together as well, so if the term is 'run' we merge it with the
|
|
// lists for 'running' 'ran' etc.
|
|
logTrace(g_conf.m_logTracePosdb, "Merge sublists into a single list per query term");
|
|
for ( int32_t j = 0 ; j < m_numQueryTermInfos ; j++ ) {
|
|
// get the query term info
|
|
QueryTermInfo *qti = &(m_queryTermInfos[j]);
|
|
|
|
// just use the flags from first term i guess
|
|
// NO! this loses the wikihalfstopbigram bit! so we gotta
|
|
// add that in for the key i guess the same way we add in
|
|
// the syn bits below!!!!!
|
|
m_bflags [j] = qti->m_numSubLists>0 ? qti->m_subList[0].m_bigramFlag : 0;
|
|
// if we have a negative term, skip it
|
|
if ( qti->m_numSubLists>0 && qti->m_subList[0].m_bigramFlag & BF_NEGATIVE ) {
|
|
// need to make this NULL for getSiteRank() call below
|
|
miniMergeBuffer->mergedListStart[j] = NULL;
|
|
// if its empty, that's good!
|
|
continue;
|
|
}
|
|
|
|
// the merged list for term #j is here:
|
|
miniMergeBuffer->mergedListStart[j] = mptr;
|
|
bool isFirstKey = true;
|
|
|
|
const char *nwp[MAX_SUBLISTS];
|
|
const char *nwpEnd[MAX_SUBLISTS];
|
|
char nwpFlags[MAX_SUBLISTS];
|
|
int baseSubListIndex[MAX_SUBLISTS];
|
|
// populate the nwp[] arrays for merging
|
|
int32_t nsub = 0;
|
|
for ( int32_t k = 0 ; k < qti->m_numMatchingSubLists ; k++ ) {
|
|
baseSubListIndex[nsub] = k;
|
|
// NULL means does not have that docid
|
|
if ( ! qti->m_matchingSublist[k].m_savedCursor ) {
|
|
continue;
|
|
}
|
|
|
|
// getMaxPossibleScore() incremented m_matchingSubListCursor to
|
|
// the next docid so use m_matchingSubListSavedCursor.
|
|
nwp[nsub] = qti->m_matchingSublist[k].m_savedCursor;
|
|
nwpEnd[nsub] = qti->m_matchingSublist[k].m_cursor;
|
|
nwpFlags[nsub] = qti->m_subList[k].m_bigramFlag;
|
|
nsub++;
|
|
}
|
|
|
|
// Merge the lists into a list in miniMergeBuf.
|
|
// Get the min of each list
|
|
while( mptr < miniMergeBufSafeEnd ) {
|
|
int32_t mink = -1;
|
|
|
|
for ( int32_t k = 0 ; k < nsub ; k++ ) {
|
|
// skip if list is exhausted
|
|
if ( ! nwp[k] ) {
|
|
continue;
|
|
}
|
|
|
|
// auto winner?
|
|
if ( mink == -1 ) {
|
|
mink = k;
|
|
continue;
|
|
}
|
|
|
|
if ( KEYCMP(nwp[k], nwp[mink], 6) < 0 ) {
|
|
mink = k; // a new min...
|
|
}
|
|
}
|
|
|
|
// all exhausted? merge next set of sublists then for term #j
|
|
if ( mink == -1 ) {
|
|
// continue outer "j < m_numQueryTermInfos" loop.
|
|
break;
|
|
}
|
|
|
|
// get keysize
|
|
char ks = Posdb::getKeySize(nwp[mink]);
|
|
|
|
// if this is the first key in our merged list, store the full 12-byte key (docid included)
|
|
if ( isFirstKey ) {
|
|
// store a 12 byte key in the merged list buffer
|
|
memcpy ( mptr, nwp[mink], 12 );
|
|
int termIndex = qti->m_subList[baseSubListIndex[mink]].m_qt - m_q->m_qterms;
|
|
*(miniMergeBuffer->getTermIndexPtrForBufferPos(mptr)) = termIndex;
|
|
|
|
// Detect highest siterank of inlinkers
|
|
if ( Posdb::getHashGroup(mptr) == HASHGROUP_INLINKTEXT) {
|
|
char inlinkerSiteRank = Posdb::getWordSpamRank(mptr);
|
|
if(inlinkerSiteRank > *highestInlinkSiteRank) {
|
|
*highestInlinkSiteRank = inlinkerSiteRank;
|
|
}
|
|
}
|
|
|
|
// wipe out its syn bits and re-use our way
|
|
mptr[2] &= 0xfc;
|
|
// set the synbit so we know if its a synonym of term
|
|
if ( nwpFlags[mink] & (BF_BIGRAM|BF_SYNONYM)) {
|
|
mptr[2] |= 0x02;
|
|
}
|
|
|
|
// wiki half stop bigram? so for the query
|
|
// 'time enough for love' the phrase term "enough for"
|
|
// is a half stopword wiki bigram, because it is in
|
|
// a phrase in wikipedia ("time enough for love") and
|
|
// one of the two words in the phrase term is a
|
|
// stop word. therefore we give it more weight than
|
|
// just 'enough' by itself.
|
|
if ( nwpFlags[mink] & BF_HALFSTOPWIKIBIGRAM ) {
|
|
mptr[2] |= 0x01;
|
|
}
|
|
|
|
// make sure its 12 bytes! it might have been
|
|
// the first key for the termid, and 18 bytes.
|
|
mptr[0] &= 0xf9;
|
|
mptr[0] |= 0x02;
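// (0xf9 cleared both compression bits; setting 0x02 marks this as a
// 12-byte key: docid kept, termid implied by the list)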
|
|
// save it
|
|
lastMptr = mptr;
|
|
mptr += 12;
|
|
isFirstKey = false;
|
|
}
|
|
else {
|
|
// Old, dubious comment: "don't add if same keypos as previous item..."
|
|
// Relied on the matchSubList to be in the order: left-bigram, right-bigram, term, synonyms...
|
|
// But setQueryTermInfo() has had a bug there for as long as we know and
|
|
// multiple entries with the same position seem to have no harmful effect.
|
|
memcpy ( mptr, nwp[mink], 6 );
|
|
int termIndex = qti->m_subList[qti->m_matchingSublist[mink].m_baseSubListIndex].m_qt - m_q->m_qterms;
|
|
*(miniMergeBuffer->getTermIndexPtrForBufferPos(mptr)) = termIndex;
|
|
|
|
// Detect highest siterank of inlinkers
|
|
if ( Posdb::getHashGroup(mptr) == HASHGROUP_INLINKTEXT) {
|
|
char inlinkerSiteRank = Posdb::getWordSpamRank(mptr);
|
|
if(inlinkerSiteRank > *highestInlinkSiteRank) {
|
|
*highestInlinkSiteRank = inlinkerSiteRank;
|
|
}
|
|
}
|
|
|
|
// wipe out its syn bits and re-use our way
|
|
mptr[2] &= 0xfc;
|
|
// set the synbit so we know if its a synonym of term
|
|
if ( nwpFlags[mink] & (BF_BIGRAM|BF_SYNONYM)) {
|
|
mptr[2] |= 0x02;
|
|
}
|
|
|
|
if ( nwpFlags[mink] & BF_HALFSTOPWIKIBIGRAM ) {
|
|
mptr[2] |= 0x01;
|
|
}
|
|
|
|
// if it was the first key of its list it may not
|
|
// have its bit set for being 6 bytes now! so turn
|
|
// on the 2 compression bits
|
|
mptr[0] &= 0xf9;
|
|
mptr[0] |= 0x06;
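// (0x02|0x04 marks a 6-byte key: both docid and termid are implied
// by the preceding 12-byte key in this merged list)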
|
|
// save it
|
|
lastMptr = mptr;
|
|
mptr += 6;
|
|
}
|
|
|
|
// advance the cursor over the key we used.
|
|
nwp[mink] += ks; // Posdb::getKeySize(nwp[mink]);
|
|
|
|
// exhausted?
|
|
if ( nwp[mink] >= nwpEnd[mink] ) {
|
|
nwp[mink] = NULL;
|
|
}
|
|
}
|
|
|
|
// wrap it up here since done merging
|
|
miniMergeBuffer->mergedListEnd[j] = mptr;
|
|
//log(LOG_ERROR,"%s:%d: j=%" PRId32 ": miniMergedListStart[%" PRId32 "]=%p, miniMergedListEnd[%" PRId32 "]=%p, mptr=%p, miniMergeBufEnd=%p, term=[%.*s] - TERM DONE", __func__, __LINE__, j, j, miniMergeBuffer->mergedListStart[j], j, miniMergeBuffer->mergedListEnd[j], mptr, miniMergeBufEnd, qti->m_qt->m_termLen, qti->m_qt->m_term);
|
|
}
|
|
|
|
// breach?
|
|
if ( mptr > miniMergeBufEnd ) {
|
|
log(LOG_ERROR,"%s:%s:%d: miniMergeBuf=%p miniMergeBufEnd=%p mptr=%p", __FILE__, __func__, __LINE__, miniMergeBuf, miniMergeBufEnd, mptr);
|
|
gbshutdownAbort(true);
|
|
}
|
|
|
|
#ifdef _VALGRIND_
|
|
VALGRIND_MAKE_MEM_UNDEFINED(mptr, miniMergeBufEnd-mptr);
|
|
#endif
|
|
|
|
|
|
// Make sure miniMergeList[x] is NULL if it contains no values
|
|
for(int32_t i=0; i < m_numQueryTermInfos; i++) {
|
|
// skip if not part of score
|
|
if ( m_bflags[i] & (BF_PIPED|BF_NEGATIVE) ) {
|
|
continue;
|
|
}
|
|
|
|
// get list
|
|
const char *plist = miniMergeBuffer->mergedListStart[i];
|
|
const char *plistEnd = miniMergeBuffer->mergedListEnd[i];
|
|
int32_t psize = plistEnd - plist;
|
|
|
|
if( !psize ) {
|
|
// BR fix 20160918: Avoid working on empty lists where start and
|
|
// end pointers are the same. This can happen if no positions are
|
|
// copied to the merged list because they are all synonyms or
|
|
// phrase terms. See "HACK OF CONFUSION" above..
|
|
miniMergeBuffer->mergedListStart[i] = NULL;
|
|
}
|
|
|
|
|
|
//
|
|
// sanity check for all
|
|
//
|
|
// test it. first key is 12 bytes.
|
|
if ( psize && Posdb::getKeySize(plist) != 12 ) {
|
|
log(LOG_ERROR,"%s:%s:%d: psize=%" PRId32 "", __FILE__, __func__, __LINE__, psize);
|
|
gbshutdownAbort(true);
|
|
}
|
|
|
|
// next key is 6
|
|
if ( psize > 12 && Posdb::getKeySize(plist+12) != 6) {
|
|
log(LOG_ERROR,"%s:%s:%d: next key size=%" PRId32 "", __FILE__, __func__, __LINE__, Posdb::getKeySize(plist+12));
|
|
gbshutdownAbort(true);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "END.");
|
|
}
|
|
|
|
|
|
|
|
//
|
|
// Nested for loops to score the term pairs.
|
|
//
|
|
// Store best scores into the scoreMatrix so the sliding window
|
|
// algorithm can use them from there to do sub-outs
|
|
//
|
|
void PosdbTable::createNonBodyTermPairScoreMatrix(const MiniMergeBuffer *miniMergeBuffer, PairScoreMatrix *scoreMatrix) {
|
|
logTrace(g_conf.m_logTracePosdb, "BEGIN");
|
|
|
|
// scan over each query term (its synonyms are part of the
|
|
// QueryTermInfo)
|
|
for(int32_t i=0; i < m_numQueryTermInfos; i++) {
|
|
// skip if not part of score
|
|
if ( m_bflags[i] & (BF_PIPED|BF_NEGATIVE) ) {
|
|
continue;
|
|
}
|
|
|
|
// and pair it with each other possible query term
|
|
for ( int32_t j = i+1 ; j < m_numQueryTermInfos ; j++ ) {
|
|
// skip if not part of score
|
|
if ( m_bflags[j] & (BF_PIPED|BF_NEGATIVE) ) {
|
|
continue;
|
|
}
|
|
|
|
// but if they are in the same wikipedia phrase
|
|
// then try to keep their positions as in the query.
|
|
// so for 'time enough for love' ideally we want
|
|
// 'time' to be 6 units apart from 'love'
|
|
int32_t qdist;
|
|
float score;
|
|
// zero means not in a phrase
|
|
if ( m_wikiPhraseIds[j] == m_wikiPhraseIds[i] && m_wikiPhraseIds[j] ) {
|
|
// . the distance between the terms in the query
|
|
// . ideally we'd like this distance to be reflected
|
|
// in the matched terms in the body
|
|
qdist = m_qpos[j] - m_qpos[i];
|
|
// wiki weight
|
|
score = (float)WIKI_WEIGHT; // .50;
|
|
}
|
|
else {
|
|
// basically try to get query words as close
|
|
// together as possible
|
|
qdist = 2;
|
|
// this should help fix
|
|
// 'what is an unsecured loan' so we are more likely
|
|
// to get the page that has that exact phrase in it.
|
|
// yes, but hurts how to make a lock pick set.
|
|
//qdist = qpos[j] - qpos[i];
|
|
// wiki weight
|
|
score = 1.0;
|
|
}
|
|
|
|
float maxnbtp;
|
|
//
|
|
// get score for term pair from non-body occurring terms
|
|
//
|
|
if ( miniMergeBuffer->mergedListStart[i] && miniMergeBuffer->mergedListStart[j] ) {
|
|
maxnbtp = getMaxScoreForNonBodyTermPair(miniMergeBuffer, i, j, qdist);
|
|
}
|
|
else {
|
|
maxnbtp = -1;
|
|
}
|
|
|
|
// it's -1 if one term is in the body/header/menu/etc.
|
|
if ( maxnbtp < 0 ) {
|
|
score = -1.00;
|
|
}
|
|
else {
|
|
score *= maxnbtp;
|
|
}
|
|
|
|
// store in matrix for "sub out" algo below
|
|
// when doing sliding window
|
|
scoreMatrix->set(j,i, score);
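// only the j>i half of the matrix is ever written, and
// findMinTermPairScoreInWindow() reads it back with the same (j,i) order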
|
|
}
|
|
}
|
|
logTrace(g_conf.m_logTracePosdb, "END");
|
|
}
|
|
|
|
|
|
//
|
|
// Finds the highest single term score sum.
|
|
// Creates array of highest scoring non-body positions
|
|
//
|
|
float PosdbTable::getMinSingleTermScoreSum(const MiniMergeBuffer *miniMergeBuffer, std::vector<const char *> &highestScoringNonBodyPos, DocIdScore *pdcs) {
|
|
float minSingleScore = 999999999.0;
|
|
bool mergedListFound = false;
|
|
bool allSpecialTerms = true;
|
|
bool scoredTerm = false;
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "BEGIN");
|
|
|
|
// Now add single word scores.
|
|
//
|
|
// Having inlink text from siterank 15 of max
|
|
// diversity/density will result in the largest score,
|
|
// but we add them all up...
|
|
//
|
|
// This should be highly negative if singles[i] has a '-'
|
|
// termsign...
|
|
|
|
|
|
for ( int32_t i = 0 ; i < m_numQueryTermInfos ; i++ ) {
|
|
if ( ! miniMergeBuffer->mergedListStart[i] ) {
|
|
continue;
|
|
}
|
|
mergedListFound = true;
|
|
|
|
// skip if to the left of a pipe operator
|
|
if( m_bflags[i] & (BF_PIPED|BF_NEGATIVE) ) {
|
|
continue;
|
|
}
|
|
|
|
allSpecialTerms = false;
|
|
|
|
// sometimes there is no wordpos subtermlist for this docid
|
|
// because it just has the bigram, like "streetlight" and not
|
|
// the word "light" by itself for the query 'street light'
|
|
//if ( miniMergeBuffer->mergedListStart[i] ) {
|
|
// assume all word positions are in body
|
|
//highestScoringNonBodyPos[i] = NULL;
|
|
|
|
// This scans all word positions for this term.
|
|
//
|
|
// This should ignore occurrences in the body and only
|
|
// focus on inlink text, etc.
|
|
//
|
|
// Sets "highestScoringNonBodyPos" to point to the winning word
|
|
// position which does NOT occur in the body.
|
|
//
|
|
// Adds up MAX_TOP top scores and returns that sum.
|
|
//
|
|
// pdcs is NULL if not currPassNum == INTERSECT_DEBUG_INFO
|
|
float sts = getBestScoreSumForSingleTerm(miniMergeBuffer, i, pdcs, &(highestScoringNonBodyPos[i]));
|
|
scoredTerm = true;
|
|
//logTrace(g_conf.m_logTracePosdb, "i=%d sts=%f", i,sts);
|
|
|
|
// sanity check
|
|
if ( highestScoringNonBodyPos[i] && s_inBody[Posdb::getHashGroup(highestScoringNonBodyPos[i])] ) {
|
|
gbshutdownAbort(true);
|
|
}
|
|
|
|
//sts /= 3.0;
|
|
if ( sts < minSingleScore ) {
|
|
minSingleScore = sts;
|
|
}
|
|
}
|
|
|
|
if( !mergedListFound || (!scoredTerm && !allSpecialTerms) ) {
|
|
// Fix default value if no single terms were scored, and all terms are not special (e.g. numbers).
|
|
// This returns -1 for documents matching bigrams only, and not single terms. Can happen when searching
|
|
// for "bridget jones" and a document has the text "bridgetjon es" as the only match (bigram).
|
|
//
|
|
// If terms are numbers, do NOT return -1, otherwise gbsortbyint queries do not work.
|
|
minSingleScore = -1;
|
|
}
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "END. minSingleScore=%f", minSingleScore);
|
|
return minSingleScore;
|
|
}
|
|
|
|
|
|
|
|
// Like getTermPairScore, but uses the word positions currently pointed to by ptrs[i].
|
|
// Does NOT scan the word position lists.
|
|
// Also tries to sub-out each term with the title or linktext wordpos term
|
|
// pointed to by "highestScoringNonBodyPos[i]".
|
|
//
|
|
// OUTPUT:
|
|
// bestMinTermPairWindowScore: The best minimum window score
|
|
// bestMinTermPairWindowPtrs : Pointers to query term positions giving the best minimum score
|
|
//
|
|
void PosdbTable::findMinTermPairScoreInWindow(const MiniMergeBuffer *miniMergeBuffer, const std::vector<const char *> &ptrs, std::vector<const char *> *bestMinTermPairWindowPtrs, float *bestMinTermPairWindowScore, const std::vector<const char *> &highestScoringNonBodyPos, const PairScoreMatrix &scoreMatrix) {
|
|
float minTermPairScoreInWindow = 999999999.0;
|
|
bool mergedListFound = false;
|
|
bool allSpecialTerms = true;
|
|
bool scoredTerms = false;
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "BEGIN.");
|
|
|
|
// TODO: only do this loop on the (i,j) pairs where i or j
|
|
// is the term whose position got advanced in the sliding window.
|
|
|
|
for ( int32_t i = 0 ; i < m_numQueryTermInfos; i++ ) {
|
|
// skip if to the left of a pipe operator
|
|
if ( m_bflags[i] & (BF_PIPED|BF_NEGATIVE) ) {
|
|
continue;
|
|
}
|
|
allSpecialTerms = false;
|
|
|
|
// skip empty list
|
|
if( !ptrs[i] ) {
|
|
continue;
|
|
}
|
|
mergedListFound = true;
|
|
|
|
//if ( ptrs[i] ) wpi = ptrs[i];
|
|
// if term does not occur in body, sub-in the best term
|
|
// from the title/linktext/etc.
|
|
//else wpi = highestScoringNonBodyPos[i];
|
|
|
|
const char *wpi = ptrs[i];
|
|
|
|
// loop over other terms
|
|
for(int32_t j = i + 1; j < m_numQueryTermInfos; j++) {
|
|
// skip if to the left of a pipe operator
|
|
if ( m_bflags[j] & (BF_PIPED|BF_NEGATIVE) ) {
|
|
continue;
|
|
}
|
|
|
|
// skip empty list
|
|
if( !ptrs[j] ) {
|
|
continue;
|
|
}
|
|
|
|
|
|
// TODO: use a cache using wpi/wpj as the key.
|
|
//if ( ptrs[j] ) wpj = ptrs[j];
|
|
// if term does not occur in body, sub-in the best term
|
|
// from the title/linktext/etc.
|
|
//else wpj = highestScoringNonBodyPos[j];
|
|
|
|
const char *wpj = ptrs[j];
|
|
int32_t qdist;
|
|
float wikiWeight;
|
|
|
|
// in same wikipedia phrase?
|
|
if ( m_wikiPhraseIds[j] == m_wikiPhraseIds[i] &&
|
|
// zero means not in a phrase
|
|
m_wikiPhraseIds[j] ) {
|
|
// try to get dist that matches qdist exactly
|
|
qdist = m_qpos[j] - m_qpos[i];
|
|
// wiki weight
|
|
wikiWeight = WIKI_WEIGHT; // .50;
|
|
}
|
|
else {
|
|
// basically try to get query words as close
|
|
// together as possible
|
|
qdist = 2;
|
|
// fix 'what is an unsecured loan' to get the
|
|
// exact phrase with higher score
|
|
//m_qdist = m_qpos[j] - m_qpos[i];
|
|
// wiki weight
|
|
wikiWeight = 1.0;
|
|
}
|
|
|
|
// this will be -1 if wpi or wpj is NULL
|
|
float max = getScoreForTermPair(miniMergeBuffer, wpi, wpj, 0, qdist);
|
|
scoredTerms = true;
|
|
|
|
// try sub-ing in the best title occurrence or best
|
|
// inlink text occurrence. cuz if the term is in the title
|
|
// but these two terms are really far apart, we should
|
|
// get a better score
|
|
float score = getScoreForTermPair(miniMergeBuffer, highestScoringNonBodyPos[i], wpj, FIXED_DISTANCE, qdist);
|
|
max = gbmax(max,score);
|
|
|
|
// a double pair sub should be covered in the
|
|
// getMaxScoreForNonBodyTermPair() function
|
|
score = getScoreForTermPair(miniMergeBuffer, highestScoringNonBodyPos[i], highestScoringNonBodyPos[j], FIXED_DISTANCE, qdist);
|
|
max = gbmax(max,score);
|
|
|
|
score = getScoreForTermPair(miniMergeBuffer, wpi, highestScoringNonBodyPos[j], FIXED_DISTANCE, qdist);
|
|
max = gbmax(max,score);
|
|
|
|
// wikipedia phrase weight
|
|
if ( !almostEqualFloat(wikiWeight, 1.0) ) {
|
|
max *= wikiWeight;
|
|
}
|
|
|
|
// term freqweight here
|
|
const int queryTermIndex1 = miniMergeBuffer->getTermIndexForBufferPos(wpi);
|
|
const float termFreqWeight1 = m_q->m_qterms[queryTermIndex1].m_termFreqWeight;
|
|
max *= termFreqWeight1;
|
|
const int queryTermIndex2 = miniMergeBuffer->getTermIndexForBufferPos(wpj);
|
|
const float termFreqWeight2 = m_q->m_qterms[queryTermIndex2].m_termFreqWeight;
|
|
max *= termFreqWeight2;
|
|
//TODO: shouldn't we multiply with userweight here too?
|
|
|
|
// use score from scoreMatrix if bigger
|
|
max = gbmax(max,scoreMatrix.get(j,i));
|
|
|
|
|
|
// in same quoted phrase?
|
|
if ( m_quotedStartIds[j] >= 0 && m_quotedStartIds[j] == m_quotedStartIds[i] ) {
|
|
// no subouts allowed i guess
|
|
if( !wpi ) {
|
|
max = -1.0;
|
|
}
|
|
else
|
|
if( !wpj ) {
|
|
max = -1.0;
|
|
}
|
|
else {
|
|
int32_t qdist = m_qpos[j] - m_qpos[i];
|
|
int32_t p1 = Posdb::getWordPos ( wpi );
|
|
int32_t p2 = Posdb::getWordPos ( wpj );
|
|
int32_t dist = p2 - p1;
|
|
|
|
// must be in right order!
|
|
if(dist < 0) {
|
|
max = -1.0;
|
|
}
|
|
// allow for a discrepancy of 1 unit in case
|
|
// there is a comma? i think we add an extra
|
|
// unit
|
|
else
|
|
if(dist > qdist && dist - qdist > 1) {
|
|
max = -1.0;
|
|
//log("ddd1: i=%" PRId32" j=%" PRId32" "
|
|
// "dist=%" PRId32" qdist=%" PRId32,
|
|
// i,j,dist,qdist);
|
|
}
|
|
else
|
|
if(dist < qdist && qdist - dist > 1) {
|
|
max = -1.0;
|
|
}
|
|
}
|
|
}
|
|
|
|
// now we want the sliding window with the largest min
|
|
// term pair score!
|
|
minTermPairScoreInWindow = gbmin(max,minTermPairScoreInWindow);
|
|
}
|
|
}
|
|
|
|
if( !mergedListFound || (!scoredTerms && !allSpecialTerms) ) {
|
|
// Similar fix as in getMinSingleTermScoreSum, but should not happen in this function ...
|
|
minTermPairScoreInWindow = -1;
|
|
}
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "minTermPairScoreInWindow=%f, bestMinTermPairWindowScore=%f", minTermPairScoreInWindow, *bestMinTermPairWindowScore);
|
|
|
|
// Our best minimum score better than current best minimum score?
|
|
if ( minTermPairScoreInWindow <= *bestMinTermPairWindowScore ) {
|
|
logTrace(g_conf.m_logTracePosdb, "END.");
|
|
return;
|
|
}
|
|
|
|
// Yep, our best minimum score is the highest so far
|
|
*bestMinTermPairWindowScore = minTermPairScoreInWindow;
|
|
|
|
// Record term positions in winning window
|
|
for(int32_t i=0; i < m_numQueryTermInfos; i++) {
|
|
if(ptrs[i]!=NULL)
|
|
(*bestMinTermPairWindowPtrs)[i] = ptrs[i];
|
|
}
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "END.");
|
|
}
|
|
|
|
|
|
|
|
// Find first entry that is a body entry (determined by 's_inBody' array).
|
|
// Returns NULL if none found
|
|
static const char *findFirstBodyPosdbEntry(const char *listStart, const char *listEnd) {
|
|
for(const char *p = listStart; p<listEnd; ) {
|
|
unsigned char hg = Posdb::getHashGroup(p);
|
|
if(s_inBody[hg])
|
|
return p;
|
|
if(*p & 0x04)
|
|
p += 6;
|
|
else
|
|
p += 12;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
// Find next entry that is a body entry (determined by 's_inBody' array).
|
|
// Returns NULL if none found
|
|
static const char *findNextBodyPosdbEntry(const char *p, const char *listEnd) {
|
|
if(*p & 0x04)
|
|
p += 6;
|
|
else
|
|
p += 12;
|
|
return findFirstBodyPosdbEntry(p,listEnd);
|
|
}
|
|
|
|
|
|
|
|
float PosdbTable::getMinTermPairScoreSlidingWindow(const MiniMergeBuffer *miniMergeBuffer, const std::vector<const char *> &highestScoringNonBodyPos, std::vector<const char *> &bestMinTermPairWindowPtrs, std::vector<const char *> &xpos, const PairScoreMatrix &scoreMatrix, DocIdScore *pdcs) {
|
|
logTrace(g_conf.m_logTracePosdb, "Sliding Window algorithm begins");
|
|
|
|
//bestMinTermPairWindowPtrs is just a buffer allocated once by the caller
|
|
for(int i=0; i<m_numQueryTermInfos; i++)
|
|
bestMinTermPairWindowPtrs[i] = NULL;
|
|
|
|
// Scan the terms that are in the body in a sliding window
|
|
//
|
|
// Compute the term pair score on just the terms in that
|
|
// sliding window. that way, the term for a word like 'dog'
|
|
// keeps the same word position when it is paired up with the
|
|
// other terms.
|
|
//
|
|
// Compute the score the same way getTermPairScore() works so
|
|
// we are on the same playing field
|
|
//
|
|
// Sub-out each term with its best scoring occurrence in the title
|
|
// or link text or meta tag, etc. but it gets a distance penalty
|
|
// of like 100 units or so.
|
|
//
|
|
// If term does not occur in the body, the sub-out approach should
|
|
// fix that.
|
|
//
|
|
// Keep a matrix of the best scores between two terms from the
|
|
// above double nested loop algo, and replace that pair if we
|
|
// got a better score in the sliding window.
|
|
|
|
// use special ptrs for the windows so we do not mangle
|
|
// miniMergeBuffer->mergedListStart[] array because we use that below!
|
|
|
|
//
|
|
// init each list ptr to the first wordpos rec in the body
|
|
// for THIS DOCID and if no such rec, make it NULL
|
|
//
|
|
int numNonNullLists = 0;
|
|
for(int i = 0; i < m_numQueryTermInfos; i++) {
|
|
if(m_bflags[i] & (BF_PIPED|BF_NEGATIVE)) {
|
|
// not a ranking term
|
|
xpos[i] = NULL;
|
|
} else if(miniMergeBuffer->mergedListStart[i]==NULL) {
|
|
//no list
|
|
xpos[i] = NULL;
|
|
} else {
|
|
xpos[i] = findFirstBodyPosdbEntry(miniMergeBuffer->mergedListStart[i], miniMergeBuffer->mergedListEnd[i]);
|
|
if(xpos[i]) {
|
|
numNonNullLists++;
|
|
} else if(!highestScoringNonBodyPos[i]) {
|
|
// not in body, not in title?
|
|
gbshutdownLogicError();
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "Run sliding window algo? %s", numNonNullLists>0 ? "yes" : "no, no matches found in body");
|
|
|
|
float bestMinTermPairWindowScore = -2.0;
|
|
|
|
//todo: does it make any sense to iterate after one list has been exhausted? findMinTermPairScoreInWindow() is not going to find
|
|
//any lower pair-score because it skips the now-null list.
|
|
while(numNonNullLists>0) {
|
|
//
|
|
// Now all xpos point to positions in the document body. Calc the "window" score (score
|
|
// for current term positions).
|
|
//
|
|
// If window score beats bestMinTermPairWindowScore we store the term xpos pointers
|
|
// that define this window in the bestMinTermPairWindowPtrs[] array.
|
|
//
|
|
// Will try to substitute either of the two term positions with highestScoringNonBodyPos[i]
|
|
// if better, but will fix the distance to FIXED_DISTANCE to give a distance penalty.
|
|
//
|
|
// "scoreMatrix" contains the highest scoring non-body term pair score, which will
|
|
// be used if higher than the calculated score for the terms.
|
|
//
|
|
// Sets m_bestMinTermPairWindowScore and bestMinTermPairWindowPtrs if this window score beats it.
|
|
//
|
|
findMinTermPairScoreInWindow(miniMergeBuffer, xpos, &bestMinTermPairWindowPtrs, &bestMinTermPairWindowScore, highestScoringNonBodyPos, scoreMatrix);
|
|
|
|
bool advanceMin = true;
|
|
|
|
while(advanceMin && numNonNullLists>0) {
|
|
advanceMin = false;
|
|
|
|
//
|
|
// Find the minimum word position in the document for ANY of the query terms.
|
|
// minPosTermIdx will contain the term index, minPos the position.
|
|
//
|
|
int32_t minPosTermIdx = -1;
|
|
int32_t minPos = 0;
|
|
for ( int32_t x = 0 ; x < m_numQueryTermInfos ; x++ ) {
|
|
if(xpos[x]!=NULL && (minPosTermIdx == -1 || Posdb::getWordPos(xpos[x]) < minPos)) {
|
|
minPosTermIdx = x;
|
|
minPos = Posdb::getWordPos(xpos[x]);
|
|
}
|
|
}
|
|
// sanity
|
|
if ( minPosTermIdx < 0 ) {
|
|
gbshutdownLogicError();
|
|
}
|
|
|
|
xpos[minPosTermIdx] = findNextBodyPosdbEntry(xpos[minPosTermIdx],miniMergeBuffer->mergedListEnd[minPosTermIdx]);
|
|
if(xpos[minPosTermIdx]==NULL) { //we exhausted the list
|
|
numNonNullLists--;
|
|
if(numNonNullLists>0)
|
|
advanceMin = true; //just advance the next list, if any
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
float minPairScore = -1.0;
|
|
logTrace(g_conf.m_logTracePosdb, "Zak algorithm begins");
|
|
|
|
for(int32_t i=0; i < m_numQueryTermInfos; i++) {
|
|
// skip if to the left of a pipe operator
|
|
if ( m_bflags[i] & (BF_PIPED|BF_NEGATIVE) ) {
|
|
continue;
|
|
}
|
|
|
|
if ( ! miniMergeBuffer->mergedListStart[i] ) {
|
|
continue;
|
|
}
|
|
|
|
|
|
for ( int32_t j = i+1 ; j < m_numQueryTermInfos ; j++ ) {
|
|
// skip if to the left of a pipe operator
|
|
if ( m_bflags[j] & (BF_PIPED|BF_NEGATIVE) ) {
|
|
continue;
|
|
}
|
|
|
|
if ( ! miniMergeBuffer->mergedListStart[j] ) {
|
|
continue;
|
|
}
|
|
// . this limits its scoring to the winning sliding window
|
|
// as far as the in-body terms are concerned
|
|
// . it will do sub-outs using the score matrix
|
|
// . this will skip over body terms that are not
|
|
// in the winning window defined by bestMinTermPairWindowPtrs[]
|
|
// that we set in findMinTermPairScoreInWindow()
|
|
// . returns the best score for this term
|
|
float tpscore = getTermPairScoreForAny(miniMergeBuffer, i, j, bestMinTermPairWindowPtrs, pdcs);
|
|
|
|
// get min of all term pair scores
|
|
if ( tpscore >= minPairScore && minPairScore >= 0.0 ) {
|
|
continue;
|
|
}
|
|
|
|
// got a new min
|
|
logTrace(g_conf.m_logTracePosdb, "i=%d, tpscore=%f", i, tpscore);
|
|
minPairScore = tpscore;
|
|
}
|
|
}
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "Zak algorithm ends. minPairScore=%f", minPairScore);
|
|
return minPairScore;
|
|
}
|
|
|
|
|
|
|
|
//simple wrapper around intersectLists_real() just for transforming std::bad_alloc exceptions into ENOMEM
|
|
void PosdbTable::intersectLists() {
|
|
try {
|
|
intersectLists_real();
|
|
} catch(std::bad_alloc&) {
|
|
log(LOG_ERROR,"posdb: caught std::bad_alloc - out of memory");
|
|
if(g_errno==0)
|
|
g_errno = ENOMEM;
|
|
}
|
|
}
|
|
|
|
|
|
// . compare the output of this to intersectLists9_r()
|
|
// . hopefully this will be easier to understand and faster
|
|
// . IDEAS:
|
|
// we could also note that if a term was not in the title or
|
|
// inlink text it could never beat the 10th score.
|
|
void PosdbTable::intersectLists_real() {
|
|
logTrace(g_conf.m_logTracePosdb, "BEGIN. numTerms: %" PRId32, m_q->m_numTerms);
|
|
|
|
if(m_topTree->getNumNodes()==0 && !allocateTopTree()) {
|
|
logTrace(g_conf.m_logTracePosdb, "END. could not allocate toptree");
|
|
g_errno = ENOMEM;
|
|
return;
|
|
}
|
|
if(m_topTree->getNumNodes()==0) {
|
|
logTrace(g_conf.m_logTracePosdb, "END. toptree has zero size");
|
|
return;
|
|
}
|
|
|
|
if(!allocateScoringInfo()) {
|
|
logTrace(g_conf.m_logTracePosdb, "END. could not allocate scoring info");
|
|
g_errno = ENOMEM;
|
|
return;
|
|
}
|
|
|
|
if(!allocWhiteListTable()) {
|
|
logTrace(g_conf.m_logTracePosdb, "END. could not allocate whitelist table");
|
|
return;
|
|
}
|
|
prepareWhiteListTable();
|
|
|
|
if(!setQueryTermInfo()) {
|
|
logTrace(g_conf.m_logTracePosdb, "END. could not allocate query term info");
|
|
return;
|
|
}
|
|
|
|
initWeights();
|
|
// assume no-op
|
|
m_t1 = 0LL;
|
|
|
|
// set start time
|
|
int64_t t1 = gettimeofdayInMilliseconds();
|
|
|
|
// assume we return early
|
|
m_addListsTime = 0;
|
|
|
|
|
|
if( !findCandidateDocIds() ) {
|
|
logTrace(g_conf.m_logTracePosdb, "END. Found no candidate docids");
|
|
return;
|
|
}
|
|
|
|
//
|
|
// The vote buffer now contains the matching docids and each term sublist
|
|
// has been adjusted to only contain these docids as well. Let the fun begin.
|
|
//
|
|
|
|
|
|
//
|
|
// TRANSFORM QueryTermInfo::m_* vars into old style arrays
|
|
|
|
m_wikiPhraseIds.resize(m_numQueryTermInfos);
|
|
m_quotedStartIds.resize(m_numQueryTermInfos);
|
|
m_qpos.resize(m_numQueryTermInfos);
|
|
m_qtermNums.resize(m_numQueryTermInfos);
|
|
m_bflags.resize(m_numQueryTermInfos);
|
|
std::vector<const char *> highestScoringNonBodyPos(m_numQueryTermInfos);
|
|
std::vector<const char *> bestMinTermPairWindowPtrs(m_numQueryTermInfos);
|
|
std::vector<const char *> xpos(m_numQueryTermInfos);
|
|
PairScoreMatrix scoreMatrix(m_numQueryTermInfos);
|
|
|
|
int64_t lastTime = gettimeofdayInMilliseconds();
|
|
int64_t now;
|
|
int64_t took;
|
|
int32_t phase = 3; // phases 1 and 2 are timed in findCandidateDocIds
|
|
|
|
|
|
for ( int32_t i = 0 ; i < m_numQueryTermInfos ; i++ ) {
|
|
// get it
|
|
QueryTermInfo &qti = m_queryTermInfos[i];
|
|
// set it
|
|
m_wikiPhraseIds [i] = qti.m_wikiPhraseId;
|
|
m_quotedStartIds[i] = qti.m_quotedStartId;
|
|
// query term position
|
|
m_qpos [i] = qti.m_qpos;
|
|
m_qtermNums [i] = qti.m_qtermNum;
|
|
}
|
|
|
|
|
|
//////////
|
|
//
|
|
// OLD MAIN INTERSECTION LOGIC
|
|
//
|
|
/////////
|
|
|
|
DocIdScore dcs;
|
|
DocIdScore *pdcs = NULL;
|
|
uint64_t lastDocId = 0LL;
|
|
int32_t lastLen = 0;
|
|
char siteRank;
|
|
int highestInlinkSiteRank;
|
|
lang_t docLang;
|
|
float score;
|
|
int32_t intScore = 0;
|
|
float minScore = 0.0;
|
|
float minSingleScore;
|
|
// scan the posdb keys in the smallest list
|
|
// raised from 200 to 300,000 for 'da da da' query
|
|
MiniMergeBuffer miniMergeBuf(m_numQueryTermInfos);
|
|
const char *docIdPtr;
|
|
char *docIdEnd = m_docIdVoteBuf.getBufStart()+m_docIdVoteBuf.length();
|
|
float minWinningScore = -1.0;
|
|
int32_t topCursor = -9;
|
|
int32_t numProcessed = 0;
|
|
int32_t prefiltMaxPossScoreFail = 0;
|
|
int32_t prefiltMaxPossScorePass = 0;
|
|
int32_t prefiltBestDistMaxPossScoreFail = 0;
|
|
int32_t prefiltBestDistMaxPossScorePass = 0;
|
|
|
|
|
|
// populate the cursors for each sublist
|
|
|
|
int32_t numQueryTermsToHandle = m_numQueryTermInfos;
|
|
if ( ! m_msg39req->m_doMaxScoreAlgo ) {
|
|
numQueryTermsToHandle = 0;
|
|
}
|
|
|
|
|
|
//
|
|
// Run through the scoring logic once or twice. Two passes needed ONLY if we
|
|
// need to create additional (debug) scoring info for visual display.
|
|
//
|
|
int numPassesNeeded = m_msg39req->m_getDocIdScoringInfo ? 2 : 1;
|
|
|
|
for(int currPassNum=0; currPassNum < numPassesNeeded; currPassNum++) {
|
|
//
|
|
// Pass 0: Find candidate docids and calculate scores
|
|
// Pass 1: Only for creating debug scoring info for visual display
|
|
//
|
|
switch( currPassNum ) {
|
|
case INTERSECT_SCORING:
|
|
logTrace(g_conf.m_logTracePosdb, "Loop 0: Real scoring");
|
|
break;
|
|
case INTERSECT_DEBUG_INFO:
|
|
logTrace(g_conf.m_logTracePosdb, "Loop 1: Data for visual scoring info");
|
|
removeScoreInfoForDeletedDocIds();
|
|
break;
|
|
default:
|
|
log(LOG_LOGIC,"%s:%d: Illegal pass number %d", __FILE__, __LINE__, currPassNum);
|
|
gbshutdownLogicError();
|
|
}
|
|
|
|
// reset docid to start!
|
|
docIdPtr = m_docIdVoteBuf.getBufStart();
|
|
|
|
if( currPassNum == INTERSECT_DEBUG_INFO ) {
|
|
//
|
|
// reset QueryTermInfo::m_matchingSubListCursor[] to point to start of
|
|
// term lists for second pass that gets printable scoring info
|
|
//
|
|
for ( int32_t i = 0 ; i < m_numQueryTermInfos ; i++ ) {
|
|
// get it
|
|
QueryTermInfo &qti = m_queryTermInfos[i];
|
|
// skip negative termlists
|
|
if ( qti.m_numSubLists>0 && qti.m_subList[0].m_bigramFlag & BF_NEGATIVE ) {
|
|
continue;
|
|
}
|
|
|
|
// do each sublist
|
|
for ( int32_t j = 0 ; j < qti.m_numMatchingSubLists ; j++ ) {
|
|
qti.m_matchingSublist[j].m_cursor = qti.m_matchingSublist[j].m_start;
|
|
qti.m_matchingSublist[j].m_savedCursor = qti.m_matchingSublist[j].m_start;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
//#
|
|
//# MAIN LOOP for looping over each docid
|
|
//#
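// Each entry in the docid vote buffer is 6 bytes: byte 0 holds the low
// docid byte (with the two non-docid bits masked off), bytes 1-4 hold the
// upper docid bytes, and byte 5 holds the vote. The loop below consumes one
// such entry per iteration (docIdPtr += 6).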
|
|
|
|
bool allDone = false;
|
|
while( !allDone && docIdPtr < docIdEnd ) {
|
|
// logTrace(g_conf.m_logTracePosdb, "Handling next docId");
|
|
|
|
bool skipToNextDocId = false;
|
|
siteRank = 0;
|
|
docLang = langUnknown;
|
|
highestInlinkSiteRank = -1;
|
|
bool docInThisFile;
|
|
|
|
if ( currPassNum == INTERSECT_SCORING ) {
|
|
m_docId = *(uint32_t *)(docIdPtr+1);
|
|
m_docId <<= 8;
|
|
m_docId |= (unsigned char)docIdPtr[0];
|
|
m_docId >>= 2;
|
|
docInThisFile = m_documentIndexChecker->exists(m_docId);
|
|
}
|
|
else {
|
|
//
|
|
// second pass? for printing out transparency info.
|
|
// genDebugScoreInfo1 sets m_docId from the top scorer tree
|
|
//
|
|
if( genDebugScoreInfo1(&numProcessed, &topCursor, &docInThisFile) ) {
|
|
logTrace(g_conf.m_logTracePosdb, "Pass #%d for file %" PRId32 " done", currPassNum, m_documentIndexChecker->getFileNum());
|
|
|
|
// returns true if no more docids to handle
|
|
allDone = true;
|
|
break; // break out of docIdPtr < docIdEnd loop
|
|
}
|
|
}
|
|
|
|
//bool docInThisFile = m_documentIndexChecker->exists(m_docId);
|
|
logTrace(g_conf.m_logTracePosdb, "Handling next docId: %" PRId64 " - pass #%d - %sfound in this file (%" PRId32 ")", m_docId, currPassNum, docInThisFile?"":"SKIPPING, not ", m_documentIndexChecker->getFileNum());
|
|
|
|
if(!docInThisFile) {
|
|
// Only advance cursors in first pass
|
|
if( currPassNum == INTERSECT_SCORING ) {
|
|
if( !advanceTermListCursors(docIdPtr) ) {
|
|
logTrace(g_conf.m_logTracePosdb, "END. advanceTermListCursors failed");
|
|
return;
|
|
}
|
|
docIdPtr += 6;
|
|
}
|
|
|
|
continue;
|
|
}
|
|
|
|
//calculate complete score multiplier
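// Each of the 26 docid flag bits has its own configured multiplier; for
// every bit set on this docid the multipliers are multiplied together.
// E.g. (illustrative values) two set flags with multipliers 0.5 and 0.8
// would scale all of this docid's scores by 0.4.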
|
|
float completeScoreMultiplier = 1.0;
|
|
unsigned flags = 0;
|
|
if(g_d2fasm.lookupFlags(m_docId,&flags) && flags) {
|
|
for(int i=0; i<26; i++) {
|
|
if(flags&(1<<i))
|
|
completeScoreMultiplier *= m_baseScoringParameters.m_flagScoreMultiplier[i];
|
|
}
|
|
}
|
|
|
|
|
|
if( currPassNum == INTERSECT_SCORING ) {
|
|
//
|
|
// Pre-advance each termlist's cursor to skip to next docid.
|
|
//
|
|
if( !advanceTermListCursors(docIdPtr) ) {
|
|
logTrace(g_conf.m_logTracePosdb, "END. advanceTermListCursors failed");
|
|
return;
|
|
}
|
|
|
|
if( !m_q->m_isBoolean ) {
|
|
//##
|
|
//## PRE-FILTERS. Discard DocIDs that cannot meet the minimum required
|
|
//## score, before entering the main scoring loop below
|
|
//##
|
|
|
|
//
|
|
// TODO: consider skipping this pre-filter if it sucks, as it does
|
|
// for 'time enough for love'. it might save time!
|
|
//
|
|
// Calculate maximum possible score for a document. If the max score
|
|
// is lower than the current minimum winning score, give up already
|
|
// now and skip to the next docid.
|
|
//
|
|
// minWinningScore is the score of the lowest scoring doc in m_topTree IF
|
|
// the topTree contains the number of docs requested. Otherwise it is -1
|
|
// and the doc is inserted no matter the score.
|
|
//
|
|
// Only go through this if we actually have a minimum score to compare with ...
|
|
// No need if minWinningScore is still -1.
|
|
//
|
|
if ( minWinningScore >= 0.0 ) {
|
|
logTrace(g_conf.m_logTracePosdb, "Compute 'upper bound' for each query term");
|
|
|
|
// If there's no way we can break into the winner's circle, give up!
|
|
// This computes an upper bound for each query term
|
|
for ( int32_t i = 0 ; i < numQueryTermsToHandle ; i++ ) {
|
|
// skip negative termlists.
|
|
if ( m_queryTermInfos[i].m_subList[0].m_bigramFlag & (BF_NEGATIVE) ) {
|
|
continue;
|
|
}
|
|
|
|
// an upper bound on the score we could get
|
|
float maxScore = getMaxPossibleScore(&(m_queryTermInfos[i]));
|
|
// -1 means it has inlink text so do not apply this constraint
|
|
// to this docid because it is too difficult because we
|
|
// sum up the inlink text
|
|
if ( maxScore < 0.0 ) {
|
|
continue;
|
|
}
|
|
|
|
maxScore *= completeScoreMultiplier;
|
|
// logTrace(g_conf.m_logTracePosdb, "maxScore=%f minWinningScore=%f", maxScore, minWinningScore);
|
|
// if any one of these terms have a max score below the
|
|
// worst score of the 10th result, then it can not win.
|
|
if ( maxScore <= minWinningScore ) {
|
|
docIdPtr += 6;
|
|
prefiltMaxPossScoreFail++;
|
|
skipToNextDocId = true;
|
|
break; // break out of numQueryTermsToHandle loop
|
|
}
|
|
}
|
|
}
|
|
|
|
if( skipToNextDocId ) {
|
|
// continue docIdPtr < docIdEnd loop
|
|
logTrace(g_conf.m_logTracePosdb, "Max possible score for docId %" PRId64 " too low, skipping to next", m_docId);
|
|
continue;
|
|
}
|
|
|
|
prefiltMaxPossScorePass++;
|
|
|
|
if ( minWinningScore >= 0.0 ) {
|
|
|
|
if( !prefilterMaxPossibleScoreByDistance(minWinningScore*completeScoreMultiplier) ) {
|
|
docIdPtr += 6;
|
|
prefiltBestDistMaxPossScoreFail++;
|
|
skipToNextDocId = true;
|
|
}
|
|
}
|
|
|
|
if( skipToNextDocId ) {
|
|
// Continue docIdPtr < docIdEnd loop
|
|
logTrace(g_conf.m_logTracePosdb, "Max possible score by distance for docId %" PRId64 " too low, skipping to next", m_docId);
|
|
continue;
|
|
}
|
|
prefiltBestDistMaxPossScorePass++;
|
|
} // !m_q->m_isBoolean
|
|
} // currPassNum == INTERSECT_SCORING
|
|
|
|
if ( m_q->m_isBoolean ) {
|
|
// add one point for each term matched in the bool query
|
|
// this is really just for when the terms are from different
|
|
// fields. if we have unfielded boolean terms we should
|
|
// do proximity matching.
|
|
int32_t slot = m_bt.getSlot ( &m_docId );
|
|
if ( slot >= 0 ) {
|
|
uint8_t *bv = (uint8_t *)m_bt.getValueFromSlot(slot);
|
|
// then a score based on the # of terms that matched
|
|
int16_t bitsOn = getNumBitsOnX ( bv, m_vecSize );
|
|
// but store in hashtable now
|
|
minScore = (float)bitsOn;
|
|
}
|
|
else {
|
|
minScore = 1.0;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
//##
|
|
//## PERFORMANCE HACK: ON-DEMAND MINI MERGES.
|
|
//##
|
|
//## Data for the current DocID found in sublists of each query term
|
|
//## is merged into a single list, so we end up with one list per query
|
|
//## term. They are all stored in a single buffer (miniMergeBuf), which
|
|
//## the mergedListStart/mergedListEnd pointers point into.
|
|
//##
|
|
|
|
mergeTermSubListsForDocId(&miniMergeBuf, &highestInlinkSiteRank);
|
|
|
|
// clear the counts on this DocIdScore class for this new docid
|
|
pdcs = NULL;
|
|
if ( currPassNum == INTERSECT_DEBUG_INFO ) {
|
|
dcs.reset();
|
|
pdcs = &dcs;
|
|
}
|
|
|
|
//##
|
|
//## ACTUAL SCORING BEGINS
|
|
//##
|
|
|
|
if ( !m_q->m_isBoolean ) {
|
|
//#
|
|
//# NON-BODY TERM PAIR SCORING LOOP
|
|
//#
|
|
createNonBodyTermPairScoreMatrix(&miniMergeBuf, &scoreMatrix);
|
|
|
|
|
|
//#
|
|
//# SINGLE TERM SCORE LOOP
|
|
//#
|
|
minSingleScore = getMinSingleTermScoreSum(&miniMergeBuf, highestScoringNonBodyPos, pdcs);
|
|
logTrace(g_conf.m_logTracePosdb, "minSingleScore=%f before multiplication for docId %" PRIu64 "", minSingleScore, m_docId);
|
|
|
|
minSingleScore *= completeScoreMultiplier;
|
|
|
|
//#
|
|
//# DOCID / SITERANK DETECTION
|
|
//#
|
|
for(int32_t k=0; k < m_numQueryTermInfos; k++) {
|
|
if ( ! miniMergeBuf.mergedListStart[k] ) {
|
|
continue;
|
|
}
|
|
|
|
siteRank = Posdb::getSiteRank ( miniMergeBuf.mergedListStart[k] );
|
|
docLang = Posdb::getLangId(miniMergeBuf.mergedListStart[k]);
|
|
break;
|
|
}
|
|
logTrace(g_conf.m_logTracePosdb, "Got siteRank %d and docLang %d", (int)siteRank, (int)docLang);
|
|
|
|
//#
|
|
//# SLIDING WINDOW SCORING ALGORITHM
|
|
//#
|
|
// After calling this, bestMinTermPairWindowPtrs will point to the
|
|
// term positions set ("window") that has the highest minimum score. These
|
|
// pointers are used when determining the minimum term pair score returned
|
|
// by the function.
|
|
float minPairScore = getMinTermPairScoreSlidingWindow(&miniMergeBuf, highestScoringNonBodyPos, bestMinTermPairWindowPtrs, xpos, scoreMatrix, pdcs);
|
|
logTrace(g_conf.m_logTracePosdb, "minPairScore=%f before multiplication for docId %" PRIu64 "", minPairScore, m_docId);
|
|
|
|
minPairScore *= completeScoreMultiplier;
|
|
|
|
//#
|
|
//# Find minimum score - either single term or term pair
|
|
//#
|
|
minScore = 999999999.0;
|
|
// get a min score from all the term pairs
|
|
if ( minPairScore < minScore && minPairScore >= 0.0 ) {
|
|
minScore = minPairScore;
|
|
}
|
|
|
|
// if we only had one query term
|
|
if ( minSingleScore < minScore ) {
|
|
minScore = minSingleScore;
|
|
}
|
|
logTrace(g_conf.m_logTracePosdb, "minPairScore=%f, minScore=%f for docId %" PRIu64 "", minPairScore, minScore, m_docId);
|
|
|
|
|
|
// No positive score? Then skip the doc
|
|
if ( minScore <= 0.0 ) {
|
|
|
|
if( currPassNum == INTERSECT_SCORING ) {
|
|
// advance to next docid
|
|
docIdPtr += 6;
|
|
}
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "Skipping docid %" PRIu64 " - no positive score", m_docId);
|
|
// Continue docid loop
|
|
continue;
|
|
}
|
|
} // !m_q->m_isBoolean
|
|
|
|
//#
|
|
//# Calculate score and give boost based on siterank and highest inlinking siterank
|
|
//#
|
|
float adjustedSiteRank = siteRank;
|
|
|
|
if( highestInlinkSiteRank > siteRank ) {
|
|
//adjust effective siterank because a high-rank site linked to it. Don't adjust it too much though.
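//e.g. siteRank 3 with highestInlinkSiteRank 9 becomes 3 + (9-3)/3 = 5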
|
|
adjustedSiteRank = siteRank + (highestInlinkSiteRank-siteRank) / 3.0;
|
|
logTrace(g_conf.m_logTracePosdb, "Highest inlink siterank %d > siterank %d. Adjusting to %f for docId %" PRIu64 "", highestInlinkSiteRank, (int)siteRank, adjustedSiteRank, m_docId);
|
|
}
|
|
score = minScore * (adjustedSiteRank*m_baseScoringParameters.m_siteRankMultiplier+1.0);
|
|
logTrace(g_conf.m_logTracePosdb, "Score %f for docId %" PRIu64 "", score, m_docId);
|
|
|
|
//#
|
|
//# Give score boost if query and doc language is the same,
|
|
//# and optionally a different boost if the language of the
|
|
//# page is unknown.
|
|
//#
|
|
//# Use "qlang" parm to set the language. i.e. "&qlang=fr"
|
|
//#
|
|
score *= m_msg39req->m_baseScoringParameters.m_languageWeights[docLang];
|
|
|
|
double page_temperature = 0;
|
|
bool use_page_temperature = false;
|
|
float score_before_page_temp = score;
|
|
|
|
if(m_baseScoringParameters.m_usePageTemperatureForRanking) {
|
|
use_page_temperature = true;
|
|
const auto range_min = m_baseScoringParameters.m_pageTemperatureWeightMin;
|
|
const auto range_max = m_baseScoringParameters.m_pageTemperatureWeightMax;
|
|
uint32_t sitehash32;
|
|
unsigned raw_default_site_page_temperature;
|
|
if(g_pageTemperatureRegistry.query_page_temperature(m_docId, range_min, range_max, &page_temperature)) {
|
|
//excellent, we know the page's temperature
|
|
} else if(g_d2fasm.lookupSiteHash(m_docId,&sitehash32) && g_smptr.lookup(sitehash32,&raw_default_site_page_temperature)) {
|
|
// we'll only use the site median page temperature when we have an updated docid2siteflags file
|
|
//hmm, use the site-default page temperature
|
|
page_temperature = g_pageTemperatureRegistry.scale_temperature(range_min, range_max, raw_default_site_page_temperature);
|
|
} else {
|
|
//ok, last resort, use the global default page temperature
|
|
page_temperature = g_pageTemperatureRegistry.query_default_page_temperature(range_min, range_max);
|
|
}
|
|
score *= page_temperature;
|
|
logTrace(g_conf.m_logTracePosdb, "Page temperature for docId %" PRIu64 " is %.14f, score %f -> %f", m_docId, page_temperature, score_before_page_temp, score);
|
|
}
|
|
|
|
|
|
//#
|
|
//# Handle sortby int/float and minimum docid/score pairs
|
|
//#
|
|
|
|
if( m_hasMaxSerpScore ) {
|
|
// assume filtered out
|
|
if ( currPassNum == INTERSECT_SCORING ) {
|
|
m_filtered++;
|
|
}
|
|
|
|
// now we have a maxscore/maxdocid upper range so the widget
|
|
// can append only new results to an older result set.
|
|
if ( m_hasMaxSerpScore ) {
|
|
bool skipToNext = false;
|
|
// if dealing with an "int" score use the extra precision
|
|
// of the double that m_maxSerpScore is!
|
|
if ( score > (float)m_msg39req->m_maxSerpScore ) {
|
|
skipToNext = true;
|
|
}
|
|
else
|
|
if ( almostEqualFloat(score, (float)m_msg39req->m_maxSerpScore) && (int64_t)m_docId <= m_msg39req->m_minSerpDocId ) {
|
|
//@todo: Why is m_msg39req->m_maxSerpScore double and score float?
|
|
skipToNext = true;
|
|
}
|
|
|
|
if( skipToNext ) {
|
|
// advance to next docid
|
|
if( currPassNum == INTERSECT_SCORING ) {
|
|
docIdPtr += 6;
|
|
}
|
|
// Continue docIdPtr < docIdEnd loop
|
|
continue;
|
|
}
|
|
}
|
|
|
|
// we did not get filtered out
|
|
if ( currPassNum == INTERSECT_SCORING ) {
|
|
m_filtered--;
|
|
}
|
|
}
|
|
|
|
|
|
// We only come here if we actually made it into m_topTree - second round only loops through
|
|
// TopTree entries.
|
|
if ( currPassNum == INTERSECT_DEBUG_INFO ) {
|
|
// NEW 20170423
|
|
dcs.m_usePageTemperature = use_page_temperature;
|
|
dcs.m_pageTemperature = page_temperature;
|
|
dcs.m_adjustedSiteRank = adjustedSiteRank;
|
|
|
|
if( genDebugScoreInfo2(&dcs, &lastLen, &lastDocId, siteRank, score, intScore, docLang) ) {
|
|
// advance to next docid
|
|
if( currPassNum == INTERSECT_SCORING ) {
|
|
docIdPtr += 6;
|
|
}
|
|
// Continue docIdPtr < docIdEnd loop
|
|
continue;
|
|
}
|
|
}
|
|
|
|
|
|
if( currPassNum == INTERSECT_SCORING ) {
|
|
|
|
//#
|
|
//# SCORING DONE! Add to top-scorer tree.
|
|
//#
|
|
|
|
int32_t tn = m_topTree->getEmptyNode();
|
|
|
|
// Sanity check
|
|
if( tn < 0 ) {
|
|
log(LOG_LOGIC,"%s:%s:%d: No space left in m_topTree", __FILE__, __func__, __LINE__);
|
|
gbshutdownLogicError();
|
|
}
|
|
|
|
TopNode *t = m_topTree->getNode(tn);
|
|
|
|
// set the score and docid ptr
|
|
t->m_score = score;
|
|
t->m_docId = m_docId;
|
|
t->m_flags = flags;
|
|
logTrace(g_conf.m_logTracePosdb, "docid=%15ld score=%f", m_docId, score);
|
|
|
|
//
|
|
// This will not add if tree is full and it is less than the m_lowNode in score.
|
|
//
|
|
// If it does get added to a full tree, lowNode will be removed.
|
|
//
|
|
m_topTree->addNode(t, tn);
|
|
|
|
// top tree only holds enough docids to satisfy the
|
|
// Msg39Request::m_docsToGet (m_msg39req->m_docsToGet) request
|
|
// from the searcher. It basically stores m_docsToGet
|
|
// into TopTree::m_docsWanted. TopTree::m_docsWanted is often
|
|
// double m_docsToGet to compensate for site clustering, and
|
|
// it can be even more than that in order to ensure we get
|
|
// enough domains represented in the search results.
|
|
// See TopTree::addNode(). it will not add the "t" node if
|
|
// its score is not high enough when the top tree is full.
|
|
if ( m_topTree->getNumUsedNodes() > m_topTree->getNumDocsWanted() ) {
|
|
// get the lowest scoring node
|
|
int32_t lowNode = m_topTree->getLowNode();
|
|
// and record its score in "minWinningScore"
|
|
minWinningScore = m_topTree->getNode(lowNode)->m_score;
|
|
}
|
|
}
|
|
|
|
// advance to next docid
|
|
if( currPassNum == INTERSECT_SCORING ) {
|
|
docIdPtr += 6;
|
|
}
|
|
} // docIdPtr < docIdEnd loop
|
|
|
|
|
|
if ( m_debug ) {
|
|
now = gettimeofdayInMilliseconds();
|
|
took = now - lastTime;
|
|
log(LOG_INFO, "posdb: new algo phase %" PRId32" took %" PRId64" ms", phase,took);
|
|
lastTime = now;
|
|
phase++;
|
|
}
|
|
|
|
} // for ... currPassNum < numPassesNeeded
|
|
|
|
|
|
if ( m_debug ) {
|
|
log(LOG_INFO, "posdb: # prefiltMaxPossScoreFail........: %" PRId32" ", prefiltMaxPossScoreFail );
|
|
log(LOG_INFO, "posdb: # prefiltMaxPossScorePass........: %" PRId32" ", prefiltMaxPossScorePass );
|
|
log(LOG_INFO, "posdb: # prefiltBestDistMaxPossScoreFail: %" PRId32" ", prefiltBestDistMaxPossScoreFail );
|
|
log(LOG_INFO, "posdb: # prefiltBestDistMaxPossScorePass: %" PRId32" ", prefiltBestDistMaxPossScorePass );
|
|
}
|
|
|
|
if( g_conf.m_logTracePosdb ) {
|
|
m_topTree->logTreeData(LOG_TRACE);
|
|
logDebugScoreInfo(LOG_TRACE);
|
|
}
|
|
|
|
// get time now
|
|
now = gettimeofdayInMilliseconds();
|
|
// store the addLists time
|
|
m_addListsTime = now - t1;
|
|
m_t1 = t1;
|
|
m_t2 = now;
|
|
|
|
//opportunistic cleanup (memory release)
|
|
m_wikiPhraseIds.clear();
|
|
m_quotedStartIds.clear();
|
|
m_qpos.clear();
|
|
m_qtermNums.clear();
|
|
m_bflags.clear();
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "END. Took %" PRId64" msec", m_addListsTime);
|
|
}
|
|
|
|
|
|
|
|
// . "bestDist" is closest distance to query term # m_minTermListIdx
|
|
// . set "bestDist" to 1 to ignore it
|
|
float PosdbTable::getMaxPossibleScore(const QueryTermInfo *qti) {
|
|
logTrace(g_conf.m_logTracePosdb, "BEGIN.");
|
|
|
|
// get max score of all sublists
|
|
float bestHashGroupWeight = -1.0;
|
|
float bestTermFreqWeight = -1.0;
|
|
unsigned char bestDensityRank = 0;
|
|
char siteRank = -1;
|
|
bool docLangSet = false;
|
|
lang_t docLang = langUnknown;
|
|
unsigned char hgrp;
|
|
bool hadHalfStopWikiBigram = false;
|
|
|
|
// scan those sublists to set m_ptrs[] and to get the
|
|
// max possible score of each one
|
|
for ( int32_t j = 0 ; j < qti->m_numMatchingSubLists ; j++ ) {
|
|
// scan backwards up to this
|
|
const char *start = qti->m_matchingSublist[j].m_savedCursor;
|
|
|
|
// skip if does not have our docid
|
|
if ( ! start ) {
|
|
continue;
|
|
}
|
|
|
|
// note it if any is a wiki bigram
|
|
if ( qti->m_subList[0].m_bigramFlag & BF_HALFSTOPWIKIBIGRAM ) {
|
|
hadHalfStopWikiBigram = true;
|
|
}
|
|
|
|
// skip if entire sublist/termlist is exhausted
|
|
if ( start >= qti->m_matchingSublist[j].m_end ) {
|
|
continue;
|
|
}
|
|
|
|
if ( siteRank == -1 ) {
|
|
siteRank = Posdb::getSiteRank(start);
|
|
}
|
|
if ( !docLangSet ) {
|
|
docLang = Posdb::getLangId(start);
|
|
docLangSet = true;
|
|
}
|
|
|
|
// skip first key because it is 12 bytes, go right to the
|
|
// 6 byte keys. we deal with it below.
|
|
start += 12;
|
|
// get cursor. this is actually pointing to the next docid
|
|
const char *dc = qti->m_matchingSublist[j].m_cursor;
|
|
// back up into our list
|
|
dc -= 6;
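// walk the 6-byte keys backwards from just before the next docid down to
// the first 6-byte key; the leading 12-byte docid key is handled separately
// via the 'retry' jump below.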
|
|
// reset this
|
|
bool retried = false;
|
|
|
|
// do not include the beginning 12 byte key in this loop!
|
|
for ( ; dc >= start ; dc -= 6 ) {
|
|
// loop back here for the 12 byte key
|
|
retry:
|
|
// get the best hash group
|
|
hgrp = Posdb::getHashGroup(dc);
|
|
|
|
// if not body, do not apply this algo because
|
|
// we end up adding term pairs from each hash group
|
|
if ( hgrp == HASHGROUP_INLINKTEXT ) {
|
|
return -1.0;
|
|
}
|
|
|
|
float termFreqWeight = qti->m_subList[qti->m_matchingSublist[j].m_baseSubListIndex].m_qt->m_termFreqWeight;
|
|
if(termFreqWeight>bestTermFreqWeight)
|
|
bestTermFreqWeight = termFreqWeight;
|
|
|
|
//if ( hgrp == HASHGROUP_TITLE ) return -1.0;
|
|
// loser?
|
|
if ( m_derivedScoringWeights.m_hashGroupWeights[hgrp] < bestHashGroupWeight ) {
|
|
// if in body, it's over for this termlist
|
|
// because this is the last hash group
|
|
// we will encounter.
|
|
if ( hgrp == HASHGROUP_BODY ) {
|
|
// @@@ BR: Dangerous assumption if we change indexing order in XmlDoc_Indexing ! @@@
|
|
goto nextTermList;
|
|
}
|
|
// otherwise, keep chugging
|
|
continue;
|
|
}
|
|
|
|
unsigned char dr = Posdb::getDensityRank(dc);
|
|
|
|
// a clean win?
|
|
if ( m_derivedScoringWeights.m_hashGroupWeights[hgrp] > bestHashGroupWeight ) {
|
|
bestHashGroupWeight = m_derivedScoringWeights.m_hashGroupWeights[hgrp];
|
|
bestDensityRank = dr;
|
|
continue;
|
|
}
|
|
|
|
// better density rank?
|
|
if ( dr > bestDensityRank ) {
|
|
bestDensityRank = dr;
|
|
}
|
|
}
|
|
|
|
// handle the beginning 12 byte key
|
|
if ( ! retried ) {
|
|
retried = true;
|
|
dc = qti->m_matchingSublist[j].m_savedCursor;
|
|
goto retry;
|
|
}
|
|
|
|
nextTermList:
|
|
;
|
|
|
|
}
|
|
|
|
// if nothing, then maybe all sublists were empty?
|
|
if ( bestHashGroupWeight < 0 ) {
|
|
return 0.0;
|
|
}
|
|
|
|
// assume perfect adjacency and that the other term is perfect
|
|
float score = 100.0;
|
|
|
|
score *= bestHashGroupWeight;
|
|
score *= bestHashGroupWeight;
|
|
|
|
// since adjacent, 2nd term in pair will be in same sentence
|
|
// TODO: fix this for 'qtm' it might have a better density rank and
|
|
// better hashgroup weight, like being in title!
|
|
score *= m_derivedScoringWeights.m_densityWeights[bestDensityRank];
|
|
score *= m_derivedScoringWeights.m_densityWeights[bestDensityRank];
|
|
|
|
// wiki bigram?
|
|
if ( hadHalfStopWikiBigram ) {
|
|
score *= WIKI_BIGRAM_WEIGHT;
|
|
score *= WIKI_BIGRAM_WEIGHT;
|
|
}
|
|
|
|
//score *= perfectWordSpamWeight * perfectWordSpamWeight;
|
|
score *= (((float)siteRank)*m_baseScoringParameters.m_siteRankMultiplier+1.0);
|
|
|
|
// language boost if language specified and if page is same language, or unknown language
|
|
score *= m_msg39req->m_baseScoringParameters.m_languageWeights[docLang];
|
|
|
|
// assume the other term we pair with will be 1.0
|
|
score *= bestTermFreqWeight;
|
|
score *= bestTermFreqWeight;
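// At this point the upper bound is essentially
//   100 * hashGroupWeight^2 * densityWeight^2 [* WIKI_BIGRAM_WEIGHT^2]
//     * (siteRank * siteRankMultiplier + 1) * languageWeight * termFreqWeight^2
// i.e. what a perfectly adjacent pair of perfect terms could score.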
|
|
//TODO: shouldn't we multiply by the user weight here too?
|
|
|
|
|
|
// terms in same wikipedia phrase?
|
|
//if ( wikiWeight != 1.0 )
|
|
// score *= WIKI_WEIGHT;
|
|
|
|
// if query is 'time enough for love' it is kinda slow because
|
|
// we were never multiplying by WIKI_WEIGHT, even though all
|
|
// query terms were in the same wikipedia phrase. so see if this
|
|
// speeds it up.
|
|
if ( m_allInSameWikiPhrase ) {
|
|
score *= WIKI_WEIGHT;
|
|
}
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "END. score=%f", score);
|
|
return score;
|
|
}
|
|
|
|
|
|
float PosdbTable::modifyMaxScoreByDistance(float score,
|
|
int32_t bestDist,
|
|
int32_t qdist,
|
|
const QueryTermInfo *qtm)
|
|
{
|
|
score *= qtm->m_maxMatchingTermFreqWeight;
|
|
|
|
// subtract qdist
|
|
bestDist -= qdist;
|
|
|
|
// make it positive
|
|
bestDist = abs(bestDist);
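// e.g. a best body distance of 12 for a query-term distance of 2 gives
// |12-2| = 10, so the bound below is divided by 10.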
|
|
|
|
// avoid 0 division
|
|
if(bestDist > 1)
|
|
score /= (float)bestDist;
|
|
|
|
return score;
|
|
}
|
|
|
|
|
|
|
|
////////////////////
|
|
//
|
|
// "White list" functions used to find docids from only specific sites
|
|
//
|
|
////////////////////
|
|
|
|
// this is separate from the allocateTopTree() function below because we must
|
|
// call it for each iteration in Msg39::doDocIdSplitLoop() which is used
|
|
// to avoid reading huge termlists into memory. it breaks the huge lists
|
|
// up by smaller docid ranges and gets the search results for each docid
|
|
// range separately.
|
|
bool PosdbTable::allocWhiteListTable ( ) {
|
|
//
|
|
// the whitetable is for the docids in the whitelist. we have
|
|
// to only show results whose docid is in the whitetable, which
|
|
// is from the "&sites=abc.com+xyz.com..." custom search site list
|
|
// provided by the user.
|
|
//
|
|
if ( m_msg39req->size_whiteList <= 1 ) m_useWhiteTable = false; // includes \0
|
|
else m_useWhiteTable = true;
|
|
int32_t sum = 0;
|
|
for ( int32_t i = 0 ; i < m_msg2->getNumWhiteLists() ; i++ ) {
|
|
const RdbList *list = m_msg2->getWhiteList(i);
|
|
if(!list->isEmpty()) {
|
|
// assume 12 bytes for all keys but first which is 18
|
|
int32_t size = list->getListSize();
|
|
sum += size / 12 + 1;
|
|
}
|
|
}
|
|
if ( sum ) {
|
|
// making this sum * 3 does not show a speedup... hmmm...
|
|
int32_t numSlots = sum * 2;
|
|
// keep it restricted to 5 byte keys so we do not have to
|
|
// extract the docid, we can just hash the ptr to those
|
|
// 5 bytes (which includes 1 siterank bit as the lowbit,
|
|
// but should be ok since it should be set the same in
|
|
// all termlists that have that docid)
|
|
if (!m_whiteListTable.set(5, 0, numSlots, NULL, 0, false, "wtall", true)) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
|
|
void PosdbTable::prepareWhiteListTable()
|
|
{
|
|
// hash the docids in the whitelist termlists into a hashtable.
|
|
// every docid in the search results must be in there. the
|
|
// whitelist termlists are from a provided "&sites=abc.com+xyz.com+.."
|
|
// cgi parm. the user only wants search results returned from the
|
|
// specified subdomains. there can be up to MAX_WHITELISTS (500)
|
|
// sites right now. this hash table must have been pre-allocated
|
|
// in PosdbTable::allocWhiteListTable() above since we might be in a thread.
|
|
if ( m_addedSites )
|
|
return;
|
|
|
|
for ( int32_t i = 0 ; i < m_msg2->getNumWhiteLists() ; i++ ) {
|
|
RdbList *list = m_msg2->getWhiteList(i);
|
|
if(!list->isEmpty()) {
|
|
// sanity test
|
|
int64_t d1 = Posdb::getDocId(list->getList());
|
|
if ( d1 > m_msg2->docIdEnd() ) {
|
|
log("posdb: d1=%" PRId64" > %" PRId64,
|
|
d1,m_msg2->docIdEnd());
|
|
//gbshutdownAbort(true);
|
|
}
|
|
if ( d1 < m_msg2->docIdStart() ) {
|
|
log("posdb: d1=%" PRId64" < %" PRId64,
|
|
d1,m_msg2->docIdStart());
|
|
//gbshutdownAbort(true);
|
|
}
|
|
// first key is always 18 bytes cuz it has the termid
|
|
// scan recs in the list
|
|
for ( ; ! list->isExhausted() ; list->skipCurrentRecord() ) {
|
|
char *rec = list->getCurrentRec();
|
|
// point to the 5 bytes of docid
|
|
m_whiteListTable.addKey ( rec + 7 );
|
|
}
|
|
}
|
|
}
|
|
|
|
m_addedSites = true;
|
|
}
|
|
|
|
|
|
|
|
bool PosdbTable::allocateTopTree() {
|
|
//If all Msg2 rdblists are empty then don't do anything (there is nothing to rank or score)
|
|
bool allEmpty = true;
|
|
for(int i = 0; i < m_msg2->getNumLists(); i++) {
|
|
const RdbList *list = m_msg2->getList(i);
|
|
|
|
if(list && !list->isEmpty() && list->getListSize()>=18) {
|
|
allEmpty = false;
|
|
break;
|
|
}
|
|
}
|
|
if(allEmpty) {
|
|
if(m_debug)
|
|
log(LOG_DEBUG,"toptree: all Msg2 lists are empty");
|
|
return true;
|
|
}
|
|
|
|
// Normally m_msg39req->m_docsToGet is something sensible such as 10 or 50. Some specialized queries or attempts
|
|
// at DOSing can set it to something unreasonable such as 1,000,000. Internal functions such as QueryReindex set
|
|
// m_msg39req->m_docsToGet to 99999999 meaning "all documents".
|
|
// We cannot protect against DOSs here, but the 99999999 value must be handled. The problem is that 99999999
|
|
// would likely cause OOM, so we have to size the toptree to what is actually in the database. And in the case
|
|
// the database-derived size causes OOM then we'd have to use the documentSplit functionality (which is
|
|
// currently defunct after nomerge2 branch was merged to master. See Msg39.cpp for details).
|
|
//
|
|
// Strategy:
|
|
// - if m_msg39req->m_docsToGet is smallish then accept it. Only adjust as needed by enabled clustering.
|
|
// - otherwise get an estimated list size from Posdb so we can put an upper (and better) limit on
|
|
// m_msg39req->m_docsToGet instead of 99999999
|
|
|
|
int32_t docsWanted;
|
|
if(m_msg39req->m_docsToGet <= 1000) {
|
|
// smallish, accept as-is
|
|
docsWanted = m_msg39req->m_docsToGet;
|
|
if(m_debug)
|
|
log(LOG_DEBUG, "toptree: docsToGet is small (%d), docsWanted = %d", m_msg39req->m_docsToGet, docsWanted);
|
|
} else {
|
|
//not small. Get list size estimates and estimate an upper limit on the number of documents that could possibly match.
|
|
//
|
|
//we cannot calculate the estimate based on m_msg2->getList(...)->getListSize() because m_msg2 only holds the lists
|
|
//from the current fileNumber (eg. posdb0007.dat) and we need the estimate over all the files + bucket
|
|
int64_t totalEstimatedEntries = 0;
|
|
for(int i=0; i<m_q->getNumTerms(); i++) {
|
|
int64_t termId = m_q->getTermId(i);
|
|
int64_t estimatedTermListSize = g_posdb.estimateLocalTermListSize(m_msg39req->m_collnum, termId);
|
|
logTrace(g_conf.m_logTracePosdb, "allocateTopTree: termId=%ld, estimatedTermListSize=%ld bytes", termId, estimatedTermListSize);
|
|
//If we have listsize=54 then due to the double compression of posdb entries it could be one
|
|
//document with 7 entries, or four documents with one entry each
|
|
//The latter is the worst case so we'll use that.
|
|
int32_t estimatedEntries = estimatedTermListSize / (sizeof(posdbkey_t)-6);
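// (assuming posdbkey_t is the full 18-byte key, the 54-byte example above
// gives 54 / 12 = 4 entries, matching the four-single-entry-document worst case)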
|
|
totalEstimatedEntries += estimatedEntries;
|
|
}
|
|
logTrace(g_conf.m_logTracePosdb, "totalEstimatedEntries=%ld", totalEstimatedEntries);
|
|
if(m_msg39req->m_docsToGet < totalEstimatedEntries) {
|
|
//wants less than what is in the DB. excellent
|
|
docsWanted = m_msg39req->m_docsToGet;
|
|
} else {
|
|
//wants as much or more than there is in the DB. Hmmm.
|
|
if(totalEstimatedEntries > INT32_MAX) { //32bit overflow
|
|
log(LOG_ERROR,"toptree: estimated number of documents = %ld. Cannot squeeze that into a TopTree", totalEstimatedEntries);
|
|
return false;
|
|
}
|
|
docsWanted = totalEstimatedEntries;
|
|
}
|
|
if(m_debug)
|
|
log(LOG_DEBUG, "toptree: docsToGet is large (%d), docsWanted = %d", m_msg39req->m_docsToGet, docsWanted);
|
|
}
|
|
|
|
//Magic multiplication. Original source had no comment on why this was done. TopTree appears to already
|
|
//take care of enlarging the allocation if clustering is enabled, so uhm... ?
|
|
if(m_msg39req->m_doSiteClustering)
|
|
docsWanted *= 2;
|
|
|
|
// limit to 2B docids i guess
|
|
docsWanted = gbmin(docsWanted,2000000000);
|
|
|
|
if(m_debug)
|
|
log(LOG_INFO, "toptree: toptree: initializing %d nodes",docsWanted);
|
|
|
|
if(docsWanted < m_msg39req->m_docsToGet) {
|
|
log("query: warning only getting up to %d docids even though %d requested because termlist sizes are smaller",
|
|
docsWanted, m_msg39req->m_docsToGet);
|
|
}
|
|
|
|
// keep it sane
|
|
if(docsWanted > m_msg39req->m_docsToGet * 2 && docsWanted > 60) {
|
|
docsWanted = m_msg39req->m_docsToGet * 2;
|
|
}
|
|
|
|
// this actually sets the # of nodes to MORE than docsWanted!!!
|
|
if(!m_topTree->setNumNodes(docsWanted, m_msg39req->m_doSiteClustering)) {
|
|
log("toptree: toptree: error allocating nodes: %s",
|
|
mstrerror(g_errno));
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
bool PosdbTable::allocateScoringInfo() {
|
|
// let's use the number of docs wanted to try to get as many scores as possible, although
|
|
// it may still not work!
|
|
int32_t xx = m_topTree->getNumDocsWanted();
|
|
|
|
// try to fix a core of growing this table in a thread when xx == 1
|
|
xx = gbmax(xx,32);
|
|
|
|
//if ( m_msg39req->m_doSiteClustering ) xx *= 4;
|
|
// for seeing if a docid is in toptree. niceness=0.
|
|
//if ( ! m_docIdTable.set(8,0,xx*4,NULL,0,false,0,"dotb") )
|
|
// return false;
|
|
|
|
if ( m_msg39req->m_getDocIdScoringInfo ) {
|
|
|
|
m_scoreInfoBuf.setLabel ("scinfobuf" );
|
|
|
|
// . for holding the scoring info
|
|
// . add 1 for the \0 safeMemcpy() likes to put at the end so
|
|
// it will not realloc on us
|
|
if ( ! m_scoreInfoBuf.reserve ( xx * sizeof(DocIdScore) +100) ) {
|
|
return false;
|
|
}
|
|
|
|
// likewise how many query term pair scores should we get?
|
|
int32_t numTerms = m_q->m_numTerms;
|
|
|
|
// limit
|
|
numTerms = gbmin(numTerms,10);
|
|
|
|
// the pairs. divide by 2 since (x,y) is same as (y,x)
|
|
int32_t numPairs = (numTerms * numTerms) / 2;
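// e.g. with the 10-term cap above this reserves (10*10)/2 = 50 pair slots,
// a slight over-estimate of the 45 distinct unordered pairs.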
|
|
|
|
// then for each pair assume no more than MAX_TOP reps, usually
|
|
// it's just 1, but be on the safe side
|
|
numPairs *= m_realMaxTop;//MAX_TOP;
|
|
|
|
// now that is how many pairs per docid and can be 500! but we
|
|
// still have to multiply by how many docids we want to
|
|
// compute. so this could easily get into the megabytes, most
|
|
// of the time we will not need nearly that much however.
|
|
numPairs *= xx;
|
|
|
|
m_pairScoreBuf.setLabel ( "pairbuf" );
|
|
m_singleScoreBuf.setLabel ("snglbuf" );
|
|
|
|
// but alloc it just in case
|
|
if ( ! m_pairScoreBuf.reserve (numPairs * sizeof(PairScore) ) ) {
|
|
return false;
|
|
}
|
|
|
|
// and for singles
|
|
int32_t numSingles = numTerms * m_realMaxTop * xx; // MAX_TOP *xx;
|
|
if ( !m_singleScoreBuf.reserve(numSingles*sizeof(SingleScore))) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
|
|
////////////////////
|
|
//
|
|
// Functions used to find candidate docids, including "Voting" functions
|
|
// used to find docids with all required term ids and functions to remove
|
|
// docids that do not match all required terms
|
|
//
|
|
////////////////////
|
|
|
|
|
|
//
|
|
// Run through each term sublist and remove all docids not
|
|
// found in the docid vote buffer
|
|
//
|
|
void PosdbTable::delNonMatchingDocIdsFromSubLists() {
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "BEGIN.");
|
|
|
|
//phase 1: shrink the rdblists for all queryterms (except those with a minus sign)
|
|
std::valarray<char *> newEndPtr(m_q->m_numTerms);
|
|
for(int i=0; i<m_q->m_numTerms; i++) {
|
|
newEndPtr[i] = NULL;
|
|
if(m_q->m_qterms[i].m_termSign=='-')
|
|
continue;
|
|
RdbList *list = m_q->m_qterms[i].m_posdbListPtr;
|
|
if(!list || list->isEmpty())
|
|
continue;
|
|
|
|
// get that sublist
|
|
char *subListPtr = list->getList();
|
|
char *subListEnd = list->getListEnd();
|
|
char *dst = subListPtr;
|
|
// reset docid list ptrs
|
|
const char *dp = m_docIdVoteBuf.getBufStart();
|
|
const char *dpEnd = dp + m_docIdVoteBuf.length();
|
|
//log(LOG_INFO,"@@@@ i#%d subListPtr=%p subListEnd=%p", i, subListPtr, subListEnd);
|
|
|
|
for(;;) {
|
|
// scan the docid list for the current docid in this termlist
|
|
for ( ; dp < dpEnd; dp += 6 ) {
|
|
// if current docid in docid list is >= the docid
|
|
// in the sublist, stop. docid in list is 6 bytes and
|
|
// subListPtr must be pointing to a 12 byte posdb rec.
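// The comparison works because a vote-buf entry mirrors posdb key bytes
// 7-11: entry byte 0 is key byte 7 with the two non-docid bits masked
// (0xfc), entry bytes 1-4 are key bytes 8-11, and entry byte 5 is the vote.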
|
|
if ( *(uint32_t *)(dp+1) > *(uint32_t *)(subListPtr+8) ) {
|
|
break;
|
|
}
|
|
|
|
// try to catch up docid if it is behind
|
|
if ( *(uint32_t *)(dp+1) < *(uint32_t *)(subListPtr+8) ) {
|
|
continue;
|
|
}
|
|
|
|
// check lower byte if equal
|
|
if ( *(unsigned char *)(dp) > (*(unsigned char *)(subListPtr+7) & 0xfc ) ) {
|
|
break;
|
|
}
|
|
|
|
if ( *(unsigned char *)(dp) < (*(unsigned char *)(subListPtr+7) & 0xfc ) ) {
|
|
continue;
|
|
}
|
|
|
|
// copy over the 12 byte key
|
|
*(int64_t *)dst = *(int64_t *)subListPtr;
|
|
*(int32_t *)(dst+8) = *(int32_t *)(subListPtr+8);
|
|
|
|
// skip that
|
|
dst += 12;
|
|
subListPtr += 12;
|
|
|
|
// copy over any 6 bytes keys following
|
|
for ( ; ; ) {
|
|
if ( subListPtr >= subListEnd ) {
|
|
// give up on this exhausted term list!
|
|
goto doneWithSubList;
|
|
}
|
|
|
|
// next docid will be the next 12 byte key
|
|
if ( ! ( subListPtr[0] & 0x04 ) ) {
|
|
break;
|
|
}
|
|
|
|
// otherwise it's 6 bytes
|
|
*(int32_t *)dst = *(int32_t *)subListPtr;
|
|
*(int16_t *)(dst+4) = *(int16_t *)(subListPtr+4);
|
|
dst += 6;
|
|
subListPtr += 6;
|
|
}
|
|
}
|
|
|
|
// skip that docid record in our termlist. it MUST have been
|
|
// 12 bytes, a docid heading record.
|
|
subListPtr += 12;
|
|
|
|
// skip any following keys that are 6 bytes, that means they
|
|
// share the same docid
|
|
for ( ; ; ) {
|
|
// list exhausted?
|
|
if ( subListPtr >= subListEnd ) {
|
|
goto doneWithSubList;
|
|
}
|
|
|
|
// stop if next key is 12 bytes, that is a new docid
|
|
if ( ! (subListPtr[0] & 0x04) ) {
|
|
break;
|
|
}
|
|
|
|
// skip it
|
|
subListPtr += 6;
|
|
}
|
|
}
|
|
|
|
doneWithSubList:
|
|
//log(LOG_INFO,"@@@ shrunk #%d to %ld (%p-%p)", i, dst - list->getList(), list->getList(), dst);
|
|
newEndPtr[i] = dst;
|
|
}
|
|
|
|
//phase 2: set the matchingsublist pointers in qti
|
|
for(int i=0; i<m_numQueryTermInfos; i++) {
|
|
QueryTermInfo *qti = &(m_queryTermInfos[i]);
|
|
qti->m_numMatchingSubLists = 0;
|
|
if(qti->m_numSubLists>0 && qti->m_subList[0].m_bigramFlag & BF_NEGATIVE)
|
|
continue; //don't modify sublist for negative terms
|
|
#ifdef _VALGRIND_
|
|
VALGRIND_MAKE_MEM_UNDEFINED(qti->m_matchingSublist, sizeof(qti->m_matchingSublist));
|
|
#endif
|
|
for(int j=0; j<qti->m_numSubLists; j++) {
|
|
for(int k=0; k<m_q->m_numTerms; k++) {
|
|
if(qti->m_subList[j].m_list == m_q->m_qterms[k].m_posdbListPtr) {
|
|
char *newStartPtr = m_q->m_qterms[k].m_posdbListPtr->getList(); //same as always
|
|
int32_t x = qti->m_numMatchingSubLists;
|
|
qti->m_matchingSublist[x].m_size = newEndPtr[k] - newStartPtr;
|
|
qti->m_matchingSublist[x].m_start = newStartPtr;
|
|
qti->m_matchingSublist[x].m_end = newEndPtr[k];
|
|
qti->m_matchingSublist[x].m_cursor = newStartPtr;
|
|
qti->m_matchingSublist[x].m_savedCursor = newStartPtr;
|
|
qti->m_matchingSublist[x].m_baseSubListIndex = j;
|
|
qti->m_numMatchingSubLists++;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
//calculate qti[].m_maxMatchingTermFreqWeight
|
|
for(int i=0; i<m_numQueryTermInfos; i++) {
|
|
QueryTermInfo *qti = &(m_queryTermInfos[i]);
|
|
if(qti->m_numMatchingSubLists>0) {
|
|
float maxMatchingTermFreqWeight = qti->m_subList[qti->m_matchingSublist[0].m_baseSubListIndex].m_qt->m_termFreqWeight;
|
|
for(int j=1; j<qti->m_numMatchingSubLists; j++) {
|
|
float w = qti->m_subList[qti->m_matchingSublist[j].m_baseSubListIndex].m_qt->m_termFreqWeight;
|
|
if(w>maxMatchingTermFreqWeight)
|
|
maxMatchingTermFreqWeight = w;
|
|
}
|
|
qti->m_maxMatchingTermFreqWeight = maxMatchingTermFreqWeight;
|
|
} else
|
|
qti->m_maxMatchingTermFreqWeight = -1.0;
|
|
}
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "END.");
|
|
}
|
|
|
|
|
|
|
|
//
|
|
// Removes docids with vote -1, which is set for docids matching
|
|
// negative query terms (e.g. -rock)
|
|
//
|
|
void PosdbTable::delDocIdVotes ( const QueryTermInfo *qti ) {
|
|
char *bufStart = m_docIdVoteBuf.getBufStart();
|
|
char *voteBufPtr = NULL;
|
|
char *voteBufEnd;
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "BEGIN.");
|
|
|
|
// just scan each sublist vs. the docid list
|
|
for ( int32_t i = 0 ; i < qti->m_numSubLists ; i++ ) {
|
|
// get that sublist
|
|
char *subListPtr = qti->m_subList[i].m_list->getList();
|
|
char *subListEnd = qti->m_subList[i].m_list->getListEnd();
|
|
// reset docid list ptrs
|
|
voteBufPtr = m_docIdVoteBuf.getBufStart();
|
|
voteBufEnd = voteBufPtr + m_docIdVoteBuf.length();
|
|
|
|
// loop it
|
|
while ( subListPtr < subListEnd ) {
|
|
// scan for his docids and inc the vote
|
|
for ( ;voteBufPtr <voteBufEnd ;voteBufPtr += 6 ) {
|
|
// if current docid in docid list is >= the docid
|
|
// in the sublist, stop. docid in list is 6 bytes and
|
|
// subListPtr must be pointing to a 12 byte posdb rec.
|
|
if ( *(uint32_t *)(voteBufPtr+1) > *(uint32_t *)(subListPtr+8) ) {
|
|
break;
|
|
}
|
|
|
|
// less than? keep going
|
|
if ( *(uint32_t *)(voteBufPtr+1) < *(uint32_t *)(subListPtr+8) ) {
|
|
continue;
|
|
}
|
|
|
|
// top 4 bytes are equal. check lower single byte then.
|
|
if ( *(unsigned char *)(voteBufPtr) > (*(unsigned char *)(subListPtr+7) & 0xfc ) ) {
|
|
break;
|
|
}
|
|
|
|
if ( *(unsigned char *)(voteBufPtr) < (*(unsigned char *)(subListPtr+7) & 0xfc ) ) {
|
|
continue;
|
|
}
|
|
|
|
// . equal! mark it as nuked!
|
|
voteBufPtr[5] = -1;
|
|
// skip it
|
|
voteBufPtr += 6;
|
|
// advance subListPtr now
|
|
break;
|
|
}
|
|
|
|
// if we've exhausted this docid list go to next sublist
|
|
if ( voteBufPtr >= voteBufEnd ) {
|
|
goto endloop2;
|
|
}
|
|
|
|
// skip that docid record in our termlist. it MUST have been
|
|
// 12 bytes, a docid heading record.
|
|
subListPtr += 12;
|
|
// skip any following keys that are 6 bytes, that means they
|
|
// share the same docid
|
|
for ( ; subListPtr < subListEnd && ((*subListPtr)&0x04); ) {
|
|
subListPtr += 6;
|
|
}
|
|
|
|
// if we have more posdb recs in this sublist, then keep
|
|
// adding our docid votes into the docid list
|
|
}
|
|
endloop2: ;
|
|
// otherwise, advance to next sublist
|
|
}
|
|
|
|
// now remove docids with a -1 vote, they are nuked
|
|
voteBufPtr = m_docIdVoteBuf.getBufStart();
|
|
voteBufEnd = voteBufPtr + m_docIdVoteBuf.length();
|
|
char *dst = voteBufPtr;
|
|
for ( ; voteBufPtr < voteBufEnd ; voteBufPtr += 6 ) {
|
|
// do not re-copy it if it was in this negative termlist
|
|
if(voteBufPtr[5] != -1) {
|
|
// copy it over. might be the same address!
|
|
*(int32_t *) dst = *(int32_t *) voteBufPtr;
|
|
*(int16_t *)(dst+4) = *(int16_t *)(voteBufPtr+4);
|
|
dst += 6;
|
|
}
|
|
}
|
|
// shrink the buffer size now
|
|
m_docIdVoteBuf.setLength ( dst - bufStart );
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "END.");
|
|
}
|
|
|
|
|
|
|
|
//
|
|
// First call will allocate the m_docIdVoteBuf and add docids from the shortest
|
|
// term list to the buffer.
|
|
//
|
|
// Next calls will run through all term sublists (synonyms, term variations) and
|
|
// increase the score in the m_docIdVoteBuf for matching docids. Docids that do
|
|
// not match a term are removed, so we end up with a list of docids matching all
|
|
// query terms.
|
|
//
|
|
void PosdbTable::addDocIdVotes( const QueryTermInfo *qti, int32_t listGroupNum) {
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "BEGIN.");
|
|
|
|
// sanity check, we store this in a single byte below for voting
|
|
if ( listGroupNum >= 256 ) {
|
|
gbshutdownAbort(true);
|
|
}
|
|
|
|
|
|
//
|
|
// add the first sublist's docids into the docid buf
|
|
//
|
|
if( listGroupNum == 0 ) {
|
|
// the first listGroup is not intersecting, just adding to
|
|
// the docid vote buf. that is, if the query is "jump car" we
|
|
// just add all the docids for "jump". Subsequent calls to
|
|
// this function will intersect those docids with the docids
|
|
// for "car", resulting in a buffer with docids that contain
|
|
// both terms.
|
|
|
|
makeDocIdVoteBufForRarestTerm(qti);
|
|
logTrace(g_conf.m_logTracePosdb, "END.");
|
|
return;
|
|
}
|
|
|
|
|
|
char *bufStart = m_docIdVoteBuf.getBufStart();
|
|
char *voteBufPtr = NULL;
|
|
char *voteBufEnd;
|
|
|
|
//
|
|
// For each sublist (term variation) loop through all sublist records
|
|
// and compare with docids in the vote buffer. If a match is found, the
|
|
// score in the vote buffer is set to the current ListGroupNum.
|
|
//
|
|
// A sublist is a termlist for a particular query term, for instance
|
|
// the query term "jump" will have sublists for "jump" "jumps"
|
|
// "jumping" "jumped" and maybe even "jumpy", so that could be
|
|
// 5 sublists, and their QueryTermInfo::m_qtermNum should be the
|
|
// same for all 5.
|
|
//
|
|
for ( int32_t i = 0 ; i < qti->m_numSubLists; i++) {
|
|
// get that sublist
|
|
const char *subListPtr = qti->m_subList[i].m_list->getList();
|
|
const char *subListEnd = qti->m_subList[i].m_list->getListEnd();
|
|
// reset docid list ptrs
|
|
voteBufPtr = m_docIdVoteBuf.getBufStart();
|
|
voteBufEnd = voteBufPtr + m_docIdVoteBuf.length();
|
|
|
|
// loop it
|
|
handleNextSubListRecord:
|
|
|
|
// scan for his docids and inc the vote
|
|
for ( ;voteBufPtr < voteBufEnd ; voteBufPtr += 6 ) {
|
|
// if current docid in docid list is >= the docid
|
|
// in the sublist, stop. docid in list is 6 bytes and
|
|
// subListPtr must be pointing to a 12 byte posdb rec.
|
|
if ( *(uint32_t *)(voteBufPtr+1) > *(uint32_t *)(subListPtr+8) ) {
|
|
break;
|
|
}
|
|
|
|
// less than? keep going
|
|
if ( *(uint32_t *)(voteBufPtr+1) < *(uint32_t *)(subListPtr+8) ) {
|
|
continue;
|
|
}
|
|
|
|
// top 4 bytes are equal. check lower single byte then.
|
|
if ( *(unsigned char *)(voteBufPtr) > (*(unsigned char *)(subListPtr+7) & 0xfc ) ) {
|
|
break;
|
|
}
|
|
|
|
if ( *(unsigned char *)(voteBufPtr) < (*(unsigned char *)(subListPtr+7) & 0xfc ) ) {
|
|
continue;
|
|
}
|
|
|
|
// . equal! record our vote!
|
|
// . we start at zero for the
|
|
// first termlist, and go to 1, etc.
|
|
voteBufPtr[5] = listGroupNum;
|
|
// skip it
|
|
voteBufPtr += 6;
|
|
|
|
// break out to advance subListPtr
|
|
break;
|
|
}
|
|
|
|
// if we've exhausted this docid list go to next sublist
|
|
// since this docid is NOT in the current/ongoing intersection
|
|
// of the docids for each queryterm
|
|
if ( voteBufPtr >= voteBufEnd ) {
|
|
continue;
|
|
}
|
|
|
|
// skip that docid record in our termlist. it MUST have been
|
|
// 12 bytes, a docid heading record.
|
|
subListPtr += 12;
|
|
|
|
// skip any following keys that are 6 bytes, that means they
|
|
// share the same docid
|
|
for ( ; subListPtr < subListEnd && ((*subListPtr)&0x04); ) {
|
|
subListPtr += 6;
|
|
}
|
|
|
|
// if we have more posdb recs in this sublist, then keep
|
|
// adding our docid votes into the docid list
|
|
if ( subListPtr < subListEnd ) {
|
|
goto handleNextSubListRecord;
|
|
}
|
|
|
|
// otherwise, advance to next sublist
|
|
}
|
|
|
|
|
|
//
|
|
// Shrink the docidbuf by removing docids with not enough
|
|
// votes which means they are missing a query term
|
|
//
|
|
voteBufPtr = m_docIdVoteBuf.getBufStart();
|
|
voteBufEnd = voteBufPtr + m_docIdVoteBuf.length();
|
|
char *dst = voteBufPtr;
|
|
for ( ; voteBufPtr < voteBufEnd ; voteBufPtr += 6 ) {
|
|
// skip if it has enough votes to be in search
|
|
// results so far
|
|
if(voteBufPtr[5] == listGroupNum) {
|
|
// copy it over. might be the same address!
|
|
*(int32_t *) dst = *(int32_t *) voteBufPtr;
|
|
*(int16_t *)(dst+4) = *(int16_t *)(voteBufPtr+4);
|
|
dst += 6;
|
|
}
|
|
}
|
|
|
|
// shrink the buffer size
|
|
m_docIdVoteBuf.setLength ( dst - bufStart );
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "END.");
|
|
}
|
|
|
|
|
|
|
|
//
|
|
// Initialize the vote buffer with docids from the shortest
|
|
// term (rarest required term) list and initialize the scores to 0.
|
|
// Called by addDocIdVotes
|
|
//
|
|
// The buffer consists of 5-byte docids and 1-byte votes. The vote byte
|
|
// is set for each query term group that matches the docid, and after
|
|
// each run, the list is "compacted" and shortened so only the
|
|
// matching docids are left.
|
|
//
|
|
void PosdbTable::makeDocIdVoteBufForRarestTerm(const QueryTermInfo *qti) {
|
|
char *cursor[MAX_SUBLISTS];
|
|
char *cursorEnd[MAX_SUBLISTS];
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "term id [%" PRId64 "] [%.*s]", qti->m_subList[0].m_qt->m_termId, qti->m_subList[0].m_qt->m_termLen, qti->m_subList[0].m_qt->m_term);
|
|
|
|
for ( int32_t i = 0 ; i < qti->m_numSubLists ; i++ ) {
|
|
// get that sublist
|
|
cursor [i] = qti->m_subList[i].m_list->getList();
|
|
cursorEnd [i] = qti->m_subList[i].m_list->getListEnd();
|
|
}
|
|
|
|
char * const bufStart = m_docIdVoteBuf.getBufStart();
|
|
char *voteBufPtr = m_docIdVoteBuf.getBufStart();
|
|
char *lastMinRecPtr = NULL;
|
|
int32_t mini = -1;
|
|
|
|
// get the next min from all the termlists
|
|
for(;;) {
|
|
|
|
char *minRecPtr = NULL;
|
|
|
|
// just scan each sublist vs. the docid list
|
|
for ( int32_t i = 0 ; i < qti->m_numSubLists ; i++ ) {
|
|
// skip if exhausted
|
|
if ( ! cursor[i] ) {
|
|
continue;
|
|
}
|
|
|
|
// shortcut
|
|
char *recPtr = cursor[i];
|
|
|
|
// get the min docid
|
|
if ( ! minRecPtr ) {
|
|
minRecPtr = recPtr;
|
|
mini = i;
|
|
continue;
|
|
}
|
|
|
|
// compare!
|
|
if ( *(uint32_t *)(recPtr +8) >
|
|
*(uint32_t *)(minRecPtr+8) ) {
|
|
continue;
|
|
}
|
|
|
|
// a new min
|
|
if ( *(uint32_t *)(recPtr +8) <
|
|
*(uint32_t *)(minRecPtr+8) ) {
|
|
minRecPtr = recPtr;
|
|
mini = i;
|
|
continue;
|
|
}
|
|
|
|
// check lowest byte
|
|
if ( (*(unsigned char *)(recPtr +7) & 0xfc ) >
|
|
(*(unsigned char *)(minRecPtr+7) & 0xfc ) ) {
|
|
continue;
|
|
}
|
|
|
|
// a new min
|
|
if ( (*(unsigned char *)(recPtr +7) & 0xfc ) <
|
|
(*(unsigned char *)(minRecPtr+7) & 0xfc ) ) {
|
|
minRecPtr = recPtr;
|
|
mini = i;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
// if no min then all lists exhausted!
|
|
if ( ! minRecPtr ) {
|
|
// update length
|
|
m_docIdVoteBuf.setLength ( voteBufPtr - bufStart );
|
|
|
|
// all done!
|
|
logTrace(g_conf.m_logTracePosdb, "END.");
|
|
return;
|
|
}
|
|
|
|
// advance that guy over that docid
|
|
cursor[mini] += 12;
|
|
// 6 byte keys follow?
|
|
for ( ; ; ) {
|
|
// end of list?
|
|
if ( cursor[mini] >= cursorEnd[mini] ) {
|
|
// use NULL to indicate list is exhausted
|
|
cursor[mini] = NULL;
|
|
break;
|
|
}
|
|
|
|
// if we hit a new 12 byte key for a new docid, stop
|
|
if ( ! ( cursor[mini][0] & 0x04 ) ) {
|
|
break;
|
|
}
|
|
|
|
// otherwise, skip this 6 byte key
|
|
cursor[mini] += 6;
|
|
}
|
|
|
|
// is it a docid dup?
|
|
if(lastMinRecPtr &&
|
|
*(uint32_t *)(lastMinRecPtr+8) ==
|
|
*(uint32_t *)(minRecPtr+8) &&
|
|
(*(unsigned char *)(lastMinRecPtr+7)&0xfc) ==
|
|
(*(unsigned char *)(minRecPtr+7)&0xfc)) {
|
|
continue;
|
|
}
|
|
|
|
// . do not store the docid if not in the whitelist
|
|
// . FIX: two lower bits, what are they? at minRecPtr[7].
|
|
// . well the lowest bit is the siterank upper bit and the
|
|
// other bit is always 0. we should be ok with just using
|
|
// the 6 bytes of the docid ptr as is though since the siterank
|
|
// should be the same for the site: terms we indexed for the same
|
|
// docid!!
|
|
if ( m_useWhiteTable && ! m_whiteListTable.isInTable(minRecPtr+7) ) {
|
|
continue;
|
|
}
|
|
|
|
// only update this if we add the docid... that way there can be
|
|
// a winning "inRange" term in another sublist and the docid will
|
|
// get added.
|
|
lastMinRecPtr = minRecPtr;
|
|
|
|
// store our docid. actually it contains two lower bits not
|
|
// part of the docid, so we'll have to shift and mask to get
|
|
// the actual docid!
|
|
// docid is only 5 bytes for now
|
|
*(int32_t *)(voteBufPtr+1) = *(int32_t *)(minRecPtr+8);
|
|
// the single lower byte
|
|
voteBufPtr[0] = minRecPtr[7] & 0xfc;
|
|
// 0 vote count
|
|
voteBufPtr[5] = 0;
|
|
|
|
// debug
|
|
// int64_t dd = Posdb::getDocId(minRecPtr);
|
|
// log(LOG_ERROR, "%s:%s: adding docid %" PRId64 "", __FILE__, __func__, dd);
|
|
|
|
// advance
|
|
voteBufPtr += 6;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// TODO: do this in docid range phases to save memory and be much faster
|
|
// since we could then confine the hashing to the L1 cache
|
|
bool PosdbTable::makeDocIdVoteBufForBoolQuery( ) {
|
|
|
|
logTrace(g_conf.m_logTracePosdb, "BEGIN.");
|
|
|
|
// . make a hashtable of all the docids from all the termlists
|
|
// . the value slot will be the operand bit vector i guess
|
|
// . the size of the vector needs one bit per query operand
|
|
// . if the vector is only 1-2 bytes we can just evaluate each
|
|
// combination we encounter and store it into an array, otherwise,
|
|
// we can use another hashtable in order to avoid re-evaluation
|
|
// on if it passes the boolean query.
|
|
char bitVec[MAX_OVEC_SIZE];
|
|
if ( m_vecSize > MAX_OVEC_SIZE ) {
|
|
m_vecSize = MAX_OVEC_SIZE;
|
|
}
|
|
|
|
// . scan each list of docids to a get a new docid, keep a dedup
|
|
// table to avoid re-processing the same docid.
|
|
// . each posdb list we read corresponds to a query term,
|
|
// or a synonym of a query term, or bigram of a query term, etc.
|
|
// but we really want to know what operand, so we associate an
|
|
// operand bit with each query term, and each list can map to
|
|
// the base query term so we can get the operand # from that.
|
|
for ( int32_t i = 0 ; i < m_numQueryTermInfos ; i++ ) {
|
|
|
|
// get it
|
|
QueryTermInfo *qti = &(m_queryTermInfos[i]);
|
|
|
|
QueryTerm *qt = &m_q->m_qterms[qti->m_qtermNum];
|
|
// get the query word
|
|
//QueryWord *qw = qt->m_qword;
|
|
|
|
// just use the word # now
|
|
//int32_t opNum = qw->m_wordNum;//opNum;
|
|
|
|
// . make it consistent with Query::isTruth()
|
|
// . m_bitNum is set above to the QueryTermInfo #
|
|
int32_t bitNum = qt->m_bitNum;
|
|
|
|
// do not consider for adding if negative ('my house -home')
|
|
//if ( qti->m_bigramFlags[0] & BF_NEGATIVE ) continue;
|
|
|
|
// set all to zeroes
|
|
memset ( bitVec, 0, m_vecSize );
|
|
|
|
// set bitvec for this query term #
|
|
int32_t byte = bitNum / 8;
|
|
unsigned char mask = 1<<(bitNum % 8);
|
|
bitVec[byte] |= mask;
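// e.g. bitNum 10 lands in byte 10/8 = 1 with mask 1 << (10%8) = 0x04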

		// each query term can have synonym lists etc. scan those.
		// this includes the original query termlist as well.
		for ( int32_t j = 0 ; j < qti->m_numSubLists ; j++ ) {

			// scan all docids in this list
			const char *p = qti->m_subList[j].m_list->getList();
			const char *pend = qti->m_subList[j].m_list->getListEnd();

			//int64_t lastDocId = 0LL;

			// scan the sub termlist #j
			for ( ; p < pend ; ) {
				// place holder
				int64_t docId = Posdb::getDocId(p);

				// sanity
				//if ( d < lastDocId )
				//	gbshutdownAbort(true);
				//lastDocId = d;

				// point to it
				//char *voteBufPtr = p + 8;

				// this was the first key for this docid for
				// this termid and possibly the first key for
				// this termid, so skip it, either 12 or 18
				// bytes
				if ( (((char *)p)[0])&0x02 ) {
					p += 12;
				}
				// the first key for this termid?
				else {
					p += 18;
				}

				// then only 6 byte keys would follow from the
				// same docid, so skip those as well
			subloop:
				if( p < pend && (((char *)p)[0]) & 0x04 ) {
					p += 6;
					goto subloop;
				}
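
				// Note (added, inferred from the skip logic above): the low
				// flag bits of a key's first byte appear to encode its size:
				// 0x02 set -> 12-byte key (new docid, termid unchanged),
				// 0x04 set -> 6-byte key (another position for the same
				// docid), neither set -> full 18-byte key starting a
				// termid's list.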

				// convert docid into hash key
				//int64_t docId = *(int64_t *)voteBufPtr;
				// shift down 2 bits
				//docId >>= 2;
				// and mask
				//docId &= DOCID_MASK;
				// test it
				//int64_t docId = Posdb::getDocId(voteBufPtr-8);
				//if ( d2 != docId )
				//	gbshutdownAbort(true);
				// store this docid though. treat as int64_t
				// but we mask with keymask
				int32_t slot = m_bt.getSlot ( &docId );
				if ( slot < 0 ) {
					// we can't alloc in a thread, careful
					if ( ! m_bt.addKey(&docId,bitVec) ) {
						gbshutdownAbort(true);
					}

					continue;
				}
				// or the bit in otherwise
				char *bv = (char *)m_bt.getValueFromSlot(slot);
				bv[byte] |= mask;
			}
		}
	}


	// debug info
	// int32_t nc = m_bt.getLongestString();
	// log("posdb: string of %" PRId32" filled slots!",nc);

	char *dst = m_docIdVoteBuf.getBufStart();

	// . now our hash table is filled with all the docids
	// . evaluate each bit vector
	for ( int32_t i = 0 ; i < m_bt.getNumSlots() ; i++ ) {
		// skip if empty
		if ( ! m_bt.m_flags[i] ) {
			continue;
		}

		// get the bit vector
		const unsigned char *vec = (unsigned char *)m_bt.getValueFromSlot(i);

		// hash the vector
		int64_t h64 = 0LL;
		for ( int32_t k = 0 ; k < m_vecSize ; k++ )
			h64 ^= g_hashtab[(unsigned char)vec[k]][(unsigned char)k];

		// check in hash table
		char *val = (char *)m_ct.getValue ( &h64 );
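
		// Note (added): m_ct memoizes the boolean evaluation per distinct
		// bit vector (keyed on h64), so a vector that already evaluated TRUE
		// lets us add the docid without calling matchesBoolQuery() again.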

		if ( m_debug ) {
			int64_t docId = *(int64_t *)m_bt.getKeyFromSlot(i);
			log(LOG_INFO, "query: eval d=%" PRIu64" vec[0]=%" PRIx32" h64=%" PRId64, docId,(int32_t)vec[0],h64);
		}

		// add him to the good table
		if ( val && *val ) {
			// it passes, add the docid
			int64_t docId = *(int64_t *)m_bt.getKeyFromSlot(i);

			// fix it up
			if ( m_debug ) {
				log(LOG_INFO, "query: adding d=%" PRIu64" bitVecSize=%" PRId32" bitvec[0]=0x%" PRIx32" (TRUE)",
				    docId,m_vecSize,(int32_t)vec[0]);
			}
			// shift up
			docId <<= 2;
			// a 6 byte key means you pass
			memcpy ( dst, &docId, 6 );
			dst += 6;
			continue;
		}

		// evaluate the vector
		char include = m_q->matchesBoolQuery ( (unsigned char *)vec, m_vecSize );
		if ( include ) {
			// it passes, add the docid
			int64_t docId = *(int64_t *)m_bt.getKeyFromSlot(i);

			// fix it up
			if ( m_debug ) {
				log(LOG_INFO, "query: adding d=%" PRIu64" vec[0]=0x%" PRIx32, docId,(int32_t)vec[0]);
			}

			// shift up
			docId <<= 2;
			// a 6 byte key means you pass
			memcpy ( dst, &docId, 6 );

			// test it
			if ( m_debug ) {
				int64_t d2;
				d2 = *(uint32_t *)(dst+1);
				d2 <<= 8;
				d2 |= (unsigned char)dst[0];
				d2 >>= 2;
				docId >>= 2;
				if ( d2 != docId )
					gbshutdownAbort(true);
			}
			// end test
			dst += 6;
		}
		// store in hash table
		m_ct.addKey ( &h64, &include );
	}

	// update SafeBuf::m_length
	m_docIdVoteBuf.setLength ( dst - m_docIdVoteBuf.getBufStart() );

	// now sort the docids. TODO: break makeDocIdVoteBufForBoolQuery()
	// up into docid ranges so we have like 1/100th the # of docids to
	// sort. that should make this part a lot faster.
	// i.e. 1000*log(1000) > 100*(10*log(10)) --> 3000 > 1000
	// i.e. it's faster to break it down into 100 pieces
	// i.e. for log base 2 maybe it's like 10x faster...
	qsort ( m_docIdVoteBuf.getBufStart(),
		m_docIdVoteBuf.length() / 6,
		6,
		docIdVoteBufKeyCompare_desc );

	logTrace(g_conf.m_logTracePosdb, "END.");
	return true;
}




////////////////////
//
// Global functions
//
////////////////////


// sort vote buf entries in descending order
static int docIdVoteBufKeyCompare_desc ( const void *h1, const void *h2 ) {
	return KEYCMP((const char*)h1, (const char*)h2, 6);
}


// . b-step into list looking for docid "docId"
// . assume p is start of list, excluding the 6 bytes of termid
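// . note (added): this is a binary-search-style probe over variable-size
//   posdb keys: jump by "step" bytes (kept a multiple of 6, since every key
//   size is a multiple of 6), then scan backwards to the start of the
//   enclosing 12/18-byte key so its docid can be read and compared.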
static inline const char *getWordPosList(uint64_t docId, const char *list, int32_t listSize) {
	// make step divisible by 6 initially
	int32_t step = (listSize / 12) * 6;
	// shortcut
	const char *listEnd = list + listSize;
	// divide in half
	const char *p = list + step;
	// for detecting not-founds
	char count = 0;

	for(;;) {
		// save it
		const char *origp = p;
		// scan up to docid. we use this special bit to distinguish between
		// 6-byte and 12-byte posdb keys
		for ( ; p > list && (p[1] & 0x02); ) {
			p -= 6;
		}
		// ok, we hit a 12 byte key i guess, so backup 6 more
		p -= 6;

		// ok, we got a 12-byte key then i guess
		uint64_t d = Posdb::getDocId ( p );
		// we got a match, but it might be a NEGATIVE key so
		// we have to try to find the positive keys in that case
		if ( d == docId ) {
			// if it's positive, no need to do anything else
			if ( (p[0] & 0x01) == 0x01 ) return p;
			// ok, it's negative, try to see if the positive is
			// in here, if not then return NULL.
			// save current pos
			const char *current = p;
			// back up to 6 byte key before this 12 byte key
			p -= 6;

			// now go backwards to previous 12 byte key
			for ( ; p > list && (p[1] & 0x02); ) {
				p -= 6;
			}
			// ok, we hit a 12 byte key i guess, so backup 6 more
			p -= 6;

			// is it there?
			if ( p >= list && Posdb::getDocId(p) == docId ) {
				// sanity. return NULL if it's negative! wtf????
				if ( (p[0] & 0x01) == 0x00 ) return NULL;
				// got it
				return p;
			}
			// ok, no positive before us, try after us
			p = current;
			// advance over current 12 byte key
			p += 12;
			// now go forwards to next 12 byte key
			for ( ; p < listEnd && (p[1] & 0x02); ) {
				p += 6;
			}

			// is it there?
			if ( p + 12 < listEnd && Posdb::getDocId(p) == docId ) {
				// sanity. return NULL if it's negative! wtf????
				if ( (p[0] & 0x01) == 0x00 ) {
					return NULL;
				}

				// got it
				return p;
			}
			// . crap, i guess we just had a single negative docid then
			// . return that and the caller will see it's negative
			return current;
		}

		// reduce step
		//step /= 2;
		step >>= 1;
		// . make divisible by 6!
		// . TODO: speed this up!!!
		step = step - (step % 6);

		// sanity
		if ( step % 6 ) {
			gbshutdownAbort(true);
		}

		// ensure never 0
		if ( step <= 0 ) {
			step = 6;
			// return NULL if not found
			if ( count++ >= 2 ) {
				return NULL;
			}
		}

		// go up or down then
		if ( d < docId ) {
			p = origp + step;
			if ( p >= listEnd ) {
				p = listEnd - 6;
			}
		}
		else {
			p = origp - step;
			if ( p < list ) {
				p = list;
			}
		}
	}
}




// initialize the weights table
static void initWeights ( ) {
	ScopedLock sl(s_mtx_weights);
	if ( s_init ) {
		return;
	}

	logTrace(g_conf.m_logTracePosdb, "BEGIN.");

	s_scoringWeights.init(g_conf.m_baseScoringParameters);

	// if two hashgroups are compatible they can be paired
	for ( int32_t i = 0 ; i < HASHGROUP_END ; i++ ) {
		// set this
		s_inBody[i] = false;
		// is it body?
		if ( i == HASHGROUP_BODY ||
		     i == HASHGROUP_HEADING ||
		     i == HASHGROUP_INLIST ||
		     i == HASHGROUP_INMENU )
			s_inBody[i] = true;

		for ( int32_t j = 0 ; j < HASHGROUP_END ; j++ ) {
			// assume not
			s_isCompatible[i][j] = false;
			// or both in body (and not title)
			bool inBody1 = true;
			if ( i != HASHGROUP_BODY &&
			     i != HASHGROUP_HEADING &&
			     i != HASHGROUP_INLIST &&
			     //i != HASHGROUP_INURL &&
			     i != HASHGROUP_INMENU )
				inBody1 = false;
			bool inBody2 = true;
			if ( j != HASHGROUP_BODY &&
			     j != HASHGROUP_HEADING &&
			     j != HASHGROUP_INLIST &&
			     //j != HASHGROUP_INURL &&
			     j != HASHGROUP_INMENU )
				inBody2 = false;
			// no body allowed now!
			if ( inBody1 || inBody2 ) {
				continue;
			}

			//if ( ! inBody ) continue;
			// now neither can be in the body, because we handle
			// those cases in the new sliding window algo.
			// if one term is only in the link text and the other
			// term is only in the title, ... what then? i guess
			// allow those here, but they will be penalized
			// some with the fixed distance of like 64 units or
			// something...
			s_isCompatible[i][j] = true;
			// if either is in the body then do not allow now
			// and handle in the sliding window algo
			//s_isCompatible[i][j] = 1;
		}
	}

	s_init = true;

#ifdef _VALGRIND_
	//we read from the weight tables without locking. tell helgrind to ignore that
	VALGRIND_HG_DISABLE_CHECKING(&s_scoringWeights,sizeof(s_scoringWeights));
	VALGRIND_HG_DISABLE_CHECKING(s_isCompatible,sizeof(s_isCompatible));
	VALGRIND_HG_DISABLE_CHECKING(s_inBody,sizeof(s_inBody));
#endif

	logTrace(g_conf.m_logTracePosdb, "END.");
}




// Called when ranking settings are changed. Normally called from update-parameter
// broadcast handling (see handleRequest3fLoop() )
void reinitializeRankingSettings()
{
	s_init = false;
	initWeights();
}
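
// Note (added): the weight getters below call initWeights() on every lookup,
// so after reinitializeRankingSettings() clears s_init the derived tables are
// rebuilt immediately above and subsequent lookups see the new values.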


float getHashGroupWeight ( unsigned char hg ) {
	initWeights();

	return s_scoringWeights.m_hashGroupWeights[hg];
}


float getDiversityWeight ( unsigned char diversityRank ) {
	initWeights();

	return s_scoringWeights.m_diversityWeights[diversityRank];
}


float getDensityWeight ( unsigned char densityRank ) {
	initWeights();

	return s_scoringWeights.m_densityWeights[densityRank];
}


float getWordSpamWeight ( unsigned char wordSpamRank ) {
	initWeights();

	return s_scoringWeights.m_wordSpamWeights[wordSpamRank];
}


float getLinkerWeight ( unsigned char wordSpamRank ) {
	initWeights();

	return s_scoringWeights.m_linkerWeights[wordSpamRank];
}