Moved getTermFreqWeight() logic into Msg3a.cpp

This commit is contained in:
Ivan Skytte Jørgensen
2017-01-20 16:40:28 +01:00
parent dc8d152844
commit 363b9200a1
3 changed files with 17 additions and 26 deletions

@ -10,6 +10,7 @@
#include "Process.h"
#include "Posdb.h"
#include "Collectiondb.h"
#include "ScalingFunctions.h"
#include "Conf.h"
#include "Lang.h"
#include "Mem.h"
@ -1056,6 +1057,22 @@ void Msg3a::printTerms ( ) {
}
}
/// Derive a weight for a term from its frequency relative to the collection size.
///
/// @param termFreq       term frequency value supplied by the caller
/// @param numDocsInColl  number of documents in the collection
/// @return 1.0 when numDocsInColl is not positive; otherwise the result of
///         scale_linear() applied to termFreq/numDocsInColl with the
///         g_conf.m_termFreqWeightFreqMin/Max and g_conf.m_termFreqWeightMin/Max
///         settings.
static float getTermFreqWeight(int64_t termFreq, int64_t numDocsInColl) {
	// No usable document count means no ratio can be formed; fall back to 1.0.
	if(numDocsInColl<=0)
		return 1.0;

	// Divide in floating point. An int64_t/int64_t division would truncate the
	// ratio before it ever reaches scale_linear().
	const float fw = static_cast<float>(termFreq) / numDocsInColl;

	// limit
	return scale_linear(fw, g_conf.m_termFreqWeightFreqMin, g_conf.m_termFreqWeightFreqMax, g_conf.m_termFreqWeightMin, g_conf.m_termFreqWeightMax);
}
void setTermFreqWeights ( collnum_t collnum , Query *q ) {
int64_t numDocsInColl = 0;
RdbBase *base = getRdbBase ( RDB_CLUSTERDB, collnum );

@ -87,7 +87,6 @@
#define POSDB_DELETEDOC_TERMID 0
const char *getHashGroupString ( unsigned char hg );
float getTermFreqWeight ( int64_t termFreq , int64_t numDocsInColl );
typedef key144_t posdbkey_t;

@ -6046,28 +6046,3 @@ float getLinkerWeight ( unsigned char wordSpamRank ) {
return s_scoringWeights.m_linkerWeights[wordSpamRank];
}
// Derive a weight from termFreq relative to numDocsInColl. The ratio is passed
// through scale_linear() together with the g_conf.m_termFreqWeightFreqMin/Max
// and g_conf.m_termFreqWeightMin/Max settings. If numDocsInColl is zero the
// raw termFreq is used without normalisation.
//
// Historical note: earlier revisions estimated the frequency from the posdb
// list size (minus the 6 termid bytes, divided by 11.0 because posdb keys can
// be 18, 12 or 6 bytes) and normalised per shard via g_hostdb.m_numShards.
// None of that is active any more.
float getTermFreqWeight ( int64_t termFreq, int64_t numDocsInColl ) {
	const float rawFreq = termFreq;

	// what chunk are we of entire collection?
	const float ratio = numDocsInColl ? rawFreq / numDocsInColl : rawFreq;

	// limit
	return scale_linear(ratio, g_conf.m_termFreqWeightFreqMin, g_conf.m_termFreqWeightFreqMax, g_conf.m_termFreqWeightMin, g_conf.m_termFreqWeightMax);
}