mirror of
https://github.com/privacore/open-source-search-engine.git
synced 2025-07-13 02:36:06 -04:00
Moved getTermFreqWeight() logic into Msg3a.cpp
This commit is contained in:
17
Msg3a.cpp
17
Msg3a.cpp
@ -10,6 +10,7 @@
|
||||
#include "Process.h"
|
||||
#include "Posdb.h"
|
||||
#include "Collectiondb.h"
|
||||
#include "ScalingFunctions.h"
|
||||
#include "Conf.h"
|
||||
#include "Lang.h"
|
||||
#include "Mem.h"
|
||||
@ -1056,6 +1057,22 @@ void Msg3a::printTerms ( ) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static float getTermFreqWeight(int64_t termFreq, int64_t numDocsInColl) {
|
||||
if(numDocsInColl>0)
|
||||
scale_linear(termFreq/numDocsInColl, g_conf.m_termFreqWeightFreqMin, g_conf.m_termFreqWeightFreqMax, g_conf.m_termFreqWeightMin, g_conf.m_termFreqWeightMax);
|
||||
else
|
||||
return 1.0; //whatever...
|
||||
float fw = termFreq;
|
||||
if ( numDocsInColl ) {
|
||||
fw /= numDocsInColl;
|
||||
}
|
||||
|
||||
// limit
|
||||
return scale_linear(fw, g_conf.m_termFreqWeightFreqMin, g_conf.m_termFreqWeightFreqMax, g_conf.m_termFreqWeightMin, g_conf.m_termFreqWeightMax);
|
||||
}
|
||||
|
||||
|
||||
void setTermFreqWeights ( collnum_t collnum , Query *q ) {
|
||||
int64_t numDocsInColl = 0;
|
||||
RdbBase *base = getRdbBase ( RDB_CLUSTERDB, collnum );
|
||||
|
1
Posdb.h
1
Posdb.h
@ -87,7 +87,6 @@
|
||||
#define POSDB_DELETEDOC_TERMID 0
|
||||
|
||||
const char *getHashGroupString ( unsigned char hg );
|
||||
float getTermFreqWeight ( int64_t termFreq , int64_t numDocsInColl );
|
||||
|
||||
typedef key144_t posdbkey_t;
|
||||
|
||||
|
@ -6046,28 +6046,3 @@ float getLinkerWeight ( unsigned char wordSpamRank ) {
|
||||
|
||||
return s_scoringWeights.m_linkerWeights[wordSpamRank];
|
||||
}
|
||||
|
||||
|
||||
float getTermFreqWeight ( int64_t termFreq, int64_t numDocsInColl ) {
|
||||
// do not include top 6 bytes at top of list that are termid
|
||||
//float fw = listSize - 6;
|
||||
// sanity
|
||||
//if ( fw < 0 ) fw = 0;
|
||||
// estimate # of docs that have this term. the problem is
|
||||
// that posdb keys can be 18, 12 or 6 bytes!
|
||||
//fw /= 11.0;
|
||||
// adjust this so its per split!
|
||||
//int32_t nd = numDocsInColl / g_hostdb.m_numShards;
|
||||
float fw = termFreq;
|
||||
// what chunk are we of entire collection?
|
||||
//if ( nd ) fw /= nd;
|
||||
if ( numDocsInColl ) {
|
||||
fw /= numDocsInColl;
|
||||
}
|
||||
|
||||
// limit
|
||||
return scale_linear(fw, g_conf.m_termFreqWeightFreqMin, g_conf.m_termFreqWeightFreqMax, g_conf.m_termFreqWeightMin, g_conf.m_termFreqWeightMax);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user