forked from Mirrors/privacore-open-source-search-engine
Merge branch 'master' into dev-siteinfo
This commit is contained in:
@ -70,3 +70,11 @@ void BaseScoringParameters::traceToLog(const char *prefix) {
|
||||
log(LOG_TRACE,"%s: m_flagRankAdjustment[%d]=%d",prefix,i,m_flagRankAdjustment[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool BaseScoringParameters::allLanguageWeightsAreTheSame() const {
|
||||
for(unsigned i=0; i<64; i++)
|
||||
if(m_languageWeights[i]!=m_languageWeights[0])
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
@ -40,6 +40,7 @@ struct BaseScoringParameters {
|
||||
BaseScoringParameters() { clear(); }
|
||||
void clear();
|
||||
void traceToLog(const char *prefix);
|
||||
bool allLanguageWeightsAreTheSame() const;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -345,12 +345,28 @@ bool sendPageResults ( TcpSocket *s , HttpRequest *hr ) {
|
||||
tld_hint, strlen(tld_hint), true);
|
||||
|
||||
|
||||
// Then try the external language detection server
|
||||
if (g_queryLanguage.getLanguage(st, gotQueryLanguageWrapper, fx_qlang, fx_blang, fx_country, fx_fetld, query)) {
|
||||
// blocked
|
||||
return false;
|
||||
//We can't use st->m_si.m_baseScoringParameters.allLanguageWeightsAreTheSame() because the cgi parameters have not set the members in SearchInput yet.
|
||||
//So we have to check if any CGI parameter using the "lw_" prefix has been specified.
|
||||
bool any_specific_language_weight_specified = false;
|
||||
for(int32_t i = 0; i < hr->getNumFields(); i++) {
|
||||
if(!hr->getValue(i))
|
||||
continue;
|
||||
const char *full_field_name = hr->getField(i);
|
||||
if(strncmp(full_field_name,"lw_",3)==0) {
|
||||
any_specific_language_weight_specified = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(!any_specific_language_weight_specified) {
|
||||
// Then try the external language detection server
|
||||
if (g_queryLanguage.getLanguage(st, gotQueryLanguageWrapper, fx_qlang, fx_blang, fx_country, fx_fetld, query)) {
|
||||
// blocked
|
||||
return false;
|
||||
}
|
||||
} else
|
||||
return gotQueryLanguage(st, {});
|
||||
log(LOG_DEBUG,"Explicit language weight specified. Not contacting external language detection server");
|
||||
return gotQueryLanguage(st, {});
|
||||
}
|
||||
|
||||
static void gotQueryLanguageWrapper(void *state, const std::vector<std::pair<lang_t, double>> &language_weights) {
|
||||
|
21
Parms.cpp
21
Parms.cpp
@ -4016,7 +4016,7 @@ void Parms::init ( ) {
|
||||
"set in the search controls and is usually something like "
|
||||
"20.0. Which means that we multiply a result's score by 20 "
|
||||
"if from the same language as the query or the language is "
|
||||
"unknown.";
|
||||
"unknown. Has no effect if any of the per-language weights have been specified.";
|
||||
simple_m_set(SearchInput,m_sameLangWeight);
|
||||
m->m_defOff= offsetof(CollectionRec,m_sameLangWeight);
|
||||
m->m_cgi = "langw";
|
||||
@ -4028,7 +4028,7 @@ void Parms::init ( ) {
|
||||
m->m_desc = "Use this to override the default uknown language weight "
|
||||
"for this collection. We multiply a result's score by this value "
|
||||
"if the user requested a specific language, but the language of the "
|
||||
"indexed page could not be determined.";
|
||||
"indexed page could not be determined. Has no effect if any of the per-language weights have been specified.";
|
||||
simple_m_set(SearchInput,m_unknownLangWeight);
|
||||
m->m_defOff= offsetof(CollectionRec,m_unknownLangWeight);
|
||||
m->m_cgi = "ulangw";
|
||||
@ -4036,6 +4036,23 @@ void Parms::init ( ) {
|
||||
m->m_page = PAGE_RESULTS;
|
||||
m++;
|
||||
|
||||
for(int i=0; i<MAX_LANGUAGES; i++) {
|
||||
static char title[MAX_LANGUAGES][64];
|
||||
sprintf(title[i],"Language weight for %s", getLanguageString(i));
|
||||
static char cgi[MAX_LANGUAGES][64];
|
||||
sprintf(cgi[i],"lw_%s", getLanguageAbbr(i)); //note: sendPageResults() relies on this
|
||||
m->m_title = title[i];
|
||||
m->m_desc = "";
|
||||
m->m_obj = OBJ_SI;
|
||||
m->m_off = offsetof(SearchInput,m_baseScoringParameters.m_languageWeights) + sizeof(float)*i;
|
||||
m->m_type = TYPE_FLOAT;
|
||||
m->m_def = "1.0";
|
||||
m->m_cgi = cgi[i];
|
||||
m->m_flags = PF_API;
|
||||
m->m_page = PAGE_RESULTS;
|
||||
m++;
|
||||
}
|
||||
|
||||
m->m_title = "site-rank multiplier";
|
||||
m->m_desc = "formula: score = (siterank*multiplier)+1";
|
||||
simple_m_set(SearchInput,m_baseScoringParameters.m_siteRankMultiplier);
|
||||
|
@ -262,16 +262,20 @@ bool SearchInput::set(TcpSocket *sock, HttpRequest *r, lang_t primaryQueryLangua
|
||||
log(LOG_INFO, "query: using primary query lang of %s", getLanguageAbbr(m_queryLangId));
|
||||
|
||||
if(!language_weights.empty()) {
|
||||
//external language detection server had results
|
||||
for(int i=0; i<64; i++)
|
||||
m_baseScoringParameters.m_languageWeights[i] = 0.01; //server returns weights in the range [0..1]
|
||||
for(const auto &e : language_weights)
|
||||
m_baseScoringParameters.m_languageWeights[e.first] = e.second;
|
||||
} else {
|
||||
//query language server is unavailable.
|
||||
for(int i=0; i<64; i++)
|
||||
m_baseScoringParameters.m_languageWeights[i] = 1.0;
|
||||
m_baseScoringParameters.m_languageWeights[langUnknown] = m_unknownLangWeight; //backward compatible. questionable behaviour
|
||||
m_baseScoringParameters.m_languageWeights[m_queryLangId] = m_sameLangWeight;
|
||||
if(m_baseScoringParameters.allLanguageWeightsAreTheSame()) {
|
||||
//query language server is unavailable.
|
||||
for(int i=0; i<64; i++)
|
||||
m_baseScoringParameters.m_languageWeights[i] = 1.0;
|
||||
m_baseScoringParameters.m_languageWeights[langUnknown] = m_unknownLangWeight; //backward compatible. questionable behaviour
|
||||
m_baseScoringParameters.m_languageWeights[m_queryLangId] = m_sameLangWeight;
|
||||
}
|
||||
//else: user specified language weights explicitly
|
||||
}
|
||||
|
||||
int32_t maxQueryTerms = cr->m_maxQueryTerms;
|
||||
|
Reference in New Issue
Block a user