Merge branch 'master' into dev-siteinfo

This commit is contained in:
Ai Lin Chia
2018-05-25 11:34:47 +02:00
5 changed files with 58 additions and 12 deletions

@ -70,3 +70,11 @@ void BaseScoringParameters::traceToLog(const char *prefix) {
log(LOG_TRACE,"%s: m_flagRankAdjustment[%d]=%d",prefix,i,m_flagRankAdjustment[i]);
}
}
// Reports whether every per-language weight currently holds the same value.
// Each of the 64 slots of m_languageWeights is compared against slot 0 with
// exact inequality; the scan stops and returns false at the first mismatch.
bool BaseScoringParameters::allLanguageWeightsAreTheSame() const {
	const auto &reference = m_languageWeights[0];
	unsigned slot = 0;
	while(slot < 64) {
		if(m_languageWeights[slot] != reference)
			return false;
		++slot;
	}
	return true;
}

@ -40,6 +40,7 @@ struct BaseScoringParameters {
BaseScoringParameters() { clear(); }
void clear();
void traceToLog(const char *prefix);
bool allLanguageWeightsAreTheSame() const;
};
#endif

@ -345,12 +345,28 @@ bool sendPageResults ( TcpSocket *s , HttpRequest *hr ) {
tld_hint, strlen(tld_hint), true);
// Then try the external language detection server
if (g_queryLanguage.getLanguage(st, gotQueryLanguageWrapper, fx_qlang, fx_blang, fx_country, fx_fetld, query)) {
// blocked
return false;
//We can't use st->m_si.m_baseScoringParameters.allLanguageWeightsAreTheSame() because the cgi parameters have not set the members in SearchInput yet.
//So we have to check if any CGI parameter using the "lw_" prefix has been specified.
bool any_specific_language_weight_specified = false;
for(int32_t i = 0; i < hr->getNumFields(); i++) {
if(!hr->getValue(i))
continue;
const char *full_field_name = hr->getField(i);
if(strncmp(full_field_name,"lw_",3)==0) {
any_specific_language_weight_specified = true;
break;
}
}
if(!any_specific_language_weight_specified) {
// Then try the external language detection server
if (g_queryLanguage.getLanguage(st, gotQueryLanguageWrapper, fx_qlang, fx_blang, fx_country, fx_fetld, query)) {
// blocked
return false;
}
} else
return gotQueryLanguage(st, {});
log(LOG_DEBUG,"Explicit language weight specified. Not contacting external language detection server");
return gotQueryLanguage(st, {});
}
static void gotQueryLanguageWrapper(void *state, const std::vector<std::pair<lang_t, double>> &language_weights) {

@ -4016,7 +4016,7 @@ void Parms::init ( ) {
"set in the search controls and is usually something like "
"20.0. Which means that we multiply a result's score by 20 "
"if from the same language as the query or the language is "
"unknown.";
"unknown. Has no effect if any of the per-language weights have been specified.";
simple_m_set(SearchInput,m_sameLangWeight);
m->m_defOff= offsetof(CollectionRec,m_sameLangWeight);
m->m_cgi = "langw";
@ -4028,7 +4028,7 @@ void Parms::init ( ) {
m->m_desc = "Use this to override the default uknown language weight "
"for this collection. We multiply a result's score by this value "
"if the user requested a specific language, but the language of the "
"indexed page could not be determined.";
"indexed page could not be determined. Has no effect if any of the per-language weights have been specified.";
simple_m_set(SearchInput,m_unknownLangWeight);
m->m_defOff= offsetof(CollectionRec,m_unknownLangWeight);
m->m_cgi = "ulangw";
@ -4036,6 +4036,23 @@ void Parms::init ( ) {
m->m_page = PAGE_RESULTS;
m++;
for(int i=0; i<MAX_LANGUAGES; i++) {
static char title[MAX_LANGUAGES][64];
sprintf(title[i],"Language weight for %s", getLanguageString(i));
static char cgi[MAX_LANGUAGES][64];
sprintf(cgi[i],"lw_%s", getLanguageAbbr(i)); //note: sendPageResults() relies on this
m->m_title = title[i];
m->m_desc = "";
m->m_obj = OBJ_SI;
m->m_off = offsetof(SearchInput,m_baseScoringParameters.m_languageWeights) + sizeof(float)*i;
m->m_type = TYPE_FLOAT;
m->m_def = "1.0";
m->m_cgi = cgi[i];
m->m_flags = PF_API;
m->m_page = PAGE_RESULTS;
m++;
}
m->m_title = "site-rank multiplier";
m->m_desc = "formula: score = (siterank*multiplier)+1";
simple_m_set(SearchInput,m_baseScoringParameters.m_siteRankMultiplier);

@ -262,16 +262,20 @@ bool SearchInput::set(TcpSocket *sock, HttpRequest *r, lang_t primaryQueryLangua
log(LOG_INFO, "query: using primary query lang of %s", getLanguageAbbr(m_queryLangId));
if(!language_weights.empty()) {
//external language detection server had results
for(int i=0; i<64; i++)
m_baseScoringParameters.m_languageWeights[i] = 0.01; //server returns weights in the range [0..1]
for(const auto &e : language_weights)
m_baseScoringParameters.m_languageWeights[e.first] = e.second;
} else {
//query language server is unavailable.
for(int i=0; i<64; i++)
m_baseScoringParameters.m_languageWeights[i] = 1.0;
m_baseScoringParameters.m_languageWeights[langUnknown] = m_unknownLangWeight; //backward compatible. questionable behaviour
m_baseScoringParameters.m_languageWeights[m_queryLangId] = m_sameLangWeight;
if(m_baseScoringParameters.allLanguageWeightsAreTheSame()) {
//query language server is unavailable.
for(int i=0; i<64; i++)
m_baseScoringParameters.m_languageWeights[i] = 1.0;
m_baseScoringParameters.m_languageWeights[langUnknown] = m_unknownLangWeight; //backward compatible. questionable behaviour
m_baseScoringParameters.m_languageWeights[m_queryLangId] = m_sameLangWeight;
}
//else: user specified language weights explicitly
}
int32_t maxQueryTerms = cr->m_maxQueryTerms;