Merge branch 'master' into dev-dumpthread

2017-05-01 14:45:20 +02:00
parent 935c8fae54 81f3fb8234
commit 6fc926e67d
35 changed files with 574 additions and 672 deletions
--- a/Collectiondb.cpp
+++ b/Collectiondb.cpp
@ -1021,6 +1021,7 @@ CollectionRec::CollectionRec() {
 	m_summDedupNumLines = 0;
 	m_maxQueryTerms = 0;
 	m_sameLangWeight = 0.0;
+	m_unknownLangWeight = 0.0;
 	memset(m_defaultSortLanguage2, 0, sizeof(m_defaultSortLanguage2));
 	m_importEnabled = false;
 	m_numImportInjects = 0;
--- a/Collectiondb.h
+++ b/Collectiondb.h
@ -240,6 +240,7 @@ public:

 	//ranking settings
 	float m_sameLangWeight;
+	float m_unknownLangWeight;

 	// Language stuff
 	char 			m_defaultSortLanguage2[6];
--- a/Conf.cpp
+++ b/Conf.cpp
@ -131,6 +131,8 @@ Conf::Conf ( ) {
 	m_hashGroupWeightInUrl = 0.0;
 	m_hashGroupWeightInMenu = 0.0;
 	m_synonymWeight = 0.0;
+	m_pageTemperatureWeightMin = 0.0;
+	m_pageTemperatureWeightMax = 0.0;
 	m_usePageTemperatureForRanking = true;
 	m_numFlagScoreMultipliers = 26;
 	m_numFlagRankAdjustments = 26;
--- a/Conf.h
+++ b/Conf.h
@ -224,8 +224,9 @@ class Conf {
 	float m_hashGroupWeightInternalLinkText;
 	float m_hashGroupWeightInUrl;
 	float m_hashGroupWeightInMenu;
-	
 	float m_synonymWeight;
+	float m_pageTemperatureWeightMin;
+	float m_pageTemperatureWeightMax;

 	bool m_usePageTemperatureForRanking;

--- a/Hostdb.cpp
+++ b/Hostdb.cpp
@ -683,7 +683,7 @@ createFile:
 		m_hosts[i].m_emailCode = -2;
 		// reset these
 		m_hosts[i].m_pingInfo.m_flags    = 0;
-		m_hosts[i].m_pingInfo.m_cpuUsage = 0.0;
+		m_hosts[i].m_pingInfo.m_unused4 = 0.0;
 		m_hosts[i].m_loadAvg  = 0.0;

 		m_hosts[i].m_lastResponseReceiveTimestamp = 0;
@ -1237,8 +1237,9 @@ Host *Hostdb::getHostWithSpideringEnabled ( uint32_t shardNum ) {

 // if niceness 0 can't pick noquery host/ must pick spider host.
 // if niceness 1 can't pick nospider host/ must pick query host.
+// Used to select based on PingInfo::m_udpSlotsInUseIncoming but that information is not exchanged often enough to
+// be even remotely accurate with any realistic number of shards.
 Host *Hostdb::getLeastLoadedInShard ( uint32_t shardNum , char niceness ) {
-	int32_t minOutstandingRequests = 0x7fffffff;
 	int32_t minOutstandingRequestsIndex = -1;
 	Host *shard = getShard ( shardNum );
 	Host *bestDead = NULL;
@ -1251,13 +1252,7 @@ Host *Hostdb::getLeastLoadedInShard ( uint32_t shardNum , char niceness ) {
 		if ( niceness == 0 && ! hh->m_queryEnabled  ) continue;
 		if ( ! bestDead ) bestDead = hh;
 		if(isDead(hh)) continue;
-		// log("host %" PRId32 " numOutstanding is %" PRId32, hh->m_hostId, 
-		// 	hh->m_pingInfo.m_udpSlotsInUseIncoming);
-		if ( hh->m_pingInfo.m_udpSlotsInUseIncoming > 
-		     minOutstandingRequests )
-			continue;

-		minOutstandingRequests =hh->m_pingInfo.m_udpSlotsInUseIncoming;
 		minOutstandingRequestsIndex = i;
 	}
 	// we should never return a nospider/noquery host depending on
@ -1374,7 +1369,7 @@ bool Hostdb::replaceHost ( int32_t origHostId, int32_t spareHostId ) {
 	oldHost->m_ping                = g_conf.m_deadHostTimeout;
 	oldHost->m_pingShotgun         = g_conf.m_deadHostTimeout;
 	oldHost->m_emailCode           = 0;
-	oldHost->m_pingInfo.m_udpSlotsInUseIncoming = 0;
+	oldHost->m_pingInfo.m_unused12 = 0;
 	oldHost->m_errorReplies        = 0;
 	oldHost->m_dgramsTo            = 0;
 	oldHost->m_dgramsFrom          = 0;
@ -1431,27 +1426,27 @@ void Hostdb::updatePingInfo(Host *h, const PingInfo &pi) {

 	h->m_pingInfo.m_unused0 = 0;
 	h->m_pingInfo.m_hostId = pi.m_hostId;
-	h->m_pingInfo.m_loadAvg = pi.m_loadAvg;
-	h->m_pingInfo.m_percentMemUsed = pi.m_percentMemUsed;
-	h->m_pingInfo.m_cpuUsage = pi.m_cpuUsage;
+	h->m_pingInfo.m_unused2 = 0;
+	h->m_pingInfo.m_unused3 = 0;
+	h->m_pingInfo.m_unused4 = 0.0;
 	h->m_pingInfo.m_totalDocsIndexed = pi.m_totalDocsIndexed;
 	h->m_pingInfo.m_hostsConfCRC = pi.m_hostsConfCRC;
-	h->m_pingInfo.m_diskUsage = pi.m_diskUsage;
+	h->m_pingInfo.m_unused7 = 0.0;
 	h->m_pingInfo.m_flags = pi.m_flags;
-	h->m_pingInfo.m_numCorruptDiskReads = pi.m_numCorruptDiskReads;
-	h->m_pingInfo.m_numOutOfMems = pi.m_numOutOfMems;
-	h->m_pingInfo.m_socketsClosedFromHittingLimit = pi.m_socketsClosedFromHittingLimit;
+	h->m_pingInfo.m_unused9 = 0;
+	h->m_pingInfo.m_unused10 = 0;
+	h->m_pingInfo.m_unused11 = 0;
 	//m_totalResends is updated direclty by UdpSlot
 	//h->m_pingInfo.m_totalResends = pi.m_totalResends;
 	//m_etryagains is updated directly by UdpServer
 	//h->m_pingInfo.m_etryagains = pi.m_etryagains;
-	h->m_pingInfo.m_udpSlotsInUseIncoming = pi.m_udpSlotsInUseIncoming;
-	h->m_pingInfo.m_tcpSocketsInUse = pi.m_tcpSocketsInUse;
-	h->m_pingInfo.m_currentSpiders = pi.m_currentSpiders;
+	h->m_pingInfo.m_unused12 = 0;
+	h->m_pingInfo.m_unused13 = 0;
+	h->m_pingInfo.m_unused14 = 0;
 	h->m_pingInfo.m_dailyMergeCollnum = pi.m_dailyMergeCollnum;
 	memcpy(h->m_pingInfo.m_gbVersionStr,pi.m_gbVersionStr,sizeof(pi.m_gbVersionStr));
 	h->m_pingInfo.m_repairMode = pi.m_repairMode;
-	h->m_pingInfo.m_recoveryLevel = pi.m_recoveryLevel;
+	h->m_pingInfo.m_unused18 = 0;
 }


@ -1750,15 +1745,14 @@ int32_t *getLocalIps ( ) {
 		log("hostdb: getifaddrs: %s.",mstrerror(errno));
 		return NULL;
 	}
-	ifaddrs *p = ifap;
 	int32_t ni = 0;
 	// store loopback just in case
 	int32_t loopback = atoip("127.0.0.1");
 	s_localIps[ni++] = loopback;
-	for ( ; p && ni < 18 ; p = p->ifa_next ) {
-		// avoid possible core dump
+	for(ifaddrs *p = ifap; p && ni < 18 ; p = p->ifa_next) {
 		if ( ! p->ifa_addr ) continue;
-		//break; // mdw hack...
+		if(p->ifa_addr->sa_family != AF_INET)
+			continue;
 		struct sockaddr_in *xx = (sockaddr_in *)(void*)p->ifa_addr;
 		int32_t ip = xx->sin_addr.s_addr;
 		// skip if loopback we stored above
--- a/Hostdb.h
+++ b/Hostdb.h
@ -45,27 +45,27 @@ class PingInfo {
 public:
 	int64_t m_unused0; //used to be a timestamp for clock synchronization
 	int32_t m_hostId;
-	int32_t m_loadAvg;
-	float m_percentMemUsed;
-	float m_cpuUsage;
+	int32_t m_unused2; //used for the m_loadAvg
+	float m_unused3; //used to be m_percentMemUsed
+	float m_unused4; //used to be m_cpuUsage
 	int32_t m_totalDocsIndexed;
 	int32_t m_hostsConfCRC;
-	float m_diskUsage;
+	float m_unused7; //used to be m_diskUsage
 	int32_t m_flags;
 	// some new stuff
-	int32_t m_numCorruptDiskReads;
-	int32_t m_numOutOfMems;
-	int32_t m_socketsClosedFromHittingLimit;
+	int32_t m_unused9;
+	int32_t m_unused10;
+	int32_t m_unused11;

-	int32_t m_udpSlotsInUseIncoming;
-	int32_t m_tcpSocketsInUse;
+	int32_t m_unused12;
+	int32_t m_unused13;

-	int16_t m_currentSpiders;
+	int16_t m_unused14;
 	collnum_t m_dailyMergeCollnum;

 	char m_gbVersionStr[21];
 	char m_repairMode;
-	uint8_t m_recoveryLevel;
+	uint8_t m_unused18;
 };

 class Host {
--- a/HttpServer.cpp
+++ b/HttpServer.cpp
@ -7,6 +7,7 @@
 #include "Collectiondb.h"
 #include "HashTable.h"
 #include "Stats.h"
+#include "Statistics.h"
 #include "HttpMime.h"
 #include "Hostdb.h"
 #include "Loop.h"
@ -521,7 +522,7 @@ void HttpServer::requestHandler ( TcpSocket *s ) {
 		sendErrorReply ( s , 500 , "Too many sockets open."); 
 		// count as a failed query so we send an email alert if too
 		// many of these happen
-		g_stats.m_closedSockets++;
+		Statistics::register_socket_limit_hit();
 		return; 
 	}

--- a/Msg39.cpp
+++ b/Msg39.cpp
@ -49,8 +49,15 @@ void Msg39Request::reset() {
 	m_collnum                 = -1;
 	m_useQueryStopWords       = true;
 	m_doMaxScoreAlgo          = true;
+	m_termFreqWeightFreqMin = 0.0;
+	m_termFreqWeightFreqMax = 0.5;
+	m_termFreqWeightMin = 0.5;
+	m_termFreqWeightMax = 1.0;
 	m_synonymWeight           = 0.9;
+	m_pageTemperatureWeightMin = 1.0;
+	m_pageTemperatureWeightMax = 20.0;
 	m_usePageTemperatureForRanking = true;
+
 	for(int i=0; i<26; i++)
 		m_flagScoreMultiplier[i] = 1.0;
 	for(int i=0; i<26; i++)
@ -61,6 +68,7 @@ void Msg39Request::reset() {
 	size_query                = 0;
 	size_whiteList            = 0;
 	m_sameLangWeight          = 20.0;
+	m_unknownLangWeight       = 10.0;

 	// -1 means to not to docid range restriction
 	m_minDocId = -1LL;
--- a/Msg39.h
+++ b/Msg39.h
@ -37,6 +37,7 @@ class Msg39Request {
 	int32_t    m_maxQueryTerms;
 	int32_t    m_numDocIdSplits;
 	float   m_sameLangWeight;
+	float	m_unknownLangWeight;

 	//int32_t    m_compoundListMaxSize;
 	uint8_t m_language;
@ -58,7 +59,13 @@ class Msg39Request {
 	bool    m_doMaxScoreAlgo;

 	ScoringWeights m_scoringWeights;
+	float m_termFreqWeightFreqMin;
+	float m_termFreqWeightFreqMax;
+	float m_termFreqWeightMin;
+	float m_termFreqWeightMax;
 	float   m_synonymWeight;
+	float	m_pageTemperatureWeightMin;
+	float	m_pageTemperatureWeightMax;
 	bool    m_usePageTemperatureForRanking;

 	float m_flagScoreMultiplier[26];
--- a/Msg3a.cpp
+++ b/Msg3a.cpp
@ -200,7 +200,8 @@ bool Msg3a::getDocIds(const SearchInput *si, Query *q, void *state,
 		     (PTRTYPE)this);
 	}

-	setTermFreqWeights(m_msg39req.m_collnum, m_q);
+	setTermFreqWeights(m_msg39req.m_collnum, m_q, m_msg39req.m_termFreqWeightFreqMin, m_msg39req.m_termFreqWeightFreqMax,
+		m_msg39req.m_termFreqWeightMin,	m_msg39req.m_termFreqWeightMax);

 	if ( m_debug ) {
 		for ( int32_t i = 0 ; i < m_q->m_numTerms ; i++ ) {
@ -1005,15 +1006,15 @@ void Msg3a::printTerms ( ) {
 }


-static float getTermFreqWeight(int64_t termFreq, int64_t numDocsInColl) {
+static float getTermFreqWeight(int64_t termFreq, int64_t numDocsInColl, float termFreqWeightFreqMin, float termFreqWeightFreqMax, float termFreqWeightMin, float termFreqWeightMax) {
 	if(numDocsInColl>0)
-		return scale_linear(((float)termFreq)/numDocsInColl, g_conf.m_termFreqWeightFreqMin, g_conf.m_termFreqWeightFreqMax, g_conf.m_termFreqWeightMax, g_conf.m_termFreqWeightMin);
+		return scale_linear(((float)termFreq)/numDocsInColl, termFreqWeightFreqMin, termFreqWeightFreqMax, termFreqWeightMax, termFreqWeightMin);
 	else
 		return 1.0; //whatever...
 }


-void setTermFreqWeights ( collnum_t collnum , Query *q ) {
+void setTermFreqWeights ( collnum_t collnum , Query *q, float termFreqWeightFreqMin, float termFreqWeightFreqMax, float termFreqWeightMin, float termFreqWeightMax) {
 	int64_t numDocsInColl = 0;
 	RdbBase *base = getRdbBase ( RDB_CLUSTERDB, collnum );
 	if ( base ) numDocsInColl = base->estimateNumGlobalRecs();
@ -1032,7 +1033,7 @@ void setTermFreqWeights ( collnum_t collnum , Query *q ) {
 		// GET THE TERMFREQ for setting weights
 		int64_t tf = g_posdb.getTermFreq ( collnum ,qt->m_termId);
 		qt->m_termFreq = tf;
-		float tfw = getTermFreqWeight(tf,numDocsInColl);
+		float tfw = getTermFreqWeight(tf,numDocsInColl, termFreqWeightFreqMin, termFreqWeightFreqMax, termFreqWeightMin, termFreqWeightMax);
 		qt->m_termFreqWeight = tfw;
 	}
 }
--- a/Msg3a.h
+++ b/Msg3a.h
@ -7,7 +7,7 @@
 class SearchInput;
 class Query;

-void setTermFreqWeights ( collnum_t collnum, class Query *q );
+void setTermFreqWeights ( collnum_t collnum , Query *q, float termFreqWeightFreqMin, float termFreqWeightFreqMax, float termFreqWeightMin, float termFreqWeightMax);

 #define MAX_SHARDS 1024

--- a/Msg40.cpp
+++ b/Msg40.cpp
@ -344,7 +344,16 @@ bool Msg40::federatedLoop ( ) {
 				 m_si->m_hashGroupWeightInternalLinkText,
 				 m_si->m_hashGroupWeightInUrl,
 				 m_si->m_hashGroupWeightInMenu);
+
+	mr.m_termFreqWeightFreqMin = m_si->m_termFreqWeightFreqMin;
+	mr.m_termFreqWeightFreqMax = m_si->m_termFreqWeightFreqMax;
+	mr.m_termFreqWeightMin = m_si->m_termFreqWeightMin;
+	mr.m_termFreqWeightMax = m_si->m_termFreqWeightMax;
+
 	mr.m_synonymWeight             = m_si->m_synonymWeight;
+	mr.m_pageTemperatureWeightMin = m_si->m_pageTemperatureWeightMin;
+	mr.m_pageTemperatureWeightMax = m_si->m_pageTemperatureWeightMax;
+
 	mr.m_usePageTemperatureForRanking = m_si->m_usePageTemperatureForRanking;
 	memcpy(mr.m_flagScoreMultiplier, m_si->m_flagScoreMultiplier, sizeof(mr.m_flagScoreMultiplier));
 	memcpy(mr.m_flagRankAdjustment, m_si->m_flagRankAdjustment, sizeof(mr.m_flagRankAdjustment));
@ -364,6 +373,7 @@ bool Msg40::federatedLoop ( ) {
 	mr.m_minSerpDocId              = m_si->m_minSerpDocId;
 	mr.m_maxSerpScore              = m_si->m_maxSerpScore;
 	mr.m_sameLangWeight            = m_si->m_sameLangWeight;
+	mr.m_unknownLangWeight = m_si->m_unknownLangWeight;
 	memcpy(mr.m_queryId, m_si->m_queryId, sizeof(m_si->m_queryId));

 	if ( mr.m_timeout < m_si->m_minMsg3aTimeout )
@ -1494,6 +1504,14 @@ bool Msg40::gotSummary ( ) {
 			continue;
 		}

+		// filter simplified redirection/non-canonical document
+		if (mr && mr->size_rubuf > 1 && mr->m_contentLen == 0) {
+			if (!m_si->m_showErrors) {
+				*level = CR_EMPTY_REDIRECTION_PAGE;
+				continue;
+			}
+		}
+
 		// filter empty title & summaries
 		if ( mr && mr->size_tbuf <= 1 && mr->size_displaySum <= 1 ) {
 			if ( ! m_si->m_showErrors ) {
--- a/Msg51.cpp
+++ b/Msg51.cpp
@ -34,8 +34,8 @@ const char * const g_crStrings[] = {
 	"summary error"          ,
 	"duplicate"              ,
 	"clusterdb error (subcount of visible)" ,
-        "duplicate url",
-	"wasted summary lookup"  ,
+	"duplicate url",
+	"empty redirection page" ,
 	"visible"                ,
 	"blacklisted"            ,
 	"ruleset filtered"       ,
--- a/Msg51.h
+++ b/Msg51.h
@ -48,9 +48,8 @@ enum {
 	CR_ERROR_CLUSTERDB ,
 	// the url is a dup of a previous url (wiki pages capitalization)
 	CR_DUP_URL         ,
-	// . subset of the CR_OK (visible) results are "wasted" titlerec lookup
-	// . only used for stats by Msg40.cpp/Stats.cpp
-	CR_WASTED          ,
+	// the url doesn't have any content due to simplified redirection page/non-canonical page
+	CR_EMPTY_REDIRECTION_PAGE,
 	// the docid is ok to display!
 	CR_OK              ,
 	// from a blacklisted site hash
--- a/PageHosts.cpp
+++ b/PageHosts.cpp
@ -21,9 +21,6 @@ static int errorsSort     ( const void *i1, const void *i2 );
 static int tryagainSort   ( const void *i1, const void *i2 );
 static int dgramsToSort   ( const void *i1, const void *i2 );
 static int dgramsFromSort ( const void *i1, const void *i2 );
-static int memUsedSort    ( const void *i1, const void *i2 );
-static int cpuUsageSort   ( const void *i1, const void *i2 );
-static int diskUsageSort  ( const void *i1, const void *i2 );

 static int32_t generatePingMsg( Host *h, int64_t nowms, char *buffer );

@ -156,15 +153,6 @@ skipReplaceHost:

 			       "<td><b>docs indexed</a></td>"

-			       "<td><a href=\"/admin/hosts?c=%s&sort=9\">"
-			       "<b>mem used</a></td>"
-
-			       "<td><a href=\"/admin/hosts?c=%s&sort=10\">"
-			       "<b>cpu used</b></a></td>"
-
-			       "<td><a href=\"/admin/hosts?c=%s&sort=17\">"
-			       "<b>disk used</b></a></td>"
-
 			       "<td><a href=\"/admin/hosts?c=%s&sort=14\">"
 			       "<b>max ping1</b></a></td>"

@ -190,9 +178,6 @@ skipReplaceHost:
 			       cs,
 			       cs,
 			       cs,
-			       cs,
-			       cs,
-			       cs,
 			       shotcol    );

 	// loop through each host we know and print it's stats
@ -225,15 +210,12 @@ skipReplaceHost:
 	case 6: gbsort ( hostSort, nh, sizeof(int32_t), dgramsToSort   ); break;
 	case 7: gbsort ( hostSort, nh, sizeof(int32_t), dgramsFromSort ); break;
 	//case 8:
-	case 9: gbsort ( hostSort, nh, sizeof(int32_t), memUsedSort    ); break;
-	case 10:gbsort ( hostSort, nh, sizeof(int32_t), cpuUsageSort   ); break;
 	case 11:gbsort ( hostSort, nh, sizeof(int32_t), pingAgeSort    ); break;
 	case 12:gbsort ( hostSort, nh, sizeof(int32_t), flagSort       ); break;
 	case 13:gbsort ( hostSort, nh, sizeof(int32_t), splitTimeSort  ); break;
 	case 14:gbsort ( hostSort, nh, sizeof(int32_t), pingMaxSort    ); break;
 	//case 15:
 	case 16:gbsort ( hostSort, nh, sizeof(int32_t), defaultSort    ); break;
-	case 17:gbsort ( hostSort, nh, sizeof(int32_t), diskUsageSort   ); break;

 	}

@ -309,27 +291,6 @@ skipReplaceHost:
 		char ipbuf3[64];
 		strcpy(ipbuf3,iptoa(eip));

-		const char *fontTagFront = "";
-		const char *fontTagBack  = "";
-		if ( h->m_pingInfo.m_percentMemUsed >= 98.0 && 
-		     format == FORMAT_HTML ) {
-			fontTagFront = "<font color=red>";
-			fontTagBack  = "</font>";
-		}
-
-		float cpu = h->m_pingInfo.m_cpuUsage;
-		if ( cpu > 100.0 ) cpu = 100.0;
-		if ( cpu < 0.0   ) cpu = -1.0;
-
-		char diskUsageMsg[64];
-		sprintf(diskUsageMsg,"%.1f%%",h->m_pingInfo.m_diskUsage);
-		if ( h->m_pingInfo.m_diskUsage < 0.0 )
-			sprintf(diskUsageMsg,"???");
-		if ( h->m_pingInfo.m_diskUsage>=98.0 && format == FORMAT_HTML )
-			sprintf(diskUsageMsg,"<font color=red><b>%.1f%%"
-				"</b></font>",h->m_pingInfo.m_diskUsage);
-
-
 		// split time, don't divide by zero!
 		int32_t splitTime = 0;
 		if ( h->m_splitsDone ) 
@ -355,42 +316,10 @@ skipReplaceHost:
 		int32_t flags = h->m_pingInfo.m_flags;


-		if ( format == FORMAT_HTML ) {
-			// use these new ones for now
-			int n = h->m_pingInfo.m_numCorruptDiskReads;
-			if ( n )
-				fb.safePrintf("<font color=red><b>"
-					      "C"
-					      "<sup>%" PRId32"</sup>"
-					      "</b></font>"
-					      , n );
-			n = h->m_pingInfo.m_numOutOfMems;
-			if ( n )
-				fb.safePrintf("<font color=red><b>"
-					      "O"
-					      "<sup>%" PRId32"</sup>"
-					      "</b></font>"
-					      , n );
-			n = h->m_pingInfo.m_socketsClosedFromHittingLimit;
-			if ( n )
-				fb.safePrintf("<font color=red><b>"
-					      "K"
-					      "<sup>%" PRId32"</sup>"
-					      "</b></font>"
-					      , n );
-		}
-
 		// recovery mode? reocvered from coring?
 		if ((flags & PFLAG_RECOVERYMODE)&& format == FORMAT_HTML ) {
 			fb.safePrintf("<b title=\"Recovered from core"
 				      "\">x</b>");
-			// this is only 8-bits at the moment so it's capped
-			// at 255. this level is 1 the first time we core
-			// and are restarted.
-			if ( h->m_pingInfo.m_recoveryLevel > 1 )
-			fb.safePrintf("<sup>%" PRId32"</sup>",
-				      (int32_t)
-				      h->m_pingInfo.m_recoveryLevel);
 		}

 		if ((flags & PFLAG_RECOVERYMODE)&& format != FORMAT_HTML )
@ -416,59 +345,7 @@ skipReplaceHost:

 		// if it has spiders going on say "S" with # as the superscript
 		if ((flags & PFLAG_HASSPIDERS) && format == FORMAT_HTML )
-			fb.safePrintf ( "<span title=\"Spidering\">S"
-					"<sup>%" PRId32"</sup>"
-					"</span>"
-					,h->m_pingInfo.m_currentSpiders
-					);
-
-		if ( format == FORMAT_HTML && 
-		     h->m_pingInfo.m_udpSlotsInUseIncoming ) {
-			const char *f1 = "";
-			const char *f2 = "";
-			// MAXUDPSLOTS in Spider.cpp is 300 right now
-			if ( h->m_pingInfo.m_udpSlotsInUseIncoming >= 300 ) {
-				f1 = "<b>";
-				f2 = "</b>";
-			}
-			if ( h->m_pingInfo.m_udpSlotsInUseIncoming >= 400 ) {
-				f1 = "<b><font color=red>";
-				f2 = "</font></b>";
-			}
-			fb.safePrintf("<span title=\"udpSlotsInUse\">"
-				      "%s"
-				      "U"
-				      "<sup>%" PRId32"</sup>"
-				      "%s"
-				      "</span>"
-				      ,f1
-				      ,h->m_pingInfo.m_udpSlotsInUseIncoming
-				      ,f2
-				      );
-		}
-
-		if ( format == FORMAT_HTML && h->m_pingInfo.m_tcpSocketsInUse){
-			const char *f1 = "";
-			const char *f2 = "";
-			if ( h->m_pingInfo.m_tcpSocketsInUse >= 100 ) {
-				f1 = "<b>";
-				f2 = "</b>";
-			}
-			if ( h->m_pingInfo.m_tcpSocketsInUse >= 200 ) {
-				f1 = "<b><font color=red>";
-				f2 = "</font></b>";
-			}
-			fb.safePrintf("<span title=\"tcpSocketsInUse\">"
-				      "%s"
-				      "T"
-				      "<sup>%" PRId32"</sup>"
-				      "%s"
-				      "</span>"
-				      ,f1
-				      ,h->m_pingInfo.m_tcpSocketsInUse
-				      ,f2
-				      );
-		}
+			fb.safePrintf ( "<span title=\"Spidering\">S</span>");

 		if ((flags & PFLAG_HASSPIDERS) && format != FORMAT_HTML )
 			fb.safePrintf ( "Spidering");
@ -556,14 +433,6 @@ skipReplaceHost:
 				      "</errorTryAgains>\n",
 				      h->m_etryagains.load());

-			sb.safePrintf("\t\t<udpSlotsInUse>%" PRId32
-				      "</udpSlotsInUse>\n",
-				      h->m_pingInfo.m_udpSlotsInUseIncoming);
-
-			sb.safePrintf("\t\t<tcpSocketsInUse>%" PRId32
-				      "</tcpSocketsInUse>\n",
-				      h->m_pingInfo.m_tcpSocketsInUse);
-
 			/*
 			sb.safePrintf("\t\t<dgramsTo>%" PRId64"</dgramsTo>\n",
 				      h->m_dgramsTo);
@ -571,21 +440,6 @@ skipReplaceHost:
 				      h->m_dgramsFrom);
 			*/

-			sb.safePrintf("\t\t<numCorruptDiskReads>%" PRId32
-				      "</numCorruptDiskReads>\n"
-				      ,h->m_pingInfo.m_numCorruptDiskReads);
-			sb.safePrintf("\t\t<numOutOfMems>%" PRId32
-				      "</numOutOfMems>\n"
-				      ,h->m_pingInfo.m_numOutOfMems);
-			sb.safePrintf("\t\t<numClosedSockets>%" PRId32
-				      "</numClosedSockets>\n"
-				      ,h->m_pingInfo.
-				      m_socketsClosedFromHittingLimit);
-			sb.safePrintf("\t\t<numOutstandingSpiders>%" PRId32
-				      "</numOutstandingSpiders>\n"
-				      ,h->m_pingInfo.m_currentSpiders );
-
-
 			sb.safePrintf("\t\t<splitTime>%" PRId32"</splitTime>\n",
 				      splitTime);
 			sb.safePrintf("\t\t<splitsDone>%" PRId32"</splitsDone>\n",
@ -598,18 +452,6 @@ skipReplaceHost:
 				      "</docsIndexed>\n",
 				      h->m_pingInfo.m_totalDocsIndexed);

-			sb.safePrintf("\t\t<percentMemUsed>%.1f%%"
-				      "</percentMemUsed>",
-				      h->m_pingInfo.m_percentMemUsed); // float
-
-			sb.safePrintf("\t\t<cpuUsage>%.1f%%"
-				      "</cpuUsage>",
-				      cpu );
-
-			sb.safePrintf("\t\t<percentDiskUsed><![CDATA[%s]]>"
-				      "</percentDiskUsed>",
-				      diskUsageMsg);
-
 			sb.safePrintf("\t\t<maxPing1>%s</maxPing1>\n",
 				      pms );

@ -671,10 +513,6 @@ skipReplaceHost:
 			*/
 			sb.safePrintf("\t\t\t\t\"errorTryAgains\":%" PRId32",\n",
 				      h->m_etryagains.load());
-			sb.safePrintf("\t\t\t\t\"udpSlotsInUse\":%" PRId32",\n",
-				      h->m_pingInfo.m_udpSlotsInUseIncoming);
-			sb.safePrintf("\t\t\t\t\"tcpSocketsInUse\":%" PRId32",\n",
-				      h->m_pingInfo.m_tcpSocketsInUse);

 			/*
 			sb.safePrintf("\t\t\t\t\"dgramsTo\":%" PRId64",\n",
@ -684,18 +522,6 @@ skipReplaceHost:
 			*/


-			sb.safePrintf("\t\t\t\t\"numCorruptDiskReads\":%" PRId32",\n"
-				      ,h->m_pingInfo.m_numCorruptDiskReads);
-			sb.safePrintf("\t\t\t\t\"numOutOfMems\":%" PRId32",\n"
-				      ,h->m_pingInfo.m_numOutOfMems);
-			sb.safePrintf("\t\t\t\t\"numClosedSockets\":%" PRId32",\n"
-				      ,h->m_pingInfo.
-				      m_socketsClosedFromHittingLimit);
-			sb.safePrintf("\t\t\t\t\"numOutstandingSpiders\":%" PRId32
-				      ",\n"
-				      ,h->m_pingInfo.m_currentSpiders );
-
-
 			sb.safePrintf("\t\t\t\t\"splitTime\":%" PRId32",\n",
 				      splitTime);
 			sb.safePrintf("\t\t\t\t\"splitsDone\":%" PRId32",\n",
@ -707,14 +533,6 @@ skipReplaceHost:
 			sb.safePrintf("\t\t\t\t\"docsIndexed\":%" PRId32",\n",
 				      h->m_pingInfo.m_totalDocsIndexed);

-			sb.safePrintf("\t\t\t\t\"percentMemUsed\":\"%.1f%%\",\n",
-				      h->m_pingInfo.m_percentMemUsed); // float
-
-			sb.safePrintf("\t\t\t\t\"cpuUsage\":\"%.1f%%\",\n",cpu);
-
-			sb.safePrintf("\t\t\t\t\"percentDiskUsed\":\"%s\",\n",
-				      diskUsageMsg);
-
 			sb.safePrintf("\t\t\t\t\"maxPing1\":\"%s\",\n",pms);

 			sb.safePrintf("\t\t\t\t\"maxPingAge1\":\"%" PRId32"ms\",\n",
@ -799,13 +617,6 @@ skipReplaceHost:
 			  // docs indexed
 			  "<td>%" PRId32"</td>"

-			  // percent mem used
-			  "<td>%s%.1f%%%s</td>"
-			  // cpu usage
-			  "<td>%.1f%%</td>"
-			  // disk usage
-			  "<td>%s</td>"
-
 			  // ping max
 			  "<td>%s</td>"

@ -844,12 +655,6 @@ skipReplaceHost:

 			  h->m_pingInfo.m_totalDocsIndexed,

-			  fontTagFront,
-			  h->m_pingInfo.m_percentMemUsed, // float
-			  fontTagBack,
-			  cpu, // float
-			  diskUsageMsg,
-
 			  // ping max
 			  pms,
 			  // ping age
@ -889,13 +694,6 @@ skipReplaceHost:
 	// end the table now
 	sb.safePrintf ( "</table><br>\n" );

-sb.safePrintf("<table>");
-for(int i=0; i<nh; i++) {
-	Host *h = g_hostdb.getHost(hostSort[i]);
-	sb.safePrintf("<tr><td>%lu</t><td>%lu</td></tr>", h->getLastRequestSendTimestamp(), h->getLastResponseReceiveTimestamp());
-}
-sb.safePrintf("</table>");
-	

 	if( g_hostdb.m_numSpareHosts ) {
 		// print spare hosts table
@ -1341,33 +1139,3 @@ int dgramsFromSort ( const void *i1, const void *i2 ) {
 	if ( h1->m_dgramsFrom < h2->m_dgramsFrom ) return  1;
 	return 0;
 }
-
-int memUsedSort ( const void *i1, const void *i2 ) {
-	Host *h1 = g_hostdb.getHost ( *(int32_t*)i1 );
-	Host *h2 = g_hostdb.getHost ( *(int32_t*)i2 );
-	PingInfo *p1 = &h1->m_pingInfo;
-	PingInfo *p2 = &h2->m_pingInfo;
-	if ( p1->m_percentMemUsed > p2->m_percentMemUsed ) return -1;
-	if ( p1->m_percentMemUsed < p2->m_percentMemUsed ) return  1;
-	return 0;
-}
-
-int cpuUsageSort ( const void *i1, const void *i2 ) {
-	Host *h1 = g_hostdb.getHost ( *(int32_t*)i1 );
-	Host *h2 = g_hostdb.getHost ( *(int32_t*)i2 );
-	PingInfo *p1 = &h1->m_pingInfo;
-	PingInfo *p2 = &h2->m_pingInfo;
-	if ( p1->m_cpuUsage > p2->m_cpuUsage ) return -1;
-	if ( p1->m_cpuUsage < p2->m_cpuUsage ) return  1;
-	return 0;
-}
-
-int diskUsageSort ( const void *i1, const void *i2 ) {
-	Host *h1 = g_hostdb.getHost ( *(int32_t*)i1 );
-	Host *h2 = g_hostdb.getHost ( *(int32_t*)i2 );
-	PingInfo *p1 = &h1->m_pingInfo;
-	PingInfo *p2 = &h2->m_pingInfo;
-	if ( p1->m_diskUsage > p2->m_diskUsage ) return -1;
-	if ( p1->m_diskUsage < p2->m_diskUsage ) return  1;
-	return 0;
-}
--- a/PageResults.cpp
+++ b/PageResults.cpp
@ -2737,7 +2737,7 @@ badformat:
 	if ( scr ) coll = scr->m_coll;

 	if ( si->m_format == FORMAT_HTML && printCached ) {
-		sb->safePrintf ( "<a href=\"/get?q=%s&qlang=%s&c=%s&d=%" PRId64 "&cnsp=0\">cached</a>\n",
+		sb->safePrintf ( "<a href=\"/get?q=%s&qlang=%s&c=%s&d=%" PRId64 "&cnsp=0\">cached</a> - \n",
 			st->m_qesb.getBufStart() ,
 			si->m_defaultSortLang,		// "qlang" parm
 			coll ,
@ -2750,7 +2750,7 @@ badformat:
 	if ( si->m_format == FORMAT_HTML && si->m_getDocIdScoringInfo ) {
 		// place holder for backlink table link
 		placeHolder = sb->length();
-		sb->safePrintf (" - <a onclick="
+		sb->safePrintf ("<a onclick="
 			       "\""
 			       "var e = document.getElementById('bl%" PRId32"');"
 			       "if ( e.style.display == 'none' ){"
@ -2772,7 +2772,7 @@ badformat:
 		placeHolderLen = sb->length() - placeHolder;

 		// unhide the scoring table on click
-		sb->safePrintf (" - <a onclick="
+		sb->safePrintf ("<a onclick="
 			       "\""
 			       "var e = document.getElementById('sc%" PRId32"');"
 			       "if ( e.style.display == 'none' ){"
--- a/PageTemperatureRegistry.cpp
+++ b/PageTemperatureRegistry.cpp
@ -1,10 +1,11 @@
 #include "PageTemperatureRegistry.h"
+#include "ScalingFunctions.h"
 #include "Log.h"
 #include <stdio.h>
 #include <string.h>
 #include <errno.h>
 #include <sys/stat.h>
-
+#include <math.h>

 PageTemperatureRegistry g_pageTemperatureRegistry;

@ -97,13 +98,18 @@ bool PageTemperatureRegistry::load() {

 	temperature_range_for_scaling = max_temperature-min_temperature;
 	
+	min_temperature_log = log(min_temperature);
+	max_temperature_log = log(max_temperature);
+	temperature_range_for_scaling_log = log(temperature_range_for_scaling);
+	default_temperature_log = log(default_temperature);
+
 	if(!using_meta)
 		log(LOG_WARN, "meta-file %s could not be loaded. Using default temperature of %u which can scew results for new pages", meta_filename, default_temperature);
 	
 	log(LOG_DEBUG, "pagetemp: min_temperature=%u",min_temperature);
 	log(LOG_DEBUG, "pagetemp: max_temperature=%u",max_temperature);
 	log(LOG_DEBUG, "pagetemp: default_temperature=%u",default_temperature);
-	
+
 	log(LOG_DEBUG, "%s loaded (%lu items)", filename, (unsigned long)new_entries);
 	return true;
 }
@ -129,11 +135,15 @@ unsigned PageTemperatureRegistry::query_page_temperature_internal(uint64_t docid
 }


-double PageTemperatureRegistry::query_page_temperature(uint64_t docid) const {
+double PageTemperatureRegistry::query_page_temperature(uint64_t docid, double range_min, double range_max) const {
 	if(hash_table_size==0)
-		return 1.0;
-	unsigned temperature_26bit = query_page_temperature_internal(docid);
+		return scale_linear(default_temperature_log, min_temperature_log, max_temperature_log, range_min, range_max);
+
+	double temperature_26bit_log = log((double)query_page_temperature_internal(docid));
 	//Then scale to a number in the rangte [0..1]
 	//It is a bit annoying to do this computation for each lookup but it saves memory
-	return ((double)(temperature_26bit - min_temperature)) / temperature_range_for_scaling;
+//	return ((double)(temperature_26bit - min_temperature)) / temperature_range_for_scaling;
+	return scale_linear(temperature_26bit_log, min_temperature_log, max_temperature_log, range_min, range_max);
 }
+
+
--- a/PageTemperatureRegistry.h
+++ b/PageTemperatureRegistry.h
@ -15,6 +15,12 @@ class PageTemperatureRegistry {
 	unsigned max_temperature;
 	unsigned temperature_range_for_scaling;
 	unsigned default_temperature;
+
+	double min_temperature_log;
+	double max_temperature_log;
+	double temperature_range_for_scaling_log;
+	double default_temperature_log;
+
 	unsigned query_page_temperature_internal(uint64_t docid) const;
 public:
 	PageTemperatureRegistry()
@ -26,7 +32,7 @@ public:
 	bool load();
 	void unload();
 	
-	double query_page_temperature(uint64_t docid) const;
+	double query_page_temperature(uint64_t docid, double range_min, double range_max) const;
 	
 	bool empty() const { return entries==0; }
 };
--- a/Pages.cpp
+++ b/Pages.cpp
@ -2411,25 +2411,6 @@ bool printRedBox ( SafeBuf *mb , TcpSocket *sock , HttpRequest *hr ) {
 		mb->safePrintf("%s",boxEnd);
 	}

-	// out of disk space?
-	int32_t out = 0;
-	for ( int32_t i = 0 ; i < g_hostdb.getNumHosts() ; i++ ) {
-		Host *h = &g_hostdb.m_hosts[i];
-		if ( h->m_pingInfo.m_diskUsage < 98.0 ) continue;
-		out++;
-	}
-	if ( out > 0 ) {
-		if ( adds ) mb->safePrintf("<br>");
-		adds++;
-		const char *s = "s are";
-		if ( out == 1 ) s = " is";
-		mb->safePrintf("%s",box);
-		mb->safePrintf("%" PRId32" host%s over 98%% disk usage. "
-			       "See the <a href=/admin/hosts?c=%s>"
-			       "hosts</a> table.",out,s,coll);
-		mb->safePrintf("%s",boxEnd);
-	}
-
 	// injections disabled?
 	if ( ! g_conf.m_injectionsEnabled ) {
 		if ( adds ) mb->safePrintf("<br>");
@ -2481,13 +2462,12 @@ bool printRedBox ( SafeBuf *mb , TcpSocket *sock , HttpRequest *hr ) {
 	for ( int32_t i = 1 ; i < g_hostdb.getNumHosts() ; i++ ) {
 		Host *h = &g_hostdb.m_hosts[i];
 		if ( g_hostdb.isDead( h ) ) continue;
-		if ( h->m_pingInfo.m_udpSlotsInUseIncoming>= 400)jammedHosts++;
 	}
 	if ( jammedHosts > 0 ) {
 		if ( adds ) mb->safePrintf("<br>");
 		adds++;
 		const char *s = "s are";
-		if ( out == 1 ) s = " is";
+		if ( jammedHosts == 1 ) s = " is";
 		mb->safePrintf("%s",box);
 		mb->safePrintf("%" PRId32" host%s jammed with "
 			       "over %" PRId32" unhandled "
--- a/Parms.cpp
+++ b/Parms.cpp
@ -53,8 +53,40 @@ public:
 };


+//
+// User configured values for these parms need to be adjusted to internal ranges.
+// Each entry pairs a parm's field base name with the divisor that
+// Parms::convertUIToInternal() applies to the value supplied by the UI.
+//
+static const struct {
+	const char *name;	// field base name, matched with strcmp(); points at a string literal
+	float div_by;		// divisor applied to UI values >= 1
+} g_fxui_parms[] = {
+	{"diversityweightmin", 100.0},
+	{"diversityweightmax", 100.0},
+	{"densityweightmin", 100.0},
+	{"densityweightmax", 100.0},
+	{"hgw_body", 10.0},
+	{"hgw_title", 10.0},
+	{"hgw_heading", 10.0},
+	{"hgw_list", 10.0},
+	{"hgw_metatag", 10.0},
+	{"hgw_inlinktext", 10.0},
+	{"hgw_intag", 10.0},
+	{"hgw_neighborhood", 10.0},
+	{"hgw_inmenu", 10.0},
+	{"hgw_inintlinktext", 10.0},
+	{"hgw_inurl", 10.0},
+	{"synonym_weight", 10.0},
+	{"termfreqweightfreqmin", 100.0},
+	{"termfreqweightfreqmax", 100.0},
+	{"termfreqweightmin", 100.0},
+	{"termfreqweightmax", 100.0}
+};
+
+// number of entries in g_fxui_parms
+static const int g_num_fxui_parms = sizeof(g_fxui_parms) / sizeof(g_fxui_parms[0]);
+
 Parms g_parms;

+
 Parm::Parm() {
 	// Coverity
 	m_title = NULL;
@ -827,8 +859,7 @@ bool Parms::setGigablastRequest ( TcpSocket *socket ,
 		//if ( (m->m_perms & user) == 0 ) continue;
 		// set it. now our TYPE_CHARPTR will just be set to it directly
 		// to save memory...
-		setParm ( (char *)THIS , m, 0, v, false,//not html enc
-			  false ); // true );
+		setParm ( (char *)THIS , m, 0, v);
 	}

 	return true;
@ -1962,12 +1993,63 @@ bool Parms::printParm( SafeBuf* sb,
 	return status;
 }

+
+//
+// Convert external weights presented in the frontend UI to internal values
+//
+// . looks up field_base_name in g_fxui_parms
+// . for float, double, int32 and int64 parms the value in s is parsed and,
+//   when it is >= 1 and the table divisor is > 1, divided by that divisor
+// . a NULL s is treated as 0
+// . the result is printed into adjusted_value, which must have room for
+//   at least 128 bytes
+// . returns true if a value was written, false if the name is not in the
+//   table or the parm type is not one of the handled numeric types
+//
+bool Parms::convertUIToInternal(const char *field_base_name, parameter_type_t type, const char *s, char *adjusted_value) {
+	// size passed to every snprintf below; the caller's buffer must be this big
+	const size_t out_size = 128;
+
+	for(int idx = 0; idx < g_num_fxui_parms; idx++) {
+		if( strcmp(g_fxui_parms[idx].name, field_base_name) != 0 ) {
+			continue; //not this entry
+		}
+
+		const float divisor = g_fxui_parms[idx].div_by;
+
+		if( type == TYPE_FLOAT ) {
+			float fval = s ? (float)atof(s) : 0;
+			if( fval >= 1.0 && divisor > 1.0 ) {
+				fval = fval / divisor;
+			}
+			snprintf(adjusted_value, out_size, "%f", fval);
+			return true;
+		}
+
+		if( type == TYPE_DOUBLE ) {
+			double dval = s ? (double)atof ( s ) : 0;
+			if( dval >= 1.0 && divisor > 1.0 ) {
+				dval = dval / divisor;
+			}
+			snprintf(adjusted_value, out_size, "%f", dval);
+			return true;
+		}
+
+		if( type == TYPE_INT32 || type == TYPE_INT32_CONST ) {
+			// integer parms use the truncated divisor and integer division
+			const int32_t int_divisor = (int32_t)divisor;
+			int32_t ival = s ? atol(s) : 0;
+			if( ival >= 1 && int_divisor > 1 ) {
+				ival = ival / int_divisor;
+			}
+			snprintf(adjusted_value, out_size, "%" PRId32 "", ival);
+			return true;
+		}
+
+		if( type == TYPE_INT64 ) {
+			const int64_t wide_divisor = (int64_t)divisor;
+			int64_t lval = s ? strtoull(s,NULL,10) : 0;
+			if( lval >= 1 && wide_divisor > 1 ) {
+				lval = lval / wide_divisor;
+			}
+			snprintf(adjusted_value, out_size, "%" PRId64 "", lval);
+			return true;
+		}
+
+		// type not handled for this entry; move on to the next table entry
+	}
+
+	return false;
+}
+
+
 // now we use this to set SearchInput and GigablastRequest
-bool Parms::setFromRequest ( HttpRequest *r ,
-			     TcpSocket* s,
-			     CollectionRec *newcr ,
-			     char *THIS ,
-			     parameter_object_type_t objType) {
+bool Parms::setFromRequest(HttpRequest *r, TcpSocket *s, CollectionRec *newcr, char *THIS, parameter_object_type_t objType) {

 	// use convertHttpRequestToParmList() for these because they
 	// are persistent records that are updated on every shard.
@ -1985,31 +2067,47 @@ bool Parms::setFromRequest ( HttpRequest *r ,
 	for(int32_t i = 0; i < r->getNumFields(); i++) {
 		// get the value of cgi parm (null terminated)
 		const char *v = r->getValue(i);
-		if(!v)
+		if(!v) {
 			continue; //no value
+		}
 		// get cgi parm name
 		const char *full_field_name = r->getField(i);
 		size_t full_field_name_len = strlen(full_field_name);
-		if(full_field_name_len>=128)
+		if(full_field_name_len>=128) {
 			continue;
-		char field_base_name[128];
-		int field_index;
-		size_t nondigit_prefix_len = strcspn(full_field_name,"0123456789");
-		if(nondigit_prefix_len!=full_field_name_len) {
-			//field name contains digits. Split into base field name and index
-			memcpy(field_base_name,full_field_name,nondigit_prefix_len);
-			field_base_name[nondigit_prefix_len] = '\0';
-			char *endptr = NULL;
-			field_index = strtol(full_field_name+nondigit_prefix_len, &endptr, 10);
-			if(field_index<0)
-				continue; //hmm?
-			if(endptr && *endptr)
-				continue; //digits weren't the last part
-			
-		} else {
-			strcpy(field_base_name,full_field_name);
-			field_index = 0;
 		}
+
+		char field_base_name[128];
+		bool uiconvert = false;
+		int field_index=0;
+
+		//
+		// To make user configuration of ranking parameters simpler, we sometimes
+		// use other valid ranges in parameters than those used internally. Prefix
+		// the param name with 'fxui_' and add the name and divisor to the global
+		// table to automatically adjust external values to internal ones.
+		//
+		if( strncmp(full_field_name, "fxui_", 5) == 0 ) {
+			strcpy(field_base_name, full_field_name+5);
+			uiconvert=true;
+		}
+		else {
+			size_t nondigit_prefix_len = strcspn(full_field_name,"0123456789");
+			if(nondigit_prefix_len!=full_field_name_len) {
+				//field name contains digits. Split into base field name and index
+				memcpy(field_base_name,full_field_name,nondigit_prefix_len);
+				field_base_name[nondigit_prefix_len] = '\0';
+				char *endptr = NULL;
+				field_index = strtol(full_field_name+nondigit_prefix_len, &endptr, 10);
+				if(field_index<0)
+					continue; //hmm?
+				if(endptr && *endptr)
+					continue; //digits weren't the last part
+			} else {
+				strcpy(field_base_name,full_field_name);
+			}
+		}
+
 		// find in parms list
 		int32_t  j;
 		Parm *m;
@ -2021,17 +2119,33 @@ bool Parms::setFromRequest ( HttpRequest *r ,
 			   strcmp(field_base_name,m->m_cgi) == 0)
 				break; //found it
 		}
-		if(j >= m_numParms)
+		if(j >= m_numParms) {
 			continue; //cgi parm name not found
-		if(field_index>0 && field_index>m->m_max)
+		}
+
+		if(field_index>0 && field_index>m->m_max) {
 			continue; //out-of-bounds
+		}
+
 		// . skip if no value was provided
 		// . unless it was a string! so we can make them empty.
 		if(v[0] == '\0' &&
 		     m->m_type != TYPE_STRING &&
-		     m->m_type != TYPE_STRINGBOX) continue;
+		     m->m_type != TYPE_STRINGBOX) {
+			continue;
+		}
+
+		char adjusted_value[128];
+		if( uiconvert ) {
+			if( !convertUIToInternal(field_base_name, m->m_type, v, adjusted_value) ) {
+				log(LOG_ERROR, "Could not convert value of '%s' for '%s'", field_base_name, v);
+				continue;
+			}
+			v = adjusted_value;
+		}
+
 		// set it
-		setParm(THIS, m, field_index, v, false, false);
+		setParm(THIS, m, field_index, v);
 	}

 	return true;
@ -2078,7 +2192,7 @@ bool Parms::insertParm ( int32_t i , int32_t an ,  char *THIS ) {
 	*(int32_t *)(THIS + m->m_arrayCountOffset) = *(int32_t *)(THIS + m->m_arrayCountOffset)+1;

 	// put the defaults in the inserted line
-	setParm ( (char *)THIS , m, an , m->m_def , false ,false );
+	setParm ( (char *)THIS , m, an , m->m_def);
 	return true;
 }

@ -2128,9 +2242,7 @@ bool Parms::removeParm ( int32_t i , int32_t an , char *THIS ) {



-void Parms::setParm(char *THIS, Parm *m, int32_t array_index, const char *s, bool isHtmlEncoded, bool fromRequest) {
-
-	if ( fromRequest ) { g_process.shutdownAbort(true); }
+void Parms::setParm(char *THIS, Parm *m, int32_t array_index, const char *s) {

 	// . this is just for setting CollectionRecs, so skip if offset < 0
 	// . some parms are just for SearchInput (search parms)
@ -2170,8 +2282,6 @@ void Parms::setParm(char *THIS, Parm *m, int32_t array_index, const char *s, boo
 		case TYPE_BOOL:
 		case TYPE_PRIORITY: {
 			char *ptr = (char*)THIS + m->m_off + sizeof(char)*array_index;
-			if ( fromRequest && *(char*)ptr == atol(s))
-				return;
 			*(char*)ptr = s ? atol(s) : 0;
 			break;
 		}
@ -2191,25 +2301,16 @@ void Parms::setParm(char *THIS, Parm *m, int32_t array_index, const char *s, boo
 		}
 		case TYPE_FLOAT: {
 			char *ptr = (char*)THIS + m->m_off + sizeof(float)*array_index;
-			if( fromRequest && almostEqualFloat(*(float *)ptr, (s ? (float)atof(s) : 0)) ) {
-				return;
-			}
-
 			*(float*)ptr = s ? (float)atof ( s ) : 0;
 			break;
 		}
 		case TYPE_DOUBLE: {
 			char *ptr = (char*)THIS + m->m_off + sizeof(double)*array_index;
-			if( fromRequest && almostEqualFloat(*(double*)ptr, ( s ? (double)atof(s) : 0)) ) {
-				return;
-			}
 			*(double*)ptr = s ? (double)atof ( s ) : 0;
 			break;
 		}
 		case TYPE_IP: {
 			char *ptr = (char*)THIS + m->m_off + sizeof(int32_t)*array_index;
-			if ( fromRequest && *(int32_t*)ptr == (s ? (int32_t)atoip(s,strlen(s)) : 0) )
-				return;
 			*(int32_t*)ptr = s ? (int32_t)atoip(s,strlen(s)) : 0;
 			break;
 		}
@ -2219,16 +2320,11 @@ void Parms::setParm(char *THIS, Parm *m, int32_t array_index, const char *s, boo
 			int32_t v = s ? atol(s) : 0;
 			// min is considered valid if >= 0
 			if ( m->m_min >= 0 && v < m->m_min ) v = m->m_min;
-			if ( fromRequest && *(int32_t *)ptr == v )
-				return;
 			*(int32_t *)ptr = v;
 			break;
 		}
 		case TYPE_INT64: {
 			char *ptr = (char*)THIS + m->m_off + sizeof(int64_t)*array_index;
-			if ( fromRequest && *(uint64_t*)ptr == ( s ? strtoull(s,NULL,10) : 0) ) {
-				return;
-			}
 			*(int64_t*)ptr = s ? strtoull(s,NULL,10) : 0;
 			break;
 		}
@ -2240,18 +2336,9 @@ void Parms::setParm(char *THIS, Parm *m, int32_t array_index, const char *s, boo
 			// SafeBufs "array_index" is the # in the array, starting at 0
 			char *ptr = (char*)THIS + m->m_off + sizeof(SafeBuf)*array_index;
 			SafeBuf *sb = (SafeBuf *)ptr;
-			int32_t oldLen = sb->length();
-			// why was this commented out??? we need it now that we
-			// send email alerts when parms change!
-			if ( fromRequest &&
-			     ! isHtmlEncoded && oldLen == len &&
-			     memcmp ( sb->getBufStart() , s , len ) == 0 )
-				return;
-			// nuke it
 			sb->purge();
 			// this means that we can not use string POINTERS as parms!!
-			if ( ! isHtmlEncoded ) sb->safeMemcpy ( s , len );
-			else                   len = sb->htmlDecode (s,len);
+			sb->safeMemcpy ( s , len );
 			// tag it
 			sb->setLabel ( "parm1" );
 			// ensure null terminated
@ -2267,22 +2354,11 @@ void Parms::setParm(char *THIS, Parm *m, int32_t array_index, const char *s, boo
 			int32_t len = strlen(s);
 			if ( len >= m->m_size ) len = m->m_size - 1; // truncate!!
 			char *dst = THIS + m->m_off + m->m_size*array_index;
-			// why was this commented out??? we need it now that we
-			// send email alerts when parms change!
-			if ( fromRequest &&
-			     ! isHtmlEncoded && (int32_t)strlen(dst) == len &&
-			     memcmp ( dst , s , len ) == 0 ) {
-				return;
-			}

 			// this means that we can not use string POINTERS as parms!!
-			if ( !isHtmlEncoded ) {
-				gbmemcpy( dst, s, len );
-			} else {
-				len = htmlDecode( dst, s, len, false );
-			}
-
+			gbmemcpy( dst, s, len );
 			dst[len] = '\0';
+
 			// . might have to set length
 			// . used for CollectionRec::m_htmlHeadLen and m_htmlTailLen
 			if ( m->m_plen >= 0 )
@ -2295,14 +2371,9 @@ void Parms::setParm(char *THIS, Parm *m, int32_t array_index, const char *s, boo
 			log(LOG_LOGIC,"admin: attempt to set parameter %s from cgi-request", m->m_title);
 			return;
 	}
-
-	// do not send if setting from startup
-	if ( ! fromRequest ) return;
-
-	// note it in the log
-	log("admin: parm \"%s\" changed value",m->m_title);
 }

+
 void Parms::setToDefault(char *THIS, parameter_object_type_t objType, CollectionRec *argcr) {
 	// init if we should
 	init();
@ -2344,7 +2415,7 @@ void Parms::setToDefault(char *THIS, parameter_object_type_t objType, Collection
 				char *dst = THIS + m->m_off;
 				memcpy(dst, raw_default, m->m_size);
 			} else
-				setParm(THIS , m, 0, m->m_def, false/*not enc.*/, false );
+				setParm(THIS , m, 0, m->m_def);
 		} else if(m->m_fixed<=0) {
 			//variable-sized array
 			//empty it
@ -2357,7 +2428,7 @@ void Parms::setToDefault(char *THIS, parameter_object_type_t objType, Collection
 					memcpy(dst, raw_default, m->m_size);
 					raw_default = ((char*)raw_default) + m->m_size;
 				} else
-					setParm(THIS, m, k, m->m_def, false/*not enc.*/, false);
+					setParm(THIS, m, k, m->m_def);
 			}
 		}
 	}
@ -2485,7 +2556,7 @@ bool Parms::setFromFile ( void *THIS        ,
 		v[nb] = '\0';

 		// set our parm
-		setParm( (char *)THIS, m, j, v, false, false );
+		setParm( (char *)THIS, m, j, v);

 		// we were set from the explicit file
 		//((CollectionRec *)THIS)->m_orig[i] = 2;
@ -2569,7 +2640,7 @@ bool Parms::setFromFile ( void *THIS        ,
 		v[nb] = '\0';

 		// set our parm
-		setParm( (char *)THIS, m, j, v, false /*is html encoded?*/, false );
+		setParm( (char *)THIS, m, j, v);

 		// do not repeat same node
 		nn++;
@ -3519,27 +3590,48 @@ void Parms::init ( ) {
 	m++;


-	m->m_title = "diversityWeightMin";
-	m->m_desc  = "diversityWeightMin";
-	m->m_cgi   = "diversity_weight_min";
-	simple_m_set(SearchInput,m_diversityWeightMin);
-	m->m_defOff2 = offsetof(Conf,m_diversityWeightMin);
+	// per-query term frequency estimate minimum; like the sibling
+	// termfreqweight* entries it is bound to SearchInput only and takes
+	// its default from the Conf member given in m_defOff2
+	m->m_title = "termfreq min";
+	m->m_desc  = "Term frequency estimate minimum";
+	m->m_cgi   = "termfreqweightfreqmin";
+	simple_m_set(SearchInput,m_termFreqWeightFreqMin);
+	m->m_defOff2 = offsetof(Conf,m_termFreqWeightFreqMin);
+	m->m_def   = "0.000000";
+	m->m_page  = PAGE_RESULTS;
+	m++;
+
+	m->m_title = "termfreq max";
+	m->m_desc  = "Term frequency estimate maximum";
+	m->m_cgi   = "termfreqweightfreqmax";
+	simple_m_set(SearchInput,m_termFreqWeightFreqMax);
+	m->m_defOff2 = offsetof(Conf,m_termFreqWeightFreqMax);
+	m->m_def   = "0.500000";
+	m->m_page  = PAGE_RESULTS;
+	m++;
+
+	m->m_title = "termfreq weight min";
+	m->m_desc  = "Term frequency weight minimum";
+	m->m_cgi   = "termfreqweightmin";
+	simple_m_set(SearchInput,m_termFreqWeightMin);
+	m->m_defOff2 = offsetof(Conf,m_termFreqWeightMin);
+	m->m_def   = "0.500000";
+	m->m_page  = PAGE_RESULTS;
+	m++;
+
+	m->m_title = "termfreq weight max";
+	m->m_desc  = "Term frequency weight maximum";
+	m->m_cgi   = "termfreqweightmax";
+	simple_m_set(SearchInput,m_termFreqWeightMax);
+	m->m_defOff2 = offsetof(Conf,m_termFreqWeightMax);
 	m->m_def   = "1.000000";
 	m->m_page  = PAGE_RESULTS;
 	m++;

-	m->m_title = "diversityWeightMax";
-	m->m_desc  = "diversityWeightMax";
-	m->m_cgi   = "diversity_weight_max";
-	simple_m_set(SearchInput,m_diversityWeightMax);
-	m->m_defOff2 = offsetof(Conf,m_diversityWeightMax);
-	m->m_def   = "1.000000";
-	m->m_page  = PAGE_RESULTS;
-	m++;
+

 	m->m_title = "densityWeightMin";
 	m->m_desc  = "densityWeightMin";
-	m->m_cgi   = "density_weight_min";
+	m->m_cgi   = "densityweightmin";
 	simple_m_set(SearchInput,m_densityWeightMin);
 	m->m_defOff2 = offsetof(Conf,m_densityWeightMin);
 	m->m_def   = "0.350000";
@ -3548,16 +3640,34 @@ void Parms::init ( ) {

 	m->m_title = "densityWeightMax";
 	m->m_desc  = "densityWeightMax";
-	m->m_cgi   = "density_weight_max";
+	m->m_cgi   = "densityweightmax";
 	simple_m_set(SearchInput,m_densityWeightMax);
 	m->m_defOff2 = offsetof(Conf,m_densityWeightMax);
 	m->m_def   = "1.000000";
 	m->m_page  = PAGE_RESULTS;
 	m++;

+	m->m_title = "diversityWeightMin";
+	m->m_desc  = "diversityWeightMin";
+	m->m_cgi   = "diversityweightmin";
+	simple_m_set(SearchInput,m_diversityWeightMin);
+	m->m_defOff2 = offsetof(Conf,m_diversityWeightMin);
+	m->m_def   = "1.000000";
+	m->m_page  = PAGE_RESULTS;
+	m++;
+
+	m->m_title = "diversityWeightMax";
+	m->m_desc  = "diversityWeightMax";
+	m->m_cgi   = "diversityweightmax";
+	simple_m_set(SearchInput,m_diversityWeightMax);
+	m->m_defOff2 = offsetof(Conf,m_diversityWeightMax);
+	m->m_def   = "1.000000";
+	m->m_page  = PAGE_RESULTS;
+	m++;
+
 	m->m_title = "hashGroupWeightBody";
 	m->m_desc  = "hashGroupWeightBody";
-	m->m_cgi   = "hash_group_weight_body";
+	m->m_cgi   = "hgw_body";
 	simple_m_set(SearchInput,m_hashGroupWeightBody);
 	m->m_defOff2 = offsetof(Conf,m_hashGroupWeightBody);
 	m->m_def   = "1.000000";
@ -3566,7 +3676,7 @@ void Parms::init ( ) {

 	m->m_title = "hashGroupWeightTitle";
 	m->m_desc  = "hashGroupWeightTitle";
-	m->m_cgi   = "hashGroupWeightTitle";
+	m->m_cgi   = "hgw_title";
 	simple_m_set(SearchInput,m_hashGroupWeightTitle);
 	m->m_defOff2 = offsetof(Conf,m_hashGroupWeightTitle);
 	m->m_def   = "8.000000";
@ -3575,7 +3685,7 @@ void Parms::init ( ) {

 	m->m_title = "hashGroupWeightHeading";
 	m->m_desc  = "hashGroupWeightHeading";
-	m->m_cgi   = "hash_group_weight_heading";
+	m->m_cgi   = "hgw_heading";
 	simple_m_set(SearchInput,m_hashGroupWeightHeading);
 	m->m_defOff2 = offsetof(Conf,m_hashGroupWeightHeading);
 	m->m_def   = "1.500000";
@ -3584,7 +3694,7 @@ void Parms::init ( ) {

 	m->m_title = "hashGroupWeightInlist";
 	m->m_desc  = "hashGroupWeightInlist";
-	m->m_cgi   = "hash_group_weight_inlist";
+	m->m_cgi   = "hgw_list";
 	simple_m_set(SearchInput,m_hashGroupWeightInlist);
 	m->m_defOff2 = offsetof(Conf,m_hashGroupWeightInlist);
 	m->m_def   = "0.300000";
@ -3593,7 +3703,7 @@ void Parms::init ( ) {

 	m->m_title = "hashGroupWeightInMetaTag";
 	m->m_desc  = "hashGroupWeightInMetaTag";
-	m->m_cgi   = "hash_group_weight_in_meta_tag";
+	m->m_cgi   = "hgw_metatag";
 	simple_m_set(SearchInput,m_hashGroupWeightInMetaTag);
 	m->m_defOff2 = offsetof(Conf,m_hashGroupWeightInMetaTag);
 	m->m_def   = "0.100000";
@ -3602,7 +3712,7 @@ void Parms::init ( ) {

 	m->m_title = "hashGroupWeightInLinkText";
 	m->m_desc  = "hashGroupWeightInLinkText";
-	m->m_cgi   = "hash_group_weight_in_link_text";
+	m->m_cgi   = "hgw_inlinktext";
 	simple_m_set(SearchInput,m_hashGroupWeightInLinkText);
 	m->m_defOff2 = offsetof(Conf,m_hashGroupWeightInLinkText);
 	m->m_def   = "16.000000";
@ -3611,7 +3721,7 @@ void Parms::init ( ) {

 	m->m_title = "hashGroupWeightInTag";
 	m->m_desc  = "hashGroupWeightInTag";
-	m->m_cgi   = "hash_group_weight_in_tag";
+	m->m_cgi   = "hgw_intag";
 	simple_m_set(SearchInput,m_hashGroupWeightInTag);
 	m->m_defOff2 = offsetof(Conf,m_hashGroupWeightInTag);
 	m->m_def   = "1.000000";
@ -3620,7 +3730,7 @@ void Parms::init ( ) {

 	m->m_title = "hashGroupWeightNeighborhood";
 	m->m_desc  = "hashGroupWeightNeighborhood";
-	m->m_cgi   = "hash_group_weight_neighborhood";
+	m->m_cgi   = "hgw_neighborhood";
 	simple_m_set(SearchInput,m_hashGroupWeightNeighborhood);
 	m->m_defOff2 = offsetof(Conf,m_hashGroupWeightNeighborhood);
 	m->m_def   = "0.000000";
@ -3629,7 +3739,7 @@ void Parms::init ( ) {

 	m->m_title = "hashGroupWeightInternalLinkText";
 	m->m_desc  = "hashGroupWeightInternalLinkText";
-	m->m_cgi   = "hash_group_weight_internal_link_text";
+	m->m_cgi   = "hgw_inintlinktext";
 	simple_m_set(SearchInput,m_hashGroupWeightInternalLinkText);
 	m->m_defOff2 = offsetof(Conf,m_hashGroupWeightInternalLinkText);
 	m->m_def   = "4.000000";
@ -3638,7 +3748,7 @@ void Parms::init ( ) {

 	m->m_title = "hashGroupWeightInUrl";
 	m->m_desc  = "hashGroupWeightInUrl";
-	m->m_cgi   = "hash_group_weight_in_url";
+	m->m_cgi   = "hgw_inurl";
 	simple_m_set(SearchInput,m_hashGroupWeightInUrl);
 	m->m_defOff2 = offsetof(Conf,m_hashGroupWeightInUrl);
 	m->m_def   = "1.000000";
@ -3647,7 +3757,7 @@ void Parms::init ( ) {

 	m->m_title = "hashGroupWeightInMenu";
 	m->m_desc  = "hashGroupWeightInMenu";
-	m->m_cgi   = "hash_group_weight_in_menu";
+	m->m_cgi   = "hgw_inmenu";
 	simple_m_set(SearchInput,m_hashGroupWeightInMenu);
 	m->m_defOff2 = offsetof(Conf,m_hashGroupWeightInMenu);
 	m->m_def   = "0.200000";
@ -3665,6 +3775,26 @@ void Parms::init ( ) {
 	m->m_page  = PAGE_RESULTS;
 	m++;

+	m->m_title = "Page temp weight min";
+	m->m_desc  = "Page temp is scaled to be between the min and max";
+	m->m_cgi   = "pagetempweightmin";
+	simple_m_set(SearchInput,m_pageTemperatureWeightMin);
+	m->m_defOff2 = offsetof(Conf,m_pageTemperatureWeightMin);
+	m->m_def   = "1.000000";
+	m->m_flags = PF_HIDDEN | PF_NOSAVE;
+	m->m_page  = PAGE_RESULTS;
+	m++;
+
+	m->m_title = "Page temp weight max";
+	m->m_desc  = "Page temp is scaled to be between the min and max";
+	m->m_cgi   = "pagetempweightmax";
+	simple_m_set(SearchInput,m_pageTemperatureWeightMax);
+	m->m_defOff2 = offsetof(Conf,m_pageTemperatureWeightMax);
+	m->m_def   = "20.000000";
+	m->m_flags = PF_HIDDEN | PF_NOSAVE;
+	m->m_page  = PAGE_RESULTS;
+	m++;
+
 	m->m_title = "Use page temperature";
 	m->m_desc  = "Use page temperature (if available) for ranking";
 	m->m_cgi   = "use_page_temperature";
@ -3741,6 +3871,18 @@ void Parms::init ( ) {
 	m->m_page  = PAGE_RESULTS;
 	m++;

+	// per-query override (cgi "ulangw") stored in SearchInput; the default
+	// comes from the CollectionRec member given in m_defOff
+	m->m_title = "unknown language weight";
+	m->m_desc  = "Use this to override the default unknown language weight "
+		"for this collection. We multiply a result's score by this value "
+		"if the user requested a specific language, but the language of the "
+		"indexed page could not be determined.";
+	simple_m_set(SearchInput,m_unknownLangWeight);
+	m->m_defOff= offsetof(CollectionRec,m_unknownLangWeight);
+	m->m_cgi  = "ulangw";
+	m->m_flags = PF_API;
+	m->m_page  = PAGE_RESULTS;
+	m++;
+
 	m->m_title = "max query terms";
 	m->m_desc  = "Do not allow more than this many query terms. Helps "
 		"prevent big queries from resource hogging.";
@ -3809,11 +3951,10 @@ void Parms::init ( ) {

 	m->m_title = "language weight";
 	m->m_desc  = "Default language weight if document matches query "
-		"language. Use this to give results that match the specified "
-		"the specified &qlang higher ranking, or docs whose language "
-		"is unknown. Can be overridden with "
+		"language. Use this to give results that match "
+		"the specified &qlang higher ranking. Can be overridden with "
 		"&langw in the query url.";
-	m->m_cgi   = "langweight";
+	m->m_cgi   = "langw";
 	simple_m_set(CollectionRec,m_sameLangWeight);
 	m->m_def   = "20.000000";
 	m->m_group = true;
@ -3821,6 +3962,21 @@ void Parms::init ( ) {
 	m->m_page  = PAGE_RANKING;
 	m++;

+	m->m_title = "unknown language weight";
+	m->m_desc  = "Default language weight if query language is specified but document "
+		"language could not be determined. Use this to give docs with unknown language a "
+		"higher ranking when qlang is specified. Can be overridden with "
+		"&ulangw in the query url.";
+	m->m_cgi   = "ulangw";
+	simple_m_set(CollectionRec,m_unknownLangWeight);
+	m->m_def   = "10.000000";
+	m->m_group = true;
+	m->m_flags = PF_REBUILDRANKINGSETTINGS;
+	m->m_page  = PAGE_RANKING;
+	m++;
+
+
+
 	m->m_title = "termfreq min";
 	m->m_desc  = "Term frequency estimate minimum";
 	m->m_cgi   = "termfreqweightfreqmin";
@ -3901,6 +4057,8 @@ void Parms::init ( ) {
 	m->m_page  = PAGE_RANKING;
 	m++;

+
+
 	m->m_title = "Hashgroup weight - body";
 	m->m_desc  = "";
 	m->m_cgi   = "hgw_body";
@ -3953,7 +4111,7 @@ void Parms::init ( ) {

 	m->m_title = "Hashgroup weight - in link text";
 	m->m_desc  = "";
-	m->m_cgi   = "hgw_innlinktext";
+	m->m_cgi   = "hgw_inlinktext";
 	simple_m_set(Conf,m_hashGroupWeightInLinkText);
 	m->m_def   = "16.000000";
 	m->m_group = false;
@ -4021,6 +4179,26 @@ void Parms::init ( ) {
 	m->m_page  = PAGE_RANKING;
 	m++;

+	m->m_title = "Page temp weight min";
+	m->m_desc  = "Page temp is scaled to be between the min and max";
+	m->m_cgi   = "pagetempweightmin";
+	simple_m_set(Conf,m_pageTemperatureWeightMin);
+	m->m_def   = "1.000000";
+	m->m_group = false;
+	m->m_flags = PF_REBUILDRANKINGSETTINGS;
+	m->m_page  = PAGE_RANKING;
+	m++;
+
+	m->m_title = "Page temp weight max";
+	m->m_desc  = "Page temp is scaled to be between the min and max";
+	m->m_cgi   = "pagetempweightmax";
+	simple_m_set(Conf,m_pageTemperatureWeightMax);
+	m->m_def   = "20.000000";
+	m->m_group = false;
+	m->m_flags = PF_REBUILDRANKINGSETTINGS;
+	m->m_page  = PAGE_RANKING;
+	m++;
+
 	m->m_title = "Use page temperature";
 	m->m_desc  = "Use page temperature (if available) for ranking";
 	m->m_cgi   = "use_page_temperature";
@ -4047,6 +4225,7 @@ void Parms::init ( ) {
 	m->m_page  = PAGE_RANKING;
 	m->m_obj   = OBJ_CONF;
 	m++;
+
 	m->m_title = "Rank adjustment";
 	m->m_cgi   = "flag_rerank";
 	m->m_xml   = "RankAdjustment";
--- a/Parms.h
+++ b/Parms.h
@ -198,7 +198,7 @@ class Parms {
 	bool insertParm ( int32_t i , int32_t an , char *THIS ) ;
 	bool removeParm ( int32_t i , int32_t an , char *THIS ) ;

-	void setParm(char *THIS, Parm *m, int32_t array_index, const char *s, bool isHtmlEncoded, bool fromRequest);
+	void setParm(char *THIS, Parm *m, int32_t array_index, const char *s);

 	void setToDefault(char *THIS, parameter_object_type_t objType,
 			  CollectionRec *argcr );
@ -224,6 +224,7 @@ class Parms {

 	Parm *getParm(int32_t i) { return m_parms+i; }
 	int32_t getNumParms() const { return m_numParms; }
+	bool convertUIToInternal(const char *field_base_name, parameter_type_t type, const char *s, char *adjusted_value);

 private:
 	//
--- a/PingServer.cpp
+++ b/PingServer.cpp
@ -202,22 +202,17 @@ void PingServer::pingHost ( Host *h , uint32_t ip , uint16_t port ) {
 	//first we update our pinginfo
 	PingInfo newPingInfo;

-	newPingInfo.m_numCorruptDiskReads = g_numCorrupt;
-	newPingInfo.m_numOutOfMems = g_mem.getOOMCount();
-	newPingInfo.m_socketsClosedFromHittingLimit = g_stats.m_closedSockets;
-	newPingInfo.m_currentSpiders = g_spiderLoop.getNumSpidersOut();
+	newPingInfo.m_unused9 = 0;
+	newPingInfo.m_unused3 = 0;
+	newPingInfo.m_unused11 = 0;
+	newPingInfo.m_unused14 = 0;

 	// let the receiver know our repair mode
 	newPingInfo.m_repairMode = g_repairMode;

-	int32_t l_loadavg = (int32_t) (g_process.getLoadAvg() * 100.0);
-	//gbmemcpy(p, &l_loadavg, sizeof(int32_t));	p += sizeof(int32_t);
-	newPingInfo.m_loadAvg = l_loadavg ;
+	newPingInfo.m_unused2 = 0;

-	// then our percent mem used
-	float mem = g_mem.getUsedMemPercentage();
-	//*(float *)p = mem ; p += sizeof(float); // 4 bytes
-	newPingInfo.m_percentMemUsed = mem;
+	newPingInfo.m_unused3 = 0;

 	// our num recs, docsIndexed
 	newPingInfo.m_totalDocsIndexed = (int32_t)g_process.getTotalDocsIndexed();
@ -229,7 +224,7 @@ void PingServer::pingHost ( Host *h , uint32_t ip , uint16_t port ) {
 	if ( g_hostdb.getCRC() == 0 ) { g_process.shutdownAbort(true); }

 	// disk usage (df -ka)
-	newPingInfo.m_diskUsage = g_process.m_diskUsage;
+	newPingInfo.m_unused7 = 0.0;

 	// flags indicating our state
 	int32_t flags = 0;
@ -247,9 +242,7 @@ void PingServer::pingHost ( Host *h , uint32_t ip , uint16_t port ) {
 	if ( g_dailyMerge.m_mergeMode ==0 || g_dailyMerge.m_mergeMode == 6 )
 		flags |= PFLAG_MERGEMODE0OR6;

-	uint8_t rv8 = (uint8_t)g_recoveryLevel;
-	if ( g_recoveryLevel > 255 ) rv8 = 255;
-	newPingInfo.m_recoveryLevel = rv8;
+	newPingInfo.m_unused18 = 0;

 	//*(int32_t *)p = flags; p += 4; // 4 bytes
 	newPingInfo.m_flags = flags;
@ -263,12 +256,11 @@ void PingServer::pingHost ( Host *h , uint32_t ip , uint16_t port ) {

 	newPingInfo.m_unused0 = 0;

-	newPingInfo.m_udpSlotsInUseIncoming = g_udpServer.getNumUsedSlotsIncoming();
+	newPingInfo.m_unused12 = 0;

-	newPingInfo.m_tcpSocketsInUse = g_httpServer.m_tcp.m_numUsed;
+	newPingInfo.m_unused13 = 0;

-	// from Loop.cpp
-	newPingInfo.m_cpuUsage = 0.0;
+	newPingInfo.m_unused4 = 0.0;

 	// store the gbVersionStrBuf now, just a date with a \0 included
 	char *v = getVersion();
@ -369,18 +361,6 @@ void PingServer::gotReplyWrapperP(void *state, UdpSlot *slot) {
 		// he is back up then we are free to send another alert about
 		// any other host that goes down
 		if ( h->m_hostId == s_lastSentHostId ) s_lastSentHostId = -1;
-
-		if ( h->m_pingInfo.m_percentMemUsed >= 99.0 &&
-		     h->m_firstOOMTime == 0 )
-			h->m_firstOOMTime = nowms;
-		if ( h->m_pingInfo.m_percentMemUsed < 99.0 )
-			h->m_firstOOMTime = 0LL;
-		// if this host is alive and has been at 99% or more mem usage
-		// for the last X minutes, and we have got at least 10 ping replies
-		// from him, then send an email alert.
-		if ( h->m_pingInfo.m_percentMemUsed >= 99.0 &&
-		     nowms - h->m_firstOOMTime >= g_conf.m_sendEmailTimeout )
-			g_pingServer.sendEmail ( h , NULL , true );
 	} else {
 		// . if his ping was dead, try to send an email alert to the admin
 		// . returns false if blocked, true otherwise
--- a/PosdbTable.cpp
+++ b/PosdbTable.cpp
@ -246,19 +246,16 @@ float PosdbTable::getBestScoreSumForSingleTerm(int32_t i, const char *wpi, const
 			unsigned char div = Posdb::getDiversityRank ( wpi );
 			score *= m_msg39req->m_scoringWeights.m_diversityWeights[div];
 			score *= m_msg39req->m_scoringWeights.m_diversityWeights[div];
-
 			// hash group? title? body? heading? etc.
 			unsigned char hg = Posdb::getHashGroup ( wpi );
 			unsigned char mhg = hg;
 			if ( s_inBody[mhg] ) mhg = HASHGROUP_BODY;
 			score *= m_msg39req->m_scoringWeights.m_hashGroupWeights[hg];
 			score *= m_msg39req->m_scoringWeights.m_hashGroupWeights[hg];
-
 			// good density?
 			unsigned char dens = Posdb::getDensityRank ( wpi );
 			score *= m_msg39req->m_scoringWeights.m_densityWeights[dens];
 			score *= m_msg39req->m_scoringWeights.m_densityWeights[dens];
-
 			// to make more compatible with pair scores divide by distance of 2
 			//score /= 2.0;

@ -452,6 +449,7 @@ float PosdbTable::getBestScoreSumForSingleTerm(int32_t i, const char *wpi, const
 		sx->m_densityRank = Posdb::getDensityRank(maxp);

 		float score = bestScores[k];
+
 		//score *= ts;
 		score *= m_freqWeights[i];
 		score *= m_freqWeights[i];
@ -3937,6 +3935,7 @@ void PosdbTable::intersectLists10_r ( ) {
 				}
 			}

+
 			if( currPassNum == INTERSECT_SCORING ) {
 				//
 				// Pre-advance each termlist's cursor to skip to next docid.
@ -4094,7 +4093,6 @@ void PosdbTable::intersectLists10_r ( ) {

 				minSingleScore *= completeScoreMultiplier;

-
 				//#
 				//# DOCID / SITERANK DETECTION
 				//#
@ -4128,7 +4126,6 @@ void PosdbTable::intersectLists10_r ( ) {

 				minPairScore *= completeScoreMultiplier;

-
 				//#
 				//# Find minimum score - either single term or term pair
 				//#
@ -4155,7 +4152,6 @@ void PosdbTable::intersectLists10_r ( ) {
 				}
 			} // !m_q->m_isBoolean

-
 			//#
 			//# Calculate score and give boost based on siterank and highest inlinking siterank
 			//#
@ -4169,14 +4165,23 @@ void PosdbTable::intersectLists10_r ( ) {
 			score = minScore * (adjustedSiteRank*m_siteRankMultiplier+1.0);
 			logTrace(g_conf.m_logTracePosdb, "Score %f for docId %" PRIu64 "", score, m_docId);

-
 			//# 
-			//# Give score boost if query and doc language is the same.
+			//# Give score boost if query and doc language is the same, 
+			//# and optionally a different boost if the language of the
+			//# page is unknown.
+			//#
 			//# Use "qlang" parm to set the language. i.e. "&qlang=fr"
 			//#
-			if ( m_msg39req->m_language == 0 || docLang == 0 || m_msg39req->m_language == docLang) {
-				score *= (m_msg39req->m_sameLangWeight); //SAMELANGMULT;
-				logTrace(g_conf.m_logTracePosdb, "Giving score a matching language boost of x%f: %f for docId %" PRIu64 "", m_msg39req->m_sameLangWeight, score, m_docId);
+			if ( m_msg39req->m_language != 0 ) {
+				if( m_msg39req->m_language == docLang) {
+					score *= (m_msg39req->m_sameLangWeight);
+					logTrace(g_conf.m_logTracePosdb, "Giving score a matching language boost of x%f: %f for docId %" PRIu64 "", m_msg39req->m_sameLangWeight, score, m_docId);
+				}
+				else
+				if( docLang == 0 ) {
+					score *= (m_msg39req->m_unknownLangWeight); 
+					logTrace(g_conf.m_logTracePosdb, "Giving score an unknown language boost of x%f: %f for docId %" PRIu64 "", m_msg39req->m_unknownLangWeight, score, m_docId);
+				}
 			}

 			double page_temperature = 0;
@ -4185,13 +4190,12 @@ void PosdbTable::intersectLists10_r ( ) {

 			if(m_msg39req->m_usePageTemperatureForRanking) {
 				use_page_temperature = true;
-				page_temperature = g_pageTemperatureRegistry.query_page_temperature(m_docId);
+				page_temperature = g_pageTemperatureRegistry.query_page_temperature(m_docId, m_msg39req->m_pageTemperatureWeightMin, m_msg39req->m_pageTemperatureWeightMax);
 				score *= page_temperature;
-				logTrace(g_conf.m_logTracePosdb, "Page temperature for docId %" PRIu64 " is %.4f, score %f->%f", m_docId, page_temperature, score_before_page_temp, score);
+				logTrace(g_conf.m_logTracePosdb, "Page temperature for docId %" PRIu64 " is %.14f, score %f -> %f", m_docId, page_temperature, score_before_page_temp, score);
 			}


-
 			//#
 			//# Handle sortby int/float and minimum docid/score pairs
 			//#
@ -4524,11 +4528,15 @@ float PosdbTable::getMaxPossibleScore ( const QueryTermInfo *qti,
 	//score *= perfectWordSpamWeight * perfectWordSpamWeight;
 	score *= (((float)siteRank)*m_siteRankMultiplier+1.0);

-	// language boost if same language (or no lang specified)
-	if ( m_msg39req->m_language == docLang ||
-	     m_msg39req->m_language == 0 || 
-	     docLang == 0 ) {
-		score *= m_msg39req->m_sameLangWeight;//SAMELANGMULT;
+	// language boost if language specified and if page is same language, or unknown language
+	if ( m_msg39req->m_language != 0 ) {
+		if( m_msg39req->m_language == docLang) {
+			score *= (m_msg39req->m_sameLangWeight);
+		}
+		else
+		if( docLang == 0 ) {
+			score *= (m_msg39req->m_unknownLangWeight); 
+		}
 	}
 	
 	// assume the other term we pair with will be 1.0
--- a/Proxy.cpp
+++ b/Proxy.cpp
@ -280,7 +280,7 @@ bool Proxy::handleRequest (TcpSocket *s){
 			s_count = 0;
 			s_last = now;
 		}
-		g_stats.m_closedSockets++;; 
+		Statistics::register_socket_limit_hit();
 		return g_httpServer.sendErrorReply ( s , 500 , 
 						     "Too many sockets open.");
 	}
--- a/ScoringWeights.cpp
+++ b/ScoringWeights.cpp
@ -17,17 +17,24 @@ void ScoringWeights::init(float diversityWeightMin, float diversityWeightMax,
 			  float hashGroupWeightInMenu)
 {
 	for(int i = 0; i <= MAXDIVERSITYRANK; i++)
-		m_diversityWeights[i] = scale_quadratic(i,0,MAXDIVERSITYRANK,diversityWeightMin,diversityWeightMax);
+		m_diversityWeights[i] = scale_quadratic(i, 0, MAXDIVERSITYRANK, diversityWeightMin, diversityWeightMax);
 	
 	for(int i = 0; i <= MAXDENSITYRANK; i++)
-		m_densityWeights[i] = scale_quadratic(i,0,MAXDENSITYRANK,densityWeightMin,densityWeightMax);
+		m_densityWeights[i] = scale_quadratic(i, 0, MAXDENSITYRANK, densityWeightMin, densityWeightMax);
 	
 	// make sure if word spam is 0 that the weight is not 0
 	for(int i = 0; i <= MAXWORDSPAMRANK; i++)
-		m_wordSpamWeights[i] = scale_linear(i, 0,MAXWORDSPAMRANK, 1.0/MAXWORDSPAMRANK, 1.0);
-	
-	for(int i = 0; i <= MAXWORDSPAMRANK; i++)
+		m_wordSpamWeights[i] = scale_linear(i, 0, MAXWORDSPAMRANK, 1.0/MAXWORDSPAMRANK, 1.0);
+
+	// site rank of inlinker
+	// to be on the same level as multiplying the final score
+	// by the siterank+1 we should make this a sqrt() type thing
+	// since we square it so that single term scores are on the same
+	// level as term pair scores
+	// @@@ BR: Right way to do it? Gives a weight between 1 and 4
+	for(int i = 0; i <= MAXWORDSPAMRANK; i++) {
 		m_linkerWeights[i] = sqrt(1.0 + i);
+	}
 	
 	for(int i=0; i<HASHGROUP_END; i++)
 		m_hashGroupWeights[i] = 1.0;
--- a/SearchInput.cpp
+++ b/SearchInput.cpp
@ -48,6 +48,7 @@ SearchInput::SearchInput() {
 	m_maxSerpScore = 0.0;
 	m_minSerpDocId = 0;
 	m_sameLangWeight = 0.0;
+	m_unknownLangWeight = 0.0;
 	m_defaultSortLang = NULL;
 	m_dedupURL = 0;
 	m_percentSimilarSummary = 0;
@ -64,7 +65,15 @@ SearchInput::SearchInput() {
 	m_askOtherShards = false;
 	memset(m_queryId, 0, sizeof(m_queryId));
 	m_doMaxScoreAlgo = false;
+
+	m_termFreqWeightFreqMin = 0.0;
+	m_termFreqWeightFreqMax = 0.5;
+	m_termFreqWeightMin = 0.5;
+	m_termFreqWeightMax = 1.0;
+
 	m_synonymWeight = 0.9;
+	m_pageTemperatureWeightMin = 1.0;
+	m_pageTemperatureWeightMax = 20.0;
 	m_usePageTemperatureForRanking = true;
 	m_numFlagScoreMultipliers=26;
 	for(int i=0; i<26; i++)
--- a/SearchInput.h
+++ b/SearchInput.h
@ -130,6 +130,7 @@ public:
 	int64_t m_minSerpDocId;

 	float m_sameLangWeight;
+	float m_unknownLangWeight;

 	// prefer what lang in the results. it gets a 20x boost. "en" "xx" "fr"
 	char 	      *m_defaultSortLang;
@ -147,6 +148,11 @@ public:
 	bool   m_doDupContentRemoval;     // msg40
 	bool   m_getDocIdScoringInfo;

+	float m_termFreqWeightFreqMin;
+	float m_termFreqWeightFreqMax;
+	float m_termFreqWeightMin;
+	float m_termFreqWeightMax;
+
 	float m_diversityWeightMin;
 	float m_diversityWeightMax;
 	float m_densityWeightMin;
@ -162,8 +168,9 @@ public:
 	float m_hashGroupWeightInternalLinkText;
 	float m_hashGroupWeightInUrl;
 	float m_hashGroupWeightInMenu;
-
 	float m_synonymWeight;
+	float m_pageTemperatureWeightMin;
+	float m_pageTemperatureWeightMax;
 	bool m_usePageTemperatureForRanking;

 	int32_t m_numFlagScoreMultipliers;
--- a/Statistics.cpp
+++ b/Statistics.cpp
@ -4,6 +4,11 @@
 #include "gb-include.h"
 #include "types.h"
 #include "Msg3.h"            //getDiskPageCache()
+#include "Mem.h"             //memory statistics
+#include "UdpServer.h"       //g_udpServer.getNumUsedSlotsIncoming()
+#include "HttpServer.h"      //g_httpServer.m_tcp.m_numUsed
+#include "Msg5.h"            //g_numCorrupt
+#include "SpiderLoop.h"
 #include "RdbCache.h"
 #include "Rdb.h"
 #include "GbMutex.h"
@ -364,6 +369,28 @@ static void dump_rdb_cache_statistics( FILE *fp ) {
 	}
 }

+
+//////////////////////////////////////////////////////////////////////////////
+// Assorted statistics
+
+static std::atomic<unsigned long> socket_limit_hit_count(0);
+
+void Statistics::register_socket_limit_hit() { //bump the counter reported as socket:limit_hit
+	socket_limit_hit_count.fetch_add(1);
+}
+
+//Write assorted counters and levels to fp, one "group:name:value" line each. Some of them were previously exchanged in PingInfo
+static void dump_assorted_statistics(FILE *fp) {
+	fprintf(fp,"mem:pctused:%f\n",g_mem.getUsedMemPercentage());
+	fprintf(fp,"mem:oom_count:%d\n",g_mem.getOOMCount());
+	fprintf(fp,"socket:limit_hit:%lu\n",socket_limit_hit_count.load());
+	fprintf(fp,"socket:slots_incoming:%d\n",g_udpServer.getNumUsedSlotsIncoming());
+	fprintf(fp,"socket:tcp_in_use:%d\n",g_httpServer.m_tcp.m_numUsed);
+	fprintf(fp,"misc:corrupt_list_reads:%d\n",g_numCorrupt); //single ':' after the group, same as every other key here
+	fprintf(fp,"spider:current_spiders:%d\n",g_spiderLoop.getNumSpidersOut());
+}
+
+
 //////////////////////////////////////////////////////////////////////////////
 // statistics

@ -381,6 +408,7 @@ static void dump_statistics(time_t now) {
 	dump_spider_statistics( fp );
 	dump_io_statistics( fp );
 	dump_rdb_cache_statistics( fp );
+	dump_assorted_statistics(fp);
 	
 	if ( fflush(fp) != 0 ) {
 		log( LOG_ERROR, "fflush(%s) failed with errno=%d (%s)", tmp_filename, errno, strerror( errno ) );
--- a/Statistics.h
+++ b/Statistics.h
@ -12,6 +12,8 @@ void register_spider_time( bool is_new, int error_code, int http_status, unsigne

 void register_io_time( bool is_write, int error_code, unsigned long bytes, unsigned ms );

+void register_socket_limit_hit();
+
 } //namespace

 #endif
--- a/Stats.cpp
+++ b/Stats.cpp
@ -14,8 +14,6 @@ Stats::Stats ( ) {
 	m_next               = 0;
 	memset ( m_pts , 0 , sizeof(StatPoint)*MAX_POINTS );

-	m_closedSockets = 0;
-
 	memset(m_msg3aRecalls, 0, sizeof(m_msg3aRecalls));

 	clearMsgStats();
--- a/Stats.h
+++ b/Stats.h
@ -60,9 +60,6 @@ class Stats {

 	int64_t m_startTime;

-	// when we have to close a socket because too many are open.. count it
-	int32_t      m_closedSockets;
-
 	time_t m_uptimeStart;

 	// one count for each CR_* defined in Msg51.h
--- a/TcpServer.cpp
+++ b/TcpServer.cpp
@ -2,6 +2,7 @@

 #include "TcpServer.h"
 #include "Stats.h"
+#include "Statistics.h"
 #include "Profiler.h"
 #include "PingServer.h"
 #include "HttpServer.h" //g_httpServer.m_ssltcp.m_ctx
@ -771,7 +772,7 @@ TcpSocket *TcpServer::getNewSocket ( ) {
 				s_last = now;
 			}
 			// another stat
-			g_stats.m_closedSockets++;
+			Statistics::register_socket_limit_hit();
 			g_errno = EOUTOFSOCKETS; 
 			// send email alert
 			g_pingServer.sendEmailMsg ( &s_lastTime ,
@ -888,7 +889,7 @@ TcpSocket *TcpServer::wrapSocket ( int sd , int32_t niceness , bool isIncoming )
 				s_last = now;
 			}
 			// another stat
-			g_stats.m_closedSockets++;
+			Statistics::register_socket_limit_hit();
 			g_errno = EOUTOFSOCKETS; 

 			// send email alert
@ -904,7 +905,7 @@ TcpSocket *TcpServer::wrapSocket ( int sd , int32_t niceness , bool isIncoming )
 	if ( sd < 0 || sd >= MAX_TCP_SOCKS ) {
 		log(LOG_LOGIC,"tcp: Got bad sd of %" PRId32".",(int32_t)sd);
 		// another stat
-		g_stats.m_closedSockets++;
+		Statistics::register_socket_limit_hit();
 		g_errno = EOUTOFSOCKETS; 
 		// send email alert
 		g_pingServer.sendEmailMsg ( &s_lastTime , "out of sockets on https2");
@ -919,7 +920,7 @@ TcpSocket *TcpServer::wrapSocket ( int sd , int32_t niceness , bool isIncoming )
 	// . this has happened a few times lately...
 	if ( s->m_startTime != 0 ) {
 		log(LOG_LOGIC,"tcp: sd of %" PRId32" is already in use.",(int32_t)sd);
-		g_stats.m_closedSockets++;
+		Statistics::register_socket_limit_hit();
 		g_errno = EOUTOFSOCKETS;
 		if ( sd == 0 ) log("tcp: closing2 sd of 0");
 		if ( ::close(sd) == -1 )
--- a/XmlDoc.cpp
+++ b/XmlDoc.cpp
@ -12922,6 +12922,9 @@ char *XmlDoc::getMetaList(bool forDelete) {
 			// we're adding titlerec to keep links between redirection intact
 			addTitleRec = true;

+			// since we're adding titlerec, add posrec as well
+			addPosRec = true;
+
 			// if we are adding a simplified redirect as a link to spiderdb
 			// likewise if the error was ENONCANONICAL treat it like that
 			spideringLinks = true;
@ -16062,7 +16065,7 @@ Msg20Reply *XmlDoc::getMsg20Reply ( ) {
 	m_reply.m_ip               = m_ip;
 	m_reply.m_firstIp          = *fip;
 	m_reply.m_docId            = m_docId;
-	m_reply.m_contentLen       = size_utf8Content;
+	m_reply.m_contentLen       = size_utf8Content - 1;
 	m_reply.m_lastSpidered     = getSpideredTime();//m_spideredTime;
 	m_reply.m_datedbDate       = 0;
 	m_reply.m_firstIndexedDate = m_firstIndexedDate;
--- a/XmlDoc.h
+++ b/XmlDoc.h
@ -483,7 +483,7 @@ public:
 	SafeBuf *getTimeAxisUrl ( );
 	bool hashUrl ( class HashTableX *table, bool urlOnly );
 	bool hashDateNumbers ( class HashTableX *tt );
-	bool hashIncomingLinkText( class HashTableX *table, bool hashAnomalies, bool hashNonAnomalies );
+	bool hashIncomingLinkText(HashTableX *table);
 	bool hashLinksForLinkdb ( class HashTableX *table ) ;
 	bool hashNeighborhoods ( class HashTableX *table ) ;
 	bool hashTitle ( class HashTableX *table );
--- a/XmlDoc_Indexing.cpp
+++ b/XmlDoc_Indexing.cpp
@ -154,13 +154,6 @@ static bool storeTerm ( const char	*s        ,
 //   we know the termlist is small, or the termlist is being used for spidering
 //   or parsing purposes and is usually not sent across the network.
 bool XmlDoc::hashNoSplit ( HashTableX *tt ) {
-	// this should be ready to go and not block!
-	int64_t *pch64 = getExactContentHash64();
-	if ( ! pch64 || pch64 == (void *)-1 ) { g_process.shutdownAbort(true); }
-
-	// shortcut
-	Url *fu = getFirstUrl();
-
 	// constructor should set to defaults automatically
 	HashInfo hi;
 	hi.m_hashGroup = HASHGROUP_INTAG;
@ -168,19 +161,26 @@ bool XmlDoc::hashNoSplit ( HashTableX *tt ) {
 	// usually we shard by docid, but these are terms we shard by termid!
 	hi.m_shardByTermId   = true;

+	if ((size_utf8Content - 1) > 0) {
+		// for exact content deduping
+		setStatus("hashing gbcontenthash (deduping) no-split keys");

-	// for exact content deduping
-	setStatus ( "hashing gbcontenthash (deduping) no-split keys" );
-	char cbuf[64];
-	int32_t clen = sprintf(cbuf,"%" PRIu64,(uint64_t)*pch64);
-	hi.m_prefix    = "gbcontenthash";
-	if ( ! hashString ( cbuf,clen,&hi ) ) return false;
+		// this should be ready to go and not block!
+		int64_t *pch64 = getExactContentHash64();
+		if (!pch64 || pch64 == (void *)-1) { g_process.shutdownAbort(true); }

-	char *host = fu->getHost    ();
+		char cbuf[64];
+		int32_t clen = sprintf(cbuf, "%" PRIu64, (uint64_t)*pch64);
+		hi.m_prefix = "gbcontenthash";
+		if (!hashString(cbuf, clen, &hi)) return false;
+	}

 	// now hash the site
 	setStatus ( "hashing no-split SiteGetter terms");

+	Url *fu = getFirstUrl();
+	char *host = fu->getHost    ();
+
 	//
 	// HASH terms for SiteGetter.cpp
 	//
@ -217,44 +217,6 @@ bool XmlDoc::hashNoSplit ( HashTableX *tt ) {
 		if ( ! hashSingleTerm ( host,end2-host,&hi) ) return false;
 	}

-	//Dates *dp = getDates ();
-	// hash the clocks into indexdb
-	//if ( ! dp->hash ( m_docId , tt , this ) ) return false;
-
-	// . hash special site/hopcount thing for permalinks
-	// . used by Images.cpp for doing thumbnails
-	// . this returns false and sets g_errno on error
-	// . let's try thumbnails for all...
-	//if ( ! *getIsPermalink() ) return true;
-
-/*
-	BR 20160117: No longer has image URLs
-	setStatus ( "hashing no-split gbimage keys" );
-
-	hi.m_prefix    = "gbimage";
-	// hash gbimage: for permalinks only for Images.cpp
-	for ( int32_t i = 0 ; i < m_images.m_numImages ; i++ ) {
-		// get the node number
-		//int32_t nn = m_images.m_imageNodes[i];
-		// get the url of the image
-		//XmlNode *xn = m_xml.getNodePtr(nn);
-		int32_t  srcLen;
-		char *src = m_images.getImageUrl(i,&srcLen);
-		// set it to the full url
-		Url iu;
-		// use "pageUrl" as the baseUrl
-		Url *cu = getCurrentUrl();
-		// we can addwww to normalize since this is for deduping kinda
-		iu.set ( cu , src , srcLen , true );  // addWWW? yes...
-		char *u    = iu.getUrl   ();
-		int32_t  ulen = iu.getUrlLen();
-		// hash each one
-		//if ( ! hashString ( u,ulen,&hi ) ) return false;
-		// hash a single entity
-		if ( ! hashSingleTerm ( u,ulen,&hi) ) return false;
-		//log("test: %s",u);
-	}
-*/
 	return true;
 }

@ -285,9 +247,14 @@ char *XmlDoc::hashAll(HashTableX *table) {
 		logTrace(g_conf.m_logTraceXmlDoc, "END, getContentType failed");
 		return NULL;
 	}
-	
+
 	// BR 20160127: Never index JSON and XML content
 	if (*ct == CT_JSON || *ct == CT_XML) {
+		if (!hashContentType(table)) {
+			logTrace(g_conf.m_logTraceXmlDoc, "END, hashContentType failed");
+			return NULL;
+		}
+
 		// For XML (JSON should not get here as it should be filtered out during spidering)
 		// store the URL as the only thing in posdb so we are able to find it, and
 		// eventually ban it.
@ -405,18 +372,17 @@ char *XmlDoc::hashAll(HashTableX *table) {
 	// global index now, so don't need this... 9/28/2014

 	// stop indexing xml docs
-	bool indexDoc = cr->m_indexBody;
-
 	// global index unless this is a json object in which case it is
 	// hashed above in the call to hashJSON(). this will decrease disk
 	// usage by about half, posdb* files are pretty big.
-	if (!indexDoc) {
+	if (!cr->m_indexBody) {
 		logTrace(g_conf.m_logTraceXmlDoc, "END, !indexDoc");
 		return (char *)1;
 	}

-	if ( *ct == CT_JSON || *ct == CT_XML ) {
-		goto skip;
+	if ((size_utf8Content - 1) <= 0) {
+		logTrace(g_conf.m_logTraceXmlDoc, "END, contentLen == 0");
+		return (char *)1;
 	}

 	// hash the body of the doc first so m_dist is 0 to match
@ -449,7 +415,7 @@ char *XmlDoc::hashAll(HashTableX *table) {
 	// we index the single words in the neighborhoods next, and
 	// we had songfacts.com coming up for the 'street light facts'
 	// query because it had a bunch of anomalous inlink text.
-	if (!hashIncomingLinkText(table, false, true)) {
+	if (!hashIncomingLinkText(table)) {
 		logTrace(g_conf.m_logTraceXmlDoc, "END, hashIncomingLinkText failed");
 		return NULL;
 	}
@ -462,7 +428,6 @@ char *XmlDoc::hashAll(HashTableX *table) {
 		return NULL;
 	}

-
 	// BR 20160220
 	// Store value of meta tag "geo.placename" to help aid searches for
 	// location specific sites, e.g. 'Restaurant in London'
@ -471,8 +436,6 @@ char *XmlDoc::hashAll(HashTableX *table) {
 		return NULL;
 	}

-skip:
-
 	// this will only increment the scores of terms already in the table
 	// because we neighborhoods are not techincally in the document
 	// necessarily and we do not want to ruin our precision
@ -714,30 +677,6 @@ bool XmlDoc::hashDateNumbers ( HashTableX *tt ) { // , bool isStatusDoc ) {
 	if ( ! hashNumberForSorting ( buf , buf , bufLen , &hi ) )
 		return false;

-	// do not index the rest if we are a "spider reply" document
-	// which is like a fake document for seeing spider statuses
-	//if ( isStatusDoc == CT_STATUS ) return true;
-	//if ( isStatusDoc ) return true;
-
-	// now for CT_STATUS spider status "documents" we also index
-	// gbspiderdate so index this so we can just do a
-	// gbsortby:gbdocspiderdate and only get real DOCUMENTS not the
-	// spider status "documents"
-/*
-  BR 20160108: Don't store these as we don't plan to use them
-	hi.m_desc      = "doc last spidered date";
-	hi.m_prefix    = "gbdocspiderdate";
-	bufLen = sprintf ( buf , "%" PRIu32, (uint32_t)m_spideredTime );
-	if ( ! hashNumberForSorting ( buf , buf , bufLen , &hi ) )
-		return false;
-
- 	hi.m_desc      = "doc last indexed date";
- 	hi.m_prefix    = "gbdocindexdate";
-	bufLen = sprintf ( buf , "%" PRIu32, (uint32_t)indexedTime );
- 	if ( ! hashNumberForSorting ( buf , buf , bufLen , &hi ) )
- 		return false;
-*/
-
 	// all done
 	return true;
 }
@ -1024,8 +963,7 @@ bool XmlDoc::hashUrl ( HashTableX *tt, bool urlOnly ) { // , bool isStatusDoc )
 	Url uw;
 	uw.set( fu->getUrl(), fu->getUrlLen(), true, false );
 	hi.m_prefix    = "url";
-	// no longer, we just index json now
-	//if ( isStatusDoc ) hi.m_prefix = "url2";
+
 	if ( ! hashSingleTerm(uw.getUrl(),uw.getUrlLen(),&hi) )
 		return false;

@ -1228,21 +1166,15 @@ bool XmlDoc::hashUrl ( HashTableX *tt, bool urlOnly ) { // , bool isStatusDoc )
 	int32_t  elen = fu->getExtensionLen();
 	// update hash parms
 	hi.m_prefix    = "ext";
-	// no longer, we just index json now
-	//if ( isStatusDoc ) hi.m_prefix = "ext2";
 	if ( ! hashSingleTerm(ext,elen,&hi ) ) return false;


 	setStatus ( "hashing gbdocid" );
 	hi.m_prefix = "gbdocid";
-	// no longer, we just index json now
-	//if ( isStatusDoc ) hi.m_prefix = "gbdocid2";
 	char buf2[32];
 	sprintf(buf2,"%" PRIu64, (uint64_t)m_docId );
 	if ( ! hashSingleTerm(buf2,strlen(buf2),&hi) ) return false;

-	//if ( isStatusDoc ) return true;
-
 	setStatus ( "hashing SiteGetter terms");

 	//
@ -1299,76 +1231,50 @@ bool XmlDoc::hashUrl ( HashTableX *tt, bool urlOnly ) { // , bool isStatusDoc )
 	hi.m_prefix    = "urlhash";
 	if ( ! hashString(buf,blen,&hi) ) return false;

-/*
-	BR 20160106 removed.
-	blen = sprintf(buf,"%" PRIu32,h/10);
-	// update hashing parms
-	hi.m_prefix = "urlhashdiv10";
-	if ( ! hashString(buf,blen,&hi) ) return false;
-	blen = sprintf(buf,"%" PRIu32,h/100);
-	// update hashing parms
-	hi.m_prefix = "urlhashdiv100";
-	if ( ! hashString(buf,blen,&hi) ) return false;
-*/
+	if (m_contentLen > 0) {
+		setStatus("hashing url mid domain");

+		// update parms
+		hi.m_prefix = NULL;
+		hi.m_desc = "middle domain";
+		hi.m_hashGroup = HASHGROUP_INURL;
+		hi.m_hashCommonWebWords = false;    // Skip www, com, http etc.
+		if (!hashString(host, hlen, &hi)) {
+			return false;
+		}

-	setStatus ( "hashing url mid domain");
+		hi.m_hashCommonWebWords = true;
+		if (!hashSingleTerm(fu->getDomain(), fu->getDomainLen(), &hi)) {
+			return false;
+		}

-	// update parms
-	hi.m_prefix    = NULL;
-	hi.m_desc      = "middle domain";
-	hi.m_hashGroup = HASHGROUP_INURL;
-	hi.m_hashCommonWebWords = false;	// Skip www, com, http etc.
-	if ( ! hashString ( host,hlen,&hi)) return false;
+		setStatus("hashing url path");
+		char *path = fu->getPath();
+		int32_t plen = fu->getPathLen();

-	hi.m_hashCommonWebWords = true;
-	if ( ! hashSingleTerm ( fu->getDomain(),fu->getDomainLen(),&hi)) return false;
+		// BR 20160113: Do not hash and combine the page filename extension with the page name (skip e.g. .com)
+		if (elen > 0) {
+			elen++;    // also skip the dot
+		}
+		plen -= elen;

-
-	setStatus ( "hashing url path");
-	char *path = fu->getPath();
-	int32_t  plen = fu->getPathLen();
-
-	// BR 20160113: Do not hash and combine the page filename extension with the page name (skip e.g. .com)
-	if( elen > 0 )
-	{
-		elen++;	// also skip the dot
-	}
-	plen -= elen;
-
-
-	// BR 20160113: Do not hash the most common page names
-	if( strncmp(path, "/index", plen) != 0 )
-	{
-		// hash the path
-		// BR 20160114: Exclude numbers in paths (usually dates)
-		hi.m_hashNumbers = false;
-		if ( ! hashString (path,plen,&hi) ) return false;
+		// BR 20160113: Do not hash the most common page names
+		if (strncmp(path, "/index", plen) != 0) {
+			// hash the path
+			// BR 20160114: Exclude numbers in paths (usually dates)
+			hi.m_hashNumbers = false;
+			if (!hashString(path, plen, &hi)) return false;
+		}
 	}

 	return true;
 }

 // . returns false and sets g_errno on error
-bool XmlDoc::hashIncomingLinkText ( HashTableX *tt               ,
-				    bool        hashAnomalies    ,
-				    bool        hashNonAnomalies ) {
-
-	// do not index ANY of the body if it is NOT a permalink and
-	// "menu elimination" technology is enabled.
-	//if ( ! *getIsPermalink() && m_eliminateMenus ) return true;
+bool XmlDoc::hashIncomingLinkText(HashTableX *tt) {

 	setStatus ( "hashing link text" );

-	// . now it must have an rss item to be indexed in all its glory
-	// . but if it tells us it has an rss feed, toss it and wait for
-	//   the feed.... BUT sometimes the rss feed outlink is 404!
-	// . NO, now we discard with ENORSS at Msg16.cpp
-	//if ( ! *getHasRSSItem() &&  m_eliminateMenus ) return true;
-
-	// sanity check
-	if ( hashAnomalies == hashNonAnomalies ) { g_process.shutdownAbort(true); }
-
 	// sanity
 	if ( ! m_linkInfo1Valid ) { g_process.shutdownAbort(true); }

@ -1404,14 +1310,7 @@ bool XmlDoc::hashIncomingLinkText ( HashTableX *tt               ,
 		bool internal=((m_ip&0x0000ffff)==(k->m_ip&0x0000ffff));
 		// count external inlinks we have for indexing gbmininlinks:
 		if ( ! internal ) ecount++;
-		// get score
-		//int64_t baseScore = k->m_baseScore;
-                // get the weight
-		//int64_t ww ;
-		//if ( internal ) ww = m_internalLinkTextWeight;
-		//else            ww = m_externalLinkTextWeight;
-		// modify the baseScore
-		//int64_t final = (baseScore * ww) / 100LL;
+
 		// get length of link text
 		int32_t tlen = k->size_linkText;
 		if ( tlen > 0 ) tlen--;
@ -1423,15 +1322,16 @@ bool XmlDoc::hashIncomingLinkText ( HashTableX *tt               ,
 			    k->getUrl(),m_firstUrl.getUrl());
 			continue;
 		}
-		// if it is anomalous, set this, we don't
-		//if ( k->m_isAnomaly )
-		//	hi.m_hashIffNotUnique = true;
-		//hi.m_baseScore = final;
+
 		if ( internal ) hi.m_hashGroup = HASHGROUP_INTERNALINLINKTEXT;
 		else            hi.m_hashGroup = HASHGROUP_INLINKTEXT;
 		// store the siterank of the linker in this and use that
 		// to set the multiplier M bits i guess
 		hi.m_linkerSiteRank = k->m_siteRank;
+		if(hi.m_linkerSiteRank>MAXSITERANK) { // clamp an out-of-range linker siterank to MAXSITERANK
+			log(LOG_INFO,"Inlink had siteRank>max (%d), probably from docid %" PRId64, (int)k->m_siteRank, (int64_t)k->m_docId); // PRId64 + cast: %ld is only correct where long is 64 bits
+			hi.m_linkerSiteRank = MAXSITERANK;
+		}
 		// now record this so we can match the link text to
 		// a matched offsite inlink text term in the scoring info
 		k->m_wordPosStart = m_dist; // hi.m_startDist;
@ -1453,14 +1353,8 @@ bool XmlDoc::hashIncomingLinkText ( HashTableX *tt               ,

 // . returns false and sets g_errno on error
 bool XmlDoc::hashNeighborhoods ( HashTableX *tt ) {
-
-	// seems like iffUnique is off, so do this
-	//if ( ! *getIsPermalink() && m_eliminateMenus ) return true;
-
 	setStatus ( "hashing neighborhoods" );

-	//g_tt = table;
-
 	// . now we also hash the neighborhood text of each inlink, that is,
 	//   the text surrounding the inlink text.
 	// . this is also destructive in that it will remove termids that
@ -1702,15 +1596,6 @@ bool XmlDoc::hashLanguage ( HashTableX *tt ) {

 	if ( ! hashString ( s, slen, &hi ) ) return false;

-/* 
-	BR 20160117: Duplicate
-	// try lang abbreviation
-	sprintf(s , "%s ", getLanguageAbbr(langId) );
-	// go back to broken way to try to fix parsing consistency bug
-	// by adding hashLanguageString() function below
-	//sprintf(s , "%s ", getLanguageAbbr(langId) );
-	if ( ! hashString ( s, slen, &hi ) ) return false;
-*/
 	return true;
 }