Merge branch 'master' into nomerge2

Conflicts:
	RdbList.cpp
	RdbList.h
This commit is contained in:
Ivan Skytte Jørgensen
2016-09-05 11:37:46 +02:00
15 changed files with 1168 additions and 1139 deletions

@ -291,7 +291,7 @@ bool Conf::save ( ) {
// will look for the hostname in each collection for a match
// no match defaults to default collection
const char *Conf::getDefaultColl ( char *hostname, int32_t hostnameLen ) {
if ( ! m_defaultColl || ! m_defaultColl[0] ) {
if ( ! m_defaultColl[0] ) {
return "main";
}

128
Msg13.cpp

@ -2203,7 +2203,7 @@ bool addNewProxyAuthorization ( SafeBuf *req , Msg13Request *r ) {
SpiderProxy *sp = getSpiderProxyByIpPort (r->m_proxyIp,r->m_proxyPort);
// if none required, all done
if ( ! sp->m_usernamePwd ) return true;
if ( ! sp->m_usernamePwd[0] ) return true;
// strange?
if ( req->length() < 8 ) return false;
// back up over final \r\n
@ -2410,77 +2410,71 @@ bool printHammerQueueTable ( SafeBuf *sb ) {
, DARK_BLUE
);
Msg13Request *r = s_hammerQueueHead ;
int32_t count = 0;
int64_t nowms = gettimeofdayInMilliseconds();
loop:
if ( ! r ) return true;
// print row
sb->safePrintf( "<tr bgcolor=#%s>"
"<td>%i</td>" // #
"<td>%ims</td>" // age in hammer queue
"<td>%s</td>"
,LIGHT_BLUE
,(int)count
,(int)(nowms - r->m_stored)
,iptoa(r->m_firstIp)
);
for(Msg13Request *r = s_hammerQueueHead; r; r = r->m_nextLink) {
// print row
sb->safePrintf( "<tr bgcolor=#%s>"
"<td>%i</td>" // #
"<td>%ims</td>" // age in hammer queue
"<td>%s</td>"
,LIGHT_BLUE
,(int)count
,(int)(nowms - r->m_stored)
,iptoa(r->m_firstIp)
);
sb->safePrintf("<td>%s</td>" // actual ip
, iptoa(r->m_urlIp));
sb->safePrintf("<td>%s</td>" // actual ip
, iptoa(r->m_urlIp));
// print crawl delay as link to robots.txt
sb->safePrintf( "<td><a href=\"");
Url cu;
cu.set ( r->ptr_url );
bool isHttps = cu.isHttps();
if ( isHttps ) {
sb->safeStrcpy( "https://" );
} else {
sb->safeStrcpy( "http://" );
// print crawl delay as link to robots.txt
sb->safePrintf( "<td><a href=\"");
Url cu;
cu.set ( r->ptr_url );
bool isHttps = cu.isHttps();
if ( isHttps ) {
sb->safeStrcpy( "https://" );
} else {
sb->safeStrcpy( "http://" );
}
sb->safeMemcpy ( cu.getHost() , cu.getHostLen() );
int32_t port = cu.getPort();
int32_t defPort = isHttps ? 443 : 80;
if ( port != defPort ) {
sb->safePrintf( ":%" PRId32, port );
}
sb->safePrintf ( "/robots.txt\">"
"%i"
"</a>"
"</td>" // crawl delay MS
"<td>%i</td>" // proxies banning
, r->m_crawlDelayMS
, r->m_numBannedProxies
);
// show collection name as a link, also truncate to 32 chars
CollectionRec *cr = g_collectiondb.getRec ( r->m_collnum );
const char *coll = "none";
if ( cr ) coll = cr->m_coll;
sb->safePrintf("<td>");
if ( cr ) {
sb->safePrintf("<a href=/admin/sockets?c=");
sb->urlEncode(coll);
sb->safePrintf(">");
}
sb->safeTruncateEllipsis ( coll , 32 );
if ( cr ) sb->safePrintf("</a>");
sb->safePrintf("</td>");
// then the url itself
sb->safePrintf("<td><a href=%s>",r->ptr_url);
sb->safeTruncateEllipsis ( r->ptr_url , 128 );
sb->safePrintf("</a></td>");
sb->safePrintf("</tr>\n");
}
sb->safeMemcpy ( cu.getHost() , cu.getHostLen() );
int32_t port = cu.getPort();
int32_t defPort = isHttps ? 443 : 80;
if ( port != defPort ) {
sb->safePrintf( ":%" PRId32, port );
}
sb->safePrintf ( "/robots.txt\">"
"%i"
"</a>"
"</td>" // crawl delay MS
"<td>%i</td>" // proxies banning
, r->m_crawlDelayMS
, r->m_numBannedProxies
);
// show collection name as a link, also truncate to 32 chars
CollectionRec *cr = g_collectiondb.getRec ( r->m_collnum );
const char *coll = "none";
if ( cr ) coll = cr->m_coll;
sb->safePrintf("<td>");
if ( cr ) {
sb->safePrintf("<a href=/admin/sockets?c=");
sb->urlEncode(coll);
sb->safePrintf(">");
}
sb->safeTruncateEllipsis ( coll , 32 );
if ( cr ) sb->safePrintf("</a>");
sb->safePrintf("</td>");
// then the url itself
sb->safePrintf("<td><a href=%s>",r->ptr_url);
sb->safeTruncateEllipsis ( r->ptr_url , 128 );
sb->safePrintf("</a></td>");
sb->safePrintf("</tr>\n");
// print next entry now
r = r->m_nextLink;
goto loop;
return true;
}

7
Msg4.h

@ -13,6 +13,7 @@ bool hasAddsInQueue ( ) ;
bool isInMsg4LinkedList ( class Msg4 *msg4 ) ;
#include "SafeBuf.h"
#include "rdbid_t.h"
class Msg4 {
@ -23,11 +24,17 @@ class Msg4 {
// (rdbId | 0x00) then rdb record [if split ]
bool addMetaList( class SafeBuf *sb, collnum_t collnum, void *state,
void (* callback)(void *state), int32_t niceness, char rdbId = -1, int32_t shardOverride = -1 );
// Typed convenience overload: takes the rdb id as an rdbid_t, narrows it to
// the char form and hands the call on to the char-rdbId overload, passing
// every other argument through untouched. Returns whatever that overload
// returns.
bool addMetaList( class SafeBuf *sb, collnum_t collnum, void *state,
void (* callback)(void *state), int32_t niceness, rdbid_t rdbId, int32_t shardOverride = -1 )
{
	// a char argument selects the char-based overload, so this cannot recurse
	const char rawRdbId = static_cast<char>(rdbId);
	return addMetaList(sb, collnum, state, callback, niceness, rawRdbId, shardOverride);
}
// this one is faster...
// returns false if blocked
bool addMetaList( const char *metaList, int32_t metaListSize, collnum_t collnum, void *state,
void (* callback)(void *state), int32_t niceness, char rdbId = -1, int32_t shardOverride = -1 );
// Typed convenience overload for the raw-buffer variant: takes the rdb id as
// an rdbid_t, narrows it to the char form and hands the call on to the
// char-rdbId overload, passing every other argument through untouched.
// Returns whatever that overload returns.
bool addMetaList( const char *metaList, int32_t metaListSize, collnum_t collnum, void *state,
void (* callback)(void *state), int32_t niceness, rdbid_t rdbId, int32_t shardOverride = -1 )
{
	// a char argument selects the char-based overload, so this cannot recurse
	const char rawRdbId = static_cast<char>(rdbId);
	return addMetaList(metaList, metaListSize, collnum, state, callback, niceness, rawRdbId, shardOverride);
}
bool addMetaList2();

@ -556,7 +556,7 @@ bool processLoop ( void *state ) {
Highlight hi;
// make words so we can set the scores to ignore fielded terms
Words qw;
qw.set( q, qlen, true, false );
qw.set(q, qlen, true);
// declare up here
Matches m;

@ -17,9 +17,7 @@ public:
//Url m_rootUrl;
const char *m_u;
int32_t m_ulen;
bool m_applyRulesetToRoot;
char m_rootQuality;
int32_t m_reparseRootRetries;
char m_coll[MAX_COLL_LEN];
int32_t m_collLen;
//int32_t m_sfn;

@ -33,7 +33,6 @@ class StateStatsdb {
int32_t m_hostId;
// Request & build flags
bool m_dateLimit;
bool m_dateCustom;
bool m_cacti;
bool m_now;

File diff suppressed because it is too large Load Diff

@ -137,6 +137,9 @@ class PosdbTable {
return m_initialized;
}
bool genDebugScoreInfo1(int32_t &numProcessed, int32_t &topCursor, QueryTermInfo *qtibuf);
bool genDebugScoreInfo2(DocIdScore &dcs, int32_t &lastLen, uint64_t &lastDocId, char siteRank, float score, int32_t intScore, char docLang);
uint64_t m_docId;
bool m_hasMaxSerpScore;
@ -198,7 +201,7 @@ class PosdbTable {
// for debug msgs
void *m_logstate;
Msg39Request *m_r;
Msg39Request *m_msg39req;
// for gbsortby:item.price ...
int32_t m_sortByTermNum;

@ -21,9 +21,6 @@ static void uncountStripe ( struct StateControl *stC ) ;
struct StateControl{
int32_t m_pageNum;
int64_t m_start;
int32_t m_reqNum;
SafeBuf m_sb;
TcpSocket *m_s;
int64_t m_startTime;
bool m_isQuery;

@ -386,10 +386,8 @@ bool RdbDump::dumpTree(bool recall) {
char tmp1[32];
char tmp2[32];
if (m_firstKeyInQueue) {
strcpy(tmp1, KEYSTR(m_firstKeyInQueue, m_list->m_ks));
ks1 = tmp1;
}
strcpy(tmp1, KEYSTR(m_firstKeyInQueue, m_list->m_ks));
ks1 = tmp1;
if (m_lastKeyInQueue) {
strcpy(tmp2, KEYSTR(m_lastKeyInQueue, m_list->m_ks));
@ -511,7 +509,7 @@ bool RdbDump::dumpList(RdbList *list, int32_t niceness, bool recall) {
}
if (g_conf.m_verifyWrites) {
char rdbId = 0;
rdbid_t rdbId = RDB_NONE;
if (m_rdb) rdbId = m_rdb->getRdbId();
m_list->checkList_r(false, rdbId);
m_list->resetListPtr();

@ -657,7 +657,7 @@ bool RdbList::growList ( int32_t newSize ) {
// . I had a problem where a foreign spider rec was in our spiderdb and
// I couldn't delete it because the del key would go to the foreign group!
// . as a temp patch I added a msg1 force local group option
bool RdbList::checkList_r ( bool abortOnProblem , char rdbId ) {
bool RdbList::checkList_r(bool abortOnProblem, rdbid_t rdbId) {
assert(this);
verify_signature();
// bail if empty
@ -1134,7 +1134,7 @@ int RdbList::printList ( int32_t logtype ) {
// . minRecSizes is really only important when we read just 1 list
// . it's a really good idea to keep it as -1 otherwise
bool RdbList::constrain(const char *startKey, char *endKey, int32_t minRecSizes,
int32_t hintOffset, const char *hintKey, char rdbId, const char *filename) {
int32_t hintOffset, const char *hintKey, rdbid_t rdbId, const char *filename) {
// log(LOG_TRACE,"RdbList(%p)::constrain()",this);
assert(this);
verify_signature();
@ -1729,7 +1729,7 @@ bool RdbList::posdbConstrain(const char *startKey, char *endKey, int32_t minRecS
// before calling this
// . CAUTION: you should call constrain() on all "lists" before calling this
// so we don't have to do boundary checks on the keys here
void RdbList::merge_r(RdbList **lists, int32_t numLists, const char *startKey, const char *endKey, int32_t minRecSizes, bool removeNegRecs, char rdbId) {
void RdbList::merge_r(RdbList **lists, int32_t numLists, const char *startKey, const char *endKey, int32_t minRecSizes, bool removeNegRecs, rdbid_t rdbId) {
assert(this);
verify_signature();
// sanity
@ -2455,7 +2455,7 @@ skip:
return true;
}
void RdbList::setFromPtr ( char *p , int32_t psize , char rdbId ) {
void RdbList::setFromPtr(char *p, int32_t psize, rdbid_t rdbId) {
// free and NULLify any old m_list we had to make room for our new list
freeList();

@ -8,6 +8,7 @@
#include "Sanity.h"
#include "types.h"
#include "GbSignature.h"
#include "rdbid_t.h"
#include <stdint.h>
/**
@ -85,7 +86,7 @@ public:
bool useHalfKeys ,
char keySize = sizeof(key96_t) );
void setFromPtr ( char *p , int32_t psize , char rdbId ) ;
void setFromPtr(char *p, int32_t psize, rdbid_t rdbId);
void stealFromOtherList(RdbList *other_list);
@ -184,7 +185,7 @@ public:
// and malloc()'ing
// . may change m_list and/or m_listSize
bool constrain(const char *startKey, char *endKey, int32_t minRecSizes,
int32_t hintOffset, const char *hintKey, char rdbId, const char *filename);
int32_t hintOffset, const char *hintKey, rdbid_t rdbId, const char *filename);
bool posdbConstrain(const char *startKey, char *endKey, int32_t minRecSizes,
int32_t hintOffset, const char *hintKey, const char *filename);
@ -198,7 +199,7 @@ public:
// . merge the lists into this list
// . set our startKey/endKey to "startKey"/"endKey"
// . exclude any records from lists not in that range
void merge_r(RdbList **lists, int32_t numLists, const char *startKey, const char *endKey, int32_t minRecSizes, bool removeNegRecs, char rdbId);
void merge_r(RdbList **lists, int32_t numLists, const char *startKey, const char *endKey, int32_t minRecSizes, bool removeNegRecs, rdbid_t rdbId);
bool posdbMerge_r(RdbList **lists, int32_t numLists, const char *startKey, const char *endKey, int32_t minRecSizes, bool removeNegKeys);
@ -248,7 +249,7 @@ public:
// . check to see if keys in order
// . logs any problems
// . sleeps if any problems encountered
bool checkList_r ( bool abortOnProblem = true , char rdbId = 0 ); // RDB_NONE );
bool checkList_r(bool abortOnProblem = true, rdbid_t rdbId = RDB_NONE);
// . removes records whose keys aren't in proper range (corruption)
// . returns false and sets errno on error/problem

@ -437,7 +437,7 @@ bool Rebalance::gotList ( ) {
Rdb *rdb = g_process.m_rdbs[m_rdbNum];
char rdbId = rdb->getRdbId();
rdbid_t rdbId = rdb->getRdbId();
int32_t ks = rdb->getKeySize();

@ -1858,7 +1858,7 @@ bool XmlDoc::indexDoc ( ) {
goto skipNewAdd1;
}
// store the new request (store reply for this below)
char rd = RDB_SPIDERDB;
rdbid_t rd = RDB_SPIDERDB;
if ( m_useSecondaryRdbs ) rd = RDB2_SPIDERDB2;
if ( ! m_metaList2.pushChar(rd) )
{
@ -1902,7 +1902,7 @@ bool XmlDoc::indexDoc ( ) {
//SafeBuf metaList;
char rd = RDB_SPIDERDB;
rdbid_t rd = RDB_SPIDERDB;
if ( m_useSecondaryRdbs ) rd = RDB2_SPIDERDB2;
if ( ! m_metaList2.pushChar( rd ) )
{
@ -12118,7 +12118,7 @@ void XmlDoc::printMetaList ( char *p , char *pend , SafeBuf *sb ) {
int32_t rcount = 0;
for ( ; p < pend ; p += recSize ) {
// get rdbid
uint8_t rdbId = *p & 0x7f;
rdbid_t rdbId = (rdbid_t)(*p & 0x7f);
// skip
p++;
// get key size
@ -12342,9 +12342,7 @@ bool XmlDoc::verifyMetaList ( char *p , char *pend , bool forDelete ) {
// first is rdbId
//char rdbId = -1; // m_rdbId;
//if ( rdbId < 0 ) rdbId = *p++;
uint8_t rdbId = *p++;
// mask off rdbId
rdbId &= 0x7f;
rdbid_t rdbId = (rdbid_t)(*p++ & 0x7f);
// negative key?
bool del = !( *p & 0x01 );
@ -12442,7 +12440,7 @@ bool XmlDoc::hashMetaList ( HashTableX *ht ,
int32_t count = 0;
for ( ; p < pend ; p += recSize , count++ ) {
// get rdbid
char rdbId = *p & 0x7f;
rdbid_t rdbId = (rdbid_t)(*p & 0x7f);
// skip rdb id
p++;
// save that
@ -13041,9 +13039,9 @@ char *XmlDoc::getMetaList ( bool forDelete ) {
logTrace( g_conf.m_logTraceXmlDoc, "Adding spider reply to spiderdb" );
// rdbid first
char rd = RDB_SPIDERDB;
rdbid_t rd = RDB_SPIDERDB;
if ( m_useSecondaryRdbs ) rd = RDB2_SPIDERDB2;
*m_p++ = rd;
*m_p++ = (char)rd;
// get this
if ( ! m_srepValid ) { g_process.shutdownAbort(true); }
// store the spider rec
@ -14008,7 +14006,7 @@ char *XmlDoc::getMetaList ( bool forDelete ) {
char *rec = p;
// get the rdbid for this rec
char rdbId = byte & 0x7f;
rdbid_t rdbId = (rdbid_t)(byte & 0x7f);
p++;
// get the key size
@ -14082,7 +14080,7 @@ char *XmlDoc::getMetaList ( bool forDelete ) {
char byte = *p;
// get rdbId
char rdbId = byte & 0x7f;
rdbid_t rdbId = (rdbid_t)(byte & 0x7f);
p++;
// key size
@ -14184,7 +14182,7 @@ char *XmlDoc::getMetaList ( bool forDelete ) {
char *rec = *(char **)dt8.getValueFromSlot(i);
// get rdbId with hi bit possibly set
char rdbId = rec[0] & 0x7f;
rdbid_t rdbId = (rdbid_t)(rec[0] & 0x7f);
// key size
int32_t ks = getKeySizeFromRdbId(rdbId);
@ -15433,7 +15431,7 @@ bool XmlDoc::addTable144 ( HashTableX *tt1 , int64_t docId , SafeBuf *buf ) {
if ( ! m_langIdValid ) { g_process.shutdownAbort(true); }
char rdbId = RDB_POSDB;
rdbid_t rdbId = RDB_POSDB;
if ( m_useSecondaryRdbs ) rdbId = RDB2_POSDB2;
// store terms from "tt1" table
@ -15493,7 +15491,7 @@ bool XmlDoc::addTable224 ( HashTableX *tt1 ) {
if ( tt1->m_ds != 0 ) {g_process.shutdownAbort(true);}
}
char rdbId = RDB_LINKDB;
rdbid_t rdbId = RDB_LINKDB;
if ( m_useSecondaryRdbs ) rdbId = RDB2_LINKDB2;
// store terms from "tt1" table

@ -7060,12 +7060,6 @@ bool ramdiskTest() {
}
// CountDomains Structures and function definitions
// Plain record holding a domain string pointer, its length and a page counter.
// NOTE(review): in the visible code the only other mentions of lnk_info are
// commented-out declarations inside countdomains() -- confirm whether this
// struct is still needed.
struct lnk_info {
char *dom;        // domain string; presumably not NUL-terminated since a length is kept -- TODO confirm
int32_t domLen;   // length of dom (presumably in bytes)
int32_t pages;    // page counter; presumably pages seen for this link domain -- verify against users
};
struct dom_info {
char *dom;
int32_t domLen;
@ -7268,7 +7262,6 @@ void countdomains( const char* coll, int32_t numRecs, int32_t verbosity, int32_t
}
for( int32_t i = 0; i < dlinks->getNumLinks(); i++ ) {
//struct lnk_info *slink;
char *link = dlinks->getLink(i);
int32_t dlen;
const char *dom = getDomFast ( link , &dlen );
@ -7359,7 +7352,6 @@ void countdomains( const char* coll, int32_t numRecs, int32_t verbosity, int32_t
int32_t recsDisp;
struct ip_info *tmpipi ;
struct dom_info *tmpdomi ;
//struct lnk_info *tmplnk ;
loop = 0;
FILE *fhndl;