trying to bring back dmoz integration.
parent 91b8921b9e
commit 6c2c9f7774

Catdb.cpp (33 changed lines)
@ -51,8 +51,8 @@ bool Catdb::init ( ) {
// . initialize our own internal rdb
// . i no longer use cache so changes to tagdb are instant
// . we still use page cache however, which is good enough!
if ( this == &g_catdb )
return m_rdb.init ( g_hostdb.m_dir ,
//if ( this == &g_catdb )
if ( ! m_rdb.init ( g_hostdb.m_dir ,
"catdb" ,
true , // dedup same keys?
-1 , // fixed record size
@ -72,8 +72,14 @@ bool Catdb::init ( ) {
false,
12,
false,
true ); // is collectionless?
return true;
true )) // is collectionless?
return false;

// normally Collectiondb.addColl() will call Rdb::addColl() which
// will init the CollectionRec::m_rdbBase, which is what
// Rdb::getBase(collnum_t) will return. however, for collectionless
// rdb databases we set Rdb::m_collectionlessBase special here.
return m_rdb.addColl ( NULL );
}

bool Catdb::init2 ( long treeMem ) {
@ -119,7 +125,7 @@ bool Catdb::verify ( char *coll ) {
g_threads.disableThreads();

Msg5 msg5;
Msg5 msg5b;
//Msg5 msg5b;
RdbList list;
key_t startKey;
key_t endKey;
@ -128,7 +134,7 @@ bool Catdb::verify ( char *coll ) {
//long minRecSizes = 64000;

if ( ! msg5.getList ( RDB_CATDB ,
coll ,
"",//coll ,
&list ,
startKey ,
endKey ,
@ -147,7 +153,7 @@ bool Catdb::verify ( char *coll ) {
-1 ,
true ,
-1LL ,
&msg5b ,
NULL,//&msg5b ,
true )) {
g_threads.enableThreads();
return log("db: HEY! it did not block");
@ -309,6 +315,19 @@ void Catdb::listSearch ( RdbList *list,
// for small lists, just loop through the list
if (list->getListSize() < 16*1024) {
while ( ! list->isExhausted() ) {
// for debug!
/*
CatRec crec;
crec.set ( NULL,
list->getCurrentData(),
list->getCurrentDataSize(),
false);
log("catdb: caturl=%s #catid=%li version=%li"
,crec.m_url
,(long)crec.m_numCatids
,(long)crec.m_version
);
*/
// check the current key
if ( list->getCurrentKey() != exactKey ) {
// miss, next
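
The hunks above change Catdb::init() to check the Rdb::init() return value and then register the database with a NULL collection, the pattern used for collectionless rdbs (see the Rdb::m_collectionlessBase comment in the diff). The toy model below, which is not gb code, sketches why addColl(NULL) suffices: a collectionless rdb keeps one shared base and returns it for any collection number. Apart from the names addColl(), getBase(), and m_collectionlessBase, everything here is invented for the illustration.

// ---------- illustrative sketch (not part of this commit) ----------
#include <cstdio>

typedef long collnum_t;
struct RdbBase { const char *m_dbname; };

class ToyRdb {
public:
	ToyRdb ( ) : m_collectionlessBase ( 0 ) { }
	// a NULL collection name means "collectionless": remember one shared base
	bool addColl ( const char *coll ) {
		if ( ! coll ) { m_collectionlessBase = &m_base; return true; }
		// a real collection would be registered through Collectiondb here
		return false;
	}
	// every collnum maps to the same base for a collectionless rdb
	RdbBase *getBase ( collnum_t collnum ) {
		(void)collnum;
		return m_collectionlessBase;
	}
	RdbBase  m_base;
	RdbBase *m_collectionlessBase;
};

int main ( ) {
	ToyRdb catdb;
	catdb.m_base.m_dbname = "catdb";
	catdb.addColl ( NULL );
	// any collection number resolves to the single shared base
	printf ( "%s\n", catdb.getBase(42)->m_dbname );
	return 0;
}
// ---------- end sketch ----------
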
@ -1011,13 +1011,17 @@ errEnd:
|
||||
return false;
|
||||
}
|
||||
|
||||
// generate sub categories for a given catid
|
||||
// . generate sub categories for a given catid
|
||||
// . store list of SubCategories into "subCatBuf" return # stored
|
||||
long Categories::generateSubCats ( long catid,
|
||||
SubCategory *subCats,
|
||||
char **catBuffer,
|
||||
long *catBufferSize,
|
||||
long *catBufferLen,
|
||||
bool allowRealloc ) {
|
||||
SafeBuf *subCatBuf
|
||||
//SubCategory *subCats,
|
||||
//char **catBuffer,
|
||||
//long *catBufferSize,
|
||||
//long *catBufferLen,
|
||||
//bool allowRealloc
|
||||
) {
|
||||
|
||||
long catIndex;
|
||||
unsigned long fileOffset;
|
||||
unsigned long n;
|
||||
@ -1029,15 +1033,22 @@ long Categories::generateSubCats ( long catid,
|
||||
long prefixLen;
|
||||
long nameStart;
|
||||
long nameLen;
|
||||
long catp = 0;
|
||||
long catBufferInc = *catBufferSize;
|
||||
// lookup the index for this catid
|
||||
long need ;
|
||||
SubCategory *cat;
|
||||
char *p ;
|
||||
|
||||
//long catp = 0;
|
||||
//long catBufferInc = *catBufferSize;
|
||||
// . lookup the index for this catid
|
||||
// . binary step, guessing to approximate place
|
||||
// and then scanning from there
|
||||
catIndex = getIndexFromId(catid);
|
||||
if (catIndex < 0)
|
||||
goto errEnd;
|
||||
// get the file offset
|
||||
fileOffset = m_cats[catIndex].m_structureOffset;
|
||||
// open the structure file
|
||||
// cat/content.rdf.u8 in utf8
|
||||
char filename[512];
|
||||
sprintf(filename, "%scat/%s", g_hostdb.m_dir, RDFSTRUCTURE_FILE);
|
||||
//m_rdfStream.clear();
|
||||
@ -1066,12 +1077,16 @@ long Categories::generateSubCats ( long catid,
|
||||
log("cat: Error Reading Structure Offset");
|
||||
goto errEnd;
|
||||
}
|
||||
// point to the buffer we just read with m_rdfPtr
|
||||
m_rdfPtr = m_rdfBuffer;
|
||||
m_rdfEnd = &m_rdfBuffer[n];
|
||||
m_currOffset = fileOffset;
|
||||
|
||||
// parse tags for the sub categories or until we hit /Topic
|
||||
nextTag:
|
||||
// . this increments m_rdfPtr until it points to the beginning of a tag
|
||||
// . it may end up reading another chunk from disk
|
||||
// . it memcopies m_tagRecfer to be the name of the tag it points to
|
||||
if (rdfNextTag() < 0)
|
||||
goto gotSubCats;
|
||||
// check for /Topic
|
||||
@ -1173,37 +1188,36 @@ nextTag:
|
||||
break;
|
||||
}
|
||||
// . fill the next sub category
|
||||
if (catp + prefixLen + nameLen >= *catBufferSize) {
|
||||
if (!allowRealloc)
|
||||
goto gotSubCats;
|
||||
// realloc the buffer
|
||||
char *re_catBuffer = (char*)mrealloc ( *catBuffer,
|
||||
*catBufferSize,
|
||||
*catBufferSize+catBufferInc,
|
||||
"Categories" );
|
||||
if (!re_catBuffer) {
|
||||
log ( "Could not allocate %li bytes for catBuffer",
|
||||
*catBufferSize+catBufferInc );
|
||||
g_errno = ENOMEM;
|
||||
goto errEnd;
|
||||
}
|
||||
*catBuffer = re_catBuffer;
|
||||
*catBufferSize += catBufferInc;
|
||||
}
|
||||
// fill the prefix and name in the buffer and subcat
|
||||
// . fill the prefix and name in the buffer and subcat
|
||||
need = sizeof(SubCategory) + prefixLen + 1 + nameLen + 1;
|
||||
if ( ! subCatBuf->reserve(need) ) goto errEnd;
|
||||
cat = (SubCategory *)(subCatBuf->getBuf());
|
||||
cat->m_prefixLen = prefixLen;
|
||||
cat->m_nameLen = nameLen;
|
||||
cat->m_type = currType;
|
||||
p = cat->m_buf;
|
||||
memcpy ( p , catStr + prefixStart , prefixLen );
|
||||
p += prefixLen;
|
||||
*p++ = '\0';
|
||||
memcpy ( p , catStr + nameStart , nameLen );
|
||||
p += nameLen;
|
||||
*p++ = '\0';
|
||||
|
||||
/*
|
||||
subCats[numSubCats].m_prefixOffset = catp;
|
||||
subCats[numSubCats].m_prefixLen = prefixLen;
|
||||
if (prefixLen > 0) {
|
||||
memcpy(&((*catBuffer)[catp]), &catStr[prefixStart], prefixLen);
|
||||
catp += prefixLen;
|
||||
}
|
||||
subCats[numSubCats].m_nameOffset = catp;
|
||||
subCats[numSubCats].m_nameOffset = catBuf->length();//catp;
|
||||
subCats[numSubCats].m_nameLen = nameLen;
|
||||
if (nameLen > 0) {
|
||||
memcpy(&((*catBuffer)[catp]), &catStr[nameStart], nameLen);
|
||||
catp += nameLen;
|
||||
}
|
||||
subCats[numSubCats].m_type = currType;
|
||||
*/
|
||||
// next sub cat
|
||||
numSubCats++;
|
||||
if (numSubCats >= MAX_SUB_CATS) {
|
||||
@ -1214,14 +1228,14 @@ nextTag:
|
||||
// next tag
|
||||
goto nextTag;
|
||||
gotSubCats:
|
||||
*catBufferLen = catp;
|
||||
//*catBufferLen = catp;
|
||||
//m_rdfStream.close();
|
||||
//m_rdfStream.clear();
|
||||
close(m_rdfStream);
|
||||
return numSubCats;
|
||||
|
||||
errEnd:
|
||||
*catBufferLen = 0;
|
||||
//*catBufferLen = 0;
|
||||
//m_rdfStream.close();
|
||||
//m_rdfStream.clear();
|
||||
close(m_rdfStream);
|
||||
|
Categories.h (19 changed lines)

@ -61,11 +61,15 @@ struct CategoryHash {
};

struct SubCategory {
long m_prefixOffset;
//long m_prefixOffset;
long m_prefixLen;
long m_nameOffset;
//long m_nameOffset;
long m_nameLen;
char m_type;
long getRecSize () { return sizeof(SubCategory)+m_prefixLen+m_nameLen+2;};
char *getPrefix() { return m_buf; };
char *getName () { return m_buf+m_prefixLen+1;};
char m_buf[0];
};

class Categories {
@ -153,13 +157,10 @@ public:
// normalize a url string
long fixUrl ( char *url, long urlLen );

// generate sub categories for a given catid
long generateSubCats ( long catid,
SubCategory *subCats,
char **catBuffer,
long *catBufferSize,
long *catBufferLen,
bool allowRealloc = true );
// . generate sub categories for a given catid
// . store list of SubCategories into "subCatBuf" return # stored
// . hits disk without using threads... so kinda sucks...
long generateSubCats ( long catid, SafeBuf *subCatBuf );

long getNumUrlsFromIndex ( long catIndex ) {
return m_cats[catIndex].m_numUrls; };
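
With the new generateSubCats(long catid, SafeBuf *subCatBuf) declared above, callers receive variable-length SubCategory records packed back-to-back in a SafeBuf and walk them with getRecSize()/getPrefix()/getName(), as PageResults.cpp does later in this commit. The standalone sketch below re-declares a trimmed copy of the struct and fills a buffer by hand purely to show that walk; it is illustrative only and not part of the commit.

// ---------- illustrative sketch (not part of this commit) ----------
#include <cstdio>
#include <cstring>

// trimmed copy of the SubCategory struct from the Categories.h hunk above
struct SubCategory {
	long  m_prefixLen;
	long  m_nameLen;
	char  m_type;
	long  getRecSize () { return sizeof(SubCategory)+m_prefixLen+m_nameLen+2; }
	char *getPrefix  () { return m_buf; }
	char *getName    () { return m_buf+m_prefixLen+1; }
	char  m_buf[0];
};

int main ( ) {
	long storage[64];              // long-aligned backing store
	char *buf = (char *)storage;
	// pack one record by hand: prefix "Top", name "Arts", both NUL-terminated
	SubCategory *cat = (SubCategory *)buf;
	cat->m_prefixLen = 3;
	cat->m_nameLen   = 4;
	cat->m_type      = 0;
	memcpy ( cat->m_buf                        , "Top"  , 4 );
	memcpy ( cat->m_buf + cat->m_prefixLen + 1 , "Arts" , 5 );
	// walk the packed records the way PageResults.cpp walks subCatBuf
	char *p    = buf;
	char *pend = buf + cat->getRecSize();
	for ( ; p < pend ; ) {
		SubCategory *c = (SubCategory *)p;
		printf ( "prefix=%s name=%s\n", c->getPrefix(), c->getName() );
		p += c->getRecSize();
	}
	return 0;
}
// ---------- end sketch ----------
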
Hostdb.cpp

@ -2308,10 +2308,9 @@ uint32_t Hostdb::getGroupId ( char rdbId,void *k,bool split ) {
unsigned long long d = g_revdb.getDocId( (key_t *)k );
return m_map [ ((d>>14)^(d>>7)) & (MAX_KSLOTS-1) ];
}

//else if ( rdbId == RDB_CATDB || rdbId == RDB2_CATDB2 ) {
// return m_map [(*(uint16_t *)((char *)k + 10))>>3];
//}
else if ( rdbId == RDB_CATDB || rdbId == RDB2_CATDB2 ) {
return m_map [(*(uint16_t *)((char *)k + 10))>>3];
}
// core -- must be provided
char *xx = NULL; *xx = 0;
//groupId=key.n1 & g_hostdb.m_groupMask;
Makefile (2 changed lines)

@ -39,7 +39,7 @@ OBJS = Tfndb.o UdpSlot.o \
Parms.o Pages.o Msg28.o Msg30.o \
Unicode.o iana_charset.o Iso8859.o \
SearchInput.o \
Categories.o Msg2a.o PageCatdb.o PageDirectory.o Msg2b.o \
Categories.o Msg2a.o PageCatdb.o PageDirectory.o \
SafeBuf.o Datedb.o \
UCNormalizer.o UCPropTable.o UnicodeProperties.o \
Pops.o Title.o Pos.o LangList.o \
34
Msg40.cpp
34
Msg40.cpp
@ -148,6 +148,10 @@ bool Msg40::getResults ( SearchInput *si ,
|
||||
// we need this info for caching as well
|
||||
//m_numGigabitInfos = 0;
|
||||
|
||||
|
||||
//just getfrom searchinput
|
||||
//.... m_catId = hr->getLong("catid",0);m_si->m_catId;
|
||||
|
||||
m_postQueryRerank.set1( this, si );
|
||||
|
||||
// get the collection rec
|
||||
@ -680,6 +684,20 @@ bool Msg40::gotDocIds ( ) {
|
||||
// if ( ! m_msg1a.generateReferences(m_si,(void*)this,didTaskWrapper) )
|
||||
// m_tasksRemaining++;
|
||||
|
||||
|
||||
//
|
||||
// call Msg2b to generate directory
|
||||
//
|
||||
// why is this here? it does not depend on the docids. (mdw 9/25/13)
|
||||
// dissect it and fix it!!
|
||||
//
|
||||
//if ( m_si->m_catId &&
|
||||
// ! m_msg2b.generateDirectory ( m_si->m_catId,
|
||||
// (void*)this,
|
||||
// didTaskWrapper ) )
|
||||
// m_tasksRemaining++;
|
||||
|
||||
|
||||
return launchMsg20s ( false );
|
||||
}
|
||||
|
||||
@ -878,7 +896,6 @@ bool Msg40::reallocMsg20Buf ( ) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
void didTaskWrapper ( void* state ) {
|
||||
Msg40 *THIS = (Msg40 *) state;
|
||||
// one less task
|
||||
@ -888,7 +905,6 @@ void didTaskWrapper ( void* state ) {
|
||||
// we are done, call the callback
|
||||
THIS->m_callback ( THIS->m_state );
|
||||
}
|
||||
*/
|
||||
|
||||
bool Msg40::launchMsg20s ( bool recalled ) {
|
||||
|
||||
@ -2128,7 +2144,7 @@ long Msg40::getStoredSize ( ) {
|
||||
//size += m_msg24.getStoredSize ( );
|
||||
//size += m_msg1a.getStoredSize ( );
|
||||
// cache msg2b if we have it
|
||||
size += m_msg2b.getStoredSize();
|
||||
//size += m_msg2b.getStoredSize();
|
||||
|
||||
return size;
|
||||
}
|
||||
@ -2203,9 +2219,9 @@ long Msg40::serialize ( char *buf , long bufLen ) {
|
||||
//if ( y == -1 ) return -1;
|
||||
//p += y;
|
||||
|
||||
long z = m_msg2b.serialize (p, pend - p);
|
||||
if ( z == -1 ) return -1;
|
||||
p += z;
|
||||
//long z = m_msg2b.serialize (p, pend - p);
|
||||
//if ( z == -1 ) return -1;
|
||||
//p += z;
|
||||
|
||||
if ( m_r.m_debug )
|
||||
log("query: msg40 serialize nd=%li "
|
||||
@ -2258,9 +2274,9 @@ long Msg40::deserialize ( char *buf , long bufSize ) {
|
||||
}
|
||||
|
||||
// msg2b
|
||||
long z = m_msg2b.deserialize ( p , pend - p );
|
||||
if ( z == -1 ) return -1;
|
||||
p += z;
|
||||
//long z = m_msg2b.deserialize ( p , pend - p );
|
||||
//if ( z == -1 ) return -1;
|
||||
//p += z;
|
||||
|
||||
// return bytes read
|
||||
return p - buf;
|
||||
|
Msg40.h (4 changed lines)

@ -14,7 +14,7 @@
#include "Msg39.h" // getTermFreqs()
#include "Msg20.h" // for getting summary from docId
#include "Msg17.h" // a distributed cache of serialized/compressed Msg40s
#include "Msg2b.h" // for generating directories
//#include "Msg2b.h" // for generating directories
#include "IndexReadInfo.h" // STAGE0,...
#include "Msg3a.h"
#include "PostQueryRerank.h"
@ -302,7 +302,7 @@ class Msg40 {
long m_docsToScanForTopics;

// Msg2b for generating a directory
Msg2b m_msg2b;
//Msg2b m_msg2b;

PostQueryRerank m_postQueryRerank;
28
Msg8b.cpp
28
Msg8b.cpp
@ -50,8 +50,8 @@ bool Msg8b::getCatRec ( Url *url ,
|
||||
m_state = state;
|
||||
m_callback = callback;
|
||||
m_url = url;
|
||||
m_coll = coll;
|
||||
m_collLen = collLen;
|
||||
//m_coll = coll;
|
||||
//m_collLen = collLen;
|
||||
m_cr = cr;
|
||||
m_niceness = niceness;
|
||||
|
||||
@ -68,10 +68,10 @@ bool Msg8b::getCatRec ( Url *url ,
|
||||
//m_coll = g_conf.m_dirColl;
|
||||
//m_collLen = gbstrlen(m_coll);
|
||||
// catdb uses a dummy collection now, should not be looked at
|
||||
m_coll = "catdb";
|
||||
m_collLen = 5;
|
||||
//m_coll = "catdb";
|
||||
//m_collLen = 5;
|
||||
|
||||
m_collnum = g_collectiondb.getCollnum ( m_coll , m_collLen );
|
||||
//m_collnum = g_collectiondb.getCollnum ( m_coll , m_collLen );
|
||||
|
||||
// . first, try it by canonical domain name
|
||||
// . if that finds no matches, then try it by ip domain
|
||||
@ -89,7 +89,7 @@ bool Msg8b::getCatRec ( Url *url ,
|
||||
//
|
||||
if ( g_hostdb.m_groupId != m_groupId ) {
|
||||
// coll, url, niceness(1), rdbid(1), useCanonicalName(1)
|
||||
long requestSize = m_collLen + m_url->getUrlLen() + 4 + 4;
|
||||
long requestSize = m_url->getUrlLen() + 4 + 3;
|
||||
// make the request
|
||||
char *p = m_request;
|
||||
*(long *)p = m_url->getIp() ; p+=4;
|
||||
@ -97,10 +97,10 @@ bool Msg8b::getCatRec ( Url *url ,
|
||||
*p = (char)niceness ; p++;
|
||||
*p = (char)useCanonicalName; p++;
|
||||
// coll
|
||||
memcpy(p, m_coll, m_collLen);
|
||||
p += m_collLen;
|
||||
*p = '\0';
|
||||
p++;
|
||||
//memcpy(p, m_coll, m_collLen);
|
||||
//p += m_collLen;
|
||||
//*p = '\0';
|
||||
//p++;
|
||||
// url
|
||||
memcpy(p, m_url->getUrl(), m_url->getUrlLen());
|
||||
p += m_url->getUrlLen();
|
||||
@ -186,7 +186,7 @@ bool Msg8b::getCatRec ( Url *url ,
|
||||
0 , // max cached age in seconds (60)
|
||||
false , // add net recv'd list to cache?
|
||||
RDB_CATDB, // specifies the rdb, 1 = tagdb
|
||||
m_coll ,
|
||||
"",//NULL,//m_coll ,
|
||||
//&m_list ,
|
||||
m_list ,
|
||||
startKey ,
|
||||
@ -545,7 +545,7 @@ bool Msg8b::gotList ( ) {
|
||||
char *rec;
|
||||
|
||||
//rec = g_catdb->getRec ( &m_list , m_url , &recSize );
|
||||
rec = g_catdb.getRec(m_list,m_url,&recSize,m_coll,m_collLen);
|
||||
rec = g_catdb.getRec(m_list,m_url,&recSize,NULL,0);//m_coll,m_collLen);
|
||||
|
||||
// if record found then set it and also set gotIt to true
|
||||
if ( rec ) {
|
||||
@ -588,8 +588,8 @@ void Msg8b::getIndirectCatids ( ) {
|
||||
matchRecs,
|
||||
matchRecSizes,
|
||||
MAX_IND_CATIDS,
|
||||
m_coll,
|
||||
m_collLen);
|
||||
NULL,//m_coll,
|
||||
0);//m_collLen);
|
||||
// parse out the catids from the matches
|
||||
m_cr->m_numIndCatids = 0;
|
||||
for ( long i = 0; i < numMatches; i++ ) {
|
||||
|
6
Msg8b.h
6
Msg8b.h
@ -68,11 +68,11 @@ class Msg8b {
|
||||
void cleanSlot ( );
|
||||
|
||||
// some specified input
|
||||
char *m_coll;
|
||||
long m_collLen;
|
||||
//char *m_coll;
|
||||
//long m_collLen;
|
||||
Url *m_url;
|
||||
|
||||
collnum_t m_collnum;
|
||||
//collnum_t m_collnum;
|
||||
|
||||
void (*m_callback ) ( void *state );//, CatRec *rec );
|
||||
void *m_state; // ptr to caller's private state data
|
||||
|
@ -105,8 +105,8 @@ bool sendPageCatdb ( TcpSocket *s , HttpRequest *r ) {
|
||||
st->m_url.set(url, urlLen);
|
||||
// call msg8b to lookup in catdb
|
||||
if (!st->m_msg8b.getCatRec ( &st->m_url,
|
||||
st->m_coll,
|
||||
st->m_collLen,
|
||||
NULL,//st->m_coll,
|
||||
0,//st->m_collLen,
|
||||
true,
|
||||
1,
|
||||
&st->m_catRec,
|
||||
|
674
PageResults.cpp
674
PageResults.cpp
@ -263,7 +263,7 @@ bool sendPageResults ( TcpSocket *s , HttpRequest *hr ) {
|
||||
char *qstr = hr->getString("q",&qlen,"",NULL);
|
||||
// . crap! also gotta encode apostrophe since "var url='..."
|
||||
// . true = encodeApostrophes?
|
||||
sb.urlEncode ( qstr , true );
|
||||
sb.urlEncode2 ( qstr , true );
|
||||
// propagate "admin" if set
|
||||
long admin = hr->getLong("admin",-1);
|
||||
if ( admin != -1 ) sb.safePrintf("&admin=%li",admin);
|
||||
@ -272,7 +272,7 @@ bool sendPageResults ( TcpSocket *s , HttpRequest *hr ) {
|
||||
char *sites = hr->getString("sites",&sitesLen,NULL);
|
||||
if ( sites ) {
|
||||
sb.safePrintf("&sites=");
|
||||
sb.urlEncode ( sites,true);
|
||||
sb.urlEncode2 ( sites,true);
|
||||
}
|
||||
// propagate "debug" if set
|
||||
long debug = hr->getLong("debug",0);
|
||||
@ -744,6 +744,8 @@ static bool printGigabit ( State0 *st,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool printDMOZSubTopics ( SafeBuf& sb, long catId, State0 *st, bool inXml ) ;
|
||||
|
||||
// . make a web page from results stored in msg40
|
||||
// . send it on TcpSocket "s" when done
|
||||
// . returns false if blocked, true otherwise
|
||||
@ -805,6 +807,70 @@ bool gotResults ( void *state ) {
|
||||
return sendReply(st,NULL);
|
||||
}
|
||||
|
||||
|
||||
// grab the query
|
||||
char *q = msg40->getQuery();
|
||||
long qlen = msg40->getQueryLen();
|
||||
|
||||
bool xml = si->m_xml;
|
||||
|
||||
|
||||
// display it?
|
||||
if ( si->m_catId >= 0 ) {
|
||||
long dirIndex = g_categories->getIndexFromId(si->m_catId);
|
||||
// dirIndex = g_categories->getIndexFromId(si->m_cat_sdir);
|
||||
if (dirIndex < 0) dirIndex = 0;
|
||||
// display the directory bread crumb
|
||||
//if( (si->m_cat_dirId > 0 && si->m_isAdmin && !si->m_isFriend)
|
||||
// || (si->m_cat_sdir > 0 && si->m_cat_sdirt != 0) )
|
||||
// sb.safePrintf("<br><br>");
|
||||
// shortcut. rtl=Right To Left language format.
|
||||
bool rtl = g_categories->isIdRTL ( si->m_catId ) ;
|
||||
//st->m_isRTL = rtl;
|
||||
if ( ! xml ) {
|
||||
sb.safePrintf("\n<font size=4><b>");
|
||||
if ( rtl ) sb.safePrintf("<span dir=ltr>");
|
||||
sb.safePrintf("<a href=\"/\">Top</a>: ");
|
||||
}
|
||||
// put crumbin xml?
|
||||
if ( xml )
|
||||
sb.safePrintf("<breacdcrumb><![CDATA[");
|
||||
// display the breadcrumb in xml or html?
|
||||
g_categories->printPathCrumbFromIndex(&sb,dirIndex,rtl);
|
||||
sb.safePrintf("]]></breadcrumb>\n" );
|
||||
|
||||
// print the num
|
||||
if ( ! xml ) {
|
||||
sb.safePrintf("</b>  <i>");
|
||||
// how many urls/entries in this topic?
|
||||
long nu =g_categories->getNumUrlsFromIndex(dirIndex);
|
||||
if ( rtl )
|
||||
sb.safePrintf("<span dir=ltr>(%li)</span>",nu);
|
||||
else
|
||||
sb.safePrintf("(%li)", nu);
|
||||
sb.safePrintf("</i></font><br><br>\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
///////////
|
||||
//
|
||||
// show DMOZ subcategories if doing either a
|
||||
// "gbpdcat:<catid> |" (Search restricted to category)
|
||||
// "gbdcat:<catid>" (DMOZ urls in that topic, c=dmoz3)
|
||||
//
|
||||
// The search gbdcat: results should be sorted by siterank i guess
|
||||
// since it is only search a single term: gbdcat:<catid> so we can
|
||||
// put our stars back onto that and should be sorted by them.
|
||||
//
|
||||
///////////
|
||||
if ( si->m_catId >= 0 )
|
||||
// print the subtopcis in this topic. show as links above
|
||||
// the search results
|
||||
printDMOZSubTopics ( sb, si->m_catId , st, xml );
|
||||
|
||||
|
||||
|
||||
// save how many docs are in it
|
||||
long long docsInColl = -1;
|
||||
//RdbBase *base = getRdbBase ( RDB_CHECKSUMDB , si->m_coll );
|
||||
@ -854,9 +920,6 @@ bool gotResults ( void *state ) {
|
||||
// numResults may be more than we requested now!
|
||||
long n = msg40->getDocsWanted();
|
||||
if ( n > numResults ) n = numResults;
|
||||
// grab the query
|
||||
char *q = msg40->getQuery();
|
||||
long qlen = msg40->getQueryLen();
|
||||
// . make the query class here for highlighting
|
||||
// . keepAllSingles means to convert all individual words into
|
||||
// QueryTerms even if they're in quotes or in a connection (cd-rom).
|
||||
@ -1204,7 +1267,7 @@ bool gotResults ( void *state ) {
|
||||
// print the word
|
||||
char *t = qw->m_word;
|
||||
long tlen = qw->m_wordLen;
|
||||
sb.utf8Encode ( t , tlen );
|
||||
sb.utf8Encode2 ( t , tlen );
|
||||
sb.safePrintf (" ");
|
||||
}
|
||||
// print tail if we had ignored terms
|
||||
@ -1264,7 +1327,7 @@ bool gotResults ( void *state ) {
|
||||
qe2 );
|
||||
// close it up
|
||||
sb.safePrintf ("\"><i><b>");
|
||||
sb.utf8Encode(st->m_spell, len);
|
||||
sb.utf8Encode2(st->m_spell, len);
|
||||
// then finish it off
|
||||
sb.safePrintf ("</b></i></a></font>\n<br><br>\n");
|
||||
}
|
||||
@ -1682,6 +1745,60 @@ bool printInlinkText ( SafeBuf &sb , Msg20Reply *mr , SearchInput *si ,
|
||||
return true;
|
||||
}
|
||||
|
||||
//
|
||||
// . print a dmoz topic for the given numeric catid UNDER search result
|
||||
// . print "Search in Category" link as well
|
||||
//
|
||||
static bool printDMOZCategoryUnderResult ( SafeBuf &sb ,
|
||||
SearchInput *si,
|
||||
long catid ,
|
||||
State0 *st ) {
|
||||
|
||||
uint8_t queryLanguage = langUnknown;
|
||||
// Don't print category if not in native language category
|
||||
// Note that this only trims out "World" cats, not all
|
||||
// of them. Some of them may still sneak in.
|
||||
if(si->m_langHint)
|
||||
queryLanguage = si->m_langHint;
|
||||
if(queryLanguage != langUnknown) {
|
||||
char tmpbuf[1024];
|
||||
SafeBuf langsb(tmpbuf, 1024);
|
||||
g_categories->printPathFromId(&langsb, catid, false);
|
||||
char *ptr = langsb.getBufStart();
|
||||
uint8_t lang = g_langId.findLangFromDMOZTopic(ptr + 7);
|
||||
if(!strncmp("World: ", ptr, 6) &&
|
||||
lang != langUnknown &&
|
||||
lang != queryLanguage)
|
||||
// do not print it if not in our language
|
||||
return true;
|
||||
}
|
||||
//////
|
||||
//
|
||||
// print a link to apply your query to this DMOZ category
|
||||
//
|
||||
//////
|
||||
sb.safePrintf("<a href=\"/search?s=0&q=gbpdcat%%3A%li",catid);
|
||||
sb.urlEncode("|",1);
|
||||
sb.urlEncode(si->m_sbuf1.getBufStart(),si->m_sbuf1.length());
|
||||
sb.safePrintf("\">Search in Category</a>: ");
|
||||
|
||||
// setup the host of the url
|
||||
//if ( dmozHost )
|
||||
// sb.safePrintf("<a href=\"http://%s/", dmozHost );
|
||||
//else
|
||||
sb.safePrintf("<a href=\"/");
|
||||
// print link
|
||||
g_categories->printPathFromId(&sb, catid, true,si->m_isRTL);
|
||||
sb.safePrintf("/\">");
|
||||
// print the name of the dmoz category
|
||||
sb.safePrintf("<font color=#c62939>");
|
||||
g_categories->printPathFromId(&sb, catid, false,si->m_isRTL);
|
||||
sb.safePrintf("</font></a><br>");
|
||||
//++tr.brCount;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// use this for xml as well as html
|
||||
static int printResult ( SafeBuf &sb,
|
||||
State0 *st,
|
||||
@ -1806,6 +1923,13 @@ static int printResult ( SafeBuf &sb,
|
||||
if ( mr->m_isBanned && ! si->m_xml )
|
||||
sb.safePrintf("<font color=red><b>BANNED</b></font> ");
|
||||
|
||||
///////
|
||||
//
|
||||
// PRINT THE TITLE
|
||||
//
|
||||
///////
|
||||
|
||||
|
||||
// the a href tag
|
||||
if ( ! si->m_xml ) {
|
||||
sb.safePrintf ( "<a href=" );
|
||||
@ -1824,6 +1948,41 @@ static int printResult ( SafeBuf &sb,
|
||||
long strLen = mr->size_tbuf - 1;// msg40->getTitleLen(i);
|
||||
if ( ! str || strLen < 0 ) strLen = 0;
|
||||
|
||||
/////
|
||||
//
|
||||
// are we printing a dmoz category page?
|
||||
// get the appropriate dmoz title/summary to use since the same
|
||||
// url can exist in multiple topics (catIds) with different
|
||||
// titles summaries.
|
||||
//
|
||||
/////
|
||||
|
||||
char *dmozSummary = NULL;
|
||||
// TODO: just get the catid from httprequest directly?
|
||||
if ( si->m_catId > 0 ) { // si->m_cat_dirId > 0) {
|
||||
// . get the dmoz title and summary
|
||||
// . if empty then just a bunch of \0s, except for catIds
|
||||
Msg20Reply *mr = m20->getReply();
|
||||
char *dmozTitle = mr->ptr_dmozTitles;
|
||||
dmozSummary = mr->ptr_dmozSumms;
|
||||
char *dmozAnchor = mr->ptr_dmozAnchors;
|
||||
long *catIds = mr->ptr_catIds;
|
||||
long numCats = mr->size_catIds / 4;
|
||||
// loop through looking for the right ID
|
||||
for (long i = 0; i < numCats ; i++ ) {
|
||||
// assign shit if we match the dmoz cat we are showing
|
||||
if ( catIds[i] == si->m_catId) break;
|
||||
dmozTitle +=gbstrlen(dmozTitle)+1;
|
||||
dmozSummary +=gbstrlen(dmozSummary)+1;
|
||||
dmozAnchor += gbstrlen(dmozAnchor)+1;
|
||||
}
|
||||
// now make the title the dmoz title
|
||||
str = dmozTitle;
|
||||
strLen = gbstrlen(str);
|
||||
}
|
||||
|
||||
|
||||
|
||||
long hlen;
|
||||
//copy all summary and title excerpts for this result into here
|
||||
char tt[1024*32];
|
||||
@ -1872,7 +2031,11 @@ static int printResult ( SafeBuf &sb,
|
||||
|
||||
if ( ! si->m_xml ) sb.safePrintf ("</a><br>\n" ) ;
|
||||
|
||||
/////
|
||||
//
|
||||
// print content type after title
|
||||
//
|
||||
/////
|
||||
unsigned char ctype = mr->m_contentType;
|
||||
if ( ctype > 2 && ctype <= 13 ) {
|
||||
char *cs = g_contentTypeStrings[ctype];
|
||||
@ -1887,6 +2050,12 @@ static int printResult ( SafeBuf &sb,
|
||||
sb.safePrintf(" (%s) " ,cs);
|
||||
}
|
||||
|
||||
////////////
|
||||
//
|
||||
// print the summary
|
||||
//
|
||||
////////////
|
||||
|
||||
// . then the summary
|
||||
// . "s" is a string of null terminated strings
|
||||
char *send;
|
||||
@ -1897,22 +2066,56 @@ static int printResult ( SafeBuf &sb,
|
||||
if ( strLen < 0 ) strLen = 0;
|
||||
send = str + strLen;
|
||||
|
||||
// dmoz summary might override if we are showing a dmoz topic page
|
||||
if ( dmozSummary ) {
|
||||
str = dmozSummary;
|
||||
strLen = gbstrlen(dmozSummary);
|
||||
}
|
||||
|
||||
if ( si->m_xml ) sb.safePrintf("\t\t<sum><![CDATA[");
|
||||
// print summary out
|
||||
//sb.safeMemcpy ( str , strLen );
|
||||
|
||||
sb.brify ( str , strLen, 0 , cols ); // niceness = 0
|
||||
|
||||
// remove \0's... wtf?
|
||||
//char *xend = sb.getBuf();
|
||||
//char *x = xend - strLen;
|
||||
//for ( ; x < xend ; x++ ) if ( ! *x ) *x = ' ';
|
||||
|
||||
// close xml tag
|
||||
if ( si->m_xml ) sb.safePrintf("]]></sum>\n");
|
||||
// new line if not xml
|
||||
else if ( strLen ) sb.safePrintf("<br>\n");
|
||||
|
||||
|
||||
////////////
|
||||
//
|
||||
// . print DMOZ topics under the summary
|
||||
// . will print the "Search in Category" link too
|
||||
//
|
||||
////////////
|
||||
//Msg20Reply *mr = m20->getMsg20Reply();
|
||||
long nCatIds = mr->getNumCatIds();
|
||||
for (long i = 0; i < nCatIds; i++) {
|
||||
long catid = ((long *)(mr->ptr_catIds))[i];
|
||||
printDMOZCategoryUnderResult(sb,si,catid,st);
|
||||
}
|
||||
// skipCatsPrint:
|
||||
// print the indirect category Ids
|
||||
long nIndCatids = mr->size_indCatIds / 4;
|
||||
//if ( !cr->m_displayIndirectDmozCategories )
|
||||
// goto skipCatsPrint2;
|
||||
for ( long i = 0; i < nIndCatids; i++ ) {
|
||||
long catid = ((long *)(mr->ptr_indCatIds))[i];
|
||||
// skip it if it's a regular category
|
||||
//bool skip = false;
|
||||
long d; for ( d = 0; d < nCatIds; d++) {
|
||||
if ( catid == mr->ptr_catIds[i] ) break;
|
||||
}
|
||||
// skip if the indirect catid matched a directed catid
|
||||
if ( d < nCatIds ) continue;
|
||||
// otherwise print it
|
||||
printDMOZCategoryUnderResult(sb,si,catid,st);
|
||||
}
|
||||
|
||||
|
||||
////////////
|
||||
//
|
||||
// print the URL
|
||||
//
|
||||
////////////
|
||||
// hack off the http:// if any for displaying it on screen
|
||||
if ( urlLen > 8 && strncmp ( url , "http://" , 7 )==0 ) {
|
||||
url += 7; urlLen -= 7; }
|
||||
@ -1928,7 +2131,6 @@ static int printResult ( SafeBuf &sb,
|
||||
// so hack off the last slash
|
||||
if ( j < 0 ) urlLen--;
|
||||
}
|
||||
|
||||
if ( ! si->m_xml ) {
|
||||
sb.safePrintf ("<font color=gray>" );
|
||||
//sb.htmlEncode ( url , gbstrlen(url) , false );
|
||||
@ -1937,7 +2139,6 @@ static int printResult ( SafeBuf &sb,
|
||||
// turn off the color
|
||||
sb.safePrintf ( "</font>\n" );
|
||||
}
|
||||
|
||||
if ( si->m_xml ) {
|
||||
sb.safePrintf("\t\t<url><![CDATA[");
|
||||
sb.safeMemcpy ( url , urlLen );
|
||||
@ -3880,3 +4081,440 @@ bool printSingleScore ( SafeBuf &sb ,
|
||||
// "<br>");
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// print the search options under a dmoz search box
|
||||
bool printDirectorySearchType ( SafeBuf& sb, long sdirt ) {
|
||||
// default to entire directory
|
||||
if (sdirt < 1 || sdirt > 4)
|
||||
sdirt = 3;
|
||||
|
||||
// by default search the whole thing
|
||||
sb.safePrintf("<input type=\"radio\" name=\"sdirt\" value=\"3\"");
|
||||
if (sdirt == 3) sb.safePrintf(" checked>");
|
||||
else sb.safePrintf(">");
|
||||
sb.safePrintf("Entire Directory<br>\n");
|
||||
// entire category
|
||||
sb.safePrintf("<input type=\"radio\" name=\"sdirt\" value=\"1\"");
|
||||
if (sdirt == 1) sb.safePrintf(" checked>");
|
||||
else sb.safePrintf(">");
|
||||
sb.safePrintf("Entire Category<br>\n");
|
||||
// base category only
|
||||
sb.safePrintf("<nobr><input type=\"radio\" name=\"sdirt\" value=\"2\"");
|
||||
if (sdirt == 2) sb.safePrintf(" checked>");
|
||||
else sb.safePrintf(">");
|
||||
sb.safePrintf("Pages in Base Category</nobr><br>\n");
|
||||
// sites in base category
|
||||
sb.safePrintf("<input type=\"radio\" name=\"sdirt\" value=\"7\"");
|
||||
if (sdirt == 7) sb.safePrintf(" checked>");
|
||||
else sb.safePrintf(">");
|
||||
sb.safePrintf("Sites in Base Category<br>\n");
|
||||
// sites in entire category
|
||||
sb.safePrintf("<input type=\"radio\" name=\"sdirt\" value=\"6\"");
|
||||
if (sdirt == 6) sb.safePrintf(" checked>");
|
||||
else sb.safePrintf(">");
|
||||
sb.safePrintf("Sites in Entire Category<br>\n");
|
||||
// end it
|
||||
return true;
|
||||
}
|
||||
|
||||
////////
|
||||
//
|
||||
// . print the directory subtopics
|
||||
// . show these when we are in a directory topic browsing dmoz
|
||||
// . just a list of all the topics/categories
|
||||
//
|
||||
////////
|
||||
bool printDMOZSubTopics ( SafeBuf& sb, long catId, State0 *st, bool inXml ) {
|
||||
long currType;
|
||||
bool first;
|
||||
bool nextColumn;
|
||||
long maxPerColumn;
|
||||
long currInColumn;
|
||||
long currIndex;
|
||||
char *prefixp;
|
||||
long prefixLen;
|
||||
char *catName;
|
||||
long catNameLen;
|
||||
char encodedName[2048];
|
||||
|
||||
SearchInput *si = &st->m_si;
|
||||
|
||||
SafeBuf subCatBuf;
|
||||
// stores a list of SubCategories into "subCatBuf"
|
||||
long numSubCats = g_categories->generateSubCats ( si->m_catId , &subCatBuf );
|
||||
|
||||
// . get the subcategories for a given categoriy
|
||||
// . msg2b::gernerateDirectory() was launched in Msg40.cpp
|
||||
//long numSubCats = st->m_msg40.m_msg2b.m_numSubCats;
|
||||
//SubCategory *subCats = st->m_msg40.m_msg2b.m_subCats;
|
||||
//char *catBuffer = st->m_msg40.m_msg2b.m_catBuffer;
|
||||
//bool showAdultOnTop = st->m_si.m_cr->m_showAdultCategoryOnTop;
|
||||
|
||||
|
||||
// just print <hr> if no sub categories
|
||||
if (inXml) {
|
||||
sb.safePrintf ( "\t<directory>\n"
|
||||
"\t\t<dirId>%li</dirId>\n"
|
||||
"\t\t<dirName><![CDATA[",
|
||||
si->m_catId);//si.m_cat_dirId );
|
||||
g_categories->printPathFromId ( &sb,
|
||||
si->m_catId, // st->m_si.m_cat_dirId,
|
||||
true );
|
||||
sb.safePrintf ( "]]></dirName>\n");
|
||||
sb.safePrintf ( "\t\t<dirIsRTL>%li</dirIsRTL>\n",
|
||||
(long)si->m_isRTL);
|
||||
}
|
||||
|
||||
char *p = subCatBuf.getBufStart();
|
||||
char *pend = subCatBuf.getBuf();
|
||||
SubCategory *ptrs[MAX_SUB_CATS];
|
||||
long count = 0;
|
||||
|
||||
if (numSubCats <= 0)
|
||||
goto dirEnd;
|
||||
// print out the cats
|
||||
currType = 0;
|
||||
|
||||
// first make ptrs to them
|
||||
for ( ; p < pend ; ) {
|
||||
SubCategory *cat = (SubCategory *)p;
|
||||
ptrs[count++] = cat;
|
||||
p += cat->getRecSize();
|
||||
}
|
||||
|
||||
|
||||
for (long i = 0; i < count ; i++ ) {
|
||||
SubCategory *cat = ptrs[i];
|
||||
first = false;
|
||||
catName = cat->getName();//&catBuffer[subCats[i].m_nameOffset];
|
||||
catNameLen = cat->m_nameLen;//subCats[i].m_nameLen;
|
||||
prefixp = cat->getPrefix();//&catBuffer[subCats[i].m_prefixOffset];
|
||||
prefixLen = cat->m_prefixLen;//subCats[i].m_prefixLen;
|
||||
// skip bad categories
|
||||
currIndex = g_categories->getIndexFromPath(catName, catNameLen);
|
||||
if (currIndex < 0)
|
||||
continue;
|
||||
// skip top adult category if we're supposed to
|
||||
if ( !inXml &&
|
||||
st->m_si.m_catId == 1 &&
|
||||
si->m_familyFilter &&
|
||||
g_categories->isIndexAdultStart ( currIndex ) )
|
||||
continue;
|
||||
// check for room
|
||||
//if (p + subCats[i].m_prefixLen*2 +
|
||||
// subCats[i].m_nameLen*2 +
|
||||
// 512 > pend){
|
||||
// goto diroverflow;
|
||||
//}
|
||||
// print simple xml tag for inXml
|
||||
if (inXml) {
|
||||
switch ( cat->m_type ) {
|
||||
case SUBCAT_LETTERBAR:
|
||||
sb.safePrintf ( "\t\t<letterbar><![CDATA[" );
|
||||
sb.safePrintf ( "]]>" );
|
||||
sb.safePrintf ( "<urlcount>%li</urlcount>",
|
||||
g_categories->getNumUrlsFromIndex(
|
||||
currIndex) );
|
||||
sb.safePrintf ( "</letterbar>\n" );
|
||||
break;
|
||||
case SUBCAT_NARROW2:
|
||||
sb.safePrintf ( "\t\t<narrow2><![CDATA[" );
|
||||
sb.utf8Encode2 ( catName, catNameLen );
|
||||
sb.safePrintf ( "]]>");
|
||||
sb.safePrintf ( "<urlcount>%li</urlcount>",
|
||||
g_categories->getNumUrlsFromIndex(
|
||||
currIndex) );
|
||||
sb.safePrintf ( "</narrow2>\n" );
|
||||
break;
|
||||
case SUBCAT_NARROW1:
|
||||
sb.safePrintf ( "\t\t<narrow1><![CDATA[" );
|
||||
sb.utf8Encode2 ( catName, catNameLen );
|
||||
sb.safePrintf ( "]]>" );
|
||||
sb.safePrintf ( "<urlcount>%li</urlcount>",
|
||||
g_categories->getNumUrlsFromIndex(
|
||||
currIndex) );
|
||||
sb.safePrintf ( "</narrow1>\n" );
|
||||
break;
|
||||
case SUBCAT_NARROW:
|
||||
sb.safePrintf ( "\t\t<narrow><![CDATA[" );
|
||||
sb.utf8Encode2 ( catName, catNameLen );
|
||||
sb.safePrintf ( "]]>" );
|
||||
sb.safePrintf ( "<urlcount>%li</urlcount>",
|
||||
g_categories->getNumUrlsFromIndex(
|
||||
currIndex) );
|
||||
sb.safePrintf ( "</narrow>\n" );
|
||||
break;
|
||||
case SUBCAT_SYMBOLIC2:
|
||||
sb.safePrintf ( "\t\t<symbolic2><![CDATA[" );
|
||||
sb.utf8Encode2 ( prefixp, prefixLen );
|
||||
sb.safePrintf ( ":" );
|
||||
sb.utf8Encode2 ( catName, catNameLen );
|
||||
sb.safePrintf ( "]]>" );
|
||||
sb.safePrintf ( "<urlcount>%li</urlcount>",
|
||||
g_categories->getNumUrlsFromIndex(
|
||||
currIndex) );
|
||||
sb.safePrintf ( "</symbolic2>\n" );
|
||||
break;
|
||||
case SUBCAT_SYMBOLIC1:
|
||||
sb.safePrintf ( "\t\t<symbolic1><![CDATA[" );
|
||||
sb.utf8Encode2 ( prefixp, prefixLen );
|
||||
sb.safePrintf ( ":" );
|
||||
sb.utf8Encode2 ( catName, catNameLen );
|
||||
sb.safePrintf ( "]]>" );
|
||||
sb.safePrintf ( "<urlcount>%li</urlcount>",
|
||||
g_categories->getNumUrlsFromIndex(
|
||||
currIndex) );
|
||||
sb.safePrintf ( "</symbolic1>\n" );
|
||||
break;
|
||||
case SUBCAT_SYMBOLIC:
|
||||
sb.safePrintf ( "\t\t<symbolic><![CDATA[" );
|
||||
sb.utf8Encode2 ( prefixp, prefixLen );
|
||||
sb.safePrintf ( ":" );
|
||||
sb.utf8Encode2 ( catName, catNameLen );
|
||||
sb.safePrintf ( "]]>" );
|
||||
sb.safePrintf ( "<urlcount>%li</urlcount>",
|
||||
g_categories->getNumUrlsFromIndex(
|
||||
currIndex) );
|
||||
sb.safePrintf ( "</symbolic>\n" );
|
||||
break;
|
||||
case SUBCAT_RELATED:
|
||||
sb.safePrintf ( "\t\t<related><![CDATA[" );
|
||||
sb.utf8Encode2 ( catName, catNameLen );
|
||||
sb.safePrintf ( "]]>" );
|
||||
sb.safePrintf ( "<urlcount>%li</urlcount>",
|
||||
g_categories->getNumUrlsFromIndex(
|
||||
currIndex) );
|
||||
sb.safePrintf ( "</related>\n" );
|
||||
break;
|
||||
case SUBCAT_ALTLANG:
|
||||
sb.safePrintf ( "\t\t<altlang><![CDATA[" );
|
||||
sb.utf8Encode2 ( prefixp, prefixLen );
|
||||
sb.safePrintf ( ":" );
|
||||
sb.utf8Encode2 ( catName, catNameLen );
|
||||
sb.safePrintf ( "]]>" );
|
||||
sb.safePrintf ( "<urlcount>%li</urlcount>",
|
||||
g_categories->getNumUrlsFromIndex(
|
||||
currIndex) );
|
||||
sb.safePrintf ( "</altlang>\n");
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// print type header
|
||||
if ( cat->m_type - currType >= 10) {
|
||||
// end the last type
|
||||
if (currType == SUBCAT_LETTERBAR)
|
||||
sb.safePrintf(" ]</center>\n");
|
||||
else if (currType != 0)
|
||||
sb.safePrintf ( "\n</span></ul></td></tr>"
|
||||
"</table>\n" );
|
||||
// start the new type
|
||||
switch (cat->m_type) {
|
||||
case SUBCAT_LETTERBAR:
|
||||
sb.safePrintf ( "<span class=\"directory\">"
|
||||
"<center>[ " );
|
||||
break;
|
||||
case SUBCAT_NARROW2:
|
||||
case SUBCAT_SYMBOLIC2:
|
||||
case SUBCAT_NARROW1:
|
||||
case SUBCAT_SYMBOLIC1:
|
||||
case SUBCAT_NARROW:
|
||||
case SUBCAT_SYMBOLIC:
|
||||
sb.safePrintf("<hr>\n");
|
||||
break;
|
||||
case SUBCAT_RELATED:
|
||||
if (currType == 0 ||
|
||||
currType == SUBCAT_LETTERBAR)
|
||||
sb.safePrintf("<hr>");
|
||||
else
|
||||
sb.safePrintf("<br>");
|
||||
if (si->m_isRTL)
|
||||
sb.safePrintf("<span dir=ltr>");
|
||||
sb.safePrintf ( "<b>Related Categories:"
|
||||
"</b>" );
|
||||
if (si->m_isRTL)
|
||||
sb.safePrintf("</span>");
|
||||
break;
|
||||
case SUBCAT_ALTLANG:
|
||||
if (currType == 0 ||
|
||||
currType == SUBCAT_LETTERBAR)
|
||||
sb.safePrintf("<hr>");
|
||||
else
|
||||
sb.safePrintf("<br>");
|
||||
if (si->m_isRTL)
|
||||
sb.safePrintf("<span dir=ltr>");
|
||||
sb.safePrintf ( "<b>This category in other"
|
||||
" languages:</b>");
|
||||
if (si->m_isRTL)
|
||||
sb.safePrintf("</span>");
|
||||
break;
|
||||
}
|
||||
currType = ( cat->m_type/10)*10;
|
||||
first = true;
|
||||
nextColumn = false;
|
||||
currInColumn = 0;
|
||||
if (currType == SUBCAT_LETTERBAR ||
|
||||
currType == SUBCAT_RELATED)
|
||||
maxPerColumn = 999;
|
||||
else {
|
||||
// . check how many columns we'll use for this
|
||||
// type
|
||||
long numInType = 1;
|
||||
for (long j = i+1; j < numSubCats; j++) {
|
||||
if ( ptrs[j]->m_type - currType >= 10)
|
||||
break;
|
||||
numInType++;
|
||||
}
|
||||
// column for every 5, up to 3 columns
|
||||
long numColumns = numInType/5;
|
||||
if ( numInType%5 > 0 ) numColumns++;
|
||||
if ( currType == SUBCAT_ALTLANG &&
|
||||
numColumns > 4)
|
||||
numColumns = 4;
|
||||
else if (numColumns > 3)
|
||||
numColumns = 3;
|
||||
// max number of links per column
|
||||
maxPerColumn = numInType/numColumns;
|
||||
if (numInType%numColumns > 0)
|
||||
maxPerColumn++;
|
||||
}
|
||||
}
|
||||
// start the sub cat
|
||||
if (first) {
|
||||
if (currType != SUBCAT_LETTERBAR)
|
||||
sb.safePrintf ( "<table border=0>"
|
||||
"<tr><td valign=top>"
|
||||
"<ul><span class=\"directory\">"
|
||||
"\n<li>");
|
||||
}
|
||||
// check for the next column
|
||||
else if (nextColumn) {
|
||||
sb.safePrintf ( "\n</span></ul></td><td valign=top>"
|
||||
"<ul><span class=\"directory\">"
|
||||
"\n<li>");
|
||||
nextColumn = false;
|
||||
}
|
||||
// or just next link
|
||||
else {
|
||||
if (currType == SUBCAT_LETTERBAR)
|
||||
sb.safePrintf("| ");
|
||||
else
|
||||
sb.safePrintf("<li>");
|
||||
}
|
||||
// print out the prefix as a link
|
||||
//if ( p + catNameLen + 16 > pend ) {
|
||||
// goto diroverflow;
|
||||
//}
|
||||
sb.safePrintf("<a href=\"/");
|
||||
sb.utf8Encode2(catName, catNameLen);
|
||||
sb.safePrintf("/\">");
|
||||
// prefix...
|
||||
//if ( p + prefixLen + 512 > pend ) {
|
||||
// goto diroverflow;
|
||||
//}
|
||||
if (currType != SUBCAT_ALTLANG)
|
||||
sb.safePrintf("<b>");
|
||||
else {
|
||||
// check for coded <b> or <strong> tags, remove
|
||||
if (prefixLen >= 19 &&
|
||||
strncasecmp(prefixp, "<b>", 9) == 0 &&
|
||||
strncasecmp(prefixp + (prefixLen-10),
|
||||
"</b>", 10) == 0) {
|
||||
prefixp += 9;
|
||||
prefixLen -= 19;
|
||||
}
|
||||
else if (prefixLen >= 29 &&
|
||||
strncasecmp(prefixp, "<strong>", 14) == 0 &&
|
||||
strncasecmp(prefixp + (prefixLen-15),
|
||||
"</strong>", 15) == 0) {
|
||||
prefixp += 14;
|
||||
prefixLen -= 29;
|
||||
}
|
||||
}
|
||||
if (currType == SUBCAT_RELATED) {
|
||||
// print the full path
|
||||
if (g_categories->isIndexRTL(currIndex))
|
||||
sb.safePrintf("<span dir=ltr>");
|
||||
g_categories->printPathFromIndex (
|
||||
&sb,
|
||||
currIndex,
|
||||
false,
|
||||
si->m_isRTL);
|
||||
}
|
||||
else {
|
||||
char *encodeEnd = htmlEncode ( encodedName,
|
||||
encodedName + 2047,
|
||||
prefixp,
|
||||
prefixp + prefixLen );
|
||||
prefixp = encodedName;
|
||||
prefixLen = encodeEnd - encodedName;
|
||||
//if ( p + prefixLen + 512 > pend ) {
|
||||
// goto diroverflow;
|
||||
//}
|
||||
for (long c = 0; c < prefixLen; c++) {
|
||||
if (*prefixp == '_')
|
||||
//*p = ' ';
|
||||
sb.safePrintf(" ");
|
||||
else
|
||||
//*p = *prefixp;
|
||||
sb.utf8Encode2(prefixp, 1);
|
||||
//p++;
|
||||
prefixp++;
|
||||
}
|
||||
}
|
||||
//if ( p + 512 > pend ) {
|
||||
// goto diroverflow;
|
||||
//}
|
||||
// end the link
|
||||
if (currType != SUBCAT_ALTLANG)
|
||||
sb.safePrintf("</b>");
|
||||
sb.safePrintf("</a>");
|
||||
// print an @ for symbolic links
|
||||
if ( (cat->m_type % 10) == 1)
|
||||
sb.safePrintf("@");
|
||||
// print number of urls under here
|
||||
if ( cat->m_type != SUBCAT_LETTERBAR) {
|
||||
sb.safePrintf("  <i>");
|
||||
if (si->m_isRTL)
|
||||
sb.safePrintf ( "<span dir=ltr>(%li)"
|
||||
"</span></i>",
|
||||
g_categories->getNumUrlsFromIndex(
|
||||
currIndex) );
|
||||
else
|
||||
sb.safePrintf ( "(%li)</i>",
|
||||
g_categories->getNumUrlsFromIndex(
|
||||
currIndex) );
|
||||
}
|
||||
// next line/letter
|
||||
if ( cat->m_type == SUBCAT_LETTERBAR) {
|
||||
sb.safePrintf(" ");
|
||||
continue;
|
||||
}
|
||||
// check for next column
|
||||
currInColumn++;
|
||||
if (currInColumn >= maxPerColumn) {
|
||||
currInColumn = 0;
|
||||
nextColumn = true;
|
||||
}
|
||||
}
|
||||
//if ( p + 512 > pend ) {
|
||||
// goto diroverflow;
|
||||
//}
|
||||
// end the last type
|
||||
if (!inXml) {
|
||||
if (currType == SUBCAT_LETTERBAR)
|
||||
sb.safePrintf(" ]</center>\n");
|
||||
else
|
||||
sb.safePrintf("</ul></td></tr></table>\n");
|
||||
}
|
||||
dirEnd:
|
||||
if (inXml)
|
||||
sb.safePrintf("\t</directory>\n");
|
||||
else {
|
||||
sb.safePrintf("</span>");
|
||||
sb.safePrintf("<hr><br>\n");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
13
Rdb.cpp
13
Rdb.cpp
@ -5,7 +5,7 @@
|
||||
#include "Clusterdb.h"
|
||||
#include "Hostdb.h"
|
||||
#include "Tagdb.h"
|
||||
//#include "Catdb.h"
|
||||
#include "Catdb.h"
|
||||
#include "Indexdb.h"
|
||||
#include "Posdb.h"
|
||||
#include "Cachedb.h"
|
||||
@ -1340,7 +1340,7 @@ void attemptMergeAll ( int fd , void *state ) {
|
||||
g_titledb.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
//g_tfndb.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
g_tagdb.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
//g_catdb.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
g_catdb.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
g_clusterdb.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
g_statsdb.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
g_syncdb.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
@ -2351,7 +2351,7 @@ Rdb *getRdbFromId ( uint8_t rdbId ) {
|
||||
s_table9 [ RDB_DOLEDB ] = g_doledb.getRdb();
|
||||
s_table9 [ RDB_TFNDB ] = g_tfndb.getRdb();
|
||||
s_table9 [ RDB_CLUSTERDB ] = g_clusterdb.getRdb();
|
||||
//s_table9 [ RDB_CATDB ] = g_catdb.getRdb();
|
||||
s_table9 [ RDB_CATDB ] = g_catdb.getRdb();
|
||||
s_table9 [ RDB_DATEDB ] = g_datedb.getRdb();
|
||||
s_table9 [ RDB_LINKDB ] = g_linkdb.getRdb();
|
||||
s_table9 [ RDB_CACHEDB ] = g_cachedb.getRdb();
|
||||
@ -2380,7 +2380,7 @@ Rdb *getRdbFromId ( uint8_t rdbId ) {
|
||||
// the opposite of the above
|
||||
char getIdFromRdb ( Rdb *rdb ) {
|
||||
if ( rdb == g_tagdb.getRdb () ) return RDB_TAGDB;
|
||||
//if ( rdb == g_catdb.getRdb () ) return RDB_CATDB;
|
||||
if ( rdb == g_catdb.getRdb () ) return RDB_CATDB;
|
||||
if ( rdb == g_indexdb.getRdb () ) return RDB_INDEXDB;
|
||||
if ( rdb == g_posdb.getRdb () ) return RDB_POSDB;
|
||||
if ( rdb == g_datedb.getRdb () ) return RDB_DATEDB;
|
||||
@ -2401,7 +2401,7 @@ char getIdFromRdb ( Rdb *rdb ) {
|
||||
if ( rdb == g_revdb.getRdb () ) return RDB_REVDB;
|
||||
//if ( rdb == g_sitedb.getRdb () ) return RDB_SITEDB;
|
||||
//if ( rdb == g_tagdb2.getRdb () ) return RDB2_SITEDB2;
|
||||
//if ( rdb == g_catdb.getRdb () ) return RDB_CATDB;
|
||||
if ( rdb == g_catdb.getRdb () ) return RDB_CATDB;
|
||||
if ( rdb == g_indexdb2.getRdb () ) return RDB2_INDEXDB2;
|
||||
if ( rdb == g_posdb2.getRdb () ) return RDB2_POSDB2;
|
||||
if ( rdb == g_datedb2.getRdb () ) return RDB2_DATEDB2;
|
||||
@ -2425,7 +2425,7 @@ char getIdFromRdb ( Rdb *rdb ) {
|
||||
char isSecondaryRdb ( uint8_t rdbId ) {
|
||||
switch ( rdbId ) {
|
||||
//case RDB2_SITEDB2 : return true;
|
||||
//case RDB_CATDB2 : return g_catdb2.getRdb();
|
||||
case RDB2_CATDB2 : return true;
|
||||
case RDB2_INDEXDB2 : return true;
|
||||
case RDB2_POSDB2 : return true;
|
||||
case RDB2_DATEDB2 : return true;
|
||||
@ -2532,6 +2532,7 @@ long getDataSizeFromRdbId ( uint8_t rdbId ) {
|
||||
else if ( i == RDB2_TITLEDB2 ||
|
||||
i == RDB2_REVDB2 ||
|
||||
i == RDB2_TAGDB2 ||
|
||||
i == RDB2_CATDB2 ||
|
||||
i == RDB2_SPIDERDB2 ||
|
||||
i == RDB2_PLACEDB2 )
|
||||
ds = -1;
|
||||
|
Rdb.h (1 changed line)

@ -52,6 +52,7 @@ enum {
RDB2_REVDB2,
RDB2_TAGDB2,
RDB2_POSDB2, // 31
RDB2_CATDB2,
RDB_END
};
// how many rdbs are in "urgent merge" mode?
SafeBuf.cpp

@ -680,7 +680,7 @@ bool SafeBuf::setEncoding(short cs) {
return true;
}

bool SafeBuf::utf8Encode(char *s, long len, bool encodeHTML,long niceness) {
bool SafeBuf::utf8Encode2(char *s, long len, bool encodeHTML,long niceness) {
long tmp = m_length;
if ( m_encoding == csUTF8 ) {
if (! safeMemcpy(s,len)) return false;
@ -1235,7 +1235,8 @@ void initTable ( ) {
}
}

bool SafeBuf::urlEncode ( bool spaceToPlus ) {
// url encode the whole buffer
bool SafeBuf::urlEncodeAllBuf ( bool spaceToPlus ) {
// this makes things faster
if ( ! s_init23 ) initTable();
// how many chars do we need?
SafeBuf.h (14 changed lines)

@ -178,9 +178,9 @@ struct SafeBuf {

//insert strings in their native encoding
bool encode ( char *s , long len , long niceness=0) {
return utf8Encode(s,len,false,niceness); };
return utf8Encode2(s,len,false,niceness); };
// htmlEncode default = false
bool utf8Encode(char *s, long len, bool htmlEncode=false,
bool utf8Encode2(char *s, long len, bool htmlEncode=false,
long niceness=0);
bool latin1Encode(char *s, long len, bool htmlEncode=false,
long niceness=0);
@ -203,11 +203,15 @@ struct SafeBuf {
bool requestPath = false,
bool encodeApostrophes = false );

bool urlEncode (char *s ,
bool encodeApostrophes = false ) {
bool urlEncode (char *s ) {
return urlEncode ( s,strlen(s),false,false); };


bool urlEncode2 (char *s ,
bool encodeApostrophes ) { // usually false
return urlEncode ( s,strlen(s),false,encodeApostrophes); };

bool urlEncode ( bool spaceToPlus = true );
bool urlEncodeAllBuf ( bool spaceToPlus = true );
bool latin1CdataEncode(char *s, long len);
bool utf8CdataEncode(char *s, long len);
SearchInput.cpp

@ -1210,6 +1210,40 @@ bool SearchInput::setQueryBuffers ( ) {
m_displayQuery,
m_displayQueryLen);




//////////
//
// show DMOZ BREADCRUMB if doing a
// "gbpdcat:<catid> |" (Search restricted to category)
// "gbdcat:<catid>" (DMOZ urls in that topic, c=dmoz3)
//
//////////
long pcatId = -1;
long dcatId = -1;
// get the final query
char *q =m_sbuf1.getBufStart();
if ( q ) sscanf(q,"gbpdcat:%li",&pcatId);
if ( q ) sscanf(q,"gbcat:%li",&dcatId);
// pick the one that is valid
long catId = -1;
if ( pcatId >= 0 ) catId = pcatId;
if ( dcatId >= 0 ) catId = dcatId;

//////
//
// save catid into the state
m_catId = catId;
//
///////

// are we a right to left language like hebrew?
if ( catId > 0 && g_categories->isIdRTL(catId) )
m_isRTL = true;
else
m_isRTL = false;

return true;
}

SearchInput.h

@ -400,6 +400,9 @@ class SearchInput {
SafeBuf m_sbuf2;
SafeBuf m_sbuf3;

long m_catId;
bool m_isRTL;

// make a cookie from parms with m_flags of PF_COOKIE set
SafeBuf m_cookieBuf;
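
The SearchInput::setQueryBuffers() hunk above pulls the DMOZ category id straight out of the final query string with sscanf. A self-contained sketch of that parse follows; note the hunk's comment spells the second term gbdcat: while its sscanf scans gbcat:, so the sketch accepts both spellings rather than asserting which one is intended.

// ---------- illustrative sketch (not part of this commit) ----------
#include <cstdio>

static long catIdFromQuery ( const char *q ) {
	long pcatId = -1; // from "gbpdcat:<catid>" (search restricted to category)
	long dcatId = -1; // from "gbdcat:<catid>" / "gbcat:<catid>" (urls in that topic)
	if ( q ) sscanf ( q , "gbpdcat:%li" , &pcatId );
	if ( q ) sscanf ( q , "gbdcat:%li"  , &dcatId );
	if ( q && dcatId < 0 ) sscanf ( q , "gbcat:%li" , &dcatId );
	// pick the one that is valid
	long catId = -1;
	if ( pcatId >= 0 ) catId = pcatId;
	if ( dcatId >= 0 ) catId = dcatId;
	return catId;
}

int main ( ) {
	printf ( "%li\n" , catIdFromQuery ( "gbpdcat:177 |" ) ); // 177
	printf ( "%li\n" , catIdFromQuery ( "gbcat:42"      ) ); // 42
	printf ( "%li\n" , catIdFromQuery ( "cats and dogs" ) ); // -1
	return 0;
}
// ---------- end sketch ----------
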
120
XmlDoc.cpp
120
XmlDoc.cpp
@ -2790,7 +2790,11 @@ char **XmlDoc::getTitleRec ( ) {
|
||||
long dslen = 0;
|
||||
unsigned char dalen = 0;
|
||||
|
||||
// store all dmoz info separated by \0's into titles[] buffer
|
||||
// . store all dmoz info separated by \0's into titles[] buffer
|
||||
// . crap, this does a disk read and blocks on that
|
||||
//
|
||||
// . TODO: make it non-blocking!!!!
|
||||
//
|
||||
g_categories->getTitleAndSummary ( m_firstUrl.getUrl(),
|
||||
m_firstUrl.getUrlLen(),
|
||||
ptr_catIds[i],
|
||||
@ -3372,7 +3376,7 @@ CatRec *XmlDoc::getCatRec ( ) {
|
||||
// return what we got
|
||||
if ( m_catRecValid ) return &m_catRec;
|
||||
// call that
|
||||
setStatus ("getting cat rec");
|
||||
setStatus ("getting dmoz cat rec");
|
||||
// callback?
|
||||
if ( m_calledMsg8b ) {
|
||||
// return NULL on error
|
||||
@ -3386,7 +3390,8 @@ CatRec *XmlDoc::getCatRec ( ) {
|
||||
// assume empty and skip the call for now
|
||||
m_catRec.reset();
|
||||
m_catRecValid = true;
|
||||
return &m_catRec;
|
||||
// let's bring dmoz back
|
||||
//return &m_catRec;
|
||||
// compute it otherwise
|
||||
if ( ! m_msg8b.getCatRec ( &m_firstUrl ,
|
||||
m_coll ,
|
||||
@ -20303,7 +20308,7 @@ char *XmlDoc::hashAll ( HashTableX *table ) {
|
||||
if ( ! hashUrl ( table ) ) return NULL;
|
||||
if ( ! hashMetaTags ( table ) ) return NULL;
|
||||
if ( ! hashMetaZip ( table ) ) return NULL;
|
||||
//if ( ! hashCategories ( table ) ) return NULL;
|
||||
if ( ! hashDMOZCategories( table ) ) return NULL;
|
||||
if ( ! hashLanguage ( table ) ) return NULL;
|
||||
if ( ! hashCountry ( table ) ) return NULL;
|
||||
if ( ! hashSiteNumInlinks( table ) ) return NULL;
|
||||
@ -21789,6 +21794,113 @@ bool XmlDoc::searchboxToGigablast ( ) {
|
||||
return m_xml.hasGigablastForm();
|
||||
}
|
||||
|
||||
// . bring back support for dmoz integration
|
||||
// . when clicking on a "search within this category" it does a gbpdcat:<catid>
|
||||
// search to capture all pages that have that dmoz category as one of their
|
||||
// parent topics
|
||||
bool XmlDoc::hashDMOZCategories ( HashTableX *tt ) {
|
||||
|
||||
char *titlePtr = ptr_dmozTitles;
|
||||
char *sumPtr = ptr_dmozSumms;
|
||||
//char *anchPtr = ptr_dmozAnchors;
|
||||
|
||||
char buf[128];
|
||||
|
||||
HashInfo hi;
|
||||
hi.m_tt = tt;
|
||||
hi.m_hashGroup = HASHGROUP_INTAG;
|
||||
|
||||
long *catIds = (long *)ptr_catIds;
|
||||
long numCatIds = size_catIds / 4;
|
||||
// go through the catIds and hash them
|
||||
for (long i = 0; i < numCatIds; i++) {
|
||||
// write the catid as a string
|
||||
sprintf(buf, "%lu", catIds[i]);
|
||||
// term prefix for hashing
|
||||
hi.m_prefix = "gbdcat";
|
||||
// hash it
|
||||
hashString ( buf , gbstrlen(buf) , &hi );
|
||||
// we also want to hash the parents
|
||||
long currCatId = catIds[i];
|
||||
long currParentId = catIds[i];
|
||||
long currCatIndex;
|
||||
// loop to the Top, Top = 1
|
||||
while ( currCatId > 1 ) {
|
||||
// hash the parent
|
||||
sprintf(buf, "%lu", currParentId);
|
||||
hi.m_prefix = "gbpdcat";
|
||||
hashString ( buf , gbstrlen(buf), &hi );
|
||||
// next cat
|
||||
currCatId = currParentId;
|
||||
// get the index for this cat
|
||||
currCatIndex = g_categories->getIndexFromId(currCatId);
|
||||
if ( currCatIndex <= 0 ) break;
|
||||
// get the parent for this cat
|
||||
currParentId =
|
||||
g_categories->m_cats[currCatIndex].m_parentid;
|
||||
}
|
||||
|
||||
// do not hash titles or summaries if "index article content
|
||||
// only" parm is on
|
||||
//if ( tr->eliminateMenus() ) continue;
|
||||
|
||||
// hash dmoz title
|
||||
hi.m_prefix = NULL;
|
||||
// call this DMOZ title as regular title i guess
|
||||
hi.m_hashGroup = HASHGROUP_TITLE;
|
||||
// hash the DMOZ title
|
||||
hashString ( titlePtr , gbstrlen(titlePtr), &hi );
|
||||
// next title
|
||||
titlePtr += gbstrlen(titlePtr) + 1;
|
||||
|
||||
// hash DMOZ summary
|
||||
hi.m_prefix = NULL;
|
||||
// call this DMOZ summary as body i guess
|
||||
hi.m_hashGroup = HASHGROUP_BODY;
|
||||
// hash the DMOZ summary
|
||||
hashString ( sumPtr , gbstrlen(sumPtr), &hi );
|
||||
// next summary
|
||||
sumPtr += gbstrlen(sumPtr) + 1;
|
||||
}
|
||||
|
||||
long numIndCatIds = size_indCatIds / 4;
|
||||
long *indCatIds = (long *)ptr_indCatIds;
|
||||
// go through the INDIRECT catIds and hash them
|
||||
for (long i = 0 ; i < numIndCatIds; i++) {
|
||||
|
||||
// write the catid as a string
|
||||
sprintf(buf, "%lu", indCatIds[i]);
|
||||
// use prefix
|
||||
hi.m_prefix = "gbicat";
|
||||
hi.m_hashGroup = HASHGROUP_INTAG;
|
||||
// hash it
|
||||
hashString ( buf , gbstrlen(buf), &hi );
|
||||
|
||||
// we also want to hash the parents
|
||||
long currCatId = indCatIds[i];
|
||||
long currParentId = indCatIds[i];
|
||||
long currCatIndex;
|
||||
// loop to the Top, Top = 1
|
||||
while (currCatId > 1) {
|
||||
// hash the parent
|
||||
sprintf(buf, "%lu", currParentId);
|
||||
// new prefix
|
||||
hi.m_prefix = "gbpicat";
|
||||
// hash it
|
||||
hashString ( buf , gbstrlen(buf), &hi );
|
||||
// next cat
|
||||
currCatId = currParentId;
|
||||
// get the index for this cat
|
||||
currCatIndex = g_categories->getIndexFromId(currCatId);
|
||||
if ( currCatIndex <= 0 ) break;
|
||||
// get the parent for this cat
|
||||
currParentId =
|
||||
g_categories->m_cats[currCatIndex].m_parentid;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool XmlDoc::hashLanguage ( HashTableX *tt ) {
|
||||
|
||||
setStatus ( "hashing language" );
|
||||
|
XmlDoc.h (1 changed line)

@ -693,6 +693,7 @@ class XmlDoc {
bool hashZipCodes ( class HashTableX *table ) ;
bool hashMetaZip ( class HashTableX *table ) ;
bool hashContentType ( class HashTableX *table ) ;
bool hashDMOZCategories ( class HashTableX *table ) ;
bool hashLinks ( class HashTableX *table ) ;
bool hashUrl ( class HashTableX *table ) ;
bool hashSections ( class HashTableX *table ) ;
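
XmlDoc::hashDMOZCategories(), added in the XmlDoc.cpp hunks above and declared here, hashes gbdcat:<catid> for each of a page's DMOZ topics and then walks each topic's parent chain, hashing gbpdcat:<id> up to Top (catid 1); that is what lets a "search within this category" gbpdcat: query match every page filed anywhere beneath the category. The sketch below reproduces only that walk, with a hard-coded parent table standing in for g_categories and printf standing in for hashString(); the category ids are made up.

// ---------- illustrative sketch (not part of this commit) ----------
#include <cstdio>
#include <map>

// stand-in for g_categories->m_cats[...].m_parentid
// hypothetical ids: 563 -> 27 -> 3 -> 1 (Top)
static std::map<long,long> g_parentOf = {
	{ 563, 27 }, { 27, 3 }, { 3, 1 }, { 1, 1 }
};

static void hashCatIdAndParents ( long catid ) {
	char buf[128];
	// the direct topic gets the gbdcat term
	sprintf ( buf , "gbdcat:%lu" , (unsigned long)catid );
	printf ( "%s\n" , buf );
	// then the topic and every ancestor get gbpdcat terms,
	// walking up until Top (catid 1), like the diff's while loop
	long currCatId    = catid;
	long currParentId = catid;
	while ( currCatId > 1 ) {
		sprintf ( buf , "gbpdcat:%lu" , (unsigned long)currParentId );
		printf ( "%s\n" , buf );
		currCatId = currParentId;
		std::map<long,long>::const_iterator it = g_parentOf.find(currCatId);
		if ( it == g_parentOf.end() ) break;
		currParentId = it->second;
	}
}

int main ( ) {
	hashCatIdAndParents ( 563 );
	return 0;
}
// ---------- end sketch ----------
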
150
dmozparse.cpp
150
dmozparse.cpp
@ -21,6 +21,11 @@
bool closeAll ( void *state , void (* callback)(void *state) ) { return true; }
bool allExit ( ) { return true; };

bool sendPageSEO(TcpSocket *s, HttpRequest *hr) {return true;}

//long g_qbufNeedSave = false;
//SafeBuf g_qbuf;

#define RDFBUFFER_SIZE (1024*1024*10)
#define RDFSTRUCTURE_FILE "structure.rdf.u8"
#define RDFCONTENT_FILE "content.rdf.u8"
@ -518,7 +523,7 @@ bool isGoodUrl ( char *url, long urlLen ) {
if ( urlLen <= 0 )
return false;
for (long i = 0; i < urlLen; i++) {
if (is_space(url[i]))
if (is_wspace_a(url[i]))
return false;
}
// check for [prot]://[url]
@ -621,7 +626,7 @@ long fixUrl ( char *url, long urlLen ) {
memmove(&url[slashi-1], &url[slashi], newUrlLen - slashi);
newUrlLen--;
}
if (is_space(url[slashi])) {
if (is_wspace_a(url[slashi])) {
memmove(&url[slashi], &url[slashi+1], newUrlLen - (slashi+1));
newUrlLen--;
}
@ -678,7 +683,7 @@ int main ( int argc, char *argv[] ) {
long m = 0;
long newNameBufferSize = 0;
long newOffset = 0;
char filename[256];
char filename[1256];
long urlTxtCount = 0;
long urlTxtFile = 0;
Url normUrl;
@ -695,6 +700,7 @@ int main ( int argc, char *argv[] ) {
bool splitUrls = false;
char mode = MODE_NONE;
long totalNEC = 0;
char *dir;

// check the options and mode
for (long i = 0; i < argc; i++) {
@ -783,20 +789,29 @@ int main ( int argc, char *argv[] ) {
goto errExit;
}

dir = "";

retry:

// open the structure file
if ( mode == MODE_NEW || mode == MODE_CATDUMP )
sprintf(filename, "%s", RDFSTRUCTURE_FILE);
sprintf(filename, "%s%s", dir,RDFSTRUCTURE_FILE);
else
sprintf(filename, "%s.new", RDFSTRUCTURE_FILE);
sprintf(filename, "%s%s.new", dir,RDFSTRUCTURE_FILE);
//rdfStream.open(filename, ifstream::in);
rdfStream = open ( filename, O_RDONLY );
// make sure it openned okay
// make sure it opened okay
//if (!rdfStream.is_open()) {
if ( rdfStream < 0 ) {
printf("Error Openning %s\n", filename);
// try ./cat/ subdir if not found
if ( ! dir[0] ) {
dir = "./cat/";
goto retry;
}
printf("Error Opening %s\n", filename);
goto errExit;
}
printf("Openned Structure File: %s\n", filename);
printf("Opened Structure File: %s\n", filename);

// take the first chunk
//rdfStream.read(rdfBuffer, RDFBUFFER_SIZE);
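The retry label above gives dmozparse a simple directory fallback: look for the RDF file in the working directory first, then again under ./cat/ before giving up. A condensed sketch of the same idea as a standalone helper (openWithFallback and its arguments are hypothetical, not part of dmozparse):

// Sketch: try "name" in the working directory, then under ./cat/, printing the
// same style of error message the code above does. Helper name is hypothetical.
#include <fcntl.h>
#include <stdio.h>

static int openWithFallback ( const char *name , char *pathOut , size_t pathSize ) {
	const char *dirs[] = { "" , "./cat/" };
	for ( int i = 0 ; i < 2 ; i++ ) {
		snprintf ( pathOut , pathSize , "%s%s" , dirs[i] , name );
		int fd = open ( pathOut , O_RDONLY );
		if ( fd >= 0 ) return fd;
		printf ( "Error Opening %s\n" , pathOut );
	}
	return -1;
}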
@ -832,7 +847,9 @@ int main ( int argc, char *argv[] ) {
nameLen = MAX_HTTP_FILENAME_LEN;
nameLen = htmlDecode ( htmlDecoded,
&nameBuffer[nameOffset],
nameLen );
nameLen ,
false,
0);
memcpy(&nameBuffer[nameOffset], htmlDecoded, nameLen);
nameBufferLen += nameLen;
// parse the catid
@ -977,7 +994,9 @@ nextChildTag:
childNameLen = MAX_HTTP_FILENAME_LEN;
childNameLen = htmlDecode ( htmlDecoded,
childName,
childNameLen );
childNameLen ,
false,
0);
memcpy(childName, htmlDecoded, childNameLen);
// cut off the leading label if symbolic
// if (parentType == 2) {
@ -1066,25 +1085,25 @@ fileEnd1:
for (long i = 0; i < numRdfCats; i++) {
// get the hash of the path
rawPathLen = printCatPath(rawPath, rdfCats[i].m_catid, true);
rdfCats[i].m_catHash = hash32Lower(rawPath, rawPathLen, 0);
rdfCats[i].m_catHash = hash32Lower_a(rawPath, rawPathLen, 0);
}

// . now we want to serialize the needed data into
// one (or more?) file(s) to be quickly read by gb
if ( mode == MODE_NEW )
sprintf(filename, "%s", STRUCTURE_OUTPUT_FILE);
sprintf(filename, "%s%s", dir,STRUCTURE_OUTPUT_FILE);
else
sprintf(filename, "%s.new", STRUCTURE_OUTPUT_FILE);
sprintf(filename, "%s%s.new", dir,STRUCTURE_OUTPUT_FILE);
//outStream.open(filename, ofstream::out|ofstream::trunc);
outStream = open ( filename, O_CREAT|O_WRONLY|O_TRUNC,
S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP );
// make sure it openned okay
// make sure it opened okay
//if (!outStream.is_open()) {
if ( outStream < 0 ) {
printf("Error Openning %s\n", filename);
printf("Error Opening %s\n", filename);
goto errExit;
}
printf("\nOpenned %s for writing.\n", filename);
printf("\nOpened %s for writing.\n", filename);

// write the size of the truncated name buffer
//outStream.write((char*)&newNameBufferSize, sizeof(long));
@ -1152,18 +1171,18 @@ contentParse:

// open the content file
if ( mode == MODE_NEW || mode == MODE_URLDUMP )
sprintf(filename, "%s", RDFCONTENT_FILE);
sprintf(filename, "%s%s", dir,RDFCONTENT_FILE);
else
sprintf(filename, "%s.new", RDFCONTENT_FILE);
sprintf(filename, "%s%s.new", dir,RDFCONTENT_FILE);
//rdfStream.open(filename, ifstream::in);
rdfStream = open ( filename, O_RDONLY );
// make sure it openned okay
// make sure it opened okay
//if (!rdfStream.is_open()) {
if ( rdfStream < 0 ) {
printf("Error Openning %s\n", filename);
printf("Error Opening %s\n", filename);
goto errExit;
}
printf("\nOpenned Content File: %s\n", filename);
printf("\nOpened Content File: %s\n", filename);

// take the first chunk
//rdfStream.read(rdfBuffer, RDFBUFFER_SIZE);
@ -1199,13 +1218,13 @@ contentParse:
//outStream2.open(filename, ofstream::out|ofstream::trunc);
outStream2 = open ( filename, O_CREAT|O_WRONLY|O_TRUNC,
S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP );
// make sure it openned okay
// make sure it opened okay
//if (!outStream2.is_open()) {
if ( outStream2 < 0 ) {
printf("Error Openning %s\n", filename);
printf("Error Opening %s\n", filename);
goto errExit1;
}
printf("Openned %s for writing.\n", filename);
printf("Opened %s for writing.\n", filename);

// if we're doing a diffurldump, load up the diff file first
if ( mode == MODE_DIFFURLDUMP ) {
@ -1219,10 +1238,10 @@ contentParse:
diffInStream = open(filename, O_RDONLY);
//if (!diffInStream.is_open()) {
if ( diffInStream < 0 ) {
printf("Error Openning %s\n", filename);
printf("Error Opening %s\n", filename);
goto errExit;
}
printf("Openned Diff File: %s\n", filename);
printf("Opened Diff File: %s\n", filename);

// read in the number of urls to update/add
//diffInStream.read((char*)&numUpdateIndexes,
@ -1326,14 +1345,14 @@ contentParse:
outStream2 = open ( filename,
O_CREAT|O_WRONLY|O_TRUNC,
S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP );
// make sure it openned okay
// make sure it opened okay
//if (!outStream2.is_open()) {
if ( outStream2 < 0 ) {
printf("Error Openning %s\n",
printf("Error Opening %s\n",
filename);
goto errExit1;
}
printf("Openned %s for writing.\n",
printf("Opened %s for writing.\n",
filename);
urlTxtCount = 0;
}
@ -1348,20 +1367,20 @@ contentParse:
}
else {
if ( mode == MODE_NEW )
sprintf(filename, "%s", CONTENT_OUTPUT_FILE);
sprintf(filename, "%s%s", dir,CONTENT_OUTPUT_FILE);
else
sprintf(filename, "%s.new", CONTENT_OUTPUT_FILE);
sprintf(filename, "%s%s.new", dir,CONTENT_OUTPUT_FILE);
// stream the urls into the content
//outStream.open(filename, ofstream::out|ofstream::trunc);
outStream = open ( filename, O_CREAT|O_WRONLY|O_TRUNC,
S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP );
// make sure it openned okay
// make sure it opened okay
//if (!outStream.is_open()) {
if ( outStream < 0 ) {
printf("Error Openning %s\n", filename);
printf("Error Opening %s\n", filename);
goto errExit;
}
printf("Openned %s for writing.\n", filename);
printf("Opened %s for writing.\n", filename);

// store a space for the number of urls at the start of the file
//outStream.write((char*)&numUrlInfos, sizeof(long));
@ -1442,7 +1461,8 @@ hashLink:
// html decode the url
if (urlLen > MAX_URL_LEN)
urlLen = MAX_URL_LEN;
urlLen = htmlDecode(decodedUrl, &urlBuffer[urlOffset], urlLen);
urlLen = htmlDecode(decodedUrl, &urlBuffer[urlOffset], urlLen,
false,0);
memcpy(&urlBuffer[urlOffset], decodedUrl, urlLen);
// fix up bad urls
urlLen = fixUrl(&urlBuffer[urlOffset], urlLen);
@ -1473,7 +1493,7 @@ hashLink:
//urlBufferLen += urlLen;
// get the hash value
unsigned long long urlHash =
hash64Lower(&urlBuffer[urlOffset], urlLen, 0);
hash64Lower_a(&urlBuffer[urlOffset], urlLen, 0);
//unsigned long urlHash2 =
// hash32Lower(&urlBuffer[urlOffset], urlLen, 0);
// see if it's already indexed
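The hunk above also switches the url dedup hash from hash64Lower() to hash64Lower_a() before the "see if it's already indexed" check. As a rough illustration of that dedup step, the sketch below keeps one 64-bit lowercase hash per url in a set and skips urls whose hash has been seen; hashUrl64 is only a stand-in for the real hash64Lower_a(), and the set is a simplification of whatever index dmozparse actually consults.

// Rough sketch of the "already indexed?" dedup: one 64-bit lowercase hash per
// url, kept in a set. hashUrl64 is a stand-in, not the real hash64Lower_a().
#include <ctype.h>
#include <stdint.h>
#include <unordered_set>

static uint64_t hashUrl64 ( const char *s , long len ) {
	uint64_t h = 1469598103934665603ULL;   // FNV-1a offset, stand-in only
	for ( long i = 0 ; i < len ; i++ ) {
		h ^= (uint64_t)(unsigned char)tolower ( s[i] );
		h *= 1099511628211ULL;
	}
	return h;
}

static bool alreadyIndexed ( const char *url , long len ,
			     std::unordered_set<uint64_t> &seen ) {
	// insert() reports in .second whether the hash was newly added
	return ! seen.insert ( hashUrl64 ( url , len ) ).second;
}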
@ -1530,14 +1550,14 @@ hashLink:
outStream2 = open ( filename,
O_CREAT|O_WRONLY|O_TRUNC,
S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP );
// make sure it openned okay
// make sure it opened okay
//if (!outStream2.is_open()) {
if ( outStream2 < 0 ) {
printf("Error Openning %s\n",
printf("Error Opening %s\n",
filename);
goto errExit1;
}
printf("Openned %s for writing.\n",
printf("Opened %s for writing.\n",
filename);
urlTxtCount = 0;
}
@ -1697,19 +1717,19 @@ fileEnd2:

// load the content and url files
// url info (content) file
sprintf(filename, "%s", CONTENT_OUTPUT_FILE);
sprintf(filename, "%s%s", dir,CONTENT_OUTPUT_FILE);
//rdfStream.open(filename, ifstream::in);
rdfStream = open ( filename, O_RDONLY );
//if (!rdfStream.is_open()) {
if ( rdfStream < 0 ) {
printf("Error Openning %s\n", CONTENT_OUTPUT_FILE);
printf("Error Opening %s\n", filename);
goto oldErrExit;
}
// read in the number of urls
//rdfStream.read((char*)&oldNumUrls, sizeof(long));
if (fileRead(rdfStream, &oldNumUrls, sizeof(long)) !=
sizeof(long)) {
printf("Error Reading %s\n", CONTENT_OUTPUT_FILE);
printf("Error Reading %s\n", filename);
goto oldErrExit;
}

@ -1749,8 +1769,8 @@ fileEnd2:
//rdfStream.read((char*)&urlLen, sizeof(short));
long n = fileRead(rdfStream, &urlLen, sizeof(short));
if ( n < 0 || n > (long)sizeof(short) ) {
printf("Error Reading %s\n",
CONTENT_OUTPUT_FILE);
printf("Error Reading %s\n",filename);
//CONTENT_OUTPUT_FILE);
goto oldErrExit;
}
if ( n == 0 )
@ -1780,8 +1800,8 @@ fileEnd2:
}
n = fileRead(rdfStream, &oldUrls[urlp], urlLen);
if ( n < 0 || n > urlLen ) {
printf("Error Reading %s\n",
CONTENT_OUTPUT_FILE);
printf("Error Reading %s\n",filename);
//CONTENT_OUTPUT_FILE);
goto oldErrExit;
}
if ( n == 0 )
@ -1791,7 +1811,7 @@ fileEnd2:
urlLen = fixUrl(&oldUrls[urlp], urlLen);
// make the hash
oldUrlHashes[currUrl] =
hash64Lower(&oldUrls[urlp], urlLen, 0);
hash64Lower_a(&oldUrls[urlp], urlLen, 0);
removeOldUrl[currUrl] = 0;
// increment the buffer pointer
if (urlLen <= 0) {
@ -1814,8 +1834,8 @@ fileEnd2:
//rdfStream.read((char*)&oldNumCatids[currUrl], 1);
long n = fileRead(rdfStream, &oldNumCatids[currUrl], 1);
if ( n < 0 || n > 1 ) {
printf("Error Reading %s\n",
CONTENT_OUTPUT_FILE);
printf("Error Reading %s\n",filename);
//CONTENT_OUTPUT_FILE);
goto oldErrExit;
}
if ( n == 0 )
@ -1839,8 +1859,8 @@ fileEnd2:
long readSize = sizeof(long)*oldNumCatids[currUrl];
n = fileRead(rdfStream, &oldCatids[catidp], readSize);
if ( n < 0 || n > readSize ) {
printf("Error Reading %s\n",
CONTENT_OUTPUT_FILE);
printf("Error Reading %s\n",filename);
//CONTENT_OUTPUT_FILE);
goto oldErrExit;
}
if ( n == 0 )
@ -1907,17 +1927,17 @@ oldIsDifferent:
// also urls to remove
//
// open the new diff file for writing
sprintf(filename, "%s.new.diff", CONTENT_OUTPUT_FILE);
sprintf(filename, "%s%s.new.diff", dir,CONTENT_OUTPUT_FILE);
//outStream.open(filename, ofstream::out|ofstream::trunc);
outStream = open ( filename, O_CREAT|O_WRONLY|O_TRUNC,
S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP );
// make sure it openned okay
// make sure it opened okay
//if (!outStream.is_open()) {
if ( outStream < 0 ) {
printf("Error Openning %s\n", filename);
printf("Error Opening %s\n", filename);
goto oldErrExit;
}
printf("\nOpenned %s for writing.\n", filename);
printf("\nOpened %s for writing.\n", filename);

// write out the number of urls to update/add
//outStream.write(&numUpdateUrls, sizeof(long));
@ -2027,19 +2047,19 @@ oldGoodExit:
// . now we want to serialize the needed data into
// one (or more?) file(s) to be quickly read by gb
if ( mode == MODE_NEW )
sprintf(filename, "%s", STRUCTURE_OUTPUT_FILE);
sprintf(filename, "%s%s", dir,STRUCTURE_OUTPUT_FILE);
else
sprintf(filename, "%s.new", STRUCTURE_OUTPUT_FILE);
sprintf(filename, "%s%s.new", dir,STRUCTURE_OUTPUT_FILE);
//outStream.open(filename, ofstream::out|ofstream::ate);
outStream = open ( filename, O_WRONLY|O_APPEND,
S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP );
// make sure it openned okay
// make sure it opened okay
//if (!outStream.is_open()) {
if ( outStream < 0 ) {
printf("Error Openning %s\n", filename);
printf("Error Opening %s\n", filename);
goto errExit;
}
printf("\nOpenned %s for writing.\n", filename);
printf("\nOpened %s for writing.\n", filename);

// write the cats
//outStream.write((char*)rdfCats, sizeof(RdfCat)*numRdfCats);
@ -2109,21 +2129,21 @@ oldGoodExit:

// write another file for the urls
if ( mode == MODE_NEW )
sprintf(filename, "%s", CONTENT_OUTPUT_FILE);
sprintf(filename, "%s%s", dir,CONTENT_OUTPUT_FILE);
else
sprintf(filename, "%s.new", CONTENT_OUTPUT_FILE);
sprintf(filename, "%s%s.new", dir,CONTENT_OUTPUT_FILE);
//outStream.open(filename, ofstream::out|ofstream::ate);
outStream = open ( filename, O_WRONLY,
S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP );
//outStream.open(filename, ofstream::out|ofstream::trunc);
//endpos = outStream.tellp();
// make sure it openned okay
// make sure it opened okay
//if (!outStream.is_open()) {
if ( outStream < 0 ) {
printf("Error Openning %s\n", filename);
printf("Error Opening %s\n", filename);
goto errExit;
}
printf("\nOpenned %s for writing.\n", filename);
printf("\nOpened %s for writing.\n", filename);

//outStream.seekp(0);
lseek(outStream, 0, SEEK_SET);
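Pieced together from the fileRead() calls in the hunks above, the content output file appears to start with a url count, followed by one record per url: a 2-byte length, the url bytes, a 1-byte catid count, and that many catids. The reader below is only a hedged sketch of that inferred layout; the hunks shown here do not reveal every field, and the 4-byte widths assume the 32-bit longs the parser uses.

// Hedged sketch of a reader for the content output file, based only on the
// fileRead() calls above. Layout and field widths are inferred, not documented.
#include <stdint.h>
#include <stdio.h>
#include <string>
#include <vector>

struct UrlInfo {
	std::string          url;
	std::vector<int32_t> catids;   // assumes the 32-bit longs the parser reads
};

static bool readContentFile ( FILE *f , std::vector<UrlInfo> *out ) {
	int32_t numUrls = 0;           // "number of urls at the start of the file"
	if ( fread ( &numUrls , 4 , 1 , f ) != 1 ) return false;
	for ( int32_t i = 0 ; i < numUrls ; i++ ) {
		UrlInfo u;
		int16_t urlLen = 0;    // read as sizeof(short) above
		if ( fread ( &urlLen , 2 , 1 , f ) != 1 ) return false;
		if ( urlLen < 0 ) return false;
		u.url.resize ( urlLen );
		if ( urlLen > 0 &&
		     fread ( &u.url[0] , 1 , urlLen , f ) != (size_t)urlLen )
			return false;
		uint8_t numCatids = 0; // read as a single byte above
		if ( fread ( &numCatids , 1 , 1 , f ) != 1 ) return false;
		u.catids.resize ( numCatids );
		if ( numCatids > 0 &&
		     fread ( &u.catids[0] , 4 , numCatids , f ) != (size_t)numCatids )
			return false;
		out->push_back ( u );
	}
	return true;
}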
32
main.cpp
32
main.cpp
@ -22,7 +22,7 @@
#include "Titledb.h"
#include "Revdb.h"
#include "Tagdb.h"
//#include "Catdb.h"
#include "Catdb.h"
#include "Users.h"
#include "Tfndb.h"
#include "Spider.h"
@ -2624,8 +2624,8 @@ int main ( int argc , char *argv[] ) {
if ( ! g_tagdb.init() ) {
log("db: Tagdb init failed." ); return 1; }
// the catdb, it's an instance of tagdb, pass RDB_CATDB
//if ( ! g_catdb.init() ) {
// log("db: Catdb1 init failed." ); return 1; }
if ( ! g_catdb.init() ) {
log("db: Catdb1 init failed." ); return 1; }
// initialize Users
if ( ! g_users.init() ){
log("db: Users init failed. "); return 1;}
@ -10986,7 +10986,8 @@ void dumpTagdb (char *coll,long startFileNum,long numFiles,bool includeTree,
//g_conf.m_spiderdbMaxTreeMem = 1024*1024*30;
g_tagdb.init ();
g_collectiondb.init(true);
g_tagdb.addColl ( coll, false );
if ( rdbId == RDB_TAGDB ) g_tagdb.addColl ( coll, false );
if ( rdbId == RDB_CATDB ) g_catdb.init();
key128_t startKey ;
key128_t endKey ;
startKey.setMin();
@ -11051,6 +11052,21 @@ void dumpTagdb (char *coll,long startFileNum,long numFiles,bool includeTree,
printf("corrupt tagdb rec k.n0=%llu",k.n0);
continue;
}
// catdb?
if ( rdbId == RDB_CATDB ) {
// for debug!
CatRec crec;
crec.set ( NULL,
data ,
size ,
false);
printf("caturl=%s #catids=%li version=%li\n"
,crec.m_url
,(long)crec.m_numCatids
,(long)crec.m_version
);
continue;
}
// parse it up
//TagRec *tagRec = (TagRec *)rec;
Tag *tag = (Tag *)rec;
@ -13945,10 +13961,10 @@ void saveRdbs ( int fd , void *state ) {
last = rdb->getLastWriteTime();
if ( now - last > delta )
if ( ! rdb->close(NULL,NULL,false,false)) return;
//rdb = g_catdb.getRdb();
//last = rdb->getLastWriteTime();
//if ( now - last > delta )
// if ( ! rdb->close(NULL,NULL,false,false)) return;
rdb = g_catdb.getRdb();
last = rdb->getLastWriteTime();
if ( now - last > delta )
if ( ! rdb->close(NULL,NULL,false,false)) return;
//rdb = g_indexdb.getRdb();
//last = rdb->getLastWriteTime();
//if ( now - last > delta )
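The uncommented block gives catdb the same idle-flush treatment saveRdbs() already applies to tagdb: if the rdb has gone longer than delta since its last write, ask it to close (save), and return early if that call does not complete immediately. A condensed sketch of the pattern, with assumed types and a hypothetical helper name:

// Condensed sketch of the idle-flush pattern above (types and helper name are
// assumptions). Returns false when close() does not complete right away.
static bool flushIfIdle ( Rdb *rdb , long long now , long long delta ) {
	long long last = rdb->getLastWriteTime();
	if ( now - last <= delta ) return true;   // written recently enough, skip
	// close() asks the rdb to save; the code above returns early when it
	// reports false, presumably because the save is still in progress
	return rdb->close ( NULL , NULL , false , false );
}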