Mirror of https://github.com/privacore/open-source-search-engine.git (synced 2025-07-14 02:36:06 -04:00)
A bunch of bug fixes, mostly spider-related. Also some for pagereindex.
@@ -15718,7 +15718,8 @@ pd=(PlaceDesc *)g_cities.getValueFromSlot(pd->getSlot());
	g_cityBuf = tbuf;
	g_cityBufSize = tbufSize;
	// do not let "sb" free it
	sb.m_buf = NULL;
	//sb.m_buf = NULL;
	sb.detachBuf();

	//if ( ! g_indicators.save ( g_hostdb.m_dir, "indicators.dat" ) )
	//	return log("places: failed to save indicators.dat");
@@ -1234,6 +1234,11 @@ bool readwrite_r ( FileState *fstate , ThreadEntry *t ) {
bool BigFile::unlink ( ) {
	return unlinkRename ( NULL , -1 , false, NULL, NULL );
}

bool BigFile::move ( char *newDir ) {
	return rename ( m_baseFilename , newDir );
}

bool BigFile::rename ( char *newBaseFilename , char *newBaseFilenameDir ) {
	return unlinkRename ( newBaseFilename, -1, false, NULL, NULL ,
			      newBaseFilenameDir );
@@ -8,6 +8,7 @@
#include "Datedb.h"
#include "Timedb.h"
#include "Spider.h"
#include "Process.h"

static CollectionRec g_default;
@@ -85,6 +86,7 @@ CollectionRec::CollectionRec() {

CollectionRec::~CollectionRec() {
	//invalidateRegEx ();
	reset();
}

// new collection recs get this called on them
@@ -109,6 +111,12 @@ void CollectionRec::reset() {
	m_globalCrawlInfo.reset();
	//m_requests = 0;
	//m_replies = 0;
	// free all RdbBases in each rdb
	for ( long i = 0 ; i < g_process.m_numRdbs ; i++ ) {
		Rdb *rdb = g_process.m_rdbs[i];
		rdb->resetBase ( m_collnum );
	}

}

CollectionRec *g_cr = NULL;
@@ -891,13 +891,13 @@ bool Collectiondb::resetColl ( char *coll , WaitEntry *we , bool purgeSeeds) {
	// . updates RdbBase::m_collnum
	// . so for the tree it just needs to mark the old collnum recs
	//   with a collnum -1 in case it is saving...
	g_posdb.getRdb()->resetColl ( oldCollnum , newCollnum );
	g_titledb.getRdb()->resetColl ( oldCollnum , newCollnum );
	g_tagdb.getRdb()->resetColl ( oldCollnum , newCollnum );
	g_spiderdb.getRdb()->resetColl ( oldCollnum , newCollnum );
	g_doledb.getRdb()->resetColl ( oldCollnum , newCollnum );
	g_clusterdb.getRdb()->resetColl ( oldCollnum , newCollnum );
	g_linkdb.getRdb()->resetColl ( oldCollnum , newCollnum );
	g_posdb.getRdb()->deleteColl ( oldCollnum , newCollnum );
	g_titledb.getRdb()->deleteColl ( oldCollnum , newCollnum );
	g_tagdb.getRdb()->deleteColl ( oldCollnum , newCollnum );
	g_spiderdb.getRdb()->deleteColl ( oldCollnum , newCollnum );
	g_doledb.getRdb()->deleteColl ( oldCollnum , newCollnum );
	g_clusterdb.getRdb()->deleteColl ( oldCollnum , newCollnum );
	g_linkdb.getRdb()->deleteColl ( oldCollnum , newCollnum );

	// reset crawl status too!
	cr->m_spiderStatus = SP_INITIALIZING;
Loop.cpp (3 changes)
@@ -834,7 +834,8 @@ void sigalrmHandler ( int x , siginfo_t *info , void *y ) {
	// if we missed to many, then dump core
	if ( g_niceness == 1 && g_missedQuickPolls >= 4 ) {
		//g_inSigHandler = true;
		log("loop: missed quickpoll");
		// NOT SAFE! can block forever waiting for a printf lock!
		//log("loop: missed quickpoll");
		//g_inSigHandler = false;
		// seems to core a lot in gbcompress() we need to
		// put a quickpoll into zlib deflate() or
@@ -2767,8 +2767,11 @@ bool Parms::setFromRequest ( HttpRequest *r ,
	if ( changedUrlFilters && THIS != (char *)&g_conf ) {
		// cast it
		CollectionRec *cr = (CollectionRec *)THIS;
		// to prevent us having to rebuild doledb/waitingtree at startup
		// we need to make the spidercoll here so it is not null
		SpiderColl *sc = g_spiderCache.getSpiderColl(cr->m_collnum);
		// get it
		SpiderColl *sc = cr->m_spiderColl;
		//SpiderColl *sc = cr->m_spiderColl;
		// this will rebuild the waiting tree
		if ( sc ) sc->urlFiltersChanged();
	}
@@ -2890,6 +2893,7 @@ void Parms::setParm ( char *THIS , Parm *m , long mm , long j , char *s ,
	// array whose "count" was not incremented like it should have been.
	// HACK: make new line at bottom always have spidering enabled
	// checkbox set and make it impossible to unset.
	/*
	if ( m->m_max > 1 && m->m_rowid >= 0 && mm > 0 &&
	     m_parms[mm-1].m_rowid == m->m_rowid ) {
		char *pos = (char *)THIS + m_parms[mm-1].m_off - 4 ;
@@ -2902,6 +2906,7 @@ void Parms::setParm ( char *THIS , Parm *m , long mm , long j , char *s ,
			return;
		}
	}
	*/

	// ensure array count at least j+1
	if ( m->m_max > 1 ) {
Rdb.cpp (54 changes)
@@ -555,7 +555,47 @@ bool Rdb::addColl2 ( collnum_t collnum ) {
	return true;
}

bool Rdb::resetColl ( collnum_t collnum , collnum_t newCollnum ) {
bool Rdb::resetBase ( collnum_t collnum ) {
	CollectionRec *cr = g_collectiondb.getRec(collnum);
	if ( ! cr ) return true;
	RdbBase *base = cr->m_bases[(unsigned char)m_rdbId];
	if ( ! base ) return true;
	base->reset();
	return true;
}

bool Rdb::deleteAllRecs ( collnum_t collnum ) {

	// remove from tree
	if(m_useTree) m_tree.delColl ( collnum );
	else m_buckets.delColl ( collnum );

	// only for doledb now, because we unlink we do not move the files
	// into the trash subdir and doledb is easily regenerated. i don't
	// want to take the risk with other files.
	if ( m_rdbId != RDB_DOLEDB ) { char *xx=NULL;*xx=0; }

	CollectionRec *cr = g_collectiondb.getRec ( collnum );

	RdbBase *base = cr->m_bases[(unsigned char)m_rdbId];
	if ( ! base ) return true;

	// scan files in there
	for ( long i = 0 ; i < base->m_numFiles ; i++ ) {
		BigFile *f = base->m_files[i];
		// move to trash
		char newdir[1024];
		sprintf(newdir, "%strash/",g_hostdb.m_dir);
		f->move ( newdir );
	}

	// nuke all the files
	base->reset();
	return true;
}


bool Rdb::deleteColl ( collnum_t collnum , collnum_t newCollnum ) {

	//char *coll = g_collectiondb.m_recs[collnum]->m_coll;
@@ -645,7 +685,7 @@ bool Rdb::delColl ( char *coll ) {
	}

	// move all files to trash and clear the tree/buckets
	resetColl ( collnum , collnum );
	deleteColl ( collnum , collnum );

	// remove these collnums from tree
	//if(m_useTree) m_tree.delColl ( collnum );
@@ -2389,8 +2429,16 @@ bool Rdb::addRecord ( collnum_t collnum,
	// don't actually add it if "fake". i.e. if it
	// was an internal error of some sort... this will
	// make it try over and over again i guess...
	// no because we need some kinda reply so that gb knows
	// the pagereindex docid-based spider requests are done,
	// at least for now, because the replies were not being
	// added for now. just for internal errors at least...
	// we were not adding spider replies to the page reindexes
	// as they completed and when i tried to rerun it
	// the title recs were not found since they were deleted,
	// so we gotta add the replies now.
	long indexCode = rr->m_errCode;
	if ( indexCode == EINTERNALERROR ||
	if ( //indexCode == EINTERNALERROR ||
	     indexCode == EABANDONED ||
	     indexCode == EHITCRAWLLIMIT ||
	     indexCode == EHITPROCESSLIMIT ) {
Spider.cpp (142 changes)
@@ -1235,25 +1235,83 @@ char *SpiderColl::getCollName() {
	return cr->m_coll;
}

// . call this when changing the url filters
// . will make all entries in waiting tree have zero time basically
void SpiderColl::urlFiltersChanged ( ) {
	// log it
	log("spider: rebuilding waiting tree for coll=%s",getCollName());
	m_lastUrlFiltersUpdate = getTimeGlobal();
//
// remove all recs from doledb for the given collection
//
void doDoledbNuke ( int fd , void *state ) {

	WaitEntry *we = (WaitEntry *)state;

	if ( we->m_registered )
		g_loop.unregisterSleepCallback ( we , doDoledbNuke );

	// . nuke doledb for this collnum
	// . it will unlink the files and maps for doledb for this collnum
	// . it will remove all recs of this collnum from its tree too
	if ( g_doledb.getRdb()->isSavingTree () ) {
		g_loop.registerSleepCallback ( 100 , we , doDoledbNuke );
		we->m_registered = true;
		return;
	}

	// . ok, tree is not saving, it should complete entirely from this call
	// . crap this is moving the whole directory!!!
	// . say "false" to not move whole coll dira
	g_doledb.getRdb()->deleteAllRecs ( we->m_cr->m_collnum );

	// re-add it back so the RdbBase is new'd
	//g_doledb.getRdb()->addColl2 ( we->m_collnum );

	// shortcut
	SpiderColl *sc = we->m_cr->m_spiderColl;

	sc->m_lastUrlFiltersUpdate = getTimeGlobal();
	// need to recompute this!
	m_ufnMapValid = false;
	sc->m_ufnMapValid = false;
	// reset this cache
	clearUfnTable();
	// activate a scan if not already activated
	m_waitingTreeNeedsRebuild = true;
	sc->m_waitingTreeNeedsRebuild = true;
	// if a scan is ongoing, this will re-set it
	m_nextKey2.setMin();
	sc->m_nextKey2.setMin();
	// clear it?
	m_waitingTree.clear();
	m_waitingTable.clear();
	// kick off the spiderdb scan
	populateWaitingTreeFromSpiderdb(false);
	sc->m_waitingTree.clear();
	sc->m_waitingTable.clear();

	// kick off the spiderdb scan to repopulate waiting tree and doledb
	sc->populateWaitingTreeFromSpiderdb(false);

	// nuke this state
	mfree ( we , sizeof(WaitEntry) , "waitet" );

	// note it
	log("spider: finished clearing out doledb/waitingtree for %s",sc->m_coll);
}

// . call this when changing the url filters
// . will make all entries in waiting tree have zero time basically
// . and makes us repopulate doledb from these waiting tree entries
void SpiderColl::urlFiltersChanged ( ) {

	// log it
	log("spider: rebuilding doledb/waitingtree for coll=%s",getCollName());

	WaitEntry *we = (WaitEntry *)mmalloc ( sizeof(WaitEntry) , "waite2" );
	if ( ! we ) {
		log("spider: wait entry alloc: %s",mstrerror(g_errno));
		g_errno = 0;
		return;
	}

	// prepare our state in case the purge operation would block
	we->m_registered = false;
	we->m_cr = m_cr;
	we->m_collnum = m_cr->m_collnum;
	//we->m_callback = doDoledbNuke2;
	//we->m_state = NULL;

	// remove all recs from doledb for the given collection
	doDoledbNuke ( 0 , we );
}

// this one has to scan all of spiderdb
@@ -1611,8 +1669,10 @@ bool SpiderColl::addSpiderReply ( SpiderReply *srep ) {
	// . skip the rest if injecting
	// . otherwise it triggers a lookup for this firstip in spiderdb to
	//   get a new spider request to add to doledb
	if ( srep->m_fromInjectionRequest )
		return true;
	// . no, because there might be more on disk from the same firstip
	//   so comment this out again
	//if ( srep->m_fromInjectionRequest )
	//	return true;

	// clear error for this
	g_errno = 0;
@@ -1625,11 +1685,17 @@ bool SpiderColl::addSpiderReply ( SpiderReply *srep ) {
	// and the webmaster did not have one. then we can
	// crawl more vigorously...
	//if ( srep->m_crawlDelayMS >= 0 ) {

	bool update = false;
	// use the domain hash for this guy! since its from robots.txt
	long *cdp = (long *)m_cdTable.getValue32(srep->m_domHash32);
	// update it only if better or empty
	bool update = false;
	if ( ! cdp ) update = true;

	// no update if injecting or from pagereindex (docid based spider request)
	if ( srep->m_fromInjectionRequest )
		update = false;

	//else if (((*cdp)&0xffffffff)<(uint32_t)srep->m_spideredTime)
	//	update = true;
	// update m_sniTable if we should
@@ -1668,19 +1734,26 @@ bool SpiderColl::addSpiderReply ( SpiderReply *srep ) {
	// . TODO: consult crawldelay table here too! use that value if is
	//   less than our sameIpWait
	// . make m_lastDownloadTable an rdbcache ...
	// . this is 0 for pagereindex docid-based replies
	if ( srep->m_downloadEndTime )
		m_lastDownloadCache.addLongLong ( m_collnum,
						  srep->m_firstIp ,
						  srep->m_downloadEndTime );
	// log this for now
	if ( g_conf.m_logDebugSpider )
		log("spider: adding last download end time %lli for "
		    "ip=%s uh48=%llu indexcode=\"%s\" coll=%li "
		    "to SpiderColl::m_lastDownloadCache",
		log("spider: adding spider reply, download end time %lli for "
		    "ip=%s(%lu) uh48=%llu indexcode=\"%s\" coll=%li "
		    "k.n1=%llu k.n0=%llu",
		    //"to SpiderColl::m_lastDownloadCache",
		    srep->m_downloadEndTime,
		    iptoa(srep->m_firstIp),srep->getUrlHash48(),
		    iptoa(srep->m_firstIp),
		    srep->m_firstIp,
		    srep->getUrlHash48(),
		    mstrerror(srep->m_errCode),
		    (long)m_collnum);
		    (long)m_collnum,
		    srep->m_key.n1,
		    srep->m_key.n0);

	// ignore errors from that, it's just a cache
	g_errno = 0;
	// sanity check - test cache
@@ -2046,7 +2119,7 @@ bool SpiderColl::addToWaitingTree ( uint64_t spiderTimeMS , long firstIp ,

	// only if we are the responsible host in the shard
	if ( ! isAssignedToUs ( firstIp ) )
		return true;
		return false;

	// . do not add to waiting tree if already in doledb
	// . an ip should not exist in both doledb and waiting tree.
@@ -3879,10 +3952,10 @@ bool SpiderColl::addToDoleTable ( SpiderRequest *sreq ) {
		long long pdocid = sreq->getParentDocId();
		long ss = 1;
		if ( score ) ss = *score + 1;
		log("spider: added to doletbl uh48=%llu parentdocid=%llu "
		    "ipdolecount=%li ufn=%li priority=%li firstip=%s",
		    uh48,pdocid,ss,(long)sreq->m_ufn,(long)sreq->m_priority,
		    iptoa(sreq->m_firstIp));
		//log("spider: added to doletbl uh48=%llu parentdocid=%llu "
		//    "ipdolecount=%li ufn=%li priority=%li firstip=%s",
		//    uh48,pdocid,ss,(long)sreq->m_ufn,(long)sreq->m_priority,
		//    iptoa(sreq->m_firstIp));
	}
	// we had a score there already, so inc it
	if ( score ) {
@@ -5542,8 +5615,15 @@ bool SpiderLoop::spiderUrl2 ( ) {
	//}

	if ( g_conf.m_logDebugSpider )
		logf(LOG_DEBUG,"spider: spidering uh48=%llu pdocid=%llu",
		     m_sreq->getUrlHash48(),m_sreq->getParentDocId() );
		logf(LOG_DEBUG,"spider: spidering firstip9=%s(%lu) "
		     "uh48=%llu prntdocid=%llu k.n1=%llu k.n0=%llu",
		     iptoa(m_sreq->m_firstIp),
		     m_sreq->m_firstIp,
		     m_sreq->getUrlHash48(),
		     m_sreq->getParentDocId() ,
		     m_sreq->m_key.n1,
		     m_sreq->m_key.n0);


	// this returns false and sets g_errno on error
	if ( ! xd->set4 ( m_sreq ,
@@ -6495,7 +6575,9 @@ void handleRequest12 ( UdpSlot *udpSlot , long niceness ) {
	     // this will just return true if we are not the
	     // responsible host for this firstip
	     // DO NOT populate from this!!! say "false" here...
	     ! sc->addToWaitingTree ( 0 , cq->m_firstIp, false ) ) {
	     ! sc->addToWaitingTree ( 0 , cq->m_firstIp, false ) &&
	     // must be an error...
	     g_errno ) {
		msg = "FAILED TO ADD TO WAITING TREE";
		log("spider: %s %s",msg,mstrerror(g_errno));
		us->sendErrorReply ( udpSlot , g_errno );
@@ -6658,7 +6740,7 @@ void removeExpiredLocks ( long hostId ) {
	// when we last cleaned them out
	static time_t s_lastTime = 0;

	long nowGlobal = getTimeGlobal();
	long nowGlobal = getTimeGlobalNoCore();
	long niceness = MAX_NICENESS;

	// only do this once per second at the most
XmlDoc.cpp (31 changes)
@@ -1900,8 +1900,15 @@ bool XmlDoc::indexDoc ( ) {
	// to spiderdb to release the lock.
	///

	log("build: %s had internal error = %s. adding spider error reply.",
	    m_firstUrl.m_url,mstrerror(g_errno));
	if ( m_firstUrlValid )
		log("build: %s had internal error = %s. adding spider "
		    "error reply.",
		    m_firstUrl.m_url,mstrerror(g_errno));
	else
		log("build: docid=%lli had internal error = %s. adding spider "
		    "error reply.",
		    m_docId,mstrerror(g_errno));


	if ( ! m_indexCodeValid ) {
		m_indexCode = EINTERNALERROR;//g_errno;
@@ -1945,21 +1952,27 @@ bool XmlDoc::indexDoc ( ) {
	// url spider lock in SpiderLoop::m_lockTable.
	SpiderReply *nsr = getNewSpiderReply ();
	if ( nsr == (void *)-1) { char *xx=NULL;*xx=0; }
	if ( nsr->getRecSize() <= 1) { char *xx=NULL;*xx=0; }

	CollectionRec *cr = getCollRec();
	if ( ! cr ) return true;

	SafeBuf metaList;
	metaList.pushChar(RDB_SPIDERDB);
	metaList.safeMemcpy ( (char *)nsr , nsr->getRecSize() );
	//SafeBuf metaList;
	m_metaList2.pushChar(RDB_SPIDERDB);
	m_metaList2.safeMemcpy ( (char *)nsr , nsr->getRecSize() );

	m_msg4Launched = true;

	// log this for debug now
	SafeBuf tmp;
	nsr->print(&tmp);
	log("xmldoc: added reply %s",tmp.getBufStart());

	// clear g_errno
	g_errno = 0;

	if ( ! m_msg4.addMetaList ( metaList.getBufStart() ,
				    metaList.length() ,
	if ( ! m_msg4.addMetaList ( m_metaList2.getBufStart() ,
				    m_metaList2.length() ,
				    cr->m_coll ,
				    m_masterState , // state
				    m_masterLoop ,
@@ -15793,7 +15806,7 @@ char **XmlDoc::getExpandedUtf8Content ( ) {
		// null term it
		m_esbuf.pushChar('\0');
		// and point to that buffer
		m_expandedUtf8Content = m_esbuf.m_buf;
		m_expandedUtf8Content = m_esbuf.getBufStart();//m_buf;
		// include the \0 as part of the size
		m_expandedUtf8ContentSize = m_esbuf.m_length; // + 1;
	}
@@ -16012,7 +16025,7 @@ char **XmlDoc::getUtf8Content ( ) {
	// final \0
	*dst = '\0';
	// re-assign these
	m_expandedUtf8Content = m_xbuf.m_buf;
	m_expandedUtf8Content = m_xbuf.getBufStart();//m_buf;
	m_expandedUtf8ContentSize = m_xbuf.m_length + 1;
	// free esbuf if we were referencing that to save mem
	m_esbuf.purge();
main.cpp (1 change)
@@ -6150,6 +6150,7 @@ long dumpSpiderdb ( char *coll,
		if ( ! g_spiderdb.isSpiderRequest((key128_t *)srec) ) {
			// print it
			if ( ! printStats ) {
				printf( "offset=%lli ",curOff);
				g_spiderdb.print ( srec );
				printf("\n");
			}