This commit is contained in:
Brian Rasmusson
2016-01-07 14:41:24 +01:00
7 changed files with 140 additions and 674 deletions

@ -555,37 +555,7 @@ bool Matches::addMatches( Words *words, Phrases *phrases, Sections *sections, Bi
int32_t nextMatchWordPos = 0;
int32_t lasti = -3;
int32_t dist = 0;
// . every tag increments "dist" by a value
// . rather than use a switch/case statement, which does a binary
// lookup thing which is really slow, let's use a 256 bucket table
// for constant lookup, rather than log(N).
static char s_tableInit = false;
static int8_t s_tab[512];
if ( getNumXmlNodes() > 512 ) { char *xx=NULL;*xx=0; }
for ( int32_t i = 0 ; ! s_tableInit && i < 128 ; i++ ) {
char step = 0;
if ( i == TAG_TR ) step = 2;
if ( i == TAG_P ) step = 10;
if ( i == TAG_HR ) step = 10;
if ( i == TAG_H1 ) step = 10;
if ( i == TAG_H2 ) step = 10;
if ( i == TAG_H3 ) step = 10;
if ( i == TAG_H4 ) step = 10;
if ( i == TAG_H5 ) step = 10;
if ( i == TAG_H6 ) step = 10;
if ( i == TAG_TABLE ) step = 30;
if ( i == TAG_BLOCKQUOTE ) step = 10;
// default
if ( step == 0 ) {
if ( g_nodes[i].m_isBreaking ) step = 10;
else step = 1;
}
// account for both the back and the front tags
s_tab[i ] = step;
}
s_tableInit = true;
// google seems to index SEC_MARQUEE so i took that out of here
int32_t badFlags =SEC_SCRIPT|SEC_STYLE|SEC_SELECT|SEC_IN_TITLE;
@ -604,29 +574,13 @@ bool Matches::addMatches( Words *words, Phrases *phrases, Sections *sections, Bi
//else if (tids && (tids[i]&BACKBITCOMP) == TAG_A)
// inAnchTag = false;
// for each word increment distance
dist++;
//if ( addToMatches && tids && tids[i] ){
if ( tids && tids[i] ){
int32_t tid = tids[i] & BACKBITCOMP;
// accumulate distance
dist += s_tab[tid];
// monitor boundaries so that the proximity algo
// knows when two matches are separated by such tags
// MDW: isn't the "dist" good enough for this?????
// let's try just using "dist" then.
// "crossedSection" is hereby replaced by "dist".
//if ( s_tab[tid]
// tagIds don't have wids and are skipped
continue;
}
// skip if wid is 0, it is not an alnum word then
if ( ! wids[i] ) {
// and extra unit if it starts with \n i guess
if ( words->m_words[i][0] == '\n' ) dist++;
// dist += words->m_wordLens[i] / 3;
continue;
}
@ -859,7 +813,6 @@ bool Matches::addMatches( Words *words, Phrases *phrases, Sections *sections, Bi
m->m_sections = sections;
m->m_bits = bits;
m->m_pos = pos;
m->m_dist = dist;
m->m_flags = flags | eflag ;
// add to our vector. we want to know where each QueryWord

@ -62,10 +62,6 @@ class Match {
// "match group" or type of match. i.e. MF_TITLETAG, MF_METASUMM, ...
mf_t m_flags;
// improve summary generation parms
int32_t m_dist;
//bool m_crossedSection;
// . for convenience, these four class ptrs are used by Summary.cpp
// . m_wordNum is relative to this "words" class (and scores,bits,pos)
class Words *m_words;

27
Pos.cpp

@ -82,6 +82,8 @@ bool Pos::set (Words *words, char *f, char *fend, int32_t *len , int32_t a , int
// flag for stopping back-to-back spaces. only count those as one char.
bool lastSpace = false;
int32_t maxCharSize = 4; // we are utf8
int in_bad_tags = 0;
for ( int32_t i = a ; i < b ; i++ ) {
if (trunc) {
break;
@ -94,8 +96,26 @@ bool Pos::set (Words *words, char *f, char *fend, int32_t *len , int32_t a , int
// is tag?
if ( tids && tids[i] ) {
if ( f ) {
// let's not get from bad tags when filtering into buffer (used for generating summaries)
if ( ( tids[i] == TAG_STYLE ) || ( tids[i] == TAG_SCRIPT ) ) {
++in_bad_tags;
continue;
}
if ( in_bad_tags ) {
if ( ( ( tids[i] & BACKBITCOMP ) == TAG_STYLE ) ||
( ( tids[i] & BACKBITCOMP ) == TAG_SCRIPT ) ) {
--in_bad_tags;
}
}
}
// if not breaking, does nothing
if ( ! g_nodes[tids[i]&0x7f].m_isBreaking ) continue;
if ( ! g_nodes[tids[i]&0x7f].m_isBreaking ) {
continue;
}
// list tag? <li>
if ( tids[i] == TAG_LI ) {
if ( f ){
@ -153,6 +173,11 @@ bool Pos::set (Words *words, char *f, char *fend, int32_t *len , int32_t a , int
continue;
}
// skip words if we're in 'bad' tags
if ( in_bad_tags ) {
continue;
}
// scan through all chars discounting back-to-back spaces
// assume filters out to the same # of chars

@ -124,14 +124,7 @@ bool Sections::set ( Words *w ,
int32_t niceness ,
void *state ,
void (*callback)(void *state) ,
uint8_t contentType ,
// from XmlDoc::ptr_sectionsData in a title rec
char *sectionsData ,
bool sectionsDataValid ,
char *sectionsVotes ,
//uint64_t tagPairHash ,
char *buf ,
int32_t bufSize ) {
uint8_t contentType ) {
reset();
@ -1821,23 +1814,6 @@ bool Sections::set ( Words *w ,
///////////////////////////////////////
setMenus();
///////////////////////////////////////
//
// now set SENT_LIST flags on m_sentFlags
//
// try to capture sentences that are not menus but are a list of
// things. if the sentence itself has a list of short items, or a bunch
// of commas, then also set the SEC_LIST flag on it. or if sentence
// is part of a sequence of sentences that are a list of sentences then
// set it for them as well. typically such sentences will be separated
// by a vertical space, have no periods, maybe have an <li> tag or only
// have a few words per sentence. this will help us demote search results
// that have the query terms in such a list because it is usually not
// very useful information.
//
///////////////////////////////////////
setListFlags();
//verifySections();
// don't use nsvt/osvt now
@ -1856,16 +1832,6 @@ bool Sections::addImpliedSections ( Addresses *aa ) {
// no point in going any further if we have nothing
if ( m_numSections == 0 ) return true;
// set this
//m_osvt = osvt;
// as part of a replacement for table swoggling which is confusing
// and didn't really work right, especially when we had both
// table header row and column, we set these on each table cell:
// SEC_HASDATEHEADERROW and SEC_HASDATEHEADERCOL
if ( ! setTableStuff ( ) ) return false;
m_aa = aa;
@ -4060,7 +4026,6 @@ bool Sections::setSentFlagsPart2 ( ) {
inParens = false;
int32_t dollarCount = 0;
int32_t priceWordCount = 0;
bool hadAt = false;
// watchout if in a table. the last table column header
// should not be applied to the first table cell in the
@ -4435,8 +4400,7 @@ bool Sections::setSentFlagsPart2 ( ) {
isStopWord = true;
// count them
if ( isStopWord ) stops++;
// set this
if ( m_wids[i] == h_at ) hadAt = true;
// if we end on a stop word that is usually indicative
// of something like
// "Search Results for <h1>Doughnuts</h1>" as for
@ -5270,8 +5234,6 @@ int32_t hasTitleWords ( sentflags_t sflags ,
static int64_t h_tickets;
static int64_t h_events;
static int64_t h_jobs;
static int64_t h_this;
static int64_t h_series;
static int64_t h_total;
static int64_t h_times;
static int64_t h_purchase;
@ -5321,8 +5283,6 @@ int32_t hasTitleWords ( sentflags_t sflags ,
h_tickets = hash64n("tickets");
h_events = hash64n("events");
h_jobs = hash64n("jobs");
h_this = hash64n("this");
h_series = hash64n("series");
h_total = hash64n("total");
h_times = hash64n("times");
h_purchase = hash64n("purchase");
@ -6537,10 +6497,6 @@ int32_t hasTitleWords ( sentflags_t sflags ,
to_lower_a(wptrs[i][wlens[i]-1]) == 't' )
hadAthon = true;
//if ( wids[i] == h_this &&
// i+2<nw && wids[i+2] == h_series )
// log("hey");
// save it
int64_t savedWid = lastWid;
// assign
@ -7088,7 +7044,6 @@ int32_t Sections::addImpliedSections3 ( ) {
//bro = sk;
// assume no winner
int32_t bestScore = 0;
Section *bestBro = NULL;
char bestMethod = -1;
Partition *bestPart = NULL;
// loop over all enumerated methods
@ -7147,7 +7102,6 @@ int32_t Sections::addImpliedSections3 ( ) {
if ( score <= bestScore ) continue;
// is best of all methods so far?
bestScore = score;
bestBro = bro;
bestMethod = m;
bestPart = &parts[m];
}
@ -7186,11 +7140,9 @@ int32_t Sections::addImpliedSections3 ( ) {
Section *cc = m_sectionPtrs[a];
if ( cc && cc->m_a == a && cc->m_b == b ) continue;
// this returns false and sets g_errno on error
if ( ! insertSubSection( sk->m_parent ,
winnerPart->m_a[i],
winnerPart->m_b[i],
BH_IMPLIED ) )
if ( ! insertSubSection( winnerPart->m_a[i], winnerPart->m_b[i], BH_IMPLIED ) ) {
return -1;
}
}
// ok, flag it
@ -7214,8 +7166,7 @@ float computeSimilarity2 ( int32_t *vec0 ,
int32_t niceness ,
SafeBuf *pbuf ,
HashTableX *labelTable ,
int32_t nv0 ,
int32_t nv1 ) {
int32_t nv0 ) {
// if both empty, assume not similar at all
if ( *vec0 == 0 && *vec1 == 0 ) return 0;
// if either is empty, return 0 to be on the safe side
@ -7379,7 +7330,7 @@ int32_t Sections::getDelimScore ( Section *bro ,
// save it
Section *start = bro;
int32_t dh = getDelimHash ( method , delim , start );
int32_t dh = getDelimHash ( method , delim );
// bro must be certain type for some methods
if ( dh == -1 ) return -2;
@ -7395,8 +7346,6 @@ int32_t Sections::getDelimScore ( Section *bro ,
// sanity check... should all be brothers (same parent)
if ( delim->m_parent != container ) { char *xx=NULL;*xx=0; }
// the head section of a particular partition's section
Section *currDelim = bro;
// scores
int32_t brosWithWords = 0;
int32_t maxBrosWithWords = 0;
@ -7453,7 +7402,6 @@ int32_t Sections::getDelimScore ( Section *bro ,
// reset prev sentence
Section *prevSent = NULL;
Section *lastBro = NULL;
// scan the brothers
for ( ; ; bro = bro->m_nextBrother ) {
@ -7467,7 +7415,7 @@ int32_t Sections::getDelimScore ( Section *bro ,
// get its hash
int32_t h = 0LL ;
if ( bro ) h = getDelimHash ( method , bro , start );
if ( bro ) h = getDelimHash ( method , bro );
// . check this out
// . don't return 0 because we make a vector of these hashes
@ -7480,12 +7428,6 @@ int32_t Sections::getDelimScore ( Section *bro ,
// if first time, ignore crap above the first delimeter occurnc
if ( ignoreAbove ) continue;
// update this for insertSubSection()
lastBro = bro;
if ( h == dh )
currDelim = bro;
// count non delimeter sections. at least one section
// must have text and not be a delimeter section
if ( h != dh && bro && bro->m_firstWordPos >= 0 )
@ -7592,8 +7534,7 @@ int32_t Sections::getDelimScore ( Section *bro ,
m_niceness ,
pbuf ,
dbt ,
nva ,
nvb );
nva );
// add up all sims
if ( cellCount >= 2 ) { // ! firstTime ) {
simTotal += sim;
@ -7610,8 +7551,6 @@ int32_t Sections::getDelimScore ( Section *bro ,
if ( pbuf )
minBuf.safeMemcpy ( pbuf );
}
// a new head
//currDelim = bro;
// reset vht for next partition cell to call
// hashSentenceBits() into
vht.clear();
@ -8171,7 +8110,7 @@ bool Sections::hashSentPairs (Section *sx ,
// . don't return 0 because we make a vector of these hashes
// and computeSimilarity() assumes vectors are NULL term'd. return -1 instead
int32_t Sections::getDelimHash ( char method , Section *bro , Section *head ) {
int32_t Sections::getDelimHash ( char method , Section *bro ) {
// now all must have text!
//if ( bro->m_firstWordPos < 0 ) return -1;
@ -8370,31 +8309,6 @@ int32_t Sections::getDelimHash ( char method , Section *bro , Section *head ) {
// was not getting an implied section set, so let's do away
// with the pure dow algo and see what happens.
return -1;
// must be a sort of heading like "Jul 24"
//if ( !(bro->m_flags & SEC_HEADING_CONTAINER) &&
// !(bro->m_flags & SEC_HEADING ) )
// return -1;
if ( ! (bro->m_flags & SEC_HAS_DOW) )
return -1;
// this is causing core
if ( bro->m_tagId == TAG_TC ) return -1;
// now it must be all date words
int32_t a = bro->m_firstWordPos;
int32_t b = bro->m_lastWordPos;
// sanity check
if ( a < 0 ) { char *xx=NULL;*xx=0; }
// scan
for ( int32_t i = a ; i <= b ; i++ ) {
// breathe
QUICKPOLL(m_niceness);
// skip if not wid
if ( ! m_wids[i] ) continue;
// must be in date
if ( ! ( m_bits->m_bits[i] & D_IS_IN_DATE ) )
return -1;
}
// do not collide with tagids
return 66666;
}
if ( method == METHOD_ABOVE_DOW ) {
// must be a sort of heading like "Jul 24"
@ -8503,10 +8417,6 @@ bool Sections::addSentenceSections ( ) {
static int64_t h_the;
static int64_t h_and;
static int64_t h_a;
static int64_t h_p;
static int64_t h_m;
static int64_t h_am;
static int64_t h_pm;
static int64_t h_http;
static int64_t h_https;
static int64_t h_room;
@ -8516,8 +8426,6 @@ bool Sections::addSentenceSections ( ) {
static int64_t h_suite;
static int64_t h_ste;
static int64_t h_tags;
//static int64_t h_noon;
//static int64_t h_midnight;
if ( ! s_init ) {
s_init = true;
h_tags = hash64n("tags");
@ -8525,10 +8433,6 @@ bool Sections::addSentenceSections ( ) {
h_the = hash64n("the");
h_and = hash64n("and");
h_a = hash64n("a");
h_p = hash64n("p");
h_m = hash64n("m");
h_am = hash64n("am");
h_pm = hash64n("pm");
h_a = hash64n("a");
h_at = hash64n("at");
h_for = hash64n("for");
@ -8549,8 +8453,6 @@ bool Sections::addSentenceSections ( ) {
h_building = hash64n("building");
h_suite = hash64n("suite");
h_ste = hash64n("ste");
//h_noon = hash64n("noon");
//h_midnight = hash64n("midnight");
}
// need D_IS_IN_URL bits to be valid
@ -8578,7 +8480,6 @@ bool Sections::addSentenceSections ( ) {
bool lastWasComma = false;
nodeid_t includedTag = -2;
int32_t lastbr = -1;
bool hasColon = false;
bool endOnBr = false;
bool endOnBold = false;
bool capped = true;
@ -8842,11 +8743,6 @@ bool Sections::addSentenceSections ( ) {
j == i )
break;
// flag it, but only if not in
// a time format like "8:30"
if ( j>0 && !is_digit(m_wptrs[j][-1]))
hasColon = true;
// a "::" is used in breadcrumbs,
// so break on that.
// fixes "Dining :: Visit ::
@ -8881,39 +8777,9 @@ bool Sections::addSentenceSections ( ) {
if ( tagBefore ) break;
if ( tagAfter ) break;
// for now allow it!
continue;
// do not break http://... though
if ( p[1] == '/' ) continue;
// or 10:30 etc.
if ( is_digit(p[1]) ) continue;
if ( j>0 && is_digit(p[-1]) ) continue;
// allow trumba titles to have colons
// so they can get the TSF_TITLEY
// event title boost in Events.cpp
if ( m_isTrumba &&
sp[j]->m_tagId == TAG_TITLE )
continue;
// fix guysndollsllc.com which has
// "Battle of the Bands with: The Cincy
// Rockers, Second Wind, ..."
// if last word was a lowercase
// and one of these, let it in the
// sentence
//if ( lastWidPos < 0 )
// break;
// must have been lowercase
if(!is_lower_utf8(m_wptrs[lastWidPos]))
break;
// and must be one of these words:
if ( prevWid == h_with ||
// "Send info to: Booking"
// from guysndollsllc.com/page4.ht
prevWid == h_to ||
prevWid == h_and )
continue;
// otherwise, break it
break;
}
// . special hyphen
// . breaks up title for peachpundit.com
@ -8961,28 +8827,12 @@ bool Sections::addSentenceSections ( ) {
// set "next" to next alnum word after us
int32_t next = j+1;
int64_t nwid = 0LL;
int32_t max = next + 10;
if ( max > m_nw ) max = m_nw;
for ( ; next < max ; next++ ) {
if ( ! m_wids[next] ) continue;
nwid = m_wids[next];
break;
}
// am. pm.
// if prev word was like 'm' as in am or pm
// then assume a cap word following ends sent.
// although if we got
// "At 1 p.m. Bob Jones plays"
// then we'd be wrong.
bool isAmPm = false;
if ( prevWid == h_m &&
(prevPrevWid == h_a ||
prevPrevWid == h_p ) )
isAmPm = true;
if ( (prevWid == h_am ||
prevWid == h_pm ) )
isAmPm = true;
// was previous word/abbr capitalized?
// if so, assume period does not end sentence.
@ -9206,7 +9056,7 @@ bool Sections::addSentenceSections ( ) {
if ( addb >= m_nw ) { char *xx=NULL;*xx=0; }
// ok, now add the split sentence
Section *is =insertSubSection(parent,adda,addb+1,bh);
Section *is =insertSubSection(adda,addb+1,bh);
// panic?
if ( ! is ) return false;
// set sentence flag on it
@ -9270,8 +9120,7 @@ bool Sections::addSentenceSections ( ) {
return true;
}
Section *Sections::insertSubSection ( Section *parentArg , int32_t a , int32_t b ,
int32_t newBaseHash ) {
Section *Sections::insertSubSection ( int32_t a, int32_t b, int32_t newBaseHash ) {
// debug
//log("sect: inserting subsection [%"INT32",%"INT32")",a,b);
@ -9373,11 +9222,11 @@ Section *Sections::insertSubSection ( Section *parentArg , int32_t a , int32_t b
m_numSections--;
char *xx=NULL;*xx=0;
return NULL;
sk->m_next = m_rootSection;//m_rootSection;
sk->m_prev = NULL;
//m_sections[0].m_prev = sk;
m_rootSection->m_prev = sk;
m_rootSection = sk;
// sk->m_next = m_rootSection;//m_rootSection;
// sk->m_prev = NULL;
// //m_sections[0].m_prev = sk;
// m_rootSection->m_prev = sk;
// m_rootSection = sk;
} else {
// insert us into the linked list of sections
if ( si->m_next ) si->m_next->m_prev = sk;
@ -9541,96 +9390,6 @@ Section *Sections::insertSubSection ( Section *parentArg , int32_t a , int32_t b
}
return sk;
// start scanning here
Section *start = parent->m_next;
int32_t lastb = -1;
// try just scanning sections in parent
for ( Section *sx = start ; sx ; sx = sx->m_next ) {
// breathe
QUICKPOLL ( m_niceness );
// get it
//Section *sx = &m_sections[xx];
// skip if section ends before our sentence begins
if ( sx->m_b <= a ) continue;
// stop if beyond sk
if ( sx->m_a >= b ) break;
// skip if sn not parent
if ( sx->m_parent != parent ) continue;
// when splitting a section do not reparent if
// not in our split...
//if ( sx->m_a >= b ) continue;
// do not reparent if it contains us
if ( sx->m_a <= a && sx->m_b >= b ) continue;
// reset his parent to the newly added section
sx->m_parent = sk;
// and or his flags into us. SEC_HAS_DOM, etc.
sk->m_flags |= sx->m_flags & mask;
// sanity check
if ( sx->m_b > sk->m_b ) { char *xy=NULL;*xy=0; }
if ( sx->m_a < sk->m_a ) { char *xy=NULL;*xy=0; }
// skip if already got the xor for this section
if ( sx->m_a < lastb ) continue;
// set this
lastb = sx->m_b;
// add all the entries from this child section from the
// phone/email/etc. tables
sk->m_phoneXor ^= sx->m_phoneXor;
sk->m_emailXor ^= sx->m_emailXor;
sk->m_priceXor ^= sx->m_priceXor;
sk->m_todXor ^= sx->m_todXor;
sk->m_dayXor ^= sx->m_dayXor;
sk->m_addrXor ^= sx->m_addrXor;
// make sure did not make it zero
if ( sx->m_phoneXor && sk->m_phoneXor == 0 )
sk->m_phoneXor = sx->m_phoneXor;
if ( sx->m_emailXor && sk->m_emailXor == 0 )
sk->m_emailXor = sx->m_emailXor;
if ( sx->m_priceXor && sk->m_priceXor == 0 )
sk->m_priceXor = sx->m_priceXor;
if ( sx->m_todXor && sk->m_todXor == 0 )
sk->m_todXor = sx->m_todXor;
if ( sx->m_dayXor && sk->m_dayXor == 0 )
sk->m_dayXor = sx->m_dayXor;
if ( sx->m_addrXor && sk->m_addrXor == 0 )
sk->m_addrXor = sx->m_addrXor;
// set this perhaps
if ( sk->m_firstPlaceNum < 0 )
sk->m_firstPlaceNum = sx->m_firstPlaceNum;
// update this?
if ( sx->m_alnumPosA < 0 ) continue;
// take the first one we get
if ( sk->m_alnumPosA == -1 )
sk->m_alnumPosA = sx->m_alnumPosA;
// update to the last one always
sk->m_alnumPosB = sx->m_alnumPosB;
}
// a flag
bool needsFirst = true;
// . set the words ptrs to it
// . TODO: can later speed up with ptr to ptr logic
for ( int32_t yy = a ; yy < b ; yy++ ) {
// breathe
QUICKPOLL ( m_niceness );
// and first/last word pos
if ( m_wids[yy] ) {
// mark this
if ( needsFirst ) {
sk->m_firstWordPos = yy;
needsFirst = false;
}
// remember last
sk->m_lastWordPos = yy;
}
// must have had sn as parent
if ( m_sectionPtrs[yy] != parent ) continue;
// "sk" becomes the new parent
m_sectionPtrs[yy] = sk;
}
return sk;
}
// for brbr and hr splitting delimiters
@ -9666,8 +9425,6 @@ int32_t Sections::splitSectionsByTag ( nodeid_t tagid ) {
for ( ; first->m_prevBrother ; first = first->m_prevBrother )
// breathe
QUICKPOLL(m_niceness);
// save parent
Section *parent = first->m_parent;
subloop:
// mark it
@ -9715,7 +9472,7 @@ int32_t Sections::splitSectionsByTag ( nodeid_t tagid ) {
// and must group together something meaningful
numTextSections >= 2 ) {
// do the insertion
Section *sk = insertSubSection (parent,a,b,BH_IMPLIED);
Section *sk = insertSubSection (a,b,BH_IMPLIED);
// error?
if ( ! sk ) return -1;
// fix it
@ -9792,7 +9549,7 @@ bool Sections::splitSections ( char *delimeter , int32_t dh ) {
//
// try this now
//
Section *sk = insertSubSection ( sn , start , i , dh );
Section *sk = insertSubSection ( start , i , dh );
// do not resplit this split section with same delimeter!!
if ( sk ) sk->m_processedHash = dh;
@ -9900,7 +9657,6 @@ SectionVotingTable::SectionVotingTable ( ) {
//bool Sections::gotSectiondbList ( bool *needsRecall ) {
bool SectionVotingTable::addListOfVotes ( RdbList *list,
key128_t **lastKey ,
uint32_t tagPairHash ,
int64_t myDocId ,
int32_t niceness ) {
@ -11296,27 +11052,6 @@ bool Sections::containsTagId ( Section *si, nodeid_t tagId ) {
return false;
}
// . stub: does no table processing and always reports success
// . kept only so that existing callers, which treat a false return as an
//   error, continue to compile and proceed
bool Sections::setTableStuff ( ) {
return true;
}
// . NOTE: this is currently a stub. it ignores "ts", sets no flags and
//   always returns true. the points below describe the behavior the
//   function was designed to have, not what the body does today.
// . does table have a date header row or column?
// . we need this for our weekly schedule detection algorithm
// . many sites have a row header that is the days of the week
// . sometimes they have tods in the first column, and sometimes they
//   just put the tods and tod ranges in the table cells directly.
// . sets Section::m_flags SEC_HASDATEHEADERCOL/ROW for JUST the table
//   section if it indeed has such date headers
// . Dates::isCompatible() looks at that table flag to see if it should
//   apply special processing when deciding if two dates should be paired
// . then we set DF_TABLEDATEHEADERROW/COL for the dates in those
//   header rows/cols so that we can set SF_RECURRING_DOW if the dow date
//   was in the header row/col
bool Sections::setTableDateHeaders ( Section *ts ) {
return true;
}
// . just the voting info for passing into diffbot in json
// . along w/ the title/summary/etc. we can return this json blob for each search result
bool Sections::printVotingInfoInJSON ( SafeBuf *sb ) {
@ -11354,10 +11089,8 @@ bool Sections::print2 ( SafeBuf *sbuf ,
char *diversityVec,
char *wordSpamVec,
char *fragVec,
HashTableX *st2 ,
HashTableX *tt ,
Addresses *aa ,
char format ) { // bool forProCog ){
char format ) {
//FORMAT_PROCOG FORMAT_JSON HTML
//sbuf->safePrintf("<b>Sections in Document</b>\n");
@ -11367,10 +11100,6 @@ bool Sections::print2 ( SafeBuf *sbuf ,
m_sbuf->setLabel ("sectprnt");
//m_pt = pt;
//m_et = et;
//m_at = at;
//m_priceTable = priceTable;
m_aa = aa;
m_hiPos = hiPos;
@ -11991,7 +11720,6 @@ bool Sections::setRegistrationBits ( ) {
static int64_t h_request;
static int64_t h_requesting;
static int64_t h_get;
static int64_t h_enroll;
static int64_t h_buy;
static int64_t h_presale ;
static int64_t h_pre ;
@ -12004,7 +11732,6 @@ bool Sections::setRegistrationBits ( ) {
static int64_t h_box; // box office for newmexicojazzfestival.org
static int64_t h_office;
static int64_t h_ticket;//ticket window for newmexicojazzfestival.org
static int64_t h_online;
static int64_t h_window;
static int64_t h_patron;
static int64_t h_service;
@ -12043,7 +11770,6 @@ bool Sections::setRegistrationBits ( ) {
h_requesting = hash64n("requesting");
h_request = hash64n("request");
h_get = hash64n("get");
h_enroll = hash64n("enroll");
h_buy = hash64n("buy");
h_presale = hash64n("presale");
h_pre = hash64n("pre");
@ -12069,7 +11795,6 @@ bool Sections::setRegistrationBits ( ) {
h_box = hash64n("box");
h_office = hash64n("office");
h_ticket = hash64n("ticket");
h_online = hash64n("online");
h_window = hash64n("window");
h_patron = hash64n("patron");
h_service = hash64n("service");
@ -12186,7 +11911,6 @@ bool Sections::setRegistrationBits ( ) {
if ( wid == h_sign && nextWid == h_up ) gotIt = 1;
if ( wid == h_signup ) gotIt = 1;
if ( wid == h_buy && nextWid == h_ticket ) gotIt = 1;
//if ( wid == h_buy && nextWid == h_online ) gotIt = 1;
if ( wid == h_purchase&&nextWid==h_ticket ) gotIt = 1;
if ( wid == h_get && nextWid==h_ticket ) gotIt = 1;
// for that jimmy kimmel live url "requesting tickets online"
@ -13102,100 +12826,95 @@ bool Sectiondb::verify ( char *coll ) {
return true;
}
// . stub: sets no flags on any section and always returns true
bool Sections::setListFlags ( ) {
return true;
}
// . growing the section buffer is disabled: instead of reallocating we
//   log a note, set g_errno to EDOCBADSECTIONS and return
// . NOTE(review): this returns true even though g_errno has been set, so
//   callers presumably have to inspect g_errno rather than the return
//   value -- confirm against the call sites before changing it
// . the code that used to follow the return statement could never execute
//   and has been removed. for reference, it did the following:
//   . remembered the old m_sectionBuf.getBufStart()
//   . called m_sectionBuf.reserve ( 20000000 ), returning false on failure
//   . recomputed m_maxNumSections as
//     m_sectionBuf.getCapacity() / sizeof(Section)
//   . for each of the m_numSections entries in m_sections, rebased every
//     non-NULL Section pointer member from the old buffer start to the
//     new one: m_parent, m_next, m_prev, m_listContainer, m_prevBrother,
//     m_nextBrother, m_sentenceSection, m_prevSent, m_nextSent,
//     m_tableSec, m_headColSection, m_headRowSection, m_leftCell and
//     m_aboveCell
// . if growing is ever re-enabled, every one of those members must be
//   rebased, since they all point into the section buffer
bool Sections::growSections ( ) {
	// leave a trace in the log; hitting this path is not expected often
	log("build: growing sections!");
	// flag the failure for whoever checks g_errno
	g_errno = EDOCBADSECTIONS;
	return true;
}

@ -33,8 +33,6 @@
// . these are descriptive flags, they are computed when Sections is set
// . SEC_NOTEXT sections do not vote, i.e. they are not stored in Sectiondb
#define SEC_NOTEXT 0x0001 // implies section has no alnum words
//#define SEC_ARTICLE 0x0002 // section is SV_UNIQUE and SV_TEXTY
//#define SEC_DUP 0x0004 // content hash repeated on same site
// . Weights.cpp zeroes out the weights for these types of sections
// . is section delimeted by the <script> tag, <marquee> tag, etc.
@ -43,9 +41,6 @@
#define SEC_SELECT 0x0020
#define SEC_MARQUEE 0x0040
#define SEC_CONTAINER 0x0080
// . is section in anchor text
// . is section delimeted by the <a href...> tag
//#define SEC_A 0x0080
// . in title/header. for gigabits in XmlDoc.cpp
// . is section delimited by <title> or <hN> tags?
@ -69,8 +64,6 @@
#define SEC_HEADING 0x200000
// reasons why a section is not an event
//#define SEC_MULT_PLACES 0x008000
//#define SEC_IS_MENUITEM 0x00040000 // in a list of menu items?
#define SEC_UNBALANCED 0x00400000 // interlaced section/tags
#define SEC_OPEN_ENDED 0x00800000 // no closing tag found
#define SEC_SENTENCE 0x01000000 // made by a sentence?
@ -115,26 +108,11 @@
#define SEC_MULTIDIMS 0x0008000000000000LL
#define SEC_HASHXPATH 0x0010000000000000LL
//#define SEC_HAS_ADDRESS 0x08000000
//#define SEC_ADDRESS_CONTAINER 0x40000000
//#define SEC_HAS_STOREHOURS 0x01000000 // event is really just store hours
//#define SEC_HAS_NONSTOREHOURS 0x02000000
//#define SEC_HAS_NON_EVENT_DATE 0x04000000
// . some random-y numbers for Section::m_baseHash
// . used by splitSection() function
//#define BH_BR -1113348753
//#define BH_BRBR 3947503
//#define BH_HR 1378153634
//#define BH_H1 -1788814047
//#define BH_H2 -1170023066
//#define BH_H3 -132582659
//#define BH_H4 2095609929
#define BH_BULLET 7845934
#define BH_SENTENCE 4590649
#define BH_IMPLIED 95468323
//#define BH_IMPLIED_LIST 9434499
// values for Section::m_sentFlags (sentence flags)
#define SENT_HAS_COLON 0x00000001
@ -342,12 +320,7 @@ public:
// used by Events.cpp to count # of timeofdays in section
//class Event *m_event;
// for Events class
//uint8_t m_numAddresses;
//class Address *m_address;
// for Events class, usually streets!
//uint8_t m_numPlaces;
//class Place *m_place;
class Addresses *m_aa;
// . if we are an element in a list, what is the list container section
@ -357,10 +330,6 @@ public:
// . used to set SEC_HAS_MENUBROTHER flag
class Section *m_listContainer;
// if we are a header, of what list are we a header of?
//class Section *m_headerOfList;
// the sibling section before/after us. can be NULL.
class Section *m_prevBrother;
class Section *m_nextBrother;
@ -453,30 +422,9 @@ public:
// for debug output display of color coded nested sections
uint32_t m_colorHash;
// like tag hash but only the tag ids, no hashed attributes or
// virtual section base hashes
//int32_t m_formatHash;
// tagid of this section, 0 means none (like sentence section, etc.)
nodeid_t m_tagId;
/*
// used by addImpliedSections()
int32_t getBaseHash2 ( ) {
// fix for funkefiredarts.com since one of the header tags
// has a different tag attribute, but it says "Monday". so
// treat all these special headers the same since it is
// critical we get these type of implied sections right, lest
// we hurt our date telscoping.
if ( m_flags & SEC_HAS_DOM_DOW ) return 22222;
if ( m_flags&SEC_HEADING_CONTAINER) return m_baseHash^0x789123;
else return m_baseHash;
};
*/
//int32_t getBaseHash3 ();
// usually just the m_tagId, but hashes in the class attributes of
// div and span tags, etc. to make them unique
uint32_t m_baseHash;
@ -491,25 +439,12 @@ public:
// these deal with enumerated tags and are used by Events.cpp
int32_t m_occNum;
int32_t m_numOccurences;
// section with same m_tagHash and before you
//class Section *m_prevSibling;
// used by XmlDoc.cpp to set a topological distance
int32_t m_topDist;
//int32_t m_sortedIndex;
// all the parent tags are enumerated, but the kid (youngest tag)
// is not enumerated
//int32_t m_enumTagHash;
// . tag hash which disregards non-breaking or tags with no back tags
// . used by Events.cpp
//int32_t m_hardTagHash;
// hash of all the alnum words DIRECTLY in this section
uint64_t m_contentHash64;
// if section contains words indirectly, then store xor'ed wids in here
//int32_t m_contentHash2;
uint64_t m_sentenceContentHash64;
@ -523,12 +458,6 @@ public:
// uses m_sentenceContentHash64 (for sentences)
uint64_t m_indirectSentHash64;
// for voting! we basically ignore numbers and dates, months, etc.
// for doing this hash so that if the date changes from page to page
// it will still be recognized as a "dup section" and m_votesForDup
// should be high
//uint32_t m_voteHash32;
// . range of words in Words class we encompass
// . m_wordStart and m_wordEnd are the tag word #'s
// . ACTUALLY it is a half-closed interval [a,b) like all else
@ -539,31 +468,12 @@ public:
int32_t m_b;//wordEnd;
// for event titles and descriptions
//float m_titleScore;
//float m_descScore;
//titleflags_t m_titleFlags;
sentflags_t m_sentFlags;
// bits set based on turk votes. see the TB_* bits in XmlDoc.h
//turkbits_t m_turkBits;
// alnum count for us and all sections we contain
//int32_t m_alnumCount;
// . # alnum words only in this and only this section
// . if we have none, we are SEC_NOTEXT
int32_t m_exclusive;
// like above, but word must also NOT be in a hyperlink
//int32_t m_plain;
// Address.cpp uses this
//char m_numBackToBackSubsections;
//nodeid_t m_lastTid;
// # of times this section appears in this doc
//int32_t m_totalOccurences;
// our depth. # of tags in the hash
int32_t m_depth;
@ -574,61 +484,15 @@ public:
int32_t m_mark;
// Events.cpp assigns a date to each section
//int32_t m_fullDate;
//class Date *m_datePtr;
int32_t m_firstDate;
char m_used;
//int32_t m_numTods;
// the event section we contain. used by Events.cpp
//class Section *m_eventSec;
// used by Events.cpp for determining what range of events a section
// contains. we store that range in Events::hash() when we index each
// word into datedb for events.
//int32_t m_minEventId;
//int32_t m_maxEventId;
// used in Sections::splitSections() function
int32_t m_processedHash;
int32_t m_gbFrameNum;
// . support event ids from 0 to 255
// . this increases the sizeof this class from 160 to 192 bytes
//char m_evIdBits[32];
// how many bits in the above array are set?
//int16_t m_numEventIdBits;
/*
bool hasEventId ( int32_t evId ) {
// this is an overflow condition...
if ( evId > 255 ) return false;
// -1 or 0 means not associated with any event id since
// all eventIds are >= 1
if ( m_minEventId <= 0 ) return false;
if ( evId < m_minEventId ) return false;
if ( evId > m_maxEventId ) return false;
unsigned char bitMask = 1 << (evId % 8);
return m_evIdBits[evId/8] & bitMask;
};
void addEventId ( int32_t eid ) {
if ( eid >= 256 ) return;
unsigned char bitMask = 1 << (eid % 8);
unsigned char byteOff = eid / 8;
if ( m_evIdBits[byteOff] & bitMask ) return;
m_evIdBits[byteOff] |= bitMask;
m_numEventIdBits++;
if ( m_minEventId <= 0 || m_minEventId > eid )
m_minEventId = eid;
if ( m_maxEventId <= 0 || m_maxEventId < eid )
m_maxEventId = eid;
};
*/
// . returns true if the word range of section "arg" lies entirely
//   within our own range, judged by the m_a/m_b bounds of both sections
// . equal bounds count as contained
bool contains ( class Section *arg ) {
if ( arg->m_a < m_a ) return false;
return ( arg->m_b <= m_b ); };
@ -675,12 +539,7 @@ class Sections {
int32_t niceness ,
void *state ,
void (*callback)(void *state) ,
uint8_t contentType ,
char *sectionsData,
bool sectionsDataValid ,
char *sectionsData2,
char *buf ,
int32_t bufSize ) ;
uint8_t contentType ) ;
bool addVotes(class SectionVotingTable *nsvt, uint32_t tagPairHash );
@ -721,17 +580,10 @@ class Sections {
char *diversityVec,
char *wordSpamVec,
char *fragVec,
class HashTableX *st2 ,
class HashTableX *tt ,
class Addresses *aa ,
char format = FMT_HTML ); // bool forProCog );
char format = FMT_HTML );
bool printSectionDiv ( class Section *sk , char format = FMT_HTML );
//bool forProCog = false ) ;
class SafeBuf *m_sbuf;
//class HashTableX *m_pt;
//class HashTableX *m_et;
//class HashTableX *m_at;
//class HashTableX *m_priceTable;
char *getSectionsReply ( int32_t *size );
char *getSectionsVotes ( int32_t *size );
@ -739,13 +591,10 @@ class Sections {
bool isHardSection ( class Section *sn );
bool setMenus ( );
bool setListFlags ( );
bool setFormTableBits ( ) ;
bool setTableRowsAndCols ( class Section *tableSec ) ;
bool setTableHeaderBits ( class Section *table );
bool setTableStuff ( ) ;
bool setTableDateHeaders ( class Section *ts ) ;
bool setTableScanPtrs ( class Section *ts ) ;
void setHeader ( int32_t r , class Section *first , sec_t flag ) ;
@ -765,7 +614,6 @@ class Sections {
class Url *m_url ;
int64_t m_docId ;
int64_t m_siteHash64 ;
//int64_t m_tagPairHash;
char *m_coll ;
void *m_state ;
void (*m_callback) ( void *state );
@ -797,7 +645,6 @@ class Sections {
bool m_waitInLine;
int32_t m_articleStartWord;
int32_t m_articleEndWord;
//int32_t m_totalSimilarLayouts;
bool m_hadArticle;
int32_t m_numInvalids;
int32_t m_totalSiteVoters;
@ -848,13 +695,6 @@ class Sections {
int32_t m_numSentenceSections;
// . the section ptrs sorted by Section::m_a
// . since we set SEC_FAKE from splitSections() those new sections
// are appended on m_sections[] array and are out of order, so
// we merge sort the two sublists of m_sections[] and put the
// pointers into here...
//class Section **m_sorted;
bool m_isTestColl;
// assume no malloc
@ -870,15 +710,8 @@ class Sections {
char **m_wptrs;
nodeid_t *m_tids;
//int32_t addImpliedSections ( bool needHR );
//int32_t addHeaderImpliedSections ( );
//int32_t addImpliedSectionsOld ( );
//int32_t getHeadingScore ( class Section *sk , int32_t baseHash );
// the new way
bool addImpliedSections ( class Addresses *aa );//, HashTableX *svt );
//HashTableX *m_svt;
bool setSentFlagsPart1 ( );
bool setSentFlagsPart2 ( );
@ -899,10 +732,7 @@ class Sections {
char method,
class Section *delim ,
class Partition *part );
int32_t getDelimHash ( char method , class Section *bro ,
class Section *head ) ;
//int32_t m_totalHdrCount;
//bool m_called;
int32_t getDelimHash ( char method , class Section *bro ) ;
bool addImpliedLists ( ) ;
int32_t getDelimScore2 ( class Section *bro,
@ -926,10 +756,7 @@ class Sections {
bool addSentenceSections ( ) ;
class Section *insertSubSection ( class Section *parent ,
int32_t a ,
int32_t b ,
int32_t newBaseHash ) ;
class Section *insertSubSection ( int32_t a, int32_t b, int32_t newBaseHash ) ;
int32_t splitSectionsByTag ( nodeid_t tagid ) ;
bool splitSections ( char *delimeter , int32_t dh );
@ -1040,7 +867,6 @@ class SectionVotingTable {
// stock table from a sectiondb rdblist
bool addListOfVotes ( RdbList *list,
key128_t **lastKey ,
uint32_t tagPairHash ,
int64_t docId ,
int32_t niceness ) ;
@ -1105,26 +931,7 @@ class SectionVotingTable {
#define SV_EURDATEFMT 3 // DateParse2.cpp. contains european date fmt
#define SV_EVENT 4 // used in Events.cpp to indicate event container
#define SV_ADDRESS 5 // used in Events.cpp to indicate address container
// . place types here
// . these #define's are used for values of Place::m_type in Events.cpp too!
// . score is from 0 to 1.0 which is probability section is a place container
// for the specified place type
// . used by Events.cpp for address extraction
/*
#define SV_PLACE_NAME_1 7 // places now have two names
#define SV_PLACE_NAME_2 8 // places now have two names
#define SV_PLACE_STREET 9
#define SV_PLACE_CITY 10
#define SV_PLACE_ZIP 11
#define SV_PLACE_SUITE 12
#define SV_PLACE_ADM1 13
#define SV_PLACE_ADM2 14
#define SV_PLACE_ADM3 15
#define SV_PLACE_ADM4 16
#define SV_PLACE_CTRY 17
#define SV_PLACE_SCH 18
#define SV_PLACE_PRK 19
*/
// . HACK: the "date" is not the enum tag hash, but is the tagPairHash for this
// . every doc has just one of these describing the entire layout of the page
// . basically looking for these is same as doing a gbtaghash: query
@ -1133,25 +940,11 @@ class SectionVotingTable {
// . this allows us to detect a duplicate section even though the layout
// of the web page is not quite the same, but is from the same site
#define SV_TAGCONTENTHASH 21
// . HACK: a statistic
// . the voter that had the max SectionVote::m_numSampled
// . the m_numSampled for this statistic is his m_numSampled
// . if we find that a section is not unique (i.e. repeated) on just one
// voting document, then we think it is probably a comment and we do not
// set the SEC_ARTICLE flag for that section
//#define SV_TEXTY_MAX_SAMPLED 22
// . HACK: the "date" is not the enum tag hash, but is the tagPairHash!
// . indicates this doc is waiting in line for enough docs from its site
// with the same page layout (tagpairhash) to become indexed so that it can
// make an informed decision in regards to eliminating comment sections
// and determining article sections
//#define SV_WAITINLINE 23
// now Dates.cpp sets these too
#define SV_FUTURE_DATE 24
#define SV_PAST_DATE 25
#define SV_CURRENT_DATE 26
//#define SV_DUP 27
//#define SV_NOT_DUP 28
#define SV_SITE_VOTER 29
#define SV_TURKTAGHASH 30

@ -5424,13 +5424,7 @@ Sections *XmlDoc::getExplicitSections ( ) {
m_niceness ,
m_masterState , // state
m_masterLoop , // callback
*ct ,
NULL , // sd // sections data
true , // sections data valid?
NULL , // sv // for m_nsvt
//*tph ,
NULL , // buf
0 )) { // bufSize
*ct )) {
m_calledSections = true;
// sanity check, this should not block, we are setting
// exclusively from the titleRec
@ -6438,7 +6432,6 @@ SectionVotingTable *XmlDoc::getOldSectionVotingTable ( ) {
// occurs in the document.
if ( ! m_osvt.addListOfVotes(&m_secdbList,
&lastKey,
*tph,
*d , // docid
m_niceness))
return NULL;
@ -30703,12 +30696,7 @@ SafeBuf *XmlDoc::getSampleForGigabitsJSON ( ) {
m_niceness ,
NULL,//m_masterState , // state
NULL,//m_masterLoop , // callback
CT_JSON, // *ct ,
NULL , // sd // sections data
true , // sections data valid?
NULL , // sv // for m_nsvt
NULL , // buf
0 )) { // bufSize
CT_JSON )) {
return NULL;
}
@ -34963,8 +34951,6 @@ bool XmlDoc::printRainbowSections ( SafeBuf *sb , HttpRequest *hr ) {
diversityVec,
wordSpamVec,
fragVec,
NULL,
NULL ,
&m_addresses ,
true );
return true;

@ -8806,13 +8806,7 @@ bool parseTest ( char *coll , int64_t docId , char *query ) {
// do not supply xd so it will be set from scratch
if ( ! sections.set (&words,&phrases,&bits,NULL,0,0,
NULL,0,NULL,NULL,
0, // contenttype
NULL, // sectionsdata
false, // sectionsdatavalid
NULL, // sectionsdata2
//0, // tagpairhash
NULL, // buf
0)) // bufSize
0))
return log("build: speedtestxml: sections set: %s",
mstrerror(g_errno));