Merge branch 'master' of github.com:privacore/open-source-search-engine

This commit is contained in:
Ivan Skytte Jørgensen
2018-02-09 14:34:10 +01:00
9 changed files with 21 additions and 39 deletions

3
Jenkinsfile vendored

@ -104,8 +104,7 @@ pipeline {
post {
always {
sh "cd ${env.PYWEBTEST_DIR} && ./shutdown_instances.py --num-instances=1 --num-shards=1 --offset=0"
sh "cd ${env.PYWEBTEST_DIR} && ./shutdown_instances.py --num-instances=4 --num-shards=2 --offset=1"
sh "killall -u \$(whoami) -s SIGINT gb"
}
changed {

@ -705,7 +705,7 @@ bool processLoop ( void *state ) {
return sendErrorReply ( st , g_errno );
}
if ( ! ww.set ( &xml, true ) ) {
if ( ! ww.set ( &xml ) ) {
return sendErrorReply ( st , g_errno );
}

@ -80,7 +80,6 @@ SiteGetter::SiteGetter ( ) {
m_collnum = 0;
m_state = NULL;
m_callback = NULL;
m_sitePathDepth = 0;
m_pathDepth = 0;
m_maxPathDepth = 0;
m_niceness = 0;
@ -122,9 +121,6 @@ bool SiteGetter::getSite ( const char *url, TagRec *gr, int32_t timestamp, colln
m_allDone = false;
// set this to unknown for now
m_sitePathDepth = -1;
// reset this just in case
g_errno = 0;
@ -153,12 +149,6 @@ bool SiteGetter::getSite ( const char *url, TagRec *gr, int32_t timestamp, colln
// do not add to tagdb and do not block!
m_state = NULL;
// . use a sitepathdepth of -1 by default then, until host #0
// has a chance to evaluate
// . a sitepathdepth of -1 means to use the full hostname
// as the site
m_sitePathDepth = -1;
// sanity check, should not block since m_state is NULL
if ( ! setSite () ) { g_process.shutdownAbort(true); }
@ -346,11 +336,6 @@ bool SiteGetter::gotSiteList ( ) {
const char *pend = getPathEnd(m_url, m_pathDepth);
const char *host = getHostFast( m_url, NULL );
log(LOG_INFO,"site: '%.*s' detected as a site with linkcount=~%d", (int)(pend-host), host, count);
// ok, i guess this indicates we have a subsite level
m_sitePathDepth = m_pathDepth;
// this basically means none!
if ( m_pathDepth >= m_maxPathDepth ) m_sitePathDepth = -1;
// . sets m_site and m_siteLen from m_url
// . this returns false if blocked, true otherwise

@ -44,8 +44,6 @@ private:
void (*m_callback) (void *state );
RdbList m_list;
int32_t m_sitePathDepth;
// use Msg0 for getting the no-split termlist that combines
// gbpathdepth: with the site hash in a single termid
Msg0 m_msg0;

@ -990,7 +990,7 @@ bool Xml::getTagContent( const char *fieldName, const char *fieldContent, char *
s[len] = saved;
}
if ( ( !wp.set(&xml, true) ) ) {
if ( ( !wp.set(&xml) ) ) {
// unable to allocate buffer
return false;
}

@ -3860,7 +3860,7 @@ Words *XmlDoc::getWords ( ) {
int64_t start = logQueryTimingStart();
// now set what we need
if ( !m_words.set( xml, true ) ) {
if ( !m_words.set( xml ) ) {
return NULL;
}
@ -4623,7 +4623,7 @@ int32_t *XmlDoc::getSummaryVector ( ) {
// word-ify it
Words words;
if ( ! words.set ( sb.getBufStart() , true ) ) {
if ( ! words.set ( sb.getBufStart() ) ) {
return NULL;
}
@ -9472,7 +9472,7 @@ char **XmlDoc::getFilteredContent ( ) {
xml.set(m_content, m_contentLen, m_version, *ct);
Words words;
words.set(&xml, true);
words.set(&xml);
if (words.getNumAlnumWords() > g_conf.m_spiderFilterableMaxWordCount) {
logTrace( g_conf.m_logTraceXmlDoc, "END. HTML and getNumAlnumWords too high");
return &m_filteredContent;

@ -4899,7 +4899,7 @@ static bool parseTest(const char *coll, int64_t docId, const char *query) {
t = gettimeofdayInMilliseconds();
for ( int32_t i = 0 ; i < 100 ; i++ )
if ( ! words.set ( &xml , true ) ) {
if ( ! words.set ( &xml ) ) {
log(LOG_WARN, "build: speedtestxml: words set: %s", mstrerror(g_errno));
return false;
}
@ -4912,7 +4912,7 @@ static bool parseTest(const char *coll, int64_t docId, const char *query) {
t = gettimeofdayInMilliseconds();
for ( int32_t i = 0 ; i < 100 ; i++ )
if ( ! words.set ( &xml , true ) ) {
if ( ! words.set ( &xml ) ) {
log(LOG_WARN, "build: speedtestxml: words set: %s", mstrerror(g_errno));
return false;
}
@ -4926,7 +4926,7 @@ static bool parseTest(const char *coll, int64_t docId, const char *query) {
t = gettimeofdayInMilliseconds();
for ( int32_t i = 0 ; i < 100 ; i++ )
if ( ! words.set ( content , true ) ) {
if ( ! words.set ( content ) ) {
log(LOG_WARN, "build: speedtestxml: words set: %s", mstrerror(g_errno));
return false;
}
@ -4940,7 +4940,7 @@ static bool parseTest(const char *coll, int64_t docId, const char *query) {
Pos pos;
// computeWordIds from xml
words.set ( &xml , true ) ;
words.set ( &xml ) ;
t = gettimeofdayInMilliseconds();
for ( int32_t i = 0 ; i < 100 ; i++ )
if ( ! pos.set ( &words ) ) {
@ -4957,7 +4957,7 @@ static bool parseTest(const char *coll, int64_t docId, const char *query) {
Bits bits;
// computeWordIds from xml
words.set ( &xml , true ) ;
words.set ( &xml ) ;
t = gettimeofdayInMilliseconds();
for ( int32_t i = 0 ; i < 100 ; i++ )
if ( ! bits.setForSummary ( &words ) ) {
@ -4974,7 +4974,7 @@ static bool parseTest(const char *coll, int64_t docId, const char *query) {
Sections sections;
// computeWordIds from xml
words.set ( &xml , true ) ;
words.set ( &xml ) ;
bits.set(&words);
t = gettimeofdayInMilliseconds();
for ( int32_t i = 0 ; i < 100 ; i++ )
@ -5029,7 +5029,7 @@ static bool parseTest(const char *coll, int64_t docId, const char *query) {
log(LOG_WARN, "build: speedtestxml: getText: %s", mstrerror(g_errno));
return false;
}
if ( ! words.set ( buf,true) ) {
if ( ! words.set ( buf ) ) {
log(LOG_WARN, "build: speedtestxml: words set: %s", mstrerror(g_errno));
return false;
}
@ -5047,7 +5047,7 @@ static bool parseTest(const char *coll, int64_t docId, const char *query) {
Query q;
q.set2(query, langUnknown, 1.0, 1.0, NULL, false, true, ABS_MAX_QUERY_TERMS);
matches.setQuery ( &q );
words.set ( &xml , true ) ;
words.set ( &xml ) ;
t = gettimeofdayInMilliseconds();
for ( int32_t i = 0 ; i < 100 ; i++ ) {
matches.reset();

@ -29,7 +29,7 @@ TEST( PosTest, FilterAllCaps ) {
Pos pos;
char buf[MAX_BUF_SIZE];
ASSERT_TRUE( words.set( const_cast<char*>(input_strs[i]), true ) );
ASSERT_TRUE( words.set( const_cast<char*>(input_strs[i]) ) );
int32_t len = pos.filter( &words, 0, words.getNumWords(), false, buf, buf + MAX_BUF_SIZE );
@ -110,7 +110,7 @@ TEST( PosTest, FilterEnding ) {
Pos pos;
char buf[MAX_BUF_SIZE];
ASSERT_TRUE( words.set( const_cast<char*>(input_strs[i]), true ) );
ASSERT_TRUE( words.set( const_cast<char*>(input_strs[i]) ) );
int32_t len = pos.filter( &words, 0, -1, true, buf, buf + 180 );
@ -144,7 +144,7 @@ TEST( PosTest, FilterTags ) {
std::sprintf(input, input_strs[i]);
ASSERT_TRUE( xml.set( input, strlen( input ), TITLEREC_CURRENT_VERSION, CT_HTML ) );
ASSERT_TRUE( words.set( &xml, true ) );
ASSERT_TRUE( words.set( &xml ) );
int32_t len = pos.filter( &words, 0, words.getNumWords(), false, buf, buf + MAX_BUF_SIZE );
@ -182,7 +182,7 @@ TEST( PosTest, FilterSamePunct ) {
Pos pos;
char buf[MAX_BUF_SIZE];
ASSERT_TRUE( words.set( const_cast<char*>(input_strs[i]), true ) );
ASSERT_TRUE( words.set( const_cast<char*>(input_strs[i]) ) );
int32_t len = pos.filter( &words, 0, -1, true, buf, buf + 180 );
@ -223,7 +223,7 @@ TEST( PosTest, DecodeHTMLEntities ) {
Pos pos;
char buf[MAX_BUF_SIZE];
ASSERT_TRUE( words.set( const_cast<char*>(input_strs[i]), true ) );
ASSERT_TRUE( words.set( const_cast<char*>(input_strs[i]) ) );
int32_t len = pos.filter( &words, 0, -1, true, buf, buf + 180 );
@ -239,7 +239,7 @@ TEST(PosTest, SegFaultDotPrevChar) {
const char *input_str = ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .. . . . . . . . . . . . . . . . . . . .. . . . . . . . . . . ...";
ASSERT_TRUE( words.set( const_cast<char*>(input_str), true ) );
ASSERT_TRUE( words.set( const_cast<char*>(input_str) ) );
int32_t len = pos.filter( &words, 0, -1, true, buf, buf + 180 );

@ -23,7 +23,7 @@ static void generateSummary( Summary &summary, char *htmlInput, const char *quer
ASSERT_TRUE(xml.set(htmlInput, strlen(htmlInput), 0, CT_HTML));
Words words;
ASSERT_TRUE(words.set(&xml, true));
ASSERT_TRUE(words.set(&xml));
Bits bits;
ASSERT_TRUE(bits.set(&words));