Merge branch 'master' of github.com:privacore/open-source-search-engine

2025-07-12 02:26:07 -04:00 · 2018-02-09 14:34:10 +01:00
parent 27e90e164f 1a03f99252
commit 5f50a48dd4
9 changed files with 21 additions and 39 deletions
--- a/3
+++ b/3
@ -104,8 +104,7 @@ pipeline {

 	post {
 		always {
-			sh "cd ${env.PYWEBTEST_DIR} && ./shutdown_instances.py --num-instances=1 --num-shards=1 --offset=0"
-			sh "cd ${env.PYWEBTEST_DIR} && ./shutdown_instances.py --num-instances=4 --num-shards=2 --offset=1"
+			sh "killall -u \$(whoami) -s SIGINT gb"
 		}

 		changed {
--- a/PageGet.cpp
+++ b/PageGet.cpp
@ -705,7 +705,7 @@ bool processLoop ( void *state ) {
 			return sendErrorReply ( st , g_errno );
 		}

-		if ( ! ww.set ( &xml, true ) ) {
+		if ( ! ww.set ( &xml ) ) {
 			return sendErrorReply ( st , g_errno );
 		}

--- a/SiteGetter.cpp
+++ b/SiteGetter.cpp
@ -80,7 +80,6 @@ SiteGetter::SiteGetter ( ) {
 	m_collnum = 0;
 	m_state = NULL;
 	m_callback = NULL;
-	m_sitePathDepth = 0;
 	m_pathDepth = 0;
 	m_maxPathDepth = 0;
 	m_niceness = 0;
@ -122,9 +121,6 @@ bool SiteGetter::getSite ( const char *url, TagRec *gr, int32_t timestamp, colln
 	
 	m_allDone  = false;

-	// set this to unknown for now
-	m_sitePathDepth    = -1;
-
 	// reset this just in case
 	g_errno = 0;

@ -153,12 +149,6 @@ bool SiteGetter::getSite ( const char *url, TagRec *gr, int32_t timestamp, colln
 		// do not add to tagdb and do not block!
 		m_state = NULL;

-		// . use a sitepathdepth of -1 by default then, until host #0
-		//   has a chance to evaluate
-		// . a sitepathdepth of -1 means to use the full hostname
-		//   as the site
-		m_sitePathDepth = -1;
-
 		// sanity check, should not block since m_state is NULL
 		if ( ! setSite () ) { g_process.shutdownAbort(true); }

@ -346,11 +336,6 @@ bool SiteGetter::gotSiteList ( ) {
 	const char *pend = getPathEnd(m_url, m_pathDepth);
 	const char *host = getHostFast( m_url, NULL );
 	log(LOG_INFO,"site: '%.*s' detected as a site with linkcount=~%d", (int)(pend-host), host, count);
-	// ok, i guess this indicates we have a subsite level
-	m_sitePathDepth = m_pathDepth;
-
-	// this basically means none!
-	if ( m_pathDepth >= m_maxPathDepth ) m_sitePathDepth = -1;

 	// . sets m_site and m_siteLen from m_url
 	// . this returns false if blocked, true otherwise
--- a/SiteGetter.h
+++ b/SiteGetter.h
@ -44,8 +44,6 @@ private:
 	void        (*m_callback) (void *state );
 	RdbList       m_list;

-	int32_t          m_sitePathDepth;
-
 	// use Msg0 for getting the no-split termlist that combines 
 	// gbpathdepth: with the site hash in a single termid
 	Msg0   m_msg0;
--- a/Xml.cpp
+++ b/Xml.cpp
@ -990,7 +990,7 @@ bool Xml::getTagContent( const char *fieldName, const char *fieldContent, char *
 					s[len] = saved;
 				}

-				if ( ( !wp.set(&xml, true) ) ) {
+				if ( ( !wp.set(&xml) ) ) {
 					// unable to allocate buffer
 					return false;
 				}
--- a/XmlDoc.cpp
+++ b/XmlDoc.cpp
@ -3860,7 +3860,7 @@ Words *XmlDoc::getWords ( ) {
 	int64_t start = logQueryTimingStart();

 	// now set what we need
-	if ( !m_words.set( xml, true ) ) {
+	if ( !m_words.set( xml ) ) {
 		return NULL;
 	}

@ -4623,7 +4623,7 @@ int32_t *XmlDoc::getSummaryVector ( ) {

 	// word-ify it
 	Words words;
-	if ( ! words.set ( sb.getBufStart() , true ) ) {
+	if ( ! words.set ( sb.getBufStart() ) ) {
 		return NULL;
 	}

@ -9472,7 +9472,7 @@ char **XmlDoc::getFilteredContent ( ) {
 			xml.set(m_content, m_contentLen, m_version, *ct);

 			Words words;
-			words.set(&xml, true);
+			words.set(&xml);
 			if (words.getNumAlnumWords() > g_conf.m_spiderFilterableMaxWordCount) {
 				logTrace( g_conf.m_logTraceXmlDoc, "END. HTML and getNumAlnumWords too high");
 				return &m_filteredContent;
--- a/main.cpp
+++ b/main.cpp
@ -4899,7 +4899,7 @@ static bool parseTest(const char *coll, int64_t docId, const char *query) {

 	t = gettimeofdayInMilliseconds();
 	for ( int32_t i = 0 ; i < 100 ; i++ ) 
-		if ( ! words.set ( &xml , true ) ) {
+		if ( ! words.set ( &xml ) ) {
 			log(LOG_WARN, "build: speedtestxml: words set: %s", mstrerror(g_errno));
 			return false;
 		}
@ -4912,7 +4912,7 @@ static bool parseTest(const char *coll, int64_t docId, const char *query) {

 	t = gettimeofdayInMilliseconds();
 	for ( int32_t i = 0 ; i < 100 ; i++ ) 
-		if ( ! words.set ( &xml , true ) ) {
+		if ( ! words.set ( &xml ) ) {
 			log(LOG_WARN, "build: speedtestxml: words set: %s", mstrerror(g_errno));
 			return false;
 		}
@ -4926,7 +4926,7 @@ static bool parseTest(const char *coll, int64_t docId, const char *query) {

 	t = gettimeofdayInMilliseconds();
 	for ( int32_t i = 0 ; i < 100 ; i++ ) 
-		if ( ! words.set ( content , true ) ) {
+		if ( ! words.set ( content ) ) {
 			log(LOG_WARN, "build: speedtestxml: words set: %s", mstrerror(g_errno));
 			return false;
 		}
@ -4940,7 +4940,7 @@ static bool parseTest(const char *coll, int64_t docId, const char *query) {

 	Pos pos;
 	// computeWordIds from xml
-	words.set ( &xml , true ) ;
+	words.set ( &xml ) ;
 	t = gettimeofdayInMilliseconds();
 	for ( int32_t i = 0 ; i < 100 ; i++ ) 
 		if ( ! pos.set ( &words ) ) {
@ -4957,7 +4957,7 @@ static bool parseTest(const char *coll, int64_t docId, const char *query) {

 	Bits bits;
 	// computeWordIds from xml
-	words.set ( &xml , true ) ;
+	words.set ( &xml ) ;
 	t = gettimeofdayInMilliseconds();
 	for ( int32_t i = 0 ; i < 100 ; i++ ) 
 		if ( ! bits.setForSummary ( &words ) ) {
@ -4974,7 +4974,7 @@ static bool parseTest(const char *coll, int64_t docId, const char *query) {

 	Sections sections;
 	// computeWordIds from xml
-	words.set ( &xml , true ) ;
+	words.set ( &xml ) ;
 	bits.set(&words);
 	t = gettimeofdayInMilliseconds();
 	for ( int32_t i = 0 ; i < 100 ; i++ ) 
@ -5029,7 +5029,7 @@ static bool parseTest(const char *coll, int64_t docId, const char *query) {
 			log(LOG_WARN, "build: speedtestxml: getText: %s", mstrerror(g_errno));
 			return false;
 		}
-		if ( ! words.set ( buf,true) ) {
+		if ( ! words.set ( buf ) ) {
 			log(LOG_WARN, "build: speedtestxml: words set: %s", mstrerror(g_errno));
 			return false;
 		}
@ -5047,7 +5047,7 @@ static bool parseTest(const char *coll, int64_t docId, const char *query) {
 	Query q;
 	q.set2(query, langUnknown, 1.0, 1.0, NULL, false, true, ABS_MAX_QUERY_TERMS);
 	matches.setQuery ( &q );
-	words.set ( &xml , true ) ;
+	words.set ( &xml ) ;
 	t = gettimeofdayInMilliseconds();
 	for ( int32_t i = 0 ; i < 100 ; i++ ) {
 		matches.reset();
--- a/test/unit/PosTest.cpp
+++ b/test/unit/PosTest.cpp
@ -29,7 +29,7 @@ TEST( PosTest, FilterAllCaps ) {
 		Pos pos;
 		char buf[MAX_BUF_SIZE];

-		ASSERT_TRUE( words.set( const_cast<char*>(input_strs[i]), true ) );
+		ASSERT_TRUE( words.set( const_cast<char*>(input_strs[i]) ) );

 		int32_t len = pos.filter( &words, 0, words.getNumWords(), false, buf, buf + MAX_BUF_SIZE );

@ -110,7 +110,7 @@ TEST( PosTest, FilterEnding ) {
 		Pos pos;
 		char buf[MAX_BUF_SIZE];

-		ASSERT_TRUE( words.set( const_cast<char*>(input_strs[i]), true ) );
+		ASSERT_TRUE( words.set( const_cast<char*>(input_strs[i]) ) );

 		int32_t len = pos.filter( &words, 0, -1, true, buf, buf + 180 );

@ -144,7 +144,7 @@ TEST( PosTest, FilterTags ) {
 		std::sprintf(input, input_strs[i]);

 		ASSERT_TRUE( xml.set( input, strlen( input ), TITLEREC_CURRENT_VERSION, CT_HTML ) );
-		ASSERT_TRUE( words.set( &xml, true ) );
+		ASSERT_TRUE( words.set( &xml ) );

 		int32_t len = pos.filter( &words, 0, words.getNumWords(), false, buf, buf + MAX_BUF_SIZE );

@ -182,7 +182,7 @@ TEST( PosTest, FilterSamePunct ) {
 		Pos pos;
 		char buf[MAX_BUF_SIZE];

-		ASSERT_TRUE( words.set( const_cast<char*>(input_strs[i]), true ) );
+		ASSERT_TRUE( words.set( const_cast<char*>(input_strs[i]) ) );

 		int32_t len = pos.filter( &words, 0, -1, true, buf, buf + 180 );

@ -223,7 +223,7 @@ TEST( PosTest, DecodeHTMLEntities ) {
 		Pos pos;
 		char buf[MAX_BUF_SIZE];

-		ASSERT_TRUE( words.set( const_cast<char*>(input_strs[i]), true ) );
+		ASSERT_TRUE( words.set( const_cast<char*>(input_strs[i]) ) );

 		int32_t len = pos.filter( &words, 0, -1, true, buf, buf + 180 );

@ -239,7 +239,7 @@ TEST(PosTest, SegFaultDotPrevChar) {

 	const char *input_str = ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .. . . . . . . . . . . . . . . . . . . .. . . . . . . . . . . ...";

-	ASSERT_TRUE( words.set( const_cast<char*>(input_str), true ) );
+	ASSERT_TRUE( words.set( const_cast<char*>(input_str) ) );

 	int32_t len = pos.filter( &words, 0, -1, true, buf, buf + 180 );

--- a/test/unit/SummaryTest.cpp
+++ b/test/unit/SummaryTest.cpp
@ -23,7 +23,7 @@ static void generateSummary( Summary &summary, char *htmlInput, const char *quer
 	ASSERT_TRUE(xml.set(htmlInput, strlen(htmlInput), 0, CT_HTML));

 	Words words;
-	ASSERT_TRUE(words.set(&xml, true));
+	ASSERT_TRUE(words.set(&xml));

 	Bits bits;
 	ASSERT_TRUE(bits.set(&words));