Merge branch 'master' into testing

This commit is contained in:
Matt Wells
2014-08-23 07:30:17 -07:00
4 changed files with 30 additions and 12 deletions

@ -16140,7 +16140,9 @@ void Parms::init ( ) {
m->m_cgi = "usr";
m->m_off = (char *)&cr.m_useSimplifiedRedirects - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
// turn off for now; deduping at spider time should mitigate any
// issues caused by disabling this.
m->m_def = "0";
m->m_page = PAGE_SPIDER;
m->m_obj = OBJ_COLL;
m->m_flags = PF_CLONE;

@ -528,7 +528,7 @@ void XmlDoc::reset ( ) {
// should point into a safebuf as well
//mfree ( ptr_linkInfo2 , size_linkInfo2, "LinkInfo2");
ptr_linkInfo2 = NULL;
m_linkInfo1Valid = false;
m_linkInfo2Valid = false;
}
if ( m_rawUtf8ContentValid && m_rawUtf8Content && !m_setFromTitleRec
// was content supplied by pageInject.cpp?
@ -21158,6 +21158,7 @@ char *XmlDoc::getMetaList ( bool forDelete ) {
// since we might have set it in copyFromOldDoc() above
ptr_linkInfo1 = NULL;
size_linkInfo1 = 0;
m_linkInfo1Valid = false;
// . if not using spiderdb we are done at this point
// . this happens for diffbot json replies (m_dx)
@ -23434,7 +23435,8 @@ void XmlDoc::copyFromOldDoc ( XmlDoc *od ) {
// we need the link info too!
ptr_linkInfo1 = od->ptr_linkInfo1;
size_linkInfo1 = od->size_linkInfo1;
m_linkInfo1Valid = true;
if ( ptr_linkInfo1 && size_linkInfo1 ) m_linkInfo1Valid = true;
else m_linkInfo1Valid = false;
// turn off for debug
ptr_sectiondbData = NULL;

@ -96,7 +96,7 @@ A work-in-progress <a href=/compare.html>comparison to SOLR</a>.
<br><br><a name=quickstart></a>
<h1>Quick Start</h1>
&lt;<i>Last Updated June 2014</i>&gt;
&lt;<i>Last Updated August 2014</i>&gt;
<br>
<br>
@ -112,7 +112,7 @@ You will need an Intel or AMD system with at least 4GB of RAM for every gigablas
<b><font color=red>For Linux:</font></b>
<br><br>
1. Download the <a href=http://www.gigablast.com/gb-1.0-2.i386.rpm>Gigablast package for <b>RedHat</b> Linux</a> or the <a href=http://www.gigablast.com/gb_1.0-1_i386.deb>Gigablast package for <b>Ubuntu</b>/Debian Linux</a>.
1. Download the <a href=http://www.gigablast.com/gb-1.7-2.i386.rpm>Gigablast package for <b>RedHat</b> Linux</a> or the <a href=http://www.gigablast.com/gb_1.7-1_i386.deb>Gigablast package for <b>Ubuntu</b>/Debian Linux</a>.
<br><br>

@ -121,7 +121,7 @@ Apache Open Source License 2
<td><b>HTTP API</b></td>
<!-- gb install -->
<td>
<a href=/api>here</a>
<a href=/api2.html>here</a>
</td>
<!-- solr install-->
<td>
@ -156,6 +156,20 @@ Apache Open Source License 2
</tr>
<tr valign=top>
<td><b>Github Star Ratings</b></td>
<!-- gb install -->
<td>
<a href=https://github.com/gigablast/open-source-search-engine>326</a> (8/2/2014)
</td>
<!-- solr install-->
<td>
<a href=https://github.com/apache/lucene-solr>767</a> (8/2/2014)
</td>
</tr>
@ -232,7 +246,7 @@ Many different packages quilted together. Apache, MySQL, Lucene, Tika, Zookeeper
<!--gigablast-->
<td>
<font color=green><b>
Use curl using args (including <i>delim</i>) listed <a href=/api.html#/admin/inject>here</a>
Use curl using args (including <i>delim</i>) listed <a href=/api2.html#/admin/inject>here</a>
</b></font>
<br>
</td>
@ -252,7 +266,7 @@ unsupported
<!--gigablast-->
<td>
Use curl to post the content of the file with args listed
<a href=/api.html#/admin/inject>here</a>
<a href=/api2.html#/admin/inject>here</a>
</td>
<!--solr-->
<td>
@ -270,7 +284,7 @@ You can index individual local files as such:
<!--gigablast-->
<td>
Use curl to inject the url with args listed
<a href=/api.html#/admin/inject>here</a>
<a href=/api2.html#/admin/inject>here</a>
</td>
<!--solr-->
@ -287,7 +301,7 @@ Use curl to inject the url with args listed
<!--gigablast-->
<td>
Use one curl command for each url, using the interface described
<a href=/api.html#/admin/inject>here</a></b>
<a href=/api2.html#/admin/inject>here</a>
</td>
<!--solr-->
<td>
@ -305,7 +319,7 @@ Use one curl command for each url, using the interface described
<!--gigablast-->
<td>
Use curl command to delete a url, using the interface described
<a href=/api.html#/admin/inject>here</a></b>
<a href=/api2.html#/admin/inject>here</a>
</td>
<!--solr-->
<td>
@ -321,7 +335,7 @@ You can delete individual documents by specifying queries that match just those
<td><b>Getting Results via cmdline</b></td>
<td>
Use curl command to do a search, using the interface described
<a href=/api.html#/search>here</a></b>
<a href=/api2.html#/search>here</a>
</td>
<td>
???