Do not set the index code to EFAKEFIRSTIP
for INJECTED URLs, only for URLs added via add url. Fix the add url page so it does not always use the 'main' collection. Add reset/restart commands to the spider page.
This commit is contained in:
parent
178af5f781
commit
22aa13e34d
20
PageRoot.cpp
20
PageRoot.cpp
@ -391,8 +391,15 @@ bool printAddUrlHomePage ( SafeBuf &sb , char *url , HttpRequest *r ) {
|
||||
sb.safePrintf("<br><br>\n");
|
||||
sb.safePrintf("<form method=get action=/addurl name=f>\n");
|
||||
|
||||
|
||||
//CollectionRec *cr = g_collectiondb.getRec ( "main" );
|
||||
//sb.safePrintf("<input type=hidden name=c value=\"%s\">",cr->m_coll);
|
||||
|
||||
// the collection we want to add the url to
|
||||
char *coll = r->getString("c");
|
||||
if ( ! coll ) coll = "";
|
||||
if ( coll )
|
||||
sb.safePrintf("<input type=hidden name=c value=\"%s\">",coll);
|
||||
|
||||
sb.safePrintf("<input name=u type=text size=60 value=\"");
|
||||
if ( url ) {
|
||||
SafeBuf tmp;
|
||||
@ -453,11 +460,12 @@ bool printAddUrlHomePage ( SafeBuf &sb , char *url , HttpRequest *r ) {
|
||||
unsigned long long rand64 = gettimeofdayInMillisecondsLocal();
|
||||
// msg7 needs an explicit collection for /addurl for injecting
|
||||
// in PageInject.cpp. it does not use defaults for safety.
|
||||
sb.safePrintf("&id=%lu&c=main&rand=%llu';\n"
|
||||
sb.safePrintf("&id=%lu&c=%s&rand=%llu';\n"
|
||||
"client.open('GET', url );\n"
|
||||
"client.send();\n"
|
||||
"</script>\n"
|
||||
, h32
|
||||
, coll
|
||||
, rand64
|
||||
);
|
||||
sb.safePrintf("</div>\n");
|
||||
@ -1552,6 +1560,8 @@ void doneInjectingWrapper3 ( void *st ) {
|
||||
//CollectionRec *cr = g_collectiondb.getRec ( st1->m_coll );
|
||||
|
||||
// collection name
|
||||
char *coll = st1->m_coll;
|
||||
if ( ! coll ) coll = "";
|
||||
|
||||
//char tt [ 128 ];
|
||||
//tt[0] = '\0';
|
||||
@ -1658,8 +1668,10 @@ void doneInjectingWrapper3 ( void *st ) {
|
||||
unsigned long rand32 = rand();
|
||||
// in the mime to 0 seconds!
|
||||
sb.safePrintf("<b>Url successfully added. "
|
||||
"<a href=/search?rand=%lu&q=url%%3A",
|
||||
rand32);
|
||||
"<a href=/search?rand=%lu&"
|
||||
"c=%s&q=url%%3A",
|
||||
rand32,
|
||||
coll);
|
||||
sb.urlEncode(url);
|
||||
sb.safePrintf(">Check it</a> or "
|
||||
"<a href=http://www.gigablast.com/seo?u=");
|
||||
|
38
Parms.cpp
38
Parms.cpp
@ -9271,24 +9271,6 @@ void Parms::init ( ) {
|
||||
m->m_cast = 1;
|
||||
m++;
|
||||
|
||||
m->m_title = "reset collection";
|
||||
m->m_desc = "reset collection";
|
||||
m->m_cgi = "reset";
|
||||
m->m_type = TYPE_CMD;
|
||||
m->m_page = PAGE_NONE;
|
||||
m->m_func2 = CommandResetColl;
|
||||
m->m_cast = 1;
|
||||
m++;
|
||||
|
||||
m->m_title = "restart collection";
|
||||
m->m_desc = "restart collection";
|
||||
m->m_cgi = "restart";
|
||||
m->m_type = TYPE_CMD;
|
||||
m->m_page = PAGE_NONE;
|
||||
m->m_func2 = CommandRestartColl;
|
||||
m->m_cast = 1;
|
||||
m++;
|
||||
|
||||
m->m_title = "in sync";
|
||||
m->m_desc = "signify in sync with host 0";
|
||||
m->m_cgi = "insync";
|
||||
@ -9321,6 +9303,26 @@ void Parms::init ( ) {
|
||||
m->m_def = "1";
|
||||
m++;
|
||||
|
||||
m->m_title = "reset collection";
|
||||
m->m_desc = "Remove all documents from the collection and turn "
|
||||
"spiders off.";
|
||||
m->m_cgi = "reset";
|
||||
m->m_type = TYPE_CMD;
|
||||
m->m_page = PAGE_SPIDER;
|
||||
m->m_func2 = CommandResetColl;
|
||||
m->m_cast = 1;
|
||||
m++;
|
||||
|
||||
m->m_title = "restart collection";
|
||||
m->m_desc = "Remove all documents from the collection and start "
|
||||
"spidering over again.";
|
||||
m->m_cgi = "restart";
|
||||
m->m_type = TYPE_CMD;
|
||||
m->m_page = PAGE_SPIDER;
|
||||
m->m_func2 = CommandRestartColl;
|
||||
m->m_cast = 1;
|
||||
m++;
|
||||
|
||||
/*
|
||||
m->m_title = "new spidering enabled";
|
||||
m->m_desc = "When enabled the spider adds NEW "
|
||||
|
@ -2106,7 +2106,13 @@ bool XmlDoc::indexDoc2 ( ) {
|
||||
|
||||
// do this before we increment pageDownloadAttempts below so that
|
||||
// john's smoke tests, which use those counts, are not affected
|
||||
if ( m_oldsrValid && m_oldsr.m_fakeFirstIp &&
|
||||
if ( m_oldsrValid &&
|
||||
m_oldsr.m_fakeFirstIp &&
|
||||
// only do for add url, not for injects. injects expect
|
||||
// the doc to be indexed while the browser waits. add url
|
||||
// is really just adding the spider request and returning
|
||||
// to the browser without delay.
|
||||
! m_oldsr.m_isInjecting &&
|
||||
// diffbot requests are ok though!
|
||||
! strstr(m_oldsr.m_url,"-diffbotxyz") ) {
|
||||
m_indexCodeValid = true;
|
||||
|
Loading…
x
Reference in New Issue
Block a user