#include "gb-include.h"

#include "PageRoot.h"
#include "HttpServer.h"
#include "Titledb.h"
#include "Spider.h"
#include "Tagdb.h"
#include "Collectiondb.h"
#include "Clusterdb.h" // for getting # of docs indexed
#include "Pages.h"
#include "Query.h" // MAX_QUERY_LEN
#include "SafeBuf.h"
#include "Proxy.h"
#include "HashTable.h"
#include "ip.h"
#include "GbUtil.h"
#include "Conf.h"
#include "Mem.h"

#ifdef _VALGRIND_
#include <valgrind/memcheck.h>
#endif

// Convenience overload: serve the root page without a cookie.
bool sendPageRoot ( TcpSocket *s, HttpRequest *r ){
	return sendPageRoot ( s, r, NULL );
}

// Closes the two-pane table opened by printFrontPageShell() and ends the
// html document. "r" is currently unused.
static bool printNav ( SafeBuf &sb , HttpRequest *r ) {
	sb.safePrintf("</TD></TR></TABLE>"
		      "</body></html>");
	return true;
}

//////////////
//
// BEGIN expandHtml() helper functions
//
//////////////

// Prints the family filter on/off radio buttons, pre-checking the one
// that matches "familyFilterOn". Returns what safePrintf() returns.
static bool printFamilyFilter ( SafeBuf& sb , bool familyFilterOn ) {
	const char *s1 = "";
	const char *s2 = "";
	if ( familyFilterOn ) s1 = " checked";
	else                  s2 = " checked";
	return sb.safePrintf (
		      "Family filter: "
		      "<input type=radio name=ff value=1%s>On "
		      "<input type=radio name=ff value=0%s>Off " ,
		      s1 , s2 );
}

#include "SearchInput.h"

// Prints the "Search the Web" / "Search <sites>" cobranding radio buttons
// when the search input carries a non-empty site list. Always returns true.
// NOTE(review): si->m_sites is echoed into the page without html escaping;
// it looks request-supplied -- confirm and escape if so.
static bool printRadioButtons( SafeBuf& sb, const SearchInput *si) {
	if ( si->m_sites && si->m_sites[0] ) {
		sb.safePrintf (
			  "<input type=radio name=sites value=\"\">"
			  "Search the Web "
			  "<input type=radio name=sites "
			  "value=\"%s\" checked>Search ",
			  si->m_sites );
		// only show the first 255 bytes of a long site list
		sb.safeTruncateEllipsis ( si->m_sites, 255 );
	}
	return true;
}

// Prints the rocket logo linking back to the root page. "si" is unused.
static bool printLogo(SafeBuf& sb, const SearchInput *si) {
	return sb.safePrintf("<a href=\"/\"><img valign=top width=250 height=61 border=0 src=\"/rocket.jpg\"></a>");
}

// Returns true if the bytes of "tok" appear in "head" starting at offset
// "i" without reading at or beyond head[hlen].
static bool headHasToken ( const char *head , int32_t hlen , int32_t i ,
			   const char *tok ) {
	for ( int32_t k = 0 ; tok[k] ; k++ ) {
		if ( i + k >= hlen ) return false;
		if ( head[i+k] != tok[k] ) return false;
	}
	return true;
}

// Appends "s" to "sb" with the html metacharacters < > & " replaced by
// entities so the text cannot open a tag or close an attribute.
static void printHtmlEscaped ( SafeBuf *sb , const char *s ) {
	for ( ; s && *s ; s++ ) {
		switch ( *s ) {
		case '<' : sb->safeMemcpy ( "&lt;"   , 4 ); break;
		case '>' : sb->safeMemcpy ( "&gt;"   , 4 ); break;
		case '&' : sb->safeMemcpy ( "&amp;"  , 5 ); break;
		case '"' : sb->safeMemcpy ( "&quot;" , 6 ); break;
		default  : sb->safeMemcpy ( s , 1 );        break;
		}
	}
}

/////////////
//
// END expandHtml() helper functions
//
/////////////

// . copies the custom html template "head" (hlen bytes) into "sb",
//   substituting the %-tokens handled below:
//   %S spider status, %q dequoted query, %c collection name, %where,
//   %when, %sortby radio buttons, %e url-encoded query, %N global doc
//   count, %n collection doc count, %D (consumed, prints nothing),
//   %H hidden collection input, %F form tag, %L logo, %f family filter,
//   %R site radio buttons
// . a '%' that starts no known token is copied through unchanged
// . "q"/"si"/"cr"/"method" may be NULL; "r" must not be
// . always returns true
bool expandHtml ( SafeBuf& sb,
		  const char *head ,
		  int32_t hlen ,
		  const char *q ,
		  int32_t qlen ,
		  const HttpRequest *r ,
		  const SearchInput *si,
		  const char *method ,
		  CollectionRec *cr ) {
	for ( int32_t i = 0 ; i < hlen; i++ ) {
		// plain byte, or a '%' that is the very last byte
		if ( head[i] != '%' || i + 1 >= hlen ) {
			sb.safeMemcpy((char*)&head[i], 1);
			continue;
		}
		// safe to read: i+1 < hlen
		const char t = head[i+1];

		if ( t == 'S' ) {
			// %S: "Spiders are [on/off]"; off if disabled
			// globally or for this collection
			bool spidersOn = true;
			if ( !g_conf.m_spideringEnabled ) spidersOn = false;
			if ( cr && !cr->m_spideringEnabled ) spidersOn = false;
			if ( spidersOn ) sb.safePrintf("Spiders are on");
			else             sb.safePrintf("Spiders are off");
			i += 1;
			continue;
		}
		if ( t == 'q' ) {
			// %q: the query with quotes encoded. use the
			// SafeBuf method (as %where/%when do) so the
			// buffer grows instead of silently truncating.
			if ( q && qlen > 0 ) sb.dequote ( q , qlen );
			i += 1;
			continue;
		}
		if ( t == 'c' ) {
			// %c: the collection name
			if ( cr ) sb.safeStrcpy(cr->m_coll);
			i += 1;
			continue;
		}
		if ( headHasToken ( head , hlen , i+1 , "where" ) ) {
			// %where: location from the request, else cookie
			int32_t whereLen;
			const char *where = r->getString("where",&whereLen);
			if ( ! where )
				where = r->getStringFromCookie("where",
							       &whereLen);
			// fix for getStringFromCookie
			if ( where && ! where[0] ) where = NULL;
			i += 5;
			if ( where ) sb.dequote (where,whereLen);
			continue;
		}
		if ( headHasToken ( head , hlen , i+1 , "when" ) ) {
			// %when: the "when" request parameter
			int32_t whenLen;
			const char *when = r->getString("when",&whenLen);
			i += 4;
			if ( when ) sb.dequote (when,whenLen);
			continue;
		}
		if ( headHasToken ( head , hlen , i+1 , "sortby" ) ) {
			// %sortby: radio buttons, current choice checked
			int32_t sortBy = r->getLong("sortby",1);
			const char *cs[5] = { "" , "" , "" , "" , "" };
			if ( sortBy >=1 && sortBy <=4 )
				cs[sortBy] = " checked";
			sb.safePrintf(
			 "<input type=radio name=sortby value=1%s>date "
			 "<input type=radio name=sortby value=2%s>distance "
			 "<input type=radio name=sortby value=3%s>relevancy "
			 "<input type=radio name=sortby value=4%s>popularity",
			 cs[1],cs[2],cs[3],cs[4]);
			i += 6;
			continue;
		}
		if ( t == 'e' ) {
			// %e: the url-encoded query. each input byte can
			// become up to 3 output bytes, so make room first
			// and only then take the write pointer.
			if ( q && qlen > 0 && sb.reserve2x ( 3*qlen + 1 ) ) {
				char *p = (char*) sb.getBufPtr();
				int32_t plen = sb.getAvail();
				int32_t eqlen = urlEncode ( p , plen , q , qlen );
				sb.incrementLength(eqlen);
			}
			i += 1;
			continue;
		}
		if ( t == 'N' ) {
			// %N: global doc count plus the configured
			// adjustment, never negative
			int64_t c = g_hostdb.getNumGlobalRecs();
			c += g_conf.m_docCountAdjustment;
			if ( c < 0 ) c = 0;
			// reserve BEFORE fetching the pointer: reserving
			// may move the buffer. 32 bytes covers a 20-digit
			// value plus separators and the terminator.
			if ( sb.reserve2x(32) ) {
				char *p = (char*) sb.getBufPtr();
				int32_t len = ulltoa(p, c);
				sb.incrementLength(len);
			}
			i += 1;
			continue;
		}
		if ( t == 'n' ) {
			// %n: this collection's doc count
			int64_t docsInColl = 0;
			if ( cr ) docsInColl = cr->getNumDocsIndexed();
			// same ordering rule as %N
			if ( sb.reserve2x(32) ) {
				char *p = (char*) sb.getBufPtr();
				int32_t len = ulltoa(p, docsInColl);
				sb.incrementLength(len);
			}
			i += 1;
			continue;
		}
		if ( t == 'D' ) {
			// %D: formerly the "# of results" drop down;
			// now just consumed
			i += 1;
			continue;
		}
		if ( t == 'H' ) {
			// %H: hidden input carrying the collection name
			// taken from the request
			int32_t collLen = 0;
			const char *coll = r->getString ( "c" , &collLen );
			if ( coll && collLen > 0 && collLen < MAX_COLL_LEN ) {
				sb.safePrintf("<input type=hidden name=c "
					      "value=\"");
				// request-supplied: encode quotes so it
				// cannot close the value attribute
				sb.dequote ( coll , collLen );
				sb.safePrintf("\">\n");
			}
			i += 1;
			continue;
		}
		if ( t == 'F' ) {
			// %F: opening form tag
			i += 1;
			if ( ! method ) method = "GET";
			sb.safePrintf("<form method=%s action=\"/search\" "
				      "name=\"f\">\n",method);
			continue;
		}
		if ( t == 'L' ) {
			i += 1;
			printLogo ( sb , si );
			continue;
		}
		if ( t == 'f' ) {
			i += 1;
			if ( si ) printFamilyFilter ( sb , si->m_familyFilter );
			continue;
		}
		if ( t == 'R' ) {
			i += 1;
			if ( si ) printRadioButtons ( sb , si );
			continue;
		}
		// unknown token: emit the '%' itself, the following bytes
		// are handled by later iterations
		sb.safeMemcpy((char*)&head[i], 1);
	}
	return true;
}

// . prints the left <TD> pane: the rocket logo plus one tab per menu item
// . the tab whose text matches "tabName" (case-insensitive) is highlighted
// . on search results pages only the first three tabs are printed and the
//   function returns before printing the admin tab and the closing </TD>
// . "cr" and "tabName" may be NULL
bool printLeftColumnRocketAndTabs ( SafeBuf *sb ,
				    bool isSearchResultsPage ,
				    CollectionRec *cr ,
				    const char *tabName ) {

	struct MenuItem {
		const char *m_text;
		const char *m_url;
	};

	static const MenuItem mi[] = {
		{"SEARCH","/"},
		{"ADVANCED","/adv.html"},
		{"RANK TEST","/ranktest.html"},
		{"ADD URL","/addurl"},
		{"WIDGETS","/widgets.html"},
		{"SYNTAX","/syntax.html"},
		{"FAQ","/faq.html"},
		{"API","/api.html"}
	};
	static const int32_t n = sizeof(mi) / sizeof(MenuItem);

	const char *coll = "";
	if ( cr ) coll = cr->m_coll;

	//
	// first the nav column
	//
	sb->safePrintf(
		      "<TD bgcolor=#%s " // yellow/gold
		      "valign=top "
		      "style=\"width:210px;"
		      "border-right:3px solid blue;"
		      "\">"
		      "<br>"
		      "<center>"
		      "<a href=\"/?c=%s\">"
		      "<div style=\""
		      "background-color:white;"
		      "padding:10px;"
		      "border-radius:100px;"
		      "border-color:blue;"
		      "border-width:3px;"
		      "border-style:solid;"
		      "width:100px;"
		      "height:100px;"
		      "\">"
		      , GOLD
		      , coll
		      );

	sb->safePrintf("<br style=line-height:10px;>"
		       "<img border=0 "
		       "width=54 height=79 src=/rocket.jpg>"
		       );

	sb->safePrintf ( "</div>"
			 "</a>"
			 "</center>"
			 "<br>"
			 "<br>"
			 );

	for ( int32_t i = 0 ; i < n ; i++ ) {
		// just show the first three tabs in serps
		if ( isSearchResultsPage && i >= 3 ) break;

		// append the collection with '?' or '&' depending on
		// whether the url already has a query string
		char delim = '?';
		if ( strstr ( mi[i].m_url,"?") ) delim = '&';

		sb->safePrintf(
			      "<a href=%s%cc=%s>"
			      "<div style=\""
			      "padding:5px;"
			      "position:relative;"
			      "text-align:right;"
			      "border-width:3px;"
			      "border-right-width:0px;"
			      "border-style:solid;"
			      "margin-left:10px;"
			      "border-top-left-radius:10px;"
			      "border-bottom-left-radius:10px;"
			      "font-size:14px;"
			      "x-overflow:;"
			      , mi[i].m_url
			      , delim
			      , coll
			      );

		// a NULL tabName simply matches no tab
		bool matched = ( tabName &&
				 strcasecmp(mi[i].m_text,tabName) == 0 );

		if ( matched )
			sb->safePrintf(
				      "border-color:blue;"
				      "color:black;"
				      "background-color:white;\" ");
		else
			sb->safePrintf("border-color:white;"
				      "color:white;"
				      "background-color:blue;\" "
				      " onmouseover=\""
				      "this.style.backgroundColor='lightblue';"
				      "this.style.color='black';\""
				      " onmouseout=\""
				      "this.style.backgroundColor='blue';"
				      "this.style.color='white';\""
				      );

		sb->safePrintf(">"
			      // make button wider
			      "<nobr>"
			      " "
			      "<b>%s</b> </nobr>"
			      , mi[i].m_text
			      );

		//
		// begin hack: white out the blue border line!!
		//
		if ( matched )
			sb->safePrintf(
				      "<div style=padding:5px;top:0;"
				      "background-color:white;"
				      "display:inline-block;"
				      "position:absolute;>"
				      " "
				      "</div>"
				      );
		// end hack

		sb->safePrintf(
			      "</div>"
			      "</a>"
			      "<br>"
			      );
	}

	// no admin link on search results pages
	if ( isSearchResultsPage ) return true;

	sb->safePrintf(
		      "<a href=\"/admin/settings?c=%s\">"
		      "<div style=\"background-color:green;"
		      "padding:5px;"
		      "text-align:right;"
		      "border-width:3px;"
		      "border-right-width:0px;"
		      "border-style:solid;"
		      "margin-left:10px;"
		      "border-color:white;"
		      "border-top-left-radius:10px;"
		      "border-bottom-left-radius:10px;"
		      "font-size:14px;"
		      "color:white;"
		      "cursor:hand;"
		      "cursor:pointer;\" "
		      " onmouseover=\""
		      "this.style.backgroundColor='lightgreen';"
		      "this.style.color='black';\""
		      " onmouseout=\""
		      "this.style.backgroundColor='green';"
		      "this.style.color='white';\""
		      ">"
		      "<b>ADMIN</b> "
		      "</div>"
		      "</a>"
		      "<br>"
		      "</TD>"
		      , coll
		      );

	return true;
}

// . prints <html>, <head>, styles, the <body> tag, opens the two-pane
//   table, prints the left pane and opens the main <TD>
// . "tabName" selects the highlighted tab and, unless it is "search",
//   becomes the page title
// . "printGigablast" adds the gigablast logo at the top of the main pane
// . the caller must close the table (see printNav())
bool printFrontPageShell ( SafeBuf *sb ,
			   const char *tabName ,
			   CollectionRec *cr ,
			   bool printGigablast ) {

	sb->safePrintf("<html>\n");
	sb->safePrintf("<head>\n");
	sb->safePrintf("<meta name=\"description\" content=\"A powerful, new search engine that does real-time indexing!\">\n");
	sb->safePrintf("<meta name=\"keywords\" content=\"search, search engine, search engines, search the web, fresh index, green search engine, green search, clean search engine, clean search\">\n");

	const char *title = "An Alternative Open Source Search Engine";
	if ( tabName && strcasecmp(tabName,"search") != 0 ) {
		title = tabName;
	}
	// tabName can come straight from the "searchtype" request
	// parameter, so it must not be able to inject markup
	sb->safePrintf("<title>Gigablast - ");
	printHtmlEscaped ( sb , title );
	sb->safePrintf("</title>\n");

	sb->safePrintf("<style><!--\n");
	sb->safePrintf("body {\n");
	sb->safePrintf("font-family:Arial, Helvetica, sans-serif;\n");
	sb->safePrintf("color: #000000;\n");
	sb->safePrintf("font-size: 12px;\n");
	sb->safePrintf("margin: 0px 0px;\n");
	sb->safePrintf("letter-spacing: 0.04em;\n");
	sb->safePrintf("}\n");
	sb->safePrintf("a {text-decoration:none;}\n");
	sb->safePrintf(".bold {font-weight: bold;}\n");
	sb->safePrintf(".bluetable {background:#d1e1ff;margin-bottom:15px;font-size:12px;}\n");
	sb->safePrintf(".url {color:#008000;}\n");
	sb->safePrintf(".cached, .cached a {font-size: 10px;color: #666666;\n");
	sb->safePrintf("}\n");
	sb->safePrintf("table {\n");
	sb->safePrintf("font-family:Arial, Helvetica, sans-serif;\n");
	sb->safePrintf("color: #000000;\n");
	sb->safePrintf("font-size: 12px;\n");
	sb->safePrintf("}\n");
	sb->safePrintf(".directory {font-size: 16px;}\n"
		      ".nav {font-size:20px;align:right;}\n"
		      );
	sb->safePrintf("-->\n");
	sb->safePrintf("</style>\n");
	sb->safePrintf("\n");
	sb->safePrintf("</head>\n");
	sb->safePrintf("<script>\n");
	sb->safePrintf("<!--\n");
	sb->safePrintf("function x(){document.f.q.focus();}\n");
	sb->safePrintf("// --></script>\n");
	sb->safePrintf("<body onload=\"x()\">\n");

	//
	// DIVIDE INTO TWO PANES, LEFT COLUMN and MAIN COLUMN
	//
	sb->safePrintf("<TABLE border=0 height=100%% cellspacing=0 "
		      "cellpadding=0>"
		      "\n<TR>\n");

	// . also prints <TD>...</TD>
	// . false = isSearchResultsPage?
	printLeftColumnRocketAndTabs ( sb , false , cr , tabName );

	//
	// now the MAIN column
	//
	sb->safePrintf("\n<TD valign=top style=padding-left:30px;>\n");

	sb->safePrintf("<br><br>");

	if ( printGigablast ) {
		sb->safePrintf("<a href=\"/\"><img border=0 width=470 "
			      "height=44 src=/gigablast.jpg></a>\n");
	}

	return true;
}

// . prints the web search home page into "sb"
// . if the collection has its own html root template it is expanded via
//   expandHtml() and that result is returned; otherwise the stock page
//   with the search box is printed and true is returned
static bool printWebHomePage ( SafeBuf &sb , HttpRequest *r , TcpSocket *sock ) {

	SearchInput si;
	si.set ( sock , r );

	// if there's a ton of sites use the post method otherwise
	// they won't fit into the http request, the browser will reject
	// sending such a large request with "GET"
	const char *method = "GET";
	if ( si.m_sites && strlen(si.m_sites)>800 ) {
		method = "POST";
	}

	// if the collection provides its own root page, use that
	CollectionRec *cr = g_collectiondb.getRec ( r );
	if ( cr && cr->m_htmlRoot.length() ) {
		return expandHtml ( sb ,
				    cr->m_htmlRoot.getBufStart(),
				    cr->m_htmlRoot.length(),
				    NULL,
				    0,
				    r ,
				    &si,
				    method , // "GET" or "POST"
				    cr );
	}

	// . search special types
	// . defaults to web which is "search"
	// . can be like "images" "products" "articles"
	const char *searchType =
		r->getString("searchtype",NULL,"search",NULL);
	log("searchtype=%s",searchType);

	// pass searchType in as tabName
	printFrontPageShell ( &sb , searchType , cr , true );

	sb.safePrintf("<br><br>\n");
	sb.safePrintf("<br><br><br>\n");

	sb.safePrintf("<form method=%s action=/search name=f>\n", method);

	if ( cr )
		sb.safePrintf("<input type=hidden name=c value=\"%s\">",
			      cr->m_coll);

	// put search box in a box
	sb.safePrintf("<div style="
		      "background-color:#%s;"
		      "border-style:solid;"
		      "border-width:3px;"
		      "border-color:blue;"
		      "padding:20px;"
		      "border-radius:20px;"
		      ">"
		      ,GOLD
		      );

	sb.safePrintf("<input name=q type=text "
		      "style=\""
		      "height:26px;"
		      "padding:0px;"
		      "font-weight:bold;"
		      "padding-left:5px;"
		      "margin:0px;"
		      "border:1px inset lightgray;"
		      "background-color:#ffffff;"
		      "font-size:18px;"
		      "\" "
		      "size=40 value=\"\"> "
		      "<div onclick=document.f.submit(); "
		      " onmouseover=\""
		      "this.style.backgroundColor='lightgreen';"
		      "this.style.color='black';\""
		      " onmouseout=\""
		      "this.style.backgroundColor='green';"
		      "this.style.color='white';\" "
		      "style=border-radius:28px;"
		      "cursor:pointer;"
		      "cursor:hand;"
		      "border-color:white;"
		      "border-style:solid;"
		      "border-width:3px;"
		      "padding:12px;"
		      "width:20px;"
		      "height:20px;"
		      "display:inline-block;"
		      "background-color:green;color:white;>"
		      "<b style=margin-left:-5px;font-size:18px;"
		      ">GO</b>"
		      "</div>"
		      "\n"
		      );

	sb.safePrintf("</div>\n");
	sb.safePrintf("\n");
	sb.safePrintf("</form>\n");
	sb.safePrintf("<br>\n");
	sb.safePrintf("\n");

	if ( cr ) {
		sb.safePrintf("<center>"
			      "Searching the <b>%s</b> collection."
			      "</center>",
			      cr->m_coll);
		sb.safePrintf("<br>\n");
		sb.safePrintf("\n");
	}

	// print any red boxes we might need to
	if ( printRedBox2 ( &sb , sock , r ) )
		sb.safePrintf("<br>\n");

	sb.safePrintf("\n");
	sb.safePrintf("\n");
	sb.safePrintf("<br><br>\n");
	printNav ( sb , r );
	return true;
}

// . prints the add-url page into "sb"
// . "url" is NULL when the form is shown for the first time; when it is
//   set, the page also embeds a script that re-requests /addurl with an
//   "id" parameter so the actual injection happens via ajax and its reply
//   is shown in the msgbox div
static bool printAddUrlHomePage ( SafeBuf &sb , const char *url , HttpRequest *r ) {

	CollectionRec *cr = g_collectiondb.getRec ( r );

	printFrontPageShell ( &sb , "add url" , cr , true );

	sb.safePrintf("<br><br><br><br>\n");
	sb.safePrintf("<b>WARNING</b>: Adding URLs this way DOES NOT handle redirects.");
	sb.safePrintf("<br><br>");
	sb.safePrintf("If you add somesite.com and it redirects to www.somesite.com, it will be indexed as somesite.com, NOT www.somesite.com!<br>");
	sb.safePrintf("Use Admin -> Add Urls instead if you want redirects handled correctly.");

	// handler() copies the ajax reply into the msgbox div
	sb.safePrintf("<script type=\"text/javascript\">\n"
		      "function handler() {\n"
		      "if(this.readyState == 4 ) {\n"
		      "document.getElementById('msgbox').innerHTML="
		      "this.responseText;\n"
		      "}}\n"
		      "</script>\n");

	sb.safePrintf("<br><br>\n");
	sb.safePrintf("<br><br><br>\n");

	sb.safePrintf("<form method=GET "
		      "action=/addurl name=f>\n" );

	const char *coll = "";
	if ( cr ) {
		coll = cr->m_coll;
		sb.safePrintf("<input type=hidden name=c value=\"%s\">",
			      cr->m_coll);
	}

	// put search box in a box
	sb.safePrintf("<div style="
		      "background-color:#%s;"
		      "border-style:solid;"
		      "border-width:3px;"
		      "border-color:blue;"
		      "padding:20px;"
		      "border-radius:20px;"
		      ">"
		      , GOLD
		      );

	sb.safePrintf("<input name=urls type=text "
		      "style=\""
		      "height:26px;"
		      "padding:0px;"
		      "font-weight:bold;"
		      "padding-left:5px;"
		      "margin:0px;"
		      "border:1px inset lightgray;"
		      "background-color:#ffffff;"
		      "font-size:18px;"
		      "\" "
		      "size=40 value=\""
		      );

	if ( url ) {
		SafeBuf tmp;
		tmp.safePrintf("%s",url);
		// don't let double quotes in the url close our val attribute
		tmp.replace("\"","%22");
		sb.safeMemcpy(&tmp);
	}
	else
		sb.safePrintf("http://");

	sb.safePrintf("\"> "
		      "<div onclick=document.f.submit(); "
		      " onmouseover=\""
		      "this.style.backgroundColor='lightgreen';"
		      "this.style.color='black';\""
		      " onmouseout=\""
		      "this.style.backgroundColor='green';"
		      "this.style.color='white';\" "
		      "style=border-radius:28px;"
		      "cursor:pointer;"
		      "cursor:hand;"
		      "border-color:white;"
		      "border-style:solid;"
		      "border-width:3px;"
		      "padding:12px;"
		      "width:20px;"
		      "height:20px;"
		      "display:inline-block;"
		      "background-color:green;color:white;>"
		      "<b style=margin-left:-5px;font-size:18px;>GO</b>"
		      "</div>"
		      "\n"
		      );
	sb.safePrintf("\n");

	sb.safePrintf("</div>\n");
	sb.safePrintf("\n");
	sb.safePrintf("<br>\n");
	sb.safePrintf("\n");

	// if addurl is turned off or we are in read-only mode, just print
	// the "disabled" msg
	const char *msg = NULL;
	if ( ! g_conf.m_addUrlEnabled )
		msg = "Add url is temporarily disabled";
	if ( g_conf.m_readOnlyMode )
		msg = "Add url is temporarily disabled";

	sb.safePrintf("<br><center>"
		      "Add a url to the <b>%s</b> collection</center>",coll);

	// if url is non-empty the ajax will receive this identical msg
	// and display it in the div, so do not duplicate the msg!
	if ( msg && ! url )
		sb.safePrintf("<br><br>%s",msg);

	// . the ajax msgbox div
	// . when loaded with the main page for the first time it will
	//   immediately replace its content...
	if ( url ) {
		const char *root = "";
		sb.safePrintf("<br>"
			      "<br>"
			      "<div id=msgbox>"
			      "<center>"
			      "<img src=%s/gears.gif width=50 height=50>"
			      "</center>"
			      "<script type=text/javascript>"
			      "var client = new XMLHttpRequest();\n"
			      "client.onreadystatechange = handler;\n"
			      "var url='/addurl?urls="
			      , root );
		urlEncode(&sb,url);
		// a non-zero "id" tells sendPageAddUrl() that the request
		// comes from this ajax loader
		uint32_t h32 = hash32n(url);
		if ( h32 == 0 ) h32 = 1;
		// timestamp appended as the "rand" parameter
		uint64_t rand64 = gettimeofdayInMilliseconds();
		// msg7 needs an explicit collection for /addurl for injecting
		// in PageInject.cpp. it does not use defaults for safety.
		sb.safePrintf("&id=%" PRIu32 "&c=%s&rand=%" PRIu64 "';\n"
			      "client.open('GET', url );\n"
			      "client.send();\n"
			      "</script>\n"
			      , h32
			      , coll
			      , rand64
			      );
		sb.safePrintf("</div>\n");
	}

	sb.safePrintf("</form>\n");
	sb.safePrintf("<br>\n");
	sb.safePrintf("\n");
	sb.safePrintf("<br><br>\n");

	printNav ( sb , r );
	return true;
}

// . sends the root (home) page to the client
// . replies with a 500 error if the request's collection is not found
// . returns whatever the HttpServer send call returns
// . "cookie" is currently unused
bool sendPageRoot ( TcpSocket *s , HttpRequest *r, char *cookie ) {
	StackBuf<10*1024> sb;

	CollectionRec *cr = g_collectiondb.getRec(r);
	if ( ! cr ) {
		g_errno = ENOCOLLREC;
		return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));
	}

	printWebHomePage(sb,r,s);

	// cachetime 0: don't cache since we have the login bar at the
	// top of the page
	return g_httpServer.sendDynamicPage ( s,
					      (char*) sb.getBufStart(),
					      sb.length(),
					      0,     // cachetime
					      false, // post?
					      "text/html",
					      200,
					      NULL,  // cookie
					      "UTF-8",
					      r);
}

/////////////////
//
// ADD URL PAGE
//
/////////////////

#include "PageInject.h"
#include "Spider.h"

static bool canSubmit (uint32_t h, int32_t now, int32_t maxUrlsPerIpDom);

void resetPageAddUrl ( ) ;

// Per-request state kept alive while an add-url injection is in flight.
class State1i {
public:
	Msg7       m_msg7;
	TcpSocket *m_socket;
	bool       m_isMasterAdmin;
	char       m_coll[MAX_COLL_LEN+1];
	bool       m_goodAnswer;
	int32_t    m_urlLen;
	// holds the url plus its terminating \0
	char       m_url[MAX_URL_LEN];
	bool       m_strip;
	bool       m_spiderLinks;
	bool       m_forceRespider;
};

static void doneInjectingWrapper3 ( void *st ) ;

// . handles /addurl
// . without "urls" it prints the empty form; with "urls" but no "id" it
//   prints the form plus the ajax loader; with both it injects the url
// . returns false if waiting on the injection reply, otherwise the reply
//   has been sent (or attempted) before returning
// . sets g_errno on error
bool sendPageAddUrl ( TcpSocket *sock , HttpRequest *hr ) {
	int32_t urlLen = 0;
	const char *url = hr->getString ( "urls" , &urlLen , NULL /*default*/);

	bool isAdmin = g_conf.isCollAdmin ( sock , hr );

	// can't be too long, that's obnoxious. ">=" because the url is
	// later copied WITH its \0 into State1i::m_url[MAX_URL_LEN].
	if ( urlLen >= MAX_URL_LEN ) {
		g_errno = EBUFTOOSMALL;
		g_msg = " (error: url too long)";
		return g_httpServer.sendErrorReply(sock,500,"url too long");
	}

	// get collection rec
	CollectionRec *cr = g_collectiondb.getRec ( hr );
	if ( ! cr ) {
		g_errno = ENOCOLLREC;
		g_msg = " (error: no collection)";
		return g_httpServer.sendErrorReply(sock,500,"no coll rec");
	}

	//
	// if no url, print the main homepage page
	//
	if ( ! url ) {
		SafeBuf sb;
		printAddUrlHomePage ( sb , NULL , hr );
		// cachetime 0: we have the login bar at top of page
		return g_httpServer.sendDynamicPage(sock,
						    sb.getBufStart(),
						    sb.length(),
						    0,     // cachetime
						    false, // post?
						    "text/html",
						    200,
						    NULL,  // cookie
						    "UTF-8",
						    hr);
	}

	//
	// run the ajax script on load to submit the url now
	//
	int32_t id = hr->getLong("id",0);
	// if we are not being called by the ajax loader, then put the
	// ajax loader script into the html now
	if ( id == 0 ) {
		SafeBuf sb;
		printAddUrlHomePage ( sb , url , hr );
		return g_httpServer.sendDynamicPage ( sock,
						      sb.getBufStart(),
						      sb.length(),
						      0,     // cachetime
						      false, // post?
						      "text/html",
						      200,
						      NULL,  // cookie
						      "UTF-8",
						      hr);
	}

	//
	// ok, inject the provided url!! check for errors first.
	//
	const char *msg = NULL;
	if ( ! g_conf.m_addUrlEnabled )
		msg = "Add url is temporarily disabled";
	if ( g_conf.m_readOnlyMode )
		msg = "Add url is temporarily disabled";

	// . send msg back to the ajax request
	// . use cachetime of 3600 so it does not re-inject if you hit the
	//   back button!
	if ( msg ) {
		SafeBuf sb;
		sb.safePrintf("%s",msg);
		g_httpServer.sendDynamicPage (sock,
					      sb.getBufStart(),
					      sb.length(),
					      3600,  // cachetime
					      false, // post?
					      "text/html",
					      200,   // http status
					      NULL,  // cookie
					      "UTF-8");
		return true;
	}

	// make a new state
	State1i *st1 ;
	try { st1 = new (State1i); }
	catch(std::bad_alloc&) {
		g_errno = ENOMEM;
		log("PageAddUrl: new(%i): %s",
		    (int)sizeof(State1i),mstrerror(g_errno));
		return g_httpServer.sendErrorReply(sock,500,mstrerror(g_errno));
	}
	mnew ( st1 , sizeof(State1i) , "PageAddUrl" );

	// save socket and isAdmin
	st1->m_socket        = sock;
	st1->m_isMasterAdmin = isAdmin;

	// save the collection name in the State1i class
	strcpy ( st1->m_coll , cr->m_coll );

	// assume they answered turing test correctly
	st1->m_goodAnswer = true;

	st1->m_strip = hr->getLong("strip",0);
	// . Remember, for cgi, if the box is not checked, then it is not
	//   reported in the request, so set default return value to 0
	// . support both camel case and all lower-cases
	st1->m_spiderLinks = hr->getLong("spiderLinks",0);
	st1->m_spiderLinks = hr->getLong("spiderlinks",st1->m_spiderLinks);

	// . should we force it into spiderdb even if already in there
	// . force is on by default
	st1->m_forceRespider = hr->getLong("force",1);

	uint32_t h = iptop ( sock->m_ip );
	int32_t now = getTimeGlobal();
	// restrict non-admins by submitter ip domain
	if ( ! st1->m_isMasterAdmin &&
	     ! canSubmit ( h , now , cr->m_maxAddUrlsPerIpDomPerDay ) ) {
		SafeBuf sb;
		sb.safePrintf("You breached your add url quota.");
		mdelete ( st1 , sizeof(State1i) , "PageAddUrl" );
		delete (st1);
		// use cachetime of 3600 so it does not re-inject if you hit
		// the back button!
		g_httpServer.sendDynamicPage (sock,
					      sb.getBufStart(),
					      sb.length(),
					      3600,  // cachetime
					      false, // post?
					      "text/html",
					      200,   // http status
					      NULL,  // cookie
					      "UTF-8");
		return true;
	}

	Msg7 *msg7 = &st1->m_msg7;
	InjectionRequest *ir = &msg7->m_injectionRequest;
	// default to zero
	memset ( ir , 0 , sizeof(InjectionRequest) );

	int32_t collLen;
	const char *coll = hr->getString ( "c" , &collLen ,NULL );
	if ( ! coll ) coll = g_conf.m_defaultColl;
	ir->m_collnum = g_collectiondb.getCollnum ( coll );

	// save the URL; it fits because urlLen < MAX_URL_LEN was checked
	st1->m_urlLen = strlen(url);
	memcpy(st1->m_url, url, st1->m_urlLen+1);
	ir->ptr_url = st1->m_url;
	// include \0 in size
	ir->size_url = strlen(ir->ptr_url)+1;

	// get back a short reply so we can show the status code easily
	ir->m_shortReply = true;

	ir->m_spiderLinks = st1->m_spiderLinks;

	//
	// inject using msg7
	//
	if ( ! msg7->sendInjectionRequestToHost ( ir, st1 ,
						  doneInjectingWrapper3 ) ) {
		// there was an error
		log("http: error sending injection request: %s"
		    ,mstrerror(g_errno));
		// NOTE(review): assumes the callback is not invoked when
		// the send fails -- confirm against Msg7. release the
		// state and answer the waiting ajax request here.
		int32_t err = g_errno;
		mdelete ( st1 , sizeof(State1i) , "PageAddUrl" );
		delete (st1);
		return g_httpServer.sendErrorReply(sock,500,mstrerror(err));
	}

	// wait for the reply, this 'blocked'
	return false;
}

// . callback for the injection started in sendPageAddUrl()
// . "st" is the State1i; it is freed here
// . builds the short html status that the ajax loader drops into the
//   msgbox div and sends it with a one hour cache time
static void doneInjectingWrapper3 ( void *st ) {
	State1i *st1 = (State1i *)st;

	Msg7 *msg7 = &st1->m_msg7;

	InjectionRequest *ir = &msg7->m_injectionRequest;
#ifdef _VALGRIND_
	VALGRIND_CHECK_MEM_IS_DEFINED(ir,sizeof(*ir));
	VALGRIND_CHECK_MEM_IS_DEFINED(ir->ptr_url,ir->size_url);
	VALGRIND_CHECK_MEM_IS_DEFINED(ir->ptr_contentDelim,ir->size_contentDelim);
	VALGRIND_CHECK_MEM_IS_DEFINED(ir->ptr_contentFile,ir->size_contentFile);
	VALGRIND_CHECK_MEM_IS_DEFINED(ir->ptr_contentTypeStr,ir->size_contentTypeStr);
	VALGRIND_CHECK_MEM_IS_DEFINED(ir->ptr_content,ir->size_content);
	VALGRIND_CHECK_MEM_IS_DEFINED(ir->ptr_metadata,ir->size_metadata);
#endif
	char *url = ir->ptr_url;

	// log it so we can see what is being added
	log(LOG_INFO,"http: add url %s (%s)",url ,mstrerror(g_errno));

	// extract info from state
	TcpSocket *sock = st1->m_socket;

	// page is not more than 32k
	StackBuf<1024*32+MAX_URL_LEN*2> sb;

	// collection name
	const char *coll = st1->m_coll;

	//
	// what we print here will just be the status msg, because the
	// ajax will fill the text we print here into the div below
	// the add url box. each message is printed exactly once.
	//
	if ( g_errno ) {
		sb.safePrintf("Error adding url(s): <b>%s[%i]</b>",
			      mstrerror(g_errno) , g_errno);
	}
	else if ( ! g_conf.m_addUrlEnabled ) {
		const char *pm =
			"<font color=#ff0000>"
			"Sorry, this feature is temporarily disabled. "
			"Please try again later.</font>";
		if ( url ) {
			char ipbuf[16];
			log("addurls: failed for user at %s: "
			    "add url is disabled. "
			    "Enable add url on the "
			    "Master Controls page and "
			    "on the Spider Controls page for "
			    "this collection.",
			    iptoa(sock->m_ip,ipbuf));
		}
		sb.safePrintf("%s",pm);
	}
	// did they fail the turing test?
	else if ( ! st1->m_goodAnswer ) {
		const char *pm =
			"<font color=#ff0000>"
			"Oops, you did not enter the 4 large letters "
			"you see below. Please try again.</font>";
		sb.safePrintf("%s",pm);
	}
	else if ( msg7->m_replyIndexCode ) {
		sb.safePrintf("<b>Had error injecting url: %s</b>",
			      mstrerror(msg7->m_replyIndexCode));
	}
	else {
		// random "rand" parameter to avoid hitting the browser
		// page cache
		uint32_t rand32 = rand();
		sb.safePrintf("<b>Url successfully added. "
			      "<a href=\"/search?rand=%" PRIu32 "&"
			      "c=%s&q=url%%3A",
			      rand32,
			      coll);
		urlEncode(&sb,url);
		sb.safePrintf("\">Check it</a>"
			      "."
			      "</b>");
	}

	// clear g_errno, if any, so our reply send goes through
	g_errno = 0;

	// nuke state
	mdelete ( st1 , sizeof(State1i) , "PageAddUrl" );
	delete (st1);

	// this reply should be loaded from the ajax loader so use a cache
	// time of 1 hour so it does not re-inject the url if you hit the
	// back button
	g_httpServer.sendDynamicPage (sock,
				      sb.getBufStart(),
				      sb.length(),
				      3600,  // cachetime
				      false, // post?
				      "text/html",
				      200,   // http status
				      NULL,  // cookie
				      "UTF-8");
}

// we get like 100k submissions a day!!!
static HashTable s_htable;
static bool      s_init = false;
static int32_t   s_lastTime = 0;

static bool canSubmit ( uint32_t h , int32_t now , int32_t maxAddUrlsPerIpDomPerDay ) {
	// . sometimes no limit
	// . 0 means no limit because if they don't want any submission they
	//   can just turn off add url and we want to avoid excess
	//   troubleshooting for why a url can't be added
	if ( maxAddUrlsPerIpDomPerDay <= 0 ) return true;
	// init the table
	if ( ! s_init ) {
		s_htable.set ( 50000 );
		s_init = true;
	}
	// clean out table every 24 hours
	if ( now - s_lastTime > 24*60*60 ) {
		s_lastTime = now;
		s_htable.clear();
	}
	// .
if table almost full clean out ALL slots // . TODO: just clean out oldest slots if (s_htable.getNumUsedSlots() > 47000 ) s_htable.clear (); // . how many times has this IP domain submitted? // . allow 10 times per day int32_t n = s_htable.getValue ( h ); // if over 24hr limit then bail if ( n >= maxAddUrlsPerIpDomPerDay ) return false; // otherwise, inc it n++; // add to table, will replace old values s_htable.addKey ( h , n ); return true; } void resetPageAddUrl ( ) { s_htable.reset(); } bool sendPageHelp ( TcpSocket *sock , HttpRequest *hr ) { SafeBuf sb; CollectionRec *cr = g_collectiondb.getRec ( hr ); printFrontPageShell ( &sb , "syntax" , cr , true ); sb.safePrintf("<br><br>\n"); sb.safePrintf("<br><br><br>\n"); const char *qc = "demo"; const char *host = ""; // for debug make it local on laptop sb.safePrintf( "<br>" "<table width=650px cellpadding=5 cellspacing=0 border=0>" "" // yellow/gold bar "<tr>" "<td colspan=2 bgcolor=#%s>" // f3c714>" "<b>" "Basic Query Syntax" "</b>" "</td>" "</tr>\n" "<tr bgcolor=#0340fd>" "" "<th><font color=33dcff>Example Query</font></th>" "<th><font color=33dcff>Description</font></th>" "</tr>" "<tr> " "<td><a href=\"%s/search?c=%s&q=cat dog\">cat dog</a></td>" " <td>Search results have the word <em>cat</em> and the word <em>dog</em> " " in them. They could also have <i>cats</i> and <i>dogs</i>.</td>" " </tr>" "" "" " <tr bgcolor=#E1FFFF> " " <td><a href=\"%s/search?c=%s&q=%%2Bcat\">+cat</a></td>" " <td>Search results have the word <em>cat</em> in them. If the search results has the word <i>cats</i> then it will not be included. 
The plus sign indicates an exact match and not to use synonyms, hypernyms or hyponyms or any other form of the word.</td>" " </tr>" "" "" " <tr> " " <td height=10><a href=\"%s/search?c=%s&q=mp3 %%22take five%%22\">mp3 \"take five\"</a></td>" " <td>Search results have the word <em>mp3</em> and the exact phrase <em>take " " five</em> in them.</td>" " </tr>" " <tr bgcolor=#E1FFFF> " " <td><a href=\"%s/search?c=%s&q=%%22john smith%%22 -%%22bob dole%%22\">john smith\" -\"bob dole\"</a></td>" " <td>Search results have the phrase <em>john smith</em> but NOT the " " phrase <em>bob dole</em> in them.</td>" " </tr>" " <tr> " " <td><a href=\"%s/search?c=%s&q=bmx -game\">bmx -game</a></td>" " <td>Search results have the word <em>bmx</em> but not <em>game</em>.</td>" " </tr>" // " <tr> " // " <td><a href=\"/search?q=inurl%%3Aedu title%%3Auniversity\"><b>inurl:</b></a><a href=\"/search?q=inurl%%3Aedu title%%3Auniversity\">edu <b>title:</b>university</a></td>" // " <td>Search results have <em>university</em> in their title and <em>edu</em> " // " in their url.</td>" // " </tr>" // " <tr bgcolor=#E1FFFF> " // " <td><a href=\"/search?q=site%%3Awww.ibm.com %%22big blue%%22\"><b>site:</b></a><a href=\"/search?q=site%%3Awww.ibm.com %%22big blue%%22\">www.ibm.com \"big blue\"</a></td>" // " <td>Search results are from the site <em>www.ibm.com</em> and have the phrase " // " <em>big blue</em> in them.</td>" // " </tr>" // " <tr> " // " <td><a href=\"/search?q=url%%3Awww.yahoo.com\"><b>url:</b></a><a href=\"/search?q=url%%3Awww.yahoo.com&n=10\">www.yahoo.com</a></td>" // " <td>Search result is the single URL www.yahoo.com, if it is indexed.</td>" // " </tr>" // " <tr bgcolor=#E1FFFF> " // " <td><nobr><a href=\"/search?q=title%%3A%%22the news%%22 -%%22weather report%%22\"><b>title:</b>\"the " // " news\" -\"weather report\"</a></nobr></td>" // " <td>Search results have the phrase <em>the news</em> in their title, " // " and do NOT have the phrase <em>weather report</em> anywhere in their 
" // " content.</td>" // " </tr>" // " <tr> " // " <td><a href=\"/search?q=ip%%3A216.32.120 cars\"><b>ip:</b></a><a href=\"/search?q=ip%%3A216.32.120\">216.32.120</a></td>" // " <td>Search results are from the the ip 216.32.120.*.</td>" // " </tr>" // "" // " <tr bgcolor=#E1FFFF> " // " <td><a href=\"/search?q=type%%3Apdf nutrition\"><b>type:</b>pdf nutrition</a></td>" // " <td>Search results are PDF (Portable Document Format) documents that " // " contain the word <em>nutrition</em>.</td>" // " </tr>" // " <tr> " // " <td><a href=\"/search?q=type%%3Adoc\"><b>type:</b>doc</a></td>" // " <td>Search results are Microsoft Word documents.</td>" // " </tr>" // " <tr bgcolor=#E1FFFF> " // " <td><a href=\"/search?q=type%%3Axls\"><b>type:</b>xls</a></td>" // " <td>Search results are Microsoft Excel documents.</td>" // " </tr>" // " <tr> " // " <td><a href=\"/search?q=type%%3Appt\"><b>type:</b>ppt</a></td>" // " <td>Search results are Microsoft Power Point documents.</td>" // " </tr>" // " <tr bgcolor=#E1FFFF> " // " <td><a href=\"/search?q=type%%3Aps\"><b>type:</b>ps</a></td>" // " <td>Search results are Postscript documents.</td>" // " </tr>" // " <tr> " // " <td><a href=\"/search?q=type%%3Atext\"><b>type:</b>text</a></td>" // " <td>Search results are plain text documents.</td>" // " </tr>" // " <tr bgcolor=#E1FFFF> " // " <td><a href=\"/search?q=filetype%%3Apdf\"><b>filetype:</b>pdf</a></td>" // " <td>Search results are PDF documents.</td>" // " </tr>" // "" // "" // " <tr bgcolor=#E1FFFF> " // " <td><a href=\"/search?q=link%%3Awww.yahoo.com\"><b>link:</b>www.yahoo.com</a></td>" // " <td>All the pages that link to www.yahoo.com.</td>" // " </tr>" // "" // " <tr bgcolor=#E1FFFF> " // " <td><a href=\"/search?q=sitelink%%3Awww.yahoo.com\"><b>sitelink:</b>www.yahoo.com</a></td>" // " <td>All the pages that link to any page on www.yahoo.com.</td>" // " </tr>" // "" // " <tr bgcolor=#E1FFFF> " // " <td><a href=\"/search?q=ext%%3Atxt\"><b>ext:</b>txt</a></td>" // " <td>All the 
pages whose url ends in the .txt extension.</td>" // " </tr>" // "" // "" , GOLD , host , qc , host , qc , host , qc , host , qc , host , qc ); sb.safePrintf( // spacer //"<tr><td><br></td><td></td></tr>" //"<tr bgcolor=#0340fd>" // "<td><font color=33dcff><b>Special Query</b>" // "</font></td>" //"<td><font color=33dcff><b>Description</b></font></td>" // "</tr>" "<tr bgcolor=#E1FFFF>" "<td><a href=%s/search?c=%s&q=cat|dog>cat | dog</a>" "</td><td>" "Match documents that have cat and dog in them, but " "do not allow cat to affect the ranking score, only " "dog. This is called a <i>query refinement</i>." "</td></tr>\n" "<tr bgcolor=#ffFFFF>" "<td><a href=%s/search?c=%s&q=document.title:paper>" "document.title:paper</a></td><td>" "That query will match a JSON document like " "<i>" "{ \"document\":{\"title\":\"This is a good paper.\" " "}}</i> or, alternatively, an XML document like <i>" , host , qc , host , qc ); sb.htmlEncode("<document><title>This is a good paper" "</title></document>" ); sb.safePrintf("</i></td></tr>\n"); const char *bg1 = "#E1FFFF"; const char *bg2 = "#ffffff"; const char *bgcolor = bg1; // table of the query keywords int32_t n = getNumFieldCodes(); for ( int32_t i = 0 ; i < n ; i++ ) { // get field #i const QueryField *f = &g_fields[i]; if ( g_fields[i].m_flag & QTF_HIDE ) continue; // new table? if ( g_fields[i].m_flag & QTF_BEGINNEWTABLE ) { sb.safePrintf("</table>" "<br>" "<br>" "<br>" "<table width=650px " "cellpadding=5 cellspacing=0 border=0>" // yellow/gold bar "<tr>" "<td colspan=2 bgcolor=#%s>"//f3c714>" "<b>" "%s" "</b>" "</td>" "</tr>\n" "<tr bgcolor=#0340fd>" "<th><font color=33dcff>" "Example Query</font></th>" "<th><font color=33dcff>" "Description</font></th>" "</tr>\n" , GOLD , g_fields[i].m_title ); } // print it out const char *d = f->desc; // fix table internal cell bordering if ( ! 
d || d[0] == '\0' ) d = " "; sb.safePrintf("<tr bgcolor=%s>" "<td><nobr><a href=\"%s/search?c=%s&q=" , bgcolor , host , qc ); urlEncode(&sb,f->example); sb.safePrintf("\">"); sb.safePrintf("%s</a></nobr></td>" "<td>%s</td></tr>\n", f->example, d); if ( bgcolor == bg1 ) bgcolor = bg2; else bgcolor = bg1; } sb.safePrintf( // " <tr> " // " <td style=padding-bottom:12px;> </td>" // " <td style=padding-bottom:12px;> </td>" // " </tr>" // "" "</table>" "<br><br><br>" "<table width=650px " "cellpadding=5 cellspacing=0 border=0>" // yellow/gold bar "<tr>" "<td colspan=2 bgcolor=#%s>" // f3c714>" "<b>" "Boolean Queries" "</b>" "</td>" "</tr>\n" "<tr bgcolor=#0340fd>" "" " <th><font color=33dcff>Example Query</font></th>" " <th><font color=33dcff>Description</font></th>" "" " </tr>" "" " <tr> " " <td colspan=2 bgcolor=#FFFFCC><center>" " Note: boolean operators must be in UPPER CASE. " " </td>" " </tr>" " <tr> " " <td><a href=%s/search?c=%s&q=cat+AND+dog>cat AND dog</a></td>" " <td>Search results have the word <em>cat</em> AND the word <em>dog</em> " " in them.</td>" " </tr>" " <tr bgcolor=#E1FFFF> " " <td><a href=%s/search?c=%s&q=cat+OR+dog>cat OR dog</a></td>" " <td>Search results have the word <em>cat</em> OR the word <em>dog</em> " " in them, but preference is given to results that have both words.</td>" " </tr>" " <tr> " " <td><a href=%s/search?c=%s&q=cat+dog+OR+pig>cat dog OR pig</a></td>" " <td>Search results have the two words <em>cat</em> and <em>dog</em> " " OR search results have the word <em>pig</em>, but preference is " " given to results that have all three words. 
This illustrates how " " the individual words of one operand are all required for that operand " " to be true.</td>" " </tr>" " <tr bgcolor=#E1FFFF> " " <td><a href=%s/search?c=%s&q=%%22cat+dog%%22+OR+pig>\"cat dog\" OR pig</a></td>" " <td>Search results have the phrase <em>\"cat dog\"</em> in them OR they " " have the word <em>pig</em>, but preference is given to results that " " have both.</td>" " </tr>" " <tr> " " <td><a href=%s/search?c=%s&q=title%%3A%%22cat+dog%%22+OR+pig>title</a><a href=%s/search?c=%s&q=title%%3A%%22cat+dog%%22+OR+pig>:\"cat " " dog\" OR pig</a></td>" " <td>Search results have the phrase <em>\"cat dog\"</em> in their title " " OR they have the word <em>pig</em>, but preference is given to results " " that have both.</td>" " </tr>" " <tr bgcolor=#E1FFFF> " " <td><a href=%s/search?c=%s&q=cat+OR+dog+OR+pig>cat OR dog OR pig</a></td>" " <td>Search results need only have one word, <em>cat</em> or <em>dog</em> " " or <em>pig</em>, but preference is given to results that have the " " most of the words.</td>" " </tr>" " <tr> " " <td><a href=%s/search?c=%s&q=cat+OR+dog+AND+pig>cat OR dog AND pig</a></td>" " <td>Search results have <em>dog</em> and <em>pig</em>, but they may " " or may not have <em>cat</em>. Preference is given to results that " " have all three. To evaluate expressions with more than two operands, " " as in this case where we have three, you can divide the expression " " up into sub-expressions that consist of only one operator each. " " In this case we would have the following two sub-expressions: <em>cat " " OR dog</em> and <em>dog AND pig</em>. Then, for the original expression " " to be true, at least one of the sub-expressions that have an OR " " operator must be true, and, in addition, all of the sub-expressions " " that have AND operators must be true. 
Using this logic you can evaluate " " expressions with more than one boolean operator.</td>" " </tr>" " <tr bgcolor=#E1FFFF> " " <td><a href=%s/search?c=%s&q=cat+AND+NOT+dog>cat AND NOT dog</a></td>" " <td>Search results have <em>cat</em> but do not have <em>dog</em>.</td>" " </tr>" " <tr> " " <td><a href=%s/search?c=%s&q=cat+AND+NOT+%%28dog+OR+pig%%29>cat AND NOT (dog OR pig)</a></td>" " <td>Search results have <em>cat</em> but do not have <em>dog</em> " " and do not have <em>pig</em>. When evaluating a boolean expression " " that contains ()'s you can evaluate the sub-expression in the ()'s " " first. So if a document has <em>dog</em> or it has <em>pig</em> " " or it has both, then the expression, <em>(dog OR pig)</em> would " " be true. So you could, in this case, substitute <em>true</em> for " " that expression to get the following: <em>cat AND NOT (true) = cat " " AND false = false</em>. Does anyone actually read this far?</td>" " </tr>" " <tr bgcolor=#E1FFFF> " " <td><a href=%s/search?c=%s&q=%%28cat+OR+dog%%29+AND+NOT+%%28cat+AND+dog%%29>(cat OR dog) AND NOT (cat AND dog)</a></td>" " <td>Search results have <em>cat</em> or <em>dog</em> but not both.</td>" " </tr>" " <tr> " " <td>left-operand OPERATOR right-operand</td>" " <td>This is the general format of a boolean expression. The possible " " operators are: OR and AND. The operands can themselves be boolean " " expressions and can be optionally enclosed in parentheses. 
A NOT " " operator can optionally preceed the left or the right operand.</td>" " </tr>" "" //" </table>" "" "" "" //"</td></tr>" //"</table>" //"<br>" , GOLD , host , qc , host , qc , host , qc , host , qc , host , qc , host , qc , host , qc , host , qc , host , qc , host , qc , host , qc ); //sb.safePrintf("<tr><td></td><td></td></tr>\n"); //sb.safePrintf("<tr><td></td><td></td></tr>\n"); //sb.safePrintf("<tr><td></td><td></td></tr>\n"); //sb.safePrintf("<tr><td></td><td></td></tr>\n"); sb.safePrintf("</table>"); //sb.safePrintf("</form>\n"); sb.safePrintf("<br>\n"); sb.safePrintf("\n"); sb.safePrintf("<br><br>\n"); printNav ( sb , hr ); g_httpServer.sendDynamicPage (sock, sb.getBufStart(), sb.length(), 3600, // cachetime false,// post? "text/html", 200, // http status NULL, // cookie "UTF-8"); return true; }