hack about 35%ish done

This commit is contained in:
mwells
2014-04-08 19:34:43 -07:00
parent 61b4ec4ca6
commit 9e1199f113
7 changed files with 671 additions and 56 deletions

@ -28,10 +28,11 @@
#include "TcpSocket.h"
// values for HttpRequest::m_replyFormat
#define FORMAT_HTML 0
#define FORMAT_XML 1
#define FORMAT_JSON 2
#define FORMAT_CSV 3
#define FORMAT_HTML 0
#define FORMAT_XML 1
#define FORMAT_JSON 2
#define FORMAT_CSV 3
#define FORMAT_WIDGET 4
class HttpRequest {

@ -189,8 +189,8 @@ bool sendReply ( State0 *st , char *reply ) {
return true;
}
bool printCSSHead ( SafeBuf *sb ) {
return sb->safePrintf(
bool printCSSHead ( SafeBuf *sb , char format ) {
sb->safePrintf(
"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML "
"4.01 Transitional//EN\">\n"
//"<meta http-equiv=\"Content-Type\" "
@ -201,7 +201,13 @@ bool printCSSHead ( SafeBuf *sb ) {
"<style><!--"
"body {"
"font-family:Arial, Helvetica, sans-serif;"
"color: #000000;"
);
if ( format == FORMAT_WIDGET )
sb->safePrintf("background-color:000000;");
sb->safePrintf( "color: #000000;"
"font-size: 12px;"
//"margin: 20px 5px;"
"}"
@ -225,6 +231,7 @@ bool printCSSHead ( SafeBuf *sb ) {
"</style>\n"
"</head>\n"
);
return true;
}
// . returns false if blocked, true otherwise
@ -257,7 +264,7 @@ bool sendPageResults ( TcpSocket *s , HttpRequest *hr ) {
g_conf.m_isMattWells ) {
SafeBuf sb;
printCSSHead ( &sb );
printCSSHead ( &sb ,format );
sb.safePrintf(
"<body "
"onLoad=\""
@ -396,8 +403,11 @@ bool sendPageResults ( TcpSocket *s , HttpRequest *hr ) {
"<br/>"
"<center>"
"<font color=gray>"
"Copyright &copy; 2013. All Rights Reserved.<br/>"
"Powered by the <a href='https://www.gigablast.com/'>GigaBlast</a> open source search engine."
"Copyright &copy; 2014. "
"All Rights Reserved.<br/>"
"Powered by the "
"<a href='http://www.gigablast.com/'>"
"GigaBlast</a> open source search engine."
"</font>"
"</center>\n"
@ -470,7 +480,7 @@ bool sendPageResults ( TcpSocket *s , HttpRequest *hr ) {
&st->m_hr,
&st->m_q ) ) {
log("query: set search input: %s",mstrerror(g_errno));
g_errno = EBADENGINEER;
if ( ! g_errno ) g_errno = EBADENGINEER;
return sendReply ( st, NULL );
}
@ -959,13 +969,15 @@ bool printSearchResultsHeader ( State0 *st ) {
// . if not matt wells we do not do ajax
// . the ajax is just there to prevent bots from slamming me
// with queries.
if ( ! g_conf.m_isMattWells && si->m_format == FORMAT_HTML ) {
printCSSHead ( sb );
if ( ! g_conf.m_isMattWells &&
(si->m_format == FORMAT_HTML ||si->m_format==FORMAT_WIDGET)) {
printCSSHead ( sb ,si->m_format );
sb->safePrintf("<body>");
printLogoAndSearchBox ( sb , &st->m_hr , -1 ); // catId = -1
}
if ( ! g_conf.m_isMattWells && si->m_format == FORMAT_HTML ) {
printLogoAndSearchBox ( sb , &st->m_hr , -1 ); // catId = -1
}
// xml
if ( si->m_format == FORMAT_XML )
@ -980,9 +992,11 @@ bool printSearchResultsHeader ( State0 *st ) {
long long globalNowMS = localToGlobalTimeMilliseconds(nowMS);
sb->safePrintf("\t<currentTimeUTC>%lu</currentTimeUTC>\n",
(long)(globalNowMS/1000));
} else if ( st->m_header && si->m_format == FORMAT_JSON ) {
}
else if ( st->m_header && si->m_format == FORMAT_JSON ) {
long long globalNowMS = localToGlobalTimeMilliseconds(nowMS);
sb->safePrintf("\"currentTimeUTC\":%lu,\n", (long)(globalNowMS/1000));
sb->safePrintf("\"currentTimeUTC\":%lu,\n",
(long)(globalNowMS/1000));
}
// show response time if not doing Quality Assurance
@ -1223,7 +1237,8 @@ bool printSearchResultsHeader ( State0 *st ) {
long collLen = gbstrlen(coll);
// otherwise, we had no error
if ( numResults == 0 && si->m_format == FORMAT_HTML ) {
if ( numResults == 0 &&
(si->m_format == FORMAT_HTML || si->m_format==FORMAT_WIDGET) ) {
sb->safePrintf ( "No results found in <b>%s</b> collection.",
cr->m_coll);
}
@ -1444,7 +1459,7 @@ bool printSearchResultsHeader ( State0 *st ) {
if ( firstIgnored ) {
if ( si->m_format == FORMAT_XML )
sb->safePrintf ("\t<ignoredWords><![CDATA[");
else
else if ( si->m_format == FORMAT_HTML )
sb->safePrintf (" &nbsp; <font "
"color=\"#707070\">The "
"following query words "
@ -1463,7 +1478,7 @@ bool printSearchResultsHeader ( State0 *st ) {
sb->incrementLength(-1);
if ( si->m_format == FORMAT_XML )
sb->safePrintf("]]></ignoredWords>\n");
else
else if ( si->m_format == FORMAT_HTML )
sb->safePrintf ("</b>. Preceed each with a '+' or "
"wrap in "
"quotes to not ignore.</font>");
@ -1478,7 +1493,7 @@ bool printSearchResultsHeader ( State0 *st ) {
SafeBuf *gbuf = &msg40->m_gigabitBuf;
long numGigabits = gbuf->length()/sizeof(Gigabit);
if ( si->m_format == FORMAT_XML ) numGigabits = 0;
if ( si->m_format != FORMAT_HTML ) numGigabits = 0;
// print gigabits
Gigabit *gigabits = (Gigabit *)gbuf->getBufStart();
@ -1615,7 +1630,7 @@ bool printSearchResultsTail ( State0 *st ) {
long firstNum = msg40->getFirstResultNum() ;
// end the two-pane table
if ( si->m_format == FORMAT_HTML ) sb->safePrintf("</td></tr></table>");
if ( si->m_format == FORMAT_HTML) sb->safePrintf("</td></tr></table>");
// for storing a list of all of the sites we displayed, now we print a
// link at the bottom of the page to ban all of the sites displayed
@ -1647,12 +1662,16 @@ bool printSearchResultsTail ( State0 *st ) {
args.safePrintf("&sb=0");
// collection
args.safePrintf("&c=%s",coll);
// formatting info
if ( si->m_format == FORMAT_WIDGET )
args.safePrintf("&format=widget");
// carry over the sites we are restricting the search results to
if ( si->m_whiteListBuf.length() )
args.safePrintf("&sites=%s",si->m_whiteListBuf.getBufStart());
if ( firstNum > 0 && si->m_format == FORMAT_HTML ) {
if ( firstNum > 0 &&
(si->m_format == FORMAT_HTML || si->m_format==FORMAT_WIDGET)) {
long ss = firstNum - msg40->getDocsWanted();
sb->safePrintf("<a href=\"/search?s=%li&q=",ss);
// our current query parameters
@ -1667,7 +1686,8 @@ bool printSearchResultsTail ( State0 *st ) {
}
// now print "Next X Results"
if ( msg40->moreResultsFollow() && si->m_format == FORMAT_HTML ) {
if ( msg40->moreResultsFollow() &&
(si->m_format == FORMAT_HTML || si->m_format==FORMAT_WIDGET)) {
long ss = firstNum + msg40->getDocsWanted();
// print a separator first if we had a prev results before us
if ( sb->length() > remember ) sb->safePrintf ( " &nbsp; " );
@ -1759,7 +1779,7 @@ bool printSearchResultsTail ( State0 *st ) {
sb->safePrintf ( "<br>"
"<center>"
"<font color=gray>"
"Copyright &copy; 2013. All Rights "
"Copyright &copy; 2014. All Rights "
"Reserved.<br/>"
"Powered by the <a href='https://www."
"gigablast.com/'>GigaBlast</a> open source "
@ -1772,6 +1792,27 @@ bool printSearchResultsTail ( State0 *st ) {
);
}
if ( si->m_format == FORMAT_WIDGET ) {
sb->safePrintf ( "<br>"
"<center>"
"<font color=gray>"
// link to edit the list of widget sites
// or various other widget content properties
// because we can't edit the width/height
// of the widget like this.
"<a href=/widget?inlineedit=1>edit</a> "
"&bull; "
//"Copyright &copy; 2014. All Rights "
//"Reserved.<br/>"
"Powered by <a href=http://www.diffbot.com/>"
"Diffbot</a>."
"</font>"
"</center>\n"
"</body>\n"
"</html>\n"
);
}
if ( sb->length() == 0 && si && si->m_format == FORMAT_JSON )
sb->safePrintf("[]\n");
@ -2053,6 +2094,8 @@ bool printResult ( State0 *st, long ix ) {
SafeBuf *sb = &st->m_sb;
HttpRequest *hr = &st->m_hr;
CollectionRec *cr = NULL;
cr = g_collectiondb.getRec ( st->m_collnum );
if ( ! cr ) {
@ -2247,6 +2290,8 @@ bool printResult ( State0 *st, long ix ) {
}
*/
char *diffbotSuffix = strstr(url,"-diffbotxyz");
// print youtube and metacafe thumbnails here
// http://www.youtube.com/watch?v=auQbi_fkdGE
// http://img.youtube.com/vi/auQbi_fkdGE/2.jpg
@ -2254,6 +2299,69 @@ bool printResult ( State0 *st, long ix ) {
if ( mr->ptr_imgUrl && si->m_format == FORMAT_HTML )
sb->safePrintf ("<a href=%s><image src=%s></a>",
url,mr->ptr_imgUrl);
// print image for widget
if ( mr->ptr_imgUrl && si->m_format == FORMAT_WIDGET ) {
long widgetwidth = hr->getLong("widgetwidth",200);
// make a div around this for widget so we can print text
// on top
sb->safePrintf("<div "
"style=\""
"width:%lipx;"
"min-height:140px;"
"padding:8px;"
"height:140px;"
"display:table-cell;"
"vertical-align:bottom;"
"background-repeat:no-repeat;"
"background-size:%lipx 140px;"
"background-image:url('%s');"
"\""
">"
, widgetwidth - 2*15 // padding is 15px
, widgetwidth - 2*15 // padding is 15px
, mr->ptr_imgUrl);
sb->safePrintf ( "<a "
"target=_blank "
"style=text-decoration:none; href=" );
// truncate off -diffbotxyz%li
long newLen = urlLen;
if ( diffbotSuffix ) newLen = diffbotSuffix - url;
// print the url in the href tag
sb->safeMemcpy ( url , newLen );
// then finish the a href tag and start a bold for title
sb->safePrintf ( ">");//<font size=+0>" );
sb->safePrintf("<b style=\""
"text-decoration:none;"
"font-size: 20px;"
"font-weight:bold;"
"background-color:rgba(0,0,0,.5);"
"color:white;"
"font-family:arial;"
//"text-shadow:2px 4px 3px rgba(0,0,1,3);"
"text-shadow: 2px 2px 0 #000 "
",-2px -2px 0 #000 "
",-2px 2px 0 #000 "
", 2px -2px 0 #000 "
", 2px -2px 0 #000 "
", 0px -2px 0 #000 "
", 0px 2px 0 #000 "
", -2px 0px 0 #000 "
", 2px 0px 0 #000 "
";"
//"-2px 2px 0 #000 "
//"2px -2px 0 #000 "
//"-2px -2px 0 #000;"
"\">");
//sb->safePrintf ("<image width=50 height=50 src=%s></a>",
// mr->ptr_imgUrl);
// then title over image
}
// the a href tag
if ( si->m_format == FORMAT_HTML ) sb->safePrintf ( "\n\n" );
@ -2261,6 +2369,7 @@ bool printResult ( State0 *st, long ix ) {
if ( mr->m_isBanned && si->m_format == FORMAT_HTML )
sb->safePrintf("<font color=red><b>BANNED</b></font> ");
///////
//
// PRINT THE TITLE
@ -2271,13 +2380,28 @@ bool printResult ( State0 *st, long ix ) {
// the a href tag
if ( si->m_format == FORMAT_HTML ) {
sb->safePrintf ( "<a href=" );
// truncate off -diffbotxyz%li
long newLen = urlLen;
if ( diffbotSuffix ) newLen = diffbotSuffix - url;
// print the url in the href tag
sb->safeMemcpy ( url , urlLen );
sb->safeMemcpy ( url , newLen );
// then finish the a href tag and start a bold for title
sb->safePrintf ( ">");//<font size=+0>" );
}
// only do link here
if (si->m_format == FORMAT_WIDGET && ! mr->ptr_imgUrl ) {
sb->safePrintf ( "<a href=" );
// truncate off -diffbotxyz%li
long newLen = urlLen;
if ( diffbotSuffix ) newLen = diffbotSuffix - url;
// print the url in the href tag
sb->safeMemcpy ( url , newLen );
// then finish the a href tag and start a bold for title
sb->safePrintf ( ">");//<font size=+0>" );
}
// . then the title (should be NULL terminated)
// . the title can be NULL
// . highlight it first
@ -2375,6 +2499,12 @@ bool printResult ( State0 *st, long ix ) {
if ( si->m_format == FORMAT_HTML ) sb->safePrintf ("</a><br>\n" ) ;
// close the image div
if ( si->m_format == FORMAT_WIDGET )
sb->safePrintf("</b></a></div>\n");
/////
//
// print content type after title
@ -2390,7 +2520,7 @@ bool printResult ( State0 *st, long ix ) {
"]]>"
"</contentType>\n",
cs);
else {
else if ( si->m_format == FORMAT_HTML ) {
sb->safePrintf(" <b><font style=color:white;"
"background-color:maroon;>");
char *p = cs;
@ -2599,7 +2729,7 @@ bool printResult ( State0 *st, long ix ) {
if ( isAdmin ) {
if ( isAdmin && si->m_format == FORMAT_HTML ) {
long lang = mr->m_language;
if ( lang ) sb->safePrintf(" - %s",getLanguageString(lang));
uint16_t cc = mr->m_computedCountry;
@ -2617,7 +2747,7 @@ bool printResult ( State0 *st, long ix ) {
if ( mr->m_noArchive ) printCached = false;
if ( isAdmin ) printCached = true;
if ( mr->m_contentLen <= 0 ) printCached = false;
if ( si->m_format == FORMAT_XML ) printCached = false;
if ( si->m_format != FORMAT_HTML ) printCached = false;
// get collnum result is from
//collnum_t collnum = si->m_cr->m_collnum;
@ -2954,7 +3084,7 @@ bool printResult ( State0 *st, long ix ) {
*/
if ( si->m_format == FORMAT_HTML )
if ( si->m_format == FORMAT_HTML || si->m_format == FORMAT_WIDGET )
sb->safePrintf ( "<br><br>\n");
@ -5394,3 +5524,432 @@ bool printJsonItemInCSV ( char *json , SafeBuf *sb , State0 *st ) {
return true;
}
// . print the "Widget Creator" page into "sb"
// . the page is a form of widget controls (sites, token, query, display
//   options, custom header), a live preview of the widget in an iframe,
//   and a textarea holding the html-encoded iframe code for the user to
//   paste into their own website
// . reads these cgi parms from "hr": inlineedit, dates, summaries, border,
//   width, height, refresh, header, sites, token, query
// . if "inlineedit" is non-zero we only print a small editing form and return
// . everything that comes from the request (or from the collection's site
//   list) is html-encoded before going into a <textarea> so it can not
//   close the textarea and inject markup
// . always returns true, even if the "GLOBAL-INDEX" collection is missing,
//   in which case an error message is printed into "sb"
// . TODO: the preview iframe src is still hard-coded to 127.0.0.1:8000 and
//   a fixed query; only the width/height parms are applied to it
bool printWidgetPage ( SafeBuf *sb , HttpRequest *hr ) {
	//
	// begin print controls
	//

	sb->safePrintf("<html>"
		       "<body bgcolor=#e8e8e8>"
		       "<title>Widget Creator</title>"
		       );

	char *coll = "GLOBAL-INDEX";
	CollectionRec *cr = g_collectiondb.getRec(coll);
	if ( ! cr ) {
		sb->safePrintf("Error. collection %s does not exist",
			       coll);
		return true;
	}

	// if admin clicks "edit" in the live widget itself put up
	// some simpler content editing boxes. token required!
	long edit = hr->getLong("inlineedit",0);
	if ( edit ) {
		// get widget sites
		char *siteList = cr->m_siteListBuf.getBufStart();
		sb->safePrintf("<textarea>");
		// encode so the list can not break out of the textarea.
		// print nothing if there is no site list buffer.
		if ( siteList )
			sb->htmlEncode ( siteList ,
					 gbstrlen(siteList) ,
					 false , // bool encodePoundSign
					 0 );    // niceness
		sb->safePrintf("</textarea>");
		sb->safePrintf("<br>"
			       "<input type=text name=token>"
			       "<br>"
			       "<input type=submit name=submit value=ok>"
			       );
		return true;
	}

	sb->safePrintf("<script>\n");

	// onclick of a checkbox toggle it here since we reload after.
	// this relies on each checkbox carrying an explicit value of 0 or 1.
	sb->safePrintf("function toggleBool ( control , id ) {\n"
		       "if(document.forms[0].elements[id].value == 1 ) {\n"
		       "document.forms[0].elements[id].value = 0;\n"
		       "} else {\n"
		       "document.forms[0].elements[id].value = 1;\n"
		       "}\n"
		       "}\n"
		       );

	// construct url based on input parms. the form below is named
	// "myform" so that document.myform resolves.
	sb->safePrintf("function getFormParms ( ) {\n"
		       "var i;\n"
		       "var url = '';\n"
		       "for(i=0; i<document.myform.elements.length; i++){\n"
		       "var elm = document.myform.elements[i];\n"
		       // skip submit button and nameless checkboxes
		       "if ( elm.name == '' ) {\n"
		       //"alert(document.myform.elements[i].value)\n"
		       "continue;\n"
		       "}\n"
		       // until we had def=%li to each input parm assume
		       // default is 0. i guess if it has no def= attribute
		       // assume default is 0
		       //"if ( elm.value == '0' ) {\n"
		       //"continue;\n"
		       //"}\n"
		       "if ( elm.value == '' ) {\n"
		       "continue;\n"
		       "}\n"
		       // url-encode both sides so '&', '#', '=' and
		       // spaces in a value do not corrupt the query string
		       "url = "
		       "url + "
		       "encodeURIComponent(elm.name) + \"=\" + "
		       "encodeURIComponent(elm.value) + \"&\" ;\n"
		       "}\n"
		       "return url;\n"
		       "}\n"
		       );

	sb->safePrintf("function reload() {\n"
		       "var url='/widget?' + getFormParms();\n"
		       "window.location.href=url;\n"
		       "}\n"
		       );

	sb->safePrintf("</script>\n");

	char *c1 = "";
	char *c2 = "";
	char *c3 = "";
	// normalize to 0/1 since these are also printed as the checkbox
	// values that toggleBool() flips
	long x1 = hr->getLong("dates"    ,1) ? 1 : 0;
	long x2 = hr->getLong("summaries",1) ? 1 : 0;
	long x3 = hr->getLong("border"   ,1) ? 1 : 0;
	if ( x1 ) c1 = " checked";
	if ( x2 ) c2 = " checked";
	if ( x3 ) c3 = " checked";
	long width   = hr->getLong("width",100);
	long height  = hr->getLong("height",300);
	long refresh = hr->getLong("refresh",300);

	char *def = "<style>html {font-size:12px;font-family:arial;background-color:transparent;color:black;}span.dayheader { font-size:14px;font-weight:bold;}span.title { font-size:16px;font-weight:bold;}span.countdown { font-size:12px;color:red;}span.summary { font-size:12px;}span.address { font-size:12px;color:purple;}span.times { font-size:12px;color:green;}span.dates { font-size:12px;}span.prevnext { font-size:12px;font-weight:bold;}</style>";//<h2>News</h2>";

	long len1,len2,len3,len4;
	char *header = hr->getString("header",&len1,def);
	char *sites  = hr->getString("sites",&len2,"");
	char *token  = hr->getString("token",&len3,"");
	char *query  =hr->getString("query",&len4,"type:article gbsortby:date");

	// name the form so getFormParms() can find it as document.myform
	sb->safePrintf("<form name=myform method=GET action=/widget>"
		       "<input type=hidden name=c value=\"%s\">"
		       "<input type=hidden name=format value=\"widget\">"
		       , cr->m_coll
		       );

	// the controls pane, up to the body of the "sites" textarea
	sb->safePrintf(
		       "<div style=\""
		       "margin-left:5px;"
		       "padding:15px;"
		       "width:600px;"
		       "height:600px;"
		       "font-family:Arial;"
		       "border-radius:10px;"
		       "line-height:30px;"
		       "background-color:lightgray;"
		       "text-align:right;"
		       "\""
		       ">"

		       "<table cellpadding=0>"
		       "<tr>"
		       "<td "
		       "style=padding:15px;background-color:lightblue;"
		       //"text-align:right;"
		       "bottom-margin:5px; "
		       "colspan=10>"
		       "<b style=font-size:22px;><font style=font-size:27px;>"
		       "W</font>"
		       "idget <font style=font-size:27px;>C</font>reator</b>"
		       "</td>"
		       "</tr>"

		       "<tr>"
		       "<td style=text-align:right;line-height:30px;>"

		       "Websites to crawl:"
		       "<br>"
		       "<textarea rows=10 name=sites style=width:100%%;>"
		       );
	// user-supplied, so encode it
	sb->htmlEncode ( sites ,
			 gbstrlen(sites) ,
			 false , // bool encodePoundSign
			 0 );    // niceness

	sb->safePrintf(
		       "</textarea>"
		       "<br>"
		       "Token:"
		       "<br>"
		       "<textarea name=token style=width:100%%;>"
		       );
	sb->htmlEncode ( token ,
			 gbstrlen(token) ,
			 false , // bool encodePoundSign
			 0 );    // niceness

	sb->safePrintf(
		       "</textarea>"
		       "<br>"
		       "Query:"
		       "<br>"
		       "<textarea name=query style=width:100%%;>"
		       );
	sb->htmlEncode ( query ,
			 gbstrlen(query) ,
			 false , // bool encodePoundSign
			 0 );    // niceness

	// the checkboxes carry their current 0/1 state as their value so
	// that toggleBool() really flips it before reload() reads it
	sb->safePrintf(
		       "</textarea>"
		       "<br>"

		       "Show Dates "
		       "<input type=checkbox value=%li "
		       "onclick=\"toggleBool(this,'dates');reload();\" "
		       "name=dates%s>"
		       "<br>"

		       "Show Summaries "
		       "<input type=checkbox value=%li "
		       "onclick=\"toggleBool(this,'summaries');reload();\" "
		       "name=summaries%s>"
		       "<br>"

		       "Frame border "
		       "<input type=checkbox value=%li "
		       "onclick=\"toggleBool(this,'border');reload();\" "
		       "name=border%s>"
		       "<br>"

		       "Width "
		       "<input size=4 type=text value=%li "
		       "name=width>"
		       "<br>"

		       "Height "
		       "<input size=4 type=text value=%li "
		       "name=height>"
		       "<br>"

		       "<nobr>Refresh in seconds "
		       "<input size=4 type=text value=%li "
		       "name=refresh></nobr>"
		       "<br>"

		       "<nobr>Custom widget header:</nobr>"
		       "<br>"
		       "<textarea rows=5 name=header style=width:100%%;>"
		       , x1 , c1
		       , x2 , c2
		       , x3 , c3
		       , width
		       , height
		       , refresh
		       );
	// either the user's header or the default <style> block above.
	// both must be encoded to show up as text inside the textarea.
	sb->htmlEncode ( header ,
			 gbstrlen(header) ,
			 false , // bool encodePoundSign
			 0 );    // niceness

	sb->safePrintf(
		       "</textarea>"
		       "<br>"
		       "<input type=submit name=submit value=ok>"
		       "</div>"
		       "</td>"
		       );

	//
	// end print controls
	//

	//
	// begin print widget
	//
	sb->safePrintf ( "<td>"
			 "<div style=\""
			 "width:30px;"//%lipx;"
			 //"position:absolute;"
			 //"top:300px;"
			 //"right:0;"
			 //"left:0;"
			 //"bottom:0;"
			 "\">"
			 "<div style=line-height:13px;><br></div>"
			 //"<br>"
			 //, RESULTSWIDTHSTR
			 //,width
			 );

	//printTabs ( sb , st );
	//printRedBoxes ( sb , st );

#define SHADOWCOLOR "#000000"

	sb->safePrintf (
			// end widget div
			"</div>"
			// end widget column in table
			"</td>"
			"<td>"
			// begin div with source in it
			/*
			"<div "
			//"class=grad3 "
			"style=\""
			"border-radius:10px;"
			"box-shadow: 6px 6px 3px %s;"
			"border:2px solid black;"
			"padding:15px;"
			"width:600px;"
			//"background-image:url('/ss.jpg');"
			//"background-repeat:repeat;"
			//"background-attachment:fixed;"
			"background-color:lightgray;"
			"\">"
			, SHADOWCOLOR
			//"<br>"
			*/
			);

	// space widget to the right using this table
	sb->safePrintf(
		       //class=grad3 "
		       //"style=\""
		       //"border:2px solid black;"
		       //"padding-bottom:10px;"
		       //"padding-top:10px;"
		       //"padding-left:10px;"
		       //"\""
		       //">"
		       "</td>"
		       "<td valign=top>"
		       "<img src=/gears32.png width=64 height=64>"
		       "<br><br>"
		       );

	// remember where the embeddable code starts in "sb"
	long start = sb->length();

	// this iframe contains the WIDGET
	sb->safePrintf (
			/*
			"<div "
			"id=scrollerxyz "
			"style=\""
			//"width:%lipx;" // 200;"
			//"height:%lipx;" // 400;"
			//"overflow:hidden;"
			"padding:0px;"
			"margin:0px;"
			"background-color:white;"
			//"padding-left:7px;"
			"%s"
			//"background-color:%s;"//lightblue;"
			//"foreground-color:%s;"
			//"overflow:scroll;"
			//"overflow-scrolling:touch;"
			"\">"
			*/

			"<iframe width=\"%lipx\" height=\"%lipx\" "
			//"scrolling=yes "
			/*
			"style=\"background-color:white;"
			"padding-right:0px;"
			//"%s\" "
			"scrolling=no "
			"frameborder=no "
			//"src=\"http://neo.diffbot.com:8000/search?"
			*/
			"src=\""
			"http://127.0.0.1:8000/search?"
			"format=widget&"
			"widgetwidth=%li&widgetheight=%li&"
			"c=GLOBAL-INDEX&"
			// show articles sorted by newest pubdate first
			"q=type%%3Aarticle+gbsortbyint%%3Adate"
			"\">"
			, width
			, height
			, width
			, height
			);

	sb->safePrintf ( // do not reset the user's "where" cookie
			 // to NYC from looking at this widget!
			 //"cookie=0&"
			 //"%s"
			 "Your browser does not support iframes"
			 "</iframe>\n"
			 //"</div>"
			 //, si->m_urlParms);
			 //, wp
			 );

	long end = sb->length();

	// . take a private copy of the iframe code we just printed
	// . we html-encode it back into "sb" below, and appending to "sb"
	//   may reallocate its buffer, which would leave a pointer into
	//   "sb" itself dangling
	SafeBuf snippet;
	snippet.safeMemcpy ( sb->getBufStart() + start , end - start );

	char *wdir = "on the left";
	long cols = 32;

	//if ( width <= 240 )
	sb->safePrintf("</td><td>&nbsp;&nbsp;</td><td valign=top>");
	//else {
	//	sb->safePrintf("</td></tr><tr><td><br><br>");
	//	wdir = "above";
	//	cols = 60;
	//	}

	sb->safePrintf ( "\n\n"
			 //"<br><br><br>"
			 "<font style=\"font-size:16px;\">"
			 "Insert the following code into your website to "
			 "generate the widget %s. "
			 //"<br>"
			 //"<b><u>"
			 //"<a style=color:white href=/widget.html>"
			 //"Make $1 per click!</a></u></b>"
			 //"</font>"
			 "<br><br><b>" , wdir );

	sb->safePrintf("<textarea rows=30 cols=%li "
		       "style=\"border:2px solid black;\">", cols);
	// show the iframe code as text. skip if the copy above failed.
	if ( snippet.length() > 0 )
		sb->htmlEncode ( snippet.getBufStart() ,
				 snippet.length() ,
				 false ,  // bool encodePoundSign
				 0     ); // niceness
	sb->safePrintf("</textarea>");

	sb->safePrintf("</b>");

	// space widget to the right using this table
	sb->safePrintf("</td></tr></table>");

	sb->safePrintf("</div>");

	sb->safePrintf("</form>");

	sb->safePrintf("</body>");
	sb->safePrintf("</html>");

	return true;
}
// . http handler for the widget creator page
// . renders the page with printWidgetPage() into a local buffer and then
//   hands that buffer to the http server to send back on socket "s"
// . returns whatever g_httpServer.sendDynamicPage() returns
bool sendPageWidget ( TcpSocket *s , HttpRequest *hr ) {
	// build the entire reply in memory first
	SafeBuf page;
	printWidgetPage ( &page , hr );

	// pull out the rendered bytes
	char *content    = page.getBufStart();
	long  contentLen = page.length();

	// and ship it
	return g_httpServer.sendDynamicPage ( s ,
					      content ,
					      contentLen ,
					      -1 ,          // cacheTime -1 means not tocache
					      false ,       // POST?
					      "text/html" , // content type
					      200 ,         // httpstatus
					      NULL ,        // cookie
					      "UTF-8" );    // charset
}

@ -61,15 +61,15 @@ static WebPage s_pages[] = {
// publicly accessible pages
{ PAGE_ROOT , "index.html" , 0 , "root" , 0 , 0 ,
//USER_PUBLIC | USER_MASTER | USER_ADMIN | USER_CLIENT,
"search page to query",
sendPageRoot , 0 } ,
{ PAGE_RESULTS , "search" , 0 , "search" , 0 , 0 ,
//USER_PUBLIC | USER_MASTER | USER_ADMIN | USER_CLIENT,
"results page",
sendPageResults, 0 },
{ PAGE_WIDGET , "widget" , 0 , "widget" , 0 , 0 ,
"widget page",
sendPageWidget, 0 },
{ PAGE_ADDURL , "addurl" , 0 , "add url" , 0 , 0 ,
//USER_PUBLIC | USER_MASTER | USER_ADMIN | USER_PROXY,
"Page where you can add url for spidering",
sendPageAddUrl, 0 },

@ -35,6 +35,7 @@ bool sendPageBasicStatus ( TcpSocket *s , HttpRequest *r );
bool sendPageRoot ( TcpSocket *s , HttpRequest *r );
bool sendPageRoot ( TcpSocket *s , HttpRequest *r, char *cookie );
bool sendPageResults ( TcpSocket *s , HttpRequest *r );
bool sendPageWidget ( TcpSocket *s , HttpRequest *r );
//bool sendPageEvents ( TcpSocket *s , HttpRequest *r );
bool sendPageAddUrl ( TcpSocket *s , HttpRequest *r );
bool sendPageGet ( TcpSocket *s , HttpRequest *r );
@ -291,6 +292,7 @@ enum {
// public pages
PAGE_ROOT ,
PAGE_RESULTS ,
PAGE_WIDGET,
PAGE_ADDURL , // 5
PAGE_GET ,
PAGE_LOGIN ,

@ -299,6 +299,12 @@ bool SearchInput::set ( TcpSocket *sock , HttpRequest *r , Query *q ) {
m_cr = cr;
if ( ! cr ) {
log("si: collection does not exist");
g_errno = ENOCOLLREC;
return false;
}
//m_coll2 = m_cr->m_coll;
//m_collLen2 = gbstrlen(m_coll2);
@ -1403,6 +1409,7 @@ char getFormatFromRequest ( HttpRequest *r ) {
if ( formatStr && strcmp(formatStr,"json") == 0 ) format = FORMAT_JSON;
if ( formatStr && strcmp(formatStr,"xml") == 0 ) format = FORMAT_XML;
if ( formatStr && strcmp(formatStr,"csv") == 0 ) format = FORMAT_CSV;
if ( formatStr && strcmp(formatStr,"widget")==0)format=FORMAT_WIDGET;
// support old api &xml=1 to mean &format=1
@ -1419,5 +1426,9 @@ char getFormatFromRequest ( HttpRequest *r ) {
format = FORMAT_CSV;
}
if ( r->getLong("widget",0) ) {
format = FORMAT_WIDGET;
}
return format;
}

@ -27535,12 +27535,12 @@ Msg20Reply *XmlDoc::getMsg20Reply ( ) {
}
// get thumbnail image url
if ( ! reply->ptr_imgUrl && m_req->m_getImageUrl ) {
if ( ! reply->ptr_imgUrl ) { // && m_req->m_getImageUrl ) {
char **iu = getImageUrl();
if ( ! iu || iu == (char **)-1 ) return (Msg20Reply *)iu;
reply-> ptr_imgUrl = *iu;
reply->size_imgUrl = 0;
if ( *iu ) reply->size_imgUrl = gbstrlen(*iu);
if ( *iu ) reply->size_imgUrl = gbstrlen(*iu)+1;
}
// . adids contained in the doc
@ -28142,6 +28142,43 @@ char **XmlDoc::getImageUrl() {
// assume none
m_imageUrl = NULL;
m_imageUrlValid = true;
// diffbot often extracts an image in the json. but even if pure
// json it might be diffbot json that was injected an we don't know
// it so check contentType...
if ( m_isDiffbotJSONObject || m_contentType == CT_JSON ) {
char *iu = strstr(ptr_utf8Content,"\"images\":[{");
if ( ! iu ) return &m_imageUrl;
// temp null
char *end = strstr(iu+11,"]");
if ( ! end ) return &m_imageUrl;
char c = *end;
*end = '\0';
// now use strstr to find the first image url
char *needle = "\"url\":\"";
char *find = strstr(iu,needle);
// return NULL if not found
if ( ! find ) {
// revert temp null
*end = c;
return &m_imageUrl;
}
// find end of it
char *start = find + 7;
char *urlEnd = strstr(start,"\"");
// revert temp null
*end = c;
// did not find quote ending the url! wtf?
if ( ! urlEnd ) return &m_imageUrl;
// too big?
long iulen = urlEnd - start;
if ( iulen >= MAX_URL_LEN-1 ) return &m_imageUrl;
// ok, we got it, just copy that
m_imageUrlBuf.safeMemcpy ( start , iulen );
m_imageUrlBuf.nullTerm();
m_imageUrl = m_imageUrlBuf.getBufStart();
return &m_imageUrl;
}
// all done if not youtube or meta cafe
char *host = f->getHost();
char found = 0;
@ -28159,21 +28196,22 @@ char **XmlDoc::getImageUrl() {
if ( ! s ) return &m_imageUrl;
// point to the id
s += 2;
m_imageUrl = m_imageUrlBuf;
char *p = m_imageUrlBuf;
memcpy ( p , "http://img.youtube.com/vi/" , 26 );
p += 26;
//m_imageUrl = m_imageUrlBuf;
//char *p = m_imageUrlBuf;
m_imageUrlBuf.safeStrcpy("http://img.youtube.com/vi/");
// do not break
char *pend = m_imageUrlBuf + 80;
//char *pend = m_imageUrlBuf + 80;
// copy the id/number
for ( ; is_digit(*s) && p < pend ; ) *p++ = *s++;
//for ( ; is_digit(*s) && p < pend ; ) *p++ = *s++;
for ( ; is_digit(*s) ; s++ )
m_imageUrlBuf.pushChar(*s);
// wrap it up
memcpy ( p , "/2.jpg\0" , 7 );
p += 7;
m_imageUrlBuf.safeStrcpy ( "/2.jpg" );
// size includes \0;
m_imageUrlSize = p - m_imageUrl ;
//m_imageUrlSize = p - m_imageUrl ;
// sanity check
if ( m_imageUrlSize > 100 ) { char *xx=NULL;*xx=0; }
//if ( m_imageUrlSize > 100 ) { char *xx=NULL;*xx=0; }
m_imageUrl = m_imageUrlBuf.getBufStart();
return &m_imageUrl;
}
// must be meta cafe now
@ -28188,17 +28226,20 @@ char **XmlDoc::getImageUrl() {
// skip ifnot good
if ( id <= 0 ) continue;
// make the url
m_imageUrl = m_imageUrlBuf;
char *p = m_imageUrlBuf;
memcpy ( p , "http://s2.mcstatic.com/thumb/" , 29 );
p += 29;
p += sprintf ( p , "%li" , id );
memcpy ( p , ".jpg\0" , 5 );
p += 5;
//m_imageUrl = m_imageUrlBuf;
//char *p = m_imageUrlBuf;
//memcpy ( p , "http://s2.mcstatic.com/thumb/" , 29 );
//p += 29;
//p += sprintf ( p , "%li" , id );
//memcpy ( p , ".jpg\0" , 5 );
//p += 5;
m_imageUrlBuf.safePrintf("http://s2.mcstatic."
"com/thumb/%li.jpg", id);
m_imageUrl = m_imageUrlBuf.getBufStart();
// size includes \0;
m_imageUrlSize = p - m_imageUrl ;
//m_imageUrlSize = p - m_imageUrl ;
// sanity check
if ( m_imageUrlSize > 100 ) { char *xx=NULL;*xx=0; }
//if ( m_imageUrlSize > 100 ) { char *xx=NULL;*xx=0; }
break;
}
return &m_imageUrl;

@ -1942,8 +1942,9 @@ class XmlDoc {
//long m_gsbufAllocSize;
char *m_note;
char *m_imageUrl;
char m_imageUrlBuf[100];
long m_imageUrlSize;
//char m_imageUrlBuf[100];
SafeBuf m_imageUrlBuf;
//long m_imageUrlSize;
MatchOffsets m_matchOffsets;
Query m_query;
Matches m_matches;