Merge branch 'diffbot' of github.com:gigablast/open-source-search-engine into diffbot
This commit is contained in:
20
PageGet.cpp
20
PageGet.cpp
@ -33,7 +33,7 @@ public:
|
||||
//TagRec m_tagRec;
|
||||
TcpSocket *m_socket;
|
||||
HttpRequest m_r;
|
||||
char m_coll[50];
|
||||
char m_coll[MAX_COLL_LEN+2];
|
||||
//CollectionRec *m_cr;
|
||||
bool m_isAdmin;
|
||||
bool m_isLocal;
|
||||
@ -136,7 +136,7 @@ bool sendPageGet ( TcpSocket *s , HttpRequest *r ) {
|
||||
uint8_t langId = getLangIdFromAbbr ( langAbbr );
|
||||
st->m_langId = langId;
|
||||
}
|
||||
strncpy ( st->m_coll , coll , 40 );
|
||||
strncpy ( st->m_coll , coll , MAX_COLL_LEN+1 );
|
||||
// store query for query highlighting
|
||||
st->m_netTestResults = r->getLong ("rnettest", false );
|
||||
if( st->m_netTestResults ) {
|
||||
@ -179,14 +179,22 @@ bool sendPageGet ( TcpSocket *s , HttpRequest *r ) {
|
||||
sreq.reset();
|
||||
strcpy(sreq.m_url, url );
|
||||
sreq.setDataSize();
|
||||
xd->set4 ( &sreq , NULL , coll , NULL , st->m_niceness );
|
||||
// this returns false if "coll" is invalid
|
||||
if ( ! xd->set4 ( &sreq , NULL , coll , NULL , st->m_niceness ) )
|
||||
goto hadSetError;
|
||||
}
|
||||
// . when getTitleRec() is called it will load the old one
|
||||
// since XmlDoc::m_setFromTitleRec will be true
|
||||
// . niceness is 0
|
||||
else {
|
||||
// use st->m_coll since XmlDoc just points to it!
|
||||
xd->set3 ( docId , st->m_coll , 0 );
|
||||
// . use st->m_coll since XmlDoc just points to it!
|
||||
// . this returns false if "coll" is invalid
|
||||
else if ( ! xd->set3 ( docId , st->m_coll , 0 ) ) {
|
||||
hadSetError:
|
||||
mdelete ( st , sizeof(State2) , "PageGet1" );
|
||||
delete ( st );
|
||||
g_errno = ENOMEM;
|
||||
log("PageGet: set3: %s", mstrerror(g_errno));
|
||||
return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));
|
||||
}
|
||||
// if it blocks while it loads title rec, it will re-call this routine
|
||||
xd->setCallback ( st , processLoopWrapper );
|
||||
|
53
XmlDoc.cpp
53
XmlDoc.cpp
@ -12916,34 +12916,43 @@ SafeBuf *XmlDoc::getTokenizedDiffbotReply ( ) {
|
||||
// in order for us to do the array separation logic below.
|
||||
// we don't want to do this logic for articles because they
|
||||
// contain an image array!!!
|
||||
char *needleA = "\"type\":\"product";
|
||||
char *needleB = "\"type\":\"image";
|
||||
char *productPtr = strstr ( text , needleA );
|
||||
char *imagePtr = strstr ( text , needleB );
|
||||
if ( ! productPtr && ! imagePtr ) {
|
||||
|
||||
// this must be on the FIRST level of the json object, otherwise
|
||||
// we get errors because we got type:article and it
|
||||
// contains an images array!
|
||||
|
||||
long valLen;
|
||||
char *val = getJSONFieldValue ( text , "type", &valLen );
|
||||
|
||||
bool isProduct = false;
|
||||
bool isImage = false;
|
||||
|
||||
if ( val && valLen == 7 && strncmp ( val , "product", 7) == 0 )
|
||||
isProduct = true;
|
||||
|
||||
if ( val && valLen == 5 && strncmp ( val , "image", 5) == 0 )
|
||||
isImage = true;
|
||||
|
||||
if ( ! isProduct && ! isImage ) {
|
||||
m_tokenizedDiffbotReplyValid = true;
|
||||
m_tokenizedDiffbotReplyPtr = &m_diffbotReply;
|
||||
return m_tokenizedDiffbotReplyPtr;
|
||||
}
|
||||
|
||||
|
||||
char *needle1 = ",\"products\":[";
|
||||
char *needle2 = ",\"images\":[";
|
||||
char *parray = strstr ( text , needle1 );
|
||||
char *pstart = NULL;
|
||||
char *newTerm = NULL;
|
||||
if ( parray ) {
|
||||
// point to [
|
||||
pstart = parray + 13 - 1;
|
||||
char *needle;
|
||||
char *newTerm;
|
||||
if ( isProduct ) {
|
||||
needle = ",\"products\":[";
|
||||
newTerm = "product";
|
||||
}
|
||||
else {
|
||||
parray = strstr ( text , needle2 );
|
||||
// point to [
|
||||
if ( parray ) pstart = parray + 11 - 1;
|
||||
needle = ",\"images\":[";
|
||||
newTerm = "image";
|
||||
}
|
||||
|
||||
char *parray = strstr ( text , needle );
|
||||
|
||||
// if not found, no need to do anything...
|
||||
if ( ! parray ) {
|
||||
m_tokenizedDiffbotReplyValid = true;
|
||||
@ -12951,6 +12960,10 @@ SafeBuf *XmlDoc::getTokenizedDiffbotReply ( ) {
|
||||
return m_tokenizedDiffbotReplyPtr;
|
||||
}
|
||||
|
||||
|
||||
// point to [
|
||||
char *pstart = parray + gbstrlen(needle) - 1;
|
||||
|
||||
//
|
||||
// ok, now we have to do some json jiu-jitsu to fix it
|
||||
//
|
||||
@ -43913,6 +43926,7 @@ char *getJSONFieldValue ( char *json , char *field , long *valueLen ) {
|
||||
char *stringStart = NULL;
|
||||
char *p = json;
|
||||
bool gotOne = false;
|
||||
long depth = 0;
|
||||
// scan
|
||||
for ( ; *p ; p++ ) {
|
||||
// escaping a quote? ignore quote then.
|
||||
@ -43921,6 +43935,11 @@ char *getJSONFieldValue ( char *json , char *field , long *valueLen ) {
|
||||
p++;
|
||||
continue;
|
||||
}
|
||||
// count {} depth
|
||||
if ( ! inQuotes ) {
|
||||
if ( *p == '{' ) depth++;
|
||||
if ( *p == '}' ) depth--;
|
||||
}
|
||||
// a quote?
|
||||
if ( *p == '\"' ) {
|
||||
inQuotes = ! inQuotes;
|
||||
@ -43932,6 +43951,8 @@ char *getJSONFieldValue ( char *json , char *field , long *valueLen ) {
|
||||
else if ( ! inQuotes &&
|
||||
! gotOne &&
|
||||
p[1] == ':' &&
|
||||
// {"title":"whatever",...}
|
||||
depth == 1 &&
|
||||
stringStart &&
|
||||
(p - stringStart) == fieldLen &&
|
||||
strncmp(field,stringStart,fieldLen)==0 ) {
|
||||
|
Reference in New Issue
Block a user