mirror of
https://github.com/privacore/open-source-search-engine.git
synced 2025-07-14 02:36:06 -04:00
qa test fixes
This commit is contained in:
@ -302,7 +302,7 @@ bool Matches::set ( XmlDoc *xd ,
|
||||
|
||||
// add the title in
|
||||
if ( ! addMatches ( tt->getTitle() ,
|
||||
tt->getTitleSize() ,
|
||||
tt->getTitleLen() ,
|
||||
MF_TITLEGEN ,
|
||||
xd->m_docId ,
|
||||
niceness ))
|
||||
|
188
PageResults.cpp
188
PageResults.cpp
@ -47,6 +47,14 @@ bool printScoresHeader ( SafeBuf *sb ) ;
|
||||
bool printSingleScore ( SafeBuf *sb , SearchInput *si , SingleScore *ss ,
|
||||
Msg20Reply *mr , Msg40 *msg40 ) ;
|
||||
|
||||
// forward declaration; the definition is further down in this file.
// prints one dmoz entry for category "catId" into "sb", as XML or JSON
// depending on si->m_format. "direct" is written out as the directCatId
// field of the entry.
bool printDmozEntry ( SafeBuf *sb ,
|
||||
long catId ,
|
||||
bool direct ,
|
||||
char *dmozTitle ,
|
||||
char *dmozSummary ,
|
||||
char *dmozAnchor ,
|
||||
SearchInput *si );
|
||||
|
||||
bool sendReply ( State0 *st , char *reply ) {
|
||||
|
||||
long savedErr = g_errno;
|
||||
@ -3597,74 +3605,65 @@ bool printResult ( State0 *st, long ix , long *numPrintedSoFar ) {
|
||||
|
||||
// print all dmoz info for xml/json.
|
||||
// seems like both direct and indirect dmoz entries here.
|
||||
if ( mr->size_dmozTitles > 1 &&
|
||||
if ( mr->size_catIds > 0 &&
|
||||
( si->m_format == FORMAT_JSON ||
|
||||
si->m_format == FORMAT_XML ) ) {
|
||||
char *dmozTitle = mr->ptr_dmozTitles;
|
||||
|
||||
char *dmozTitle = mr->ptr_dmozTitles;
|
||||
char *dmozSummary = mr->ptr_dmozSumms;
|
||||
char *dmozAnchor = mr->ptr_dmozAnchors;
|
||||
long *catIds = mr->ptr_catIds;
|
||||
long numCats = mr->size_catIds / 4;
|
||||
char *dmozAnchor = mr->ptr_dmozAnchors;
|
||||
long *catIds = mr->ptr_catIds;
|
||||
long numCats = mr->size_catIds / 4;
|
||||
// loop through looking for the right ID
|
||||
for (long i = 0; i < numCats ; i++ ) {
|
||||
// assign shit if we match the dmoz cat we are showing
|
||||
//if ( catIds[i] == si->m_catId) break;
|
||||
if ( si->m_format == FORMAT_XML ) {
|
||||
sb->safePrintf("\t\t<dmozEntry>\n");
|
||||
sb->safePrintf("\t\t\t<dmozCatId>%li"
|
||||
"</dmozCatId>\n",catIds[i]);
|
||||
// print the name of the dmoz category
|
||||
sb->safePrintf("\t\t\t<dmozCatStr><![CDATA[");
|
||||
char xbuf[256];
|
||||
SafeBuf xb(xbuf,256,0,false);
|
||||
g_categories->printPathFromId(&xb,
|
||||
catIds[i],
|
||||
false,
|
||||
si->m_isRTL);
|
||||
sb->cdataEncode(xb.getBufStart());
|
||||
sb->safePrintf("]]></dmozCatStr>\n");
|
||||
sb->safePrintf("\t\t\t<dmozTitle><![CDATA[");
|
||||
sb->cdataEncode(dmozTitle);
|
||||
sb->safePrintf("]]></dmozTitle>\n");
|
||||
sb->safePrintf("\t\t\t<dmozSum><![CDATA[");
|
||||
sb->cdataEncode(dmozSummary);
|
||||
sb->safePrintf("]]></dmozSum>\n");
|
||||
sb->safePrintf("\t\t\t<dmozAnchor><![CDATA[");
|
||||
sb->cdataEncode(dmozAnchor);
|
||||
sb->safePrintf("]]></dmozAnchor>\n");
|
||||
sb->safePrintf("\t\t</dmozEntry>\n");
|
||||
}
|
||||
if ( si->m_format == FORMAT_JSON ) {
|
||||
sb->safePrintf("\t\t\"dmozEntry\":{\n");
|
||||
sb->safePrintf("\t\t\t\"dmozCatId\":%li,\n",
|
||||
catIds[i]);
|
||||
// print the name of the dmoz category
|
||||
sb->safePrintf("\t\t\t\"dmozCatStr\":\"");
|
||||
char xbuf[256];
|
||||
SafeBuf xb(xbuf,256,0,false);
|
||||
g_categories->printPathFromId(&xb,
|
||||
catIds[i],
|
||||
false,
|
||||
si->m_isRTL);
|
||||
sb->jsonEncode(xb.getBufStart());
|
||||
sb->safePrintf("\",\n");
|
||||
sb->safePrintf("\t\t\t\"dmozTitle\":\"");
|
||||
sb->jsonEncode(dmozTitle);
|
||||
sb->safePrintf("\",\n");
|
||||
sb->safePrintf("\t\t\t\"dmozSum\":\"");
|
||||
sb->jsonEncode(dmozSummary);
|
||||
sb->safePrintf("\",\n");
|
||||
sb->safePrintf("\t\t\t\"dmozAnchor\":\"");
|
||||
sb->jsonEncode(dmozAnchor);
|
||||
sb->safePrintf("\"\n");
|
||||
sb->safePrintf("\t\t},\n");
|
||||
}
|
||||
dmozTitle +=gbstrlen(dmozTitle)+1;
|
||||
dmozSummary +=gbstrlen(dmozSummary)+1;
|
||||
dmozAnchor += gbstrlen(dmozAnchor)+1;
|
||||
printDmozEntry ( sb,
|
||||
catIds[i],
|
||||
true,
|
||||
dmozTitle,
|
||||
dmozSummary,
|
||||
dmozAnchor ,
|
||||
si );
|
||||
dmozTitle += gbstrlen(dmozTitle ) + 1;
|
||||
dmozSummary += gbstrlen(dmozSummary) + 1;
|
||||
dmozAnchor += gbstrlen(dmozAnchor ) + 1;
|
||||
}
|
||||
}
|
||||
|
||||
if ( mr->size_indCatIds > 0 &&
|
||||
( si->m_format == FORMAT_JSON ||
|
||||
si->m_format == FORMAT_XML ) ) {
|
||||
// print INDIRECT dmoz entries as well
|
||||
long nIndCatids = mr->size_indCatIds / 4;
|
||||
for ( long i = 0; i < nIndCatids; i++ ) {
|
||||
long catId = ((long *)(mr->ptr_indCatIds))[i];
|
||||
if ( si->m_format == FORMAT_XML )
|
||||
sb->safePrintf("\t\t<indirectDmozCatId>"
|
||||
"%li</indirectDmozCatId>\n",
|
||||
catId);
|
||||
if ( si->m_format == FORMAT_JSON )
|
||||
sb->safePrintf("\t\t\"indirectDmozCatId\":"
|
||||
"%li,\n",catId);
|
||||
}
|
||||
// print INDIRECT dmoz entries as well
|
||||
// long nIndCatids = mr->size_indCatIds / 4;
|
||||
// dmozTitle = mr->ptr_indDmozTitles;
|
||||
// dmozSummary = mr->ptr_dmozSumms;
|
||||
// dmozAnchor = mr->ptr_dmozAnchors;
|
||||
// for ( long i = 0; i < nIndCatids; i++ ) {
|
||||
// long catId = ((long *)(mr->ptr_indCatIds))[i];
|
||||
// printDmozEntry ( sb ,
|
||||
// catId ,
|
||||
// false,
|
||||
// dmozTitle,
|
||||
// dmozSummary,
|
||||
// dmozAnchor ,
|
||||
// si );
|
||||
// dmozTitle += gbstrlen(dmozTitle ) + 1;
|
||||
// dmozSummary += gbstrlen(dmozSummary) + 1;
|
||||
// dmozAnchor += gbstrlen(dmozAnchor ) + 1;
|
||||
// }
|
||||
}
|
||||
|
||||
|
||||
/////
|
||||
//
|
||||
@ -7475,3 +7474,72 @@ bool sendPageWidget ( TcpSocket *s , HttpRequest *hr ) {
|
||||
"UTF-8"); // charset
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
// Print a single dmoz entry for category "catId" into "sb".
// . si->m_format selects the output: FORMAT_XML emits a <dmozEntry>
//   element, FORMAT_JSON emits a "dmozEntry" object followed by a comma
// . any other format prints nothing
// . "direct" is written out as the integer directCatId field
// . the category path string is produced by
//   g_categories->printPathFromId() into a local scratch buffer
// . dmozTitle, dmozSummary and dmozAnchor go through cdataEncode() for
//   XML and jsonEncode() for JSON
// . always returns true; results of the individual prints are not checked
bool printDmozEntry ( SafeBuf *sb ,
		      long catId ,
		      bool direct ,
		      char *dmozTitle ,
		      char *dmozSummary ,
		      char *dmozAnchor ,
		      SearchInput *si ) {

	bool wantXml  = ( si->m_format == FORMAT_XML  );
	bool wantJson = ( si->m_format == FORMAT_JSON );

	// the remaining formats get no dmoz entry at all
	if ( ! wantXml && ! wantJson ) return true;

	long directFlag = (long)direct;

	// scratch space that receives the name of the dmoz category
	char pathStorage[256];
	SafeBuf catPath ( pathStorage , 256 , 0 , false );

	if ( wantXml ) {
		sb->safePrintf("\t\t<dmozEntry>\n");
		sb->safePrintf("\t\t\t<dmozCatId>%li</dmozCatId>\n",catId);
		sb->safePrintf("\t\t\t<directCatId>%li</directCatId>\n",
			       directFlag);

		// category name
		sb->safePrintf("\t\t\t<dmozCatStr><![CDATA[");
		g_categories->printPathFromId ( &catPath ,
						catId ,
						false ,
						si->m_isRTL );
		sb->cdataEncode ( catPath.getBufStart() );
		sb->safePrintf("]]></dmozCatStr>\n");

		// title
		sb->safePrintf("\t\t\t<dmozTitle><![CDATA[");
		sb->cdataEncode ( dmozTitle );
		sb->safePrintf("]]></dmozTitle>\n");

		// summary
		sb->safePrintf("\t\t\t<dmozSum><![CDATA[");
		sb->cdataEncode ( dmozSummary );
		sb->safePrintf("]]></dmozSum>\n");

		// anchor
		sb->safePrintf("\t\t\t<dmozAnchor><![CDATA[");
		sb->cdataEncode ( dmozAnchor );
		sb->safePrintf("]]></dmozAnchor>\n");

		sb->safePrintf("\t\t</dmozEntry>\n");
		return true;
	}

	// otherwise it is json
	sb->safePrintf("\t\t\"dmozEntry\":{\n");
	sb->safePrintf("\t\t\t\"dmozCatId\":%li,\n",catId);
	sb->safePrintf("\t\t\t\"directCatId\":%li,\n",directFlag);

	// category name
	sb->safePrintf("\t\t\t\"dmozCatStr\":\"");
	g_categories->printPathFromId ( &catPath ,
					catId ,
					false ,
					si->m_isRTL );
	sb->jsonEncode ( catPath.getBufStart() );
	sb->safePrintf("\",\n");

	// title
	sb->safePrintf("\t\t\t\"dmozTitle\":\"");
	sb->jsonEncode ( dmozTitle );
	sb->safePrintf("\",\n");

	// summary
	sb->safePrintf("\t\t\t\"dmozSum\":\"");
	sb->jsonEncode ( dmozSummary );
	sb->safePrintf("\",\n");

	// anchor is the last member, so no comma after it
	sb->safePrintf("\t\t\t\"dmozAnchor\":\"");
	sb->jsonEncode ( dmozAnchor );
	sb->safePrintf("\"\n");

	sb->safePrintf("\t\t},\n");
	return true;
}
|
||||
|
@ -1868,6 +1868,8 @@ bool Title::copyTitle ( Words *w , Pos *pos ,
|
||||
(srcEnd[-1] == ':' ||
|
||||
srcEnd[-1] == ' ' ||
|
||||
srcEnd[-1] == '-' ||
|
||||
srcEnd[-1] == '\n' ||
|
||||
srcEnd[-1] == '\r' ||
|
||||
srcEnd[-1] == '|' ) ;
|
||||
srcEnd-- );
|
||||
|
||||
|
2
Title.h
2
Title.h
@ -48,7 +48,7 @@ class Title {
|
||||
|
||||
|
||||
char *getTitle ( ) { return m_title; };
|
||||
long getTitleSize ( ) { return m_titleBytes; }; // does NOT include \0
|
||||
long getTitleLen ( ) { return m_titleBytes; }; // does NOT include \0
|
||||
|
||||
|
||||
bool copyTitle ( class Words *words, class Pos *pos,
|
||||
|
21
XmlDoc.cpp
21
XmlDoc.cpp
@ -3557,15 +3557,16 @@ bool XmlDoc::setDmozInfo () {
|
||||
ds += dslen;
|
||||
da += dalen;
|
||||
// null terminate
|
||||
if ( dtlen>0 && dt[dtlen-1]!='\0' ) { *dt++=0; dtlen++; }
|
||||
if ( dslen>0 && ds[dslen-1]!='\0' ) { *ds++=0; dslen++; }
|
||||
if ( dalen>0 && da[dalen-1]!='\0' ) { *da++=0; dalen++; }
|
||||
// must always be something!
|
||||
if ( dtlen==0 ) {*dt++=0; dtlen++;}
|
||||
if ( dslen==0 ) {*ds++=0; dslen++;}
|
||||
if ( dalen==0 ) {*da++=0; dalen++;}
|
||||
// terminate each of the three parallel buffers exactly once per entry.
// the anchor buffer must get its own \0 here: terminating the summary
// buffer twice instead leaves anchors unterminated and inserts an extra
// empty summary, so the title/summary/anchor lists fall out of sync.
*dt++ = 0;
*ds++ = 0;
*da++ = 0;
|
||||
}
|
||||
|
||||
// if empty, make it a \0 to keep in sync with the rest
|
||||
if ( dt == titles ) *dt++ = '\0';
|
||||
if ( ds == summs ) *ds++ = '\0';
|
||||
if ( da == anchors ) *da++ = '\0';
|
||||
|
||||
// set these
|
||||
ptr_dmozTitles = titles;
|
||||
ptr_dmozSumms = summs;
|
||||
@ -29083,9 +29084,11 @@ Msg20Reply *XmlDoc::getMsg20Reply ( ) {
|
||||
Title *ti = getTitle();
|
||||
if ( ! ti || ti == (Title *)-1 ) return (Msg20Reply *)ti;
|
||||
char *tit = ti->getTitle();
|
||||
long titLen = ti->getTitleSize();
|
||||
long titLen = ti->getTitleLen();
|
||||
reply-> ptr_tbuf = tit;
|
||||
reply->size_tbuf = titLen + 1; // include \0
|
||||
// sanity
|
||||
if ( tit[titLen] != '\0' ) { char *xx=NULL;*xx=0; }
|
||||
if ( ! tit || titLen <= 0 ) {
|
||||
reply->ptr_tbuf = NULL;
|
||||
reply->size_tbuf = 0;
|
||||
@ -29783,7 +29786,7 @@ Msg20Reply *XmlDoc::getMsg20Reply ( ) {
|
||||
Title *ti = getTitle();
|
||||
if ( ! ti || ti == (Title *)-1 ) return (Msg20Reply *)ti;
|
||||
char *tit = ti->getTitle();
|
||||
long titLen = ti->getTitleSize();
|
||||
long titLen = ti->getTitleLen();
|
||||
reply-> ptr_tbuf = tit;
|
||||
reply->size_tbuf = titLen + 1; // include \0
|
||||
if ( ! tit || titLen <= 0 ) {
|
||||
|
20
qa.cpp
20
qa.cpp
@ -326,7 +326,7 @@ bool qainject ( ) {
|
||||
usleep(1500000);
|
||||
s_x5 = true;
|
||||
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&q=%2Bthe",
|
||||
838663174 ) )
|
||||
702467314 ) )
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -335,7 +335,7 @@ bool qainject ( ) {
|
||||
if ( ! s_x7 ) {
|
||||
s_x7 = true;
|
||||
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&"
|
||||
"q=sports+news",-1013701120 ) )
|
||||
"q=sports+news",2009472889 ) )
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -396,7 +396,7 @@ bool qainject ( ) {
|
||||
usleep(1500000);
|
||||
s_y4 = true;
|
||||
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&q=%2Bthe",
|
||||
1604983811 ) )
|
||||
-1804253505 ) )
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -406,7 +406,7 @@ bool qainject ( ) {
|
||||
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&q=sports"
|
||||
"+news&ns=1&tml=20&smxcpl=30&"
|
||||
"sw=10&showimages=1"
|
||||
,1707718509 ) )
|
||||
,-1874756636 ) )
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -416,7 +416,7 @@ bool qainject ( ) {
|
||||
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&q=sports"
|
||||
"+news&ns=1&tml=20&smxcpl=30&"
|
||||
"sw=10&showimages=0&hacr=1"
|
||||
,1098495613 ) )
|
||||
,1651330319 ) )
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -426,7 +426,7 @@ bool qainject ( ) {
|
||||
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&q=sports"
|
||||
"+news&ns=1&tml=20&smxcpl=30&"
|
||||
"sw=10&showimages=0&sc=1"
|
||||
,810978859 ) )
|
||||
,-1405546537 ) )
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -688,7 +688,7 @@ bool qaspider1 ( ) {
|
||||
s_t0 = true;
|
||||
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&"
|
||||
"q=gbhopcount%3A0",
|
||||
304539772 ) )
|
||||
908338607 ) )
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -698,7 +698,7 @@ bool qaspider1 ( ) {
|
||||
s_y5 = true;
|
||||
if ( ! getUrl ( "/search?c=qatest123&format=json&"
|
||||
"q=gbfacetstr%3Agbxpathsitehash2492664135",
|
||||
-638832233 ) )
|
||||
55157060 ) )
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -854,7 +854,7 @@ bool qaspider2 ( ) {
|
||||
static bool s_t0 = false;
|
||||
if ( ! s_t0 ) {
|
||||
s_t0 = true;
|
||||
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&"
|
||||
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&n=500&"
|
||||
"q=gbhopcount%3A0",
|
||||
0 ) )
|
||||
return false;
|
||||
@ -919,7 +919,7 @@ bool qaspider ( ) {
|
||||
|
||||
// do first qa test for spider
|
||||
// returns true when done, false when blocked
|
||||
//if ( ! qaspider1() ) return false;
|
||||
if ( ! qaspider1() ) return false;
|
||||
|
||||
// do second qa test for spider
|
||||
// returns true when done, false when blocked
|
||||
|
Reference in New Issue
Block a user