mirror of https://github.com/privacore/open-source-search-engine.git, synced 2025-07-12 02:26:07 -04:00
updating help table. still more work on query.cpp to do
PageRoot.cpp | 74
@@ -2675,16 +2675,15 @@ bool sendPageHelp ( TcpSocket *sock , HttpRequest *hr ) {
sb.safePrintf(
"<br>"
" <table width=650px cellpadding=5 cellspacing=0 border=0>"
"<table width=650px cellpadding=5 cellspacing=0 border=0>"
""
"<tr bgcolor=#0340fd>"
""
" <th><font color=33dcff>Search</font></th>"
" <th><font color=33dcff>Description</font></th>"
" </tr>"
" <tr> "
" <td><a href=/search?q=cat+dog>cat "
" dog</a></td>"
"<th><font color=33dcff>Search</font></th>"
"<th><font color=33dcff>Description</font></th>"
"</tr>"
"<tr> "
"<td><a href=/search?q=cat+dog>cat dog</a></td>"
" <td>Search results have the word <em>cat</em> and the word <em>dog</em> "
" in them. They could also have <i>cats</i> and <i>dogs</i>.</td>"
" </tr>"
@@ -2783,11 +2782,54 @@ bool sendPageHelp ( TcpSocket *sock , HttpRequest *hr ) {
" </tr>"
""
""
);

sb.safePrintf(
// spacer
//"<tr><td><br></td><td></td></tr>"

//"<tr bgcolor=#0340fd>"
// "<td><font color=33dcff><b>Special Query</b>"
// "</font></td>"
//"<td><font color=33dcff><b>Description</b></font></td>"
// "</tr>"
"<tr><td>document<b>.</b>title<b>:</b>paper</td><td>"
"That query will match a JSON document like "
"<i>"
"{ \"document\":{\"title\":\"This is a good paper.\" "
"}}</i> or, alternatively, an XML document like <i>"
);
sb.htmlEncode("<document><title>This is a good paper"
"</title></document>" );
sb.safePrintf("</i></td></tr>\n");

// table of the query keywords
long n = getNumFieldCodes();
for ( long i = 0 ; i < n ; i++ ) {
// get field #i
QueryField *f = &g_fields[i];
// print it out
char *d = f->desc;
// fix table internal cell bordering
if ( d[0] == '\0' ) d = "&nbsp;";
sb.safePrintf("<tr bgcolor=#%s>"
"<td><b>%s</b></td><td>%s</td></tr>\n",
LIGHT_BLUE,f->example,d);
}

sb.safePrintf(
" <tr> "
" <td style=padding-bottom:12px;> </td>"
" <td style=padding-bottom:12px;> </td>"
" </tr>"
""

"<tr bgcolor=#0340fd>"
""
" <th><font color=33dcff>Boolean Search</font></th>"
@@ -2878,16 +2920,26 @@ bool sendPageHelp ( TcpSocket *sock , HttpRequest *hr ) {
" operator can optionally preceed the left or the right operand.</td>"
" </tr>"
""
" </table>"
//" </table>"
""
""
""
"</td></tr>"
"</table>"
"<br>"
//"</td></tr>"
//"</table>"
//"<br>"
);

//sb.safePrintf("<tr><td></td><td></td></tr>\n");
//sb.safePrintf("<tr><td></td><td></td></tr>\n");
//sb.safePrintf("<tr><td></td><td></td></tr>\n");
//sb.safePrintf("<tr><td></td><td></td></tr>\n");

sb.safePrintf("</table>");

//sb.safePrintf("</form>\n");
sb.safePrintf("<br>\n");
sb.safePrintf("\n");
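A minimal, self-contained sketch of what the new per-field loop above emits. This is not the project's sb.safePrintf()/QueryField code; the DemoField struct, the sample entries, and the LIGHT_BLUE hex value are assumptions made only for illustration.

// Hypothetical, standalone sketch: mimics the help-table loop above to show
// what one emitted table row looks like for a g_fields-style entry.
#include <cstdio>

struct DemoField {          // stands in for QueryField's example/desc members
    const char *example;
    const char *desc;
};

int main() {
    const char *LIGHT_BLUE = "e0e0ff";   // assumed hex value; the real macro may differ
    DemoField fields[] = {
        { "url:www.abc.com/page.html", "Matches the page with that exact url." },
        { "gbhasext:1",                "" }   // empty desc gets a &nbsp; placeholder
    };
    for ( const DemoField &f : fields ) {
        const char *d = f.desc;
        if ( d[0] == '\0' ) d = "&nbsp;";     // keep the empty cell's border from collapsing
        printf("<tr bgcolor=#%s><td><b>%s</b></td><td>%s</td></tr>\n",
               LIGHT_BLUE, f.example, d);
    }
    return 0;
}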
Query.cpp | 493
@@ -3162,96 +3162,457 @@ static bool s_isInitialized = false;

// 3rd field = m_hasColon
struct QueryField g_fields[] = {
{"url", FIELD_URL, true,"Match the exact url. Example: url:www.gigablast.com/addurl.htm"},
{"ext", FIELD_EXT, true,"Match the url extension. Example: ext:htm or ext:mpeg to find urls ending in .htm or .mpeg respectively."},

{"url2", FIELD_URL, true,"Match the exact url. Example: url:www.gigablast.com/addurl.htm"},
{"ext2", FIELD_EXT, true,"Match the url extension. Example: ext:htm or ext:mpeg to find urls ending in .htm or .mpeg respectively."},
{"url",
FIELD_URL,
true,
"url:www.abc.com/page.html",
"Matches the page with that exact url. Uses the first url, not "
"the url it redirects to, if any." ,
0 },

{"link", FIELD_LINK, true,"Match pages that link to the given url. Example: link:www.gigablast.com will return all pages linking to the www.gigablast.com page."},
{"links", FIELD_LINKS, true,"Same as link:."},
{"ilink", FIELD_ILINK, true,"Similar to above."},
{"sitelink", FIELD_SITELINK, true,"Matches all pages that link to the given site. Example:sitelink:www.gigablast.com matches all pages that link to some page on the www.gigablast.com site."},
{"ext",
FIELD_EXT,
true,
"ext:doc",
"Match documents whose url ends in the <i>.doc</i> file extension.",
0 },

{"url2",
FIELD_URL,
true,
"url2:www.abc.com/page.html",
"Matches the <i>Spider Status</i> documents for the specified url. "
"These special documents "
"let you know exactly when the url was attempted to be "
"spidered and the outcome.",
0 },

{"link",
FIELD_LINK,
true,
"link:http://www.gigablast.com/foobar.html",
"Matches all the documents that have a link to "
"http://www.gigablast.com/foobar.html",
0 },

//{"links", FIELD_LINKS, true,"Same as link:."},
//{"ilink", FIELD_ILINK, true,"Similar to above."},

{"sitelink",
FIELD_SITELINK,
true,
"sitelink:abc.foobar.com",
"Matches all documents that link to any page on the "
"<i>abc.foobar.com</i> site.",
0 },

{"site",
FIELD_SITE,
true,
"site:mysite.com",
"Matches all documents on the mysite.com domain.",
0 },

{"site",
FIELD_SITE,
true,
"site:www.mysite.com/dir1/dir2/",
"Matches all documents whose url starts with "
"www.mysite.com/dir1/dir2/",
QTF_DUP },

{"site", FIELD_SITE, true,"Matches all pages from the given site. Example: site:www.gigablast.com will return all the pages on the gigablast site"},
//{"coll", FIELD_COLL, true,"Not sure if this works."},
{"ip", FIELD_IP, true,"Matches all pages with the given ip. Example:1.2.3.4 will match all pages whose urls have that IP address."},
{"inurl", FIELD_SUBURL, true,"Matches all pages that have the given terms in the url. Example inurl:water will match all pages whose url has the word water in it, but the word must be delineated by punctuation."},
{"ip",
FIELD_IP,
true,
"ip:1.2.3.4",
"Matches all documents whose IP is 1.2.3.4.",
0 },

{"site2", FIELD_SITE, true,"Matches all pages from the given site. Example: site:www.gigablast.com will return all the pages on the gigablast site"},
//{"coll", FIELD_COLL, true,"Not sure if this works."},
{"ip2", FIELD_IP, true,"Matches all pages with the given ip. Example:1.2.3.4 will match all pages whose urls have that IP address."},
{"inurl2", FIELD_SUBURL, true,"Matches all pages that have the given terms in the url. Example inurl:water will match all pages whose url has the word water in it, but the word must be delineated by punctuation."},
{"ip",
FIELD_IP,
true,
"ip:1.2.3",
"Matches all documents whose IP STARTS with 1.2.3.",
QTF_DUP },

{"suburl", FIELD_SUBURL, true,"Same as inurl."},
{"intitle", FIELD_TITLE, false,"Matches all pages that have pages that have the given term in their title. Example: title:web returns all pages that have the word web in their title."},
{"title", FIELD_TITLE, false,"Same as intitle:"},
{"isclean", FIELD_ISCLEAN, true,"Matches all pages that are deemed non-offensive and safe for children."},
{"gbrss", FIELD_GBRSS, true,"Matches all pages that are rss feeds."},
//{"gbruleset",FIELD_GBRULESET, true,"Obsolete."},
{"type", FIELD_TYPE, false,"Matches all pages of the specified file type. Example: type:pdf will match pdf documents, regardless of their file extension. Examples: type:doc type:status type:json type:xls"},
{"filetype", FIELD_TYPE, false,"Same as type:"},
{"gbisadult",FIELD_GENERIC,false,"use gbisadult:0 and gbisadult:1 to restrict results to non-adult and adult documents respectively."},
{"gbimage",FIELD_URL,false,"use gbimage:<url> to return all documents containing that image url."},
{"inurl",
FIELD_SUBURL,
true,
"inurl:dog",
"Matches all documents that have the word dog in their url, like "
"http://www.mysite.com/dog/food.html. However will not match "
"http://www.mysite.com/dogfood.html because it is not an "
"individual word. It must be delineated by punctuation.",
0 },

{"gbstatus",FIELD_GENERIC,false,"If document is a spider reply, then search the spider status as a number using this. 0 means success, so gbstatus:0 would return all successful statuses."},
{"gbstatusmsg",FIELD_GENERIC,false,"If document is a spider reply, then search the spider status description, which might be something like 'TCP Timed out' or 'Robots.txt disallows' or 'Success', if no error."},

{"gbhasthumbnail",FIELD_GENERIC,false,"use gbhasthumbnail:0 and gbhasthumbnail:1 to restrict results to those that do not have or have thumbnails respectively."},
{"gbtag*", FIELD_TAG, false,"Matches all pages whose tag named * have the specified value. Example: gbtagingoogle:1 matches all pages that have a value of 1 for their ingoogle tag in tagdb."},
{"zip", FIELD_ZIP, false,"Matches all pages that have the specified zip code in their meta zip code tag. Not to be used with events."},
{"zipcode", FIELD_ZIP, false,"Same as zip:"},
{"suburl",
FIELD_SUBURL,
true,
"suburl:dog",
"Same as inurl.",
0},

{"intitle",
FIELD_TITLE,
false,
"title:cat",
"Matches all the documents that have the word cat in their "
"title.",
0 },

{"intitle",
FIELD_TITLE,
false,
"title:\"cat food\"",
"Matches all the documents that have the phrase \"cat food\" "
"in their title.",
QTF_DUP },

{"title",
FIELD_TITLE,
false,
"title:cat",
"Same as intitle:",
0},

//{"isclean", FIELD_ISCLEAN, true,"Matches all pages that are deemed non-offensive and safe for children."},

{"gbinrss",
FIELD_GBRSS,
true,
"gbinrss:1",
"Matches all documents that are in RSS feeds. Likewise, use "
"<i>gbinrss:0</i> to match all documents that are NOT in RSS feeds.",
0},

{"type",
FIELD_TYPE,
false,
"type:pdf",
"Matches all documents that are PDFs. Other possible types include "
"<i>html, text, xml, pdf, doc, xls, ppt, ps, css, json, status.</i> "
"<i>status</i> matches special documents that are stored every time "
"a url is spidered so you can see all the spider attempts and when "
"they occurred as well as the outcome.",
0},

{"filetype",
FIELD_TYPE,
false,
"filetype:pdf",
"Same as type: above.",
0},

{"gbisadult",
FIELD_GENERIC,
false,
"gbisadult:1",
"Matches all documents that have been detected as adult documents "
"and may be unsuitable for children. Likewise, use "
"<i>gbisadult:0</i> to match all documents that were NOT detected "
"as adult documents.",
0},

{"gbimage",
FIELD_URL,
false,
"gbimage:site.com/image.jpg",
"Matches all documents that contain the specified image.",
0},

{"gbhasthumbnail",
FIELD_GENERIC,
false,
"gbhasthumbnail:1",
"Matches all documents for which Gigablast detected a thumbnail. "
"Likewise use <i>gbhasthumbnail:0</i> to match all documents that "
"do not have thumbnails.",
0},

{"gbtag*",
FIELD_TAG,
false,
"gbtag*",
"Matches all documents whose tag named * have the specified value "
"in the tagdb entry for the url. Example: gbtagsitenuminlinks:2 "
"matches all documents that have 2 qualified "
"inlinks pointing to their site "
"based on the tagdb record. You can also provide your own "
"tags in addition to the tags already present. See the <i>tagdb</i> "
"menu for more information.",
0},

{"gbzipcode",
FIELD_ZIP,
false,
"gbzip:90210",
"Matches all documents that have the specified zip code "
"in their meta zip code tag.",
0},

//{"range", FIELD_RANGE, false,""}, // obsolete, datedb replaced
{"charset", FIELD_CHARSET, false,"Matches all pages in the given character set."},
{"urlhash",FIELD_URLHASH, false,""},
{"urlhashdiv10",FIELD_URLHASHDIV10, false,""},
{"urlhashdiv100",FIELD_URLHASHDIV100, false,""},
{"gblang",FIELD_GBLANG,false,"Matches all pages in the given language. Examples: gblang:en gblang:fr gblang:de"},
{"gbquality",FIELD_GBQUALITY,true,""},
{"gblinktextin",FIELD_LINKTEXTIN,true,""},
{"gblinktextout",FIELD_LINKTEXTOUT,true,""},
{"gbkeyword",FIELD_KEYWORD,true,""},
{"gbcharset", FIELD_CHARSET, false,""},
{"gbpathdepth", FIELD_GBOTHER, false,"the path depth of the url's path."},
{"gbhopcount", FIELD_GBOTHER, false,"the hop count of the url. roots are 0."},
{"gbhasfilename", FIELD_GBOTHER, false,""},
{"gbiscgi", FIELD_GBOTHER, false,""},
{"gbhasext", FIELD_GBOTHER, false,""},
{"gbsubmiturl", FIELD_GBOTHER, false,""},

{"gbcharset",
FIELD_CHARSET,
false,
"gbcharset:utf-8",
"Matches all documents originally in the Utf-8 charset. "
"Available character sets are listed in the <i>iana_charset.cpp</i> "
"file in the open source distribution. There are a lot. Some "
"more popular ones are: <i>us, latin1, iso-8859-1, csascii, ascii, "
"latin2, latin3, latin4, greek, shift_jis.",
0},

// this just complicates things for now, so comment out
//{"urlhash",FIELD_URLHASH, false,""},
//{"urlhashdiv10",FIELD_URLHASHDIV10, false,""},
//{"urlhashdiv100",FIELD_URLHASHDIV100, false,""},

{"gblang",
FIELD_GBLANG,
false,
"gblang:de",
"Matches all documents in german. "
"The supported language abbreviations "
"are at the bottom of the <i>url filters</i> page. Some more "
"common ones are <i>en, es, fr, zh_cn</i>.",
0},

//{"gbquality",FIELD_GBQUALITY,true,""},
//{"gblinktextin",FIELD_LINKTEXTIN,true,""},
//{"gblinktextout",FIELD_LINKTEXTOUT,true,""},
//{"gbkeyword",FIELD_KEYWORD,true,""},
//{"gbcharset", FIELD_CHARSET, false,""},

{"gbpathdepth",
FIELD_GBOTHER,
false,
"gbpathdepth:3",
"Matches all documents whose url has 3 path components to it like "
"http://somedomain.com/dir1/dir2/dir3/foo.html",
0},

{"gbhopcount",
FIELD_GBOTHER,
false,
"gbhopcount:2",
"Matches all documents that are a minimum of two link hops away "
"from a root url.",
0},

{"gbhasfilename",
FIELD_GBOTHER,
false,
"gbhasfilename:1",
"Matches all documents whose url ends in a filename like "
"<i>http://somedomain.com/dir1/myfile</i> and not "
"<i>http://somedomain.com/dir1/dir2/</i>. Likewise, use "
"<i>gbhasfilename:0</i> to match all the documents that do not "
"have a filename in their url.",
0},

{"gbiscgi",
FIELD_GBOTHER,
false,
"gbiscgi:1",
"Matches all documents that have a question mark in their url. "
"Likewise gbiscgi:0 matches all documents that do not.",
0},

{"gbhasext",
FIELD_GBOTHER,
false,
"gbhasext:1",
"Matches all documents that have a file extension in their url. "
"Likewise, <i>gbhasext:0</i> matches all documents that do not have "
"a file extension in their url.",
0},

{"gbsubmiturl",
FIELD_GBOTHER,
false,
"gbsubmiturl:domain.com/process.php",
"Matches all documents that have a form that submits to the "
"specified url.",
0},

//
// for content type CT_STATUS documents (Spider status docs)
{"gbpathdepth2", FIELD_GBOTHER, false,"the path depth of the url's path."},
{"gbhopcount2", FIELD_GBOTHER, false,"the hop count of the url. roots are 0."},
{"gbhasfilename2", FIELD_GBOTHER, false,""},
{"gbiscgi2", FIELD_GBOTHER, false,""},
{"gbhasext2", FIELD_GBOTHER, false,""},
//

{"gbstatus",
FIELD_GENERIC,
false,
"gbstatus:0",
"Matches all special spider status documents that spidered "
"their url successfully. Replace <i>0</i> with other numeric error "
"codes to get the other outcomes.",
0},

{"gbstatusmsg",
FIELD_GENERIC,
false,
"gbstatusmsg:tcp",
"Matches all special spider status documents that had a status "
"message containing the word <i>tcp</i> like in "
"<i>TCP Timed Out</i>. Similarly, gbstatus:success, "
"gbstatus:\"robots.txt\" are other possibilities.",
0},

{"site2",
FIELD_SITE,
true,
"site2:mysite.com",
"Matches all the special spider status documents on the "
"mysite.com domain.",
0 },

{"ip2",
FIELD_IP,
true,
"ip2:1.2.3.4",
"Matches all the special spider status "
"documents whose IP is 1.2.3.4.",
0 },

{"inurl2",
FIELD_SUBURL,
true,
"inurl2:dog",
"Matches all the special spider status "
"documents that have the word dog in their url, like "
"http://www.mysite.com/dog/food.html. However will not match "
"http://www.mysite.com/dogfood.html because it is not an "
"individual word. It must be delineated by punctuation.",
0 },

{"gbpathdepth2",
FIELD_GBOTHER,
false,
"gbpathdepth2:2",
"Similar to gbpathdepth: described above but for special "
"spider status documents.",
0},

{"gbhopcount2",
FIELD_GBOTHER,
false,
"gbhopcount2:3",
"Similar to gbhopcount: described above but for special "
"spider status documents.",
0},

{"gbhasfilename2",
FIELD_GBOTHER,
false,
"gbhasfilename2:1",
"Similar to gbhasfilename: described above but for special "
"spider status documents.",
0},

{"gbiscgi2",
FIELD_GBOTHER,
false,
"gbiscgi2:1",
"Similar to gbiscgi: described above but for special "
"spider status documents.",
0},

{"gbhasext2",
FIELD_GBOTHER,
false,
"gbhasext2:1",
"Similar to gbhasext: described above but for special "
"spider status documents.",
0}

/*
left off here

//{"qdom", FIELD_QUOTA, false,""},
//{"qhost", FIELD_QUOTA, false,""},
{"gbtagvector", FIELD_GBTAGVECTOR, false,""},

{"gbgigabitvector", FIELD_GBGIGABITVECTOR, false,""},
{"gbsamplevector", FIELD_GBSAMPLEVECTOR, false,""},
{"gbcontenthash", FIELD_GBCONTENTHASH, false,""},

{"gbsortby", FIELD_GBSORTBY, false,
"Example: gbsortby:price. Fields can be "
"in JSON or in meta tag."},
"dog gbsortbyint:gbspiderdate",
"Sort the search results that contain 'dog' by "
"the date they were last spidered, with the newest "
"on top."},

{"gbrevsortby", FIELD_GBREVSORTBY, false,
"Example: gbrevsortby:item.price . "
"Fields can be in JSON or in meta tag."},
"dog gbrevsortbyint:gbspiderdate",
"Sort the search results that contain 'dog' by "
"the date they were last spidered, but with the "
"oldest on top."},

{"gbsortbyfloat", FIELD_GBSORTBY, false,
"cameras gbsortbyfloat:price","Sort all search results that "
"contain 'camera' by price. <i>price</i> can be a JSON field or "
"in a meta tag, or in an xml <price> tag.", 0 },

{"gbsortbyfloat", FIELD_GBSORTBY, false,
"cameras gbsortbyfloat:product.price","Sort all search results that "
"contain 'camera' by price. <i>price</i> can be in a JSON document "
"like "
"<i>{ \"product\":{\"price\":1500.00}} "
"</i> or, alternatively, an XML document like <i>"
"<product><price>1500.00</price></product>"
"</i>", QTF_DUP },

{"gbrevsortbyfloat",
FIELD_GBREVSORTBY,
false,
"Example: gbrevsortbyfloat:item.price . "
"Fields can be in JSON or in meta tag.",
0},

// gbmin:price:1.23
{"gbmin", FIELD_GBNUMBERMIN, false,"Usage: gbmin:price:1.99 . Numeric "
"fields can be in JSON or in meta tag."},
{"gbmax", FIELD_GBNUMBERMAX, false,"Usage: gbmax:price:1.99"},

{"gbminfloat", FIELD_GBNUMBERMIN, false,"Usage: gbminfloat:price:1.99 . Numeric "
"fields can be in JSON or in meta tag."},
{"gbmaxfloat", FIELD_GBNUMBERMAX, false,"Usage: gbmaxfloat:price:1.99"},

{"gbdocspiderdate",FIELD_GENERIC,false,
"Example: gbspiderdate:1400081479 will return results that have "
@@ -3344,6 +3705,13 @@ struct QueryField g_fields[] = {

//{"gbsectionhash" ,FIELD_GBSECTIONHASH,false,"Internal use only."},

// they don't need to know about this
{"gbtagvector", FIELD_GBTAGVECTOR, false,"","",QTF_INTERNAL},
{"gbgigabitvector", FIELD_GBGIGABITVECTOR, false,"","",QTF_INTERNAL},
{"gbsamplevector", FIELD_GBSAMPLEVECTOR, false,"","",QTF_INTERNAL},
{"gbcontenthash", FIELD_GBCONTENTHASH, false,"","",QTF_INTERNAL},

{"gbduphash" ,FIELD_GBOTHER,false,"Internal use only."},
{"gbsitetemplate" ,FIELD_GBOTHER,false,"Internal use only."},
{"gboutlinkedtitle" ,FIELD_GBOTHER,false,"gboutlinkedtitle:0 and gboutlinkedtitle:1 matches events whose title is not in and in a hyperlink, respectively."},
@@ -3361,6 +3729,7 @@ struct QueryField g_fields[] = {
"were extract from this parent url. Example: "
"gbparenturl:www.gigablast.com/addurl.htm"},
{"gbdocid",FIELD_GBDOCID,false,"restrict results to this docid"}
*/

};
@@ -3384,6 +3753,8 @@ static bool initFieldTable(){

// now add in all the stop words
long n = getNumFieldCodes();
for ( long i = 0 ; i < n ; i++ ) {
// skip if dup
if ( g_fields[i].m_flag & QTF_DUP ) continue;
long long h = hash64b ( g_fields[i].text );
// store the entity index in the hash table as score
if ( ! s_table.addTerm ( &h, i+1 ) ) return false;
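A small standalone sketch of why initFieldTable() skips QTF_DUP entries when hashing field names: duplicate names such as the second "site" row exist only so the help page can show extra examples, and the first entry should keep the lookup slot. The Field struct and the std::unordered_map below stand in for the real g_fields/s_table and are assumptions for illustration only.

// Hypothetical sketch, not the project's hash-table code: shows why entries
// flagged QTF_DUP (second/third examples of the same field name) are skipped
// when building the field-name lookup table.
#include <cstdio>
#include <string>
#include <unordered_map>

#define QTF_DUP 0x01

struct Field { const char *text; char flag; };

int main() {
    Field fields[] = {
        { "site", 0       },   // primary "site" entry
        { "site", QTF_DUP },   // extra help-page example, same field name
        { "ip",   0       }
    };
    std::unordered_map<std::string, int> table;   // field name -> index+1
    int n = sizeof(fields) / sizeof(fields[0]);
    for ( int i = 0 ; i < n ; i++ ) {
        if ( fields[i].flag & QTF_DUP ) continue; // dup would clobber the primary slot
        table[fields[i].text] = i + 1;
    }
    printf("site -> entry #%d\n", table["site"]); // prints 1, the primary entry
    return 0;
}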
Query.h | 15
@@ -136,11 +136,18 @@ char getFieldCode3 ( long long h64 ) ;

long getNumFieldCodes ( );

// . values for QueryField::m_flag
// . QTF_DUP means it is just for the help page in PageRoot.cpp to
//   illustrate a second or third example
#define QTF_DUP 0x01

struct QueryField {
	char *text;
	char field;
	bool hasColon;
	char *example;
	char *desc;
	char m_flag;
};

extern struct QueryField g_fields[];
@@ -421,6 +428,14 @@ class QueryTerm {
	long m_termLen;
	// point to the posdblist that represents us
	class RdbList *m_posdbListPtr;

	// the ()'s following an int/float facet term dictate the
	// ranges for clustering the numeric values. like
	// gbfacetfloat:price:(0-10,10-20,...)
	// values outside the ranges will be ignored
	char *m_parenList;
	long m_parenListLen;

	// . our representative bits
	// . the bits in this bit vector is 1-1 with the QueryTerms
	// . if a doc has query term #i then bit #i will be set
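A rough sketch of how a facet paren list such as gbfacetfloat:price:(0-10,10-20) could be broken into numeric ranges, with values outside every range ignored as the comment above describes. The parseParenList helper below is hypothetical and is not the engine's parser; it only illustrates the range/clustering idea behind m_parenList.

// Hypothetical sketch of parsing a facet paren list such as "(0-10,10-20)".
#include <cstdio>
#include <cstdlib>
#include <vector>

struct Range { float lo, hi; };

static std::vector<Range> parseParenList ( const char *s ) {
    std::vector<Range> out;
    while ( *s ) {
        if ( *s == '(' || *s == ',' || *s == ' ' ) { s++; continue; }
        if ( *s == ')' ) break;
        char *end = nullptr;
        float lo = std::strtof ( s, &end );
        if ( end == s || *end != '-' ) break;   // malformed; stop
        s = end + 1;
        float hi = std::strtof ( s, &end );
        if ( end == s ) break;
        s = end;
        out.push_back ( { lo, hi } );
    }
    return out;
}

int main() {
    std::vector<Range> r = parseParenList ( "(0-10,10-20)" );
    float value = 25.0f;                        // falls outside every range, so ignored
    bool inSomeRange = false;
    for ( const Range &rr : r )
        if ( value >= rr.lo && value < rr.hi ) inSomeRange = true;
    printf ( "ranges=%zu inSomeRange=%d\n", r.size(), (int)inSomeRange );
    return 0;
}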
@@ -26404,7 +26404,8 @@ bool XmlDoc::hashMetaZip ( HashTableX *tt ) {
	HashInfo hi;
	hi.m_hashGroup = HASHGROUP_INTAG;
	hi.m_tt = tt;
	hi.m_prefix = "zipcode";
	//hi.m_prefix = "zipcode";
	hi.m_prefix = "gbzipcode";

 nextZip:
	// . parse out the zip codes, may be multiple ones