mirror of
https://github.com/yacy/yacy_search_server.git
synced 2025-07-18 08:36:07 -04:00
- fixed opensearch bugs
- added 'full domain' button to expert crawl start - removed non-working 'only one domain' button; its regex allowed crawling of other domains git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4125 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
@ -3,7 +3,7 @@ javacSource=1.4
|
||||
javacTarget=1.4
|
||||
|
||||
# Release Configuration
|
||||
releaseVersion=0.546
|
||||
releaseVersion=0.547
|
||||
releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
|
||||
proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
|
||||
releaseFileParentDir=yacy
|
||||
|
@ -72,13 +72,13 @@
|
||||
<tr valign="top" class="TableCellDark">
|
||||
<td><label for="crawlingFilter">Crawling Filter</label>:</td>
|
||||
<td>
|
||||
<input name="crawlingFilter" id="crawlingFilter" type="text" size="20" maxlength="100" value="#[crawlingFilter]#" />
|
||||
<input type="button" value="only one domain" onclick='var domain=prompt("please enter domain without \"www.\" (i.e. \"yacy.net\").");if(domain!=null){document.getElementById("crawlingFilter").value="(^|.*\\.)"+domain.replace("\.", "\\.")+"/.*"}' />
|
||||
<input name="crawlingFilter" id="crawlingFilter" type="text" size="20" maxlength="100" value="#[crawlingFilter]#" />
|
||||
<input type="radio" name="range" value="wide" checked="checked" />Use filter |
|
||||
<input type="radio" name="range" value="domain" />Restrict to start domain
|
||||
</td>
|
||||
<td>
|
||||
This is an emacs-like regular expression that must match with the URLs which are used to be crawled.
|
||||
Use this i.e. to crawl a single domain. If you set this filter it makes sense to increase
|
||||
the crawling depth.
|
||||
The filter is an emacs-like regular expression that must match with the URLs which are used to be crawled; default is 'catch all'.
|
||||
You can also use an automatic domain-restriction to fully crawl a single domain.
|
||||
</td>
|
||||
</tr>
|
||||
<tr valign="top" class="TableCellLight">
|
||||
|
@ -104,15 +104,15 @@ public class WatchCrawler_p {
|
||||
boolean fullDomain = post.get("range", "wide").equals("domain"); // special property in simple crawl start
|
||||
|
||||
String newcrawlingfilter = post.get("crawlingFilter", ".*");
|
||||
if (newcrawlingfilter.length() < 2) newcrawlingfilter = ".*"; // avoid that all urls are filtered out if bad value was submitted
|
||||
env.setConfig("crawlingFilter", newcrawlingfilter);
|
||||
if (fullDomain) try {
|
||||
newcrawlingfilter = ".*" + (new yacyURL(post.get("crawlingURL",""), null)).getHost() + ".*";
|
||||
} catch (MalformedURLException e) {}
|
||||
if (newcrawlingfilter.length() < 2) newcrawlingfilter = ".*"; // avoid that all urls are filtered out if bad value was submitted
|
||||
env.setConfig("crawlingFilter", newcrawlingfilter);
|
||||
|
||||
int newcrawlingdepth = Integer.parseInt(post.get("crawlingDepth", "8"));
|
||||
if (fullDomain) newcrawlingdepth = 8;
|
||||
env.setConfig("crawlingDepth", Integer.toString(newcrawlingdepth));
|
||||
if ((fullDomain) && (newcrawlingdepth < 8)) newcrawlingdepth = 8;
|
||||
|
||||
boolean crawlingIfOlderCheck = post.get("crawlingIfOlderCheck", "off").equals("on");
|
||||
int crawlingIfOlderNumber = Integer.parseInt(post.get("crawlingIfOlderNumber", "-1"));
|
||||
|
@ -1,5 +1,5 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<OpenSearchDescription xmlns="http://www.opensearch.org/Specifications/OpenSearch/1.1">
|
||||
<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/">
|
||||
<ShortName>YaCy/#[clientname]#</ShortName>
|
||||
<LongName>YaCy.net - #[SearchPageGreeting]#</LongName>
|
||||
<Image type="image/gif">http://#[thisaddress]#/env/grafics/yacy.gif</Image>
|
||||
|
@ -152,7 +152,7 @@ public class yacysearch {
|
||||
}
|
||||
if (sb.facilityDB != null) try { sb.facilityDB.update("zeitgeist", querystring, post); } catch (Exception e) {}
|
||||
|
||||
int count = post.getInt("count", 10);
|
||||
int itemsPerPage = post.getInt("count", 10);
|
||||
int offset = post.getInt("offset", 0);
|
||||
boolean global = (post == null) ? true : post.get("resource", "global").equals("global");
|
||||
final boolean indexof = post.get("indexof","").equals("on");
|
||||
@ -186,7 +186,7 @@ public class yacysearch {
|
||||
int contentdomCode = plasmaSearchQuery.contentdomParser(post.get("contentdom", "text"));
|
||||
|
||||
// patch until better search profiles are available
|
||||
if ((contentdomCode != plasmaSearchQuery.CONTENTDOM_TEXT) && (count <= 30)) count = 30;
|
||||
if ((contentdomCode != plasmaSearchQuery.CONTENTDOM_TEXT) && (itemsPerPage <= 30)) itemsPerPage = 30;
|
||||
|
||||
serverObjects prop = new serverObjects();
|
||||
if (post.get("cat", "href").equals("href")) {
|
||||
@ -257,7 +257,7 @@ public class yacysearch {
|
||||
prefermask,
|
||||
contentdomCode,
|
||||
true,
|
||||
count,
|
||||
itemsPerPage,
|
||||
offset,
|
||||
searchtime,
|
||||
urlmask,
|
||||
@ -319,8 +319,9 @@ public class yacysearch {
|
||||
prop.put("num-results_totalcount", theSearch.getLocalCount() + theSearch.getGlobalCount());
|
||||
prop.put("num-results_globalresults", 1);
|
||||
prop.put("num-results_globalresults_globalcount", theSearch.getGlobalCount());
|
||||
prop.put("num-results_offset", 0);
|
||||
prop.put("num-results_offset", offset);
|
||||
prop.put("num-results_linkcount", 0);
|
||||
prop.put("num-results_itemsPerPage", itemsPerPage);
|
||||
|
||||
// compose page navigation
|
||||
StringBuffer resnav = new StringBuffer();
|
||||
@ -391,7 +392,7 @@ public class yacysearch {
|
||||
prop.putASIS("input_promoteSearchPageGreeting", promoteSearchPageGreeting);
|
||||
prop.put("input_former", querystring);
|
||||
prop.put("former", post.get("search", ""));
|
||||
prop.put("input_count", count);
|
||||
prop.put("input_count", itemsPerPage);
|
||||
prop.put("input_offset", offset);
|
||||
prop.put("input_resource", (global) ? "global" : "local");
|
||||
prop.put("input_time", searchtime / 1000);
|
||||
|
@ -11,9 +11,9 @@
|
||||
<url>#[rssYacyImageURL]#</url>
|
||||
<title>Search for #[former]#</title>
|
||||
</image>
|
||||
<opensearch:totalResults>#[results]#</opensearch:totalResults>
|
||||
<opensearch:startIndex>1</opensearch:startIndex>
|
||||
<opensearch:itemsPerPage>#[results]#</opensearch:itemsPerPage>
|
||||
<opensearch:totalResults>#[num-results_totalcount]#</opensearch:totalResults>
|
||||
<opensearch:startIndex>#[num-results_offset]#</opensearch:startIndex>
|
||||
<opensearch:itemsPerPage>#[num-results_itemsPerPage]#</opensearch:itemsPerPage>
|
||||
<opensearch:link rel="search" href="opensearchdescription.xml" type="application/opensearchdescription+xml"/>
|
||||
<opensearch:Query role="request" searchTerms="#[former]#" />
|
||||
|
||||
|
Reference in New Issue
Block a user