mirror of
https://github.com/yacy/yacy_search_server.git
synced 2025-07-21 09:04:37 -04:00
Refactoring of the search process: store results in a searchResults structure. At the moment, the results are just stored in it and read back from it again.
Next step: return searchResults instead of serverObjects, and parse the results in the servlets. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3241 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
@ -5,6 +5,8 @@ version 0.50
|
||||
* UPDATED: Result Ranking
|
||||
* UPDATED: Crawl Monitor
|
||||
* CHANGED: Migrated to the new Database Structure
|
||||
* ADDED: XSS protection for all pages as default.
|
||||
* ADDED: searchResults structure.
|
||||
|
||||
version 0.49
|
||||
* CHANGED: New Database Structure for Index and URL Storage
|
||||
|
149
source/de/anomic/data/searchResults.java
Normal file
149
source/de/anomic/data/searchResults.java
Normal file
@ -0,0 +1,149 @@
|
||||
//searchResults.java - a container for search results.
|
||||
//----------------------------------------------------------
|
||||
//part of YaCy
|
||||
//
|
||||
// (C) 2007 by Alexander Schier
|
||||
//
|
||||
// last change: $LastChangedDate: $ by $LastChangedBy: $
|
||||
// $LastChangedRevision: $
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
package de.anomic.data;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
|
||||
import de.anomic.index.indexURLEntry;
|
||||
import de.anomic.plasma.plasmaSearchQuery;
|
||||
import de.anomic.plasma.plasmaSearchRankingProfile;
|
||||
import de.anomic.plasma.plasmaSnippetCache;
|
||||
|
||||
public class searchResults {
|
||||
private int totalcount=0;
|
||||
private int filteredcount=0;
|
||||
private int orderedcount=0;
|
||||
private int linkcount=0;
|
||||
private int globalresults=0;
|
||||
private plasmaSearchRankingProfile ranking=null;
|
||||
private String formerSearch="";
|
||||
private plasmaSearchQuery query=null;
|
||||
private ArrayList results=null;
|
||||
|
||||
public searchResults(){
|
||||
this.results=new ArrayList();
|
||||
}
|
||||
public searchResults(int totalcount, int filteredcount, int orderedcount, int linkcount){
|
||||
this.results=new ArrayList();
|
||||
this.totalcount=totalcount;
|
||||
this.filteredcount=filteredcount;
|
||||
this.orderedcount=orderedcount;
|
||||
this.linkcount=linkcount;
|
||||
}
|
||||
public void appendResult(searchResult result){
|
||||
results.add(result);
|
||||
}
|
||||
public void setTotalcount(int totalcount) {
|
||||
this.totalcount = totalcount;
|
||||
}
|
||||
public int getTotalcount() {
|
||||
return totalcount;
|
||||
}
|
||||
public void setFilteredcount(int filteredcount) {
|
||||
this.filteredcount = filteredcount;
|
||||
}
|
||||
public int getFilteredcount() {
|
||||
return filteredcount;
|
||||
}
|
||||
public void setOrderedcount(int orderedcount) {
|
||||
this.orderedcount = orderedcount;
|
||||
}
|
||||
public int getOrderedcount() {
|
||||
return orderedcount;
|
||||
}
|
||||
public void setLinkcount(int linkcount) {
|
||||
this.linkcount = linkcount;
|
||||
}
|
||||
public int getLinkcount() {
|
||||
return linkcount;
|
||||
}
|
||||
public void setGlobalresults(int globalresults) {
|
||||
this.globalresults = globalresults;
|
||||
}
|
||||
public int getGlobalresults() {
|
||||
return globalresults;
|
||||
}
|
||||
public void setRanking(plasmaSearchRankingProfile ranking) {
|
||||
this.ranking = ranking;
|
||||
}
|
||||
public plasmaSearchRankingProfile getRanking() {
|
||||
return ranking;
|
||||
}
|
||||
public searchResult createSearchResult(){
|
||||
return new searchResult();
|
||||
}
|
||||
public void setFormerSearch(String formerSearch) {
|
||||
this.formerSearch = formerSearch;
|
||||
}
|
||||
public String getFormerSearch() {
|
||||
return formerSearch;
|
||||
}
|
||||
public void setQuery(plasmaSearchQuery query) {
|
||||
this.query = query;
|
||||
}
|
||||
public plasmaSearchQuery getQuery() {
|
||||
return query;
|
||||
}
|
||||
public class searchResult{
|
||||
private String url="";
|
||||
private String urlname="";
|
||||
private plasmaSnippetCache.TextSnippet snippet=null;
|
||||
private indexURLEntry urlentry=null;
|
||||
|
||||
public searchResult(){
|
||||
|
||||
}
|
||||
|
||||
public void setUrl(String url) {
|
||||
this.url = url;
|
||||
}
|
||||
public String getUrl() {
|
||||
return url;
|
||||
}
|
||||
public void setUrlname(String urlname) {
|
||||
this.urlname = urlname;
|
||||
}
|
||||
public String getUrlname() {
|
||||
return urlname;
|
||||
}
|
||||
public void setSnippet(plasmaSnippetCache.TextSnippet snippet) {
|
||||
this.snippet = snippet;
|
||||
}
|
||||
public plasmaSnippetCache.TextSnippet getSnippet() {
|
||||
return snippet;
|
||||
}
|
||||
public void setUrlentry(indexURLEntry urlentry) {
|
||||
this.urlentry = urlentry;
|
||||
}
|
||||
public indexURLEntry getUrlentry() {
|
||||
return urlentry;
|
||||
}
|
||||
public String getUrlhash(){
|
||||
return urlentry.hash();
|
||||
}
|
||||
public boolean hasSnippet(){
|
||||
return this.snippet!=null && this.snippet.exists();
|
||||
}
|
||||
}
|
||||
}
|
@ -126,6 +126,7 @@ import de.anomic.data.blogBoard;
|
||||
import de.anomic.data.bookmarksDB;
|
||||
import de.anomic.data.listManager;
|
||||
import de.anomic.data.messageBoard;
|
||||
import de.anomic.data.searchResults;
|
||||
import de.anomic.data.userDB;
|
||||
import de.anomic.data.wikiBoard;
|
||||
import de.anomic.htmlFilter.htmlFilterContentScraper;
|
||||
@ -2090,10 +2091,15 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
||||
intermissionAllThreads(2 * query.maximumTime);
|
||||
|
||||
serverObjects prop = new serverObjects();
|
||||
searchResults results=new searchResults();
|
||||
results.setRanking(ranking);
|
||||
results.setQuery(query);
|
||||
results.setFormerSearch("");
|
||||
try {
|
||||
// filter out words that appear in bluelist
|
||||
//log.logInfo("E");
|
||||
query.filterOut(blueList);
|
||||
results.setQuery(query);
|
||||
|
||||
// log
|
||||
log.logInfo("INIT WORD SEARCH: " + query.queryWords + ":" + query.queryHashes + " - " + query.wantedResults + " links, " + (query.maximumTime / 1000) + " seconds");
|
||||
@ -2116,26 +2122,35 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
||||
|
||||
// result is a List of urlEntry elements: prepare answer
|
||||
if (acc == null) {
|
||||
prop.put("num-results_totalcount", 0);
|
||||
prop.put("num-results_filteredcount", 0);
|
||||
prop.put("num-results_orderedcount", 0);
|
||||
prop.put("num-results_linkcount", 0);
|
||||
results.setTotalcount(0);
|
||||
results.setFilteredcount(0);
|
||||
results.setOrderedcount(0);
|
||||
results.setLinkcount(0);
|
||||
prop.put("num-results_totalcount", results.getTotalcount());
|
||||
prop.put("num-results_filteredcount", results.getFilteredcount());
|
||||
prop.put("num-results_orderedcount", results.getOrderedcount());
|
||||
prop.put("num-results_linkcount", results.getLinkcount());
|
||||
prop.put("references", 0);
|
||||
prop.put("type_results", 0);
|
||||
} else {
|
||||
prop.put("num-results_totalcount", acc.globalContributions + acc.localContributions);
|
||||
prop.put("num-results_filteredcount", acc.filteredResults);
|
||||
prop.put("num-results_orderedcount", Integer.toString(acc.sizeOrdered()));
|
||||
prop.put("num-results_globalresults", acc.globalContributions);
|
||||
results.setTotalcount(acc.globalContributions + acc.localContributions);
|
||||
results.setFilteredcount(acc.filteredResults);
|
||||
results.setOrderedcount(acc.sizeOrdered());
|
||||
results.setGlobalresults(acc.globalContributions);
|
||||
results.setRanking(ranking);
|
||||
|
||||
prop.put("num-results_totalcount", results.getTotalcount());
|
||||
prop.put("num-results_filteredcount", results.getFilteredcount());
|
||||
prop.put("num-results_orderedcount", Integer.toString(results.getOrderedcount())); //why toString?
|
||||
prop.put("num-results_globalresults", results.getGlobalresults());
|
||||
int i = 0;
|
||||
int p;
|
||||
indexURLEntry urlentry;
|
||||
String urlstring, urlname, filename, urlhash;
|
||||
String host, hash, address;
|
||||
yacySeed seed;
|
||||
plasmaSnippetCache.TextSnippet snippet;
|
||||
boolean includeSnippets = false;
|
||||
String formerSearch = query.words(" ");
|
||||
results.setFormerSearch(query.words(" "));
|
||||
long targetTime = timestamp + query.maximumTime;
|
||||
if (targetTime < System.currentTimeMillis()) targetTime = System.currentTimeMillis() + 1000;
|
||||
while ((acc.hasMoreElements()) && (i < query.wantedResults) && (System.currentTimeMillis() < targetTime)) {
|
||||
@ -2178,42 +2193,51 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
||||
//addScoreForked(ref, gs, descr.split(" "));
|
||||
//addScoreForked(ref, gs, urlstring.split("/"));
|
||||
URL wordURL;
|
||||
if (urlstring.matches(query.urlMask)) { //.* is default
|
||||
searchResults.searchResult result=results.createSearchResult();
|
||||
result.setUrl(urlstring);
|
||||
result.setUrlname(urlname);
|
||||
result.setUrlentry(urlentry);
|
||||
if (urlstring.matches(results.getQuery().urlMask)) { //.* is default
|
||||
if (includeSnippets) {
|
||||
snippet = snippetCache.retrieveTextSnippet(comp.url(), query.queryHashes, false, urlentry.flags().get(plasmaCondenser.flag_cat_indexof), 260, 1000);
|
||||
result.setSnippet(snippetCache.retrieveTextSnippet(comp.url(), results.getQuery().queryHashes, false, urlentry.flags().get(plasmaCondenser.flag_cat_indexof), 260, 1000));
|
||||
//snippet = snippetCache.retrieveTextSnippet(comp.url(), query.queryHashes, false, urlentry.flags().get(plasmaCondenser.flag_cat_indexof), 260, 1000);
|
||||
} else {
|
||||
snippet = null;
|
||||
//snippet = null;
|
||||
result.setSnippet(null);
|
||||
}
|
||||
/*
|
||||
if ((snippet != null) && (snippet.getSource() == plasmaSnippetCache.ERROR_NO_MATCH)) {
|
||||
// suppress line: there is no match in that resource
|
||||
} else {*/
|
||||
prop.put("type_results_" + i + "_authorized_recommend", (yacyCore.newsPool.getSpecific(yacyNewsPool.OUTGOING_DB, "stippadd", "url", urlstring) == null) ? 1 : 0);
|
||||
prop.put("type_results_" + i + "_authorized_recommend_deletelink", "/yacysearch.html?search=" + formerSearch + "&Enter=Search&count=" + query.wantedResults + "&order=" + crypt.simpleEncode(ranking.toExternalString()) + "&resource=local&time=3&deleteref=" + urlhash + "&urlmaskfilter=.*");
|
||||
prop.put("type_results_" + i + "_authorized_recommend_recommendlink", "/yacysearch.html?search=" + formerSearch + "&Enter=Search&count=" + query.wantedResults + "&order=" + crypt.simpleEncode(ranking.toExternalString()) + "&resource=local&time=3&recommendref=" + urlhash + "&urlmaskfilter=.*");
|
||||
prop.put("type_results_" + i + "_authorized_urlhash", urlhash);
|
||||
prop.put("type_results_" + i + "_description", comp.descr());
|
||||
prop.put("type_results_" + i + "_url", urlstring);
|
||||
prop.put("type_results_" + i + "_urlhash", urlhash);
|
||||
prop.put("type_results_" + i + "_urlhexhash", yacySeed.b64Hash2hexHash(urlhash));
|
||||
prop.put("type_results_" + i + "_urlname", nxTools.shortenURLString(urlname, 120));
|
||||
prop.put("type_results_" + i + "_date", dateString(urlentry.moddate()));
|
||||
prop.put("type_results_" + i + "_ybr", plasmaSearchPreOrder.ybr(urlentry.hash()));
|
||||
prop.put("type_results_" + i + "_size", Long.toString(urlentry.size()));
|
||||
prop.put("type_results_" + i + "_words", URLEncoder.encode(query.queryWords.toString(),"UTF-8"));
|
||||
prop.put("type_results_" + i + "_former", formerSearch);
|
||||
prop.put("type_results_" + i + "_rankingprops", urlentry.word().toPropertyForm() + ", domLengthEstimated=" + plasmaURL.domLengthEstimation(urlhash) +
|
||||
((plasmaURL.probablyRootURL(urlhash)) ? ", probablyRootURL" : "") +
|
||||
|
||||
|
||||
prop.put("type_results_" + i + "_authorized_recommend", (yacyCore.newsPool.getSpecific(yacyNewsPool.OUTGOING_DB, "stippadd", "url", result.getUrl()) == null) ? 1 : 0);
|
||||
prop.put("type_results_" + i + "_authorized_recommend_deletelink", "/yacysearch.html?search=" + results.getFormerSearch() + "&Enter=Search&count=" + results.getQuery().wantedResults + "&order=" + crypt.simpleEncode(results.getRanking().toExternalString()) + "&resource=local&time=3&deleteref=" + result.getUrlhash() + "&urlmaskfilter=.*");
|
||||
prop.put("type_results_" + i + "_authorized_recommend_recommendlink", "/yacysearch.html?search=" + results.getFormerSearch() + "&Enter=Search&count=" + results.getQuery().wantedResults + "&order=" + crypt.simpleEncode(results.getRanking().toExternalString()) + "&resource=local&time=3&recommendref=" + result.getUrlhash() + "&urlmaskfilter=.*");
|
||||
prop.put("type_results_" + i + "_authorized_urlhash", result.getUrlhash());
|
||||
prop.put("type_results_" + i + "_description", result.getUrlentry().comp().descr());
|
||||
prop.put("type_results_" + i + "_url", result.getUrl());
|
||||
prop.put("type_results_" + i + "_urlhash", result.getUrlhash());
|
||||
prop.put("type_results_" + i + "_urlhexhash", yacySeed.b64Hash2hexHash(result.getUrlhash()));
|
||||
prop.put("type_results_" + i + "_urlname", nxTools.shortenURLString(result.getUrlname(), 120));
|
||||
prop.put("type_results_" + i + "_date", dateString(result.getUrlentry().moddate()));
|
||||
prop.put("type_results_" + i + "_ybr", plasmaSearchPreOrder.ybr(result.getUrlentry().hash()));
|
||||
prop.put("type_results_" + i + "_size", Long.toString(result.getUrlentry().size()));
|
||||
prop.put("type_results_" + i + "_words", URLEncoder.encode(results.getQuery().queryWords.toString(),"UTF-8"));
|
||||
prop.put("type_results_" + i + "_former", results.getFormerSearch());
|
||||
prop.put("type_results_" + i + "_rankingprops", result.getUrlentry().word().toPropertyForm() + ", domLengthEstimated=" + plasmaURL.domLengthEstimation(result.getUrlhash()) +
|
||||
((plasmaURL.probablyRootURL(result.getUrlhash())) ? ", probablyRootURL" : "") +
|
||||
(((wordURL = plasmaURL.probablyWordURL(urlhash, query.words(""))) != null) ? ", probablyWordURL=" + wordURL.toNormalform() : ""));
|
||||
// adding snippet if available
|
||||
if ((snippet != null) && (snippet.exists())) {
|
||||
if (result.hasSnippet()) {
|
||||
prop.put("type_results_" + i + "_snippet", 1);
|
||||
prop.putASIS("type_results_" + i + "_snippet_text", snippet.getLineMarked(query.queryHashes));//FIXME: the ASIS should not be needed, if there is no html in .java
|
||||
prop.putASIS("type_results_" + i + "_snippet_text", result.getSnippet().getLineMarked(query.queryHashes));//FIXME: the ASIS should not be needed, if there is no html in .java
|
||||
} else {
|
||||
prop.put("type_results_" + i + "_snippet", 0);
|
||||
prop.put("type_results_" + i + "_snippet_text", "");
|
||||
}
|
||||
i++;
|
||||
results.appendResult(result);
|
||||
//}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user