mirror of
https://github.com/yacy/yacy_search_server.git
synced 2025-07-18 08:36:07 -04:00
Added the visualization of error-urls to the host browser
- only visible for admins
- a faceted search generates a huge list covering all hosts in the host list
- the faceted search algorithms had to be modified for that
- when browsing a directory path, the error cause is written next to each url that is presented as an error-url
- the errors are also accumulated into the directory sums
This commit is contained in:
htroot
source/net/yacy
cora
federate
crawler
data
search
index
@ -24,7 +24,6 @@
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
import java.io.IOException;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Arrays;
|
||||
import java.util.Date;
|
||||
|
@ -76,16 +76,19 @@ function updatepage(str) {
|
||||
<fieldset><legend>Host List</legend>
|
||||
#{list}#
|
||||
<div style="float:left; padding:1px 5px 1px 5px;">
|
||||
<div style="width:160px; text-align:left; float: left; white-space:nowrap; overflow:hidden;"><div id="info"><a href="/HostBrowser.html?path=#[host]#&facetcount=#[count]#">#[host]#</a><span>browse #[host]#</span></div></div>
|
||||
<div style="width:80px; text-align:right; float: left; white-space:nowrap; overflow:hidden;">#[count]##(crawler)#::/#[pending]##(/crawler)# URLs</div>
|
||||
<div style="width:180px; text-align:left; float: left; white-space:nowrap; overflow:hidden;"><div id="info"><a href="/HostBrowser.html?path=#[host]#&facetcount=#[count]#">#[host]#</a><span>browse #[host]#</span></div></div>
|
||||
<div style="width:100px; text-align:right; float: left; white-space:nowrap; overflow:hidden;"><span class="commit">#[count]#</span>#(crawler)#::/<span class="info">#[pending]#</span>#(/crawler)##(errors)#::/<span class="error">#[count]#</span>#(/errors)# URLs</div>
|
||||
</div>
|
||||
#{/list}#
|
||||
<div style="clear:both; float:left; padding:1px 5px 1px 5px;">
|
||||
<br/><b>Count Colors:</b> <div class="commit">Documents</div><div class="info">Pending in Crawler</div><div class="error">Load Errors</div>
|
||||
</div>
|
||||
</fieldset>
|
||||
#(/hosts)#
|
||||
|
||||
#(files)#::
|
||||
<fieldset><legend>Browser for #[path]#</legend>
|
||||
<p>Documents on host: #[hostsize]#; Documents in subpath: #[subpathsize]#; #(complete)#<a href="/HostBrowser.html?complete=true&path=#[path]#">get complete list</a>::<a href="/HostBrowser.html?path=#[path]#">directory view</a>#(/complete)#
|
||||
<p>Documents on host: #[hostsize]#; Documents in subpath: #[subpathsize]# <!-- #(complete)#;<a href="/HostBrowser.html?complete=true&path=#[path]#">get complete list</a>::<a href="/HostBrowser.html?path=#[path]#">directory view</a>#(/complete)#-->
|
||||
</p>
|
||||
<table border="0" cellpadding="2" cellspacing="2" style="float:left">
|
||||
<tr>
|
||||
|
@ -54,6 +54,10 @@ import net.yacy.server.serverSwitch;
|
||||
|
||||
public class HostBrowser {
|
||||
|
||||
public static enum StoreType {
|
||||
LINK, INDEX, ERROR;
|
||||
}
|
||||
|
||||
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
|
||||
// return variable that accumulates replacements
|
||||
final Switchboard sb = (Switchboard) env;
|
||||
@ -131,27 +135,35 @@ public class HostBrowser {
|
||||
if (post.containsKey("hosts")) {
|
||||
// generate host list
|
||||
try {
|
||||
int maxcount = 360; // == 6!/2 which makes nice matrixes for 3, 4, 5, 6 rows/colums
|
||||
int maxcount = admin ? 2 * 3 * 2 * 5 * 7 * 2 * 3 : 360; // which makes nice matrixes for 2, 3, 4, 5, 6, 7, 8, 9 rows/colums
|
||||
|
||||
// collect from index
|
||||
ReversibleScoreMap<String> score = fulltext.getSolr().getFacet(YaCySchema.host_s.name(), maxcount);
|
||||
// collect hosts from index
|
||||
ReversibleScoreMap<String> hostscore = fulltext.getSolr().getFacets("*:*", new String[]{YaCySchema.host_s.name()}, maxcount).get(YaCySchema.host_s.name());
|
||||
if (hostscore == null) hostscore = new ClusteredScoreMap<String>();
|
||||
|
||||
// collect from crawler
|
||||
// collect hosts from crawler
|
||||
final Map<String, Integer[]> crawler = (admin) ? sb.crawlQueues.noticeURL.getDomainStackHosts(StackType.LOCAL, sb.robots) : new HashMap<String, Integer[]>();
|
||||
for (Map.Entry<String, Integer[]> host: crawler.entrySet()) {
|
||||
score.inc(host.getKey(), host.getValue()[0]);
|
||||
hostscore.inc(host.getKey(), host.getValue()[0]);
|
||||
}
|
||||
|
||||
// collect the errorurls
|
||||
ReversibleScoreMap<String> errorscore = admin ? fulltext.getSolr().getFacets(YaCySchema.failreason_t.name() + ":[* TO *]", new String[]{YaCySchema.host_s.name()}, maxcount).get(YaCySchema.host_s.name()) : null;
|
||||
if (errorscore == null) errorscore = new ClusteredScoreMap<String>();
|
||||
|
||||
int c = 0;
|
||||
Iterator<String> i = score.keys(false);
|
||||
Iterator<String> i = hostscore.keys(false);
|
||||
String host;
|
||||
while (i.hasNext() && c < maxcount) {
|
||||
host = i.next();
|
||||
prop.put("hosts_list_" + c + "_host", host);
|
||||
prop.put("hosts_list_" + c + "_count", score.get(host));
|
||||
prop.put("hosts_list_" + c + "_count", hostscore.get(host));
|
||||
boolean inCrawler = crawler.containsKey(host);
|
||||
prop.put("hosts_list_" + c + "_crawler", inCrawler ? 1 : 0);
|
||||
if (inCrawler) prop.put("hosts_list_" + c + "_crawler_pending", crawler.get(host)[0]);
|
||||
int errors = errorscore.get(host);
|
||||
prop.put("hosts_list_" + c + "_errors", errors > 0 ? 1 : 0);
|
||||
if (errors > 0) prop.put("hosts_list_" + c + "_errors_count", errors);
|
||||
c++;
|
||||
}
|
||||
prop.put("hosts_list", c);
|
||||
@ -201,10 +213,10 @@ public class HostBrowser {
|
||||
} else {
|
||||
if (facetcount > 1000 && !post.containsKey("nepr")) q.append(" AND ").append(YaCySchema.url_paths_sxt.name()).append(":[* TO *]");
|
||||
}
|
||||
q.append(" AND -").append(YaCySchema.failreason_t.name()).append(":[* TO *]");
|
||||
BlockingQueue<SolrDocument> docs = fulltext.getSolr().concurrentQuery(q.toString(), 0, 100000, 3000, 100);
|
||||
SolrDocument doc;
|
||||
Set<String> storedDocs = new HashSet<String>();
|
||||
Map<String, String> errorDocs = new HashMap<String, String>();
|
||||
Set<String> inboundLinks = new HashSet<String>();
|
||||
Map<String, ReversibleScoreMap<String>> outboundHosts = new HashMap<String, ReversibleScoreMap<String>>();
|
||||
int hostsize = 0;
|
||||
@ -212,42 +224,48 @@ public class HostBrowser {
|
||||
long timeout = System.currentTimeMillis() + 3000;
|
||||
while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
|
||||
String u = (String) doc.getFieldValue(YaCySchema.sku.getSolrFieldName());
|
||||
hostsize++;
|
||||
String error = (String) doc.getFieldValue(YaCySchema.failreason_t.name());
|
||||
if (u.startsWith(path)) {
|
||||
if (delete) {
|
||||
deleteIDs.add(ASCII.getBytes((String) doc.getFieldValue(YaCySchema.id.name())));
|
||||
} else {
|
||||
storedDocs.add(u);
|
||||
if (error == null) storedDocs.add(u); else if (admin) errorDocs.put(u, error);
|
||||
}
|
||||
} else if (complete) {
|
||||
storedDocs.add(u);
|
||||
if (error == null) storedDocs.add(u); else if (admin) errorDocs.put(u, error);
|
||||
}
|
||||
// collect inboundlinks to browse the host
|
||||
Iterator<String> links = URIMetadataNode.getLinks(doc, true);
|
||||
while (links.hasNext()) {
|
||||
u = links.next();
|
||||
if ((complete || u.startsWith(path)) && !storedDocs.contains(u)) inboundLinks.add(u);
|
||||
}
|
||||
|
||||
// collect outboundlinks to browse to the outbound
|
||||
links = URIMetadataNode.getLinks(doc, false);
|
||||
while (links.hasNext()) {
|
||||
u = links.next();
|
||||
try {
|
||||
MultiProtocolURI mu = new MultiProtocolURI(u);
|
||||
if (mu.getHost() != null) {
|
||||
ReversibleScoreMap<String> lks = outboundHosts.get(mu.getHost());
|
||||
if (lks == null) {
|
||||
lks = new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator);
|
||||
outboundHosts.put(mu.getHost(), lks);
|
||||
if (error == null) {
|
||||
hostsize++;
|
||||
// collect inboundlinks to browse the host
|
||||
Iterator<String> links = URIMetadataNode.getLinks(doc, true);
|
||||
while (links.hasNext()) {
|
||||
u = links.next();
|
||||
if ((complete || u.startsWith(path)) && !storedDocs.contains(u)) inboundLinks.add(u);
|
||||
}
|
||||
|
||||
// collect outboundlinks to browse to the outbound
|
||||
links = URIMetadataNode.getLinks(doc, false);
|
||||
while (links.hasNext()) {
|
||||
u = links.next();
|
||||
try {
|
||||
MultiProtocolURI mu = new MultiProtocolURI(u);
|
||||
if (mu.getHost() != null) {
|
||||
ReversibleScoreMap<String> lks = outboundHosts.get(mu.getHost());
|
||||
if (lks == null) {
|
||||
lks = new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator);
|
||||
outboundHosts.put(mu.getHost(), lks);
|
||||
}
|
||||
lks.set(u, u.length());
|
||||
}
|
||||
lks.set(u, u.length());
|
||||
}
|
||||
} catch (MalformedURLException e) {}
|
||||
} catch (MalformedURLException e) {}
|
||||
}
|
||||
}
|
||||
if (System.currentTimeMillis() > timeout) break;
|
||||
}
|
||||
if (deleteIDs.size() > 0) sb.index.fulltext().remove(deleteIDs, true);
|
||||
if (deleteIDs.size() > 0) {
|
||||
for (byte[] b: deleteIDs) sb.crawlQueues.urlRemove(b);
|
||||
sb.index.fulltext().remove(deleteIDs, true);
|
||||
}
|
||||
|
||||
// collect from crawler
|
||||
List<Request> domainStackReferences = (admin) ? sb.crawlQueues.noticeURL.getDomainStackReferences(StackType.LOCAL, host, 1000, 3000) : new ArrayList<Request>(0);
|
||||
@ -255,43 +273,46 @@ public class HostBrowser {
|
||||
for (Request crawlEntry: domainStackReferences) loadingLinks.add(crawlEntry.url().toNormalform(true));
|
||||
|
||||
// now combine all lists into one
|
||||
Map<String, Boolean> files = new HashMap<String, Boolean>();
|
||||
for (String u: storedDocs) files.put(u, true);
|
||||
for (String u: inboundLinks) if (!storedDocs.contains(u)) files.put(u, false);
|
||||
for (String u: loadingLinks) if (u.startsWith(path) && !storedDocs.contains(u)) files.put(u, false);
|
||||
Map<String, StoreType> files = new HashMap<String, StoreType>();
|
||||
for (String u: storedDocs) files.put(u, StoreType.INDEX);
|
||||
for (String u: errorDocs.keySet()) files.put(u, StoreType.ERROR);
|
||||
for (String u: inboundLinks) if (!storedDocs.contains(u)) files.put(u, StoreType.LINK);
|
||||
for (String u: loadingLinks) if (u.startsWith(path) && !storedDocs.contains(u)) files.put(u, StoreType.LINK);
|
||||
Log.logInfo("HostBrowser", "collected " + files.size() + " urls for path " + path);
|
||||
|
||||
// distinguish files and folders
|
||||
Map<String, Object> list = new TreeMap<String, Object>(); // a directory list; if object is boolean, its a file; if its a int[], then its a folder
|
||||
int pl = path.length();
|
||||
String file;
|
||||
boolean loaded;
|
||||
for (Map.Entry<String, Boolean> entry: files.entrySet()) {
|
||||
for (Map.Entry<String, StoreType> entry: files.entrySet()) {
|
||||
if (entry.getKey().length() < pl) continue; // this is not inside the path
|
||||
if (!entry.getKey().startsWith(path)) continue;
|
||||
file = entry.getKey().substring(pl);
|
||||
loaded = entry.getValue().booleanValue();
|
||||
StoreType type = entry.getValue();
|
||||
p = file.indexOf('/');
|
||||
if (p < 0) {
|
||||
// this is a file
|
||||
list.put(entry.getKey(), loaded); // Boolean value: this is a file; true -> file is in index; false -> not in index, maybe in crawler
|
||||
list.put(entry.getKey(), type); // StoreType value: this is a file; true -> file is in index; false -> not in index, maybe in crawler
|
||||
} else {
|
||||
// this is a directory path or a file in a subdirectory
|
||||
String remainingPath = file.substring(0, p + 1);
|
||||
if (complete && remainingPath.indexOf('.') > 0) {
|
||||
list.put(entry.getKey(), loaded); // Boolean value: this is a file
|
||||
list.put(entry.getKey(), type); // StoreType value: this is a file
|
||||
} else {
|
||||
String dir = path + remainingPath;
|
||||
Object c = list.get(dir);
|
||||
boolean incrawler = loadingLinks.contains(entry.getKey());
|
||||
if (c == null) {
|
||||
int[] linkedStored = new int[]{0,0,0};
|
||||
linkedStored[loaded ? 1 : 0]++;
|
||||
if (incrawler) linkedStored[2]++;
|
||||
list.put(dir, linkedStored);
|
||||
int[] linkedStoredIncrawlerError = new int[]{0,0,0,0};
|
||||
if (type == StoreType.LINK) linkedStoredIncrawlerError[0]++;
|
||||
if (type == StoreType.INDEX) linkedStoredIncrawlerError[1]++;
|
||||
if (loadingLinks.contains(entry.getKey())) linkedStoredIncrawlerError[2]++;
|
||||
if (errorDocs.containsKey(entry.getKey())) linkedStoredIncrawlerError[3]++;
|
||||
list.put(dir, linkedStoredIncrawlerError);
|
||||
} else if (c instanceof int[]) {
|
||||
((int[]) c)[loaded ? 1 : 0]++;
|
||||
if (incrawler) ((int[]) c)[2]++;
|
||||
if (type == StoreType.LINK) ((int[]) c)[0]++;
|
||||
if (type == StoreType.INDEX) ((int[]) c)[1]++;
|
||||
if (loadingLinks.contains(entry.getKey())) ((int[]) c)[2]++;
|
||||
if (errorDocs.containsKey(entry.getKey())) ((int[]) c)[3]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -300,16 +321,17 @@ public class HostBrowser {
|
||||
int maxcount = 1000;
|
||||
int c = 0;
|
||||
for (Map.Entry<String, Object> entry: list.entrySet()) {
|
||||
if (entry.getValue() instanceof Boolean) {
|
||||
if (entry.getValue() instanceof StoreType) {
|
||||
// this is a file
|
||||
prop.put("files_list_" + c + "_type", 0);
|
||||
prop.put("files_list_" + c + "_type_url", entry.getKey());
|
||||
boolean indexed = ((Boolean) entry.getValue()).booleanValue();
|
||||
StoreType type = (StoreType) entry.getValue();
|
||||
try {uri = new DigestURI(entry.getKey());} catch (MalformedURLException e) {uri = null;}
|
||||
boolean loading = load.equals(entry.getKey()) || (uri != null && sb.crawlQueues.urlExists(uri.hash()) != null);
|
||||
//String failr = fulltext.failReason(ASCII.String(uri.hash()));
|
||||
prop.put("files_list_" + c + "_type_stored", indexed ? 1 : loading ? 2 : 0);
|
||||
prop.put("files_list_" + c + "_type_stored", type == StoreType.INDEX ? 1 : loading ? 2 : type == StoreType.ERROR ? 3 : 0 /*linked*/);
|
||||
prop.put("files_list_" + c + "_type_stored_load", loadRight ? 1 : 0);
|
||||
if (type == StoreType.ERROR) prop.put("files_list_" + c + "_type_stored_error", errorDocs.get(entry.getKey()));
|
||||
if (loadRight) {
|
||||
prop.put("files_list_" + c + "_type_stored_load_url", entry.getKey());
|
||||
prop.put("files_list_" + c + "_type_stored_load_path", path);
|
||||
@ -321,7 +343,8 @@ public class HostBrowser {
|
||||
int linked = ((int[]) entry.getValue())[0];
|
||||
int stored = ((int[]) entry.getValue())[1];
|
||||
int crawler = ((int[]) entry.getValue())[2];
|
||||
prop.put("files_list_" + c + "_type_count", stored + " stored / " + linked + " linked" + (crawler > 0 ? (" / " + crawler + " pending") : ""));
|
||||
int error = ((int[]) entry.getValue())[3];
|
||||
prop.put("files_list_" + c + "_type_count", stored + " stored / " + linked + " linked" + (crawler > 0 ? (" / " + crawler + " pending") : "") + (error > 0 ? (" / " + error + " errors") : ""));
|
||||
}
|
||||
if (++c >= maxcount) break;
|
||||
}
|
||||
|
2
htroot/env/base.css
vendored
2
htroot/env/base.css
vendored
@ -134,7 +134,7 @@ tt, *.tt {
|
||||
|
||||
.info {
|
||||
font-weight:bold;
|
||||
color:olive;
|
||||
color:darkblue;
|
||||
}
|
||||
|
||||
.commit {
|
||||
|
@ -22,12 +22,12 @@ package net.yacy.cora.federate.solr.connector;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import net.yacy.cora.document.UTF8;
|
||||
import net.yacy.cora.federate.solr.YaCySchema;
|
||||
import net.yacy.cora.sorting.ClusteredScoreMap;
|
||||
import net.yacy.cora.sorting.ReversibleScoreMap;
|
||||
import net.yacy.cora.storage.ARC;
|
||||
import net.yacy.cora.storage.ConcurrentARC;
|
||||
@ -419,25 +419,26 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
|
||||
return count.get();
|
||||
}
|
||||
|
||||
/**
|
||||
* get a facet of the index: a list of values that are most common in a specific field
|
||||
* @param field the field which is selected for the facet
|
||||
* @param maxresults the maximum size of the resulting map
|
||||
* @return an ordered map of fields
|
||||
* @throws IOException
|
||||
*/
|
||||
public ReversibleScoreMap<String> getFacet(String field, int maxresults) throws IOException {
|
||||
if (this.solr0 == null && this.solr1 == null) return new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator);
|
||||
@Override
|
||||
public Map<String, ReversibleScoreMap<String>> getFacets(String query, String[] fields, int maxresults) throws IOException {
|
||||
if (this.solr0 == null && this.solr1 == null) return new HashMap<String, ReversibleScoreMap<String>>(0);
|
||||
if (this.solr0 != null && this.solr1 == null) {
|
||||
return this.solr0.getFacet(field, maxresults);
|
||||
return this.solr0.getFacets(query, fields, maxresults);
|
||||
}
|
||||
if (this.solr1 != null && this.solr0 == null) {
|
||||
return this.solr1.getFacet(field, maxresults);
|
||||
return this.solr1.getFacets(query, fields, maxresults);
|
||||
}
|
||||
ReversibleScoreMap<String> facet0 = this.solr0.getFacet(field, maxresults);
|
||||
ReversibleScoreMap<String> facet1 = this.solr1.getFacet(field, maxresults);
|
||||
for (String key: facet1) facet0.inc(key, facet1.get(key));
|
||||
return facet0;
|
||||
Map<String, ReversibleScoreMap<String>> facets0 = this.solr0.getFacets(query, fields, maxresults);
|
||||
Map<String, ReversibleScoreMap<String>> facets1 = this.solr1.getFacets(query, fields, maxresults);
|
||||
for (Map.Entry<String, ReversibleScoreMap<String>> facet0: facets0.entrySet()) {
|
||||
ReversibleScoreMap<String> facet1 = facets1.remove(facet0.getKey());
|
||||
if (facet1 == null) continue;
|
||||
for (String key: facet1) facet0.getValue().inc(key, facet1.get(key));
|
||||
}
|
||||
for (Map.Entry<String, ReversibleScoreMap<String>> facet1: facets1.entrySet()) {
|
||||
facets0.put(facet1.getKey(), facet1.getValue());
|
||||
}
|
||||
return facets0;
|
||||
}
|
||||
|
||||
private void addToCache(SolrDocumentList list) {
|
||||
|
@ -23,6 +23,7 @@ package net.yacy.cora.federate.solr.connector;
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
|
||||
import net.yacy.cora.sorting.ReversibleScoreMap;
|
||||
@ -183,8 +184,8 @@ public class MultipleSolrConnector extends AbstractSolrConnector implements Solr
|
||||
}
|
||||
|
||||
@Override
|
||||
public ReversibleScoreMap<String> getFacet(final String field, final int maxresults) throws IOException {
|
||||
return this.solr.getFacet(field, maxresults);
|
||||
public Map<String, ReversibleScoreMap<String>> getFacets(String query, String[] fields, int maxresults) throws IOException {
|
||||
return this.solr.getFacets(query, fields, maxresults);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -22,10 +22,10 @@ package net.yacy.cora.federate.solr.connector;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import net.yacy.cora.document.UTF8;
|
||||
import net.yacy.cora.sorting.ClusteredScoreMap;
|
||||
import net.yacy.cora.sorting.ReversibleScoreMap;
|
||||
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
@ -225,18 +225,18 @@ public class RetrySolrConnector extends AbstractSolrConnector implements SolrCon
|
||||
}
|
||||
|
||||
@Override
|
||||
public ReversibleScoreMap<String> getFacet(final String field, final int maxresults) throws IOException {
|
||||
public Map<String, ReversibleScoreMap<String>> getFacets(String query, String[] fields, int maxresults) throws IOException {
|
||||
final long t = System.currentTimeMillis() + this.retryMaxTime;
|
||||
Throwable ee = null;
|
||||
while (System.currentTimeMillis() < t) try {
|
||||
return this.solrConnector.getFacet(field, maxresults);
|
||||
return this.solrConnector.getFacets(query, fields, maxresults);
|
||||
} catch (final Throwable e) {
|
||||
ee = e;
|
||||
try {Thread.sleep(10);} catch (final InterruptedException e1) {}
|
||||
continue;
|
||||
}
|
||||
if (ee != null) throw (ee instanceof IOException) ? (IOException) ee : new IOException(ee.getMessage());
|
||||
return new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator);
|
||||
return new HashMap<String, ReversibleScoreMap<String>>();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -24,11 +24,11 @@ import java.io.IOException;
|
||||
import java.net.InetAddress;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import net.yacy.cora.document.UTF8;
|
||||
import net.yacy.cora.sorting.ClusteredScoreMap;
|
||||
import net.yacy.cora.sorting.ReversibleScoreMap;
|
||||
import net.yacy.cora.protocol.Domains;
|
||||
|
||||
@ -225,22 +225,24 @@ public class ShardSolrConnector extends AbstractSolrConnector implements SolrCon
|
||||
return count.get();
|
||||
}
|
||||
|
||||
/**
|
||||
* get a facet of the index: a list of values that are most common in a specific field
|
||||
* @param field the field which is selected for the facet
|
||||
* @param maxresults the maximum size of the resulting map
|
||||
* @return an ordered map of fields
|
||||
* @throws IOException
|
||||
*/
|
||||
public ReversibleScoreMap<String> getFacet(final String field, final int maxresults) throws IOException {
|
||||
ReversibleScoreMap<String> acc = new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator);
|
||||
@Override
|
||||
public Map<String, ReversibleScoreMap<String>> getFacets(String query, String[] fields, int maxresults) throws IOException {
|
||||
Map<String, ReversibleScoreMap<String>> facets = new HashMap<String, ReversibleScoreMap<String>>();
|
||||
for (final SolrConnector connector: this.connectors) {
|
||||
ReversibleScoreMap<String> peer = connector.getFacet(field, maxresults);
|
||||
for (String key: peer) acc.inc(key, peer.get(key));
|
||||
Map<String, ReversibleScoreMap<String>> peer = connector.getFacets(query, fields, maxresults);
|
||||
innerloop: for (Map.Entry<String, ReversibleScoreMap<String>> facet: facets.entrySet()) {
|
||||
ReversibleScoreMap<String> peerfacet = peer.remove(facet.getKey());
|
||||
if (peerfacet == null) continue innerloop;
|
||||
for (String key: peerfacet) facet.getValue().inc(key, peerfacet.get(key));
|
||||
}
|
||||
for (Map.Entry<String, ReversibleScoreMap<String>> peerfacet: peer.entrySet()) {
|
||||
facets.put(peerfacet.getKey(), peerfacet.getValue());
|
||||
}
|
||||
}
|
||||
return acc;
|
||||
return facets;
|
||||
}
|
||||
|
||||
|
||||
public long[] getSizeList() {
|
||||
final long[] size = new long[this.connectors.size()];
|
||||
int i = 0;
|
||||
|
@ -23,6 +23,7 @@ package net.yacy.cora.federate.solr.connector;
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
|
||||
import net.yacy.cora.sorting.ReversibleScoreMap;
|
||||
@ -134,13 +135,14 @@ public interface SolrConnector extends Iterable<String> /* Iterable of document
|
||||
public long getQueryCount(final String querystring) throws IOException;
|
||||
|
||||
/**
|
||||
* get a facet of the index: a list of values that are most common in a specific field
|
||||
* @param field the field which is selected for the facet
|
||||
* @param maxresults the maximum size of the resulting map
|
||||
* @return an ordered map of fields
|
||||
* get facets of the index: for each selected field, a list of the values that are most common in that field
|
||||
* @param query a query which is performed to get the facets
|
||||
* @param fields the field names which are selected as facet
|
||||
* @param maxresults the maximum size of the resulting maps
|
||||
* @return a map with key = facet field name, value = an ordered map of field values for that field
|
||||
* @throws IOException
|
||||
*/
|
||||
public ReversibleScoreMap<String> getFacet(String field, int maxresults) throws IOException;
|
||||
public Map<String, ReversibleScoreMap<String>> getFacets(String query, String[] fields, int maxresults) throws IOException;
|
||||
|
||||
/**
|
||||
* Get a query result from solr as a stream of documents.
|
||||
|
@ -24,7 +24,9 @@ import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import net.yacy.cora.document.UTF8;
|
||||
import net.yacy.cora.sorting.ClusteredScoreMap;
|
||||
@ -261,34 +263,38 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
|
||||
final SolrDocumentList docs = rsp.getResults();
|
||||
return docs.getNumFound();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* get a facet of the index: a list of values that are most common in a specific field
|
||||
* @param field the field which is selected for the facet
|
||||
* @param maxresults the maximum size of the resulting map
|
||||
* @return an ordered map of fields
|
||||
* get facets of the index: for each selected field, a list of the values that are most common in that field
|
||||
* @param query a query which is performed to get the facets
|
||||
* @param fields the field names which are selected as facet
|
||||
* @param maxresults the maximum size of the resulting maps
|
||||
* @return a map with key = facet field name, value = an ordered map of field values for that field
|
||||
* @throws IOException
|
||||
*/
|
||||
@Override
|
||||
public ReversibleScoreMap<String> getFacet(String field, int maxresults) throws IOException {
|
||||
public Map<String, ReversibleScoreMap<String>> getFacets(String query, String[] fields, int maxresults) throws IOException {
|
||||
// construct query
|
||||
final SolrQuery params = new SolrQuery();
|
||||
params.setQuery("*:*");
|
||||
params.setQuery(query);
|
||||
params.setRows(0);
|
||||
params.setStart(0);
|
||||
params.setFacet(true);
|
||||
params.setFacetLimit(maxresults);
|
||||
params.setFacetSort(FacetParams.FACET_SORT_COUNT);
|
||||
params.addFacetField(field);
|
||||
for (String field: fields) params.addFacetField(field);
|
||||
|
||||
// query the server
|
||||
QueryResponse rsp = query(params);
|
||||
FacetField facet = rsp.getFacetField(field);
|
||||
ReversibleScoreMap<String> result = new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator);
|
||||
List<Count> values = facet.getValues();
|
||||
if (values == null) return result;
|
||||
for (Count ff: values) result.set(ff.getName(), (int) ff.getCount());
|
||||
return result;
|
||||
Map<String, ReversibleScoreMap<String>> facets = new HashMap<String, ReversibleScoreMap<String>>(fields.length);
|
||||
for (String field: fields) {
|
||||
FacetField facet = rsp.getFacetField(field);
|
||||
ReversibleScoreMap<String> result = new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator);
|
||||
List<Count> values = facet.getValues();
|
||||
if (values == null) continue;
|
||||
for (Count ff: values) result.set(ff.getName(), (int) ff.getCount());
|
||||
facets.put(field, result);
|
||||
}
|
||||
return facets;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -37,7 +37,6 @@ import java.util.concurrent.LinkedBlockingQueue;
|
||||
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
|
||||
import net.yacy.cora.document.ASCII;
|
||||
import net.yacy.cora.document.UTF8;
|
||||
import net.yacy.cora.federate.solr.connector.SolrConnector;
|
||||
import net.yacy.cora.order.Base64Order;
|
||||
@ -347,44 +346,5 @@ public class ZURL implements Iterable<ZURL.Entry> {
|
||||
|
||||
}
|
||||
|
||||
private class kiter implements Iterator<Entry> {
|
||||
// enumerates entry elements
|
||||
private final Iterator<Row.Entry> i;
|
||||
private boolean error = false;
|
||||
|
||||
private kiter(final boolean up, final String firstHash) throws IOException {
|
||||
this.i = ZURL.this.urlIndex.rows(up, (firstHash == null) ? null : ASCII.getBytes(firstHash));
|
||||
this.error = false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
if (this.error) return false;
|
||||
return this.i.hasNext();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Entry next() throws RuntimeException {
|
||||
final Row.Entry e = this.i.next();
|
||||
if (e == null) return null;
|
||||
try {
|
||||
return new Entry(e);
|
||||
} catch (final IOException ex) {
|
||||
throw new RuntimeException("error '" + ex.getMessage() + "' for hash " + e.getPrimaryKeyASCII());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
this.i.remove();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public Iterator<Entry> entries(final boolean up, final String firstHash) throws IOException {
|
||||
// enumerates entry elements
|
||||
return new kiter(up, firstHash);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -350,6 +350,7 @@ public final class Fulltext implements Iterable<byte[]> {
|
||||
for (byte[] urlHash: deleteIDs) {
|
||||
Fulltext.this.solr.delete(ASCII.String(urlHash));
|
||||
}
|
||||
Fulltext.this.solr.commit();
|
||||
}
|
||||
} catch (final Throwable e) {
|
||||
Log.logException(e);
|
||||
|
Reference in New Issue
Block a user