mirror of
https://github.com/yacy/yacy_search_server.git
synced 2025-07-18 08:36:07 -04:00
- changed solr commit call and added an optimize option. Since Solr
4.0.0 there is a new softcommit feature which implements a near-real-time (NRT) search option. The softcommit does not do IO and does not cause performance issues. YaCy now has an extension in its solr connectors to use the softcommit feature. The softcommit call now replaces all places where a hard commit was used. Furthermore, the commit strategy when doing a search from the web interface was changed (a softcommit is now done every time before a search is done). The softcommit feature was implemented because it was needed for the following changes (customer demands), which are also included in this git commit: - added a feature to identify all documents which have unique titles and/or unique descriptions. These unique flags are disabled by default. - also added a feature to set a flag when the url from a canonical tag is equal to the document url. This is also disabled by default. To support the new softcommit strategy, the commitWithinMs option was set to -1 to disable automatic commits based on document insert times. If documents are inserted continuously, then a commit would also happen continuously whenever the commitWithinMs time is reached. This would conflict with the regular autocommit of 10 minutes and the new softcommit strategy.
This commit is contained in:
@ -1056,7 +1056,7 @@ color_searchurlhover = #008000
|
||||
# - to check what's in solr after indexing, open http://localhost:8983/solr/admin/
|
||||
federated.service.solr.indexing.enabled = false
|
||||
federated.service.solr.indexing.url = http://127.0.0.1:8983/solr
|
||||
federated.service.solr.indexing.commitWithinMs = 180000
|
||||
federated.service.solr.indexing.commitWithinMs = -1
|
||||
federated.service.solr.indexing.sharding = MODULO_HOST_MD5
|
||||
federated.service.solr.indexing.schemefile = solr.keys.default.list
|
||||
# the lazy attribute causes fields containing "" or 0 to be neither added nor written
|
||||
|
@ -371,7 +371,7 @@ public class Crawler_p {
|
||||
try {
|
||||
sb.crawlQueues.errorURL.removeHost(ASCII.getBytes(hosthash));
|
||||
sb.index.fulltext().getSolr().deleteByQuery(YaCySchema.host_id_s.getSolrFieldName() + ":\"" + hosthash + "\" AND " + YaCySchema.failreason_t.getSolrFieldName() + ":[* TO *]");
|
||||
sb.index.fulltext().commit();
|
||||
sb.index.fulltext().commit(true);
|
||||
} catch (IOException e) {Log.logException(e);}
|
||||
}
|
||||
|
||||
|
@ -96,7 +96,7 @@ public class HostBrowser {
|
||||
}
|
||||
|
||||
String path = post == null ? "" : post.get("path", "").trim();
|
||||
if (admin && path.length() == 0) sb.index.fulltext().commit();
|
||||
sb.index.fulltext().commit(true);
|
||||
if (post == null || env == null) {
|
||||
return prop;
|
||||
}
|
||||
|
@ -77,7 +77,7 @@ public class IndexFederated_p {
|
||||
final boolean previous_core_fulltext = sb.index.fulltext().connectedLocalSolr() && env.getConfigBool(SwitchboardConstants.CORE_SERVICE_FULLTEXT, false);
|
||||
env.setConfig(SwitchboardConstants.CORE_SERVICE_FULLTEXT, post_core_fulltext);
|
||||
|
||||
final int commitWithinMs = post.getInt("solr.indexing.commitWithinMs", env.getConfigInt(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_COMMITWITHINMS, 180000));
|
||||
final int commitWithinMs = post.getInt("solr.indexing.commitWithinMs", env.getConfigInt(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_COMMITWITHINMS, -1));
|
||||
if (previous_core_fulltext && !post_core_fulltext) {
|
||||
// switch off
|
||||
sb.index.fulltext().disconnectLocalSolr();
|
||||
|
@ -61,10 +61,6 @@ public class index {
|
||||
return prop;
|
||||
}
|
||||
}
|
||||
|
||||
if (authorizedAccess) {
|
||||
sb.index.fulltext().commit(); // call this only as superuser to prevent that this can be misused for DoS
|
||||
}
|
||||
|
||||
boolean global = (post == null) ? true : post.get("resource", "global").equals("global");
|
||||
final boolean focus = (post == null) ? true : post.get("focus", "1").equals("1");
|
||||
|
@ -47,11 +47,6 @@ public class yacyinteractive {
|
||||
prop.put("promoteSearchPageGreeting.homepage", sb.getConfig(SwitchboardConstants.GREETING_HOMEPAGE, ""));
|
||||
prop.put("promoteSearchPageGreeting.smallImage", sb.getConfig(SwitchboardConstants.GREETING_SMALL_IMAGE, ""));
|
||||
|
||||
final boolean admin = sb.verifyAuthentication(header);
|
||||
if (admin) {
|
||||
sb.index.fulltext().commit();
|
||||
}
|
||||
|
||||
final String query = (post == null) ? "" : post.get("query", "");
|
||||
final String startRecord = (post == null) ? "0" : post.get("startRecord", "");
|
||||
final String maximumRecords = (post == null) ? "10" : post.get("maximumRecords", "");
|
||||
|
@ -117,7 +117,6 @@ public class yacysearch {
|
||||
final String originalquerystring = (post == null) ? "" : post.get("query", post.get("search", "")).trim();
|
||||
String querystring = originalquerystring.replace('+', ' ').trim();
|
||||
CacheStrategy snippetFetchStrategy = (post == null) ? null : CacheStrategy.parse(post.get("verify", sb.getConfig("search.verify", "")));
|
||||
if (authenticated && originalquerystring.length() == 0) sb.index.fulltext().commit();
|
||||
|
||||
final servletProperties prop = new servletProperties();
|
||||
prop.put("topmenu", sb.getConfigBool("publicTopmenu", true) ? 1 : 0);
|
||||
|
@ -146,7 +146,7 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
|
||||
|
||||
@Override
|
||||
public synchronized void close() {
|
||||
try {this.commit();} catch (Throwable e) {Log.logException(e);}
|
||||
try {this.commit(false);} catch (Throwable e) {Log.logException(e);}
|
||||
try {super.close();} catch (Throwable e) {Log.logException(e);}
|
||||
try {this.defaultCore.close();} catch (Throwable e) {Log.logException(e);}
|
||||
try {this.cores.shutdown();} catch (Throwable e) {Log.logException(e);}
|
||||
@ -213,7 +213,6 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
|
||||
storage.mkdirs();
|
||||
try {
|
||||
EmbeddedSolrConnector solr = new EmbeddedSolrConnector(storage, solr_config);
|
||||
solr.setCommitWithinMs(100);
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
doc.addField(YaCySchema.id.name(), "ABCD0000abcd");
|
||||
doc.addField(YaCySchema.title.name(), "Lorem ipsum");
|
||||
|
@ -171,9 +171,18 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
|
||||
}
|
||||
|
||||
@Override
|
||||
public void commit() {
|
||||
if (this.solr0 != null) this.solr0.commit();
|
||||
if (this.solr1 != null) this.solr1.commit();
|
||||
public void commit(boolean softCommit) {
|
||||
if (this.solr0 != null) this.solr0.commit(softCommit);
|
||||
if (this.solr1 != null) this.solr1.commit(softCommit);
|
||||
}
|
||||
|
||||
/**
|
||||
* force an explicit merge of segments
|
||||
* @param maxSegments the maximum number of segments. Set to 1 for maximum optimization
|
||||
*/
|
||||
public void optimize(int maxSegments) {
|
||||
if (this.solr0 != null) this.solr0.optimize(maxSegments);
|
||||
if (this.solr1 != null) this.solr1.optimize(maxSegments);
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -320,7 +329,7 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
|
||||
// check if there is an autocommit problem
|
||||
if (c.hitCache.containsKey(key)) {
|
||||
// the document should be there, therefore make a commit and check again
|
||||
this.commit();
|
||||
this.commit(true);
|
||||
if ((solr0 != null && ((doc = solr0.getById(key, fields)) != null)) || (solr1 != null && ((doc = solr1.getById(key, fields)) != null))) {
|
||||
addToCache(doc, fields.length == 0);
|
||||
return doc;
|
||||
|
@ -47,7 +47,7 @@ public class MultipleSolrConnector extends AbstractSolrConnector implements Solr
|
||||
this.solr = new RemoteSolrConnector(url);
|
||||
this.queue = new ArrayBlockingQueue<SolrInputDocument>(1000);
|
||||
this.worker = new AddWorker[connections];
|
||||
this.commitWithinMs = 180000;
|
||||
this.commitWithinMs = -1;
|
||||
for (int i = 0; i < connections; i++) {
|
||||
this.worker[i] = new AddWorker(url);
|
||||
this.worker[i].start();
|
||||
@ -58,7 +58,7 @@ public class MultipleSolrConnector extends AbstractSolrConnector implements Solr
|
||||
private final SolrConnector solr;
|
||||
public AddWorker(final String url) throws IOException {
|
||||
this.solr = new RemoteSolrConnector(url);
|
||||
this.solr.setCommitWithinMs(MultipleSolrConnector.this.commitWithinMs);
|
||||
if (MultipleSolrConnector.this.commitWithinMs >= 0 ) this.solr.setCommitWithinMs(MultipleSolrConnector.this.commitWithinMs);
|
||||
}
|
||||
@Override
|
||||
public void run() {
|
||||
@ -97,8 +97,16 @@ public class MultipleSolrConnector extends AbstractSolrConnector implements Solr
|
||||
}
|
||||
|
||||
@Override
|
||||
public void commit() {
|
||||
this.solr.commit();
|
||||
public void commit(boolean softCommit) {
|
||||
this.solr.commit(softCommit);
|
||||
}
|
||||
|
||||
/**
|
||||
* force an explicit merge of segments
|
||||
* @param maxSegments the maximum number of segments. Set to 1 for maximum optimization
|
||||
*/
|
||||
public void optimize(int maxSegments) {
|
||||
this.solr.optimize(maxSegments);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -59,8 +59,16 @@ public class RetrySolrConnector extends AbstractSolrConnector implements SolrCon
|
||||
}
|
||||
|
||||
@Override
|
||||
public void commit() {
|
||||
this.solrConnector.commit();
|
||||
public void commit(boolean softCommit) {
|
||||
this.solrConnector.commit(softCommit);
|
||||
}
|
||||
|
||||
/**
|
||||
* force an explicit merge of segments
|
||||
* @param maxSegments the maximum number of segments. Set to 1 for maximum optimization
|
||||
*/
|
||||
public void optimize(int maxSegments) {
|
||||
this.solrConnector.optimize(maxSegments);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -73,8 +73,16 @@ public class ShardSolrConnector extends AbstractSolrConnector implements SolrCon
|
||||
}
|
||||
|
||||
@Override
|
||||
public void commit() {
|
||||
for (final SolrConnector connector: this.connectors) connector.commit();
|
||||
public void commit(boolean softCommit) {
|
||||
for (final SolrConnector connector: this.connectors) connector.commit(softCommit);
|
||||
}
|
||||
|
||||
/**
|
||||
* force an explicit merge of segments
|
||||
* @param maxSegments the maximum number of segments. Set to 1 for maximum optimization
|
||||
*/
|
||||
public void optimize(int maxSegments) {
|
||||
for (final SolrConnector connector: this.connectors) connector.optimize(maxSegments);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -51,7 +51,13 @@ public interface SolrConnector extends Iterable<String> /* Iterable of document
|
||||
/**
|
||||
* force a commit
|
||||
*/
|
||||
public void commit();
|
||||
public void commit(boolean softCommit);
|
||||
|
||||
/**
|
||||
* force an explicit merge of segments
|
||||
* @param maxSegments the maximum number of segments. Set to 1 for maximum optimization
|
||||
*/
|
||||
public void optimize(int maxSegments);
|
||||
|
||||
/**
|
||||
* close the server connection
|
||||
|
@ -91,6 +91,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
|
||||
|
||||
/**
|
||||
* set the solr autocommit delay
|
||||
* when doing continuous inserts, don't set this value because it would cause continuous commits
|
||||
* @param c the maximum waiting time after a solr command until it is transported to the server
|
||||
*/
|
||||
@Override
|
||||
@ -99,9 +100,21 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void commit() {
|
||||
public synchronized void commit(final boolean softCommit) {
|
||||
try {
|
||||
this.server.commit();
|
||||
this.server.commit(true, true, softCommit);
|
||||
} catch (SolrServerException e) {
|
||||
} catch (IOException e) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* force an explicit merge of segments
|
||||
* @param maxSegments the maximum number of segments. Set to 1 for maximum optimization
|
||||
*/
|
||||
public void optimize(int maxSegments) {
|
||||
try {
|
||||
this.server.optimize(true, true, maxSegments);
|
||||
} catch (SolrServerException e) {
|
||||
} catch (IOException e) {
|
||||
}
|
||||
@ -110,7 +123,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
|
||||
@Override
|
||||
public synchronized void close() {
|
||||
try {
|
||||
if (this.server != null) synchronized (this.server) {this.server.commit();}
|
||||
if (this.server != null) synchronized (this.server) {this.server.commit(true, true, false);}
|
||||
this.server = null;
|
||||
} catch (SolrServerException e) {
|
||||
log.warn(e);
|
||||
@ -194,7 +207,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
|
||||
try {
|
||||
synchronized (this.server) {
|
||||
this.server.deleteByQuery("*:*");
|
||||
this.server.commit();
|
||||
this.server.commit(true, true, false);
|
||||
}
|
||||
} catch (final Throwable e) {
|
||||
throw new IOException(e);
|
||||
@ -234,7 +247,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
|
||||
synchronized (this.server) {
|
||||
long c0 = this.getQueryCount(querystring);
|
||||
this.server.deleteByQuery(querystring, this.commitWithinMs);
|
||||
this.commit();
|
||||
this.commit(true);
|
||||
long c1 = this.getQueryCount(querystring);
|
||||
return (int) (c1 - c0);
|
||||
}
|
||||
@ -254,7 +267,6 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
|
||||
try {
|
||||
synchronized (this.server) {
|
||||
this.server.request(up);
|
||||
//this.server.commit();
|
||||
}
|
||||
} catch (final Throwable e) {
|
||||
throw new IOException(e);
|
||||
@ -273,7 +285,6 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
|
||||
// catches "version conflict for": try this again and delete the document in advance
|
||||
try {
|
||||
this.server.deleteById((String) solrdoc.getFieldValue(YaCySchema.id.getSolrFieldName()));
|
||||
//this.server.commit();
|
||||
} catch (SolrServerException e1) {}
|
||||
try {
|
||||
synchronized (this.server) {
|
||||
|
@ -319,7 +319,7 @@ public class migration {
|
||||
}
|
||||
Log.logInfo("migrateUrldbtoSolr", Integer.toString(i) + " entries left (convert next chunk of 1000 entries)");
|
||||
}
|
||||
ft.commit();
|
||||
ft.commit(true);
|
||||
|
||||
} catch (IOException ex) {
|
||||
Log.logInfo("migrateUrldbtoSolr", "error reading old urldb index");
|
||||
|
@ -431,7 +431,7 @@ public final class Switchboard extends serverSwitch {
|
||||
ReferenceContainer.maxReferences = getConfigInt("index.maxReferences", 0);
|
||||
final File segmentsPath = new File(new File(indexPath, networkName), "SEGMENTS");
|
||||
this.index = new Segment(this.log, new File(segmentsPath, "default"), solrScheme);
|
||||
final int connectWithinMs = this.getConfigInt(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_COMMITWITHINMS, 180000);
|
||||
final int connectWithinMs = this.getConfigInt(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_COMMITWITHINMS, -1);
|
||||
if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_RWI, true)) this.index.connectRWI(wordCacheMaxCount, fileSizeMax);
|
||||
if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_CITATION, true)) this.index.connectCitation(wordCacheMaxCount, fileSizeMax);
|
||||
if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_FULLTEXT, true)) {
|
||||
@ -1288,7 +1288,7 @@ public final class Switchboard extends serverSwitch {
|
||||
this.useTailCache,
|
||||
this.exceed134217727);
|
||||
this.index = new Segment(this.log, new File(new File(new File(indexPrimaryPath, networkName), "SEGMENTS"), "default"), solrScheme);
|
||||
final int connectWithinMs = this.getConfigInt(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_COMMITWITHINMS, 180000);
|
||||
final int connectWithinMs = this.getConfigInt(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_COMMITWITHINMS, -1);
|
||||
if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_RWI, true)) this.index.connectRWI(wordCacheMaxCount, fileSizeMax);
|
||||
if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_CITATION, true)) this.index.connectCitation(wordCacheMaxCount, fileSizeMax);
|
||||
if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_FULLTEXT, true)) {
|
||||
@ -1306,7 +1306,7 @@ public final class Switchboard extends serverSwitch {
|
||||
solrurls,
|
||||
ShardSelection.Method.MODULO_HOST_MD5,
|
||||
10000, true);
|
||||
solr.setCommitWithinMs(connectWithinMs);
|
||||
if (connectWithinMs >= 0) solr.setCommitWithinMs(connectWithinMs);
|
||||
this.index.fulltext().connectRemoteSolr(solr);
|
||||
} catch ( final IOException e ) {
|
||||
Log.logException(e);
|
||||
@ -2232,7 +2232,7 @@ public final class Switchboard extends serverSwitch {
|
||||
// execute the (post-) processing steps for all entries that have a process tag assigned
|
||||
if (this.crawlQueues.coreCrawlJobSize() == 0 && index.connectedCitation() && index.fulltext().getSolrScheme().contains(YaCySchema.process_sxt)) {
|
||||
// that means we must search for those entries.
|
||||
index.fulltext().getSolr().commit(); // make sure that we have latest information that can be found
|
||||
index.fulltext().getSolr().commit(true); // make sure that we have latest information that can be found
|
||||
BlockingQueue<SolrDocument> docs = index.fulltext().getSolr().concurrentQuery(YaCySchema.process_sxt.getSolrFieldName() + ":[* TO *]", 0, 1000, 60000, 10);
|
||||
SolrDocument doc;
|
||||
int proccount_clickdepth = 0;
|
||||
|
@ -70,7 +70,7 @@ public class DocumentIndex extends Segment {
|
||||
false, // useTailCache
|
||||
false // exceed134217727
|
||||
);
|
||||
super.fulltext().connectLocalSolr(1000);
|
||||
super.fulltext().connectLocalSolr(-1);
|
||||
final int cores = Runtime.getRuntime().availableProcessors() + 1;
|
||||
this.callback = callback;
|
||||
this.queue = new LinkedBlockingQueue<DigestURI>(cores * 300);
|
||||
|
@ -77,8 +77,6 @@ public final class Fulltext implements Iterable<byte[]> {
|
||||
private static final String SOLR_PATH = "solr_40"; // the number should be identical to the number in the property luceneMatchVersion in solrconfig.xml
|
||||
private static final String SOLR_OLD_PATH[] = new String[]{"solr_36"};
|
||||
|
||||
private static final long forcedCommitTimeout = 3000; // wait this time until a next forced commit is executed
|
||||
|
||||
// class objects
|
||||
private final File location;
|
||||
private Index urlIndexFile;
|
||||
@ -87,7 +85,6 @@ public final class Fulltext implements Iterable<byte[]> {
|
||||
private ArrayList<HostStat> statsDump;
|
||||
private final MirrorSolrConnector solr;
|
||||
private final SolrConfiguration solrScheme;
|
||||
private long forcedCommitTime;
|
||||
|
||||
protected Fulltext(final File path, final SolrConfiguration solrScheme) {
|
||||
this.location = path;
|
||||
@ -97,7 +94,6 @@ public final class Fulltext implements Iterable<byte[]> {
|
||||
this.statsDump = null;
|
||||
this.solr = new MirrorSolrConnector(10000, 10000, 100);
|
||||
this.solrScheme = solrScheme;
|
||||
this.forcedCommitTime = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -158,7 +154,7 @@ public final class Fulltext implements Iterable<byte[]> {
|
||||
if (oldLocation.exists()) oldLocation.renameTo(solrLocation);
|
||||
}
|
||||
EmbeddedSolrConnector esc = new EmbeddedSolrConnector(solrLocation, new File(new File(Switchboard.getSwitchboard().appPath, "defaults"), "solr"));
|
||||
esc.setCommitWithinMs(commitWithin);
|
||||
if (commitWithin >= 0) esc.setCommitWithinMs(commitWithin);
|
||||
Version luceneVersion = esc.getConfig().getLuceneVersion("luceneMatchVersion");
|
||||
String lvn = luceneVersion.name();
|
||||
int p = lvn.indexOf('_');
|
||||
@ -239,11 +235,8 @@ public final class Fulltext implements Iterable<byte[]> {
|
||||
return this.solr.getCommitWithinMs();
|
||||
}
|
||||
|
||||
public void commit() {
|
||||
if (this.forcedCommitTime + forcedCommitTimeout > System.currentTimeMillis()) return;
|
||||
this.forcedCommitTime = Long.MAX_VALUE - forcedCommitTimeout; // set the time high to prevent that other processes get to this point meanwhile
|
||||
this.solr.commit();
|
||||
this.forcedCommitTime = System.currentTimeMillis(); // set the exact time
|
||||
public void commit(boolean softCommit) {
|
||||
this.solr.commit(softCommit);
|
||||
}
|
||||
|
||||
public Date getLoadDate(final String urlHash) {
|
||||
@ -378,7 +371,7 @@ public final class Fulltext implements Iterable<byte[]> {
|
||||
synchronized (Fulltext.this.solr) {
|
||||
try {
|
||||
count.addAndGet(Fulltext.this.solr.deleteByQuery(q));
|
||||
if (count.get() > 0) Fulltext.this.solr.commit();
|
||||
if (count.get() > 0) Fulltext.this.solr.commit(true);
|
||||
} catch (IOException e) {}
|
||||
}
|
||||
|
||||
@ -444,7 +437,7 @@ public final class Fulltext implements Iterable<byte[]> {
|
||||
count.incrementAndGet();
|
||||
}
|
||||
}
|
||||
if (count.get() > 0) Fulltext.this.solr.commit();
|
||||
if (count.get() > 0) Fulltext.this.solr.commit(true);
|
||||
} catch (InterruptedException e) {}
|
||||
}
|
||||
};
|
||||
@ -466,7 +459,7 @@ public final class Fulltext implements Iterable<byte[]> {
|
||||
for (byte[] urlHash: deleteIDs) {
|
||||
Fulltext.this.solr.delete(ASCII.String(urlHash));
|
||||
}
|
||||
Fulltext.this.solr.commit();
|
||||
Fulltext.this.solr.commit(true);
|
||||
}
|
||||
} catch (final Throwable e) {
|
||||
Log.logException(e);
|
||||
|
@ -35,6 +35,9 @@ import java.util.Properties;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
|
||||
import org.apache.solr.client.solrj.util.ClientUtils;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
|
||||
import net.yacy.cora.document.ASCII;
|
||||
@ -68,6 +71,7 @@ import net.yacy.kelondro.rwi.ReferenceContainer;
|
||||
import net.yacy.kelondro.rwi.ReferenceFactory;
|
||||
import net.yacy.kelondro.util.Bitfield;
|
||||
import net.yacy.kelondro.util.ISO639;
|
||||
import net.yacy.kelondro.util.MemoryControl;
|
||||
import net.yacy.repository.LoaderDispatcher;
|
||||
import net.yacy.search.Switchboard;
|
||||
import net.yacy.search.SwitchboardConstants;
|
||||
@ -347,6 +351,21 @@ public class Segment {
|
||||
) {
|
||||
final long startTime = System.currentTimeMillis();
|
||||
|
||||
// DO A SOFT/HARD COMMIT IF NEEDED
|
||||
if (MemoryControl.shortStatus()) {
|
||||
// do a 'hard' commit to flush index caches
|
||||
this.fulltext.getSolr().commit(false);
|
||||
} else {
|
||||
if (
|
||||
(this.fulltext.getSolrScheme().contains(YaCySchema.exact_signature_l) && this.fulltext.getSolrScheme().contains(YaCySchema.exact_signature_unique_b)) ||
|
||||
(this.fulltext.getSolrScheme().contains(YaCySchema.fuzzy_signature_l) && this.fulltext.getSolrScheme().contains(YaCySchema.fuzzy_signature_unique_b)) ||
|
||||
this.fulltext.getSolrScheme().contains(YaCySchema.title_unique_b) ||
|
||||
this.fulltext.getSolrScheme().contains(YaCySchema.description_unique_b)
|
||||
) {
|
||||
this.fulltext.getSolr().commit(true); // make sure that we have latest information for the postprocessing steps
|
||||
}
|
||||
}
|
||||
|
||||
// CREATE INDEX
|
||||
|
||||
// load some document metadata
|
||||
@ -368,13 +387,13 @@ public class Segment {
|
||||
for (YaCySchema[] checkfields: new YaCySchema[][]{
|
||||
{YaCySchema.exact_signature_l, YaCySchema.exact_signature_unique_b},
|
||||
{YaCySchema.fuzzy_signature_l, YaCySchema.fuzzy_signature_unique_b}}) {
|
||||
YaCySchema hashfield = checkfields[0];
|
||||
YaCySchema checkfield = checkfields[0];
|
||||
YaCySchema uniquefield = checkfields[1];
|
||||
if (this.fulltext.getSolrScheme().contains(hashfield) && this.fulltext.getSolrScheme().contains(uniquefield)) {
|
||||
if (this.fulltext.getSolrScheme().contains(checkfield) && this.fulltext.getSolrScheme().contains(uniquefield)) {
|
||||
// lookup the document with the same signature
|
||||
long signature = ((Long) solrInputDoc.getField(hashfield.getSolrFieldName()).getValue()).longValue();
|
||||
long signature = ((Long) solrInputDoc.getField(checkfield.getSolrFieldName()).getValue()).longValue();
|
||||
try {
|
||||
if (this.fulltext.getSolr().exists(hashfield.getSolrFieldName(), Long.toString(signature))) {
|
||||
if (this.fulltext.getSolr().exists(checkfield.getSolrFieldName(), Long.toString(signature))) {
|
||||
// change unique attribute in content
|
||||
solrInputDoc.setField(uniquefield.getSolrFieldName(), false);
|
||||
}
|
||||
@ -382,13 +401,44 @@ public class Segment {
|
||||
}
|
||||
}
|
||||
|
||||
// CHECK IF TITLE AND DESCRIPTION IS UNIQUE (this is by default not switched on)
|
||||
uniquecheck: for (YaCySchema[] checkfields: new YaCySchema[][]{
|
||||
{YaCySchema.title, YaCySchema.title_unique_b},
|
||||
{YaCySchema.description, YaCySchema.description_unique_b}}) {
|
||||
YaCySchema checkfield = checkfields[0];
|
||||
YaCySchema uniquefield = checkfields[1];
|
||||
if (this.fulltext.getSolrScheme().contains(checkfield) && this.fulltext.getSolrScheme().contains(uniquefield)) {
|
||||
// lookup in the index for the same title
|
||||
String checkstring = checkfield == YaCySchema.title ? document.dc_title() : document.dc_description();
|
||||
if (checkstring.length() == 0) {
|
||||
solrInputDoc.setField(uniquefield.getSolrFieldName(), false);
|
||||
continue uniquecheck;
|
||||
}
|
||||
checkstring = ClientUtils.escapeQueryChars("\"" + checkstring + "\"");
|
||||
try {
|
||||
if (this.fulltext.getSolr().exists(checkfield.getSolrFieldName(), checkstring)) {
|
||||
// switch unique attribute in new document
|
||||
solrInputDoc.setField(uniquefield.getSolrFieldName(), false);
|
||||
// switch attribute also in all existing documents (which should be exactly only one!)
|
||||
SolrDocumentList docs = this.fulltext.getSolr().query(checkfield.getSolrFieldName() + ":" + checkstring + " AND " + uniquefield.getSolrFieldName() + ":true", 0, 1000, YaCySchema.id.getSolrFieldName());
|
||||
for (SolrDocument doc: docs) {
|
||||
SolrInputDocument sid = ClientUtils.toSolrInputDocument(doc);
|
||||
sid.setField(uniquefield.getSolrFieldName(), false);
|
||||
this.fulltext.getSolr().add(sid);
|
||||
}
|
||||
} else {
|
||||
solrInputDoc.setField(uniquefield.getSolrFieldName(), true);
|
||||
}
|
||||
} catch (IOException e) {}
|
||||
}
|
||||
}
|
||||
|
||||
// ENRICH DOCUMENT WITH RANKING INFORMATION
|
||||
if (this.urlCitationIndex != null && this.fulltext.getSolrScheme().contains(YaCySchema.references_i)) {
|
||||
int references = this.urlCitationIndex.count(url.hash());
|
||||
if (references > 0) solrInputDoc.setField(YaCySchema.references_i.getSolrFieldName(), references);
|
||||
}
|
||||
|
||||
|
||||
// STORE TO SOLR
|
||||
String error = null;
|
||||
tryloop: for (int i = 0; i < 20; i++) {
|
||||
@ -399,7 +449,7 @@ public class Segment {
|
||||
} catch ( final IOException e ) {
|
||||
error = "failed to send " + urlNormalform + " to solr";
|
||||
Log.logWarning("SOLR", error + e.getMessage());
|
||||
if (i == 10) this.fulltext.commit();
|
||||
if (i == 10) this.fulltext.commit(false);
|
||||
try {Thread.sleep(1000);} catch (InterruptedException e1) {}
|
||||
continue tryloop;
|
||||
}
|
||||
|
@ -645,6 +645,10 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
|
||||
inboundLinks.remove(canonical);
|
||||
outboundLinks.remove(canonical);
|
||||
add(doc, YaCySchema.canonical_t, canonical.toNormalform(false));
|
||||
// set a flag if this is equal to sku
|
||||
if (contains(YaCySchema.canonical_equal_sku_b) && canonical.equals(docurl)) {
|
||||
add(doc, YaCySchema.canonical_equal_sku_b, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -210,6 +210,9 @@ public final class SearchEvent {
|
||||
.getFlagAcceptRemoteIndex()));
|
||||
final long start = System.currentTimeMillis();
|
||||
|
||||
// do a soft commit for fresh results
|
||||
query.getSegment().fulltext().commit(true);
|
||||
|
||||
// prepare a local RWI search
|
||||
// initialize a ranking process that is the target for data
|
||||
// that is generated concurrently from local and global search threads
|
||||
|
Reference in New Issue
Block a user