mirror of
https://github.com/yacy/yacy_search_server.git
synced 2025-07-21 09:04:37 -04:00
Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git
Conflicts: source/net/yacy/migration.java
This commit is contained in:
source/net/yacy
@ -24,7 +24,10 @@ package net.yacy;
|
||||
import net.yacy.search.index.ReindexSolrBusyThread;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import net.yacy.cora.order.Base64Order;
|
||||
import net.yacy.cora.order.Digest;
|
||||
@ -34,7 +37,6 @@ import net.yacy.search.Switchboard;
|
||||
import net.yacy.search.SwitchboardConstants;
|
||||
|
||||
import com.google.common.io.Files;
|
||||
import java.util.Iterator;
|
||||
import net.yacy.cora.storage.Configuration.Entry;
|
||||
import net.yacy.kelondro.data.meta.URIMetadataRow;
|
||||
import net.yacy.kelondro.index.Index;
|
||||
@ -42,6 +44,10 @@ import net.yacy.kelondro.index.Row;
|
||||
import net.yacy.kelondro.workflow.BusyThread;
|
||||
import net.yacy.search.index.Fulltext;
|
||||
import net.yacy.search.schema.CollectionConfiguration;
|
||||
import net.yacy.search.schema.CollectionSchema;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.request.LukeRequest;
|
||||
import org.apache.solr.client.solrj.response.LukeResponse;
|
||||
|
||||
public class migration {
|
||||
//SVN constants
|
||||
@ -354,34 +360,73 @@ public class migration {
|
||||
// a reindex job is already running
|
||||
if (bt != null) {
|
||||
return bt.getJobCount();
|
||||
}
|
||||
|
||||
ReindexSolrBusyThread reidx = new ReindexSolrBusyThread(null); // ("*:*" would reindex all)
|
||||
|
||||
// add all disabled fields
|
||||
CollectionConfiguration colcfg = Switchboard.getSwitchboard().index.fulltext().getDefaultConfiguration();
|
||||
Iterator<Entry> itcol = colcfg.entryIterator();
|
||||
while (itcol.hasNext()) {
|
||||
Entry etr = itcol.next();
|
||||
if (!etr.enabled()) {
|
||||
reidx.addSelectFieldname(etr.key());
|
||||
}
|
||||
}
|
||||
|
||||
// add obsolete fields (no longer part of main index)
|
||||
reidx.addSelectFieldname("inboundlinks_tag_txt");
|
||||
reidx.addSelectFieldname("inboundlinks_relflags_val");
|
||||
reidx.addSelectFieldname("inboundlinks_rel_sxt");
|
||||
reidx.addSelectFieldname("inboundlinks_text_txt");
|
||||
reidx.addSelectFieldname("inboundlinks_alttag_txt");
|
||||
|
||||
reidx.addSelectFieldname("outboundlinks_tag_txt");
|
||||
reidx.addSelectFieldname("outboundlinks_relflags_val");
|
||||
reidx.addSelectFieldname("outboundlinks_rel_sxt");
|
||||
reidx.addSelectFieldname("outboundlinks_text_txt");
|
||||
reidx.addSelectFieldname("outboundlinks_alttag_txt");
|
||||
|
||||
sb.deployThread("reindexSolr", "Reindex Solr", "reindex documents with obsolete fields in embedded Solr index", "/IndexReIndexMonitor_p.html",reidx /*privateWorkerThread*/, 0);
|
||||
boolean lukeCheckok = false;
|
||||
Set<String> omitFields = new HashSet<String>(3);
|
||||
omitFields.add(CollectionSchema.author_sxt.getSolrFieldName()); // special fields to exclude from disabled check
|
||||
omitFields.add(CollectionSchema.coordinate_p_0_coordinate.getSolrFieldName());
|
||||
omitFields.add(CollectionSchema.coordinate_p_1_coordinate.getSolrFieldName());
|
||||
CollectionConfiguration colcfg = Switchboard.getSwitchboard().index.fulltext().getDefaultConfiguration();
|
||||
ReindexSolrBusyThread reidx = new ReindexSolrBusyThread(null); // ("*:*" would reindex all);
|
||||
|
||||
try { // get all fields contained in index
|
||||
LukeRequest lukeRequest = new LukeRequest();
|
||||
lukeRequest.setNumTerms(1);
|
||||
LukeResponse lukeResponse = lukeRequest.process(Switchboard.getSwitchboard().index.fulltext().getDefaultEmbeddedConnector().getServer());
|
||||
|
||||
for (LukeResponse.FieldInfo solrfield : lukeResponse.getFieldInfo().values()) {
|
||||
if (!colcfg.contains(solrfield.getName()) && !omitFields.contains(solrfield.getName())) { // add found fields not in config for reindexing
|
||||
reidx.addSelectFieldname(solrfield.getName());
|
||||
}
|
||||
}
|
||||
lukeCheckok = true;
|
||||
} catch (SolrServerException ex) {
|
||||
Log.logException(ex);
|
||||
} catch (IOException ex) {
|
||||
Log.logException(ex);
|
||||
}
|
||||
|
||||
if (!lukeCheckok) { // if luke failed alternatively use config and manual list
|
||||
// add all disabled fields
|
||||
Iterator<Entry> itcol = colcfg.entryIterator();
|
||||
while (itcol.hasNext()) { // check for disabled fields in config
|
||||
Entry etr = itcol.next();
|
||||
if (!etr.enabled() && !omitFields.contains(etr.key())) {
|
||||
reidx.addSelectFieldname(etr.key());
|
||||
}
|
||||
}
|
||||
|
||||
// add obsolete fields (no longer part of main index)
|
||||
reidx.addSelectFieldname("author_s");
|
||||
reidx.addSelectFieldname("css_tag_txt");
|
||||
reidx.addSelectFieldname("css_url_txt");
|
||||
reidx.addSelectFieldname("scripts_txt");
|
||||
reidx.addSelectFieldname("images_tag_txt");
|
||||
reidx.addSelectFieldname("images_urlstub_txt");
|
||||
reidx.addSelectFieldname("canonical_t");
|
||||
reidx.addSelectFieldname("frames_txt");
|
||||
reidx.addSelectFieldname("iframes_txt");
|
||||
|
||||
reidx.addSelectFieldname("inboundlinks_tag_txt");
|
||||
reidx.addSelectFieldname("inboundlinks_relflags_val");
|
||||
reidx.addSelectFieldname("inboundlinks_name_txt");
|
||||
reidx.addSelectFieldname("inboundlinks_rel_sxt");
|
||||
reidx.addSelectFieldname("inboundlinks_text_txt");
|
||||
reidx.addSelectFieldname("inboundlinks_text_chars_val");
|
||||
reidx.addSelectFieldname("inboundlinks_text_words_val");
|
||||
reidx.addSelectFieldname("inboundlinks_alttag_txt");
|
||||
|
||||
reidx.addSelectFieldname("outboundlinks_tag_txt");
|
||||
reidx.addSelectFieldname("outboundlinks_relflags_val");
|
||||
reidx.addSelectFieldname("outboundlinks_name_txt");
|
||||
reidx.addSelectFieldname("outboundlinks_rel_sxt");
|
||||
reidx.addSelectFieldname("outboundlinks_text_txt");
|
||||
reidx.addSelectFieldname("outboundlinks_text_chars_val");
|
||||
reidx.addSelectFieldname("outboundlinks_text_words_val");
|
||||
reidx.addSelectFieldname("outboundlinks_alttag_txt");
|
||||
}
|
||||
sb.deployThread("reindexSolr", "Reindex Solr", "reindex documents with obsolete fields in embedded Solr index", "/IndexReIndexMonitor_p.html",reidx , 0);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
@ -24,9 +24,15 @@ import net.yacy.kelondro.logging.Log;
|
||||
import net.yacy.search.Switchboard;
|
||||
import java.util.ArrayList;
|
||||
import java.util.concurrent.Semaphore;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector;
|
||||
import net.yacy.kelondro.workflow.AbstractBusyThread;
|
||||
import net.yacy.search.schema.CollectionConfiguration;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.request.LukeRequest;
|
||||
import org.apache.solr.client.solrj.response.LukeResponse;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
@ -72,7 +78,7 @@ import org.apache.solr.common.SolrInputDocument;
|
||||
}
|
||||
setName("reindexSolr");
|
||||
this.setPriority(Thread.MIN_PRIORITY);
|
||||
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
@ -126,14 +132,13 @@ import org.apache.solr.common.SolrInputDocument;
|
||||
} else {
|
||||
Log.logInfo("MIGRATION-REINDEX", "reindex docs with query=" + query + " found=" + docstoreindex + " start=" + start);
|
||||
start = start + chunksize;
|
||||
}
|
||||
|
||||
for (SolrDocument doc : xdocs) {
|
||||
SolrInputDocument idoc = colcfg.toSolrInputDocument(doc);
|
||||
Switchboard.getSwitchboard().index.fulltext().putDocument(idoc);
|
||||
processed++;
|
||||
}
|
||||
|
||||
|
||||
for (SolrDocument doc : xdocs) {
|
||||
SolrInputDocument idoc = colcfg.toSolrInputDocument(doc);
|
||||
Switchboard.getSwitchboard().index.fulltext().putDocument(idoc);
|
||||
processed++;
|
||||
}
|
||||
}
|
||||
} catch (IOException ex) {
|
||||
Log.logException(ex);
|
||||
} finally {
|
||||
@ -152,11 +157,15 @@ import org.apache.solr.common.SolrInputDocument;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void terminate(final boolean waitFor) {
|
||||
querylist.clear();
|
||||
super.terminate(waitFor);
|
||||
}
|
||||
/**
 * Terminates this reindex thread.
 * <p>
 * Clears the list of select queries, issues a commit through {@code esc}
 * when documents were found and at least one has already been processed,
 * and finally delegates to the superclass implementation.
 * NOTE(review): {@code esc} is presumably the embedded Solr connector of
 * this thread - confirm against the field declaration.
 *
 * @param waitFor passed through unchanged to {@code super.terminate}
 */
@Override
|
||||
public void terminate(final boolean waitFor) {
|
||||
// discard all select queries that are still in the list
querylist.clear();
|
||||
// if interrupted before the job has finished, commit so that the latest changes are reflected
|
||||
if (docstoreindex > 0 && processed > 0) {
|
||||
esc.commit(true);
|
||||
}
|
||||
super.terminate(waitFor);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return total number of processed documents
|
||||
@ -168,7 +177,7 @@ import org.apache.solr.common.SolrInputDocument;
|
||||
/**
|
||||
* @return the currently processed Solr select query
|
||||
*/
|
||||
public String getCurrentQuery() {
|
||||
public String getCurrentQuery() {
|
||||
return querylist.isEmpty() ? "" : querylist.get(0);
|
||||
}
|
||||
|
||||
@ -186,6 +195,8 @@ import org.apache.solr.common.SolrInputDocument;
|
||||
if (chunksize > 2) {
|
||||
this.chunksize = this.chunksize / 2;
|
||||
}
|
||||
esc.commit(true);
|
||||
start = 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
Reference in New Issue
Block a user