1
0
mirror of https://github.com/yacy/yacy_search_server.git synced 2025-07-17 08:26:08 -04:00

content of surrogates/out never accessed (remove)

After import, the content is never accessed but may take up a lot of disk space.
Also, the getLoadedOAIServer method (which lists the files in surrogates/out) is not used,
making surrogates.out obsolete. Removed the keeping of XML files after import.
This commit is contained in:
reger
2014-05-04 09:29:07 +02:00
parent d781fcd809
commit 8a7c68e4c7
3 changed files with 8 additions and 72 deletions

@ -22,13 +22,8 @@
package net.yacy.document.importer;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.text.ParseException;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.date.GenericFormatter;
@ -181,43 +176,12 @@ public class OAIPMHImporter extends Thread implements Importer, Comparable<OAIPM
return 0;
}
/**
 * Get a map of already loaded OAI-PMH servers and their latest access date.
 * Both surrogate directories are scanned; when a host appears in both,
 * the later of the two dates is kept.
 * @param surrogatesIn directory scanned for surrogate files
 * @param surrogatesOut directory scanned for surrogate files
 * @return a map where the key is the hostID of the servers and the value is the last access date
 */
public static Map<String, Date> getLoadedOAIServer(File surrogatesIn, File surrogatesOut) {
    final Map<String, Date> map = getLoadedOAIServer(surrogatesOut);
    // Merge entry by entry: the former putAll((Map) ...entrySet()) cast a Set to a Map
    // and therefore failed with a ClassCastException on every call.
    for (final Map.Entry<String, Date> entry : getLoadedOAIServer(surrogatesIn).entrySet()) {
        final Date known = map.get(entry.getKey());
        if (known == null || entry.getValue().after(known)) {
            map.put(entry.getKey(), entry.getValue());
        }
    }
    return map;
}
/**
 * Scan one surrogate directory and collect, per host id, the newest timestamp
 * encoded in the names of the OAI-PMH surrogate files found there.
 * A file name is accepted when it starts with {@code filenamePrefix}, ends with
 * ".xml" and carries {@code filenameSeparationChar} directly before a 17-character
 * timestamp; the host id is the text between the prefix (plus one separator
 * character) and that separator.
 * @param surrogates the directory to scan; may be null or not a directory
 * @return a map from host id to the latest date found; empty if nothing matched
 */
private static Map<String, Date> getLoadedOAIServer(File surrogates) {
    final HashMap<String, Date> map = new HashMap<String, Date>();
    // File.list() yields null when the path is not a directory or cannot be read
    final String[] names = (surrogates == null) ? null : surrogates.list();
    if (names == null) return map;

    // NOTE(review): the historic example "oaipmh_opus.bsz-bw.de_20091102113118728.xml"
    // has '_' before the timestamp, while the check below requires
    // filenameSeparationChar - confirm which naming scheme is current.
    final int tailLength = 22; // separator (1) + timestamp (17) + ".xml" (4)
    final int hostStart = filenamePrefix.length() + 1; // skip prefix and one separator char
    for (final String s : names) {
        // names too short for prefix + separator + tail would make charAt/substring throw
        if (s.length() < hostStart + tailLength) continue;
        if (!s.startsWith(filenamePrefix) || !s.endsWith(".xml")) continue;
        if (s.charAt(s.length() - tailLength) != filenameSeparationChar) continue;
        try {
            final Date fd = GenericFormatter.SHORT_MILSEC_FORMATTER.parse(s.substring(s.length() - 21, s.length() - 4));
            final String hostID = s.substring(hostStart, s.length() - tailLength);
            final Date md = map.get(hostID);
            // keep only the most recent date per host
            if (md == null || fd.after(md)) map.put(hostID, fd);
        } catch (final ParseException e) {
            ConcurrentLog.logException(e);
        }
    }
    return map;
}
// Replacement character used when building host ids.
// NOTE(review): presumably substituted for characters that are unsafe in a file name - confirm against the host id computation.
public static final char hostReplacementChar = '_';
// Character that separates the host id from the timestamp in a surrogate file name.
public static final char filenameSeparationChar = '.';
// Prefix of OAI-PMH surrogate file names.
public static final String filenamePrefix = "oaipmh";
/**
* compute a host id that is also used in the getLoadedOAIServer method for the map key
* compute a host id
* @param source
* @return a string that is a key for the given host
*/

@ -37,17 +37,14 @@
package net.yacy.search;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.net.MalformedURLException;
import java.security.NoSuchAlgorithmException;
@ -78,7 +75,6 @@ import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
@ -243,7 +239,7 @@ public final class Switchboard extends serverSwitch {
public File networkRoot;
public File queuesRoot;
public File surrogatesInPath;
public File surrogatesOutPath;
//public File surrogatesOutPath;
public Segment index;
public LoaderDispatcher loader;
public CrawlSwitchboard crawler;
@ -675,13 +671,13 @@ public final class Switchboard extends serverSwitch {
SwitchboardConstants.SURROGATES_IN_PATH_DEFAULT);
this.log.info("surrogates.in Path = " + this.surrogatesInPath.getAbsolutePath());
this.surrogatesInPath.mkdirs();
this.surrogatesOutPath =
/* this.surrogatesOutPath =
getDataPath(
SwitchboardConstants.SURROGATES_OUT_PATH,
SwitchboardConstants.SURROGATES_OUT_PATH_DEFAULT);
this.log.info("surrogates.out Path = " + this.surrogatesOutPath.getAbsolutePath());
this.surrogatesOutPath.mkdirs();
*/
// copy opensearch heuristic config (if not exist)
final File osdConfig = new File(getDataPath(), "DATA/SETTINGS/heuristicopensearch.conf");
if (!osdConfig.exists()) {
@ -1833,8 +1829,6 @@ public final class Switchboard extends serverSwitch {
if ( !infile.exists() || !infile.canWrite() || !infile.canRead() ) {
return false;
}
final File outfile = new File(this.surrogatesOutPath, s);
//if (outfile.exists()) return false;
boolean moved = false;
if ( s.endsWith("xml.zip") ) {
// open the zip file with all the xml files in it
@ -1858,7 +1852,7 @@ public final class Switchboard extends serverSwitch {
} catch (final IOException e ) {
ConcurrentLog.logException(e);
} finally {
moved = infile.renameTo(outfile);
moved = infile.delete();
if (zis != null) try {zis.close();} catch (final IOException e) {}
}
return moved;
@ -1874,29 +1868,7 @@ public final class Switchboard extends serverSwitch {
ConcurrentLog.logException(e);
} finally {
if (!shallTerminate()) {
moved = infile.renameTo(outfile);
if ( moved ) {
// check if this file is already compressed, if not, compress now
if ( !outfile.getName().endsWith(".gz") ) {
final String gzname = outfile.getName() + ".gz";
final File gzfile = new File(outfile.getParentFile(), gzname);
try {
final OutputStream os =
new BufferedOutputStream(new GZIPOutputStream(new FileOutputStream(gzfile)));
BufferedInputStream bis = new BufferedInputStream(new FileInputStream(outfile));
FileUtils.copy(bis, os);
os.close();
bis.close();
if ( gzfile.exists() ) {
FileUtils.deletedelete(outfile);
}
} catch (final FileNotFoundException e ) {
ConcurrentLog.logException(e);
} catch (final IOException e ) {
ConcurrentLog.logException(e);
}
}
}
moved = infile.delete();
}
if (is != null) try {is.close();} catch (IOException e) {}
}

@ -387,8 +387,8 @@ public final class SwitchboardConstants {
public static final String SURROGATES_IN_PATH = "surrogates.in";
public static final String SURROGATES_IN_PATH_DEFAULT = "DATA/SURROGATES/in";
public static final String SURROGATES_OUT_PATH = "surrogates.out";
public static final String SURROGATES_OUT_PATH_DEFAULT = "DATA/SURROGATES/out";
//public static final String SURROGATES_OUT_PATH = "surrogates.out";
//public static final String SURROGATES_OUT_PATH_DEFAULT = "DATA/SURROGATES/out";
public static final String DICTIONARY_SOURCE_PATH = "dictionaries";
public static final String DICTIONARY_SOURCE_PATH_DEFAULT = "DATA/DICTIONARIES";