mirror of
https://github.com/yacy/yacy_search_server.git
synced 2025-07-22 09:14:38 -04:00
- removed some debugging code from the search process; searches should be faster now
- added some profiling code to the search event; PerformanceSearch_p.html now shows more timing details

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4594 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
@ -143,7 +143,7 @@ public class ConfigNetwork_p {
|
||||
try {
|
||||
RTCbusySleep = Integer.parseInt(env.getConfig(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP, "100"));
|
||||
} catch (NumberFormatException e) {}
|
||||
int RTCppm = (int) (60000L / RTCbusySleep);
|
||||
int RTCppm = (int) (60000L / (RTCbusySleep + 1));
|
||||
prop.put("acceptCrawlLimit", RTCppm);
|
||||
|
||||
boolean indexDistribute = sb.getConfig(plasmaSwitchboard.INDEX_DIST_ALLOW, "true").equals("true");
|
||||
|
@ -17,7 +17,8 @@
|
||||
<td>Query</td>
|
||||
<td>Event</td>
|
||||
<td>Time</td>
|
||||
<td>Duration (milliseconds)</td>
|
||||
<td>Delta (ms)</td>
|
||||
<td>Duration (ms)</td>
|
||||
<td>Result-Count</td>
|
||||
</tr>
|
||||
#{table}#
|
||||
@ -25,6 +26,7 @@
|
||||
<td>#[query]#</td>
|
||||
<td>#[event]#</td>
|
||||
<td>#[time]#</td>
|
||||
<td>#[delta]#</td>
|
||||
<td>#[duration]#</td>
|
||||
<td>#[count]#</td>
|
||||
</tr>
|
||||
|
@ -43,15 +43,18 @@ public class PerformanceSearch_p {
|
||||
int c = 0;
|
||||
serverProfiling.Event event;
|
||||
plasmaProfiling.searchEvent search;
|
||||
long lastt = 0;
|
||||
while (events.hasNext()) {
|
||||
event = events.next();
|
||||
search = (plasmaProfiling.searchEvent) event.payload;
|
||||
prop.put("table_" + c + "_query", search.queryID);
|
||||
prop.put("table_" + c + "_event", search.processName);
|
||||
prop.putNum("table_" + c + "_count", search.resultCount);
|
||||
prop.putNum("table_" + c + "_delta", event.time - lastt);
|
||||
prop.put("table_" + c + "_time", (new Date(event.time)).toString());
|
||||
prop.putNum("table_" + c + "_duration", search.duration);
|
||||
c++;
|
||||
lastt = event.time;
|
||||
}
|
||||
prop.put("table", c);
|
||||
|
||||
|
@ -55,6 +55,7 @@ import de.anomic.kelondro.kelondroBitfield;
|
||||
import de.anomic.kelondro.kelondroMSetTools;
|
||||
import de.anomic.plasma.plasmaCondenser;
|
||||
import de.anomic.plasma.plasmaParserDocument;
|
||||
import de.anomic.plasma.plasmaProfiling;
|
||||
import de.anomic.plasma.plasmaSearchEvent;
|
||||
import de.anomic.plasma.plasmaSearchQuery;
|
||||
import de.anomic.plasma.plasmaSearchRankingProfile;
|
||||
@ -62,6 +63,7 @@ import de.anomic.plasma.plasmaSnippetCache;
|
||||
import de.anomic.plasma.plasmaSwitchboard;
|
||||
import de.anomic.server.serverCore;
|
||||
import de.anomic.server.serverObjects;
|
||||
import de.anomic.server.serverProfiling;
|
||||
import de.anomic.server.serverSwitch;
|
||||
import de.anomic.server.logging.serverLog;
|
||||
import de.anomic.tools.yFormatter;
|
||||
@ -259,7 +261,7 @@ public class yacysearch {
|
||||
true,
|
||||
yacyURL.TLD_any_zone_filter,
|
||||
client);
|
||||
|
||||
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(theQuery.id(true), plasmaSearchEvent.INITIALIZATION, 0, 0));
|
||||
|
||||
// tell all threads to do nothing for a specific time
|
||||
sb.intermissionAllThreads(10000);
|
||||
|
@ -35,12 +35,14 @@ import java.util.TreeSet;
|
||||
import de.anomic.http.httpHeader;
|
||||
import de.anomic.kelondro.kelondroMSetTools;
|
||||
import de.anomic.kelondro.kelondroNaturalOrder;
|
||||
import de.anomic.plasma.plasmaProfiling;
|
||||
import de.anomic.plasma.plasmaSearchEvent;
|
||||
import de.anomic.plasma.plasmaSearchQuery;
|
||||
import de.anomic.plasma.plasmaSearchRankingProcess;
|
||||
import de.anomic.plasma.plasmaSnippetCache;
|
||||
import de.anomic.plasma.plasmaSwitchboard;
|
||||
import de.anomic.server.serverObjects;
|
||||
import de.anomic.server.serverProfiling;
|
||||
import de.anomic.server.serverSwitch;
|
||||
import de.anomic.tools.crypt;
|
||||
import de.anomic.tools.nxTools;
|
||||
@ -83,7 +85,7 @@ public class yacysearchitem {
|
||||
return prop;
|
||||
}
|
||||
plasmaSearchQuery theQuery = theSearch.getQuery();
|
||||
|
||||
|
||||
// dynamically update count values
|
||||
if (!rss) {
|
||||
int offset = theQuery.neededResults() - theQuery.displayResults() + 1;
|
||||
@ -163,6 +165,7 @@ public class yacysearchitem {
|
||||
prop.put("references", "1");
|
||||
}
|
||||
}
|
||||
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(theQuery.id(true), plasmaSearchEvent.FINALIZATION + "-" + "bottomline", 0, 0));
|
||||
|
||||
return prop;
|
||||
}
|
||||
@ -223,6 +226,8 @@ public class yacysearchitem {
|
||||
(((wordURL = yacyURL.probablyWordURL(result.hash(), query[0])) != null) ? ", probablyWordURL=" + wordURL.toNormalform(false, true) : ""));
|
||||
plasmaSnippetCache.TextSnippet snippet = result.textSnippet();
|
||||
prop.put("content_snippet", (snippet == null) ? "(snippet not found)" : snippet.getLineMarked(theQuery.queryHashes));
|
||||
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(theQuery.id(true), plasmaSearchEvent.FINALIZATION + "-" + item, 0, 0));
|
||||
|
||||
return prop;
|
||||
}
|
||||
|
||||
|
@ -55,11 +55,13 @@ import de.anomic.yacy.yacyURL;
|
||||
|
||||
public final class plasmaSearchEvent {
|
||||
|
||||
public static final String INITIALIZATION = "initialization";
|
||||
public static final String COLLECTION = "collection";
|
||||
public static final String JOIN = "join";
|
||||
public static final String PRESORT = "presort";
|
||||
public static final String URLFETCH = "urlfetch";
|
||||
public static final String NORMALIZING = "normalizing";
|
||||
public static final String FINALIZATION = "finalization";
|
||||
|
||||
public static int workerThreadCount = 10;
|
||||
public static String lastEventID = "";
|
||||
@ -198,6 +200,7 @@ public final class plasmaSearchEvent {
|
||||
this.workerThreads[i] = new resultWorker(i, 10000);
|
||||
this.workerThreads[i].start();
|
||||
}
|
||||
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), "online snippet fetch threads started", 0, 0));
|
||||
} else {
|
||||
// prepare result vector directly without worker threads
|
||||
long timer = System.currentTimeMillis();
|
||||
@ -229,6 +232,7 @@ public final class plasmaSearchEvent {
|
||||
|
||||
// clean up events
|
||||
cleanupEvents(false);
|
||||
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), "event-cleanup", 0, 0));
|
||||
|
||||
// store this search to a cache so it can be re-used
|
||||
lastEvents.put(query.id(false), this);
|
||||
@ -282,6 +286,8 @@ public final class plasmaSearchEvent {
|
||||
|
||||
// load only urls if there was not yet a root url of that hash
|
||||
// find the url entry
|
||||
|
||||
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), "obtain result entry - start", 0, 0));
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
indexURLEntry.Components comp = page.comp();
|
||||
@ -344,6 +350,7 @@ public final class plasmaSearchEvent {
|
||||
plasmaSnippetCache.TextSnippet snippet = plasmaSnippetCache.retrieveTextSnippet(comp, snippetFetchWordHashes, (snippetFetchMode == 2), ((query.constraint != null) && (query.constraint.get(plasmaCondenser.flag_cat_indexof))), 180, 3000, (snippetFetchMode == 2) ? Integer.MAX_VALUE : 100000);
|
||||
long snippetComputationTime = System.currentTimeMillis() - startTime;
|
||||
serverLog.logInfo("SEARCH_EVENT", "text snippet load time for " + comp.url() + ": " + snippetComputationTime + ", " + ((snippet.getErrorCode() < 11) ? "snippet found" : ("no snippet found (" + snippet.getError() + ")")));
|
||||
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), "obtain result entry - finish", 0, 0));
|
||||
|
||||
if (snippet.getErrorCode() < 11) {
|
||||
// we loaded the file and found the snippet
|
||||
|
@ -289,9 +289,9 @@ public final class plasmaSearchQuery {
|
||||
public String id(boolean anonymized) {
|
||||
// generate a string that identifies a search so results can be re-used in a cache
|
||||
if (anonymized) {
|
||||
return anonymizedQueryHashes(this.queryHashes) + "-" + anonymizedQueryHashes(this.excludeHashes) + "*" + this.contentdom + "*" + this.zonecode + "*" + this.ranking.toExternalString();
|
||||
return anonymizedQueryHashes(this.queryHashes) + "-" + anonymizedQueryHashes(this.excludeHashes) + "*" + this.contentdom + "*" + this.zonecode + "*" + plasmaCondenser.word2hash(this.ranking.toExternalString());
|
||||
} else {
|
||||
return hashSet2hashString(this.queryHashes) + "-" + hashSet2hashString(this.excludeHashes) + "*" + this.contentdom + "*" + this.zonecode + "*" + this.ranking.toExternalString();
|
||||
return hashSet2hashString(this.queryHashes) + "-" + hashSet2hashString(this.excludeHashes) + "*" + this.contentdom + "*" + this.zonecode + "*" + plasmaCondenser.word2hash(this.ranking.toExternalString());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -184,9 +184,11 @@ public final class plasmaSearchRankingProcess {
|
||||
}
|
||||
|
||||
// count domZones
|
||||
indexURLEntry uentry = wordIndex.loadedURL.load(iEntry.urlHash, iEntry, 0);
|
||||
/*
|
||||
indexURLEntry uentry = wordIndex.loadedURL.load(iEntry.urlHash, iEntry, 0); // this eats up a lot of time!!!
|
||||
yacyURL uurl = (uentry == null) ? null : uentry.comp().url();
|
||||
System.out.println("DEBUG domDomain dom=" + ((uurl == null) ? "null" : uurl.getHost()) + ", zone=" + yacyURL.domDomain(iEntry.urlHash()));
|
||||
*/
|
||||
this.domZones[yacyURL.domDomain(iEntry.urlHash())]++;
|
||||
|
||||
// insert
|
||||
|
Reference in New Issue
Block a user