mirror of
https://github.com/yacy/yacy_search_server.git
synced 2025-07-22 09:14:38 -04:00
some enhancements to caching
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2236 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
@ -60,7 +60,7 @@ public class kelondroBytesIntMap extends kelondroRowBufferedSet {
|
||||
if (System.currentTimeMillis() - this.lastTimeWrote > 10000) this.trim();
|
||||
return -1;
|
||||
}
|
||||
kelondroRow.Entry indexentry = remove(key);
|
||||
kelondroRow.Entry indexentry = removeMarked(key);
|
||||
if (indexentry == null) return -1;
|
||||
return (int) indexentry.getColLongB256(1);
|
||||
}
|
||||
|
@ -54,7 +54,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
|
||||
if ((i % 10000) == 0) System.out.print('.');
|
||||
}
|
||||
this.index.setOrdering(kelondroNaturalOrder.naturalOrder, 0);
|
||||
index.sort();
|
||||
index.shape();
|
||||
System.out.println(index.size() + " index entries initialized and sorted");
|
||||
}
|
||||
|
||||
|
@ -60,7 +60,7 @@ public class kelondroIntBytesMap extends kelondroRowBufferedSet {
|
||||
if (System.currentTimeMillis() - this.lastTimeWrote > 10000) this.trim();
|
||||
return null;
|
||||
}
|
||||
kelondroRow.Entry indexentry = super.remove(kelondroNaturalOrder.encodeLong((long) ii, 4));
|
||||
kelondroRow.Entry indexentry = super.removeMarked(kelondroNaturalOrder.encodeLong((long) ii, 4));
|
||||
if (indexentry == null) return null;
|
||||
return indexentry.getColBytes(1);
|
||||
}
|
||||
|
@ -81,7 +81,7 @@ public class kelondroRecords {
|
||||
|
||||
// constants
|
||||
private static final int NUL = Integer.MIN_VALUE; // the meta value for the kelondroRecords' NUL abstraction
|
||||
private static final long memBlock = 500000; // do not fill cache further if the amount of available memory is less that this
|
||||
private static final long memBlock = 50000000; // do not fill cache further if the amount of available memory is less than this
|
||||
public final static boolean useWriteBuffer = false;
|
||||
|
||||
// memory calculation
|
||||
@ -427,9 +427,9 @@ public class kelondroRecords {
|
||||
this.cacheScore = new kelondroMScoreCluster(); // cache control of CP_HIGH caches
|
||||
}
|
||||
this.cacheHeaders = new kelondroIntBytesMap[]{
|
||||
new kelondroIntBytesMap(this.headchunksize, this.cacheSize / 2),
|
||||
new kelondroIntBytesMap(this.headchunksize, 0),
|
||||
new kelondroIntBytesMap(this.headchunksize, 0)
|
||||
new kelondroIntBytesMap(this.headchunksize, 0),
|
||||
new kelondroIntBytesMap(this.headchunksize, this.cacheSize / 2)
|
||||
};
|
||||
this.cacheHeaders[0].setOrdering(kelondroNaturalOrder.naturalOrder, 0);
|
||||
this.cacheHeaders[1].setOrdering(kelondroNaturalOrder.naturalOrder, 0);
|
||||
@ -489,6 +489,20 @@ public class kelondroRecords {
|
||||
};
|
||||
}
|
||||
|
||||
public String cacheNodeStatusString() {
|
||||
return
|
||||
"cacheMaxSize=" + cacheSize +
|
||||
", CP_HIGH=" + cacheHeaders[CP_HIGH].size() +
|
||||
", CP_MEDIUM=" + cacheHeaders[CP_MEDIUM].size() +
|
||||
", CP_LOW=" + cacheHeaders[CP_LOW].size() +
|
||||
", readHit=" + readHit +
|
||||
", readMiss=" + readMiss +
|
||||
", writeUnique=" + writeUnique +
|
||||
", writeDouble=" + writeDouble +
|
||||
", cacheDelete=" + cacheDelete +
|
||||
", cacheFlush=" + cacheFlush;
|
||||
}
|
||||
|
||||
private static int[] cacheCombinedStatus(int[] a, int[] b) {
|
||||
int[] c = new int[a.length];
|
||||
for (int i = a.length - 1; i >= 0; i--) c[i] = a[i] + b[i];
|
||||
@ -916,9 +930,9 @@ public class kelondroRecords {
|
||||
// we simply clear the cache
|
||||
String error = "cachScore error: " + e.getMessage() + "; cachesize=" + cacheSize + ", cache.size()=[" + cacheHeaders[0].size() + "," + cacheHeaders[1].size() + "," + cacheHeaders[2].size() + "], cacheScore.size()=" + cacheScore.size();
|
||||
cacheScore = new kelondroMScoreCluster();
|
||||
cacheHeaders[CP_LOW] = new kelondroIntBytesMap(headchunksize, cacheSize / 2);
|
||||
cacheHeaders[CP_LOW] = new kelondroIntBytesMap(headchunksize, 0);
|
||||
cacheHeaders[CP_MEDIUM] = new kelondroIntBytesMap(headchunksize, 0);
|
||||
cacheHeaders[CP_HIGH] = new kelondroIntBytesMap(headchunksize, 0);
|
||||
cacheHeaders[CP_HIGH] = new kelondroIntBytesMap(headchunksize, cacheSize / 2);
|
||||
cacheHeaders[0].setOrdering(kelondroNaturalOrder.naturalOrder, 0);
|
||||
cacheHeaders[1].setOrdering(kelondroNaturalOrder.naturalOrder, 0);
|
||||
cacheHeaders[2].setOrdering(kelondroNaturalOrder.naturalOrder, 0);
|
||||
|
@ -164,13 +164,13 @@ public class kelondroRowBufferedSet extends kelondroRowSet {
|
||||
}
|
||||
}
|
||||
|
||||
public kelondroRow.Entry remove(byte[] a) {
|
||||
public kelondroRow.Entry removeShift(byte[] a) {
|
||||
synchronized (buffer) {
|
||||
if (useRowCollection) {
|
||||
kelondroRow.Entry oldentry = (kelondroRow.Entry) buffer.remove(a);
|
||||
if (oldentry == null) {
|
||||
// try the collection
|
||||
return super.remove(a);
|
||||
return super.removeShift(a);
|
||||
} else {
|
||||
// the entry was in buffer
|
||||
return oldentry;
|
||||
@ -181,11 +181,28 @@ public class kelondroRowBufferedSet extends kelondroRowSet {
|
||||
}
|
||||
}
|
||||
|
||||
public void removeAll(kelondroRowCollection c) {
|
||||
public kelondroRow.Entry removeMarked(byte[] a) {
|
||||
synchronized (buffer) {
|
||||
if (useRowCollection) {
|
||||
kelondroRow.Entry oldentry = (kelondroRow.Entry) buffer.remove(a);
|
||||
if (oldentry == null) {
|
||||
// try the collection
|
||||
return super.removeMarked(a);
|
||||
} else {
|
||||
// the entry was in buffer
|
||||
return oldentry;
|
||||
}
|
||||
} else {
|
||||
return (kelondroRow.Entry) buffer.remove(a); // test
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void removeMarkedAll(kelondroRowCollection c) {
|
||||
// this can be enhanced
|
||||
synchronized (buffer) {
|
||||
flush();
|
||||
super.removeAll(c);
|
||||
super.removeMarkedAll(c);
|
||||
}
|
||||
}
|
||||
|
||||
@ -195,8 +212,8 @@ public class kelondroRowBufferedSet extends kelondroRowSet {
|
||||
c.setOrdering(kelondroNaturalOrder.naturalOrder, 0);
|
||||
for (int i = 0; i < test.length; i++) c.add(test[i].getBytes());
|
||||
for (int i = 0; i < test.length; i++) c.add(test[i].getBytes());
|
||||
c.sort();
|
||||
c.remove("fuenf".getBytes());
|
||||
c.removeMarked("fuenf".getBytes());
|
||||
c.shape();
|
||||
Iterator i = c.elements();
|
||||
String s;
|
||||
System.out.print("INPUT-ITERATOR: ");
|
||||
@ -207,7 +224,7 @@ public class kelondroRowBufferedSet extends kelondroRowSet {
|
||||
}
|
||||
System.out.println("");
|
||||
System.out.println("INPUT-TOSTRING: " + c.toString());
|
||||
c.sort();
|
||||
c.shape();
|
||||
System.out.println("SORTED : " + c.toString());
|
||||
c.uniq();
|
||||
System.out.println("UNIQ : " + c.toString());
|
||||
@ -232,7 +249,7 @@ public class kelondroRowBufferedSet extends kelondroRowSet {
|
||||
" entries/second, size = " + c.size());
|
||||
}
|
||||
System.out.println("bevore sort: " + ((System.currentTimeMillis() - start) / 1000) + " seconds");
|
||||
c.sort();
|
||||
c.shape();
|
||||
System.out.println("after sort: " + ((System.currentTimeMillis() - start) / 1000) + " seconds");
|
||||
c.uniq();
|
||||
System.out.println("after uniq: " + ((System.currentTimeMillis() - start) / 1000) + " seconds");
|
||||
|
@ -145,13 +145,18 @@ public class kelondroRowCollection {
|
||||
}
|
||||
}
|
||||
|
||||
public final void remove(int p) {
|
||||
protected final void removeShift(int pos, int dist, int upBound) {
|
||||
System.arraycopy(chunkcache, (pos + dist) * rowdef.objectsize(),
|
||||
chunkcache, pos * rowdef.objectsize(),
|
||||
(upBound - pos - dist) * rowdef.objectsize());
|
||||
if ((pos < sortBound) && (upBound >= sortBound)) sortBound -= dist;
|
||||
}
|
||||
|
||||
public final void removeShift(int p) {
|
||||
assert ((p >= 0) && (p < chunkcount) && (chunkcount > 0));
|
||||
//System.out.println("REMOVE at pos " + p + ", chunkcount=" + chunkcount + ", sortBound=" + sortBound);
|
||||
synchronized (chunkcache) {
|
||||
System.arraycopy(chunkcache, (p + 1) * rowdef.objectsize(), chunkcache, p * rowdef.objectsize(), (chunkcount - p - 1) * rowdef.objectsize());
|
||||
chunkcount--;
|
||||
if (p < sortBound) sortBound--;
|
||||
removeShift(p, 1, chunkcount--);
|
||||
}
|
||||
this.lastTimeWrote = System.currentTimeMillis();
|
||||
}
|
||||
@ -299,7 +304,7 @@ public class kelondroRowCollection {
|
||||
swap(j, j - 1, 0);
|
||||
}
|
||||
|
||||
private final int swap(int i, int j, int p) {
|
||||
protected final int swap(int i, int j, int p) {
|
||||
if (i == j) return p;
|
||||
if (this.chunkcount * this.rowdef.objectsize() < this.chunkcache.length) {
|
||||
// there is space in the chunkcache that we can use as buffer
|
||||
@ -326,7 +331,7 @@ public class kelondroRowCollection {
|
||||
while (i < chunkcount - 1) {
|
||||
if (compare(i, i + 1) == 0) {
|
||||
//System.out.println("DOUBLE: " + new String(this.chunkcache, this.chunksize * i, this.chunksize));
|
||||
remove(i);
|
||||
removeShift(i);
|
||||
} else {
|
||||
i++;
|
||||
}
|
||||
@ -368,5 +373,5 @@ public class kelondroRowCollection {
|
||||
*/
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -24,19 +24,25 @@
|
||||
|
||||
package de.anomic.kelondro;
|
||||
|
||||
import java.util.TreeSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.Random;
|
||||
|
||||
public class kelondroRowSet extends kelondroRowCollection {
|
||||
|
||||
private static final int collectionReSortLimit = 90;
|
||||
private static final int removeMaxSize = 100;
|
||||
|
||||
private TreeSet removeMarker;
|
||||
|
||||
/**
 * Creates a row set for the given row definition with an empty set of
 * removal markers.
 *
 * @param rowdef the row definition, passed on to the superclass
 */
public kelondroRowSet(kelondroRow rowdef) {
    super(rowdef);
    // no rows are marked as removed initially
    removeMarker = new TreeSet();
}
|
||||
|
||||
/**
 * Creates a row set for the given row definition and object count, with an
 * empty set of removal markers.
 *
 * @param rowdef      the row definition, passed on to the superclass
 * @param objectCount passed on to the superclass constructor
 */
public kelondroRowSet(kelondroRow rowdef, int objectCount) {
    super(rowdef, objectCount);
    // no rows are marked as removed initially
    removeMarker = new TreeSet();
}
|
||||
|
||||
public kelondroRow.Entry get(byte[] key) {
|
||||
@ -45,17 +51,24 @@ public class kelondroRowSet extends kelondroRowCollection {
|
||||
|
||||
private kelondroRow.Entry get(byte[] key, int astart, int alength) {
|
||||
synchronized (chunkcache) {
|
||||
int i = find(key, astart, alength);
|
||||
if (i >= 0) return get(i);
|
||||
int index = find(key, astart, alength);
|
||||
if ((index < 0) || (isMarkedRemoved(index))){
|
||||
return null;
|
||||
} else {
|
||||
return get(index);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public kelondroRow.Entry put(kelondroRow.Entry entry) {
|
||||
int index = -1;
|
||||
synchronized (chunkcache) {
|
||||
index = find(entry.bytes(), super.rowdef.colstart[super.sortColumn], super.rowdef.width(super.sortColumn));
|
||||
if (index < 0) {
|
||||
if (isMarkedRemoved(index)) {
|
||||
set(index, entry);
|
||||
removeMarker.remove(new Integer(index));
|
||||
return null;
|
||||
} else if (index < 0) {
|
||||
add(entry);
|
||||
return null;
|
||||
} else {
|
||||
@ -66,11 +79,15 @@ public class kelondroRowSet extends kelondroRowCollection {
|
||||
}
|
||||
}
|
||||
|
||||
public kelondroRow.Entry remove(byte[] a) {
|
||||
return remove(a, 0, a.length);
|
||||
public int size() {
|
||||
return super.size() - removeMarker.size();
|
||||
}
|
||||
|
||||
public kelondroRow.Entry removeMarked(byte[] a) {
|
||||
return removeMarked(a, 0, a.length);
|
||||
}
|
||||
|
||||
private kelondroRow.Entry remove(byte[] a, int astart, int alength) {
|
||||
private kelondroRow.Entry removeMarked(byte[] a, int astart, int alength) {
|
||||
// the byte[] a may be shorter than the chunksize
|
||||
if (chunkcount == 0) return null;
|
||||
kelondroRow.Entry b = null;
|
||||
@ -78,17 +95,98 @@ public class kelondroRowSet extends kelondroRowCollection {
|
||||
int p = find(a, astart, alength);
|
||||
if (p < 0) return null;
|
||||
b = get(p);
|
||||
remove(p);
|
||||
if (p < sortBound) {
|
||||
removeMarker.add(new Integer(p));
|
||||
} else {
|
||||
super.swap(p, --chunkcount, 0);
|
||||
}
|
||||
}
|
||||
if (removeMarker.size() == chunkcount) {
|
||||
chunkcount = 0;
|
||||
sortBound = 0;
|
||||
removeMarker.clear();
|
||||
}
|
||||
if (removeMarker.size() >= removeMaxSize) resolveMarkedRemoved();
|
||||
return b;
|
||||
}
|
||||
|
||||
private boolean isMarkedRemoved(int index) {
|
||||
return removeMarker.contains(new Integer(index));
|
||||
}
|
||||
|
||||
public void shape() {
|
||||
//System.out.println("SHAPE");
|
||||
synchronized (chunkcache) {
|
||||
resolveMarkedRemoved();
|
||||
super.sort();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
private void resolveMarkedRemoved1() {
|
||||
//long start = System.currentTimeMillis();
|
||||
//int c = removeMarker.size();
|
||||
Integer idx = new Integer(sortBound);
|
||||
while (removeMarker.size() > 0) {
|
||||
idx = (Integer) removeMarker.last();
|
||||
removeMarker.remove(idx);
|
||||
chunkcount--;
|
||||
if (idx.intValue() < chunkcount) {
|
||||
super.swap(idx.intValue(), chunkcount, 0);
|
||||
}
|
||||
}
|
||||
if (idx.intValue() < sortBound) sortBound = idx.intValue();
|
||||
removeMarker.clear();
|
||||
//System.out.println("RESOLVED " + c + " entries in " + (System.currentTimeMillis() - start) + " milliseconds");
|
||||
}
|
||||
*/
|
||||
|
||||
private void resolveMarkedRemoved() {
|
||||
if (removeMarker.size() == 0) return;
|
||||
Integer nxt = (Integer) removeMarker.first();
|
||||
removeMarker.remove(nxt);
|
||||
int idx = nxt.intValue();
|
||||
int d = 1;
|
||||
while (removeMarker.size() > 0) {
|
||||
nxt = (Integer) removeMarker.first();
|
||||
removeMarker.remove(nxt);
|
||||
super.removeShift(idx, d, nxt.intValue());
|
||||
idx = nxt.intValue() - d;
|
||||
d++;
|
||||
}
|
||||
super.removeShift(idx, d, chunkcount);
|
||||
chunkcount -= d;
|
||||
removeMarker.clear();
|
||||
}
|
||||
|
||||
|
||||
protected kelondroRow.Entry removeShift(byte[] a) {
|
||||
return removeShift(a, 0, a.length);
|
||||
}
|
||||
|
||||
private kelondroRow.Entry removeShift(byte[] a, int astart, int alength) {
|
||||
// the byte[] a may be shorter than the chunksize
|
||||
if (chunkcount == 0) return null;
|
||||
kelondroRow.Entry b = null;
|
||||
synchronized(chunkcache) {
|
||||
int p = find(a, astart, alength);
|
||||
if (p < 0) return null;
|
||||
b = get(p);
|
||||
if (p < sortBound) {
|
||||
removeShift(p);
|
||||
} else {
|
||||
super.swap(p, --chunkcount, 0);
|
||||
}
|
||||
}
|
||||
return b;
|
||||
}
|
||||
|
||||
public void removeAll(kelondroRowCollection c) {
|
||||
public void removeMarkedAll(kelondroRowCollection c) {
|
||||
Iterator i = c.elements();
|
||||
byte[] b;
|
||||
while (i.hasNext()) {
|
||||
b = (byte[]) i.next();
|
||||
remove(b, 0, b.length);
|
||||
removeMarked(b, 0, b.length);
|
||||
}
|
||||
}
|
||||
|
||||
@ -113,7 +211,7 @@ public class kelondroRowSet extends kelondroRowCollection {
|
||||
if (this.sortOrder == null) return iterativeSearch(a, astart, alength);
|
||||
|
||||
// check if a re-sorting make sense
|
||||
if ((this.chunkcount - this.sortBound) > collectionReSortLimit) sort();
|
||||
if ((this.chunkcount - this.sortBound) > collectionReSortLimit) shape();
|
||||
|
||||
// first try to find in sorted area
|
||||
int p = binarySearch(a, astart, alength);
|
||||
@ -177,8 +275,8 @@ public class kelondroRowSet extends kelondroRowCollection {
|
||||
c.setOrdering(kelondroNaturalOrder.naturalOrder, 0);
|
||||
for (int i = 0; i < test.length; i++) c.add(test[i].getBytes());
|
||||
for (int i = 0; i < test.length; i++) c.add(test[i].getBytes());
|
||||
c.sort();
|
||||
c.remove("fuenf".getBytes(), 0, 5);
|
||||
c.shape();
|
||||
c.removeMarked("fuenf".getBytes(), 0, 5);
|
||||
Iterator i = c.elements();
|
||||
String s;
|
||||
System.out.print("INPUT-ITERATOR: ");
|
||||
@ -189,7 +287,7 @@ public class kelondroRowSet extends kelondroRowCollection {
|
||||
}
|
||||
System.out.println("");
|
||||
System.out.println("INPUT-TOSTRING: " + c.toString());
|
||||
c.sort();
|
||||
c.shape();
|
||||
System.out.println("SORTED : " + c.toString());
|
||||
c.uniq();
|
||||
System.out.println("UNIQ : " + c.toString());
|
||||
@ -214,7 +312,7 @@ public class kelondroRowSet extends kelondroRowCollection {
|
||||
" entries/second, size = " + c.size());
|
||||
}
|
||||
System.out.println("bevore sort: " + ((System.currentTimeMillis() - start) / 1000) + " seconds");
|
||||
c.sort();
|
||||
c.shape();
|
||||
System.out.println("after sort: " + ((System.currentTimeMillis() - start) / 1000) + " seconds");
|
||||
c.uniq();
|
||||
System.out.println("after uniq: " + ((System.currentTimeMillis() - start) / 1000) + " seconds");
|
||||
|
Reference in New Issue
Block a user