mirror of
https://github.com/yacy/yacy_search_server.git
synced 2025-07-18 08:36:07 -04:00
enhanced index storage data structure kelondroBytesIntMap
This now stores two index structures: one for data that is acquired during start-up and one for data that is acquired during run-time. This reduces the growth factor and should reduce the amount of memory needed in case an index reorganisation happens. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3733 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
source/de/anomic/kelondro
@ -25,53 +25,110 @@
|
||||
package de.anomic.kelondro;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
|
||||
public class kelondroBytesIntMap {
|
||||
|
||||
private kelondroIndex ki;
|
||||
private kelondroRow rowdef;
|
||||
private kelondroIndex index0, index1;
|
||||
|
||||
public kelondroBytesIntMap(kelondroIndex ki) throws IOException {
|
||||
assert (ki.row().columns() == 2); // must be a key/index relation
|
||||
assert (ki.row().width(1) == 4); // the value must be a b256-encoded int, 4 bytes long
|
||||
this.ki = ki;
|
||||
this.index0 = null; // not used
|
||||
this.index1 = ki;
|
||||
this.rowdef = ki.row();
|
||||
}
|
||||
|
||||
public kelondroBytesIntMap(int keylength, kelondroOrder objectOrder, int space) {
|
||||
this.ki = new kelondroRowSet(new kelondroRow(new kelondroColumn[]{new kelondroColumn("key", kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, keylength, "key"), new kelondroColumn("int c-4 {b256}")}, objectOrder, 0), space);
|
||||
this.rowdef = new kelondroRow(new kelondroColumn[]{new kelondroColumn("key", kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, keylength, "key"), new kelondroColumn("int c-4 {b256}")}, objectOrder, 0);
|
||||
this.index0 = new kelondroRowSet(rowdef, space);
|
||||
this.index1 = null; // to show that this is the initialization phase
|
||||
}
|
||||
|
||||
public kelondroRow row() throws IOException {
|
||||
return ki.row();
|
||||
return index0.row();
|
||||
}
|
||||
|
||||
public synchronized int geti(byte[] key) throws IOException {
|
||||
assert (key != null);
|
||||
//assert (!(serverLog.allZero(key)));
|
||||
kelondroRow.Entry indexentry = ki.get(key);
|
||||
if (indexentry == null) return -1;
|
||||
return (int) indexentry.getColLong(1);
|
||||
if (index0 != null) {
|
||||
if (index1 == null) {
|
||||
// finish initialization phase
|
||||
if (index0 instanceof kelondroRowSet) {
|
||||
((kelondroRowSet) index0).sort();
|
||||
((kelondroRowSet) index0).uniq(10000);
|
||||
}
|
||||
index1 = new kelondroRowSet(rowdef, 0);
|
||||
//System.out.println("finished initialization phase at size = " + index0.size() + " in geti");
|
||||
}
|
||||
kelondroRow.Entry indexentry = index0.get(key);
|
||||
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
|
||||
if (indexentry != null) return (int) indexentry.getColLong(1);
|
||||
}
|
||||
if (index1 != null) {
|
||||
kelondroRow.Entry indexentry = index1.get(key);
|
||||
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
|
||||
if (indexentry != null) return (int) indexentry.getColLong(1);
|
||||
}
|
||||
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
|
||||
return -1;
|
||||
}
|
||||
|
||||
public synchronized int puti(byte[] key, int i) throws IOException {
|
||||
assert i >= 0 : "i = " + i;
|
||||
assert (key != null);
|
||||
//assert (!(serverLog.allZero(key)));
|
||||
kelondroRow.Entry newentry = ki.row().newEntry();
|
||||
if (index0 != null) {
|
||||
if (index1 == null) {
|
||||
// finish initialization phase
|
||||
if (index0 instanceof kelondroRowSet) {
|
||||
((kelondroRowSet) index0).sort();
|
||||
((kelondroRowSet) index0).uniq(10000);
|
||||
}
|
||||
index1 = new kelondroRowSet(rowdef, 0);
|
||||
//System.out.println("finished initialization phase at size = " + index0.size() + " in puti");
|
||||
}
|
||||
// if the new entry is within the initialization part, just overwrite it
|
||||
kelondroRow.Entry indexentry = index0.get(key);
|
||||
if (indexentry != null) {
|
||||
int oldi = (int) indexentry.getColLong(1);
|
||||
indexentry.setCol(0, key);
|
||||
indexentry.setCol(1, i);
|
||||
index0.put(indexentry);
|
||||
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
|
||||
return oldi;
|
||||
}
|
||||
// else place it in the index1
|
||||
}
|
||||
// at this point index1 cannot be null
|
||||
assert (index1 != null);
|
||||
kelondroRow.Entry newentry = index1.row().newEntry();
|
||||
newentry.setCol(0, key);
|
||||
newentry.setCol(1, i);
|
||||
kelondroRow.Entry oldentry = ki.put(newentry);
|
||||
kelondroRow.Entry oldentry = index1.put(newentry);
|
||||
if (oldentry == null) return -1;
|
||||
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
|
||||
return (int) oldentry.getColLong(1);
|
||||
}
|
||||
|
||||
public synchronized void addi(byte[] key, int i) throws IOException {
|
||||
assert i >= 0 : "i = " + i;
|
||||
assert (key != null);
|
||||
assert index0 != null;
|
||||
//assert index1 == null;
|
||||
if (index1 != null) {
|
||||
// the initialization phase is over, put this entry to the secondary index
|
||||
puti(key, i);
|
||||
return;
|
||||
}
|
||||
//assert (!(serverLog.allZero(key)));
|
||||
kelondroRow.Entry newentry = ki.row().newEntry();
|
||||
kelondroRow.Entry newentry = this.rowdef.newEntry();
|
||||
newentry.setCol(0, key);
|
||||
newentry.setCol(1, i);
|
||||
ki.addUnique(newentry);
|
||||
index0.addUnique(newentry);
|
||||
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
|
||||
}
|
||||
|
||||
public synchronized int removei(byte[] key) throws IOException {
|
||||
@ -79,45 +136,153 @@ public class kelondroBytesIntMap {
|
||||
//assert (!(serverLog.allZero(key)));
|
||||
// returns the integer index of the key, if the key can be found and was removed
|
||||
// and -1 if the key was not found.
|
||||
if (ki.size() == 0) return -1;
|
||||
kelondroRow.Entry indexentry = ki.remove(key);
|
||||
if (index0 != null) {
|
||||
if (index1 == null) {
|
||||
// finish initialization phase
|
||||
if (index0 instanceof kelondroRowSet) {
|
||||
((kelondroRowSet) index0).sort();
|
||||
((kelondroRowSet) index0).uniq(10000);
|
||||
}
|
||||
index1 = new kelondroRowSet(rowdef, 0);
|
||||
//System.out.println("finished initialization phase at size = " + index0.size() + " in removei");
|
||||
}
|
||||
// if the new entry is within the initialization part, just overwrite it
|
||||
kelondroRow.Entry indexentry = index0.remove(key);
|
||||
if (indexentry != null) {
|
||||
assert index0.remove(key) == null; // check if remove worked
|
||||
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
|
||||
return (int) indexentry.getColLong(1);
|
||||
}
|
||||
// else remove it from the index1
|
||||
}
|
||||
// at this point index1 cannot be null
|
||||
assert (index1 != null);
|
||||
if (index1.size() == 0) return -1;
|
||||
kelondroRow.Entry indexentry = index1.remove(key);
|
||||
if (indexentry == null) return -1;
|
||||
assert index1.remove(key) == null; // check if remove worked
|
||||
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
|
||||
return (int) indexentry.getColLong(1);
|
||||
}
|
||||
|
||||
public synchronized int removeonei() throws IOException {
|
||||
if (ki.size() == 0) return -1;
|
||||
kelondroRow.Entry indexentry = ki.removeOne();
|
||||
assert (indexentry != null);
|
||||
if (indexentry == null) return -1;
|
||||
return (int) indexentry.getColLong(1);
|
||||
if ((index1 != null) && (index1.size() != 0)) {
|
||||
kelondroRow.Entry indexentry = index1.removeOne();
|
||||
assert (indexentry != null);
|
||||
if (indexentry == null) return -1;
|
||||
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
|
||||
return (int) indexentry.getColLong(1);
|
||||
}
|
||||
if ((index0 != null) && (index0.size() != 0)) {
|
||||
kelondroRow.Entry indexentry = index0.removeOne();
|
||||
assert (indexentry != null);
|
||||
if (indexentry == null) return -1;
|
||||
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
|
||||
return (int) indexentry.getColLong(1);
|
||||
}
|
||||
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
|
||||
return -1;
|
||||
}
|
||||
|
||||
public synchronized int size() {
|
||||
return ki.size();
|
||||
if ((index0 != null) && (index1 == null)) {
|
||||
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
|
||||
return index0.size();
|
||||
}
|
||||
if ((index0 == null) && (index1 != null)) {
|
||||
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
|
||||
return index1.size();
|
||||
}
|
||||
assert ((index0 != null) && (index1 != null));
|
||||
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
|
||||
return index0.size() + index1.size();
|
||||
}
|
||||
|
||||
public synchronized kelondroCloneableIterator rows(boolean up, byte[] firstKey) throws IOException {
|
||||
// returns the row-iterator of the underlying kelondroIndex
|
||||
// col[0] = key
|
||||
// col[1] = integer as {b265}
|
||||
return ki.rows(up, firstKey);
|
||||
if ((index0 != null) && (index1 == null)) {
|
||||
// finish initialization phase
|
||||
if (index0 instanceof kelondroRowSet) {
|
||||
((kelondroRowSet) index0).sort();
|
||||
((kelondroRowSet) index0).uniq(10000);
|
||||
}
|
||||
index1 = new kelondroRowSet(rowdef, 0);
|
||||
//System.out.println("finished initialization phase at size = " + index0.size() + " in rows");
|
||||
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
|
||||
return index0.rows(up, firstKey);
|
||||
}
|
||||
if ((index0 == null) && (index1 != null)) {
|
||||
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
|
||||
return index1.rows(up, firstKey);
|
||||
}
|
||||
assert ((index0 != null) && (index1 != null));
|
||||
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
|
||||
return new kelondroMergeIterator(index0.rows(up, firstKey), index1.rows(up, firstKey), rowdef.objectOrder, kelondroMergeIterator.simpleMerge, true);
|
||||
}
|
||||
|
||||
public kelondroProfile profile() {
|
||||
return ki.profile();
|
||||
if (index0 != null) return index0.profile();
|
||||
if (index1 != null) return index1.profile();
|
||||
return null;
|
||||
}
|
||||
|
||||
public synchronized void close() {
|
||||
ki.close();
|
||||
if (index0 != null) index0.close();
|
||||
if (index1 != null) index1.close();
|
||||
}
|
||||
|
||||
public synchronized String consistencyAnalysis() {
|
||||
String s0 = (index0 == null) ? "index0: is NULL" : ("index0: " + singleConsistency((kelondroRowSet) index0));
|
||||
String s1 = (index1 == null) ? "index1: is NULL" : ("index1: " + singleConsistency((kelondroRowSet) index1));
|
||||
String combined = "";
|
||||
if ((index0 == null) && (index1 == null)) return "all null";
|
||||
if ((index0 != null) && (index1 != null)) {
|
||||
Iterator i;
|
||||
try {
|
||||
i = index0.rows(true, null);
|
||||
kelondroRow.Entry entry;
|
||||
while (i.hasNext()) {
|
||||
entry = (kelondroRow.Entry) i.next();
|
||||
if (index1.has(entry.getColBytes(0))) {
|
||||
combined = combined + ", common = " + new String(entry.getColBytes(0));
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {}
|
||||
}
|
||||
return s0 + ", " + s1 + combined;
|
||||
}
|
||||
|
||||
public synchronized void sort() {
|
||||
if (ki instanceof kelondroRowSet) ((kelondroRowSet) ki).sort();
|
||||
public synchronized boolean consistencyAnalysis0() {
|
||||
boolean s0 = ((index0 == null) || (!(index0 instanceof kelondroRowSet))) ? true : singleConsistency0((kelondroRowSet) index0);
|
||||
boolean s1 = ((index1 == null) || (!(index1 instanceof kelondroRowSet))) ? true : singleConsistency0((kelondroRowSet) index1);
|
||||
if (!(s0 && s1)) return false;
|
||||
if ((index0 == null) && (index1 == null)) return true;
|
||||
if ((index0 != null) && (index1 != null)) {
|
||||
Iterator i;
|
||||
try {
|
||||
i = index0.rows(true, null);
|
||||
kelondroRow.Entry entry;
|
||||
while (i.hasNext()) {
|
||||
entry = (kelondroRow.Entry) i.next();
|
||||
if (index1.has(entry.getColBytes(0))) return false;
|
||||
}
|
||||
} catch (IOException e) {}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public synchronized void uniq(long time) {
|
||||
if (ki instanceof kelondroRowSet) ((kelondroRowSet) ki).uniq(time);
|
||||
private String singleConsistency(kelondroRowSet rs) {
|
||||
int s = rs.size();
|
||||
rs.sort();
|
||||
rs.uniq(10000);
|
||||
if (rs.size() == s) return "set is sound"; else return "set has " + (rs.size() - s) + " double-entries";
|
||||
}
|
||||
private boolean singleConsistency0(kelondroRowSet rs) {
|
||||
int s = rs.size();
|
||||
rs.sort();
|
||||
rs.uniq(10000);
|
||||
return rs.size() == s;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -174,10 +174,8 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
|
||||
}
|
||||
System.out.print(" -ordering- ");
|
||||
System.out.flush();
|
||||
ri.sort();
|
||||
int sbu = ri.size();
|
||||
ri.uniq(10000);
|
||||
if (ri.size() != sbu) serverLog.logSevere("kelondroFlexTable.initializeRamIndex: " + tablename, "; size before uniq = " + sbu + ", after uniq = " + ri.size());
|
||||
//int sbu = ri.size();
|
||||
//if (ri.size() != sbu) serverLog.logSevere("kelondroFlexTable.initializeRamIndex: " + tablename, "; size before uniq = " + sbu + ", after uniq = " + ri.size());
|
||||
return ri;
|
||||
}
|
||||
|
||||
@ -212,7 +210,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
|
||||
|
||||
public synchronized kelondroRow.Entry get(byte[] key) throws IOException {
|
||||
int pos = index.geti(key);
|
||||
assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
|
||||
assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size() + ", analysis: " + index.consistencyAnalysis();
|
||||
if (pos < 0) return null;
|
||||
// i may be greater than this.size(), because this table may have deleted entries
|
||||
// the deleted entries are subtracted from the 'real' tablesize,
|
||||
|
@ -75,7 +75,7 @@ public class kelondroIntBytesMap {
|
||||
|
||||
public byte[] putb(int ii, byte[] value) {
|
||||
initPhase = false;
|
||||
kelondroRow.Entry newentry = index1.row().newEntry();
|
||||
kelondroRow.Entry newentry = rowdef.newEntry();
|
||||
newentry.setCol(0, (long) ii);
|
||||
newentry.setCol(1, value);
|
||||
kelondroRow.Entry indexentry = index0.get(kelondroNaturalOrder.encodeLong((long) ii, 4));
|
||||
|
Reference in New Issue
Block a user