mirror of
https://github.com/yacy/yacy_search_server.git
synced 2025-05-03 20:29:33 -04:00
Updated Javadoc and Junit tests for the WebStructureGraph class.
This commit is contained in:
parent
17b7c92009
commit
5c8958bcea
source/net/yacy/peers/graphics
test/java/net/yacy/peers/graphics
@ -81,10 +81,18 @@ public class WebStructureGraph {
|
||||
/** Backup file, if any */
|
||||
private final File structureFile;
|
||||
|
||||
/** Older structure entries (notably loaded from the backup file) */
|
||||
private final TreeMap<String, byte[]> structure_old; // <b64hash(6)>','<host> to <date-yyyymmdd(8)>{<target-b64hash(6)><target-count-hex(4)>}*
|
||||
/**
|
||||
* <p>Older structure entries (notably loaded from the backup file).</p>
|
||||
* <p>Maps two-part concatenated string keys to byte-array-encoded reference lists:
|
||||
* "'b64hash(6)','hostname" to 'date-yyyymmdd(8)'{'target-b64hash(6)''target-count-hex(4)'}*</p>
|
||||
* */
|
||||
private final TreeMap<String, byte[]> structure_old;
|
||||
|
||||
/** Recently computed structure entries */
|
||||
/**
|
||||
* <p>Recently computed structure entries</p>
|
||||
* <p>Maps two-part concatenated string keys to byte-array-encoded reference lists:
|
||||
* "'b64hash(6)','hostname" to 'date-yyyymmdd(8)'{'target-b64hash(6)''target-count-hex(4)'}*</p>
|
||||
* */
|
||||
private final TreeMap<String, byte[]> structure_new;
|
||||
|
||||
/** Queue used to receive new entries to store */
|
||||
@ -164,6 +172,9 @@ public class WebStructureGraph {
|
||||
this.publicRefDNSResolvingWorker.start();
|
||||
}
|
||||
|
||||
/**
|
||||
* Task consuming the queue of new entries to compute and add to the structure
|
||||
*/
|
||||
private class PublicRefDNSResolvingProcess extends Thread {
|
||||
private PublicRefDNSResolvingProcess() {
|
||||
this.setName("WebStructureGraph.PublicRefDNSResolvingProcess");
|
||||
@ -181,6 +192,9 @@ public class WebStructureGraph {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear the complete web structure.
|
||||
*/
|
||||
public void clear() {
|
||||
this.structure_old.clear();
|
||||
this.structure_new.clear();
|
||||
@ -236,12 +250,20 @@ public class WebStructureGraph {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param refs references information serialized in a string
|
||||
* @return the decoded references map size
|
||||
*/
|
||||
private static int refstr2count(final String refs) {
|
||||
if (refs == null || refs.length() <= 8) return 0;
|
||||
assert (refs.length() - 8) % 10 == 0 : "refs = " + refs + ", length = " + refs.length();
|
||||
return (refs.length() - 8) / 10;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param refs references information serialized in a string
|
||||
* @return the decoded references mapping from host hashes to counts
|
||||
*/
|
||||
private static Map<String, Integer> refstr2map(final String refs) {
|
||||
if (refs == null || refs.length() <= 8) return new HashMap<String, Integer>();
|
||||
final Map<String, Integer> map = new HashMap<String, Integer>();
|
||||
@ -260,10 +282,17 @@ public class WebStructureGraph {
|
||||
return map;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return an empty references map serialized to a string
|
||||
*/
|
||||
private static String none2refstr() {
|
||||
return GenericFormatter.SHORT_DAY_FORMATTER.format();
|
||||
}
|
||||
|
||||
/**
|
||||
* @param map references mapping from host hashes to counts
|
||||
* @return the map serialized as a string
|
||||
*/
|
||||
private static String map2refstr(final Map<String, Integer> map) {
|
||||
final StringBuilder s = new StringBuilder(GenericFormatter.PATTERN_SHORT_DAY.length() + map.size() * 10);
|
||||
s.append(GenericFormatter.SHORT_DAY_FORMATTER.format());
|
||||
@ -289,6 +318,10 @@ public class WebStructureGraph {
|
||||
return s.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* @param hosthash host hash
|
||||
* @return true when this host hash is present in this web structure (either in the latest or in the older known entries)
|
||||
*/
|
||||
public boolean exists(final String hosthash) {
|
||||
// returns true when this web structure holds an entry for the given hosthash
|
||||
assert hosthash.length() == 6;
|
||||
@ -314,6 +347,11 @@ public class WebStructureGraph {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute outgoing references from the source host hash
|
||||
* @param hosthash reference source host hash
|
||||
* @return outgoing structure with references mapped from target host hashes to counts or null when the host is not known
|
||||
*/
|
||||
public StructureEntry outgoingReferences(final String hosthash) {
|
||||
// returns a map with a hosthash(String):refcount(Integer) relation
|
||||
assert hosthash.length() == 6;
|
||||
@ -355,9 +393,9 @@ public class WebStructureGraph {
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute outgoing references from source hostName on any source protocol or port.
|
||||
* Compute outgoing references from the source hostName on any source protocol or port.
|
||||
* @param srcHostName reference source host name
|
||||
* @return outgoing references mapped from target host hash to count
|
||||
* @return outgoing references mapped from target host hashes to counts. Can be empty when the host name is not known.
|
||||
*/
|
||||
public Map<String, Integer> outgoingReferencesByHostName(final String srcHostName) {
|
||||
Set<String> srcHostHashes = this.hostName2HostHashes(srcHostName);
|
||||
@ -385,6 +423,11 @@ public class WebStructureGraph {
|
||||
return targetHashesToCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute incoming references to the target host hash
|
||||
* @param hosthash reference target host hash
|
||||
* @return incoming structure with references mapped from source host hashes to counts or null when the target is not known
|
||||
*/
|
||||
public StructureEntry incomingReferences(final String hosthash) {
|
||||
final String hostname = hostHash2hostName(hosthash);
|
||||
if ( hostname == null ) {
|
||||
@ -767,6 +810,9 @@ public class WebStructureGraph {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Merge the latest computed entries map into the older entries structure map, and then clear the latest entries map.
|
||||
*/
|
||||
public void joinOldNew() {
|
||||
synchronized ( this.structure_new ) {
|
||||
joinStructure(this.structure_old, this.structure_new);
|
||||
@ -835,18 +881,38 @@ public class WebStructureGraph {
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param latest <ul>
|
||||
* <li>true : iterate only the latest computed entries</li>
|
||||
* <li>false : iterate only the older computed entries, excluding the latest</li>
|
||||
* </ul>
|
||||
* @return an iterator over the web structure
|
||||
*/
|
||||
public Iterator<StructureEntry> structureEntryIterator(final boolean latest) {
|
||||
return new StructureIterator(latest);
|
||||
}
|
||||
|
||||
/**
|
||||
* Iterator over the web structure
|
||||
*/
|
||||
private class StructureIterator extends LookAheadIterator<StructureEntry> implements Iterator<StructureEntry> {
|
||||
|
||||
/** Internal iterator instance */
|
||||
private final Iterator<Map.Entry<String, byte[]>> i;
|
||||
|
||||
/**
|
||||
* @param latest <ul>
|
||||
* <li>true : iterate only the latest computed entries</li>
|
||||
* <li>false : iterate only the older computed entries, excluding the latest</li>
|
||||
* </ul>
|
||||
*/
|
||||
private StructureIterator(final boolean latest) {
|
||||
this.i = ((latest) ? WebStructureGraph.this.structure_new : WebStructureGraph.this.structure_old).entrySet().iterator();
|
||||
}
|
||||
|
||||
/**
|
||||
* Iterate to the next structure entry, decoding on the fly the references information from the byte array
|
||||
*/
|
||||
@Override
|
||||
public StructureEntry next0() {
|
||||
Map.Entry<String, byte[]> entry = null;
|
||||
@ -879,7 +945,7 @@ public class WebStructureGraph {
|
||||
}
|
||||
|
||||
public static class StructureEntry implements Comparable<StructureEntry> {
|
||||
/** the tail of the host hash */
|
||||
/** 6 bytes host hash */
|
||||
public String hosthash;
|
||||
|
||||
/** the host name */
|
||||
@ -888,9 +954,14 @@ public class WebStructureGraph {
|
||||
/** date of latest change */
|
||||
public String date;
|
||||
|
||||
/** a map from the referenced host hash to the number of referenced to that host */
|
||||
/** a map from the referenced host hash to the number of references to that host */
|
||||
public Map<String, Integer> references;
|
||||
|
||||
/**
|
||||
* Create a new empty (no references) entry
|
||||
* @param hosthash host hash
|
||||
* @param hostname host name
|
||||
*/
|
||||
private StructureEntry(final String hosthash, final String hostname) {
|
||||
this(hosthash, hostname, GenericFormatter.SHORT_DAY_FORMATTER.format(), new HashMap<String, Integer>());
|
||||
}
|
||||
|
@ -260,6 +260,59 @@ public class WebStructureGraphTest {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Incoming references from multiple sources on the same host to one target
|
||||
* URL accumulated between old and new structure
|
||||
*/
|
||||
@Test
|
||||
public void testIncomingReferencesFromNewAndOld() throws MalformedURLException {
|
||||
|
||||
WebStructureGraph graph = new WebStructureGraph(null);
|
||||
try {
|
||||
final DigestURL indexSource = new DigestURL("http://source.net/index.html");
|
||||
final String sourceHash = indexSource.hosthash();
|
||||
Set<DigestURL> targets = new HashSet<>();
|
||||
|
||||
final DigestURL target = new DigestURL("http://target.com/index.html");
|
||||
final String targetHash = target.hosthash();
|
||||
targets.add(target);
|
||||
|
||||
LearnObject lro = new LearnObject(indexSource, targets);
|
||||
graph.learnrefs(lro);
|
||||
|
||||
/* Backup learned reference to the old structure */
|
||||
graph.joinOldNew();
|
||||
|
||||
final DigestURL pathSource = new DigestURL("http://source.net/path/doc.html");
|
||||
targets = new HashSet<>();
|
||||
targets.add(target);
|
||||
|
||||
lro = new LearnObject(pathSource, targets);
|
||||
graph.learnrefs(lro);
|
||||
|
||||
final DigestURL querySource = new DigestURL("http://source.net/query?param=value");
|
||||
targets = new HashSet<>();
|
||||
targets.add(target);
|
||||
|
||||
lro = new LearnObject(querySource, targets);
|
||||
graph.learnrefs(lro);
|
||||
|
||||
/* Check that reference to the exact target URL is retrieved from structure */
|
||||
StructureEntry inRefs = graph.incomingReferences(targetHash);
|
||||
|
||||
Assert.assertNotNull(inRefs);
|
||||
Assert.assertEquals("target.com", inRefs.hostname);
|
||||
Assert.assertNotNull(inRefs.references);
|
||||
/* One accumulated host source reference */
|
||||
Assert.assertEquals(1, inRefs.references.size());
|
||||
/* 3 accumulated links from that host */
|
||||
Assert.assertEquals(Integer.valueOf(3), inRefs.references.get(sourceHash));
|
||||
|
||||
} finally {
|
||||
graph.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple performance measurements with a test structure filled to its limits.
|
||||
*/
|
||||
|
Loading…
x
Reference in New Issue
Block a user