From f8cd57c92f73ea78355c0649a6859ea349a9f40b Mon Sep 17 00:00:00 2001
From: Michael Peter Christen <mc@yacy.net>
Date: Sun, 22 Apr 2012 02:05:17 +0200
Subject: [PATCH] new indexing strategy: ALL links that appear anywhere are
 indexed, not only links where the content can be parsed. All non-parseable
 links are placed into the noload queue. The search process must therefore be
 able to filter out non-text search results. - This fixes the problem that
 image search results appeared in the text search. - The interactive search
 can now retrieve ALL types of links - The p2p interface is now extended to
 retrieve only certain types of links (text, image, video, apps) - The search
 process has an extension to filter the right document type according to the
 search query

---
 htroot/js/yacyinteractive.js                  |  2 +-
 htroot/yacy/search.java                       |  2 +-
 htroot/yacyinteractive.html                   |  3 ++
 htroot/yacysearch.java                        |  2 +-
 htroot/yacysearchitem.java                    |  2 +-
 source/de/anomic/crawler/CrawlStacker.java    |  9 ++++--
 .../yacy/cora/document/Classification.java    | 29 +++++++++++++++++--
 .../yacy/cora/document/MultiProtocolURI.java  | 12 ++++++++
 source/net/yacy/document/Condenser.java       |  9 +++---
 source/net/yacy/document/Document.java        | 17 +++++++++++
 source/net/yacy/peers/Protocol.java           |  5 ++++
 source/net/yacy/peers/RemoteSearch.java       | 12 ++++----
 source/net/yacy/search/Switchboard.java       |  5 +++-
 source/net/yacy/search/query/RWIProcess.java  | 17 +++++------
 source/net/yacy/search/query/SearchEvent.java |  1 +
 .../net/yacy/search/query/SnippetProcess.java |  6 ++--
 16 files changed, 102 insertions(+), 31 deletions(-)

diff --git a/htroot/js/yacyinteractive.js b/htroot/js/yacyinteractive.js
index f7d979175..71bcde91d 100644
--- a/htroot/js/yacyinteractive.js
+++ b/htroot/js/yacyinteractive.js
@@ -36,7 +36,7 @@ function search(search, count, offset) {
   } else if (window.ActiveXObject) { // IE
     self.xmlHttpReq = new ActiveXObject("Microsoft.XMLHTTP");
   }
-  self.xmlHttpReq.open('GET', "yacysearch.json?verify=false&resource=local&nav=all&maximumRecords=" + maximumRecords + "&startRecord=" + startRecord + "&query=" + query, true);
+  self.xmlHttpReq.open('GET', "yacysearch.json?verify=false&resource=local&nav=all&contentdom=all&maximumRecords=" + maximumRecords + "&startRecord=" + startRecord + "&query=" + query, true);
   self.xmlHttpReq.setRequestHeader('Content-Type', 'application/x-www-form-urlencoded');
   self.xmlHttpReq.onreadystatechange = function() {
     if (self.xmlHttpReq.readyState == 4) {
diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java
index bb6265536..da33bc4ff 100644
--- a/htroot/yacy/search.java
+++ b/htroot/yacy/search.java
@@ -119,7 +119,7 @@ public final class search {
         final int     maxdist= post.getInt("maxdist", Integer.MAX_VALUE);
         final String  prefer = post.get("prefer", "");
         final String  modifier = post.get("modifier", "").trim();
-        final String  contentdom = post.get("contentdom", "text");
+        final String  contentdom = post.get("contentdom", "all");
         final String  filter = post.get("filter", ".*"); // a filter on the url
         final Pattern snippetPattern = Pattern.compile(post.get("snippet", ".*")); // a filter on the snippet
         String  sitehash = post.get("sitehash", ""); if (sitehash.length() == 0) sitehash = null;
diff --git a/htroot/yacyinteractive.html b/htroot/yacyinteractive.html
index f24120475..f0309e03c 100644
--- a/htroot/yacyinteractive.html
+++ b/htroot/yacyinteractive.html
@@ -56,6 +56,9 @@ To see a list of all APIs, please visit the <a href="http://www.yacy-websuche.de
   <fieldset class="yacys">
   <input type="hidden" name="maximumRecords" value="#[maximumRecords]#" />
   <input type="hidden" name="startRecord" value="#[startRecord]#" />
+  <input type="hidden" name="verify" value="false" />
+  <input type="hidden" name="resource" value="local" />
+  <input type="hidden" name="contentdom" value="all" />
   <input id="search" class="searchinput" name="query" type="text" value="#[query]#" size="40" maxlength="80" onFocus="this.select()" />
   #(allowrealtime)#
   <input id="Enter" type="submit" name="Enter" value="Search" />::
diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java
index c4ff6c123..8cb0468ba 100644
--- a/htroot/yacysearch.java
+++ b/htroot/yacysearch.java
@@ -272,7 +272,7 @@ public class yacysearch {
 
         // find search domain
         final Classification.ContentDomain contentdom =
-            ContentDomain.contentdomParser(post == null ? "text" : post.get("contentdom", "text"));
+            ContentDomain.contentdomParser(post == null ? "all" : post.get("contentdom", "all"));
 
         // patch until better search profiles are available
         if ( contentdom == ContentDomain.TEXT ) {
diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java
index 93577644a..f57ab8be5 100644
--- a/htroot/yacysearchitem.java
+++ b/htroot/yacysearchitem.java
@@ -105,7 +105,7 @@ public class yacysearchitem {
         prop.put("navurlBase", QueryParams.navurlBase("html", theQuery, null, theQuery.urlMask.toString(), theQuery.navigators).toString());
         final String target_special_pattern = sb.getConfig(SwitchboardConstants.SEARCH_TARGET_SPECIAL_PATTERN, "");
 
-        if (theQuery.contentdom == Classification.ContentDomain.TEXT) {
+        if (theQuery.contentdom == Classification.ContentDomain.TEXT || theQuery.contentdom == Classification.ContentDomain.ALL) {
             // text search
 
             // generate result object
diff --git a/source/de/anomic/crawler/CrawlStacker.java b/source/de/anomic/crawler/CrawlStacker.java
index 3c3c53cbc..6c5e96080 100644
--- a/source/de/anomic/crawler/CrawlStacker.java
+++ b/source/de/anomic/crawler/CrawlStacker.java
@@ -39,6 +39,7 @@ import java.util.Properties;
 import java.util.concurrent.BlockingQueue;
 
 import net.yacy.cora.document.ASCII;
+import net.yacy.cora.document.Classification.ContentDomain;
 import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.cora.document.UTF8;
 import net.yacy.cora.protocol.Domains;
@@ -353,9 +354,11 @@ public final class CrawlStacker {
 
         // check availability of parser and maxfilesize
         String warning = null;
-        if (entry.size() > maxFileSize /*||
-            (entry.url().getFileExtension().length() > 0 && TextParser.supports(entry.url(), null) != null)
-            */) {
+        if (entry.size() > maxFileSize ||
+            entry.url().getContentDomain() == ContentDomain.APP  ||
+            entry.url().getContentDomain() == ContentDomain.IMAGE  ||
+            entry.url().getContentDomain() == ContentDomain.AUDIO  ||
+            entry.url().getContentDomain() == ContentDomain.VIDEO ) {
             warning = this.nextQueue.noticeURL.push(NoticedURL.StackType.NOLOAD, entry);
             //if (warning != null) this.log.logWarning("CrawlStacker.stackCrawl of URL " + entry.url().toNormalform(true, false) + " - not pushed: " + warning);
             return null;
diff --git a/source/net/yacy/cora/document/Classification.java b/source/net/yacy/cora/document/Classification.java
index c70f4b1e0..f0caad698 100644
--- a/source/net/yacy/cora/document/Classification.java
+++ b/source/net/yacy/cora/document/Classification.java
@@ -24,11 +24,13 @@ import java.io.BufferedInputStream;
 import java.io.File;
 import java.io.FileInputStream;
 import java.util.HashSet;
+import java.util.Map.Entry;
 import java.util.Properties;
 import java.util.Set;
 
 public class Classification {
 
+    private static final Set<String> textExtSet = new HashSet<String>();
     private static final Set<String> mediaExtSet = new HashSet<String>();
     private static final Set<String> imageExtSet = new HashSet<String>();
     private static final Set<String> audioExtSet = new HashSet<String>();
@@ -78,11 +80,13 @@ public class Classification {
 
     static {
 
-        final String apps = "7z,ace,arc,arj,apk,asf,asx,bat,bin,bkf,bz2,cab,com,css,dcm,deb,dll,dmg,exe,gho,ghs,gz,hqx,img,iso,jar,lha,rar,sh,sit,sitx,tar,tbz,tgz,tib,torrent,vbs,war,zip";
+        final String text = "htm,html,phtml,shtml,xhtml,php,php3,php4,php5,cfm,asp,aspx,tex,txt,jsp,mf,asp,aspx,csv,gpx,vcf,xsl,xml,pdf,doc,docx,xls,xlsx,ppt,pptx";
+        final String apps = "7z,ace,arc,arj,apk,asf,asx,bat,bin,bkf,bz2,cab,com,css,dcm,deb,dll,dmg,exe,java,gho,ghs,gz,hqx,img,iso,jar,lha,rar,sh,sit,sitx,tar,tbz,tgz,tib,torrent,vbs,war,zip";
         final String audio = "aac,aif,aiff,flac,m4a,m4p,mid,mp2,mp3,oga,ogg,ram,sid,wav,wma";
         final String video = "3g2,3gp,3gp2,3gpp,3gpp2,3ivx,asf,asx,avi,div,divx,dv,dvx,env,f4v,flv,hdmov,m1v,m4v,m-jpeg,moov,mov,movie,mp2v,mp4,mpe,mpeg,mpg,mpg4,mv4,ogm,ogv,qt,rm,rv,vid,swf,wmv";
         final String image = "ai,bmp,cdr,cmx,emf,eps,gif,img,jpeg,jpg,mng,pct,pdd,pdn,pict,png,psb,psd,psp,tif,tiff,wmf";
 
+        addSet(textExtSet, text); // text formats
         addSet(imageExtSet, image); // image formats
         addSet(audioExtSet, audio); // audio formats
         addSet(videoExtSet, video); // video formats
@@ -95,6 +99,11 @@ public class Classification {
         for (String s: extString.split(",")) set.add(s.toLowerCase().trim());
     }
 
+    public static boolean isTextExtension(String textExt) {
+        // null never counts as a text extension; otherwise the lookup ignores case and surrounding whitespace
+        return textExt != null && textExtSet.contains(textExt.trim().toLowerCase());
+    }
+
     public static boolean isMediaExtension(String mediaExt) {
         if (mediaExt == null) return false;
         return mediaExtSet.contains(mediaExt.trim().toLowerCase());
@@ -120,12 +129,20 @@ public class Classification {
         return appsExtSet.contains(appsExt.trim().toLowerCase());
     }
 
+    public static ContentDomain getContentDomain(String ext) { // first matching set wins, in the order tested below
+        return isTextExtension(ext) ? ContentDomain.TEXT
+             : isImageExtension(ext) ? ContentDomain.IMAGE // tested before APP, so "img" is an image
+             : isAudioExtension(ext) ? ContentDomain.AUDIO
+             : isVideoExtension(ext) ? ContentDomain.VIDEO // tested before APP, so "asf"/"asx" are video
+             : isApplicationExtension(ext) ? ContentDomain.APP
+             : ContentDomain.ALL; // no set matched: ALL doubles as the "unclassified" result
+    }
+
     public static boolean isPictureMime(final String mimeType) {
         if (mimeType == null) return false;
         return mimeType.toUpperCase().startsWith("IMAGE");
     }
 
-
     private static final Properties mimeTable = new Properties();
 
     public static void init(final File mimeFile) {
@@ -140,6 +157,14 @@ public class Classification {
                 if (mimeTableInputStream != null) try { mimeTableInputStream.close(); } catch (final Exception e1) {}
             }
         }
+        for (Entry<Object, Object> entry: mimeTable.entrySet()) {
+            String ext = (String) entry.getKey();
+            String mime = (String) entry.getValue();
+            if (mime.startsWith("text/")) textExtSet.add(ext.toLowerCase());
+            if (mime.startsWith("audio/")) audioExtSet.add(ext.toLowerCase());
+            if (mime.startsWith("video/")) videoExtSet.add(ext.toLowerCase());
+            if (mime.startsWith("application/")) appsExtSet.add(ext.toLowerCase());
+        }
     }
 
     public static int countMimes() {
diff --git a/source/net/yacy/cora/document/MultiProtocolURI.java b/source/net/yacy/cora/document/MultiProtocolURI.java
index cc6d67d24..f0c79bb89 100644
--- a/source/net/yacy/cora/document/MultiProtocolURI.java
+++ b/source/net/yacy/cora/document/MultiProtocolURI.java
@@ -45,6 +45,7 @@ import java.util.regex.Pattern;
 import jcifs.smb.SmbException;
 import jcifs.smb.SmbFile;
 import jcifs.smb.SmbFileInputStream;
+import net.yacy.cora.document.Classification.ContentDomain;
 import net.yacy.cora.document.Punycode.PunycodeException;
 import net.yacy.cora.protocol.Domains;
 import net.yacy.cora.protocol.TimeoutRequest;
@@ -89,6 +90,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
     protected       String host, path, quest, ref;
     protected       int port;
     protected       InetAddress hostAddress;
+    protected       ContentDomain contentDomain;
 
     /**
      * initialization of a MultiProtocolURI to produce poison pills for concurrent blocking queues
@@ -101,6 +103,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
         this.path = null;
         this.quest = null;
         this.ref = null;
+        this.contentDomain = null;
         this.port = -1;
     }
 
@@ -116,6 +119,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
         this.path = url.path;
         this.quest = url.quest;
         this.ref = url.ref;
+        this.contentDomain = null;
         this.port = url.port;
     }
 
@@ -123,6 +127,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
         if (url == null) throw new MalformedURLException("url string is null");
 
         this.hostAddress = null;
+        this.contentDomain = null;
 
         // identify protocol
         assert (url != null);
@@ -258,6 +263,13 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
     public final boolean isFile()  { return this.protocol.equals("file"); }
     public final boolean isSMB()   { return this.protocol.equals("smb"); }
 
+    public final ContentDomain getContentDomain() {
+        // classified from the file extension on the first call, afterwards served from the cached field
+        ContentDomain cached = this.contentDomain;
+        if (cached == null) this.contentDomain = cached = Classification.getContentDomain(this.getFileExtension());
+        return cached;
+    }
+
     public static MultiProtocolURI newURL(final String baseURL, final String relPath) throws MalformedURLException {
         if ((baseURL == null) ||
             isHTTP(relPath) ||
diff --git a/source/net/yacy/document/Condenser.java b/source/net/yacy/document/Condenser.java
index 9915a052d..9effd19f7 100644
--- a/source/net/yacy/document/Condenser.java
+++ b/source/net/yacy/document/Condenser.java
@@ -38,6 +38,7 @@ import java.util.SortedSet;
 import java.util.TreeMap;
 
 import net.yacy.cora.document.ASCII;
+import net.yacy.cora.document.Classification.ContentDomain;
 import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.cora.document.UTF8;
 import net.yacy.document.language.Identificator;
@@ -112,10 +113,10 @@ public final class Condenser {
         this.RESULT_FLAGS = new Bitfield(4);
 
         // construct flag set for document
-        if (!document.getImages().isEmpty())     this.RESULT_FLAGS.set(flag_cat_hasimage, true);
-        if (!document.getAudiolinks().isEmpty()) this.RESULT_FLAGS.set(flag_cat_hasaudio, true);
-        if (!document.getVideolinks().isEmpty()) this.RESULT_FLAGS.set(flag_cat_hasvideo, true);
-        if (!document.getApplinks().isEmpty())   this.RESULT_FLAGS.set(flag_cat_hasapp,   true);
+        if (document.dc_source().getContentDomain() == ContentDomain.IMAGE || !document.getImages().isEmpty())     this.RESULT_FLAGS.set(flag_cat_hasimage, true);
+        if (document.dc_source().getContentDomain() == ContentDomain.AUDIO || !document.getAudiolinks().isEmpty()) this.RESULT_FLAGS.set(flag_cat_hasaudio, true);
+        if (document.dc_source().getContentDomain() == ContentDomain.VIDEO || !document.getVideolinks().isEmpty()) this.RESULT_FLAGS.set(flag_cat_hasvideo, true);
+        if (document.dc_source().getContentDomain() == ContentDomain.APP   || !document.getApplinks().isEmpty())   this.RESULT_FLAGS.set(flag_cat_hasapp,   true);
         if (document.lat() != 0.0f && document.lon() != 0.0f) this.RESULT_FLAGS.set(flag_cat_haslocation, true);
 
         this.languageIdentificator = new Identificator();
diff --git a/source/net/yacy/document/Document.java b/source/net/yacy/document/Document.java
index 6f45ca449..dc2cb8be8 100644
--- a/source/net/yacy/document/Document.java
+++ b/source/net/yacy/document/Document.java
@@ -833,5 +833,22 @@ dc_rights
         return result;
     }
 
+    public static Map<MultiProtocolURI, String> getAudiolinks(final Document[] documents) {
+        final Map<MultiProtocolURI, String> merged = new HashMap<MultiProtocolURI, String>(); // union of all audio links; a later document overwrites an equal URL key
+        for (int i = 0; i < documents.length; i++) merged.putAll(documents[i].audiolinks);
+        return merged;
+    }
+
+    public static Map<MultiProtocolURI, String> getVideolinks(final Document[] documents) {
+        final Map<MultiProtocolURI, String> merged = new HashMap<MultiProtocolURI, String>(); // union of all video links; a later document overwrites an equal URL key
+        for (int i = 0; i < documents.length; i++) merged.putAll(documents[i].videolinks);
+        return merged;
+    }
+
+    public static Map<MultiProtocolURI, String> getApplinks(final Document[] documents) {
+        final Map<MultiProtocolURI, String> merged = new HashMap<MultiProtocolURI, String>(); // union of all application links; a later document overwrites an equal URL key
+        for (int i = 0; i < documents.length; i++) merged.putAll(documents[i].applinks);
+        return merged;
+    }
 
 }
diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java
index 6de5f4a00..c74b77f34 100644
--- a/source/net/yacy/peers/Protocol.java
+++ b/source/net/yacy/peers/Protocol.java
@@ -589,6 +589,7 @@ public final class Protocol
         final String language,
         final String sitehash,
         final String authorhash,
+        final String contentdom,
         final int count,
         final long time,
         final int maxDistance,
@@ -634,6 +635,7 @@ public final class Protocol
                     language,
                     sitehash,
                     authorhash,
+                    contentdom,
                     count,
                     time,
                     maxDistance,
@@ -893,6 +895,7 @@ public final class Protocol
             final String language,
             final String sitehash,
             final String authorhash,
+            final String contentdom,
             final int count,
             final long time,
             final int maxDistance,
@@ -945,6 +948,7 @@ public final class Protocol
             parts.put("language", UTF8.StringBody(language));
             parts.put("sitehash", UTF8.StringBody(sitehash));
             parts.put("authorhash", UTF8.StringBody(authorhash));
+            parts.put("contentdom", UTF8.StringBody(contentdom));
             parts.put("ttl", UTF8.StringBody("0"));
             parts.put("maxdist", UTF8.StringBody(Integer.toString(maxDistance)));
             parts.put("profile", UTF8.StringBody(crypt.simpleEncode(rankingProfile.toExternalString())));
@@ -1516,6 +1520,7 @@ public final class Protocol
                             "", // language,
                             "", // sitehash,
                             "", // authorhash,
+                            "all", // contentdom,
                             10, // count,
                             3000, // time,
                             1000, // maxDistance,
diff --git a/source/net/yacy/peers/RemoteSearch.java b/source/net/yacy/peers/RemoteSearch.java
index cfacb3f4c..0db60280c 100644
--- a/source/net/yacy/peers/RemoteSearch.java
+++ b/source/net/yacy/peers/RemoteSearch.java
@@ -46,7 +46,7 @@ public class RemoteSearch extends Thread {
 
     private static final ThreadGroup ysThreadGroup = new ThreadGroup("yacySearchThreadGroup");
 
-    final private String wordhashes, excludehashes, urlhashes, sitehash, authorhash;
+    final private String wordhashes, excludehashes, urlhashes, sitehash, authorhash, contentdom;
     final private boolean global;
     final private int partitions;
     final private Segment indexSegment;
@@ -72,7 +72,7 @@ public class RemoteSearch extends Thread {
               final Pattern snippet,
               final QueryParams.Modifier modifier,
               final String language,
-              final String sitehash, final String authorhash,
+              final String sitehash, final String authorhash, final String contentdom,
               final int count, final long time, final int maxDistance,
               final boolean global, final int partitions,
               final Seed targetPeer,
@@ -96,6 +96,7 @@ public class RemoteSearch extends Thread {
         this.language = language;
         this.sitehash = sitehash;
         this.authorhash = authorhash;
+        this.contentdom = contentdom;
         this.global = global;
         this.partitions = partitions;
         this.indexSegment = indexSegment;
@@ -120,7 +121,7 @@ public class RemoteSearch extends Thread {
                         this.peers.mySeed(),
                         this.wordhashes, this.excludehashes, this.urlhashes,
                         this.prefer, this.filter, this.snippet, this.modifier.getModifier(),
-                        this.language, this.sitehash, this.authorhash,
+                        this.language, this.sitehash, this.authorhash, this.contentdom,
                         this.count, this.time, this.maxDistance, this.global, this.partitions,
                         this.targetPeer, this.indexSegment, this.containerCache, this.secondarySearchSuperviser,
                         this.blacklist, this.rankingProfile, this.constraint);
@@ -166,6 +167,7 @@ public class RemoteSearch extends Thread {
             final String language,
             final String sitehash,
             final String authorhash,
+            final String contentdom,
             final int count, final long time, final int maxDist,
             final Segment indexSegment,
             final SeedDB peers,
@@ -200,7 +202,7 @@ public class RemoteSearch extends Thread {
             try {
                 RemoteSearch rs = new RemoteSearch(
                     wordhashes, excludehashes, "", prefer, filter, snippet, modifier,
-                    language, sitehash, authorhash,
+                    language, sitehash, authorhash, contentdom,
                     count, time, maxDist, true, targets, targetPeers[i],
                     indexSegment, peers, containerCache, secondarySearchSuperviser, blacklist, rankingProfile, constraint);
                 rs.start();
@@ -233,7 +235,7 @@ public class RemoteSearch extends Thread {
         if (targetPeer == null || targetPeer.hash == null) return null;
         if (clusterselection != null) targetPeer.setAlternativeAddress(clusterselection.get(ASCII.getBytes(targetPeer.hash)));
         final RemoteSearch searchThread = new RemoteSearch(
-                wordhashes, "", urlhashes, QueryParams.matchnothing_pattern, QueryParams.catchall_pattern, QueryParams.catchall_pattern, new QueryParams.Modifier(""), "", "", "", 20, time, 9999, true, 0, targetPeer,
+                wordhashes, "", urlhashes, QueryParams.matchnothing_pattern, QueryParams.catchall_pattern, QueryParams.catchall_pattern, new QueryParams.Modifier(""), "", "", "", "all", 20, time, 9999, true, 0, targetPeer,
                 indexSegment, peers, containerCache, null, blacklist, rankingProfile, constraint);
         searchThread.start();
         return searchThread;
diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java
index 291c83cbc..ce4252d9b 100644
--- a/source/net/yacy/search/Switchboard.java
+++ b/source/net/yacy/search/Switchboard.java
@@ -2355,8 +2355,11 @@ public final class Switchboard extends serverSwitch
             // get the hyperlinks
             final Map<MultiProtocolURI, String> hl = Document.getHyperlinks(documents);
 
-            // add all images also to the crawl stack
+            // add all media links also to the crawl stack. They will be re-sorted to the NOLOAD queue and indexed afterwards as pure links
             hl.putAll(Document.getImagelinks(documents));
+            hl.putAll(Document.getApplinks(documents));
+            hl.putAll(Document.getVideolinks(documents));
+            hl.putAll(Document.getAudiolinks(documents));
 
             // insert those hyperlinks to the crawler
             MultiProtocolURI nextUrl;
diff --git a/source/net/yacy/search/query/RWIProcess.java b/source/net/yacy/search/query/RWIProcess.java
index dce9fcf8e..648bdedc2 100644
--- a/source/net/yacy/search/query/RWIProcess.java
+++ b/source/net/yacy/search/query/RWIProcess.java
@@ -289,7 +289,7 @@ public final class RWIProcess extends Thread
                 }
 
                 // check document domain
-                if ( this.query.contentdom != Classification.ContentDomain.TEXT ) {
+                if ( this.query.contentdom != Classification.ContentDomain.ALL ) {
                     if ( (this.query.contentdom == ContentDomain.AUDIO)
                         && (!(iEntry.flags().get(Condenser.flag_cat_hasaudio))) ) {
                         continue pollloop;
@@ -588,6 +588,12 @@ public final class RWIProcess extends Thread
                 continue; // rare case where the url is corrupted
             }
 
+            // check content domain
+            if (this.query.contentdom != Classification.ContentDomain.ALL && page.url().getContentDomain() != this.query.contentdom) {
+                this.sortout++;
+                continue;
+            }
+
             final String pageurl = page.url().toNormalform(true, true);
             final String pageauthor = page.dc_creator();
             final String pagetitle = page.dc_title().toLowerCase();
@@ -620,15 +626,6 @@ public final class RWIProcess extends Thread
                 continue;
             }
 
-            // check content domain
-            if ( (this.query.contentdom == ContentDomain.AUDIO && page.laudio() == 0)
-                || (this.query.contentdom == ContentDomain.VIDEO && page.lvideo() == 0)
-                || (this.query.contentdom == ContentDomain.IMAGE && page.limage() == 0)
-                || (this.query.contentdom == ContentDomain.APP && page.lapp() == 0) ) {
-                this.sortout++;
-                continue;
-            }
-
             // check vocabulary constraint
             final String tags = page.dc_subject();
             final String[] taglist = tags == null || tags.length() == 0 ? new String[0] : SPACE_PATTERN.split(page.dc_subject());
diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java
index 5e2887812..e49f072e8 100644
--- a/source/net/yacy/search/query/SearchEvent.java
+++ b/source/net/yacy/search/query/SearchEvent.java
@@ -176,6 +176,7 @@ public final class SearchEvent
                             SearchEvent.this.query.targetlang == null ? "" : SearchEvent.this.query.targetlang,
                             SearchEvent.this.query.sitehash == null ? "" : SearchEvent.this.query.sitehash,
                             SearchEvent.this.query.authorhash == null ? "" : SearchEvent.this.query.authorhash,
+                            SearchEvent.this.query.contentdom == null ? "all" : SearchEvent.this.query.contentdom.toString(),
                             remote_maxcount,
                             remote_maxtime,
                             SearchEvent.this.query.maxDistance,
diff --git a/source/net/yacy/search/query/SnippetProcess.java b/source/net/yacy/search/query/SnippetProcess.java
index 745b113b2..554f0cca6 100644
--- a/source/net/yacy/search/query/SnippetProcess.java
+++ b/source/net/yacy/search/query/SnippetProcess.java
@@ -580,7 +580,7 @@ public class SnippetProcess {
         }
 
         // load snippet
-        if (this.query.contentdom == Classification.ContentDomain.TEXT) {
+        if (page.url().getContentDomain() == Classification.ContentDomain.TEXT) {
             // attach text snippet
             startTime = System.currentTimeMillis();
             final TextSnippet snippet = new TextSnippet(
@@ -612,7 +612,7 @@ public class SnippetProcess {
                 Log.logInfo("SEARCH", "sorted out url " + page.url().toNormalform(true, false) + " during search: " + reason);
                 return null;
             }
-        } else {
+        } else if (page.url().getContentDomain() == Classification.ContentDomain.IMAGE) {
             // attach media information
             startTime = System.currentTimeMillis();
             final List<MediaSnippet> mediaSnippets = MediaSnippet.retrieveMediaSnippets(page.url(), this.snippetFetchWordHashes, this.query.contentdom, cacheStrategy, 6000, !this.query.isLocal());
@@ -633,6 +633,8 @@ public class SnippetProcess {
                 Log.logInfo("SEARCH", "sorted out url " + page.url().toNormalform(true, false) + " during search: " + reason);
                 return null;
             }
+        } else {
+            return new ResultEntry(page, this.query.getSegment(), this.peers, null, null, dbRetrievalTime, 0); // result without snippet
         }
         // finished, no more actions possible here
     }