mirror of
https://github.com/yacy/yacy_search_server.git
synced 2025-07-19 08:44:42 -04:00
Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git
This commit is contained in:
@ -218,7 +218,7 @@ public class yacysearchitem {
|
||||
prop.putHTML("content_publisher", result.publisher());
|
||||
prop.putHTML("content_creator", result.creator());// author
|
||||
prop.putHTML("content_subject", result.subject());
|
||||
final List<String> query = theSearch.query.getQueryGoal().getAllStrings();
|
||||
final List<String> query = theSearch.query.getQueryGoal().getIncludeStrings();
|
||||
final StringBuilder s = new StringBuilder(query.size() * 20);
|
||||
for (final String t: query) {
|
||||
s.append('+').append(t);
|
||||
|
@ -49,18 +49,16 @@ public class QueryGoal {
|
||||
|
||||
private String query_original;
|
||||
private HandleSet include_hashes, exclude_hashes;
|
||||
private final ArrayList<String> include_words, exclude_words, all_words;
|
||||
private final ArrayList<String> include_strings, exclude_strings, all_strings;
|
||||
private final ArrayList<String> include_words, exclude_words;
|
||||
private final ArrayList<String> include_strings, exclude_strings;
|
||||
|
||||
|
||||
public QueryGoal(HandleSet include_hashes, HandleSet exclude_hashes) {
|
||||
this.query_original = null;
|
||||
this.include_words = new ArrayList<String>();
|
||||
this.exclude_words = new ArrayList<String>();
|
||||
this.all_words = new ArrayList<String>();
|
||||
this.include_strings = new ArrayList<String>();
|
||||
this.exclude_strings = new ArrayList<String>();
|
||||
this.all_strings = new ArrayList<String>();
|
||||
this.include_hashes = include_hashes;
|
||||
this.exclude_hashes = exclude_hashes;
|
||||
}
|
||||
@ -71,10 +69,8 @@ public class QueryGoal {
|
||||
this.query_original = query_original;
|
||||
this.include_words = new ArrayList<String>();
|
||||
this.exclude_words = new ArrayList<String>();
|
||||
this.all_words = new ArrayList<String>();
|
||||
this.include_strings = new ArrayList<String>();
|
||||
this.exclude_strings = new ArrayList<String>();
|
||||
this.all_strings = new ArrayList<String>();
|
||||
|
||||
// remove funny symbols
|
||||
query_words = CharacterCoding.html2unicode(AbstractScraper.stripAllTags(query_words.toCharArray())).toLowerCase().trim();
|
||||
@ -86,11 +82,11 @@ public class QueryGoal {
|
||||
}
|
||||
|
||||
// first parse the quoted strings
|
||||
parseQuery(query_words, this.include_strings, this.exclude_strings, this.all_strings);
|
||||
|
||||
parseQuery(query_words, this.include_strings, this.exclude_strings);
|
||||
|
||||
// .. and then take these strings apart to generate word lists
|
||||
for (String s: this.include_strings) parseQuery(s, this.include_words, this.include_words, this.all_words);
|
||||
for (String s: this.exclude_strings) parseQuery(s, this.exclude_words, this.exclude_words, this.all_words);
|
||||
for (String s: this.include_strings) parseQuery(s, this.include_words, this.include_words);
|
||||
for (String s: this.exclude_strings) parseQuery(s, this.exclude_words, this.exclude_words);
|
||||
|
||||
WordCache.learn(this.include_strings);
|
||||
WordCache.learn(this.exclude_strings);
|
||||
@ -111,7 +107,7 @@ public class QueryGoal {
|
||||
* sq = '\''
|
||||
* dq = '"'
|
||||
*/
|
||||
private static void parseQuery(String s, ArrayList<String> include_string, ArrayList<String> exclude_string, ArrayList<String> all_string) {
|
||||
private static void parseQuery(String s, ArrayList<String> include_string, ArrayList<String> exclude_string) {
|
||||
while (s.length() > 0) {
|
||||
// parse query
|
||||
int p = 0;
|
||||
@ -135,7 +131,6 @@ public class QueryGoal {
|
||||
p++; // go behind the stop character (eats up space, sq and dq)
|
||||
s = p < s.length() ? s.substring(p) : "";
|
||||
if (string.length() > 0) {
|
||||
if (!all_string.contains(string)) all_string.add(string);
|
||||
if (inc) {
|
||||
if (!include_string.contains(string)) include_string.add(string);
|
||||
} else {
|
||||
@ -195,11 +190,7 @@ public class QueryGoal {
|
||||
for (String e: this.exclude_strings) if (t.indexOf(e.toLowerCase()) >= 0) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
public ArrayList<String> getAllStrings() {
|
||||
return all_strings;
|
||||
}
|
||||
|
||||
|
||||
public void filterOut(final SortedSet<String> blueList) {
|
||||
// filter out words that appear in this set
|
||||
// this is applied to the queryHashes
|
||||
|
Reference in New Issue
Block a user