Unittest for Domains.cpp; added initialize/finalize for Domains Test

This commit is contained in:
Ivan Skytte Jørgensen
2018-04-23 13:11:46 +02:00
parent 518f60803d
commit c93ca32eac
6 changed files with 63 additions and 36 deletions

@ -101,16 +101,17 @@ const char *getTLD ( const char *host , int32_t hostLen ) {
static HashTableX s_table;
static bool s_isInitialized = false;
static GbMutex s_tableMutex;
#include "tlds.inc"
static bool loadTLDs() {
FILE *fp = fopen("tlds.txt","r");
static bool loadTLDs(const char *data_dir) {
char full_filename[1024];
sprintf(full_filename,"%s/tlds.txt", data_dir);
FILE *fp = fopen(full_filename,"r");
if(!fp)
return false;
log(LOG_DEBUG,"build: Loading TLDs from 'tlds.txt'");
log(LOG_DEBUG,"build: Loading TLDs from '%s'",full_filename);
int num_tlds_loaded = 0;
char line[128];
while(fgets(line,sizeof(line),fp)) {
@ -118,7 +119,7 @@ static bool loadTLDs() {
if(s) *s='\0';
s = strchr(line,'#');
if(s) *s='\0';
if(!s || isspace(*s))
if(s && isspace(*s))
continue;
size_t dlen = strlen(line);
@ -132,39 +133,32 @@ static bool loadTLDs() {
num_tlds_loaded++;
}
fclose(fp);
log(LOG_DEBUG,"build: Loading %d TLDs from 'tlds.txt'", num_tlds_loaded);
log(LOG_DEBUG,"build: Loading %d TLDs from '%s'", num_tlds_loaded,full_filename);
return true;
}
static bool initializeTLDTable() {
ScopedLock sl(s_tableMutex);
if(!s_isInitialized) {
if(!s_table.set(8, 0, sizeof(s_tlds)*2,NULL,0,false, "tldtbl")) {
log(LOG_WARN, "build: Could not init table of TLDs.");
return false;
}
static bool initializeTLDTable(const char *data_dir) {
if(!s_table.set(8, 0, sizeof(s_tlds)*2,NULL,0,false, "tldtbl")) {
log(LOG_WARN, "build: Could not init table of TLDs.");
return false;
}
if(!loadTLDs()) {
//use burned-in default
for(int32_t i = 0; s_tlds[i]; i++) {
const char *d = s_tlds[i];
int32_t dlen = strlen (d);
int64_t dh = hash64Lower_a(d, dlen);
if(!s_table.addKey (&dh,NULL)) {
log( LOG_WARN, "build: dom table failed");
return false;
}
if(!loadTLDs(data_dir)) {
//use burned-in default
for(int32_t i = 0; s_tlds[i]; i++) {
const char *d = s_tlds[i];
int32_t dlen = strlen (d);
int64_t dh = hash64Lower_a(d, dlen);
if(!s_table.addKey (&dh,NULL)) {
log( LOG_WARN, "build: dom table failed");
return false;
}
}
s_isInitialized = true;
}
}
return true;
}
static bool isTLDForUrl(const char *tld, int32_t tldLen) {
if(!initializeTLDTable())
return false;
int32_t pcount = 0;
for ( int32_t i = 0 ; i < tldLen ; i++ ) {
// period count
@ -185,14 +179,15 @@ static bool isTLDForUrl(const char *tld, int32_t tldLen) {
bool isTLD(const char *tld, int32_t tldLen) {
if(!initializeTLDTable())
return false;
int64_t h = hash64Lower_a(tld, tldLen);
return s_table.isInTable(&h);
}
void resetDomains ( ) {
s_table.reset();
s_isInitialized = false;
// Public entry point for building the TLD lookup table.
//   data_dir: directory in which "tlds.txt" is looked up.
// Returns the result of initializeTLDTable(): true when the table was
// populated, false when the table could not be set up or filled.
bool initializeDomains(const char *data_dir) {
	const bool tableReady = initializeTLDTable(data_dir);
	return tableReady;
}
// Counterpart to initializeDomains(): resets the static TLD hash table (s_table).
// NOTE(review): presumably initializeDomains() has to be called again before
// isTLD() lookups are meaningful after this -- confirm against HashTableX::reset().
void finalizeDomains() {
s_table.reset();
}

@ -18,4 +18,7 @@ const char *getTLD ( const char *host , int32_t hostLen ) ;
//is the string (com or co.uk) a known TLD?
bool isTLD(const char *tld, int32_t tldLen);
bool initializeDomains(const char *data_dir);
void finalizeDomains();
#endif // GB_DOMAINS_H

@ -29,6 +29,7 @@
#include "StopWords.h"
#include "Wiki.h"
#include "Wiktionary.h"
#include "Domains.h"
#include "Proxy.h"
#include "Rebalance.h"
#include "SpiderProxy.h"
@ -59,7 +60,6 @@ bool g_inAutoSave;
extern void resetPageAddUrl ( );
extern void resetHttpMime ( );
extern void reset_iana_charset ( );
extern void resetDomains ( );
extern void resetEntities ( );
extern void resetQuery ( );
extern void resetAbbrTable ( );
@ -1027,7 +1027,7 @@ void Process::resetAll ( ) {
resetPageAddUrl();
resetHttpMime();
reset_iana_charset();
resetDomains();
finalizeDomains();
resetEntities();
resetQuery();
resetAbbrTable();

@ -46,6 +46,7 @@
#include "Wiktionary.h" // g_wiktionary
#include "WordVariations.h"
#include "CountryCode.h"
#include "Domains.h"
#include "Pos.h"
#include "Title.h"
#include "Speller.h"
@ -203,7 +204,6 @@ extern void resetPageAddUrl ( );
extern void resetHttpMime ( );
extern void reset_iana_charset ( );
extern void resetAdultBit ( );
extern void resetDomains ( );
extern void resetEntities ( );
extern void resetQuery ( );
@ -1234,6 +1234,11 @@ int main2 ( int argc , char *argv[] ) {
return 1;
}
if(!initializeDomains(g_hostdb.m_dir)) {
log( LOG_ERROR, "Domains initialization failed!" );
return 1;
}
// shout out if we're in read only mode
if ( g_conf.m_readOnlyMode )
log("db: -- Read Only Mode Set. Can Not Add New Data. --");

23
test/unit/DomainsTest.cpp Normal file

@ -0,0 +1,23 @@
#include <gtest/gtest.h>
#include "Domains.h"
#include <stdio.h>
TEST(DomainsTest, one_and_only_test) {
	// Self-contained fixture: the test writes its own tlds.txt into the
	// current directory (overwriting any existing file of that name) and
	// then loads it through initializeDomains(".").
	// The fixture holds a comment line, three TLDs, an empty line and a
	// made-up TLD.
	static const char *const fixture_lines[] = {
		"#test line 1\n",
		"com\n",
		"co.uk\n",
		"boo\n",
		"\n",
		"hemorroid\n",
	};
	const size_t fixture_line_count = sizeof(fixture_lines)/sizeof(fixture_lines[0]);

	FILE *fp=fopen("tlds.txt","w");
	ASSERT_TRUE(fp!=NULL);
	for(size_t i=0; i<fixture_line_count; i++)
		fputs(fixture_lines[i],fp);
	fclose(fp);

	// Load the table from the fixture that was just written.
	ASSERT_TRUE(initializeDomains("."));

	// Entries listed in the fixture are recognized...
	ASSERT_TRUE(isTLD("com",3));
	ASSERT_TRUE(isTLD("co.uk",5));
	// ...a multi-label string that is not listed is rejected...
	ASSERT_FALSE(isTLD("foo.boo.goo",11));
	// ...and the entry after the empty line was still loaded.
	ASSERT_TRUE(isTLD("hemorroid",9));
}

@ -18,6 +18,7 @@ OBJECTS = GigablastTest.o GigablastTestUtils.o \
UnicodeTest.o UrlBlockCheckTest.o UrlComponentTest.o UrlMatchListTest.o UrlParserTest.o UrlTest.o \
WordsTest.o \
XmlDocTest.o XmlTest.o \
DomainsTest.o \
.PHONY: all
all: $(TARGET)