Fix the isWWWDup bug for URLs like www.gov.uk.

The bug occurred because .gov.uk is a TLD and so is .uk.
Also added code to handle segfault signals better:
after saving, we now run the default signal handler
rather than calling abort(). Hopefully a core
will be dumped every time now.
This commit is contained in:
Matt Wells 2015-12-09 15:54:53 -08:00
parent 58eb71e4c1
commit 27e49df739
2 changed files with 18 additions and 1 deletions

@ -1666,9 +1666,16 @@ bool Process::shutdown2 ( ) {
if ( setrlimit(RLIMIT_CORE,&lim) )
log("gb: setrlimit: %s.", mstrerror(errno) );
// this is the trick: it will trigger the core dump by
// calling the original SIGSEGV handler.
int signum = SIGSEGV;
signal(signum, SIG_DFL);
kill(getpid(), signum);
// default handler should be called after we return now
// . force an abnormal termination which will cause a core dump
// . do not dump core on SIGHUP signals any more though
abort();
//abort();
// keep compiler happy
return true;
}

@ -15160,6 +15160,16 @@ char *XmlDoc::getIsWWWDup ( ) {
return &m_isWWWDup;
}
// watch out for idiot urls like www.gov.uk and www.gov.za
// treat them as though the TLD is uk/za and the domain
// is gov.uk and gov.za
if ( u->getDomain() &&
strncmp ( u->getDomain() , "www." , 4 ) == 0 ) {
m_isWWWDup = false;
m_isWWWDupValid = true;
return &m_isWWWDup;
}
// make it without the www
char withoutWWW[MAX_URL_LEN+1];
char *proto = "http";