privacore-open-source-searc.../Dns.cpp

2616 lines
87 KiB
C++

#include "Dns.h"
#include "HashTableT.h"
#include "Process.h"
#include "File.h"
#include "Conf.h"
#include "Hostdb.h"
#include "Dns_internals.h"
#include "ip.h"
#include "Mem.h"
#include "Errno.h"
#include "hash.h"
#include "utf8_fast.h"
#include "gbmemcpy.h"
#include <fcntl.h>
#include <cctype>
// comment out the following line to disable DNS TLD caching
// TLD caching seems to give about 15% performance increase over not caching.
// it has been pretty thoroughly tested, but if there is a problem,
// feel free to disable it.
#define DNS_TLD_CACHE
// See section 7. RESOLVER IMPLEMENTATION in the rfc 1035
// TODO: use the canonical name as a normalization!!
#define MAX_DEPTH 18 // we can have a lot of CNAME aliases w/ akamai
#define MAX_TRIED_IPS 32 // stop after asking 32 nameservers, return timed out
#define LOOP_BUF_SIZE 26100
#define MAX_DNS_HOSTNAME_LEN 127
// use a default of 1 day for both caches
#define DNS_CACHE_MAX_AGE (60*60*24)
struct DnsState {
key96_t m_hostnameKey;
// key for lookup into s_dnsTable hash table
int64_t m_tableKey;
Dns *m_this ;
void *m_state;
void (*m_callback)(void *state, int32_t ip);
bool m_freeit;
char m_hostname[MAX_DNS_HOSTNAME_LEN+1];
// . point to the replies received from dns servers
// . m_dnsNames[] should point into these reply buffers
//char *m_replyBufPtrs[6];
//int32_t m_numReplies;
// we can do a recursion up to 5 levels deep. sometimes the reply
// we get back is a list of ips of nameservers we need to ask.
// that can happen a few times in a row, and we have to keep track
// of the depth here. initially we set these ips to those of the
// root servers (or sometimes the local bind servers).
bool m_rootTLD [MAX_DEPTH];
int32_t m_fallbacks[MAX_DEPTH];
int32_t m_dnsIps [MAX_DEPTH][MAX_DNS_IPS];
int32_t m_numDnsIps[MAX_DEPTH];
int32_t m_depth; // current depth
// . use these nameservers to do the lookup
// . if not provided, they default to the root nameservers
// . the first one we ask is based on hash of hostname % m_numDns,
// if that times out, the 2nd is right after the first, etc. so we
// always stay in order.
// . m_dnsNames point into m_nameBuf, m_namePtr pts to the end of
// m_nameBuf so you can add new names to it.
// m_dnsNames are NULLified when getIPOfDNS() get their ip address
// which is then added to m_dnsIps[]
char *m_dnsNames [MAX_DEPTH][MAX_DNS_IPS];
int32_t m_numDnsNames [MAX_DEPTH];
char m_nameBuf [512];
char *m_nameBufPtr;
char *m_nameBufEnd;
// this holds the one and only dns request
char m_request[512];
int32_t m_requestSize;
// after we send to a nameserver add its ip to this list so we don't
// send to it again. or so we do not even add it to m_dnsIps again.
int32_t m_triedIps[MAX_TRIED_IPS];
int32_t m_numTried;
// if we have to get the ip of the dns, then we get back more dns
// that refer to that dns and we have to get the ip of those, ... etc.
// for getting the ip of a dns we cast m_buf as a DnsState and use
// that to avoid having to allocate more memory. however, we have to
// keep track of how many times we do that recursively until we run
// out of m_buf.
int32_t m_loopCount;
// set to EDNSDEAD (hostname does not exist) if we encounter that
// error, however, we continue to ask other dns servers about the
// hostname because we can often uncover the ip address that way.
// but if we never do, we want to return this error, not ETIMEDOUT.
int32_t m_errno;
// we have to turn it off in some requests for some reason
// like for www.fsis.usda.gov, othewise we get a refused to talk error
bool m_recursionDesired;
// have a total timeout function
int32_t m_startTime;
char m_buf[LOOP_BUF_SIZE];
};
// a global class extern'd in .h file
Dns g_dns;
static RdbCache g_timedoutCache;
static int64_t s_antiLockCount = 1LL;
#define TIMEOUT_SINGLE_HOST_MS 30000
#define TIMEOUT_TOTAL 90
// CallbackEntry now defined in HashTableT.cpp
static HashTableT<int64_t,CallbackEntry> s_dnstable;
static HashTableT<uint32_t,TLDIPEntry> s_TLDIPtable;
Dns::Dns() {
m_ips = NULL;
m_keys = NULL;
m_numSlots = 0;
m_dnsClientPort = 0;
}
// reset the udp server and rdb cache
void Dns::reset() {
log("db: resetting dns");
m_udpServer.reset();
m_rdbCache.reset();
g_timedoutCache.reset();
s_dnstable.reset();
s_TLDIPtable.reset();
m_rdbCacheLocal.reset();
// free hash table of /etc/hosts
if ( m_ips ) mfree ( m_ips , m_numSlots*4 , "Dns");
if ( m_keys ) mfree ( m_keys , m_numSlots*sizeof(key96_t), "Dns");
m_ips = NULL;
m_keys = NULL;
m_numSlots = 0;
}
// . port will be incremented if already in use
// . use 1 socket for recving and sending
// . we can use a shared rdb cache
// . we use the dbId to separate our cache entries from other db's entries
bool Dns::init ( uint16_t clientPort ) {
// get primary dns server info from the conf class
m_dnsClientPort = clientPort; // g_conf.m_dnsClientPort;
// set the name of the cache. it will save to WORKDIR/{name}.dat
int32_t maxMem = g_conf.m_dnsMaxCacheMem ;
// . how many nodes in cache tree can we fit?
// . each rec is key (12) and ip(4)
// . overhead in cache is 56
// . that makes 56 + 4 = 60
// . not correct? stats suggest it's less than 25 bytes each
int32_t maxCacheNodes = maxMem / 25;
// make a copy of our protocol to pass to udp server
// static DnsProtocol proto;
// set the cache
if ( ! m_rdbCache.init ( maxMem ,
4 , // fixed data size of rec
maxCacheNodes ,
"dns" , // dbname
true, // save cache to disk?
12, //cachekeysize
-1 )) { // numPtrsMax
log( LOG_ERROR, "dns: Cache init failed." );
return false;
}
// make a copy of our protocol to pass to udp server
// static DnsProtocol proto;
// set the cache
int32_t maxMemLocal = 100000;
if ( ! m_rdbCacheLocal.init ( maxMemLocal ,
4 , // fixed data size of rec
maxMemLocal/25,
"dnsLocal" , // dbname
true, // save cache to disk?
12, //cachekeysize
-1 )) { // numPtrsMax
log( LOG_ERROR, "dns: Cache local init failed." );
return false;
}
// . set the port, proto and hostmap in our udpServer
// . poll for timeouts every 11 seconds (11000 milliseconds)
if ( ! m_udpServer.init ( m_dnsClientPort,
&m_proto ,
64000 ,// sock read buf
32000 ,// sock write buf
500 ,//polltime(.5secs)
500 ,//maxudpslots
true )) { // is dns?
log( LOG_ERROR, "dns: Udp server init failed." );
return false;
}
// innocent log msg
log ( LOG_INIT,"dns: Sending requests on client port %" PRIu16, (uint16_t)m_dnsClientPort );
for ( int32_t i = 0 ; i < g_conf.m_numDns ; i++ ) {
if ( !g_conf.m_dnsIps[i] ) continue;
char ipbuf[16];
log ( LOG_INIT, "dns: Using nameserver %s:%i.",
iptoa(g_conf.m_dnsIps[i],ipbuf) , g_conf.m_dnsPorts[i] );
}
// . only init the timedout cache once
// . cache the dns servers' ips who timeout on us so we don't slow
// things down. later we can turn this into a "speed" cache, so
// we ask the fastest servers first.
static bool s_init = false;
if ( s_init ) return true;
// just 30k for this little guy
int32_t maxCacheMem = 30000;
maxCacheNodes = maxCacheMem / 25;
g_timedoutCache.init ( maxCacheMem ,
4 , // fixed data size of rec
maxCacheNodes ,
"dnstimedout" , // dbname
true, // save cache to disk?
12, //cachekeysize
-1); // numPtrsMax
return true;
}
static bool isTimedOut(int32_t ip) {
// is this ip address in the "timed out" cache. if so,
// then do not try again for at least 1 hour
char *rec;
int32_t recSize;
int32_t maxAge = 3600; // 1 hour in seconds
key96_t k;
k.n0 = 0LL;
k.n1 = ip;
RdbCacheLock rcl(g_timedoutCache);
bool inCache = g_timedoutCache.getRecord ( (collnum_t)0 ,
k , // key
&rec ,
&recSize,
false ,//do copy?
maxAge ,
true );//inc cnt
return inCache;
}
inline bool parseTLD(DnsState* ds, char* buf, int32_t* len) {
// parse out the TLD
const char* hostname= ds->m_hostname;
const char* cbeg = hostname + strlen(hostname);
const char* cend = cbeg;
bool found = false;
char* curs;
while (cbeg > hostname) {
if (*cbeg == '.') {
cbeg++;
found = true;
if (cend - cbeg > *len - 1)
return false;
*len = cend - cbeg;
gbmemcpy(buf, cbeg, *len);
buf[*len] = '\0';
for (curs = buf; *curs; curs++)
*curs = to_lower_a(*curs);
//log(LOG_DEBUG,"dns: parseTLD found '%s'", buf);
return true;
}
cbeg--;
}
return found;
}
inline uint32_t TLDIPKey(char* buf, int32_t len) {
// build "key" for TLD hash table
uint32_t key;
key = 0;
if (len > 4)
len = 4;
gbmemcpy(&key, buf, len);
return key;
}
#ifdef DNS_TLD_CACHE
// returns NULL if we had a TLD cache miss
// returns ptr if we had a TLD cache hit
// adjusts ds->m_depth and ds->m_dnsIps on a hit
static const TLDIPEntry* getTLDIP(DnsState* ds) {
//log(LOG_DEBUG, "dns: getTLDIP entry");
char buf[64];
int32_t len = sizeof(buf);
if (!parseTLD(ds, buf, &len)) {
log(LOG_WARN, "dns: unable to determine TLD for %s",
ds->m_hostname);
return NULL;
}
uint32_t key = TLDIPKey(buf, len);
if (key == 0) {
log(LOG_WARN, "dns: getTLDIP invalid key");
return NULL;
}
TLDIPEntry* tldip = s_TLDIPtable.getValuePointer(key);
if (tldip == NULL) {
//log(LOG_DEBUG, "dns: getTLDIP not in cache");
return NULL;
}
// JAB: 2038
if (tldip->expiry <= time(NULL)) {
log(LOG_DEBUG, "dns: getTLDIP expired for %s", ds->m_hostname);
return NULL;
}
log(LOG_DEBUG,"dns: TLD cache hit .%s NS depth %" PRId32" for %s.",
buf, (int32_t) ds->m_depth, ds->m_hostname);
return tldip;
}
static void dumpTLDIP( const char* tld,
TLDIPEntry* tldip) {
for (int32_t i = 0; i < tldip->numTLDIPs; i++) {
char ipbuf[16];
log(LOG_DEBUG, "dns: .%s TLD IP %s",
tld, iptoa(tldip->TLDIP[i],ipbuf));
}
}
static void setTLDIP( DnsState* ds,
TLDIPEntry* tldip) {
// see if it's already in cache (i.e. poss. expired).
const TLDIPEntry* cached = getTLDIP(ds);
int32_t now = time(NULL);
// expire TLD NS in 24 hours
tldip->expiry = now + 3600 * 24;
char buf[64];
int32_t len = sizeof(buf);
if (!parseTLD(ds, buf, &len)) {
log(LOG_WARN, "dns: unable to determine TLD for %s",
ds->m_hostname);
return;
}
char tld[64];
gbmemcpy(tld, buf, len);
tld[len] = '\0';
if (cached == NULL) {
uint32_t key = TLDIPKey(buf, len);
if (!s_TLDIPtable.addKey(key, *tldip)) {
log(LOG_WARN, "dns: unable to add %s to TLD cache",
ds->m_hostname);
return;
}
log(LOG_DEBUG, "dns: TLD .%s NS cache add", buf);
dumpTLDIP(tld, tldip);
}
else if (cached->expiry <= now) {
// JAB: non-const cast...
gbmemcpy((TLDIPEntry*) cached, tldip, sizeof(TLDIPEntry));
log(LOG_DEBUG, "dns: TLD .%s NS cache update", buf);
dumpTLDIP(tld, tldip);
}
else {
//log(LOG_DEBUG, "dns: TLD cache up-to-date");
}
}
#else // DNS_TLD_CACHE
// code that is run when TLDIP cache is disabled...
static const TLDIPEntry* getTLDIP(DnsState* ds) {
return NULL;
}
#endif // DNS_TLD_CACHE
// . returns true and sets g_errno on error
// . returns false if transaction blocked, true if completed
// . returns false if you must wait
// . calls gotIp with ip when it gets it or timesOut or errors out
// . sets *ip to 0 if none (does not exist)
// . sets *ip to -1 and sets g_errno if there was an error
bool Dns::getIp ( const char *hostname,
int32_t hostnameLen ,
int32_t *ip ,
void *state ,
void (* callback ) ( void *state , int32_t ip ) ,
DnsState *ds ,
int32_t timeout ,
bool dnsLookup) {
// . don't accept large hostnames
// . technically the limit is 255 but i'm stricter
if ( hostnameLen > MAX_DNS_HOSTNAME_LEN ) {
g_errno = EHOSTNAMETOOBIG;
log("dns: Asked to get IP of hostname over %d characters long.", MAX_DNS_HOSTNAME_LEN);
*ip=0;
return true;
}
// debug msg
char tmp[MAX_DNS_HOSTNAME_LEN+1];
gbmemcpy ( tmp , hostname , hostnameLen );
tmp [ hostnameLen ] = '\0';
log(LOG_DEBUG, "dns: hostname '%s'", tmp);
// assume no error
g_errno = 0;
// only g_dnsDistributed should be calling this, not g_dnsLocal
if ( this != &g_dns ) { g_process.shutdownAbort(true); }
// not thread safe
//if ( g_threads.amThread() ) { g_process.shutdownAbort(true); }
if ( hostnameLen <= 0 ) {
log(LOG_LOGIC,"dns: Asked to get IP of zero length hostname.");
*ip = 0;
return true;
}
// if url is already in a.b.c.d format return that
*ip = atoip ( hostname , hostnameLen );
if ( *ip != 0 ) {
log(LOG_DEBUG, "dns: IP address passed into getIp '%s'", tmp);
return true;
}
// key is hash of the hostname
key96_t hostKey96 = hash96 ( hostname , hostnameLen );
// . is it in the /etc/hosts file?
// . BAD: could have a key collision!! TODO: fix..
if ( g_conf.m_useEtcHosts && isInFile ( hostKey96 , ip ) ) return true;
// . try getting from the cache first
// . this returns true if was in the cache and sets *ip to the ip
// . we now cached EDNSTIMEDOUT errors for a day, so *ip can be -1
// . TODO: watchout for key collision
if ( isInCache ( hostKey96 , ip ) ) {
// return 1 to indicate we got it right away in *ip
if ( ! g_conf.m_logDebugDns ) return true;
//char *dd = "distributed";
//if ( this == &g_dnsLocal ) dd = "local";
// debug msg
char ipbuf[16];
log(LOG_DEBUG,"dns: got ip of %s for %s in distributed cache.",
iptoa(*ip,ipbuf),tmp);
return true;
}
// . if this hostname request is already in progress, wait for that
// reply to to come back rather than launching a duplicate request.
// . each bucket in the s_dnstable hashtable is a possible head of a
// linked list of callback/state pairs which are waiting for that
// hostname's ip
// . is the ip for this hostname already being fetched?
// . if so, there will be a callback entry class that should match its
// DnsState::m_callback in there and have a key of key.n0 (see below)
// . TODO: we can have collisions and end up getting back the wrong ip
// how can we fix this? keep a ptr to ds->m_hostname? and if does
// not match then just error out?
int64_t hostKey64 = hostKey96.n0 & 0x7fffffffffffffffLL;
// never let this be zero
if ( hostKey64 == 0 ) {
hostKey64 = 1;
}
// see if we are already looking up this hostname
CallbackEntry *ptr = s_dnstable.getValuePointer ( hostKey64 );
// if he has our key see if his hostname matches ours, it should.
if ( ptr &&
// we do not store hostnameLen in ds, so make sure this is 0
! ptr->m_ds->m_hostname[hostnameLen] &&
(int32_t)strlen(ptr->m_ds->m_hostname) == hostnameLen &&
strncmp ( ptr->m_ds->m_hostname, hostname, hostnameLen ) != 0 ) {
g_errno = EBADENGINEER;
log(LOG_WARN, "dns: Found key collision in wait queue. host %s has "
"same key as %s. key=%" PRIu64".",
ptr->m_ds->m_hostname, tmp, hostKey64);
//g_process.shutdownAbort(true);
// we should just error out if this happens, it is better
// than giving him the wrong ip, he will be retried later
// by the spider.
return true;
}
// regardless, add our "ce" to the table, but assume we are NOT first
// in line for a hostname and use a bogus key. it doesn't matter,
// we just need some memory to store our CallbackEntry class.
static int64_t s_bogus = 0;
// make a CallbackEntry class to add to a slot in the table
CallbackEntry ce;
ce.m_callback = callback;
ce.m_state = state;
ce.m_nextKey = 0LL; // assume we are the first for this hostname
ce.m_ds = NULL;
ce.m_listSize = 0;
ce.m_listId = 0;
// always inc now no matter what now so no danger of re-use
s_bogus++;
// if we are the first guy requesting the ip for this hostname
// then use "hostKey" to get the slot to store "ce",
int64_t finalKey = hostKey64 ;
// otherwise use "s_bogus" as the key. the bogus key is just for
// getting a slot to use to store "ce".
if ( ptr ) {
// let's hash it up for efficiency
finalKey = hash64 ( (char *)&s_bogus,8);
// never let this be 0
if ( finalKey == 0 ) finalKey = 1LL;
// bogus should never equal a key.n0 for any request, otherwise
// that is a collision. to avoid this possibility keep its hi
// bit set, and hi bit clear on the key.n0 key (hostKey). this
// way, a waiting slot can never collide with any other slot.
finalKey |= 0x8000000000000000LL;
}
// BUT if we are looking up a dns server's ip, then NEVER wait in
// line because we could deadlock!
if ( dnsLookup ) {
loop:
finalKey = hash64 ( hostKey64 , s_antiLockCount++ );
// it is not waiting in anyone's line, so turn this bit off
finalKey &= 0x7fffffffffffffffLL;
// ensure not 0
if ( finalKey == 0 ) finalKey = 1;
// assume hostKey is not in the table, even though it
// may be, we cannot wait in line behind it
ptr = NULL;
// ensure no collision, if so, s_antiLockCount will be
// different now so hash again until we do not collide
if ( s_dnstable.getValuePointer ( finalKey ) )
goto loop;
}
// assume we have no parent
int64_t parentKey = 0;
// if parent, set parentKey to "hostKey", the hash of the hostname
if ( ptr ) parentKey = hostKey64;
// make sure we do not have a circular dependency if we are looking
// up the ip of a dns in order to ask him the ip of what we are
// looking up.
// EXAMPLE:
// 1. get ip of xyz.com
// 2. have to ask dns1.xyz.com
// 3. to get his ip we have to ask dns2.xyz.com
// 4. and to get his ip we have to as dns1.xyz.com
// 5. which we'll see that it is already outstanding in the hashtable,
// i.e., it has a parent in there, and it will just wait in line
// never to get out of it, if it were not for the following circular
// dependency check:
// example url: www.hagener-schulen.de
int32_t loopCount = 0;
// loopCount is how many times we've had to ask for the ip of a
// nameserver recursively.
if ( ds ) loopCount = ds->m_loopCount;
// point to the current DnsState
char *parent = (char *)ds;
// the DnsState was built to hold a few DnsStates in it for just
// this purpose, so we can "backup" to our "parents" and make sure
// they did not initiate this linked list. Search for "ds2" below
// to see where we initiate the recursion.
while ( ptr && loopCount-- > 0 ) {
// the recursive "ds"es occupy DnsState::m_buf of their
// containing DnsState. go back one.
parent -= ((char *)ds->m_buf - (char *)ds);
// sanity check
//if ( ((DnsState *)parent)->m_buf != (char *)ds ) {
// g_process.shutdownAbort(true); }
// do we have the circular dependency?
if ( parent == (char *)ptr->m_ds ) {
g_errno = EBADENGINEER;
log(LOG_DEBUG,"dns: Caught circular dependency.");
return true;
}
}
// debug msg
log(LOG_DEBUG,"dns: Adding key %" PRIu64" from table. "
"parentKey=%" PRIu64" callback=%p state=%p.",
finalKey,parentKey,callback,state);
// ensure "bogus" key not already present in table, otherwise,
// addKey will just overwrite the value!!
while ( ptr && s_dnstable.getValuePointer ( finalKey ) ) {
log("dns: Got collision on incremental key.");
finalKey += 1LL;
finalKey |= 0x8000000000000000LL;
}
// we need to be able to add ourselves to the table so our callback
// can get called, otherwise it is pointless. this returns false
// and sets g_errno on error.
int32_t slotNum = -1;
if ( ! s_dnstable.addKey ( finalKey , ce , &slotNum ) ) {
log("dns: Failed to add key to table: %s.",mstrerror(g_errno));
return true;
}
// get the value from the slot so we can insert into linked list.
CallbackEntry *ppp = s_dnstable.getValuePointerFromSlot ( slotNum );
// sanity check
if ( ppp->m_callback != callback || ppp->m_state != state ) {
log("dns: Failed sanity check 3.");
g_process.shutdownAbort(true);
}
// adding a key may have changed the parent ptr... get again just
// in case
if ( ptr ) {
ptr = s_dnstable.getValuePointer ( hostKey64 );
// sanity check - it should still be there for sure
if ( ! ptr ) { g_process.shutdownAbort(true); }
}
// . insert into beginning of the linked list to avoid having to scan
// . "ptr" is a ptr to the parent CallbackEntry, head of linked list
if ( ptr ) {
int64_t oldNext = ptr->m_nextKey;
ptr->m_nextKey = finalKey;
ppp->m_nextKey = oldNext;
// let parent know how big its linked list is
ptr->m_listSize++;
// propagate the list id, it is stored in the parent node
// so put it into us, too
ppp->m_listId = ptr->m_listId;
if ( g_conf.m_logDebugDns )
log(LOG_DEBUG,"dns: Waiting in line for %s. key=%" PRIu64". "
"nextKey=%" PRIu64" listSize=%" PRId32" listId=%" PRId32" "
"numSlots=%" PRId32".",
tmp,finalKey,oldNext,
ptr->m_listSize,ptr->m_listId,
s_dnstable.getNumSlots());
// ok, we block now, waiting for the initial callback
return false;
}
// init our linked list size count
ppp->m_listSize = 1;
// it is the parent, use 0 to indicate none
static int32_t s_listId = 0;
ppp->m_listId = s_listId++;
// . make a DnsState
// . set g_errno and return true on malloc() error
if ( ds )
ds->m_freeit = false;
else {
ds = (DnsState *) mmalloc ( sizeof(DnsState ), "Dns" );
if ( ! ds ) {
log("dns: Failed to allocate mem for ip lookup.");
// debug msg
log(LOG_DEBUG,"dns: Removing2 key %" PRIu64" from table. "
"parentKey=%" PRIu64" callback=%p state=%p.",
hostKey64,parentKey,
callback,state);
s_dnstable.removeKey ( finalKey );
return true;
}
ds->m_freeit = true;
// keep track of how many times we pluck out a DnsState
// from DnsState::m_buf.
ds->m_loopCount = 0;
ds->m_startTime = getTime();
}
// set the ce.m_ds to our dns state so if a key collides later
// we can check DnsState::m_hostname. actually i think this is only
// used for sanity checking now.
ppp->m_ds = ds;
// reset this stuff
ds->m_numDnsIps [0] = 0;
ds->m_numDnsNames[0] = 0;
ds->m_depth = 0;
ds->m_numTried = 0;
ds->m_nameBufPtr = ds->m_nameBuf ;
ds->m_nameBufEnd = ds->m_nameBuf + 512;
ds->m_errno = 0;
ds->m_recursionDesired = true;
// debug msg
//log("dns::getIp: %s (key=%" PRIu64") NOT in cache...",tmp,key.n0);
// reset m_loopCount and startTime if we are just starting
if ( callback != gotIpOfDNSWrapper ) {
ds->m_loopCount = 0;
ds->m_startTime = getTime();
}
// set caller callback info
// hostKey96 and finalKey are basically the same thing for hostnames
// that are NOT waiting in line. but finalKey is the lower 64 bits
// of hostKey96, but finalKey should have its hi bit cleared to
// indicate it is not waiting in line. Also, if looking up the IP
// of a dns, dnsLookup is true, and finalKey is hashed with a
// special count to give a unique hash because we can not have
// dnsLookups waiting in line because of deadlock issues.
// search for "dns A" below to see what i'm talking about.
ds->m_hostnameKey = hostKey96;
ds->m_tableKey = finalKey;
ds->m_this = this;
ds->m_state = state;
ds->m_callback = callback;
gbmemcpy ( ds->m_hostname , hostname , hostnameLen );
ds->m_hostname [ hostnameLen ] = '\0';
// copy the sendBuf cuz we need it in gotIp() to ensure hostnames match
//char *copy = (char *) mdup ( msg , msgSize , "Dns" );
//if ( ! copy ) {
// if ( ds->m_freeit ) mfree (ds,sizeof(DnsState),"Dns");
// return -1;
//}
// hack this for now
//int32_t numDns = 0;
//int32_t dnsIps[MAX_DNSIPS];
// copy the initial nameserver ips into ds->m_dnsIps[0] (depth 0)
if ( g_conf.m_askRootNameservers ) {
// ROOT TLD CACHE ATTEMPT GOES HERE...
// this will fill in depth 1 in the query,
// if we have the nameservers cached...
log(LOG_DEBUG,"dns: hostname %s", ds->m_hostname);
gbmemcpy(ds->m_dnsIps[0],g_conf.m_rnsIps, g_conf.m_numRns * 4);
ds->m_numDnsIps[0] = g_conf.m_numRns;
ds->m_numDnsNames[0] = 0;
ds->m_rootTLD[0] = true;
ds->m_fallbacks[0] = 0;
// if a TLD is cached, copy it to depth 1
const TLDIPEntry* tldip = getTLDIP(ds);
if (tldip) {
gbmemcpy( ds->m_dnsIps[1],
tldip->TLDIP,
tldip->numTLDIPs * sizeof(uint32_t));
ds->m_numDnsIps[1] = tldip->numTLDIPs;
ds->m_numDnsNames[1] = 0;
ds->m_rootTLD[1] = true;
ds->m_fallbacks[1] = 0;
ds->m_depth = 1;
}
}
// otherwise, use the local bind9 servers
else {
//gbmemcpy(ds->m_dnsIps[0],g_conf.m_dnsIps,g_conf.m_numDns * 4);
int32_t numDns = 0;
for ( int32_t i = 0; i < MAX_DNSIPS; i++ ) {
if ( g_conf.m_dnsIps[i] == 0 ) continue;
ds->m_dnsIps[0][numDns] = g_conf.m_dnsIps[i];
numDns++;
}
ds->m_numDnsIps[0] = numDns;
ds->m_numDnsNames[0] = 0;
ds->m_rootTLD[0] = false;
ds->m_fallbacks[0] = 0;
}
// return 0 if we block on the reply
//if ( ! sendToNextDNS ( ds , timeout ) ) return false;
// timeout from an individual dns is 20 seconds
if ( ! sendToNextDNS ( ds ) )
return false;
// debug msg
log(LOG_DEBUG,"dns: Removing3 key %" PRIu64" from table. "
"parentKey=%" PRIu64" callback=%p state=%p.",
hostKey64,parentKey,
callback,state);
// if we made it here, remove from table
s_dnstable.removeKey ( finalKey ) ;
// should we free it
if ( ds->m_freeit ) mfree ( ds , sizeof(DnsState) ,"Dns" );
// ok, g_errno should still be set, return true as specified
return true;
}
// . returns false if blocked, sets g_errno and returns true otherwise
// . this is called by sendToNextDNS() when it has to get the ip of the DNS to
// send the request to.
bool Dns::getIpOfDNS ( DnsState *ds ) {
// bail if none
if ( ds->m_numDnsNames[ds->m_depth] <= 0 ) {
log(LOG_DEBUG, "dns: no dnsnames for '%s'",
ds->m_hostname);
return true;
}
// use the secondary ds for doing this
DnsState *ds2 = (DnsState *)ds->m_buf;
// do not keep getting the ip of the ns which may require us to get
// the ips of its ns, etc...
if ( ds->m_loopCount >= 3 ) {
addToCache ( ds->m_hostnameKey , -1 );
g_errno = EBADENGINEER;
log(LOG_INFO,"dns: Hit too many authority redirects for %s.",
ds->m_hostname);
return true;
}
// sanity check
if ( LOOP_BUF_SIZE / (sizeof(DnsState) - LOOP_BUF_SIZE) < 3 ) {
log("dns: Increase LOOP_BUF_SIZE, %" PRId32", in Dns.h.",
(int32_t)LOOP_BUF_SIZE);
g_process.shutdownAbort(true);
}
// increment the loop count, we can only use m_buf so many times
// before running out of room.
ds2->m_loopCount = ds->m_loopCount + 1;
// set start time for timing out
ds2->m_startTime = ds->m_startTime;
// or if we have too many ips already, do not bother adding more
if (ds->m_numDnsIps[ds->m_depth]>=MAX_DNS_IPS){
log(LOG_WARN, "dns: Already have %" PRId32" ips at depth %" PRId32".",
(int32_t)MAX_DNS_IPS,(int32_t)ds->m_depth);
g_errno=EBUFTOOSMALL;
return true;
}
// do not do this! this will break!
// int32_t n = ds->m_hostnameKey.n0 % numNames;
int32_t n = 0; // first is usually ns1, usually better
loop:
// get the name to get the ip for
int32_t depth = ds->m_depth;
int32_t numNames = ds->m_numDnsNames[depth];
char *hostname = ds->m_dnsNames[depth][n];
int32_t hostnameLen = strlen(hostname);
int32_t ip = 0;
// loop over all dnsnames in case one causes a circular dependency
// . remove him from the array so we do not do him again
// . actually, this is not a guarantee, so we put a circular
// dependency check in getIP() above
ds->m_dnsNames [depth][n] = ds->m_dnsNames[depth][numNames-1];
ds->m_numDnsNames[depth]--;
//ds->m_numTried++;
//if (ds->m_numTried > .....
// debug note
log(LOG_DEBUG,"dns: Getting ip address of dns, %s for %s.",
hostname,ds->m_hostname);
// . returns -1 and sets g_errno on error
// . returns 0 if transaction blocked, 1 if completed
// . returns 0 if you must wait
// . calls gotIp with ip when it gets it or timesOut or errors out
// . set *ip to 0 if none (does not exist)
// . keep the timeout down to only 5 secs
// . do not set a mutual exclusion lock on ip lookups of dns servers
// in order to avoid having to lookups locking each other up. like
// . 1. we are getting the ip of dns A for resolve of hostname #1
// . 2. we are getting the ip of dns B for resolve of hostname #2
// . 3. dns A says to ask B and B says to ask A, we end up in
// a deadlock
if ( !g_dns.getIp ( hostname ,
hostnameLen ,
&ip ,
ds ,//state
gotIpOfDNSWrapper , //state,ip
ds2 ,
5 , // timeout
true )) { // dns lookup?
log(LOG_DEBUG, "dns: no block for getIp for '%s'", hostname);
return false;
}
// if that would cause a circulare dependency, try the next one
if ( g_errno == EBADENGINEER ) {
if ( ds->m_numDnsNames[depth] ) {
log("dns: looping in getIpOfDns for '%s'",
hostname);
goto loop;
}
else
log("dns: No names left to try after %s",hostname);
}
// did it have an error? g_errno will be set
// . if ip is 0 it was a does not exist
// . add it to the array of ips
if ( ip != 0 && ds->m_numDnsIps[depth] + 1 < MAX_DNS_IPS) {
if (isTimedOut(ip)) {
char ipbuf[16];
log(LOG_DEBUG, "dns: Not adding [1] ip %s - timed out",
iptoa(ip,ipbuf));
}
else {
int32_t depth = ds->m_depth;
char ipbuf[16];
log(LOG_DEBUG,
"dns: Added ip [1-%" PRId32"] %s to depth %" PRId32" for %s.",
ds->m_numDnsIps[depth],
iptoa(ip,ipbuf),(int32_t)depth,ds->m_hostname);
ds->m_dnsIps[depth][ds->m_numDnsIps[depth]++] = ip ;
}
}
// we did not block
return true;
}
void Dns::gotIpOfDNSWrapper(void *state, int32_t ip) {
DnsState *ds = (DnsState *)state;
// log debug msg
//DnsState *ds2 = (DnsState *)ds->m_buf;
char ipbuf[16];
log(LOG_DEBUG,"dns: Got ip of dns %s for %s.",
iptoa(ip,ipbuf),ds->m_hostname);
// sanity check
if ( ds->m_numDnsIps[ds->m_depth] + 1 >= MAX_DNS_IPS ) {
log("dns: Wierd. Not enough buffer.");
g_process.shutdownAbort(true);
}
// . if ip is 0 it was a does not exist
// . add it to the array of ips
if ( ! g_errno && ip != 0 &&
ds->m_numDnsIps[ds->m_depth] + 1 < MAX_DNS_IPS) {
if (isTimedOut(ip)) {
char ipbuf[16];
log(LOG_DEBUG, "dns: Not adding [2] ip %s - timed out",
iptoa(ip,ipbuf));
}
else {
int32_t depth = ds->m_depth;
ds->m_dnsIps[depth][ds->m_numDnsIps[depth]++] = ip ;
char ipbuf[16];
log(LOG_DEBUG,
"dns: Added ip [2-%" PRId32"] %s to depth %" PRId32" for %s.",
ds->m_numDnsIps[depth],
iptoa(ip,ipbuf),(int32_t)depth,ds->m_hostname);
}
}
// disregard any g_errnos cuz we will try again
g_errno = 0;
// just return if this blocks
if ( ! g_dns.sendToNextDNS ( ds ) ) {
log(LOG_DEBUG, "dns: sendToNextDns blocking for '%s'",
ds->m_hostname);
return ;
}
// if that does not block, then we are done... we got the final ip
// or g_errno is set. so call the callbacks.
log(LOG_DEBUG, "dns: getIpOfDNSWrapper calling returnIp for '%s'",
ds->m_hostname);
returnIp ( ds , ip );
// . otherwise, we must call the callback
// . call the callback w/ state and ip if there is one
// . g_errno may be set
//if ( ds->m_callback ) ds->m_callback ( ds->m_state , ip );
// free our state holding structure
//if ( ds->m_freeit ) mfree ( ds , sizeof(DnsState) ,"Dns" );
}
// returns false if blocked, sets g_errno and returns true otherwise
bool Dns::sendToNextDNS ( DnsState *ds ) {
//log(LOG_DEBUG, "dns: sendToNextDNS depth %d", ds->m_depth);
// let's clear g_errno since caller may have set it in gotIp()
g_errno = 0;
// if we have been at this too long, give up
int32_t now = getTime();
int32_t delta = now - ds->m_startTime;
// quick fix if the system clock was changed on us
if ( delta < 0 ) ds->m_startTime = now;
//if ( delta > 100 ) ds->m_startTime = now;
if ( delta > TIMEOUT_TOTAL ) {
log(LOG_DEBUG,"dns: Timing out the request for %s. Took over "
"%" PRId32" seconds. delta=%" PRId32". now=%" PRId32".",
ds->m_hostname,(int32_t)TIMEOUT_TOTAL,delta,now);
if ( ds->m_errno ) g_errno = ds->m_errno;
else g_errno = EDNSTIMEDOUT;
return true;
}
// if we have no more room to add to tried array, we're done,
// we've tried to ask too many nameservers already
if ( ds->m_numTried >= MAX_TRIED_IPS ) {
log(LOG_INFO,"dns: Asked maximum number of name servers, "
"%" PRId32", for %s. Timing out.",(int32_t)MAX_TRIED_IPS,
ds->m_hostname);
if ( ds->m_errno ) g_errno = ds->m_errno;
else g_errno = EDNSTIMEDOUT;
return true;
}
// get the current depth. if we exhaust all nameserver ips at this
// depth we may have to decrease it until we find some nameservers
// we haven't yet asked.
int32_t depth = ds->m_depth;
top:
log(LOG_DEBUG, "dns: at 'top' for '%s'", ds->m_hostname);
int32_t n = -1;
// how many ip do we have at this depth level? save this for
// comparing down below.
int32_t numDnsIps = ds->m_numDnsIps[depth];
// each DnsState has a list of ips of the nameservers to ask
// but which one we ask first depends on this hash
if ( ds->m_numDnsIps[depth] > 0 ) {
// easy var
int32_t num = ds->m_numDnsIps[depth];
// . pick the first candidate to send to
// . this should not always be zero because picking the groupId
// and hostId to send to is now in Dns::getResponsibleHost()
// and uses key.n1 exclusively
n = ds->m_hostnameKey.n0 % num;
// conenvience ptr
int32_t *ips = ds->m_dnsIps[depth];
// save
int32_t orign = n;
do {
if (!isTimedOut(ips[n]))
break;
// note it
char ipbuf[16];
log(LOG_DEBUG, "dns: skipping ip %s - timed out",
iptoa(ips[n],ipbuf));
// advance
if ( ++n >= num )
n = 0;
} while (n != orign);
}
// . save n for wrap check below, to make sure we do not re-ask him
// . this may be -1 if we did not have any dns ips to pick from
int32_t startn = n;
// is the nth ip the next best candidate to send the request to?
checkip:
// get the nth ip
int32_t ip = 0;
if ( n >= 0 ) ip = ds->m_dnsIps[depth][n];
// loop over all the ips of nameservers we've already tried and make
// sure this one is not one of them. only check if n is valid (>=0)
bool tried = false;
for ( int32_t i = 0 ; n >=0 && i < ds->m_numTried ; i++ ) {
// check next tried ip if this one does not match.
if ( ip != ds->m_triedIps[i] ) continue;
// we've already tried this ip, do not send to it again
tried = true;
break;
}
// advance n if we already tried its ip, or if its ip is bogus.
// only do this if n >= 0 already, though. if it is -1 that means we
// have no candidates at this depth level
if ( n >= 0 && (tried || ip == 0 || ip == -1) ) {
// advance n
if ( ++n == ds->m_numDnsIps[depth] ) n = 0;
// if still a bogus ip, keep advancing
// but if we wrap, there are no valid n's
if ( n != startn ) goto checkip;
// set n to -1 to indicate no new and good ip available at
// this depth level
n = -1;
}
// if no ips to try, try the canonical names if there are some
if ( n == -1 && ds->m_numDnsNames[depth] > 0 ) {
// . return false if this blocked
// . this will remove the names from m_numDnsNames and
// put them in as ips into m_dnsIps.
// . it will decrease m_numDnsNames
// . this returns true if it does not block
if ( ! getIpOfDNS ( ds ) ) {
log(LOG_DEBUG, "dns: SendToNextDns blocked on "
"getIpOfDNS for '%s'", ds->m_hostname);
return false;
}
// getIpOfDNS may have set g_errno. it, as of this writing,
// only does that if we have too many ips already,
// MAX_DNS_IPS, so in that case, just ignore it and go down a
// level, it will not have added a new ip.
// NOTE: it will also set to EBADENGINEER if it caught
// a circular dependency. (see above)
g_errno = 0;
// ok, did we gain a new ip to try? if so, try run it through
// from the top.
if ( ds->m_numDnsIps[depth] > numDnsIps ) {
log(LOG_DEBUG, "dns: "
"SendToNextDNS going back to top 0 "
"for '%s'", ds->m_hostname);
goto top;
}
// if you made it here, we did not gain a new ip, probably
// because of error. n is still -1 to indicate no candidate
// at this depth level.
}
// ok, if we have no dns ips or hostnames to try at this point on
// this depth level then go back up to the previous level and see if
// those nameservers can recommend another set of nameservers to try.
// but this will be -1 if there are no more left to ask, and we will
// send back a EDNSTIMEDOUT error above.
if ( n == -1 ) {
depth--;
// decrease the depth, this may be the end of the chain... and
// we do not want to chain through all the root servers at depth 0
// because they are almost never wrong i guess.
if ( depth <= 0 ) {
log(LOG_DEBUG,"dns: Exhausted all chains except "
"root. Giving up for %s",ds->m_hostname);
if ( ds->m_errno ) g_errno = ds->m_errno;
else g_errno = EDNSTIMEDOUT;
return true;
}
// log this...
ds->m_fallbacks[depth]++;
log(LOG_DEBUG,
"dns: depth %" PRId32"/%" PRId32" rootTLD %d #fb %" PRId32" #ip %" PRId32" #name %" PRId32,
depth, ds->m_depth,
ds->m_rootTLD[depth],
ds->m_fallbacks[depth],
ds->m_numDnsIps[depth],
ds->m_numDnsNames[depth]);
// too many fallbacks on root or TLD nameservers?
if ( ds->m_rootTLD[depth] &&
ds->m_fallbacks[depth] > 2) {
log(LOG_DEBUG,
"dns: too many fallbacks on rootTLD. "
"Giving up for %s.",
ds->m_hostname);
if ( ds->m_errno ) g_errno = ds->m_errno;
else g_errno = EDNSTIMEDOUT;
return true;
}
// ok, we have more chains to explore starting at this decreased depth
// so take it from the top, "depth" as been decreased.
log(LOG_DEBUG, "dns: "
"SendToNextDNS going back to top 1 for '%s'",
ds->m_hostname);
goto top;
}
// sanity check
if ( ip != ds->m_dnsIps[depth][n] ) { g_process.shutdownAbort(true); }
// alright, we got a valid ip to send the request to.
// mark this ip we are about to ask as tried.
ds->m_triedIps[ds->m_numTried++] = ip; //ds->m_dnsIps[depth][n];
// record our current depth in case it changed
ds->m_depth = depth;
// this 512 byte buffer is now part of the DnsState
char *msg = ds->m_request;
// . the dns header has this format:
// . u_int16_t dns_id; /* client query ID number */
// . u_int16_t dns_flags; /* qualify contents <see below> */
// . u_int16_t dns_q_count; /* number of questions */
// . u_int16_t dns_rr_count; /* number of answer RRs */
// . u_int16_t dns_auth_count; /* number of authority RRs */
// . u_int16_t dns_add_count; /* number of additional RRs */
/// @todo ALC something fishy is going on here. investigate!
// HACK: if udpserver's transId is too big for us, reset it
if(m_udpServer.m_nextTransId > 65535 )
m_udpServer.m_nextTransId =0;
// . first word is id (not really that releveant since queried domain
// should also be in response)
// . steal the transId from our g_udpServer
uint16_t transId = m_udpServer.m_nextTransId;
// . *(uint16_t *) msg = htons ( m_dnsTransId++ );
// . UdpServer will inc it's m_transId in UdpServer::sendRequest()
// when it calls getTransId()
*(uint16_t *) msg = htons ( transId );
// . some fancy foot work (big endian here) this byte is msg[2]
// . qr: 1; /* response flag (high bit)*/
// . opcode: 4; /* purpose of message */ 0 = query, 1=invQuery, ...
// . aa: 1; /* authoritative answer */
// . tc: 1; /* truncated message */
// . rd: 1; /* recursion desired (low bit)*/
// ---------------------------------------- this byte is msg[3]
// . ra: 1; /* recursion available (high bit)*/
// . unused: 1; /* unused bits (MBZ as of 4.9.3a3) */
// . ad: 1; /* authentic data from named */
// . cd: 1; /* checking disabled by resolver */
// . rcode: 4; /* response code (low bits)*/
// ----------------------------------------
// . values of rcode:
// . "No Error", /* 0: ok */
// . "Format Error", /* 1: bad query */
// . "Server Failure", /* 2: server is hosed */
// . "Name Error", /* 3: name doesn't exist (authoritative) */
// . "Not Implemented", /* 4: server doesn't support query */
// . "Refused" /* 5: server refused request */
// some dns'es REFUSE our request if this is set... so keep it 0
// like www.fsis.usda.gov, however www.altx.com needs it set!
if ( ds->m_recursionDesired ) msg[2] = 0x01 ;
else msg[2] = 0x00 ;
// if asking bind9 servers, always set it on
if ( ! g_conf.m_askRootNameservers ) msg[2] = 0x01;
// try this fix
//msg[2] = 0x04;
msg[3] = 0;
// rr means resource record
*(int16_t *)(msg + 4 ) = htons ( 1 ); // we have 1 question
*(int16_t *)(msg + 6 ) = 0 ; // we have 0 answer rr's
*(int16_t *)(msg + 8 ) = 0 ; // we have 0 authority rr's
*(int16_t *)(msg + 10 ) = 0 ; // we have 0 addition rr's
// ask for MX record? used by Emailer in Facebook.cpp.
char *hostname = ds->m_hostname;
bool getmx = false;
if ( strncmp(ds->m_hostname,"gbmxrec-",8) == 0 ) {
hostname += 8;
getmx = true;
}
// . we're done populating the dns request HEADER
// . now make the QNAME entry (the hostname to get ip for)
// . break up the hostname by the dots
// . make "msg" point passed header
char *start = hostname;
char *end = hostname;
// . point to where to store the length of each name and name itself
// . a name is a component in the hostname, like the "com" in "x.com"
u_char *len = (u_char *)msg+12;
char *dest = msg+13;
// . now make the query entry
// . break the hostname into labels and store in dns record style.
// . basically store length/label pairs
char *hostEnd = hostname + strlen(hostname);
while ( start < hostEnd ) {
while ( *start != '.' && *start && start < hostEnd ) {
//log(LOG_DEBUG,"dns: name: %c", *start);
*dest++ = *start++;
}
//log(LOG_DEBUG,"dns: name delimit");
// . each "name" in the hostname must be less than 64 bytes
// . the 2 high bits are set for compression
int32_t nlen = start - end;
if ( nlen >= 64 ) {
g_errno = EHOSTNAMETOOBIG;
log(LOG_INFO,"dns: Request's host component is %" PRId32" bytes. "
"Must be under 64.",nlen);
return true;
}
// store the length as a byte
*len = (u_char)nlen ;
// advance the length
len = (u_char *)dest;
// advance the dest over the length
dest++;
// skip start over the . or \0
start++;
// set the end to to the beginning of the next name
end = start;
}
// store a 0 at the end
len[0] = 0;
// . now store queryType (2 bytes) and queryClass (2 bytes)
// . a query type of 0 is "host address" query (see nameser.h)
// . a queryClass of 1 means the arpa internet (see nameser.h)
// . watch out for alignment
len[1] = 0; // query type (network order)
if ( getmx ) len[2] = 15; // query type (network order)
else len[2] = 1;
len[3] = 0; // query class (network order)
len[4] = 1; // query class (network order)
// compute the msgSize
//int32_t msgSize = (char *)len - msg + 5 ;
ds->m_requestSize = (char *)len - msg + 5 ;
// copy the msg into an alloc'd buffer
// char *copy = mdup ( msg , msgSize );
// if ( ! copy ) return true;
// debug msg
char ipbuf[16];
log(LOG_DEBUG,
"dns: Asking %s (depth=%" PRId32",%" PRId32") att %" PRId32" "
"for ip of %s (tid=%" PRId32")",
iptoa(ds->m_dnsIps[depth][n],ipbuf), (int32_t)depth,(int32_t)n,
(int32_t) ds->m_numTried, ds->m_hostname , (int32_t)transId);
// . queue a send
// . this returns false and sets g_errno on error
// . calls callback when reply is received
// . resend timeout for non-ack protocols (dns) is 10 secs in UdpSlot
// . seems like 60 second timeout is about right,
// . seems like dns really slows down when we're looking up failed ips
// while doing url retries
// . well, i went back to 30 seconds after i fixed the transId overflow
// bug
// . resend time is set to 20 seconds in UdpSlot::setResendTime()
// use niceness 0 now so if the msgC slot gets converted from 1 to 0 this will not hold it up!
if (!m_udpServer.sendRequest(ds->m_request, ds->m_requestSize, msg_type_dns, ip, 53, -1, NULL, ds, gotIpWrapper, TIMEOUT_SINGLE_HOST_MS, 0, ds->m_hostname)) {
// g_errno should be set at this point and we will not try
// any more nameservers because the error seemed too bad.
log(LOG_DEBUG, "dns: errors seemed too bad for '%s'...", ds->m_hostname);
return true;
}
// return 0 cuz we're blocking on the reply
log(LOG_DEBUG, "dns: SendToNextDNS blocking on reply for '%s'", ds->m_hostname);
return false;
}
void Dns::gotIpWrapper(void *state, UdpSlot *slot) {
DnsState *ds = (DnsState *) state;
log(LOG_DEBUG, "dns: gotIpWrapper for '%s'", ds->m_hostname);
//log(LOG_DEBUG, "dns: gotIpWrapper depth %d", ds->m_depth);
// never let udpserver free the send buffer, we own that, it is
// ds->m_request
slot->m_sendBufAlloc = NULL;
// get our Dns server
// Dns *THIS = ds->m_this;
// set ip to -1 to indicate dns transaction error
int32_t ip = -1;
// THIS is obsolete because we got s_dnstable now.
// sometimes a parallel request might have thrown it in the cache
// and we timeout because the dns is pissed at us hitting it with
// requests all the time.
//if (g_errno&& g_dnsDistributed.isInCache ( ds->m_hostnameKey, &ip ) )
// g_errno = 0;
// might as well check local, too
//if ( g_errno && g_dnsLocal.isInCache ( ds->m_hostnameKey, &ip ) )
// g_errno = 0;
// get the ip from slot if no error
if ( ! g_errno ) ip = g_dns.gotIp ( slot , ds );
// . if we timed out change it to a more specific thing
// . an ip of -1 (255.255.255.255) means it timed out i guess
// . an ip of -1 could also be a SERVFAIL reply too, in which case
// g_errno will not be set...
if ( g_errno == EUDPTIMEDOUT || ip == -1 ) {
// log it so we know which dns server had the problem
if ( g_errno ) {
char ipbuf[16];
log(LOG_DEBUG,"dns: dns server at %s timed out.",
iptoa(slot->getIp(),ipbuf));
g_errno = EDNSTIMEDOUT;
}
else {
char ipbuf[16];
log(LOG_DEBUG,"dns: dns server at %s failed.",
iptoa(slot->getIp(),ipbuf));
}
// try again? yes, if we timed out on router1's bind9
if ( ds->m_dnsIps[0][0] != atoip(PUBLICLY_AVAILABLE_DNS1) ) {
g_errno = ETRYAGAIN;
//ds->m_depth++;
// note it
char ipbuf[16];
log(LOG_DEBUG, "dns: trying backup-dns %s (old=%s)",
PUBLICLY_AVAILABLE_DNS1,iptoa(ds->m_dnsIps[0][0],ipbuf));
// try google's public dns
ds->m_dnsIps[0][0] = atoip(PUBLICLY_AVAILABLE_DNS1);
}
}
// debug msg
//log("got %s [%" PRIu32"] for %s",iptoa(ip),ip,ds->m_hostname );
// bitch on error
if ( g_errno ) {
//int32_t type = LOG_WARN;
// . these can be those SERVFAIL messages we get from nslookup
// usually when a domain name has disappeared from radar
// . might also happen if recursive-clients is set too low
// on a dns server
//if ( g_errno == EDNSDEAD ) type = LOG_INFO;
// this happens so much... not necessarily an error, but it
// certainly can be if our dns is dead
//if ( g_errno == EDNSTIMEDOUT ) type = LOG_INFO;
if ( g_errno != ETRYAGAIN )
log(LOG_DEBUG,"dns: %s: %s.",
ds->m_hostname,mstrerror(g_errno));
// save this for returning later
if (g_errno != ETRYAGAIN &&
g_errno != EDNSTIMEDOUT &&
g_errno != EUDPTIMEDOUT )
//g_errno != ETIMEDOUT ) // == DNSDEAD
ds->m_errno = g_errno;
// mdw
if ( (g_errno == EUDPTIMEDOUT || g_errno == EDNSTIMEDOUT) &&
// do not do this if we our hitting our local bind9
// server. this was adding public DNS server
// was then logging "skipping ip x.x.x.x - timed out"
// and we were missing out!
g_conf.m_askRootNameservers ) {
int32_t timestamp = getTime();
key96_t k;
k.n0 = 0LL;
k.n1 = slot->getIp();
static const char s_data[] = "1111";
char ipbuf[16];
log(LOG_DEBUG, "dns: adding ip %s to timedout cache: %s",
iptoa(slot->getIp(),ipbuf),mstrerror(g_errno));
RdbCacheLock rcl(g_timedoutCache);
g_timedoutCache.addRecord((collnum_t)0,
k , // key
s_data , // value
4 , // value size
timestamp );
}
// . send to the next nameserver in line
// . this returns false if blocks, returns true and sets
// g_errno otherwise. does recursion.
// . do not send to next guy if we got EDNSDEAD because that
// means the hostname does not exist
// . ok, i've seen too many false positives with EDNSDEAD,
// so let's try again on that too!
// . set timeout to 10 seconds
if ( //g_errno != EDNSDEAD &&
! g_dns.sendToNextDNS(ds) ) {
log(LOG_DEBUG, "dns: gotIpWrapper blocking on "
"SendToNextDNS for '%s'", ds->m_hostname);
return;
}
}
// . otherwise, we must call the callback
// . call the callback w/ state and ip if there is one
// . g_errno may be set
// . if we are just getting the ip of a dns server in the chain, then
// call the callback now, it is not the *final* callback.
//if ( ds->m_callback == gotIpOfDNSWrapper ) {
// ds->m_callback ( ds->m_state , ip );
// return;
//}
log(LOG_DEBUG, "dns: gotIpWrapper calling returnIp for '%s'",
ds->m_hostname);
// call the callbacks
returnIp ( ds , ip );
}
// caller should set g_errno because we call the callbacks here
void Dns::returnIp(DnsState *ds, int32_t ip) {
// ok, we got the final answer at this point
// debug msg
const char *pre = "";
if ( ip == 0 ) pre = " [NXDOMAIN]";
if ( ip == -1 ) pre = " [DNSTIMEDOUT|SERVFAIL]";
if ( g_conf.m_logDebugDns ) {
char ipbuf1[16];
iptoa(ds->m_dnsIps[0][0],ipbuf1);
char ipbuf2[16];
log(LOG_DEBUG,"dns: Got FINAL ANSWER of %s for %s %s(%s) "
"from dns %s.",
iptoa(ip,ipbuf2),ds->m_hostname,pre,mstrerror(g_errno),ipbuf1);
}
// not thread safe
//if ( g_threads.amThread() ) { g_process.shutdownAbort(true); }
// save g_errno
int32_t err = g_errno;
// if we timed out, cache this so future lookups are fast
// but only give it a ttl of about a day so we can retry a day later
// with a fresh lookup. and add a -1 to the cache instead of a zero
// so we know it is a cached error and not a cached EDNSDEAD, which
// means the hostname does not exist for sure. (that is not an error
// really) Well only cache for 10 minutes, we might really need it.
// if the internet goes down briefly then we end up pretty useless
// for 10 minutes then! and the spider recs stream on by. so let's
// comment this out. actually, let's keep it here for 15 seconds
// so when adding links if they are all from a non-responsive domain
// like www.castleburyinn.com then we don't wait for a 30 second
// timeout 100 times in a row.
bool cache = false;
// no longer cache these! i think the spider should evenly sample
// every other IP address before returning to the timed out IP address...
// ideally. plus i added the <company>'s public dns x.x.x.x as a secondary
// dns ip to fallback to in the case of timeouts i guess... so make
// sure that's what it does.
// CRAP, we lookup the dns entry of all the outlinks, so we need this,
// so leave it there...
if ( g_errno == EDNSTIMEDOUT || g_errno==EUDPTIMEDOUT ) cache = true;
// and NEVER cache a timeout on a root server or root TLD server
if ( g_conf.m_askRootNameservers && ds->m_rootTLD[ds->m_depth] )
cache = false;
// cache for 6 hrs, these things slow us down
if ( cache ) g_dns.addToCache ( ds->m_hostnameKey, -1, 3600*6 );
// . otherwise, we must call the callback
// . call the callback w/ state and ip if there is one
// . g_errno may be set
// . no, it will be called below from the hash table
//if ( ds->m_callback ) ds->m_callback ( ds->m_state , ip );
// . if the ip request is in progress, wait for it to come back
// . each bucket in the callback entry hashtable is a linked list of
// callback/state pairs (CallbackEntries) waiting for that ip.
//int64_t key = ds->m_hostnameKey.n0 & 0x7fffffffffffffffLL;
int64_t key = ds->m_tableKey & 0x7fffffffffffffffLL;
// was it a dns lookup?
bool dnsLookup = false;
// these will be the same if not. i created ds->m_tableKey
// just so we could make a special hash for such lookups so
// they would not be waiting in lines and deadlock.
if ( key != (int64_t)(ds->m_hostnameKey.n0 & 0x7fffffffffffffffLL) )
dnsLookup = true;
// look up the entry from the table
CallbackEntry *ce = s_dnstable.getValuePointer ( key );
// sanity check
if ( ! ce ||
ce->m_callback != ds->m_callback ||
ce->m_state != ds->m_state ||
ce->m_ds != ds ) {
log("dns: Failed sanity check 1.");
g_process.shutdownAbort(true);
}
// calling the callback for "ds" could free it, so record this now
bool freeit = ds->m_freeit;
// and record the hostname here, too, it should be NULL terminated
char tmp[2048];
if ( g_conf.m_logDebugDns )
strncpy ( tmp , ds->m_hostname , 2040 );
// save parent for debugging purposes
int64_t parentKey = key;
// how many in the list?
int32_t listSize = ce->m_listSize;
// only the nodes in this list have this id
int32_t listId = ce->m_listId;
// go through each node in the linked list
int32_t count = 0;
while ( ce ) {
// restore g_errno
g_errno = err;
// get the next one to call in the linked list
int64_t nextKey = ce->m_nextKey;
// debug msg
log(LOG_DEBUG,"dns: Removing key %" PRIu64" from table. "
"parentKey=%" PRIu64" nextKey=%" PRIu64" callback=%p "
"state=%p.",
key,
parentKey,
nextKey,
ce->m_callback,
ce->m_state);
// get stuff
void (* callback ) ( void *state , int32_t ip );
callback = ce->m_callback;
void *state = ce->m_state;
// remove from hash table, so if this was the parent node
// of the linked list, and "callback" calls getIp() again
// for this hostname, it will be added to a SEPARATE linked
// list and not mess us up. otherwise, it may get inserted
// and NEVER get called.
s_dnstable.removeKey ( key );
// debug msg
log(LOG_DEBUG,"dns: table now has %" PRId32" used slots.",
(int32_t)s_dnstable.getNumUsedSlots());
// then call the callback
// CAREFUL: calling this callback can alter the hash
// table (if it is a dns wrapper callback) and move what
// "ce" points to! SO, get nextKey before calling this...
//if ( ce->m_callback ) ce->m_callback ( ce->m_state , ip );
if ( callback ) callback ( state , ip );
// count for debug purposes
count++;
// now that we secured "nextKey" we can delete this guy
//s_dnstable.removeKey ( key );
// if nextKey is 0 that is the end of the list. waiting
// buckets (not firstInLine, that is) always use the "s_bogus"
// key which starts 1 and is incremented from there
if ( nextKey == 0LL ) break;
// nobody is allowed to wait in line of a dns lookup
if ( dnsLookup ) {
log("dns: Failed sanity check 8.");
g_process.shutdownAbort(true);
}
// another sanity check
if ( ! (nextKey & 0x8000000000000000LL) ) {
log("dns: Failed sanity check 7.");
g_process.shutdownAbort(true);
}
// get next guy's value
ce = s_dnstable.getValuePointer ( nextKey );
// he better be there, otherwise, core
if ( ! ce ) {
log("dns: Failed sanity check 5.");
g_process.shutdownAbort(true);
}
// update key
key = nextKey;
}
// debug msg
if ( g_conf.m_logDebugDns )
log(LOG_DEBUG,"dns: Called %" PRId32" callbacks for %s.",
count,tmp);
// free our state holding structure
if ( freeit ) mfree ( ds , sizeof(DnsState) ,"Dns" );
// debug check
if ( count == listSize ) return;
log("dns: Only called %" PRId32" callbacks out of %" PRId32". Critical "
"error. Scanning table for missing callback.",count,listSize);
loop:
// scan for the missing guys
for ( int32_t i = 0 ; i < s_dnstable.getNumSlots() ; i++ ) {
// get its key
key = s_dnstable.getKey(i);
// skip if empty. a key of 0LL signifies an empty bucket/slot.
if ( key == 0LL ) continue;
// get it
CallbackEntry *e = s_dnstable.getValuePointerFromSlot(i);
// skip if no match
if ( e->m_listId != listId ) continue;
// call its callback
ce->m_callback ( ce->m_state , ip );
// note it
log("dns: Calling callback for slot #%" PRId32".",i);
// remove it. restart from top in case table shrank
s_dnstable.removeKey ( key );
goto loop;
}
}
// return NULL and set g_errno on error
static const char *getRRName ( const char *rr, const char *dgram, const char *end ) {
static char s_buf[1024];
char *bufEnd = s_buf + 1024;
char *dst = s_buf;
size_t dgram_size = (size_t)(end-dgram);
// store into our buffer
const char *p = rr;
while ( *p && p < end ) {
// is compression bit set?
while ( (((uint8_t)(*p)) & 0xc0) == 0xc0 ) {
// we are jumping within the dgram
uint16_t offset = ntohs(*(int16_t*)p)&0x3fff;
if(offset>=dgram_size)
break;
p = dgram + offset;
// bust out of while loop if we should
if ( !*p || p>=end )
break;
}
// watch out for corruption
if ( dst + *p + 1 > bufEnd ) {
g_errno = EBADREPLY;
return NULL;
}
// watch out for corruption
if ( *p < 0 ) {
g_errno = EBADREPLY;
return NULL;
}
// copy the hostname
memcpy ( dst, p+1, *p );
dst += *p;
*dst++ = '.';
p += ((u_char)*p) + 1;
}
if ( dst > s_buf && dst[-1] == '.' ) dst--;
*dst = '\0';
return s_buf;
}
// . function to parse out the smallest ip from DNS reply
// . returns -1 on error and sets g_errno (could be ETIMEOUT)
// . returns 0 if hostname does not have an ip (non-existant)
// . otherwise, returns the ip
// . NOTE: we also call HostMap::stampHost() here rather than override readPoll
// . TODO: update timestamp for this dns server
int32_t Dns::gotIp ( UdpSlot *slot , DnsState *ds ) {
log(LOG_DEBUG, "dns: gotIp called for '%s'", ds->m_hostname);
char ipbuf[16];
//log(LOG_DEBUG, "dns: gotIp depth %d", ds->m_depth);
// get the hostname from the request's query record
char *dgram = (char *) slot->m_sendBuf;
if ( ! dgram ) {
log(LOG_DEBUG, "dns: dgram is NULL for '%s'", ds->m_hostname);
return 0;
}
// how many query records in the record heap?
int16_t qdcount = ntohs (*(int16_t *)(dgram + 4 )) ;
// we better have 1 and only 1 query record
if ( qdcount != 1 ) {
log(LOG_DEBUG, "dns: more than one query record for '%s'",
ds->m_hostname);
return 0;
}
// now get the hostname from reply's query record to see if the same
dgram = slot->m_readBuf;
// return -1 and set g_errno if no read buf
if ( ! dgram ) {
g_errno = EBADREPLY;
log("dns: Nameserver (%s) returned empty reply", iptoa(slot->getIp(),ipbuf));
return -1;
}
// get the size of the read buf
int32_t dgramSize = slot->m_readBufSize;
// return -1 if too small
if ( dgramSize < 12 ) {
log(LOG_INFO,"dns: Nameserver (%s) returned bad "
"reply size of %" PRId32" bytes which is less than 12 bytes.",
iptoa(slot->getIp(),ipbuf),dgramSize);
g_errno = EBADREPLY;
return -1;
}
// get the response code (lower 4 bits of byte #3)
int32_t rcode = dgram[3] & 0x0f;
// . return -1 if response code indicates an error
// . set g_errno as well
switch ( rcode ) {
case 0: break; // valid
case 1: log(LOG_DEBUG,"dns: Nameserver (%s) returned request format error.",
iptoa(slot->getIp(),ipbuf));
g_errno = EBADREQUEST;
return -1;
case 2: //log("dns::gotIp: dns server failure" );
// log("dns: Nameserver (%s) could not handle query. "
// "Should you add \"recursive-clients 2000\" in "
// "\"options { ... };\" bracket of "
// "/etc/bind/named.conf?");
// we have to try another dns if we get this message!
log(LOG_DEBUG,"dns: Nameserver (%s) returned SERVFAIL.",
iptoa(slot->getIp(),ipbuf));
//g_errno = ETRYAGAIN;
//break;
return -1;
// name does not exist
case 3: log(LOG_DEBUG,
"dns::gotIp: name does not exist, returning ip of 0"
"for '%s'", ds->m_hostname);
// cache a not-found ip entry for this hostname
addToCache ( ds->m_hostnameKey , 0 );
return 0;
case 4: log(LOG_DEBUG,"dns: Nameserver (%s) does not support query.",
iptoa(slot->getIp(),ipbuf));
g_errno = EBADREQUEST;
return -1;
case 5: log(LOG_DEBUG,"dns: Nameserver (%s) refused request.",
iptoa(slot->getIp(),ipbuf));
// www.fsis.usda.gov will error here if recursion bit is set
// so restart from the beginning with it turned off
if ( ds->m_recursionDesired ) {
log(LOG_DEBUG,"dns: Turning off recursion "
"desired bit and retrying.");
ds->m_recursionDesired = false;
// retry the same ips of dns'es again
ds->m_numTried = 0;
g_errno = ETRYAGAIN;
return -1;
}
g_errno = EDNSREFUSED;
return -1;
default: log(LOG_INFO,"dns: Nameserver (%s) returned unknown "
"return code = %" PRId32".",
iptoa(slot->getIp(),ipbuf), rcode );
g_errno = EBADREPLY;
return -1;
}
// does this name server support recursion?
bool supportRecursion = dgram[3] & 0x80;
// now if it said name does not exist but it did NOT set the
// recursion is available flag, that's bad
if ( ! supportRecursion && rcode == 3 ) {
g_errno = EDNSBAD;
char ipbuf[16];
log(LOG_INFO,"dns: Nameserver (%s) will not not recurse.",
iptoa(slot->getIp(),ipbuf));
return -1;
}
// otherwise if rcode is 3 then the name really does not exist so ret 0
if ( rcode == 3 ) return 0;
// how many query & answer records in the record heap?
qdcount = ntohs (*(int16_t *)(dgram + 4 )) ;
// . we better have 1 query record
// . return -1 on error
if ( qdcount != 1 ) {
g_errno = EBADREPLY;
char ipbuf[16];
log (LOG_INFO,"dns: Nameserver (%s) returned query count of %" PRId32" (not 1).",
iptoa(slot->getIp(),ipbuf), (int32_t)qdcount);
return -1;
}
// . now we should have our answer here
// . if no answer/resource records then that's an error
// . however, if nscount is positive that is a referral... and
// we need to ask those servers. and the additional section
// will have the ip addresses of those servers sometimes. that is
// why we need to check that section first for ips.
int16_t ancount = ntohs (*(int16_t *)(dgram + 6 )) ; // answer
int16_t nscount = ntohs (*(int16_t *)(dgram + 8 )) ; // authority
int16_t arcount = ntohs (*(int16_t *)(dgram + 10 )); // additional
if ( ancount < 0 ) {
g_errno = EBADREPLY;
char ipbuf[16];
log (LOG_WARN, "dns: Nameserver (%s) returned a negative answer count of %" PRId32".",
iptoa(slot->getIp(),ipbuf), (int32_t)ancount);
return -1;
}
// if it does not exist and no CNAME in the answer, bail now
if ( rcode == 2 && ancount == 0 ) {
addToCache ( ds->m_hostnameKey , 0 );
g_errno = EDNSDEAD;
return 0;
}
// treat a 0 answer count like an rcode of 3, a non-existent domain
//if ( ancount == 0 ) {
// log ("dns: Nameserver (%s) returned a 0 answer count. "
// "Assuming domain name does not exist.",
// iptoa ( slot->m_ip ) );
// addToCache ( ds->m_hostnameKey , 0 );
// return 0;
//}
// point to the end of our reply dgram
// char *end = dgram + dgramSize;
// . get hostname this is the ip for
// . dgram+12 points to the hostname in label format
//char hostname[256];
//if ( ! extractHostname ( dgram , dgram + 12 , hostname ) ) return -1;
// . now we should point to to the meat of the resource record
// . we should have "ancount" resource records
// . here's the format of one resource record (length is 12 bytes)
// . u_int16_t rr_type; /* RR type code (e.g. A, MX, NS, etc.) */
// . u_int16_t rr_class; /* RR class code (IN for Internet) */
// . u_int32_t rr_ttl; /* Time-to-live for resource */
// . u_int16_t rr_rdlength; /* length of RDATA field (in octets) */
// . u_int16_t rr_rdata; /* (fieldname used as a ptr) */
// . here's SOME values for rr_type field
// . A 1 IP Address (32-bit IP version 4)
// . NS 2 Name server QNAME (for referrals & recursive queries)
// . CNAME 5 Canonical name of an alias (in QNAME format)
// . SOA 6 Start of Zone Transfer (see definition below)
// . WKS 11 Well-known services (see definition below)
// . PTR 12 QNAME pointing to other nodes (e.g. in inverse lookups)
// . HINFO 13 Host Information (CPU string, then OS string)
// . MX 15 Mail server preference and QNAME (see below)
// . here's values for the rr_class field
// . "<invalid>",
// . IN 1 Internet - used for most queries!
// . CS 2 CSNET <obsolete>
// . CH 3 CHAOS Net
// . HS 4 Hesiod (mit?)
// . point to heap of resource records (after the query record)
// . TODO: what is this 2 byte thing?
char *end = dgram + slot->m_readBufSize ;
char *rr = dgram ;
// . skip rr over first canonical name which actually, may not be
// the name we sent!!!
// . for instance, www.montigny78.fr first name is actually
// www.mairie-montigny78.fr ... strange ...
// . pages.plaza-antique-mall.com has a different name too
// . first point to start of canonical name
rr += 12;
// watch out for corruption
if ( rr >= end ) {
addToCache ( ds->m_hostnameKey , -1 );
g_errno = EBADREPLY;
char ipbuf[16];
log(LOG_INFO,"dns: Nameserver (%s) returned a corrupt reply [0] for %s.",
iptoa(slot->getIp(),ipbuf),ds->m_hostname);
return -1;
}
// jump over each label
unsigned char len = *rr;
while ( len > 0 ) {
// point to length byte of next label
rr += len + 1;
// break if we exceed the end
if ( rr >= end ) break;
// get length of next label
len = *rr;
}
// skip over last byte, it's contents should be 0
rr++;
// skip 4 bytes after... for what?
rr += 4;
// watch out for corruption
if ( rr >= end ) {
addToCache ( ds->m_hostnameKey , -1 );
g_errno = EBADREPLY;
char ipbuf[16];
log(LOG_INFO,"dns: Nameserver (%s) returned a corrupt reply [1] for %s.",
iptoa(slot->getIp(),ipbuf),ds->m_hostname);
return -1;
}
// . store the ips of the nameservers we have to ask into "ips"
// . throw these ips into the next depth level, though
int32_t *ips = NULL;
int32_t numIps = 0;
// but may be too deep, make sure we can increase the depth by 1
if ( ds->m_depth + 1 < MAX_DEPTH ) {
ips = ds->m_dnsIps [ds->m_depth+1];
ds->m_rootTLD [ ds->m_depth+1 ] = false;
ds->m_fallbacks [ ds->m_depth+1 ] = 0;
ds->m_numDnsIps [ ds->m_depth+1 ] = 0;
ds->m_numDnsNames [ ds->m_depth+1 ] = 0;
}
// ask for MX record? used by Emailer in Facebook.cpp.
bool getmx = false;
if ( strncmp(ds->m_hostname,"gbmxrec-",8) == 0 )
getmx = true;
// scan ALL answer records for ips and select the minimum
uint32_t minIp = 0;
int32_t minttl = 15;
bool gotIp = false;
//while ( ancount-- ) {
int32_t maxi = ancount;
// seems like mx records themselves are type 15 and are only
// text, the answer sections has the ips
if ( getmx ) maxi = ancount + nscount + arcount;
if ( maxi == 0 ) maxi = nscount + arcount;
TLDIPEntry tldip;
tldip.numTLDIPs = 0;
int64_t answerHash64 = 0LL;
for ( int32_t i = 0 ; i < maxi; i++ ) {
// well, this is the name of the record
// kinda, so this is how we will map
// an MX resource record to its A record
// in the answer section
const char *s = getRRName ( rr , dgram, end );
//log("dns: got rr name: %s",s);
int64_t rrHash64 = 0LL;
if ( s ) rrHash64 = hash64n ( s );
// . now a domain name should follow
// . but if the next byte has hi bit set then it means its
// a 2 byte pointer to another domain label
// . if hi bit is clear the label follows here
// . both bits must be on for message compression
while ( rr < end ) {
// parse out the label
if ( ! *rr ) {
rr += 1;
break;
}
else if ( (*rr & 0xc0) != 0xc0 )
rr += (u_char)(*rr)+1;
// skip over the compressed thingy
else {
rr += 2;
break;
}
}
// need to have room for the rdata
if ( rr + 10 >= end ) {
addToCache ( ds->m_hostnameKey , -1 );
g_errno = EBADREPLY;
char ipbuf[16];
log(LOG_INFO,"dns: Nameserver (%s) returned a corrupt reply [2] for %s.",
iptoa(slot->getIp(),ipbuf),ds->m_hostname);
return -1;
}
// the type (A=1,CNAME=5,...)
int16_t rrtype = ntohs ( *(int16_t *)rr);
rr += 2;
// skip the rr class(2)
rr += 2;
// skip the ttl(4)
int32_t ttl = ntohl(*(int32_t *)rr);
rr += 4;
// get the length of the rr data (sometimes ip)
int16_t rlen = ntohs ( *(int16_t *)rr);
// skip to the actual resource data
rr += 2;
// ask for MX record? used by Emailer in Facebook.cpp.
if ( getmx && rrtype == 15 && rlen == 4 ) goto extractIp;
// a match by hash?
if ( rrtype == 1 && rlen == 4 && answerHash64 &&
answerHash64 == rrHash64 )
goto extractIp;
// get the ip if we have an A record
if ( rrtype == 1 && rlen == 4 && ! answerHash64 )
goto extractIp;
// watch our for negative rlens
if ( rlen < 0 ) {
g_errno = EBADREPLY;
char ipbuf[16];
log(LOG_INFO,"dns: Nameserver (%s) returned a negative resource len.", iptoa(slot->getIp(),ipbuf) );
return -1;
}
// . TODO: rewrite this messy section of code nice and neat.
// . do we have a hostname to ask for this ip?
// . ns records always come after answers records, which we
// have none of if this is true, and they always come before
// "additional records" (arcount). Usually the additional
// records just contain the ip addresses of the ns records.
// but when they do not we may have to get the ips of the
// hostnames of these dns servers, so store the names into
// ds->m_nameBuf and have ds->m_dnsNames point into
// ds->m_nameBuf.
if ( //! getmx &&
// if we are in the NS section we have a referring name
// server, most likely without an IP since arcount<nscount
// so store that name into the name buffer
((ancount == 0 && i < nscount && arcount < nscount &&
rrtype==2) ||
// OR, if we are in the answer section and we do not
// have an IP yet, we may just have an alias we have
// to use as the hostname
(!gotIp && i < ancount && (rrtype==5||rrtype==15)))
&&
ds->m_depth + 1 < MAX_DEPTH &&
ds->m_numDnsNames[ds->m_depth+1] < MAX_DNS_IPS &&
ds->m_nameBufPtr + rlen + 1 < ds->m_nameBufEnd ) {
//log("got rr we want");
char *dst = ds->m_nameBufPtr;
// store into our buffer
char *p = rr;
char *pend = p + rlen;
// for mx records, i think we got a int16_t mx #
if ( rrtype == 15 ) {
p += 2;
}
while ( *p && p < pend ) {
// is compression bit set?
while ( ((( uint8_t)(*p)) & 0xc0) == 0xc0 ) {
// we are jumping within the dgram
uint16_t offset = ntohs(*(int16_t *)p)&0x3fff;
if(offset>(end-dgram))
break;
p = dgram + offset;
// bust out of while loop if we should
if ( !*p || p>=pend )
break;
}
// watch out for corruption
if ( dst + *p + 1 > ds->m_nameBufEnd ) {
addToCache ( ds->m_hostnameKey , -1);
g_errno = EBADREPLY;
char ipbuf[16];
log(LOG_INFO,
"dns: Nameserver (%s) returned"
" a corrupt reply [3] for %s.",
iptoa(slot->getIp(),ipbuf),
ds->m_hostname);
return -1;
}
// watch out for corruption
if ( *p < 0 ) {
addToCache ( ds->m_hostnameKey , -1);
g_errno = EBADREPLY;
char ipbuf[16];
log(LOG_INFO,
"dns: Nameserver (%s) returned"
" a corrupt reply [4] for %s.",
iptoa(slot->getIp(),ipbuf),
ds->m_hostname);
return -1;
}
// copy the hostname
memcpy ( dst , p+1 , *p );
dst += *p;
*dst++ = '.';
p += ((u_char)*p) + 1;
}
// last '.' should be a 0 really
dst[-1] = '\0';
// make ptr to it
// if it was a NS name, do that
if ( rrtype == 2 ) {
log(LOG_DEBUG,
"dns: Added name [0-%" PRId32"] %s to "
"depth %" PRId32" for %s.",
ds->m_numDnsNames[ds->m_depth+1],
ds->m_nameBufPtr,
(int32_t)ds->m_depth+1,
ds->m_hostname);
ds->m_dnsNames[ds->m_depth+1]
[ds->m_numDnsNames[ds->m_depth+1]++] =
ds->m_nameBufPtr;
}
// otherwise, a new hostname
else {
// note it
if ( getmx || g_conf.m_logDebugDns )
logf(LOG_DEBUG,"dns: Got CNAME "
"alias %s for %s",
ds->m_nameBufPtr, ds->m_hostname);
// . get hash of it
// . then if we are scanning the additional
// resource records that are A records
// and they match this hash, use that ip!
answerHash64 = hash64n ( ds->m_nameBufPtr );
// reset the tries, because we can retry the
// ips of the nameservers again because now
// we have a different hostname. otherwise,
// we will fail looking up www.astronomy.org.nz
ds->m_numTried = 0;
// hostname changes now
int32_t len = strlen(ds->m_nameBufPtr);
if ( len > 127 ) {
/*
this spams the log!
log(LOG_INFO,
"dns: Aliased hostname %s is %" PRId32" > "
"127 chars long.",ds->m_nameBufPtr,len);
*/
g_errno = EBUFTOOSMALL;
return -1;
}
strncpy ( ds->m_hostname ,
ds->m_nameBufPtr,
len );
ds->m_hostname[len] = 0;
// so we have to start over...
int32_t d = ds->m_depth+1;
const TLDIPEntry *cached = getTLDIP(ds);
if ( g_conf.m_askRootNameservers && cached) {
gbmemcpy( ds->m_dnsIps[d],
cached->TLDIP,
cached->numTLDIPs *
sizeof(uint32_t));
ds->m_numDnsIps[d] = cached->numTLDIPs;
ds->m_numDnsNames[d] = 0;
ds->m_rootTLD[d] = true;
ds->m_fallbacks[d] = 0;
numIps = cached->numTLDIPs;
}
else
if ( g_conf.m_askRootNameservers ) {
gbmemcpy ( ds->m_dnsIps[d],
g_conf.m_rnsIps,
g_conf.m_numRns * 4);
ds->m_numDnsIps[d] = g_conf.m_numRns;
ds->m_numDnsNames[d] = 0;
ds->m_rootTLD[d] = true;
ds->m_fallbacks[d] = 0;
numIps = g_conf.m_numRns;
}
// otherwise, use the local bind9 servers
else {
gbmemcpy ( ds->m_dnsIps[d],
g_conf.m_dnsIps,
g_conf.m_numDns * 4);
ds->m_numDnsIps[d] = g_conf.m_numDns;
ds->m_numDnsNames[d] = 0;
ds->m_rootTLD[d] = false;
ds->m_fallbacks[d] = 0;
numIps = g_conf.m_numDns;
}
}
// update our current name buf pointer
ds->m_nameBufPtr = (char *)dst;
}
// . if record is not type 5 (CNAME) then bitch
// . we can be in an authority/additional section now
// so comment this out
//if ( rrtype != 5 ) {
// g_errno = EBADREPLY;
// log("dns: Nameserver (%s) returned "
// "a bad rr type of %" PRId32".", iptoa(slot->m_ip),
// (int32_t)rrtype);
// return -1;
//}
//HEY, see rfc1034... gota redo with this as the name i think
// TODO: use the canonical name as a normalization!!
// skip resource data
rr += rlen;
continue;
extractIp:
// . now "s" should pt to the resource data, hopefully the ip
// . add another ip to our array and inc numIps
// . ips should be in network order
uint32_t ip ; gbmemcpy ( (char *)&ip , rr , 4 );
// TODO: detect bogus responses such as those from Verisign's SiteFinder "service" where unknown domains resolved to a fixed IP-address.
// if ... something ...
// addToCache ( ds->m_hostnameKey , 0 );
// return 0;
// debug msg
//fprintf(stderr,".... got ip=%s for %s\n",iptoa(ip),hostname);
// get the smallest ip (or largest??? what is the byte order?)
if ( ip < minIp || ! gotIp ) {
minIp = ip;
minttl = ttl;
}
// we got one now
gotIp = true;
// . add to list of ips of nameservers to ask
// . must not be in the answer section (i>=ancount)
if ( ips && numIps + 1 < MAX_DNS_IPS ) {
if (isTimedOut(ip)) {
char ipbuf[16];
log(LOG_DEBUG, "dns: Not adding [0] ip %s - timed out",
iptoa(ip,ipbuf));
}
else {
ips[numIps] = ip;
char ipbuf[16];
log(LOG_DEBUG,
"dns: Added ip [0-%" PRId32"] %s to depth %" PRId32" "
"for %s.",
numIps, iptoa(ip,ipbuf),(int32_t)ds->m_depth+1,
ds->m_hostname);
numIps++;
}
// build TLDIP entry, regardless of timed-out status
if ( g_conf.m_askRootNameservers && ds->m_depth == 0 &&
tldip.numTLDIPs < MAX_DNS_IPS ) {
tldip.TLDIP[tldip.numTLDIPs++] = ip;
}
}
// skip resource data and continue
rr += rlen;
}
//log(LOG_DEBUG, "dns: gotIp depth %d done", ds->m_depth);
#ifdef DNS_TLD_CACHE
if ( g_conf.m_askRootNameservers && ds->m_depth == 0 && g_errno == 0 ) {
setTLDIP(ds, &tldip);
ds->m_rootTLD[1] = true;
}
#endif
//if ( ! gotIp && ancount > 0 ) {
// log("got answer with no ip!");
//}
// debug msg
//fprintf(stderr,".... using minip=%s for %s\n",iptoa(minIp),hostname);
// if we did have an answer section, assume minIp is the answer
if ( ancount > 0 && gotIp ) {
char ipbuf[16];
log(LOG_DEBUG, "dns: got ip %s for '%s'",
iptoa(minIp,ipbuf), ds->m_hostname);
// . get the key of the hostname
addToCache ( ds->m_hostnameKey , minIp , minttl );
// return the minimum ip
return minIp;
}
//
// begin the new code.
//
// if we did not have an ip address in our reply we probably got a
// list of nameservers we have to ask for the ip address. so our
// caller should call sendToNextDNS() after we return.
//
// bail if already went too deep
if ( ! ips ) {
g_errno = EBADREPLY;
log(LOG_INFO,"dns: Exceeded max recursion depth of %" PRId32" for %s",
(int32_t)MAX_DEPTH,ds->m_hostname);
return -1;
}
// if we did not get more nameserver ips to ask, this was a dead end
//if ( numIps <= 0 ) {
// //g_errno = EBADREPLY;
// log("dns: Got reply with no ip or nameserver ips.");
// //return -1;
//}
// ok, increase the depth since we got some more nameserver ips to ask
ds->m_depth++;
// set up ds2
ds->m_numDnsIps[ds->m_depth] = numIps;
log(LOG_DEBUG,
"dns: depth +%" PRId32" rootTLD %d #fb %" PRId32" #ip %" PRId32" #name %" PRId32,
ds->m_depth,
ds->m_rootTLD[ds->m_depth],
ds->m_fallbacks[ds->m_depth],
ds->m_numDnsIps[ds->m_depth],
ds->m_numDnsNames[ds->m_depth]);
// initialize next depth level
int32_t d1 = ds->m_depth + 1;
if( d1 < MAX_DEPTH ) {
ds->m_rootTLD[d1] = false;
ds->m_fallbacks[d1] = 0;
ds->m_numDnsIps[d1] = 0;
ds->m_numDnsNames[d1] = 0;
}
// set g_errno so gotIp() will call sendToNextDNS(ds)
log(LOG_DEBUG, "dns: gotIp returning ETRYAGAIN");
g_errno = ETRYAGAIN;
return -1;
}
bool Dns::isInCache ( key96_t key , int32_t *ip ) {
// debug msg
//log("dns::isInCache: checking");
// . returns 0 if not in cache
// . returns -1 if a cached not-found
// . otherwise returns ip of "hostname"
// . any cache entry over 24hrs is stale
int32_t maxAge = DNS_CACHE_MAX_AGE; // 60*60*24;
// . look up the ip in the cache
// . TODO: can you pass a NULL ptr for the dataSize?
// . TODO: is it ok to leave the caller hanging??
// . the callback, gotIp(), can be NULL if we're just updating times
// . TODO: ensure list owns the data
// if not found, return false;
char *rec;
int32_t recSize;
// return false if not in cache
RdbCacheLock rcl(m_rdbCache);
if ( ! m_rdbCache.getRecord ( (collnum_t)0 ,
key ,
&rec ,
&recSize ,
false , // do copy?
maxAge ,
true )) // inc count?
return false;
// recSize must be 4 -- sanity check
if ( recSize != 4 ) {
log( LOG_WARN, "dns: Got bad record of size %" PRId32" from cache.", recSize);
return false;
}
// the data ptr itself is the ip
*ip = *(int32_t *)rec ;
// return true since we found it
return true;
}
// ttl is in seconds
void Dns::addToCache ( key96_t hostnameKey , int32_t ip , int32_t ttl ) {
// debug msg
char ipbuf[16];
log(LOG_DEBUG, "dns: addToCache added key %" PRIu64" ip %s ttl %" PRId32" to cache",
hostnameKey.n0, iptoa(ip,ipbuf), ttl);
// set timestamp to be maxTime (24 hours = 8640 sec) - ttl
int32_t timestamp;
// watch out for crazy ttls, bigger than 2 days
if ( ttl > 60*60*24*2 ) ttl = 60*60*24*2;
// if ttl is less than how long we trust the cached ip for, reduce
// the timestamp to fool Dns::isInCache()
if ( ttl > 0 ) timestamp = getTime() - DNS_CACHE_MAX_AGE + ttl;
else timestamp = getTime();
// . select the cache, we should add it to
// . generally, local cache is used to save over-the-network lookups
// and is smaller than the distributed cache, m_rdbCache
RdbCache *c ;
// if ( getResponsibleHost(hostnameKey) != g_hostdb.m_myHost )
// c = &m_rdbCacheLocal;
// else
c = &m_rdbCache;
// just add a record to the cache
RdbCacheLock rcl(*c);
c->addRecord((collnum_t)0,hostnameKey,(char *)&ip,4,
timestamp);//rec size
// reset g_errno in case it had an error (we don't care)
g_errno = 0;
}
// . s pts to the hostname in the len/label pair format
// . hostname will be filled with the hostname
bool Dns::extractHostname ( const char *dgram,
const char *record,
char *hostname ) {
hostname[0] = '\0';
int32_t i = 0;
const char *end = dgram + DGRAM_SIZE;
while ( *record ) {
int16_t len = (u_char)(*record);
const char *src = record + 1;
int32_t times = 0;
// if 2 hi bits on "len" are set it's a label offset
while ( (len & 0xc0) && times++ < 5 ) {
int16_t offset = (len & 0x3f) << 8 | src[0];
if ( dgram + offset >= end ) {
g_errno = EBADREPLY; return false; }
len = (u_char)(*(dgram + offset));
src = dgram + offset + 1;
}
// breech check
if ( times >= 5 ) { g_errno = EBADREPLY; return false; }
if ( i + len >= 255 ) { g_errno = EBADREPLY; return false; }
if ( src + len >= end ) { g_errno = EBADREPLY; return false; }
// copy to hostname
gbmemcpy ( &hostname[i] , record + 1 , len );
// if we had a ptr then just add 2
if ( times > 0 ) record += 2;
else record += 1 + len;
// advance the hostname
i += len;
// there should be SOME room left over (enough for . and \0)
if ( i >= 254 ) { g_errno = EBADREPLY; return false; }
// add a .
hostname [ i++ ] = '.';
}
// cover up that dot
if ( i < 0 ) { g_errno = EBADREPLY; return false; }
hostname [ i - 1 ] = '\0';
return true;
}
// TODO: hostname key collision is possible, so watch out.
// should we also store ptr to the whole hostname in hash table?
bool Dns::isInFile ( key96_t key , int32_t *ip ) {
// flush and reload our /etc/hosts hash table every minute
static int32_t s_lastTime = 0;
int32_t now = getTime();
if ( now - 60 >= s_lastTime ) {
s_lastTime = now;
loadFile ();
// ignore any error from this
g_errno = 0;
}
// bail if no hash table
if ( m_numSlots <= 0 ) return false;
// check hash table
int32_t n = key.n1 % m_numSlots;
while ( m_ips[n] && m_keys[n] != key )
if ( ++n >= m_numSlots ) n = 0;
// return false if not found in our hash table (/etc/hosts file)
if ( ! m_ips[n] ) return false;
// otherwise we got it
*ip = m_ips[n];
return true;
}
// returns false and sets g_errno on error
bool Dns::loadFile ( ) {
File f;
f.set ("/etc/","hosts");
int32_t fsize = f.getFileSize();
if ( fsize < 0 ) {
log( LOG_WARN, "dns: Getting file size of /etc/hosts : %s.", mstrerror(g_errno) );
return false;
}
// add 1 so we can NULL terminate
int32_t bufSize = fsize + 1;
// make mem
char *buf = (char *)mmalloc ( bufSize , "Dns" );
if ( ! buf ) {
log( LOG_WARN, "dns: Could not read /etc/hosts : %s.", mstrerror(g_errno));
return false;
}
// pre-open the file
f.open ( O_RDONLY );
// read it all in
if ( f.read ( buf , fsize , 0 ) < 0 ) {
mfree ( buf , bufSize , "Dns" );
f.close();
log( LOG_WARN, "dns: Could not read /etc/hosts : %s.", mstrerror(g_errno));
return false;
}
// NULL terminate it
buf [ fsize ] = '\0';
// free hash table
mfree ( m_ips , m_numSlots*4 , "Dns");
mfree ( m_keys , m_numSlots*sizeof(key96_t), "Dns");
m_ips = NULL;
m_keys = NULL;
m_numSlots = 0;
// count # of lines in buf as upper bound for number of entries
char *p = buf;
int32_t count = 0;
for ( ; *p ; p++ ) if ( *p == '\n' ) count++;
// alloc the hash table
m_numSlots = count * 2;
m_ips = (int32_t *) mmalloc ( 4 * m_numSlots , "Dns" );
m_keys = (key96_t *) mmalloc ( sizeof(key96_t) * m_numSlots , "Dns" );
if ( ! m_ips || ! m_keys ) {
if ( m_ips ) mfree ( m_ips , m_numSlots*4 , "Dns");
if ( m_keys) mfree ( m_keys , m_numSlots*sizeof(key96_t), "Dns");
m_numSlots = 0;
mfree ( buf , bufSize , "Dns" );
f.close();
log( LOG_WARN, "dns: Read /etc/hosts : %s.", mstrerror(g_errno));
return false;
}
// clear hash table
memset ( m_ips , 0 , 4 * m_numSlots );
// declare vars here
char *e;
int32_t ip;
key96_t key;
int32_t n;
// point to first line
p = buf;
loop:
// skip comments
if ( *p == '#' ) goto skipline;
// skip spaces
while ( isspace ( *p ) ) p++;
// if this is not a digit, continue
if ( ! isdigit(*p) ) goto skipline;
// find the end, a space
e = p;
while ( *e && ! isspace(*e) ) e++;
// get ip, will stop at first space
ip = atoip ( p , (int32_t)(e - p) );
// get the hostname
p = e;
// skip spaces after ip
while ( isspace ( *p ) ) p++;
// this should be the hostname, starting with an alnum
if ( ! isalnum (*p) ) goto skipline;
// get the end of the hostname
e = p;
while ( isalnum(*e) || *e=='.' || *e=='-' ) e++;
// get the hash of the hostname
key = hash96 ( p , e - p );
// advance p
p = e;
// store in hash table
n = key.n1 % m_numSlots;
while ( m_ips[n] && m_keys[n] != key )
if ( ++n >= m_numSlots ) n = 0;
// if already in there skip it
if ( m_ips[n] ) goto skipline;
// otherwise add it
m_ips [n] = ip;
m_keys[n] = key;
skipline:
while ( *p && *p != '\n' ) p++;
while ( *p == '\n' ) p++;
if ( *p ) goto loop;
// now all the hostname/ip pairs are hashed
mfree ( buf , bufSize , "Dns" );
f.close();
return true;
}
key96_t Dns::getKey ( const char *hostname, int32_t hostnameLen ) {
// use the domain name name. so *.blogspot.com does not flood their dns
return hash96 ( hostname , hostnameLen );
//int32_t dlen = 0;
//char *dom = getDomFastFromHostname ( hostname , &dlen );
//if ( ! dom || dlen <=2 ) return hash96 ( hostname , hostnameLen );
//return hash96 ( dom , dlen );
}
// . MsgC uses this to see which host is responsible for this key
// which is just a hash96() of the hostname (see getKey() above)
// . returns -1 if not host available to send request to
Host *Dns::getIPLookupHost(key96_t key) {
logTrace( g_conf.m_logTraceDns, "BEGIN" );
// get the hostNum that should handle this
int32_t hostId = key.n1 % g_hostdb.getNumHosts();
logTrace( g_conf.m_logTraceDns, "numHosts: %" PRIu32, g_hostdb.getNumHosts() );
logTrace( g_conf.m_logTraceDns, "key.n1: %" PRIu32, key.n1 );
logTrace( g_conf.m_logTraceDns, "hostId: %" PRIu32, hostId );
// return it if it is alive
Host* h = g_hostdb.getHost ( hostId );
if ( h->m_spiderEnabled && ! g_hostdb.isDead ( hostId ) ) {
logTrace( g_conf.m_logTraceDns, "END. Spidering enabled and not dead. Returning." );
return h;
}
// how many are up?
int32_t numAlive = g_hostdb.getNumHostsAlive();
logTrace( g_conf.m_logTraceDns, "Above is dead. numAlive: %" PRIu32, numAlive );
// NULL if none
if ( numAlive == 0 ) {
logTrace( g_conf.m_logTraceDns, "END. None alive. return NULL" );
g_errno = EHOSTDEAD;
return NULL;
}
// try another hostNum
int32_t hostNum = key.n1 % numAlive;
logTrace( g_conf.m_logTraceDns, "hostNum: %" PRIu32, hostNum );
// otherwise, chain to him
int32_t count = 0;
for ( int32_t i = 0 ; i < g_hostdb.getNumHosts() ; i++ ) {
// get the ith host
Host *host = &g_hostdb.m_hosts[i];
if ( !host->m_spiderEnabled ) {
logTrace( g_conf.m_logTraceDns, "i: %" PRId32" - spidering disabled", i );
continue;
}
// skip him if he is dead
if ( g_hostdb.isDead ( host ) ) {
logTrace( g_conf.m_logTraceDns, "i: %" PRId32" - dead", i );
continue;
}
// count it if alive, continue if not our number
if ( count++ != hostNum ) {
logTrace( g_conf.m_logTraceDns, "i: %" PRId32" - not our host (% " PRId32")", i, hostNum );
continue;
}
// we got a match, we cannot use hostNum as the hostId now
// because the host with that hostId might be dead
logTrace( g_conf.m_logTraceDns, "END. i: %" PRId32" - Match!", i );
return host;
}
logTrace( g_conf.m_logTraceDns, "END. Return EHOSTDEAD. None found" );
g_errno = EHOSTDEAD;
return NULL;
}