squid proxy fixes

This commit is contained in:
mwells
2014-06-09 16:10:24 -07:00
parent 5bf3042633
commit 29e90d1d55
4 changed files with 82 additions and 6 deletions

@ -559,6 +559,19 @@ void resetHttpMime ( ) {
s_mimeTable.reset();
}
const char *HttpMime::getContentTypeFromExtension ( char *ext , long elen) {
	// a missing, empty or zero-length extension defaults to text/html
	if ( ! ext ) return "text/html";
	if ( ! ext[0] || elen <= 0 ) return "text/html";
	// hash the extension and look it up in the static mime table
	long h = hash32 ( ext , elen );
	const char *type = (const char *)s_mimeTable.getValue ( h );
	// unrecognized extensions also default to text/html
	if ( ! type ) return "text/html";
	return type;
}
// . list of types is on: http://www.duke.edu/websrv/file-extensions.html
// . i copied it to the bottom of this file though
const char *HttpMime::getContentTypeFromExtension ( char *ext ) {

@ -136,6 +136,7 @@ class HttpMime {
// convert a file extension like "gif" to "images/gif"
const char *getContentTypeFromExtension ( char *ext ) ;
const char *getContentTypeFromExtension ( char *ext , long elen ) ;
// used for bz2, gz files
const char *getContentEncodingFromExtension ( char *ext ) ;

@ -3076,6 +3076,20 @@ static void gotSquidProxiedContent ( void *state ) ;
void gotSquidProxiedUrlIp ( void *state , long ip ) {
SquidState *sqs = (SquidState *)state;
// send the exact request. hide in the url buf i guess.
TcpSocket *sock = sqs->m_sock;
// if ip lookup failed... return
if ( ip == 0 || ip == -1 ) {
mdelete ( sqs, sizeof(SquidState), "sqs");
delete (sqs);
// what is ip lookup failure for proxy?
g_httpServer.sendErrorReply(sock,404,"Not Found (via Proxy)");
return;
}
// pick the host to send the msg13 to now based on the ip
Msg13Request *r = &sqs->m_request;
@ -3087,9 +3101,6 @@ void gotSquidProxiedUrlIp ( void *state , long ip ) {
// let msg13 know to just send the request in m_url
r->m_isSquidProxiedUrl = true;
// send the exact request. hide in the url buf i guess.
//TcpSocket *sock = sqs->m_sock;
char *proxiedReqBuf = r->m_url;
// store into there
@ -3132,6 +3143,9 @@ void gotSquidProxiedUrlIp ( void *state , long ip ) {
r->m_compressReply = false;
r->m_isCustomCrawl = 0;
// log for now
log("proxy: getting proxied content for req=%s",r->m_url);
// isTestColl = false. return if blocked.
if ( ! sqs->m_msg13.getDoc ( r, false ,sqs, gotSquidProxiedContent ) )
return;
@ -3148,11 +3162,31 @@ void gotSquidProxiedContent ( void *state ) {
long replySize = sqs->m_msg13.m_replyBufSize;
long replyAllocSize = sqs->m_msg13.m_replyBufAllocSize;
TcpSocket *sock = sqs->m_sock;
// if it timed out or something...
if ( g_errno ) {
log("proxy: proxy reply had error=%s",mstrerror(g_errno));
mdelete ( sqs, sizeof(SquidState), "sqs");
delete (sqs);
// the proxied download failed (e.g. timed out); relay an error to the client
g_httpServer.sendErrorReply(sock,505,"Timed Out (via Proxy)");
return;
}
// another debug log
long clen = 500;
if ( clen > replySize ) clen = replySize -1;
if ( clen < 0 ) clen = 0;
char c = reply[clen];
reply[clen]=0;
log("proxy: got proxied reply=%s",reply);
reply[clen]=c;
// don't let Msg13::reset() free it
sqs->m_msg13.m_replyBuf = NULL;
TcpSocket *sock = sqs->m_sock;
// sanity, this should be exact... since TcpServer.cpp needs that
//if ( replySize != replyAllocSize ) { char *xx=NULL;*xx=0; }

@ -520,6 +520,9 @@ void handleRequest13 ( UdpSlot *slot , long niceness ) {
// . an empty rec is a cached not found (no robots.txt file)
// . therefore it's allowed, so set *reply to 1 (true)
if ( inCache ) {
// log debug?
if ( r->m_isSquidProxiedUrl )
log("proxy: found %li bytes in cache",recSize);
// helpful for debugging. even though you may see a robots.txt
// redirect and think we are downloading that each time,
// we are not... the redirect is cached here as well.
@ -2739,6 +2742,10 @@ void fixGETorPOST ( char *squidProxiedReqBuf ) {
char *reqEnd = squidProxiedReqBuf + gbstrlen(squidProxiedReqBuf);
// include the terminating \0, so add +1
memcpy ( httpStart , s , reqEnd - s + 1 );
// now make HTTP/1.1 into HTTP/1.0
char *hs = strstr ( httpStart , "HTTP/1.1" );
if ( ! hs ) return;
hs[7] = '0';
}
// . for the page cache we hash the url and the cookie to make the cache key
@ -2763,10 +2770,28 @@ long long computeProxiedCacheKey64 ( char *squidProxiedReqBuf ) {
s += slen;
// skip till we hit end of url
// skip until / or space or \r or \n or \0
for ( ; *s && ! is_wspace_a(*s) ; s++ );
char *cgi = NULL;
for ( ; *s && ! is_wspace_a(*s) ; s++ ) {
if ( *s == '?' && ! cgi ) cgi = s; }
// hash the url
long long h64 = hash64 ( start , s - start );
//
// if file extension implies it is an image, do not hash cookie
//
char *extEnd = NULL;
if ( cgi ) extEnd = cgi;
else extEnd = s;
char *ext = extEnd;
for ( ; ext>extEnd-6 && ext>start && *ext!='.' && *ext!='/' ; ext-- );
if ( *ext == '.' && ext+1 < extEnd ) {
HttpMime mime;
const char *cts;
ext++; // skip over .
cts = mime.getContentTypeFromExtension ( ext , extEnd-ext );
if ( strncmp(cts,"image/",6) == 0 ) return h64;
}
// now for cookie
s = strstr ( squidProxiedReqBuf , "Cookie: ");
// if not there, just return url hash
@ -2777,6 +2802,9 @@ long long computeProxiedCacheKey64 ( char *squidProxiedReqBuf ) {
for ( ; *s && *s != '\r' && *s != '\n' ; s++ );
// incorporate cookie hash
h64 = hash64 ( start , s - start , h64 );
//log("debug: cookiehash=%lli",hash64(start,s-start));
return h64;
}