Merge branch 'master' of git@github.com:gigablast/open-source-search-engine

This commit is contained in:
Matt Wells
2013-10-16 14:54:02 -07:00
15 changed files with 690 additions and 173 deletions

@ -518,6 +518,7 @@ long HttpMime::getContentTypePrivate ( char *s ) {
else if (!strcasecmp(s,"image/jpeg" ) ) ct = CT_JPG;
else if (!strcasecmp(s,"image/png" ) ) ct = CT_PNG;
else if (!strcasecmp(s,"image/tiff" ) ) ct = CT_TIFF;
else if (!strncasecmp(s,"image/",6 ) ) ct = CT_IMAGE;
else if (!strcasecmp(s,"application/javascript" ) ) ct = CT_JS;
else if (!strcasecmp(s,"application/x-javascript") ) ct = CT_JS;
else if (!strcasecmp(s,"text/javascript" ) ) ct = CT_JS;

@ -36,6 +36,7 @@ time_t atotime5 ( char *s ) ;
#define CT_JS 14
#define CT_CSS 15
#define CT_JSON 16
#define CT_IMAGE 17
#define ET_IDENTITY 0
#define ET_GZIP 1

@ -459,7 +459,11 @@ unsigned char getLanguageFromUserAgent(char *abbr) {
return langUnknown;
}
// these are going to be adult, in any language
// . these are going to be adult, in any language
// . this seems only to be used by Speller.cpp when splitting up words
// in the url domain.
// . s/slen is a full word that is found in our "dictionary" so using
// phrases like biglittlestuff probably should not go here.
bool isAdult( char *s, long slen, char **loc ) {
char **p = NULL;
char *a = NULL;

@ -33,7 +33,7 @@ OBJS = Tfndb.o UdpSlot.o \
HttpMime.o Hostdb.o \
Highlight.o File.o Errno.o Entities.o \
Dns.o Dir.o Conf.o Bits.o \
Stats.o BigFile.o AdultBit.o Ads.o Msg17.o \
Stats.o BigFile.o Ads.o Msg17.o \
Speller.o DiskPageCache.o \
PingServer.o StopWords.o TopTree.o \
Parms.o Pages.o Msg28.o Msg30.o \

@ -109,11 +109,11 @@ bool sendReply ( State0 *st , char *reply ) {
STAT_QUERY );
// add to statsdb, use # of qterms as the value/qty
//g_statsdb.addStat ( 0,
// "query",
// st->m_startTime,
// nowms,
// st->m_q.m_numTerms);
g_statsdb.addStat ( 0,
"query",
st->m_startTime,
nowms,
st->m_q.m_numTerms);
// log the time
if ( st->m_took >= g_conf.m_logQueryTimeThreshold ) {
@ -1651,7 +1651,7 @@ static bool printDMOZCategoryUnderResult ( SafeBuf &sb ,
// print a link to apply your query to this DMOZ category
//
//////
sb.safePrintf("<a href=\"/search?s=0&q=gbpdcat%%3A%li",catid);
sb.safePrintf("<a href=\"/search?s=0&q=gbipcatid%%3A%li",catid);
sb.urlEncode("|",1);
sb.urlEncode(si->m_sbuf1.getBufStart(),si->m_sbuf1.length());
sb.safePrintf("\">Search in Category</a>: ");

@ -67,7 +67,8 @@ bool sendPageStatsdb ( TcpSocket *s, HttpRequest *r ) {
st->m_niceness = MAX_NICENESS;
st->m_socket = s;
st->m_request = *r;
//st->m_request = *r;
st->m_request.copy ( r );
// hostId must be one of the following:
// 0-n - a valid hostId
@ -120,7 +121,9 @@ bool sendPageStatsdb ( TcpSocket *s, HttpRequest *r ) {
st->m_endDate = st->m_endDateR;
}
//
// this is no longer a gif, but an html graph in g_statsdb.m_gw
//
if ( ! g_statsdb.makeGIF ( st->m_endDateR ,
st->m_startDateR ,
st->m_samples ,
@ -211,15 +214,28 @@ void sendReply ( void *state ) {
buf.safePrintf("<table cellpadding=10 border=0>\n");
buf.safePrintf("<tr><td>"
"<center>"
"<img src=\"/stats%li.gif\" height=%li width=%li "
"border=\"0px\">"
"</center>"
"<center>");
/////////////////////////
//
// insert the div graph here
//
/////////////////////////
buf.cat ( g_statsdb.m_gw );
// purge it
g_statsdb.m_gw.purge();
g_statsdb.m_dupTable.reset();
//"<img src=\"/stats%li.gif\" height=%li width=%li "
//"border=\"0px\">"
//st->m_hostId,
//g_statsdb.getImgHeight(),
//g_statsdb.getImgWidth());
buf.safePrintf("</center>"
//"class=\"statsdb_image\">"
"</td></tr>\n",
st->m_hostId,
g_statsdb.getImgHeight(),
g_statsdb.getImgWidth());
"</td></tr>\n");
// the map key
buf.safePrintf("<tr><td>");

@ -56,7 +56,7 @@ long g_qbufNeedSave = 0;
extern void resetPageAddUrl ( );
extern void resetHttpMime ( );
extern void reset_iana_charset ( );
extern void resetAdultBit ( );
//extern void resetAdultBit ( );
extern void resetDomains ( );
extern void resetEntities ( );
extern void resetQuery ( );
@ -1709,7 +1709,7 @@ void Process::resetAll ( ) {
resetPageAddUrl();
resetHttpMime();
reset_iana_charset();
resetAdultBit();
//resetAdultBit();
resetDomains();
resetEntities();
resetQuery();

@ -1537,7 +1537,8 @@ bool Speller::findNext( char *s, char *send, char **nextWord, bool *isPorn,
long slen = send - s;
// check if there is an adult word in there
// NOTE: The word 'adult' gives a lot of false positives, so even
// though it is in the isAdult() list, skip it
// though it is in the isAdult() list, skip it.
// s/slen constitutes an individual word.
if ( isAdult ( s, slen, &loc ) && strncmp ( s, "adult", 5 ) != 0 ){
// if this string starts with the adult word, don't check
// further

@ -3129,6 +3129,8 @@ void SpiderLoop::spiderDoledUrls ( ) {
if ( m_cri >= g_collectiondb.m_numRecs ) m_cri = 0;
// get rec
cr = g_collectiondb.m_recs[m_cri];
// skip if gone
if ( ! cr ) continue;
// stop if not enabled
if ( ! cr->m_spideringEnabled ) continue;
// get the spider collection for this collnum

@ -488,7 +488,7 @@ void Stats::addSpiderPoint ( long errCode, bool isNew ) {
}
}
// draw a HORIZONTAL line in html 5 i guess
// draw a HORIZONTAL line in html
void drawLine2 ( SafeBuf &sb ,
long x1 ,
long x2 ,

@ -80,7 +80,7 @@ static Label s_labels[] = {
// . max = -1, means dynamic size the ymax!
// . use 1B for now again...
// . color=pink
{GRAPH_QUANTITY,1000000000.0,"docs_indexed", .1,"%.0fK docs" , .001 , 0x00cc0099,"docs indexed" }
{GRAPH_QUANTITY,50000000.0,"docs_indexed", .1,"%.0fK docs" , .001 , 0x00cc0099,"docs indexed" }
//{ "termlist_intersect",0x0000ff00},
@ -101,6 +101,13 @@ static Label s_labels[] = {
//{ "parm_change",0xffc0c0} // pink?
};
// Prototype for a free (non-member) drawLine3() that emits a horizontal
// line into "sb".
// NOTE(review): the only definition visible in this change is the member
// function Statsdb::drawLine3(), so this file-scope declaration looks
// unused -- confirm and drop it if nothing defines or calls it.
void drawLine3 ( SafeBuf &sb ,
long x1 ,
long x2 ,
long fy1 ,
long color ,
long width ) ;
Label *Statsdb::getLabel ( long labelHash ) {
Label **label = (Label **)m_labelTable.getValue ( &labelHash );
if ( ! label ) return NULL;
@ -116,7 +123,7 @@ bool Statsdb::init ( ) {
// 20 pixel borders
m_bx = 10;
m_by = 30;
m_by = 40;
// keep it at least at 20MB otherwise it is filling up the tree
// constantly and dumping
@ -473,6 +480,11 @@ bool Statsdb::makeGIF ( long t1Arg ,
m_sb3.reset();
m_ht3.reset();
// print graph in here as a bunch of divs now:
m_gw.purge();
m_dupTable.reset();
m_dupTable.set(4,0,20000,NULL,0,false,0,"statstbl");
// . start at t1 and get stats lists, up to 1MB of stats at a time
// . subtract 60 seconds so we can have a better shot at having
// a moving average for the last SAMPLE points
@ -491,6 +503,7 @@ bool Statsdb::makeGIF ( long t1Arg ,
return true;
// open the file for the gif
/*
char fname [ 1024 ];
sprintf ( fname , "%s/stats%li.gif" ,
g_hostdb.m_httpRootDir , g_hostdb.m_hostId );
@ -500,10 +513,13 @@ bool Statsdb::makeGIF ( long t1Arg ,
fname , mstrerror(errno) );
return true;
}
*/
return gifLoop ();
}
#define POINTWIDTH 8
#define MAX_POINTS 6000
#define MAX_WIDTH 6
#define DY 600 // pixels vertical
@ -538,9 +554,9 @@ bool Statsdb::gifLoop ( ) {
// shortcut
Msg5 *m = &m_msg5;
#ifndef _USEPLOTTER_
return true;
#endif
//#ifndef _USEPLOTTER_
//return true;
//#endif
// loop over all the lists in the time range, [m_t1,m_t2]
for ( ; ! m_done ; ) {
@ -572,53 +588,87 @@ bool Statsdb::gifLoop ( ) {
}
// define time delta - commented out because it's currently not used.
//long dt = m_t2 - m_t1;
long dt = m_t2 - m_t1;
#ifdef _USEPLOTTER_
//#ifdef _USEPLOTTER_
// gif size
char tmp[64];
//char tmp[64];
// dimensions of the gif
sprintf ( tmp , "%lix%li", (long)DX+m_bx*2 , (long)DY+m_by*2 );
GIFPlotter::parampl ( "BITMAPSIZE" , (void *)tmp );
//sprintf ( tmp , "%lix%li", (long)DX+m_bx*2 , (long)DY+m_by*2 );
//GIFPlotter::parampl ( "BITMAPSIZE" , (void *)tmp );
// create one
GIFPlotter plotter ( NULL , m_fd , NULL );
//GIFPlotter plotter ( NULL , m_fd , NULL );
// open it
plotter.openpl ( );
//plotter.openpl ( );
// define the space with boundaries 100 unit wide boundaries
//plotter.space ( -m_bx , -m_by , DX + m_bx , DY + m_by );
plotter.space ( 0 , 0 , DX + m_bx * 2 , DY + m_by * 2 );
//plotter.space ( 0 , 0 , DX + m_bx * 2 , DY + m_by * 2 );
// line thickness in user coordinates (pixels for us)
plotter.linewidth ( 1 );
//plotter.linewidth ( 1 );
// set bg color to gray (r/g/b)
plotter.bgcolor ( 0xd600 , 0xce00 , 0xd600 );
// set bg color to white (r/g/b)
//plotter.bgcolor ( 0xff00 , 0xff00 , 0xff00 );
//plotter.bgcolor ( 0xd600 , 0xce00 , 0xd600 );
// erase Plotter's graphics display
plotter.erase ();
//plotter.erase ();
// draw axises in black
plotter.pencolorname ("black");
//plotter.pencolorname ("black");
//
// main graphing window
//
m_gw.safePrintf("<div style=\"position:relative;"
"background-color:#c0c0c0;"
//"overflow-y:hidden;"
"overflow-x:hidden;"
"z-index:-10;"
// the tick marks we print below are based on it
// being a window of the last 20 seconds... and using
// DX pixels
"min-width:%lipx;"
"min-height:%lipx;"
//"width:100%%;"
//"min-height:600px;"
"margin-top:10px;"
"margin-bottom:10px;"
"margin-right:10px;"
"margin-left:10px;\">"
,(long)DX + 2 *m_bx
,(long)DY + 2*m_by);
// draw the x-axis
plotter.line ( m_bx , m_by , DX + m_bx , m_by );
// draw the y-axis
plotter.line ( m_bx , m_by , m_bx , DY + m_by);
//plotter.line ( m_bx , m_by , DX + m_bx , m_by );
// 20 x-axis tick marks, one every DX/20 pixels
for ( int x = DX/10 + m_bx ; x < DX - m_bx ; x += DX/10 ) {
for ( int x = DX/20 ; x <= DX ; x += DX/20 ) {
// tick mark
plotter.line ( x , m_by - 15 , x , m_by + 15 );
// generate label
long xv = (long)(dt * (long long)x / (long long)DX) -(long)dt;
char buf [ 32 ];
// in seconds, so put "s" in there
sprintf ( buf , "%lis" , xv );//(float)xv / 1000.0 );
// move cursor
plotter.move ( x , m_by - m_by / 2 - 9 );
// plot label
plotter.alabel ( 'c' , 'c' , buf );
//plotter.line ( x , -20 , x , 20 );
m_gw.safePrintf("<div style=\"position:absolute;"
"left:%li;"
"bottom:0;"
"background-color:#000000;"
"z-index:110;"
"min-height:20px;"
"min-width:3px;\"></div>\n"
, m_bx + (long)x-1
);
long xv = (long)(dt * (long long)x/(long long)DX)-(long)dt;
// LABEL
m_gw.safePrintf("<div style=\"position:absolute;"
"left:%li;"
"bottom:20;"
//"background-color:#000000;"
"z-index:110;"
"min-height:20px;"
"min-width:3px;\">%lis</div>\n"
, (long)x-10 + m_bx
// the label:
, xv
);
}
HashTableX tmpht;
tmpht.set(4,0,0,NULL,0,false,m_niceness,"statsparms");
@ -647,7 +697,7 @@ bool Statsdb::gifLoop ( ) {
// . graph this single graph of this color
// . returns ptr to first point of different color!
plotGraph ( p , pend , gh , &plotter , zoff );
plotGraph ( p , pend , gh , m_gw , zoff );
// prevent collisions
zoff += 20;
@ -705,7 +755,7 @@ bool Statsdb::gifLoop ( ) {
}
// set the line width
plotter.linewidth ( pp->m_thickness );
//plotter.linewidth ( pp->m_thickness );
// get parm hash
long colorHash = pp->m_parmHash;
@ -716,9 +766,9 @@ bool Statsdb::gifLoop ( ) {
// . is really the parm hash in disguise
long c1 = colorHash & 0x00ffffff;
// use the color specified from addStat_r() for this line/pt
plotter.pencolor ( ((c1 >> 16) & 0xff) << 8 ,
((c1 >> 8) & 0xff) << 8 ,
((c1 >> 0) & 0xff) << 8 );
//plotter.pencolor ( ((c1 >> 16) & 0xff) << 8 ,
// ((c1 >> 8) & 0xff) << 8 ,
// ((c1 >> 0) & 0xff) << 8 );
long x1 = pp->m_a;
long x2 = pp->m_b;
@ -727,9 +777,10 @@ bool Statsdb::gifLoop ( ) {
if ( x2 < x1 + 10 ) x2 = x1 + 10;
// . flip the y so we don't have to scroll the browser down
// . DY does not include the axis and tick marks
long fy1 = DY - y1 + m_by ;
//long fy1 = DY - y1 + m_by ;
// plot it
plotter.line ( x1 , fy1 , x2 , fy1 );
//plotter.line ( x1 , fy1 , x2 , fy1 );
drawLine3 ( m_gw , x1 , x2 , y1 , c1 , pp->m_thickness );
// add to map key? only if we haven't already
if ( tmpht.isInTable ( &colorHash ) ) continue;
@ -781,12 +832,15 @@ bool Statsdb::gifLoop ( ) {
//
// all done
if ( plotter.closepl () < 0 )
log("admin: Could not close performance graph object.");
//if ( plotter.closepl () < 0 )
// log("admin: Could not close performance graph object.");
// close the file
fclose ( m_fd );
//fclose ( m_fd );
#endif
//#endif
// close main graphing window
m_gw.safePrintf("</div>\n");
return true;
}
@ -795,15 +849,10 @@ bool Statsdb::gifLoop ( ) {
char *Statsdb::plotGraph ( char *pstart ,
char *pend ,
long graphHash ,
GIFPlotter *plotter ,
//GIFPlotter *plotter ,
SafeBuf &gw ,
long zoff ) {
#ifndef _USEPLOTTER_
return NULL;
#else
// . use "graphHash" to map to unit display
// . this is a disk read volume
Label *label = getLabel ( graphHash );
@ -853,20 +902,16 @@ char *Statsdb::plotGraph ( char *pstart ,
char *retp = p;
// set the line width
plotter->linewidth ( 1 );
//plotter->linewidth ( 1 );
long color = label->m_color;
// use the color specified from addStat_r() for this line/pt
plotter->pencolor ( ((color >> 16) & 0xff) << 8 ,
((color >> 8) & 0xff) << 8 ,
((color >> 0) & 0xff) << 8 );
//plotter->pencolor ( ((color >> 16) & 0xff) << 8 ,
// ((color >> 8) & 0xff) << 8 ,
// ((color >> 0) & 0xff) << 8 );
// how many points per pixel do we have now
//float res = (ymax - ymin) / (float)DY;
// . the minimum difference between ymax and ymin is minDiff.
// . this prevents us from zooming in too close!
float minDiff = (float)DY * label->m_minRes ;
@ -892,7 +937,7 @@ char *Statsdb::plotGraph ( char *pstart ,
// set the line width
plotter->linewidth ( 2 );
//plotter->linewidth ( 2 );
// reset for 2nd scan
p = pstart;
@ -936,8 +981,8 @@ char *Statsdb::plotGraph ( char *pstart ,
// . flip the y so we don't have to scroll the browser down
// . DY does not include the axis and tick marks
// . do not flip y any more for statsdb graphs
long fy1 = (long)(y1+.5) + m_by ;
long fy2 = (long)(y2+.5) + m_by ;
long fy1 = (long)(y1+.5);// + m_by ;
long fy2 = (long)(y2+.5);// + m_by ;
// how are we getting -.469 for "query" point?
if ( fy1 < 0 ) continue;
@ -945,7 +990,10 @@ char *Statsdb::plotGraph ( char *pstart ,
// skip if can't make a line
if ( firstPoint ) {
plotter->circle ( x2 , fy2 , 2 );
//plotter->circle ( x2 , fy2 , 2 );
long width = POINTWIDTH;
// draw a POINTWIDTH x POINTWIDTH box now:
drawLine3(m_gw,x2-width/2,x2+width/2,fy2,color,width);
firstPoint = false;
continue;
}
@ -959,32 +1007,38 @@ char *Statsdb::plotGraph ( char *pstart ,
// plot it
// BUT only iff not more than 5 seconds difference
float secondsPerPixel = (m_t2-m_t1)/(float)DX;
float dt = (x2 - x1) * secondsPerPixel;
//float secondsPerPixel = (m_t2-m_t1)/(float)DX;
// avoid this for now. mdw oct 14 2013.
//float dt = (x2 - x1) * secondsPerPixel;
//if ( dt <= 13 || x2 - x1 <= 10 )
// plotter->line ( x1 , fy1 , x2 , fy2 );
if ( dt <= 13 || x2 - x1 <= 10 )
plotter->line ( x1 , fy1 , x2 , fy2 );
// circle second point
plotter->circle ( x1 , fy1 , 2 );
plotter->circle ( x2 , fy2 , 2 );
//plotter->circle ( x1 , fy1 , 2 );
//plotter->circle ( x2 , fy2 , 2 );
// draw POINTWIDTH x POINTWIDTH boxes now:
long width = POINTWIDTH;
drawLine3 ( m_gw,x1-width/2, x1+width/2, fy1,color, width);
drawLine3 ( m_gw,x2-width/2, x2+width/2, fy2,color, width);
}
plotter->linewidth ( 1 );
//plotter->linewidth ( 1 );
// plot unit lines
float deltaz = (ymax-ymin) / 6;
if ( strstr(label->m_keyDesc,"latency" ) ) {
// draw it
drawHR ( 400.0 - 111.0 , ymin , ymax , plotter , label , zoff,0xff0000);
drawHR ( 600.0 - 111.0 , ymin , ymax , plotter , label , zoff , color);
drawHR ( 400.0 - 111.0 , ymin,ymax,m_gw,label,zoff,0xff0000);
drawHR ( 600.0-111.0,ymin,ymax,m_gw,label,zoff,color);
}
if ( strstr(label->m_keyDesc,"queries per sec" ) ) {
// draw it
//deltaz /= 2;
//drawHR ( 120.0 , ymin , ymax , plotter , label , zoff , color );
//drawHR ( 130.0 , ymin , ymax , plotter , label , zoff , color );
drawHR ( 140.0 , ymin , ymax , plotter , label , zoff , color );
//drawHR(120.0, ymin , ymax , plotter , label , zoff , color );
//drawHR(130.0, ymin , ymax , plotter , label , zoff , color );
drawHR ( 140.0 , ymin , ymax ,m_gw , label , zoff , color );
}
@ -992,18 +1046,19 @@ char *Statsdb::plotGraph ( char *pstart ,
// breathe
QUICKPOLL ( m_niceness );
// draw it
drawHR ( z , ymin , ymax , plotter , label , zoff , color );
drawHR ( z , ymin , ymax , m_gw , label , zoff , color );
}
return retp;
#endif
//#endif
}
void Statsdb::drawHR ( float z ,
float ymin ,
float ymax ,
GIFPlotter *plotter ,
//GIFPlotter *plotter ,
SafeBuf &gw,
Label *label ,
float zoff ,
long color ) {
@ -1013,29 +1068,34 @@ void Statsdb::drawHR ( float z ,
// avoid collisions with other graphs
z2 += zoff;
// border
z2 += m_by;
//z2 += m_by;
// round off error
z2 += 0.5;
// for adjustment
//float ptsPerPixel = (ymax-ymin)/ (float)DY;
float ptsPerPixel = (ymax-ymin)/ (float)DY;
// make an adjustment to the label then! -- Commented out because it's currently not used.
//float zadj = zoff * ptsPerPixel;
float zadj = zoff * ptsPerPixel;
#ifdef _USEPLOTTER_
//#ifdef _USEPLOTTER_
// use the color specified from addStat_r() for this line/pt
plotter->pencolor ( ((color >> 16) & 0xff) << 8 ,
((color >> 8) & 0xff) << 8 ,
((color >> 0) & 0xff) << 8 );
//plotter->pencolor ( ((color >> 16) & 0xff) << 8 ,
// ((color >> 8) & 0xff) << 8 ,
// ((color >> 0) & 0xff) << 8 );
// horizontal line
plotter->line ( m_bx, (long)z2 , DX + m_bx, (long)z2 );
//plotter->line ( m_bx, (long)z2 , DX + m_bx, (long)z2 );
long width = 1;
drawLine3 ( m_gw, 0, DX , (long)z2,color, width);
// make label
char tmp[128];
// . use "graphHash" to map to unit display
// . this is a disk read volume
sprintf(tmp,label->m_format,z +zadj);//* label->m_yscalar);
/*
// a white shadow
plotter->pencolor ( 0xffff,0xffff,0xffff );
plotter->move ( m_bx + 80 + 2 , z2 + 10 - 2 );
@ -1056,7 +1116,24 @@ void Statsdb::drawHR ( float z ,
plotter->move ( m_bx + 80 , z2 + 10 );
// plot label
plotter->alabel ( 'c' , 'c' , tmp );
#endif
*/
// LABEL
gw.safePrintf("<div style=\"position:absolute;"
"left:%li;"
"bottom:%li;"
"color:#%lx;"
"z-index:110;"
"font-size:14px;"
"min-height:20px;"
"min-width:3px;\">%s</div>\n"
, (long)(m_bx)
, (long)z2 +m_by
, color
// the label:
, tmp
);
}
void gotListWrapper ( void *state , RdbList *list, Msg5 *msg5 ) {
@ -1285,7 +1362,7 @@ bool Statsdb::addPoint ( long x ,
// convert x into pixel position
float xf = (float)DX * (float)(x - m_t1) / (float)(m_t2 - m_t1);
// round it to nearest pixel
long x2 = (long)(xf + .5) + m_bx;
long x2 = (long)(xf + .5) ;//+ m_bx;
// make this our y pos
float y2 = y;
// average values if tied
@ -1367,7 +1444,7 @@ bool Statsdb::addEventPoint ( long t1 ,
// convert t1 into pixel position
float af = (float)DX * (float)(t1 - m_t1) / (float)(m_t2 - m_t1);
// round it to nearest pixel
long a = (long)(af + .5) + m_bx;
long a = (long)(af + .5) ;//+ m_bx;
// convert t2 into pixel position
//float bf = (float)DX * (float)(t2 - m_t1) / (float)(m_t2 - m_t1);
@ -1435,3 +1512,43 @@ bool Statsdb::addEventPoint ( long t1 ,
log("stats: no room in graph for event");
return true;
}
//////////
//
// NEW CODE HERE
//
//////////
// . draw a HORIZONTAL line in html by appending one absolutely positioned
//   <div> to "sb"
// . x1/x2 are the left/right pixel columns of the line; the m_bx border is
//   added here so callers pass border-free coordinates
// . fy1 is the vertical center of the line in pixels, measured up from the
//   bottom; the m_by border is added here
// . color is a 0xRRGGBB value; only the low 24 bits are printed
// . width is the thickness of the line in pixels
// . an identical line (same x1/x2/fy1/color/width) is only emitted once
//   until m_dupTable is reset
void Statsdb::drawLine3 ( SafeBuf &sb ,
			  long x1 ,
			  long x2 ,
			  long fy1 ,
			  long color ,
			  long width ) {

	// do not draw repeats in the case we have a ton of points to plot
	long key32 ;
	key32 = hash32h ( x1 , 0 );
	key32 = hash32h ( x2 , key32);
	key32 = hash32h ( fy1 , key32);
	key32 = hash32h ( color , key32);
	key32 = hash32h ( width , key32);
	if ( m_dupTable.isInTable(&key32) ) return;
	m_dupTable.addKey(&key32);

	// . every length carries an explicit "px" unit; a non-zero unitless
	//   length is invalid css and is ignored outside of quirks mode
	// . the color is zero padded to 6 hex digits so that a value like
	//   0x00ff00 prints as "#00ff00" and not as "#ff00"
	sb.safePrintf("<div style=\"position:absolute;"
		      "left:%lipx;"
		      "bottom:%lipx;"
		      "background-color:#%06lx;"
		      "z-index:-5;"
		      "min-height:%lipx;"
		      "min-width:%lipx;\"></div>\n"
		      , x1 + m_bx
		      , (fy1 - width/2) + m_by
		      , color & 0x00ffffff
		      , width
		      , x2 - x1
		      );
}

@ -73,13 +73,22 @@ class Statsdb {
char *plotGraph ( char *pstart ,
char *pend ,
long graphHash ,
class GIFPlotter *plotter ,
//class GIFPlotter *plotter ,
SafeBuf &gw,
long zoff );
void drawLine3 ( SafeBuf &sb ,
long x1 ,
long x2 ,
long fy1 ,
long color ,
long width ) ;
void drawHR ( float z ,
float ymin ,
float ymax ,
class GIFPlotter *plotter ,
//class GIFPlotter *plotter ,
SafeBuf &gw,
class Label *label ,
float zoff ,
long color ) ;
@ -119,6 +128,10 @@ class Statsdb {
RdbList m_list;
Msg1 m_msg1;
// the graphing window. now a bunch of absolute divs in html
SafeBuf m_gw;
HashTableX m_dupTable;
SafeBuf m_sb0;
SafeBuf m_sb1;

@ -3244,7 +3244,8 @@ static Needle s_dirtyWords [] = {
{"stripper" ,0,1,0,0,NULL,0,NULL},
{"softcore" ,0,2,0,0,NULL,0,NULL},
{"whore" ,0,2,0,0,NULL,0,NULL},
{"slut" ,0,2,0,0,NULL,0,NULL},
// gary slutkin on ted.com. make this just 1 point.
{"slut" ,0,1,0,0,NULL,0,NULL},
{"smut" ,0,2,0,0,NULL,0,NULL},
{"tits" ,0,2,0,0,NULL,0,NULL},
{"lesbian" ,0,2,0,0,NULL,0,NULL},
@ -3271,7 +3272,9 @@ static Needle s_dirtyWords [] = {
{"bestial" ,0,2,0,0,NULL,0,NULL},
{"beastial" ,0,2,0,0,NULL,0,NULL},
{"kink" ,0,2,0,0,NULL,0,NULL},
{"sex" ,0,2,0,0,NULL,0,NULL},
// . "sex" is often substring in tagids.
// . too many false positives, make "1" not "2"
{"sex" ,0,1,0,0,NULL,0,NULL},
{"anal" ,0,2,0,0,NULL,0,NULL},
{"cum" ,0,2,0,0,NULL,0,NULL}, // often used for cumulative
{"clit" ,0,2,0,0,NULL,0,NULL},
@ -3280,7 +3283,7 @@ static Needle s_dirtyWords [] = {
{"wank" ,0,2,0,0,NULL,0,NULL},
{"fick" ,0,2,0,0,NULL,0,NULL},
{"eroti" ,0,2,0,0,NULL,0,NULL},
{"gay" ,0,2,0,0,NULL,0,NULL},
{"gay" ,0,1,0,0,NULL,0,NULL}, // make 1 pt. 'marvin gay'
// new stuff not in Url.cpp
{"thong" ,0,1,0,0,NULL,0,NULL},
{"masturbat" ,0,2,0,0,NULL,0,NULL},
@ -3299,36 +3302,51 @@ static Needle s_dirtyWords [] = {
{"shit" ,0,2,0,0,NULL,0,NULL},
{"naked" ,0,1,0,0,NULL,0,NULL},
{"nympho" ,0,2,0,0,NULL,0,NULL},
{"hardcore" ,0,2,0,0,NULL,0,NULL},
{"hardcore" ,0,1,0,0,NULL,0,NULL}, // hardcore gamer, count as 1
{"sodom" ,0,2,0,0,NULL,0,NULL},
{"titties" ,0,2,0,0,NULL,0,NULL}, // re-do
{"twat" ,0,2,0,0,NULL,0,NULL},
{"bastard" ,0,1,0,0,NULL,0,NULL},
{"erotik" ,0,2,0,0,NULL,0,NULL},
// EXCEPTIONS
// smut
{"transmut" ,0,-2,0,0,NULL,0,NULL},
{"bismuth" ,0,-2,0,0,NULL,0,NULL},
// sex
{"middlesex" ,0,-1,0,0,NULL,0,NULL},
{"sussex" ,0,-1,0,0,NULL,0,NULL},
{"essex" ,0,-1,0,0,NULL,0,NULL},
{"deusex" ,0,-1,0,0,NULL,0,NULL},
{"sexchange" ,0,-1,0,0,NULL,0,NULL},
{"sexpress" ,0,-1,0,0,NULL,0,NULL},
{"sexpert" ,0,-1,0,0,NULL,0,NULL},
// EXCEPTIONS
// sex
{"middlesex" ,0,-2,0,0,NULL,0,NULL},
{"sussex" ,0,-2,0,0,NULL,0,NULL},
{"essex" ,0,-2,0,0,NULL,0,NULL},
{"deusex" ,0,-2,0,0,NULL,0,NULL},
{"sexchange" ,0,-2,0,0,NULL,0,NULL},
{"sexpress" ,0,-2,0,0,NULL,0,NULL},
{"sexpert" ,0,-2,0,0,NULL,0,NULL},
{"sexcel" ,0,-2,0,0,NULL,0,NULL},
{"sexist" ,0,-2,0,0,NULL,0,NULL},
{"sexile" ,0,-2,0,0,NULL,0,NULL},
{"sexperi" ,0,-2,0,0,NULL,0,NULL},
{"sexual" ,0,-2,0,0,NULL,0,NULL},
{"sexpose" ,0,-2,0,0,NULL,0,NULL},
{"sexclu" ,0,-2,0,0,NULL,0,NULL},
{"sexo" ,0,-2,0,0,NULL,0,NULL},
{"sexism" ,0,-2,0,0,NULL,0,NULL},
{"sexpan" ,0,-2,0,0,NULL,0,NULL}, // buttonsexpanion
{"same-sex" ,0,-2,0,0,NULL,0,NULL},
{"opposite sex",0,-2,0,0,NULL,0,NULL},
{"middlesex" ,0,-1,0,0,NULL,0,NULL},
{"sussex" ,0,-1,0,0,NULL,0,NULL},
{"essex" ,0,-1,0,0,NULL,0,NULL},
{"deusex" ,0,-1,0,0,NULL,0,NULL},
{"sexchange" ,0,-1,0,0,NULL,0,NULL},
{"sexpress" ,0,-1,0,0,NULL,0,NULL},
{"sexpert" ,0,-1,0,0,NULL,0,NULL},
{"sexcel" ,0,-1,0,0,NULL,0,NULL},
{"sexist" ,0,-1,0,0,NULL,0,NULL},
{"sexile" ,0,-1,0,0,NULL,0,NULL},
{"sexperi" ,0,-1,0,0,NULL,0,NULL},
{"sexual" ,0,-1,0,0,NULL,0,NULL},
{"sexpose" ,0,-1,0,0,NULL,0,NULL},
{"sexclu" ,0,-1,0,0,NULL,0,NULL},
{"sexo" ,0,-1,0,0,NULL,0,NULL},
{"sexism" ,0,-1,0,0,NULL,0,NULL},
{"sexpan" ,0,-1,0,0,NULL,0,NULL}, // buttonsexpanion
{"same-sex" ,0,-1,0,0,NULL,0,NULL},
{"opposite sex",0,-1,0,0,NULL,0,NULL},
// anal
{"analog" ,0,-2,0,0,NULL,0,NULL},
@ -3431,6 +3449,265 @@ static Needle s_dirtyWords [] = {
{"shitak" ,0,-2,0,0,NULL,0,NULL}
};
////
//// New stuff from sex.com adult word list
////
////
//// make it a 2nd part because of performance limits on matches2.cpp algo
////
// . second table of adult needles, scanned separately from s_dirtyWords
// . the third field of each entry is the score added per matching needle
//   (presumably Needle::m_id -- confirm against the Needle definition):
//   2 for really dirty words, 1 for borderline ones
// . scores from this table are added to the scores from s_dirtyWords, so a
//   string listed in both tables would be counted twice. exact duplicates
//   of s_dirtyWords entries are therefore commented out and marked "dup".
static Needle s_dirtyWordsPart2 [] = {
{"amateurfoto" ,0,2,0,0,NULL,0,NULL},
{"amateurhardcore" ,0,2,0,0,NULL,0,NULL},
{"amateurindex" ,0,2,0,0,NULL,0,NULL},
{"amateurnaked" ,0,2,0,0,NULL,0,NULL},
{"amatuerhardcore" ,0,2,0,0,NULL,0,NULL},
{"ampland" ,0,2,0,0,NULL,0,NULL},
//{"animehentai" ,0,2,0,0,NULL,0,NULL}, dup
{"anitablonde" ,0,2,0,0,NULL,0,NULL},
{"asiacarrera" ,0,2,0,0,NULL,0,NULL},
{"asshole" ,0,2,0,0,NULL,0,NULL},
{"asslick" ,0,2,0,0,NULL,0,NULL},
{"asspic" ,0,2,0,0,NULL,0,NULL},
{"assworship" ,0,2,0,0,NULL,0,NULL},
//{"badgirl" ,0,2,0,0,NULL,0,NULL}, not necessarily bad
{"bareceleb" ,0,2,0,0,NULL,0,NULL},
{"barenaked" ,0,2,0,0,NULL,0,NULL},
{"beaverboy" ,0,2,0,0,NULL,0,NULL},
{"beavershot" ,0,2,0,0,NULL,0,NULL}, // was beavershots
//{"bigball" ,0,2,0,0,NULL,0,NULL}, // not necessarily bad
{"bigbreast" ,0,2,0,0,NULL,0,NULL},
//{"bigbutt" ,0,2,0,0,NULL,0,NULL}, // not necessarily bad
{"bigcock" ,0,2,0,0,NULL,0,NULL},
{"bigdick" ,0,2,0,0,NULL,0,NULL},
{"biggestdick" ,0,2,0,0,NULL,0,NULL},
{"biggesttit" ,0,2,0,0,NULL,0,NULL},
{"bighairyball" ,0,2,0,0,NULL,0,NULL},
{"bighooter" ,0,2,0,0,NULL,0,NULL},
{"bignipple" ,0,2,0,0,NULL,0,NULL},
{"bigtit" ,0,2,0,0,NULL,0,NULL},
{"blackbooty" ,0,2,0,0,NULL,0,NULL},
{"blackbutt" ,0,2,0,0,NULL,0,NULL},
{"blackcock" ,0,2,0,0,NULL,0,NULL},
{"blackdick" ,0,2,0,0,NULL,0,NULL},
{"blackhardcore" ,0,2,0,0,NULL,0,NULL},
{"blackonblonde" ,0,2,0,0,NULL,0,NULL},
{"blacksonblonde" ,0,2,0,0,NULL,0,NULL},
{"blacktit" ,0,2,0,0,NULL,0,NULL},
{"blacktwat" ,0,2,0,0,NULL,0,NULL},
{"boner" ,0,1,0,0,NULL,0,NULL}, // softcore, someone's lastname?
{"bordello" ,0,2,0,0,NULL,0,NULL},
{"braless" ,0,2,0,0,NULL,0,NULL},
{"brothel" ,0,2,0,0,NULL,0,NULL},
{"bukake" ,0,2,0,0,NULL,0,NULL},
{"bukkake" ,0,2,0,0,NULL,0,NULL},
{"bustyblonde" ,0,2,0,0,NULL,0,NULL},
{"bustyceleb" ,0,2,0,0,NULL,0,NULL},
{"butthole" ,0,2,0,0,NULL,0,NULL},
{"buttman" ,0,2,0,0,NULL,0,NULL},
{"buttpic" ,0,2,0,0,NULL,0,NULL},
{"buttplug" ,0,2,0,0,NULL,0,NULL},
{"buttthumbnails" ,0,2,0,0,NULL,0,NULL},
{"callgirl" ,0,2,0,0,NULL,0,NULL},
{"celebritiesnaked" ,0,2,0,0,NULL,0,NULL},
{"celebritybush" ,0,2,0,0,NULL,0,NULL},
{"celebritybutt" ,0,2,0,0,NULL,0,NULL},
{"chaseylain" ,0,2,0,0,NULL,0,NULL},
{"chickswithdick" ,0,2,0,0,NULL,0,NULL},
{"christycanyon" ,0,2,0,0,NULL,0,NULL},
{"cicciolina" ,0,2,0,0,NULL,0,NULL},
//{"cunilingus" ,0,2,0,0,NULL,0,NULL},
{"cunniling" ,0,2,0,0,NULL,0,NULL}, // abbreviate
{"cyberlust" ,0,2,0,0,NULL,0,NULL},
{"danniashe" ,0,2,0,0,NULL,0,NULL},
{"dicksuck" ,0,2,0,0,NULL,0,NULL},
{"dirtymind" ,0,2,0,0,NULL,0,NULL},
{"dirtypicture" ,0,2,0,0,NULL,0,NULL},
{"doggiestyle" ,0,2,0,0,NULL,0,NULL},
{"doggystyle" ,0,2,0,0,NULL,0,NULL},
{"domatrix" ,0,2,0,0,NULL,0,NULL},
{"dominatrix" ,0,2,0,0,NULL,0,NULL},
//{"dyke" ,0,2,0,0,NULL,0,NULL}, // dick van dyke!
{"ejaculation" ,0,2,0,0,NULL,0,NULL},
{"erosvillage" ,0,2,0,0,NULL,0,NULL},
{"facesit" ,0,2,0,0,NULL,0,NULL},
{"fatass" ,0,2,0,0,NULL,0,NULL},
{"feetfetish" ,0,2,0,0,NULL,0,NULL},
{"felatio" ,0,2,0,0,NULL,0,NULL},
{"fellatio" ,0,2,0,0,NULL,0,NULL},
{"femdom" ,0,2,0,0,NULL,0,NULL},
{"fetishwear" ,0,2,0,0,NULL,0,NULL},
{"fettegirl" ,0,2,0,0,NULL,0,NULL},
{"fingerbang" ,0,2,0,0,NULL,0,NULL},
{"fingering" ,0,1,0,0,NULL,0,NULL}, // fingering the keyboard? use 1
{"flesh4free" ,0,2,0,0,NULL,0,NULL},
{"footfetish" ,0,2,0,0,NULL,0,NULL},
{"footjob" ,0,2,0,0,NULL,0,NULL},
{"footlicking" ,0,2,0,0,NULL,0,NULL},
{"footworship" ,0,2,0,0,NULL,0,NULL},
{"fornication" ,0,2,0,0,NULL,0,NULL},
{"freeass" ,0,2,0,0,NULL,0,NULL},
{"freebigtit" ,0,2,0,0,NULL,0,NULL},
{"freedick" ,0,2,0,0,NULL,0,NULL},
{"freehardcore" ,0,2,0,0,NULL,0,NULL},
//{"freehentai" ,0,2,0,0,NULL,0,NULL}, dup
{"freehooter" ,0,2,0,0,NULL,0,NULL},
{"freelargehooter" ,0,2,0,0,NULL,0,NULL},
{"freenakedpic" ,0,2,0,0,NULL,0,NULL},
{"freenakedwomen" ,0,2,0,0,NULL,0,NULL},
{"freetit" ,0,2,0,0,NULL,0,NULL},
{"freevoyeur" ,0,2,0,0,NULL,0,NULL},
{"gratishardcoregalerie" ,0,2,0,0,NULL,0,NULL},
{"hardcorecelebs" ,0,2,0,0,NULL,0,NULL},
{"hardcorefree" ,0,2,0,0,NULL,0,NULL},
{"hardcorehooter" ,0,2,0,0,NULL,0,NULL},
{"hardcorejunkie" ,0,2,0,0,NULL,0,NULL},
{"hardcorejunky" ,0,2,0,0,NULL,0,NULL},
{"hardcoremovie" ,0,2,0,0,NULL,0,NULL},
{"hardcorepic" ,0,2,0,0,NULL,0,NULL},
{"hardcorepix" ,0,2,0,0,NULL,0,NULL},
{"hardcoresample" ,0,2,0,0,NULL,0,NULL},
{"hardcorestories" ,0,2,0,0,NULL,0,NULL},
{"hardcorethumb" ,0,2,0,0,NULL,0,NULL},
{"hardcorevideo" ,0,2,0,0,NULL,0,NULL},
{"harddick" ,0,2,0,0,NULL,0,NULL},
{"hardnipple" ,0,2,0,0,NULL,0,NULL},
{"hardon" ,0,2,0,0,NULL,0,NULL},
{"hentai" ,0,2,0,0,NULL,0,NULL},
{"interacialhardcore" ,0,2,0,0,NULL,0,NULL},
{"intercourseposition" ,0,2,0,0,NULL,0,NULL},
{"interracialhardcore" ,0,2,0,0,NULL,0,NULL},
{"ittybittytitty" ,0,2,0,0,NULL,0,NULL},
{"jackoff" ,0,2,0,0,NULL,0,NULL},
{"jennajameson" ,0,2,0,0,NULL,0,NULL},
{"jennicam" ,0,2,0,0,NULL,0,NULL},
{"jerkoff" ,0,2,0,0,NULL,0,NULL},
{"jism" ,0,2,0,0,NULL,0,NULL},
{"jiz" ,0,2,0,0,NULL,0,NULL},
{"justhardcore" ,0,2,0,0,NULL,0,NULL},
{"karasamateurs" ,0,2,0,0,NULL,0,NULL},
{"kascha" ,0,2,0,0,NULL,0,NULL},
{"kaylakleevage" ,0,2,0,0,NULL,0,NULL},
{"kobetai" ,0,2,0,0,NULL,0,NULL},
{"lapdance" ,0,2,0,0,NULL,0,NULL},
{"largedick" ,0,2,0,0,NULL,0,NULL},
{"largehooter" ,0,2,0,0,NULL,0,NULL},
{"largestbreast" ,0,2,0,0,NULL,0,NULL},
{"largetit" ,0,2,0,0,NULL,0,NULL},
{"lesben" ,0,2,0,0,NULL,0,NULL},
{"lesbo" ,0,2,0,0,NULL,0,NULL},
{"lickadick" ,0,2,0,0,NULL,0,NULL},
{"lindalovelace" ,0,2,0,0,NULL,0,NULL},
{"longdick" ,0,2,0,0,NULL,0,NULL},
{"lovedoll" ,0,2,0,0,NULL,0,NULL},
{"makinglove" ,0,2,0,0,NULL,0,NULL},
{"mangax" ,0,2,0,0,NULL,0,NULL},
{"manpic" ,0,2,0,0,NULL,0,NULL},
{"marilynchambers" ,0,2,0,0,NULL,0,NULL},
{"massivecock" ,0,2,0,0,NULL,0,NULL},
{"masterbating" ,0,2,0,0,NULL,0,NULL},
{"mensdick" ,0,2,0,0,NULL,0,NULL},
{"milf" ,0,2,0,0,NULL,0,NULL},
{"minka" ,0,2,0,0,NULL,0,NULL},
{"monstercock" ,0,2,0,0,NULL,0,NULL},
{"monsterdick" ,0,2,0,0,NULL,0,NULL},
{"muffdiving" ,0,2,0,0,NULL,0,NULL},
{"nacktfoto" ,0,2,0,0,NULL,0,NULL},
{"nakedblackwomen" ,0,2,0,0,NULL,0,NULL},
{"nakedceleb" ,0,2,0,0,NULL,0,NULL},
{"nakedcelebrity" ,0,2,0,0,NULL,0,NULL},
{"nakedcheerleader" ,0,2,0,0,NULL,0,NULL},
{"nakedchick" ,0,2,0,0,NULL,0,NULL},
{"nakedgirl" ,0,2,0,0,NULL,0,NULL},
{"nakedguy" ,0,2,0,0,NULL,0,NULL},
{"nakedladies" ,0,2,0,0,NULL,0,NULL},
{"nakedlady" ,0,2,0,0,NULL,0,NULL},
{"nakedman" ,0,2,0,0,NULL,0,NULL},
{"nakedmen" ,0,2,0,0,NULL,0,NULL},
{"nakedness" ,0,2,0,0,NULL,0,NULL},
{"nakedphoto" ,0,2,0,0,NULL,0,NULL},
{"nakedpic" ,0,2,0,0,NULL,0,NULL},
{"nakedstar" ,0,2,0,0,NULL,0,NULL},
{"nakedwife" ,0,2,0,0,NULL,0,NULL},
{"nakedwoman" ,0,2,0,0,NULL,0,NULL},
{"nakedwomen" ,0,2,0,0,NULL,0,NULL},
{"nastychat" ,0,2,0,0,NULL,0,NULL},
{"nastythumb" ,0,2,0,0,NULL,0,NULL},
{"naughtylink" ,0,2,0,0,NULL,0,NULL},
{"naughtylinx" ,0,2,0,0,NULL,0,NULL},
{"naughtylynx" ,0,2,0,0,NULL,0,NULL},
{"naughtynurse" ,0,2,0,0,NULL,0,NULL},
{"niceass" ,0,2,0,0,NULL,0,NULL},
{"nikkinova" ,0,2,0,0,NULL,0,NULL},
{"nikkityler" ,0,2,0,0,NULL,0,NULL},
{"nylonfetish" ,0,2,0,0,NULL,0,NULL},
//{"nympho" ,0,2,0,0,NULL,0,NULL}, // dup of s_dirtyWords entry
{"openleg" ,0,2,0,0,NULL,0,NULL},
{"oral4free" ,0,2,0,0,NULL,0,NULL},
{"pantyhosefetish" ,0,2,0,0,NULL,0,NULL},
{"peepcam" ,0,2,0,0,NULL,0,NULL},
{"persiankitty" ,0,2,0,0,NULL,0,NULL},
{"perverted" ,0,2,0,0,NULL,0,NULL},
{"pimpserver" ,0,2,0,0,NULL,0,NULL},
{"pissing" ,0,2,0,0,NULL,0,NULL},
{"poontang" ,0,2,0,0,NULL,0,NULL},
{"privatex" ,0,2,0,0,NULL,0,NULL},
{"prono" ,0,2,0,0,NULL,0,NULL},
{"publicnudity" ,0,2,0,0,NULL,0,NULL},
{"puffynipple" ,0,2,0,0,NULL,0,NULL},
{"racqueldarrian" ,0,2,0,0,NULL,0,NULL},
//{"rape" ,0,2,0,0,NULL,0,NULL}, // dup!
{"rawlink" ,0,2,0,0,NULL,0,NULL},
{"realhardcore" ,0,2,0,0,NULL,0,NULL},
{"rubberfetish" ,0,2,0,0,NULL,0,NULL},
{"seka" ,0,2,0,0,NULL,0,NULL},
{"sheboy" ,0,2,0,0,NULL,0,NULL},
{"showcam" ,0,2,0,0,NULL,0,NULL},
{"showercam" ,0,2,0,0,NULL,0,NULL},
{"smallbreast" ,0,2,0,0,NULL,0,NULL},
{"smalldick" ,0,2,0,0,NULL,0,NULL},
{"spycamadult" ,0,2,0,0,NULL,0,NULL},
{"strapon" ,0,2,0,0,NULL,0,NULL},
{"stripclub" ,0,2,0,0,NULL,0,NULL},
{"stripshow" ,0,2,0,0,NULL,0,NULL},
{"striptease" ,0,2,0,0,NULL,0,NULL},
{"strokeit" ,0,2,0,0,NULL,0,NULL},
{"strokeme" ,0,2,0,0,NULL,0,NULL},
{"suckdick" ,0,2,0,0,NULL,0,NULL},
{"sylviasaint" ,0,2,0,0,NULL,0,NULL},
{"teenhardcore" ,0,2,0,0,NULL,0,NULL},
{"teenie" ,0,2,0,0,NULL,0,NULL},
{"teenpic" ,0,2,0,0,NULL,0,NULL},
{"teensuck" ,0,2,0,0,NULL,0,NULL},
{"tgp" ,0,2,0,0,NULL,0,NULL},
{"threesome" ,0,2,0,0,NULL,0,NULL},
{"thumblord" ,0,2,0,0,NULL,0,NULL},
{"thumbzilla" ,0,2,0,0,NULL,0,NULL},
{"tiffanytowers" ,0,2,0,0,NULL,0,NULL},
{"tinytitties" ,0,2,0,0,NULL,0,NULL},
//{"tities" ,0,2,0,0,NULL,0,NULL}, // entities
{"titman" ,0,2,0,0,NULL,0,NULL},
{"titsandass" ,0,2,0,0,NULL,0,NULL},
//{"titties" ,0,2,0,0,NULL,0,NULL}, // dup of s_dirtyWords entry
{"titts" ,0,2,0,0,NULL,0,NULL},
{"titty" ,0,2,0,0,NULL,0,NULL},
{"tokyotopless" ,0,2,0,0,NULL,0,NULL},
{"tommysbookmark" ,0,2,0,0,NULL,0,NULL},
{"toplesswomen" ,0,2,0,0,NULL,0,NULL},
{"trannies" ,0,2,0,0,NULL,0,NULL},
{"twinks" ,0,2,0,0,NULL,0,NULL},
{"ultradonkey" ,0,2,0,0,NULL,0,NULL},
{"ultrahardcore" ,0,2,0,0,NULL,0,NULL},
{"uncutcock" ,0,2,0,0,NULL,0,NULL},
{"vividtv" ,0,2,0,0,NULL,0,NULL},
{"wendywhoppers" ,0,2,0,0,NULL,0,NULL},
{"wetdick" ,0,2,0,0,NULL,0,NULL},
{"wetpanties" ,0,2,0,0,NULL,0,NULL},
{"wifesharing" ,0,2,0,0,NULL,0,NULL},
{"wifeswapping" ,0,2,0,0,NULL,0,NULL},
{"xrated" ,0,2,0,0,NULL,0,NULL}
};
// . store this in clusterdb rec so family filter works!
// . check content for adult words
char *XmlDoc::getIsAdult ( ) {
@ -3545,6 +3822,51 @@ long getDirtyPoints ( char *s , long slen , long niceness , char *url ) {
,url
);
}
////
//
// repeat for part2
//
// the dirty words are split into two separate needle lists and matched
// in two passes, otherwise the algo in matches2.cpp gets really slow.
// it was not meant to match so many needles in one haystack.
//
////
long numDirty2 = sizeof(s_dirtyWordsPart2) / sizeof(Needle);
//numDirty2 = 0;
getMatches2 ( s_dirtyWordsPart2 ,
numDirty2 ,
s ,
slen ,
NULL , // linkPos
NULL , // needleNum
false , // stopAtFirstMatch?
NULL , // hadPreMatch ptr
true , // saveQuickTables?
niceness );
// each needle has an associated score
for ( long i = 0 ; i < numDirty2 ; i++ ) {
// skip if no match
if ( s_dirtyWordsPart2[i].m_count <= 0 ) continue;
// . the "id", is positive for dirty words, - for clean
// . uses +2/-2 for really dirty words
// . uses +1/-1 for borderline dirty words
points += s_dirtyWordsPart2[i].m_id;
// log debug
if ( ! g_conf.m_logDebugDirty ) continue;
// show it in the log
log("dirty: %s %li %s"
,s_dirtyWordsPart2[i].m_string
,(long)s_dirtyWordsPart2[i].m_id
,url
);
}
return points;
}

@ -594,7 +594,7 @@
<logDebugBuildMessages>0</>
<logDebugBuildTimeMessages>0</>
<logDebugDatabaseMessages>0</>
<logDebugDirtyMessages>0</>
<logDebugDirtyMessages>1</>
<logDebugDiskMessages>0</>
<logDebugDnsMessages>0</>
<logDebugHttpMessages>0</>

@ -6,7 +6,8 @@
#include "HashTableT.h"
//make the key, it is just the needles ptr
static HashTableT<unsigned long long , char*> s_quickTables;
//static HashTableT<unsigned long long , char*> s_quickTables;
static HashTableX s_quickTables;
/*
// returns false and sets g_errno on error
@ -63,6 +64,9 @@ bool fast_highlight ( // highlight these query terms:
// to lower and store into tmp[]. TODO.
// . a space (includes \r \n) in a needle will match a consecutive sequence
// of spaces in the haystack
#define BITVEC unsigned long long
char *getMatches2 ( Needle *needles ,
long numNeedles ,
char *haystack ,
@ -108,51 +112,69 @@ char *getMatches2 ( Needle *needles ,
// . TODO: use a static cache of like 4 of these tables where the key
// is the Needles ptr ... done
long numNeedlesToInit = numNeedles;
char space[256 * 5 * sizeof(unsigned long)];
char space[256 * 6 * sizeof(BITVEC)];
char *buf = NULL;
unsigned long *s0;
unsigned long *s1;
unsigned long *s2;
unsigned long *s3;
unsigned long *s4;
BITVEC *s0;
BITVEC *s1;
BITVEC *s2;
BITVEC *s3;
BITVEC *s4;
BITVEC *s5;
/*
static bool s_quickTableInit = false;
static char s_qtbuf[128*(12+1)*2];
long slot = -1;
if(saveQuickTables) {
uint64_t key = (uint32_t)needles;
long slot = s_quickTables.getSlot(key);
if(slot == -1) {
buf = (char*)mcalloc(sizeof(unsigned long)*256*5,
"matches");
if(buf) s_quickTables.addKey(key, buf);
//sanity check, no reason why there needs to be a
//limit, I just don't expect there to be this many
//static needles at this point.
if(s_quickTables.getNumSlotsUsed() > 32){
char *xx=NULL; *xx = 0;
}
if ( ! s_quickTableInit ) {
s_quickTableInit = true;
s_quickTables.set(8,4,128,s_qtbuf,256*13,false,0,"qx");
}
else {
uint64_t key = (uint32_t)needles;
slot = s_quickTables.getSlot(&key);
if ( slot >= 0 ) {
buf = s_quickTables.getValueFromSlot(slot);
numNeedlesToInit = 0;
}
}
*/
if(!buf) {
buf = space;
memset ( buf , 0 , sizeof(unsigned long)*256*5);
memset ( buf , 0 , sizeof(BITVEC)*256*6);
}
long offset = 0;
s0 = (unsigned long*)(buf + offset);
offset += sizeof(unsigned long)*256;
s1 = (unsigned long*)(buf + offset);
offset += sizeof(unsigned long)*256;
s2 = (unsigned long*)(buf + offset);
offset += sizeof(unsigned long)*256;
s3 = (unsigned long*)(buf + offset);
offset += sizeof(unsigned long)*256;
s4 = (unsigned long*)(buf + offset);
/*
if( useQuickTables && slot == -1 ) {
//buf = (char*)mcalloc(sizeof(unsigned long)*256*5,
// "matches");
if(buf) s_quickTables.addKey(&key, &buf);
//sanity check, no reason why there needs to be a
//limit, I just don't expect there to be this many
//static needles at this point.
if(s_quickTables.getNumSlotsUsed() > 32){
char *xx=NULL; *xx = 0;
}
}
*/
unsigned long mask;
// use 64-bit bit vectors now since we doubled the # of needles
long offset = 0;
s0 = (BITVEC *)(buf + offset);
offset += sizeof(BITVEC)*256;
s1 = (BITVEC *)(buf + offset);
offset += sizeof(BITVEC)*256;
s2 = (BITVEC *)(buf + offset);
offset += sizeof(BITVEC)*256;
s3 = (BITVEC *)(buf + offset);
offset += sizeof(BITVEC)*256;
s4 = (BITVEC *)(buf + offset);
offset += sizeof(BITVEC)*256;
s5 = (BITVEC *)(buf + offset);
BITVEC mask;
// set the letter tables, s0[] through sN[], for each needle
for ( long i = 0 ; i < numNeedlesToInit ; i++ ) {
@ -160,7 +182,8 @@ char *getMatches2 ( Needle *needles ,
QUICKPOLL(niceness);
unsigned char *w = (unsigned char *)needles[i].m_string;
unsigned char *wend = w + needles[i].m_stringSize;
mask = (1<<(i&0x1f)); // (1<<(i%32));
// BITVEC is now 64 bits, so the needle's bit has to be built from a
// 64-bit one. a plain "1" is an int, and shifting an int by 32 or more
// is undefined behavior where int is 32 bits (1<<31 also sign-extends
// when widened into the mask).
mask = (1ULL<<(i&0x3f)); // (1ULL<<(i%64));
// if the needle is small, fill up the remaining letter tables
// with its mask... so it matches any character in haystack.
s0[(unsigned char)to_lower_a(*w)] |= mask;
@ -172,6 +195,7 @@ char *getMatches2 ( Needle *needles ,
s2[j] |= mask;
s3[j] |= mask;
s4[j] |= mask;
s5[j] |= mask;
}
continue;
}
@ -184,6 +208,7 @@ char *getMatches2 ( Needle *needles ,
s2[j] |= mask;
s3[j] |= mask;
s4[j] |= mask;
s5[j] |= mask;
}
continue;
}
@ -195,6 +220,7 @@ char *getMatches2 ( Needle *needles ,
for ( long j = 0 ; j < 256 ; j++ ) {
s3[j] |= mask;
s4[j] |= mask;
s5[j] |= mask;
}
continue;
}
@ -206,12 +232,24 @@ char *getMatches2 ( Needle *needles ,
if ( w >= wend ) {
for ( long j = 0 ; j < 256 ; j++ ) {
s4[j] |= mask;
s5[j] |= mask;
}
continue;
}
s4[(unsigned char)to_lower_a(*w)] |= mask;
s4[(unsigned char)to_upper_a(*w)] |= mask;
w += 1;//step;
if ( w >= wend ) {
for ( long j = 0 ; j < 256 ; j++ ) {
s5[j] |= mask;
}
continue;
}
s5[(unsigned char)to_lower_a(*w)] |= mask;
s5[(unsigned char)to_upper_a(*w)] |= mask;
w += 1;//step;
}
// return a ptr to the first match if we should, this is it
@ -245,6 +283,8 @@ char *getMatches2 ( Needle *needles ,
if ( ! mask ) continue;
mask &= s4[*(p+4)];
if ( ! mask ) continue;
mask &= s5[*(p+5)];
if ( ! mask ) continue;
//debugCount++;
/*
// display
@ -273,7 +313,7 @@ char *getMatches2 ( Needle *needles ,
// we got a good candidate, loop through all the needles
for ( long j = 0 ; j < numNeedles ; j++ ) {
// skip if does not match mask, will save time
if ( ! ((1<<(j&0x1f)) & mask) ) continue;
// must test the same 64-bit bit that the table setup gave needle j
if ( ! ((1ULL<<(j&0x3f)) & mask) ) continue;
if( needles[j].m_stringSize > 3) {
// ensure first 4 bytes matches this needle's
if (needles[j].m_string[0]!=to_lower_a(*(p+0)))
@ -421,7 +461,7 @@ char *getMatches2 ( Needle *needles ,
// we got a good candidate, loop through all the needles
for ( long j = 0 ; j < numNeedles ; j++ ) {
// skip if does not match mask, will save time
if ( ! ((1<<(j&0x1f)) & mask) ) continue;
// must test the same 64-bit bit that the table setup gave needle j
if ( ! ((1ULL<<(j&0x3f)) & mask) ) continue;
if( needles[j].m_stringSize > 3) {
// ensure first 4 bytes matches this needle's
if (needles[j].m_string[0]!=to_lower_a(*(p+0)))