Remove old misc/urlinfo.cpp. Use tools/print_urlinfo.cpp instead

2025-07-13 02:36:06 -04:00 · 2016-11-23 11:55:41 +01:00
parent 230cfa9346
commit e67a1ab66f
1 changed files with 0 additions and 184 deletions
--- a/misc/urlinfo.cpp
+++ b/misc/urlinfo.cpp
@ -1,184 +0,0 @@
-// Matt Wells, copyright Jan 2002
-
-// Reads urls from stdin, one per line, normalizes each one and prints its components (tld, domain, host, path, hashes, ...)
-
-#include "gb-include.h"
-
-#include "Url.h"
-#include "Mem.h"
-#include "Titledb.h"
-#include "HttpMime.h"
-#include "SiteGetter.h"
-
-bool g_recoveryMode = false; // not referenced by main() below; presumably defined here to satisfy references in linked project code -- TODO confirm
-int32_t g_recoveryLevel = 0; // not referenced by main() below; presumably same reason as g_recoveryMode -- TODO confirm
-
-int main ( int argc , char *argv[] ) {
-	bool addWWW = true;
-	bool stripSession = true;
-	// parse command-line flags: "-w" turns off addWWW, "-s" turns off stripSession; anything else is ignored
-	for (int32_t i = 1; i < argc; i++) {
-		if (strcmp(argv[i], "-w") == 0)
-			addWWW = false;
-		else if (strcmp(argv[i], "-s") == 0)
-			stripSession = false;
-	}
-	// one-time setup: hashinit() is called once, before the read loop starts
-	//g_mem.init(100*1024);
-	hashinit();
-	//g_conf.m_tfndbExtBits = 23;
- loop:
-	// read one line (a url) from stdin; exit with status 1 when fgets fails (EOF or read error)
-	char sbuf[1024];
-	if ( ! fgets ( sbuf , 1024 , stdin ) ) exit(1);
-	char *s = sbuf;
-	char fbuf[1024];
-	// url-decode into fbuf when the line starts with an encoded scheme; NOTE(review): "https%3A%2F%2F" is 14 chars but only 13 are compared
-	if ( strncmp(s,"http%3A%2F%2F",13) == 0 ||
-	     strncmp(s,"https%3A%2F%2F",13) == 0 ) {
-		urlDecode(fbuf,s,gbstrlen(s));
-		s = fbuf;
-	}
-	// echo the input; no "\n" in the format because a line read by fgets normally still ends with its own newline
-	printf("###############\n");
-	printf("old: %s",s);
-	int32_t slen = gbstrlen(s);
-	// when "-w" was given, strip a "www." prefix from the input (at the very start, or right after "://")
-	if (!addWWW) {
-		if (slen >= 4 &&
-		    strncasecmp(s, "www.", 4) == 0) {
-			slen -= 4;
-			memmove(s, &s[4], slen);
-		}
-		else {
-			// scan forward: si stops at the ':' of the first "://", or equals slen if there is none
-			int32_t si = 0;
-			while (si < slen &&
-			       ( s[si] != ':' ||
-				 s[si+1] != '/' ||
-				 s[si+2] != '/' ) )
-				si++;
-			// if enough characters follow, step over "://" and remove a "www." found right after it
-			if (si + 7 < slen) {
-				si += 3;
-				if (strncasecmp(&s[si], "www.", 4) == 0) {
-					slen -= 4;
-					memmove(&s[si], &s[si+4], slen-si);
-				}
-			}
-		}
-	}
-	// parse the url; the explicit length slen (already reduced if "www." was removed) is passed alongside s
-	Url u;
-	u.set ( s , slen ,
-		addWWW   ,      /*add www?*/
-		stripSession ); /*strip session ids?*/
-	// tld: copy the TLD after the "tld: " label in out, NUL-terminate temporarily, print, then restore the saved byte
-	char out[1024*4];
-	char *p = out;
-	p += sprintf(p,"tld: ");
-	gbmemcpy ( p, u.getTLD(),u.getTLDLen());
-	p += u.getTLDLen();
-	char c = *p;
-	*p = '\0';
-	printf("%s\n",out);
-	*p = c;
-	
-
-	// dom: same copy / terminate / print / restore pattern, applied to the domain
-	p = out;
-	sprintf ( p , "dom: ");
-	p += gbstrlen ( p );
-	gbmemcpy ( p , u.getDomain() , u.getDomainLen() );
-	p += u.getDomainLen();
-	c = *p;
-	*p = '\0';
-	printf("%s\n",out);
-	*p = c;
-	// host: same copy / terminate / print / restore pattern, applied to the host
-	p = out;
-	sprintf ( p , "host: ");
-	p += gbstrlen ( p );
-	gbmemcpy ( p , u.getHost() , u.getHostLen() );
-	p += u.getHostLen();
-	c = *p;
-	*p = '\0';
-	printf("%s\n",out);
-	*p = c;
-	// then the whole url as returned by u.getUrl(), printed with %s
-	printf("url: %s\n", u.getUrl() );
-
-	/*
-	int32_t  siteLen;
-	char *site = u.getSite ( &siteLen , NULL , false );
-	if ( site ) {
-		c = site[siteLen];
-		site[siteLen] = '\0';
-	}
-	printf("site: %s\n", site );
-	if ( site ) site[siteLen] = c;
-	*/
-	SiteGetter sg;
-	sg.getSite ( u.getUrl() ,
-		     NULL , // tagrec
-		     0 , // timestamp
-		     0, // coll
-		     0 , // niceness
-		     NULL , // state
-		     NULL ); // callback
-
-	if ( sg.m_siteLen )
-		printf("site: %s\n",sg.m_site);
-
-	printf("isRoot: %"INT32"\n",(int32_t)u.isRoot());
-
-	/*
-	bool perm = ::isPermalink ( NULL        , // coll
-				    NULL        , // Links ptr
-				    &u          , // the url
-				    CT_HTML     , // contentType
-				    NULL        , // LinkInfo ptr
-				    false       );// isRSS?
-	printf ("isPermalink: %"INT32"\n",(int32_t)perm);
-	*/
-
-	// build "path: <path>" in out; NOTE(review): out is only printed inside the filename branch below, so the path is not shown when getFilename() is NULL
-	p = out;
-
-	p += sprintf ( p , "path: " );
-	gbmemcpy ( p , u.getPath(), u.getPathLen() );
-	p += u.getPathLen();
-
-	if ( u.getFilename() ) {
-		p += sprintf ( p , "\nfilename: " );
-		gbmemcpy ( p , u.getFilename(), u.getFilenameLen() );
-		p += u.getFilenameLen();
-		*p = '\0';
-		printf("%s\n", out );
-	}
-
-	// url-encode the parsed url into dst; the second argument passed is MAX_URL_LEN+100 while dst holds MAX_URL_LEN+200 bytes
-	char dst[MAX_URL_LEN+200];
-	urlEncode ( dst,MAX_URL_LEN+100,
-				u.getUrl(), u.getUrlLen(), 
-				false ); // are we encoding a request path?
-	printf("encoded: %s\n",dst);
-
-	// the probable docid, then hash values of the url; NOTE(review): pd is an int64_t but is printed with the UINT64 format macro
-	int64_t pd = Titledb::getProbableDocId(&u);
-	printf("pdocid: %"UINT64"\n", pd );
-	printf("dom8: 0x%"XINT32"\n", (int32_t)Titledb::getDomHash8FromDocId(pd) );
-	if ( u.isLinkLoop() ) printf("islinkloop: yes\n");
-	else                  printf("islinkloop: no\n");
-	int64_t hh64 = u.getHostHash64();
-	printf("hosthash64: 0x%016"XINT64"\n",hh64);
-	uint32_t hh32 = u.getHostHash32();
-	printf("hosthash32: 0x%08"XINT32" (%"UINT32")\n",hh32,hh32);
-	int64_t dh64 = u.getDomainHash64();
-	printf("domhash64: 0x%016"XINT64"\n",dh64);
-	int64_t uh64 = u.getUrlHash64();
-	printf("urlhash64: 0x%016"XINT64"\n",uh64);
-	//if(isUrlUnregulated(NULL ,0,&u)) printf("unregulated: yes\n");
-	//else                            printf("unregulated: no\n");
-	goto loop;
-}