mirror of
				https://github.com/privacore/open-source-search-engine.git
				synced 2025-10-30 16:36:11 -04:00 
			
		
		
		
	
		
			
				
	
	
		
			104 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			104 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| #include "gb-include.h"
 | |
| 
 | |
| #include <sys/time.h>
 | |
| #include <sys/types.h>
 | |
| #include <sys/stat.h>
 | |
| #include <fcntl.h>
 | |
| #include <errno.h>
 | |
| #include "iana_charset.h"
 | |
| #include <iconv.h>
 | |
| 
 | |
| #include "Mem.h"
 | |
| 
 | |
| bool g_recoveryMode = false;
 | |
| int32_t g_recoveryLevel = 0;
 | |
| 
 | |
| int main (int argc, char **argv) {
 | |
| 	// initialize Gigablast variables
 | |
| 	g_conf.m_maxMem = 1000000000LL;
 | |
| 	g_mem.m_memtablesize = 8194*1024;
 | |
| 	g_log.init("/dev/stdout");
 | |
| 	g_conf.m_logDebugBuild = true;
 | |
| 
 | |
| 	// init our table for doing zobrist hashing
 | |
| 	if (!hashinit()) {
 | |
| 		log("db: Failed to init hashtable." );
 | |
| 		exit(1);
 | |
| 	}
 | |
| 
 | |
| 	if (!ucInit()) {
 | |
| 		log("Unicode initialization failed!");
 | |
| 		exit(1);
 | |
| 	}
 | |
| 
 | |
| 	for (int i=2; i <= 2259 ; i++ ){
 | |
| 		char *charset = get_charset_str(i);
 | |
| 		if (!charset) {
 | |
| 			continue;
 | |
| 		}
 | |
| 		
 | |
| 		const char *csAlias = charset;
 | |
| 		if (!strncmp(charset, "x-windows-949", 13)) {
 | |
| 			csAlias = "CP949";
 | |
| 		}
 | |
| 
 | |
| 		if (!strncmp(charset, "Windows-31J", 13)) {
 | |
| 			csAlias = "CP932";
 | |
| 		}
 | |
| 		
 | |
| 		// Treat all latin1 as windows-1252 extended charset
 | |
| 		if (!strcmp(charset, "ISO-8859-1") ) {
 | |
| 			csAlias = "WINDOWS-1252";
 | |
| 		}
 | |
| 		
 | |
| 		iconv_t cd1 = gbiconv_open("UTF-16LE", csAlias);
 | |
| 
 | |
| 		if (cd1 == (iconv_t)-1) {	
 | |
| 			//printf("boo: %8s %5d %50s\n", "",i, csAlias);
 | |
| 			continue;
 | |
| 		}
 | |
| 
 | |
| 		iconv_t cd2 = gbiconv_open(csAlias, "UTF-16LE");
 | |
| 		if (cd2 == (iconv_t)-1) {	
 | |
| 			//printf("boo: %8s %5d %50s\n", "",i, csAlias);
 | |
| 			continue;
 | |
| 		}
 | |
| 
 | |
| // 		char *buf1 = "testing";
 | |
| // 		size_t incount = 7;
 | |
| // 		char buf2[256];
 | |
| // 		size_t outcount = 256;
 | |
| // 		char *p1 = buf1;
 | |
| // 		char *p2 = buf2;
 | |
| 
 | |
| // 		int res = iconv(cd1, &p1, &incount,&p2, &outcount);
 | |
| // 		if (res < 0 && errno) {
 | |
| // 			printf("oops1: %d (%s)\n", errno, strerror(errno));
 | |
| //			continue;
 | |
| // 		}
 | |
| 
 | |
| // 		char buf3[256];
 | |
| // 		incount = outcount;
 | |
| // 		outcount = 256;
 | |
| // 		p1 = buf2;
 | |
| // 		p2 = buf3;
 | |
| // 		res = iconv(cd2, &p1, &incount,&p2, &outcount);
 | |
| // 		if (res < 0 && errno) {
 | |
| // 			printf("oops2: %d (%s)\n", errno, strerror(errno));
 | |
| //			continue;
 | |
| 
 | |
| // 		}
 | |
| 
 | |
| 		//printf("%08x %08x %5d %50s\n",cd1, cd2, i, csAlias);
 | |
| 		printf("%5d %50s\n", i, csAlias);
 | |
| 	}
 | |
| 
 | |
| 	if (gbiconv_open("UTF-8", "WINDOWS-1252") < 0)  {
 | |
| 		return false;
 | |
| 	}
 | |
| 
 | |
| 	if (gbiconv_open("WINDOWS-1252", "UTF-8") < 0) {
 | |
| 		return false;
 | |
| 	}
 | |
| }
 |