Cleanup related to languages/Compiler warnings

2025-07-12 02:26:07 -04:00 · 2015-11-23 14:45:18 +01:00
parent 0b2e2bbe3d
commit 6184e376a4
20 changed files with 41 additions and 8252 deletions
--- a/CountryCode.cpp
+++ b/CountryCode.cpp
@ -6,6 +6,7 @@
 #include "HashTable.h"
 #include "Categories.h"
 #include "LanguageIdentifier.h"
+#include "Threads.h"

 // record for unified language/country hash table
 typedef union catcountryrec_t {
--- a/Iso8859.cpp
+++ b/Iso8859.cpp
@ -1,438 +0,0 @@
-#include "Iso8859.h"
-// default for charsets that are highly "non-latin"
-// i.e. only allow ASCII to pass...
-const unsigned char map_8859_default[256]	=
-{
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',0x09,0x0A, ' ', ' ',0x0D, ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-// ^^^ control chars ^^^
-0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
-0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
-0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
-0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
-0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
-0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E, ' ',
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-// ^^^ control chars ^^^
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-};
-// Latin-1
-// Adapted from Wikipedia:
-// Albanian, Basque, Catalan, Danish, Dutch (missing some letters),
-// English, Estonian (missing some letters), Faroese,
-// French (missing some letters), Finnish (missing some letters),
-// Galician, German, Icelandic, Irish (new orthography), Italian,
-// Latin, Norwegian, Portuguese, Rhaeto-Romanic, Scottish, Spanish,
-// Swedish, Afrikaans, Swahili
-const unsigned char map_8859_1[256]	=
-{
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',0x09,0x0A, ' ', ' ',0x0D, ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-// ^^^ control chars ^^^
-0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
-0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
-0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
-0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
-0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
-0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E, ' ',
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-// ^^^ control chars ^^^
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I',
- 'D', 'N', 'O', 'O', 'O', 'O', 'O', ' ', ' ', 'U', 'U', 'U', 'U', 'Y', ' ', ' ',
- 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
- 'o', 'n', 'o', 'o', 'o', 'o', 'o', ' ', ' ', 'u', 'u', 'u', 'u', 'y', ' ', 'y'
-};
-// Latin-2
-// Adapted from Wikipedia:
-// Bosnian, Croatian, Czech, Hungarian, Polish, Romainian, Serbian,
-// Serbocroatian, Slovak, Slovenian, Upper Sorbian and Lower Sorbian
-const unsigned char map_8859_2[256]	=
-{
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',0x09,0x0A, ' ', ' ',0x0D, ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-// ^^^ control chars ^^^
-0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
-0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
-0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
-0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
-0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
-0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E, ' ',
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-// ^^^ control chars ^^^
- ' ', 'A', ' ', 'L', ' ', 'L', 'S', ' ', ' ', 'S', 'S', 'T', 'Z', ' ', 'Z', 'Z',
- ' ', 'a', ' ', 'l', ' ', 'l', 's', ' ', ' ', 's', 's', 't', 'z', ' ', 'z', 'z',
- 'R', 'A', 'A', 'A', 'A', 'L', 'C', 'C', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'D',
- 'D', 'N', 'N', 'O', 'O', 'O', 'O', ' ', 'R', 'U', 'U', 'U', 'U', 'Y', 'T', ' ',
- 'r', 'a', 'a', 'a', 'a', 'l', 'c', 'c', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'd',
- 'd', 'n', 'n', 'o', 'o', 'o', 'o', ' ', 'r', 'u', 'u', 'u', 'u', 'y', 't', ' '
-};
-// Latin-3 (South European)
-// Adapted from Wikipedia:
-// Turkish (superceded by 8859-9), Maltese, Esperanto
-const unsigned char map_8859_3[256]	=
-{
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',0x09,0x0A, ' ', ' ',0x0D, ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-// ^^^ control chars ^^^
-0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
-0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
-0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
-0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
-0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
-0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E, ' ',
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-// ^^^ control chars ^^^
- ' ', 'H', ' ', ' ', ' ', ' ', 'H', ' ', ' ', 'L', 'S', 'G', 'J', ' ', ' ', 'Z',
- ' ', 'h', ' ', ' ', ' ', ' ', 'h', ' ', ' ', 'l', 's', 'g', 'j', ' ', ' ', 'z',
- 'A', 'A', 'A', ' ', 'A', 'C', 'C', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I',
- ' ', 'N', 'O', 'O', 'O', 'G', 'O', ' ', 'G', 'U', 'U', 'U', 'U', 'U', 'S', ' ',
- 'a', 'a', 'a', ' ', 'a', 'c', 'c', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
- ' ', 'n', 'o', 'o', 'o', 'g', 'o', ' ', 'g', 'u', 'u', 'u', 'u', 'u', 's', ' '
-};
-// Latin-4 (North European)
-// Adapted from Wikipedia:
-// Estonian, Latvian, Lithuanian, Greenlandic, and Sami
-const unsigned char map_8859_4[256]	=
-{
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',0x09,0x0A, ' ', ' ',0x0D, ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-// ^^^ control chars ^^^
-0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
-0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
-0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
-0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
-0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
-0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E, ' ',
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-// ^^^ control chars ^^^
- ' ', 'A', 'k', 'R', ' ', 'I', 'L', ' ', ' ', 'S', 'E', 'G', 'T', ' ', 'Z', ' ',
- ' ', 'a', ' ', 'r', ' ', 'i', 'l', ' ', ' ', 's', 'e', 'g', 't', 'N', 'z', 'n',
- 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'I', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I',
- 'D', 'N', 'O', 'K', 'O', 'O', 'O', ' ', ' ', 'U', 'U', 'U', 'U', 'U', 'U', ' ',
- 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'i', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i',
- 'd', 'n', 'o', 'k', 'o', 'o', 'o', ' ', ' ', 'u', 'u', 'u', 'u', 'u', 'u', ' '
-};
-// Latin-5 (Turkish)
-// Adapted from Wikipedia:
-// Turkish
-const unsigned char map_8859_9[256]	=
-{
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',0x09,0x0A, ' ', ' ',0x0D, ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-// ^^^ control chars ^^^
-0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
-0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
-0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
-0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
-0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
-0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E, ' ',
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-// ^^^ control chars ^^^
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I',
- 'G', 'N', 'O', 'O', 'O', 'O', 'O', ' ', ' ', 'U', 'U', 'U', 'U', 'I', 'S', ' ',
- 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
- 'g', 'n', 'o', 'o', 'o', 'o', 'o', ' ', ' ', 'u', 'u', 'u', 'u', 'i', 's', 'y'
-};
-// Latin-6 (Nordic)
-// Adapted from Wikipedia
-const unsigned char map_8859_10[256]	=
-{
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',0x09,0x0A, ' ', ' ',0x0D, ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-// ^^^ control chars ^^^
-0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
-0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
-0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
-0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
-0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
-0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E, ' ',
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-// ^^^ control chars ^^^
- ' ', 'A', 'E', 'G', 'I', 'I', 'K', ' ', 'L', 'D', 'S', 'T', 'Z', ' ', 'U', 'N',
- ' ', 'a', 'e', 'g', 'i', 'i', 'k', ' ', 'l', 'd', 's', 't', 'z', ' ', 'u', 'n',
- 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'I', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I',
- 'D', 'N', 'O', 'O', 'O', 'O', 'O', 'U', ' ', 'U', 'U', 'U', 'U', 'Y', ' ', ' ',
- 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'i', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i',
- 'o', 'n', 'o', 'o', 'o', 'o', 'o', 'u', ' ', 'u', 'u', 'u', 'u', 'y', ' ', 'k'
-};
-// Latin-7 (Baltic Rim)
-// Adapted from Wikipedia:
-const unsigned char map_8859_13[256]	=
-{
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',0x09,0x0A, ' ', ' ',0x0D, ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-// ^^^ control chars ^^^
-0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
-0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
-0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
-0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
-0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
-0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E, ' ',
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-// ^^^ control chars ^^^
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 'R', ' ', ' ', ' ', ' ', 'A',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- 'A', 'I', 'A', 'C', 'A', 'A', 'E', 'E', 'C', 'E', 'Z', 'E', 'G', 'K', 'I', 'L',
- 'S', 'N', 'N', 'O', 'O', 'O', 'O', ' ', 'U', 'L', 'S', 'U', 'U', 'Z', 'Z', ' ',
- 'a', 'i', 'a', 'c', 'a', 'a', 'e', 'e', 'c', 'e', 'z', 'e', 'g', 'k', 'i', 'l',
- 's', 'n', 'n', 'o', 'o', 'o', 'o', ' ', 'u', 'l', 's', 'u', 'u', 'z', 'z', ' '
-};
-// Latin-8 (Celtic)
-// Adapted from Wikipedia:
-// Gaelic, Welsh, Breton
-const unsigned char map_8859_14[256]	=
-{
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',0x09,0x0A, ' ', ' ',0x0D, ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-// ^^^ control chars ^^^
-0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
-0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
-0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
-0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
-0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
-0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E, ' ',
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-// ^^^ control chars ^^^
- ' ', 'B', 'b', ' ', 'C', 'c', 'D', ' ', 'W', ' ', 'W', 'd', 'Y', ' ', ' ', 'Y',
- 'F', 'f', 'G', 'g', 'M', 'm', ' ', 'P', 'w', 'p', 'w', 'S', 'y', 'W', 'w', 's',
- 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I',
- 'W', 'N', 'O', 'O', 'O', 'O', 'O', 'T', ' ', 'U', 'U', 'U', 'U', 'Y', 'y', ' ',
- 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
- 'w', 'n', 'o', 'o', 'o', 'o', 'o', 't', ' ', 'u', 'u', 'u', 'u', 'y', 'y', 'y'
-};
-// Latin-9
-// Adapted from Wikipedia:
-// Update of 8859-1
-// English, French, German, Spanish and Portuguese
-const unsigned char map_8859_15[256]	=
-{
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',0x09,0x0A, ' ', ' ',0x0D, ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-// ^^^ control chars ^^^
-0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
-0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
-0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
-0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
-0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
-0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E, ' ',
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-// ^^^ control chars ^^^
- ' ', ' ', ' ', ' ', ' ', ' ', 'S', ' ', 's', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- ' ', ' ', ' ', ' ', 'Z', ' ', ' ', ' ', 'z', ' ', ' ', ' ', 'O', 'o', 'Y', ' ',
- 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I',
- 'D', 'N', 'O', 'O', 'O', 'O', 'O', ' ', ' ', 'U', 'U', 'U', 'U', 'Y', ' ', ' ',
- 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
- 'o', 'n', 'o', 'o', 'o', 'o', 'o', ' ', ' ', 'u', 'u', 'u', 'u', 'y', ' ', 'y'
-};
-// Latin-10 "South-Eastern European"
-// Adapted from Wikipedia:
-// Albanian, Croatian, Hungarian, Polish, Romanian and Slovenian, French,
-// Italian and Irish Gaelic (new orthography).
-const unsigned char map_8859_16[256]	=
-{
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',0x09,0x0A, ' ', ' ',0x0D, ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-// ^^^ control chars ^^^
-0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
-0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
-0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
-0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
-0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
-0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E, ' ',
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-// ^^^ control chars ^^^
- ' ', 'A', 'a', 'L', ' ', ' ', 'S', ' ', 's', ' ', 'S', ' ', 'Z', ' ', 'z', 'Z',
- ' ', ' ', 'C', 'l', 'Z', ' ', ' ', ' ', 'z', 'c', 's', ' ', 'O', 'o', 'Y', 'z',
- 'A', 'A', 'A', 'A', 'A', 'C', 'A', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I',
- 'D', 'N', 'O', 'O', 'O', 'O', 'O', 'S', 'U', 'U', 'U', 'U', 'U', 'E', 'T', ' ',
- 'a', 'a', 'a', 'a', 'a', 'c', 'a', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
- 'd', 'n', 'o', 'o', 'o', 'o', 'o', 's', 'u', 'u', 'u', 'u', 'u', 'e', 't', 'y'
-};
-// array of all 8859 charsets
-const unsigned char* map_8859[ISO_8859_NUM_CHARSETS + 1]	=
-{
-	map_8859_default,
-	map_8859_1,
-	map_8859_2,
-	map_8859_3,
-	map_8859_4,
-	map_8859_default,
-	map_8859_default,
-	map_8859_default,
-	map_8859_default,
-	map_8859_9,
-	map_8859_10,
-	map_8859_default,
-	map_8859_default,
-	map_8859_13,
-	map_8859_14,
-	map_8859_15,
-	map_8859_16
-};
-const unsigned char map_win_1251[256]	=
-{
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',0x09,0x0A, ' ', ' ',0x0D, ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-// ^^^ control chars ^^^
-0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
-0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
-0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
-0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
-0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
-0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E, ' ',
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 'K', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 'k', ' ', ' ',
-// ^^^ control chars ^^^
- ' ', 'Y', 'y', 'J', ' ', ' ', ' ', ' ', 'E', ' ', ' ', ' ', ' ', '-', ' ', 'I',
- ' ', 'I', 'i', ' ', ' ', ' ', ' ', ' ', 'e', ' ', ' ', ' ', 'j', 'S', 's', 'i',
- 'A', 'b', 'B', ' ', ' ', 'E', ' ', ' ', 'N', 'N', 'K', ' ', 'M', 'H', 'O', ' ',
- 'P', 'C', 'T', 'Y', ' ', 'X', ' ', ' ', 'W', 'W', 'b', ' ', 'b', ' ', ' ', 'R',
- 'a', 'o', 'b', ' ', ' ', 'e', ' ', ' ', 'n', 'n', 'k', ' ', 'm', 'h', 'o', ' ',
- 'p', 'c', 't', 'y', ' ', 'x', ' ', ' ', 'w', 'w', 'b', ' ', 'b', ' ', ' ', 'r'
-};
-const unsigned char map_win_1252[256]	=
-{
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',0x09,0x0A, ' ', ' ',0x0D, ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-// ^^^ control chars ^^^
-0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
-0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
-0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
-0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
-0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
-0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E, ' ',
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 'S', ' ', 'O', ' ', 'Z', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 's', ' ', 'o', ' ', 'z', ' ',
-// ^^^ control chars ^^^
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '-', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I',
- 'D', 'N', 'O', 'O', 'O', 'O', 'O', ' ', ' ', 'U', 'U', 'U', 'U', 'Y', ' ', ' ',
- 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
- 'o', 'n', 'o', 'o', 'o', 'o', 'o', ' ', ' ', 'u', 'u', 'u', 'u', 'y', ' ', 'y'
-};
-const unsigned char map_win_1253[256]	=
-{
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',0x09,0x0A, ' ', ' ',0x0D, ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-// ^^^ control chars ^^^
-0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
-0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
-0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
-0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
-0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
-0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E, ' ',
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-// ^^^ control chars ^^^
- ' ', ' ', 'A', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 'E', 'H', 'I', ' ', 'O', ' ', 'Y', ' ',
- ' ', 'A', 'B', ' ', ' ', 'E', 'Z', 'H', ' ', 'I', 'K', ' ', 'M', 'N', ' ', 'O',
- ' ', 'P', ' ', ' ', 'T', 'Y', ' ', 'X', ' ', ' ', 'I', 'Y', 'a', 'e', 'n', 'i',
- 'v', 'a', 'b', ' ', ' ', 'e', ' ', 'n', ' ', 'l', 'k', ' ', ' ', 'v', ' ', 'o',
- ' ', 'p', ' ', 'o', 't', 'v', ' ', 'X', ' ', 'w', 'i', 'v', 'o', 'v', 'w', ' '
-};
-const unsigned char map_win_1254[256]	=
-{
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',0x09,0x0A, ' ', ' ',0x0D, ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-// ^^^ control chars ^^^
-0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
-0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
-0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
-0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
-0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
-0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E, ' ',
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 'S', ' ', 'O', ' ', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 's', ' ', 'o', ' ', ' ', 'Y',
-// ^^^ control chars ^^^
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I',
- 'G', 'N', 'O', 'O', 'O', 'O', 'O', 'X', '0', 'U', 'U', 'U', 'U', 'I', 'S', ' ',
- 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
- 'g', 'n', 'o', 'o', 'o', 'o', 'o', ' ', 'o', 'u', 'u', 'u', 'u', 'i', 's', ' '
-};
-const unsigned char map_win_1255[256]	=
-{
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',0x09,0x0A, ' ', ' ',0x0D, ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-// ^^^ control chars ^^^
-0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
-0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
-0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
-0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
-0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
-0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E, ' ',
-// vvv control chars vvv
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
-// ^^^ control chars ^^^
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
- ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '
-};
-// array of all windows charsets
-const unsigned char* map_win_125x[WIN_125X_NUM_CHARSETS + 1]	=
-{
-	map_win_1252,	// dummy, will never be used - no NULL here...
-	map_win_1251,
-	map_win_1252,
-	map_win_1253,
-	map_win_1254,
-	map_win_1255
-};
--- a/Iso8859.h
+++ b/Iso8859.h
@ -1,54 +0,0 @@
-// default map for charsets that are highly "non-latin"
-extern const unsigned char map_8859_default[256];
-// Latin-1
-// Adapted from Wikipedia:
-// Albanian, Basque, Catalan, Danish, Dutch (missing some letters),
-// English, Estonian (missing some letters), Faroese,
-// French (missing some letters), Finnish (missing some letters),
-// Galician, German, Icelandic, Irish (new orthography), Italian,
-// Latin, Norwegian, Portuguese, Rhaeto-Romanic, Scottish, Spanish,
-// Swedish, Afrikaans, Swahili
-extern const unsigned char map_8859_1[256];
-// Latin-2
-// Adapted from Wikipedia:
-// Bosnian, Croatian, Czech, Hungarian, Polish, Romainian, Serbian,
-// Serbocroatian, Slovak, Slovenian, Upper Sorbian and Lower Sorbian
-extern const unsigned char map_8859_2[256];
-// Latin-3 (South European)
-// Adapted from Wikipedia:
-// Turkish (superceded by 8859-9), Maltese, Esperanto
-extern const unsigned char map_8859_3[256];
-// Latin-4 (North European)
-// Adapted from Wikipedia:
-// Estonian, Latvian, Lithuanian, Greenlandic, and Sami
-extern const unsigned char map_8859_4[256];
-// Latin-5 (Turkish)
-// Adapted from Wikipedia:
-// Turkish
-extern const unsigned char map_8859_9[256];
-// Latin-6 (Nordic)
-// Adapted from Wikipedia
-extern const unsigned char map_8859_10[256];
-// Latin-7 (Baltic Rim)
-// Adapted from Wikipedia:
-extern const unsigned char map_8859_13[256];
-// Latin-8 (Celtic)
-// Adapted from Wikipedia:
-// Gaelic, Welsh, Breton
-extern const unsigned char map_8859_14[256];
-// Latin-9
-// Adapted from Wikipedia:
-// Update of 8859-1
-// English, French, German, Spanish and Portuguese
-extern const unsigned char map_8859_15[256];
-// Latin-10 "South-Eastern European"
-// Adapted from Wikipedia:
-// Albanian, Croatian, Hungarian, Polish, Romanian and Slovenian, French,
-// Italian and Irish Gaelic (new orthography).
-extern const unsigned char map_8859_16[256];
-// array of all 8859 charsets
-#define ISO_8859_NUM_CHARSETS 16
-extern const unsigned char* map_8859[ISO_8859_NUM_CHARSETS + 1];
-// MS-WIN codepage 1252
-#define WIN_125X_NUM_CHARSETS 5 
-extern const unsigned char* map_win_125x[WIN_125X_NUM_CHARSETS + 1];
--- a/Lang.cpp
+++ b/Lang.cpp
@ -8,107 +8,6 @@ void languageToString ( unsigned char langId , char *buf ) {
 	strcpy(buf,p);
 }

-static char *s_nativeLangStrings[] = {
-	"unknown",
-	"english",
-	"français",
-	"español",
-	"русcкий",
-	"t<EFBFBD>rk<EFBFBD>e", // not sure...
-	"japanese", // don't know yet
-	"chinese traditional", // don't know yet
-	"chinese simplified", // don't know yet
-	"korean", // don't know yet
-	"deutsch",
-	"nederlands",
-	"italiano",
-	"suomi",
-	"svenska",
-	"norsk",
-	"português",
-	"vietnamese", // don't know yet
-	"arabic", // don't know yet
-	"hebrew", // don't know yet
-	"indonesian", // don't know yet
-	"greek", // don't know yet
-	"thai", // don't know yet
-	"hindi", // don't know yet
-	"bengala", // don't know yet
-	"polski",
-	"tagalog", // don't know yet
-
-	"latin",
-	"esperanto",
-	"catalan",
-	"bulgarian",
-	"translingual",
-	"serbo-croatin",
-	"hungarian",
-	"danish",
-	"lithuanian",
-	"czech",
-	"galician",
-	"georgian",
-	"scottish gaelic",
-	"gothic",
-	"romanian",
-	"irish",
-	"latvian",
-	"armenian",
-	"icelandic",
-	"ancient greek",
-	"manx",
-	"ido",
-	"persian",
-	"telugu",
-	"venetian",
-	"malagasy",
-	"kurdish",
-	"luxembourgish",
-	"estonian",
-
-	NULL
-};
-static char *s_lowerLangStrings[] = {
-	"unknown","english","french","spanish","russian","turkish","japanese",
-	"chinese traditional","chinese simplified","korean","german","dutch",
-	"italian","finnish","swedish","norwegian","portuguese","vietnamese",
-	"arabic","hebrew","indonesian","greek","thai","hindi","bengala",
-	"polish","tagalog",
-
-	"latin",
-	"esperanto",
-	"catalan",
-	"bulgarian",
-	"translingual",
-	"serbo-croatian",
-	"hungarian",
-	"danish",
-	"lithuanian",
-	"czech",
-	"galician",
-	"georgian",
-	"scottish gaelic",
-	"gothic",
-	"romanian",
-	"irish",
-	"latvian",
-	"armenian",
-	"icelandic",
-	"ancient greek",
-	"manx",
-	"ido",
-	"persian",
-	"telugu",
-	"venetian",
-	"malagasy",
-	"kurdish",
-	"luxembourgish",
-	"estonian",
-
-	NULL
-};
-
 static char *s_langStrings[] = {
 	"Unknown","English","French","Spanish","Russian","Turkish","Japanese",
 	"Chinese Traditional","Chinese Simplified","Korean","German","Dutch",
@ -151,12 +50,7 @@ static char *s_langStrings[] = {
 char* getLanguageString ( unsigned char langId ) {
 	if ( langId >= sizeof(s_langStrings)/sizeof(char *) ) return NULL;
 	return s_langStrings[langId];
-};
-
-char* getNativeLanguageString ( unsigned char langId ) {
-	if ( langId >= sizeof(s_nativeLangStrings)/sizeof(char *) ) return NULL;
-	return s_nativeLangStrings[langId];
-};
+}

 static char *s_langAbbr[] = {
 	"xx","en","fr","es","ru","tr","ja","zh_tw","zh_cn","ko","de","nl",
@ -195,31 +89,6 @@ static char *s_langAbbr[] = {
 	NULL
 };

-// fix bug:
-//#ifndef PRIVATESTUFF
-#define csISOLatin6 cslatin6
-//#endif
-
-static unsigned char s_langCharset[] = {
-	csUnknown,csISOLatin1,csISOLatin1,csISOLatin1,//"xx","en","fr","es",
-	csUnknown,csUnknown,csUnknown,csUnknown,//"ru","zz","ja","zh_tw",
-	csUnknown,csUnknown,csISOLatin1,csISOLatin1,//"zh_cn","ko","de","nl",
-	csISOLatin1,csISOLatin6,csISOLatin6,csISOLatin6,//"it","fi","sv","no",
-	csISOLatin1,csUnknown,csUnknown,csUnknown,//"pt","vi","ar","he",
-	csUnknown,csUnknown,csUnknown,csUnknown,//"id","el","th","hi",
-	csUnknown,csUnknown,csUnknown,//"bn","pl","tl","en_uk",
-	csUnknown//"en_au"
-};
-
-uint8_t getLanguageFromName(uint8_t *name) {
-	int x;
-	for(x = 0; x < MAX_LANGUAGES && s_lowerLangStrings[x]; x++)
-		if(!strcasecmp((char*)name, s_lowerLangStrings[x])) return(x);
-	for(x = 0; x < MAX_LANGUAGES && s_nativeLangStrings[x]; x++)
-		if(!strcasecmp((char*)name, s_nativeLangStrings[x])) return(x);
-	return(0);
-}
-
 uint8_t getLangIdFromAbbr ( const char *abbr ) {
 	int x;
 	for(x = 0; x < MAX_LANGUAGES && s_langAbbr[x]; x++)
@ -231,6 +100,16 @@ uint8_t getLangIdFromAbbr ( const char *abbr ) {
 	return langUnknown;//0;
 }

+// Case-insensitive prefix lookup: returns the index of the first s_langAbbr entry that abbr begins with (table order, first match wins, so a shorter entry listed earlier shadows longer ones sharing its prefix), or langUnknown when abbr is NULL or nothing matches.
+uint8_t getLangIdFromAbbrN ( const char *abbr ) {
+	for (int x = 0; abbr && x < MAX_LANGUAGES && s_langAbbr[x]; ++x) {
+		if (!strncasecmp(abbr, s_langAbbr[x], strlen(s_langAbbr[x]))) { // compares only the entry's length, so text following the code in abbr is ignored
+			return (uint8_t)x;
+		}
+	}
+	return langUnknown;
+}
+
 char *getLangAbbr ( uint8_t langId ) {
 	return s_langAbbr[langId]; 
 }
@ -238,225 +117,6 @@ char *getLangAbbr ( uint8_t langId ) {
 char* getLanguageAbbr ( unsigned char langId ) {
 	if ( langId >= sizeof(s_langAbbr)/sizeof(char *) ) return NULL;
 	return s_langAbbr[langId];
-};
-
-unsigned char  getLanguageCharset ( unsigned char langId ){
-	if ( langId >= sizeof(s_langAbbr)/sizeof(char *) ) return csUnknown;
-	return s_langCharset[langId];
-}
-
-/*
-unsigned char getLanguageFromScript(UChar32 c) {
-	switch(ucGetScript(c)) {
-	case ucScriptArabic:
-		return langArabic;
-		break;
-	case ucScriptGreek:
-		return langGreek;
-		break;
-	case ucScriptHangul:
-	case ucScriptHanunoo:
-		return langKorean;
-		break;
-		//case ucScriptHan:
-		//return langChineseTrad;
-
-	case	ucScriptHiragana:
-	case	ucScriptKannada:
-	case	ucScriptKatakana:
-	case	ucScriptKatakana_Or_Hiragana:
-		return langJapanese;
-		break;
-	case ucScriptHebrew:
-		return langHebrew;
-		break;
-	case ucScriptThai:
-		return langThai;
-		break;
-	case ucScriptBengali:
-		return langBengala;
-		break;
-	case ucScriptDevanagari:
-		return langHindi;
-		break;
-
-	default:
-		return langUnknown;
-		break;
-	}
-};
-*/
-
-unsigned char getLanguageFromAbbr(char *abbr) {
-	// if(!strcmp(abbr, "en-GB")) return langBritish;
-	// if(!strcmp(abbr, "en_AU")) return langAustralia;
-	// if(!strcmp(abbr, "en-AU")) return langAustralia;
-	if(!strcmp(abbr, "en_US")) return langEnglish;
-	if(!strcmp(abbr, "en-US")) return langEnglish;
-	if(!strcmp(abbr, "en")) return langEnglish;
-	if(!strcmp(abbr, "fr")) return langFrench;
-	if(!strcmp(abbr, "es_MX")) return langSpanish;
-	if(!strcmp(abbr, "es-MX")) return langSpanish;
-	if(!strcmp(abbr, "es")) return langSpanish;
-	if(!strcmp(abbr, "ru")) return langRussian;
-	if(!strcmp(abbr, "ua")) return langRussian; // ukrainian?
-	if(!strcmp(abbr, "ja")) return langJapanese;
-	if(!strcmp(abbr, "zh_tw")) return langChineseTrad;
-	if(!strcmp(abbr, "zh_cn")) return langChineseSimp;
-	if(!strcmp(abbr, "ko")) return langKorean;
-	if(!strcmp(abbr, "de")) return langGerman;
-	if(!strcmp(abbr, "nl")) return langDutch;
-	if(!strcmp(abbr, "it")) return langItalian;
-	if(!strcmp(abbr, "fi")) return langFinnish;
-	if(!strcmp(abbr, "sv")) return langSwedish;
-	if(!strcmp(abbr, "no")) return langNorwegian;
-	if(!strcmp(abbr, "pt")) return langPortuguese;
-	if(!strcmp(abbr, "vi")) return langVietnamese;
-	if(!strcmp(abbr, "ar")) return langArabic;
-	if(!strcmp(abbr, "he")) return langHebrew;
-	if(!strcmp(abbr, "id")) return langIndonesian;
-	if(!strcmp(abbr, "el")) return langGreek;
-	if(!strcmp(abbr, "th")) return langThai;
-	if(!strcmp(abbr, "hi")) return langHindi;
-	if(!strcmp(abbr, "bn")) return langBengala;
-	if(!strcmp(abbr, "pl")) return langPolish;
-	if(!strcmp(abbr, "tl")) return langTagalog;
-	if(!strcmp(abbr, "tr")) return langTurkish;
-	return langUnknown;
-}
-
-unsigned char getLanguageFromAbbrN(char *abbr) {
-	// if(!strcmp(abbr, "en-GB")) return langBritish;
-	// if(!strcmp(abbr, "en_AU")) return langAustralia;
-	// if(!strcmp(abbr, "en-AU")) return langAustralia;
-	if(!strncasecmp(abbr, "en_US", 5)) return langEnglish;
-	if(!strncasecmp(abbr, "en-US", 5)) return langEnglish;
-	if(!strncasecmp(abbr, "en", 2)) return langEnglish;
-	if(!strncasecmp(abbr, "fr", 2)) return langFrench;
-	if(!strncasecmp(abbr, "es_MX", 5)) return langSpanish;
-	if(!strncasecmp(abbr, "es-MX", 5)) return langSpanish;
-	if(!strncasecmp(abbr, "es", 2)) return langSpanish;
-	if(!strncasecmp(abbr, "ru", 2)) return langRussian;
-	if(!strncasecmp(abbr, "ua", 2)) return langRussian; // ukrainian?
-	if(!strncasecmp(abbr, "ja", 2)) return langJapanese;
-	if(!strncasecmp(abbr, "zh_tw", 5)) return langChineseTrad;
-	if(!strncasecmp(abbr, "zh_cn", 5)) return langChineseSimp;
-	if(!strncasecmp(abbr, "ko", 2)) return langKorean;
-	if(!strncasecmp(abbr, "de", 2)) return langGerman;
-	if(!strncasecmp(abbr, "nl", 2)) return langDutch;
-	if(!strncasecmp(abbr, "it", 2)) return langItalian;
-	if(!strncasecmp(abbr, "fi", 2)) return langFinnish;
-	if(!strncasecmp(abbr, "sv", 2)) return langSwedish;
-	if(!strncasecmp(abbr, "no", 2)) return langNorwegian;
-	if(!strncasecmp(abbr, "pt", 2)) return langPortuguese;
-	if(!strncasecmp(abbr, "vi", 2)) return langVietnamese;
-	if(!strncasecmp(abbr, "ar", 2)) return langArabic;
-	if(!strncasecmp(abbr, "he", 2)) return langHebrew;
-	if(!strncasecmp(abbr, "id", 2)) return langIndonesian;
-	if(!strncasecmp(abbr, "el", 2)) return langGreek;
-	if(!strncasecmp(abbr, "th", 2)) return langThai;
-	if(!strncasecmp(abbr, "hi", 2)) return langHindi;
-	if(!strncasecmp(abbr, "bn", 2)) return langBengala;
-	if(!strncasecmp(abbr, "pl", 2)) return langPolish;
-	if(!strncasecmp(abbr, "tl", 2)) return langTagalog;
-	if(!strncasecmp(abbr, "tr", 2)) return langTurkish;
-	return langUnknown;
-}
-
-unsigned char getLanguageFromUnicodeAbbr(char *abbr) {
-	// if     (!memcmp(abbr, "e\0n\0_\0g\0b\0",10)) return langBritish;
-	// else if(!memcmp(abbr, "e\0n\0-\0g\0b\0",10)) return langBritish;
-	// else if(!memcmp(abbr, "e\0n\0_\0a\0u\0",10)) return langAustralia;
-	// else if(!memcmp(abbr, "e\0n\0-\0a\0u\0",10)) return langAustralia;
-	if(!memcmp(abbr, "en_us",5)) return langEnglish;
-	if(!memcmp(abbr, "en-us",5)) return langEnglish;
-	if(!memcmp(abbr, "es_mx",5)) return langSpanish;
-	if(!memcmp(abbr, "es-mx",5)) return langSpanish;
-	if(!memcmp(abbr, "zh_tw",5)) return langChineseTrad;
-	if(!memcmp(abbr, "zh_cn",5)) return langChineseSimp;
-	if(!memcmp(abbr, "en",2)) return langEnglish;
-	if(!memcmp(abbr, "fr",2)) return langFrench;
-	if(!memcmp(abbr, "es",2)) return langSpanish;
-	if(!memcmp(abbr, "ru",2)) return langRussian;
-	if(!memcmp(abbr, "ja",2)) return langJapanese;
-	if(!memcmp(abbr, "ko",2)) return langKorean;
-	if(!memcmp(abbr, "de",2)) return langGerman;
-	if(!memcmp(abbr, "nl",2)) return langDutch;
-	if(!memcmp(abbr, "it",2)) return langItalian;
-	if(!memcmp(abbr, "fi",2)) return langFinnish;
-	if(!memcmp(abbr, "sv",2)) return langSwedish;
-	if(!memcmp(abbr, "no",2)) return langNorwegian;
-	if(!memcmp(abbr, "pt",2)) return langPortuguese;
-	if(!memcmp(abbr, "vi",2)) return langVietnamese;
-	if(!memcmp(abbr, "ar",2)) return langArabic;
-	if(!memcmp(abbr, "he",2)) return langHebrew;
-	if(!memcmp(abbr, "id",2)) return langIndonesian;
-	if(!memcmp(abbr, "el",2)) return langGreek;
-	if(!memcmp(abbr, "th",2)) return langThai;
-	if(!memcmp(abbr, "hi",2)) return langHindi;
-	if(!memcmp(abbr, "bn",2)) return langBengala;
-	if(!memcmp(abbr, "pl",2)) return langPolish;
-	if(!memcmp(abbr, "tl",2)) return langTagalog;
-	if(!memcmp(abbr, "tr",2)) return langTurkish;
-	return langUnknown;
-}
-
-
-unsigned char getLanguageFromCountryCode(char *code) {
-	// Check the ones we know are different first,
-	// then revert to abbr
-	if(!strcmp(code, "us")) return(langEnglish);
-	if(!strcmp(code, "uk")) return(langEnglish);
-	// if(!strcmp(code, "gb")) return(langBritish);
-	// if(!strcmp(code, "vg")) return(langBritish);
-	if(!strcmp(code, "vi")) return(langEnglish);
-	// if(!strcmp(code, "au")) return(langAustralia);
-	if(!strcmp(code, "ae")) return(langArabic);
-	if(!strcmp(code, "cn")) return(langChineseSimp);
-	if(!strcmp(code, "tw")) return(langChineseTrad);
-	if(!strcmp(code, "vn")) return(langVietnamese);
-	return(getLanguageFromAbbr(code));
-}
-
-// This is only here to avoid mangling the string
-// as we look for tags, if at all possible use the
-// getLanguageFromAbbr instead.
-unsigned char getLanguageFromUserAgent(char *abbr) {
-	// if(!strncmp(abbr, "en_GB", 5)) return langBritish;
-	// if(!strncmp(abbr, "en-GB", 5)) return langBritish;
-	// if(!strncmp(abbr, "en_AU", 5)) return langAustralia;
-	// if(!strncmp(abbr, "en-AU", 5)) return langAustralia;
-	if(!strncmp(abbr, "en_US", 5)) return langEnglish;
-	if(!strncmp(abbr, "en-US", 5)) return langEnglish;
-	if(!strncmp(abbr, "en", 2)) return langEnglish;
-	if(!strncmp(abbr, "fr", 2)) return langFrench;
-	if(!strncmp(abbr, "es_MX", 5)) return langSpanish;
-	if(!strncmp(abbr, "es-MX", 5)) return langSpanish;
-	if(!strncmp(abbr, "es", 2)) return langSpanish;
-	if(!strncmp(abbr, "ru", 2)) return langRussian;
-	if(!strncmp(abbr, "ja", 2)) return langJapanese;
-	if(!strncmp(abbr, "zh_tw", 5)) return langChineseTrad;
-	if(!strncmp(abbr, "zh_cn", 5)) return langChineseSimp;
-	if(!strncmp(abbr, "ko", 2)) return langKorean;
-	if(!strncmp(abbr, "de", 2)) return langGerman;
-	if(!strncmp(abbr, "nl", 2)) return langDutch;
-	if(!strncmp(abbr, "it", 2)) return langItalian;
-	if(!strncmp(abbr, "fi", 2)) return langFinnish;
-	if(!strncmp(abbr, "sv", 2)) return langSwedish;
-	if(!strncmp(abbr, "no", 2)) return langNorwegian;
-	if(!strncmp(abbr, "pt", 2)) return langPortuguese;
-	if(!strncmp(abbr, "vi", 2)) return langVietnamese;
-	if(!strncmp(abbr, "ar", 2)) return langArabic;
-	if(!strncmp(abbr, "he", 2)) return langHebrew;
-	if(!strncmp(abbr, "id", 2)) return langIndonesian;
-	if(!strncmp(abbr, "el", 2)) return langGreek;
-	if(!strncmp(abbr, "th", 2)) return langThai;
-	if(!strncmp(abbr, "hi", 2)) return langHindi;
-	if(!strncmp(abbr, "bn", 2)) return langBengala;
-	if(!strncmp(abbr, "pl", 2)) return langPolish;
-	if(!strncmp(abbr, "tl", 2)) return langTagalog;
-	if(!strncmp(abbr, "tr", 2)) return langTurkish;
-	return langUnknown;
 }

 // . these are going to be adult, in any language
--- a/Lang.h
+++ b/Lang.h
@ -12,8 +12,8 @@
 // translingual is the 31st bit, english is the first bit
 #define LANG_BIT_MASK 0x007fffffffffffffLL
 #include "Unicode.h"
-#include "Iso8859.h"
 #include "iana_charset.h"
+
 enum {
 	langUnknown     = 0,
 	langEnglish     = 1,
@ -76,23 +76,14 @@ enum {
 	langLast           = 56
 };

-uint8_t getLanguageFromName(uint8_t *name);
 uint8_t getLangIdFromAbbr ( const char *abbr ) ;
+uint8_t getLangIdFromAbbrN ( const char *abbr ) ; // case-insensitive prefix match of abbr against the language abbreviation table; langUnknown if no entry matches
 char *getLangAbbr ( uint8_t langId ) ;

 void  	 languageToString ( unsigned char lang , char *buf );
 char* 	 getLanguageString ( unsigned char lang);
-char* 	 getNativeLanguageString ( unsigned char lang);
 char*    getLanguageAbbr ( unsigned char lang);
-unsigned char  getLanguageCharset ( unsigned char LangId );
+
 bool     isAdult( char *s, int32_t slen, char **loc = NULL );
-//unsigned char getLanguageFromScript(UChar32 c);
-unsigned char getLanguageFromAbbr(char *abbr);
-unsigned char getLanguageFromAbbrN(char *abbr);
-//unsigned char getLanguageFromUnicodeAbbr(UChar *abbr);
-// abbr is now in utf8
-unsigned char getLanguageFromUnicodeAbbr(char *abbr);
-unsigned char getLanguageFromUserAgent(char *abbr);
-unsigned char getLanguageFromCountryCode(char *code);

 #endif
--- a/Language.cpp
+++ b/Language.cpp
--- a/Language.h
+++ b/Language.h
@ -1,299 +0,0 @@
-
-#ifndef _LANGUAGE_H_
-#define _LANGUAGE_H_
-//#include <wchar.h>
-#include "gb-include.h"
-//#include "UnicodeProperties.h" //UChar32
-#include "File.h"
-#include "HashTableT.h"
-#include "Query.h"
-#include "Lang.h"
-#include "Multicast.h"
-#include "Threads.h"
-#include "Titledb.h"
-#include "Iso8859.h"
-#include "IndexList.h"
-//#include "Msg3a.h"
-
-#include "Msg20.h"
-
-// max chars in any language
-#define MAX_WORDS_PER_PHRASE 5
-#define MAX_CHARS 256
-#define TOP_POP_PHRASES 40 * 1024
-#define NUM_CHARS 40
-#define MAX_FRAG_SIZE 1024
-// max chars that start the rule
-
-#define MAX_PHRASE_LEN 80
-#define MAX_RECOMMENDATIONS 10
-#define LARGE_SCORE 0xfffff
-#define MAX_NARROW_SEARCHES 19
-
-/*
-// used only while generating titles from wikipedia pages, makeWikiFiles()
-class StateWik {
-public:
-	bool getIndexList(  );
-	bool getSummary (  );
-	bool gotSummary (  );
-
-	int       m_fdw;
-	Msg0      m_msg0;
-	IndexList m_list;
-	Query     m_q;
-	key_t     m_startKey;
-	key_t     m_endKey;
-	char     *m_coll;
-	int32_t      m_collLen;
-	int64_t m_termId;
-	int32_t      m_minRecSize;
-	Msg20     m_msg20s[MAX_FRAG_SIZE];
-	int32_t      m_numMsg20sOutstanding;
-	int32_t      m_numMsg20sLaunched;
-	int32_t      m_numMsg20sReceived;
-};
-
-class StateDict{
- public:
-	char      *m_dictBuf;
-	int32_t       m_dictBufSize;
-	char      *m_buf;
-	int32_t       m_bufSize;
-	char     **m_wordsPtr;
-	int64_t *m_termIds;
-	int64_t *m_termFreqs;
-	int32_t       m_numTuples;
-	Msg37      m_msg37;
-};
-*/
-
-/*class StateAff{
- public:
-	bool openAffinityFile ( );
-	bool launchAffinity ( );
-	bool gotAffinityFreqs1 ( );
-	bool gotAffinityFreqs2 ( );
-	bool doneAffinities ( );
-
-	FILE      *m_fdr;
-	int        m_fdw;
-	int32_t       m_fileNum;
-	char       m_buf[1026];
-	Msg3a      m_msg3a;
-	Query      m_q;
-	int64_t  m_numerator;
-	int64_t  m_denominator;
-	};*/
-
-typedef struct Reco{
-	char reco[MAX_PHRASE_LEN];
-	int32_t score;
-}Reco;
-
-class Language {
-
- public:
-
-	Language();
-	~Language();
-
-	void reset();
-
-	bool init( char *unifiedBuf, int32_t unifiedBufSize, int32_t lang, 
-		   int32_t hostsPerSplit, uint32_t myHash );
-
-	void setLang( int32_t lang ) { m_lang = lang; };
-	
-	//bool makeAffinities();
-
-	//int32_t getPhrasePopularity ( char *s, uint64_t h,
-	//		       bool checkTitleRecDict );
-
-	bool checkDict(char *s, int32_t slen, char encodeType);
-
-	bool getRecommendation( char *origWord, int32_t origWordLen,
-				char *recommendation, int32_t recommendationLen,
-				bool *found, int32_t *score, int32_t *popularity, 
-				bool  forceReco = false );
-
-	//int32_t narrowPhrase ( char *request, char *phrases, int32_t *pops, 
-	//		    int32_t maxPhrases );
-
-	//bool generateDicts ( int32_t numWordsToDump , char *coll );
-
-	//bool convertLatin1DictToUTF8 ( char *infile );
-
-	// needed for makeDict
-	//bool       gotTermFreqs( StateDict *st );
-	//StateDict *m_stateDict;
-
-	// hash table of the dictionary
-	HashTableT <uint64_t, int32_t>m_dict;
-
- private:
-	int32_t spellcheckDict();
-
-	// always accepts only ascii chars. makeClean() converts unicode into
-	// ascii
-	bool getPhonetic( char *origWord, int32_t origWordLen,
-			  char *target, int32_t targetLen );
-
-	bool loadRules();
-
-	bool loadSpellerDict( char *spellerBuf, int32_t spellerbufSize,
-			      int32_t hostsPerSplit, uint32_t myHash );
-
-	//bool loadTitleRecDicts( );
-
-	//bool loadNarrow( char *spellerBuf, int32_t spellerBufSize, 
-	//		 int32_t hostsPerSplit, uint32_t myHash );
-
-	bool loadDictHashTable( );
-
-	//bool genTopPopFile ( char *infile );
-
-	bool genDistributedPopFile ( char *infile, uint32_t myHash );
-	
-	//bool cleanDictFile ( );
-
-	bool makeClean( char *inBuf, int32_t inBufSize,
-			char *outBuf, int32_t outBufSize );//, bool isUTF16 );
-	
-	//bool makePhonet( char *infile);
-
-	//bool makeDict();
-
-	//bool makeQueryFiles ( );
-
-	//bool makeWikiFiles ( );
-
-	bool loadWikipediaWords();
-
-	bool loadMispelledWords();
-	
-	bool hasMispelling(char *phrase, int32_t phraseLen);
-
-	int32_t tryPhonet( char *phonetTmp, char *origPhonet,
-			char *origClean, int32_t tryForScore,
-			Reco *recos, int32_t numRecos, int32_t *lowestScore );
-
-	int32_t editDistance( char *a, char *b, int32_t level, // starting level
-			   int32_t limit ); // maximum level
-
-	int32_t weightedAverage(int32_t soundslikeScore, int32_t wordScore);
-
-	int32_t limitEditDistance( char *a, char *b, int32_t limit );
-
-	int32_t limit1EditDistance( char *a, char *b );
-
-	int32_t limit2EditDistance( char *a, char *b );
-
-	int32_t checkRest( char *a, char *b, int32_t w, char *amax, int32_t min );
-
-	int32_t check2( char *a, char *b, int32_t w, char *amax, int32_t min );
-
-	int16_t editDistance( char *a0, char *b0 );
-
-	int16_t reduceScore ( char *a, char *b );
-
-	//bool makeWordFiles ( int32_t numWordsToDump , int32_t numWordsPerPhrase ,
-	//		     char *coll );
-
-	//bool makePopFiles ( int32_t numWordsToDump , int32_t numWordsPerPhrase ,
-	//			    char *coll);
-
-	//bool makeScoreFiles ( int32_t maxWordsPerFile );
-
-	// this map maps a char to a "dict char"
-	//unsigned char m_map [ 256 ];
-
-	// . when comparing letter pairs, we only allow them to consist of
-	//   certain chars: 0-9, A-Z, apostrophe and space and \0 otherwise
-	//   m_table gets too big. This implies a NUM_CHARS of 
-	// . this compressed the value, too
-	// . \0, space, 0-9, A-Z, \'   is the ordering
-	//unsigned char to_dict_char ( unsigned char c ) { return m_map[c]; };
-
-	// Temporary unicode workaround for latin-1 compatibility
-	//unsigned char uc_to_dict_char ( UChar c ) { 
-	//	if (c>255)c=0;
-	//	return m_map[c]; 
-	//};
-
-	// what language loaded
-	int32_t  m_lang;
-
-	// what charset does this language use
-	unsigned char    m_charset;
-
-	// buffer to store the phonetic rules
-	char   *m_rulesBuf;
-	int32_t    m_rulesBufSize;
-	char  **m_rulesPtr;
-	int32_t    m_rulesPtrSize;
-	int32_t    m_numRules;
-	// points to the index of each rule that starts with a new character
-	int32_t    m_ruleStarts[MAX_CHARS];
-	// the chars that are in a phonet
-	bool    m_ruleChars[MAX_CHARS];
-
-	// buffers to store the dictionaries
-	char  *m_distributedBuf;
-	int32_t   m_distributedBufSize;
-	char **m_tuplePtr;
-	int32_t   m_tuplePtrSize;
-	int32_t   m_numTuples;
-
-	// total number of phonets
-	int32_t m_numPhonets;
-
-	// narrow phrase
-	char  *m_narrowBuf;
-	int32_t   m_narrowBufSize;
-	int32_t   m_numNarrowPtrs;
-	char **m_frntPtrs;
-	char **m_bckPtrs;
-	int32_t  *m_frntCharPtrs;//[NUM_CHARS][NUM_CHARS][NUM_CHARS];
-	int32_t  *m_bckCharPtrs;//[NUM_CHARS][NUM_CHARS][NUM_CHARS];
-
-	// m_phonetics stores the hash of the phonetic as the key.
-	// the value is a composite of index in m_tuplePtrs where the list
-	// starts as the high 32 bits of the value and the number of 
-	// words having the same phonetic as the low 32 bits of the value
-	HashTableT <uint64_t, uint64_t > m_phonetics;
-
-	// hash table of the distributed pop words dictionary
-	//	HashTableT <uint32_t, int32_t> m_titlerecDict;
-
-	// hash table of the distributed pop words dictionary
-	HashTableT <uint64_t, int32_t>m_distributedPopPhrases;
-
-	// hash table of the top popular words in the dictionary
-	//	HashTableT <uint32_t, char *> m_topPopPhrases;
-
-	// hash table of mispelled words
-	HashTableT <uint32_t, bool>m_misp;
-
-	// hash table of wikipedia words
-	HashTableT <uint32_t, bool>m_wiki;
-
-	// PARMS, which can be adjusted. Currently all languages have the 
-	// same adjustments, so using the same parms.
-	int32_t m_editDistanceWeightsDel1;
-	int32_t m_editDistanceWeightsDel2;
-	int32_t m_editDistanceWeightsSwap;
-	int32_t m_editDistanceWeightsSub;
-	int32_t m_editDistanceWeightsSimilar;
-	int32_t m_editDistanceWeightsMin;
-	int32_t m_editDistanceWeightsMax;
-	int32_t m_soundslikeWeight;
-	int32_t m_wordWeight;
-	int32_t m_span;
-
-	bool m_followup;
-	bool m_collapseResult;
-	bool m_removeAccents;
-};
-
-#endif
--- a/LanguageIdentifier.cpp
+++ b/LanguageIdentifier.cpp
@ -12,46 +12,6 @@

 LanguageIdentifier g_langId;

-/// List of TLDs that should not be used for language detection.
-/// NULL terminated.
-///
-/// Sadly, .de seems to be about half German pages and about half
-/// English as well. We cannot use it to distinguish language.
-/// Also, .at has some english pages.
-/// Also, .nl has some english pages.
-/// Also, .no has some english pages.
-/// Also, .vn has some english pages.
-/// Also, .ro has some english pages.
-/// Also, .gr has some english pages.
-/// Also, .th has some english pages.
-/// Also, .pl has some english pages.
-/// Also, .gs has some english pages.
-///
-/// (Pretty soon it will be faster to have a list of domains that
-/// WILL work instead of domains that won't.)
-///
-static char *ambiguousTLDs[] = {
-	"info",
-	"com",
-	"org",
-	"net",
-	"mil",
-	"de",
-	"at",
-	"tv",
-	"nl",
-	"no",
-	"ws",
-	"vn",
-	"ro",
-	"ru",
-	"gr",
-	"th",
-	"pl",
-	"gs",
-	NULL
-};
-
 const uint8_t *langToTopic[] = {
 	(uint8_t*)"Unknown",
 	(uint8_t*)"English",
@ -82,527 +42,10 @@ const uint8_t *langToTopic[] = {
 	(uint8_t*)"Tagalog"
 };

-#define MAX_DOCTYPE_SEARCH_LEN (512)
-
-/// Find a language tag in a DOCTYPE element.
-///
-/// This looks more complex than it is.
-/// Find second quote mark, back up to
-/// slash, move forward one, and that
-/// should be the language identifier.
-///
-/// @param content pointer to the document's content
-///
-/// @return pointer to the language tag, or NULL
-///
-static char * FindLanguageIndex(char *content) {
-	char *str;
-	str = strchr(content, '"');
-	if(!str)
-		return(NULL);
-
-	// Got first quote, skip it
-	str++;
-	str = strchr(str, '"');
-	if(!str)
-		return(NULL);
-
-	// Got second quote char, skip it
-	str++;
-	// now back up to slash character...
-	while(str && *str && str > content && *str != '/')
-		str--;
-	// make sure we found the slash...
-	if(str && *str && str > content && *str == '/') {
-		str++;
-		return(str);
-	}
-	return(NULL);
-}
-
-/// Copy a language tag.
-///
-/// Does NULL terminate dst.
-///
-/// @param dst the destination
-/// @param src the source (returned from FindLanguageIndex())
-/// @param maxSize max length of dst, not counting NULL
-///
-/// @return true on successful copy, false otherwise
-///
-static bool copyLangTag(char *dst, char *src, int maxSize) {
-	int len = 0;
-
-	if(!dst || !src || maxSize < 1)
-		return(false);
-
-	while ( *src && *src != '"' ) { // && len++ < maxSize) {
-		//if(len < 2) {
-		//	*dst++ = tolower(*src++);
-		//} else {
-		//	*dst++ = *src++;
-		//}
-		*dst++ = tolower(*src++);
-		// how many chars have we copied over?
-		len++;
-		// leave 1 char for a \0 termination
-		if ( len + 1 >= maxSize ) break;
-	}
-	*dst = 0;
-	return(true);
-}
-
-
 LanguageIdentifier::LanguageIdentifier() {
 	return;
 }

-inline bool LanguageIdentifier::isAmbiguousTLD(char *tld, int len) {
-	register int x;
-	for(x = 0; ambiguousTLDs[x]; x++) {
-		if(!strncmp(tld, ambiguousTLDs[x],
-					maxOf(len, gbstrlen(ambiguousTLDs[x]))))
-			return(true);
-	}
-	return(false);
-}
-
-uint8_t getLanguageFromAbbr2 ( char *str , int32_t len ) {
-	// truncate
-	if ( len > 5 ) len = 5;
-	// copy it and check it
-	char lang[6];
-	for ( int32_t j = 0 ; j < len ; j++ )
-		lang[j] = to_lower_a(str[j]);
-	lang[len]='\0';
-	return getLanguageFromAbbr(lang);
-}
-
-uint8_t LanguageIdentifier::guessLanguageFromTag(Xml *xml) {
-	uint8_t rv = langUnknown;
-	int32_t len = 0;
-	//char lang[6];
-	int id;
-	char *str;
-
-	if(!xml) return(langUnknown);
-
-	for(int32_t i = 0; i < xml->getNumNodes(); i++) {
-		id = xml->getNodeId(i);
-
-		// look for meta tag
-		if(id == TAG_META) {
-			str = (char *) xml->getString(i, "name", &len);
-			if(str &&
-			   (!strncasecmp(str, "Content-Language",16) ||
-			    !strncasecmp(str, "language",8) ||
-			    !strncasecmp(str, "Content_Language",16) ) ) {
-				str = (char *) xml->getString(i, "content", &len);
-				rv = getLanguageFromAbbr2(str,len);
-				if(rv != langUnknown) return(rv);
-			}
-			else {
-				str = (char *) xml->getString(i, "http-equiv", &len);
-				if(str && !strncasecmp(str, "Language", 8) ) {
-					str = (char *) xml->getString(i, "content", &len);
-					rv = getLanguageFromAbbr2(str,len);
-					if(rv != langUnknown) return(rv);
-				}
-			}
-		}  // end looking for meta tag
-
-
-		if(id != TAG_HTML &&      // html
-		   id != TAG_BODY && // body
-		   id != TAG_HEAD)   // head
-			continue;
-
-		str = (char *) xml->getString(i, "lang", &len);
-		rv = getLanguageFromAbbr2(str,len);
-		if(rv != langUnknown) return(rv);
-	}
-	return(rv);
-}
-
-uint8_t LanguageIdentifier::guessLanguageFromOutlinks(Links *links) {
-	char link[MAX_URL_LEN];
-	int32_t langs[32];
-	int lc;
-	char *cp = NULL;
-	int max = 0;
-	int oldmax = 0;
-	uint8_t l;
-	uint8_t maxlang = 0;
-	int len;
-
-	if(!links) return(langUnknown);
-
-	// Try to catch bad pointers
-	//if(!isValidPointer(links)) {
-	//	log(LOG_WARN, "build: Bad pointer 0x%08x not above data segment.\n",
-	//			(uint32_t) links);
-	//	return(langUnknown);
-	//}
-
-	if(links->getNumLinks() < 1) {
-		return(langUnknown);
-	}
-
-	if(links->getNumLinks() < 15) {
-		return(langUnknown);
-	}
-
-	// clear list
-	memset(langs, 0, sizeof(uint32_t) * 32);
-
-	// trim to only 100 links to prevent
-	// spinning on some large pages
-	for(lc = 0; lc < links->getNumLinks() && lc < 100; lc++) {
-		cp = links->getLink(lc);
-
-		if(cp) {
-			// skip http://
-			cp += 7;
-			
-			len = links->getLinkLen(lc) - 7;
-			char* p = link;
-			while(*cp && *cp != '/') *p++ = *cp++;
-			*p = '\0';
-
-			if((cp = strrchr(link, '.')) != NULL) {
-
-				// skip to tld
-				cp++;
-
-				// only bother if not a common TLD
-				len = gbstrlen(cp);
-				if(!isAmbiguousTLD(cp, len)) {
-					for(l = 1; l < 32; l++) {
-						if(g_langList.isLangValidForTld(cp, len, l)) 
-							langs[l]++;
-					}
-				}
-			}
-		}
-	}
-
-	// look for a clear winner from the list
-	// don't bother with langUnknown, it reduces hits
-	for(l = 1; l < 32; l++) {
-		if(langs[l] >= max) {
-			oldmax = max;
-			max = langs[l];
-			maxlang = l;
-		}
-	}
-
-	// 1st place must beat 2nd place by 5
-	if(max - oldmax > 5) {
-		return(maxlang);
-	}
-	return(langUnknown);
-}
-
-uint8_t LanguageIdentifier::guessLanguageFromTld(char *linktext) {
-#if 0
-	// This is not a good check of language
-	int len = 0;
-	char *cp;
-
-	if(!linktext) return(langUnknown);
-
-	// skip http://
-	cp = linktext + 7;
-
-	// if no slash, start at the end of the link
-	if(!(cp = strchr(cp, '/')))
-		cp = linktext + (gbstrlen(linktext) - 1);
-
-	// find last dot
-	while(*cp && cp > linktext && *cp != '.') {
-		cp--;
-		len++;
-	}
-
-	// skip '.'
-	len--; cp++;
-
-	if(len != 2) return(langUnknown);
-#endif // 0
-
-	return(langUnknown);
-
-}
-
-uint8_t LanguageIdentifier::guessLanguageFromInlinks(LinkInfo *linkInfo, int32_t ip) {
-	int32_t x;
-	//int32_t y;
-	uint8_t languages[32];
-	uint8_t max = langUnknown;
-	uint8_t oldmax = langUnknown;
-	uint8_t maxIndex = 0;
-	uint8_t oldmaxIndex = 0;
-	int hits = 0;
-
-	// sanity check
-	//if(linkInfo->m_numLangs != linkInfo->getNumDocIds()) {
-	//	log(LOG_DEBUG, "build: Number of languages (%"INT32") != number of docids (%"INT32")\n",
-	//			linkInfo->m_numLangs, linkInfo->getNumDocIds());
-	//	return(langUnknown);
-	//}
-
-	if(linkInfo->getNumGoodInlinks() < 7) return(langUnknown);
-
-	memset(languages, 0, 32);
-
-	// only check the first 100 inlinks, or we'll spin
-	// on some monstrous sites.
-	//for(x = 0; x < linkInfo->m_numLangs && x < 100; x++) {
-	for (Inlink*k=NULL;(k=linkInfo->getNextInlink(k)); ) {
-		//int32_t id = linkInfo->getLanguageId(x);
-		int32_t id = k->m_language;
-		// sanity check, we are still getting bad lang ids!!
-		if ( id < 0 || id >= 32 ) {
-			log("build: Got bad lang id of %"INT32". how can this "
-			    "happen?",id);
-			continue;
-		}
-		// don't count langUnknown pages, it reduces hits
-		if ( ! id ) continue;
-
-		// skip if not from a different enough IP
-		if((k->m_ip&0x0000ffff)==(ip&0x0000ffff) )
-			continue;
-		// otherwise count it
-		languages[id]++;
-		hits++;
-	}
-	if(hits < 7) return(langUnknown);
-	for(x = 1; x < 32; x++) {
-		if(languages[x] >= max) {
-			oldmax = max;
-			max = languages[x];
-			oldmaxIndex = maxIndex;
-			maxIndex = x;
-		}
-	}
-
-	// sanity check
-	if(maxIndex > 31 || oldmaxIndex > 31) {
-		log(LOG_INFO,
-			"build: guessLanguageFromInlinks(): Possible stack corruption: %d:%d\n",
-				maxIndex, oldmaxIndex);
-		return(langUnknown);
-	}
-
-	// Need better than 50%
-	// if(max - oldmax > 4)
-	if(max > (linkInfo->getNumGoodInlinks() / 2))
-		return(maxIndex);
-	return(langUnknown);
-}
-
-uint8_t LanguageIdentifier::guessLanguageFromDoctype(Xml *xml, char *content) {
-	uint8_t rvDoc = langUnknown;
-	int id;
-	char *str;
-	char lang[6];
-
-	if(!content) return(langUnknown);
-
-	for(int32_t i = 0; i < xml->getNumNodes(); i++) {
-		id = xml->getNodeId(i);
-		// skip if not DOCTYPE
-		if ( id != TAG_DOCTYPE ) continue;
-		// get the tag ptr to the tag
-		char *tag    = xml->getNode(i);
-		// this is in BYTES
-		//int32_t  tagLen = xml->getNodeLen(i);
-		// case might be upper, so we change
-		// the first two letters to lower.
-		str = FindLanguageIndex(tag);
-		if(!str) continue;
-		if(copyLangTag(lang, str, 5))
-			rvDoc = getLanguageFromAbbr(lang);
-		return(rvDoc);
-	}
-	return(rvDoc);
-}
-
-/// Skip whitespace in a string.
-///
-/// Includes CR and LF.
-///
-/// @param str the string
-///
-/// @return pointer to next character that is not whitespace, or NULL
-///
-static char *skipwhite(char *str) {
-	while(str && *str &&
-			(*str == ' ' ||
-			 *str == '\t' ||
-			 *str == '\n' ||
-			 *str == '\r'))
-		str++;
-	return(str);
-}
-
-/// Skip over 'words' in a string.
-///
-/// Skips over everything until there's whitespace.
-///
-/// @param str the string to search
-///
-/// @return the pointer to the next whitespace character
-///
-static char *skipword(char *str) {
-	while(str && *str &&
-			(*str != ' ' &&
-			 *str != '\t' &&
-			 *str != '\n' &&
-			 *str != '\r'))
-		str++;
-	return(str);
-}
-
-uint8_t LanguageIdentifier::guessLanguageFromUserAgent(char *str) {
-	// Mozilla/5.0 (X11; U; Linux i686;
-	// en-US; rv:1.8.1.4) Gecko/20070531 Firefox/2.0.0.4
-	uint8_t lang = langUnknown;
-	while(*str) {
-		if(!(str = skipwhite(str)))
-			return(langUnknown);
-		if((lang = getLanguageFromUserAgent(str)) != langUnknown)
-				return(lang);
-		if(!(str = skipword(str)))
-			return(langUnknown);
-	}
-	return(langUnknown);
-}
-
-uint8_t LanguageIdentifier::guessLanguageFromDMOZ(char *addr) {
-	return(g_categories->findLanguage(addr));
-}
-
-uint8_t LanguageIdentifier::guessLanguageFromQuery(Query *q) {
-	uint8_t lang;
-	if(q->getNumTerms() == 1) {
-		if(g_langList.lookup(q->getTermId(1), &lang))
-			return(lang);
-	} else {
-		// Look for two consecutive identical languages
-		// Not as good as a frequency count, but much faster
-		uint8_t last = 255;
-		register int32_t qcount;
-		for(qcount = 0; qcount < q->getNumTerms(); qcount++) {
-			if(g_langList.lookup(q->getTermId(qcount), &lang) &&
-					last == lang) {
-				return(lang);
-				break;
-			}
-		}
-	}
-	return(langUnknown);
-}
-
-uint8_t LanguageIdentifier::getBestLanguage(char** method,
-					    Url* url,
-					    Xml* xml,
-					    Links* links,
-					    LinkInfo* linkInfo,
-					    char* content) {
-	uint8_t langEnum;
-	// Let the site tell us what language it's in
-	langEnum = g_langId.guessLanguageFromTag(xml);
-	*method = "Tag";
-
-	if(langEnum != langUnknown) return langEnum;
-
-	// Get the language from a DMOZ category
-	// Accurate, but low hit rate
-	langEnum = g_langId.guessLanguageFromDMOZ(url->getUrl());
-	*method = "DMOZ";
-	if(langEnum != langUnknown) return langEnum;
-
-
-	// Guess from the TLD
-	uint8_t possibleLanguage = g_langId.guessLanguageFromTld(url->getUrl());
-	if(possibleLanguage) langEnum = possibleLanguage;
-	*method = "TLD";
-	if(langEnum != langUnknown) return langEnum;
-
-	// m_newDoc->getLinks() can return a bad address
-	// Guess from the outlinks
-	langEnum = g_langId.guessLanguageFromOutlinks(links);
-	*method = "Outlinks";
-	if(langEnum != langUnknown) return langEnum;
-	// m_newDoc->getLinks() can return a bad address
-
-	// Guess from the inlinks
-	//	langEnum = g_langId.guessLanguageFromInlinks(linkInfo);
-	//	*method = "Inlinks";
-	if(langEnum != langUnknown) return langEnum;
-
-	// Word frequency count
-	langEnum = xml->getLanguage();
-	*method = "Freq";
-	if(langEnum != langUnknown) return langEnum;
-
-	// Let the doctype tell us what language it's in
-	langEnum = g_langId.guessLanguageFromDoctype(xml, content);
-	*method = "Doctype";
-
-	return langEnum;
-}
-
-
-
-uint8_t LanguageIdentifier::getBestLangsFromVec(char* langCount,
-						//SiteType* typeVec,
-						int32_t *langIds ,
-						uint8_t *langScores ,
-						int32_t tagVecSize) {
-	int32_t bestCount = -1;
-	uint8_t numTags = 0;
-
-	int32_t langTotal = 0;
-	for(int32_t j = 0; j < MAX_LANGUAGES; j++) {	
-		langTotal += langCount[j];
-	}
-	if(langTotal == 0 || langCount[langUnknown] == langTotal)
-		return 0;
-
-	//dont store unknown language
-	langTotal -= langCount[langUnknown];
-	langCount[langUnknown] = 0;
-	
-
-	for(int32_t i = 0; i < tagVecSize; i++) {
-		int32_t maxCount = 0;
-		int32_t maxCountNdx = 0;
-		for(int32_t j = 0; j < MAX_LANGUAGES; j++) {	
-			if(langCount[j] > maxCount) {
-				maxCount = langCount[j];
-				maxCountNdx = j;
-			}
-		}
-		if(i == 0) bestCount = maxCount;
-		//if none found or this one is half as much as previous
-		//then quit.
-		if(maxCount == 0 ||
-		   maxCount < (bestCount/2)) break;
-		//typeVec[i].m_type = maxCountNdx;
-		//typeVec[i].m_score = (uint8_t)((maxCount * 100.0) 
-		//			       / langTotal);
-		langIds   [i] = maxCountNdx;
-		langScores[i] = (uint8_t)((maxCount * 100.0) / langTotal);
-		langCount[maxCountNdx] = 0;
-		numTags++;
-	}
-	return numTags;
-}
-
-
 uint8_t LanguageIdentifier::findLangFromDMOZTopic(char *topic) {
 	int x;
 	for(x = 0; x < (int)(sizeof(langToTopic)/sizeof(uint8_t *)); x++) {
@ -614,228 +57,6 @@ uint8_t LanguageIdentifier::findLangFromDMOZTopic(char *topic) {
 	return(langUnknown);
 }

-uint8_t LanguageIdentifier::guessGBLanguageFromUrl(char *url) {
-	if(!url) return(langUnknown);
-	uint8_t lang;
-	if((lang = guessLanguageFromUrl(url)) != langUnknown)
-		return(lang);
-	char code[6];
-	char *cp = url;
-	memset(code, 0, 6);
-	for(int x = 0; x < 6; x++) {
-		if((cp[x] < 'a' || cp[x] > 'z') &&
-				(cp[x] < 'A' || cp[x] > 'Z') &&
-				cp[x] != '_' && cp[x] != '-')
-			break;
-		code[x] = cp[x];
-	}
-	return(getLanguageFromCountryCode(code));
-}
-
-static inline bool s_checkCharIsBoundary(uint8_t x) {
-		if(x < '0') return(true);
-		if(x > '9' && x < 'A') return(true);
-		if(x > 'Z' && x < 'a') return(true);
-		if(x > 'z' && x < 128) return(true);
-		return(false);
-}
-
-static inline bool s_isRightBoundedAbbr(char *pointer, uint8_t l) {
-	if(s_checkCharIsBoundary(*(pointer + 2)))
-		return(true);
-	if((*(pointer + 3) == '-' || *(pointer + 3) == '_') &&
-			s_checkCharIsBoundary(*(pointer + 5)))
-		return(true);
-	return(false);
-}
-
-static inline bool s_isRightBoundedLanguageWord(char *pointer, uint8_t l) {
-	if(s_checkCharIsBoundary(*(pointer + gbstrlen(getNativeLanguageString(l)))))
-		return(true);
-	if(s_checkCharIsBoundary(*(pointer + gbstrlen(getLanguageString(l)))))
-		return(true);
-	return(false);
-}
-
-uint8_t s_lookForLanguageParam(char *url) {
-	char *cp = url;
-	uint8_t l;
-	// Try to find lan= or lang= or language=
-	while(cp && *cp && (cp = strstr(cp, "lan"))) {
-		if(!s_checkCharIsBoundary(*(cp - 1))) {
-			cp++;
-			continue;
-		}
-		if(!strncmp(cp, "lan=", 4)) cp += 4;
-		else if(!strncmp(cp, "lang=", 5)) cp += 5;
-		else if(!strncmp(cp, "language=", 9)) cp += 9;
-
-		if((l = getLanguageFromName((uint8_t*)cp)) &&
-				s_isRightBoundedLanguageWord(cp, l))
-			return(l);
-
-		if((l = getLanguageFromAbbrN(cp)) &&
-				s_isRightBoundedAbbr(cp, l))
-			return(l);
-		cp++;
-	}
-	// Try to find l=
-	cp = url;
-	while(cp && *cp && (cp = strstr(cp, "l="))) {
-		if(!s_checkCharIsBoundary(*(cp - 1))) {
-			cp++;
-			continue;
-		}
-
-		if((l = getLanguageFromName((uint8_t*)cp)) &&
-				s_isRightBoundedLanguageWord(cp, l))
-			return(l);
-
-		if((l = getLanguageFromAbbrN(cp)) &&
-				s_isRightBoundedAbbr(cp, l))
-			return(l);
-		cp++;
-	}
-	return(0);
-}
-
-uint8_t s_lookForLanguagePrefix(char *url) {
-	char *cp = url;
-	uint8_t l = 0;
-	// Look for a prefix on the url
-	// Do not add a postfix or TLD detector,
-	// they are not good indications at all.
-	if(!strncmp(url, "http://", 7)) cp = url + 7;
-	else cp = url;
-
-	if((l = getLanguageFromAbbrN(cp)) &&
-			s_isRightBoundedAbbr(cp, l))
-		return(l);
-
-	// Lookup, and see if it's on a word boundary
-	if((l = getLanguageFromName((uint8_t*)cp)) &&
-			s_isRightBoundedLanguageWord(cp, l))
-		return(l);
-	return(0);
-}
-
-
-uint8_t LanguageIdentifier::guessLanguageFromUrl(char *url) {
-	int len = 0;
-	char *cp = url;
-	char code[3];
-	uint8_t l = 0;
-
-	if(!url) return(langUnknown);
-
-	// Look for a parameter that would indicate the language
-	if((l = s_lookForLanguageParam(url))) return(l);
-
-	// Look for a prefix that would indicate the language
-	if((l = s_lookForLanguagePrefix(url))) return(l);
-
-	// if no slash, start at the end of the link
-	if(!(cp = strchr(url, '/')))
-		cp = url + (gbstrlen(url) - 1);
-
-	// find last dot
-	while(*cp && cp > url && *cp != '.') {
-		cp--;
-		len++;
-	}
-
-	// No dot?
-	if(cp <= url) return(langUnknown);
-
-	// skip '.'
-	len--; cp++;
-
-	code[0] = cp[0];
-	code[1] = cp[1];
-	code[2] = 0;
-
-	return(getLanguageFromCountryCode(code));
-}
-
-static inline int s_findMaxInList(int *list, int numItems) {
-	int max, oldmax, idx;
-	if(!list) return(0);
-	max = oldmax = INT_MIN;
-	idx = 0;
-	for(int x = 0; x < numItems; x++) {
-		if(list[x] >= max) {
-			oldmax = max;
-			max = list[x];
-			idx = x;
-		}
-	}
-	if(oldmax == max) return(0);
-	return(idx);
-}
-
-uint8_t LanguageIdentifier::guessLanguageFreqCount(Xml *xml,
-		int pageLimit /* = 512 */) {
-	if(!xml) return(langUnknown);
-
-	int votes[MAX_LANGUAGES];
-	int limit = xml->getNumNodes();
-	int scores[MAX_LANGUAGES];
-
-	if(pageLimit < limit) limit = pageLimit;
-
-	memset(votes, 0, sizeof(int) * MAX_LANGUAGES);
-
-	// Do term frequency count
-	for(int x = 0; x < limit; x++) {
-		if(xml->isTag(x) || xml->getNodeLen((int32_t)x) < 2) continue;
-		char *cp = g_speller.getPhraseRecord(xml->getNode((int32_t)x),
-						     xml->getNodeLen((int32_t)x));
-		if(!cp) continue;
-		memset(scores, 0, sizeof(int) * MAX_LANGUAGES);
-		while(*cp) {
-			// skip leading whitespace
-			while(*cp && (*cp == ' ' || *cp == '\t')) cp++;
-			// get language
-			int l = atoi(cp);
-			// skip to next delimiter
-			while(*cp && *cp != '\t') cp++;
-			// skip over tab
-			cp++;
-			// get score
-			scores[l] = atoi(cp);
-			// skip to next delimiter
-			while(*cp && *cp != '\t') cp++;
-		}
-		votes[s_findMaxInList(scores, MAX_LANGUAGES)]++;
-	}
-
-	// Find max
-	int max = 0;
-	int maxidx = 0;
-	int oldmax = 0;
-	for(int x = 0; x < MAX_LANGUAGES; x++) {
-		if(votes[x] < max) continue;
-		oldmax = max;
-		max = votes[x];
-		maxidx = x;
-	}
-
-	if(max == 0) maxidx = 0;
-
-#if 0
-	// English, British, and Australian are no longer separate
-	// If it's a toss up between any version of English, go with it.
-	if((max == langEnglish || max == langAustralia || max == langBritish) &&
-			(oldmax == langEnglish || oldmax == langAustralia || oldmax == langBritish))
-		return(maxidx);
-#endif // 0
-
-	// Note the winner
-	if(oldmax <= 0 || max > oldmax)
-		return maxidx;
-	return langUnknown;
-}
-
 uint8_t LanguageIdentifier::guessCountryTLD(const char *url) {
 	uint8_t country = 0;
 	char code[3];
@ -864,46 +85,3 @@ uint8_t LanguageIdentifier::guessCountryTLD(const char *url) {
 	}
 	return(country);
 }
-
-static int s_wordLen(char *str) {
-	char *cp = str;
-	while(*cp && *cp != ' ' && *cp != ';' &&*cp != '\t' &&
-			*cp != '\n' && *cp != '\r' && *cp != '.' && *cp != ',')
-		cp++;
-	return(cp - str);
-}
-
-static bool s_isLangTag(char *str) {
-	int len = s_wordLen(str);
-	if(len == 2) return(true);
-	if(len != 5) return(false);
-	if(str[2] == '_' || str[2] == '-') return(true);
-	return(false);
-}
-
-static uint8_t s_getCountryFromSpec(char *str) {
-	char code[6];
-	memset(code, 0,6);
-	gbmemcpy(code, str, s_wordLen(str));
-	for(int x = 0; x < 6; x++)
-		if(code[x] > 'A' && code[x] < 'Z') code[x] -= ('A' - 'a');
-	if(code[2] == '_' || code[2] == '-')
-		return g_countryCode.getIndexOfAbbr(&code[3]);
-	return g_countryCode.getIndexOfAbbr(code);
-}
-
-uint8_t LanguageIdentifier::guessCountryFromUserAgent(char *ua) {
-	if(!ua) return(0);
-	uint8_t country = 0;
-	while(*ua) {
-		if(!(ua = skipwhite(ua)))
-			return(0);
-		if(s_isLangTag(ua) &&
-				(country = s_getCountryFromSpec(ua)) != 0)
-				return(country);
-		if(!(ua = skipword(ua)))
-			return(0);
-	}
-	return(0);
-}
-
--- a/LanguageIdentifier.h
+++ b/LanguageIdentifier.h
@ -3,22 +3,12 @@
 /// Contains the main utility function, guessLanguage(), and all
 /// the support routines for detecting the language of a web page.
 ///
-/// 2007 May 24 09:02:52
-/// $ID$
-/// $Author: John Nanney$
-/// $Workfile$
-/// $Log$
-///

 // using a different macro because there's already a Language.h
 #ifndef LANGUAGEIDENTIFIER_H
 #define LANGUAGEIDENTIFIER_H

 #include "gb-include.h"
-#include "Xml.h"
-#include "Linkdb.h"
-//#include "LinkInfo.h"
-#include "Query.h"

 /// Contains methods of language identification by various means.
 class LanguageIdentifier {
@ -29,97 +19,6 @@ class LanguageIdentifier {
 		/// Destructor, does very little.
 		~LanguageIdentifier() { return; }

-		/// Get the language from the page's lang="" tag.
-		///
-		/// Looks for a lang="x" property in the HTML, BODY, or HEAD
-		/// tag. Returns the first match. This is usually a very
-		/// accurate guess of the language, since the author of the
-		/// page went through all the trouble to make sure it was
-		/// in there.
-		///
-		/// @param xml the page's xml object
-		///
-		/// @return the language, or langUnknown
-		///
-		uint8_t guessLanguageFromTag(Xml *xml);
-
-		/// Guess the language from the TLDs of outlinks found in the page.
-		///
-		/// TLDs which are ambiguous like .com are skipped.
-		///
-		/// @param links a list of links
-		///
-		/// @return the language, or langUnknown
-		///
-		uint8_t guessLanguageFromOutlinks(Links *links);
-
-		/// Guess the language from the page's TLD.
-		///
-		/// @param linktext the ascii URL
-		///
-		/// @return the language, or langUnknown
-		///
-		uint8_t guessLanguageFromTld(char *linktext);
-
-		/// Guess the language from the languages of the inlinks.
-		///
-		/// @param linkInfo 
-		///
-		/// @return the language, or langUnknown
-		///
-		uint8_t guessLanguageFromInlinks(LinkInfo *linkInfo, int32_t ip);
-
-		/// Determine whether a given TLD is suitable for language detection.
-		/// @param tld the TLD in ascii
-		/// @param len the length of tld
-		/// @return true if suitable, false if not
-		///
-		inline bool isAmbiguousTLD(char *tld, int len);
-
-		/// Return the greater of two ints.
-		inline int maxOf(int a, int b) {
-			if(b > a) return(b);
-			return(a);
-		}
-
-		/// Guesses language from the DOCTYPE string present in many pages.
-		///
-		/// @param xml the page's xml object
-		/// @param content the page's content, for finding the doctype
-		///
-		/// @return the language, or langUnknown
-		///
-		uint8_t guessLanguageFromDoctype(Xml *xml, char *content);
-
-		/// Guess a language from a tag in the user agent string.
-		///
-		/// @param str the user agent string
-		///
-		/// @return the language, or langUknown
-		///
-		uint8_t guessLanguageFromUserAgent(char *str);
-
-		/// Find an address in DMOZ for the language.
-		///
-		/// Looks up the page address in the category language tables.
-		///
-		/// @param addr the page address
-		///
-		/// @return language, or langUnknown if not found
-		///
-		uint8_t guessLanguageFromDMOZ(char *addr);
-
-		/// Guess the query language from the query terms.
-		///
-		/// This algorithm looks for two consecutive terms with the
-		/// same language.
-		///
-		/// @param q the query object
-		///
-		/// @return the language, or langUnknown
-		///
-		uint8_t guessLanguageFromQuery(Query *q);
-
 		/// Find a language from DMOZ topic.
 		///
 		/// The function name is a bit misleading, we expect
@ -131,29 +30,7 @@ class LanguageIdentifier {
 		///
 		uint8_t findLangFromDMOZTopic(char *topic);

-
-	uint8_t getBestLanguage(char** method,
-				Url* url,
-				Xml* xml,
-				Links* links,
-				LinkInfo* linkInfo,
-				char* content);
-
-	uint8_t getBestLangsFromVec(char* langCount,
-				    //SiteType* typeVec,
-				    int32_t *langIds ,
-				    uint8_t *langScores ,
-				    int32_t tagVecSize);
-
-	uint8_t guessGBLanguageFromUrl(char *url);
-	uint8_t guessLanguageFromUrl(char *url);
-
-	uint8_t guessLanguageFreqCount(Xml *xml,
-			int pageLimit /* = 512 */);
-
 	uint8_t guessCountryTLD(const char *url);
-
-	uint8_t guessCountryFromUserAgent(char *ua);
 };

 extern class LanguageIdentifier g_langId;
--- a/7
+++ b/7
@ -45,7 +45,7 @@ OBJS =  UdpSlot.o Rebalance.o \
 	Speller.o \
 	PingServer.o StopWords.o TopTree.o \
 	Parms.o Pages.o \
-	Unicode.o iana_charset.o Iso8859.o \
+	Unicode.o iana_charset.o \
 	SearchInput.o \
 	Categories.o Msg2a.o PageCatdb.o PageDirectory.o \
 	SafeBuf.o Datedb.o \
@ -56,7 +56,7 @@ OBJS =  UdpSlot.o Rebalance.o \
 	PageLogView.o Msg1f.o Blaster.o MsgC.o \
 	PageSpam.o Proxy.o PageThreads.o Linkdb.o \
 	matches2.o LanguageIdentifier.o \
-	Language.o Repair.o Process.o \
+	Repair.o Process.o \
 	Abbreviations.o \
 	RequestTable.o TuringTest.o Msg51.o \
 	Msg40.o Msg4.o SpiderProxy.o \
@ -477,9 +477,6 @@ Spider.o:
 test_parser2.o:
 	$(CXX) $(DEFS) $(CPPFLAGS) -O2 -c $*.cpp

-Language.o:
-	$(CXX) $(DEFS) $(CPPFLAGS) -O3 -c $*.cpp
-
 PostQueryRerank.o:
 	$(CXX) $(DEFS) $(CPPFLAGS)  -O2 -c $*.cpp

--- a/Msg3a.cpp
+++ b/Msg3a.cpp
@ -11,8 +11,6 @@
 static void gotReplyWrapper3a     ( void *state , void *state2 ) ;
 //static void gotRerankedDocIds     ( void *state );

-int32_t *g_ggg = NULL;
-
 Msg3a::Msg3a ( ) {
 	constructor();
 }
--- a/Msg3a.h
+++ b/Msg3a.h
@ -49,9 +49,6 @@ public:
 			 void        (* callback) ( void *state ) ,
 			 class Host *specialHost = NULL );

-
-	bool gotTermFreqs();
-
 	// Msg40 calls this to get Query m_q to pass to Summary class
 	Query *getQuery ( ) { return m_q ; };

--- a/Msg40.cpp
+++ b/Msg40.cpp
@ -25,7 +25,7 @@ bool printHttpMime ( class State0 *st ) ;

 //static void handleRequest40              ( UdpSlot *slot , int32_t netnice );
 //static void gotExternalReplyWrapper      ( void *state , void *state2 ) ;
-static void gotCacheReplyWrapper         ( void *state );
+//static void gotCacheReplyWrapper         ( void *state );
 static void gotDocIdsWrapper             ( void *state );
 static bool gotSummaryWrapper            ( void *state );
 //static void didTaskWrapper               ( void *state );
@ -508,15 +508,15 @@ bool Msg40::gotExternalReply ( ) {
 */
 		
 // msg17 calls this after it gets a reply
-void gotCacheReplyWrapper ( void *state ) {
-	Msg40 *THIS = (Msg40 *)state;
-	// reset g_errno, we're just a cache
-	g_errno = 0;
-	// handle the reply
-	if ( ! THIS->gotCacheReply() ) return;
-	// otherwise, call callback
-	THIS->m_callback ( THIS->m_state );
-}
+//void gotCacheReplyWrapper ( void *state ) {
+//	Msg40 *THIS = (Msg40 *)state;
+//	// reset g_errno, we're just a cache
+//	g_errno = 0;
+//	// handle the reply
+//	if ( ! THIS->gotCacheReply() ) return;
+//	// otherwise, call callback
+//	THIS->m_callback ( THIS->m_state );
+//}

 bool Msg40::gotCacheReply ( ) {
 	// if not found, get the result the hard way
--- a/Multicast.cpp
+++ b/Multicast.cpp
@ -1052,7 +1052,6 @@ void sleepWrapper1 ( int bogusfd , void    *state ) {
 	int32_t docsWanted;
 	int32_t firstResultNum;
 	int32_t nqterms;
-	int32_t rerankRuleset;
 	int32_t wait;
 	char exact;
 	//int32_t hid = -1;
--- a/PageResults.h
+++ b/PageResults.h
@ -2,9 +2,9 @@
 #define _PAGERESULTS_H_

 #include "SafeBuf.h"
-#include "Language.h" // MAX_FRAG_SIZE
 #include "Msg40.h"
 #include "Msg0.h"
+#include "Speller.h" // MAX_FRAG_SIZE

 // height of each search result div in the widget
 #define RESULT_HEIGHT 120
--- a/PageRoot.cpp
+++ b/PageRoot.cpp
@ -1851,49 +1851,6 @@ bool sendPageRoot ( TcpSocket *s , HttpRequest *r, char *cookie ) {
 		return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno)); 
 	}

-
-	// get the collection rec
-	/*
-	CollectionRec *cr = g_collectiondb.getRec ( coll );
-	uint8_t *hp = NULL;
-	int32_t  hpLen;
-	int64_t  docsInColl = -1;
-	if ( ! cr ) {
-		// use the default 
-		Parm *pp = g_parms.getParm ( "hp" );
-		if ( ! pp ) {
-			g_errno = ENOTFOUND;
-			g_msg = " (error: no such collection)";		
-			return g_httpServer.sendErrorReply(s,500,
-							   mstrerror(g_errno));
-		}
-		hp       = (uint8_t*)pp->m_def;
-		if ( hp ) hpLen = uint8strlen ( hp );
-		if ( hpLen <= 0 || ! hp )
-			log(LOG_INFO,"http: No root page html present.");
-	} else {
-		if(cr->m_useLanguagePages) {
-			uint8_t lang = g_langId.guessGBLanguageFromUrl(r->getHost());
-			if(lang && (hp = g_languagePages.getLanguagePage(lang)) != NULL) {
-					hpLen = uint8strlen(hp);
-					// Set sort language as well
-					// This might not be a good idea, as it
-					// overrides any other setting. May be
-					// better to let the user agent string
-					// tell us what the user wants.
-					strcpy(cr->m_defaultSortLanguage,
-							getLanguageAbbr(lang));
-			}
-		}
-		if(!hp) {
-			hp    = (uint8_t*)cr->m_htmlRoot;
-			hpLen = cr->m_htmlRootLen;
-		}
-		//RdbBase *base = getRdbBase ( RDB_CHECKSUMDB , coll );
-		RdbBase *base = getRdbBase ( (uint8_t)RDB_CLUSTERDB , coll );
-		if ( base ) docsInColl = base->getNumGlobalRecs();
-	}
-	*/
 	// print the page out
 	/*
 	expandRootHtml     ( sb, 
--- a/Speller.cpp
+++ b/Speller.cpp
@ -11,114 +11,6 @@
 #include <stdio.h>
 #include <ctype.h>

-/*
-static void handleRequestSpeller ( UdpSlot *slot , int32_t netnice );
-
-static void gotSpellerReplyWrapper (void *state, void *state2);
-
-bool Speller::registerHandler ( ) {
-	// . register ourselves with the udp server
-	// . it calls our callback when it receives a msg of type 0x39
-	if ( ! g_udpServer.registerHandler ( 0x3d, handleRequestSpeller )) 
-		return false;
-	return true;
-}
-
-// . handle a request to get a linkInfo for a given docId/url/collection
-// . returns false if slot should be nuked and no reply sent
-// . sometimes sets g_errno on error
-void handleRequestSpeller ( UdpSlot *slot , int32_t netnice ) {
-	// The request is the string to be spellchecked, null ended
-	char *request = slot->m_readBuf;
-
-	// first tells us if we should narrow the search stuff
-	bool narrowP = *(bool *) request;
-	request += sizeof(bool);
-
-	// is it found in dict or pop words
-	bool found;
-	int32_t score;
-	char reco[MAX_PHRASE_LEN];
-	int32_t pop;
-	int64_t start = gettimeofdayInMilliseconds();
-	bool recommendation = g_speller.m_language[langEnglish].
-		getRecommendation( request, gbstrlen(request), 
-				   reco, MAX_PHRASE_LEN, 
-				   &found, &score,
-				   &pop );
-
-	log ( LOG_DEBUG,"speller: %s --> %s", request, reco );
-
-	int32_t numNarrow = 0;
-	char narrow[MAX_NARROW_SEARCHES * MAX_PHRASE_LEN];
-	int32_t narrowPops[MAX_NARROW_SEARCHES];
-	//if ( narrowP )
-	//	numNarrow = g_speller.m_language[langEnglish].
-	//		narrowPhrase ( request, narrow, narrowPops,
-	//			       MAX_NARROW_SEARCHES );
-	
-	// calculate total reply size
-	// int32_t replySize = found + recommendation + score + pop + reco
-	int32_t replySize = sizeof(bool) + sizeof(bool) + 4 + 4 + 
-		gbstrlen(reco) + 1;
-
-	if ( narrowP ){
-		replySize += 4; // numPhrases 
-		for ( int32_t i = 0; i < numNarrow; i++ )
-			replySize += 4 + gbstrlen(&narrow[i*MAX_FRAG_SIZE]) + 1;
-	}
-
-	char *reply = (char*) mmalloc(replySize, "SpellerReplyBuf");
-	if ( !reply ) {
-		g_errno = ENOMEM;
-		//g_udpServer.sendReply_ass( NULL, 0, NULL, 0, slot );
-		g_udpServer.sendErrorReply( slot , g_errno );
-		return;
-	}
-	char *p = reply;
-
-	*(bool *)p = found;
-	p += sizeof(bool);
-	
-	*(bool *)p = recommendation;
-	p += sizeof(bool);
-
-	// store the score and pop
-	*(int32_t *) p = score; p += 4;
-	*(int32_t *) p = pop; p += 4;
-
-	// store the recommendation
-	strcpy( p, reco );
-	p += gbstrlen(reco) + 1;
-	if ( narrowP ){
-		// store the number of narrow phrases found
-		*(int32_t *) p = numNarrow;
-		p += 4;
-		for ( int32_t i = 0; i < numNarrow; i++ ){
-			*(int32_t *)p = narrowPops[i];
-			p += 4;
-			strcpy(p, &narrow[i * MAX_FRAG_SIZE]);
-			p += gbstrlen(&narrow[i * MAX_FRAG_SIZE]) + 1;
-		}
-	}
-
-	//sanity check
-	if ( p - reply != replySize ){
-		char *xx = NULL; *xx = 0;
-	}
-
-	int64_t end = gettimeofdayInMilliseconds();
-	if ( end - start > 1 )
-		log (LOG_INFO,"speller: took %"INT64" ms to spellcheck "
-		     "fragment %s", end-  start, request);
-	g_udpServer.sendReply_ass ( reply   ,
-				    replySize, 
-				    reply   , 
-				    replySize,
-				    slot    );
-}
-*/
-
 Speller g_speller;

 Speller::Speller(){
@ -219,764 +111,8 @@ void Speller::test ( char *ff ) {
 	fclose(fd);
 }

-/*
-///////////////////////////////////////////////////////
-// RECOMMENDATION ROUTINES BELOW HERE
-//
-// These will spellcheck and give recommendations
-///////////////////////////////////////////////////////
-
-bool Speller::canStart( QueryWord *qw ) {
-	// can only start with a alpha character, no numeric
-	if ( ! is_alnum_utf8 ( qw->m_word+0 ) ) return false;
-
-	if ( qw->m_ignoreWord &&
-	     qw->m_ignoreWord != IGNORE_CONNECTED &&
-	     qw->m_ignoreWord != IGNORE_QUOTED ) return false;
-
-	// don't check 'rom' in phrase "cd-rom", or 't' in "ain't"
-	if ( qw->m_leftConnected ) 
-		return false;
-
-	// don't start with a stop word
-	if ( qw->m_isStopWord )
-		return false;
-	
-	// a lot of field terms should not be spell checked
-	if ( qw->m_fieldCode ) {
-		if ( qw->m_fieldCode != FIELD_TITLE   &&
-		     qw->m_fieldCode != FIELD_CITY    &&
-		     qw->m_fieldCode != FIELD_AUTHOR  &&
-		     qw->m_fieldCode != FIELD_COUNTRY   )
-			return false;
-	}
-	return true;
-}
-
-
-// . returns false if blocked
-//   recommended something different than original query, "q"
-//   and false otherwise
-// . also returns false and sets g_errno on error
-// . stores recommended query in "dst" and NULL terminates it
-// . if dst is too small it will bitch and return true with g_errno set
-bool Speller::getRecommendation ( Query *q, 
-				  bool   spellcheck,
-				  char  *dst, // recommendation destination
-				  int32_t   dstLen, // recommendation max len
-				  bool   narrowSearch,
-				  char  *narrow, // narrow search
-				  int32_t   narrowLen,  // narrow search len
-				  int32_t  *numNarrows, // num narrows found
-				  void  *state, 
-				  void (*callback)(void *state) ){
-	*dst = '\0';
-	*narrow = '\0';
-	// no narrowing search if spellchecking is off
-	if ( !spellcheck )
-		return true;
-
-	// don't spellcheck queries that are more than MAX_FRAG_SIZE int32_t.
-	if ( q->getQueryLen() >= MAX_FRAG_SIZE )
-		return true;
-
-	StateSpeller *st ;
-	try { st = new (StateSpeller); }
-	catch ( ... ) { 
-		g_errno = ENOMEM;
-		log("Speller: new(%i): %s", sizeof(StateSpeller),
-		    mstrerror(g_errno));
-		return true; 
-	}
-	mnew ( st , sizeof(StateSpeller) , "State00" );
-       
-	st->m_state = state;
-	st->m_callback = callback;
-	st->m_q = q;
-	st->m_spellcheck = spellcheck;
-	st->m_dst = dst;
-	st->m_dend = dst + dstLen;
-	st->m_narrowSearch = narrowSearch;
-	st->m_nrw = narrow;
-	st->m_nend = narrow + narrowLen;
-	st->m_numNarrow = numNarrows;
-	*st->m_numNarrow = 0;
-	st->m_start = gettimeofdayInMilliseconds();
-	st->m_numFrags = 0;
-	st->m_numFragsReceived = 0;
-	
-	// . break query down into fragments
-	// . each fragment is a string of words
-	// . quotes and field names will separate fragments
-	// . TODO: make field data in its own fragment
-	int32_t nqw = q->m_numWords;
-
-	for ( int32_t i = 0 ; i < nqw ; i++ ) {
-		// get a word in the Query to start a fragment with
-		QueryWord *qw = &q->m_qwords[i];
-		// can he start the phrase?
-		if ( ! canStart( qw ) )
-			continue;
-
-		bool inQuotes  = qw->m_inQuotes;
-		char fieldCode = qw->m_fieldCode;
-		// . get longest continual fragment that starts with word #i
-		// . get the following words that can be in a fragment
-		//   that starts with word #i
-		// . start of the frag
-		int32_t  endQword = i;
-		int32_t  startQword = i;
-		for ( ; i < nqw ; i++ ) {
-			// . skip if we should
-			// . keep punct, however
-			QueryWord *qw1 = &q->m_qwords[i];
-			if ( qw1->m_opcode                 ) break;
-			if ( qw1->m_inQuotes  != inQuotes  ) break;
-			if ( qw1->m_fieldCode != fieldCode ) break;
-			if ( qw1->m_ignoreWord == IGNORE_FIELDNAME ) break;
-			if ( qw1->m_phraseSign && 
-			     !qw1->m_rightConnected ) break;
-			// are we punct?
-			if ( ! is_alnum_utf8(qw1->m_word) ) 
-				endQword = i - 1;
-			else    
-				endQword = i;
-		}
-		// revisit this i in big loop since we did not include it
-		i = endQword;
-
-		//create a new stateFrag
-		StateFrag *stFrag;
-		try { stFrag = new (StateFrag); }
-		catch ( ... ) { 
-			mdelete ( st, sizeof(StateSpeller),  "StateSpeller" );
-			delete (st);
-			g_errno = ENOMEM;
-			log("Speller: new(%i): %s", sizeof(StateFrag),
-			    mstrerror(g_errno));
-			//continue;
-			return true;
-		}
-		mnew ( stFrag, sizeof(StateFrag),
-		       "StateFrag" );
-
-		stFrag->m_state = (void*) st;
-		stFrag->m_narrowPhrase = st->m_narrowSearch;
-		stFrag->m_q = q;
-		stFrag->m_startQword = startQword;
-		stFrag->m_endQword = endQword;
-		stFrag->m_errno = 0;
-		st->m_stFrag[st->m_numFrags] = stFrag;
-		st->m_numFrags++;
-		// blocked
-		if ( !getRecommendation( stFrag ) ){
-			continue;
-		}
-		st->m_numFragsReceived++;
-	}
-	// if outstanding frags
-	if ( st->m_numFragsReceived < st->m_numFrags )
-		return false;
-	gotFrags(st);
-	// delete state
-	mdelete ( st, sizeof(StateSpeller),  "StateSpeller" );
-	delete (st);
-	return true;
-}
-
-bool Speller::getRecommendation ( StateFrag *st ){
-	st->m_recommended = false;
-	st->m_numFound = 0;
-	st->m_numNarrowPhrases = 0;
-	char *dst = st->m_dst;
-	
-	// normalize this fragment and store in "dst"
-	bool wasAlnum = true;
-	for ( int32_t i = st->m_startQword; i <= st->m_endQword; i++ ){
-		// start of each word
-		st->m_wp[i] = dst;
-		char *p = st->m_q->m_qwords[i].m_word;
-		int32_t  plen = st->m_q->m_qwords[i].m_wordLen;
-		for ( int32_t j = 0; dst-st->m_dst <MAX_FRAG_SIZE&&j<plen;j++ ) {
-			if ( !getClean_utf8(p+j) ) 
-				continue;
-			// skip back to back punct/spaces
-			if (j>0 && !is_alnum_utf8(p+j) &&!wasAlnum)
-				continue;
-			*dst = p[j];
-			dst++;
-			wasAlnum = is_alnum_utf8 ( p+j );
-		}
-		st->m_wplen[i] = dst - st->m_wp[i];
-		st->m_isfound[i] = false;
-	}
-	*dst = '\0';
-	
-	// debug msg
-	log(LOG_DEBUG,"speller: Getting recommendation for frag=%s",
-	    st->m_dst);
-
-	// give each word in the phrase a chance to start the subphrase
-	int32_t maxPhrase = st->m_endQword - st->m_startQword;
-	if ( maxPhrase > MAX_WORDS_PER_PHRASE )
-		maxPhrase = MAX_WORDS_PER_PHRASE;
-
-	// store the phraseLen and posn
-	st->m_pLen = maxPhrase;
-	st->m_pPosn = st->m_startQword;
-	
-	return launchReco(st);
-}
-
-bool Speller::launchReco(StateFrag *st){
-	// if we checked all the phrases or found all the words
-	if ( st->m_numFound == st->m_endQword - st->m_startQword + 1 || 
-	     st->m_pLen < 0 ){
-		return true;
-	}
-
-	bool launchPhrase = false;
- 	for ( ; st->m_pLen >= 0; st->m_pLen-- ){
-		for ( ; st->m_pPosn + st->m_pLen <= st->m_endQword; 
-		      st->m_pPosn++ ) {
-			// find a word that can start the phrase
-			QueryWord *qw = &st->m_q->m_qwords[st->m_pPosn];
-			if ( !canStart (qw) )
-				continue;
-			// don't do this phrase if we have found even one
-			// word in the phrase
-			bool found = false;
-			for ( int32_t k = st->m_pPosn; 
-			      k <= st->m_pPosn + st->m_pLen; k++ ) {
-				if ( st->m_isfound[k] ){
-					found = true;
-					break;
-				}
-			}
-			if ( found )
-				continue;
-
-			// cannot end on a stop word, punct, right-connected
-			// word
-			QueryWord *qwEnd = 
-				&st->m_q->m_qwords[st->m_pPosn + st->m_pLen];
-			if ( qwEnd->m_isStopWord || qwEnd->m_isPunct ||
-			     qwEnd->m_rightConnected )
-				continue;
-			
-			// found someone to start the phrase with
-			// what is the new phrase parms?
-			st->m_a = st->m_wp[st->m_pPosn];
-			st->m_b = st->m_wp[st->m_pLen + st->m_pPosn]+
-				st->m_wplen[st->m_pLen + st->m_pPosn];
-			
-			// also store the tmp char that we are changing
-			st->m_c = *(st->m_b);
-			*(st->m_b) = '\0';
-
-			// if it is just a number, don't get recommendation
-			// lest we emabarrass ourselves
-			if ( st->m_pPosn == 0 && is_digit(st->m_a[0]) ) {
-				char *k = st->m_a+1;
-				while ( is_digit(*k) ) k++;
-				if ( ! *k ) { 
-					*st->m_b = st->m_c ; 
-					continue;
-				}
-			}
-
-			// if it is an adult phrase, don't get a recommendation
-			// check if isAdult really finds a word.
-			char *adultLoc = NULL;
-			if ( isAdult(st->m_a, gbstrlen(st->m_a), &adultLoc) &&
-			     ( adultLoc == st->m_a || *(adultLoc-1) == ' ' ) ){
-				// mark as found
-				for ( int32_t k = st->m_pPosn; 
-				      k <= st->m_pPosn + st->m_pLen; k++ )
-					st->m_isfound[k] = true;
-				*(st->m_b) = st->m_c;
-				continue;
-			}
-			// if the phrase is in dict or in the top pop words,
-			// phrase is found. Don't check if we are narrowing 
-			// the phrase because we need to multicast anyways
-			uint64_t h ;
-			h = hash64d(st->m_a, gbstrlen(st->m_a) );
-			if ( !st->m_narrowPhrase && 
-			     getPhrasePopularity( st->m_a, h, false ) > 0 ){
-				// mark as found
-				for ( int32_t k = st->m_pPosn; 
-				      k <= st->m_pPosn + st->m_pLen; k++ )
-					st->m_isfound[k] = true;
-				*(st->m_b) = st->m_c;
-				continue;
-			}
-			launchPhrase = true;
-			break;
-		}
-		if ( launchPhrase )
-			break;
-		st->m_pPosn = st->m_startQword;
-	}
-
-	if ( st->m_pLen < 0 ){
-		return true;
-	}
-
-	// debug msg
-	log(LOG_DEBUG,"speller: ----------");
-	log(LOG_DEBUG,"speller: Checking phrase=%s", st->m_a);
-
-
-	// launch for all the splits
-	st->m_numRequests = 0;
-	st->m_numReplies = 0;
-
-
-	int32_t hostsPerSplit = g_hostdb.m_numHosts / g_hostdb.m_indexSplits;
-	// don't send to twins...
-	hostsPerSplit /= g_hostdb.m_numHostsPerShard;
-	int32_t mySplit = g_hostdb.m_hostId % g_hostdb.m_indexSplits;
-
-	int32_t key = st->m_q->getQueryHash();//0;
-	int32_t timeout = 30;
-	int32_t niceness = 0;
-	char request[MAX_FRAG_SIZE + 4];
-	char *p = request;
-	*(bool *)p = st->m_narrowPhrase;
-	p += sizeof(bool);
-	strcpy ( p, st->m_a );
-	// send the null end too
-	p += gbstrlen(st->m_a)+1;
-	int32_t plen = p - request;
-	for ( int32_t i = 0; i < hostsPerSplit; i++ ){
-		// get the hostId of the host we're sending to
-		uint32_t hostId = 
-			mySplit + ( i * g_hostdb.m_indexSplits );
-		Host *h = g_hostdb.getHost(hostId);
-		st->m_mcast[i].reset();
-
-		bool status = st->m_mcast[i].
-			send(request   ,
-			     plen      , // request size
-			     0x3d      , // msgType 0x3d
-			     false     , // multicast owns m_request?
-			     h->m_groupId, // group to send to (groupKey)
-			     false     , // send to whole group?
-			     key       , 
-			     st        , // state data
-			     NULL      , // state data
-			     gotSpellerReplyWrapper ,
-			     timeout      , // in seconds
-			     niceness  ,
-			     false     , // realtime?
-			     -1        , // m_q->m_bestHandlingHostId ,
-			     NULL      , // m_replyBuf   ,
-			     0         , // MSG39REPLYSIZE,
-			     // this is true if multicast should free
-			     // the
-			     // reply, otherwise caller is responsible
-			     // for freeing it after calling
-			     // getBestReply).
-			     // actually, this should always be false,
-			     // there
-			     // is a bug in Multicast.cpp.
-			     false        );
-
-		if (!status){
-			st->m_numReplies++;
-			log("speller: Multicast had error: %s",
-			    mstrerror(g_errno));
-			st->m_errno = g_errno;
-			continue;
-		}
-		// blocked
-		else
-			st->m_numRequests++;
-	}
-
-	if ( st->m_numReplies == st->m_numRequests )
-		return true;
-	return false;
-}
-
-void gotSpellerReplyWrapper( void *state, void *state2 ){
-	StateFrag *stFrag = (StateFrag *) state;
-	stFrag->m_numReplies++;
-	if ( stFrag->m_numReplies < stFrag->m_numRequests )
-		return;
-	// blocked
-	if ( !g_speller.gotSpellerReply(stFrag) )
-		return;
-
-	StateSpeller *st = (StateSpeller *)stFrag->m_state;
-	// One more frag received
-	st->m_numFragsReceived++;
-	if ( st->m_numFragsReceived < st->m_numFrags )
-		return;
-
-	g_speller.gotFrags(st);
-	// callback
-	st->m_callback( st->m_state );
-	// delete state
-	mdelete ( st, sizeof(StateSpeller),  "StateSpeller" );
-	delete (st);
-}
-
-bool Speller::gotSpellerReply( StateFrag *st ){
-	int32_t minScore = LARGE_SCORE;
-	int32_t maxPop = -1;
-	char *bestReco = NULL;
-
-	char *reply[MAX_UNIQUE_HOSTS_PER_SPLIT];
-	int32_t  replySize[MAX_UNIQUE_HOSTS_PER_SPLIT];
-	int32_t  replyMaxSize[MAX_UNIQUE_HOSTS_PER_SPLIT];
-	bool  freeit;
-	bool  found = false; //phrase was found in dict or pop words
-	int32_t hostsPerSplit = g_hostdb.m_numHosts / g_hostdb.m_indexSplits;
-	// don't send to twins...
-	hostsPerSplit /= g_hostdb.m_numHostsPerShard;
-
-	int32_t  numNarrowPhrases[MAX_UNIQUE_HOSTS_PER_SPLIT];
-	char *narrowPtrs[MAX_UNIQUE_HOSTS_PER_SPLIT];
-
-	// init narrowSearch arrays
-	for ( int32_t i = 0; i < MAX_UNIQUE_HOSTS_PER_SPLIT; i++ ){
-		numNarrowPhrases[i] = 0;
-		narrowPtrs[i] = NULL;
-	}
-
-	for ( int32_t i = 0; i < hostsPerSplit; i++ ){
-		reply[i] = st->m_mcast[i].getBestReply( &replySize[i] ,
-							&replyMaxSize[i] ,
-							&freeit );
-		// multicast may have an empty reply buffer if there was an
-		// OOM error or something. m_errno should have been set, but
-		// we have to loop through all the multicasts to free the
-		// reply buffers.
-		char *p = reply[i];
-
-		if ( g_errno || st->m_errno || !p){
-			continue;
-		}
-		// was is found in dict
-		bool foundInDict = *(bool *)p;
-		p += sizeof(bool);
-		if ( foundInDict )
-			found = true;
-
-		// first is if there is a recommendation or not
-		bool recommendation = *(bool *) p;
-		p += sizeof (bool);
-
-		if ( !recommendation && !st->m_narrowPhrase )
-			continue;
-
-		int32_t score = *(int32_t *)p;
-		p += 4;
-		int32_t pop = *(int32_t *)p;
-		p += 4;
-
-		if ( recommendation ){
-			log ( LOG_DEBUG,"speller: Received reco %s, "
-			      "score=%"INT32", pop=%"INT32"", p, score, pop );
-
-			// we have a recommendation with score and pop
-			// choose the one with the lowest score, and if the
-			// score is same then the max pop 
-			// HACK: we are getting bad recommendations for smaller
-			// popularities. So don't consider them
-			if ( pop > 8 && ( score < minScore || 
-				   ( score == minScore && pop > maxPop ) ) ){
-				bestReco = p;
-				minScore = score;
-				maxPop = pop;
-			}
-		}
-
-		p += gbstrlen(p) + 1;
-		if ( st->m_narrowPhrase ){
-			numNarrowPhrases[i] = *(int32_t *)p;
-			p += 4;
-			narrowPtrs[i] = p;
-		}
-	}
-	
-	// merge all the narrow results
-	if ( st->m_narrowPhrase ){
-		int32_t currPhrase[MAX_UNIQUE_HOSTS_PER_SPLIT];
-		for ( int32_t i = 0; i < MAX_UNIQUE_HOSTS_PER_SPLIT; i++ )
-			currPhrase[i] = 0;
-		for ( int32_t i = 0; i < MAX_NARROW_SEARCHES; i++ ){
-			int32_t maxHost = -1;
-			int32_t maxPop = 0;
-			for ( int32_t j = 0; j < hostsPerSplit; j++ ){
-				if ( numNarrowPhrases[j] <= currPhrase[j] )
-					continue;
-				int32_t pop = *(int32_t *)narrowPtrs[j];
-				if ( pop <= maxPop )
-					continue;
-				maxPop = pop;
-				maxHost = j;
-			}
-			if ( maxHost < 0 )
-				break;
-			// 
-			narrowPtrs[maxHost] += 4;
-			strcpy( st->m_narrowPhrases[i], narrowPtrs[maxHost] );
-			narrowPtrs[maxHost] +=gbstrlen(narrowPtrs[maxHost]) + 1;
-			currPhrase[maxHost]++;
-			st->m_numNarrowPhrases++;
-		}
-	}
-
-	// make narrowPhrase false here, so that its not launched a second time
-	// for the same frag;
-	st->m_narrowPhrase = false;
-
-	// revert
-	*(st->m_b) = st->m_c;
-
-	// if we found a recommendation,or if the phrase was found in the
-	// dictionary or pop words then mark all the
-	// words that fall under the phrase as found
-	if ( found || bestReco ){
-		for ( int32_t k = st->m_pPosn; 
-		      k <= st->m_pLen + st->m_pPosn; k++ )
-			st->m_isfound[k] = true;
-		st->m_numFound += st->m_pLen + 1;
-	}
-
-	// if not found in the dictionary or a recommendation, copy the phrase
-	if ( !found && bestReco){
-		// this fragment is going to be recommended
-		st->m_recommended = true;
-		// insert our recommendation into the phrase to get a new one
-		char *s1    = st->m_wp[st->m_startQword];
-		int32_t  slen1 = st->m_a - st->m_wp[st->m_startQword];
-		char *s2    = bestReco;
-		int32_t  slen2 = gbstrlen(bestReco);
-		char *s3    = st->m_b ;
-		// store the difference in length between the reco and the 
-		// original string
-		int32_t  diff = slen2 - ( st->m_b - st->m_a );
-		int32_t  slen3 = st->m_wp[st->m_endQword] + 
-			st->m_wplen[st->m_endQword] - st->m_b;
-
-		if ( slen3 < 0 )
-			slen3 = 0;
-
-		int32_t  tlen = slen1 + slen2 + slen3 ;
-		if ( tlen > MAX_FRAG_SIZE ){
-			log(LOG_LOGIC,"speller: buf too small. Fix me 3.");
-			// blocked
-			if ( !launchReco(st) )
-				return false;
-			return true;
-		}
-		// make substitution and store in "dst"
-		char buf2 [ MAX_FRAG_SIZE];
-		char *nf = buf2;
-		gbmemcpy ( nf , s1 , slen1 ) ; nf += slen1;
-		gbmemcpy ( nf , s2 , slen2 ) ; nf += slen2;
-		gbmemcpy ( nf , s3 , slen3 ) ; 
-		nf += slen3;
-	
-		// don't forget to NULL terminate
-		*nf = '\0';
-		// debug msg
-		log( LOG_DEBUG,"speller: Trying substitution \"%s\"",
-		     buf2 );
-
-		strcpy ( st->m_dst , buf2 );
-
-		// the pointers might have to be changed if the 
-		// recommendation was not of the same length as the words
-		if ( diff != 0 ){
-			for ( int32_t k = st->m_pLen+st->m_pPosn+1; 
-			      k <= st->m_endQword; k++ )
-				st->m_wp[k] += diff;
-		}
-	}
-
-	// don't forget to free the replies
-	for ( int32_t i = 0; i < hostsPerSplit; i++ )
-		if ( reply[i] && replyMaxSize[i] > 0 )
-			mfree( reply[i], replyMaxSize[i], "SpellerReplyBuf" );
-	
-	// go to the next position in the phrase. if we have reached the end
-	// of the phrase position, decrement the phrase length and start again
-	if ( st->m_pPosn + st->m_pLen >= st->m_endQword - 1 ){
-		st->m_pLen--;
-		st->m_pPosn = st->m_startQword;
-	}
-	else
-		st->m_pPosn++;
-
-	if ( !launchReco(st) )
-		return false;
-	return true;
-}
-*/
-// . break a NULL-terminated string down into a list of ptrs to the words
-// . return the number of words stored into "wp"
-/*
-int32_t Speller::getWords ( const char *s ,
-			 char *wp     [MAX_FRAG_SIZE] ,
-			 int32_t  wplen  [MAX_FRAG_SIZE] ,
-			 bool *isstop                   ) {
-	int32_t nwp = 0;
- loop:
-	// skip initial punct
-	while ( *s && ! is_alnum ( *s ) ) s++;
-	// bail if done
-	if ( ! *s ) return nwp;
-	// point to word
-	wp [ nwp ] = (char *)s;
-	// convenience ptr
-	char *ww = (char *)s;
-	// count over it
-	while ( is_alnum ( *s ) ) s++;
-	// how long is the word?
-	int32_t slen = s - wp [ nwp ];
-	// set length
-	wplen [ nwp ] = slen ;
-	// is it a stop word?
-	if ( isstop ) {
-		// TODO: make the stop words utf8!!!
-		int64_t h = hash64Lower_utf8 ( ww , slen ) ;
-		bool stop = ::isStopWord       ( ww , slen , h ) ;
-		// BUT ok if Capitalized or number
-		if ( stop ) {
-			if ( is_digit (ww[0])    ) stop = false;
-			if ( is_cap   (ww,slen ) ) stop = false;
-			// e-mail, c file, c. s. lewis
-			if ( slen  == 1 && ww[0] != 'a' ) stop = false;
-		}
-		isstop[nwp] = stop;
-	}
-	nwp++;
-	goto loop;
-}
-*/
-/*
-void Speller::gotFrags( void *state ){
-	StateSpeller *st = (StateSpeller *) state;
-	
-	char *dptr = st->m_dst;
-	char *nptr = st->m_nrw;
-	bool recommendation = false;
-	Query *q = st->m_q;
-
-	// . break query down into fragments
-	// . each fragment is a string of words
-	// . quotes and field names will separate fragments
-	// . TODO: make field data in its own fragment
-	int32_t nqw = q->m_numWords;
-	int32_t currFrag = 0;
-	for ( int32_t i = 0 ; i < nqw ; i++ ) {
-		// get a word in the Query to start a fragment with
-		QueryWord *qw = &q->m_qwords[i];
-		// if he has a phraseSign, put it right away
-		//if ( qw->m_phraseSign ) {
-		// *dptr = qw->m_phraseSign;
-		// dptr++;
-		// }
-		// can he start the phrase?
-		// if he can't start our fragment, just copy over to "dst"
-		if ( !canStart( qw )) {
-			// copy to rp and get next word
-			char *w    = qw->m_word;
-			int32_t  wlen = qw->m_wordLen;
-			if ( dptr + wlen >= st->m_dend ) { 
-				g_errno = EBUFTOOSMALL; continue; }
-			// watch out for LeFtP and RiGhP
-			if      ( qw->m_opcode == OP_LEFTPAREN ) *dptr++ = '(';
-			else if ( qw->m_opcode == OP_RIGHTPAREN) *dptr++ = ')';
-			else if ( qw->m_opcode == OP_PIPE      ) *dptr++ = '|';
-			else { 
-				gbmemcpy ( dptr , w , wlen ); 
-				dptr += wlen; 
-			}
-			*dptr = '\0';
-			continue;
-		}
-		bool inQuotes  = qw->m_inQuotes;
-		char fieldCode = qw->m_fieldCode;
-		// . get longest continual fragment that starts with word #i
-		// . get the following words that can be in a fragment
-		//   that starts with word #i
-		// . start of the frag
-		int32_t  endQword = i;
-		for ( ; i < nqw ; i++ ) {
-			// . skip if we should
-			// . keep punct, however
-			QueryWord *qw1 = &q->m_qwords[i];
-			if ( qw1->m_opcode                 ) break;
-			if ( qw1->m_inQuotes  != inQuotes  ) break;
-			if ( qw1->m_fieldCode != fieldCode ) break;
-			if ( qw1->m_ignoreWord== IGNORE_FIELDNAME ) break;
-			if ( qw1->m_phraseSign && !qw1->m_rightConnected ) 
-				break;
-			// are we punct?
-			if ( ! is_alnum_utf8 (qw1->m_word) ) 
-				endQword = i - 1;
-			else    
-				endQword = i;
-		}
-		// revisit this i in big loop since we did not include it
-		i = endQword;
-
-		// OOM errors might cause us not to launch frags
-		if ( currFrag >= st->m_numFrags )
-			continue;
-		StateFrag *stFrag = st->m_stFrag[currFrag];
-		// don't breech
-		if ( dptr + gbstrlen(stFrag->m_dst) >= st->m_dend ) {
-			g_errno = EBUFTOOSMALL;
-		}
-		else {
-			// store it
-			strcpy ( dptr, stFrag->m_dst );
-			dptr += gbstrlen ( dptr );
-			// add a space between fragments
-			//			*dptr = ' ';
-			//dptr++;
-			*dptr = '\0';
-			// set the flag
-			if ( stFrag->m_recommended )
-				recommendation = true;
-		}
-		// copy over all the narrow searches that can fit
-		for ( int32_t j = 0; j < stFrag->m_numNarrowPhrases; j++ ){
-			// don't breech
-			if ( nptr +gbstrlen(stFrag->m_narrowPhrases[j]) >
-			     st->m_nend )
-				break;
-			strcpy(nptr, stFrag->m_narrowPhrases[j]);
-			nptr += gbstrlen(stFrag->m_narrowPhrases[j]) + 1;
-			(*st->m_numNarrow)++;
-		}
-		
-		mdelete(stFrag, sizeof(StateFrag), "StateFrag");
-		delete (stFrag);
-		// now we get the next frag
-		currFrag++;
-	}
-	if ( !recommendation )
-		*st->m_dst = '\0';
-	
-	int64_t now = gettimeofdayInMilliseconds();
-	if ( now - st->m_start > 50 )
-		log(LOG_INFO,"speller: Took %"INT64" ms to spell check %s",
-		    now - st->m_start, st->m_q->getQuery() );
-	return;
-}
-*/
-
-
 bool Speller::generateDicts ( int32_t numWordsToDump , char *coll ){
-	m_language[2].setLang(2);
+	//m_language[2].setLang(2);
 	//m_language[2].generateDicts ( numWordsToDump, coll );
 	return false;
 }
@ -1776,107 +912,6 @@ bool Speller::findNext( char *s, char *send, char **nextWord, bool *isPorn,
  return false;
  }*/

-bool Speller::createUnifiedDict (){
-	// first get all the tuples from wordlist and query file
-	//HashTableT <uint64_t, char*> ht[MAX_LANGUAGES];
-	HashTableX ht[MAX_LANGUAGES];
-	char ff[1024];
-	for ( int32_t i = 0; i < MAX_LANGUAGES; i++ ){
-		ht[i].set ( 8,4,0,NULL,0,false,0,"cud");
-		sprintf ( ff , "%sdict/%s/%s.wl.phonet", g_hostdb.m_dir,
-			  getLanguageAbbr(i), getLanguageAbbr(i) );
-		populateHashTable(ff, &ht[i], i);
-
-		sprintf ( ff , "%sdict/%s/%s.query.phonet.top", g_hostdb.m_dir,
-			  getLanguageAbbr(i), getLanguageAbbr(i) );
-		populateHashTable(ff, &ht[i], i);
-
-		for ( int32_t j = 0; j < NUM_CHARS; j++ ){
-			sprintf ( ff , "%sdict/%s/%s.dict.%"INT32"", g_hostdb.m_dir,
-				  getLanguageAbbr(i), getLanguageAbbr(i), j );
-			populateHashTable(ff, &ht[i], i);
-		}
-	}
-
-	//sprintf ( ff, "%sdict/unifiedDict",g_hostdb.m_dir );
-	sprintf ( ff, "%sunifiedDict.txt",g_hostdb.m_dir );
-	// delete it first
-	unlink ( ff );
-	// then open a new one for appending
-	int fdw = open ( ff , 
-			 O_CREAT | O_RDWR | O_APPEND ,
-			 getFileCreationFlags());
-			 // S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH);
-	if ( fdw < 0 ){
-		return log("lang: Could not open for %s "
-			   "writing: %s.",ff, strerror(errno));
-	}
-
-	log(LOG_INIT,"spell: Making %s.", ff );
-
-	//HashTableT <uint64_t, int32_t> phrases;
-	HashTableX phrases;
-	phrases.set(8,4,0,NULL,0,false,0,"phud");
-	char buf[1024];
-	for ( int32_t  i = 0; i < MAX_LANGUAGES; i++ ){
-		// get each slot
-		for ( int32_t j = 0; j < ht[i].getNumSlots(); j++ ){
-			uint64_t key = *(uint64_t *)ht[i].getKey(j);
-			if ( key == 0 )
-				continue;
-			// if key is already found
-			int32_t slot = phrases.getSlot(&key);
-			if ( slot != -1 )
-				continue;
-
-			char *tuple = *(char **)ht[i].getValueFromSlot(j);
-
-			// here we print the phrase and the phonet if present
-			// skip the score
-			while ( *tuple != '\t' )
-				tuple++;
-			tuple++;
-			
-			sprintf( buf, "%s", tuple );
-			
-			char *p = buf;
-			p += gbstrlen(buf);
-
-			// if there wasn't a phonet, its from the titleRec.
-			// add another tab
-			bool fromTitleRec = false;
-			if ( strstr (tuple,"\t") == NULL ){
-				*p = '\t';
-				p++;
-				fromTitleRec = true;
-			}
-
-			for ( int32_t k = 0; k < MAX_LANGUAGES; k++ ){
-				slot = ht[k].getSlot(&key);
-				if ( slot == -1 )
-					continue;
-				char *val = *(char **)ht[k].getValueFromSlot(slot);
-				int32_t pop = atoi(val);
-				if ( fromTitleRec ) pop *= -1;
-				sprintf(p,"\t%"INT32"\t%"INT32"",k,pop);
-				p += gbstrlen(p);
-			}
-			// write out the trailing \n as well
-			*p = '\n';
-			p++;
-			*p = '\0';
-			p++;
-			int32_t bufLen = gbstrlen(buf);
-			int32_t wn = write ( fdw , buf , bufLen ) ;
-			if ( wn != bufLen )
-				return log("lang:  write: %s",strerror(errno));
-			int32_t val = 1;
-			phrases.addKey(&key, &val);
-		}
-	}
-	return true;
-}
-

 bool Speller::populateHashTable( char *ff, HashTableX *htable,
 				 unsigned char langId ){
--- a/Speller.h
+++ b/Speller.h
@ -9,11 +9,15 @@
 #ifndef _SPELLER_H_
 #define _SPELLER_H_

+#define MAX_FRAG_SIZE 1024
+
 // max int32_t returned by getPhrasePopularity() function
 #define MAX_PHRASE_POP 16800

 #include "StopWords.h"
-#include "Language.h"
+#include "Query.h"
+#include "Multicast.h"
+
 // . the height and width of m_stable[][] that takes a letter pair as an index
 // . valid chars are returned by isValidChar() routine
 // . we use A-Z, 0-9, space, hyphen, apostrophe and \0... that's it
@ -48,7 +52,7 @@ class StateFrag{
 	char  m_c;
 	bool  m_narrowPhrase;
 	int32_t  m_numNarrowPhrases;
-	char  m_narrowPhrases[MAX_NARROW_SEARCHES][MAX_FRAG_SIZE];
+	//char  m_narrowPhrases[MAX_NARROW_SEARCHES][MAX_FRAG_SIZE];
 };


@ -99,10 +103,10 @@ class Speller {
 	bool findNext( char *s, char *send, char **nextWord, bool *isPorn,
 			unsigned char langId, int32_t encodeType );

-	int32_t checkDict ( char *s, int32_t slen, char encodeType, 
-			 unsigned char lang = langEnglish ){
-		return m_language[lang].checkDict(s,slen,encodeType);
-	}
+//	int32_t checkDict ( char *s, int32_t slen, char encodeType,
+//			 unsigned char lang = langEnglish ){
+//		return m_language[lang].checkDict(s,slen,encodeType);
+//	}

 	// should be same hash algo to make wordId
 	bool isInDict ( uint64_t wordId ) {
@ -137,11 +141,10 @@ class Speller {
 			int32_t  wplen  [MAX_FRAG_SIZE] ,
 			bool *isstop                   );

-	Language m_language[MAX_LANGUAGES];
+//	Language m_language[MAX_LANGUAGES];

 	char *getRandomWord() ;
 	bool loadUnifiedDict();
-	bool createUnifiedDict ();

 	void dictLookupTest ( char *ff );

--- a/Summary.cpp
+++ b/Summary.cpp
@ -3,6 +3,7 @@
 #include "Words.h"
 //#include "AppendingWordsWindow.h"
 #include "Sections.h"
+#include "Msg20.h"

 Summary::Summary()
            : m_summaryLocs(m_summaryLocBuf, 
--- a/XmlDoc.cpp
+++ b/XmlDoc.cpp
@ -7266,118 +7266,6 @@ char XmlDoc::computeLangId ( Sections *sections , Words *words, char *lv ) {
 	}

 	return maxi;
-	//m_langId = maxi;
-	//m_langIdValid = true;
-	//return &m_langId;
-
-	/*
-	int32_t    freqScore = 0;
-	int32_t lang;
-	if ( ! m_processedLang ) {
-		// do not repeat this call for this document
-		m_processedLang = true;
-		lang = words->getLanguage( sections ,
-					   1000 , // sampleSize ,
-					   m_niceness, 
-					   &freqScore);
-		// return NULL on error with g_errno set
-		if ( lang == -1 ) return NULL;
-		// we got it from words, return
-		if ( lang != 0 ) {
-			m_langId = lang;
-			m_langIdValid = true;
-			return &m_langId;
-		}
-	}
-
-	m_langId = 0;
-	// try from charset
-	uint16_t *charset = getCharset ( );
-	if ( ! charset || charset == (uint16_t *)-1 )return (uint8_t *)charset;
-	// do based on charset
-	if ( *charset == csGB18030 ) m_langId = langChineseTrad;
-	if ( *charset == csGBK     ) m_langId = langChineseSimp;
-
-	if ( m_langId ) {
-		m_langIdValid = true;
-		return &m_langId;
-	}
-
-	// are we a root?
-	char *isRoot = getIsSiteRoot();
-	if ( ! isRoot || isRoot == (char *)-1 ) return (uint8_t *)isRoot;
-	// this lookup here might be unnecessary
-	uint8_t *rl = NULL;
-	if ( ! *isRoot ) {
-		rl = getRootLangId();
-		if ( ! rl || rl == (void *)-1 ) return (uint8_t *)rl;
-	}
-
-	//Url *u = getCurrentUrl();
-	Url *u = getFirstUrl();
-	uint8_t gs[METHOD_CAP];
-	// reset language method vector
-	memset( gs , 0, sizeof(uint8_t) * METHOD_CAP );
-	// Let the site tell us what language it's in
-	gs [METHOD_TAG] = g_langId.guessLanguageFromTag( xml );
-	// Guess from the FIRST URL (unredirected url)
-	gs [METHOD_URL] = g_langId.guessLanguageFromUrl( u->getUrl() );
-	// Guess from the outlinks
-	gs [METHOD_OUTLINKS] = g_langId.guessLanguageFromOutlinks( links );
-	// Guess from the inlinks
-	gs [METHOD_INLINKS] = g_langId.guessLanguageFromInlinks(info1, *ip);
-	// root page's language, if there was one
-	if ( ! *isRoot ) gs [METHOD_ROOT] = *rl;
-
-	int32_t scores[MAX_LANGUAGES];
-	memset( scores, 0, sizeof(int32_t) * MAX_LANGUAGES );
-	// weights for the 10 methods
-	char cw[] = { 8,9,4,7,6,7,8,1,2};
-	// add up weighted scores
-	for(int i = 0; i < METHOD_CAP; i++ ) 
-		scores[gs[i]] += cw[i];
-
-	// reset the "lang" to langUnknown which is 0
-	lang = langUnknown ;
-	int max, oldmax;
-	max = oldmax = 0;
-	// find best language
-	for ( int32_t i = MAX_LANGUAGES  - 1; i > 0 ; i-- ) {
-		if ( scores[i] < max) continue;
-		oldmax = max;
-		max = scores[i];
-		lang = i;
-	}
-	// give up if not too conclusive
-	if( (max - oldmax) < 3 ) { // cr->m_languageThreshold) {
-		//log(LOG_DEBUG, "build: Language: Threshold, score "
-		//    "(%"INT32" - %"INT32") %"INT32" vs. %"INT32".\n",
-		//    (int32_t)max, 
-		//    (int32_t)oldmax, 
-		//    (int32_t)max - oldmax, 
-		//    (int32_t)3);//(int32_t)cr->m_languageThreshold);
-		lang = langUnknown;
-	}
-	// Make sure we're over the bailout value, this
-	// keeps low scoring methods like TLD from being
-	// the decider if it was the only successful method.
-	if ( max < 5 ) { // cr->m_languageBailout ) {
-		//log(LOG_DEBUG, "build: Language: Bailout, "
-		//    "score %"INT32" vs. %"INT32".",
-		//    (int32_t)max, (int32_t)5);//cr->m_languageBailout);
-		lang = langUnknown;
-	}
-	// If the language is still not known,
-	// use the language detected from the frames.
-	//if(lang == langUnknown) lang = frameFoundLang;
-	// . try dmoz if still unknown
-	// . limit to 10 of them
-	// all done, do not repeat
-	m_langIdValid = true;
-	m_langId      = lang;
-	m_langIdScore = max;
-	return &m_langId;
-	*/
 }