// Matt Wells, copyright Jul 2001

#include "gb-include.h"

#include "HashTableX.h"
#include "Process.h"
#include "GbMutex.h"
#include "ScopedLock.h"


// . one entry of the static abbreviation list (s_abbrs99 below)
// . instances are brace-initialized as {m_str,m_hasWordAfter}, so the member
//   order here must match the order used in that list
class Abbr {
public:
	// the abbreviation text; the entries in s_abbrs99 are written without
	// a trailing period
	const char *m_str;
	// MUST it have a word after it????
	// . non-zero/zero flag that isAbbr() copies out through its
	//   "hasWordAfter" argument
	// . NOTE(review): exact meaning (abbreviation only counts when followed
	//   by another word?) is not established in this file -- confirm
	//   against the callers of isAbbr()
	char  m_hasWordAfter;
};

// . the static list of known abbreviations, each as {m_str,m_hasWordAfter}
// . i shrunk this list a lot
// . see backups for the old list
// . isAbbr() keys every entry by hash64Lower_utf8(m_str) and stores the
//   entry's 1-based index into this array as the value, so the order of the
//   entries is significant
// . NOTE(review): several entries differ only by case ("DR"/"Dr"/"dr",
//   "St"/"st", "Ave"/"ave", "NO"/"No", "Pp"/"pp", ...) and a few of those
//   disagree on the flag ("Ft",1 vs "ft",0 -- "Fig",0 vs "fig",1 --
//   "Figs",0 vs "figs",1). if hash64Lower_utf8() folds case, as its name
//   suggests, those entries map to the same key and which flag wins depends
//   on how HashTableX::addKey() treats an existing key -- TODO confirm
static const class Abbr s_abbrs99[] = {
	{"hghway",0},//highway
	{"hway",0},//highway
	{"hwy",0},//highway
	{"ln",0}, // lane
	{"mil",0}, // military
	{"pkway",0}, // parkway
	{"pkwy",0},  // parkway
	{"lp",0}, // Loop
	{"phd",0}, // Ph.D. (comment used to say "Loop", a copy/paste slip)
	{"demon",0}, // demonstration
	{"alz",0}, // alzheimer's

	{"lang",0}, // language
	{"gr",0}, // grade(s) "xmas concert gr. 1-5"
	{"vars",0}, // varsity
	{"avg",0}, // average
	{"amer",0}, // america

	{"bet",0}, // between 18th and 19th for piratecatradio.com
	{"nr",0}, // near 6th street = nr. 6th street
	{"appt",0},
	{"tel",1},
	{"intl",0},
	{"div",1}, // div. II

	{"int",1}, // Intermediate Dance
	{"beg",1}, // Beginner Dance
	{"adv",1}, // Advanced Dance

	{"feat",1}, // featuring.
	{"tdlr",0}, // toddler
	{"schl",0}, // pre-schl

	// times
	{"am",0}, // unm.edu url puts "7 am. - 9 am." time ranges!
	{"pm",0},
	{"mon",0},
	{"tue",0},
	{"tues",0},
	{"wed",0},
	{"wednes",0},
	{"thu",0},
	{"thur",0},
	{"thurs",0},
	{"fri",0},
	{"sat",0},
	{"sun",0},

	{"Ala",0},
	{"Ariz",0},
	{"Assn",0},
	{"Assoc",0},
	{"asst",0}, // assistant
	{"Atty",0},
	{"Attn",1},
	{"Aug",0},
	{"Ave",0},
	{"Bldg",0},
	{"Bros",0}, // brothers
	{"Blvd",0},
	{"Calif",0},
	{"Capt",1},
	{"Cf",0},
	{"Ch",0},
	{"Co",0},
	{"Col",0},
	{"Colo",0},
	{"Conn",0},
	{"Mfg",0},
	{"Corp",0},
	{"DR",0},
	{"Dec",0},
	{"Dept",0},
	{"Dist",0},
	{"Dr",0},
	{"Drs",0},
	{"Ed",0},
	{"Eq",0},
	{"ext",0}, // extension
	{"FEB",0},
	{"Feb",0},
	{"Fig",0},
	{"Figs",0},
	{"Fla",0},
	{"Ft",1}, // ft. worth texas or feet
	{"Ga",0},
	{"Gen",0},
	{"Gov",0},
	{"HON",0},
	{"Ill",0},
	{"Inc",0},
	{"JR",0},
	{"Jan",0},
	{"Jr",0},
	{"Kan",0},
	//{"Ky",0},
	{"La",0},
	{"Lt",0},
	{"Ltd",0},
	{"MR",1},
	{"MRS",1},
	{"Mar",0},
	{"Mass",0},
	{"Md",0},
	{"Messrs",1},
	{"Mich",0},
	{"Minn",0},
	{"Miss",0},
	{"Mmes",0},
	//{"Mo",0}, no more 2-letter state abbreviations
	{"Mr",1},
	{"Mrs",1},
	{"Ms",1},
	{"Msgr",1},
	{"Mt",1},
	{"NO",0},
	{"No",0},
	{"Nov",0},
	{"Oct",0},
	{"Okla",0},
	{"Op",0},
	{"Ore",0},
	//{"Pa",0},
	{"Pp",0},
	{"Prof",1},
	{"Prop",0},
	{"Rd",0},
	{"Ref",0},
	{"Rep",0},
	{"Reps",0},
	{"Rev",0},
	{"Rte",0},
	{"Sen",0},
	{"Sept",0},
	{"Sr",0},
	{"St",0},
	{"ste",0},
	{"Stat",0},
	{"Supt",0},
	{"Tech",0},
	{"Tex",0},
	{"Va",0},
	{"Vol",0},
	{"Wash",0},
	//{"al",0},
	{"av",0},
	{"ave",0},
	{"ca",0},
	{"cc",0},
	{"chap",0},
	{"cm",0},
	{"cu",0},
	{"dia",0},
	{"dr",0},
	{"eqn",0},
	{"etc",0},
	{"fig",1},
	{"figs",1},
	{"ft",0}, // fort or feet or featuring
	//{"gm",0},
	{"hr",0},
	//{"in",0},
	//{"kc",0},
	{"lb",0},
	{"lbs",0},
	{"mg",0},
	{"ml",0},
	{"mm",0},
	{"mv",0},
	//{"nw",0},
	{"oz",0},
	{"pl",0},
	{"pp",0},
	{"sec",0},
	{"sq",0},
	{"st",0},
	{"vs",1},
	{"yr",0},
	{"yrs",0}, // 3 yrs old
	// middle initials
	{"a",0},
	{"b",0},
	{"c",0},
	{"d",0},
	{"e",0},
	{"f",0},
	{"g",0},
	{"h",0},
	{"i",0},
	{"j",0},
	{"k",0},
	{"l",0},
	{"m",0},
	{"n",0},
	{"o",0},
	{"p",0},
	{"q",0},
	{"r",0},
	{"s",0},
	{"t",0},
	{"u",0},
	{"v",1}, // versus
	{"w",0},
	{"x",0},
	{"y",0},
	{"z",0}
};

// . lookup table built lazily by isAbbr() on its first call
// . key is the 64-bit hash of an abbreviation, value is the 1-based index of
//   the matching entry in s_abbrs99
static HashTableX s_abbrTable;
// set to true by isAbbr() once s_abbrTable has been completely filled
static bool       s_abbrInitialized = false;
// isAbbr() holds this (through a ScopedLock) across both the lazy
// initialization and the lookup
static GbMutex s_mtx;

// . returns true if "h" is the hash of one of the abbreviations in s_abbrs99
// . "h" is looked up as-is; the table keys are hash64Lower_utf8() of each
//   list entry, so callers presumably pass a hash made the same way -- confirm
// . if "hasWordAfter" is non-NULL and "h" is found, *hasWordAfter is set from
//   that entry's m_hasWordAfter flag. it is left untouched when we return
//   false
// . builds s_abbrTable on the first call. returns false (and logs) if the
//   table could not be built; a later call will then try to build it again
// . s_mtx is taken through a ScopedLock at the top, covering both the lazy
//   initialization and the lookup
bool isAbbr ( int64_t h , bool *hasWordAfter ) {
	ScopedLock sl(s_mtx);
	if ( ! s_abbrInitialized ) {
		// number of entries in the static list
		int32_t n = ((int32_t)sizeof(s_abbrs99))/ ((int32_t)sizeof(Abbr));
		// set up the hash table: 8-byte keys, 4-byte values
		if ( ! s_abbrTable.set ( 8,4,n*4, NULL,0,false,"abbrtbl")) {
			log( LOG_ERROR, "build: Could not init abbrev table." );
			return false;
		}
		// now add in all the abbreviations. the value is the 1-based
		// index into s_abbrs99 so that a score of 0 can mean "not found"
		for ( int32_t i = 0 ; i < n ; i++ ) {
			const char *abbr  = s_abbrs99[i].m_str;
			int64_t     abbrh = hash64Lower_utf8 ( abbr );
			int32_t     val   = i + 1;
			if ( s_abbrTable.addKey (&abbrh,&val) ) continue;
			// do not leave a half-filled table behind; we are not
			// marked as initialized so the next call starts over
			log( LOG_ERROR, "build: Could not add to abbrev table." );
			s_abbrTable.reset();
			return false;
		}
		s_abbrInitialized = true;
		// test it. use distinct names here so we do not shadow the
		// "h" we were called with.
		int64_t testh = hash64Lower_utf8("St");
		if ( ! s_abbrTable.isInTable(&testh) ) { g_process.shutdownAbort(true); }
		// stored values are i+1, so anything outside [1,n] is corrupt
		int32_t testsc = s_abbrTable.getScore(testh);
		if ( testsc < 1 || testsc > n ) { g_process.shutdownAbort(true); }
	}
	// get from table. values are 1-based, so <= 0 means not in there.
	int32_t sc = s_abbrTable.getScore(h);
	if ( sc <= 0 ) return false;
	if ( hasWordAfter ) *hasWordAfter = s_abbrs99[sc-1].m_hasWordAfter;
	return true;
}		


void resetAbbrTable ( ) {
	s_abbrTable.reset();
}