530 lines
16 KiB
C++
530 lines
16 KiB
C++
#include "sto.h"
|
|
#include <sys/mman.h>
|
|
#include <fcntl.h>
|
|
#include <unistd.h>
|
|
#include <sys/stat.h>
|
|
#include <string.h>
|
|
#include <algorithm>
|
|
|
|
|
|
//static const char version_1_signature[80] = "parsed-sto-v2\n";
|
|
//Signature of the version-2 file format. The array is 80 bytes: the text followed by NUL padding.
//load() requires the first sizeof(version_2_signature) bytes of a file to match it exactly, and
//the first lexical entry starts immediately after it.
static const char version_2_signature[80] = "parsed-sto-v2\n";
|
|
|
|
std::vector<const sto::WordForm *> sto::LexicalEntry::query_all_explicit_word_forms() const {
|
|
std::vector<const WordForm*> entries;
|
|
const char *p = reinterpret_cast<const char*>(query_first_explicit_word_form());
|
|
for(unsigned i=0; i<explicit_word_form_count; i++) {
|
|
const WordForm *e = reinterpret_cast<const WordForm*>(p);
|
|
entries.push_back(e);
|
|
p += e->size();
|
|
}
|
|
return entries;
|
|
}
|
|
|
|
|
|
//Find the first explicit word form whose written form is byte-for-byte equal to 'word'.
//Returns a null pointer when no explicit word form matches.
const sto::WordForm *sto::LexicalEntry::find_first_wordform(const std::string &word) const {
	const char *cursor = reinterpret_cast<const char*>(query_first_explicit_word_form());
	unsigned index = 0;
	while(index<explicit_word_form_count) {
		const WordForm *candidate = reinterpret_cast<const WordForm*>(cursor);
		//compare lengths first so memcmp only runs on equally long strings
		const bool same_length = candidate->written_form_length==word.length();
		if(same_length && memcmp(candidate->written_form,word.data(),candidate->written_form_length)==0)
			return candidate;
		cursor += candidate->size();
		index++;
	}
	return nullptr;
}
|
|
|
|
|
|
//Find the base form of the lexical entry. That means:
|
|
// verbs: infinitive mood, active voice
|
|
// nouns: indefinite singular nominative
|
|
// adjectives: positive, common gender
|
|
// other: <null>
|
|
const sto::WordForm *sto::LexicalEntry::find_base_wordform() const {
|
|
const char *p = reinterpret_cast<const char*>(query_first_explicit_word_form());
|
|
for(unsigned i=0; i<explicit_word_form_count; i++) {
|
|
const WordForm *e = reinterpret_cast<const WordForm*>(p);
|
|
switch(part_of_speech) {
|
|
case part_of_speech_t::deponentVerb:
|
|
case part_of_speech_t::mainVerb: {
|
|
if(e->has_attribute(word_form_attribute_t::verbFormMood_infinitive) &&
|
|
e->has_attribute(word_form_attribute_t::voice_activeVoice))
|
|
return e;
|
|
break;
|
|
}
|
|
case part_of_speech_t::commonNoun:
|
|
case part_of_speech_t::properNoun: {
|
|
if((e->has_attribute(word_form_attribute_t::definiteness_indefinite) || e->has_attribute(word_form_attribute_t::definiteness_unspecified)) &&
|
|
(e->has_attribute(word_form_attribute_t::grammaticalNumber_singular) || e->has_attribute(word_form_attribute_t::grammaticalNumber_unspecified)) &&
|
|
(e->has_attribute(word_form_attribute_t::case_unspecified) || e->has_attribute(word_form_attribute_t::case_nominativeCase)))
|
|
return e;
|
|
break;
|
|
}
|
|
default:
|
|
return NULL;
|
|
}
|
|
p += e->size();
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
|
|
bool sto::Lexicon::load(const std::string &filename) {
|
|
unload();
|
|
|
|
int fd = open(filename.c_str(), O_RDONLY);
|
|
if(fd<0)
|
|
return false;
|
|
|
|
struct stat st;
|
|
if(fstat(fd,&st)!=0) {
|
|
::close(fd);
|
|
return false;
|
|
}
|
|
if((size_t)st.st_size<sizeof(version_2_signature)) {
|
|
::close(fd);
|
|
return false;
|
|
}
|
|
|
|
mapped_memory_start = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
|
|
if(mapped_memory_start==MAP_FAILED) {
|
|
::close(fd);
|
|
return false;
|
|
}
|
|
::close(fd);
|
|
|
|
mapped_memory_size = st.st_size;
|
|
|
|
(void)madvise(mapped_memory_start, mapped_memory_size, MADV_WILLNEED);
|
|
|
|
if(memcmp(mapped_memory_start,version_2_signature,sizeof(version_2_signature))!=0) {
|
|
unload();
|
|
return false;
|
|
}
|
|
|
|
//parse and index the entries
|
|
//see sto_structure.txt for details
|
|
size_t estimated_entries = mapped_memory_size/171;
|
|
size_t entries_to_reserve = (size_t)(estimated_entries*1.25);
|
|
entries.reserve(entries_to_reserve);
|
|
morphological_unit_id_entries.reserve(entries_to_reserve);
|
|
const char *start = reinterpret_cast<const char*>(mapped_memory_start);
|
|
const char *end = start + mapped_memory_size;
|
|
const char *p = start + sizeof(version_2_signature);
|
|
while(p<end) {
|
|
const LexicalEntry *le = reinterpret_cast<const LexicalEntry*>(p);
|
|
p = reinterpret_cast<const char*>(le->query_first_explicit_word_form());
|
|
for(unsigned i=0; i<le->explicit_word_form_count; i++) {
|
|
const WordForm *wf = reinterpret_cast<const WordForm*>(p);
|
|
const char *p2 = p+wf->size();
|
|
if(p2>end)
|
|
return false;
|
|
|
|
entries.emplace_back(wf->written_form,wf->written_form_length,le);
|
|
p = p2;
|
|
}
|
|
morphological_unit_id_entries.emplace_back(le->query_morphological_unit_id(),le->morphological_unit_id_len,le);
|
|
}
|
|
|
|
sort(entries);
|
|
sort(morphological_unit_id_entries);
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
bool sto::Lexicon::MapEntry::compare(const MapEntry &me1, const MapEntry &me2) {
|
|
if(me1.length<me2.length) {
|
|
int r = memcmp(me1.str,me2.str,me1.length);
|
|
return r<=0;
|
|
} else if(me1.length>me2.length) {
|
|
int r = memcmp(me1.str,me2.str,me2.length);
|
|
return r<0;
|
|
} else {
|
|
return memcmp(me1.str,me2.str,me1.length)<0;
|
|
}
|
|
}
|
|
|
|
//Sort an index vector in place using the MapEntry::compare ordering, which is the same
//ordering the lookup functions pass to their binary searches.
void sto::Lexicon::sort(std::vector<MapEntry> &v) {
	auto first = v.begin();
	auto last = v.end();
	std::sort(first,last,&MapEntry::compare);
}
|
|
|
|
|
|
void sto::Lexicon::unload() {
|
|
if(mapped_memory_size!=0) {
|
|
(void)munmap(mapped_memory_start,mapped_memory_size);
|
|
mapped_memory_start = NULL;
|
|
mapped_memory_size = 0;
|
|
}
|
|
entries.clear();
|
|
morphological_unit_id_entries.clear();
|
|
}
|
|
|
|
|
|
|
|
//Look up a written word form and return a lexical entry that contains it,
//or a null pointer if the word is not in the lexicon.
//When several entries contain the word, the one sorted first in the index is returned;
//use query_matches() to get all of them.
const sto::LexicalEntry *sto::Lexicon::lookup(const std::string &word) const {
	const MapEntry key(word.data(),word.length(),nullptr);
	const auto pos = std::lower_bound(entries.begin(),entries.end(),key,MapEntry::compare);
	//lower_bound only finds the first item not ordered before the key; verify it is an exact match
	if(pos==entries.end())
		return nullptr;
	if(pos->length!=word.length())
		return nullptr;
	if(memcmp(pos->str,word.data(),pos->length)!=0)
		return nullptr;
	return pos->entry;
}
|
|
|
|
|
|
std::vector<const sto::LexicalEntry *> sto::Lexicon::query_matches(const std::string &word) const {
|
|
MapEntry me_word(word.data(),word.length(),0);
|
|
auto range = std::equal_range(entries.begin(),entries.end(), me_word, MapEntry::compare);
|
|
std::vector<const LexicalEntry *> entries;
|
|
for(auto iter=range.first; iter!=range.second; ++iter)
|
|
entries.push_back(iter->entry);
|
|
return entries;
|
|
}
|
|
|
|
|
|
|
|
//Return the first lexical entry of the loaded file, for iterating with next_entry().
//Returns NULL when there is no entry at all: either nothing is loaded, or the file
//consists of the signature only. (Without this check the result would point at or
//beyond the end of the mapping and could not be dereferenced safely.)
const sto::LexicalEntry *sto::Lexicon::first_entry() const {
	if(mapped_memory_size<=sizeof(version_2_signature))
		return NULL;
	const char *start = reinterpret_cast<const char*>(mapped_memory_start);
	//entries begin immediately after the fixed-size signature
	const char *p = start + sizeof(version_2_signature);
	return reinterpret_cast<const LexicalEntry*>(p);
}
|
|
|
|
|
|
//Return the lexical entry stored directly after 'le', or a null pointer when 'le' is the
//last entry. A null pointer is also returned if 'le' does not point into the mapped file
//or if one of its word forms extends past the end of the mapping.
const sto::LexicalEntry *sto::Lexicon::next_entry(const LexicalEntry *le) const {
	const char *map_begin = reinterpret_cast<const char*>(mapped_memory_start);
	const char *map_end = map_begin + mapped_memory_size;

	const char *cursor = reinterpret_cast<const char*>(le);
	const bool inside_mapping = cursor>=map_begin && cursor<map_end;
	if(!inside_mapping)
		return nullptr;

	//the next entry begins right after the last explicit word form of this one
	cursor = reinterpret_cast<const char*>(le->query_first_explicit_word_form());
	for(unsigned remaining=le->explicit_word_form_count; remaining>0; remaining--) {
		const WordForm *wf = reinterpret_cast<const WordForm*>(cursor);
		const char *after = cursor+wf->size();
		if(after>map_end)
			return nullptr;
		cursor = after;
	}

	if(cursor>=map_end)
		return nullptr; //'le' was the last entry
	return reinterpret_cast<const LexicalEntry*>(cursor);
}
|
|
|
|
|
|
//Return all lexical entries whose morphological unit id equals that of 'le'.
//When 'le' belongs to the loaded lexicon the result includes 'le' itself.
std::vector<const sto::LexicalEntry *> sto::Lexicon::query_lexical_entries_with_same_morphological_unit_id(const sto::LexicalEntry *le) const {
	const MapEntry key(le->query_morphological_unit_id(),le->morphological_unit_id_len,nullptr);
	const auto matching = std::equal_range(morphological_unit_id_entries.begin(),
	                                       morphological_unit_id_entries.end(),
	                                       key,
	                                       MapEntry::compare);

	std::vector<const sto::LexicalEntry *> same_unit;
	auto pos = matching.first;
	while(pos!=matching.second) {
		same_unit.push_back(pos->entry);
		++pos;
	}
	return same_unit;
}
|
|
|
|
|
|
#ifdef UNITTEST
#include <assert.h>
#include <stdio.h>

using namespace sto;

//Name of the scratch file that every test case below (re)creates.
static const char unittest_filename[] = "sto.unittest";

//Create/truncate the scratch file for writing. The tests cannot run without it.
static int create_test_file() {
	int fd = open(unittest_filename,O_WRONLY|O_CREAT|O_TRUNC,0666);
	assert(fd>=0);
	return fd;
}

//Write exactly 'length' bytes; an incomplete fixture would make later assertions meaningless.
static void write_bytes(int fd, const void *data, size_t length) {
	ssize_t written = write(fd,data,length);
	assert(written==(ssize_t)length);
	(void)written;
}

//Write the complete (NUL-padded) version-2 file signature.
static void write_signature(int fd) {
	write_bytes(fd,version_2_signature,sizeof(version_2_signature));
}

//Write the start of a lexical entry: part of speech, word form type (always explicit),
//morphological unit id length, number of word forms, and then the id itself.
static void write_lexical_entry_header(int fd, part_of_speech_t pos, const char *morphological_unit_id, unsigned word_form_count) {
	const size_t id_length = strlen(morphological_unit_id);
	const char header[4] = {
		static_cast<char>(pos),
		static_cast<char>(word_form_type_t::wordFormsExplicit),
		static_cast<char>(id_length),
		static_cast<char>(word_form_count)
	};
	write_bytes(fd,header,sizeof(header));
	write_bytes(fd,morphological_unit_id,id_length);
}

//Write one word form: six attribute slots (unused ones are 'none'), a length byte and the text.
static void write_word_form(int fd, const char *written_form,
                            word_form_attribute_t a0 = word_form_attribute_t::none,
                            word_form_attribute_t a1 = word_form_attribute_t::none,
                            word_form_attribute_t a2 = word_form_attribute_t::none)
{
	const char attributes[6] = {
		static_cast<char>(a0),
		static_cast<char>(a1),
		static_cast<char>(a2),
		static_cast<char>(word_form_attribute_t::none),
		static_cast<char>(word_form_attribute_t::none),
		static_cast<char>(word_form_attribute_t::none)
	};
	const size_t length = strlen(written_form);
	const char length_byte = static_cast<char>(length);
	write_bytes(fd,attributes,sizeof(attributes));
	write_bytes(fd,&length_byte,1);
	write_bytes(fd,written_form,length);
}

int main(void) {
	//plain ctor
	{
		Lexicon l;
		assert(l.lookup("foo")==NULL);
		auto v(l.query_matches("foo"));
		assert(v.empty());
	}

	//nonexisting file
	{
		::unlink(unittest_filename);
		Lexicon l;
		assert(!l.load(unittest_filename));
	}

	//empty file
	{
		int fd = create_test_file();
		close(fd);
		Lexicon l;
		assert(!l.load(unittest_filename));
	}

	//file with wrong signature (long enough to pass the minimum-size check)
	{
		int fd = create_test_file();
		write_bytes(fd,"hello world",11);
		for(int i=0; i<10; i++)
			write_bytes(fd,"0123456789abcdef",16);
		close(fd);
		Lexicon l;
		assert(!l.load(unittest_filename));
	}

	//file with just the signature
	{
		int fd = create_test_file();
		write_signature(fd);
		close(fd);
		Lexicon l;
		assert(l.load(unittest_filename));
		assert(l.lookup("foo")==NULL);
	}

	//file with one lexical entry
	//0: foo foos
	{
		int fd = create_test_file();
		write_signature(fd);
		//le#0
		write_lexical_entry_header(fd,part_of_speech_t::commonNoun,"morph1",2);
		write_word_form(fd,"foo",word_form_attribute_t::degree_positive);      //le#0:wf#0
		write_word_form(fd,"foos",word_form_attribute_t::case_nominativeCase); //le#0:wf#1
		close(fd);

		Lexicon l;
		assert(l.load(unittest_filename));
		assert(l.lookup("foo")!=NULL);
		assert(l.lookup("foos")!=NULL);
		assert(l.lookup("fooz")==NULL);
		auto e0(l.lookup("foo"));
		auto e1(l.lookup("foos"));
		assert(e0==e1);
		assert(e0->part_of_speech==part_of_speech_t::commonNoun);
		auto wf0(e0->find_first_wordform("foo"));
		assert(wf0);
		assert(wf0->has_attribute(word_form_attribute_t::none));
		assert(wf0->has_attribute(word_form_attribute_t::degree_positive));
		assert(!wf0->has_attribute(word_form_attribute_t::person_thirdPerson));
		auto wf1(e1->find_first_wordform("foos"));
		assert(wf1);
		assert(wf1->has_attribute(word_form_attribute_t::none));
		assert(wf1->has_attribute(word_form_attribute_t::case_nominativeCase));
		assert(!wf1->has_attribute(word_form_attribute_t::person_thirdPerson));
		auto wf2(e0->find_first_wordform("xxxx"));
		assert(!wf2);
	}

	//file with three lexical entries
	//0: foo foos
	//1: boo boos
	//2: goo foo boo
	{
		int fd = create_test_file();
		write_signature(fd);

		//le#0
		write_lexical_entry_header(fd,part_of_speech_t::commonNoun,"morph1",2);
		write_word_form(fd,"foo");                                             //le#0:wf#0
		write_word_form(fd,"foos",word_form_attribute_t::case_nominativeCase); //le#0:wf#1

		//le#1
		write_lexical_entry_header(fd,part_of_speech_t::commonNoun,"morph2",2);
		write_word_form(fd,"boo");                                             //le#1:wf#0
		write_word_form(fd,"boos",word_form_attribute_t::case_nominativeCase); //le#1:wf#1

		//le#2 (shares its morphological unit id with le#0)
		write_lexical_entry_header(fd,part_of_speech_t::commonNoun,"morph1",3);
		write_word_form(fd,"goo");                                             //le#2:wf#0
		write_word_form(fd,"foo",word_form_attribute_t::case_nominativeCase);  //le#2:wf#1
		write_word_form(fd,"boo",word_form_attribute_t::case_nominativeCase);  //le#2:wf#2

		close(fd);

		Lexicon l;
		assert(l.load(unittest_filename));
		assert(l.lookup("foo")!=NULL);
		assert(l.lookup("foos")!=NULL);
		assert(l.lookup("boo")!=NULL);
		assert(l.lookup("boos")!=NULL);
		assert(l.lookup("goo")!=NULL);

		auto v0(l.query_matches("foo"));
		assert(v0.size()==2);
		auto v1(l.query_matches("foos"));
		assert(v1.size()==1);
		auto v2(l.query_matches("boo"));
		assert(v2.size()==2);
		auto v3(l.query_matches("boos"));
		assert(v3.size()==1);
		auto v4(l.query_matches("goo"));
		assert(v4.size()==1);

		assert(v0[0]==v1[0] || v0[1]==v1[0]);

		auto m0 = l.query_lexical_entries_with_same_morphological_unit_id(l.lookup("foos"));
		assert(m0.size()==2);
		assert(m0[0]!=m0[1]);

		auto m1 = l.query_lexical_entries_with_same_morphological_unit_id(l.lookup("boos"));
		assert(m1.size()==1);
		assert(m1[0]==l.lookup("boos"));
	}

	//file with three entries, for testing LexicalEntry::find_base_wordform()
	//  verb: aaa1(imperative mood, active voice), aaa2(infinitive mood, passive voice), aaa3(infinitive mood, active voice)
	//  verb: bbb1(imperative mood, active voice), bbb2(infinitive mood, passive voice)
	//  noun: ccc1(definite, singular, unspecified case), ccc2(indefinite, singular, unspecified case)
	{
		int fd = create_test_file();
		write_signature(fd);

		//le#0
		write_lexical_entry_header(fd,part_of_speech_t::mainVerb,"aaa",3);
		write_word_form(fd,"aaa1",word_form_attribute_t::verbFormMood_imperative,word_form_attribute_t::voice_activeVoice);  //le#0:wf#0
		write_word_form(fd,"aaa2",word_form_attribute_t::verbFormMood_infinitive,word_form_attribute_t::voice_passiveVoice); //le#0:wf#1
		write_word_form(fd,"aaa3",word_form_attribute_t::verbFormMood_infinitive,word_form_attribute_t::voice_activeVoice);  //le#0:wf#2

		//le#1
		write_lexical_entry_header(fd,part_of_speech_t::mainVerb,"bbb",2);
		write_word_form(fd,"bbb1",word_form_attribute_t::verbFormMood_imperative,word_form_attribute_t::voice_activeVoice);  //le#1:wf#0
		write_word_form(fd,"bbb2",word_form_attribute_t::verbFormMood_infinitive,word_form_attribute_t::voice_passiveVoice); //le#1:wf#1

		//le#2
		write_lexical_entry_header(fd,part_of_speech_t::commonNoun,"ccc",2);
		write_word_form(fd,"ccc1",word_form_attribute_t::definiteness_definite,word_form_attribute_t::grammaticalNumber_singular,word_form_attribute_t::case_unspecified);   //le#2:wf#0
		write_word_form(fd,"ccc2",word_form_attribute_t::definiteness_indefinite,word_form_attribute_t::grammaticalNumber_singular,word_form_attribute_t::case_unspecified); //le#2:wf#1

		close(fd);

		Lexicon l;
		assert(l.load(unittest_filename));
		assert(l.lookup("aaa1")!=NULL);
		assert(l.lookup("aaa2")!=NULL);
		assert(l.lookup("aaa3")!=NULL);

		//only aaa3 is both infinitive and active
		const sto::LexicalEntry *le1 = l.lookup("aaa1");
		const WordForm *wf1 = le1->find_base_wordform();
		assert(wf1);
		assert(std::string(wf1->written_form,wf1->written_form_length)=="aaa3");

		//no word form of the second verb is both infinitive and active
		const sto::LexicalEntry *le2 = l.lookup("bbb1");
		const WordForm *wf2 = le2->find_base_wordform();
		assert(!wf2);

		//ccc1 is definite, so the indefinite ccc2 is the base form
		const sto::LexicalEntry *le3 = l.lookup("ccc1");
		const WordForm *wf3 = le3->find_base_wordform();
		assert(wf3);
		assert(std::string(wf3->written_form,wf3->written_form_length)=="ccc2");
	}

	//do not leave the scratch file behind
	::unlink(unittest_filename);
	return 0;
}
#endif
|