// (web page navigation text, not part of the program: "Select chapter")

// A word's position in the text: (line number, word number within the line).
typedef std::pair<short, short> location;

// Sequence of positions.
typedef std::vector<location> loc;

// Sequence of words (or lines) of text.
typedef std::vector<std::string> text;

// A word list paired with the parallel list of each word's position.
typedef std::pair<text*, loc*> text_loc;

class TextQuery {

public:

TextQuery() { memset( this, 0, sizeof( TextQuery )); }

static void

filter_elements( string felems ) { filt_elems = felems; }

void query_text();

void display_map_text();

void display_text_locations();

void doit() {

retrieve_text();

separate_words();

filter_text();

suffix_text();

strip_caps();

build_word_map();

}

private:

void retrieve_text();

void separate_words():

void filter_text();

void strip_caps();

void suffix_textQ;

void suffix_s( string );

void build_word_map();

private:

vectorstring,allocator *lines_of_text;

text_loc *text_locations;

map string,loc*,

lessstring,allocator *word_map;

static string filt_elems;

};

string TextQuery::filt_elems( "\", •;: !?)(\V" );

// Program entry point: builds one TextQuery, lets doit() prepare the text,
// then runs the query and prints the word map.
int main()
{
    TextQuery tq;

    tq.doit();
    tq.query_text();
    tq.display_map_text();

    return 0;
}

// Prompts on standard output for a file name, reads the name from standard
// input and loads that file, one string per line, into a newly allocated
// vector stored in lines_of_text. If the file cannot be opened, a message is
// written to standard error and the program ends through exit( -1 ).
void
TextQuery::
retrieve_text()
{
    std::string file_name;

    std::cout << "please enter file name: ";
    std::cin >> file_name;

    std::ifstream infile( file_name.c_str(), std::ios::in );
    if ( !infile ) {
        std::cerr << "oops' unable to open file "
                  << file_name << " -- bailing out!\n";
        exit( -1 );
    }
    else std::cout << "\n";

    lines_of_text = new std::vector<std::string>;

    std::string textline;
    while ( std::getline( infile, textline, '\n' ))
        lines_of_text->push_back( textline );
}

void

TextQuery::

separate_words()

{

vectorstring,allocator *words =

new vectorstring,allocator;

vectorlocation,allocator *locations =

new vectorlocation,allocator;

for ( short line_pos = 0; line_pos lines_of_text-size();

line_pos++ )

{

short word_pos = 0;

string textline = (*lines_of_text)[ line_pos ];

string::size_type eol = textline.1ength();

string::size_type pos = 0, prev_pos = 0;

while (( pos = textline.find_first_of( ' ', pos ))

!= string::npos )

{

words-push_back(

textline.substr( prev_pos, pos - prev_pos ));

locations-push_back(

make_pair( line_pos, word_pos ));

word_pos++; pos++; prev_pos = pos;

}

words-push_back(

textline.substr( prev_pos, pos - prev_pos ));

locations-push_back(make_pair(line_pos,word_pos));

}

text_locations = new text_loc( words, locations );

}

// Removes, from every word in text_locations->first, each character that
// appears in filt_elems. Does nothing when filt_elems is empty.
void
TextQuery::
filter_text()
{
    if ( filt_elems.empty() )
        return;

    std::vector<std::string> *words = text_locations->first;

    std::vector<std::string>::iterator iter = words->begin();
    std::vector<std::string>::iterator iter_end = words->end();

    while ( iter != iter_end )
    {
        std::string::size_type pos = 0;

        // After an erase the next character slides into pos, so the search
        // resumes from the same index.
        while (( pos = (*iter).find_first_of( filt_elems, pos ))
                     != std::string::npos )
            (*iter).erase( pos, 1 );

        ++iter;
    }
}

// Walks every word in text_locations->first and hands each word that is
// longer than three characters and ends in 's' to suffix_s().
void
TextQuery::
suffix_text()
{
    std::vector<std::string> *words = text_locations->first;

    std::vector<std::string>::iterator iter = words->begin();
    std::vector<std::string>::iterator iter_end = words->end();

    while ( iter != iter_end ) {
        // Words of three characters or fewer are left untouched.
        if ( (*iter).size() <= 3 )
            { ++iter; continue; }

        if ( (*iter)[ (*iter).size()-1 ] == 's' )
            suffix_s( *iter );

        // additional suffix handling...

        ++iter;
    }
}

// Rewrites, in place, a word that ends in 's':
//   - words ending in "ous", "ius", "ss" or "is" are left unchanged;
//   - a final "ies" becomes "y";
//   - a final "ses" loses its last two characters;
//   - otherwise the final character is removed, and a trailing apostrophe
//     left behind by that removal is removed as well.
//
// word is taken by reference so that the changes reach the caller's string.
// Words shorter than three characters are returned unchanged, because the
// three-character suffix window would start before the beginning of the word.
void
TextQuery::
suffix_s( std::string &word )
{
    if ( word.size() < 3 )
        return;

    std::string::size_type spos = 0;
    std::string::size_type pos3 = word.size()-3;

    // "ous", "ss", "is", "ius" packed into one string:
    // "ous" at 0, "ss" at 2, "is" at 4, "ius" at 6
    std::string suffixes( "oussisius" );

    if ( ! word.compare( pos3, 3, suffixes, spos, 3 ) ||
         ! word.compare( pos3, 3, suffixes, spos+6, 3 ) ||
         ! word.compare( pos3+1, 2, suffixes, spos+2, 2 ) ||
         ! word.compare( pos3+1, 2, suffixes, spos+4, 2 ))
        return;

    std::string ies( "ies" );
    if ( ! word.compare( pos3, 3, ies ))
    {
        word.replace( pos3, 3, 1, 'y' );
        return;
    }

    std::string ses( "ses" );
    if ( ! word.compare( pos3, 3, ses ))
    {
        word.erase( pos3+1, 2 );
        return;
    }

    // remove the trailing 's'
    word.erase( pos3+2 );

    // remove the apostrophe of a trailing "'s"
    if ( word[ pos3+1 ] == '\'' )
        word.erase( pos3+1 );
}

// Converts every upper-case letter A-Z in every word of
// text_locations->first to lower case.
void
TextQuery::
strip_caps()
{
    std::vector<std::string> *words = text_locations->first;

    std::vector<std::string>::iterator iter = words->begin();
    std::vector<std::string>::iterator iter_end = words->end();

    std::string caps( "ABCDEFGHIJKLMNOPQRSTUVWXYZ" );

    while ( iter != iter_end ) {
        std::string::size_type pos = 0;

        // The character at pos is lower case after the assignment, so the
        // next search from the same pos moves on to the following capital.
        while (( pos = (*iter).find_first_of( caps, pos ))
                     != std::string::npos )
            (*iter)[ pos ] = static_cast<char>( tolower( (*iter)[ pos ] ));

        ++iter;
    }
}

void

TextQuery::

build_word_map()

{

word_map = new mapstring,loc*,lessstring,allocator;

typedef mapstring,loc*,lessstring,allocator::value_type