Выбрать главу

value_type;

typedef setstring,lessstring,allocator::difference_type

diff_type;

setstring,lessstring,allocator exclusion_set;

ifstream infile( "exclusion_set" );

if ( !infile )

{

static string default_excluded_words[25] = {

"the","and","but","that","then","are","been",

"can","can't","cannot","could","did","for",

"had","have","him","his","her","its"."into",

"were","which","when","with","would"

};

cerr "warning! unable to open word exclusion file! -- "

"using default set\n";

copy( default_excluded_words,

default_excluded_words+25,

inserter(exclusion_set, exclusion_set.begin()));

}

else {

istream_iterator string, diff_type

input_set( infile ), eos;

copy( input_set, eos,

inserter( exclusion_set, exclusion_set.begin() ));

}

// пробежимся по всем словам, вставляя пары

vectorstring,allocator *text_words =

text_locations-first;

vectorlocation,allocator *text.locs =

text_locations-second;

register int elem_cnt = text_words-size();

for ( int ix = 0; ix elem_cnt; ++-ix )

{

string textword = ( *text_words )[ ix ];

if ( textword.size() 3 ||

exclusion_set.count( textword ))

continue;

if ( ! word_map-count((*text_words)[ix] ))

{ // слово отсутствует, добавим:

loc *ploc = new vectorlocation,allocator;

ploc-push_back( (*text_locs)[ix] );

word_map-insert( value_type( (*text_words)[ix],ploc ));

}

else (*word_map) [(*text_words) [ix]]-

push_back( (*text_locs) [ix] );

}

}

void

TextQuery::

query_text()

{

string query_text;

do {

cout

"enter a word against which to search the text.\n"

"to quit, enter a single character == ";

cin query_text;

if ( query_text.size() 2 ) break;

string caps( "ABCDEFGHIJKLMNOPQRSTUVWXYZ" );

string::size_type pos = 0;

while (( pos = query_text.find_first_of( caps, pos ))

!= string::npos )

query_text[ pos ] = to1ower( query_text[pos] );

// query_text должно быть введено

if ( !word_map-count( query_text )) {

cout "\nSorry. There are no entries for "

query_text ".\n\n";

continue;

}

loc *ploc = (*word_map) [ query_text ];

setshort,lessshort,allocator occurrence_1i nes;

loc::iterator liter = ploc-begin(),

liter_end = ploc-end();

while ( liter != liter_end ) {

occurrence_lines.1nsert(

occurrence_lines.end(), (*liter).first);

++liter;

}

register int size = occurrence_lines.size();

cout "\n" query_text

" occurs " size

(size == 1 ? " time:" : " times:")

"\n\n";

setshort,lessshort,allocator::iterator

it=occurrence_lines.begin();

for ( ; it != occurrence_"lines.end(); ++it ) {

int line = *it;

cout "\t( line "

// будем нумеровать строки с 1,

// как это принято везде

line + 1 " ) "

(*lines_of_text)[line] endl;

}

cout endl;

}

while ( ! query_text.empty() );

cout "Ok, bye!\n";

}

void

TextQuery::

display_map_text()

{

typedef mapstring,loc*, lessstring, allocator map_text;

map_text::iterator iter = word_map-begin(),

iter_end = word_map-end();

while ( iter != iter_end ) {

cout "word: " (*iter).first " (";

int loc_cnt = 0;

loc *text_locs = (*iter).second;

loc::iterator liter = text_locs-begin(),

liter_end = text_locs-end();

while ( liter != liter_end )

{

if ( loc_cnt )

cout ",";

else ++loc_cnt;

cout "(" (*liter).first

"," (*liter).second ")";

++"liter;

}

cout ")\n";

++iter;

}

cout endl;

}

void

TextQuery::

disp1ay_text_locations()

{

vectorstring,allocator *text_words =

text_locations-first;

vectorlocation,allocator *text_locs =

text_locations-second;

register int elem_cnt = text_words-size();

if ( elem_cnt != text_locs-size() )

{

cerr

"oops! internal error: word and position vectors "

"are of unequal size\n"

"words: " elem_cnt " "

"locs: " text_locs-size()

" -- bailing out!\n";

exit( -2 );

}

for ( int ix=0; ix elem_cnt; ix++ )

{

cout "word: " (*text_words)[ ix ] "\t"

"location: ("

(*text_locs)[ix].first ","

(*text.locs)[ix].second ")"

"\n";

}

cout endl;

}

Упражнение 6.25

Объясните, почему нам потребовался специальный класс inserter для заполнения набора стоп-слов (это упоминается в разделе 6.13.1, а детально рассматривается в 12.4.1).

set<string> exclusion_set;

ifstream infile( "exclusion_set" );

copy( default_excluded_words, default_excluded_words+25,

inserter(exclusion_set, exclusion_set.begin() ));

Упражнение 6.26

Первоначальная реализация поисковой системы отражает процедурный подход: набор глобальных функций оперирует набором независимых структур данных. Окончательный вариант представляет собой альтернативный подход, когда мы инкапсулируем функции и данные в класс TextQuery. Сравните оба способа. Каковы недостатки и преимущества каждого?