value_type;
typedef setstring,lessstring,allocator::difference_type
diff_type;
setstring,lessstring,allocator exclusion_set;
ifstream infile( "exclusion_set" );
if ( !infile )
{
static string default_excluded_words[25] = {
"the","and","but","that","then","are","been",
"can","can't","cannot","could","did","for",
"had","have","him","his","her","its"."into",
"were","which","when","with","would"
};
cerr "warning! unable to open word exclusion file! -- "
"using default set\n";
copy( default_excluded_words,
default_excluded_words+25,
inserter(exclusion_set, exclusion_set.begin()));
}
else {
istream_iterator string, diff_type
input_set( infile ), eos;
copy( input_set, eos,
inserter( exclusion_set, exclusion_set.begin() ));
}
// пробежимся по всем словам, вставляя пары
vectorstring,allocator *text_words =
text_locations-first;
vectorlocation,allocator *text.locs =
text_locations-second;
register int elem_cnt = text_words-size();
for ( int ix = 0; ix elem_cnt; ++-ix )
{
string textword = ( *text_words )[ ix ];
if ( textword.size() 3 ||
exclusion_set.count( textword ))
continue;
if ( ! word_map-count((*text_words)[ix] ))
{ // слово отсутствует, добавим:
loc *ploc = new vectorlocation,allocator;
ploc-push_back( (*text_locs)[ix] );
word_map-insert( value_type( (*text_words)[ix],ploc ));
}
else (*word_map) [(*text_words) [ix]]-
push_back( (*text_locs) [ix] );
}
}
void
TextQuery::
query_text()
{
string query_text;
do {
cout
"enter a word against which to search the text.\n"
"to quit, enter a single character == ";
cin query_text;
if ( query_text.size() 2 ) break;
string caps( "ABCDEFGHIJKLMNOPQRSTUVWXYZ" );
string::size_type pos = 0;
while (( pos = query_text.find_first_of( caps, pos ))
!= string::npos )
query_text[ pos ] = to1ower( query_text[pos] );
// query_text должно быть введено
if ( !word_map-count( query_text )) {
cout "\nSorry. There are no entries for "
query_text ".\n\n";
continue;
}
loc *ploc = (*word_map) [ query_text ];
setshort,lessshort,allocator occurrence_1i nes;
loc::iterator liter = ploc-begin(),
liter_end = ploc-end();
while ( liter != liter_end ) {
occurrence_lines.1nsert(
occurrence_lines.end(), (*liter).first);
++liter;
}
register int size = occurrence_lines.size();
cout "\n" query_text
" occurs " size
(size == 1 ? " time:" : " times:")
"\n\n";
setshort,lessshort,allocator::iterator
it=occurrence_lines.begin();
for ( ; it != occurrence_"lines.end(); ++it ) {
int line = *it;
cout "\t( line "
// будем нумеровать строки с 1,
// как это принято везде
line + 1 " ) "
(*lines_of_text)[line] endl;
}
cout endl;
}
while ( ! query_text.empty() );
cout "Ok, bye!\n";
}
void
TextQuery::
display_map_text()
{
typedef mapstring,loc*, lessstring, allocator map_text;
map_text::iterator iter = word_map-begin(),
iter_end = word_map-end();
while ( iter != iter_end ) {
cout "word: " (*iter).first " (";
int loc_cnt = 0;
loc *text_locs = (*iter).second;
loc::iterator liter = text_locs-begin(),
liter_end = text_locs-end();
while ( liter != liter_end )
{
if ( loc_cnt )
cout ",";
else ++loc_cnt;
cout "(" (*liter).first
"," (*liter).second ")";
++"liter;
}
cout ")\n";
++iter;
}
cout endl;
}
void
TextQuery::
disp1ay_text_locations()
{
vectorstring,allocator *text_words =
text_locations-first;
vectorlocation,allocator *text_locs =
text_locations-second;
register int elem_cnt = text_words-size();
if ( elem_cnt != text_locs-size() )
{
cerr
"oops! internal error: word and position vectors "
"are of unequal size\n"
"words: " elem_cnt " "
"locs: " text_locs-size()
" -- bailing out!\n";
exit( -2 );
}
for ( int ix=0; ix elem_cnt; ix++ )
{
cout "word: " (*text_words)[ ix ] "\t"
"location: ("
(*text_locs)[ix].first ","
(*text.locs)[ix].second ")"
"\n";
}
cout endl;
}
Упражнение 6.25
Объясните, почему нам потребовался специальный класс inserter для заполнения набора стоп-слов (это упоминается в разделе 6.13.1, а детально рассматривается в 12.4.1).
set<string> exclusion_set;
ifstream infile( "exclusion_set" );
copy( default_excluded_words, default_excluded_words+25,
inserter(exclusion_set, exclusion_set.begin() ));
Первоначальная реализация поисковой системы отражает процедурный подход: набор глобальных функций оперирует набором независимых структур данных. Окончательный вариант представляет собой альтернативный подход, когда мы инкапсулируем функции и данные в класс TextQuery. Сравните оба способа. Каковы недостатки и преимущества каждого?