
Commit

updates to C++17
kosloot committed Oct 1, 2024
1 parent 8f26974 commit 375a6af
Showing 5 changed files with 58 additions and 65 deletions.
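
The recurring change in this commit replaces element access through .first/.second with C++17 structured bindings when looping over maps. A minimal stand-alone sketch of the before/after pattern (the container, names, and data below are illustrative and not taken from the TICCL sources):

// Illustrative only: the refactoring pattern applied throughout this commit.
// Requires -std=c++17 (structured bindings).
#include <iostream>
#include <map>
#include <string>

int main(){
  std::map<std::string,size_t> freqMap = { {"foo",3}, {"bar",1} };
  // before: iterate pairs and spell out .first / .second
  for ( const auto& entry : freqMap ){
    std::cout << entry.first << "\t" << entry.second << std::endl;
  }
  // after: C++17 structured bindings give the pair members readable names
  for ( const auto& [word,freq] : freqMap ){
    std::cout << word << "\t" << freq << std::endl;
  }
  return 0;
}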
28 changes: 14 additions & 14 deletions src/TICCL-LDcalc.cxx
@@ -726,8 +726,8 @@ void add_short( ostream& os,
const map<UnicodeString,size_t>& freqMap,
const map<UnicodeString,size_t>& low_freqMap,
int max_ld, size_t threshold ){
-  for ( const auto& entry : dis_count ){
-    vector<UnicodeString> parts = TiCC::split_at( entry.first, "~" );
+  for ( const auto& [word,point] : dis_count ){
+    vector<UnicodeString> parts = TiCC::split_at( word, "~" );
ld_record rec( parts[0], parts[1],
0, 0,
freqMap, low_freqMap,
@@ -736,7 +736,7 @@ void add_short( ostream& os,
continue;
}
rec.fill_fields( threshold );
-    rec.ngram_point = entry.second;
+    rec.ngram_point = point;
os << rec.toString() << endl;
}
}
@@ -1191,31 +1191,31 @@ int main( int argc, char **argv ){
add_short( shortf, dis_count, freqMap, low_freqMap, LDvalue, artifreq );
cout << endl << "creating .ambi file: " << ambiFile << endl;
ofstream amb( ambiFile );
-  for ( const auto& ambi : dis_map ){
-    amb << ambi.first << "#";
-    for ( const auto& val : ambi.second ){
+  for ( const auto& [word,ambi_set] : dis_map ){
+    amb << word << "#";
+    for ( const auto& val : ambi_set ){
amb << val << "#";
}
amb << endl;
}
map<UnicodeString,unsigned int> low_ngramcount;
-  for ( const auto& ng : ngram_count ){
-    UnicodeString lv = ng.first;
+  for ( const auto& [word,count] : ngram_count ){
+    UnicodeString lv = word;
lv.toLower();
-    low_ngramcount[lv] += ng.second;
+    low_ngramcount[lv] += count;
}
-  for ( const auto& it : ngram_count ){
-    if ( record_store.find( it.first ) != record_store.end() ){
-      UnicodeString lv = it.first;
+  for ( const auto& [word,count] : ngram_count ){
+    if ( record_store.find( word ) != record_store.end() ){
+      UnicodeString lv = word;
lv.toLower();
assert( low_ngramcount.find( lv ) != low_ngramcount.end() );
-      record_store.find(it.first)->second.ngram_point += low_ngramcount[lv];
+      record_store.find(word)->second.ngram_point += low_ngramcount[lv];
}
else {
// Ok, our data seems to be incomplete
// that is not our problem, so ignore
if ( verbose > 2 ){
cerr << "ignoring " << it.first << endl;
cerr << "ignoring " << word << endl;
}
}
}
19 changes: 7 additions & 12 deletions src/TICCL-anahash.cxx
@@ -53,12 +53,11 @@ bool do_ngrams = false;

void create_output( ostream& os,
const map<bitType, set<UnicodeString>>& anagrams ){
-  for ( const auto& it : anagrams ){
-    bitType val = it.first;
+  for ( const auto& [val,str_set] : anagrams ){
os << val << "~";
-    for ( auto const& s : it.second ){
+    for ( auto const& s : str_set ){
os << s;
-      if ( &s != &(*it.second.crbegin()) )
+      if ( s != *str_set.crbegin() )
os << "#";
}
os << endl;
@@ -167,8 +166,8 @@ map<bitType, set<UnicodeString>>
extract_foci( const map<UnicodeString,bitType>& freq_list,
const map<UChar,bitType>& alphabet ){
map<bitType, set<UnicodeString>> foci;
-  for ( const auto& it : freq_list ){
-    UnicodeString word = it.first;
+  for ( const auto& [val,freq] : freq_list ){
+    UnicodeString word = val;
bitType h = ticcl::hash( word, alphabet );
if ( do_ngrams ){
vector<UnicodeString> parts = TiCC::split_at( word, separator );
@@ -201,7 +200,6 @@ extract_foci( const map<UnicodeString,bitType>& freq_list,
}
}
else {
-      bitType freq = it.second;
if ( freq < artifreq ){
word.toLower();
const auto l_it = freq_list.find(word);
@@ -390,18 +388,15 @@ int main( int argc, const char *argv[] ){
cout << "generating foci file: " << foci_file_name << " with " << foci.size() << " entries" << endl;
ofstream fos( foci_file_name );
create_output( fos, foci );
-    // for ( const auto& f : foci ){
-    //   fos << f.first << endl;
-    // }
}
if ( do_merge ){
cerr << "merge background corpus: " << backfile << endl;
ifstream bs( backfile );
read_backgound( bs, anagrams, merged, alphabet );
string merge_file_name = file_name + ".merged";
ofstream ms( merge_file_name );
-    for ( const auto& it : merged ){
-      ms << it.first << "\t" << it.second << endl;
+    for ( const auto& [word,freq] : merged ){
+      ms << word << "\t" << freq << endl;
}
cerr << "stored merged corpus in " << merge_file_name << endl;

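
In create_output above, the end-of-set test also changes from an address comparison (&s != &(*it.second.crbegin())) to a value comparison against *str_set.crbegin(); both identify the last element, since a std::set holds unique, ordered values. A self-contained sketch of that separator logic, with illustrative names and data:

// Illustrative only: print set elements separated by '#', with no trailing separator.
// Relies on std::set being ordered and holding unique values, so comparing an
// element's value with *crbegin() is true exactly once, for the last element.
#include <iostream>
#include <set>
#include <string>

int main(){
  std::set<std::string> str_set = { "aap", "noot", "mies" };
  for ( const auto& s : str_set ){
    std::cout << s;
    if ( s != *str_set.crbegin() ){
      std::cout << "#";
    }
  }
  std::cout << std::endl;   // prints: aap#mies#noot (alphabetical set order)
  return 0;
}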
48 changes: 24 additions & 24 deletions src/TICCL-chain.cxx
@@ -97,22 +97,22 @@ UnicodeString chain_class::top_head( const UnicodeString& candidate ){
}

void chain_class::final_merge(){
-  for ( auto& it : table ){
-    if ( !it.second.empty() ){
+  for ( auto& [word,word_set] : table ){
+    if ( !word_set.empty() ){
// for all entries that seem to be a 'head'
-      UnicodeString head = top_head( it.first );
-      assert( head != it.first );
+      UnicodeString head = top_head( word );
+      assert( head != word );
if ( !head.isEmpty() ){
// so it has a higher head
if ( verbosity > 3 ){
cerr << "merge: " << it.first << it.second << " into "
cerr << "merge: " << word << word_set << " into "
<< head << table[head] << endl;
}
-      for ( const auto& s : it.second ){
+      for ( const auto& s : word_set ){
table[head].insert( s );
heads[s] = head;
}
-      it.second.clear();
+      word_set.clear();
}
}
}
@@ -251,29 +251,29 @@ bool chain_class::fill( const UnicodeString& line, bool nounk ){
}

void chain_class::debug_info( ostream& db ){
-  for ( const auto& it : heads ){
-    db << "head[" << it.first << "]=" << it.second << endl;
+  for ( const auto& [word,head] : heads ){
+    db << "head[" << word << "]=" << head << endl;
}
-  for ( const auto& it : table ){
-    db << var_freq[it.first] << " " << it.first
-       << " " << it.second << endl;
+  for ( const auto& [word,word_set] : table ){
+    db << var_freq[word] << " " << word
+       << " " << word_set << endl;
}
}

void chain_class::output( const string& out_file ){
ofstream os( out_file );
multimap<size_t, string,std::greater<size_t>> out_map;
-  for ( const auto& t_it : table ){
-    for ( const auto& s : t_it.second ){
+  for ( const auto& [word,word_set] : table ){
+    for ( const auto& s : word_set ){
stringstream oss;
oss << s << "#" << var_freq[s] << "#" << t_it.first
<< "#" << var_freq[t_it.first];
oss << s << "#" << var_freq[s] << "#" << word
<< "#" << var_freq[word];
if ( cc_vals_present ){
-        UnicodeString val = w_cc_conf[s+t_it.first];
+        UnicodeString val = w_cc_conf[s+word];
if ( val.isEmpty() ){
// cerr << "GEEN waarde voor " << s+t_it.first << endl;
// cerr << "GEEN waarde voor " << s+word << endl;
bitType h1 = ticcl::hash(s, alphabet );
-          bitType h2 = ticcl::hash(t_it.first, alphabet );
+          bitType h2 = ticcl::hash(word, alphabet );
bitType h_val;
if ( h1 > h2 ){
h_val = h1 - h2;
@@ -282,13 +282,13 @@ void chain_class::output( const string& out_file ){
h_val = h2 - h1;
}
// cerr << "h_val=" << h_val << endl;
-          w_cc_conf[s+t_it.first] = TiCC::toUnicodeString(h_val);
-          // cerr << "nieuwe waarde voor " << s+t_it.first << "=" << w_cc_conf[s+t_it.first] << endl;
+          w_cc_conf[s+word] = TiCC::toUnicodeString(h_val);
+          // cerr << "nieuwe waarde voor " << s+word << "=" << w_cc_conf[s+word] << endl;
}
oss << "#" + w_cc_conf[s+t_it.first];
oss << "#" + w_cc_conf[s+word];
}
oss << "#" << ld( t_it.first, s, caseless ) << "#C";
out_map.insert( make_pair( var_freq[t_it.first], oss.str() ) );
oss << "#" << ld( word, s, caseless ) << "#C";
out_map.insert( make_pair( var_freq[word], oss.str() ) );
}
}
for ( const auto& t_it : out_map ){
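
Note that final_merge above binds with auto& rather than const auto&, so the named pieces refer to the live map entries and word_set.clear() empties the stored set in place. A small sketch of that behaviour; the container and names are made up for illustration:

// Illustrative only: structured bindings over auto& expose the mapped value
// by reference, so mutations affect the underlying container.
#include <cassert>
#include <map>
#include <set>
#include <string>

int main(){
  std::map<std::string,std::set<std::string>> table = {
    { "head", { "var1", "var2" } }
  };
  for ( auto& [word,word_set] : table ){
    (void)word;           // the key is unused here
    word_set.clear();     // clears the set stored inside 'table'
  }
  assert( table["head"].empty() );
  return 0;
}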
16 changes: 7 additions & 9 deletions src/TICCL-indexerNT.cxx
@@ -187,13 +187,11 @@ void handle_exp( const experiment& exp,

void output_result( ostream& os,
const map<bitType,set<bitType>>& result ){
-  for ( auto const& rit : result ){
-    os << rit.first << "#";
-    auto it = rit.second.begin();
-    while ( it != rit.second.end() ){
-      os << *it;
-      ++it;
-      if ( it != rit.second.end() ){
+  for ( auto const& [bt,bt_set] : result ){
+    os << bt << "#";
+    for ( const auto& it: bt_set ){
+      os << it;
+      if ( it != *bt_set.rbegin() ){
os << ",";
}
}
@@ -204,8 +202,8 @@ void output_result( ostream& os,

void output_confusions( ostream& csf,
const map<bitType,set<bitType>>& result ){
-  for ( auto const& rit : result ){
-    csf << rit.first << "#" << rit.second.size() << endl;
+  for ( auto const& [bt,bt_set] : result ){
+    csf << bt << "#" << bt_set.size() << endl;
csf.flush();
}
}
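
In output_result above, the manual iterator walk (advance, then test against end() before emitting ',') becomes a range-for that skips the comma after the greatest element, found via rbegin(). Both variants print the separator only between elements. A self-contained sketch under those assumptions, with illustrative data:

// Illustrative only: comma-join a std::set two ways; the output is identical.
#include <iostream>
#include <set>

int main(){
  std::set<unsigned long> bt_set = { 3, 1, 7 };
  // before: explicit iterator, separator decided after advancing
  auto it = bt_set.begin();
  while ( it != bt_set.end() ){
    std::cout << *it;
    ++it;
    if ( it != bt_set.end() ){
      std::cout << ",";
    }
  }
  std::cout << std::endl;   // 1,3,7
  // after: range-for, separator skipped for the last (largest) element
  for ( const auto& v : bt_set ){
    std::cout << v;
    if ( v != *bt_set.rbegin() ){
      std::cout << ",";
    }
  }
  std::cout << std::endl;   // 1,3,7
  return 0;
}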
12 changes: 6 additions & 6 deletions src/TICCL-lexclean.cxx
@@ -53,8 +53,8 @@ void create_wf_list( const map<UnicodeString, unsigned int>& wc,
exit(EXIT_FAILURE);
}
map<unsigned int, set<UnicodeString> > wf;
-  for ( const auto& cit : wc ){
-    wf[cit.second].insert( cit.first );
+  for ( const auto& [word,freq] : wc ){
+    wf[freq].insert(word);
}
unsigned int sum=0;
auto wit = wf.rbegin();
@@ -88,10 +88,10 @@ void dump_quarantine( const string& filename,
cerr << "failed to create outputfile '" << filename << "'" << endl;
exit(EXIT_FAILURE);
}
-  for ( const auto& it : qw ){
-    os << it.first;
-    if ( it.second > 0 ){
-      os << "\t" << it.second;
+  for ( const auto& [word,freq] : qw ){
+    os << word;
+    if ( freq > 0 ){
+      os << "\t" << freq;
}
os << endl;
}
