
Commit

updates to C++17
kosloot committed Oct 1, 2024
1 parent 8f26974 commit 375a6af
Showing 5 changed files with 58 additions and 65 deletions.
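
The recurring change in this commit replaces element access through .first/.second with C++17 structured bindings when looping over maps. A minimal stand-alone sketch of the before/after pattern (the container, names, and data below are illustrative and not taken from the TICCL sources):

// Illustrative only: the refactoring pattern applied throughout this commit.
// Requires -std=c++17 (structured bindings).
#include <iostream>
#include <map>
#include <string>

int main(){
  std::map<std::string,size_t> freqMap = { {"foo",3}, {"bar",1} };
  // before: iterate pairs and spell out .first / .second
  for ( const auto& entry : freqMap ){
    std::cout << entry.first << "\t" << entry.second << std::endl;
  }
  // after: C++17 structured bindings give the pair members readable names
  for ( const auto& [word,freq] : freqMap ){
    std::cout << word << "\t" << freq << std::endl;
  }
  return 0;
}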
28 changes: 14 additions & 14 deletions src/TICCL-LDcalc.cxx
@@ -726,8 +726,8 @@ void add_short( ostream& os,
const map<UnicodeString,size_t>& freqMap,
const map<UnicodeString,size_t>& low_freqMap,
int max_ld, size_t threshold ){
-  for ( const auto& entry : dis_count ){
-    vector<UnicodeString> parts = TiCC::split_at( entry.first, "~" );
+  for ( const auto& [word,point] : dis_count ){
+    vector<UnicodeString> parts = TiCC::split_at( word, "~" );
ld_record rec( parts[0], parts[1],
0, 0,
freqMap, low_freqMap,
@@ -736,7 +736,7 @@ void add_short( ostream& os,
continue;
}
rec.fill_fields( threshold );
-    rec.ngram_point = entry.second;
+    rec.ngram_point = point;
os << rec.toString() << endl;
}
}
@@ -1191,31 +1191,31 @@ int main( int argc, char **argv ){
add_short( shortf, dis_count, freqMap, low_freqMap, LDvalue, artifreq );
cout << endl << "creating .ambi file: " << ambiFile << endl;
ofstream amb( ambiFile );
-  for ( const auto& ambi : dis_map ){
-    amb << ambi.first << "#";
-    for ( const auto& val : ambi.second ){
+  for ( const auto& [word,ambi_set] : dis_map ){
+    amb << word << "#";
+    for ( const auto& val : ambi_set ){
amb << val << "#";
}
amb << endl;
}
map<UnicodeString,unsigned int> low_ngramcount;
-  for ( const auto& ng : ngram_count ){
-    UnicodeString lv = ng.first;
+  for ( const auto& [word,count] : ngram_count ){
+    UnicodeString lv = word;
lv.toLower();
-    low_ngramcount[lv] += ng.second;
+    low_ngramcount[lv] += count;
}
-  for ( const auto& it : ngram_count ){
-    if ( record_store.find( it.first ) != record_store.end() ){
-      UnicodeString lv = it.first;
+  for ( const auto& [word,count] : ngram_count ){
+    if ( record_store.find( word ) != record_store.end() ){
+      UnicodeString lv = word;
lv.toLower();
assert( low_ngramcount.find( lv ) != low_ngramcount.end() );
-      record_store.find(it.first)->second.ngram_point += low_ngramcount[lv];
+      record_store.find(word)->second.ngram_point += low_ngramcount[lv];
}
else {
// Ok, our data seems to be incomplete
// that is not our problem, so ignore
if ( verbose > 2 ){
cerr << "ignoring " << it.first << endl;
cerr << "ignoring " << word << endl;
}
}
}
19 changes: 7 additions & 12 deletions src/TICCL-anahash.cxx
@@ -53,12 +53,11 @@ bool do_ngrams = false;

void create_output( ostream& os,
const map<bitType, set<UnicodeString>>& anagrams ){
-  for ( const auto& it : anagrams ){
-    bitType val = it.first;
+  for ( const auto& [val,str_set] : anagrams ){
os << val << "~";
-    for ( auto const& s : it.second ){
+    for ( auto const& s : str_set ){
os << s;
-      if ( &s != &(*it.second.crbegin()) )
+      if ( s != *str_set.crbegin() )
os << "#";
}
os << endl;
@@ -167,8 +166,8 @@ map<bitType, set<UnicodeString>>
extract_foci( const map<UnicodeString,bitType>& freq_list,
const map<UChar,bitType>& alphabet ){
map<bitType, set<UnicodeString>> foci;
-  for ( const auto& it : freq_list ){
-    UnicodeString word = it.first;
+  for ( const auto& [val,freq] : freq_list ){
+    UnicodeString word = val;
bitType h = ticcl::hash( word, alphabet );
if ( do_ngrams ){
vector<UnicodeString> parts = TiCC::split_at( word, separator );
@@ -201,7 +200,6 @@ extract_foci( const map<UnicodeString,bitType>& freq_list,
}
}
else {
-      bitType freq = it.second;
if ( freq < artifreq ){
word.toLower();
const auto l_it = freq_list.find(word);
@@ -390,18 +388,15 @@ int main( int argc, const char *argv[] ){
cout << "generating foci file: " << foci_file_name << " with " << foci.size() << " entries" << endl;
ofstream fos( foci_file_name );
create_output( fos, foci );
-    // for ( const auto& f : foci ){
-    //   fos << f.first << endl;
-    // }
}
if ( do_merge ){
cerr << "merge background corpus: " << backfile << endl;
ifstream bs( backfile );
read_backgound( bs, anagrams, merged, alphabet );
string merge_file_name = file_name + ".merged";
ofstream ms( merge_file_name );
-    for ( const auto& it : merged ){
-      ms << it.first << "\t" << it.second << endl;
+    for ( const auto& [word,freq] : merged ){
+      ms << word << "\t" << freq << endl;
}
cerr << "stored merged corpus in " << merge_file_name << endl;

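
In create_output above, the end-of-set test also changes from an address comparison (&s != &(*it.second.crbegin())) to a value comparison against *str_set.crbegin(); both identify the last element, since a std::set holds unique, ordered values. A self-contained sketch of that separator logic, with illustrative names and data:

// Illustrative only: print set elements separated by '#', with no trailing separator.
// Relies on std::set being ordered and holding unique values, so comparing an
// element's value with *crbegin() is true exactly once, for the last element.
#include <iostream>
#include <set>
#include <string>

int main(){
  std::set<std::string> str_set = { "aap", "noot", "mies" };
  for ( const auto& s : str_set ){
    std::cout << s;
    if ( s != *str_set.crbegin() ){
      std::cout << "#";
    }
  }
  std::cout << std::endl;   // prints: aap#mies#noot (alphabetical set order)
  return 0;
}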
48 changes: 24 additions & 24 deletions src/TICCL-chain.cxx
@@ -97,22 +97,22 @@ UnicodeString chain_class::top_head( const UnicodeString& candidate ){
}

void chain_class::final_merge(){
-  for ( auto& it : table ){
-    if ( !it.second.empty() ){
+  for ( auto& [word,word_set] : table ){
+    if ( !word_set.empty() ){
// for all entries that seem to be a 'head'
-      UnicodeString head = top_head( it.first );
-      assert( head != it.first );
+      UnicodeString head = top_head( word );
+      assert( head != word );
if ( !head.isEmpty() ){
// so it has a higher head
if ( verbosity > 3 ){
cerr << "merge: " << it.first << it.second << " into "
cerr << "merge: " << word << word_set << " into "
<< head << table[head] << endl;
}
-      for ( const auto& s : it.second ){
+      for ( const auto& s : word_set ){
table[head].insert( s );
heads[s] = head;
}
-      it.second.clear();
+      word_set.clear();
}
}
}
@@ -251,29 +251,29 @@ bool chain_class::fill( const UnicodeString& line, bool nounk ){
}

void chain_class::debug_info( ostream& db ){
-  for ( const auto& it : heads ){
-    db << "head[" << it.first << "]=" << it.second << endl;
+  for ( const auto& [word,head] : heads ){
+    db << "head[" << word << "]=" << head << endl;
}
-  for ( const auto& it : table ){
-    db << var_freq[it.first] << " " << it.first
-       << " " << it.second << endl;
+  for ( const auto& [word,word_set] : table ){
+    db << var_freq[word] << " " << word
+       << " " << word_set << endl;
}
}

void chain_class::output( const string& out_file ){
ofstream os( out_file );
multimap<size_t, string,std::greater<size_t>> out_map;
-  for ( const auto& t_it : table ){
-    for ( const auto& s : t_it.second ){
+  for ( const auto& [word,word_set] : table ){
+    for ( const auto& s : word_set ){
stringstream oss;
oss << s << "#" << var_freq[s] << "#" << t_it.first
<< "#" << var_freq[t_it.first];
oss << s << "#" << var_freq[s] << "#" << word
<< "#" << var_freq[word];
if ( cc_vals_present ){
-        UnicodeString val = w_cc_conf[s+t_it.first];
+        UnicodeString val = w_cc_conf[s+word];
if ( val.isEmpty() ){
// cerr << "GEEN waarde voor " << s+t_it.first << endl;
// cerr << "GEEN waarde voor " << s+word << endl;
bitType h1 = ticcl::hash(s, alphabet );
-          bitType h2 = ticcl::hash(t_it.first, alphabet );
+          bitType h2 = ticcl::hash(word, alphabet );
bitType h_val;
if ( h1 > h2 ){
h_val = h1 - h2;
@@ -282,13 +282,13 @@ void chain_class::output( const string& out_file ){
h_val = h2 - h1;
}
// cerr << "h_val=" << h_val << endl;
-          w_cc_conf[s+t_it.first] = TiCC::toUnicodeString(h_val);
-          // cerr << "nieuwe waarde voor " << s+t_it.first << "=" << w_cc_conf[s+t_it.first] << endl;
+          w_cc_conf[s+word] = TiCC::toUnicodeString(h_val);
+          // cerr << "nieuwe waarde voor " << s+word << "=" << w_cc_conf[s+word] << endl;
}
oss << "#" + w_cc_conf[s+t_it.first];
oss << "#" + w_cc_conf[s+word];
}
oss << "#" << ld( t_it.first, s, caseless ) << "#C";
out_map.insert( make_pair( var_freq[t_it.first], oss.str() ) );
oss << "#" << ld( word, s, caseless ) << "#C";
out_map.insert( make_pair( var_freq[word], oss.str() ) );
}
}
for ( const auto& t_it : out_map ){
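
Note that final_merge above binds with auto& rather than const auto&, so the named pieces refer to the live map entries and word_set.clear() empties the stored set in place. A small sketch of that behaviour; the container and names are made up for illustration:

// Illustrative only: structured bindings over auto& expose the mapped value
// by reference, so mutations affect the underlying container.
#include <cassert>
#include <map>
#include <set>
#include <string>

int main(){
  std::map<std::string,std::set<std::string>> table = {
    { "head", { "var1", "var2" } }
  };
  for ( auto& [word,word_set] : table ){
    (void)word;           // the key is unused here
    word_set.clear();     // clears the set stored inside 'table'
  }
  assert( table["head"].empty() );
  return 0;
}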
16 changes: 7 additions & 9 deletions src/TICCL-indexerNT.cxx
@@ -187,13 +187,11 @@ void handle_exp( const experiment& exp,

void output_result( ostream& os,
const map<bitType,set<bitType>>& result ){
-  for ( auto const& rit : result ){
-    os << rit.first << "#";
-    auto it = rit.second.begin();
-    while ( it != rit.second.end() ){
-      os << *it;
-      ++it;
-      if ( it != rit.second.end() ){
+  for ( auto const& [bt,bt_set] : result ){
+    os << bt << "#";
+    for ( const auto& it: bt_set ){
+      os << it;
+      if ( it != *bt_set.rbegin() ){
os << ",";
}
}
@@ -204,8 +202,8 @@ void output_result( ostream& os,

void output_confusions( ostream& csf,
const map<bitType,set<bitType>>& result ){
-  for ( auto const& rit : result ){
-    csf << rit.first << "#" << rit.second.size() << endl;
+  for ( auto const& [bt,bt_set] : result ){
+    csf << bt << "#" << bt_set.size() << endl;
csf.flush();
}
}
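
In output_result above, the manual iterator walk (advance, then test against end() before emitting ',') becomes a range-for that skips the comma after the greatest element, found via rbegin(). Both variants print the separator only between elements. A self-contained sketch under those assumptions, with illustrative data:

// Illustrative only: comma-join a std::set two ways; the output is identical.
#include <iostream>
#include <set>

int main(){
  std::set<unsigned long> bt_set = { 3, 1, 7 };
  // before: explicit iterator, separator decided after advancing
  auto it = bt_set.begin();
  while ( it != bt_set.end() ){
    std::cout << *it;
    ++it;
    if ( it != bt_set.end() ){
      std::cout << ",";
    }
  }
  std::cout << std::endl;   // 1,3,7
  // after: range-for, separator skipped for the last (largest) element
  for ( const auto& v : bt_set ){
    std::cout << v;
    if ( v != *bt_set.rbegin() ){
      std::cout << ",";
    }
  }
  std::cout << std::endl;   // 1,3,7
  return 0;
}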
12 changes: 6 additions & 6 deletions src/TICCL-lexclean.cxx
@@ -53,8 +53,8 @@ void create_wf_list( const map<UnicodeString, unsigned int>& wc,
exit(EXIT_FAILURE);
}
map<unsigned int, set<UnicodeString> > wf;
-  for ( const auto& cit : wc ){
-    wf[cit.second].insert( cit.first );
+  for ( const auto& [word,freq] : wc ){
+    wf[freq].insert(word);
}
unsigned int sum=0;
auto wit = wf.rbegin();
@@ -88,10 +88,10 @@ void dump_quarantine( const string& filename,
cerr << "failed to create outputfile '" << filename << "'" << endl;
exit(EXIT_FAILURE);
}
-  for ( const auto& it : qw ){
-    os << it.first;
-    if ( it.second > 0 ){
-      os << "\t" << it.second;
+  for ( const auto& [word,freq] : qw ){
+    os << word;
+    if ( freq > 0 ){
+      os << "\t" << freq;
}
os << endl;
}
