Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: log dict file and line number info in warnings emitted during deployment #874

Merged
merged 3 commits into from
Jun 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/rime/algo/encoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -255,13 +255,13 @@ bool TableEncoder::DfsEncode(const string& phrase,
}
string encoded;
if (Encode(*code, &encoded)) {
DLOG(INFO) << "encode '" << phrase << "': " << "[" << code->ToString()
<< "] -> [" << encoded << "]";
DLOG(INFO) << "encode '" << phrase << "': "
<< "[" << code->ToString() << "] -> [" << encoded << "]";
collector_->CreateEntry(phrase, encoded, value);
return true;
} else {
DLOG(WARNING) << "failed to encode '" << phrase << "': " << "["
<< code->ToString() << "]";
DLOG(WARNING) << "failed to encode '" << phrase << "': "
<< "[" << code->ToString() << "]";
return false;
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/rime/dict/dict_compiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,8 @@ bool DictCompiler::Compile(const path& schema_file) {
} else {
rebuild_prism = true;
}
LOG(INFO) << dict_file << "[" << dict_files.size() << " file(s)]" << " ("
<< dict_file_checksum << ")";
LOG(INFO) << dict_file << "[" << dict_files.size() << " file(s)]"
<< " (" << dict_file_checksum << ")";
LOG(INFO) << schema_file << " (" << schema_file_checksum << ")";
{
the<ResourceResolver> resolver(
Expand Down
22 changes: 16 additions & 6 deletions src/rime/dict/entry_collector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ void EntryCollector::LoadPresetVocabulary(DictSettings* settings) {

void EntryCollector::Collect(const path& dict_file) {
LOG(INFO) << "collecting entries from " << dict_file;
current_dict_file = dict_file.u8string();
line_number = 0;
// read table
std::ifstream fin(dict_file.c_str());
DictSettings settings;
Expand All @@ -69,13 +71,15 @@ void EntryCollector::Collect(const path& dict_file) {
int weight_column = settings.GetColumnIndex("weight");
int stem_column = settings.GetColumnIndex("stem");
if (text_column == -1) {
LOG(ERROR) << "missing text column definition.";
LOG(ERROR) << "missing text column definition in file: " << dict_file
<< ".";
return;
}
bool enable_comment = true;
string line;
while (getline(fin, line)) {
boost::algorithm::trim_right(line);
line_number++;
// skip empty lines and comments
if (line.empty())
continue;
Expand All @@ -90,7 +94,9 @@ void EntryCollector::Collect(const path& dict_file) {
auto row = strings::split(line, "\t");
int num_columns = static_cast<int>(row.size());
if (num_columns <= text_column || row[text_column].empty()) {
LOG(WARNING) << "Missing entry text at #" << num_entries << ".";
LOG(WARNING) << "Missing entry text at #" << num_entries
<< ", line: " << line_number
<< " of file: " << current_dict_file << ".";
continue;
}
const auto& word(row[text_column]);
Expand All @@ -114,8 +120,8 @@ void EntryCollector::Collect(const path& dict_file) {
encode_queue.push({word, weight_str});
}
if (!stem_str.empty() && !code_str.empty()) {
DLOG(INFO) << "add stem '" << word << "': " << "[" << code_str << "] = ["
<< stem_str << "]";
DLOG(INFO) << "add stem '" << word << "': "
<< "[" << code_str << "] = [" << stem_str << "]";
stems[word].insert(stem_str);
}
}
Expand Down Expand Up @@ -168,15 +174,19 @@ void EntryCollector::CreateEntry(const string& word,
try {
percentage = std::stod(weight_str.substr(0, weight_str.length() - 1));
} catch (...) {
LOG(WARNING) << "invalid entry definition at #" << num_entries << ".";
LOG(WARNING) << "invalid entry definition at #" << num_entries
<< ", line: " << line_number
<< " of file: " << current_dict_file << ".";
percentage = 100.0;
}
e->weight *= percentage / 100.0;
} else if (!weight_str.empty()) { // absolute weight
try {
e->weight = std::stod(weight_str);
} catch (...) {
LOG(WARNING) << "invalid entry definition at #" << num_entries << ".";
LOG(WARNING) << "invalid entry definition at #" << num_entries
<< ", line: " << line_number
<< " of file: " << current_dict_file << ".";
e->weight = 0.0;
}
}
Expand Down
4 changes: 4 additions & 0 deletions src/rime/dict/entry_collector.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ class EntryCollector : public PhraseCollector {
set<string /* word */> collection;
WordMap words;
WeightMap total_weight;

private:
string current_dict_file;
size_t line_number;
};

} // namespace rime
Expand Down