Skip to content

Commit

Permalink
class Jieba: support default dictpath
Browse files Browse the repository at this point in the history
  • Loading branch information
yanyiwu committed Sep 22, 2024
1 parent 6e167a3 commit f2eae47
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 10 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,5 +48,5 @@ jobs:
working-directory: ${{github.workspace}}/build
# Execute tests defined by the CMake configuration.
# See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail
run: ctest -C ${{env.BUILD_TYPE}}

run: ctest -C ${{env.BUILD_TYPE}} --verbose

54 changes: 46 additions & 8 deletions include/cppjieba/Jieba.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,24 @@ namespace cppjieba {

class Jieba {
public:
Jieba(const string& dict_path,
const string& model_path,
const string& user_dict_path,
const string& idfPath,
const string& stopWordPath)
: dict_trie_(dict_path, user_dict_path),
model_(model_path),
Jieba(const string& dict_path = "",
const string& model_path = "",
const string& user_dict_path = "",
const string& idf_path = "",
const string& stop_word_path = "")
: dict_trie_(getPath(dict_path, "jieba.dict.utf8")),
model_(getPath(model_path, "hmm_model.utf8")),
mp_seg_(&dict_trie_),
hmm_seg_(&model_),
mix_seg_(&dict_trie_, &model_),
full_seg_(&dict_trie_),
query_seg_(&dict_trie_, &model_),
extractor(&dict_trie_, &model_, idfPath, stopWordPath) {
extractor(&dict_trie_, &model_,
getPath(idf_path, "idf.utf8"),
getPath(stop_word_path, "stop_words.utf8")) {
if (!user_dict_path.empty()) {
dict_trie_.LoadUserDict(user_dict_path);
}
}
// Empty destructor: no explicit cleanup is performed in this body.
~Jieba() {
}
Expand Down Expand Up @@ -115,6 +120,39 @@ class Jieba {
}

private:
// Concatenates a directory and a file name into one path.
//
// dir:      directory part; when empty, `filename` is returned unchanged.
// filename: file name appended to `dir`.
//
// If `dir` already ends in '/' or '\\' the two parts are concatenated as-is;
// otherwise the platform separator ('\\' when _WIN32 is defined, '/'
// elsewhere) is inserted between them.
static string pathJoin(const string& dir, const string& filename) {
#ifdef _WIN32
  const char separator = '\\';
#else
  const char separator = '/';
#endif

  if (dir.empty()) {
    return filename;
  }

  string joined(dir);
  const char tail = dir[dir.size() - 1];
  const bool ends_with_separator = (tail == '/') || (tail == '\\');
  if (!ends_with_separator) {
    joined += separator;
  }
  joined += filename;
  return joined;
}

// Returns the directory portion of this source file's path as recorded by the
// compiler in __FILE__ (everything before the last '/' or '\\').
// Yields an empty string when __FILE__ contains no path separator.
static string getCurrentDirectory() {
  const string file_path = __FILE__;
  const string::size_type last_sep = file_path.find_last_of("/\\");
  if (last_sep == string::npos) {
    return "";
  }
  return file_path.substr(0, last_sep);
}

// Resolves a resource path, substituting a default location when none is given.
//
// path:         caller-supplied path; returned unchanged when non-empty.
// default_file: file name used when `path` is empty.
//
// When `path` is empty, the result is `default_file` inside a "dict" directory
// located two levels above the directory reported by getCurrentDirectory().
static string getPath(const string& path, const string& default_file) {
  if (!path.empty()) {
    return path;
  }

  string base_dir = getCurrentDirectory();
  // Climb two directory levels. A directory without any separator (e.g. the
  // "include" left over from a relative __FILE__ such as
  // "include/cppjieba/Jieba.hpp") has the working directory as its parent,
  // represented here by an empty string. Calling substr(0, npos) in that case
  // would return the directory unchanged and the climb would silently stall.
  for (int level = 0; level < 2; ++level) {
    const string::size_type last_sep = base_dir.find_last_of("/\\");
    base_dir = (last_sep == string::npos) ? string() : base_dir.substr(0, last_sep);
  }
  return pathJoin(pathJoin(base_dir, "dict"), default_file);
}

DictTrie dict_trie_;
HMMModel model_;

Expand Down

0 comments on commit f2eae47

Please sign in to comment.