diff --git a/Cargo.lock b/Cargo.lock index db82cd5..d66b872 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -115,12 +115,13 @@ checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] name = "fancy-regex" -version = "0.12.0" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7493d4c459da9f84325ad297371a6b2b8a162800873a22e3b6b6512e61d18c05" +checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2" dependencies = [ "bit-set", - "regex", + "regex-automata", + "regex-syntax", ] [[package]] @@ -178,9 +179,9 @@ dependencies = [ [[package]] name = "magnus" -version = "0.6.4" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1597ef40aa8c36be098249e82c9a20cf7199278ac1c1a1a995eeead6a184479" +checksum = "3d87ae53030f3a22e83879e666cb94e58a7bdf31706878a0ba48752994146dab" dependencies = [ "magnus-macros", "rb-sys", @@ -399,9 +400,9 @@ dependencies = [ [[package]] name = "tiktoken-rs" -version = "0.5.9" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c314e7ce51440f9e8f5a497394682a57b7c323d0f4d0a6b1b13c429056e0e234" +checksum = "44075987ee2486402f0808505dd65692163d243a337fc54363d49afac41087f6" dependencies = [ "anyhow", "base64", @@ -409,6 +410,7 @@ dependencies = [ "fancy-regex", "lazy_static", "parking_lot", + "regex", "rustc-hash", ] diff --git a/ext/tiktoken_ruby/Cargo.toml b/ext/tiktoken_ruby/Cargo.toml index 9f5b07b..ab5314b 100644 --- a/ext/tiktoken_ruby/Cargo.toml +++ b/ext/tiktoken_ruby/Cargo.toml @@ -10,6 +10,6 @@ publish = false crate-type = ["cdylib"] [dependencies] -magnus = { version = "0.6.1" } +magnus = { version = "0.7.1" } rb-sys = { version = "0.9.87", features = ["stable-api-compiled-fallback"] } -tiktoken-rs = { version = "0.5.9" } +tiktoken-rs = { version = "0.6.0" } diff --git a/ext/tiktoken_ruby/src/core_bpe_wrapper.rs b/ext/tiktoken_ruby/src/core_bpe_wrapper.rs index e9f7164..a956d10 100644 --- a/ext/tiktoken_ruby/src/core_bpe_wrapper.rs +++ b/ext/tiktoken_ruby/src/core_bpe_wrapper.rs @@ -1,7 +1,8 @@ use std::collections::HashSet; -use crate::uncicode_error; +use tiktoken_rs::Rank; +use crate::uncicode_error; #[magnus::wrap(class = "Tiktoken::Ext::CoreBPE")] pub struct CoreBPEWrapper { @@ -13,11 +14,15 @@ impl CoreBPEWrapper { Self { core_bpe } } - pub fn encode_ordinary(&self, text: String) -> Vec { + pub fn encode_ordinary(&self, text: String) -> Vec { self.core_bpe.encode_ordinary(text.as_str()) } - pub fn encode(&self, text: String, allowed_special: magnus::RArray) -> Result, magnus::Error> { + pub fn encode( + &self, + text: String, + allowed_special: magnus::RArray, + ) -> Result, magnus::Error> { let allowed_special: Vec = allowed_special.to_vec()?; let allowed_special: Vec<&str> = allowed_special.iter().map(|s| s.as_str()).collect(); let allowed_special: HashSet<&str> = HashSet::from_iter(allowed_special.iter().cloned()); @@ -25,20 +30,18 @@ impl CoreBPEWrapper { Ok(self.core_bpe.encode(text.as_str(), allowed_special)) } - pub fn encode_with_special_tokens(&self, text: String) -> Vec { + pub fn encode_with_special_tokens(&self, text: String) -> Vec { self.core_bpe.encode_with_special_tokens(text.as_str()) } - pub fn decode(&self, ids: Vec) -> Result { - self.core_bpe.decode(ids) - .map_err(|e| { - let error = match uncicode_error() { - Ok(error) => error, - Err(e) => return e - }; - - magnus::Error::new(error, e.to_string()) - }) + pub fn decode(&self, ids: Vec) -> Result { + self.core_bpe.decode(ids).map_err(|e| { + let error = match uncicode_error() { + Ok(error) => error, + Err(e) => return e, + }; + magnus::Error::new(error, e.to_string()) + }) } }