Skip to content

Commit

Permalink
Merge pull request #56 from IAPark/dependabot/cargo/cargo-d67f63c951
Browse files Browse the repository at this point in the history
Bump the cargo group with 2 updates
  • Loading branch information
gjtorikian authored Jan 8, 2025
2 parents 020781a + 452f0c6 commit 278e604
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 23 deletions.
16 changes: 9 additions & 7 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions ext/tiktoken_ruby/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ publish = false
crate-type = ["cdylib"]

[dependencies]
magnus = { version = "0.6.1" }
magnus = { version = "0.7.1" }
rb-sys = { version = "0.9.87", features = ["stable-api-compiled-fallback"] }
tiktoken-rs = { version = "0.5.9" }
tiktoken-rs = { version = "0.6.0" }
31 changes: 17 additions & 14 deletions ext/tiktoken_ruby/src/core_bpe_wrapper.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
use std::collections::HashSet;

use crate::uncicode_error;
use tiktoken_rs::Rank;

use crate::uncicode_error;

#[magnus::wrap(class = "Tiktoken::Ext::CoreBPE")]
pub struct CoreBPEWrapper {
Expand All @@ -13,32 +14,34 @@ impl CoreBPEWrapper {
Self { core_bpe }
}

pub fn encode_ordinary(&self, text: String) -> Vec<usize> {
/// Encodes `text` by delegating to the wrapped `core_bpe`'s `encode_ordinary`.
///
/// Returns the resulting token ranks as a `Vec<Rank>` (`Rank` is imported
/// from `tiktoken_rs`).
// NOTE(review): presumably special tokens are treated as plain text here —
// confirm against the tiktoken-rs documentation.
pub fn encode_ordinary(&self, text: String) -> Vec<Rank> {
self.core_bpe.encode_ordinary(text.as_str())
}

pub fn encode(&self, text: String, allowed_special: magnus::RArray) -> Result<Vec<usize>, magnus::Error> {
/// Encodes `text` with the wrapped `core_bpe`, passing along the set of
/// strings given in `allowed_special`.
///
/// # Errors
///
/// Returns the `magnus::Error` produced by `RArray::to_vec` when the Ruby
/// array cannot be converted into a `Vec<String>`.
pub fn encode(
&self,
text: String,
allowed_special: magnus::RArray,
) -> Result<Vec<Rank>, magnus::Error> {
// Convert the Ruby array into owned Rust strings; `?` propagates a failed conversion.
let allowed_special: Vec<String> = allowed_special.to_vec()?;
// Borrow each owned string as `&str` (the owned Vec above keeps the data alive).
let allowed_special: Vec<&str> = allowed_special.iter().map(|s| s.as_str()).collect();
// Collect the borrowed strings into the `HashSet<&str>` handed to `core_bpe.encode`.
let allowed_special: HashSet<&str> = HashSet::from_iter(allowed_special.iter().cloned());

Ok(self.core_bpe.encode(text.as_str(), allowed_special))
}

pub fn encode_with_special_tokens(&self, text: String) -> Vec<usize> {
/// Encodes `text` by delegating to the wrapped `core_bpe`'s
/// `encode_with_special_tokens`, returning the resulting `Vec<Rank>`.
// NOTE(review): presumably every special token known to the encoding is
// recognised here — confirm against the tiktoken-rs documentation.
pub fn encode_with_special_tokens(&self, text: String) -> Vec<Rank> {
self.core_bpe.encode_with_special_tokens(text.as_str())
}

pub fn decode(&self, ids: Vec<usize>) -> Result<String, magnus::Error> {
self.core_bpe.decode(ids)
.map_err(|e| {
let error = match uncicode_error() {
Ok(error) => error,
Err(e) => return e
};

magnus::Error::new(error, e.to_string())
})
/// Decodes `ids` back into a `String` via the wrapped `core_bpe`'s `decode`.
///
/// # Errors
///
/// When decoding fails, the failure is converted into a `magnus::Error`
/// built from the value returned by `uncicode_error()` and the decode
/// error's message. If `uncicode_error()` itself fails, its error is
/// returned instead.
pub fn decode(&self, ids: Vec<Rank>) -> Result<String, magnus::Error> {
self.core_bpe.decode(ids).map_err(|e| {
// Look up the value used as the first argument of `magnus::Error::new`.
// NOTE(review): presumably a Ruby exception class for unicode errors — confirm
// against the definition of `crate::uncicode_error`.
let error = match uncicode_error() {
Ok(error) => error,
// This `e` shadows the decode error: it is the lookup failure, returned as-is.
Err(e) => return e,
};

// Here `e` is the original decode error; its message becomes the error text.
magnus::Error::new(error, e.to_string())
})
}
}

0 comments on commit 278e604

Please sign in to comment.