Skip to content

Commit

Permalink
Fix encoding_for_model to return nil for unknown encoding (#29)
Browse files (browse the repository at this point in the history)
  • Loading branch information
rob-mindtrip authored Apr 4, 2024
1 parent f2930bf commit c4ba093
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 5 deletions.
12 changes: 7 additions & 5 deletions lib/tiktoken_ruby.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def get_encoding(name)

# Gets the encoding for an OpenAI model
# @param model_name [Symbol|String] The name of the model to get the encoding for
# @return [Tiktoken::Encoding] The encoding instance
# @return [Tiktoken::Encoding, nil] The encoding instance, or nil if no encoding is found
# @example Count tokens for text
# enc = Tiktoken.encoding_for_model("gpt-4")
# enc.encode("hello world").length #=> 2
Expand All @@ -37,10 +37,12 @@ def encoding_for_model(model_name)
return get_encoding(MODEL_TO_ENCODING_NAME[model_name.to_sym])
end

MODEL_PREFIX_TO_ENCODING.each do |prefix, encoding|
if model_name.start_with?(prefix.to_s)
return get_encoding(encoding)
end
_prefix, encoding = MODEL_PREFIX_TO_ENCODING.find do |prefix, _encoding|
model_name.start_with?(prefix.to_s)
end

if encoding
get_encoding(encoding)
end
end

Expand Down
4 changes: 4 additions & 0 deletions spec/tiktoken_ruby_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@
expect(Tiktoken.encoding_for_model("ft:gpt-3.5-turbo:org:suffix:abc123")).to be_a(Tiktoken::Encoding)
end

it "fails gracefully when getting an encoding for an unknown model" do
expect(Tiktoken.encoding_for_model("bad-model-name")).to be_nil
end

it "lists available encodings" do
expect(Tiktoken.list_encoding_names).to be_a(Array)
end
Expand Down

0 comments on commit c4ba093

Please sign in to comment.