Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added split_once and index_of to bitarray #629

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions src/gleam/bit_array.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -206,3 +206,36 @@ fn do_inspect(input: BitArray, accumulator: String) -> String {
_ -> accumulator
}
}

/// Finds the position of the first occurrence of a bit pattern within a bit
/// array.
///
/// The position is the zero-based offset at which `needle` starts inside
/// `haystack`. If `needle` does not occur in `haystack` then `-1` is returned.
///
/// ## Examples
///
/// ```gleam
/// index_of(<<0, 1, 2, 3, 4, 5, 6, 7>>, <<3, 4, 5>>)
/// // -> 3
/// ```
///
/// ```gleam
/// index_of(<<0, 1, 2, 3, 4, 5, 6, 7>>, <<5, 4, 3>>)
/// // -> -1
/// ```
///
@external(erlang, "gleam_stdlib", "bit_array_index_of")
@external(javascript, "../gleam_stdlib.mjs", "bit_array_index_of")
pub fn index_of(haystack: BitArray, needle: BitArray) -> Int
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove this please 🙏 The ticket was only for split_once.


// error is returned if not found.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment seems to have been misplaced

/// Splits a bit array into left and right parts at the first occurrence of the
/// bit pattern provided. The pattern itself is not included in either part.
///
/// `Error(Nil)` is returned if the pattern is not found. An empty pattern
/// splits at the very start, giving an empty left part and the whole input as
/// the right part.
///
/// ## Examples
///
/// ```gleam
/// split_once(<<0, 1, 2, 3, 4, 5, 6, 7>>, <<3, 4, 5>>)
/// // -> Ok(#(<<0, 1, 2>>, <<6, 7>>))
///
/// split_once(<<0, 1, 2, 3, 4, 5, 6, 7>>, <<5, 4, 3>>)
/// // -> Error(Nil)
/// ```
///
@external(erlang, "gleam_stdlib", "bit_array_split_once")
@external(javascript, "../gleam_stdlib.mjs", "bit_array_split_once")
pub fn split_once(
haystack: BitArray,
needle: BitArray,
) -> Result(#(BitArray, BitArray), Nil)
23 changes: 22 additions & 1 deletion src/gleam_stdlib.erl
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
decode_tuple5/1, decode_tuple6/1, tuple_get/2, classify_dynamic/1, print/1,
println/1, print_error/1, println_error/1, inspect/1, float_to_string/1,
int_from_base_string/2, utf_codepoint_list_to_string/1, contains_string/2,
crop_string/2, base16_decode/1, string_replace/3
crop_string/2, base16_decode/1, string_replace/3, bit_array_index_of/2,
bit_array_split_once/2
]).

%% Taken from OTP's uri_string module
Expand Down Expand Up @@ -536,3 +537,23 @@ base16_decode(String) ->

string_replace(String, Pattern, Replacement) ->
string:replace(String, Pattern, Replacement, all).

%% Returns the zero-based byte offset of the first occurrence of Needle in
%% Haystack, or -1 when Needle does not occur.
%%
%% binary:match/2 raises badarg for an empty pattern and for arguments that
%% are not whole-byte binaries. Both cases are handled here so the function
%% never crashes: an empty needle matches at offset 0 (the same answer the
%% JavaScript implementation gives), and unsupported input is reported as
%% "not found" in the same way bit_array_split_once/2 maps badarg to an error.
bit_array_index_of(_Haystack, <<>>) ->
    0;
bit_array_index_of(Haystack, Needle) ->
    try binary:match(Haystack, Needle) of
        {Pos, _Len} -> Pos;
        nomatch -> -1
    catch
        error:badarg -> -1
    end.

bit_array_split_once(Haystack, Needle) ->
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use the built in split function please 🙏

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The built in function doesn't seem to allow splitting along non-byte aligned boundaries.

e.g. splitting
<<1, 2, 3, 4, 5>>, with <<3:2>> returns {error, nil}
Current implementation correctly splits it as:
<<1, 2, 0:6>>, <<4, 5>>

Copy link
Member

@lpil lpil Jun 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's fine 👍 Performance is the priority here, and I'm not aware of any binary protocol that will use non-byte aligned data.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No problem, I removed the associated tests and switched to using Erlang's built-in binary:split.

%% Body of bit_array_split_once/2: returns {ok, {Before, After}} when Needle
%% occurs in Haystack, otherwise {error, nil}.
try
case Needle of
%% An empty needle is handled up front: the split point is the very start,
%% so the left part is empty and the right part is the whole haystack.
<<>> -> {ok, {<<>>, Haystack}};
%% binary:split/2 without options splits at the first match only; exactly
%% two parts means a match was found.
_ -> case binary:split(Haystack, Needle) of
[Part1, Part2] -> {ok, {Part1, Part2}};
%% Any other result (a single-element list) means the needle was not found.
_ -> {error, nil}
end
end
catch
%% NOTE(review): presumably raised by binary:split/2 for input it does not
%% support (e.g. non-byte-aligned bit arrays); treated as "not found".
error:badarg -> {error, nil}
end.

37 changes: 36 additions & 1 deletion src/gleam_stdlib.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ export function compile_regex(pattern, options) {
export function regex_split(regex, string) {
return List.fromArray(
string.split(regex).map((item) => (item === undefined ? "" : item)),
);
);
}

export function regex_scan(regex, string) {
Expand Down Expand Up @@ -864,3 +864,38 @@ export function base16_decode(string) {
/**
 * Renders a bit array as text by listing the values held in its `buffer`,
 * separated by ", " and wrapped in `<<` and `>>`.
 */
export function bit_array_inspect(bits) {
  const values = Array.from(bits.buffer);
  return "<<" + values.join(", ") + ">>";
}

/**
 * Finds the first index in `haystack.buffer` at which every element of
 * `needle.buffer` appears consecutively.
 *
 * Returns that zero-based index, or -1 when there is no such position
 * (including when the needle is longer than the haystack). An empty needle
 * matches at index 0.
 */
export function bit_array_index_of(haystack, needle) {
  const source = haystack.buffer;
  const pattern = needle.buffer;
  // Last index at which the pattern could still fit; negative when the
  // pattern is longer than the source, so the search loop never runs.
  const lastStart = source.length - pattern.length;

  search: for (let start = 0; start <= lastStart; start++) {
    for (let offset = 0; offset < pattern.length; offset++) {
      if (source[start + offset] !== pattern[offset]) {
        // Mismatch: move on to the next candidate start position.
        continue search;
      }
    }
    return start;
  }

  return -1;
}

/**
 * Splits `haystack` around the first occurrence of `needle`.
 *
 * Returns `Ok([before, after])` where `before` holds the buffer contents
 * preceding the match and `after` the contents following it (the needle is
 * in neither), or `Error(Nil)` when the needle is not found.
 */
export function bit_array_split_once(haystack, needle) {
  const matchStart = bit_array_index_of(haystack, needle);

  if (matchStart !== -1) {
    const matchEnd = matchStart + needle.buffer.length;
    const left = new BitArray(haystack.buffer.slice(0, matchStart));
    const right = new BitArray(haystack.buffer.slice(matchEnd));
    return new Ok([left, right]);
  }

  return new Error(Nil);
}

32 changes: 32 additions & 0 deletions test/gleam/bit_array_test.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -308,3 +308,35 @@ pub fn inspect_partial_bytes_test() {
bit_array.inspect(<<5:3, 11:4, 1:2>>)
|> should.equal("<<182, 1:size(1)>>")
}

// The separator ", " starts at offset 5 of "Hello, World".
pub fn index_of_found_test() {
  let haystack = <<"Hello, World":utf8>>
  let needle = <<", ":utf8>>

  bit_array.index_of(haystack, needle)
  |> should.equal(5)
}

// A needle that does not occur in the haystack yields -1.
pub fn index_of_not_found_test() {
  let haystack = <<"Hello, World":utf8>>
  let needle = <<"Joe":utf8>>

  bit_array.index_of(haystack, needle)
  |> should.equal(-1)
}

// Splitting on ", " gives the text on either side, without the separator.
pub fn split_once_found_test() {
  let haystack = <<"Hello, World":utf8>>
  let separator = <<", ":utf8>>
  let expected = #(<<"Hello":utf8>>, <<"World":utf8>>)

  bit_array.split_once(haystack, separator)
  |> should.be_ok
  |> should.equal(expected)
}

// An empty needle splits at the start: empty left part, whole input on the right.
pub fn split_once_empty_needle_test() {
  let haystack = <<"Hello, World":utf8>>
  let expected = #(<<>>, <<"Hello, World":utf8>>)

  bit_array.split_once(haystack, <<>>)
  |> should.be_ok
  |> should.equal(expected)
}

// A needle that does not occur in the haystack produces an error.
pub fn split_once_not_found_test() {
  let haystack = <<"Hello, World":utf8>>
  let needle = <<"Joe":utf8>>

  bit_array.split_once(haystack, needle)
  |> should.be_error
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you include tests for non-byte aligned bit arrays please