test tokenize
raphaellaude committed Nov 14, 2023
1 parent e69e46b commit 31ac81b
Showing 3 changed files with 28 additions and 1 deletion.
5 changes: 4 additions & 1 deletion .gitignore
@@ -2,4 +2,7 @@
 */tmp
 scratch.rs
 data_prep
-*.csv
+*.csv
+.DS_Store
+profile.json
+/scratch
2 changes: 2 additions & 0 deletions src/lib.rs
@@ -137,6 +137,8 @@ pub fn tokenize(address: &str) -> Vec<String> {
     let address: String = clean_address(address);
 
     address
+        .replace('&', " & ")
+        .replace('#', " # ")
         .split([' ', ',', ';', ')', '\n'].as_ref())
        .filter(|x| !x.is_empty())
        .map(|s| s.to_string())
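For context, the two padded replaces are what split unit markers out of glued tokens before the whitespace split. A minimal standalone sketch of just that step (not the crate's full tokenize, which also runs the input through clean_address first) behaves like this; pad_and_split is a hypothetical helper, not an item exported by us_addrs:

// Sketch of the pad-then-split step added above.
fn pad_and_split(address: &str) -> Vec<String> {
    address
        .replace('&', " & ") // pad so '&' becomes its own token
        .replace('#', " # ") // pad so "box#1" splits into "box", "#", "1"
        .split([' ', ',', ';', ')', '\n'].as_ref())
        .filter(|x| !x.is_empty())
        .map(|s| s.to_string())
        .collect()
}

fn main() {
    assert_eq!(pad_and_split("box#1 abc st"), vec!["box", "#", "1", "abc", "st"]);
    assert_eq!(pad_and_split("a & b st"), vec!["a", "&", "b", "st"]);
}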
22 changes: 22 additions & 0 deletions tests/test_tokenize.rs
@@ -0,0 +1,22 @@
+use us_addrs::tokenize;
+
+#[test]
+fn test_tokenizing() {
+    let mut tokens = tokenize("# 1 abc st");
+    assert_eq!(tokens, vec!["#", "1", "abc", "st"]);
+
+    tokens = tokenize("#1 abc st");
+    assert_eq!(tokens, vec!["#", "1", "abc", "st"]);
+
+    tokens = tokenize("box # 1 abc st");
+    assert_eq!(tokens, vec!["box", "#", "1", "abc", "st"]);
+
+    tokens = tokenize("box #1 abc st");
+    assert_eq!(tokens, vec!["box", "#", "1", "abc", "st"]);
+
+    tokens = tokenize("box# 1 abc st");
+    assert_eq!(tokens, vec!["box", "#", "1", "abc", "st"]);
+
+    tokens = tokenize("box#1 abc st");
+    assert_eq!(tokens, vec!["box", "#", "1", "abc", "st"]);
+}
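The new tests only exercise the '#' padding; a companion case for the '&' replace added in src/lib.rs might look like the sketch below. This is an assumption, not part of this commit, and it presumes clean_address leaves a lowercase string like this untouched:

use us_addrs::tokenize;

#[test]
fn test_tokenizing_ampersand() {
    // Assumed expected output for the " & " padding; not in the committed tests.
    let tokens = tokenize("4th & main st");
    assert_eq!(tokens, vec!["4th", "&", "main", "st"]);
}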
