forked from riotu-lab/aranizer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_aranizer.py
41 lines (32 loc) · 1.24 KB
/
test_aranizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
def test_imports():
    """Check that the ``aranizer`` package can be imported.

    Prints ``Import successful.`` when the import works, otherwise prints
    the ``ImportError`` message. The error is reported, not re-raised, so
    this function always returns ``None``.

    NOTE(review): because a failed import is only printed, a runner that
    detects failures through exceptions will still see this check as passing.
    """
    try:
        import aranizer  # noqa: F401  (imported only to prove it is importable)
    except ImportError as exc:
        print(f"Import failed: {exc}")
    else:
        # Reached only when the import above did not raise.
        print("Import successful.")
def test_tokenizer():
    """Exercise the BPE and SP tokenizers from ``aranizer`` on one sentence.

    Steps, in order:
      1. Load the ``"bpe32"`` tokenizer via ``get_bpe`` and tokenize the sample.
      2. Load the ``"sp32"`` tokenizer via ``get_sp`` and tokenize the sample.
      3. Encode the sample with the SP tokenizer (``add_special_tokens=True``)
         and decode the result back.

    Every intermediate result is printed. Any exception raised along the way
    (including a failed import) is caught and printed as
    ``Tokenizer test failed: ...``; nothing is re-raised and the function
    returns ``None``.

    NOTE(review): since failures are only printed, an exception-based test
    runner will report this check as passing even when a step fails.
    """
    # Arabic sample sentence fed to both tokenizers.
    text = "هذا نص تجريبي لاختبار وظيفة التوكنيزر."

    try:
        from aranizer import get_bpe, get_sp

        # --- BPE tokenizer ---
        bpe_pieces = get_bpe("bpe32").tokenize(text)
        print("BPE Tokenization successful.")
        print("Sample Text:", text)
        print("BPE Tokens:", bpe_pieces)

        # --- SP tokenizer ---
        sp = get_sp("sp32")
        sp_pieces = sp.tokenize(text)
        print("SP Tokenization successful.")
        print("Sample Text:", text)
        print("SP Tokens:", sp_pieces)

        # --- Encode, then decode, with the SP tokenizer ---
        ids = sp.encode(text, add_special_tokens=True)
        print("Encoded Output:", ids)
        print("Decoded Text:", sp.decode(ids))
    except Exception as err:
        # Report the problem and carry on; see the NOTE in the docstring.
        print(f"Tokenizer test failed: {err}")
if __name__ == "__main__":
    # Script entry point: run the import check first, then the tokenizer check.
    for check in (test_imports, test_tokenizer):
        check()