Skip to content

Commit

Permalink
update edit sim.
Browse files (browse the repository at this point in the history)
  • Loading branch information
shibing624 committed Feb 1, 2023
1 parent 472164b commit d2405d9
Showing 1 changed file with 10 additions and 3 deletions.
13 changes: 10 additions & 3 deletions similarities/utils/distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def edit_distance(str1, str2):
# very fast
# http://stackoverflow.com/questions/14260126/how-python-levenshtein-ratio-is-computed
import Levenshtein
- d = 1.0 - Levenshtein.distance(str1, str2) / float(max(len(str1), len(str2)))
+ d = Levenshtein.distance(str1, str2) / float(max(len(str1), len(str2)))
except:
# https://docs.python.org/2/library/difflib.html
d = 1.0 - SequenceMatcher(lambda x: x == " ", str1, str2).ratio()
Expand Down Expand Up @@ -206,8 +206,15 @@ def z_score(x, axis=0):
str1_test = "你到底是谁?"
str2_test = "没想到我是谁,是真样子"
print('strs:', str1_test, ' vs ', str2_test)
- print(edit_distance(str1_test, str2_test))
- print('edit sim:', 1 - edit_distance(str1_test, str2_test))
+ print('edit_dist', edit_distance(str1_test, str2_test))
+ print('edit_sim:', 1 - edit_distance(str1_test, str2_test))
+
+ str1_test = "private Thread currentThread;"
+ str2_test = "private volatile Thread currentThread;"
+ print('strs:', str1_test, ' vs ', str2_test)
+ print('edit_dist', edit_distance(str1_test, str2_test))
+ print('edit_sim:', 1 - edit_distance(str1_test, str2_test))
+
print(num_of_common_sub_str(str1_test, str2_test))
print(max_min_normalize(vec1_test)) # 归一化(0-1)
print(z_score(vec1_test)) # 标准化(0附近,正负)
Expand Down

0 comments on commit d2405d9

Please sign in to comment.