Skip to content

Commit

Permalink
Merge pull request #313 from paulhoadley/distance-tests
Browse files Browse the repository at this point in the history
Adds tests for ERXStringUtilities.distance(). #312
  • Loading branch information
darkv committed Nov 7, 2012
2 parents 8855a4d + bb23331 commit c06b99e
Showing 1 changed file with 169 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
import org.junit.Before;
import org.junit.Test;

import com.webobjects.foundation.NSArray;
import com.webobjects.foundation.NSMutableArray;

import er.erxtest.ERXTestSuite;

// This test class does not extend ERXTestCase because doing so breaks the JUnit 4 features
Expand All @@ -23,8 +26,164 @@ public static junit.framework.Test suite() {
return new JUnit4TestAdapter(ERXStringUtilitiesTest.class);
}

/**
* Represents a simple encapsulation of two strings and their expected
* Levenshtein distance.
*/
private class LevenshteinExample {
/**
* First string
*/
public String s1;

/**
* Second string
*/
public String s2;

/**
* Levenshtein distance between {@code s1} and {@code s2}
*/
public double d;

/**
* Constructor
*
* @param s1
* first string
* @param s2
* second string
* @param d
* Levenshtein distance
*/
public LevenshteinExample(String s1, String s2, double d) {
this.s1 = s1;
this.s2 = s2;
this.d = d;
}
}

/**
 * Known string pairs together with their expected Levenshtein distances.
 * Assigned by {@code setUp()}, which is annotated {@code @Before}, and
 * iterated by {@code testDistance()}.
 */
private NSArray<LevenshteinExample> levs;

/**
 * Builds the {@code levs} array of known string pairs and their expected
 * Levenshtein distances.
 *
 * @throws Exception
 *             declared for compatibility with the existing signature; the
 *             body shown here throws no checked exception
 */
@Before
public void setUp() throws Exception {
    // Set up the levs array
    NSMutableArray<LevenshteinExample> examples = new NSMutableArray<ERXStringUtilitiesTest.LevenshteinExample>();

    // When the values are the same, the distance is zero.
    examples.add(new LevenshteinExample("", "", 0));
    examples.add(new LevenshteinExample("1", "1", 0));
    examples.add(new LevenshteinExample("12", "12", 0));
    examples.add(new LevenshteinExample("123", "123", 0));
    examples.add(new LevenshteinExample("1234", "1234", 0));
    examples.add(new LevenshteinExample("12345", "12345", 0));
    examples.add(new LevenshteinExample("password", "password", 0));

    // When one of the values is empty, the distance is the length of the
    // other value.
    examples.add(new LevenshteinExample("", "1", 1));
    examples.add(new LevenshteinExample("", "12", 2));
    examples.add(new LevenshteinExample("", "123", 3));
    examples.add(new LevenshteinExample("", "1234", 4));
    examples.add(new LevenshteinExample("", "12345", 5));
    examples.add(new LevenshteinExample("", "password", 8));
    examples.add(new LevenshteinExample("1", "", 1));
    examples.add(new LevenshteinExample("12", "", 2));
    examples.add(new LevenshteinExample("123", "", 3));
    examples.add(new LevenshteinExample("1234", "", 4));
    examples.add(new LevenshteinExample("12345", "", 5));
    examples.add(new LevenshteinExample("password", "", 8));

    // Whenever a single character is inserted or removed, the distance is
    // one.
    examples.add(new LevenshteinExample("password", "1password", 1));
    examples.add(new LevenshteinExample("password", "p1assword", 1));
    examples.add(new LevenshteinExample("password", "pa1ssword", 1));
    examples.add(new LevenshteinExample("password", "pas1sword", 1));
    examples.add(new LevenshteinExample("password", "pass1word", 1));
    examples.add(new LevenshteinExample("password", "passw1ord", 1));
    examples.add(new LevenshteinExample("password", "passwo1rd", 1));
    examples.add(new LevenshteinExample("password", "passwor1d", 1));
    examples.add(new LevenshteinExample("password", "password1", 1));
    examples.add(new LevenshteinExample("password", "assword", 1));
    examples.add(new LevenshteinExample("password", "pssword", 1));
    // Removing either 's' of the double "ss" yields the same string, so
    // this example legitimately appears twice.
    examples.add(new LevenshteinExample("password", "pasword", 1));
    examples.add(new LevenshteinExample("password", "pasword", 1));
    examples.add(new LevenshteinExample("password", "passord", 1));
    examples.add(new LevenshteinExample("password", "passwrd", 1));
    examples.add(new LevenshteinExample("password", "passwod", 1));
    examples.add(new LevenshteinExample("password", "passwor", 1));

    // Whenever a single character is replaced, the distance is one.
    examples.add(new LevenshteinExample("password", "Xassword", 1));
    examples.add(new LevenshteinExample("password", "pXssword", 1));
    examples.add(new LevenshteinExample("password", "paXsword", 1));
    examples.add(new LevenshteinExample("password", "pasXword", 1));
    examples.add(new LevenshteinExample("password", "passXord", 1));
    examples.add(new LevenshteinExample("password", "passwXrd", 1));
    examples.add(new LevenshteinExample("password", "passwoXd", 1));
    examples.add(new LevenshteinExample("password", "passworX", 1));

    // If characters are taken off the front and added to the back and all
    // of the characters are unique, then the distance is two times the
    // number of characters shifted, until you get halfway (and then it
    // becomes easier to shift from the other direction).
    examples.add(new LevenshteinExample("12345678", "23456781", 2));
    examples.add(new LevenshteinExample("12345678", "34567812", 4));
    examples.add(new LevenshteinExample("12345678", "45678123", 6));
    examples.add(new LevenshteinExample("12345678", "56781234", 8));
    examples.add(new LevenshteinExample("12345678", "67812345", 6));
    examples.add(new LevenshteinExample("12345678", "78123456", 4));
    examples.add(new LevenshteinExample("12345678", "81234567", 2));

    // If all the characters are unique and the values are reversed, then
    // the distance is the number of characters for an even number of
    // characters, and one less for an odd number of characters (since the
    // middle character will stay the same).
    examples.add(new LevenshteinExample("12", "21", 2));
    examples.add(new LevenshteinExample("123", "321", 2));
    examples.add(new LevenshteinExample("1234", "4321", 4));
    examples.add(new LevenshteinExample("12345", "54321", 4));
    examples.add(new LevenshteinExample("123456", "654321", 6));
    examples.add(new LevenshteinExample("1234567", "7654321", 6));
    examples.add(new LevenshteinExample("12345678", "87654321", 8));

    // The rest of these are miscellaneous interesting examples. They will
    // be illustrated using the following key:
    // = (the characters are equal)
    // + (the character is inserted)
    // - (the character is removed)
    // # (the character is replaced)

    // Mississippi
    //  ippississiM
    // -=##====##=+ --> 6
    examples.add(new LevenshteinExample("Mississippi", "ippississiM", 6));

    // eieio
    // oieie
    // #===# --> 2
    examples.add(new LevenshteinExample("eieio", "oieie", 2));

    // brad+angelina
    // bra   ngelina
    // ===---======= --> 3
    examples.add(new LevenshteinExample("brad+angelina", "brangelina", 3));

    // Test international chars. The non-ASCII letters are written as
    // Unicode escapes so the example survives any source-file encoding.
    // NOTE(review): the original letters were lost to an encoding
    // round-trip and had degraded to '?'; the word is presumably Czech,
    // spelled with z-caron (U+017E) and c-caron (U+010D) -- confirm
    // against the upstream source.
    // [z-caron]e[z-caron]uli[c-caron]ka
    //          e[z-caron]uli[c-caron]ka
    // -======== --> 1
    examples.add(new LevenshteinExample("\u017ee\u017euli\u010dka", "e\u017euli\u010dka", 1));

    levs = examples.immutableClone();
}

@After
Expand Down Expand Up @@ -75,4 +234,13 @@ public void testMaskStringWithCharacter4() {
ERXStringUtilities.maskStringWithCharacter("0123456789", '*', 11, 12);
}

}
/**
 * Tests {@code ERXStringUtilities.distance(String, String)} against every
 * example in {@code levs}.
 * <p>
 * Each assertion carries a message naming the two input strings, so that a
 * failure identifies which example produced the unexpected distance.
 */
@Test
public void testDistance() {
    for (LevenshteinExample example : levs) {
        String message = "distance(\"" + example.s1 + "\", \"" + example.s2 + "\")";
        assertEquals(message, example.d, ERXStringUtilities.distance(example.s1, example.s2), 0.00001);
    }
}
}

0 comments on commit c06b99e

Please sign in to comment.