Tidy-up - readme, benchmarking and making

Now runs the benchmark after cleaning and building a new file and prints benchmark info consistently.
petr-tik · Feb 10, 2018 · 2d67280 · 2d67280
1 parent 89db4db
commit 2d67280
Show file tree

Hide file tree

Showing 3 changed files with 31 additions and 28 deletions.
diff --git a/Readme.md b/Readme.md
@@ -6,9 +6,9 @@ Find the difference between writing Python code vs compiling C into a shared lib
 
 ## Method
 
-A good comparison is storing and searching strings. A trie data structure is implemented with 2 methods - add and find. 2 implementations - Python and C compiled into a dynamic library loaded by the CPython interpreter are tested and benchmarked against each other to find out their memory footprint and speed. 
+A good comparison is storing and searching strings in a trie. A trie data structure is implemented with 2 methods - add and find. 2 implementations - Python and C compiled into a dynamic library loaded by the CPython interpreter are tested and benchmarked against each other to find out their memory footprint and speed. 
 
-While C and the conventions of CPython extension hacking are tricky to get your head around. 
+While C and the conventions of CPython extension hacking are tricky to get your head around, they are hopefully worth it for applications with high performance requirements.
 
 ## Test
 
@@ -28,7 +28,7 @@ Using a bit of bash magic, we can prepare a lowercase-only, clean of punctuation
 head -n 50000 /usr/share/dict/words | tail -n 20000 | tr -d "[A-Z|']" | iconv -f utf8 -t ascii//TRANSLIT | uniq | head -n 18000 > clean_words
 ```
 
-Take the first 50000 words from unix dictionary fail, take the words from the middle, remove all uppercase letters and apostrophes, convert/transliterate all non-ascii chars to ascii and pipe the top 18000 unique words into the clean_words file
+Take the first 50000 words from the unix dictionary file, keep the last 20000 of those, remove all uppercase letters and apostrophes, convert/transliterate all non-ASCII chars to ASCII, and pipe the first 18000 unique words into the clean_words file.
 
 
 #### Random order
@@ -45,31 +45,32 @@ shuf clean_words > random_words
 head -n 80000 /usr/share/dict/words | tail -n 1000 | tr -d "[A-Z|']" | iconv -f utf8 -t ascii//TRANSLIT | uniq | head -n 800 > missing_words
 ```
 
+### Measure
 
-### Time to add 10000 words 
+#### Time to add 10000 words 
 
 Absolute time
 
-#### In alphabetic order
+##### In alphabetic order
 
-#### In random order
+##### In random order
 
-### Memory footprint
+#### Memory footprint
 
 The size of trie object after all the words have been added.
 
-### Time to find existing words 
+#### Time to find existing words 
 
 Average duration of finding same 50 words that are in the trie
 
-### Time to look for missing words
+#### Time to look for missing words
 
 Average duration of looking for same 50 words that aren't in the trie
 
 
 ## Results
 
-tbd
+Published in a blog and delivered as a presentation.
 
 ## Conclusion
 

diff --git a/benchmark.py b/benchmark.py
@@ -1,44 +1,43 @@
 #! /usr/bin/env python3
 
-import subprocess
 import time
 
 from py_trie import PyTrie
 from ctrie import cTrie
 
+time_fmt = ":5f"
+
 
 def add_sorted_words(trie_constructor):
-    print("\n\n{}".format(trie_constructor.__name__))
     startTime = time.time()
     tr = trie_constructor()
     creationTime = time.time() - startTime
-    print("{} takes {}s to instantiate".format(
-        trie_constructor.__name__, creationTime))
+    print("Takes {:5f}s to instantiate before adding sorted words".format(
+        creationTime))
     with open("clean_words") as f:
         for line in f:
             tr.add(line.strip())
     elapsedTime = time.time() - startTime
-    print("It takes {}s to add 18000 words to a {}".format(
-        elapsedTime, trie_constructor.__name__))
+    print("Takes {:5f}s to add 18000 words".format(elapsedTime))
 
 
 def add_random_words(trie_constructor):
-    print("\n\n{}".format(trie_constructor.__name__))
+
     startTime = time.time()
     tr = trie_constructor()
     creationTime = time.time() - startTime
-    print("{} takes {}s to instantiate".format(
-        trie_constructor.__name__, creationTime))
+    print("Takes {:5f}s to instantiate before adding random words".format(
+        creationTime))
     with open("random_words") as f:
         for line in f:
             tr.add(line.strip())
     elapsedTime = time.time() - startTime
-    print("It takes {}s to add 18000 random words to a {}".format(
-        elapsedTime, trie_constructor.__name__))
+    print("Takes {:5f}s to add 18000 random words".format(
+        elapsedTime))
 
 
 def find_present_words(trie_constructor):
-    print("\n\n{}".format(trie_constructor.__name__))
+
     tr = trie_constructor()
     with open("clean_words") as f:
         for line in f:
@@ -49,12 +48,11 @@ def find_present_words(trie_constructor):
         for line in f:
             tr.find(line.strip())
     elapsedTime = time.time() - startTime
-    print("It takes {}s to find 100 random words in a {}".format(
-        elapsedTime, trie_constructor.__name__))
+    print("Takes {:5f}s to find 100 random words".format(
+        elapsedTime))
 
 
 def find_missing_words(trie_constructor):
-    print("\n\n{}".format(trie_constructor.__name__))
     tr = trie_constructor()
 
     with open("clean_words") as f:
@@ -67,12 +65,13 @@ def find_missing_words(trie_constructor):
         for line in f:
             tr.find(line.strip())
     elapsedTime = time.time() - startTime
-    print("It takes {}s to look for, but fail to find, 800 missing words in a {}".format(
-        elapsedTime, trie_constructor.__name__))
+    print("Takes {:5f}s to look for, but fail to find, 800 missing words".format(
+        elapsedTime))
 
 
 if __name__ == "__main__":
-    for trie in [cTrie, PyTrie]:
+    for trie in [PyTrie, cTrie]:
+        print(trie.__name__)
         add_sorted_words(trie)
         add_random_words(trie)
         find_present_words(trie)

diff --git a/makefile b/makefile
@@ -12,3 +12,6 @@ ext:
 
 test:
 	python3 -m pytest test.py
+
+bench:	clean ext test
+	python3 benchmark.py