diff --git a/phunspell.egg-info/PKG-INFO b/phunspell.egg-info/PKG-INFO index 4f50f01..822675c 100644 --- a/phunspell.egg-info/PKG-INFO +++ b/phunspell.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: phunspell -Version: 0.1.3 +Version: 0.1.4 Summary: Pure Python spell checker, utilizing Spylls a port of Hunspell Home-page: https://github.com/dvwright/phunspell Author: David Wright @@ -8,35 +8,35 @@ Author-email: dvwright@cpan.org License: MIT Download-URL: https://github.com/dvwright/phunspell/archive/v0.1.3.tar.gz Description: # Phunspell - + A pure Python spell checker utilizing [spylls](https://github.com/zverok/spylls) a port of [Hunspell](https://hunspell.github.io/). - + *NOTE: If you are only supporting languages: English, Russian or Swedish then use [spylls](https://github.com/zverok/spylls) directly: (`pip install spylls`)* - + This library includes [dictionaries](https://github.com/LibreOffice/dictionaries) for all languages supported by [LibreOffice](https://wiki.documentfoundation.org/Development/Dictionaries). - + Just a note giving credit where it's due, [spylls](https://github.com/zverok/spylls) is a fantastic project which deserves all the credit. There is a [corresponding blog](https://zverok.github.io/blog/2021-01-05-spellchecker-1.html) entry which is a good read. 
(and of course [Hunspell](https://hunspell.github.io/) itself) - + ### Usage - + import phunspell - + pspell = phunspell.Phunspell('en_US') print(pspell.lookup("phunspell")) # False print(pspell.lookup("about")) # True - + mispelled = pspell.lookup_list("Bill's TV is borken".split(" ")) print(mispelled) # ["borken"] - + for suggestion in pspell.suggest('phunspell'): print(suggestion) # Hunspell - + ### Installation - - ``` + + ```shell pip install phunspell ``` - + #### Supported Languages Language | Language Code ---------------------------- | ------------- @@ -118,15 +118,72 @@ Description: # Phunspell Turkish | tr_TR Ukrainian | uk_UA Vietnamese | vi_VN - + #### Tests - ``` + ```shell python -m unittest discover -s phunspell/tests -p "test_*.py" ``` - + + #### Experimental + + There is an option to build/store all the dictionaries as pickled data. Since there are security risks associated with pickled data we will not include that data in the distribution. + + To create your own local pickled dictionaries set an env variable. + + linux/mac osx: + ```shell + $ export PICKLED_DATADIR="/home/dwright/python/phunspell/pickled_data/" + ``` + + enter a python shell: + ```python + $ python + >>> Phunspell(loc_lang="en_US", load_all=True) + ``` + + *NOTE: this will consume a lot of resources!* + + Once completed you should have a pickled object for every dictionary supported by this lib. + + ```shell + $ ls /home/dwright/python/phunspell/pickled_data/ + af_ZA + an_ES + be_BY + bg_BG + bn_BD + br_FR + bs_BA + cs_CZ + da_DK + de_AT + de_CH + ... + ... + ... + ``` + + *NOTE: will take up almost 2 GB of space* + + ```shell + $ du -sh . 
+ 1.4G + ``` + + As long as you keep that environment variable set for all future runs, just use the library as: + + ```python + pspell = Phunspell() + ``` + + *NOTE: If you ever update dictionary data, you will need to create a new pickle store for it.* + + and it should find the dictionaries and load them quickly + + #### Misc `python`, `python3`, `hunspell`, `libreoffice`, `spell`, `spell checking` - + Keywords: Spelling,Hunspell,Spylls,Python Platform: UNKNOWN Classifier: Development Status :: 4 - Beta diff --git a/phunspell/__init__.py b/phunspell/__init__.py index 416621a..0d90d45 100644 --- a/phunspell/__init__.py +++ b/phunspell/__init__.py @@ -4,7 +4,7 @@ __name__ = "phunspell" __author__ = "David Wright" __email__ = "dvwright@cpan.org" -__version__ = "0.1.3" +__version__ = "0.1.4" # For relative imports to work in Python 3.6 sys.path.append(os.path.dirname(os.path.realpath(__file__))) diff --git a/setup.py b/setup.py index 65ced9e..0dfd4b7 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name='phunspell', - version='0.1.3', + version='0.1.4', url='https://github.com/dvwright/phunspell', download_url='https://github.com/dvwright/phunspell/archive/v0.1.3.tar.gz', license='MIT',