diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 543d759e..d87d2568 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -1,2 +1,3 @@ +* Jordan Rynning ([@jrynning](https://github.com/jrynning)) * Serdar Tumgoren ([@zstumgoren](https://github.com/zstumgoren)) diff --git a/Pipfile b/Pipfile index 19cb71f1..526bd36e 100644 --- a/Pipfile +++ b/Pipfile @@ -35,6 +35,7 @@ click = "*" retry = "*" urllib3 = "1.26.18" # pegged to avoid test issue typing-extensions = "*" +us = "*" [pipenv] allow_prereleases = false diff --git a/Pipfile.lock b/Pipfile.lock index f9bb5c7d..59d63283 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "b0f91edc7e8d297b29f271fadc4b7116f793086028ec3d2181171ae845c789f5" + "sha256": "06cd9f02c1beaa7c2aafcef82f2bdd6142c102cf09cbd406b259aa54bbfd18ee" }, "pipfile-spec": 6, "requires": {}, @@ -257,6 +257,69 @@ "markers": "python_version >= '3.5'", "version": "==3.7" }, + "jellyfish": { + "hashes": [ + "sha256:010ebed019b7efa27171acb66ca5e7d4f40ab0b122663e6b4062ac22816b5d9a", + "sha256:02611b975311694bc98789f03c12bbd679cba4a95b74d0f51f264a4bf7b14021", + "sha256:0c289620b2a1931237b75f4a08f93531f3a9a0125a840c8a780e50688520b266", + "sha256:0cbe9f573dfd0bffbe60fb6980d54e65c7772dec217c4bc68392141458c8406e", + "sha256:1613e6623de71008c4b27250b8f2c5406104beff6487b9fe48af5089e06de2dd", + "sha256:16cf6a55433ca4fe8f13d5ba96882a058d19030bcd8c50cdd8f62009c4106c55", + "sha256:1d7b7fa3e0e6c7c83fc0fd1e3abce2fa7d72945c97da9bf9b84d396bbfcaf61a", + "sha256:1fab569e574a40fa5e9268d0a00f38d808b997f777a0583e2b9ba135a9536a02", + "sha256:26b07f9f957054a99573d51c40118aa1a400354da54e65d24cf22c41840f7a95", + "sha256:2d28edaaae08b2af9babf39b2e7d30571217fbc70168d88fbdef414e53177ca8", + "sha256:335f287613af3b23bb06ab216a78315b5cec877c84748c8927cb4e4e106fbe6f", + "sha256:3aea11b9a0699bdaa0e15df2e3beeceb5cac82ab072b35f2997ecc3493240027", + "sha256:3ded3e9b5aa82371281f494fbbfab9a0fa79b0a66bf529b63c109fe0328d23c5", + 
"sha256:44cbafbe1bdf9e878ac144880d15f31ab79fb4f5fb22a7df55378519d80cfdcc", + "sha256:4db605459280deefc2b3497932aeb2784c54ede2aec786a4120cf650281652ad", + "sha256:4de595f3395e15a82f5a45c3265ea8fd594b19f62bbdc7349a3468bba63878a2", + "sha256:4ea2777bbee00e896c9d5bb3a146f6e2387b3c83e0f9bdfa53aef824010ae14b", + "sha256:519297c0f3bf119958012348354afb2c95cbc61f78b4807ff8d1378199f70a4a", + "sha256:5204365138dbbd50f634cb246a4812f64d3b3054d32825c16c5176cae2171dcf", + "sha256:54af2ca0db82b57c022aa3e3a5ed5fa57ac2b8ab3c005ecefc975a648bf771c6", + "sha256:58385a72663e53d753c8c3131d609f35be841068ac319c507bc49c951333b394", + "sha256:60e3b8e7e38b85df90f2e04eeed592fe1abc71941ce09e57a8956e21f05ce64f", + "sha256:6245916cb73242828ba4f8f44dec3f149b96965848d59d3d66b1f809208dc39a", + "sha256:654f2b1543b9927c4429bd5d66f98d1f47e6eb9a4e56212e1907fb4eea258c5a", + "sha256:661c46b427a1c2a4b4343bda71354a37e897648239f8831d149eb1e7a2bf902c", + "sha256:69a53c1ccf26ad480a277ee3147c7db9284511d79e1aa117855078423798d277", + "sha256:6ba932f17566a21c009dac8167c0289dd2175c219eea3f3f695d043c50989f46", + "sha256:6bff57058fb2c9ffefe4b683a4c61de58346603ef699f768f173a2a0637a0c16", + "sha256:76c452ef0f0241fabbd6943abfccbcb29dc6078ccc1dcef066fe537bb518ad6e", + "sha256:790ad5b36796f521189a609120689840540b3d7a44e64b7bc2007ebab6c96d52", + "sha256:7be70324908f9f4be6c06278cd9be58d8a30b6d25f5eb7522537c5da08819ade", + "sha256:7d38c2f19ec0b8b217678074b5ab56d9f44e075327b1fb0d2aa4a9e2968b27b5", + "sha256:7d51a3cc18b1143f03c135ea34919daf2f87126c5e55b0f2e60e4616f1765f8d", + "sha256:8f9f5f2af653696c29466a94bf0237c64fe21699d9416e0e94ca51863c1ce96a", + "sha256:90fc8c600252072e48c5bc4e0e4d835c440c9c94f69e1d26a672328e17de3ec8", + "sha256:91912106d47b5367704d3e222822750998610a12b1aa9b259eb38bf059aa2383", + "sha256:9a92d5dce96711fad362399c8a569923b488ab108f531157ca381decc7096d7d", + "sha256:9df6bb8ba3c6f2508dc030ed77e489f427d44cf24557a0a8ab2bba3c19af99d6", + "sha256:9e2f7172d7c5fda4222f98247ab8d366a4ef879350b927dfbdefe1dd6dd83ef3", 
+ "sha256:a1b1ae7e64e9d58c0d4ce2278a229f3f7ead5eb2744f90369f3967f3c666f28f", + "sha256:a9e70e018c9620378c95b28de6d597c6cf87cd0b9e9b446444468e7e411b159d", + "sha256:b2e563bdaa9abb028b4a9bca9903aaa463a2bae7b05e7af50326d2c1ba959f8e", + "sha256:b5136535cbf5535090ce99a1f56cfaec43f11f29277faf67241f4bf6f0b578bc", + "sha256:bd8000a32da09cbb717d7434c39bf7421e7d8367a711fe617fa6addee3572740", + "sha256:bfff0dc1d6d470183e8e0e76b798f81a7ccfaef92c409647dbc0fb4d0a01e1d5", + "sha256:c82b72feca25036bda4ea4e355cc06707e61724e970672a987e76bf2b2fc6922", + "sha256:cad06b9d0f76d5d030bcb8b86454e50aae8166b1c507d3d610743abcf8b7881a", + "sha256:cd1a25a4ff4b75b8a91ba42f6a27a5f423d0cb1ae2f29090a99e0a29afdfefdc", + "sha256:de3a153b1b915d8e37ce97c47b90ebad061624ae922bf3c250b1e0c3362c3ade", + "sha256:de6c1d9f7e9d2e65e23774d792054bcc9b995d6fae447b0cf99e7be12926f28b", + "sha256:e097c439f33eecdd85ef11a30158905c8e7d2888a163adbbe9f11c96af1af34b", + "sha256:ec049a17942be3ecfd239a8fe9cf34caaf063ac9c14e700fe59b74528798aedd", + "sha256:ed39ff56c19a4150f412b63e9835354c929f3d8108c586d604cc1c342f0ca34c", + "sha256:f44546131011cbaa76f2a38d87faeec73524efa812d04b7d3edbf6e6e76c4969", + "sha256:f7872acd036f2edf1bbe503ee26dd218216f62a9ab717d9d984a8504234cc484", + "sha256:fdc44007f3f69b8637edc79e8fed0a75a3d4fc08209167aadbe4cf9724469e90", + "sha256:ff959e48103f4c7a65a7fd67c5783d8939ecfbc3d3ad2b726030b0652e781e41" + ], + "markers": "python_version >= '3.7'", + "version": "==0.11.2" + }, "pdfminer.six": { "hashes": [ "sha256:6004da3ad1a7a4d45930cb950393df89b068e73be365a6ff64a838d37bcb08c4", @@ -444,6 +507,13 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", "version": "==1.26.18" }, + "us": { + "hashes": [ + "sha256:e347963e8d24a1ca7437af443fa68591776847b50c8650d8ef0eb53482e705c2" + ], + "index": "pypi", + "version": "==3.1.1" + }, "webencodings": { "hashes": [ "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78", @@ -463,11 +533,11 @@ }, 
"backports.tarfile": { "hashes": [ - "sha256:2688f159c21afd56a07b75f01306f9f52c79aebcc5f4a117fb8fbb4445352c75", - "sha256:bcd36290d9684beb524d3fe74f4a2db056824c47746583f090b8e55daf0776e4" + "sha256:91d59138ea401ee2a95e8b839c1e2f51f3e9ca76bdba8b6a29f8d773564686a8", + "sha256:b2f4df351db942d094db94588bbf2c6938697a5f190f44c934acc697da56008b" ], "markers": "python_version < '3.12'", - "version": "==1.0.0" + "version": "==1.1.0" }, "black": { "hashes": [ @@ -515,6 +585,64 @@ "markers": "python_version >= '3.6'", "version": "==2024.2.2" }, + "cffi": { + "hashes": [ + "sha256:0c9ef6ff37e974b73c25eecc13952c55bceed9112be2d9d938ded8e856138bcc", + "sha256:131fd094d1065b19540c3d72594260f118b231090295d8c34e19a7bbcf2e860a", + "sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417", + "sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab", + "sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520", + "sha256:31d13b0f99e0836b7ff893d37af07366ebc90b678b6664c955b54561fc36ef36", + "sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743", + "sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8", + "sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed", + "sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684", + "sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56", + "sha256:6602bc8dc6f3a9e02b6c22c4fc1e47aa50f8f8e6d3f78a5e16ac33ef5fefa324", + "sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d", + "sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235", + "sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e", + "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088", + "sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000", + "sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7", + 
"sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e", + "sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673", + "sha256:80876338e19c951fdfed6198e70bc88f1c9758b94578d5a7c4c91a87af3cf31c", + "sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe", + "sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2", + "sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098", + "sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8", + "sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a", + "sha256:a09582f178759ee8128d9270cd1344154fd473bb77d94ce0aeb2a93ebf0feaf0", + "sha256:a6a14b17d7e17fa0d207ac08642c8820f84f25ce17a442fd15e27ea18d67c59b", + "sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896", + "sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e", + "sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9", + "sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2", + "sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b", + "sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6", + "sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404", + "sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f", + "sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0", + "sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4", + "sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc", + "sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936", + "sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba", + "sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872", + "sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb", + "sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614", 
+ "sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1", + "sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d", + "sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969", + "sha256:e760191dd42581e023a68b758769e2da259b5d52e3103c6060ddc02c9edb8d7b", + "sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4", + "sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627", + "sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956", + "sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357" + ], + "markers": "platform_python_implementation != 'PyPy'", + "version": "==1.16.0" + }, "cfgv": { "hashes": [ "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9", @@ -687,6 +815,44 @@ "markers": "python_version >= '3.8'", "version": "==7.4.4" }, + "cryptography": { + "hashes": [ + "sha256:0270572b8bd2c833c3981724b8ee9747b3ec96f699a9665470018594301439ee", + "sha256:111a0d8553afcf8eb02a4fea6ca4f59d48ddb34497aa8706a6cf536f1a5ec576", + "sha256:16a48c23a62a2f4a285699dba2e4ff2d1cff3115b9df052cdd976a18856d8e3d", + "sha256:1b95b98b0d2af784078fa69f637135e3c317091b615cd0905f8b8a087e86fa30", + "sha256:1f71c10d1e88467126f0efd484bd44bca5e14c664ec2ede64c32f20875c0d413", + "sha256:2424ff4c4ac7f6b8177b53c17ed5d8fa74ae5955656867f5a8affaca36a27abb", + "sha256:2bce03af1ce5a5567ab89bd90d11e7bbdff56b8af3acbbec1faded8f44cb06da", + "sha256:329906dcc7b20ff3cad13c069a78124ed8247adcac44b10bea1130e36caae0b4", + "sha256:37dd623507659e08be98eec89323469e8c7b4c1407c85112634ae3dbdb926fdd", + "sha256:3eaafe47ec0d0ffcc9349e1708be2aaea4c6dd4978d76bf6eb0cb2c13636c6fc", + "sha256:5e6275c09d2badf57aea3afa80d975444f4be8d3bc58f7f80d2a484c6f9485c8", + "sha256:6fe07eec95dfd477eb9530aef5bead34fec819b3aaf6c5bd6d20565da607bfe1", + "sha256:7367d7b2eca6513681127ebad53b2582911d1736dc2ffc19f2c3ae49997496bc", + "sha256:7cde5f38e614f55e28d831754e8a3bacf9ace5d1566235e39d91b35502d6936e", + 
"sha256:9481ffe3cf013b71b2428b905c4f7a9a4f76ec03065b05ff499bb5682a8d9ad8", + "sha256:98d8dc6d012b82287f2c3d26ce1d2dd130ec200c8679b6213b3c73c08b2b7940", + "sha256:a011a644f6d7d03736214d38832e030d8268bcff4a41f728e6030325fea3e400", + "sha256:a2913c5375154b6ef2e91c10b5720ea6e21007412f6437504ffea2109b5a33d7", + "sha256:a30596bae9403a342c978fb47d9b0ee277699fa53bbafad14706af51fe543d16", + "sha256:b03c2ae5d2f0fc05f9a2c0c997e1bc18c8229f392234e8a0194f202169ccd278", + "sha256:b6cd2203306b63e41acdf39aa93b86fb566049aeb6dc489b70e34bcd07adca74", + "sha256:b7ffe927ee6531c78f81aa17e684e2ff617daeba7f189f911065b2ea2d526dec", + "sha256:b8cac287fafc4ad485b8a9b67d0ee80c66bf3574f655d3b97ef2e1082360faf1", + "sha256:ba334e6e4b1d92442b75ddacc615c5476d4ad55cc29b15d590cc6b86efa487e2", + "sha256:ba3e4a42397c25b7ff88cdec6e2a16c2be18720f317506ee25210f6d31925f9c", + "sha256:c41fb5e6a5fe9ebcd58ca3abfeb51dffb5d83d6775405305bfa8715b76521922", + "sha256:cd2030f6650c089aeb304cf093f3244d34745ce0cfcc39f20c6fbfe030102e2a", + "sha256:cd65d75953847815962c84a4654a84850b2bb4aed3f26fadcc1c13892e1e29f6", + "sha256:e4985a790f921508f36f81831817cbc03b102d643b5fcb81cd33df3fa291a1a1", + "sha256:e807b3188f9eb0eaa7bbb579b462c5ace579f1cedb28107ce8b48a9f7ad3679e", + "sha256:f12764b8fffc7a123f641d7d049d382b73f96a34117e0b637b80643169cec8ac", + "sha256:f8837fe1d6ac4a8052a9a8ddab256bc006242696f03368a4009be7ee3075cdb7" + ], + "markers": "python_version >= '3.7'", + "version": "==42.0.5" + }, "distlib": { "hashes": [ "sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784", @@ -704,12 +870,12 @@ }, "exceptiongroup": { "hashes": [ - "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14", - "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68" + "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad", + "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16" ], "index": "pypi", "markers": "python_version >= '3.7'", - "version": 
"==1.2.0" + "version": "==1.2.1" }, "filelock": { "hashes": [ @@ -730,12 +896,12 @@ }, "flake8-bugbear": { "hashes": [ - "sha256:663ef5de80cd32aacd39d362212983bc4636435a6f83700b4ed35acbd0b7d1b8", - "sha256:f9cb5f2a9e792dd80ff68e89a14c12eed8620af8b41a49d823b7a33064ac9658" + "sha256:58581060a1650f4b11344795db8a4934867d4450486319ece86d7720a9414036", + "sha256:d1a87b8f6ca1ed28772c36515f751ea3709e041d78bca60590a570b9cb802e55" ], "index": "pypi", "markers": "python_full_version >= '3.8.1'", - "version": "==24.2.6" + "version": "==24.4.21" }, "flake8-docstrings": { "hashes": [ @@ -748,11 +914,11 @@ }, "identify": { "hashes": [ - "sha256:10a7ca245cfcd756a554a7288159f72ff105ad233c7c4b9c6f0f4d108f5f6791", - "sha256:c4de0081837b211594f8e877a6b4fad7ca32bbfc1a9307fdd61c28bfe923f13e" + "sha256:37d93f380f4de590500d9dba7db359d0d3da95ffe7f9de1753faa159e71e7dfa", + "sha256:e5e00f54165f9047fbebeb4a560f9acfb8af4c88232be60a488e9b68d122745d" ], "markers": "python_version >= '3.8'", - "version": "==2.5.35" + "version": "==2.5.36" }, "idna": { "hashes": [ @@ -796,11 +962,19 @@ }, "jaraco.functools": { "hashes": [ - "sha256:c279cb24c93d694ef7270f970d499cab4d3813f4e08273f95398651a634f0925", - "sha256:daf276ddf234bea897ef14f43c4e1bf9eefeac7b7a82a4dd69228ac20acff68d" + "sha256:3b24ccb921d6b593bdceb56ce14799204f473976e2a9d4b15b04d0f2c2326664", + "sha256:d33fa765374c0611b52f8b3a795f8900869aa88c84769d4d1746cd68fb28c3e8" ], "markers": "python_version >= '3.8'", - "version": "==4.0.0" + "version": "==4.0.1" + }, + "jeepney": { + "hashes": [ + "sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806", + "sha256:c0a454ad016ca575060802ee4d590dd912e35c122fa04e70306de3d076cce755" + ], + "markers": "sys_platform == 'linux'", + "version": "==0.8.0" }, "jellyfish": { "hashes": [ @@ -1181,11 +1355,11 @@ }, "pluggy": { "hashes": [ - "sha256:7db9f7b503d67d1c5b95f59773ebb58a8c1c288129a88665838012cfb07b8981", - "sha256:8c85c2876142a764e5b7548e7d9a0e0ddb46f5185161049a79b7e974454223be" + 
"sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1", + "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669" ], "markers": "python_version >= '3.8'", - "version": "==1.4.0" + "version": "==1.5.0" }, "pre-commit": { "hashes": [ @@ -1204,6 +1378,14 @@ "markers": "python_version >= '3.8'", "version": "==2.11.1" }, + "pycparser": { + "hashes": [ + "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6", + "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc" + ], + "markers": "python_version >= '3.8'", + "version": "==2.22" + }, "pydocstyle": { "hashes": [ "sha256:118762d452a49d6b05e194ef344a55822987a462831ade91ec5c06fd2169d019", @@ -1343,6 +1525,14 @@ "markers": "python_full_version >= '3.7.0'", "version": "==13.7.1" }, + "secretstorage": { + "hashes": [ + "sha256:2403533ef369eca6d2ba81718576c5e0f564d5cca1b58f73a8b23e7d4eeebd77", + "sha256:f356e6628222568e3af06f2eba8df495efa13b3b63081dafd4f7d9a7b7bc9f99" + ], + "markers": "sys_platform == 'linux'", + "version": "==3.3.3" + }, "setuptools": { "hashes": [ "sha256:6c1fccdac05a97e598fb0ae3bbed5904ccb317337a51139dcd51453611bbb987", @@ -1460,11 +1650,11 @@ }, "virtualenv": { "hashes": [ - "sha256:961c026ac520bac5f69acb8ea063e8a4f071bcc9457b9c1f28f6b085c511583a", - "sha256:e08e13ecdca7a0bd53798f356d5831434afa5b07b93f0abdf0797b7a06ffe197" + "sha256:7bb554bbdfeaacc3349fa614ea5bff6ac300fc7c335e9facf3a3bcfc703f45be", + "sha256:8aac4332f2ea6ef519c648d0bc48a5b1d324994753519919bddbb1aff25a104e" ], "markers": "python_version >= '3.7'", - "version": "==20.25.1" + "version": "==20.25.3" }, "wrapt": { "hashes": [ diff --git a/clean/ca/orange_county_sheriff.py b/clean/ca/orange_county_sheriff.py new file mode 100644 index 00000000..33dc9957 --- /dev/null +++ b/clean/ca/orange_county_sheriff.py @@ -0,0 +1,93 @@ +import time +from pathlib import Path +from typing import List + +from bs4 import BeautifulSoup + +from .. 
class Site:
    """Scraper for the Orange County Sheriff's Department records releases page.

    ``scrape_meta`` caches the disclosure index page and writes a JSON
    metadata file listing each linked asset; ``scrape`` reads that file and
    downloads the assets into the cache.
    """

    name = "Orange County Sheriffs Department"

    def __init__(self, data_dir=utils.CLEAN_DATA_DIR, cache_dir=utils.CLEAN_CACHE_DIR):
        """Initialize a new instance.

        Args:
            data_dir: Directory where the metadata JSON file is written.
            cache_dir: Directory passed to ``Cache`` for downloaded files.
        """
        self.base_url = "https://www.ocsheriff.gov"
        self.disclosure_url = (
            f"{self.base_url}/about-ocsheriff/peace-officer-records-releases"
        )
        self.data_dir = data_dir
        self.cache_dir = cache_dir
        self.cache = Cache(cache_dir)

    @property
    def agency_slug(self) -> str:
        """Construct the agency slug."""
        mod = Path(__file__)
        state_postal = mod.parent.stem
        return f"{state_postal}_{mod.stem}"  # ca_orange_county_sheriff

    def scrape_meta(self, throttle: int = 0) -> Path:
        """Download the disclosure index page and build the metadata JSON.

        Args:
            throttle: Accepted for interface parity; not used, since only a
                single index page is requested.

        Returns:
            Path to the metadata JSON file that was written.
        """
        self._download_index_pages(self.disclosure_url)
        downloadable_files = self._create_json()
        return downloadable_files

    def scrape(self, throttle: int = 0, filter: str = "") -> List[Path]:
        """Download the assets listed in the metadata JSON file.

        Args:
            throttle: Seconds to sleep before each download.
            filter: When non-empty, only assets whose URL contains this
                substring are downloaded.

        Returns:
            One entry per downloaded asset, as returned by ``cache.download``.
        """
        metadata = self.cache.read_json(
            self.data_dir.joinpath(f"{self.agency_slug}.json")
        )
        html_suffix = ".html"
        downloaded_assets = []
        for asset in metadata:
            url = asset["asset_url"]
            if filter and filter not in url:
                continue
            index_dir = asset["parent_page"].split(f"{self.agency_slug}/")[-1]
            # Drop the literal ".html" suffix. str.rstrip(".html") would strip
            # any trailing run of the characters ".", "h", "t", "m", "l" and
            # could eat into the page name itself (e.g. "detail" -> "detai").
            if index_dir.endswith(html_suffix):
                index_dir = index_dir[: -len(html_suffix)]
            asset_name = asset["name"].replace(" ", "_")
            download_path = Path(self.agency_slug, "assets", index_dir, asset_name)
            time.sleep(throttle)
            downloaded_assets.append(self.cache.download(str(download_path), url))
        return downloaded_assets

    @staticmethod
    def _asset_name(url: str) -> str:
        """Derive a file name for an asset from its URL.

        Takes the part of the URL after ``Mediazip/`` (the whole URL when that
        marker is absent), flattens path separators, encoded spaces and dots to
        underscores, prefixes the agency name, and restores a trailing
        ``.zip`` extension.
        """
        zip_marker = "_zip"
        tail = url.split("Mediazip/")[-1].replace("/", "_")
        cleaned = f"Orange_County_Sheriffs_Department_{tail}"
        cleaned = cleaned.replace("%20", "_").strip().replace(".", "_")
        # Only the final "_zip" is the flattened extension; an interior "_zip"
        # is part of the name and must be left alone.
        if cleaned.endswith(zip_marker):
            cleaned = cleaned[: -len(zip_marker)] + ".zip"
        return cleaned

    def _create_json(self) -> Path:
        """Parse the cached index page and write asset metadata to JSON.

        Every ``<a>`` inside the page's ``<article>`` whose ``href`` contains
        "http" becomes one metadata record. Records are keyed by derived asset
        name, so if two links yield the same name the later URL wins.

        Returns:
            Path to the metadata JSON file that was written.
        """
        file_stem = self.disclosure_url.split("/")[-1]
        html_location = f"{self.agency_slug}/{file_stem}.html"
        html = self.cache.read(html_location)
        soup = BeautifulSoup(html, "html.parser")  # type: ignore
        title = soup.find("title").text.strip()  # type: ignore
        links = soup.article.find_all("a")  # type: ignore
        url_by_name = {}
        for link in links:
            # Anchors without an href (e.g. named anchors) are skipped rather
            # than raising KeyError.
            href = link.get("href")
            if not href or "http" not in href:
                continue
            url_by_name[self._asset_name(href)] = href
        metadata = [
            {
                "title": title,
                "parent_page": html_location,
                "asset_url": asset_url,
                "name": asset_name,
            }
            for asset_name, asset_url in url_by_name.items()
        ]
        outfile = self.data_dir.joinpath(f"{self.agency_slug}.json")
        self.cache.write_json(outfile, metadata)
        return outfile

    def _download_index_pages(self, url: str) -> Path:
        """Download an index page into the cache.

        The page is stored under ``<agency_slug>/<last URL segment>.html``.

        Returns:
            The value returned by ``cache.download`` for the stored page.
        """
        file_stem = url.split("/")[-1]
        base_file = f"{self.agency_slug}/{file_stem}.html"
        return self.cache.download(base_file, url, "utf-8")