From 09635c0f8614d894fa826b4cb175c1acbece5a0a Mon Sep 17 00:00:00 2001
From: Patrick Jentsch <patrickjentsch@gmx.net>
Date: Mon, 23 Oct 2023 15:04:35 +0200
Subject: [PATCH] Create sqids package, move constants to a module

---
 pyproject.toml                 |   5 +-
 sqids/__init__.py              |   1 +
 sqids.py => sqids/constants.py | 173 +--------------------------------
 sqids/sqids.py                 | 170 ++++++++++++++++++++++++++++++++
 tests/test_minlength.py        |   3 +-
 5 files changed, 178 insertions(+), 174 deletions(-)
 create mode 100644 sqids/__init__.py
 rename sqids.py => sqids/constants.py (61%)
 create mode 100644 sqids/sqids.py

diff --git a/pyproject.toml b/pyproject.toml
index 5dc90db..b812346 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,4 +20,7 @@ Homepage = "https://sqids.org/python"
 
 [build-system]
 requires = ["setuptools", "setuptools-scm"]
-build-backend = "setuptools.build_meta"
\ No newline at end of file
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools]
+packages = ["sqids"]
diff --git a/sqids/__init__.py b/sqids/__init__.py
new file mode 100644
index 0000000..19e939d
--- /dev/null
+++ b/sqids/__init__.py
@@ -0,0 +1,10 @@
+"""Public interface of the sqids package."""
+from .constants import DEFAULT_ALPHABET, DEFAULT_BLOCKLIST, DEFAULT_MIN_LENGTH
+from .sqids import Sqids
+
+__all__ = [
+    "DEFAULT_ALPHABET",
+    "DEFAULT_BLOCKLIST",
+    "DEFAULT_MIN_LENGTH",
+    "Sqids",
+]
diff --git a/sqids.py b/sqids/constants.py
similarity index 61%
rename from sqids.py
rename to sqids/constants.py
index 93598dd..601ffb2 100644
--- a/sqids.py
+++ b/sqids/constants.py
@@ -1,9 +1,4 @@
-from typing import List, Set
-import sys
-
-
 DEFAULT_ALPHABET = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
-DEFAULT_MIN_LENGTH = 0
 DEFAULT_BLOCKLIST = [
     "0rgasm",
     "1d10t",
@@ -566,170 +561,4 @@
     "zocco1a",
     "zoccola",
 ]
-
-
-class Sqids:
-    def __init__(
-        self,
-        alphabet: str = DEFAULT_ALPHABET,
-        min_length: int = DEFAULT_MIN_LENGTH,
-        blocklist: List[str] = DEFAULT_BLOCKLIST,
-    ):
-        for char in alphabet:
-            if ord(char) > 127:
-                raise ValueError("Alphabet cannot contain multibyte characters")
-
-        if len(alphabet) < 3:
-            raise ValueError("Alphabet length must be at least 3")
-
-        if len(set(alphabet)) != len(alphabet):
-            raise ValueError("Alphabet must contain unique characters")
-
-        if not isinstance(min_length, int):
-            raise TypeError("Minimum length must be an int")
-
-        MIN_LENGTH_LIMIT = 255
-        if min_length < 0 or min_length > MIN_LENGTH_LIMIT:
-            raise ValueError(
-                f"Minimum length has to be between 0 and {MIN_LENGTH_LIMIT}"
-            )
-
-        filtered_blocklist: Set[str] = set()
-        alphabet_lower = alphabet.lower()
-        for word_lower in (w.lower() for w in blocklist if len(w) >= 3):
-            intersection = [c for c in word_lower if c in alphabet_lower]
-            if len(intersection) == len(word_lower):
-                filtered_blocklist.add(word_lower)
-
-        self.__alphabet = self.__shuffle(alphabet)
-        self.__min_length = min_length
-        self.__blocklist = filtered_blocklist
-
-    def encode(self, numbers: List[int]) -> str:
-        if not numbers:
-            return ""
-
-        in_range_numbers = [n for n in numbers if 0 <= n <= sys.maxsize]
-        if len(in_range_numbers) != len(numbers):
-            raise ValueError(f"Encoding supports numbers between 0 and {sys.maxsize}")
-
-        return self.__encode_numbers(numbers, 0)
-
-    def __encode_numbers(self, numbers: List[int], increment: int = 0) -> str:
-        if increment > len(self.__alphabet):
-            raise ValueError("Reached max attempts to re-generate the ID")
-
-        offset = sum(
-            (
-                ord(self.__alphabet[v % len(self.__alphabet)]) + i
-                for i, v in enumerate(numbers)
-            ),
-            start=len(numbers),
-        ) % len(self.__alphabet)
-        offset = (offset + increment) % len(self.__alphabet)
-        alphabet = self.__alphabet[offset:] + self.__alphabet[:offset]
-        prefix = alphabet[0]
-        alphabet = alphabet[::-1]
-
-        ret = [prefix]
-
-        for i, num in enumerate(numbers):
-            ret.append(self.__to_id(num, alphabet[1:]))
-
-            if i >= len(numbers) - 1:
-                continue
-
-            ret.append(alphabet[0])
-            alphabet = self.__shuffle(alphabet)
-
-        id_ = "".join(ret)
-
-        if self.__min_length > len(id_):
-            id_ += alphabet[0]
-
-            while self.__min_length - len(id_) > 0:
-                alphabet = self.__shuffle(alphabet)
-                id_ += alphabet[: min(self.__min_length - len(id_), len(alphabet))]
-
-        if self.__is_blocked_id(id_):
-            id_ = self.__encode_numbers(numbers, increment + 1)
-
-        return id_
-
-    def decode(self, id_: str) -> List[int]:
-        ret: List[int] = []
-
-        if not id_:
-            return ret
-
-        alphabet_chars = list(self.__alphabet)
-        if any(c not in alphabet_chars for c in id_):
-            return ret
-
-        prefix = id_[0]
-        offset = self.__alphabet.index(prefix)
-        alphabet = self.__alphabet[offset:] + self.__alphabet[:offset]
-        alphabet = alphabet[::-1]
-        id_ = id_[1:]
-
-        while id_:
-            separator = alphabet[0]
-            chunks = id_.split(separator)
-            if chunks:
-                if not chunks[0]:
-                    return ret
-
-                ret.append(self.__to_number(chunks[0], alphabet[1:]))
-                if len(chunks) > 1:
-                    alphabet = self.__shuffle(alphabet)
-
-            id_ = separator.join(chunks[1:])
-
-        return ret
-
-    def __shuffle(self, alphabet: str) -> str:
-        chars = list(alphabet)
-
-        i = 0
-        j = len(chars) - 1
-        while j > 0:
-            r = (i * j + ord(chars[i]) + ord(chars[j])) % len(chars)
-            chars[i], chars[r] = chars[r], chars[i]
-            i += 1
-            j -= 1
-
-        return "".join(chars)
-
-    def __to_id(self, num: int, alphabet: str) -> str:
-        id_chars: List[str] = []
-        chars = list(alphabet)
-        result = num
-
-        while True:
-            id_chars.insert(0, chars[result % len(chars)])
-            result = result // len(chars)
-            if result == 0:
-                break
-
-        return "".join(id_chars)
-
-    def __to_number(self, id_: str, alphabet: str) -> int:
-        chars = list(alphabet)
-        return sum(chars.index(c) * (len(chars) ** i) for i, c in enumerate(id_[::-1]))
-
-    def __is_blocked_id(self, id_: str) -> bool:
-        id_ = id_.lower()
-
-        for word in self.__blocklist:
-            if len(word) > len(id_):
-                continue
-            if len(id_) <= 3 or len(word) <= 3:
-                if id_ == word:
-                    return True
-            elif any(c.isdigit() for c in word):
-                if id_.startswith(word) or id_.endswith(word):
-                    return True
-            elif word in id_:
-                return True
-
-        return False
+DEFAULT_MIN_LENGTH = 0
diff --git a/sqids/sqids.py b/sqids/sqids.py
new file mode 100644
index 0000000..2f314fd
--- /dev/null
+++ b/sqids/sqids.py
@@ -0,0 +1,238 @@
+from typing import List, Set
+import sys
+from .constants import DEFAULT_ALPHABET, DEFAULT_BLOCKLIST, DEFAULT_MIN_LENGTH
+
+
+class Sqids:
+    """Encode lists of non-negative integers into short string IDs and back.
+
+    An instance is configured once with an alphabet, a minimum ID length and
+    a blocklist; `encode` and `decode` then use that configuration.
+    """
+
+    def __init__(
+        self,
+        alphabet: str = DEFAULT_ALPHABET,
+        min_length: int = DEFAULT_MIN_LENGTH,
+        blocklist: List[str] = DEFAULT_BLOCKLIST,
+    ):
+        """Validate the configuration and prepare the internal state.
+
+        Args:
+            alphabet: Characters used to build IDs. Every character must
+                have a code point of at most 127, be unique, and there must
+                be at least 3 of them.
+            min_length: Minimum length of generated IDs, from 0 to 255.
+            blocklist: Words that generated IDs must avoid. Words shorter
+                than 3 characters, or containing characters outside the
+                alphabet (compared case-insensitively), are ignored.
+
+        Raises:
+            ValueError: If the alphabet is invalid or `min_length` is
+                outside the allowed range.
+            TypeError: If `min_length` is not an int.
+        """
+        for char in alphabet:
+            if ord(char) > 127:
+                raise ValueError("Alphabet cannot contain multibyte characters")
+
+        if len(alphabet) < 3:
+            raise ValueError("Alphabet length must be at least 3")
+
+        if len(set(alphabet)) != len(alphabet):
+            raise ValueError("Alphabet must contain unique characters")
+
+        if not isinstance(min_length, int):
+            raise TypeError("Minimum length must be an int")
+
+        MIN_LENGTH_LIMIT = 255
+        if min_length < 0 or min_length > MIN_LENGTH_LIMIT:
+            raise ValueError(
+                f"Minimum length has to be between 0 and {MIN_LENGTH_LIMIT}"
+            )
+
+        # Keep only blocklist words that are at least 3 characters long and
+        # made up solely of alphabet characters (case-insensitively).
+        alphabet_lower = alphabet.lower()
+        filtered_blocklist: Set[str] = set()
+        for word_lower in (w.lower() for w in blocklist if len(w) >= 3):
+            if all(c in alphabet_lower for c in word_lower):
+                filtered_blocklist.add(word_lower)
+
+        self.__alphabet = self.__shuffle(alphabet)
+        self.__min_length = min_length
+        self.__blocklist = filtered_blocklist
+
+    def encode(self, numbers: List[int]) -> str:
+        """Return the ID that represents `numbers`.
+
+        An empty `numbers` yields an empty string.
+
+        Raises:
+            ValueError: If a number is negative or greater than
+                `sys.maxsize`, or if every attempt to generate the ID
+                produced a blocked one.
+        """
+        if not numbers:
+            return ""
+
+        if not all(0 <= n <= sys.maxsize for n in numbers):
+            raise ValueError(f"Encoding supports numbers between 0 and {sys.maxsize}")
+
+        return self.__encode_numbers(numbers, 0)
+
+    def __encode_numbers(self, numbers: List[int], increment: int = 0) -> str:
+        """Build the ID for `numbers`, retrying while the result is blocked.
+
+        `increment` is added to the alphabet offset; each retry passes a
+        larger value.
+
+        Raises:
+            ValueError: If `increment` exceeds the alphabet length.
+        """
+        if increment > len(self.__alphabet):
+            raise ValueError("Reached max attempts to re-generate the ID")
+
+        offset = sum(
+            (
+                ord(self.__alphabet[v % len(self.__alphabet)]) + i
+                for i, v in enumerate(numbers)
+            ),
+            start=len(numbers),
+        ) % len(self.__alphabet)
+        offset = (offset + increment) % len(self.__alphabet)
+        alphabet = self.__alphabet[offset:] + self.__alphabet[:offset]
+        # The first character of the rotated alphabet is the ID prefix;
+        # `decode` looks it up to recover the offset.
+        prefix = alphabet[0]
+        alphabet = alphabet[::-1]
+
+        ret = [prefix]
+        last_index = len(numbers) - 1
+
+        for i, num in enumerate(numbers):
+            # alphabet[0] serves as the separator, so it is excluded from
+            # the characters used to encode the number itself.
+            ret.append(self.__to_id(num, alphabet[1:]))
+
+            if i < last_index:
+                ret.append(alphabet[0])
+                alphabet = self.__shuffle(alphabet)
+
+        id_ = "".join(ret)
+
+        if self.__min_length > len(id_):
+            # Pad with one separator, then with characters taken from
+            # re-shuffled alphabets until the minimum length is reached.
+            id_ += alphabet[0]
+
+            while self.__min_length - len(id_) > 0:
+                alphabet = self.__shuffle(alphabet)
+                id_ += alphabet[: min(self.__min_length - len(id_), len(alphabet))]
+
+        if self.__is_blocked_id(id_):
+            id_ = self.__encode_numbers(numbers, increment + 1)
+
+        return id_
+
+    def decode(self, id_: str) -> List[int]:
+        """Return the numbers encoded in `id_`.
+
+        An empty ID, or one containing a character that is not in the
+        alphabet, yields an empty list.
+        """
+        ret: List[int] = []
+
+        if not id_:
+            return ret
+
+        valid_chars = set(self.__alphabet)
+        if any(c not in valid_chars for c in id_):
+            return ret
+
+        # The prefix character identifies the offset chosen while encoding.
+        prefix = id_[0]
+        offset = self.__alphabet.index(prefix)
+        alphabet = self.__alphabet[offset:] + self.__alphabet[:offset]
+        alphabet = alphabet[::-1]
+        id_ = id_[1:]
+
+        while id_:
+            separator = alphabet[0]
+            chunk, found, id_ = id_.partition(separator)
+            # Nothing before the separator: no further number to read (this
+            # is how the padding appended by `__encode_numbers` begins).
+            if not chunk:
+                return ret
+
+            ret.append(self.__to_number(chunk, alphabet[1:]))
+            if found:
+                alphabet = self.__shuffle(alphabet)
+
+        return ret
+
+    def __shuffle(self, alphabet: str) -> str:
+        """Return a deterministic permutation of `alphabet`.
+
+        Each swap target is computed from the positions and character codes
+        only, so the same input always gives the same output.
+        """
+        chars = list(alphabet)
+
+        i = 0
+        j = len(chars) - 1
+        while j > 0:
+            r = (i * j + ord(chars[i]) + ord(chars[j])) % len(chars)
+            chars[i], chars[r] = chars[r], chars[i]
+            i += 1
+            j -= 1
+
+        return "".join(chars)
+
+    def __to_id(self, num: int, alphabet: str) -> str:
+        """Write `num` in base `len(alphabet)` using `alphabet` as digits."""
+        base = len(alphabet)
+        digits: List[str] = []
+        remaining = num
+
+        # Digits are produced least-significant first; collect them with
+        # append and reverse once instead of inserting at the front.
+        while True:
+            remaining, digit = divmod(remaining, base)
+            digits.append(alphabet[digit])
+            if remaining == 0:
+                break
+
+        return "".join(reversed(digits))
+
+    def __to_number(self, id_: str, alphabet: str) -> int:
+        """Read `id_` as a number in base `len(alphabet)`."""
+        base = len(alphabet)
+        number = 0
+        for char in id_:
+            number = number * base + alphabet.index(char)
+        return number
+
+    def __is_blocked_id(self, id_: str) -> bool:
+        """Return True if `id_` matches a blocklist word, ignoring case.
+
+        Words longer than the ID are skipped. Otherwise:
+        - if the ID or the word has 3 characters or fewer, they must be equal;
+        - a word containing a digit must be a prefix or suffix of the ID;
+        - any other word matches anywhere inside the ID.
+        """
+        id_ = id_.lower()
+
+        for word in self.__blocklist:
+            if len(word) > len(id_):
+                continue
+            if len(id_) <= 3 or len(word) <= 3:
+                if id_ == word:
+                    return True
+            elif any(c.isdigit() for c in word):
+                if id_.startswith(word) or id_.endswith(word):
+                    return True
+            elif word in id_:
+                return True
+
+        return False
diff --git a/tests/test_minlength.py b/tests/test_minlength.py
index b0bec93..474170d 100644
--- a/tests/test_minlength.py
+++ b/tests/test_minlength.py
@@ -1,6 +1,7 @@
 import pytest
 import sys
-from sqids import Sqids, DEFAULT_ALPHABET
+from sqids import Sqids
+from sqids.constants import DEFAULT_ALPHABET
 
 
 def test_simple():