From 09635c0f8614d894fa826b4cb175c1acbece5a0a Mon Sep 17 00:00:00 2001 From: Patrick Jentsch Date: Mon, 23 Oct 2023 15:04:35 +0200 Subject: [PATCH] Create sqids package, move constants to a module --- pyproject.toml | 5 +- sqids/__init__.py | 1 + sqids.py => sqids/constants.py | 173 +-------------------------------- sqids/sqids.py | 170 ++++++++++++++++++++++++++++++++ tests/test_minlength.py | 3 +- 5 files changed, 178 insertions(+), 174 deletions(-) create mode 100644 sqids/__init__.py rename sqids.py => sqids/constants.py (61%) create mode 100644 sqids/sqids.py diff --git a/pyproject.toml b/pyproject.toml index 5dc90db..b812346 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,4 +20,7 @@ Homepage = "https://sqids.org/python" [build-system] requires = ["setuptools", "setuptools-scm"] -build-backend = "setuptools.build_meta" \ No newline at end of file +build-backend = "setuptools.build_meta" + +[tool.setuptools] +packages = ["sqids"] diff --git a/sqids/__init__.py b/sqids/__init__.py new file mode 100644 index 0000000..19e939d --- /dev/null +++ b/sqids/__init__.py @@ -0,0 +1 @@ +from .sqids import Sqids diff --git a/sqids.py b/sqids/constants.py similarity index 61% rename from sqids.py rename to sqids/constants.py index 93598dd..601ffb2 100644 --- a/sqids.py +++ b/sqids/constants.py @@ -1,9 +1,4 @@ -from typing import List, Set -import sys - - DEFAULT_ALPHABET = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" -DEFAULT_MIN_LENGTH = 0 DEFAULT_BLOCKLIST = [ "0rgasm", "1d10t", @@ -566,170 +561,4 @@ "zocco1a", "zoccola", ] - - -class Sqids: - def __init__( - self, - alphabet: str = DEFAULT_ALPHABET, - min_length: int = DEFAULT_MIN_LENGTH, - blocklist: List[str] = DEFAULT_BLOCKLIST, - ): - for char in alphabet: - if ord(char) > 127: - raise ValueError("Alphabet cannot contain multibyte characters") - - if len(alphabet) < 3: - raise ValueError("Alphabet length must be at least 3") - - if len(set(alphabet)) != len(alphabet): - raise ValueError("Alphabet must contain unique characters") - - if not isinstance(min_length, int): - raise TypeError("Minimum length must be an int") - - MIN_LENGTH_LIMIT = 255 - if min_length < 0 or min_length > MIN_LENGTH_LIMIT: - raise ValueError( - f"Minimum length has to be between 0 and {MIN_LENGTH_LIMIT}" - ) - - filtered_blocklist: Set[str] = set() - alphabet_lower = alphabet.lower() - for word_lower in (w.lower() for w in blocklist if len(w) >= 3): - intersection = [c for c in word_lower if c in alphabet_lower] - if len(intersection) == len(word_lower): - filtered_blocklist.add(word_lower) - - self.__alphabet = self.__shuffle(alphabet) - self.__min_length = min_length - self.__blocklist = filtered_blocklist - - def encode(self, numbers: List[int]) -> str: - if not numbers: - return "" - - in_range_numbers = [n for n in numbers if 0 <= n <= sys.maxsize] - if len(in_range_numbers) != len(numbers): - raise ValueError(f"Encoding supports numbers between 0 and {sys.maxsize}") - - return self.__encode_numbers(numbers, 0) - - def __encode_numbers(self, numbers: List[int], increment: int = 0) -> str: - if increment > len(self.__alphabet): - raise ValueError("Reached max attempts to re-generate the ID") - - offset = sum( - ( - ord(self.__alphabet[v % len(self.__alphabet)]) + i - for i, v in enumerate(numbers) - ), - start=len(numbers), - ) % len(self.__alphabet) - offset = (offset + increment) % len(self.__alphabet) - alphabet = self.__alphabet[offset:] + self.__alphabet[:offset] - prefix = alphabet[0] - alphabet = alphabet[::-1] - - ret = [prefix] - - for i, num in enumerate(numbers): - ret.append(self.__to_id(num, alphabet[1:])) - - if i >= len(numbers) - 1: - continue - - ret.append(alphabet[0]) - alphabet = self.__shuffle(alphabet) - - id_ = "".join(ret) - - if self.__min_length > len(id_): - id_ += alphabet[0] - - while self.__min_length - len(id_) > 0: - alphabet = self.__shuffle(alphabet) - id_ += alphabet[: min(self.__min_length - len(id_), len(alphabet))] - - if self.__is_blocked_id(id_): - id_ = self.__encode_numbers(numbers, increment + 1) - - return id_ - - def decode(self, id_: str) -> List[int]: - ret: List[int] = [] - - if not id_: - return ret - - alphabet_chars = list(self.__alphabet) - if any(c not in alphabet_chars for c in id_): - return ret - - prefix = id_[0] - offset = self.__alphabet.index(prefix) - alphabet = self.__alphabet[offset:] + self.__alphabet[:offset] - alphabet = alphabet[::-1] - id_ = id_[1:] - - while id_: - separator = alphabet[0] - chunks = id_.split(separator) - if chunks: - if not chunks[0]: - return ret - - ret.append(self.__to_number(chunks[0], alphabet[1:])) - if len(chunks) > 1: - alphabet = self.__shuffle(alphabet) - - id_ = separator.join(chunks[1:]) - - return ret - - def __shuffle(self, alphabet: str) -> str: - chars = list(alphabet) - - i = 0 - j = len(chars) - 1 - while j > 0: - r = (i * j + ord(chars[i]) + ord(chars[j])) % len(chars) - chars[i], chars[r] = chars[r], chars[i] - i += 1 - j -= 1 - - return "".join(chars) - - def __to_id(self, num: int, alphabet: str) -> str: - id_chars: List[str] = [] - chars = list(alphabet) - result = num - - while True: - id_chars.insert(0, chars[result % len(chars)]) - result = result // len(chars) - if result == 0: - break - - return "".join(id_chars) - - def __to_number(self, id_: str, alphabet: str) -> int: - chars = list(alphabet) - return sum(chars.index(c) * (len(chars) ** i) for i, c in enumerate(id_[::-1])) - - def __is_blocked_id(self, id_: str) -> bool: - id_ = id_.lower() - - for word in self.__blocklist: - if len(word) > len(id_): - continue - if len(id_) <= 3 or len(word) <= 3: - if id_ == word: - return True - elif any(c.isdigit() for c in word): - if id_.startswith(word) or id_.endswith(word): - return True - elif word in id_: - return True - - return False +DEFAULT_MIN_LENGTH = 0 diff --git a/sqids/sqids.py b/sqids/sqids.py new file mode 100644 index 0000000..2f314fd --- /dev/null +++ b/sqids/sqids.py @@ -0,0 +1,170 @@ +from typing import List, Set +import sys +from .constants import DEFAULT_ALPHABET, DEFAULT_BLOCKLIST, DEFAULT_MIN_LENGTH + + +class Sqids: + def __init__( + self, + alphabet: str = DEFAULT_ALPHABET, + min_length: int = DEFAULT_MIN_LENGTH, + blocklist: List[str] = DEFAULT_BLOCKLIST, + ): + for char in alphabet: + if ord(char) > 127: + raise ValueError("Alphabet cannot contain multibyte characters") + + if len(alphabet) < 3: + raise ValueError("Alphabet length must be at least 3") + + if len(set(alphabet)) != len(alphabet): + raise ValueError("Alphabet must contain unique characters") + + if not isinstance(min_length, int): + raise TypeError("Minimum length must be an int") + + MIN_LENGTH_LIMIT = 255 + if min_length < 0 or min_length > MIN_LENGTH_LIMIT: + raise ValueError( + f"Minimum length has to be between 0 and {MIN_LENGTH_LIMIT}" + ) + + filtered_blocklist: Set[str] = set() + alphabet_lower = alphabet.lower() + for word_lower in (w.lower() for w in blocklist if len(w) >= 3): + intersection = [c for c in word_lower if c in alphabet_lower] + if len(intersection) == len(word_lower): + filtered_blocklist.add(word_lower) + + self.__alphabet = self.__shuffle(alphabet) + self.__min_length = min_length + self.__blocklist = filtered_blocklist + + def encode(self, numbers: List[int]) -> str: + if not numbers: + return "" + + in_range_numbers = [n for n in numbers if 0 <= n <= sys.maxsize] + if len(in_range_numbers) != len(numbers): + raise ValueError(f"Encoding supports numbers between 0 and {sys.maxsize}") + + return self.__encode_numbers(numbers, 0) + + def __encode_numbers(self, numbers: List[int], increment: int = 0) -> str: + if increment > len(self.__alphabet): + raise ValueError("Reached max attempts to re-generate the ID") + + offset = sum( + ( + ord(self.__alphabet[v % len(self.__alphabet)]) + i + for i, v in enumerate(numbers) + ), + start=len(numbers), + ) % len(self.__alphabet) + offset = (offset + increment) % len(self.__alphabet) + alphabet = self.__alphabet[offset:] + self.__alphabet[:offset] + prefix = alphabet[0] + alphabet = alphabet[::-1] + + ret = [prefix] + + for i, num in enumerate(numbers): + ret.append(self.__to_id(num, alphabet[1:])) + + if i >= len(numbers) - 1: + continue + + ret.append(alphabet[0]) + alphabet = self.__shuffle(alphabet) + + id_ = "".join(ret) + + if self.__min_length > len(id_): + id_ += alphabet[0] + + while self.__min_length - len(id_) > 0: + alphabet = self.__shuffle(alphabet) + id_ += alphabet[: min(self.__min_length - len(id_), len(alphabet))] + + if self.__is_blocked_id(id_): + id_ = self.__encode_numbers(numbers, increment + 1) + + return id_ + + def decode(self, id_: str) -> List[int]: + ret: List[int] = [] + + if not id_: + return ret + + alphabet_chars = list(self.__alphabet) + if any(c not in alphabet_chars for c in id_): + return ret + + prefix = id_[0] + offset = self.__alphabet.index(prefix) + alphabet = self.__alphabet[offset:] + self.__alphabet[:offset] + alphabet = alphabet[::-1] + id_ = id_[1:] + + while id_: + separator = alphabet[0] + chunks = id_.split(separator) + if chunks: + if not chunks[0]: + return ret + + ret.append(self.__to_number(chunks[0], alphabet[1:])) + if len(chunks) > 1: + alphabet = self.__shuffle(alphabet) + + id_ = separator.join(chunks[1:]) + + return ret + + def __shuffle(self, alphabet: str) -> str: + chars = list(alphabet) + + i = 0 + j = len(chars) - 1 + while j > 0: + r = (i * j + ord(chars[i]) + ord(chars[j])) % len(chars) + chars[i], chars[r] = chars[r], chars[i] + i += 1 + j -= 1 + + return "".join(chars) + + def __to_id(self, num: int, alphabet: str) -> str: + id_chars: List[str] = [] + chars = list(alphabet) + result = num + + while True: + id_chars.insert(0, chars[result % len(chars)]) + result = result // len(chars) + if result == 0: + break + + return "".join(id_chars) + + def __to_number(self, id_: str, alphabet: str) -> int: + chars = list(alphabet) + return sum(chars.index(c) * (len(chars) ** i) for i, c in enumerate(id_[::-1])) + + def __is_blocked_id(self, id_: str) -> bool: + id_ = id_.lower() + + for word in self.__blocklist: + if len(word) > len(id_): + continue + if len(id_) <= 3 or len(word) <= 3: + if id_ == word: + return True + elif any(c.isdigit() for c in word): + if id_.startswith(word) or id_.endswith(word): + return True + elif word in id_: + return True + + return False diff --git a/tests/test_minlength.py b/tests/test_minlength.py index b0bec93..474170d 100644 --- a/tests/test_minlength.py +++ b/tests/test_minlength.py @@ -1,6 +1,7 @@ import pytest import sys -from sqids import Sqids, DEFAULT_ALPHABET +from sqids import Sqids +from sqids.constants import DEFAULT_ALPHABET def test_simple():