Skip to content

Commit

Permalink
[BUG] Bound tokenizers, patch CVP test (#3211)
Browse files Browse the repository at this point in the history
## Description of changes

*Summarize the changes made by this PR.*
 - Improvements & Bug fixes
   - Bounds tokenizers as in #3202 
- Includes a change to CVP test to install an old version of a dep in
case of b/w incompat like this has.
 - New functionality
   - None

## Test plan
*How are these changes tested?*
- [x] Tests pass locally with `pytest` for python, `yarn test` for js,
`cargo test` for rust

## Documentation Changes
None
  • Loading branch information
HammadB authored Nov 27, 2024
1 parent f36c386 commit 3800d70
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 23 deletions.
13 changes: 11 additions & 2 deletions chromadb/test/client/test_http_client_v1_compatability.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import multiprocessing
from unittest.mock import patch

import sys as pysys
from multiprocessing.connection import Connection

from chromadb.config import System
Expand Down Expand Up @@ -41,7 +41,16 @@ def test_http_client_bw_compatibility() -> None:
port = sys.settings.chroma_server_http_port

old_version = "0.5.11" # Module with known v1 client
install_version(old_version)

# Version <3.9 requires bounding tokenizers<=0.20.3
# TOOD(hammadb): This code is duplicated in test_cross_version_persist.py
# for expediency on 11/27/2024 I am copy pasting rather than refactoring
# to DRY. Refactor later.
(major, minor, _) = pysys.version_info[:3]
if major == 3 and minor < 9:
install_version(old_version, {"tokenizers": "<=0.20.3"})
else:
install_version(old_version, {})

ctx = multiprocessing.get_context("spawn")
conn1, conn2 = multiprocessing.Pipe()
Expand Down
12 changes: 10 additions & 2 deletions chromadb/test/property/test_cross_version_persist.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import chromadb.test.property.invariants as invariants
from packaging import version as packaging_version
import re
import sys
import multiprocessing
from chromadb.config import Settings
from chromadb.api.client import Client as ClientCreator
Expand All @@ -38,7 +39,7 @@
version_re = re.compile(r"^[0-9]+\.[0-9]+\.[0-9]+$")

# Some modules do not work across versions, since we upgrade our support for them, and should be explicitly reimported in the subprocess
VERSIONED_MODULES = ["pydantic", "numpy"]
VERSIONED_MODULES = ["pydantic", "numpy", "tokenizers"]


def versions() -> List[str]:
Expand Down Expand Up @@ -148,7 +149,14 @@ def configurations(versions: List[str]) -> List[Tuple[str, Settings]]:
def version_settings(request) -> Generator[Tuple[str, Settings], None, None]:
configuration = request.param
version = configuration[0]
install_version(version)

# Version <3.9 requires bounding tokenizers<=0.20.3
(major, minor, patch) = sys.version_info[:3]
if major == 3 and minor < 9:
install_version(version, {"tokenizers": "<=0.20.3"})
else:
install_version(version, {})

yield configuration
# Cleanup the installed version
path = get_path_to_version_install(version)
Expand Down
31 changes: 14 additions & 17 deletions chromadb/test/utils/cross_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import os
import tempfile
from types import ModuleType
from typing import List
from typing import Dict, List

base_install_dir = tempfile.gettempdir() + "/persistence_test_chromadb_versions"

Expand Down Expand Up @@ -38,16 +38,16 @@ def get_path_to_version_library(version: str) -> str:
return get_path_to_version_install(version) + "/chromadb/__init__.py"


def install_version(version: str) -> None:
def install_version(version: str, dep_overrides: Dict[str, str]) -> None:
# Check if already installed
version_library = get_path_to_version_library(version)
if os.path.exists(version_library):
return
path = get_path_to_version_install(version)
install(f"chromadb=={version}", path)
install(f"chromadb=={version}", path, dep_overrides)


def install(pkg: str, path: str) -> int:
def install(pkg: str, path: str, dep_overrides: Dict[str, str]) -> int:
# -q -q to suppress pip output to ERROR level
# https://pip.pypa.io/en/stable/cli/pip/#quiet
print("Purging pip cache")
Expand All @@ -60,17 +60,14 @@ def install(pkg: str, path: str) -> int:
"purge",
]
)

command = [sys.executable, "-m", "pip", "-q", "-q", "install", pkg]

for dep, operator_version in dep_overrides.items():
command.append(f"{dep}{operator_version}")

command.append("--no-binary=chroma-hnswlib")
command.append(f"--target={path}")

print(f"Installing chromadb version {pkg} to {path}")
return subprocess.check_call(
[
sys.executable,
"-m",
"pip",
"-q",
"-q",
"install",
pkg,
"--no-binary=chroma-hnswlib",
"--target={}".format(path),
]
)
return subprocess.check_call(command)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ dependencies = [
'opentelemetry-exporter-otlp-proto-grpc>=1.2.0',
'opentelemetry-instrumentation-fastapi>=0.41b0',
'opentelemetry-sdk>=1.2.0',
'tokenizers >= 0.13.2',
'tokenizers >= 0.13.2, <= 0.20.3',
'pypika >= 0.48.9',
'tqdm >= 4.65.0',
'overrides >= 7.3.1',
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ pypika>=0.48.9
PyYAML>=6.0.0
rich>=10.11.0
tenacity>=8.2.3
tokenizers>=0.13.2
tokenizers>=0.13.2,<=0.20.3
tqdm>=4.65.0
typer>=0.9.0
typing_extensions>=4.5.0
Expand Down

0 comments on commit 3800d70

Please sign in to comment.