diff --git a/README.md b/README.md index b15754d..f9aa7f2 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,10 @@ The data is stored in sqlite3 file including all binary indices. chops collection snapshot /path/to/persist_dir --collection -o /path/to/snapshot.sqlite3 ``` +Additional options: + +- `--yes` (`-y`) - skip confirmation prompt (default: `False`, prompt will be shown) + **Go:** > [!NOTE] @@ -208,6 +212,10 @@ This command cleans up orphanated HNSW segment subdirectories. chops db clean /path/to/persist_dir ``` +Additional options: + +- `--yes` (`-y`) - skip confirmation prompt (default: `False`, prompt will be shown) + **Go:** @@ -325,6 +333,10 @@ This command rebuilds the full-text search index. chops fts rebuild /path/to/persist_dir ``` +Additional options: + +- `--yes` (`-y`) - skip confirmation prompt (default: `False`, prompt will be shown) + Change the tokenizer to `unicode61` by passing `--tokenizer unicode61` (or `-t unicode61`) option. ```bash diff --git a/chroma_ops/fts.py b/chroma_ops/fts.py index f969b30..d00a29d 100644 --- a/chroma_ops/fts.py +++ b/chroma_ops/fts.py @@ -1,4 +1,4 @@ -import sys +from typing import Optional import chromadb import typer @@ -11,6 +11,8 @@ read_script, ) +from rich.console import Console + fts_commands = typer.Typer(no_args_is_help=True) @@ -28,31 +30,38 @@ def validate_tokenizer(tokenizer: str) -> None: ) -def rebuild_fts(persist_dir: str, tokenizer: str = DEFAULT_TOKENIZER) -> None: +def rebuild_fts( + persist_dir: str, + tokenizer: str = DEFAULT_TOKENIZER, + yes: Optional[bool] = False, +) -> None: validate_chroma_persist_dir(persist_dir) validate_tokenizer(tokenizer) + console = Console() + if not yes: + if not typer.confirm( + f"\nAre you sure you want to rebuild the FTS index in {persist_dir}? This action will drop the existing FTS index and create a new one.", + default=False, + show_default=True, + ): + console.print("[yellow]Rebuild FTS cancelled by user[/yellow]") + return with get_sqlite_connection(persist_dir, SqliteMode.READ_WRITE) as conn: cursor = conn.cursor() script = read_script("scripts/drop_fts.sql") script = script.replace("__TOKENIZER__", tokenizer) cursor.executescript(script) cursor.close() - typer.echo("Dropped FTS. Will try to start your Chroma now.", file=sys.stderr) - typer.echo( + console.print("Rebuilt FTS. Will try to start your Chroma now.") + console.print( "NOTE: Depending on the size of your documents in Chroma it may take a while for Chroma to start up again.", - file=sys.stderr, - color=typer.colors.YELLOW, ) try: chromadb.PersistentClient(path=persist_dir) - typer.echo("Chroma started successfully.", file=sys.stderr) + console.print("[green]Chroma started successfully. FTS rebuilt.[/green]") except Exception as e: - typer.echo( - f"Chroma failed to start. Error: {repr(e)}", - file=sys.stderr, - color=typer.colors.RED, - err=True, - ) + console.print(f"[red]Chroma failed to start. Error: {repr(e)}[/red]") + raise e def rebuild_command( @@ -63,8 +72,11 @@ def rebuild_command( "-t", help="The tokenizer to use for the FTS index. Supported values: 'trigram', 'unicode61', 'ascii', 'porter'. See https://www.sqlite.org/fts5.html#tokenizers", ), + yes: Optional[bool] = typer.Option( + False, "--yes", "-y", help="Skip confirmation prompt" + ), ) -> None: - rebuild_fts(persist_dir, tokenizer) + rebuild_fts(persist_dir, tokenizer, yes=yes) fts_commands.command( diff --git a/tests/test_fts.py b/tests/test_fts.py index 784323f..7432079 100644 --- a/tests/test_fts.py +++ b/tests/test_fts.py @@ -32,7 +32,7 @@ def test_rebuild_fts() -> None: col.get(where_document={"$contains": "document 0"}) assert "no such table: embedding_fulltext_search" in str(e) - rebuild_fts(temp_dir) + rebuild_fts(temp_dir, yes=True) fixed_temp_dir = os.path.join(temp_dir, "fixed") shutil.copytree(temp_dir, fixed_temp_dir) client = chromadb.PersistentClient(path=fixed_temp_dir) @@ -74,7 +74,7 @@ def test_rebuild_fts_with_unicode61() -> None: fixed_temp_dir = os.path.join(temp_dir, "fixed") shutil.copytree(temp_dir, fixed_temp_dir) - rebuild_fts(fixed_temp_dir, tokenizer="unicode61") + rebuild_fts(fixed_temp_dir, tokenizer="unicode61", yes=True) client = chromadb.PersistentClient(path=fixed_temp_dir) col = client.get_collection("test") res_after = col.get(where_document={"$contains": "순매"})