From 4740c5bf4633c1a784498db9a4ddf309cef70911 Mon Sep 17 00:00:00 2001
From: Trayan Azarov
Date: Tue, 14 Jan 2025 11:07:28 +0200
Subject: [PATCH] feat: Better prompts and messages for wal export (#101)

---
 README.md                |  4 ++++
 chroma_ops/wal_export.py | 63 +++++++++++++++++++++++++++++++++++++++--
 tests/test_wal_export.py |  2 +-
 3 files changed, 65 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index ba21e51..9a638ce 100644
--- a/README.md
+++ b/README.md
@@ -292,6 +292,10 @@ This commands exports the WAL to a `jsonl` file. The command can be useful in ta
 chops wal export /path/to/persist_dir --out /path/to/export.jsonl
 ```
 
+Additional options:
+
+- `--yes` (`-y`) - skip confirmation prompt (default: `False`, prompt will be shown)
+
 > [!NOTE]
 > If --out or -o is not specified the command will print the output to stdout.
 
diff --git a/chroma_ops/wal_export.py b/chroma_ops/wal_export.py
index 9485b07..42b78f2 100644
--- a/chroma_ops/wal_export.py
+++ b/chroma_ops/wal_export.py
@@ -6,12 +6,16 @@
 
 import typer
 
+from chroma_ops.constants import DEFAULT_TENANT_ID, DEFAULT_TOPIC_NAMESPACE
 from chroma_ops.utils import (
     SqliteMode,
     get_sqlite_connection,
     validate_chroma_persist_dir,
 )
 
+from rich.console import Console
+from rich.table import Table
+
 
 @contextmanager
 def smart_open(
@@ -28,9 +32,61 @@ def smart_open(
             fh.close()
 
 
-def export_wal(persist_dir: str, output_file: str) -> None:
+def export_wal(
+    persist_dir: str,
+    output_file: str,
+    *,
+    tenant: Optional[str] = DEFAULT_TENANT_ID,
+    topic_namespace: Optional[str] = DEFAULT_TOPIC_NAMESPACE,
+    yes: Optional[bool] = False,
+) -> None:
+    """Export the WAL (``embeddings_queue`` table) as JSON lines.
+
+    A per-collection summary of WAL entry counts is printed to stderr and,
+    unless ``yes`` is set, the user must confirm before any row is exported.
+
+    Args:
+        persist_dir: Chroma persist directory; validated before use.
+        output_file: Path of the JSONL file to write. Per the README, the
+            export is printed to stdout when no output file is given.
+        tenant: Tenant part of the WAL topic name used to match entries
+            to collections.
+        topic_namespace: Namespace part of the WAL topic name.
+        yes: Skip the confirmation prompt when true.
+    """
     validate_chroma_persist_dir(persist_dir)
+    console = Console(stderr=True)
+    table = Table(title="Exporting WAL")
+    table.add_column("Collection", style="cyan")
+    table.add_column("WAL Entries", style="magenta")
     with get_sqlite_connection(persist_dir, SqliteMode.READ_ONLY) as conn:
+        collections = conn.execute(
+            "SELECT c.name,c.id, s.id FROM collections c left join segments s on c.id=s.collection where s.scope='VECTOR'"
+        ).fetchall()
+        # Index the per-topic counts once so each collection is a dict lookup.
+        wal_entry_counts = {
+            row[0]: row[1]
+            for row in conn.execute(
+                "SELECT topic, count(*) FROM embeddings_queue group by topic"
+            ).fetchall()
+        }
+        for collection in collections:
+            topic = f"persistent://{tenant}/{topic_namespace}/{collection[1]}"
+            # A collection with no queued entries has no row in the grouped
+            # result; show 0 instead of failing with IndexError.
+            table.add_row(collection[0], str(wal_entry_counts.get(topic, 0)))
+        console.print(table)
+        if not yes:
+            # err=True keeps the prompt on stderr: stdout may be the export
+            # destination when no output file is given.
+            if not typer.confirm(
+                "\nAre you sure you want to export the WAL?",
+                default=False,
+                show_default=True,
+                err=True,
+            ):
+                console.print("[yellow]WAL export cancelled by user[/yellow]")
+                return
         cursor = conn.cursor()
         query = "SELECT * FROM embeddings_queue ORDER BY seq_id ASC;"
         cursor.execute(query)
@@ -47,11 +103,12 @@ def export_wal(persist_dir: str, output_file: str) -> None:
                 json_file.write(json.dumps(row_data) + "\n")
                 exported_rows += 1
 
-        typer.echo(f"Exported {exported_rows} rows", file=sys.stderr)
+        console.print(f"[green]Exported {exported_rows} rows[/green]")
 
 
 def command(
     persist_dir: str = typer.Argument(..., help="The persist directory"),
     out: str = typer.Option(None, "--out", "-o", help="The output jsonl file"),
+    yes: Optional[bool] = typer.Option(False, "--yes", "-y", help="Skip confirmation"),
 ) -> None:
-    export_wal(persist_dir, out)
+    export_wal(persist_dir, out, yes=yes)
diff --git a/tests/test_wal_export.py b/tests/test_wal_export.py
index 9fe88ea..579f868 100644
--- a/tests/test_wal_export.py
+++ b/tests/test_wal_export.py
@@ -37,7 +37,7 @@ def test_basic_export(records_to_add: int) -> None:
                 if s["scope"] == SegmentScope.VECTOR
             ]
             _sync_threshold = vector_segments[0]._sync_threshold
-            export_wal(temp_dir, temp_file.name)
+            export_wal(temp_dir, temp_file.name, yes=True)
             assert os.path.exists(temp_file.name)
             if tuple(int(part) for part in chromadb.__version__.split(".")) > (0, 5, 5):
                 if records_to_add % _sync_threshold == 0: