Skip to content

Commit

Permalink
feat: Better prompts and messages for wal export (#101)
Browse files Browse the repository at this point in the history
  • Loading branch information
tazarov authored Jan 14, 2025
1 parent 7beb2be commit 4740c5b
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 4 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,10 @@ This command exports the WAL to a `jsonl` file. The command can be useful in ta
chops wal export /path/to/persist_dir --out /path/to/export.jsonl
```

Additional options:

- `--yes` (`-y`) - skip the confirmation prompt (default: `False`, i.e. the prompt is shown)

> [!NOTE]
> If `--out` (`-o`) is not specified, the command prints the output to stdout.
Expand Down
44 changes: 41 additions & 3 deletions chroma_ops/wal_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,16 @@

import typer

from chroma_ops.constants import DEFAULT_TENANT_ID, DEFAULT_TOPIC_NAMESPACE
from chroma_ops.utils import (
SqliteMode,
get_sqlite_connection,
validate_chroma_persist_dir,
)

from rich.console import Console
from rich.table import Table


@contextmanager
def smart_open(
Expand All @@ -28,9 +32,42 @@ def smart_open(
fh.close()


def export_wal(persist_dir: str, output_file: str) -> None:
def export_wal(
persist_dir: str,
output_file: str,
*,
tenant: Optional[str] = DEFAULT_TENANT_ID,
topic_namespace: Optional[str] = DEFAULT_TOPIC_NAMESPACE,
yes: Optional[bool] = False,
) -> None:
validate_chroma_persist_dir(persist_dir)
console = Console(stderr=True)
table = Table(title="Exporting WAL")
table.add_column("Collection", style="cyan")
table.add_column("WAL Entries", style="magenta")
with get_sqlite_connection(persist_dir, SqliteMode.READ_ONLY) as conn:
collections = conn.execute(
"SELECT c.name,c.id, s.id FROM collections c left join segments s on c.id=s.collection where s.scope='VECTOR'"
).fetchall()
wal_topic_groups = conn.execute(
"SELECT topic, count(*) FROM embeddings_queue group by topic"
).fetchall()
for collection in collections:
topic = f"persistent://{tenant}/{topic_namespace}/{collection[1]}"
table.add_row(
collection[0],
str([s[1] for s in wal_topic_groups if s[0] == topic][0]),
)
console.print(table)
if not yes:
console.print("Are you sure you want to export the WAL? (y/N)")
if not typer.confirm(
"\nAre you sure you want to export the WAL?",
default=False,
show_default=True,
):
console.print("[yellow]WAL export cancelled by user[/yellow]")
return
cursor = conn.cursor()
query = "SELECT * FROM embeddings_queue ORDER BY seq_id ASC;"
cursor.execute(query)
Expand All @@ -47,11 +84,12 @@ def export_wal(persist_dir: str, output_file: str) -> None:
json_file.write(json.dumps(row_data) + "\n")
exported_rows += 1

typer.echo(f"Exported {exported_rows} rows", file=sys.stderr)
console.print(f"[green]Exported {exported_rows} rows[/green]")


def command(
persist_dir: str = typer.Argument(..., help="The persist directory"),
out: str = typer.Option(None, "--out", "-o", help="The output jsonl file"),
yes: Optional[bool] = typer.Option(False, "--yes", "-y", help="Skip confirmation"),
) -> None:
export_wal(persist_dir, out)
export_wal(persist_dir, out, yes=yes)
2 changes: 1 addition & 1 deletion tests/test_wal_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def test_basic_export(records_to_add: int) -> None:
if s["scope"] == SegmentScope.VECTOR
]
_sync_threshold = vector_segments[0]._sync_threshold
export_wal(temp_dir, temp_file.name)
export_wal(temp_dir, temp_file.name, yes=True)
assert os.path.exists(temp_file.name)
if tuple(int(part) for part in chromadb.__version__.split(".")) > (0, 5, 5):
if records_to_add % _sync_threshold == 0:
Expand Down

0 comments on commit 4740c5b

Please sign in to comment.