Skip to content

Commit

Permalink
feat: Go - clean action (#40)
Browse files Browse the repository at this point in the history
feat: Go - clean action

chore: go mod tidy

docs: Updated README.md
  • Loading branch information
tazarov authored Jan 3, 2025
1 parent 8233ad6 commit ed26a2b
Show file tree
Hide file tree
Showing 20 changed files with 1,275 additions and 3 deletions.
73 changes: 73 additions & 0 deletions .github/workflows/go-binaries.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Release workflow: on every push to main, lint, test and build the project,
# then create a GitHub release and upload the built archives to it.
on:
push:
branches:
- main

jobs:
releases-matrix:
name: Release Go Binary
runs-on: ubuntu-latest
# 3 operating systems x 2 architectures, minus windows/arm64: 5 jobs in total.
# NOTE(review): no step below references matrix.goos or matrix.goarch, so every
# matrix job appears to run the same commands on ubuntu-latest — confirm that
# `make build` picks up the target platform some other way.
strategy:
matrix:
goos: [linux, windows, darwin]
goarch: [amd64, arm64]
exclude:
- goarch: arm64
goos: windows
steps:
- name: Checkout
uses: actions/checkout@v3
with:
# 0 fetches the full history instead of a shallow clone.
fetch-depth: 0
- name: Set up Go
uses: actions/setup-go@v4
with:
# Take the Go toolchain version from go.mod rather than pinning it here.
go-version-file: 'go.mod'
- name: Run golangci-lint
uses: golangci/golangci-lint-action@v3
with:
version: latest
- name: Test
run: make test
- name: Build
run: make build
- name: Create release
id: create_release
uses: anzz1/action-create-release@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
# NOTE(review): this workflow declares no `needs:` and no job named `tag-job`,
# so this expression presumably evaluates to an empty string — confirm.
tag_name: ${{ needs.tag-job.outputs.release-tag }}
# NOTE(review): no step in this job has the id `release_name` — confirm.
release_name: ${{ steps.release_name.outputs.release_name }}
- name: Upload artifacts
id: upload_release
uses: actions/github-script@v3
with:
github-token: ${{secrets.GITHUB_TOKEN}}
# Inline script: uploads the archives found under ./artifact/release to the
# release created by the `create_release` step.
script: |
const path = require('path');
const fs = require('fs').promises;
// Id of the release created by the `create_release` step, interpolated by
// GitHub Actions as a string literal before the script runs.
const release_id = '${{ steps.create_release.outputs.id }}';
// Recursively walks dirPath and uploads every .zip / .tar.gz file it finds as
// an asset of the release identified by release_id. Uploads run one at a time;
// other file types are ignored.
async function uploadDir(dirPath) {
const entries = await fs.readdir(dirPath, { withFileTypes: true });
for (const entry of entries) {
const fullPath = path.join(dirPath, entry.name);
if (entry.isDirectory()) {
// If it's a directory, recursively upload its contents
await uploadDir(fullPath);
} else if (entry.name.endsWith('.zip') || entry.name.endsWith('.tar.gz')) {
// If it's a .zip or .tar.gz archive, upload it under its bare file name
console.log('uploadReleaseAsset', entry.name);
await github.repos.uploadReleaseAsset({
owner: context.repo.owner,
repo: context.repo.repo,
release_id: release_id,
name: entry.name,
data: await fs.readFile(fullPath)
});
}
}
}
// Entry point: the path is relative to the step's working directory.
await uploadDir('./artifact/release');
28 changes: 28 additions & 0 deletions .github/workflows/go-lint.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Runs golangci-lint against the Go code on every pull request.
name: Go Lint

on:
pull_request: {}

jobs:
lint:
runs-on: ubuntu-latest
permissions:
# Required: allow read access to the content for analysis.
contents: read
# Optional: allow read access to pull request. Use with `only-new-issues` option.
pull-requests: read
# Optional: Allow write access to checks to allow the action to annotate code in the PR.
checks: write
steps:
- name: Checkout
uses: actions/checkout@v3
with:
# 0 fetches the full history instead of a shallow clone.
fetch-depth: 0
- name: Set up Go
uses: actions/setup-go@v4
with:
# Take the Go toolchain version from go.mod rather than pinning it here.
go-version-file: 'go.mod'
- name: Run golangci-lint
uses: golangci/golangci-lint-action@v3
with:
version: latest
33 changes: 33 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,36 @@ test:
# Build the Docker image, tagged chromadb-dp, using the current directory as the build context.
build-docker:
@echo "Building docker image"
@docker build -t chromadb-dp .


# Run every Go test in the module verbosely with the fts5 build tag.
# --count=1 forces a fresh run instead of reusing cached test results.
.PHONY: go-test
go-test:
@echo "Running tests"
@go test --count=1 -v --tags "fts5" ./...


# Install the binary with the fts5 build tag. main.Version is stamped as
# 1.0.1-<short commit hash> and main.BuildDate as today's date (YYYY-MM-DD).
# `$$` passes a literal `$` through make so the shell runs the substitutions.
.PHONY: go-install
go-install:
@go install -tags "fts5" -ldflags "-X 'main.Version=1.0.1-$$(git log -1 --format=%h)' -X 'main.BuildDate=$$(date +%Y-%m-%d)'"

# Lint the Go code with golangci-lint.
.PHONY: go-lint
go-lint:
@golangci-lint run

# Lint the Go code with golangci-lint and apply its automatic fixes.
# The .PHONY entry must name this exact target; it previously named `lint-fix`,
# which left `go-lint-fix` unprotected against a same-named file.
.PHONY: go-lint-fix
go-lint-fix:
	@golangci-lint run --fix ./...

# Build the `chops` binary in the current directory with the fts5 build tag.
# Stamps main.Version (1.0.1), main.BuildHash (short commit hash) and
# main.BuildDate (YYYY-MM-DD) through the linker.
# NOTE(review): go-install folds the commit hash into main.Version instead of
# setting main.BuildHash — confirm which scheme is intended.
.PHONY: go-build
go-build:
@go build -tags "fts5" -ldflags "-X 'main.Version=1.0.1' -X 'main.BuildHash=$$(git log -1 --format=%h)' -X 'main.BuildDate=$$(date +%Y-%m-%d)'" -o chops

# Package the freshly built binary as chops-<goos>-<goarch>.tar.gz.
# The platform comes from `go env`, which honours GOOS/GOARCH set in the
# environment. GitHub Actions expressions are not understood by make: it
# expanded them to empty strings, so the archive was always `chops--.tar.gz`.
.PHONY: go-binary-tarball
go-binary-tarball: go-build
	@tar -czf chops-$$(go env GOOS)-$$(go env GOARCH).tar.gz chops

# Regenerate the sqlc-generated Go code.
# Installs the latest sqlc first, so the generator version is not pinned.
.PHONY: sqlc
sqlc:
@echo "Generating SQLC code"
@go install github.com/sqlc-dev/sqlc/cmd/sqlc@latest
@sqlc generate
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,18 @@ Before you use these tools make sure your ChromaDB persistent dir, on which you

## Installation

### Python

```bash
pip install chromadb-ops
```

### Go

```bash
go install github.com/amikos-tech/chromadb-ops/cmd/chops@latest
```

## Usage

### Info
Expand Down Expand Up @@ -210,6 +218,9 @@ chops clean /path/to/persist_dir
> Note: The command is particularly useful for Windows users, where deleting collections may leave behind orphaned vector
> segment directories due to Windows file locking.
For the `go` version of the tool, it is also possible to use the `--dry-run` option to see what would be deleted
without actually deleting anything.

### Using Docker

> Note: You have to mount your persist directory into the container for the commands to work.
Expand Down
4 changes: 2 additions & 2 deletions chroma_ops/scripts/drop_fts.sql
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ DROP TABLE IF EXISTS embedding_fulltext_search_idx;
CREATE VIRTUAL TABLE embedding_fulltext_search USING fts5(string_value, tokenize='trigram');
INSERT INTO embedding_fulltext_search (rowid, string_value) SELECT em.rowid, COALESCE(doc.string_value, '')
FROM embeddings em
LEFT JOIN embedding_metadata doc
ON em.id = doc.id
LEFT JOIN embedding_metadata doc
ON em.id = doc.id
AND doc.key = 'chroma:document'
GROUP BY doc.id;
COMMIT TRANSACTION;
97 changes: 97 additions & 0 deletions cmd/clean.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package cmd

import (
"context"
"fmt"
"os"
"path/filepath"
"slices"

"database/sql"

"github.com/amikos-tech/chromadb-ops/internal/chroma"
chromadb "github.com/amikos-tech/chromadb-ops/internal/db"
_ "github.com/mattn/go-sqlite3"
"github.com/pkg/errors"
"github.com/spf13/cobra"
)

// CleanCommand is the "clean" subcommand. It requires exactly one positional
// argument and hands the work to clean. When clean fails, usage output is
// silenced before the error is returned.
var CleanCommand = &cobra.Command{
	Use:   "clean",
	Short: "Clean up orphanated segments",
	Args:  cobra.ExactArgs(1),
	RunE: func(cmd *cobra.Command, args []string) error {
		if runErr := clean(cmd, args); runErr != nil {
			// A failed run is not a usage mistake, so keep the usage text out of the output.
			cmd.SilenceUsage = true
			return runErr
		}
		return nil
	},
}

// clean deletes segment directories under the persist directory whose names do
// not match the ID of any segment stored in chroma.sqlite3.
//
// args[0] is the persist directory. When the "dry-run" flag is set, orphaned
// directories are only reported and nothing is removed. Progress messages and
// the final summary are written to stderr.
//
// An error is returned when the persist directory fails validation, the flag
// cannot be read, the database cannot be opened or queried, the segment
// directories cannot be listed, or a removal fails. Processing stops at the
// first failed removal.
func clean(cmd *cobra.Command, args []string) error {
	persistDir := args[0]
	if err := chroma.CheckPersistDir(persistDir); err != nil {
		return errors.Wrap(err, "failed to check persist directory")
	}
	dryRun, err := cmd.Flags().GetBool("dry-run")
	if err != nil {
		return errors.Wrap(err, "failed to get dry-run flag")
	}
	if dryRun {
		fmt.Fprintf(os.Stderr, "Note: Dry run mode enabled. No changes will be made.\n")
	}
	fmt.Fprintf(os.Stderr, "Cleaning orphanated segments in %s\n", persistDir)

	ctx := context.Background()

	// mode=ro: the database is only consulted for segment IDs, never modified.
	sqlFile := filepath.Join(persistDir, "chroma.sqlite3")
	db, err := sql.Open("sqlite3", "file:"+sqlFile+"?mode=ro")
	if err != nil {
		return errors.Wrap(err, "failed to open database")
	}
	defer func() {
		// The handle is read-only, so a failed close cannot lose data; the
		// error is deliberately ignored.
		_ = db.Close()
	}()

	queries := chromadb.New(db)
	segments, err := queries.GetSegments(ctx)
	if err != nil {
		return errors.Wrap(err, "failed to get segments")
	}

	segmentDirs, err := chroma.GetSegmentDirs(persistDir)
	if err != nil {
		return errors.Wrap(err, "failed to get segment dirs")
	}

	if len(segmentDirs) == 0 {
		fmt.Fprintln(os.Stderr, "no segments found")
		return nil
	}

	segmentIDs := make([]string, 0, len(segments))
	for _, segment := range segments {
		segmentIDs = append(segmentIDs, segment.ID)
	}

	var deletedDirs []string
	for _, segmentDir := range segmentDirs {
		if slices.Contains(segmentIDs, segmentDir) {
			continue
		}
		orphanPath := filepath.Join(persistDir, segmentDir)
		fmt.Fprintf(os.Stderr, "Deleting orphanated segment dir: %s\n", orphanPath)
		if dryRun {
			continue
		}
		if err := os.RemoveAll(orphanPath); err != nil {
			return errors.Wrap(err, "failed to delete orphanated segment dir")
		}
		deletedDirs = append(deletedDirs, orphanPath)
	}
	// In dry-run mode nothing is appended above, so the summary reports 0.
	fmt.Fprintf(os.Stderr, "Deleted %d orphanated segment dirs: %v\n", len(deletedDirs), deletedDirs)
	return nil
}

// init registers the --dry-run (-d) boolean flag, defaulting to false, on
// CleanCommand and attaches the command to RootCmd.
func init() {
	flags := CleanCommand.Flags()
	flags.BoolP("dry-run", "d", false, "Dry run the operation")
	RootCmd.AddCommand(CleanCommand)
}
Loading

0 comments on commit ed26a2b

Please sign in to comment.