Skip to content

Commit

Permalink
Using int instead of uint32 + refactored types + containerized eigen db
Browse files Browse the repository at this point in the history
  • Loading branch information
Ryan-Awad committed Jun 30, 2024
1 parent d642f4c commit dd55914
Show file tree
Hide file tree
Showing 13 changed files with 47 additions and 42 deletions.
8 changes: 4 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
FROM golang:latest
FROM golang:1.20

WORKDIR /go/src/app
WORKDIR /app

COPY . .

RUN go get
RUN go mod download

RUN go build

EXPOSE 8080

CMD ["/go/src/app/eigen_db"]
CMD ["./eigen_db"]
2 changes: 1 addition & 1 deletion api/endpoints/vector/bulk_insert.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
)

type bulkInsertRequestBody struct {
SetOfComponents []t.VectorComponents `json:"setOfComponents" binding:"required"`
SetOfComponents [][]t.VectorComponent `json:"setOfComponents" binding:"required"`
}

func BulkInsert(vectorFactory vector_io.IVectorFactory) func(*gin.Context) {
Expand Down
2 changes: 1 addition & 1 deletion api/endpoints/vector/insert_vector.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
)

type insertRequestBody struct {
Components t.VectorComponents `json:"components" binding:"required"`
Components []t.VectorComponent `json:"components" binding:"required"`
}

func Insert(vectorFactory vector_io.IVectorFactory) func(*gin.Context) {
Expand Down
2 changes: 1 addition & 1 deletion api/endpoints/vector/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (

type searchRequestBody struct {
QueryVectorId t.VectorId `json:"queryVectorId" binding:"required"`
K uint32 `json:"k" binding:"required"`
K int `json:"k" binding:"required"`
}

func Search(searcher vector_io.IVectorSearcher) func(*gin.Context) {
Expand Down
4 changes: 2 additions & 2 deletions api/endpoints/vector/search_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ func TestSearch(t *testing.T) {

w := httptest.NewRecorder()
body := searchRequestBody{
QueryVectorId: uint32(1),
K: uint32(5),
QueryVectorId: 1,
K: 5,
}
bodyJSON, err := json.Marshal(body)
if err != nil {
Expand Down
30 changes: 15 additions & 15 deletions cfg/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@ type Config struct {
TimeInterval time.Duration `yaml:"timeInterval"`
} `yaml:"persistence"`
API struct {
Port uint32 `yaml:"port"`
Port int `yaml:"port"`
Address string `yaml:"address"`
} `yaml:"api"`
HNSWParams struct {
Dimensions uint32 `yaml:"dimensions"`
Dimensions int `yaml:"dimensions"`
SimilarityMetric t.SimilarityMetric `yaml:"similarityMetric"`
SpaceSize uint32 `yaml:"vectorSpaceSize"`
M uint32 `yaml:"M"`
EfConstruction uint32 `yaml:"efConstruction"`
SpaceSize int `yaml:"vectorSpaceSize"`
M int `yaml:"M"`
EfConstruction int `yaml:"efConstruction"`
} `yaml:"hnswParams"`
}

Expand Down Expand Up @@ -57,62 +57,62 @@ func (c *Config) GetPersistenceTimeInterval() time.Duration {
return c.Persistence.TimeInterval
}

func (c *Config) GetAPIPort() uint32 {
func (c *Config) GetAPIPort() int {
return c.API.Port
}

func (c *Config) GetAPIAddress() string {
return c.API.Address
}

func (c *Config) GetHNSWParamsDimensions() uint32 {
func (c *Config) GetHNSWParamsDimensions() int {
return c.HNSWParams.Dimensions
}

func (c *Config) GetHNSWParamsSimilarityMetric() t.SimilarityMetric {
return c.HNSWParams.SimilarityMetric
}

func (c *Config) GetHNSWParamsSpaceSize() uint32 {
func (c *Config) GetHNSWParamsSpaceSize() int {
return c.HNSWParams.SpaceSize
}

func (c *Config) GetHNSWParamsM() uint32 {
func (c *Config) GetHNSWParamsM() int {
return c.HNSWParams.M
}

func (c *Config) GetHNSWParamsEfConstruction() uint32 {
func (c *Config) GetHNSWParamsEfConstruction() int {
return c.HNSWParams.EfConstruction
}

func (c *Config) SetPersistenceTimeInterval(timeInterval time.Duration) {
c.Persistence.TimeInterval = timeInterval
}

func (c *Config) SetAPIPort(port uint32) {
func (c *Config) SetAPIPort(port int) {
c.API.Port = port
}

func (c *Config) SetAPIAddress(address string) {
c.API.Address = address
}

func (c *Config) SetHNSWParamsDimensions(dimensions uint32) {
func (c *Config) SetHNSWParamsDimensions(dimensions int) {
c.HNSWParams.Dimensions = dimensions
}

func (c *Config) SetHNSWParamsSimilarityMetric(similarityMetric t.SimilarityMetric) {
c.HNSWParams.SimilarityMetric = similarityMetric
}

func (c *Config) SetHNSWParamsSpaceSize(spaceSize uint32) {
func (c *Config) SetHNSWParamsSpaceSize(spaceSize int) {
c.HNSWParams.SpaceSize = spaceSize
}

func (c *Config) SetHNSWParamsM(M uint32) {
func (c *Config) SetHNSWParamsM(M int) {
c.HNSWParams.M = M
}

func (c *Config) SetHNSWParamsEfConstruction(efConstruction uint32) {
func (c *Config) SetHNSWParamsEfConstruction(efConstruction int) {
c.HNSWParams.EfConstruction = efConstruction
}
5 changes: 5 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,8 @@ services:
build: .
ports:
- "8080:8080"
volumes:
- eigen_db:/app/eigen

volumes:
eigen_db:
4 changes: 2 additions & 2 deletions test_utils/handler_mocks.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ type MockVector struct{}

type MockVectorSearcher struct{}

func (factory *MockVectorFactory) NewVector(components types.VectorComponents) (vector_io.IVector, error) {
func (factory *MockVectorFactory) NewVector(components []types.VectorComponent) (vector_io.IVector, error) {
NewVectorInvocations++
if len(components) == factory.Dimensions {
return &MockVector{}, nil
Expand All @@ -30,7 +30,7 @@ func (vector *MockVector) Insert() {
InsertInvocations++
}

func (searcher *MockVectorSearcher) SimilaritySearch(queryVectorId types.VectorId, k uint32) ([]types.VectorId, error) {
func (searcher *MockVectorSearcher) SimilaritySearch(queryVectorId types.VectorId, k int) ([]types.VectorId, error) {
SimilaritySearchInvocations++
return []types.VectorId{1, 2, 3}, nil
}
Expand Down
4 changes: 2 additions & 2 deletions types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ import (
"github.com/evan176/hnswgo"
)

type VectorId = uint32
type VectorComponents = []float32
type VectorId = int
type VectorComponent = float32
type VectorSpace = *hnswgo.HNSW

// Config types
Expand Down
4 changes: 2 additions & 2 deletions vector_io/interfaces.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ package vector_io
import t "eigen_db/types"

type IVectorFactory interface {
NewVector(t.VectorComponents) (IVector, error)
NewVector([]t.VectorComponent) (IVector, error)
}

type IVector interface {
Insert()
}

type IVectorSearcher interface {
SimilaritySearch(t.VectorId, uint32) ([]t.VectorId, error)
SimilaritySearch(t.VectorId, int) ([]t.VectorId, error)
}
2 changes: 1 addition & 1 deletion vector_io/persistence.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ func (store *vectorStore) LoadPersistedVectors() error {
}

for id, v := range store.StoredVectors { // load deserialized stored vectors into the vector space
store.vectorSpace.AddPoint(v.Components, id)
store.vectorSpace.AddPoint(v.Components, uint32(id))
}

return nil
Expand Down
6 changes: 3 additions & 3 deletions vector_io/vector.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ import (
)

type Vector struct {
Id t.VectorId `json:"id"`
Components t.VectorComponents `json:"components"`
Id t.VectorId `json:"id"`
Components []t.VectorComponent `json:"components"`
}

func (v *Vector) Insert() {
Expand All @@ -17,7 +17,7 @@ func (v *Vector) Insert() {

type VectorFactory struct{}

func (factory *VectorFactory) NewVector(components t.VectorComponents) (IVector, error) {
func (factory *VectorFactory) NewVector(components []t.VectorComponent) (IVector, error) {
dimensions := cfg.GetConfig().GetHNSWParamsDimensions()
if len(components) == int(dimensions) {
v := &Vector{}
Expand Down
16 changes: 8 additions & 8 deletions vector_io/vector_space.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@ type VectorSearcher struct{}
func (store *vectorStore) writeVector(v *Vector) {
v.Id = vectorStoreInstance.LatestId + 1
vectorStoreInstance.LatestId++
store.vectorSpace.AddPoint(v.Components, v.Id)
store.vectorSpace.AddPoint(v.Components, uint32(v.Id))
store.StoredVectors[v.Id] = v
}

func (searcher *VectorSearcher) SimilaritySearch(queryVectorId t.VectorId, k uint32) ([]t.VectorId, error) {
func (searcher *VectorSearcher) SimilaritySearch(queryVectorId t.VectorId, k int) ([]t.VectorId, error) {
// we perform similarity search using the HNSW algorithm with a time complexity of O(log n)
// when performing the algorithm, we use k+1 as the resulting k-nearest neighbors will always include the query vector itself.
// therefore we simply perform the search for k+1 nearest neighbors and remove the queryVectorId from the output
Expand All @@ -35,28 +35,28 @@ func (searcher *VectorSearcher) SimilaritySearch(queryVectorId t.VectorId, k uin
if err != nil {
return nil, err
}
ids, _ := vectorStoreInstance.vectorSpace.SearchKNN(queryVector.Components, int(k)+1) // returns ids of resulting vectors and the vectors' distances from the query vector
ids, _ := vectorStoreInstance.vectorSpace.SearchKNN(queryVector.Components, k+1) // returns ids of resulting vectors and the vectors' distances from the query vector

idsExcludingQuery := make([]t.VectorId, 0)
for _, id := range ids {
if id != queryVectorId {
idsExcludingQuery = append(idsExcludingQuery, id)
if int(id) != queryVectorId {
idsExcludingQuery = append(idsExcludingQuery, int(id))
}
}
return idsExcludingQuery, nil
}

func instantiateVectorStore(dim uint32, similarityMetric t.SimilarityMetric, spaceSize uint32, M uint32, efConstruction uint32) {
func instantiateVectorStore(dim int, similarityMetric t.SimilarityMetric, spaceSize int, M int, efConstruction int) {
vectorStoreInstance = &vectorStore{}
vectorStoreInstance.vectorSpace = hnswgo.New(
int(dim),
int(M),
int(efConstruction),
int(time.Now().Unix()),
spaceSize,
uint32(spaceSize),
similarityMetric,
)
vectorStoreInstance.StoredVectors = make(map[uint32]*Vector)
vectorStoreInstance.StoredVectors = make(map[int]*Vector)

err := vectorStoreInstance.LoadPersistedVectors()
if err != nil {
Expand Down

0 comments on commit dd55914

Please sign in to comment.