Skip to content

Commit

Permalink
make store index fully private
Browse files Browse the repository at this point in the history
  • Loading branch information
micpst committed May 25, 2024
1 parent 779220c commit e164975
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 61 deletions.
74 changes: 37 additions & 37 deletions pkg/store/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,33 +9,33 @@ import (
"github.com/micpst/minisearch/pkg/tokenizer"
)

type FindParams struct {
Term string
Property string
Exact bool
Tolerance int
Relevance BM25Params
DocsCount int
type findParams struct {
term string
property string
exact bool
tolerance int
relevance BM25Params
docsCount int
}

type IndexParams[S Schema] struct {
Id string
Document S
DocsCount int
type indexParams[S Schema] struct {
id string
document S
docsCount int
language tokenizer.Language
tokenizerConfig *tokenizer.Config
}

type Index[S Schema] struct {
type index[S Schema] struct {
indexes map[string]*radix.Trie
searchableProperties []string
avgFieldLength map[string]float64
fieldLengths map[string]map[string]int
tokenOccurrences map[string]map[string]int
}

func newIndex[S Schema]() *Index[S] {
idx := &Index[S]{
func newIndex[S Schema]() *index[S] {
idx := &index[S]{
indexes: make(map[string]*radix.Trie),
searchableProperties: make([]string, 0),
avgFieldLength: make(map[string]float64),
Expand All @@ -46,7 +46,7 @@ func newIndex[S Schema]() *Index[S] {
return idx
}

func (idx *Index[S]) build() {
func (idx *index[S]) build() {
var s S
for key, value := range flattenSchema(s) {
switch value.(type) {
Expand All @@ -61,8 +61,8 @@ func (idx *Index[S]) build() {
}
}

func (idx *Index[S]) Insert(params *IndexParams[S]) {
document := flattenSchema(params.Document)
func (idx *index[S]) insert(params *indexParams[S]) {
document := flattenSchema(params.document)

for propName, index := range idx.indexes {
tokens, _ := tokenizer.Tokenize(&tokenizer.TokenizeParams{
Expand All @@ -77,20 +77,20 @@ func (idx *Index[S]) Insert(params *IndexParams[S]) {
for token, count := range tokensCount {
tokenFrequency := float64(count) / allTokensCount
index.Insert(&radix.InsertParams{
Id: params.Id,
Id: params.id,
Word: token,
TermFrequency: tokenFrequency,
})
idx.tokenOccurrences[propName][token]++
}

idx.avgFieldLength[propName] = (idx.avgFieldLength[propName]*float64(params.DocsCount-1) + allTokensCount) / float64(params.DocsCount)
idx.fieldLengths[propName][params.Id] = int(allTokensCount)
idx.avgFieldLength[propName] = (idx.avgFieldLength[propName]*float64(params.docsCount-1) + allTokensCount) / float64(params.docsCount)
idx.fieldLengths[propName][params.id] = int(allTokensCount)
}
}

func (idx *Index[S]) Delete(params *IndexParams[S]) {
document := flattenSchema(params.Document)
func (idx *index[S]) delete(params *indexParams[S]) {
document := flattenSchema(params.document)

for propName, index := range idx.indexes {
tokens, _ := tokenizer.Tokenize(&tokenizer.TokenizeParams{
Expand All @@ -101,7 +101,7 @@ func (idx *Index[S]) Delete(params *IndexParams[S]) {

for _, token := range tokens {
index.Delete(&radix.DeleteParams{
Id: params.Id,
Id: params.id,
Word: token,
})
idx.tokenOccurrences[propName][token]--
Expand All @@ -110,30 +110,30 @@ func (idx *Index[S]) Delete(params *IndexParams[S]) {
}
}

idx.avgFieldLength[propName] = (idx.avgFieldLength[propName]*float64(params.DocsCount) - float64(len(tokens))) / float64(params.DocsCount-1)
delete(idx.fieldLengths[propName], params.Id)
idx.avgFieldLength[propName] = (idx.avgFieldLength[propName]*float64(params.docsCount) - float64(len(tokens))) / float64(params.docsCount-1)
delete(idx.fieldLengths[propName], params.id)
}
}

func (idx *Index[S]) Find(params *FindParams) map[string]float64 {
func (idx *index[S]) find(params *findParams) map[string]float64 {
idScores := make(map[string]float64)

if index, ok := idx.indexes[params.Property]; ok {
if index, ok := idx.indexes[params.property]; ok {
infos := index.Find(&radix.FindParams{
Term: params.Term,
Tolerance: params.Tolerance,
Exact: params.Exact,
Term: params.term,
Tolerance: params.tolerance,
Exact: params.exact,
})
for _, info := range infos {
idScores[info.Id] = lib.BM25(
info.TermFrequency,
idx.tokenOccurrences[params.Property][params.Term],
idx.fieldLengths[params.Property][info.Id],
idx.avgFieldLength[params.Property],
params.DocsCount,
params.Relevance.K,
params.Relevance.B,
params.Relevance.D,
idx.tokenOccurrences[params.property][params.term],
idx.fieldLengths[params.property][info.Id],
idx.avgFieldLength[params.property],
params.docsCount,
params.relevance.K,
params.relevance.B,
params.relevance.D,
)
}
}
Expand Down
48 changes: 24 additions & 24 deletions pkg/store/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ type Config struct {
type MemDB[S Schema] struct {
mutex sync.RWMutex
documents map[string]S
index *Index[S]
index *index[S]
defaultLanguage tokenizer.Language
tokenizerConfig *tokenizer.Config
}
Expand Down Expand Up @@ -124,10 +124,10 @@ func (db *MemDB[S]) Insert(params *InsertParams[S]) (Record[S], error) {

db.documents[id] = params.Document

db.index.Insert(&IndexParams[S]{
Id: id,
Document: params.Document,
DocsCount: len(db.documents),
db.index.insert(&indexParams[S]{
id: id,
document: params.Document,
docsCount: len(db.documents),
language: language,
tokenizerConfig: db.tokenizerConfig,
})
Expand Down Expand Up @@ -197,17 +197,17 @@ func (db *MemDB[S]) Update(params *UpdateParams[S]) (Record[S], error) {

db.documents[params.Id] = params.Document

db.index.Insert(&IndexParams[S]{
Id: params.Id,
Document: params.Document,
DocsCount: len(db.documents),
db.index.insert(&indexParams[S]{
id: params.Id,
document: params.Document,
docsCount: len(db.documents),
language: language,
tokenizerConfig: db.tokenizerConfig,
})
db.index.Delete(&IndexParams[S]{
Id: params.Id,
Document: oldDocument,
DocsCount: len(db.documents),
db.index.delete(&indexParams[S]{
id: params.Id,
document: oldDocument,
docsCount: len(db.documents),
language: language,
tokenizerConfig: db.tokenizerConfig,
})
Expand All @@ -232,10 +232,10 @@ func (db *MemDB[S]) Delete(params *DeleteParams[S]) error {
return &DocumentNotFoundError{Id: params.Id}
}

db.index.Delete(&IndexParams[S]{
Id: params.Id,
Document: document,
DocsCount: len(db.documents),
db.index.delete(&indexParams[S]{
id: params.Id,
document: document,
docsCount: len(db.documents),
language: language,
tokenizerConfig: db.tokenizerConfig,
})
Expand Down Expand Up @@ -273,13 +273,13 @@ func (db *MemDB[S]) Search(params *SearchParams) (SearchResult[S], error) {

for _, prop := range properties {
for _, token := range tokens {
idScores := db.index.Find(&FindParams{
Term: token,
Property: prop,
Exact: params.Exact,
Tolerance: params.Tolerance,
Relevance: params.Relevance,
DocsCount: len(db.documents),
idScores := db.index.find(&findParams{
term: token,
property: prop,
exact: params.Exact,
tolerance: params.Tolerance,
relevance: params.Relevance,
docsCount: len(db.documents),
})
for id, score := range idScores {
allIdScores[id] += score
Expand Down

0 comments on commit e164975

Please sign in to comment.