Skip to content

Commit

Permalink
Merge pull request #75 from SwissDataScienceCenter/case-insensitive-s…
Browse files Browse the repository at this point in the history
…earch

feat: case insensitive search
  • Loading branch information
eikek authored Mar 27, 2024
2 parents 22c6f17 + c346679 commit b56f66c
Show file tree
Hide file tree
Showing 28 changed files with 236 additions and 84 deletions.
1 change: 1 addition & 0 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,7 @@ lazy val searchCli = project
.settings(
name := "search-cli",
description := "A set of CLI tools",
Compile / run / fork := true,
libraryDependencies ++=
Dependencies.decline ++
Dependencies.http4sClient
Expand Down
6 changes: 3 additions & 3 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 14 additions & 16 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -41,27 +41,25 @@
jq
coreutils
scala-cli
kubectl
devshellToolsPkgs.sbt17
devshellToolsPkgs.openapi-docs
]
++ (builtins.attrValues selfPkgs);

queueNames = [
"projectCreated"
"projectUpdated"
"projectRemoved"
"projectAuthAdded"
"projectAuthUpdated"
"projectAuthRemoved"
"userAdded"
"userUpdated"
"userRemoved"
];
queueNameConfig = builtins.listToAttrs (builtins.map (qn: {
name = "RS_REDIS_QUEUE_${qn}";
value = qn;
})
queueNames);
# Queue names keyed by a short identifier. The keys are used to build the
# RS_REDIS_QUEUE_<key> names in queueNameConfig; the values are the
# queue names themselves.
queueNames = {
projectCreated = "project.created";
projectUpdated = "project.updated";
projectRemoved = "project.removed";
projectAuthAdded = "projectAuth.added";
projectAuthUpdated = "projectAuth.updated";
projectAuthRemoved = "projectAuth.removed";
userAdded = "user.added";
userUpdated = "user.updated";
userRemoved = "user.removed";
};

# Turns every `key = queueName` entry of queueNames into an attribute named
# "RS_REDIS_QUEUE_<key>" whose value is the queue name.
# NOTE(review): presumably exported as environment variables for the services — confirm at the use site.
queueNameConfig = with nixpkgs.lib; mapAttrs' (key: qn: nameValuePair "RS_REDIS_QUEUE_${key}" qn) queueNames;
in {
formatter = pkgs.alejandra;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,14 @@ import io.renku.search.api.data.*
import io.renku.search.model.Id
import io.renku.search.model.users.FirstName
import io.renku.search.query.Query
import io.renku.search.solr.client.SearchSolrSpec
import io.renku.search.solr.client.SearchSolrSuite
import io.renku.search.solr.client.SolrDocumentGenerators.*
import io.renku.search.solr.documents.{EntityDocument, User as SolrUser}
import munit.CatsEffectSuite
import scribe.Scribe
import org.scalacheck.Gen
import io.renku.search.model.projects.Visibility

class SearchApiSpec extends CatsEffectSuite with SearchSolrSpec:
class SearchApiSpec extends SearchSolrSuite:

private given Scribe[IO] = scribe.cats[IO]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,19 @@ package io.renku.search.provision
import cats.effect.{IO, Resource}
import cats.syntax.all.*
import fs2.Stream

import io.renku.queue.client.{QueueClient, QueueSpec}
import io.renku.redis.client.{ClientId, QueueName}
import io.renku.search.LoggingConfigure
import io.renku.search.model.Id
import io.renku.search.provision.handler.PipelineSteps
import io.renku.search.provision.project.ProjectSyntax
import io.renku.search.provision.user.UserSyntax
import io.renku.search.solr.client.{SearchSolrClient, SearchSolrSpec}
import io.renku.search.solr.client.{SearchSolrClient, SearchSolrSuite}
import io.renku.search.solr.documents.*
import munit.CatsEffectSuite

trait ProvisioningSuite
extends CatsEffectSuite
with LoggingConfigure
extends SearchSolrSuite
with QueueSpec
with SearchSolrSpec
with ProjectSyntax
with UserSyntax:

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ import io.renku.search.solr.client.SearchSolrClient
import io.renku.search.solr.client.SolrDocumentGenerators
import io.renku.search.solr.documents.PartialEntityDocument
import io.renku.search.solr.documents.{Project as ProjectDocument, SolrDocument}
import munit.CatsEffectSuite

class AuthorizationAddedProvisioningSpec extends ProvisioningSuite:

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ import io.renku.search.solr.client.SearchSolrClient
import io.renku.search.solr.client.SolrDocumentGenerators
import io.renku.search.solr.documents.PartialEntityDocument
import io.renku.search.solr.documents.{Project as ProjectDocument, *}
import munit.CatsEffectSuite

class AuthorizationRemovedProvisioningSpec extends ProvisioningSuite:
testCases.foreach { tc =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ import io.renku.search.solr.client.SolrDocumentGenerators
import io.renku.search.solr.documents.PartialEntityDocument
import io.renku.search.solr.documents.SolrDocument
import io.renku.search.solr.documents.Project as ProjectDocument
import munit.CatsEffectSuite

class AuthorizationUpdatedProvisioningSpec extends ProvisioningSuite:
testCases.foreach { tc =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ import io.renku.search.provision.{BackgroundCollector, ProvisioningSuite}
import io.renku.search.solr.client.SearchSolrClient
import io.renku.search.solr.client.SolrDocumentGenerators
import io.renku.search.solr.documents.{Project as ProjectDocument, *}
import munit.CatsEffectSuite

class ProjectCreatedProvisioningSpec extends ProvisioningSuite:

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ import io.renku.search.query.Query
import io.renku.search.query.Query.Segment
import io.renku.search.query.Query.Segment.typeIs
import io.renku.search.solr.documents.{CompoundId, EntityDocument}
import munit.CatsEffectSuite

class ProjectRemovedProcessSpec extends ProvisioningSuite:

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ import io.renku.search.GeneratorSyntax.*
import io.renku.search.model.Id
import io.renku.search.provision.ProvisioningSuite
import io.renku.search.solr.documents.{CompoundId, EntityDocument}
import munit.CatsEffectSuite

class ProjectUpdatedProvisioningSpec extends ProvisioningSuite:

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ import io.renku.search.GeneratorSyntax.*
import io.renku.search.model.Id
import io.renku.search.provision.ProvisioningSuite
import io.renku.search.solr.documents.{CompoundId, EntityDocument}
import munit.CatsEffectSuite

class UserAddedProvisioningSpec extends ProvisioningSuite:

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ import io.renku.search.GeneratorSyntax.*
import io.renku.search.model.Id
import io.renku.search.provision.ProvisioningSuite
import io.renku.search.solr.documents.{CompoundId, EntityDocument}
import munit.CatsEffectSuite

class UserUpdatedProvisioningSpec extends ProvisioningSuite:
(firstNameUpdate :: lastNameUpdate :: emailUpdate :: noUpdate :: Nil).foreach {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ object EntityDocumentSchema:
object FieldTypes:
val id: FieldType = FieldType.id(TypeName("SearchId")).makeDocValue
val string: FieldType = FieldType.str(TypeName("SearchString")).makeDocValue
val text: FieldType = FieldType.text(TypeName("SearchText"), Analyzer.classic)
val text: FieldType = FieldType.text(TypeName("SearchText"), Analyzer.defaultSearch)
val textAll: FieldType =
FieldType.text(TypeName("SearchTextAll"), Analyzer.classic).makeMultiValued
FieldType.text(TypeName("SearchTextAll"), Analyzer.defaultSearch).makeMultiValued
val dateTime: FieldType = FieldType.dateTimePoint(TypeName("SearchDateTime"))

val initialEntityDocumentAdd: Seq[SchemaCommand] = Seq(
Expand All @@ -61,7 +61,7 @@ object EntityDocumentSchema:
SchemaCommand.Add(FieldTypes.dateTime),
SchemaCommand.Add(Field(Fields.entityType, FieldTypes.string)),
SchemaCommand.Add(Field(Fields.kind, FieldTypes.string)),
SchemaCommand.Add(Field(Fields.name, FieldTypes.string)),
SchemaCommand.Add(Field(Fields.name, FieldTypes.text)),
SchemaCommand.Add(Field(Fields.slug, FieldTypes.string)),
SchemaCommand.Add(Field(Fields.repositories, FieldTypes.string).makeMultiValued),
SchemaCommand.Add(Field(Fields.visibility, FieldTypes.string)),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,8 @@ import io.renku.search.solr.documents.EntityOps.*
import io.renku.search.solr.documents.*
import io.renku.search.solr.schema.EntityDocumentSchema.Fields
import io.renku.solr.client.QueryData
import munit.CatsEffectSuite

class SearchSolrClientSpec extends CatsEffectSuite with SearchSolrSpec:
class SearchSolrClientSpec extends SearchSolrSuite:

test("be able to insert and fetch a Project document"):
withSearchSolrClient().use { client =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,26 +21,21 @@ package io.renku.search.solr.client
import cats.effect.{IO, Resource}
import io.renku.search.solr.schema.Migrations
import io.renku.solr.client.migration.SchemaMigrator
import io.renku.solr.client.util.SolrSpec
import io.renku.solr.client.{SolrClient, SolrConfig}
import io.renku.solr.client.util.SolrClientBaseSuite
import io.renku.solr.client.SolrClient

trait SearchSolrSpec extends SolrSpec:
self: munit.Suite =>
abstract class SearchSolrSuite extends SolrClientBaseSuite:

abstract class SolrFixture
extends Fixture[Resource[IO, SearchSolrClient[IO]]]("search-solr"):
def solrConfig: SolrConfig
extends Fixture[Resource[IO, SearchSolrClient[IO]]]("search-solr")

val withSearchSolrClient: SolrFixture = new SolrFixture:

def apply(): Resource[IO, SearchSolrClient[IO]] =
SolrClient[IO](solrConfig)
SolrClient[IO](solrConfig.copy(core = server.searchCoreName))
.evalTap(SchemaMigrator[IO](_).migrate(Migrations.all).attempt.void)
.map(new SearchSolrClientImpl[IO](_))

override lazy val solrConfig: SolrConfig =
self.solrConfig.copy(core = server.searchCoreName)

override def beforeAll(): Unit =
server.start()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,28 +26,22 @@ import cats.effect.IO
import cats.syntax.all.*

import io.bullet.borer.{Decoder, Reader}
import io.renku.search.LoggingConfigure
import io.renku.search.model
import io.renku.search.model.EntityType
import io.renku.search.query.Query
import io.renku.search.query.QueryGenerators
import io.renku.search.solr.SearchRole
import io.renku.search.solr.client.SearchSolrSpec
import io.renku.search.solr.client.SearchSolrSuite
import io.renku.search.solr.documents.DocumentKind
import io.renku.search.solr.schema.EntityDocumentSchema.Fields
import io.renku.search.solr.schema.Migrations
import io.renku.solr.client.migration.SchemaMigrator
import io.renku.solr.client.{QueryData, QueryString}
import munit.CatsEffectSuite
import munit.ScalaCheckEffectSuite
import org.scalacheck.Test.Parameters
import org.scalacheck.effect.PropF

class LuceneQueryInterpreterSpec
extends CatsEffectSuite
with LoggingConfigure
with ScalaCheckEffectSuite
with SearchSolrSpec:
class LuceneQueryInterpreterSpec extends SearchSolrSuite with ScalaCheckEffectSuite:

override protected lazy val coreName: String = server.testCoreName2

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ package io.renku.solr.client.schema
final case class Analyzer(
tokenizer: Tokenizer,
`type`: Analyzer.AnalyzerType = Analyzer.AnalyzerType.None,
filter: Seq[Filter] = Nil
filters: Seq[Filter] = Nil
)

object Analyzer:
Expand All @@ -36,4 +36,17 @@ object Analyzer:
def index(tokenizer: Tokenizer, filters: Filter*): Analyzer =
Analyzer(tokenizer, AnalyzerType.Index, filters)

val classic: Analyzer = Analyzer(Tokenizer.classic, filter = List(Filter.classic))
/** Creates an [[Analyzer]] with type `AnalyzerType.Query` from the given tokenizer and filters.
  *
  * NOTE(review): presumably this maps to Solr's query-time analyzer — confirm against the
  * schema encoder.
  *
  * @param tokenizer the tokenizer handed to the analyzer
  * @param filters the filters handed to the analyzer, in the order given
  * @return an analyzer whose type is `AnalyzerType.Query`
  */
def query(tokenizer: Tokenizer, filters: Filter*): Analyzer =
Analyzer(tokenizer, AnalyzerType.Query, filters)

val classic: Analyzer = Analyzer(Tokenizer.classic, filters = List(Filter.classic))

/** Analyzer built from the `uax29UrlEmail` tokenizer and the filters `lowercase`, `stop`,
  * `englishMinimalStem` and `asciiFolding`, listed in that order.
  *
  * No analyzer type is passed, so the case class default (`AnalyzerType.None`) applies.
  *
  * NOTE(review): the `lowercase` filter is presumably what makes matching case-insensitive —
  * confirm against the Solr filter documentation.
  */
val defaultSearch: Analyzer = Analyzer(
tokenizer = Tokenizer.uax29UrlEmail,
filters = Seq(
Filter.lowercase,
Filter.stop,
Filter.englishMinimalStem,
Filter.asciiFolding
)
)
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,10 @@ package io.renku.solr.client.schema
final case class Filter(name: String)

object Filter:
val asciiFolding: Filter = Filter("asciiFolding")
val lowercase: Filter = Filter("lowercase")
val stop: Filter = Filter("stop")
val englishPorter: Filter = Filter("englishPorter")
val englishMinimalStem: Filter = Filter("englishMinimalStem")
val classic: Filter = Filter("classic")
val daitchMokotoffSoundex: Filter = Filter("daitchMokotoffSoundex")
val doubleMetaphone: Filter = Filter("doubleMetaphone")
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,9 @@ object Tokenizer:
val standard: Tokenizer = Tokenizer("standard")
val whitespace: Tokenizer = Tokenizer("whitespace")
val classic: Tokenizer = Tokenizer("classic")

// Tokenizer registered under the name "uax29UrlEmail"; see the Solr reference guide:
// https://solr.apache.org/guide/solr/latest/indexing-guide/tokenizers.html#uax29-url-email-tokenizer
val uax29UrlEmail: Tokenizer = Tokenizer("uax29UrlEmail")

// Tokenizers registered under the names "icu" and "openNlp".
// NOTE(review): these presumably require additional Solr modules to be enabled — confirm before use.
val icu: Tokenizer = Tokenizer("icu")
val openNlp: Tokenizer = Tokenizer("openNlp")
Loading

0 comments on commit b56f66c

Please sign in to comment.