Skip to content

Commit

Permalink
Merge pull request #76 from SwissDataScienceCenter/more-fuzzy-search
Browse files Browse the repository at this point in the history
chore: Enable fuzzy search for catch-all field
  • Loading branch information
eikek authored Mar 27, 2024
2 parents b56f66c + 75d7a84 commit 671de58
Show file tree
Hide file tree
Showing 7 changed files with 84 additions and 27 deletions.
2 changes: 2 additions & 0 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
RS_SOLR_HOST = "rsdev-cnt";
RS_SOLR_URL = "http://rsdev-cnt:8983/solr";
RS_SOLR_CORE = "rsdev-test";
RS_SOLR_DEFAULT_COMMIT_WITHIN = "1 seconds";
RS_REDIS_HOST = "rsdev-cnt";
RS_REDIS_PORT = "6379";
RS_CONTAINER = "rsdev";
Expand All @@ -93,6 +94,7 @@
RS_SOLR_PORT = "18983";
RS_SOLR_URL = "http://localhost:18983/solr";
RS_SOLR_CORE = "rsdev-test";
RS_SOLR_DEFAULT_COMMIT_WITHIN = "1 seconds";
RS_REDIS_HOST = "localhost";
RS_REDIS_PORT = "16379";
VM_SSH_PORT = "10022";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ object SolrToken:
case Comparison.LowerThan => "<"

def contentAll(text: String): SolrToken =
s"${SolrField.contentAll.name}:${StringEscape.queryChars(text)}"
val terms: Seq[SolrToken] = text.split("\\s+").map(_.trim).toSeq
s"${SolrField.contentAll.name}:${terms.fuzzy}"

def orFieldIs(field: FieldName, values: NonEmptyList[SolrToken]): SolrToken =
values.map(fieldIs(field, _)).toList.foldOr
Expand Down Expand Up @@ -129,3 +130,7 @@ object SolrToken:
if (self.sizeIs <= 1) all else s"($all)"
def foldOr: SolrToken = foldM(using orMonoid)
def foldAnd: SolrToken = foldM(using andMonoid)
/** Renders the tokens as fuzzy search terms.
  *
  * Every token is escaped with `StringEscape.queryChars` and suffixed with
  * `~`. No tokens yield `SolrToken.empty`, a single token is returned as is,
  * and several tokens are joined by a space and wrapped in parentheses.
  */
def fuzzy: SolrToken =
  val fuzzyTerms = self.map(term => s"${StringEscape.queryChars(term)}~")
  fuzzyTerms.size match
    case 0 => SolrToken.empty
    case 1 => fuzzyTerms.head
    case _ => fuzzyTerms.mkString("(", " ", ")")
Original file line number Diff line number Diff line change
Expand Up @@ -69,15 +69,15 @@ class LuceneQueryInterpreterSpec extends SearchSolrSuite with ScalaCheckEffectSu
test("amend query with auth data"):
assertEquals(
query("help", SearchRole.user(model.Id("13"))).query,
"((content_all:help) AND (visibility:public OR owners:13 OR members:13) AND _kind:fullentity)"
"((content_all:help~) AND (visibility:public OR owners:13 OR members:13) AND _kind:fullentity)"
)
assertEquals(
query("help", SearchRole.Anonymous).query,
"((content_all:help) AND visibility:public AND _kind:fullentity)"
"((content_all:help~) AND visibility:public AND _kind:fullentity)"
)
assertEquals(
query("help", SearchRole.Admin).query,
"(content_all:help AND _kind:fullentity)"
"(content_all:help~ AND _kind:fullentity)"
)

test("amend empty query with auth data"):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ class SearchCaseInsensitiveSpec extends SolrClientBaseSuite:
)

private def truncate(client: SolrClient[IO]): IO[Unit] =
truncateAll(client)(
truncateQuery(client)(
SearchCaseInsensitiveSpec.idQuery,
Seq(FieldName("my_name"), FieldName("currentSchemaVersion")),
Seq(TypeName("my_text_field"))
)
Expand All @@ -51,42 +52,46 @@ class SearchCaseInsensitiveSpec extends SolrClientBaseSuite:

// find pogacar without this Č character
r1 <- client.query[TestData](QueryString("my_name:pogacar"))
_ = assertEquals(r1.responseBody.docs.head, TestData.get("11"))
_ = assertEquals(r1.responseBody.docs.head, TestData.get(11))
// find pogi with that Č character
r2 <- client.query[TestData](QueryString("my_name:POGAČAR"))
_ = assertEquals(r2.responseBody.docs.head, TestData.get("11"))
_ = assertEquals(r2.responseBody.docs.head, TestData.get(11))
// find with umlaut
r3 <- client.query[TestData](QueryString("my_name:über"))
_ = assertEquals(r3.responseBody.docs.head, TestData.get("31"))
_ = assertEquals(r3.responseBody.docs.head, TestData.get(31))
// find without umlaut
r4 <- client.query[TestData](QueryString("my_name:uber"))
_ = assertEquals(r4.responseBody.docs.head, TestData.get("31"))
_ = assertEquals(r4.responseBody.docs.head, TestData.get(31))
} yield ()
}

object SearchCaseInsensitiveSpec:
/** Query string of the form `id:<simple class name>*`, built from this
  * object's simple class name; presumably meant as a prefix match on the ids
  * produced by `id`.
  */
def idQuery: String =
  val prefix = getClass.getSimpleName
  s"id:$prefix*"
/** Builds a document id of the form `<simple class name>_<num>`. */
def id(num: Int): String =
  val prefix = getClass.getSimpleName
  s"${prefix}_$num"

final case class TestData(id: String, @key("my_name") name: String)
object TestData:
val sample = Seq(
TestData("1", "Eddy MERCKX"),
TestData("2", "Alejandro VALVERDE"),
TestData("3", "Sean KELLY"),
TestData("4", "Gino BARTALI"),
TestData("5", "Francesco MOSER"),
TestData("11", "Tadej POGAČAR"),
TestData("12", "Jasper PHILIPSEN"),
TestData("13", "Mads PEDERSEN"),
TestData("14", "Juan AYUSO PESQUERA"),
TestData("15", "Matteo JORGENSON"),
TestData("21", "uae_team_emirates"),
TestData("22", "team_visma_lease_a_bike"),
TestData("23", "lidl_trek"),
TestData("31", "Über den Wolken"),
TestData("32", "thé café")
TestData(id(1), "Eddy MERCKX"),
TestData(id(2), "Alejandro VALVERDE"),
TestData(id(3), "Sean KELLY"),
TestData(id(4), "Gino BARTALI"),
TestData(id(5), "Francesco MOSER"),
TestData(id(11), "Tadej POGAČAR"),
TestData(id(12), "Jasper PHILIPSEN"),
TestData(id(13), "Mads PEDERSEN"),
TestData(id(14), "Juan AYUSO PESQUERA"),
TestData(id(15), "Matteo JORGENSON"),
TestData(id(21), "uae_team_emirates"),
TestData(id(22), "team_visma_lease_a_bike"),
TestData(id(23), "lidl_trek"),
TestData(id(31), "Über den Wolken"),
TestData(id(32), "thé café")
)
def get(id: String): TestData =
sample.find(_.id == id).getOrElse(sys.error(s"Literal test data not found: $id"))
/** Looks up the sample entry whose id equals `id(num)`.
  *
  * Fails via `sys.error` when no such entry exists. The message names the
  * generated id that was searched for (interpolating the bare method name
  * `id` would not show it).
  */
def get(num: Int): TestData =
  val wanted = id(num)
  sample
    .find(_.id == wanted)
    .getOrElse(sys.error(s"Literal test data not found: $wanted"))

given Encoder[TestData] = MapBasedCodecs.deriveEncoder
given Decoder[TestData] = MapBasedCodecs.deriveDecoder
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,13 @@ trait SolrTruncate {
/** Truncates everything: delegates to `truncateQuery` with the match-all
  * query `*:*`, passing `fields` and `types` through unchanged.
  */
def truncateAll(client: SolrClient[IO])(
    fields: Seq[FieldName],
    types: Seq[TypeName]
): IO[Unit] = {
  val matchAll = "*:*"
  truncateQuery(client)(matchAll, fields, types)
}

def truncateQuery(
client: SolrClient[IO]
)(query: String, fields: Seq[FieldName], types: Seq[TypeName]): IO[Unit] =
for {
_ <- client.delete(QueryString("*:*"))
_ <- client.delete(QueryString(query))
_ <- fields
.map(SchemaCommand.DeleteField.apply)
.traverse_(modifyIgnoreError(client))
Expand Down
5 changes: 5 additions & 0 deletions nix/dev-scripts.nix
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@
inherit system;
};

redis-port-forward = devshell-tools.lib.installScript {
script = ./scripts/redis-port-forward;
inherit system;
};

k8s-reprovision = devshell-tools.lib.installScript {
script = ./scripts/k8s-reprovision;
inherit system;
Expand Down
35 changes: 35 additions & 0 deletions nix/scripts/redis-port-forward
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/usr/bin/env bash

# Abort on the first failing command.
set -e

# Local port to forward from; taken from the first argument, defaults to 26379.
port=${1:-26379}

# RENKU_ENV is used as the kubectl namespace by everything below, so refuse to
# run without it. Diagnostics go to stderr so they are not mixed into stdout.
if [ -z "$RENKU_ENV" ]; then
    {
        echo "Please set RENKU_ENV environment variable to the deployment"
        echo "environment you want to run against"
    } >&2
    exit 1
fi

# Print the names (one per line) of the pods in the $RENKU_ENV namespace whose
# name matches the grep pattern given as $1.
# The exit status is that of grep, i.e. non-zero when nothing matches.
get_pod_name() {
    local pat="$1"
    # Quote the namespace to avoid word splitting; `--` keeps a pattern that
    # starts with a dash from being parsed as a grep option.
    kubectl -n "$RENKU_ENV" get pods -o json \
        | jq -r '.items[]|.metadata.name' \
        | grep -- "$pat"
}

# Print the name of the redis pod (looked up with the pattern "redis-node-0").
# Returns 1 with a message on stderr when no pod is found.
get_redis_pod() {
    # Keep the variable local so it does not leak into the caller's scope.
    local pod_name
    # This function is called inside a command substitution, where bash clears
    # `set -e`; the lookup result therefore has to be checked explicitly.
    if ! pod_name=$(get_pod_name redis-node-0) || [ -z "$pod_name" ]; then
        echo "No pod matching 'redis-node-0' found in namespace '$RENKU_ENV'" >&2
        return 1
    fi
    echo "$pod_name"
}

# Read the "redis-password" entry of the "redis-secret" secret in the
# $RENKU_ENV namespace, base64-decode it and print it together with connection
# hints for the forwarded port.
# $1 is the pod name; it is accepted but not used by the body.
# Reads the global $port.
get_redis_secret() {
    local pod_name="$1"

    # Declare and assign separately: `local var=$(cmd)` returns the status of
    # `local` itself, which would hide a failing pipeline from `set -e`.
    local redis_secret
    redis_secret=$(kubectl -n "$RENKU_ENV" get secrets -o json \
        | jq -r '.items[]|select(.metadata.name == "redis-secret")|.data."redis-password"|@base64d')

    echo "*** redis-secret ***"
    echo "secret: $redis_secret"
    echo "url: localhost:$port"
    echo "example: redis-cli -e --json -h localhost -p $port -n 3 -a $redis_secret"
}

# Resolve the redis pod, print the credentials, then forward local $port to
# port 6379 of that pod.
pod="$(get_redis_pod)"
# Stop before printing secrets or starting the port-forward if no pod name
# was produced.
if [ -z "$pod" ]; then
    echo "Could not determine the redis pod in namespace '$RENKU_ENV'" >&2
    exit 1
fi
get_redis_secret "$pod"
kubectl -n "$RENKU_ENV" port-forward "$pod" "$port:6379"

0 comments on commit 671de58

Please sign in to comment.