From b08a4a3664fc5cb7c4674049bc01d9a8b9a59492 Mon Sep 17 00:00:00 2001 From: eikek <701128+eikek@users.noreply.github.com> Date: Tue, 27 Feb 2024 17:16:08 +0100 Subject: [PATCH] feat: Convert user query to solr (#33) - Convert the user query from what is currently supported into a solr (standard) query. - Adds solrs `score` field to the response - Allow to pass custom sorting - Allow to filter by entity type --- build.sbt | 3 +- flake.nix | 4 + .../io/renku/search/model/EntityType.scala | 40 +++++ .../renku/search/model/CommonGenerators.scala | 29 ++++ .../io/renku/search/api/HttpApplication.scala | 4 +- .../io/renku/search/api/SearchApiImpl.scala | 2 +- .../renku/search/api/data/SearchEntity.scala | 4 +- .../io/renku/search/api/tapir/Params.scala | 2 +- .../io/renku/search/api/SearchApiSpec.scala | 5 +- .../search/provision/SearchProvisioner.scala | 2 +- .../provision/SearchProvisionerSpec.scala | 7 +- modules/search-query-docs/docs/manual.md | 46 ++++++ .../io/renku/search/query/DateTimeRef.scala | 25 +++ .../io/renku/search/query/EntityType.scala | 17 ++ .../scala/io/renku/search/query/Field.scala | 1 + .../io/renku/search/query/FieldTerm.scala | 6 + .../scala/io/renku/search/query/Order.scala | 74 +++++++++ .../scala/io/renku/search/query/Query.scala | 5 + .../io/renku/search/query/SortableField.scala | 42 +++++ .../search/query/json/QueryJsonCodec.scala | 18 +++ .../search/query/parse/QueryParser.scala | 59 ++++++- .../renku/search/query/parse/QueryUtil.scala | 8 +- .../renku/search/query/DateTimeRefSpec.scala | 79 +++++++++ .../renku/search/query/QueryGenerators.scala | 33 ++-- .../search/query/parse/QueryParserSpec.scala | 16 +- .../search/solr/client/QueryInterpreter.scala | 65 -------- .../solr/client/SearchSolrClientImpl.scala | 15 +- .../renku/search/solr/documents/Project.scala | 3 +- .../io/renku/search/solr/query/Context.scala | 45 ++++++ .../solr/query/LuceneQueryEncoders.scala | 151 ++++++++++++++++++ .../solr/query/LuceneQueryInterpreter.scala | 40 
+++++ .../search/solr/query/QueryInterpreter.scala | 32 ++++ .../renku/search/solr/query/SolrQuery.scala | 42 +++++ .../search/solr/query/SolrSortCreate.scala | 43 +++++ .../renku/search/solr/query/SolrToken.scala | 132 +++++++++++++++ .../search/solr/query/SolrTokenEncoder.scala | 91 +++++++++++ .../solr/schema/EntityDocumentSchema.scala | 2 + .../solr/client/SearchSolrClientSpec.scala | 5 +- .../solr/query/LuceneQueryEncoderSpec.scala | 103 ++++++++++++ .../io/renku/solr/client/QueryData.scala | 13 +- .../scala/io/renku/solr/client/SolrSort.scala | 58 +++++++ nix/scripts/redis-push | 2 +- project/Dependencies.scala | 5 - 43 files changed, 1262 insertions(+), 116 deletions(-) create mode 100644 modules/commons/src/main/scala/io/renku/search/model/EntityType.scala create mode 100644 modules/commons/src/test/scala/io/renku/search/model/CommonGenerators.scala create mode 100644 modules/search-query/src/main/scala/io/renku/search/query/EntityType.scala create mode 100644 modules/search-query/src/main/scala/io/renku/search/query/Order.scala create mode 100644 modules/search-query/src/main/scala/io/renku/search/query/SortableField.scala create mode 100644 modules/search-query/src/test/scala/io/renku/search/query/DateTimeRefSpec.scala delete mode 100644 modules/search-solr-client/src/main/scala/io/renku/search/solr/client/QueryInterpreter.scala create mode 100644 modules/search-solr-client/src/main/scala/io/renku/search/solr/query/Context.scala create mode 100644 modules/search-solr-client/src/main/scala/io/renku/search/solr/query/LuceneQueryEncoders.scala create mode 100644 modules/search-solr-client/src/main/scala/io/renku/search/solr/query/LuceneQueryInterpreter.scala create mode 100644 modules/search-solr-client/src/main/scala/io/renku/search/solr/query/QueryInterpreter.scala create mode 100644 modules/search-solr-client/src/main/scala/io/renku/search/solr/query/SolrQuery.scala create mode 100644 
modules/search-solr-client/src/main/scala/io/renku/search/solr/query/SolrSortCreate.scala create mode 100644 modules/search-solr-client/src/main/scala/io/renku/search/solr/query/SolrToken.scala create mode 100644 modules/search-solr-client/src/main/scala/io/renku/search/solr/query/SolrTokenEncoder.scala create mode 100644 modules/search-solr-client/src/test/scala/io/renku/search/solr/query/LuceneQueryEncoderSpec.scala create mode 100644 modules/solr-client/src/main/scala/io/renku/solr/client/SolrSort.scala diff --git a/build.sbt b/build.sbt index 36bbc356..9dbcd069 100644 --- a/build.sbt +++ b/build.sbt @@ -197,8 +197,7 @@ lazy val searchSolrClient = project name := "search-solr-client", libraryDependencies ++= Dependencies.catsCore ++ - Dependencies.catsEffect ++ - Dependencies.luceneQueryParser + Dependencies.catsEffect ) .dependsOn( avroCodec % "compile->compile;test->test", diff --git a/flake.nix b/flake.nix index a249a506..b119a6f3 100644 --- a/flake.nix +++ b/flake.nix @@ -81,6 +81,8 @@ with selfPkgs; [ redis jq + sbt + scala-cli redis-push recreate-container @@ -107,6 +109,8 @@ with selfPkgs; [ redis jq + sbt + scala-cli redis-push vm-build diff --git a/modules/commons/src/main/scala/io/renku/search/model/EntityType.scala b/modules/commons/src/main/scala/io/renku/search/model/EntityType.scala new file mode 100644 index 00000000..3bc87e3c --- /dev/null +++ b/modules/commons/src/main/scala/io/renku/search/model/EntityType.scala @@ -0,0 +1,40 @@ +/* + * Copyright 2024 Swiss Data Science Center (SDSC) + * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and + * Eidgenössische Technische Hochschule Zürich (ETHZ). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.renku.search.model + +import io.bullet.borer.Encoder +import io.bullet.borer.Decoder + +enum EntityType: + case Project + case User + + def name: String = productPrefix.toLowerCase + +object EntityType: + def fromString(str: String): Either[String, EntityType] = + EntityType.values + .find(_.name.equalsIgnoreCase(str)) + .toRight(s"Invalid entity type: $str") + + def unsafeFromString(str: String): EntityType = + fromString(str).fold(sys.error, identity) + + given Encoder[EntityType] = Encoder.forString.contramap(_.name) + given Decoder[EntityType] = Decoder.forString.mapEither(fromString) diff --git a/modules/commons/src/test/scala/io/renku/search/model/CommonGenerators.scala b/modules/commons/src/test/scala/io/renku/search/model/CommonGenerators.scala new file mode 100644 index 00000000..5ab2314c --- /dev/null +++ b/modules/commons/src/test/scala/io/renku/search/model/CommonGenerators.scala @@ -0,0 +1,29 @@ +/* + * Copyright 2024 Swiss Data Science Center (SDSC) + * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and + * Eidgenössische Technische Hochschule Zürich (ETHZ). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.renku.search.model + +import cats.data.NonEmptyList +import org.scalacheck.Gen + +object CommonGenerators: + def nelOfN[A](n: Int, gen: Gen[A]): Gen[NonEmptyList[A]] = + for { + e0 <- gen + en <- Gen.listOfN(n - 1, gen) + } yield NonEmptyList(e0, en) diff --git a/modules/search-api/src/main/scala/io/renku/search/api/HttpApplication.scala b/modules/search-api/src/main/scala/io/renku/search/api/HttpApplication.scala index 877e97e1..b7474d87 100644 --- a/modules/search-api/src/main/scala/io/renku/search/api/HttpApplication.scala +++ b/modules/search-api/src/main/scala/io/renku/search/api/HttpApplication.scala @@ -37,8 +37,8 @@ final class HttpApplication[F[_]: Async](searchApi: SearchApi[F]) extends Http4s private val prefix = "/search" - private val search = new SearchRoutes[F](searchApi) - private val openapi = new OpenApiRoute[F](prefix, "Renku Search API", search.endpoints) + private val search = SearchRoutes[F](searchApi) + private val openapi = OpenApiRoute[F](prefix, "Renku Search API", search.endpoints) lazy val router: HttpApp[F] = Router[F]( diff --git a/modules/search-api/src/main/scala/io/renku/search/api/SearchApiImpl.scala b/modules/search-api/src/main/scala/io/renku/search/api/SearchApiImpl.scala index 7daaa49a..feb280fb 100644 --- a/modules/search-api/src/main/scala/io/renku/search/api/SearchApiImpl.scala +++ b/modules/search-api/src/main/scala/io/renku/search/api/SearchApiImpl.scala @@ -47,7 +47,7 @@ private class SearchApiImpl[F[_]: Async](solrClient: SearchSolrClient[F]) phrase: String ): Throwable => 
F[Either[String, SearchResult]] = err => - val message = s"Finding by '$phrase' phrase failed" + val message = s"Finding by '$phrase' phrase failed: ${err.getMessage}" Scribe[F] .error(message, err) .as(message) diff --git a/modules/search-api/src/main/scala/io/renku/search/api/data/SearchEntity.scala b/modules/search-api/src/main/scala/io/renku/search/api/data/SearchEntity.scala index 4a457c1b..3612fc0d 100644 --- a/modules/search-api/src/main/scala/io/renku/search/api/data/SearchEntity.scala +++ b/modules/search-api/src/main/scala/io/renku/search/api/data/SearchEntity.scala @@ -20,6 +20,7 @@ package io.renku.search.api.data import io.bullet.borer.derivation.MapBasedCodecs.{deriveAllCodecs, deriveCodec} import io.bullet.borer.{AdtEncodingStrategy, Codec, Decoder, Encoder} +import io.bullet.borer.NullOptions.given import io.renku.search.model.* import sttp.tapir.Schema.SName import sttp.tapir.SchemaType.{SDateTime, SProductField} @@ -37,7 +38,8 @@ final case class Project( description: Option[projects.Description] = None, createdBy: User, creationDate: projects.CreationDate, - members: Seq[User] + members: Seq[User], + score: Option[Double] = None ) extends SearchEntity object Project: diff --git a/modules/search-api/src/main/scala/io/renku/search/api/tapir/Params.scala b/modules/search-api/src/main/scala/io/renku/search/api/tapir/Params.scala index c851fa54..7f4c62bb 100644 --- a/modules/search-api/src/main/scala/io/renku/search/api/tapir/Params.scala +++ b/modules/search-api/src/main/scala/io/renku/search/api/tapir/Params.scala @@ -42,7 +42,7 @@ object Params extends TapirCodecs with TapirBorerJson { .validate(Validator.max(100)) .default(PageDef.default.limit) - (page / perPage).map(PageDef.fromPage.tupled)(Tuple.fromProductTyped) + (page / perPage).map(PageDef.fromPage.tupled)(p => (p.page, p.limit)) } val queryInput: EndpointInput[QueryInput] = query.and(pageDef).mapTo[QueryInput] diff --git 
a/modules/search-api/src/test/scala/io/renku/search/api/SearchApiSpec.scala b/modules/search-api/src/test/scala/io/renku/search/api/SearchApiSpec.scala index 061f2f5f..f78abd2c 100644 --- a/modules/search-api/src/test/scala/io/renku/search/api/SearchApiSpec.scala +++ b/modules/search-api/src/test/scala/io/renku/search/api/SearchApiSpec.scala @@ -43,9 +43,12 @@ class SearchApiSpec extends CatsEffectSuite with SearchSolrSpec: results <- searchApi .query(mkQuery("matching")) .map(_.fold(err => fail(s"Calling Search API failed with $err"), identity)) - } yield assert(results.items contains toApiProject(project1)) + } yield assert(results.items.map(scoreToNone) contains toApiProject(project1)) } + private def scoreToNone(e: SearchEntity): SearchEntity = e match + case p: Project => p.copy(score = None) + private def mkQuery(phrase: String): QueryInput = QueryInput.pageOne(Query.parse(phrase).fold(sys.error, identity)) diff --git a/modules/search-provision/src/main/scala/io/renku/search/provision/SearchProvisioner.scala b/modules/search-provision/src/main/scala/io/renku/search/provision/SearchProvisioner.scala index 83c3d6d9..4889fdae 100644 --- a/modules/search-provision/src/main/scala/io/renku/search/provision/SearchProvisioner.scala +++ b/modules/search-provision/src/main/scala/io/renku/search/provision/SearchProvisioner.scala @@ -132,7 +132,7 @@ private class SearchProvisionerImpl[F[_]: Async]( (from: v1.Visibility) => projects.Visibility.unsafeFromString(from.name()) private lazy val toSolrDocuments: Seq[ProjectCreated] => Seq[Project] = - _.map(_.to[Project]) + _.map(_.into[Project].transform(Field.default(_.score))) private def markProcessedOnFailure( message: QueueMessage, diff --git a/modules/search-provision/src/test/scala/io/renku/search/provision/SearchProvisionerSpec.scala b/modules/search-provision/src/test/scala/io/renku/search/provision/SearchProvisionerSpec.scala index 825a6349..5ddb89a6 100644 --- 
a/modules/search-provision/src/test/scala/io/renku/search/provision/SearchProvisionerSpec.scala +++ b/modules/search-provision/src/test/scala/io/renku/search/provision/SearchProvisionerSpec.scala @@ -66,7 +66,7 @@ class SearchProvisionerSpec extends CatsEffectSuite with QueueSpec with SearchSo .awakeEvery[IO](500 millis) .evalMap(_ => solrClient.findProjects("*")) .flatMap(Stream.emits(_)) - .evalMap(d => solrDocs.update(_ + d)) + .evalMap(d => solrDocs.update(_ + d.copy(score = None))) .compile .drain .start @@ -103,7 +103,7 @@ class SearchProvisionerSpec extends CatsEffectSuite with QueueSpec with SearchSo .evalMap(_ => solrClient.findProjects("*")) .flatMap(Stream.emits(_)) .evalTap(IO.println) - .evalMap(d => solrDocs.update(_ + d)) + .evalMap(d => solrDocs.update(_ + d.copy(score = None))) .compile .drain .start @@ -125,7 +125,8 @@ class SearchProvisionerSpec extends CatsEffectSuite with QueueSpec with SearchSo Field.computed( _.visibility, pc => projects.Visibility.unsafeFromString(pc.visibility.name()) - ) + ), + Field.default(_.score) ) override def munitFixtures: Seq[Fixture[_]] = diff --git a/modules/search-query-docs/docs/manual.md b/modules/search-query-docs/docs/manual.md index 5606a5e4..eb70e5c8 100644 --- a/modules/search-query-docs/docs/manual.md +++ b/modules/search-query-docs/docs/manual.md @@ -73,6 +73,7 @@ Multiple alternative values can be given as a comma separated list. The following fields are available: ```scala mdoc:passthrough +import io.renku.search.model.EntityType import io.renku.search.query.* println(Field.values.map(e => s"`${e.name}`").mkString("- ", "\n- ", "")) ``` @@ -81,6 +82,22 @@ Each field allows to specify one or more values, separated by comma. The value must be separated by a `:`. For date fields, additional `<` and `>` is supported. +### EntityTypes + +The field `type` allows to search for specific entity types. If it is +missing, all entity types are included in the result. 
Entity types are: + +```scala mdoc:passthrough +println( + EntityType.values.map(e => s"`${e.name}`").mkString("- ", "\n- ", "") +) +``` + +Example: +```scala mdoc:passthrough +println(s" `${Field.Type.name}:${EntityType.Project.name}`") +``` + ### Dates Date fields, like @@ -159,3 +176,32 @@ created:2023-03,2023-06 ``` The above means to match entities created in March 2023 or June 2023. + +## Sorting + +The query allows defining terms for sorting. Sorting is limited to +specific fields, which are: + +```scala mdoc:passthrough +println( + SortableField.values.map(e => s"`${e.name}`").mkString("- ", "\n- ", "") +) +``` + +Sorting by a field is defined with a `sort:` term: the field name, followed by a +dash and the sort direction (`asc` or `desc`). Multiple such definitions can be +specified, using a comma separated list. Alternatively, multiple +`sort:…` terms will be combined into a single one in the order they +appear. + +Example: +```scala mdoc:passthrough +val str = Order(SortableField.Score -> Order.Direction.Desc, SortableField.Created -> Order.Direction.Asc).render +println(s"`$str`") +``` +is equivalent to +```scala mdoc:passthrough +val str1 = Order(SortableField.Score -> Order.Direction.Desc).render +val str2 = Order(SortableField.Created -> Order.Direction.Asc).render +println(s"`$str1 $str2`") +``` diff --git a/modules/search-query/src/main/scala/io/renku/search/query/DateTimeRef.scala b/modules/search-query/src/main/scala/io/renku/search/query/DateTimeRef.scala index 3b9d9387..b4a1b67c 100644 --- a/modules/search-query/src/main/scala/io/renku/search/query/DateTimeRef.scala +++ b/modules/search-query/src/main/scala/io/renku/search/query/DateTimeRef.scala @@ -21,6 +21,8 @@ package io.renku.search.query import cats.syntax.all.* import io.bullet.borer.{Decoder, Encoder} import io.renku.search.query.parse.DateTimeParser +import java.time.Instant +import java.time.ZoneId enum DateTimeRef: case Literal(ref: PartialDateTime) @@ -32,6 +34,29 @@ enum DateTimeRef: case Relative(ref) => 
ref.name case Calc(ref) => ref.asString + /** Resolves the date-time reference to a concrete instant using the given reference + * date. It either returns a single instant or a time range. + */ + def resolve(ref: Instant, zoneId: ZoneId): (Instant, Option[Instant]) = this match + case Relative(RelativeDate.Today) => (ref, None) + case Relative(RelativeDate.Yesterday) => + (ref.atZone(zoneId).minusDays(1).toInstant, None) + case Literal(pdate) => + val min = pdate.instantMin(zoneId) + val max = pdate.instantMax(zoneId) + (min, Some(max).filter(_ != min)) + case Calc(cdate) => + val ts = cdate.ref match + case pd: PartialDateTime => + pd.instantMin(zoneId).atZone(zoneId) + + case rd: RelativeDate => + Relative(rd).resolve(ref, zoneId)._1.atZone(zoneId) + + if (cdate.range) + (ts.minus(cdate.amount).toInstant, Some(ts.plus(cdate.amount).toInstant)) + else (ts.plus(cdate.amount).toInstant, None) + object DateTimeRef: given Encoder[DateTimeRef] = Encoder.forString.contramap(_.asString) given Decoder[DateTimeRef] = Decoder.forString.mapEither { str => diff --git a/modules/search-query/src/main/scala/io/renku/search/query/EntityType.scala b/modules/search-query/src/main/scala/io/renku/search/query/EntityType.scala new file mode 100644 index 00000000..924f8219 --- /dev/null +++ b/modules/search-query/src/main/scala/io/renku/search/query/EntityType.scala @@ -0,0 +1,17 @@ +/* + * Copyright 2024 Swiss Data Science Center (SDSC) + * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and + * Eidgenössische Technische Hochschule Zürich (ETHZ). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ diff --git a/modules/search-query/src/main/scala/io/renku/search/query/Field.scala b/modules/search-query/src/main/scala/io/renku/search/query/Field.scala index 9679d3ae..9b44a5de 100644 --- a/modules/search-query/src/main/scala/io/renku/search/query/Field.scala +++ b/modules/search-query/src/main/scala/io/renku/search/query/Field.scala @@ -27,6 +27,7 @@ enum Field: case Visibility case Created case CreatedBy + case Type val name: String = Strings.lowerFirst(productPrefix) diff --git a/modules/search-query/src/main/scala/io/renku/search/query/FieldTerm.scala b/modules/search-query/src/main/scala/io/renku/search/query/FieldTerm.scala index aee0cc5f..51ac2a4c 100644 --- a/modules/search-query/src/main/scala/io/renku/search/query/FieldTerm.scala +++ b/modules/search-query/src/main/scala/io/renku/search/query/FieldTerm.scala @@ -19,9 +19,12 @@ package io.renku.search.query import cats.data.NonEmptyList +import io.renku.search.model.EntityType import io.renku.search.model.projects.Visibility enum FieldTerm(val field: Field, val cmp: Comparison): + case TypeIs(values: NonEmptyList[EntityType]) + extends FieldTerm(Field.Type, Comparison.Is) case ProjectIdIs(values: NonEmptyList[String]) extends FieldTerm(Field.ProjectId, Comparison.Is) case NameIs(values: NonEmptyList[String]) extends FieldTerm(Field.Name, Comparison.Is) @@ -35,6 +38,9 @@ enum FieldTerm(val field: Field, val cmp: Comparison): private[query] def asString = val value = this match + case TypeIs(values) => + val ts = values.toList.distinct.map(_.name) + ts.mkString(",") case 
ProjectIdIs(values) => FieldTerm.nelToString(values) case NameIs(values) => FieldTerm.nelToString(values) case SlugIs(values) => FieldTerm.nelToString(values) diff --git a/modules/search-query/src/main/scala/io/renku/search/query/Order.scala b/modules/search-query/src/main/scala/io/renku/search/query/Order.scala new file mode 100644 index 00000000..18aa8418 --- /dev/null +++ b/modules/search-query/src/main/scala/io/renku/search/query/Order.scala @@ -0,0 +1,74 @@ +/* + * Copyright 2024 Swiss Data Science Center (SDSC) + * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and + * Eidgenössische Technische Hochschule Zürich (ETHZ). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.renku.search.query + +import cats.data.NonEmptyList +import cats.syntax.all.* +import io.bullet.borer.{Decoder, Encoder} +import io.renku.search.query.parse.QueryParser + +final case class Order(fields: NonEmptyList[Order.OrderedBy]): + def render: String = + s"sort:${fields.render}" + +object Order: + def apply(field: OrderedBy, fields: OrderedBy*): Order = + Order(NonEmptyList(field, fields.toList)) + + def apply( + field: (SortableField, Direction), + fields: (SortableField, Direction)* + ): Order = + Order(NonEmptyList(field, fields.toList).map(OrderedBy.apply.tupled)) + + enum Direction: + case Asc + case Desc + + def name: String = productPrefix.toLowerCase + + object Direction: + def fromString(s: String): Either[String, Direction] = + Direction.values + .find(_.name.equalsIgnoreCase(s)) + .toRight(s"Invalid sort direction: $s") + + def unsafeFromString(s: String): Direction = + fromString(s).fold(sys.error, identity) + + final case class OrderedBy( + field: SortableField, + direction: Order.Direction + ): + def render: String = s"${field.name}-${direction.name}" + + object OrderedBy: + given Encoder[OrderedBy] = Encoder.forString.contramap(_.render) + given Decoder[OrderedBy] = + Decoder.forString.mapEither(s => QueryParser.orderedBy.parseAll(s).leftMap(_.show)) + given cats.Order[OrderedBy] = cats.Order.by(_.render) + + def fromString(s: String): Either[String, Order] = + QueryParser.sortTerm.parseAll(s).leftMap(_.show) + + def unsafeFromString(s: String): Order = + fromString(s).fold(sys.error, identity) + + extension (self: NonEmptyList[OrderedBy]) + def render: String = self.map(_.render).toList.mkString(",") diff --git a/modules/search-query/src/main/scala/io/renku/search/query/Query.scala b/modules/search-query/src/main/scala/io/renku/search/query/Query.scala index 1db4de84..3de80a66 100644 --- a/modules/search-query/src/main/scala/io/renku/search/query/Query.scala +++ 
b/modules/search-query/src/main/scala/io/renku/search/query/Query.scala @@ -36,6 +36,7 @@ final case class Query( .map { case Query.Segment.Field(v) => v.asString case Query.Segment.Text(v) => v + case Query.Segment.Sort(v) => v.render } .mkString(" ") @@ -57,6 +58,7 @@ object Query: enum Segment: case Field(value: FieldTerm) case Text(value: String) + case Sort(value: Order) object Segment: given Monoid[Segment.Text] = @@ -68,6 +70,9 @@ object Query: else if (self.value.isEmpty) other else Segment.Text(s"${self.value} ${other.value}") + def sort(order: Order.OrderedBy, more: Order.OrderedBy*): Segment = + Segment.Sort(Order(NonEmptyList(order, more.toList))) + def text(phrase: String): Segment = Segment.Text(phrase) diff --git a/modules/search-query/src/main/scala/io/renku/search/query/SortableField.scala b/modules/search-query/src/main/scala/io/renku/search/query/SortableField.scala new file mode 100644 index 00000000..6fef6940 --- /dev/null +++ b/modules/search-query/src/main/scala/io/renku/search/query/SortableField.scala @@ -0,0 +1,42 @@ +/* + * Copyright 2024 Swiss Data Science Center (SDSC) + * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and + * Eidgenössische Technische Hochschule Zürich (ETHZ). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.renku.search.query + +import io.bullet.borer.{Decoder, Encoder} + +enum SortableField: + case Name + case Created + case Score + + val name: String = Strings.lowerFirst(productPrefix) + +object SortableField: + given Encoder[SortableField] = Encoder.forString.contramap(_.name) + given Decoder[SortableField] = Decoder.forString.mapEither(fromString) + + private[this] val allNames: String = SortableField.values.map(_.name).mkString(", ") + + def fromString(str: String): Either[String, SortableField] = + SortableField.values + .find(_.name.equalsIgnoreCase(str)) + .toRight(s"Invalid field: $str. Allowed are: $allNames") + + def unsafeFromString(str: String): SortableField = + fromString(str).fold(sys.error, identity) diff --git a/modules/search-query/src/main/scala/io/renku/search/query/json/QueryJsonCodec.scala b/modules/search-query/src/main/scala/io/renku/search/query/json/QueryJsonCodec.scala index 21239fb2..e32d9a8a 100644 --- a/modules/search-query/src/main/scala/io/renku/search/query/json/QueryJsonCodec.scala +++ b/modules/search-query/src/main/scala/io/renku/search/query/json/QueryJsonCodec.scala @@ -21,6 +21,7 @@ package io.renku.search.query.json import cats.data.NonEmptyList import io.bullet.borer.compat.cats.* import io.bullet.borer.{Decoder, Encoder, Reader, Writer} +import io.renku.search.model.EntityType import io.renku.search.model.projects.Visibility import io.renku.search.query.* import io.renku.search.query.FieldTerm.* @@ -45,15 +46,18 @@ import scala.collection.mutable.ListBuffer */ private[query] object QueryJsonCodec: private[this] val freeTextField = "_text" + private[this] val sortTextField = "_sort" enum Name: case FieldName(v: Field) + case SortName case TextName private given Decoder[Name] = new Decoder[Name]: def read(r: Reader): Name = if (r.tryReadString(freeTextField)) Name.TextName + else if (r.tryReadString(sortTextField)) Name.SortName else Decoder[Field].map(Name.FieldName.apply).read(r) private def 
writeNelValue[T: Encoder](w: Writer, ts: NonEmptyList[T]): w.type = @@ -62,6 +66,9 @@ private[query] object QueryJsonCodec: private def writeFieldTermValue(w: Writer, term: FieldTerm): Writer = term match + case FieldTerm.TypeIs(values) => + writeNelValue(w, values) + case FieldTerm.ProjectIdIs(values) => writeNelValue(w, values) @@ -90,6 +97,9 @@ private[query] object QueryJsonCodec: case Segment.Field(v) => w.write(v.field) writeFieldTermValue(w, v) + case Segment.Sort(v) => + w.write(sortTextField) + writeNelValue(w, v.fields) } w.writeMapClose() } @@ -103,6 +113,10 @@ private[query] object QueryJsonCodec: case Name.TextName => Segment.Text(r.readString()) + case Name.FieldName(Field.Type) => + val values = readNel[EntityType](r) + Segment.Field(TypeIs(values)) + case Name.FieldName(Field.ProjectId) => val values = readNel[String](r) Segment.Field(ProjectIdIs(values)) @@ -128,6 +142,10 @@ private[query] object QueryJsonCodec: Decoder.forTuple[(Comparison, NonEmptyList[DateTimeRef])].read(r) Segment.Field(Created(cmp, date)) + case Name.SortName => + val values = readNel[Order.OrderedBy](r) + Segment.Sort(Order(values)) + val decoder: Decoder[List[Segment]] = new Decoder[List[Segment]] { def read(r: Reader) = { diff --git a/modules/search-query/src/main/scala/io/renku/search/query/parse/QueryParser.scala b/modules/search-query/src/main/scala/io/renku/search/query/parse/QueryParser.scala index d7f54a42..0fbe5438 100644 --- a/modules/search-query/src/main/scala/io/renku/search/query/parse/QueryParser.scala +++ b/modules/search-query/src/main/scala/io/renku/search/query/parse/QueryParser.scala @@ -20,6 +20,7 @@ package io.renku.search.query.parse import cats.data.NonEmptyList import cats.parse.{Parser as P, Parser0 as P0} +import io.renku.search.model.EntityType import io.renku.search.model.projects.Visibility import io.renku.search.query.* @@ -41,6 +42,28 @@ private[query] object QueryParser { def fieldNameFrom(candidates: Set[Field]) = 
P.stringIn(mkFieldNames(candidates)).map(Field.unsafeFromString) + val sortableField: P[SortableField] = + P.stringIn( + SortableField.values + .map(_.name) + .toSeq ++ SortableField.values.map(_.name.toLowerCase).toSeq + ).map(SortableField.unsafeFromString) + + val sortDirection: P[Order.Direction] = + P.stringIn( + Order.Direction.values + .map(_.name) + .toSeq ++ Order.Direction.values.map(_.name.toLowerCase).toSeq + ).map(Order.Direction.unsafeFromString) + + val orderedBy: P[Order.OrderedBy] = + (sortableField ~ (P.string("-") *> sortDirection)).map { case (f, s) => + Order.OrderedBy(f, s) + } + + val orderedByNel: P[NonEmptyList[Order.OrderedBy]] = + nelOf(orderedBy, commaSep).map(_.distinct) + val comparison: P[Comparison] = P.stringIn(Comparison.values.map(_.asString)).map(Comparison.unsafeFromString) @@ -48,6 +71,9 @@ private[query] object QueryParser { val gt: P[Unit] = P.string(Comparison.GreaterThan.asString) val lt: P[Unit] = P.string(Comparison.LowerThan.asString) + val sortTerm: P[Order] = + (P.string("sort").with1 *> (is *> orderedByNel)).map(Order.apply) + val visibility: P[Visibility] = P.stringIn( Visibility.values @@ -64,19 +90,35 @@ private[query] object QueryParser { val visibilities: P[NonEmptyList[Visibility]] = nelOf(visibility, commaSep) + val entityType: P[EntityType] = + P.stringIn( + EntityType.values + .map(_.name.toLowerCase) + .toSet ++ EntityType.values.map(_.name).toSet + ).map(EntityType.unsafeFromString) + + val entityTypes: P[NonEmptyList[EntityType]] = + nelOf(entityType, commaSep) + val termIs: P[FieldTerm] = { - val field = fieldNameFrom(Field.values.toSet - Field.Created - Field.Visibility) + val field = fieldNameFrom( + Field.values.toSet - Field.Created - Field.Visibility - Field.Type + ) ((field <* is) ~ values).map { case (f, v) => f match - case Field.Name => FieldTerm.NameIs(v) - case Field.ProjectId => FieldTerm.ProjectIdIs(v) - case Field.Slug => FieldTerm.SlugIs(v) - case Field.CreatedBy => FieldTerm.CreatedByIs(v) 
- case Field.Visibility => sys.error("visibility not allowed") - case Field.Created => sys.error("created not allowed") + case Field.Name => FieldTerm.NameIs(v) + case Field.ProjectId => FieldTerm.ProjectIdIs(v) + case Field.Slug => FieldTerm.SlugIs(v) + case Field.CreatedBy => FieldTerm.CreatedByIs(v) + // other fields are excluded from the field list above + case f => sys.error(s"$f not allowed") } } + val typeIs: P[FieldTerm] = + val field = fieldNameFrom(Set(Field.Type)) + ((field ~ is).void *> entityTypes).map(v => FieldTerm.TypeIs(v)) + val visibilityIs: P[FieldTerm] = { val field = fieldNameFrom(Set(Field.Visibility)) ((field ~ is).void *> visibilities).map(v => FieldTerm.VisibilityIs(v)) @@ -90,13 +132,14 @@ private[query] object QueryParser { } } - val fieldTerm: P[FieldTerm] = termIs | visibilityIs | created + val fieldTerm: P[FieldTerm] = termIs | visibilityIs | typeIs | created val freeText: P[String] = P.charsWhile(c => !c.isWhitespace) val segment: P[Query.Segment] = fieldTerm.map(Query.Segment.Field.apply) | + sortTerm.map(Query.Segment.Sort.apply) | freeText.map(Query.Segment.Text.apply) val query: P[Query] = diff --git a/modules/search-query/src/main/scala/io/renku/search/query/parse/QueryUtil.scala b/modules/search-query/src/main/scala/io/renku/search/query/parse/QueryUtil.scala index 5e249720..dc553509 100644 --- a/modules/search-query/src/main/scala/io/renku/search/query/parse/QueryUtil.scala +++ b/modules/search-query/src/main/scala/io/renku/search/query/parse/QueryUtil.scala @@ -40,11 +40,11 @@ private[query] object QueryUtil { case (t1: Segment.Text, tc) => loop(rest, tc |+| Some(t1), result) - case (f: Segment.Field, Some(tc)) => - loop(rest, None, f :: tc :: result) + case t @ ((_: Segment.Field) | (_: Segment.Sort), Some(tc)) => + loop(rest, None, t._1 :: tc :: result) - case (f: Segment.Field, None) => - loop(rest, None, f :: result) + case t @ ((_: Segment.Field) | (_: Segment.Sort), None) => + loop(rest, None, t._1 :: result) case Nil 
=> (curr.toList ::: result).reverse diff --git a/modules/search-query/src/test/scala/io/renku/search/query/DateTimeRefSpec.scala b/modules/search-query/src/test/scala/io/renku/search/query/DateTimeRefSpec.scala new file mode 100644 index 00000000..0a334a42 --- /dev/null +++ b/modules/search-query/src/test/scala/io/renku/search/query/DateTimeRefSpec.scala @@ -0,0 +1,79 @@ +/* + * Copyright 2024 Swiss Data Science Center (SDSC) + * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and + * Eidgenössische Technische Hochschule Zürich (ETHZ). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.renku.search.query + +import cats.syntax.all.* +import munit.FunSuite +import java.time.Instant +import java.time.ZoneId +import java.time.Period + +class DateTimeRefSpec extends FunSuite: + + val refDate: Instant = Instant.parse("2024-02-27T15:34:55Z") + val utc: ZoneId = ZoneId.of("UTC") + + test("resolve relative dates") { + assertEquals( + RelativeDate.Today.resolve, + (refDate, None) + ) + assertEquals( + DateTimeRef.Relative(RelativeDate.Yesterday).resolve(refDate, utc), + (refDate.atZone(utc).minusDays(1).toInstant(), None) + ) + } + + test("resolve partial date") { + val may = PartialDateTime.unsafeFromString("2023-05") + assertEquals( + may.resolve, + (may.instantMin(utc), may.instantMax(utc).some) + ) + val exact = PartialDateTime.fromInstant(Instant.EPOCH) + assertEquals(exact.resolve, (exact.instantMin(utc), None)) + } + + test("resolve date calc") { + val may = PartialDateTime.unsafeFromString("2023-05") + val calc1 = DateTimeCalc(may, Period.ofDays(5), false) + assertEquals( + calc1.resolve, + (may.instantMin(utc).atZone(utc).plusDays(5).toInstant(), None) + ) + + val calc2 = DateTimeCalc(may, Period.ofDays(-5), false) + assertEquals( + calc2.resolve, + (may.instantMin(utc).atZone(utc).minusDays(5).toInstant(), None) + ) + + val range = DateTimeCalc(may, Period.ofDays(5), true) + assertEquals( + range.resolve, + ( + may.instantMin(utc).atZone(utc).minusDays(5).toInstant(), + may.instantMin(utc).atZone(utc).plusDays(5).toInstant().some + ) + ) + } + + extension (r: RelativeDate) def resolve = DateTimeRef(r).resolve(refDate, utc) + extension (d: PartialDateTime) def resolve = DateTimeRef(d).resolve(refDate, utc) + extension (r: DateTimeCalc) def resolve = DateTimeRef(r).resolve(refDate, utc) diff --git a/modules/search-query/src/test/scala/io/renku/search/query/QueryGenerators.scala b/modules/search-query/src/test/scala/io/renku/search/query/QueryGenerators.scala index 6c016caa..9a2401d1 100644 --- 
a/modules/search-query/src/test/scala/io/renku/search/query/QueryGenerators.scala +++ b/modules/search-query/src/test/scala/io/renku/search/query/QueryGenerators.scala @@ -20,6 +20,7 @@ package io.renku.search.query import cats.data.NonEmptyList import cats.syntax.all.* +import io.renku.search.model.{CommonGenerators, ModelGenerators} import io.renku.search.model.projects.Visibility import io.renku.search.query.parse.QueryUtil import org.scalacheck.Gen @@ -77,16 +78,17 @@ object QueryGenerators: val field: Gen[Field] = Gen.oneOf(Field.values.toSeq) - // TODO move to commons - val visibility: Gen[Visibility] = - Gen.oneOf(Visibility.values.toSeq) + val sortableField: Gen[SortableField] = + Gen.oneOf(SortableField.values.toSeq) - // TODO move to commons - def nelOfN[A](n: Int, gen: Gen[A]): Gen[NonEmptyList[A]] = + val sortDirection: Gen[Order.Direction] = + Gen.oneOf(Order.Direction.values.toSeq) + + val orderedBy: Gen[Order.OrderedBy] = for { - e0 <- gen - en <- Gen.listOfN(n - 1, gen) - } yield NonEmptyList(e0, en) + field <- sortableField + dir <- sortDirection + } yield Order.OrderedBy(field, dir) private val alphaNumChars = ('a' to 'z') ++ ('A' to 'Z') ++ ('0' to '9') private val simpleWord: Gen[String] = { @@ -109,7 +111,7 @@ object QueryGenerators: } private val stringValues: Gen[NonEmptyList[String]] = - Gen.choose(1, 4).flatMap(n => nelOfN(n, phrase)) + Gen.choose(1, 4).flatMap(n => CommonGenerators.nelOfN(n, phrase)) val projectIdTerm: Gen[FieldTerm] = stringValues.map(FieldTerm.ProjectIdIs(_)) @@ -125,7 +127,10 @@ object QueryGenerators: val visibilityTerm: Gen[FieldTerm] = Gen - .frequency(10 -> visibility.map(NonEmptyList.one), 1 -> nelOfN(2, visibility)) + .frequency( + 10 -> ModelGenerators.visibilityGen.map(NonEmptyList.one), + 1 -> CommonGenerators.nelOfN(2, ModelGenerators.visibilityGen) + ) .map(vs => FieldTerm.VisibilityIs(vs.distinct)) private val comparison: Gen[Comparison] = @@ -135,7 +140,7 @@ object QueryGenerators: for { cmp <- comparison 
len <- Gen.frequency(5 -> Gen.const(1), 1 -> Gen.choose(1, 3)) - pd <- nelOfN(len, dateTimeRef) + pd <- CommonGenerators.nelOfN(len, dateTimeRef) } yield FieldTerm.Created(cmp, pd) val fieldTerm: Gen[FieldTerm] = @@ -153,9 +158,15 @@ object QueryGenerators: Gen.listOfN(len, phrase).map(_.mkString(" ")) } + val sortTerm: Gen[Order] = + Gen.choose(1, 5).flatMap { len => + CommonGenerators.nelOfN(len, orderedBy).map(_.distinct).map(Order.apply) + } + val segment: Gen[Query.Segment] = Gen.oneOf( fieldTerm.map(Query.Segment.Field.apply), + sortTerm.map(Query.Segment.Sort.apply), freeText.map(Query.Segment.Text.apply) ) diff --git a/modules/search-query/src/test/scala/io/renku/search/query/parse/QueryParserSpec.scala b/modules/search-query/src/test/scala/io/renku/search/query/parse/QueryParserSpec.scala index 2c263f4a..01911ec9 100644 --- a/modules/search-query/src/test/scala/io/renku/search/query/parse/QueryParserSpec.scala +++ b/modules/search-query/src/test/scala/io/renku/search/query/parse/QueryParserSpec.scala @@ -24,9 +24,22 @@ import io.renku.search.query.Query.Segment import io.renku.search.query.Comparison.{GreaterThan, LowerThan} import munit.{FunSuite, ScalaCheckSuite} import org.scalacheck.Prop +import io.renku.search.model.EntityType class QueryParserSpec extends ScalaCheckSuite with ParserSuite { + test("sort term") { + val p = QueryParser.sortTerm + assertEquals(p.run("sort:name-asc"), Order(SortableField.Name -> Order.Direction.Asc)) + assertEquals( + p.run("sort:name-asc,score-desc"), + Order( + SortableField.Name -> Order.Direction.Asc, + SortableField.Score -> Order.Direction.Desc + ) + ) + } + test("string list") { val p = QueryParser.values assertEquals(p.run("a,b,c"), nel("a", "b", "c")) @@ -70,7 +83,8 @@ class QueryParserSpec extends ScalaCheckSuite with ParserSuite { val data = List( "projectId:id5" -> FieldTerm.ProjectIdIs(Nel.of("id5")), "name:\"my project\"" -> FieldTerm.NameIs(Nel.of("my project")), - "slug:ab1,ab2" -> 
FieldTerm.SlugIs(Nel.of("ab1", "ab2")) + "slug:ab1,ab2" -> FieldTerm.SlugIs(Nel.of("ab1", "ab2")), + "type:project" -> FieldTerm.TypeIs(Nel.of(EntityType.Project)) ) data.foreach { case (in, expect) => assertEquals(p.run(in), expect) diff --git a/modules/search-solr-client/src/main/scala/io/renku/search/solr/client/QueryInterpreter.scala b/modules/search-solr-client/src/main/scala/io/renku/search/solr/client/QueryInterpreter.scala deleted file mode 100644 index f2561956..00000000 --- a/modules/search-solr-client/src/main/scala/io/renku/search/solr/client/QueryInterpreter.scala +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright 2024 Swiss Data Science Center (SDSC) - * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and - * Eidgenössische Technische Hochschule Zürich (ETHZ). - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package io.renku.search.solr.client - -import io.renku.search.query.Query -import io.renku.search.query.Query.Segment -import io.renku.search.query.FieldTerm -import io.renku.search.solr.schema.EntityDocumentSchema.Fields -import org.apache.lucene.queryparser.flexible.standard.QueryParserUtil - -private object QueryInterpreter { - - def apply(query: Query): String = - if (query.isEmpty) "_type:Project" // User not yet supported to decode - else - query.segments - .map { - case Segment.Field(FieldTerm.ProjectIdIs(ids)) => - ids.toList - .map(escape) - .map(id => s"${Fields.id.name}:$id") - .mkString("(", " OR ", ")") - - case Segment.Field(FieldTerm.SlugIs(slugs)) => - slugs.toList - .map(escape) - .map(slug => s"${Fields.slug.name}:$slug") - .mkString("(", " OR ", ")") - - case Segment.Field(FieldTerm.NameIs(names)) => - names.toList - .map(escape) - .map(name => s"${Fields.name.name}:$name") - .mkString("(", " OR ", ")") - - case Segment.Text(txt) => - s"${Fields.contentAll.name}:${escape(txt)}" - - case _ => - "" - } - .mkString(" AND ") - - private def escape(s: String): String = - val escaped = QueryParserUtil.escape(s) - if (escaped.exists(_.isWhitespace)) s"($escaped)" - else escaped - -} diff --git a/modules/search-solr-client/src/main/scala/io/renku/search/solr/client/SearchSolrClientImpl.scala b/modules/search-solr-client/src/main/scala/io/renku/search/solr/client/SearchSolrClientImpl.scala index 5056354e..40bd8cd3 100644 --- a/modules/search-solr-client/src/main/scala/io/renku/search/solr/client/SearchSolrClientImpl.scala +++ b/modules/search-solr-client/src/main/scala/io/renku/search/solr/client/SearchSolrClientImpl.scala @@ -21,6 +21,7 @@ package io.renku.search.solr.client import cats.effect.Async import cats.syntax.all.* import io.renku.search.solr.documents.Project +import io.renku.search.solr.query.LuceneQueryInterpreter import io.renku.search.solr.schema.EntityDocumentSchema import io.renku.solr.client.{QueryData, QueryString, SolrClient} 
import io.renku.search.query.Query @@ -30,6 +31,7 @@ private class SearchSolrClientImpl[F[_]: Async](solrClient: SolrClient[F]) extends SearchSolrClient[F]: private[this] val logger = scribe.cats.effect[F] + private[this] val interpreter = LuceneQueryInterpreter.forSync[F] override def insertProjects(projects: Seq[Project]): F[Unit] = solrClient.insert(projects).void @@ -39,10 +41,15 @@ private class SearchSolrClientImpl[F[_]: Async](solrClient: SolrClient[F]) limit: Int, offset: Int ): F[QueryResponse[Project]] = - val solrQuery = QueryInterpreter(query) - logger.debug(s"Query: ${query.render} ->Solr: $solrQuery") >> - solrClient - .query[Project](QueryData.withChildren(QueryString(solrQuery, limit, offset))) + for { + solrQuery <- interpreter.run(query) + _ <- logger.debug(s"Query: ${query.render} ->Solr: $solrQuery") + res <- solrClient + .query[Project]( + QueryData(QueryString(solrQuery.query.value, limit, offset)) + .copy(sort = solrQuery.sort) + ) + } yield res override def findProjects(phrase: String): F[List[Project]] = solrClient diff --git a/modules/search-solr-client/src/main/scala/io/renku/search/solr/documents/Project.scala b/modules/search-solr-client/src/main/scala/io/renku/search/solr/documents/Project.scala index 491558f2..650e2649 100644 --- a/modules/search-solr-client/src/main/scala/io/renku/search/solr/documents/Project.scala +++ b/modules/search-solr-client/src/main/scala/io/renku/search/solr/documents/Project.scala @@ -33,7 +33,8 @@ final case class Project( description: Option[projects.Description] = None, createdBy: users.Id, creationDate: projects.CreationDate, - members: Seq[users.Id] = Seq.empty + members: Seq[users.Id] = Seq.empty, + score: Option[Double] = None ) object Project: diff --git a/modules/search-solr-client/src/main/scala/io/renku/search/solr/query/Context.scala b/modules/search-solr-client/src/main/scala/io/renku/search/solr/query/Context.scala new file mode 100644 index 00000000..c242da55 --- /dev/null +++ 
b/modules/search-solr-client/src/main/scala/io/renku/search/solr/query/Context.scala @@ -0,0 +1,45 @@ +/* + * Copyright 2024 Swiss Data Science Center (SDSC) + * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and + * Eidgenössische Technische Hochschule Zürich (ETHZ). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.renku.search.solr.query + +import java.time.Instant +import cats.effect.{Clock, Sync} +import cats.syntax.all.* +import java.time.ZoneId +import cats.Applicative + +trait Context[F[_]]: + def currentTime: F[Instant] + def zoneId: F[ZoneId] + +object Context: + def forSync[F[_]: Sync]: Context[F] = + new Context[F]: + def currentTime: F[Instant] = Clock[F].realTimeInstant + def zoneId: F[ZoneId] = Sync[F].delay(ZoneId.systemDefault()) + + def fixed[F[_]: Applicative](time: Instant, zone: ZoneId): Context[F] = + new Context[F]: + def currentTime = time.pure[F] + def zoneId = zone.pure[F] + + def fixedZone[F[_]: Applicative: Clock](zone: ZoneId): Context[F] = + new Context[F]: + def currentTime = Clock[F].realTimeInstant + def zoneId = zone.pure[F] diff --git a/modules/search-solr-client/src/main/scala/io/renku/search/solr/query/LuceneQueryEncoders.scala b/modules/search-solr-client/src/main/scala/io/renku/search/solr/query/LuceneQueryEncoders.scala new file mode 100644 index 00000000..b3d78106 --- /dev/null +++ b/modules/search-solr-client/src/main/scala/io/renku/search/solr/query/LuceneQueryEncoders.scala 
@@ -0,0 +1,151 @@ +/* + * Copyright 2024 Swiss Data Science Center (SDSC) + * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and + * Eidgenössische Technische Hochschule Zürich (ETHZ). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.renku.search.solr.query + +import cats.syntax.all.* +import io.renku.search.query.Query.Segment +import io.renku.search.query.FieldTerm +import io.renku.search.query.Field +import io.renku.search.query.Query + +import cats.Monad +import cats.Applicative +import io.renku.search.query.Comparison + +trait LuceneQueryEncoders: + + given projectIdIs[F[_]: Applicative]: SolrTokenEncoder[F, FieldTerm.ProjectIdIs] = + SolrTokenEncoder.basic { case FieldTerm.ProjectIdIs(ids) => + SolrQuery(SolrToken.orFieldIs(Field.ProjectId, ids.map(SolrToken.fromString))) + } + + given nameIs[F[_]: Applicative]: SolrTokenEncoder[F, FieldTerm.NameIs] = + SolrTokenEncoder.basic { case FieldTerm.NameIs(names) => + SolrQuery(SolrToken.orFieldIs(Field.Name, names.map(SolrToken.fromString))) + } + + given typeIs[F[_]: Applicative]: SolrTokenEncoder[F, FieldTerm.TypeIs] = + SolrTokenEncoder.basic { case FieldTerm.TypeIs(values) => + SolrQuery(SolrToken.orFieldIs(Field.Type, values.map(SolrToken.fromEntityType))) + } + + given slugIs[F[_]: Applicative]: SolrTokenEncoder[F, FieldTerm.SlugIs] = + SolrTokenEncoder.basic { case FieldTerm.SlugIs(names) => + SolrQuery(SolrToken.orFieldIs(Field.Slug, 
names.map(SolrToken.fromString))) + } + + given createdByIs[F[_]: Applicative]: SolrTokenEncoder[F, FieldTerm.CreatedByIs] = + SolrTokenEncoder.basic { case FieldTerm.CreatedByIs(names) => + SolrQuery(SolrToken.orFieldIs(Field.CreatedBy, names.map(SolrToken.fromString))) + } + + given visibilityIs[F[_]: Applicative]: SolrTokenEncoder[F, FieldTerm.VisibilityIs] = + SolrTokenEncoder.basic { case FieldTerm.VisibilityIs(values) => + SolrQuery( + SolrToken.orFieldIs(Field.Visibility, values.map(SolrToken.fromVisibility)) + ) + } + + given created[F[_]: Monad]: SolrTokenEncoder[F, FieldTerm.Created] = + val created = SolrToken.fromField(Field.Created) + SolrTokenEncoder.create[F, FieldTerm.Created] { + case (ctx, FieldTerm.Created(Comparison.Is, values)) => + (ctx.currentTime, ctx.zoneId).mapN { (ref, zone) => + SolrQuery( + values + .map(_.resolve(ref, zone)) + .map { case (min, maxOpt) => + maxOpt + .map(max => created === SolrToken.fromDateRange(min, max)) + .getOrElse(created === SolrToken.fromInstant(min)) + } + .toList + .foldOr + ) + } + + case (ctx, FieldTerm.Created(Comparison.GreaterThan, values)) => + (ctx.currentTime, ctx.zoneId).mapN { (ref, zone) => + SolrQuery( + values + .map(_.resolve(ref, zone)) + .map { case (min, maxOpt) => + created > SolrToken.fromInstant(maxOpt.getOrElse(min)) + } + .toList + .foldOr + ) + } + + case (ctx, FieldTerm.Created(Comparison.LowerThan, values)) => + (ctx.currentTime, ctx.zoneId).mapN { (ref, zone) => + SolrQuery( + values + .map(_.resolve(ref, zone)) + .map { case (min, _) => + created < SolrToken.fromInstant(min) + } + .toList + .foldOr + ) + } + } + + given fieldTerm[F[_]: Monad]: SolrTokenEncoder[F, FieldTerm] = + SolrTokenEncoder.derived[F, FieldTerm] + + given fieldSegment[F[_]: Applicative](using + fe: SolrTokenEncoder[F, FieldTerm] + ): SolrTokenEncoder[F, Segment.Field] = + SolrTokenEncoder.curried[F, Segment.Field] { ctx => + { case Segment.Field(f) => + fe.encode(ctx, f) + } + } + + given textSegment[F[_]: 
Applicative]: SolrTokenEncoder[F, Segment.Text] = + SolrTokenEncoder.basic(t => SolrQuery(SolrToken.contentAll(t.value))) + + given sortSegment[F[_]: Applicative]: SolrTokenEncoder[F, Segment.Sort] = + SolrTokenEncoder.basic(t => SolrQuery.sort(t.value)) + + given segment[F[_]](using + et: SolrTokenEncoder[F, Segment.Text], + ef: SolrTokenEncoder[F, Segment.Field], + es: SolrTokenEncoder[F, Segment.Sort] + ): SolrTokenEncoder[F, Segment] = + SolrTokenEncoder.curried[F, Segment] { ctx => + { + case s: Segment.Text => et.encode(ctx, s) + case s: Segment.Field => ef.encode(ctx, s) + case s: Segment.Sort => es.encode(ctx, s) + } + } + + given segmentAnd[F[_]: Monad, A](using + se: SolrTokenEncoder[F, A] + ): SolrTokenEncoder[F, List[A]] = + SolrTokenEncoder.create[F, List[A]] { (ctx, nel) => + nel.traverse(se.encode(ctx, _)).map(_.toSeq.combineAll) + } + + given query[F[_]: Monad](using + se: SolrTokenEncoder[F, List[Segment]] + ): SolrTokenEncoder[F, Query] = + se.contramap(_.segments) diff --git a/modules/search-solr-client/src/main/scala/io/renku/search/solr/query/LuceneQueryInterpreter.scala b/modules/search-solr-client/src/main/scala/io/renku/search/solr/query/LuceneQueryInterpreter.scala new file mode 100644 index 00000000..a1759660 --- /dev/null +++ b/modules/search-solr-client/src/main/scala/io/renku/search/solr/query/LuceneQueryInterpreter.scala @@ -0,0 +1,40 @@ +/* + * Copyright 2024 Swiss Data Science Center (SDSC) + * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and + * Eidgenössische Technische Hochschule Zürich (ETHZ). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.renku.search.solr.query + +import cats.Monad +import cats.effect.Sync +import cats.syntax.all.* +import io.renku.search.query.Query + +/** Provides conversion into Solr's standard query. See + * https://solr.apache.org/guide/solr/latest/query-guide/standard-query-parser.html + */ +final class LuceneQueryInterpreter[F[_]: Monad] + extends QueryInterpreter[F] + with LuceneQueryEncoders: + private val encoder = SolrTokenEncoder[F, Query] + + def run(ctx: Context[F], query: Query): F[SolrQuery] = + if (query.isEmpty) SolrQuery(SolrToken.allTypes).pure[F] + else encoder.encode(ctx, query) + +object LuceneQueryInterpreter: + def forSync[F[_]: Sync]: QueryInterpreter.WithContext[F] = + QueryInterpreter.withContext(LuceneQueryInterpreter[F], Context.forSync[F]) diff --git a/modules/search-solr-client/src/main/scala/io/renku/search/solr/query/QueryInterpreter.scala b/modules/search-solr-client/src/main/scala/io/renku/search/solr/query/QueryInterpreter.scala new file mode 100644 index 00000000..02ad7787 --- /dev/null +++ b/modules/search-solr-client/src/main/scala/io/renku/search/solr/query/QueryInterpreter.scala @@ -0,0 +1,32 @@ +/* + * Copyright 2024 Swiss Data Science Center (SDSC) + * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and + * Eidgenössische Technische Hochschule Zürich (ETHZ). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.renku.search.solr.query + +import io.renku.search.query.Query + +trait QueryInterpreter[F[_]]: + def run(ctx: Context[F], q: Query): F[SolrQuery] + +object QueryInterpreter: + trait WithContext[F[_]]: + def run(q: Query): F[SolrQuery] + + def withContext[F[_]](qi: QueryInterpreter[F], ctx: Context[F]): WithContext[F] = + new WithContext[F]: + def run(q: Query) = qi.run(ctx, q) diff --git a/modules/search-solr-client/src/main/scala/io/renku/search/solr/query/SolrQuery.scala b/modules/search-solr-client/src/main/scala/io/renku/search/solr/query/SolrQuery.scala new file mode 100644 index 00000000..8ceac60c --- /dev/null +++ b/modules/search-solr-client/src/main/scala/io/renku/search/solr/query/SolrQuery.scala @@ -0,0 +1,42 @@ +/* + * Copyright 2024 Swiss Data Science Center (SDSC) + * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and + * Eidgenössische Technische Hochschule Zürich (ETHZ). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.renku.search.solr.query + +import cats.Monoid +import cats.syntax.all.* +import io.renku.search.query.Order +import io.renku.solr.client.SolrSort + +final case class SolrQuery( + query: SolrToken, + sort: SolrSort +): + def ++(next: SolrQuery): SolrQuery = + SolrQuery(query && next.query, sort ++ next.sort) + +object SolrQuery: + val empty: SolrQuery = SolrQuery(SolrToken.empty, SolrSort.empty) + + def apply(e: SolrToken): SolrQuery = + SolrQuery(e, SolrSort.empty) + + def sort(order: Order): SolrQuery = + SolrQuery(SolrToken.empty, SolrSortCreate(order.fields)) + + given Monoid[SolrQuery] = Monoid.instance(empty, (a, b) => a ++ b) diff --git a/modules/search-solr-client/src/main/scala/io/renku/search/solr/query/SolrSortCreate.scala b/modules/search-solr-client/src/main/scala/io/renku/search/solr/query/SolrSortCreate.scala new file mode 100644 index 00000000..ffa49510 --- /dev/null +++ b/modules/search-solr-client/src/main/scala/io/renku/search/solr/query/SolrSortCreate.scala @@ -0,0 +1,43 @@ +/* + * Copyright 2024 Swiss Data Science Center (SDSC) + * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and + * Eidgenössische Technische Hochschule Zürich (ETHZ). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.renku.search.solr.query + +import io.renku.search.query.{Order, SortableField} +import io.renku.search.solr.schema.EntityDocumentSchema.Fields as SolrField +import io.renku.solr.client.schema.FieldName +import io.renku.solr.client.SolrSort +import cats.data.NonEmptyList + +private object SolrSortCreate: + private def fromField(field: SortableField): FieldName = + field match + case SortableField.Name => SolrField.name + case SortableField.Score => SolrField.score + case SortableField.Created => SolrField.creationDate + + private def fromDirection(d: Order.Direction): SolrSort.Direction = + d match + case Order.Direction.Asc => SolrSort.Direction.Asc + case Order.Direction.Desc => SolrSort.Direction.Desc + + def apply(ts: Order.OrderedBy*): SolrSort = + SolrSort(ts.map(e => (fromField(e.field), fromDirection(e.direction))): _*) + + def apply(ts: NonEmptyList[Order.OrderedBy]): SolrSort = + apply(ts.toList: _*) diff --git a/modules/search-solr-client/src/main/scala/io/renku/search/solr/query/SolrToken.scala b/modules/search-solr-client/src/main/scala/io/renku/search/solr/query/SolrToken.scala new file mode 100644 index 00000000..cc78fff4 --- /dev/null +++ b/modules/search-solr-client/src/main/scala/io/renku/search/solr/query/SolrToken.scala @@ -0,0 +1,132 @@ +/* + * Copyright 2024 Swiss Data Science Center (SDSC) + * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and + * Eidgenössische Technische Hochschule Zürich (ETHZ). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.renku.search.solr.query + +import cats.syntax.all.* +import io.renku.search.query.Field +import cats.data.NonEmptyList +import cats.Monoid +import java.time.Instant +import io.renku.search.solr.documents.{Project as SolrProject, User as SolrUser} +import io.renku.search.solr.schema.EntityDocumentSchema.Fields as SolrField +import io.renku.solr.client.schema.FieldName +import io.renku.search.query.Comparison +import io.renku.search.model.EntityType +import io.renku.search.model.projects.Visibility + +opaque type SolrToken = String + +object SolrToken: + val empty: SolrToken = "" + def fromString(str: String): SolrToken = Escape.queryChars(str) + def fromVisibility(v: Visibility): SolrToken = v.name + def fromEntityType(et: EntityType): SolrToken = + et match + case EntityType.Project => SolrProject.entityType + case EntityType.User => SolrUser.entityType + + def fromField(field: Field): SolrToken = + (field match + case Field.ProjectId => SolrField.id + case Field.Name => SolrField.name + case Field.Slug => SolrField.slug + case Field.Visibility => SolrField.visibility + case Field.CreatedBy => SolrField.createdBy + case Field.Created => SolrField.creationDate + case Field.Type => SolrField.entityType + ).name + + def fromInstant(ts: Instant): SolrToken = ts.toString + def fromDateRange(min: Instant, max: Instant): SolrToken = s"[$min TO $max]" + + def fromComparison(op: Comparison): SolrToken = + op match + case Comparison.Is => ":" + case Comparison.GreaterThan => ">" + case Comparison.LowerThan => "<" + + def contentAll(text: String): SolrToken = + s"${SolrField.contentAll.name}:${Escape.queryChars(text)}" + + def orFieldIs(field: Field, values: NonEmptyList[SolrToken]): SolrToken = + values.map(fieldIs(field, _)).toList.foldOr + + def dateIs(field: Field, date: Instant): SolrToken = fieldIs(field, fromInstant(date)) + def dateGt(field: 
Field, date: Instant): SolrToken = + fieldOp(field, Comparison.GreaterThan, date.toString) + def dateLt(field: Field, date: Instant): SolrToken = + fieldOp(field, Comparison.LowerThan, date.toString) + + // TODO: currently only projects work, user can't be decoded + val allTypes: SolrToken = fieldIs(Field.Type, "Project") + + private def fieldOp(field: Field, op: Comparison, value: SolrToken): SolrToken = + val cmp = fromComparison(op) + val f = fromField(field) + f ~ cmp ~ value + + def fieldIs(field: Field, value: SolrToken): SolrToken = + fieldOp(field, Comparison.Is, value) + + private def monoidWith(sep: String): Monoid[SolrToken] = + Monoid.instance( + empty, + (a, b) => if (a.isEmpty) b else if (b.isEmpty) a else s"$a$sep$b" + ) + private val orMonoid: Monoid[SolrToken] = monoidWith(" OR ") + private val andMonoid: Monoid[SolrToken] = monoidWith(" AND ") + private val spaceMonoid: Monoid[SolrToken] = monoidWith(" ") + + extension (self: SolrToken) + def value: String = self + def isEmpty: Boolean = self.isEmpty + def nonEmpty: Boolean = !self.isEmpty + def ~(next: SolrToken): SolrToken = self + next + def +=(next: SolrToken): SolrToken = spaceMonoid.combine(self, next) + def &&(next: SolrToken): SolrToken = andMonoid.combine(self, next) + def ||(next: SolrToken): SolrToken = orMonoid.combine(self, next) + def ===(next: SolrToken): SolrToken = self ~ Comparison.Is.token ~ next + def <(next: SolrToken): SolrToken = self ~ Comparison.LowerThan.token ~ next + def >(next: SolrToken): SolrToken = self ~ Comparison.GreaterThan.token ~ next + + extension (self: Comparison) def token: SolrToken = fromComparison(self) + + extension (self: Seq[SolrToken]) + def foldM(using Monoid[SolrToken]): SolrToken = + val all = self.combineAll + if (self.sizeIs <= 1) all else s"($all)" + def foldOr: SolrToken = foldM(using orMonoid) + def foldAnd: SolrToken = foldM(using andMonoid) + + // Escapes query characters for solr. 
This is taken from here: + // https://github.com/apache/solr/blob/bcb9f144974ed07aa3b66766302474542067b522/solr/solrj/src/java/org/apache/solr/client/solrj/util/ClientUtils.java#L163 + // to not introduce too many dependencies only for this little function + private object Escape { + private[this] val specialChars = "\\+-!():^[]\"{}~*?|&;/" + private inline def isSpecial(c: Char) = c.isWhitespace || specialChars.contains(c) + + def queryChars(s: String): String = { + val sb = new StringBuilder(); + s.foreach { c => + if (isSpecial(c)) sb.append('\\') + sb.append(c) + } + sb.toString // return the escaped buffer, not the raw input + } + } diff --git a/modules/search-solr-client/src/main/scala/io/renku/search/solr/query/SolrTokenEncoder.scala b/modules/search-solr-client/src/main/scala/io/renku/search/solr/query/SolrTokenEncoder.scala new file mode 100644 index 00000000..86f8a2b1 --- /dev/null +++ b/modules/search-solr-client/src/main/scala/io/renku/search/solr/query/SolrTokenEncoder.scala @@ -0,0 +1,91 @@ +/* + * Copyright 2024 Swiss Data Science Center (SDSC) + * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and + * Eidgenössische Technische Hochschule Zürich (ETHZ). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package io.renku.search.solr.query
+
+import cats.{Applicative, Monad}
+import cats.syntax.all.*
+import scala.deriving.*
+import scala.collection.AbstractIterable
+
+/** Encodes a value of type `A` into a `SolrQuery` inside the effect `F`,
+  * with access to a `Context`.
+  */
+trait SolrTokenEncoder[F[_], A]:
+  def encode(ctx: Context[F], value: A): F[SolrQuery]
+
+  /** Creates an encoder for `B` that first applies `f` and then delegates to
+    * this encoder.
+    */
+  final def contramap[B](f: B => A): SolrTokenEncoder[F, B] =
+    SolrTokenEncoder.create((ctx, b) => encode(ctx, f(b)))
+
+object SolrTokenEncoder:
+  /** Returns the encoder instance available in the given scope. */
+  def apply[F[_], A](using e: SolrTokenEncoder[F, A]): SolrTokenEncoder[F, A] = e
+
+  /** Builds an encoder from a function taking the context and the value. */
+  def create[F[_], A](
+      f: (ctx: Context[F], value: A) => F[SolrQuery]
+  ): SolrTokenEncoder[F, A] =
+    new SolrTokenEncoder[F, A]:
+      def encode(ctx: Context[F], value: A) = f(ctx, value)
+
+  /** Same as `create`, for a function given in curried form. */
+  def curried[F[_], A](f: Context[F] => A => F[SolrQuery]): SolrTokenEncoder[F, A] =
+    create[F, A]((ctx, v) => f(ctx)(v))
+
+  /** Derives an encoder for `A` from its `Mirror`; see `Macros.derived`. */
+  inline def derived[F[_]: Monad, A](using Mirror.Of[A]): SolrTokenEncoder[F, A] =
+    Macros.derived[F, A]
+
+  /** Builds an encoder that ignores the context and lifts the result of `f`
+    * into `F` with `pure`.
+    */
+  def basic[F[_]: Applicative, A](f: A => SolrQuery): SolrTokenEncoder[F, A] =
+    create[F, A]((_, v) => f(v).pure[F])
+
+  private object Macros {
+    import scala.compiletime.*
+
+    // Summons the encoders of all element types of `T` and selects the sum or
+    // the product strategy depending on the kind of mirror.
+    inline def derived[F[_]: Monad, T](using m: Mirror.Of[T]): SolrTokenEncoder[F, T] =
+      lazy val elemInstances = summonInstances[F, T, m.MirroredElemTypes]
+      inline m match
+        case s: Mirror.SumOf[T] => sumTokenEncoder(s, elemInstances)
+        case p: Mirror.ProductOf[T] => prodTokenEncoder(p, elemInstances)
+
+    // Walks the tuple of element types and summons one encoder per element,
+    // keeping the order of the tuple.
+    inline def summonInstances[F[_]: Monad, T, Elems <: Tuple]
+        : List[SolrTokenEncoder[F, ?]] =
+      inline erasedValue[Elems] match
+        case _: (elem *: elems) =>
+          summonInline[SolrTokenEncoder[F, elem]] :: summonInstances[F, T, elems]
+        case _: EmptyTuple => Nil
+
+    // Sum types: picks the encoder at the ordinal of the value and delegates
+    // to it. The cast is needed because the element type is erased in the list.
+    def sumTokenEncoder[F[_]: Monad, T](
+        s: Mirror.SumOf[T],
+        elems: => List[SolrTokenEncoder[F, ?]]
+    ): SolrTokenEncoder[F, T] =
+      SolrTokenEncoder.create[F, T] { (ctx, v) =>
+        val ord = s.ordinal(v)
+        elems(ord).asInstanceOf[SolrTokenEncoder[F, Any]].encode(ctx, v)
+      }
+
+    // Product types: encodes every element with the encoder at the same
+    // position, sequences the effects and merges the results with `combineAll`.
+    def prodTokenEncoder[F[_]: Monad, T](
+        s: Mirror.ProductOf[T],
+        elems: => List[SolrTokenEncoder[F, ?]]
+    ): SolrTokenEncoder[F, T] =
+      SolrTokenEncoder.create[F, T] { (ctx, v) =>
+        val vel = iterable(v)
+          .zip(elems)
+          .map { case (va, ea) =>
+            ea.asInstanceOf[SolrTokenEncoder[F, Any]].encode(ctx, va)
+          }
+          .toList
+          .sequence
+        vel.map(_.combineAll)
+      }
+
+    // Exposes the elements of `p` through `productIterator`; `p` is cast to
+    // `Product`, so it must be a product value.
+    def iterable[T](p: T): Iterable[Any] = new AbstractIterable[Any]:
+      def iterator: Iterator[Any] = p.asInstanceOf[Product].productIterator
+  }
diff --git a/modules/search-solr-client/src/main/scala/io/renku/search/solr/schema/EntityDocumentSchema.scala b/modules/search-solr-client/src/main/scala/io/renku/search/solr/schema/EntityDocumentSchema.scala
index a6e8b60a..f2fe9a44 100644
--- a/modules/search-solr-client/src/main/scala/io/renku/search/solr/schema/EntityDocumentSchema.scala
+++ b/modules/search-solr-client/src/main/scala/io/renku/search/solr/schema/EntityDocumentSchema.scala
@@ -38,6 +38,8 @@ object EntityDocumentSchema:
     val nestParent: FieldName = FieldName("_nest_parent_")
     // catch-all field
     val contentAll: FieldName = FieldName("content_all")
+    // virtual score field
+    val score: FieldName = FieldName("score")
 
   object FieldTypes:
     val id: FieldType = FieldType.id(TypeName("SearchId")).makeDocValue
diff --git a/modules/search-solr-client/src/test/scala/io/renku/search/solr/client/SearchSolrClientSpec.scala b/modules/search-solr-client/src/test/scala/io/renku/search/solr/client/SearchSolrClientSpec.scala
index c92ebdf7..6294b6ef 100644
--- a/modules/search-solr-client/src/test/scala/io/renku/search/solr/client/SearchSolrClientSpec.scala
+++ b/modules/search-solr-client/src/test/scala/io/renku/search/solr/client/SearchSolrClientSpec.scala
@@ -21,6 +21,7 @@ package io.renku.search.solr.client
 import cats.effect.IO
 import io.renku.search.solr.client.SearchSolrClientGenerators.*
 import munit.CatsEffectSuite
+import io.renku.search.query.Query
 
 class SearchSolrClientSpec extends CatsEffectSuite with SearchSolrSpec:
 
@@ -30,7 +31,7 @@ class
SearchSolrClientSpec extends CatsEffectSuite with SearchSolrSpec: projectDocumentGen("solr-project", "solr project description").generateOne for { _ <- client.insertProjects(Seq(project)) - r <- client.findProjects("solr") - _ = assert(r contains project) + r <- client.queryProjects(Query.parse("solr").toOption.get, 10, 0) + _ = assert(r.responseBody.docs.map(_.copy(score = None)) contains project) } yield () } diff --git a/modules/search-solr-client/src/test/scala/io/renku/search/solr/query/LuceneQueryEncoderSpec.scala b/modules/search-solr-client/src/test/scala/io/renku/search/solr/query/LuceneQueryEncoderSpec.scala new file mode 100644 index 00000000..c9b04401 --- /dev/null +++ b/modules/search-solr-client/src/test/scala/io/renku/search/solr/query/LuceneQueryEncoderSpec.scala @@ -0,0 +1,103 @@ +/* + * Copyright 2024 Swiss Data Science Center (SDSC) + * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and + * Eidgenössische Technische Hochschule Zürich (ETHZ). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package io.renku.search.solr.query
+
+import munit.FunSuite
+import cats.Id
+import cats.data.NonEmptyList as Nel
+import io.renku.search.query.{Comparison, FieldTerm}
+import java.time.{Instant, ZoneId}
+import io.renku.search.query.*
+
+/** Checks how `FieldTerm.Created` terms are encoded, using a context built
+  * from a fixed reference date and the UTC zone.
+  */
+class LuceneQueryEncoderSpec extends FunSuite with LuceneQueryEncoders:
+
+  val refDate: Instant = Instant.parse("2024-02-27T15:34:55Z")
+  val utc: ZoneId = ZoneId.of("UTC")
+
+  // context built from the fixed reference date and zone above
+  val ctx: Context[Id] = Context.fixed(refDate, utc)
+  val createdEncoder = SolrTokenEncoder[Id, FieldTerm.Created]
+
+  // greater-than on a partial date is expected to compare against its
+  // latest instant (instantMax)
+  test("use date-max for greater-than"):
+    val pd = PartialDateTime.unsafeFromString("2023-05")
+    val date: FieldTerm.Created =
+      FieldTerm.Created(Comparison.GreaterThan, Nel.of(DateTimeRef(pd)))
+    assertEquals(
+      createdEncoder.encode(ctx, date),
+      SolrQuery(
+        SolrToken.fromField(Field.Created) ~
+          SolrToken.fromComparison(Comparison.GreaterThan) ~
+          SolrToken.fromInstant(pd.instantMax(utc))
+      )
+    )
+
+  // lower-than on a partial date is expected to compare against its
+  // earliest instant (instantMin)
+  test("use date-min for lower-than"):
+    val pd = PartialDateTime.unsafeFromString("2023-05")
+    val date: FieldTerm.Created =
+      FieldTerm.Created(Comparison.LowerThan, Nel.of(DateTimeRef(pd)))
+    assertEquals(
+      createdEncoder.encode(ctx, date),
+      SolrQuery(
+        SolrToken.fromField(Field.Created) ~
+          SolrToken.fromComparison(Comparison.LowerThan) ~
+          SolrToken.fromInstant(pd.instantMin(utc))
+      )
+    )
+
+  // `RelativeDate.Today` is expected to encode as the reference date of the
+  // context
+  test("created comparison is"):
+    val cToday: FieldTerm.Created =
+      FieldTerm.Created(Comparison.Is, Nel.of(DateTimeRef(RelativeDate.Today)))
+    assertEquals(
+      createdEncoder.encode(ctx, cToday),
+      SolrQuery(SolrToken.dateIs(Field.Created, refDate))
+    )
+
+  // an `is` comparison with a partial date is expected to encode as a date
+  // range from instantMin to instantMax
+  test("single range"):
+    val pd = PartialDateTime.unsafeFromString("2023-05")
+    val date: FieldTerm.Created =
+      FieldTerm.Created(Comparison.Is, Nel.of(DateTimeRef(pd)))
+    assertEquals(
+      createdEncoder.encode(ctx, date),
+      SolrQuery(
+        SolrToken.fieldIs(
+          Field.Created,
+          SolrToken.fromDateRange(pd.instantMin(utc), pd.instantMax(utc))
+        )
+      )
+    )
+
+  // several partial dates are expected to yield one range per date, joined
+  // with `foldOr`
+  test("multiple range"):
+    val pd1 = PartialDateTime.unsafeFromString("2023-05")
+    val pd2 = PartialDateTime.unsafeFromString("2023-08")
+    val date: FieldTerm.Created =
+      FieldTerm.Created(Comparison.Is, Nel.of(DateTimeRef(pd1), DateTimeRef(pd2)))
+    assertEquals(
+      createdEncoder.encode(ctx, date),
+      SolrQuery(
+        List(
+          SolrToken.fieldIs(
+            Field.Created,
+            SolrToken.fromDateRange(pd1.instantMin(utc), pd1.instantMax(utc))
+          ),
+          SolrToken.fieldIs(
+            Field.Created,
+            SolrToken.fromDateRange(pd2.instantMin(utc), pd2.instantMax(utc))
+          )
+        ).foldOr
+      )
+    )
diff --git a/modules/solr-client/src/main/scala/io/renku/solr/client/QueryData.scala b/modules/solr-client/src/main/scala/io/renku/solr/client/QueryData.scala
index aee7010a..ef61c856 100644
--- a/modules/solr-client/src/main/scala/io/renku/solr/client/QueryData.scala
+++ b/modules/solr-client/src/main/scala/io/renku/solr/client/QueryData.scala
@@ -28,6 +28,7 @@ final case class QueryData(
     limit: Int,
     offset: Int,
     fields: Seq[FieldName],
+    sort: SolrSort,
     params: Map[String, String]
 ):
   def nextPage: QueryData =
@@ -47,9 +48,17 @@ final case class QueryData(
 
 object QueryData:
   def apply(query: QueryString): QueryData =
-    QueryData(query.q, Nil, query.limit, query.offset, Nil, Map.empty)
+    QueryData(query.q, Nil, query.limit, query.offset, Nil, SolrSort.empty, Map.empty)
 
   def withChildren(query: QueryString): QueryData =
-    QueryData(query.q, Nil, query.limit, query.offset, Nil, Map("fl" -> "*,[child]"))
+    QueryData(
+      query.q,
+      Nil,
+      query.limit,
+      query.offset,
+      Nil,
+      SolrSort.empty,
+      Map("fl" -> "* score,[child]")
+    )
 
   given Encoder[QueryData] = deriveEncoder
diff --git a/modules/solr-client/src/main/scala/io/renku/solr/client/SolrSort.scala b/modules/solr-client/src/main/scala/io/renku/solr/client/SolrSort.scala
new file mode 100644
index 00000000..45e506ae
--- /dev/null
+++ b/modules/solr-client/src/main/scala/io/renku/solr/client/SolrSort.scala
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2024 Swiss Data Science Center (SDSC)
+ * A partnership between
École Polytechnique Fédérale de Lausanne (EPFL) and
+ * Eidgenössische Technische Hochschule Zürich (ETHZ).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.renku.solr.client
+
+import cats.kernel.Monoid
+import io.renku.solr.client.schema.FieldName
+import io.bullet.borer.Encoder
+
+/** An ordered list of (field, direction) pairs describing a sort order. */
+opaque type SolrSort = Seq[(FieldName, SolrSort.Direction)]
+
+object SolrSort:
+  enum Direction:
+    case Asc
+    case Desc
+    // lower-cased case name; this is the value used when encoding
+    val name: String = productPrefix.toLowerCase
+
+  object Direction:
+    /** Looks up a direction by its case name, ignoring upper/lower case.
+      * Returns a `Left` with an error message if no case matches.
+      */
+    def fromString(s: String): Either[String, Direction] =
+      Direction.values
+        .find(_.toString.equalsIgnoreCase(s))
+        .toRight(s"Invalid sort direction: $s")
+
+    /** Like `fromString`, but fails via `sys.error` on invalid input. */
+    def unsafeFromString(s: String): Direction =
+      fromString(s).fold(sys.error, identity)
+
+    given Encoder[Direction] = Encoder.forString.contramap(_.name)
+
+  def apply(s: (FieldName, Direction)*): SolrSort = s
+
+  val empty: SolrSort = Seq.empty
+
+  extension (self: SolrSort)
+    // NOTE(review): `self.isEmpty` here looks like it resolves to the member
+    // of the underlying Seq (the alias is transparent in this scope) rather
+    // than recursing into this extension - confirm before restructuring.
+    def isEmpty: Boolean = self.isEmpty
+    def nonEmpty: Boolean = !self.isEmpty
+    // appends `next` via the Monoid instance below
+    def ++(next: SolrSort): SolrSort =
+      Monoid[SolrSort].combine(self, next)
+
+  // Combining keeps the order: the elements of `a` followed by those of `b`.
+  // An empty operand returns the other one unchanged.
+  given Monoid[SolrSort] =
+    Monoid.instance(empty, (a, b) => if (a.isEmpty) b else if (b.isEmpty) a else a ++ b)
+
+  // Encoded as a single string: "<field> <direction>" pairs separated by commas.
+  given Encoder[SolrSort] = Encoder.forString.contramap(list =>
+    list.map { case (f, d) => s"${f.name} ${d.name}" }.mkString(",")
+  )
diff --git a/nix/scripts/redis-push b/nix/scripts/redis-push
index 400a88dc..0cbaf7c2 100644
--- a/nix/scripts/redis-push
+++
b/nix/scripts/redis-push @@ -11,4 +11,4 @@ redis_port=${RS_REDIS_PORT:-6379} header='{"source":"dev","type":"project.created","dataContentType":"application/avro+json","schemaVersion":"1","time":0,"requestId":"r1"}' payload=$(jq --null-input --arg id "$1" --arg name "$2" --arg slug "$1/$2" '{"id":$id,"name":$name,"slug":$slug, "repositories":[],"visibility":"PUBLIC","description":{"string":"my project description"},"createdBy":"dev","creationDate":0,"members":[]}') -redis-cli -h $redis_host -p $redis_port XADD events '*' header "$header" payload "$payload" +redis-cli -h $redis_host -p $redis_port XADD events '*' headers "$header" payload "$payload" diff --git a/project/Dependencies.scala b/project/Dependencies.scala index c78fd420..0f274eee 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -16,7 +16,6 @@ object Dependencies { val ducktape = "0.1.11" val fs2 = "3.9.4" val http4s = "0.23.25" - val luceneQueryParser = "9.9.2" val redis4Cats = "1.5.2" val scalacheckEffectMunit = "1.0.4" val scodec = "2.2.2" @@ -26,10 +25,6 @@ object Dependencies { val tapir = "1.9.10" } - val luceneQueryParser = Seq( - "org.apache.lucene" % "lucene-queryparser" % V.luceneQueryParser - ) - val catsScalaCheck = Seq( "io.chrisdavenport" %% "cats-scalacheck" % V.catsScalaCheck )