Skip to content

Commit

Permalink
feat: Add entity-type facets to search results (#47)
Browse files Browse the repository at this point in the history
- allow specifying facet queries in solr-client
- use it to provide a simple result count per entity-type in the current search
  • Loading branch information
eikek authored Mar 7, 2024
1 parent 1ec4974 commit e8bb153
Show file tree
Hide file tree
Showing 24 changed files with 616 additions and 60 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ object BorerEntities:
EitherT(StreamProvider(media.body).flatMap { implicit input =>
for {
res <- Async[F].delay(Json.decode(input).to[A].valueEither)
} yield res.left.map(BorerDecodeFailure("<not available>", _))
txt <- if (res.isLeft) media.bodyText.compile.string else Async[F].pure("")
} yield res.left.map(BorerDecodeFailure(txt, _))
})

def decodeCbor[F[_]: Async, A: Decoder](media: Media[F]): DecodeResult[F, A] =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,12 @@ import io.renku.search.api.data.*
import io.renku.search.model.users
import io.renku.search.solr.client.SearchSolrClient
import io.renku.search.solr.documents.Entity as SolrEntity
import io.renku.search.solr.schema.EntityDocumentSchema.Fields
import io.renku.solr.client.QueryResponse
import org.http4s.dsl.Http4sDsl
import scribe.Scribe
import io.renku.search.model.EntityType
import io.renku.solr.client.facet.FacetResponse

private class SearchApiImpl[F[_]: Async](solrClient: SearchSolrClient[F])
extends Http4sDsl[F]
Expand Down Expand Up @@ -58,9 +61,19 @@ private class SearchApiImpl[F[_]: Async](solrClient: SearchSolrClient[F])
): SearchResult =
val hasMore = solrResult.responseBody.docs.size > currentPage.limit
val pageInfo = PageWithTotals(currentPage, solrResult.responseBody.numFound, hasMore)
val facets = solrResult.facetResponse
.flatMap(_.buckets.get(Fields.entityType))
.map { counts =>
val all =
counts.buckets.flatMap { case FacetResponse.Bucket(key, count) =>
EntityType.fromString(key).toOption.map(et => et -> count)
}.toMap
FacetData(all)
}
.getOrElse(FacetData.empty)
val items = solrResult.responseBody.docs.map(toApiEntity)
if (hasMore) SearchResult(items.init, pageInfo)
else SearchResult(items, pageInfo)
if (hasMore) SearchResult(items.init, facets, pageInfo)
else SearchResult(items, facets, pageInfo)

private lazy val toApiEntity: SolrEntity => SearchEntity =
given Transformer[users.Id, UserId] = (id: users.Id) => UserId(id)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* Copyright 2024 Swiss Data Science Center (SDSC)
* A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and
* Eidgenössische Technische Hochschule Zürich (ETHZ).
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.renku.search.api.data

import io.renku.search.model.EntityType
import io.bullet.borer.Decoder
import io.bullet.borer.derivation.MapBasedCodecs
import sttp.tapir.Schema
import io.bullet.borer.Encoder
import io.renku.search.api.tapir.SchemaSyntax.*

/** Facet information returned as part of a search result.
  *
  * @param entityType
  *   result count per entity type
  */
final case class FacetData(entityType: Map[EntityType, Int])

object FacetData:
  /** Facet data carrying no counts at all. */
  val empty: FacetData = FacetData(Map.empty)

  given Decoder[FacetData] = MapBasedCodecs.deriveDecoder
  given Encoder[FacetData] = MapBasedCodecs.deriveEncoder

  given Schema[FacetData] =
    // The map schema is only needed for the derivation below; keys are
    // represented by the entity type's name.
    given Schema[Map[EntityType, Int]] = Schema.schemaForMap(_.name)

    val example = FacetData(
      Map(EntityType.Project -> 15, EntityType.User -> 3)
    )
    Schema.derived[FacetData].jsonExample(example)
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import io.bullet.borer.*
import io.bullet.borer.NullOptions.given
import io.bullet.borer.derivation.MapBasedCodecs.{deriveAllCodecs, deriveCodec}
import io.renku.search.model.*
import io.renku.search.api.tapir.SchemaSyntax.*
import sttp.tapir.Schema.SName
import sttp.tapir.SchemaType.{SCoproduct, SDateTime, SProductField, SRef}
import sttp.tapir.generic.Configuration
Expand Down Expand Up @@ -54,20 +55,18 @@ object Project:
private given Schema[projects.CreationDate] = Schema(SDateTime())
given Schema[Project] = Schema
.derived[Project]
.encodedExample(
Json.encode {
Project(
projects.Id("01HRA7AZ2Q234CDQWGA052F8MK"),
projects.Name("renku"),
projects.Slug("renku"),
Seq(projects.Repository("https://github.com/renku")),
projects.Visibility.Public,
Some(projects.Description("Renku project")),
UserId(users.Id("1CAF4C73F50D4514A041C9EDDB025A36")),
projects.CreationDate(Instant.now),
Some(1.0)
).asInstanceOf[SearchEntity]
}.toUtf8String
.jsonExample(
Project(
projects.Id("01HRA7AZ2Q234CDQWGA052F8MK"),
projects.Name("renku"),
projects.Slug("renku"),
Seq(projects.Repository("https://github.com/renku")),
projects.Visibility.Public,
Some(projects.Description("Renku project")),
UserId(users.Id("1CAF4C73F50D4514A041C9EDDB025A36")),
projects.CreationDate(Instant.now),
Some(1.0)
): SearchEntity
)

final case class UserId(id: users.Id)
Expand All @@ -77,9 +76,7 @@ object UserId:
private given Schema[users.Id] = Schema.string[users.Id]
given Schema[UserId] = Schema
.derived[UserId]
.encodedExample(
Json.encode(UserId(users.Id("01HRA7AZ2Q234CDQWGA052F8MK"))).toUtf8String
)
.jsonExample(UserId(users.Id("01HRA7AZ2Q234CDQWGA052F8MK")))

final case class User(
id: users.Id,
Expand All @@ -96,16 +93,14 @@ object User:
private given Schema[users.Email] = Schema.string[users.Email]
given Schema[User] = Schema
.derived[User]
.encodedExample(
Json.encode {
User(
users.Id("1CAF4C73F50D4514A041C9EDDB025A36"),
Some(users.FirstName("Albert")),
Some(users.LastName("Einstein")),
Some(users.Email("[email protected]")),
Some(2.1)
).asInstanceOf[SearchEntity]
}.toUtf8String
.jsonExample(
User(
users.Id("1CAF4C73F50D4514A041C9EDDB025A36"),
Some(users.FirstName("Albert")),
Some(users.LastName("Einstein")),
Some(users.Email("[email protected]")),
Some(2.1)
): SearchEntity
)

object SearchEntity:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import sttp.tapir.Schema

final case class SearchResult(
items: Seq[SearchEntity],
facets: FacetData,
pagingInfo: PageWithTotals
)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* Copyright 2024 Swiss Data Science Center (SDSC)
* A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and
* Eidgenössische Technische Hochschule Zürich (ETHZ).
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.renku.search.api.tapir

import io.bullet.borer.Encoder
import io.bullet.borer.Json
import sttp.tapir.Schema

/** Extension methods for tapir [[Schema]] values. */
trait SchemaSyntax:

  extension [T](self: Schema[T])
    /** Sets the schema's encoded example to the JSON rendering of `value`.
      *
      * The value is encoded with the borer `Encoder` in scope and attached as
      * a UTF-8 string. `TT` may be a supertype of `T`, so an example can be
      * encoded through the encoder of its parent type.
      */
    def jsonExample[TT >: T](value: TT)(using Encoder[TT]): Schema[T] =
      val rendered = Json.encode(value).toUtf8String
      self.encodedExample(rendered)

object SchemaSyntax extends SchemaSyntax
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,18 @@ package io.renku.search.api.tapir
import sttp.tapir.*
import io.renku.search.api.data.*
import io.renku.search.query.Query
import io.renku.search.model.EntityType

/** Tapir codecs and schemas for the query and model types of the search API. */
trait TapirCodecs:
  /** Plain-text codec for `Query`: decodes with `Query.parse`, encodes with `render`. */
  given Codec[String, Query, CodecFormat.TextPlain] =
    Codec.string.mapEither(raw => Query.parse(raw))(query => query.render)

  given Schema[Query] = Schema.anyObject[Query]
  given Schema[QueryInput] = Schema.derived[QueryInput]

  /** Plain-text codec for `EntityType`: decodes with `EntityType.fromString`, encodes the
    * type's `name`.
    */
  given Codec[String, EntityType, CodecFormat.TextPlain] =
    Codec.string.mapEither(raw => EntityType.fromString(raw))(entityType => entityType.name)

  given Schema[EntityType] = Schema.derivedEnumeration.defaultStringBased

object TapirCodecs extends TapirCodecs
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,21 @@ import io.renku.search.query.Query
import io.renku.search.solr.documents.Entity
import io.renku.search.solr.query.LuceneQueryInterpreter
import io.renku.solr.client.{QueryData, QueryResponse, QueryString, SolrClient}
import io.renku.solr.client.schema.FieldName
import io.renku.solr.client.facet.{Facet, Facets}
import io.renku.search.solr.schema.EntityDocumentSchema

private class SearchSolrClientImpl[F[_]: Async](solrClient: SolrClient[F])
extends SearchSolrClient[F]:

private[this] val logger = scribe.cats.effect[F]
private[this] val interpreter = LuceneQueryInterpreter.forSync[F]

private val typeTerms = Facet.Terms(
EntityDocumentSchema.Fields.entityType,
EntityDocumentSchema.Fields.entityType
)

override def insert[D: Encoder](documents: Seq[D]): F[Unit] =
solrClient.insert(documents).void

Expand All @@ -47,6 +55,7 @@ private class SearchSolrClientImpl[F[_]: Async](solrClient: SolrClient[F])
.query[Entity](
QueryData(QueryString(solrQuery.query.value, limit, offset))
.withSort(solrQuery.sort)
.withScore
.withFacet(Facets(typeTerms))
.withFields(FieldName.all, FieldName.score)
)
} yield res
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ class LuceneQueryInterpreterSpec
override protected lazy val coreName: String = server.testCoreName2

given Decoder[Unit] = new Decoder {
def read(r: Reader) = ()
def read(r: Reader) =
r.skipElement()
()
}

def query(s: String | Query): QueryData =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,38 +21,38 @@ package io.renku.solr.client
import io.bullet.borer.Encoder
import io.bullet.borer.derivation.MapBasedCodecs.deriveEncoder
import io.renku.solr.client.schema.FieldName
import io.renku.solr.client.facet.Facets

final case class QueryData(
query: String,
filter: Seq[String],
limit: Int,
offset: Int,
fields: Seq[FieldName],
sort: SolrSort,
params: Map[String, String]
fields: Seq[FieldName] = Seq.empty,
sort: SolrSort = SolrSort.empty,
params: Map[String, String] = Map.empty,
facet: Facets = Facets.empty
):
def nextPage: QueryData =
copy(offset = offset + limit)

def withHighLight(fields: List[FieldName], pre: String, post: String): QueryData =
copy(params =
params ++ Map(
"hl" -> "on",
"hl.requireFieldMatch" -> "true",
"hl.fl" -> fields.map(_.name).mkString(","),
"hl.simple.pre" -> pre,
"hl.simple.post" -> post
)
)

def withSort(sort: SolrSort): QueryData = copy(sort = sort)
def withFieldList(fl: String): QueryData = copy(params = params.updated("fl", fl))
def withScore: QueryData = withFieldList("* score")
def withScoreAndChildren: QueryData = withFieldList("* score,[child]")
def withFields(field: FieldName*) = copy(fields = field)
def addFilter(q: String): QueryData = copy(filter = filter :+ q)
def withFacet(facet: Facets): QueryData = copy(facet = facet)

object QueryData:

def apply(query: QueryString): QueryData =
QueryData(query.q, Nil, query.limit, query.offset, Nil, SolrSort.empty, Map.empty)
QueryData(
query.q,
Nil,
query.limit,
query.offset,
Nil,
SolrSort.empty,
Map.empty,
Facets.empty
)

given Encoder[QueryData] = deriveEncoder
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,19 @@
package io.renku.solr.client

import io.bullet.borer.Decoder
import io.bullet.borer.derivation.MapBasedCodecs.deriveDecoder
import io.bullet.borer.NullOptions.given
import io.bullet.borer.derivation.MapBasedCodecs
import io.bullet.borer.derivation.key
import io.renku.solr.client.facet.FacetResponse

final case class QueryResponse[A](
responseHeader: ResponseHeader,
@key("response") responseBody: ResponseBody[A]
@key("response") responseBody: ResponseBody[A],
@key("facets") facetResponse: Option[FacetResponse] = None
):
def map[B](f: A => B): QueryResponse[B] =
copy(responseBody = responseBody.map(f))

object QueryResponse:
given [A](using Decoder[A]): Decoder[QueryResponse[A]] =
deriveDecoder
MapBasedCodecs.deriveDecoder
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Copyright 2024 Swiss Data Science Center (SDSC)
* A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and
* Eidgenössische Technische Hochschule Zürich (ETHZ).
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.renku.solr.client.facet

import io.renku.solr.client.schema.FieldName
import cats.data.NonEmptyList

/** Facet definitions that can be attached to a Solr query.
  *
  * Each case corresponds to one facet type of Solr's JSON Facet API; the link
  * above each case points to the relevant section of the Solr guide.
  *
  * NOTE(review): the optional settings are named after the Solr parameters of
  * the same name; their exact semantics are defined by Solr, so confirm them
  * against the linked documentation rather than relying on the names.
  */
enum Facet:
// https://solr.apache.org/guide/solr/latest/query-guide/json-facet-api.html#terms-facet
// Terms facet over `field`. `name` is presumably the key under which the
// result is reported in the response — TODO confirm against the encoder.
case Terms(
name: FieldName,
field: FieldName,
limit: Option[Int] = None,
minCount: Option[Int] = None,
method: Option[FacetAlgorithm] = None,
missing: Boolean = false,
numBuckets: Boolean = false,
allBuckets: Boolean = false
)

// https://solr.apache.org/guide/solr/latest/query-guide/json-facet-api.html#range-facet
// Range facet over `field` using an explicit, non-empty list of ranges.
case ArbitraryRange(
name: FieldName,
field: FieldName,
ranges: NonEmptyList[FacetRange]
)
Loading

0 comments on commit e8bb153

Please sign in to comment.