From 8e33685da05b35b0809c921dec57c0d08daff8c5 Mon Sep 17 00:00:00 2001
From: Eike Kettner
Date: Wed, 14 Feb 2024 19:01:33 +0100
Subject: [PATCH] WIP: query parsing, first draft done

---
Reviewer note: this patch was restored from a copy whose line breaks and
indentation had been collapsed. Hunk line counts were re-checked against the
hunk headers; leading whitespace on context and added lines is reconstructed
from scalafmt conventions -- TODO confirm against the target tree, or apply
with `git apply --ignore-whitespace`.

 .../scala/io/renku/commons/Visibility.scala  |  4 ++
 .../scala/io/renku/search/query/Query.scala  | 14 ++++-
 .../search/query/json/QueryJsonCodec.scala   |  2 +-
 .../renku/search/query/parse/QueryUtil.scala | 55 +++++++++++++++++++
 .../renku/search/query/QueryGenerators.scala | 38 +++++++++----
 .../search/query/json/QueryJsonSpec.scala    | 36 ++++--------
 .../search/query/parse/QueryParserSpec.scala | 40 ++++++--------
 7 files changed, 125 insertions(+), 64 deletions(-)
 create mode 100644 modules/search-query/src/main/scala/io/renku/search/query/parse/QueryUtil.scala

diff --git a/modules/commons/src/main/scala/io/renku/commons/Visibility.scala b/modules/commons/src/main/scala/io/renku/commons/Visibility.scala
index 4db38aed..16ef2729 100644
--- a/modules/commons/src/main/scala/io/renku/commons/Visibility.scala
+++ b/modules/commons/src/main/scala/io/renku/commons/Visibility.scala
@@ -18,6 +18,8 @@
 
 package io.renku.commons
 
+import cats.kernel.Order
+
 enum Visibility:
   case Public
   case Private
@@ -25,5 +27,7 @@ enum Visibility:
   def name: String = productPrefix.toLowerCase
 
 object Visibility:
+  given Order[Visibility] = Order.by(_.ordinal)
+
   def unsafeFromString(s: String): Visibility =
     Visibility.valueOf(s.capitalize)
diff --git a/modules/search-query/src/main/scala/io/renku/search/query/Query.scala b/modules/search-query/src/main/scala/io/renku/search/query/Query.scala
index d86f4368..fa6fc963 100644
--- a/modules/search-query/src/main/scala/io/renku/search/query/Query.scala
+++ b/modules/search-query/src/main/scala/io/renku/search/query/Query.scala
@@ -25,7 +25,7 @@ import io.renku.commons.Visibility
 import io.renku.search.query.FieldTerm.Created
 import io.renku.search.query.Query.Segment
 import io.renku.search.query.json.QueryJsonCodec
-import io.renku.search.query.parse.QueryParser
+import io.renku.search.query.parse.{QueryParser, QueryUtil}
 
 final case class Query(
     segments: List[Query.Segment]
@@ -47,13 +47,23 @@ object Query:
   def parse(str: String): Either[String, Query] =
     val trimmed = str.trim
     if (trimmed.isEmpty) Right(empty)
-    else QueryParser.query.parseAll(trimmed).leftMap(_.show)
+    else
+      QueryParser.query
+        .parseAll(trimmed)
+        .leftMap(_.show)
+        .map(QueryUtil.collapse)
 
   enum Segment:
     case Field(value: FieldTerm)
     case Text(value: String)
 
   object Segment:
+    extension (self: Segment.Text)
+      def ++(other: Segment.Text): Segment.Text =
+        if (other.value.isEmpty) self
+        else if (self.value.isEmpty) other
+        else Segment.Text(s"${self.value} ${other.value}")
+
     def text(phrase: String): Segment =
       Segment.Text(phrase)
 
diff --git a/modules/search-query/src/main/scala/io/renku/search/query/json/QueryJsonCodec.scala b/modules/search-query/src/main/scala/io/renku/search/query/json/QueryJsonCodec.scala
index 224d6a05..662c1866 100644
--- a/modules/search-query/src/main/scala/io/renku/search/query/json/QueryJsonCodec.scala
+++ b/modules/search-query/src/main/scala/io/renku/search/query/json/QueryJsonCodec.scala
@@ -45,7 +45,7 @@ import scala.collection.mutable.ListBuffer
  */
 private[query] object QueryJsonCodec:
   // temporary
-  given Decoder[Visibility] = Decoder.forString.map(Visibility.valueOf)
+  given Decoder[Visibility] = Decoder.forString.map(Visibility.unsafeFromString)
   given Encoder[Visibility] = Encoder.forString.contramap(_.name)
 
   private[this] val freeTextField = "_text"
diff --git a/modules/search-query/src/main/scala/io/renku/search/query/parse/QueryUtil.scala b/modules/search-query/src/main/scala/io/renku/search/query/parse/QueryUtil.scala
new file mode 100644
index 00000000..23f934e1
--- /dev/null
+++ b/modules/search-query/src/main/scala/io/renku/search/query/parse/QueryUtil.scala
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2024 Swiss Data Science Center (SDSC)
+ * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and
+ * Eidgenössische Technische Hochschule Zürich (ETHZ).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.renku.search.query.parse
+
+import io.renku.search.query.Query
+import io.renku.search.query.Query.Segment
+
+private[query] object QueryUtil {
+
+  def collapse(q: Query): Query =
+    Query(collapseTextSegments(q.segments))
+
+  private def collapseTextSegments(segs: List[Segment]): List[Segment] = {
+    @annotation.tailrec
+    def loop(
+        in: List[Segment],
+        curr: Option[Segment.Text],
+        result: List[Segment]
+    ): List[Segment] =
+      in match
+        case first :: rest =>
+          (first, curr) match
+            case (t1: Segment.Text, Some(tc)) =>
+              loop(rest, Some(tc ++ t1), result)
+
+            case (e: Segment.Text, None) =>
+              loop(rest, Some(e), result)
+
+            case (f: Segment.Field, Some(tc)) =>
+              loop(rest, None, f :: tc :: result)
+
+            case (f: Segment.Field, None) =>
+              loop(rest, None, f :: result)
+
+        case Nil => (curr.toList ::: result).reverse
+
+    loop(segs, None, Nil)
+  }
+}
diff --git a/modules/search-query/src/test/scala/io/renku/search/query/QueryGenerators.scala b/modules/search-query/src/test/scala/io/renku/search/query/QueryGenerators.scala
index f27c30d9..36b73ca9 100644
--- a/modules/search-query/src/test/scala/io/renku/search/query/QueryGenerators.scala
+++ b/modules/search-query/src/test/scala/io/renku/search/query/QueryGenerators.scala
@@ -21,6 +21,7 @@ package io.renku.search.query
 import cats.data.NonEmptyList
 import cats.syntax.all.*
 import io.renku.commons.Visibility
+import io.renku.search.query.parse.QueryUtil
 import org.scalacheck.Gen
 import org.scalacheck.cats.implicits.*
 
@@ -34,7 +35,7 @@ object QueryGenerators:
     for {
       h <- Gen.choose(0, 23)
       m <- Gen.option(Gen.choose(0, 59))
-      s <- Gen.option(Gen.choose(0, 59))
+      s <- if (m.isDefined) Gen.option(Gen.choose(0, 59)) else Gen.const(None)
     } yield PartialDateTime.Time(h, m, s)
 
   val partialDate: Gen[PartialDateTime.Date] =
@@ -55,13 +56,14 @@ object QueryGenerators:
     val ref: Gen[PartialDateTime | RelativeDate] =
       Gen.oneOf(partialDateTime, relativeDate)
 
-    val period: Gen[Period] =
+    val periodPos: Gen[Period] = Gen.oneOf(1 to 13).map(n => Period.ofDays(n))
+    val periodNeg: Gen[Period] =
       Gen.oneOf((-8 to -1) ++ (1 to 8)).map(n => Period.ofDays(n))
 
     for {
       date <- ref
-      amount <- period
       range <- Gen.oneOf(true, false)
+      amount <- if (range) periodPos else periodNeg
     } yield DateTimeCalc(date, amount, range)
   }
 
@@ -86,15 +88,28 @@ object QueryGenerators:
       en <- Gen.listOfN(n - 1, gen)
     } yield NonEmptyList(e0, en)
 
-  private val simpleString: Gen[String] = Gen.alphaNumStr
-  private val quotedString: Gen[String] =
-    Gen.alphaNumStr.map(s => s"\"$s\"")
+  private val alphaNumChars = ('a' to 'z') ++ ('A' to 'Z') ++ ('0' to '9')
+  private val simpleWord: Gen[String] = {
+    val len = Gen.choose(2, 12)
+    len.flatMap(n => Gen.stringOfN(n, Gen.oneOf(alphaNumChars)))
+  }
+
+  private val word: Gen[String] = {
+    val chars = alphaNumChars ++ "/{}*?()-:@…_[]^!<>=&#|~`+%\"'".toSeq
+    val len = Gen.choose(2, 12)
+    len.flatMap(n => Gen.stringOfN(n, Gen.oneOf(chars)))
+  }
 
-  private val valueString: Gen[String] =
-    Gen.oneOf(simpleString, quotedString)
+  private val phrase: Gen[String] = {
+    val w = Gen.frequency(5 -> simpleWord, 1 -> word)
+    Gen
+      .choose(1, 3)
+      .flatMap(n => Gen.listOfN(n, w))
+      .map(_.mkString(" "))
+  }
 
   private val stringValues: Gen[NonEmptyList[String]] =
-    Gen.choose(1, 4).flatMap(n => nelOfN(n, valueString))
+    Gen.choose(1, 4).flatMap(n => nelOfN(n, phrase))
 
   val projectIdTerm: Gen[FieldTerm] =
     stringValues.map(FieldTerm.ProjectIdIs(_))
@@ -111,7 +126,7 @@ object QueryGenerators:
   val visibilityTerm: Gen[FieldTerm] =
     Gen
       .frequency(10 -> visibility.map(NonEmptyList.one), 1 -> nelOfN(2, visibility))
-      .map(FieldTerm.VisibilityIs(_))
+      .map(vs => FieldTerm.VisibilityIs(vs.distinct))
 
   private val comparison: Gen[Comparison] =
     Gen.oneOf(Comparison.values.toSeq)
@@ -135,7 +150,7 @@ object QueryGenerators:
 
   val freeText: Gen[String] =
     Gen.choose(1, 5).flatMap { len =>
-      Gen.listOfN(len, valueString).map(_.mkString(" "))
+      Gen.listOfN(len, phrase).map(_.mkString(" "))
     }
 
   val segment: Gen[Query.Segment] =
@@ -149,3 +164,4 @@ object QueryGenerators:
       .choose(0, 12)
       .flatMap(n => Gen.listOfN(n, segment))
       .map(Query.apply)
+      .map(QueryUtil.collapse)
diff --git a/modules/search-query/src/test/scala/io/renku/search/query/json/QueryJsonSpec.scala b/modules/search-query/src/test/scala/io/renku/search/query/json/QueryJsonSpec.scala
index 79d791f4..2e93f15c 100644
--- a/modules/search-query/src/test/scala/io/renku/search/query/json/QueryJsonSpec.scala
+++ b/modules/search-query/src/test/scala/io/renku/search/query/json/QueryJsonSpec.scala
@@ -19,33 +19,17 @@
 package io.renku.search.query.json
 
 import io.bullet.borer.Json
-import io.renku.search.query.{PartialDateTime, Query}
-import io.renku.search.query.Query.Segment
-import munit.FunSuite
+import io.renku.search.query.{Query, QueryGenerators}
+import munit.{FunSuite, ScalaCheckSuite}
+import org.scalacheck.Prop
 
-import java.time.Instant
+class QueryJsonSpec extends ScalaCheckSuite {
 
-class QueryJsonSpec extends FunSuite {
-
-  test("playing") {
-    println(Query.empty.asString)
-    val q = Query(
-      Segment.projectIdIs("p1"),
-      Segment.text("foo bar"),
-      Segment.nameIs("ai-project-15048"),
-      Segment.creationDateLt(PartialDateTime.fromInstant(Instant.now()))
-    )
-    println(q.asString)
-    val jsonStr = Json.encode(q).toUtf8String
-    println(jsonStr)
-    val decoded = Json.decode(jsonStr.getBytes).to[Query].value
-    println(decoded)
-    assertEquals(decoded, q)
-
-    val q2 = Query(Segment.projectIdIs("id-2"), Segment.projectIdIs("id-3"))
-    val q2Json = Json.encode(q2).toUtf8String
-    assertEquals(q2Json, """{"projectId":"id-2","projectId":"id-3"}""")
-    val decodedQ2 = Json.decode(q2Json.getBytes).to[Query].value
-    println(decodedQ2)
+  property("query json encode/decode") {
+    Prop.forAll(QueryGenerators.query) { q =>
+      val jsonStr = Json.encode(q).toUtf8String
+      val decoded = Json.decode(jsonStr.getBytes).to[Query].value
+      assertEquals(decoded, q)
+    }
   }
 }
diff --git a/modules/search-query/src/test/scala/io/renku/search/query/parse/QueryParserSpec.scala b/modules/search-query/src/test/scala/io/renku/search/query/parse/QueryParserSpec.scala
index ec0dfe36..b024595b 100644
--- a/modules/search-query/src/test/scala/io/renku/search/query/parse/QueryParserSpec.scala
+++ b/modules/search-query/src/test/scala/io/renku/search/query/parse/QueryParserSpec.scala
@@ -19,13 +19,12 @@
 package io.renku.search.query.parse
 
 import cats.data.NonEmptyList as Nel
-import io.renku.search.query.Comparison.{GreaterThan, LowerThan}
 import io.renku.search.query.*
-import munit.FunSuite
-
-import java.util.concurrent.atomic.AtomicInteger
+import io.renku.search.query.Comparison.{GreaterThan, LowerThan}
+import munit.{FunSuite, ScalaCheckSuite}
+import org.scalacheck.Prop
 
-class QueryParserSpec extends FunSuite with ParserSuite {
+class QueryParserSpec extends ScalaCheckSuite with ParserSuite {
 
   test("string list") {
     val p = QueryParser.values
@@ -93,28 +92,21 @@ class QueryParserSpec extends FunSuite with ParserSuite {
     )
   }
 
-  test("example queries") {
+  test("example queries".ignore) {
     val p = QueryParser.query
-    println(p.run("projectId:1 foo name:test foo bar created>today/5d"))
+    println(p.run("name:\"vQgCg mpZU4cCgF3N eVZUMkH7\",JHRt visibility:private WX59P"))
   }
 
-  test("generated queries") {
-    val counter = new AtomicInteger(0)
-    QueryGenerators
-      .nelOfN(20, QueryGenerators.query)
-      .sample
-      .toList
-      .flatMap(_.toList)
-      .foreach { q =>
-        val qStr = q.asString
-        println(s">>: ${qStr}")
-        val parsed = Query.parse(qStr)
-        if (parsed.isLeft) {
-          val _ = counter.incrementAndGet()
-        }
-        println(s" >> $parsed")
+  property("generated queries") {
+    Prop.forAll(QueryGenerators.query) { q =>
+      val qStr = q.asString
+      val parsed = Query.parse(qStr).fold(sys.error, identity)
+      if (q != parsed) {
+        // this is for better error messages when things fail
+        println(qStr)
+        assertEquals(q, parsed)
       }
-
-    println(s"==== Errors: ${counter.get()}")
+      parsed == q
+    }
   }
 }