From d472b78391055b14087e753ba6abf58b2ebedaa4 Mon Sep 17 00:00:00 2001 From: eikek <701128+eikek@users.noreply.github.com> Date: Thu, 22 Feb 2024 17:20:56 +0100 Subject: [PATCH] chore: Add documentation for the query string (#27) Additionally creates a dev setup using nix and adds a logging setup based on a simple verbosity level. --- .envrc | 1 + .gitignore | 4 +- build.sbt | 44 +++-- flake.lock | 61 +++++++ flake.nix | 107 ++++++++++++ .../scala/io/renku/logging/LoggingSetup.scala | 66 +++++++ .../io/renku/search/config/ConfigValues.scala | 37 ++-- .../io/renku/search/api/HttpApplication.scala | 32 +++- .../io/renku/search/api/Microservice.scala | 2 + .../scala/io/renku/search/api/SearchApi.scala | 2 + .../io/renku/search/api/SearchApiConfig.scala | 6 +- .../io/renku/search/api/SearchApiImpl.scala | 9 + .../io/renku/search/api/TapirCodecs.scala | 29 ++++ .../renku/search/provision/Microservice.scala | 2 + .../provision/SearchProvisionConfig.scala | 6 +- modules/search-query-docs/docs/manual.md | 161 ++++++++++++++++++ .../search/query/docs/SearchQueryManual.scala | 8 + .../scala/io/renku/search/query/Field.scala | 2 +- .../scala/io/renku/search/query/Query.scala | 6 +- .../search/query/parse/DateTimeParser.scala | 5 +- .../renku/search/query/parse/QueryUtil.scala | 8 +- .../search/query/parse/QueryParserSpec.scala | 15 +- .../search/solr/client/QueryInterpreter.scala | 63 +++++++ .../search/solr/client/SearchSolrClient.scala | 2 + .../solr/client/SearchSolrClientImpl.scala | 10 ++ .../renku/search/solr/documents/Project.scala | 4 +- .../solr/schema/EntityDocumentSchema.scala | 14 ++ .../renku/search/solr/schema/Migrations.scala | 3 +- .../renku/solr/client/schema/FieldType.scala | 1 + nix/dev-scripts.nix | 67 ++++++++ nix/dev-vm.nix | 48 ++++++ nix/scripts/recreate-container | 10 ++ nix/scripts/redis-push | 14 ++ nix/services.nix | 23 +++ nix/solr-module.nix | 85 +++++++++ nix/solr.nix | 40 +++++ project/Dependencies.scala | 5 + 
project/SearchQueryDocsPlugin.scala | 56 ++++++ 38 files changed, 1001 insertions(+), 57 deletions(-) create mode 100644 .envrc create mode 100644 flake.lock create mode 100644 flake.nix create mode 100644 modules/commons/src/main/scala/io/renku/logging/LoggingSetup.scala create mode 100644 modules/search-api/src/main/scala/io/renku/search/api/TapirCodecs.scala create mode 100644 modules/search-query-docs/docs/manual.md create mode 100644 modules/search-query-docs/src/main/scala/io/renku/search/query/docs/SearchQueryManual.scala create mode 100644 modules/search-solr-client/src/main/scala/io/renku/search/solr/client/QueryInterpreter.scala create mode 100644 nix/dev-scripts.nix create mode 100644 nix/dev-vm.nix create mode 100644 nix/scripts/recreate-container create mode 100644 nix/scripts/redis-push create mode 100644 nix/services.nix create mode 100644 nix/solr-module.nix create mode 100644 nix/solr.nix create mode 100644 project/SearchQueryDocsPlugin.scala diff --git a/.envrc b/.envrc new file mode 100644 index 00000000..3550a30f --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use flake diff --git a/.gitignore b/.gitignore index 336fd272..33df8e62 100644 --- a/.gitignore +++ b/.gitignore @@ -116,4 +116,6 @@ helm-chart/renku-graph/charts/*tgz # MAc .DS_Store -.direnv/ \ No newline at end of file +.direnv/ +*.qcow2 +.tmp \ No newline at end of file diff --git a/build.sbt b/build.sbt index e782985c..c8fbf8cd 100644 --- a/build.sbt +++ b/build.sbt @@ -40,6 +40,7 @@ lazy val root = project .in(file(".")) .withId("renku-search") .enablePlugins(DbTestPlugin) + .disablePlugins(RevolverPlugin) .settings( publish / skip := true, publishTo := Some( @@ -92,12 +93,12 @@ lazy val commons = project }.taskValue ) .enablePlugins(AutomateHeaderPlugin) - .disablePlugins(DbTestPlugin) + .disablePlugins(DbTestPlugin, RevolverPlugin) lazy val http4sBorer = project .in(file("modules/http4s-borer")) .enablePlugins(AutomateHeaderPlugin) - .disablePlugins(DbTestPlugin) + 
.disablePlugins(DbTestPlugin, RevolverPlugin) .withId("http4s-borer") .settings(commonSettings) .settings( @@ -114,7 +115,7 @@ lazy val httpClient = project .in(file("modules/http-client")) .withId("http-client") .enablePlugins(AutomateHeaderPlugin) - .disablePlugins(DbTestPlugin) + .disablePlugins(DbTestPlugin, RevolverPlugin) .settings(commonSettings) .settings( name := "http-client", @@ -141,6 +142,7 @@ lazy val redisClient = project Dependencies.redis4CatsStreams ) .enablePlugins(AutomateHeaderPlugin) + .disablePlugins(RevolverPlugin) .dependsOn( commons % "test->test" ) @@ -157,6 +159,7 @@ lazy val renkuRedisClient = project Dependencies.redis4CatsStreams ) .enablePlugins(AutomateHeaderPlugin) + .disablePlugins(RevolverPlugin) .dependsOn( events % "compile->compile;test->test", redisClient % "compile->compile;test->test" @@ -166,6 +169,7 @@ lazy val solrClient = project .in(file("modules/solr-client")) .withId("solr-client") .enablePlugins(AvroCodeGen, AutomateHeaderPlugin) + .disablePlugins(RevolverPlugin) .settings(commonSettings) .settings( name := "solr-client", @@ -183,22 +187,25 @@ lazy val searchSolrClient = project .in(file("modules/search-solr-client")) .withId("search-solr-client") .enablePlugins(AvroCodeGen, AutomateHeaderPlugin) + .disablePlugins(RevolverPlugin) .settings(commonSettings) .settings( name := "search-solr-client", libraryDependencies ++= Dependencies.catsCore ++ - Dependencies.catsEffect + Dependencies.catsEffect ++ + Dependencies.luceneQueryParser ) .dependsOn( avroCodec % "compile->compile;test->test", solrClient % "compile->compile;test->test", - commons % "compile->compile;test->test" + commons % "compile->compile;test->test", + searchQuery % "compile->compile;test->test" ) lazy val avroCodec = project .in(file("modules/avro-codec")) - .disablePlugins(DbTestPlugin) + .disablePlugins(DbTestPlugin, RevolverPlugin) .settings(commonSettings) .settings( name := "avro-codec", @@ -210,7 +217,7 @@ lazy val avroCodec = project lazy val 
http4sAvro = project .in(file("modules/http4s-avro")) .enablePlugins(AutomateHeaderPlugin) - .disablePlugins(DbTestPlugin) + .disablePlugins(DbTestPlugin, RevolverPlugin) .withId("http4s-avro") .settings(commonSettings) .settings( @@ -235,11 +242,12 @@ lazy val events = project avroCodec % "compile->compile;test->test" ) .enablePlugins(AutomateHeaderPlugin, AvroSchemaDownload) - .disablePlugins(DbTestPlugin) + .disablePlugins(DbTestPlugin, RevolverPlugin) lazy val configValues = project .in(file("modules/config-values")) .withId("config-values") + .disablePlugins(RevolverPlugin) .settings(commonSettings) .settings( name := "config-values", @@ -255,6 +263,7 @@ lazy val configValues = project lazy val searchQuery = project .in(file("modules/search-query")) .withId("search-query") + .disablePlugins(RevolverPlugin) .settings(commonSettings) .settings( name := "search-query", @@ -266,6 +275,18 @@ lazy val searchQuery = project ) .enablePlugins(AutomateHeaderPlugin) +lazy val searchQueryDocs = project + .in(file("modules/search-query-docs")) + .withId("search-query-docs") + .dependsOn(searchQuery) + .enablePlugins(SearchQueryDocsPlugin) + .settings( + name := "search-query-docs", + publish := {}, + publishLocal := {}, + publishArtifact := false + ) + lazy val searchProvision = project .in(file("modules/search-provision")) .withId("search-provision") @@ -281,7 +302,7 @@ lazy val searchProvision = project searchSolrClient % "compile->compile;test->test", configValues % "compile->compile;test->test" ) - .enablePlugins(AutomateHeaderPlugin, DockerImagePlugin) + .enablePlugins(AutomateHeaderPlugin, DockerImagePlugin, RevolverPlugin) lazy val searchApi = project .in(file("modules/search-api")) @@ -300,9 +321,10 @@ lazy val searchApi = project commons % "compile->compile;test->test", http4sBorer % "compile->compile;test->test", searchSolrClient % "compile->compile;test->test", - configValues % "compile->compile;test->test" + configValues % "compile->compile;test->test", + 
searchQueryDocs % "compile->compile;test->test" ) - .enablePlugins(AutomateHeaderPlugin, DockerImagePlugin) + .enablePlugins(AutomateHeaderPlugin, DockerImagePlugin, RevolverPlugin) lazy val commonSettings = Seq( organization := "io.renku", diff --git a/flake.lock b/flake.lock new file mode 100644 index 00000000..9613ccfb --- /dev/null +++ b/flake.lock @@ -0,0 +1,61 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1705309234, + "narHash": "sha256-uNRRNRKmJyCRC/8y1RqBkqWBLM034y4qN7EprSdmgyA=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "1ef2e671c3b0c19053962c07dbda38332dcebf26", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1708447487, + "narHash": "sha256-MGWA+gyB8xoYxzzMJKnZft2grvgTzN/86T9iGiNdUp4=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "d7097df74c6aa2254ec28d702b66687357a8b843", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "release-23.11", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 00000000..29032a71 --- /dev/null +++ b/flake.nix @@ -0,0 +1,107 @@ +{ + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/release-23.11"; + flake-utils.url = "github:numtide/flake-utils"; + }; + + outputs = inputs @ { + self, + nixpkgs, + flake-utils, + }: + { + overlays.default = final: prev: { + solr = 
self.packages.${prev.system}.solr; + }; + nixosConfigurations = let + selfOverlay = { + lib, + config, + ... + }: { + nixpkgs.overlays = [ + self.overlays.default + ]; + system.stateVersion = "23.11"; + }; + in { + dev-vm = nixpkgs.lib.nixosSystem { + system = flake-utils.lib.system.x86_64-linux; + specialArgs = {inherit inputs;}; + modules = [ + selfOverlay + ./nix/dev-vm.nix + ]; + }; + + container = nixpkgs.lib.nixosSystem { + system = flake-utils.lib.system.x86_64-linux; + modules = [ + ({pkgs, ...}: { + boot.isContainer = true; + networking.useDHCP = false; + }) + selfOverlay + ./nix/solr-module.nix + ./nix/services.nix + ]; + }; + }; + } + // flake-utils.lib.eachDefaultSystem (system: let + pkgs = nixpkgs.legacyPackages.${system}; + selfPkgs = self.packages.${system}; + in { + formatter = pkgs.alejandra; + packages = + ((import ./nix/dev-scripts.nix) {inherit (pkgs) concatTextFile writeShellScriptBin;}) + // { + solr = pkgs.callPackage (import ./nix/solr.nix) {}; + }; + + devShells = rec { + default = container; + container = pkgs.mkShell { + RS_SOLR_URL = "http://rsdev:8983/solr"; + RS_REDIS_HOST = "rsdev"; + RS_REDIS_PORT = "6379"; + RS_CONTAINER = "rsdev"; + RS_LOG_LEVEL = "3"; + + buildInputs = with pkgs; + with selfPkgs; [ + redis + jq + + redis-push + recreate-container + start-container + solr-create-core + solr-delete-core + solr-recreate-core + ]; + }; + vm = pkgs.mkShell { + RS_SOLR_URL = "http://localhost:18983/solr"; + RS_REDIS_HOST = "localhost"; + RS_REDIS_PORT = "16379"; + VM_SSH_PORT = "10022"; + RS_LOG_LEVEL = "3"; + + buildInputs = with pkgs; + with selfPkgs; [ + redis + jq + + redis-push + vm-build + vm-run + vm-ssh + vm-solr-create-core + vm-solr-delete-core + vm-solr-recreate-core + ]; + }; + }; + }); +} diff --git a/modules/commons/src/main/scala/io/renku/logging/LoggingSetup.scala b/modules/commons/src/main/scala/io/renku/logging/LoggingSetup.scala new file mode 100644 index 00000000..702f00d3 --- /dev/null +++ 
b/modules/commons/src/main/scala/io/renku/logging/LoggingSetup.scala @@ -0,0 +1,66 @@ +/* + * Copyright 2024 Swiss Data Science Center (SDSC) + * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and + * Eidgenössische Technische Hochschule Zürich (ETHZ). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.renku.logging + +import scribe.Level +import scribe.writer.SystemOutWriter +import scribe.format.Formatter + +object LoggingSetup: + + def doConfigure(verbosity: Int): Unit = + println(s">> Setting up logging with verbosity=$verbosity") + val root = scribe.Logger.root.clearHandlers().clearModifiers() + verbosity match + case n if n <= 0 => + () + + case 1 => + root.withMinimumLevel(Level.Warn).replace() + () + + case 2 => + root.withMinimumLevel(Level.Warn).replace() + configureRenkuSearch(Level.Info) + + case 3 => + root.withMinimumLevel(Level.Info).replace() + configureRenkuSearch(Level.Debug) + + case 4 => + root.withMinimumLevel(Level.Info).replace() + configureRenkuSearch(Level.Trace) + + case _ => + root.withMinimumLevel(Level.Debug).replace() + configureRenkuSearch(Level.Trace) + + private def configureRenkuSearch(level: Level): Unit = { + scribe + .Logger("io.renku.search") + .clearHandlers() + .withHandler( + formatter = Formatter.classic, + writer = SystemOutWriter, + minimumLevel = Some(level) + ) + .replace() + + () + } diff --git 
a/modules/config-values/src/main/scala/io/renku/search/config/ConfigValues.scala b/modules/config-values/src/main/scala/io/renku/search/config/ConfigValues.scala index 8af2f8ad..8161f93d 100644 --- a/modules/config-values/src/main/scala/io/renku/search/config/ConfigValues.scala +++ b/modules/config-values/src/main/scala/io/renku/search/config/ConfigValues.scala @@ -30,37 +30,42 @@ object ConfigValues extends ConfigDecoders: private val prefix = "RS" + private def renv(name: String) = + env(s"${prefix}_$name") + + val logLevel: ConfigValue[Effect, Int] = + renv("LOG_LEVEL").default("2").as[Int] + val redisConfig: ConfigValue[Effect, RedisConfig] = { - val host = env(s"${prefix}_REDIS_HOST").default("localhost").as[RedisHost] - val port = env(s"${prefix}_REDIS_PORT").default("6379").as[RedisPort] - val sentinel = env(s"${prefix}_REDIS_SENTINEL").as[Boolean].default(false) - val maybeDB = env(s"${prefix}_REDIS_DB").as[RedisDB].option - val maybePass = env(s"${prefix}_REDIS_PASSWORD").as[RedisPassword].option - val maybeMasterSet = env(s"${prefix}_REDIS_MASTER_SET").as[RedisMasterSet].option + val host = renv("REDIS_HOST").default("localhost").as[RedisHost] + val port = renv("REDIS_PORT").default("6379").as[RedisPort] + val sentinel = renv("REDIS_SENTINEL").as[Boolean].default(false) + val maybeDB = renv("REDIS_DB").as[RedisDB].option + val maybePass = renv("REDIS_PASSWORD").as[RedisPassword].option + val maybeMasterSet = renv("REDIS_MASTER_SET").as[RedisMasterSet].option (host, port, sentinel, maybeDB, maybePass, maybeMasterSet).mapN(RedisConfig.apply) } val eventsQueueName: ConfigValue[Effect, QueueName] = - env(s"${prefix}_REDIS_QUEUE_NAME").default("events").as[QueueName] + renv("REDIS_QUEUE_NAME").default("events").as[QueueName] val retryOnErrorDelay: ConfigValue[Effect, FiniteDuration] = - env(s"${prefix}_RETRY_ON_ERROR_DELAY").default("2 seconds").as[FiniteDuration] + renv("RETRY_ON_ERROR_DELAY").default("2 seconds").as[FiniteDuration] val solrConfig: 
ConfigValue[Effect, SolrConfig] = { - val url = env(s"${prefix}_SOLR_URL").default("http://localhost:8983/solr").as[Uri] - val core = env(s"${prefix}_SOLR_CORE").default("search-core-test") + val url = renv("SOLR_URL").default("http://localhost:8983/solr").as[Uri] + val core = renv("SOLR_CORE").default("search-core-test") val maybeUser = - (env(s"${prefix}_SOLR_USER").option -> env(s"${prefix}_SOLR_PASS").option) - .mapN { case (maybeUsername, maybePass) => - (maybeUsername, maybePass).mapN(SolrUser.apply) - } + (renv("SOLR_USER"), renv("SOLR_PASS")) + .mapN(SolrUser.apply) + .option val defaultCommit = - env(s"${prefix}_SOLR_DEFAULT_COMMIT_WITHIN") + renv("SOLR_DEFAULT_COMMIT_WITHIN") .default("0 seconds") .as[FiniteDuration] .option val logMessageBodies = - env(s"${prefix}_SOLR_LOG_MESSAGE_BODIES").default("false").as[Boolean] + renv("SOLR_LOG_MESSAGE_BODIES").default("false").as[Boolean] (url, core, maybeUser, defaultCommit, logMessageBodies).mapN(SolrConfig.apply) } diff --git a/modules/search-api/src/main/scala/io/renku/search/api/HttpApplication.scala b/modules/search-api/src/main/scala/io/renku/search/api/HttpApplication.scala index 1c01f17b..e601e638 100644 --- a/modules/search-api/src/main/scala/io/renku/search/api/HttpApplication.scala +++ b/modules/search-api/src/main/scala/io/renku/search/api/HttpApplication.scala @@ -33,6 +33,8 @@ import sttp.tapir.* import sttp.tapir.docs.openapi.OpenAPIDocsInterpreter import sttp.tapir.server.ServerEndpoint import sttp.tapir.server.http4s.Http4sServerInterpreter +import io.renku.search.query.Query +import io.renku.search.query.docs.SearchQueryManual object HttpApplication: def apply[F[_]: Async: Network]( @@ -42,7 +44,8 @@ object HttpApplication: class HttpApplication[F[_]: Async](searchApi: SearchApi[F]) extends Http4sDsl[F] - with TapirBorerJson: + with TapirBorerJson + with TapirCodecs: private val businessRoot = "search" @@ -57,18 +60,31 @@ class HttpApplication[F[_]: Async](searchApi: SearchApi[F]) private lazy 
val businessEndpoints: List[ServerEndpoint[Any, F]] = List( - searchEndpoint.serverLogic(searchApi.find) + searchEndpointGet.serverLogic(searchApi.query), + searchEndpointPost.serverLogic(searchApi.query) ) - private lazy val searchEndpoint - : PublicEndpoint[String, String, List[SearchEntity], Any] = - val query = - path[String].name("user query").description("User defined query e.g. renku~") + private lazy val searchEndpointGet + : PublicEndpoint[Query, String, List[SearchEntity], Any] = + val q = + query[Query]("q").description("User defined query e.g. renku") endpoint.get - .in(query) + .in(q) .errorOut(borerJsonBody[String]) .out(borerJsonBody[List[SearchEntity]]) - .description("Search API for searching Renku entities") + .description(SearchQueryManual.markdown) + + private val searchEndpointPost: PublicEndpoint[Query, String, List[SearchEntity], Any] = + endpoint.post + .errorOut(borerJsonBody[String]) + .in( + borerJsonBody[Query] + .example( + Query(Query.Segment.nameIs("proj-name1"), Query.Segment.text("flight sim")) + ) + ) + .out(borerJsonBody[List[SearchEntity]]) + .description(SearchQueryManual.markdown) private lazy val openAPIEndpoint = val docs = OpenAPIDocsInterpreter() diff --git a/modules/search-api/src/main/scala/io/renku/search/api/Microservice.scala b/modules/search-api/src/main/scala/io/renku/search/api/Microservice.scala index 249bab14..b6b19c67 100644 --- a/modules/search-api/src/main/scala/io/renku/search/api/Microservice.scala +++ b/modules/search-api/src/main/scala/io/renku/search/api/Microservice.scala @@ -19,6 +19,7 @@ package io.renku.search.api import cats.effect.{ExitCode, IO, IOApp} +import io.renku.logging.LoggingSetup object Microservice extends IOApp: @@ -27,6 +28,7 @@ object Microservice extends IOApp: override def run(args: List[String]): IO[ExitCode] = for { config <- loadConfig + _ <- IO(LoggingSetup.doConfigure(config.verbosity)) _ <- HttpApplication[IO](config.solrConfig) .flatMap(HttpServer.build) .use(_ => IO.never) 
diff --git a/modules/search-api/src/main/scala/io/renku/search/api/SearchApi.scala b/modules/search-api/src/main/scala/io/renku/search/api/SearchApi.scala index 525936ff..99ba102f 100644 --- a/modules/search-api/src/main/scala/io/renku/search/api/SearchApi.scala +++ b/modules/search-api/src/main/scala/io/renku/search/api/SearchApi.scala @@ -22,9 +22,11 @@ import cats.effect.{Async, Resource} import fs2.io.net.Network import io.renku.search.solr.client.SearchSolrClient import io.renku.solr.client.SolrConfig +import io.renku.search.query.Query trait SearchApi[F[_]]: def find(phrase: String): F[Either[String, List[SearchEntity]]] + def query(query: Query): F[Either[String, List[SearchEntity]]] object SearchApi: def apply[F[_]: Async: Network]( diff --git a/modules/search-api/src/main/scala/io/renku/search/api/SearchApiConfig.scala b/modules/search-api/src/main/scala/io/renku/search/api/SearchApiConfig.scala index 86261ac5..0d607a12 100644 --- a/modules/search-api/src/main/scala/io/renku/search/api/SearchApiConfig.scala +++ b/modules/search-api/src/main/scala/io/renku/search/api/SearchApiConfig.scala @@ -18,14 +18,16 @@ package io.renku.search.api +import cats.syntax.all.* import ciris.{ConfigValue, Effect} import io.renku.search.config.ConfigValues import io.renku.solr.client.SolrConfig final case class SearchApiConfig( - solrConfig: SolrConfig + solrConfig: SolrConfig, + verbosity: Int ) object SearchApiConfig: val config: ConfigValue[Effect, SearchApiConfig] = - ConfigValues.solrConfig.map(SearchApiConfig.apply) + (ConfigValues.solrConfig, ConfigValues.logLevel).mapN(SearchApiConfig.apply) diff --git a/modules/search-api/src/main/scala/io/renku/search/api/SearchApiImpl.scala b/modules/search-api/src/main/scala/io/renku/search/api/SearchApiImpl.scala index e79b5aa7..3db71d2c 100644 --- a/modules/search-api/src/main/scala/io/renku/search/api/SearchApiImpl.scala +++ b/modules/search-api/src/main/scala/io/renku/search/api/SearchApiImpl.scala @@ -24,6 +24,7 @@ import 
io.renku.search.solr.client.SearchSolrClient import io.renku.search.solr.documents.{Project as SolrProject, User as SolrUser} import org.http4s.dsl.Http4sDsl import scribe.Scribe +import io.renku.search.query.Query private class SearchApiImpl[F[_]: Async](solrClient: SearchSolrClient[F]) extends Http4sDsl[F] @@ -39,6 +40,14 @@ private class SearchApiImpl[F[_]: Async](solrClient: SearchSolrClient[F]) .handleErrorWith(errorResponse(phrase)) .widen + override def query(query: Query): F[Either[String, List[SearchEntity]]] = + solrClient + .queryProjects(query) + .map(toApiModel) + .map(_.asRight[String]) + .handleErrorWith(errorResponse(query.render)) + .widen + private def errorResponse( phrase: String ): Throwable => F[Either[String, List[Project]]] = diff --git a/modules/search-api/src/main/scala/io/renku/search/api/TapirCodecs.scala b/modules/search-api/src/main/scala/io/renku/search/api/TapirCodecs.scala new file mode 100644 index 00000000..1e844d46 --- /dev/null +++ b/modules/search-api/src/main/scala/io/renku/search/api/TapirCodecs.scala @@ -0,0 +1,29 @@ +/* + * Copyright 2024 Swiss Data Science Center (SDSC) + * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and + * Eidgenössische Technische Hochschule Zürich (ETHZ). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.renku.search.api + +import sttp.tapir.* +import io.renku.search.query.Query + +trait TapirCodecs: + given Codec[String, Query, CodecFormat.TextPlain] = + Codec.string.mapEither(Query.parse(_))(_.render) + + given Schema[Query] = + Schema.string[Query] diff --git a/modules/search-provision/src/main/scala/io/renku/search/provision/Microservice.scala b/modules/search-provision/src/main/scala/io/renku/search/provision/Microservice.scala index 8b82ba84..c6dbe296 100644 --- a/modules/search-provision/src/main/scala/io/renku/search/provision/Microservice.scala +++ b/modules/search-provision/src/main/scala/io/renku/search/provision/Microservice.scala @@ -21,6 +21,7 @@ package io.renku.search.provision import cats.effect.{ExitCode, IO, IOApp, Temporal} import io.renku.search.solr.schema.Migrations import io.renku.solr.client.migration.SchemaMigrator +import io.renku.logging.LoggingSetup import scribe.Scribe import scribe.cats.* @@ -32,6 +33,7 @@ object Microservice extends IOApp: override def run(args: List[String]): IO[ExitCode] = for { config <- loadConfig + _ <- IO(LoggingSetup.doConfigure(config.verbosity)) _ <- runSolrMigrations(config) _ <- startProvisioning(config) } yield ExitCode.Success diff --git a/modules/search-provision/src/main/scala/io/renku/search/provision/SearchProvisionConfig.scala b/modules/search-provision/src/main/scala/io/renku/search/provision/SearchProvisionConfig.scala index 8d9f105d..241b1ed0 100644 --- a/modules/search-provision/src/main/scala/io/renku/search/provision/SearchProvisionConfig.scala +++ b/modules/search-provision/src/main/scala/io/renku/search/provision/SearchProvisionConfig.scala @@ -30,7 +30,8 @@ final case class SearchProvisionConfig( redisConfig: RedisConfig, queueName: QueueName, solrConfig: SolrConfig, - retryOnErrorDelay: FiniteDuration + retryOnErrorDelay: FiniteDuration, + verbosity: Int ) object SearchProvisionConfig: @@ -40,5 +41,6 @@ object SearchProvisionConfig: ConfigValues.redisConfig, 
ConfigValues.eventsQueueName, ConfigValues.solrConfig, - ConfigValues.retryOnErrorDelay + ConfigValues.retryOnErrorDelay, + ConfigValues.logLevel ).mapN(SearchProvisionConfig.apply) diff --git a/modules/search-query-docs/docs/manual.md b/modules/search-query-docs/docs/manual.md new file mode 100644 index 00000000..5606a5e4 --- /dev/null +++ b/modules/search-query-docs/docs/manual.md @@ -0,0 +1,161 @@ +## Search Query + +**NOTE: this is a work in progress** + +The search accepts queries in two representations: JSON and a simple +query string. A query may contain specific and unspecific search +terms. + +### Query String + +A query is a sequence of words. All words that are not recognized as +specific search terms are used for searching in various entity +properties, such as `name` or `description`. Specific search terms are +matched exactly against a certain field. Terms are separated by +whitespace. + +Example: +``` +numpy flight visibility:public +``` + +Searches for entities containing `numpy` _and_ `flight` that are +public. + +The term order is usually not relevant, it may influence the score of +a result, though. + +If a value for a specific field contains whitespace, quotes or a comma +it must be enclosed in quotes. Additionally, multiple values can be +provided for each field by using a comma separated list. The values +are treated as alternatives, so any such value would yield a result. + +Example: +``` +numpy flight visibility:public,private +``` + +Searches for entities containing `numpy` _and_ `flight` that are +_either_ `public` _or_ `private`. + +### Query JSON + +The JSON format allows to specify the same query as a JSON object. A +JSON object may contain specific terms by including the corresponding +field-value pair. For unspecific terms, the special field `_text` is +used. + +Example: +```json +{ + "_text": "numpy flight", + "visibility": "public" +} +``` + +JSON objects are sequences of key-value pairs. 
As such, the encoding +allows to specify multiple same named fields in one JSON object. This +would be a valid query: + +```json +{ + "_text": "numpy", + "visibility": "public", + "_text": "flight" +} +``` + +The JSON variant follows the same rules for specifying field values. +Multiple alternative values can be given as a comma separated list. + +### Fields + +The following fields are available: + +```scala mdoc:passthrough +import io.renku.search.query.* +println(Field.values.map(e => s"`${e.name}`").mkString("- ", "\n- ", "")) +``` + +Each field allows to specify one or more values, separated by comma. +The value must be separated by a `:`. For date fields, additional `<` +and `>` is supported. + +### Dates + +Date fields, like + +```scala mdoc:passthrough +println(List(Field.Created).map(e => s"`${e.name}`").mkString("- ", "\n- ", "")) +``` + +accept date strings which can be specified in various ways. There are + +- relative dates: `today` +- partial timestamps: `2023-05`, `2023-11-12T10` +- calculations based on the above: `today-5d`, `2023-10-15/10d` + + +#### Relative dates + +There are the following keywords for relative dates: + +```scala mdoc:passthrough +println( + RelativeDate.values.map(e => s"`${e.name}`").mkString("- ", "\n- ", "") +) +``` + +#### Partial Timestamps + +Timestamps must be in ISO8601 form and are UTC based and allow to +specify time up to seconds. The full form is + +``` +yyyy-mm-ddTHH:MM:ssZ +``` + +Any part starting from right can be omitted. When querying, it will be +filled with either the maximum or minimum possible value depending on +the side of comparison. When the date is an upper bound, the missing +parts will be set to their minimum values. Conversely, when used as a +lower bound then the parts are set to its maximum value. 
+ Example: +- `created>2023-03` will turn into `created>2023-03-31T23:59:59` +- `created<2023-03` will turn into `created<2023-03-01T00:00:00` + +#### Date calculations + +Lastly, a date can be specified by adding or subtracting days from a +reference date. The reference date must be given either as a relative +date or partial timestamp. Then a `+`, `-` or `/` follows with the +amount of days. + +The `/` character allows to add and subtract the days from the +reference date, making the reference date the middle. + +Example: +- `created>today-14d` things created from 14 days ago +- `created<2023-05/14d` things created from last two weeks of April + and first two weeks of May + +#### Date Comparison + +Comparing dates with `>` and `<` is done as expected. More interesting +is to specify more than one date and the use of the `:` comparison. + +The `:` can be used to specify ranges more succinctly. For a full +timestamp, it means *equals*. With partial timestamps it searches +within the minimum and maximum possible date for that partial +timestamp. + +Since multiple values are combined using `OR`, it is possible to +search in multiple ranges. + +Example: +``` +created:2023-03,2023-06 +``` + +The above means to match entities created in March 2023 or June 2023. 
diff --git a/modules/search-query-docs/src/main/scala/io/renku/search/query/docs/SearchQueryManual.scala b/modules/search-query-docs/src/main/scala/io/renku/search/query/docs/SearchQueryManual.scala new file mode 100644 index 00000000..861cebe5 --- /dev/null +++ b/modules/search-query-docs/src/main/scala/io/renku/search/query/docs/SearchQueryManual.scala @@ -0,0 +1,8 @@ +package io.renku.search.query.docs + +object SearchQueryManual { + + lazy val markdown: String = + scala.io.Source.fromURL(getClass.getResource("/query-manual/manual.md")).mkString + +} diff --git a/modules/search-query/src/main/scala/io/renku/search/query/Field.scala b/modules/search-query/src/main/scala/io/renku/search/query/Field.scala index 2af7b75f..9679d3ae 100644 --- a/modules/search-query/src/main/scala/io/renku/search/query/Field.scala +++ b/modules/search-query/src/main/scala/io/renku/search/query/Field.scala @@ -34,7 +34,7 @@ object Field: given Encoder[Field] = Encoder.forString.contramap(_.name) given Decoder[Field] = Decoder.forString.mapEither(fromString) - private[this] val allNames: String = Field.values.mkString(", ") + private[this] val allNames: String = Field.values.map(_.name).mkString(", ") def fromString(str: String): Either[String, Field] = Field.values diff --git a/modules/search-query/src/main/scala/io/renku/search/query/Query.scala b/modules/search-query/src/main/scala/io/renku/search/query/Query.scala index acf3f00f..1db4de84 100644 --- a/modules/search-query/src/main/scala/io/renku/search/query/Query.scala +++ b/modules/search-query/src/main/scala/io/renku/search/query/Query.scala @@ -26,11 +26,12 @@ import io.renku.search.query.FieldTerm.Created import io.renku.search.query.Query.Segment import io.renku.search.query.json.QueryJsonCodec import io.renku.search.query.parse.{QueryParser, QueryUtil} +import cats.kernel.Monoid final case class Query( segments: List[Query.Segment] ): - def asString: String = + def render: String = segments .map { case Query.Segment.Field(v) 
=> v.asString @@ -58,6 +59,9 @@ object Query: case Text(value: String) object Segment: + given Monoid[Segment.Text] = + Monoid.instance(Text(""), _ ++ _) + extension (self: Segment.Text) def ++(other: Segment.Text): Segment.Text = if (other.value.isEmpty) self diff --git a/modules/search-query/src/main/scala/io/renku/search/query/parse/DateTimeParser.scala b/modules/search-query/src/main/scala/io/renku/search/query/parse/DateTimeParser.scala index bfad2ac8..d5701e87 100644 --- a/modules/search-query/src/main/scala/io/renku/search/query/parse/DateTimeParser.scala +++ b/modules/search-query/src/main/scala/io/renku/search/query/parse/DateTimeParser.scala @@ -23,8 +23,8 @@ import io.renku.search.query.{DateTimeCalc, DateTimeRef, PartialDateTime, Relati import java.time.* -/** Allows parsing partial date-time strings, filling missing parts with either lowest or - * highest possible values. +/** Allows parsing partial date-time strings, allowing missing parts to be filled with + * either the lowest or highest possible values. 
*/ object DateTimeParser { @@ -105,5 +105,4 @@ object DateTimeParser { dateCalc.map(DateTimeRef.apply).backtrack | partialDateTime.map(DateTimeRef.apply) | relativeDate.map(DateTimeRef.apply) - } diff --git a/modules/search-query/src/main/scala/io/renku/search/query/parse/QueryUtil.scala b/modules/search-query/src/main/scala/io/renku/search/query/parse/QueryUtil.scala index 23f934e1..5e249720 100644 --- a/modules/search-query/src/main/scala/io/renku/search/query/parse/QueryUtil.scala +++ b/modules/search-query/src/main/scala/io/renku/search/query/parse/QueryUtil.scala @@ -18,6 +18,7 @@ package io.renku.search.query.parse +import cats.syntax.all.* import io.renku.search.query.Query import io.renku.search.query.Query.Segment @@ -36,11 +37,8 @@ private[query] object QueryUtil { in match case first :: rest => (first, curr) match - case (t1: Segment.Text, Some(tc)) => - loop(rest, Some(tc ++ t1), result) - - case (e: Segment.Text, None) => - loop(rest, Some(e), result) + case (t1: Segment.Text, tc) => + loop(rest, tc |+| Some(t1), result) case (f: Segment.Field, Some(tc)) => loop(rest, None, f :: tc :: result) diff --git a/modules/search-query/src/test/scala/io/renku/search/query/parse/QueryParserSpec.scala b/modules/search-query/src/test/scala/io/renku/search/query/parse/QueryParserSpec.scala index b024595b..2c263f4a 100644 --- a/modules/search-query/src/test/scala/io/renku/search/query/parse/QueryParserSpec.scala +++ b/modules/search-query/src/test/scala/io/renku/search/query/parse/QueryParserSpec.scala @@ -20,6 +20,7 @@ package io.renku.search.query.parse import cats.data.NonEmptyList as Nel import io.renku.search.query.* +import io.renku.search.query.Query.Segment import io.renku.search.query.Comparison.{GreaterThan, LowerThan} import munit.{FunSuite, ScalaCheckSuite} import org.scalacheck.Prop @@ -92,14 +93,20 @@ class QueryParserSpec extends ScalaCheckSuite with ParserSuite { ) } - test("example queries".ignore) { - val p = QueryParser.query - 
println(p.run("name:\"vQgCg mpZU4cCgF3N eVZUMkH7\",JHRt visibility:private WX59P")) + test("invalid field terms converted as text".ignore) { + assertEquals( + Query.parse("projectId:"), + Right(Query(Segment.Text("projectId:"))) + ) + assertEquals( + Query.parse("projectId1"), + Right(Query(Segment.Text("projectId1"))) + ) } property("generated queries") { Prop.forAll(QueryGenerators.query) { q => - val qStr = q.asString + val qStr = q.render val parsed = Query.parse(qStr).fold(sys.error, identity) if (q != parsed) { // this is for better error messages when things fail diff --git a/modules/search-solr-client/src/main/scala/io/renku/search/solr/client/QueryInterpreter.scala b/modules/search-solr-client/src/main/scala/io/renku/search/solr/client/QueryInterpreter.scala new file mode 100644 index 00000000..d35a5250 --- /dev/null +++ b/modules/search-solr-client/src/main/scala/io/renku/search/solr/client/QueryInterpreter.scala @@ -0,0 +1,63 @@ +/* + * Copyright 2024 Swiss Data Science Center (SDSC) + * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and + * Eidgenössische Technische Hochschule Zürich (ETHZ). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.renku.search.solr.client + +import io.renku.search.query.Query +import io.renku.search.query.Query.Segment +import io.renku.search.query.FieldTerm +import io.renku.search.solr.schema.EntityDocumentSchema.Fields +import org.apache.lucene.queryparser.flexible.standard.QueryParserUtil + +private object QueryInterpreter { + + def apply(query: Query): String = + query.segments + .map { + case Segment.Field(FieldTerm.ProjectIdIs(ids)) => + ids.toList + .map(escape) + .map(id => s"${Fields.id.name}:$id") + .mkString("(", " OR ", ")") + + case Segment.Field(FieldTerm.SlugIs(slugs)) => + slugs.toList + .map(escape) + .map(slug => s"${Fields.slug.name}:$slug") + .mkString("(", " OR ", ")") + + case Segment.Field(FieldTerm.NameIs(names)) => + names.toList + .map(escape) + .map(name => s"${Fields.name.name}:$name") + .mkString("(", " OR ", ")") + + case Segment.Text(txt) => + s"${Fields.contentAll.name}:${escape(txt)}" + + case _ => + "" + } + .mkString(" AND ") + + private def escape(s: String): String = + val escaped = QueryParserUtil.escape(s) + if (escaped.exists(_.isWhitespace)) s"($escaped)" + else escaped + +} diff --git a/modules/search-solr-client/src/main/scala/io/renku/search/solr/client/SearchSolrClient.scala b/modules/search-solr-client/src/main/scala/io/renku/search/solr/client/SearchSolrClient.scala index d5e7b632..fdf26ba3 100644 --- a/modules/search-solr-client/src/main/scala/io/renku/search/solr/client/SearchSolrClient.scala +++ b/modules/search-solr-client/src/main/scala/io/renku/search/solr/client/SearchSolrClient.scala @@ -22,12 +22,14 @@ import cats.effect.{Async, Resource} import fs2.io.net.Network import io.renku.search.solr.documents.Project import io.renku.solr.client.{SolrClient, SolrConfig} +import io.renku.search.query.Query trait SearchSolrClient[F[_]]: def insertProjects(projects: Seq[Project]): F[Unit] def findProjects(phrase: String): F[List[Project]] + def queryProjects(query: Query): F[List[Project]] object 
SearchSolrClient: def make[F[_]: Async: Network]( diff --git a/modules/search-solr-client/src/main/scala/io/renku/search/solr/client/SearchSolrClientImpl.scala b/modules/search-solr-client/src/main/scala/io/renku/search/solr/client/SearchSolrClientImpl.scala index a1b80aab..523dbb41 100644 --- a/modules/search-solr-client/src/main/scala/io/renku/search/solr/client/SearchSolrClientImpl.scala +++ b/modules/search-solr-client/src/main/scala/io/renku/search/solr/client/SearchSolrClientImpl.scala @@ -23,13 +23,23 @@ import cats.syntax.all.* import io.renku.search.solr.documents.Project import io.renku.search.solr.schema.EntityDocumentSchema import io.renku.solr.client.{QueryData, QueryString, SolrClient} +import io.renku.search.query.Query private class SearchSolrClientImpl[F[_]: Async](solrClient: SolrClient[F]) extends SearchSolrClient[F]: + private[this] val logger = scribe.cats.effect[F] + override def insertProjects(projects: Seq[Project]): F[Unit] = solrClient.insert(projects).void + override def queryProjects(query: Query): F[List[Project]] = + val solrQuery = QueryInterpreter(query) + logger.debug(s"Query: ${query.render} -> Solr: $solrQuery") >> + solrClient + .query[Project](QueryData.withChildren(QueryString(solrQuery))) + .map(_.responseBody.docs.toList) + override def findProjects(phrase: String): F[List[Project]] = solrClient .query[Project]( diff --git a/modules/search-solr-client/src/main/scala/io/renku/search/solr/documents/Project.scala b/modules/search-solr-client/src/main/scala/io/renku/search/solr/documents/Project.scala index 9f145a5d..e7fe6196 100644 --- a/modules/search-solr-client/src/main/scala/io/renku/search/solr/documents/Project.scala +++ b/modules/search-solr-client/src/main/scala/io/renku/search/solr/documents/Project.scala @@ -28,12 +28,12 @@ final case class Project( id: projects.Id, name: projects.Name, slug: projects.Slug, - repositories: Seq[projects.Repository], + repositories: Seq[projects.Repository] = Seq.empty, visibility: 
projects.Visibility, description: Option[projects.Description] = None, createdBy: User, creationDate: projects.CreationDate, - members: Seq[User] + members: Seq[User] = Seq.empty ) object Project: diff --git a/modules/search-solr-client/src/main/scala/io/renku/search/solr/schema/EntityDocumentSchema.scala b/modules/search-solr-client/src/main/scala/io/renku/search/solr/schema/EntityDocumentSchema.scala index ebce4682..6b6227b3 100644 --- a/modules/search-solr-client/src/main/scala/io/renku/search/solr/schema/EntityDocumentSchema.scala +++ b/modules/search-solr-client/src/main/scala/io/renku/search/solr/schema/EntityDocumentSchema.scala @@ -36,10 +36,14 @@ object EntityDocumentSchema: val nestPath: FieldName = FieldName("_nest_path_") val root: FieldName = FieldName("_root_") val nestParent: FieldName = FieldName("_nest_parent_") + // catch-all field + val contentAll: FieldName = FieldName("content_all") object FieldTypes: val string: FieldType = FieldType.str(TypeName("SearchString")).makeDocValue val text: FieldType = FieldType.text(TypeName("SearchText"), Analyzer.classic) + val textAll: FieldType = + FieldType.text(TypeName("SearchTextAll"), Analyzer.classic).makeMultiValued val dateTime: FieldType = FieldType.dateTimePoint(TypeName("SearchDateTime")) val initialEntityDocumentAdd: Seq[SchemaCommand] = Seq( @@ -57,3 +61,13 @@ object EntityDocumentSchema: SchemaCommand.Add(Field(Fields.members, FieldType.nestedPath).makeMultiValued), SchemaCommand.Add(Field(Fields.nestParent, FieldTypes.string)) ) + + val copyContentField: Seq[SchemaCommand] = Seq( + SchemaCommand.Add(FieldTypes.textAll), + SchemaCommand.Add(Field(Fields.contentAll, FieldTypes.textAll).makeMultiValued), + SchemaCommand.Add(CopyFieldRule(Fields.name, Fields.contentAll)), + SchemaCommand.Add(CopyFieldRule(Fields.description, Fields.contentAll)), + SchemaCommand.Add(CopyFieldRule(Fields.slug, Fields.contentAll)), + SchemaCommand.Add(CopyFieldRule(Fields.repositories, Fields.contentAll)), + 
SchemaCommand.Add(CopyFieldRule(Fields.visibility, Fields.contentAll)) + ) diff --git a/modules/search-solr-client/src/main/scala/io/renku/search/solr/schema/Migrations.scala b/modules/search-solr-client/src/main/scala/io/renku/search/solr/schema/Migrations.scala index 71d0984e..380ec44f 100644 --- a/modules/search-solr-client/src/main/scala/io/renku/search/solr/schema/Migrations.scala +++ b/modules/search-solr-client/src/main/scala/io/renku/search/solr/schema/Migrations.scala @@ -23,6 +23,7 @@ import io.renku.solr.client.migration.SchemaMigration object Migrations { val all: Seq[SchemaMigration] = Seq( - SchemaMigration(version = 1L, EntityDocumentSchema.initialEntityDocumentAdd) + SchemaMigration(version = 1L, EntityDocumentSchema.initialEntityDocumentAdd), + SchemaMigration(version = 2L, EntityDocumentSchema.copyContentField) ) } diff --git a/modules/solr-client/src/main/scala/io/renku/solr/client/schema/FieldType.scala b/modules/solr-client/src/main/scala/io/renku/solr/client/schema/FieldType.scala index 82fa0ea9..32b90001 100644 --- a/modules/solr-client/src/main/scala/io/renku/solr/client/schema/FieldType.scala +++ b/modules/solr-client/src/main/scala/io/renku/solr/client/schema/FieldType.scala @@ -31,6 +31,7 @@ final case class FieldType( sortMissingLast: Boolean = true ): lazy val makeDocValue: FieldType = copy(docValues = true) + lazy val makeMultiValued: FieldType = copy(multiValued = true) object FieldType: diff --git a/nix/dev-scripts.nix b/nix/dev-scripts.nix new file mode 100644 index 00000000..f4edf4af --- /dev/null +++ b/nix/dev-scripts.nix @@ -0,0 +1,67 @@ +{ + concatTextFile, + writeShellScriptBin, +}: rec { + redis-push = concatTextFile { + name = "redis-push"; + files = [./scripts/redis-push]; + executable = true; + destination = "/bin/redis-push"; + }; + + recreate-container = concatTextFile { + name = "recreate-container"; + files = [./scripts/recreate-container]; + executable = true; + destination = "/bin/recreate-container"; + }; + + 
start-container = writeShellScriptBin "start-container" '' + cnt=''${RS_CONTAINER:-rsdev} + sudo nixos-container start $cnt + ''; + + solr-create-core = writeShellScriptBin "solr-create-core" '' + core_name=''${1:-search-core-test} + sudo nixos-container run ''${RS_CONTAINER:-rsdev} -- su solr -c "solr create -c $core_name" + sudo nixos-container run ''${RS_CONTAINER:-rsdev} -- find /var/solr/data/$core_name/conf -type f -exec chmod 644 {} \; + ''; + + solr-delete-core = writeShellScriptBin "solr-delete-core" '' + core_name=''${1:-search-core-test} + sudo nixos-container run ''${RS_CONTAINER:-rsdev} -- su solr -c "solr delete -c $core_name" + ''; + + solr-recreate-core = writeShellScriptBin "solr-recreate-core" '' + ${solr-delete-core}/bin/solr-delete-core "$1" + ${solr-create-core}/bin/solr-create-core "$1" + ''; + + vm-build = writeShellScriptBin "vm-build" '' + nix build .#nixosConfigurations.dev-vm.config.system.build.vm + ''; + + vm-run = writeShellScriptBin "vm-run" '' + nix run .#nixosConfigurations.dev-vm.config.system.build.vm + ''; + + vm-ssh = writeShellScriptBin "vm-ssh" '' + ssh -p $VM_SSH_PORT root@localhost "$@" + ''; + + vm-solr-create-core = writeShellScriptBin "solr-create-core" '' + core_name=''${1:-search-core-test} + ssh -p $VM_SSH_PORT root@localhost "su solr -c \"solr create -c $core_name\"" + ssh -p $VM_SSH_PORT root@localhost "find /var/solr/data/$core_name/conf -type f -exec chmod 644 {} \;" + ''; + + vm-solr-delete-core = writeShellScriptBin "solr-delete-core" '' + core_name=''${1:-search-core-test} + ssh -p $VM_SSH_PORT root@localhost "su solr -c \"solr delete -c $core_name\"" + ''; + + vm-solr-recreate-core = writeShellScriptBin "solr-recreate-core" '' + ${vm-solr-delete-core}/bin/solr-delete-core "$1" + ${vm-solr-create-core}/bin/solr-create-core "$1" + ''; +} diff --git a/nix/dev-vm.nix b/nix/dev-vm.nix new file mode 100644 index 00000000..8a12a053 --- /dev/null +++ b/nix/dev-vm.nix @@ -0,0 +1,48 @@ +{ + modulesPath, + lib, + config, 
+ ... +}: { + imports = [ + (modulesPath + "/virtualisation/qemu-vm.nix") + ./solr-module.nix + ./services.nix + ]; + + services.openssh = { + enable = true; + settings.PermitRootLogin = "yes"; + }; + + users.users.root = { + password = "root"; + }; + i18n = {defaultLocale = "de_DE.UTF-8";}; + console.keyMap = "de"; + + networking = { + hostName = "renku-search-testvm"; + }; + + virtualisation.memorySize = 4096; + + virtualisation.forwardPorts = [ + { + from = "host"; + host.port = 10022; + guest.port = 22; + } + { + from = "host"; + host.port = 18983; + guest.port = 8983; + } + { + from = "host"; + host.port = 16379; + guest.port = 6379; + } + ]; + documentation.enable = false; +} diff --git a/nix/scripts/recreate-container b/nix/scripts/recreate-container new file mode 100644 index 00000000..7b96c570 --- /dev/null +++ b/nix/scripts/recreate-container @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +cnt=''${RS_CONTAINER:-rsdev} +if nixos-container list | grep $cnt > /dev/null; then + echo "Destroying container $cnt" + sudo nixos-container destroy $cnt +fi +echo "Creating and starting container $cnt ..." +sudo nixos-container create $cnt --flake . +sudo nixos-container start $cnt diff --git a/nix/scripts/redis-push b/nix/scripts/redis-push new file mode 100644 index 00000000..400a88dc --- /dev/null +++ b/nix/scripts/redis-push @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +if [ -z "$1" ] || [ -z "$2" ]; then + echo "Provide a project id and name, please." 
+ exit 1 +fi + +redis_host=${RS_REDIS_HOST:-rsdev} +redis_port=${RS_REDIS_PORT:-6379} + +header='{"source":"dev","type":"project.created","dataContentType":"application/avro+json","schemaVersion":"1","time":0,"requestId":"r1"}' +payload=$(jq --null-input --arg id "$1" --arg name "$2" --arg slug "$1/$2" '{"id":$id,"name":$name,"slug":$slug, "repositories":[],"visibility":"PUBLIC","description":{"string":"my project description"},"createdBy":"dev","creationDate":0,"members":[]}') + +redis-cli -h $redis_host -p $redis_port XADD events '*' header "$header" payload "$payload" diff --git a/nix/services.nix b/nix/services.nix new file mode 100644 index 00000000..22715e91 --- /dev/null +++ b/nix/services.nix @@ -0,0 +1,23 @@ +{ + lib, + config, + ... +}: { + services.solr = { + enable = true; + }; + + services.redis.servers.search = { + enable = true; + port = 6379; + bind = "0.0.0.0"; + openFirewall = true; + settings = { + "protected-mode" = "no"; + }; + }; + + networking = { + firewall.allowedTCPPorts = [8983]; + }; +} diff --git a/nix/solr-module.nix b/nix/solr-module.nix new file mode 100644 index 00000000..d5f72798 --- /dev/null +++ b/nix/solr-module.nix @@ -0,0 +1,85 @@ +{ + config, + lib, + pkgs, + ... 
+}: let + cfg = config.services.solr; +in { + ## interface + options = with lib; { + services.solr = { + enable = mkOption { + default = false; + description = "Whether to enable solr."; + }; + bindAddress = mkOption { + type = types.str; + default = "0.0.0.0"; + description = "The address to bind to"; + }; + port = mkOption { + type = types.int; + default = 8983; + description = "The port solr is listening on."; + }; + heap = mkOption { + type = types.int; + default = 2048; + description = "The heap setting in megabytes"; + }; + home-dir = mkOption { + type = types.str; + default = "/var/solr/data"; + description = "Home dir of solr, to store the data"; + }; + }; + }; + + ## implementation + config = lib.mkIf config.services.solr.enable { + # Create a user for solr + users.users.solr = { + isNormalUser = false; + isSystemUser = true; + group = "solr"; + useDefaultShell = true; + }; + users.groups = {solr = {};}; + + # to allow playing with the solr cli + environment.systemPackages = [pkgs.solr]; + + environment.etc = {solr = {source = "${pkgs.solr}/server/solr";};}; + + # Create directories for storage + systemd.tmpfiles.rules = [ + "d /var/solr 0755 solr solr - -" + "d /var/solr/data 0755 solr solr - -" + "d /var/solr/logs 0755 solr solr - -" + ]; + + systemd.services.solr = { + enable = true; + description = "Apache Solr"; + wantedBy = ["multi-user.target"]; + path = with pkgs; [solr lsof coreutils procps gawk]; + environment = { + SOLR_PORT = toString cfg.port; + SOLR_JETTY_HOST = cfg.bindAddress; + SOLR_HEAP = "${toString cfg.heap}m"; + SOLR_PID_DIR = "/var/solr"; + SOLR_HOME = "${cfg.home-dir}"; + SOLR_LOGS_DIR = "/var/solr/logs"; + }; + serviceConfig = { + ExecStart = "${pkgs.solr}/bin/solr start -f -Dsolr.modules=analysis-extras"; + ExecStop = "${pkgs.solr}/bin/solr stop"; + LimitNOFILE = "65000"; + LimitNPROC = "65000"; + User = "solr"; + Group = "solr"; + }; + }; + }; +} diff --git a/nix/solr.nix b/nix/solr.nix new file mode 100644 index 
00000000..15ddc58e --- /dev/null +++ b/nix/solr.nix @@ -0,0 +1,40 @@ +{ + lib, + stdenv, + fetchurl, + makeWrapper, + jre, +}: +# https://discourse.nixos.org/t/solr-has-been-removed-what-are-my-options/33504/3 +stdenv.mkDerivation rec { + pname = "solr"; + version = "9.4.1"; + + src = fetchurl { + url = "mirror://apache/solr/${pname}/${version}/${pname}-${version}.tgz"; + sha256 = "sha256-QQFdemk/76S4pTf10Jgq2ujxPzgu3znJSjSX+bN4MlA="; + }; + + nativeBuildInputs = [makeWrapper]; + + installPhase = '' + mkdir -p $out + cp -r * $out/ + rm -rf $out/bin/init.d + rm $out/bin/postlogs + rm $out/bin/install_solr_service.sh + rm $out/bin/solr.in.sh + rm $out/bin/*.cmd + + wrapProgram $out/bin/solr --set JAVA_HOME "${jre}" + wrapProgram $out/bin/post --set JAVA_HOME "${jre}" + ''; + + meta = with lib; { + homepage = "https://lucene.apache.org/solr/"; + description = "Open source enterprise search platform from the Apache Lucene project"; + license = licenses.asl20; + platforms = platforms.all; + maintainers = with maintainers; []; + }; +} diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 64a4419e..58463492 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -15,6 +15,7 @@ object Dependencies { val ciris = "3.5.0" val fs2 = "3.9.4" val http4s = "0.23.25" + val luceneQueryParser = "9.9.1" val redis4Cats = "1.5.2" val scalacheckEffectMunit = "1.0.4" val scodec = "2.2.2" @@ -24,6 +25,10 @@ val tapir = "1.9.10" } + val luceneQueryParser = Seq( + "org.apache.lucene" % "lucene-queryparser" % V.luceneQueryParser + ) + val catsScalaCheck = Seq( "io.chrisdavenport" %% "cats-scalacheck" % V.catsScalaCheck ) diff --git a/project/SearchQueryDocsPlugin.scala b/project/SearchQueryDocsPlugin.scala new file mode 100644 index 00000000..0c0659b0 --- /dev/null +++ b/project/SearchQueryDocsPlugin.scala @@ -0,0 +1,56 @@ +import sbt._ +import java.nio.file.Path + +object SearchQueryDocsPlugin extends AutoPlugin { + + object 
autoImport { + val Docs = config("docs") + + val docDirectory = settingKey[File]("The directory containing doc sources") + val outputDirectory = settingKey[File]("The directory to place processed files") + val makeManualFile = taskKey[Unit]("Generate doc file") + + } + import autoImport._ + + override def projectConfigurations: Seq[Configuration] = + Seq(Docs) + + override def projectSettings = + inConfig(Docs)(Defaults.configSettings) ++ Seq( + docDirectory := (Compile / Keys.baseDirectory).value / "docs", + outputDirectory := (Compile / Keys.resourceManaged).value / "query-manual", + Keys.libraryDependencies ++= Seq( + "org.scalameta" %% "mdoc" % "2.5.2" % Docs + ), + makeManualFile := Def.taskDyn { + val cp = (Compile / Keys.dependencyClasspath).value + val cpArg = cp.files.mkString(java.io.File.pathSeparator) + val in = docDirectory.value + val out = outputDirectory.value + IO.createDirectory(out) + + val options = List( + // "--verbose", + "--classpath", + cpArg, + "--in", + in, + "--out", + out + ).mkString(" ") + + (Docs / Keys.runMain).toTask(s" mdoc.SbtMain $options") + }.value, + Compile / Keys.resourceGenerators += Def.task { + val _ = makeManualFile.value + val out = outputDirectory.value + (out ** "*.md").get + }, + Keys.watchSources += Watched.WatchSource( + docDirectory.value, + FileFilter.globFilter("*.md"), + HiddenFileFilter + ) + ) +}