From 17e371de2f708222babab9eae0c6c25879c593a0 Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Thu, 8 Feb 2024 17:27:08 +0100 Subject: [PATCH 1/6] WIP: sbt plugin retrieving the avro schemas --- build.sbt | 2 +- project/AvroCodeGen.scala | 7 ++- project/AvroSchemaDownload.scala | 85 ++++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+), 3 deletions(-) create mode 100644 project/AvroSchemaDownload.scala diff --git a/build.sbt b/build.sbt index 3210cdfc..96c17dea 100644 --- a/build.sbt +++ b/build.sbt @@ -214,7 +214,7 @@ lazy val messages = project commons % "compile->compile;test->test", avroCodec % "compile->compile;test->test" ) - .enablePlugins(AvroCodeGen, AutomateHeaderPlugin) + .enablePlugins(AutomateHeaderPlugin, AvroSchemaDownload) .disablePlugins(DbTestPlugin) lazy val configValues = project diff --git a/project/AvroCodeGen.scala b/project/AvroCodeGen.scala index 35d8b874..3960b3cb 100644 --- a/project/AvroCodeGen.scala +++ b/project/AvroCodeGen.scala @@ -6,7 +6,7 @@ import sbtavrohugger.SbtAvrohugger.autoImport.* object AvroCodeGen extends AutoPlugin { override def requires = SbtAvrohugger - override def projectSettings = Seq( + def avroHuggerSettings = Seq( libraryDependencies ++= Dependencies.avro, Compile / avroScalaCustomTypes := { avrohugger.format.SpecificRecord.defaultTypes.copy( @@ -17,7 +17,10 @@ object AvroCodeGen extends AutoPlugin { avrohugger.format.SpecificRecord.defaultTypes.copy( record = avrohugger.types.ScalaCaseClassWithSchema ) - }, + } + ) + + override def projectSettings = avroHuggerSettings ++ Seq( Compile / sourceGenerators += (Compile / avroScalaGenerate).taskValue ) } diff --git a/project/AvroSchemaDownload.scala b/project/AvroSchemaDownload.scala new file mode 100644 index 00000000..a4ea9456 --- /dev/null +++ b/project/AvroSchemaDownload.scala @@ -0,0 +1,85 @@ +import sbt._ +import sbt.Keys._ +import com.github.sbt.git._ +import sbtavrohugger.SbtAvrohugger +import sbtavrohugger.SbtAvrohugger.autoImport.* +import java.io.File + +object AvroSchemaDownload extends AutoPlugin { + + override def requires = GitPlugin && SbtAvrohugger + + object autoImport { + val schemaRepository = settingKey[String]("The repository to download") + val schemaBranch = settingKey[Option[String]]("The branch to checkout") + val schemaTargetDirectory = settingKey[File]("The directory to download into") + val schemaDownloadRepository = taskKey[Seq[File]]("Download the repository") + val schemaClearDownload = taskKey[Unit]("Removes all downloaded files") + } + + import autoImport._ + + override def projectSettings = AvroCodeGen.avroHuggerSettings ++ Seq( + schemaRepository := "https://github.com/SwissDataScienceCenter/renku-search", + schemaBranch := None, + schemaTargetDirectory := (Compile / target).value / "renku-avro-schemas", + schemaClearDownload := { + val target = schemaTargetDirectory.value + IO.delete(target) + }, + schemaDownloadRepository := { + val logger = streams.value.log + val repo = schemaRepository.value + val branch = schemaBranch.value + val output = schemaTargetDirectory.value + synchronizeSchemaFiles(logger, repo, branch, output) + Seq(output) + }, + + Compile / avroSourceDirectories := Seq( + schemaTargetDirectory.value + ), + + Compile / sourceGenerators += Def.sequential( + schemaDownloadRepository, Compile / avroScalaGenerate + ).taskValue + ) + + def synchronizeSchemaFiles( + logger: Logger, + repo: String, + branch: Option[String], + target: File + ): Unit = + if (target.exists) pullRepository(logger, target, branch) + else cloneRepository(logger, repo, branch, target) + + + def pullRepository(logger: Logger, base: File, branch: Option[String]) = { + logger.info(s"Updating schema repository at $base") + val git = JGit(base) + val result = git.porcelain.pull().call() + if (!result.isSuccessful) { + val msg = s"The pull from ${git.remoteOrigin} failed!" + logger.error(msg) + sys.error(msg) + } + switchBranch(logger, git, branch) + } + + def cloneRepository(logger: Logger, repo: String, branch: Option[String], target: File): Unit = { + logger.info(s"Downloading repository $repo to $target") + val jgit = JGit.clone(repo, target) + switchBranch(logger, jgit, branch) + } + + //TODO the same for tags + def switchBranch(logger: Logger, git: JGit, branch: Option[String]) = + branch match { + case Some(b) if b != git.branch => + logger.info(s"Changing to branch $b") + git.checkoutBranch(b) + + case _ => () + } +} From 4c57639bb3fb0a6b2586811117cc0160b9d995fe Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Fri, 9 Feb 2024 09:02:54 +0100 Subject: [PATCH 2/6] Make it work with tags and commit shas --- project/AvroSchemaDownload.scala | 71 ++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 32 deletions(-) diff --git a/project/AvroSchemaDownload.scala b/project/AvroSchemaDownload.scala index a4ea9456..08edc832 100644 --- a/project/AvroSchemaDownload.scala +++ b/project/AvroSchemaDownload.scala @@ -4,6 +4,7 @@ import com.github.sbt.git._ import sbtavrohugger.SbtAvrohugger import sbtavrohugger.SbtAvrohugger.autoImport.* import java.io.File +import org.eclipse.jgit.api.ResetCommand.ResetType object AvroSchemaDownload extends AutoPlugin { @@ -11,7 +12,8 @@ object AvroSchemaDownload extends AutoPlugin { object autoImport { val schemaRepository = settingKey[String]("The repository to download") - val schemaBranch = settingKey[Option[String]]("The branch to checkout") + val schemaRef = + settingKey[Option[String]]("The branch, tag or commit sha to checkout") val schemaTargetDirectory = settingKey[File]("The directory to download into") val schemaDownloadRepository = taskKey[Seq[File]]("Download the repository") val schemaClearDownload = taskKey[Unit]("Removes all downloaded files") @@ -21,7 +23,7 @@ object AvroSchemaDownload extends AutoPlugin { override def projectSettings = AvroCodeGen.avroHuggerSettings ++ Seq( schemaRepository := "https://github.com/SwissDataScienceCenter/renku-search", - schemaBranch := None, + schemaRef := Some("v0.0.1"), schemaTargetDirectory := (Compile / target).value / "renku-avro-schemas", schemaClearDownload := { val target = schemaTargetDirectory.value @@ -30,55 +32,60 @@ object AvroSchemaDownload extends AutoPlugin { schemaDownloadRepository := { val logger = streams.value.log val repo = schemaRepository.value - val branch = schemaBranch.value + val refspec = schemaRef.value val output = schemaTargetDirectory.value - synchronizeSchemaFiles(logger, repo, branch, output) + synchronizeSchemaFiles(logger, repo, refspec, output) Seq(output) }, - Compile / avroSourceDirectories := Seq( schemaTargetDirectory.value ), - - Compile / sourceGenerators += Def.sequential( - schemaDownloadRepository, Compile / avroScalaGenerate - ).taskValue + Compile / sourceGenerators += Def + .sequential( + schemaDownloadRepository, + Compile / avroScalaGenerate + ) + .taskValue ) def synchronizeSchemaFiles( - logger: Logger, - repo: String, - branch: Option[String], - target: File + logger: Logger, + repo: String, + refspec: Option[String], + target: File ): Unit = - if (target.exists) pullRepository(logger, target, branch) - else cloneRepository(logger, repo, branch, target) + if (target.exists) updateRepository(logger, target, refspec) + else cloneRepository(logger, repo, refspec, target) - - def pullRepository(logger: Logger, base: File, branch: Option[String]) = { + def updateRepository(logger: Logger, base: File, refspec: Option[String]) = { logger.info(s"Updating schema repository at $base") val git = JGit(base) - val result = git.porcelain.pull().call() - if (!result.isSuccessful) { - val msg = s"The pull from ${git.remoteOrigin} failed!" - logger.error(msg) - sys.error(msg) - } - switchBranch(logger, git, branch) + git.porcelain.fetch().call() + switchBranch(logger, git, refspec) } - def cloneRepository(logger: Logger, repo: String, branch: Option[String], target: File): Unit = { + def cloneRepository( + logger: Logger, + repo: String, + refspec: Option[String], + target: File + ): Unit = { logger.info(s"Downloading repository $repo to $target") val jgit = JGit.clone(repo, target) - switchBranch(logger, jgit, branch) + switchBranch(logger, jgit, refspec) } - //TODO the same for tags - def switchBranch(logger: Logger, git: JGit, branch: Option[String]) = - branch match { - case Some(b) if b != git.branch => - logger.info(s"Changing to branch $b") - git.checkoutBranch(b) + def switchBranch(logger: Logger, git: JGit, refspec: Option[String]) = + refspec match { + case Some(ref) + if ref != git.branch && !git.currentTags.contains(ref) && !git.headCommitSha + .contains(ref) => + logger.info(s"Changing to $ref") + val cmd = git.porcelain.reset() + cmd.setMode(ResetType.HARD) + cmd.setRef(ref) + val res = cmd.call() + logger.info(s"Repository now on $res") case _ => () } From 8bdc6650a6323edd8a3ba0e56181a18f0d08679c Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Fri, 9 Feb 2024 13:46:58 +0100 Subject: [PATCH 3/6] Add package name despite not supported by the plugin --- project/AvroSchemaDownload.scala | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/project/AvroSchemaDownload.scala b/project/AvroSchemaDownload.scala index 08edc832..328d377a 100644 --- a/project/AvroSchemaDownload.scala +++ b/project/AvroSchemaDownload.scala @@ -22,8 +22,8 @@ object AvroSchemaDownload extends AutoPlugin { import autoImport._ override def projectSettings = AvroCodeGen.avroHuggerSettings ++ Seq( - schemaRepository := "https://github.com/SwissDataScienceCenter/renku-search", - schemaRef := Some("v0.0.1"), + schemaRepository := "https://github.com/SwissDataScienceCenter/renku-schema", + schemaRef := Some("main"), schemaTargetDirectory := (Compile / target).value / "renku-avro-schemas", schemaClearDownload := { val target = schemaTargetDirectory.value @@ -37,13 +37,22 @@ object AvroSchemaDownload extends AutoPlugin { synchronizeSchemaFiles(logger, repo, refspec, output) Seq(output) }, + Compile / avroScalaCustomNamespace := Map("*" -> "io.renku.messages"), + Compile / avroScalaSpecificCustomNamespace := Map("*" -> "io.renku.messages"), Compile / avroSourceDirectories := Seq( schemaTargetDirectory.value ), Compile / sourceGenerators += Def .sequential( schemaDownloadRepository, - Compile / avroScalaGenerate + Compile / avroScalaGenerate, + Def.task { + val out = (Compile / avroScalaSource).value + val pkg = "io.renku.messages" + val logger = streams.value.log + evilHackAddPackage(logger, out, pkg) + Seq.empty[File] + } ) .taskValue ) @@ -89,4 +98,20 @@ object AvroSchemaDownload extends AutoPlugin { case _ => () } + + def evilHackAddPackage(logger: Logger, dir: File, pkg: String): Unit = { + val pkgLine = s"package $pkg" + + def prependPackage(file: File) = { + val content = IO.read(file) + if (!content.startsWith(pkgLine)) { + logger.info(s"Add package to: $file") + IO.write(file, s"$pkgLine;\n\n") // scala & java ... + IO.append(file, content) + } + } + + (dir ** "*.scala").get().foreach(prependPackage) + (dir ** "*.java").get().foreach(prependPackage) + } } From a7826667fd8da66f5c79fe5486490b5ff7151a08 Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Fri, 9 Feb 2024 15:43:29 +0100 Subject: [PATCH 4/6] Fix ordering of avsc files to make it work with referencing typedefs --- project/AvroSchemaDownload.scala | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/project/AvroSchemaDownload.scala b/project/AvroSchemaDownload.scala index 328d377a..e6ae3ace 100644 --- a/project/AvroSchemaDownload.scala +++ b/project/AvroSchemaDownload.scala @@ -5,6 +5,7 @@ import sbtavrohugger.SbtAvrohugger import sbtavrohugger.SbtAvrohugger.autoImport.* import java.io.File import org.eclipse.jgit.api.ResetCommand.ResetType +import avrohugger.filesorter.AvscFileSorter object AvroSchemaDownload extends AutoPlugin { @@ -17,11 +18,14 @@ object AvroSchemaDownload extends AutoPlugin { val schemaTargetDirectory = settingKey[File]("The directory to download into") val schemaDownloadRepository = taskKey[Seq[File]]("Download the repository") val schemaClearDownload = taskKey[Unit]("Removes all downloaded files") + val schemaEnableDownload = + settingKey[Boolean]("Whether to enable downloading schema repository") } import autoImport._ override def projectSettings = AvroCodeGen.avroHuggerSettings ++ Seq( + schemaEnableDownload := true, schemaRepository := "https://github.com/SwissDataScienceCenter/renku-schema", schemaRef := Some("main"), schemaTargetDirectory := (Compile / target).value / "renku-avro-schemas", @@ -34,14 +38,19 @@ object AvroSchemaDownload extends AutoPlugin { val repo = schemaRepository.value val refspec = schemaRef.value val output = schemaTargetDirectory.value - synchronizeSchemaFiles(logger, repo, refspec, output) + val enabled = schemaEnableDownload.value + if (enabled) { + synchronizeSchemaFiles(logger, repo, refspec, output) + } else { + logger.info("Downloading avro schema files is disabled.") + } Seq(output) }, - Compile / avroScalaCustomNamespace := Map("*" -> "io.renku.messages"), - Compile / avroScalaSpecificCustomNamespace := Map("*" -> "io.renku.messages"), - Compile / avroSourceDirectories := Seq( - schemaTargetDirectory.value - ), + Compile / avroSourceDirectories := + AvscFileSorter.sortSchemaFiles( + // need to do this weird ordering so dependend files are read in first + (schemaTargetDirectory.value ** "*.avsc").get.reverse + ), Compile / sourceGenerators += Def .sequential( schemaDownloadRepository, @@ -51,7 +60,7 @@ object AvroSchemaDownload extends AutoPlugin { val pkg = "io.renku.messages" val logger = streams.value.log evilHackAddPackage(logger, out, pkg) - Seq.empty[File] + IO.listFiles(out).toSeq } ) .taskValue @@ -104,7 +113,7 @@ object AvroSchemaDownload extends AutoPlugin { def prependPackage(file: File) = { val content = IO.read(file) - if (!content.startsWith(pkgLine)) { + if (!content.startsWith("package ")) { logger.info(s"Add package to: $file") IO.write(file, s"$pkgLine;\n\n") // scala & java ... IO.append(file, content) From be3ea9903101684d7e41c6786e82e87cefdaa1d9 Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Fri, 9 Feb 2024 16:22:02 +0100 Subject: [PATCH 5/6] Trivially fixing compile errors due to new schema And remove obsolete message.avdl --- modules/messages/src/main/avro/messages.avdl | 31 ------------------- .../messages/SerializeDeserializeTest.scala | 27 +++++----------- .../search/provision/SearchProvisioner.scala | 4 ++- .../provision/SearchProvisionerSpec.scala | 16 ++++++++-- 4 files changed, 23 insertions(+), 55 deletions(-) delete mode 100644 modules/messages/src/main/avro/messages.avdl diff --git a/modules/messages/src/main/avro/messages.avdl b/modules/messages/src/main/avro/messages.avdl deleted file mode 100644 index 04885b52..00000000 --- a/modules/messages/src/main/avro/messages.avdl +++ /dev/null @@ -1,31 +0,0 @@ -@namespace("io.renku.messages") -protocol Messages { - enum Shapes { - SQUARE, TRIANGLE, CIRCLE - } - - /* An example record for a "project-created-event" */ - record ProjectCreated { - string id; - string name; - string description; - string? owner; - timestamp_ms createdAt; - } - - /* A project got updated */ - record ProjectUpdated { - string id; - string name; - string @aliases(["oldDescription"]) description; - Shapes icon; - union { string, int } index; - timestamp_ms updatedAt; - } - - record ProjectDeleted { - string id; - string name; - timestamp_ms deletedAt; - } -} \ No newline at end of file diff --git a/modules/messages/src/test/scala/io/renku/messages/SerializeDeserializeTest.scala b/modules/messages/src/test/scala/io/renku/messages/SerializeDeserializeTest.scala index aabca478..431cd96e 100644 --- a/modules/messages/src/test/scala/io/renku/messages/SerializeDeserializeTest.scala +++ b/modules/messages/src/test/scala/io/renku/messages/SerializeDeserializeTest.scala @@ -33,9 +33,13 @@ class SerializeDeserializeTest extends FunSuite { val data = ProjectCreated( UUID.randomUUID().toString, "my-project", - "a description for it", - None, - Instant.now().truncatedTo(ChronoUnit.MILLIS) + "slug", + Seq.empty, + Visibility.PUBLIC, + Some("a description for it"), + "created-by-me", + Instant.now().truncatedTo(ChronoUnit.MILLIS), + Seq.empty ) val avro = AvroIO(ProjectCreated.SCHEMA$) @@ -45,21 +49,4 @@ class SerializeDeserializeTest extends FunSuite { assertEquals(decoded, List(data)) } - test("serialize and deserialize ProjectUpdated") { - val data1 = ProjectUpdated( - UUID.randomUUID().toString, - "my-project", - "a description for it", - Shapes.CIRCLE, - Right(42), - Instant.now().truncatedTo(ChronoUnit.MILLIS) - ) - val data2 = data1.copy(index = Left("fourtytwo")) - val avro = AvroIO(ProjectUpdated.SCHEMA$) - - val bytes = avro.write(Seq(data1, data2)) - val decoded = avro.read[ProjectUpdated](bytes) - - assertEquals(decoded, List(data1, data2)) - } } diff --git a/modules/search-provision/src/main/scala/io/renku/search/provision/SearchProvisioner.scala b/modules/search-provision/src/main/scala/io/renku/search/provision/SearchProvisioner.scala index 1a6d2c9b..a3a012fc 100644 --- a/modules/search-provision/src/main/scala/io/renku/search/provision/SearchProvisioner.scala +++ b/modules/search-provision/src/main/scala/io/renku/search/provision/SearchProvisioner.scala @@ -102,7 +102,9 @@ private class SearchProvisionerImpl[F[_]: Async]( } private lazy val toSolrDocuments: Seq[ProjectCreated] => Seq[Project] = - _.map(pc => Project(id = pc.id, name = pc.name, description = pc.description)) + _.map(pc => + Project(id = pc.id, name = pc.name, description = pc.description.getOrElse("")) + ) private def markProcessedOnFailure( message: Message diff --git a/modules/search-provision/src/test/scala/io/renku/search/provision/SearchProvisionerSpec.scala b/modules/search-provision/src/test/scala/io/renku/search/provision/SearchProvisionerSpec.scala index 91b64e36..2f7dd501 100644 --- a/modules/search-provision/src/test/scala/io/renku/search/provision/SearchProvisionerSpec.scala +++ b/modules/search-provision/src/test/scala/io/renku/search/provision/SearchProvisionerSpec.scala @@ -24,7 +24,7 @@ import fs2.Stream import fs2.concurrent.SignallingRef import io.renku.avro.codec.AvroIO import io.renku.avro.codec.encoders.all.given -import io.renku.messages.ProjectCreated +import io.renku.messages.{ProjectCreated, Visibility} import io.renku.queue.client.Encoding import io.renku.redis.client.RedisClientGenerators import io.renku.redis.client.RedisClientGenerators.* @@ -131,10 +131,20 @@ class SearchProvisionerSpec extends CatsEffectSuite with RedisSpec with SearchSo name = s"$prefix-${generateString(max = 5).sample.get}" desc = s"$prefix ${generateString(max = 10).sample.get}" ownerGen = generateString(max = 5).map(prefix + _) - yield ProjectCreated(uuid.toString, name, desc, Gen.option(ownerGen).sample.get, now) + yield ProjectCreated( + uuid.toString, + name, + "slug", + Seq.empty, + Visibility.PUBLIC, + Some(desc), + ownerGen.sample.get, + now, + Seq.empty + ) private def toSolrDocument(created: ProjectCreated): Project = - Project(created.id, created.name, created.description) + Project(created.id, created.name, created.description.getOrElse("")) override def munitFixtures: Seq[Fixture[_]] = List(withRedisClient, withSearchSolrClient) From 38980259ef35fa1dc41131c1563ca62ccc2fb56e Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Fri, 9 Feb 2024 16:43:16 +0100 Subject: [PATCH 6/6] Fix ci task --- build.sbt | 2 +- project/AvroSchemaDownload.scala | 17 ++++++++++------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/build.sbt b/build.sbt index 96c17dea..1469b243 100644 --- a/build.sbt +++ b/build.sbt @@ -27,7 +27,7 @@ releaseVersionBump := sbtrelease.Version.Bump.Minor releaseIgnoreUntrackedFiles := true releaseTagName := (ThisBuild / version).value -addCommandAlias("ci", "; lint; dbTests; publishLocal") +addCommandAlias("ci", "; lint; compile; Test/compile; dbTests; publishLocal") addCommandAlias( "lint", "; scalafmtSbtCheck; scalafmtCheckAll;" // Compile/scalafix --check; Test/scalafix --check diff --git a/project/AvroSchemaDownload.scala b/project/AvroSchemaDownload.scala index e6ae3ace..285cdb65 100644 --- a/project/AvroSchemaDownload.scala +++ b/project/AvroSchemaDownload.scala @@ -47,9 +47,12 @@ object AvroSchemaDownload extends AutoPlugin { Seq(output) }, Compile / avroSourceDirectories := - AvscFileSorter.sortSchemaFiles( - // need to do this weird ordering so dependend files are read in first - (schemaTargetDirectory.value ** "*.avsc").get.reverse + // need to do this custom correct ordering of inputs so the files are + // evaluated in correct "dependency order" (unfortunately, the plugin doesn't sort it out) + // must be constant values, because settingKeys are evaluated at project load time + Seq( + schemaTargetDirectory.value / "common", + schemaTargetDirectory.value / "project" ), Compile / sourceGenerators += Def .sequential( @@ -60,7 +63,6 @@ object AvroSchemaDownload extends AutoPlugin { val pkg = "io.renku.messages" val logger = streams.value.log evilHackAddPackage(logger, out, pkg) - IO.listFiles(out).toSeq } ) .taskValue @@ -108,7 +110,7 @@ object AvroSchemaDownload extends AutoPlugin { case _ => () } - def evilHackAddPackage(logger: Logger, dir: File, pkg: String): Unit = { + def evilHackAddPackage(logger: Logger, dir: File, pkg: String): Seq[File] = { val pkgLine = s"package $pkg" def prependPackage(file: File) = { @@ -118,9 +120,10 @@ object AvroSchemaDownload extends AutoPlugin { IO.write(file, s"$pkgLine;\n\n") // scala & java ... IO.append(file, content) } + file } - (dir ** "*.scala").get().foreach(prependPackage) - (dir ** "*.java").get().foreach(prependPackage) + (dir ** "*.scala").get().map(prependPackage) ++ + (dir ** "*.java").get().map(prependPackage) } }