From 04489f92a5a44ca25ea59f06ec8aec1fc38dd2fd Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Fri, 19 Jan 2024 16:38:02 +0100 Subject: [PATCH] Codec for either and java enums The code generation plugin uses java enums. --- .../scala/io/renku/avro/codec/AvroIO.scala | 18 +++ .../io/renku/avro/codec/AvroReader.scala | 18 +++ .../io/renku/avro/codec/AvroWriter.scala | 18 +++ .../io/renku/avro/codec/ByteVectorInput.scala | 18 +++ .../avro/codec/decoders/EitherDecoders.scala | 51 ++++++++ .../codec/decoders/JavaEnumDecoders.scala | 33 +++++ .../codec/decoders/TypeGuardedDecoding.scala | 117 ++++++++++++++++++ .../io/renku/avro/codec/decoders/all.scala | 2 + .../avro/codec/encoders/EitherEncoders.scala | 39 ++++++ .../codec/encoders/JavaEnumEncoders.scala | 35 ++++++ .../io/renku/avro/codec/encoders/all.scala | 2 + .../io/renku/avro/codec/AvroReaderTest.scala | 18 +++ modules/messages/src/main/avro/messages.avdl | 6 + .../messages/SerializeDeserializeTest.scala | 19 ++- 14 files changed, 393 insertions(+), 1 deletion(-) create mode 100644 modules/avro-codec/src/main/scala/io/renku/avro/codec/decoders/EitherDecoders.scala create mode 100644 modules/avro-codec/src/main/scala/io/renku/avro/codec/decoders/JavaEnumDecoders.scala create mode 100644 modules/avro-codec/src/main/scala/io/renku/avro/codec/decoders/TypeGuardedDecoding.scala create mode 100644 modules/avro-codec/src/main/scala/io/renku/avro/codec/encoders/EitherEncoders.scala create mode 100644 modules/avro-codec/src/main/scala/io/renku/avro/codec/encoders/JavaEnumEncoders.scala diff --git a/modules/avro-codec/src/main/scala/io/renku/avro/codec/AvroIO.scala b/modules/avro-codec/src/main/scala/io/renku/avro/codec/AvroIO.scala index 85c8c264..17ffc392 100644 --- a/modules/avro-codec/src/main/scala/io/renku/avro/codec/AvroIO.scala +++ b/modules/avro-codec/src/main/scala/io/renku/avro/codec/AvroIO.scala @@ -1,3 +1,21 @@ +/* + * Copyright 2024 Swiss Data Science Center (SDSC) + * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and + * Eidgenössische Technische Hochschule Zürich (ETHZ). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package io.renku.avro.codec import org.apache.avro.Schema import scodec.bits.ByteVector diff --git a/modules/avro-codec/src/main/scala/io/renku/avro/codec/AvroReader.scala b/modules/avro-codec/src/main/scala/io/renku/avro/codec/AvroReader.scala index 97fe9a50..a7d2de0e 100644 --- a/modules/avro-codec/src/main/scala/io/renku/avro/codec/AvroReader.scala +++ b/modules/avro-codec/src/main/scala/io/renku/avro/codec/AvroReader.scala @@ -1,3 +1,21 @@ +/* + * Copyright 2024 Swiss Data Science Center (SDSC) + * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and + * Eidgenössische Technische Hochschule Zürich (ETHZ). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package io.renku.avro.codec import org.apache.avro.Schema diff --git a/modules/avro-codec/src/main/scala/io/renku/avro/codec/AvroWriter.scala b/modules/avro-codec/src/main/scala/io/renku/avro/codec/AvroWriter.scala index 7b4e5853..e1df9edc 100644 --- a/modules/avro-codec/src/main/scala/io/renku/avro/codec/AvroWriter.scala +++ b/modules/avro-codec/src/main/scala/io/renku/avro/codec/AvroWriter.scala @@ -1,3 +1,21 @@ +/* + * Copyright 2024 Swiss Data Science Center (SDSC) + * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and + * Eidgenössische Technische Hochschule Zürich (ETHZ). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package io.renku.avro.codec import org.apache.avro.Schema diff --git a/modules/avro-codec/src/main/scala/io/renku/avro/codec/ByteVectorInput.scala b/modules/avro-codec/src/main/scala/io/renku/avro/codec/ByteVectorInput.scala index f323bc77..9b26f426 100644 --- a/modules/avro-codec/src/main/scala/io/renku/avro/codec/ByteVectorInput.scala +++ b/modules/avro-codec/src/main/scala/io/renku/avro/codec/ByteVectorInput.scala @@ -1,3 +1,21 @@ +/* + * Copyright 2024 Swiss Data Science Center (SDSC) + * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and + * Eidgenössische Technische Hochschule Zürich (ETHZ). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package io.renku.avro.codec import org.apache.avro.file.SeekableInput diff --git a/modules/avro-codec/src/main/scala/io/renku/avro/codec/decoders/EitherDecoders.scala b/modules/avro-codec/src/main/scala/io/renku/avro/codec/decoders/EitherDecoders.scala new file mode 100644 index 00000000..4c15a184 --- /dev/null +++ b/modules/avro-codec/src/main/scala/io/renku/avro/codec/decoders/EitherDecoders.scala @@ -0,0 +1,51 @@ +/* + * Copyright 2024 Swiss Data Science Center (SDSC) + * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and + * Eidgenössische Technische Hochschule Zürich (ETHZ). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.renku.avro.codec.decoders + +import io.renku.avro.codec.{AvroCodecException, AvroDecoder} + +trait EitherDecoders { + + given [A, B](using + da: AvroDecoder[A], + db: AvroDecoder[B], + ta: TypeGuardedDecoding[A], + tb: TypeGuardedDecoding[B] + ): AvroDecoder[Either[A, B]] = + AvroDecoder.curried[Either[A, B]] { schema => + require(schema.isUnion) + require(schema.getTypes.size() == 2) + + val leftSchema = schema.getTypes.get(0) + val rightSchema = schema.getTypes.get(1) + + { value => + if (ta.guard(leftSchema).isDefinedAt(value)) Left(da.decode(schema)(value)) + else if (tb.guard(rightSchema).isDefinedAt(value)) + Right(db.decode(schema)(value)) + else { + val nameA = leftSchema.getFullName + val nameB = rightSchema.getFullName + throw AvroCodecException.decode( + s"Could not decode $value into Either[$nameA, $nameB]" + ) + } + } + } +} diff --git a/modules/avro-codec/src/main/scala/io/renku/avro/codec/decoders/JavaEnumDecoders.scala b/modules/avro-codec/src/main/scala/io/renku/avro/codec/decoders/JavaEnumDecoders.scala new file mode 100644 index 00000000..7f69344b --- /dev/null +++ b/modules/avro-codec/src/main/scala/io/renku/avro/codec/decoders/JavaEnumDecoders.scala @@ -0,0 +1,33 @@ +/* + * Copyright 2024 Swiss Data Science Center (SDSC) + * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and + * Eidgenössische Technische Hochschule Zürich (ETHZ). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.renku.avro.codec.decoders + +import io.renku.avro.codec.AvroDecoder +import org.apache.avro.generic.GenericEnumSymbol + +import scala.reflect.ClassTag + +trait JavaEnumDecoders { + + given [E <: Enum[E]](using ctag: ClassTag[E]): AvroDecoder[E] = AvroDecoder.basic { + case e: Enum[?] => e.asInstanceOf[E] + case e: GenericEnumSymbol[?] => + Enum.valueOf[E](ctag.runtimeClass.asInstanceOf[Class[E]], e.toString) + } +} diff --git a/modules/avro-codec/src/main/scala/io/renku/avro/codec/decoders/TypeGuardedDecoding.scala b/modules/avro-codec/src/main/scala/io/renku/avro/codec/decoders/TypeGuardedDecoding.scala new file mode 100644 index 00000000..4ebb25b3 --- /dev/null +++ b/modules/avro-codec/src/main/scala/io/renku/avro/codec/decoders/TypeGuardedDecoding.scala @@ -0,0 +1,117 @@ +/* + * Copyright 2024 Swiss Data Science Center (SDSC) + * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and + * Eidgenössische Technische Hochschule Zürich (ETHZ). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.renku.avro.codec.decoders + +import org.apache.avro.Schema +import org.apache.avro.generic.{GenericContainer, GenericFixed} +import org.apache.avro.util.Utf8 + +import java.nio.ByteBuffer +import java.util.UUID + +// Taken from https://github.com/sksamuel/avro4s + +trait TypeGuardedDecoding[T] extends Serializable { + def guard(schema: Schema): PartialFunction[Any, Boolean] +} + +object TypeGuardedDecoding { + + def apply[T](using g: TypeGuardedDecoding[T]): TypeGuardedDecoding[T] = g + + given TypeGuardedDecoding[String] = new TypeGuardedDecoding[String]: + override def guard(schema: Schema): PartialFunction[Any, Boolean] = { + case v: Utf8 => true + case v: String => true + } + + given TypeGuardedDecoding[Boolean] = new TypeGuardedDecoding[Boolean]: + override def guard(schema: Schema): PartialFunction[Any, Boolean] = { + case v: Boolean => true + } + + given TypeGuardedDecoding[Double] = new TypeGuardedDecoding[Double]: + override def guard(schema: Schema): PartialFunction[Any, Boolean] = { + case v: Double => true + case v: Float => true + } + + given TypeGuardedDecoding[Float] = new TypeGuardedDecoding[Float]: + override def guard(schema: Schema): PartialFunction[Any, Boolean] = { case v: Float => + true + } + + given TypeGuardedDecoding[Long] = new TypeGuardedDecoding[Long]: + override def guard(schema: Schema): PartialFunction[Any, Boolean] = { + case v: Long => true + case v: Int => true + case v: Short => true + case v: Byte => true + } + + given TypeGuardedDecoding[Int] = new TypeGuardedDecoding[Int]: + override def guard(schema: Schema): PartialFunction[Any, Boolean] = { case v: Int => + true + } + + given TypeGuardedDecoding[UUID] = new TypeGuardedDecoding[UUID]: + override def guard(schema: Schema): PartialFunction[Any, Boolean] = { + case v: Utf8 => true + case v: String => true + } + + given [T]: TypeGuardedDecoding[Map[String, T]] = + new TypeGuardedDecoding[Map[String, T]]: + override def guard(schema: Schema): PartialFunction[Any, Boolean] = { + case v: java.util.Map[_, _] => true + } + + given TypeGuardedDecoding[Array[Byte]] = new TypeGuardedDecoding[Array[Byte]]: + override def guard(schema: Schema): PartialFunction[Any, Boolean] = { + case v: ByteBuffer => true + case v: Array[Byte] => true + case v: GenericFixed => true + } + + given TypeGuardedDecoding[ByteBuffer] = new TypeGuardedDecoding[ByteBuffer]: + override def guard(schema: Schema): PartialFunction[Any, Boolean] = { + case v: ByteBuffer => true + case v: Array[Byte] => true + case v: GenericFixed => true + } + + given [T]: TypeGuardedDecoding[List[T]] = new TypeGuardedDecoding[List[T]]: + override def guard(schema: Schema): PartialFunction[Any, Boolean] = { + case v: Array[_] => true + case v: java.util.Collection[_] => true + case v: Iterable[_] => true + } + + given [T]: TypeGuardedDecoding[Seq[T]] = new TypeGuardedDecoding[Seq[T]]: + override def guard(schema: Schema): PartialFunction[Any, Boolean] = { + case v: Array[_] => true + case v: java.util.Collection[_] => true + case v: Iterable[_] => true + } + + given [T]: TypeGuardedDecoding[T] = new TypeGuardedDecoding[T]: + override def guard(schema: Schema): PartialFunction[Any, Boolean] = { + case v: GenericContainer if v.getSchema.getFullName == schema.getFullName => true + } +} diff --git a/modules/avro-codec/src/main/scala/io/renku/avro/codec/decoders/all.scala b/modules/avro-codec/src/main/scala/io/renku/avro/codec/decoders/all.scala index 4fefe95d..92937b5e 100644 --- a/modules/avro-codec/src/main/scala/io/renku/avro/codec/decoders/all.scala +++ b/modules/avro-codec/src/main/scala/io/renku/avro/codec/decoders/all.scala @@ -26,4 +26,6 @@ object all with OptionDecoders with CollectionDecoders with ByteArrayDecoders + with JavaEnumDecoders + with EitherDecoders with RecordDecoders diff --git a/modules/avro-codec/src/main/scala/io/renku/avro/codec/encoders/EitherEncoders.scala b/modules/avro-codec/src/main/scala/io/renku/avro/codec/encoders/EitherEncoders.scala new file mode 100644 index 00000000..5efc2950 --- /dev/null +++ b/modules/avro-codec/src/main/scala/io/renku/avro/codec/encoders/EitherEncoders.scala @@ -0,0 +1,39 @@ +/* + * Copyright 2024 Swiss Data Science Center (SDSC) + * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and + * Eidgenössische Technische Hochschule Zürich (ETHZ). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.renku.avro.codec.encoders + +import io.renku.avro.codec.AvroEncoder + +trait EitherEncoders { + + given [A, B](using ea: AvroEncoder[A], eb: AvroEncoder[B]): AvroEncoder[Either[A, B]] = + AvroEncoder.curried { schema => + require(schema.isUnion, s"Either must use a union schema. Got: ${schema.getType}") + require( + schema.getTypes.size() == 2, + s"Either must use a UNION of two types. Got: ${schema.getTypes}" + ) + + { + case Left(a) => ea.encode(schema.getTypes.get(0))(a) + case Right(b) => eb.encode(schema.getTypes.get(1))(b) + } + } + +} diff --git a/modules/avro-codec/src/main/scala/io/renku/avro/codec/encoders/JavaEnumEncoders.scala b/modules/avro-codec/src/main/scala/io/renku/avro/codec/encoders/JavaEnumEncoders.scala new file mode 100644 index 00000000..0bf7a087 --- /dev/null +++ b/modules/avro-codec/src/main/scala/io/renku/avro/codec/encoders/JavaEnumEncoders.scala @@ -0,0 +1,35 @@ +/* + * Copyright 2024 Swiss Data Science Center (SDSC) + * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and + * Eidgenössische Technische Hochschule Zürich (ETHZ). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.renku.avro.codec.encoders + +import io.renku.avro.codec.AvroEncoder +import org.apache.avro.Schema +import org.apache.avro.generic.GenericData + +trait JavaEnumEncoders { + + given [E <: Enum[E]]: AvroEncoder[E] = + AvroEncoder.curried { schema => e => + require( + schema.getType == Schema.Type.ENUM, + s"schema is not an enum: $schema (${schema.getType})" + ) + GenericData.get().createEnum(e.name(), schema) + } +} diff --git a/modules/avro-codec/src/main/scala/io/renku/avro/codec/encoders/all.scala b/modules/avro-codec/src/main/scala/io/renku/avro/codec/encoders/all.scala index 53b066ea..4db87c24 100644 --- a/modules/avro-codec/src/main/scala/io/renku/avro/codec/encoders/all.scala +++ b/modules/avro-codec/src/main/scala/io/renku/avro/codec/encoders/all.scala @@ -24,6 +24,8 @@ object all with BigDecimalEncoders with DateTimeEncoders with OptionEncoders + with EitherEncoders with CollectionEncoders with ByteArrayEncoders + with JavaEnumEncoders with RecordEncoders diff --git a/modules/avro-codec/src/test/scala/io/renku/avro/codec/AvroReaderTest.scala b/modules/avro-codec/src/test/scala/io/renku/avro/codec/AvroReaderTest.scala index 7bf0a0cc..1cba2f3e 100644 --- a/modules/avro-codec/src/test/scala/io/renku/avro/codec/AvroReaderTest.scala +++ b/modules/avro-codec/src/test/scala/io/renku/avro/codec/AvroReaderTest.scala @@ -1,3 +1,21 @@ +/* + * Copyright 2024 Swiss Data Science Center (SDSC) + * A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and + * Eidgenössische Technische Hochschule Zürich (ETHZ). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package io.renku.avro.codec import io.renku.avro.codec.decoders.all.given diff --git a/modules/messages/src/main/avro/messages.avdl b/modules/messages/src/main/avro/messages.avdl index 2d99bc70..89b5b11d 100644 --- a/modules/messages/src/main/avro/messages.avdl +++ b/modules/messages/src/main/avro/messages.avdl @@ -1,5 +1,8 @@ @namespace("io.renku.messages") protocol Messages { + enum Shapes { + SQUARE, TRIANGLE, CIRCLE + } /* An example record for a "project-created-event" */ record ProjectCreated { @@ -13,6 +16,8 @@ protocol Messages { record ProjectUpdated { string name; string @aliases(["oldDescription"]) description; + Shapes icon; + union { string, int } index; timestamp_ms updatedAt; } @@ -21,6 +26,7 @@ protocol Messages { timestamp_ms deletedAt; } + /* record ProjectMsg { union { ProjectCreated, ProjectUpdated, ProjectDeleted } message; } */ diff --git a/modules/messages/src/test/scala/io/renku/messages/SerializeDeserializeTest.scala b/modules/messages/src/test/scala/io/renku/messages/SerializeDeserializeTest.scala index e31d9e9b..7c2b9e57 100644 --- a/modules/messages/src/test/scala/io/renku/messages/SerializeDeserializeTest.scala +++ b/modules/messages/src/test/scala/io/renku/messages/SerializeDeserializeTest.scala @@ -28,7 +28,7 @@ import java.time.temporal.ChronoUnit class SerializeDeserializeTest extends FunSuite { - test("serialize and deserialize") { + test("serialize and deserialize ProjectCreated") { val data = ProjectCreated( "my-project", "a description for it", @@ -42,4 +42,21 @@ class SerializeDeserializeTest extends FunSuite { assertEquals(decoded, List(data)) } + + test("serialize and deserialize ProjectUpdated") { + val data1 = ProjectUpdated( + "my-project", + "a description for it", + Shapes.CIRCLE, + Right(42), + Instant.now().truncatedTo(ChronoUnit.MILLIS) + ) + val data2 = data1.copy(index = Left("fourtytwo")) + val avro = AvroIO(ProjectUpdated.SCHEMA$) + + val bytes = avro.write(Seq(data1, data2)) + val decoded = avro.read[ProjectUpdated](bytes) + + assertEquals(decoded, List(data1, data2)) + } }