Commit

ability to ignore property casing on deserialization (#536)
aloneguid committed Oct 2, 2024
1 parent 202f421 commit cf2730f
Showing 21 changed files with 356 additions and 78 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
@@ -2,7 +2,7 @@

env:
VERSION: 5.0.0
- PACKAGE_SUFFIX: '-pre.2'
+ PACKAGE_SUFFIX: '-pre.3'
# PACKAGE_SUFFIX: ''
ASM_VERSION: 5.0.0
DOC_INSTANCE: wrs/pq
1 change: 1 addition & 0 deletions .gitignore
@@ -1,5 +1,6 @@
.idea/
*.iml
*.user
.vs/
bin/
obj/
7 changes: 6 additions & 1 deletion docs/release-history.md
@@ -2,7 +2,12 @@

### Breaking changes

- This is the first version without the old Table/Row API, which is now completely removed. It has been one of the major headaches and sources of bugs since it was introduced in the very first version of this library. If you need similar functionality, consider the [untyped serializer](https://aloneguid.github.io/parquet-dotnet/untyped-serializer.html).
- This is the first version without the old Table/Row API, which is now completely removed. It has been one of the major headaches and sources of bugs since it was introduced in the very first version of this library. If you need similar functionality, consider the [untyped serializer](https://aloneguid.github.io/parquet-dotnet/untyped-serializer.html), which should be stable enough (the Floor utility has relied on it exclusively for quite some time).
- `ParquetSerializer`'s `SerializeAsync` accepted `ParquetSerializerOptions`, but `DeserializeAsync` accepted `ParquetOptions`. They are now aligned for consistency: both use `ParquetSerializerOptions`.

### New features

- Ability to ignore property casing on deserialization via `ParquetSerializerOptions.PropertyNameCaseInsensitive` (#536); see the sketch after this file's diff.

### Improvements

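To make the two notes above concrete, here is a minimal, hedged sketch of the new call shape. `MyRecord` and `data.parquet` are hypothetical; `ParquetSerializerOptions`, `PropertyNameCaseInsensitive`, the nested `ParquetOptions` and `TreatByteArrayAsString` all appear in this commit's diff.

```csharp
using System.Collections.Generic;
using System.IO;
using System.Threading.Tasks;
using Parquet;
using Parquet.Serialization;

// Hypothetical CLR type with Pascal-cased properties.
class MyRecord {
    public int Id { get; set; }
    public string? Name { get; set; }
}

class Example {
    public static async Task Main() {
        // "data.parquet" is a hypothetical file whose columns are lower-cased ("id", "name").
        using Stream fs = File.OpenRead("data.parquet");

        // DeserializeAsync now takes the same ParquetSerializerOptions as SerializeAsync;
        // the low-level ParquetOptions hang off it instead of being passed directly.
        IList<MyRecord> rows = await ParquetSerializer.DeserializeAsync<MyRecord>(
            fs,
            new ParquetSerializerOptions {
                PropertyNameCaseInsensitive = true,   // new in this commit (#536)
                ParquetOptions = new ParquetOptions {
                    TreatByteArrayAsString = true
                }
            });
    }
}
```

Omitting the options keeps the previous case-sensitive behaviour, as the `RenameProperty_Serde` test further down demonstrates.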
3 changes: 2 additions & 1 deletion src/Parquet.Floor/Controllers/ParquetToCsvConverter.cs
@@ -4,6 +4,7 @@
using System.Threading.Tasks;
using CsvHelper;
using Parquet.Schema;
using Parquet.Serialization;
using Parquet.Utils;

namespace Parquet.Floor.Controllers {
@@ -15,7 +16,7 @@ class ParquetToCsvConverter : FlatTableConverter {

private readonly StreamWriter _sw;
private readonly CsvWriter _csv;
- public ParquetToCsvConverter(Stream parquetStream, string csvFilePath, ParquetOptions? options = null)
+ public ParquetToCsvConverter(Stream parquetStream, string csvFilePath, ParquetSerializerOptions? options = null)
: base(parquetStream, options) {
_sw = new StreamWriter(csvFilePath);
_csv = new CsvWriter(_sw, CultureInfo.InvariantCulture);
9 changes: 0 additions & 9 deletions src/Parquet.Floor/Parquet.Floor.csproj.user

This file was deleted.

6 changes: 4 additions & 2 deletions src/Parquet.Floor/ViewModels/DataViewModel.cs
@@ -70,8 +70,10 @@ public async Task InitReaderAsync(FileViewModel? file, Stream fileStream) {
try {
ParquetSerializer.UntypedResult fd = await ParquetSerializer.DeserializeAsync(
fileStream,
- new ParquetOptions {
-     TreatByteArrayAsString = true
+ new ParquetSerializerOptions {
+     ParquetOptions = new ParquetOptions {
+         TreatByteArrayAsString = true
+     }
});
data = fd.Data;
} catch(Exception ex) {
2 changes: 1 addition & 1 deletion src/Parquet.Test/Parquet.Test.csproj
@@ -4,7 +4,7 @@
<PropertyGroup>
<IsPackable>false</IsPackable>
<LangVersion>latest</LangVersion>
- <TargetFrameworks>net6.0;net8.0</TargetFrameworks>
+ <TargetFrameworks>net8.0;net6.0</TargetFrameworks>

<SignAssembly>true</SignAssembly>
<AssemblyOriginatorKeyFile>../fake.snk</AssemblyOriginatorKeyFile>
126 changes: 126 additions & 0 deletions src/Parquet.Test/Schema/SchemaTest.cs
@@ -367,5 +367,131 @@ public void Decode_list_legacy_no_mid_group() {
Assert.Fail("list expected");
}
}


[Fact]
public void Augment_changes_name_and_path() {
var typeSchema = new ParquetSchema(
new DataField<int>("id"),
new DataField<string>("name"));

var fileSchema = new ParquetSchema(
new DataField<int>("id"),
new DataField<string>("Name"));

Assert.Equal("name", typeSchema[1].Name);
Assert.Equal("name", typeSchema[1].ClrPropName);
Assert.Equal("name", typeSchema[1].Path.ToString());
typeSchema.Augment(fileSchema);
Assert.Equal("Name", typeSchema[1].Name);
Assert.Equal("name", typeSchema[1].ClrPropName); // but should not change ClrPropName
Assert.Equal("Name", typeSchema[1].Path.ToString());
}

[Fact]
public void Augment_changes_struct_member_name_and_path() {
var typeSchema = new ParquetSchema(
new StructField("s",
new DataField<int>("id"),
new DataField<string>("name")));

var fileSchema = new ParquetSchema(
new StructField("s",
new DataField<int>("id"),
new DataField<string>("Name")));

Assert.Equal("name", typeSchema[0].Children[1].Name);
Assert.Equal("s/name", typeSchema[0].Children[1].Path.ToString());
typeSchema.Augment(fileSchema);
Assert.Equal("Name", typeSchema[0].Children[1].Name);
Assert.Equal("s/Name", typeSchema[0].Children[1].Path.ToString());
}

[Fact]
public void Augment_changes_list_member_name_and_path() {
var typeSchema = new ParquetSchema(
new ListField("l",
new DataField<int>("id")));

var fileSchema1 = new ParquetSchema(
new ListField("l",
new DataField<int>("Id")));

// pre-checks
ListField lf = (ListField)typeSchema[0];
Assert.Equal("l", lf.Name);
Assert.Equal("id", lf.Item.Name);
Assert.Equal("l/list", lf.Path.ToString());
Assert.Equal("l/list/id", lf.Item.Path.ToString());

// augment
typeSchema.Augment(fileSchema1);

// post-checks
Assert.Equal("l", lf.Name);
Assert.Equal("Id", lf.Item.Name);
Assert.Equal("l/list", lf.Path.ToString());
Assert.Equal("l/list/Id", lf.Item.Path.ToString());
}

[Fact]
public void Augment_changes_list_container_and_item_name_and_path() {
var typeSchema = new ParquetSchema(
new ListField("l",
new DataField<int>("id")));

var fileSchema1 = new ParquetSchema(
new ListField("L",
new DataField<int>("Id")));

// pre-checks
ListField lf = (ListField)typeSchema[0];
Assert.Equal("l", lf.Name);
Assert.Equal("id", lf.Item.Name);
Assert.Equal("l/list", lf.Path.ToString());
Assert.Equal("l/list/id", lf.Item.Path.ToString());

// augment
typeSchema.Augment(fileSchema1);

// post-checks
Assert.Equal("L", lf.Name);
Assert.Equal("Id", lf.Item.Name);
Assert.Equal("L/list", lf.Path.ToString());
Assert.Equal("L/list/Id", lf.Item.Path.ToString());
}

[Fact]
public void Augment_changes_map_key_and_value_name_and_path() {
var typeSchema = new ParquetSchema(
new MapField("m",
new DataField<int>("key"),
new DataField<string>("value")));

var fileSchema = new ParquetSchema(
new MapField("m",
new DataField<int>("Key"),
new DataField<string>("Value")));

// pre-checks
MapField mf = (MapField)typeSchema[0];
Assert.Equal("m", mf.Name);
Assert.Equal("key", mf.Key.Name);
Assert.Equal("value", mf.Value.Name);
Assert.Equal("m/key_value", mf.Path.ToString());
Assert.Equal("m/key_value/key", mf.Key.Path.ToString());
Assert.Equal("m/key_value/value", mf.Value.Path.ToString());

// augment
typeSchema.Augment(fileSchema);

// post-checks
Assert.Equal("m", mf.Name);
Assert.Equal("Key", mf.Key.Name);
Assert.Equal("Value", mf.Value.Name);
Assert.Equal("m/key_value", mf.Path.ToString());
Assert.Equal("m/key_value/Key", mf.Key.Path.ToString());
Assert.Equal("m/key_value/Value", mf.Value.Path.ToString());
}
}
}
45 changes: 45 additions & 0 deletions src/Parquet.Test/Serialisation/ParquetSerializerTest.cs
@@ -938,6 +938,51 @@ public async Task String_RequiredIntoOptional_Serde() {
Assert.Equivalent(dataRequired, dataOptional);
}*/

class BeforeRename {
public string? lowerCase { get; set; }
}

class AfterRename {
public string? LowerCase { get; set; }
}

[Fact]
public async Task RenameProperty_Serde() {
var data = Enumerable.Range(0, 1_000).Select(i => new BeforeRename {
lowerCase = i % 2 == 0 ? "on" : "off"
}).ToList();

// serialise to memory stream
using var ms = new MemoryStream();
await ParquetSerializer.SerializeAsync(data, ms);
ms.Position = 0;

// deserialize from memory stream, but use the new class
IList<AfterRename> data2 = await ParquetSerializer.DeserializeAsync<AfterRename>(ms);

// because property name matching is case sensitive by default, the new class should contain only nulls
Assert.True(data2.All(d => d.LowerCase == null));
}

[Fact]
public async Task RenameProperty_CaseInsensitive_Serde() {
var data = Enumerable.Range(0, 1_000).Select(i => new BeforeRename {
lowerCase = i % 2 == 0 ? "on" : "off"
}).ToList();

// serialise to memory stream
using var ms = new MemoryStream();
await ParquetSerializer.SerializeAsync(data, ms);
ms.Position = 0;

// deserialize from memory stream, but use the new class
IList<AfterRename> data2 = await ParquetSerializer.DeserializeAsync<AfterRename>(ms,
new ParquetSerializerOptions { PropertyNameCaseInsensitive = true });

// because matching is now case insensitive, every value should be populated
Assert.True(data2.All(d => d.LowerCase == "on" || d.LowerCase == "off"));
}

#if NET6_0_OR_GREATER

record RecordContainingDateAndtimeOnly {
17 changes: 0 additions & 17 deletions src/Parquet.Test/Serialisation/RichColumn.cs

This file was deleted.

2 changes: 1 addition & 1 deletion src/Parquet/Parquet.csproj
@@ -2,7 +2,7 @@

<PropertyGroup>
<TargetFrameworks Condition="'$(LATEST_NET_ONLY)' != ''">net8.0</TargetFrameworks>
- <TargetFrameworks Condition="'$(LATEST_NET_ONLY)' == ''">netstandard2.0;netstandard2.1;net6.0;net8.0</TargetFrameworks>
+ <TargetFrameworks Condition="'$(LATEST_NET_ONLY)' == ''">net8.0;net6.0;netstandard2.1;netstandard2.0</TargetFrameworks>
<Company></Company>
<PackageId>Parquet.Net</PackageId>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
10 changes: 9 additions & 1 deletion src/Parquet/Schema/DataField.cs
@@ -7,7 +7,7 @@ namespace Parquet.Schema {
/// <summary>
/// Field containing actual data, unlike fields containing metadata.
/// </summary>
- public class DataField : Field {
+ public class DataField : Field, ICloneable {

private bool _isNullable;
private bool _isArray;
@@ -187,6 +187,14 @@ private static void Discover(Type t, out Type baseType, out bool isArray, out bo
}
}

/// <summary>
/// Simple memberwise clone
/// </summary>
/// <returns>A shallow, memberwise copy of this field.</returns>
public object Clone() {
return MemberwiseClone();
}

#endregion
}
}
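A short usage sketch of the new `ICloneable` implementation on `DataField`; the field name is illustrative only.

```csharp
using Parquet.Schema;

// Clone() is a memberwise (shallow) copy, so the copy is an independent
// DataField instance that can be modified without affecting the original.
var original = new DataField<int>("id");
var copy = (DataField)original.Clone();
```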
10 changes: 10 additions & 0 deletions src/Parquet/Schema/Field.cs
@@ -1,6 +1,7 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Xml.Linq;
using Parquet.Meta;

namespace Parquet.Schema {
@@ -111,6 +112,15 @@ internal virtual Field[] NaturalChildren {

internal virtual bool IsAtomic => false;

/// <summary>
/// Rename this field. Renaming should also fix up the path in complex nested schemas.
/// </summary>
/// <param name="newName">The new field name.</param>
internal virtual void Rename(string newName) {
Name = newName;
Path = new FieldPath(newName);
}

internal bool Equals(SchemaElement tse) {
if(ReferenceEquals(tse, null))
return false;
5 changes: 5 additions & 0 deletions src/Parquet/Schema/ListField.cs
@@ -75,6 +75,11 @@ internal override FieldPath? PathPrefix {
}
}

internal override void Rename(string newName) {
base.Rename(newName);
PathPrefix = null;
}

internal override Field[] Children => new Field[] { Item };

internal SchemaElement? GroupSchemaElement { get; set; } = null;
5 changes: 5 additions & 0 deletions src/Parquet/Schema/MapField.cs
@@ -72,6 +72,11 @@ internal override FieldPath? PathPrefix {
}
}

internal override void Rename(string newName) {
base.Rename(newName);
PathPrefix = null;
}

internal override Field[] Children => new Field[] { Key, Value };

internal SchemaElement? GroupSchemaElement { get; set; } = null;