diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index b7dce30f..d5e9a33e 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -1,7 +1,7 @@
name: 'Build'
env:
- VERSION: 4.17.0
+ VERSION: 4.18.0
ASM_VERSION: 4.0.0
DOC_ARTIFACT: webHelp-all.zip
diff --git a/docs/release-history.md b/docs/release-history.md
index e67d278c..d421cba1 100644
--- a/docs/release-history.md
+++ b/docs/release-history.md
@@ -2,7 +2,11 @@
### Improvements
-- Added explicit target for `.NET 8`.
+- Added explicit target for `.NET 8` by @aloneguid.
+
+### Bug fixes
+
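+- Fixed `DataFrameMapper` returning generic `PrimitiveDataFrameColumn` instances instead of the specialised `DataFrameColumn` subclasses (e.g. `Int64DataFrameColumn`), by @aloneguid (#343).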
## 4.17.0
diff --git a/src/Parquet.PerfRunner/Parquet.PerfRunner.csproj b/src/Parquet.PerfRunner/Parquet.PerfRunner.csproj
index 7de97ff5..465c41c7 100644
--- a/src/Parquet.PerfRunner/Parquet.PerfRunner.csproj
+++ b/src/Parquet.PerfRunner/Parquet.PerfRunner.csproj
@@ -11,7 +11,7 @@
-
+
diff --git a/src/Parquet.Test/DataAnalysis/DataFrameReaderTest.cs b/src/Parquet.Test/DataAnalysis/DataFrameReaderTest.cs
index e0e6f227..ae8529a1 100644
--- a/src/Parquet.Test/DataAnalysis/DataFrameReaderTest.cs
+++ b/src/Parquet.Test/DataAnalysis/DataFrameReaderTest.cs
@@ -16,6 +16,10 @@ public class DataFrameReaderTest : TestBase {
[InlineData(typeof(int?), null, 2)]
[InlineData(typeof(bool), true, false)]
[InlineData(typeof(bool?), true, null)]
+ [InlineData(typeof(long), 1L, 2L)]
+ [InlineData(typeof(long?), 1L, 2L)]
+ [InlineData(typeof(ulong), 1UL, 2UL)]
+ [InlineData(typeof(ulong?), 1UL, 2UL)]
[InlineData(typeof(string), "1", "2")]
[InlineData(typeof(string), null, "2")]
public async Task Roundtrip_all_types(Type t, object el1, object el2) {
@@ -53,6 +57,14 @@ public async Task Roundtrip_all_types(Type t, object el1, object el2) {
ms1.Position = 0;
DataFrame df1 = await ms1.ReadParquetAsDataFrameAsync();
+ if(t == typeof(long)) {
+ // The typed getter is called only for its type check: presumably it fails unless the mapper produced the specialised Int64 column class
+ // see https://github.com/aloneguid/parquet-dotnet/issues/343 for more info
+ df1.Columns.GetInt64Column(t.Name);
+ } else if (t == typeof(ulong)) {
+ df1.Columns.GetUInt64Column(t.Name);
+ }
+
Assert.Equal(df.Columns.Count, df1.Columns.Count);
for(int i = 0; i < df.Columns.Count; i++) {
Assert.Equal(df.Columns[i], df1.Columns[i]);
diff --git a/src/Parquet.Test/Parquet.Test.csproj b/src/Parquet.Test/Parquet.Test.csproj
index 48ef1aba..7f5b8266 100644
--- a/src/Parquet.Test/Parquet.Test.csproj
+++ b/src/Parquet.Test/Parquet.Test.csproj
@@ -19,16 +19,16 @@
-
-
-
+
+
+
runtime; build; native; contentfiles; analyzers; buildtransitive
all
-
-
+
+
diff --git a/src/Parquet/Data/Analysis/DataFrameMapper.cs b/src/Parquet/Data/Analysis/DataFrameMapper.cs
index a2ec1dd6..6e92831a 100644
--- a/src/Parquet/Data/Analysis/DataFrameMapper.cs
+++ b/src/Parquet/Data/Analysis/DataFrameMapper.cs
@@ -10,86 +10,107 @@ public static DataFrameColumn ToDataFrameColumn(DataColumn dc) {
if(dc.Field.ClrType == typeof(bool)) {
if(dc.Field.ClrType == dc.Field.ClrNullableIfHasNullsType) {
- return new PrimitiveDataFrameColumn(colName, (bool[])dc.Data);
+ return new BooleanDataFrameColumn(colName, (bool[])dc.Data);
} else {
- return new PrimitiveDataFrameColumn(colName, (bool?[])dc.Data);
+ return new BooleanDataFrameColumn(colName, (bool?[])dc.Data);
}
}
if(dc.Field.ClrType == typeof(int)) {
if(dc.Field.ClrType == dc.Field.ClrNullableIfHasNullsType) {
- return new PrimitiveDataFrameColumn(colName, (int[])dc.Data);
+ return new Int32DataFrameColumn(colName, (int[])dc.Data);
} else {
- return new PrimitiveDataFrameColumn(colName, (int?[])dc.Data);
+ return new Int32DataFrameColumn(colName, (int?[])dc.Data);
}
}
if(dc.Field.ClrType == typeof(uint)) {
if(dc.Field.ClrType == dc.Field.ClrNullableIfHasNullsType) {
- return new PrimitiveDataFrameColumn(colName, (uint[])dc.Data);
+ return new UInt32DataFrameColumn(colName, (uint[])dc.Data);
} else {
- return new PrimitiveDataFrameColumn(colName, (uint?[])dc.Data);
+ return new UInt32DataFrameColumn(colName, (uint?[])dc.Data);
}
}
- if(dc.Field.ClrType == typeof(long)) {
+ if(dc.Field.ClrType == typeof(double)) {
if(dc.Field.ClrType == dc.Field.ClrNullableIfHasNullsType) {
- return new PrimitiveDataFrameColumn(colName, (long[])dc.Data);
+ return new DoubleDataFrameColumn(colName, (double[])dc.Data);
} else {
- return new PrimitiveDataFrameColumn(colName, (long?[])dc.Data);
+ return new DoubleDataFrameColumn(colName, (double?[])dc.Data);
}
}
- if(dc.Field.ClrType == typeof(ulong)) {
+ if(dc.Field.ClrType == typeof(float)) {
if(dc.Field.ClrType == dc.Field.ClrNullableIfHasNullsType) {
- return new PrimitiveDataFrameColumn(colName, (ulong[])dc.Data);
+ return new SingleDataFrameColumn(colName, (float[])dc.Data);
} else {
- return new PrimitiveDataFrameColumn(colName, (ulong?[])dc.Data);
+ return new SingleDataFrameColumn(colName, (float?[])dc.Data);
}
}
if(dc.Field.ClrType == typeof(byte)) {
if(dc.Field.ClrType == dc.Field.ClrNullableIfHasNullsType) {
- return new PrimitiveDataFrameColumn(colName, (byte[])dc.Data);
+ return new ByteDataFrameColumn(colName, (byte[])dc.Data);
} else {
- return new PrimitiveDataFrameColumn(colName, (byte?[])dc.Data);
+ return new ByteDataFrameColumn(colName, (byte?[])dc.Data);
}
}
if(dc.Field.ClrType == typeof(sbyte)) {
if(dc.Field.ClrType == dc.Field.ClrNullableIfHasNullsType) {
- return new PrimitiveDataFrameColumn(colName, (sbyte[])dc.Data);
+ return new SByteDataFrameColumn(colName, (sbyte[])dc.Data);
} else {
- return new PrimitiveDataFrameColumn(colName, (sbyte?[])dc.Data);
+ return new SByteDataFrameColumn(colName, (sbyte?[])dc.Data);
}
}
- if(dc.Field.ClrType == typeof(DateTime)) {
+ if(dc.Field.ClrType == typeof(short)) {
if(dc.Field.ClrType == dc.Field.ClrNullableIfHasNullsType) {
- return new PrimitiveDataFrameColumn(colName, (DateTime[])dc.Data);
+ return new Int16DataFrameColumn(colName, (short[])dc.Data);
} else {
- return new PrimitiveDataFrameColumn(colName, (DateTime?[])dc.Data);
+ return new Int16DataFrameColumn(colName, (short?[])dc.Data);
}
}
- if(dc.Field.ClrType == typeof(TimeSpan)) {
+ if(dc.Field.ClrType == typeof(ushort)) {
if(dc.Field.ClrType == dc.Field.ClrNullableIfHasNullsType) {
- return new PrimitiveDataFrameColumn(colName, (TimeSpan[])dc.Data);
+ return new UInt16DataFrameColumn(colName, (ushort[])dc.Data);
} else {
- return new PrimitiveDataFrameColumn(colName, (TimeSpan?[])dc.Data);
+ return new UInt16DataFrameColumn(colName, (ushort?[])dc.Data);
}
}
- if(dc.Field.ClrType == typeof(decimal)) {
+ if(dc.Field.ClrType == typeof(long)) {
if(dc.Field.ClrType == dc.Field.ClrNullableIfHasNullsType) {
- return new PrimitiveDataFrameColumn(colName, (decimal[])dc.Data);
+ return new Int64DataFrameColumn(colName, (long[])dc.Data);
} else {
- return new PrimitiveDataFrameColumn(colName, (decimal?[])dc.Data);
+ return new Int64DataFrameColumn(colName, (long?[])dc.Data);
}
}
- if(dc.Field.ClrType == typeof(float)) {
+ if(dc.Field.ClrType == typeof(ulong)) {
if(dc.Field.ClrType == dc.Field.ClrNullableIfHasNullsType) {
- return new PrimitiveDataFrameColumn(colName, (float[])dc.Data);
+ return new UInt64DataFrameColumn(colName, (ulong[])dc.Data);
} else {
- return new PrimitiveDataFrameColumn(colName, (float?[])dc.Data);
+ return new UInt64DataFrameColumn(colName, (ulong?[])dc.Data);
}
}
- if(dc.Field.ClrType == typeof(double)) {
+ if(dc.Field.ClrType == typeof(string)) {
+ if(dc.Field.ClrType == dc.Field.ClrNullableIfHasNullsType) {
+ return new StringDataFrameColumn(colName, (string[])dc.Data);
+ } else {
+ return new StringDataFrameColumn(colName, (string?[])dc.Data);
+ }
+ }
+ if(dc.Field.ClrType == typeof(DateTime)) {
+ if(dc.Field.ClrType == dc.Field.ClrNullableIfHasNullsType) {
+ return new DateTimeDataFrameColumn(colName, (DateTime[])dc.Data);
+ } else {
+ return new DateTimeDataFrameColumn(colName, (DateTime?[])dc.Data);
+ }
+ }
+ if(dc.Field.ClrType == typeof(TimeSpan)) {
+ if(dc.Field.ClrType == dc.Field.ClrNullableIfHasNullsType) {
+ return new PrimitiveDataFrameColumn(colName, (TimeSpan[])dc.Data);
+ } else {
+ return new PrimitiveDataFrameColumn(colName, (TimeSpan?[])dc.Data);
+ }
+ }
+ if(dc.Field.ClrType == typeof(decimal)) {
if(dc.Field.ClrType == dc.Field.ClrNullableIfHasNullsType) {
- return new PrimitiveDataFrameColumn(colName, (double[])dc.Data);
+ return new DecimalDataFrameColumn(colName, (decimal[])dc.Data);
} else {
- return new PrimitiveDataFrameColumn(colName, (double?[])dc.Data);
+ return new DecimalDataFrameColumn(colName, (decimal?[])dc.Data);
}
}
// special case
@@ -271,55 +292,42 @@ public static void AppendValues(DataFrameColumn dfc, DataColumn dc) {
}
public static Array GetTypedDataFast(DataFrameColumn col) {
-
if(col.DataType == typeof(bool)) {
return ((PrimitiveDataFrameColumn)col).ToArray();
}
-
if(col.DataType == typeof(int)) {
return ((PrimitiveDataFrameColumn)col).ToArray();
}
-
if(col.DataType == typeof(uint)) {
return ((PrimitiveDataFrameColumn)col).ToArray();
}
-
if(col.DataType == typeof(long)) {
return ((PrimitiveDataFrameColumn)col).ToArray();
}
-
if(col.DataType == typeof(ulong)) {
return ((PrimitiveDataFrameColumn)col).ToArray();
}
-
if(col.DataType == typeof(byte)) {
return ((PrimitiveDataFrameColumn)col).ToArray();
}
-
if(col.DataType == typeof(sbyte)) {
return ((PrimitiveDataFrameColumn)col).ToArray();
}
-
if(col.DataType == typeof(DateTime)) {
return ((PrimitiveDataFrameColumn)col).ToArray();
}
-
if(col.DataType == typeof(TimeSpan)) {
return ((PrimitiveDataFrameColumn)col).ToArray();
}
-
if(col.DataType == typeof(decimal)) {
return ((PrimitiveDataFrameColumn)col).ToArray();
}
-
if(col.DataType == typeof(float)) {
return ((PrimitiveDataFrameColumn)col).ToArray();
}
-
if(col.DataType == typeof(double)) {
return ((PrimitiveDataFrameColumn)col).ToArray();
}
-
// special case
if(col.DataType == typeof(string)) {
return ((StringDataFrameColumn)col).ToArray();
diff --git a/src/Parquet/Data/Analysis/DataFrameMapper.tt b/src/Parquet/Data/Analysis/DataFrameMapper.tt
index ee6426cf..71e94cce 100644
--- a/src/Parquet/Data/Analysis/DataFrameMapper.tt
+++ b/src/Parquet/Data/Analysis/DataFrameMapper.tt
@@ -15,6 +15,23 @@
"decimal",
"float",
"double" };
+ var clrToColumnType = new Dictionary {
+ { "bool", "BooleanDataFrameColumn" },
+ { "int", "Int32DataFrameColumn" },
+ { "uint", "UInt32DataFrameColumn" },
+ { "double", "DoubleDataFrameColumn" },
+ { "float", "SingleDataFrameColumn" },
+ { "byte", "ByteDataFrameColumn" },
+ { "sbyte", "SByteDataFrameColumn" },
+ { "short", "Int16DataFrameColumn" },
+ { "ushort", "UInt16DataFrameColumn" },
+ { "long", "Int64DataFrameColumn" },
+ { "ulong", "UInt64DataFrameColumn" },
+ { "string", "StringDataFrameColumn" },
+ { "DateTime", "DateTimeDataFrameColumn" },
+ { "TimeSpan", "PrimitiveDataFrameColumn" },
+ { "decimal", "DecimalDataFrameColumn" }
+ };
#>using System;
using System.Linq;
using System.Numerics;
@@ -25,11 +42,11 @@ namespace Parquet.Data.Analysis {
public static DataFrameColumn ToDataFrameColumn(DataColumn dc) {
string colName = string.Join("_", dc.Field.Path.ToList());
- <# foreach(var t in valueTypes) { #>if(dc.Field.ClrType == typeof(<#= t #>)) {
+ <# foreach(var t in clrToColumnType) { #>if(dc.Field.ClrType == typeof(<#= t.Key #>)) {
if(dc.Field.ClrType == dc.Field.ClrNullableIfHasNullsType) {
- return new PrimitiveDataFrameColumn<<#= t #>>(colName, (<#= t #>[])dc.Data);
+ return new <#= t.Value #>(colName, (<#= t.Key #>[])dc.Data);
} else {
- return new PrimitiveDataFrameColumn<<#= t #>>(colName, (<#= t #>?[])dc.Data);
+ return new <#= t.Value #>(colName, (<#= t.Key #>?[])dc.Data);
}
}
<# } #>
@@ -70,12 +87,12 @@ namespace Parquet.Data.Analysis {
}
public static Array GetTypedDataFast(DataFrameColumn col) {
- <# foreach(var t in valueTypes) { #>
- if(col.DataType == typeof(<#= t #>)) {
+ <# foreach(var t in valueTypes) {
+ #>if(col.DataType == typeof(<#= t #>)) {
return ((PrimitiveDataFrameColumn<<#= t #>>)col).ToArray();
}
- <# } #>
- // special case
+ <# }
+ #>// special case
if(col.DataType == typeof(string)) {
return ((StringDataFrameColumn)col).ToArray();
}
diff --git a/src/Parquet/Parquet.csproj b/src/Parquet/Parquet.csproj
index 32fd1d0a..36011800 100644
--- a/src/Parquet/Parquet.csproj
+++ b/src/Parquet/Parquet.csproj
@@ -58,7 +58,7 @@
-
+