Skip to content

Commit d531836

Browse files
authored
Support the TimeOnly and DateOnly types added in .NET 6 (#424)
* Support round tripping DateOnly and TimeOnly, requiring LogicalReaderOverride
* Allow configuring use of DateOnly and TimeOnly in LogicalTypeFactory
* Update type factories documentation
1 parent 862568b commit d531836

File tree

6 files changed

+630
-19
lines changed

6 files changed

+630
-19
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,8 @@ For more detailed information on how to use ParquetSharp, see the following docu
9696
* [Working with nested data](docs/Nested.md)
9797
* [Reading and writing Arrow data](docs/Arrow.md) — how to read and write data using the [Apache Arrow format](https://arrow.apache.org/)
9898
* [Row-oriented API](docs/RowOriented.md) — a higher level API that abstracts away the column-oriented nature of Parquet files
99-
* [Custom types](docs/TypeFactories.md) — how to override the mapping between .NET and Parquet types
99+
* [Custom types](docs/TypeFactories.md) — how to customize the mapping between .NET and Parquet types,
100+
including using the `DateOnly` and `TimeOnly` types added in .NET 6.
100101
* [Writing TimeSpan data](docs/TimeSpan.md) — interoperability with other libraries when writing TimeSpan data
101102
* [Use from PowerShell](docs/PowerShell.md)
102103

csharp.test/TestLogicalTypeRoundtrip.cs

Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,219 @@ public static void TestRoundTripBuffered(
118118
}
119119
}
120120

121+
#if NET6_0_OR_GREATER
122+
/// <summary>
/// Writes a required and an optional <see cref="DateOnly"/> column to an in-memory Parquet file
/// and checks that both columns read back unchanged.
/// </summary>
/// <param name="useReaderOverride">
/// When true the columns are read with <c>LogicalReaderOverride</c>; otherwise the file reader is
/// given a <c>LogicalTypeFactory</c> with <c>DateAsDateOnly</c> enabled and the plain
/// <c>LogicalReader</c> is used.
/// </param>
[Test]
public static void TestRoundTripDateOnly([Values] bool useReaderOverride)
{
    const int numRows = 100;
    var firstDay = new DateOnly(2024, 1, 1);

    // Build the expected data: consecutive days, with rows where (row % 5 == 1) null in the optional column.
    var dateValues = new DateOnly[numRows];
    var nullableDateValues = new DateOnly?[numRows];
    for (var row = 0; row < numRows; ++row)
    {
        var day = firstDay.AddDays(row);
        dateValues[row] = day;
        nullableDateValues[row] = row % 5 == 1 ? (DateOnly?) null : day;
    }

    Column[] schemaColumns =
    {
        new Column<DateOnly>("date"),
        new Column<DateOnly?>("nullable_date"),
    };

    using var buffer = new ResizableBuffer();
    using (var outStream = new BufferOutputStream(buffer))
    using (var fileWriter = new ParquetFileWriter(outStream, schemaColumns))
    using (var rowGroupWriter = fileWriter.AppendRowGroup())
    {
        using (var requiredWriter = rowGroupWriter.NextColumn().LogicalWriter<DateOnly>())
        {
            requiredWriter.WriteBatch(dateValues);
        }

        using (var optionalWriter = rowGroupWriter.NextColumn().LogicalWriter<DateOnly?>())
        {
            optionalWriter.WriteBatch(nullableDateValues);
        }

        fileWriter.Close();
    }

    DateOnly[] readDateValues;
    DateOnly?[] readNullableDateValues;
    using (var inStream = new BufferReader(buffer))
    using (var fileReader = new ParquetFileReader(inStream))
    {
        if (!useReaderOverride)
        {
            // Without an explicit override, the factory has to be told to surface dates as DateOnly.
            fileReader.LogicalTypeFactory = new LogicalTypeFactory {DateAsDateOnly = true};
        }

        using var rowGroupReader = fileReader.RowGroup(0);

        using (var columnReader = rowGroupReader.Column(0))
        using (var logicalReader = !useReaderOverride
                   ? columnReader.LogicalReader<DateOnly>()
                   : columnReader.LogicalReaderOverride<DateOnly>())
        {
            readDateValues = logicalReader.ReadAll(numRows);
        }

        using (var columnReader = rowGroupReader.Column(1))
        using (var logicalReader = !useReaderOverride
                   ? columnReader.LogicalReader<DateOnly?>()
                   : columnReader.LogicalReaderOverride<DateOnly?>())
        {
            readNullableDateValues = logicalReader.ReadAll(numRows);
        }
    }

    Assert.AreEqual(dateValues, readDateValues);
    Assert.AreEqual(nullableDateValues, readNullableDateValues);
}
187+
188+
/// <summary>
/// Writes a required and an optional <see cref="TimeOnly"/> column to an in-memory Parquet file
/// and checks that both columns read back unchanged.
/// </summary>
/// <param name="timeUnit">
/// When set, both columns are declared with a <c>LogicalType.Time</c> override using this unit;
/// when null no logical type override is passed to the columns.
/// </param>
/// <param name="useReaderOverride">
/// When true the columns are read with <c>LogicalReaderOverride</c>; otherwise the file reader is
/// given a <c>LogicalTypeFactory</c> with <c>TimeAsTimeOnly</c> enabled and the plain
/// <c>LogicalReader</c> is used.
/// </param>
[TestCase(null, true)]
[TestCase(TimeUnit.Micros, true)]
[TestCase(TimeUnit.Millis, true)]
[TestCase(TimeUnit.Millis, false)]
public static void TestRoundTripTimeOnly(TimeUnit? timeUnit, bool useReaderOverride)
{
    LogicalType? logicalTypeOverride = timeUnit.HasValue
        ? LogicalType.Time(isAdjustedToUtc: true, timeUnit.Value)
        : null;

    Column[] schemaColumns =
    {
        new Column<TimeOnly>("time", logicalTypeOverride: logicalTypeOverride),
        new Column<TimeOnly?>("nullable_time", logicalTypeOverride: logicalTypeOverride),
    };

    const int numRows = 100;
    var midnight = new TimeOnly(0, 0, 0);

    // Build the expected data: one-second steps from midnight, with rows where (row % 5 == 1)
    // null in the optional column.
    var timeValues = new TimeOnly[numRows];
    var nullableTimeValues = new TimeOnly?[numRows];
    for (var row = 0; row < numRows; ++row)
    {
        var time = midnight.Add(TimeSpan.FromSeconds(row));
        timeValues[row] = time;
        nullableTimeValues[row] = row % 5 == 1 ? (TimeOnly?) null : time;
    }

    using var buffer = new ResizableBuffer();
    using (var outStream = new BufferOutputStream(buffer))
    using (var fileWriter = new ParquetFileWriter(outStream, schemaColumns))
    using (var rowGroupWriter = fileWriter.AppendRowGroup())
    {
        using (var requiredWriter = rowGroupWriter.NextColumn().LogicalWriter<TimeOnly>())
        {
            requiredWriter.WriteBatch(timeValues);
        }

        using (var optionalWriter = rowGroupWriter.NextColumn().LogicalWriter<TimeOnly?>())
        {
            optionalWriter.WriteBatch(nullableTimeValues);
        }

        fileWriter.Close();
    }

    TimeOnly[] readTimeValues;
    TimeOnly?[] readNullableTimeValues;
    using (var inStream = new BufferReader(buffer))
    using (var fileReader = new ParquetFileReader(inStream))
    {
        if (!useReaderOverride)
        {
            // Without an explicit override, the factory has to be told to surface times as TimeOnly.
            fileReader.LogicalTypeFactory = new LogicalTypeFactory {TimeAsTimeOnly = true};
        }

        using var rowGroupReader = fileReader.RowGroup(0);

        using (var columnReader = rowGroupReader.Column(0))
        using (var logicalReader = !useReaderOverride
                   ? columnReader.LogicalReader<TimeOnly>()
                   : columnReader.LogicalReaderOverride<TimeOnly>())
        {
            readTimeValues = logicalReader.ReadAll(numRows);
        }

        using (var columnReader = rowGroupReader.Column(1))
        using (var logicalReader = !useReaderOverride
                   ? columnReader.LogicalReader<TimeOnly?>()
                   : columnReader.LogicalReaderOverride<TimeOnly?>())
        {
            readNullableTimeValues = logicalReader.ReadAll(numRows);
        }
    }

    Assert.AreEqual(timeValues, readTimeValues);
    Assert.AreEqual(nullableTimeValues, readNullableTimeValues);
}
261+
262+
/// <summary>
/// Enables <c>DateAsDateOnly</c> and <c>TimeAsTimeOnly</c> on <c>LogicalTypeFactory.Default</c> and
/// checks that <see cref="DateOnly"/> and <see cref="TimeOnly"/> values round-trip through a file
/// whose schema is built directly from schema nodes, with no per-file factory or reader override.
/// </summary>
/// <remarks>
/// Marked non-parallelizable because it mutates the shared default factory; the previous
/// settings are restored in the <c>finally</c> block.
/// </remarks>
[Test]
[NonParallelizable]
public static void TestSetTimeOnlyAndDateOnlyOnDefaultTypeFactory()
{
    // Capture the current settings so they can be restored whatever the outcome.
    var defaultDateAsDateOnly = LogicalTypeFactory.Default.DateAsDateOnly;
    var defaultTimeAsTimeOnly = LogicalTypeFactory.Default.TimeAsTimeOnly;

    try
    {
        LogicalTypeFactory.Default.DateAsDateOnly = true;
        LogicalTypeFactory.Default.TimeAsTimeOnly = true;

        // Create schema directly rather than using the column abstraction,
        // to test that this uses the correct types from the type factory when writing.
        using var dateNode = new PrimitiveNode("date", Repetition.Required, LogicalType.Date(), PhysicalType.Int32);
        using var timeNode = new PrimitiveNode("time", Repetition.Required, LogicalType.Time(true, TimeUnit.Millis), PhysicalType.Int32);
        using var schemaNode = new GroupNode("schema", Repetition.Required, new[] {dateNode, timeNode});

        const int numRows = 100;
        var timeValues = Enumerable.Range(0, numRows)
            .Select(i => new TimeOnly(0, 0, 0).Add(TimeSpan.FromSeconds(i)))
            .ToArray();
        var dateValues = Enumerable.Range(0, numRows)
            .Select(i => new DateOnly(2024, 1, 1).AddDays(i))
            .ToArray();

        using var buffer = new ResizableBuffer();
        using (var outStream = new BufferOutputStream(buffer))
        {
            using var builder = new WriterPropertiesBuilder();
            using var writerProperties = builder.Build();
            using var fileWriter = new ParquetFileWriter(outStream, schemaNode, writerProperties);
            using var rowGroupWriter = fileWriter.AppendRowGroup();
            // Each column writer gets its own scope so it is disposed before the next column
            // is requested, matching the other round-trip tests in this file.
            {
                using var dateWriter = rowGroupWriter.NextColumn().LogicalWriter<DateOnly>();
                dateWriter.WriteBatch(dateValues);
            }
            {
                using var timeWriter = rowGroupWriter.NextColumn().LogicalWriter<TimeOnly>();
                timeWriter.WriteBatch(timeValues);
            }
            fileWriter.Close();
        }

        DateOnly[] readDateValues;
        TimeOnly[] readTimeValues;
        using (var inStream = new BufferReader(buffer))
        {
            // No LogicalTypeFactory is assigned on the reader here: the modified default is expected to apply.
            using var fileReader = new ParquetFileReader(inStream);
            using var rowGroupReader = fileReader.RowGroup(0);
            {
                using var columnReader = rowGroupReader.Column(0);
                using var logicalReader = columnReader.LogicalReader<DateOnly>();
                readDateValues = logicalReader.ReadAll(numRows);
            }
            {
                using var columnReader = rowGroupReader.Column(1);
                using var logicalReader = columnReader.LogicalReader<TimeOnly>();
                readTimeValues = logicalReader.ReadAll(numRows);
            }
        }

        Assert.AreEqual(dateValues, readDateValues);
        Assert.AreEqual(timeValues, readTimeValues);
    }
    finally
    {
        LogicalTypeFactory.Default.DateAsDateOnly = defaultDateAsDateOnly;
        LogicalTypeFactory.Default.TimeAsTimeOnly = defaultTimeAsTimeOnly;
    }
}
332+
#endif
333+
121334
[TestCase(DateTimeKind.Utc, TimeUnit.Micros)]
122335
[TestCase(DateTimeKind.Utc, TimeUnit.Millis)]
123336
[TestCase(DateTimeKind.Unspecified, TimeUnit.Micros)]

csharp/LogicalRead.cs

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,43 @@ public static Delegate GetConverter(ColumnDescriptor columnDescriptor, ColumnChu
286286
return LogicalRead.GetNullableNativeConverter<TimeSpanNanos, long>();
287287
}
288288

289+
#if NET6_0_OR_GREATER
290+
if (typeof(TLogical) == typeof(DateOnly))
291+
{
292+
return (LogicalRead<DateOnly, int>.Converter) ((s, _, d, _) => LogicalRead.ConvertDateOnly(s, d));
293+
}
294+
295+
if (typeof(TLogical) == typeof(DateOnly?))
296+
{
297+
return (LogicalRead<DateOnly?, int>.Converter) LogicalRead.ConvertDateOnly;
298+
}
299+
300+
if (typeof(TLogical) == typeof(TimeOnly))
301+
{
302+
switch (((TimeLogicalType) logicalType).TimeUnit)
303+
{
304+
case TimeUnit.Millis:
305+
return (LogicalRead<TimeOnly, int>.Converter) ((s, _, d, _) => LogicalRead.ConvertTimeOnlyMillis(s, d));
306+
case TimeUnit.Micros:
307+
return (LogicalRead<TimeOnly, long>.Converter) ((s, _, d, _) => LogicalRead.ConvertTimeOnlyMicros(s, d));
308+
}
309+
}
310+
311+
if (typeof(TLogical) == typeof(TimeOnly?))
312+
{
313+
var timeLogicalType = (TimeLogicalType) logicalType;
314+
var timeUnit = timeLogicalType.TimeUnit;
315+
316+
switch (timeUnit)
317+
{
318+
case TimeUnit.Millis:
319+
return (LogicalRead<TimeOnly?, int>.Converter) LogicalRead.ConvertTimeOnlyMillis;
320+
case TimeUnit.Micros:
321+
return (LogicalRead<TimeOnly?, long>.Converter) LogicalRead.ConvertTimeOnlyMicros;
322+
}
323+
}
324+
#endif
325+
289326
if (typeof(TLogical) == typeof(string))
290327
{
291328
var byteArrayCache = new ByteArrayReaderCache<TPhysical, TLogical>(columnChunkMetaData);
@@ -572,6 +609,56 @@ public static void ConvertTimeSpanMillis(ReadOnlySpan<int> source, ReadOnlySpan<
572609
}
573610
}
574611

612+
#if NET6_0_OR_GREATER
613+
/// <summary>
/// Fills <paramref name="destination"/> by converting the physical <c>int</c> values in
/// <paramref name="source"/> with <c>ToDateOnly</c>, position by position.
/// </summary>
/// <param name="source">Physical values; read at the same index as the destination slot.</param>
/// <param name="destination">Output span; its length determines how many values are converted.</param>
public static void ConvertDateOnly(ReadOnlySpan<int> source, Span<DateOnly> destination)
{
    var count = destination.Length;
    for (var index = 0; index != count; index++)
    {
        destination[index] = ToDateOnly(source[index]);
    }
}
620+
621+
/// <summary>
/// Fills <paramref name="destination"/> with nullable dates: a slot whose definition level equals
/// <paramref name="definedLevel"/> takes the next unread value from <paramref name="source"/>
/// (converted with <c>ToDateOnly</c>); every other slot is set to null and consumes no source value.
/// </summary>
/// <param name="source">Physical values for the defined slots only, in order.</param>
/// <param name="defLevels">Definition level for each destination slot.</param>
/// <param name="destination">Output span; its length determines how many slots are filled.</param>
/// <param name="definedLevel">Definition level that marks a slot as having a value.</param>
public static void ConvertDateOnly(ReadOnlySpan<int> source, ReadOnlySpan<short> defLevels, Span<DateOnly?> destination, short definedLevel)
{
    var nextSource = 0;
    for (var index = 0; index < destination.Length; index++)
    {
        if (defLevels[index] == definedLevel)
        {
            destination[index] = ToDateOnly(source[nextSource]);
            nextSource++;
        }
        else
        {
            destination[index] = null;
        }
    }
}
628+
629+
/// <summary>
/// Fills <paramref name="destination"/> by converting the physical <c>long</c> values in
/// <paramref name="source"/> with <c>ToTimeOnlyMicros</c>, position by position.
/// </summary>
/// <param name="source">Physical values; read at the same index as the destination slot.</param>
/// <param name="destination">Output span; its length determines how many values are converted.</param>
public static void ConvertTimeOnlyMicros(ReadOnlySpan<long> source, Span<TimeOnly> destination)
{
    var count = destination.Length;
    for (var index = 0; index != count; index++)
    {
        destination[index] = ToTimeOnlyMicros(source[index]);
    }
}
636+
637+
/// <summary>
/// Fills <paramref name="destination"/> with nullable times: a slot whose definition level equals
/// <paramref name="definedLevel"/> takes the next unread value from <paramref name="source"/>
/// (converted with <c>ToTimeOnlyMicros</c>); every other slot is set to null and consumes no source value.
/// </summary>
/// <param name="source">Physical values for the defined slots only, in order.</param>
/// <param name="defLevels">Definition level for each destination slot.</param>
/// <param name="destination">Output span; its length determines how many slots are filled.</param>
/// <param name="definedLevel">Definition level that marks a slot as having a value.</param>
public static void ConvertTimeOnlyMicros(ReadOnlySpan<long> source, ReadOnlySpan<short> defLevels, Span<TimeOnly?> destination, short definedLevel)
{
    var nextSource = 0;
    for (var index = 0; index < destination.Length; index++)
    {
        if (defLevels[index] == definedLevel)
        {
            destination[index] = ToTimeOnlyMicros(source[nextSource]);
            nextSource++;
        }
        else
        {
            destination[index] = null;
        }
    }
}
644+
645+
/// <summary>
/// Fills <paramref name="destination"/> by converting the physical <c>int</c> values in
/// <paramref name="source"/> with <c>ToTimeOnlyMillis</c>, position by position.
/// </summary>
/// <param name="source">Physical values; read at the same index as the destination slot.</param>
/// <param name="destination">Output span; its length determines how many values are converted.</param>
public static void ConvertTimeOnlyMillis(ReadOnlySpan<int> source, Span<TimeOnly> destination)
{
    var count = destination.Length;
    for (var index = 0; index != count; index++)
    {
        destination[index] = ToTimeOnlyMillis(source[index]);
    }
}
652+
653+
/// <summary>
/// Fills <paramref name="destination"/> with nullable times: a slot whose definition level equals
/// <paramref name="definedLevel"/> takes the next unread value from <paramref name="source"/>
/// (converted with <c>ToTimeOnlyMillis</c>); every other slot is set to null and consumes no source value.
/// </summary>
/// <param name="source">Physical values for the defined slots only, in order.</param>
/// <param name="defLevels">Definition level for each destination slot.</param>
/// <param name="destination">Output span; its length determines how many slots are filled.</param>
/// <param name="definedLevel">Definition level that marks a slot as having a value.</param>
public static void ConvertTimeOnlyMillis(ReadOnlySpan<int> source, ReadOnlySpan<short> defLevels, Span<TimeOnly?> destination, short definedLevel)
{
    var nextSource = 0;
    for (var index = 0; index < destination.Length; index++)
    {
        if (defLevels[index] == definedLevel)
        {
            destination[index] = ToTimeOnlyMillis(source[nextSource]);
            nextSource++;
        }
        else
        {
            destination[index] = null;
        }
    }
}
660+
#endif
661+
575662
public static void ConvertString(ReadOnlySpan<ByteArray> source, ReadOnlySpan<short> defLevels, Span<string?> destination, short definedLevel, ByteArrayReaderCache<ByteArray, string> byteArrayCache)
576663
{
577664
for (int i = 0, src = 0; i < destination.Length; ++i)
@@ -737,6 +824,28 @@ public static byte[] ToByteArray(ByteArray byteArray)
737824
return array;
738825
}
739826

827+
#if NET6_0_OR_GREATER
828+
/// <summary>
/// Converts a physical <c>int</c> date value to a <see cref="DateOnly"/> by offsetting it with
/// <c>BaseDateOnlyNumber</c> and interpreting the sum as a <see cref="DateOnly"/> day number.
/// </summary>
/// <param name="source">Physical value, relative to <c>BaseDateOnlyNumber</c>.</param>
/// <returns>The date whose day number is <c>BaseDateOnlyNumber + source</c>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static DateOnly ToDateOnly(int source)
{
    var dayNumber = BaseDateOnlyNumber + source;
    return DateOnly.FromDayNumber(dayNumber);
}
833+
834+
/// <summary>
/// Converts a microsecond count to a <see cref="TimeOnly"/> by scaling it to ticks
/// (<c>TimeSpan.TicksPerMillisecond / 1000</c> ticks per microsecond).
/// </summary>
/// <param name="source">Number of microseconds since midnight.</param>
/// <returns>The corresponding time of day.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static TimeOnly ToTimeOnlyMicros(long source)
{
    const long ticksPerMicrosecond = TimeSpan.TicksPerMillisecond / 1000;
    var sinceMidnight = TimeSpan.FromTicks(source * ticksPerMicrosecond);
    return TimeOnly.FromTimeSpan(sinceMidnight);
}
839+
840+
/// <summary>
/// Converts a millisecond count to a <see cref="TimeOnly"/> by scaling it to ticks
/// with <c>TimeSpan.TicksPerMillisecond</c>.
/// </summary>
/// <param name="source">Number of milliseconds since midnight.</param>
/// <returns>The corresponding time of day.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static TimeOnly ToTimeOnlyMillis(int source)
{
    // int * long widens to long, so the multiplication cannot overflow for any int input.
    var ticks = source * TimeSpan.TicksPerMillisecond;
    var sinceMidnight = TimeSpan.FromTicks(ticks);
    return TimeOnly.FromTimeSpan(sinceMidnight);
}
845+
846+
// Copied from LogicalWrite.BaseDateOnlyNumber; ToDateOnly adds it to the stored int to get a DateOnly day number.
// NOTE(review): presumably the DateOnly day number of the Unix epoch (Parquet DATE origin) — confirm in LogicalWrite.
private static readonly int BaseDateOnlyNumber = LogicalWrite.BaseDateOnlyNumber;
847+
#endif
848+
740849
public const long DateTimeOffset = LogicalWrite.DateTimeOffset;
741850
}
742851
}

0 commit comments

Comments
 (0)