Skip to content

Commit bcc6e84

Browse files
c
1 parent b32a261 commit bcc6e84

File tree

30 files changed

+950
-96
lines changed

30 files changed

+950
-96
lines changed

crates/polars-core/src/config.rs

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -39,15 +39,11 @@ pub fn get_engine_affinity() -> String {
3939

4040
/// Prints a log message if sensitive verbose logging has been enabled.
4141
pub fn verbose_print_sensitive<F: Fn() -> String>(create_log_message: F) {
42-
fn do_log(create_log_message: &dyn Fn() -> String) {
43-
if std::env::var("POLARS_VERBOSE_SENSITIVE").as_deref() == Ok("1") {
44-
// Force the message to be a single line.
45-
let msg = create_log_message().replace('\n', "");
46-
eprintln!("[SENSITIVE]: {msg}")
47-
}
42+
if std::env::var("POLARS_VERBOSE_SENSITIVE").as_deref() == Ok("1") {
43+
// Force the message to be a single line.
44+
let msg = create_log_message().replace('\n', " ");
45+
eprintln!("[SENSITIVE]: {msg}")
4846
}
49-
50-
do_log(&create_log_message)
5147
}
5248

5349
pub fn get_file_prefetch_size() -> usize {

crates/polars-lazy/src/scan/anonymous_scan.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,13 +50,15 @@ impl LazyFrame {
5050
cache: false,
5151
glob: false,
5252
projection: None,
53+
column_mapping: None,
54+
default_values: None,
5355
row_index: None,
5456
pre_slice: args.n_rows.map(|len| Slice::Positive { offset: 0, len }),
5557
cast_columns_policy: CastColumnsPolicy::ERROR_ON_MISMATCH,
5658
missing_columns_policy: MissingColumnsPolicy::Raise,
5759
extra_columns_policy: ExtraColumnsPolicy::Raise,
5860
include_file_paths: None,
59-
column_mapping: None,
61+
6062
deletion_files: None,
6163
},
6264
)?

crates/polars-lazy/src/scan/csv.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -338,13 +338,14 @@ impl LazyFileListReader for LazyCsvReader {
338338
cache: self.cache,
339339
glob: self.glob,
340340
projection: None,
341+
column_mapping: None,
342+
default_values: None,
341343
row_index,
342344
pre_slice,
343345
cast_columns_policy: CastColumnsPolicy::ERROR_ON_MISMATCH,
344346
missing_columns_policy: MissingColumnsPolicy::Raise,
345347
extra_columns_policy: ExtraColumnsPolicy::Raise,
346348
include_file_paths: self.include_file_paths,
347-
column_mapping: None,
348349
deletion_files: None,
349350
},
350351
)?

crates/polars-lazy/src/scan/ipc.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,13 +72,14 @@ impl LazyFileListReader for LazyIpcReader {
7272
cache,
7373
glob: true,
7474
projection: None,
75+
column_mapping: None,
76+
default_values: None,
7577
row_index,
7678
pre_slice,
7779
cast_columns_policy: CastColumnsPolicy::ERROR_ON_MISMATCH,
7880
missing_columns_policy: MissingColumnsPolicy::Raise,
7981
extra_columns_policy: ExtraColumnsPolicy::Raise,
8082
include_file_paths,
81-
column_mapping: None,
8283
deletion_files: None,
8384
},
8485
)?

crates/polars-lazy/src/scan/ndjson.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,13 +133,14 @@ impl LazyFileListReader for LazyJsonLineReader {
133133
cache: false,
134134
glob: true,
135135
projection: None,
136+
column_mapping: None,
137+
default_values: None,
136138
row_index: self.row_index,
137139
pre_slice: self.n_rows.map(|len| Slice::Positive { offset: 0, len }),
138140
cast_columns_policy: CastColumnsPolicy::ERROR_ON_MISMATCH,
139141
missing_columns_policy: MissingColumnsPolicy::Raise,
140142
extra_columns_policy: ExtraColumnsPolicy::Raise,
141143
include_file_paths: self.include_file_paths,
142-
column_mapping: None,
143144
deletion_files: None,
144145
};
145146

crates/polars-lazy/src/scan/parquet.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ impl LazyFileListReader for LazyParquetReader {
7979
cache: self.args.cache,
8080
glob: self.args.glob,
8181
projection: None,
82+
column_mapping: None,
83+
default_values: None,
8284
// Note: We call `with_row_index()` on the LazyFrame below
8385
row_index: None,
8486
pre_slice: self
@@ -93,7 +95,6 @@ impl LazyFileListReader for LazyParquetReader {
9395
},
9496
extra_columns_policy: ExtraColumnsPolicy::Raise,
9597
include_file_paths: self.args.include_file_paths,
96-
column_mapping: None,
9798
deletion_files: None,
9899
};
99100

crates/polars-plan/dsl-schema-hashes.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
"DataTypeExpr": "3304a33a01090cd946ec1444fd8a7527a2576c80b2ea643363b1f4961f480f4d",
3535
"DataTypeFunction": "c6d63255017b5b7ea53657f54f00c6b97812290b04f0be97f7e0344bec35f2fe",
3636
"DataTypeSelector": "2cf166ffa145c2bb96c06e4974aa7e9c779444d55f2aaa13a5ae4a9a34e639cc",
37+
"DefaultFieldValues": "f8bbcd3a9b2aedd977747c67565f6681e077cf4d884eb58950f80c7963f9c57a",
3738
"DeletionFilesList": "9ce4b5ebd1ee44407cededa5c938331366e5785db5f65a5c6bb7734718e02e99",
3839
"Dimension": "db975873400c15eb91a6d03a3696ea4dd5729d8f93c7166f3900b81de788cf86",
3940
"DistinctOptionsDSL": "99aa6caaf18719a03fcd2899c1372d92de6241e4cc69b12d3fdb6d9525085f86",
@@ -63,6 +64,7 @@
6364
"HiveOptions": "3a5e4555c96948c0a0663cb8e4c2f8d07ae5a680d7cdd50d0709046758dd1c7c",
6465
"IcebergColumn": "032ccd7204e92b3c0d57b22e994f57c31299c41627b80f3615f27d34f09764e9",
6566
"IcebergColumnType": "fc05ab489814a3d0fc4ed17194ecd7e616dafd4178d11327c5ec8c68afbf8c17",
67+
"IcebergIdentityTransformedPartitionFields": "665b6d5ba1670e94ac5a11c1c46e3cd8f4200516c2213e10116b82d967bc685c",
6668
"IcebergSchema": "2aa1815f2639935363c09a49b265173645141a8d68c2f0567f54fa15ef123906",
6769
"IntDataTypeExpr": "cd66dcd9c44cdddd8864c0fe642e5fcef5263f6f142cce906011a0180e0fd161",
6870
"InterpolationMethod": "157b72c21c66950baafe8033836c3335571d2f227dd882ba6b9c8d3e2f5928d3",
@@ -122,6 +124,7 @@
122124
"RankOptions": "300760580450dfc03093e26e9e45e0140f661281fb0ea25f8bd0a44e1caeb722",
123125
"RenameAliasFn": "3b5eca3813cf1c2a169c6f57995a4ec092bb46b80e96d213ec21991ddae8e5f5",
124126
"ReshapeDimension": "9debed355d7b3acefaf615351d4e95dc752db954c922f4d39f01bd8f0cb7947a",
127+
"Result_of_Array_of_Null_or_String": "fdd297b7f679e331000a6a0372ccdfd796d9229fe17d3c8b0ee8f9f742dee837",
125128
"Roll": "dc230ab207ef2823d5a152bf398e51d99e03e8bb16f4a076a272c293090339f9",
126129
"RollingCovOptions": "228d120dacb1fe731d1fd63113261da00ec9c3fe51a804d0e1029558b0be81fd",
127130
"RollingFnParams": "a76a631377990594ac614e00e07d4fb445fc0ac61a09f5a9db1913e5ce1d01c4",
@@ -165,7 +168,7 @@
165168
"TimeZoneSet": "356f30d12c7f870acf96650eebe6a8cf0d2ea68d2ce976f93f69e2500dca3d25",
166169
"TrigonometricFunction": "9444fa00e47ea519496e1242418c2383101508ddd0dcec6174a6175f4e6d5371",
167170
"UnicodeForm": "f539f29f54ef29faede48a9842191bf0c0ca7206e4f7d32ef1a54972b4a0cae5",
168-
"UnifiedScanArgs": "7146d373d313297cc702720c6fedb69c0b080b4762b9f6525f7a4d8abee8b83f",
171+
"UnifiedScanArgs": "fe5639b4e01fc9e7d211e11ce8fa02236557870f6b7def016eeffdf1f0b12956",
169172
"UnionArgs": "ebf94f6b6f44122a166aacd5ae2ae94e874a816022bf44939531dfa6b18246e1",
170173
"UniqueKeepStrategy": "f62f1d9723372528ed52da652de8c2f7f321cccb7dbf001b03f1e6f3881b66b0",
171174
"UnknownKind": "bbf073303ab5d295d839b879812fe42cb27f6c87678c4cc4d8ca578125f28316",
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
use std::sync::Arc;
2+
3+
use polars_core::prelude::{Column, PlIndexMap};
4+
5+
/// Default field values when they are missing from the data file.
6+
#[derive(Debug, Clone, Eq, Hash, PartialEq)]
7+
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
8+
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
9+
pub enum DefaultFieldValues {
10+
/// This is to follow the spec for missing columns:
11+
/// * Return the value from partition metadata if an Identity Transform exists for the field
12+
///
13+
/// Note: This is not the Iceberg V3 `initial-default`.
14+
Iceberg(Arc<IcebergIdentityTransformedPartitionFields>),
15+
}
16+
17+
#[derive(Debug, Clone, PartialEq)]
18+
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
19+
pub struct IcebergIdentityTransformedPartitionFields(pub PlIndexMap<u32, Result<Column, String>>);
20+
21+
impl Eq for IcebergIdentityTransformedPartitionFields {}
22+
23+
impl std::hash::Hash for IcebergIdentityTransformedPartitionFields {
24+
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
25+
for key in self.keys() {
26+
key.hash(state);
27+
}
28+
}
29+
}
30+
31+
impl std::ops::Deref for IcebergIdentityTransformedPartitionFields {
32+
type Target = PlIndexMap<u32, Result<Column, String>>;
33+
34+
fn deref(&self) -> &Self::Target {
35+
&self.0
36+
}
37+
}
38+
39+
impl std::ops::DerefMut for IcebergIdentityTransformedPartitionFields {
40+
fn deref_mut(&mut self) -> &mut Self::Target {
41+
&mut self.0
42+
}
43+
}
44+
45+
#[cfg(feature = "dsl-schema")]
46+
impl schemars::JsonSchema for IcebergIdentityTransformedPartitionFields {
47+
fn schema_name() -> String {
48+
"IcebergIdentityTransformedPartitionFields".to_owned()
49+
}
50+
51+
fn schema_id() -> std::borrow::Cow<'static, str> {
52+
std::borrow::Cow::Borrowed(concat!(
53+
module_path!(),
54+
"::",
55+
"IcebergIdentityTransformedPartitionFields"
56+
))
57+
}
58+
59+
fn json_schema(generator: &mut schemars::r#gen::SchemaGenerator) -> schemars::schema::Schema {
60+
PlIndexMap::<u32, Result<Vec<()>, String>>::json_schema(generator)
61+
}
62+
}

crates/polars-plan/src/dsl/file_scan/mod.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ use serde::{Deserialize, Serialize};
2020
use strum_macros::IntoStaticStr;
2121

2222
use super::*;
23+
use crate::dsl::default_values::DefaultFieldValues;
24+
pub mod default_values;
2325
pub mod deletion;
2426

2527
#[cfg(feature = "python")]
@@ -237,6 +239,9 @@ pub struct UnifiedScanArgs {
237239
pub glob: bool,
238240

239241
pub projection: Option<Arc<[PlSmallStr]>>,
242+
pub column_mapping: Option<ColumnMapping>,
243+
/// Default values for missing columns.
244+
pub default_values: Option<DefaultFieldValues>,
240245
pub row_index: Option<RowIndex>,
241246
/// Slice applied before predicates
242247
pub pre_slice: Option<Slice>,
@@ -247,7 +252,6 @@ pub struct UnifiedScanArgs {
247252
pub include_file_paths: Option<PlSmallStr>,
248253

249254
pub deletion_files: Option<DeletionFilesList>,
250-
pub column_mapping: Option<ColumnMapping>,
251255
}
252256

253257
impl Default for UnifiedScanArgs {
@@ -260,14 +264,15 @@ impl Default for UnifiedScanArgs {
260264
cache: false,
261265
glob: true,
262266
projection: None,
267+
column_mapping: None,
268+
default_values: None,
263269
row_index: None,
264270
pre_slice: None,
265271
cast_columns_policy: CastColumnsPolicy::default(),
266272
missing_columns_policy: MissingColumnsPolicy::default(),
267273
extra_columns_policy: ExtraColumnsPolicy::default(),
268274
include_file_paths: None,
269275
deletion_files: None,
270-
column_mapping: None,
271276
}
272277
}
273278
}

crates/polars-plan/src/plans/optimizer/expand_datasets.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,14 +123,15 @@ impl OptimizationRule for ExpandDatasets {
123123
cache,
124124
glob: _,
125125
projection: _projection @ None,
126+
column_mapping,
127+
default_values,
126128
row_index: _row_index @ None,
127129
pre_slice: _pre_slice @ None,
128130
cast_columns_policy,
129131
missing_columns_policy,
130132
extra_columns_policy,
131133
include_file_paths: _include_file_paths @ None,
132134
deletion_files,
133-
column_mapping,
134135
} = resolved_unified_scan_args.as_ref()
135136
else {
136137
panic!(
@@ -145,8 +146,9 @@ impl OptimizationRule for ExpandDatasets {
145146
unified_scan_args.cast_columns_policy = cast_columns_policy.clone();
146147
unified_scan_args.missing_columns_policy = *missing_columns_policy;
147148
unified_scan_args.extra_columns_policy = *extra_columns_policy;
148-
unified_scan_args.deletion_files = deletion_files.clone();
149149
unified_scan_args.column_mapping = column_mapping.clone();
150+
unified_scan_args.default_values = default_values.clone();
151+
unified_scan_args.deletion_files = deletion_files.clone();
150152

151153
*sources = resolved_sources.clone();
152154

0 commit comments

Comments
 (0)