@@ -29,66 +29,82 @@ impl OptimizationRule for ExpandDatasets {
29
29
_expr_arena : & mut Arena < crate :: prelude:: AExpr > ,
30
30
node : Node ,
31
31
) -> PolarsResult < Option < IR > > {
32
- let ir = lp_arena. get ( node) ;
33
-
32
+ // # Note
33
+ // This function mutates the IR node in-place rather than returning the new IR - the
34
+ // StackOptimizer will re-call this function otherwise.
34
35
if let IR :: Scan {
36
+ sources,
35
37
scan_type,
36
38
unified_scan_args,
37
- ..
38
- } = ir
39
+
40
+ file_info : _,
41
+ hive_parts : _,
42
+ predicate : _,
43
+ output_schema : _,
44
+ } = lp_arena. get_mut ( node)
39
45
{
40
46
let projection = unified_scan_args. projection . clone ( ) ;
41
47
let limit = match unified_scan_args. pre_slice . clone ( ) {
42
48
Some ( v @ Slice :: Positive { .. } ) => Some ( v. end_position ( ) ) ,
43
49
_ => None ,
44
50
} ;
45
51
46
- match scan_type. as_ref ( ) {
52
+ match scan_type. as_mut ( ) {
47
53
#[ cfg( feature = "python" ) ]
48
54
FileScanIR :: PythonDataset {
49
55
dataset_object,
50
56
cached_ir,
51
57
} => {
52
58
let cached_ir = cached_ir. clone ( ) ;
53
-
54
59
let mut guard = cached_ir. lock ( ) . unwrap ( ) ;
55
60
56
- // Note: We always get called twice in succession from the stack optimizer,
57
- // as it was designed to optimize until fixed point. Ensure we return
58
- // Ok(None) if the mutex contains the initialized state.
59
- if match guard. as_ref ( ) {
60
- // Reject cached if limit or projection does not match. This can happen if a scan is reused.
61
+ if config:: verbose ( ) {
62
+ eprintln ! (
63
+ "expand_datasets(): python[{}]: limit: {:?}, project: {}" ,
64
+ dataset_object. name( ) ,
65
+ limit,
66
+ projection. as_ref( ) . map_or(
67
+ PlSmallStr :: from_static( "all" ) ,
68
+ |x| format_pl_smallstr!( "{}" , x. len( ) )
69
+ )
70
+ )
71
+ }
72
+
73
+ let can_use_existing = match guard. as_ref ( ) {
61
74
Some ( resolved) => {
62
75
let ExpandedDataset {
63
76
limit : cached_limit,
64
77
projection : cached_projection,
65
- resolved_ir : _,
78
+ expanded_dsl : _,
66
79
python_scan : _,
67
80
} = resolved;
68
81
69
82
cached_limit == & limit && cached_projection == & projection
70
83
} ,
71
84
72
85
None => false ,
73
- } {
74
- return Ok ( None ) ;
75
- }
86
+ } ;
76
87
77
- if config:: verbose ( ) {
78
- eprintln ! (
79
- "expand_datasets(): python[{}]: limit: {:?}, project: {}" ,
80
- dataset_object. name( ) ,
88
+ if !can_use_existing {
89
+ let expanded_dsl =
90
+ dataset_object. to_dataset_scan ( limit, projection. as_deref ( ) ) ?;
91
+
92
+ * guard = Some ( ExpandedDataset {
81
93
limit,
82
- projection. as_ref( ) . map_or(
83
- PlSmallStr :: from_static( "all" ) ,
84
- |x| format_pl_smallstr!( "{}" , x. len( ) )
85
- )
86
- )
94
+ projection,
95
+ expanded_dsl,
96
+ python_scan : None ,
97
+ } )
87
98
}
88
99
89
- let plan = dataset_object. to_dataset_scan ( limit, projection. as_deref ( ) ) ?;
100
+ let ExpandedDataset {
101
+ limit : _,
102
+ projection : _,
103
+ expanded_dsl,
104
+ python_scan,
105
+ } = guard. as_mut ( ) . unwrap ( ) ;
90
106
91
- let ( resolved_ir , python_scan ) = match plan {
107
+ match expanded_dsl {
92
108
DslPlan :: Scan {
93
109
sources : resolved_sources,
94
110
unified_scan_args : resolved_unified_scan_args,
@@ -97,22 +113,6 @@ impl OptimizationRule for ExpandDatasets {
97
113
} => {
98
114
use crate :: dsl:: FileScanDsl ;
99
115
100
- let mut ir = ir. clone ( ) ;
101
-
102
- let IR :: Scan {
103
- sources,
104
- scan_type,
105
- unified_scan_args,
106
-
107
- file_info : _,
108
- hive_parts : _,
109
- predicate : _,
110
- output_schema : _,
111
- } = & mut ir
112
- else {
113
- unreachable ! ( )
114
- } ;
115
-
116
116
// We only want a few configuration flags from here (e.g. column casting config).
117
117
// The rest we either expect to be None (e.g. projection / row_index), or ignore.
118
118
let UnifiedScanArgs {
@@ -131,25 +131,26 @@ impl OptimizationRule for ExpandDatasets {
131
131
include_file_paths : _include_file_paths @ None ,
132
132
deletion_files,
133
133
column_mapping,
134
- } = * resolved_unified_scan_args
134
+ } = resolved_unified_scan_args. as_ref ( )
135
135
else {
136
136
panic ! (
137
137
"invalid scan args from python dataset resolve: {:?}" ,
138
138
& resolved_unified_scan_args
139
139
)
140
140
} ;
141
141
142
- unified_scan_args. cloud_options = cloud_options;
143
- unified_scan_args. rechunk = rechunk;
144
- unified_scan_args. cache = cache;
145
- unified_scan_args. cast_columns_policy = cast_columns_policy;
146
- unified_scan_args. missing_columns_policy = missing_columns_policy;
147
- unified_scan_args. extra_columns_policy = extra_columns_policy;
148
- unified_scan_args. deletion_files = deletion_files;
149
- unified_scan_args. column_mapping = column_mapping;
150
-
151
- * sources = resolved_sources;
152
- * scan_type = Box :: new ( match * resolved_scan_type {
142
+ unified_scan_args. cloud_options = cloud_options. clone ( ) ;
143
+ unified_scan_args. rechunk = * rechunk;
144
+ unified_scan_args. cache = * cache;
145
+ unified_scan_args. cast_columns_policy = cast_columns_policy. clone ( ) ;
146
+ unified_scan_args. missing_columns_policy = * missing_columns_policy;
147
+ unified_scan_args. extra_columns_policy = * extra_columns_policy;
148
+ unified_scan_args. deletion_files = deletion_files. clone ( ) ;
149
+ unified_scan_args. column_mapping = column_mapping. clone ( ) ;
150
+
151
+ * sources = resolved_sources. clone ( ) ;
152
+
153
+ * scan_type = Box :: new ( match * resolved_scan_type. clone ( ) {
153
154
#[ cfg( feature = "csv" ) ]
154
155
FileScanDsl :: Csv { options } => FileScanIR :: Csv { options } ,
155
156
@@ -182,18 +183,15 @@ impl OptimizationRule for ExpandDatasets {
182
183
file_info : _,
183
184
} => FileScanIR :: Anonymous { options, function } ,
184
185
} ) ;
185
-
186
- ( ir, None )
187
186
} ,
188
187
189
- DslPlan :: PythonScan { options } => (
190
- ir. clone ( ) ,
191
- Some ( ExpandedPythonScan {
188
+ DslPlan :: PythonScan { options } => {
189
+ * python_scan = Some ( ExpandedPythonScan {
192
190
name : dataset_object. name ( ) ,
193
- scan_fn : options. scan_fn . unwrap ( ) ,
194
- variant : options. python_source ,
195
- } ) ,
196
- ) ,
191
+ scan_fn : options. scan_fn . clone ( ) . unwrap ( ) ,
192
+ variant : options. python_source . clone ( ) ,
193
+ } )
194
+ } ,
197
195
198
196
dsl => {
199
197
polars_bail ! (
@@ -203,24 +201,12 @@ impl OptimizationRule for ExpandDatasets {
203
201
)
204
202
} ,
205
203
} ;
206
-
207
- let resolved = ExpandedDataset {
208
- limit,
209
- projection,
210
- resolved_ir,
211
- python_scan,
212
- } ;
213
-
214
- * guard = Some ( resolved) ;
215
-
216
- let resolved_ir = guard. as_ref ( ) . map ( |x| x. resolved_ir . clone ( ) ) . unwrap ( ) ;
217
-
218
- return Ok ( Some ( resolved_ir) ) ;
219
204
} ,
220
205
221
206
_ => { } ,
222
207
}
223
208
}
209
+
224
210
Ok ( None )
225
211
}
226
212
}
@@ -229,7 +215,7 @@ impl OptimizationRule for ExpandDatasets {
229
215
pub struct ExpandedDataset {
230
216
limit : Option < usize > ,
231
217
projection : Option < Arc < [ PlSmallStr ] > > ,
232
- resolved_ir : IR ,
218
+ expanded_dsl : DslPlan ,
233
219
234
220
/// Fallback python scan
235
221
#[ cfg( feature = "python" ) ]
@@ -256,7 +242,7 @@ impl Debug for ExpandedDataset {
256
242
let ExpandedDataset {
257
243
limit,
258
244
projection,
259
- resolved_ir ,
245
+ expanded_dsl ,
260
246
261
247
#[ cfg( feature = "python" ) ]
262
248
python_scan,
@@ -265,8 +251,10 @@ impl Debug for ExpandedDataset {
265
251
return display:: ExpandedDataset {
266
252
limit,
267
253
projection,
268
- resolved_ir,
269
-
254
+ expanded_dsl : & match expanded_dsl. display ( ) {
255
+ Ok ( v) => v. to_string ( ) ,
256
+ Err ( e) => e. to_string ( ) ,
257
+ } ,
270
258
#[ cfg( feature = "python" ) ]
271
259
python_scan : python_scan. as_ref ( ) . map (
272
260
|ExpandedPythonScan {
@@ -281,18 +269,17 @@ impl Debug for ExpandedDataset {
281
269
. fmt ( f) ;
282
270
283
271
mod display {
272
+ use std:: fmt:: Debug ;
284
273
use std:: sync:: Arc ;
285
274
286
275
use polars_utils:: pl_str:: PlSmallStr ;
287
276
288
- use crate :: prelude:: IR ;
289
-
290
277
#[ derive( Debug ) ]
291
278
#[ expect( unused) ]
292
279
pub struct ExpandedDataset < ' a > {
293
280
pub limit : & ' a Option < usize > ,
294
281
pub projection : & ' a Option < Arc < [ PlSmallStr ] > > ,
295
- pub resolved_ir : & ' a IR ,
282
+ pub expanded_dsl : & ' a str ,
296
283
297
284
#[ cfg( feature = "python" ) ]
298
285
pub python_scan : Option < PlSmallStr > ,
0 commit comments