Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/lint-rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ on:
- crates/**
- docs/source/src/rust/**
- examples/**
- pyo3-polars/**
- pyo3-polars/Cargo.toml
- py-polars/src/**
- py-polars/Cargo.toml
- Cargo.toml
Expand All @@ -17,6 +19,8 @@ on:
- crates/**
- docs/source/src/rust/**
- examples/**
- pyo3-polars/**
- pyo3-polars/Cargo.toml
- py-polars/src/**
- py-polars/Cargo.toml
- Cargo.toml
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@ mod parallel_jaccard_mod;
use polars::prelude::*;
use polars_lazy::frame::IntoLazy;
use polars_lazy::prelude::LazyFrame;
use pyo3_polars::{PyDataFrame, PyLazyFrame};
use pyo3::prelude::*;
use pyo3_polars::error::PyPolarsErr;
use pyo3_polars::PolarsAllocator;
use pyo3_polars::{PolarsAllocator, PyDataFrame, PyLazyFrame};

#[global_allocator]
static ALLOC: PolarsAllocator = PolarsAllocator::new();
Expand Down
2 changes: 1 addition & 1 deletion pyo3-polars/example/io_plugin/io_plugin/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
mod samplers;

use polars::prelude::*;
use pyo3_polars::{PyDataFrame, PyExpr, PySchema};
use pyo3::prelude::*;
use pyo3_polars::error::PyPolarsErr;
use pyo3_polars::{PyDataFrame, PyExpr, PySchema};

use crate::samplers::PySampler;

Expand Down
2 changes: 1 addition & 1 deletion pyo3-polars/pyo3-polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ thiserror = "1"

[features]
# Polars python is needed because all variants need to be activated of the DSL.
lazy = ["polars/serde-lazy", "polars-plan", "polars-lazy/serde", "polars-utils", "polars-lazy/python"]
lazy = ["polars/serde-lazy", "polars/lazy", "polars-plan", "polars-lazy/serde", "polars-utils", "polars-lazy/python"]
derive = ["pyo3-polars-derive", "polars-plan/python", "polars-ffi", "serde-pickle", "serde"]
dtype-full = ["polars/dtype-full", "dtype-decimal", "dtype-array", "dtype-struct", "dtype-categorical"]
object = ["polars/object"]
Expand Down
2 changes: 1 addition & 1 deletion pyo3-polars/pyo3-polars/src/export.rs
Original file line number Diff line number Diff line change
@@ -1 +1 @@
pub use {polars_core, polars_ffi, polars_plan, arrow as polars_arrow, polars_error};
pub use {arrow as polars_arrow, polars_core, polars_error, polars_ffi, polars_plan};
1 change: 0 additions & 1 deletion pyo3-polars/pyo3-polars/src/ffi/to_py.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use arrow::ffi;

use polars::prelude::{ArrayRef, ArrowField};
use pyo3::ffi::Py_uintptr_t;
use pyo3::prelude::*;
Expand Down
5 changes: 3 additions & 2 deletions pyo3-polars/pyo3-polars/src/ffi/to_rust.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use crate::error::PyPolarsErr;
use polars::prelude::*;
use arrow::ffi;
use polars::prelude::*;
use pyo3::ffi::Py_uintptr_t;
use pyo3::prelude::*;

use crate::error::PyPolarsErr;

pub fn array_to_rust(obj: &Bound<PyAny>) -> PyResult<ArrayRef> {
// prepare a pointer to receive the Array struct
let array = Box::new(ffi::ArrowArray::empty());
Expand Down
2 changes: 1 addition & 1 deletion pyo3-polars/pyo3-polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ pub mod types;
pub use crate::alloc::PolarsAllocator;
mod ffi;

pub use types::*;
use once_cell::sync::Lazy;
use pyo3::prelude::*;
pub use types::*;

pub(crate) static POLARS: Lazy<Py<PyModule>> =
Lazy::new(|| Python::with_gil(|py| PyModule::import(py, "polars").unwrap().unbind()));
Expand Down
123 changes: 59 additions & 64 deletions pyo3-polars/pyo3-polars/src/types.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
use std::convert::Infallible;

use super::*;

use crate::error::PyPolarsErr;
use crate::ffi::to_py::to_py_array;
use arrow;
use polars_core::datatypes::{CompatLevel, DataType};
use polars_core::prelude::*;
Expand All @@ -21,12 +17,15 @@ use pyo3::ffi::Py_uintptr_t;
use pyo3::intern;
use pyo3::prelude::*;
use pyo3::pybacked::PyBackedStr;
#[cfg(feature = "lazy")]
use pyo3::types::PyBytes;
#[cfg(feature = "dtype-struct")]
use pyo3::types::PyList;
use pyo3::types::{PyDict, PyString};

#[cfg(feature = "lazy")]
use pyo3::types::PyBytes;
use super::*;
use crate::error::PyPolarsErr;
use crate::ffi::to_py::to_py_array;

#[cfg(feature = "dtype-categorical")]
pub(crate) fn get_series(obj: &Bound<'_, PyAny>) -> PyResult<Series> {
Expand Down Expand Up @@ -100,7 +99,7 @@ impl<'py> FromPyObject<'py> for PyTimeUnit {
return Err(PyValueError::new_err(format!(
"`time_unit` must be one of {{'ns', 'us', 'ms'}}, got {v}",
)))
}
},
};
Ok(PyTimeUnit(parsed))
}
Expand Down Expand Up @@ -309,7 +308,7 @@ impl<'py> IntoPyObject<'py> for PySeries {
}

Ok(pyseries)
}
},
// Go via pyarrow
Err(_) => {
let s = self.0.rechunk();
Expand All @@ -321,7 +320,7 @@ impl<'py> IntoPyObject<'py> for PySeries {
let s = polars.call_method1("from_arrow", (arg,)).unwrap();
let s = s.call_method1("rename", (name,)).unwrap();
Ok(s)
}
},
}
}
}
Expand Down Expand Up @@ -351,13 +350,17 @@ impl<'py> IntoPyObject<'py> for PyLazyFrame {
type Error = PyErr;

fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
dbg!("into py");
use polars::prelude::PlanSerializationContext;

let polars = POLARS.bind(py);
let cls = polars.getattr("LazyFrame")?;
let instance = cls.call_method1(intern!(py, "__new__"), (&cls,)).unwrap();

let mut v = vec![];
self.0.logical_plan.serialize_versioned(&mut v, polars::prelude::PlanSerializationContext::default()).unwrap();
self.0
.logical_plan
.serialize_versioned(&mut v, PlanSerializationContext::default())
.unwrap();
instance.call_method1("__setstate__", (&v,))?;
Ok(instance)
}
Expand Down Expand Up @@ -413,108 +416,105 @@ impl<'py> IntoPyObject<'py> for PyDataType {
DataType::Int8 => {
let class = pl.getattr(intern!(py, "Int8")).unwrap();
class.call0()
}
},
DataType::Int16 => {
let class = pl.getattr(intern!(py, "Int16")).unwrap();
class.call0()
}
},
DataType::Int32 => {
let class = pl.getattr(intern!(py, "Int32")).unwrap();
class.call0()
}
},
DataType::Int64 => {
let class = pl.getattr(intern!(py, "Int64")).unwrap();
class.call0()
}
},
DataType::UInt8 => {
let class = pl.getattr(intern!(py, "UInt8")).unwrap();
class.call0()
}
},
DataType::UInt16 => {
let class = pl.getattr(intern!(py, "UInt16")).unwrap();
class.call0()
}
},
DataType::UInt32 => {
let class = pl.getattr(intern!(py, "UInt32")).unwrap();
class.call0()
}
},
DataType::UInt64 => {
let class = pl.getattr(intern!(py, "UInt64")).unwrap();
class.call0()
}
},
DataType::Float32 => {
let class = pl.getattr(intern!(py, "Float32")).unwrap();
class.call0()
}
},
DataType::Float64 | DataType::Unknown(UnknownKind::Float) => {
let class = pl.getattr(intern!(py, "Float64")).unwrap();
class.call0()
}
},
#[cfg(feature = "dtype-decimal")]
DataType::Decimal(precision, scale) => {
let class = pl.getattr(intern!(py, "Decimal")).unwrap();
let args = (*precision, *scale);
class.call1(args)
}
},
DataType::Boolean => {
let class = pl.getattr(intern!(py, "Boolean")).unwrap();
class.call0()
}
},
DataType::String | DataType::Unknown(UnknownKind::Str) => {
let class = pl.getattr(intern!(py, "String")).unwrap();
class.call0()
}
},
DataType::Binary => {
let class = pl.getattr(intern!(py, "Binary")).unwrap();
class.call0()
}
},
#[cfg(feature = "dtype-array")]
DataType::Array(inner, size) => {
let class = pl.getattr(intern!(py, "Array")).unwrap();
let inner = PyDataType(*inner.clone()).into_pyobject(py)?;
let args = (inner, *size);
class.call1(args)
}
},
DataType::List(inner) => {
let class = pl.getattr(intern!(py, "List")).unwrap();
let inner = PyDataType(*inner.clone()).into_pyobject(py)?;
class.call1((inner,))
}
},
DataType::Date => {
let class = pl.getattr(intern!(py, "Date")).unwrap();
class.call0()
}
},
DataType::Datetime(tu, tz) => {
let datetime_class = pl.getattr(intern!(py, "Datetime")).unwrap();
datetime_class.call1((tu.to_ascii(), tz.as_ref().map(|s| s.as_str())))
}
},
DataType::Duration(tu) => {
let duration_class = pl.getattr(intern!(py, "Duration")).unwrap();
duration_class.call1((tu.to_ascii(),))
}
},
#[cfg(feature = "object")]
DataType::Object(_) => {
let class = pl.getattr(intern!(py, "Object")).unwrap();
class.call0()
}
},
#[cfg(feature = "dtype-categorical")]
DataType::Categorical(_, ordering) => {
DataType::Categorical(_, _) => {
let class = pl.getattr(intern!(py, "Categorical")).unwrap();
let ordering = match ordering {
CategoricalOrdering::Physical => "physical",
CategoricalOrdering::Lexical => "lexical",
};
class.call1((ordering,))
}
class.call1(())
},
#[cfg(feature = "dtype-categorical")]
DataType::Enum(rev_map, _) => {
DataType::Enum(categories, _) => {
// we should always have an initialized rev_map coming from rust
let categories = rev_map.as_ref().unwrap().get_categories();
let class = pl.getattr(intern!(py, "Enum")).unwrap();
let s = Series::from_arrow("category".into(), categories.clone().boxed()).unwrap();
let s =
Series::from_arrow("category".into(), categories.categories().clone().boxed())
.unwrap();
let series = to_series(py, PySeries(s));
return class.call1((series,));
}
class.call1((series,))
},
DataType::Time => pl.getattr(intern!(py, "Time")),
#[cfg(feature = "dtype-struct")]
DataType::Struct(fields) => {
Expand All @@ -530,21 +530,21 @@ impl<'py> IntoPyObject<'py> for PyDataType {
let fields = PyList::new(py, iter)?;
let struct_class = pl.getattr(intern!(py, "Struct")).unwrap();
struct_class.call1((fields,))
}
},
DataType::Null => {
let class = pl.getattr(intern!(py, "Null")).unwrap();
class.call0()
}
},
DataType::Unknown(UnknownKind::Int(v)) => {
PyDataType(materialize_dyn_int(*v).dtype()).into_pyobject(py)
}
},
DataType::Unknown(_) => {
let class = pl.getattr(intern!(py, "Unknown")).unwrap();
class.call0()
}
},
DataType::BinaryOffset => {
panic!("this type isn't exposed to python")
}
},
#[allow(unreachable_patterns)]
_ => panic!("activate dtype"),
}
Expand Down Expand Up @@ -592,9 +592,13 @@ impl<'py> FromPyObject<'py> for PyDataType {
"String" => DataType::String,
"Binary" => DataType::Binary,
#[cfg(feature = "dtype-categorical")]
"Categorical" => DataType::Categorical(None, Default::default()),
"Categorical" => DataType::Categorical(Categories::global(), Categories::global().mapping()),
#[cfg(feature = "dtype-categorical")]
"Enum" => DataType::Enum(None, Default::default()),
"Enum" => {
let categories = FrozenCategories::new([]).unwrap();
let mapping = categories.mapping().clone();
DataType::Enum(categories, mapping)
},
"Date" => DataType::Date,
"Time" => DataType::Time,
"Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
Expand Down Expand Up @@ -632,26 +636,17 @@ impl<'py> FromPyObject<'py> for PyDataType {
"Binary" => DataType::Binary,
#[cfg(feature = "dtype-categorical")]
"Categorical" => {
let ordering = ob.getattr(intern!(py, "ordering")).unwrap();
let ordering = ordering.extract::<PyBackedStr>()?;
let ordering = match ordering.as_bytes() {
b"physical" => CategoricalOrdering::Physical,
b"lexical" => CategoricalOrdering::Lexical,
ordering => {
let ordering = std::str::from_utf8(ordering).unwrap();
return Err(PyValueError::new_err(format!("invalid ordering argument: {ordering}")))
}
};

DataType::Categorical(None, ordering)
DataType::Categorical(Categories::global(), Categories::global().mapping())
},
#[cfg(feature = "dtype-categorical")]
"Enum" => {
let categories = ob.getattr(intern!(py, "categories")).unwrap();
let s = get_series(&categories.as_borrowed())?;
let ca = s.str().map_err(PyPolarsErr::from)?;
let categories = ca.downcast_iter().next().unwrap().clone();
DataType::Enum(Some(Arc::new(RevMapping::build_local(categories))), Default::default())
let categories = ca.iter();
let categories = FrozenCategories::new(categories.map(|v| v.unwrap())).unwrap();
let mapping = categories.mapping().clone();
DataType::Enum(categories, mapping)
},
"Date" => DataType::Date,
"Time" => DataType::Time,
Expand Down
Loading