diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3d69440..a45b1b0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -94,6 +94,9 @@ jobs: - name: Test (abi3) run: cargo test --verbose --features pyo3/abi3-py37 + - name: Test (arbitrary_precision) + run: cargo test --verbose --features arbitrary_precision + env: RUST_BACKTRACE: 1 diff --git a/CHANGELOG.md b/CHANGELOG.md index f267740..e5b1649 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 0.28.0 - 2026-02-17 +- Add `arbitrary_precision` feature + ## 0.27.0 - 2025-11-07 - Update to PyO3 0.27 diff --git a/Cargo.toml b/Cargo.toml index 4a714a3..1ce336f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ documentation = "https://docs.rs/crate/pythonize/" [dependencies] serde = { version = "1.0", default-features = false, features = ["std"] } +serde_json = { version = "1.0", optional = true } pyo3 = { version = "0.27", default-features = false } [dev-dependencies] @@ -22,3 +23,6 @@ serde_json = "1.0" serde_bytes = "0.11" maplit = "1.0.2" serde_path_to_error = "0.1.15" + +[features] +arbitrary_precision = ["serde_json", "serde_json/arbitrary_precision"] diff --git a/README.md b/README.md index 2667523..6ade4de 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ Pythonize has two main public APIs: `pythonize` and `depythonize`. [Serde]: https://github.com/serde-rs/serde [PyO3]: https://github.com/PyO3/pyo3 -# Examples +## Examples ```rust use serde::{Serialize, Deserialize}; @@ -47,3 +47,14 @@ Python::attach(|py| { assert_eq!(new_sample, sample); }) ``` + +## Features + +### `arbitrary_precision` + +Enable support for `serde_json`'s `arbitrary_precision` feature, which allows handling numbers that exceed the range of `i128`/`u128` when converting `serde_json::Value` to and from Python. + +```toml +[dependencies] +pythonize = { version = "0.28", features = ["arbitrary_precision"] } +``` diff --git a/src/de.rs b/src/de.rs index a30dbca..b763cce 100644 --- a/src/de.rs +++ b/src/de.rs @@ -4,6 +4,9 @@ use serde::Deserialize; use crate::error::{ErrorImpl, PythonizeError, Result}; +#[cfg(feature = "arbitrary_precision")] +const TOKEN: &str = "$serde_json::private::Number"; + /// Attempt to convert a Python object to an instance of `T` pub fn depythonize<'a, 'py, T>(obj: &'a Bound<'py, PyAny>) -> Result where @@ -68,8 +71,7 @@ impl<'a, 'py> Depythonizer<'a, 'py> { } else { visitor.visit_u128(x) } - } else { - let x: i128 = int.extract()?; + } else if let Ok(x) = int.extract::() { if let Ok(x) = i8::try_from(x) { visitor.visit_i8(x) } else if let Ok(x) = i16::try_from(x) { @@ -81,6 +83,19 @@ impl<'a, 'py> Depythonizer<'a, 'py> { } else { visitor.visit_i128(x) } + } else { + #[cfg(feature = "arbitrary_precision")] + { + visitor.visit_map(NumberDeserializer { + number: Some(int.to_string()), + }) + } + #[cfg(not(feature = "arbitrary_precision"))] + { + // Re-attempt to return the original error. + let _: i128 = int.extract()?; + unreachable!() + } } } } @@ -513,6 +528,34 @@ impl<'de> de::VariantAccess<'de> for PyEnumAccess<'_, '_> { } } +// See serde_json +#[cfg(feature = "arbitrary_precision")] +struct NumberDeserializer { + number: Option, +} + +#[cfg(feature = "arbitrary_precision")] +impl<'de> de::MapAccess<'de> for NumberDeserializer { + type Error = PythonizeError; + + fn next_key_seed(&mut self, seed: K) -> Result> + where + K: de::DeserializeSeed<'de>, + { + if self.number.is_none() { + return Ok(None); + } + seed.deserialize(TOKEN.into_deserializer()).map(Some) + } + + fn next_value_seed(&mut self, seed: V) -> Result + where + V: de::DeserializeSeed<'de>, + { + seed.deserialize(self.number.take().unwrap().into_deserializer()) + } +} + #[cfg(test)] mod test { use std::ffi::CStr; diff --git a/src/ser.rs b/src/ser.rs index c8e6dd1..425b358 100644 --- a/src/ser.rs +++ b/src/ser.rs @@ -4,6 +4,8 @@ use pyo3::types::{ PyDict, PyDictMethods, PyList, PyListMethods, PyMapping, PySequence, PyString, PyTuple, PyTupleMethods, }; +#[cfg(feature = "arbitrary_precision")] +use pyo3::types::{PyAnyMethods, PyFloat, PyInt}; use pyo3::{Bound, BoundObject, IntoPyObject, PyAny, PyResult, Python}; use serde::{ser, Serialize}; @@ -229,6 +231,21 @@ pub struct PythonStructVariantSerializer<'py, P: PythonizeTypes> { inner: PythonStructDictSerializer<'py, P>, } +#[cfg(feature = "arbitrary_precision")] +#[doc(hidden)] +pub enum StructSerializer<'py, P: PythonizeTypes> { + Struct(PythonStructDictSerializer<'py, P>), + Number { + py: Python<'py>, + number_string: Option, + _types: PhantomData

, + }, +} + +#[cfg(not(feature = "arbitrary_precision"))] +#[doc(hidden)] +pub type StructSerializer<'py, P> = PythonStructDictSerializer<'py, P>; + #[doc(hidden)] pub struct PythonStructDictSerializer<'py, P: PythonizeTypes> { py: Python<'py>, @@ -266,7 +283,7 @@ impl<'py, P: PythonizeTypes> ser::Serializer for Pythonizer<'py, P> { type SerializeTupleStruct = PythonCollectionSerializer<'py, P>; type SerializeTupleVariant = PythonTupleVariantSerializer<'py, P>; type SerializeMap = PythonMapSerializer<'py, P>; - type SerializeStruct = PythonStructDictSerializer<'py, P>; + type SerializeStruct = StructSerializer<'py, P>; type SerializeStructVariant = PythonStructVariantSerializer<'py, P>; fn serialize_bool(self, v: bool) -> Result> { @@ -439,12 +456,34 @@ impl<'py, P: PythonizeTypes> ser::Serializer for Pythonizer<'py, P> { self, name: &'static str, len: usize, - ) -> Result> { - Ok(PythonStructDictSerializer { - py: self.py, - builder: P::NamedMap::builder(self.py, len, name)?, - _types: PhantomData, - }) + ) -> Result> { + #[cfg(feature = "arbitrary_precision")] + { + // With arbitrary_precision enabled, a serde_json::Number serializes as a "$serde_json::private::Number" + // struct with a "$serde_json::private::Number" field, whose value is the String in Number::n. + if name == "$serde_json::private::Number" && len == 1 { + return Ok(StructSerializer::Number { + py: self.py, + number_string: None, + _types: PhantomData, + }); + } + + Ok(StructSerializer::Struct(PythonStructDictSerializer { + py: self.py, + builder: P::NamedMap::builder(self.py, len, name)?, + _types: PhantomData, + })) + } + + #[cfg(not(feature = "arbitrary_precision"))] + { + Ok(PythonStructDictSerializer { + py: self.py, + builder: P::NamedMap::builder(self.py, len, name)?, + _types: PhantomData, + }) + } } fn serialize_struct_variant( @@ -569,6 +608,58 @@ impl<'py, P: PythonizeTypes> ser::SerializeMap for PythonMapSerializer<'py, P> { } } +#[cfg(feature = "arbitrary_precision")] +impl<'py, P: PythonizeTypes> ser::SerializeStruct for StructSerializer<'py, P> { + type Ok = Bound<'py, PyAny>; + type Error = PythonizeError; + + fn serialize_field(&mut self, key: &'static str, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + match self { + StructSerializer::Struct(s) => s.serialize_field(key, value), + StructSerializer::Number { number_string, .. } => { + let serde_json::Value::String(s) = value + .serialize(serde_json::value::Serializer) + .map_err(|e| PythonizeError::msg(format!("Failed to serialize number: {}", e)))? + else { + return Err(PythonizeError::msg("Expected string in serde_json::Number")); + }; + + *number_string = Some(s); + Ok(()) + } + } + } + + fn end(self) -> Result> { + match self { + StructSerializer::Struct(s) => s.end(), + StructSerializer::Number { + py, number_string: Some(s), .. + } => { + if let Ok(i) = s.parse::() { + return Ok(PyInt::new(py, i).into_any()); + } + if let Ok(u) = s.parse::() { + return Ok(PyInt::new(py, u).into_any()); + } + if s.chars().any(|c| c == '.' || c == 'e' || c == 'E') { + if let Ok(f) = s.parse::() { + return Ok(PyFloat::new(py, f).into_any()); + } + } + // Fall back to Python's int() constructor, which supports arbitrary precision. + py.get_type::() + .call1((s.as_str(),)) + .map_err(|e| PythonizeError::msg(format!("Invalid number: {}", e))) + } + StructSerializer::Number { .. } => Err(PythonizeError::msg("Empty serde_json::Number")), + } + } +} + impl<'py, P: PythonizeTypes> ser::SerializeStruct for PythonStructDictSerializer<'py, P> { type Ok = Bound<'py, PyAny>; type Error = PythonizeError; diff --git a/tests/test_arbitrary_precision.rs b/tests/test_arbitrary_precision.rs new file mode 100644 index 0000000..dc5b38f --- /dev/null +++ b/tests/test_arbitrary_precision.rs @@ -0,0 +1,108 @@ +#![cfg(feature = "arbitrary_precision")] + +use pyo3::prelude::*; +use pythonize::{depythonize, pythonize}; +use serde_json::Value; + +#[test] +fn test_greater_than_u64_max() { + Python::attach(|py| { + let json_str = r#"18446744073709551616"#; + let value: Value = serde_json::from_str(json_str).unwrap(); + let result = pythonize(py, &value).unwrap(); + let number_str = result.str().unwrap().to_string(); + + assert!(result.is_instance_of::()); + assert_eq!(number_str, "18446744073709551616"); + }); +} + +#[test] +fn test_less_than_i64_min() { + Python::attach(|py| { + let json_str = r#"-9223372036854775809"#; + let value: Value = serde_json::from_str(json_str).unwrap(); + let result = pythonize(py, &value).unwrap(); + let number_str = result.str().unwrap().to_string(); + + assert!(result.is_instance_of::()); + assert_eq!(number_str, "-9223372036854775809"); + }); +} + +#[test] +fn test_float() { + Python::attach(|py| { + let json_str = r#"3.141592653589793238"#; + let value: Value = serde_json::from_str(json_str).unwrap(); + let result = pythonize(py, &value).unwrap(); + let num: f32 = result.extract().unwrap(); + + assert!(result.is_instance_of::()); + assert_eq!(num, 3.141592653589793238); // not {'$serde_json::private::Number': ...} + }); +} + +#[test] +fn test_int() { + Python::attach(|py| { + let json_str = r#"2"#; + let value: Value = serde_json::from_str(json_str).unwrap(); + let result = pythonize(py, &value).unwrap(); + let num: i32 = result.extract().unwrap(); + + assert!(result.is_instance_of::()); + assert_eq!(num, 2); // not {'$serde_json::private::Number': '2'} + }); +} + +#[test] +fn test_serde_error_if_token_empty() { + let json_str = r#"{"$serde_json::private::Number": ""}"#; + let result: Result = serde_json::from_str(json_str); + + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .contains("EOF while parsing a value")); +} + +#[test] +fn test_serde_error_if_token_invalid() { + let json_str = r#"{"$serde_json::private::Number": 2}"#; + let result: Result = serde_json::from_str(json_str); + + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .contains("invalid type: integer `2`, expected string containing a number")); +} + +#[test] +fn test_token_valid() { + Python::attach(|py| { + let json_str = r#"{"$serde_json::private::Number": "2"}"#; + let value: Value = serde_json::from_str(json_str).unwrap(); + let result = pythonize(py, &value).unwrap(); + let num: i32 = result.extract().unwrap(); + + assert!(result.is_instance_of::()); + assert_eq!(num, 2); + }); +} + +#[test] +fn test_depythonize_greater_than_u128_max() { + Python::attach(|py| { + // u128::MAX + 1 + let py_int = py + .eval(c"340282366920938463463374607431768211456", None, None) + .unwrap(); + let value: Value = depythonize(&py_int).unwrap(); + + assert!(value.is_number()); + assert_eq!(value.to_string(), "340282366920938463463374607431768211456"); + }); +}