diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..f8b19380 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +crates/numcodecs-wasm-host-reproducible/tests/round.wasm filter=lfs diff=lfs merge=lfs -text diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0bcabaad..f15ceb7b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -114,6 +114,8 @@ jobs: steps: - name: Checkout the Repository uses: actions/checkout@v2 + with: + lfs: true - name: Install the Rust toolchain uses: actions-rs/toolchain@v1 diff --git a/Cargo.toml b/Cargo.toml index 7d98125d..ee06421e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -81,7 +81,7 @@ burn = { version = "0.17", default-features = false } clap = { version = "4.5", default-features = false } convert_case = { version = "0.8", default-features = false } format_serde_error = { version = "0.3", default-features = false } -indexmap = { version = "2.7.1", default-features = false } +indexmap = { version = "2.10", default-features = false } itertools = { version = "0.14", default-features = false } log = { version = "0.4.27", default-features = false } simple_logger = { version = "5.0", default-features = false } @@ -100,7 +100,7 @@ pyo3-error = { version = "0.5", default-features = false } pyo3-log = { version = "0.12.4", default-features = false } pythonize = { version = "0.25", default-features = false } rand = { version = "0.9.1", default-features = false } -schemars = { version = "=1.0.0-alpha.15", default-features = false } +schemars = { version = "1.0.3", default-features = false } scratch = { version = "1.0", default-features = false } semver = { version = "1.0.23", default-features = false } serde = { version = "1.0.218", default-features = false } @@ -122,7 +122,7 @@ wasmtime = { version = "33.0", default-features = false } wasmtime_runtime_layer = { version = "33.0", default-features = false } wasm-encoder = { version = "0.235", default-features = false } wasm_runtime_layer = { version = "0.5", default-features = false } -wit-bindgen = { version = "0.42", default-features = false } +wit-bindgen = { version = "0.43", default-features = false } wit-component = { version = "0.235", default-features = false } wit-parser = { version = "0.235", default-features = false } wyhash = { version = "0.6", default-features = false } diff --git a/codecs/fourier-network/tests/schema.json b/codecs/fourier-network/tests/schema.json index e6cdafc7..e97ea2dc 100644 --- a/codecs/fourier-network/tests/schema.json +++ b/codecs/fourier-network/tests/schema.json @@ -37,7 +37,7 @@ ], "format": "uint", "minimum": 1, - "description": "The optional mini-batch size used during training\n\n Setting the mini-batch size to `None` disables the use of batching,\n i.e. the network is trained using one large batch that includes the\n full data." + "description": "The optional mini-batch size used during training\n\nSetting the mini-batch size to `None` disables the use of batching,\ni.e. the network is trained using one large batch that includes the\nfull data." }, "seed": { "type": "integer", @@ -61,7 +61,7 @@ "mini_batch_size", "seed" ], - "description": "Fourier network codec which trains and overfits a fourier feature neural\n network on encoding and predicts during decoding.\n\n The approach is based on the papers by Tancik et al. 2020\n ()\n and by Huang and Hoefler 2020 ().", + "description": "Fourier network codec which trains and overfits a fourier feature neural\nnetwork on encoding and predicts during decoding.\n\nThe approach is based on the papers by Tancik et al. 2020\n()\nand by Huang and Hoefler 2020 ().", "title": "FourierNetworkCodec", "$schema": "https://json-schema.org/draft/2020-12/schema" } \ No newline at end of file diff --git a/codecs/jpeg2000/src/ffi/image.rs b/codecs/jpeg2000/src/ffi/image.rs index 4172b045..b0a0b4d1 100644 --- a/codecs/jpeg2000/src/ffi/image.rs +++ b/codecs/jpeg2000/src/ffi/image.rs @@ -25,8 +25,9 @@ impl Image { ) -> Result { let mut image = std::ptr::null_mut(); - if unsafe { openjpeg_sys::opj_read_header(stream.as_raw(), decoder.as_raw(), &mut image) } - != 1 + if unsafe { + openjpeg_sys::opj_read_header(stream.as_raw(), decoder.as_raw(), &raw mut image) + } != 1 { return Err(Jpeg2000Error::InvalidMainHeader); } @@ -56,7 +57,7 @@ impl Image { let image = NonNull::new(unsafe { openjpeg_sys::opj_image_create( 1, - &mut image_params, + &raw mut image_params, openjpeg_sys::OPJ_COLOR_SPACE::OPJ_CLRSPC_GRAY, ) }) diff --git a/codecs/jpeg2000/src/ffi/mod.rs b/codecs/jpeg2000/src/ffi/mod.rs index 778014a1..c775aa15 100644 --- a/codecs/jpeg2000/src/ffi/mod.rs +++ b/codecs/jpeg2000/src/ffi/mod.rs @@ -103,7 +103,7 @@ pub fn encode_into( let mut image = Image::from_gray_data(data, width, height)?; if unsafe { - openjpeg_sys::opj_setup_encoder(encoder.as_raw(), &mut encode_params, image.as_raw()) + openjpeg_sys::opj_setup_encoder(encoder.as_raw(), &raw mut encode_params, image.as_raw()) } != 1 { return Err(Jpeg2000Error::EncoderSetupError); @@ -136,7 +136,7 @@ pub fn decode(bytes: &[u8]) -> Result<(Vec, (usize, usize let mut decode_params = unsafe { decode_params.assume_init() }; decode_params.decod_format = 1; // JP2 - if unsafe { openjpeg_sys::opj_setup_decoder(decoder.as_raw(), &mut decode_params) } != 1 { + if unsafe { openjpeg_sys::opj_setup_decoder(decoder.as_raw(), &raw mut decode_params) } != 1 { return Err(Jpeg2000Error::DecoderSetupError); } diff --git a/codecs/jpeg2000/tests/schema.json b/codecs/jpeg2000/tests/schema.json index 59e96fa2..dc712c7c 100644 --- a/codecs/jpeg2000/tests/schema.json +++ b/codecs/jpeg2000/tests/schema.json @@ -54,7 +54,7 @@ "description": "Lossless compression" } ], - "description": "Codec providing compression using JPEG 2000.\n\n Arrays that are higher-dimensional than 2D are encoded by compressing each\n 2D slice with JPEG 2000 independently. Specifically, the array's shape is\n interpreted as `[.., height, width]`. If you want to compress 2D slices\n along two different axes, you can swizzle the array axes beforehand.", + "description": "Codec providing compression using JPEG 2000.\n\nArrays that are higher-dimensional than 2D are encoded by compressing each\n2D slice with JPEG 2000 independently. Specifically, the array's shape is\ninterpreted as `[.., height, width]`. If you want to compress 2D slices\nalong two different axes, you can swizzle the array axes beforehand.", "properties": { "_version": { "type": "string", diff --git a/codecs/pco/tests/schema.json b/codecs/pco/tests/schema.json index 3cb2c33d..ed8f127b 100644 --- a/codecs/pco/tests/schema.json +++ b/codecs/pco/tests/schema.json @@ -21,7 +21,7 @@ 11, 12 ], - "description": "Compression level, ranging from 0 (weak) over 8 (very good) to 12\n (expensive)" + "description": "Compression level, ranging from 0 (weak) over 8 (very good) to 12\n(expensive)" }, "_version": { "type": "string", @@ -46,7 +46,7 @@ "required": [ "mode" ], - "description": "Automatically detects a good mode.\n\n This works well most of the time, but costs some compression time and\n can select a bad mode in adversarial cases." + "description": "Automatically detects a good mode.\n\nThis works well most of the time, but costs some compression time and\ncan select a bad mode in adversarial cases." }, { "type": "object", @@ -63,7 +63,7 @@ }, { "type": "object", - "description": "Tries using the `FloatMult` mode with a given base.\n\n Only applies to floating-point types.", + "description": "Tries using the `FloatMult` mode with a given base.\n\nOnly applies to floating-point types.", "properties": { "float_mult_base": { "type": "number", @@ -82,7 +82,7 @@ }, { "type": "object", - "description": "Tries using the `FloatQuant` mode with the given number of bits of\n quantization.\n\n Only applies to floating-point types.", + "description": "Tries using the `FloatQuant` mode with the given number of bits of\nquantization.\n\nOnly applies to floating-point types.", "properties": { "float_quant_bits": { "type": "integer", @@ -102,7 +102,7 @@ }, { "type": "object", - "description": "Tries using the `IntMult` mode with a given base.\n\n Only applies to integer types.", + "description": "Tries using the `IntMult` mode with a given base.\n\nOnly applies to integer types.", "properties": { "int_mult_base": { "type": "integer", @@ -135,7 +135,7 @@ "required": [ "delta" ], - "description": "Automatically detects a detects a good delta encoding.\n\n This works well most of the time, but costs some compression time and\n can select a bad delta encoding in adversarial cases." + "description": "Automatically detects a detects a good delta encoding.\n\nThis works well most of the time, but costs some compression time and\ncan select a bad delta encoding in adversarial cases." }, { "type": "object", @@ -148,11 +148,11 @@ "required": [ "delta" ], - "description": "Never uses delta encoding.\n\n This is best if your data is in a random order or adjacent numbers have\n no relation to each other." + "description": "Never uses delta encoding.\n\nThis is best if your data is in a random order or adjacent numbers have\nno relation to each other." }, { "type": "object", - "description": "Tries taking nth order consecutive deltas.\n\n Supports a delta encoding order up to 7. For instance, 1st order is\n just regular delta encoding, 2nd is deltas-of-deltas, etc. It is legal\n to use 0th order, but it is identical to None.", + "description": "Tries taking nth order consecutive deltas.\n\nSupports a delta encoding order up to 7. For instance, 1st order is\njust regular delta encoding, 2nd is deltas-of-deltas, etc. It is legal\nto use 0th order, but it is identical to None.", "properties": { "delta_encoding_order": { "type": "integer", @@ -189,7 +189,7 @@ "required": [ "delta" ], - "description": "Tries delta encoding according to an extra latent variable of\n \"lookback\".\n\n This can improve compression ratio when there are nontrivial patterns\n in the array, but reduces compression speed substantially." + "description": "Tries delta encoding according to an extra latent variable of\n\"lookback\".\n\nThis can improve compression ratio when there are nontrivial patterns\nin the array, but reduces compression speed substantially." } ] } @@ -197,7 +197,7 @@ "oneOf": [ { "type": "object", - "description": "Divide the chunk into equal pages of up to this many numbers.\n\n For example, with equal pages up to 100,000, a chunk of 150,000 numbers\n would be divided into 2 pages, each of 75,000 numbers.", + "description": "Divide the chunk into equal pages of up to this many numbers.\n\nFor example, with equal pages up to 100,000, a chunk of 150,000 numbers\nwould be divided into 2 pages, each of 75,000 numbers.", "properties": { "equal_pages_up_to": { "type": "integer", diff --git a/codecs/random-projection/tests/schema.json b/codecs/random-projection/tests/schema.json index b5f7bb63..9f48b20f 100644 --- a/codecs/random-projection/tests/schema.json +++ b/codecs/random-projection/tests/schema.json @@ -17,7 +17,7 @@ "required": [ "seed" ], - "description": "Codec that uses random projections to reduce the dimensionality of high-\n dimensional data to compress it.\n\n A two-dimensional array of shape `$N \\times D$` is encoded as n array of\n shape `$N \\times K$`, where `$K$` is either set explicitly or chosen using\n the the Johnson-Lindenstrauss lemma. For `$K$` to be smaller than `$D$`,\n `$D$` must be quite large. Therefore, this codec should only applied on\n large datasets as it otherwise significantly inflates the data size instead\n of reducing it.\n\n Choosing a lower distortion rate `epsilon` will improve the quality of the\n lossy compression, i.e. reduce the compression error, at the cost of\n increasing `$K$`.\n\n This codec only supports finite floating point data.", + "description": "Codec that uses random projections to reduce the dimensionality of high-\ndimensional data to compress it.\n\nA two-dimensional array of shape `$N \\times D$` is encoded as n array of\nshape `$N \\times K$`, where `$K$` is either set explicitly or chosen using\nthe the Johnson-Lindenstrauss lemma. For `$K$` to be smaller than `$D$`,\n`$D$` must be quite large. Therefore, this codec should only applied on\nlarge datasets as it otherwise significantly inflates the data size instead\nof reducing it.\n\nChoosing a lower distortion rate `epsilon` will improve the quality of the\nlossy compression, i.e. reduce the compression error, at the cost of\nincreasing `$K$`.\n\nThis codec only supports finite floating point data.", "allOf": [ { "oneOf": [ @@ -39,7 +39,7 @@ "reduction", "epsilon" ], - "description": "The reduced dimensionality `$K$` is derived from `epsilon`, as defined\n by the Johnson-Lindenstrauss lemma." + "description": "The reduced dimensionality `$K$` is derived from `epsilon`, as defined\nby the Johnson-Lindenstrauss lemma." }, { "type": "object", @@ -59,7 +59,7 @@ "reduction", "k" ], - "description": "The reduced dimensionality `$K$`, to which the data is projected, is\n given explicitly." + "description": "The reduced dimensionality `$K$`, to which the data is projected, is\ngiven explicitly." } ] }, @@ -76,7 +76,7 @@ "required": [ "projection" ], - "description": "The random projection matrix is dense and its components are sampled\n from `$\\text{N}\\left( 0, \\frac{1}{k} \\right)$`" + "description": "The random projection matrix is dense and its components are sampled\nfrom `$\\text{N}\\left( 0, \\frac{1}{k} \\right)$`" }, { "type": "object", @@ -88,7 +88,7 @@ ], "exclusiveMinimum": 0.0, "maximum": 1.0, - "description": "The `density` of the sparse projection matrix.\n\n Setting `density` to `$\\frac{1}{3}$` reproduces the settings by\n Achlioptas [^1]. If `density` is `None`, it is set to\n `$\\frac{1}{\\sqrt{d}}$`,\n the minimum density as recommended by Li et al [^2].\n\n\n [^1]: Achlioptas, D. (2003). Database-friendly random projections:\n Johnson-Lindenstrauss with binary coins. *Journal of Computer\n and System Sciences*, 66(4), 671-687. Available from:\n [doi:10.1016/S0022-0000(03)00025-4](https://doi.org/10.1016/S0022-0000(03)00025-4).\n\n [^2]: Li, P., Hastie, T. J., and Church, K. W. (2006). Very sparse\n random projections. In *Proceedings of the 12th ACM SIGKDD\n international conference on Knowledge discovery and data\n mining (KDD '06)*. Association for Computing Machinery, New\n York, NY, USA, 287–296. Available from:\n [doi:10.1145/1150402.1150436](https://doi.org/10.1145/1150402.1150436)." + "description": "The `density` of the sparse projection matrix.\n\nSetting `density` to `$\\frac{1}{3}$` reproduces the settings by\nAchlioptas [^1]. If `density` is `None`, it is set to\n`$\\frac{1}{\\sqrt{d}}$`,\nthe minimum density as recommended by Li et al [^2].\n\n\n[^1]: Achlioptas, D. (2003). Database-friendly random projections:\n Johnson-Lindenstrauss with binary coins. *Journal of Computer\n and System Sciences*, 66(4), 671-687. Available from:\n [doi:10.1016/S0022-0000(03)00025-4](https://doi.org/10.1016/S0022-0000(03)00025-4).\n\n[^2]: Li, P., Hastie, T. J., and Church, K. W. (2006). Very sparse\n random projections. In *Proceedings of the 12th ACM SIGKDD\n international conference on Knowledge discovery and data\n mining (KDD '06)*. Association for Computing Machinery, New\n York, NY, USA, 287–296. Available from:\n [doi:10.1145/1150402.1150436](https://doi.org/10.1145/1150402.1150436)." }, "projection": { "type": "string", @@ -98,7 +98,7 @@ "required": [ "projection" ], - "description": "The random projection matrix is sparse where only `density`% of entries\n are non-zero.\n\n The matrix's components are sampled from\n\n - `$-\\sqrt{\\frac{1}{k \\cdot density}}$` with probability\n `$0.5 \\cdot density$`\n - `$0$` with probability `$1 - density$`\n - `$+\\sqrt{\\frac{1}{k \\cdot density}}$` with probability\n `$0.5 \\cdot density$`" + "description": "The random projection matrix is sparse where only `density`% of entries\nare non-zero.\n\nThe matrix's components are sampled from\n\n- `$-\\sqrt{\\frac{1}{k \\cdot density}}$` with probability\n `$0.5 \\cdot density$`\n- `$0$` with probability `$1 - density$`\n- `$+\\sqrt{\\frac{1}{k \\cdot density}}$` with probability\n `$0.5 \\cdot density$`" } ] } diff --git a/codecs/sperr/tests/schema.json b/codecs/sperr/tests/schema.json index ac46d915..d849d6e2 100644 --- a/codecs/sperr/tests/schema.json +++ b/codecs/sperr/tests/schema.json @@ -60,7 +60,7 @@ "description": "Fixed point-wise (absolute) error" } ], - "description": "Codec providing compression using SPERR.\n\n Arrays that are higher-dimensional than 3D are encoded by compressing each\n 3D slice with SPERR independently. Specifically, the array's shape is\n interpreted as `[.., depth, height, width]`. If you want to compress 3D\n slices along three different axes, you can swizzle the array axes\n beforehand.", + "description": "Codec providing compression using SPERR.\n\nArrays that are higher-dimensional than 3D are encoded by compressing each\n3D slice with SPERR independently. Specifically, the array's shape is\ninterpreted as `[.., depth, height, width]`. If you want to compress 3D\nslices along three different axes, you can swizzle the array axes\nbeforehand.", "properties": { "_version": { "type": "string", diff --git a/codecs/sz3/tests/schema.json b/codecs/sz3/tests/schema.json index 70d52aef..c32d21c6 100644 --- a/codecs/sz3/tests/schema.json +++ b/codecs/sz3/tests/schema.json @@ -121,7 +121,7 @@ "oneOf": [ { "type": "object", - "description": "Errors are bounded by *both* the absolute and relative error, i.e. by\n whichever bound is stricter", + "description": "Errors are bounded by *both* the absolute and relative error, i.e. by\nwhichever bound is stricter", "properties": { "eb_abs": { "type": "number", @@ -146,7 +146,7 @@ }, { "type": "object", - "description": "Errors are bounded by *either* the absolute or relative error, i.e. by\n whichever bound is weaker", + "description": "Errors are bounded by *either* the absolute or relative error, i.e. by\nwhichever bound is weaker", "properties": { "eb_abs": { "type": "number", diff --git a/codecs/zfp-classic/tests/schema.json b/codecs/zfp-classic/tests/schema.json index c3427b23..710e8594 100644 --- a/codecs/zfp-classic/tests/schema.json +++ b/codecs/zfp-classic/tests/schema.json @@ -27,7 +27,7 @@ "min_exp": { "type": "integer", "format": "int32", - "description": "Smallest absolute bit plane number encoded.\n\n This parameter applies to floating-point data only and is ignored\n for integer data." + "description": "Smallest absolute bit plane number encoded.\n\nThis parameter applies to floating-point data only and is ignored\nfor integer data." }, "mode": { "type": "string", @@ -44,7 +44,7 @@ }, { "type": "object", - "description": "In fixed-rate mode, each d-dimensional compressed block of `$4^d$`\n values is stored using a fixed number of bits. This number of\n compressed bits per block is amortized over the `$4^d$` values to give\n a rate of `$rate = \\frac{maxbits}{4^d}$` in bits per value.", + "description": "In fixed-rate mode, each d-dimensional compressed block of `$4^d$`\nvalues is stored using a fixed number of bits. This number of\ncompressed bits per block is amortized over the `$4^d$` values to give\na rate of `$rate = \\frac{maxbits}{4^d}$` in bits per value.", "properties": { "rate": { "type": "number", @@ -63,7 +63,7 @@ }, { "type": "object", - "description": "In fixed-precision mode, the number of bits used to encode a block may\n vary, but the number of bit planes (the precision) encoded for the\n transform coefficients is fixed.", + "description": "In fixed-precision mode, the number of bits used to encode a block may\nvary, but the number of bit planes (the precision) encoded for the\ntransform coefficients is fixed.", "properties": { "precision": { "type": "integer", @@ -83,7 +83,7 @@ }, { "type": "object", - "description": "In fixed-accuracy mode, all transform coefficient bit planes up to a\n minimum bit plane number are encoded. The smallest absolute bit plane\n number is chosen such that\n `$minexp = \\text{floor}(\\log_{2}(tolerance))$`.", + "description": "In fixed-accuracy mode, all transform coefficient bit planes up to a\nminimum bit plane number are encoded. The smallest absolute bit plane\nnumber is chosen such that\n`$minexp = \\text{floor}(\\log_{2}(tolerance))$`.", "properties": { "tolerance": { "type": "number", @@ -111,7 +111,7 @@ "required": [ "mode" ], - "description": "Lossless per-block compression that preserves integer and floating point\n bit patterns." + "description": "Lossless per-block compression that preserves integer and floating point\nbit patterns." } ], "description": "Codec providing compression using ZFP (classic)", diff --git a/codecs/zfp/tests/schema.json b/codecs/zfp/tests/schema.json index d63a7f52..da4b2f9b 100644 --- a/codecs/zfp/tests/schema.json +++ b/codecs/zfp/tests/schema.json @@ -27,7 +27,7 @@ "min_exp": { "type": "integer", "format": "int32", - "description": "Smallest absolute bit plane number encoded.\n\n This parameter applies to floating-point data only and is ignored\n for integer data." + "description": "Smallest absolute bit plane number encoded.\n\nThis parameter applies to floating-point data only and is ignored\nfor integer data." }, "mode": { "type": "string", @@ -44,7 +44,7 @@ }, { "type": "object", - "description": "In fixed-rate mode, each d-dimensional compressed block of `$4^d$`\n values is stored using a fixed number of bits. This number of\n compressed bits per block is amortized over the `$4^d$` values to give\n a rate of `$rate = \\frac{maxbits}{4^d}$` in bits per value.", + "description": "In fixed-rate mode, each d-dimensional compressed block of `$4^d$`\nvalues is stored using a fixed number of bits. This number of\ncompressed bits per block is amortized over the `$4^d$` values to give\na rate of `$rate = \\frac{maxbits}{4^d}$` in bits per value.", "properties": { "rate": { "type": "number", @@ -63,7 +63,7 @@ }, { "type": "object", - "description": "In fixed-precision mode, the number of bits used to encode a block may\n vary, but the number of bit planes (the precision) encoded for the\n transform coefficients is fixed.", + "description": "In fixed-precision mode, the number of bits used to encode a block may\nvary, but the number of bit planes (the precision) encoded for the\ntransform coefficients is fixed.", "properties": { "precision": { "type": "integer", @@ -83,7 +83,7 @@ }, { "type": "object", - "description": "In fixed-accuracy mode, all transform coefficient bit planes up to a\n minimum bit plane number are encoded. The smallest absolute bit plane\n number is chosen such that\n `$minexp = \\text{floor}(\\log_{2}(tolerance))$`.", + "description": "In fixed-accuracy mode, all transform coefficient bit planes up to a\nminimum bit plane number are encoded. The smallest absolute bit plane\nnumber is chosen such that\n`$minexp = \\text{floor}(\\log_{2}(tolerance))$`.", "properties": { "tolerance": { "type": "number", @@ -111,7 +111,7 @@ "required": [ "mode" ], - "description": "Lossless per-block compression that preserves integer and floating point\n bit patterns." + "description": "Lossless per-block compression that preserves integer and floating point\nbit patterns." } ], "description": "Codec providing compression using ZFP", diff --git a/crates/numcodecs-python/src/schema.rs b/crates/numcodecs-python/src/schema.rs index f9dc4cee..96988362 100644 --- a/crates/numcodecs-python/src/schema.rs +++ b/crates/numcodecs-python/src/schema.rs @@ -137,7 +137,7 @@ pub fn docs_from_schema(schema: &Schema) -> Option { let mut docs = String::new(); if let Some(Value::String(description)) = schema.get("description") { - docs.push_str(&derust_doc_comment(description)); + docs.push_str(description); docs.push_str("\n\n"); } @@ -307,7 +307,7 @@ fn extend_parameters_from_one_of_schema<'a>( _ => &[], }; let variant_docs = match schema.get("description") { - Some(Value::String(docs)) => Some(derust_doc_comment(docs)), + Some(Value::String(docs)) => Some(docs), _ => None, }; @@ -321,7 +321,7 @@ fn extend_parameters_from_one_of_schema<'a>( name, parameter, required, - variant_docs.clone(), + variant_docs.map(|x| Cow::Borrowed(x.as_str())), )); } Entry::Occupied(mut entry) => { @@ -330,7 +330,7 @@ fn extend_parameters_from_one_of_schema<'a>( name, parameter, required, - variant_docs.clone(), + variant_docs.map(|x| Cow::Borrowed(x.as_str())), ); } } @@ -352,22 +352,6 @@ fn extend_parameters_from_one_of_schema<'a>( } } -fn derust_doc_comment(docs: &str) -> Cow { - if docs.trim() != docs { - return Cow::Borrowed(docs); - } - - if !docs - .split('\n') - .skip(1) - .all(|l| l.trim().is_empty() || l.starts_with(' ')) - { - return Cow::Borrowed(docs); - } - - Cow::Owned(docs.replace("\n ", "\n")) -} - #[derive(Debug, Error)] pub enum SchemaError { #[error("codec class' cached config schema is invalid")] @@ -412,7 +396,7 @@ impl<'a> Parameter<'a> { .any(|r| matches!(r, Value::String(n) if n == name)), default: parameter.get("default"), docs: match parameter.get("description") { - Some(Value::String(docs)) => Some(derust_doc_comment(docs)), + Some(Value::String(docs)) => Some(Cow::Borrowed(docs.as_str())), _ => None, }, } @@ -541,7 +525,7 @@ mod tests { fn schema() { assert_eq!( format!("{}", schema_for!(MyCodec).to_value()), - r#"{"type":"object","properties":{"param":{"type":["integer","null"],"format":"int32","description":"An optional integer value."}},"unevaluatedProperties":false,"oneOf":[{"type":"object","description":"Mode a.\n\n It gets another line.","properties":{"value":{"type":"boolean","description":"A boolean value. And some really, really, really, long first\n line that wraps around.\n\n With multiple lines of comments."},"common":{"type":"string","description":"A common string value.\n\n Something else here."},"mode":{"type":"string","const":"A"}},"required":["mode","value","common"]},{"type":"object","description":"Mode b.","properties":{"common":{"type":"string","description":"A common string value.\n\n Something else here."},"mode":{"type":"string","const":"B"}},"required":["mode","common"]}],"description":"A codec that does something on encoding and decoding.\n\n With multiple lines of comments.","title":"MyCodec","$schema":"https://json-schema.org/draft/2020-12/schema"}"# + r#"{"type":"object","properties":{"param":{"type":["integer","null"],"format":"int32","description":"An optional integer value."}},"unevaluatedProperties":false,"oneOf":[{"type":"object","description":"Mode a.\n\nIt gets another line.","properties":{"value":{"type":"boolean","description":"A boolean value. And some really, really, really, long first\nline that wraps around.\n\nWith multiple lines of comments."},"common":{"type":"string","description":"A common string value.\n\nSomething else here."},"mode":{"type":"string","const":"A"}},"required":["mode","value","common"]},{"type":"object","description":"Mode b.","properties":{"common":{"type":"string","description":"A common string value.\n\nSomething else here."},"mode":{"type":"string","const":"B"}},"required":["mode","common"]}],"description":"A codec that does something on encoding and decoding.\n\nWith multiple lines of comments.","title":"MyCodec","$schema":"https://json-schema.org/draft/2020-12/schema"}"# ); } diff --git a/crates/numcodecs-wasm-host-reproducible/Cargo.toml b/crates/numcodecs-wasm-host-reproducible/Cargo.toml index 8de9011e..0c454105 100644 --- a/crates/numcodecs-wasm-host-reproducible/Cargo.toml +++ b/crates/numcodecs-wasm-host-reproducible/Cargo.toml @@ -42,5 +42,11 @@ wit-component = { workspace = true } wit-parser = { workspace = true } vecmap-rs = { workspace = true } +[dev-dependencies] +ndarray = { workspace = true } +ndarray-rand = { workspace = true } +wasmtime = { workspace = true, features = ["runtime", "cranelift", "cache", "gc-null"] } +wasmtime_runtime_layer = { workspace = true } + [lints] workspace = true diff --git a/crates/numcodecs-wasm-host-reproducible/src/lib.rs b/crates/numcodecs-wasm-host-reproducible/src/lib.rs index d35f1cfe..0e38ccf0 100644 --- a/crates/numcodecs-wasm-host-reproducible/src/lib.rs +++ b/crates/numcodecs-wasm-host-reproducible/src/lib.rs @@ -30,4 +30,7 @@ mod logging; mod stdio; mod transform; +#[cfg(test)] +mod tests; + pub use codec::{ReproducibleWasmCodec, ReproducibleWasmCodecError, ReproducibleWasmCodecType}; diff --git a/crates/numcodecs-wasm-host-reproducible/src/tests.rs b/crates/numcodecs-wasm-host-reproducible/src/tests.rs new file mode 100644 index 00000000..3b8db2d9 --- /dev/null +++ b/crates/numcodecs-wasm-host-reproducible/src/tests.rs @@ -0,0 +1,92 @@ +use ndarray::Array; +use ndarray_rand::RandomExt; +use ndarray_rand::rand_distr::Normal; +use numcodecs::{Codec, DynCodecType}; + +use crate::ReproducibleWasmCodecType; + +// codecs don't need to preallocate the full 4GB wasm32 memory space, but +// still give them a reasonable static allocation for better codegen +const WASM_PAGE_SIZE: u32 = 0x10000 /* 64kiB */; +const MEMORY_RESERVATION: u32 = WASM_PAGE_SIZE * 16 * 64 /* 64MiB */; +const MEMORY_GUARD_SIZE: u32 = WASM_PAGE_SIZE * 16 * 64 /* 64MiB */; +const MEMORY_RESERVATION_FOR_GROWTH: u32 = WASM_PAGE_SIZE * 16 * 64 /* 64MiB */; + +#[test] +fn codec_roundtrip() { + // keep in sync with numcodecs-wasm + let mut config = wasmtime::Config::new(); + config + .cranelift_nan_canonicalization(true) + .cranelift_opt_level(wasmtime::OptLevel::Speed) + .memory_reservation(u64::from(MEMORY_RESERVATION)) + .memory_guard_size(u64::from(MEMORY_GUARD_SIZE)) + .memory_reservation_for_growth(u64::from(MEMORY_RESERVATION_FOR_GROWTH)) + // WASM feature restrictions, follows the feature validation in + // numcodecs_wasm_host_reproducible::engine::ValidatedModule::new + .wasm_bulk_memory(true) + .wasm_custom_page_sizes(false) + .wasm_extended_const(false) + .wasm_function_references(false) + .wasm_gc(false) + .wasm_memory64(false) + .wasm_multi_memory(true) + .wasm_multi_value(true) + .wasm_reference_types(false) + .wasm_relaxed_simd(false) + .wasm_simd(true) + .wasm_tail_call(false) + .wasm_backtrace_details(wasmtime::WasmBacktraceDetails::Enable) + // wasmtime is compiled without the `threads` feature + // .wasm_threads(false) + .wasm_wide_arithmetic(true); + + wasmtime::Cache::from_file(None) + .map(|cache| config.cache(Some(cache))) + .unwrap(); + + let engine = wasmtime_runtime_layer::Engine::new(wasmtime::Engine::new(&config).unwrap()); + + let ty = match ReproducibleWasmCodecType::new(engine, include_bytes!("../tests/round.wasm")) { + Ok(ty) => ty, + Err(err) => panic!( + "ReproducibleWasmCodecType::new:\n===\n{err}\n===\n{err:?}\n===\n{err:#}\n===\n{err:#?}\n===\n" + ), + }; + + assert_eq!(ty.codec_id(), "round.rs"); + + let codec = match ty.codec_from_config(serde_json::json!({ "precision": 1 })) { + Ok(codec) => codec, + Err(err) => panic!( + "ReproducibleWasmCodecType::codec_from_config:\n===\n{err}\n===\n{err:?}\n===\n{err:#}\n===\n{err:#?}\n===\n" + ), + }; + + let data = Array::random((256, 256), Normal::new(0.0, 1.0).unwrap()); + + let encoded = match codec.encode(numcodecs::AnyArray::F64(data.clone().into_dyn()).into_cow()) { + Ok(encoded) => encoded, + Err(err) => panic!( + "ReproducibleWasmCodec::encode:\n===\n{err}\n===\n{err:?}\n===\n{err:#}\n===\n{err:#?}\n===\n" + ), + }; + + let mut decode_into = numcodecs::AnyArray::F64(Array::zeros((256, 256)).into_dyn()); + + match codec.decode_into(encoded.view(), decode_into.view_mut()) { + Ok(()) => (), + Err(err) => panic!( + "ReproducibleWasmCodec::decode_into:\n===\n{err}\n===\n{err:?}\n===\n{err:#}\n===\n{err:#?}\n===\n" + ), + }; + + let decoded = match codec.decode(encoded.into_cow()) { + Ok(decoded) => decoded, + Err(err) => panic!( + "ReproducibleWasmCodec::decode:\n===\n{err}\n===\n{err:?}\n===\n{err:#}\n===\n{err:#?}\n===\n" + ), + }; + + assert_eq!(decoded, decode_into); +} diff --git a/crates/numcodecs-wasm-host-reproducible/src/transform/instcnt.rs b/crates/numcodecs-wasm-host-reproducible/src/transform/instcnt.rs index 22a9d61f..9e744d4c 100644 --- a/crates/numcodecs-wasm-host-reproducible/src/transform/instcnt.rs +++ b/crates/numcodecs-wasm-host-reproducible/src/transform/instcnt.rs @@ -292,7 +292,8 @@ impl InstructionCounterInjecterReencoder { wasmparser::Operator::Drop | wasmparser::Operator::Select => Some(false), // === Reference types === // no control flow - wasmparser::Operator::TypedSelect { .. } => Some(false), + wasmparser::Operator::TypedSelect { .. } + | wasmparser::Operator::TypedSelectMulti { .. } => Some(false), // === MVP === // no control flow wasmparser::Operator::LocalGet { .. } diff --git a/crates/numcodecs-wasm-host-reproducible/src/transform/mod.rs b/crates/numcodecs-wasm-host-reproducible/src/transform/mod.rs index 64b7cc58..45cdce17 100644 --- a/crates/numcodecs-wasm-host-reproducible/src/transform/mod.rs +++ b/crates/numcodecs-wasm-host-reproducible/src/transform/mod.rs @@ -229,7 +229,14 @@ struct PackageWithPorts { fn register_wasi_component_packages( wac: &mut wac_graph::CompositionGraph, ) -> Result, Error> { - let wasi_component_packages = wasi_sandboxed_component_provider::ALL_COMPONENTS + // TODO: switch to merged component with wasi_sandboxed_component_provider >= v0.2.4 + // const WASI_COMPONENTS: &[(&str, &[u8])] = &[( + // "wasi-sandboxed:merged", + // wasi_sandboxed_component_provider::MERGED_COMPONENT, + // )]; + const WASI_COMPONENTS: &[(&str, &[u8])] = wasi_sandboxed_component_provider::ALL_COMPONENTS; + + let wasi_component_packages = WASI_COMPONENTS .iter() .map(|(component_name, component_bytes)| -> Result<_, Error> { let component_package = wac_graph::types::Package::from_bytes( diff --git a/crates/numcodecs-wasm-host-reproducible/src/transform/nan.rs b/crates/numcodecs-wasm-host-reproducible/src/transform/nan.rs index 60f142eb..1d40a64c 100644 --- a/crates/numcodecs-wasm-host-reproducible/src/transform/nan.rs +++ b/crates/numcodecs-wasm-host-reproducible/src/transform/nan.rs @@ -324,7 +324,8 @@ impl NaNCanonicaliserReencoder { wasmparser::Operator::Drop | wasmparser::Operator::Select => Ok(None), // === Reference types === // non-float operation - wasmparser::Operator::TypedSelect { .. } => Ok(None), + wasmparser::Operator::TypedSelect { .. } + | wasmparser::Operator::TypedSelectMulti { .. } => Ok(None), // locals may contain floats, but get/set/tee are deterministic wasmparser::Operator::LocalGet { .. } | wasmparser::Operator::LocalSet { .. } diff --git a/crates/numcodecs-wasm-host-reproducible/tests/round.wasm b/crates/numcodecs-wasm-host-reproducible/tests/round.wasm new file mode 100644 index 00000000..acec20e1 --- /dev/null +++ b/crates/numcodecs-wasm-host-reproducible/tests/round.wasm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9498652bc5911a86d50632baedca282036eb20169736acebbd251c7242efbeae +size 378559