diff --git a/.cargo/config b/.cargo/config index 463a7ac..62921b0 100644 --- a/.cargo/config +++ b/.cargo/config @@ -1,25 +1,33 @@ [unstable] -build-std = ["core"] +# build-std = ["core"] +configurable-env = true [target.thumbv6m-none-eabi] rustflags = [ "-C", "relocation-model=ropi", "-C", "link-arg=-Tscript.ld", - "-Z", "emit-stack-sizes", - "--cfg", "target_os=\"nanos\"", "-C", "opt-level=3", - "-C", "link-arg=--target=thumbv6m-none-eabi", "-C", "passes=ledger-ropi", + "-C", "codegen-units=1", + "-C", "lto", + "-C", "embed-bitcode", + "--emit=obj,llvm-ir,llvm-bc", "--cfg", "target_os=\"nanos\"", - "--emit=llvm-ir" + "-Z", "macro-backtrace" +# "-Z", "print-type-sizes", ] -linker = "clang" -runner = "speculos --display headless -k 2.0" +linker = "armv6m-unknown-none-eabi-clang" +runner = "./speculos-wrapper -k 2.0" [build] -target = "thumbv6m-none-eabi" -# target = "x86_64-unknown-linux-gnu" +# target = "thumbv6m-none-eabi" +# rustdocflags = [ "--cfg", "target_os=\"nanos\"" ] [alias] +tt = "test --features extra_debug" br = "build --release" -stackcheck = "stack-sizes --release --bin rust-app --target=thumbv6m-none-eabi" +stackcheck = "stack-sizes --release --bin pocket --target=thumbv6m-none-eabi" +unit = "test --features=speculos -Z build-std=core --lib" + +[env] +RUST_TEST_THREADS="1" diff --git a/Cargo.toml b/Cargo.toml index ac27f9e..2beda60 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,9 +18,13 @@ generic-array = { version = "0.14.4", default-features = false } log = "0.4.14" paste = "1.0" bstringify = "0.1.2" +enum-init = { git = "https://github.com/obsidiansystems/enum-init" } +num-traits = { version = "0.2.14", default-features = false } +num-derive = "0.3.3" +trie-enum = { git = "https://github.com/ea-nasir/trie-enum" } [dependencies.ledger-log] -git = "https://github.com/obsidiansystems/ledger-platform" +git = "https://github.com/ea-nasir/ledger-log" features = ["log_trace"] optional = true diff --git a/src/async_parser.rs b/src/async_parser.rs new file mode 100644 index 0000000..f2dd168 --- /dev/null +++ b/src/async_parser.rs @@ -0,0 +1,292 @@ +//! Parser implmementation using Futures. +//! +//! The main entry point of this module is [AsyncParser]. +//! +//! Note: Currently, all of the parsers in this module are implemented using async blocks, but it is +//! worth remembering that if for some reason one of them is exhibiting poor stack / state +//! behavior, we _can_ manually construct a state type and directly implement Future for that +//! state. +//! +use crate::schema::*; +use crate::interp::{SubInterp,DefaultInterp,Action, ObserveBytes, DropInterp}; +use crate::endianness::{Endianness, Convert}; + +use core::future::Future; +use core::convert::TryInto; +use arrayvec::ArrayVec; + +pub trait HasDefParser where DefaultInterp: HasOutput { + fn def_parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c>; + + /// Type synonym for the future returned by this parser. + type State<'c>: Future>::Output>; +} + +impl HasDefParser for T where DefaultInterp: AsyncParser { + fn def_parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + >::parse(&DefaultInterp, input) + } + type State<'c> = impl Future>::Output>; +} + +/// Reject the parse. +pub fn reject() -> impl Future { + // Do some out-of-band rejection thingie + core::future::pending() +} + +/// Readable defines an interface for input to a parser. +pub trait Readable { + /// Type alias for the future type of read + type OutFut<'a, const N: usize> : 'a + Future; + /// read N bytes from this Readable; returns a future that will complete with a byte array of + /// the result. + fn read<'a: 'b, 'b, const N: usize>(&'a mut self) -> Self::OutFut<'b, N>; +} + +pub trait HasOutput { + type Output; +} + +/// Core trait; parser that consumes a Readable according to the Schema and returns a future returning the result type. +/// +/// +pub trait AsyncParser : HasOutput { + /// Parse input, returning a future that returns this parser's return type for this schema. + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c>; + + /// Type synonym for the future returned by this parser. + type State<'c>: Future; + +} + +impl, const N: usize> HasOutput> for SubInterp { + type Output = [S::Output; N]; +} + +impl AsyncParser, BS> for SubInterp where T : DefaultArray, S: AsyncParser { + type State<'c> = impl Future>::Output; N]>; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + async move { + let mut accumulator = ArrayVec::<>::Output, N>::new(); + while !accumulator.is_full() { + accumulator.push(>::parse(&self.0, input).await); + } + match accumulator.take().into_inner() { + Ok(rv) => rv, + _ => reject().await, + } + } + } +} + +impl HasOutput for DefaultInterp { + type Output = u8; +} + +impl AsyncParser for DefaultInterp { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + async move { + let [u] = input.read().await; + return u; + } + } +} + +/// DefaultArray marks array element types where there is no specialization for the array type. +/// +/// This allows trait selection to operate with positive reasoning except for +/// where we implement !DefaultArray explicitly for particular types, such as Byte. +pub auto trait DefaultArray { } + +impl !DefaultArray for Byte { } + +/// We can implement DefaultInterp for [u8; N] becuase Byte has !DefaultArray. +impl HasOutput> for DefaultInterp { + type Output = [u8; N]; +} + +/// We can implement DefaultInterp for [u8; N] becuase Byte has !DefaultArray. +/// +/// Overlap is not permitted, but this doesn't overlap because we've disabled the default +/// implementation. +impl AsyncParser, BS> for DefaultInterp { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + async move { + input.read().await + } + } +} + +macro_rules! number_parser { + ($p:ident, $size:expr, $t:ty) => { + impl HasOutput<$p> for DefaultInterp { + type Output = $t; + } + impl AsyncParser<$p, BS> for DefaultInterp where $t : Convert { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + async move { + Convert::::deserialize(input.read::<$size>().await) + } + } + } + } +} +number_parser! { U16, 2, u16 } +number_parser! { U32, 4, u32 } +number_parser! { U64, 8, u64 } + +impl HasOutput> for DefaultInterp where T : DefaultArray, DefaultInterp: HasOutput { + type Output = [>::Output; N]; +} + +impl AsyncParser, BS> for DefaultInterp where T : DefaultArray, DefaultInterp: AsyncParser { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + as AsyncParser, BS>>::parse(&SubInterp(DefaultInterp), input) + } +} + +impl, const M: usize> HasOutput> for SubInterp { + type Output = ArrayVec; +} + +impl AsyncParser, BS> for SubInterp where S: AsyncParser, DefaultInterp: AsyncParser, >::Output: TryInto { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + async move { + let length : usize = match DefaultInterp.parse(input).await.try_into() { + Ok(a) => a, + Err(_) => reject().await, + }; + let mut accumulator = ArrayVec::new(); + for _ in 1..length { + accumulator.push(self.0.parse(input).await); + } + accumulator + } + } +} + +impl HasOutput for DropInterp { + type Output = (); +} + +impl AsyncParser, BS> for DropInterp where DefaultInterp: AsyncParser, DefaultInterp: AsyncParser, >::Output: TryInto +, DefaultInterp: AsyncParser { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + async move { + let length : usize = match >::parse(&DefaultInterp, input).await.try_into() { + Ok(a) => a, + Err(_) => reject().await, + }; + for _ in 1..length { + >::parse(&DefaultInterp, input).await; + } + } + } +} + +impl, R, F: Fn(>::Output) -> Option> HasOutput for Action { + type Output = R; +} + +impl, R, F: Fn(>::Output) -> Option, BS: Readable> AsyncParser for Action { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + async move { + match self.1(self.0.parse(input).await) { + Some(a) => a, + None => reject().await, + } + } + } +} + +impl HasOutput for Action) -> Option<()>> { + type Output = R; +} + +impl, BS: Readable> AsyncParser for Action>::Output, &mut Option) -> Option<()>> { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + async move { + let mut destination = None; + match self.1(&self.0.parse(input).await, &mut destination) { + Some(()) => { match destination { + Some(a) => a, + None => reject().await, + } } + None => reject().await, + } + } + } +} + +/// Slightly different version of Action; present for reasons of impl selection. +pub struct FAction(pub S, pub fn(O) -> Option); + +impl, R> HasOutput for FAction { + type Output = R; +} + +impl, R, BS: Readable> AsyncParser for FAction>::Output, R> { // FAction>::Output) -> Option> { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + async move { + match self.1(self.0.parse(input).await) { + Some(a) => a, + None => reject().await, + } + } + } +} + +impl> HasOutput for ObserveBytes { + type Output = (X, Option<>::Output>); +} + +/// HashIntercept wraps a Readable and updates a hash-like object with the data as it passes. +/// +/// Used to support ObserveBytes for async parsers. +pub struct HashIntercept(pub BS, pub X); + +impl Readable for HashIntercept { + type OutFut<'a, const N: usize> = impl core::future::Future; + fn read<'a: 'b, 'b, const N: usize>(&'a mut self) -> Self::OutFut<'b, N> { + self.0.read() + } +} + +/// ObserveBytes for AsyncParser operates by passing a HashIntercept to the sub-parser. +impl>, A, BS: 'static + Readable + Clone> AsyncParser for ObserveBytes { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + async move { + let mut hi = HashIntercept(input.clone(), (self.0)()); + let rv = self.2.parse(&mut hi).await; + *input = hi.0; + (hi.1, Some(rv)) + } + } +} + +impl, T: HasOutput> HasOutput<(A, B)> for (S, T) { + type Output = (Option, Option); +} + +/// Pairs of parsers parse the sequence of their two schemas. +impl, T: AsyncParser, BS: Readable> AsyncParser<(A, B), BS> for (S, T) { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + async move { + let t = self.0.parse(input).await; + let s = self.1.parse(input).await; + (Some(t), Some(s)) + } + } +} diff --git a/src/core_parsers.rs b/src/core_parsers.rs index dbcfd2a..1b502a0 100644 --- a/src/core_parsers.rs +++ b/src/core_parsers.rs @@ -1,24 +1,21 @@ use crate::endianness::Endianness; +pub use crate::schema::*; + // use generic_array::{ArrayLength, GenericArray}; pub trait RV { type R; } -#[derive(Default)] -pub struct Byte; impl RV for Byte { type R = u8; } -#[derive(Default)] -pub struct Array(pub I); - +/// Fixed-length array. impl< I : RV, const N : usize > RV for Array { type R = [I::R; N]; } -pub struct DArray(pub N, pub I); use arrayvec::ArrayVec; use core::convert::TryInto; @@ -31,9 +28,6 @@ impl< N : RV, I : RV, const M : usize > RV for DArray where macro_rules! number_parser { ($p:ident, $t:ty) => { - #[derive(Default)] - pub struct $p; - impl RV for $p { type R = $t; } @@ -60,6 +54,8 @@ impl< I : RV, N : RV > RV for NOf where //pub struct DArray; //pub struct Table; +/// LengthFallback; frame a subparser with a length byte. pub struct LengthFallback(pub N, pub S); +/// Alternative; schema is either A or B. pub struct Alt(pub A, pub B); diff --git a/src/interp.rs b/src/interp.rs new file mode 100644 index 0000000..f3d6573 --- /dev/null +++ b/src/interp.rs @@ -0,0 +1,82 @@ +//! Generic "interpretations" for schema types. + +/// Parse the schema using the "natural" interpretation. +/// +/// usually this translates the input to an equivalent rust type or struct, for instance +/// `DefaultIntepr` on a `U32<{Endianness::Big}>` schema will produce a `u32` in the ledger's native +/// little-endian byte order, and an `Array,5>` would produce a `[u16;5]` +/// result; `DArray` becomes `ArrayVec`, etc. + +pub struct DefaultInterp; + +/// For array-like schemas; given a subparser for the item, produce a parser for the whole array. +pub struct SubInterp(pub S); + +/// Structurally checks and skips the schema in the input, but consumes only the minimum memory +/// required to do so and returns nothing. +pub struct DropInterp; + + +/// Action is essentailly an fmap that can fail. +/// +/// We _could_ constraint F to actually be an fn(..) -> Option<()> to improve error messages when +/// functions do not have the correct shape, but that reduces our ability to write different +/// instances later. +#[derive(Clone)] +pub struct Action(pub S, pub F); + +/// A MoveAction is the same as an Action with the distinction that it takes it's argument via Move, +/// thus enabling it to work with types that do not have Copy or Clone and have nontrivial semantics +/// involving Drop. +pub struct MoveAction(pub S, pub F); + +#[derive(Clone)] +/// Monadic bind. +/// +/// S is the first subparser to run +/// F is a function that returns the continuation parser to run, which can depend on the result of S +pub struct Bind(pub S, pub F); + +/// Bind, with reduced-copying parameter passing. +/// +/// This is the main consumer for DynInterpParser; rather than constructing a fully new parser F, +/// this uses a static second parser that accepts a parameter via DynInterpParser to achieve a +/// similar result. This is possibly sufficiently less flexible that calling it monadic is +/// incorrect. +#[derive(Clone)] +pub struct DynBind(pub S, pub F); + +/// ObserveBytes runs a subparser and in parallel feeds the bytes consumed by the subparser into an +/// "observer", typically a hashing algorithm. +/// +/// The first parameter is a function to build the observer, the second is the function to update +/// the observer, and S is the subparser. +#[derive(Clone)] +pub struct ObserveBytes(pub fn() -> X, pub F, pub S); + +/// Essentially SubInterp but for LengthFallback. +#[derive(Clone)] +pub struct LengthLimited { + pub bytes_limit : usize, + pub subparser : S +} + +/// ObserveLengthedBytes is notionally the composition of a LengthFallback parser with +/// ObserveBytes. +/// +/// This has the caveat that the length portion of the LengthFallback is _not_ fed to the +/// hashing function, which complicates separated implementation. +/// +/// * I is a closure to initialize the observer of the input, namely X, which is usually a hasher +/// * F is a method which does the observing for the observer. +/// * S is the parser for the input of the hasher from the raw input +/// * the bool specifies whether to hard reject if the subparser fails. +/// +/// Note that ObserveLengthedBytes also consumes a length prefix from the raw input +/// Confer: LengthFallback +#[derive(Clone)] +pub struct ObserveLengthedBytes X, X, F, S>(pub I, pub F, pub S, pub bool); + + +pub struct Buffer; + diff --git a/src/interp_parser.rs b/src/interp_parser.rs index 2540341..2a27c6e 100644 --- a/src/interp_parser.rs +++ b/src/interp_parser.rs @@ -1,10 +1,29 @@ +//! The parsers in interp_parser divide "Schema" from "Interpretation". +//! +//! The core trait is [InterpParser], and that together with [DynInterpParser] form the public +//! interface of this module. +//! +//! Schema in this context describes the structure of the input; the "one byte of length and then +//! up to ten 32-bit numbers" part. +//! +//! Interpretation is then what we want to _do_ with the input; for ledger apps, this is usually +//! "cache it for a moment to prompt", "prompt for the return values of these sub-parsers" or +//! "forget it", corresponding to DefaultInterp, Action, and DropInterp. +//! +//! Arrays of values in the input we usually don't want to cache, as that would require either a +//! limit or non-constant memory; for those we handle them piece by piece with SubInterp. use crate::core_parsers::*; use crate::endianness::{Endianness, Convert}; use arrayvec::ArrayVec; +pub use crate::interp::*; + #[cfg(feature = "logging")] -use ledger_log::error; +use ledger_log::{trace,error}; + +/// Out-Of-Band messages; currently only Reject, but could plausibly be used to do prompts in a +/// co-routine way. #[derive(PartialEq, Debug)] pub enum OOB { // Prompt removed due to excessive memory use; we gain testability improvements if we can @@ -15,21 +34,25 @@ pub enum OOB { Reject } -// PResult stands for Partial Result -// None = Incomplete +/// PResult stands for Partial Result +/// None = Incomplete pub type PResult = Option; -// This represents the part of the input that hasn't been yet consumed by the -// parser, represented as a slice of the input. +/// This represents the part of the input that hasn't been yet consumed by the +/// parser, represented as a slice of the input. pub type RemainingSlice<'a> = &'a [u8]; -// If the parser does its job correctly, we just need to return the remaining -// slice. If the parser still needs data, it will return a None (and in that -// case the remaining slice is empty because we consumed it all). If the parser -// encounters an error condition, it will signal it in the OOB type, and we'll -// return the remaining slice for further elaboration or resuming. + +/// Return type for a parser. +/// +/// If the parser completes its job correctly, we just need to return the remaining +/// slice. If the parser still needs data, it will return a None (and in that +/// case the remaining slice is empty because we consumed it all). If the parser +/// encounters an error condition, it will signal it in the OOB type, and we'll +/// return the remaining slice for further elaboration or resuming. pub type ParseResult<'a> = Result, (PResult, RemainingSlice<'a>)>; + pub fn reject<'a, R>(chunk: &'a [u8]) -> Result, &'a [u8])> { Err((Some(OOB::Reject), chunk)) } @@ -41,25 +64,38 @@ pub fn need_more<'a, R>(chunk: &'a [u8]) -> Result, &'a [u8])> // Core trait; describes an "interpretation" of a given datatype (specified with the types from // core_parsers), which can have a variable return type and do stateful actions. +/// An InterpParser provides an actual parser between a schema `P` by interpretation mechanism +/// `Self`. +/// +/// To use, first create a variable of type `State` by `init`, and initialize a destination to +/// None; and then repeatedly call `parse` with chunks of input until the result is `Ok(())`; at +/// that point destination will be filled. destination may become non-None at any point during the +/// parse, and should be a stable location. +/// +/// For better memory usage during initialization in most cases, instead of using `init` declare a +/// `MaybeUninit` variable and use `init_in_place` to construct a state in it, then convert it to +/// `State`. + pub trait InterpParser

{ type State; type Returning; fn init(&self) -> Self::State; + fn init_in_place(&self, state: *mut core::mem::MaybeUninit) { + unsafe { (*state).as_mut_ptr().write(self.init()); } + } fn parse<'a, 'b>(&self, state: &'b mut Self::State, chunk: &'a [u8], destination: &mut Option) -> ParseResult<'a>; } +/// DynInterpParser extends InterpParser by adding a method to set a parameter into the parser +/// as part of initialization. +/// +/// This is mostly useful to avoid duplication of memory when implementing DynBind. + pub trait DynInterpParser

: InterpParser

{ type Parameter; fn init_param(&self, params: Self::Parameter, state: &mut Self::State, destination: &mut Option); } -pub struct DefaultInterp; - -pub struct SubInterp(pub S); - -// Structurally checks and skips the format, but consumes only the minimum memory required to do so -// and returns nothing. -pub struct DropInterp; pub struct ByteState; @@ -77,6 +113,10 @@ pub fn set_from_thunk X>(x: &mut X, f: F) { pub fn call_me_maybe Option<()>>(f: F) -> Option<()> { f() } +#[inline(never)] +pub fn call_me_maybe2 Option<()>>(f: F) -> Option<()> { + f() +} impl InterpParser for DefaultInterp { type State = ByteState; @@ -151,10 +191,10 @@ impl, const N : usize> InterpParser> for Sub } macro_rules! number_parser { - ($p:ident, $size:expr) => { - impl InterpParser<$p> for DefaultInterp where <$p as RV>::R : Convert { + ($p:ident, $size:expr, $t:ty) => { + impl InterpParser<$p> for DefaultInterp where $t : Convert { type State = >>::State; - type Returning = <$p as RV>::R; + type Returning = $t; fn init(&self) -> Self::State { >>::init(&DefaultInterp) } @@ -182,9 +222,9 @@ macro_rules! number_parser { } } } -number_parser! { U16, 2 } -number_parser! { U32, 4 } -number_parser! { U64, 8 } +number_parser! { U16, 2, u16 } +number_parser! { U32, 4, u32 } +number_parser! { U64, 8, u64 } pub enum ForwardDArrayParserState { Length(N), @@ -262,21 +302,22 @@ impl< N, I, const M : usize> InterpParser> for DefaultInterp whe } */ -// Action is essentailly an fmap that can fail. -// We _could_ constraint F to actually be an fn(..) -> Option<()> to improve error messages when -// functions do not have the correct shape, but that reduces our ability to write different -// instances later. -#[derive(Clone)] -pub struct Action(pub S, pub F); - impl> InterpParser for Action>::Returning, &mut Option) -> Option<()>> { type State = ( >::State, Option< >::Returning>); type Returning = R; + + #[inline(never)] fn init(&self) -> Self::State { (>::init(&self.0), None) } + #[inline(never)] + fn init_in_place(&self, state: *mut core::mem::MaybeUninit) { + self.0.init_in_place(unsafe { core::ptr::addr_of_mut!((*(*state).as_mut_ptr()).0) as *mut core::mem::MaybeUninit< >::State> }); + call_fn( || unsafe { (core::ptr::addr_of_mut!((*(*state).as_mut_ptr()).1) as *mut Option< >::Returning> ).write(None)} ); + } + #[inline(never)] fn parse<'a, 'b>(&self, state: &'b mut Self::State, chunk: &'a [u8], destination: &mut Option) -> ParseResult<'a> { let new_chunk = self.0.parse(&mut state.0, chunk, &mut state.1)?; @@ -292,8 +333,88 @@ impl> DynInterpParser for Action) { - state.0 = >::init(&self.0); - state.1 = None; + set_from_thunk(&mut state.0, || >::init(&self.0)); + set_from_thunk(&mut state.1, || None); + self.0.init_param(param, &mut state.0, &mut state.1); + } + } + +/// This impl exists to allow the _function_ of an Action to be the target of the parameter for +/// DynInterpParser, thus giving an escape hatch to thread a parameter past a non-parameterized +/// parser. Whether this should still be an Action as opposed to some other name is not immediately +/// clear. +impl, C> InterpParser for Action>::Returning, &mut Option, C) -> Option<()>> +{ + type State = ( >::State, Option< >::Returning>, Option); + type Returning = R; + + #[inline(never)] + fn init(&self) -> Self::State { + (>::init(&self.0), None, None) + } + + #[inline(never)] + fn init_in_place(&self, state: *mut core::mem::MaybeUninit) { + self.0.init_in_place(unsafe { core::ptr::addr_of_mut!((*(*state).as_mut_ptr()).0) as *mut core::mem::MaybeUninit< >::State> }); + call_fn( || unsafe { (core::ptr::addr_of_mut!((*(*state).as_mut_ptr()).1) as *mut Option< >::Returning> ).write(None)} ); + call_fn( || unsafe { (core::ptr::addr_of_mut!((*(*state).as_mut_ptr()).2) as *mut Option ).write(None)} ); + } + + #[inline(never)] + fn parse<'a, 'b>(&self, state: &'b mut Self::State, chunk: &'a [u8], destination: &mut Option) -> ParseResult<'a> { + let new_chunk = self.0.parse(&mut state.0, chunk, &mut state.1)?; + match (self.1)(state.1.as_ref().ok_or((Some(OOB::Reject),new_chunk))?, destination, core::mem::take(&mut state.2).ok_or((Some(OOB::Reject),new_chunk))?) { + None => { Err((Some(OOB::Reject),new_chunk)) } + Some(()) => { Ok(new_chunk) } + } + } +} + +impl, C> DynInterpParser for Action>::Returning, &mut Option, C) -> Option<()>> + { + type Parameter = C; + #[inline(never)] + fn init_param(&self, param: Self::Parameter, state: &mut Self::State, _destination: &mut Option) { + set_from_thunk(&mut state.0, || >::init(&self.0)); + set_from_thunk(&mut state.1, || None); + set_from_thunk(&mut state.2, || Some(param)); + } + } + +impl> InterpParser for MoveAction>::Returning, &mut Option) -> Option<()>> +{ + type State = ( >::State, Option< >::Returning>); + type Returning = R; + + #[inline(never)] + fn init(&self) -> Self::State { + (>::init(&self.0), None) + } + + #[inline(never)] + fn init_in_place(&self, state: *mut core::mem::MaybeUninit) { + self.0.init_in_place(unsafe { core::ptr::addr_of_mut!((*(*state).as_mut_ptr()).0) as *mut core::mem::MaybeUninit< >::State> }); + call_fn( || unsafe { (core::ptr::addr_of_mut!((*(*state).as_mut_ptr()).1) as *mut Option< >::Returning> ).write(None)} ); + } + + #[inline(never)] + fn parse<'a, 'b>(&self, state: &'b mut Self::State, chunk: &'a [u8], destination: &mut Option) -> ParseResult<'a> { + let new_chunk = self.0.parse(&mut state.0, chunk, &mut state.1)?; + match (self.1)(core::mem::take(&mut state.1).ok_or((Some(OOB::Reject),new_chunk))?, destination) { + None => { Err((Some(OOB::Reject),new_chunk)) } + Some(()) => { Ok(new_chunk) } + } + } +} + +impl> DynInterpParser for MoveAction>::Returning, &mut Option) -> Option<()>> + { + type Parameter = S::Parameter; + #[inline(never)] + fn init_param(&self, param: Self::Parameter, state: &mut Self::State, _destination: &mut Option) { + set_from_thunk(&mut state.0, || >::init(&self.0)); + set_from_thunk(&mut state.1, || None); self.0.init_param(param, &mut state.0, &mut state.1); } } @@ -302,30 +423,34 @@ fn rej<'a>(cnk: &'a [u8]) -> (PResult, RemainingSlice<'a>) { (Some(OOB::Reject), cnk) } -#[derive(Clone)] -// S is the first subparser to run -// F is the continuation parser to run, which can depend on the result of S -pub struct Bind(pub S, pub F); -// Initially the state is the state of the first subparser, and its result location -// After the first subparser runs, if it failed, then the whole bind parser will fail -// but if it succeeds, then the parser state transitions to BindSecond. +/// State for Bind +/// +/// Initially the state is the state of the first subparser, and its result location +/// After the first subparser runs, if it failed, then the whole bind parser will fail +/// but if it succeeds, then the parser state transitions to BindSecond. +#[derive(InPlaceInit)] pub enum BindState,T:InterpParser> { BindFirst(S::State, Option< >::Returning>), - BindSecond(T, T::State) + BindSecond(T, >::State) } impl, T : InterpParser> InterpParser<(A,B)> for Bind>::Returning) -> Option> { type State = BindState; type Returning = >::Returning; + #[inline(never)] fn init(&self) -> Self::State { use BindState::*; - #[cfg(feature = "logging")] - error!("Bind T size: {} {}", core::mem::size_of::(), core::mem::size_of::()); + // #[cfg(feature = "logging")] + // error!("Bind T size: {} {}", core::mem::size_of::(), core::mem::size_of::()); BindFirst (>::init(&self.0), None) } + fn init_in_place(&self, state: *mut core::mem::MaybeUninit) { + Self::State::init_bind_first(state, |a| >::init_in_place(&self.0, a), |b| call_fn2(|| unsafe { (*b).as_mut_ptr().write(None); })); + } + #[inline(never)] fn parse<'a, 'b>(&self, state: &'b mut Self::State, chunk: &'a [u8], destination: &mut Option) -> ParseResult<'a> { use BindState::*; @@ -351,12 +476,20 @@ impl, T : InterpParser> InterpParser<(A,B)> for Bin } } -#[derive(Clone)] -pub struct DynBind(pub S, pub F); - +#[derive(InPlaceInit)] +#[repr(u8)] pub enum DynBindState,T:InterpParser> { BindFirst(S::State, Option< >::Returning>), - BindSecond(T::State) + BindSecond(>::State) +} + +#[inline(never)] +fn call_fn(f: impl FnOnce()) { + f() +} +#[inline(never)] +fn call_fn2(f: impl FnOnce()) { + f() } impl, T : DynInterpParser> InterpParser<(A,B)> for DynBind @@ -364,12 +497,18 @@ impl, T : DynInterpParser { type State = DynBindState; type Returning = >::Returning; + #[inline(never)] fn init(&self) -> Self::State { use DynBindState::*; - #[cfg(feature = "logging")] - error!("Bind T size: {} {}", core::mem::size_of::(), core::mem::size_of::()); + // #[cfg(feature = "logging")] + // error!("Bind T size: {} {}", core::mem::size_of::(), core::mem::size_of::()); BindFirst (>::init(&self.0), None) } + + #[inline(never)] + fn init_in_place(&self, state: *mut core::mem::MaybeUninit) { + Self::State::init_bind_first(state, |a| call_fn2(|| >::init_in_place(&self.0, a)), |b| call_fn2(|| unsafe { (*b).as_mut_ptr().write(None); })); + } #[inline(never)] fn parse<'a, 'b>(&self, state: &'b mut Self::State, chunk: &'a [u8], destination: &mut Option) -> ParseResult<'a> { @@ -379,14 +518,28 @@ impl, T : DynInterpParser match state { BindFirst(ref mut s, ref mut r) => { cursor = self.0.parse(s, cursor, r)?; - let r_temp = core::mem::take(r); call_me_maybe(|| { - *state = BindSecond(self.1.init()); - match state { - BindSecond(ref mut s) => self.1.init_param(r_temp?, s, destination), - _ => return None, - }; + let r_temp; + if let BindFirst(_, ref mut r) = state { + r_temp = core::mem::take(r)?; + } else { + unreachable!(); + } + Self::State::init_bind_second(unsafe { core::mem::transmute(state as *mut Self::State) }, |a| call_fn(|| self.1.init_in_place(a))); + if let BindSecond(ref mut s) = state { + self.1.init_param(r_temp, s, destination); + } else { + unreachable!(); + } Some(()) + /* call_me_maybe2(|| { + // *state = BindSecond(self.1.init()); + match state { + BindSecond(ref mut s) => call_fn(|| self.1.init_param(r_temp, s, destination)), + _ => return None, + }; + Some(()) + })*/ }).ok_or((Some(OOB::Reject), cursor))?; } BindSecond(ref mut s) => { @@ -403,7 +556,8 @@ impl, T: DynInterpParser) { - *state = DynBindState::BindFirst(>::init(&self.0), None); + self.init_in_place(unsafe { core::mem::transmute(state as *mut Self::State) }); + // *state = DynBindState::BindFirst(>::init(&self.0), None); match state { DynBindState::BindFirst(ref mut s, ref mut sub_destination) => self.0.init_param(param, s, sub_destination), _ => unreachable!(), @@ -411,15 +565,13 @@ impl, T: DynInterpParser(pub fn() -> X, pub F, pub S); - impl(), S : InterpParser> InterpParser for ObserveBytes { type State = Option<>::State>; // Making a compromise here; if we return our sub-parser's result still wrapped in Option, we // can avoid storing it in our own state and then copying. type Returning = (X, Option<>::Returning>); + #[inline(never)] fn init(&self) -> Self::State { None } @@ -447,7 +599,7 @@ impl(), S: InterpParser> DynInterpParser) { - *destination = Some((param, None)); + *destination = Some((param.clone(), None)); *state = Some(>::init(&self.2)); } } @@ -458,9 +610,11 @@ pub enum PairState { Second(B), } +/// Pairs of parsers parse the sequence of their schema types, and return a pair of their results. impl, B : InterpParser, C, D> InterpParser<(C, D)> for (A, B) { type State = PairState<>::State, >::State>; type Returning = (Option, Option); + #[inline(never)] fn init(&self) -> Self::State { PairState::Init } @@ -506,6 +660,7 @@ macro_rules! def_table { } */ +#[derive(InPlaceInit)] pub enum LengthFallbackParserState { Length(N, NO), Element(usize, usize, IS), @@ -520,11 +675,13 @@ pub struct LengthLimitedState { } // Now define the parser type, which will resemble the mirror image of the state -#[derive(Clone)] -pub struct LengthLimited { - bytes_limit : usize, - subparser : S -} +// Defined in interp.rs: +// +// #[derive(Clone)] +// pub struct LengthLimited { +// bytes_limit : usize, +// subparser : S +//} // Implement InterpParser for the parser impl> InterpParser for LengthLimited { @@ -572,24 +729,22 @@ impl> InterpParser for LengthLimited { } } -// I is a closure to initialize the observer of the input, namely X, which is usually a hasher -// F is a method which does the observing for the observer. -// S is the parser for the input of the hasher from the raw input -// Note that ObserveLengthedBytes also consumes a length prefix from the raw input -// Confer: LengthFallback -#[derive(Clone)] -pub struct ObserveLengthedBytes X, X, F, S>(pub I, pub F, pub S, pub bool); -impl X, N, I, S : InterpParser, X: Clone, F: Fn(&mut X, &[u8])->()> InterpParser> for ObserveLengthedBytes where +impl X, N, I, S : InterpParser, X, F: Fn(&mut X, &[u8])->()> InterpParser> for ObserveLengthedBytes where DefaultInterp : InterpParser, usize: TryFrom<>::Returning>, >::Returning: Copy { type State=LengthFallbackParserState<>::State, Option<>::Returning>, >::State>; type Returning = (Option<>::Returning>, X); + #[inline(never)] fn init(&self) -> Self::State { LengthFallbackParserState::Length(>::init(&DefaultInterp), None) } #[inline(never)] + fn init_in_place(&self, state: *mut core::mem::MaybeUninit) { + Self::State::init_length(state, |a| >::init_in_place(&DefaultInterp, a), |b| call_fn( || unsafe { (*b).as_mut_ptr().write(None); })); + } + #[inline(never)] fn parse<'a, 'b>(&self, state: &'b mut Self::State, chunk: &'a [u8], destination: &mut Option) -> ParseResult<'a> { use LengthFallbackParserState::*; let mut cursor : &'a [u8] = chunk; @@ -600,11 +755,11 @@ impl X, N, I, S : InterpParser, X: Clone, F: Fn(&mut X, &[u8] let len = >::Returning>>::try_from(length_out.ok_or(rej(cursor))?).or(Err(rej(cursor)))?; match destination { None => { - call_me_maybe(|| { + /*call_me_maybe(|| { let result = self.0(); *destination = Some((None, result)); Some(()) - }).ok_or(rej(cursor))?; + }).ok_or(rej(cursor))?;*/ } _ => { } } @@ -673,14 +828,15 @@ impl X, N, I, S : InterpParser, X: Clone, F: Fn(&mut X, &[u8] } } -impl X, N, I, S : InterpParser, X: Clone, F: Fn(&mut X, &[u8])->()> DynInterpParser> for ObserveLengthedBytes where +impl X, N, I, S : InterpParser, X, F: Fn(&mut X, &[u8])->()> DynInterpParser> for ObserveLengthedBytes where DefaultInterp : InterpParser, usize: TryFrom<>::Returning>, >::Returning: Copy { type Parameter = X; #[inline(never)] fn init_param(&self, param: Self::Parameter, state: &mut Self::State, destination: &mut Option) { - *destination = Some((None, param)); + set_from_thunk(destination, || { Some((None, param)) }); + // *destination = Some((None, param.clone())); *state = LengthFallbackParserState::Length(>::init(&DefaultInterp), None) } } @@ -767,9 +923,7 @@ mod test { } None => { panic!("Ran out of input chunks before parser accepted"); - } - } - } + } } } Ok(new_cursor) => { assert_eq!(destination.as_ref().unwrap(), result); assert_eq!(new_cursor, &[][..]); diff --git a/src/json_interp.rs b/src/json_interp.rs index 2c1e03d..f1ae827 100644 --- a/src/json_interp.rs +++ b/src/json_interp.rs @@ -65,6 +65,9 @@ pub trait JsonInterp { type State; type Returning; fn init(&self) -> Self::State; + fn init_in_place(&self, state: *mut core::mem::MaybeUninit) { + unsafe { (*state).as_mut_ptr().write(self.init()); } + } fn parse<'a>(&self, state: &mut Self::State, token: JsonToken<'a>, destination: &mut Option) -> Result<(), Option>; } @@ -246,6 +249,10 @@ impl> InterpParser> for Json { fn init(&self) -> Self::State { (JsonTokenizerState::Value, >::init(&self.0)) } + fn init_in_place(&self, state: *mut core::mem::MaybeUninit) { + unsafe { (core::ptr::addr_of_mut!((*(*state).as_mut_ptr()).0) as *mut JsonTokenizerState).write(JsonTokenizerState::Value); } + self.0.init_in_place(unsafe { core::ptr::addr_of_mut!((*(*state).as_mut_ptr()).0) as *mut core::mem::MaybeUninit< >::State> }); + } #[inline(never)] fn parse<'a, 'b>(&self, state: &'b mut Self::State, chunk: &'a [u8], destination: &mut Option) -> ParseResult<'a> { let mut cursor : &[u8] = chunk; @@ -1004,12 +1011,14 @@ macro_rules! define_json_struct_interp { match &key[..] { $( $crate::json_interp::bstringify!($field) => { + #[cfg(feature = "logging")] trace!("json-struct-interp parser: checking key {:?}\n", core::str::from_utf8(key)); $crate::interp_parser::set_from_thunk(state, || [<$name State>]::[](<[] as JsonInterp<$schemaType>>::init(&self.[]))); } )* , _ => { + #[cfg(feature = "logging")] error!("json-struct-interp parser: Got unexpected key {:?}\n", core::str::from_utf8(key)); return Err(Some($crate::interp_parser::OOB::Reject)) } } diff --git a/src/lib.rs b/src/lib.rs index 3a1976e..2e5106f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,12 +2,27 @@ #![allow(incomplete_features)] #![feature(bindings_after_at)] #![feature(const_generics)] - +#![feature(const_evaluatable_checked)] +#![feature(const_fn_trait_bound)] +#![feature(min_type_alias_impl_trait)] +#![feature(min_specialization)] +#![feature(generic_associated_types)] +#![feature(auto_traits)] +#![feature(negative_impls)] +#![feature(member_constraints)] +#![feature(trace_macros)] +#![feature(log_syntax)] #![cfg_attr(all(target_os="nanos", test), no_main)] #![cfg_attr(target_os="nanos", feature(custom_test_frameworks))] #![reexport_test_harness_main = "test_main"] #![cfg_attr(target_os="nanos", test_runner(nanos_sdk::sdk_test_runner))] +#[macro_use] +extern crate enum_init; + +#[macro_use] +extern crate num_derive; + //#[cfg(all(not(target_os = "linux"), test))] //use nanos_sdk::exit_app; #[cfg(all(not(target_os = "linux"), test))] @@ -44,14 +59,20 @@ fn handle_panic(_: &PanicInfo) -> ! { exit_app(0); } +pub mod schema; +pub mod endianness; +pub mod interp; pub mod core_parsers; // pub mod forward_parser; -pub mod endianness; pub mod interp_parser; pub mod json; pub mod json_interp; + +pub mod async_parser; + +pub mod protobufs; diff --git a/src/protobufs/async_parser.rs b/src/protobufs/async_parser.rs new file mode 100644 index 0000000..4bb7e73 --- /dev/null +++ b/src/protobufs/async_parser.rs @@ -0,0 +1,695 @@ +// use crate::schema::*; +use crate::interp::*; +use core::future::Future; +use crate::async_parser::*; +use crate::protobufs::schema::*; +use crate::protobufs::interp::*; +use arrayvec::ArrayVec; +pub use num_traits::FromPrimitive; + +trait IsLengthDelimited { } + +pub fn parse_varint<'a: 'c, 'c, T, BS: Readable>(input: &'a mut BS) -> impl Future + 'c where + T: Default + core::ops::Shl + core::ops::AddAssign + core::convert::From +{ + async move { + let mut accumulator : T = Default::default(); + let mut n : u8 = 0; + loop { + let [current] : [u8; 1] = input.read().await; + // Check that adding this base-128 digit will not overflow + if 7*n as usize > (core::mem::size_of::()-1)*8 && 0 != ((current & 0x7f) >> (core::mem::size_of::()*8 - (7*n as usize))) { + reject().await + } + accumulator += core::convert::Into::::into(current & 0x7f) << core::convert::From::from(7*n as u8); + n += 1; + if current & 0x80 == 0 { + return accumulator; + } + } + } +} + +fn skip_varint<'a: 'c, 'c, BS: Readable>(input: &'a mut BS) -> impl Future + 'c where +{ + async move { + loop { + let [current] : [u8; 1] = input.read().await; + if current & 0x80 == 0 { + return (); + } + } + } +} + +macro_rules! VarintPrimitive { + { $name:ident : $returning:ty : $v:ident => $($decode:tt)* } => + { $crate::protobufs::async_parser::paste! { + impl HasOutput<[<$name:camel>]> for DefaultInterp { + type Output = $returning; + } + + impl AsyncParser<[<$name:camel>], BS> for DefaultInterp { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + async move { + let $v = parse_varint::<'a, 'c, $returning, BS>(input).await; + $($decode)* + } + } + } + + impl AsyncParser<[<$name:camel>], BS> for DropInterp { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + async move { + parse_varint::<'a, 'c, $returning, BS>(input).await; + } + } + } + } } +} + +VarintPrimitive! { int32 : i32 : x => x } +VarintPrimitive! { int64 : i64 : x => x } +VarintPrimitive! { uint32 : u32 : x => x } +VarintPrimitive! { uint64 : u64 : x => x } +VarintPrimitive! { sint32 : i32 : x => x >> 1 ^ (-(x & 1)) } +VarintPrimitive! { sint64 : i64 : x => x >> 1 ^ (-(x & 1)) } + +impl HasOutput for DefaultInterp { + type Output = bool; +} + +impl AsyncParser for DefaultInterp { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + async move { + parse_varint::<'a, 'c, u16, BS>(input).await == 1 + } + } +} + +impl HasOutput for DefaultInterp { + type Output = [u8; 8]; +} + +impl AsyncParser for DefaultInterp { + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + async move { + input.read().await + } + } + type State<'c> = impl Future; +} + +pub trait LengthDelimitedParser : HasOutput{ + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS, length: usize) -> Self::State<'c>; + type State<'c>: Future; +} + + +impl, R, BS: Readable, F: Fn(>::Output) -> Option> LengthDelimitedParser for Action { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS, length: usize) -> Self::State<'c> { + async move { + match self.1(self.0.parse(input, length).await) { + Some(a) => a, + None => reject().await, + } + } + } +} + +impl LengthDelimitedParser for DropInterp { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS, length: usize) -> Self::State<'c> { + async move { + for _ in 0..length { + let [_]: [u8; 1] = input.read().await; + } + } + } +} + +impl HasOutput for Buffer { + type Output = ArrayVec; +} + +async fn read_arrayvec_n<'a, const N: usize, BS: Readable>(input: &'a mut BS, length: usize) -> ArrayVec { + if length > N { + reject().await + } + let mut accumulator = ArrayVec::new(); + for _ in 0..length { + let [byte] = input.read().await; + accumulator.push(byte); + } + accumulator +} + +impl LengthDelimitedParser for Buffer { + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS, length: usize) -> Self::State<'c> { + read_arrayvec_n(input, length) + } + type State<'c> = impl Future; +} + +impl HasOutput for Buffer { + type Output = ArrayVec; +} + +impl LengthDelimitedParser for Buffer { + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS, length: usize) -> Self::State<'c> { + read_arrayvec_n(input, length) + } + type State<'c> = impl Future; +} + +impl> HasOutput> for MessageFieldInterp { + type Output = Value::Output; +} + +#[derive(Clone)] +pub struct TrackLength(pub BS, pub usize); + +impl Readable for TrackLength { + type OutFut<'a, const N: usize> = impl Future; + fn read<'a: 'b, 'b, const N: usize>(&'a mut self) -> Self::OutFut<'b, N> { + self.1 += N; + self.0.read() + } +} + +pub async fn skip_field(fmt: ProtobufWire, i: &mut BS) { + match fmt { + ProtobufWire::Varint => { skip_varint::<'_, '_, BS>(i).await } // >::parse(&DropInterp, i).await; } + ProtobufWire::Fixed64Bit => { i.read::<8>().await; } + ProtobufWire::LengthDelimited => { + let len = >::parse(&DefaultInterp, i).await; + for _ in 0..len { + i.read::<1>().await; + } + } + ProtobufWire::StartGroup => { reject().await } + ProtobufWire::EndGroup => { reject().await } + ProtobufWire::Fixed32Bit => { i.read::<4>().await; } + } +} + +pub struct BytesAsMessage>(Schema, M); + +impl> HasOutput for BytesAsMessage { + type Output = M::Output; +} + +impl, BS: Readable> LengthDelimitedParser for BytesAsMessage { + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS, length: usize) -> Self::State<'c> { + self.1.parse(input, length) + } + type State<'c> = impl Future; +} + + +pub use paste::paste; + +#[macro_export] +macro_rules! define_message { + { $name:ident { $($field:ident : $schemaType:tt $(($($schemaParams:tt)*))* = $number:literal),* } } => { + define_message!{ @enrich, $name { , $($field : $schemaType$(($($schemaParams)*))* = $number),* } { } } + }; + { @enrich, $name:ident { , $field:ident : message($schemaType:tt) = $number:literal $($rest:tt)* } { $($body:tt)* } } => { + define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (LengthDelimitedParser, $schemaType, false) = $number } } + }; + { @enrich, $name:ident { , $field:ident : repeated(message($schemaType:tt)) = $number:literal $($rest:tt)* } { $($body:tt)* } } => { + define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (LengthDelimitedParser, $schemaType, true) = $number } } + }; + { @enrich, $name:ident { , $field:ident : packed($schemaType:tt) = $number:literal $($rest:tt)* } { $($body:tt)* } } => { + define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (LengthDelimitedParser, $schemaType, true) = $number } } + }; + { @enrich, $name:ident { , $field:ident : bytes = $number:literal $($rest:tt)* } { $($body:tt)* } } => { + define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (LengthDelimitedParser, Bytes, false) = $number } } + }; + { @enrich, $name:ident { , $field:ident : string = $number:literal $($rest:tt)* } { $($body:tt)* } } => { + define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (LengthDelimitedParser, String, false) = $number } } + }; + { @enrich, $name:ident { , $field:ident: enum($schemaType:ty) = $number:literal $($rest:tt)* } { $($body:tt)* } } => { + define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (AsyncParser, $schemaType, false) = $number } } + }; + { @enrich, $name:ident { , $field:ident: $schemaType:ty = $number:literal $($rest:tt)* } { $($body:tt)* } } => { + define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (AsyncParser, $schemaType, false) = $number } } + }; + { @enrich, $name:ident { } { $($body:tt)* } } => { + define_message!{ @impl $name { $($body)* } } + }; + { @impl $name:ident { , $($field:ident : ($parseTrait:ident, $schemaType:ty, $repeated:literal) = $number:literal),* } } => { + $crate::protobufs::async_parser::paste! { + pub struct [<$name Interp>]<$([]),*> { + $(pub [] : [] ),* + } + + pub struct [<$name:camel>]; + + impl<$([] : HasOutput<[<$schemaType:camel>], Output=()>),*> HasOutput<[<$name:camel>]> for [<$name:camel Interp>]<$([]),*> { + type Output = (); + } + + impl ProtobufWireFormat for [<$name:camel>] { + const FORMAT: ProtobufWire = ProtobufWire::LengthDelimited; + } + + impl] : HasOutput<[<$schemaType:camel>], Output=()> + $parseTrait<[<$schemaType:camel>], TrackLength>),*> LengthDelimitedParser<[<$name:camel>], BS> for [<$name:camel Interp>]<$([]),*> { + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS, length: usize) -> Self::State<'c> { + async move { + // First, structural check: + let mut tl = TrackLength(input.clone(), 0); + loop { + // Probably should check for presence of all expected fields here as + // well. On the other hand, fields that we specify an interpretation + // for are _required_. + let tag : u32 = parse_varint(&mut tl).await; + let wire = match ProtobufWire::from_u32(tag & 0x07) { Some(w) => w, None => reject().await, }; + skip_field(wire, &mut tl).await; + if tl.1 == length { + break; + } + if tl.1 > length { + return reject().await; + } + } + $( + let mut tl = TrackLength(input.clone(), 0); + let mut seen = false; + loop { + let tag : u32 = parse_varint(&mut tl).await; + let wire = match ProtobufWire::from_u32(tag & 0x07) { Some(w) => w, None => reject().await, }; + if tag >> 3 == $number { + if wire != [<$schemaType:camel>]::FORMAT { + return reject().await; + } + define_message! { @call_parser_for, $parseTrait, tl, self.[] } + if(seen && ! $repeated) { + // Rejecting because of multiple fields on non-repeating; + // protobuf spec says we should "take the last value" but + // our flow doesn't permit this. + return reject().await; + } + seen = true; + } else { + skip_field(wire, &mut tl).await; + // Skip it + } + if tl.1 == length { + break; + } + if tl.1 >= length { + return reject().await; + } + } + )* + () + } + } + type State<'c> = impl Future; + } + } + }; + { @call_parser_for, AsyncParser, $tl:ident, $($p:tt)* } => { + $($p)*.parse(&mut $tl).await; + }; + { @call_parser_for, LengthDelimitedParser, $tl:ident, $($p:tt)* } => { { + let length : usize = parse_varint(&mut $tl).await; + $($p)*.parse(&mut $tl, length).await; + } }; +} + +#[macro_export] +macro_rules! define_enum { + { $name:ident { $($variant:ident = $number:literal),* } } => + { + $crate::protobufs::async_parser::paste! { + #[derive(FromPrimitive, PartialEq)] + #[repr(u32)] + pub enum $name { + $([<$variant:camel>] = $number),* + } + + impl HasOutput<$name> for DefaultInterp { + type Output = $name; + } + + impl ProtobufWireFormat for [<$name:camel>] { + const FORMAT: ProtobufWire = ProtobufWire::Varint; + } + + impl AsyncParser<$name, BS> for DefaultInterp { + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + async move { + match $name::from_u32(parse_varint(input).await) { + None => reject().await, + Some(a) => a, + } + } + } + type State<'c> = impl Future; + } + } + } +} + +use trie_enum::TrieLookup; +pub struct StringEnum(core::marker::PhantomData); + +pub const fn string_enum() -> StringEnum { + StringEnum(core::marker::PhantomData) +} + +impl HasOutput for StringEnum { + type Output = E; +} + +impl LengthDelimitedParser for StringEnum where [(); E::N]: Sized { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS, length: usize) -> Self::State<'c> { + async move { + let mut cursor = E::start(); + for _ in 0..length { + let [c] = input.read().await; + cursor = match cursor.step(c) { + None => reject().await, + Some(cur) => cur + } + } + match cursor.get_val() { + None => reject().await, + Some(r) => *r + } + } + } +} + +pub use trie_enum::enum_trie; + +#[macro_export] +macro_rules! any_of { + { $name:ident { $($variant:ident : $schema:ident = $string:literal),* } } => + { $crate::protobufs::async_parser::paste! { + + enum_trie! { [< $name Discriminator >] { $($variant = $string),* } } + struct $name<$([< $variant:camel Interp >]),*>{ + $([<$variant:snake>]: [< $variant:camel Interp >]),* + } + + impl]: HasOutput<$schema, Output = O>),*> HasOutput for $name<$([< $variant:camel Interp >]),*> + { + type Output = O; + } + + impl]: LengthDelimitedParser<$schema, TrackLength> + HasOutput<$schema, Output = O>),*> LengthDelimitedParser for $name<$([< $variant:camel Interp >]),*> { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS, length: usize) -> Self::State<'c> { + async move { + let mut tl = TrackLength(input.clone(), 0); + let mut seen = false; + let mut discriminator = None; + loop { + let tag : u32 = parse_varint(&mut tl).await; + let wire = match ProtobufWire::from_u32(tag & 0x07) { Some(w) => w, None => reject().await, }; + if tag >> 3 == 1 { + if wire != ProtobufWire::LengthDelimited { + return reject().await; + } + if(seen) { + return reject().await; + } + seen = true; + let length : usize = parse_varint(&mut tl).await; + let discrim_parse = [<$name Discriminator>]::start(); + for _ in 0..length { + let [c] = tl.read().await; + discrim_parse.step(c); + } + discriminator = discrim_parse.get_val(); + } else { + skip_field(wire, &mut tl).await; + // Skip it + } + if tl.1 == length { + break; + } + if tl.1 >= length { + return reject().await; + } + } + + match discriminator { + None => { reject().await } + $( + Some([<$name Discriminator>]::$variant) => { + let mut tl = TrackLength(input.clone(), 0); + let mut seen = false; + loop { + let tag : u32 = parse_varint(&mut tl).await; + let wire = match ProtobufWire::from_u32(tag & 0x07) { Some(w) => w, None => reject().await, }; + if tag >> 3 == 2 { + if wire != [<$schema:camel>]::FORMAT { + return reject().await; + } + let length : usize = parse_varint(&mut tl).await; + self.[<$variant:snake>].parse(&mut tl, length).await; + if(seen) { + // Rejecting because of multiple fields on non-repeating; + // protobuf spec says we should "take the last value" but + // our flow doesn't permit this. + return reject().await; + } + seen = true; + } else { + skip_field(wire, &mut tl).await; + // Skip it + } + if tl.1 == length { + break; + } + if tl.1 >= length { + return reject().await; + } + } + } + )* + + } + + return reject().await; + + } + } + } + } } +} + +/* +// Any handler: take a list of +pub struct AnyOf(T); + +impl HasOutput for AnyOf { + type Output = (); +} + +impl LengthDelimitedParser for AnyOf { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + { + let mut tl = TrackLength(input.clone(), 0); + let mut seen = false; + loop { + let tag : u32 = parse_varint(&mut tl).await; + let wire = match ProtobufWire::from_u32(tag & 0x07) { Some(w) => w, None => reject().await, }; + if tag >> 3 == 1 { + if wire != ProtobufWire::LengthDelimited { + println!("Incorrect format: reject"); + return reject().await; + } + let length : usize = parse_varint(&mut tl).await; + + define_message! { @call_parser_for, $parseTrait, tl, self.[] } + if(seen && ! $repeated) { + // Rejecting because of multiple fields on non-repeating; + // protobuf spec says we should "take the last value" but + // our flow doesn't permit this. + return reject().await; + } + seen = true; + } else { + println!("Not current field; skipping"); + skip_field(wire, &mut tl).await; + // Skip it + } + if tl.1 == length { + break; + } + if tl.1 >= length { + println!("Message off end; rejecting."); + return reject().await; + } + } + } + + } +} +*/ + +#[cfg(test)] +mod test { + use super::*; + use core::future::Future; + pub use num_traits::FromPrimitive; + // trace_macros!(true); + // trace_macros!(false); + + #[derive(Clone)] + struct TestReadable([u8; N], usize); + impl Readable for TestReadable { + type OutFut<'a, const N: usize> = impl 'a + Future; + fn read<'a: 'b, 'b, const N: usize>(&'a mut self) -> Self::OutFut<'b, N> { + if self.1 + N <= self.0.len() { + let offset = self.1; + self.1+=N; + use core::convert::TryInto; + core::future::ready(self.0[offset..self.1].try_into().unwrap()) + } else { + panic!("Read past end of input"); + } + } + } + + use core::task::*; + + static RAW_WAKER_VTABLE : RawWakerVTable = RawWakerVTable::new(|a| RawWaker::new(a, &RAW_WAKER_VTABLE), |_| {}, |_| {}, |_| {}); + + fn poll_once(mut input: F) -> core::task::Poll { + let waker = unsafe { Waker::from_raw(RawWaker::new(&(), &RAW_WAKER_VTABLE)) }; + let mut ctxd = Context::from_waker(&waker); + let mut pinned = unsafe { core::pin::Pin::new_unchecked(&mut input) }; + pinned.as_mut().poll(&mut ctxd) + } + + #[test] + fn test_varint() { + let mut input = TestReadable([0,0,0],0); + assert_eq!(poll_once(Int32.def_parse(&mut input)), Poll::Ready(0)); + let mut input = TestReadable([255,0,0],0); + assert_eq!(poll_once(Int32.def_parse(&mut input)), Poll::Ready(127)); + let mut input = TestReadable([254,0,0],0); + assert_eq!(poll_once(Sint32.def_parse(&mut input)), Poll::Ready(63)); + let mut input = TestReadable([255,0,0],0); + assert_eq!(poll_once(Sint32.def_parse(&mut input)), Poll::Ready(-64)); + let mut input = TestReadable([0,0,0],0); + assert_eq!(poll_once(Sint32.def_parse(&mut input)), Poll::Ready(0)); + let mut input = TestReadable([1,0,0],0); + assert_eq!(poll_once(Sint32.def_parse(&mut input)), Poll::Ready(-1)); + let mut input = TestReadable([2,0,0],0); + assert_eq!(poll_once(Sint32.def_parse(&mut input)), Poll::Ready(1)); + let mut input = TestReadable([128,128,128,128,2,0,0],0); + assert_eq!(poll_once(Int32.def_parse(&mut input)), Poll::Ready(1<<(7*4+1))); + let mut input = TestReadable([128,128,128,128,2,0,0],0); + assert_eq!(poll_once(Sint32.def_parse(&mut input)), Poll::Ready((1<<(7*4+1))/2)); + } + + #[test] + fn test_bytes() { + let mut input = TestReadable([1,2,3,4,5],0); + if let Poll::Ready(res) = poll_once( as LengthDelimitedParser>>::parse(&Buffer::<10>, &mut input, 5)) { + assert_eq!(&res[..], &[1,2,3,4,5]); + } else { assert_eq!(true, false) } + } + + define_message! { OtherMessage { foo: bytes = 0 } } + define_message! { SimpleMessage { foo: message(otherMessage) = 0, bar: enum(SimpleEnum) = 1 } } + define_enum! { SimpleEnum { default = 0, noodle = 1 } } + define_message! { + SignDoc { + body_bytes: bytes = 1, + auth_info_bytes: bytes = 2, + chain_id: string = 3, + account_number: uint64 = 4 + }} + + define_message! { + Any { + type_url: string = 1, + value: bytes = 2 + } + } + define_message! { + TxBody { + messages: repeated(message(Any)) = 1, + memo: string = 2, + timeout_height: int64 = 3, + extension_options: repeated(message(Any)) = 1023 + } + } + + any_of! { + FooAnyInterp { + TxBody: TxBody = b"some.uri.here", + SignDoc: SignDoc = b"some.other.uri.here" + } + } + + #[test] + fn test_messages() { + let mut input = TestReadable([(0<<3)+2,2,0,1],0); + let cell = core::cell::RefCell::new(0); + let interp = OtherMessageInterp { + field_foo: Action(Buffer::<5>, |a: ArrayVec| { *cell.borrow_mut()+=a.len() as u64; Some(()) }), + }; + if let Poll::Ready(res) = poll_once(interp.parse(&mut input, 4)) { + assert_eq!(res, ()); + assert_eq!(cell.into_inner(), 2); + } else { assert!(false, "Failed to parse") } + + let mut input = TestReadable([(1<<3)+2,2,0,1,(2<<3)+2,0,(3<<3)+2,0,(4<<3),4],0); + let cell = core::cell::RefCell::new(0); + let interp = SignDocInterp { + field_body_bytes: Action(Buffer::<5>, |a: ArrayVec| { *cell.borrow_mut()+=a.len() as u64; Some(()) }), + field_auth_info_bytes: DropInterp, // Action(Buffer::<5>, |_| Some(())), + field_chain_id: Action(Buffer::<5>, |_| Some(())), + field_account_number: Action(DefaultInterp, |a| { *cell.borrow_mut()+=a; Some(()) }) + }; + if let Poll::Ready(res) = poll_once(interp.parse(&mut input, 10)) { + assert_eq!(res, ()); + assert_eq!(cell.into_inner(), 6); + } else { assert!(false, "Failed to parse") } + + let mut input = TestReadable([(1<<3)+2,2,0,1,(2<<3)+2,0,(3<<3)+2,0,(4<<3),4],0); + let cell = core::cell::RefCell::new(0); + let interp = BytesAsMessage(SignDoc, SignDocInterp { + field_body_bytes: Action(Buffer::<5>, |a: ArrayVec| { *cell.borrow_mut()+=a.len() as u64; Some(()) }), + field_auth_info_bytes: DropInterp, // Action(Buffer::<5>, |_| Some(())), + field_chain_id: Action(Buffer::<5>, |_| Some(())), + field_account_number: Action(DefaultInterp, |a| { *cell.borrow_mut()+=a; Some(()) }) + }); + if let Poll::Ready(res) = poll_once(interp.parse(&mut input, 10)) { + assert_eq!(res, ()); + assert_eq!(cell.into_inner(), 6); + } else { assert!(false, "Failed to parse") } + + + // Testing embedding of Message in Bytes field + let mut input = TestReadable([(1<<3)+2,2,2,0,(2<<3)+2,0,(3<<3)+2,0,(4<<3),4],0); + let cell = core::cell::RefCell::new(0); + let interp = SignDocInterp { + field_body_bytes: + BytesAsMessage(OtherMessage, OtherMessageInterp { field_foo: Action(Buffer::<5>, |_| { *cell.borrow_mut()+=5; Some(()) }) }), + field_auth_info_bytes: DropInterp, // Action(Buffer::<5>, |_| Some(())), + field_chain_id: Action(Buffer::<5>, |_| Some(())), + field_account_number: Action(DefaultInterp, |a| { *cell.borrow_mut()+=a; Some(()) }) + }; + if let Poll::Ready(res) = poll_once(interp.parse(&mut input, 10)) { + assert_eq!(res, ()); + assert_eq!(cell.into_inner(), 9); + } else { assert!(false, "Failed to parse") } + } +} + diff --git a/src/protobufs/interp.rs b/src/protobufs/interp.rs new file mode 100644 index 0000000..6452a80 --- /dev/null +++ b/src/protobufs/interp.rs @@ -0,0 +1,4 @@ + +pub struct MessageFieldInterp(pub Value); + +// Note: Reusing Product and ProductIdentity diff --git a/src/protobufs/mod.rs b/src/protobufs/mod.rs new file mode 100644 index 0000000..057de11 --- /dev/null +++ b/src/protobufs/mod.rs @@ -0,0 +1,3 @@ +pub mod async_parser; +pub mod schema; +pub mod interp; diff --git a/src/protobufs/schema.rs b/src/protobufs/schema.rs new file mode 100644 index 0000000..08a5b09 --- /dev/null +++ b/src/protobufs/schema.rs @@ -0,0 +1,112 @@ +use core::ops::Mul; + +#[derive(FromPrimitive, PartialEq, Debug)] +#[repr(u8)] +pub enum ProtobufWire { + Varint = 0, + Fixed64Bit = 1, + LengthDelimited = 2, + StartGroup = 3, // Deprecated + EndGroup = 4, // Deprecated + Fixed32Bit = 5 +} + +pub trait ProtobufWireFormat { + const FORMAT: ProtobufWire; +} + +macro_rules! VarintPrimitive { + { $name:ident } => + { $crate::protobufs::async_parser::paste! { + pub struct [<$name:camel>]; + impl ProtobufWireFormat for [<$name:camel>] { + const FORMAT: ProtobufWire = ProtobufWire::Varint; + } + } } +} + +VarintPrimitive! { int32 } +VarintPrimitive! { int64 } +VarintPrimitive! { uint32 } +VarintPrimitive! { uint64 } +VarintPrimitive! { sint32 } +VarintPrimitive! { sint64 } +VarintPrimitive! { bool } + +// Varint +// pub struct Varint; +/* +impl ProtobufWireFormat for Varint { + const FORMAT: ProtobufWire = ProtobufWire::Varint; +} +*/ + +// 64 Bit +pub struct Fixed64; +impl ProtobufWireFormat for Fixed64 { + const FORMAT: ProtobufWire = ProtobufWire::Fixed64Bit; +} + +pub struct Sfixed64; +impl ProtobufWireFormat for Sfixed64 { + const FORMAT: ProtobufWire = ProtobufWire::Fixed64Bit; +} + +pub struct Double; +impl ProtobufWireFormat for Double { + const FORMAT: ProtobufWire = ProtobufWire::Fixed64Bit; +} + +// Uses WireLengthDelimted +pub struct String; +impl ProtobufWireFormat for String { + const FORMAT: ProtobufWire = ProtobufWire::LengthDelimited; +} +pub struct Bytes; +impl ProtobufWireFormat for Bytes { + const FORMAT: ProtobufWire = ProtobufWire::LengthDelimited; +} +pub struct Embedded; +impl ProtobufWireFormat for Embedded { + const FORMAT: ProtobufWire = ProtobufWire::LengthDelimited; +} +pub struct PackedRepeatedFields; +impl ProtobufWireFormat for PackedRepeatedFields { + const FORMAT: ProtobufWire = ProtobufWire::LengthDelimited; +} + +// Wire32Bit +pub struct Fixed32; +impl ProtobufWireFormat for Fixed32 { + const FORMAT: ProtobufWire = ProtobufWire::Fixed32Bit; +} +pub struct Sfixed32; +impl ProtobufWireFormat for Sfixed32 { + const FORMAT: ProtobufWire = ProtobufWire::Fixed32Bit; +} +pub struct Float; +impl ProtobufWireFormat for Float { + const FORMAT: ProtobufWire = ProtobufWire::Fixed32Bit; +} + +// Fields of a message +// A MessageField describes a message containing a particular field with the given type. +pub struct MessageField(Value); + +// Denote the intersection of two formats. +pub struct Product(A, B); +pub struct ProductIdentity; + +impl Mul for ProductIdentity { + type Output = Product; + fn mul(self, rhs: A) -> Self::Output { + Product(self, rhs) + } +} +impl Mul for Product { + type Output = Product>; + fn mul(self, rhs: C) -> Self::Output { + Product(self.0, Product(self.1, rhs)) + } +} + diff --git a/src/schema.rs b/src/schema.rs new file mode 100644 index 0000000..a2a956a --- /dev/null +++ b/src/schema.rs @@ -0,0 +1,31 @@ +pub use crate::endianness::Endianness; + +/// A schema for a single byte. +#[derive(Default)] +pub struct Byte; + +/// A schema for a fixed `N`-item array of `I`, laid out sequentially in the input stream with no +/// prefix. +#[derive(Default)] +pub struct Array(pub I); + +/// A Dynamic length array, with a maximum length. +/// +/// Memory layout is first the schema `N`, then `N` copies of schema `I`, with a maximum `N` of +/// `M`. +pub struct DArray(pub N, pub I); + +macro_rules! number_parser { + ($p:ident, $n:expr) => { + + #[doc="Parse an `E`-endian unsigned"] + #[doc=$n] + #[doc="-bit integer"] + #[derive(Default)] + pub struct $p; + } +} + +number_parser! { U16, "16" } +number_parser! { U32, "32" } +number_parser! { U64, "64" }