From 90ae9a4de016e3473864bf94c384171b4695b6f8 Mon Sep 17 00:00:00 2001 From: "Jonathan D.K. Gibbons" Date: Fri, 22 Apr 2022 09:49:06 -0400 Subject: [PATCH 01/16] WIP: use enum-init. --- Cargo.toml | 1 + src/interp_parser.rs | 83 ++++++++++++++++++++++++++++++++++++++------ src/lib.rs | 4 +++ 3 files changed, 77 insertions(+), 11 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ac27f9e..16603fd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ generic-array = { version = "0.14.4", default-features = false } log = "0.4.14" paste = "1.0" bstringify = "0.1.2" +enum-init = { path = "../../../enum-init" } [dependencies.ledger-log] git = "https://github.com/obsidiansystems/ledger-platform" diff --git a/src/interp_parser.rs b/src/interp_parser.rs index 2540341..ad02525 100644 --- a/src/interp_parser.rs +++ b/src/interp_parser.rs @@ -45,6 +45,9 @@ pub trait InterpParser

{ type State; type Returning; fn init(&self) -> Self::State; + fn init_in_place(&self, state: *mut core::mem::MaybeUninit) { + unsafe { (*state).as_mut_ptr().write(self.init()); } + } fn parse<'a, 'b>(&self, state: &'b mut Self::State, chunk: &'a [u8], destination: &mut Option) -> ParseResult<'a>; } @@ -77,6 +80,10 @@ pub fn set_from_thunk X>(x: &mut X, f: F) { pub fn call_me_maybe Option<()>>(f: F) -> Option<()> { f() } +#[inline(never)] +pub fn call_me_maybe2 Option<()>>(f: F) -> Option<()> { + f() +} impl InterpParser for DefaultInterp { type State = ByteState; @@ -273,10 +280,18 @@ impl> InterpParser for Action >::State, Option< >::Returning>); type Returning = R; + + #[inline(never)] fn init(&self) -> Self::State { (>::init(&self.0), None) } + #[inline(never)] + fn init_in_place(&self, state: *mut core::mem::MaybeUninit) { + self.0.init_in_place(unsafe { core::ptr::addr_of_mut!((*(*state).as_mut_ptr()).0) as *mut core::mem::MaybeUninit< >::State> }); + call_fn( || unsafe { (core::ptr::addr_of_mut!((*(*state).as_mut_ptr()).1) as *mut Option< >::Returning> ).write(None)} ); + } + #[inline(never)] fn parse<'a, 'b>(&self, state: &'b mut Self::State, chunk: &'a [u8], destination: &mut Option) -> ParseResult<'a> { let new_chunk = self.0.parse(&mut state.0, chunk, &mut state.1)?; @@ -310,15 +325,17 @@ pub struct Bind(pub S, pub F); // Initially the state is the state of the first subparser, and its result location // After the first subparser runs, if it failed, then the whole bind parser will fail // but if it succeeds, then the parser state transitions to BindSecond. +#[derive(InPlaceInit)] pub enum BindState,T:InterpParser> { BindFirst(S::State, Option< >::Returning>), - BindSecond(T, T::State) + BindSecond(T, >::State) } impl, T : InterpParser> InterpParser<(A,B)> for Bind>::Returning) -> Option> { type State = BindState; type Returning = >::Returning; + #[inline(never)] fn init(&self) -> Self::State { use BindState::*; #[cfg(feature = "logging")] @@ -326,6 +343,10 @@ impl, T : InterpParser> InterpParser<(A,B)> for Bin BindFirst (>::init(&self.0), None) } + fn init_in_place(&self, state: *mut core::mem::MaybeUninit) { + Self::State::init_bind_first(state, |a| >::init_in_place(&self.0, a), |b| call_fn( || unsafe { (*b).as_mut_ptr().write(None); })); + } + #[inline(never)] fn parse<'a, 'b>(&self, state: &'b mut Self::State, chunk: &'a [u8], destination: &mut Option) -> ParseResult<'a> { use BindState::*; @@ -354,9 +375,20 @@ impl, T : InterpParser> InterpParser<(A,B)> for Bin #[derive(Clone)] pub struct DynBind(pub S, pub F); +#[derive(InPlaceInit)] +#[repr(u8)] pub enum DynBindState,T:InterpParser> { BindFirst(S::State, Option< >::Returning>), - BindSecond(T::State) + BindSecond(>::State) +} + +#[inline(never)] +fn call_fn(f: impl FnOnce()) { + f() +} +#[inline(never)] +fn call_fn2(f: impl FnOnce()) { + f() } impl, T : DynInterpParser> InterpParser<(A,B)> for DynBind @@ -364,12 +396,18 @@ impl, T : DynInterpParser { type State = DynBindState; type Returning = >::Returning; + #[inline(never)] fn init(&self) -> Self::State { use DynBindState::*; #[cfg(feature = "logging")] error!("Bind T size: {} {}", core::mem::size_of::(), core::mem::size_of::()); BindFirst (>::init(&self.0), None) } + + #[inline(never)] + fn init_in_place(&self, state: *mut core::mem::MaybeUninit) { + Self::State::init_bind_first(state, |a| call_fn2(|| >::init_in_place(&self.0, a)), |b| call_fn2(|| unsafe { (*b).as_mut_ptr().write(None); })); + } #[inline(never)] fn parse<'a, 'b>(&self, state: &'b mut Self::State, chunk: &'a [u8], destination: &mut Option) -> ParseResult<'a> { @@ -379,14 +417,28 @@ impl, T : DynInterpParser match state { BindFirst(ref mut s, ref mut r) => { cursor = self.0.parse(s, cursor, r)?; - let r_temp = core::mem::take(r); call_me_maybe(|| { - *state = BindSecond(self.1.init()); - match state { - BindSecond(ref mut s) => self.1.init_param(r_temp?, s, destination), - _ => return None, - }; + let r_temp; + if let BindFirst(_, ref mut r) = state { + r_temp = core::mem::take(r)?; + } else { + unreachable!(); + } + Self::State::init_bind_second(unsafe { core::mem::transmute(state as *mut Self::State) }, |a| call_fn(|| self.1.init_in_place(a))); + if let BindSecond(ref mut s) = state { + self.1.init_param(r_temp, s, destination); + } else { + unreachable!(); + } Some(()) + /* call_me_maybe2(|| { + // *state = BindSecond(self.1.init()); + match state { + BindSecond(ref mut s) => call_fn(|| self.1.init_param(r_temp, s, destination)), + _ => return None, + }; + Some(()) + })*/ }).ok_or((Some(OOB::Reject), cursor))?; } BindSecond(ref mut s) => { @@ -403,7 +455,8 @@ impl, T: DynInterpParser) { - *state = DynBindState::BindFirst(>::init(&self.0), None); + self.init_in_place(unsafe { core::mem::transmute(state as *mut Self::State) }); + // *state = DynBindState::BindFirst(>::init(&self.0), None); match state { DynBindState::BindFirst(ref mut s, ref mut sub_destination) => self.0.init_param(param, s, sub_destination), _ => unreachable!(), @@ -420,6 +473,7 @@ impl(), S : InterpParser> InterpParser< // Making a compromise here; if we return our sub-parser's result still wrapped in Option, we // can avoid storing it in our own state and then copying. type Returning = (X, Option<>::Returning>); + #[inline(never)] fn init(&self) -> Self::State { None } @@ -461,6 +515,7 @@ pub enum PairState { impl, B : InterpParser, C, D> InterpParser<(C, D)> for (A, B) { type State = PairState<>::State, >::State>; type Returning = (Option, Option); + #[inline(never)] fn init(&self) -> Self::State { PairState::Init } @@ -506,6 +561,7 @@ macro_rules! def_table { } */ +#[derive(InPlaceInit)] pub enum LengthFallbackParserState { Length(N, NO), Element(usize, usize, IS), @@ -586,10 +642,15 @@ impl X, N, I, S : InterpParser, X: Clone, F: Fn(&mut X, &[u8] >::Returning: Copy { type State=LengthFallbackParserState<>::State, Option<>::Returning>, >::State>; type Returning = (Option<>::Returning>, X); + #[inline(never)] fn init(&self) -> Self::State { LengthFallbackParserState::Length(>::init(&DefaultInterp), None) } #[inline(never)] + fn init_in_place(&self, state: *mut core::mem::MaybeUninit) { + Self::State::init_length(state, |a| >::init_in_place(&DefaultInterp, a), |b| call_fn( || unsafe { (*b).as_mut_ptr().write(None); })); + } + #[inline(never)] fn parse<'a, 'b>(&self, state: &'b mut Self::State, chunk: &'a [u8], destination: &mut Option) -> ParseResult<'a> { use LengthFallbackParserState::*; let mut cursor : &'a [u8] = chunk; @@ -600,11 +661,11 @@ impl X, N, I, S : InterpParser, X: Clone, F: Fn(&mut X, &[u8] let len = >::Returning>>::try_from(length_out.ok_or(rej(cursor))?).or(Err(rej(cursor)))?; match destination { None => { - call_me_maybe(|| { + /*call_me_maybe(|| { let result = self.0(); *destination = Some((None, result)); Some(()) - }).ok_or(rej(cursor))?; + }).ok_or(rej(cursor))?;*/ } _ => { } } diff --git a/src/lib.rs b/src/lib.rs index 3a1976e..6f19509 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,6 +8,10 @@ #![reexport_test_harness_main = "test_main"] #![cfg_attr(target_os="nanos", test_runner(nanos_sdk::sdk_test_runner))] +#[macro_use] +extern crate enum_init; + + //#[cfg(all(not(target_os = "linux"), test))] //use nanos_sdk::exit_app; #[cfg(all(not(target_os = "linux"), test))] From 9faaa62c8b64461011046cdc58a5dfe485fb8a18 Mon Sep 17 00:00:00 2001 From: "Jonathan D.K. Gibbons" Date: Thu, 28 Apr 2022 12:09:01 -0400 Subject: [PATCH 02/16] WIP: mixed work for pocket; split out later. --- Cargo.toml | 2 +- src/interp_parser.rs | 109 ++++++++++++++++++++++++++++++++++++++----- src/json_interp.rs | 7 +++ 3 files changed, 105 insertions(+), 13 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 16603fd..4c1d49c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,7 +18,7 @@ generic-array = { version = "0.14.4", default-features = false } log = "0.4.14" paste = "1.0" bstringify = "0.1.2" -enum-init = { path = "../../../enum-init" } +enum-init = { git = "https://github.com/obsidiansystems/enum-init" } [dependencies.ledger-log] git = "https://github.com/obsidiansystems/ledger-platform" diff --git a/src/interp_parser.rs b/src/interp_parser.rs index ad02525..10f721c 100644 --- a/src/interp_parser.rs +++ b/src/interp_parser.rs @@ -3,7 +3,7 @@ use crate::endianness::{Endianness, Convert}; use arrayvec::ArrayVec; #[cfg(feature = "logging")] -use ledger_log::error; +use ledger_log::{trace,error}; #[derive(PartialEq, Debug)] pub enum OOB { @@ -307,8 +307,92 @@ impl> DynInterpParser for Action) { - state.0 = >::init(&self.0); - state.1 = None; + set_from_thunk(&mut state.0, || >::init(&self.0)); + set_from_thunk(&mut state.1, || None); + self.0.init_param(param, &mut state.0, &mut state.1); + } + } + +/* This impl exists to allow the _function_ of an Action to be the target of the parameter for + * DynInterpParser, thus giving an escape hatch to thread a parameter past a non-parameterized + * parser. Whether this should still be an Action as opposed to some other name is not immediately + * clear. */ +impl, C> InterpParser for Action>::Returning, &mut Option, C) -> Option<()>> +{ + type State = ( >::State, Option< >::Returning>, Option); + type Returning = R; + + #[inline(never)] + fn init(&self) -> Self::State { + (>::init(&self.0), None, None) + } + + #[inline(never)] + fn init_in_place(&self, state: *mut core::mem::MaybeUninit) { + self.0.init_in_place(unsafe { core::ptr::addr_of_mut!((*(*state).as_mut_ptr()).0) as *mut core::mem::MaybeUninit< >::State> }); + call_fn( || unsafe { (core::ptr::addr_of_mut!((*(*state).as_mut_ptr()).1) as *mut Option< >::Returning> ).write(None)} ); + call_fn( || unsafe { (core::ptr::addr_of_mut!((*(*state).as_mut_ptr()).2) as *mut Option ).write(None)} ); + } + + #[inline(never)] + fn parse<'a, 'b>(&self, state: &'b mut Self::State, chunk: &'a [u8], destination: &mut Option) -> ParseResult<'a> { + let new_chunk = self.0.parse(&mut state.0, chunk, &mut state.1)?; + match (self.1)(state.1.as_ref().ok_or((Some(OOB::Reject),new_chunk))?, destination, core::mem::take(&mut state.2).ok_or((Some(OOB::Reject),new_chunk))?) { + None => { Err((Some(OOB::Reject),new_chunk)) } + Some(()) => { Ok(new_chunk) } + } + } +} + +impl, C> DynInterpParser for Action>::Returning, &mut Option, C) -> Option<()>> + { + type Parameter = C; + #[inline(never)] + fn init_param(&self, param: Self::Parameter, state: &mut Self::State, _destination: &mut Option) { + set_from_thunk(&mut state.0, || >::init(&self.0)); + set_from_thunk(&mut state.1, || None); + set_from_thunk(&mut state.2, || Some(param)); + } + } + +/* A MoveAction is the same as an Action with the distinction that it takes it's argument via Move, + * thus enabling it to work with types that do not have Copy or Clone and have nontrivial semantics + * involving Drop. */ +pub struct MoveAction(pub S, pub F); +impl> InterpParser for MoveAction>::Returning, &mut Option) -> Option<()>> +{ + type State = ( >::State, Option< >::Returning>); + type Returning = R; + + #[inline(never)] + fn init(&self) -> Self::State { + (>::init(&self.0), None) + } + + #[inline(never)] + fn init_in_place(&self, state: *mut core::mem::MaybeUninit) { + self.0.init_in_place(unsafe { core::ptr::addr_of_mut!((*(*state).as_mut_ptr()).0) as *mut core::mem::MaybeUninit< >::State> }); + call_fn( || unsafe { (core::ptr::addr_of_mut!((*(*state).as_mut_ptr()).1) as *mut Option< >::Returning> ).write(None)} ); + } + + #[inline(never)] + fn parse<'a, 'b>(&self, state: &'b mut Self::State, chunk: &'a [u8], destination: &mut Option) -> ParseResult<'a> { + let new_chunk = self.0.parse(&mut state.0, chunk, &mut state.1)?; + match (self.1)(core::mem::take(&mut state.1).ok_or((Some(OOB::Reject),new_chunk))?, destination) { + None => { Err((Some(OOB::Reject),new_chunk)) } + Some(()) => { Ok(new_chunk) } + } + } +} + +impl> DynInterpParser for MoveAction>::Returning, &mut Option) -> Option<()>> + { + type Parameter = S::Parameter; + #[inline(never)] + fn init_param(&self, param: Self::Parameter, state: &mut Self::State, _destination: &mut Option) { + set_from_thunk(&mut state.0, || >::init(&self.0)); + set_from_thunk(&mut state.1, || None); self.0.init_param(param, &mut state.0, &mut state.1); } } @@ -338,13 +422,13 @@ impl, T : InterpParser> InterpParser<(A,B)> for Bin #[inline(never)] fn init(&self) -> Self::State { use BindState::*; - #[cfg(feature = "logging")] - error!("Bind T size: {} {}", core::mem::size_of::(), core::mem::size_of::()); + // #[cfg(feature = "logging")] + // error!("Bind T size: {} {}", core::mem::size_of::(), core::mem::size_of::()); BindFirst (>::init(&self.0), None) } fn init_in_place(&self, state: *mut core::mem::MaybeUninit) { - Self::State::init_bind_first(state, |a| >::init_in_place(&self.0, a), |b| call_fn( || unsafe { (*b).as_mut_ptr().write(None); })); + Self::State::init_bind_first(state, |a| >::init_in_place(&self.0, a), |b| call_fn2(|| unsafe { (*b).as_mut_ptr().write(None); })); } #[inline(never)] @@ -399,8 +483,8 @@ impl, T : DynInterpParser #[inline(never)] fn init(&self) -> Self::State { use DynBindState::*; - #[cfg(feature = "logging")] - error!("Bind T size: {} {}", core::mem::size_of::(), core::mem::size_of::()); + // #[cfg(feature = "logging")] + // error!("Bind T size: {} {}", core::mem::size_of::(), core::mem::size_of::()); BindFirst (>::init(&self.0), None) } @@ -501,7 +585,7 @@ impl(), S: InterpParser> DynInterpParser) { - *destination = Some((param, None)); + *destination = Some((param.clone(), None)); *state = Some(>::init(&self.2)); } } @@ -636,7 +720,7 @@ impl> InterpParser for LengthLimited { #[derive(Clone)] pub struct ObserveLengthedBytes X, X, F, S>(pub I, pub F, pub S, pub bool); -impl X, N, I, S : InterpParser, X: Clone, F: Fn(&mut X, &[u8])->()> InterpParser> for ObserveLengthedBytes where +impl X, N, I, S : InterpParser, X, F: Fn(&mut X, &[u8])->()> InterpParser> for ObserveLengthedBytes where DefaultInterp : InterpParser, usize: TryFrom<>::Returning>, >::Returning: Copy { @@ -734,14 +818,15 @@ impl X, N, I, S : InterpParser, X: Clone, F: Fn(&mut X, &[u8] } } -impl X, N, I, S : InterpParser, X: Clone, F: Fn(&mut X, &[u8])->()> DynInterpParser> for ObserveLengthedBytes where +impl X, N, I, S : InterpParser, X, F: Fn(&mut X, &[u8])->()> DynInterpParser> for ObserveLengthedBytes where DefaultInterp : InterpParser, usize: TryFrom<>::Returning>, >::Returning: Copy { type Parameter = X; #[inline(never)] fn init_param(&self, param: Self::Parameter, state: &mut Self::State, destination: &mut Option) { - *destination = Some((None, param)); + set_from_thunk(destination, || { Some((None, param)) }); + // *destination = Some((None, param.clone())); *state = LengthFallbackParserState::Length(>::init(&DefaultInterp), None) } } diff --git a/src/json_interp.rs b/src/json_interp.rs index 2c1e03d..11fc358 100644 --- a/src/json_interp.rs +++ b/src/json_interp.rs @@ -65,6 +65,9 @@ pub trait JsonInterp { type State; type Returning; fn init(&self) -> Self::State; + fn init_in_place(&self, state: *mut core::mem::MaybeUninit) { + unsafe { (*state).as_mut_ptr().write(self.init()); } + } fn parse<'a>(&self, state: &mut Self::State, token: JsonToken<'a>, destination: &mut Option) -> Result<(), Option>; } @@ -246,6 +249,10 @@ impl> InterpParser> for Json { fn init(&self) -> Self::State { (JsonTokenizerState::Value, >::init(&self.0)) } + fn init_in_place(&self, state: *mut core::mem::MaybeUninit) { + unsafe { (core::ptr::addr_of_mut!((*(*state).as_mut_ptr()).0) as *mut JsonTokenizerState).write(JsonTokenizerState::Value); } + self.0.init_in_place(unsafe { core::ptr::addr_of_mut!((*(*state).as_mut_ptr()).0) as *mut core::mem::MaybeUninit< >::State> }); + } #[inline(never)] fn parse<'a, 'b>(&self, state: &'b mut Self::State, chunk: &'a [u8], destination: &mut Option) -> ParseResult<'a> { let mut cursor : &[u8] = chunk; From 21f44d99b135f96a15c5e4cb7f5f14095d31dc21 Mon Sep 17 00:00:00 2001 From: "Jonathan D.K. Gibbons" Date: Sat, 21 May 2022 13:48:06 -0400 Subject: [PATCH 03/16] Initial implementation of async-parser; enough for example app. --- src/async_parser.rs | 246 ++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 8 +- 2 files changed, 253 insertions(+), 1 deletion(-) create mode 100644 src/async_parser.rs diff --git a/src/async_parser.rs b/src/async_parser.rs new file mode 100644 index 0000000..8c86071 --- /dev/null +++ b/src/async_parser.rs @@ -0,0 +1,246 @@ +use crate::core_parsers::*; +use crate::interp_parser::{SubInterp,DefaultInterp,Action, ObserveBytes, DropInterp}; +use crate::endianness::{Endianness, Convert}; + +use core::future::Future; +use core::convert::TryInto; +use arrayvec::ArrayVec; + +pub fn reject() -> impl Future { + // Do some out-of-band rejection thingie + core::future::pending() +} + +pub trait Readable { + type OutFut<'a, const N: usize> : 'a + Future; + fn read<'a: 'b, 'b, const N: usize>(&'a mut self) -> Self::OutFut<'b, N>; +} + +pub trait HasOutput { + type Output; +} + +pub trait AsyncParser : HasOutput { + type State<'c>: Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut ByteStream) -> Self::State<'c>; +} + +impl, const N: usize> HasOutput> for SubInterp { + type Output = [S::Output; N]; +} + +impl AsyncParser, ByteStream> for SubInterp where T : DefaultArray, S: AsyncParser { + type State<'c> = impl Future>::Output; N]>; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut ByteStream) -> Self::State<'c> { + async move { + let mut accumulator = ArrayVec::<>::Output, N>::new(); + while !accumulator.is_full() { + accumulator.push(>::parse(&self.0, input).await); + } + match accumulator.take().into_inner() { + Ok(rv) => rv, + _ => reject().await, + } + } + } +} + +impl HasOutput for DefaultInterp { + type Output = u8; +} + +impl AsyncParser for DefaultInterp { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut ByteStream) -> Self::State<'c> { + async move { + let [u] = input.read().await; + return u; + } + } +} + +pub auto trait DefaultArray { } + +impl !DefaultArray for Byte { } + + +impl HasOutput> for DefaultInterp { + type Output = [u8; N]; +} + +// We can't overlap +impl AsyncParser, ByteStream> for DefaultInterp { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut ByteStream) -> Self::State<'c> { + async move { + input.read().await + } + } +} + + +macro_rules! number_parser { + ($p:ident, $size:expr, $t:ty) => { + impl HasOutput<$p> for DefaultInterp { + type Output = $t; + } + impl AsyncParser<$p, ByteStream> for DefaultInterp where <$p as RV>::R : Convert { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut ByteStream) -> Self::State<'c> { + async move { + Convert::::deserialize(input.read::<$size>().await) + } + } + } + } +} +number_parser! { U16, 2, u16 } +number_parser! { U32, 4, u32 } +number_parser! { U64, 8, u64 } + +impl HasOutput> for DefaultInterp where T : DefaultArray, DefaultInterp: HasOutput { + type Output = [>::Output; N]; +} + +impl AsyncParser, ByteStream> for DefaultInterp where T : DefaultArray, DefaultInterp: AsyncParser { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut ByteStream) -> Self::State<'c> { + as AsyncParser, ByteStream>>::parse(&SubInterp(DefaultInterp), input) + } +} + +impl, const M: usize> HasOutput> for SubInterp { + type Output = ArrayVec; +} + +impl AsyncParser, ByteStream> for SubInterp where S: AsyncParser, DefaultInterp: AsyncParser, >::Output: TryInto { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut ByteStream) -> Self::State<'c> { + async move { + let length : usize = match DefaultInterp.parse(input).await.try_into() { + Ok(a) => a, + Err(_) => reject().await, + }; + let mut accumulator = ArrayVec::new(); + for _ in 1..length { + accumulator.push(self.0.parse(input).await); + } + accumulator + } + } +} + +impl HasOutput> for DropInterp { + type Output = (); +} + +impl AsyncParser, ByteStream> for DropInterp where DefaultInterp: AsyncParser, DefaultInterp: AsyncParser, >::Output: TryInto +, DefaultInterp: AsyncParser { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut ByteStream) -> Self::State<'c> { + async move { + let length : usize = match >::parse(&DefaultInterp, input).await.try_into() { + Ok(a) => a, + Err(_) => reject().await, + }; + for _ in 1..length { + >::parse(&DefaultInterp, input).await; + } + } + } +} + +impl, R> HasOutput for Action>::Output) -> Option> { + type Output = R; +} + +impl, R, ByteStream: Readable> AsyncParser for Action>::Output) -> Option> { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut ByteStream) -> Self::State<'c> { + async move { + match self.1(self.0.parse(input).await) { + Some(a) => a, + None => reject().await, + } + } + } +} + +impl HasOutput for Action) -> Option<()>> { + type Output = R; +} + +impl, ByteStream: Readable> AsyncParser for Action>::Output, &mut Option) -> Option<()>> { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut ByteStream) -> Self::State<'c> { + async move { + let mut destination = None; + match self.1(&self.0.parse(input).await, &mut destination) { + Some(()) => { match destination { + Some(a) => a, + None => reject().await, + } } + None => reject().await, + } + } + } +} + +pub struct FAction(pub S, pub fn(O) -> Option); + +impl, R> HasOutput for FAction { + type Output = R; +} + +impl, R, ByteStream: Readable> AsyncParser for FAction>::Output, R> { // FAction>::Output) -> Option> { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut ByteStream) -> Self::State<'c> { + async move { + match self.1(self.0.parse(input).await) { + Some(a) => a, + None => reject().await, + } + } + } +} + +impl> HasOutput for ObserveBytes { + type Output = (X, Option<>::Output>); +} + +pub struct HashIntercept(pub BS, pub X); + +impl Readable for HashIntercept { + type OutFut<'a, const N: usize> = impl core::future::Future; + fn read<'a: 'b, 'b, const N: usize>(&'a mut self) -> Self::OutFut<'b, N> { + self.0.read() + } +} + +impl>, A, ByteStream: 'static + Readable + Clone> AsyncParser for ObserveBytes { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut ByteStream) -> Self::State<'c> { + async move { + let mut hi = HashIntercept(input.clone(), (self.0)()); + let rv = self.2.parse(&mut hi).await; + *input = hi.0; + (hi.1, Some(rv)) + } + } +} + +impl, T: HasOutput> HasOutput<(A, B)> for (S, T) { + type Output = (Option, Option); +} + + +impl, T: AsyncParser, ByteStream: Readable> AsyncParser<(A, B), ByteStream> for (S, T) { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut ByteStream) -> Self::State<'c> { + async move { + let t = self.0.parse(input).await; + let s = self.1.parse(input).await; + (Some(t), Some(s)) + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 6f19509..d40ab32 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,7 +2,11 @@ #![allow(incomplete_features)] #![feature(bindings_after_at)] #![feature(const_generics)] - +#![feature(min_type_alias_impl_trait)] +#![feature(generic_associated_types)] +#![feature(auto_traits)] +#![feature(negative_impls)] +#![feature(member_constraints)] #![cfg_attr(all(target_os="nanos", test), no_main)] #![cfg_attr(target_os="nanos", feature(custom_test_frameworks))] #![reexport_test_harness_main = "test_main"] @@ -59,3 +63,5 @@ pub mod interp_parser; pub mod json; pub mod json_interp; + +pub mod async_parser; From 6f7c59308ccf38560ed1b71bd273b2f05bfb756c Mon Sep 17 00:00:00 2001 From: "Jonathan D.K. Gibbons" Date: Mon, 23 May 2022 17:08:50 -0400 Subject: [PATCH 04/16] Documentation! and minor backwards-compatible reorg. --- src/async_parser.rs | 106 +++++++++++++++++++++------------ src/core_parsers.rs | 14 ++--- src/interp.rs | 78 ++++++++++++++++++++++++ src/interp_parser.rs | 138 +++++++++++++++++++++++-------------------- src/lib.rs | 4 +- src/schema.rs | 31 ++++++++++ 6 files changed, 259 insertions(+), 112 deletions(-) create mode 100644 src/interp.rs create mode 100644 src/schema.rs diff --git a/src/async_parser.rs b/src/async_parser.rs index 8c86071..b6d3c7a 100644 --- a/src/async_parser.rs +++ b/src/async_parser.rs @@ -1,18 +1,32 @@ -use crate::core_parsers::*; -use crate::interp_parser::{SubInterp,DefaultInterp,Action, ObserveBytes, DropInterp}; +//! Parser implmementation using Futures. +//! +//! The main entry point of this module is [AsyncParser]. +//! +//! Note: Currently, all of the parsers in this module are implemented using async blocks, but it is +//! worth remembering that if for some reason one of them is exhibiting poor stack / state +//! behavior, we _can_ manually construct a state type and directly implement Future for that +//! state. +//! +use crate::schema::*; +use crate::interp::{SubInterp,DefaultInterp,Action, ObserveBytes, DropInterp}; use crate::endianness::{Endianness, Convert}; use core::future::Future; use core::convert::TryInto; use arrayvec::ArrayVec; +/// Reject the parse. pub fn reject() -> impl Future { // Do some out-of-band rejection thingie core::future::pending() } +/// Readable defines an interface for input to a parser. pub trait Readable { + /// Type alias for the future type of read type OutFut<'a, const N: usize> : 'a + Future; + /// read N bytes from this Readable; returns a future that will complete with a byte array of + /// the result. fn read<'a: 'b, 'b, const N: usize>(&'a mut self) -> Self::OutFut<'b, N>; } @@ -20,22 +34,29 @@ pub trait HasOutput { type Output; } -pub trait AsyncParser : HasOutput { +/// Core trait; parser that consumes a Readable according to the Schema and returns a future returning the result type. +/// +/// +pub trait AsyncParser : HasOutput { + /// Parse input, returning a future that returns this parser's return type for this schema. + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c>; + + /// Type synonym for the future returned by this parser. type State<'c>: Future; - fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut ByteStream) -> Self::State<'c>; + } impl, const N: usize> HasOutput> for SubInterp { type Output = [S::Output; N]; } -impl AsyncParser, ByteStream> for SubInterp where T : DefaultArray, S: AsyncParser { +impl AsyncParser, BS> for SubInterp where T : DefaultArray, S: AsyncParser { type State<'c> = impl Future>::Output; N]>; - fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut ByteStream) -> Self::State<'c> { + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { async move { let mut accumulator = ArrayVec::<>::Output, N>::new(); while !accumulator.is_full() { - accumulator.push(>::parse(&self.0, input).await); + accumulator.push(>::parse(&self.0, input).await); } match accumulator.take().into_inner() { Ok(rv) => rv, @@ -49,9 +70,9 @@ impl HasOutput for DefaultInterp { type Output = u8; } -impl AsyncParser for DefaultInterp { +impl AsyncParser for DefaultInterp { type State<'c> = impl Future; - fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut ByteStream) -> Self::State<'c> { + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { async move { let [u] = input.read().await; return u; @@ -59,34 +80,40 @@ impl AsyncParser for DefaultInterp { } } +/// DefaultArray marks array element types where there is no specialization for the array type. +/// +/// This allows trait selection to operate with positive reasoning except for +/// where we implement !DefaultArray explicitly for particular types, such as Byte. pub auto trait DefaultArray { } impl !DefaultArray for Byte { } - +/// We can implement DefaultInterp for [u8; N] becuase Byte has !DefaultArray. impl HasOutput> for DefaultInterp { type Output = [u8; N]; } -// We can't overlap -impl AsyncParser, ByteStream> for DefaultInterp { +/// We can implement DefaultInterp for [u8; N] becuase Byte has !DefaultArray. +/// +/// Overlap is not permitted, but this doesn't overlap because we've disabled the default +/// implementation. +impl AsyncParser, BS> for DefaultInterp { type State<'c> = impl Future; - fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut ByteStream) -> Self::State<'c> { + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { async move { input.read().await } } } - macro_rules! number_parser { ($p:ident, $size:expr, $t:ty) => { impl HasOutput<$p> for DefaultInterp { type Output = $t; } - impl AsyncParser<$p, ByteStream> for DefaultInterp where <$p as RV>::R : Convert { + impl AsyncParser<$p, BS> for DefaultInterp where $t : Convert { type State<'c> = impl Future; - fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut ByteStream) -> Self::State<'c> { + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { async move { Convert::::deserialize(input.read::<$size>().await) } @@ -102,10 +129,10 @@ impl HasOutput> for DefaultInterp where T : Defau type Output = [>::Output; N]; } -impl AsyncParser, ByteStream> for DefaultInterp where T : DefaultArray, DefaultInterp: AsyncParser { +impl AsyncParser, BS> for DefaultInterp where T : DefaultArray, DefaultInterp: AsyncParser { type State<'c> = impl Future; - fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut ByteStream) -> Self::State<'c> { - as AsyncParser, ByteStream>>::parse(&SubInterp(DefaultInterp), input) + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + as AsyncParser, BS>>::parse(&SubInterp(DefaultInterp), input) } } @@ -113,9 +140,9 @@ impl, const M: usize> HasOutput> for SubIn type Output = ArrayVec; } -impl AsyncParser, ByteStream> for SubInterp where S: AsyncParser, DefaultInterp: AsyncParser, >::Output: TryInto { +impl AsyncParser, BS> for SubInterp where S: AsyncParser, DefaultInterp: AsyncParser, >::Output: TryInto { type State<'c> = impl Future; - fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut ByteStream) -> Self::State<'c> { + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { async move { let length : usize = match DefaultInterp.parse(input).await.try_into() { Ok(a) => a, @@ -134,17 +161,17 @@ impl HasOutput> for DropInterp { type Output = (); } -impl AsyncParser, ByteStream> for DropInterp where DefaultInterp: AsyncParser, DefaultInterp: AsyncParser, >::Output: TryInto -, DefaultInterp: AsyncParser { +impl AsyncParser, BS> for DropInterp where DefaultInterp: AsyncParser, DefaultInterp: AsyncParser, >::Output: TryInto +, DefaultInterp: AsyncParser { type State<'c> = impl Future; - fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut ByteStream) -> Self::State<'c> { + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { async move { - let length : usize = match >::parse(&DefaultInterp, input).await.try_into() { + let length : usize = match >::parse(&DefaultInterp, input).await.try_into() { Ok(a) => a, Err(_) => reject().await, }; for _ in 1..length { - >::parse(&DefaultInterp, input).await; + >::parse(&DefaultInterp, input).await; } } } @@ -154,9 +181,9 @@ impl, R> HasOutput for Action>::O type Output = R; } -impl, R, ByteStream: Readable> AsyncParser for Action>::Output) -> Option> { +impl, R, BS: Readable> AsyncParser for Action>::Output) -> Option> { type State<'c> = impl Future; - fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut ByteStream) -> Self::State<'c> { + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { async move { match self.1(self.0.parse(input).await) { Some(a) => a, @@ -170,9 +197,9 @@ impl HasOutput for Action) -> Option<( type Output = R; } -impl, ByteStream: Readable> AsyncParser for Action>::Output, &mut Option) -> Option<()>> { +impl, BS: Readable> AsyncParser for Action>::Output, &mut Option) -> Option<()>> { type State<'c> = impl Future; - fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut ByteStream) -> Self::State<'c> { + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { async move { let mut destination = None; match self.1(&self.0.parse(input).await, &mut destination) { @@ -186,15 +213,16 @@ impl, ByteStream: Readable> AsyncParser(pub S, pub fn(O) -> Option); impl, R> HasOutput for FAction { type Output = R; } -impl, R, ByteStream: Readable> AsyncParser for FAction>::Output, R> { // FAction>::Output) -> Option> { +impl, R, BS: Readable> AsyncParser for FAction>::Output, R> { // FAction>::Output) -> Option> { type State<'c> = impl Future; - fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut ByteStream) -> Self::State<'c> { + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { async move { match self.1(self.0.parse(input).await) { Some(a) => a, @@ -208,6 +236,9 @@ impl> HasOutput for ObserveBytes { type Output = (X, Option<>::Output>); } +/// HashIntercept wraps a Readable and updates a hash-like object with the data as it passes. +/// +/// Used to support ObserveBytes for async parsers. pub struct HashIntercept(pub BS, pub X); impl Readable for HashIntercept { @@ -217,9 +248,10 @@ impl Readable for HashIntercept { } } -impl>, A, ByteStream: 'static + Readable + Clone> AsyncParser for ObserveBytes { +/// ObserveBytes for AsyncParser operates by passing a HashIntercept to the sub-parser. +impl>, A, BS: 'static + Readable + Clone> AsyncParser for ObserveBytes { type State<'c> = impl Future; - fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut ByteStream) -> Self::State<'c> { + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { async move { let mut hi = HashIntercept(input.clone(), (self.0)()); let rv = self.2.parse(&mut hi).await; @@ -233,10 +265,10 @@ impl, T: HasOutput> HasOutput<(A, B)> for (S, T) { type Output = (Option, Option); } - -impl, T: AsyncParser, ByteStream: Readable> AsyncParser<(A, B), ByteStream> for (S, T) { +/// Pairs of parsers parse the sequence of their two schemas. +impl, T: AsyncParser, BS: Readable> AsyncParser<(A, B), BS> for (S, T) { type State<'c> = impl Future; - fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut ByteStream) -> Self::State<'c> { + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { async move { let t = self.0.parse(input).await; let s = self.1.parse(input).await; diff --git a/src/core_parsers.rs b/src/core_parsers.rs index dbcfd2a..1b502a0 100644 --- a/src/core_parsers.rs +++ b/src/core_parsers.rs @@ -1,24 +1,21 @@ use crate::endianness::Endianness; +pub use crate::schema::*; + // use generic_array::{ArrayLength, GenericArray}; pub trait RV { type R; } -#[derive(Default)] -pub struct Byte; impl RV for Byte { type R = u8; } -#[derive(Default)] -pub struct Array(pub I); - +/// Fixed-length array. impl< I : RV, const N : usize > RV for Array { type R = [I::R; N]; } -pub struct DArray(pub N, pub I); use arrayvec::ArrayVec; use core::convert::TryInto; @@ -31,9 +28,6 @@ impl< N : RV, I : RV, const M : usize > RV for DArray where macro_rules! number_parser { ($p:ident, $t:ty) => { - #[derive(Default)] - pub struct $p; - impl RV for $p { type R = $t; } @@ -60,6 +54,8 @@ impl< I : RV, N : RV > RV for NOf where //pub struct DArray; //pub struct Table; +/// LengthFallback; frame a subparser with a length byte. pub struct LengthFallback(pub N, pub S); +/// Alternative; schema is either A or B. pub struct Alt(pub A, pub B); diff --git a/src/interp.rs b/src/interp.rs new file mode 100644 index 0000000..b059e6c --- /dev/null +++ b/src/interp.rs @@ -0,0 +1,78 @@ +//! Generic "interpretations" for schema types. + +/// Parse the schema using the "natural" interpretation. +/// +/// usually this translates the input to an equivalent rust type or struct, for instance +/// `DefaultIntepr` on a `U32<{Endianness::Big}>` schema will produce a `u32` in the ledger's native +/// little-endian byte order, and an `Array,5>` would produce a `[u16;5]` +/// result; `DArray` becomes `ArrayVec`, etc. + +pub struct DefaultInterp; + +/// For array-like schemas; given a subparser for the item, produce a parser for the whole array. +pub struct SubInterp(pub S); + +/// Structurally checks and skips the schema in the input, but consumes only the minimum memory +/// required to do so and returns nothing. +pub struct DropInterp; + + +/// Action is essentailly an fmap that can fail. +/// +/// We _could_ constraint F to actually be an fn(..) -> Option<()> to improve error messages when +/// functions do not have the correct shape, but that reduces our ability to write different +/// instances later. +#[derive(Clone)] +pub struct Action(pub S, pub F); + +/// A MoveAction is the same as an Action with the distinction that it takes it's argument via Move, +/// thus enabling it to work with types that do not have Copy or Clone and have nontrivial semantics +/// involving Drop. +pub struct MoveAction(pub S, pub F); + +#[derive(Clone)] +/// Monadic bind. +/// +/// S is the first subparser to run +/// F is a function that returns the continuation parser to run, which can depend on the result of S +pub struct Bind(pub S, pub F); + +/// Bind, with reduced-copying parameter passing. +/// +/// This is the main consumer for DynInterpParser; rather than constructing a fully new parser F, +/// this uses a static second parser that accepts a parameter via DynInterpParser to achieve a +/// similar result. This is possibly sufficiently less flexible that calling it monadic is +/// incorrect. +#[derive(Clone)] +pub struct DynBind(pub S, pub F); + +/// ObserveBytes runs a subparser and in parallel feeds the bytes consumed by the subparser into an +/// "observer", typically a hashing algorithm. +/// +/// The first parameter is a function to build the observer, the second is the function to update +/// the observer, and S is the subparser. +#[derive(Clone)] +pub struct ObserveBytes(pub fn() -> X, pub F, pub S); + +/// Essentially SubInterp but for LengthFallback. +#[derive(Clone)] +pub struct LengthLimited { + pub bytes_limit : usize, + pub subparser : S +} + +/// ObserveLengthedBytes is notionally the composition of a LengthFallback parser with +/// ObserveBytes. +/// +/// This has the caveat that the length portion of the LengthFallback is _not_ fed to the +/// hashing function, which complicates separated implementation. +/// +/// * I is a closure to initialize the observer of the input, namely X, which is usually a hasher +/// * F is a method which does the observing for the observer. +/// * S is the parser for the input of the hasher from the raw input +/// * the bool specifies whether to hard reject if the subparser fails. +/// +/// Note that ObserveLengthedBytes also consumes a length prefix from the raw input +/// Confer: LengthFallback +#[derive(Clone)] +pub struct ObserveLengthedBytes X, X, F, S>(pub I, pub F, pub S, pub bool); diff --git a/src/interp_parser.rs b/src/interp_parser.rs index 10f721c..2a27c6e 100644 --- a/src/interp_parser.rs +++ b/src/interp_parser.rs @@ -1,10 +1,29 @@ +//! The parsers in interp_parser divide "Schema" from "Interpretation". +//! +//! The core trait is [InterpParser], and that together with [DynInterpParser] form the public +//! interface of this module. +//! +//! Schema in this context describes the structure of the input; the "one byte of length and then +//! up to ten 32-bit numbers" part. +//! +//! Interpretation is then what we want to _do_ with the input; for ledger apps, this is usually +//! "cache it for a moment to prompt", "prompt for the return values of these sub-parsers" or +//! "forget it", corresponding to DefaultInterp, Action, and DropInterp. +//! +//! Arrays of values in the input we usually don't want to cache, as that would require either a +//! limit or non-constant memory; for those we handle them piece by piece with SubInterp. use crate::core_parsers::*; use crate::endianness::{Endianness, Convert}; use arrayvec::ArrayVec; +pub use crate::interp::*; + #[cfg(feature = "logging")] use ledger_log::{trace,error}; + +/// Out-Of-Band messages; currently only Reject, but could plausibly be used to do prompts in a +/// co-routine way. #[derive(PartialEq, Debug)] pub enum OOB { // Prompt removed due to excessive memory use; we gain testability improvements if we can @@ -15,21 +34,25 @@ pub enum OOB { Reject } -// PResult stands for Partial Result -// None = Incomplete +/// PResult stands for Partial Result +/// None = Incomplete pub type PResult = Option; -// This represents the part of the input that hasn't been yet consumed by the -// parser, represented as a slice of the input. +/// This represents the part of the input that hasn't been yet consumed by the +/// parser, represented as a slice of the input. pub type RemainingSlice<'a> = &'a [u8]; -// If the parser does its job correctly, we just need to return the remaining -// slice. If the parser still needs data, it will return a None (and in that -// case the remaining slice is empty because we consumed it all). If the parser -// encounters an error condition, it will signal it in the OOB type, and we'll -// return the remaining slice for further elaboration or resuming. + +/// Return type for a parser. +/// +/// If the parser completes its job correctly, we just need to return the remaining +/// slice. If the parser still needs data, it will return a None (and in that +/// case the remaining slice is empty because we consumed it all). If the parser +/// encounters an error condition, it will signal it in the OOB type, and we'll +/// return the remaining slice for further elaboration or resuming. pub type ParseResult<'a> = Result, (PResult, RemainingSlice<'a>)>; + pub fn reject<'a, R>(chunk: &'a [u8]) -> Result, &'a [u8])> { Err((Some(OOB::Reject), chunk)) } @@ -41,6 +64,18 @@ pub fn need_more<'a, R>(chunk: &'a [u8]) -> Result, &'a [u8])> // Core trait; describes an "interpretation" of a given datatype (specified with the types from // core_parsers), which can have a variable return type and do stateful actions. +/// An InterpParser provides an actual parser between a schema `P` by interpretation mechanism +/// `Self`. +/// +/// To use, first create a variable of type `State` by `init`, and initialize a destination to +/// None; and then repeatedly call `parse` with chunks of input until the result is `Ok(())`; at +/// that point destination will be filled. destination may become non-None at any point during the +/// parse, and should be a stable location. +/// +/// For better memory usage during initialization in most cases, instead of using `init` declare a +/// `MaybeUninit` variable and use `init_in_place` to construct a state in it, then convert it to +/// `State`. + pub trait InterpParser

{ type State; type Returning; @@ -51,18 +86,16 @@ pub trait InterpParser

{ fn parse<'a, 'b>(&self, state: &'b mut Self::State, chunk: &'a [u8], destination: &mut Option) -> ParseResult<'a>; } +/// DynInterpParser extends InterpParser by adding a method to set a parameter into the parser +/// as part of initialization. +/// +/// This is mostly useful to avoid duplication of memory when implementing DynBind. + pub trait DynInterpParser

: InterpParser

{ type Parameter; fn init_param(&self, params: Self::Parameter, state: &mut Self::State, destination: &mut Option); } -pub struct DefaultInterp; - -pub struct SubInterp(pub S); - -// Structurally checks and skips the format, but consumes only the minimum memory required to do so -// and returns nothing. -pub struct DropInterp; pub struct ByteState; @@ -158,10 +191,10 @@ impl, const N : usize> InterpParser> for Sub } macro_rules! number_parser { - ($p:ident, $size:expr) => { - impl InterpParser<$p> for DefaultInterp where <$p as RV>::R : Convert { + ($p:ident, $size:expr, $t:ty) => { + impl InterpParser<$p> for DefaultInterp where $t : Convert { type State = >>::State; - type Returning = <$p as RV>::R; + type Returning = $t; fn init(&self) -> Self::State { >>::init(&DefaultInterp) } @@ -189,9 +222,9 @@ macro_rules! number_parser { } } } -number_parser! { U16, 2 } -number_parser! { U32, 4 } -number_parser! { U64, 8 } +number_parser! { U16, 2, u16 } +number_parser! { U32, 4, u32 } +number_parser! { U64, 8, u64 } pub enum ForwardDArrayParserState { Length(N), @@ -269,13 +302,6 @@ impl< N, I, const M : usize> InterpParser> for DefaultInterp whe } */ -// Action is essentailly an fmap that can fail. -// We _could_ constraint F to actually be an fn(..) -> Option<()> to improve error messages when -// functions do not have the correct shape, but that reduces our ability to write different -// instances later. -#[derive(Clone)] -pub struct Action(pub S, pub F); - impl> InterpParser for Action>::Returning, &mut Option) -> Option<()>> { type State = ( >::State, Option< >::Returning>); @@ -313,10 +339,10 @@ impl> DynInterpParser for Action, C> InterpParser for Action>::Returning, &mut Option, C) -> Option<()>> { @@ -356,10 +382,6 @@ impl, C> DynInterpParser for Action(pub S, pub F); impl> InterpParser for MoveAction>::Returning, &mut Option) -> Option<()>> { type State = ( >::State, Option< >::Returning>); @@ -401,14 +423,12 @@ fn rej<'a>(cnk: &'a [u8]) -> (PResult, RemainingSlice<'a>) { (Some(OOB::Reject), cnk) } -#[derive(Clone)] -// S is the first subparser to run -// F is the continuation parser to run, which can depend on the result of S -pub struct Bind(pub S, pub F); -// Initially the state is the state of the first subparser, and its result location -// After the first subparser runs, if it failed, then the whole bind parser will fail -// but if it succeeds, then the parser state transitions to BindSecond. +/// State for Bind +/// +/// Initially the state is the state of the first subparser, and its result location +/// After the first subparser runs, if it failed, then the whole bind parser will fail +/// but if it succeeds, then the parser state transitions to BindSecond. #[derive(InPlaceInit)] pub enum BindState,T:InterpParser> { BindFirst(S::State, Option< >::Returning>), @@ -456,9 +476,6 @@ impl, T : InterpParser> InterpParser<(A,B)> for Bin } } -#[derive(Clone)] -pub struct DynBind(pub S, pub F); - #[derive(InPlaceInit)] #[repr(u8)] pub enum DynBindState,T:InterpParser> { @@ -548,9 +565,6 @@ impl, T: DynInterpParser(pub fn() -> X, pub F, pub S); - impl(), S : InterpParser> InterpParser for ObserveBytes { type State = Option<>::State>; @@ -596,6 +610,7 @@ pub enum PairState { Second(B), } +/// Pairs of parsers parse the sequence of their schema types, and return a pair of their results. impl, B : InterpParser, C, D> InterpParser<(C, D)> for (A, B) { type State = PairState<>::State, >::State>; type Returning = (Option, Option); @@ -660,11 +675,13 @@ pub struct LengthLimitedState { } // Now define the parser type, which will resemble the mirror image of the state -#[derive(Clone)] -pub struct LengthLimited { - bytes_limit : usize, - subparser : S -} +// Defined in interp.rs: +// +// #[derive(Clone)] +// pub struct LengthLimited { +// bytes_limit : usize, +// subparser : S +//} // Implement InterpParser for the parser impl> InterpParser for LengthLimited { @@ -712,13 +729,6 @@ impl> InterpParser for LengthLimited { } } -// I is a closure to initialize the observer of the input, namely X, which is usually a hasher -// F is a method which does the observing for the observer. -// S is the parser for the input of the hasher from the raw input -// Note that ObserveLengthedBytes also consumes a length prefix from the raw input -// Confer: LengthFallback -#[derive(Clone)] -pub struct ObserveLengthedBytes X, X, F, S>(pub I, pub F, pub S, pub bool); impl X, N, I, S : InterpParser, X, F: Fn(&mut X, &[u8])->()> InterpParser> for ObserveLengthedBytes where DefaultInterp : InterpParser, @@ -913,9 +923,7 @@ mod test { } None => { panic!("Ran out of input chunks before parser accepted"); - } - } - } + } } } Ok(new_cursor) => { assert_eq!(destination.as_ref().unwrap(), result); assert_eq!(new_cursor, &[][..]); diff --git a/src/lib.rs b/src/lib.rs index d40ab32..5579b2a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -52,12 +52,14 @@ fn handle_panic(_: &PanicInfo) -> ! { exit_app(0); } +pub mod schema; +pub mod endianness; +pub mod interp; pub mod core_parsers; // pub mod forward_parser; -pub mod endianness; pub mod interp_parser; diff --git a/src/schema.rs b/src/schema.rs new file mode 100644 index 0000000..a2a956a --- /dev/null +++ b/src/schema.rs @@ -0,0 +1,31 @@ +pub use crate::endianness::Endianness; + +/// A schema for a single byte. +#[derive(Default)] +pub struct Byte; + +/// A schema for a fixed `N`-item array of `I`, laid out sequentially in the input stream with no +/// prefix. +#[derive(Default)] +pub struct Array(pub I); + +/// A Dynamic length array, with a maximum length. +/// +/// Memory layout is first the schema `N`, then `N` copies of schema `I`, with a maximum `N` of +/// `M`. +pub struct DArray(pub N, pub I); + +macro_rules! number_parser { + ($p:ident, $n:expr) => { + + #[doc="Parse an `E`-endian unsigned"] + #[doc=$n] + #[doc="-bit integer"] + #[derive(Default)] + pub struct $p; + } +} + +number_parser! { U16, "16" } +number_parser! { U32, "32" } +number_parser! { U64, "64" } From f05b7e68dcbfffa5f8fa2845edf73202c967fc24 Mon Sep 17 00:00:00 2001 From: "Jonathan D.K. Gibbons" Date: Fri, 3 Jun 2022 12:05:33 -0400 Subject: [PATCH 05/16] Initial protobufs parser. --- .cargo/config | 28 ++-- Cargo.toml | 2 + src/interp.rs | 4 + src/lib.rs | 7 + src/protobufs/async_parser.rs | 289 ++++++++++++++++++++++++++++++++++ src/protobufs/interp.rs | 4 + src/protobufs/mod.rs | 3 + src/protobufs/schema.rs | 93 +++++++++++ 8 files changed, 420 insertions(+), 10 deletions(-) create mode 100644 src/protobufs/async_parser.rs create mode 100644 src/protobufs/interp.rs create mode 100644 src/protobufs/mod.rs create mode 100644 src/protobufs/schema.rs diff --git a/.cargo/config b/.cargo/config index 463a7ac..62921b0 100644 --- a/.cargo/config +++ b/.cargo/config @@ -1,25 +1,33 @@ [unstable] -build-std = ["core"] +# build-std = ["core"] +configurable-env = true [target.thumbv6m-none-eabi] rustflags = [ "-C", "relocation-model=ropi", "-C", "link-arg=-Tscript.ld", - "-Z", "emit-stack-sizes", - "--cfg", "target_os=\"nanos\"", "-C", "opt-level=3", - "-C", "link-arg=--target=thumbv6m-none-eabi", "-C", "passes=ledger-ropi", + "-C", "codegen-units=1", + "-C", "lto", + "-C", "embed-bitcode", + "--emit=obj,llvm-ir,llvm-bc", "--cfg", "target_os=\"nanos\"", - "--emit=llvm-ir" + "-Z", "macro-backtrace" +# "-Z", "print-type-sizes", ] -linker = "clang" -runner = "speculos --display headless -k 2.0" +linker = "armv6m-unknown-none-eabi-clang" +runner = "./speculos-wrapper -k 2.0" [build] -target = "thumbv6m-none-eabi" -# target = "x86_64-unknown-linux-gnu" +# target = "thumbv6m-none-eabi" +# rustdocflags = [ "--cfg", "target_os=\"nanos\"" ] [alias] +tt = "test --features extra_debug" br = "build --release" -stackcheck = "stack-sizes --release --bin rust-app --target=thumbv6m-none-eabi" +stackcheck = "stack-sizes --release --bin pocket --target=thumbv6m-none-eabi" +unit = "test --features=speculos -Z build-std=core --lib" + +[env] +RUST_TEST_THREADS="1" diff --git a/Cargo.toml b/Cargo.toml index 4c1d49c..233ffde 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,8 @@ log = "0.4.14" paste = "1.0" bstringify = "0.1.2" enum-init = { git = "https://github.com/obsidiansystems/enum-init" } +num-traits = "0.2.15" +num-derive = "0.3.3" [dependencies.ledger-log] git = "https://github.com/obsidiansystems/ledger-platform" diff --git a/src/interp.rs b/src/interp.rs index b059e6c..f3d6573 100644 --- a/src/interp.rs +++ b/src/interp.rs @@ -76,3 +76,7 @@ pub struct LengthLimited { /// Confer: LengthFallback #[derive(Clone)] pub struct ObserveLengthedBytes X, X, F, S>(pub I, pub F, pub S, pub bool); + + +pub struct Buffer; + diff --git a/src/lib.rs b/src/lib.rs index 5579b2a..b1928c4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,10 +3,13 @@ #![feature(bindings_after_at)] #![feature(const_generics)] #![feature(min_type_alias_impl_trait)] +#![feature(min_specialization)] #![feature(generic_associated_types)] #![feature(auto_traits)] #![feature(negative_impls)] #![feature(member_constraints)] +#![feature(trace_macros)] +#![feature(log_syntax)] #![cfg_attr(all(target_os="nanos", test), no_main)] #![cfg_attr(target_os="nanos", feature(custom_test_frameworks))] #![reexport_test_harness_main = "test_main"] @@ -15,6 +18,8 @@ #[macro_use] extern crate enum_init; +#[macro_use] +extern crate num_derive; //#[cfg(all(not(target_os = "linux"), test))] //use nanos_sdk::exit_app; @@ -67,3 +72,5 @@ pub mod json; pub mod json_interp; pub mod async_parser; + +pub mod protobufs; diff --git a/src/protobufs/async_parser.rs b/src/protobufs/async_parser.rs new file mode 100644 index 0000000..36e4836 --- /dev/null +++ b/src/protobufs/async_parser.rs @@ -0,0 +1,289 @@ +// use crate::schema::*; +use crate::interp::*; +use core::future::Future; +use crate::async_parser::*; +use crate::protobufs::schema::*; +use crate::protobufs::interp::*; +use arrayvec::ArrayVec; +pub use num_traits::FromPrimitive; + +trait IsLengthDelimited { } + +struct VarInt32; + +impl HasOutput for VarInt32 { + type Output = u32; +} + +impl AsyncParser for VarInt32 { + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + async move { + let mut accumulator : u32 = 0; + let mut n : u8 = 0; + loop { + let [current] : [u8; 1] = input.read().await; + if n == 4 && current > 0x0f { + reject().await + } + accumulator += (current as u32) << 7*n; + n += 1; + if current & 0x80 == 0 { + return accumulator; + } + } + } + } + type State<'c> = impl Future; +} + +impl HasOutput for DropInterp { + type Output = (); +} +impl AsyncParser for DropInterp { + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + async move { + let mut n : u8 = 0; + loop { + let [current] : [u8; 1] = input.read().await; + if n == 4 && current > 0x0f { + reject().await + } + n += 1; + if current & 0x80 == 0 { + return () + } + } + } + } + type State<'c> = impl Future; +} + +impl HasOutput for DefaultInterp { + type Output = [u8; 8]; +} + +impl AsyncParser for DefaultInterp { + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + async move { + input.read().await + } + } + type State<'c> = impl Future; +} + +trait LengthDelimitedParser : HasOutput{ + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS, length: usize) -> Self::State<'c>; + type State<'c>: Future; +} + +impl HasOutput for Buffer { + type Output = ArrayVec; +} + +async fn read_arrayvec_n<'a, const N: usize, BS: Readable>(input: &'a mut BS, length: usize) -> ArrayVec { + if length > N { + reject().await + } + let mut accumulator = ArrayVec::new(); + for _ in 0..length { + let [byte] = input.read().await; + accumulator.push(byte); + } + accumulator +} + +impl LengthDelimitedParser for Buffer { + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS, length: usize) -> Self::State<'c> { + read_arrayvec_n(input, length) + } + type State<'c> = impl Future; +} + +impl HasOutput for Buffer { + type Output = ArrayVec; +} + +impl LengthDelimitedParser for Buffer { + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS, length: usize) -> Self::State<'c> { + read_arrayvec_n(input, length) + } + type State<'c> = impl Future; +} + +impl> HasOutput> for MessageFieldInterp { + type Output = Value::Output; +} + +struct TrackLength(BS, usize); + +impl Readable for TrackLength { + type OutFut<'a, const N: usize> = impl Future; + fn read<'a: 'b, 'b, const N: usize>(&'a mut self) -> Self::OutFut<'b, N> { + self.1 += N; + self.0.read() + } +} + +pub async fn skip_field(fmt: ProtobufWire, i: &mut BS) { + match fmt { + ProtobufWire::Varint => { >::parse(&DropInterp, i).await; } + ProtobufWire::Fixed64Bit => { i.read::<8>().await; } + ProtobufWire::LengthDelimited => { + let len = >::parse(&VarInt32, i).await; + for _ in [0..len] { + i.read::<1>().await; + } + } + ProtobufWire::StartGroup => { reject().await } + ProtobufWire::EndGroup => { reject().await } + ProtobufWire::Fixed32Bit => { i.read::<4>().await; } + } +} + +pub use paste::paste; + +#[macro_export] +macro_rules! define_message { + { $name:ident { $($field:ident : $schemaType:tt $(($schemaParams:tt))* = $number:literal),* } } => { + define_message!{ @enrich, $name { , $($field : $schemaType$(($schemaParams))* = $number),* } { } } + }; + { @enrich, $name:ident { , $field:ident : message($schemaType:tt) = $number:literal $($rest:tt)* } { $($body:tt)* } } => { + define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (LengthDelimitedParser, $schemaType) = $number } } + }; + { @enrich, $name:ident { , $field:ident : packed($schemaType:tt) = $number:literal $($rest:tt)* } { $($body:tt)* } } => { + define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (LengthDelimitedParser, $schemaType) = $number } } + }; + { @enrich, $name:ident { , $field:ident : bytes = $number:literal $($rest:tt)* } { $($body:tt)* } } => { + define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (LengthDelimitedParser, Bytes) = $number } } + }; + { @enrich, $name:ident { , $field:ident : string = $number:literal $($rest:tt)* } { $($body:tt)* } } => { + define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (LengthDelimitedParser, ProtoString) = $number } } + }; + { @enrich, $name:ident { , $field:ident: enum($schemaType:ty) = $number:literal $($rest:tt)* } { $($body:tt)* } } => { + define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (AsyncParser, $schemaType) = $number } } + }; + { @enrich, $name:ident { , $field:ident: $schemaType:ty = $number:literal $($rest:tt)* } { $($body:tt)* } } => { + define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (AsyncParser, $schemaType) = $number } } + }; + { @enrich, $name:ident { } { $($body:tt)* } } => { + define_message!{ @impl $name { $($body)* } } + }; + { @impl $name:ident { , $($field:ident : ($parseTrait:ident, $schemaType:ty) = $number:literal),* } } => { + $crate::protobufs::async_parser::paste! { + pub struct [<$name Interp>]<$([]),*> { + $(pub [] : [] ),* + } + + pub struct [<$name:camel>]; + + impl<$([] : HasOutput<[<$schemaType:camel>]>),*> HasOutput<[<$name:camel>]> for [<$name:camel Interp>]<$([]),*> { + type Output = (); + } + + impl ProtobufWireFormat for [<$name:camel>] { + const FORMAT: ProtobufWire = ProtobufWire::LengthDelimited; + } + + impl] : $parseTrait<[<$schemaType:camel>], TrackLength>),*> LengthDelimitedParser<[<$name:camel>], BS> for [<$name:camel Interp>]<$([]),*> { + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS, length: usize) -> Self::State<'c> { + async move { + // First, structural check: + let mut tl = TrackLength(input.clone(), 0); + loop { + // Probably should check for presence of all expected fields here as + // well. On the other hand, fields that we specify an interpretation + // for are _required_. + let tag = VarInt32.parse(&mut tl).await; + let wire = match ProtobufWire::from_u32(tag & 0x07) { Some(w) => w, None => reject().await, }; + skip_field(wire, &mut tl).await; + if tl.1 == length { + break; + } + if tl.1 > length { + return reject().await; + } + } + $( + let mut tl = TrackLength(input.clone(), 0); + loop { + let tag = VarInt32.parse(&mut tl).await; + let wire = match ProtobufWire::from_u32(tag & 0x07) { Some(w) => w, None => reject().await, }; + if tag >> 3 == $number { + if wire != [<$schemaType:camel>]::FORMAT { + return reject().await; + } + define_message! { @call_parser_for, $parseTrait, tl, self.[] } + break; + } else { + skip_field(wire, &mut tl).await; + // Skip it + } + if tl.1 >= length { + return reject().await; + } + } + )* + () + } + } + type State<'c> = impl Future; + } + } + }; + { @call_parser_for, AsyncParser, $tl:ident, $($p:tt)* } => { + $($p)*.parse(&mut $tl).await; + }; + { @call_parser_for, LengthDelimitedParser, $tl:ident, $($p:tt)* } => { { + let length = VarInt32.parse(&mut $tl).await as usize; + $($p)*.parse(&mut $tl, length).await; + } }; +} + +#[macro_export] +macro_rules! define_enum { + { $name:ident { $($variant:ident = $number:literal),* } } => + { + $crate::protobufs::async_parser::paste! { + #[derive($crate::num_derive::FromPrimitive, PartialEq)] + #[repr(u32)] + pub enum $name { + $([<$variant:camel>] = $number),* + } + + impl HasOutput<$name> for DefaultInterp { + type Output = $name; + } + + impl ProtobufWireFormat for [<$name:camel>] { + const FORMAT: ProtobufWire = ProtobufWire::Varint; + } + + impl AsyncParser<$name, BS> for DefaultInterp { + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + async move { + match $name::from_u32(VarInt32.parse(input).await) { + None => reject().await, + Some(a) => a, + } + } + } + type State<'c> = impl Future; + } + } + } +} + +#[cfg(test)] +mod test { + trace_macros!(true); + define_message! { OtherMessage { foo: bytes = 0 } } + define_message! { SimpleMessage { foo: message(otherMessage) = 0, bar: enum(SimpleEnum) = 1 } } + define_enum! { SimpleEnum { default = 0, noodle = 1 } } + trace_macros!(false); + + #[test] + fn test_parser() { + + } +} + diff --git a/src/protobufs/interp.rs b/src/protobufs/interp.rs new file mode 100644 index 0000000..6452a80 --- /dev/null +++ b/src/protobufs/interp.rs @@ -0,0 +1,4 @@ + +pub struct MessageFieldInterp(pub Value); + +// Note: Reusing Product and ProductIdentity diff --git a/src/protobufs/mod.rs b/src/protobufs/mod.rs new file mode 100644 index 0000000..057de11 --- /dev/null +++ b/src/protobufs/mod.rs @@ -0,0 +1,3 @@ +pub mod async_parser; +pub mod schema; +pub mod interp; diff --git a/src/protobufs/schema.rs b/src/protobufs/schema.rs new file mode 100644 index 0000000..6849123 --- /dev/null +++ b/src/protobufs/schema.rs @@ -0,0 +1,93 @@ +use core::ops::Mul; + +#[derive(FromPrimitive, PartialEq)] +#[repr(u8)] +pub enum ProtobufWire { + Varint = 0, + Fixed64Bit = 1, + LengthDelimited = 2, + StartGroup = 3, // Deprecated + EndGroup = 4, // Deprecated + Fixed32Bit = 5 +} + +pub trait ProtobufWireFormat { + const FORMAT: ProtobufWire; +} + +// Varint +pub struct Varint; + +impl ProtobufWireFormat for Varint { + const FORMAT: ProtobufWire = ProtobufWire::Varint; +} + +// 64 Bit +pub struct Fixed64; +impl ProtobufWireFormat for Fixed64 { + const FORMAT: ProtobufWire = ProtobufWire::Fixed64Bit; +} + +pub struct SFixed64; +impl ProtobufWireFormat for SFixed64 { + const FORMAT: ProtobufWire = ProtobufWire::Fixed64Bit; +} + +pub struct Double; +impl ProtobufWireFormat for Double { + const FORMAT: ProtobufWire = ProtobufWire::Fixed64Bit; +} + +// Uses WireLengthDelimted +pub struct String; +impl ProtobufWireFormat for String { + const FORMAT: ProtobufWire = ProtobufWire::LengthDelimited; +} +pub struct Bytes; +impl ProtobufWireFormat for Bytes { + const FORMAT: ProtobufWire = ProtobufWire::LengthDelimited; +} +pub struct Embedded; +impl ProtobufWireFormat for Embedded { + const FORMAT: ProtobufWire = ProtobufWire::LengthDelimited; +} +pub struct PackedRepeatedFields; +impl ProtobufWireFormat for PackedRepeatedFields { + const FORMAT: ProtobufWire = ProtobufWire::LengthDelimited; +} + +// Wire32Bit +pub struct Fixed32; +impl ProtobufWireFormat for Fixed32 { + const FORMAT: ProtobufWire = ProtobufWire::Fixed32Bit; +} +pub struct SFixed32; +impl ProtobufWireFormat for SFixed32 { + const FORMAT: ProtobufWire = ProtobufWire::Fixed32Bit; +} +pub struct Float; +impl ProtobufWireFormat for Float { + const FORMAT: ProtobufWire = ProtobufWire::Fixed32Bit; +} + +// Fields of a message +// A MessageField describes a message containing a particular field with the given type. +pub struct MessageField(Value); + +// Denote the intersection of two formats. +pub struct Product(A, B); +pub struct ProductIdentity; + +impl Mul for ProductIdentity { + type Output = Product; + fn mul(self, rhs: A) -> Self::Output { + Product(self, rhs) + } +} +impl Mul for Product { + type Output = Product>; + fn mul(self, rhs: C) -> Self::Output { + Product(self.0, Product(self.1, rhs)) + } +} + From 713896286a01d9f6b17533622f2da454faba807e Mon Sep 17 00:00:00 2001 From: "Jonathan D.K. Gibbons" Date: Fri, 3 Jun 2022 17:01:21 -0400 Subject: [PATCH 06/16] Varint numeric types, mostly done. --- src/protobufs/async_parser.rs | 114 ++++++++++++++++++++++++++++++---- src/protobufs/schema.rs | 31 +++++++-- 2 files changed, 126 insertions(+), 19 deletions(-) diff --git a/src/protobufs/async_parser.rs b/src/protobufs/async_parser.rs index 36e4836..7f56e27 100644 --- a/src/protobufs/async_parser.rs +++ b/src/protobufs/async_parser.rs @@ -9,12 +9,91 @@ pub use num_traits::FromPrimitive; trait IsLengthDelimited { } -struct VarInt32; +fn parse_varint<'a: 'c, 'c, T, BS: Readable>(input: &'a mut BS) -> impl Future + 'c where + T: Default + core::ops::Shl + core::ops::AddAssign + core::convert::From +{ + async move { + let mut accumulator : T = Default::default(); + let mut n : u8 = 0; + loop { + let [current] : [u8; 1] = input.read().await; + // Check that adding this base-128 digit will not overflow + if 0 != ((current & 0x7f) >> (core::mem::size_of::()*8 - (7*n as usize))) { + reject().await + } + accumulator += core::convert::Into::::into(current) << core::convert::From::from(7*n as u8); + n += 1; + if current & 0x80 == 0 { + return accumulator; + } + } + } +} + +fn skip_varint<'a: 'c, 'c, BS: Readable>(input: &'a mut BS) -> impl Future + 'c where +{ + async move { + loop { + let [current] : [u8; 1] = input.read().await; + if current & 0x80 == 0 { + return (); + } + } + } +} + +macro_rules! VarintPrimitive { + { $name:ident : $returning:ty : $v:ident => $($decode:tt)* } => + { $crate::protobufs::async_parser::paste! { + impl HasOutput<[<$name:camel>]> for DefaultInterp { + type Output = $returning; + } + + impl AsyncParser<[<$name:camel>], BS> for DefaultInterp { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + async move { + let $v = parse_varint::<'a, 'c, $returning, BS>(input).await; + $($decode)* + } + } + } + impl HasOutput<[<$name:camel>]> for DropInterp { + type Output = (); + } -impl HasOutput for VarInt32 { - type Output = u32; + impl AsyncParser<[<$name:camel>], BS> for DropInterp { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + async move { + parse_varint::<'a, 'c, $returning, BS>(input).await; + } + } + } + } } } +VarintPrimitive! { int32 : i32 : x => x } +VarintPrimitive! { int64 : i64 : x => x } +VarintPrimitive! { uint32 : u32 : x => x } +VarintPrimitive! { uint64 : u64 : x => x } +VarintPrimitive! { sint32 : i32 : x => x >> 1 ^ (-(x & 1)) } +VarintPrimitive! { sint64 : i64 : x => x >> 1 ^ (-(x & 1)) } + +impl HasOutput for DefaultInterp { + type Output = bool; +} + +impl AsyncParser for DefaultInterp { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + async move { + parse_varint::<'a, 'c, u16, BS>(input).await == 1 + } + } +} + +/* impl AsyncParser for VarInt32 { fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { async move { @@ -57,6 +136,7 @@ impl AsyncParser for DropInterp { } type State<'c> = impl Future; } +*/ impl HasOutput for DefaultInterp { type Output = [u8; 8]; @@ -126,10 +206,10 @@ impl Readable for TrackLength { pub async fn skip_field(fmt: ProtobufWire, i: &mut BS) { match fmt { - ProtobufWire::Varint => { >::parse(&DropInterp, i).await; } + ProtobufWire::Varint => { skip_varint::<'_, '_, BS>(i).await } // >::parse(&DropInterp, i).await; } ProtobufWire::Fixed64Bit => { i.read::<8>().await; } ProtobufWire::LengthDelimited => { - let len = >::parse(&VarInt32, i).await; + let len = >::parse(&DefaultInterp, i).await; for _ in [0..len] { i.read::<1>().await; } @@ -157,7 +237,7 @@ macro_rules! define_message { define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (LengthDelimitedParser, Bytes) = $number } } }; { @enrich, $name:ident { , $field:ident : string = $number:literal $($rest:tt)* } { $($body:tt)* } } => { - define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (LengthDelimitedParser, ProtoString) = $number } } + define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (LengthDelimitedParser, String) = $number } } }; { @enrich, $name:ident { , $field:ident: enum($schemaType:ty) = $number:literal $($rest:tt)* } { $($body:tt)* } } => { define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (AsyncParser, $schemaType) = $number } } @@ -173,7 +253,7 @@ macro_rules! define_message { pub struct [<$name Interp>]<$([]),*> { $(pub [] : [] ),* } - + pub struct [<$name:camel>]; impl<$([] : HasOutput<[<$schemaType:camel>]>),*> HasOutput<[<$name:camel>]> for [<$name:camel Interp>]<$([]),*> { @@ -193,7 +273,7 @@ macro_rules! define_message { // Probably should check for presence of all expected fields here as // well. On the other hand, fields that we specify an interpretation // for are _required_. - let tag = VarInt32.parse(&mut tl).await; + let tag : u32 = parse_varint(&mut tl).await; let wire = match ProtobufWire::from_u32(tag & 0x07) { Some(w) => w, None => reject().await, }; skip_field(wire, &mut tl).await; if tl.1 == length { @@ -206,7 +286,7 @@ macro_rules! define_message { $( let mut tl = TrackLength(input.clone(), 0); loop { - let tag = VarInt32.parse(&mut tl).await; + let tag : u32 = parse_varint(&mut tl).await; let wire = match ProtobufWire::from_u32(tag & 0x07) { Some(w) => w, None => reject().await, }; if tag >> 3 == $number { if wire != [<$schemaType:camel>]::FORMAT { @@ -234,7 +314,7 @@ macro_rules! define_message { $($p)*.parse(&mut $tl).await; }; { @call_parser_for, LengthDelimitedParser, $tl:ident, $($p:tt)* } => { { - let length = VarInt32.parse(&mut $tl).await as usize; + let length : usize = parse_varint(&mut $tl).await; $($p)*.parse(&mut $tl, length).await; } }; } @@ -253,7 +333,7 @@ macro_rules! define_enum { impl HasOutput<$name> for DefaultInterp { type Output = $name; } - + impl ProtobufWireFormat for [<$name:camel>] { const FORMAT: ProtobufWire = ProtobufWire::Varint; } @@ -261,7 +341,7 @@ macro_rules! define_enum { impl AsyncParser<$name, BS> for DefaultInterp { fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { async move { - match $name::from_u32(VarInt32.parse(input).await) { + match $name::from_u32(Int32.parse(input).await) { None => reject().await, Some(a) => a, } @@ -273,14 +353,22 @@ macro_rules! define_enum { } } + #[cfg(test)] mod test { trace_macros!(true); define_message! { OtherMessage { foo: bytes = 0 } } define_message! { SimpleMessage { foo: message(otherMessage) = 0, bar: enum(SimpleEnum) = 1 } } define_enum! { SimpleEnum { default = 0, noodle = 1 } } + define_message! { + SignDoc { + body_bytes: bytes = 1, + auth_info_bytes: bytes = 2, + chain_id: string = 3, + account_number: uint64 = 4 + }} trace_macros!(false); - + #[test] fn test_parser() { diff --git a/src/protobufs/schema.rs b/src/protobufs/schema.rs index 6849123..3880be3 100644 --- a/src/protobufs/schema.rs +++ b/src/protobufs/schema.rs @@ -15,12 +15,31 @@ pub trait ProtobufWireFormat { const FORMAT: ProtobufWire; } -// Varint -pub struct Varint; +macro_rules! VarintPrimitive { + { $name:ident } => + { $crate::protobufs::async_parser::paste! { + pub struct [<$name:camel>]; + impl ProtobufWireFormat for [<$name:camel>] { + const FORMAT: ProtobufWire = ProtobufWire::Varint; + } + } } +} +VarintPrimitive! { int32 } +VarintPrimitive! { int64 } +VarintPrimitive! { uint32 } +VarintPrimitive! { uint64 } +VarintPrimitive! { sint32 } +VarintPrimitive! { sint64 } +VarintPrimitive! { bool } + +// Varint +// pub struct Varint; +/* impl ProtobufWireFormat for Varint { const FORMAT: ProtobufWire = ProtobufWire::Varint; } +*/ // 64 Bit pub struct Fixed64; @@ -28,8 +47,8 @@ impl ProtobufWireFormat for Fixed64 { const FORMAT: ProtobufWire = ProtobufWire::Fixed64Bit; } -pub struct SFixed64; -impl ProtobufWireFormat for SFixed64 { +pub struct Sfixed64; +impl ProtobufWireFormat for Sfixed64 { const FORMAT: ProtobufWire = ProtobufWire::Fixed64Bit; } @@ -61,8 +80,8 @@ pub struct Fixed32; impl ProtobufWireFormat for Fixed32 { const FORMAT: ProtobufWire = ProtobufWire::Fixed32Bit; } -pub struct SFixed32; -impl ProtobufWireFormat for SFixed32 { +pub struct Sfixed32; +impl ProtobufWireFormat for Sfixed32 { const FORMAT: ProtobufWire = ProtobufWire::Fixed32Bit; } pub struct Float; From 9f87d8e9dd5c60c1faa7f3f4a60533d218b9e4bc Mon Sep 17 00:00:00 2001 From: "Jonathan D.K. Gibbons" Date: Sat, 11 Jun 2022 11:10:09 -0400 Subject: [PATCH 07/16] Properly gate the logging in json_interp --- src/json_interp.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/json_interp.rs b/src/json_interp.rs index 11fc358..f1ae827 100644 --- a/src/json_interp.rs +++ b/src/json_interp.rs @@ -1011,12 +1011,14 @@ macro_rules! define_json_struct_interp { match &key[..] { $( $crate::json_interp::bstringify!($field) => { + #[cfg(feature = "logging")] trace!("json-struct-interp parser: checking key {:?}\n", core::str::from_utf8(key)); $crate::interp_parser::set_from_thunk(state, || [<$name State>]::[](<[] as JsonInterp<$schemaType>>::init(&self.[]))); } )* , _ => { + #[cfg(feature = "logging")] error!("json-struct-interp parser: Got unexpected key {:?}\n", core::str::from_utf8(key)); return Err(Some($crate::interp_parser::OOB::Reject)) } } From 86286daffc96685ba43cdb3ce0e558171921348f Mon Sep 17 00:00:00 2001 From: "Jonathan D.K. Gibbons" Date: Sat, 11 Jun 2022 11:21:35 -0400 Subject: [PATCH 08/16] Generalize the HasOutput instance for DropInterp. --- src/async_parser.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/async_parser.rs b/src/async_parser.rs index b6d3c7a..fa4565f 100644 --- a/src/async_parser.rs +++ b/src/async_parser.rs @@ -30,7 +30,7 @@ pub trait Readable { fn read<'a: 'b, 'b, const N: usize>(&'a mut self) -> Self::OutFut<'b, N>; } -pub trait HasOutput { +pub trait HasOutput { type Output; } @@ -157,7 +157,7 @@ impl AsyncParser, BS> for } } -impl HasOutput> for DropInterp { +impl HasOutput for DropInterp { type Output = (); } From 4a4314a7f16314b199730dc1d56e0a89d994bb8f Mon Sep 17 00:00:00 2001 From: "Jonathan D.K. Gibbons" Date: Sat, 11 Jun 2022 11:22:18 -0400 Subject: [PATCH 09/16] Add HasDefParser trait to ease calling default parsers. --- src/async_parser.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/async_parser.rs b/src/async_parser.rs index fa4565f..a7a62a7 100644 --- a/src/async_parser.rs +++ b/src/async_parser.rs @@ -15,6 +15,20 @@ use core::future::Future; use core::convert::TryInto; use arrayvec::ArrayVec; +pub trait HasDefParser where DefaultInterp: HasOutput { + fn def_parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c>; + + /// Type synonym for the future returned by this parser. + type State<'c>: Future>::Output>; +} + +impl HasDefParser for T where DefaultInterp: AsyncParser { + fn def_parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + >::parse(&DefaultInterp, input) + } + type State<'c> = impl Future>::Output>; +} + /// Reject the parse. pub fn reject() -> impl Future { // Do some out-of-band rejection thingie From 0c9cb455bdcd288c0115cee139e223d814d32d29 Mon Sep 17 00:00:00 2001 From: "Jonathan D.K. Gibbons" Date: Sat, 11 Jun 2022 11:29:55 -0400 Subject: [PATCH 10/16] Generalize the function bound on async Action. --- src/async_parser.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/async_parser.rs b/src/async_parser.rs index a7a62a7..f2dd168 100644 --- a/src/async_parser.rs +++ b/src/async_parser.rs @@ -191,11 +191,11 @@ impl AsyncParser, BS> for Dr } } -impl, R> HasOutput for Action>::Output) -> Option> { +impl, R, F: Fn(>::Output) -> Option> HasOutput for Action { type Output = R; } -impl, R, BS: Readable> AsyncParser for Action>::Output) -> Option> { +impl, R, F: Fn(>::Output) -> Option, BS: Readable> AsyncParser for Action { type State<'c> = impl Future; fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { async move { From c04a425eeef195ed5f768ec5e4b94d946567a5e5 Mon Sep 17 00:00:00 2001 From: "Jonathan D.K. Gibbons" Date: Sat, 11 Jun 2022 11:37:26 -0400 Subject: [PATCH 11/16] Fixes and tests for protobufs parser. --- src/protobufs/async_parser.rs | 314 ++++++++++++++++++++++++++-------- src/protobufs/schema.rs | 2 +- 2 files changed, 248 insertions(+), 68 deletions(-) diff --git a/src/protobufs/async_parser.rs b/src/protobufs/async_parser.rs index 7f56e27..3648224 100644 --- a/src/protobufs/async_parser.rs +++ b/src/protobufs/async_parser.rs @@ -18,10 +18,10 @@ fn parse_varint<'a: 'c, 'c, T, BS: Readable>(input: &'a mut BS) -> impl Future> (core::mem::size_of::()*8 - (7*n as usize))) { + if 7*n as usize > (core::mem::size_of::()-1)*8 && 0 != ((current & 0x7f) >> (core::mem::size_of::()*8 - (7*n as usize))) { reject().await } - accumulator += core::convert::Into::::into(current) << core::convert::From::from(7*n as u8); + accumulator += core::convert::Into::::into(current & 0x7f) << core::convert::From::from(7*n as u8); n += 1; if current & 0x80 == 0 { return accumulator; @@ -58,9 +58,6 @@ macro_rules! VarintPrimitive { } } } - impl HasOutput<[<$name:camel>]> for DropInterp { - type Output = (); - } impl AsyncParser<[<$name:camel>], BS> for DropInterp { type State<'c> = impl Future; @@ -93,67 +90,46 @@ impl AsyncParser for DefaultInterp { } } -/* -impl AsyncParser for VarInt32 { +impl HasOutput for DefaultInterp { + type Output = [u8; 8]; +} + +impl AsyncParser for DefaultInterp { fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { async move { - let mut accumulator : u32 = 0; - let mut n : u8 = 0; - loop { - let [current] : [u8; 1] = input.read().await; - if n == 4 && current > 0x0f { - reject().await - } - accumulator += (current as u32) << 7*n; - n += 1; - if current & 0x80 == 0 { - return accumulator; - } - } + input.read().await } } type State<'c> = impl Future; } -impl HasOutput for DropInterp { - type Output = (); +trait LengthDelimitedParser : HasOutput{ + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS, length: usize) -> Self::State<'c>; + type State<'c>: Future; } -impl AsyncParser for DropInterp { - fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + + +impl, R, BS: Readable, F: Fn(>::Output) -> Option> LengthDelimitedParser for Action { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS, length: usize) -> Self::State<'c> { async move { - let mut n : u8 = 0; - loop { - let [current] : [u8; 1] = input.read().await; - if n == 4 && current > 0x0f { - reject().await - } - n += 1; - if current & 0x80 == 0 { - return () - } + match self.1(self.0.parse(input, length).await) { + Some(a) => a, + None => reject().await, } } } - type State<'c> = impl Future; -} -*/ - -impl HasOutput for DefaultInterp { - type Output = [u8; 8]; } -impl AsyncParser for DefaultInterp { - fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { +impl LengthDelimitedParser for DropInterp { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS, length: usize) -> Self::State<'c> { async move { - input.read().await + for _ in 0..length { + let [_]: [u8; 1] = input.read().await; + } } } - type State<'c> = impl Future; -} - -trait LengthDelimitedParser : HasOutput{ - fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS, length: usize) -> Self::State<'c>; - type State<'c>: Future; } impl HasOutput for Buffer { @@ -194,6 +170,7 @@ impl(BS, usize); impl Readable for TrackLength { @@ -210,7 +187,7 @@ pub async fn skip_field(fmt: ProtobufWire, i: &mut BS) { ProtobufWire::Fixed64Bit => { i.read::<8>().await; } ProtobufWire::LengthDelimited => { let len = >::parse(&DefaultInterp, i).await; - for _ in [0..len] { + for _ in 0..len { i.read::<1>().await; } } @@ -220,35 +197,52 @@ pub async fn skip_field(fmt: ProtobufWire, i: &mut BS) { } } +pub struct BytesAsMessage>(Schema, M); + +impl> HasOutput for BytesAsMessage { + type Output = M::Output; +} + +impl, BS: Readable> LengthDelimitedParser for BytesAsMessage { + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS, length: usize) -> Self::State<'c> { + self.1.parse(input, length) + } + type State<'c> = impl Future; +} + + pub use paste::paste; #[macro_export] macro_rules! define_message { - { $name:ident { $($field:ident : $schemaType:tt $(($schemaParams:tt))* = $number:literal),* } } => { - define_message!{ @enrich, $name { , $($field : $schemaType$(($schemaParams))* = $number),* } { } } + { $name:ident { $($field:ident : $schemaType:tt $(($($schemaParams:tt)*))* = $number:literal),* } } => { + define_message!{ @enrich, $name { , $($field : $schemaType$(($($schemaParams)*))* = $number),* } { } } }; { @enrich, $name:ident { , $field:ident : message($schemaType:tt) = $number:literal $($rest:tt)* } { $($body:tt)* } } => { - define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (LengthDelimitedParser, $schemaType) = $number } } + define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (LengthDelimitedParser, $schemaType, false) = $number } } + }; + { @enrich, $name:ident { , $field:ident : repeated(message($schemaType:tt)) = $number:literal $($rest:tt)* } { $($body:tt)* } } => { + define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (LengthDelimitedParser, $schemaType, true) = $number } } }; { @enrich, $name:ident { , $field:ident : packed($schemaType:tt) = $number:literal $($rest:tt)* } { $($body:tt)* } } => { - define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (LengthDelimitedParser, $schemaType) = $number } } + define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (LengthDelimitedParser, $schemaType, true) = $number } } }; { @enrich, $name:ident { , $field:ident : bytes = $number:literal $($rest:tt)* } { $($body:tt)* } } => { - define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (LengthDelimitedParser, Bytes) = $number } } + define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (LengthDelimitedParser, Bytes, false) = $number } } }; { @enrich, $name:ident { , $field:ident : string = $number:literal $($rest:tt)* } { $($body:tt)* } } => { - define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (LengthDelimitedParser, String) = $number } } + define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (LengthDelimitedParser, String, false) = $number } } }; { @enrich, $name:ident { , $field:ident: enum($schemaType:ty) = $number:literal $($rest:tt)* } { $($body:tt)* } } => { - define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (AsyncParser, $schemaType) = $number } } + define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (AsyncParser, $schemaType, false) = $number } } }; { @enrich, $name:ident { , $field:ident: $schemaType:ty = $number:literal $($rest:tt)* } { $($body:tt)* } } => { - define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (AsyncParser, $schemaType) = $number } } + define_message!{ @enrich, $name { $($rest)* } { $($body)*, $field: (AsyncParser, $schemaType, false) = $number } } }; { @enrich, $name:ident { } { $($body:tt)* } } => { define_message!{ @impl $name { $($body)* } } }; - { @impl $name:ident { , $($field:ident : ($parseTrait:ident, $schemaType:ty) = $number:literal),* } } => { + { @impl $name:ident { , $($field:ident : ($parseTrait:ident, $schemaType:ty, $repeated:literal) = $number:literal),* } } => { $crate::protobufs::async_parser::paste! { pub struct [<$name Interp>]<$([]),*> { $(pub [] : [] ),* @@ -256,7 +250,7 @@ macro_rules! define_message { pub struct [<$name:camel>]; - impl<$([] : HasOutput<[<$schemaType:camel>]>),*> HasOutput<[<$name:camel>]> for [<$name:camel Interp>]<$([]),*> { + impl<$([] : HasOutput<[<$schemaType:camel>], Output=()>),*> HasOutput<[<$name:camel>]> for [<$name:camel Interp>]<$([]),*> { type Output = (); } @@ -264,7 +258,7 @@ macro_rules! define_message { const FORMAT: ProtobufWire = ProtobufWire::LengthDelimited; } - impl] : $parseTrait<[<$schemaType:camel>], TrackLength>),*> LengthDelimitedParser<[<$name:camel>], BS> for [<$name:camel Interp>]<$([]),*> { + impl] : HasOutput<[<$schemaType:camel>], Output=()> + $parseTrait<[<$schemaType:camel>], TrackLength>),*> LengthDelimitedParser<[<$name:camel>], BS> for [<$name:camel Interp>]<$([]),*> { fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS, length: usize) -> Self::State<'c> { async move { // First, structural check: @@ -285,6 +279,7 @@ macro_rules! define_message { } $( let mut tl = TrackLength(input.clone(), 0); + let mut seen = false; loop { let tag : u32 = parse_varint(&mut tl).await; let wire = match ProtobufWire::from_u32(tag & 0x07) { Some(w) => w, None => reject().await, }; @@ -293,11 +288,20 @@ macro_rules! define_message { return reject().await; } define_message! { @call_parser_for, $parseTrait, tl, self.[] } - break; + if(seen && ! $repeated) { + // Rejecting because of multiple fields on non-repeating; + // protobuf spec says we should "take the last value" but + // our flow doesn't permit this. + return reject().await; + } + seen = true; } else { skip_field(wire, &mut tl).await; // Skip it } + if tl.1 == length { + break; + } if tl.1 >= length { return reject().await; } @@ -341,7 +345,7 @@ macro_rules! define_enum { impl AsyncParser<$name, BS> for DefaultInterp { fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { async move { - match $name::from_u32(Int32.parse(input).await) { + match $name::from_u32(parse_varint(input).await) { None => reject().await, Some(a) => a, } @@ -353,10 +357,122 @@ macro_rules! define_enum { } } +/* +// Any handler: take a list of +pub struct AnyOf(T); + +impl HasOutput for AnyOf { + type Output = (); +} + +impl LengthDelimitedParser for AnyOf { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS) -> Self::State<'c> { + { + let mut tl = TrackLength(input.clone(), 0); + let mut seen = false; + loop { + let tag : u32 = parse_varint(&mut tl).await; + let wire = match ProtobufWire::from_u32(tag & 0x07) { Some(w) => w, None => reject().await, }; + if tag >> 3 == 1 { + if wire != ProtobufWire::LengthDelimited { + println!("Incorrect format: reject"); + return reject().await; + } + let length : usize = parse_varint(&mut tl).await; + + define_message! { @call_parser_for, $parseTrait, tl, self.[] } + if(seen && ! $repeated) { + // Rejecting because of multiple fields on non-repeating; + // protobuf spec says we should "take the last value" but + // our flow doesn't permit this. + return reject().await; + } + seen = true; + } else { + println!("Not current field; skipping"); + skip_field(wire, &mut tl).await; + // Skip it + } + if tl.1 == length { + break; + } + if tl.1 >= length { + println!("Message off end; rejecting."); + return reject().await; + } + } + } + + } +} +*/ #[cfg(test)] mod test { - trace_macros!(true); + use super::*; + use core::future::Future; + pub use num_traits::FromPrimitive; + // trace_macros!(true); + // trace_macros!(false); + + #[derive(Clone)] + struct TestReadable([u8; N], usize); + impl Readable for TestReadable { + type OutFut<'a, const N: usize> = impl 'a + Future; + fn read<'a: 'b, 'b, const N: usize>(&'a mut self) -> Self::OutFut<'b, N> { + if self.1 + N <= self.0.len() { + let offset = self.1; + self.1+=N; + use core::convert::TryInto; + core::future::ready(self.0[offset..self.1].try_into().unwrap()) + } else { + panic!("Read past end of input"); + } + } + } + + use core::task::*; + + static RAW_WAKER_VTABLE : RawWakerVTable = RawWakerVTable::new(|a| RawWaker::new(a, &RAW_WAKER_VTABLE), |_| {}, |_| {}, |_| {}); + + fn poll_once(mut input: F) -> core::task::Poll { + let waker = unsafe { Waker::from_raw(RawWaker::new(&(), &RAW_WAKER_VTABLE)) }; + let mut ctxd = Context::from_waker(&waker); + let mut pinned = unsafe { core::pin::Pin::new_unchecked(&mut input) }; + pinned.as_mut().poll(&mut ctxd) + } + + #[test] + fn test_varint() { + let mut input = TestReadable([0,0,0],0); + assert_eq!(poll_once(Int32.def_parse(&mut input)), Poll::Ready(0)); + let mut input = TestReadable([255,0,0],0); + assert_eq!(poll_once(Int32.def_parse(&mut input)), Poll::Ready(127)); + let mut input = TestReadable([254,0,0],0); + assert_eq!(poll_once(Sint32.def_parse(&mut input)), Poll::Ready(63)); + let mut input = TestReadable([255,0,0],0); + assert_eq!(poll_once(Sint32.def_parse(&mut input)), Poll::Ready(-64)); + let mut input = TestReadable([0,0,0],0); + assert_eq!(poll_once(Sint32.def_parse(&mut input)), Poll::Ready(0)); + let mut input = TestReadable([1,0,0],0); + assert_eq!(poll_once(Sint32.def_parse(&mut input)), Poll::Ready(-1)); + let mut input = TestReadable([2,0,0],0); + assert_eq!(poll_once(Sint32.def_parse(&mut input)), Poll::Ready(1)); + let mut input = TestReadable([128,128,128,128,2,0,0],0); + assert_eq!(poll_once(Int32.def_parse(&mut input)), Poll::Ready(1<<(7*4+1))); + let mut input = TestReadable([128,128,128,128,2,0,0],0); + assert_eq!(poll_once(Sint32.def_parse(&mut input)), Poll::Ready((1<<(7*4+1))/2)); + } + + #[test] + fn test_bytes() { + let mut input = TestReadable([1,2,3,4,5],0); + if let Poll::Ready(res) = poll_once( as LengthDelimitedParser>>::parse(&Buffer::<10>, &mut input, 5)) { + assert_eq!(&res[..], &[1,2,3,4,5]); + } else { assert_eq!(true, false) } + } + define_message! { OtherMessage { foo: bytes = 0 } } define_message! { SimpleMessage { foo: message(otherMessage) = 0, bar: enum(SimpleEnum) = 1 } } define_enum! { SimpleEnum { default = 0, noodle = 1 } } @@ -367,11 +483,75 @@ mod test { chain_id: string = 3, account_number: uint64 = 4 }} - trace_macros!(false); - #[test] - fn test_parser() { + define_message! { + Any { + type_url: string = 1, + value: bytes = 2 + } + } + define_message! { + TxBody { + messages: repeated(message(Any)) = 1, + memo: string = 2, + timeout_height: int64 = 3, + extension_options: repeated(message(Any)) = 1023 + } + } + #[test] + fn test_messages() { + let mut input = TestReadable([(0<<3)+2,2,0,1],0); + let cell = core::cell::RefCell::new(0); + let interp = OtherMessageInterp { + field_foo: Action(Buffer::<5>, |a: ArrayVec| { *cell.borrow_mut()+=a.len() as u64; Some(()) }), + }; + if let Poll::Ready(res) = poll_once(interp.parse(&mut input, 4)) { + assert_eq!(res, ()); + assert_eq!(cell.into_inner(), 2); + } else { assert!(false, "Failed to parse") } + + let mut input = TestReadable([(1<<3)+2,2,0,1,(2<<3)+2,0,(3<<3)+2,0,(4<<3),4],0); + let cell = core::cell::RefCell::new(0); + let interp = SignDocInterp { + field_body_bytes: Action(Buffer::<5>, |a: ArrayVec| { *cell.borrow_mut()+=a.len() as u64; Some(()) }), + field_auth_info_bytes: DropInterp, // Action(Buffer::<5>, |_| Some(())), + field_chain_id: Action(Buffer::<5>, |_| Some(())), + field_account_number: Action(DefaultInterp, |a| { *cell.borrow_mut()+=a; Some(()) }) + }; + if let Poll::Ready(res) = poll_once(interp.parse(&mut input, 10)) { + assert_eq!(res, ()); + assert_eq!(cell.into_inner(), 6); + } else { assert!(false, "Failed to parse") } + + let mut input = TestReadable([(1<<3)+2,2,0,1,(2<<3)+2,0,(3<<3)+2,0,(4<<3),4],0); + let cell = core::cell::RefCell::new(0); + let interp = BytesAsMessage(SignDoc, SignDocInterp { + field_body_bytes: Action(Buffer::<5>, |a: ArrayVec| { *cell.borrow_mut()+=a.len() as u64; Some(()) }), + field_auth_info_bytes: DropInterp, // Action(Buffer::<5>, |_| Some(())), + field_chain_id: Action(Buffer::<5>, |_| Some(())), + field_account_number: Action(DefaultInterp, |a| { *cell.borrow_mut()+=a; Some(()) }) + }); + if let Poll::Ready(res) = poll_once(interp.parse(&mut input, 10)) { + assert_eq!(res, ()); + assert_eq!(cell.into_inner(), 6); + } else { assert!(false, "Failed to parse") } + + + // Testing embedding of Message in Bytes field + let mut input = TestReadable([(1<<3)+2,2,2,0,(2<<3)+2,0,(3<<3)+2,0,(4<<3),4],0); + let cell = core::cell::RefCell::new(0); + let interp = SignDocInterp { + field_body_bytes: + BytesAsMessage(OtherMessage, OtherMessageInterp { field_foo: Action(Buffer::<5>, |_| { *cell.borrow_mut()+=5; Some(()) }) }), + field_auth_info_bytes: DropInterp, // Action(Buffer::<5>, |_| Some(())), + field_chain_id: Action(Buffer::<5>, |_| Some(())), + field_account_number: Action(DefaultInterp, |a| { *cell.borrow_mut()+=a; Some(()) }) + }; + if let Poll::Ready(res) = poll_once(interp.parse(&mut input, 10)) { + assert_eq!(res, ()); + assert_eq!(cell.into_inner(), 9); + } else { assert!(false, "Failed to parse") } } } diff --git a/src/protobufs/schema.rs b/src/protobufs/schema.rs index 3880be3..08a5b09 100644 --- a/src/protobufs/schema.rs +++ b/src/protobufs/schema.rs @@ -1,6 +1,6 @@ use core::ops::Mul; -#[derive(FromPrimitive, PartialEq)] +#[derive(FromPrimitive, PartialEq, Debug)] #[repr(u8)] pub enum ProtobufWire { Varint = 0, From 7601fe773b172ff2700c2d6b100bc8ca257a7d70 Mon Sep 17 00:00:00 2001 From: "Jonathan D.K. Gibbons" Date: Mon, 13 Jun 2022 13:36:49 -0400 Subject: [PATCH 12/16] Add any_of macro to handle Any messages in protobuf. This allows mapping a set of handlers to the different (known) possible URIs in an Any. --- Cargo.toml | 1 + src/lib.rs | 2 + src/protobufs/async_parser.rs | 138 ++++++++++++++++++++++++++++++++++ 3 files changed, 141 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 233ffde..cf14ec6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,6 +21,7 @@ bstringify = "0.1.2" enum-init = { git = "https://github.com/obsidiansystems/enum-init" } num-traits = "0.2.15" num-derive = "0.3.3" +trie-enum = { git = "https://github.com/ea-nasir/trie-enum" } [dependencies.ledger-log] git = "https://github.com/obsidiansystems/ledger-platform" diff --git a/src/lib.rs b/src/lib.rs index b1928c4..2e5106f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,6 +2,8 @@ #![allow(incomplete_features)] #![feature(bindings_after_at)] #![feature(const_generics)] +#![feature(const_evaluatable_checked)] +#![feature(const_fn_trait_bound)] #![feature(min_type_alias_impl_trait)] #![feature(min_specialization)] #![feature(generic_associated_types)] diff --git a/src/protobufs/async_parser.rs b/src/protobufs/async_parser.rs index 3648224..eaae256 100644 --- a/src/protobufs/async_parser.rs +++ b/src/protobufs/async_parser.rs @@ -357,6 +357,137 @@ macro_rules! define_enum { } } +use trie_enum::TrieLookup; +pub struct StringEnum(core::marker::PhantomData); + +pub const fn string_enum() -> StringEnum { + StringEnum(core::marker::PhantomData) +} + +impl HasOutput for StringEnum { + type Output = E; +} + +impl LengthDelimitedParser for StringEnum where [(); E::N]: Sized { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS, length: usize) -> Self::State<'c> { + async move { + let mut cursor = E::start(); + for _ in 0..length { + let [c] = input.read().await; + cursor = match cursor.step(c) { + None => reject().await, + Some(cur) => cur + } + } + match cursor.get_val() { + None => reject().await, + Some(r) => *r + } + } + } +} + +pub use trie_enum::enum_trie; + +#[macro_export] +macro_rules! any_of { + { $name:ident { $($variant:ident : $schema:ident = $string:literal),* } } => + { $crate::protobufs::async_parser::paste! { + + enum_trie! { [< $name Discriminator >] { $($variant = $string),* } } + struct $name<$([< $variant:camel Interp >]),*>{ + $([<$variant:snake>]: [< $variant:camel Interp >]),* + } + + impl]: HasOutput<$schema, Output = O>),*> HasOutput for $name<$([< $variant:camel Interp >]),*> + { + type Output = O; + } + + impl]: LengthDelimitedParser<$schema, TrackLength> + HasOutput<$schema, Output = O>),*> LengthDelimitedParser for $name<$([< $variant:camel Interp >]),*> { + type State<'c> = impl Future; + fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS, length: usize) -> Self::State<'c> { + async move { + let mut tl = TrackLength(input.clone(), 0); + let mut seen = false; + let mut discriminator = None; + loop { + let tag : u32 = parse_varint(&mut tl).await; + let wire = match ProtobufWire::from_u32(tag & 0x07) { Some(w) => w, None => reject().await, }; + if tag >> 3 == 1 { + if wire != ProtobufWire::LengthDelimited { + return reject().await; + } + if(seen) { + return reject().await; + } + seen = true; + let length : usize = parse_varint(&mut tl).await; + let discrim_parse = [<$name Discriminator>]::start(); + for _ in 0..length { + let [c] = tl.read().await; + discrim_parse.step(c); + } + discriminator = discrim_parse.get_val(); + } else { + skip_field(wire, &mut tl).await; + // Skip it + } + if tl.1 == length { + break; + } + if tl.1 >= length { + return reject().await; + } + } + + match discriminator { + None => { reject().await } + $( + Some([<$name Discriminator>]::$variant) => { + let mut tl = TrackLength(input.clone(), 0); + let mut seen = false; + loop { + let tag : u32 = parse_varint(&mut tl).await; + let wire = match ProtobufWire::from_u32(tag & 0x07) { Some(w) => w, None => reject().await, }; + if tag >> 3 == 2 { + if wire != [<$schema:camel>]::FORMAT { + return reject().await; + } + let length : usize = parse_varint(&mut tl).await; + self.[<$variant:snake>].parse(&mut tl, length).await; + if(seen) { + // Rejecting because of multiple fields on non-repeating; + // protobuf spec says we should "take the last value" but + // our flow doesn't permit this. + return reject().await; + } + seen = true; + } else { + skip_field(wire, &mut tl).await; + // Skip it + } + if tl.1 == length { + break; + } + if tl.1 >= length { + return reject().await; + } + } + } + )* + + } + + return reject().await; + + } + } + } + } } +} + /* // Any handler: take a list of pub struct AnyOf(T); @@ -499,6 +630,13 @@ mod test { } } + any_of! { + FooAnyInterp { + TxBody: TxBody = b"some.uri.here", + SignDoc: SignDoc = b"some.other.uri.here" + } + } + #[test] fn test_messages() { let mut input = TestReadable([(0<<3)+2,2,0,1],0); From 5170723710647d918d9c961f28e6d482dc71c3d5 Mon Sep 17 00:00:00 2001 From: Rosario Pulella Date: Wed, 15 Jun 2022 22:44:37 -0400 Subject: [PATCH 13/16] Make public parse_varint, LengthDelimitedParser, TrackLength --- src/protobufs/async_parser.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/protobufs/async_parser.rs b/src/protobufs/async_parser.rs index eaae256..a9e77d0 100644 --- a/src/protobufs/async_parser.rs +++ b/src/protobufs/async_parser.rs @@ -9,7 +9,7 @@ pub use num_traits::FromPrimitive; trait IsLengthDelimited { } -fn parse_varint<'a: 'c, 'c, T, BS: Readable>(input: &'a mut BS) -> impl Future + 'c where +pub fn parse_varint<'a: 'c, 'c, T, BS: Readable>(input: &'a mut BS) -> impl Future + 'c where T: Default + core::ops::Shl + core::ops::AddAssign + core::convert::From { async move { @@ -103,7 +103,7 @@ impl AsyncParser for DefaultInterp { type State<'c> = impl Future; } -trait LengthDelimitedParser : HasOutput{ +pub trait LengthDelimitedParser : HasOutput{ fn parse<'a: 'c, 'b: 'c, 'c>(&'b self, input: &'a mut BS, length: usize) -> Self::State<'c>; type State<'c>: Future; } @@ -171,7 +171,7 @@ impl(BS, usize); +pub struct TrackLength(pub BS, pub usize); impl Readable for TrackLength { type OutFut<'a, const N: usize> = impl Future; From 930531efd8ecb63c09ad3b4eaddf1b4ddc3220f2 Mon Sep 17 00:00:00 2001 From: Rosario Pulella Date: Tue, 28 Jun 2022 20:42:14 -0400 Subject: [PATCH 14/16] Align num-traits dep with ledger-nano-sdk --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index cf14ec6..26e6f01 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ log = "0.4.14" paste = "1.0" bstringify = "0.1.2" enum-init = { git = "https://github.com/obsidiansystems/enum-init" } -num-traits = "0.2.15" +num-traits = { version = "0.2.14", default-features = false } num-derive = "0.3.3" trie-enum = { git = "https://github.com/ea-nasir/trie-enum" } From cfbd9ec2141155afabf6cfd79cbd65103e945e0c Mon Sep 17 00:00:00 2001 From: Rosario Pulella Date: Tue, 12 Jul 2022 17:55:47 -0400 Subject: [PATCH 15/16] Reference FromPrimitive directly in macro output This will require uses to add ``` #[macro_use] extern create num_derive; ``` to the top-level of there project. --- src/protobufs/async_parser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/protobufs/async_parser.rs b/src/protobufs/async_parser.rs index a9e77d0..4bb7e73 100644 --- a/src/protobufs/async_parser.rs +++ b/src/protobufs/async_parser.rs @@ -328,7 +328,7 @@ macro_rules! define_enum { { $name:ident { $($variant:ident = $number:literal),* } } => { $crate::protobufs::async_parser::paste! { - #[derive($crate::num_derive::FromPrimitive, PartialEq)] + #[derive(FromPrimitive, PartialEq)] #[repr(u32)] pub enum $name { $([<$variant:camel>] = $number),* From bd7f1b377704c2d1f96657c36d143e20fb5c40b7 Mon Sep 17 00:00:00 2001 From: Rosario Pulella Date: Thu, 14 Jul 2022 13:54:48 -0400 Subject: [PATCH 16/16] Update ledger-log dep to ea-nasir/ledger-log --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 26e6f01..2beda60 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,7 +24,7 @@ num-derive = "0.3.3" trie-enum = { git = "https://github.com/ea-nasir/trie-enum" } [dependencies.ledger-log] -git = "https://github.com/obsidiansystems/ledger-platform" +git = "https://github.com/ea-nasir/ledger-log" features = ["log_trace"] optional = true