diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index 46a97ee2..df0c91cb 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -175,7 +175,6 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { fn value_body(&self) -> IonResult<&'top [u8]> { let value_total_length = self.encoded_value.total_length(); if self.input.len() < value_total_length { - eprintln!("[value_body] Incomplete {:?}", self); return IonResult::incomplete( "only part of the requested value is available in the buffer", self.input.offset(), diff --git a/src/lazy/binary/raw/value.rs b/src/lazy/binary/raw/value.rs index 78cc85b6..aac56176 100644 --- a/src/lazy/binary/raw/value.rs +++ b/src/lazy/binary/raw/value.rs @@ -231,7 +231,7 @@ impl<'a, 'top> EncodedBinaryValueData_1_0<'a, 'top> { pub fn trailing_length_span(&self) -> Span<'top> { let stream_range = self.trailing_length_range(); let offset = self.value.input.offset(); - let local_range = stream_range.start - offset .. stream_range.end - offset; + let local_range = stream_range.start - offset..stream_range.end - offset; let bytes = &self.value.input.bytes()[local_range]; Span::with_offset(stream_range.start, bytes) } @@ -252,7 +252,7 @@ impl<'a, 'top> EncodedBinaryValueData_1_0<'a, 'top> { pub fn body_span(&self) -> Span<'top> { let stream_range = self.body_range(); let offset = self.value.input.offset(); - let local_range = stream_range.start - offset .. stream_range.end - offset; + let local_range = stream_range.start - offset..stream_range.end - offset; let bytes = &self.span().bytes()[local_range]; Span::with_offset(stream_range.start, bytes) } diff --git a/src/lazy/decoder.rs b/src/lazy/decoder.rs index 3e85295f..2b57e021 100644 --- a/src/lazy/decoder.rs +++ b/src/lazy/decoder.rs @@ -269,7 +269,7 @@ impl<'top, D: LazyDecoder> HasRange for LazyRawFieldExpr<'top, D> { // function while also preventing users from seeing or depending on it. pub(crate) mod private { use crate::lazy::expanded::r#struct::UnexpandedField; - use crate::lazy::expanded::EncodingContext; + use crate::lazy::expanded::EncodingContextRef; use crate::IonResult; use super::{LazyDecoder, LazyRawFieldExpr, LazyRawStruct}; @@ -286,15 +286,21 @@ pub(crate) mod private { /// expansion process. fn unexpanded_fields( &self, - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, ) -> RawStructUnexpandedFieldsIterator<'top, D>; } pub struct RawStructUnexpandedFieldsIterator<'top, D: LazyDecoder> { - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, raw_fields: as LazyRawStruct<'top, D>>::Iterator, } + impl<'top, D: LazyDecoder> RawStructUnexpandedFieldsIterator<'top, D> { + pub fn context(&self) -> EncodingContextRef<'top> { + self.context + } + } + impl<'top, D: LazyDecoder> Iterator for RawStructUnexpandedFieldsIterator<'top, D> { type Item = IonResult>; @@ -320,7 +326,7 @@ pub(crate) mod private { { fn unexpanded_fields( &self, - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, ) -> RawStructUnexpandedFieldsIterator<'top, D> { let raw_fields = >::iter(self); RawStructUnexpandedFieldsIterator { diff --git a/src/lazy/encoding.rs b/src/lazy/encoding.rs index c7145267..bb155614 100644 --- a/src/lazy/encoding.rs +++ b/src/lazy/encoding.rs @@ -29,7 +29,7 @@ use crate::lazy::text::raw::v1_1::reader::{ }; use crate::lazy::text::value::{ LazyRawTextValue, LazyRawTextValue_1_0, LazyRawTextValue_1_1, LazyRawTextVersionMarker_1_0, - LazyRawTextVersionMarker_1_1, MatchedRawTextValue, RawTextAnnotationsIterator, + LazyRawTextVersionMarker_1_1, RawTextAnnotationsIterator, }; use crate::{TextKind, WriteConfig}; @@ -100,26 +100,16 @@ pub trait BinaryEncoding<'top>: Encoding + LazyDecoder {} /// Marker trait for text encodings. pub trait TextEncoding<'top>: - Encoding + LazyDecoder = RawTextAnnotationsIterator<'top>> + Encoding + + LazyDecoder< + AnnotationsIterator<'top> = RawTextAnnotationsIterator<'top>, + Value<'top> = LazyRawTextValue<'top, Self>, + > { - fn value_from_matched( - matched: MatchedRawTextValue<'top, Self>, - ) -> ::Value<'top>; -} -impl<'top> TextEncoding<'top> for TextEncoding_1_0 { - fn value_from_matched( - matched: MatchedRawTextValue<'_, Self>, - ) -> ::Value<'_> { - LazyRawTextValue_1_0::from(matched) - } -} -impl<'top> TextEncoding<'top> for TextEncoding_1_1 { - fn value_from_matched( - matched: MatchedRawTextValue<'_, Self>, - ) -> ::Value<'_> { - LazyRawTextValue_1_1::from(matched) - } + // No methods, just a marker } +impl<'top> TextEncoding<'top> for TextEncoding_1_0 {} +impl<'top> TextEncoding<'top> for TextEncoding_1_1 {} /// Marker trait for encodings that support macros. pub trait EncodingWithMacroSupport {} @@ -192,7 +182,6 @@ impl LazyDecoder for BinaryEncoding_1_1 { // the implementation will conflict with the core `impl From for T` implementation. pub trait RawValueLiteral {} -impl<'top, E: TextEncoding<'top>> RawValueLiteral for MatchedRawTextValue<'top, E> {} impl<'top, E: TextEncoding<'top>> RawValueLiteral for LazyRawTextValue<'top, E> {} impl<'top> RawValueLiteral for LazyRawBinaryValue_1_0<'top> {} impl<'top> RawValueLiteral for LazyRawBinaryValue_1_1<'top> {} diff --git a/src/lazy/expanded/compiler.rs b/src/lazy/expanded/compiler.rs index c22f3312..758342cc 100644 --- a/src/lazy/expanded/compiler.rs +++ b/src/lazy/expanded/compiler.rs @@ -9,7 +9,7 @@ use crate::lazy::expanded::template::{ TemplateBodyMacroInvocation, TemplateBodyValueExpr, TemplateMacro, TemplateStructIndex, TemplateValue, }; -use crate::lazy::expanded::EncodingContext; +use crate::lazy::expanded::EncodingContextRef; use crate::lazy::r#struct::LazyStruct; use crate::lazy::reader::LazyTextReader_1_1; use crate::lazy::sequence::{LazyList, LazySExp}; @@ -61,7 +61,7 @@ impl TemplateCompiler { /// to the template without interpretation. `(quote ...)` does not appear in the compiled /// template as there is nothing more for it to do at expansion time. pub fn compile_from_text( - context: EncodingContext, + context: EncodingContextRef, expression: &str, ) -> IonResult { // TODO: This is a rudimentary implementation that panics instead of performing thorough @@ -137,7 +137,7 @@ impl TemplateCompiler { /// /// If `is_quoted` is true, nested symbols and s-expressions will not be interpreted. fn compile_value<'top, D: LazyDecoder>( - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, signature: &MacroSignature, definition: &mut TemplateBody, is_quoted: bool, @@ -210,7 +210,7 @@ impl TemplateCompiler { /// Helper method for visiting all of the child expressions in a list. fn compile_list<'top, D: LazyDecoder>( - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, signature: &MacroSignature, definition: &mut TemplateBody, is_quoted: bool, @@ -238,7 +238,7 @@ impl TemplateCompiler { /// Helper method for visiting all of the child expressions in a sexp. fn compile_sexp<'top, D: LazyDecoder>( - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, signature: &MacroSignature, definition: &mut TemplateBody, is_quoted: bool, @@ -272,7 +272,7 @@ impl TemplateCompiler { /// Adds a `lazy_sexp` that has been determined to represent a macro invocation to the /// TemplateBody. fn compile_macro<'top, D: LazyDecoder>( - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, signature: &MacroSignature, definition: &mut TemplateBody, lazy_sexp: LazySExp<'top, D>, @@ -311,7 +311,7 @@ impl TemplateCompiler { /// Given a `LazyValue` that represents a macro ID (name or address), attempts to resolve the /// ID to a macro address. fn name_and_address_from_id_expr<'top, D: LazyDecoder>( - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, id_expr: Option>>, ) -> IonResult<(Option, usize)> { match id_expr { @@ -352,7 +352,7 @@ impl TemplateCompiler { /// without interpretation. `lazy_sexp` itself is the `quote` macro, and does not get added /// to the template body as there is nothing more for it to do at evaluation time. fn compile_quoted_elements<'top, D: LazyDecoder>( - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, signature: &MacroSignature, definition: &mut TemplateBody, lazy_sexp: LazySExp<'top, D>, @@ -375,7 +375,7 @@ impl TemplateCompiler { /// Adds `lazy_sexp` to the template body without interpretation. fn compile_quoted_sexp<'top, D: LazyDecoder>( - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, signature: &MacroSignature, definition: &mut TemplateBody, annotations_range: Range, @@ -419,7 +419,7 @@ impl TemplateCompiler { /// Recursively adds all of the expressions in `lazy_struct` to the `TemplateBody`. fn compile_struct<'top, D: LazyDecoder>( - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, signature: &MacroSignature, definition: &mut TemplateBody, is_quoted: bool, @@ -476,7 +476,7 @@ impl TemplateCompiler { /// Resolves `variable` to a parameter in the macro signature and adds a corresponding /// `TemplateExpansionStep` to the `TemplateBody`. fn compile_variable_reference( - _context: EncodingContext, + _context: EncodingContextRef, signature: &MacroSignature, definition: &mut TemplateBody, annotations_range: Range, @@ -497,7 +497,10 @@ impl TemplateCompiler { .ok_or_else(|| { IonError::decoding_error(format!("variable '{name}' is not recognized")) })?; - definition.push_variable(signature_index); + if signature_index > u16::MAX as usize { + return IonResult::decoding_error("this implementation supports up to 65K parameters"); + } + definition.push_variable(signature_index as u16); Ok(()) } } @@ -558,7 +561,7 @@ mod tests { definition, index, TemplateBodyValueExpr::Variable(TemplateBodyVariableReference::new( - expected_signature_index, + expected_signature_index as u16, )), ) } @@ -630,7 +633,7 @@ mod tests { let expression = "(macro foo () 42)"; - let template = TemplateCompiler::compile_from_text(context, expression)?; + let template = TemplateCompiler::compile_from_text(context.get_ref(), expression)?; assert_eq!(template.name(), "foo"); assert_eq!(template.signature().parameters().len(), 0); expect_value(&template, 0, TemplateValue::Int(42.into()))?; @@ -644,7 +647,7 @@ mod tests { let expression = "(macro foo () [1, 2, 3])"; - let template = TemplateCompiler::compile_from_text(context, expression)?; + let template = TemplateCompiler::compile_from_text(context.get_ref(), expression)?; assert_eq!(template.name(), "foo"); assert_eq!(template.signature().parameters().len(), 0); expect_value(&template, 0, TemplateValue::List(ExprRange::new(1..4)))?; @@ -661,7 +664,7 @@ mod tests { let expression = r#"(macro foo () (values 42 "hello" false))"#; - let template = TemplateCompiler::compile_from_text(context, expression)?; + let template = TemplateCompiler::compile_from_text(context.get_ref(), expression)?; assert_eq!(template.name(), "foo"); assert_eq!(template.signature().parameters().len(), 0); expect_macro( @@ -683,7 +686,7 @@ mod tests { let expression = "(macro foo (x y z) [100, [200, a::b::300], x, {y: [true, false, z]}])"; - let template = TemplateCompiler::compile_from_text(context, expression)?; + let template = TemplateCompiler::compile_from_text(context.get_ref(), expression)?; expect_value(&template, 0, TemplateValue::List(ExprRange::new(1..12)))?; expect_value(&template, 1, TemplateValue::Int(Int::from(100)))?; expect_value(&template, 2, TemplateValue::List(ExprRange::new(3..5)))?; @@ -713,7 +716,7 @@ mod tests { let expression = "(macro identity (x) x)"; - let template = TemplateCompiler::compile_from_text(context, expression)?; + let template = TemplateCompiler::compile_from_text(context.get_ref(), expression)?; assert_eq!(template.name(), "identity"); assert_eq!(template.signature().parameters().len(), 1); expect_variable(&template, 0, 0)?; @@ -736,7 +739,7 @@ mod tests { (values x)))) "#; - let template = TemplateCompiler::compile_from_text(context, expression)?; + let template = TemplateCompiler::compile_from_text(context.get_ref(), expression)?; assert_eq!(template.name(), "foo"); assert_eq!(template.signature().parameters().len(), 1); // Outer `values` diff --git a/src/lazy/expanded/e_expression.rs b/src/lazy/expanded/e_expression.rs index d3235696..0ef6c1c9 100644 --- a/src/lazy/expanded/e_expression.rs +++ b/src/lazy/expanded/e_expression.rs @@ -5,7 +5,7 @@ use crate::lazy::decoder::{LazyDecoder, LazyRawValueExpr}; use crate::lazy::encoding::TextEncoding_1_1; use crate::lazy::expanded::macro_evaluator::{MacroExpr, RawEExpression, ValueExpr}; use crate::lazy::expanded::macro_table::MacroRef; -use crate::lazy::expanded::{EncodingContext, LazyExpandedValue}; +use crate::lazy::expanded::{EncodingContextRef, LazyExpandedValue}; use crate::lazy::text::raw::v1_1::reader::MacroIdRef; use crate::IonResult; use std::fmt::{Debug, Formatter}; @@ -13,12 +13,15 @@ use std::fmt::{Debug, Formatter}; /// An e-expression (in Ion format `D`) that has been resolved in the current encoding context. #[derive(Copy, Clone)] pub struct EExpression<'top, D: LazyDecoder> { - pub(crate) context: EncodingContext<'top>, + pub(crate) context: EncodingContextRef<'top>, pub(crate) raw_invocation: D::EExp<'top>, pub(crate) invoked_macro: MacroRef<'top>, } impl<'top, D: LazyDecoder> EExpression<'top, D> { + pub fn context(&self) -> EncodingContextRef<'top> { + self.context + } pub fn raw_invocation(&self) -> D::EExp<'top> { self.raw_invocation } @@ -35,7 +38,7 @@ impl<'top, D: LazyDecoder> Debug for EExpression<'top, D> { impl<'top, D: LazyDecoder> EExpression<'top, D> { pub fn new( - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, raw_invocation: D::EExp<'top>, invoked_macro: MacroRef<'top>, ) -> Self { @@ -67,7 +70,7 @@ impl<'top, D: LazyDecoder> From> for MacroExpr<'top, D> { } pub struct EExpressionArgsIterator<'top, D: LazyDecoder> { - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, raw_args: as RawEExpression<'top, D>>::RawArgumentsIterator<'top>, } diff --git a/src/lazy/expanded/macro_evaluator.rs b/src/lazy/expanded/macro_evaluator.rs index 1da20e1b..3a3d0ac8 100644 --- a/src/lazy/expanded/macro_evaluator.rs +++ b/src/lazy/expanded/macro_evaluator.rs @@ -24,8 +24,8 @@ use crate::lazy::expanded::template::{ TemplateBodyValueExpr, TemplateBodyVariableReference, TemplateElement, TemplateMacroInvocation, TemplateMacroInvocationArgsIterator, TemplateMacroRef, TemplateValue, }; -use crate::lazy::expanded::EncodingContext; -use crate::lazy::expanded::{ExpandedValueRef, ExpandedValueSource, LazyExpandedValue}; +use crate::lazy::expanded::EncodingContextRef; +use crate::lazy::expanded::{ExpandedValueRef, LazyExpandedValue}; use crate::lazy::str_ref::StrRef; use crate::lazy::text::raw::v1_1::reader::MacroIdRef; use crate::result::IonFailure; @@ -51,7 +51,7 @@ pub trait RawEExpression<'top, D: LazyDecoder = Self>>: /// If the lookup is successful, returns an `Ok` containing a resolved `EExpression` that holds /// a reference to the macro being invoked. /// If the ID cannot be found in the `EncodingContext`, returns `Err`. - fn resolve(self, context: EncodingContext<'top>) -> IonResult> { + fn resolve(self, context: EncodingContextRef<'top>) -> IonResult> { let invoked_macro = context .macro_table .macro_with_id(self.id()) @@ -97,6 +97,13 @@ impl<'top, D: LazyDecoder> MacroExpr<'top, D> { MacroExpr::EExp(e) => e.invoked_macro(), } } + + fn context(&self) -> EncodingContextRef<'top> { + match self { + MacroExpr::TemplateMacro(t) => t.context(), + MacroExpr::EExp(e) => e.context(), + } + } } pub enum MacroExprArgsKind<'top, D: LazyDecoder> { @@ -117,6 +124,13 @@ impl<'top, D: LazyDecoder> Iterator for MacroExprArgsIterator<'top, D> { MacroExprArgsKind::EExp(e) => e.next(), } } + + fn size_hint(&self) -> (usize, Option) { + match &self.source { + MacroExprArgsKind::Macro(m) => m.size_hint(), + MacroExprArgsKind::EExp(e) => e.size_hint(), + } + } } /// A single expression appearing in argument position within a macro invocation. @@ -205,12 +219,9 @@ impl<'top, D: LazyDecoder> MacroExpansion<'top, D> { /// * produces another value. /// * encounters another macro or variable that needs to be expanded. /// * is completed. - fn next( - &mut self, - context: EncodingContext<'top>, - environment: Environment<'top, D>, - ) -> IonResult>> { + fn next(&mut self, environment: Environment<'top, D>) -> IonResult>> { use MacroExpansionKind::*; + let context = self.invocation.context(); // Delegate the call to `next()` based on the macro kind. match &mut self.kind { MakeString(make_string_expansion) => make_string_expansion.next(context, environment), @@ -239,8 +250,6 @@ pub type EnvironmentStack<'top, D> = BumpVec<'top, Environment<'top, D>>; /// For eager evaluation, use [`MacroEvaluator::evaluate`], which returns an iterator that will /// yield the expanded values. pub struct MacroEvaluator<'top, D: LazyDecoder> { - // Holds references to the macro table, symbol table, and bump allocator. - context: EncodingContext<'top>, // A stack with the most recent macro invocations at the top. This stack grows each time a macro // of any kind begins evaluation. macro_stack: MacroStack<'top, D>, @@ -259,14 +268,13 @@ pub struct MacroEvaluator<'top, D: LazyDecoder> { } impl<'top, D: LazyDecoder> MacroEvaluator<'top, D> { - pub fn new(context: EncodingContext<'top>, environment: Environment<'top, D>) -> Self { + pub fn new(context: EncodingContextRef<'top>, environment: Environment<'top, D>) -> Self { let macro_stack = BumpVec::new_in(context.allocator); let mut env_stack = BumpVec::new_in(context.allocator); env_stack.push(environment); Self { macro_stack, env_stack, - context, } } @@ -292,7 +300,14 @@ impl<'top, D: LazyDecoder> MacroEvaluator<'top, D> { &mut self, invocation: MacroExpr<'top, D>, ) -> IonResult> { - let mut args = BumpVec::new_in(self.context.allocator); + // Get an allocator reference from the `env_stack` BumpVec. + let allocator = self.env_stack.bump(); + let args_iter = invocation.arguments(self.environment()); + // Use the iterator's size hint to determine an initial capacity to aim for. + let num_args_hint = args_iter.size_hint(); + let capacity_hint = num_args_hint.1.unwrap_or(num_args_hint.0); + let mut args = BumpVec::with_capacity_in(capacity_hint, allocator); + for arg in invocation.arguments(self.environment()) { args.push(arg?); } @@ -394,7 +409,7 @@ impl<'top, D: LazyDecoder> MacroEvaluator<'top, D> { // Ask that expansion to continue its evaluation by one step. use ValueExpr::*; - match current_expansion.next(self.context, environment)? { + match current_expansion.next(environment)? { // If we get a value, return it to the caller. Some(ValueLiteral(value)) => { return Ok(Some(value)); @@ -505,7 +520,7 @@ impl<'top, D: LazyDecoder> ValuesExpansion<'top, D> { /// Yields the next [`ValueExpr`] in this macro's evaluation. pub fn next( &mut self, - _context: EncodingContext<'top>, + _context: EncodingContextRef<'top>, _environment: Environment<'top, D>, ) -> IonResult>> { // We visit the argument expressions in the invocation in order from left to right. @@ -549,7 +564,7 @@ impl<'top, D: LazyDecoder> MakeStringExpansion<'top, D> { /// Yields the next [`ValueExpr`] in this `make_string` macro's evaluation. pub fn next( &mut self, - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, environment: Environment<'top, D>, ) -> IonResult>> { // `make_string` always produces a single value. Once that value has been returned, it needs @@ -592,16 +607,14 @@ impl<'top, D: LazyDecoder> MakeStringExpansion<'top, D> { static EMPTY_ANNOTATIONS: &[&str] = &[]; self.is_complete = true; - Ok(Some(ValueExpr::ValueLiteral(LazyExpandedValue { - context, - source: ExpandedValueSource::Constructed(EMPTY_ANNOTATIONS, expanded_value_ref), - variable: None, - }))) + Ok(Some(ValueExpr::ValueLiteral( + LazyExpandedValue::from_constructed(context, EMPTY_ANNOTATIONS, expanded_value_ref), + ))) } /// Appends a string fragment to the `BumpString` being constructed. fn append_expanded_raw_text_value( - context: EncodingContext<'_>, + context: EncodingContextRef<'_>, buffer: &mut BumpString, value: ExpandedValueRef<'_, D>, ) -> IonResult<()> { @@ -656,7 +669,7 @@ impl<'top> TemplateExpansion<'top> { fn next<'data: 'top, D: LazyDecoder>( &mut self, - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, environment: Environment<'top, D>, ) -> IonResult>> { let value_expr = match self.template.body().expressions().get(self.step_index) { diff --git a/src/lazy/expanded/mod.rs b/src/lazy/expanded/mod.rs index 4e67bb48..38cad505 100644 --- a/src/lazy/expanded/mod.rs +++ b/src/lazy/expanded/mod.rs @@ -4,7 +4,7 @@ //! data, replacing the word `Raw` with the word `Expanded` in the type name. //! //! The expanded types expose largely the same API, with some key differences: -//! 1. Most method invocations require an [`EncodingContext`] to be specified, giving the +//! 1. Most method invocations require an [`EncodingContextRef`] to be specified, giving the //! evaluator access to the necessary macro definitions and the symbol table. //! 2. All macro invocations encountered in the raw layer are fully expanded, meaning that //! values surfaced by calls to `next()` on readers/iterators may be the result of macro @@ -35,6 +35,7 @@ use std::cell::{Cell, UnsafeCell}; use std::fmt::{Debug, Formatter}; use std::iter::empty; +use std::ops::Deref; use bumpalo::Bump as BumpAllocator; @@ -77,7 +78,7 @@ pub mod r#struct; pub mod template; /// A collection of resources that can be used to encode or decode Ion values. -/// The `'top` lifetime associated with the [`EncodingContext`] reflects the fact that it can only +/// The `'top` lifetime associated with the [`EncodingContextRef`] reflects the fact that it can only /// be used as long as the reader is positioned on the same top level expression (i.e. the symbol and /// macro tables are guaranteed not to change). // It should be possible to loosen this definition of `'top` to include several top level values @@ -105,6 +106,29 @@ impl<'top> EncodingContext<'top> { allocator, } } + + pub fn get_ref(&'top self) -> EncodingContextRef<'top> { + EncodingContextRef { context: self } + } +} + +#[derive(Debug, Copy, Clone)] +pub struct EncodingContextRef<'top> { + context: &'top EncodingContext<'top>, +} + +impl<'top> EncodingContextRef<'top> { + pub fn new(context: &'top EncodingContext<'top>) -> Self { + Self { context } + } +} + +impl<'top> Deref for EncodingContextRef<'top> { + type Target = EncodingContext<'top>; + + fn deref(&self) -> &Self::Target { + self.context + } } #[derive(Debug)] @@ -211,8 +235,9 @@ impl LazyExpandingReader IonResult { + let context = self.context(); let template_macro: TemplateMacro = - { TemplateCompiler::compile_from_text(self.context(), template_definition)? }; + { TemplateCompiler::compile_from_text(context.get_ref(), template_definition)? }; let macro_table = self.macro_table.get_mut(); macro_table.add_macro(template_macro) @@ -361,6 +386,7 @@ impl LazyExpandingReader LazyExpandingReader return Ok(SystemStreamItem::VersionMarker(marker)), // We got our value; return it. Value(raw_value) => { - let value = LazyExpandedValue { - source: ExpandedValueSource::ValueLiteral(raw_value), - context: self.context(), - variable: None, - }; + let value = LazyExpandedValue::from_literal(context_ref, raw_value); return self.interpret_value(value); } // It's another macro invocation, we'll start evaluating it. EExpression(e_exp) => { let context = self.context(); - let resolved_e_exp = e_exp.resolve(context)?; + let resolved_e_exp = e_exp.resolve(context_ref)?; // Get the current evaluator or make a new one let evaluator = match self.evaluator_ptr.get() { // If there's already an evaluator, dereference the pointer. @@ -388,7 +410,9 @@ impl LazyExpandingReader context .allocator // E-expressions always have an empty environment - .alloc_with(move || MacroEvaluator::new(context, Environment::empty())), + .alloc_with(move || { + MacroEvaluator::new(context_ref, Environment::empty()) + }), }; // Push the invocation onto the evaluation stack. evaluator.push(resolved_e_exp)?; @@ -484,23 +508,34 @@ impl<'top, V: RawValueLiteral, Encoding: LazyDecoder = V>> From #[derive(Debug, Copy, Clone)] pub struct TemplateVariableReference<'top> { template: TemplateMacroRef<'top>, - signature_index: usize, + signature_index: u16, } impl<'top> TemplateVariableReference<'top> { + pub fn new(template: TemplateMacroRef<'top>, signature_index: u16) -> Self { + Self { + template, + signature_index, + } + } + fn name(&self) -> &'top str { - self.template.signature.parameters()[self.signature_index].name() + self.template.signature.parameters()[self.signature_index()].name() } fn host_template(&self) -> TemplateMacroRef<'top> { self.template } + + fn signature_index(&self) -> usize { + self.signature_index as usize + } } /// A value produced by expanding the 'raw' view of the input data. #[derive(Copy, Clone)] pub struct LazyExpandedValue<'top, Encoding: LazyDecoder> { - pub(crate) context: EncodingContext<'top>, + pub(crate) context: EncodingContextRef<'top>, pub(crate) source: ExpandedValueSource<'top, Encoding>, // If this value came from a variable reference in a template macro expansion, the // template and the name of the variable can be found here. @@ -515,7 +550,7 @@ impl<'top, Encoding: LazyDecoder> Debug for LazyExpandedValue<'top, Encoding> { impl<'top, Encoding: LazyDecoder> LazyExpandedValue<'top, Encoding> { pub(crate) fn from_literal( - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, value: Encoding::Value<'top>, ) -> Self { Self { @@ -526,7 +561,7 @@ impl<'top, Encoding: LazyDecoder> LazyExpandedValue<'top, Encoding> { } pub(crate) fn from_template( - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, environment: Environment<'top, Encoding>, element: TemplateElement<'top>, ) -> Self { @@ -537,6 +572,18 @@ impl<'top, Encoding: LazyDecoder> LazyExpandedValue<'top, Encoding> { } } + pub(crate) fn from_constructed( + context: EncodingContextRef<'top>, + annotations: &'top [&'top str], + value: &'top ExpandedValueRef<'top, Encoding>, + ) -> Self { + Self { + context, + source: ExpandedValueSource::Constructed(annotations, value), + variable: None, + } + } + pub(crate) fn via_variable(mut self, variable_ref: TemplateVariableReference<'top>) -> Self { self.variable = Some(variable_ref); self @@ -594,7 +641,7 @@ impl<'top, Encoding: LazyDecoder> LazyExpandedValue<'top, Encoding> { } } - pub fn context(&self) -> EncodingContext<'top> { + pub fn context(&self) -> EncodingContextRef<'top> { self.context } @@ -834,7 +881,7 @@ impl<'top, Encoding: LazyDecoder> ExpandedValueRef<'top, Encoding> { } } - fn from_raw(context: EncodingContext<'top>, value: RawValueRef<'top, Encoding>) -> Self { + fn from_raw(context: EncodingContextRef<'top>, value: RawValueRef<'top, Encoding>) -> Self { use RawValueRef::*; match value { Null(ion_type) => ExpandedValueRef::Null(ion_type), @@ -878,7 +925,7 @@ impl<'top, D: LazyDecoder> Debug for ExpandedValueRef<'top, D> { impl<'top, Encoding: LazyDecoder> ExpandedValueRef<'top, Encoding> { fn from_template( - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, environment: Environment<'top, Encoding>, element: &TemplateElement<'top>, ) -> Self { diff --git a/src/lazy/expanded/sequence.rs b/src/lazy/expanded/sequence.rs index bb021a73..15786948 100644 --- a/src/lazy/expanded/sequence.rs +++ b/src/lazy/expanded/sequence.rs @@ -7,8 +7,7 @@ use crate::lazy::expanded::template::{ AnnotationsRange, ExprRange, TemplateMacroRef, TemplateSequenceIterator, }; use crate::lazy::expanded::{ - EncodingContext, ExpandedAnnotationsIterator, ExpandedAnnotationsSource, ExpandedValueSource, - LazyExpandedValue, + EncodingContextRef, ExpandedAnnotationsIterator, ExpandedAnnotationsSource, LazyExpandedValue, }; use crate::result::IonFailure; use crate::{IonError, IonResult, IonType}; @@ -84,13 +83,13 @@ pub enum ExpandedListSource<'top, D: LazyDecoder> { /// a template. #[derive(Clone, Copy)] pub struct LazyExpandedList<'top, D: LazyDecoder> { - pub(crate) context: EncodingContext<'top>, + pub(crate) context: EncodingContextRef<'top>, pub(crate) source: ExpandedListSource<'top, D>, } impl<'top, D: LazyDecoder> LazyExpandedList<'top, D> { pub fn from_literal( - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, list: D::List<'top>, ) -> LazyExpandedList<'top, D> { let source = ExpandedListSource::ValueLiteral(list); @@ -98,7 +97,7 @@ impl<'top, D: LazyDecoder> LazyExpandedList<'top, D> { } pub fn from_template( - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, environment: Environment<'top, D>, template: TemplateMacroRef<'top>, annotations_range: AnnotationsRange, @@ -173,7 +172,7 @@ pub enum ExpandedListIteratorSource<'top, D: LazyDecoder> { /// Iterates over the child values of a [`LazyExpandedList`]. pub struct ExpandedListIterator<'top, D: LazyDecoder> { - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, source: ExpandedListIteratorSource<'top, D>, } @@ -209,7 +208,7 @@ pub enum ExpandedSExpSource<'top, D: LazyDecoder> { #[derive(Clone, Copy)] pub struct LazyExpandedSExp<'top, D: LazyDecoder> { pub(crate) source: ExpandedSExpSource<'top, D>, - pub(crate) context: EncodingContext<'top>, + pub(crate) context: EncodingContextRef<'top>, } impl<'top, D: LazyDecoder> LazyExpandedSExp<'top, D> { @@ -263,7 +262,7 @@ impl<'top, D: LazyDecoder> LazyExpandedSExp<'top, D> { } pub fn from_literal( - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, sexp: D::SExp<'top>, ) -> LazyExpandedSExp<'top, D> { let source = ExpandedSExpSource::ValueLiteral(sexp); @@ -271,7 +270,7 @@ impl<'top, D: LazyDecoder> LazyExpandedSExp<'top, D> { } pub fn from_template( - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, environment: Environment<'top, D>, template: TemplateMacroRef<'top>, annotations: AnnotationsRange, @@ -296,7 +295,7 @@ pub enum ExpandedSExpIteratorSource<'top, D: LazyDecoder> { /// Iterates over the child values of a [`LazyExpandedSExp`]. pub struct ExpandedSExpIterator<'top, D: LazyDecoder> { - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, source: ExpandedSExpIteratorSource<'top, D>, } @@ -316,7 +315,7 @@ impl<'top, D: LazyDecoder> Iterator for ExpandedSExpIterator<'top, D> { /// For both lists and s-expressions, yields the next sequence value by either continuing a macro /// evaluation already in progress or reading the next item from the input stream. fn expand_next_sequence_value<'top, D: LazyDecoder>( - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, evaluator: &mut MacroEvaluator<'top, D>, iter: &mut impl Iterator>>, ) -> Option>> { @@ -335,11 +334,7 @@ fn expand_next_sequence_value<'top, D: LazyDecoder>( match iter.next() { None => return None, Some(Ok(RawValueExpr::ValueLiteral(value))) => { - return Some(Ok(LazyExpandedValue { - source: ExpandedValueSource::ValueLiteral(value), - context, - variable: None, - })) + return Some(Ok(LazyExpandedValue::from_literal(context, value))) } Some(Ok(RawValueExpr::MacroInvocation(invocation))) => { let resolved_invocation = match invocation.resolve(context) { diff --git a/src/lazy/expanded/struct.rs b/src/lazy/expanded/struct.rs index c9293a82..96ad72ee 100644 --- a/src/lazy/expanded/struct.rs +++ b/src/lazy/expanded/struct.rs @@ -12,8 +12,8 @@ use crate::lazy::expanded::template::{ TemplateMacroRef, TemplateStructIndex, TemplateStructUnexpandedFieldsIterator, }; use crate::lazy::expanded::{ - EncodingContext, ExpandedAnnotationsIterator, ExpandedAnnotationsSource, ExpandedValueRef, - ExpandedValueSource, LazyExpandedValue, TemplateVariableReference, + EncodingContextRef, ExpandedAnnotationsIterator, ExpandedAnnotationsSource, ExpandedValueRef, + LazyExpandedValue, TemplateVariableReference, }; use crate::result::IonFailure; use crate::symbol_ref::AsSymbolRef; @@ -27,9 +27,9 @@ use crate::{IonError, IonResult, RawSymbolTokenRef, SymbolRef}; // and expands the field as part of its iteration process. #[derive(Debug, Clone, Copy)] pub enum UnexpandedField<'top, D: LazyDecoder> { - RawNameValue(EncodingContext<'top>, D::FieldName<'top>, D::Value<'top>), - RawNameEExp(EncodingContext<'top>, D::FieldName<'top>, D::EExp<'top>), - RawEExp(EncodingContext<'top>, D::EExp<'top>), + RawNameValue(EncodingContextRef<'top>, D::FieldName<'top>, D::Value<'top>), + RawNameEExp(EncodingContextRef<'top>, D::FieldName<'top>, D::EExp<'top>), + RawEExp(EncodingContextRef<'top>, D::EExp<'top>), TemplateNameValue(SymbolRef<'top>, TemplateElement<'top>), TemplateNameMacro(SymbolRef<'top>, TemplateMacroInvocation<'top>), TemplateNameVariable( @@ -65,7 +65,7 @@ impl<'top, D: LazyDecoder> LazyExpandedField<'top, D> { impl<'top, D: LazyDecoder> LazyExpandedField<'top, D> { fn from_raw_field( - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, name: D::FieldName<'top>, value: impl Into>, ) -> Self { @@ -89,7 +89,7 @@ impl<'top, D: LazyDecoder> LazyExpandedField<'top, D> { #[derive(Debug, Clone, Copy)] pub enum LazyExpandedFieldName<'top, D: LazyDecoder> { - RawName(EncodingContext<'top>, D::FieldName<'top>), + RawName(EncodingContextRef<'top>, D::FieldName<'top>), TemplateName(TemplateMacroRef<'top>, SymbolRef<'top>), // TODO: `Constructed` needed for names in `(make_struct ...)` } @@ -136,13 +136,13 @@ pub enum ExpandedStructSource<'top, D: LazyDecoder> { #[derive(Copy, Clone)] pub struct LazyExpandedStruct<'top, D: LazyDecoder> { - pub(crate) context: EncodingContext<'top>, + pub(crate) context: EncodingContextRef<'top>, pub(crate) source: ExpandedStructSource<'top, D>, } //TODO: Feature gate impl<'top, D: LazyDecoder> LazyExpandedStruct<'top, D> { - pub fn context(&self) -> EncodingContext<'top> { + pub fn context(&self) -> EncodingContextRef<'top> { self.context } pub fn source(&self) -> ExpandedStructSource<'top, D> { @@ -152,7 +152,7 @@ impl<'top, D: LazyDecoder> LazyExpandedStruct<'top, D> { impl<'top, D: LazyDecoder> LazyExpandedStruct<'top, D> { pub fn from_literal( - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, sexp: D::Struct<'top>, ) -> LazyExpandedStruct<'top, D> { let source = ExpandedStructSource::ValueLiteral(sexp); @@ -160,7 +160,7 @@ impl<'top, D: LazyDecoder> LazyExpandedStruct<'top, D> { } pub fn from_template( - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, environment: Environment<'top, D>, template: TemplateMacroRef<'top>, annotations: AnnotationsRange, @@ -224,7 +224,6 @@ impl<'top, D: LazyDecoder> LazyExpandedStruct<'top, D> { } }; ExpandedStructIterator { - context: self.context, source, state: ExpandedStructIteratorState::ReadingFieldFromSource, } @@ -273,14 +272,11 @@ impl<'top, D: LazyDecoder> LazyExpandedStruct<'top, D> { match first_result_expr { // If the expression is a value literal, wrap it in a LazyExpandedValue and return it. TemplateBodyValueExpr::Element(element) => { - let value = LazyExpandedValue { - context: self.context, - source: ExpandedValueSource::Template( - *environment, - TemplateElement::new(*template, element), - ), - variable: None, - }; + let value = LazyExpandedValue::from_template( + self.context, + *environment, + TemplateElement::new(*template, element), + ); Ok(Some(value)) } // If the expression is a variable, resolve it in the current environment. @@ -344,7 +340,7 @@ pub enum ExpandedStructIteratorSource<'top, D: LazyDecoder> { } pub struct ExpandedStructIterator<'top, D: LazyDecoder> { - context: EncodingContext<'top>, + // Each variant of 'source' below holds its own encoding context reference source: ExpandedStructIteratorSource<'top, D>, // Stores information about any operations that are still in progress. state: ExpandedStructIteratorState<'top, D>, @@ -384,16 +380,25 @@ impl<'top, D: LazyDecoder> Iterator for ExpandedStructIterator<'top, D> { fn next(&mut self) -> Option { let Self { - context, ref mut source, ref mut state, } = *self; match source { ExpandedStructIteratorSource::Template(tdl_macro_evaluator, template_iterator) => { - Self::next_field_from(context, state, tdl_macro_evaluator, template_iterator) + Self::next_field_from( + template_iterator.context(), + state, + tdl_macro_evaluator, + template_iterator, + ) } ExpandedStructIteratorSource::ValueLiteral(e_exp_evaluator, raw_struct_iter) => { - Self::next_field_from(context, state, e_exp_evaluator, raw_struct_iter) + Self::next_field_from( + raw_struct_iter.context(), + state, + e_exp_evaluator, + raw_struct_iter, + ) } } } @@ -416,7 +421,7 @@ impl<'top, D: LazyDecoder> ExpandedStructIterator<'top, D> { // LazyRawStruct, or it could be a `TemplateStructRawFieldsIterator`. I: Iterator>>, >( - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, state: &'a mut ExpandedStructIteratorState<'top, D>, evaluator: &'a mut MacroEvaluator<'top, D>, iter: &'a mut I, @@ -480,7 +485,7 @@ impl<'top, D: LazyDecoder> ExpandedStructIterator<'top, D> { /// Pulls a single unexpanded field expression from the source iterator and sets `state` according to /// the expression's kind. fn next_from_iterator>>>( - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, state: &mut ExpandedStructIteratorState<'top, D>, evaluator: &mut MacroEvaluator<'top, D>, iter: &mut I, diff --git a/src/lazy/expanded/template.rs b/src/lazy/expanded/template.rs index f96c9b8b..fef29e00 100644 --- a/src/lazy/expanded/template.rs +++ b/src/lazy/expanded/template.rs @@ -9,7 +9,7 @@ use crate::lazy::expanded::macro_table::MacroRef; use crate::lazy::expanded::r#struct::UnexpandedField; use crate::lazy::expanded::sequence::Environment; use crate::lazy::expanded::{ - EncodingContext, ExpandedValueSource, LazyExpandedValue, TemplateVariableReference, + EncodingContextRef, ExpandedValueSource, LazyExpandedValue, TemplateVariableReference, }; use crate::lazy::text::raw::v1_1::reader::{MacroAddress, MacroIdRef}; use crate::result::IonFailure; @@ -144,7 +144,7 @@ impl<'top> Deref for TemplateMacroRef<'top> { /// Steps over the child expressions of a list or s-expression found in the body of a template. pub struct TemplateSequenceIterator<'top, D: LazyDecoder> { - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, template: TemplateMacroRef<'top>, evaluator: MacroEvaluator<'top, D>, value_expressions: &'top [TemplateBodyValueExpr], @@ -153,7 +153,7 @@ pub struct TemplateSequenceIterator<'top, D: LazyDecoder> { impl<'top, D: LazyDecoder> TemplateSequenceIterator<'top, D> { pub fn new( - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, evaluator: MacroEvaluator<'top, D>, template: TemplateMacroRef<'top>, value_expressions: &'top [TemplateBodyValueExpr], @@ -248,16 +248,22 @@ impl<'top, D: LazyDecoder> Iterator for TemplateSequenceIterator<'top, D> { /// mimic reading them from input. The [`LazyExpandedStruct`](crate::lazy::expanded::struct) handles /// evaluating any macro invocations that this yields. pub struct TemplateStructUnexpandedFieldsIterator<'top, D: LazyDecoder> { - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, environment: Environment<'top, D>, template: TemplateMacroRef<'top>, expressions: &'top [TemplateBodyValueExpr], index: usize, } +impl<'top, D: LazyDecoder> TemplateStructUnexpandedFieldsIterator<'top, D> { + pub fn context(&self) -> EncodingContextRef<'top> { + self.context + } +} + impl<'top, D: LazyDecoder> TemplateStructUnexpandedFieldsIterator<'top, D> { pub fn new( - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, environment: Environment<'top, D>, template: TemplateMacroRef<'top>, expressions: &'top [TemplateBodyValueExpr], @@ -367,7 +373,7 @@ impl TemplateBody { .push(TemplateBodyValueExpr::Element(element)) } - pub fn push_variable(&mut self, signature_index: usize) { + pub fn push_variable(&mut self, signature_index: u16) { self.expressions.push(TemplateBodyValueExpr::Variable( TemplateBodyVariableReference::new(signature_index), )) @@ -622,12 +628,12 @@ impl TemplateBodyMacroInvocation { /// Finds the definition of the macro being invoked in the provided `context`'s macro table. /// - /// It is a logic error for this method to be called with an [`EncodingContext`] that does not + /// It is a logic error for this method to be called with an [`EncodingContextRef`] that does not /// contain the necessary information; doing so will cause this method to panic. pub(crate) fn resolve<'top>( self, host_template: TemplateMacroRef<'top>, - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, ) -> TemplateMacroInvocation<'top> { let invoked_macro = context .macro_table @@ -653,7 +659,7 @@ impl TemplateBodyMacroInvocation { /// holds references to the invoked macro and its argument expressions. #[derive(Copy, Clone)] pub struct TemplateMacroInvocation<'top> { - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, // The definition of the template in which this macro invocation appears. This is useful as // debugging information / viewing in stack traces. host_template: TemplateMacroRef<'top>, @@ -676,7 +682,7 @@ impl<'top> Debug for TemplateMacroInvocation<'top> { impl<'top> TemplateMacroInvocation<'top> { pub fn new( - context: EncodingContext<'top>, + context: EncodingContextRef<'top>, host_template: TemplateMacroRef<'top>, invoked_macro: MacroRef<'top>, arg_expressions: &'top [TemplateBodyValueExpr], @@ -707,6 +713,9 @@ impl<'top> TemplateMacroInvocation<'top> { pub fn invoked_macro(&self) -> MacroRef<'top> { self.invoked_macro } + pub fn context(&self) -> EncodingContextRef<'top> { + self.context + } } impl<'top, D: LazyDecoder> From> for MacroExpr<'top, D> { @@ -783,20 +792,20 @@ impl<'top, D: LazyDecoder> Iterator for TemplateMacroInvocationArgsIterator<'top /// A reference to a variable in a template body. #[derive(Debug, Copy, Clone, PartialEq)] pub struct TemplateBodyVariableReference { - signature_index: usize, + signature_index: u16, } impl TemplateBodyVariableReference { - pub fn new(signature_index: usize) -> Self { + pub fn new(signature_index: u16) -> Self { Self { signature_index } } pub fn signature_index(&self) -> usize { - self.signature_index + self.signature_index as usize } pub fn name<'a>(&self, signature: &'a MacroSignature) -> &'a str { signature .parameters() - .get(self.signature_index) + .get(self.signature_index()) .unwrap() .name() } diff --git a/src/lazy/text/buffer.rs b/src/lazy/text/buffer.rs index 39444550..f4f79dbe 100644 --- a/src/lazy/text/buffer.rs +++ b/src/lazy/text/buffer.rs @@ -27,8 +27,8 @@ use crate::lazy::raw_stream_item::{EndPosition, LazyRawStreamItem, RawStreamItem use crate::lazy::text::encoded_value::EncodedTextValue; use crate::lazy::text::matched::{ MatchedBlob, MatchedClob, MatchedDecimal, MatchedFieldName, MatchedFieldNameSyntax, - MatchedFloat, MatchedHoursAndMinutes, MatchedInt, MatchedString, MatchedSymbol, - MatchedTimestamp, MatchedTimestampOffset, MatchedValue, + MatchedFloat, MatchedInt, MatchedString, MatchedSymbol, MatchedTimestamp, + MatchedTimestampOffset, MatchedValue, }; use crate::lazy::text::parse_result::{InvalidInputError, IonParseError}; use crate::lazy::text::parse_result::{IonMatchResult, IonParseResult}; @@ -40,7 +40,7 @@ use crate::lazy::text::raw::v1_1::reader::{ TextListSpanFinder_1_1, TextSExpSpanFinder_1_1, TextStructSpanFinder_1_1, }; use crate::lazy::text::value::{ - LazyRawTextValue_1_0, LazyRawTextValue_1_1, LazyRawTextVersionMarker, MatchedRawTextValue, + LazyRawTextValue, LazyRawTextValue_1_0, LazyRawTextValue_1_1, LazyRawTextVersionMarker, }; use crate::result::DecodingError; use crate::{IonError, IonResult, IonType, TimestampPrecision}; @@ -311,21 +311,30 @@ impl<'top> TextBufferView<'top> { /// Matches one or more annotations. pub fn match_annotations(self) -> IonMatchResult<'top> { - recognize(many1_count(Self::match_annotation))(self) + let (remaining, matched) = recognize(many1_count(Self::match_annotation))(self)?; + if matched.len() > u16::MAX as usize { + let error = InvalidInputError::new(matched) + .with_description("the maximum supported annotations sequence length is 65KB") + .with_label("parsing annotations"); + Err(nom::Err::Error(IonParseError::Invalid(error))) + } else { + Ok((remaining, matched)) + } } /// Matches an annotation (symbol token) and a terminating '::'. pub fn match_annotation(self) -> IonParseResult<'top, (MatchedSymbol, Range)> { terminated( whitespace_and_then(match_and_span(Self::match_symbol)), - whitespace_and_then(complete_tag("::")), + whitespace_and_then(terminated( + complete_tag("::"), + Self::match_optional_comments_and_whitespace, + )), )(self) } /// Matches an optional annotations sequence and a value, including operators. - pub fn match_sexp_value( - self, - ) -> IonParseResult<'top, Option>> { + pub fn match_sexp_value(self) -> IonParseResult<'top, Option>> { whitespace_and_then(alt(( value(None, tag(")")), pair( @@ -336,16 +345,8 @@ impl<'top> TextBufferView<'top> { // `-3` as an operator (`-`) and an int (`3`). Thus, we run `match_value` first. whitespace_and_then(alt((Self::match_value, Self::match_operator))), ) - .map(|(maybe_annotations, mut value)| { - if let Some(annotations) = maybe_annotations { - value.encoded_value = value - .encoded_value - .with_annotations_sequence(annotations.offset(), annotations.len()); - // Rewind the value's input to include the annotations sequence. - value.input = self.slice_to_end(annotations.offset() - self.offset()); - } - Some(value) - }), + .map(|(maybe_annotations, value)| self.apply_annotations(maybe_annotations, value)) + .map(Some), ))) .parse(self) } @@ -367,23 +368,35 @@ impl<'top> TextBufferView<'top> { // `-3` as an operator (`-`) and an int (`3`). Thus, we run `match_value` first. whitespace_and_then(alt((Self::match_value_1_1, Self::match_operator))), ) - .map(|(maybe_annotations, mut value)| { - if let Some(annotations) = maybe_annotations { - value.encoded_value = value - .encoded_value - .with_annotations_sequence(annotations.offset(), annotations.len()); - // Rewind the value's input to include the annotations sequence. - value.input = self.slice_to_end(annotations.offset() - self.offset()); - } - Some(value) - }) - .map(|maybe_matched| { - maybe_matched.map(|matched| RawValueExpr::ValueLiteral(matched.into())) - }), + .map(|(maybe_annotations, value)| self.apply_annotations(maybe_annotations, value)) + .map(RawValueExpr::ValueLiteral) + .map(Some), ))) .parse(self) } + fn apply_annotations>( + self, + maybe_annotations: Option>, + mut value: LazyRawTextValue<'top, E>, + ) -> LazyRawTextValue<'top, E> { + if let Some(annotations) = maybe_annotations { + let annotations_length = + u16::try_from(annotations.len()).expect("already length checked"); + // Update the encoded value's record of how many bytes of annotations precede the data. + value.encoded_value = value + .encoded_value + .with_annotations_sequence(annotations_length); + let unannotated_value_length = value.input.len(); + // Rewind the value's input to include the annotations sequence. + value.input = self.slice( + annotations.offset() - self.offset(), + annotations_length as usize + unannotated_value_length, + ); + } + value + } + /// Matches a struct field name/value pair. /// /// If a pair is found, returns `Some(field)` and consumes the following comma if present. @@ -401,10 +414,8 @@ impl<'top> TextBufferView<'top> { // Otherwise, match a name/value pair and turn it into a `LazyRawTextField`. Self::match_struct_field_name_and_value.map(move |(matched_field_name, value)| { let field_name = LazyRawTextFieldName_1_0::new(matched_field_name); - let field_value = LazyRawTextValue_1_0::new(value); Some(LazyRawFieldExpr::<'top, TextEncoding_1_0>::NameValue( - field_name, - field_value, + field_name, value, )) }), ))(input_including_field_name) @@ -419,13 +430,7 @@ impl<'top> TextBufferView<'top> { /// input bytes where the field name is found, and the value. pub fn match_struct_field_name_and_value( self, - ) -> IonParseResult< - 'top, - ( - MatchedFieldName<'top>, - MatchedRawTextValue<'top, TextEncoding_1_0>, - ), - > { + ) -> IonParseResult<'top, (MatchedFieldName<'top>, LazyRawTextValue_1_0<'top>)> { terminated( separated_pair( whitespace_and_then(Self::match_struct_field_name), @@ -463,8 +468,7 @@ impl<'top> TextBufferView<'top> { // Otherwise, match a name/value pair and turn it into a `LazyRawTextField`. Self::match_struct_field_name_and_value_1_1.map(move |(field_name, value)| { let field_name = LazyRawTextFieldName_1_1::new(field_name); - let field_value = LazyRawTextValue_1_1::new(value); - Ok(Some(LazyRawFieldExpr::NameValue(field_name, field_value))) + Ok(Some(LazyRawFieldExpr::NameValue(field_name, value))) }), ))(input_including_field_name)?; Ok((input_after_field, field_expr_result?)) @@ -491,13 +495,7 @@ impl<'top> TextBufferView<'top> { /// range of input bytes where the field name is found, and the value. pub fn match_struct_field_name_and_value_1_1( self, - ) -> IonParseResult< - 'top, - ( - MatchedFieldName<'top>, - MatchedRawTextValue<'top, TextEncoding_1_1>, - ), - > { + ) -> IonParseResult<'top, (MatchedFieldName<'top>, LazyRawTextValue_1_1<'top>)> { terminated( separated_pair( whitespace_and_then(Self::match_struct_field_name), @@ -509,44 +507,22 @@ impl<'top> TextBufferView<'top> { } /// Matches an optional annotation sequence and a trailing value. - pub fn match_annotated_value( - self, - ) -> IonParseResult<'top, MatchedRawTextValue<'top, TextEncoding_1_0>> { + pub fn match_annotated_value(self) -> IonParseResult<'top, LazyRawTextValue_1_0<'top>> { pair( opt(Self::match_annotations), whitespace_and_then(Self::match_value), ) - .map(|(maybe_annotations, mut value)| { - if let Some(annotations) = maybe_annotations { - value.encoded_value = value - .encoded_value - .with_annotations_sequence(annotations.offset(), annotations.len()); - // Rewind the value's input to include the annotations sequence. - value.input = self.slice_to_end(annotations.offset() - self.offset()); - } - value - }) + .map(|(maybe_annotations, value)| self.apply_annotations(maybe_annotations, value)) .parse(self) } /// Matches an optional annotation sequence and a trailing v1.1 value. - pub fn match_annotated_value_1_1( - self, - ) -> IonParseResult<'top, MatchedRawTextValue<'top, TextEncoding_1_1>> { + pub fn match_annotated_value_1_1(self) -> IonParseResult<'top, LazyRawTextValue_1_1<'top>> { pair( opt(Self::match_annotations), whitespace_and_then(Self::match_value_1_1), ) - .map(|(maybe_annotations, mut value)| { - if let Some(annotations) = maybe_annotations { - value.encoded_value = value - .encoded_value - .with_annotations_sequence(annotations.offset(), annotations.len()); - // Rewind the value's input to include the annotations sequence. - value.input = self.slice_to_end(annotations.offset() - self.offset()); - } - value - }) + .map(|(maybe_annotations, value)| self.apply_annotations(maybe_annotations, value)) .parse(self) } @@ -611,241 +587,120 @@ impl<'top> TextBufferView<'top> { } /// Matches a single scalar value or the beginning of a container. - pub fn match_value(self) -> IonParseResult<'top, MatchedRawTextValue<'top, TextEncoding_1_0>> { - alt(( + pub fn match_value(self) -> IonParseResult<'top, LazyRawTextValue_1_0<'top>> { + consumed(alt(( // For `null` and `bool`, we use `read_` instead of `match_` because there's no additional // parsing to be done. - map(match_and_length(Self::match_null), |(ion_type, length)| { - EncodedTextValue::new(MatchedValue::Null(ion_type), self.offset(), length) + map(Self::match_null, |ion_type| { + EncodedTextValue::new(MatchedValue::Null(ion_type)) }), - map(match_and_length(Self::match_bool), |(value, length)| { - EncodedTextValue::new(MatchedValue::Bool(value), self.offset(), length) + map(Self::match_bool, |value| { + EncodedTextValue::new(MatchedValue::Bool(value)) }), // For `int` and the other types, we use `match` and store the partially-processed input in the // `matched_value` field of the `EncodedTextValue` we return. - map( - match_and_length(Self::match_int), - |(matched_int, length)| { - EncodedTextValue::new(MatchedValue::Int(matched_int), self.offset(), length) - }, - ), - map( - match_and_length(Self::match_float), - |(matched_float, length)| { - EncodedTextValue::new(MatchedValue::Float(matched_float), self.offset(), length) - }, - ), - map( - match_and_length(Self::match_decimal), - |(matched_decimal, length)| { - EncodedTextValue::new( - MatchedValue::Decimal(matched_decimal), - self.offset(), - length, - ) - }, - ), - map( - match_and_length(Self::match_timestamp), - |(matched_timestamp, length)| { - EncodedTextValue::new( - MatchedValue::Timestamp(matched_timestamp), - self.offset(), - length, - ) - }, - ), - map( - match_and_length(Self::match_string), - |(matched_string, length)| { - EncodedTextValue::new( - MatchedValue::String(matched_string), - self.offset(), - length, - ) - }, - ), - map( - match_and_length(Self::match_symbol), - |(matched_symbol, length)| { - EncodedTextValue::new( - MatchedValue::Symbol(matched_symbol), - self.offset(), - length, - ) - }, - ), - map( - match_and_length(Self::match_blob), - |(matched_blob, length)| { - EncodedTextValue::new(MatchedValue::Blob(matched_blob), self.offset(), length) - }, - ), - map( - match_and_length(Self::match_clob), - |(matched_clob, length)| { - EncodedTextValue::new(MatchedValue::Clob(matched_clob), self.offset(), length) - }, - ), - map( - match_and_length(Self::match_list), - |(matched_list, length)| { - // TODO: Cache child expressions found in 1.0 list - let not_yet_used_in_1_0 = - bumpalo::collections::Vec::new_in(self.allocator).into_bump_slice(); - EncodedTextValue::new( - MatchedValue::List(not_yet_used_in_1_0), - matched_list.offset(), - length, - ) - }, - ), - map( - match_and_length(Self::match_sexp), - |(matched_list, length)| { - // TODO: Cache child expressions found in 1.0 sexp - let not_yet_used_in_1_0 = - bumpalo::collections::Vec::new_in(self.allocator).into_bump_slice(); - EncodedTextValue::new( - MatchedValue::SExp(not_yet_used_in_1_0), - matched_list.offset(), - length, - ) - }, - ), - map( - match_and_length(Self::match_struct), - |(matched_struct, length)| { - // TODO: Cache child expressions found in 1.0 struct - let not_yet_used_in_1_0 = - bumpalo::collections::Vec::new_in(self.allocator).into_bump_slice(); - EncodedTextValue::new( - MatchedValue::Struct(not_yet_used_in_1_0), - matched_struct.offset(), - length, - ) - }, - ), - )) - .map(|encoded_value| MatchedRawTextValue { + map(Self::match_int, |matched_int| { + EncodedTextValue::new(MatchedValue::Int(matched_int)) + }), + map(Self::match_float, |matched_float| { + EncodedTextValue::new(MatchedValue::Float(matched_float)) + }), + map(Self::match_decimal, |matched_decimal| { + EncodedTextValue::new(MatchedValue::Decimal(matched_decimal)) + }), + map(Self::match_timestamp, |matched_timestamp| { + EncodedTextValue::new(MatchedValue::Timestamp(matched_timestamp)) + }), + map(Self::match_string, |matched_string| { + EncodedTextValue::new(MatchedValue::String(matched_string)) + }), + map(Self::match_symbol, |matched_symbol| { + EncodedTextValue::new(MatchedValue::Symbol(matched_symbol)) + }), + map(Self::match_blob, |matched_blob| { + EncodedTextValue::new(MatchedValue::Blob(matched_blob)) + }), + map(Self::match_clob, |matched_clob| { + EncodedTextValue::new(MatchedValue::Clob(matched_clob)) + }), + map(Self::match_list, |_matched_list| { + // TODO: Cache child expressions found in 1.0 list + let not_yet_used_in_1_0 = + bumpalo::collections::Vec::new_in(self.allocator).into_bump_slice(); + EncodedTextValue::new(MatchedValue::List(not_yet_used_in_1_0)) + }), + map(Self::match_sexp, |_matched_sexp| { + // TODO: Cache child expressions found in 1.0 sexp + let not_yet_used_in_1_0 = + bumpalo::collections::Vec::new_in(self.allocator).into_bump_slice(); + EncodedTextValue::new(MatchedValue::SExp(not_yet_used_in_1_0)) + }), + map(Self::match_struct, |_matched_struct| { + // TODO: Cache child expressions found in 1.0 struct + let not_yet_used_in_1_0 = + bumpalo::collections::Vec::new_in(self.allocator).into_bump_slice(); + EncodedTextValue::new(MatchedValue::Struct(not_yet_used_in_1_0)) + }), + ))) + .map(|(input, encoded_value)| LazyRawTextValue_1_0 { encoded_value, - input: self, + input, }) .parse(self) } - pub fn match_value_1_1( - self, - ) -> IonParseResult<'top, MatchedRawTextValue<'top, TextEncoding_1_1>> { - alt(( + pub fn match_value_1_1(self) -> IonParseResult<'top, LazyRawTextValue_1_1<'top>> { + consumed(alt(( // For `null` and `bool`, we use `read_` instead of `match_` because there's no additional // parsing to be done. - map(match_and_length(Self::match_null), |(ion_type, length)| { - EncodedTextValue::new(MatchedValue::Null(ion_type), self.offset(), length) + map(Self::match_null, |ion_type| { + EncodedTextValue::new(MatchedValue::Null(ion_type)) }), - map(match_and_length(Self::match_bool), |(value, length)| { - EncodedTextValue::new(MatchedValue::Bool(value), self.offset(), length) + map(Self::match_bool, |value| { + EncodedTextValue::new(MatchedValue::Bool(value)) }), // For `int` and the other types, we use `match` and store the partially-processed input in the // `matched_value` field of the `EncodedTextValue` we return. + map(Self::match_int, |matched_int| { + EncodedTextValue::new(MatchedValue::Int(matched_int)) + }), + map(Self::match_float, |matched_float| { + EncodedTextValue::new(MatchedValue::Float(matched_float)) + }), + map(Self::match_decimal, |matched_decimal| { + EncodedTextValue::new(MatchedValue::Decimal(matched_decimal)) + }), + map(Self::match_timestamp, |matched_timestamp| { + EncodedTextValue::new(MatchedValue::Timestamp(matched_timestamp)) + }), + map(Self::match_string, |matched_string| { + EncodedTextValue::new(MatchedValue::String(matched_string)) + }), + map(Self::match_symbol, |matched_symbol| { + EncodedTextValue::new(MatchedValue::Symbol(matched_symbol)) + }), + map(Self::match_blob, |matched_blob| { + EncodedTextValue::new(MatchedValue::Blob(matched_blob)) + }), + map(Self::match_clob, |matched_clob| { + EncodedTextValue::new(MatchedValue::Clob(matched_clob)) + }), + map(Self::match_list_1_1, |(_matched_list, child_expr_cache)| { + EncodedTextValue::new(MatchedValue::List(child_expr_cache)) + }), + map(Self::match_sexp_1_1, |(_matched_sexp, child_expr_cache)| { + EncodedTextValue::new(MatchedValue::SExp(child_expr_cache)) + }), map( - match_and_length(Self::match_int), - |(matched_int, length)| { - EncodedTextValue::new(MatchedValue::Int(matched_int), self.offset(), length) - }, - ), - map( - match_and_length(Self::match_float), - |(matched_float, length)| { - EncodedTextValue::new(MatchedValue::Float(matched_float), self.offset(), length) - }, - ), - map( - match_and_length(Self::match_decimal), - |(matched_decimal, length)| { - EncodedTextValue::new( - MatchedValue::Decimal(matched_decimal), - self.offset(), - length, - ) - }, - ), - map( - match_and_length(Self::match_timestamp), - |(matched_timestamp, length)| { - EncodedTextValue::new( - MatchedValue::Timestamp(matched_timestamp), - self.offset(), - length, - ) - }, - ), - map( - match_and_length(Self::match_string), - |(matched_string, length)| { - EncodedTextValue::new( - MatchedValue::String(matched_string), - self.offset(), - length, - ) - }, - ), - map( - match_and_length(Self::match_symbol), - |(matched_symbol, length)| { - EncodedTextValue::new( - MatchedValue::Symbol(matched_symbol), - self.offset(), - length, - ) - }, - ), - map( - match_and_length(Self::match_blob), - |(matched_blob, length)| { - EncodedTextValue::new(MatchedValue::Blob(matched_blob), self.offset(), length) - }, - ), - map( - match_and_length(Self::match_clob), - |(matched_clob, length)| { - EncodedTextValue::new(MatchedValue::Clob(matched_clob), self.offset(), length) - }, - ), - map( - match_and_length(Self::match_list_1_1), - |((matched_list, child_expr_cache), length)| { - EncodedTextValue::new( - MatchedValue::List(child_expr_cache), - matched_list.offset(), - length, - ) - }, - ), - map( - match_and_length(Self::match_sexp_1_1), - |((matched_sexp, child_expr_cache), length)| { - EncodedTextValue::new( - MatchedValue::SExp(child_expr_cache), - matched_sexp.offset(), - length, - ) - }, - ), - map( - match_and_length(Self::match_struct_1_1), - |((matched_struct, field_expr_cache), length)| { - EncodedTextValue::new( - MatchedValue::Struct(field_expr_cache), - matched_struct.offset(), - length, - ) + Self::match_struct_1_1, + |(_matched_struct, field_expr_cache)| { + EncodedTextValue::new(MatchedValue::Struct(field_expr_cache)) }, ), - )) - .map(|encoded_value| MatchedRawTextValue { + ))) + .map(|(input, encoded_value)| LazyRawTextValue_1_1 { encoded_value, - input: self, + input, }) .parse(self) } @@ -956,7 +811,7 @@ impl<'top> TextBufferView<'top> { Err(e) => { return { let error = InvalidInputError::new(self) - .with_label("matching a sexp") + .with_label("matching a 1.1 sexp") .with_description(format!("{}", e)); Err(nom::Err::Failure(IonParseError::Invalid(error))) } @@ -973,9 +828,7 @@ impl<'top> TextBufferView<'top> { /// /// If a value is found, returns `Ok(Some(value))`. If the end of the list is found, returns /// `Ok(None)`. - pub fn match_list_value( - self, - ) -> IonParseResult<'top, Option>> { + pub fn match_list_value(self) -> IonParseResult<'top, Option>> { preceded( // Some amount of whitespace/comments... Self::match_optional_comments_and_whitespace, @@ -1010,9 +863,7 @@ impl<'top> TextBufferView<'top> { // ...followed by a comma or end-of-list Self::match_delimiter_after_list_value, ) - .map(|maybe_matched| { - maybe_matched.map(|matched| RawValueExpr::ValueLiteral(matched.into())) - }), + .map(|maybe_matched| maybe_matched.map(RawValueExpr::ValueLiteral)), ))) .parse(self) } @@ -1619,18 +1470,12 @@ impl<'top> TextBufferView<'top> { /// Matches an operator symbol, which can only legally appear within an s-expression fn match_operator>( self, - ) -> IonParseResult<'top, MatchedRawTextValue<'top, E>> { - match_and_length(is_a("!#%&*+-./;<=>?@^`|~")) - .map( - |(text, length): (TextBufferView, usize)| MatchedRawTextValue { - input: self, - encoded_value: EncodedTextValue::new( - MatchedValue::Symbol(MatchedSymbol::Operator), - text.offset(), - length, - ), - }, - ) + ) -> IonParseResult<'top, LazyRawTextValue<'top, E>> { + is_a("!#%&*+-./;<=>?@^`|~") + .map(|text: TextBufferView| LazyRawTextValue { + input: text, + encoded_value: EncodedTextValue::new(MatchedValue::Symbol(MatchedSymbol::Operator)), + }) .parse(self) } @@ -2019,13 +1864,12 @@ impl<'top> TextBufferView<'top> { complete_one_of("-+"), Self::match_timestamp_offset_hours_and_minutes, ), - |(sign, (hours, _minutes))| { - let is_negative = sign == '-'; - let hours_offset = hours.offset(); - MatchedTimestampOffset::HoursAndMinutes(MatchedHoursAndMinutes::new( - is_negative, - hours_offset, - )) + |(sign, (_hours, _minutes))| { + if sign == '-' { + MatchedTimestampOffset::NegativeHoursAndMinutes + } else { + MatchedTimestampOffset::PositiveHoursAndMinutes + } }, ), ))(self) @@ -2898,6 +2742,7 @@ mod tests { "(:foo foo)", ]; for input in good_inputs { + println!("test: {input}"); match_macro_invocation(input); } diff --git a/src/lazy/text/encoded_value.rs b/src/lazy/text/encoded_value.rs index a597f869..060388fe 100644 --- a/src/lazy/text/encoded_value.rs +++ b/src/lazy/text/encoded_value.rs @@ -7,54 +7,31 @@ use crate::IonType; /// Represents the type, offset, and length metadata of the various components of an encoded value /// in a text input stream. /// -/// Each [`LazyRawTextValue`](crate::lazy::text::value::MatchedRawTextValue) contains an `EncodedValue`, +/// Each [`LazyRawTextValue`](crate::lazy::text::value::LazyRawTextValue) contains an `EncodedTextValue`, /// allowing a user to re-read (that is: parse) the body of the value as many times as necessary /// without re-parsing its header information each time. #[derive(Copy, Clone, Debug, PartialEq)] pub(crate) struct EncodedTextValue<'top, E: TextEncoding<'top>> { - // TODO: Update this comment now that field_name is not part of 'value' - // Each encoded text value has up to three components, appearing in the following order: // - // [ field_name? | annotations? | data ] + // [annotations? | data ] // // Components shown with a `?` are optional. - // The following is an example encoding of a struct field with an annotated value-- the only kind - // of Ion value that has both of the optional components--that appears 5 gigabytes into the input - // stream: + // The following is an example encoding of an annotated value: // - // ┌─── field_name_offset: 12 - // │ ┌─── annotations_offset: 5 - // │ │ ┌─── data_offset: 5_000_000_012 - // price: USD::55.99, - // └─┬─┘ └─┬─┘└─┬─┘ - // │ │ └─ data_length: 5 - // │ └─ annotations_length: 5 - // └─ field_name_length: 5 + // USD::55.99, + // └─┬─┘ + // └─ data_offset: 5 // - // Notice that only `data_offset` is an absolute offset from the beginning of the stream; - // this is because `data` is the only field that is always guaranteed to be present. - // `field_name_offset` and `annotations_offset` are stored as the number of bytes _before_ - // `data_offset`, allowing them to be stored in fewer bytes. - - // The absolute position (in bytes) of this value's `data` component within the overall stream - // being decoded. - data_offset: usize, - // The number of bytes _before_ `data_offset` at which the field name begins. If this value - // does not have a field name, this value will be zero. - // field_name_offset: u32, - // The number of bytes _before_ `data_offset` at which the annotations sequence begins. - // If this value does not have a field name, this value will be zero. - annotations_offset: u32, - - // The number of bytes used to encode the data component of this Ion value. - data_length: usize, + // Notice that `data_offset` is a relative offset from the beginning of the matched buffer. + // The offset accommodates leading annotations, potentially including interstitial whitespace + // and/or comments. - // The number of bytes used to encode the annotations sequence preceding the data, if any. - // If there is no annotations sequence, this will be zero. If there is whitespace before the - // annotations sequence, this will not include it. - annotations_length: u32, + // The relative position within the buffer at which the data portion of this value begins. + // All buffer contents beyond this point are part of the data; the buffer ends when the value + // ends. + data_offset: u16, // Information that was recorded about the value as it was being matched. // For some types (e.g. bool), matching the text is the complete parsing process so the whole @@ -64,34 +41,22 @@ pub(crate) struct EncodedTextValue<'top, E: TextEncoding<'top>> { } impl<'top, E: TextEncoding<'top>> EncodedTextValue<'top, E> { - pub(crate) fn new( - matched_value: MatchedValue<'top, E>, - offset: usize, - length: usize, - ) -> EncodedTextValue<'top, E> { + pub(crate) fn new(matched_value: MatchedValue<'top, E>) -> EncodedTextValue<'top, E> { EncodedTextValue { - data_offset: offset, - data_length: length, - annotations_offset: 0, - annotations_length: 0, + data_offset: 0, matched_value, } } // The annotations should include all of the symbol tokens, their delimiting '::'s, and any - // interstitial whitespace. It should not include any leading/trailing whitespace or the value - // itself. + // interstitial or trailing whitespace. It should not include any leading whitespace or the + // value itself. // Examples: // foo::bar:: // 'foo'::'bar':: // foo :: 'bar' :: - pub(crate) fn with_annotations_sequence( - mut self, - offset: usize, - length: usize, - ) -> EncodedTextValue<'top, E> { - self.annotations_offset = (self.data_offset - offset) as u32; - self.annotations_length = length as u32; + pub(crate) fn with_annotations_sequence(mut self, length: u16) -> EncodedTextValue<'top, E> { + self.data_offset = length; self } @@ -118,65 +83,21 @@ impl<'top, E: TextEncoding<'top>> EncodedTextValue<'top, E> { } pub fn data_offset(&self) -> usize { - self.data_offset - } - - pub fn data_length(&self) -> usize { - self.data_length - } - - pub fn data_range(&self) -> Range { - self.data_offset..(self.data_offset + self.data_length) + self.data_offset as usize } pub fn annotations_range(&self) -> Option> { - if self.annotations_offset == 0 { + if self.data_offset == 0 { return None; } - let start = self.data_offset - (self.annotations_offset as usize); - let end = start + (self.annotations_length as usize); - Some(start..end) + Some(0..self.data_offset as usize) } pub fn has_annotations(&self) -> bool { - self.annotations_offset > 0 - } - - /// Returns the total number of bytes used to represent the current value, including its - /// annotations (if any), its header (type descriptor + length bytes), and its value. - pub fn total_length(&self) -> usize { - self.data_length + self.annotations_offset as usize - } - - pub fn annotated_value_range(&self) -> Range { - let start = self.data_offset - self.annotations_length as usize; - let end = self.data_offset + self.data_length; - start..end + self.data_offset > 0 } pub fn matched(&self) -> MatchedValue<'top, E> { self.matched_value } } - -#[cfg(test)] -mod tests { - use crate::lazy::encoding::TextEncoding_1_0; - - use super::*; - - #[test] - fn total_length_data_only() { - let value = - EncodedTextValue::::new(MatchedValue::Null(IonType::Null), 100, 12); - assert_eq!(value.total_length(), 12); - } - - #[test] - fn total_length_data_with_annotations() { - let value = - EncodedTextValue::::new(MatchedValue::Null(IonType::Null), 100, 12) - .with_annotations_sequence(90, 4); - assert_eq!(value.total_length(), 22); - } -} diff --git a/src/lazy/text/matched.rs b/src/lazy/text/matched.rs index 79241878..c4a600f3 100644 --- a/src/lazy/text/matched.rs +++ b/src/lazy/text/matched.rs @@ -147,9 +147,9 @@ impl<'top> MatchedFieldName<'top> { /// A partially parsed Ion int. #[derive(Copy, Clone, Debug, PartialEq)] pub struct MatchedInt { - radix: u32, + radix: u8, // Can only be 2, 10, or 16 // Offset of the digits from the beginning of the value - digits_offset: usize, + digits_offset: u8, is_negative: bool, } @@ -158,10 +158,14 @@ impl MatchedInt { const STACK_ALLOC_BUFFER_CAPACITY: usize = 32; /// Constructs a new `MatchedInt`. - pub fn new(radix: u32, is_negative: bool, digits_offset: usize) -> Self { + pub fn new(radix: u8, is_negative: bool, digits_offset: usize) -> Self { + debug_assert!( + digits_offset < u8::MAX as usize, + "digits offset can only be 0-3 to accommodate a sign and/or leading radix like `0x`" + ); Self { radix, - digits_offset, + digits_offset: digits_offset as u8, is_negative, } } @@ -174,12 +178,12 @@ impl MatchedInt { /// One of: `2`, `10`, or `16`, as determined by whether the partially parsed integer began /// with a `0b`/`0B`, `0x`/`0X`, or no prefix. pub fn radix(&self) -> u32 { - self.radix + self.radix as u32 } /// Attempts to finish reading the partially parsed integer. pub fn read(&self, matched_input: TextBufferView) -> IonResult { - let digits = matched_input.slice_to_end(self.digits_offset); + let digits = matched_input.slice_to_end(self.digits_offset as usize); let mut sanitized: SmallVec<[u8; Self::STACK_ALLOC_BUFFER_CAPACITY]> = SmallVec::with_capacity(Self::STACK_ALLOC_BUFFER_CAPACITY); // Copy the input text over to the sanitization buffer, discarding any underscores. These @@ -964,15 +968,22 @@ impl MatchedTimestamp { // This is the only field that doesn't have a fixed location; it's always at the end // of the timestamp, and the timestamp's length varies by its precision. // The `MatchedHoursAndMinutes` stores the offset at which `hours` begins. - MatchedTimestampOffset::HoursAndMinutes(matched_offset) => { - let hours_start = matched_offset.hours_offset() - matched_input.offset(); + MatchedTimestampOffset::PositiveHoursAndMinutes + | MatchedTimestampOffset::NegativeHoursAndMinutes => { + // There has to be a day for there to also be an offset. The last day index is 9. + // Starting at index 10, look for the next '-' or '+'. + let hours_start = &matched_input.bytes()[10..] + .iter() + .position(|b| *b == b'-' || *b == b'+') + .expect("the parser reported that this timestamp had an HH:MM component") + + 11; // The position reported is relative to the offset where the search began let hours_text = matched_input.slice(hours_start, 2).as_text().unwrap(); let hours = i32::from_str(hours_text).unwrap(); let minutes_start = hours_start + 3; let minutes_text = matched_input.slice(minutes_start, 2).as_text().unwrap(); let minutes = i32::from_str(minutes_text).unwrap(); let offset_magnitude_minutes = (hours * 60) + minutes; - if matched_offset.is_negative { + if self.offset == MatchedTimestampOffset::NegativeHoursAndMinutes { Some(-offset_magnitude_minutes) } else { Some(offset_magnitude_minutes) @@ -1050,32 +1061,11 @@ impl MatchedTimestamp { #[derive(Clone, Copy, Debug, PartialEq)] pub enum MatchedTimestampOffset { Zulu, - HoursAndMinutes(MatchedHoursAndMinutes), + PositiveHoursAndMinutes, + NegativeHoursAndMinutes, Unknown, } -#[derive(Clone, Copy, Debug, PartialEq)] -pub struct MatchedHoursAndMinutes { - is_negative: bool, - /// This is the offset of the first `H` in the offset string `HH:MM`. - hours_offset: usize, -} - -impl MatchedHoursAndMinutes { - pub fn new(is_negative: bool, hours_offset: usize) -> Self { - Self { - is_negative, - hours_offset, - } - } - pub fn is_negative(&self) -> bool { - self.is_negative - } - pub fn hours_offset(&self) -> usize { - self.hours_offset - } -} - #[derive(Clone, Copy, Debug, PartialEq)] pub struct MatchedBlob { // Position within the blob at which the base64 characters begin diff --git a/src/lazy/text/parse_result.rs b/src/lazy/text/parse_result.rs index 00d93188..0d13479c 100644 --- a/src/lazy/text/parse_result.rs +++ b/src/lazy/text/parse_result.rs @@ -251,7 +251,14 @@ impl<'data, T> AddContext<'data, T> for IonParseError<'data> { 'data: 'a, { match self { - IonParseError::Incomplete => IonResult::incomplete(label, input.offset()), + IonParseError::Incomplete => IonResult::incomplete( + format!( + "{}; buffer utf-8: {}", + label.into(), + input.as_text().unwrap_or("") + ), + input.offset(), + ), IonParseError::Invalid(invalid_input_error) => Err(IonError::from(invalid_input_error)), } } diff --git a/src/lazy/text/raw/reader.rs b/src/lazy/text/raw/reader.rs index ef4e0b9e..d64b824a 100644 --- a/src/lazy/text/raw/reader.rs +++ b/src/lazy/text/raw/reader.rs @@ -62,7 +62,6 @@ impl<'data> LazyRawTextReader_1_0<'data> { ))); } let buffer_after_whitespace = buffer_after_whitespace.local_lifespan(); - let (remaining, matched_item) = buffer_after_whitespace .match_top_level_item_1_0() .with_context("reading a top-level value", buffer_after_whitespace)?; @@ -297,7 +296,6 @@ mod tests { reader.expect_next(RawValueRef::Null(IonType::Bool)); // null.int reader.expect_next(RawValueRef::Null(IonType::Int)); - // false reader.expect_next(RawValueRef::Bool(false)); // true @@ -336,7 +334,6 @@ mod tests { reader.expect_next(RawValueRef::Decimal(Decimal::new(-6, 5))); // 6d-5 reader.expect_next(RawValueRef::Decimal(Decimal::new(6, -5))); - // 2023T reader.expect_next(RawValueRef::Timestamp(Timestamp::with_year(2023).build()?)); // 2023-08-13T @@ -411,7 +408,6 @@ mod tests { sum += value?.expect_value()?.read()?.expect_i64()?; } assert_eq!(sum, 6); - // (foo++ 1 2) let sexp = reader.next()?.expect_value()?.read()?.expect_sexp()?; let mut sexp_elements = sexp.iter(); diff --git a/src/lazy/text/raw/sequence.rs b/src/lazy/text/raw/sequence.rs index 04a3d99a..3ac46d11 100644 --- a/src/lazy/text/raw/sequence.rs +++ b/src/lazy/text/raw/sequence.rs @@ -29,15 +29,10 @@ impl<'data> LazyRawTextList_1_0<'data> { } pub fn iter(&self) -> RawTextListIterator_1_0<'data> { - let open_bracket_index = - self.value.matched.encoded_value.data_offset() - self.value.matched.input.offset(); + // Skip past any annotations and the opening '[' + let list_contents_start = self.value.encoded_value.data_offset() + 1; // Make an iterator over the input bytes that follow the initial `[` - RawTextListIterator_1_0::new( - self.value - .matched - .input - .slice_to_end(open_bracket_index + 1), - ) + RawTextListIterator_1_0::new(self.value.input.slice_to_end(list_contents_start)) } } @@ -114,10 +109,8 @@ impl<'data> RawTextListIterator_1_0<'data> { let input_after_last = if let Some(value_result) = self.last() { let value = value_result?.expect_value()?; // ...the input slice that follows the last sequence value... - value - .matched - .input - .slice_to_end(value.matched.encoded_value.total_length()) + self.input + .slice_to_end(value.input.offset() + value.total_length() - self.input.offset()) } else { // ...or there aren't values, so it's just the input after the opening delimiter. self.input @@ -180,11 +173,9 @@ impl<'data> LazyRawTextSExp_1_0<'data> { pub fn iter(&self) -> RawTextSExpIterator_1_0<'data> { // Make an iterator over the input bytes that follow the initial `(`; account for - // a leading field name and/or annotations. - let open_paren_index = - self.value.matched.encoded_value.data_offset() - self.value.matched.input.offset(); - // Make an iterator over the input bytes that follow the initial `(` - RawTextSExpIterator_1_0::new(self.value.matched.input.slice_to_end(open_paren_index + 1)) + // a leading annotations sequence. + let sexp_contents_start = self.value.encoded_value.data_offset() + 1; + RawTextSExpIterator_1_0::new(self.value.input.slice_to_end(sexp_contents_start)) } } @@ -217,10 +208,8 @@ impl<'top> RawTextSExpIterator_1_0<'top> { let input_after_last = if let Some(value_result) = self.last() { let value = value_result?.expect_value()?; // ...the input slice that follows the last sequence value... - value - .matched - .input - .slice_to_end(value.matched.encoded_value.total_length()) + self.input + .slice_to_end(value.input.offset() + value.total_length() - self.input.offset()) } else { // ...or there aren't values, so it's just the input after the opening delimiter. self.input @@ -321,7 +310,7 @@ mod tests { let allocator = BumpAllocator::new(); let reader = &mut LazyRawTextReader_1_0::new(ion_data.as_bytes()); let value = reader.next(&allocator)?.expect_value()?; - let actual_range = value.matched.encoded_value.data_range(); + let actual_range = value.data_range(); assert_eq!( actual_range, expected, "Sequence range ({:?}) did not match expected range ({:?})", @@ -346,6 +335,7 @@ mod tests { ("[1, 2, [3, [a, b, c], 5], 6]", 0..28), ]; for test in tests { + println!("input: {}", test.0); expect_sequence_range(test.0, test.1.clone())?; } Ok(()) diff --git a/src/lazy/text/raw/struct.rs b/src/lazy/text/raw/struct.rs index d1454a50..5409c3ad 100644 --- a/src/lazy/text/raw/struct.rs +++ b/src/lazy/text/raw/struct.rs @@ -36,12 +36,9 @@ impl<'top> RawTextStructIterator_1_0<'top> { let start = self.input.offset() - 1; // We need to find the input slice containing the closing delimiter. It's either... let input_after_last = if let Some(field_result) = self.last() { - let (_name, value) = field_result?.expect_name_value()?; - // ...the input slice that follows the last field... - value - .matched - .input - .slice_to_end(value.matched.encoded_value.total_length()) + let field = field_result?; + self.input + .slice_to_end(field.range().end - self.input.offset()) } else { // ...or there aren't fields, so it's just the input after the opening delimiter. self.input @@ -57,7 +54,7 @@ impl<'top> RawTextStructIterator_1_0<'top> { .match_optional_comments_and_whitespace() .with_context("skipping a list's trailing comma", input_after_ws)?; } - let (input_after_end, _end_delimiter) = satisfy(|c| c == b'}' as char)(input_after_ws) + let (input_after_end, _end_delimiter) = satisfy(|c| c == '}')(input_after_ws) .with_context("seeking the closing delimiter of a struct", input_after_ws)?; let end = input_after_end.offset(); Ok(start..end) @@ -140,10 +137,10 @@ impl<'top> LazyRawStruct<'top, TextEncoding_1_0> for LazyRawTextStruct_1_0<'top> } fn iter(&self) -> Self::Iterator { - let open_brace_index = - self.value.matched.encoded_value.data_offset() - self.value.matched.input.offset(); - // Slice the input to skip the opening `{` - RawTextStructIterator_1_0::new(self.value.matched.input.slice_to_end(open_brace_index + 1)) + // Make an iterator over the input bytes that follow the initial `{`; account for + // a leading annotations sequence. + let struct_contents_start = self.value.encoded_value.data_offset() + 1; + RawTextStructIterator_1_0::new(self.value.input.slice_to_end(struct_contents_start)) } } @@ -170,7 +167,7 @@ mod tests { let allocator = BumpAllocator::new(); let reader = &mut LazyRawTextReader_1_0::new(ion_data.as_bytes()); let value = reader.next(&allocator)?.expect_value()?; - let actual_range = value.matched.encoded_value.data_range(); + let actual_range = value.data_range(); assert_eq!( actual_range, expected, "Struct range ({:?}) did not match expected range ({:?})", diff --git a/src/lazy/text/raw/v1_1/reader.rs b/src/lazy/text/raw/v1_1/reader.rs index 2d45733d..02622a2a 100644 --- a/src/lazy/text/raw/v1_1/reader.rs +++ b/src/lazy/text/raw/v1_1/reader.rs @@ -4,28 +4,27 @@ use std::fmt; use std::fmt::{Debug, Display, Formatter}; use std::ops::Range; +use bumpalo::collections::Vec as BumpVec; +use bumpalo::Bump as BumpAllocator; use nom::character::streaming::satisfy; use crate::lazy::decoder::private::LazyContainerPrivate; use crate::lazy::decoder::{ HasRange, HasSpan, LazyDecoder, LazyRawContainer, LazyRawFieldExpr, LazyRawFieldName, - LazyRawReader, LazyRawSequence, LazyRawStruct, LazyRawValue, LazyRawValueExpr, RawValueExpr, + LazyRawReader, LazyRawSequence, LazyRawStruct, LazyRawValue, LazyRawValueExpr, RawVersionMarker, }; use crate::lazy::encoding::TextEncoding_1_1; +use crate::lazy::expanded::macro_evaluator::RawEExpression; use crate::lazy::raw_stream_item::{EndPosition, LazyRawStreamItem, RawStreamItem}; +use crate::lazy::span::Span; use crate::lazy::text::buffer::TextBufferView; +use crate::lazy::text::matched::{MatchedFieldName, MatchedValue}; use crate::lazy::text::parse_result::{AddContext, ToIteratorOutput}; use crate::lazy::text::value::{LazyRawTextValue_1_1, RawTextAnnotationsIterator}; use crate::result::IonFailure; use crate::{IonResult, IonType, RawSymbolTokenRef}; -use crate::lazy::expanded::macro_evaluator::RawEExpression; -use crate::lazy::span::Span; -use crate::lazy::text::matched::{MatchedFieldName, MatchedValue}; -use bumpalo::collections::Vec as BumpVec; -use bumpalo::Bump as BumpAllocator; - pub struct LazyRawTextReader_1_1<'data> { input: &'data [u8], // The offset from the beginning of the overall stream at which the `input` slice begins @@ -255,26 +254,14 @@ impl<'top> TextListSpanFinder_1_1<'top> { child_expr_cache.push(expr); } - let input_after_last_expr = if let Some(value_expr) = child_expr_cache.last() { - // ...the input slice that follows the last sequence value... - match value_expr { - RawValueExpr::ValueLiteral(value) => value - .matched - .input - .slice_to_end(value.matched.encoded_value.total_length()), - RawValueExpr::MacroInvocation(invocation) => { - let end_of_expr = invocation.input.offset() + invocation.input.len(); - let remaining = self - .iterator - .input - .slice_to_end(end_of_expr - self.iterator.input.offset()); - remaining - } - } - } else { - // ...or there weren't any child values, so it's just the input after the opening delimiter. - self.iterator.input - }; + let end = child_expr_cache + .last() + .map(|e| e.range().end) + .unwrap_or(self.iterator.input.offset()); + let input_after_last_expr = self + .iterator + .input + .slice_to_end(end - self.iterator.input.offset()); let (mut input_after_ws, _ws) = input_after_last_expr .match_optional_comments_and_whitespace() @@ -393,31 +380,20 @@ impl<'top> TextSExpSpanFinder_1_1<'top> { // The input has already skipped past the opening delimiter. let start = self.iterator.input.offset() - initial_bytes_skipped; let mut child_expr_cache = BumpVec::new_in(self.allocator); + for expr_result in self.iterator { let expr = expr_result?; child_expr_cache.push(expr); } - let input_after_last_expr = if let Some(value_expr) = child_expr_cache.last() { - // ...the input slice that follows the last sequence value... - match value_expr { - RawValueExpr::ValueLiteral(value) => value - .matched - .input - .slice_to_end(value.matched.encoded_value.total_length()), - RawValueExpr::MacroInvocation(invocation) => { - let end_of_expr = invocation.input.offset() + invocation.input.len(); - let remaining = self - .iterator - .input - .slice_to_end(end_of_expr - self.iterator.input.offset()); - remaining - } - } - } else { - // ...or there weren't any child values, so it's just the input after the opening delimiter. - self.iterator.input - }; + let end = child_expr_cache + .last() + .map(|e| e.range().end) + .unwrap_or(self.iterator.input.offset()); + let input_after_last_expr = self + .iterator + .input + .slice_to_end(end - self.iterator.input.offset()); let (input_after_ws, _ws) = input_after_last_expr .match_optional_comments_and_whitespace() @@ -426,8 +402,8 @@ impl<'top> TextSExpSpanFinder_1_1<'top> { .with_context("seeking the closing delimiter of a sexp", input_after_ws)?; let end = input_after_end.offset(); - let span = start..end; - Ok((span, child_expr_cache.into_bump_slice())) + let range = start..end; + Ok((range, child_expr_cache.into_bump_slice())) } } @@ -439,7 +415,7 @@ impl<'top> LazyContainerPrivate<'top, TextEncoding_1_1> for LazyRawTextSExp_1_1< impl<'top> LazyRawContainer<'top, TextEncoding_1_1> for LazyRawTextSExp_1_1<'top> { fn as_value(&self) -> ::Value<'top> { - self.value.matched.into() + self.value } } @@ -455,7 +431,7 @@ impl<'top> LazyRawSequence<'top, TextEncoding_1_1> for LazyRawTextSExp_1_1<'top> } fn iter(&self) -> Self::Iterator { - let MatchedValue::SExp(child_exprs) = self.value.matched.encoded_value.matched() else { + let MatchedValue::SExp(child_exprs) = self.value.encoded_value.matched() else { unreachable!("s-expression contained a matched value of the wrong type") }; RawTextSequenceCacheIterator_1_1::new(child_exprs) @@ -593,7 +569,7 @@ impl<'top> LazyContainerPrivate<'top, TextEncoding_1_1> for LazyRawTextList_1_1< impl<'top> LazyRawContainer<'top, TextEncoding_1_1> for LazyRawTextList_1_1<'top> { fn as_value(&self) -> LazyRawTextValue_1_1<'top> { - self.value.matched.into() + self.value } } @@ -609,7 +585,7 @@ impl<'top> LazyRawSequence<'top, TextEncoding_1_1> for LazyRawTextList_1_1<'top> } fn iter(&self) -> Self::Iterator { - let MatchedValue::List(child_exprs) = self.value.matched.encoded_value.matched() else { + let MatchedValue::List(child_exprs) = self.value.encoded_value.matched() else { unreachable!("list contained a matched value of the wrong type") }; RawTextSequenceCacheIterator_1_1::new(child_exprs) @@ -661,7 +637,7 @@ impl<'top> LazyRawStruct<'top, TextEncoding_1_1> for LazyRawTextStruct_1_1<'top> } fn iter(&self) -> Self::Iterator { - let MatchedValue::Struct(field_exprs) = self.value.matched.encoded_value.matched() else { + let MatchedValue::Struct(field_exprs) = self.value.encoded_value.matched() else { unreachable!("struct contained a matched value of the wrong type") }; RawTextStructCacheIterator_1_1::new(field_exprs) @@ -722,28 +698,18 @@ impl<'top> TextStructSpanFinder_1_1<'top> { child_expr_cache.push(expr); } - // We need to find the input slice containing the closing delimiter. - let input_after_last = if let Some(field) = child_expr_cache.last() { - // If there are any field expressions, we need to isolate the input slice that follows - // the last one. - use LazyRawFieldExpr::*; - match field { - NameValue(_, value) => value - .matched - .input - .slice_to_end(value.matched.encoded_value.total_length()), - NameEExp(_, eexp) | EExp(eexp) => { - self.iterator.input.slice_to_end(eexp.input.len()) - } - } - } else { - // ...or there aren't fields, so it's just the input after the opening delimiter. - self.iterator.input - }; - let (mut input_after_ws, _ws) = - input_after_last - .match_optional_comments_and_whitespace() - .with_context("seeking the end of a struct", input_after_last)?; + let end = child_expr_cache + .last() + .map(|e| e.range().end) + .unwrap_or(start + 1); + let input_after_last_field_expr = self + .iterator + .input + .slice_to_end(end - self.iterator.input.offset()); + + let (mut input_after_ws, _ws) = input_after_last_field_expr + .match_optional_comments_and_whitespace() + .with_context("seeking the end of a struct", input_after_last_field_expr)?; // Skip an optional comma and more whitespace if input_after_ws.bytes().first() == Some(&b',') { (input_after_ws, _) = input_after_ws @@ -760,9 +726,10 @@ impl<'top> TextStructSpanFinder_1_1<'top> { #[cfg(test)] mod tests { - use super::*; use crate::lazy::raw_value_ref::RawValueRef; + use super::*; + fn expect_next<'top, 'data: 'top>( allocator: &'top BumpAllocator, reader: &'top mut LazyRawTextReader_1_1<'data>, diff --git a/src/lazy/text/value.rs b/src/lazy/text/value.rs index d0b12d16..66d316c1 100644 --- a/src/lazy/text/value.rs +++ b/src/lazy/text/value.rs @@ -21,35 +21,60 @@ use crate::{IonResult, IonType, RawSymbolTokenRef}; /// format than in its binary format, but is still possible.) For a resolved lazy value that /// includes a text definition for these items whenever one exists, see /// [`crate::lazy::value::LazyValue`]. -// This type is version agnostic, and is wrapped by the LazyRawValue implementations for all -// existing encodings. #[derive(Copy, Clone)] -pub struct MatchedRawTextValue<'top, E: TextEncoding<'top>> { +pub struct LazyRawTextValue<'top, E: TextEncoding<'top>> { pub(crate) encoded_value: EncodedTextValue<'top, E>, pub(crate) input: TextBufferView<'top>, } -impl<'top, E: TextEncoding<'top>> Debug for MatchedRawTextValue<'top, E> { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!( - f, - "MatchedRawTextValue {{\n val={:?},\n buf={:?}\n}}\n", - self.encoded_value, self.input - ) +impl<'top, E: TextEncoding<'top>> LazyRawTextValue<'top, E> { + pub(crate) fn new( + input: TextBufferView<'top>, + encoded_value: EncodedTextValue<'top, E>, + ) -> Self { + Self { + encoded_value, + input, + } } } -// ===== Version-specific wrappers ===== -// -// These types provide Ion-version-specific impls of the LazyRawValue trait -#[derive(Copy, Clone)] -pub struct LazyRawTextValue<'top, E: TextEncoding<'top>> { - pub(crate) matched: MatchedRawTextValue<'top, E>, -} - impl<'top, E: TextEncoding<'top>> LazyRawTextValue<'top, E> { - pub fn new(matched: MatchedRawTextValue<'top, E>) -> Self { - Self { matched } + pub fn data_range(&self) -> Range { + // If the matched value has annotations, the `data_offset` will be the offset beyond + // the annotations at which the value's data begins. + let data_offset = self.encoded_value.data_offset(); + let data_length = self.input.len() - data_offset; + // Add the input buffer's offset to the data offset to get the absolute offset. + let start = self.input.offset() + data_offset; + let end = start + data_length; + start..end + } + + pub fn has_annotations(&self) -> bool { + self.encoded_value.data_offset() > 0 + } + + pub fn annotations_range(&self) -> Option> { + if !self.has_annotations() { + return None; + } + let annotations_length = self.encoded_value.data_offset(); + let start = self.input.offset(); + let end = start + annotations_length; + Some(start..end) + } + + pub fn annotations_span(&self) -> Option> { + let range = self.annotations_range()?; + let bytes = &self.input.bytes()[..range.len()]; + Some(Span::with_offset(range.start, bytes)) + } + + /// Returns the total number of bytes used to represent the current value, including its + /// annotations (if any) and its value. + pub fn total_length(&self) -> usize { + self.input.len() } } @@ -112,44 +137,39 @@ impl<'top, E: TextEncoding<'top>> Debug for LazyRawTextValue<'top, E> { // If we can read the value, show it Ok(value) => write!(f, " {{{value:?}}}"), // Otherwise, write out diagnostic information - Err(e) => write!(f, " {{\n matched={:?}\n err={:?}\n}}\n", self.matched, e), + Err(e) => write!( + f, + " {{\n encoded_value={:?}\n {:?}\n err={:?}\n}}\n", + self.encoded_value, self.input, e + ), } } } -impl<'top> From> for LazyRawTextValue_1_0<'top> { - fn from(matched: MatchedRawTextValue<'top, TextEncoding_1_0>) -> Self { - LazyRawTextValue::new(matched) - } -} - -impl<'top> From> for LazyRawTextValue_1_1<'top> { - fn from(matched: MatchedRawTextValue<'top, TextEncoding_1_1>) -> Self { - LazyRawTextValue::new(matched) - } -} - // ===== Ion-version-agnostic functionality ===== // // These trait impls are common to all Ion versions, but require the caller to specify a type // parameter. -impl<'top, E: TextEncoding<'top>> HasRange for MatchedRawTextValue<'top, E> { +impl<'top, E: TextEncoding<'top>> HasRange for LazyRawTextValue<'top, E> { fn range(&self) -> Range { - self.encoded_value.annotated_value_range() + self.input.range() } } -impl<'top, E: TextEncoding<'top>> HasSpan<'top> for MatchedRawTextValue<'top, E> { +impl<'top, E: TextEncoding<'top>> HasSpan<'top> for LazyRawTextValue<'top, E> { fn span(&self) -> Span<'top> { + Span::with_offset(self.input.offset(), self.input.bytes()) + /* let range = self.range(); let input_offset = self.input.offset(); let local_range = (range.start - input_offset)..(range.end - input_offset); Span::with_offset(range.start, &self.input.bytes()[local_range]) + */ } } -impl<'top, E: TextEncoding<'top>> LazyRawValue<'top, E> for MatchedRawTextValue<'top, E> { +impl<'top, E: TextEncoding<'top>> LazyRawValue<'top, E> for LazyRawTextValue<'top, E> { fn ion_type(&self) -> IonType { self.encoded_value.ion_type() } @@ -159,22 +179,17 @@ impl<'top, E: TextEncoding<'top>> LazyRawValue<'top, E> for MatchedRawTextValue< } fn annotations(&self) -> ::AnnotationsIterator<'top> { - let span = self + let range = self .encoded_value .annotations_range() .unwrap_or(self.input.offset()..self.input.offset()); - let annotations_bytes = self - .input - .slice(span.start - self.input.offset(), span.len()); + let annotations_bytes = self.input.slice(0, range.len()); RawTextAnnotationsIterator::new(annotations_bytes) } fn read(&self) -> IonResult> { - let matched_input = self.input.slice( - self.encoded_value.data_offset() - self.input.offset(), - self.encoded_value.data_length(), - ); - + // Get the value's matched input, skipping over any annotations + let matched_input = self.input.slice_to_end(self.encoded_value.data_offset()); let allocator = self.input.allocator; use crate::lazy::text::matched::MatchedValue::*; @@ -189,44 +204,14 @@ impl<'top, E: TextEncoding<'top>> LazyRawValue<'top, E> for MatchedRawTextValue< Symbol(s) => RawValueRef::Symbol(s.read(allocator, matched_input)?), Blob(b) => RawValueRef::Blob(b.read(allocator, matched_input)?), Clob(c) => RawValueRef::Clob(c.read(allocator, matched_input)?), - List(_) => RawValueRef::List(E::List::<'top>::from_value(E::value_from_matched(*self))), - SExp(_) => RawValueRef::SExp(E::SExp::<'top>::from_value(E::value_from_matched(*self))), - Struct(_) => RawValueRef::Struct(E::Struct::from_value(E::value_from_matched(*self))), + List(_) => RawValueRef::List(E::List::<'top>::from_value(*self)), + SExp(_) => RawValueRef::SExp(E::SExp::<'top>::from_value(*self)), + Struct(_) => RawValueRef::Struct(E::Struct::from_value(*self)), }; Ok(value_ref) } } -impl<'top, E: TextEncoding<'top>> HasRange for LazyRawTextValue<'top, E> { - fn range(&self) -> Range { - self.matched.range() - } -} - -impl<'top, E: TextEncoding<'top>> HasSpan<'top> for LazyRawTextValue<'top, E> { - fn span(&self) -> Span<'top> { - self.matched.span() - } -} - -impl<'top, E: TextEncoding<'top>> LazyRawValue<'top, E> for LazyRawTextValue<'top, E> { - fn ion_type(&self) -> IonType { - self.matched.ion_type() - } - - fn is_null(&self) -> bool { - self.matched.is_null() - } - - fn annotations(&self) -> ::AnnotationsIterator<'top> { - self.matched.annotations() - } - - fn read(&self) -> IonResult> { - self.matched.read() - } -} - pub struct RawTextAnnotationsIterator<'data> { input: TextBufferView<'data>, has_returned_error: bool,