diff --git a/examples/write_log_events.rs b/examples/write_log_events.rs index cc32fe38..cc1a8ac0 100644 --- a/examples/write_log_events.rs +++ b/examples/write_log_events.rs @@ -194,9 +194,9 @@ mod example { .write(11, event.thread_id)? .write(12, &event.thread_name)? // v--- The fixed strings from the log statement are also SIDs - .write(13, RawSymbolToken::SymbolId(17))? // logger name - .write(14, RawSymbolToken::SymbolId(18))? // log level - .write(15, RawSymbolToken::SymbolId(19))? // format + .write(13, RawSymbolTokenRef::SymbolId(17))? // logger name + .write(14, RawSymbolTokenRef::SymbolId(18))? // log level + .write(15, RawSymbolTokenRef::SymbolId(19))? // format .write(16, &event.parameters)?; struct_.close() } diff --git a/src/lazy/any_encoding.rs b/src/lazy/any_encoding.rs index d7875b6c..cd4a7d1a 100644 --- a/src/lazy/any_encoding.rs +++ b/src/lazy/any_encoding.rs @@ -7,25 +7,30 @@ use bumpalo::Bump as BumpAllocator; use crate::lazy::any_encoding::RawReaderKind::{Binary_1_0, Text_1_0}; use crate::lazy::binary::raw::annotations_iterator::RawBinaryAnnotationsIterator as RawBinaryAnnotationsIterator_1_0; -use crate::lazy::binary::raw::r#struct::{LazyRawBinaryStruct_1_0, RawBinaryStructIterator_1_0}; +use crate::lazy::binary::raw::r#struct::{ + LazyRawBinaryFieldName_1_0, LazyRawBinaryStruct_1_0, RawBinaryStructIterator_1_0, +}; use crate::lazy::binary::raw::reader::LazyRawBinaryReader_1_0; use crate::lazy::binary::raw::sequence::{ LazyRawBinaryList_1_0, LazyRawBinarySExp_1_0, RawBinarySequenceIterator_1_0, }; use crate::lazy::binary::raw::v1_1::r#struct::{ - LazyRawBinaryStruct_1_1, RawBinaryStructIterator_1_1, + LazyRawBinaryFieldName_1_1, LazyRawBinaryStruct_1_1, RawBinaryStructIterator_1_1, }; use crate::lazy::binary::raw::v1_1::reader::LazyRawBinaryReader_1_1; use crate::lazy::binary::raw::v1_1::sequence::{ LazyRawBinaryList_1_1, LazyRawBinarySExp_1_1, RawBinarySequenceIterator_1_1, }; -use crate::lazy::binary::raw::v1_1::value::LazyRawBinaryValue_1_1; +use crate::lazy::binary::raw::v1_1::value::{ + LazyRawBinaryValue_1_1, LazyRawBinaryVersionMarker_1_1, +}; use crate::lazy::binary::raw::v1_1::RawBinaryAnnotationsIterator_1_1; -use crate::lazy::binary::raw::value::LazyRawBinaryValue_1_0; -use crate::lazy::decoder::private::{LazyContainerPrivate, LazyRawValuePrivate}; +use crate::lazy::binary::raw::value::{LazyRawBinaryValue_1_0, LazyRawBinaryVersionMarker_1_0}; +use crate::lazy::decoder::private::LazyContainerPrivate; use crate::lazy::decoder::{ - LazyDecoder, LazyRawFieldExpr, LazyRawReader, LazyRawSequence, LazyRawStruct, LazyRawValue, - LazyRawValueExpr, RawFieldExpr, RawValueExpr, + HasRange, HasSpan, LazyDecoder, LazyRawContainer, LazyRawFieldExpr, LazyRawFieldName, + LazyRawReader, LazyRawSequence, LazyRawStruct, LazyRawValue, LazyRawValueExpr, RawValueExpr, + RawVersionMarker, }; use crate::lazy::encoding::{ BinaryEncoding_1_0, BinaryEncoding_1_1, TextEncoding_1_0, TextEncoding_1_1, @@ -34,17 +39,22 @@ use crate::lazy::expanded::macro_evaluator::RawEExpression; use crate::lazy::never::Never; use crate::lazy::raw_stream_item::LazyRawStreamItem; use crate::lazy::raw_value_ref::RawValueRef; -use crate::lazy::text::raw::r#struct::{LazyRawTextStruct_1_0, RawTextStructIterator_1_0}; +use crate::lazy::span::Span; +use crate::lazy::text::raw::r#struct::{ + LazyRawTextFieldName_1_0, LazyRawTextStruct_1_0, RawTextStructIterator_1_0, +}; use crate::lazy::text::raw::reader::LazyRawTextReader_1_0; use crate::lazy::text::raw::sequence::{ LazyRawTextList_1_0, LazyRawTextSExp_1_0, RawTextListIterator_1_0, RawTextSExpIterator_1_0, }; use crate::lazy::text::raw::v1_1::reader::{ - LazyRawTextList_1_1, LazyRawTextSExp_1_1, LazyRawTextStruct_1_1, MacroIdRef, - RawTextEExpression_1_1, RawTextSequenceCacheIterator_1_1, RawTextStructCacheIterator_1_1, + LazyRawTextFieldName_1_1, LazyRawTextList_1_1, LazyRawTextSExp_1_1, LazyRawTextStruct_1_1, + MacroIdRef, RawTextEExpression_1_1, RawTextSequenceCacheIterator_1_1, + RawTextStructCacheIterator_1_1, }; use crate::lazy::text::value::{ - LazyRawTextValue_1_0, LazyRawTextValue_1_1, RawTextAnnotationsIterator, + LazyRawTextValue_1_0, LazyRawTextValue_1_1, LazyRawTextVersionMarker_1_0, + LazyRawTextVersionMarker_1_1, RawTextAnnotationsIterator, }; use crate::{IonResult, IonType, RawSymbolTokenRef}; @@ -62,9 +72,90 @@ impl LazyDecoder for AnyEncoding { type SExp<'top> = LazyRawAnySExp<'top>; type List<'top> = LazyRawAnyList<'top>; type Struct<'top> = LazyRawAnyStruct<'top>; + type FieldName<'top> = LazyRawAnyFieldName<'top>; type AnnotationsIterator<'top> = RawAnyAnnotationsIterator<'top>; - type EExpression<'top> = LazyRawAnyEExpression<'top>; + type EExp<'top> = LazyRawAnyEExpression<'top>; + type VersionMarker<'top> = LazyRawAnyVersionMarker<'top>; +} + +#[derive(Debug, Copy, Clone)] +pub struct LazyRawAnyVersionMarker<'top> { + encoding: LazyRawAnyVersionMarkerKind<'top>, } + +#[derive(Debug, Copy, Clone)] +pub enum LazyRawAnyVersionMarkerKind<'top> { + Text_1_0(LazyRawTextVersionMarker_1_0<'top>), + Binary_1_0(LazyRawBinaryVersionMarker_1_0<'top>), + Text_1_1(LazyRawTextVersionMarker_1_1<'top>), + Binary_1_1(LazyRawBinaryVersionMarker_1_1<'top>), +} + +impl<'top> HasSpan<'top> for LazyRawAnyVersionMarker<'top> { + fn span(&self) -> Span<'top> { + use LazyRawAnyVersionMarkerKind::*; + match self.encoding { + Text_1_0(marker) => marker.span(), + Binary_1_0(marker) => marker.span(), + Text_1_1(marker) => marker.span(), + Binary_1_1(marker) => marker.span(), + } + } +} + +impl<'top> HasRange for LazyRawAnyVersionMarker<'top> { + fn range(&self) -> Range { + use LazyRawAnyVersionMarkerKind::*; + match self.encoding { + Text_1_0(marker) => marker.range(), + Binary_1_0(marker) => marker.range(), + Text_1_1(marker) => marker.range(), + Binary_1_1(marker) => marker.range(), + } + } +} + +impl<'top> RawVersionMarker<'top> for LazyRawAnyVersionMarker<'top> { + fn version(&self) -> (u8, u8) { + use LazyRawAnyVersionMarkerKind::*; + match self.encoding { + Text_1_0(marker) => marker.version(), + Binary_1_0(marker) => marker.version(), + Text_1_1(marker) => marker.version(), + Binary_1_1(marker) => marker.version(), + } + } +} + +impl<'top> From> for LazyRawAnyVersionMarker<'top> { + fn from(value: LazyRawBinaryVersionMarker_1_0<'top>) -> Self { + LazyRawAnyVersionMarker { + encoding: LazyRawAnyVersionMarkerKind::Binary_1_0(value), + } + } +} +impl<'top> From> for LazyRawAnyVersionMarker<'top> { + fn from(value: LazyRawBinaryVersionMarker_1_1<'top>) -> Self { + LazyRawAnyVersionMarker { + encoding: LazyRawAnyVersionMarkerKind::Binary_1_1(value), + } + } +} +impl<'top> From> for LazyRawAnyVersionMarker<'top> { + fn from(value: LazyRawTextVersionMarker_1_0<'top>) -> Self { + LazyRawAnyVersionMarker { + encoding: LazyRawAnyVersionMarkerKind::Text_1_0(value), + } + } +} +impl<'top> From> for LazyRawAnyVersionMarker<'top> { + fn from(value: LazyRawTextVersionMarker_1_1<'top>) -> Self { + LazyRawAnyVersionMarker { + encoding: LazyRawAnyVersionMarkerKind::Text_1_1(value), + } + } +} + #[derive(Debug, Copy, Clone)] pub struct LazyRawAnyEExpression<'top> { encoding: LazyRawAnyEExpressionKind<'top>, @@ -72,13 +163,8 @@ pub struct LazyRawAnyEExpression<'top> { #[derive(Debug, Copy, Clone)] enum LazyRawAnyEExpressionKind<'top> { - // Ion 1.0 does not support macro invocations. Having these variants hold an instance of - // `Never` (which cannot be instantiated) informs the compiler that it can eliminate these - // branches in code paths exclusive to v1.0. - Text_1_0(Never), - Binary_1_0(Never), Text_1_1(RawTextEExpression_1_1<'top>), - Binary_1_1(Never), + Binary_1_1(Never), // TODO: RawBinaryEExpression_1_1 } impl<'top> From> for LazyRawAnyEExpression<'top> { @@ -89,29 +175,47 @@ impl<'top> From> for LazyRawAnyEExpression<'top> { } } +impl<'top> HasSpan<'top> for LazyRawAnyEExpression<'top> { + fn span(&self) -> Span<'top> { + use LazyRawAnyEExpressionKind::*; + match self.encoding { + Text_1_1(m) => m.span(), + Binary_1_1(m) => m.span(), + } + } +} + +impl<'top> HasRange for LazyRawAnyEExpression<'top> { + fn range(&self) -> Range { + use LazyRawAnyEExpressionKind::*; + match self.encoding { + Text_1_1(m) => m.range(), + Binary_1_1(m) => m.range(), + } + } +} + impl<'top> RawEExpression<'top, AnyEncoding> for LazyRawAnyEExpression<'top> { type RawArgumentsIterator<'a> = LazyRawAnyMacroArgsIterator<'top,> where Self: 'a; fn id(&self) -> MacroIdRef<'top> { + use LazyRawAnyEExpressionKind::*; match self.encoding { - LazyRawAnyEExpressionKind::Text_1_0(_) => unreachable!("macro in text Ion 1.0"), - LazyRawAnyEExpressionKind::Binary_1_0(_) => unreachable!("macro in binary Ion 1.0"), - LazyRawAnyEExpressionKind::Text_1_1(ref m) => m.id(), - LazyRawAnyEExpressionKind::Binary_1_1(_) => { + Text_1_1(ref m) => m.id(), + Binary_1_1(_) => { todo!("macros in binary Ion 1.1 are not implemented") } } } fn raw_arguments(&self) -> Self::RawArgumentsIterator<'_> { + use LazyRawAnyEExpressionKind::*; match self.encoding { - LazyRawAnyEExpressionKind::Text_1_0(_) => unreachable!("macro in text Ion 1.0"), - LazyRawAnyEExpressionKind::Binary_1_0(_) => unreachable!("macro in binary Ion 1.0"), - LazyRawAnyEExpressionKind::Text_1_1(m) => LazyRawAnyMacroArgsIterator { + Text_1_1(m) => LazyRawAnyMacroArgsIterator { encoding: LazyRawAnyMacroArgsIteratorKind::Text_1_1(m.raw_arguments()), }, - LazyRawAnyEExpressionKind::Binary_1_1(_) => { - todo!("macros in binary Ion 1.1 are not implemented") + Binary_1_1(_) => { + todo!("macros in binary Ion 1.1 are not yet implemented") } } } @@ -280,6 +384,13 @@ pub struct LazyRawAnyValue<'top> { encoding: LazyRawValueKind<'top>, } +impl<'top> LazyRawAnyValue<'top> { + // TODO: feature gate, doc comments + pub fn kind(&self) -> LazyRawValueKind<'top> { + self.encoding + } +} + #[derive(Debug, Copy, Clone)] pub enum LazyRawValueKind<'top> { Text_1_0(LazyRawTextValue_1_0<'top>), @@ -324,12 +435,7 @@ impl<'top> From> for LazyRawValueExpr<' fn from(value: LazyRawValueExpr<'top, TextEncoding_1_0>) -> Self { match value { RawValueExpr::ValueLiteral(v) => RawValueExpr::ValueLiteral(v.into()), - RawValueExpr::MacroInvocation(m) => { - let invocation = LazyRawAnyEExpression { - encoding: LazyRawAnyEExpressionKind::Text_1_0(m), - }; - RawValueExpr::MacroInvocation(invocation) - } + RawValueExpr::MacroInvocation(_) => unreachable!("macro invocation in text Ion 1.0"), } } } @@ -340,12 +446,7 @@ impl<'top> From> fn from(value: LazyRawValueExpr<'top, BinaryEncoding_1_0>) -> Self { match value { RawValueExpr::ValueLiteral(v) => RawValueExpr::ValueLiteral(v.into()), - RawValueExpr::MacroInvocation(m) => { - let invocation = LazyRawAnyEExpression { - encoding: LazyRawAnyEExpressionKind::Binary_1_0(m), - }; - RawValueExpr::MacroInvocation(invocation) - } + RawValueExpr::MacroInvocation(_) => unreachable!("macro invocation in binary Ion 1.0"), } } } @@ -469,8 +570,8 @@ impl<'top> From> { fn from(value: LazyRawStreamItem<'top, TextEncoding_1_0>) -> Self { match value { - LazyRawStreamItem::::VersionMarker(major, minor) => { - LazyRawStreamItem::::VersionMarker(major, minor) + LazyRawStreamItem::::VersionMarker(marker) => { + LazyRawStreamItem::::VersionMarker(marker.into()) } LazyRawStreamItem::::Value(value) => { LazyRawStreamItem::::Value(value.into()) @@ -478,8 +579,8 @@ impl<'top> From> LazyRawStreamItem::::EExpression(_) => { unreachable!("Ion 1.0 does not support macro invocations") } - LazyRawStreamItem::::EndOfStream => { - LazyRawStreamItem::::EndOfStream + LazyRawStreamItem::::EndOfStream(end) => { + LazyRawStreamItem::::EndOfStream(end) } } } @@ -490,8 +591,8 @@ impl<'top> From> { fn from(value: LazyRawStreamItem<'top, BinaryEncoding_1_0>) -> Self { match value { - LazyRawStreamItem::::VersionMarker(major, minor) => { - LazyRawStreamItem::::VersionMarker(major, minor) + LazyRawStreamItem::::VersionMarker(marker) => { + LazyRawStreamItem::::VersionMarker(marker.into()) } LazyRawStreamItem::::Value(value) => { LazyRawStreamItem::::Value(value.into()) @@ -499,8 +600,8 @@ impl<'top> From> LazyRawStreamItem::::EExpression(_) => { unreachable!("Ion 1.0 does not support macro invocations") } - LazyRawStreamItem::::EndOfStream => { - LazyRawStreamItem::::EndOfStream + LazyRawStreamItem::::EndOfStream(end) => { + LazyRawStreamItem::::EndOfStream(end) } } } @@ -511,8 +612,8 @@ impl<'top> From> { fn from(value: LazyRawStreamItem<'top, TextEncoding_1_1>) -> Self { match value { - LazyRawStreamItem::::VersionMarker(major, minor) => { - LazyRawStreamItem::::VersionMarker(major, minor) + LazyRawStreamItem::::VersionMarker(marker) => { + LazyRawStreamItem::::VersionMarker(marker.into()) } LazyRawStreamItem::::Value(value) => { LazyRawStreamItem::::Value(value.into()) @@ -522,8 +623,8 @@ impl<'top> From> encoding: LazyRawAnyEExpressionKind::Text_1_1(invocation), }) } - LazyRawStreamItem::::EndOfStream => { - LazyRawStreamItem::::EndOfStream + LazyRawStreamItem::::EndOfStream(end) => { + LazyRawStreamItem::::EndOfStream(end) } } } @@ -534,8 +635,8 @@ impl<'top> From> { fn from(value: LazyRawStreamItem<'top, BinaryEncoding_1_1>) -> Self { match value { - LazyRawStreamItem::::VersionMarker(major, minor) => { - LazyRawStreamItem::::VersionMarker(major, minor) + LazyRawStreamItem::::VersionMarker(marker) => { + LazyRawStreamItem::::VersionMarker(marker.into()) } LazyRawStreamItem::::Value(value) => { LazyRawStreamItem::::Value(value.into()) @@ -543,21 +644,33 @@ impl<'top> From> LazyRawStreamItem::::EExpression(_) => { todo!("Macro invocations not yet implemented in binary 1.1") } - LazyRawStreamItem::::EndOfStream => { - LazyRawStreamItem::::EndOfStream + LazyRawStreamItem::::EndOfStream(end) => { + LazyRawStreamItem::::EndOfStream(end) } } } } -impl<'top> LazyRawValuePrivate<'top> for LazyRawAnyValue<'top> { - fn field_name(&self) -> IonResult> { +impl<'top> HasSpan<'top> for LazyRawAnyValue<'top> { + fn span(&self) -> Span<'top> { + use LazyRawValueKind::*; + match &self.encoding { + Text_1_0(v) => v.span(), + Binary_1_0(v) => v.span(), + Text_1_1(v) => v.span(), + Binary_1_1(v) => v.span(), + } + } +} + +impl<'top> HasRange for LazyRawAnyValue<'top> { + fn range(&self) -> Range { use LazyRawValueKind::*; match &self.encoding { - Text_1_0(v) => v.field_name(), - Binary_1_0(v) => v.field_name(), - Text_1_1(v) => v.field_name(), - Binary_1_1(v) => v.field_name(), + Text_1_0(v) => v.range(), + Binary_1_0(v) => v.range(), + Text_1_1(v) => v.range(), + Binary_1_1(v) => v.range(), } } } @@ -610,26 +723,6 @@ impl<'top> LazyRawValue<'top, AnyEncoding> for LazyRawAnyValue<'top> { Binary_1_1(v) => Ok(v.read()?.into()), } } - - fn range(&self) -> Range { - use LazyRawValueKind::*; - match &self.encoding { - Text_1_0(v) => v.range(), - Binary_1_0(v) => v.range(), - Text_1_1(v) => v.range(), - Binary_1_1(v) => v.range(), - } - } - - fn span(&self) -> &[u8] { - use LazyRawValueKind::*; - match &self.encoding { - Text_1_0(v) => v.span(), - Binary_1_0(v) => v.span(), - Text_1_1(v) => v.span(), - Binary_1_1(v) => v.span(), - } - } } // ===== Annotations ===== @@ -665,6 +758,24 @@ pub struct LazyRawAnyList<'top> { encoding: LazyRawListKind<'top>, } +impl<'top> LazyRawAnyList<'top> { + pub fn as_value(&self) -> LazyRawAnyValue<'top> { + use LazyRawListKind::*; + match self.encoding { + Text_1_0(_) => todo!(), + Binary_1_0(s) => s.as_value().into(), + Text_1_1(_) => todo!(), + Binary_1_1(_) => todo!(), + } + } +} + +impl<'top> LazyRawAnyList<'top> { + pub fn kind(&self) -> LazyRawListKind<'top> { + self.encoding + } +} + #[derive(Debug, Copy, Clone)] pub enum LazyRawListKind<'top> { Text_1_0(LazyRawTextList_1_0<'top>), @@ -724,6 +835,17 @@ impl<'data> Iterator for RawAnyListIterator<'data> { } } +impl<'top> LazyRawContainer<'top, AnyEncoding> for LazyRawAnyList<'top> { + fn as_value(&self) -> ::Value<'top> { + match &self.encoding { + LazyRawListKind::Text_1_0(s) => s.as_value().into(), + LazyRawListKind::Binary_1_0(s) => s.as_value().into(), + LazyRawListKind::Text_1_1(s) => s.as_value().into(), + LazyRawListKind::Binary_1_1(s) => s.as_value().into(), + } + } +} + impl<'top> LazyRawSequence<'top, AnyEncoding> for LazyRawAnyList<'top> { type Iterator = RawAnyListIterator<'top>; @@ -756,15 +878,6 @@ impl<'top> LazyRawSequence<'top, AnyEncoding> for LazyRawAnyList<'top> { }, } } - - fn as_value(&self) -> LazyRawAnyValue<'top> { - match &self.encoding { - LazyRawListKind::Text_1_0(s) => s.as_value().into(), - LazyRawListKind::Binary_1_0(s) => s.as_value().into(), - LazyRawListKind::Text_1_1(s) => s.as_value().into(), - LazyRawListKind::Binary_1_1(s) => s.as_value().into(), - } - } } impl<'data> From> for LazyRawAnyList<'data> { @@ -806,6 +919,12 @@ pub struct LazyRawAnySExp<'data> { encoding: LazyRawSExpKind<'data>, } +impl<'top> LazyRawAnySExp<'top> { + pub fn kind(&self) -> LazyRawSExpKind<'top> { + self.encoding + } +} + #[derive(Debug, Copy, Clone)] pub enum LazyRawSExpKind<'data> { Text_1_0(LazyRawTextSExp_1_0<'data>), @@ -814,6 +933,18 @@ pub enum LazyRawSExpKind<'data> { Binary_1_1(LazyRawBinarySExp_1_1<'data>), } +impl<'top> LazyRawContainer<'top, AnyEncoding> for LazyRawAnySExp<'top> { + fn as_value(&self) -> ::Value<'top> { + use LazyRawSExpKind::*; + match self.encoding { + Text_1_0(s) => s.as_value().into(), + Binary_1_0(s) => s.as_value().into(), + Text_1_1(s) => s.as_value().into(), + Binary_1_1(s) => s.as_value().into(), + } + } +} + impl<'data> LazyContainerPrivate<'data, AnyEncoding> for LazyRawAnySExp<'data> { fn from_value(value: LazyRawAnyValue<'data>) -> Self { match value.encoding { @@ -897,15 +1028,6 @@ impl<'top> LazyRawSequence<'top, AnyEncoding> for LazyRawAnySExp<'top> { }, } } - - fn as_value(&self) -> LazyRawAnyValue<'top> { - match &self.encoding { - LazyRawSExpKind::Text_1_0(s) => (s.as_value()).into(), - LazyRawSExpKind::Binary_1_0(s) => (s.as_value()).into(), - LazyRawSExpKind::Text_1_1(s) => (s.as_value()).into(), - LazyRawSExpKind::Binary_1_1(s) => (s.as_value()).into(), - } - } } impl<'data> From> for LazyRawAnySExp<'data> { @@ -955,6 +1077,96 @@ pub enum LazyRawStructKind<'data> { Binary_1_1(LazyRawBinaryStruct_1_1<'data>), } +impl<'top> LazyRawContainer<'top, AnyEncoding> for LazyRawAnyStruct<'top> { + fn as_value(&self) -> ::Value<'top> { + match self.encoding { + LazyRawStructKind::Text_1_0(s) => s.as_value().into(), + LazyRawStructKind::Binary_1_0(s) => s.as_value().into(), + LazyRawStructKind::Text_1_1(s) => s.as_value().into(), + LazyRawStructKind::Binary_1_1(s) => s.as_value().into(), + } + } +} + +#[derive(Debug, Copy, Clone)] +pub struct LazyRawAnyFieldName<'data> { + encoding: LazyRawFieldNameKind<'data>, +} + +#[derive(Debug, Copy, Clone)] +pub enum LazyRawFieldNameKind<'data> { + Text_1_0(LazyRawTextFieldName_1_0<'data>), + Binary_1_0(LazyRawBinaryFieldName_1_0<'data>), + Text_1_1(LazyRawTextFieldName_1_1<'data>), + Binary_1_1(LazyRawBinaryFieldName_1_1<'data>), +} + +impl<'top> HasSpan<'top> for LazyRawAnyFieldName<'top> { + fn span(&self) -> Span<'top> { + use LazyRawFieldNameKind::*; + match self.encoding { + Text_1_0(name) => name.span(), + Binary_1_0(name) => name.span(), + Text_1_1(name) => name.span(), + Binary_1_1(name) => name.span(), + } + } +} + +impl<'top> HasRange for LazyRawAnyFieldName<'top> { + fn range(&self) -> Range { + use LazyRawFieldNameKind::*; + match self.encoding { + Text_1_0(name) => name.range(), + Binary_1_0(name) => name.range(), + Text_1_1(name) => name.range(), + Binary_1_1(name) => name.range(), + } + } +} + +impl<'top> LazyRawFieldName<'top> for LazyRawAnyFieldName<'top> { + fn read(&self) -> IonResult> { + use LazyRawFieldNameKind::*; + match self.encoding { + Text_1_0(name) => name.read(), + Binary_1_0(name) => name.read(), + Text_1_1(name) => name.read(), + Binary_1_1(name) => name.read(), + } + } +} + +impl<'top> From> for LazyRawAnyFieldName<'top> { + fn from(value: LazyRawFieldNameKind<'top>) -> Self { + LazyRawAnyFieldName { encoding: value } + } +} + +impl<'top> From> for LazyRawAnyFieldName<'top> { + fn from(value: LazyRawTextFieldName_1_0<'top>) -> Self { + LazyRawFieldNameKind::Text_1_0(value).into() + } +} + +impl<'top> From> for LazyRawAnyFieldName<'top> { + fn from(value: LazyRawTextFieldName_1_1<'top>) -> Self { + LazyRawFieldNameKind::Text_1_1(value).into() + } +} + +impl<'top> From> for LazyRawAnyFieldName<'top> { + fn from(value: LazyRawBinaryFieldName_1_0<'top>) -> Self { + LazyRawFieldNameKind::Binary_1_0(value).into() + } +} + +impl<'top> From> for LazyRawAnyFieldName<'top> { + fn from(value: LazyRawBinaryFieldName_1_1<'top>) -> Self { + LazyRawFieldNameKind::Binary_1_1(value).into() + } +} + pub struct RawAnyStructIterator<'data> { encoding: RawAnyStructIteratorKind<'data>, } @@ -991,14 +1203,12 @@ impl<'data> From> for LazyRawFieldExpr<'data, AnyEncoding> { fn from(text_field: LazyRawFieldExpr<'data, TextEncoding_1_0>) -> Self { - let (name, value) = match text_field { - RawFieldExpr::NameValuePair(name, value) => (name, value), - RawFieldExpr::MacroInvocation(_) => { - unreachable!("macro invocation in Ion 1.0") - } - }; - // Convert the text-encoded value into an any-encoded value - RawFieldExpr::NameValuePair(name, value.into()) + use LazyRawFieldExpr::*; + match text_field { + NameValue(name, value) => NameValue(name.into(), value.into()), + NameEExp(_, _) => unreachable!("(name, e-exp) field in text Ion 1.0"), + EExp(_) => unreachable!("e-exp field in text Ion 1.0"), + } } } @@ -1006,14 +1216,12 @@ impl<'data> From> for LazyRawFieldExpr<'data, AnyEncoding> { fn from(binary_field: LazyRawFieldExpr<'data, BinaryEncoding_1_0>) -> Self { - let (name, value) = match binary_field { - RawFieldExpr::NameValuePair(name, value) => (name, value), - RawFieldExpr::MacroInvocation(_) => { - unreachable!("macro invocation in Ion 1.0") - } - }; - // Convert the binary-encoded value into an any-encoded value - RawFieldExpr::NameValuePair(name, value.into()) + use LazyRawFieldExpr::*; + match binary_field { + NameValue(name, value) => NameValue(name.into(), value.into()), + NameEExp(_, _) => unreachable!("(name, e-exp) field in binary Ion 1.0"), + EExp(_) => unreachable!("e-exp field in binary Ion 1.0"), + } } } @@ -1021,16 +1229,11 @@ impl<'data> From> for LazyRawFieldExpr<'data, AnyEncoding> { fn from(text_field: LazyRawFieldExpr<'data, TextEncoding_1_1>) -> Self { - use RawFieldExpr::{MacroInvocation as FieldMacroInvocation, NameValuePair}; - use RawValueExpr::{MacroInvocation as ValueMacroInvocation, ValueLiteral}; + use LazyRawFieldExpr::*; match text_field { - NameValuePair(name, ValueLiteral(value)) => { - NameValuePair(name, ValueLiteral(value.into())) - } - NameValuePair(name, ValueMacroInvocation(invocation)) => { - NameValuePair(name, ValueMacroInvocation(invocation.into())) - } - FieldMacroInvocation(invocation) => FieldMacroInvocation(invocation.into()), + NameValue(name, value) => NameValue(name.into(), value.into()), + NameEExp(name, eexp) => NameEExp(name.into(), eexp.into()), + EExp(eexp) => EExp(eexp.into()), } } } @@ -1039,14 +1242,12 @@ impl<'data> From> for LazyRawFieldExpr<'data, AnyEncoding> { fn from(binary_field: LazyRawFieldExpr<'data, BinaryEncoding_1_1>) -> Self { - let (name, value) = match binary_field { - RawFieldExpr::NameValuePair(name, value) => (name, value), - RawFieldExpr::MacroInvocation(_) => { - todo!("macro invocation in Ion 1.1 binary not implemented") - } - }; - // Convert the binary-encoded value into an any-encoded value - RawFieldExpr::NameValuePair(name, value.into()) + use LazyRawFieldExpr::*; + match binary_field { + NameValue(name, value) => NameValue(name.into(), value.into()), + NameEExp(_, _) => todo!("(name, e-exp) field in binary Ion 1.1"), + EExp(_) => todo!("e-exp field in binary Ion 1.1"), + } } } @@ -1165,7 +1366,7 @@ mod tests { let allocator = BumpAllocator::new(); let mut reader = LazyRawAnyReader::new(data); - assert_eq!(reader.next(&allocator)?.expect_ivm()?, (1, 0)); + assert_eq!(reader.next(&allocator)?.expect_ivm()?.version(), (1, 0)); let _strukt = reader .next(&allocator)? .expect_value()? @@ -1211,7 +1412,7 @@ mod tests { assert!(matches!( reader.next(&allocator)?, - LazyRawStreamItem::::EndOfStream + LazyRawStreamItem::::EndOfStream(_) )); Ok(()) } diff --git a/src/lazy/binary/encoded_value.rs b/src/lazy/binary/encoded_value.rs index 6510c527..f2112c49 100644 --- a/src/lazy/binary/encoded_value.rs +++ b/src/lazy/binary/encoded_value.rs @@ -1,5 +1,4 @@ use crate::lazy::binary::raw::type_descriptor::Header; -use crate::types::SymbolId; use crate::IonType; use std::ops::Range; @@ -53,33 +52,29 @@ pub(crate) struct EncodedValue { // and IonType. pub(crate) header: HeaderType, - // Each encoded value has up to five components, appearing in the following order: + // Each encoded value has up to four components, appearing in the following order: // - // [ field_id? | annotations? | header (type descriptor) | header_length? | value ] + // [ annotations? | header (type descriptor) | header_length? | value_body ] // // Components shown with a `?` are optional. // // EncodedValue stores the offset of the type descriptor byte from the beginning of the // data source (`header_offset`). The lengths of the other fields can be used to calculate // their positions relative to the type descriptor byte. For example, to find the offset of the - // field ID (if present), we can do: - // header_offset - annotations_header_length - field_id_length + // annotations header (if present), we can do: + // header_offset - annotations_header_length // // This allows us to store a single `usize` for the header offset, while other lengths can be - // packed into a `u8`. Values are not permitted to have a field ID or annotations that take - // more than 255 bytes to represent. + // packed into a `u8`. In this implementation, values are not permitted to have annotations that + // take more than 255 bytes to represent. // // We store the offset for the header byte because it is guaranteed to be present for all values. - // Field IDs and annotations appear earlier in the stream but are optional. - - // The number of bytes used to encode the field ID (if present) preceding the Ion value. If - // `field_id` is undefined, `field_id_length` will be zero. - pub field_id_length: u8, - // If this value is inside a struct, `field_id` will contain the SymbolId that represents - // its field name. - pub field_id: Option, + // Annotations appear earlier in the stream but are optional. + // The number of bytes used to encode the annotations wrapper (if present) preceding the Ion - // value. If `annotations` is empty, `annotations_header_length` will be zero. + // value. If `annotations` is empty, `annotations_header_length` will be zero. The annotations + // wrapper contains several fields: an opcode, a wrapper length, a sequence length, and the + // sequence itself. pub annotations_header_length: u8, // The number of bytes used to encode the series of symbol IDs inside the annotations wrapper. pub annotations_sequence_length: u8, @@ -89,9 +84,9 @@ pub(crate) struct EncodedValue { pub length_length: u8, // The number of bytes used to encode the value itself, not including the header byte // or length fields. - pub value_length: usize, + pub value_body_length: usize, // The sum total of: - // field_id_length + annotations_header_length + header_length + value_length + // annotations_header_length + header_length + value_length // While this can be derived from the above fields, storing it for reuse offers a modest // optimization. `total_length` is needed when stepping into a value, skipping a value, // and reading a value's data. @@ -127,53 +122,27 @@ impl EncodedValue { /// If the value can fit in the type descriptor byte (e.g. `true`, `false`, `null`, `0`), /// this function will return 0. #[inline(always)] - pub fn value_length(&self) -> usize { - self.value_length + pub fn value_body_length(&self) -> usize { + self.value_body_length } /// The offset of the first byte following the header (including length bytes, if present). /// If `value_length()` returns zero, this offset is actually the first byte of /// the next encoded value and should not be read. - pub fn value_offset(&self) -> usize { + pub fn value_body_offset(&self) -> usize { self.header_offset + self.header_length() } /// Returns an offset Range containing any bytes following the header. - pub fn value_range(&self) -> Range { - let start = self.value_offset(); - let end = start + self.value_length; + pub fn value_body_range(&self) -> Range { + let start = self.value_body_offset(); + let end = start + self.value_body_length; start..end } /// Returns the index of the first byte that is beyond the end of the current value's encoding. pub fn value_end_exclusive(&self) -> usize { - self.value_offset() + self.value_length - } - - /// Returns the number of bytes used to encode this value's field ID, if present. - pub fn field_id_length(&self) -> Option { - self.field_id.as_ref()?; - Some(self.field_id_length as usize) - } - - /// Returns the offset of the first byte used to encode this value's field ID, if present. - pub fn field_id_offset(&self) -> Option { - self.field_id.as_ref()?; - Some( - self.header_offset - - self.annotations_header_length as usize - - self.field_id_length as usize, - ) - } - - /// Returns an offset Range that contains the bytes used to encode this value's field ID, - /// if present. - pub fn field_id_range(&self) -> Option> { - if let Some(start) = self.field_id_offset() { - let end = start + self.field_id_length as usize; - return Some(start..end); - } - None + self.value_body_offset() + self.value_body_length } /// Returns true if this encoded value has an annotations wrapper. @@ -233,20 +202,28 @@ impl EncodedValue { None } - /// Returns the total number of bytes used to represent the current value, including the - /// field ID (if any), its annotations (if any), its header (type descriptor + length bytes), - /// and its value. + /// Returns the total number of bytes used to represent the current value, including + /// its annotations (if any), its header (type descriptor + length bytes), and the body of + /// the value. pub fn total_length(&self) -> usize { self.total_length } /// The offset Range (starting from the beginning of the stream) that contains this value's - /// complete encoding, including annotations. (It does not include the leading field ID, if - /// any.) + /// complete encoding, including annotations. pub fn annotated_value_range(&self) -> Range { - // [ field_id? | annotations? | header (type descriptor) | header_length? | value ] + // [ annotations? | header (type descriptor) | header_length? | value ] + let start = self.header_offset - self.annotations_header_length as usize; + let end = start + self.total_length; + start..end + } + + /// The offset Range (starting from the beginning of the stream) that contains this value's + /// complete encoding, not including any annotations. + pub fn unannotated_value_range(&self) -> Range { + // [ annotations? | header (type descriptor) | header_length? | value ] let start = self.header_offset - self.annotations_header_length as usize; - let end = start - self.field_id_length as usize + self.total_length; + let end = start + self.total_length; start..end } @@ -264,20 +241,18 @@ mod tests { #[test] fn accessors() -> IonResult<()> { - // 3-byte String with 1-byte annotation and field ID $10 + // 3-byte String with 1-byte annotation let value = EncodedValue { header: Header { ion_type: IonType::String, ion_type_code: IonTypeCode::String, length_code: 3, }, - field_id_length: 1, - field_id: Some(10), annotations_header_length: 3, annotations_sequence_length: 1, header_offset: 200, length_length: 0, - value_length: 3, + value_body_length: 3, total_length: 7, }; assert_eq!(value.ion_type(), IonType::String); @@ -292,18 +267,15 @@ mod tests { assert_eq!(value.header_offset(), 200); assert_eq!(value.header_length(), 1); assert_eq!(value.header_range(), 200..201); - assert_eq!(value.field_id_length(), Some(1)); - assert_eq!(value.field_id_offset(), Some(196)); - assert_eq!(value.field_id_range(), Some(196..197)); assert!(value.has_annotations()); assert_eq!(value.annotations_range(), Some(197..200)); assert_eq!(value.annotations_header_length(), Some(3)); assert_eq!(value.annotations_sequence_offset(), Some(199)); assert_eq!(value.annotations_sequence_length(), Some(1)); assert_eq!(value.annotations_sequence_range(), Some(199..200)); - assert_eq!(value.value_length(), 3); - assert_eq!(value.value_offset(), 201); - assert_eq!(value.value_range(), 201..204); + assert_eq!(value.value_body_length(), 3); + assert_eq!(value.value_body_offset(), 201); + assert_eq!(value.value_body_range(), 201..204); assert_eq!(value.value_end_exclusive(), 204); assert_eq!(value.total_length(), 7); Ok(()) diff --git a/src/lazy/binary/immutable_buffer.rs b/src/lazy/binary/immutable_buffer.rs index 6f0c29bd..3a02fad5 100644 --- a/src/lazy/binary/immutable_buffer.rs +++ b/src/lazy/binary/immutable_buffer.rs @@ -1,19 +1,25 @@ +use std::fmt::{Debug, Formatter}; +use std::mem; +use std::ops::Range; + +use num_bigint::{BigInt, BigUint, Sign}; + use crate::binary::constants::v1_0::{length_codes, IVM}; use crate::binary::int::DecodedInt; use crate::binary::uint::DecodedUInt; use crate::binary::var_int::VarInt; use crate::binary::var_uint::VarUInt; use crate::lazy::binary::encoded_value::EncodedValue; +use crate::lazy::binary::raw::r#struct::LazyRawBinaryFieldName_1_0; use crate::lazy::binary::raw::type_descriptor::{Header, TypeDescriptor, ION_1_0_TYPE_DESCRIPTORS}; -use crate::lazy::binary::raw::value::LazyRawBinaryValue_1_0; +use crate::lazy::binary::raw::value::{LazyRawBinaryValue_1_0, LazyRawBinaryVersionMarker_1_0}; +use crate::lazy::decoder::LazyRawFieldExpr; use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt; use crate::lazy::encoder::binary::v1_1::flex_uint::FlexUInt; +use crate::lazy::encoding::BinaryEncoding_1_0; use crate::result::IonFailure; use crate::types::UInt; use crate::{Int, IonError, IonResult, IonType}; -use num_bigint::{BigInt, BigUint, Sign}; -use std::fmt::{Debug, Formatter}; -use std::mem; // This limit is used for stack-allocating buffer space to encode/decode UInts. const UINT_STACK_BUFFER_SIZE: usize = 16; @@ -69,7 +75,7 @@ impl<'a> ImmutableBuffer<'a> { } /// Returns a slice containing all of the buffer's bytes. - pub fn bytes(&self) -> &[u8] { + pub fn bytes(&self) -> &'a [u8] { self.data } @@ -100,6 +106,10 @@ impl<'a> ImmutableBuffer<'a> { self.data.len() } + pub fn range(&self) -> Range { + self.offset..self.offset + self.len() + } + /// Returns `true` if there are no bytes in the buffer. Otherwise, returns `false`. pub fn is_empty(&self) -> bool { self.data.is_empty() @@ -143,15 +153,16 @@ impl<'a> ImmutableBuffer<'a> { /// returns an `Ok(_)` containing a `(major, minor)` version tuple. /// /// See: - pub fn read_ivm(self) -> ParseResult<'a, (u8, u8)> { + pub fn read_ivm(self) -> ParseResult<'a, LazyRawBinaryVersionMarker_1_0<'a>> { let bytes = self .peek_n_bytes(IVM.len()) .ok_or_else(|| IonError::incomplete("an IVM", self.offset()))?; match bytes { [0xE0, major, minor, 0xEA] => { - let version = (*major, *minor); - Ok((version, self.consume(IVM.len()))) + let matched = ImmutableBuffer::new_with_offset(bytes, self.offset); + let marker = LazyRawBinaryVersionMarker_1_0::new(matched, *major, *minor); + Ok((marker, self.consume(IVM.len()))) } invalid_ivm => IonResult::decoding_error(format!("invalid IVM: {invalid_ivm:?}")), } @@ -607,7 +618,7 @@ impl<'a> ImmutableBuffer<'a> { } /// Reads a field ID and a value from the buffer. - pub(crate) fn peek_field(self) -> IonResult>> { + pub(crate) fn peek_field(self) -> IonResult>> { let mut input = self; if self.is_empty() { // We're at the end of the struct @@ -625,7 +636,7 @@ impl<'a> ImmutableBuffer<'a> { let mut type_descriptor = input_after_field_id.peek_type_descriptor()?; if type_descriptor.is_nop() { // Read past NOP fields until we find the first one that's an actual value - // or we run out of struct bytes. Note that we read the NOP field(s) from `self` (the + // or we run out of struct bytes. Note that we read the NOP field(s) from `input` (the // initial input) rather than `input_after_field_id` because it simplifies // the logic of `read_struct_field_nop_pad()`, which is very rarely called. (field_id_var_uint, input_after_field_id) = match input.read_struct_field_nop_pad()? { @@ -643,15 +654,12 @@ impl<'a> ImmutableBuffer<'a> { }; } - let field_id_length = field_id_var_uint.size_in_bytes() as u8; let field_id = field_id_var_uint.value(); + let matched_field_id = input.slice(0, field_id_var_uint.size_in_bytes()); + let field_name = LazyRawBinaryFieldName_1_0::new(field_id, matched_field_id); - let mut value = input_after_field_id.read_value(type_descriptor)?; - value.encoded_value.field_id = Some(field_id); - value.encoded_value.field_id_length = field_id_length; - value.encoded_value.total_length += field_id_length as usize; - value.input = input; - Ok(Some(value)) + let field_value = input_after_field_id.read_value(type_descriptor)?; + Ok(Some(LazyRawFieldExpr::NameValue(field_name, field_value))) } #[cold] @@ -745,15 +753,12 @@ impl<'a> ImmutableBuffer<'a> { let encoded_value = EncodedValue { header, - // If applicable, these are populated by the caller: `peek_field()` - field_id_length: 0, - field_id: None, // If applicable, these are populated by the caller: `read_annotated_value()` annotations_header_length: 0, annotations_sequence_length: 0, header_offset, length_length, - value_length, + value_body_length: value_length, total_length, }; let lazy_value = LazyRawBinaryValue_1_0 { @@ -810,10 +815,12 @@ pub struct AnnotationsWrapper { #[cfg(test)] mod tests { - use super::*; - use crate::IonError; use num_traits::Num; + use crate::IonError; + + use super::*; + fn input_test>(input: A) { let input = ImmutableBuffer::new(input.as_ref()); // We can peek at the first byte... diff --git a/src/lazy/binary/raw/reader.rs b/src/lazy/binary/raw/reader.rs index a7e5a986..cfaac215 100644 --- a/src/lazy/binary/raw/reader.rs +++ b/src/lazy/binary/raw/reader.rs @@ -2,9 +2,11 @@ use crate::lazy::binary::immutable_buffer::ImmutableBuffer; use crate::lazy::binary::raw::value::LazyRawBinaryValue_1_0; -use crate::lazy::decoder::{LazyDecoder, LazyRawReader}; +use crate::lazy::decoder::{ + HasRange, LazyDecoder, LazyRawFieldExpr, LazyRawReader, RawVersionMarker, +}; use crate::lazy::encoding::BinaryEncoding_1_0; -use crate::lazy::raw_stream_item::{LazyRawStreamItem, RawStreamItem}; +use crate::lazy::raw_stream_item::{EndPosition, LazyRawStreamItem, RawStreamItem}; use crate::result::IonFailure; use crate::IonResult; @@ -40,16 +42,18 @@ impl<'data> LazyRawBinaryReader_1_0<'data> { where 'data: 'top, { - let ((major, minor), _buffer_after_ivm) = buffer.read_ivm()?; + let (marker, _buffer_after_ivm) = buffer.read_ivm()?; + let (major, minor) = marker.version(); if (major, minor) != (1, 0) { return IonResult::decoding_error(format!( - "unsupported version of Ion: v{}.{}; only 1.0 is supported", - major, minor, + "unsupported version of Ion: v{major}.{minor}; only 1.0 is supported" )); } self.data.buffer = buffer; self.data.bytes_to_skip = 4; // IVM length - Ok(LazyRawStreamItem::::VersionMarker(1, 0)) + Ok(LazyRawStreamItem::::VersionMarker( + marker, + )) } fn read_value<'top>( @@ -61,7 +65,11 @@ impl<'data> LazyRawBinaryReader_1_0<'data> { { let lazy_value = match ImmutableBuffer::peek_sequence_value(buffer)? { Some(lazy_value) => lazy_value, - None => return Ok(LazyRawStreamItem::::EndOfStream), + None => { + return Ok(LazyRawStreamItem::::EndOfStream( + EndPosition::new(self.position()), + )) + } }; self.data.buffer = buffer; self.data.bytes_to_skip = lazy_value.encoded_value.total_length(); @@ -75,7 +83,9 @@ impl<'data> LazyRawBinaryReader_1_0<'data> { // Get a new buffer view starting beyond the last item we returned. let mut buffer = self.data.advance_to_next_item()?; if buffer.is_empty() { - return Ok(LazyRawStreamItem::::EndOfStream); + return Ok(LazyRawStreamItem::::EndOfStream( + EndPosition::new(self.position()), + )); } // Peek at the first byte in the new buffer view let mut type_descriptor = buffer.peek_type_descriptor()?; @@ -84,7 +94,9 @@ impl<'data> LazyRawBinaryReader_1_0<'data> { // ...advance until we find something that isn't a nop. (_, buffer) = buffer.consume_nop_padding(type_descriptor)?; if buffer.is_empty() { - return Ok(LazyRawStreamItem::::EndOfStream); + return Ok(LazyRawStreamItem::::EndOfStream( + EndPosition::new(buffer.offset()), + )); } type_descriptor = buffer.peek_type_descriptor()?; } @@ -168,7 +180,7 @@ impl<'data> DataSource<'data> { /// If it succeeds, marks the `DataSource` as ready to advance by the 'n' bytes /// that were consumed. /// If it does not succeed, the `DataSource` remains unchanged. - pub(crate) fn try_parse_next< + pub(crate) fn try_parse_next_value< F: Fn(ImmutableBuffer<'data>) -> IonResult>>, >( &mut self, @@ -188,12 +200,41 @@ impl<'data> DataSource<'data> { self.bytes_to_skip = lazy_value.encoded_value.total_length(); Ok(Some(lazy_value)) } + + /// Runs the provided parsing function on this DataSource's buffer. + /// If it succeeds, marks the `DataSource` as ready to advance by the 'n' bytes + /// that were consumed. + /// If it does not succeed, the `DataSource` remains unchanged. + pub(crate) fn try_parse_next_field< + F: Fn( + ImmutableBuffer<'data>, + ) -> IonResult>>, + >( + &mut self, + parser: F, + ) -> IonResult>> { + let buffer = self.advance_to_next_item()?; + + let field = match parser(buffer) { + Ok(Some(output)) => output, + Ok(None) => return Ok(None), + Err(e) => return Err(e), + }; + + // If the field name we read doesn't start where we began reading, there was a NOP field. + let field_range = field.range(); + let num_nop_bytes = field_range.start - buffer.offset(); + self.buffer = buffer.consume(num_nop_bytes); + self.bytes_to_skip = field_range.end - self.buffer.offset(); + Ok(Some(field)) + } } #[cfg(test)] mod tests { use crate::lazy::binary::raw::reader::LazyRawBinaryReader_1_0; use crate::lazy::binary::test_utilities::to_binary_ion; + use crate::lazy::decoder::{LazyRawFieldName, RawVersionMarker}; use crate::lazy::raw_stream_item::RawStreamItem; use crate::raw_symbol_token_ref::AsRawSymbolTokenRef; use crate::{IonResult, IonType, RawSymbolTokenRef}; @@ -212,7 +253,7 @@ mod tests { let lazy_struct = value.read()?.expect_struct()?; let mut fields = lazy_struct.iter(); let (name, _value) = fields.next().expect("field 1")?.expect_name_value()?; - assert_eq!(name, 4.as_raw_symbol_token_ref()); // 'name' + assert_eq!(name.read()?, 4.as_raw_symbol_token_ref()); // 'name' Ok(()) } @@ -272,11 +313,11 @@ mod tests { loop { use RawStreamItem::*; match reader.next()? { - VersionMarker(major, minor) => { - println!("IVM: v{}.{}", major, minor) + VersionMarker(marker) => { + println!("IVM: v{}.{}", marker.major(), marker.minor()) } Value(value) => println!("{:?}", value.read()?), - EndOfStream => break, + EndOfStream(_) => break, EExpression(_) => unreachable!("No macros in Ion 1.0"), } } diff --git a/src/lazy/binary/raw/sequence.rs b/src/lazy/binary/raw/sequence.rs index bec9079d..1480190f 100644 --- a/src/lazy/binary/raw/sequence.rs +++ b/src/lazy/binary/raw/sequence.rs @@ -5,7 +5,9 @@ use crate::lazy::binary::raw::annotations_iterator::RawBinaryAnnotationsIterator use crate::lazy::binary::raw::reader::DataSource; use crate::lazy::binary::raw::value::LazyRawBinaryValue_1_0; use crate::lazy::decoder::private::LazyContainerPrivate; -use crate::lazy::decoder::{LazyRawSequence, LazyRawValueExpr, RawValueExpr}; +use crate::lazy::decoder::{ + LazyDecoder, LazyRawContainer, LazyRawSequence, LazyRawValueExpr, RawValueExpr, +}; use crate::lazy::encoding::BinaryEncoding_1_0; use crate::{IonResult, IonType}; use std::fmt::{Debug, Formatter}; @@ -15,6 +17,12 @@ pub struct LazyRawBinaryList_1_0<'top> { pub(crate) sequence: LazyRawBinarySequence_1_0<'top>, } +impl<'top> LazyRawBinaryList_1_0<'top> { + pub fn as_value(&self) -> LazyRawBinaryValue_1_0<'top> { + self.sequence.value + } +} + #[derive(Debug, Copy, Clone)] pub struct LazyRawBinarySExp_1_0<'top> { pub(crate) sequence: LazyRawBinarySequence_1_0<'top>, @@ -28,6 +36,12 @@ impl<'top> LazyContainerPrivate<'top, BinaryEncoding_1_0> for LazyRawBinaryList_ } } +impl<'top> LazyRawContainer<'top, BinaryEncoding_1_0> for LazyRawBinaryList_1_0<'top> { + fn as_value(&self) -> ::Value<'top> { + self.sequence.value + } +} + impl<'top> LazyRawSequence<'top, BinaryEncoding_1_0> for LazyRawBinaryList_1_0<'top> { type Iterator = RawBinarySequenceIterator_1_0<'top>; @@ -42,10 +56,6 @@ impl<'top> LazyRawSequence<'top, BinaryEncoding_1_0> for LazyRawBinaryList_1_0<' fn iter(&self) -> Self::Iterator { self.sequence.iter() } - - fn as_value(&self) -> LazyRawBinaryValue_1_0<'top> { - self.sequence.value - } } impl<'top> LazyContainerPrivate<'top, BinaryEncoding_1_0> for LazyRawBinarySExp_1_0<'top> { @@ -56,6 +66,12 @@ impl<'top> LazyContainerPrivate<'top, BinaryEncoding_1_0> for LazyRawBinarySExp_ } } +impl<'top> LazyRawContainer<'top, BinaryEncoding_1_0> for LazyRawBinarySExp_1_0<'top> { + fn as_value(&self) -> ::Value<'top> { + self.sequence.value + } +} + impl<'top> LazyRawSequence<'top, BinaryEncoding_1_0> for LazyRawBinarySExp_1_0<'top> { type Iterator = RawBinarySequenceIterator_1_0<'top>; @@ -70,10 +86,6 @@ impl<'top> LazyRawSequence<'top, BinaryEncoding_1_0> for LazyRawBinarySExp_1_0<' fn iter(&self) -> Self::Iterator { self.sequence.iter() } - - fn as_value(&self) -> LazyRawBinaryValue_1_0<'top> { - self.sequence.value - } } #[derive(Copy, Clone)] @@ -145,7 +157,7 @@ impl<'top> Iterator for RawBinarySequenceIterator_1_0<'top> { fn next(&mut self) -> Option { match self .source - .try_parse_next(ImmutableBuffer::peek_sequence_value) + .try_parse_next_value(ImmutableBuffer::peek_sequence_value) { Ok(Some(value)) => Some(Ok(RawValueExpr::ValueLiteral(value))), Ok(None) => None, diff --git a/src/lazy/binary/raw/struct.rs b/src/lazy/binary/raw/struct.rs index 5f58feaa..e181cd62 100644 --- a/src/lazy/binary/raw/struct.rs +++ b/src/lazy/binary/raw/struct.rs @@ -2,25 +2,32 @@ use std::fmt; use std::fmt::{Debug, Formatter}; +use std::ops::Range; use crate::lazy::binary::immutable_buffer::ImmutableBuffer; use crate::lazy::binary::raw::annotations_iterator::RawBinaryAnnotationsIterator; use crate::lazy::binary::raw::reader::DataSource; use crate::lazy::binary::raw::value::LazyRawBinaryValue_1_0; -use crate::lazy::decoder::private::{ - LazyContainerPrivate, LazyRawFieldPrivate, LazyRawValuePrivate, -}; +use crate::lazy::decoder::private::LazyContainerPrivate; use crate::lazy::decoder::{ - LazyRawField, LazyRawFieldExpr, LazyRawStruct, RawFieldExpr, RawValueExpr, + HasRange, HasSpan, LazyDecoder, LazyRawContainer, LazyRawFieldExpr, LazyRawFieldName, + LazyRawStruct, }; use crate::lazy::encoding::BinaryEncoding_1_0; -use crate::{IonResult, RawSymbolTokenRef}; +use crate::lazy::span::Span; +use crate::{IonResult, RawSymbolTokenRef, SymbolId}; #[derive(Copy, Clone)] pub struct LazyRawBinaryStruct_1_0<'top> { pub(crate) value: LazyRawBinaryValue_1_0<'top>, } +impl<'top> LazyRawBinaryStruct_1_0<'top> { + pub fn as_value(&self) -> LazyRawBinaryValue_1_0<'top> { + self.value + } +} + impl<'a, 'top> IntoIterator for &'a LazyRawBinaryStruct_1_0<'top> { type Item = IonResult>; type IntoIter = RawBinaryStructIterator_1_0<'top>; @@ -62,6 +69,12 @@ impl<'top> LazyContainerPrivate<'top, BinaryEncoding_1_0> for LazyRawBinaryStruc } } +impl<'top> LazyRawContainer<'top, BinaryEncoding_1_0> for LazyRawBinaryStruct_1_0<'top> { + fn as_value(&self) -> ::Value<'top> { + self.value + } +} + impl<'top> LazyRawStruct<'top, BinaryEncoding_1_0> for LazyRawBinaryStruct_1_0<'top> { type Iterator = RawBinaryStructIterator_1_0<'top>; @@ -90,65 +103,93 @@ impl<'top> Iterator for RawBinaryStructIterator_1_0<'top> { type Item = IonResult>; fn next(&mut self) -> Option { - match self.source.try_parse_next(ImmutableBuffer::peek_field) { - Ok(Some(lazy_raw_value)) => Some(Ok(RawFieldExpr::NameValuePair( - lazy_raw_value.field_name().unwrap(), - RawValueExpr::ValueLiteral(lazy_raw_value), - ))), - Ok(None) => None, - Err(e) => Some(Err(e)), - } + self.source + .try_parse_next_field(ImmutableBuffer::peek_field) + .transpose() } } -#[derive(Copy, Clone)] -pub struct LazyRawBinaryField<'top> { - pub(crate) value: LazyRawBinaryValue_1_0<'top>, +#[derive(Debug, Copy, Clone)] +pub struct LazyRawBinaryFieldName_1_0<'top> { + // The field ID has to be read in order to discover its length, so we store it here to avoid + // needing to re-read it. + field_id: SymbolId, + matched: ImmutableBuffer<'top>, } -impl<'top> LazyRawBinaryField<'top> { - pub(crate) fn new(value: LazyRawBinaryValue_1_0<'top>) -> Self { - LazyRawBinaryField { value } - } - - pub fn name(&self) -> RawSymbolTokenRef<'top> { - // We're in a struct field, the field ID must be populated. - let field_id = self.value.encoded_value.field_id.unwrap(); - RawSymbolTokenRef::SymbolId(field_id) - } - - pub fn value(&self) -> LazyRawBinaryValue_1_0<'top> { - self.value - } - - pub(crate) fn into_value(self) -> LazyRawBinaryValue_1_0<'top> { - self.value +impl<'top> LazyRawBinaryFieldName_1_0<'top> { + pub fn new(field_id: SymbolId, matched: ImmutableBuffer<'top>) -> Self { + Self { field_id, matched } } } -impl<'top> LazyRawFieldPrivate<'top, BinaryEncoding_1_0> for LazyRawBinaryField<'top> { - fn into_value(self) -> LazyRawBinaryValue_1_0<'top> { - self.value +impl<'top> HasSpan<'top> for LazyRawBinaryFieldName_1_0<'top> { + fn span(&self) -> Span<'top> { + Span::with_offset(self.matched.offset(), self.matched.bytes()) } } -impl<'top> LazyRawField<'top, BinaryEncoding_1_0> for LazyRawBinaryField<'top> { - fn name(&self) -> RawSymbolTokenRef<'top> { - LazyRawBinaryField::name(self) +impl<'top> HasRange for LazyRawBinaryFieldName_1_0<'top> { + fn range(&self) -> Range { + self.matched.range() } +} - fn value(&self) -> LazyRawBinaryValue_1_0<'top> { - self.value() +impl<'top> LazyRawFieldName<'top> for LazyRawBinaryFieldName_1_0<'top> { + fn read(&self) -> IonResult> { + Ok(RawSymbolTokenRef::SymbolId(self.field_id)) } } -impl<'top> Debug for LazyRawBinaryField<'top> { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!( - f, - "${}: {:?}", - self.value.encoded_value.field_id.unwrap(), - self.value() - ) +#[cfg(test)] +mod tests { + use std::ops::Range; + + use crate::lazy::binary::raw::reader::LazyRawBinaryReader_1_0; + use crate::IonResult; + + use super::*; + + #[test] + #[allow(clippy::single_range_in_vec_init)] + fn field_name_ranges() -> IonResult<()> { + // For each pair below, we'll confirm that the top-level struct's field names are found to + // occupy the specified input ranges. + type FieldNameAndRange<'a> = (RawSymbolTokenRef<'a>, Range); + type FieldTest<'a> = (&'a [u8], &'a [FieldNameAndRange<'a>]); + let tests: &[FieldTest] = &[ + // (Ion input, expected ranges of the struct's field names) + ( + &[0xD2, 0x84, 0x80], // {name: ""} + &[(RawSymbolTokenRef::SymbolId(4), 1..2)], + ), + ]; + for (input, field_name_ranges) in tests { + let mut reader = LazyRawBinaryReader_1_0::new(input); + let struct_ = reader.next()?.expect_value()?.read()?.expect_struct()?; + for (field_result, (expected_name, range)) in + struct_.iter().zip(field_name_ranges.iter()) + { + let name = field_result?.name(); + assert_eq!( + name.read()?, + *expected_name, + "span failure for input {input:0X?} -> field {name:?}" + ); + assert_eq!( + name.range(), + *range, + "range failure for input {input:0X?} -> field {name:?}" + ); + println!( + "SUCCESS: input {:0X?} -> field {:?} -> {:0X?} ({:?})", + input, + expected_name, + name.span(), + name.range() + ); + } + } + Ok(()) } } diff --git a/src/lazy/binary/raw/v1_1/immutable_buffer.rs b/src/lazy/binary/raw/v1_1/immutable_buffer.rs index d28cc67a..f2526c8f 100644 --- a/src/lazy/binary/raw/v1_1/immutable_buffer.rs +++ b/src/lazy/binary/raw/v1_1/immutable_buffer.rs @@ -1,7 +1,9 @@ use crate::binary::constants::v1_1::IVM; use crate::binary::var_uint::VarUInt; use crate::lazy::binary::encoded_value::EncodedValue; -use crate::lazy::binary::raw::v1_1::value::LazyRawBinaryValue_1_1; +use crate::lazy::binary::raw::v1_1::value::{ + LazyRawBinaryValue_1_1, LazyRawBinaryVersionMarker_1_1, +}; use crate::lazy::binary::raw::v1_1::{Header, LengthType, Opcode, ION_1_1_OPCODES}; use crate::lazy::encoder::binary::v1_1::fixed_int::FixedInt; use crate::lazy::encoder::binary::v1_1::flex_int::FlexInt; @@ -9,6 +11,7 @@ use crate::lazy::encoder::binary::v1_1::flex_uint::FlexUInt; use crate::result::IonFailure; use crate::{IonError, IonResult}; use std::fmt::{Debug, Formatter}; +use std::ops::Range; // This limit is used for stack-allocating buffer space to encode/decode UInts. const UINT_STACK_BUFFER_SIZE: usize = 16; @@ -64,7 +67,7 @@ impl<'a> ImmutableBuffer<'a> { } /// Returns a slice containing all of the buffer's bytes. - pub fn bytes(&self) -> &[u8] { + pub fn bytes(&self) -> &'a [u8] { self.data } @@ -95,6 +98,10 @@ impl<'a> ImmutableBuffer<'a> { self.data.len() } + pub fn range(&self) -> Range { + self.offset..self.offset + self.len() + } + /// Returns `true` if there are no bytes in the buffer. Otherwise, returns `false`. pub fn is_empty(&self) -> bool { self.data.is_empty() @@ -138,15 +145,16 @@ impl<'a> ImmutableBuffer<'a> { /// returns an `Ok(_)` containing a `(major, minor)` version tuple. /// /// See: - pub fn read_ivm(self) -> ParseResult<'a, (u8, u8)> { + pub fn read_ivm(self) -> ParseResult<'a, LazyRawBinaryVersionMarker_1_1<'a>> { let bytes = self .peek_n_bytes(IVM.len()) .ok_or_else(|| IonError::incomplete("an IVM", self.offset()))?; match bytes { [0xE0, major, minor, 0xEA] => { - let version = (*major, *minor); - Ok((version, self.consume(IVM.len()))) + let matched = ImmutableBuffer::new_with_offset(bytes, self.offset); + let marker = LazyRawBinaryVersionMarker_1_1::new(matched, *major, *minor); + Ok((marker, self.consume(IVM.len()))) } invalid_ivm => IonResult::decoding_error(format!("invalid IVM: {invalid_ivm:?}")), } @@ -312,15 +320,12 @@ impl<'a> ImmutableBuffer<'a> { let encoded_value = EncodedValue { header, - // If applicable, these are populated by the caller: `peek_field()` - field_id_length: 0, - field_id: None, // If applicable, these are populated by the caller: `read_annotated_value()` annotations_header_length: 0, annotations_sequence_length: 0, header_offset, length_length, - value_length, + value_body_length: value_length, total_length, }; let lazy_value = LazyRawBinaryValue_1_1 { diff --git a/src/lazy/binary/raw/v1_1/reader.rs b/src/lazy/binary/raw/v1_1/reader.rs index 7c19494a..9800c402 100644 --- a/src/lazy/binary/raw/v1_1/reader.rs +++ b/src/lazy/binary/raw/v1_1/reader.rs @@ -2,10 +2,10 @@ use crate::lazy::binary::raw::v1_1::immutable_buffer::ImmutableBuffer; use crate::lazy::binary::raw::v1_1::value::LazyRawBinaryValue_1_1; -use crate::lazy::decoder::{LazyDecoder, LazyRawReader}; +use crate::lazy::decoder::{LazyDecoder, LazyRawReader, RawVersionMarker}; use crate::lazy::encoder::private::Sealed; use crate::lazy::encoding::BinaryEncoding_1_1; -use crate::lazy::raw_stream_item::{LazyRawStreamItem, RawStreamItem}; +use crate::lazy::raw_stream_item::{EndPosition, LazyRawStreamItem, RawStreamItem}; use crate::result::IonFailure; use crate::IonResult; @@ -36,16 +36,18 @@ impl<'data> LazyRawBinaryReader_1_1<'data> { where 'data: 'top, { - let ((major, minor), _buffer_after_ivm) = buffer.read_ivm()?; + let (marker, _buffer_after_ivm) = buffer.read_ivm()?; + let (major, minor) = marker.version(); if (major, minor) != (1, 1) { return IonResult::decoding_error(format!( - "unsupported version of Ion: v{}.{}; only 1.1 is supported by this reader", - major, minor, + "unsupported version of Ion: v{major}.{minor}; only 1.1 is supported by this reader", )); } self.data = buffer; self.bytes_to_skip = 4; - Ok(LazyRawStreamItem::::VersionMarker(1, 1)) + Ok(LazyRawStreamItem::::VersionMarker( + marker, + )) } fn read_value<'top>( @@ -57,7 +59,11 @@ impl<'data> LazyRawBinaryReader_1_1<'data> { { let lazy_value = match ImmutableBuffer::peek_sequence_value(buffer)? { Some(lazy_value) => lazy_value, - None => return Ok(LazyRawStreamItem::::EndOfStream), + None => { + return Ok(LazyRawStreamItem::::EndOfStream( + EndPosition::new(self.position()), + )) + } }; self.data = buffer; self.bytes_to_skip = lazy_value.encoded_value.total_length(); @@ -85,14 +91,18 @@ impl<'data> LazyRawBinaryReader_1_1<'data> { { let mut buffer = self.advance_to_next_item()?; if buffer.is_empty() { - return Ok(LazyRawStreamItem::::EndOfStream); + return Ok(LazyRawStreamItem::::EndOfStream( + EndPosition::new(buffer.offset()), + )); } let type_descriptor = buffer.peek_opcode()?; if type_descriptor.is_nop() { (_, buffer) = buffer.consume_nop_padding(type_descriptor)?; if buffer.is_empty() { - return Ok(LazyRawStreamItem::::EndOfStream); + return Ok(LazyRawStreamItem::::EndOfStream( + EndPosition::new(buffer.offset()), + )); } } if type_descriptor.is_ivm_start() { diff --git a/src/lazy/binary/raw/v1_1/sequence.rs b/src/lazy/binary/raw/v1_1/sequence.rs index 5a0f3369..dcdf5e8e 100644 --- a/src/lazy/binary/raw/v1_1/sequence.rs +++ b/src/lazy/binary/raw/v1_1/sequence.rs @@ -4,7 +4,9 @@ use crate::lazy::binary::raw::v1_1::annotations_iterator::RawBinaryAnnotationsIt use crate::lazy::binary::raw::v1_1::immutable_buffer::ImmutableBuffer; use crate::lazy::binary::raw::v1_1::value::LazyRawBinaryValue_1_1; use crate::lazy::decoder::private::LazyContainerPrivate; -use crate::lazy::decoder::{LazyRawSequence, LazyRawValueExpr, RawValueExpr}; +use crate::lazy::decoder::{ + LazyDecoder, LazyRawContainer, LazyRawSequence, LazyRawValueExpr, RawValueExpr, +}; use crate::lazy::encoding::BinaryEncoding_1_1; use crate::{IonResult, IonType}; use std::fmt::{Debug, Formatter}; @@ -27,6 +29,12 @@ impl<'top> LazyContainerPrivate<'top, BinaryEncoding_1_1> for LazyRawBinaryList_ } } +impl<'top> LazyRawContainer<'top, BinaryEncoding_1_1> for LazyRawBinaryList_1_1<'top> { + fn as_value(&self) -> ::Value<'top> { + self.sequence.value + } +} + impl<'top> LazyRawSequence<'top, BinaryEncoding_1_1> for LazyRawBinaryList_1_1<'top> { type Iterator = RawBinarySequenceIterator_1_1<'top>; @@ -41,10 +49,6 @@ impl<'top> LazyRawSequence<'top, BinaryEncoding_1_1> for LazyRawBinaryList_1_1<' fn iter(&self) -> Self::Iterator { self.sequence.iter() } - - fn as_value(&self) -> LazyRawBinaryValue_1_1<'top> { - self.sequence.value - } } impl<'top> LazyContainerPrivate<'top, BinaryEncoding_1_1> for LazyRawBinarySExp_1_1<'top> { @@ -55,6 +59,12 @@ impl<'top> LazyContainerPrivate<'top, BinaryEncoding_1_1> for LazyRawBinarySExp_ } } +impl<'top> LazyRawContainer<'top, BinaryEncoding_1_1> for LazyRawBinarySExp_1_1<'top> { + fn as_value(&self) -> ::Value<'top> { + self.sequence.value + } +} + impl<'top> LazyRawSequence<'top, BinaryEncoding_1_1> for LazyRawBinarySExp_1_1<'top> { type Iterator = RawBinarySequenceIterator_1_1<'top>; @@ -69,10 +79,6 @@ impl<'top> LazyRawSequence<'top, BinaryEncoding_1_1> for LazyRawBinarySExp_1_1<' fn iter(&self) -> Self::Iterator { self.sequence.iter() } - - fn as_value(&self) -> LazyRawBinaryValue_1_1<'top> { - self.sequence.value - } } #[derive(Copy, Clone)] diff --git a/src/lazy/binary/raw/v1_1/struct.rs b/src/lazy/binary/raw/v1_1/struct.rs index 89ebd839..f6dd42b5 100644 --- a/src/lazy/binary/raw/v1_1/struct.rs +++ b/src/lazy/binary/raw/v1_1/struct.rs @@ -2,20 +2,57 @@ use std::fmt; use std::fmt::{Debug, Formatter}; +use std::ops::Range; use crate::lazy::binary::raw::v1_1::annotations_iterator::RawBinaryAnnotationsIterator_1_1; use crate::lazy::binary::raw::v1_1::{ immutable_buffer::ImmutableBuffer, value::LazyRawBinaryValue_1_1, }; -use crate::lazy::decoder::private::{ - LazyContainerPrivate, LazyRawFieldPrivate, LazyRawValuePrivate, -}; +use crate::lazy::decoder::private::LazyContainerPrivate; use crate::lazy::decoder::{ - LazyRawField, LazyRawFieldExpr, LazyRawStruct, RawFieldExpr, RawValueExpr, + HasRange, HasSpan, LazyDecoder, LazyRawContainer, LazyRawFieldExpr, LazyRawFieldName, + LazyRawStruct, }; use crate::lazy::encoding::BinaryEncoding_1_1; +use crate::lazy::span::Span; use crate::{IonResult, RawSymbolTokenRef}; +#[derive(Debug, Copy, Clone)] +pub struct LazyRawBinaryFieldName_1_1<'top> { + // The field name has to be read in order to discover its length, so we store it here to avoid + // needing to re-read it. + field_name: RawSymbolTokenRef<'top>, + // For viewing the span/range of the field name + matched: ImmutableBuffer<'top>, +} + +impl<'top> LazyRawBinaryFieldName_1_1<'top> { + pub fn new(field_name: RawSymbolTokenRef<'top>, matched: ImmutableBuffer<'top>) -> Self { + Self { + field_name, + matched, + } + } +} + +impl<'top> HasSpan<'top> for LazyRawBinaryFieldName_1_1<'top> { + fn span(&self) -> Span<'top> { + todo!() + } +} + +impl<'top> HasRange for LazyRawBinaryFieldName_1_1<'top> { + fn range(&self) -> Range { + todo!() + } +} + +impl<'top> LazyRawFieldName<'top> for LazyRawBinaryFieldName_1_1<'top> { + fn read(&self) -> IonResult> { + todo!() + } +} + #[derive(Copy, Clone)] pub struct LazyRawBinaryStruct_1_1<'top> { pub(crate) value: LazyRawBinaryValue_1_1<'top>, @@ -62,6 +99,12 @@ impl<'top> LazyContainerPrivate<'top, BinaryEncoding_1_1> for LazyRawBinaryStruc } } +impl<'top> LazyRawContainer<'top, BinaryEncoding_1_1> for LazyRawBinaryStruct_1_1<'top> { + fn as_value(&self) -> ::Value<'top> { + self.value + } +} + impl<'top> LazyRawStruct<'top, BinaryEncoding_1_1> for LazyRawBinaryStruct_1_1<'top> { type Iterator = RawBinaryStructIterator_1_1<'top>; @@ -88,65 +131,6 @@ impl<'top> Iterator for RawBinaryStructIterator_1_1<'top> { type Item = IonResult>; fn next(&mut self) -> Option { - match self.source.try_parse_next(ImmutableBuffer::peek_field) { - Ok(Some(lazy_raw_value)) => Some(Ok(RawFieldExpr::NameValuePair( - lazy_raw_value.field_name().unwrap(), - RawValueExpr::ValueLiteral(lazy_raw_value), - ))), - Ok(None) => None, - Err(e) => Some(Err(e)), - } - } -} - -#[derive(Copy, Clone)] -pub struct LazyRawBinaryField_1_1<'top> { - pub(crate) value: LazyRawBinaryValue_1_1<'top>, -} - -impl<'top> LazyRawBinaryField_1_1<'top> { - pub(crate) fn new(value: LazyRawBinaryValue_1_1<'top>) -> Self { - LazyRawBinaryField_1_1 { value } - } - - pub fn name(&self) -> RawSymbolTokenRef<'top> { - // We're in a struct field, the field ID must be populated. - let field_id = self.value.encoded_value.field_id.unwrap(); - RawSymbolTokenRef::SymbolId(field_id) - } - - pub fn value(&self) -> LazyRawBinaryValue_1_1<'top> { - self.value - } - - pub(crate) fn into_value(self) -> LazyRawBinaryValue_1_1<'top> { - self.value - } -} - -impl<'top> LazyRawFieldPrivate<'top, BinaryEncoding_1_1> for LazyRawBinaryField_1_1<'top> { - fn into_value(self) -> LazyRawBinaryValue_1_1<'top> { - self.value - } -} - -impl<'top> LazyRawField<'top, BinaryEncoding_1_1> for LazyRawBinaryField_1_1<'top> { - fn name(&self) -> RawSymbolTokenRef<'top> { - LazyRawBinaryField_1_1::name(self) - } - - fn value(&self) -> LazyRawBinaryValue_1_1<'top> { - self.value() - } -} - -impl<'top> Debug for LazyRawBinaryField_1_1<'top> { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!( - f, - "${}: {:?}", - self.value.encoded_value.field_id.unwrap(), - self.value() - ) + todo!() } } diff --git a/src/lazy/binary/raw/v1_1/value.rs b/src/lazy/binary/raw/v1_1/value.rs index d865cfb9..46a97ee2 100644 --- a/src/lazy/binary/raw/v1_1/value.rs +++ b/src/lazy/binary/raw/v1_1/value.rs @@ -2,6 +2,8 @@ use std::ops::Range; +use crate::lazy::decoder::{HasRange, HasSpan, RawVersionMarker}; +use crate::lazy::span::Span; use crate::{ lazy::{ binary::{ @@ -14,30 +16,68 @@ use crate::{ value::ValueParseResult, }, }, - decoder::{private::LazyRawValuePrivate, LazyDecoder, LazyRawValue}, + decoder::{LazyDecoder, LazyRawValue}, encoding::BinaryEncoding_1_1, raw_value_ref::RawValueRef, }, result::IonFailure, types::SymbolId, - IonError, IonResult, IonType, RawSymbolTokenRef, + IonError, IonResult, IonType, }; +#[derive(Debug, Copy, Clone)] +pub struct LazyRawBinaryVersionMarker_1_1<'top> { + major: u8, + minor: u8, + input: ImmutableBuffer<'top>, +} + +impl<'top> LazyRawBinaryVersionMarker_1_1<'top> { + pub fn new(input: ImmutableBuffer<'top>, major: u8, minor: u8) -> Self { + Self { + major, + minor, + input, + } + } +} + +impl<'top> HasSpan<'top> for LazyRawBinaryVersionMarker_1_1<'top> { + fn span(&self) -> Span<'top> { + Span::with_offset(self.input.offset(), self.input.bytes()) + } +} + +impl<'top> HasRange for LazyRawBinaryVersionMarker_1_1<'top> { + fn range(&self) -> Range { + self.input.range() + } +} + +impl<'top> RawVersionMarker<'top> for LazyRawBinaryVersionMarker_1_1<'top> { + fn version(&self) -> (u8, u8) { + (self.major, self.minor) + } +} + #[derive(Debug, Copy, Clone)] pub struct LazyRawBinaryValue_1_1<'top> { pub(crate) encoded_value: EncodedValue
, pub(crate) input: ImmutableBuffer<'top>, } -impl<'top> LazyRawValuePrivate<'top> for LazyRawBinaryValue_1_1<'top> { - fn field_name(&self) -> IonResult> { - if let Some(field_id) = self.encoded_value.field_id { - Ok(RawSymbolTokenRef::SymbolId(field_id)) - } else { - IonResult::illegal_operation( - "requested field name, but value was not in a struct field", - ) - } +impl<'top> HasSpan<'top> for LazyRawBinaryValue_1_1<'top> { + fn span(&self) -> Span<'top> { + let range = self.range(); + let local_range = (range.start - self.input.offset())..(range.end - self.input.offset()); + let bytes = &self.input.bytes()[local_range]; + Span::with_offset(range.start, bytes) + } +} + +impl<'top> HasRange for LazyRawBinaryValue_1_1<'top> { + fn range(&self) -> Range { + self.encoded_value.annotated_value_range() } } @@ -57,16 +97,6 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for LazyRawBinaryValue_1_1<'to fn read(&self) -> IonResult> { self.read() } - - fn range(&self) -> Range { - self.encoded_value.annotated_value_range() - } - - fn span(&self) -> &[u8] { - let range = self.range(); - let local_range = (range.start - self.input.offset())..(range.end - self.input.offset()); - &self.input.bytes()[local_range] - } } impl<'top> LazyRawBinaryValue_1_1<'top> { @@ -99,17 +129,8 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { }); let (sequence_offset, sequence_length) = match offset_and_length { None => { - return self - .input - // A value's binary layout is: - // - // field_id? | annotation_sequence? | type_descriptor | length? | body - // - // If this value has no annotation sequence, then the first byte after the - // field ID is the type descriptor. - // - // If there is no field ID, field_id_length will be zero. - .slice(self.encoded_value.field_id_length as usize, 0); + // If there are no annotations, return an empty slice starting at the opcode. + return self.input.slice(0, 0); } Some(offset_and_length) => offset_and_length, }; @@ -160,7 +181,7 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { self.input.offset(), ); } - let value_body_length = self.encoded_value.value_length(); + let value_body_length = self.encoded_value.value_body_length(); let value_offset = value_total_length - value_body_length; Ok(self.input.bytes_range(value_offset, value_body_length)) } @@ -170,7 +191,7 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { /// fully buffered before reading begins. pub(crate) fn available_body(&self) -> ImmutableBuffer<'top> { let value_total_length = self.encoded_value.total_length(); - let value_body_length = self.encoded_value.value_length(); + let value_body_length = self.encoded_value.value_body_length(); let value_offset = value_total_length - value_body_length; let bytes_needed = std::cmp::min(self.input.len() - value_offset, value_body_length); @@ -178,12 +199,6 @@ impl<'top> LazyRawBinaryValue_1_1<'top> { buffer_slice } - /// If this value is within a struct, returns its associated field name as a `Some(SymbolID)`. - /// Otherwise, returns `None`. - pub(crate) fn field_id(&self) -> Option { - self.encoded_value.field_id - } - /// Helper method called by [`Self::read`]. Reads the current value as a bool. fn read_bool(&self) -> ValueParseResult<'top, BinaryEncoding_1_1> { debug_assert!(self.encoded_value.ion_type() == IonType::Bool); diff --git a/src/lazy/binary/raw/value.rs b/src/lazy/binary/raw/value.rs index 98afd9f2..78cc85b6 100644 --- a/src/lazy/binary/raw/value.rs +++ b/src/lazy/binary/raw/value.rs @@ -10,10 +10,10 @@ use crate::lazy::binary::raw::sequence::{ LazyRawBinaryList_1_0, LazyRawBinarySExp_1_0, LazyRawBinarySequence_1_0, }; use crate::lazy::binary::raw::type_descriptor::Header; -use crate::lazy::decoder::private::LazyRawValuePrivate; -use crate::lazy::decoder::LazyRawValue; +use crate::lazy::decoder::{HasRange, HasSpan, LazyRawValue, RawVersionMarker}; use crate::lazy::encoding::BinaryEncoding_1_0; use crate::lazy::raw_value_ref::RawValueRef; +use crate::lazy::span::Span; use crate::lazy::str_ref::StrRef; use crate::result::IonFailure; use crate::types::SymbolId; @@ -23,6 +23,41 @@ use std::fmt::{Debug, Formatter}; use std::ops::Range; use std::{fmt, mem}; +#[derive(Debug, Copy, Clone)] +pub struct LazyRawBinaryVersionMarker_1_0<'top> { + major: u8, + minor: u8, + input: ImmutableBuffer<'top>, +} + +impl<'top> LazyRawBinaryVersionMarker_1_0<'top> { + pub fn new(input: ImmutableBuffer<'top>, major: u8, minor: u8) -> Self { + Self { + major, + minor, + input, + } + } +} + +impl<'top> HasSpan<'top> for LazyRawBinaryVersionMarker_1_0<'top> { + fn span(&self) -> Span<'top> { + Span::with_offset(self.input.offset(), self.input.bytes()) + } +} + +impl<'top> HasRange for LazyRawBinaryVersionMarker_1_0<'top> { + fn range(&self) -> Range { + self.input.range() + } +} + +impl<'top> RawVersionMarker<'top> for LazyRawBinaryVersionMarker_1_0<'top> { + fn version(&self) -> (u8, u8) { + (self.major, self.minor) + } +} + /// A value that has been identified in the input stream but whose data has not yet been read. /// /// If only part of the value is in the input buffer, calls to [`LazyRawBinaryValue_1_0::read`] (which examines @@ -50,15 +85,18 @@ impl<'top> Debug for LazyRawBinaryValue_1_0<'top> { pub type ValueParseResult<'top, F> = IonResult>; -impl<'top> LazyRawValuePrivate<'top> for LazyRawBinaryValue_1_0<'top> { - fn field_name(&self) -> IonResult> { - if let Some(field_id) = self.encoded_value.field_id { - Ok(RawSymbolTokenRef::SymbolId(field_id)) - } else { - IonResult::illegal_operation( - "requested field name, but value was not in a struct field", - ) - } +impl<'top> HasSpan<'top> for LazyRawBinaryValue_1_0<'top> { + fn span(&self) -> Span<'top> { + let range = self.range(); + // Subtract the `offset()` of the ImmutableBuffer to get the local indexes for start/end + let local_range = (range.start - self.input.offset())..(range.end - self.input.offset()); + Span::with_offset(range.start, &self.input.bytes()[local_range]) + } +} + +impl<'top> HasRange for LazyRawBinaryValue_1_0<'top> { + fn range(&self) -> Range { + self.encoded_value.annotated_value_range() } } @@ -78,20 +116,161 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_0> for LazyRawBinaryValue_1_0<'to fn read(&self) -> IonResult> { self.read() } +} - fn range(&self) -> Range { - self.encoded_value.annotated_value_range() +#[derive(Copy, Clone)] +pub struct EncodedBinaryAnnotations_1_0<'a, 'top> { + value: &'a LazyRawBinaryValue_1_0<'top>, +} + +impl<'a, 'top> EncodedBinaryAnnotations_1_0<'a, 'top> { + /// Returns the input stream index range that contains the bytes representing the complete + /// annotations wrapper, including its opcode, wrapper length, annotations sequence length, + /// and the sequence itself. + pub fn range(&self) -> Range { + self.value.encoded_value.annotations_range().unwrap() } - fn span(&self) -> &[u8] { + /// Returns the encoded bytes representing the complete annotations wrapper, including its + /// opcode, wrapper length, annotations sequence length, and the sequence itself. + pub fn span(&self) -> Span<'top> { let range = self.range(); - // Subtract the `offset()` of the ImmutableBuffer to get the local indexes for start/end - let local_range = (range.start - self.input.offset())..(range.end - self.input.offset()); - &self.input.bytes()[local_range] + let start = range.start - self.value.input.offset(); + let end = start + range.len(); + let bytes = &self.value.input.bytes()[start..end]; + Span::with_offset(range.start, bytes) + } + + /// Returns the input stream index range that contains the bytes representing the annotations + /// wrapper's opcode. + pub fn opcode_range(&self) -> Range { + let stream_start = self.range().start; + stream_start..stream_start + 1 + } + + /// Returns the encoded bytes representing the annotations wrapper's opcode. + pub fn opcode_span(&self) -> Span<'top> { + let stream_range = self.opcode_range(); + let local_range = 0..1; + let bytes = &self.span().bytes()[local_range]; + Span::with_offset(stream_range.start, bytes) + } + + /// Returns the encoded bytes representing the annotations wrapper's header (that is: the opcode, + /// wrapper length, and sequence length, but not the annotations sequence). + pub fn header_span(&self) -> Span<'top> { + let range = self.range(); + let sequence_length = self.value.encoded_value.annotations_sequence_length as usize; + let local_end = range.len() - sequence_length; + let bytes = &self.span().bytes()[..local_end]; + Span::with_offset(range.start, bytes) + } + + // TODO: separate span accessors for the wrapper length and sequence length? + + /// Returns the encoded bytes representing the annotations wrapper's annotations sequence. + pub fn sequence_span(&self) -> Span<'top> { + let range = self.range(); + let sequence_length = self.value.encoded_value.annotations_sequence_length as usize; + let local_start = range.len() - sequence_length; + let bytes = &self.span().bytes()[local_start..]; + let stream_start = range.start + local_start; + Span::with_offset(stream_start, bytes) + } +} + +#[derive(Copy, Clone)] +pub struct EncodedBinaryValueData_1_0<'a, 'top> { + value: &'a LazyRawBinaryValue_1_0<'top>, +} + +impl<'a, 'top> EncodedBinaryValueData_1_0<'a, 'top> { + /// Returns the input stream index range that contains the bytes representing the complete value, + /// including its opcode, length, and body. + pub fn range(&self) -> Range { + let encoded = &self.value.encoded_value; + encoded.unannotated_value_range() + } + + /// Returns the encoded bytes that represent the complete value, including its opcode, length, + /// and body. + pub fn span(&self) -> Span<'top> { + let stream_range = self.range(); + let offset = self.value.input.offset(); + let local_range = stream_range.start - offset..stream_range.end - offset; + let bytes = &self.value.input.bytes()[local_range]; + Span::with_offset(stream_range.start, bytes) + } + + /// Returns the input stream index range that contains the bytes representing the + /// value's opcode. In Ion 1.0, this is always a range of a single byte. + fn opcode_range(&self) -> Range { + let offset = self.range().start; + offset..offset + 1 + } + + /// Returns the encoded bytes representing the value's opcode. In Ion 1.0, this is always a + /// slice of a single byte. + pub fn opcode_span(&self) -> Span<'top> { + let stream_range = self.opcode_range(); + let bytes = &self.span().bytes()[0..1]; + Span::with_offset(stream_range.start, bytes) + } + + /// Returns the input stream index range that contains the bytes representing the + /// value's length as a `VarUInt`. If the value's length was able to be encoded directly in + /// the type descriptor byte, the range returned will be empty. + pub fn trailing_length_range(&self) -> Range { + let range = self.range(); + range.start + 1..range.start + 1 + self.value.encoded_value.length_length as usize + } + + /// Returns the encoded bytes representing the value's length as a `VarUInt`. + /// If the value's length was able to be encoded directly in the type descriptor byte, + /// the slice returned will be empty. + pub fn trailing_length_span(&self) -> Span<'top> { + let stream_range = self.trailing_length_range(); + let offset = self.value.input.offset(); + let local_range = stream_range.start - offset .. stream_range.end - offset; + let bytes = &self.value.input.bytes()[local_range]; + Span::with_offset(stream_range.start, bytes) + } + + /// Returns the input stream index range that contains the bytes representing the + /// value's body (that is: the content of the value that follows its opcode and length). + pub fn body_range(&self) -> Range { + let encoded = &self.value.encoded_value; + let body_offset = encoded.header_length(); + let body_length = encoded.value_body_length(); + let start = self.range().start + body_offset; + let end = start + body_length; + start..end + } + + /// Returns the encoded bytes representing the value's body (that is: the content of the value + /// that follows its opcode and length). + pub fn body_span(&self) -> Span<'top> { + let stream_range = self.body_range(); + let offset = self.value.input.offset(); + let local_range = stream_range.start - offset .. stream_range.end - offset; + let bytes = &self.span().bytes()[local_range]; + Span::with_offset(stream_range.start, bytes) } } impl<'top> LazyRawBinaryValue_1_0<'top> { + pub fn encoded_annotations(&self) -> Option> { + if self.has_annotations() { + Some(EncodedBinaryAnnotations_1_0 { value: self }) + } else { + None + } + } + + pub fn encoded_data(&self) -> EncodedBinaryValueData_1_0<'_, 'top> { + EncodedBinaryValueData_1_0 { value: self } + } + /// Indicates the Ion data type of this value. Calling this method does not require additional /// parsing of the input stream. pub fn ion_type(&self) -> IonType { @@ -121,17 +300,9 @@ impl<'top> LazyRawBinaryValue_1_0<'top> { }); let (sequence_offset, sequence_length) = match offset_and_length { None => { - return self - .input - // A value's binary layout is: - // - // field_id? | annotation_sequence? | type_descriptor | length? | body - // - // If this value has no annotation sequence, then the first byte after the - // field ID is the type descriptor. - // - // If there is no field ID, field_id_length will be zero. - .slice(self.encoded_value.field_id_length as usize, 0); + // If there are no annotations, return an empty slice positioned on the type + // descriptor. + return self.input.slice(0, 0); } Some(offset_and_length) => offset_and_length, }; @@ -173,18 +344,12 @@ impl<'top> LazyRawBinaryValue_1_0<'top> { } /// Returns the encoded byte slice representing this value's data. - fn value_body(&self) -> IonResult<&'top [u8]> { + pub fn value_body(&self) -> &'top [u8] { let value_total_length = self.encoded_value.total_length(); - if self.input.len() < value_total_length { - eprintln!("[value_body] Incomplete {:?}", self); - return IonResult::incomplete( - "only part of the requested value is available in the buffer", - self.input.offset(), - ); - } - let value_body_length = self.encoded_value.value_length(); + debug_assert!(self.input.len() >= value_total_length); + let value_body_length = self.encoded_value.value_body_length(); let value_offset = value_total_length - value_body_length; - Ok(self.input.bytes_range(value_offset, value_body_length)) + self.input.bytes_range(value_offset, value_body_length) } /// Returns an [`ImmutableBuffer`] containing whatever bytes of this value's body are currently @@ -192,7 +357,7 @@ impl<'top> LazyRawBinaryValue_1_0<'top> { /// fully buffered before reading begins. pub(crate) fn available_body(&self) -> ImmutableBuffer<'top> { let value_total_length = self.encoded_value.total_length(); - let value_body_length = self.encoded_value.value_length(); + let value_body_length = self.encoded_value.value_body_length(); let value_offset = value_total_length - value_body_length; let bytes_needed = std::cmp::min(self.input.len() - value_offset, value_body_length); @@ -200,12 +365,6 @@ impl<'top> LazyRawBinaryValue_1_0<'top> { buffer_slice } - /// If this value is within a struct, returns its associated field name as a `Some(SymbolID)`. - /// Otherwise, returns `None`. - pub(crate) fn field_id(&self) -> Option { - self.encoded_value.field_id - } - /// Helper method called by [`Self::read`]. Reads the current value as a bool. fn read_bool(&self) -> ValueParseResult<'top, BinaryEncoding_1_0> { debug_assert!(self.encoded_value.ion_type() == IonType::Bool); @@ -227,8 +386,7 @@ impl<'top> LazyRawBinaryValue_1_0<'top> { fn read_int(&self) -> ValueParseResult<'top, BinaryEncoding_1_0> { debug_assert!(self.encoded_value.ion_type() == IonType::Int); // `value_body()` returns a buffer starting at the body of the value. - // It also confirms that the entire value is in the buffer. - let uint_bytes = self.value_body()?; + let uint_bytes = self.value_body(); let magnitude: Int = if uint_bytes.len() <= mem::size_of::() { DecodedUInt::small_uint_from_slice(uint_bytes).into() } else { @@ -253,8 +411,8 @@ impl<'top> LazyRawBinaryValue_1_0<'top> { /// Helper method called by [`Self::read`]. Reads the current value as a float. fn read_float(&self) -> ValueParseResult<'top, BinaryEncoding_1_0> { debug_assert!(self.encoded_value.ion_type() == IonType::Float); - let ieee_bytes = self.value_body()?; - let number_of_bytes = self.encoded_value.value_length(); + let ieee_bytes = self.value_body(); + let number_of_bytes = self.encoded_value.value_body_length(); let value = match number_of_bytes { 0 => 0f64, 4 => f64::from(BigEndian::read_f32(ieee_bytes)), @@ -268,16 +426,16 @@ impl<'top> LazyRawBinaryValue_1_0<'top> { fn read_decimal(&self) -> ValueParseResult<'top, BinaryEncoding_1_0> { debug_assert!(self.encoded_value.ion_type() == IonType::Decimal); - if self.encoded_value.value_length() == 0 { + if self.encoded_value.value_body_length() == 0 { return Ok(RawValueRef::Decimal(Decimal::new(0i32, 0i64))); } // Skip the type descriptor and length bytes - let input = ImmutableBuffer::new(self.value_body()?); + let input = ImmutableBuffer::new(self.value_body()); let (exponent_var_int, remaining) = input.read_var_int()?; let coefficient_size_in_bytes = - self.encoded_value.value_length() - exponent_var_int.size_in_bytes(); + self.encoded_value.value_body_length() - exponent_var_int.size_in_bytes(); let exponent = exponent_var_int.value(); let (coefficient, _remaining) = remaining.read_int(coefficient_size_in_bytes)?; @@ -295,7 +453,7 @@ impl<'top> LazyRawBinaryValue_1_0<'top> { fn read_timestamp(&self) -> ValueParseResult<'top, BinaryEncoding_1_0> { debug_assert!(self.encoded_value.ion_type() == IonType::Timestamp); - let input = ImmutableBuffer::new(self.value_body()?); + let input = ImmutableBuffer::new(self.value_body()); let (offset, input) = input.read_var_int()?; let is_known_offset = !offset.is_negative_zero(); @@ -369,7 +527,7 @@ impl<'top> LazyRawBinaryValue_1_0<'top> { let (subsecond_exponent_var_uint, input) = input.read_var_int()?; let subsecond_exponent = subsecond_exponent_var_uint.value(); // The remaining bytes represent the coefficient. - let coefficient_size_in_bytes = self.encoded_value.value_length() - input.offset(); + let coefficient_size_in_bytes = self.encoded_value.value_body_length() - input.offset(); let (subsecond_coefficient, _input) = if coefficient_size_in_bytes == 0 { (DecodedInt::zero(), input) } else { @@ -390,7 +548,7 @@ impl<'top> LazyRawBinaryValue_1_0<'top> { /// Helper method called by [`Self::read_symbol`]. Reads the current value as a symbol ID. fn read_symbol_id(&self) -> IonResult { debug_assert!(self.encoded_value.ion_type() == IonType::Symbol); - let uint_bytes = self.value_body()?; + let uint_bytes = self.value_body(); if uint_bytes.len() > mem::size_of::() { return IonResult::decoding_error( "found a symbol ID that was too large to fit in a usize", @@ -411,7 +569,7 @@ impl<'top> LazyRawBinaryValue_1_0<'top> { /// Helper method called by [`Self::read`]. Reads the current value as a string. fn read_string(&self) -> ValueParseResult<'top, BinaryEncoding_1_0> { debug_assert!(self.encoded_value.ion_type() == IonType::String); - let raw_bytes = self.value_body()?; + let raw_bytes = self.value_body(); let text = std::str::from_utf8(raw_bytes) .map_err(|_| IonError::decoding_error("found a string with invalid utf-8 data"))?; Ok(RawValueRef::String(StrRef::from(text))) @@ -420,14 +578,14 @@ impl<'top> LazyRawBinaryValue_1_0<'top> { /// Helper method called by [`Self::read`]. Reads the current value as a blob. fn read_blob(&self) -> ValueParseResult<'top, BinaryEncoding_1_0> { debug_assert!(self.encoded_value.ion_type() == IonType::Blob); - let bytes = self.value_body()?; + let bytes = self.value_body(); Ok(RawValueRef::Blob(bytes.into())) } /// Helper method called by [`Self::read`]. Reads the current value as a clob. fn read_clob(&self) -> ValueParseResult<'top, BinaryEncoding_1_0> { debug_assert!(self.encoded_value.ion_type() == IonType::Clob); - let bytes = self.value_body()?; + let bytes = self.value_body(); Ok(RawValueRef::Clob(bytes.into())) } diff --git a/src/lazy/bytes_ref.rs b/src/lazy/bytes_ref.rs index e6f8bad9..2968dd43 100644 --- a/src/lazy/bytes_ref.rs +++ b/src/lazy/bytes_ref.rs @@ -1,25 +1,24 @@ use crate::text::text_formatter::IonValueFormatter; use crate::Bytes; -use std::borrow::Cow; use std::fmt::{Debug, Display, Formatter}; use std::ops::Deref; -#[derive(Clone)] +#[derive(Copy, Clone)] pub struct BytesRef<'data> { - data: Cow<'data, [u8]>, + data: &'data [u8], } impl<'data> Deref for BytesRef<'data> { type Target = [u8]; fn deref(&self) -> &Self::Target { - self.data.as_ref() + self.data } } impl<'data> BytesRef<'data> { pub fn to_owned(&self) -> Bytes { - Bytes::from(self.as_ref()) + Bytes::from(self.data) } pub fn into_owned(self) -> Bytes { @@ -33,41 +32,26 @@ impl<'data> BytesRef<'data> { impl<'data> From> for Bytes { fn from(value: BytesRef<'data>) -> Self { - match value.data { - Cow::Borrowed(bytes) => Bytes::from(bytes), - Cow::Owned(bytes) => Bytes::from(bytes), - } + Bytes::from(value.data) } } impl<'data, const N: usize> From<&'data [u8; N]> for BytesRef<'data> { fn from(bytes: &'data [u8; N]) -> Self { - BytesRef { - data: Cow::from(bytes.as_ref()), - } + BytesRef { data: bytes } } } impl<'data> From<&'data [u8]> for BytesRef<'data> { fn from(bytes: &'data [u8]) -> Self { - BytesRef { - data: Cow::from(bytes), - } - } -} - -impl<'data> From> for BytesRef<'data> { - fn from(bytes: Vec) -> Self { - BytesRef { - data: Cow::from(bytes), - } + BytesRef { data: bytes } } } impl<'data> From<&'data str> for BytesRef<'data> { fn from(text: &'data str) -> Self { BytesRef { - data: Cow::from(text.as_bytes()), + data: text.as_bytes(), } } } @@ -108,7 +92,7 @@ impl<'data> Display for BytesRef<'data> { impl<'data> Debug for BytesRef<'data> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { const NUM_BYTES_TO_SHOW: usize = 32; - let data = self.data.as_ref(); + let data = self.data; // Shows up to the first 32 bytes in hex write!(f, "BytesRef: [")?; for byte in data.iter().copied().take(NUM_BYTES_TO_SHOW) { diff --git a/src/lazy/decoder.rs b/src/lazy/decoder.rs index f4e78feb..3e85295f 100644 --- a/src/lazy/decoder.rs +++ b/src/lazy/decoder.rs @@ -3,12 +3,22 @@ use std::ops::Range; use bumpalo::Bump as BumpAllocator; +use crate::lazy::encoding::{BinaryEncoding_1_0, RawValueLiteral, TextEncoding_1_0}; use crate::lazy::expanded::macro_evaluator::RawEExpression; use crate::lazy::raw_stream_item::LazyRawStreamItem; use crate::lazy::raw_value_ref::RawValueRef; +use crate::lazy::span::Span; use crate::result::IonFailure; use crate::{IonResult, IonType, RawSymbolTokenRef}; +pub trait HasSpan<'top>: HasRange { + fn span(&self) -> Span<'top>; +} + +pub trait HasRange { + fn range(&self) -> Range; +} + /// A family of types that collectively comprise the lazy reader API for an Ion serialization /// format. These types operate at the 'raw' level; they do not attempt to resolve symbols /// using the active symbol table. @@ -34,10 +44,25 @@ pub trait LazyDecoder: 'static + Sized + Debug + Clone + Copy { type List<'top>: LazyRawSequence<'top, Self>; /// A struct whose fields may be accessed iteratively or by field name. type Struct<'top>: LazyRawStruct<'top, Self>; + /// A symbol token representing the name of a field within a struct. + type FieldName<'top>: LazyRawFieldName<'top>; /// An iterator over the annotations on the input stream's values. type AnnotationsIterator<'top>: Iterator>>; /// An e-expression invoking a macro. (Ion 1.1+) - type EExpression<'top>: RawEExpression<'top, Self>; + type EExp<'top>: RawEExpression<'top, Self>; + + type VersionMarker<'top>: RawVersionMarker<'top>; +} + +pub trait RawVersionMarker<'top>: Debug + Copy + Clone + HasSpan<'top> { + fn major(&self) -> u8 { + self.version().0 + } + fn minor(&self) -> u8 { + self.version().1 + } + + fn version(&self) -> (u8, u8); } /// An expression found in value position in either serialized Ion or a template. @@ -66,7 +91,7 @@ pub enum RawValueExpr { /// For a version of this type that is not constrained to a particular encoding, see /// [`RawValueExpr`]. pub type LazyRawValueExpr<'top, D> = - RawValueExpr<::Value<'top>, ::EExpression<'top>>; + RawValueExpr<::Value<'top>, ::EExp<'top>>; impl RawValueExpr { pub fn expect_value(self) -> IonResult { @@ -89,58 +114,147 @@ impl RawValueExpr { } } -/// An item found in field position within a struct. -/// This item may be: -/// * a name/value pair (as it is in Ion 1.0) -/// * a name/e-expression pair -/// * an e-expression -#[derive(Clone, Debug)] -pub enum RawFieldExpr<'top, V, M> { - NameValuePair(RawSymbolTokenRef<'top>, RawValueExpr), - MacroInvocation(M), +impl HasRange for RawValueExpr { + fn range(&self) -> Range { + match self { + RawValueExpr::ValueLiteral(value) => value.range(), + RawValueExpr::MacroInvocation(eexp) => eexp.range(), + } + } } -// As with the `RawValueExpr`/`LazyRawValueExpr` type pair, a `RawFieldExpr` has no constraints -// on the types used for values or macros, while the `LazyRawFieldExpr` type alias below uses the -// value and macro types associated with the decoder `D`. +impl<'top, V: HasSpan<'top>, M: HasSpan<'top>> HasSpan<'top> for RawValueExpr { + fn span(&self) -> Span<'top> { + match self { + RawValueExpr::ValueLiteral(value) => value.span(), + RawValueExpr::MacroInvocation(eexp) => eexp.span(), + } + } +} -/// An item found in struct field position an Ion data stream written in the encoding represented -/// by the LazyDecoder `D`. -pub type LazyRawFieldExpr<'top, D> = - RawFieldExpr<'top, ::Value<'top>, ::EExpression<'top>>; +/// A (name, value expression) pair representing a field in a struct. +/// The value expression may be either: +/// * a value literal +/// * an e-expression +#[derive(Copy, Clone, Debug)] +pub enum LazyRawFieldExpr<'top, D: LazyDecoder> { + NameValue(D::FieldName<'top>, D::Value<'top>), + NameEExp(D::FieldName<'top>, D::EExp<'top>), + EExp(D::EExp<'top>), +} -impl<'name, V: Debug, M: Debug> RawFieldExpr<'name, V, M> { - pub fn expect_name_value(self) -> IonResult<(RawSymbolTokenRef<'name>, V)> { - match self { - RawFieldExpr::NameValuePair(name, RawValueExpr::ValueLiteral(value)) => { - Ok((name, value)) - } - _ => IonResult::decoding_error(format!( +impl<'top, D: LazyDecoder> LazyRawFieldExpr<'top, D> { + pub fn expect_name_value(self) -> IonResult<(D::FieldName<'top>, D::Value<'top>)> { + let LazyRawFieldExpr::NameValue(name, value) = self else { + return IonResult::decoding_error(format!( "expected a name/value pair but found {:?}", self - )), + )); + }; + Ok((name, value)) + } + + pub fn expect_name_eexp(self) -> IonResult<(D::FieldName<'top>, D::EExp<'top>)> { + let LazyRawFieldExpr::NameEExp(name, eexp) = self else { + return IonResult::decoding_error(format!( + "expected a name/e-expression pair but found {:?}", + self + )); + }; + Ok((name, eexp)) + } + + pub fn expect_eexp(self) -> IonResult> { + let LazyRawFieldExpr::EExp(eexp) = self else { + return IonResult::decoding_error(format!( + "expected an e-expression but found {:?}", + self + )); + }; + Ok(eexp) + } +} + +// ======= 1.0 text fields are guaranteed to have a name and value ====== + +impl<'top> LazyRawFieldExpr<'top, TextEncoding_1_0> { + pub fn name(&self) -> ::FieldName<'top> { + use LazyRawFieldExpr::*; + match self { + NameValue(name, _value) => *name, + NameEExp(_, _) => unreachable!("name/eexp field in text Ion 1.0"), + EExp(_) => unreachable!("eexp field in text Ion 1.0"), + } + } + pub fn value(&self) -> ::Value<'top> { + use LazyRawFieldExpr::*; + match self { + NameValue(_name, value) => *value, + NameEExp(_, _) => unreachable!("name/eexp field in text Ion 1.0"), + EExp(_) => unreachable!("eexp field in text Ion 1.0"), } } - pub fn expect_name_macro(self) -> IonResult<(RawSymbolTokenRef<'name>, M)> { + pub fn name_and_value( + &self, + ) -> ( + ::FieldName<'top>, + ::Value<'top>, + ) { + use LazyRawFieldExpr::*; match self { - RawFieldExpr::NameValuePair(name, RawValueExpr::MacroInvocation(invocation)) => { - Ok((name, invocation)) - } - _ => IonResult::decoding_error(format!( - "expected a name/macro pair but found {:?}", - self - )), + NameValue(name, value) => (*name, *value), + NameEExp(_, _) => unreachable!("name/eexp field in text Ion 1.0"), + EExp(_) => unreachable!("eexp field in text Ion 1.0"), } } +} - pub fn expect_macro(self) -> IonResult { +// ======= 1.0 binary fields are guaranteed to have a name and value ====== + +impl<'top> LazyRawFieldExpr<'top, BinaryEncoding_1_0> { + pub fn name(&self) -> ::FieldName<'top> { + use LazyRawFieldExpr::*; match self { - RawFieldExpr::MacroInvocation(invocation) => Ok(invocation), - _ => IonResult::decoding_error(format!( - "expected a macro invocation but found {:?}", - self - )), + NameValue(name, _value) => *name, + NameEExp(_, _) => unreachable!("name/eexp field in binary Ion 1.0"), + EExp(_) => unreachable!("eexp field in text Ion 1.0"), + } + } + pub fn value(&self) -> ::Value<'top> { + use LazyRawFieldExpr::*; + match self { + NameValue(_name, value) => *value, + NameEExp(_, _) => unreachable!("name/eexp field in text Ion 1.0"), + EExp(_) => unreachable!("eexp field in text Ion 1.0"), + } + } + + pub fn name_and_value( + &self, + ) -> ( + ::FieldName<'top>, + ::Value<'top>, + ) { + use LazyRawFieldExpr::*; + match self { + NameValue(name, value) => (*name, *value), + NameEExp(_, _) => unreachable!("name/eexp field in text Ion 1.0"), + EExp(_) => unreachable!("eexp field in text Ion 1.0"), + } + } +} + +impl<'top, D: LazyDecoder> HasRange for LazyRawFieldExpr<'top, D> { + // This type does not offer a `span()` method to get the bytes of the entire field. + // In the case of a name/value or name/eexp pair, text parsers would need to provide a span that + // included the interstitial whitespace and delimiting `:` between the name and value, + // which is not especially useful. + fn range(&self) -> Range { + match self { + LazyRawFieldExpr::NameValue(name, value) => name.range().start..value.range().end, + LazyRawFieldExpr::NameEExp(name, eexp) => name.range().start..eexp.range().end, + LazyRawFieldExpr::EExp(eexp) => eexp.range(), } } } @@ -154,18 +268,11 @@ impl<'name, V: Debug, M: Debug> RawFieldExpr<'name, V, M> { // internal code that is defined in terms of `LazyRawField` to call the private `into_value()` // function while also preventing users from seeing or depending on it. pub(crate) mod private { - use crate::lazy::encoding::RawValueLiteral; - use crate::{IonResult, RawSymbolTokenRef}; + use crate::lazy::expanded::r#struct::UnexpandedField; + use crate::lazy::expanded::EncodingContext; + use crate::IonResult; - use super::LazyDecoder; - - pub trait LazyRawFieldPrivate<'top, D: LazyDecoder> { - /// Converts the `LazyRawField` impl to a `LazyRawValue` impl. - // At the moment, `LazyRawField`s are just thin wrappers around a `LazyRawValue` that can - // safely assume that the value has a field name associated with it. This method allows - // us to convert from one to the other when needed. - fn into_value(self) -> D::Value<'top>; - } + use super::{LazyDecoder, LazyRawFieldExpr, LazyRawStruct}; pub trait LazyContainerPrivate<'top, D: LazyDecoder> { /// Constructs a new lazy raw container from a lazy raw value that has been confirmed to be @@ -173,10 +280,54 @@ pub(crate) mod private { fn from_value(value: D::Value<'top>) -> Self; } - pub trait LazyRawValuePrivate<'top>: RawValueLiteral { - /// Returns the field name associated with this value. If the value is not inside a struct, - /// returns `IllegalOperation`. - fn field_name(&self) -> IonResult>; + pub trait LazyRawStructPrivate<'top, D: LazyDecoder> { + /// Creates an iterator that converts each raw struct field into an `UnexpandedField`, a + /// common representation for both raw fields and template fields that is used in the + /// expansion process. + fn unexpanded_fields( + &self, + context: EncodingContext<'top>, + ) -> RawStructUnexpandedFieldsIterator<'top, D>; + } + + pub struct RawStructUnexpandedFieldsIterator<'top, D: LazyDecoder> { + context: EncodingContext<'top>, + raw_fields: as LazyRawStruct<'top, D>>::Iterator, + } + + impl<'top, D: LazyDecoder> Iterator for RawStructUnexpandedFieldsIterator<'top, D> { + type Item = IonResult>; + + fn next(&mut self) -> Option { + let field: LazyRawFieldExpr<'top, D> = match self.raw_fields.next() { + Some(Ok(field)) => field, + Some(Err(e)) => return Some(Err(e)), + None => return None, + }; + use LazyRawFieldExpr::*; + let unexpanded_field = match field { + NameValue(name, value) => UnexpandedField::RawNameValue(self.context, name, value), + NameEExp(name, eexp) => UnexpandedField::RawNameEExp(self.context, name, eexp), + EExp(eexp) => UnexpandedField::RawEExp(self.context, eexp), + }; + Some(Ok(unexpanded_field)) + } + } + + impl<'top, D: LazyDecoder = S>, S> LazyRawStructPrivate<'top, D> for S + where + S: LazyRawStruct<'top, D>, + { + fn unexpanded_fields( + &self, + context: EncodingContext<'top>, + ) -> RawStructUnexpandedFieldsIterator<'top, D> { + let raw_fields = >::iter(self); + RawStructUnexpandedFieldsIterator { + context, + raw_fields, + } + } } } @@ -204,30 +355,35 @@ pub trait LazyRawReader<'data, D: LazyDecoder>: Sized { fn position(&self) -> usize; } +pub trait LazyRawContainer<'top, D: LazyDecoder> { + fn as_value(&self) -> D::Value<'top>; +} + pub trait LazyRawValue<'top, D: LazyDecoder>: - private::LazyRawValuePrivate<'top> + Copy + Clone + Debug + HasSpan<'top> + RawValueLiteral + Copy + Clone + Debug + Sized { fn ion_type(&self) -> IonType; fn is_null(&self) -> bool; fn annotations(&self) -> D::AnnotationsIterator<'top>; fn read(&self) -> IonResult>; - - fn range(&self) -> Range; - fn span(&self) -> &[u8]; } pub trait LazyRawSequence<'top, D: LazyDecoder>: - private::LazyContainerPrivate<'top, D> + Debug + Copy + Clone + LazyRawContainer<'top, D> + private::LazyContainerPrivate<'top, D> + Debug + Copy + Clone { type Iterator: Iterator>>; fn annotations(&self) -> D::AnnotationsIterator<'top>; fn ion_type(&self) -> IonType; fn iter(&self) -> Self::Iterator; - fn as_value(&self) -> D::Value<'top>; } pub trait LazyRawStruct<'top, D: LazyDecoder>: - private::LazyContainerPrivate<'top, D> + Debug + Copy + Clone + LazyRawContainer<'top, D> + + private::LazyContainerPrivate<'top, D> + + private::LazyRawStructPrivate<'top, D> + + Debug + + Copy + + Clone { type Iterator: Iterator>>; @@ -236,9 +392,6 @@ pub trait LazyRawStruct<'top, D: LazyDecoder>: fn iter(&self) -> Self::Iterator; } -pub trait LazyRawField<'top, D: LazyDecoder>: - private::LazyRawFieldPrivate<'top, D> + Debug -{ - fn name(&self) -> RawSymbolTokenRef<'top>; - fn value(&self) -> D::Value<'top>; +pub trait LazyRawFieldName<'top>: HasSpan<'top> + Copy + Debug + Clone { + fn read(&self) -> IonResult>; } diff --git a/src/lazy/encoder/annotation_seq.rs b/src/lazy/encoder/annotation_seq.rs index 4d5870b3..34bbde7f 100644 --- a/src/lazy/encoder/annotation_seq.rs +++ b/src/lazy/encoder/annotation_seq.rs @@ -1,6 +1,7 @@ -use crate::{RawSymbolTokenRef, SymbolId}; use smallvec::SmallVec; +use crate::{RawSymbolTokenRef, SymbolId}; + /// A sequence of annotations. /// /// When the sequence is two or fewer annotations, it will not require a heap allocation. @@ -17,7 +18,7 @@ impl<'a> AnnotationSeq<'a> for &'a str { /// Converts the value into an `AnnotationsVec`. fn into_annotations_vec(self) -> AnnotationsVec<'a> { let mut vec = AnnotationsVec::new(); - vec.push(RawSymbolTokenRef::Text(self.into())); + vec.push(RawSymbolTokenRef::Text(self)); vec } } @@ -25,7 +26,7 @@ impl<'a> AnnotationSeq<'a> for &'a str { impl<'a> AnnotationSeq<'a> for &'a &str { fn into_annotations_vec(self) -> AnnotationsVec<'a> { let mut vec = AnnotationsVec::new(); - vec.push(RawSymbolTokenRef::Text((*self).into())); + vec.push(RawSymbolTokenRef::Text(self)); vec } } diff --git a/src/lazy/encoder/binary/v1_0/container_writers.rs b/src/lazy/encoder/binary/v1_0/container_writers.rs index d644105f..5a6733e9 100644 --- a/src/lazy/encoder/binary/v1_0/container_writers.rs +++ b/src/lazy/encoder/binary/v1_0/container_writers.rs @@ -57,7 +57,7 @@ impl<'value, 'top> BinaryContainerWriter_1_0<'value, 'top> { RawSymbolTokenRef::Text(text) => { return cold_path! { IonResult::encoding_error( - format!("binary Ion 1.0 does not support text annotation literals (received '{}')", text.as_ref()) + format!("binary Ion 1.0 does not support text annotation literals (received '{}')", text) ) }; } diff --git a/src/lazy/encoder/binary/v1_1/value_writer.rs b/src/lazy/encoder/binary/v1_1/value_writer.rs index b66aad2d..3b07b6d8 100644 --- a/src/lazy/encoder/binary/v1_1/value_writer.rs +++ b/src/lazy/encoder/binary/v1_1/value_writer.rs @@ -844,7 +844,7 @@ mod tests { use crate::lazy::encoder::write_as_ion::{WriteAsIon, WriteAsSExp}; use crate::raw_symbol_token_ref::AsRawSymbolTokenRef; use crate::{ - Decimal, Element, Int, IonResult, IonType, Null, RawSymbolToken, SymbolId, Timestamp, + Decimal, Element, Int, IonResult, IonType, Null, RawSymbolTokenRef, SymbolId, Timestamp, }; fn encoding_test( @@ -2336,7 +2336,7 @@ mod tests { } /// A list of field name/value pairs that will be serialized as a struct in each test. - type TestStruct<'a> = &'a [(RawSymbolToken, Element)]; + type TestStruct<'a> = &'a [(RawSymbolTokenRef<'a>, Element)]; impl<'a> WriteAsIon for TestStruct<'a> { fn write_as_ion(&self, writer: V) -> IonResult<()> { let mut struct_writer = writer.struct_writer()?; @@ -2348,7 +2348,10 @@ mod tests { } /// Constructs a field name/value pair out of a symbol token and a value written as Ion text. - fn field(name: impl Into, value: &str) -> (RawSymbolToken, Element) { + fn field<'a>( + name: impl Into>, + value: &'a str, + ) -> (RawSymbolTokenRef<'a>, Element) { ( name.into(), Element::read_one(value).expect("failed to read field value"), @@ -2705,8 +2708,8 @@ mod tests { case( 0.annotated_with([ - RawSymbolToken::SymbolId(4), - RawSymbolToken::Text("foo".into()), + RawSymbolTokenRef::SymbolId(4), + RawSymbolTokenRef::Text("foo"), ]), &[ 0xE8, // Two FlexSym annotations follow @@ -2718,8 +2721,8 @@ mod tests { )?; case( 0.annotated_with([ - RawSymbolToken::Text("foo".into()), - RawSymbolToken::SymbolId(4), + RawSymbolTokenRef::Text("foo"), + RawSymbolTokenRef::SymbolId(4), ]), &[ 0xE8, // Two FlexSym annotations follow @@ -2731,9 +2734,9 @@ mod tests { )?; case( 0.annotated_with([ - RawSymbolToken::Text("foo".into()), - RawSymbolToken::SymbolId(4), - RawSymbolToken::Text("baz".into()), + RawSymbolTokenRef::Text("foo"), + RawSymbolTokenRef::SymbolId(4), + RawSymbolTokenRef::Text("baz"), ]), &[ 0xE9, // A FlexUInt follows that indicates the byte length of the FlexSym annotations sequence @@ -2748,9 +2751,9 @@ mod tests { )?; case( 0.annotated_with([ - RawSymbolToken::SymbolId(4), - RawSymbolToken::Text("foo".into()), - RawSymbolToken::SymbolId(5), + RawSymbolTokenRef::SymbolId(4), + RawSymbolTokenRef::Text("foo"), + RawSymbolTokenRef::SymbolId(5), ]), &[ 0xE9, // A FlexUInt follows that indicates the byte length of the FlexSym annotations sequence @@ -2765,7 +2768,7 @@ mod tests { // === Special cases: "" and $0 === case( - 0.annotated_with([RawSymbolToken::Text("".into()), RawSymbolToken::SymbolId(0)]), + 0.annotated_with([RawSymbolTokenRef::Text(""), RawSymbolTokenRef::SymbolId(0)]), &[ 0xE8, // Two FlexSym annotations follow 0x01, // Opcode follows diff --git a/src/lazy/encoder/text/v1_0/value_writer.rs b/src/lazy/encoder/text/v1_0/value_writer.rs index beded852..1b7fb9e3 100644 --- a/src/lazy/encoder/text/v1_0/value_writer.rs +++ b/src/lazy/encoder/text/v1_0/value_writer.rs @@ -10,7 +10,7 @@ use crate::lazy::never::Never; use crate::lazy::text::raw::v1_1::reader::MacroIdRef; use crate::raw_symbol_token_ref::AsRawSymbolTokenRef; use crate::result::IonFailure; -use crate::text::text_formatter::IonValueFormatter; +use crate::text::text_formatter::{IoFmtShim, IonValueFormatter}; use crate::text::whitespace_config::WhitespaceConfig; use crate::types::{ContainerType, ParentType}; use crate::{Decimal, Int, IonResult, IonType, RawSymbolTokenRef, Timestamp}; @@ -28,67 +28,13 @@ pub struct TextValueWriter_1_0<'value, W: Write + 'value> { pub(crate) parent_type: ParentType, } -/// Returns `true` if the provided `token`'s text is an 'identifier'. That is, the text starts -/// with a `$`, `_` or ASCII letter and is followed by a sequence of `$`, `_`, or ASCII letters -/// and numbers. Examples: -/// * `firstName` -/// * `first_name` -/// * `name_1` -/// * `$name` -/// Unlike other symbols, identifiers don't have to be wrapped in quotes. -fn token_is_identifier(token: &str) -> bool { - if token.is_empty() { - return false; - } - let mut chars = token.chars(); - let first = chars.next().unwrap(); - (first == '$' || first == '_' || first.is_ascii_alphabetic()) - && chars.all(|c| c == '$' || c == '_' || c.is_ascii_alphanumeric()) -} - -/// Returns `true` if the provided text is an Ion keyword. Keywords like `true` or `null` -/// resemble identifiers, but writers must wrap them in quotes when using them as symbol text. -fn token_is_keyword(token: &str) -> bool { - const KEYWORDS: &[&str] = &["true", "false", "nan", "null"]; - KEYWORDS.contains(&token) -} - -/// Returns `true` if this token's text resembles a symbol ID literal. For example: `'$99'` is a -/// symbol with the text `$99`. However, `$99` (without quotes) is a symbol ID that maps to -/// different text. -fn token_resembles_symbol_id(token: &str) -> bool { - if token.is_empty() { - return false; - } - let mut chars = token.chars(); - let first = chars.next().unwrap(); - first == '$' && chars.all(|c| c.is_numeric()) -} - pub(crate) fn write_symbol_token( output: &mut O, token: A, ) -> IonResult<()> { - match token.as_raw_symbol_token_ref() { - RawSymbolTokenRef::SymbolId(sid) => write!(output, "${sid}")?, - RawSymbolTokenRef::Text(text) - if token_is_keyword(text.as_ref()) || token_resembles_symbol_id(text.as_ref()) => - { - // Write the symbol text in single quotes - write!(output, "'{text}'")?; - } - RawSymbolTokenRef::Text(text) if token_is_identifier(text.as_ref()) => { - // Write the symbol text without quotes - write!(output, "{text}")? - } - RawSymbolTokenRef::Text(text) => { - // Write the symbol text using quotes and escaping any characters that require it. - write!(output, "\'")?; - write_escaped_text_body(output, text)?; - write!(output, "\'")?; - } - }; - Ok(()) + let mut io_shim = IoFmtShim::new(output); + let _ = io_shim.value_formatter().format_symbol_token(token); + io_shim.into_result() } /// Writes the body (i.e. no start or end delimiters) of a string or symbol with any illegal @@ -97,37 +43,9 @@ pub(crate) fn write_escaped_text_body>( output: &mut O, value: S, ) -> IonResult<()> { - let mut start = 0usize; - let text = value.as_ref(); - for (byte_index, character) in text.char_indices() { - let escaped = match character { - '\n' => r"\n", - '\r' => r"\r", - '\t' => r"\t", - '\\' => r"\\", - '/' => r"\/", - '"' => r#"\""#, - '\'' => r"\'", - '?' => r"\?", - '\x00' => r"\0", // NUL - '\x07' => r"\a", // alert BEL - '\x08' => r"\b", // backspace - '\x0B' => r"\v", // vertical tab - '\x0C' => r"\f", // form feed - _ => { - // Other characters can be left as-is - continue; - } - }; - // If we reach this point, the current character needed to be escaped. - // Write all of the text leading up to this character to output, then the escaped - // version of this character. - write!(output, "{}{}", &text[start..byte_index], escaped)?; - // Update `start` to point to the first byte after the end of this character. - start = byte_index + character.len_utf8(); - } - write!(output, "{}", &text[start..])?; - Ok(()) + let mut io_shim = IoFmtShim::new(output); + let _ = io_shim.value_formatter().format_escaped_text_body(value); + io_shim.into_result() } impl<'value, W: Write + 'value> TextValueWriter_1_0<'value, W> { @@ -198,7 +116,7 @@ impl<'value, W: Write> TextAnnotatedValueWriter_1_0<'value, W> { for annotation in self.annotations { match annotation.as_raw_symbol_token_ref() { RawSymbolTokenRef::Text(token) => { - write_symbol_token(output, token.as_ref())?; + write_symbol_token(output, token)?; write!(output, "::") } RawSymbolTokenRef::SymbolId(sid) => write!(output, "${sid}::"), @@ -572,6 +490,7 @@ impl<'value, W: Write> ValueWriter for TextValueWriter_1_0<'value, W> { fn write_null(mut self, ion_type: IonType) -> IonResult<()> { use crate::IonType::*; self.write_indentation()?; + let null_text = match ion_type { Null => "null", Bool => "null.bool", diff --git a/src/lazy/encoder/text/v1_1/writer.rs b/src/lazy/encoder/text/v1_1/writer.rs index f81445f7..a2318b46 100644 --- a/src/lazy/encoder/text/v1_1/writer.rs +++ b/src/lazy/encoder/text/v1_1/writer.rs @@ -210,7 +210,7 @@ mod tests { #[test] fn write_struct() -> IonResult<()> { let mut writer = LazyRawTextWriter_1_1::new(vec![])?; - let empty_field_list: [(String, i64); 0] = []; + let empty_field_list: [(&str, i64); 0] = []; writer .write_struct(empty_field_list)? .write_struct([("foo", 1)])? @@ -261,7 +261,7 @@ mod tests { let mut reader = LazyRawTextReader_1_1::new(encoded_text.as_bytes()); let bump = bumpalo::Bump::new(); - let (_major, _minor) = reader.next(&bump)?.expect_ivm()?; + let _marker = reader.next(&bump)?.expect_ivm()?; let eexp = reader.next(&bump)?.expect_macro_invocation()?; assert_eq!(MacroIdRef::LocalName("foo"), eexp.id()); let mut args = eexp.raw_arguments(); @@ -308,7 +308,7 @@ mod tests { .expect_value()? .read()? .expect_symbol()?; - assert_eq!(symbol_arg, RawSymbolTokenRef::Text("+++".into())); + assert_eq!(symbol_arg, RawSymbolTokenRef::Text("+++")); Ok(()) } diff --git a/src/lazy/encoder/write_as_ion.rs b/src/lazy/encoder/write_as_ion.rs index eb636e05..687efa52 100644 --- a/src/lazy/encoder/write_as_ion.rs +++ b/src/lazy/encoder/write_as_ion.rs @@ -16,10 +16,14 @@ //! provided by the [`Annotate`](crate::lazy::encoder::annotate::Annotatable) trait. use std::marker::PhantomData; -use crate::lazy::encoder::value_writer::ValueWriter; +use crate::lazy::decoder::LazyDecoder; +use crate::lazy::encoder::annotation_seq::AnnotationsVec; +use crate::lazy::encoder::value_writer::{SequenceWriter, StructWriter, ValueWriter}; +use crate::lazy::value::LazyValue; +use crate::lazy::value_ref::ValueRef; use crate::{ - Blob, Clob, Decimal, Element, Int, IonResult, IonType, Null, RawSymbolToken, RawSymbolTokenRef, - Symbol, SymbolRef, Timestamp, Value, + Blob, Clob, Decimal, Element, Int, IonResult, IonType, Null, RawSymbolTokenRef, Symbol, + SymbolRef, Timestamp, Value, }; /// Defines how a Rust type should be serialized as Ion in terms of the methods available @@ -84,10 +88,10 @@ impl_write_as_ion_value!( usize => write_int with self as &Int::from(*self), f32 => write_f32 with self as *self, f64 => write_f64 with self as *self, + Int => write_int, Decimal => write_decimal, Timestamp => write_timestamp, Symbol => write_symbol, - RawSymbolToken => write_symbol, &str => write_string, String => write_string, &[u8] => write_blob, @@ -207,20 +211,72 @@ impl_write_as_ion_value_for_sexp_type_hint!([T; N], T, const N: usize); impl WriteAsIon for Value { fn write_as_ion(&self, value_writer: V) -> IonResult<()> { + use Value::*; match self { - Value::Null(i) => value_writer.write_null(*i), - Value::Bool(b) => value_writer.write_bool(*b), - Value::Int(i) => value_writer.write_int(i), - Value::Float(f) => value_writer.write_f64(*f), - Value::Decimal(d) => value_writer.write_decimal(d), - Value::Timestamp(t) => value_writer.write_timestamp(t), - Value::Symbol(s) => value_writer.write_symbol(s), - Value::String(s) => value_writer.write_string(s), - Value::Clob(c) => value_writer.write_clob(c), - Value::Blob(b) => value_writer.write_blob(b), - Value::List(l) => value_writer.write_list(l), - Value::SExp(s) => value_writer.write_sexp(s), - Value::Struct(s) => value_writer.write_struct(s.iter()), + Null(i) => value_writer.write_null(*i), + Bool(b) => value_writer.write_bool(*b), + Int(i) => value_writer.write_int(i), + Float(f) => value_writer.write_f64(*f), + Decimal(d) => value_writer.write_decimal(d), + Timestamp(t) => value_writer.write_timestamp(t), + Symbol(s) => value_writer.write_symbol(s), + String(s) => value_writer.write_string(s), + Clob(c) => value_writer.write_clob(c), + Blob(b) => value_writer.write_blob(b), + List(l) => value_writer.write_list(l), + SExp(s) => value_writer.write_sexp(s), + Struct(s) => value_writer.write_struct(s.iter()), + } + } +} + +impl<'a, D: LazyDecoder> WriteAsIon for LazyValue<'a, D> { + fn write_as_ion(&self, writer: V) -> IonResult<()> { + let mut annotations = AnnotationsVec::new(); + for annotation in self.annotations() { + annotations.push(annotation?.into()); + } + self.read()? + .write_as_ion(writer.with_annotations(annotations)?) + } +} + +impl<'a, D: LazyDecoder> WriteAsIon for ValueRef<'a, D> { + fn write_as_ion(&self, value_writer: V) -> IonResult<()> { + use ValueRef::*; + match self { + Null(i) => value_writer.write_null(*i), + Bool(b) => value_writer.write_bool(*b), + Int(i) => value_writer.write_int(i), + Float(f) => value_writer.write_f64(*f), + Decimal(d) => value_writer.write_decimal(d), + Timestamp(t) => value_writer.write_timestamp(t), + Symbol(s) => value_writer.write_symbol(s), + String(s) => value_writer.write_string(s.text()), + Clob(c) => value_writer.write_clob(c.as_ref()), + Blob(b) => value_writer.write_blob(b.as_ref()), + List(l) => { + let mut list = value_writer.list_writer()?; + for value in l { + list.write(value?.read()?)?; + } + list.close() + } + SExp(s) => { + let mut sexp = value_writer.list_writer()?; + for value in s { + sexp.write(value?.read()?)?; + } + sexp.close() + } + Struct(s) => { + let mut struct_ = value_writer.struct_writer()?; + for field_result in s { + let field = field_result?; + struct_.write(field.name()?, field.value().read()?)?; + } + struct_.close() + } } } } diff --git a/src/lazy/encoder/writer.rs b/src/lazy/encoder/writer.rs index 81cdf05f..b1f3e47a 100644 --- a/src/lazy/encoder/writer.rs +++ b/src/lazy/encoder/writer.rs @@ -79,6 +79,14 @@ impl ApplicationWriter { Ok(writer) } + pub fn output(&self) -> &Output { + &self.output + } + + pub fn output_mut(&mut self) -> &mut Output { + &mut self.output + } + /// Writes bytes of previously encoded values to the output stream. pub fn flush(&mut self) -> IonResult<()> { if self.encoding_context.num_pending_symbols > 0 { @@ -207,13 +215,13 @@ impl<'value, V: ValueWriter> AnnotatableWriter for ApplicationValueWriter<'value RawSymbolTokenRef::SymbolId(sid) => sid, // The token is text... RawSymbolTokenRef::Text(text) => { - if let Some(sid) = self.symbol_table().sid_for(&text.as_ref()) { + if let Some(sid) = self.symbol_table().sid_for(&text) { //...that was already in the symbol table. sid } else { // ...that we need to add to the symbol table. self.encoding.num_pending_symbols += 1; - self.symbol_table().add_symbol(text.as_ref()) + self.symbol_table().add_symbol(text) } } }; @@ -268,11 +276,11 @@ impl<'value, V: ValueWriter> ValueWriter for ApplicationValueWriter<'value, V> { if self.encoding.supports_text_tokens && self.encoding.symbol_creation_policy == SymbolCreationPolicy::WriteProvidedToken { - return self.raw_value_writer.write_symbol(text.as_ref()); + return self.raw_value_writer.write_symbol(text); } // Otherwise, see if the symbol is already in the symbol table. - let symbol_id = match self.symbol_table().sid_for(&text.as_ref()) { + let symbol_id = match self.symbol_table().sid_for(&text) { // If so, use the existing ID. Some(sid) => sid, // If not, add it to the symbol table and make a note to add it to the LST on the next @@ -363,11 +371,11 @@ impl<'value, V: ValueWriter> FieldEncoder for ApplicationStructWriter<'value, V> if self.encoding.supports_text_tokens && self.encoding.symbol_creation_policy == SymbolCreationPolicy::WriteProvidedToken { - return self.raw_struct_writer.encode_field_name(text.as_ref()); + return self.raw_struct_writer.encode_field_name(text); } // Otherwise, see if the symbol is already in the symbol table. - let symbol_id = match self.encoding.symbol_table.sid_for(&text.as_ref()) { + let symbol_id = match self.encoding.symbol_table.sid_for(&text) { // If so, use the existing ID. Some(sid) => sid, // If not, add it to the symbol table and make a note to add it to the LST on the next diff --git a/src/lazy/encoding.rs b/src/lazy/encoding.rs index 973ef82e..c7145267 100644 --- a/src/lazy/encoding.rs +++ b/src/lazy/encoding.rs @@ -4,30 +4,32 @@ use std::fmt::Debug; use crate::lazy::any_encoding::LazyRawAnyValue; use crate::lazy::binary::raw::annotations_iterator::RawBinaryAnnotationsIterator; -use crate::lazy::binary::raw::r#struct::LazyRawBinaryStruct_1_0; +use crate::lazy::binary::raw::r#struct::{LazyRawBinaryFieldName_1_0, LazyRawBinaryStruct_1_0}; use crate::lazy::binary::raw::reader::LazyRawBinaryReader_1_0; use crate::lazy::binary::raw::sequence::{LazyRawBinaryList_1_0, LazyRawBinarySExp_1_0}; +use crate::lazy::binary::raw::v1_1::r#struct::LazyRawBinaryFieldName_1_1; use crate::lazy::binary::raw::v1_1::reader::LazyRawBinaryReader_1_1; +use crate::lazy::binary::raw::v1_1::value::LazyRawBinaryVersionMarker_1_1; use crate::lazy::binary::raw::v1_1::{ r#struct::LazyRawBinaryStruct_1_1, sequence::{LazyRawBinaryList_1_1, LazyRawBinarySExp_1_1}, value::LazyRawBinaryValue_1_1, RawBinaryAnnotationsIterator_1_1, }; -use crate::lazy::binary::raw::value::LazyRawBinaryValue_1_0; +use crate::lazy::binary::raw::value::{LazyRawBinaryValue_1_0, LazyRawBinaryVersionMarker_1_0}; use crate::lazy::decoder::LazyDecoder; use crate::lazy::encoder::LazyEncoder; use crate::lazy::never::Never; -use crate::lazy::text::raw::r#struct::LazyRawTextStruct_1_0; +use crate::lazy::text::raw::r#struct::{LazyRawTextFieldName_1_0, LazyRawTextStruct_1_0}; use crate::lazy::text::raw::reader::LazyRawTextReader_1_0; use crate::lazy::text::raw::sequence::{LazyRawTextList_1_0, LazyRawTextSExp_1_0}; use crate::lazy::text::raw::v1_1::reader::{ - LazyRawTextList_1_1, LazyRawTextReader_1_1, LazyRawTextSExp_1_1, LazyRawTextStruct_1_1, - RawTextEExpression_1_1, + LazyRawTextFieldName_1_1, LazyRawTextList_1_1, LazyRawTextReader_1_1, LazyRawTextSExp_1_1, + LazyRawTextStruct_1_1, RawTextEExpression_1_1, }; use crate::lazy::text::value::{ - LazyRawTextValue, LazyRawTextValue_1_0, LazyRawTextValue_1_1, MatchedRawTextValue, - RawTextAnnotationsIterator, + LazyRawTextValue, LazyRawTextValue_1_0, LazyRawTextValue_1_1, LazyRawTextVersionMarker_1_0, + LazyRawTextVersionMarker_1_1, MatchedRawTextValue, RawTextAnnotationsIterator, }; use crate::{TextKind, WriteConfig}; @@ -130,9 +132,11 @@ impl LazyDecoder for BinaryEncoding_1_0 { type SExp<'top> = LazyRawBinarySExp_1_0<'top>; type List<'top> = LazyRawBinaryList_1_0<'top>; type Struct<'top> = LazyRawBinaryStruct_1_0<'top>; + type FieldName<'top> = LazyRawBinaryFieldName_1_0<'top>; type AnnotationsIterator<'top> = RawBinaryAnnotationsIterator<'top>; // Macros are not supported in Ion 1.0 - type EExpression<'top> = Never; + type EExp<'top> = Never; + type VersionMarker<'top> = LazyRawBinaryVersionMarker_1_0<'top>; } impl LazyDecoder for TextEncoding_1_0 { @@ -142,9 +146,11 @@ impl LazyDecoder for TextEncoding_1_0 { type SExp<'top> = LazyRawTextSExp_1_0<'top>; type List<'top> = LazyRawTextList_1_0<'top>; type Struct<'top> = LazyRawTextStruct_1_0<'top>; + type FieldName<'top> = LazyRawTextFieldName_1_0<'top>; type AnnotationsIterator<'top> = RawTextAnnotationsIterator<'top>; // Macros are not supported in Ion 1.0 - type EExpression<'top> = Never; + type EExp<'top> = Never; + type VersionMarker<'top> = LazyRawTextVersionMarker_1_0<'top>; } impl LazyDecoder for TextEncoding_1_1 { @@ -154,8 +160,10 @@ impl LazyDecoder for TextEncoding_1_1 { type SExp<'top> = LazyRawTextSExp_1_1<'top>; type List<'top> = LazyRawTextList_1_1<'top>; type Struct<'top> = LazyRawTextStruct_1_1<'top>; + type FieldName<'top> = LazyRawTextFieldName_1_1<'top>; type AnnotationsIterator<'top> = RawTextAnnotationsIterator<'top>; - type EExpression<'top> = RawTextEExpression_1_1<'top>; + type EExp<'top> = RawTextEExpression_1_1<'top>; + type VersionMarker<'top> = LazyRawTextVersionMarker_1_1<'top>; } impl LazyDecoder for BinaryEncoding_1_1 { @@ -164,10 +172,12 @@ impl LazyDecoder for BinaryEncoding_1_1 { type Value<'top> = LazyRawBinaryValue_1_1<'top>; type SExp<'top> = LazyRawBinarySExp_1_1<'top>; type List<'top> = LazyRawBinaryList_1_1<'top>; + type FieldName<'top> = LazyRawBinaryFieldName_1_1<'top>; type Struct<'top> = LazyRawBinaryStruct_1_1<'top>; type AnnotationsIterator<'top> = RawBinaryAnnotationsIterator_1_1<'top>; // TODO: implement macros in 1.1 - type EExpression<'top> = Never; + type EExp<'top> = Never; + type VersionMarker<'top> = LazyRawBinaryVersionMarker_1_1<'top>; } /// Marker trait for types that represent value literals in an Ion stream of some encoding. diff --git a/src/lazy/expanded/e_expression.rs b/src/lazy/expanded/e_expression.rs index 62d4053e..d3235696 100644 --- a/src/lazy/expanded/e_expression.rs +++ b/src/lazy/expanded/e_expression.rs @@ -14,12 +14,12 @@ use std::fmt::{Debug, Formatter}; #[derive(Copy, Clone)] pub struct EExpression<'top, D: LazyDecoder> { pub(crate) context: EncodingContext<'top>, - pub(crate) raw_invocation: D::EExpression<'top>, + pub(crate) raw_invocation: D::EExp<'top>, pub(crate) invoked_macro: MacroRef<'top>, } impl<'top, D: LazyDecoder> EExpression<'top, D> { - pub fn raw_invocation(&self) -> D::EExpression<'top> { + pub fn raw_invocation(&self) -> D::EExp<'top> { self.raw_invocation } pub fn invoked_macro(&self) -> MacroRef<'top> { @@ -36,7 +36,7 @@ impl<'top, D: LazyDecoder> Debug for EExpression<'top, D> { impl<'top, D: LazyDecoder> EExpression<'top, D> { pub fn new( context: EncodingContext<'top>, - raw_invocation: D::EExpression<'top>, + raw_invocation: D::EExp<'top>, invoked_macro: MacroRef<'top>, ) -> Self { Self { @@ -68,7 +68,7 @@ impl<'top, D: LazyDecoder> From> for MacroExpr<'top, D> { pub struct EExpressionArgsIterator<'top, D: LazyDecoder> { context: EncodingContext<'top>, - raw_args: as RawEExpression<'top, D>>::RawArgumentsIterator<'top>, + raw_args: as RawEExpression<'top, D>>::RawArgumentsIterator<'top>, } impl<'top, D: LazyDecoder> Iterator for EExpressionArgsIterator<'top, D> { @@ -82,7 +82,7 @@ impl<'top, D: LazyDecoder> Iterator for EExpressionArgsIterator<'top, D> { let expr = match raw_arg { LazyRawValueExpr::::ValueLiteral(value) => { - ValueExpr::ValueLiteral(LazyExpandedValue::from_value(self.context, value)) + ValueExpr::ValueLiteral(LazyExpandedValue::from_literal(self.context, value)) } LazyRawValueExpr::::MacroInvocation(raw_invocation) => { let invocation = match raw_invocation.resolve(self.context) { diff --git a/src/lazy/expanded/macro_evaluator.rs b/src/lazy/expanded/macro_evaluator.rs index 1e8d1acd..1da20e1b 100644 --- a/src/lazy/expanded/macro_evaluator.rs +++ b/src/lazy/expanded/macro_evaluator.rs @@ -16,7 +16,7 @@ use std::fmt::{Debug, Formatter}; use bumpalo::collections::{String as BumpString, Vec as BumpVec}; -use crate::lazy::decoder::{LazyDecoder, LazyRawValueExpr}; +use crate::lazy::decoder::{HasSpan, LazyDecoder, LazyRawValueExpr}; use crate::lazy::expanded::e_expression::{EExpression, EExpressionArgsIterator}; use crate::lazy::expanded::macro_table::{MacroKind, MacroRef}; use crate::lazy::expanded::sequence::Environment; @@ -33,8 +33,8 @@ use crate::{IonError, IonResult, RawSymbolTokenRef}; /// The syntactic entity in format `D` that represents an e-expression. This expression has not /// yet been resolved in the current encoding context. -pub trait RawEExpression<'top, D: LazyDecoder = Self>>: - Debug + Copy + Clone +pub trait RawEExpression<'top, D: LazyDecoder = Self>>: + HasSpan<'top> + Debug + Copy + Clone { /// An iterator that yields the macro invocation's arguments in order. type RawArgumentsIterator<'a>: Iterator>> @@ -144,9 +144,7 @@ impl<'top, D: LazyDecoder> ArgExpr<'top, D> { ) -> IonResult> { match self { ArgExpr::ValueLiteral(value) => Ok(ValueExpr::ValueLiteral(*value)), - ArgExpr::Variable(variable) => environment - .get_expected(variable.signature_index()) - .copied(), + ArgExpr::Variable(variable) => environment.get_expected(variable.signature_index()), ArgExpr::MacroInvocation(invocation) => Ok(ValueExpr::MacroInvocation(*invocation)), } } @@ -597,6 +595,7 @@ impl<'top, D: LazyDecoder> MakeStringExpansion<'top, D> { Ok(Some(ValueExpr::ValueLiteral(LazyExpandedValue { context, source: ExpandedValueSource::Constructed(EMPTY_ANNOTATIONS, expanded_value_ref), + variable: None, }))) } @@ -681,7 +680,7 @@ impl<'top> TemplateExpansion<'top> { )) } TemplateBodyValueExpr::Variable(variable) => { - *environment.get_expected(variable.signature_index())? + environment.get_expected(variable.signature_index())? } TemplateBodyValueExpr::MacroInvocation(raw_invocation) => { let invocation = raw_invocation.resolve(self.template, context); @@ -1115,9 +1114,9 @@ mod tests { e: (:make_string foo bar baz), f: 5, - + // If a macro appears in field name position, it MUST produce a single struct (which - // may be empty). That struct's fields will be merged into the host struct. + // may be empty). That struct's fields will be merged into the host struct. (:values {g: 6, h: 7}), g: 8 diff --git a/src/lazy/expanded/mod.rs b/src/lazy/expanded/mod.rs index a6243074..4e67bb48 100644 --- a/src/lazy/expanded/mod.rs +++ b/src/lazy/expanded/mod.rs @@ -49,7 +49,9 @@ use crate::lazy::expanded::macro_evaluator::{MacroEvaluator, RawEExpression}; use crate::lazy::expanded::macro_table::MacroTable; use crate::lazy::expanded::r#struct::LazyExpandedStruct; use crate::lazy::expanded::sequence::Environment; -use crate::lazy::expanded::template::{TemplateElement, TemplateMacro, TemplateValue}; +use crate::lazy::expanded::template::{ + TemplateElement, TemplateMacro, TemplateMacroRef, TemplateValue, +}; use crate::lazy::r#struct::LazyStruct; use crate::lazy::raw_value_ref::RawValueRef; use crate::lazy::sequence::{LazyList, LazySExp}; @@ -323,7 +325,7 @@ impl LazyExpandingReader IonResult>> { loop { match self.next_item()? { - SystemStreamItem::VersionMarker(_, _) => { + SystemStreamItem::VersionMarker(_marker) => { // TODO: Handle version changes 1.0 <-> 1.1 } SystemStreamItem::SymbolTable(_) => { @@ -331,7 +333,7 @@ impl LazyExpandingReader return Ok(Some(value)), - SystemStreamItem::EndOfStream => return Ok(None), + SystemStreamItem::EndOfStream(_) => return Ok(None), } } } @@ -364,14 +366,13 @@ impl LazyExpandingReader { - return Ok(SystemStreamItem::VersionMarker(major, minor)) - } + VersionMarker(marker) => return Ok(SystemStreamItem::VersionMarker(marker)), // We got our value; return it. Value(raw_value) => { let value = LazyExpandedValue { source: ExpandedValueSource::ValueLiteral(raw_value), context: self.context(), + variable: None, }; return self.interpret_value(value); } @@ -404,7 +405,9 @@ impl LazyExpandingReader return Ok(SystemStreamItem::EndOfStream), + EndOfStream(end_position) => { + return Ok(SystemStreamItem::EndOfStream(end_position)); + } }; } } @@ -477,11 +480,31 @@ impl<'top, V: RawValueLiteral, Encoding: LazyDecoder = V>> From } } +/// A variable found in the body of a template macro. +#[derive(Debug, Copy, Clone)] +pub struct TemplateVariableReference<'top> { + template: TemplateMacroRef<'top>, + signature_index: usize, +} + +impl<'top> TemplateVariableReference<'top> { + fn name(&self) -> &'top str { + self.template.signature.parameters()[self.signature_index].name() + } + + fn host_template(&self) -> TemplateMacroRef<'top> { + self.template + } +} + /// A value produced by expanding the 'raw' view of the input data. #[derive(Copy, Clone)] pub struct LazyExpandedValue<'top, Encoding: LazyDecoder> { pub(crate) context: EncodingContext<'top>, pub(crate) source: ExpandedValueSource<'top, Encoding>, + // If this value came from a variable reference in a template macro expansion, the + // template and the name of the variable can be found here. + pub(crate) variable: Option>, } impl<'top, Encoding: LazyDecoder> Debug for LazyExpandedValue<'top, Encoding> { @@ -491,10 +514,14 @@ impl<'top, Encoding: LazyDecoder> Debug for LazyExpandedValue<'top, Encoding> { } impl<'top, Encoding: LazyDecoder> LazyExpandedValue<'top, Encoding> { - pub(crate) fn from_value(context: EncodingContext<'top>, value: Encoding::Value<'top>) -> Self { + pub(crate) fn from_literal( + context: EncodingContext<'top>, + value: Encoding::Value<'top>, + ) -> Self { Self { context, source: ExpandedValueSource::ValueLiteral(value), + variable: None, } } @@ -506,9 +533,15 @@ impl<'top, Encoding: LazyDecoder> LazyExpandedValue<'top, Encoding> { Self { context, source: ExpandedValueSource::Template(environment, element), + variable: None, } } + pub(crate) fn via_variable(mut self, variable_ref: TemplateVariableReference<'top>) -> Self { + self.variable = Some(variable_ref); + self + } + pub fn ion_type(&self) -> IonType { use ExpandedValueSource::*; match &self.source { @@ -564,6 +597,11 @@ impl<'top, Encoding: LazyDecoder> LazyExpandedValue<'top, Encoding> { pub fn context(&self) -> EncodingContext<'top> { self.context } + + // TODO: Feature gate + pub fn source(&self) -> ExpandedValueSource<'top, Encoding> { + self.source + } } impl<'top, Encoding: LazyDecoder> From> @@ -750,7 +788,7 @@ impl<'top, Encoding: LazyDecoder> ExpandedValueRef<'top, Encoding> { pub fn expect_symbol(self) -> IonResult> { if let ExpandedValueRef::Symbol(s) = self { - Ok(s.clone()) + Ok(s) } else { self.expected("symbol") } diff --git a/src/lazy/expanded/sequence.rs b/src/lazy/expanded/sequence.rs index ca9af687..bb021a73 100644 --- a/src/lazy/expanded/sequence.rs +++ b/src/lazy/expanded/sequence.rs @@ -42,9 +42,10 @@ impl<'top, D: LazyDecoder> Environment<'top, D> { /// Returns the expression for the corresponding signature index -- the variable's offset within /// the template's signature. If the requested index is out of bounds, returns `Err`. - pub fn get_expected(&self, signature_index: usize) -> IonResult<&'top ValueExpr<'top, D>> { + pub fn get_expected(&self, signature_index: usize) -> IonResult> { self.expressions() .get(signature_index) + .copied() // The TemplateCompiler should detect any invalid variable references prior to evaluation .ok_or_else(|| { IonError::decoding_error(format!( @@ -108,6 +109,10 @@ impl<'top, D: LazyDecoder> LazyExpandedList<'top, D> { Self { source, context } } + pub fn source(&self) -> ExpandedListSource<'top, D> { + self.source + } + pub fn ion_type(&self) -> IonType { IonType::List } @@ -208,6 +213,10 @@ pub struct LazyExpandedSExp<'top, D: LazyDecoder> { } impl<'top, D: LazyDecoder> LazyExpandedSExp<'top, D> { + pub fn source(&self) -> ExpandedSExpSource<'top, D> { + self.source + } + pub fn ion_type(&self) -> IonType { IonType::SExp } @@ -329,6 +338,7 @@ fn expand_next_sequence_value<'top, D: LazyDecoder>( return Some(Ok(LazyExpandedValue { source: ExpandedValueSource::ValueLiteral(value), context, + variable: None, })) } Some(Ok(RawValueExpr::MacroInvocation(invocation))) => { diff --git a/src/lazy/expanded/struct.rs b/src/lazy/expanded/struct.rs index 6fdd0069..c9293a82 100644 --- a/src/lazy/expanded/struct.rs +++ b/src/lazy/expanded/struct.rs @@ -1,64 +1,127 @@ use std::ops::ControlFlow; use crate::element::iterators::SymbolsIterator; -use crate::lazy::decoder::{ - LazyDecoder, LazyRawFieldExpr, LazyRawStruct, RawFieldExpr, RawValueExpr, -}; +use crate::lazy::decoder::private::{LazyRawStructPrivate, RawStructUnexpandedFieldsIterator}; +use crate::lazy::decoder::{LazyDecoder, LazyRawFieldName, LazyRawStruct}; use crate::lazy::expanded::macro_evaluator::{ MacroEvaluator, MacroExpr, RawEExpression, ValueExpr, }; use crate::lazy::expanded::sequence::Environment; use crate::lazy::expanded::template::{ - AnnotationsRange, ExprRange, TemplateBodyValueExpr, TemplateElement, TemplateMacroRef, - TemplateStructIndex, TemplateStructRawFieldsIterator, + AnnotationsRange, ExprRange, TemplateBodyValueExpr, TemplateElement, TemplateMacroInvocation, + TemplateMacroRef, TemplateStructIndex, TemplateStructUnexpandedFieldsIterator, }; use crate::lazy::expanded::{ EncodingContext, ExpandedAnnotationsIterator, ExpandedAnnotationsSource, ExpandedValueRef, - ExpandedValueSource, LazyExpandedValue, + ExpandedValueSource, LazyExpandedValue, TemplateVariableReference, }; use crate::result::IonFailure; +use crate::symbol_ref::AsSymbolRef; use crate::{IonError, IonResult, RawSymbolTokenRef, SymbolRef}; -#[derive(Debug, Clone)] +/// A unified type embodying all possible field representations coming from both input data +/// (i.e. raw structs of some encoding) and template bodies. +// LazyRawStruct implementations have a `unexpanded_fields` method that lifts its raw fields into +// `UnexpandedField` instances. Similarly, the `TemplateStructUnexpandedFieldsIterator` turns a +// template's struct body into `UnexpandedField` instances. The `ExpandedStructIterator` unpacks +// and expands the field as part of its iteration process. +#[derive(Debug, Clone, Copy)] +pub enum UnexpandedField<'top, D: LazyDecoder> { + RawNameValue(EncodingContext<'top>, D::FieldName<'top>, D::Value<'top>), + RawNameEExp(EncodingContext<'top>, D::FieldName<'top>, D::EExp<'top>), + RawEExp(EncodingContext<'top>, D::EExp<'top>), + TemplateNameValue(SymbolRef<'top>, TemplateElement<'top>), + TemplateNameMacro(SymbolRef<'top>, TemplateMacroInvocation<'top>), + TemplateNameVariable( + SymbolRef<'top>, + // The variable name and the expression to which it referred. + // The expression may be either a raw value or a template element, so it's represented + // as a `ValueExpr`, which can accommodate both. + (TemplateVariableReference<'top>, ValueExpr<'top, D>), + ), +} + +#[derive(Debug, Clone, Copy)] pub struct LazyExpandedField<'top, D: LazyDecoder> { - name: RawSymbolTokenRef<'top>, - pub(crate) value: LazyExpandedValue<'top, D>, + name: LazyExpandedFieldName<'top, D>, + value: LazyExpandedValue<'top, D>, } +impl<'top, D: LazyDecoder> LazyExpandedField<'top, D> {} + impl<'top, D: LazyDecoder> LazyExpandedField<'top, D> { - pub fn new(name: RawSymbolTokenRef<'top>, value: LazyExpandedValue<'top, D>) -> Self { + pub fn new(name: LazyExpandedFieldName<'top, D>, value: LazyExpandedValue<'top, D>) -> Self { Self { name, value } } - /// Returns the symbol token that was used to encode the field name. This may be either text - /// or a symbol ID whose text resides in the symbol table. - pub fn raw_name(&self) -> RawSymbolTokenRef<'top> { - self.name.clone() + pub fn value(&self) -> LazyExpandedValue<'top, D> { + self.value } - /// Attempts to resolve the field name and return it as a [`SymbolRef`]. - pub fn name(&self) -> IonResult> { - let field_name_token = self.raw_name(); - let field_id = match field_name_token { - RawSymbolTokenRef::SymbolId(sid) => sid, - RawSymbolTokenRef::Text(text) => return Ok(SymbolRef::with_text(text)), - }; - self.value - .context - .symbol_table - .symbol_for(field_id) - .map(|symbol| symbol.into()) - .ok_or_else(|| { - IonError::decoding_error("found a symbol ID that was not in the symbol table") - }) + pub fn name(&self) -> LazyExpandedFieldName<'top, D> { + self.name } +} - pub fn value(&self) -> &LazyExpandedValue<'top, D> { - &self.value +impl<'top, D: LazyDecoder> LazyExpandedField<'top, D> { + fn from_raw_field( + context: EncodingContext<'top>, + name: D::FieldName<'top>, + value: impl Into>, + ) -> Self { + Self { + name: LazyExpandedFieldName::RawName(context, name), + value: value.into(), + } + } + + fn from_template( + template: TemplateMacroRef<'top>, + name: SymbolRef<'top>, + value: impl Into>, + ) -> Self { + Self { + name: LazyExpandedFieldName::TemplateName(template, name), + value: value.into(), + } + } +} + +#[derive(Debug, Clone, Copy)] +pub enum LazyExpandedFieldName<'top, D: LazyDecoder> { + RawName(EncodingContext<'top>, D::FieldName<'top>), + TemplateName(TemplateMacroRef<'top>, SymbolRef<'top>), + // TODO: `Constructed` needed for names in `(make_struct ...)` +} + +impl<'top, D: LazyDecoder> LazyExpandedFieldName<'top, D> { + pub(crate) fn read(&self) -> IonResult> { + match self { + LazyExpandedFieldName::RawName(context, name) => match name.read()? { + RawSymbolTokenRef::Text(text) => Ok(text.into()), + RawSymbolTokenRef::SymbolId(sid) => context + .symbol_table + .symbol_for(sid) + .map(AsSymbolRef::as_symbol_ref) + .ok_or_else(|| { + IonError::decoding_error(format!( + "field name with sid ${sid} has unknown text" + )) + }), + }, + LazyExpandedFieldName::TemplateName(_template_ref, symbol_ref) => Ok(*symbol_ref), + } + } + + pub(crate) fn read_raw(&self) -> IonResult> { + match self { + LazyExpandedFieldName::RawName(_, name) => name.read(), + LazyExpandedFieldName::TemplateName(_, name) => Ok((*name).into()), + } } } -#[derive(Clone)] +#[derive(Copy, Clone)] pub enum ExpandedStructSource<'top, D: LazyDecoder> { ValueLiteral(D::Struct<'top>), Template( @@ -71,12 +134,22 @@ pub enum ExpandedStructSource<'top, D: LazyDecoder> { // TODO: Constructed } -#[derive(Clone)] +#[derive(Copy, Clone)] pub struct LazyExpandedStruct<'top, D: LazyDecoder> { pub(crate) context: EncodingContext<'top>, pub(crate) source: ExpandedStructSource<'top, D>, } +//TODO: Feature gate +impl<'top, D: LazyDecoder> LazyExpandedStruct<'top, D> { + pub fn context(&self) -> EncodingContext<'top> { + self.context + } + pub fn source(&self) -> ExpandedStructSource<'top, D> { + self.source + } +} + impl<'top, D: LazyDecoder> LazyExpandedStruct<'top, D> { pub fn from_literal( context: EncodingContext<'top>, @@ -128,7 +201,7 @@ impl<'top, D: LazyDecoder> LazyExpandedStruct<'top, D> { ExpandedStructSource::ValueLiteral(raw_struct) => { ExpandedStructIteratorSource::ValueLiteral( MacroEvaluator::new(self.context, Environment::empty()), - raw_struct.iter(), + raw_struct.unexpanded_fields(self.context), ) } ExpandedStructSource::Template( @@ -141,7 +214,7 @@ impl<'top, D: LazyDecoder> LazyExpandedStruct<'top, D> { let evaluator = MacroEvaluator::new(self.context, *environment); ExpandedStructIteratorSource::Template( evaluator, - TemplateStructRawFieldsIterator::new( + TemplateStructUnexpandedFieldsIterator::new( self.context, *environment, *template, @@ -175,8 +248,8 @@ impl<'top, D: LazyDecoder> LazyExpandedStruct<'top, D> { ExpandedStructSource::ValueLiteral(_) => { for field_result in self.iter() { let field = field_result?; - if field.name()?.text() == Some(name) { - return Ok(Some(*field.value())); + if field.name().read()?.text() == Some(name) { + return Ok(Some(field.value)); } } // If there is no such field, return None. @@ -206,6 +279,7 @@ impl<'top, D: LazyDecoder> LazyExpandedStruct<'top, D> { *environment, TemplateElement::new(*template, element), ), + variable: None, }; Ok(Some(value)) } @@ -222,7 +296,6 @@ impl<'top, D: LazyDecoder> LazyExpandedStruct<'top, D> { // the first value back. ValueExpr::MacroInvocation(invocation) => { let mut evaluator = MacroEvaluator::new(self.context, *environment); - // TODO: Remove the context parameter from these; it's baked into MacroEvaluator now. evaluator.push(*invocation)?; evaluator.next() } @@ -259,13 +332,13 @@ pub enum ExpandedStructIteratorSource<'top, D: LazyDecoder> { // Giving the struct iterator its own evaluator means that we can abandon the iterator // at any time without impacting the evaluation state of its parent container. MacroEvaluator<'top, D>, - as LazyRawStruct<'top, D>>::Iterator, + RawStructUnexpandedFieldsIterator<'top, D>, ), // The struct we're iterating over is a value in a TDL template. It may contain macro // invocations that need to be evaluated. Template( MacroEvaluator<'top, D>, - TemplateStructRawFieldsIterator<'top, D>, + TemplateStructUnexpandedFieldsIterator<'top, D>, ), // TODO: Constructed } @@ -291,7 +364,7 @@ enum ExpandedStructIteratorState<'top, D: LazyDecoder> { // foo: 3, // This variant holds the field name that will be repeated for every value in the macro's // expansion. - ExpandingValueExpr(RawSymbolTokenRef<'top>), + ExpandingValueExpr(LazyExpandedFieldName<'top, D>), // The iterator is in the process of incrementally inlining a macro found in field name // position that expands to a struct; for example: // (:values {foo: 1, bar: 2}) @@ -320,25 +393,7 @@ impl<'top, D: LazyDecoder> Iterator for ExpandedStructIterator<'top, D> { Self::next_field_from(context, state, tdl_macro_evaluator, template_iterator) } ExpandedStructIteratorSource::ValueLiteral(e_exp_evaluator, raw_struct_iter) => { - let mut iter_adapter = raw_struct_iter.map( - |field: IonResult>| match field? { - RawFieldExpr::NameValuePair(name, RawValueExpr::MacroInvocation(m)) => { - let resolved_invocation = m.resolve(context)?; - Ok(RawFieldExpr::NameValuePair( - name, - RawValueExpr::MacroInvocation(resolved_invocation.into()), - )) - } - RawFieldExpr::NameValuePair(name, RawValueExpr::ValueLiteral(value)) => Ok( - RawFieldExpr::NameValuePair(name, RawValueExpr::ValueLiteral(value)), - ), - RawFieldExpr::MacroInvocation(invocation) => { - let resolved_invocation = invocation.resolve(context)?; - Ok(RawFieldExpr::MacroInvocation(resolved_invocation.into())) - } - }, - ); - Self::next_field_from(context, state, e_exp_evaluator, &mut iter_adapter) + Self::next_field_from(context, state, e_exp_evaluator, raw_struct_iter) } } } @@ -349,7 +404,6 @@ impl<'top, D: LazyDecoder> Iterator for ExpandedStructIterator<'top, D> { // potentially intimidating generics as a result. We'll walk through them as they're introduced. // // 'top: The lifetime associated with the top-level value we're currently reading at some depth. -// 'data: The lifetime associated with the byte array containing the Ion we're reading from. // D: The decoder being used to read the Ion data stream. For example: `TextEncoding_1_1` impl<'top, D: LazyDecoder> ExpandedStructIterator<'top, D> { /// Pulls the next expanded field from the raw source struct. The field returned may correspond @@ -358,16 +412,9 @@ impl<'top, D: LazyDecoder> ExpandedStructIterator<'top, D> { fn next_field_from< // The lifetime of this method invocation. 'a, - // The lifetime of the field name that we return; it needs to live at least as long as - // `top -- the amount of time that the reader will be parked on this top level value. - 'name: 'top, - // We have an iterator (see `I` below) that gives us raw fields from an input struct. - // This type, `V`, is the type of value in that raw field. For example: `LazyRawTextValue_1_1` - // when reading text Ion 1.1, or `&'top Element` when evaluating a TDL macro. - V: Into>, // An iterator over the struct we're expanding. It may be the fields iterator from a // LazyRawStruct, or it could be a `TemplateStructRawFieldsIterator`. - I: Iterator>>>, + I: Iterator>>, >( context: EncodingContext<'top>, state: &'a mut ExpandedStructIteratorState<'top, D>, @@ -417,10 +464,7 @@ impl<'top, D: LazyDecoder> ExpandedStructIterator<'top, D> { Ok(Some(next_value)) => { // We got another value from the macro we're evaluating. Emit // it as another field using the same field_name. - return Some(Ok(LazyExpandedField::new( - field_name.clone(), - next_value, - ))); + return Some(Ok(LazyExpandedField::new(*field_name, next_value))); } Ok(None) => { // The macro in the value position is no longer emitting values. Switch @@ -433,15 +477,9 @@ impl<'top, D: LazyDecoder> ExpandedStructIterator<'top, D> { } } - /// Pulls a single raw field expression from the source iterator and sets `state` according to + /// Pulls a single unexpanded field expression from the source iterator and sets `state` according to /// the expression's kind. - fn next_from_iterator< - // These generics are all carried over from the function above. - 'a, - 'name: 'top, - V: Into>, - I: Iterator>>>, - >( + fn next_from_iterator>>>( context: EncodingContext<'top>, state: &mut ExpandedStructIteratorState<'top, D>, evaluator: &mut MacroEvaluator<'top, D>, @@ -453,41 +491,54 @@ impl<'top, D: LazyDecoder> ExpandedStructIterator<'top, D> { use ControlFlow::*; // If the iterator is empty, we're done. - let field_expr_result = match iter.next() { - Some(result) => result, + let unexpanded_field = match iter.next() { + Some(Ok(field_expr)) => field_expr, + Some(Err(error)) => { + return Break(Some(Err::, IonError>(error))) + } None => return Break(None), }; - return match field_expr_result { - Err(e) => Break(Some(Err::, IonError>(e))), - // Plain (name, value literal) pair. For example: `foo: 1` - Ok(RawFieldExpr::NameValuePair(name, RawValueExpr::ValueLiteral(value))) => { - Break(Some(Ok(LazyExpandedField::new( + use UnexpandedField::*; + match unexpanded_field { + RawNameValue(context, name, value) => { + Break(Some(Ok(LazyExpandedField::from_raw_field( + context, name, - LazyExpandedValue { - context, - source: value.into(), - }, + LazyExpandedValue::from_literal(context, value), )))) } + TemplateNameValue(name, value) => Break(Some(Ok(LazyExpandedField::from_template( + value.template(), + name, + LazyExpandedValue::from_template(context, evaluator.environment(), value), + )))), // (name, macro invocation) pair. For example: `foo: (:bar)` - Ok(RawFieldExpr::NameValuePair(name, RawValueExpr::MacroInvocation(invocation))) => { - if let Err(e) = evaluator.push(invocation) { - return Break(Some(Err(e))); + RawNameEExp(context, raw_name, raw_eexp) => { + let eexp = match raw_eexp.resolve(context) { + Ok(eexp) => eexp, + Err(e) => return Break(Some(Err(e))), }; + if let Err(e) = evaluator.push(eexp) { + return Break(Some(Err(e))); + } + let name = LazyExpandedFieldName::RawName(context, raw_name); *state = ExpandedStructIteratorState::ExpandingValueExpr(name); // We've pushed the macro invocation onto the evaluator's stack, but further evaluation // is needed to get our next field. Continue(()) } - // Macro invocation in field name position. - Ok(RawFieldExpr::MacroInvocation(invocation)) => { + RawEExp(context, eexp) => { + let invocation = match eexp.resolve(context) { + Ok(invocation) => invocation, + Err(e) => return Break(Some(Err(e))), + }; // The next expression from the iterator was a macro. We expect it to expand to a // single struct whose fields will be merged into the one we're iterating over. For example: // {a: 1, (:make_struct b 2 c 3), d: 4} // expands to: // {a: 1, b: 2, c: 3, d: 4} - match Self::begin_inlining_struct_from_macro(state, evaluator, invocation) { + match Self::begin_inlining_struct_from_macro(state, evaluator, invocation.into()) { // If the macro expanded to a struct as expected, continue the evaluation // until we get a field to return. Ok(_) => Continue(()), @@ -495,7 +546,49 @@ impl<'top, D: LazyDecoder> ExpandedStructIterator<'top, D> { Err(e) => Break(Some(Err(e))), } } - }; + TemplateNameMacro(name_symbol, invocation) => { + if let Err(e) = evaluator.push(invocation) { + return Break(Some(Err(e))); + } + let name = + LazyExpandedFieldName::TemplateName(invocation.host_template(), name_symbol); + *state = ExpandedStructIteratorState::ExpandingValueExpr(name); + // We've pushed the macro invocation onto the evaluator's stack, but further evaluation + // is needed to get our next field. + Continue(()) + } + TemplateNameVariable(name_symbol, (variable_ref, value_expr)) => { + use ValueExpr::*; + let name = LazyExpandedFieldName::TemplateName(variable_ref.template, name_symbol); + match value_expr { + ValueLiteral(value) => { + return Break(Some(Ok(LazyExpandedField::from_template( + variable_ref.template, + name_symbol, + value.via_variable(variable_ref), + )))) + } + MacroInvocation(MacroExpr::EExp(eexp)) => { + if let Err(e) = evaluator.push(eexp) { + return Break(Some(Err(e))); + } + *state = ExpandedStructIteratorState::ExpandingValueExpr(name); + // We've pushed the macro invocation onto the evaluator's stack, but further evaluation + // is needed to get our next field. + Continue(()) + } + MacroInvocation(MacroExpr::TemplateMacro(invocation)) => { + if let Err(e) = evaluator.push(invocation) { + return Break(Some(Err(e))); + } + *state = ExpandedStructIteratorState::ExpandingValueExpr(name); + // We've pushed the macro invocation onto the evaluator's stack, but further evaluation + // is needed to get our next field. + Continue(()) + } + } + } + } } /// Pulls the next value from the evaluator, confirms that it's a struct, and then switches diff --git a/src/lazy/expanded/template.rs b/src/lazy/expanded/template.rs index 92fae3e4..f96c9b8b 100644 --- a/src/lazy/expanded/template.rs +++ b/src/lazy/expanded/template.rs @@ -3,16 +3,17 @@ use std::fmt; use std::fmt::{Debug, Formatter}; use std::ops::{Deref, Range}; -use crate::lazy::decoder::{LazyDecoder, RawFieldExpr, RawValueExpr}; +use crate::lazy::decoder::LazyDecoder; use crate::lazy::expanded::macro_evaluator::{MacroEvaluator, MacroExpr, ValueExpr}; use crate::lazy::expanded::macro_table::MacroRef; +use crate::lazy::expanded::r#struct::UnexpandedField; use crate::lazy::expanded::sequence::Environment; use crate::lazy::expanded::{ - EncodingContext, ExpandedValueRef, ExpandedValueSource, LazyExpandedValue, + EncodingContext, ExpandedValueSource, LazyExpandedValue, TemplateVariableReference, }; use crate::lazy::text::raw::v1_1::reader::{MacroAddress, MacroIdRef}; use crate::result::IonFailure; -use crate::{Bytes, Decimal, Int, IonResult, IonType, Str, Symbol, Timestamp, Value}; +use crate::{Bytes, Decimal, Int, IonResult, IonType, Str, Symbol, SymbolRef, Timestamp, Value}; /// A parameter in a user-defined macro's signature. #[derive(Debug, Clone)] @@ -193,6 +194,7 @@ impl<'top, D: LazyDecoder> Iterator for TemplateSequenceIterator<'top, D> { self.evaluator.environment(), TemplateElement::new(self.template, element), ), + variable: None, }; Some(Ok(value)) } @@ -242,10 +244,10 @@ impl<'top, D: LazyDecoder> Iterator for TemplateSequenceIterator<'top, D> { } } -/// An iterator that pulls expressions from a template body and wraps them in a [`RawFieldExpr`] to +/// An iterator that pulls expressions from a template body and wraps them in a [`UnexpandedField`] to /// mimic reading them from input. The [`LazyExpandedStruct`](crate::lazy::expanded::struct) handles /// evaluating any macro invocations that this yields. -pub struct TemplateStructRawFieldsIterator<'top, D: LazyDecoder> { +pub struct TemplateStructUnexpandedFieldsIterator<'top, D: LazyDecoder> { context: EncodingContext<'top>, environment: Environment<'top, D>, template: TemplateMacroRef<'top>, @@ -253,7 +255,7 @@ pub struct TemplateStructRawFieldsIterator<'top, D: LazyDecoder> { index: usize, } -impl<'top, D: LazyDecoder> TemplateStructRawFieldsIterator<'top, D> { +impl<'top, D: LazyDecoder> TemplateStructUnexpandedFieldsIterator<'top, D> { pub fn new( context: EncodingContext<'top>, environment: Environment<'top, D>, @@ -270,8 +272,8 @@ impl<'top, D: LazyDecoder> TemplateStructRawFieldsIterator<'top, D> { } } -impl<'top, D: LazyDecoder> Iterator for TemplateStructRawFieldsIterator<'top, D> { - type Item = IonResult, MacroExpr<'top, D>>>; +impl<'top, D: LazyDecoder> Iterator for TemplateStructUnexpandedFieldsIterator<'top, D> { + type Item = IonResult>; fn next(&mut self) -> Option { let name_expr_address = self.index; @@ -280,30 +282,18 @@ impl<'top, D: LazyDecoder> Iterator for TemplateStructRawFieldsIterator<'top, D> .get(name_expr_address)? .expect_element() .expect("field name must be a literal"); - let name_value = LazyExpandedValue::::from_template( - self.context, - // because the name token must be a literal, the env is irrelevant - Environment::empty(), - TemplateElement::new(self.template, name_element), - ); - let name_token = match name_value.read() { - Ok(ExpandedValueRef::Symbol(token)) => token, - Ok(ExpandedValueRef::String(str_ref)) => str_ref.into(), - Ok(_) => { - return Some(IonResult::decoding_error( - "template struct had a non-text field name", - )) - } - Err(e) => return Some(Err(e)), + let name: SymbolRef = match &name_element.value { + TemplateValue::Symbol(s) => s.into(), + TemplateValue::String(s) => s.text().into(), + _ => unreachable!("template struct field had a non-text field name"), }; let value_expr_address = name_expr_address + 1; - let value_source = match self.expressions.get(value_expr_address) { - None => { - return Some(IonResult::decoding_error( - "template struct had field name with no value", - )) - } - Some(TemplateBodyValueExpr::Element(element)) => { + let value_expr = self + .expressions + .get(value_expr_address) + .expect("template struct had field name with no value"); + let unexpanded_field = match value_expr { + TemplateBodyValueExpr::Element(element) => { match element.value() { TemplateValue::List(range) | TemplateValue::SExp(range) @@ -313,12 +303,12 @@ impl<'top, D: LazyDecoder> Iterator for TemplateStructRawFieldsIterator<'top, D> // accounted for the first expression, so there's nothing else to do here. } }; - RawValueExpr::ValueLiteral(ExpandedValueSource::Template( - self.environment, + UnexpandedField::TemplateNameValue( + name, TemplateElement::new(self.template, element), - )) + ) } - Some(TemplateBodyValueExpr::MacroInvocation(body_invocation)) => { + TemplateBodyValueExpr::MacroInvocation(body_invocation) => { let invoked_macro = self .context .macro_table @@ -335,25 +325,19 @@ impl<'top, D: LazyDecoder> Iterator for TemplateStructRawFieldsIterator<'top, D> .unwrap(), ); self.index += invocation.arg_expressions.len(); - RawValueExpr::MacroInvocation(MacroExpr::TemplateMacro(invocation)) + UnexpandedField::TemplateNameMacro(name, invocation) } - Some(TemplateBodyValueExpr::Variable(variable)) => { - let arg_expr = match self.environment.get_expected(variable.signature_index()) { - Ok(expr) => expr, - Err(e) => return Some(Err(e)), - }; - match arg_expr { - ValueExpr::ValueLiteral(expansion) => { - RawValueExpr::ValueLiteral(expansion.source) - } - ValueExpr::MacroInvocation(invocation) => { - RawValueExpr::MacroInvocation(*invocation) - } - } + TemplateBodyValueExpr::Variable(variable) => { + let arg_expr = self + .environment + .get_expected(variable.signature_index()) + .expect("reference to non-existent parameter"); + let variable_ref = variable.resolve(self.template); + UnexpandedField::TemplateNameVariable(name, (variable_ref, arg_expr)) } }; self.index += 2; - Some(Ok(RawFieldExpr::NameValuePair(name_token, value_source))) + Some(Ok(unexpanded_field)) } } @@ -780,7 +764,7 @@ impl<'top, D: LazyDecoder> Iterator for TemplateMacroInvocationArgsIterator<'top .environment .get_expected(variable_ref.signature_index()) { - Ok(expr) => *expr, + Ok(expr) => expr, Err(e) => return Some(Err(e)), }, TemplateBodyValueExpr::MacroInvocation(body_invocation) => { @@ -809,6 +793,24 @@ impl TemplateBodyVariableReference { pub fn signature_index(&self) -> usize { self.signature_index } + pub fn name<'a>(&self, signature: &'a MacroSignature) -> &'a str { + signature + .parameters() + .get(self.signature_index) + .unwrap() + .name() + } + /// Pairs this variable reference with the given template macro reference, allowing information + /// about the template definition to be retrieved later. + pub(crate) fn resolve<'top>( + &self, + template: TemplateMacroRef<'top>, + ) -> TemplateVariableReference<'top> { + TemplateVariableReference { + template, + signature_index: self.signature_index, + } + } } /// A value literal found in the body of a template. @@ -974,6 +976,14 @@ impl SmallRange { } } + pub fn start(&self) -> usize { + self.start as usize + } + + pub fn end(&self) -> usize { + self.end as usize + } + /// Produces an equivalent [`std::ops::Range`]. /// /// `std::ops::Range` is twice as large as `SmallRange` on 64 bit machines and does not diff --git a/src/lazy/mod.rs b/src/lazy/mod.rs index 91598a43..4326f360 100644 --- a/src/lazy/mod.rs +++ b/src/lazy/mod.rs @@ -14,6 +14,7 @@ pub mod raw_stream_item; pub mod raw_value_ref; pub mod reader; pub mod sequence; +pub mod span; pub mod str_ref; pub mod streaming_raw_reader; pub mod r#struct; diff --git a/src/lazy/never.rs b/src/lazy/never.rs index 8c1577c3..95fbac2d 100644 --- a/src/lazy/never.rs +++ b/src/lazy/never.rs @@ -1,6 +1,7 @@ use std::fmt::Debug; +use std::ops::Range; -use crate::lazy::decoder::{LazyDecoder, LazyRawValueExpr}; +use crate::lazy::decoder::{HasRange, HasSpan, LazyDecoder, LazyRawValueExpr}; use crate::lazy::encoder::annotation_seq::AnnotationSeq; use crate::lazy::encoder::value_writer::internal::{FieldEncoder, MakeValueWriter}; use crate::lazy::encoder::value_writer::{ @@ -8,6 +9,7 @@ use crate::lazy::encoder::value_writer::{ }; use crate::lazy::encoder::value_writer::{EExpWriter, SequenceWriter, StructWriter}; use crate::lazy::expanded::macro_evaluator::{MacroExpr, RawEExpression}; +use crate::lazy::span::Span; use crate::lazy::text::raw::v1_1::reader::MacroIdRef; use crate::raw_symbol_token_ref::AsRawSymbolTokenRef; use crate::{Decimal, Int, IonResult, IonType, Timestamp}; @@ -18,9 +20,21 @@ pub enum Never { // Has no variants, cannot be instantiated. } +impl<'top> HasSpan<'top> for Never { + fn span(&self) -> Span<'top> { + unreachable!("::span") + } +} + +impl HasRange for Never { + fn range(&self) -> Range { + unreachable!("::range") + } +} + // Ion 1.0 uses `Never` as a placeholder type for MacroInvocation. // The compiler should optimize these methods away. -impl<'top, D: LazyDecoder = Self>> RawEExpression<'top, D> for Never { +impl<'top, D: LazyDecoder = Self>> RawEExpression<'top, D> for Never { // These use Box to avoid defining yet another placeholder type. type RawArgumentsIterator<'a> = Box>>>; diff --git a/src/lazy/raw_stream_item.rs b/src/lazy/raw_stream_item.rs index 62c275d5..ac1f0847 100644 --- a/src/lazy/raw_stream_item.rs +++ b/src/lazy/raw_stream_item.rs @@ -1,14 +1,16 @@ -use crate::lazy::decoder::LazyDecoder; +use crate::lazy::decoder::{HasRange, HasSpan, LazyDecoder}; +use crate::lazy::span::Span; use crate::result::IonFailure; use crate::{IonError, IonResult}; use std::fmt::Debug; +use std::ops::Range; #[derive(Debug)] /// Raw stream components that a RawReader may encounter. -pub enum RawStreamItem { +pub enum RawStreamItem { /// An Ion Version Marker (IVM) indicating the Ion major and minor version that were used to /// encode the values that follow. - VersionMarker(u8, u8), + VersionMarker(M), /// An Ion value whose data has not yet been read. For more information about how to read its /// data and (in the case of containers) access any nested values, see the documentation /// for [`LazyRawBinaryValue`](crate::lazy::binary::raw::value::LazyRawBinaryValue_1_0). @@ -16,18 +18,49 @@ pub enum RawStreamItem { /// An Ion 1.1+ macro invocation. Ion 1.0 readers will never return a macro invocation. EExpression(E), /// The end of the stream - EndOfStream, + EndOfStream(EndPosition), } -pub type LazyRawStreamItem<'top, D> = - RawStreamItem<::Value<'top>, ::EExpression<'top>>; +pub type LazyRawStreamItem<'top, D> = RawStreamItem< + ::VersionMarker<'top>, + ::Value<'top>, + ::EExp<'top>, +>; -impl RawStreamItem { +impl HasRange + for RawStreamItem +{ + fn range(&self) -> Range { + use RawStreamItem::*; + match self { + VersionMarker(marker) => marker.range(), + Value(value) => value.range(), + EExpression(eexp) => eexp.range(), + EndOfStream(eos) => eos.range(), + } + } +} + +impl<'top, M: Debug + HasSpan<'top>, V: Debug + HasSpan<'top>, E: Debug + HasSpan<'top>> + HasSpan<'top> for RawStreamItem +{ + fn span(&self) -> Span<'top> { + use RawStreamItem::*; + match self { + VersionMarker(marker) => marker.span(), + Value(value) => value.span(), + EExpression(eexp) => eexp.span(), + EndOfStream(eos) => eos.span(), + } + } +} + +impl RawStreamItem { /// If this item is an Ion version marker (IVM), returns `Some((major, minor))` indicating the /// version. Otherwise, returns `None`. - pub fn version_marker(&self) -> Option<(u8, u8)> { - if let Self::VersionMarker(major, minor) = self { - Some((*major, *minor)) + pub fn version_marker(&self) -> Option { + if let Self::VersionMarker(marker) = self { + Some(*marker) } else { None } @@ -35,7 +68,7 @@ impl RawStreamItem { /// Like [`Self::version_marker`], but returns a [`IonError::Decoding`] if this item /// is not an IVM. - pub fn expect_ivm(self) -> IonResult<(u8, u8)> { + pub fn expect_ivm(self) -> IonResult { self.version_marker() .ok_or_else(|| IonError::decoding_error(format!("expected IVM, found {:?}", self))) } @@ -75,3 +108,36 @@ impl RawStreamItem { } } } + +/// Represents the end of a raw input stream. +/// +/// This type implements [`HasRange`] and [`HasSpan`]. These traits aren't especially useful for the +/// `EndPosition` type itself, but implementing them allows the `RawStreamItem` type (which contains +/// an `EndOfStream(EndPosition)` variant) to also implement them. +#[derive(Debug, Copy, Clone)] +pub struct EndPosition { + position: usize, +} + +impl EndPosition { + pub(crate) fn new(position: usize) -> Self { + Self { position } + } +} + +impl HasRange for EndPosition { + /// Returns an empty range whose matching `start` and `end` represent the first byte index at + /// the end of the stream which contains no data. For example, in the stream `1 2 3`, + /// `EndPosition::range(...)` would return the range `5..5`. + fn range(&self) -> Range { + self.position..self.position + } +} + +impl<'top> HasSpan<'top> for EndPosition { + /// Returns an empty [`Span`]. The range of the span will match that produced by + /// [`range`](Self::range). + fn span(&self) -> Span<'top> { + Span::with_offset(self.position, &[]) + } +} diff --git a/src/lazy/raw_value_ref.rs b/src/lazy/raw_value_ref.rs index c73c7466..18fc9a7d 100644 --- a/src/lazy/raw_value_ref.rs +++ b/src/lazy/raw_value_ref.rs @@ -135,7 +135,7 @@ impl<'top, D: LazyDecoder> RawValueRef<'top, D> { pub fn expect_symbol(self) -> IonResult> { if let RawValueRef::Symbol(s) = self { - Ok(s.clone()) + Ok(s) } else { IonResult::decoding_error("expected a symbol") } diff --git a/src/lazy/sequence.rs b/src/lazy/sequence.rs index ed29320e..4d14080f 100644 --- a/src/lazy/sequence.rs +++ b/src/lazy/sequence.rs @@ -62,6 +62,11 @@ impl<'top, D: LazyDecoder> LazyList<'top, D> { } } + // TODO: Feature gate + pub fn lower(&self) -> LazyExpandedList<'top, D> { + self.expanded_list + } + /// Returns an iterator over the annotations on this value. If this value has no annotations, /// the resulting iterator will be empty. /// @@ -181,6 +186,10 @@ impl<'top, D: LazyDecoder> Debug for LazySExp<'top, D> { } impl<'top, D: LazyDecoder> LazySExp<'top, D> { + pub fn lower(&self) -> LazyExpandedSExp<'top, D> { + self.expanded_sexp + } + /// Returns an iterator over the values in this sequence. See: [`LazyValue`]. pub fn iter(&self) -> SExpIterator<'top, D> { SExpIterator { diff --git a/src/lazy/span.rs b/src/lazy/span.rs new file mode 100644 index 00000000..bb5d41fb --- /dev/null +++ b/src/lazy/span.rs @@ -0,0 +1,44 @@ +use crate::result::IonFailure; +use crate::{IonError, IonResult}; +use std::ops::Range; + +/// Represents a slice of input data. +/// +/// Various items in the Reader API implement the [`HasSpan`](crate::lazy::decoder::HasSpan) trait, +/// allowing the byte slice that represented that item in the input by calling `span()`. +#[derive(Debug, Copy, Clone)] +pub struct Span<'a> { + // The input bytes that represented the item implementing `HasSpan`. + bytes: &'a [u8], + // The offset in the overall stream at which the contents of `bytes` were found. + offset: usize, +} + +impl<'a, A: AsRef<[u8]>> PartialEq for Span<'a> { + fn eq(&self, other: &A) -> bool { + self.bytes() == other.as_ref() + } +} + +impl<'a> Span<'a> { + pub fn with_offset(offset: usize, bytes: &'a [u8]) -> Self { + Self { bytes, offset } + } + + pub fn range(&self) -> Range { + self.offset..self.offset + self.bytes.len() + } + + pub fn bytes(&self) -> &'a [u8] { + self.bytes + } + + pub fn text(&self) -> Option<&'a str> { + self.expect_text().ok() + } + + pub fn expect_text(&self) -> IonResult<&'a str> { + std::str::from_utf8(self.bytes) + .map_err(|_| IonError::decoding_error("span text was not valid UTF-8")) + } +} diff --git a/src/lazy/str_ref.rs b/src/lazy/str_ref.rs index 5f143138..61f17c6f 100644 --- a/src/lazy/str_ref.rs +++ b/src/lazy/str_ref.rs @@ -1,20 +1,19 @@ use crate::lazy::bytes_ref::BytesRef; use crate::text::text_formatter::IonValueFormatter; use crate::{RawSymbolTokenRef, Str}; -use std::borrow::Cow; use std::fmt::{Display, Formatter}; use std::ops::Deref; /// A reference to an immutable in-memory representation of an Ion string. To get an owned [`Str`] /// instead, see [`StrRef::to_owned`]. -#[derive(Clone, PartialEq, Debug)] +#[derive(Copy, Clone, PartialEq, Debug)] pub struct StrRef<'data> { - text: Cow<'data, str>, + text: &'data str, } impl<'data> StrRef<'data> { pub fn to_owned(&self) -> Str { - Str::from(self.text.as_ref()) + Str::from(self.text) } pub fn into_owned(self) -> Str { @@ -22,7 +21,7 @@ impl<'data> StrRef<'data> { } pub fn text(&self) -> &str { - self.text.as_ref() + self.text } } @@ -30,7 +29,7 @@ impl<'data> Deref for StrRef<'data> { type Target = str; fn deref(&self) -> &Self::Target { - self.text.as_ref() + self.text } } @@ -63,33 +62,20 @@ impl<'data> Display for StrRef<'data> { impl<'a> From<&'a str> for StrRef<'a> { fn from(value: &'a str) -> Self { - StrRef { - text: Cow::from(value), - } - } -} - -impl<'a> From for StrRef<'a> { - fn from(value: String) -> Self { - StrRef { - text: Cow::from(value), - } + StrRef { text: value } } } impl<'data> From> for Str { fn from(str_ref: StrRef<'data>) -> Self { - let text: String = str_ref.text.into_owned(); + let text: String = str_ref.text.to_owned(); Str::from(text) } } impl<'data> From> for BytesRef<'data> { fn from(value: StrRef<'data>) -> Self { - match value.text { - Cow::Borrowed(text) => text.as_bytes().into(), - Cow::Owned(text) => Vec::from(text).into(), - } + value.text.into() } } diff --git a/src/lazy/streaming_raw_reader.rs b/src/lazy/streaming_raw_reader.rs index 16cafbcb..86fb29df 100644 --- a/src/lazy/streaming_raw_reader.rs +++ b/src/lazy/streaming_raw_reader.rs @@ -349,7 +349,7 @@ mod tests { fn expect_end_of_stream(actual: LazyRawStreamItem) -> IonResult<()> { assert!(matches!( actual, - LazyRawStreamItem::::EndOfStream + LazyRawStreamItem::::EndOfStream(_) )); Ok(()) } diff --git a/src/lazy/struct.rs b/src/lazy/struct.rs index 1c9f7084..41178217 100644 --- a/src/lazy/struct.rs +++ b/src/lazy/struct.rs @@ -4,18 +4,17 @@ use std::fmt; use std::fmt::{Debug, Formatter}; use crate::element::builders::StructBuilder; -use crate::lazy::decoder::LazyDecoder; +use crate::lazy::decoder::{LazyDecoder, LazyRawContainer}; use crate::lazy::encoding::BinaryEncoding_1_0; use crate::lazy::expanded::r#struct::{ - ExpandedStructIterator, LazyExpandedField, LazyExpandedStruct, + ExpandedStructIterator, ExpandedStructSource, LazyExpandedField, LazyExpandedStruct, }; +use crate::lazy::expanded::template::TemplateElement; +use crate::lazy::expanded::LazyExpandedValue; use crate::lazy::value::{AnnotationsIterator, LazyValue}; use crate::lazy::value_ref::ValueRef; use crate::result::IonFailure; -use crate::{ - Annotations, Element, IntoAnnotatedElement, IonError, IonResult, RawSymbolTokenRef, Struct, - SymbolRef, -}; +use crate::{Annotations, Element, IntoAnnotatedElement, IonError, IonResult, Struct, SymbolRef}; /// An as-of-yet unread binary Ion struct. `LazyStruct` is immutable; its fields and annotations /// can be read any number of times. @@ -46,7 +45,7 @@ use crate::{ ///# Ok(()) ///# } /// ``` -#[derive(Clone)] +#[derive(Copy, Clone)] pub struct LazyStruct<'top, D: LazyDecoder> { pub(crate) expanded_struct: LazyExpandedStruct<'top, D>, } @@ -78,6 +77,28 @@ impl<'top, D: LazyDecoder> LazyStruct<'top, D> { } } + //TODO: Feature gate + pub fn lower(&self) -> LazyExpandedStruct<'top, D> { + self.expanded_struct + } + pub fn as_value(&self) -> LazyValue<'top, D> { + let expanded_value = match self.expanded_struct.source { + ExpandedStructSource::ValueLiteral(v) => { + LazyExpandedValue::from_literal(self.expanded_struct.context, v.as_value()) + } + ExpandedStructSource::Template(env, template_ref, _, fields_range, _) => { + let element = TemplateElement::new( + template_ref, + template_ref.body().expressions()[fields_range.start() - 1] + .expect_element() + .unwrap(), + ); + LazyExpandedValue::from_template(self.expanded_struct.context, env, element) + } + }; + LazyValue::new(expanded_value) + } + /// Returns the value of the first field with the specified name, if any. The returned value is /// a [`LazyValue`]. Its type and annotations can be inspected without calling [LazyValue::read]. /// @@ -249,27 +270,14 @@ impl<'top, D: LazyDecoder> Debug for LazyField<'top, D> { impl<'top, D: LazyDecoder> LazyField<'top, D> { /// Returns a symbol representing the name of this field. pub fn name(&self) -> IonResult> { - let field_name = self.expanded_field.raw_name(); - let field_id = match field_name { - RawSymbolTokenRef::SymbolId(sid) => sid, - RawSymbolTokenRef::Text(text) => return Ok(SymbolRef::with_text(text)), - }; - self.expanded_field - .value - .context - .symbol_table - .symbol_for(field_id) - .map(|symbol| symbol.into()) - .ok_or_else(|| { - IonError::decoding_error("found a symbol ID that was not in the symbol table") - }) + self.expanded_field.name().read() } /// Returns a lazy value representing the value of this field. To access the value's data, /// see [`LazyValue::read`]. pub fn value(&self) -> LazyValue<'top, D> { LazyValue { - expanded_value: *self.expanded_field.value(), + expanded_value: self.expanded_field.value(), } } } diff --git a/src/lazy/system_reader.rs b/src/lazy/system_reader.rs index 4c379353..eb6e835c 100644 --- a/src/lazy/system_reader.rs +++ b/src/lazy/system_reader.rs @@ -97,7 +97,7 @@ impl PendingLst { } impl LazySystemAnyReader { - pub(crate) fn new(ion_data: Input) -> LazySystemAnyReader { + pub fn new(ion_data: Input) -> LazySystemAnyReader { let raw_reader = StreamingRawReader::new(AnyEncoding, ion_data); let expanding_reader = LazyExpandingReader::new(raw_reader); LazySystemReader { expanding_reader } @@ -105,7 +105,7 @@ impl LazySystemAnyReader { } impl LazySystemBinaryReader { - pub(crate) fn new(ion_data: Input) -> LazySystemBinaryReader { + pub fn new(ion_data: Input) -> LazySystemBinaryReader { let raw_reader = StreamingRawReader::new(BinaryEncoding_1_0, ion_data); let expanding_reader = LazyExpandingReader::new(raw_reader); LazySystemReader { expanding_reader } @@ -113,7 +113,7 @@ impl LazySystemBinaryReader { } impl LazySystemTextReader_1_1 { - pub(crate) fn new(ion_data: Input) -> LazySystemTextReader_1_1 { + pub fn new(ion_data: Input) -> LazySystemTextReader_1_1 { let raw_reader = StreamingRawReader::new(TextEncoding_1_1, ion_data); let expanding_reader = LazyExpandingReader::new(raw_reader); LazySystemReader { expanding_reader } @@ -182,7 +182,7 @@ impl LazySystemReader { for field_result in symbol_table.iter() { let field = field_result?; - if field.raw_name().matches_sid_or_text(7, "symbols") { + if field.name().read_raw()?.matches_sid_or_text(7, "symbols") { if found_symbols_field { return IonResult::decoding_error( "found symbol table with multiple 'symbols' fields", @@ -191,7 +191,7 @@ impl LazySystemReader { found_symbols_field = true; Self::process_symbols(pending_lst, field.value())?; } - if field.raw_name().matches_sid_or_text(6, "imports") { + if field.name().read_raw()?.matches_sid_or_text(6, "imports") { if found_imports_field { return IonResult::decoding_error( "found symbol table with multiple 'imports' fields", @@ -208,7 +208,7 @@ impl LazySystemReader { // Store any strings defined in the `symbols` field in the `PendingLst` for future application. fn process_symbols( pending_lst: &mut PendingLst, - symbols: &LazyExpandedValue<'_, Encoding>, + symbols: LazyExpandedValue<'_, Encoding>, ) -> IonResult<()> { if let ExpandedValueRef::List(list) = symbols.read()? { for symbol_text_result in list.iter() { @@ -226,7 +226,7 @@ impl LazySystemReader { // Check for `imports: $ion_symbol_table`. fn process_imports( pending_lst: &mut PendingLst, - imports: &LazyExpandedValue<'_, Encoding>, + imports: LazyExpandedValue<'_, Encoding>, ) -> IonResult<()> { match imports.read()? { ExpandedValueRef::Symbol(symbol_ref) => { @@ -253,6 +253,7 @@ impl LazySystemReader { #[cfg(test)] mod tests { use crate::lazy::binary::test_utilities::to_binary_ion; + use crate::lazy::decoder::RawVersionMarker; use crate::lazy::system_stream_item::SystemStreamItem; use crate::IonResult; @@ -275,12 +276,12 @@ mod tests { let mut system_reader = LazySystemBinaryReader::new(ion_data); loop { match system_reader.next_item()? { - SystemStreamItem::VersionMarker(major, minor) => { - println!("ivm => v{}.{}", major, minor) + SystemStreamItem::VersionMarker(marker) => { + println!("ivm => v{}.{}", marker.major(), marker.minor()) } SystemStreamItem::SymbolTable(ref s) => println!("symtab => {:?}", s), SystemStreamItem::Value(ref v) => println!("value => {:?}", v.read()?), - SystemStreamItem::EndOfStream => break, + SystemStreamItem::EndOfStream(_) => break, } } Ok(()) @@ -305,7 +306,7 @@ mod tests { println!("{:?}", value?.read()?); } } - SystemStreamItem::EndOfStream => break, + SystemStreamItem::EndOfStream(_) => break, _ => {} } } @@ -333,7 +334,7 @@ mod tests { println!("{:?}: {:?},", field.name()?, field.value().read()?); } } - SystemStreamItem::EndOfStream => break, + SystemStreamItem::EndOfStream(_) => break, _ => {} } } diff --git a/src/lazy/system_stream_item.rs b/src/lazy/system_stream_item.rs index 23c2ac22..70415b18 100644 --- a/src/lazy/system_stream_item.rs +++ b/src/lazy/system_stream_item.rs @@ -1,43 +1,46 @@ -use crate::lazy::decoder::LazyDecoder; +use std::fmt::{Debug, Formatter}; + +use crate::lazy::decoder::{LazyDecoder, RawVersionMarker}; +use crate::lazy::expanded::ExpandedValueSource; use crate::lazy::r#struct::LazyStruct; +use crate::lazy::raw_stream_item::{EndPosition, LazyRawStreamItem, RawStreamItem}; use crate::lazy::value::LazyValue; use crate::result::IonFailure; use crate::{IonError, IonResult}; -use std::fmt::{Debug, Formatter}; /// System stream elements that a SystemReader may encounter. #[non_exhaustive] pub enum SystemStreamItem<'top, D: LazyDecoder> { /// An Ion Version Marker (IVM) indicating the Ion major and minor version that were used to /// encode the values that follow. - VersionMarker(u8, u8), + VersionMarker(D::VersionMarker<'top>), /// An Ion symbol table encoded as a struct annotated with `$ion_symbol_table`. SymbolTable(LazyStruct<'top, D>), /// An application-level Ion value Value(LazyValue<'top, D>), /// The end of the stream - EndOfStream, + EndOfStream(EndPosition), } impl<'top, D: LazyDecoder> Debug for SystemStreamItem<'top, D> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { - SystemStreamItem::VersionMarker(major, minor) => { - write!(f, "version marker v{}.{}", major, minor) + SystemStreamItem::VersionMarker(marker) => { + write!(f, "version marker v{}.{}", marker.major(), marker.minor()) } SystemStreamItem::SymbolTable(_) => write!(f, "a symbol table"), SystemStreamItem::Value(value) => write!(f, "{}", value.ion_type()), - SystemStreamItem::EndOfStream => write!(f, ""), + SystemStreamItem::EndOfStream(_) => write!(f, ""), } } } impl<'top, D: LazyDecoder> SystemStreamItem<'top, D> { - /// If this item is an Ion version marker (IVM), returns `Some((major, minor))` indicating the + /// If this item is an Ion version marker (IVM), returns `Some(version_marker)` indicating the /// version. Otherwise, returns `None`. - pub fn version_marker(&self) -> Option<(u8, u8)> { - if let Self::VersionMarker(major, minor) = self { - Some((*major, *minor)) + pub fn version_marker(&self) -> Option> { + if let Self::VersionMarker(marker) = self { + Some(*marker) } else { None } @@ -45,7 +48,7 @@ impl<'top, D: LazyDecoder> SystemStreamItem<'top, D> { /// Like [`Self::version_marker`], but returns a [`crate::IonError::Decoding`] if this item /// is not an IVM. - pub fn expect_ivm(self) -> IonResult<(u8, u8)> { + pub fn expect_ivm(self) -> IonResult> { self.version_marker() .ok_or_else(|| IonError::decoding_error(format!("expected IVM, found {:?}", self))) } @@ -69,4 +72,26 @@ impl<'top, D: LazyDecoder> SystemStreamItem<'top, D> { IonResult::decoding_error(format!("expected value, found {:?}", self)) } } + + pub fn raw_stream_item(&self) -> Option> { + let item = match self { + SystemStreamItem::VersionMarker(marker) => RawStreamItem::VersionMarker(*marker), + SystemStreamItem::SymbolTable(symtab) => { + use ExpandedValueSource::*; + match symtab.as_value().lower().source { + ValueLiteral(literal) => RawStreamItem::Value(literal), + Template(..) | Constructed(..) => return None, + } + } + SystemStreamItem::Value(value) => { + use ExpandedValueSource::*; + match value.lower().source { + ValueLiteral(literal) => RawStreamItem::Value(literal), + Template(..) | Constructed(..) => return None, + } + } + SystemStreamItem::EndOfStream(end) => RawStreamItem::EndOfStream(*end), + }; + Some(item) + } } diff --git a/src/lazy/text/buffer.rs b/src/lazy/text/buffer.rs index f3c582d2..39444550 100644 --- a/src/lazy/text/buffer.rs +++ b/src/lazy/text/buffer.rs @@ -21,11 +21,9 @@ use nom::multi::{fold_many1, fold_many_m_n, many0_count, many1_count}; use nom::sequence::{delimited, pair, preceded, separated_pair, terminated, tuple}; use nom::{AsBytes, CompareResult, IResult, InputLength, InputTake, Needed, Parser}; -use crate::lazy::decoder::private::LazyRawValuePrivate; -use crate::lazy::decoder::{LazyRawFieldExpr, LazyRawValueExpr, RawFieldExpr, RawValueExpr}; +use crate::lazy::decoder::{LazyRawFieldExpr, LazyRawValueExpr, RawValueExpr}; use crate::lazy::encoding::{TextEncoding, TextEncoding_1_0, TextEncoding_1_1}; -use crate::lazy::never::Never; -use crate::lazy::raw_stream_item::{LazyRawStreamItem, RawStreamItem}; +use crate::lazy::raw_stream_item::{EndPosition, LazyRawStreamItem, RawStreamItem}; use crate::lazy::text::encoded_value::EncodedTextValue; use crate::lazy::text::matched::{ MatchedBlob, MatchedClob, MatchedDecimal, MatchedFieldName, MatchedFieldNameSyntax, @@ -34,14 +32,16 @@ use crate::lazy::text::matched::{ }; use crate::lazy::text::parse_result::{InvalidInputError, IonParseError}; use crate::lazy::text::parse_result::{IonMatchResult, IonParseResult}; -use crate::lazy::text::raw::r#struct::{LazyRawTextField_1_0, RawTextStructIterator_1_0}; +use crate::lazy::text::raw::r#struct::{LazyRawTextFieldName_1_0, RawTextStructIterator_1_0}; use crate::lazy::text::raw::sequence::{RawTextListIterator_1_0, RawTextSExpIterator_1_0}; use crate::lazy::text::raw::v1_1::reader::{ - EncodedTextMacroInvocation, MacroIdRef, RawTextEExpression_1_1, RawTextListIterator_1_1, - RawTextSExpIterator_1_1, RawTextStructIterator_1_1, TextListSpanFinder_1_1, - TextSExpSpanFinder_1_1, TextStructSpanFinder_1_1, + EncodedTextMacroInvocation, LazyRawTextFieldName_1_1, MacroIdRef, RawTextEExpression_1_1, + RawTextListIterator_1_1, RawTextSExpIterator_1_1, RawTextStructIterator_1_1, + TextListSpanFinder_1_1, TextSExpSpanFinder_1_1, TextStructSpanFinder_1_1, +}; +use crate::lazy::text::value::{ + LazyRawTextValue_1_0, LazyRawTextValue_1_1, LazyRawTextVersionMarker, MatchedRawTextValue, }; -use crate::lazy::text::value::{LazyRawTextValue_1_0, LazyRawTextValue_1_1, MatchedRawTextValue}; use crate::result::DecodingError; use crate::{IonError, IonResult, IonType, TimestampPrecision}; @@ -86,22 +86,6 @@ const WHITESPACE_CHARACTERS: &[char] = &[ /// Same as [WHITESPACE_CHARACTERS], but formatted as a string for use in some `nom` APIs pub(crate) const WHITESPACE_CHARACTERS_AS_STR: &str = " \t\r\n\x09\x0B\x0C"; -/// This helper function takes a parser and returns a closure that performs the same parsing -/// but prints the Result before returning the output. This is handy for debugging. -// A better implementation would use a macro to auto-generate the label from the file name and -// line number. -fn dbg_parse>( - label: &'static str, - mut parser: P, -) -> impl Parser { - move |input: I| { - let result = parser.parse(input); - #[cfg(debug_assertions)] - println!("{}: {:?}", label, result); - result - } -} - /// A slice of unsigned bytes that can be cheaply copied and which defines methods for parsing /// the various encoding elements of a text Ion stream. /// @@ -110,7 +94,7 @@ fn dbg_parse>( /// `TextBufferView`) or a `MatchedValue` that retains information discovered during parsing that /// will be useful if the match is later fully materialized into a value. #[derive(Clone, Copy)] -pub(crate) struct TextBufferView<'top> { +pub struct TextBufferView<'top> { // `data` is a slice of remaining data in the larger input stream. // `offset` is the absolute position in the overall input stream where that slice begins. // @@ -120,7 +104,7 @@ pub(crate) struct TextBufferView<'top> { // offset: 6 data: &'top [u8], offset: usize, - allocator: &'top BumpAllocator, + pub(crate) allocator: &'top BumpAllocator, } impl<'a> PartialEq for TextBufferView<'a> { @@ -186,7 +170,7 @@ impl<'top> TextBufferView<'top> { } /// Returns a slice containing all of the buffer's bytes. - pub fn bytes(&self) -> &[u8] { + pub fn bytes(&self) -> &'top [u8] { self.data } @@ -201,6 +185,11 @@ impl<'top> TextBufferView<'top> { self.data.len() } + /// Returns the stream byte offset range that this buffer contains. + pub fn range(&self) -> Range { + self.offset..self.offset + self.len() + } + /// Returns `true` if there are no bytes in the buffer. Otherwise, returns `false`. pub fn is_empty(&self) -> bool { self.data.is_empty() @@ -289,8 +278,10 @@ impl<'top> TextBufferView<'top> { } /// Matches an Ion version marker (e.g. `$ion_1_0` or `$ion_1_1`.) - pub fn match_ivm(self) -> IonParseResult<'top, (u8, u8)> { - let (remaining, (major, minor)) = terminated( + pub fn match_ivm>( + self, + ) -> IonParseResult<'top, LazyRawTextVersionMarker<'top, E>> { + let (remaining, (matched_marker, (matched_major, matched_minor))) = consumed(terminated( preceded( complete_tag("$ion_"), separated_pair(complete_digit1, complete_tag("_"), complete_digit1), @@ -298,22 +289,24 @@ impl<'top> TextBufferView<'top> { // Look ahead to make sure the IVM isn't followed by a '::'. If it is, then it's not // an IVM, it's an annotation. peek(whitespace_and_then(not(complete_tag("::")))), - )(self)?; + ))(self)?; // `major` and `minor` are base 10 digits. Turning them into `&str`s is guaranteed to succeed. - let major_version = u8::from_str(major.as_text().unwrap()).map_err(|_| { - let error = InvalidInputError::new(major) + let major_version = u8::from_str(matched_major.as_text().unwrap()).map_err(|_| { + let error = InvalidInputError::new(matched_major) .with_label("parsing an IVM major version") .with_description("value did not fit in an unsigned byte"); nom::Err::Failure(IonParseError::Invalid(error)) })?; - let minor_version = u8::from_str(minor.as_text().unwrap()).map_err(|_| { - let error = InvalidInputError::new(minor) + let minor_version = u8::from_str(matched_minor.as_text().unwrap()).map_err(|_| { + let error = InvalidInputError::new(matched_minor) .with_label("parsing an IVM minor version") .with_description("value did not fit in an unsigned byte"); nom::Err::Failure(IonParseError::Invalid(error)) })?; + let marker = + LazyRawTextVersionMarker::::new(matched_marker, major_version, minor_version); - Ok((remaining, (major_version, minor_version))) + Ok((remaining, marker)) } /// Matches one or more annotations. @@ -395,7 +388,9 @@ impl<'top> TextBufferView<'top> { /// /// If a pair is found, returns `Some(field)` and consumes the following comma if present. /// If no pair is found (that is: the end of the struct is next), returns `None`. - pub fn match_struct_field(self) -> IonParseResult<'top, Option>> { + pub fn match_struct_field( + self, + ) -> IonParseResult<'top, Option>> { // A struct field can have leading whitespace, but we want the buffer slice that we match // to begin with the field name. Here we skip any whitespace so we have another named // slice (`input_including_field_name`) with that property. @@ -404,19 +399,13 @@ impl<'top> TextBufferView<'top> { // If the next thing in the input is a `}`, return `None`. value(None, Self::match_struct_end), // Otherwise, match a name/value pair and turn it into a `LazyRawTextField`. - Self::match_struct_field_name_and_value.map(move |(matched_field_name, mut value)| { - // Add the field name offsets to the `EncodedTextValue` - value.encoded_value = value.encoded_value.with_field_name( - matched_field_name.syntax(), - matched_field_name.span().start, - matched_field_name.span().len(), - ); - // Replace the value's buffer slice (which starts with the value itself) with the - // buffer slice we created that begins with the field name. - value.input = input_including_field_name; - Some(LazyRawTextField_1_0 { - value: value.into(), - }) + Self::match_struct_field_name_and_value.map(move |(matched_field_name, value)| { + let field_name = LazyRawTextFieldName_1_0::new(matched_field_name); + let field_value = LazyRawTextValue_1_0::new(value); + Some(LazyRawFieldExpr::<'top, TextEncoding_1_0>::NameValue( + field_name, + field_value, + )) }), ))(input_including_field_name) } @@ -433,7 +422,7 @@ impl<'top> TextBufferView<'top> { ) -> IonParseResult< 'top, ( - MatchedFieldName, + MatchedFieldName<'top>, MatchedRawTextValue<'top, TextEncoding_1_0>, ), > { @@ -462,56 +451,20 @@ impl<'top> TextBufferView<'top> { // If the next thing in the input is a `}`, return `None`. Self::match_struct_end.map(|_| Ok(None)), terminated( - Self::match_e_expression, + Self::match_e_expression.map(|eexp| Ok(Some(LazyRawFieldExpr::EExp(eexp)))), whitespace_and_then(alt((tag(","), peek(tag("}"))))), - ) - .map(|invocation| Ok(Some(RawFieldExpr::MacroInvocation(invocation)))), + ), Self::match_struct_field_name_and_e_expression_1_1.map(|(field_name, invocation)| { - // TODO: We're discarding the name encoding information here. When we revise our field name - // storage strategy[1], we should make sure to capture this for tooling's sake. - // [1]: https://github.com/amazon-ion/ion-rust/issues/631 - let name_bytes = self.slice( - field_name.span().start - self.offset(), - field_name.span().len(), - ); - let name = match field_name.read(name_bytes) { - Ok(name) => name, - Err(e) => { - let error = InvalidInputError::new(name_bytes).with_description(format!( - "failed to read field name associated with e-expression: {e:?}" - )); - return Err(nom::Err::Error(IonParseError::Invalid(error))); - } - }; - Ok(Some(RawFieldExpr::NameValuePair( - name, - RawValueExpr::MacroInvocation(invocation), + Ok(Some(LazyRawFieldExpr::NameEExp( + LazyRawTextFieldName_1_1::new(field_name), + invocation, ))) }), // Otherwise, match a name/value pair and turn it into a `LazyRawTextField`. - Self::match_struct_field_name_and_value_1_1.map(move |(field_name, mut value)| { - // Add the field name offsets to the `EncodedTextValue` - value.encoded_value = value.encoded_value.with_field_name( - field_name.syntax(), - field_name.span().start, - field_name.span().len(), - ); - // Replace the value's buffer slice (which starts with the value itself) with the - // buffer slice we created that begins with the field name. - value.input = input_including_field_name; - let field_name = match value.field_name() { - Ok(name) => name, - Err(e) => { - let error = InvalidInputError::new(self) - .with_description(format!("failed to struct field name: {e:?}")); - return Err(nom::Err::Error(IonParseError::Invalid(error))); - } - }; + Self::match_struct_field_name_and_value_1_1.map(move |(field_name, value)| { + let field_name = LazyRawTextFieldName_1_1::new(field_name); let field_value = LazyRawTextValue_1_1::new(value); - Ok(Some(RawFieldExpr::NameValuePair( - field_name, - RawValueExpr::ValueLiteral(field_value), - ))) + Ok(Some(LazyRawFieldExpr::NameValue(field_name, field_value))) }), ))(input_including_field_name)?; Ok((input_after_field, field_expr_result?)) @@ -522,7 +475,7 @@ impl<'top> TextBufferView<'top> { /// range of input bytes where the field name is found, and the value. pub fn match_struct_field_name_and_e_expression_1_1( self, - ) -> IonParseResult<'top, (MatchedFieldName, RawTextEExpression_1_1<'top>)> { + ) -> IonParseResult<'top, (MatchedFieldName<'top>, RawTextEExpression_1_1<'top>)> { terminated( separated_pair( whitespace_and_then(Self::match_struct_field_name), @@ -541,7 +494,7 @@ impl<'top> TextBufferView<'top> { ) -> IonParseResult< 'top, ( - MatchedFieldName, + MatchedFieldName<'top>, MatchedRawTextValue<'top, TextEncoding_1_1>, ), > { @@ -602,28 +555,31 @@ impl<'top> TextBufferView<'top> { /// * An identifier /// * A symbol ID /// * A short-form string - pub fn match_struct_field_name(self) -> IonParseResult<'top, MatchedFieldName> { - match_and_span(alt(( + pub fn match_struct_field_name(self) -> IonParseResult<'top, MatchedFieldName<'top>> { + consumed(alt(( Self::match_string.map(MatchedFieldNameSyntax::String), Self::match_symbol.map(MatchedFieldNameSyntax::Symbol), ))) - .map(|(syntax, span)| MatchedFieldName::new(syntax, span)) + .map(|(matched_inpet, syntax)| MatchedFieldName::new(matched_inpet, syntax)) .parse(self) } /// Matches a single top-level value, an IVM, or the end of the stream. pub fn match_top_level_item_1_0( self, - ) -> IonParseResult<'top, RawStreamItem, Never>> { + ) -> IonParseResult<'top, LazyRawStreamItem<'top, TextEncoding_1_0>> { // If only whitespace/comments remain, we're at the end of the stream. let (input_after_ws, _ws) = self.match_optional_comments_and_whitespace()?; if input_after_ws.is_empty() { - return Ok((input_after_ws, RawStreamItem::EndOfStream)); + return Ok(( + input_after_ws, + RawStreamItem::EndOfStream(EndPosition::new(input_after_ws.offset())), + )); } // Otherwise, the next item must be an IVM or a value. // We check for IVMs first because the rules for a symbol identifier will match them. alt(( - Self::match_ivm.map(|(major, minor)| RawStreamItem::VersionMarker(major, minor)), + Self::match_ivm::.map(RawStreamItem::VersionMarker), Self::match_annotated_value .map(LazyRawTextValue_1_0::from) .map(RawStreamItem::Value), @@ -638,12 +594,15 @@ impl<'top> TextBufferView<'top> { // If only whitespace/comments remain, we're at the end of the stream. let (input_after_ws, _ws) = self.match_optional_comments_and_whitespace()?; if input_after_ws.is_empty() { - return Ok((input_after_ws, RawStreamItem::EndOfStream)); + return Ok(( + input_after_ws, + RawStreamItem::EndOfStream(EndPosition::new(input_after_ws.offset())), + )); } // Otherwise, the next item must be an IVM or a value. // We check for IVMs first because the rules for a symbol identifier will match them. alt(( - Self::match_ivm.map(|(major, minor)| RawStreamItem::VersionMarker(major, minor)), + Self::match_ivm::.map(RawStreamItem::VersionMarker), Self::match_e_expression.map(RawStreamItem::EExpression), Self::match_annotated_value_1_1 .map(LazyRawTextValue_1_1::from) @@ -2476,10 +2435,12 @@ mod tests { #[test] fn test_match_ivm() { fn match_ivm(input: &str) { - MatchTest::new(input).expect_match(match_length(TextBufferView::match_ivm)); + MatchTest::new(input) + .expect_match(match_length(TextBufferView::match_ivm::)); } fn mismatch_ivm(input: &str) { - MatchTest::new(input).expect_mismatch(match_length(TextBufferView::match_ivm)); + MatchTest::new(input) + .expect_mismatch(match_length(TextBufferView::match_ivm::)); } match_ivm("$ion_1_0"); @@ -2954,9 +2915,6 @@ mod tests { #[case::e_exp_in_e_exp("(:foo (:bar 1))")] #[case::e_exp_in_list("[a, b, (:foo 1)]")] #[case::e_exp_in_sexp("(a (:foo 1) c)")] - #[case::e_exp_in_struct("{(:foo)}")] - // #[case::e_exp_in_struct_with_space_before("{ (:foo)}")] - #[case::e_exp_in_struct_with_space_after("{(:foo) }")] // #[case::e_exp_in_struct_field("{a:(:foo)}")] // #[case::e_exp_in_struct_field_with_comma("{a:(:foo),}")] #[case::e_exp_in_struct_field_with_comma_and_second_field("{a:(:foo), b:2}")] diff --git a/src/lazy/text/encoded_value.rs b/src/lazy/text/encoded_value.rs index c1e1151e..a597f869 100644 --- a/src/lazy/text/encoded_value.rs +++ b/src/lazy/text/encoded_value.rs @@ -1,10 +1,9 @@ -use crate::lazy::encoding::TextEncoding; -use crate::lazy::text::buffer::TextBufferView; -use crate::lazy::text::matched::{MatchedFieldNameSyntax, MatchedValue}; -use crate::result::IonFailure; -use crate::{IonResult, IonType, RawSymbolTokenRef}; use std::ops::Range; +use crate::lazy::encoding::TextEncoding; +use crate::lazy::text::matched::MatchedValue; +use crate::IonType; + /// Represents the type, offset, and length metadata of the various components of an encoded value /// in a text input stream. /// @@ -13,6 +12,8 @@ use std::ops::Range; /// without re-parsing its header information each time. #[derive(Copy, Clone, Debug, PartialEq)] pub(crate) struct EncodedTextValue<'top, E: TextEncoding<'top>> { + // TODO: Update this comment now that field_name is not part of 'value' + // Each encoded text value has up to three components, appearing in the following order: // // [ field_name? | annotations? | data ] @@ -42,17 +43,14 @@ pub(crate) struct EncodedTextValue<'top, E: TextEncoding<'top>> { data_offset: usize, // The number of bytes _before_ `data_offset` at which the field name begins. If this value // does not have a field name, this value will be zero. - field_name_offset: u32, + // field_name_offset: u32, // The number of bytes _before_ `data_offset` at which the annotations sequence begins. // If this value does not have a field name, this value will be zero. annotations_offset: u32, // The number of bytes used to encode the data component of this Ion value. data_length: usize, - // The number of bytes used to encode the field name preceding the data, if any. - // If there is no field name (i.e. the value is not inside a struct), this will be zero. - // If there is whitespace before the field name, this will not include it. - field_name_length: u32, + // The number of bytes used to encode the annotations sequence preceding the data, if any. // If there is no annotations sequence, this will be zero. If there is whitespace before the // annotations sequence, this will not include it. @@ -63,11 +61,6 @@ pub(crate) struct EncodedTextValue<'top, E: TextEncoding<'top>> { // value is stored. For others (e.g. a timestamp), the various components of the value are // recognized during matching and partial information like subfield offsets can be stored here. matched_value: MatchedValue<'top, E>, - - // If this value is a struct field value, this will be populated with an enum indicating - // the syntax of the associated field name. If the field name is later read, the decoder - // can avoid re-parsing the input from scratch. - field_name_syntax: Option, } impl<'top, E: TextEncoding<'top>> EncodedTextValue<'top, E> { @@ -79,34 +72,12 @@ impl<'top, E: TextEncoding<'top>> EncodedTextValue<'top, E> { EncodedTextValue { data_offset: offset, data_length: length, - field_name_length: 0, - field_name_offset: 0, annotations_offset: 0, annotations_length: 0, matched_value, - field_name_syntax: None, } } - // The field name range should contain the field name literal itself without any trailing - // whitespace or the delimiting ':'. - // Examples: - // foo - // 'foo' - // "foo" - // $10 - pub(crate) fn with_field_name( - mut self, - field_name_syntax: MatchedFieldNameSyntax, - offset: usize, - length: usize, - ) -> EncodedTextValue<'top, E> { - self.field_name_syntax = Some(field_name_syntax); - self.field_name_offset = (self.data_offset - offset) as u32; - self.field_name_length = length as u32; - self - } - // The annotations should include all of the symbol tokens, their delimiting '::'s, and any // interstitial whitespace. It should not include any leading/trailing whitespace or the value // itself. @@ -158,31 +129,6 @@ impl<'top, E: TextEncoding<'top>> EncodedTextValue<'top, E> { self.data_offset..(self.data_offset + self.data_length) } - pub fn field_name<'data>( - &self, - input: TextBufferView<'data>, - ) -> IonResult> { - if let Some(field_name_syntax) = self.field_name_syntax() { - let relative_start = - self.data_offset - input.offset() - (self.field_name_offset as usize); - let field_name_bytes = input.slice(relative_start, self.field_name_length as usize); - field_name_syntax.read(field_name_bytes) - } else { - IonResult::illegal_operation( - "requested field name, but value was not in a struct field", - ) - } - } - - pub fn field_name_range(&self) -> Option> { - if self.field_name_offset == 0 { - return None; - } - let start = self.data_offset - (self.field_name_offset as usize); - let end = start + (self.field_name_length as usize); - Some(start..end) - } - pub fn annotations_range(&self) -> Option> { if self.annotations_offset == 0 { return None; @@ -192,19 +138,14 @@ impl<'top, E: TextEncoding<'top>> EncodedTextValue<'top, E> { Some(start..end) } - pub fn has_field_name(&self) -> bool { - self.field_name_offset > 0 - } - pub fn has_annotations(&self) -> bool { self.annotations_offset > 0 } - /// Returns the total number of bytes used to represent the current value, including the - /// field ID (if any), its annotations (if any), its header (type descriptor + length bytes), - /// and its value. + /// Returns the total number of bytes used to represent the current value, including its + /// annotations (if any), its header (type descriptor + length bytes), and its value. pub fn total_length(&self) -> usize { - self.data_length + u32::max(self.annotations_offset, self.field_name_offset) as usize + self.data_length + self.annotations_offset as usize } pub fn annotated_value_range(&self) -> Range { @@ -213,10 +154,6 @@ impl<'top, E: TextEncoding<'top>> EncodedTextValue<'top, E> { start..end } - pub fn field_name_syntax(&self) -> Option { - self.field_name_syntax - } - pub fn matched(&self) -> MatchedValue<'top, E> { self.matched_value } @@ -224,9 +161,9 @@ impl<'top, E: TextEncoding<'top>> EncodedTextValue<'top, E> { #[cfg(test)] mod tests { - use super::*; use crate::lazy::encoding::TextEncoding_1_0; - use crate::lazy::text::matched::MatchedSymbol; + + use super::*; #[test] fn total_length_data_only() { @@ -235,18 +172,6 @@ mod tests { assert_eq!(value.total_length(), 12); } - #[test] - fn total_length_data_with_field_name() { - let value = - EncodedTextValue::::new(MatchedValue::Null(IonType::Null), 100, 12) - .with_field_name( - MatchedFieldNameSyntax::Symbol(MatchedSymbol::Identifier), - 90, - 4, - ); - assert_eq!(value.total_length(), 22); - } - #[test] fn total_length_data_with_annotations() { let value = @@ -254,28 +179,4 @@ mod tests { .with_annotations_sequence(90, 4); assert_eq!(value.total_length(), 22); } - - #[test] - fn total_length_data_with_field_name_and_annotations() { - let value = - EncodedTextValue::::new(MatchedValue::Null(IonType::Null), 100, 12) - .with_field_name( - MatchedFieldNameSyntax::Symbol(MatchedSymbol::Identifier), - 90, - 4, - ) - .with_annotations_sequence(94, 6); - assert_eq!(value.total_length(), 22); - - // Same test but with extra whitespace between the components - let value = - EncodedTextValue::::new(MatchedValue::Null(IonType::Null), 100, 12) - .with_field_name( - MatchedFieldNameSyntax::Symbol(MatchedSymbol::Identifier), - 80, - 4, - ) - .with_annotations_sequence(91, 6); - assert_eq!(value.total_length(), 32, "{:?}", value); - } } diff --git a/src/lazy/text/matched.rs b/src/lazy/text/matched.rs index 24126cae..79241878 100644 --- a/src/lazy/text/matched.rs +++ b/src/lazy/text/matched.rs @@ -19,16 +19,17 @@ //! use the previously recorded information to minimize the amount of information that needs to be //! re-discovered. -use std::borrow::Cow; use std::num::IntErrorKind; use std::ops::Range; use std::str::FromStr; +use bumpalo::collections::Vec as BumpVec; +use bumpalo::Bump as BumpAllocator; use nom::branch::alt; use nom::bytes::streaming::tag; use nom::character::is_hex_digit; use nom::sequence::preceded; -use nom::{AsChar, Parser}; +use nom::{AsBytes, AsChar, Parser}; use num_bigint::{BigInt, BigUint}; use num_traits::Num; use smallvec::SmallVec; @@ -36,6 +37,7 @@ use smallvec::SmallVec; use crate::decimal::coefficient::{Coefficient, Sign}; use crate::lazy::bytes_ref::BytesRef; use crate::lazy::decoder::{LazyDecoder, LazyRawFieldExpr, LazyRawValueExpr}; +use crate::lazy::span::Span; use crate::lazy::str_ref::StrRef; use crate::lazy::text::as_utf8::AsUtf8; use crate::lazy::text::buffer::TextBufferView; @@ -48,7 +50,7 @@ use crate::{ /// A partially parsed Ion value. #[derive(Clone, Copy, Debug)] -pub(crate) enum MatchedValue<'top, D: LazyDecoder> { +pub enum MatchedValue<'top, D: LazyDecoder> { // `Null` and `Bool` are fully parsed because they only involve matching a keyword. Null(IonType), Bool(bool), @@ -90,7 +92,7 @@ impl<'top, D: LazyDecoder> PartialEq for MatchedValue<'top, D> { } #[derive(Copy, Clone, Debug, PartialEq)] -pub(crate) enum MatchedFieldNameSyntax { +pub enum MatchedFieldNameSyntax { Symbol(MatchedSymbol), String(MatchedString), } @@ -98,49 +100,53 @@ pub(crate) enum MatchedFieldNameSyntax { impl MatchedFieldNameSyntax { pub fn read<'data>( &self, + // TODO: Remove allocator, use the one in TBV + allocator: &'data BumpAllocator, matched_input: TextBufferView<'data>, ) -> IonResult> { match self { - MatchedFieldNameSyntax::Symbol(matched_symbol) => matched_symbol.read(matched_input), - MatchedFieldNameSyntax::String(matched_string) => { - matched_string.read(matched_input).map(|s| s.into()) + MatchedFieldNameSyntax::Symbol(matched_symbol) => { + matched_symbol.read(allocator, matched_input) } + MatchedFieldNameSyntax::String(matched_string) => matched_string + .read(allocator, matched_input) + .map(|s| s.into()), } } } #[derive(Copy, Clone, Debug, PartialEq)] -pub(crate) struct MatchedFieldName { +pub struct MatchedFieldName<'top> { // This is stored as a tuple to allow this type to be `Copy`; Range is not `Copy`. - span: (usize, usize), + input: TextBufferView<'top>, syntax: MatchedFieldNameSyntax, } -impl MatchedFieldName { - pub fn new(syntax: MatchedFieldNameSyntax, span: Range) -> Self { - Self { - span: (span.start, span.end), - syntax, - } - } - pub fn span(&self) -> Range { - self.span.0..self.span.1 +impl<'top> MatchedFieldName<'top> { + pub fn new(input: TextBufferView<'top>, syntax: MatchedFieldNameSyntax) -> Self { + Self { input, syntax } } + pub fn syntax(&self) -> MatchedFieldNameSyntax { self.syntax } - pub fn read<'data>( - &self, - matched_input: TextBufferView<'data>, - ) -> IonResult> { - self.syntax.read(matched_input) + pub fn read(&self) -> IonResult> { + self.syntax.read(self.input.allocator, self.input) + } + + pub fn range(&self) -> Range { + self.input.range() + } + + pub fn span(&self) -> Span<'top> { + Span::with_offset(self.input.offset(), self.input.bytes()) } } /// A partially parsed Ion int. #[derive(Copy, Clone, Debug, PartialEq)] -pub(crate) struct MatchedInt { +pub struct MatchedInt { radix: u32, // Offset of the digits from the beginning of the value digits_offset: usize, @@ -215,7 +221,7 @@ impl MatchedInt { /// A partially parsed Ion float. #[derive(Copy, Clone, Debug, PartialEq)] -pub(crate) enum MatchedFloat { +pub enum MatchedFloat { /// `+inf` PositiveInfinity, /// `-inf` @@ -255,7 +261,7 @@ impl MatchedFloat { } #[derive(Copy, Clone, Debug, PartialEq)] -pub(crate) struct MatchedDecimal { +pub struct MatchedDecimal { is_negative: bool, digits_offset: u16, digits_length: u16, @@ -361,7 +367,7 @@ impl MatchedDecimal { } #[derive(Clone, Copy, Debug, PartialEq)] -pub(crate) enum MatchedString { +pub enum MatchedString { /// The string only has one segment. (e.g. "foo") ShortWithoutEscapes, ShortWithEscapes, @@ -383,19 +389,25 @@ impl MatchedString { // Strings longer than 64 bytes will allocate a larger space on the heap. const STACK_ALLOC_BUFFER_CAPACITY: usize = 64; - pub fn read<'data>(&self, matched_input: TextBufferView<'data>) -> IonResult> { + pub fn read<'data>( + &self, + allocator: &'data BumpAllocator, + matched_input: TextBufferView<'data>, + ) -> IonResult> { match self { MatchedString::ShortWithoutEscapes => { self.read_short_string_without_escapes(matched_input) } - MatchedString::ShortWithEscapes => self.read_short_string_with_escapes(matched_input), + MatchedString::ShortWithEscapes => { + self.read_short_string_with_escapes(allocator, matched_input) + } MatchedString::LongSingleSegmentWithoutEscapes => { self.read_long_string_single_segment_without_escapes(matched_input) } MatchedString::LongSingleSegmentWithEscapes => { - self.read_long_string_single_segment_with_escapes(matched_input) + self.read_long_string_single_segment_with_escapes(allocator, matched_input) } - MatchedString::Long => self.read_long_string(matched_input), + MatchedString::Long => self.read_long_string(allocator, matched_input), } } @@ -413,12 +425,13 @@ impl MatchedString { fn read_long_string_single_segment_with_escapes<'data>( &self, + allocator: &'data BumpAllocator, matched_input: TextBufferView<'data>, ) -> IonResult> { // Take a slice of the input that ignores the first and last three bytes, which are quotes. let body = matched_input.slice(3, matched_input.len() - 6); // There are no escaped characters, so we can just validate the string in-place. - let mut sanitized = Vec::with_capacity(matched_input.len()); + let mut sanitized = BumpVec::with_capacity_in(matched_input.len(), allocator); replace_escapes_with_byte_values( body, &mut sanitized, @@ -427,19 +440,20 @@ impl MatchedString { // Support unicode escapes true, )?; - let text = String::from_utf8(sanitized).unwrap(); - Ok(StrRef::from(text.to_string())) + let text = std::str::from_utf8(sanitized.into_bump_slice()).unwrap(); + Ok(StrRef::from(text)) } fn read_long_string<'data>( &self, + allocator: &'data BumpAllocator, matched_input: TextBufferView<'data>, ) -> IonResult> { // We're going to re-parse the input to visit each segment, copying its sanitized bytes into // a contiguous buffer. // Create a new buffer to hold the sanitized data. - let mut sanitized = Vec::with_capacity(matched_input.len()); + let mut sanitized = BumpVec::with_capacity_in(matched_input.len(), allocator); let mut remaining = matched_input; // Iterate over the string segments using the match_long_string_segment parser. @@ -460,7 +474,7 @@ impl MatchedString { true, )?; } - let text = String::from_utf8(sanitized).unwrap(); + let text = std::str::from_utf8(sanitized.into_bump_slice()).unwrap(); Ok(StrRef::from(text)) } @@ -478,13 +492,14 @@ impl MatchedString { fn read_short_string_with_escapes<'data>( &self, + allocator: &'data BumpAllocator, matched_input: TextBufferView<'data>, ) -> IonResult> { // Take a slice of the input that ignores the first and last bytes, which are quotes. let body = matched_input.slice(1, matched_input.len() - 2); // There are escaped characters. We need to build a new version of our string // that replaces the escaped characters with their corresponding bytes. - let mut sanitized = Vec::with_capacity(matched_input.len()); + let mut sanitized = BumpVec::with_capacity_in(matched_input.len(), allocator); replace_escapes_with_byte_values( body, &mut sanitized, @@ -493,14 +508,14 @@ impl MatchedString { // Support Unicode escapes true, )?; - let text = String::from_utf8(sanitized).unwrap(); - Ok(StrRef::from(text.to_string())) + let text = std::str::from_utf8(sanitized.into_bump_slice()).unwrap(); + Ok(StrRef::from(text)) } } fn replace_escapes_with_byte_values( matched_input: TextBufferView, - sanitized: &mut Vec, + sanitized: &mut BumpVec, // If the text being escaped is in a long string or a clob, then unescaped \r\n and \r get // normalized to \n. normalize_newlines: bool, @@ -552,7 +567,7 @@ fn replace_escapes_with_byte_values( #[cold] fn normalize_newline<'data>( remaining: TextBufferView<'data>, - sanitized: &mut Vec, + sanitized: &mut BumpVec, escape_offset: usize, ) -> TextBufferView<'data> { // Insert the normalized newline @@ -573,7 +588,7 @@ fn normalize_newline<'data>( /// sequence. fn decode_escape_into_bytes<'data>( input: TextBufferView<'data>, - sanitized: &mut Vec, + sanitized: &mut BumpVec, support_unicode_escapes: bool, ) -> IonResult> { // Note that by the time this method has been called, the parser has already confirmed that @@ -651,7 +666,7 @@ fn decode_escape_into_bytes<'data>( fn decode_hex_digits_escape<'data>( num_digits: usize, input: TextBufferView<'data>, - sanitized: &mut Vec, + sanitized: &mut BumpVec, support_unicode_escapes: bool, ) -> IonResult> { if input.len() < num_digits { @@ -733,7 +748,7 @@ fn decode_hex_digits_escape<'data>( /// with the specified high surrogate. Appends the UTF-8 encoding of the resulting Unicode scalar /// to `sanitized` and returns the remaining text in the buffer. fn complete_surrogate_pair<'data>( - sanitized: &mut Vec, + sanitized: &mut BumpVec, high_surrogate: u32, input: TextBufferView<'data>, ) -> IonResult> { @@ -807,7 +822,7 @@ fn code_point_is_a_high_surrogate(value: u32) -> bool { } #[derive(Clone, Copy, Debug, PartialEq)] -pub(crate) enum MatchedSymbol { +pub enum MatchedSymbol { /// A numeric symbol ID (e.g. `$21`) SymbolId, /// The symbol is an unquoted identifier (e.g. `foo`) @@ -823,13 +838,14 @@ pub(crate) enum MatchedSymbol { impl MatchedSymbol { pub fn read<'data>( &self, + allocator: &'data BumpAllocator, matched_input: TextBufferView<'data>, ) -> IonResult> { use MatchedSymbol::*; match self { SymbolId => self.read_symbol_id(matched_input), Identifier | Operator => self.read_unquoted(matched_input), - QuotedWithEscapes => self.read_quoted_with_escapes(matched_input), + QuotedWithEscapes => self.read_quoted_with_escapes(allocator, matched_input), QuotedWithoutEscapes => self.read_quoted_without_escapes(matched_input), } } @@ -841,13 +857,16 @@ impl MatchedSymbol { // Take a slice of the input that ignores the first and last bytes, which are quotes. let body = matched_input.slice(1, matched_input.len() - 2); // There are no escaped characters, so we can just validate the string in-place. - let text = body.as_text()?; - let str_ref = RawSymbolTokenRef::Text(text.into()); + let text = body + .as_text() + .expect("successfully lexed symbol later found to be invalid UTF-8"); + let str_ref = RawSymbolTokenRef::Text(text); Ok(str_ref) } pub(crate) fn read_quoted_with_escapes<'data>( &self, + allocator: &'data BumpAllocator, matched_input: TextBufferView<'data>, ) -> IonResult> { // Take a slice of the input that ignores the first and last bytes, which are quotes. @@ -855,11 +874,10 @@ impl MatchedSymbol { // There are escaped characters. We need to build a new version of our symbol // that replaces the escaped characters with their corresponding bytes. - let mut sanitized = Vec::with_capacity(matched_input.len()); - + let mut sanitized = BumpVec::with_capacity_in(matched_input.len(), allocator); replace_escapes_with_byte_values(body, &mut sanitized, false, true)?; - let text = String::from_utf8(sanitized).unwrap(); - Ok(RawSymbolTokenRef::Text(text.into())) + let text = std::str::from_utf8(sanitized.into_bump_slice()).unwrap(); + Ok(RawSymbolTokenRef::Text(text)) } /// Reads a symbol with no surrounding quotes (and therefore no escapes). @@ -868,9 +886,7 @@ impl MatchedSymbol { &self, matched_input: TextBufferView<'data>, ) -> IonResult> { - matched_input - .as_text() - .map(|t| RawSymbolTokenRef::Text(Cow::Borrowed(t))) + matched_input.as_text().map(RawSymbolTokenRef::Text) } fn read_symbol_id<'data>( @@ -1078,6 +1094,7 @@ impl MatchedBlob { pub(crate) fn read<'data>( &self, + allocator: &'data BumpAllocator, matched_input: TextBufferView<'data>, ) -> IonResult> { let base64_text = matched_input.slice(self.content_offset, self.content_length); @@ -1087,27 +1104,49 @@ impl MatchedBlob { // has inner whitespace, we need to strip it out. let contains_whitespace = matched_bytes.iter().any(|b| b.is_ascii_whitespace()); + let max_decoded_size = (matched_bytes.len() + 3) / 4 * 3; + let mut decoding_buffer = BumpVec::with_capacity_in(max_decoded_size, allocator); + + decoding_buffer.resize(max_decoded_size, 0u8); let decode_result = if contains_whitespace { // This allocates a fresh Vec to store the sanitized bytes. It could be replaced by // a reusable buffer if this proves to be a bottleneck. - let sanitized_base64_text: Vec = matched_bytes + let mut sanitized_base64_text = + BumpVec::with_capacity_in(matched_bytes.len(), allocator); + let non_whitespaces_bytes = matched_bytes .iter() .copied() - .filter(|b| !b.is_ascii_whitespace()) - .collect(); - base64::decode(sanitized_base64_text) + .filter(|b| !b.is_ascii_whitespace()); + sanitized_base64_text.extend(non_whitespaces_bytes); + base64::decode_config_slice( + sanitized_base64_text.as_bytes(), + base64::STANDARD, + decoding_buffer.as_mut_slice(), + ) } else { - base64::decode(matched_bytes) + base64::decode_config_slice( + matched_bytes, + base64::STANDARD, + decoding_buffer.as_mut_slice(), + ) }; - decode_result - .map_err(|e| { - IonError::decoding_error(format!( + let decoded_size = match decode_result { + Ok(size) => size, + Err(e) => { + return IonResult::decoding_error(format!( "failed to parse blob with invalid base64 data:\n'{:?}'\n{e:?}:", matched_input.bytes() )) - }) - .map(BytesRef::from) + } + }; + + let decoded_bytes = decoding_buffer + .into_bump_slice() + .get(..decoded_size) + .expect("decoding buffer was shorter than indicated"); + + Ok(BytesRef::from(decoded_bytes)) } } @@ -1122,6 +1161,7 @@ pub enum MatchedClob { impl MatchedClob { pub(crate) fn read<'data>( &self, + allocator: &'data BumpAllocator, matched_input: TextBufferView<'data>, ) -> IonResult> { // `matched_input` contains the entire clob, including the opening {{ and closing }}. @@ -1129,12 +1169,13 @@ impl MatchedClob { // long-form string content. let matched_inside_braces = matched_input.slice(2, matched_input.len() - 4); match self { - MatchedClob::Short => self.read_short_clob(matched_inside_braces), - MatchedClob::Long => self.read_long_clob(matched_inside_braces), + MatchedClob::Short => self.read_short_clob(allocator, matched_inside_braces), + MatchedClob::Long => self.read_long_clob(allocator, matched_inside_braces), } } fn read_short_clob<'data>( &self, + allocator: &'data BumpAllocator, matched_inside_braces: TextBufferView<'data>, ) -> IonResult> { // There can be whitespace between the leading {{ and the `"`, so we need to scan ahead @@ -1152,7 +1193,7 @@ impl MatchedClob { let (_, (body, _has_escapes)) = remaining.match_short_string_body().unwrap(); // There are escaped characters. We need to build a new version of our string // that replaces the escaped characters with their corresponding bytes. - let mut sanitized = Vec::with_capacity(body.len()); + let mut sanitized = BumpVec::with_capacity_in(body.len(), allocator); replace_escapes_with_byte_values( body, &mut sanitized, @@ -1161,17 +1202,18 @@ impl MatchedClob { // Unicode escapes are not supported false, )?; - Ok(BytesRef::from(sanitized)) + Ok(BytesRef::from(sanitized.into_bump_slice())) } fn read_long_clob<'data>( &self, + allocator: &'data BumpAllocator, matched_inside_braces: TextBufferView<'data>, ) -> IonResult> { // We're going to re-parse the input to visit each segment, copying its sanitized bytes into // a contiguous buffer. // Create a new buffer to hold the sanitized data. - let mut sanitized = Vec::with_capacity(matched_inside_braces.len()); + let mut sanitized = BumpVec::with_capacity_in(matched_inside_braces.len(), allocator); let mut remaining = matched_inside_braces; // Iterate over the string segments using the match_long_string_segment parser. @@ -1192,7 +1234,7 @@ impl MatchedClob { false, )?; } - Ok(BytesRef::from(sanitized)) + Ok(BytesRef::from(sanitized.into_bump_slice())) } } @@ -1407,7 +1449,7 @@ mod tests { let allocator = BumpAllocator::new(); let buffer = TextBufferView::new(&allocator, data.as_bytes()); let (_remaining, matched) = buffer.match_blob().unwrap(); - let actual = matched.read(buffer).unwrap(); + let actual = matched.read(&allocator, buffer).unwrap(); assert_eq!( actual, expected.as_ref(), @@ -1446,7 +1488,7 @@ mod tests { let buffer = TextBufferView::new(&allocator, data.as_bytes()); let (_remaining, matched) = buffer.match_string().unwrap(); let matched_input = buffer.slice(0, buffer.len() - 2); - let actual = matched.read(matched_input).unwrap(); + let actual = matched.read(&allocator, matched_input).unwrap(); assert_eq!( actual, expected, "Actual didn't match expected for input '{}'.\n{:?}\n!=\n{:?}", @@ -1484,7 +1526,7 @@ mod tests { // call to `match_clob()`. let (_remaining, matched) = buffer.match_clob().unwrap(); // The resulting buffer slice may be rejected during reading. - matched.read(buffer) + matched.read(allocator, buffer) } fn expect_clob_error(allocator: &BumpAllocator, data: &str) { diff --git a/src/lazy/text/raw/reader.rs b/src/lazy/text/raw/reader.rs index b8371e04..ef4e0b9e 100644 --- a/src/lazy/text/raw/reader.rs +++ b/src/lazy/text/raw/reader.rs @@ -1,14 +1,14 @@ #![allow(non_camel_case_types)] -use crate::lazy::decoder::{LazyDecoder, LazyRawReader}; + +use bumpalo::Bump as BumpAllocator; + +use crate::lazy::decoder::{LazyDecoder, LazyRawReader, RawVersionMarker}; use crate::lazy::encoding::TextEncoding_1_0; -use crate::lazy::never::Never; -use crate::lazy::raw_stream_item::{LazyRawStreamItem, RawStreamItem}; +use crate::lazy::raw_stream_item::{EndPosition, LazyRawStreamItem, RawStreamItem}; use crate::lazy::text::buffer::TextBufferView; use crate::lazy::text::parse_result::AddContext; -use crate::lazy::text::value::LazyRawTextValue_1_0; use crate::result::IonFailure; use crate::IonResult; -use bumpalo::Bump as BumpAllocator; /// A text Ion 1.0 reader that yields [`LazyRawStreamItem`]s representing the top level values found /// in the provided input stream. @@ -44,7 +44,7 @@ impl<'data> LazyRawTextReader_1_0<'data> { pub fn next<'top>( &'top mut self, allocator: &'top BumpAllocator, - ) -> IonResult, Never>> + ) -> IonResult> where 'data: 'top, { @@ -57,7 +57,9 @@ impl<'data> LazyRawTextReader_1_0<'data> { .match_optional_comments_and_whitespace() .with_context("reading whitespace/comments at the top level", input)?; if buffer_after_whitespace.is_empty() { - return Ok(RawStreamItem::EndOfStream); + return Ok(RawStreamItem::EndOfStream(EndPosition::new( + buffer_after_whitespace.offset(), + ))); } let buffer_after_whitespace = buffer_after_whitespace.local_lifespan(); @@ -65,11 +67,12 @@ impl<'data> LazyRawTextReader_1_0<'data> { .match_top_level_item_1_0() .with_context("reading a top-level value", buffer_after_whitespace)?; - if let RawStreamItem::VersionMarker(major, minor) = matched_item { + if let RawStreamItem::VersionMarker(version_marker) = matched_item { // TODO: It is not the raw reader's responsibility to report this error. It should // surface the IVM to the caller, who can then either create a different reader // for the reported version OR raise an error. // See: https://github.com/amazon-ion/ion-rust/issues/644 + let (major, minor) = version_marker.version(); if (major, minor) != (1, 0) { return IonResult::decoding_error(format!( "Ion version {major}.{minor} is not supported" @@ -109,7 +112,7 @@ impl<'data> LazyRawReader<'data, TextEncoding_1_0> for LazyRawTextReader_1_0<'da #[cfg(test)] mod tests { - use crate::lazy::decoder::{LazyRawStruct, LazyRawValue}; + use crate::lazy::decoder::{HasRange, HasSpan, LazyRawFieldName, LazyRawStruct, LazyRawValue}; use crate::lazy::raw_value_ref::RawValueRef; use crate::raw_symbol_token_ref::AsRawSymbolTokenRef; use crate::{Decimal, IonType, RawSymbolTokenRef, Timestamp}; @@ -286,7 +289,7 @@ mod tests { allocator: BumpAllocator::new(), }; - assert_eq!(reader.next()?.expect_ivm()?, (1, 0)); + assert_eq!(reader.next()?.expect_ivm()?.version(), (1, 0)); // null reader.expect_next(RawValueRef::Null(IonType::Null)); @@ -373,25 +376,19 @@ mod tests { // "\"Hello,\\\n world!\" " reader.expect_next(RawValueRef::String("Hello, world!".into())); // 'foo' - reader.expect_next(RawValueRef::Symbol(RawSymbolTokenRef::Text("foo".into()))); + reader.expect_next(RawValueRef::Symbol(RawSymbolTokenRef::Text("foo"))); reader.expect_next(RawValueRef::Symbol(RawSymbolTokenRef::Text( - "Hello, world!".into(), - ))); - reader.expect_next(RawValueRef::Symbol(RawSymbolTokenRef::Text( - "😎😎😎".into(), + "Hello, world!", ))); + reader.expect_next(RawValueRef::Symbol(RawSymbolTokenRef::Text("😎😎😎"))); // firstName - reader.expect_next(RawValueRef::Symbol(RawSymbolTokenRef::Text( - "firstName".into(), - ))); + reader.expect_next(RawValueRef::Symbol(RawSymbolTokenRef::Text("firstName"))); // date_of_birth reader.expect_next(RawValueRef::Symbol(RawSymbolTokenRef::Text( - "date_of_birth".into(), + "date_of_birth", ))); // $variable - reader.expect_next(RawValueRef::Symbol(RawSymbolTokenRef::Text( - "$variable".into(), - ))); + reader.expect_next(RawValueRef::Symbol(RawSymbolTokenRef::Text("$variable"))); // $0 reader.expect_next(RawValueRef::Symbol(RawSymbolTokenRef::SymbolId(0))); // $10 @@ -443,17 +440,17 @@ mod tests { let mut fields = strukt.iter(); sum += { let (name, value) = fields.next().unwrap()?.expect_name_value()?; - assert_eq!(name, "foo".as_raw_symbol_token_ref()); + assert_eq!(name.read()?, "foo".as_raw_symbol_token_ref()); value.read()?.expect_i64()? }; sum += { let (name, value) = fields.next().unwrap()?.expect_name_value()?; - assert_eq!(name, "bar".as_raw_symbol_token_ref()); + assert_eq!(name.read()?, "bar".as_raw_symbol_token_ref()); value.read()?.expect_i64()? }; sum += { let (name, value) = fields.next().unwrap()?.expect_name_value()?; - assert_eq!(name, "baz".as_raw_symbol_token_ref()); + assert_eq!(name.read()?, "baz".as_raw_symbol_token_ref()); value.read()?.expect_i64()? }; assert_eq!(sum, 600); @@ -461,18 +458,9 @@ mod tests { let value = reader.next()?.expect_value()?; assert_eq!(value.read()?.expect_i64()?, 42); let mut annotations = value.annotations(); - assert_eq!( - annotations.next().unwrap()?, - RawSymbolTokenRef::Text("foo".into()) - ); - assert_eq!( - annotations.next().unwrap()?, - RawSymbolTokenRef::Text("bar".into()) - ); - assert_eq!( - annotations.next().unwrap()?, - RawSymbolTokenRef::Text("baz".into()) - ); + assert_eq!(annotations.next().unwrap()?, RawSymbolTokenRef::Text("foo")); + assert_eq!(annotations.next().unwrap()?, RawSymbolTokenRef::Text("bar")); + assert_eq!(annotations.next().unwrap()?, RawSymbolTokenRef::Text("baz")); Ok(()) } diff --git a/src/lazy/text/raw/sequence.rs b/src/lazy/text/raw/sequence.rs index 38ce5013..04a3d99a 100644 --- a/src/lazy/text/raw/sequence.rs +++ b/src/lazy/text/raw/sequence.rs @@ -6,7 +6,9 @@ use std::ops::Range; use nom::character::streaming::satisfy; use crate::lazy::decoder::private::LazyContainerPrivate; -use crate::lazy::decoder::{LazyRawSequence, LazyRawValue, LazyRawValueExpr, RawValueExpr}; +use crate::lazy::decoder::{ + LazyDecoder, LazyRawContainer, LazyRawSequence, LazyRawValue, LazyRawValueExpr, RawValueExpr, +}; use crate::lazy::encoding::TextEncoding_1_0; use crate::lazy::text::buffer::TextBufferView; use crate::lazy::text::parse_result::AddContext; @@ -45,6 +47,12 @@ impl<'data> LazyContainerPrivate<'data, TextEncoding_1_0> for LazyRawTextList_1_ } } +impl<'data> LazyRawContainer<'data, TextEncoding_1_0> for LazyRawTextList_1_0<'data> { + fn as_value(&self) -> ::Value<'data> { + self.value + } +} + impl<'data> LazyRawSequence<'data, TextEncoding_1_0> for LazyRawTextList_1_0<'data> { type Iterator = RawTextListIterator_1_0<'data>; @@ -59,10 +67,6 @@ impl<'data> LazyRawSequence<'data, TextEncoding_1_0> for LazyRawTextList_1_0<'da fn iter(&self) -> Self::Iterator { LazyRawTextList_1_0::iter(self) } - - fn as_value(&self) -> LazyRawTextValue_1_0<'data> { - self.value - } } impl<'a, 'data> IntoIterator for &'a LazyRawTextList_1_0<'data> { @@ -261,6 +265,12 @@ impl<'data> LazyContainerPrivate<'data, TextEncoding_1_0> for LazyRawTextSExp_1_ } } +impl<'data> LazyRawContainer<'data, TextEncoding_1_0> for LazyRawTextSExp_1_0<'data> { + fn as_value(&self) -> ::Value<'data> { + self.value + } +} + impl<'data> LazyRawSequence<'data, TextEncoding_1_0> for LazyRawTextSExp_1_0<'data> { type Iterator = RawTextSExpIterator_1_0<'data>; @@ -275,10 +285,6 @@ impl<'data> LazyRawSequence<'data, TextEncoding_1_0> for LazyRawTextSExp_1_0<'da fn iter(&self) -> Self::Iterator { LazyRawTextSExp_1_0::iter(self) } - - fn as_value(&self) -> LazyRawTextValue_1_0<'data> { - self.value - } } impl<'a, 'data> IntoIterator for &'a LazyRawTextSExp_1_0<'data> { diff --git a/src/lazy/text/raw/struct.rs b/src/lazy/text/raw/struct.rs index e0af24fb..d1454a50 100644 --- a/src/lazy/text/raw/struct.rs +++ b/src/lazy/text/raw/struct.rs @@ -4,12 +4,15 @@ use std::ops::Range; use nom::character::streaming::satisfy; -use crate::lazy::decoder::private::{LazyContainerPrivate, LazyRawFieldPrivate}; +use crate::lazy::decoder::private::LazyContainerPrivate; use crate::lazy::decoder::{ - LazyRawField, LazyRawFieldExpr, LazyRawStruct, LazyRawValue, RawFieldExpr, RawValueExpr, + HasRange, HasSpan, LazyDecoder, LazyRawContainer, LazyRawFieldExpr, LazyRawFieldName, + LazyRawStruct, LazyRawValue, }; use crate::lazy::encoding::TextEncoding_1_0; +use crate::lazy::span::Span; use crate::lazy::text::buffer::TextBufferView; +use crate::lazy::text::matched::MatchedFieldName; use crate::lazy::text::parse_result::{AddContext, ToIteratorOutput}; use crate::lazy::text::value::{LazyRawTextValue_1_0, RawTextAnnotationsIterator}; use crate::{IonResult, RawSymbolTokenRef}; @@ -33,13 +36,7 @@ impl<'top> RawTextStructIterator_1_0<'top> { let start = self.input.offset() - 1; // We need to find the input slice containing the closing delimiter. It's either... let input_after_last = if let Some(field_result) = self.last() { - let value = match field_result? { - LazyRawFieldExpr::::NameValuePair( - _name, - RawValueExpr::ValueLiteral(value), - ) => value, - _ => unreachable!("struct field with macro invocation in Ion 1.0"), - }; + let (_name, value) = field_result?.expect_name_value()?; // ...the input slice that follows the last field... value .matched @@ -77,10 +74,7 @@ impl<'top> Iterator for RawTextStructIterator_1_0<'top> { match self.input.match_struct_field() { Ok((remaining_input, Some(field))) => { self.input = remaining_input; - Some(Ok(RawFieldExpr::NameValuePair( - field.name(), - RawValueExpr::ValueLiteral(field.value), - ))) + Some(Ok(field)) } Ok((_, None)) => None, Err(e) => { @@ -92,60 +86,32 @@ impl<'top> Iterator for RawTextStructIterator_1_0<'top> { } } -#[derive(Clone, Copy, Debug)] -pub struct LazyRawTextField_1_0<'top> { - pub(crate) value: LazyRawTextValue_1_0<'top>, +#[derive(Debug, Copy, Clone)] +pub struct LazyRawTextFieldName_1_0<'top> { + matched: MatchedFieldName<'top>, } -impl<'top> LazyRawTextField_1_0<'top> { - pub(crate) fn new(value: LazyRawTextValue_1_0<'top>) -> Self { - LazyRawTextField_1_0 { value } - } - - pub fn name(&self) -> RawSymbolTokenRef<'top> { - // We're in a struct field, the field name _must_ be populated. - // If it's not (or the field name is not a valid SID or UTF-8 string despite matching), - // that's a bug. We can safely unwrap/expect here. - let matched_symbol = self - .value - .matched - .encoded_value - .field_name_syntax() - .expect("field name syntax not available"); - let name_length = self - .value - .matched - .encoded_value - .field_name_range() - .expect("field name length not available") - .len(); - matched_symbol - .read(self.value.matched.input.slice(0, name_length)) - .expect("invalid struct field name") - } - - pub fn value(&self) -> LazyRawTextValue_1_0<'top> { - self.value - } - - pub(crate) fn into_value(self) -> LazyRawTextValue_1_0<'top> { - self.value +impl<'top> LazyRawTextFieldName_1_0<'top> { + pub(crate) fn new(matched: MatchedFieldName<'top>) -> Self { + Self { matched } } } -impl<'top> LazyRawFieldPrivate<'top, TextEncoding_1_0> for LazyRawTextField_1_0<'top> { - fn into_value(self) -> LazyRawTextValue_1_0<'top> { - self.value +impl<'top> HasSpan<'top> for LazyRawTextFieldName_1_0<'top> { + fn span(&self) -> Span<'top> { + self.matched.span() } } -impl<'top> LazyRawField<'top, TextEncoding_1_0> for LazyRawTextField_1_0<'top> { - fn name(&self) -> RawSymbolTokenRef<'top> { - LazyRawTextField_1_0::name(self) +impl<'top> HasRange for LazyRawTextFieldName_1_0<'top> { + fn range(&self) -> Range { + self.matched.range() } +} - fn value(&self) -> LazyRawTextValue_1_0<'top> { - self.value() +impl<'top> LazyRawFieldName<'top> for LazyRawTextFieldName_1_0<'top> { + fn read(&self) -> IonResult> { + self.matched.read() } } @@ -160,6 +126,12 @@ impl<'top> LazyContainerPrivate<'top, TextEncoding_1_0> for LazyRawTextStruct_1_ } } +impl<'top> LazyRawContainer<'top, TextEncoding_1_0> for LazyRawTextStruct_1_0<'top> { + fn as_value(&self) -> ::Value<'top> { + self.value + } +} + impl<'top> LazyRawStruct<'top, TextEncoding_1_0> for LazyRawTextStruct_1_0<'top> { type Iterator = RawTextStructIterator_1_0<'top>; @@ -188,9 +160,11 @@ impl<'top> IntoIterator for LazyRawTextStruct_1_0<'top> { mod tests { use std::ops::Range; + use bumpalo::Bump as BumpAllocator; + + use crate::lazy::decoder::{HasRange, HasSpan, LazyRawStruct, LazyRawValue}; use crate::lazy::text::raw::reader::LazyRawTextReader_1_0; use crate::IonResult; - use bumpalo::Bump as BumpAllocator; fn expect_struct_range(ion_data: &str, expected: Range) -> IonResult<()> { let allocator = BumpAllocator::new(); @@ -208,8 +182,8 @@ mod tests { #[test] fn struct_range() -> IonResult<()> { - // For each pair below, we'll confirm that the top-level list is found to - // occupy the specified input span. + // For each pair below, we'll confirm that the top-level struct is found to + // occupy the specified input range. let tests = &[ // (Ion input, expected range of the struct) ("{}", 0..2), @@ -228,4 +202,68 @@ mod tests { } Ok(()) } + + #[test] + // Clippy thinks a slice with a single range inside is likely to be a mistake, but in this + // test it's intentional. + #[allow(clippy::single_range_in_vec_init)] + fn field_name_ranges() -> IonResult<()> { + // For each pair below, we'll confirm that the top-level struct's field names are found to + // occupy the specified input ranges. + type FieldNameAndRange<'a> = (&'a str, Range); + type FieldTest<'a> = (&'a str, &'a [FieldNameAndRange<'a>]); + let tests: &[FieldTest] = &[ + // (Ion input, expected ranges of the struct's field names) + ("{a:1}", &[("a", 1..2)]), + ("{a: 1}", &[("a", 1..2)]), + ("{a: 1, b: 2}", &[("a", 1..2), ("b", 7..8)]), + ( + "{a: 1, /* comment }}} */ b: 2}", + &[("a", 1..2), ("b", 25..26)], + ), + ("{ a: /* comment */ 1, b: 2}", &[("a", 2..3), ("b", 22..23)]), + ( + "{a: 1, b: 2, c: {d: 3, e: 4, f: 5}, g: 6}", + &[ + ("a", 1..2), + ("b", 7..8), + ("c", 13..14), + //...nested fields... + ("g", 36..37), + ], + ), + ]; + for (input, field_name_ranges) in tests { + let bump = bumpalo::Bump::new(); + let mut reader = LazyRawTextReader_1_0::new(input.as_bytes()); + let struct_ = reader + .next(&bump)? + .expect_value()? + .read()? + .expect_struct()?; + for (field_result, (expected_name, expected_range)) in + struct_.iter().zip(field_name_ranges.iter()) + { + let field_name = field_result?.name(); + assert_eq!( + field_name.span(), + expected_name.as_bytes(), + "span failure for input {input} -> field {expected_name}" + ); + assert_eq!( + field_name.range(), + *expected_range, + "range failure for input {input} -> field {expected_name}" + ); + println!( + "SUCCESS: input {} -> field {} -> {} ({:?})", + input, + expected_name, + field_name.span().expect_text()?, + field_name.range() + ); + } + } + Ok(()) + } } diff --git a/src/lazy/text/raw/v1_1/reader.rs b/src/lazy/text/raw/v1_1/reader.rs index b5d28e81..2d45733d 100644 --- a/src/lazy/text/raw/v1_1/reader.rs +++ b/src/lazy/text/raw/v1_1/reader.rs @@ -8,19 +8,21 @@ use nom::character::streaming::satisfy; use crate::lazy::decoder::private::LazyContainerPrivate; use crate::lazy::decoder::{ - LazyDecoder, LazyRawFieldExpr, LazyRawReader, LazyRawSequence, LazyRawStruct, LazyRawValue, - LazyRawValueExpr, RawFieldExpr, RawValueExpr, + HasRange, HasSpan, LazyDecoder, LazyRawContainer, LazyRawFieldExpr, LazyRawFieldName, + LazyRawReader, LazyRawSequence, LazyRawStruct, LazyRawValue, LazyRawValueExpr, RawValueExpr, + RawVersionMarker, }; use crate::lazy::encoding::TextEncoding_1_1; -use crate::lazy::raw_stream_item::{LazyRawStreamItem, RawStreamItem}; +use crate::lazy::raw_stream_item::{EndPosition, LazyRawStreamItem, RawStreamItem}; use crate::lazy::text::buffer::TextBufferView; use crate::lazy::text::parse_result::{AddContext, ToIteratorOutput}; use crate::lazy::text::value::{LazyRawTextValue_1_1, RawTextAnnotationsIterator}; use crate::result::IonFailure; -use crate::{IonResult, IonType}; +use crate::{IonResult, IonType, RawSymbolTokenRef}; use crate::lazy::expanded::macro_evaluator::RawEExpression; -use crate::lazy::text::matched::MatchedValue; +use crate::lazy::span::Span; +use crate::lazy::text::matched::{MatchedFieldName, MatchedValue}; use bumpalo::collections::Vec as BumpVec; use bumpalo::Bump as BumpAllocator; @@ -74,6 +76,18 @@ pub struct RawTextEExpression_1_1<'top> { pub(crate) arg_expr_cache: &'top [LazyRawValueExpr<'top, TextEncoding_1_1>], } +impl<'top> HasSpan<'top> for RawTextEExpression_1_1<'top> { + fn span(&self) -> Span<'top> { + Span::with_offset(self.input.offset(), self.input.bytes()) + } +} + +impl<'top> HasRange for RawTextEExpression_1_1<'top> { + fn range(&self) -> Range { + self.input.range() + } +} + impl<'top> RawEExpression<'top, TextEncoding_1_1> for RawTextEExpression_1_1<'top> { type RawArgumentsIterator<'a> = RawTextSequenceCacheIterator_1_1<'top> where Self: 'a; @@ -153,18 +167,21 @@ impl<'data> LazyRawReader<'data, TextEncoding_1_1> for LazyRawTextReader_1_1<'da .match_optional_comments_and_whitespace() .with_context("reading v1.1 whitespace/comments at the top level", input)?; if buffer_after_whitespace.is_empty() { - return Ok(RawStreamItem::EndOfStream); + return Ok(RawStreamItem::EndOfStream(EndPosition::new( + buffer_after_whitespace.offset(), + ))); } let (remaining, matched_item) = buffer_after_whitespace .match_top_level_item_1_1() .with_context("reading a v1.1 top-level value", buffer_after_whitespace)?; - if let RawStreamItem::VersionMarker(major, minor) = matched_item { + if let RawStreamItem::VersionMarker(marker) = matched_item { // TODO: It is not the raw reader's responsibility to report this error. It should // surface the IVM to the caller, who can then either create a different reader // for the reported version OR raise an error. // See: https://github.com/amazon-ion/ion-rust/issues/644 + let (major, minor) = marker.version(); if (major, minor) != (1, 1) { return IonResult::decoding_error(format!( "Ion version {major}.{minor} is not supported" @@ -420,6 +437,12 @@ impl<'top> LazyContainerPrivate<'top, TextEncoding_1_1> for LazyRawTextSExp_1_1< } } +impl<'top> LazyRawContainer<'top, TextEncoding_1_1> for LazyRawTextSExp_1_1<'top> { + fn as_value(&self) -> ::Value<'top> { + self.value.matched.into() + } +} + impl<'top> LazyRawSequence<'top, TextEncoding_1_1> for LazyRawTextSExp_1_1<'top> { type Iterator = RawTextSequenceCacheIterator_1_1<'top>; @@ -437,10 +460,6 @@ impl<'top> LazyRawSequence<'top, TextEncoding_1_1> for LazyRawTextSExp_1_1<'top> }; RawTextSequenceCacheIterator_1_1::new(child_exprs) } - - fn as_value(&self) -> LazyRawTextValue_1_1<'top> { - self.value.matched.into() - } } impl<'top> Iterator for RawTextSExpIterator_1_1<'top> { @@ -465,6 +484,35 @@ impl<'top> Iterator for RawTextSExpIterator_1_1<'top> { } } +#[derive(Debug, Copy, Clone)] +pub struct LazyRawTextFieldName_1_1<'top> { + matched: MatchedFieldName<'top>, +} + +impl<'top> LazyRawTextFieldName_1_1<'top> { + pub(crate) fn new(matched: MatchedFieldName<'top>) -> Self { + Self { matched } + } +} + +impl<'top> HasSpan<'top> for LazyRawTextFieldName_1_1<'top> { + fn span(&self) -> Span<'top> { + self.matched.span() + } +} + +impl<'top> HasRange for LazyRawTextFieldName_1_1<'top> { + fn range(&self) -> Range { + self.matched.range() + } +} + +impl<'top> LazyRawFieldName<'top> for LazyRawTextFieldName_1_1<'top> { + fn read(&self) -> IonResult> { + self.matched.read() + } +} + #[derive(Copy, Clone)] pub struct LazyRawTextStruct_1_1<'top> { pub(crate) value: LazyRawTextValue_1_1<'top>, @@ -473,18 +521,18 @@ pub struct LazyRawTextStruct_1_1<'top> { impl<'a> Debug for LazyRawTextStruct_1_1<'a> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!(f, "{{")?; - for field in self.iter() { - match field? { - LazyRawFieldExpr::::NameValuePair( - name, - RawValueExpr::ValueLiteral(value), - ) => write!(f, "{name:?}: {value:?}, "), - LazyRawFieldExpr::::NameValuePair( - name, - RawValueExpr::MacroInvocation(invocation), - ) => write!(f, "{name:?}: {invocation:?}, "), - LazyRawFieldExpr::::MacroInvocation(invocation) => { - write!(f, "{invocation:?}, ") + for field_result in self.iter() { + let field = field_result?; + use LazyRawFieldExpr::*; + match field { + NameValue(name, value) => { + write!(f, "{name:?}: {value:?}") + } + NameEExp(name, eexp) => { + write!(f, "{name:?}: {eexp:?}") + } + EExp(eexp) => { + write!(f, "{eexp:?}") } }?; } @@ -530,8 +578,8 @@ impl<'top> Iterator for RawTextStructCacheIterator_1_1<'top> { fn next(&mut self) -> Option { let next_expr = self.field_exprs.get(self.index)?; self.index += 1; - // TODO: Remove the result wrapper - Some(Ok(next_expr.clone())) + // TODO: Remove the result wrapper because these values are already in the cache + Some(Ok(*next_expr)) } } @@ -543,6 +591,12 @@ impl<'top> LazyContainerPrivate<'top, TextEncoding_1_1> for LazyRawTextList_1_1< } } +impl<'top> LazyRawContainer<'top, TextEncoding_1_1> for LazyRawTextList_1_1<'top> { + fn as_value(&self) -> LazyRawTextValue_1_1<'top> { + self.value.matched.into() + } +} + impl<'top> LazyRawSequence<'top, TextEncoding_1_1> for LazyRawTextList_1_1<'top> { type Iterator = RawTextSequenceCacheIterator_1_1<'top>; @@ -560,10 +614,6 @@ impl<'top> LazyRawSequence<'top, TextEncoding_1_1> for LazyRawTextList_1_1<'top> }; RawTextSequenceCacheIterator_1_1::new(child_exprs) } - - fn as_value(&self) -> LazyRawTextValue_1_1<'top> { - self.value.matched.into() - } } impl<'top> Iterator for RawTextListIterator_1_1<'top> { @@ -597,6 +647,12 @@ impl<'top> LazyContainerPrivate<'top, TextEncoding_1_1> for LazyRawTextStruct_1_ } } +impl<'top> LazyRawContainer<'top, TextEncoding_1_1> for LazyRawTextStruct_1_1<'top> { + fn as_value(&self) -> ::Value<'top> { + self.value + } +} + impl<'top> LazyRawStruct<'top, TextEncoding_1_1> for LazyRawTextStruct_1_1<'top> { type Iterator = RawTextStructCacheIterator_1_1<'top>; @@ -634,50 +690,6 @@ impl<'top> Iterator for RawTextStructIterator_1_1<'top> { } } -impl<'top> RawTextStructIterator_1_1<'top> { - // TODO: DRY with RawTextStructIterator_1_0 - pub(crate) fn find_span(&self) -> IonResult> { - // The input has already skipped past the opening delimiter. - let start = self.input.offset() - 1; - // We need to find the input slice containing the closing delimiter. - let input_after_last = if let Some(field_result) = self.last() { - // If there are any field expressions, we need to isolate the input slice that follows - // the last one. - use RawFieldExpr::*; - match field_result? { - // foo: bar - NameValuePair(_name, RawValueExpr::ValueLiteral(value)) => { - value.matched.input.slice_to_end(value.matched.encoded_value.total_length()) - }, - // foo: (:bar ...) - NameValuePair(_, RawValueExpr::MacroInvocation(invocation)) - // (:foo) - | MacroInvocation(invocation) => { - self.input.slice_to_end(invocation.input.len()) - } - } - } else { - // ...or there aren't fields, so it's just the input after the opening delimiter. - self.input - }; - let (mut input_after_ws, _ws) = - input_after_last - .match_optional_comments_and_whitespace() - .with_context("seeking the end of a struct", input_after_last)?; - // Skip an optional comma and more whitespace - if input_after_ws.bytes().first() == Some(&b',') { - (input_after_ws, _) = input_after_ws - .slice_to_end(1) - .match_optional_comments_and_whitespace() - .with_context("skipping a list's trailing comma", input_after_ws)?; - } - let (input_after_end, _end_delimiter) = satisfy(|c| c == b'}' as char)(input_after_ws) - .with_context("seeking the closing delimiter of a struct", input_after_ws)?; - let end = input_after_end.offset(); - Ok(start..end) - } -} - /// Wraps a [`RawTextStructIterator_1_1`] (which parses the body of a struct) and caches the field /// expressions the iterator yields along the way. Finally, returns a `Range` representing /// the span of input bytes that the struct occupies. @@ -711,20 +723,17 @@ impl<'top> TextStructSpanFinder_1_1<'top> { } // We need to find the input slice containing the closing delimiter. - let input_after_last = if let Some(field_result) = child_expr_cache.last() { + let input_after_last = if let Some(field) = child_expr_cache.last() { // If there are any field expressions, we need to isolate the input slice that follows // the last one. - use RawFieldExpr::*; - match field_result { - // foo: bar - NameValuePair(_name, RawValueExpr::ValueLiteral(value)) => { - value.matched.input.slice_to_end(value.matched.encoded_value.total_length()) - }, - // foo: (:bar ...) - NameValuePair(_, RawValueExpr::MacroInvocation(invocation)) - // (:foo) - | MacroInvocation(invocation) => { - self.iterator.input.slice_to_end(invocation.input.len()) + use LazyRawFieldExpr::*; + match field { + NameValue(_, value) => value + .matched + .input + .slice_to_end(value.matched.encoded_value.total_length()), + NameEExp(_, eexp) | EExp(eexp) => { + self.iterator.input.slice_to_end(eexp.input.len()) } } } else { @@ -788,7 +797,7 @@ mod tests { let reader = &mut LazyRawTextReader_1_1::new(data.as_bytes()); // $ion_1_1 - assert_eq!(reader.next(&allocator)?.expect_ivm()?, (1, 1)); + assert_eq!(reader.next(&allocator)?.expect_ivm()?.version(), (1, 1)); // "foo" expect_next(&allocator, reader, RawValueRef::String("foo".into())); // bar diff --git a/src/lazy/text/value.rs b/src/lazy/text/value.rs index 2bd770de..d0b12d16 100644 --- a/src/lazy/text/value.rs +++ b/src/lazy/text/value.rs @@ -2,26 +2,27 @@ use std::fmt; use std::fmt::{Debug, Formatter}; +use std::marker::PhantomData; use std::ops::Range; -use crate::lazy::decoder::private::{LazyContainerPrivate, LazyRawValuePrivate}; -use crate::lazy::decoder::{LazyDecoder, LazyRawValue}; +use crate::lazy::decoder::private::LazyContainerPrivate; +use crate::lazy::decoder::{HasRange, HasSpan, LazyDecoder, LazyRawValue, RawVersionMarker}; use crate::lazy::encoding::{TextEncoding, TextEncoding_1_0, TextEncoding_1_1}; use crate::lazy::raw_value_ref::RawValueRef; +use crate::lazy::span::Span; use crate::lazy::text::buffer::TextBufferView; use crate::lazy::text::encoded_value::EncodedTextValue; use crate::{IonResult, IonType, RawSymbolTokenRef}; /// A value that has been identified in the text input stream but whose data has not yet been read. /// -/// If only part of the value is in the input buffer, calls to [`MatchedRawTextValue::read`] (which examines -/// bytes beyond the value's header) may return [`IonError::Incomplete`](crate::result::IonError::Incomplete). -/// /// `LazyRawTextValue`s are "unresolved," which is to say that symbol values, annotations, and /// struct field names may or may not include a text definition. (This is less common in Ion's text /// format than in its binary format, but is still possible.) For a resolved lazy value that /// includes a text definition for these items whenever one exists, see /// [`crate::lazy::value::LazyValue`]. +// This type is version agnostic, and is wrapped by the LazyRawValue implementations for all +// existing encodings. #[derive(Copy, Clone)] pub struct MatchedRawTextValue<'top, E: TextEncoding<'top>> { pub(crate) encoded_value: EncodedTextValue<'top, E>, @@ -42,7 +43,7 @@ impl<'top, E: TextEncoding<'top>> Debug for MatchedRawTextValue<'top, E> { // // These types provide Ion-version-specific impls of the LazyRawValue trait #[derive(Copy, Clone)] -pub struct LazyRawTextValue<'top, E: TextEncoding<'top> + Copy> { +pub struct LazyRawTextValue<'top, E: TextEncoding<'top>> { pub(crate) matched: MatchedRawTextValue<'top, E>, } @@ -52,6 +53,53 @@ impl<'top, E: TextEncoding<'top>> LazyRawTextValue<'top, E> { } } +#[derive(Debug, Copy, Clone)] +pub struct LazyRawTextVersionMarker<'top, E: TextEncoding<'top>> { + major: u8, + minor: u8, + input: TextBufferView<'top>, + // We need distinct version marker types for 1.0 and 1.1 even though the data/logic is the same. + // This allows us to implement a `From for LazyRawAnyVersionMarker` + // unambiguously for the two encodings. + spooky: PhantomData, +} + +impl<'top, E: TextEncoding<'top>> LazyRawTextVersionMarker<'top, E> { + pub fn new( + input: TextBufferView<'top>, + major: u8, + minor: u8, + ) -> LazyRawTextVersionMarker<'top, E> { + Self { + major, + minor, + input, + spooky: PhantomData, + } + } +} + +pub type LazyRawTextVersionMarker_1_0<'top> = LazyRawTextVersionMarker<'top, TextEncoding_1_0>; +pub type LazyRawTextVersionMarker_1_1<'top> = LazyRawTextVersionMarker<'top, TextEncoding_1_1>; + +impl<'top, E: TextEncoding<'top>> HasSpan<'top> for LazyRawTextVersionMarker<'top, E> { + fn span(&self) -> Span<'top> { + Span::with_offset(self.input.offset(), self.input.bytes()) + } +} + +impl<'top, E: TextEncoding<'top>> HasRange for LazyRawTextVersionMarker<'top, E> { + fn range(&self) -> Range { + self.input.range() + } +} + +impl<'top, E: TextEncoding<'top>> RawVersionMarker<'top> for LazyRawTextVersionMarker<'top, E> { + fn version(&self) -> (u8, u8) { + (self.major, self.minor) + } +} + pub type LazyRawTextValue_1_0<'top> = LazyRawTextValue<'top, TextEncoding_1_0>; pub type LazyRawTextValue_1_1<'top> = LazyRawTextValue<'top, TextEncoding_1_1>; @@ -81,18 +129,26 @@ impl<'top> From> for LazyRawTextValu } } -impl<'top, E: TextEncoding<'top>> LazyRawValuePrivate<'top> for MatchedRawTextValue<'top, E> { - // TODO: We likely want to move this functionality to the Ion-version-specific LazyDecoder::Field - // implementations. See: https://github.com/amazon-ion/ion-rust/issues/631 - fn field_name(&self) -> IonResult> { - self.encoded_value.field_name(self.input) - } -} - // ===== Ion-version-agnostic functionality ===== // // These trait impls are common to all Ion versions, but require the caller to specify a type // parameter. + +impl<'top, E: TextEncoding<'top>> HasRange for MatchedRawTextValue<'top, E> { + fn range(&self) -> Range { + self.encoded_value.annotated_value_range() + } +} + +impl<'top, E: TextEncoding<'top>> HasSpan<'top> for MatchedRawTextValue<'top, E> { + fn span(&self) -> Span<'top> { + let range = self.range(); + let input_offset = self.input.offset(); + let local_range = (range.start - input_offset)..(range.end - input_offset); + Span::with_offset(range.start, &self.input.bytes()[local_range]) + } +} + impl<'top, E: TextEncoding<'top>> LazyRawValue<'top, E> for MatchedRawTextValue<'top, E> { fn ion_type(&self) -> IonType { self.encoded_value.ion_type() @@ -119,6 +175,8 @@ impl<'top, E: TextEncoding<'top>> LazyRawValue<'top, E> for MatchedRawTextValue< self.encoded_value.data_length(), ); + let allocator = self.input.allocator; + use crate::lazy::text::matched::MatchedValue::*; let value_ref = match self.encoded_value.matched() { Null(ion_type) => RawValueRef::Null(ion_type), @@ -127,32 +185,27 @@ impl<'top, E: TextEncoding<'top>> LazyRawValue<'top, E> for MatchedRawTextValue< Float(f) => RawValueRef::Float(f.read(matched_input)?), Decimal(d) => RawValueRef::Decimal(d.read(matched_input)?), Timestamp(t) => RawValueRef::Timestamp(t.read(matched_input)?), - String(s) => RawValueRef::String(s.read(matched_input)?), - Symbol(s) => RawValueRef::Symbol(s.read(matched_input)?), - Blob(b) => RawValueRef::Blob(b.read(matched_input)?), - Clob(c) => RawValueRef::Clob(c.read(matched_input)?), + String(s) => RawValueRef::String(s.read(allocator, matched_input)?), + Symbol(s) => RawValueRef::Symbol(s.read(allocator, matched_input)?), + Blob(b) => RawValueRef::Blob(b.read(allocator, matched_input)?), + Clob(c) => RawValueRef::Clob(c.read(allocator, matched_input)?), List(_) => RawValueRef::List(E::List::<'top>::from_value(E::value_from_matched(*self))), SExp(_) => RawValueRef::SExp(E::SExp::<'top>::from_value(E::value_from_matched(*self))), Struct(_) => RawValueRef::Struct(E::Struct::from_value(E::value_from_matched(*self))), }; Ok(value_ref) } +} +impl<'top, E: TextEncoding<'top>> HasRange for LazyRawTextValue<'top, E> { fn range(&self) -> Range { - self.encoded_value.annotated_value_range() - } - - fn span(&self) -> &[u8] { - let range = self.range(); - let input_offset = self.input.offset(); - let local_range = (range.start - input_offset)..(range.end - input_offset); - &self.input.bytes()[local_range] + self.matched.range() } } -impl<'top, E: TextEncoding<'top>> LazyRawValuePrivate<'top> for LazyRawTextValue<'top, E> { - fn field_name(&self) -> IonResult> { - self.matched.field_name() +impl<'top, E: TextEncoding<'top>> HasSpan<'top> for LazyRawTextValue<'top, E> { + fn span(&self) -> Span<'top> { + self.matched.span() } } @@ -172,14 +225,6 @@ impl<'top, E: TextEncoding<'top>> LazyRawValue<'top, E> for LazyRawTextValue<'to fn read(&self) -> IonResult> { self.matched.read() } - - fn range(&self) -> Range { - self.matched.range() - } - - fn span(&self) -> &[u8] { - self.matched.span() - } } pub struct RawTextAnnotationsIterator<'data> { @@ -213,7 +258,7 @@ impl<'top> Iterator for RawTextAnnotationsIterator<'top> { let matched_input = self .input .slice(span.start - self.input.offset(), span.len()); - let text = match symbol.read(matched_input) { + let text = match symbol.read(self.input.allocator, matched_input) { Ok(text) => text, Err(e) => { self.has_returned_error = true; @@ -239,9 +284,9 @@ mod tests { let allocator = BumpAllocator::new(); let input = TextBufferView::new(&allocator, input.as_bytes()); let mut iter = RawTextAnnotationsIterator::new(input); - assert_eq!(iter.next().unwrap()?, RawSymbolTokenRef::Text("foo".into())); - assert_eq!(iter.next().unwrap()?, RawSymbolTokenRef::Text("bar".into())); - assert_eq!(iter.next().unwrap()?, RawSymbolTokenRef::Text("baz".into())); + assert_eq!(iter.next().unwrap()?, RawSymbolTokenRef::Text("foo")); + assert_eq!(iter.next().unwrap()?, RawSymbolTokenRef::Text("bar")); + assert_eq!(iter.next().unwrap()?, RawSymbolTokenRef::Text("baz")); Ok(()) } test("foo::bar::baz::")?; diff --git a/src/lazy/value.rs b/src/lazy/value.rs index d84989dd..34daebd9 100644 --- a/src/lazy/value.rs +++ b/src/lazy/value.rs @@ -1,5 +1,3 @@ -use std::borrow::Cow; - use crate::lazy::decoder::LazyDecoder; use crate::lazy::encoding::BinaryEncoding_1_0; use crate::lazy::expanded::{ExpandedAnnotationsIterator, ExpandedValueRef, LazyExpandedValue}; @@ -53,7 +51,7 @@ use crate::{ ///# Ok(()) ///# } /// ``` -#[derive(Clone)] +#[derive(Copy, Clone)] pub struct LazyValue<'top, D: LazyDecoder> { pub(crate) expanded_value: LazyExpandedValue<'top, D>, } @@ -95,6 +93,22 @@ impl<'top, D: LazyDecoder> LazyValue<'top, D> { self.expanded_value.ion_type() } + pub fn is_container(&self) -> bool { + matches!( + self.expanded_value.ion_type(), + IonType::List | IonType::SExp | IonType::Struct + ) + } + + pub fn is_scalar(&self) -> bool { + !self.is_container() + } + + // TODO: Feature gate + pub fn lower(&self) -> LazyExpandedValue<'top, D> { + self.expanded_value + } + /// Returns `true` if this value is any form of `null`, including /// `null`, `null.string`, `null.int`, etc. Otherwise, returns `false`. /// @@ -160,6 +174,10 @@ impl<'top, D: LazyDecoder> LazyValue<'top, D> { } } + pub fn has_annotations(&self) -> bool { + self.expanded_value.annotations().next().is_some() + } + /// Reads the body of this value (that is: its data) and returns it as a [`ValueRef`]. /// ``` ///# use ion_rs::IonResult; @@ -212,8 +230,7 @@ impl<'top, D: LazyDecoder> LazyValue<'top, D> { )) })? .into(), - RawSymbolTokenRef::Text(Cow::Borrowed(text)) => text.into(), - RawSymbolTokenRef::Text(Cow::Owned(text)) => text.into(), + RawSymbolTokenRef::Text(text) => text.into(), }; ValueRef::Symbol(symbol) } diff --git a/src/lazy/value_ref.rs b/src/lazy/value_ref.rs index b9d787ac..a636c8fe 100644 --- a/src/lazy/value_ref.rs +++ b/src/lazy/value_ref.rs @@ -45,6 +45,8 @@ impl<'top, D: LazyDecoder> PartialEq for ValueRef<'top, D> { (Symbol(s1), Symbol(s2)) => s1 == s2, (Blob(b1), Blob(b2)) => b1 == b2, (Clob(c1), Clob(c2)) => c1 == c2, + // TODO: The following is no longer true; should we finish implementing PartialEq for + // container types? https://github.com/amazon-ion/ion-rust/issues/761 // We cannot compare lazy containers as we cannot guarantee that their complete contents // are available in the buffer. Is `{foo: bar}` equal to `{foo: b`? _ => false, diff --git a/src/lib.rs b/src/lib.rs index e57559c0..80dc714d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -140,10 +140,7 @@ use rstest_reuse; // These re-exports are only visible if the "experimental-reader" feature is enabled. #[cfg(feature = "experimental-reader")] -pub use { - raw_symbol_token::RawSymbolToken, raw_symbol_token_ref::RawSymbolTokenRef, - symbol_table::SymbolTable, -}; +pub use {raw_symbol_token_ref::RawSymbolTokenRef, symbol_table::SymbolTable}; // Exposed to allow benchmark comparisons between the 1.0 primitives and 1.1 primitives pub use catalog::{Catalog, MapCatalog}; pub use element::builders::{SequenceBuilder, StructBuilder}; @@ -177,12 +174,13 @@ pub use types::decimal; #[cfg(feature = "experimental-lazy-reader")] pub use write_config::WriteConfig; +pub use crate::text::text_formatter::{IoFmtShim, IonValueFormatter}; + // Private modules that serve to organize implementation details. pub(crate) mod binary; mod catalog; mod constants; mod ion_data; -mod raw_symbol_token; mod raw_symbol_token_ref; mod shared_symbol_table; mod symbol_ref; diff --git a/src/raw_symbol_token.rs b/src/raw_symbol_token.rs deleted file mode 100644 index f30022c7..00000000 --- a/src/raw_symbol_token.rs +++ /dev/null @@ -1,76 +0,0 @@ -use crate::SymbolId; - -/// A symbol token encountered in a text or binary Ion stream. -/// [RawSymbolToken]s do not store import source information for the token encountered. Similarly, -/// a [RawSymbolToken] cannot store both a symbol ID _and_ text, which means that it is not suitable -/// for representing a resolved symbol. -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum RawSymbolToken { - SymbolId(SymbolId), - Text(String), -} - -impl RawSymbolToken { - pub fn matches(&self, sid: SymbolId, text: &str) -> bool { - match self { - RawSymbolToken::SymbolId(s) if *s == sid => true, - RawSymbolToken::Text(t) if t == text => true, - _ => false, - } - } - - pub fn local_sid(&self) -> Option { - match self { - RawSymbolToken::SymbolId(s) => Some(*s), - RawSymbolToken::Text(_t) => None, - } - } - - pub fn text(&self) -> Option<&str> { - match self { - RawSymbolToken::SymbolId(_s) => None, - RawSymbolToken::Text(t) => Some(t.as_str()), - } - } -} - -/// Constructs a [`RawSymbolToken`] with unknown text and a local ID. -/// A common case for binary parsing (though technically relevant in text). -#[inline] -pub fn local_sid_token(local_sid: SymbolId) -> RawSymbolToken { - RawSymbolToken::SymbolId(local_sid) -} - -/// Constructs an [`RawSymbolToken`] with just text. -/// A common case for text and synthesizing tokens. -#[inline] -pub fn text_token>(text: T) -> RawSymbolToken { - RawSymbolToken::Text(text.into()) -} - -impl From for RawSymbolToken { - fn from(symbol_id: SymbolId) -> Self { - RawSymbolToken::SymbolId(symbol_id) - } -} - -impl From for RawSymbolToken { - fn from(text: String) -> Self { - RawSymbolToken::Text(text) - } -} - -impl From<&str> for RawSymbolToken { - fn from(text: &str) -> Self { - RawSymbolToken::Text(text.to_string()) - } -} - -impl From<&T> for RawSymbolToken -where - T: Clone + Into, -{ - fn from(value: &T) -> Self { - value.clone().into() - } -} diff --git a/src/raw_symbol_token_ref.rs b/src/raw_symbol_token_ref.rs index d8bb55f5..1fa817d4 100644 --- a/src/raw_symbol_token_ref.rs +++ b/src/raw_symbol_token_ref.rs @@ -1,13 +1,10 @@ -use crate::raw_symbol_token::RawSymbolToken; -use crate::types::symbol::SymbolText; -use crate::{Symbol, SymbolId}; -use std::borrow::Cow; +use crate::{Symbol, SymbolId, SymbolRef}; /// Like RawSymbolToken, but the Text variant holds a borrowed reference instead of a String. -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum RawSymbolTokenRef<'a> { SymbolId(SymbolId), - Text(Cow<'a, str>), + Text(&'a str), } impl<'a> RawSymbolTokenRef<'a> { @@ -17,7 +14,7 @@ impl<'a> RawSymbolTokenRef<'a> { pub fn matches_sid_or_text(&self, symbol_id: SymbolId, symbol_text: &str) -> bool { match self { RawSymbolTokenRef::SymbolId(sid) => symbol_id == *sid, - RawSymbolTokenRef::Text(text) => symbol_text == text, + RawSymbolTokenRef::Text(text) => symbol_text == *text, } } } @@ -29,7 +26,7 @@ pub trait AsRawSymbolTokenRef { impl<'a> AsRawSymbolTokenRef for RawSymbolTokenRef<'a> { fn as_raw_symbol_token_ref(&self) -> RawSymbolTokenRef { - self.clone() + *self } } @@ -39,22 +36,16 @@ impl AsRawSymbolTokenRef for SymbolId { } } -impl AsRawSymbolTokenRef for String { - fn as_raw_symbol_token_ref(&self) -> RawSymbolTokenRef { - RawSymbolTokenRef::Text(Cow::from(self.as_str())) - } -} - impl AsRawSymbolTokenRef for &str { fn as_raw_symbol_token_ref(&self) -> RawSymbolTokenRef { - RawSymbolTokenRef::Text(Cow::from(*self)) + RawSymbolTokenRef::Text(self) } } impl AsRawSymbolTokenRef for Symbol { fn as_raw_symbol_token_ref(&self) -> RawSymbolTokenRef { match self.text() { - Some(text) => RawSymbolTokenRef::Text(Cow::from(text)), + Some(text) => RawSymbolTokenRef::Text(text), None => RawSymbolTokenRef::SymbolId(0), } } @@ -69,45 +60,21 @@ where } } -impl AsRawSymbolTokenRef for RawSymbolToken { - fn as_raw_symbol_token_ref(&self) -> RawSymbolTokenRef { - match self { - RawSymbolToken::SymbolId(sid) => RawSymbolTokenRef::SymbolId(*sid), - RawSymbolToken::Text(text) => RawSymbolTokenRef::Text(Cow::from(text.as_str())), - } - } -} - -impl<'a> From for RawSymbolTokenRef<'a> { - fn from(value: RawSymbolToken) -> Self { - match value { - RawSymbolToken::SymbolId(sid) => RawSymbolTokenRef::SymbolId(sid), - RawSymbolToken::Text(text) => RawSymbolTokenRef::Text(text.into()), - } - } -} - -impl<'a> From<&'a RawSymbolToken> for RawSymbolTokenRef<'a> { - fn from(value: &'a RawSymbolToken) -> Self { - value.as_raw_symbol_token_ref() - } -} - -impl<'a, 'b> From<&'a RawSymbolTokenRef<'b>> for RawSymbolTokenRef<'b> { +impl<'a, 'b> From<&'a RawSymbolTokenRef<'b>> for RawSymbolTokenRef<'a> { fn from(value: &'a RawSymbolTokenRef<'b>) -> Self { - value.clone() + *value } } impl<'a> From<&'a str> for RawSymbolTokenRef<'a> { fn from(value: &'a str) -> Self { - RawSymbolTokenRef::Text(Cow::Borrowed(value)) + RawSymbolTokenRef::Text(value) } } impl<'a> From<&'a &str> for RawSymbolTokenRef<'a> { fn from(value: &'a &str) -> Self { - RawSymbolTokenRef::Text(Cow::Borrowed(value)) + RawSymbolTokenRef::Text(value) } } @@ -123,15 +90,11 @@ impl<'a> From<&'a SymbolId> for RawSymbolTokenRef<'a> { } } -impl<'a> From for RawSymbolTokenRef<'a> { - fn from(value: Symbol) -> Self { - let Symbol { text } = value; - match text { - SymbolText::Shared(shared) => { - RawSymbolTokenRef::Text(String::from(shared.as_ref()).into()) - } - SymbolText::Owned(owned) => RawSymbolTokenRef::Text(owned.into()), - SymbolText::Unknown => RawSymbolTokenRef::SymbolId(0), +impl<'a> From> for RawSymbolTokenRef<'a> { + fn from(value: SymbolRef<'a>) -> Self { + match value.text() { + None => RawSymbolTokenRef::SymbolId(0), + Some(text) => RawSymbolTokenRef::Text(text), } } } diff --git a/src/serde/ser.rs b/src/serde/ser.rs index d99eb0ce..e98c33f6 100644 --- a/src/serde/ser.rs +++ b/src/serde/ser.rs @@ -166,9 +166,9 @@ impl<'a, V: ValueWriter + 'a> ser::Serializer for ValueSerializer<'a, V> { self.value_writer.write(Null(IonType::Null)) } - fn serialize_some(self, value: &T) -> Result + fn serialize_some(self, value: &T) -> Result where - T: Serialize, + T: ?Sized + Serialize, { value.serialize(self) } @@ -190,13 +190,13 @@ impl<'a, V: ValueWriter + 'a> ser::Serializer for ValueSerializer<'a, V> { self.value_writer.write(variant.as_symbol_ref()) } - fn serialize_newtype_struct( + fn serialize_newtype_struct( self, name: &'static str, value: &T, ) -> Result where - T: Serialize, + T: ?Sized + Serialize, { if name == TUNNELED_TIMESTAMP_TYPE_NAME { assert_eq!( @@ -222,7 +222,7 @@ impl<'a, V: ValueWriter + 'a> ser::Serializer for ValueSerializer<'a, V> { } } - fn serialize_newtype_variant( + fn serialize_newtype_variant( self, _name: &'static str, _variant_index: u32, @@ -230,7 +230,7 @@ impl<'a, V: ValueWriter + 'a> ser::Serializer for ValueSerializer<'a, V> { value: &T, ) -> Result where - T: Serialize, + T: ?Sized + Serialize, { value.serialize(ValueSerializer::new( self.value_writer.with_annotations([variant])?, @@ -328,9 +328,9 @@ impl ser::SerializeSeq for SeqWriter { type Ok = (); type Error = IonError; - fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> + fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> where - T: Serialize, + T: ?Sized + Serialize, { value.serialize(ValueSerializer::new(self.value_writer())) } @@ -344,9 +344,9 @@ impl ser::SerializeTuple for SeqWriter { type Ok = (); type Error = IonError; - fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> + fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> where - T: Serialize, + T: ?Sized + Serialize, { value.serialize(ValueSerializer::new(self.value_writer())) } @@ -360,9 +360,9 @@ impl ser::SerializeTupleStruct for SeqWriter { type Ok = (); type Error = IonError; - fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> + fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> where - T: Serialize, + T: ?Sized + Serialize, { value.serialize(ValueSerializer::new(self.value_writer())) } @@ -376,9 +376,9 @@ impl ser::SerializeTupleVariant for SeqWriter { type Ok = (); type Error = IonError; - fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> + fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> where - T: Serialize, + T: ?Sized + Serialize, { value.serialize(ValueSerializer::new(self.value_writer())) } @@ -410,9 +410,9 @@ impl ser::SerializeMap for MapWriter { type Ok = (); type Error = IonError; - fn serialize_key(&mut self, key: &T) -> Result<(), Self::Error> + fn serialize_key(&mut self, key: &T) -> Result<(), Self::Error> where - T: Serialize, + T: ?Sized + Serialize, { // We need to verify that the key is a string type or can be converted // to string @@ -421,9 +421,9 @@ impl ser::SerializeMap for MapWriter { self.encode_field_name(field_name.as_str()) } - fn serialize_value(&mut self, value: &T) -> Result<(), Self::Error> + fn serialize_value(&mut self, value: &T) -> Result<(), Self::Error> where - T: Serialize, + T: ?Sized + Serialize, { let serializer = ValueSerializer::new(self.make_value_writer()); value.serialize(serializer) @@ -438,13 +438,9 @@ impl ser::SerializeStructVariant for MapWriter { type Ok = (); type Error = IonError; - fn serialize_field( - &mut self, - key: &'static str, - value: &T, - ) -> Result<(), Self::Error> + fn serialize_field(&mut self, key: &'static str, value: &T) -> Result<(), Self::Error> where - T: Serialize, + T: ?Sized + Serialize, { let serializer = ValueSerializer::new(self.field_writer(key)); value.serialize(serializer) @@ -459,13 +455,9 @@ impl ser::SerializeStruct for MapWriter { type Ok = (); type Error = IonError; - fn serialize_field( - &mut self, - key: &'static str, - value: &T, - ) -> Result + fn serialize_field(&mut self, key: &'static str, value: &T) -> Result where - T: Serialize, + T: ?Sized + Serialize, { let serializer = ValueSerializer::new(self.field_writer(key)); value.serialize(serializer) @@ -504,13 +496,13 @@ impl ser::Serializer for MapKeySerializer { Ok(variant.to_string()) } - fn serialize_newtype_struct( + fn serialize_newtype_struct( self, _name: &'static str, value: &T, ) -> Result where - T: Serialize, + T: ?Sized + Serialize, { value.serialize(self) } @@ -579,9 +571,9 @@ impl ser::Serializer for MapKeySerializer { Err(key_must_be_a_string()) } - fn serialize_some(self, value: &T) -> Result + fn serialize_some(self, value: &T) -> Result where - T: Serialize, + T: ?Sized + Serialize, { value.serialize(self) } @@ -594,7 +586,7 @@ impl ser::Serializer for MapKeySerializer { Err(key_must_be_a_string()) } - fn serialize_newtype_variant( + fn serialize_newtype_variant( self, _name: &'static str, _variant_index: u32, @@ -602,7 +594,7 @@ impl ser::Serializer for MapKeySerializer { _value: &T, ) -> Result where - T: Serialize, + T: ?Sized + Serialize, { Err(key_must_be_a_string()) } diff --git a/src/symbol_ref.rs b/src/symbol_ref.rs index ece1ab7c..6a61648f 100644 --- a/src/symbol_ref.rs +++ b/src/symbol_ref.rs @@ -1,14 +1,14 @@ use crate::raw_symbol_token_ref::{AsRawSymbolTokenRef, RawSymbolTokenRef}; -use crate::Symbol; -use std::borrow::{Borrow, Cow}; +use crate::{Str, Symbol}; +use std::borrow::Borrow; use std::fmt::{Debug, Formatter}; use std::hash::{Hash, Hasher}; /// A reference to a fully resolved symbol. Like `Symbol` (a fully resolved symbol with a /// static lifetime), a `SymbolRef` may have known or undefined text (i.e. `$0`). -#[derive(PartialEq, Eq, PartialOrd, Ord, Clone)] +#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy)] pub struct SymbolRef<'a> { - text: Option>, + text: Option<&'a str>, } impl<'a> Debug for SymbolRef<'a> { @@ -19,8 +19,8 @@ impl<'a> Debug for SymbolRef<'a> { impl<'a> SymbolRef<'a> { /// If this symbol has known text, returns `Some(&str)`. Otherwise, returns `None`. - pub fn text(&self) -> Option<&str> { - self.text.as_ref().map(|t| t.as_ref()) + pub fn text(&self) -> Option<&'a str> { + self.text } /// Constructs a `SymbolRef` with unknown text. @@ -29,17 +29,14 @@ impl<'a> SymbolRef<'a> { } /// Constructs a `SymbolRef` with the specified text. - pub fn with_text(text: impl Into>) -> SymbolRef<'a> { - SymbolRef { - text: Some(text.into()), - } + pub fn with_text(text: &'a str) -> SymbolRef<'a> { + SymbolRef { text: Some(text) } } pub fn to_owned(self) -> Symbol { match self.text { None => Symbol::unknown_text(), - Some(Cow::Borrowed(text)) => Symbol::owned(text), - Some(Cow::Owned(text)) => Symbol::owned(text), + Some(text) => Symbol::owned(Str::from(text)), } } } @@ -64,7 +61,7 @@ pub trait AsSymbolRef { impl<'a, A: AsRef + 'a> AsSymbolRef for A { fn as_symbol_ref(&self) -> SymbolRef { SymbolRef { - text: Some(Cow::Borrowed(self.as_ref())), + text: Some(self.as_ref()), } } } @@ -80,30 +77,15 @@ impl<'a> Hash for SymbolRef<'a> { impl<'a> From<&'a str> for SymbolRef<'a> { fn from(text: &'a str) -> Self { - Self { - text: Some(Cow::Borrowed(text)), - } - } -} - -impl<'a> From for SymbolRef<'a> { - fn from(text: String) -> Self { - Self { - text: Some(Cow::Owned(text)), - } - } -} - -impl<'a> From> for SymbolRef<'a> { - fn from(value: Cow<'a, str>) -> Self { - Self { text: Some(value) } + Self { text: Some(text) } } } impl<'a> From<&'a Symbol> for SymbolRef<'a> { fn from(symbol: &'a Symbol) -> Self { - let text = symbol.text().map(Cow::Borrowed); - Self { text } + Self { + text: symbol.text(), + } } } @@ -134,18 +116,11 @@ impl AsSymbolRef for &Symbol { } } -impl<'borrow, 'data> AsSymbolRef for &'borrow SymbolRef<'data> { - fn as_symbol_ref(&self) -> SymbolRef<'data> { - // This is essentially free; the only data inside is an Option<&str> - (*self).clone() - } -} - impl<'a> AsRawSymbolTokenRef for SymbolRef<'a> { fn as_raw_symbol_token_ref(&self) -> RawSymbolTokenRef { match &self.text { None => RawSymbolTokenRef::SymbolId(0), - Some(text) => RawSymbolTokenRef::Text(text.as_ref().into()), + Some(text) => RawSymbolTokenRef::Text(text), } } } diff --git a/src/text/text_formatter.rs b/src/text/text_formatter.rs index 404d556d..511c29f7 100644 --- a/src/text/text_formatter.rs +++ b/src/text/text_formatter.rs @@ -1,7 +1,9 @@ use crate::raw_symbol_token_ref::{AsRawSymbolTokenRef, RawSymbolTokenRef}; +use crate::result::IonFailure; use crate::{Annotations, Sequence}; use crate::{Decimal, Int, Struct, Timestamp}; use crate::{IonResult, IonType}; +use std::{fmt, io}; pub const STRING_ESCAPE_CODES: &[&str] = &string_escape_code_init(); @@ -180,10 +182,62 @@ pub(crate) const fn string_escape_code_init() -> [&'static str; 256] { /// Provides a text formatter for Ion values /// This is used with the Display implementation of `OwnedElement` -pub struct IonValueFormatter<'a, W: std::fmt::Write> { +pub struct IonValueFormatter<'a, W: fmt::Write> { pub(crate) output: &'a mut W, } +impl<'a, W: fmt::Write> IonValueFormatter<'a, W> { + pub fn new(output: &'a mut W) -> Self { + Self { output } + } +} + +/// A shim that allows values to be formatted to implementations of `io::Write` instead of being limited +/// to implementations of `fmt::Write`. +pub struct IoFmtShim { + output: W, + // If an I/O error happens while writing the formatted text to output, write!() will return + // a `fmt::Error` (which contains no information). This result can then be inspected for the + // cause. This approach is similar to the one the Rust std library uses internally. See: + // https://github.com/rust-lang/rust/blob/cc4dd6fc9f1a5c798df269933c7e442b79661a86/library/std/src/io/mod.rs#L1665 + result: IonResult<()>, +} + +impl IoFmtShim { + pub fn new(output: W) -> Self { + Self { + output, + result: Ok(()), + } + } + + pub fn into_result(self) -> IonResult<()> { + self.result + } + + pub fn value_formatter(&mut self) -> IonValueFormatter<'_, Self> { + IonValueFormatter::new(self) + } +} + +impl fmt::Write for IoFmtShim { + fn write_str(&mut self, s: &str) -> fmt::Result { + let io_result = self.output.write_all(s.as_bytes()); + match io_result { + Ok(_) => { + self.result = Ok(()); + Ok(()) + } + Err(e) => { + self.result = IonResult::encoding_error(format!( + "I/O failure occurred during formatting: {e:?}", + )); + Err(fmt::Error) + } + } + } +} + impl<'a, W: std::fmt::Write> IonValueFormatter<'a, W> { /// Returns `true` if the provided `token`'s text is an 'identifier'. That is, the text starts /// with a `$`, `_` or ASCII letter and is followed by a sequence of `$`, `_`, or ASCII letters @@ -229,13 +283,12 @@ impl<'a, W: std::fmt::Write> IonValueFormatter<'a, W> { match token.as_raw_symbol_token_ref() { RawSymbolTokenRef::SymbolId(sid) => write!(self.output, "${sid}")?, RawSymbolTokenRef::Text(text) - if Self::token_is_keyword(text.as_ref()) - || Self::token_resembles_symbol_id(text.as_ref()) => + if Self::token_is_keyword(text) || Self::token_resembles_symbol_id(text) => { // Write the symbol text in single quotes write!(self.output, "'{text}'")?; } - RawSymbolTokenRef::Text(text) if Self::token_is_identifier(text.as_ref()) => { + RawSymbolTokenRef::Text(text) if Self::token_is_identifier(text) => { // Write the symbol text without quotes write!(self.output, "{text}")? } @@ -364,19 +417,19 @@ impl<'a, W: std::fmt::Write> IonValueFormatter<'a, W> { value.format(self.output) } - pub(crate) fn format_symbol(&mut self, value: A) -> IonResult<()> { + pub fn format_symbol(&mut self, value: A) -> IonResult<()> { self.format_symbol_token(value)?; Ok(()) } - pub(crate) fn format_string>(&mut self, value: S) -> IonResult<()> { + pub fn format_string>(&mut self, value: S) -> IonResult<()> { write!(self.output, "\"")?; self.format_escaped_text_body(value)?; write!(self.output, "\"")?; Ok(()) } - pub(crate) fn format_clob>(&mut self, value: A) -> IonResult<()> { + pub fn format_clob>(&mut self, value: A) -> IonResult<()> { // clob_value to be written based on defined STRING_ESCAPE_CODES. const NUM_DELIMITER_BYTES: usize = 4; // {{}} const NUM_HEX_BYTES_PER_BYTE: usize = 4; // \xHH @@ -400,7 +453,7 @@ impl<'a, W: std::fmt::Write> IonValueFormatter<'a, W> { Ok(()) } - pub(crate) fn format_blob>(&mut self, value: A) -> IonResult<()> { + pub fn format_blob>(&mut self, value: A) -> IonResult<()> { write!(self.output, "{{{{{}}}}}", base64::encode(value))?; Ok(()) } diff --git a/src/types/decimal/mod.rs b/src/types/decimal/mod.rs index 36cdb623..b6604653 100644 --- a/src/types/decimal/mod.rs +++ b/src/types/decimal/mod.rs @@ -704,7 +704,7 @@ mod decimal_tests { 309, "f64::MAX should have 309 decimal digits" ); - assert_eq!(diff_fract, 0.into()); + assert_eq!(diff_fract, 0f64); // MIN f64 - e.g., -1.7976931348623157e+308_f64 let actual: Decimal = f64::MIN.try_into().unwrap(); @@ -716,7 +716,7 @@ mod decimal_tests { 309, "f64::MIN should have 309 decimal digits" ); - assert_eq!(diff_fract, 0.into()); + assert_eq!(diff_fract, 0f64); } #[test] @@ -729,7 +729,7 @@ mod decimal_tests { UInt::from(diff_int.magnitude().clone()).number_of_decimal_digits(), 1 ); - assert_eq!(diff_fract, 0.into()); + assert_eq!(diff_fract, 0f64); // MIN_POSITIVE f64 - e.g., 2.2250738585072014e-308_f64 let actual: Decimal = f64::MIN_POSITIVE.try_into().unwrap(); @@ -740,7 +740,7 @@ mod decimal_tests { UInt::from(diff_int.magnitude().clone()).number_of_decimal_digits(), 1 ); - assert_eq!(diff_fract, 0.into()); + assert_eq!(diff_fract, 0f64); } #[rstest]